smftools 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164):
  1. smftools/__init__.py +39 -7
  2. smftools/_settings.py +2 -0
  3. smftools/_version.py +3 -1
  4. smftools/cli/__init__.py +1 -0
  5. smftools/cli/archived/cli_flows.py +2 -0
  6. smftools/cli/helpers.py +34 -6
  7. smftools/cli/hmm_adata.py +239 -33
  8. smftools/cli/latent_adata.py +318 -0
  9. smftools/cli/load_adata.py +167 -131
  10. smftools/cli/preprocess_adata.py +180 -53
  11. smftools/cli/spatial_adata.py +152 -100
  12. smftools/cli_entry.py +38 -1
  13. smftools/config/__init__.py +2 -0
  14. smftools/config/conversion.yaml +11 -1
  15. smftools/config/default.yaml +42 -2
  16. smftools/config/experiment_config.py +59 -1
  17. smftools/constants.py +65 -0
  18. smftools/datasets/__init__.py +2 -0
  19. smftools/hmm/HMM.py +97 -3
  20. smftools/hmm/__init__.py +24 -13
  21. smftools/hmm/archived/apply_hmm_batched.py +2 -0
  22. smftools/hmm/archived/calculate_distances.py +2 -0
  23. smftools/hmm/archived/call_hmm_peaks.py +2 -0
  24. smftools/hmm/archived/train_hmm.py +2 -0
  25. smftools/hmm/call_hmm_peaks.py +5 -2
  26. smftools/hmm/display_hmm.py +4 -1
  27. smftools/hmm/hmm_readwrite.py +7 -2
  28. smftools/hmm/nucleosome_hmm_refinement.py +2 -0
  29. smftools/informatics/__init__.py +59 -34
  30. smftools/informatics/archived/bam_conversion.py +2 -0
  31. smftools/informatics/archived/bam_direct.py +2 -0
  32. smftools/informatics/archived/basecall_pod5s.py +2 -0
  33. smftools/informatics/archived/basecalls_to_adata.py +2 -0
  34. smftools/informatics/archived/conversion_smf.py +2 -0
  35. smftools/informatics/archived/deaminase_smf.py +1 -0
  36. smftools/informatics/archived/direct_smf.py +2 -0
  37. smftools/informatics/archived/fast5_to_pod5.py +2 -0
  38. smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  39. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
  40. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  41. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  42. smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  43. smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  44. smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  45. smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  46. smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  47. smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  48. smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  49. smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  50. smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  51. smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  52. smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  53. smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  54. smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  55. smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  56. smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  57. smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
  58. smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  59. smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  60. smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  61. smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  62. smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  63. smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  64. smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  65. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
  66. smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  67. smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  68. smftools/informatics/archived/print_bam_query_seq.py +2 -0
  69. smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  70. smftools/informatics/archived/subsample_pod5.py +2 -0
  71. smftools/informatics/bam_functions.py +1093 -176
  72. smftools/informatics/basecalling.py +2 -0
  73. smftools/informatics/bed_functions.py +271 -61
  74. smftools/informatics/binarize_converted_base_identities.py +3 -0
  75. smftools/informatics/complement_base_list.py +2 -0
  76. smftools/informatics/converted_BAM_to_adata.py +641 -176
  77. smftools/informatics/fasta_functions.py +94 -10
  78. smftools/informatics/h5ad_functions.py +123 -4
  79. smftools/informatics/modkit_extract_to_adata.py +1019 -431
  80. smftools/informatics/modkit_functions.py +2 -0
  81. smftools/informatics/ohe.py +2 -0
  82. smftools/informatics/pod5_functions.py +3 -2
  83. smftools/informatics/sequence_encoding.py +72 -0
  84. smftools/logging_utils.py +21 -2
  85. smftools/machine_learning/__init__.py +22 -6
  86. smftools/machine_learning/data/__init__.py +2 -0
  87. smftools/machine_learning/data/anndata_data_module.py +18 -4
  88. smftools/machine_learning/data/preprocessing.py +2 -0
  89. smftools/machine_learning/evaluation/__init__.py +2 -0
  90. smftools/machine_learning/evaluation/eval_utils.py +2 -0
  91. smftools/machine_learning/evaluation/evaluators.py +14 -9
  92. smftools/machine_learning/inference/__init__.py +2 -0
  93. smftools/machine_learning/inference/inference_utils.py +2 -0
  94. smftools/machine_learning/inference/lightning_inference.py +6 -1
  95. smftools/machine_learning/inference/sklearn_inference.py +2 -0
  96. smftools/machine_learning/inference/sliding_window_inference.py +2 -0
  97. smftools/machine_learning/models/__init__.py +2 -0
  98. smftools/machine_learning/models/base.py +7 -2
  99. smftools/machine_learning/models/cnn.py +7 -2
  100. smftools/machine_learning/models/lightning_base.py +16 -11
  101. smftools/machine_learning/models/mlp.py +5 -1
  102. smftools/machine_learning/models/positional.py +7 -2
  103. smftools/machine_learning/models/rnn.py +5 -1
  104. smftools/machine_learning/models/sklearn_models.py +14 -9
  105. smftools/machine_learning/models/transformer.py +7 -2
  106. smftools/machine_learning/models/wrappers.py +6 -2
  107. smftools/machine_learning/training/__init__.py +2 -0
  108. smftools/machine_learning/training/train_lightning_model.py +13 -3
  109. smftools/machine_learning/training/train_sklearn_model.py +2 -0
  110. smftools/machine_learning/utils/__init__.py +2 -0
  111. smftools/machine_learning/utils/device.py +5 -1
  112. smftools/machine_learning/utils/grl.py +5 -1
  113. smftools/metadata.py +1 -1
  114. smftools/optional_imports.py +31 -0
  115. smftools/plotting/__init__.py +41 -31
  116. smftools/plotting/autocorrelation_plotting.py +9 -5
  117. smftools/plotting/classifiers.py +16 -4
  118. smftools/plotting/general_plotting.py +2415 -629
  119. smftools/plotting/hmm_plotting.py +97 -9
  120. smftools/plotting/position_stats.py +15 -7
  121. smftools/plotting/qc_plotting.py +6 -1
  122. smftools/preprocessing/__init__.py +36 -37
  123. smftools/preprocessing/append_base_context.py +17 -17
  124. smftools/preprocessing/append_mismatch_frequency_sites.py +158 -0
  125. smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
  126. smftools/preprocessing/archived/calculate_complexity.py +2 -0
  127. smftools/preprocessing/archived/mark_duplicates.py +2 -0
  128. smftools/preprocessing/archived/preprocessing.py +2 -0
  129. smftools/preprocessing/archived/remove_duplicates.py +2 -0
  130. smftools/preprocessing/binary_layers_to_ohe.py +2 -1
  131. smftools/preprocessing/calculate_complexity_II.py +4 -1
  132. smftools/preprocessing/calculate_consensus.py +1 -1
  133. smftools/preprocessing/calculate_pairwise_differences.py +2 -0
  134. smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
  135. smftools/preprocessing/calculate_position_Youden.py +9 -2
  136. smftools/preprocessing/calculate_read_modification_stats.py +6 -1
  137. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
  138. smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
  139. smftools/preprocessing/flag_duplicate_reads.py +42 -54
  140. smftools/preprocessing/make_dirs.py +2 -1
  141. smftools/preprocessing/min_non_diagonal.py +2 -0
  142. smftools/preprocessing/recipes.py +2 -0
  143. smftools/readwrite.py +53 -17
  144. smftools/schema/anndata_schema_v1.yaml +15 -1
  145. smftools/tools/__init__.py +30 -18
  146. smftools/tools/archived/apply_hmm.py +2 -0
  147. smftools/tools/archived/classifiers.py +2 -0
  148. smftools/tools/archived/classify_methylated_features.py +2 -0
  149. smftools/tools/archived/classify_non_methylated_features.py +2 -0
  150. smftools/tools/archived/subset_adata_v1.py +2 -0
  151. smftools/tools/archived/subset_adata_v2.py +2 -0
  152. smftools/tools/calculate_leiden.py +57 -0
  153. smftools/tools/calculate_nmf.py +119 -0
  154. smftools/tools/calculate_umap.py +93 -8
  155. smftools/tools/cluster_adata_on_methylation.py +7 -1
  156. smftools/tools/position_stats.py +17 -27
  157. smftools/tools/rolling_nn_distance.py +235 -0
  158. smftools/tools/tensor_factorization.py +169 -0
  159. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/METADATA +69 -33
  160. smftools-0.3.1.dist-info/RECORD +189 -0
  161. smftools-0.2.5.dist-info/RECORD +0 -181
  162. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/WHEEL +0 -0
  163. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/entry_points.txt +0 -0
  164. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## bam_conversion
2
4
 
3
5
  def bam_conversion(fasta, output_directory, conversion_types, strands, basecalled_path, split_dir, mapping_threshold, experiment_name, bam_suffix):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## bam_direct
2
4
 
3
5
  def bam_direct(fasta, output_directory, mod_list, thresholds, bam_path, split_dir, mapping_threshold, experiment_name, bam_suffix, batch_size):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # basecall_pod5s
2
4
 
3
5
  def basecall_pod5s(config_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## basecalls_to_adata
2
4
 
3
5
  def basecalls_to_adata(config_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## conversion_smf
2
4
 
3
5
  def conversion_smf(fasta, output_directory, conversion_types, strands, model_dir, model, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, basecall, barcode_both_ends, trim, device, make_bigwigs, threads, input_already_demuxed):
@@ -1,3 +1,4 @@
1
+ from __future__ import annotations
1
2
 
2
3
  def deaminase_smf(fasta, output_directory, conversion_types, strands, model_dir, model, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, basecall, barcode_both_ends, trim, device, make_bigwigs, threads, input_already_demuxed):
3
4
  """
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## direct_smf
2
4
 
3
5
  def direct_smf(fasta, output_directory, mod_list, model_dir, model, thresholds, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, batch_size, basecall, barcode_both_ends, trim, device, make_bigwigs, skip_unclassified, delete_batch_hdfs, threads):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
  import subprocess
3
5
  from typing import Union, List
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # from .align_and_sort_BAM import align_and_sort_BAM
2
4
  # from .aligned_BAM_to_bed import aligned_BAM_to_bed
3
5
  # from .bam_qc import bam_qc
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
  import os
3
5
  import subprocess
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
2
4
  """
3
5
  Takes an aligned BAM as input and writes a BED file of reads as output.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
  import pybedtools
3
5
  import pyBigWig
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## canoncall
2
4
 
3
5
  # Conversion SMF specific
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## converted_BAM_to_adata
2
4
 
3
5
  def converted_BAM_to_adata(converted_FASTA, split_dir, mapping_threshold, experiment_name, conversion_types, bam_suffix):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## count_aligned_reads
2
4
 
3
5
  # General
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## demux_and_index_BAM
2
4
 
3
5
  def demux_and_index_BAM(aligned_sorted_BAM, split_dir, bam_suffix, barcode_kit, barcode_both_ends, trim, fasta, make_bigwigs, threads):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  def extract_base_identities(bam_file, chromosome, positions, max_reference_length, sequence):
2
4
  """
3
5
  Efficiently extracts base identities from mapped reads with reference coordinates.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## extract_mods
2
4
 
3
5
  def extract_mods(thresholds, mod_tsv_dir, split_dir, bam_suffix, skip_unclassified=True, modkit_summary=False, threads=None):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # extract_read_features_from_bam
2
4
 
3
5
  def extract_read_features_from_bam(bam_file_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # extract_read_lengths_from_bed
2
4
 
3
5
  def extract_read_lengths_from_bed(file_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # extract_readnames_from_BAM
2
4
 
3
5
  def extract_readnames_from_BAM(aligned_BAM):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  def find_conversion_sites(fasta_file, modification_type, conversions, deaminase_footprinting=False):
2
4
  """
3
5
  Finds genomic coordinates of modified bases (5mC or 6mA) in a reference FASTA file.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import numpy as np
2
4
  import gzip
3
5
  import os
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # get_chromosome_lengths
2
4
 
3
5
  def get_chromosome_lengths(fasta):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## get_native_references
2
4
 
3
5
  # Direct methylation specific
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import pysam
2
4
  from pathlib import Path
3
5
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## fasta_module
2
4
  from .. import readwrite
3
5
  # bioinformatic operations
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # load_adata
2
4
  ######################################################################################################
3
5
  # Archived helper; legacy imports removed for syntax compatibility.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## make_modbed
2
4
 
3
5
  # Direct SMF
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## modQC
2
4
 
3
5
  # Direct SMF
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## modcall
2
4
 
3
5
  # Direct methylation specific
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
2
4
  import anndata as ad
3
5
  import numpy as np
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # ohe_layers_decode
2
4
 
3
5
  def ohe_layers_decode(adata, obs_names):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # one_hot_decode
2
4
 
3
5
  # String encodings
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # one_hot_encode
2
4
 
3
5
  def one_hot_encode(sequence, device='auto'):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # plot_bed_histograms
2
4
 
3
5
  def plot_bed_histograms(
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## separate_bam_by_bc
2
4
 
3
5
  def separate_bam_by_bc(input_bam, output_prefix, bam_suffix, split_dir):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## split_and_index_BAM
2
4
 
3
5
  def split_and_index_BAM(aligned_sorted_BAM, split_dir, bam_suffix):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import pysam
2
4
  import sys
3
5
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
  from pyfaidx import Fasta
3
5
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # subsample_pod5
2
4
 
3
5
  def subsample_pod5(pod5_path, read_name_path, output_directory):