smftools 0.2.4__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317)
  1. smftools-0.3.0/.github/workflows/ci.yml +118 -0
  2. {smftools-0.2.4 → smftools-0.3.0}/.gitignore +3 -1
  3. smftools-0.3.0/.pre-commit-config.yaml +7 -0
  4. {smftools-0.2.4 → smftools-0.3.0}/.readthedocs.yaml +2 -3
  5. smftools-0.3.0/AGENTS.md +69 -0
  6. smftools-0.3.0/PKG-INFO +147 -0
  7. smftools-0.3.0/README.md +14 -0
  8. smftools-0.3.0/docs/source/api/datasets.md +14 -0
  9. smftools-0.3.0/docs/source/api/informatics.md +44 -0
  10. smftools-0.3.0/docs/source/api/preprocessing.md +43 -0
  11. smftools-0.3.0/docs/source/api/tools.md +20 -0
  12. {smftools-0.2.4 → smftools-0.3.0}/docs/source/basic_usage.md +36 -1
  13. smftools-0.3.0/docs/source/cli.md +6 -0
  14. smftools-0.3.0/docs/source/conf.py +233 -0
  15. {smftools-0.2.4 → smftools-0.3.0}/docs/source/contributors.md +1 -1
  16. {smftools-0.2.4 → smftools-0.3.0}/docs/source/index.md +3 -1
  17. {smftools-0.2.4 → smftools-0.3.0}/docs/source/installation.md +35 -9
  18. smftools-0.3.0/docs/source/release-notes/0.1.0.md +6 -0
  19. smftools-0.3.0/docs/source/release-notes/0.1.1.md +4 -0
  20. smftools-0.3.0/docs/source/release-notes/0.1.6.md +4 -0
  21. smftools-0.3.0/docs/source/release-notes/0.2.1.md +4 -0
  22. smftools-0.3.0/docs/source/release-notes/0.2.3.md +4 -0
  23. smftools-0.3.0/docs/source/release-notes/0.3.0.md +4 -0
  24. smftools-0.3.0/docs/source/release-notes/index.md +35 -0
  25. smftools-0.3.0/docs/source/requirements.txt +16 -0
  26. smftools-0.3.0/docs/source/schema/anndata_schema.md +13 -0
  27. smftools-0.3.0/docs/source/tutorials/cli_usage.md +91 -0
  28. smftools-0.3.0/docs/source/tutorials/experiment_config.md +52 -0
  29. smftools-0.3.0/docs/source/tutorials/index.md +13 -0
  30. {smftools-0.2.4 → smftools-0.3.0}/pyproject.toml +144 -38
  31. {smftools-0.2.4 → smftools-0.3.0}/requirements.txt +21 -15
  32. smftools-0.3.0/smftools/__init__.py +60 -0
  33. {smftools-0.2.4 → smftools-0.3.0}/smftools/_settings.py +6 -6
  34. smftools-0.3.0/smftools/_version.py +3 -0
  35. smftools-0.3.0/smftools/cli/__init__.py +1 -0
  36. {smftools-0.2.4 → smftools-0.3.0}/smftools/cli/archived/cli_flows.py +2 -0
  37. {smftools-0.2.4 → smftools-0.3.0}/smftools/cli/helpers.py +9 -1
  38. smftools-0.3.0/smftools/cli/hmm_adata.py +1024 -0
  39. {smftools-0.2.4 → smftools-0.3.0}/smftools/cli/load_adata.py +432 -280
  40. {smftools-0.2.4 → smftools-0.3.0}/smftools/cli/preprocess_adata.py +287 -171
  41. {smftools-0.2.4 → smftools-0.3.0}/smftools/cli/spatial_adata.py +141 -53
  42. {smftools-0.2.4 → smftools-0.3.0}/smftools/cli_entry.py +119 -178
  43. smftools-0.3.0/smftools/config/__init__.py +3 -0
  44. {smftools-0.2.4 → smftools-0.3.0}/smftools/config/conversion.yaml +5 -1
  45. {smftools-0.2.4 → smftools-0.3.0}/smftools/config/deaminase.yaml +1 -1
  46. {smftools-0.2.4 → smftools-0.3.0}/smftools/config/default.yaml +26 -18
  47. {smftools-0.2.4 → smftools-0.3.0}/smftools/config/direct.yaml +8 -3
  48. {smftools-0.2.4 → smftools-0.3.0}/smftools/config/discover_input_files.py +19 -5
  49. {smftools-0.2.4 → smftools-0.3.0}/smftools/config/experiment_config.py +511 -276
  50. smftools-0.3.0/smftools/constants.py +37 -0
  51. smftools-0.3.0/smftools/datasets/__init__.py +5 -0
  52. smftools-0.3.0/smftools/datasets/datasets.py +42 -0
  53. smftools-0.3.0/smftools/hmm/HMM.py +2292 -0
  54. smftools-0.3.0/smftools/hmm/__init__.py +24 -0
  55. {smftools-0.2.4 → smftools-0.3.0}/smftools/hmm/archived/apply_hmm_batched.py +2 -0
  56. {smftools-0.2.4 → smftools-0.3.0}/smftools/hmm/archived/calculate_distances.py +2 -0
  57. {smftools-0.2.4 → smftools-0.3.0}/smftools/hmm/archived/call_hmm_peaks.py +18 -1
  58. {smftools-0.2.4 → smftools-0.3.0}/smftools/hmm/archived/train_hmm.py +2 -0
  59. smftools-0.3.0/smftools/hmm/call_hmm_peaks.py +317 -0
  60. smftools-0.3.0/smftools/hmm/display_hmm.py +34 -0
  61. smftools-0.3.0/smftools/hmm/hmm_readwrite.py +30 -0
  62. smftools-0.3.0/smftools/hmm/nucleosome_hmm_refinement.py +194 -0
  63. smftools-0.3.0/smftools/informatics/__init__.py +62 -0
  64. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/bam_conversion.py +2 -0
  65. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/bam_direct.py +2 -0
  66. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/basecall_pod5s.py +2 -0
  67. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/basecalls_to_adata.py +2 -0
  68. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/conversion_smf.py +2 -0
  69. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/deaminase_smf.py +1 -0
  70. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/direct_smf.py +2 -0
  71. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/fast5_to_pod5.py +2 -0
  72. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  73. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +16 -1
  74. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  75. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  76. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  77. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  78. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  79. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  80. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  81. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  82. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  83. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  84. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  85. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  86. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  87. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  88. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  89. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  90. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  91. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  92. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  93. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/load_adata.py +5 -3
  94. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  95. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  96. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  97. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  98. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  99. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  100. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  101. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +5 -1
  102. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  103. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  104. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/print_bam_query_seq.py +9 -1
  105. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  106. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/archived/subsample_pod5.py +2 -0
  107. smftools-0.3.0/smftools/informatics/bam_functions.py +1601 -0
  108. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/basecalling.py +53 -9
  109. smftools-0.3.0/smftools/informatics/bed_functions.py +609 -0
  110. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/binarize_converted_base_identities.py +21 -7
  111. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/complement_base_list.py +9 -6
  112. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/converted_BAM_to_adata.py +324 -137
  113. smftools-0.3.0/smftools/informatics/fasta_functions.py +417 -0
  114. smftools-0.3.0/smftools/informatics/h5ad_functions.py +369 -0
  115. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/modkit_extract_to_adata.py +623 -274
  116. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/modkit_functions.py +87 -44
  117. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/ohe.py +46 -21
  118. {smftools-0.2.4 → smftools-0.3.0}/smftools/informatics/pod5_functions.py +114 -74
  119. smftools-0.3.0/smftools/informatics/run_multiqc.py +37 -0
  120. smftools-0.3.0/smftools/logging_utils.py +51 -0
  121. smftools-0.3.0/smftools/machine_learning/__init__.py +23 -0
  122. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/data/__init__.py +2 -0
  123. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/data/anndata_data_module.py +157 -50
  124. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/data/preprocessing.py +4 -1
  125. smftools-0.3.0/smftools/machine_learning/evaluation/__init__.py +4 -0
  126. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/evaluation/eval_utils.py +13 -14
  127. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/evaluation/evaluators.py +52 -34
  128. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/inference/__init__.py +3 -1
  129. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/inference/inference_utils.py +9 -4
  130. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/inference/lightning_inference.py +14 -13
  131. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/inference/sklearn_inference.py +8 -8
  132. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/inference/sliding_window_inference.py +37 -25
  133. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/__init__.py +12 -5
  134. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/base.py +34 -43
  135. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/cnn.py +22 -13
  136. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/lightning_base.py +78 -42
  137. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/mlp.py +18 -5
  138. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/positional.py +10 -4
  139. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/rnn.py +8 -3
  140. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/sklearn_models.py +46 -24
  141. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/transformer.py +75 -55
  142. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/models/wrappers.py +8 -3
  143. smftools-0.3.0/smftools/machine_learning/training/__init__.py +4 -0
  144. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/training/train_lightning_model.py +42 -23
  145. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/training/train_sklearn_model.py +11 -15
  146. smftools-0.3.0/smftools/machine_learning/utils/__init__.py +4 -0
  147. smftools-0.3.0/smftools/machine_learning/utils/device.py +17 -0
  148. {smftools-0.2.4 → smftools-0.3.0}/smftools/machine_learning/utils/grl.py +8 -2
  149. smftools-0.3.0/smftools/metadata.py +443 -0
  150. smftools-0.3.0/smftools/optional_imports.py +31 -0
  151. smftools-0.3.0/smftools/plotting/__init__.py +33 -0
  152. {smftools-0.2.4 → smftools-0.3.0}/smftools/plotting/autocorrelation_plotting.py +153 -48
  153. {smftools-0.2.4 → smftools-0.3.0}/smftools/plotting/classifiers.py +175 -73
  154. {smftools-0.2.4 → smftools-0.3.0}/smftools/plotting/general_plotting.py +350 -168
  155. {smftools-0.2.4 → smftools-0.3.0}/smftools/plotting/hmm_plotting.py +53 -14
  156. {smftools-0.2.4 → smftools-0.3.0}/smftools/plotting/position_stats.py +155 -87
  157. {smftools-0.2.4 → smftools-0.3.0}/smftools/plotting/qc_plotting.py +25 -12
  158. smftools-0.3.0/smftools/preprocessing/__init__.py +36 -0
  159. smftools-0.3.0/smftools/preprocessing/append_base_context.py +157 -0
  160. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
  161. {smftools-0.2.4/smftools/preprocessing/archives → smftools-0.3.0/smftools/preprocessing/archived}/add_read_length_and_mapping_qc.py +2 -0
  162. {smftools-0.2.4/smftools/preprocessing/archives → smftools-0.3.0/smftools/preprocessing/archived}/calculate_complexity.py +5 -1
  163. {smftools-0.2.4/smftools/preprocessing/archives → smftools-0.3.0/smftools/preprocessing/archived}/mark_duplicates.py +2 -0
  164. {smftools-0.2.4/smftools/preprocessing/archives → smftools-0.3.0/smftools/preprocessing/archived}/preprocessing.py +10 -6
  165. {smftools-0.2.4/smftools/preprocessing/archives → smftools-0.3.0/smftools/preprocessing/archived}/remove_duplicates.py +2 -0
  166. smftools-0.3.0/smftools/preprocessing/binarize.py +34 -0
  167. smftools-0.3.0/smftools/preprocessing/binarize_on_Youden.py +143 -0
  168. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/binary_layers_to_ohe.py +18 -11
  169. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/calculate_complexity_II.py +89 -59
  170. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/calculate_consensus.py +28 -19
  171. smftools-0.3.0/smftools/preprocessing/calculate_coverage.py +76 -0
  172. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/calculate_pairwise_differences.py +4 -1
  173. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/calculate_pairwise_hamming_distances.py +7 -3
  174. smftools-0.3.0/smftools/preprocessing/calculate_position_Youden.py +186 -0
  175. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/calculate_read_length_stats.py +52 -23
  176. smftools-0.3.0/smftools/preprocessing/calculate_read_modification_stats.py +135 -0
  177. smftools-0.3.0/smftools/preprocessing/clean_NaN.py +72 -0
  178. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  179. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/filter_reads_on_length_quality_mapping.py +72 -37
  180. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/filter_reads_on_modification_thresholds.py +183 -73
  181. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/flag_duplicate_reads.py +708 -303
  182. smftools-0.3.0/smftools/preprocessing/invert_adata.py +52 -0
  183. smftools-0.3.0/smftools/preprocessing/load_sample_sheet.py +71 -0
  184. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/make_dirs.py +9 -3
  185. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/min_non_diagonal.py +4 -1
  186. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/recipes.py +58 -23
  187. smftools-0.3.0/smftools/preprocessing/reindex_references_adata.py +103 -0
  188. {smftools-0.2.4 → smftools-0.3.0}/smftools/preprocessing/subsample_adata.py +33 -16
  189. {smftools-0.2.4 → smftools-0.3.0}/smftools/readwrite.py +264 -109
  190. smftools-0.3.0/smftools/schema/__init__.py +11 -0
  191. smftools-0.3.0/smftools/schema/anndata_schema_v1.yaml +227 -0
  192. smftools-0.3.0/smftools/tools/__init__.py +27 -0
  193. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/archived/apply_hmm.py +2 -0
  194. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/archived/classifiers.py +165 -0
  195. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/archived/classify_methylated_features.py +2 -0
  196. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/archived/classify_non_methylated_features.py +2 -0
  197. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/archived/subset_adata_v1.py +12 -1
  198. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/archived/subset_adata_v2.py +14 -1
  199. smftools-0.3.0/smftools/tools/calculate_umap.py +103 -0
  200. smftools-0.3.0/smftools/tools/cluster_adata_on_methylation.py +180 -0
  201. smftools-0.3.0/smftools/tools/general_tools.py +114 -0
  202. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/position_stats.py +220 -99
  203. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/read_stats.py +50 -29
  204. {smftools-0.2.4 → smftools-0.3.0}/smftools/tools/spatial_autocorrelation.py +365 -192
  205. smftools-0.3.0/smftools/tools/subset_adata.py +30 -0
  206. smftools-0.3.0/tests/__init__.py +8 -0
  207. smftools-0.3.0/tests/_test_inputs/_test_bed_I.bed +2 -0
  208. smftools-0.3.0/tests/_test_inputs/_test_fasta_I.fa +78 -0
  209. smftools-0.3.0/tests/_test_inputs/_test_fasta_I.fa.fai +2 -0
  210. smftools-0.3.0/tests/_test_inputs/_test_pod5_I.pod5 +0 -0
  211. smftools-0.3.0/tests/_test_inputs/test_experiment_config_conversion_I.csv +13 -0
  212. smftools-0.3.0/tests/_test_inputs/test_experiment_config_deaminase_I.csv +13 -0
  213. smftools-0.3.0/tests/_test_inputs/test_experiment_config_direct_I.csv +15 -0
  214. smftools-0.3.0/tests/e2e/cli/test_load_adata.py +29 -0
  215. smftools-0.3.0/tests/e2e/cli/test_spatial_adata.py +29 -0
  216. smftools-0.3.0/tests/integration/__init__.py +0 -0
  217. smftools-0.3.0/tests/smoke/__init__.py +0 -0
  218. smftools-0.3.0/tests/smoke/cli/test_cli_imports.py +20 -0
  219. smftools-0.3.0/tests/smoke/config/test_config_imports.py +17 -0
  220. smftools-0.3.0/tests/smoke/datasets/test_datasets_imports.py +16 -0
  221. smftools-0.3.0/tests/smoke/hmm/test_hmm_imports.py +20 -0
  222. smftools-0.3.0/tests/smoke/import_helpers.py +16 -0
  223. smftools-0.3.0/tests/smoke/informatics/test_informatics_imports.py +28 -0
  224. smftools-0.3.0/tests/smoke/machine_learning/data/test_data_imports.py +17 -0
  225. smftools-0.3.0/tests/smoke/machine_learning/evaluation/test_evaluation_imports.py +17 -0
  226. smftools-0.3.0/tests/smoke/machine_learning/inference/test_inference_imports.py +19 -0
  227. smftools-0.3.0/tests/smoke/machine_learning/models/test_models_imports.py +24 -0
  228. smftools-0.3.0/tests/smoke/machine_learning/training/test_training_imports.py +17 -0
  229. smftools-0.3.0/tests/smoke/machine_learning/utils/test_utils_imports.py +17 -0
  230. smftools-0.3.0/tests/smoke/plotting/test_plotting_imports.py +21 -0
  231. smftools-0.3.0/tests/smoke/preprocessing/test_preprocessing_imports.py +40 -0
  232. smftools-0.3.0/tests/smoke/test_smftools_imports.py +21 -0
  233. smftools-0.3.0/tests/smoke/tools/test_tools_imports.py +22 -0
  234. smftools-0.3.0/tests/unit/__init__.py +0 -0
  235. smftools-0.3.0/tests/unit/config/test_LoadExperimentConfig.py +20 -0
  236. smftools-0.3.0/tests/unit/informatics/test_tool_backends.py +168 -0
  237. smftools-0.3.0/tests/unit/test_metadata.py +53 -0
  238. smftools-0.3.0/tests/unit/test_readwrite.py +7 -0
  239. smftools-0.2.4/PKG-INFO +0 -141
  240. smftools-0.2.4/README.md +0 -47
  241. smftools-0.2.4/docs/source/api/datasets.md +0 -9
  242. smftools-0.2.4/docs/source/api/informatics.md +0 -27
  243. smftools-0.2.4/docs/source/api/preprocessing.md +0 -14
  244. smftools-0.2.4/docs/source/api/tools.md +0 -9
  245. smftools-0.2.4/docs/source/conf.py +0 -117
  246. smftools-0.2.4/docs/source/release-notes/0.1.0.md +0 -4
  247. smftools-0.2.4/docs/source/release-notes/index.md +0 -8
  248. smftools-0.2.4/docs/source/requirements.txt +0 -14
  249. smftools-0.2.4/docs/source/tutorials/index.md +0 -3
  250. smftools-0.2.4/smftools/__init__.py +0 -30
  251. smftools-0.2.4/smftools/_version.py +0 -1
  252. smftools-0.2.4/smftools/cli/hmm_adata.py +0 -361
  253. smftools-0.2.4/smftools/config/__init__.py +0 -1
  254. smftools-0.2.4/smftools/datasets/__init__.py +0 -9
  255. smftools-0.2.4/smftools/datasets/datasets.py +0 -28
  256. smftools-0.2.4/smftools/hmm/HMM.py +0 -1587
  257. smftools-0.2.4/smftools/hmm/__init__.py +0 -14
  258. smftools-0.2.4/smftools/hmm/call_hmm_peaks.py +0 -334
  259. smftools-0.2.4/smftools/hmm/display_hmm.py +0 -18
  260. smftools-0.2.4/smftools/hmm/hmm_readwrite.py +0 -16
  261. smftools-0.2.4/smftools/hmm/nucleosome_hmm_refinement.py +0 -104
  262. smftools-0.2.4/smftools/informatics/__init__.py +0 -20
  263. smftools-0.2.4/smftools/informatics/bam_functions.py +0 -811
  264. smftools-0.2.4/smftools/informatics/bed_functions.py +0 -366
  265. smftools-0.2.4/smftools/informatics/fasta_functions.py +0 -255
  266. smftools-0.2.4/smftools/informatics/h5ad_functions.py +0 -197
  267. smftools-0.2.4/smftools/informatics/run_multiqc.py +0 -31
  268. smftools-0.2.4/smftools/machine_learning/__init__.py +0 -12
  269. smftools-0.2.4/smftools/machine_learning/evaluation/__init__.py +0 -2
  270. smftools-0.2.4/smftools/machine_learning/training/__init__.py +0 -2
  271. smftools-0.2.4/smftools/machine_learning/utils/__init__.py +0 -2
  272. smftools-0.2.4/smftools/machine_learning/utils/device.py +0 -10
  273. smftools-0.2.4/smftools/plotting/__init__.py +0 -18
  274. smftools-0.2.4/smftools/preprocessing/__init__.py +0 -38
  275. smftools-0.2.4/smftools/preprocessing/append_base_context.py +0 -131
  276. smftools-0.2.4/smftools/preprocessing/binarize.py +0 -17
  277. smftools-0.2.4/smftools/preprocessing/binarize_on_Youden.py +0 -47
  278. smftools-0.2.4/smftools/preprocessing/calculate_coverage.py +0 -54
  279. smftools-0.2.4/smftools/preprocessing/calculate_position_Youden.py +0 -131
  280. smftools-0.2.4/smftools/preprocessing/calculate_read_modification_stats.py +0 -101
  281. smftools-0.2.4/smftools/preprocessing/clean_NaN.py +0 -62
  282. smftools-0.2.4/smftools/preprocessing/invert_adata.py +0 -37
  283. smftools-0.2.4/smftools/preprocessing/load_sample_sheet.py +0 -53
  284. smftools-0.2.4/smftools/preprocessing/reindex_references_adata.py +0 -37
  285. smftools-0.2.4/smftools/tools/__init__.py +0 -20
  286. smftools-0.2.4/smftools/tools/calculate_umap.py +0 -62
  287. smftools-0.2.4/smftools/tools/cluster_adata_on_methylation.py +0 -105
  288. smftools-0.2.4/smftools/tools/general_tools.py +0 -69
  289. smftools-0.2.4/smftools/tools/subset_adata.py +0 -28
  290. smftools-0.2.4/tests/informatics/helpers/test_LoadExperimentConfig.py +0 -17
  291. smftools-0.2.4/tests/test_readwrite.py +0 -12
  292. {smftools-0.2.4 → smftools-0.3.0}/.gitattributes +0 -0
  293. {smftools-0.2.4 → smftools-0.3.0}/CONTRIBUTING.md +0 -0
  294. {smftools-0.2.4 → smftools-0.3.0}/LICENSE +0 -0
  295. {smftools-0.2.4 → smftools-0.3.0}/docs/Makefile +0 -0
  296. {smftools-0.2.4 → smftools-0.3.0}/docs/make.bat +0 -0
  297. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_static/converted_BAM_to_adata.png +0 -0
  298. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_static/modkit_extract_to_adata.png +0 -0
  299. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_static/smftools-1.svg +0 -0
  300. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_static/smftools-1.tif +0 -0
  301. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
  302. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_static/smftools_informatics_diagram.png +0 -0
  303. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
  304. {smftools-0.2.4 → smftools-0.3.0}/docs/source/_templates/tmp +0 -0
  305. {smftools-0.2.4 → smftools-0.3.0}/docs/source/api/index.md +0 -0
  306. {smftools-0.2.4 → smftools-0.3.0}/docs/source/dev/index.md +0 -0
  307. {smftools-0.2.4 → smftools-0.3.0}/docs/source/references.bib +0 -0
  308. {smftools-0.2.4 → smftools-0.3.0}/docs/source/references.rst +0 -0
  309. {smftools-0.2.4 → smftools-0.3.0}/experiment_config.csv +0 -0
  310. {smftools-0.2.4 → smftools-0.3.0}/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +0 -0
  311. {smftools-0.2.4 → smftools-0.3.0}/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +0 -0
  312. {smftools-0.2.4 → smftools-0.3.0}/sample_sheet.csv +0 -0
  313. {smftools-0.2.4 → smftools-0.3.0}/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
  314. {smftools-0.2.4 → smftools-0.3.0}/smftools/datasets/F1_sample_sheet.csv +0 -0
  315. {smftools-0.2.4 → smftools-0.3.0}/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
  316. {smftools-0.2.4/smftools/cli → smftools-0.3.0/tests/e2e}/__init__.py +0 -0
  317. {smftools-0.2.4/tests → smftools-0.3.0/tests/unit}/datasets/test_datasets.py +0 -0
@@ -0,0 +1,118 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: ["main", "0.3.0"]
6
+ pull_request:
7
+ branches: ["main", "0.3.0"]
8
+
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ format:
15
+ runs-on: ubuntu-latest
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ python-version: ["3.12"]
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+ cache: pip
26
+ - name: Install dependencies
27
+ run: |
28
+ python -m pip install --upgrade pip
29
+ python -m pip install "ruff==0.8.6"
30
+ - name: Check formatting with ruff
31
+ run: ruff format --check .
32
+
33
+ lint:
34
+ runs-on: ubuntu-latest
35
+ strategy:
36
+ fail-fast: false
37
+ matrix:
38
+ python-version: ["3.12"]
39
+ steps:
40
+ - uses: actions/checkout@v4
41
+ - uses: actions/setup-python@v5
42
+ with:
43
+ python-version: ${{ matrix.python-version }}
44
+ cache: pip
45
+ - name: Install dependencies
46
+ run: |
47
+ python -m pip install --upgrade pip
48
+ python -m pip install "ruff==0.8.6"
49
+ - name: Lint with ruff
50
+ run: ruff check --output-format=github .
51
+
52
+ smoke:
53
+ runs-on: ubuntu-latest
54
+ strategy:
55
+ fail-fast: false
56
+ matrix:
57
+ python-version: ["3.10", "3.12"]
58
+ steps:
59
+ - uses: actions/checkout@v4
60
+ - uses: actions/setup-python@v5
61
+ with:
62
+ python-version: ${{ matrix.python-version }}
63
+ cache: pip
64
+ - name: Install system deps for pysam
65
+ run: |
66
+ sudo apt-get update
67
+ sudo apt-get install -y \
68
+ build-essential \
69
+ zlib1g-dev \
70
+ libbz2-dev \
71
+ liblzma-dev \
72
+ libcurl4-openssl-dev \
73
+ pkg-config
74
+ - name: Install dependencies
75
+ run: |
76
+ python -m pip install --upgrade pip
77
+ python -m pip install .[dev]
78
+ - name: Run smoke tests
79
+ run: pytest -m smoke -q
80
+
81
+ docs:
82
+ runs-on: ubuntu-latest
83
+ strategy:
84
+ matrix:
85
+ python-version: ["3.12"]
86
+ steps:
87
+ - uses: actions/checkout@v4
88
+ - uses: actions/setup-python@v5
89
+ with:
90
+ python-version: ${{ matrix.python-version }}
91
+ cache: pip
92
+ - name: Install docs deps
93
+ run: |
94
+ python -m pip install --upgrade pip
95
+ python -m pip install .[docs]
96
+ - name: Build docs
97
+ run: sphinx-build -W -b html docs/source docs/_build/html
98
+
99
+ build:
100
+ runs-on: ubuntu-latest
101
+ strategy:
102
+ fail-fast: false
103
+ matrix:
104
+ python-version: ["3.10", "3.11", "3.12"]
105
+ steps:
106
+ - uses: actions/checkout@v4
107
+ - uses: actions/setup-python@v5
108
+ with:
109
+ python-version: ${{ matrix.python-version }}
110
+ cache: pip
111
+ - name: Install build tooling
112
+ run: |
113
+ python -m pip install --upgrade pip
114
+ python -m pip install build twine
115
+ - name: Build package
116
+ run: python -m build
117
+ - name: Check dist metadata
118
+ run: twine check dist/*
@@ -20,8 +20,10 @@ venvs/
20
20
  /environment.yml
21
21
 
22
22
  # Tests
23
- /tests/_test_inputs/
23
+ /tests/_test_inputs/dorado_models
24
24
  /tests/_test_outputs/
25
+ /tests/_test_outputs*/
26
+
25
27
 
26
28
  # OS
27
29
  .DS_Store
@@ -0,0 +1,7 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.6.8
4
+ hooks:
5
+ - id: ruff
6
+ args: ["--fix"]
7
+ - id: ruff-format
@@ -1,6 +1,6 @@
1
1
  version: 2
2
2
  build:
3
- os: ubuntu-20.04
3
+ os: ubuntu-22.04
4
4
  tools:
5
5
  python: "3.12"
6
6
  sphinx:
@@ -10,8 +10,7 @@ python:
10
10
  install:
11
11
  - method: pip
12
12
  path: .
13
- extra_requirements:
14
- - docs
13
+ - requirements: docs/source/requirements.txt
15
14
  submodules:
16
15
  include: all
17
16
  recursive: true
@@ -0,0 +1,69 @@
1
+ # AGENTS.md
2
+
3
+ This file tells coding agents (including OpenAI Codex) how to work in this repo.
4
+
5
+ ## Goals
6
+ - Make minimal, correct changes.
7
+ - Prefer small PRs / diffs.
8
+ - Keep behavior stable unless the task explicitly requests changes.
9
+
10
+ ## Repo orientation
11
+ - Read existing patterns before inventing new ones.
12
+ - Don’t refactor broadly unless asked.
13
+ - If you’re unsure about intended behavior, look for tests/docs first.
14
+ - Ignore all files in any directory named "archived".
15
+
16
+ ## Project dependencies
17
+ - A core set of dependencies is required for the project.
18
+ - Various optional dependencies are provided for:
19
+ - Optional functional modules of the package (ont, plotting, ml-base, ml-extended, scanpy, qc)
20
+ - Cases where a Python version of a CLI tool is preferred (such as Samtools, Bedtools, or BedGraphToBigWig).
21
+ - For potential performance boosts in computation (torch)
22
+ - All dependencies can be installed with `pip install -e ".[all]"`
23
+
24
+ ## Setup
25
+ - Create env (pick one):
26
+ - `python -m venv .venv && source .venv/bin/activate`
27
+ - or `conda env create -f environment.yml && conda activate <env>`
28
+ - Install:
29
+ - `pip install -e ".[dev]"`
30
+
31
+ ## How to run checks
32
+ - Smoke tests: `pytest -m smoke -q`
33
+ - Unit tests: `pytest -m unit -q`
34
+ - Integration tests: `pytest -m integration -q`
35
+ - E2E tests: `pytest -m e2e -q`
36
+ - Coverage (if configured): `pytest --cov`
37
+ - Lint: `ruff check .`
38
+ - Format: `ruff format .`
39
+ - Type-check (if configured): `mypy .`
40
+
41
+ ## Coding conventions
42
+ - Follow existing style and module layout.
43
+ - Prefer clear, explicit code over cleverness.
44
+ - Add/adjust tests for bug fixes and new behavior.
45
+ - Keep public APIs backward compatible unless explicitly changing them.
46
+ - Python:
47
+ - Use type hints for new/modified functions where reasonable.
48
+ - Use Google style docstring format.
49
+ - Avoid heavy dependencies unless necessary.
50
+ - Use typing.TYPE_CHECKING and annotations.
51
+
52
+ ## Testing expectations
53
+ - New functionality must include tests.
54
+ - Bug fix PRs should include a regression test.
55
+ - If tests are flaky or slow, note it and scope the change.
56
+
57
+ ## Logging & secrets
58
+ - Don’t log secrets, tokens, or PII.
59
+ - Never hardcode credentials.
60
+ - If sample keys are needed, use obvious placeholders like `YOUR_API_KEY_HERE`.
61
+
62
+ ## Git / PR hygiene
63
+ - Keep commits focused.
64
+ - Update docs/changelog if behavior or user-facing CLI changes.
65
+ - If you change a CLI flag or config schema, add a migration note.
66
+
67
+ ## If something fails
68
+ - If a command fails, paste the full error and summarize likely causes.
69
+ - Don’t “fix” by deleting tests or weakening assertions unless explicitly instructed.
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.3
2
+ Name: smftools
3
+ Version: 0.3.0
4
+ Summary: Single Molecule Footprinting Analysis in Python.
5
+ Project-URL: Source, https://github.com/jkmckenna/smftools
6
+ Project-URL: Documentation, https://smftools.readthedocs.io/
7
+ Author: Joseph McKenna
8
+ Maintainer-email: Joseph McKenna <jkmckenna@berkeley.edu>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2024 jkmckenna
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: anndata,chromatin-accessibility,machine-learning,nanopore,protein-dna-binding,single-locus,single-molecule-footprinting
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Environment :: Console
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: Intended Audience :: Science/Research
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Natural Language :: English
38
+ Classifier: Operating System :: MacOS :: MacOS X
39
+ Classifier: Programming Language :: Python :: 3
40
+ Classifier: Programming Language :: Python :: 3.10
41
+ Classifier: Programming Language :: Python :: 3.11
42
+ Classifier: Programming Language :: Python :: 3.12
43
+ Classifier: Programming Language :: Python :: 3.13
44
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
45
+ Classifier: Topic :: Scientific/Engineering :: Visualization
46
+ Requires-Python: >=3.10
47
+ Requires-Dist: anndata>=0.10.0
48
+ Requires-Dist: biopython>=1.79
49
+ Requires-Dist: click
50
+ Requires-Dist: numpy<2,>=1.22.0
51
+ Requires-Dist: pandas>=1.4.2
52
+ Requires-Dist: scipy>=1.7.3
53
+ Requires-Dist: tqdm
54
+ Provides-Extra: all
55
+ Requires-Dist: captum; extra == 'all'
56
+ Requires-Dist: fastcluster; extra == 'all'
57
+ Requires-Dist: hydra-core; extra == 'all'
58
+ Requires-Dist: igraph; extra == 'all'
59
+ Requires-Dist: leidenalg; extra == 'all'
60
+ Requires-Dist: lightning; extra == 'all'
61
+ Requires-Dist: matplotlib>=3.6.2; extra == 'all'
62
+ Requires-Dist: multiqc; extra == 'all'
63
+ Requires-Dist: networkx>=3.2; extra == 'all'
64
+ Requires-Dist: omegaconf; extra == 'all'
65
+ Requires-Dist: pod5>=0.1.21; extra == 'all'
66
+ Requires-Dist: pybedtools>=0.12.0; extra == 'all'
67
+ Requires-Dist: pybigwig>=0.3.24; extra == 'all'
68
+ Requires-Dist: pysam>=0.19.1; extra == 'all'
69
+ Requires-Dist: scanpy>=1.9; extra == 'all'
70
+ Requires-Dist: scikit-learn>=1.0.2; extra == 'all'
71
+ Requires-Dist: seaborn>=0.11; extra == 'all'
72
+ Requires-Dist: shap; extra == 'all'
73
+ Requires-Dist: torch>=1.9.0; extra == 'all'
74
+ Requires-Dist: upsetplot; extra == 'all'
75
+ Requires-Dist: wandb; extra == 'all'
76
+ Provides-Extra: cluster
77
+ Requires-Dist: fastcluster; extra == 'cluster'
78
+ Requires-Dist: leidenalg; extra == 'cluster'
79
+ Provides-Extra: dev
80
+ Requires-Dist: pre-commit; extra == 'dev'
81
+ Requires-Dist: pytest; extra == 'dev'
82
+ Requires-Dist: pytest-cov; extra == 'dev'
83
+ Requires-Dist: ruff; extra == 'dev'
84
+ Provides-Extra: docs
85
+ Requires-Dist: ipython>=7.20; extra == 'docs'
86
+ Requires-Dist: matplotlib!=3.6.1; extra == 'docs'
87
+ Requires-Dist: myst-nb<2,>=1; extra == 'docs'
88
+ Requires-Dist: myst-parser<3,>=2; extra == 'docs'
89
+ Requires-Dist: nbsphinx>=0.9; extra == 'docs'
90
+ Requires-Dist: pyyaml; extra == 'docs'
91
+ Requires-Dist: readthedocs-sphinx-search; extra == 'docs'
92
+ Requires-Dist: setuptools; extra == 'docs'
93
+ Requires-Dist: sphinx-autodoc-typehints<4,>=1.25.2; extra == 'docs'
94
+ Requires-Dist: sphinx-book-theme<2,>=1.1; extra == 'docs'
95
+ Requires-Dist: sphinx-click<7,>=5; extra == 'docs'
96
+ Requires-Dist: sphinx-copybutton<0.6,>=0.5; extra == 'docs'
97
+ Requires-Dist: sphinx-design; extra == 'docs'
98
+ Requires-Dist: sphinx<8,>=7; extra == 'docs'
99
+ Requires-Dist: sphinxcontrib-bibtex<3,>=2; extra == 'docs'
100
+ Requires-Dist: sphinxext-opengraph<0.10,>=0.9; extra == 'docs'
101
+ Provides-Extra: misc
102
+ Requires-Dist: networkx>=3.2; extra == 'misc'
103
+ Requires-Dist: upsetplot; extra == 'misc'
104
+ Provides-Extra: ml-base
105
+ Requires-Dist: scikit-learn>=1.0.2; extra == 'ml-base'
106
+ Requires-Dist: torch>=1.9.0; extra == 'ml-base'
107
+ Provides-Extra: ml-extended
108
+ Requires-Dist: captum; extra == 'ml-extended'
109
+ Requires-Dist: hydra-core; extra == 'ml-extended'
110
+ Requires-Dist: lightning; extra == 'ml-extended'
111
+ Requires-Dist: omegaconf; extra == 'ml-extended'
112
+ Requires-Dist: shap; extra == 'ml-extended'
113
+ Requires-Dist: wandb; extra == 'ml-extended'
114
+ Provides-Extra: ont
115
+ Requires-Dist: pod5>=0.1.21; extra == 'ont'
116
+ Provides-Extra: plotting
117
+ Requires-Dist: matplotlib>=3.6.2; extra == 'plotting'
118
+ Requires-Dist: seaborn>=0.11; extra == 'plotting'
119
+ Provides-Extra: pybedtools
120
+ Requires-Dist: pybedtools>=0.12.0; extra == 'pybedtools'
121
+ Provides-Extra: pybigwig
122
+ Requires-Dist: pybigwig>=0.3.24; extra == 'pybigwig'
123
+ Provides-Extra: pysam
124
+ Requires-Dist: pysam>=0.19.1; extra == 'pysam'
125
+ Provides-Extra: qc
126
+ Requires-Dist: multiqc; extra == 'qc'
127
+ Provides-Extra: scanpy
128
+ Requires-Dist: igraph; extra == 'scanpy'
129
+ Requires-Dist: scanpy>=1.9; extra == 'scanpy'
130
+ Provides-Extra: torch
131
+ Requires-Dist: torch>=1.9.0; extra == 'torch'
132
+ Description-Content-Type: text/markdown
133
+
134
+ [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
135
+ [![Docs](https://readthedocs.org/projects/smftools/badge/?version=latest)](https://smftools.readthedocs.io/en/latest/?badge=latest)
136
+
137
+ # smftools
138
+ A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. It also provides functionality for preprocessing, spatial analyses, and HMM-based feature annotation.
139
+
140
+ ## Philosophy
141
+ While genomic data structures (SAM/BAM) were built to handle low-coverage data (<1000X) along large references, smftools prioritizes high-coverage data (scalable to >1,000,000X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
142
+
143
+ ## Dependencies
144
+ The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools, which is used by the smftools load CLI command:
145
+ 1) [Dorado](https://github.com/nanoporetech/dorado) -> Basecalling, alignment, demultiplexing. Required for Nanopore SMF experiments, but not Illumina SMF experiments.
146
+ 2) [Minimap2](https://github.com/lh3/minimap2) -> Aligner if not using dorado. Support for other aligners could eventually be added if needed.
147
+ 3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read level methylation metrics from the MM/ML tags in BAM files. Only required for direct modification detection SMF protocols.
@@ -0,0 +1,14 @@
1
+ [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
2
+ [![Docs](https://readthedocs.org/projects/smftools/badge/?version=latest)](https://smftools.readthedocs.io/en/latest/?badge=latest)
3
+
4
+ # smftools
5
+ A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. It also provides functionality for preprocessing, spatial analyses, and HMM-based feature annotation.
6
+
7
+ ## Philosophy
8
+ While genomic data structures (SAM/BAM) were built to handle low-coverage data (<1000X) along large references, smftools prioritizes high-coverage data (scalable to >1,000,000X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
9
+
10
+ ## Dependencies
11
+ The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools, which is used by the smftools load CLI command:
12
+ 1) [Dorado](https://github.com/nanoporetech/dorado) -> Basecalling, alignment, demultiplexing. Required for Nanopore SMF experiments, but not Illumina SMF experiments.
13
+ 2) [Minimap2](https://github.com/lh3/minimap2) -> Aligner if not using dorado. Support for other aligners could eventually be added if needed.
14
+ 3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read level methylation metrics from the MM/ML tags in BAM files. Only required for direct modification detection SMF protocols.
@@ -0,0 +1,14 @@
1
+ ## Datasets:
2
+
3
+ ```{eval-rst}
4
+ .. autosummary::
5
+ :toctree: generated/datasets
6
+
7
+ smftools.datasets.datasets
8
+ ```
9
+
10
+ ```{eval-rst}
11
+ .. automodule:: smftools.datasets
12
+ :no-members:
13
+ :show-inheritance:
14
+ ```
@@ -0,0 +1,44 @@
1
+ ## Informatics: `inform`
2
+
3
+ ## Informatics module diagram
4
+ ```{image} ../_static/smftools_informatics_diagram.png
5
+ :width: 1000px
6
+ ```
7
+
8
+ Processes raw sequencing data to load an adata object.
9
+
10
+ ```{eval-rst}
11
+ .. autosummary::
12
+ :toctree: generated/informatics
13
+
14
+ smftools.informatics.bam_functions
15
+ smftools.informatics.basecalling
16
+ smftools.informatics.bed_functions
17
+ smftools.informatics.binarize_converted_base_identities
18
+ smftools.informatics.complement_base_list
19
+ smftools.informatics.converted_BAM_to_adata
20
+ smftools.informatics.fasta_functions
21
+ smftools.informatics.h5ad_functions
22
+ smftools.informatics.modkit_extract_to_adata
23
+ smftools.informatics.modkit_functions
24
+ smftools.informatics.ohe
25
+ smftools.informatics.pod5_functions
26
+ smftools.informatics.run_multiqc
27
+ ```
28
+
29
+ ```{eval-rst}
30
+ .. automodule:: smftools.informatics
31
+ :no-members:
32
+ :show-inheritance:
33
+ ```
34
+
35
+
36
+ ### Diagram of final steps of Direct SMF workflow
37
+ ```{image} ../_static/modkit_extract_to_adata.png
38
+ :width: 1000px
39
+ ```
40
+
41
+ ### Diagram of final steps of Conversion SMF workflow
42
+ ```{image} ../_static/converted_BAM_to_adata.png
43
+ :width: 1000px
44
+ ```
@@ -0,0 +1,43 @@
1
+ ## Preprocessing: `pp`
2
+
3
+ ## Preprocessing module diagram
4
+ ```{image} ../_static/smftools_preprocessing_diagram.png
5
+ :width: 1000px
6
+ ```
7
+
8
+ ```{eval-rst}
9
+ .. autosummary::
10
+ :toctree: generated/preprocessing
11
+
12
+ smftools.preprocessing.append_base_context
13
+ smftools.preprocessing.append_binary_layer_by_base_context
14
+ smftools.preprocessing.binarize
15
+ smftools.preprocessing.binarize_on_Youden
16
+ smftools.preprocessing.binary_layers_to_ohe
17
+ smftools.preprocessing.calculate_complexity_II
18
+ smftools.preprocessing.calculate_consensus
19
+ smftools.preprocessing.calculate_coverage
20
+ smftools.preprocessing.calculate_pairwise_differences
21
+ smftools.preprocessing.calculate_pairwise_hamming_distances
22
+ smftools.preprocessing.calculate_position_Youden
23
+ smftools.preprocessing.calculate_read_length_stats
24
+ smftools.preprocessing.calculate_read_modification_stats
25
+ smftools.preprocessing.clean_NaN
26
+ smftools.preprocessing.filter_adata_by_nan_proportion
27
+ smftools.preprocessing.filter_reads_on_length_quality_mapping
28
+ smftools.preprocessing.filter_reads_on_modification_thresholds
29
+ smftools.preprocessing.flag_duplicate_reads
30
+ smftools.preprocessing.invert_adata
31
+ smftools.preprocessing.load_sample_sheet
32
+ smftools.preprocessing.make_dirs
33
+ smftools.preprocessing.min_non_diagonal
34
+ smftools.preprocessing.recipes
35
+ smftools.preprocessing.reindex_references_adata
36
+ smftools.preprocessing.subsample_adata
37
+ ```
38
+
39
+ ```{eval-rst}
40
+ .. automodule:: smftools.preprocessing
41
+ :no-members:
42
+ :show-inheritance:
43
+ ```
@@ -0,0 +1,20 @@
1
+ ## Tools: `tl`
2
+
3
+ ```{eval-rst}
4
+ .. autosummary::
5
+ :toctree: generated/tools
6
+
7
+ smftools.tools.calculate_umap
8
+ smftools.tools.cluster_adata_on_methylation
9
+ smftools.tools.general_tools
10
+ smftools.tools.position_stats
11
+ smftools.tools.read_stats
12
+ smftools.tools.spatial_autocorrelation
13
+ smftools.tools.subset_adata
14
+ ```
15
+
16
+ ```{eval-rst}
17
+ .. automodule:: smftools.tools
18
+ :no-members:
19
+ :show-inheritance:
20
+ ```
@@ -13,6 +13,8 @@ This command takes a user passed config file handling:
13
13
  - Experiment info (SMF modality, sequencer type, barcoding kit if nanopore, sample sheet with metadata mapping)
14
14
  - Options to override default workflow parameters from smftools/config. Params are handled from default.yaml -> modality_type.yaml -> user passed config.csv.
15
15
 
16
+ ![](_static/smftools_informatics_diagram.png)
17
+
16
18
  ## Preprocess Usage
17
19
 
18
20
  This command performs preprocessing on the anndata object. It automatically runs the load command under the hood if starting from raw data.
@@ -21,6 +23,8 @@ This command performs preprocessing on the anndata object. It automatically runs
21
23
  smftools preprocess "/Path_to_experiment_config.csv"
22
24
  ```
23
25
 
26
+ ![](_static/smftools_preprocessing_diagram.png)
27
+
24
28
  ## Spatial Usage
25
29
 
26
30
  This command performs spatial analysis on the anndata object. It automatically runs the load command and preprocessing under the hood if they have not been already run.
@@ -29,6 +33,8 @@ This command performs spatial analysis on the anndata object. It automatically r
29
33
  smftools spatial "/Path_to_experiment_config.csv"
30
34
  ```
31
35
 
36
+ - Currently Includes: Position X Position correlation matrices, clustering, dimensionality reduction, spatial autocorrelation.
37
+
32
38
  ## HMM Usage
33
39
 
34
40
  This command performs hmm based feature annotation on the anndata object. It automatically runs the load command and preprocessing under the hood if they have not been already run.
@@ -37,6 +43,8 @@ This command performs hmm based feature annotation on the anndata object. It aut
37
43
  smftools hmm "/Path_to_experiment_config.csv"
38
44
  ```
39
45
 
46
+ - Main outputs will be stored in adata.layers.
47
+
40
48
  ## Batch Usage
41
49
 
42
50
  This command performs batch processing of any of the above commands across multiple experiments. It takes in a tsv, txt, or csv of experiment specific config csvs.
@@ -44,11 +52,38 @@ This command performs batch processing of any of the above commands across multi
44
52
  smftools batch preprocess "/Path_to_experiment_config_path_list.csv"
45
53
  ```
46
54
 
55
+ - Nice when analyzing multiple experiments
56
+
47
57
  ## Concatenate Usage
48
58
 
49
59
  This command concatenates multiple h5ad files and saves them to a new output. The h5ads to concatenate are provided as a txt, tsv, or csv file of paths.
50
60
  ```shell
51
- smftools concatenate output.h5ad "/Path_to_h5ad_path_list.csv"
61
+ smftools concatenate output.h5ad -c "/Path_to_h5ad_path_list.csv"
62
+ ```
63
+
64
+ Alternatively, you can just concatenate all h5ads within a given directory.
65
+ ```shell
66
+ smftools concatenate output.h5ad -d "/Path_to_h5ad_file_dir/"
67
+ ```
68
+
69
+ - Mainly used for combining multiple experiments into a single anndata object.
70
+
71
+ ## Subsample POD5 Usage
72
+
73
+ This command subsamples a POD5 file or a directory of POD5 files. It can be done by passing a txt file of read names to use, or an integer number of reads.
74
+ ```shell
75
+ smftools subsample-pod5 -r "/Path_to_read_name_list.txt" -o "/Path_to_output_directory" "/Path_to_input_POD5_dir_or_file"
76
+ ```
77
+
78
+ ```shell
79
+ smftools subsample-pod5 -n 1000 -o "/Path_to_output_directory" "/Path_to_input_POD5_dir_or_file"
80
+ ```
81
+
82
+ ## Optional run logging
83
+
84
+ If you want to keep run log files for CLI processes, you can use the following syntax with any of the CLI commands. Here is an example using `smftools load` that logs messages at the INFO level and above.
85
+ ```shell
86
+ smftools --log-file "/Path_to_output_log_file.log" --log-level INFO load "/Path_to_input_config.csv"
52
87
  ```
53
88
 
54
89
  ## Reading AnnData objects created by smftools
@@ -0,0 +1,6 @@
1
+ # Command-line interface
2
+
3
+ ```{click} smftools.cli_entry:cli
4
+ :prog: smftools
5
+ :nested: full
6
+ ```