smftools 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. {smftools-0.3.0 → smftools-0.3.2}/.github/workflows/ci.yml +6 -6
  2. {smftools-0.3.0 → smftools-0.3.2}/.gitignore +3 -0
  3. smftools-0.3.2/AGENTS.md +172 -0
  4. smftools-0.3.2/CLAUDE.md +3 -0
  5. {smftools-0.3.0 → smftools-0.3.2}/PKG-INFO +9 -5
  6. {smftools-0.3.0 → smftools-0.3.2}/docs/source/basic_usage.md +38 -4
  7. smftools-0.3.2/docs/source/tutorials/cli_usage.md +136 -0
  8. {smftools-0.3.0 → smftools-0.3.2}/pyproject.toml +11 -7
  9. {smftools-0.3.0 → smftools-0.3.2}/requirements.txt +3 -1
  10. {smftools-0.3.0 → smftools-0.3.2}/smftools/_version.py +1 -1
  11. smftools-0.3.2/smftools/cli/chimeric_adata.py +1563 -0
  12. smftools-0.3.2/smftools/cli/helpers.py +98 -0
  13. {smftools-0.3.0 → smftools-0.3.2}/smftools/cli/hmm_adata.py +250 -32
  14. smftools-0.3.2/smftools/cli/latent_adata.py +773 -0
  15. {smftools-0.3.0 → smftools-0.3.2}/smftools/cli/load_adata.py +78 -74
  16. {smftools-0.3.0 → smftools-0.3.2}/smftools/cli/preprocess_adata.py +122 -58
  17. smftools-0.3.2/smftools/cli/recipes.py +26 -0
  18. {smftools-0.3.0 → smftools-0.3.2}/smftools/cli/spatial_adata.py +74 -112
  19. smftools-0.3.2/smftools/cli/variant_adata.py +423 -0
  20. {smftools-0.3.0 → smftools-0.3.2}/smftools/cli_entry.py +52 -4
  21. {smftools-0.3.0 → smftools-0.3.2}/smftools/config/conversion.yaml +1 -1
  22. {smftools-0.3.0 → smftools-0.3.2}/smftools/config/deaminase.yaml +3 -0
  23. {smftools-0.3.0 → smftools-0.3.2}/smftools/config/default.yaml +85 -12
  24. {smftools-0.3.0 → smftools-0.3.2}/smftools/config/experiment_config.py +146 -1
  25. smftools-0.3.2/smftools/constants.py +106 -0
  26. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/HMM.py +88 -0
  27. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/call_hmm_peaks.py +1 -1
  28. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/__init__.py +6 -0
  29. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/bam_functions.py +358 -8
  30. smftools-0.3.2/smftools/informatics/binarize_converted_base_identities.py +99 -0
  31. smftools-0.3.2/smftools/informatics/converted_BAM_to_adata.py +1165 -0
  32. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/h5ad_functions.py +198 -2
  33. smftools-0.3.2/smftools/informatics/modkit_extract_to_adata.py +1901 -0
  34. smftools-0.3.2/smftools/informatics/sequence_encoding.py +72 -0
  35. {smftools-0.3.0 → smftools-0.3.2}/smftools/logging_utils.py +21 -2
  36. {smftools-0.3.0 → smftools-0.3.2}/smftools/metadata.py +1 -1
  37. smftools-0.3.2/smftools/plotting/__init__.py +56 -0
  38. {smftools-0.3.0 → smftools-0.3.2}/smftools/plotting/autocorrelation_plotting.py +22 -4
  39. smftools-0.3.2/smftools/plotting/chimeric_plotting.py +1893 -0
  40. {smftools-0.3.0 → smftools-0.3.2}/smftools/plotting/classifiers.py +28 -14
  41. smftools-0.3.2/smftools/plotting/general_plotting.py +64 -0
  42. smftools-0.3.2/smftools/plotting/hmm_plotting.py +1961 -0
  43. smftools-0.3.2/smftools/plotting/latent_plotting.py +804 -0
  44. smftools-0.3.2/smftools/plotting/plotting_utils.py +243 -0
  45. {smftools-0.3.0 → smftools-0.3.2}/smftools/plotting/position_stats.py +16 -8
  46. smftools-0.3.2/smftools/plotting/preprocess_plotting.py +281 -0
  47. {smftools-0.3.0 → smftools-0.3.2}/smftools/plotting/qc_plotting.py +8 -3
  48. smftools-0.3.2/smftools/plotting/spatial_plotting.py +1134 -0
  49. smftools-0.3.2/smftools/plotting/variant_plotting.py +1231 -0
  50. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/__init__.py +4 -0
  51. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/append_base_context.py +18 -18
  52. smftools-0.3.2/smftools/preprocessing/append_mismatch_frequency_sites.py +187 -0
  53. smftools-0.3.2/smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
  54. smftools-0.3.2/smftools/preprocessing/append_variant_call_layer.py +480 -0
  55. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_consensus.py +1 -1
  56. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_read_modification_stats.py +6 -1
  57. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/flag_duplicate_reads.py +4 -4
  58. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/invert_adata.py +1 -0
  59. {smftools-0.3.0 → smftools-0.3.2}/smftools/readwrite.py +159 -99
  60. {smftools-0.3.0 → smftools-0.3.2}/smftools/schema/anndata_schema_v1.yaml +15 -1
  61. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/__init__.py +10 -0
  62. smftools-0.3.2/smftools/tools/calculate_knn.py +121 -0
  63. smftools-0.3.2/smftools/tools/calculate_leiden.py +57 -0
  64. smftools-0.3.2/smftools/tools/calculate_nmf.py +130 -0
  65. smftools-0.3.2/smftools/tools/calculate_pca.py +180 -0
  66. smftools-0.3.2/smftools/tools/calculate_umap.py +102 -0
  67. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/position_stats.py +4 -4
  68. smftools-0.3.2/smftools/tools/rolling_nn_distance.py +872 -0
  69. smftools-0.3.2/smftools/tools/sequence_alignment.py +140 -0
  70. smftools-0.3.2/smftools/tools/tensor_factorization.py +217 -0
  71. smftools-0.3.2/tests/conftest.py +13 -0
  72. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/tools/test_tools_imports.py +2 -0
  73. smftools-0.3.2/tests/unit/hmm/test_mask_read_span.py +43 -0
  74. smftools-0.3.2/tests/unit/informatics/test_bam_base_identities.py +81 -0
  75. smftools-0.3.2/tests/unit/informatics/test_bam_read_tags.py +50 -0
  76. smftools-0.3.2/tests/unit/informatics/test_bam_secondary_supplementary.py +125 -0
  77. smftools-0.3.2/tests/unit/informatics/test_h5ad_secondary_supplementary.py +33 -0
  78. smftools-0.3.2/tests/unit/informatics/test_modkit_sequence_batch_files.py +43 -0
  79. smftools-0.3.2/tests/unit/informatics/test_modkit_sequence_encoding.py +49 -0
  80. {smftools-0.3.0 → smftools-0.3.2}/tests/unit/informatics/test_tool_backends.py +4 -2
  81. smftools-0.3.2/tests/unit/test_annotate_zero_hamming_segments_parent_layer.py +122 -0
  82. smftools-0.3.2/tests/unit/test_append_mismatch_frequency_sites.py +72 -0
  83. smftools-0.3.2/tests/unit/test_append_reference_strand_quality_stats.py +78 -0
  84. smftools-0.3.2/tests/unit/test_append_sequence_mismatch_annotations.py +27 -0
  85. smftools-0.3.2/tests/unit/test_append_variant_segment_layer.py +94 -0
  86. smftools-0.3.2/tests/unit/test_calculate_nmf.py +51 -0
  87. smftools-0.3.2/tests/unit/test_chimeric_adata_mod_hamming_flag.py +24 -0
  88. smftools-0.3.2/tests/unit/test_chimeric_adata_span_layer.py +72 -0
  89. smftools-0.3.2/tests/unit/test_chimeric_adata_top_segments.py +36 -0
  90. smftools-0.3.2/tests/unit/test_combined_hmm_length_clustermap_barplot.py +46 -0
  91. smftools-0.3.2/tests/unit/test_combined_hmm_length_clustermap_outputs.py +41 -0
  92. smftools-0.3.2/tests/unit/test_combined_hmm_raw_clustermap_nan_fill.py +43 -0
  93. smftools-0.3.2/tests/unit/test_combined_raw_clustermap_barplot_nan_ignore.py +91 -0
  94. smftools-0.3.2/tests/unit/test_combined_raw_clustermap_nan_fill.py +43 -0
  95. smftools-0.3.2/tests/unit/test_hmm_clustermap_colormap.py +19 -0
  96. smftools-0.3.2/tests/unit/test_hmm_variant_overlay_index_mapping.py +86 -0
  97. smftools-0.3.2/tests/unit/test_latent_adata_var_filters.py +88 -0
  98. smftools-0.3.2/tests/unit/test_length_layer_subclass_mapping.py +29 -0
  99. smftools-0.3.2/tests/unit/test_methylation_fraction_nan_handling.py +37 -0
  100. smftools-0.3.2/tests/unit/test_plot_cp_sequence_components.py +66 -0
  101. smftools-0.3.2/tests/unit/test_plot_hamming_span_trio.py +65 -0
  102. smftools-0.3.2/tests/unit/test_plot_hmm_size_contours_feature_ranges.py +41 -0
  103. smftools-0.3.2/tests/unit/test_plot_hmm_size_contours_nan_values.py +35 -0
  104. smftools-0.3.2/tests/unit/test_plot_mismatch_base_frequency_by_position.py +196 -0
  105. smftools-0.3.2/tests/unit/test_plot_nmf_components.py +26 -0
  106. smftools-0.3.2/tests/unit/test_plot_pca_components.py +27 -0
  107. smftools-0.3.2/tests/unit/test_plot_read_span_quality_clustermaps.py +35 -0
  108. smftools-0.3.2/tests/unit/test_plot_rolling_nn_and_layer.py +121 -0
  109. smftools-0.3.2/tests/unit/test_plot_rolling_nn_and_two_layers.py +74 -0
  110. smftools-0.3.2/tests/unit/test_plot_sequence_integer_encoding_clustermaps.py +82 -0
  111. smftools-0.3.2/tests/unit/test_plot_variant_segment_clustermaps.py +66 -0
  112. smftools-0.3.2/tests/unit/test_plot_zero_hamming_pair_counts.py +45 -0
  113. smftools-0.3.2/tests/unit/test_plot_zero_hamming_span_and_layer.py +69 -0
  114. smftools-0.3.2/tests/unit/test_readwrite.py +36 -0
  115. smftools-0.3.2/tests/unit/test_rolling_nn_distance.py +368 -0
  116. smftools-0.3.2/tests/unit/test_tensor_factorization.py +151 -0
  117. smftools-0.3.2/tests/unit/test_variant_adata_overlay_config_forwarding.py +93 -0
  118. smftools-0.3.2/tests/unit/tools/test_calculate_umap.py +115 -0
  119. smftools-0.3.2/tests/unit/tools/test_sequence_alignment.py +39 -0
  120. smftools-0.3.0/AGENTS.md +0 -69
  121. smftools-0.3.0/docs/source/tutorials/cli_usage.md +0 -91
  122. smftools-0.3.0/smftools/cli/helpers.py +0 -56
  123. smftools-0.3.0/smftools/constants.py +0 -37
  124. smftools-0.3.0/smftools/informatics/binarize_converted_base_identities.py +0 -186
  125. smftools-0.3.0/smftools/informatics/converted_BAM_to_adata.py +0 -704
  126. smftools-0.3.0/smftools/informatics/modkit_extract_to_adata.py +0 -1319
  127. smftools-0.3.0/smftools/plotting/__init__.py +0 -33
  128. smftools-0.3.0/smftools/plotting/general_plotting.py +0 -1585
  129. smftools-0.3.0/smftools/plotting/hmm_plotting.py +0 -299
  130. smftools-0.3.0/smftools/tools/calculate_umap.py +0 -103
  131. smftools-0.3.0/tests/unit/test_readwrite.py +0 -7
  132. {smftools-0.3.0 → smftools-0.3.2}/.gitattributes +0 -0
  133. {smftools-0.3.0 → smftools-0.3.2}/.pre-commit-config.yaml +0 -0
  134. {smftools-0.3.0 → smftools-0.3.2}/.readthedocs.yaml +0 -0
  135. {smftools-0.3.0 → smftools-0.3.2}/CONTRIBUTING.md +0 -0
  136. {smftools-0.3.0 → smftools-0.3.2}/LICENSE +0 -0
  137. {smftools-0.3.0 → smftools-0.3.2}/README.md +0 -0
  138. {smftools-0.3.0 → smftools-0.3.2}/docs/Makefile +0 -0
  139. {smftools-0.3.0 → smftools-0.3.2}/docs/make.bat +0 -0
  140. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_static/converted_BAM_to_adata.png +0 -0
  141. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_static/modkit_extract_to_adata.png +0 -0
  142. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_static/smftools-1.svg +0 -0
  143. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_static/smftools-1.tif +0 -0
  144. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
  145. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_static/smftools_informatics_diagram.png +0 -0
  146. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
  147. {smftools-0.3.0 → smftools-0.3.2}/docs/source/_templates/tmp +0 -0
  148. {smftools-0.3.0 → smftools-0.3.2}/docs/source/api/datasets.md +0 -0
  149. {smftools-0.3.0 → smftools-0.3.2}/docs/source/api/index.md +0 -0
  150. {smftools-0.3.0 → smftools-0.3.2}/docs/source/api/informatics.md +0 -0
  151. {smftools-0.3.0 → smftools-0.3.2}/docs/source/api/preprocessing.md +0 -0
  152. {smftools-0.3.0 → smftools-0.3.2}/docs/source/api/tools.md +0 -0
  153. {smftools-0.3.0 → smftools-0.3.2}/docs/source/cli.md +0 -0
  154. {smftools-0.3.0 → smftools-0.3.2}/docs/source/conf.py +0 -0
  155. {smftools-0.3.0 → smftools-0.3.2}/docs/source/contributors.md +0 -0
  156. {smftools-0.3.0 → smftools-0.3.2}/docs/source/dev/index.md +0 -0
  157. {smftools-0.3.0 → smftools-0.3.2}/docs/source/index.md +0 -0
  158. {smftools-0.3.0 → smftools-0.3.2}/docs/source/installation.md +0 -0
  159. {smftools-0.3.0 → smftools-0.3.2}/docs/source/references.bib +0 -0
  160. {smftools-0.3.0 → smftools-0.3.2}/docs/source/references.rst +0 -0
  161. {smftools-0.3.0 → smftools-0.3.2}/docs/source/release-notes/0.1.0.md +0 -0
  162. {smftools-0.3.0 → smftools-0.3.2}/docs/source/release-notes/0.1.1.md +0 -0
  163. {smftools-0.3.0 → smftools-0.3.2}/docs/source/release-notes/0.1.6.md +0 -0
  164. {smftools-0.3.0 → smftools-0.3.2}/docs/source/release-notes/0.2.1.md +0 -0
  165. {smftools-0.3.0 → smftools-0.3.2}/docs/source/release-notes/0.2.3.md +0 -0
  166. {smftools-0.3.0 → smftools-0.3.2}/docs/source/release-notes/0.3.0.md +0 -0
  167. {smftools-0.3.0 → smftools-0.3.2}/docs/source/release-notes/index.md +0 -0
  168. {smftools-0.3.0 → smftools-0.3.2}/docs/source/requirements.txt +0 -0
  169. {smftools-0.3.0 → smftools-0.3.2}/docs/source/schema/anndata_schema.md +0 -0
  170. {smftools-0.3.0 → smftools-0.3.2}/docs/source/tutorials/experiment_config.md +0 -0
  171. {smftools-0.3.0 → smftools-0.3.2}/docs/source/tutorials/index.md +0 -0
  172. {smftools-0.3.0 → smftools-0.3.2}/experiment_config.csv +0 -0
  173. {smftools-0.3.0 → smftools-0.3.2}/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +0 -0
  174. {smftools-0.3.0 → smftools-0.3.2}/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +0 -0
  175. {smftools-0.3.0 → smftools-0.3.2}/sample_sheet.csv +0 -0
  176. {smftools-0.3.0 → smftools-0.3.2}/smftools/__init__.py +0 -0
  177. {smftools-0.3.0 → smftools-0.3.2}/smftools/_settings.py +0 -0
  178. {smftools-0.3.0 → smftools-0.3.2}/smftools/cli/__init__.py +0 -0
  179. {smftools-0.3.0 → smftools-0.3.2}/smftools/cli/archived/cli_flows.py +0 -0
  180. {smftools-0.3.0 → smftools-0.3.2}/smftools/config/__init__.py +0 -0
  181. {smftools-0.3.0 → smftools-0.3.2}/smftools/config/direct.yaml +0 -0
  182. {smftools-0.3.0 → smftools-0.3.2}/smftools/config/discover_input_files.py +0 -0
  183. {smftools-0.3.0 → smftools-0.3.2}/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
  184. {smftools-0.3.0 → smftools-0.3.2}/smftools/datasets/F1_sample_sheet.csv +0 -0
  185. {smftools-0.3.0 → smftools-0.3.2}/smftools/datasets/__init__.py +0 -0
  186. {smftools-0.3.0 → smftools-0.3.2}/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
  187. {smftools-0.3.0 → smftools-0.3.2}/smftools/datasets/datasets.py +0 -0
  188. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/__init__.py +0 -0
  189. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/archived/apply_hmm_batched.py +0 -0
  190. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/archived/calculate_distances.py +0 -0
  191. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/archived/call_hmm_peaks.py +0 -0
  192. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/archived/train_hmm.py +0 -0
  193. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/display_hmm.py +0 -0
  194. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/hmm_readwrite.py +0 -0
  195. {smftools-0.3.0 → smftools-0.3.2}/smftools/hmm/nucleosome_hmm_refinement.py +0 -0
  196. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/bam_conversion.py +0 -0
  197. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/bam_direct.py +0 -0
  198. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/basecall_pod5s.py +0 -0
  199. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/basecalls_to_adata.py +0 -0
  200. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/conversion_smf.py +0 -0
  201. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/deaminase_smf.py +0 -0
  202. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/direct_smf.py +0 -0
  203. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/fast5_to_pod5.py +0 -0
  204. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/__init__.py +0 -0
  205. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +0 -0
  206. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +0 -0
  207. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/bam_qc.py +0 -0
  208. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +0 -0
  209. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/canoncall.py +0 -0
  210. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +0 -0
  211. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +0 -0
  212. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/count_aligned_reads.py +0 -0
  213. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +0 -0
  214. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_base_identities.py +0 -0
  215. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_mods.py +0 -0
  216. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +0 -0
  217. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +0 -0
  218. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +0 -0
  219. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/find_conversion_sites.py +0 -0
  220. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +0 -0
  221. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +0 -0
  222. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/get_native_references.py +0 -0
  223. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/index_fasta.py +0 -0
  224. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/informatics.py +0 -0
  225. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/load_adata.py +0 -0
  226. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/make_modbed.py +0 -0
  227. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/modQC.py +0 -0
  228. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/modcall.py +0 -0
  229. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/ohe_batching.py +0 -0
  230. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +0 -0
  231. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/one_hot_decode.py +0 -0
  232. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/one_hot_encode.py +0 -0
  233. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +0 -0
  234. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +0 -0
  235. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +0 -0
  236. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/print_bam_query_seq.py +0 -0
  237. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/subsample_fasta_from_bed.py +0 -0
  238. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/archived/subsample_pod5.py +0 -0
  239. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/basecalling.py +0 -0
  240. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/bed_functions.py +0 -0
  241. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/complement_base_list.py +0 -0
  242. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/fasta_functions.py +0 -0
  243. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/modkit_functions.py +0 -0
  244. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/ohe.py +0 -0
  245. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/pod5_functions.py +0 -0
  246. {smftools-0.3.0 → smftools-0.3.2}/smftools/informatics/run_multiqc.py +0 -0
  247. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/__init__.py +0 -0
  248. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/data/__init__.py +0 -0
  249. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/data/anndata_data_module.py +0 -0
  250. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/data/preprocessing.py +0 -0
  251. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/evaluation/__init__.py +0 -0
  252. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/evaluation/eval_utils.py +0 -0
  253. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/evaluation/evaluators.py +0 -0
  254. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/inference/__init__.py +0 -0
  255. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/inference/inference_utils.py +0 -0
  256. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/inference/lightning_inference.py +0 -0
  257. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/inference/sklearn_inference.py +0 -0
  258. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/inference/sliding_window_inference.py +0 -0
  259. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/__init__.py +0 -0
  260. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/base.py +0 -0
  261. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/cnn.py +0 -0
  262. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/lightning_base.py +0 -0
  263. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/mlp.py +0 -0
  264. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/positional.py +0 -0
  265. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/rnn.py +0 -0
  266. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/sklearn_models.py +0 -0
  267. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/transformer.py +0 -0
  268. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/models/wrappers.py +0 -0
  269. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/training/__init__.py +0 -0
  270. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/training/train_lightning_model.py +0 -0
  271. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/training/train_sklearn_model.py +0 -0
  272. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/utils/__init__.py +0 -0
  273. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/utils/device.py +0 -0
  274. {smftools-0.3.0 → smftools-0.3.2}/smftools/machine_learning/utils/grl.py +0 -0
  275. {smftools-0.3.0 → smftools-0.3.2}/smftools/optional_imports.py +0 -0
  276. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/append_binary_layer_by_base_context.py +0 -0
  277. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +0 -0
  278. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/archived/calculate_complexity.py +0 -0
  279. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/archived/mark_duplicates.py +0 -0
  280. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/archived/preprocessing.py +0 -0
  281. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/archived/remove_duplicates.py +0 -0
  282. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/binarize.py +0 -0
  283. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/binarize_on_Youden.py +0 -0
  284. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/binary_layers_to_ohe.py +0 -0
  285. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_complexity_II.py +0 -0
  286. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_coverage.py +0 -0
  287. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_pairwise_differences.py +0 -0
  288. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_pairwise_hamming_distances.py +0 -0
  289. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_position_Youden.py +0 -0
  290. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/calculate_read_length_stats.py +0 -0
  291. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/clean_NaN.py +0 -0
  292. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/filter_adata_by_nan_proportion.py +0 -0
  293. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/filter_reads_on_length_quality_mapping.py +0 -0
  294. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/filter_reads_on_modification_thresholds.py +0 -0
  295. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/load_sample_sheet.py +0 -0
  296. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/make_dirs.py +0 -0
  297. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/min_non_diagonal.py +0 -0
  298. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/recipes.py +0 -0
  299. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/reindex_references_adata.py +0 -0
  300. {smftools-0.3.0 → smftools-0.3.2}/smftools/preprocessing/subsample_adata.py +0 -0
  301. {smftools-0.3.0 → smftools-0.3.2}/smftools/schema/__init__.py +0 -0
  302. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/archived/apply_hmm.py +0 -0
  303. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/archived/classifiers.py +0 -0
  304. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/archived/classify_methylated_features.py +0 -0
  305. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/archived/classify_non_methylated_features.py +0 -0
  306. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/archived/subset_adata_v1.py +0 -0
  307. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/archived/subset_adata_v2.py +0 -0
  308. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/cluster_adata_on_methylation.py +0 -0
  309. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/general_tools.py +0 -0
  310. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/read_stats.py +0 -0
  311. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/spatial_autocorrelation.py +0 -0
  312. {smftools-0.3.0 → smftools-0.3.2}/smftools/tools/subset_adata.py +0 -0
  313. {smftools-0.3.0 → smftools-0.3.2}/tests/__init__.py +0 -0
  314. {smftools-0.3.0 → smftools-0.3.2}/tests/_test_inputs/_test_bed_I.bed +0 -0
  315. {smftools-0.3.0 → smftools-0.3.2}/tests/_test_inputs/_test_fasta_I.fa +0 -0
  316. {smftools-0.3.0 → smftools-0.3.2}/tests/_test_inputs/_test_fasta_I.fa.fai +0 -0
  317. {smftools-0.3.0 → smftools-0.3.2}/tests/_test_inputs/_test_pod5_I.pod5 +0 -0
  318. {smftools-0.3.0 → smftools-0.3.2}/tests/_test_inputs/test_experiment_config_conversion_I.csv +0 -0
  319. {smftools-0.3.0 → smftools-0.3.2}/tests/_test_inputs/test_experiment_config_deaminase_I.csv +0 -0
  320. {smftools-0.3.0 → smftools-0.3.2}/tests/_test_inputs/test_experiment_config_direct_I.csv +0 -0
  321. {smftools-0.3.0 → smftools-0.3.2}/tests/e2e/__init__.py +0 -0
  322. {smftools-0.3.0 → smftools-0.3.2}/tests/e2e/cli/test_load_adata.py +0 -0
  323. {smftools-0.3.0 → smftools-0.3.2}/tests/e2e/cli/test_spatial_adata.py +0 -0
  324. {smftools-0.3.0 → smftools-0.3.2}/tests/integration/__init__.py +0 -0
  325. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/__init__.py +0 -0
  326. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/cli/test_cli_imports.py +0 -0
  327. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/config/test_config_imports.py +0 -0
  328. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/datasets/test_datasets_imports.py +0 -0
  329. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/hmm/test_hmm_imports.py +0 -0
  330. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/import_helpers.py +0 -0
  331. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/informatics/test_informatics_imports.py +0 -0
  332. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/machine_learning/data/test_data_imports.py +0 -0
  333. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/machine_learning/evaluation/test_evaluation_imports.py +0 -0
  334. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/machine_learning/inference/test_inference_imports.py +0 -0
  335. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/machine_learning/models/test_models_imports.py +0 -0
  336. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/machine_learning/training/test_training_imports.py +0 -0
  337. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/machine_learning/utils/test_utils_imports.py +0 -0
  338. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/plotting/test_plotting_imports.py +0 -0
  339. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/preprocessing/test_preprocessing_imports.py +0 -0
  340. {smftools-0.3.0 → smftools-0.3.2}/tests/smoke/test_smftools_imports.py +0 -0
  341. {smftools-0.3.0 → smftools-0.3.2}/tests/unit/__init__.py +0 -0
  342. {smftools-0.3.0 → smftools-0.3.2}/tests/unit/config/test_LoadExperimentConfig.py +0 -0
  343. {smftools-0.3.0 → smftools-0.3.2}/tests/unit/datasets/test_datasets.py +0 -0
  344. {smftools-0.3.0 → smftools-0.3.2}/tests/unit/test_metadata.py +0 -0
@@ -2,9 +2,9 @@ name: CI
2
2
 
3
3
  on:
4
4
  push:
5
- branches: ["main", "0.3.0"]
5
+ branches: ["main"]
6
6
  pull_request:
7
- branches: ["main", "0.3.0"]
7
+ branches: ["main"]
8
8
 
9
9
  concurrency:
10
10
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -49,7 +49,7 @@ jobs:
49
49
  - name: Lint with ruff
50
50
  run: ruff check --output-format=github .
51
51
 
52
- smoke:
52
+ pytest:
53
53
  runs-on: ubuntu-latest
54
54
  strategy:
55
55
  fail-fast: false
@@ -74,9 +74,9 @@ jobs:
74
74
  - name: Install dependencies
75
75
  run: |
76
76
  python -m pip install --upgrade pip
77
- python -m pip install .[dev]
78
- - name: Run smoke tests
79
- run: pytest -m smoke -q
77
+ python -m pip install .[dev,torch,plotting]
78
+ - name: Run pytest
79
+ run: pytest -m "smoke" -q
80
80
 
81
81
  docs:
82
82
  runs-on: ubuntu-latest
@@ -19,6 +19,9 @@ venv/
19
19
  venvs/
20
20
  /environment.yml
21
21
 
22
+ # Development
23
+ /dev/
24
+
22
25
  # Tests
23
26
  /tests/_test_inputs/dorado_models
24
27
  /tests/_test_outputs/
@@ -0,0 +1,172 @@
1
+ # AGENTS.md
2
+
3
+ This file tells coding agents (including OpenAI's Codex, Anthropic's Claude Code, and Google's Gemini) how to work in this repo.
4
+
5
+ - For AGENTS.md or CLAUDE.md files:
6
+ - Agents can read from these files.
7
+ - Agents can never edit these files.
8
+
9
+ ## Goals
10
+ - Make minimal, correct changes.
11
+ - Prefer small PRs / diffs.
12
+ - Keep behavior stable unless the task explicitly requests changes.
13
+ - Generate production grade, scalable code.
14
+
15
+ ## Prompt interface
16
+ - When asked about a problem or task, first read all files relevant to the task's scope.
17
+ - Describe the problem given the context.
18
+ - Formulate a plan to address the problem within scope.
19
+ - Refine the plan with user input.
20
+ - Implement code after being told to proceed.
21
+
22
+ ## Repo orientation
23
+ - Read existing patterns before inventing new ones.
24
+ - Don’t refactor broadly unless asked.
25
+ - If you’re unsure about intended behavior, look for tests or docs first.
26
+ - If behavior is not clear after reading tests and docs, look at the Click commands section in this file.
27
+ - Ignore all files in any directory named "archived".
28
+ - User defined parameters exist within src/smftools/config.
29
+ - Parameters are inherited from default.yaml -> MODALITY.yaml -> user_defined_config.csv
30
+ - Frequently used non user defined variables should exist within src/smftools/constants.py
31
+ - Logging functionality is defined within src/smftools/logging_utils.py
32
+ - Optional dependency handling is defined within src/smftools/optional_imports.py
33
+ - Frequently used I/O functionality is defined within src/smftools/readwrite.py
34
+ - CLI functionality is provided through click and is defined within:
35
+ - src/smftools/cli_entry.py
36
+ - Modules of the src/smftools/cli subpackage
37
+ - RTD documentation organization through smftools/docs
38
+ - Pytest testing within smftools/tests
39
+
40
+ ## Project dependencies
41
+ - A core set of dependencies is required for the project.
42
+ - Various optional dependencies are provided for:
43
+ - Optional functional modules of the package (ont, plotting, ml-base, ml-extended, umap, qc)
44
+ - If available, a Python version of a CLI tool is preferred (Such as for Samtools, Bedtools, BedGraphToBigWig).
45
+ - torch is listed as an extra dependency, but is currently required.
46
+ - All dependencies can be installed with `pip install -e ".[all]"`
47
+ - Certain command line tools are currently needed for certain functionalities within smftools load:
48
+ - dorado: Used for nanopore basecalling from POD5/FAST5 files to BAM.
49
+ - dorado/minimap2: Used for alignment of reads to reference.
50
+ - dorado: Used for demultiplexing of nanopore derived BAMs.
51
+ - modkit: Used for extracting modification probabilities from MM/ML BAM tags for native smf modality.
52
+
53
+ ## Setup
54
+ - Use current environment if the core dependencies are installed.
55
+ - If dependencies are not found, create a venv in smftools/venvs/ directory:
56
+ - `python3 -m venv .temp-venv && source .temp-venv/bin/activate`
57
+ - Install the core dependencies and development dependencies for testing/formatting/linting:
58
+ - `pip install -e ".[dev,torch]"`
59
+ - If code is raising dependency errors and they are in the optional dependencies:
60
+ - `pip install -e ".[EXTRA_DEPENDENCY_NAME]"`
61
+
62
+ ## How to run checks
63
+ - Smoke tests: `pytest -m smoke -q`
64
+ - Unit tests: `pytest -m unit -q`
65
+ - Integration tests: `pytest -m integration -q`
66
+ - E2E tests: `pytest -m e2e -q`
67
+ - Coverage (if configured): `pytest --cov`
68
+ - Lint: `ruff check .`
69
+ - Format: `ruff format .`
70
+ - Type-check (if configured): `mypy .`
71
+
72
+ ## Coding conventions
73
+ - Follow existing style and module layout.
74
+ - Prefer clear, explicit code over cleverness.
75
+ - Prefer modular functionality to facilitate testing and future development.
76
+ - Do not over-parameterize functions when possible.
77
+ - For function parameters that a user may want to tune, use the config management strategy.
78
+ - Use constants.py when appropriate.
79
+ - Annotate code blocks to describe functionality.
80
+ - Add/adjust tests for bug fixes and new behavior.
81
+ - Keep public APIs backward compatible unless explicitly changing them.
82
+ - Python:
83
+ - Use type hints for new/modified functions where reasonable.
84
+ - Use Google style docstring format.
85
+ - Avoid heavy dependencies unless necessary.
86
+ - Use typing.TYPE_CHECKING and annotations.
87
+ - In docstring of new functions, define the purpose of the function and what it does.
88
+
89
+ ## Testing expectations
90
+ - New functionality must include tests.
91
+ - If tests are flaky or slow, note it and scope the change.
92
+
93
+ ## Logging & secrets
94
+ - Don’t log secrets, tokens, or PII.
95
+ - Never hardcode credentials.
96
+ - If sample keys are needed, use obvious placeholders like `YOUR_API_KEY_HERE`.
97
+
98
+ ## Git / PR hygiene
99
+ - Keep commits focused.
100
+ - Update docs/changelog if behavior or user-facing CLI changes.
101
+ - If you change a CLI flag or config schema, add a migration note.
102
+
103
+ ## If something fails
104
+ - If a command fails, paste the full error and summarize likely causes.
105
+ - Don’t “fix” by deleting tests or weakening assertions unless explicitly instructed.
106
+
107
+ ## Click commands and their primary intent. Look in docs first, and underneath if the task is still not clear.
108
+ - smftools load:
109
+ - Take a variety of raw sequencing input options (FASTQs, POD5s, BAMs) from a single molecule footprinting experiment.
110
+ - Determine the smf modality specified by the user (conversion, deaminase, native).
111
+ - Handle FASTA inputs
112
+ - Basecall the files using dorado if needed.
113
+ - Align the reads using dorado or minimap2.
114
+ - Sort/Index/Demultiplex BAMs.
115
+ - BAM QC.
116
+ - Extract Base modification probabilities for native smf modality
117
+ - Load an AnnData object containing:
118
+ - adata.X with a read X position matrix of SMF data.
119
+ - adata.layers with:
120
+ - integer encoded DNA sequences of each read.
121
+ - mismatch encodings of DNA sequence vs reference for each read.
122
+ - Base Q-scores for each read.
123
+ - Read span masks indicating where the read aligned.
124
+ - adata.var with per Reference_strand FASTA bases across positions.
125
+ - adata.var_names being positional indexes within each read.
126
+ - adata.obs_names being read names.
127
+ - adata.obs with read level metadata
128
+ - adata.uns with various unstructured data metrics.
129
+ - Run multiqc on the BAM qc files.
130
+ - Directory temp file cleanup.
131
+ - Write out the adata, its backup accessory data, and csv files of obs, var, and keys.
132
+ - smftools preprocess:
133
+ - Requires the adata produced by smftools load.
134
+ - Adds various QC metrics and performs data preprocessing and filtering.
135
+ - Read length, quality, and mapping based QC.
136
+ - Per reference position level QC.
137
+ - Appending base context for each reference.
138
+ - Binarization of SMF probabilities for the native smf modality
139
+ - NaN filling strategies in adata.layers.
140
+ - Read level modification QC and filtering.
141
+ - Duplicate detection and complexity analysis for conversion/deaminase modalities.
142
+ - Visualizing read spans and base quality clustermaps.
143
+ - Optionally inverts the adata along the var-axis.
144
+ - Optionally reindexes var.
145
+ - smftools variant:
146
+ - Requires at least a preprocessed adata object.
147
+ - Calculates per position mismatch frequencies/types for each reference/sample.
148
+ - Optional variant site labeling if comparing two references.
149
+ - Visualizes sequence encodings and mismatch encodings with clustermaps.
150
+ - smftools chimeric:
151
+ - Requires at least a preprocessed adata object.
152
+ - Meant to detect putative PCR chimeras.
153
+ - smftools spatial:
154
+ - Requires at least a preprocessed adata object.
155
+ - Basic spatial signal analyses.
156
+ - Clustermaps to visualize smf signal per reference/sample.
157
+ - Spatial autocorrelation.
158
+ - Position x position correlation matrices (Pearson, Binary covariance, chi2, relative risk)
159
+ - smftools hmm:
160
+ - Requires at least a preprocessed adata object.
161
+ - Fits/saves/applies HMM to adata to label putative molecular features.
162
+ - Creates adata.layers that hold binary masks of each feature class/subclass.
163
+ - Creates adata.layers that hold HMM emission probabilities.
164
+ - Visualizes HMM layers with clustermaps.
165
+ - Performs peak calling on HMM layers and labels reads with the features in obs.
166
+ - smftools latent:
167
+ - Requires at least a preprocessed adata object.
168
+ - Generates latent representations of the smf data.
169
+ - PCA/KNN/UMAP/NMF/CP decomposition strategies.
170
+ - Represents full sequences.
171
+ - Represents modified sites only.
172
+ - Represents non-modified sites only.
@@ -0,0 +1,3 @@
1
+ # Claude Code Agent Instructions
2
+
3
+ You are the implementation agent defined in smftools/AGENTS.md
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: smftools
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Single Molecule Footprinting Analysis in Python.
5
5
  Project-URL: Source, https://github.com/jkmckenna/smftools
6
6
  Project-URL: Documentation, https://smftools.readthedocs.io/
@@ -65,16 +65,19 @@ Requires-Dist: omegaconf; extra == 'all'
65
65
  Requires-Dist: pod5>=0.1.21; extra == 'all'
66
66
  Requires-Dist: pybedtools>=0.12.0; extra == 'all'
67
67
  Requires-Dist: pybigwig>=0.3.24; extra == 'all'
68
+ Requires-Dist: pynndescent>=0.5.10; extra == 'all'
68
69
  Requires-Dist: pysam>=0.19.1; extra == 'all'
69
- Requires-Dist: scanpy>=1.9; extra == 'all'
70
70
  Requires-Dist: scikit-learn>=1.0.2; extra == 'all'
71
71
  Requires-Dist: seaborn>=0.11; extra == 'all'
72
72
  Requires-Dist: shap; extra == 'all'
73
+ Requires-Dist: tensorly; extra == 'all'
73
74
  Requires-Dist: torch>=1.9.0; extra == 'all'
75
+ Requires-Dist: umap-learn>=0.5.5; extra == 'all'
74
76
  Requires-Dist: upsetplot; extra == 'all'
75
77
  Requires-Dist: wandb; extra == 'all'
76
78
  Provides-Extra: cluster
77
79
  Requires-Dist: fastcluster; extra == 'cluster'
80
+ Requires-Dist: igraph; extra == 'cluster'
78
81
  Requires-Dist: leidenalg; extra == 'cluster'
79
82
  Provides-Extra: dev
80
83
  Requires-Dist: pre-commit; extra == 'dev'
@@ -103,6 +106,7 @@ Requires-Dist: networkx>=3.2; extra == 'misc'
103
106
  Requires-Dist: upsetplot; extra == 'misc'
104
107
  Provides-Extra: ml-base
105
108
  Requires-Dist: scikit-learn>=1.0.2; extra == 'ml-base'
109
+ Requires-Dist: tensorly; extra == 'ml-base'
106
110
  Requires-Dist: torch>=1.9.0; extra == 'ml-base'
107
111
  Provides-Extra: ml-extended
108
112
  Requires-Dist: captum; extra == 'ml-extended'
@@ -124,11 +128,11 @@ Provides-Extra: pysam
124
128
  Requires-Dist: pysam>=0.19.1; extra == 'pysam'
125
129
  Provides-Extra: qc
126
130
  Requires-Dist: multiqc; extra == 'qc'
127
- Provides-Extra: scanpy
128
- Requires-Dist: igraph; extra == 'scanpy'
129
- Requires-Dist: scanpy>=1.9; extra == 'scanpy'
130
131
  Provides-Extra: torch
131
132
  Requires-Dist: torch>=1.9.0; extra == 'torch'
133
+ Provides-Extra: umap
134
+ Requires-Dist: pynndescent>=0.5.10; extra == 'umap'
135
+ Requires-Dist: umap-learn>=0.5.5; extra == 'umap'
132
136
  Description-Content-Type: text/markdown
133
137
 
134
138
  [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
@@ -17,7 +17,7 @@ This command takes a user passed config file handling:
17
17
 
18
18
  ## Preprocess Usage
19
19
 
20
- This command performs preprocessing on the anndata object. It automatically runs the load command under the hood if starting from raw data.
20
+ This command performs preprocessing on the anndata object.
21
21
 
22
22
  ```shell
23
23
  smftools preprocess "/Path_to_experiment_config.csv"
@@ -25,19 +25,36 @@ smftools preprocess "/Path_to_experiment_config.csv"
25
25
 
26
26
  ![](_static/smftools_preprocessing_diagram.png)
27
27
 
28
+
29
+ ## Variant Usage
30
+
31
+ This command performs DNA sequence variation based analyses on the anndata object.
32
+
33
+ ```shell
34
+ smftools variant "/Path_to_experiment_config.csv"
35
+ ```
36
+
37
+ ## Chimeric Usage
38
+
39
+ This command performs putative PCR chimera detection on the anndata object.
40
+
41
+ ```shell
42
+ smftools chimeric "/Path_to_experiment_config.csv"
43
+ ```
44
+
28
45
  ## Spatial Usage
29
46
 
30
- This command performs spatial analysis on the anndata object. It automatically runs the load command and preprocessing under the hood if they have not been already run.
47
+ This command performs spatial analysis on the anndata object.
31
48
 
32
49
  ```shell
33
50
  smftools spatial "/Path_to_experiment_config.csv"
34
51
  ```
35
52
 
36
- - Currently Includes: Position X Position correlation matrices, clustering, dimensionality reduction, spatial autocorrelation.
53
+ - Currently Includes: Position X Position correlation matrices, read x position clustermaps, and spatial autocorrelation.
37
54
 
38
55
  ## HMM Usage
39
56
 
40
- This command performs hmm based feature annotation on the anndata object. It automatically runs the load command and preprocessing under the hood if they have not been already run.
57
+ This command performs hmm based feature annotation on the anndata object.
41
58
 
42
59
  ```shell
43
60
  smftools hmm "/Path_to_experiment_config.csv"
@@ -45,6 +62,23 @@ smftools hmm "/Path_to_experiment_config.csv"
45
62
 
46
63
  - Main outputs will be stored in adata.layers
47
64
 
65
+
66
+ ## Latent Usage
67
+
68
+ This command constructs various latent representations of the anndata object.
69
+
70
+ ```shell
71
+ smftools latent "/Path_to_experiment_config.csv"
72
+ ```
73
+
74
+ ## Full Usage
75
+
76
+ This command is a wrapper that sequentially runs load, preprocess, variant, chimeric, spatial, hmm, latent workflows.
77
+
78
+ ```shell
79
+ smftools full "/Path_to_experiment_config.csv"
80
+ ```
81
+
48
82
  ## Batch Usage
49
83
 
50
84
  This command performs batch processing of any of the above commands across multiple experiments. It takes in a tsv, txt, or csv of experiment specific config csvs.
@@ -0,0 +1,136 @@
1
+ # Command line tutorials
2
+
3
+ ## Quick start
4
+
5
+ Most CLI workflows start with an experiment configuration CSV that points to your data, FASTA, and
6
+ output directory. Once the configuration is ready, you can run commands such as:
7
+
8
+ ```shell
9
+ smftools load /path/to/experiment_config.csv
10
+ smftools preprocess /path/to/experiment_config.csv
11
+ smftools full /path/to/experiment_config.csv
12
+ smftools batch full /path/to/config_paths.csv
13
+ ```
14
+
15
+ Each command will create (or reuse) stage-specific AnnData files in the output directory. Later
16
+ commands reuse results from earlier stages unless you explicitly force a redo via configuration
17
+ flags.
18
+
19
+ ## What each command does
20
+
21
+ ### `smftools load`
22
+
23
+ The load command builds the raw AnnData object from your raw sequencing data. It:
24
+
25
+ - Handles input formats (fast5/pod5/fastq/bam).
26
+ - Performs basecalling, alignment, demultiplexing, and BAM QC.
27
+ - Optionally generates BED/bigWig outputs for alignment summaries.
28
+ - Constructs the raw AnnData object (Single molecules x Positional coordinates).
29
+ - adata.X contains binarized modification data (conversion/deaminase), or modification probabilities (native).
30
+ - Adds basic read-level QC annotations (Read start, end, length, mean quality).
31
+ - Adds layers encoding read DNA sequences, base quality scores, base mismatches.
32
+ - Maintains BAM Tags/Flags in adata.obs.
33
+ - Writes the raw AnnData to the canonical output path and runs MultiQC.
34
+ - Optionally deletes intermediate BAMs, H5ADs, and TSVs.
35
+
36
+ ### `smftools preprocess`
37
+
38
+ The preprocess command performs QC, binarization, filtering, and duplicate detection. It:
39
+
40
+ - Requires an Anndata created by smftools load.
41
+ - Loads sample sheet metadata (if provided).
42
+ - Generates read length/quality QC plots and filters reads on these metrics.
43
+ - Binarizes direct-modification calls based on thresholds (hard or fit thresholds).
44
+ - Cleans NaNs from adata.X and stores in adata.layers (nan0_0minus1, nan_half).
45
+ - Computes positional coverage and base-context annotations (GpC, CpG, ambiguous, other C, any C).
46
+ - Calculates read modification statistics and QC plots.
47
+ - Filters reads based on modification thresholds.
48
+ - Adds base-context binary modification layers.
49
+ - Optionally inverts and reindexes the data along the var (positions) axis.
50
+ - Flags duplicate reads based on nearest neighbor hamming distance of overlapping valid sites (Conversion/deamination).
51
+ - Performs complexity analyses using duplicate read clusters and Lander/Waterman fits (conversion/deamination workflows).
52
+ - Visualizes read span masks and base quality scores with clustermaps.
53
+ - Writes preprocessed (duplicates flagged, but kept) and preprocessed/deduplicated AnnData outputs.
54
+
55
+ ### `smftools variant`
56
+
57
+ The variant command focuses on DNA sequence variation analyses. It:
58
+
59
+ - Requires at least a preprocessed AnnData object.
60
+ - Calculates position level variation frequencies per reference/sample.
61
+ - Generates z-scores for variant occurrence given read level Q-scores and assuming uniform Palt transitions.
62
+ - Visualizes read DNA sequence encodings and mismatch encodings.
63
+
64
+ ### `smftools chimeric`
65
+
66
+ The chimeric command is meant to find putative PCR chimeras. It:
67
+
68
+ - Requires at least a preprocessed AnnData object.
69
+ - Performs sliding window nearest neighbor hamming distance analysis per read.
70
+ - Visualizes the windowed nearest neighbor hamming distances per read.
71
+ - Assembles maximum spanning intervals of 0-hamming distance neighbors per read within the reference/sample.
72
+ - In progress.
73
+
74
+ ### `smftools spatial`
75
+
76
+ The spatial command runs downstream spatial analyses on the preprocessed data. It:
77
+
78
+ - Requires at least a preprocessed AnnData object.
79
+ - Optionally loads sample sheet metadata.
80
+ - Optionally inverts and reindexes the data along the positions axis.
81
+ - Generates clustermaps for preprocessed (and deduplicated) AnnData.
82
+ - Computes spatial autocorrelation, rolling metrics, and grid summaries.
83
+ - Generates positionwise correlation matrices.
84
+ - Writes the spatial AnnData output.
85
+
86
+ ### `smftools hmm`
87
+
88
+ The hmm command adds HMM-based feature annotation and summary plots. It:
89
+
90
+ - Requires at least a preprocessed AnnData object.
91
+ - Fits or reuses HMM models for configured feature sets.
92
+ - Annotates AnnData with HMM-derived feature layers (State layers and probability layers)
93
+ - Calls HMM feature peaks and writes peak-calling outputs.
94
+ - Generates clustermaps, bulk feature traces, and fragment size distribution plots for HMM layers.
95
+ - Writes the HMM AnnData output.
96
+
97
+ ### `smftools latent`
98
+
99
+ The latent command constructs latent representations of the data. It:
100
+
101
+ - Requires at least a preprocessed AnnData object.
102
+ - Runs various dimensionality reduction and graph construction modalities:
103
+ - Principle component analysis (PCA)
104
+ - K-nearest neighbor (KNN)
105
+ - Uniform manifold approximation and projection (UMAP)
106
+ - Non-negative matrix factorization (NMF)
107
+ - Canonical polyadic decomposition (PARAFAC)
108
+
109
+ ### `smftools full`
110
+
111
+ The full command is a workflow wrapper. It runs the following sequentially:
112
+
113
+ - Load / preprocess / variant / chimeric / spatial / hmm / latent.
114
+
115
+
116
+ ## Batch processing
117
+
118
+ Use the batch command to run a single task across multiple experiments.
119
+
120
+ ```shell
121
+ smftools batch preprocess /path/to/config_paths.csv
122
+ ```
123
+
124
+ The batch command accepts:
125
+
126
+ - **CSV/TSV** tables with a column of config paths (default column name: `config_path`).
127
+ - **TXT** files with one config path per line.
128
+
129
+ You can override the column name or delimiter if needed:
130
+
131
+ ```shell
132
+ smftools batch spatial /path/to/configs.tsv --column my_config --sep $'\t'
133
+ ```
134
+
135
+ Each path is validated; missing configs are skipped with a message, while valid configs run the
136
+ requested task in sequence.
@@ -86,6 +86,7 @@ docs = [
86
86
 
87
87
  cluster = [
88
88
  "fastcluster",
89
+ "igraph",
89
90
  "leidenalg",
90
91
  ]
91
92
 
@@ -117,6 +118,7 @@ pysam = [
117
118
 
118
119
  ml-base = [
119
120
  "scikit-learn>=1.0.2",
121
+ "tensorly",
120
122
  "torch>=1.9.0",
121
123
  ]
122
124
 
@@ -133,9 +135,9 @@ qc = [
133
135
  "multiqc",
134
136
  ]
135
137
 
136
- scanpy = [
137
- "igraph",
138
- "scanpy>=1.9",
138
+ umap = [
139
+ "pynndescent>=0.5.10",
140
+ "umap-learn>=0.5.5",
139
141
  ]
140
142
 
141
143
  torch = [
@@ -145,6 +147,7 @@ torch = [
145
147
  all = [
146
148
  # cluster
147
149
  "fastcluster",
150
+ "igraph",
148
151
  "leidenalg",
149
152
 
150
153
  # informatics
@@ -170,12 +173,13 @@ all = [
170
173
  "omegaconf",
171
174
  "scikit-learn>=1.0.2",
172
175
  "shap",
176
+ "tensorly",
173
177
  "torch>=1.9.0",
174
178
  "wandb",
175
179
 
176
- # scanpy
177
- "igraph",
178
- "scanpy>=1.9",
180
+ # umap
181
+ "pynndescent>=0.5.10",
182
+ "umap-learn>=0.5.5",
179
183
 
180
184
  # qc
181
185
  "multiqc",
@@ -229,4 +233,4 @@ line-ending = "lf"
229
233
 
230
234
  [tool.ruff.lint]
231
235
  select = ["E", "F", "I", "B", "UP", "SIM", "ISC"]
232
- ignore = ["E501", "F", "B", "UP","SIM"]
236
+ ignore = ["E501", "F", "B", "UP","SIM"]
@@ -20,7 +20,8 @@ pod5>=0.1.21
20
20
  pybedtools>=0.12.0
21
21
  pyBigWig>=0.3.24
22
22
  pysam>=0.19.1
23
- scanpy>=1.11
23
+ umap-learn>=0.5.5
24
+ pynndescent>=0.5.10
24
25
 
25
26
  # ML / modeling
26
27
  captum
@@ -29,6 +30,7 @@ lightning
29
30
  omegaconf
30
31
  scikit-learn>=1.0.2
31
32
  shap
33
+ tensorly
32
34
  torch>=1.9.0
33
35
  wandb
34
36
 
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.3.0"
3
+ __version__ = "0.3.2"