smftools 0.3.0__tar.gz → 0.3.1__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (304)
  1. {smftools-0.3.0 → smftools-0.3.1}/PKG-INFO +8 -6
  2. {smftools-0.3.0 → smftools-0.3.1}/pyproject.toml +9 -7
  3. {smftools-0.3.0 → smftools-0.3.1}/requirements.txt +3 -1
  4. {smftools-0.3.0 → smftools-0.3.1}/smftools/_version.py +1 -1
  5. smftools-0.3.1/smftools/cli/helpers.py +82 -0
  6. {smftools-0.3.0 → smftools-0.3.1}/smftools/cli/hmm_adata.py +232 -31
  7. smftools-0.3.1/smftools/cli/latent_adata.py +318 -0
  8. {smftools-0.3.0 → smftools-0.3.1}/smftools/cli/load_adata.py +77 -73
  9. {smftools-0.3.0 → smftools-0.3.1}/smftools/cli/preprocess_adata.py +178 -53
  10. {smftools-0.3.0 → smftools-0.3.1}/smftools/cli/spatial_adata.py +149 -101
  11. {smftools-0.3.0 → smftools-0.3.1}/smftools/cli_entry.py +12 -0
  12. {smftools-0.3.0 → smftools-0.3.1}/smftools/config/conversion.yaml +11 -1
  13. {smftools-0.3.0 → smftools-0.3.1}/smftools/config/default.yaml +38 -1
  14. {smftools-0.3.0 → smftools-0.3.1}/smftools/config/experiment_config.py +53 -1
  15. smftools-0.3.1/smftools/constants.py +102 -0
  16. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/HMM.py +88 -0
  17. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/__init__.py +6 -0
  18. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/bam_functions.py +358 -8
  19. smftools-0.3.1/smftools/informatics/converted_BAM_to_adata.py +1125 -0
  20. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/h5ad_functions.py +115 -2
  21. smftools-0.3.1/smftools/informatics/modkit_extract_to_adata.py +1897 -0
  22. smftools-0.3.1/smftools/informatics/sequence_encoding.py +72 -0
  23. {smftools-0.3.0 → smftools-0.3.1}/smftools/logging_utils.py +21 -2
  24. {smftools-0.3.0 → smftools-0.3.1}/smftools/metadata.py +1 -1
  25. {smftools-0.3.0 → smftools-0.3.1}/smftools/plotting/__init__.py +9 -0
  26. smftools-0.3.1/smftools/plotting/general_plotting.py +3368 -0
  27. {smftools-0.3.0 → smftools-0.3.1}/smftools/plotting/hmm_plotting.py +85 -7
  28. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/__init__.py +1 -0
  29. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/append_base_context.py +17 -17
  30. smftools-0.3.1/smftools/preprocessing/append_mismatch_frequency_sites.py +158 -0
  31. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_consensus.py +1 -1
  32. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_read_modification_stats.py +6 -1
  33. {smftools-0.3.0 → smftools-0.3.1}/smftools/readwrite.py +53 -17
  34. {smftools-0.3.0 → smftools-0.3.1}/smftools/schema/anndata_schema_v1.yaml +15 -1
  35. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/__init__.py +4 -0
  36. smftools-0.3.1/smftools/tools/calculate_leiden.py +57 -0
  37. smftools-0.3.1/smftools/tools/calculate_nmf.py +119 -0
  38. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/calculate_umap.py +91 -8
  39. smftools-0.3.1/smftools/tools/rolling_nn_distance.py +235 -0
  40. smftools-0.3.1/smftools/tools/tensor_factorization.py +169 -0
  41. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/tools/test_tools_imports.py +2 -0
  42. smftools-0.3.1/tests/unit/hmm/test_mask_read_span.py +43 -0
  43. smftools-0.3.1/tests/unit/informatics/test_bam_base_identities.py +81 -0
  44. smftools-0.3.1/tests/unit/informatics/test_bam_read_tags.py +50 -0
  45. smftools-0.3.1/tests/unit/informatics/test_bam_secondary_supplementary.py +125 -0
  46. smftools-0.3.1/tests/unit/informatics/test_h5ad_secondary_supplementary.py +33 -0
  47. smftools-0.3.1/tests/unit/informatics/test_modkit_sequence_batch_files.py +43 -0
  48. smftools-0.3.1/tests/unit/informatics/test_modkit_sequence_encoding.py +49 -0
  49. {smftools-0.3.0 → smftools-0.3.1}/tests/unit/informatics/test_tool_backends.py +4 -2
  50. smftools-0.3.1/tests/unit/test_append_mismatch_frequency_sites.py +62 -0
  51. smftools-0.3.1/tests/unit/test_calculate_nmf.py +29 -0
  52. smftools-0.3.1/tests/unit/test_combined_hmm_length_clustermap_barplot.py +46 -0
  53. smftools-0.3.1/tests/unit/test_combined_hmm_length_clustermap_outputs.py +41 -0
  54. smftools-0.3.1/tests/unit/test_combined_hmm_raw_clustermap_nan_fill.py +43 -0
  55. smftools-0.3.1/tests/unit/test_combined_raw_clustermap_barplot_nan_ignore.py +91 -0
  56. smftools-0.3.1/tests/unit/test_combined_raw_clustermap_nan_fill.py +43 -0
  57. smftools-0.3.1/tests/unit/test_hmm_clustermap_colormap.py +18 -0
  58. smftools-0.3.1/tests/unit/test_length_layer_subclass_mapping.py +28 -0
  59. smftools-0.3.1/tests/unit/test_methylation_fraction_nan_handling.py +36 -0
  60. smftools-0.3.1/tests/unit/test_plot_cp_sequence_components.py +38 -0
  61. smftools-0.3.1/tests/unit/test_plot_hmm_size_contours_feature_ranges.py +41 -0
  62. smftools-0.3.1/tests/unit/test_plot_hmm_size_contours_nan_values.py +35 -0
  63. smftools-0.3.1/tests/unit/test_plot_nmf_components.py +26 -0
  64. smftools-0.3.1/tests/unit/test_plot_read_span_quality_clustermaps.py +35 -0
  65. smftools-0.3.1/tests/unit/test_plot_rolling_nn_and_layer.py +61 -0
  66. smftools-0.3.1/tests/unit/test_plot_sequence_integer_encoding_clustermaps.py +40 -0
  67. smftools-0.3.1/tests/unit/test_readwrite.py +36 -0
  68. smftools-0.3.1/tests/unit/test_rolling_nn_distance.py +232 -0
  69. smftools-0.3.1/tests/unit/test_tensor_factorization.py +65 -0
  70. smftools-0.3.1/tests/unit/tools/test_calculate_umap.py +74 -0
  71. smftools-0.3.0/smftools/cli/helpers.py +0 -56
  72. smftools-0.3.0/smftools/constants.py +0 -37
  73. smftools-0.3.0/smftools/informatics/converted_BAM_to_adata.py +0 -704
  74. smftools-0.3.0/smftools/informatics/modkit_extract_to_adata.py +0 -1319
  75. smftools-0.3.0/smftools/plotting/general_plotting.py +0 -1585
  76. smftools-0.3.0/tests/unit/test_readwrite.py +0 -7
  77. {smftools-0.3.0 → smftools-0.3.1}/.gitattributes +0 -0
  78. {smftools-0.3.0 → smftools-0.3.1}/.github/workflows/ci.yml +0 -0
  79. {smftools-0.3.0 → smftools-0.3.1}/.gitignore +0 -0
  80. {smftools-0.3.0 → smftools-0.3.1}/.pre-commit-config.yaml +0 -0
  81. {smftools-0.3.0 → smftools-0.3.1}/.readthedocs.yaml +0 -0
  82. {smftools-0.3.0 → smftools-0.3.1}/AGENTS.md +0 -0
  83. {smftools-0.3.0 → smftools-0.3.1}/CONTRIBUTING.md +0 -0
  84. {smftools-0.3.0 → smftools-0.3.1}/LICENSE +0 -0
  85. {smftools-0.3.0 → smftools-0.3.1}/README.md +0 -0
  86. {smftools-0.3.0 → smftools-0.3.1}/docs/Makefile +0 -0
  87. {smftools-0.3.0 → smftools-0.3.1}/docs/make.bat +0 -0
  88. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_static/converted_BAM_to_adata.png +0 -0
  89. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_static/modkit_extract_to_adata.png +0 -0
  90. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_static/smftools-1.svg +0 -0
  91. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_static/smftools-1.tif +0 -0
  92. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
  93. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_static/smftools_informatics_diagram.png +0 -0
  94. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
  95. {smftools-0.3.0 → smftools-0.3.1}/docs/source/_templates/tmp +0 -0
  96. {smftools-0.3.0 → smftools-0.3.1}/docs/source/api/datasets.md +0 -0
  97. {smftools-0.3.0 → smftools-0.3.1}/docs/source/api/index.md +0 -0
  98. {smftools-0.3.0 → smftools-0.3.1}/docs/source/api/informatics.md +0 -0
  99. {smftools-0.3.0 → smftools-0.3.1}/docs/source/api/preprocessing.md +0 -0
  100. {smftools-0.3.0 → smftools-0.3.1}/docs/source/api/tools.md +0 -0
  101. {smftools-0.3.0 → smftools-0.3.1}/docs/source/basic_usage.md +0 -0
  102. {smftools-0.3.0 → smftools-0.3.1}/docs/source/cli.md +0 -0
  103. {smftools-0.3.0 → smftools-0.3.1}/docs/source/conf.py +0 -0
  104. {smftools-0.3.0 → smftools-0.3.1}/docs/source/contributors.md +0 -0
  105. {smftools-0.3.0 → smftools-0.3.1}/docs/source/dev/index.md +0 -0
  106. {smftools-0.3.0 → smftools-0.3.1}/docs/source/index.md +0 -0
  107. {smftools-0.3.0 → smftools-0.3.1}/docs/source/installation.md +0 -0
  108. {smftools-0.3.0 → smftools-0.3.1}/docs/source/references.bib +0 -0
  109. {smftools-0.3.0 → smftools-0.3.1}/docs/source/references.rst +0 -0
  110. {smftools-0.3.0 → smftools-0.3.1}/docs/source/release-notes/0.1.0.md +0 -0
  111. {smftools-0.3.0 → smftools-0.3.1}/docs/source/release-notes/0.1.1.md +0 -0
  112. {smftools-0.3.0 → smftools-0.3.1}/docs/source/release-notes/0.1.6.md +0 -0
  113. {smftools-0.3.0 → smftools-0.3.1}/docs/source/release-notes/0.2.1.md +0 -0
  114. {smftools-0.3.0 → smftools-0.3.1}/docs/source/release-notes/0.2.3.md +0 -0
  115. {smftools-0.3.0 → smftools-0.3.1}/docs/source/release-notes/0.3.0.md +0 -0
  116. {smftools-0.3.0 → smftools-0.3.1}/docs/source/release-notes/index.md +0 -0
  117. {smftools-0.3.0 → smftools-0.3.1}/docs/source/requirements.txt +0 -0
  118. {smftools-0.3.0 → smftools-0.3.1}/docs/source/schema/anndata_schema.md +0 -0
  119. {smftools-0.3.0 → smftools-0.3.1}/docs/source/tutorials/cli_usage.md +0 -0
  120. {smftools-0.3.0 → smftools-0.3.1}/docs/source/tutorials/experiment_config.md +0 -0
  121. {smftools-0.3.0 → smftools-0.3.1}/docs/source/tutorials/index.md +0 -0
  122. {smftools-0.3.0 → smftools-0.3.1}/experiment_config.csv +0 -0
  123. {smftools-0.3.0 → smftools-0.3.1}/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +0 -0
  124. {smftools-0.3.0 → smftools-0.3.1}/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +0 -0
  125. {smftools-0.3.0 → smftools-0.3.1}/sample_sheet.csv +0 -0
  126. {smftools-0.3.0 → smftools-0.3.1}/smftools/__init__.py +0 -0
  127. {smftools-0.3.0 → smftools-0.3.1}/smftools/_settings.py +0 -0
  128. {smftools-0.3.0 → smftools-0.3.1}/smftools/cli/__init__.py +0 -0
  129. {smftools-0.3.0 → smftools-0.3.1}/smftools/cli/archived/cli_flows.py +0 -0
  130. {smftools-0.3.0 → smftools-0.3.1}/smftools/config/__init__.py +0 -0
  131. {smftools-0.3.0 → smftools-0.3.1}/smftools/config/deaminase.yaml +0 -0
  132. {smftools-0.3.0 → smftools-0.3.1}/smftools/config/direct.yaml +0 -0
  133. {smftools-0.3.0 → smftools-0.3.1}/smftools/config/discover_input_files.py +0 -0
  134. {smftools-0.3.0 → smftools-0.3.1}/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
  135. {smftools-0.3.0 → smftools-0.3.1}/smftools/datasets/F1_sample_sheet.csv +0 -0
  136. {smftools-0.3.0 → smftools-0.3.1}/smftools/datasets/__init__.py +0 -0
  137. {smftools-0.3.0 → smftools-0.3.1}/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
  138. {smftools-0.3.0 → smftools-0.3.1}/smftools/datasets/datasets.py +0 -0
  139. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/__init__.py +0 -0
  140. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/archived/apply_hmm_batched.py +0 -0
  141. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/archived/calculate_distances.py +0 -0
  142. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/archived/call_hmm_peaks.py +0 -0
  143. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/archived/train_hmm.py +0 -0
  144. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/call_hmm_peaks.py +0 -0
  145. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/display_hmm.py +0 -0
  146. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/hmm_readwrite.py +0 -0
  147. {smftools-0.3.0 → smftools-0.3.1}/smftools/hmm/nucleosome_hmm_refinement.py +0 -0
  148. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/bam_conversion.py +0 -0
  149. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/bam_direct.py +0 -0
  150. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/basecall_pod5s.py +0 -0
  151. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/basecalls_to_adata.py +0 -0
  152. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/conversion_smf.py +0 -0
  153. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/deaminase_smf.py +0 -0
  154. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/direct_smf.py +0 -0
  155. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/fast5_to_pod5.py +0 -0
  156. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/__init__.py +0 -0
  157. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +0 -0
  158. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +0 -0
  159. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/bam_qc.py +0 -0
  160. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +0 -0
  161. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/canoncall.py +0 -0
  162. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +0 -0
  163. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +0 -0
  164. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/count_aligned_reads.py +0 -0
  165. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +0 -0
  166. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/extract_base_identities.py +0 -0
  167. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/extract_mods.py +0 -0
  168. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +0 -0
  169. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +0 -0
  170. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +0 -0
  171. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/find_conversion_sites.py +0 -0
  172. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +0 -0
  173. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +0 -0
  174. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/get_native_references.py +0 -0
  175. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/index_fasta.py +0 -0
  176. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/informatics.py +0 -0
  177. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/load_adata.py +0 -0
  178. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/make_modbed.py +0 -0
  179. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/modQC.py +0 -0
  180. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/modcall.py +0 -0
  181. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/ohe_batching.py +0 -0
  182. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +0 -0
  183. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/one_hot_decode.py +0 -0
  184. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/one_hot_encode.py +0 -0
  185. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +0 -0
  186. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +0 -0
  187. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +0 -0
  188. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/print_bam_query_seq.py +0 -0
  189. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/subsample_fasta_from_bed.py +0 -0
  190. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/archived/subsample_pod5.py +0 -0
  191. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/basecalling.py +0 -0
  192. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/bed_functions.py +0 -0
  193. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/binarize_converted_base_identities.py +0 -0
  194. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/complement_base_list.py +0 -0
  195. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/fasta_functions.py +0 -0
  196. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/modkit_functions.py +0 -0
  197. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/ohe.py +0 -0
  198. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/pod5_functions.py +0 -0
  199. {smftools-0.3.0 → smftools-0.3.1}/smftools/informatics/run_multiqc.py +0 -0
  200. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/__init__.py +0 -0
  201. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/data/__init__.py +0 -0
  202. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/data/anndata_data_module.py +0 -0
  203. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/data/preprocessing.py +0 -0
  204. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/evaluation/__init__.py +0 -0
  205. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/evaluation/eval_utils.py +0 -0
  206. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/evaluation/evaluators.py +0 -0
  207. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/inference/__init__.py +0 -0
  208. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/inference/inference_utils.py +0 -0
  209. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/inference/lightning_inference.py +0 -0
  210. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/inference/sklearn_inference.py +0 -0
  211. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/inference/sliding_window_inference.py +0 -0
  212. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/__init__.py +0 -0
  213. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/base.py +0 -0
  214. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/cnn.py +0 -0
  215. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/lightning_base.py +0 -0
  216. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/mlp.py +0 -0
  217. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/positional.py +0 -0
  218. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/rnn.py +0 -0
  219. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/sklearn_models.py +0 -0
  220. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/transformer.py +0 -0
  221. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/models/wrappers.py +0 -0
  222. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/training/__init__.py +0 -0
  223. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/training/train_lightning_model.py +0 -0
  224. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/training/train_sklearn_model.py +0 -0
  225. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/utils/__init__.py +0 -0
  226. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/utils/device.py +0 -0
  227. {smftools-0.3.0 → smftools-0.3.1}/smftools/machine_learning/utils/grl.py +0 -0
  228. {smftools-0.3.0 → smftools-0.3.1}/smftools/optional_imports.py +0 -0
  229. {smftools-0.3.0 → smftools-0.3.1}/smftools/plotting/autocorrelation_plotting.py +0 -0
  230. {smftools-0.3.0 → smftools-0.3.1}/smftools/plotting/classifiers.py +0 -0
  231. {smftools-0.3.0 → smftools-0.3.1}/smftools/plotting/position_stats.py +0 -0
  232. {smftools-0.3.0 → smftools-0.3.1}/smftools/plotting/qc_plotting.py +0 -0
  233. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/append_binary_layer_by_base_context.py +0 -0
  234. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +0 -0
  235. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/archived/calculate_complexity.py +0 -0
  236. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/archived/mark_duplicates.py +0 -0
  237. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/archived/preprocessing.py +0 -0
  238. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/archived/remove_duplicates.py +0 -0
  239. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/binarize.py +0 -0
  240. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/binarize_on_Youden.py +0 -0
  241. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/binary_layers_to_ohe.py +0 -0
  242. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_complexity_II.py +0 -0
  243. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_coverage.py +0 -0
  244. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_pairwise_differences.py +0 -0
  245. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_pairwise_hamming_distances.py +0 -0
  246. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_position_Youden.py +0 -0
  247. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/calculate_read_length_stats.py +0 -0
  248. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/clean_NaN.py +0 -0
  249. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/filter_adata_by_nan_proportion.py +0 -0
  250. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/filter_reads_on_length_quality_mapping.py +0 -0
  251. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/filter_reads_on_modification_thresholds.py +0 -0
  252. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/flag_duplicate_reads.py +0 -0
  253. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/invert_adata.py +0 -0
  254. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/load_sample_sheet.py +0 -0
  255. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/make_dirs.py +0 -0
  256. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/min_non_diagonal.py +0 -0
  257. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/recipes.py +0 -0
  258. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/reindex_references_adata.py +0 -0
  259. {smftools-0.3.0 → smftools-0.3.1}/smftools/preprocessing/subsample_adata.py +0 -0
  260. {smftools-0.3.0 → smftools-0.3.1}/smftools/schema/__init__.py +0 -0
  261. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/archived/apply_hmm.py +0 -0
  262. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/archived/classifiers.py +0 -0
  263. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/archived/classify_methylated_features.py +0 -0
  264. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/archived/classify_non_methylated_features.py +0 -0
  265. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/archived/subset_adata_v1.py +0 -0
  266. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/archived/subset_adata_v2.py +0 -0
  267. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/cluster_adata_on_methylation.py +0 -0
  268. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/general_tools.py +0 -0
  269. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/position_stats.py +0 -0
  270. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/read_stats.py +0 -0
  271. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/spatial_autocorrelation.py +0 -0
  272. {smftools-0.3.0 → smftools-0.3.1}/smftools/tools/subset_adata.py +0 -0
  273. {smftools-0.3.0 → smftools-0.3.1}/tests/__init__.py +0 -0
  274. {smftools-0.3.0 → smftools-0.3.1}/tests/_test_inputs/_test_bed_I.bed +0 -0
  275. {smftools-0.3.0 → smftools-0.3.1}/tests/_test_inputs/_test_fasta_I.fa +0 -0
  276. {smftools-0.3.0 → smftools-0.3.1}/tests/_test_inputs/_test_fasta_I.fa.fai +0 -0
  277. {smftools-0.3.0 → smftools-0.3.1}/tests/_test_inputs/_test_pod5_I.pod5 +0 -0
  278. {smftools-0.3.0 → smftools-0.3.1}/tests/_test_inputs/test_experiment_config_conversion_I.csv +0 -0
  279. {smftools-0.3.0 → smftools-0.3.1}/tests/_test_inputs/test_experiment_config_deaminase_I.csv +0 -0
  280. {smftools-0.3.0 → smftools-0.3.1}/tests/_test_inputs/test_experiment_config_direct_I.csv +0 -0
  281. {smftools-0.3.0 → smftools-0.3.1}/tests/e2e/__init__.py +0 -0
  282. {smftools-0.3.0 → smftools-0.3.1}/tests/e2e/cli/test_load_adata.py +0 -0
  283. {smftools-0.3.0 → smftools-0.3.1}/tests/e2e/cli/test_spatial_adata.py +0 -0
  284. {smftools-0.3.0 → smftools-0.3.1}/tests/integration/__init__.py +0 -0
  285. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/__init__.py +0 -0
  286. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/cli/test_cli_imports.py +0 -0
  287. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/config/test_config_imports.py +0 -0
  288. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/datasets/test_datasets_imports.py +0 -0
  289. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/hmm/test_hmm_imports.py +0 -0
  290. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/import_helpers.py +0 -0
  291. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/informatics/test_informatics_imports.py +0 -0
  292. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/machine_learning/data/test_data_imports.py +0 -0
  293. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/machine_learning/evaluation/test_evaluation_imports.py +0 -0
  294. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/machine_learning/inference/test_inference_imports.py +0 -0
  295. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/machine_learning/models/test_models_imports.py +0 -0
  296. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/machine_learning/training/test_training_imports.py +0 -0
  297. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/machine_learning/utils/test_utils_imports.py +0 -0
  298. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/plotting/test_plotting_imports.py +0 -0
  299. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/preprocessing/test_preprocessing_imports.py +0 -0
  300. {smftools-0.3.0 → smftools-0.3.1}/tests/smoke/test_smftools_imports.py +0 -0
  301. {smftools-0.3.0 → smftools-0.3.1}/tests/unit/__init__.py +0 -0
  302. {smftools-0.3.0 → smftools-0.3.1}/tests/unit/config/test_LoadExperimentConfig.py +0 -0
  303. {smftools-0.3.0 → smftools-0.3.1}/tests/unit/datasets/test_datasets.py +0 -0
  304. {smftools-0.3.0 → smftools-0.3.1}/tests/unit/test_metadata.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: smftools
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Single Molecule Footprinting Analysis in Python.
5
5
  Project-URL: Source, https://github.com/jkmckenna/smftools
6
6
  Project-URL: Documentation, https://smftools.readthedocs.io/
@@ -55,7 +55,6 @@ Provides-Extra: all
55
55
  Requires-Dist: captum; extra == 'all'
56
56
  Requires-Dist: fastcluster; extra == 'all'
57
57
  Requires-Dist: hydra-core; extra == 'all'
58
- Requires-Dist: igraph; extra == 'all'
59
58
  Requires-Dist: leidenalg; extra == 'all'
60
59
  Requires-Dist: lightning; extra == 'all'
61
60
  Requires-Dist: matplotlib>=3.6.2; extra == 'all'
@@ -65,16 +64,18 @@ Requires-Dist: omegaconf; extra == 'all'
65
64
  Requires-Dist: pod5>=0.1.21; extra == 'all'
66
65
  Requires-Dist: pybedtools>=0.12.0; extra == 'all'
67
66
  Requires-Dist: pybigwig>=0.3.24; extra == 'all'
67
+ Requires-Dist: pynndescent>=0.5.10; extra == 'all'
68
68
  Requires-Dist: pysam>=0.19.1; extra == 'all'
69
- Requires-Dist: scanpy>=1.9; extra == 'all'
70
69
  Requires-Dist: scikit-learn>=1.0.2; extra == 'all'
71
70
  Requires-Dist: seaborn>=0.11; extra == 'all'
72
71
  Requires-Dist: shap; extra == 'all'
73
72
  Requires-Dist: torch>=1.9.0; extra == 'all'
73
+ Requires-Dist: umap-learn>=0.5.5; extra == 'all'
74
74
  Requires-Dist: upsetplot; extra == 'all'
75
75
  Requires-Dist: wandb; extra == 'all'
76
76
  Provides-Extra: cluster
77
77
  Requires-Dist: fastcluster; extra == 'cluster'
78
+ Requires-Dist: igraph; extra == 'cluster'
78
79
  Requires-Dist: leidenalg; extra == 'cluster'
79
80
  Provides-Extra: dev
80
81
  Requires-Dist: pre-commit; extra == 'dev'
@@ -103,6 +104,7 @@ Requires-Dist: networkx>=3.2; extra == 'misc'
103
104
  Requires-Dist: upsetplot; extra == 'misc'
104
105
  Provides-Extra: ml-base
105
106
  Requires-Dist: scikit-learn>=1.0.2; extra == 'ml-base'
107
+ Requires-Dist: tensorly; extra == 'ml-base'
106
108
  Requires-Dist: torch>=1.9.0; extra == 'ml-base'
107
109
  Provides-Extra: ml-extended
108
110
  Requires-Dist: captum; extra == 'ml-extended'
@@ -124,11 +126,11 @@ Provides-Extra: pysam
124
126
  Requires-Dist: pysam>=0.19.1; extra == 'pysam'
125
127
  Provides-Extra: qc
126
128
  Requires-Dist: multiqc; extra == 'qc'
127
- Provides-Extra: scanpy
128
- Requires-Dist: igraph; extra == 'scanpy'
129
- Requires-Dist: scanpy>=1.9; extra == 'scanpy'
130
129
  Provides-Extra: torch
131
130
  Requires-Dist: torch>=1.9.0; extra == 'torch'
131
+ Provides-Extra: umap
132
+ Requires-Dist: pynndescent>=0.5.10; extra == 'umap'
133
+ Requires-Dist: umap-learn>=0.5.5; extra == 'umap'
132
134
  Description-Content-Type: text/markdown
133
135
 
134
136
  [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
@@ -86,6 +86,7 @@ docs = [
86
86
 
87
87
  cluster = [
88
88
  "fastcluster",
89
+ "igraph",
89
90
  "leidenalg",
90
91
  ]
91
92
 
@@ -117,6 +118,7 @@ pysam = [
117
118
 
118
119
  ml-base = [
119
120
  "scikit-learn>=1.0.2",
121
+ "tensorly",
120
122
  "torch>=1.9.0",
121
123
  ]
122
124
 
@@ -133,9 +135,9 @@ qc = [
133
135
  "multiqc",
134
136
  ]
135
137
 
136
- scanpy = [
137
- "igraph",
138
- "scanpy>=1.9",
138
+ umap = [
139
+ "pynndescent>=0.5.10",
140
+ "umap-learn>=0.5.5",
139
141
  ]
140
142
 
141
143
  torch = [
@@ -173,9 +175,9 @@ all = [
173
175
  "torch>=1.9.0",
174
176
  "wandb",
175
177
 
176
- # scanpy
177
- "igraph",
178
- "scanpy>=1.9",
178
+ # umap
179
+ "pynndescent>=0.5.10",
180
+ "umap-learn>=0.5.5",
179
181
 
180
182
  # qc
181
183
  "multiqc",
@@ -229,4 +231,4 @@ line-ending = "lf"
229
231
 
230
232
  [tool.ruff.lint]
231
233
  select = ["E", "F", "I", "B", "UP", "SIM", "ISC"]
232
- ignore = ["E501", "F", "B", "UP","SIM"]
234
+ ignore = ["E501", "F", "B", "UP","SIM"]
@@ -20,7 +20,8 @@ pod5>=0.1.21
20
20
  pybedtools>=0.12.0
21
21
  pyBigWig>=0.3.24
22
22
  pysam>=0.19.1
23
- scanpy>=1.11
23
+ umap-learn>=0.5.5
24
+ pynndescent>=0.5.10
24
25
 
25
26
  # ML / modeling
26
27
  captum
@@ -29,6 +30,7 @@ lightning
29
30
  omegaconf
30
31
  scikit-learn>=1.0.2
31
32
  shap
33
+ tensorly
32
34
  torch>=1.9.0
33
35
  wandb
34
36
 
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.3.0"
3
+ __version__ = "0.3.1"
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ import anndata as ad
7
+
8
+ from smftools.constants import H5_DIR, HMM_DIR, LATENT_DIR, LOAD_DIR, PREPROCESS_DIR, SPATIAL_DIR
9
+
10
+ from ..metadata import write_runtime_schema_yaml
11
+ from ..readwrite import safe_write_h5ad
12
+
13
+
14
@dataclass
class AdataPaths:
    """Bundle of the standard AnnData file locations used by the CLI stages.

    Instances are produced by ``get_adata_paths``; every attribute is a
    ``Path`` to a ``.h5ad.gz`` file for one pipeline stage.
    """

    # AnnData written by the load stage.
    raw: Path
    # Preprocessed AnnData.
    pp: Path
    # Preprocessed AnnData with duplicates removed (same file as ``pp`` when
    # the SMF modality is "direct").
    pp_dedup: Path
    # AnnData written by the spatial stage.
    spatial: Path
    # AnnData written by the HMM stage.
    hmm: Path
    # AnnData written by the latent stage.
    latent: Path
22
+
23
+
24
def get_adata_paths(cfg) -> AdataPaths:
    """Compute every standard AnnData path for an experiment config.

    Args:
        cfg: Experiment config; ``output_directory``, ``experiment_name`` and
            ``smf_modality`` are read from it.

    Returns:
        An ``AdataPaths`` whose spatial, HMM and latent file names are derived
        from the duplicate-removed preprocessed file name.
    """
    root = Path(cfg.output_directory)
    experiment = cfg.experiment_name
    preprocess_h5 = root / PREPROCESS_DIR / H5_DIR

    loaded = root / LOAD_DIR / H5_DIR / f"{experiment}.h5ad.gz"
    preprocessed = preprocess_h5 / f"{experiment}_preprocessed.h5ad.gz"

    # Direct SMF has no separate duplicate-removed file; reuse the
    # preprocessed path for that slot.
    deduplicated = (
        preprocessed
        if cfg.smf_modality == "direct"
        else preprocess_h5 / f"{experiment}_preprocessed_duplicates_removed.h5ad.gz"
    )

    # Downstream stage files are named after the deduplicated file's stem.
    stem = deduplicated.name.removesuffix(".h5ad.gz")

    return AdataPaths(
        raw=loaded,
        pp=preprocessed,
        pp_dedup=deduplicated,
        spatial=root / SPATIAL_DIR / H5_DIR / f"{stem}_spatial.h5ad.gz",
        hmm=root / HMM_DIR / H5_DIR / f"{stem}_hmm.h5ad.gz",
        latent=root / LATENT_DIR / H5_DIR / f"{stem}_latent.h5ad.gz",
    )
59
+
60
+
61
def load_experiment_config(config_path: str):
    """Build the experiment config from a config file without running any stage.

    Args:
        config_path: Path handed to ``LoadExperimentConfig``.

    Returns:
        The config object returned by ``ExperimentConfig.from_var_dict`` (the
        second element of its return value is discarded).
    """
    from datetime import datetime
    from importlib import resources

    from ..config import ExperimentConfig, LoadExperimentConfig

    # Date stamp in YYMMDD form, passed through to the config builder.
    today_stamp = datetime.today().strftime("%y%m%d")
    config_loader = LoadExperimentConfig(config_path)
    # Defaults are taken from the "config" directory inside the installed package.
    packaged_defaults = resources.files("smftools").joinpath("config")

    cfg, _ = ExperimentConfig.from_var_dict(
        config_loader.var_dict,
        date_str=today_stamp,
        defaults_dir=packaged_defaults,
    )
    return cfg
75
+
76
+
77
def write_gz_h5ad(adata: ad.AnnData, path: Path) -> Path:
    """Write ``adata`` to a gzip-compressed h5ad file and its schema YAML.

    Args:
        adata: AnnData object to write.
        path: Destination path; ".gz" is appended to the file name when the
            path does not already end with that suffix.

    Returns:
        The path that was actually written (always ending in ".gz").
    """
    gz_path = path if path.suffix == ".gz" else path.with_name(f"{path.name}.gz")
    safe_write_h5ad(adata, gz_path, compression="gzip", backup=True)
    write_runtime_schema_yaml(adata, gz_path, step_name="runtime")
    return gz_path
@@ -1,13 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import copy
4
+ import logging
4
5
  from dataclasses import dataclass
5
6
  from pathlib import Path
6
7
  from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple, Union
7
8
 
8
9
  import numpy as np
9
10
 
10
- from smftools.logging_utils import get_logger
11
+ from smftools.constants import HMM_DIR, LOGGING_DIR
12
+ from smftools.logging_utils import get_logger, setup_logging
11
13
  from smftools.optional_imports import require
12
14
 
13
15
  # FIX: import _to_dense_np to avoid NameError
@@ -19,12 +21,132 @@ if TYPE_CHECKING:
19
21
  import torch as torch_types
20
22
 
21
23
  torch = require("torch", extra="torch", purpose="HMM CLI")
24
+ mpl = require("matplotlib", extra="plotting", purpose="HMM plotting")
25
+ mpl_colors = require("matplotlib.colors", extra="plotting", purpose="HMM plotting")
22
26
 
23
27
  # =============================================================================
24
28
  # Helpers: extracting training arrays
25
29
  # =============================================================================
26
30
 
27
31
 
32
+ def _strip_hmm_layer_prefix(layer: str) -> str:
33
+ """Strip methbase prefixes and length suffixes from an HMM layer name.
34
+
35
+ Args:
36
+ layer: Full layer name (e.g., "GpC_small_accessible_patch_lengths").
37
+
38
+ Returns:
39
+ The base layer name without methbase prefixes or length suffixes.
40
+ """
41
+ base = layer
42
+ for prefix in ("Combined_", "GpC_", "CpG_", "C_", "A_"):
43
+ if base.startswith(prefix):
44
+ base = base[len(prefix) :]
45
+ break
46
+ if base.endswith("_lengths"):
47
+ base = base[: -len("_lengths")]
48
+ if base.endswith("_merged"):
49
+ base = base[: -len("_merged")]
50
+ return base
51
+
52
+
53
def _resolve_feature_colormap(layer: str, cfg, default_cmap: str) -> Any:
    """Resolve a colormap for a given HMM layer.

    ``cfg.hmm_feature_colormaps`` is consulted (treated as empty when it is
    missing, falsy, or not a dict), first under the full layer name and then
    under the name returned by ``_strip_hmm_layer_prefix``.

    Args:
        layer: Full layer name.
        cfg: Experiment config.
        default_cmap: Fallback colormap name.

    Returns:
        - ``default_cmap`` when nothing usable is configured for the layer;
        - a ``ListedColormap`` for a non-empty list/tuple of colors;
        - the configured string itself when it names a registered colormap;
        - a white-to-color ``LinearSegmentedColormap`` when the string is a
          valid color rather than a colormap name.
    """
    feature_maps = getattr(cfg, "hmm_feature_colormaps", {}) or {}
    if not isinstance(feature_maps, dict):
        feature_maps = {}

    base = _strip_hmm_layer_prefix(layer)
    value = feature_maps.get(layer, feature_maps.get(base))
    if value is None:
        return default_cmap

    if isinstance(value, (list, tuple)):
        if not value:
            # An empty color list cannot form a usable colormap; mirror
            # _resolve_feature_color, which also ignores empty sequences.
            logger.warning(
                "Empty color list configured for HMM layer %s; using %s instead.",
                layer,
                default_cmap,
            )
            return default_cmap
        return mpl_colors.ListedColormap(list(value))

    if isinstance(value, str):
        try:
            mpl.colormaps.get_cmap(value)
        except Exception:
            # Kept broad so that any lookup failure falls through to the
            # single-color handling, as before.
            if mpl_colors.is_color_like(value):
                return mpl_colors.LinearSegmentedColormap.from_list(
                    f"hmm_{base}_cmap", ["#ffffff", value]
                )
            # Neither a colormap name nor a color: do not let a config typo
            # abort plotting.
            logger.warning(
                "Unrecognized colormap/color %r configured for HMM layer %s; using %s instead.",
                value,
                layer,
                default_cmap,
            )
            return default_cmap
        return value

    return default_cmap
86
+
87
+
88
def _resolve_feature_color(layer: str, cfg, fallback_cmap: str, idx: int, total: int) -> Any:
    """Resolve a line color for a given HMM layer.

    Args:
        layer: Full layer name; the name from ``_strip_hmm_layer_prefix`` is
            used as a secondary lookup key.
        cfg: Experiment config; ``hmm_feature_colormaps`` is read from it.
        fallback_cmap: Colormap name sampled when nothing is configured.
        idx: Position of this layer among the layers being colored.
        total: Number of layers being colored.

    Returns:
        - for a configured string naming a registered colormap, that colormap
          sampled at 0.75;
        - for any other configured string, the string itself;
        - for a non-empty configured list/tuple, its last element;
        - otherwise ``fallback_cmap`` sampled at 0.5 when ``total <= 1``, else
          at ``idx / (total - 1)``.
    """
    configured = getattr(cfg, "hmm_feature_colormaps", {}) or {}
    if not isinstance(configured, dict):
        configured = {}

    stripped = _strip_hmm_layer_prefix(layer)
    # The full layer name takes precedence over the stripped name.
    choice = configured[layer] if layer in configured else configured.get(stripped)

    if isinstance(choice, str):
        try:
            named_cmap = mpl.colormaps.get_cmap(choice)
        except Exception:
            # Not a colormap name: hand the string back as the color.
            return choice
        return named_cmap(0.75)

    if isinstance(choice, (list, tuple)) and choice:
        return choice[-1]

    fallback = mpl.colormaps.get_cmap(fallback_cmap)
    position = 0.5 if total <= 1 else idx / (total - 1)
    return fallback(position)
109
+
110
+
111
def _resolve_length_feature_ranges(
    layer: str, cfg, default_cmap: str
) -> List[Tuple[int, int, Any]]:
    """Resolve length-based feature ranges to colors for size contour overlays.

    The feature set is chosen from ``cfg.hmm_feature_sets`` by looking for
    "accessible" or "footprint" in the layer name returned by
    ``_strip_hmm_layer_prefix``; its "features" mapping is expected to map a
    feature name to a two-element ``[min_len, max_len]`` entry.

    Args:
        layer: Full layer name.
        cfg: Experiment config.
        default_cmap: Colormap name forwarded to ``_resolve_feature_color``
            for features without a configured color.

    Returns:
        A list of ``(min_len, max_len, color)`` tuples. The list is empty when
        the layer matches neither feature set or the configuration is missing
        or malformed; individual malformed entries are skipped.
    """
    base = _strip_hmm_layer_prefix(layer)
    feature_sets = getattr(cfg, "hmm_feature_sets", {}) or {}
    if not isinstance(feature_sets, dict):
        return []

    if "accessible" in base:
        feature_key = "accessible"
    elif "footprint" in base:
        feature_key = "footprint"
    else:
        return []

    # The entry may be absent or None (e.g. an empty key in the config), so
    # check its type before calling .get on it.
    feature_set = feature_sets.get(feature_key)
    if not isinstance(feature_set, dict):
        return []

    features = feature_set.get("features", {})
    if not isinstance(features, dict):
        return []

    ranges: List[Tuple[int, int, Any]] = []
    for feature_name, bounds in features.items():
        if not isinstance(bounds, (list, tuple)) or len(bounds) != 2:
            continue
        min_len, max_len = bounds
        # An open-ended upper bound (None or infinity) becomes a large sentinel.
        if max_len is None or (isinstance(max_len, (float, int)) and np.isinf(max_len)):
            max_len = int(1e9)
        try:
            min_len_int = int(min_len)
            max_len_int = int(max_len)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers an infinite lower bound, which int() rejects.
            continue
        color = _resolve_feature_color(feature_name, cfg, default_cmap, 0, 1)
        ranges.append((min_len_int, max_len_int, color))
    return ranges
148
+
149
+
28
150
  def _get_training_matrix(
29
151
  subset, cols_mask: np.ndarray, smf_modality: Optional[str], cfg
30
152
  ) -> Tuple[np.ndarray, Optional[str]]:
@@ -445,31 +567,25 @@ def hmm_adata(config_path: str):
445
567
  - Call hmm_adata_core(cfg, adata, paths)
446
568
  """
447
569
  from ..readwrite import safe_read_h5ad
448
- from .helpers import get_adata_paths
449
- from .load_adata import load_adata
450
- from .preprocess_adata import preprocess_adata
451
- from .spatial_adata import spatial_adata
570
+ from .helpers import get_adata_paths, load_experiment_config
452
571
 
453
572
  # 1) load cfg / stage paths
454
- _, _, cfg = load_adata(config_path)
455
- paths = get_adata_paths(cfg)
573
+ cfg = load_experiment_config(config_path)
456
574
 
457
- # 2) make sure upstream stages are run (they have their own skipping logic)
458
- preprocess_adata(config_path)
459
- spatial_ad, spatial_path = spatial_adata(config_path)
575
+ paths = get_adata_paths(cfg)
460
576
 
461
- # 3) choose starting AnnData
577
+ # 2) choose starting AnnData
462
578
  # Prefer:
463
579
  # - existing HMM h5ad if not forcing redo
464
580
  # - in-memory spatial_ad from wrapper call
465
581
  # - saved spatial / pp_dedup / pp / raw on disk
466
582
  if paths.hmm.exists() and not (cfg.force_redo_hmm_fit or cfg.force_redo_hmm_apply):
467
- adata, _ = safe_read_h5ad(paths.hmm)
468
- return adata, paths.hmm
583
+ logger.debug(f"Skipping hmm. HMM AnnData found: {paths.hmm}")
584
+ return None
469
585
 
470
- if spatial_ad is not None:
471
- adata = spatial_ad
472
- source_path = spatial_path
586
+ if paths.hmm.exists():
587
+ adata, _ = safe_read_h5ad(paths.hmm)
588
+ source_path = paths.hmm
473
589
  elif paths.spatial.exists():
474
590
  adata, _ = safe_read_h5ad(paths.spatial)
475
591
  source_path = paths.spatial
@@ -516,11 +632,14 @@ def hmm_adata_core(
516
632
  Does NOT decide which h5ad to start from – that is the wrapper's job.
517
633
  """
518
634
 
635
+ from datetime import datetime
636
+
519
637
  import numpy as np
520
638
 
521
639
  from ..hmm import call_hmm_peaks
522
640
  from ..metadata import record_smftools_metadata
523
641
  from ..plotting import (
642
+ combined_hmm_length_clustermap,
524
643
  combined_hmm_raw_clustermap,
525
644
  plot_hmm_layers_rolling_by_sample_ref,
526
645
  plot_hmm_size_contours,
@@ -528,18 +647,33 @@ def hmm_adata_core(
528
647
  from ..readwrite import make_dirs
529
648
  from .helpers import write_gz_h5ad
530
649
 
650
+ date_str = datetime.today().strftime("%y%m%d")
651
+ now = datetime.now()
652
+ time_str = now.strftime("%H%M%S")
653
+
654
+ log_level = getattr(logging, cfg.log_level.upper(), logging.INFO)
655
+
531
656
  smf_modality = cfg.smf_modality
532
657
  deaminase = smf_modality == "deaminase"
533
658
 
534
659
  output_directory = Path(cfg.output_directory)
535
- make_dirs([output_directory])
660
+ hmm_directory = output_directory / HMM_DIR
661
+ logging_directory = hmm_directory / LOGGING_DIR
662
+
663
+ make_dirs([output_directory, hmm_directory])
664
+
665
+ if cfg.emit_log_file:
666
+ log_file = logging_directory / f"{date_str}_{time_str}_log.log"
667
+ make_dirs([logging_directory])
668
+ else:
669
+ log_file = None
536
670
 
537
- pp_dir = output_directory / "preprocessed" / "deduplicated"
671
+ setup_logging(level=log_level, log_file=log_file, reconfigure=log_file is not None)
538
672
 
539
673
  # ---------------------------- HMM annotate stage ----------------------------
540
674
  if not (cfg.bypass_hmm_fit and cfg.bypass_hmm_apply):
541
- hmm_models_dir = pp_dir / "10_hmm_models"
542
- make_dirs([pp_dir, hmm_models_dir])
675
+ hmm_models_dir = hmm_directory / "10_hmm_models"
676
+ make_dirs([hmm_directory, hmm_models_dir])
543
677
 
544
678
  # Standard bookkeeping
545
679
  uns_key = "hmm_appended_layers"
@@ -743,6 +877,8 @@ def hmm_adata_core(
743
877
  uns_key=uns_key,
744
878
  uns_flag="hmm_annotated_combined",
745
879
  force_redo=force_apply,
880
+ mask_to_read_span=True,
881
+ mask_use_original_var_names=True,
746
882
  )
747
883
 
748
884
  for core_layer, dist in (
@@ -855,11 +991,11 @@ def hmm_adata_core(
855
991
  logger.info(f"HMM appended layers: {hmm_layers}")
856
992
 
857
993
  # ---------------------------- HMM peak calling stage ----------------------------
858
- hmm_dir = pp_dir / "11_hmm_peak_calling"
994
+ hmm_dir = hmm_directory / "11_hmm_peak_calling"
859
995
  if hmm_dir.is_dir():
860
996
  pass
861
997
  else:
862
- make_dirs([pp_dir, hmm_dir])
998
+ make_dirs([hmm_directory, hmm_dir])
863
999
 
864
1000
  call_hmm_peaks(
865
1001
  adata,
@@ -888,8 +1024,8 @@ def hmm_adata_core(
888
1024
 
889
1025
  ############################################### HMM based feature plotting ###############################################
890
1026
 
891
- hmm_dir = pp_dir / "12_hmm_clustermaps"
892
- make_dirs([pp_dir, hmm_dir])
1027
+ hmm_dir = hmm_directory / "12_hmm_clustermaps"
1028
+ make_dirs([hmm_directory, hmm_dir])
893
1029
 
894
1030
  layers: list[str] = []
895
1031
 
@@ -914,6 +1050,7 @@ def hmm_adata_core(
914
1050
  pass
915
1051
  else:
916
1052
  make_dirs([hmm_cluster_save_dir])
1053
+ hmm_cmap = _resolve_feature_colormap(layer, cfg, cfg.clustermap_cmap_hmm)
917
1054
 
918
1055
  combined_hmm_raw_clustermap(
919
1056
  adata,
@@ -924,7 +1061,7 @@ def hmm_adata_core(
924
1061
  layer_cpg=cfg.layer_for_clustermap_plotting,
925
1062
  layer_c=cfg.layer_for_clustermap_plotting,
926
1063
  layer_a=cfg.layer_for_clustermap_plotting,
927
- cmap_hmm=cfg.clustermap_cmap_hmm,
1064
+ cmap_hmm=hmm_cmap,
928
1065
  cmap_gpc=cfg.clustermap_cmap_gpc,
929
1066
  cmap_cpg=cfg.clustermap_cmap_cpg,
930
1067
  cmap_c=cfg.clustermap_cmap_c,
@@ -935,7 +1072,7 @@ def hmm_adata_core(
935
1072
  0
936
1073
  ],
937
1074
  min_position_valid_fraction=1 - cfg.position_max_nan_threshold,
938
- demux_types=("double", "already"),
1075
+ demux_types=cfg.clustermap_demux_types_to_plot,
939
1076
  save_path=hmm_cluster_save_dir,
940
1077
  normalize_hmm=False,
941
1078
  sort_by=cfg.hmm_clustermap_sortby, # options: 'gpc', 'cpg', 'gpc_cpg', 'none', or 'obs:<column>'
@@ -945,12 +1082,68 @@ def hmm_adata_core(
945
1082
  index_col_suffix=cfg.reindexed_var_suffix,
946
1083
  )
947
1084
 
948
- hmm_dir = pp_dir / "13_hmm_bulk_traces"
1085
+ hmm_length_dir = hmm_directory / "12b_hmm_length_clustermaps"
1086
+ make_dirs([hmm_directory, hmm_length_dir])
1087
+
1088
+ length_layers: list[str] = []
1089
+ length_layer_roots = list(
1090
+ getattr(cfg, "hmm_clustermap_length_layers", cfg.hmm_clustermap_feature_layers)
1091
+ )
1092
+
1093
+ for base in cfg.hmm_methbases:
1094
+ length_layers.extend([f"{base}_{layer}_lengths" for layer in length_layer_roots])
1095
+
1096
+ if getattr(cfg, "hmm_run_multichannel", True) and len(cfg.hmm_methbases) >= 2:
1097
+ length_layers.extend([f"Combined_{layer}_lengths" for layer in length_layer_roots])
1098
+
1099
+ if cfg.cpg:
1100
+ length_layers.extend(["CpG_cpg_patch_lengths"])
1101
+
1102
+ for layer in length_layers:
1103
+ hmm_cluster_save_dir = hmm_length_dir / layer
1104
+ if hmm_cluster_save_dir.is_dir():
1105
+ pass
1106
+ else:
1107
+ make_dirs([hmm_cluster_save_dir])
1108
+ length_cmap = _resolve_feature_colormap(layer, cfg, "Greens")
1109
+ length_feature_ranges = _resolve_length_feature_ranges(layer, cfg, "Greens")
1110
+
1111
+ combined_hmm_length_clustermap(
1112
+ adata,
1113
+ sample_col=cfg.sample_name_col_for_plotting,
1114
+ reference_col=cfg.reference_column,
1115
+ length_layer=layer,
1116
+ layer_gpc=cfg.layer_for_clustermap_plotting,
1117
+ layer_cpg=cfg.layer_for_clustermap_plotting,
1118
+ layer_c=cfg.layer_for_clustermap_plotting,
1119
+ layer_a=cfg.layer_for_clustermap_plotting,
1120
+ cmap_lengths=length_cmap,
1121
+ cmap_gpc=cfg.clustermap_cmap_gpc,
1122
+ cmap_cpg=cfg.clustermap_cmap_cpg,
1123
+ cmap_c=cfg.clustermap_cmap_c,
1124
+ cmap_a=cfg.clustermap_cmap_a,
1125
+ min_quality=cfg.read_quality_filter_thresholds[0],
1126
+ min_length=cfg.read_len_filter_thresholds[0],
1127
+ min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[
1128
+ 0
1129
+ ],
1130
+ min_position_valid_fraction=1 - cfg.position_max_nan_threshold,
1131
+ demux_types=cfg.clustermap_demux_types_to_plot,
1132
+ save_path=hmm_cluster_save_dir,
1133
+ sort_by=cfg.hmm_clustermap_sortby,
1134
+ bins=None,
1135
+ deaminase=deaminase,
1136
+ min_signal=0,
1137
+ index_col_suffix=cfg.reindexed_var_suffix,
1138
+ length_feature_ranges=length_feature_ranges,
1139
+ )
1140
+
1141
+ hmm_dir = hmm_directory / "13_hmm_bulk_traces"
949
1142
 
950
1143
  if hmm_dir.is_dir():
951
1144
  logger.debug(f"{hmm_dir} already exists.")
952
1145
  else:
953
- make_dirs([pp_dir, hmm_dir])
1146
+ make_dirs([hmm_directory, hmm_dir])
954
1147
  from ..plotting import plot_hmm_layers_rolling_by_sample_ref
955
1148
 
956
1149
  bulk_hmm_layers = [
@@ -958,6 +1151,10 @@ def hmm_adata_core(
958
1151
  for layer in hmm_layers
959
1152
  if not any(s in layer for s in ("_lengths", "_states", "_posterior"))
960
1153
  ]
1154
+ layer_colors = {
1155
+ layer: _resolve_feature_color(layer, cfg, "tab20", idx, len(bulk_hmm_layers))
1156
+ for idx, layer in enumerate(bulk_hmm_layers)
1157
+ }
961
1158
  saved = plot_hmm_layers_rolling_by_sample_ref(
962
1159
  adata,
963
1160
  layers=bulk_hmm_layers,
@@ -969,14 +1166,15 @@ def hmm_adata_core(
969
1166
  output_dir=hmm_dir,
970
1167
  save=True,
971
1168
  show_raw=False,
1169
+ layer_colors=layer_colors,
972
1170
  )
973
1171
 
974
- hmm_dir = pp_dir / "14_hmm_fragment_distributions"
1172
+ hmm_dir = hmm_directory / "14_hmm_fragment_distributions"
975
1173
 
976
1174
  if hmm_dir.is_dir():
977
1175
  logger.debug(f"{hmm_dir} already exists.")
978
1176
  else:
979
- make_dirs([pp_dir, hmm_dir])
1177
+ make_dirs([hmm_directory, hmm_dir])
980
1178
  from ..plotting import plot_hmm_size_contours
981
1179
 
982
1180
  if smf_modality == "deaminase":
@@ -1001,6 +1199,8 @@ def hmm_adata_core(
1001
1199
  for layer, max in fragments:
1002
1200
  save_path = hmm_dir / layer
1003
1201
  make_dirs([save_path])
1202
+ layer_cmap = _resolve_feature_colormap(layer, cfg, "Greens")
1203
+ feature_ranges = _resolve_length_feature_ranges(layer, cfg, "Greens")
1004
1204
 
1005
1205
  figs = plot_hmm_size_contours(
1006
1206
  adata,
@@ -1016,8 +1216,9 @@ def hmm_adata_core(
1016
1216
  dpi=200,
1017
1217
  smoothing_sigma=(10, 10),
1018
1218
  normalize_after_smoothing=True,
1019
- cmap="Greens",
1219
+ cmap=layer_cmap,
1020
1220
  log_scale_z=True,
1221
+ feature_ranges=tuple(feature_ranges),
1021
1222
  )
1022
1223
  ########################################################################################################################
1023
1224