smftools 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. {smftools-0.1.6 → smftools-0.1.7}/.gitignore +1 -0
  2. {smftools-0.1.6 → smftools-0.1.7}/PKG-INFO +5 -2
  3. {smftools-0.1.6 → smftools-0.1.7}/README.md +4 -1
  4. {smftools-0.1.6 → smftools-0.1.7}/docs/source/installation.md +4 -4
  5. {smftools-0.1.6 → smftools-0.1.7}/pyproject.toml +1 -1
  6. smftools-0.1.7/smftools/_version.py +1 -0
  7. smftools-0.1.7/smftools/tools/data/__init__.py +2 -0
  8. smftools-0.1.7/smftools/tools/data/anndata_data_module.py +90 -0
  9. smftools-0.1.7/smftools/tools/inference/__init__.py +1 -0
  10. smftools-0.1.7/smftools/tools/inference/lightning_inference.py +41 -0
  11. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/__init__.py +3 -1
  12. smftools-0.1.7/smftools/tools/models/lightning_base.py +41 -0
  13. smftools-0.1.7/smftools/tools/models/sklearn_models.py +40 -0
  14. smftools-0.1.7/smftools/tools/training/__init__.py +1 -0
  15. smftools-0.1.7/smftools/tools/training/train_lightning_model.py +47 -0
  16. smftools-0.1.6/smftools/_version.py +0 -1
  17. smftools-0.1.6/smftools/tools/data/__init__.py +0 -1
  18. smftools-0.1.6/smftools/tools/inference/__init__.py +0 -0
  19. smftools-0.1.6/smftools/tools/models/sklearn_models.py +0 -0
  20. smftools-0.1.6/smftools/tools/training/__init__.py +0 -0
  21. {smftools-0.1.6 → smftools-0.1.7}/.gitattributes +0 -0
  22. {smftools-0.1.6 → smftools-0.1.7}/.readthedocs.yaml +0 -0
  23. {smftools-0.1.6 → smftools-0.1.7}/CONTRIBUTING.md +0 -0
  24. {smftools-0.1.6 → smftools-0.1.7}/LICENSE +0 -0
  25. {smftools-0.1.6 → smftools-0.1.7}/docs/Makefile +0 -0
  26. {smftools-0.1.6 → smftools-0.1.7}/docs/make.bat +0 -0
  27. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/converted_BAM_to_adata.png +0 -0
  28. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/modkit_extract_to_adata.png +0 -0
  29. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools-1.svg +0 -0
  30. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools-1.tif +0 -0
  31. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
  32. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools_informatics_diagram.png +0 -0
  33. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
  34. {smftools-0.1.6 → smftools-0.1.7}/docs/source/_templates/tmp +0 -0
  35. {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/datasets.md +0 -0
  36. {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/index.md +0 -0
  37. {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/informatics.md +0 -0
  38. {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/preprocessing.md +0 -0
  39. {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/tools.md +0 -0
  40. {smftools-0.1.6 → smftools-0.1.7}/docs/source/basic_usage.md +0 -0
  41. {smftools-0.1.6 → smftools-0.1.7}/docs/source/conf.py +0 -0
  42. {smftools-0.1.6 → smftools-0.1.7}/docs/source/contributors.md +0 -0
  43. {smftools-0.1.6 → smftools-0.1.7}/docs/source/dev/index.md +0 -0
  44. {smftools-0.1.6 → smftools-0.1.7}/docs/source/index.md +0 -0
  45. {smftools-0.1.6 → smftools-0.1.7}/docs/source/references.bib +0 -0
  46. {smftools-0.1.6 → smftools-0.1.7}/docs/source/references.rst +0 -0
  47. {smftools-0.1.6 → smftools-0.1.7}/docs/source/release-notes/0.1.0.md +0 -0
  48. {smftools-0.1.6 → smftools-0.1.7}/docs/source/release-notes/index.md +0 -0
  49. {smftools-0.1.6 → smftools-0.1.7}/docs/source/requirements.txt +0 -0
  50. {smftools-0.1.6 → smftools-0.1.7}/docs/source/tutorials/index.md +0 -0
  51. {smftools-0.1.6 → smftools-0.1.7}/experiment_config.csv +0 -0
  52. {smftools-0.1.6 → smftools-0.1.7}/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +0 -0
  53. {smftools-0.1.6 → smftools-0.1.7}/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +0 -0
  54. {smftools-0.1.6 → smftools-0.1.7}/requirements.txt +0 -0
  55. {smftools-0.1.6 → smftools-0.1.7}/sample_sheet.csv +0 -0
  56. {smftools-0.1.6 → smftools-0.1.7}/smftools/__init__.py +0 -0
  57. {smftools-0.1.6 → smftools-0.1.7}/smftools/_settings.py +0 -0
  58. {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
  59. {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/F1_sample_sheet.csv +0 -0
  60. {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/__init__.py +0 -0
  61. {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
  62. {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/datasets.py +0 -0
  63. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/__init__.py +0 -0
  64. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/bam_conversion.py +0 -0
  65. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/bam_direct.py +0 -0
  66. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/basecalls_to_adata.py +0 -0
  67. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/print_bam_query_seq.py +0 -0
  68. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/basecall_pod5s.py +0 -0
  69. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/conversion_smf.py +0 -0
  70. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/direct_smf.py +0 -0
  71. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/fast5_to_pod5.py +0 -0
  72. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/LoadExperimentConfig.py +0 -0
  73. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/__init__.py +0 -0
  74. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/align_and_sort_BAM.py +0 -0
  75. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/aligned_BAM_to_bed.py +0 -0
  76. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/archived/informatics.py +0 -0
  77. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/archived/load_adata.py +0 -0
  78. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/bam_qc.py +0 -0
  79. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/bed_to_bigwig.py +0 -0
  80. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/binarize_converted_base_identities.py +0 -0
  81. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/canoncall.py +0 -0
  82. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/complement_base_list.py +0 -0
  83. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -0
  84. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/converted_BAM_to_adata.py +0 -0
  85. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/converted_BAM_to_adata_II.py +0 -0
  86. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/count_aligned_reads.py +0 -0
  87. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/demux_and_index_BAM.py +0 -0
  88. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_base_identities.py +0 -0
  89. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_mods.py +0 -0
  90. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_read_features_from_bam.py +0 -0
  91. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_read_lengths_from_bed.py +0 -0
  92. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_readnames_from_BAM.py +0 -0
  93. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/find_conversion_sites.py +0 -0
  94. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/generate_converted_FASTA.py +0 -0
  95. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/get_chromosome_lengths.py +0 -0
  96. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/get_native_references.py +0 -0
  97. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/index_fasta.py +0 -0
  98. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/make_dirs.py +0 -0
  99. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/make_modbed.py +0 -0
  100. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/modQC.py +0 -0
  101. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/modcall.py +0 -0
  102. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/modkit_extract_to_adata.py +0 -0
  103. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/ohe_batching.py +0 -0
  104. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/ohe_layers_decode.py +0 -0
  105. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/one_hot_decode.py +0 -0
  106. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/one_hot_encode.py +0 -0
  107. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +0 -0
  108. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/run_multiqc.py +0 -0
  109. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/separate_bam_by_bc.py +0 -0
  110. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/split_and_index_BAM.py +0 -0
  111. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/load_adata.py +0 -0
  112. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/readwrite.py +0 -0
  113. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/subsample_fasta_from_bed.py +0 -0
  114. {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/subsample_pod5.py +0 -0
  115. {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/__init__.py +0 -0
  116. {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/classifiers.py +0 -0
  117. {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/general_plotting.py +0 -0
  118. {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/position_stats.py +0 -0
  119. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/__init__.py +0 -0
  120. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/append_C_context.py +0 -0
  121. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/archives/mark_duplicates.py +0 -0
  122. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/archives/preprocessing.py +0 -0
  123. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/archives/remove_duplicates.py +0 -0
  124. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/binarize_on_Youden.py +0 -0
  125. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/binary_layers_to_ohe.py +0 -0
  126. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_complexity.py +0 -0
  127. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_consensus.py +0 -0
  128. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -0
  129. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_coverage.py +0 -0
  130. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_pairwise_differences.py +0 -0
  131. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_pairwise_hamming_distances.py +0 -0
  132. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_position_Youden.py +0 -0
  133. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_read_length_stats.py +0 -0
  134. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/clean_NaN.py +0 -0
  135. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/filter_adata_by_nan_proportion.py +0 -0
  136. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/filter_converted_reads_on_methylation.py +0 -0
  137. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/filter_reads_on_length.py +0 -0
  138. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/flag_duplicate_reads.py +0 -0
  139. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/invert_adata.py +0 -0
  140. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/load_sample_sheet.py +0 -0
  141. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/make_dirs.py +0 -0
  142. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/min_non_diagonal.py +0 -0
  143. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/recipes.py +0 -0
  144. {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/subsample_adata.py +0 -0
  145. {smftools-0.1.6 → smftools-0.1.7}/smftools/readwrite.py +0 -0
  146. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/__init__.py +0 -0
  147. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/apply_hmm.py +0 -0
  148. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/apply_hmm_batched.py +0 -0
  149. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/classify_methylated_features.py +0 -0
  150. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/classify_non_methylated_features.py +0 -0
  151. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/subset_adata_v1.py +0 -0
  152. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/subset_adata_v2.py +0 -0
  153. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/calculate_distances.py +0 -0
  154. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/calculate_umap.py +0 -0
  155. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/call_hmm_peaks.py +0 -0
  156. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/classifiers.py +0 -0
  157. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/cluster_adata_on_methylation.py +0 -0
  158. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/data/preprocessing.py +0 -0
  159. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/display_hmm.py +0 -0
  160. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/evaluation/__init__.py +0 -0
  161. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/general_tools.py +0 -0
  162. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/hmm_readwrite.py +0 -0
  163. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/base.py +0 -0
  164. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/cnn.py +0 -0
  165. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/mlp.py +0 -0
  166. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/positional.py +0 -0
  167. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/rnn.py +0 -0
  168. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/transformer.py +0 -0
  169. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/wrappers.py +0 -0
  170. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/nucleosome_hmm_refinement.py +0 -0
  171. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/position_stats.py +0 -0
  172. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/read_stats.py +0 -0
  173. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/subset_adata.py +0 -0
  174. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/train_hmm.py +0 -0
  175. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/utils/__init__.py +0 -0
  176. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/utils/device.py +0 -0
  177. {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/utils/grl.py +0 -0
  178. {smftools-0.1.6 → smftools-0.1.7}/tests/datasets/test_datasets.py +0 -0
  179. {smftools-0.1.6 → smftools-0.1.7}/tests/informatics/helpers/test_LoadExperimentConfig.py +0 -0
  180. {smftools-0.1.6 → smftools-0.1.7}/tests/test_readwrite.py +0 -0
```diff
--- smftools-0.1.6/.gitignore
+++ smftools-0.1.7/.gitignore
@@ -16,6 +16,7 @@ build/
 /env-*/
 /venv-*/
 venv/
+venvs/
 /environment.yml
 
 # Tests
```
```diff
--- smftools-0.1.6/PKG-INFO
+++ smftools-0.1.7/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: smftools
-Version: 0.1.6
+Version: 0.1.7
 Summary: Single Molecule Footprinting Analysis in Python.
 Project-URL: Source, https://github.com/jkmckenna/smftools
 Project-URL: Documentation, https://smftools.readthedocs.io/
@@ -118,7 +118,10 @@ The following CLI tools need to be installed and configured before using the inf
 
 ## Announcements
 
-### 10/01/24 - More recent versions are being updated through github and are not currently on pypi, please install from source. Thank you!
+### 05/29/25 - Version 0.1.6 is available through PyPI.
+Informatics, preprocessing, tools, plotting modules have core functionality that is approaching stability on MacOS(Intel/Silicon) and Linux(Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/Scikit-Learn ML-infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WanDB to facilitate organizational scaling, multi-device usage, and logging.
+
+### 10/01/24 - More recent versions are being updated frequently. Installation from source over PyPI is recommended!
 
 ### 09/09/24 - The version 0.1.1 package ([smftools-0.1.1](https://pypi.org/project/smftools/)) is installable through pypi!
 The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
```
```diff
--- smftools-0.1.6/README.md
+++ smftools-0.1.7/README.md
@@ -29,7 +29,10 @@ The following CLI tools need to be installed and configured before using the inf
 
 ## Announcements
 
-### 10/01/24 - More recent versions are being updated through github and are not currently on pypi, please install from source. Thank you!
+### 05/29/25 - Version 0.1.6 is available through PyPI.
+Informatics, preprocessing, tools, plotting modules have core functionality that is approaching stability on MacOS(Intel/Silicon) and Linux(Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/Scikit-Learn ML-infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WanDB to facilitate organizational scaling, multi-device usage, and logging.
+
+### 10/01/24 - More recent versions are being updated frequently. Installation from source over PyPI is recommended!
 
 ### 09/09/24 - The version 0.1.1 package ([smftools-0.1.1](https://pypi.org/project/smftools/)) is installable through pypi!
 The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
```
````diff
--- smftools-0.1.6/docs/source/installation.md
+++ smftools-0.1.7/docs/source/installation.md
@@ -1,8 +1,8 @@
 # Installation
 
-## PyPi version
+## PyPi version - Easiest starting point
 
-Pull smftools from [PyPI](https://pypi.org/project/smftools):
+Install smftools from [PyPI](https://pypi.org/project/smftools):
 
 ```shell
 pip install smftools
@@ -33,7 +33,7 @@ chmod +x bedGraphToBigWig
 sudo mv bedGraphToBigWig /usr/local/bin/
 ```
 
-## Development Version
+## Development Version - recommended to use this method for most up to date versions
 
 Clone smftools from source and change into the smftools directory:
 
@@ -42,7 +42,7 @@ git clone https://github.com/jkmckenna/smftools.git
 cd smftools
 ```
 
-A virtual environment can be created for the current version within the smftools directory:
+A python virtual environment can be created as an alternative to conda. I like to do venv-smftools-X.X.X to keep a seperate venv for each version:
 
 ```shell
 python -m venv venv-smftools
````
```diff
--- smftools-0.1.6/pyproject.toml
+++ smftools-0.1.7/pyproject.toml
@@ -97,7 +97,7 @@ docs = [
 sources = ["src"]
 
 [tool.hatch.build.targets.wheel]
-packages = ["smftools"]
+include = ["smftools"]
 
 [tool.hatch.version]
 path = "src/smftools/_version.py"
```
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/_version.py
@@ -0,0 +1 @@
+__version__ = "0.1.7"
```
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/data/__init__.py
@@ -0,0 +1,2 @@
+from .anndata_data_module import AnnDataModule
+from .preprocessing import random_fill_nans
```
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/data/anndata_data_module.py
@@ -0,0 +1,90 @@
+import torch
+from torch.utils.data import DataLoader, TensorDataset, random_split
+import pytorch_lightning as pl
+import numpy as np
+import pandas as pd
+
+class AnnDataModule(pl.LightningDataModule):
+    def __init__(self, adata, tensor_source="X", tensor_key=None, label_col="labels",
+                 batch_size=64, train_frac=0.7, random_seed=42, split_col='train_val_split', split_save_path=None, load_existing_split=False,
+                 inference_mode=False):
+        super().__init__()
+        self.adata = adata  # The adata object
+        self.tensor_source = tensor_source  # X, layers, obsm
+        self.tensor_key = tensor_key  # name of the layer or obsm key
+        self.label_col = label_col  # name of the label column in obs
+        self.batch_size = batch_size
+        self.train_frac = train_frac
+        self.random_seed = random_seed
+        self.split_col = split_col  # Name of obs column to store "train"/"val"
+        self.split_save_path = split_save_path  # Where to save the obs_names and train/test split logging
+        self.load_existing_split = load_existing_split  # Whether to load from an existing split
+        self.inference_mode = inference_mode  # Whether to load the AnnDataModule in inference mode.
+
+    def setup(self, stage=None):
+        # Load feature matrix
+        if self.tensor_source == "X":
+            X = self.adata.X
+        elif self.tensor_source == "layers":
+            assert self.tensor_key in self.adata.layers, f"Layer '{self.tensor_key}' not found."
+            X = self.adata.layers[self.tensor_key]
+        elif self.tensor_source == "obsm":
+            assert self.tensor_key in self.adata.obsm, f"obsm key '{self.tensor_key}' not found."
+            X = self.adata.obsm[self.tensor_key]
+        else:
+            raise ValueError(f"Invalid tensor_source: {self.tensor_source}")
+
+        # Convert to tensor
+        X_tensor = torch.tensor(X, dtype=torch.float32)
+
+        if self.inference_mode:
+            self.infer_dataset = TensorDataset(X_tensor)
+
+        else:
+            # Load and encode labels
+            y = self.adata.obs[self.label_col]
+            if y.dtype.name == 'category':
+                y = y.cat.codes
+            y_tensor = torch.tensor(y.values, dtype=torch.long)
+
+            # Use existing split
+            if self.load_existing_split:
+                split_df = pd.read_csv(self.split_save_path, index_col=0)
+                assert self.split_col in split_df.columns, f"'{self.split_col}' column missing in split file."
+                self.adata.obs[self.split_col] = split_df.loc[self.adata.obs_names][self.split_col].values
+
+            # If no split exists, create one
+            if self.split_col not in self.adata.obs:
+                full_dataset = TensorDataset(X_tensor, y_tensor)
+                n_train = int(self.train_frac * len(full_dataset))
+                n_val = len(full_dataset) - n_train
+                self.train_set, self.val_set = random_split(
+                    full_dataset, [n_train, n_val],
+                    generator=torch.Generator().manual_seed(self.random_seed)
+                )
+                # Assign split labels
+                split_array = np.full(len(self.adata), "val", dtype=object)
+                train_idx = self.train_set.indices if hasattr(self.train_set, "indices") else self.train_set._indices
+                split_array[train_idx] = "train"
+                self.adata.obs[self.split_col] = split_array
+
+                # Save to disk
+                if self.split_save_path:
+                    self.adata.obs[[self.split_col]].to_csv(self.split_save_path)
+            else:
+                split_labels = self.adata.obs[self.split_col].values
+                train_mask = split_labels == "train"
+                val_mask = split_labels == "val"
+                self.train_set = TensorDataset(X_tensor[train_mask], y_tensor[train_mask])
+                self.val_set = TensorDataset(X_tensor[val_mask], y_tensor[val_mask])
+
+    def train_dataloader(self):
+        return DataLoader(self.train_set, batch_size=self.batch_size, shuffle=True)
+
+    def val_dataloader(self):
+        return DataLoader(self.val_set, batch_size=self.batch_size)
+
+    def predict_dataloader(self):
+        if not self.inference_mode:
+            raise RuntimeError("predict_dataloader only available in inference mode.")
+        return DataLoader(self.infer_dataset, batch_size=self.batch_size)
```
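The new `AnnDataModule` above wires an AnnData object into PyTorch Lightning's dataloader machinery, with persistent train/val split bookkeeping in `adata.obs`. A minimal usage sketch with hypothetical toy data (it assumes a dense `X`, since `setup()` passes the matrix straight to `torch.tensor`; the CSV path is illustrative):

```python
import anndata as ad
import numpy as np
import pandas as pd
from smftools.tools.data import AnnDataModule

# Hypothetical toy AnnData: 100 reads x 50 positions plus a categorical label column.
adata = ad.AnnData(
    X=np.random.rand(100, 50).astype(np.float32),
    obs=pd.DataFrame({"labels": pd.Categorical(np.random.choice(["bound", "unbound"], 100))}),
)

dm = AnnDataModule(adata, tensor_source="X", label_col="labels",
                   batch_size=32, train_frac=0.7,
                   split_save_path="train_val_split.csv")  # hypothetical output path
dm.setup()  # creates the split, stamps adata.obs["train_val_split"], writes the CSV

x, y = next(iter(dm.train_dataloader()))
print(x.shape, y.shape)  # torch.Size([32, 50]) torch.Size([32])
```

Passing `load_existing_split=True` on a later run reloads the saved CSV instead of re-splitting, so different models can be compared on identical partitions.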
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/inference/__init__.py
@@ -0,0 +1 @@
+from .lightning_inference import run_lightning_inference
```
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/inference/lightning_inference.py
@@ -0,0 +1,41 @@
+import torch
+import pandas as pd
+import numpy as np
+from pytorch_lightning import Trainer
+
+def run_lightning_inference(
+    adata,
+    model,
+    datamodule,
+    label_col="labels",
+    prefix="model"
+):
+
+    # Get class labels
+    if label_col in adata.obs and pd.api.types.is_categorical_dtype(adata.obs[label_col]):
+        class_labels = adata.obs[label_col].cat.categories.tolist()
+    else:
+        raise ValueError("label_col must be a categorical column in adata.obs")
+
+    # Run predictions
+    trainer = Trainer(accelerator="auto", devices=1, logger=False, enable_checkpointing=False)
+    preds = trainer.predict(model, datamodule=datamodule)
+    probs = torch.cat(preds, dim=0).cpu().numpy()  # (N, C)
+    pred_class_idx = probs.argmax(axis=1)
+    pred_class_labels = [class_labels[i] for i in pred_class_idx]
+    pred_class_probs = probs[np.arange(len(probs)), pred_class_idx]
+
+    # Construct full prefix with label_col
+    full_prefix = f"{prefix}_{label_col}"
+
+    # Store predictions in obs
+    adata.obs[f"{full_prefix}_pred"] = pred_class_idx
+    adata.obs[f"{full_prefix}_pred_label"] = pd.Categorical(pred_class_labels, categories=class_labels)
+    adata.obs[f"{full_prefix}_pred_prob"] = pred_class_probs
+
+    # Per-class probabilities
+    for i, class_name in enumerate(class_labels):
+        adata.obs[f"{full_prefix}_prob_{class_name}"] = probs[:, i]
+
+    # Full probability matrix in obsm
+    adata.obsm[f"{full_prefix}_pred_prob_all"] = probs
```
```diff
--- smftools-0.1.6/smftools/tools/models/__init__.py
+++ smftools-0.1.7/smftools/tools/models/__init__.py
@@ -4,4 +4,6 @@ from .cnn import CNNClassifier
 from .rnn import RNNClassifier
 from .transformer import BaseTransformer, TransformerClassifier, DANNTransformerClassifier, MaskedTransformerPretrainer
 from .positional import PositionalEncoding
-from .wrappers import ScaledModel
+from .wrappers import ScaledModel
+from .lightning_base import TorchClassifierWrapper
+from .sklearn_models import SklearnModelWrapper
```
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/models/lightning_base.py
@@ -0,0 +1,41 @@
+import torch
+import pytorch_lightning as pl
+
+class TorchClassifierWrapper(pl.LightningModule):
+    def __init__(
+        self,
+        model: torch.nn.Module,
+        optimizer_cls=torch.optim.AdamW,
+        optimizer_kwargs=None,
+        criterion_cls=torch.nn.CrossEntropyLoss,
+        criterion_kwargs=None,
+        lr: float = 1e-3,
+    ):
+        super().__init__()
+        self.model = model
+        self.save_hyperparameters(ignore=['model'])  # logs all except actual model instance
+        self.optimizer_cls = optimizer_cls
+        self.optimizer_kwargs = optimizer_kwargs or {}
+        self.criterion = criterion_cls(**(criterion_kwargs or {}))
+        self.lr = lr
+
+    def forward(self, x):
+        return self.model(x)
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        loss = self.criterion(logits, y)
+        self.log("train_loss", loss, prog_bar=True)
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        loss = self.criterion(logits, y)
+        acc = (logits.argmax(dim=1) == y).float().mean()
+        self.log_dict({"val_loss": loss, "val_acc": acc}, prog_bar=True)
+        return loss
+
+    def configure_optimizers(self):
+        return self.optimizer_cls(self.parameters(), lr=self.lr, **self.optimizer_kwargs)
```
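`TorchClassifierWrapper` adapts any plain `torch.nn.Module` classifier to the Lightning loop, standardizing loss, logging, and optimizer setup. A quick sketch (the `nn.Sequential` backbone is a stand-in for the package's own classifiers such as `CNNClassifier`; hyperparameters are illustrative):

```python
import torch
import torch.nn as nn
from smftools.tools.models import TorchClassifierWrapper

# Any module mapping a feature batch to class logits can be wrapped.
backbone = nn.Sequential(nn.Linear(50, 64), nn.ReLU(), nn.Linear(64, 2))

wrapped = TorchClassifierWrapper(
    backbone,
    optimizer_cls=torch.optim.AdamW,
    optimizer_kwargs={"weight_decay": 1e-2},  # forwarded to the optimizer
    criterion_cls=nn.CrossEntropyLoss,
    lr=1e-3,
)

# forward() simply delegates to the wrapped module.
logits = wrapped(torch.randn(8, 50))
print(logits.shape)  # torch.Size([8, 2])
```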
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/models/sklearn_models.py
@@ -0,0 +1,40 @@
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import (
+    roc_curve, precision_recall_curve, auc, f1_score, confusion_matrix
+)
+from sklearn.utils.class_weight import compute_class_weight
+
+import numpy as np
+
+class SklearnModelWrapper:
+    def __init__(self, model):
+        self.model = model
+
+    def fit(self, X_train, y_train):
+        self.model.fit(X_train, y_train)
+
+    def predict(self, X):
+        return self.model.predict(X)
+
+    def predict_proba(self, X):
+        return self.model.predict_proba(X)
+
+    def evaluate(self, X_test, y_test):
+        probs = self.predict_proba(X_test)[:, 1]
+        preds = self.predict(X_test)
+
+        fpr, tpr, _ = roc_curve(y_test, probs)
+        precision, recall, _ = precision_recall_curve(y_test, probs)
+        f1 = f1_score(y_test, preds)
+        auc_score = auc(fpr, tpr)
+        pr_auc = auc(recall, precision)
+        cm = confusion_matrix(y_test, preds)
+        pos_freq = np.mean(y_test == 1)
+        pr_auc_norm = pr_auc / pos_freq
+
+        return {
+            "fpr": fpr, "tpr": tpr, "precision": precision, "recall": recall,
+            "f1": f1, "auc": auc_score, "pr_auc": pr_auc,
+            "pr_auc_norm": pr_auc_norm, "confusion_matrix": cm
+        }
```
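`SklearnModelWrapper` is a thin shim over any estimator exposing `fit`/`predict`/`predict_proba`. Note that `evaluate()` indexes `probs[:, 1]` and compares `y_test == 1`, so it assumes a binary 0/1 classification problem. A sketch on synthetic data (illustrative only):

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from smftools.tools.models import SklearnModelWrapper

rng = np.random.default_rng(0)
X = rng.random((200, 50))
y = rng.integers(0, 2, size=200)  # binary 0/1 labels, as evaluate() assumes

wrapper = SklearnModelWrapper(RandomForestClassifier(n_estimators=100, random_state=0))
wrapper.fit(X[:150], y[:150])

metrics = wrapper.evaluate(X[150:], y[150:])
print(metrics["auc"], metrics["f1"])  # ROC AUC and F1 at the default threshold
print(metrics["confusion_matrix"])    # 2x2 counts
```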
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/training/__init__.py
@@ -0,0 +1 @@
+from .train_lightning_model import train_lightning_model
```
```diff
--- /dev/null
+++ smftools-0.1.7/smftools/tools/training/train_lightning_model.py
@@ -0,0 +1,47 @@
+import torch
+from pytorch_lightning import Trainer
+from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
+
+def train_lightning_model(
+    model,
+    datamodule,
+    max_epochs=20,
+    patience=5,
+    monitor_metric="val_loss",
+    checkpoint_path=None,
+):
+    # Device logic
+    if torch.cuda.is_available():
+        accelerator = "gpu"
+        devices = 1
+    elif torch.backends.mps.is_available():
+        accelerator = "mps"
+        devices = 1
+    else:
+        accelerator = "cpu"
+        devices = 1
+
+    # Callbacks
+    callbacks = [
+        EarlyStopping(monitor=monitor_metric, patience=patience, mode="min"),
+    ]
+    if checkpoint_path:
+        callbacks.append(ModelCheckpoint(
+            dirpath=checkpoint_path,
+            filename="{epoch}-{val_loss:.4f}",
+            monitor=monitor_metric,
+            save_top_k=1,
+            mode="min",
+        ))
+
+    # Trainer setup
+    trainer = Trainer(
+        max_epochs=max_epochs,
+        callbacks=callbacks,
+        accelerator=accelerator,
+        devices=devices,
+        log_every_n_steps=10,
+    )
+    trainer.fit(model, datamodule=datamodule)
+
+    return trainer
```
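With the pieces above, training collapses to one call. A hedged end-to-end sketch reusing `dm` and `wrapped` from the earlier sketches (`checkpoints/` is a hypothetical output directory; early stopping and checkpointing both watch `val_loss`, which the wrapper logs in `validation_step`):

```python
from smftools.tools.training import train_lightning_model

trainer = train_lightning_model(
    wrapped,                      # TorchClassifierWrapper from the sketch above
    dm,                           # AnnDataModule from the sketch above
    max_epochs=20,
    patience=5,
    monitor_metric="val_loss",
    checkpoint_path="checkpoints/",
)
print(trainer.callback_metrics.get("val_loss"))
```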
```diff
--- smftools-0.1.6/smftools/_version.py
+++ /dev/null
@@ -1 +0,0 @@
-__version__ = "0.1.6"
```
```diff
--- smftools-0.1.6/smftools/tools/data/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .preprocessing import random_fill_nans
```