smftools 0.1.0__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. {smftools-0.1.0 → smftools-0.1.3}/.gitignore +15 -10
  2. smftools-0.1.3/.readthedocs.yaml +17 -0
  3. smftools-0.1.3/CONTRIBUTING.md +3 -0
  4. smftools-0.1.3/PKG-INFO +94 -0
  5. smftools-0.1.3/README.md +32 -0
  6. smftools-0.1.3/docs/Makefile +20 -0
  7. smftools-0.1.3/docs/make.bat +35 -0
  8. smftools-0.1.3/docs/source/_static/converted_BAM_to_adata.png +0 -0
  9. smftools-0.1.3/docs/source/_static/modkit_extract_to_adata.png +0 -0
  10. smftools-0.1.3/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
  11. smftools-0.1.3/docs/source/_static/smftools_informatics_diagram.png +0 -0
  12. smftools-0.1.3/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
  13. smftools-0.1.3/docs/source/_templates/tmp +1 -0
  14. smftools-0.1.3/docs/source/api/datasets.md +9 -0
  15. smftools-0.1.3/docs/source/api/index.md +26 -0
  16. smftools-0.1.3/docs/source/api/informatics.md +27 -0
  17. smftools-0.1.3/docs/source/api/preprocessing.md +14 -0
  18. smftools-0.1.3/docs/source/api/tools.md +9 -0
  19. smftools-0.1.3/docs/source/basic_usage.md +75 -0
  20. smftools-0.1.3/docs/source/conf.py +117 -0
  21. smftools-0.1.3/docs/source/contributors.md +9 -0
  22. smftools-0.1.3/docs/source/dev/index.md +3 -0
  23. smftools-0.1.3/docs/source/index.md +54 -0
  24. smftools-0.1.3/docs/source/installation.md +60 -0
  25. smftools-0.1.3/docs/source/references.bib +406 -0
  26. smftools-0.1.3/docs/source/references.rst +4 -0
  27. smftools-0.1.3/docs/source/release-notes/0.1.0.md +4 -0
  28. smftools-0.1.3/docs/source/release-notes/index.md +8 -0
  29. smftools-0.1.3/docs/source/requirements.txt +14 -0
  30. smftools-0.1.3/docs/source/tutorials/index.md +3 -0
  31. smftools-0.1.3/experiment_config.csv +17 -0
  32. smftools-0.1.3/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +85 -0
  33. smftools-0.1.3/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +11 -0
  34. {smftools-0.1.0 → smftools-0.1.3}/pyproject.toml +15 -39
  35. {smftools-0.1.0 → smftools-0.1.3}/requirements.txt +4 -1
  36. smftools-0.1.3/sample_sheet.csv +11 -0
  37. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/__init__.py +0 -2
  38. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/_settings.py +3 -2
  39. smftools-0.1.3/src/smftools/_version.py +1 -0
  40. smftools-0.1.3/src/smftools/datasets/F1_sample_sheet.csv +5 -0
  41. smftools-0.1.3/src/smftools/datasets/datasets.py +28 -0
  42. smftools-0.1.3/src/smftools/informatics/__init__.py +14 -0
  43. smftools-0.1.3/src/smftools/informatics/archived/bam_conversion.py +59 -0
  44. smftools-0.1.3/src/smftools/informatics/archived/bam_direct.py +63 -0
  45. smftools-0.1.3/src/smftools/informatics/archived/basecalls_to_adata.py +71 -0
  46. smftools-0.1.3/src/smftools/informatics/conversion_smf.py +79 -0
  47. smftools-0.1.3/src/smftools/informatics/direct_smf.py +89 -0
  48. smftools-0.1.3/src/smftools/informatics/fast5_to_pod5.py +21 -0
  49. smftools-0.1.3/src/smftools/informatics/helpers/LoadExperimentConfig.py +74 -0
  50. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/__init__.py +22 -4
  51. smftools-0.1.3/src/smftools/informatics/helpers/align_and_sort_BAM.py +48 -0
  52. smftools-0.1.3/src/smftools/informatics/helpers/aligned_BAM_to_bed.py +73 -0
  53. smftools-0.1.3/src/smftools/informatics/helpers/bed_to_bigwig.py +39 -0
  54. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/binarize_converted_base_identities.py +11 -4
  55. smftools-0.1.3/src/smftools/informatics/helpers/canoncall.py +25 -0
  56. smftools-0.1.3/src/smftools/informatics/helpers/complement_base_list.py +21 -0
  57. smftools-0.1.3/src/smftools/informatics/helpers/concatenate_fastqs_to_bam.py +54 -0
  58. smftools-0.1.3/src/smftools/informatics/helpers/converted_BAM_to_adata.py +233 -0
  59. smftools-0.1.3/src/smftools/informatics/helpers/count_aligned_reads.py +43 -0
  60. smftools-0.1.3/src/smftools/informatics/helpers/extract_base_identities.py +57 -0
  61. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/extract_mods.py +17 -5
  62. smftools-0.1.3/src/smftools/informatics/helpers/extract_readnames_from_BAM.py +22 -0
  63. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/find_conversion_sites.py +24 -16
  64. smftools-0.1.3/src/smftools/informatics/helpers/generate_converted_FASTA.py +98 -0
  65. smftools-0.1.3/src/smftools/informatics/helpers/get_chromosome_lengths.py +32 -0
  66. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/get_native_references.py +10 -7
  67. smftools-0.1.3/src/smftools/informatics/helpers/index_fasta.py +12 -0
  68. smftools-0.1.3/src/smftools/informatics/helpers/make_dirs.py +21 -0
  69. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/make_modbed.py +10 -4
  70. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/modQC.py +10 -2
  71. smftools-0.1.3/src/smftools/informatics/helpers/modcall.py +28 -0
  72. smftools-0.1.3/src/smftools/informatics/helpers/modkit_extract_to_adata.py +518 -0
  73. smftools-0.1.3/src/smftools/informatics/helpers/ohe_batching.py +52 -0
  74. smftools-0.1.3/src/smftools/informatics/helpers/one_hot_encode.py +21 -0
  75. smftools-0.1.3/src/smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +52 -0
  76. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/helpers/separate_bam_by_bc.py +20 -5
  77. smftools-0.1.3/src/smftools/informatics/helpers/split_and_index_BAM.py +41 -0
  78. smftools-0.1.3/src/smftools/informatics/load_adata.py +127 -0
  79. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/informatics/readwrite.py +13 -16
  80. smftools-0.1.3/src/smftools/informatics/subsample_fasta_from_bed.py +47 -0
  81. smftools-0.1.3/src/smftools/informatics/subsample_pod5.py +104 -0
  82. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/__init__.py +6 -7
  83. smftools-0.1.3/src/smftools/preprocessing/append_C_context.py +69 -0
  84. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/binarize_on_Youden.py +8 -4
  85. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/binary_layers_to_ohe.py +9 -4
  86. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/calculate_complexity.py +26 -14
  87. smftools-0.1.3/src/smftools/preprocessing/calculate_consensus.py +47 -0
  88. smftools-0.1.3/src/smftools/preprocessing/calculate_converted_read_methylation_stats.py +96 -0
  89. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/calculate_coverage.py +14 -8
  90. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/calculate_pairwise_hamming_distances.py +11 -6
  91. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/calculate_position_Youden.py +21 -12
  92. smftools-0.1.3/src/smftools/preprocessing/calculate_read_length_stats.py +86 -0
  93. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/clean_NaN.py +13 -6
  94. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/filter_converted_reads_on_methylation.py +15 -6
  95. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/filter_reads_on_length.py +16 -6
  96. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/invert_adata.py +10 -5
  97. smftools-0.1.3/src/smftools/preprocessing/load_sample_sheet.py +24 -0
  98. smftools-0.1.3/src/smftools/preprocessing/make_dirs.py +21 -0
  99. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/mark_duplicates.py +54 -30
  100. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/preprocessing/min_non_diagonal.py +9 -4
  101. smftools-0.1.3/src/smftools/preprocessing/recipes.py +125 -0
  102. smftools-0.1.3/src/smftools/preprocessing/remove_duplicates.py +21 -0
  103. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/readwrite.py +13 -16
  104. smftools-0.1.3/src/smftools/tools/apply_HMM.py +1 -0
  105. smftools-0.1.3/src/smftools/tools/read_HMM.py +1 -0
  106. smftools-0.1.3/src/smftools/tools/subset_adata.py +32 -0
  107. smftools-0.1.3/src/smftools/tools/train_HMM.py +43 -0
  108. smftools-0.1.3/tests/informatics/helpers/test_LoadExperimentConfig.py +17 -0
  109. smftools-0.1.0/PKG-INFO +0 -75
  110. smftools-0.1.0/README.md +0 -9
  111. smftools-0.1.0/experiment_config.csv +0 -20
  112. smftools-0.1.0/src/smftools/datasets/datasets.py +0 -25
  113. smftools-0.1.0/src/smftools/informatics/__init__.py +0 -11
  114. smftools-0.1.0/src/smftools/informatics/helpers/align_BAM.py +0 -49
  115. smftools-0.1.0/src/smftools/informatics/helpers/canoncall.py +0 -12
  116. smftools-0.1.0/src/smftools/informatics/helpers/converted_BAM_to_adata.py +0 -147
  117. smftools-0.1.0/src/smftools/informatics/helpers/count_aligned_reads.py +0 -32
  118. smftools-0.1.0/src/smftools/informatics/helpers/extract_base_identities.py +0 -36
  119. smftools-0.1.0/src/smftools/informatics/helpers/generate_converted_FASTA.py +0 -59
  120. smftools-0.1.0/src/smftools/informatics/helpers/load_experiment_config.py +0 -17
  121. smftools-0.1.0/src/smftools/informatics/helpers/make_dirs.py +0 -15
  122. smftools-0.1.0/src/smftools/informatics/helpers/modcall.py +0 -14
  123. smftools-0.1.0/src/smftools/informatics/helpers/modkit_extract_to_adata.py +0 -355
  124. smftools-0.1.0/src/smftools/informatics/helpers/one_hot_encode.py +0 -14
  125. smftools-0.1.0/src/smftools/informatics/helpers/split_and_index_BAM.py +0 -21
  126. smftools-0.1.0/src/smftools/informatics/pod5_conversion.py +0 -26
  127. smftools-0.1.0/src/smftools/informatics/pod5_direct.py +0 -29
  128. smftools-0.1.0/src/smftools/informatics/pod5_to_adata.py +0 -17
  129. smftools-0.1.0/src/smftools/preprocessing/append_C_context.py +0 -39
  130. smftools-0.1.0/src/smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -38
  131. smftools-0.1.0/src/smftools/preprocessing/calculate_read_length_stats.py +0 -27
  132. smftools-0.1.0/src/smftools/preprocessing/remove_duplicates.py +0 -12
  133. smftools-0.1.0/tests/informatics/helpers/test_align_BAM.py +0 -49
  134. smftools-0.1.0/tests/informatics/helpers/test_binarize_converted_base_identities.py +0 -24
  135. smftools-0.1.0/tests/informatics/helpers/test_canoncall.py +0 -12
  136. smftools-0.1.0/tests/informatics/helpers/test_converted_BAM_to_adata.py +0 -147
  137. smftools-0.1.0/tests/informatics/helpers/test_count_aligned_reads.py +0 -32
  138. smftools-0.1.0/tests/informatics/helpers/test_extract_base_identities.py +0 -36
  139. smftools-0.1.0/tests/informatics/helpers/test_extract_mods.py +0 -39
  140. smftools-0.1.0/tests/informatics/helpers/test_find_conversion_sites.py +0 -53
  141. smftools-0.1.0/tests/informatics/helpers/test_generate_converted_FASTA.py +0 -59
  142. smftools-0.1.0/tests/informatics/helpers/test_get_native_references.py +0 -25
  143. smftools-0.1.0/tests/informatics/helpers/test_informatics.py +0 -260
  144. smftools-0.1.0/tests/informatics/helpers/test_load_adata.py +0 -516
  145. smftools-0.1.0/tests/informatics/helpers/test_load_experiment_config.py +0 -17
  146. smftools-0.1.0/tests/informatics/helpers/test_make_dirs.py +0 -15
  147. smftools-0.1.0/tests/informatics/helpers/test_make_modbed.py +0 -21
  148. smftools-0.1.0/tests/informatics/helpers/test_modQC.py +0 -19
  149. smftools-0.1.0/tests/informatics/helpers/test_modcall.py +0 -14
  150. smftools-0.1.0/tests/informatics/helpers/test_modkit_extract_to_adata.py +0 -355
  151. smftools-0.1.0/tests/informatics/helpers/test_one_hot_encode.py +0 -14
  152. smftools-0.1.0/tests/informatics/helpers/test_separate_bam_by_bc.py +0 -28
  153. smftools-0.1.0/tests/informatics/helpers/test_split_and_index_BAM.py +0 -21
  154. smftools-0.1.0/tests/informatics/test_pod5_conversion.py +0 -26
  155. smftools-0.1.0/tests/informatics/test_pod5_direct.py +0 -29
  156. smftools-0.1.0/tests/informatics/test_pod5_to_adata.py +0 -17
  157. smftools-0.1.0/tests/preprocessing/test_append_C_context.py +0 -39
  158. smftools-0.1.0/tests/preprocessing/test_binarize_on_Youden.py +0 -38
  159. smftools-0.1.0/tests/preprocessing/test_binary_layers_to_ohe.py +0 -25
  160. smftools-0.1.0/tests/preprocessing/test_calculate_complexity.py +0 -59
  161. smftools-0.1.0/tests/preprocessing/test_calculate_converted_read_methylation_stats.py +0 -38
  162. smftools-0.1.0/tests/preprocessing/test_calculate_coverage.py +0 -35
  163. smftools-0.1.0/tests/preprocessing/test_calculate_pairwise_hamming_distances.py +0 -22
  164. smftools-0.1.0/tests/preprocessing/test_calculate_position_Youden.py +0 -95
  165. smftools-0.1.0/tests/preprocessing/test_calculate_read_length_stats.py +0 -27
  166. smftools-0.1.0/tests/preprocessing/test_clean_NaN.py +0 -31
  167. smftools-0.1.0/tests/preprocessing/test_filter_converted_reads_on_methylation.py +0 -20
  168. smftools-0.1.0/tests/preprocessing/test_filter_reads_on_length.py +0 -31
  169. smftools-0.1.0/tests/preprocessing/test_invert_adata.py +0 -18
  170. smftools-0.1.0/tests/preprocessing/test_mark_duplicates.py +0 -110
  171. smftools-0.1.0/tests/preprocessing/test_min_non_diagonal.py +0 -20
  172. smftools-0.1.0/tests/preprocessing/test_preprocessing.py +0 -614
  173. smftools-0.1.0/tests/preprocessing/test_remove_duplicates.py +0 -12
  174. {smftools-0.1.0 → smftools-0.1.3}/.gitattributes +0 -0
  175. {smftools-0.1.0 → smftools-0.1.3}/LICENSE +0 -0
  176. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
  177. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/datasets/__init__.py +0 -0
  178. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
  179. {smftools-0.1.0/src/smftools/informatics/helpers → smftools-0.1.3/src/smftools/informatics/helpers/archived}/informatics.py +0 -0
  180. {smftools-0.1.0/src/smftools/informatics/helpers → smftools-0.1.3/src/smftools/informatics/helpers/archived}/load_adata.py +0 -0
  181. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/plotting/__init__.py +0 -0
  182. {smftools-0.1.0/src/smftools/preprocessing → smftools-0.1.3/src/smftools/preprocessing/archives}/preprocessing.py +0 -0
  183. {smftools-0.1.0 → smftools-0.1.3}/src/smftools/tools/__init__.py +0 -0
  184. /smftools-0.1.0/tests/__init__.py → /smftools-0.1.3/src/smftools/tools/cluster.py +0 -0
  185. {smftools-0.1.0 → smftools-0.1.3}/tests/datasets/test_datasets.py +0 -0
  186. {smftools-0.1.0 → smftools-0.1.3}/tests/test_readwrite.py +0 -0
@@ -1,10 +1,9 @@
1
1
  # Python
2
2
  __pycache__/
3
- /src/smftools/_version.py
4
3
 
5
4
  # Build files
6
5
  build/
7
- dist/
6
+ /dist/
8
7
  /hatch.toml
9
8
  /Pipfile
10
9
  /Pipfile.lock
@@ -16,30 +15,36 @@ dist/
16
15
  /*-venv/
17
16
  /env-*/
18
17
  /venv-*/
18
+ venv/
19
19
  /environment.yml
20
20
 
21
+ # Tests
22
+ /tests/_test_inputs/
23
+ /tests/_test_outputs/
24
+
21
25
  # OS
22
- *.DS_Store
23
- *.LSOverride
24
- *Thumbs.db
26
+ .DS_Store
27
+ .LSOverride
28
+ Thumbs.db
25
29
  *.ipynb_checkpoints/
26
30
  *.directory
27
31
 
28
32
  # IDEs and editors
29
- *.vscode/
30
- *.idea/
33
+ .vscode/
34
+ .idea/
31
35
  *.iml
32
36
 
33
37
  # Logs
34
38
  *.log
35
39
 
36
40
  # temp files
37
- temp/
38
- tmp/
41
+ /temp*
42
+ /tmp*
43
+ *.temp
44
+ *.tmp
39
45
 
40
46
  # Coverage reports
41
47
  .coverage
42
48
  htmlcov/
43
49
 
44
50
  # Docs
45
- /docs
@@ -0,0 +1,17 @@
1
+ version: 2
2
+ build:
3
+ os: ubuntu-20.04
4
+ tools:
5
+ python: "3.12"
6
+ sphinx:
7
+ configuration: docs/source/conf.py
8
+ fail_on_warning: true
9
+ python:
10
+ install:
11
+ - method: pip
12
+ path: .
13
+ extra_requirements:
14
+ - docs
15
+ submodules:
16
+ include: all
17
+ recursive: true
@@ -0,0 +1,3 @@
1
+ Contributing
2
+ ============
3
+ Contributions to smftools are not currently being reviewed or accepted due to the pre-alpha phase status of the project. More mature versions of the project will have contribution guidelines added.
@@ -0,0 +1,94 @@
1
+ Metadata-Version: 2.3
2
+ Name: smftools
3
+ Version: 0.1.3
4
+ Summary: Single Molecule Footprinting Analysis in Python.
5
+ Project-URL: Source, https://github.com/jkmckenna/smftools
6
+ Project-URL: Documentation, https://smftools.readthedocs.io/
7
+ Author: Joseph McKenna
8
+ Maintainer-email: Joseph McKenna <jkmckenna@berkeley.edu>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: anndata,chromatin-accessibility,machine-learning,nanopore,protein-dna-binding,single-locus,single-molecule-footprinting
12
+ Classifier: Development Status :: 2 - Pre-Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Natural Language :: English
18
+ Classifier: Operating System :: MacOS :: MacOS X
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
25
+ Classifier: Topic :: Scientific/Engineering :: Visualization
26
+ Requires-Python: >=3.9
27
+ Requires-Dist: anndata>=0.10.0
28
+ Requires-Dist: biopython>=1.79
29
+ Requires-Dist: cython>=0.29.28
30
+ Requires-Dist: networkx>=3.2
31
+ Requires-Dist: numpy<2,>=1.22.0
32
+ Requires-Dist: pandas>=1.4.2
33
+ Requires-Dist: pod5>=0.1.21
34
+ Requires-Dist: pomegranate>1.0.0
35
+ Requires-Dist: pyfaidx>=0.8.0
36
+ Requires-Dist: pysam>=0.19.1
37
+ Requires-Dist: scanpy>=1.9
38
+ Requires-Dist: scikit-learn>=1.0.2
39
+ Requires-Dist: scipy>=1.7.3
40
+ Requires-Dist: seaborn>=0.11
41
+ Requires-Dist: torch>=1.9.0
42
+ Requires-Dist: tqdm
43
+ Provides-Extra: docs
44
+ Requires-Dist: ipython>=7.20; extra == 'docs'
45
+ Requires-Dist: matplotlib!=3.6.1; extra == 'docs'
46
+ Requires-Dist: myst-nb>=1; extra == 'docs'
47
+ Requires-Dist: myst-parser>=2; extra == 'docs'
48
+ Requires-Dist: nbsphinx>=0.9; extra == 'docs'
49
+ Requires-Dist: readthedocs-sphinx-search; extra == 'docs'
50
+ Requires-Dist: setuptools; extra == 'docs'
51
+ Requires-Dist: sphinx-autodoc-typehints>=1.25.2; extra == 'docs'
52
+ Requires-Dist: sphinx-book-theme>=1.1.0; extra == 'docs'
53
+ Requires-Dist: sphinx-copybutton; extra == 'docs'
54
+ Requires-Dist: sphinx-design; extra == 'docs'
55
+ Requires-Dist: sphinx>=7; extra == 'docs'
56
+ Requires-Dist: sphinxcontrib-bibtex; extra == 'docs'
57
+ Requires-Dist: sphinxext-opengraph; extra == 'docs'
58
+ Provides-Extra: tests
59
+ Requires-Dist: pytest; extra == 'tests'
60
+ Requires-Dist: pytest-cov; extra == 'tests'
61
+ Description-Content-Type: text/markdown
62
+
63
+ [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
64
+ [![Docs](https://readthedocs.org/projects/smftools/badge/?version=latest)](https://smftools.readthedocs.io/en/latest/?badge=latest)
65
+
66
+ # smftools
67
+ A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. Additional functionality for preprocessing, analysis, and visualization.
68
+
69
+ ## Philosophy
70
+ While most genomic data structures handle low-coverage data (<100X) along large references, smftools prioritizes high-coverage data (scalable to at least 1 million X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
71
+
72
+ ## Dependencies
73
+ The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
74
+ 1) [Dorado](https://github.com/nanoporetech/dorado) -> For standard/modified basecalling and alignment. Can be obtained by downloading and configuring nanopore MinKNOW software.
75
+ 2) [Samtools](https://github.com/samtools/samtools) -> For working with SAM/BAM files
76
+ 3) [Minimap2](https://github.com/lh3/minimap2) -> The aligner used by Dorado
77
+ 4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
78
+ 5) [Bedtools](https://github.com/arq5x/bedtools2) -> For generating Bedgraphs from BAM alignment files.
79
+ 6) [BedGraphToBigWig](https://genome.ucsc.edu/goldenPath/help/bigWig.html) -> For converting BedGraphs to BigWig files for IGV sessions.
80
+
81
+ ## Modules
82
+ ### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
83
+ ![](docs/source/_static/smftools_informatics_diagram.png)
84
+ ### Preprocessing: Appends QC metrics to the AnnData object and performs filtering.
85
+ ![](docs/source/_static/smftools_preprocessing_diagram.png)
86
+ - Tools: Appends various analyses to the AnnData object.
87
+ - Plotting: Visualization of analyses stored within the AnnData object.
88
+
89
+ ## Announcements
90
+ ### 09/09/24 - The pre-alpha phase package ([smftools-0.1.1](https://pypi.org/project/smftools/))
91
+ The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
92
+
93
+ ### 08/30/24 - The pre-alpha phase package ([smftools-0.1.0](https://pypi.org/project/smftools/)) is installable through pypi!
94
+ Currently, this package (smftools-0.1.0) is going through rapid improvement (dependency handling across Linux and Mac OS, testing, documentation, debugging) and is still too early in development for standard use. The underlying functionality was originally developed as a collection of scripts for single molecule footprinting (SMF) experiments in our lab, but is being packaged/developed to facilitate the expansion of SMF to any lab that is interested in performing these styles of experiments/analyses. The alpha-phase package is expected to be available within a couple months, so stay tuned!
@@ -0,0 +1,32 @@
1
+ [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
2
+ [![Docs](https://readthedocs.org/projects/smftools/badge/?version=latest)](https://smftools.readthedocs.io/en/latest/?badge=latest)
3
+
4
+ # smftools
5
+ A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. Additional functionality for preprocessing, analysis, and visualization.
6
+
7
+ ## Philosophy
8
+ While most genomic data structures handle low-coverage data (<100X) along large references, smftools prioritizes high-coverage data (scalable to at least 1 million X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
9
+
10
+ ## Dependencies
11
+ The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
12
+ 1) [Dorado](https://github.com/nanoporetech/dorado) -> For standard/modified basecalling and alignment. Can be obtained by downloading and configuring nanopore MinKNOW software.
13
+ 2) [Samtools](https://github.com/samtools/samtools) -> For working with SAM/BAM files
14
+ 3) [Minimap2](https://github.com/lh3/minimap2) -> The aligner used by Dorado
15
+ 4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
16
+ 5) [Bedtools](https://github.com/arq5x/bedtools2) -> For generating Bedgraphs from BAM alignment files.
17
+ 6) [BedGraphToBigWig](https://genome.ucsc.edu/goldenPath/help/bigWig.html) -> For converting BedGraphs to BigWig files for IGV sessions.
18
+
19
+ ## Modules
20
+ ### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
21
+ ![](docs/source/_static/smftools_informatics_diagram.png)
22
+ ### Preprocessing: Appends QC metrics to the AnnData object and performs filtering.
23
+ ![](docs/source/_static/smftools_preprocessing_diagram.png)
24
+ - Tools: Appends various analyses to the AnnData object.
25
+ - Plotting: Visualization of analyses stored within the AnnData object.
26
+
27
+ ## Announcements
28
+ ### 09/09/24 - The pre-alpha phase package ([smftools-0.1.1](https://pypi.org/project/smftools/))
29
+ The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
30
+
31
+ ### 08/30/24 - The pre-alpha phase package ([smftools-0.1.0](https://pypi.org/project/smftools/)) is installable through pypi!
32
+ Currently, this package (smftools-0.1.0) is going through rapid improvement (dependency handling across Linux and Mac OS, testing, documentation, debugging) and is still too early in development for standard use. The underlying functionality was originally developed as a collection of scripts for single molecule footprinting (SMF) experiments in our lab, but is being packaged/developed to facilitate the expansion of SMF to any lab that is interested in performing these styles of experiments/analyses. The alpha-phase package is expected to be available within a couple months, so stay tuned!
@@ -0,0 +1,20 @@
1
+ # Minimal makefile for Sphinx documentation
2
+ #
3
+
4
+ # You can set these variables from the command line, and also
5
+ # from the environment for the first two.
6
+ SPHINXOPTS ?=
7
+ SPHINXBUILD ?= sphinx-build
8
+ SOURCEDIR = source
9
+ BUILDDIR = build
10
+
11
+ # Put it first so that "make" without argument is like "make help".
12
+ help:
13
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14
+
15
+ .PHONY: help Makefile
16
+
17
+ # Catch-all target: route all unknown targets to Sphinx using the new
18
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19
+ %: Makefile
20
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -0,0 +1,35 @@
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=source
11
+ set BUILDDIR=build
12
+
13
+ %SPHINXBUILD% >NUL 2>NUL
14
+ if errorlevel 9009 (
15
+ echo.
16
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17
+ echo.installed, then set the SPHINXBUILD environment variable to point
18
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
19
+ echo.may add the Sphinx directory to PATH.
20
+ echo.
21
+ echo.If you don't have Sphinx installed, grab it from
22
+ echo.https://www.sphinx-doc.org/
23
+ exit /b 1
24
+ )
25
+
26
+ if "%1" == "" goto help
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
+
34
+ :end
35
+ popd
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,9 @@
1
+ ## Datasets:
2
+
3
+ ```{eval-rst}
4
+ .. module:: smftools.datasets
5
+ ```
6
+
7
+ ```{eval-rst}
8
+ .. currentmodule:: smftools
9
+ ```
@@ -0,0 +1,26 @@
1
+ # API
2
+
3
+ Import smftools as:
4
+
5
+ ```
6
+ import smftools as smf
7
+ ```
8
+
9
+ ```{toctree}
10
+ :maxdepth: 2
11
+
12
+ informatics
13
+ preprocessing
14
+ tools
15
+ datasets
16
+ ```
17
+
18
+ ## Informatics module diagram
19
+ ```{image} ../_static/smftools_informatics_diagram.png
20
+ :width: 800px
21
+ ```
22
+
23
+ ## Preprocessing module diagram
24
+ ```{image} ../_static/smftools_preprocessing_diagram.png
25
+ :width: 800px
26
+ ```
@@ -0,0 +1,27 @@
1
+ ## Informatics: `inform`
2
+
3
+ ## Informatics module diagram
4
+ ```{image} ../_static/smftools_informatics_diagram.png
5
+ :width: 1000px
6
+ ```
7
+
8
+ ```{eval-rst}
9
+ .. module:: smftools.inform
10
+ ```
11
+
12
+ ```{eval-rst}
13
+ .. currentmodule:: smftools
14
+ ```
15
+
16
+ Processes raw sequencing data to load an adata object.
17
+
18
+
19
+ ### Diagram of final steps of Direct SMF workflow
20
+ ```{image} ../_static/modkit_extract_to_adata.png
21
+ :width: 1000px
22
+ ```
23
+
24
+ ### Diagram of final steps of Conversion SMF workflow
25
+ ```{image} ../_static/converted_BAM_to_adata.png
26
+ :width: 1000px
27
+ ```
@@ -0,0 +1,14 @@
1
+ ## Preprocessing: `pp`
2
+
3
+ ## Preprocessing module diagram
4
+ ```{image} ../_static/smftools_preprocessing_diagram.png
5
+ :width: 1000px
6
+ ```
7
+
8
+ ```{eval-rst}
9
+ .. module:: smftools.pp
10
+ ```
11
+
12
+ ```{eval-rst}
13
+ .. currentmodule:: smftools
14
+ ```
@@ -0,0 +1,9 @@
1
+ ## Tools: `tl`
2
+
3
+ ```{eval-rst}
4
+ .. module:: smftools.tl
5
+ ```
6
+
7
+ ```{eval-rst}
8
+ .. currentmodule:: smftools
9
+ ```
@@ -0,0 +1,75 @@
1
+ # Basic Usage
2
+
3
+ Import smftools:
4
+
5
+ ```
6
+ import smftools as smf
7
+ ```
8
+
9
+ ## Informatics Module Usage
10
+
11
+ Many use cases for smftools begin here. For most users, the call below will be sufficient to convert any raw SMF dataset to an AnnData object:
12
+
13
+ ```
14
+ config_path = "/Path_to_experiment_config.csv"
15
+ smf.inform.load_adata(config_path)
16
+ ```
17
+
18
+ ## Loading AnnData objects created by the informatics module
19
+
20
+ After creating an AnnData object holding your experiment's SMF data, you can load the AnnData object as so:
21
+
22
+ ```
23
+ import anndata as ad
24
+ input_adata = "/Path_to_experiment_AnnData.h5ad.gz"
25
+ adata = ad.read_h5ad(input_adata)
26
+ adata.obs_names_make_unique()
27
+ ```
28
+
29
+ If you don't have an AnnData object yet, but want to play with the downstream Preprocessing, Tools, and Plotting modules, you can load a pre-loaded SMF dataset.
30
+
31
+ Currently, you can do this with our lab's in vitro dCas9 binding kinetics dataset, which was derived from a Hia5 SMF experiment using direct m6A high-accuracy basecalls:
32
+
33
+ ```
34
+ adata = smf.datasets.dCas9_kinetics()
35
+ adata.obs_names_make_unique()
36
+ ```
37
+
38
+ Alternatively, you can do this with our lab's M.CviPI SMF test data in F1-hybrid natural killer cells generated by NEB EMseq conversion followed by canonical basecalling:
39
+
40
+ ```
41
+ adata = smf.datasets.Kissiov_and_McKenna_2025()
42
+ adata.obs_names_make_unique()
43
+ ```
44
+
45
+ ## Writing out AnnData objects to save analysis progress
46
+
47
+ After preprocessing and downstream analysis of the AnnData object, you can save the AnnData object at any step as so:
48
+
49
+ ```
50
+ import anndata as ad
51
+ import os
52
+
53
+ output_dir = '/Path_to_output_directory'
54
+ output_adata = 'analyzed_adata.h5ad.gz'
55
+ final_output = os.path.join(output_dir, output_adata)
56
+ adata.write_h5ad(final_output, compression='gzip')
57
+ ```
58
+
59
+
60
+ ## Troubleshooting
61
+ For more advanced usage and help troubleshooting, the API and tutorials for each of the modules are still being developed.
62
+ However, you can currently learn about the functions contained within the module by calling:
63
+
64
+ ```
65
+ smf.inform.__all__
66
+ ```
67
+
68
+ This lists the functions within any given module. If you want to see the associated docstring for a given function, here is an example:
69
+
70
+ ```
71
+ print(smf.inform.load_adata.__doc__)
72
+ ```
73
+
74
+ These docstrings will provide a brief description of the function and also tell you the input parameters and what the function returns.
75
+ In some cases, usage examples will also be provided in the docstring in the form of doctests.
@@ -0,0 +1,117 @@
1
+ # Configuration file for the Sphinx documentation builder.
2
+ #
3
+ # For the full list of built-in configuration values, see the documentation:
4
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html
5
+ import sys
6
+ import os
7
+ from pathlib import Path
8
+ HERE = Path(__file__).parent  # directory that contains this conf.py
9
+ PARENT_PARENT_HERE = HERE.parents[1]  # two directory levels above HERE
10
+ SRC_PATH = PARENT_PARENT_HERE / 'src'
11
+ sys.path.insert(0, str(SRC_PATH))  # put the src directory at the front of the import path
12
+ for x in os.walk(str(SRC_PATH)):  # os.walk yields (dirpath, dirnames, filenames) tuples
13
+ sys.path.insert(0, x[0])  # x[0] is the visited directory, so every directory under src is also put on sys.path
14
+ print(sys.path)  # writes the module search path to the build output
15
+ try:
16
+ import smftools
17
+ print("smftools imported successfully.")
18
+ except ImportError:
19
+ print("smftools is not imported.")  # the failure is only reported; the error is not re-raised
20
+ # -- Project information -----------------------------------------------------
21
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
22
+
23
+ project = 'smftools'  # also reused as html_title further down in this file
24
+ copyright = '2024, Joseph McKenna'
25
+ author = 'Joseph McKenna'
26
+ release = '0.1.0'  # also passed as "repository_branch" in html_theme_options; NOTE(review): this file ships in the 0.1.3 package - confirm whether the value should track the package version
27
+ repository_url = 'https://github.com/jkmckenna/smftools'  # passed to the theme as "repository_url" in html_theme_options
28
+
29
+ # -- General configuration ---------------------------------------------------
30
+ # Bibliography settings (presumably consumed by the "sphinxcontrib.bibtex" extension listed below)
31
+ bibtex_bibfiles = ["references.bib"]
32
+ bibtex_reference_style = "author_year"
33
+ nitpicky = True
34
+ needs_sphinx = "4.0"
35
+
36
+ master_doc = "index"
37
+ templates_path = ['_templates']
38
+ exclude_patterns = [
39
+ "_build",
40
+ "Thumbs.db",
41
+ ".DS_Store",
42
+ # exclude version md files: the glob matches release-notes/*.md files whose name does not start with "i", so index.md is not excluded
43
+ "release-notes/[!i]*.md"
44
+ ]
45
+ extensions = [
46
+ "myst_nb",
47
+ "sphinx_copybutton",
48
+ "sphinx.ext.autodoc",
49
+ "sphinx.ext.intersphinx",
50
+ "sphinx.ext.doctest",
51
+ "sphinx.ext.coverage",
52
+ "sphinx.ext.mathjax",
53
+ "sphinx.ext.napoleon",
54
+ "sphinx.ext.autosummary",
55
+ "sphinx.ext.extlinks",
56
+ "sphinxcontrib.bibtex",
57
+ "matplotlib.sphinxext.plot_directive",
58
+ "sphinx_autodoc_typehints",
59
+ "sphinx_design",
60
+ "sphinx_search.extension",
61
+ "sphinxext.opengraph",
62
+ ]
63
+
64
+ # Generate the API documentation when building
65
+ autosummary_generate = True
66
+ autodoc_member_order = "bysource"
67
+ napoleon_google_docstring = True  # Google-style docstring parsing is switched on ...
68
+ napoleon_numpy_docstring = False  # ... and NumPy-style parsing is switched off
69
+ napoleon_include_init_with_doc = False
70
+ napoleon_use_rtype = True # having a separate entry generally helps readability
71
+ napoleon_use_param = True
72
+ napoleon_custom_sections = [("Params", "Parameters")]  # presumably makes a "Params" docstring section render like "Parameters" - confirm against the napoleon docs
73
+ todo_include_todos = False  # NOTE(review): "sphinx.ext.todo" is not in the extensions list of this file, so this option presumably has no effect - confirm
74
+ api_dir = HERE / "api"  # not referenced anywhere else in this file
75
+ myst_enable_extensions = [
76
+ "amsmath",
77
+ "colon_fence",
78
+ "deflist",
79
+ "dollarmath",
80
+ "html_image",
81
+ "html_admonition",
82
+ ]
83
+ myst_url_schemes = ("http", "https", "mailto", "ftp")
84
+ myst_heading_anchors = 3
85
+ nb_output_stderr = "remove"
86
+ nb_execution_mode = "off"  # presumably means notebooks are not executed during the build - confirm against the myst_nb docs
87
+ nb_merge_streams = True
88
+
89
+ suppress_warnings = [
90
+ "myst.header"
91
+ ]
92
+
93
+ typehints_defaults = "braces"
94
+
95
+ # html_context = {
96
+ # "display_github": True,
97
+ # "github_user": "jkmckenna",
98
+ # "github_repo": project,
99
+ # "github_version": "main",
100
+ # "conf_py_path": "/docs/source/",
101
+ # }
102
+
103
+ # -- Options for HTML output -------------------------------------------------
104
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
105
+
106
+ html_theme = "sphinx_book_theme"
107
+ html_title = project  # reuses the project name assigned earlier in this file
108
+
109
+ html_theme_options = {
110
+ "repository_url": repository_url,
111
+ "use_repository_button": True,
112
+ "show_toc_level": 1,
113
+ "path_to_docs": "docs/",  # NOTE(review): the commented-out html_context in this file uses "/docs/source/" as the conf.py path - confirm "docs/" is the intended value
114
+ "repository_branch": release,  # NOTE(review): release is the version string assigned above, while the commented-out html_context uses "main" - confirm a branch with this name exists
115
+ }
116
+
117
+ html_static_path = ['_static']
@@ -0,0 +1,9 @@
1
+ # Contributors
2
+
3
+ ## Current
4
+
5
+ - Joseph McKenna, lead developer (2024)
6
+
7
+ ## Acknowledgments
8
+
9
+ - [Tjian/Darzacq Lab](https://www.tjian-darzacq.mcb.berkeley.edu/): Funding and a supportive lab environment!
@@ -0,0 +1,3 @@
1
+ (contribution-guide)=
2
+
3
+ # Contributing
@@ -0,0 +1,54 @@
1
+ smftools documentation
2
+ ======================
3
+
4
+ ```{include} ../../README.md
5
+ :end-before: '## Dependencies'
6
+ ```
7
+
8
+ ::::{grid} 1 2 2 2
9
+ :gutter: 2
10
+
11
+ :::{grid-item-card} Installation {octicon}`plug;1em;`
12
+ :link: installation
13
+ :link-type: doc
14
+
15
+ smftools installation instructions
16
+ :::
17
+
18
+ :::{grid-item-card} Tutorials {octicon}`play;1em;`
19
+ :link: tutorials/index
20
+ :link-type: doc
21
+
22
+ Jupyter notebook tutorials on smftools usage.
23
+ :::
24
+
25
+ :::{grid-item-card} API reference {octicon}`book;1em;`
26
+ :link: api/index
27
+ :link-type: doc
28
+
29
+ The API reference contains a detailed description of
30
+ the smftools API.
31
+ :::
32
+
33
+ :::{grid-item-card} GitHub {octicon}`mark-github;1em;`
34
+ :link: https://github.com/jkmckenna/smftools
35
+
36
+ smftools GitHub link
37
+ :::
38
+ ::::
39
+
40
+ ```{toctree}
41
+ :hidden: true
42
+ :maxdepth: 1
43
+
44
+ installation
45
+ basic_usage
46
+ tutorials/index
47
+ api/index
48
+ release-notes/index
49
+ dev/index
50
+ contributors
51
+ references
52
+ ```
53
+
54
+ [github]: https://github.com/jkmckenna/smftools