smftools 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. {smftools-0.1.0 → smftools-0.1.1}/.gitignore +11 -10
  2. smftools-0.1.1/.readthedocs.yaml +17 -0
  3. smftools-0.1.1/CONTRIBUTING.md +3 -0
  4. smftools-0.1.1/PKG-INFO +88 -0
  5. smftools-0.1.1/README.md +28 -0
  6. smftools-0.1.1/docs/Makefile +20 -0
  7. smftools-0.1.1/docs/make.bat +35 -0
  8. smftools-0.1.1/docs/source/_static/tmp +1 -0
  9. smftools-0.1.1/docs/source/_templates/tmp +1 -0
  10. smftools-0.1.1/docs/source/api/datasets.md +9 -0
  11. smftools-0.1.1/docs/source/api/index.md +16 -0
  12. smftools-0.1.1/docs/source/api/informatics.md +11 -0
  13. smftools-0.1.1/docs/source/api/preprocessing.md +9 -0
  14. smftools-0.1.1/docs/source/api/tools.md +9 -0
  15. smftools-0.1.1/docs/source/conf.py +117 -0
  16. smftools-0.1.1/docs/source/contributors.md +9 -0
  17. smftools-0.1.1/docs/source/dev/index.md +3 -0
  18. smftools-0.1.1/docs/source/index.md +53 -0
  19. smftools-0.1.1/docs/source/installation.md +20 -0
  20. smftools-0.1.1/docs/source/references.bib +406 -0
  21. smftools-0.1.1/docs/source/references.rst +4 -0
  22. smftools-0.1.1/docs/source/release-notes/0.1.0.md +4 -0
  23. smftools-0.1.1/docs/source/release-notes/index.md +8 -0
  24. smftools-0.1.1/docs/source/requirements.txt +14 -0
  25. smftools-0.1.1/docs/source/tutorials/index.md +3 -0
  26. smftools-0.1.1/experiment_config.csv +17 -0
  27. {smftools-0.1.0 → smftools-0.1.1}/pyproject.toml +6 -32
  28. {smftools-0.1.0 → smftools-0.1.1}/requirements.txt +3 -1
  29. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/__init__.py +0 -2
  30. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/_settings.py +1 -1
  31. smftools-0.1.1/src/smftools/_version.py +1 -0
  32. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/datasets/datasets.py +11 -9
  33. smftools-0.1.1/src/smftools/informatics/__init__.py +12 -0
  34. smftools-0.1.1/src/smftools/informatics/bam_conversion.py +47 -0
  35. smftools-0.1.1/src/smftools/informatics/bam_direct.py +49 -0
  36. smftools-0.1.1/src/smftools/informatics/basecalls_to_adata.py +42 -0
  37. smftools-0.1.1/src/smftools/informatics/fast5_to_pod5.py +19 -0
  38. smftools-0.1.1/src/smftools/informatics/helpers/LoadExperimentConfig.py +74 -0
  39. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/__init__.py +4 -4
  40. smftools-0.1.1/src/smftools/informatics/helpers/align_and_sort_BAM.py +52 -0
  41. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/binarize_converted_base_identities.py +10 -3
  42. smftools-0.1.1/src/smftools/informatics/helpers/canoncall.py +23 -0
  43. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/converted_BAM_to_adata.py +30 -13
  44. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/count_aligned_reads.py +12 -5
  45. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/extract_base_identities.py +13 -6
  46. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/extract_mods.py +17 -5
  47. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/find_conversion_sites.py +15 -9
  48. smftools-0.1.1/src/smftools/informatics/helpers/generate_converted_FASTA.py +79 -0
  49. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/get_native_references.py +10 -7
  50. smftools-0.1.1/src/smftools/informatics/helpers/make_dirs.py +21 -0
  51. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/make_modbed.py +10 -4
  52. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/modQC.py +10 -2
  53. smftools-0.1.1/src/smftools/informatics/helpers/modcall.py +26 -0
  54. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/modkit_extract_to_adata.py +25 -13
  55. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/one_hot_encode.py +8 -3
  56. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/helpers/separate_bam_by_bc.py +18 -5
  57. smftools-0.1.1/src/smftools/informatics/helpers/split_and_index_BAM.py +29 -0
  58. smftools-0.1.1/src/smftools/informatics/pod5_conversion.py +53 -0
  59. smftools-0.1.1/src/smftools/informatics/pod5_direct.py +55 -0
  60. smftools-0.1.1/src/smftools/informatics/pod5_to_adata.py +40 -0
  61. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/informatics/readwrite.py +13 -16
  62. smftools-0.1.1/src/smftools/informatics/subsample_pod5.py +48 -0
  63. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/__init__.py +0 -6
  64. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/append_C_context.py +15 -8
  65. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/binarize_on_Youden.py +8 -4
  66. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/binary_layers_to_ohe.py +9 -4
  67. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/calculate_complexity.py +26 -14
  68. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/calculate_converted_read_methylation_stats.py +12 -5
  69. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/calculate_coverage.py +13 -7
  70. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/calculate_pairwise_hamming_distances.py +11 -6
  71. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/calculate_position_Youden.py +21 -12
  72. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/calculate_read_length_stats.py +11 -6
  73. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/clean_NaN.py +12 -5
  74. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/filter_converted_reads_on_methylation.py +12 -5
  75. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/filter_reads_on_length.py +13 -5
  76. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/invert_adata.py +9 -5
  77. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/mark_duplicates.py +20 -11
  78. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/min_non_diagonal.py +9 -4
  79. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/preprocessing/remove_duplicates.py +9 -3
  80. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/readwrite.py +13 -16
  81. smftools-0.1.0/PKG-INFO +0 -75
  82. smftools-0.1.0/README.md +0 -9
  83. smftools-0.1.0/experiment_config.csv +0 -20
  84. smftools-0.1.0/src/smftools/informatics/__init__.py +0 -11
  85. smftools-0.1.0/src/smftools/informatics/helpers/align_BAM.py +0 -49
  86. smftools-0.1.0/src/smftools/informatics/helpers/canoncall.py +0 -12
  87. smftools-0.1.0/src/smftools/informatics/helpers/generate_converted_FASTA.py +0 -59
  88. smftools-0.1.0/src/smftools/informatics/helpers/load_experiment_config.py +0 -17
  89. smftools-0.1.0/src/smftools/informatics/helpers/make_dirs.py +0 -15
  90. smftools-0.1.0/src/smftools/informatics/helpers/modcall.py +0 -14
  91. smftools-0.1.0/src/smftools/informatics/helpers/split_and_index_BAM.py +0 -21
  92. smftools-0.1.0/src/smftools/informatics/pod5_conversion.py +0 -26
  93. smftools-0.1.0/src/smftools/informatics/pod5_direct.py +0 -29
  94. smftools-0.1.0/src/smftools/informatics/pod5_to_adata.py +0 -17
  95. {smftools-0.1.0 → smftools-0.1.1}/.gitattributes +0 -0
  96. {smftools-0.1.0 → smftools-0.1.1}/LICENSE +0 -0
  97. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
  98. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/datasets/__init__.py +0 -0
  99. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
  100. {smftools-0.1.0/src/smftools/informatics/helpers → smftools-0.1.1/src/smftools/informatics/helpers/archived}/informatics.py +0 -0
  101. {smftools-0.1.0/src/smftools/informatics/helpers → smftools-0.1.1/src/smftools/informatics/helpers/archived}/load_adata.py +0 -0
  102. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/plotting/__init__.py +0 -0
  103. {smftools-0.1.0/src/smftools/preprocessing → smftools-0.1.1/src/smftools/preprocessing/archives}/preprocessing.py +0 -0
  104. {smftools-0.1.0 → smftools-0.1.1}/src/smftools/tools/__init__.py +0 -0
  105. {smftools-0.1.0 → smftools-0.1.1}/tests/__init__.py +0 -0
  106. {smftools-0.1.0 → smftools-0.1.1}/tests/datasets/test_datasets.py +0 -0
  107. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_align_BAM.py +0 -0
  108. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_binarize_converted_base_identities.py +0 -0
  109. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_canoncall.py +0 -0
  110. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_converted_BAM_to_adata.py +0 -0
  111. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_count_aligned_reads.py +0 -0
  112. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_extract_base_identities.py +0 -0
  113. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_extract_mods.py +0 -0
  114. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_find_conversion_sites.py +0 -0
  115. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_generate_converted_FASTA.py +0 -0
  116. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_get_native_references.py +0 -0
  117. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_informatics.py +0 -0
  118. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_load_adata.py +0 -0
  119. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_load_experiment_config.py +0 -0
  120. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_make_dirs.py +0 -0
  121. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_make_modbed.py +0 -0
  122. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_modQC.py +0 -0
  123. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_modcall.py +0 -0
  124. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_modkit_extract_to_adata.py +0 -0
  125. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_one_hot_encode.py +0 -0
  126. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_separate_bam_by_bc.py +0 -0
  127. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/helpers/test_split_and_index_BAM.py +0 -0
  128. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/test_pod5_conversion.py +0 -0
  129. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/test_pod5_direct.py +0 -0
  130. {smftools-0.1.0 → smftools-0.1.1}/tests/informatics/test_pod5_to_adata.py +0 -0
  131. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_append_C_context.py +0 -0
  132. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_binarize_on_Youden.py +0 -0
  133. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_binary_layers_to_ohe.py +0 -0
  134. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_calculate_complexity.py +0 -0
  135. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_calculate_converted_read_methylation_stats.py +0 -0
  136. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_calculate_coverage.py +0 -0
  137. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_calculate_pairwise_hamming_distances.py +0 -0
  138. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_calculate_position_Youden.py +0 -0
  139. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_calculate_read_length_stats.py +0 -0
  140. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_clean_NaN.py +0 -0
  141. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_filter_converted_reads_on_methylation.py +0 -0
  142. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_filter_reads_on_length.py +0 -0
  143. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_invert_adata.py +0 -0
  144. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_mark_duplicates.py +0 -0
  145. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_min_non_diagonal.py +0 -0
  146. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_preprocessing.py +0 -0
  147. {smftools-0.1.0 → smftools-0.1.1}/tests/preprocessing/test_remove_duplicates.py +0 -0
  148. {smftools-0.1.0 → smftools-0.1.1}/tests/test_readwrite.py +0 -0
@@ -1,10 +1,9 @@
1
1
  # Python
2
2
  __pycache__/
3
- /src/smftools/_version.py
4
3
 
5
4
  # Build files
6
5
  build/
7
- dist/
6
+ /dist/
8
7
  /hatch.toml
9
8
  /Pipfile
10
9
  /Pipfile.lock
@@ -16,30 +15,32 @@ dist/
16
15
  /*-venv/
17
16
  /env-*/
18
17
  /venv-*/
18
+ venv/
19
19
  /environment.yml
20
20
 
21
21
  # OS
22
- *.DS_Store
23
- *.LSOverride
24
- *Thumbs.db
22
+ .DS_Store
23
+ .LSOverride
24
+ Thumbs.db
25
25
  *.ipynb_checkpoints/
26
26
  *.directory
27
27
 
28
28
  # IDEs and editors
29
- *.vscode/
30
- *.idea/
29
+ .vscode/
30
+ .idea/
31
31
  *.iml
32
32
 
33
33
  # Logs
34
34
  *.log
35
35
 
36
36
  # temp files
37
- temp/
38
- tmp/
37
+ /temp*
38
+ /tmp*
39
+ *.temp
40
+ *.tmp
39
41
 
40
42
  # Coverage reports
41
43
  .coverage
42
44
  htmlcov/
43
45
 
44
46
  # Docs
45
- /docs
@@ -0,0 +1,17 @@
1
+ version: 2
2
+ build:
3
+ os: ubuntu-20.04
4
+ tools:
5
+ python: "3.12"
6
+ sphinx:
7
+ configuration: docs/source/conf.py
8
+ fail_on_warning: true
9
+ python:
10
+ install:
11
+ - method: pip
12
+ path: .
13
+ extra_requirements:
14
+ - docs
15
+ submodules:
16
+ include: all
17
+ recursive: true
@@ -0,0 +1,3 @@
1
+ Contributing
2
+ ============
3
+ Contributions to smftools are not currently being reviewed or accepted due to the pre-alpha phase status of the project. More mature versions of the project will have contribution guidelines added.
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.3
2
+ Name: smftools
3
+ Version: 0.1.1
4
+ Summary: Single Molecule Footprinting Analysis in Python.
5
+ Project-URL: Source, https://github.com/jkmckenna/smftools
6
+ Author: Joseph McKenna
7
+ Maintainer-email: Joseph McKenna <jkmckenna@berkeley.edu>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: anndata,chromatin-accessibility,machine-learning,nanopore,protein-dna-binding,single-locus,single-molecule-footprinting
11
+ Classifier: Development Status :: 2 - Pre-Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Natural Language :: English
17
+ Classifier: Operating System :: MacOS :: MacOS X
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
24
+ Classifier: Topic :: Scientific/Engineering :: Visualization
25
+ Requires-Python: >=3.9
26
+ Requires-Dist: anndata>=0.10.0
27
+ Requires-Dist: biopython>=1.79
28
+ Requires-Dist: cython>=0.29.28
29
+ Requires-Dist: networkx>=3.2
30
+ Requires-Dist: numpy<2,>=1.22.0
31
+ Requires-Dist: pandas>=1.4.2
32
+ Requires-Dist: pod5>=0.1.21
33
+ Requires-Dist: pomegranate>1.0.0
34
+ Requires-Dist: pysam>=0.19.1
35
+ Requires-Dist: scanpy>=1.9
36
+ Requires-Dist: scikit-learn>=1.0.2
37
+ Requires-Dist: scipy>=1.7.3
38
+ Requires-Dist: seaborn>=0.11
39
+ Requires-Dist: torch>=1.9.0
40
+ Requires-Dist: tqdm
41
+ Provides-Extra: base-tests
42
+ Requires-Dist: pytest; extra == 'base-tests'
43
+ Requires-Dist: pytest-cov; extra == 'base-tests'
44
+ Provides-Extra: docs
45
+ Requires-Dist: ipython>=7.20; extra == 'docs'
46
+ Requires-Dist: matplotlib!=3.6.1; extra == 'docs'
47
+ Requires-Dist: myst-nb>=1; extra == 'docs'
48
+ Requires-Dist: myst-parser>=2; extra == 'docs'
49
+ Requires-Dist: nbsphinx>=0.9; extra == 'docs'
50
+ Requires-Dist: readthedocs-sphinx-search; extra == 'docs'
51
+ Requires-Dist: setuptools; extra == 'docs'
52
+ Requires-Dist: sphinx-autodoc-typehints>=1.25.2; extra == 'docs'
53
+ Requires-Dist: sphinx-book-theme>=1.1.0; extra == 'docs'
54
+ Requires-Dist: sphinx-copybutton; extra == 'docs'
55
+ Requires-Dist: sphinx-design; extra == 'docs'
56
+ Requires-Dist: sphinx>=7; extra == 'docs'
57
+ Requires-Dist: sphinxcontrib-bibtex; extra == 'docs'
58
+ Requires-Dist: sphinxext-opengraph; extra == 'docs'
59
+ Description-Content-Type: text/markdown
60
+
61
+ [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
62
+ [![Docs](https://readthedocs.org/projects/smftools/badge/?version=latest)](https://smftools.readthedocs.io/en/latest/?badge=latest)
63
+
64
+ # smftools
65
+ A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. Additional functionality for preprocessing, analysis, and visualization. Data structures are compatible with analyses developed within the [scverse](https://github.com/scverse) project, including [scanpy](https://github.com/scverse/scanpy) and [scvi-tools](https://github.com/scverse/scvi-tools).
66
+
67
+ ## Philosophy
68
+ While most genomic data structures handle low-coverage data (<100X) along large references, smftools prioritizes high-coverage data (scalable to at least 1 million X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
69
+
70
+ ## Dependencies
71
+ The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
72
+ 1) [Dorado](https://github.com/nanoporetech/dorado) -> For standard/modified basecalling and alignment. Can be obtained by downloading and configuring the Nanopore MinKNOW software.
73
+ 2) [Samtools](https://github.com/samtools/samtools) -> For working with SAM/BAM files
74
+ 3) [Minimap2](https://github.com/lh3/minimap2) -> The aligner used by Dorado
75
+ 4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
76
+
77
+ ## Modules
78
+ - Informatics: Processes raw SMF data coming from Nanopore POD5 files, BAM files, or FASTQ files and organizes it into an AnnData object.
79
+ - Preprocessing: Filters the AnnData object on read length, total methylation, and a variety of QC metrics.
80
+ - Tools: Appends various analyses to the AnnData object.
81
+ - Plotting: Visualization of analyses stored within the AnnData object.
82
+
83
+ ## Announcements
84
+ ### 09/09/24 - The pre-alpha phase package ([smftools-0.1.1](https://pypi.org/project/smftools/))
85
+ The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
86
+
87
+ ### 08/30/24 - The pre-alpha phase package ([smftools-0.1.0](https://pypi.org/project/smftools/)) is installable through pypi!
88
+ Currently, this package (smftools-0.1.0) is going through rapid improvement (dependency handling across Linux and Mac OS, testing, documentation, debugging) and is still too early in development for standard use. The underlying functionality was originally developed as a collection of scripts for single molecule footprinting (SMF) experiments in our lab, but is being packaged/developed to facilitate the expansion of SMF to any lab that is interested in performing these styles of experiments/analyses. The alpha-phase package is expected to be available within a couple of months, so stay tuned!
@@ -0,0 +1,28 @@
1
+ [![PyPI](https://img.shields.io/pypi/v/smftools.svg)](https://pypi.org/project/smftools)
2
+ [![Docs](https://readthedocs.org/projects/smftools/badge/?version=latest)](https://smftools.readthedocs.io/en/latest/?badge=latest)
3
+
4
+ # smftools
5
+ A Python tool for processing raw sequencing data derived from single molecule footprinting experiments into [anndata](https://anndata.readthedocs.io/en/latest/) objects. Additional functionality for preprocessing, analysis, and visualization. Data structures are compatible with analyses developed within the [scverse](https://github.com/scverse) project, including [scanpy](https://github.com/scverse/scanpy) and [scvi-tools](https://github.com/scverse/scvi-tools).
6
+
7
+ ## Philosophy
8
+ While most genomic data structures handle low-coverage data (<100X) along large references, smftools prioritizes high-coverage data (scalable to at least 1 million X coverage) of a few genomic loci at a time. This enables efficient data storage, rapid data operations, hierarchical metadata handling, seamless integration with various machine-learning packages, and ease of visualization. Furthermore, functionality is modularized, enabling analysis sessions to be saved, reloaded, and easily shared with collaborators. Analyses are centered around the [anndata](https://anndata.readthedocs.io/en/latest/) object, and are heavily inspired by the work conducted within the single-cell genomics community.
9
+
10
+ ## Dependencies
11
+ The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
12
+ 1) [Dorado](https://github.com/nanoporetech/dorado) -> For standard/modified basecalling and alignment. Can be obtained by downloading and configuring the Nanopore MinKNOW software.
13
+ 2) [Samtools](https://github.com/samtools/samtools) -> For working with SAM/BAM files
14
+ 3) [Minimap2](https://github.com/lh3/minimap2) -> The aligner used by Dorado
15
+ 4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
16
+
17
+ ## Modules
18
+ - Informatics: Processes raw SMF data coming from Nanopore POD5 files, BAM files, or FASTQ files and organizes it into an AnnData object.
19
+ - Preprocessing: Filters the AnnData object on read length, total methylation, and a variety of QC metrics.
20
+ - Tools: Appends various analyses to the AnnData object.
21
+ - Plotting: Visualization of analyses stored within the AnnData object.
22
+
23
+ ## Announcements
24
+ ### 09/09/24 - The pre-alpha phase package ([smftools-0.1.1](https://pypi.org/project/smftools/))
25
+ The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
26
+
27
+ ### 08/30/24 - The pre-alpha phase package ([smftools-0.1.0](https://pypi.org/project/smftools/)) is installable through pypi!
28
+ Currently, this package (smftools-0.1.0) is going through rapid improvement (dependency handling across Linux and Mac OS, testing, documentation, debugging) and is still too early in development for standard use. The underlying functionality was originally developed as a collection of scripts for single molecule footprinting (SMF) experiments in our lab, but is being packaged/developed to facilitate the expansion of SMF to any lab that is interested in performing these styles of experiments/analyses. The alpha-phase package is expected to be available within a couple of months, so stay tuned!
@@ -0,0 +1,20 @@
1
+ # Minimal makefile for Sphinx documentation
2
+ #
3
+
4
+ # You can set these variables from the command line, and also
5
+ # from the environment for the first two.
6
+ SPHINXOPTS ?=
7
+ SPHINXBUILD ?= sphinx-build
8
+ SOURCEDIR = source
9
+ BUILDDIR = build
10
+
11
+ # Put it first so that "make" without argument is like "make help".
12
+ help:
13
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14
+
15
+ .PHONY: help Makefile
16
+
17
+ # Catch-all target: route all unknown targets to Sphinx using the new
18
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19
+ %: Makefile
20
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -0,0 +1,35 @@
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=source
11
+ set BUILDDIR=build
12
+
13
+ %SPHINXBUILD% >NUL 2>NUL
14
+ if errorlevel 9009 (
15
+ echo.
16
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17
+ echo.installed, then set the SPHINXBUILD environment variable to point
18
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
19
+ echo.may add the Sphinx directory to PATH.
20
+ echo.
21
+ echo.If you don't have Sphinx installed, grab it from
22
+ echo.https://www.sphinx-doc.org/
23
+ exit /b 1
24
+ )
25
+
26
+ if "%1" == "" goto help
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
+
34
+ :end
35
+ popd
@@ -0,0 +1 @@
1
+
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,9 @@
1
+ ## Datasets:
2
+
3
+ ```{eval-rst}
4
+ .. module:: smftools.datasets
5
+ ```
6
+
7
+ ```{eval-rst}
8
+ .. currentmodule:: smftools
9
+ ```
@@ -0,0 +1,16 @@
1
+ # API
2
+
3
+ Import smftools as:
4
+
5
+ ```
6
+ import smftools as smf
7
+ ```
8
+
9
+ ```{toctree}
10
+ :maxdepth: 2
11
+
12
+ informatics
13
+ preprocessing
14
+ tools
15
+ datasets
16
+ ```
@@ -0,0 +1,11 @@
1
+ ## Informatics: `inform`
2
+
3
+ ```{eval-rst}
4
+ .. module:: smftools.inform
5
+ ```
6
+
7
+ ```{eval-rst}
8
+ .. currentmodule:: smftools
9
+ ```
10
+
11
+ Processes raw sequencing data to load an adata object.
@@ -0,0 +1,9 @@
1
+ ## Preprocessing: `pp`
2
+
3
+ ```{eval-rst}
4
+ .. module:: smftools.pp
5
+ ```
6
+
7
+ ```{eval-rst}
8
+ .. currentmodule:: smftools
9
+ ```
@@ -0,0 +1,9 @@
1
+ ## Tools: `tl`
2
+
3
+ ```{eval-rst}
4
+ .. module:: smftools.tl
5
+ ```
6
+
7
+ ```{eval-rst}
8
+ .. currentmodule:: smftools
9
+ ```
@@ -0,0 +1,117 @@
1
+ # Configuration file for the Sphinx documentation builder.
2
+ #
3
+ # For the full list of built-in configuration values, see the documentation:
4
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html
5
+ import sys
6
+ import os
7
+ from pathlib import Path
8
+ HERE = Path(__file__).parent
9
+ PARENT_PARENT_HERE = HERE.parents[1]
10
+ SRC_PATH = PARENT_PARENT_HERE / 'src'
11
+ sys.path.insert(0, str(SRC_PATH))
12
+ for x in os.walk(str(SRC_PATH)):
13
+ sys.path.insert(0, x[0])
14
+ print(sys.path)
15
+ try:
16
+ import smftools
17
+ print("smftools imported successfully.")
18
+ except ImportError:
19
+ print("smftools is not imported.")
20
+ # -- Project information -----------------------------------------------------
21
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
22
+
23
+ project = 'smftools'
24
+ copyright = '2024, Joseph McKenna'
25
+ author = 'Joseph McKenna'
26
+ release = '0.1.0'
27
+ repository_url = 'https://github.com/jkmckenna/smftools'
28
+
29
+ # -- General configuration ---------------------------------------------------
30
+ # Bibliography settings
31
+ bibtex_bibfiles = ["references.bib"]
32
+ bibtex_reference_style = "author_year"
33
+ nitpicky = True
34
+ needs_sphinx = "4.0"
35
+
36
+ master_doc = "index"
37
+ templates_path = ['_templates']
38
+ exclude_patterns = [
39
+ "_build",
40
+ "Thumbs.db",
41
+ ".DS_Store",
42
+ # exclude version md files
43
+ "release-notes/[!i]*.md"
44
+ ]
45
+ extensions = [
46
+ "myst_nb",
47
+ "sphinx_copybutton",
48
+ "sphinx.ext.autodoc",
49
+ "sphinx.ext.intersphinx",
50
+ "sphinx.ext.doctest",
51
+ "sphinx.ext.coverage",
52
+ "sphinx.ext.mathjax",
53
+ "sphinx.ext.napoleon",
54
+ "sphinx.ext.autosummary",
55
+ "sphinx.ext.extlinks",
56
+ "sphinxcontrib.bibtex",
57
+ "matplotlib.sphinxext.plot_directive",
58
+ "sphinx_autodoc_typehints",
59
+ "sphinx_design",
60
+ "sphinx_search.extension",
61
+ "sphinxext.opengraph",
62
+ ]
63
+
64
+ # Generate the API documentation when building
65
+ autosummary_generate = True
66
+ autodoc_member_order = "bysource"
67
+ napoleon_google_docstring = True
68
+ napoleon_numpy_docstring = False
69
+ napoleon_include_init_with_doc = False
70
+ napoleon_use_rtype = True # having a separate entry generally helps readability
71
+ napoleon_use_param = True
72
+ napoleon_custom_sections = [("Params", "Parameters")]
73
+ todo_include_todos = False
74
+ api_dir = HERE / "api"
75
+ myst_enable_extensions = [
76
+ "amsmath",
77
+ "colon_fence",
78
+ "deflist",
79
+ "dollarmath",
80
+ "html_image",
81
+ "html_admonition",
82
+ ]
83
+ myst_url_schemes = ("http", "https", "mailto", "ftp")
84
+ myst_heading_anchors = 3
85
+ nb_output_stderr = "remove"
86
+ nb_execution_mode = "off"
87
+ nb_merge_streams = True
88
+
89
+ suppress_warnings = [
90
+ "myst.header"
91
+ ]
92
+
93
+ typehints_defaults = "braces"
94
+
95
+ # html_context = {
96
+ # "display_github": True,
97
+ # "github_user": "jkmckenna",
98
+ # "github_repo": project,
99
+ # "github_version": "main",
100
+ # "conf_py_path": "/docs/source/",
101
+ # }
102
+
103
+ # -- Options for HTML output -------------------------------------------------
104
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
105
+
106
+ html_theme = "sphinx_book_theme"
107
+ html_title = project
108
+
109
+ html_theme_options = {
110
+ "repository_url": repository_url,
111
+ "use_repository_button": True,
112
+ "show_toc_level": 1,
113
+ "path_to_docs": "docs/",
114
+ "repository_branch": release,
115
+ }
116
+
117
+ html_static_path = ['_static']
@@ -0,0 +1,9 @@
1
+ # Contributors
2
+
3
+ ## Current
4
+
5
+ - Joseph McKenna, lead developer (2024)
6
+
7
+ ## Acknowledgments
8
+
9
+ - [Tjian/Darzacq Lab](https://www.tjian-darzacq.mcb.berkeley.edu/): Funding and a supportive lab environment!
@@ -0,0 +1,3 @@
1
+ (contribution-guide)=
2
+
3
+ # Contributing
@@ -0,0 +1,53 @@
1
+ smftools documentation
2
+ ======================
3
+
4
+ ```{include} ../../README.md
5
+ :end-before: '## Dependencies'
6
+ ```
7
+
8
+ ::::{grid} 1 2 2 2
9
+ :gutter: 2
10
+
11
+ :::{grid-item-card} Installation {octicon}`plug;1em;`
12
+ :link: installation
13
+ :link-type: doc
14
+
15
+ smftools installation instructions
16
+ :::
17
+
18
+ :::{grid-item-card} Tutorials {octicon}`play;1em;`
19
+ :link: tutorials/index
20
+ :link-type: doc
21
+
22
+ Jupyter notebook tutorial of smftools usage.
23
+ :::
24
+
25
+ :::{grid-item-card} API reference {octicon}`book;1em;`
26
+ :link: api/index
27
+ :link-type: doc
28
+
29
+ The API reference contains a detailed description of
30
+ the smftools API.
31
+ :::
32
+
33
+ :::{grid-item-card} GitHub {octicon}`mark-github;1em;`
34
+ :link: https://github.com/jkmckenna/smftools
35
+
36
+ smftools GitHub link
37
+ :::
38
+ ::::
39
+
40
+ ```{toctree}
41
+ :hidden: true
42
+ :maxdepth: 1
43
+
44
+ installation
45
+ tutorials/index
46
+ api/index
47
+ release-notes/index
48
+ dev/index
49
+ contributors
50
+ references
51
+ ```
52
+
53
+ [github]: https://github.com/jkmckenna/smftools
@@ -0,0 +1,20 @@
1
+ # Installation
2
+
3
+ ## PyPI version
4
+
5
+ Pull smftools from [PyPI](https://pypi.org/project/smftools):
6
+
7
+ ```shell
8
+ pip install smftools
9
+ ```
10
+
11
+ ## Development Version
12
+
13
+ ```shell
14
+ git clone https://github.com/jkmckenna/smftools.git
15
+ cd smftools
16
+ ```
17
+
18
+ ```shell
19
+ pip install .
20
+ ```