smftools 0.1.7__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. smftools/__init__.py +7 -6
  2. smftools/_version.py +1 -1
  3. smftools/cli/cli_flows.py +94 -0
  4. smftools/cli/hmm_adata.py +338 -0
  5. smftools/cli/load_adata.py +577 -0
  6. smftools/cli/preprocess_adata.py +363 -0
  7. smftools/cli/spatial_adata.py +564 -0
  8. smftools/cli_entry.py +435 -0
  9. smftools/config/__init__.py +1 -0
  10. smftools/config/conversion.yaml +38 -0
  11. smftools/config/deaminase.yaml +61 -0
  12. smftools/config/default.yaml +264 -0
  13. smftools/config/direct.yaml +41 -0
  14. smftools/config/discover_input_files.py +115 -0
  15. smftools/config/experiment_config.py +1288 -0
  16. smftools/hmm/HMM.py +1576 -0
  17. smftools/hmm/__init__.py +20 -0
  18. smftools/{tools → hmm}/apply_hmm_batched.py +8 -7
  19. smftools/hmm/call_hmm_peaks.py +106 -0
  20. smftools/{tools → hmm}/display_hmm.py +3 -3
  21. smftools/{tools → hmm}/nucleosome_hmm_refinement.py +2 -2
  22. smftools/{tools → hmm}/train_hmm.py +1 -1
  23. smftools/informatics/__init__.py +13 -9
  24. smftools/informatics/archived/deaminase_smf.py +132 -0
  25. smftools/informatics/archived/fast5_to_pod5.py +43 -0
  26. smftools/informatics/archived/helpers/archived/__init__.py +71 -0
  27. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
  28. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +87 -0
  29. smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
  30. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
  31. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
  32. smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
  33. smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
  34. smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +30 -4
  35. smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
  36. smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +4 -2
  37. smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +5 -4
  38. smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
  39. smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
  40. smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
  41. smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
  42. smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
  43. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +250 -0
  44. smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +8 -7
  45. smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +8 -12
  46. smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
  47. smftools/informatics/bam_functions.py +812 -0
  48. smftools/informatics/basecalling.py +67 -0
  49. smftools/informatics/bed_functions.py +366 -0
  50. smftools/informatics/binarize_converted_base_identities.py +172 -0
  51. smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +198 -50
  52. smftools/informatics/fasta_functions.py +255 -0
  53. smftools/informatics/h5ad_functions.py +197 -0
  54. smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +147 -61
  55. smftools/informatics/modkit_functions.py +129 -0
  56. smftools/informatics/ohe.py +160 -0
  57. smftools/informatics/pod5_functions.py +224 -0
  58. smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
  59. smftools/machine_learning/__init__.py +12 -0
  60. smftools/machine_learning/data/__init__.py +2 -0
  61. smftools/machine_learning/data/anndata_data_module.py +234 -0
  62. smftools/machine_learning/evaluation/__init__.py +2 -0
  63. smftools/machine_learning/evaluation/eval_utils.py +31 -0
  64. smftools/machine_learning/evaluation/evaluators.py +223 -0
  65. smftools/machine_learning/inference/__init__.py +3 -0
  66. smftools/machine_learning/inference/inference_utils.py +27 -0
  67. smftools/machine_learning/inference/lightning_inference.py +68 -0
  68. smftools/machine_learning/inference/sklearn_inference.py +55 -0
  69. smftools/machine_learning/inference/sliding_window_inference.py +114 -0
  70. smftools/machine_learning/models/base.py +295 -0
  71. smftools/machine_learning/models/cnn.py +138 -0
  72. smftools/machine_learning/models/lightning_base.py +345 -0
  73. smftools/machine_learning/models/mlp.py +26 -0
  74. smftools/{tools → machine_learning}/models/positional.py +3 -2
  75. smftools/{tools → machine_learning}/models/rnn.py +2 -1
  76. smftools/machine_learning/models/sklearn_models.py +273 -0
  77. smftools/machine_learning/models/transformer.py +303 -0
  78. smftools/machine_learning/training/__init__.py +2 -0
  79. smftools/machine_learning/training/train_lightning_model.py +135 -0
  80. smftools/machine_learning/training/train_sklearn_model.py +114 -0
  81. smftools/plotting/__init__.py +4 -1
  82. smftools/plotting/autocorrelation_plotting.py +609 -0
  83. smftools/plotting/general_plotting.py +1292 -140
  84. smftools/plotting/hmm_plotting.py +260 -0
  85. smftools/plotting/qc_plotting.py +270 -0
  86. smftools/preprocessing/__init__.py +15 -8
  87. smftools/preprocessing/add_read_length_and_mapping_qc.py +129 -0
  88. smftools/preprocessing/append_base_context.py +122 -0
  89. smftools/preprocessing/append_binary_layer_by_base_context.py +143 -0
  90. smftools/preprocessing/binarize.py +17 -0
  91. smftools/preprocessing/binarize_on_Youden.py +2 -2
  92. smftools/preprocessing/calculate_complexity_II.py +248 -0
  93. smftools/preprocessing/calculate_coverage.py +10 -1
  94. smftools/preprocessing/calculate_position_Youden.py +1 -1
  95. smftools/preprocessing/calculate_read_modification_stats.py +101 -0
  96. smftools/preprocessing/clean_NaN.py +17 -1
  97. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +158 -0
  98. smftools/preprocessing/filter_reads_on_modification_thresholds.py +352 -0
  99. smftools/preprocessing/flag_duplicate_reads.py +1326 -124
  100. smftools/preprocessing/invert_adata.py +12 -5
  101. smftools/preprocessing/load_sample_sheet.py +19 -4
  102. smftools/readwrite.py +1021 -89
  103. smftools/tools/__init__.py +3 -32
  104. smftools/tools/calculate_umap.py +5 -5
  105. smftools/tools/general_tools.py +3 -3
  106. smftools/tools/position_stats.py +468 -106
  107. smftools/tools/read_stats.py +115 -1
  108. smftools/tools/spatial_autocorrelation.py +562 -0
  109. {smftools-0.1.7.dist-info → smftools-0.2.3.dist-info}/METADATA +14 -9
  110. smftools-0.2.3.dist-info/RECORD +173 -0
  111. smftools-0.2.3.dist-info/entry_points.txt +2 -0
  112. smftools/informatics/fast5_to_pod5.py +0 -21
  113. smftools/informatics/helpers/LoadExperimentConfig.py +0 -75
  114. smftools/informatics/helpers/__init__.py +0 -74
  115. smftools/informatics/helpers/align_and_sort_BAM.py +0 -59
  116. smftools/informatics/helpers/aligned_BAM_to_bed.py +0 -74
  117. smftools/informatics/helpers/bam_qc.py +0 -66
  118. smftools/informatics/helpers/bed_to_bigwig.py +0 -39
  119. smftools/informatics/helpers/binarize_converted_base_identities.py +0 -79
  120. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -55
  121. smftools/informatics/helpers/index_fasta.py +0 -12
  122. smftools/informatics/helpers/make_dirs.py +0 -21
  123. smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +0 -53
  124. smftools/informatics/load_adata.py +0 -182
  125. smftools/informatics/readwrite.py +0 -106
  126. smftools/informatics/subsample_fasta_from_bed.py +0 -47
  127. smftools/preprocessing/append_C_context.py +0 -82
  128. smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -94
  129. smftools/preprocessing/filter_converted_reads_on_methylation.py +0 -44
  130. smftools/preprocessing/filter_reads_on_length.py +0 -51
  131. smftools/tools/call_hmm_peaks.py +0 -105
  132. smftools/tools/data/__init__.py +0 -2
  133. smftools/tools/data/anndata_data_module.py +0 -90
  134. smftools/tools/inference/__init__.py +0 -1
  135. smftools/tools/inference/lightning_inference.py +0 -41
  136. smftools/tools/models/base.py +0 -14
  137. smftools/tools/models/cnn.py +0 -34
  138. smftools/tools/models/lightning_base.py +0 -41
  139. smftools/tools/models/mlp.py +0 -17
  140. smftools/tools/models/sklearn_models.py +0 -40
  141. smftools/tools/models/transformer.py +0 -133
  142. smftools/tools/training/__init__.py +0 -1
  143. smftools/tools/training/train_lightning_model.py +0 -47
  144. smftools-0.1.7.dist-info/RECORD +0 -136
  145. /smftools/{tools/evaluation → cli}/__init__.py +0 -0
  146. /smftools/{tools → hmm}/calculate_distances.py +0 -0
  147. /smftools/{tools → hmm}/hmm_readwrite.py +0 -0
  148. /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
  149. /smftools/informatics/{conversion_smf.py → archived/conversion_smf.py} +0 -0
  150. /smftools/informatics/{direct_smf.py → archived/direct_smf.py} +0 -0
  151. /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
  152. /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
  153. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
  154. /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
  155. /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
  156. /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
  157. /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
  158. /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
  159. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
  160. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
  161. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
  162. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
  163. /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
  164. /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
  165. /smftools/{tools → machine_learning}/data/preprocessing.py +0 -0
  166. /smftools/{tools → machine_learning}/models/__init__.py +0 -0
  167. /smftools/{tools → machine_learning}/models/wrappers.py +0 -0
  168. /smftools/{tools → machine_learning}/utils/__init__.py +0 -0
  169. /smftools/{tools → machine_learning}/utils/device.py +0 -0
  170. /smftools/{tools → machine_learning}/utils/grl.py +0 -0
  171. /smftools/tools/{apply_hmm.py → archived/apply_hmm.py} +0 -0
  172. /smftools/tools/{classifiers.py → archived/classifiers.py} +0 -0
  173. {smftools-0.1.7.dist-info → smftools-0.2.3.dist-info}/WHEEL +0 -0
  174. {smftools-0.1.7.dist-info → smftools-0.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: smftools
3
- Version: 0.1.7
3
+ Version: 0.2.3
4
4
  Summary: Single Molecule Footprinting Analysis in Python.
5
5
  Project-URL: Source, https://github.com/jkmckenna/smftools
6
6
  Project-URL: Documentation, https://smftools.readthedocs.io/
@@ -43,9 +43,11 @@ Classifier: Programming Language :: Python :: 3.11
43
43
  Classifier: Programming Language :: Python :: 3.12
44
44
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
45
45
  Classifier: Topic :: Scientific/Engineering :: Visualization
46
- Requires-Python: >=3.9
46
+ Requires-Python: <3.13,>=3.9
47
47
  Requires-Dist: anndata>=0.10.0
48
48
  Requires-Dist: biopython>=1.79
49
+ Requires-Dist: captum
50
+ Requires-Dist: click
49
51
  Requires-Dist: fastcluster
50
52
  Requires-Dist: hydra-core
51
53
  Requires-Dist: igraph
@@ -57,15 +59,18 @@ Requires-Dist: numpy<2,>=1.22.0
57
59
  Requires-Dist: omegaconf
58
60
  Requires-Dist: pandas>=1.4.2
59
61
  Requires-Dist: pod5>=0.1.21
60
- Requires-Dist: pomegranate>=1.0.0
62
+ Requires-Dist: pybedtools>=0.12.0
63
+ Requires-Dist: pybigwig>=0.3.24
61
64
  Requires-Dist: pyfaidx>=0.8.0
62
65
  Requires-Dist: pysam>=0.19.1
63
66
  Requires-Dist: scanpy>=1.9
64
67
  Requires-Dist: scikit-learn>=1.0.2
65
68
  Requires-Dist: scipy>=1.7.3
66
69
  Requires-Dist: seaborn>=0.11
70
+ Requires-Dist: shap
67
71
  Requires-Dist: torch>=1.9.0
68
72
  Requires-Dist: tqdm
73
+ Requires-Dist: upsetplot
69
74
  Requires-Dist: wandb
70
75
  Provides-Extra: docs
71
76
  Requires-Dist: ipython>=7.20; extra == 'docs'
@@ -98,12 +103,9 @@ While most genomic data structures handle low-coverage data (<100X) along large
98
103
 
99
104
  ## Dependencies
100
105
  The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
101
- 1) [Dorado](https://github.com/nanoporetech/dorado) -> For standard/modified basecalling and alignment. Can be attained by downloading and configuring nanopore MinKnow software.
102
- 2) [Samtools](https://github.com/samtools/samtools) -> For working with SAM/BAM files
103
- 3) [Minimap2](https://github.com/lh3/minimap2) -> The aligner used by Dorado
104
- 4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
105
- 5) [Bedtools](https://github.com/arq5x/bedtools2) -> For generating Bedgraphs from BAM alignment files.
106
- 6) [BedGraphToBigWig](https://genome.ucsc.edu/goldenPath/help/bigWig.html) -> For converting BedGraphs to BigWig files for IGV sessions.
106
+ 1) [Dorado](https://github.com/nanoporetech/dorado) -> Basecalling, alignment, demultiplexing.
107
+ 2) [Minimap2](https://github.com/lh3/minimap2) -> Alignment when not using Dorado.
108
+ 3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read-level methylation metrics from modified BAM files.
107
109
 
108
110
  ## Modules
109
111
  ### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
@@ -118,6 +120,9 @@ The following CLI tools need to be installed and configured before using the inf
118
120
 
119
121
  ## Announcements
120
122
 
123
+ ### 11/05/25 - Version 0.2.1 is available through PyPI.
124
+ Version 0.2.1 exposes the core workflow as a command-line tool (smftools load) that takes an experiment_config.csv file to manage input/output paths and parameters.
125
+
121
126
  ### 05/29/25 - Version 0.1.6 is available through PyPI.
122
127
  The informatics, preprocessing, tools, and plotting modules have core functionality that is approaching stability on macOS (Intel/Apple Silicon) and Linux (Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/scikit-learn ML infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WandB to facilitate organizational scaling, multi-device usage, and logging.
123
128
 
@@ -0,0 +1,173 @@
1
+ smftools/__init__.py,sha256=aZlrZBVexf_nEnzQeZu7NU_Kp6OnxcYpLo1KPImi7sI,599
2
+ smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
3
+ smftools/_version.py,sha256=X0PliCRFAeVnSTceUeHX1eM0j1HFhGFDWCRxLdde2Bs,21
4
+ smftools/cli_entry.py,sha256=_QdtEKcVK5o-e5s9ETB9sOIdftPVlrDxvvjBKcP6YNk,14680
5
+ smftools/readwrite.py,sha256=ExKZHNZ0QB-PtSck08drXfHTqbPeSUTHiYhv951SH1s,45994
6
+ smftools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ smftools/cli/cli_flows.py,sha256=xRiFUThoAL3LX1xdXaHVg4LjyJI4uNpGsc9aQ_wVCto,4941
8
+ smftools/cli/hmm_adata.py,sha256=PApUJW0lO4kcLjsiqqQopXgL3Dg-AascIqJrgvSY1Rg,15916
9
+ smftools/cli/load_adata.py,sha256=Qt1ej-osyJ47fpBkGaSDgR1F8E4aBNAdcXeBAGM-Lqg,29100
10
+ smftools/cli/preprocess_adata.py,sha256=EKGbSTli7qvL44OQUmMalYJjsH9vn3w4Rx7U7BL0ybs,20991
11
+ smftools/cli/spatial_adata.py,sha256=AX6iyBfbXud9actteTvDuaQUU_SE3SyBIeknR317g34,30212
12
+ smftools/config/__init__.py,sha256=ObUnnR7aRSoD_uvpmsxA_BUFt4NOOfWNopDVCqjp7tg,69
13
+ smftools/config/conversion.yaml,sha256=HrFz2f9QRe1RuhmgU6ZtMHaM4ZzY61_aLcugsmpV40Q,969
14
+ smftools/config/deaminase.yaml,sha256=mw2aY222y2xg08Rs5CWvjlrXo3vaEim7JwBThA80y4o,1349
15
+ smftools/config/default.yaml,sha256=3IrX0OrUyjhVc3CqTjM8uiprKWrrBdVtil4YhtVzKdQ,10233
16
+ smftools/config/direct.yaml,sha256=SBhdtG7PKm-z5xxQmA7JV3NQsGnUJ4p58fGH8BnoMrM,2137
17
+ smftools/config/discover_input_files.py,sha256=G9vyAmK_n_8Ur5dOnumevVLG3ydHchMy_JQrJdiuuz0,3892
18
+ smftools/config/experiment_config.py,sha256=d_6f_Uv3CY-1orHbxpHtAZDsY2gwxw079_pNgR9wDUg,58837
19
+ smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
20
+ smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
21
+ smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
22
+ smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
23
+ smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
24
+ smftools/hmm/HMM.py,sha256=K8rt-EHn3ylIHpQ3dHf_OZCXxCBVSS2UWTgSGOatwHw,71046
25
+ smftools/hmm/__init__.py,sha256=BkX145eGVy-kFOtyqOcu-Hzv9ZJLDQ3cfDe51eKBTwY,585
26
+ smftools/hmm/apply_hmm_batched.py,sha256=BBeJ8DiIuuMWzLwtDdk2DO2vvrfLCrVe4JtRYPFItIU,10648
27
+ smftools/hmm/calculate_distances.py,sha256=KDWimQ6u-coyxCKrbTm42Fh_Alf_gURBZ0vfFaem848,644
28
+ smftools/hmm/call_hmm_peaks.py,sha256=T-3Ld8H4t3Mgg2whBTYP9s2QL7rY-9RIzVCgB6avKhE,4625
29
+ smftools/hmm/display_hmm.py,sha256=3WuQCPvM3wPfzAdgbhfiBTd0g5mQdx9HTUdqAxs2aj4,825
30
+ smftools/hmm/hmm_readwrite.py,sha256=DjJ3hunpBQ7N0GVvxL7-0QUas_SkA88LVgL72mVK2cI,359
31
+ smftools/hmm/nucleosome_hmm_refinement.py,sha256=nQWimvse6dclcXhbU707rGbRVMKHM0mU_ZhH9g2yCMA,4641
32
+ smftools/hmm/train_hmm.py,sha256=srzRcB9LEmNuHyBM0R5Z0VEnxecifQt-MoaJhADxGT8,2477
33
+ smftools/informatics/__init__.py,sha256=vLvSrCtCVYRUCCNLW7fL3ltPr3h_w8FhT--V6el3ZkQ,1191
34
+ smftools/informatics/bam_functions.py,sha256=otgl3TRPLn5Fnsx1jXX75du90k3XB3RHGzlfamvETsU,32670
35
+ smftools/informatics/basecalling.py,sha256=jc39jneaa8Gt1azutHgBGWHqCoPeTVSGBu3kyQwP7xM,3460
36
+ smftools/informatics/bed_functions.py,sha256=uETVxT5mRWDNn7t0OqhDi8kDiq7uDakeHB1L2JsP4PA,13377
37
+ smftools/informatics/binarize_converted_base_identities.py,sha256=yOepGaNBGfZJEsMiLRwKauvsmaHn_JRrxaGp8LmKAXs,7778
38
+ smftools/informatics/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
39
+ smftools/informatics/converted_BAM_to_adata.py,sha256=Y2kQNWly0WjjGN9El9zL1nLfjVxmPLWONvX5VNgZUh0,22554
40
+ smftools/informatics/fasta_functions.py,sha256=5IfTkX_GIj5gRJB9PjL_WjyEktpBHwGsmS_nnO1ETjI,9790
41
+ smftools/informatics/h5ad_functions.py,sha256=iAOxJjhaDslTUC78kjUHlCELigDl73sWo0fvXcKuFoI,7824
42
+ smftools/informatics/modkit_extract_to_adata.py,sha256=TrgrL_IgfqzNJ9qZ_2EvF_B38_Syw8mP38Sl7v0Riwo,55278
43
+ smftools/informatics/modkit_functions.py,sha256=lywjeqAJ7Cdd7k-0P3YaL_9cAZvEDTDLh91rIRcSMWE,5604
44
+ smftools/informatics/ohe.py,sha256=MEmh3ps-ZSSyXuIrr5LMzQvCsDJRCYiy7JS-WD4TlYs,5805
45
+ smftools/informatics/pod5_functions.py,sha256=vxwhD_d_iWpJydIpbf0uce7VGHm8sBnCwb7tLNpYBc8,9859
46
+ smftools/informatics/run_multiqc.py,sha256=n6LvQuGQpLfsutVGmgvHfV0SV5PqTQ8wa_SeKOjRssM,1052
47
+ smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
48
+ smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
49
+ smftools/informatics/archived/basecall_pod5s.py,sha256=Ynmxscsxj6qp-zVY0RWodq513oDuHDaHnpqoepB3RUU,3930
50
+ smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
51
+ smftools/informatics/archived/conversion_smf.py,sha256=QhlISVi3Z-XqFKyDG_CenLojovAt5-ZhuVe9hus36lg,7177
52
+ smftools/informatics/archived/deaminase_smf.py,sha256=mNeg1mIYYVLIiW8powEpz0CqrGRDsrmY5-aoIgwMGHs,7221
53
+ smftools/informatics/archived/direct_smf.py,sha256=ylPGFBvRLdxLHeDJjAwq98j8Q8_lfGK3k5JJnQxrwJw,7485
54
+ smftools/informatics/archived/fast5_to_pod5.py,sha256=TRG_FYYGCGWUPzZCt0ZqzB8gQv_HKvkssp9nTctWzXU,1398
55
+ smftools/informatics/archived/print_bam_query_seq.py,sha256=8Z2ZJEOOlfWYUXiZGjteLWU4yTgvV8KQzEIBHUmamGM,838
56
+ smftools/informatics/archived/subsample_fasta_from_bed.py,sha256=7YTKhXg_mtP4KWpnD-TB4nuFEL4crOa9_d84IJKllyQ,1633
57
+ smftools/informatics/archived/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
58
+ smftools/informatics/archived/helpers/archived/__init__.py,sha256=DiiBerFJAxZeG5y0ScpJSaVBJ8b4XWdfEJCh8Q7k8jU,2783
59
+ smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py,sha256=yaRfhQDh3HpsSTme6QnSqBgElCS0kv2G6TunhvR1weY,5493
60
+ smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py,sha256=N3NAOaoSt_M4V48vtTP_m_iF1tRuNIPS_uNJ3Y0IA4E,3391
61
+ smftools/informatics/archived/helpers/archived/bam_qc.py,sha256=PWl3dViCHGOcjB4UKkxBFz34Gc0PXHVTHjpYVNckVH0,7975
62
+ smftools/informatics/archived/helpers/archived/bed_to_bigwig.py,sha256=Bg9wFsavUU9Ha57n_99vYlYpVcbDUz3tLtYJ7ZFVR9k,2986
63
+ smftools/informatics/archived/helpers/archived/canoncall.py,sha256=5WS6lwukc_xYTdPQy0OSj-WLbx0Rg70Cun1lCucY7w8,1741
64
+ smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py,sha256=6GTHXG1dfaC8rBin5NthG3xgyGqOsT6wIGxJVCmCq58,9774
65
+ smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py,sha256=sRmOtn0kNosLYfogqslDHg1Azk51l6nfNOLgQOnQjlA,14591
66
+ smftools/informatics/archived/helpers/archived/count_aligned_reads.py,sha256=ZF_kkzAf1RvM4PwDYhxD36UiuVuMM_MBvZgiXom1NQ0,2176
67
+ smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py,sha256=KmU7nqGQ-MfDrp8h3txbToGn4h95Rkvg0WEiuext-vY,2000
68
+ smftools/informatics/archived/helpers/archived/extract_base_identities.py,sha256=CaFqNBjkDujYlyiUnOeRock1OQWs3CeiD3yTL96sjIs,3043
69
+ smftools/informatics/archived/helpers/archived/extract_mods.py,sha256=Mrs7mrLFgCTiRGfPFSyvJm6brq--LGzZrNDiFB-jynI,3895
70
+ smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py,sha256=SYAb4Q1HxiJzCx5bIz86MdH_TvVPsRAVodZD9082HGY,1491
71
+ smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py,sha256=Cw39wgp1eBTV45Wk1l0c9l-upBW5N2OcgyWXTAXln90,678
72
+ smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
73
+ smftools/informatics/archived/helpers/archived/find_conversion_sites.py,sha256=JPlDipmzeCBkV_T6esGD5ptwmbQmk8gJMTh7NMaSYd4,2480
74
+ smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py,sha256=Us6iH1cIhsXDnTvDxI-FEHB6ndbB30hd1ss-9dIoWVE,3819
75
+ smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py,sha256=BEroXshYSpjf5wt_vrEAFiTJmSuf-kvD-Z1B_1gusME,1000
76
+ smftools/informatics/archived/helpers/archived/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
77
+ smftools/informatics/archived/helpers/archived/index_fasta.py,sha256=w6xHFSaoXVk-YWZWftZ9Xv8rywZ_IuuIouLQ12KL3ro,779
78
+ smftools/informatics/archived/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
79
+ smftools/informatics/archived/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
80
+ smftools/informatics/archived/helpers/archived/make_modbed.py,sha256=Wh0UCSOL4fMZbWYK-3oGGHwJtqPurJ3Bl6wJWBaTXoM,923
81
+ smftools/informatics/archived/helpers/archived/modQC.py,sha256=pz2EscFgO-j-9dfNgNDseweXXqM5-a-Rj2abBLErLd0,1051
82
+ smftools/informatics/archived/helpers/archived/modcall.py,sha256=LVPrdMNVp2gyQTJ4BNp8NJNm89AueDjsKaY7Gqkluho,1777
83
+ smftools/informatics/archived/helpers/archived/ohe_batching.py,sha256=QVOiyl9fYHNIFWM23afYnQo0uaOjf1NR3ASKGVSrmuw,2975
84
+ smftools/informatics/archived/helpers/archived/ohe_layers_decode.py,sha256=gIgUC9L8TFLi-fTnjR4PRzXdUaH5D6WL2Hump6XOoy0,1042
85
+ smftools/informatics/archived/helpers/archived/one_hot_decode.py,sha256=3n4rzY8_aC9YKmgrftsguMsH7fUyQ-DbWmrOYF6la9s,906
86
+ smftools/informatics/archived/helpers/archived/one_hot_encode.py,sha256=5hHigA6-SZLK84WH_RHo06F_6aTg7S3TJgvSr8gxGX8,1968
87
+ smftools/informatics/archived/helpers/archived/plot_bed_histograms.py,sha256=78i0mYFuElTPGA2Dt1feO6Z4Grh1Nro3m-F8D5FRBOw,9914
88
+ smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py,sha256=pCLev0OQji1jBdVr25lI_gt9fsozSG8vh7TQkE_UHnY,1800
89
+ smftools/informatics/archived/helpers/archived/split_and_index_BAM.py,sha256=Q7I5qJ5JjW6mSKysfl9NdlFZ6LIy3C8G5rGmG7cn2eA,1224
90
+ smftools/machine_learning/__init__.py,sha256=cWyGN_QVcssqBr_VVr7xh2Inz0P7ylqUmBBcpMgsK0k,257
91
+ smftools/machine_learning/data/__init__.py,sha256=xbfLE-gNjdgxvZ9LKTdvjAtbIHOcs2TR0Gz3YRFbo38,113
92
+ smftools/machine_learning/data/anndata_data_module.py,sha256=ktrdMVMk5yhIUrnu-G_Xf3y7G-KP9PyhYZhobv8TCVg,10063
93
+ smftools/machine_learning/data/preprocessing.py,sha256=dSs6Qs3wmlccFPZSpOc-uy1nlFSf68wWQKwF1iTqMok,137
94
+ smftools/machine_learning/evaluation/__init__.py,sha256=KHvcC7bTYv-ThptAi6G8wD-hW5Iz1HPgMcQ3AewtK3c,122
95
+ smftools/machine_learning/evaluation/eval_utils.py,sha256=t9WIevIJ6b6HqU6OYaNx7UBAa5TEIPFmZow6n_ZDZeY,1105
96
+ smftools/machine_learning/evaluation/evaluators.py,sha256=KqYHqbVV2WOs0Yo4GIhLS_0h1oKY6nd1yi6piDWYQLg,8184
97
+ smftools/machine_learning/inference/__init__.py,sha256=vWLQD-JNEKKNGuzDtx7vcE4czKKXEO6S-0Zp5-21fPs,172
98
+ smftools/machine_learning/inference/inference_utils.py,sha256=aJuXvTgC8v4BOjLCgOU9vT3S2y1UGoZjq4mQpPswTQU,947
99
+ smftools/machine_learning/inference/lightning_inference.py,sha256=34WVnPfpPDf4KM8ZN5MOsx4tYgsrUclkens6GXgB4Ek,2160
100
+ smftools/machine_learning/inference/sklearn_inference.py,sha256=FomgQF5jFBfAj1-H2Q0_RPmvR9rDJsmUeaWOVRhbpTw,1612
101
+ smftools/machine_learning/inference/sliding_window_inference.py,sha256=8zjQs2hGhj0Dww4gWljLVK0g002_U96dyIqQJiDdSDY,4426
102
+ smftools/machine_learning/models/__init__.py,sha256=bMfPbQ5bDmn_kWv82virLuUhjb12Yow7t_j96afNbyA,421
103
+ smftools/machine_learning/models/base.py,sha256=p3d77iyY8BVx0tYL0TjmOSnPNP1ZrKTzn_J05e2GF0A,9626
104
+ smftools/machine_learning/models/cnn.py,sha256=KKZmJLQ6Bjm_HI8GULnafjz6mRy5BZ6Y0ZCgDSuS268,4465
105
+ smftools/machine_learning/models/lightning_base.py,sha256=3nC3wajPIupFMtOq3YUf24_SHvDoW_9BIGyIvEwzN9w,13626
106
+ smftools/machine_learning/models/mlp.py,sha256=Y2hc_qHj6vpM_mHpreFxBULn4MkR25oEA1LXu5sPA_w,820
107
+ smftools/machine_learning/models/positional.py,sha256=EfTyYnY0pCB-aVJIWf-4DVNpyGlvx1q_09PzfrC-VlA,652
108
+ smftools/machine_learning/models/rnn.py,sha256=uJnHDGpT2_l_HqHGsx33XGF3v3EYZPeOtSQ89uvhdpE,717
109
+ smftools/machine_learning/models/sklearn_models.py,sha256=ssV-mR3rmcjycQEzKccRcbVaEjZp0zRNUL5-R6m1UKU,10402
110
+ smftools/machine_learning/models/transformer.py,sha256=8YXS0vCcOWT-33h-8yeDfFM5ibPHQ-CMSEhGWzR4pm8,11039
111
+ smftools/machine_learning/models/wrappers.py,sha256=HEY2A6-Bk6MtVZ9jOaPT8S1Qi0L98SyEg1nbKqYZoag,697
112
+ smftools/machine_learning/training/__init__.py,sha256=teUmwpnmAl0oNFaqVrfoijEpxBjLwI5YtBwLHT3uXck,185
113
+ smftools/machine_learning/training/train_lightning_model.py,sha256=usEBaQ4vNjfatefP5XDCXkywzgZ2D-YppGmT3-3gTGE,4070
114
+ smftools/machine_learning/training/train_sklearn_model.py,sha256=m1k1Gsynpj6SJI64rl4B3cfXm1SliU0fwMAj1-bAAeE,3166
115
+ smftools/machine_learning/utils/__init__.py,sha256=yOpzBc9AXbarSRfN8Ixh2Z1uWLGpgpjRR46h6E46_2w,62
116
+ smftools/machine_learning/utils/device.py,sha256=GITrULOty2Fr96Bqt1wi1PaYl_oVgB5Z99Gfn5vQy4o,274
117
+ smftools/machine_learning/utils/grl.py,sha256=BWBDp_kQBigrUzQpRbZzgpfr_WOcd2K2V3MQL-aAIc4,334
118
+ smftools/plotting/__init__.py,sha256=7T3-hZFgTY0nfQgV4J6Vn9ogwkNMlY315kguZR7V1AI,866
119
+ smftools/plotting/autocorrelation_plotting.py,sha256=cF9X3CgKiwzL79mgMUFO1tSqdybDoPN1COQQ567InCY,27455
120
+ smftools/plotting/classifiers.py,sha256=8_zabh4NNB1_yVxLD22lfrfl5yfzbEoG3XWqlIqdtrQ,13786
121
+ smftools/plotting/general_plotting.py,sha256=2JzE7agm_tILpQ67BHs5pdyPRsHBwcENZe7n4gfMWgM,61350
122
+ smftools/plotting/hmm_plotting.py,sha256=3Eq82gty_0b8GkSMCQgUlbKfzR9h2fJ5rZkB8yYGX-M,10934
123
+ smftools/plotting/position_stats.py,sha256=4XukYIWeWZ_aGSZg1K0t37KA2aknjNNKT5kcKFfuz8Q,17428
124
+ smftools/plotting/qc_plotting.py,sha256=q5Ri0q89udvNUFUNxHzgk9atvQYqUkqkS5-JFq9EqoI,10045
125
+ smftools/preprocessing/__init__.py,sha256=GAQBULUH7fGVabzK5Cq5Wj-0ew0vNA-jWQtR5LAowvs,1746
126
+ smftools/preprocessing/add_read_length_and_mapping_qc.py,sha256=zD_Kxw3DvyOypfuSMGv0ESyt-02w4XlAAMqQxb7yDNQ,5700
127
+ smftools/preprocessing/append_base_context.py,sha256=wGBAADePnys8DLUR15MpRe2BUcfCMDJWaCDDNyjn6AU,6209
128
+ smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=s-7t-VKCs9Y67pX7kH6DNCEkC-RW4nM-UPsBQV2ZwtE,6186
129
+ smftools/preprocessing/binarize.py,sha256=6Vr7Z8zgtJ5rS_uPAx1n3EnQR670V33DlZ_95JmOeWc,484
130
+ smftools/preprocessing/binarize_on_Youden.py,sha256=HGs4p7XiOSYU3_z8QswNHIA9HlrI-7Pp1Kggrn6yUnI,1834
131
+ smftools/preprocessing/binary_layers_to_ohe.py,sha256=Lxd8knelNTaUozfGMFNMlnrOb6uP28Laj3Ymw6cRHL0,1826
132
+ smftools/preprocessing/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
133
+ smftools/preprocessing/calculate_complexity_II.py,sha256=DGfl0jkuBPUpzhKVItN0W7EPzh-QYuR4IxRObPE6gAQ,9301
134
+ smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
135
+ smftools/preprocessing/calculate_coverage.py,sha256=4WTILzKLzxGLSsQrZkshXP-IRQpoVu3Fkqc0QTpux3Y,2132
136
+ smftools/preprocessing/calculate_pairwise_differences.py,sha256=5zJbNNaFld5qgKRoPyplCmMHflbvAQ9eKWCXPXPpJ60,1774
137
+ smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
138
+ smftools/preprocessing/calculate_position_Youden.py,sha256=yaSd6UDXPCddoN1UR6LgTqE5teJ79Ldw0BAlemc9fB4,7453
139
+ smftools/preprocessing/calculate_read_length_stats.py,sha256=gNNePwMqYZJidzGgT1ZkfSlvc5Y3I3bi5KNYpP6wQQc,4584
140
+ smftools/preprocessing/calculate_read_modification_stats.py,sha256=mIlLBqNflVIkuoLxhbyujq3JEKyPl8iebhUlikB9brM,4775
141
+ smftools/preprocessing/clean_NaN.py,sha256=IOcnN5YF05gpPQc3cc3IS83petCnhCpkYiyT6bXEyx0,1937
142
+ smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=GZcvr2JCsthX8EMw34S9-W3fc6JElw6ka99Jy6f2JvA,1292
143
+ smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=93LgTy_vsPnOZgoiXhZ1-w_pix2oFdBk-dsBUoz33Go,7379
144
+ smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=4TUvChkSH8R4p_0TpRCh7TounkdUgQHh71TGNmsZ29A,19355
145
+ smftools/preprocessing/flag_duplicate_reads.py,sha256=MySI9En6xVp0FqL7hfiLw0EP3JnGVJWM_yZfkvN-m1U,65585
146
+ smftools/preprocessing/invert_adata.py,sha256=HYMJ1sR3Ui8j6bDjY8OcVQOETzZV-_rrpIYaWLZL6S4,1049
147
+ smftools/preprocessing/load_sample_sheet.py,sha256=AjJf2MrqGHJJ2rNjYi09zV1QkLTq8qGaHGVklXHnPuU,1908
148
+ smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
149
+ smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
150
+ smftools/preprocessing/recipes.py,sha256=cfKEpKW8TtQLe1CMdSHyPuIgKiWOPn7uP6uMIoRlnaQ,7063
151
+ smftools/preprocessing/subsample_adata.py,sha256=ivJvJIOvEtyvAjqZ7cwEeVedm4QgJxCJEI7sFaTuI3w,2360
152
+ smftools/preprocessing/archives/mark_duplicates.py,sha256=kwfstcWb7KkqeNB321dB-NLe8yd9_hZsSmpL8pCVBQg,8747
153
+ smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
154
+ smftools/preprocessing/archives/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
155
+ smftools/tools/__init__.py,sha256=QV3asy5_lP9wcRzpNTfxGTCcpykkbNYvzxSMpFw4KXU,719
156
+ smftools/tools/calculate_umap.py,sha256=2arbAQdFOtnWoPq22TWicyr6fLYZ5PTNeZv_jdwuk_I,2491
157
+ smftools/tools/cluster_adata_on_methylation.py,sha256=UDC5lpW8fZ6O-16ETu-mbflLkNBKuIg7RIzQ9r7knvA,5760
158
+ smftools/tools/general_tools.py,sha256=YbobB6Zllz6cUq50yolGH9Jr6uuAMvEI4m3hiJ6FmAI,2561
159
+ smftools/tools/position_stats.py,sha256=Z7VW54wUVzH1RQ9xhP6KO7ewp-xeLybd07I5umV_aqM,24369
160
+ smftools/tools/read_stats.py,sha256=w3Zaim6l__Kt8EPCJKXTlMgO51Iy2Milj6yUb88HXiI,6324
161
+ smftools/tools/spatial_autocorrelation.py,sha256=uQkuPi2PJCj5lZzb33IWTL-e-p3J6PdMeM88rUFfQRw,21212
162
+ smftools/tools/subset_adata.py,sha256=nBbtAxCNteZCUBmPnZ9swQNyU74XgWM8aJHHWg2AuL0,1025
163
+ smftools/tools/archived/apply_hmm.py,sha256=pJXCULay0zbmubrwql368y7yiHAZr2bJhuGx2QUuKnE,9321
164
+ smftools/tools/archived/classifiers.py,sha256=mwSTpWUXBPjmUuV5i_SMG1lIPpHSMCzsKhl8wTbm-Og,36903
165
+ smftools/tools/archived/classify_methylated_features.py,sha256=Z0N2UKw3luD3CTQ8wcUvdnMY7w-8574OJbEcwzNsy88,2897
166
+ smftools/tools/archived/classify_non_methylated_features.py,sha256=IJERTozEs7IPL7K-VIjq2q2K36wRCW9iiNSYLAXasrA,3256
167
+ smftools/tools/archived/subset_adata_v1.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
168
+ smftools/tools/archived/subset_adata_v2.py,sha256=OKZoUpvdURPtckIQxGTWmOI5jLa-_EU62Xs3LyyehnA,1880
169
+ smftools-0.2.3.dist-info/METADATA,sha256=w_PRsBPndPoTQZviW9WTuiZV1Pk3ukeJ155OvC4E57M,8787
170
+ smftools-0.2.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
171
+ smftools-0.2.3.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
172
+ smftools-0.2.3.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
173
+ smftools-0.2.3.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ smftools = smftools.cli_entry:cli
@@ -1,21 +0,0 @@
1
- # fast5_to_pod5
2
-
3
def fast5_to_pod5(fast5_dir, output_pod5='FAST5s_to_POD5.pod5'):
    """
    Convert Nanopore FAST5 file(s) to a single POD5 file using the `pod5` command line tool.

    Parameters:
        fast5_dir (str): Path to a single FAST5 file, or to a directory whose `*.fast5` files should all be converted into a single POD5 output.
        output_pod5 (str): The name of the output POD5.

    Returns:
        None
        If `fast5_dir` does not exist, or is a directory without any `*.fast5` files, a message is printed and `pod5` is not invoked.
    """
    import subprocess
    from pathlib import Path

    source = Path(fast5_dir)

    if source.is_file():
        inputs = [fast5_dir]
    elif source.is_dir():
        # subprocess.run() with an argument list does not go through a shell, so a
        # "*.fast5" pattern would reach pod5 unexpanded. Expand the glob here instead.
        inputs = sorted(str(path) for path in source.glob("*.fast5"))
        if not inputs:
            print(f"No FAST5 files found in {fast5_dir}; skipping POD5 conversion.")
            return
    else:
        print(f"{fast5_dir} is neither a file nor a directory; skipping POD5 conversion.")
        return

    subprocess.run(["pod5", "convert", "fast5", *inputs, "--output", output_pod5])
@@ -1,75 +0,0 @@
1
- ## LoadExperimentConfig
2
-
3
class LoadExperimentConfig:
    """
    Loads in the experiment configuration csv and stores the parsed experiment configuration parameters.

    The csv must contain the columns `variable`, `value` and `type`. Each row is converted to the
    Python type named in its `type` column (`list`, `int`, `float`, `bool` or `string`); any other
    type name leaves the value as read by pandas. Missing values, empty strings and the literal
    string 'None' are stored as None. If a variable appears more than once, the last row wins.

    Parameters:
        experiment_config (str): A string representing the file path to the experiment configuration csv file.

    Attributes:
        var_dict (dict): A dictionary containing experiment configuration parameters.

    Example:
        >>> import pandas as pd
        >>> from io import StringIO
        >>> csv_data = '''variable,value,type
        ... mapping_threshold,0.05,float
        ... batch_size,4,int
        ... testing_bool,True,bool
        ... strands,"[bottom, top]",list
        ... split_dir,split_bams,string
        ... pod5_dir,None,string
        ... output_dir,,string
        ... '''
        >>> csv_file = StringIO(csv_data)
        >>> df = pd.read_csv(csv_file)
        >>> df.to_csv('test_config.csv', index=False)
        >>> config_loader = LoadExperimentConfig('test_config.csv')
        >>> config_loader.var_dict['mapping_threshold']
        0.05
        >>> config_loader.var_dict['batch_size']
        4
        >>> config_loader.var_dict['testing_bool']
        True
        >>> config_loader.var_dict['strands']
        ['bottom', 'top']
        >>> config_loader.var_dict['split_dir']
        'split_bams'
        >>> config_loader.var_dict['pod5_dir'] is None
        True
        >>> config_loader.var_dict['output_dir'] is None
        True
    """
    def __init__(self, experiment_config):
        import pandas as pd
        print(f"Loading experiment config from {experiment_config}")
        # Read the CSV into a pandas DataFrame
        df = pd.read_csv(experiment_config)
        # Initialize an empty dictionary to store variables
        var_dict = {}
        # Iterate through each row in the DataFrame
        for _, row in df.iterrows():
            var_name = str(row['variable'])
            value = row['value']
            dtype = row['type']
            # Handle empty and None values
            if pd.isna(value) or value in ['None', '']:
                value = None
            else:
                value = self._coerce(value, dtype)
            # Store the variable in the dictionary
            var_dict[var_name] = value
        # Save the dictionary as an attribute of the class
        self.var_dict = var_dict

    @staticmethod
    def _coerce(value, dtype):
        """Convert one raw csv cell to the Python type named by `dtype`."""
        # pandas infers the dtype of the whole `value` column, so a cell may arrive as a
        # bool or a number rather than a str. Go through str() before using string methods.
        if dtype == 'list':
            # Accept "[a, b]" / "(a,b)" and tolerate arbitrary spacing around the items
            items = str(value).strip().strip('()[]').split(',')
            return [item.strip() for item in items if item.strip()]
        if dtype == 'int':
            return int(value)
        if dtype == 'float':
            return float(value)
        if dtype == 'bool':
            return str(value).strip().lower() == 'true'
        if dtype == 'string':
            return str(value)
        # Unknown type names leave the value exactly as pandas read it
        return value
75
-
@@ -1,74 +0,0 @@
1
- from .align_and_sort_BAM import align_and_sort_BAM
2
- from .aligned_BAM_to_bed import aligned_BAM_to_bed
3
- from .bam_qc import bam_qc
4
- from .bed_to_bigwig import bed_to_bigwig
5
- from .binarize_converted_base_identities import binarize_converted_base_identities
6
- from .canoncall import canoncall
7
- from .complement_base_list import complement_base_list
8
- from .converted_BAM_to_adata_II import converted_BAM_to_adata_II
9
- from .concatenate_fastqs_to_bam import concatenate_fastqs_to_bam
10
- from .count_aligned_reads import count_aligned_reads
11
- from .demux_and_index_BAM import demux_and_index_BAM
12
- from .extract_base_identities import extract_base_identities
13
- from .extract_mods import extract_mods
14
- from .extract_read_features_from_bam import extract_read_features_from_bam
15
- from .extract_read_lengths_from_bed import extract_read_lengths_from_bed
16
- from .extract_readnames_from_BAM import extract_readnames_from_BAM
17
- from .find_conversion_sites import find_conversion_sites
18
- from .generate_converted_FASTA import convert_FASTA_record, generate_converted_FASTA
19
- from .get_chromosome_lengths import get_chromosome_lengths
20
- from .get_native_references import get_native_references
21
- from .index_fasta import index_fasta
22
- from .LoadExperimentConfig import LoadExperimentConfig
23
- from .make_dirs import make_dirs
24
- from .make_modbed import make_modbed
25
- from .modcall import modcall
26
- from .modkit_extract_to_adata import modkit_extract_to_adata
27
- from .modQC import modQC
28
- from .one_hot_encode import one_hot_encode
29
- from .ohe_batching import ohe_batching
30
- from .one_hot_decode import one_hot_decode
31
- from .ohe_layers_decode import ohe_layers_decode
32
- from .plot_read_length_and_coverage_histograms import plot_read_length_and_coverage_histograms
33
- from .run_multiqc import run_multiqc
34
- from .separate_bam_by_bc import separate_bam_by_bc
35
- from .split_and_index_BAM import split_and_index_BAM
36
-
37
- __all__ = [
38
- "align_and_sort_BAM",
39
- "aligned_BAM_to_bed",
40
- "bam_qc",
41
- "bed_to_bigwig",
42
- "binarize_converted_base_identities",
43
- "canoncall",
44
- "complement_base_list",
45
- "converted_BAM_to_adata_II",
46
- "concatenate_fastqs_to_bam",
47
- "count_aligned_reads",
48
- "demux_and_index_BAM",
49
- "extract_base_identities",
50
- "extract_mods",
51
- "extract_read_features_from_bam",
52
- "extract_read_lengths_from_bed",
53
- "extract_readnames_from_BAM",
54
- "find_conversion_sites",
55
- "convert_FASTA_record",
56
- "generate_converted_FASTA",
57
- "get_chromosome_lengths",
58
- "get_native_references",
59
- "index_fasta",
60
- "LoadExperimentConfig",
61
- "make_dirs",
62
- "make_modbed",
63
- "modcall",
64
- "modkit_extract_to_adata",
65
- "modQC",
66
- "one_hot_encode",
67
- "ohe_batching",
68
- "one_hot_decode",
69
- "ohe_layers_decode",
70
- "plot_read_length_and_coverage_histograms",
71
- "run_multiqc",
72
- "separate_bam_by_bc",
73
- "split_and_index_BAM"
74
- ]
@@ -1,59 +0,0 @@
1
- ## align_and_sort_BAM
2
-
3
def align_and_sort_BAM(fasta, input, bam_suffix='.bam', output_directory='aligned_outputs', make_bigwigs=False, threads=None):
    """
    A wrapper for running dorado aligner and samtools sort/index.

    Parameters:
        fasta (str): File path to the reference genome to align to.
        input (str): File path to the basecalled file to align. Works for .bam and .fastq files
        bam_suffix (str): The suffix to use for the BAM file.
        output_directory (str): A file path to the directory to output all the analyses. Created if it does not exist.
        make_bigwigs (bool): Whether to make bigwigs. Accepted for interface compatibility; not used inside this function.
        threads (int): Number of additional threads to use

    Returns:
        None
        The function writes out files for: 1) An aligned BAM, 2) an aligned_sorted BAM, 3) an index file for the aligned_sorted BAM.
        Output names are built from the part of the input file name before its first '.'.
    """
    import subprocess
    import os

    # Everything before the first dot is the sample stem ("reads.fastq.gz" -> "reads").
    # Using split('.')[0] also copes with inputs that carry no extension at all.
    input_stem = os.path.basename(input).split('.')[0]

    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
    output_path_minus_suffix = os.path.join(output_directory, input_stem)

    aligned_output = f"{output_path_minus_suffix}_aligned{bam_suffix}"
    aligned_sorted_output = f"{output_path_minus_suffix}_aligned_sorted{bam_suffix}"

    # dorado and samtools spell their thread option differently
    dorado_threads = ["-t", str(threads)] if threads else []
    samtools_threads = ["-@", str(threads)] if threads else []

    # Run dorado aligner
    print(f"Aligning BAM to Reference: {input}")
    alignment_command = ["dorado", "aligner", *dorado_threads, '--mm2-opts', "-N 1", fasta, input]
    # The aligner writes its result to stdout; the context manager guarantees the handle is closed
    with open(aligned_output, "wb") as aligned_handle:
        subprocess.run(alignment_command, stdout=aligned_handle)

    # Sort the BAM on positional coordinates
    print(f"Sorting BAM: {aligned_output}")
    sort_command = ["samtools", "sort", *samtools_threads, "-o", aligned_sorted_output, aligned_output]
    subprocess.run(sort_command)

    # Create a BAM index file
    print(f"Indexing BAM: {aligned_sorted_output}")
    index_command = ["samtools", "index", *samtools_threads, aligned_sorted_output]
    subprocess.run(index_command)
@@ -1,74 +0,0 @@
1
def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
    """
    Takes an aligned BAM as input and writes a BED file of reads as output.
    Bed columns are: Record name, start position, end position, read length, read name.

    The BED is written to `<out_dir>/beds`, then split into `*_aligned.bed` and `*_unaligned.bed`
    (records whose line starts with '*' are treated as unaligned) and the unsplit BED is removed.
    Read length / coverage histograms are plotted into `<out_dir>/bed_cov_histograms` and,
    optionally, bigwigs are generated from the aligned BED.

    Parameters:
        aligned_BAM (str): Path to an input aligned_BAM to extract to a BED file.
        out_dir (str): Directory to output files.
        fasta (str): File path to the reference genome.
        make_bigwigs (bool): Whether to generate bigwig files.
        threads (int): Number of threads to use. Defaults to os.cpu_count() when not given.

    Returns:
        None
    """
    import subprocess
    import os
    import concurrent.futures
    from concurrent.futures import ProcessPoolExecutor
    from .bed_to_bigwig import bed_to_bigwig
    from . import make_dirs
    from .plot_read_length_and_coverage_histograms import plot_read_length_and_coverage_histograms

    threads = threads or os.cpu_count()  # Use max available cores if not specified

    # Create necessary directories
    plotting_dir = os.path.join(out_dir, "bed_cov_histograms")
    bed_dir = os.path.join(out_dir, "beds")
    make_dirs([plotting_dir, bed_dir])

    # Swap only the extension; str.replace(".bam", ...) would rewrite every occurrence and
    # leave names without a ".bam" extension untouched.
    bam_stem = os.path.splitext(os.path.basename(aligned_BAM))[0]
    bed_output = os.path.join(bed_dir, f"{bam_stem}_bed.bed")

    print(f"Creating BED from BAM: {aligned_BAM} using {threads} threads...")

    # Convert BAM to BED format: samtools view | awk > bed_output
    with open(bed_output, "w") as output_file:
        samtools_view = subprocess.Popen(["samtools", "view", "-@", str(threads), aligned_BAM], stdout=subprocess.PIPE)
        awk_process = subprocess.Popen(
            ["awk", '{print $3 "\t" $4 "\t" $4+length($10)-1 "\t" length($10)-1 "\t" $1}'],
            stdin=samtools_view.stdout,
            stdout=output_file
        )

        # Drop our copy of the pipe's read end so awk is the only reader
        samtools_view.stdout.close()
        awk_process.wait()
        samtools_view.wait()

    print(f"BED file created: {bed_output}")

    def split_bed(bed):
        """Splits BED into aligned and unaligned reads."""
        # Build sibling names from the extension only, so a ".bed" elsewhere in the path is left alone
        root, ext = os.path.splitext(bed)
        aligned = f"{root}_aligned{ext}"
        unaligned = f"{root}_unaligned{ext}"

        with open(bed, "r") as infile, open(aligned, "w") as aligned_out, open(unaligned, "w") as unaligned_out:
            for line in infile:
                (unaligned_out if line.startswith("*") else aligned_out).write(line)

        os.remove(bed)
        return aligned

    print(f"Splitting BED: {bed_output}")
    aligned_bed = split_bed(bed_output)

    with ProcessPoolExecutor() as executor:  # Use processes instead of threads
        futures = []
        futures.append(executor.submit(plot_read_length_and_coverage_histograms, aligned_bed, plotting_dir))
        if make_bigwigs:
            futures.append(executor.submit(bed_to_bigwig, fasta, aligned_bed))

        # Wait for all tasks to complete
        concurrent.futures.wait(futures)
        # wait() does not re-raise worker errors; collect them so failures are not reported as success
        failures = [future.exception() for future in futures if future.exception() is not None]

    if failures:
        for error in failures:
            print(f"Post-processing task failed: {error!r}")
    else:
        print("Processing completed successfully.")
@@ -1,66 +0,0 @@
1
- ## bam_qc
2
-
3
def bam_qc(bam_files, bam_qc_dir, threads, modality, stats=True, flagstats=True, idxstats=True):
    """
    Run the selected samtools QC reports on each BAM file and save their output as text files.

    For every BAM, each enabled report is written to `<bam_qc_dir>/<bam name><suffix>`, where the
    suffix is `_stats.txt`, `_flagstat.txt` or `_idxstats.txt`. The directory is created if needed.

    Parameters:
    - bam_files: List of BAM file paths.
    - bam_qc_dir: Directory to save QC reports.
    - threads: Number threads to use. When falsy, no thread option is passed to samtools.
    - modality: 'conversion' or 'direct'. Currently has no effect on the processing.
    - stats: Run `samtools stats` if True.
    - flagstats: Run `samtools flagstat` if True.
    - idxstats: Run `samtools idxstats` if True.
    """
    import os
    import subprocess

    # Make sure there is somewhere to put the reports
    os.makedirs(bam_qc_dir, exist_ok=True)

    # Optional thread arguments shared by every samtools call
    thread_args = ["-@", str(threads)] if threads else []

    # (enabled?, samtools subcommand, output file suffix), in the order the reports are run
    reports = (
        (stats, "stats", "_stats.txt"),
        (flagstats, "flagstat", "_flagstat.txt"),
        (idxstats, "idxstats", "_idxstats.txt"),
    )

    for bam_path in bam_files:
        # File name with ".bam" removed
        sample_name = os.path.basename(bam_path).replace(".bam", "")

        for enabled, subcommand, suffix in reports:
            if not enabled:
                continue
            report_path = os.path.join(bam_qc_dir, f"{sample_name}{suffix}")
            command = ["samtools", subcommand, *thread_args, bam_path]
            print(f"Running: {' '.join(command)} > {report_path}")
            with open(report_path, "w") as report_file:
                subprocess.run(command, stdout=report_file)

    # No modality-specific processing exists yet for 'conversion' or 'direct'

    print("QC processing completed.")