smftools 0.1.7__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +7 -6
- smftools/_version.py +1 -1
- smftools/cli/cli_flows.py +94 -0
- smftools/cli/hmm_adata.py +338 -0
- smftools/cli/load_adata.py +577 -0
- smftools/cli/preprocess_adata.py +363 -0
- smftools/cli/spatial_adata.py +564 -0
- smftools/cli_entry.py +435 -0
- smftools/config/__init__.py +1 -0
- smftools/config/conversion.yaml +38 -0
- smftools/config/deaminase.yaml +61 -0
- smftools/config/default.yaml +264 -0
- smftools/config/direct.yaml +41 -0
- smftools/config/discover_input_files.py +115 -0
- smftools/config/experiment_config.py +1288 -0
- smftools/hmm/HMM.py +1576 -0
- smftools/hmm/__init__.py +20 -0
- smftools/{tools → hmm}/apply_hmm_batched.py +8 -7
- smftools/hmm/call_hmm_peaks.py +106 -0
- smftools/{tools → hmm}/display_hmm.py +3 -3
- smftools/{tools → hmm}/nucleosome_hmm_refinement.py +2 -2
- smftools/{tools → hmm}/train_hmm.py +1 -1
- smftools/informatics/__init__.py +13 -9
- smftools/informatics/archived/deaminase_smf.py +132 -0
- smftools/informatics/archived/fast5_to_pod5.py +43 -0
- smftools/informatics/archived/helpers/archived/__init__.py +71 -0
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
- smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +87 -0
- smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
- smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
- smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
- smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
- smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
- smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +30 -4
- smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
- smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +4 -2
- smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +5 -4
- smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
- smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
- smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
- smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
- smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +250 -0
- smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +8 -7
- smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +8 -12
- smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
- smftools/informatics/bam_functions.py +812 -0
- smftools/informatics/basecalling.py +67 -0
- smftools/informatics/bed_functions.py +366 -0
- smftools/informatics/binarize_converted_base_identities.py +172 -0
- smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +198 -50
- smftools/informatics/fasta_functions.py +255 -0
- smftools/informatics/h5ad_functions.py +197 -0
- smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +147 -61
- smftools/informatics/modkit_functions.py +129 -0
- smftools/informatics/ohe.py +160 -0
- smftools/informatics/pod5_functions.py +224 -0
- smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
- smftools/machine_learning/__init__.py +12 -0
- smftools/machine_learning/data/__init__.py +2 -0
- smftools/machine_learning/data/anndata_data_module.py +234 -0
- smftools/machine_learning/evaluation/__init__.py +2 -0
- smftools/machine_learning/evaluation/eval_utils.py +31 -0
- smftools/machine_learning/evaluation/evaluators.py +223 -0
- smftools/machine_learning/inference/__init__.py +3 -0
- smftools/machine_learning/inference/inference_utils.py +27 -0
- smftools/machine_learning/inference/lightning_inference.py +68 -0
- smftools/machine_learning/inference/sklearn_inference.py +55 -0
- smftools/machine_learning/inference/sliding_window_inference.py +114 -0
- smftools/machine_learning/models/base.py +295 -0
- smftools/machine_learning/models/cnn.py +138 -0
- smftools/machine_learning/models/lightning_base.py +345 -0
- smftools/machine_learning/models/mlp.py +26 -0
- smftools/{tools → machine_learning}/models/positional.py +3 -2
- smftools/{tools → machine_learning}/models/rnn.py +2 -1
- smftools/machine_learning/models/sklearn_models.py +273 -0
- smftools/machine_learning/models/transformer.py +303 -0
- smftools/machine_learning/training/__init__.py +2 -0
- smftools/machine_learning/training/train_lightning_model.py +135 -0
- smftools/machine_learning/training/train_sklearn_model.py +114 -0
- smftools/plotting/__init__.py +4 -1
- smftools/plotting/autocorrelation_plotting.py +609 -0
- smftools/plotting/general_plotting.py +1292 -140
- smftools/plotting/hmm_plotting.py +260 -0
- smftools/plotting/qc_plotting.py +270 -0
- smftools/preprocessing/__init__.py +15 -8
- smftools/preprocessing/add_read_length_and_mapping_qc.py +129 -0
- smftools/preprocessing/append_base_context.py +122 -0
- smftools/preprocessing/append_binary_layer_by_base_context.py +143 -0
- smftools/preprocessing/binarize.py +17 -0
- smftools/preprocessing/binarize_on_Youden.py +2 -2
- smftools/preprocessing/calculate_complexity_II.py +248 -0
- smftools/preprocessing/calculate_coverage.py +10 -1
- smftools/preprocessing/calculate_position_Youden.py +1 -1
- smftools/preprocessing/calculate_read_modification_stats.py +101 -0
- smftools/preprocessing/clean_NaN.py +17 -1
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +158 -0
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +352 -0
- smftools/preprocessing/flag_duplicate_reads.py +1326 -124
- smftools/preprocessing/invert_adata.py +12 -5
- smftools/preprocessing/load_sample_sheet.py +19 -4
- smftools/readwrite.py +1021 -89
- smftools/tools/__init__.py +3 -32
- smftools/tools/calculate_umap.py +5 -5
- smftools/tools/general_tools.py +3 -3
- smftools/tools/position_stats.py +468 -106
- smftools/tools/read_stats.py +115 -1
- smftools/tools/spatial_autocorrelation.py +562 -0
- {smftools-0.1.7.dist-info → smftools-0.2.3.dist-info}/METADATA +14 -9
- smftools-0.2.3.dist-info/RECORD +173 -0
- smftools-0.2.3.dist-info/entry_points.txt +2 -0
- smftools/informatics/fast5_to_pod5.py +0 -21
- smftools/informatics/helpers/LoadExperimentConfig.py +0 -75
- smftools/informatics/helpers/__init__.py +0 -74
- smftools/informatics/helpers/align_and_sort_BAM.py +0 -59
- smftools/informatics/helpers/aligned_BAM_to_bed.py +0 -74
- smftools/informatics/helpers/bam_qc.py +0 -66
- smftools/informatics/helpers/bed_to_bigwig.py +0 -39
- smftools/informatics/helpers/binarize_converted_base_identities.py +0 -79
- smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -55
- smftools/informatics/helpers/index_fasta.py +0 -12
- smftools/informatics/helpers/make_dirs.py +0 -21
- smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +0 -53
- smftools/informatics/load_adata.py +0 -182
- smftools/informatics/readwrite.py +0 -106
- smftools/informatics/subsample_fasta_from_bed.py +0 -47
- smftools/preprocessing/append_C_context.py +0 -82
- smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -94
- smftools/preprocessing/filter_converted_reads_on_methylation.py +0 -44
- smftools/preprocessing/filter_reads_on_length.py +0 -51
- smftools/tools/call_hmm_peaks.py +0 -105
- smftools/tools/data/__init__.py +0 -2
- smftools/tools/data/anndata_data_module.py +0 -90
- smftools/tools/inference/__init__.py +0 -1
- smftools/tools/inference/lightning_inference.py +0 -41
- smftools/tools/models/base.py +0 -14
- smftools/tools/models/cnn.py +0 -34
- smftools/tools/models/lightning_base.py +0 -41
- smftools/tools/models/mlp.py +0 -17
- smftools/tools/models/sklearn_models.py +0 -40
- smftools/tools/models/transformer.py +0 -133
- smftools/tools/training/__init__.py +0 -1
- smftools/tools/training/train_lightning_model.py +0 -47
- smftools-0.1.7.dist-info/RECORD +0 -136
- /smftools/{tools/evaluation → cli}/__init__.py +0 -0
- /smftools/{tools → hmm}/calculate_distances.py +0 -0
- /smftools/{tools → hmm}/hmm_readwrite.py +0 -0
- /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
- /smftools/informatics/{conversion_smf.py → archived/conversion_smf.py} +0 -0
- /smftools/informatics/{direct_smf.py → archived/direct_smf.py} +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
- /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
- /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
- /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
- /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
- /smftools/{tools → machine_learning}/data/preprocessing.py +0 -0
- /smftools/{tools → machine_learning}/models/__init__.py +0 -0
- /smftools/{tools → machine_learning}/models/wrappers.py +0 -0
- /smftools/{tools → machine_learning}/utils/__init__.py +0 -0
- /smftools/{tools → machine_learning}/utils/device.py +0 -0
- /smftools/{tools → machine_learning}/utils/grl.py +0 -0
- /smftools/tools/{apply_hmm.py → archived/apply_hmm.py} +0 -0
- /smftools/tools/{classifiers.py → archived/classifiers.py} +0 -0
- {smftools-0.1.7.dist-info → smftools-0.2.3.dist-info}/WHEEL +0 -0
- {smftools-0.1.7.dist-info → smftools-0.2.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: smftools
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Single Molecule Footprinting Analysis in Python.
|
|
5
5
|
Project-URL: Source, https://github.com/jkmckenna/smftools
|
|
6
6
|
Project-URL: Documentation, https://smftools.readthedocs.io/
|
|
@@ -43,9 +43,11 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
43
43
|
Classifier: Programming Language :: Python :: 3.12
|
|
44
44
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
45
45
|
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
46
|
-
Requires-Python:
|
|
46
|
+
Requires-Python: <3.13,>=3.9
|
|
47
47
|
Requires-Dist: anndata>=0.10.0
|
|
48
48
|
Requires-Dist: biopython>=1.79
|
|
49
|
+
Requires-Dist: captum
|
|
50
|
+
Requires-Dist: click
|
|
49
51
|
Requires-Dist: fastcluster
|
|
50
52
|
Requires-Dist: hydra-core
|
|
51
53
|
Requires-Dist: igraph
|
|
@@ -57,15 +59,18 @@ Requires-Dist: numpy<2,>=1.22.0
|
|
|
57
59
|
Requires-Dist: omegaconf
|
|
58
60
|
Requires-Dist: pandas>=1.4.2
|
|
59
61
|
Requires-Dist: pod5>=0.1.21
|
|
60
|
-
Requires-Dist:
|
|
62
|
+
Requires-Dist: pybedtools>=0.12.0
|
|
63
|
+
Requires-Dist: pybigwig>=0.3.24
|
|
61
64
|
Requires-Dist: pyfaidx>=0.8.0
|
|
62
65
|
Requires-Dist: pysam>=0.19.1
|
|
63
66
|
Requires-Dist: scanpy>=1.9
|
|
64
67
|
Requires-Dist: scikit-learn>=1.0.2
|
|
65
68
|
Requires-Dist: scipy>=1.7.3
|
|
66
69
|
Requires-Dist: seaborn>=0.11
|
|
70
|
+
Requires-Dist: shap
|
|
67
71
|
Requires-Dist: torch>=1.9.0
|
|
68
72
|
Requires-Dist: tqdm
|
|
73
|
+
Requires-Dist: upsetplot
|
|
69
74
|
Requires-Dist: wandb
|
|
70
75
|
Provides-Extra: docs
|
|
71
76
|
Requires-Dist: ipython>=7.20; extra == 'docs'
|
|
@@ -98,12 +103,9 @@ While most genomic data structures handle low-coverage data (<100X) along large
|
|
|
98
103
|
|
|
99
104
|
## Dependencies
|
|
100
105
|
The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
|
|
101
|
-
1) [Dorado](https://github.com/nanoporetech/dorado) ->
|
|
102
|
-
2) [
|
|
103
|
-
3) [
|
|
104
|
-
4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
|
|
105
|
-
5) [Bedtools](https://github.com/arq5x/bedtools2) -> For generating Bedgraphs from BAM alignment files.
|
|
106
|
-
6) [BedGraphToBigWig](https://genome.ucsc.edu/goldenPath/help/bigWig.html) -> For converting BedGraphs to BigWig files for IGV sessions.
|
|
106
|
+
1) [Dorado](https://github.com/nanoporetech/dorado) -> Basecalling, alignment, demultiplexing.
|
|
107
|
+
2) [Minimap2](https://github.com/lh3/minimap2) -> Alignment if not using dorado.
|
|
108
|
+
3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read level methylation metrics from modified BAM files.
|
|
107
109
|
|
|
108
110
|
## Modules
|
|
109
111
|
### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
|
|
@@ -118,6 +120,9 @@ The following CLI tools need to be installed and configured before using the inf
|
|
|
118
120
|
|
|
119
121
|
## Announcements
|
|
120
122
|
|
|
123
|
+
### 11/05/25 - Version 0.2.1 is available through PyPI
|
|
124
|
+
Version 0.2.1 makes the core workflow (smftools load) a command line tool that takes in an experiment_config.csv file for input/output and parameter management.
|
|
125
|
+
|
|
121
126
|
### 05/29/25 - Version 0.1.6 is available through PyPI.
|
|
122
127
|
Informatics, preprocessing, tools, plotting modules have core functionality that is approaching stability on MacOS(Intel/Silicon) and Linux(Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/Scikit-Learn ML-infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WanDB to facilitate organizational scaling, multi-device usage, and logging.
|
|
123
128
|
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
smftools/__init__.py,sha256=aZlrZBVexf_nEnzQeZu7NU_Kp6OnxcYpLo1KPImi7sI,599
|
|
2
|
+
smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
|
|
3
|
+
smftools/_version.py,sha256=X0PliCRFAeVnSTceUeHX1eM0j1HFhGFDWCRxLdde2Bs,21
|
|
4
|
+
smftools/cli_entry.py,sha256=_QdtEKcVK5o-e5s9ETB9sOIdftPVlrDxvvjBKcP6YNk,14680
|
|
5
|
+
smftools/readwrite.py,sha256=ExKZHNZ0QB-PtSck08drXfHTqbPeSUTHiYhv951SH1s,45994
|
|
6
|
+
smftools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
smftools/cli/cli_flows.py,sha256=xRiFUThoAL3LX1xdXaHVg4LjyJI4uNpGsc9aQ_wVCto,4941
|
|
8
|
+
smftools/cli/hmm_adata.py,sha256=PApUJW0lO4kcLjsiqqQopXgL3Dg-AascIqJrgvSY1Rg,15916
|
|
9
|
+
smftools/cli/load_adata.py,sha256=Qt1ej-osyJ47fpBkGaSDgR1F8E4aBNAdcXeBAGM-Lqg,29100
|
|
10
|
+
smftools/cli/preprocess_adata.py,sha256=EKGbSTli7qvL44OQUmMalYJjsH9vn3w4Rx7U7BL0ybs,20991
|
|
11
|
+
smftools/cli/spatial_adata.py,sha256=AX6iyBfbXud9actteTvDuaQUU_SE3SyBIeknR317g34,30212
|
|
12
|
+
smftools/config/__init__.py,sha256=ObUnnR7aRSoD_uvpmsxA_BUFt4NOOfWNopDVCqjp7tg,69
|
|
13
|
+
smftools/config/conversion.yaml,sha256=HrFz2f9QRe1RuhmgU6ZtMHaM4ZzY61_aLcugsmpV40Q,969
|
|
14
|
+
smftools/config/deaminase.yaml,sha256=mw2aY222y2xg08Rs5CWvjlrXo3vaEim7JwBThA80y4o,1349
|
|
15
|
+
smftools/config/default.yaml,sha256=3IrX0OrUyjhVc3CqTjM8uiprKWrrBdVtil4YhtVzKdQ,10233
|
|
16
|
+
smftools/config/direct.yaml,sha256=SBhdtG7PKm-z5xxQmA7JV3NQsGnUJ4p58fGH8BnoMrM,2137
|
|
17
|
+
smftools/config/discover_input_files.py,sha256=G9vyAmK_n_8Ur5dOnumevVLG3ydHchMy_JQrJdiuuz0,3892
|
|
18
|
+
smftools/config/experiment_config.py,sha256=d_6f_Uv3CY-1orHbxpHtAZDsY2gwxw079_pNgR9wDUg,58837
|
|
19
|
+
smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
|
|
20
|
+
smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
|
|
21
|
+
smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
|
|
22
|
+
smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
|
|
23
|
+
smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
|
|
24
|
+
smftools/hmm/HMM.py,sha256=K8rt-EHn3ylIHpQ3dHf_OZCXxCBVSS2UWTgSGOatwHw,71046
|
|
25
|
+
smftools/hmm/__init__.py,sha256=BkX145eGVy-kFOtyqOcu-Hzv9ZJLDQ3cfDe51eKBTwY,585
|
|
26
|
+
smftools/hmm/apply_hmm_batched.py,sha256=BBeJ8DiIuuMWzLwtDdk2DO2vvrfLCrVe4JtRYPFItIU,10648
|
|
27
|
+
smftools/hmm/calculate_distances.py,sha256=KDWimQ6u-coyxCKrbTm42Fh_Alf_gURBZ0vfFaem848,644
|
|
28
|
+
smftools/hmm/call_hmm_peaks.py,sha256=T-3Ld8H4t3Mgg2whBTYP9s2QL7rY-9RIzVCgB6avKhE,4625
|
|
29
|
+
smftools/hmm/display_hmm.py,sha256=3WuQCPvM3wPfzAdgbhfiBTd0g5mQdx9HTUdqAxs2aj4,825
|
|
30
|
+
smftools/hmm/hmm_readwrite.py,sha256=DjJ3hunpBQ7N0GVvxL7-0QUas_SkA88LVgL72mVK2cI,359
|
|
31
|
+
smftools/hmm/nucleosome_hmm_refinement.py,sha256=nQWimvse6dclcXhbU707rGbRVMKHM0mU_ZhH9g2yCMA,4641
|
|
32
|
+
smftools/hmm/train_hmm.py,sha256=srzRcB9LEmNuHyBM0R5Z0VEnxecifQt-MoaJhADxGT8,2477
|
|
33
|
+
smftools/informatics/__init__.py,sha256=vLvSrCtCVYRUCCNLW7fL3ltPr3h_w8FhT--V6el3ZkQ,1191
|
|
34
|
+
smftools/informatics/bam_functions.py,sha256=otgl3TRPLn5Fnsx1jXX75du90k3XB3RHGzlfamvETsU,32670
|
|
35
|
+
smftools/informatics/basecalling.py,sha256=jc39jneaa8Gt1azutHgBGWHqCoPeTVSGBu3kyQwP7xM,3460
|
|
36
|
+
smftools/informatics/bed_functions.py,sha256=uETVxT5mRWDNn7t0OqhDi8kDiq7uDakeHB1L2JsP4PA,13377
|
|
37
|
+
smftools/informatics/binarize_converted_base_identities.py,sha256=yOepGaNBGfZJEsMiLRwKauvsmaHn_JRrxaGp8LmKAXs,7778
|
|
38
|
+
smftools/informatics/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
|
|
39
|
+
smftools/informatics/converted_BAM_to_adata.py,sha256=Y2kQNWly0WjjGN9El9zL1nLfjVxmPLWONvX5VNgZUh0,22554
|
|
40
|
+
smftools/informatics/fasta_functions.py,sha256=5IfTkX_GIj5gRJB9PjL_WjyEktpBHwGsmS_nnO1ETjI,9790
|
|
41
|
+
smftools/informatics/h5ad_functions.py,sha256=iAOxJjhaDslTUC78kjUHlCELigDl73sWo0fvXcKuFoI,7824
|
|
42
|
+
smftools/informatics/modkit_extract_to_adata.py,sha256=TrgrL_IgfqzNJ9qZ_2EvF_B38_Syw8mP38Sl7v0Riwo,55278
|
|
43
|
+
smftools/informatics/modkit_functions.py,sha256=lywjeqAJ7Cdd7k-0P3YaL_9cAZvEDTDLh91rIRcSMWE,5604
|
|
44
|
+
smftools/informatics/ohe.py,sha256=MEmh3ps-ZSSyXuIrr5LMzQvCsDJRCYiy7JS-WD4TlYs,5805
|
|
45
|
+
smftools/informatics/pod5_functions.py,sha256=vxwhD_d_iWpJydIpbf0uce7VGHm8sBnCwb7tLNpYBc8,9859
|
|
46
|
+
smftools/informatics/run_multiqc.py,sha256=n6LvQuGQpLfsutVGmgvHfV0SV5PqTQ8wa_SeKOjRssM,1052
|
|
47
|
+
smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
|
|
48
|
+
smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
|
|
49
|
+
smftools/informatics/archived/basecall_pod5s.py,sha256=Ynmxscsxj6qp-zVY0RWodq513oDuHDaHnpqoepB3RUU,3930
|
|
50
|
+
smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
|
|
51
|
+
smftools/informatics/archived/conversion_smf.py,sha256=QhlISVi3Z-XqFKyDG_CenLojovAt5-ZhuVe9hus36lg,7177
|
|
52
|
+
smftools/informatics/archived/deaminase_smf.py,sha256=mNeg1mIYYVLIiW8powEpz0CqrGRDsrmY5-aoIgwMGHs,7221
|
|
53
|
+
smftools/informatics/archived/direct_smf.py,sha256=ylPGFBvRLdxLHeDJjAwq98j8Q8_lfGK3k5JJnQxrwJw,7485
|
|
54
|
+
smftools/informatics/archived/fast5_to_pod5.py,sha256=TRG_FYYGCGWUPzZCt0ZqzB8gQv_HKvkssp9nTctWzXU,1398
|
|
55
|
+
smftools/informatics/archived/print_bam_query_seq.py,sha256=8Z2ZJEOOlfWYUXiZGjteLWU4yTgvV8KQzEIBHUmamGM,838
|
|
56
|
+
smftools/informatics/archived/subsample_fasta_from_bed.py,sha256=7YTKhXg_mtP4KWpnD-TB4nuFEL4crOa9_d84IJKllyQ,1633
|
|
57
|
+
smftools/informatics/archived/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
|
|
58
|
+
smftools/informatics/archived/helpers/archived/__init__.py,sha256=DiiBerFJAxZeG5y0ScpJSaVBJ8b4XWdfEJCh8Q7k8jU,2783
|
|
59
|
+
smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py,sha256=yaRfhQDh3HpsSTme6QnSqBgElCS0kv2G6TunhvR1weY,5493
|
|
60
|
+
smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py,sha256=N3NAOaoSt_M4V48vtTP_m_iF1tRuNIPS_uNJ3Y0IA4E,3391
|
|
61
|
+
smftools/informatics/archived/helpers/archived/bam_qc.py,sha256=PWl3dViCHGOcjB4UKkxBFz34Gc0PXHVTHjpYVNckVH0,7975
|
|
62
|
+
smftools/informatics/archived/helpers/archived/bed_to_bigwig.py,sha256=Bg9wFsavUU9Ha57n_99vYlYpVcbDUz3tLtYJ7ZFVR9k,2986
|
|
63
|
+
smftools/informatics/archived/helpers/archived/canoncall.py,sha256=5WS6lwukc_xYTdPQy0OSj-WLbx0Rg70Cun1lCucY7w8,1741
|
|
64
|
+
smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py,sha256=6GTHXG1dfaC8rBin5NthG3xgyGqOsT6wIGxJVCmCq58,9774
|
|
65
|
+
smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py,sha256=sRmOtn0kNosLYfogqslDHg1Azk51l6nfNOLgQOnQjlA,14591
|
|
66
|
+
smftools/informatics/archived/helpers/archived/count_aligned_reads.py,sha256=ZF_kkzAf1RvM4PwDYhxD36UiuVuMM_MBvZgiXom1NQ0,2176
|
|
67
|
+
smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py,sha256=KmU7nqGQ-MfDrp8h3txbToGn4h95Rkvg0WEiuext-vY,2000
|
|
68
|
+
smftools/informatics/archived/helpers/archived/extract_base_identities.py,sha256=CaFqNBjkDujYlyiUnOeRock1OQWs3CeiD3yTL96sjIs,3043
|
|
69
|
+
smftools/informatics/archived/helpers/archived/extract_mods.py,sha256=Mrs7mrLFgCTiRGfPFSyvJm6brq--LGzZrNDiFB-jynI,3895
|
|
70
|
+
smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py,sha256=SYAb4Q1HxiJzCx5bIz86MdH_TvVPsRAVodZD9082HGY,1491
|
|
71
|
+
smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py,sha256=Cw39wgp1eBTV45Wk1l0c9l-upBW5N2OcgyWXTAXln90,678
|
|
72
|
+
smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
|
|
73
|
+
smftools/informatics/archived/helpers/archived/find_conversion_sites.py,sha256=JPlDipmzeCBkV_T6esGD5ptwmbQmk8gJMTh7NMaSYd4,2480
|
|
74
|
+
smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py,sha256=Us6iH1cIhsXDnTvDxI-FEHB6ndbB30hd1ss-9dIoWVE,3819
|
|
75
|
+
smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py,sha256=BEroXshYSpjf5wt_vrEAFiTJmSuf-kvD-Z1B_1gusME,1000
|
|
76
|
+
smftools/informatics/archived/helpers/archived/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
|
|
77
|
+
smftools/informatics/archived/helpers/archived/index_fasta.py,sha256=w6xHFSaoXVk-YWZWftZ9Xv8rywZ_IuuIouLQ12KL3ro,779
|
|
78
|
+
smftools/informatics/archived/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
|
|
79
|
+
smftools/informatics/archived/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
|
|
80
|
+
smftools/informatics/archived/helpers/archived/make_modbed.py,sha256=Wh0UCSOL4fMZbWYK-3oGGHwJtqPurJ3Bl6wJWBaTXoM,923
|
|
81
|
+
smftools/informatics/archived/helpers/archived/modQC.py,sha256=pz2EscFgO-j-9dfNgNDseweXXqM5-a-Rj2abBLErLd0,1051
|
|
82
|
+
smftools/informatics/archived/helpers/archived/modcall.py,sha256=LVPrdMNVp2gyQTJ4BNp8NJNm89AueDjsKaY7Gqkluho,1777
|
|
83
|
+
smftools/informatics/archived/helpers/archived/ohe_batching.py,sha256=QVOiyl9fYHNIFWM23afYnQo0uaOjf1NR3ASKGVSrmuw,2975
|
|
84
|
+
smftools/informatics/archived/helpers/archived/ohe_layers_decode.py,sha256=gIgUC9L8TFLi-fTnjR4PRzXdUaH5D6WL2Hump6XOoy0,1042
|
|
85
|
+
smftools/informatics/archived/helpers/archived/one_hot_decode.py,sha256=3n4rzY8_aC9YKmgrftsguMsH7fUyQ-DbWmrOYF6la9s,906
|
|
86
|
+
smftools/informatics/archived/helpers/archived/one_hot_encode.py,sha256=5hHigA6-SZLK84WH_RHo06F_6aTg7S3TJgvSr8gxGX8,1968
|
|
87
|
+
smftools/informatics/archived/helpers/archived/plot_bed_histograms.py,sha256=78i0mYFuElTPGA2Dt1feO6Z4Grh1Nro3m-F8D5FRBOw,9914
|
|
88
|
+
smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py,sha256=pCLev0OQji1jBdVr25lI_gt9fsozSG8vh7TQkE_UHnY,1800
|
|
89
|
+
smftools/informatics/archived/helpers/archived/split_and_index_BAM.py,sha256=Q7I5qJ5JjW6mSKysfl9NdlFZ6LIy3C8G5rGmG7cn2eA,1224
|
|
90
|
+
smftools/machine_learning/__init__.py,sha256=cWyGN_QVcssqBr_VVr7xh2Inz0P7ylqUmBBcpMgsK0k,257
|
|
91
|
+
smftools/machine_learning/data/__init__.py,sha256=xbfLE-gNjdgxvZ9LKTdvjAtbIHOcs2TR0Gz3YRFbo38,113
|
|
92
|
+
smftools/machine_learning/data/anndata_data_module.py,sha256=ktrdMVMk5yhIUrnu-G_Xf3y7G-KP9PyhYZhobv8TCVg,10063
|
|
93
|
+
smftools/machine_learning/data/preprocessing.py,sha256=dSs6Qs3wmlccFPZSpOc-uy1nlFSf68wWQKwF1iTqMok,137
|
|
94
|
+
smftools/machine_learning/evaluation/__init__.py,sha256=KHvcC7bTYv-ThptAi6G8wD-hW5Iz1HPgMcQ3AewtK3c,122
|
|
95
|
+
smftools/machine_learning/evaluation/eval_utils.py,sha256=t9WIevIJ6b6HqU6OYaNx7UBAa5TEIPFmZow6n_ZDZeY,1105
|
|
96
|
+
smftools/machine_learning/evaluation/evaluators.py,sha256=KqYHqbVV2WOs0Yo4GIhLS_0h1oKY6nd1yi6piDWYQLg,8184
|
|
97
|
+
smftools/machine_learning/inference/__init__.py,sha256=vWLQD-JNEKKNGuzDtx7vcE4czKKXEO6S-0Zp5-21fPs,172
|
|
98
|
+
smftools/machine_learning/inference/inference_utils.py,sha256=aJuXvTgC8v4BOjLCgOU9vT3S2y1UGoZjq4mQpPswTQU,947
|
|
99
|
+
smftools/machine_learning/inference/lightning_inference.py,sha256=34WVnPfpPDf4KM8ZN5MOsx4tYgsrUclkens6GXgB4Ek,2160
|
|
100
|
+
smftools/machine_learning/inference/sklearn_inference.py,sha256=FomgQF5jFBfAj1-H2Q0_RPmvR9rDJsmUeaWOVRhbpTw,1612
|
|
101
|
+
smftools/machine_learning/inference/sliding_window_inference.py,sha256=8zjQs2hGhj0Dww4gWljLVK0g002_U96dyIqQJiDdSDY,4426
|
|
102
|
+
smftools/machine_learning/models/__init__.py,sha256=bMfPbQ5bDmn_kWv82virLuUhjb12Yow7t_j96afNbyA,421
|
|
103
|
+
smftools/machine_learning/models/base.py,sha256=p3d77iyY8BVx0tYL0TjmOSnPNP1ZrKTzn_J05e2GF0A,9626
|
|
104
|
+
smftools/machine_learning/models/cnn.py,sha256=KKZmJLQ6Bjm_HI8GULnafjz6mRy5BZ6Y0ZCgDSuS268,4465
|
|
105
|
+
smftools/machine_learning/models/lightning_base.py,sha256=3nC3wajPIupFMtOq3YUf24_SHvDoW_9BIGyIvEwzN9w,13626
|
|
106
|
+
smftools/machine_learning/models/mlp.py,sha256=Y2hc_qHj6vpM_mHpreFxBULn4MkR25oEA1LXu5sPA_w,820
|
|
107
|
+
smftools/machine_learning/models/positional.py,sha256=EfTyYnY0pCB-aVJIWf-4DVNpyGlvx1q_09PzfrC-VlA,652
|
|
108
|
+
smftools/machine_learning/models/rnn.py,sha256=uJnHDGpT2_l_HqHGsx33XGF3v3EYZPeOtSQ89uvhdpE,717
|
|
109
|
+
smftools/machine_learning/models/sklearn_models.py,sha256=ssV-mR3rmcjycQEzKccRcbVaEjZp0zRNUL5-R6m1UKU,10402
|
|
110
|
+
smftools/machine_learning/models/transformer.py,sha256=8YXS0vCcOWT-33h-8yeDfFM5ibPHQ-CMSEhGWzR4pm8,11039
|
|
111
|
+
smftools/machine_learning/models/wrappers.py,sha256=HEY2A6-Bk6MtVZ9jOaPT8S1Qi0L98SyEg1nbKqYZoag,697
|
|
112
|
+
smftools/machine_learning/training/__init__.py,sha256=teUmwpnmAl0oNFaqVrfoijEpxBjLwI5YtBwLHT3uXck,185
|
|
113
|
+
smftools/machine_learning/training/train_lightning_model.py,sha256=usEBaQ4vNjfatefP5XDCXkywzgZ2D-YppGmT3-3gTGE,4070
|
|
114
|
+
smftools/machine_learning/training/train_sklearn_model.py,sha256=m1k1Gsynpj6SJI64rl4B3cfXm1SliU0fwMAj1-bAAeE,3166
|
|
115
|
+
smftools/machine_learning/utils/__init__.py,sha256=yOpzBc9AXbarSRfN8Ixh2Z1uWLGpgpjRR46h6E46_2w,62
|
|
116
|
+
smftools/machine_learning/utils/device.py,sha256=GITrULOty2Fr96Bqt1wi1PaYl_oVgB5Z99Gfn5vQy4o,274
|
|
117
|
+
smftools/machine_learning/utils/grl.py,sha256=BWBDp_kQBigrUzQpRbZzgpfr_WOcd2K2V3MQL-aAIc4,334
|
|
118
|
+
smftools/plotting/__init__.py,sha256=7T3-hZFgTY0nfQgV4J6Vn9ogwkNMlY315kguZR7V1AI,866
|
|
119
|
+
smftools/plotting/autocorrelation_plotting.py,sha256=cF9X3CgKiwzL79mgMUFO1tSqdybDoPN1COQQ567InCY,27455
|
|
120
|
+
smftools/plotting/classifiers.py,sha256=8_zabh4NNB1_yVxLD22lfrfl5yfzbEoG3XWqlIqdtrQ,13786
|
|
121
|
+
smftools/plotting/general_plotting.py,sha256=2JzE7agm_tILpQ67BHs5pdyPRsHBwcENZe7n4gfMWgM,61350
|
|
122
|
+
smftools/plotting/hmm_plotting.py,sha256=3Eq82gty_0b8GkSMCQgUlbKfzR9h2fJ5rZkB8yYGX-M,10934
|
|
123
|
+
smftools/plotting/position_stats.py,sha256=4XukYIWeWZ_aGSZg1K0t37KA2aknjNNKT5kcKFfuz8Q,17428
|
|
124
|
+
smftools/plotting/qc_plotting.py,sha256=q5Ri0q89udvNUFUNxHzgk9atvQYqUkqkS5-JFq9EqoI,10045
|
|
125
|
+
smftools/preprocessing/__init__.py,sha256=GAQBULUH7fGVabzK5Cq5Wj-0ew0vNA-jWQtR5LAowvs,1746
|
|
126
|
+
smftools/preprocessing/add_read_length_and_mapping_qc.py,sha256=zD_Kxw3DvyOypfuSMGv0ESyt-02w4XlAAMqQxb7yDNQ,5700
|
|
127
|
+
smftools/preprocessing/append_base_context.py,sha256=wGBAADePnys8DLUR15MpRe2BUcfCMDJWaCDDNyjn6AU,6209
|
|
128
|
+
smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=s-7t-VKCs9Y67pX7kH6DNCEkC-RW4nM-UPsBQV2ZwtE,6186
|
|
129
|
+
smftools/preprocessing/binarize.py,sha256=6Vr7Z8zgtJ5rS_uPAx1n3EnQR670V33DlZ_95JmOeWc,484
|
|
130
|
+
smftools/preprocessing/binarize_on_Youden.py,sha256=HGs4p7XiOSYU3_z8QswNHIA9HlrI-7Pp1Kggrn6yUnI,1834
|
|
131
|
+
smftools/preprocessing/binary_layers_to_ohe.py,sha256=Lxd8knelNTaUozfGMFNMlnrOb6uP28Laj3Ymw6cRHL0,1826
|
|
132
|
+
smftools/preprocessing/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
|
|
133
|
+
smftools/preprocessing/calculate_complexity_II.py,sha256=DGfl0jkuBPUpzhKVItN0W7EPzh-QYuR4IxRObPE6gAQ,9301
|
|
134
|
+
smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
|
|
135
|
+
smftools/preprocessing/calculate_coverage.py,sha256=4WTILzKLzxGLSsQrZkshXP-IRQpoVu3Fkqc0QTpux3Y,2132
|
|
136
|
+
smftools/preprocessing/calculate_pairwise_differences.py,sha256=5zJbNNaFld5qgKRoPyplCmMHflbvAQ9eKWCXPXPpJ60,1774
|
|
137
|
+
smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
|
|
138
|
+
smftools/preprocessing/calculate_position_Youden.py,sha256=yaSd6UDXPCddoN1UR6LgTqE5teJ79Ldw0BAlemc9fB4,7453
|
|
139
|
+
smftools/preprocessing/calculate_read_length_stats.py,sha256=gNNePwMqYZJidzGgT1ZkfSlvc5Y3I3bi5KNYpP6wQQc,4584
|
|
140
|
+
smftools/preprocessing/calculate_read_modification_stats.py,sha256=mIlLBqNflVIkuoLxhbyujq3JEKyPl8iebhUlikB9brM,4775
|
|
141
|
+
smftools/preprocessing/clean_NaN.py,sha256=IOcnN5YF05gpPQc3cc3IS83petCnhCpkYiyT6bXEyx0,1937
|
|
142
|
+
smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=GZcvr2JCsthX8EMw34S9-W3fc6JElw6ka99Jy6f2JvA,1292
|
|
143
|
+
smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=93LgTy_vsPnOZgoiXhZ1-w_pix2oFdBk-dsBUoz33Go,7379
|
|
144
|
+
smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=4TUvChkSH8R4p_0TpRCh7TounkdUgQHh71TGNmsZ29A,19355
|
|
145
|
+
smftools/preprocessing/flag_duplicate_reads.py,sha256=MySI9En6xVp0FqL7hfiLw0EP3JnGVJWM_yZfkvN-m1U,65585
|
|
146
|
+
smftools/preprocessing/invert_adata.py,sha256=HYMJ1sR3Ui8j6bDjY8OcVQOETzZV-_rrpIYaWLZL6S4,1049
|
|
147
|
+
smftools/preprocessing/load_sample_sheet.py,sha256=AjJf2MrqGHJJ2rNjYi09zV1QkLTq8qGaHGVklXHnPuU,1908
|
|
148
|
+
smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
|
|
149
|
+
smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
|
|
150
|
+
smftools/preprocessing/recipes.py,sha256=cfKEpKW8TtQLe1CMdSHyPuIgKiWOPn7uP6uMIoRlnaQ,7063
|
|
151
|
+
smftools/preprocessing/subsample_adata.py,sha256=ivJvJIOvEtyvAjqZ7cwEeVedm4QgJxCJEI7sFaTuI3w,2360
|
|
152
|
+
smftools/preprocessing/archives/mark_duplicates.py,sha256=kwfstcWb7KkqeNB321dB-NLe8yd9_hZsSmpL8pCVBQg,8747
|
|
153
|
+
smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
|
|
154
|
+
smftools/preprocessing/archives/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
|
|
155
|
+
smftools/tools/__init__.py,sha256=QV3asy5_lP9wcRzpNTfxGTCcpykkbNYvzxSMpFw4KXU,719
|
|
156
|
+
smftools/tools/calculate_umap.py,sha256=2arbAQdFOtnWoPq22TWicyr6fLYZ5PTNeZv_jdwuk_I,2491
|
|
157
|
+
smftools/tools/cluster_adata_on_methylation.py,sha256=UDC5lpW8fZ6O-16ETu-mbflLkNBKuIg7RIzQ9r7knvA,5760
|
|
158
|
+
smftools/tools/general_tools.py,sha256=YbobB6Zllz6cUq50yolGH9Jr6uuAMvEI4m3hiJ6FmAI,2561
|
|
159
|
+
smftools/tools/position_stats.py,sha256=Z7VW54wUVzH1RQ9xhP6KO7ewp-xeLybd07I5umV_aqM,24369
|
|
160
|
+
smftools/tools/read_stats.py,sha256=w3Zaim6l__Kt8EPCJKXTlMgO51Iy2Milj6yUb88HXiI,6324
|
|
161
|
+
smftools/tools/spatial_autocorrelation.py,sha256=uQkuPi2PJCj5lZzb33IWTL-e-p3J6PdMeM88rUFfQRw,21212
|
|
162
|
+
smftools/tools/subset_adata.py,sha256=nBbtAxCNteZCUBmPnZ9swQNyU74XgWM8aJHHWg2AuL0,1025
|
|
163
|
+
smftools/tools/archived/apply_hmm.py,sha256=pJXCULay0zbmubrwql368y7yiHAZr2bJhuGx2QUuKnE,9321
|
|
164
|
+
smftools/tools/archived/classifiers.py,sha256=mwSTpWUXBPjmUuV5i_SMG1lIPpHSMCzsKhl8wTbm-Og,36903
|
|
165
|
+
smftools/tools/archived/classify_methylated_features.py,sha256=Z0N2UKw3luD3CTQ8wcUvdnMY7w-8574OJbEcwzNsy88,2897
|
|
166
|
+
smftools/tools/archived/classify_non_methylated_features.py,sha256=IJERTozEs7IPL7K-VIjq2q2K36wRCW9iiNSYLAXasrA,3256
|
|
167
|
+
smftools/tools/archived/subset_adata_v1.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
|
|
168
|
+
smftools/tools/archived/subset_adata_v2.py,sha256=OKZoUpvdURPtckIQxGTWmOI5jLa-_EU62Xs3LyyehnA,1880
|
|
169
|
+
smftools-0.2.3.dist-info/METADATA,sha256=w_PRsBPndPoTQZviW9WTuiZV1Pk3ukeJ155OvC4E57M,8787
|
|
170
|
+
smftools-0.2.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
171
|
+
smftools-0.2.3.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
|
|
172
|
+
smftools-0.2.3.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
|
|
173
|
+
smftools-0.2.3.dist-info/RECORD,,
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# fast5_to_pod5
|
|
2
|
-
|
|
3
|
-
def fast5_to_pod5(fast5_dir, output_pod5='FAST5s_to_POD5.pod5'):
    """
    Convert Nanopore FAST5 file(s) into a single POD5 file via ``pod5 convert fast5``.

    Parameters:
        fast5_dir (str): Path to a single FAST5 file, or to a directory whose ``*.fast5`` files are all converted into one POD5 output.
        output_pod5 (str): The name of the output POD5.

    Returns:
        None
        Nothing is run when the path does not exist or when a directory holds no ``*.fast5`` files.
    """
    import subprocess
    from pathlib import Path

    source = Path(fast5_dir)
    if source.is_file():
        inputs = [fast5_dir]
    elif source.is_dir():
        # subprocess.run is called without a shell, so a "*.fast5" pattern would be
        # handed to pod5 verbatim; expand the pattern here instead.
        inputs = sorted(str(path) for path in source.glob("*.fast5"))
        if not inputs:
            print(f"No FAST5 files found in {fast5_dir}; nothing to convert.")
            return
    else:
        # Neither a file nor a directory: nothing to do.
        return

    subprocess.run(["pod5", "convert", "fast5", *inputs, "--output", output_pod5])
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
## LoadExperimentConfig
|
|
2
|
-
|
|
3
|
-
class LoadExperimentConfig:
    """
    Loads in the experiment configuration csv and stores the parsed parameters in a dictionary.

    The csv must have the columns ``variable``, ``value`` and ``type``. Each row is
    converted according to its ``type`` (``list``, ``int``, ``float``, ``bool`` or
    ``string``); rows with any other type keep the raw text of ``value``. Empty
    values and the literal ``None`` become ``None``. When a variable appears more
    than once, the last row wins.

    Parameters:
        experiment_config (str): A string representing the file path to the experiment configuration csv file.

    Attributes:
        var_dict (dict): A dictionary containing experiment configuration parameters.

    Example:
        >>> import pandas as pd
        >>> from io import StringIO
        >>> csv_data = '''variable,value,type
        ... mapping_threshold,0.05,float
        ... batch_size,4,int
        ... testing_bool,True,bool
        ... strands,"[bottom, top]",list
        ... split_dir,split_bams,string
        ... pod5_dir,None,string
        ... fast5_dir,,string
        ... '''
        >>> csv_file = StringIO(csv_data)
        >>> df = pd.read_csv(csv_file)
        >>> df.to_csv('test_config.csv', index=False)
        >>> config_loader = LoadExperimentConfig('test_config.csv')
        Loading experiment config from test_config.csv
        >>> config_loader.var_dict['mapping_threshold']
        0.05
        >>> config_loader.var_dict['batch_size']
        4
        >>> config_loader.var_dict['testing_bool']
        True
        >>> config_loader.var_dict['strands']
        ['bottom', 'top']
        >>> config_loader.var_dict['split_dir']
        'split_bams'
        >>> config_loader.var_dict['pod5_dir'] is None
        True
        >>> config_loader.var_dict['fast5_dir'] is None
        True
    """
    def __init__(self, experiment_config):
        import pandas as pd
        print(f"Loading experiment config from {experiment_config}")
        # Read every cell as text. Without dtype=str pandas infers the dtype of the
        # 'value' column, so a column holding only booleans or numbers yields
        # non-string values and the string handling below would fail.
        df = pd.read_csv(experiment_config, dtype=str)
        # Initialize an empty dictionary to store variables
        var_dict = {}
        # Iterate through each row in the DataFrame
        for _, row in df.iterrows():
            var_name = str(row['variable'])
            value = row['value']
            dtype = row['type']
            # Handle empty and None values
            if pd.isna(value) or value.strip() in ('None', ''):
                value = None
            elif dtype == 'list':
                # Convert the string representation of a list to an actual list,
                # trimming whitespace around items; "[]" gives an empty list.
                inner = value.strip().strip('()[]')
                value = [item.strip() for item in inner.split(',')] if inner.strip() else []
            elif dtype == 'int':
                value = int(value)
            elif dtype == 'float':
                value = float(value)
            elif dtype == 'bool':
                value = value.strip().lower() == 'true'
            elif dtype == 'string':
                value = str(value)
            # Store the variable in the dictionary
            var_dict[var_name] = value
        # Save the dictionary as an attribute of the class
        self.var_dict = var_dict
|
|
75
|
-
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
from .align_and_sort_BAM import align_and_sort_BAM
|
|
2
|
-
from .aligned_BAM_to_bed import aligned_BAM_to_bed
|
|
3
|
-
from .bam_qc import bam_qc
|
|
4
|
-
from .bed_to_bigwig import bed_to_bigwig
|
|
5
|
-
from .binarize_converted_base_identities import binarize_converted_base_identities
|
|
6
|
-
from .canoncall import canoncall
|
|
7
|
-
from .complement_base_list import complement_base_list
|
|
8
|
-
from .converted_BAM_to_adata_II import converted_BAM_to_adata_II
|
|
9
|
-
from .concatenate_fastqs_to_bam import concatenate_fastqs_to_bam
|
|
10
|
-
from .count_aligned_reads import count_aligned_reads
|
|
11
|
-
from .demux_and_index_BAM import demux_and_index_BAM
|
|
12
|
-
from .extract_base_identities import extract_base_identities
|
|
13
|
-
from .extract_mods import extract_mods
|
|
14
|
-
from .extract_read_features_from_bam import extract_read_features_from_bam
|
|
15
|
-
from .extract_read_lengths_from_bed import extract_read_lengths_from_bed
|
|
16
|
-
from .extract_readnames_from_BAM import extract_readnames_from_BAM
|
|
17
|
-
from .find_conversion_sites import find_conversion_sites
|
|
18
|
-
from .generate_converted_FASTA import convert_FASTA_record, generate_converted_FASTA
|
|
19
|
-
from .get_chromosome_lengths import get_chromosome_lengths
|
|
20
|
-
from .get_native_references import get_native_references
|
|
21
|
-
from .index_fasta import index_fasta
|
|
22
|
-
from .LoadExperimentConfig import LoadExperimentConfig
|
|
23
|
-
from .make_dirs import make_dirs
|
|
24
|
-
from .make_modbed import make_modbed
|
|
25
|
-
from .modcall import modcall
|
|
26
|
-
from .modkit_extract_to_adata import modkit_extract_to_adata
|
|
27
|
-
from .modQC import modQC
|
|
28
|
-
from .one_hot_encode import one_hot_encode
|
|
29
|
-
from .ohe_batching import ohe_batching
|
|
30
|
-
from .one_hot_decode import one_hot_decode
|
|
31
|
-
from .ohe_layers_decode import ohe_layers_decode
|
|
32
|
-
from .plot_read_length_and_coverage_histograms import plot_read_length_and_coverage_histograms
|
|
33
|
-
from .run_multiqc import run_multiqc
|
|
34
|
-
from .separate_bam_by_bc import separate_bam_by_bc
|
|
35
|
-
from .split_and_index_BAM import split_and_index_BAM
|
|
36
|
-
|
|
37
|
-
__all__ = [
|
|
38
|
-
"align_and_sort_BAM",
|
|
39
|
-
"aligned_BAM_to_bed",
|
|
40
|
-
"bam_qc",
|
|
41
|
-
"bed_to_bigwig",
|
|
42
|
-
"binarize_converted_base_identities",
|
|
43
|
-
"canoncall",
|
|
44
|
-
"complement_base_list",
|
|
45
|
-
"converted_BAM_to_adata_II",
|
|
46
|
-
"concatenate_fastqs_to_bam",
|
|
47
|
-
"count_aligned_reads",
|
|
48
|
-
"demux_and_index_BAM",
|
|
49
|
-
"extract_base_identities",
|
|
50
|
-
"extract_mods",
|
|
51
|
-
"extract_read_features_from_bam",
|
|
52
|
-
"extract_read_lengths_from_bed",
|
|
53
|
-
"extract_readnames_from_BAM",
|
|
54
|
-
"find_conversion_sites",
|
|
55
|
-
"convert_FASTA_record",
|
|
56
|
-
"generate_converted_FASTA",
|
|
57
|
-
"get_chromosome_lengths",
|
|
58
|
-
"get_native_references",
|
|
59
|
-
"index_fasta",
|
|
60
|
-
"LoadExperimentConfig",
|
|
61
|
-
"make_dirs",
|
|
62
|
-
"make_modbed",
|
|
63
|
-
"modcall",
|
|
64
|
-
"modkit_extract_to_adata",
|
|
65
|
-
"modQC",
|
|
66
|
-
"one_hot_encode",
|
|
67
|
-
"ohe_batching",
|
|
68
|
-
"one_hot_decode",
|
|
69
|
-
"ohe_layers_decode",
|
|
70
|
-
"plot_read_length_and_coverage_histograms",
|
|
71
|
-
"run_multiqc",
|
|
72
|
-
"separate_bam_by_bc",
|
|
73
|
-
"split_and_index_BAM"
|
|
74
|
-
]
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
## align_and_sort_BAM
|
|
2
|
-
|
|
3
|
-
def align_and_sort_BAM(fasta, input, bam_suffix='.bam', output_directory='aligned_outputs', make_bigwigs=False, threads=None):
    """
    A wrapper for running dorado aligner and samtools functions

    Parameters:
        fasta (str): File path to the reference genome to align to.
        input (str): File path to the basecalled file to align. Works for .bam and .fastq files
        bam_suffix (str): The suffix to use for the BAM file.
        output_directory (str): A file path to the directory to output all the analyses. Created if it does not exist.
        make_bigwigs (bool): Whether to make bigwigs. Accepted for interface compatibility; not used by this function.
        threads (int): Number of additional threads to use

    Returns:
        None
            The function writes out files for: 1) An aligned BAM, 2) an aligned_sorted BAM, 3) an index file for the aligned_sorted BAM
    """
    import subprocess
    import os

    # Derive the output stem from the input file name. Only the real extension is
    # removed (plus a trailing ".gz" layer, e.g. "reads.fastq.gz" -> "reads"), so
    # names without a dot no longer fail and names with extra dots keep their stem.
    input_basename = os.path.basename(input)
    input_stem, input_ext = os.path.splitext(input_basename)
    if input_ext == '.gz':
        input_stem = os.path.splitext(input_stem)[0]

    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
    output_path_minus_suffix = os.path.join(output_directory, input_stem)

    aligned_BAM = f"{output_path_minus_suffix}_aligned"
    aligned_sorted_BAM = f"{aligned_BAM}_sorted"
    aligned_output = aligned_BAM + bam_suffix
    aligned_sorted_output = aligned_sorted_BAM + bam_suffix

    # Thread flags are only passed when a thread count was requested.
    dorado_threads = ["-t", str(threads)] if threads else []
    samtools_threads = ["-@", str(threads)] if threads else []

    # Run dorado aligner; its stdout is the aligned BAM.
    print(f"Aligning BAM to Reference: {input}")
    alignment_command = ["dorado", "aligner", *dorado_threads, '--mm2-opts', "-N 1", fasta, input]
    # The context manager closes the output handle once dorado has finished.
    with open(aligned_output, "w") as aligned_handle:
        subprocess.run(alignment_command, stdout=aligned_handle)

    # Sort the BAM on positional coordinates
    print(f"Sorting BAM: {aligned_output}")
    sort_command = ["samtools", "sort", *samtools_threads, "-o", aligned_sorted_output, aligned_output]
    subprocess.run(sort_command)

    # Create a BAM index file
    print(f"Indexing BAM: {aligned_sorted_output}")
    index_command = ["samtools", "index", *samtools_threads, aligned_sorted_output]
    subprocess.run(index_command)
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
    """
    Takes an aligned BAM as input and writes a BED file of reads as output.
    Bed columns are: Record name, start position, end position, read length, read name.

    The BED is then split into aligned and unaligned reads (the combined BED is
    removed), histograms are plotted for the aligned BED and, optionally, a
    bigwig is generated from it.

    Parameters:
        aligned_BAM (str): Path to an input aligned_BAM to extract to a BED file.
        out_dir (str): Directory to output files.
        fasta (str): File path to the reference genome.
        make_bigwigs (bool): Whether to generate bigwig files.
        threads (int): Number of threads to use for samtools. Defaults to os.cpu_count().

    Returns:
        None
    """
    import subprocess
    import os
    import concurrent.futures
    from concurrent.futures import ProcessPoolExecutor
    from .bed_to_bigwig import bed_to_bigwig
    from . import make_dirs
    from .plot_read_length_and_coverage_histograms import plot_read_length_and_coverage_histograms

    threads = threads or os.cpu_count()  # Use max available cores if not specified

    # Create necessary directories
    plotting_dir = os.path.join(out_dir, "bed_cov_histograms")
    bed_dir = os.path.join(out_dir, "beds")
    make_dirs([plotting_dir, bed_dir])

    # Strip only the trailing ".bam" extension; str.replace would also rewrite any
    # other ".bam" occurring earlier in the file name.
    bam_name = os.path.basename(aligned_BAM)
    if bam_name.endswith(".bam"):
        bam_name = bam_name[:-len(".bam")]
    bed_output = os.path.join(bed_dir, bam_name + "_bed.bed")

    print(f"Creating BED from BAM: {aligned_BAM} using {threads} threads...")

    # Convert BAM to BED format: samtools view | awk > bed_output
    with open(bed_output, "w") as output_file:
        samtools_view = subprocess.Popen(["samtools", "view", "-@", str(threads), aligned_BAM], stdout=subprocess.PIPE)
        awk_process = subprocess.Popen(
            ["awk", '{print $3 "\t" $4 "\t" $4+length($10)-1 "\t" length($10)-1 "\t" $1}'],
            stdin=samtools_view.stdout,
            stdout=output_file
        )

        # Close the parent's copy of the pipe so awk sees EOF when samtools exits.
        samtools_view.stdout.close()
        awk_status = awk_process.wait()
        samtools_status = samtools_view.wait()

    if samtools_status or awk_status:
        # Report the failure instead of silently continuing with a partial BED.
        print(f"Warning: BAM to BED conversion exited with status samtools={samtools_status}, awk={awk_status}")

    print(f"BED file created: {bed_output}")

    def split_bed(bed):
        """Splits BED into aligned and unaligned reads."""
        # Only the final extension is rewritten, so directories containing ".bed"
        # in their name are left intact.
        bed_root, bed_ext = os.path.splitext(bed)
        aligned = f"{bed_root}_aligned{bed_ext}"
        unaligned = f"{bed_root}_unaligned{bed_ext}"

        with open(bed, "r") as infile, open(aligned, "w") as aligned_out, open(unaligned, "w") as unaligned_out:
            for line in infile:
                # Unaligned reads have "*" as the reference name (first column).
                (unaligned_out if line.startswith("*") else aligned_out).write(line)

        os.remove(bed)
        return aligned

    print(f"Splitting BED: {bed_output}")
    aligned_bed = split_bed(bed_output)

    with ProcessPoolExecutor() as executor:  # Use processes instead of threads
        futures = {}
        futures[executor.submit(plot_read_length_and_coverage_histograms, aligned_bed, plotting_dir)] = "plot_read_length_and_coverage_histograms"
        if make_bigwigs:
            futures[executor.submit(bed_to_bigwig, fasta, aligned_bed)] = "bed_to_bigwig"

        # Wait for all tasks to complete
        concurrent.futures.wait(futures)

        # wait() does not surface worker exceptions; collect and report them so a
        # failed task is not announced as a success.
        failed = False
        for future, task_name in futures.items():
            error = future.exception()
            if error is not None:
                failed = True
                print(f"Task {task_name} failed: {error!r}")

    if not failed:
        print("Processing completed successfully.")
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
## bam_qc
|
|
2
|
-
|
|
3
|
-
def bam_qc(bam_files, bam_qc_dir, threads, modality, stats=True, flagstats=True, idxstats=True):
    """
    Performs QC on BAM files by running samtools stats, flagstat, and idxstats.

    For every BAM, each enabled report is written to
    ``<bam_qc_dir>/<bam name without .bam>_<report>.txt``.

    Parameters:
    - bam_files: List of BAM file paths.
    - bam_qc_dir: Directory to save QC reports. Created if it does not exist.
    - threads: Number threads to use. When falsy, no thread flag is passed to samtools.
    - modality: 'conversion' or 'direct'. Accepted for interface compatibility; it does not currently change the processing.
    - stats: Run `samtools stats` if True.
    - flagstats: Run `samtools flagstat` if True.
    - idxstats: Run `samtools idxstats` if True.

    Returns:
    - None
    """
    import os
    import subprocess

    # Ensure the QC output directory exists
    os.makedirs(bam_qc_dir, exist_ok=True)

    thread_args = ["-@", str(threads)] if threads else []

    # (enabled flag, samtools subcommand, output file suffix), in execution order.
    report_table = [
        (stats, "stats", "_stats.txt"),
        (flagstats, "flagstat", "_flagstat.txt"),
        (idxstats, "idxstats", "_idxstats.txt"),
    ]
    enabled_reports = [(subcommand, suffix) for enabled, subcommand, suffix in report_table if enabled]

    for bam in bam_files:
        # Extract filename without extension; only the trailing ".bam" is removed.
        bam_name = os.path.basename(bam)
        if bam_name.endswith(".bam"):
            bam_name = bam_name[:-len(".bam")]

        # Run samtools QC commands based on selected options
        for subcommand, suffix in enabled_reports:
            report_path = os.path.join(bam_qc_dir, f"{bam_name}{suffix}")
            command = ["samtools", subcommand, *thread_args, bam]
            print(f"Running: {' '.join(command)} > {report_path}")
            with open(report_path, "w") as out_file:
                subprocess.run(command, stdout=out_file)

    print("QC processing completed.")
|