smftools 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +0 -2
- smftools/_settings.py +3 -2
- smftools/_version.py +1 -0
- smftools/datasets/F1_sample_sheet.csv +5 -0
- smftools/datasets/datasets.py +14 -11
- smftools/informatics/__init__.py +10 -7
- smftools/informatics/archived/bam_conversion.py +59 -0
- smftools/informatics/archived/bam_direct.py +63 -0
- smftools/informatics/archived/basecalls_to_adata.py +71 -0
- smftools/informatics/conversion_smf.py +79 -0
- smftools/informatics/direct_smf.py +89 -0
- smftools/informatics/fast5_to_pod5.py +21 -0
- smftools/informatics/helpers/LoadExperimentConfig.py +74 -0
- smftools/informatics/helpers/__init__.py +22 -4
- smftools/informatics/helpers/align_and_sort_BAM.py +48 -0
- smftools/informatics/helpers/aligned_BAM_to_bed.py +73 -0
- smftools/informatics/helpers/bed_to_bigwig.py +39 -0
- smftools/informatics/helpers/binarize_converted_base_identities.py +11 -4
- smftools/informatics/helpers/canoncall.py +14 -1
- smftools/informatics/helpers/complement_base_list.py +21 -0
- smftools/informatics/helpers/concatenate_fastqs_to_bam.py +54 -0
- smftools/informatics/helpers/converted_BAM_to_adata.py +183 -97
- smftools/informatics/helpers/count_aligned_reads.py +25 -14
- smftools/informatics/helpers/extract_base_identities.py +44 -23
- smftools/informatics/helpers/extract_mods.py +17 -5
- smftools/informatics/helpers/extract_readnames_from_BAM.py +22 -0
- smftools/informatics/helpers/find_conversion_sites.py +24 -16
- smftools/informatics/helpers/generate_converted_FASTA.py +60 -21
- smftools/informatics/helpers/get_chromosome_lengths.py +32 -0
- smftools/informatics/helpers/get_native_references.py +10 -7
- smftools/informatics/helpers/index_fasta.py +12 -0
- smftools/informatics/helpers/make_dirs.py +9 -3
- smftools/informatics/helpers/make_modbed.py +10 -4
- smftools/informatics/helpers/modQC.py +10 -2
- smftools/informatics/helpers/modcall.py +16 -2
- smftools/informatics/helpers/modkit_extract_to_adata.py +486 -323
- smftools/informatics/helpers/ohe_batching.py +52 -0
- smftools/informatics/helpers/one_hot_encode.py +15 -8
- smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +52 -0
- smftools/informatics/helpers/separate_bam_by_bc.py +20 -5
- smftools/informatics/helpers/split_and_index_BAM.py +31 -11
- smftools/informatics/load_adata.py +127 -0
- smftools/informatics/readwrite.py +13 -16
- smftools/informatics/subsample_fasta_from_bed.py +47 -0
- smftools/informatics/subsample_pod5.py +104 -0
- smftools/preprocessing/__init__.py +6 -7
- smftools/preprocessing/append_C_context.py +52 -22
- smftools/preprocessing/binarize_on_Youden.py +8 -4
- smftools/preprocessing/binary_layers_to_ohe.py +9 -4
- smftools/preprocessing/calculate_complexity.py +26 -14
- smftools/preprocessing/calculate_consensus.py +47 -0
- smftools/preprocessing/calculate_converted_read_methylation_stats.py +69 -11
- smftools/preprocessing/calculate_coverage.py +14 -8
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +11 -6
- smftools/preprocessing/calculate_position_Youden.py +21 -12
- smftools/preprocessing/calculate_read_length_stats.py +67 -8
- smftools/preprocessing/clean_NaN.py +13 -6
- smftools/preprocessing/filter_converted_reads_on_methylation.py +15 -6
- smftools/preprocessing/filter_reads_on_length.py +16 -6
- smftools/preprocessing/invert_adata.py +10 -5
- smftools/preprocessing/load_sample_sheet.py +24 -0
- smftools/preprocessing/make_dirs.py +21 -0
- smftools/preprocessing/mark_duplicates.py +54 -30
- smftools/preprocessing/min_non_diagonal.py +9 -4
- smftools/preprocessing/recipes.py +125 -0
- smftools/preprocessing/remove_duplicates.py +15 -6
- smftools/readwrite.py +13 -16
- smftools/tools/apply_HMM.py +1 -0
- smftools/tools/cluster.py +0 -0
- smftools/tools/read_HMM.py +1 -0
- smftools/tools/subset_adata.py +32 -0
- smftools/tools/train_HMM.py +43 -0
- smftools-0.1.3.dist-info/METADATA +94 -0
- smftools-0.1.3.dist-info/RECORD +84 -0
- smftools/informatics/helpers/align_BAM.py +0 -49
- smftools/informatics/helpers/load_experiment_config.py +0 -17
- smftools/informatics/pod5_conversion.py +0 -26
- smftools/informatics/pod5_direct.py +0 -29
- smftools/informatics/pod5_to_adata.py +0 -17
- smftools-0.1.0.dist-info/METADATA +0 -75
- smftools-0.1.0.dist-info/RECORD +0 -58
- /smftools/informatics/helpers/{informatics.py → archived/informatics.py} +0 -0
- /smftools/informatics/helpers/{load_adata.py → archived/load_adata.py} +0 -0
- /smftools/preprocessing/{preprocessing.py → archives/preprocessing.py} +0 -0
- {smftools-0.1.0.dist-info → smftools-0.1.3.dist-info}/WHEEL +0 -0
- {smftools-0.1.0.dist-info → smftools-0.1.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
smftools/__init__.py,sha256=zy4ckT7hKrLrlm6NiZQoupvc6oSN7wJsyOBCYdzukcQ,401
|
|
2
|
+
smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
|
|
3
|
+
smftools/_version.py,sha256=R5TtpJu7Qu6sOarfDpp-5Oyy8Pi2Ir3VewCvsCQiAgo,21
|
|
4
|
+
smftools/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
|
|
5
|
+
smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
|
|
6
|
+
smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
|
|
7
|
+
smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
|
|
8
|
+
smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
|
|
9
|
+
smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
|
|
10
|
+
smftools/informatics/__init__.py,sha256=WQiMBr1yjDrlmHg8UNgW2MJsq4fPrVfh-UBr5tYI9x4,326
|
|
11
|
+
smftools/informatics/conversion_smf.py,sha256=PS-TjgMttr3VRrT0zg5L_L01xMOewB_OXSsQyoM7DWI,4333
|
|
12
|
+
smftools/informatics/direct_smf.py,sha256=ue7p7deuRwaZtEh9EFV1YTE8HKRAmOsx9oaRJdjCrbY,4697
|
|
13
|
+
smftools/informatics/fast5_to_pod5.py,sha256=xfdZU3QluaAcR-q2uBRz8hcBwYt73nCnrFeahvi0OKQ,704
|
|
14
|
+
smftools/informatics/load_adata.py,sha256=i-2YCSaeLzbPfNtKPrLwfkv-9u_TrTAZrbtNAj3FRWY,7271
|
|
15
|
+
smftools/informatics/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
|
|
16
|
+
smftools/informatics/subsample_fasta_from_bed.py,sha256=YqYV09rvEQdeiS5hTTrKa8xYmJfeM3Vk-UUqwpw0qBk,1983
|
|
17
|
+
smftools/informatics/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
|
|
18
|
+
smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
|
|
19
|
+
smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
|
|
20
|
+
smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
|
|
21
|
+
smftools/informatics/helpers/LoadExperimentConfig.py,sha256=gsWGoa9cydwY4Kd-hTXF2gtmxc8glRRD2V1JB88e9js,2822
|
|
22
|
+
smftools/informatics/helpers/__init__.py,sha256=KrfyM08_RgDf3Ajvb4KNTvcOqZiWYSIVhEznCr01Gcc,2255
|
|
23
|
+
smftools/informatics/helpers/align_and_sort_BAM.py,sha256=DouG6nGWXtz2ulZD5p0sEShE-4dbPudHaWcHFm4-oJA,2184
|
|
24
|
+
smftools/informatics/helpers/aligned_BAM_to_bed.py,sha256=eYkGQFSM2gPEauASkY_-9Yvy6727vP8Q4wx_st85Dpc,2638
|
|
25
|
+
smftools/informatics/helpers/bed_to_bigwig.py,sha256=AazYEZzKgKgukSFwCpeiApzxh1kbt11X4RFqRIiBIaY,1466
|
|
26
|
+
smftools/informatics/helpers/binarize_converted_base_identities.py,sha256=iJlDah-YJ0zx0UrlHdtgvrALVNSA0TTTdDoKmNCVg0Q,1846
|
|
27
|
+
smftools/informatics/helpers/canoncall.py,sha256=M7HEqhYsWMUB0tLP3hzMM0L7PhcOTXgetl5lV3GgIaw,1062
|
|
28
|
+
smftools/informatics/helpers/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
|
|
29
|
+
smftools/informatics/helpers/concatenate_fastqs_to_bam.py,sha256=RXPn7e6Dcwol9tnUsfXJu3EuZcMSOJJo5LNWouovvZs,2715
|
|
30
|
+
smftools/informatics/helpers/converted_BAM_to_adata.py,sha256=Rsnydzpf9lMS3TQjXpbXJSSfCzhVTPn3rBDLiK-8utA,13991
|
|
31
|
+
smftools/informatics/helpers/count_aligned_reads.py,sha256=uYyUYglF1asiaoxr-LKxPMUEbfyD7FS-dumTg2hJHzQ,2170
|
|
32
|
+
smftools/informatics/helpers/extract_base_identities.py,sha256=E-_m9W82N52NjX5kz9Af5YH0S2k58hnq9KTrm4S5vgM,4370
|
|
33
|
+
smftools/informatics/helpers/extract_mods.py,sha256=UBFjXDKz_A6ivjcocYT1_pKjvygY2Fdg0RjQmMS8UuA,2269
|
|
34
|
+
smftools/informatics/helpers/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
|
|
35
|
+
smftools/informatics/helpers/find_conversion_sites.py,sha256=5AghDQzEoSvE2Og98VsKoeWUFSLnIGY1LnRu1BtQavM,3700
|
|
36
|
+
smftools/informatics/helpers/generate_converted_FASTA.py,sha256=ueaAsFnBuc7zKwkBivBR3DJg4DtkxkHHIQcVVSWzv-w,5161
|
|
37
|
+
smftools/informatics/helpers/get_chromosome_lengths.py,sha256=sLumLrGsU_Xg_oJcdOpQyjUGpJoT2HbcmxWwbwzXUlE,1036
|
|
38
|
+
smftools/informatics/helpers/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
|
|
39
|
+
smftools/informatics/helpers/index_fasta.py,sha256=N3IErfSiavYldeaat8xcQgA1MpykoQHcE0gHUeWuClE,267
|
|
40
|
+
smftools/informatics/helpers/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
|
|
41
|
+
smftools/informatics/helpers/make_modbed.py,sha256=cOQ97gPfRiCcw_fqboxousXIiOYjp78IFYLbu749U1Y,939
|
|
42
|
+
smftools/informatics/helpers/modQC.py,sha256=LeOBObG8gAVVdgESIMceYhd5AW1gfN7ABo91OQtOzTM,1041
|
|
43
|
+
smftools/informatics/helpers/modcall.py,sha256=9PH7Peq4y-VBqQcMkbv0TwgePBlD5aM4_FmI7H4hbQQ,1142
|
|
44
|
+
smftools/informatics/helpers/modkit_extract_to_adata.py,sha256=duPlRAIz4VWM-jm9iaLY7N6JHQcun_L0nhr2VyUjNTI,38184
|
|
45
|
+
smftools/informatics/helpers/ohe_batching.py,sha256=_Mz2p1We5PVIb8S6Hbq_hREKJ9mGQiADwfFK_NgMGhA,1909
|
|
46
|
+
smftools/informatics/helpers/one_hot_encode.py,sha256=hpZAuwa9ndkhyCm9sO65KVHE0lbFDKqRylfliEKyD4o,632
|
|
47
|
+
smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py,sha256=tAnXFleGzXJNjHRAgZ0NUJuZ0P3aKmUYIrK-V9VoJKY,1860
|
|
48
|
+
smftools/informatics/helpers/separate_bam_by_bc.py,sha256=Fsi8OEmv5Ny13cWoHVV9JmEjVFEXT_ZxbBOlRdmyPbE,1742
|
|
49
|
+
smftools/informatics/helpers/split_and_index_BAM.py,sha256=_TFJ8fcLbIf37JG83hSc1zgs1yxX70-NhA8y-PbhTpo,1966
|
|
50
|
+
smftools/informatics/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
|
|
51
|
+
smftools/informatics/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
|
|
52
|
+
smftools/plotting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
+
smftools/preprocessing/__init__.py,sha256=5FQNrj51KmaDLeAGGBA8iWMkYiSOe7O91ES8mT4aVtE,1399
|
|
54
|
+
smftools/preprocessing/append_C_context.py,sha256=pP5u9o5U4JmHras0PK6yas65u4-U5KlX3sKLb-duo80,3728
|
|
55
|
+
smftools/preprocessing/binarize_on_Youden.py,sha256=slkkt56DZ1FZWy8Un5mNJEZ49JlPnPKow2zU4GoHEr8,2303
|
|
56
|
+
smftools/preprocessing/binary_layers_to_ohe.py,sha256=931eHuVda6pMZTvC7jVTKkY2a_KQWpSfgi-nkA5NmaI,1238
|
|
57
|
+
smftools/preprocessing/calculate_complexity.py,sha256=ut60et8bmIswtiLhctJWHNseIV4ZRQultYdtJPHcRPs,3224
|
|
58
|
+
smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
|
|
59
|
+
smftools/preprocessing/calculate_converted_read_methylation_stats.py,sha256=Si0DcES0lLMvg3XgdKpedxfPnXQ14tEFKrOAFRn3fHs,6059
|
|
60
|
+
smftools/preprocessing/calculate_coverage.py,sha256=ZgRxQGpydxQg1exkvSiy8nHmzDIPGGqL5vL9XQ2PZQ4,2068
|
|
61
|
+
smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
|
|
62
|
+
smftools/preprocessing/calculate_position_Youden.py,sha256=mfQ6nFfUaEaKg_icyHA1zZlhh0wHjpLE56BZDXOdP_4,6364
|
|
63
|
+
smftools/preprocessing/calculate_read_length_stats.py,sha256=6m362JaCKlD0QoBUMnM2qsB6Jo_4shl7xFzqU1uZccU,4945
|
|
64
|
+
smftools/preprocessing/clean_NaN.py,sha256=1vieT026p0gDJCbqB_CiLvAGGxlc-5xufoKJgZuBFFk,1150
|
|
65
|
+
smftools/preprocessing/filter_converted_reads_on_methylation.py,sha256=SN5q0rqYtYW9j3i0sVSyTv9EmR_uLKI7GkjmJixeOU0,1307
|
|
66
|
+
smftools/preprocessing/filter_reads_on_length.py,sha256=sAT66bjuI8ZtXyQc9SuPzq1dPIB1CNVx6VfWqVng4Dg,2191
|
|
67
|
+
smftools/preprocessing/invert_adata.py,sha256=u6Y70EH0B5mXb9-HuukIlzpMgZ6rhzcJuy3YZZTx3SA,684
|
|
68
|
+
smftools/preprocessing/load_sample_sheet.py,sha256=uGjzG9x-1t_1lCooH85P8Tfg80GdvVx8Jv1LPl9XNFM,915
|
|
69
|
+
smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
|
|
70
|
+
smftools/preprocessing/mark_duplicates.py,sha256=sQuPcTw8JsQoONOk-kMlAF965sIk2Pu-M7rIyfbyGGs,8145
|
|
71
|
+
smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
|
|
72
|
+
smftools/preprocessing/recipes.py,sha256=KzSw5JW0WJGzSis5Fm7moQY5PxOYl6-uYYf1NDj6nOE,7117
|
|
73
|
+
smftools/preprocessing/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
|
|
74
|
+
smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
|
|
75
|
+
smftools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
|
+
smftools/tools/apply_HMM.py,sha256=AuVtOki69-Xs4mhjhTXJzd49KCVXwixFyWSUgDjtR6s,11
|
|
77
|
+
smftools/tools/cluster.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
|
+
smftools/tools/read_HMM.py,sha256=N0MGG494VjlxYJcCVz1jN4OasGtRITZS98SJ2xB_j8k,10
|
|
79
|
+
smftools/tools/subset_adata.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
|
|
80
|
+
smftools/tools/train_HMM.py,sha256=x5ZcXj-heWQqDOX86nuuDoj1tPkYKl04fYA1fCKNQ0c,1380
|
|
81
|
+
smftools-0.1.3.dist-info/METADATA,sha256=u26Og8tpAF2TgXZztotk3Q4EuP7Fvf73s1tlIjBDD-A,6410
|
|
82
|
+
smftools-0.1.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
83
|
+
smftools-0.1.3.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
|
|
84
|
+
smftools-0.1.3.dist-info/RECORD,,
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
## align_BAM
|
|
2
|
-
import subprocess
|
|
3
|
-
|
|
4
|
-
def _pipe_samtools_view(bam_path, consumer_cmd, out_path):
    """
    Stream ``samtools view bam_path`` into ``consumer_cmd`` and write the consumer's stdout to ``out_path``.

    Parameters:
        bam_path (str): BAM file handed to ``samtools view``.
        consumer_cmd (list): Argument list of the command that reads the SAM text on stdin.
        out_path (str): File that receives the consumer's stdout.

    Raises:
        subprocess.CalledProcessError: If either process exits with a non-zero status.
    """
    with open(out_path, "w") as out_handle:
        # Popen (not run) so that the records are streamed through an OS pipe
        # instead of being held in memory; the context manager closes the pipe
        # and waits for samtools on exit.
        with subprocess.Popen(["samtools", "view", bam_path], stdout=subprocess.PIPE) as producer:
            subprocess.run(consumer_cmd, stdin=producer.stdout, stdout=out_handle, check=True)
    if producer.returncode != 0:
        raise subprocess.CalledProcessError(producer.returncode, producer.args)


def align_BAM(fasta, bam, bam_suffix):
    """
    A wrapper for running dorado aligner and samtools functions.

    Runs, in order: ``dorado aligner``, ``samtools sort``, ``samtools index``, and two
    ``samtools view`` pipelines that write a coordinate file and a read-name file.

    Parameters:
        fasta (str): Reference FASTA passed to ``dorado aligner``.
        bam (str): Path of the input BAM without its suffix.
        bam_suffix (str): Suffix appended to ``bam`` and to every derived BAM path.

    Returns:
        None. The following files are written:
            ``{bam}_aligned{bam_suffix}``, ``{bam}_aligned_sorted{bam_suffix}`` (plus its index),
            ``{bam}_aligned_sorted_bed.bed`` and ``{bam}_aligned_sorted_read_names.txt``.

    Raises:
        subprocess.CalledProcessError: If any external command exits with a non-zero status.
    """
    aligned_BAM = f"{bam}_aligned"
    aligned_sorted_BAM = f"{aligned_BAM}_sorted"
    output = bam + bam_suffix
    aligned_output = aligned_BAM + bam_suffix
    aligned_sorted_output = aligned_sorted_BAM + bam_suffix

    # Run dorado aligner; its stdout is the aligned BAM.
    with open(aligned_output, "w") as aligned_handle:
        subprocess.run(
            ["dorado", "aligner", "--secondary=no", fasta, output],
            stdout=aligned_handle,
            check=True,
        )

    # Sort the BAM on positional coordinates
    subprocess.run(["samtools", "sort", "-o", aligned_sorted_output, aligned_output], check=True)

    # Create a BAM index file
    subprocess.run(["samtools", "index", aligned_sorted_output], check=True)

    # Make a bed file of coordinates for the BAM
    _pipe_samtools_view(
        aligned_sorted_output,
        ["awk", '{print $3, $4, $4+length($10)-1}'],
        f"{aligned_sorted_BAM}_bed.bed",
    )

    # Make a text file of reads for the BAM. The file name is derived from the
    # sorted BAM prefix, like the bed file above, rather than being a fixed
    # name in the current working directory.
    _pipe_samtools_view(
        aligned_sorted_output,
        ["cut", "-f1"],
        f"{aligned_sorted_BAM}_read_names.txt",
    )
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
## load_experiment_config
|
|
2
|
-
import csv
|
|
3
|
-
|
|
4
|
-
def load_experiment_config(experiment_config):
    """
    Loads in the experiment configuration csv and saves global variables with experiment configuration parameters.

    The csv must have a ``variable`` column and a ``value`` column. Every row is stored
    in this module's global namespace under the name given in ``variable`` and is also
    collected into the returned dictionary, so callers in other modules can read the
    parameters without reaching into this module's globals.

    Parameters:
        experiment_config (str): Path to the experiment configuration csv.

    Returns:
        dict: Mapping of variable name to value. Values are the raw strings read from the csv.

    Raises:
        KeyError: If a row lacks the ``variable`` or ``value`` column.
    """
    config = {}
    # utf-8-sig drops a leading byte-order mark if present; newline='' is what
    # the csv module asks for so that quoted line breaks are parsed correctly.
    with open(experiment_config, mode='r', encoding='utf-8-sig', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            # Extract variable name and value from each row
            config[row['variable']] = row['value']

    # NOTE: names come straight from the file and are written into module
    # globals; a row named like an existing module-level name (e.g. ``csv``)
    # overwrites it. Only load trusted configuration files.
    globals().update(config)
    return config
|
|
17
|
-
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
## pod5_conversion
|
|
2
|
-
from .helpers import align_BAM, canoncall, converted_BAM_to_adata, generate_converted_FASTA, split_and_index_BAM
|
|
3
|
-
import subprocess
|
|
4
|
-
|
|
5
|
-
def pod5_conversion(fasta, output_directory, conversion_types, strands, model, pod5_dir, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix):
    """
    Run the conversion SMF workflow, from POD5 input through to the adata loading step.

    The steps are delegated to the helper functions in the order shown below; this
    function only derives the shared file paths and passes the arguments along.

    Returns:
        None.
    """
    # Shared path prefixes (no suffix) for the basecalled and the aligned + sorted BAM.
    basecall_prefix = f"{output_directory}/HAC_basecalls"
    sorted_prefix = f"{basecall_prefix}_aligned_sorted"

    # Step 1: write the converted reference next to the input FASTA.
    # NOTE(review): the stem is everything before the FIRST '.fa' in the whole
    # path, so a '.fa' inside a directory name would truncate it - confirm
    # whether that is intended.
    fasta_stem = fasta.partition('.fa')[0]
    converted_FASTA = fasta_stem + '_converted.fasta'
    generate_converted_FASTA(fasta, conversion_types, strands, converted_FASTA)

    # Step 2: basecall the POD5 input into a single BAM.
    canoncall(model, pod5_dir, barcode_kit, basecall_prefix, bam_suffix)

    # Step 3: align against the converted reference (align_BAM also sorts,
    # indexes and lists the mapped reads).
    align_BAM(converted_FASTA, basecall_prefix, bam_suffix)

    # Step 4: split the aligned, sorted BAM by barcode into split_dir.
    split_and_index_BAM(sorted_prefix, split_dir, bam_suffix)

    # Step 5: load the split, converted BAMs into an adata object.
    converted_BAM_to_adata(converted_FASTA, split_dir, mapping_threshold, experiment_name, conversion_types, bam_suffix)
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
## pod5_direct
|
|
2
|
-
from .helpers import align_BAM, extract_mods, make_modbed, modcall, modkit_extract_to_adata, modQC, split_and_index_BAM
|
|
3
|
-
|
|
4
|
-
def pod5_direct(fasta, output_directory, mod_list, model, thresholds, pod5_dir, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, batch_size):
    """
    Run the direct-modification SMF workflow, from POD5 input through to the adata loading step.

    The steps are delegated to the helper functions in the order shown below; this
    function only derives the shared file paths, translates the requested
    modification names and passes the arguments along.

    Raises:
        KeyError: If ``mod_list`` holds a name other than '6mA' or '5mC_5hmC'.

    Returns:
        None.
    """
    # Shared path prefixes and output directories, all below output_directory.
    modcall_prefix = f"{output_directory}/HAC_mod_calls"
    sorted_prefix = f"{modcall_prefix}_aligned_sorted"
    mod_bed_dir = f"{output_directory}/split_mod_beds"
    mod_tsv_dir = f"{output_directory}/split_mod_tsvs"
    sorted_bam_path = sorted_prefix + bam_suffix

    # Translate the requested modification names into the labels handed to the
    # final loading step. This happens before any external tool is started, so
    # an unknown name fails early.
    label_for = {'6mA': '6mA', '5mC_5hmC': '5mC'}
    mods = []
    for requested in mod_list:
        mods.append(label_for[requested])

    # Step 1: modified basecalling.
    modcall(model, pod5_dir, barcode_kit, mod_list, modcall_prefix, bam_suffix)

    # Step 2: align to the reference FASTA (align_BAM also sorts, indexes and
    # lists the mapped reads).
    align_BAM(fasta, modcall_prefix, bam_suffix)

    # Step 3: split the aligned, sorted BAM by barcode into split_dir.
    split_and_index_BAM(sorted_prefix, split_dir, bam_suffix)

    # Step 4: modkit-based processing of the modified BAM.
    modQC(sorted_bam_path, thresholds)  # QC metrics for the mod calls
    make_modbed(sorted_bam_path, thresholds, mod_bed_dir)  # per-position methylation summaries
    extract_mods(thresholds, mod_tsv_dir, split_dir, bam_suffix)  # per-read calls from the split BAMs into TSVs

    # Step 5: load the modification data into an adata object.
    modkit_extract_to_adata(fasta, sorted_bam_path, mapping_threshold, experiment_name, mods, batch_size)
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
## pod5_to_adata
|
|
2
|
-
from .helpers import load_experiment_config
|
|
3
|
-
from.pod5_direct import pod5_direct
|
|
4
|
-
from.pod5_conversion import pod5_conversion
|
|
5
|
-
|
|
6
|
-
def pod5_to_adata(config_path, ):
    """
    Load the experiment configuration and run the workflow selected by ``smf_modality``.

    Parameters:
        config_path (str): Path to the experiment configuration csv read by
            ``load_experiment_config``.

    Returns:
        None.

    Raises:
        KeyError: If the configuration lacks a parameter the selected workflow needs.
        ValueError: If ``smf_modality`` is neither 'conversion' nor 'direct'.
    """
    # Load experiment config parameters. The loader stores them in the global
    # namespace of the module that defines it, not in this module, so they are
    # read back from the loader's own globals instead of as bare names here.
    load_experiment_config(config_path)
    params = load_experiment_config.__globals__
    # NOTE(review): values are passed on exactly as the loader stored them
    # (raw csv strings) - confirm the workflow functions convert list-like and
    # numeric parameters themselves.

    conversion_args = (
        'fasta', 'output_directory', 'conversion_types', 'strands', 'model',
        'pod5_dir', 'split_dir', 'barcode_kit', 'mapping_threshold',
        'experiment_name', 'bam_suffix',
    )
    direct_args = (
        'fasta', 'output_directory', 'mod_list', 'model', 'thresholds',
        'pod5_dir', 'split_dir', 'barcode_kit', 'mapping_threshold',
        'experiment_name', 'bam_suffix', 'batch_size',
    )

    smf_modality = params.get('smf_modality')
    if smf_modality == 'conversion':
        # Previously this branch only built a tuple and never ran the workflow.
        pod5_conversion(*[params[name] for name in conversion_args])
    elif smf_modality == 'direct':
        pod5_direct(*[params[name] for name in direct_args])
    else:
        raise ValueError(
            f"Unsupported smf_modality {smf_modality!r}; expected 'conversion' or 'direct'"
        )
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: smftools
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Single Molecule Footprinting Analysis in Python.
|
|
5
|
-
Project-URL: Source, https://github.com/jkmckenna/smftools
|
|
6
|
-
Author: Joseph McKenna
|
|
7
|
-
Maintainer-email: Joseph McKenna <jkmckenna@berkeley.edu>
|
|
8
|
-
License-Expression: MIT
|
|
9
|
-
License-File: LICENSE
|
|
10
|
-
Keywords: anndata,chromatin-accessibility,machine-learning,nanopore,protein-dna-binding,single-locus,single-molecule-footprinting
|
|
11
|
-
Classifier: Development Status :: 2 - Pre-Alpha
|
|
12
|
-
Classifier: Environment :: Console
|
|
13
|
-
Classifier: Intended Audience :: Developers
|
|
14
|
-
Classifier: Intended Audience :: Science/Research
|
|
15
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
-
Classifier: Natural Language :: English
|
|
17
|
-
Classifier: Operating System :: MacOS :: MacOS X
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
-
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
24
|
-
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
25
|
-
Requires-Python: >=3.9
|
|
26
|
-
Requires-Dist: anndata>=0.10.0
|
|
27
|
-
Requires-Dist: biopython>=1.79
|
|
28
|
-
Requires-Dist: cython>=0.29.28
|
|
29
|
-
Requires-Dist: networkx>=3.2
|
|
30
|
-
Requires-Dist: numpy<2,>=1.22.0
|
|
31
|
-
Requires-Dist: pandas>=1.4.2
|
|
32
|
-
Requires-Dist: pomegranate>1.0.0
|
|
33
|
-
Requires-Dist: pysam>=0.19.1
|
|
34
|
-
Requires-Dist: scanpy>=1.9
|
|
35
|
-
Requires-Dist: scikit-learn>=1.0.2
|
|
36
|
-
Requires-Dist: scipy>=1.7.3
|
|
37
|
-
Requires-Dist: seaborn>=0.11
|
|
38
|
-
Requires-Dist: tqdm
|
|
39
|
-
Provides-Extra: base-tests
|
|
40
|
-
Requires-Dist: pytest; extra == 'base-tests'
|
|
41
|
-
Requires-Dist: pytest-cov; extra == 'base-tests'
|
|
42
|
-
Provides-Extra: doc
|
|
43
|
-
Requires-Dist: ipython>=7.20; extra == 'doc'
|
|
44
|
-
Requires-Dist: matplotlib!=3.6.1; extra == 'doc'
|
|
45
|
-
Requires-Dist: myst-nb>=1; extra == 'doc'
|
|
46
|
-
Requires-Dist: myst-parser>=2; extra == 'doc'
|
|
47
|
-
Requires-Dist: nbsphinx>=0.9; extra == 'doc'
|
|
48
|
-
Requires-Dist: readthedocs-sphinx-search; extra == 'doc'
|
|
49
|
-
Requires-Dist: setuptools; extra == 'doc'
|
|
50
|
-
Requires-Dist: sphinx-autodoc-typehints>=1.25.2; extra == 'doc'
|
|
51
|
-
Requires-Dist: sphinx-book-theme>=1.1.0; extra == 'doc'
|
|
52
|
-
Requires-Dist: sphinx-copybutton; extra == 'doc'
|
|
53
|
-
Requires-Dist: sphinx-design; extra == 'doc'
|
|
54
|
-
Requires-Dist: sphinx>=7; extra == 'doc'
|
|
55
|
-
Requires-Dist: sphinxcontrib-bibtex; extra == 'doc'
|
|
56
|
-
Requires-Dist: sphinxext-opengraph; extra == 'doc'
|
|
57
|
-
Provides-Extra: torch
|
|
58
|
-
Requires-Dist: pomegranate>=1.0.0; extra == 'torch'
|
|
59
|
-
Requires-Dist: torch>=1.9.0; extra == 'torch'
|
|
60
|
-
Provides-Extra: torch-tests
|
|
61
|
-
Requires-Dist: pomegranate>=1.0.0; extra == 'torch-tests'
|
|
62
|
-
Requires-Dist: pytest; extra == 'torch-tests'
|
|
63
|
-
Requires-Dist: pytest-cov; extra == 'torch-tests'
|
|
64
|
-
Requires-Dist: torch>=1.9.0; extra == 'torch-tests'
|
|
65
|
-
Description-Content-Type: text/markdown
|
|
66
|
-
|
|
67
|
-
# smftools
|
|
68
|
-
A tool for processing raw sequencing data for single molecule footprinting experiments at single genomic loci.
|
|
69
|
-
|
|
70
|
-
## Dependencies
|
|
71
|
-
The following tools need to be installed and configured:
|
|
72
|
-
1) [Dorado](https://github.com/nanoporetech/dorado) -> For standard/modified basecalling and alignment. Can be obtained by downloading and configuring the Nanopore MinKNOW software.
|
|
73
|
-
2) [Samtools](https://github.com/samtools/samtools) -> For working with SAM/BAM files
|
|
74
|
-
3) [Minimap2](https://github.com/lh3/minimap2) -> The aligner used by Dorado
|
|
75
|
-
4) [Modkit](https://github.com/nanoporetech/modkit) -> For extracting summary statistics and read-level methylation calls from modified BAM files
|
smftools-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
smftools/__init__.py,sha256=pWcysCXCokCdW4YySaA8BMumZkE56m15otMPG88nQGc,444
|
|
2
|
-
smftools/_settings.py,sha256=a1uYWNBNtQb30cGSdpjeiIMnQV1Fip7IZAQrNzjXR5w,324
|
|
3
|
-
smftools/readwrite.py,sha256=p-K_RYOrM0vDawBTcuCUyuwVzmYwJqNMvhv9fCTLDKE,4159
|
|
4
|
-
smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
|
|
5
|
-
smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
|
|
6
|
-
smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
|
|
7
|
-
smftools/datasets/datasets.py,sha256=rAcp7_Raa8Uv95DISj-oACY1fE_5fIfb5Poj-9WVOWo,473
|
|
8
|
-
smftools/informatics/__init__.py,sha256=Bjufdncl978d-tNriuRHX92mjeAO5axjTlZP7iePjms,235
|
|
9
|
-
smftools/informatics/pod5_conversion.py,sha256=m_qNRSNeUndl5KO8PJPMLCOWqcVq-TJSvoyUnJW-UHE,1399
|
|
10
|
-
smftools/informatics/pod5_direct.py,sha256=MGQkpHI2qQuEO0IDFOiXjL0Pq59oC1DwUaIWH7jTHiU,1707
|
|
11
|
-
smftools/informatics/pod5_to_adata.py,sha256=R31bkGbparRGpYpTZ69znQThK1xCGChlf4oP836IC9Y,722
|
|
12
|
-
smftools/informatics/readwrite.py,sha256=p-K_RYOrM0vDawBTcuCUyuwVzmYwJqNMvhv9fCTLDKE,4159
|
|
13
|
-
smftools/informatics/helpers/__init__.py,sha256=ws8Zyxin68L7G5R9Rna_qoBnkSNOaD1ndlcrooV2d-k,1466
|
|
14
|
-
smftools/informatics/helpers/align_BAM.py,sha256=vZpkbI-mUqd6qJaovRhuNM03s816fjd5hNEcfA0oHxo,1414
|
|
15
|
-
smftools/informatics/helpers/binarize_converted_base_identities.py,sha256=rTdk06BmU_bvuE1NOU1LGSQs9ytkl7vQjZcwMbA5Yx0,1409
|
|
16
|
-
smftools/informatics/helpers/canoncall.py,sha256=Ujz0Pkp_wW-XJyb3uB2fzVpB12c2MCOWVTg_uIOQL8c,397
|
|
17
|
-
smftools/informatics/helpers/converted_BAM_to_adata.py,sha256=7tntnkTZpNqS0WTMbO42ksxXqzk9NFpXK0q6uWSqtkM,8593
|
|
18
|
-
smftools/informatics/helpers/count_aligned_reads.py,sha256=Q9iU0zwwNZRn0oOxRl_x5OAF7YgaRkSpQBojaBccQsI,1814
|
|
19
|
-
smftools/informatics/helpers/extract_base_identities.py,sha256=nrQy8cUyOA2C8cKKL6SpZ97U0ZGhChw8Qdk0BwuxIT4,2406
|
|
20
|
-
smftools/informatics/helpers/extract_mods.py,sha256=IQdpQxh_2NStKK4kIVKa8UAcV-fVJSs_bzB3JLe0Jx4,1602
|
|
21
|
-
smftools/informatics/helpers/find_conversion_sites.py,sha256=aer63p2JHqaoB3wSK9xqpSjow7w7UyrMSJ06aKTQSiQ,3208
|
|
22
|
-
smftools/informatics/helpers/generate_converted_FASTA.py,sha256=xHjspkeCiSKjqb6zUrjCG13OKW597LJ4_w33tg-wFok,3006
|
|
23
|
-
smftools/informatics/helpers/get_native_references.py,sha256=wx_RXnPwj0NGp7Tx1_hXyO8ZzQJHZwa0b3a6r3266FY,976
|
|
24
|
-
smftools/informatics/helpers/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
|
|
25
|
-
smftools/informatics/helpers/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
|
|
26
|
-
smftools/informatics/helpers/load_experiment_config.py,sha256=boF524jZZKzBjc2yvAiMYvhM4OW_efXSbwU-nINKDdg,607
|
|
27
|
-
smftools/informatics/helpers/make_dirs.py,sha256=UxghbuquyXgDD-H24Ghf1B7Kfpdt04NgMs8GE6zSJ3U,475
|
|
28
|
-
smftools/informatics/helpers/make_modbed.py,sha256=m1lQUbw0W63YjiM_Tmy6QOL3GkHgvSuCrkX2Bo8sCco,741
|
|
29
|
-
smftools/informatics/helpers/modQC.py,sha256=C-WVaoLN7Dxh--JcWa4UXzhwFpf0AXrFA99IsxfVXwo,770
|
|
30
|
-
smftools/informatics/helpers/modcall.py,sha256=f41SkaXi2fYgv7B0oAFwj-9CZ0XVhvzWHfe977rT0wQ,493
|
|
31
|
-
smftools/informatics/helpers/modkit_extract_to_adata.py,sha256=TvIHDXWTO2QxArGyIU3w-dQBVyq0rQ1A4iMvV9Rb_7A,24407
|
|
32
|
-
smftools/informatics/helpers/one_hot_encode.py,sha256=jxfTNREED0YhdvwhVmRrt2BZUfiOrOUURVSOVtGypns,439
|
|
33
|
-
smftools/informatics/helpers/separate_bam_by_bc.py,sha256=AFT-v0XXuW2rRYG6FC-8gulhAA6a4YGAa0UEuMDlimc,1235
|
|
34
|
-
smftools/informatics/helpers/split_and_index_BAM.py,sha256=HqPAy5YxK0jokCWCUlUIWSLf2n-Gubkge-xxYfX4XLE,755
|
|
35
|
-
smftools/plotting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
-
smftools/preprocessing/__init__.py,sha256=ngs4XYcd9gSXwSa-gi1pWMblCRJ_uZo5MMkg5vuhDOU,1438
|
|
37
|
-
smftools/preprocessing/append_C_context.py,sha256=sfHiV3gBOum3KyZ6wFOCUcDUieW9rra3hKFkWEb8wmk,2063
|
|
38
|
-
smftools/preprocessing/binarize_on_Youden.py,sha256=CO0KnxxHmCwq7tlrBh7BuY0_6SEacR7NwRhkwaWoVUg,2056
|
|
39
|
-
smftools/preprocessing/binary_layers_to_ohe.py,sha256=zahq1YcgAeva-b1CcEVaBK4XaNmMgE6IpfiedYepI5k,876
|
|
40
|
-
smftools/preprocessing/calculate_complexity.py,sha256=H4j0mCWL-jFWZ8UoTGa5lNEcqByfwIDtlsJrpNRNkg4,2751
|
|
41
|
-
smftools/preprocessing/calculate_converted_read_methylation_stats.py,sha256=ZIOKGkbWI15RzpnfgWU4MUXFz3LlUjL_yjodGrye8-A,2626
|
|
42
|
-
smftools/preprocessing/calculate_coverage.py,sha256=Q-RjTqbYt9jc-Axk807_h0m7_oDFdewrO805FQARLUA,1852
|
|
43
|
-
smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=l0IYlWu9RCDq4R2pJm_qGXN_RyFkIuah9fFLvg1Hti0,843
|
|
44
|
-
smftools/preprocessing/calculate_position_Youden.py,sha256=FUSsrhp8L8TLoJaX2cSl8u1phNbYpTRJfVqsrwMOWgY,6008
|
|
45
|
-
smftools/preprocessing/calculate_read_length_stats.py,sha256=kKcEw4zS-GnJ2nyC5c24YVMY2oBmxmcxjPWLGnrkwws,1711
|
|
46
|
-
smftools/preprocessing/clean_NaN.py,sha256=HCRX_nA6H3o7CysCa6yxN07xQEoh6LvdkX7aAYqSKR8,1024
|
|
47
|
-
smftools/preprocessing/filter_converted_reads_on_methylation.py,sha256=krqDb6TNjQx4IICXbEQ8SDcaSjrWZ-9ChtaEiIxU5KY,962
|
|
48
|
-
smftools/preprocessing/filter_reads_on_length.py,sha256=-tXMIpg8Mx8GskCfjBy0ZBczuJRTZdyuSZtDyb6KDJs,1737
|
|
49
|
-
smftools/preprocessing/invert_adata.py,sha256=vpR0jynLODhE8mpiHZQIv1XUY9pd7cEG0ujC-GArXIE,616
|
|
50
|
-
smftools/preprocessing/mark_duplicates.py,sha256=Qd1fluCHkL7ZAY37wGmBe40HwkRipOkbDAp6lnoLU9I,6818
|
|
51
|
-
smftools/preprocessing/min_non_diagonal.py,sha256=o79E5xy-aO-cSwN5dUVi5oj8_EfQBDPcj1D0_7fvk1Q,644
|
|
52
|
-
smftools/preprocessing/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
|
|
53
|
-
smftools/preprocessing/remove_duplicates.py,sha256=sgdRjZSLakocTRwAukdp1RpFhODbeOjNN_EWZkTshAc,395
|
|
54
|
-
smftools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
smftools-0.1.0.dist-info/METADATA,sha256=Loh3iFQgPLn6Xe_WdbeATlGiMqggFaYJOqgwf5e8WRI,3422
|
|
56
|
-
smftools-0.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
57
|
-
smftools-0.1.0.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
|
|
58
|
-
smftools-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|