smftools 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +2 -6
- smftools/_version.py +1 -1
- smftools/cli/__init__.py +0 -0
- smftools/cli/archived/cli_flows.py +94 -0
- smftools/cli/helpers.py +48 -0
- smftools/cli/hmm_adata.py +361 -0
- smftools/cli/load_adata.py +637 -0
- smftools/cli/preprocess_adata.py +455 -0
- smftools/cli/spatial_adata.py +697 -0
- smftools/cli_entry.py +434 -0
- smftools/config/conversion.yaml +18 -6
- smftools/config/deaminase.yaml +18 -11
- smftools/config/default.yaml +151 -36
- smftools/config/direct.yaml +28 -1
- smftools/config/discover_input_files.py +115 -0
- smftools/config/experiment_config.py +225 -27
- smftools/hmm/HMM.py +12 -1
- smftools/hmm/__init__.py +0 -6
- smftools/hmm/archived/call_hmm_peaks.py +106 -0
- smftools/hmm/call_hmm_peaks.py +318 -90
- smftools/informatics/__init__.py +13 -7
- smftools/informatics/archived/fast5_to_pod5.py +43 -0
- smftools/informatics/archived/helpers/archived/__init__.py +71 -0
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
- smftools/informatics/{helpers → archived/helpers/archived}/aligned_BAM_to_bed.py +6 -4
- smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
- smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
- smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
- smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
- smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
- smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +1 -1
- smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
- smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
- smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
- smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
- smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
- smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
- smftools/informatics/{helpers → archived/helpers/archived}/plot_bed_histograms.py +0 -19
- smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +6 -5
- smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +7 -7
- smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
- smftools/informatics/bam_functions.py +811 -0
- smftools/informatics/basecalling.py +67 -0
- smftools/informatics/bed_functions.py +366 -0
- smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +42 -30
- smftools/informatics/fasta_functions.py +255 -0
- smftools/informatics/h5ad_functions.py +197 -0
- smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +142 -59
- smftools/informatics/modkit_functions.py +129 -0
- smftools/informatics/ohe.py +160 -0
- smftools/informatics/pod5_functions.py +224 -0
- smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
- smftools/plotting/autocorrelation_plotting.py +1 -3
- smftools/plotting/general_plotting.py +1084 -363
- smftools/plotting/position_stats.py +3 -3
- smftools/preprocessing/__init__.py +4 -4
- smftools/preprocessing/append_base_context.py +35 -26
- smftools/preprocessing/append_binary_layer_by_base_context.py +6 -6
- smftools/preprocessing/binarize.py +17 -0
- smftools/preprocessing/binarize_on_Youden.py +11 -9
- smftools/preprocessing/calculate_complexity_II.py +1 -1
- smftools/preprocessing/calculate_coverage.py +16 -13
- smftools/preprocessing/calculate_position_Youden.py +42 -26
- smftools/preprocessing/calculate_read_modification_stats.py +2 -2
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +1 -1
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +20 -20
- smftools/preprocessing/flag_duplicate_reads.py +2 -2
- smftools/preprocessing/invert_adata.py +1 -1
- smftools/preprocessing/load_sample_sheet.py +1 -1
- smftools/preprocessing/reindex_references_adata.py +37 -0
- smftools/readwrite.py +360 -140
- {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/METADATA +26 -19
- smftools-0.2.4.dist-info/RECORD +176 -0
- smftools-0.2.4.dist-info/entry_points.txt +2 -0
- smftools/cli.py +0 -184
- smftools/informatics/fast5_to_pod5.py +0 -24
- smftools/informatics/helpers/__init__.py +0 -73
- smftools/informatics/helpers/align_and_sort_BAM.py +0 -86
- smftools/informatics/helpers/bam_qc.py +0 -66
- smftools/informatics/helpers/bed_to_bigwig.py +0 -39
- smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -378
- smftools/informatics/helpers/discover_input_files.py +0 -100
- smftools/informatics/helpers/index_fasta.py +0 -12
- smftools/informatics/helpers/make_dirs.py +0 -21
- smftools/informatics/readwrite.py +0 -106
- smftools/informatics/subsample_fasta_from_bed.py +0 -47
- smftools/load_adata.py +0 -1346
- smftools-0.2.1.dist-info/RECORD +0 -161
- smftools-0.2.1.dist-info/entry_points.txt +0 -2
- /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
- /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
- /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
- /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
- /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
- /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
- /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
- /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
- /smftools/informatics/{helpers/binarize_converted_base_identities.py → binarize_converted_base_identities.py} +0 -0
- /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
- /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archives/add_read_length_and_mapping_qc.py} +0 -0
- /smftools/preprocessing/{calculate_complexity.py → archives/calculate_complexity.py} +0 -0
- {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/WHEEL +0 -0
- {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
smftools/__init__.py,sha256=aZlrZBVexf_nEnzQeZu7NU_Kp6OnxcYpLo1KPImi7sI,599
|
|
2
|
+
smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
|
|
3
|
+
smftools/_version.py,sha256=k2uKAAzDEmm1BIVWeztFlHrCh9fq64H6szFcsXW7tvs,21
|
|
4
|
+
smftools/cli_entry.py,sha256=LvobMVtEb_jrLZScoWCB-OBjUMue9JQBXJZW1oMbHnw,14618
|
|
5
|
+
smftools/readwrite.py,sha256=mbuCKj7LfEKp4bDBxxxMiaTddMwblwURpcCKpgmU6Sw,48678
|
|
6
|
+
smftools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
smftools/cli/helpers.py,sha256=tgjxUlOIhFGCLGD2ON7zlD45UPx93vENM82mM_BpLFk,1281
|
|
8
|
+
smftools/cli/hmm_adata.py,sha256=2ria8u6cCBQnzX_GjUUO3wBVOd7a4m3Al-vzwk0OasQ,17728
|
|
9
|
+
smftools/cli/load_adata.py,sha256=W4NgbM28wOzQHkLnZNILJyblRgee-O4oLnNZcyPDCXc,30486
|
|
10
|
+
smftools/cli/preprocess_adata.py,sha256=g9aHQ1DSScb4zx8RfpCjcEmam6APWHiu8Ow0sza6D2Y,22203
|
|
11
|
+
smftools/cli/spatial_adata.py,sha256=pp0KLK8d-MYjl_hF1ziDVKc6uOJGDDDbKNQELQcRUa8,28980
|
|
12
|
+
smftools/cli/archived/cli_flows.py,sha256=xRiFUThoAL3LX1xdXaHVg4LjyJI4uNpGsc9aQ_wVCto,4941
|
|
13
|
+
smftools/config/__init__.py,sha256=ObUnnR7aRSoD_uvpmsxA_BUFt4NOOfWNopDVCqjp7tg,69
|
|
14
|
+
smftools/config/conversion.yaml,sha256=07dKEXykQeP5VoVxa4xst-tcbSX4B6ErqyqtWJ5RCKk,1177
|
|
15
|
+
smftools/config/deaminase.yaml,sha256=okXdMFAghUAsDyx6P5Kru7ydF2bcbrhMPOaMpXlZPGM,1359
|
|
16
|
+
smftools/config/default.yaml,sha256=cKUUxVkH42kkHQM82mNJC8bfcak6lY063AnIif5o-1g,13071
|
|
17
|
+
smftools/config/direct.yaml,sha256=s30JbOTOOdIiBIefPSEi72YABHnfcCyFXj9WwZ7duJQ,2173
|
|
18
|
+
smftools/config/discover_input_files.py,sha256=G9vyAmK_n_8Ur5dOnumevVLG3ydHchMy_JQrJdiuuz0,3892
|
|
19
|
+
smftools/config/experiment_config.py,sha256=f7hVIc9ShUZk852Ypp6Dfelus8iKFHrSbThiyhpuQsE,63259
|
|
20
|
+
smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
|
|
21
|
+
smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
|
|
22
|
+
smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
|
|
23
|
+
smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
|
|
24
|
+
smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
|
|
25
|
+
smftools/hmm/HMM.py,sha256=Y7YB-45HoLN--JloajoLBgC0rIYmHuWHDfmKRXfFuFk,71458
|
|
26
|
+
smftools/hmm/__init__.py,sha256=_-plMbL5xq8d0szNIYgUrgUwdb8oybuyTn6jned8eSU,382
|
|
27
|
+
smftools/hmm/call_hmm_peaks.py,sha256=BMlwDh-_k8bzqRn4LSYuTk3dCcUoNYHp8eohvWYNn7A,14573
|
|
28
|
+
smftools/hmm/display_hmm.py,sha256=3WuQCPvM3wPfzAdgbhfiBTd0g5mQdx9HTUdqAxs2aj4,825
|
|
29
|
+
smftools/hmm/hmm_readwrite.py,sha256=DjJ3hunpBQ7N0GVvxL7-0QUas_SkA88LVgL72mVK2cI,359
|
|
30
|
+
smftools/hmm/nucleosome_hmm_refinement.py,sha256=nQWimvse6dclcXhbU707rGbRVMKHM0mU_ZhH9g2yCMA,4641
|
|
31
|
+
smftools/hmm/archived/apply_hmm_batched.py,sha256=BBeJ8DiIuuMWzLwtDdk2DO2vvrfLCrVe4JtRYPFItIU,10648
|
|
32
|
+
smftools/hmm/archived/calculate_distances.py,sha256=KDWimQ6u-coyxCKrbTm42Fh_Alf_gURBZ0vfFaem848,644
|
|
33
|
+
smftools/hmm/archived/call_hmm_peaks.py,sha256=T-3Ld8H4t3Mgg2whBTYP9s2QL7rY-9RIzVCgB6avKhE,4625
|
|
34
|
+
smftools/hmm/archived/train_hmm.py,sha256=srzRcB9LEmNuHyBM0R5Z0VEnxecifQt-MoaJhADxGT8,2477
|
|
35
|
+
smftools/informatics/__init__.py,sha256=vLvSrCtCVYRUCCNLW7fL3ltPr3h_w8FhT--V6el3ZkQ,1191
|
|
36
|
+
smftools/informatics/bam_functions.py,sha256=SCtOQWgF7Nqbk7-22fAq9J8kRYrd2V5chmM0x1lLJh0,32261
|
|
37
|
+
smftools/informatics/basecalling.py,sha256=jc39jneaa8Gt1azutHgBGWHqCoPeTVSGBu3kyQwP7xM,3460
|
|
38
|
+
smftools/informatics/bed_functions.py,sha256=uETVxT5mRWDNn7t0OqhDi8kDiq7uDakeHB1L2JsP4PA,13377
|
|
39
|
+
smftools/informatics/binarize_converted_base_identities.py,sha256=yOepGaNBGfZJEsMiLRwKauvsmaHn_JRrxaGp8LmKAXs,7778
|
|
40
|
+
smftools/informatics/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
|
|
41
|
+
smftools/informatics/converted_BAM_to_adata.py,sha256=Y2kQNWly0WjjGN9El9zL1nLfjVxmPLWONvX5VNgZUh0,22554
|
|
42
|
+
smftools/informatics/fasta_functions.py,sha256=5IfTkX_GIj5gRJB9PjL_WjyEktpBHwGsmS_nnO1ETjI,9790
|
|
43
|
+
smftools/informatics/h5ad_functions.py,sha256=9zUKuARwjjt0J-i_kBqo2jxLtD6Gud1VxKT0pV-ACeA,7829
|
|
44
|
+
smftools/informatics/modkit_extract_to_adata.py,sha256=TrgrL_IgfqzNJ9qZ_2EvF_B38_Syw8mP38Sl7v0Riwo,55278
|
|
45
|
+
smftools/informatics/modkit_functions.py,sha256=lywjeqAJ7Cdd7k-0P3YaL_9cAZvEDTDLh91rIRcSMWE,5604
|
|
46
|
+
smftools/informatics/ohe.py,sha256=MEmh3ps-ZSSyXuIrr5LMzQvCsDJRCYiy7JS-WD4TlYs,5805
|
|
47
|
+
smftools/informatics/pod5_functions.py,sha256=vxwhD_d_iWpJydIpbf0uce7VGHm8sBnCwb7tLNpYBc8,9859
|
|
48
|
+
smftools/informatics/run_multiqc.py,sha256=n6LvQuGQpLfsutVGmgvHfV0SV5PqTQ8wa_SeKOjRssM,1052
|
|
49
|
+
smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
|
|
50
|
+
smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
|
|
51
|
+
smftools/informatics/archived/basecall_pod5s.py,sha256=Ynmxscsxj6qp-zVY0RWodq513oDuHDaHnpqoepB3RUU,3930
|
|
52
|
+
smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
|
|
53
|
+
smftools/informatics/archived/conversion_smf.py,sha256=QhlISVi3Z-XqFKyDG_CenLojovAt5-ZhuVe9hus36lg,7177
|
|
54
|
+
smftools/informatics/archived/deaminase_smf.py,sha256=mNeg1mIYYVLIiW8powEpz0CqrGRDsrmY5-aoIgwMGHs,7221
|
|
55
|
+
smftools/informatics/archived/direct_smf.py,sha256=ylPGFBvRLdxLHeDJjAwq98j8Q8_lfGK3k5JJnQxrwJw,7485
|
|
56
|
+
smftools/informatics/archived/fast5_to_pod5.py,sha256=TRG_FYYGCGWUPzZCt0ZqzB8gQv_HKvkssp9nTctWzXU,1398
|
|
57
|
+
smftools/informatics/archived/print_bam_query_seq.py,sha256=8Z2ZJEOOlfWYUXiZGjteLWU4yTgvV8KQzEIBHUmamGM,838
|
|
58
|
+
smftools/informatics/archived/subsample_fasta_from_bed.py,sha256=7YTKhXg_mtP4KWpnD-TB4nuFEL4crOa9_d84IJKllyQ,1633
|
|
59
|
+
smftools/informatics/archived/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
|
|
60
|
+
smftools/informatics/archived/helpers/archived/__init__.py,sha256=DiiBerFJAxZeG5y0ScpJSaVBJ8b4XWdfEJCh8Q7k8jU,2783
|
|
61
|
+
smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py,sha256=yaRfhQDh3HpsSTme6QnSqBgElCS0kv2G6TunhvR1weY,5493
|
|
62
|
+
smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py,sha256=N3NAOaoSt_M4V48vtTP_m_iF1tRuNIPS_uNJ3Y0IA4E,3391
|
|
63
|
+
smftools/informatics/archived/helpers/archived/bam_qc.py,sha256=PWl3dViCHGOcjB4UKkxBFz34Gc0PXHVTHjpYVNckVH0,7975
|
|
64
|
+
smftools/informatics/archived/helpers/archived/bed_to_bigwig.py,sha256=Bg9wFsavUU9Ha57n_99vYlYpVcbDUz3tLtYJ7ZFVR9k,2986
|
|
65
|
+
smftools/informatics/archived/helpers/archived/canoncall.py,sha256=5WS6lwukc_xYTdPQy0OSj-WLbx0Rg70Cun1lCucY7w8,1741
|
|
66
|
+
smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py,sha256=6GTHXG1dfaC8rBin5NthG3xgyGqOsT6wIGxJVCmCq58,9774
|
|
67
|
+
smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py,sha256=sRmOtn0kNosLYfogqslDHg1Azk51l6nfNOLgQOnQjlA,14591
|
|
68
|
+
smftools/informatics/archived/helpers/archived/count_aligned_reads.py,sha256=ZF_kkzAf1RvM4PwDYhxD36UiuVuMM_MBvZgiXom1NQ0,2176
|
|
69
|
+
smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py,sha256=KmU7nqGQ-MfDrp8h3txbToGn4h95Rkvg0WEiuext-vY,2000
|
|
70
|
+
smftools/informatics/archived/helpers/archived/extract_base_identities.py,sha256=CaFqNBjkDujYlyiUnOeRock1OQWs3CeiD3yTL96sjIs,3043
|
|
71
|
+
smftools/informatics/archived/helpers/archived/extract_mods.py,sha256=Mrs7mrLFgCTiRGfPFSyvJm6brq--LGzZrNDiFB-jynI,3895
|
|
72
|
+
smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py,sha256=SYAb4Q1HxiJzCx5bIz86MdH_TvVPsRAVodZD9082HGY,1491
|
|
73
|
+
smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py,sha256=Cw39wgp1eBTV45Wk1l0c9l-upBW5N2OcgyWXTAXln90,678
|
|
74
|
+
smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
|
|
75
|
+
smftools/informatics/archived/helpers/archived/find_conversion_sites.py,sha256=JPlDipmzeCBkV_T6esGD5ptwmbQmk8gJMTh7NMaSYd4,2480
|
|
76
|
+
smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py,sha256=Us6iH1cIhsXDnTvDxI-FEHB6ndbB30hd1ss-9dIoWVE,3819
|
|
77
|
+
smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py,sha256=BEroXshYSpjf5wt_vrEAFiTJmSuf-kvD-Z1B_1gusME,1000
|
|
78
|
+
smftools/informatics/archived/helpers/archived/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
|
|
79
|
+
smftools/informatics/archived/helpers/archived/index_fasta.py,sha256=w6xHFSaoXVk-YWZWftZ9Xv8rywZ_IuuIouLQ12KL3ro,779
|
|
80
|
+
smftools/informatics/archived/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
|
|
81
|
+
smftools/informatics/archived/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
|
|
82
|
+
smftools/informatics/archived/helpers/archived/make_modbed.py,sha256=Wh0UCSOL4fMZbWYK-3oGGHwJtqPurJ3Bl6wJWBaTXoM,923
|
|
83
|
+
smftools/informatics/archived/helpers/archived/modQC.py,sha256=pz2EscFgO-j-9dfNgNDseweXXqM5-a-Rj2abBLErLd0,1051
|
|
84
|
+
smftools/informatics/archived/helpers/archived/modcall.py,sha256=LVPrdMNVp2gyQTJ4BNp8NJNm89AueDjsKaY7Gqkluho,1777
|
|
85
|
+
smftools/informatics/archived/helpers/archived/ohe_batching.py,sha256=QVOiyl9fYHNIFWM23afYnQo0uaOjf1NR3ASKGVSrmuw,2975
|
|
86
|
+
smftools/informatics/archived/helpers/archived/ohe_layers_decode.py,sha256=gIgUC9L8TFLi-fTnjR4PRzXdUaH5D6WL2Hump6XOoy0,1042
|
|
87
|
+
smftools/informatics/archived/helpers/archived/one_hot_decode.py,sha256=3n4rzY8_aC9YKmgrftsguMsH7fUyQ-DbWmrOYF6la9s,906
|
|
88
|
+
smftools/informatics/archived/helpers/archived/one_hot_encode.py,sha256=5hHigA6-SZLK84WH_RHo06F_6aTg7S3TJgvSr8gxGX8,1968
|
|
89
|
+
smftools/informatics/archived/helpers/archived/plot_bed_histograms.py,sha256=78i0mYFuElTPGA2Dt1feO6Z4Grh1Nro3m-F8D5FRBOw,9914
|
|
90
|
+
smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py,sha256=pCLev0OQji1jBdVr25lI_gt9fsozSG8vh7TQkE_UHnY,1800
|
|
91
|
+
smftools/informatics/archived/helpers/archived/split_and_index_BAM.py,sha256=Q7I5qJ5JjW6mSKysfl9NdlFZ6LIy3C8G5rGmG7cn2eA,1224
|
|
92
|
+
smftools/machine_learning/__init__.py,sha256=cWyGN_QVcssqBr_VVr7xh2Inz0P7ylqUmBBcpMgsK0k,257
|
|
93
|
+
smftools/machine_learning/data/__init__.py,sha256=xbfLE-gNjdgxvZ9LKTdvjAtbIHOcs2TR0Gz3YRFbo38,113
|
|
94
|
+
smftools/machine_learning/data/anndata_data_module.py,sha256=ktrdMVMk5yhIUrnu-G_Xf3y7G-KP9PyhYZhobv8TCVg,10063
|
|
95
|
+
smftools/machine_learning/data/preprocessing.py,sha256=dSs6Qs3wmlccFPZSpOc-uy1nlFSf68wWQKwF1iTqMok,137
|
|
96
|
+
smftools/machine_learning/evaluation/__init__.py,sha256=KHvcC7bTYv-ThptAi6G8wD-hW5Iz1HPgMcQ3AewtK3c,122
|
|
97
|
+
smftools/machine_learning/evaluation/eval_utils.py,sha256=t9WIevIJ6b6HqU6OYaNx7UBAa5TEIPFmZow6n_ZDZeY,1105
|
|
98
|
+
smftools/machine_learning/evaluation/evaluators.py,sha256=KqYHqbVV2WOs0Yo4GIhLS_0h1oKY6nd1yi6piDWYQLg,8184
|
|
99
|
+
smftools/machine_learning/inference/__init__.py,sha256=vWLQD-JNEKKNGuzDtx7vcE4czKKXEO6S-0Zp5-21fPs,172
|
|
100
|
+
smftools/machine_learning/inference/inference_utils.py,sha256=aJuXvTgC8v4BOjLCgOU9vT3S2y1UGoZjq4mQpPswTQU,947
|
|
101
|
+
smftools/machine_learning/inference/lightning_inference.py,sha256=34WVnPfpPDf4KM8ZN5MOsx4tYgsrUclkens6GXgB4Ek,2160
|
|
102
|
+
smftools/machine_learning/inference/sklearn_inference.py,sha256=FomgQF5jFBfAj1-H2Q0_RPmvR9rDJsmUeaWOVRhbpTw,1612
|
|
103
|
+
smftools/machine_learning/inference/sliding_window_inference.py,sha256=8zjQs2hGhj0Dww4gWljLVK0g002_U96dyIqQJiDdSDY,4426
|
|
104
|
+
smftools/machine_learning/models/__init__.py,sha256=bMfPbQ5bDmn_kWv82virLuUhjb12Yow7t_j96afNbyA,421
|
|
105
|
+
smftools/machine_learning/models/base.py,sha256=p3d77iyY8BVx0tYL0TjmOSnPNP1ZrKTzn_J05e2GF0A,9626
|
|
106
|
+
smftools/machine_learning/models/cnn.py,sha256=KKZmJLQ6Bjm_HI8GULnafjz6mRy5BZ6Y0ZCgDSuS268,4465
|
|
107
|
+
smftools/machine_learning/models/lightning_base.py,sha256=3nC3wajPIupFMtOq3YUf24_SHvDoW_9BIGyIvEwzN9w,13626
|
|
108
|
+
smftools/machine_learning/models/mlp.py,sha256=Y2hc_qHj6vpM_mHpreFxBULn4MkR25oEA1LXu5sPA_w,820
|
|
109
|
+
smftools/machine_learning/models/positional.py,sha256=EfTyYnY0pCB-aVJIWf-4DVNpyGlvx1q_09PzfrC-VlA,652
|
|
110
|
+
smftools/machine_learning/models/rnn.py,sha256=uJnHDGpT2_l_HqHGsx33XGF3v3EYZPeOtSQ89uvhdpE,717
|
|
111
|
+
smftools/machine_learning/models/sklearn_models.py,sha256=ssV-mR3rmcjycQEzKccRcbVaEjZp0zRNUL5-R6m1UKU,10402
|
|
112
|
+
smftools/machine_learning/models/transformer.py,sha256=8YXS0vCcOWT-33h-8yeDfFM5ibPHQ-CMSEhGWzR4pm8,11039
|
|
113
|
+
smftools/machine_learning/models/wrappers.py,sha256=HEY2A6-Bk6MtVZ9jOaPT8S1Qi0L98SyEg1nbKqYZoag,697
|
|
114
|
+
smftools/machine_learning/training/__init__.py,sha256=teUmwpnmAl0oNFaqVrfoijEpxBjLwI5YtBwLHT3uXck,185
|
|
115
|
+
smftools/machine_learning/training/train_lightning_model.py,sha256=usEBaQ4vNjfatefP5XDCXkywzgZ2D-YppGmT3-3gTGE,4070
|
|
116
|
+
smftools/machine_learning/training/train_sklearn_model.py,sha256=m1k1Gsynpj6SJI64rl4B3cfXm1SliU0fwMAj1-bAAeE,3166
|
|
117
|
+
smftools/machine_learning/utils/__init__.py,sha256=yOpzBc9AXbarSRfN8Ixh2Z1uWLGpgpjRR46h6E46_2w,62
|
|
118
|
+
smftools/machine_learning/utils/device.py,sha256=GITrULOty2Fr96Bqt1wi1PaYl_oVgB5Z99Gfn5vQy4o,274
|
|
119
|
+
smftools/machine_learning/utils/grl.py,sha256=BWBDp_kQBigrUzQpRbZzgpfr_WOcd2K2V3MQL-aAIc4,334
|
|
120
|
+
smftools/plotting/__init__.py,sha256=7T3-hZFgTY0nfQgV4J6Vn9ogwkNMlY315kguZR7V1AI,866
|
|
121
|
+
smftools/plotting/autocorrelation_plotting.py,sha256=cF9X3CgKiwzL79mgMUFO1tSqdybDoPN1COQQ567InCY,27455
|
|
122
|
+
smftools/plotting/classifiers.py,sha256=8_zabh4NNB1_yVxLD22lfrfl5yfzbEoG3XWqlIqdtrQ,13786
|
|
123
|
+
smftools/plotting/general_plotting.py,sha256=o4aPXm_2JRj69XyHINKSTAJGaw9VA-csDgX1pyirso0,63151
|
|
124
|
+
smftools/plotting/hmm_plotting.py,sha256=3Eq82gty_0b8GkSMCQgUlbKfzR9h2fJ5rZkB8yYGX-M,10934
|
|
125
|
+
smftools/plotting/position_stats.py,sha256=Ia15EuYq5r3Ckz3jVjYMHON6IHZboatAVqJdb2WrUA4,17415
|
|
126
|
+
smftools/plotting/qc_plotting.py,sha256=q5Ri0q89udvNUFUNxHzgk9atvQYqUkqkS5-JFq9EqoI,10045
|
|
127
|
+
smftools/preprocessing/__init__.py,sha256=mcmovdFq6jt1kWIe0sVW6MwCXs4tUVTy3Qak7RDts74,1644
|
|
128
|
+
smftools/preprocessing/append_base_context.py,sha256=VnxKf8sI4uWale215FEFFoE2me6uJszXvswl-dFQmUY,6702
|
|
129
|
+
smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=qgjeDyfOghuqWZAzCjd4eE5riCWAgra6CIZ9UCyUgTs,6207
|
|
130
|
+
smftools/preprocessing/binarize.py,sha256=6Vr7Z8zgtJ5rS_uPAx1n3EnQR670V33DlZ_95JmOeWc,484
|
|
131
|
+
smftools/preprocessing/binarize_on_Youden.py,sha256=OwI0JwKBsSPVdPr61D31dR9XhnF0N4e5PnbboTpk8xI,1891
|
|
132
|
+
smftools/preprocessing/binary_layers_to_ohe.py,sha256=Lxd8knelNTaUozfGMFNMlnrOb6uP28Laj3Ymw6cRHL0,1826
|
|
133
|
+
smftools/preprocessing/calculate_complexity_II.py,sha256=oh5y0jbM1-k29ujRUfvXoL3ir4E6bVXLE9bWxlD5efc,9306
|
|
134
|
+
smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
|
|
135
|
+
smftools/preprocessing/calculate_coverage.py,sha256=L417_XWAadMH3vxVDGEEAqxIGOiV48nfzVzD7HYyhus,2199
|
|
136
|
+
smftools/preprocessing/calculate_pairwise_differences.py,sha256=5zJbNNaFld5qgKRoPyplCmMHflbvAQ9eKWCXPXPpJ60,1774
|
|
137
|
+
smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
|
|
138
|
+
smftools/preprocessing/calculate_position_Youden.py,sha256=JJLvU62zpBcvWm5QnsQ3FeRgIv5TMQbz5zTHa3z_Y1s,8342
|
|
139
|
+
smftools/preprocessing/calculate_read_length_stats.py,sha256=gNNePwMqYZJidzGgT1ZkfSlvc5Y3I3bi5KNYpP6wQQc,4584
|
|
140
|
+
smftools/preprocessing/calculate_read_modification_stats.py,sha256=hZzoEe1Acc1TQV3crkjyGZBWTMkMMcqXymJb3vJMHks,4784
|
|
141
|
+
smftools/preprocessing/clean_NaN.py,sha256=IOcnN5YF05gpPQc3cc3IS83petCnhCpkYiyT6bXEyx0,1937
|
|
142
|
+
smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=GZcvr2JCsthX8EMw34S9-W3fc6JElw6ka99Jy6f2JvA,1292
|
|
143
|
+
smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=UhMXpM_qxbhTCorjpKAePRk1qQVls8DP6Z51aFVnr3k,7380
|
|
144
|
+
smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=LK3u0mIwD-T_qwqIH8v7BP1ZRL88HtRXPkDJwchsCjk,19363
|
|
145
|
+
smftools/preprocessing/flag_duplicate_reads.py,sha256=8Z3sVQr8gmci3ZtYfQGDAHI7GpKGhzmAFHoZVyL6nK4,65581
|
|
146
|
+
smftools/preprocessing/invert_adata.py,sha256=TmvwRGlkJKnMajOADAzpE_C2kYEtDVYDYtQKv3IthKs,1047
|
|
147
|
+
smftools/preprocessing/load_sample_sheet.py,sha256=WXAKfIhbnptnkbIpI5hEe6p02HhpQ3eRX1EDGEEvH-8,1916
|
|
148
|
+
smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
|
|
149
|
+
smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
|
|
150
|
+
smftools/preprocessing/recipes.py,sha256=cfKEpKW8TtQLe1CMdSHyPuIgKiWOPn7uP6uMIoRlnaQ,7063
|
|
151
|
+
smftools/preprocessing/reindex_references_adata.py,sha256=4oViEcWWSi7bnX3Yyf-DdSZBSocvuiqr4LC-jDFHwu0,1137
|
|
152
|
+
smftools/preprocessing/subsample_adata.py,sha256=ivJvJIOvEtyvAjqZ7cwEeVedm4QgJxCJEI7sFaTuI3w,2360
|
|
153
|
+
smftools/preprocessing/archives/add_read_length_and_mapping_qc.py,sha256=zD_Kxw3DvyOypfuSMGv0ESyt-02w4XlAAMqQxb7yDNQ,5700
|
|
154
|
+
smftools/preprocessing/archives/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
|
|
155
|
+
smftools/preprocessing/archives/mark_duplicates.py,sha256=kwfstcWb7KkqeNB321dB-NLe8yd9_hZsSmpL8pCVBQg,8747
|
|
156
|
+
smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
|
|
157
|
+
smftools/preprocessing/archives/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
|
|
158
|
+
smftools/tools/__init__.py,sha256=QV3asy5_lP9wcRzpNTfxGTCcpykkbNYvzxSMpFw4KXU,719
|
|
159
|
+
smftools/tools/calculate_umap.py,sha256=2arbAQdFOtnWoPq22TWicyr6fLYZ5PTNeZv_jdwuk_I,2491
|
|
160
|
+
smftools/tools/cluster_adata_on_methylation.py,sha256=UDC5lpW8fZ6O-16ETu-mbflLkNBKuIg7RIzQ9r7knvA,5760
|
|
161
|
+
smftools/tools/general_tools.py,sha256=YbobB6Zllz6cUq50yolGH9Jr6uuAMvEI4m3hiJ6FmAI,2561
|
|
162
|
+
smftools/tools/position_stats.py,sha256=Z7VW54wUVzH1RQ9xhP6KO7ewp-xeLybd07I5umV_aqM,24369
|
|
163
|
+
smftools/tools/read_stats.py,sha256=w3Zaim6l__Kt8EPCJKXTlMgO51Iy2Milj6yUb88HXiI,6324
|
|
164
|
+
smftools/tools/spatial_autocorrelation.py,sha256=uQkuPi2PJCj5lZzb33IWTL-e-p3J6PdMeM88rUFfQRw,21212
|
|
165
|
+
smftools/tools/subset_adata.py,sha256=nBbtAxCNteZCUBmPnZ9swQNyU74XgWM8aJHHWg2AuL0,1025
|
|
166
|
+
smftools/tools/archived/apply_hmm.py,sha256=pJXCULay0zbmubrwql368y7yiHAZr2bJhuGx2QUuKnE,9321
|
|
167
|
+
smftools/tools/archived/classifiers.py,sha256=mwSTpWUXBPjmUuV5i_SMG1lIPpHSMCzsKhl8wTbm-Og,36903
|
|
168
|
+
smftools/tools/archived/classify_methylated_features.py,sha256=Z0N2UKw3luD3CTQ8wcUvdnMY7w-8574OJbEcwzNsy88,2897
|
|
169
|
+
smftools/tools/archived/classify_non_methylated_features.py,sha256=IJERTozEs7IPL7K-VIjq2q2K36wRCW9iiNSYLAXasrA,3256
|
|
170
|
+
smftools/tools/archived/subset_adata_v1.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
|
|
171
|
+
smftools/tools/archived/subset_adata_v2.py,sha256=OKZoUpvdURPtckIQxGTWmOI5jLa-_EU62Xs3LyyehnA,1880
|
|
172
|
+
smftools-0.2.4.dist-info/METADATA,sha256=BVgWPtWTeDoNF6d1IOpvXyV0IE4fI5X_fLIs4nmVvJ4,9138
|
|
173
|
+
smftools-0.2.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
174
|
+
smftools-0.2.4.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
|
|
175
|
+
smftools-0.2.4.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
|
|
176
|
+
smftools-0.2.4.dist-info/RECORD,,
|
smftools/cli.py
DELETED
|
@@ -1,184 +0,0 @@
|
|
|
1
|
-
import click
|
|
2
|
-
import pandas as pd
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Dict, Optional
|
|
5
|
-
|
|
6
|
-
from . import load_adata
|
|
7
|
-
from .readwrite import merge_barcoded_anndatas_core, safe_read_h5ad, safe_write_h5ad
|
|
8
|
-
|
|
9
|
-
@click.group()
|
|
10
|
-
def cli():
|
|
11
|
-
"""Command-line interface for smftools."""
|
|
12
|
-
pass
|
|
13
|
-
|
|
14
|
-
####### Main processing workflow ###########
|
|
15
|
-
@cli.command()
|
|
16
|
-
@click.argument("config_path", type=click.Path(exists=True))
|
|
17
|
-
def load(config_path):
|
|
18
|
-
"""Load and process data from CONFIG_PATH."""
|
|
19
|
-
load_adata(config_path)
|
|
20
|
-
##########################################
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
####### Merging existing anndatas from an experiment that used two different demultiplexing rules #######
|
|
24
|
-
REQUIRED_KEYS = ("adata_single_path", "adata_double_path")
|
|
25
|
-
OPTIONAL_KEYS = (
|
|
26
|
-
"adata_single_backups_path",
|
|
27
|
-
"adata_double_backups_path",
|
|
28
|
-
"output_path",
|
|
29
|
-
"merged_filename",
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
def _read_config_csv(csv_path: Path) -> Dict[str, str]:
|
|
33
|
-
"""
|
|
34
|
-
Read a multi-row, two-column CSV of key,value pairs into a dict.
|
|
35
|
-
|
|
36
|
-
Supported features:
|
|
37
|
-
- Optional header ("key,value") or none.
|
|
38
|
-
- Comments starting with '#' and blank lines are ignored.
|
|
39
|
-
- If duplicate keys occur, the last one wins.
|
|
40
|
-
- Keys are matched literally against REQUIRED_KEYS/OPTIONAL_KEYS.
|
|
41
|
-
"""
|
|
42
|
-
try:
|
|
43
|
-
# Read as two columns regardless of header; comments ignored.
|
|
44
|
-
df = pd.read_csv(
|
|
45
|
-
csv_path,
|
|
46
|
-
dtype=str,
|
|
47
|
-
comment="#",
|
|
48
|
-
header=None, # treat everything as rows; we'll normalize below
|
|
49
|
-
usecols=[0, 1],
|
|
50
|
-
names=["key", "value"]
|
|
51
|
-
)
|
|
52
|
-
except Exception as e:
|
|
53
|
-
raise click.ClickException(f"Failed to read CSV: {e}") from e
|
|
54
|
-
|
|
55
|
-
# Drop completely empty rows
|
|
56
|
-
df = df.fillna("").astype(str)
|
|
57
|
-
df["key"] = df["key"].str.strip()
|
|
58
|
-
df["value"] = df["value"].str.strip()
|
|
59
|
-
df = df[(df["key"] != "") & (df["key"].notna())]
|
|
60
|
-
|
|
61
|
-
if df.empty:
|
|
62
|
-
raise click.ClickException("Config CSV is empty after removing comments/blank lines.")
|
|
63
|
-
|
|
64
|
-
# Remove an optional header row if present
|
|
65
|
-
if df.iloc[0]["key"].lower() in {"key", "keys"}:
|
|
66
|
-
df = df.iloc[1:]
|
|
67
|
-
df = df[(df["key"] != "") & (df["key"].notna())]
|
|
68
|
-
if df.empty:
|
|
69
|
-
raise click.ClickException("Config CSV contains only a header row.")
|
|
70
|
-
|
|
71
|
-
# Build dict; last occurrence of a key wins
|
|
72
|
-
cfg = {}
|
|
73
|
-
for k, v in zip(df["key"], df["value"]):
|
|
74
|
-
cfg[k] = v
|
|
75
|
-
|
|
76
|
-
# Validate required keys
|
|
77
|
-
missing = [k for k in REQUIRED_KEYS if not cfg.get(k)]
|
|
78
|
-
if missing:
|
|
79
|
-
raise click.ClickException(
|
|
80
|
-
"Missing required keys in CSV: "
|
|
81
|
-
+ ", ".join(missing)
|
|
82
|
-
+ "\nExpected keys:\n - "
|
|
83
|
-
+ "\n - ".join(REQUIRED_KEYS)
|
|
84
|
-
+ "\nOptional keys:\n - "
|
|
85
|
-
+ "\n - ".join(OPTIONAL_KEYS)
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
return cfg
|
|
89
|
-
|
|
90
|
-
def _resolve_output_path(cfg: Dict[str, str], single_path: Path, double_path: Path) -> Path:
|
|
91
|
-
"""Decide on the output .h5ad path based on CSV; create directories if needed."""
|
|
92
|
-
merged_filename = cfg.get("merged_filename") or f"merged_{single_path.stem}__{double_path.stem}.h5ad"
|
|
93
|
-
if not merged_filename.endswith(".h5ad"):
|
|
94
|
-
merged_filename += ".h5ad"
|
|
95
|
-
|
|
96
|
-
output_path_raw = cfg.get("output_path", "").strip()
|
|
97
|
-
|
|
98
|
-
if not output_path_raw:
|
|
99
|
-
out_dir = Path.cwd() / "merged_output"
|
|
100
|
-
out_dir.mkdir(parents=True, exist_ok=True)
|
|
101
|
-
return out_dir / merged_filename
|
|
102
|
-
|
|
103
|
-
output_path = Path(output_path_raw)
|
|
104
|
-
|
|
105
|
-
if output_path.suffix.lower() == ".h5ad":
|
|
106
|
-
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
107
|
-
return output_path
|
|
108
|
-
|
|
109
|
-
# Treat as directory
|
|
110
|
-
output_path.mkdir(parents=True, exist_ok=True)
|
|
111
|
-
return output_path / merged_filename
|
|
112
|
-
|
|
113
|
-
def _maybe_read_adata(label: str, primary: Path, backups: Optional[Path]):
    """Announce and perform the load of one AnnData via ``safe_read_h5ad``.

    Args:
        label: Human-readable name used in the progress message.
        primary: Path of the ``.h5ad`` file to read.
        backups: Backups location; when falsy, backup restoration is disabled.

    Returns:
        Whatever ``safe_read_h5ad`` returns for the chosen mode.
    """
    if not backups:
        click.echo(f"Loading {label} from {primary} with backups disabled ...")
        return safe_read_h5ad(primary, restore_backups=False)

    click.echo(f"Loading {label} from {primary} with backups at {backups} ...")
    return safe_read_h5ad(primary, backups_path=backups, restore_backups=True)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
@cli.command()
@click.argument("config_path", type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path))
def merge_barcoded_anndatas(config_path: Path):
    """
    Merge two AnnData objects from the same experiment that were demultiplexed
    under different end-barcoding requirements, using a 1-row CSV for config.

    CSV must include:
        - adata_single_path
        - adata_double_path

    Optional columns:
        - adata_single_backups_path
        - adata_double_backups_path
        - output_path (file or directory; default: ./merged_output/)
        - merged_filename (default: merged_<single>__<double>.h5ad)

    Example CSV:

        adata_single_path,adata_double_path,adata_single_backups_path,adata_double_backups_path,output_path,merged_filename
        /path/single.h5ad,/path/double.h5ad,,,merged_output,merged_run.h5ad
    """
    # NOTE(review): the visible part of _read_config_csv indexes "key"/"value"
    # columns; confirm that the column-wise layout described above is what the
    # reader actually accepts.
    try:
        cfg = _read_config_csv(config_path)

        single_path = Path(cfg["adata_single_path"]).expanduser().resolve()
        double_path = Path(cfg["adata_double_path"]).expanduser().resolve()

        for p, label in [(single_path, "adata_single_path"), (double_path, "adata_double_path")]:
            if not p.exists():
                raise click.ClickException(f"{label} does not exist: {p}")

        single_backups = Path(cfg["adata_single_backups_path"]).expanduser().resolve() if cfg.get("adata_single_backups_path") else None
        double_backups = Path(cfg["adata_double_backups_path"]).expanduser().resolve() if cfg.get("adata_double_backups_path") else None

        if single_backups and not single_backups.exists():
            raise click.ClickException(f"adata_single_backups_path does not exist: {single_backups}")
        if double_backups and not double_backups.exists():
            raise click.ClickException(f"adata_double_backups_path does not exist: {double_backups}")

        output_path = _resolve_output_path(cfg, single_path, double_path)

        # Load; the second element of each returned pair is not used here.
        adata_single, _ = _maybe_read_adata("single-barcoded AnnData", single_path, single_backups)
        adata_double, _ = _maybe_read_adata("double-barcoded AnnData", double_path, double_backups)

        click.echo("Merging AnnDatas ...")
        merged = merge_barcoded_anndatas_core(adata_single, adata_double)

        click.echo(f"Writing merged AnnData to: {output_path}")
        # Backups go under the current working directory. This was previously
        # spelled `output_path.cwd()`, which is the same classmethod call and
        # never depended on output_path.
        backup_dir = Path.cwd() / "merged_backups"
        safe_write_h5ad(merged, output_path, backup=True, backup_dir=backup_dir)

        click.secho(f"Done. Merged AnnData saved to {output_path}", fg="green")

    except click.ClickException:
        # Already user-facing; let click render it unchanged.
        raise
    except Exception as e:
        # Surface unexpected errors cleanly
        raise click.ClickException(f"Unexpected error: {e}") from e
|
|
183
|
-
|
|
184
|
-
################################################################################################################
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
# fast5_to_pod5
|
|
2
|
-
|
|
3
|
-
def fast5_to_pod5(fast5_dir, output_pod5='FAST5s_to_POD5.pod5'):
    """
    Convert Nanopore FAST5 files to a single POD5 file with `pod5 convert fast5`.

    Parameters:
        fast5_dir (str | Path | list | tuple): A directory containing FAST5 files, a single
            FAST5 file, or a list/tuple of FAST5 file paths.
        output_pod5 (str): The name of the output POD5.

    Returns:
        None
    """
    import subprocess
    from pathlib import Path

    if isinstance(fast5_dir, (list, tuple)):
        # Build a fresh list so tuple input works too (list + tuple raises TypeError).
        inputs = [str(path) for path in fast5_dir]
    elif Path(fast5_dir).is_file():
        inputs = [str(fast5_dir)]
    elif Path(fast5_dir).is_dir():
        # subprocess.run with an argument list does not go through a shell, so a
        # "*.fast5" pattern would reach pod5 unexpanded; expand it here instead.
        inputs = sorted(str(path) for path in Path(fast5_dir).glob("*.fast5"))
    else:
        inputs = []

    if not inputs:
        # Nothing to hand to pod5; skip the call rather than run it with no inputs.
        print(f"No FAST5 inputs found for: {fast5_dir}")
        return

    subprocess.run(["pod5", "convert", "fast5", *inputs, "--output", str(output_pod5)])
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
from .align_and_sort_BAM import align_and_sort_BAM
|
|
2
|
-
from .aligned_BAM_to_bed import aligned_BAM_to_bed
|
|
3
|
-
from .bam_qc import bam_qc
|
|
4
|
-
from .bed_to_bigwig import bed_to_bigwig
|
|
5
|
-
from .binarize_converted_base_identities import binarize_converted_base_identities
|
|
6
|
-
from .canoncall import canoncall
|
|
7
|
-
from .complement_base_list import complement_base_list
|
|
8
|
-
from .converted_BAM_to_adata_II import converted_BAM_to_adata_II
|
|
9
|
-
from .concatenate_fastqs_to_bam import concatenate_fastqs_to_bam
|
|
10
|
-
from .count_aligned_reads import count_aligned_reads
|
|
11
|
-
from .demux_and_index_BAM import demux_and_index_BAM
|
|
12
|
-
from .discover_input_files import *
|
|
13
|
-
from .extract_base_identities import extract_base_identities
|
|
14
|
-
from .extract_mods import extract_mods
|
|
15
|
-
from .extract_read_features_from_bam import extract_read_features_from_bam
|
|
16
|
-
from .extract_read_lengths_from_bed import extract_read_lengths_from_bed
|
|
17
|
-
from .extract_readnames_from_BAM import extract_readnames_from_BAM
|
|
18
|
-
from .find_conversion_sites import find_conversion_sites
|
|
19
|
-
from .generate_converted_FASTA import convert_FASTA_record, generate_converted_FASTA
|
|
20
|
-
from .get_chromosome_lengths import get_chromosome_lengths
|
|
21
|
-
from .get_native_references import get_native_references
|
|
22
|
-
from .index_fasta import index_fasta
|
|
23
|
-
from .make_dirs import make_dirs
|
|
24
|
-
from .make_modbed import make_modbed
|
|
25
|
-
from .modcall import modcall
|
|
26
|
-
from .modkit_extract_to_adata import modkit_extract_to_adata
|
|
27
|
-
from .modQC import modQC
|
|
28
|
-
from .one_hot_encode import one_hot_encode
|
|
29
|
-
from .ohe_batching import ohe_batching
|
|
30
|
-
from .one_hot_decode import one_hot_decode
|
|
31
|
-
from .ohe_layers_decode import ohe_layers_decode
|
|
32
|
-
from .plot_bed_histograms import plot_bed_histograms
|
|
33
|
-
from .run_multiqc import run_multiqc
|
|
34
|
-
from .separate_bam_by_bc import separate_bam_by_bc
|
|
35
|
-
from .split_and_index_BAM import split_and_index_BAM
|
|
36
|
-
|
|
37
|
-
__all__ = [
|
|
38
|
-
"align_and_sort_BAM",
|
|
39
|
-
"aligned_BAM_to_bed",
|
|
40
|
-
"bam_qc",
|
|
41
|
-
"bed_to_bigwig",
|
|
42
|
-
"binarize_converted_base_identities",
|
|
43
|
-
"canoncall",
|
|
44
|
-
"complement_base_list",
|
|
45
|
-
"converted_BAM_to_adata_II",
|
|
46
|
-
"concatenate_fastqs_to_bam",
|
|
47
|
-
"count_aligned_reads",
|
|
48
|
-
"demux_and_index_BAM",
|
|
49
|
-
"extract_base_identities",
|
|
50
|
-
"extract_mods",
|
|
51
|
-
"extract_read_features_from_bam",
|
|
52
|
-
"extract_read_lengths_from_bed",
|
|
53
|
-
"extract_readnames_from_BAM",
|
|
54
|
-
"find_conversion_sites",
|
|
55
|
-
"convert_FASTA_record",
|
|
56
|
-
"generate_converted_FASTA",
|
|
57
|
-
"get_chromosome_lengths",
|
|
58
|
-
"get_native_references",
|
|
59
|
-
"index_fasta",
|
|
60
|
-
"make_dirs",
|
|
61
|
-
"make_modbed",
|
|
62
|
-
"modcall",
|
|
63
|
-
"modkit_extract_to_adata",
|
|
64
|
-
"modQC",
|
|
65
|
-
"one_hot_encode",
|
|
66
|
-
"ohe_batching",
|
|
67
|
-
"one_hot_decode",
|
|
68
|
-
"ohe_layers_decode",
|
|
69
|
-
"plot_bed_histograms",
|
|
70
|
-
"run_multiqc",
|
|
71
|
-
"separate_bam_by_bc",
|
|
72
|
-
"split_and_index_BAM"
|
|
73
|
-
]
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
## align_and_sort_BAM
|
|
2
|
-
|
|
3
|
-
def align_and_sort_BAM(fasta,
                       input,
                       bam_suffix='.bam',
                       output_directory='aligned_outputs',
                       make_bigwigs=False,
                       threads=None,
                       aligner='minimap2',
                       aligner_args=None):
    """
    A wrapper for running an aligner (minimap2 or dorado) followed by samtools sort and index.

    Parameters:
        fasta (str): File path to the reference genome to align to.
        input (str): File path to the basecalled file to align.
        bam_suffix (str): The suffix to use for the BAM file.
        output_directory (str): A file path to the directory to output all the analyses. Created if missing.
        make_bigwigs (bool): Whether to make bigwigs. Currently not used by this function.
        threads (int): Number of additional threads to use
        aligner (str): Aligner to use. minimap2 and dorado options
        aligner_args (list): list of optional parameters to use for the alignment.
            Defaults to ['-a', '-x', 'map-ont', '--MD', '-Y', '-y', '-N', '5', '--secondary=no'].

    Returns:
        None
            The function writes out files for: 1) An aligned BAM, 2) an aligned_sorted BAM, 3) an index file for the aligned_sorted BAM
    """
    import subprocess
    import os

    if aligner not in ('minimap2', 'dorado'):
        print(f'Aligner not recognized: {aligner}. Choose from minimap2 and dorado')
        return

    # None instead of a list literal in the signature avoids a shared mutable default.
    if aligner_args is None:
        aligner_args = ['-a', '-x', 'map-ont', '--MD', '-Y', '-y', '-N', '5', '--secondary=no']
    else:
        aligner_args = list(aligner_args)

    # Everything before the first '.' of the file name. This matches the names
    # the previous suffix-splitting logic produced, and also works for inputs
    # that have no extension at all.
    input_stem = os.path.basename(input).split('.')[0]
    input_as_fastq = input_stem + '.fastq'

    aligned_BAM = os.path.join(output_directory, input_stem) + "_aligned"
    aligned_output = aligned_BAM + bam_suffix
    aligned_sorted_output = f"{aligned_BAM}_sorted" + bam_suffix

    # The output files are opened for writing below, so the directory must exist.
    os.makedirs(output_directory, exist_ok=True)

    threads = str(threads) if threads else None

    if aligner == 'minimap2':
        print(f"Converting BAM to FASTQ: {input}")
        try:
            with open(input_as_fastq, "w") as fastq_handle:
                subprocess.run(['samtools', 'fastq', input], stdout=fastq_handle)
            print(f"Aligning FASTQ to Reference: {input_as_fastq}")
            if threads:
                minimap_command = ['minimap2'] + aligner_args + ['-t', threads, fasta, input_as_fastq]
            else:
                minimap_command = ['minimap2'] + aligner_args + [fasta, input_as_fastq]
            with open(aligned_output, "w") as aligned_handle:
                subprocess.run(minimap_command, stdout=aligned_handle)
        finally:
            # Always clean up the intermediate FASTQ, even if a step raised.
            if os.path.exists(input_as_fastq):
                os.remove(input_as_fastq)

    else:
        # Run dorado aligner
        print(f"Aligning BAM to Reference: {input}")
        if threads:
            alignment_command = ["dorado", "aligner", "-t", threads] + aligner_args + [fasta, input]
        else:
            alignment_command = ["dorado", "aligner"] + aligner_args + [fasta, input]
        with open(aligned_output, "w") as aligned_handle:
            subprocess.run(alignment_command, stdout=aligned_handle)

    samtools_threads = ["-@", threads] if threads else []

    # Sort the BAM on positional coordinates
    print(f"Sorting BAM: {aligned_output}")
    subprocess.run(["samtools", "sort"] + samtools_threads + ["-o", aligned_sorted_output, aligned_output])

    # Create a BAM index file
    print(f"Indexing BAM: {aligned_sorted_output}")
    subprocess.run(["samtools", "index"] + samtools_threads + [aligned_sorted_output])
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
## bam_qc
|
|
2
|
-
|
|
3
|
-
def bam_qc(bam_files, bam_qc_dir, threads, modality, stats=True, flagstats=True, idxstats=True):
    """
    Run the selected samtools QC reports on each BAM and save them as text files.

    Parameters:
        - bam_files: Iterable of BAM file paths.
        - bam_qc_dir: Directory the reports are written to; created if missing.
        - threads: Thread count passed to samtools via `-@`; omitted when falsy.
        - modality: 'conversion' or 'direct'. Currently has no effect on the reports.
        - stats: Write `<name>_stats.txt` from `samtools stats` if True.
        - flagstats: Write `<name>_flagstat.txt` from `samtools flagstat` if True.
        - idxstats: Write `<name>_idxstats.txt` from `samtools idxstats` if True.
    """
    import os
    import subprocess

    # Ensure the QC output directory exists
    os.makedirs(bam_qc_dir, exist_ok=True)

    thread_args = ["-@", str(threads)] if threads else []

    # (enabled, samtools subcommand) in the order the reports are produced;
    # the subcommand name doubles as the report file tag.
    reports = (
        (stats, "stats"),
        (flagstats, "flagstat"),
        (idxstats, "idxstats"),
    )

    for bam_path in bam_files:
        sample = os.path.basename(bam_path).replace(".bam", "")  # file name with ".bam" removed

        for enabled, subcommand in reports:
            if not enabled:
                continue
            report_path = os.path.join(bam_qc_dir, f"{sample}_{subcommand}.txt")
            qc_command = ["samtools", subcommand, *thread_args, bam_path]
            print(f"Running: {' '.join(qc_command)} > {report_path}")
            with open(report_path, "w") as report_handle:
                subprocess.run(qc_command, stdout=report_handle)

    # No modality-specific QC is implemented for either 'conversion' or 'direct'.

    print("QC processing completed.")
|