smftools 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. smftools/__init__.py +2 -6
  2. smftools/_version.py +1 -1
  3. smftools/cli/__init__.py +0 -0
  4. smftools/cli/cli_flows.py +94 -0
  5. smftools/cli/hmm_adata.py +338 -0
  6. smftools/cli/load_adata.py +577 -0
  7. smftools/cli/preprocess_adata.py +363 -0
  8. smftools/cli/spatial_adata.py +564 -0
  9. smftools/cli_entry.py +435 -0
  10. smftools/config/conversion.yaml +11 -6
  11. smftools/config/deaminase.yaml +12 -7
  12. smftools/config/default.yaml +36 -25
  13. smftools/config/direct.yaml +25 -1
  14. smftools/config/discover_input_files.py +115 -0
  15. smftools/config/experiment_config.py +109 -12
  16. smftools/informatics/__init__.py +13 -7
  17. smftools/informatics/archived/fast5_to_pod5.py +43 -0
  18. smftools/informatics/archived/helpers/archived/__init__.py +71 -0
  19. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
  20. smftools/informatics/{helpers → archived/helpers/archived}/aligned_BAM_to_bed.py +6 -4
  21. smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
  22. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
  23. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
  24. smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
  25. smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
  26. smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +1 -1
  27. smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
  28. smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
  29. smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
  30. smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
  31. smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
  32. smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
  33. smftools/informatics/{helpers → archived/helpers/archived}/plot_bed_histograms.py +0 -19
  34. smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +6 -5
  35. smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +7 -7
  36. smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
  37. smftools/informatics/bam_functions.py +812 -0
  38. smftools/informatics/basecalling.py +67 -0
  39. smftools/informatics/bed_functions.py +366 -0
  40. smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +42 -30
  41. smftools/informatics/fasta_functions.py +255 -0
  42. smftools/informatics/h5ad_functions.py +197 -0
  43. smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +142 -59
  44. smftools/informatics/modkit_functions.py +129 -0
  45. smftools/informatics/ohe.py +160 -0
  46. smftools/informatics/pod5_functions.py +224 -0
  47. smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
  48. smftools/plotting/autocorrelation_plotting.py +1 -3
  49. smftools/plotting/general_plotting.py +1037 -362
  50. smftools/preprocessing/__init__.py +2 -0
  51. smftools/preprocessing/append_base_context.py +3 -3
  52. smftools/preprocessing/append_binary_layer_by_base_context.py +4 -4
  53. smftools/preprocessing/binarize.py +17 -0
  54. smftools/preprocessing/binarize_on_Youden.py +2 -2
  55. smftools/preprocessing/calculate_position_Youden.py +1 -1
  56. smftools/preprocessing/calculate_read_modification_stats.py +1 -1
  57. smftools/preprocessing/filter_reads_on_modification_thresholds.py +19 -19
  58. smftools/preprocessing/flag_duplicate_reads.py +1 -1
  59. smftools/readwrite.py +266 -140
  60. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/METADATA +10 -9
  61. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/RECORD +82 -70
  62. smftools-0.2.3.dist-info/entry_points.txt +2 -0
  63. smftools/cli.py +0 -184
  64. smftools/informatics/fast5_to_pod5.py +0 -24
  65. smftools/informatics/helpers/__init__.py +0 -73
  66. smftools/informatics/helpers/align_and_sort_BAM.py +0 -86
  67. smftools/informatics/helpers/bam_qc.py +0 -66
  68. smftools/informatics/helpers/bed_to_bigwig.py +0 -39
  69. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -378
  70. smftools/informatics/helpers/discover_input_files.py +0 -100
  71. smftools/informatics/helpers/index_fasta.py +0 -12
  72. smftools/informatics/helpers/make_dirs.py +0 -21
  73. smftools/informatics/readwrite.py +0 -106
  74. smftools/informatics/subsample_fasta_from_bed.py +0 -47
  75. smftools/load_adata.py +0 -1346
  76. smftools-0.2.1.dist-info/entry_points.txt +0 -2
  77. /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
  78. /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
  79. /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
  80. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +0 -0
  81. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
  82. /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
  83. /smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +0 -0
  84. /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
  85. /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
  86. /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
  87. /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
  88. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
  89. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
  90. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
  91. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
  92. /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
  93. /smftools/informatics/{helpers/binarize_converted_base_identities.py → binarize_converted_base_identities.py} +0 -0
  94. /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
  95. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/WHEEL +0 -0
  96. {smftools-0.2.1.dist-info → smftools-0.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,15 +1,21 @@
1
- smftools/__init__.py,sha256=OXW2_b5NUGZhTXsH8qY0PzfJnaz8T2y6lCqMnSVSuIk,676
1
+ smftools/__init__.py,sha256=aZlrZBVexf_nEnzQeZu7NU_Kp6OnxcYpLo1KPImi7sI,599
2
2
  smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
3
- smftools/_version.py,sha256=tC9CwL4Nm8brVXJnZNGk_eoZaJj6eOtLKtOrdJMrpoI,21
4
- smftools/cli.py,sha256=MNObu279y322JHkmugssM0rVHo0UQ1zboTG9MlqnMgQ,7033
5
- smftools/load_adata.py,sha256=VJMUBqRC8InIj48JMnkZKLuqEz1u8uSTNx_ARl0cn7M,74313
6
- smftools/readwrite.py,sha256=ObNxBj6Y_zIHqQpAvmHAddAypLjg7F3qARF-sH-V3do,42706
3
+ smftools/_version.py,sha256=X0PliCRFAeVnSTceUeHX1eM0j1HFhGFDWCRxLdde2Bs,21
4
+ smftools/cli_entry.py,sha256=_QdtEKcVK5o-e5s9ETB9sOIdftPVlrDxvvjBKcP6YNk,14680
5
+ smftools/readwrite.py,sha256=ExKZHNZ0QB-PtSck08drXfHTqbPeSUTHiYhv951SH1s,45994
6
+ smftools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ smftools/cli/cli_flows.py,sha256=xRiFUThoAL3LX1xdXaHVg4LjyJI4uNpGsc9aQ_wVCto,4941
8
+ smftools/cli/hmm_adata.py,sha256=PApUJW0lO4kcLjsiqqQopXgL3Dg-AascIqJrgvSY1Rg,15916
9
+ smftools/cli/load_adata.py,sha256=Qt1ej-osyJ47fpBkGaSDgR1F8E4aBNAdcXeBAGM-Lqg,29100
10
+ smftools/cli/preprocess_adata.py,sha256=EKGbSTli7qvL44OQUmMalYJjsH9vn3w4Rx7U7BL0ybs,20991
11
+ smftools/cli/spatial_adata.py,sha256=AX6iyBfbXud9actteTvDuaQUU_SE3SyBIeknR317g34,30212
7
12
  smftools/config/__init__.py,sha256=ObUnnR7aRSoD_uvpmsxA_BUFt4NOOfWNopDVCqjp7tg,69
8
- smftools/config/conversion.yaml,sha256=rJGhrVd95p6_6OVxLq2lvobJu8SGzNYI80jU0fLeK_g,795
9
- smftools/config/deaminase.yaml,sha256=Vh3Wg0bCb88S20Ob-8zi3eQJ1g_pcBulR9pPbAX9U1o,1138
10
- smftools/config/default.yaml,sha256=0DYIvvdbzoB2eJgsoxEzx4Rc0TVGaiHa85nxo1VwCqQ,9704
11
- smftools/config/direct.yaml,sha256=2F_fGploWW3f88Y7sTZ68Vk9fgNaO-sb5AK-Cutc2TQ,735
12
- smftools/config/experiment_config.py,sha256=zQhWaag9hPuexnTOqZ-Od--c3iHs18c4Wc2sU-LOyts,52872
13
+ smftools/config/conversion.yaml,sha256=HrFz2f9QRe1RuhmgU6ZtMHaM4ZzY61_aLcugsmpV40Q,969
14
+ smftools/config/deaminase.yaml,sha256=mw2aY222y2xg08Rs5CWvjlrXo3vaEim7JwBThA80y4o,1349
15
+ smftools/config/default.yaml,sha256=3IrX0OrUyjhVc3CqTjM8uiprKWrrBdVtil4YhtVzKdQ,10233
16
+ smftools/config/direct.yaml,sha256=SBhdtG7PKm-z5xxQmA7JV3NQsGnUJ4p58fGH8BnoMrM,2137
17
+ smftools/config/discover_input_files.py,sha256=G9vyAmK_n_8Ur5dOnumevVLG3ydHchMy_JQrJdiuuz0,3892
18
+ smftools/config/experiment_config.py,sha256=d_6f_Uv3CY-1orHbxpHtAZDsY2gwxw079_pNgR9wDUg,58837
13
19
  smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
14
20
  smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
15
21
  smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
@@ -24,58 +30,63 @@ smftools/hmm/display_hmm.py,sha256=3WuQCPvM3wPfzAdgbhfiBTd0g5mQdx9HTUdqAxs2aj4,8
24
30
  smftools/hmm/hmm_readwrite.py,sha256=DjJ3hunpBQ7N0GVvxL7-0QUas_SkA88LVgL72mVK2cI,359
25
31
  smftools/hmm/nucleosome_hmm_refinement.py,sha256=nQWimvse6dclcXhbU707rGbRVMKHM0mU_ZhH9g2yCMA,4641
26
32
  smftools/hmm/train_hmm.py,sha256=srzRcB9LEmNuHyBM0R5Z0VEnxecifQt-MoaJhADxGT8,2477
27
- smftools/informatics/__init__.py,sha256=8tvVG08L_Z-bP28PusBtVt1UTnHxuKi0lImLNcP7qso,338
28
- smftools/informatics/basecall_pod5s.py,sha256=Ynmxscsxj6qp-zVY0RWodq513oDuHDaHnpqoepB3RUU,3930
29
- smftools/informatics/fast5_to_pod5.py,sha256=h-cUZX5sWwPCkQ4g3kyz3koSBjZOWI6EjSpWO8zib1I,862
30
- smftools/informatics/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
31
- smftools/informatics/subsample_fasta_from_bed.py,sha256=YqYV09rvEQdeiS5hTTrKa8xYmJfeM3Vk-UUqwpw0qBk,1983
32
- smftools/informatics/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
33
+ smftools/informatics/__init__.py,sha256=vLvSrCtCVYRUCCNLW7fL3ltPr3h_w8FhT--V6el3ZkQ,1191
34
+ smftools/informatics/bam_functions.py,sha256=otgl3TRPLn5Fnsx1jXX75du90k3XB3RHGzlfamvETsU,32670
35
+ smftools/informatics/basecalling.py,sha256=jc39jneaa8Gt1azutHgBGWHqCoPeTVSGBu3kyQwP7xM,3460
36
+ smftools/informatics/bed_functions.py,sha256=uETVxT5mRWDNn7t0OqhDi8kDiq7uDakeHB1L2JsP4PA,13377
37
+ smftools/informatics/binarize_converted_base_identities.py,sha256=yOepGaNBGfZJEsMiLRwKauvsmaHn_JRrxaGp8LmKAXs,7778
38
+ smftools/informatics/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
39
+ smftools/informatics/converted_BAM_to_adata.py,sha256=Y2kQNWly0WjjGN9El9zL1nLfjVxmPLWONvX5VNgZUh0,22554
40
+ smftools/informatics/fasta_functions.py,sha256=5IfTkX_GIj5gRJB9PjL_WjyEktpBHwGsmS_nnO1ETjI,9790
41
+ smftools/informatics/h5ad_functions.py,sha256=iAOxJjhaDslTUC78kjUHlCELigDl73sWo0fvXcKuFoI,7824
42
+ smftools/informatics/modkit_extract_to_adata.py,sha256=TrgrL_IgfqzNJ9qZ_2EvF_B38_Syw8mP38Sl7v0Riwo,55278
43
+ smftools/informatics/modkit_functions.py,sha256=lywjeqAJ7Cdd7k-0P3YaL_9cAZvEDTDLh91rIRcSMWE,5604
44
+ smftools/informatics/ohe.py,sha256=MEmh3ps-ZSSyXuIrr5LMzQvCsDJRCYiy7JS-WD4TlYs,5805
45
+ smftools/informatics/pod5_functions.py,sha256=vxwhD_d_iWpJydIpbf0uce7VGHm8sBnCwb7tLNpYBc8,9859
46
+ smftools/informatics/run_multiqc.py,sha256=n6LvQuGQpLfsutVGmgvHfV0SV5PqTQ8wa_SeKOjRssM,1052
33
47
  smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
34
48
  smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
49
+ smftools/informatics/archived/basecall_pod5s.py,sha256=Ynmxscsxj6qp-zVY0RWodq513oDuHDaHnpqoepB3RUU,3930
35
50
  smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
36
51
  smftools/informatics/archived/conversion_smf.py,sha256=QhlISVi3Z-XqFKyDG_CenLojovAt5-ZhuVe9hus36lg,7177
37
52
  smftools/informatics/archived/deaminase_smf.py,sha256=mNeg1mIYYVLIiW8powEpz0CqrGRDsrmY5-aoIgwMGHs,7221
38
53
  smftools/informatics/archived/direct_smf.py,sha256=ylPGFBvRLdxLHeDJjAwq98j8Q8_lfGK3k5JJnQxrwJw,7485
54
+ smftools/informatics/archived/fast5_to_pod5.py,sha256=TRG_FYYGCGWUPzZCt0ZqzB8gQv_HKvkssp9nTctWzXU,1398
39
55
  smftools/informatics/archived/print_bam_query_seq.py,sha256=8Z2ZJEOOlfWYUXiZGjteLWU4yTgvV8KQzEIBHUmamGM,838
40
- smftools/informatics/helpers/__init__.py,sha256=EgCIcJ6o3_R3vzsFwhtvOcKKWnmmMmN_GZXDQ_K_-NI,2693
41
- smftools/informatics/helpers/align_and_sort_BAM.py,sha256=gy_BU6KfDd584LPFybJ7JzNwfCD95dZXx6MccnT4Qro,3725
42
- smftools/informatics/helpers/aligned_BAM_to_bed.py,sha256=e6yg5-yHcw0QPFI3oRVHrhfAUj7US77Ir2VVzE3c-x8,3374
43
- smftools/informatics/helpers/bam_qc.py,sha256=IlrXXpCdTYIv_89SE8D5tJ1wtTzxWGjk9vc-rbC1UjU,2430
44
- smftools/informatics/helpers/bed_to_bigwig.py,sha256=AazYEZzKgKgukSFwCpeiApzxh1kbt11X4RFqRIiBIaY,1466
45
- smftools/informatics/helpers/binarize_converted_base_identities.py,sha256=yOepGaNBGfZJEsMiLRwKauvsmaHn_JRrxaGp8LmKAXs,7778
46
- smftools/informatics/helpers/canoncall.py,sha256=5WS6lwukc_xYTdPQy0OSj-WLbx0Rg70Cun1lCucY7w8,1741
47
- smftools/informatics/helpers/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
48
- smftools/informatics/helpers/concatenate_fastqs_to_bam.py,sha256=0jy4H1ORuqaarsznv9tS1SM8CCRjaaD20NMknNvQPv0,16212
49
- smftools/informatics/helpers/converted_BAM_to_adata.py,sha256=sRmOtn0kNosLYfogqslDHg1Azk51l6nfNOLgQOnQjlA,14591
50
- smftools/informatics/helpers/converted_BAM_to_adata_II.py,sha256=9Tz-qWtK9v1DTlK6yManvhOlMcaHFQUmmrRZQ5eiECw,22229
51
- smftools/informatics/helpers/count_aligned_reads.py,sha256=uYyUYglF1asiaoxr-LKxPMUEbfyD7FS-dumTg2hJHzQ,2170
52
- smftools/informatics/helpers/demux_and_index_BAM.py,sha256=2B_UiU05ln3gYvcN9aC_w6qs8j_WAF4pHWZekAYsXm4,2114
53
- smftools/informatics/helpers/discover_input_files.py,sha256=hUes2iKBQW_sVmAYD-1JnLD9Ub-COEHzrRKWNFipl0g,3725
54
- smftools/informatics/helpers/extract_base_identities.py,sha256=2yvr5uff9ah0jylFjNMt7oRJb1z_YdhvM6htSxI0frg,3038
55
- smftools/informatics/helpers/extract_mods.py,sha256=MbSIiyj3zx7WlSSWMRPriLMkBtxYc1EWZiAAirMVgqA,3865
56
- smftools/informatics/helpers/extract_read_features_from_bam.py,sha256=SYAb4Q1HxiJzCx5bIz86MdH_TvVPsRAVodZD9082HGY,1491
57
- smftools/informatics/helpers/extract_read_lengths_from_bed.py,sha256=Cw39wgp1eBTV45Wk1l0c9l-upBW5N2OcgyWXTAXln90,678
58
- smftools/informatics/helpers/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
59
- smftools/informatics/helpers/find_conversion_sites.py,sha256=JPlDipmzeCBkV_T6esGD5ptwmbQmk8gJMTh7NMaSYd4,2480
60
- smftools/informatics/helpers/generate_converted_FASTA.py,sha256=UniQfERNt4FC5L8T1tzr4cLQOJc3wMBPhuWmC-lC8Fs,3747
61
- smftools/informatics/helpers/get_chromosome_lengths.py,sha256=sLumLrGsU_Xg_oJcdOpQyjUGpJoT2HbcmxWwbwzXUlE,1036
62
- smftools/informatics/helpers/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
63
- smftools/informatics/helpers/index_fasta.py,sha256=N3IErfSiavYldeaat8xcQgA1MpykoQHcE0gHUeWuClE,267
64
- smftools/informatics/helpers/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
65
- smftools/informatics/helpers/make_modbed.py,sha256=cOQ97gPfRiCcw_fqboxousXIiOYjp78IFYLbu749U1Y,939
66
- smftools/informatics/helpers/modQC.py,sha256=LeOBObG8gAVVdgESIMceYhd5AW1gfN7ABo91OQtOzTM,1041
67
- smftools/informatics/helpers/modcall.py,sha256=LVPrdMNVp2gyQTJ4BNp8NJNm89AueDjsKaY7Gqkluho,1777
68
- smftools/informatics/helpers/modkit_extract_to_adata.py,sha256=yjG_Onh6YgwpE11I8mgQyN6F-8yStJgvDcI38L13i4A,52098
69
- smftools/informatics/helpers/ohe_batching.py,sha256=QVOiyl9fYHNIFWM23afYnQo0uaOjf1NR3ASKGVSrmuw,2975
70
- smftools/informatics/helpers/ohe_layers_decode.py,sha256=gIgUC9L8TFLi-fTnjR4PRzXdUaH5D6WL2Hump6XOoy0,1042
71
- smftools/informatics/helpers/one_hot_decode.py,sha256=3n4rzY8_aC9YKmgrftsguMsH7fUyQ-DbWmrOYF6la9s,906
72
- smftools/informatics/helpers/one_hot_encode.py,sha256=5hHigA6-SZLK84WH_RHo06F_6aTg7S3TJgvSr8gxGX8,1968
73
- smftools/informatics/helpers/plot_bed_histograms.py,sha256=sdtz_ieU_5rz8WyfAzjxbzY_w8kLdE_Rklvjax1hl3Q,10442
74
- smftools/informatics/helpers/run_multiqc.py,sha256=qkw48DeBdTEqzhKFGjMUlvNmTehp8wRPkcxdkwERkHc,980
75
- smftools/informatics/helpers/separate_bam_by_bc.py,sha256=WJZwKCYODUvzFaVWwX3SUE8sxEXmeYmSi7Dl9h2J2EY,1802
76
- smftools/informatics/helpers/split_and_index_BAM.py,sha256=yowMusTGoC7uRD0jAwOHzBegX6MV7f-uY-XSzkX5cBw,1253
77
- smftools/informatics/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
78
- smftools/informatics/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
56
+ smftools/informatics/archived/subsample_fasta_from_bed.py,sha256=7YTKhXg_mtP4KWpnD-TB4nuFEL4crOa9_d84IJKllyQ,1633
57
+ smftools/informatics/archived/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
58
+ smftools/informatics/archived/helpers/archived/__init__.py,sha256=DiiBerFJAxZeG5y0ScpJSaVBJ8b4XWdfEJCh8Q7k8jU,2783
59
+ smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py,sha256=yaRfhQDh3HpsSTme6QnSqBgElCS0kv2G6TunhvR1weY,5493
60
+ smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py,sha256=N3NAOaoSt_M4V48vtTP_m_iF1tRuNIPS_uNJ3Y0IA4E,3391
61
+ smftools/informatics/archived/helpers/archived/bam_qc.py,sha256=PWl3dViCHGOcjB4UKkxBFz34Gc0PXHVTHjpYVNckVH0,7975
62
+ smftools/informatics/archived/helpers/archived/bed_to_bigwig.py,sha256=Bg9wFsavUU9Ha57n_99vYlYpVcbDUz3tLtYJ7ZFVR9k,2986
63
+ smftools/informatics/archived/helpers/archived/canoncall.py,sha256=5WS6lwukc_xYTdPQy0OSj-WLbx0Rg70Cun1lCucY7w8,1741
64
+ smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py,sha256=6GTHXG1dfaC8rBin5NthG3xgyGqOsT6wIGxJVCmCq58,9774
65
+ smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py,sha256=sRmOtn0kNosLYfogqslDHg1Azk51l6nfNOLgQOnQjlA,14591
66
+ smftools/informatics/archived/helpers/archived/count_aligned_reads.py,sha256=ZF_kkzAf1RvM4PwDYhxD36UiuVuMM_MBvZgiXom1NQ0,2176
67
+ smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py,sha256=KmU7nqGQ-MfDrp8h3txbToGn4h95Rkvg0WEiuext-vY,2000
68
+ smftools/informatics/archived/helpers/archived/extract_base_identities.py,sha256=CaFqNBjkDujYlyiUnOeRock1OQWs3CeiD3yTL96sjIs,3043
69
+ smftools/informatics/archived/helpers/archived/extract_mods.py,sha256=Mrs7mrLFgCTiRGfPFSyvJm6brq--LGzZrNDiFB-jynI,3895
70
+ smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py,sha256=SYAb4Q1HxiJzCx5bIz86MdH_TvVPsRAVodZD9082HGY,1491
71
+ smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py,sha256=Cw39wgp1eBTV45Wk1l0c9l-upBW5N2OcgyWXTAXln90,678
72
+ smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
73
+ smftools/informatics/archived/helpers/archived/find_conversion_sites.py,sha256=JPlDipmzeCBkV_T6esGD5ptwmbQmk8gJMTh7NMaSYd4,2480
74
+ smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py,sha256=Us6iH1cIhsXDnTvDxI-FEHB6ndbB30hd1ss-9dIoWVE,3819
75
+ smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py,sha256=BEroXshYSpjf5wt_vrEAFiTJmSuf-kvD-Z1B_1gusME,1000
76
+ smftools/informatics/archived/helpers/archived/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
77
+ smftools/informatics/archived/helpers/archived/index_fasta.py,sha256=w6xHFSaoXVk-YWZWftZ9Xv8rywZ_IuuIouLQ12KL3ro,779
78
+ smftools/informatics/archived/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
79
+ smftools/informatics/archived/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
80
+ smftools/informatics/archived/helpers/archived/make_modbed.py,sha256=Wh0UCSOL4fMZbWYK-3oGGHwJtqPurJ3Bl6wJWBaTXoM,923
81
+ smftools/informatics/archived/helpers/archived/modQC.py,sha256=pz2EscFgO-j-9dfNgNDseweXXqM5-a-Rj2abBLErLd0,1051
82
+ smftools/informatics/archived/helpers/archived/modcall.py,sha256=LVPrdMNVp2gyQTJ4BNp8NJNm89AueDjsKaY7Gqkluho,1777
83
+ smftools/informatics/archived/helpers/archived/ohe_batching.py,sha256=QVOiyl9fYHNIFWM23afYnQo0uaOjf1NR3ASKGVSrmuw,2975
84
+ smftools/informatics/archived/helpers/archived/ohe_layers_decode.py,sha256=gIgUC9L8TFLi-fTnjR4PRzXdUaH5D6WL2Hump6XOoy0,1042
85
+ smftools/informatics/archived/helpers/archived/one_hot_decode.py,sha256=3n4rzY8_aC9YKmgrftsguMsH7fUyQ-DbWmrOYF6la9s,906
86
+ smftools/informatics/archived/helpers/archived/one_hot_encode.py,sha256=5hHigA6-SZLK84WH_RHo06F_6aTg7S3TJgvSr8gxGX8,1968
87
+ smftools/informatics/archived/helpers/archived/plot_bed_histograms.py,sha256=78i0mYFuElTPGA2Dt1feO6Z4Grh1Nro3m-F8D5FRBOw,9914
88
+ smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py,sha256=pCLev0OQji1jBdVr25lI_gt9fsozSG8vh7TQkE_UHnY,1800
89
+ smftools/informatics/archived/helpers/archived/split_and_index_BAM.py,sha256=Q7I5qJ5JjW6mSKysfl9NdlFZ6LIy3C8G5rGmG7cn2eA,1224
79
90
  smftools/machine_learning/__init__.py,sha256=cWyGN_QVcssqBr_VVr7xh2Inz0P7ylqUmBBcpMgsK0k,257
80
91
  smftools/machine_learning/data/__init__.py,sha256=xbfLE-gNjdgxvZ9LKTdvjAtbIHOcs2TR0Gz3YRFbo38,113
81
92
  smftools/machine_learning/data/anndata_data_module.py,sha256=ktrdMVMk5yhIUrnu-G_Xf3y7G-KP9PyhYZhobv8TCVg,10063
@@ -105,17 +116,18 @@ smftools/machine_learning/utils/__init__.py,sha256=yOpzBc9AXbarSRfN8Ixh2Z1uWLGpg
105
116
  smftools/machine_learning/utils/device.py,sha256=GITrULOty2Fr96Bqt1wi1PaYl_oVgB5Z99Gfn5vQy4o,274
106
117
  smftools/machine_learning/utils/grl.py,sha256=BWBDp_kQBigrUzQpRbZzgpfr_WOcd2K2V3MQL-aAIc4,334
107
118
  smftools/plotting/__init__.py,sha256=7T3-hZFgTY0nfQgV4J6Vn9ogwkNMlY315kguZR7V1AI,866
108
- smftools/plotting/autocorrelation_plotting.py,sha256=wdqQ4dawibgZeXRs_G4WterkqOgxHWrJlgZ4PdtO-OA,27456
119
+ smftools/plotting/autocorrelation_plotting.py,sha256=cF9X3CgKiwzL79mgMUFO1tSqdybDoPN1COQQ567InCY,27455
109
120
  smftools/plotting/classifiers.py,sha256=8_zabh4NNB1_yVxLD22lfrfl5yfzbEoG3XWqlIqdtrQ,13786
110
- smftools/plotting/general_plotting.py,sha256=JOeF_lq2hCDt1Vgy8KYLKSzNj8SXGi3z6qMt2P68TDU,31458
121
+ smftools/plotting/general_plotting.py,sha256=2JzE7agm_tILpQ67BHs5pdyPRsHBwcENZe7n4gfMWgM,61350
111
122
  smftools/plotting/hmm_plotting.py,sha256=3Eq82gty_0b8GkSMCQgUlbKfzR9h2fJ5rZkB8yYGX-M,10934
112
123
  smftools/plotting/position_stats.py,sha256=4XukYIWeWZ_aGSZg1K0t37KA2aknjNNKT5kcKFfuz8Q,17428
113
124
  smftools/plotting/qc_plotting.py,sha256=q5Ri0q89udvNUFUNxHzgk9atvQYqUkqkS5-JFq9EqoI,10045
114
- smftools/preprocessing/__init__.py,sha256=VqhiwJg57m0ePCRAGfX3cJniNLV2jNJpoXZEM2j-0wU,1687
125
+ smftools/preprocessing/__init__.py,sha256=GAQBULUH7fGVabzK5Cq5Wj-0ew0vNA-jWQtR5LAowvs,1746
115
126
  smftools/preprocessing/add_read_length_and_mapping_qc.py,sha256=zD_Kxw3DvyOypfuSMGv0ESyt-02w4XlAAMqQxb7yDNQ,5700
116
- smftools/preprocessing/append_base_context.py,sha256=ohtdHNS1Y9ttLvhLKSwrOyar7HyU2Dw0Ach9WVx5QM8,6221
117
- smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=I3iiZkVqqB1KqSiA-s-ctl-ESkuTpd7Ot82M0xv_Cm4,6202
118
- smftools/preprocessing/binarize_on_Youden.py,sha256=O5E3vFc2zXMfKW0p0JGDlmRKEx2_VP6dAqfvrumzz00,1797
127
+ smftools/preprocessing/append_base_context.py,sha256=wGBAADePnys8DLUR15MpRe2BUcfCMDJWaCDDNyjn6AU,6209
128
+ smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=s-7t-VKCs9Y67pX7kH6DNCEkC-RW4nM-UPsBQV2ZwtE,6186
129
+ smftools/preprocessing/binarize.py,sha256=6Vr7Z8zgtJ5rS_uPAx1n3EnQR670V33DlZ_95JmOeWc,484
130
+ smftools/preprocessing/binarize_on_Youden.py,sha256=HGs4p7XiOSYU3_z8QswNHIA9HlrI-7Pp1Kggrn6yUnI,1834
119
131
  smftools/preprocessing/binary_layers_to_ohe.py,sha256=Lxd8knelNTaUozfGMFNMlnrOb6uP28Laj3Ymw6cRHL0,1826
120
132
  smftools/preprocessing/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
121
133
  smftools/preprocessing/calculate_complexity_II.py,sha256=DGfl0jkuBPUpzhKVItN0W7EPzh-QYuR4IxRObPE6gAQ,9301
@@ -123,14 +135,14 @@ smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7P
123
135
  smftools/preprocessing/calculate_coverage.py,sha256=4WTILzKLzxGLSsQrZkshXP-IRQpoVu3Fkqc0QTpux3Y,2132
124
136
  smftools/preprocessing/calculate_pairwise_differences.py,sha256=5zJbNNaFld5qgKRoPyplCmMHflbvAQ9eKWCXPXPpJ60,1774
125
137
  smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
126
- smftools/preprocessing/calculate_position_Youden.py,sha256=9GY_WWwaxpB2Xppck3WT1zHtFOhTXrpuDIgbxLC9A7E,7450
138
+ smftools/preprocessing/calculate_position_Youden.py,sha256=yaSd6UDXPCddoN1UR6LgTqE5teJ79Ldw0BAlemc9fB4,7453
127
139
  smftools/preprocessing/calculate_read_length_stats.py,sha256=gNNePwMqYZJidzGgT1ZkfSlvc5Y3I3bi5KNYpP6wQQc,4584
128
- smftools/preprocessing/calculate_read_modification_stats.py,sha256=fQYtwsGt6zq7QBlWtAEaFOkbV_4yXjrj9GnBryEEztc,4779
140
+ smftools/preprocessing/calculate_read_modification_stats.py,sha256=mIlLBqNflVIkuoLxhbyujq3JEKyPl8iebhUlikB9brM,4775
129
141
  smftools/preprocessing/clean_NaN.py,sha256=IOcnN5YF05gpPQc3cc3IS83petCnhCpkYiyT6bXEyx0,1937
130
142
  smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=GZcvr2JCsthX8EMw34S9-W3fc6JElw6ka99Jy6f2JvA,1292
131
143
  smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=93LgTy_vsPnOZgoiXhZ1-w_pix2oFdBk-dsBUoz33Go,7379
132
- smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=wOmHhQj3xQALQdtQ4-v4POEOat5bEJa-BVmzEE_yrKA,19403
133
- smftools/preprocessing/flag_duplicate_reads.py,sha256=D7KrDuyy_TSgGvB5aRRmY01k36p92n48YEwmwsUd3IY,65595
144
+ smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=4TUvChkSH8R4p_0TpRCh7TounkdUgQHh71TGNmsZ29A,19355
145
+ smftools/preprocessing/flag_duplicate_reads.py,sha256=MySI9En6xVp0FqL7hfiLw0EP3JnGVJWM_yZfkvN-m1U,65585
134
146
  smftools/preprocessing/invert_adata.py,sha256=HYMJ1sR3Ui8j6bDjY8OcVQOETzZV-_rrpIYaWLZL6S4,1049
135
147
  smftools/preprocessing/load_sample_sheet.py,sha256=AjJf2MrqGHJJ2rNjYi09zV1QkLTq8qGaHGVklXHnPuU,1908
136
148
  smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
@@ -154,8 +166,8 @@ smftools/tools/archived/classify_methylated_features.py,sha256=Z0N2UKw3luD3CTQ8w
154
166
  smftools/tools/archived/classify_non_methylated_features.py,sha256=IJERTozEs7IPL7K-VIjq2q2K36wRCW9iiNSYLAXasrA,3256
155
167
  smftools/tools/archived/subset_adata_v1.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
156
168
  smftools/tools/archived/subset_adata_v2.py,sha256=OKZoUpvdURPtckIQxGTWmOI5jLa-_EU62Xs3LyyehnA,1880
157
- smftools-0.2.1.dist-info/METADATA,sha256=MXyiJbt1w_Ln4ENxQNbLU0JWwE-S6z1oNZkd8gkf3J8,8958
158
- smftools-0.2.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
159
- smftools-0.2.1.dist-info/entry_points.txt,sha256=NflK6zRv2zlvnjCnDSHycp9w9CczHLfGz9zAc4FtI0I,46
160
- smftools-0.2.1.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
161
- smftools-0.2.1.dist-info/RECORD,,
169
+ smftools-0.2.3.dist-info/METADATA,sha256=w_PRsBPndPoTQZviW9WTuiZV1Pk3ukeJ155OvC4E57M,8787
170
+ smftools-0.2.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
171
+ smftools-0.2.3.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
172
+ smftools-0.2.3.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
173
+ smftools-0.2.3.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ smftools = smftools.cli_entry:cli
smftools/cli.py DELETED
@@ -1,184 +0,0 @@
1
- import click
2
- import pandas as pd
3
- from pathlib import Path
4
- from typing import Dict, Optional
5
-
6
- from . import load_adata
7
- from .readwrite import merge_barcoded_anndatas_core, safe_read_h5ad, safe_write_h5ad
8
-
9
- @click.group()
10
- def cli():
11
- """Command-line interface for smftools."""
12
- pass
13
-
14
- ####### Main processing workflow ###########
15
- @cli.command()
16
- @click.argument("config_path", type=click.Path(exists=True))
17
- def load(config_path):
18
- """Load and process data from CONFIG_PATH."""
19
- load_adata(config_path)
20
- ##########################################
21
-
22
-
23
- ####### Merging existing anndatas from an experiment that used two different demultiplexing rules #######
24
- REQUIRED_KEYS = ("adata_single_path", "adata_double_path")
25
- OPTIONAL_KEYS = (
26
- "adata_single_backups_path",
27
- "adata_double_backups_path",
28
- "output_path",
29
- "merged_filename",
30
- )
31
-
32
- def _read_config_csv(csv_path: Path) -> Dict[str, str]:
33
- """
34
- Read a multi-row, two-column CSV of key,value pairs into a dict.
35
-
36
- Supported features:
37
- - Optional header ("key,value") or none.
38
- - Comments starting with '#' and blank lines are ignored.
39
- - If duplicate keys occur, the last one wins.
40
- - Keys are matched literally against REQUIRED_KEYS/OPTIONAL_KEYS.
41
- """
42
- try:
43
- # Read as two columns regardless of header; comments ignored.
44
- df = pd.read_csv(
45
- csv_path,
46
- dtype=str,
47
- comment="#",
48
- header=None, # treat everything as rows; we'll normalize below
49
- usecols=[0, 1],
50
- names=["key", "value"]
51
- )
52
- except Exception as e:
53
- raise click.ClickException(f"Failed to read CSV: {e}") from e
54
-
55
- # Drop completely empty rows
56
- df = df.fillna("").astype(str)
57
- df["key"] = df["key"].str.strip()
58
- df["value"] = df["value"].str.strip()
59
- df = df[(df["key"] != "") & (df["key"].notna())]
60
-
61
- if df.empty:
62
- raise click.ClickException("Config CSV is empty after removing comments/blank lines.")
63
-
64
- # Remove an optional header row if present
65
- if df.iloc[0]["key"].lower() in {"key", "keys"}:
66
- df = df.iloc[1:]
67
- df = df[(df["key"] != "") & (df["key"].notna())]
68
- if df.empty:
69
- raise click.ClickException("Config CSV contains only a header row.")
70
-
71
- # Build dict; last occurrence of a key wins
72
- cfg = {}
73
- for k, v in zip(df["key"], df["value"]):
74
- cfg[k] = v
75
-
76
- # Validate required keys
77
- missing = [k for k in REQUIRED_KEYS if not cfg.get(k)]
78
- if missing:
79
- raise click.ClickException(
80
- "Missing required keys in CSV: "
81
- + ", ".join(missing)
82
- + "\nExpected keys:\n - "
83
- + "\n - ".join(REQUIRED_KEYS)
84
- + "\nOptional keys:\n - "
85
- + "\n - ".join(OPTIONAL_KEYS)
86
- )
87
-
88
- return cfg
89
-
90
- def _resolve_output_path(cfg: Dict[str, str], single_path: Path, double_path: Path) -> Path:
91
- """Decide on the output .h5ad path based on CSV; create directories if needed."""
92
- merged_filename = cfg.get("merged_filename") or f"merged_{single_path.stem}__{double_path.stem}.h5ad"
93
- if not merged_filename.endswith(".h5ad"):
94
- merged_filename += ".h5ad"
95
-
96
- output_path_raw = cfg.get("output_path", "").strip()
97
-
98
- if not output_path_raw:
99
- out_dir = Path.cwd() / "merged_output"
100
- out_dir.mkdir(parents=True, exist_ok=True)
101
- return out_dir / merged_filename
102
-
103
- output_path = Path(output_path_raw)
104
-
105
- if output_path.suffix.lower() == ".h5ad":
106
- output_path.parent.mkdir(parents=True, exist_ok=True)
107
- return output_path
108
-
109
- # Treat as directory
110
- output_path.mkdir(parents=True, exist_ok=True)
111
- return output_path / merged_filename
112
-
113
- def _maybe_read_adata(label: str, primary: Path, backups: Optional[Path]):
114
-
115
- if backups:
116
- click.echo(f"Loading {label} from {primary} with backups at {backups} ...")
117
- return safe_read_h5ad(primary, backups_path=backups, restore_backups=True)
118
- else:
119
- click.echo(f"Loading {label} from {primary} with backups disabled ...")
120
- return safe_read_h5ad(primary, restore_backups=False)
121
-
122
-
123
- @cli.command()
124
- @click.argument("config_path", type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path))
125
- def merge_barcoded_anndatas(config_path: Path):
126
- """
127
- Merge two AnnData objects from the same experiment that were demultiplexed
128
- under different end-barcoding requirements, using a 1-row CSV for config.
129
-
130
- CSV must include:
131
- - adata_single_path
132
- - adata_double_path
133
-
134
- Optional columns:
135
- - adata_single_backups_path
136
- - adata_double_backups_path
137
- - output_path (file or directory; default: ./merged_output/)
138
- - merged_filename (default: merged_<single>__<double>.h5ad)
139
-
140
- Example CSV:
141
-
142
- adata_single_path,adata_double_path,adata_single_backups_path,adata_double_backups_path,output_path,merged_filename
143
- /path/single.h5ad,/path/double.h5ad,,,,merged_output,merged_run.h5ad
144
- """
145
- try:
146
- cfg = _read_config_csv(config_path)
147
-
148
- single_path = Path(cfg["adata_single_path"]).expanduser().resolve()
149
- double_path = Path(cfg["adata_double_path"]).expanduser().resolve()
150
-
151
- for p, label in [(single_path, "adata_single_path"), (double_path, "adata_double_path")]:
152
- if not p.exists():
153
- raise click.ClickException(f"{label} does not exist: {p}")
154
-
155
- single_backups = Path(cfg["adata_single_backups_path"]).expanduser().resolve() if cfg.get("adata_single_backups_path") else None
156
- double_backups = Path(cfg["adata_double_backups_path"]).expanduser().resolve() if cfg.get("adata_double_backups_path") else None
157
-
158
- if single_backups and not single_backups.exists():
159
- raise click.ClickException(f"adata_single_backups_path does not exist: {single_backups}")
160
- if double_backups and not double_backups.exists():
161
- raise click.ClickException(f"adata_double_backups_path does not exist: {double_backups}")
162
-
163
- output_path = _resolve_output_path(cfg, single_path, double_path)
164
-
165
- # Load
166
- adata_single, read_report_single = _maybe_read_adata("single-barcoded AnnData", single_path, single_backups)
167
- adata_double, read_report_double = _maybe_read_adata("double-barcoded AnnData", double_path, double_backups)
168
-
169
- click.echo("Merging AnnDatas ...")
170
- merged = merge_barcoded_anndatas_core(adata_single, adata_double)
171
-
172
- click.echo(f"Writing merged AnnData to: {output_path}")
173
- backup_dir = output_path.cwd() / "merged_backups"
174
- safe_write_h5ad(merged, output_path, backup=True, backup_dir=backup_dir)
175
-
176
- click.secho(f"Done. Merged AnnData saved to {output_path}", fg="green")
177
-
178
- except click.ClickException:
179
- raise
180
- except Exception as e:
181
- # Surface unexpected errors cleanly
182
- raise click.ClickException(f"Unexpected error: {e}") from e
183
-
184
- ################################################################################################################
@@ -1,24 +0,0 @@
1
- # fast5_to_pod5
2
-
3
def fast5_to_pod5(fast5_dir, output_pod5='FAST5s_to_POD5.pod5'):
    """
    Convert Nanopore FAST5 file(s) into a single POD5 file using ``pod5 convert fast5``.

    Parameters:
        fast5_dir (str | Path | list | tuple): A directory containing FAST5 files, the path to a single FAST5 file, or a list/tuple of FAST5 file paths.
        output_pod5 (str): The path of the output POD5.

    Returns:
        None

    Raises:
        FileNotFoundError: If the input path does not exist, or no FAST5 inputs could be resolved from it.
        subprocess.CalledProcessError: If the ``pod5`` command exits with a non-zero status.
    """
    import subprocess
    from pathlib import Path

    if isinstance(fast5_dir, (list, tuple)):
        # Normalise to a list of strings so tuples and Path objects both work.
        inputs = [str(path) for path in fast5_dir]
    else:
        source = Path(fast5_dir)
        if source.is_file():
            inputs = [str(source)]
        elif source.is_dir():
            # subprocess.run with an argument list does not go through a shell,
            # so a "*.fast5" pattern would never be expanded; expand it here.
            inputs = sorted(str(path) for path in source.glob("*.fast5"))
        else:
            raise FileNotFoundError(f"FAST5 input path does not exist: {fast5_dir}")

    if not inputs:
        raise FileNotFoundError(f"No FAST5 files found for input: {fast5_dir}")

    cmd = ["pod5", "convert", "fast5", *inputs, "--output", str(output_pod5)]
    # check=True surfaces a failed conversion instead of silently continuing.
    subprocess.run(cmd, check=True)
@@ -1,73 +0,0 @@
1
- from .align_and_sort_BAM import align_and_sort_BAM
2
- from .aligned_BAM_to_bed import aligned_BAM_to_bed
3
- from .bam_qc import bam_qc
4
- from .bed_to_bigwig import bed_to_bigwig
5
- from .binarize_converted_base_identities import binarize_converted_base_identities
6
- from .canoncall import canoncall
7
- from .complement_base_list import complement_base_list
8
- from .converted_BAM_to_adata_II import converted_BAM_to_adata_II
9
- from .concatenate_fastqs_to_bam import concatenate_fastqs_to_bam
10
- from .count_aligned_reads import count_aligned_reads
11
- from .demux_and_index_BAM import demux_and_index_BAM
12
- from .discover_input_files import *
13
- from .extract_base_identities import extract_base_identities
14
- from .extract_mods import extract_mods
15
- from .extract_read_features_from_bam import extract_read_features_from_bam
16
- from .extract_read_lengths_from_bed import extract_read_lengths_from_bed
17
- from .extract_readnames_from_BAM import extract_readnames_from_BAM
18
- from .find_conversion_sites import find_conversion_sites
19
- from .generate_converted_FASTA import convert_FASTA_record, generate_converted_FASTA
20
- from .get_chromosome_lengths import get_chromosome_lengths
21
- from .get_native_references import get_native_references
22
- from .index_fasta import index_fasta
23
- from .make_dirs import make_dirs
24
- from .make_modbed import make_modbed
25
- from .modcall import modcall
26
- from .modkit_extract_to_adata import modkit_extract_to_adata
27
- from .modQC import modQC
28
- from .one_hot_encode import one_hot_encode
29
- from .ohe_batching import ohe_batching
30
- from .one_hot_decode import one_hot_decode
31
- from .ohe_layers_decode import ohe_layers_decode
32
- from .plot_bed_histograms import plot_bed_histograms
33
- from .run_multiqc import run_multiqc
34
- from .separate_bam_by_bc import separate_bam_by_bc
35
- from .split_and_index_BAM import split_and_index_BAM
36
-
37
# Public API of the helpers package: one entry per helper re-exported by the
# imports in this module. Order matches the import order.
__all__ = [
    "align_and_sort_BAM", "aligned_BAM_to_bed", "bam_qc", "bed_to_bigwig",
    "binarize_converted_base_identities", "canoncall", "complement_base_list",
    "converted_BAM_to_adata_II", "concatenate_fastqs_to_bam",
    "count_aligned_reads", "demux_and_index_BAM", "extract_base_identities",
    "extract_mods", "extract_read_features_from_bam",
    "extract_read_lengths_from_bed", "extract_readnames_from_BAM",
    "find_conversion_sites", "convert_FASTA_record", "generate_converted_FASTA",
    "get_chromosome_lengths", "get_native_references", "index_fasta",
    "make_dirs", "make_modbed", "modcall", "modkit_extract_to_adata", "modQC",
    "one_hot_encode", "ohe_batching", "one_hot_decode", "ohe_layers_decode",
    "plot_bed_histograms", "run_multiqc", "separate_bam_by_bc",
    "split_and_index_BAM",
]
@@ -1,86 +0,0 @@
1
- ## align_and_sort_BAM
2
-
3
def align_and_sort_BAM(fasta,
                       input,
                       bam_suffix='.bam',
                       output_directory='aligned_outputs',
                       make_bigwigs=False,
                       threads=None,
                       aligner='minimap2',
                       aligner_args=None):
    """
    Align a basecalled file to a reference, then coordinate-sort and index the result with samtools.

    Parameters:
        fasta (str): File path to the reference genome to align to.
        input (str): File path to the basecalled file to align.
        bam_suffix (str): The suffix to use for the BAM file.
        output_directory (str): A file path to the directory to output all the analyses. Created if it does not exist.
        make_bigwigs (bool): Whether to make bigwigs. Currently not used by this function.
        threads (int): Number of additional threads to use
        aligner (str): Aligner to use. minimap2 and dorado options
        aligner_args (list): list of optional parameters to use for the alignment.
            Defaults to ['-a', '-x', 'map-ont', '--MD', '-Y', '-y', '-N', '5', '--secondary=no'] when None.

    Returns:
        None
        The function writes out files for: 1) the aligner output (<stem>_aligned<bam_suffix>), 2) the sorted output (<stem>_aligned_sorted<bam_suffix>), 3) an index file for the sorted output.
        <stem> is the input file name up to its first '.'.

    Raises:
        subprocess.CalledProcessError: If samtools or the aligner exits with a non-zero status.
    """
    import os
    import subprocess
    import tempfile

    # Reject unknown aligners before touching the filesystem.
    if aligner not in ('minimap2', 'dorado'):
        print(f'Aligner not recognized: {aligner}. Choose from minimap2 and dorado')
        return

    if aligner_args is None:
        aligner_args = ['-a', '-x', 'map-ont', '--MD', '-Y', '-y', '-N', '5', '--secondary=no']
    else:
        # Copy so the caller's sequence is never aliased (and tuples are accepted).
        aligner_args = list(aligner_args)

    thread_count = str(threads) if threads else None

    # Output names are keyed on everything before the first '.' of the input
    # file name; an input without any extension is used as-is.
    input_stem = os.path.basename(input).split('.', 1)[0]

    os.makedirs(output_directory, exist_ok=True)
    aligned_output = os.path.join(output_directory, f"{input_stem}_aligned{bam_suffix}")
    aligned_sorted_output = os.path.join(output_directory, f"{input_stem}_aligned_sorted{bam_suffix}")

    if aligner == 'minimap2':
        # Use a uniquely named intermediate FASTQ inside the output directory so
        # an existing <stem>.fastq in the working directory (possibly the input
        # itself) is never overwritten or deleted.
        fastq_fd, input_as_fastq = tempfile.mkstemp(prefix=f"{input_stem}_", suffix='.fastq', dir=output_directory)
        try:
            print(f"Converting BAM to FASTQ: {input}")
            with os.fdopen(fastq_fd, "wb") as fastq_handle:
                subprocess.run(['samtools', 'fastq', input], stdout=fastq_handle, check=True)

            print(f"Aligning FASTQ to Reference: {input_as_fastq}")
            minimap_command = ['minimap2'] + aligner_args
            if thread_count:
                minimap_command += ['-t', thread_count]
            minimap_command += [fasta, input_as_fastq]
            with open(aligned_output, "wb") as aligned_handle:
                subprocess.run(minimap_command, stdout=aligned_handle, check=True)
        finally:
            # Always clean up the intermediate FASTQ, even if alignment failed.
            os.remove(input_as_fastq)
    else:
        # Run dorado aligner
        print(f"Aligning BAM to Reference: {input}")
        alignment_command = ["dorado", "aligner"]
        if thread_count:
            alignment_command += ["-t", thread_count]
        alignment_command += aligner_args + [fasta, input]
        with open(aligned_output, "wb") as aligned_handle:
            subprocess.run(alignment_command, stdout=aligned_handle, check=True)

    thread_args = ["-@", thread_count] if thread_count else []

    # Sort the BAM on positional coordinates
    print(f"Sorting BAM: {aligned_output}")
    subprocess.run(["samtools", "sort"] + thread_args + ["-o", aligned_sorted_output, aligned_output], check=True)

    # Create a BAM index file
    print(f"Indexing BAM: {aligned_sorted_output}")
    subprocess.run(["samtools", "index"] + thread_args + [aligned_sorted_output], check=True)