smftools 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. smftools/__init__.py +2 -6
  2. smftools/_version.py +1 -1
  3. smftools/cli/__init__.py +0 -0
  4. smftools/cli/archived/cli_flows.py +94 -0
  5. smftools/cli/helpers.py +48 -0
  6. smftools/cli/hmm_adata.py +361 -0
  7. smftools/cli/load_adata.py +637 -0
  8. smftools/cli/preprocess_adata.py +455 -0
  9. smftools/cli/spatial_adata.py +697 -0
  10. smftools/cli_entry.py +434 -0
  11. smftools/config/conversion.yaml +18 -6
  12. smftools/config/deaminase.yaml +18 -11
  13. smftools/config/default.yaml +151 -36
  14. smftools/config/direct.yaml +28 -1
  15. smftools/config/discover_input_files.py +115 -0
  16. smftools/config/experiment_config.py +225 -27
  17. smftools/hmm/HMM.py +12 -1
  18. smftools/hmm/__init__.py +0 -6
  19. smftools/hmm/archived/call_hmm_peaks.py +106 -0
  20. smftools/hmm/call_hmm_peaks.py +318 -90
  21. smftools/informatics/__init__.py +13 -7
  22. smftools/informatics/archived/fast5_to_pod5.py +43 -0
  23. smftools/informatics/archived/helpers/archived/__init__.py +71 -0
  24. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
  25. smftools/informatics/{helpers → archived/helpers/archived}/aligned_BAM_to_bed.py +6 -4
  26. smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
  27. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
  28. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
  29. smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
  30. smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
  31. smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +1 -1
  32. smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
  33. smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
  34. smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
  35. smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
  36. smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
  37. smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
  38. smftools/informatics/{helpers → archived/helpers/archived}/plot_bed_histograms.py +0 -19
  39. smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +6 -5
  40. smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +7 -7
  41. smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
  42. smftools/informatics/bam_functions.py +811 -0
  43. smftools/informatics/basecalling.py +67 -0
  44. smftools/informatics/bed_functions.py +366 -0
  45. smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +42 -30
  46. smftools/informatics/fasta_functions.py +255 -0
  47. smftools/informatics/h5ad_functions.py +197 -0
  48. smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +142 -59
  49. smftools/informatics/modkit_functions.py +129 -0
  50. smftools/informatics/ohe.py +160 -0
  51. smftools/informatics/pod5_functions.py +224 -0
  52. smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
  53. smftools/plotting/autocorrelation_plotting.py +1 -3
  54. smftools/plotting/general_plotting.py +1084 -363
  55. smftools/plotting/position_stats.py +3 -3
  56. smftools/preprocessing/__init__.py +4 -4
  57. smftools/preprocessing/append_base_context.py +35 -26
  58. smftools/preprocessing/append_binary_layer_by_base_context.py +6 -6
  59. smftools/preprocessing/binarize.py +17 -0
  60. smftools/preprocessing/binarize_on_Youden.py +11 -9
  61. smftools/preprocessing/calculate_complexity_II.py +1 -1
  62. smftools/preprocessing/calculate_coverage.py +16 -13
  63. smftools/preprocessing/calculate_position_Youden.py +42 -26
  64. smftools/preprocessing/calculate_read_modification_stats.py +2 -2
  65. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +1 -1
  66. smftools/preprocessing/filter_reads_on_modification_thresholds.py +20 -20
  67. smftools/preprocessing/flag_duplicate_reads.py +2 -2
  68. smftools/preprocessing/invert_adata.py +1 -1
  69. smftools/preprocessing/load_sample_sheet.py +1 -1
  70. smftools/preprocessing/reindex_references_adata.py +37 -0
  71. smftools/readwrite.py +360 -140
  72. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/METADATA +26 -19
  73. smftools-0.2.4.dist-info/RECORD +176 -0
  74. smftools-0.2.4.dist-info/entry_points.txt +2 -0
  75. smftools/cli.py +0 -184
  76. smftools/informatics/fast5_to_pod5.py +0 -24
  77. smftools/informatics/helpers/__init__.py +0 -73
  78. smftools/informatics/helpers/align_and_sort_BAM.py +0 -86
  79. smftools/informatics/helpers/bam_qc.py +0 -66
  80. smftools/informatics/helpers/bed_to_bigwig.py +0 -39
  81. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -378
  82. smftools/informatics/helpers/discover_input_files.py +0 -100
  83. smftools/informatics/helpers/index_fasta.py +0 -12
  84. smftools/informatics/helpers/make_dirs.py +0 -21
  85. smftools/informatics/readwrite.py +0 -106
  86. smftools/informatics/subsample_fasta_from_bed.py +0 -47
  87. smftools/load_adata.py +0 -1346
  88. smftools-0.2.1.dist-info/RECORD +0 -161
  89. smftools-0.2.1.dist-info/entry_points.txt +0 -2
  90. /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
  91. /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
  92. /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
  93. /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
  94. /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
  95. /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
  96. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +0 -0
  97. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
  98. /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
  99. /smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +0 -0
  100. /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
  101. /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
  102. /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
  103. /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
  104. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
  105. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
  106. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
  107. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
  108. /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
  109. /smftools/informatics/{helpers/binarize_converted_base_identities.py → binarize_converted_base_identities.py} +0 -0
  110. /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
  111. /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archives/add_read_length_and_mapping_qc.py} +0 -0
  112. /smftools/preprocessing/{calculate_complexity.py → archives/calculate_complexity.py} +0 -0
  113. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/WHEEL +0 -0
  114. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,176 @@
1
+ smftools/__init__.py,sha256=aZlrZBVexf_nEnzQeZu7NU_Kp6OnxcYpLo1KPImi7sI,599
2
+ smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
3
+ smftools/_version.py,sha256=k2uKAAzDEmm1BIVWeztFlHrCh9fq64H6szFcsXW7tvs,21
4
+ smftools/cli_entry.py,sha256=LvobMVtEb_jrLZScoWCB-OBjUMue9JQBXJZW1oMbHnw,14618
5
+ smftools/readwrite.py,sha256=mbuCKj7LfEKp4bDBxxxMiaTddMwblwURpcCKpgmU6Sw,48678
6
+ smftools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ smftools/cli/helpers.py,sha256=tgjxUlOIhFGCLGD2ON7zlD45UPx93vENM82mM_BpLFk,1281
8
+ smftools/cli/hmm_adata.py,sha256=2ria8u6cCBQnzX_GjUUO3wBVOd7a4m3Al-vzwk0OasQ,17728
9
+ smftools/cli/load_adata.py,sha256=W4NgbM28wOzQHkLnZNILJyblRgee-O4oLnNZcyPDCXc,30486
10
+ smftools/cli/preprocess_adata.py,sha256=g9aHQ1DSScb4zx8RfpCjcEmam6APWHiu8Ow0sza6D2Y,22203
11
+ smftools/cli/spatial_adata.py,sha256=pp0KLK8d-MYjl_hF1ziDVKc6uOJGDDDbKNQELQcRUa8,28980
12
+ smftools/cli/archived/cli_flows.py,sha256=xRiFUThoAL3LX1xdXaHVg4LjyJI4uNpGsc9aQ_wVCto,4941
13
+ smftools/config/__init__.py,sha256=ObUnnR7aRSoD_uvpmsxA_BUFt4NOOfWNopDVCqjp7tg,69
14
+ smftools/config/conversion.yaml,sha256=07dKEXykQeP5VoVxa4xst-tcbSX4B6ErqyqtWJ5RCKk,1177
15
+ smftools/config/deaminase.yaml,sha256=okXdMFAghUAsDyx6P5Kru7ydF2bcbrhMPOaMpXlZPGM,1359
16
+ smftools/config/default.yaml,sha256=cKUUxVkH42kkHQM82mNJC8bfcak6lY063AnIif5o-1g,13071
17
+ smftools/config/direct.yaml,sha256=s30JbOTOOdIiBIefPSEi72YABHnfcCyFXj9WwZ7duJQ,2173
18
+ smftools/config/discover_input_files.py,sha256=G9vyAmK_n_8Ur5dOnumevVLG3ydHchMy_JQrJdiuuz0,3892
19
+ smftools/config/experiment_config.py,sha256=f7hVIc9ShUZk852Ypp6Dfelus8iKFHrSbThiyhpuQsE,63259
20
+ smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
21
+ smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
22
+ smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
23
+ smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
24
+ smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
25
+ smftools/hmm/HMM.py,sha256=Y7YB-45HoLN--JloajoLBgC0rIYmHuWHDfmKRXfFuFk,71458
26
+ smftools/hmm/__init__.py,sha256=_-plMbL5xq8d0szNIYgUrgUwdb8oybuyTn6jned8eSU,382
27
+ smftools/hmm/call_hmm_peaks.py,sha256=BMlwDh-_k8bzqRn4LSYuTk3dCcUoNYHp8eohvWYNn7A,14573
28
+ smftools/hmm/display_hmm.py,sha256=3WuQCPvM3wPfzAdgbhfiBTd0g5mQdx9HTUdqAxs2aj4,825
29
+ smftools/hmm/hmm_readwrite.py,sha256=DjJ3hunpBQ7N0GVvxL7-0QUas_SkA88LVgL72mVK2cI,359
30
+ smftools/hmm/nucleosome_hmm_refinement.py,sha256=nQWimvse6dclcXhbU707rGbRVMKHM0mU_ZhH9g2yCMA,4641
31
+ smftools/hmm/archived/apply_hmm_batched.py,sha256=BBeJ8DiIuuMWzLwtDdk2DO2vvrfLCrVe4JtRYPFItIU,10648
32
+ smftools/hmm/archived/calculate_distances.py,sha256=KDWimQ6u-coyxCKrbTm42Fh_Alf_gURBZ0vfFaem848,644
33
+ smftools/hmm/archived/call_hmm_peaks.py,sha256=T-3Ld8H4t3Mgg2whBTYP9s2QL7rY-9RIzVCgB6avKhE,4625
34
+ smftools/hmm/archived/train_hmm.py,sha256=srzRcB9LEmNuHyBM0R5Z0VEnxecifQt-MoaJhADxGT8,2477
35
+ smftools/informatics/__init__.py,sha256=vLvSrCtCVYRUCCNLW7fL3ltPr3h_w8FhT--V6el3ZkQ,1191
36
+ smftools/informatics/bam_functions.py,sha256=SCtOQWgF7Nqbk7-22fAq9J8kRYrd2V5chmM0x1lLJh0,32261
37
+ smftools/informatics/basecalling.py,sha256=jc39jneaa8Gt1azutHgBGWHqCoPeTVSGBu3kyQwP7xM,3460
38
+ smftools/informatics/bed_functions.py,sha256=uETVxT5mRWDNn7t0OqhDi8kDiq7uDakeHB1L2JsP4PA,13377
39
+ smftools/informatics/binarize_converted_base_identities.py,sha256=yOepGaNBGfZJEsMiLRwKauvsmaHn_JRrxaGp8LmKAXs,7778
40
+ smftools/informatics/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
41
+ smftools/informatics/converted_BAM_to_adata.py,sha256=Y2kQNWly0WjjGN9El9zL1nLfjVxmPLWONvX5VNgZUh0,22554
42
+ smftools/informatics/fasta_functions.py,sha256=5IfTkX_GIj5gRJB9PjL_WjyEktpBHwGsmS_nnO1ETjI,9790
43
+ smftools/informatics/h5ad_functions.py,sha256=9zUKuARwjjt0J-i_kBqo2jxLtD6Gud1VxKT0pV-ACeA,7829
44
+ smftools/informatics/modkit_extract_to_adata.py,sha256=TrgrL_IgfqzNJ9qZ_2EvF_B38_Syw8mP38Sl7v0Riwo,55278
45
+ smftools/informatics/modkit_functions.py,sha256=lywjeqAJ7Cdd7k-0P3YaL_9cAZvEDTDLh91rIRcSMWE,5604
46
+ smftools/informatics/ohe.py,sha256=MEmh3ps-ZSSyXuIrr5LMzQvCsDJRCYiy7JS-WD4TlYs,5805
47
+ smftools/informatics/pod5_functions.py,sha256=vxwhD_d_iWpJydIpbf0uce7VGHm8sBnCwb7tLNpYBc8,9859
48
+ smftools/informatics/run_multiqc.py,sha256=n6LvQuGQpLfsutVGmgvHfV0SV5PqTQ8wa_SeKOjRssM,1052
49
+ smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
50
+ smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
51
+ smftools/informatics/archived/basecall_pod5s.py,sha256=Ynmxscsxj6qp-zVY0RWodq513oDuHDaHnpqoepB3RUU,3930
52
+ smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
53
+ smftools/informatics/archived/conversion_smf.py,sha256=QhlISVi3Z-XqFKyDG_CenLojovAt5-ZhuVe9hus36lg,7177
54
+ smftools/informatics/archived/deaminase_smf.py,sha256=mNeg1mIYYVLIiW8powEpz0CqrGRDsrmY5-aoIgwMGHs,7221
55
+ smftools/informatics/archived/direct_smf.py,sha256=ylPGFBvRLdxLHeDJjAwq98j8Q8_lfGK3k5JJnQxrwJw,7485
56
+ smftools/informatics/archived/fast5_to_pod5.py,sha256=TRG_FYYGCGWUPzZCt0ZqzB8gQv_HKvkssp9nTctWzXU,1398
57
+ smftools/informatics/archived/print_bam_query_seq.py,sha256=8Z2ZJEOOlfWYUXiZGjteLWU4yTgvV8KQzEIBHUmamGM,838
58
+ smftools/informatics/archived/subsample_fasta_from_bed.py,sha256=7YTKhXg_mtP4KWpnD-TB4nuFEL4crOa9_d84IJKllyQ,1633
59
+ smftools/informatics/archived/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
60
+ smftools/informatics/archived/helpers/archived/__init__.py,sha256=DiiBerFJAxZeG5y0ScpJSaVBJ8b4XWdfEJCh8Q7k8jU,2783
61
+ smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py,sha256=yaRfhQDh3HpsSTme6QnSqBgElCS0kv2G6TunhvR1weY,5493
62
+ smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py,sha256=N3NAOaoSt_M4V48vtTP_m_iF1tRuNIPS_uNJ3Y0IA4E,3391
63
+ smftools/informatics/archived/helpers/archived/bam_qc.py,sha256=PWl3dViCHGOcjB4UKkxBFz34Gc0PXHVTHjpYVNckVH0,7975
64
+ smftools/informatics/archived/helpers/archived/bed_to_bigwig.py,sha256=Bg9wFsavUU9Ha57n_99vYlYpVcbDUz3tLtYJ7ZFVR9k,2986
65
+ smftools/informatics/archived/helpers/archived/canoncall.py,sha256=5WS6lwukc_xYTdPQy0OSj-WLbx0Rg70Cun1lCucY7w8,1741
66
+ smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py,sha256=6GTHXG1dfaC8rBin5NthG3xgyGqOsT6wIGxJVCmCq58,9774
67
+ smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py,sha256=sRmOtn0kNosLYfogqslDHg1Azk51l6nfNOLgQOnQjlA,14591
68
+ smftools/informatics/archived/helpers/archived/count_aligned_reads.py,sha256=ZF_kkzAf1RvM4PwDYhxD36UiuVuMM_MBvZgiXom1NQ0,2176
69
+ smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py,sha256=KmU7nqGQ-MfDrp8h3txbToGn4h95Rkvg0WEiuext-vY,2000
70
+ smftools/informatics/archived/helpers/archived/extract_base_identities.py,sha256=CaFqNBjkDujYlyiUnOeRock1OQWs3CeiD3yTL96sjIs,3043
71
+ smftools/informatics/archived/helpers/archived/extract_mods.py,sha256=Mrs7mrLFgCTiRGfPFSyvJm6brq--LGzZrNDiFB-jynI,3895
72
+ smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py,sha256=SYAb4Q1HxiJzCx5bIz86MdH_TvVPsRAVodZD9082HGY,1491
73
+ smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py,sha256=Cw39wgp1eBTV45Wk1l0c9l-upBW5N2OcgyWXTAXln90,678
74
+ smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
75
+ smftools/informatics/archived/helpers/archived/find_conversion_sites.py,sha256=JPlDipmzeCBkV_T6esGD5ptwmbQmk8gJMTh7NMaSYd4,2480
76
+ smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py,sha256=Us6iH1cIhsXDnTvDxI-FEHB6ndbB30hd1ss-9dIoWVE,3819
77
+ smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py,sha256=BEroXshYSpjf5wt_vrEAFiTJmSuf-kvD-Z1B_1gusME,1000
78
+ smftools/informatics/archived/helpers/archived/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
79
+ smftools/informatics/archived/helpers/archived/index_fasta.py,sha256=w6xHFSaoXVk-YWZWftZ9Xv8rywZ_IuuIouLQ12KL3ro,779
80
+ smftools/informatics/archived/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
81
+ smftools/informatics/archived/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
82
+ smftools/informatics/archived/helpers/archived/make_modbed.py,sha256=Wh0UCSOL4fMZbWYK-3oGGHwJtqPurJ3Bl6wJWBaTXoM,923
83
+ smftools/informatics/archived/helpers/archived/modQC.py,sha256=pz2EscFgO-j-9dfNgNDseweXXqM5-a-Rj2abBLErLd0,1051
84
+ smftools/informatics/archived/helpers/archived/modcall.py,sha256=LVPrdMNVp2gyQTJ4BNp8NJNm89AueDjsKaY7Gqkluho,1777
85
+ smftools/informatics/archived/helpers/archived/ohe_batching.py,sha256=QVOiyl9fYHNIFWM23afYnQo0uaOjf1NR3ASKGVSrmuw,2975
86
+ smftools/informatics/archived/helpers/archived/ohe_layers_decode.py,sha256=gIgUC9L8TFLi-fTnjR4PRzXdUaH5D6WL2Hump6XOoy0,1042
87
+ smftools/informatics/archived/helpers/archived/one_hot_decode.py,sha256=3n4rzY8_aC9YKmgrftsguMsH7fUyQ-DbWmrOYF6la9s,906
88
+ smftools/informatics/archived/helpers/archived/one_hot_encode.py,sha256=5hHigA6-SZLK84WH_RHo06F_6aTg7S3TJgvSr8gxGX8,1968
89
+ smftools/informatics/archived/helpers/archived/plot_bed_histograms.py,sha256=78i0mYFuElTPGA2Dt1feO6Z4Grh1Nro3m-F8D5FRBOw,9914
90
+ smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py,sha256=pCLev0OQji1jBdVr25lI_gt9fsozSG8vh7TQkE_UHnY,1800
91
+ smftools/informatics/archived/helpers/archived/split_and_index_BAM.py,sha256=Q7I5qJ5JjW6mSKysfl9NdlFZ6LIy3C8G5rGmG7cn2eA,1224
92
+ smftools/machine_learning/__init__.py,sha256=cWyGN_QVcssqBr_VVr7xh2Inz0P7ylqUmBBcpMgsK0k,257
93
+ smftools/machine_learning/data/__init__.py,sha256=xbfLE-gNjdgxvZ9LKTdvjAtbIHOcs2TR0Gz3YRFbo38,113
94
+ smftools/machine_learning/data/anndata_data_module.py,sha256=ktrdMVMk5yhIUrnu-G_Xf3y7G-KP9PyhYZhobv8TCVg,10063
95
+ smftools/machine_learning/data/preprocessing.py,sha256=dSs6Qs3wmlccFPZSpOc-uy1nlFSf68wWQKwF1iTqMok,137
96
+ smftools/machine_learning/evaluation/__init__.py,sha256=KHvcC7bTYv-ThptAi6G8wD-hW5Iz1HPgMcQ3AewtK3c,122
97
+ smftools/machine_learning/evaluation/eval_utils.py,sha256=t9WIevIJ6b6HqU6OYaNx7UBAa5TEIPFmZow6n_ZDZeY,1105
98
+ smftools/machine_learning/evaluation/evaluators.py,sha256=KqYHqbVV2WOs0Yo4GIhLS_0h1oKY6nd1yi6piDWYQLg,8184
99
+ smftools/machine_learning/inference/__init__.py,sha256=vWLQD-JNEKKNGuzDtx7vcE4czKKXEO6S-0Zp5-21fPs,172
100
+ smftools/machine_learning/inference/inference_utils.py,sha256=aJuXvTgC8v4BOjLCgOU9vT3S2y1UGoZjq4mQpPswTQU,947
101
+ smftools/machine_learning/inference/lightning_inference.py,sha256=34WVnPfpPDf4KM8ZN5MOsx4tYgsrUclkens6GXgB4Ek,2160
102
+ smftools/machine_learning/inference/sklearn_inference.py,sha256=FomgQF5jFBfAj1-H2Q0_RPmvR9rDJsmUeaWOVRhbpTw,1612
103
+ smftools/machine_learning/inference/sliding_window_inference.py,sha256=8zjQs2hGhj0Dww4gWljLVK0g002_U96dyIqQJiDdSDY,4426
104
+ smftools/machine_learning/models/__init__.py,sha256=bMfPbQ5bDmn_kWv82virLuUhjb12Yow7t_j96afNbyA,421
105
+ smftools/machine_learning/models/base.py,sha256=p3d77iyY8BVx0tYL0TjmOSnPNP1ZrKTzn_J05e2GF0A,9626
106
+ smftools/machine_learning/models/cnn.py,sha256=KKZmJLQ6Bjm_HI8GULnafjz6mRy5BZ6Y0ZCgDSuS268,4465
107
+ smftools/machine_learning/models/lightning_base.py,sha256=3nC3wajPIupFMtOq3YUf24_SHvDoW_9BIGyIvEwzN9w,13626
108
+ smftools/machine_learning/models/mlp.py,sha256=Y2hc_qHj6vpM_mHpreFxBULn4MkR25oEA1LXu5sPA_w,820
109
+ smftools/machine_learning/models/positional.py,sha256=EfTyYnY0pCB-aVJIWf-4DVNpyGlvx1q_09PzfrC-VlA,652
110
+ smftools/machine_learning/models/rnn.py,sha256=uJnHDGpT2_l_HqHGsx33XGF3v3EYZPeOtSQ89uvhdpE,717
111
+ smftools/machine_learning/models/sklearn_models.py,sha256=ssV-mR3rmcjycQEzKccRcbVaEjZp0zRNUL5-R6m1UKU,10402
112
+ smftools/machine_learning/models/transformer.py,sha256=8YXS0vCcOWT-33h-8yeDfFM5ibPHQ-CMSEhGWzR4pm8,11039
113
+ smftools/machine_learning/models/wrappers.py,sha256=HEY2A6-Bk6MtVZ9jOaPT8S1Qi0L98SyEg1nbKqYZoag,697
114
+ smftools/machine_learning/training/__init__.py,sha256=teUmwpnmAl0oNFaqVrfoijEpxBjLwI5YtBwLHT3uXck,185
115
+ smftools/machine_learning/training/train_lightning_model.py,sha256=usEBaQ4vNjfatefP5XDCXkywzgZ2D-YppGmT3-3gTGE,4070
116
+ smftools/machine_learning/training/train_sklearn_model.py,sha256=m1k1Gsynpj6SJI64rl4B3cfXm1SliU0fwMAj1-bAAeE,3166
117
+ smftools/machine_learning/utils/__init__.py,sha256=yOpzBc9AXbarSRfN8Ixh2Z1uWLGpgpjRR46h6E46_2w,62
118
+ smftools/machine_learning/utils/device.py,sha256=GITrULOty2Fr96Bqt1wi1PaYl_oVgB5Z99Gfn5vQy4o,274
119
+ smftools/machine_learning/utils/grl.py,sha256=BWBDp_kQBigrUzQpRbZzgpfr_WOcd2K2V3MQL-aAIc4,334
120
+ smftools/plotting/__init__.py,sha256=7T3-hZFgTY0nfQgV4J6Vn9ogwkNMlY315kguZR7V1AI,866
121
+ smftools/plotting/autocorrelation_plotting.py,sha256=cF9X3CgKiwzL79mgMUFO1tSqdybDoPN1COQQ567InCY,27455
122
+ smftools/plotting/classifiers.py,sha256=8_zabh4NNB1_yVxLD22lfrfl5yfzbEoG3XWqlIqdtrQ,13786
123
+ smftools/plotting/general_plotting.py,sha256=o4aPXm_2JRj69XyHINKSTAJGaw9VA-csDgX1pyirso0,63151
124
+ smftools/plotting/hmm_plotting.py,sha256=3Eq82gty_0b8GkSMCQgUlbKfzR9h2fJ5rZkB8yYGX-M,10934
125
+ smftools/plotting/position_stats.py,sha256=Ia15EuYq5r3Ckz3jVjYMHON6IHZboatAVqJdb2WrUA4,17415
126
+ smftools/plotting/qc_plotting.py,sha256=q5Ri0q89udvNUFUNxHzgk9atvQYqUkqkS5-JFq9EqoI,10045
127
+ smftools/preprocessing/__init__.py,sha256=mcmovdFq6jt1kWIe0sVW6MwCXs4tUVTy3Qak7RDts74,1644
128
+ smftools/preprocessing/append_base_context.py,sha256=VnxKf8sI4uWale215FEFFoE2me6uJszXvswl-dFQmUY,6702
129
+ smftools/preprocessing/append_binary_layer_by_base_context.py,sha256=qgjeDyfOghuqWZAzCjd4eE5riCWAgra6CIZ9UCyUgTs,6207
130
+ smftools/preprocessing/binarize.py,sha256=6Vr7Z8zgtJ5rS_uPAx1n3EnQR670V33DlZ_95JmOeWc,484
131
+ smftools/preprocessing/binarize_on_Youden.py,sha256=OwI0JwKBsSPVdPr61D31dR9XhnF0N4e5PnbboTpk8xI,1891
132
+ smftools/preprocessing/binary_layers_to_ohe.py,sha256=Lxd8knelNTaUozfGMFNMlnrOb6uP28Laj3Ymw6cRHL0,1826
133
+ smftools/preprocessing/calculate_complexity_II.py,sha256=oh5y0jbM1-k29ujRUfvXoL3ir4E6bVXLE9bWxlD5efc,9306
134
+ smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
135
+ smftools/preprocessing/calculate_coverage.py,sha256=L417_XWAadMH3vxVDGEEAqxIGOiV48nfzVzD7HYyhus,2199
136
+ smftools/preprocessing/calculate_pairwise_differences.py,sha256=5zJbNNaFld5qgKRoPyplCmMHflbvAQ9eKWCXPXPpJ60,1774
137
+ smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
138
+ smftools/preprocessing/calculate_position_Youden.py,sha256=JJLvU62zpBcvWm5QnsQ3FeRgIv5TMQbz5zTHa3z_Y1s,8342
139
+ smftools/preprocessing/calculate_read_length_stats.py,sha256=gNNePwMqYZJidzGgT1ZkfSlvc5Y3I3bi5KNYpP6wQQc,4584
140
+ smftools/preprocessing/calculate_read_modification_stats.py,sha256=hZzoEe1Acc1TQV3crkjyGZBWTMkMMcqXymJb3vJMHks,4784
141
+ smftools/preprocessing/clean_NaN.py,sha256=IOcnN5YF05gpPQc3cc3IS83petCnhCpkYiyT6bXEyx0,1937
142
+ smftools/preprocessing/filter_adata_by_nan_proportion.py,sha256=GZcvr2JCsthX8EMw34S9-W3fc6JElw6ka99Jy6f2JvA,1292
143
+ smftools/preprocessing/filter_reads_on_length_quality_mapping.py,sha256=UhMXpM_qxbhTCorjpKAePRk1qQVls8DP6Z51aFVnr3k,7380
144
+ smftools/preprocessing/filter_reads_on_modification_thresholds.py,sha256=LK3u0mIwD-T_qwqIH8v7BP1ZRL88HtRXPkDJwchsCjk,19363
145
+ smftools/preprocessing/flag_duplicate_reads.py,sha256=8Z3sVQr8gmci3ZtYfQGDAHI7GpKGhzmAFHoZVyL6nK4,65581
146
+ smftools/preprocessing/invert_adata.py,sha256=TmvwRGlkJKnMajOADAzpE_C2kYEtDVYDYtQKv3IthKs,1047
147
+ smftools/preprocessing/load_sample_sheet.py,sha256=WXAKfIhbnptnkbIpI5hEe6p02HhpQ3eRX1EDGEEvH-8,1916
148
+ smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
149
+ smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
150
+ smftools/preprocessing/recipes.py,sha256=cfKEpKW8TtQLe1CMdSHyPuIgKiWOPn7uP6uMIoRlnaQ,7063
151
+ smftools/preprocessing/reindex_references_adata.py,sha256=4oViEcWWSi7bnX3Yyf-DdSZBSocvuiqr4LC-jDFHwu0,1137
152
+ smftools/preprocessing/subsample_adata.py,sha256=ivJvJIOvEtyvAjqZ7cwEeVedm4QgJxCJEI7sFaTuI3w,2360
153
+ smftools/preprocessing/archives/add_read_length_and_mapping_qc.py,sha256=zD_Kxw3DvyOypfuSMGv0ESyt-02w4XlAAMqQxb7yDNQ,5700
154
+ smftools/preprocessing/archives/calculate_complexity.py,sha256=cXMpFrhkwkPipQo2GZGT5yFknMYUMt1t8gz0Cse1DrA,3288
155
+ smftools/preprocessing/archives/mark_duplicates.py,sha256=kwfstcWb7KkqeNB321dB-NLe8yd9_hZsSmpL8pCVBQg,8747
156
+ smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
157
+ smftools/preprocessing/archives/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
158
+ smftools/tools/__init__.py,sha256=QV3asy5_lP9wcRzpNTfxGTCcpykkbNYvzxSMpFw4KXU,719
159
+ smftools/tools/calculate_umap.py,sha256=2arbAQdFOtnWoPq22TWicyr6fLYZ5PTNeZv_jdwuk_I,2491
160
+ smftools/tools/cluster_adata_on_methylation.py,sha256=UDC5lpW8fZ6O-16ETu-mbflLkNBKuIg7RIzQ9r7knvA,5760
161
+ smftools/tools/general_tools.py,sha256=YbobB6Zllz6cUq50yolGH9Jr6uuAMvEI4m3hiJ6FmAI,2561
162
+ smftools/tools/position_stats.py,sha256=Z7VW54wUVzH1RQ9xhP6KO7ewp-xeLybd07I5umV_aqM,24369
163
+ smftools/tools/read_stats.py,sha256=w3Zaim6l__Kt8EPCJKXTlMgO51Iy2Milj6yUb88HXiI,6324
164
+ smftools/tools/spatial_autocorrelation.py,sha256=uQkuPi2PJCj5lZzb33IWTL-e-p3J6PdMeM88rUFfQRw,21212
165
+ smftools/tools/subset_adata.py,sha256=nBbtAxCNteZCUBmPnZ9swQNyU74XgWM8aJHHWg2AuL0,1025
166
+ smftools/tools/archived/apply_hmm.py,sha256=pJXCULay0zbmubrwql368y7yiHAZr2bJhuGx2QUuKnE,9321
167
+ smftools/tools/archived/classifiers.py,sha256=mwSTpWUXBPjmUuV5i_SMG1lIPpHSMCzsKhl8wTbm-Og,36903
168
+ smftools/tools/archived/classify_methylated_features.py,sha256=Z0N2UKw3luD3CTQ8wcUvdnMY7w-8574OJbEcwzNsy88,2897
169
+ smftools/tools/archived/classify_non_methylated_features.py,sha256=IJERTozEs7IPL7K-VIjq2q2K36wRCW9iiNSYLAXasrA,3256
170
+ smftools/tools/archived/subset_adata_v1.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
171
+ smftools/tools/archived/subset_adata_v2.py,sha256=OKZoUpvdURPtckIQxGTWmOI5jLa-_EU62Xs3LyyehnA,1880
172
+ smftools-0.2.4.dist-info/METADATA,sha256=BVgWPtWTeDoNF6d1IOpvXyV0IE4fI5X_fLIs4nmVvJ4,9138
173
+ smftools-0.2.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
174
+ smftools-0.2.4.dist-info/entry_points.txt,sha256=q4hg4w-mKkI2leekM_-YZc5XRJzp96Mh1FcU3hac82g,52
175
+ smftools-0.2.4.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
176
+ smftools-0.2.4.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ smftools = smftools.cli_entry:cli
smftools/cli.py DELETED
@@ -1,184 +0,0 @@
1
- import click
2
- import pandas as pd
3
- from pathlib import Path
4
- from typing import Dict, Optional
5
-
6
- from . import load_adata
7
- from .readwrite import merge_barcoded_anndatas_core, safe_read_h5ad, safe_write_h5ad
8
-
9
# Root click group; subcommands attach themselves via ``@cli.command()``.
# The docstring below is what click prints as the top-level help text.
@click.group()
def cli():
    """Command-line interface for smftools."""
13
-
14
- ####### Main processing workflow ###########
15
# `load` subcommand: takes one positional CONFIG_PATH that click verifies
# exists before the function body runs.
@cli.command()
@click.argument("config_path", type=click.Path(exists=True))
def load(config_path):
    """Load and process data from CONFIG_PATH."""
    # NOTE(review): `load_adata` is bound by `from . import load_adata` at the
    # top of this module; this call only works if the package exposes a
    # callable under that name rather than the submodule object - confirm.
    load_adata(config_path)
20
- ##########################################
21
-
22
-
23
- ####### Merging existing anndatas from an experiment that used two different demultiplexing rules #######
24
# Keys that must be present with a non-empty value in the merge config CSV;
# _read_config_csv raises a ClickException when any of them is missing.
REQUIRED_KEYS = ("adata_single_path", "adata_double_path")
# Keys that may be supplied in the merge config CSV; they are listed in the
# "missing required keys" error message as a hint to the user.
OPTIONAL_KEYS = (
    "adata_single_backups_path",
    "adata_double_backups_path",
    "output_path",
    "merged_filename",
)
31
-
32
def _read_config_csv(csv_path: Path) -> Dict[str, str]:
    """
    Parse a two-column ``key,value`` CSV into a plain dict of strings.

    Behaviour:
    - Only the first two columns are read; both are stripped of whitespace.
    - Text after '#' and blank lines are ignored.
    - A leading row whose key is "key" or "keys" (any case) is treated as a header.
    - When a key repeats, the last row wins.
    - Every key in REQUIRED_KEYS must end up with a non-empty value.

    Raises:
        click.ClickException: if the file cannot be read, holds no usable rows,
            holds only a header, or lacks a required key.
    """
    try:
        # header=None: every line is data; an optional header row is removed below.
        table = pd.read_csv(
            csv_path,
            dtype=str,
            comment="#",
            header=None,
            usecols=[0, 1],
            names=["key", "value"],
        )
    except Exception as e:
        raise click.ClickException(f"Failed to read CSV: {e}") from e

    # Normalise to stripped strings, then discard rows that have no key.
    table = table.fillna("").astype(str)
    for column in ("key", "value"):
        table[column] = table[column].str.strip()
    table = table[table["key"] != ""]

    if table.empty:
        raise click.ClickException("Config CSV is empty after removing comments/blank lines.")

    # Skip an optional header row.
    first_key = table.iloc[0]["key"]
    if first_key.lower() in {"key", "keys"}:
        table = table.iloc[1:]
        if table.empty:
            raise click.ClickException("Config CSV contains only a header row.")

    # dict() keeps the last value seen for a repeated key.
    cfg = dict(zip(table["key"], table["value"]))

    absent = [name for name in REQUIRED_KEYS if not cfg.get(name)]
    if absent:
        required_listing = "\n - ".join(REQUIRED_KEYS)
        optional_listing = "\n - ".join(OPTIONAL_KEYS)
        raise click.ClickException(
            "Missing required keys in CSV: "
            + ", ".join(absent)
            + "\nExpected keys:\n - "
            + required_listing
            + "\nOptional keys:\n - "
            + optional_listing
        )

    return cfg
89
-
90
- def _resolve_output_path(cfg: Dict[str, str], single_path: Path, double_path: Path) -> Path:
91
- """Decide on the output .h5ad path based on CSV; create directories if needed."""
92
- merged_filename = cfg.get("merged_filename") or f"merged_{single_path.stem}__{double_path.stem}.h5ad"
93
- if not merged_filename.endswith(".h5ad"):
94
- merged_filename += ".h5ad"
95
-
96
- output_path_raw = cfg.get("output_path", "").strip()
97
-
98
- if not output_path_raw:
99
- out_dir = Path.cwd() / "merged_output"
100
- out_dir.mkdir(parents=True, exist_ok=True)
101
- return out_dir / merged_filename
102
-
103
- output_path = Path(output_path_raw)
104
-
105
- if output_path.suffix.lower() == ".h5ad":
106
- output_path.parent.mkdir(parents=True, exist_ok=True)
107
- return output_path
108
-
109
- # Treat as directory
110
- output_path.mkdir(parents=True, exist_ok=True)
111
- return output_path / merged_filename
112
-
113
def _maybe_read_adata(label: str, primary: Path, backups: Optional[Path]):
    """
    Read an AnnData file through ``safe_read_h5ad``, announcing the choice on stdout.

    When ``backups`` is falsy the read is done with ``restore_backups=False``;
    otherwise ``backups`` is forwarded as ``backups_path`` with
    ``restore_backups=True``. Whatever ``safe_read_h5ad`` returns is passed
    straight back to the caller.
    """
    if not backups:
        click.echo(f"Loading {label} from {primary} with backups disabled ...")
        return safe_read_h5ad(primary, restore_backups=False)

    click.echo(f"Loading {label} from {primary} with backups at {backups} ...")
    return safe_read_h5ad(primary, backups_path=backups, restore_backups=True)
121
-
122
-
123
@cli.command()
@click.argument("config_path", type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path))
def merge_barcoded_anndatas(config_path: Path):
    """
    Merge two AnnData objects from the same experiment that were demultiplexed
    under different end-barcoding requirements, using a two-column key,value
    CSV for config (one setting per row; '#' comments and blank lines are
    ignored; an optional "key,value" header row is allowed).

    Required keys:

    - adata_single_path

    - adata_double_path

    Optional keys:

    - adata_single_backups_path

    - adata_double_backups_path

    - output_path (file or directory; default: ./merged_output/)

    - merged_filename (default: merged_<single>__<double>.h5ad)

    Example CSV rows:

    adata_single_path,/path/single.h5ad

    adata_double_path,/path/double.h5ad

    output_path,merged_output

    merged_filename,merged_run.h5ad
    """
    # The docstring above doubles as the click help text. It describes the
    # key,value layout that _read_config_csv actually parses (first two columns
    # of every row), not a single wide row of values.
    try:
        cfg = _read_config_csv(config_path)

        single_path = Path(cfg["adata_single_path"]).expanduser().resolve()
        double_path = Path(cfg["adata_double_path"]).expanduser().resolve()

        for p, label in [(single_path, "adata_single_path"), (double_path, "adata_double_path")]:
            if not p.exists():
                raise click.ClickException(f"{label} does not exist: {p}")

        # Backup locations are optional; an empty or missing value disables them.
        single_backups = (
            Path(cfg["adata_single_backups_path"]).expanduser().resolve()
            if cfg.get("adata_single_backups_path")
            else None
        )
        double_backups = (
            Path(cfg["adata_double_backups_path"]).expanduser().resolve()
            if cfg.get("adata_double_backups_path")
            else None
        )

        if single_backups and not single_backups.exists():
            raise click.ClickException(f"adata_single_backups_path does not exist: {single_backups}")
        if double_backups and not double_backups.exists():
            raise click.ClickException(f"adata_double_backups_path does not exist: {double_backups}")

        output_path = _resolve_output_path(cfg, single_path, double_path)

        # Load; the second element of each result is not used here.
        adata_single, _report_single = _maybe_read_adata("single-barcoded AnnData", single_path, single_backups)
        adata_double, _report_double = _maybe_read_adata("double-barcoded AnnData", double_path, double_backups)

        click.echo("Merging AnnDatas ...")
        merged = merge_barcoded_anndatas_core(adata_single, adata_double)

        click.echo(f"Writing merged AnnData to: {output_path}")
        # Path.cwd() is a classmethod, so the previous `output_path.cwd()` was
        # always the current working directory, never a path relative to the
        # output file. Spelled explicitly here; behaviour is unchanged.
        # NOTE(review): `output_path.parent` may have been the intent - confirm.
        backup_dir = Path.cwd() / "merged_backups"
        safe_write_h5ad(merged, output_path, backup=True, backup_dir=backup_dir)

        click.secho(f"Done. Merged AnnData saved to {output_path}", fg="green")

    except click.ClickException:
        # Already user-facing; let click render it as-is.
        raise
    except Exception as e:
        # Surface unexpected errors cleanly
        raise click.ClickException(f"Unexpected error: {e}") from e
183
-
184
- ################################################################################################################
@@ -1,24 +0,0 @@
1
- # fast5_to_pod5
2
-
3
def fast5_to_pod5(fast5_dir, output_pod5='FAST5s_to_POD5.pod5'):
    """
    Convert Nanopore FAST5 file(s) into a single POD5 file using the `pod5` CLI.

    Parameters:
        fast5_dir (str | Path | list | tuple): One of
            - a list/tuple of FAST5 file paths,
            - the path to a single FAST5 file, or
            - the path to a directory; every ``*.fast5`` file directly inside it is converted.
        output_pod5 (str): The name of the output POD5.

    Returns:
        None
        Nothing is run (and a message is printed) when no input FAST5 files are found,
        e.g. the path does not exist, the directory holds no ``*.fast5`` files, or the
        list is empty.
    """
    import subprocess
    from pathlib import Path

    if isinstance(fast5_dir, (list, tuple)):
        # list() so that tuples can be spliced into the command as well.
        fast5_inputs = list(fast5_dir)
    else:
        source = Path(fast5_dir)
        if source.is_file():
            fast5_inputs = [fast5_dir]
        elif source.is_dir():
            # subprocess does not go through a shell, so a "*.fast5" pattern would be
            # handed to pod5 literally; expand the glob here instead.
            fast5_inputs = sorted(str(path) for path in source.glob("*.fast5"))
        else:
            fast5_inputs = []

    if not fast5_inputs:
        print(f"No FAST5 files found to convert: {fast5_dir}")
        return

    cmd = ["pod5", "convert", "fast5"] + fast5_inputs + ["--output", output_pod5]
    subprocess.run(cmd)
@@ -1,73 +0,0 @@
1
- from .align_and_sort_BAM import align_and_sort_BAM
2
- from .aligned_BAM_to_bed import aligned_BAM_to_bed
3
- from .bam_qc import bam_qc
4
- from .bed_to_bigwig import bed_to_bigwig
5
- from .binarize_converted_base_identities import binarize_converted_base_identities
6
- from .canoncall import canoncall
7
- from .complement_base_list import complement_base_list
8
- from .converted_BAM_to_adata_II import converted_BAM_to_adata_II
9
- from .concatenate_fastqs_to_bam import concatenate_fastqs_to_bam
10
- from .count_aligned_reads import count_aligned_reads
11
- from .demux_and_index_BAM import demux_and_index_BAM
12
- from .discover_input_files import *
13
- from .extract_base_identities import extract_base_identities
14
- from .extract_mods import extract_mods
15
- from .extract_read_features_from_bam import extract_read_features_from_bam
16
- from .extract_read_lengths_from_bed import extract_read_lengths_from_bed
17
- from .extract_readnames_from_BAM import extract_readnames_from_BAM
18
- from .find_conversion_sites import find_conversion_sites
19
- from .generate_converted_FASTA import convert_FASTA_record, generate_converted_FASTA
20
- from .get_chromosome_lengths import get_chromosome_lengths
21
- from .get_native_references import get_native_references
22
- from .index_fasta import index_fasta
23
- from .make_dirs import make_dirs
24
- from .make_modbed import make_modbed
25
- from .modcall import modcall
26
- from .modkit_extract_to_adata import modkit_extract_to_adata
27
- from .modQC import modQC
28
- from .one_hot_encode import one_hot_encode
29
- from .ohe_batching import ohe_batching
30
- from .one_hot_decode import one_hot_decode
31
- from .ohe_layers_decode import ohe_layers_decode
32
- from .plot_bed_histograms import plot_bed_histograms
33
- from .run_multiqc import run_multiqc
34
- from .separate_bam_by_bc import separate_bam_by_bc
35
- from .split_and_index_BAM import split_and_index_BAM
36
-
37
- __all__ = [
38
- "align_and_sort_BAM",
39
- "aligned_BAM_to_bed",
40
- "bam_qc",
41
- "bed_to_bigwig",
42
- "binarize_converted_base_identities",
43
- "canoncall",
44
- "complement_base_list",
45
- "converted_BAM_to_adata_II",
46
- "concatenate_fastqs_to_bam",
47
- "count_aligned_reads",
48
- "demux_and_index_BAM",
49
- "extract_base_identities",
50
- "extract_mods",
51
- "extract_read_features_from_bam",
52
- "extract_read_lengths_from_bed",
53
- "extract_readnames_from_BAM",
54
- "find_conversion_sites",
55
- "convert_FASTA_record",
56
- "generate_converted_FASTA",
57
- "get_chromosome_lengths",
58
- "get_native_references",
59
- "index_fasta",
60
- "make_dirs",
61
- "make_modbed",
62
- "modcall",
63
- "modkit_extract_to_adata",
64
- "modQC",
65
- "one_hot_encode",
66
- "ohe_batching",
67
- "one_hot_decode",
68
- "ohe_layers_decode",
69
- "plot_bed_histograms",
70
- "run_multiqc",
71
- "separate_bam_by_bc",
72
- "split_and_index_BAM"
73
- ]
@@ -1,86 +0,0 @@
1
- ## align_and_sort_BAM
2
-
3
def align_and_sort_BAM(fasta,
                       input,
                       bam_suffix='.bam',
                       output_directory='aligned_outputs',
                       make_bigwigs=False,
                       threads=None,
                       aligner='minimap2',
                       aligner_args=None):
    """
    Align a basecalled file to a reference with minimap2 or dorado, then sort and index the result with samtools.

    Parameters:
        fasta (str): File path to the reference genome to align to.
        input (str): File path to the basecalled file to align. Works for .bam and .fastq files
        bam_suffix (str): The suffix to use for the BAM file.
        output_directory (str): A file path to the directory to output all the analyses. Created if missing.
        make_bigwigs (bool): Whether to make bigwigs. Currently unused by this function.
        threads (int): Number of additional threads to use
        aligner (str): Aligner to use. minimap2 and dorado options
        aligner_args (list): list of optional parameters to use for the alignment.
            Defaults to ['-a', '-x', 'map-ont', '--MD', '-Y', '-y', '-N', '5', '--secondary=no'].

    Returns:
        None
        The function writes out files for: 1) An aligned BAM, 2) an aligned_sorted BAM, 3) an index file for the aligned_sorted BAM.
        If the aligner is not recognized a message is printed and nothing is written.
    """
    import subprocess
    import os

    if aligner_args is None:
        # Built per call so the default list is never shared between invocations.
        aligner_args = ['-a', '-x', 'map-ont', '--MD', '-Y', '-y', '-N', '5', '--secondary=no']
    else:
        aligner_args = list(aligner_args)

    # Everything before the first '.' of the file name is the stem; a name without
    # any '.' is used whole instead of raising IndexError.
    input_stem = os.path.basename(input).split('.')[0]
    input_as_fastq = input_stem + '.fastq'

    aligned_BAM = f"{os.path.join(output_directory, input_stem)}_aligned"
    aligned_output = aligned_BAM + bam_suffix
    aligned_sorted_output = f"{aligned_BAM}_sorted" + bam_suffix

    if aligner not in ('minimap2', 'dorado'):
        print(f'Aligner not recognized: {aligner}. Choose from minimap2 and dorado')
        return

    # The aligned BAM is opened for writing below, so its directory must exist.
    os.makedirs(output_directory, exist_ok=True)

    threads = str(threads) if threads else None

    if aligner == 'minimap2':
        print(f"Converting BAM to FASTQ: {input}")
        try:
            with open(input_as_fastq, "w") as fastq_handle:
                subprocess.run(['samtools', 'fastq', input], stdout=fastq_handle)
            print(f"Aligning FASTQ to Reference: {input_as_fastq}")
            minimap_command = ['minimap2'] + aligner_args
            if threads:
                minimap_command += ['-t', threads]
            minimap_command += [fasta, input_as_fastq]
            with open(aligned_output, "w") as aligned_handle:
                subprocess.run(minimap_command, stdout=aligned_handle)
        finally:
            # The intermediate FASTQ is only scratch data; remove it even on failure.
            if os.path.exists(input_as_fastq):
                os.remove(input_as_fastq)
    else:
        # Run dorado aligner
        print(f"Aligning BAM to Reference: {input}")
        alignment_command = ["dorado", "aligner"]
        if threads:
            alignment_command += ["-t", threads]
        alignment_command += aligner_args + [fasta, input]
        with open(aligned_output, "w") as aligned_handle:
            subprocess.run(alignment_command, stdout=aligned_handle)

    samtools_threads = ["-@", threads] if threads else []

    # Sort the BAM on positional coordinates
    print(f"Sorting BAM: {aligned_output}")
    subprocess.run(["samtools", "sort"] + samtools_threads + ["-o", aligned_sorted_output, aligned_output])

    # Create a BAM index file
    print(f"Indexing BAM: {aligned_sorted_output}")
    subprocess.run(["samtools", "index"] + samtools_threads + [aligned_sorted_output])
@@ -1,66 +0,0 @@
1
- ## bam_qc
2
-
3
def bam_qc(bam_files, bam_qc_dir, threads, modality, stats=True, flagstats=True, idxstats=True):
    """
    Run the selected samtools QC reports (stats, flagstat, idxstats) on each BAM file.

    Each report is written to `<bam_qc_dir>/<bam name>_<report>.txt`.

    Parameters:
    - bam_files: List of BAM file paths.
    - bam_qc_dir: Directory to save QC reports (created if it does not exist).
    - threads: Number threads to use; a falsy value omits the `-@` option.
    - modality: 'conversion' or 'direct' (currently has no effect on processing).
    - stats: Run `samtools stats` if True.
    - flagstats: Run `samtools flagstat` if True.
    - idxstats: Run `samtools idxstats` if True.
    """
    import os
    import subprocess

    # Ensure the QC output directory exists
    os.makedirs(bam_qc_dir, exist_ok=True)

    thread_args = ["-@", str(threads)] if threads else []

    # (enabled flag, samtools subcommand); the subcommand also names the report file.
    reports = (
        (stats, "stats"),
        (flagstats, "flagstat"),
        (idxstats, "idxstats"),
    )

    for bam_path in bam_files:
        # File name with every ".bam" occurrence stripped out
        sample_name = os.path.basename(bam_path).replace(".bam", "")

        for enabled, subcommand in reports:
            if not enabled:
                continue
            report_path = os.path.join(bam_qc_dir, f"{sample_name}_{subcommand}.txt")
            command = ["samtools", subcommand] + thread_args + [bam_path]
            print(f"Running: {' '.join(command)} > {report_path}")
            with open(report_path, "w") as report_file:
                subprocess.run(command, stdout=report_file)

    # No modality-specific QC steps exist yet for 'conversion' or 'direct'.

    print("QC processing completed.")