smftools 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. smftools/__init__.py +0 -2
  2. smftools/_settings.py +3 -2
  3. smftools/_version.py +1 -0
  4. smftools/datasets/F1_sample_sheet.csv +5 -0
  5. smftools/datasets/datasets.py +14 -11
  6. smftools/informatics/__init__.py +10 -7
  7. smftools/informatics/archived/bam_conversion.py +59 -0
  8. smftools/informatics/archived/bam_direct.py +63 -0
  9. smftools/informatics/archived/basecalls_to_adata.py +71 -0
  10. smftools/informatics/conversion_smf.py +79 -0
  11. smftools/informatics/direct_smf.py +89 -0
  12. smftools/informatics/fast5_to_pod5.py +21 -0
  13. smftools/informatics/helpers/LoadExperimentConfig.py +74 -0
  14. smftools/informatics/helpers/__init__.py +22 -4
  15. smftools/informatics/helpers/align_and_sort_BAM.py +48 -0
  16. smftools/informatics/helpers/aligned_BAM_to_bed.py +73 -0
  17. smftools/informatics/helpers/bed_to_bigwig.py +39 -0
  18. smftools/informatics/helpers/binarize_converted_base_identities.py +11 -4
  19. smftools/informatics/helpers/canoncall.py +14 -1
  20. smftools/informatics/helpers/complement_base_list.py +21 -0
  21. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +54 -0
  22. smftools/informatics/helpers/converted_BAM_to_adata.py +183 -97
  23. smftools/informatics/helpers/count_aligned_reads.py +25 -14
  24. smftools/informatics/helpers/extract_base_identities.py +44 -23
  25. smftools/informatics/helpers/extract_mods.py +17 -5
  26. smftools/informatics/helpers/extract_readnames_from_BAM.py +22 -0
  27. smftools/informatics/helpers/find_conversion_sites.py +24 -16
  28. smftools/informatics/helpers/generate_converted_FASTA.py +60 -21
  29. smftools/informatics/helpers/get_chromosome_lengths.py +32 -0
  30. smftools/informatics/helpers/get_native_references.py +10 -7
  31. smftools/informatics/helpers/index_fasta.py +12 -0
  32. smftools/informatics/helpers/make_dirs.py +9 -3
  33. smftools/informatics/helpers/make_modbed.py +10 -4
  34. smftools/informatics/helpers/modQC.py +10 -2
  35. smftools/informatics/helpers/modcall.py +16 -2
  36. smftools/informatics/helpers/modkit_extract_to_adata.py +486 -323
  37. smftools/informatics/helpers/ohe_batching.py +52 -0
  38. smftools/informatics/helpers/one_hot_encode.py +15 -8
  39. smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +52 -0
  40. smftools/informatics/helpers/separate_bam_by_bc.py +20 -5
  41. smftools/informatics/helpers/split_and_index_BAM.py +31 -11
  42. smftools/informatics/load_adata.py +127 -0
  43. smftools/informatics/readwrite.py +13 -16
  44. smftools/informatics/subsample_fasta_from_bed.py +47 -0
  45. smftools/informatics/subsample_pod5.py +104 -0
  46. smftools/preprocessing/__init__.py +6 -7
  47. smftools/preprocessing/append_C_context.py +52 -22
  48. smftools/preprocessing/binarize_on_Youden.py +8 -4
  49. smftools/preprocessing/binary_layers_to_ohe.py +9 -4
  50. smftools/preprocessing/calculate_complexity.py +26 -14
  51. smftools/preprocessing/calculate_consensus.py +47 -0
  52. smftools/preprocessing/calculate_converted_read_methylation_stats.py +69 -11
  53. smftools/preprocessing/calculate_coverage.py +14 -8
  54. smftools/preprocessing/calculate_pairwise_hamming_distances.py +11 -6
  55. smftools/preprocessing/calculate_position_Youden.py +21 -12
  56. smftools/preprocessing/calculate_read_length_stats.py +67 -8
  57. smftools/preprocessing/clean_NaN.py +13 -6
  58. smftools/preprocessing/filter_converted_reads_on_methylation.py +15 -6
  59. smftools/preprocessing/filter_reads_on_length.py +16 -6
  60. smftools/preprocessing/invert_adata.py +10 -5
  61. smftools/preprocessing/load_sample_sheet.py +24 -0
  62. smftools/preprocessing/make_dirs.py +21 -0
  63. smftools/preprocessing/mark_duplicates.py +54 -30
  64. smftools/preprocessing/min_non_diagonal.py +9 -4
  65. smftools/preprocessing/recipes.py +125 -0
  66. smftools/preprocessing/remove_duplicates.py +15 -6
  67. smftools/readwrite.py +13 -16
  68. smftools/tools/apply_HMM.py +1 -0
  69. smftools/tools/cluster.py +0 -0
  70. smftools/tools/read_HMM.py +1 -0
  71. smftools/tools/subset_adata.py +32 -0
  72. smftools/tools/train_HMM.py +43 -0
  73. smftools-0.1.3.dist-info/METADATA +94 -0
  74. smftools-0.1.3.dist-info/RECORD +84 -0
  75. smftools/informatics/helpers/align_BAM.py +0 -49
  76. smftools/informatics/helpers/load_experiment_config.py +0 -17
  77. smftools/informatics/pod5_conversion.py +0 -26
  78. smftools/informatics/pod5_direct.py +0 -29
  79. smftools/informatics/pod5_to_adata.py +0 -17
  80. smftools-0.1.0.dist-info/METADATA +0 -75
  81. smftools-0.1.0.dist-info/RECORD +0 -58
  82. /smftools/informatics/helpers/{informatics.py → archived/informatics.py} +0 -0
  83. /smftools/informatics/helpers/{load_adata.py → archived/load_adata.py} +0 -0
  84. /smftools/preprocessing/{preprocessing.py → archives/preprocessing.py} +0 -0
  85. {smftools-0.1.0.dist-info → smftools-0.1.3.dist-info}/WHEEL +0 -0
  86. {smftools-0.1.0.dist-info → smftools-0.1.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,84 @@
1
+ smftools/__init__.py,sha256=zy4ckT7hKrLrlm6NiZQoupvc6oSN7wJsyOBCYdzukcQ,401
2
+ smftools/_settings.py,sha256=Ed8lzKUA5ncq5ZRfSp0t6_rphEEjMxts6guttwTZP5Y,409
3
+ smftools/_version.py,sha256=R5TtpJu7Qu6sOarfDpp-5Oyy8Pi2Ir3VewCvsCQiAgo,21
4
+ smftools/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
5
+ smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
6
+ smftools/datasets/F1_sample_sheet.csv,sha256=9PodIIOXK2eamYPbC6DGnXdzgi9bRDovf296j1aM0ak,259
7
+ smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
8
+ smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
9
+ smftools/datasets/datasets.py,sha256=0y597Ntp707bOgDwN6O-JEt9yxgplj66p0aj6Zs_IB4,779
10
+ smftools/informatics/__init__.py,sha256=WQiMBr1yjDrlmHg8UNgW2MJsq4fPrVfh-UBr5tYI9x4,326
11
+ smftools/informatics/conversion_smf.py,sha256=PS-TjgMttr3VRrT0zg5L_L01xMOewB_OXSsQyoM7DWI,4333
12
+ smftools/informatics/direct_smf.py,sha256=ue7p7deuRwaZtEh9EFV1YTE8HKRAmOsx9oaRJdjCrbY,4697
13
+ smftools/informatics/fast5_to_pod5.py,sha256=xfdZU3QluaAcR-q2uBRz8hcBwYt73nCnrFeahvi0OKQ,704
14
+ smftools/informatics/load_adata.py,sha256=i-2YCSaeLzbPfNtKPrLwfkv-9u_TrTAZrbtNAj3FRWY,7271
15
+ smftools/informatics/readwrite.py,sha256=DgVisHYdkjzaO7suPbUvluImeTc3jqGDlioNveHUxPc,4158
16
+ smftools/informatics/subsample_fasta_from_bed.py,sha256=YqYV09rvEQdeiS5hTTrKa8xYmJfeM3Vk-UUqwpw0qBk,1983
17
+ smftools/informatics/subsample_pod5.py,sha256=zDw9tRcrFRmPI62xkcy9dh8IfsJcuYm7R-FVeBC_g3s,4701
18
+ smftools/informatics/archived/bam_conversion.py,sha256=I8EzXjQixMmqx2oWnoNSH5NURBhfT-krbWHkoi_M964,3330
19
+ smftools/informatics/archived/bam_direct.py,sha256=jbEFtUIiUR8Wlp3po_sWkr19AUNS9WZjglojb9j28vo,3606
20
+ smftools/informatics/archived/basecalls_to_adata.py,sha256=-Nag6lr_NAtU4t8jo0GSMdgIAIfmDge-5VEUPQbEatE,3692
21
+ smftools/informatics/helpers/LoadExperimentConfig.py,sha256=gsWGoa9cydwY4Kd-hTXF2gtmxc8glRRD2V1JB88e9js,2822
22
+ smftools/informatics/helpers/__init__.py,sha256=KrfyM08_RgDf3Ajvb4KNTvcOqZiWYSIVhEznCr01Gcc,2255
23
+ smftools/informatics/helpers/align_and_sort_BAM.py,sha256=DouG6nGWXtz2ulZD5p0sEShE-4dbPudHaWcHFm4-oJA,2184
24
+ smftools/informatics/helpers/aligned_BAM_to_bed.py,sha256=eYkGQFSM2gPEauASkY_-9Yvy6727vP8Q4wx_st85Dpc,2638
25
+ smftools/informatics/helpers/bed_to_bigwig.py,sha256=AazYEZzKgKgukSFwCpeiApzxh1kbt11X4RFqRIiBIaY,1466
26
+ smftools/informatics/helpers/binarize_converted_base_identities.py,sha256=iJlDah-YJ0zx0UrlHdtgvrALVNSA0TTTdDoKmNCVg0Q,1846
27
+ smftools/informatics/helpers/canoncall.py,sha256=M7HEqhYsWMUB0tLP3hzMM0L7PhcOTXgetl5lV3GgIaw,1062
28
+ smftools/informatics/helpers/complement_base_list.py,sha256=k6EkLtxFoajaIufxw1p0pShJ2nPHyGLTbzZmIFFjB4o,532
29
+ smftools/informatics/helpers/concatenate_fastqs_to_bam.py,sha256=RXPn7e6Dcwol9tnUsfXJu3EuZcMSOJJo5LNWouovvZs,2715
30
+ smftools/informatics/helpers/converted_BAM_to_adata.py,sha256=Rsnydzpf9lMS3TQjXpbXJSSfCzhVTPn3rBDLiK-8utA,13991
31
+ smftools/informatics/helpers/count_aligned_reads.py,sha256=uYyUYglF1asiaoxr-LKxPMUEbfyD7FS-dumTg2hJHzQ,2170
32
+ smftools/informatics/helpers/extract_base_identities.py,sha256=E-_m9W82N52NjX5kz9Af5YH0S2k58hnq9KTrm4S5vgM,4370
33
+ smftools/informatics/helpers/extract_mods.py,sha256=UBFjXDKz_A6ivjcocYT1_pKjvygY2Fdg0RjQmMS8UuA,2269
34
+ smftools/informatics/helpers/extract_readnames_from_BAM.py,sha256=3FxSNqbZ1VsOK2RfHrvevQTzhWATf5E8bZ5yVOqayvk,759
35
+ smftools/informatics/helpers/find_conversion_sites.py,sha256=5AghDQzEoSvE2Og98VsKoeWUFSLnIGY1LnRu1BtQavM,3700
36
+ smftools/informatics/helpers/generate_converted_FASTA.py,sha256=ueaAsFnBuc7zKwkBivBR3DJg4DtkxkHHIQcVVSWzv-w,5161
37
+ smftools/informatics/helpers/get_chromosome_lengths.py,sha256=sLumLrGsU_Xg_oJcdOpQyjUGpJoT2HbcmxWwbwzXUlE,1036
38
+ smftools/informatics/helpers/get_native_references.py,sha256=fRuyEm9UJkfd5DwHmFb1bxEtNvtSI1_BxGRmrCymGkw,981
39
+ smftools/informatics/helpers/index_fasta.py,sha256=N3IErfSiavYldeaat8xcQgA1MpykoQHcE0gHUeWuClE,267
40
+ smftools/informatics/helpers/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
41
+ smftools/informatics/helpers/make_modbed.py,sha256=cOQ97gPfRiCcw_fqboxousXIiOYjp78IFYLbu749U1Y,939
42
+ smftools/informatics/helpers/modQC.py,sha256=LeOBObG8gAVVdgESIMceYhd5AW1gfN7ABo91OQtOzTM,1041
43
+ smftools/informatics/helpers/modcall.py,sha256=9PH7Peq4y-VBqQcMkbv0TwgePBlD5aM4_FmI7H4hbQQ,1142
44
+ smftools/informatics/helpers/modkit_extract_to_adata.py,sha256=duPlRAIz4VWM-jm9iaLY7N6JHQcun_L0nhr2VyUjNTI,38184
45
+ smftools/informatics/helpers/ohe_batching.py,sha256=_Mz2p1We5PVIb8S6Hbq_hREKJ9mGQiADwfFK_NgMGhA,1909
46
+ smftools/informatics/helpers/one_hot_encode.py,sha256=hpZAuwa9ndkhyCm9sO65KVHE0lbFDKqRylfliEKyD4o,632
47
+ smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py,sha256=tAnXFleGzXJNjHRAgZ0NUJuZ0P3aKmUYIrK-V9VoJKY,1860
48
+ smftools/informatics/helpers/separate_bam_by_bc.py,sha256=Fsi8OEmv5Ny13cWoHVV9JmEjVFEXT_ZxbBOlRdmyPbE,1742
49
+ smftools/informatics/helpers/split_and_index_BAM.py,sha256=_TFJ8fcLbIf37JG83hSc1zgs1yxX70-NhA8y-PbhTpo,1966
50
+ smftools/informatics/helpers/archived/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
51
+ smftools/informatics/helpers/archived/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
52
+ smftools/plotting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
+ smftools/preprocessing/__init__.py,sha256=5FQNrj51KmaDLeAGGBA8iWMkYiSOe7O91ES8mT4aVtE,1399
54
+ smftools/preprocessing/append_C_context.py,sha256=pP5u9o5U4JmHras0PK6yas65u4-U5KlX3sKLb-duo80,3728
55
+ smftools/preprocessing/binarize_on_Youden.py,sha256=slkkt56DZ1FZWy8Un5mNJEZ49JlPnPKow2zU4GoHEr8,2303
56
+ smftools/preprocessing/binary_layers_to_ohe.py,sha256=931eHuVda6pMZTvC7jVTKkY2a_KQWpSfgi-nkA5NmaI,1238
57
+ smftools/preprocessing/calculate_complexity.py,sha256=ut60et8bmIswtiLhctJWHNseIV4ZRQultYdtJPHcRPs,3224
58
+ smftools/preprocessing/calculate_consensus.py,sha256=6zRpRmb2xdfDu5hctZrReALRb7Pjn8sy8xJZTm3o0nU,2442
59
+ smftools/preprocessing/calculate_converted_read_methylation_stats.py,sha256=Si0DcES0lLMvg3XgdKpedxfPnXQ14tEFKrOAFRn3fHs,6059
60
+ smftools/preprocessing/calculate_coverage.py,sha256=ZgRxQGpydxQg1exkvSiy8nHmzDIPGGqL5vL9XQ2PZQ4,2068
61
+ smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=e5Mzyex7pT29H2PY014uU4Fi_eewbut1JkzC1ffBbCg,961
62
+ smftools/preprocessing/calculate_position_Youden.py,sha256=mfQ6nFfUaEaKg_icyHA1zZlhh0wHjpLE56BZDXOdP_4,6364
63
+ smftools/preprocessing/calculate_read_length_stats.py,sha256=6m362JaCKlD0QoBUMnM2qsB6Jo_4shl7xFzqU1uZccU,4945
64
+ smftools/preprocessing/clean_NaN.py,sha256=1vieT026p0gDJCbqB_CiLvAGGxlc-5xufoKJgZuBFFk,1150
65
+ smftools/preprocessing/filter_converted_reads_on_methylation.py,sha256=SN5q0rqYtYW9j3i0sVSyTv9EmR_uLKI7GkjmJixeOU0,1307
66
+ smftools/preprocessing/filter_reads_on_length.py,sha256=sAT66bjuI8ZtXyQc9SuPzq1dPIB1CNVx6VfWqVng4Dg,2191
67
+ smftools/preprocessing/invert_adata.py,sha256=u6Y70EH0B5mXb9-HuukIlzpMgZ6rhzcJuy3YZZTx3SA,684
68
+ smftools/preprocessing/load_sample_sheet.py,sha256=uGjzG9x-1t_1lCooH85P8Tfg80GdvVx8Jv1LPl9XNFM,915
69
+ smftools/preprocessing/make_dirs.py,sha256=lWHXpwC76MFM5sSme9i_WeYUaxutzybendokhny03ds,537
70
+ smftools/preprocessing/mark_duplicates.py,sha256=sQuPcTw8JsQoONOk-kMlAF965sIk2Pu-M7rIyfbyGGs,8145
71
+ smftools/preprocessing/min_non_diagonal.py,sha256=hx1asW8CEmLaIroZISW8EcAf_RnBEC_nofGD8QG0b1E,711
72
+ smftools/preprocessing/recipes.py,sha256=KzSw5JW0WJGzSis5Fm7moQY5PxOYl6-uYYf1NDj6nOE,7117
73
+ smftools/preprocessing/remove_duplicates.py,sha256=Erooi5_1VOUNfWpzddzmMNYMCl1U1jJryt7ZtMhabAs,699
74
+ smftools/preprocessing/archives/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
75
+ smftools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
+ smftools/tools/apply_HMM.py,sha256=AuVtOki69-Xs4mhjhTXJzd49KCVXwixFyWSUgDjtR6s,11
77
+ smftools/tools/cluster.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ smftools/tools/read_HMM.py,sha256=N0MGG494VjlxYJcCVz1jN4OasGtRITZS98SJ2xB_j8k,10
79
+ smftools/tools/subset_adata.py,sha256=qyU9iCal03edb5aUS3AZ2U4TlL3uQ42jGI9hX3QF7Fc,1047
80
+ smftools/tools/train_HMM.py,sha256=x5ZcXj-heWQqDOX86nuuDoj1tPkYKl04fYA1fCKNQ0c,1380
81
+ smftools-0.1.3.dist-info/METADATA,sha256=u26Og8tpAF2TgXZztotk3Q4EuP7Fvf73s1tlIjBDD-A,6410
82
+ smftools-0.1.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
83
+ smftools-0.1.3.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
84
+ smftools-0.1.3.dist-info/RECORD,,
@@ -1,49 +0,0 @@
1
- ## align_BAM
2
- import subprocess
3
-
4
- def align_BAM(fasta, bam, bam_suffix):
5
- """
6
- A wrapper for running dorado aligner and samtools functions
7
- """
8
- aligned_BAM=f"{bam}_aligned"
9
- aligned_sorted_BAM=f"{aligned_BAM}_sorted"
10
- output = bam + bam_suffix
11
- aligned_output = aligned_BAM + bam_suffix
12
- aligned_sorted_output = aligned_sorted_BAM + bam_suffix
13
-
14
- # Run dorado aligner
15
- subprocess.run([
16
- "dorado", "aligner",
17
- "--secondary=no",
18
- fasta,
19
- output
20
- ], stdout=open(aligned_output, "w"))
21
-
22
- # Sort the BAM on positional coordinates
23
- subprocess.run([
24
- "samtools", "sort",
25
- "-o", aligned_sorted_output,
26
- aligned_output
27
- ])
28
-
29
- # Create a BAM index file
30
- subprocess.run([
31
- "samtools", "index",
32
- aligned_sorted_output
33
- ])
34
-
35
- # Make a bed file of coordinates for the BAM
36
- subprocess.run([
37
- "samtools", "view",
38
- aligned_sorted_output
39
- ], stdout=subprocess.PIPE) | subprocess.run([
40
- "awk", '{print $3, $4, $4+length($10)-1}'
41
- ], stdin=subprocess.PIPE, stdout=open(f"{aligned_sorted_BAM}_bed.bed", "w"))
42
-
43
- # Make a text file of reads for the BAM
44
- subprocess.run([
45
- "samtools", "view",
46
- aligned_sorted_output
47
- ], stdout=subprocess.PIPE) | subprocess.run([
48
- "cut", "-f1"
49
- ], stdin=subprocess.PIPE, stdout=open(f"aligned_sorted_BAM_read_names.txt", "w"))
@@ -1,17 +0,0 @@
1
- ## load_experiment_config
2
- import csv
3
-
4
- def load_experiment_config(experiment_config):
5
- """
6
- Loads in the experiment configuration csv and saves global variables with experiment configuration parameters
7
- """
8
- with open(experiment_config, mode='r', encoding='utf-8-sig') as csvfile:
9
- reader = csv.DictReader(csvfile)
10
- for row in reader:
11
- # Extract variable name and value from each row
12
- var_name = row['variable']
13
- value = row['value']
14
-
15
- # Alternatively, set it directly in the globals() dictionary
16
- globals()[var_name] = value
17
-
@@ -1,26 +0,0 @@
1
- ## pod5_conversion
2
- from .helpers import align_BAM, canoncall, converted_BAM_to_adata, generate_converted_FASTA, split_and_index_BAM
3
- import subprocess
4
-
5
- def pod5_conversion(fasta, output_directory, conversion_types, strands, model, pod5_dir, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix):
6
- """
7
- Converts a POD5 file from a nanopore conversion SMF experiment to an adata object
8
- """
9
- bam=f"{output_directory}/HAC_basecalls"
10
- aligned_BAM=f"{bam}_aligned"
11
- aligned_sorted_BAM=f"{aligned_BAM}_sorted"
12
- # 1) Convert FASTA file
13
- converted_FASTA=fasta.split('.fa')[0]+'_converted.fasta'
14
- generate_converted_FASTA(fasta, conversion_types, strands, converted_FASTA)
15
-
16
- # 2) Basecall from the input POD5 to generate a singular output BAM
17
- canoncall(model, pod5_dir, barcode_kit, bam, bam_suffix)
18
-
19
- # 3) Align the BAM to the converted reference FASTA and sort the bam on positional coordinates. Also make an index and a bed file of mapped reads
20
- align_BAM(converted_FASTA, bam, bam_suffix)
21
-
22
- ### 4) Split the aligned and sorted BAM files by barcode (BC Tag) into the split_BAM directory###
23
- split_and_index_BAM(aligned_sorted_BAM, split_dir, bam_suffix)
24
-
25
- # 5) Take the converted BAM and load it into an adata object.
26
- converted_BAM_to_adata(converted_FASTA, split_dir, mapping_threshold, experiment_name, conversion_types, bam_suffix)
@@ -1,29 +0,0 @@
1
- ## pod5_direct
2
- from .helpers import align_BAM, extract_mods, make_modbed, modcall, modkit_extract_to_adata, modQC, split_and_index_BAM
3
-
4
- def pod5_direct(fasta, output_directory, mod_list, model, thresholds, pod5_dir, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, batch_size):
5
- """
6
-
7
- """
8
- bam=f"{output_directory}/HAC_mod_calls"
9
- aligned_BAM=f"{bam}_aligned"
10
- aligned_sorted_BAM=f"{aligned_BAM}_sorted"
11
- mod_bed_dir=f"{output_directory}/split_mod_beds"
12
- mod_tsv_dir=f"{output_directory}/split_mod_tsvs"
13
-
14
- aligned_sorted_output = aligned_sorted_BAM + bam_suffix
15
- mod_map = {'6mA': '6mA', '5mC_5hmC': '5mC'}
16
- mods = [mod_map[mod] for mod in mod_list]
17
-
18
- # 1) Basecall using dorado
19
- modcall(model, pod5_dir, barcode_kit, mod_list, bam, bam_suffix)
20
- # 2) Align the BAM to the converted reference FASTA. Also make an index and a bed file of mapped reads
21
- align_BAM(fasta, bam, bam_suffix)
22
- # 3) Split the aligned and sorted BAM files by barcode (BC Tag) into the split_BAM directory
23
- split_and_index_BAM(aligned_sorted_BAM, split_dir, bam_suffix)
24
- # 4) Using nanopore modkit to work with modified BAM files ###
25
- modQC(aligned_sorted_output, thresholds) # get QC metrics for mod calls
26
- make_modbed(aligned_sorted_output, thresholds, mod_bed_dir) # Generate bed files of position methylation summaries for every sample
27
- extract_mods(thresholds, mod_tsv_dir, split_dir, bam_suffix) # Extract methylations calls for split BAM files into split TSV files
28
- #5 Load the modification data from TSVs into an adata object
29
- modkit_extract_to_adata(fasta, aligned_sorted_output, mapping_threshold, experiment_name, mods, batch_size)
@@ -1,17 +0,0 @@
1
- ## pod5_to_adata
2
- from .helpers import load_experiment_config
3
- from.pod5_direct import pod5_direct
4
- from.pod5_conversion import pod5_conversion
5
-
6
- def pod5_to_adata(config_path, ):
7
- """
8
-
9
- """
10
- # Load experiment config parameters into global variables
11
- load_experiment_config(config_path)
12
- if smf_modality == 'conversion':
13
- (fasta, output_directory, conversion_types, strands, model, pod5_dir, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix)
14
- elif smf_modality == 'direct':
15
- pod5_direct(fasta, output_directory, mod_list, model, thresholds, pod5_dir, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, batch_size)
16
- else:
17
- print("Error")
@@ -1,75 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: smftools
3
- Version: 0.1.0
4
- Summary: Single Molecule Footprinting Analysis in Python.
5
- Project-URL: Source, https://github.com/jkmckenna/smftools
6
- Author: Joseph McKenna
7
- Maintainer-email: Joseph McKenna <jkmckenna@berkeley.edu>
8
- License-Expression: MIT
9
- License-File: LICENSE
10
- Keywords: anndata,chromatin-accessibility,machine-learning,nanopore,protein-dna-binding,single-locus,single-molecule-footprinting
11
- Classifier: Development Status :: 2 - Pre-Alpha
12
- Classifier: Environment :: Console
13
- Classifier: Intended Audience :: Developers
14
- Classifier: Intended Audience :: Science/Research
15
- Classifier: License :: OSI Approved :: MIT License
16
- Classifier: Natural Language :: English
17
- Classifier: Operating System :: MacOS :: MacOS X
18
- Classifier: Programming Language :: Python :: 3
19
- Classifier: Programming Language :: Python :: 3.9
20
- Classifier: Programming Language :: Python :: 3.10
21
- Classifier: Programming Language :: Python :: 3.11
22
- Classifier: Programming Language :: Python :: 3.12
23
- Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
24
- Classifier: Topic :: Scientific/Engineering :: Visualization
25
- Requires-Python: >=3.9
26
- Requires-Dist: anndata>=0.10.0
27
- Requires-Dist: biopython>=1.79
28
- Requires-Dist: cython>=0.29.28
29
- Requires-Dist: networkx>=3.2
30
- Requires-Dist: numpy<2,>=1.22.0
31
- Requires-Dist: pandas>=1.4.2
32
- Requires-Dist: pomegranate>1.0.0
33
- Requires-Dist: pysam>=0.19.1
34
- Requires-Dist: scanpy>=1.9
35
- Requires-Dist: scikit-learn>=1.0.2
36
- Requires-Dist: scipy>=1.7.3
37
- Requires-Dist: seaborn>=0.11
38
- Requires-Dist: tqdm
39
- Provides-Extra: base-tests
40
- Requires-Dist: pytest; extra == 'base-tests'
41
- Requires-Dist: pytest-cov; extra == 'base-tests'
42
- Provides-Extra: doc
43
- Requires-Dist: ipython>=7.20; extra == 'doc'
44
- Requires-Dist: matplotlib!=3.6.1; extra == 'doc'
45
- Requires-Dist: myst-nb>=1; extra == 'doc'
46
- Requires-Dist: myst-parser>=2; extra == 'doc'
47
- Requires-Dist: nbsphinx>=0.9; extra == 'doc'
48
- Requires-Dist: readthedocs-sphinx-search; extra == 'doc'
49
- Requires-Dist: setuptools; extra == 'doc'
50
- Requires-Dist: sphinx-autodoc-typehints>=1.25.2; extra == 'doc'
51
- Requires-Dist: sphinx-book-theme>=1.1.0; extra == 'doc'
52
- Requires-Dist: sphinx-copybutton; extra == 'doc'
53
- Requires-Dist: sphinx-design; extra == 'doc'
54
- Requires-Dist: sphinx>=7; extra == 'doc'
55
- Requires-Dist: sphinxcontrib-bibtex; extra == 'doc'
56
- Requires-Dist: sphinxext-opengraph; extra == 'doc'
57
- Provides-Extra: torch
58
- Requires-Dist: pomeganate>=1.0.0; extra == 'torch'
59
- Requires-Dist: torch>=1.9.0; extra == 'torch'
60
- Provides-Extra: torch-tests
61
- Requires-Dist: pomeganate>=1.0.0; extra == 'torch-tests'
62
- Requires-Dist: pytest; extra == 'torch-tests'
63
- Requires-Dist: pytest-cov; extra == 'torch-tests'
64
- Requires-Dist: torch>=1.9.0; extra == 'torch-tests'
65
- Description-Content-Type: text/markdown
66
-
67
- # smftools
68
- A tool for processing raw sequencing data for single molecule footprinting experiments at single genomic loci.
69
-
70
- ## Dependencies
71
- The following tools need to be installed and configured:
72
- 1) [Dorado](https://github.com/nanoporetech/dorado) -> For standard/modified basecalling and alignment. Can be attained by downloading and configuring nanopore MinKnow software.
73
- 2) [Samtools](https://github.com/samtools/samtools) -> For working with SAM/BAM files
74
- 3) [Minimap2](https://github.com/lh3/minimap2) -> The aligner used by Dorado
75
- 4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
@@ -1,58 +0,0 @@
1
- smftools/__init__.py,sha256=pWcysCXCokCdW4YySaA8BMumZkE56m15otMPG88nQGc,444
2
- smftools/_settings.py,sha256=a1uYWNBNtQb30cGSdpjeiIMnQV1Fip7IZAQrNzjXR5w,324
3
- smftools/readwrite.py,sha256=p-K_RYOrM0vDawBTcuCUyuwVzmYwJqNMvhv9fCTLDKE,4159
4
- smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz,sha256=q6wJtgFRDln0o20XNCx1qad3lwcdCoylqPN7wskTfI8,2926497
5
- smftools/datasets/__init__.py,sha256=xkSTlPuakVYVCuRurif9BceNBDt6bsngJvvjI8757QI,142
6
- smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz,sha256=niOcVHaYY7h3XyvwSkN-V_NMBaRt2vTP5TrJO0CwMCs,8385050
7
- smftools/datasets/datasets.py,sha256=rAcp7_Raa8Uv95DISj-oACY1fE_5fIfb5Poj-9WVOWo,473
8
- smftools/informatics/__init__.py,sha256=Bjufdncl978d-tNriuRHX92mjeAO5axjTlZP7iePjms,235
9
- smftools/informatics/pod5_conversion.py,sha256=m_qNRSNeUndl5KO8PJPMLCOWqcVq-TJSvoyUnJW-UHE,1399
10
- smftools/informatics/pod5_direct.py,sha256=MGQkpHI2qQuEO0IDFOiXjL0Pq59oC1DwUaIWH7jTHiU,1707
11
- smftools/informatics/pod5_to_adata.py,sha256=R31bkGbparRGpYpTZ69znQThK1xCGChlf4oP836IC9Y,722
12
- smftools/informatics/readwrite.py,sha256=p-K_RYOrM0vDawBTcuCUyuwVzmYwJqNMvhv9fCTLDKE,4159
13
- smftools/informatics/helpers/__init__.py,sha256=ws8Zyxin68L7G5R9Rna_qoBnkSNOaD1ndlcrooV2d-k,1466
14
- smftools/informatics/helpers/align_BAM.py,sha256=vZpkbI-mUqd6qJaovRhuNM03s816fjd5hNEcfA0oHxo,1414
15
- smftools/informatics/helpers/binarize_converted_base_identities.py,sha256=rTdk06BmU_bvuE1NOU1LGSQs9ytkl7vQjZcwMbA5Yx0,1409
16
- smftools/informatics/helpers/canoncall.py,sha256=Ujz0Pkp_wW-XJyb3uB2fzVpB12c2MCOWVTg_uIOQL8c,397
17
- smftools/informatics/helpers/converted_BAM_to_adata.py,sha256=7tntnkTZpNqS0WTMbO42ksxXqzk9NFpXK0q6uWSqtkM,8593
18
- smftools/informatics/helpers/count_aligned_reads.py,sha256=Q9iU0zwwNZRn0oOxRl_x5OAF7YgaRkSpQBojaBccQsI,1814
19
- smftools/informatics/helpers/extract_base_identities.py,sha256=nrQy8cUyOA2C8cKKL6SpZ97U0ZGhChw8Qdk0BwuxIT4,2406
20
- smftools/informatics/helpers/extract_mods.py,sha256=IQdpQxh_2NStKK4kIVKa8UAcV-fVJSs_bzB3JLe0Jx4,1602
21
- smftools/informatics/helpers/find_conversion_sites.py,sha256=aer63p2JHqaoB3wSK9xqpSjow7w7UyrMSJ06aKTQSiQ,3208
22
- smftools/informatics/helpers/generate_converted_FASTA.py,sha256=xHjspkeCiSKjqb6zUrjCG13OKW597LJ4_w33tg-wFok,3006
23
- smftools/informatics/helpers/get_native_references.py,sha256=wx_RXnPwj0NGp7Tx1_hXyO8ZzQJHZwa0b3a6r3266FY,976
24
- smftools/informatics/helpers/informatics.py,sha256=gKb2ZJ_LcAeEXuQqn9e-QDF_sS4tMpMTr2vZlqa7n54,14572
25
- smftools/informatics/helpers/load_adata.py,sha256=DhvYYqO9VLsZqhL1WjN9sd-e3fgvdXGlgTP18z1h0L0,33654
26
- smftools/informatics/helpers/load_experiment_config.py,sha256=boF524jZZKzBjc2yvAiMYvhM4OW_efXSbwU-nINKDdg,607
27
- smftools/informatics/helpers/make_dirs.py,sha256=UxghbuquyXgDD-H24Ghf1B7Kfpdt04NgMs8GE6zSJ3U,475
28
- smftools/informatics/helpers/make_modbed.py,sha256=m1lQUbw0W63YjiM_Tmy6QOL3GkHgvSuCrkX2Bo8sCco,741
29
- smftools/informatics/helpers/modQC.py,sha256=C-WVaoLN7Dxh--JcWa4UXzhwFpf0AXrFA99IsxfVXwo,770
30
- smftools/informatics/helpers/modcall.py,sha256=f41SkaXi2fYgv7B0oAFwj-9CZ0XVhvzWHfe977rT0wQ,493
31
- smftools/informatics/helpers/modkit_extract_to_adata.py,sha256=TvIHDXWTO2QxArGyIU3w-dQBVyq0rQ1A4iMvV9Rb_7A,24407
32
- smftools/informatics/helpers/one_hot_encode.py,sha256=jxfTNREED0YhdvwhVmRrt2BZUfiOrOUURVSOVtGypns,439
33
- smftools/informatics/helpers/separate_bam_by_bc.py,sha256=AFT-v0XXuW2rRYG6FC-8gulhAA6a4YGAa0UEuMDlimc,1235
34
- smftools/informatics/helpers/split_and_index_BAM.py,sha256=HqPAy5YxK0jokCWCUlUIWSLf2n-Gubkge-xxYfX4XLE,755
35
- smftools/plotting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- smftools/preprocessing/__init__.py,sha256=ngs4XYcd9gSXwSa-gi1pWMblCRJ_uZo5MMkg5vuhDOU,1438
37
- smftools/preprocessing/append_C_context.py,sha256=sfHiV3gBOum3KyZ6wFOCUcDUieW9rra3hKFkWEb8wmk,2063
38
- smftools/preprocessing/binarize_on_Youden.py,sha256=CO0KnxxHmCwq7tlrBh7BuY0_6SEacR7NwRhkwaWoVUg,2056
39
- smftools/preprocessing/binary_layers_to_ohe.py,sha256=zahq1YcgAeva-b1CcEVaBK4XaNmMgE6IpfiedYepI5k,876
40
- smftools/preprocessing/calculate_complexity.py,sha256=H4j0mCWL-jFWZ8UoTGa5lNEcqByfwIDtlsJrpNRNkg4,2751
41
- smftools/preprocessing/calculate_converted_read_methylation_stats.py,sha256=ZIOKGkbWI15RzpnfgWU4MUXFz3LlUjL_yjodGrye8-A,2626
42
- smftools/preprocessing/calculate_coverage.py,sha256=Q-RjTqbYt9jc-Axk807_h0m7_oDFdewrO805FQARLUA,1852
43
- smftools/preprocessing/calculate_pairwise_hamming_distances.py,sha256=l0IYlWu9RCDq4R2pJm_qGXN_RyFkIuah9fFLvg1Hti0,843
44
- smftools/preprocessing/calculate_position_Youden.py,sha256=FUSsrhp8L8TLoJaX2cSl8u1phNbYpTRJfVqsrwMOWgY,6008
45
- smftools/preprocessing/calculate_read_length_stats.py,sha256=kKcEw4zS-GnJ2nyC5c24YVMY2oBmxmcxjPWLGnrkwws,1711
46
- smftools/preprocessing/clean_NaN.py,sha256=HCRX_nA6H3o7CysCa6yxN07xQEoh6LvdkX7aAYqSKR8,1024
47
- smftools/preprocessing/filter_converted_reads_on_methylation.py,sha256=krqDb6TNjQx4IICXbEQ8SDcaSjrWZ-9ChtaEiIxU5KY,962
48
- smftools/preprocessing/filter_reads_on_length.py,sha256=-tXMIpg8Mx8GskCfjBy0ZBczuJRTZdyuSZtDyb6KDJs,1737
49
- smftools/preprocessing/invert_adata.py,sha256=vpR0jynLODhE8mpiHZQIv1XUY9pd7cEG0ujC-GArXIE,616
50
- smftools/preprocessing/mark_duplicates.py,sha256=Qd1fluCHkL7ZAY37wGmBe40HwkRipOkbDAp6lnoLU9I,6818
51
- smftools/preprocessing/min_non_diagonal.py,sha256=o79E5xy-aO-cSwN5dUVi5oj8_EfQBDPcj1D0_7fvk1Q,644
52
- smftools/preprocessing/preprocessing.py,sha256=4mLT09A7vwRZ78FHmuwtv38mH9TQ9qrZc_WjHRhhkIw,34379
53
- smftools/preprocessing/remove_duplicates.py,sha256=sgdRjZSLakocTRwAukdp1RpFhODbeOjNN_EWZkTshAc,395
54
- smftools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- smftools-0.1.0.dist-info/METADATA,sha256=Loh3iFQgPLn6Xe_WdbeATlGiMqggFaYJOqgwf5e8WRI,3422
56
- smftools-0.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
57
- smftools-0.1.0.dist-info/licenses/LICENSE,sha256=F8LwmL6vMPddaCt1z1S83Kh_OZv50alTlY7BvVx1RXw,1066
58
- smftools-0.1.0.dist-info/RECORD,,