smftools 0.1.3__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. {smftools-0.1.3.dist-info → smftools-0.1.6.dist-info}/METADATA +44 -11
  2. smftools-0.1.6.dist-info/RECORD +4 -0
  3. smftools/__init__.py +0 -25
  4. smftools/_settings.py +0 -20
  5. smftools/_version.py +0 -1
  6. smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
  7. smftools/datasets/F1_sample_sheet.csv +0 -5
  8. smftools/datasets/__init__.py +0 -9
  9. smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
  10. smftools/datasets/datasets.py +0 -28
  11. smftools/informatics/__init__.py +0 -14
  12. smftools/informatics/archived/bam_conversion.py +0 -59
  13. smftools/informatics/archived/bam_direct.py +0 -63
  14. smftools/informatics/archived/basecalls_to_adata.py +0 -71
  15. smftools/informatics/conversion_smf.py +0 -79
  16. smftools/informatics/direct_smf.py +0 -89
  17. smftools/informatics/fast5_to_pod5.py +0 -21
  18. smftools/informatics/helpers/LoadExperimentConfig.py +0 -74
  19. smftools/informatics/helpers/__init__.py +0 -60
  20. smftools/informatics/helpers/align_and_sort_BAM.py +0 -48
  21. smftools/informatics/helpers/aligned_BAM_to_bed.py +0 -73
  22. smftools/informatics/helpers/archived/informatics.py +0 -260
  23. smftools/informatics/helpers/archived/load_adata.py +0 -516
  24. smftools/informatics/helpers/bed_to_bigwig.py +0 -39
  25. smftools/informatics/helpers/binarize_converted_base_identities.py +0 -31
  26. smftools/informatics/helpers/canoncall.py +0 -25
  27. smftools/informatics/helpers/complement_base_list.py +0 -21
  28. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -54
  29. smftools/informatics/helpers/converted_BAM_to_adata.py +0 -233
  30. smftools/informatics/helpers/count_aligned_reads.py +0 -43
  31. smftools/informatics/helpers/extract_base_identities.py +0 -57
  32. smftools/informatics/helpers/extract_mods.py +0 -51
  33. smftools/informatics/helpers/extract_readnames_from_BAM.py +0 -22
  34. smftools/informatics/helpers/find_conversion_sites.py +0 -61
  35. smftools/informatics/helpers/generate_converted_FASTA.py +0 -98
  36. smftools/informatics/helpers/get_chromosome_lengths.py +0 -32
  37. smftools/informatics/helpers/get_native_references.py +0 -28
  38. smftools/informatics/helpers/index_fasta.py +0 -12
  39. smftools/informatics/helpers/make_dirs.py +0 -21
  40. smftools/informatics/helpers/make_modbed.py +0 -27
  41. smftools/informatics/helpers/modQC.py +0 -27
  42. smftools/informatics/helpers/modcall.py +0 -28
  43. smftools/informatics/helpers/modkit_extract_to_adata.py +0 -518
  44. smftools/informatics/helpers/ohe_batching.py +0 -52
  45. smftools/informatics/helpers/one_hot_encode.py +0 -21
  46. smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +0 -52
  47. smftools/informatics/helpers/separate_bam_by_bc.py +0 -43
  48. smftools/informatics/helpers/split_and_index_BAM.py +0 -41
  49. smftools/informatics/load_adata.py +0 -127
  50. smftools/informatics/readwrite.py +0 -106
  51. smftools/informatics/subsample_fasta_from_bed.py +0 -47
  52. smftools/informatics/subsample_pod5.py +0 -104
  53. smftools/plotting/__init__.py +0 -0
  54. smftools/preprocessing/__init__.py +0 -34
  55. smftools/preprocessing/append_C_context.py +0 -69
  56. smftools/preprocessing/archives/preprocessing.py +0 -614
  57. smftools/preprocessing/binarize_on_Youden.py +0 -42
  58. smftools/preprocessing/binary_layers_to_ohe.py +0 -30
  59. smftools/preprocessing/calculate_complexity.py +0 -71
  60. smftools/preprocessing/calculate_consensus.py +0 -47
  61. smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -96
  62. smftools/preprocessing/calculate_coverage.py +0 -41
  63. smftools/preprocessing/calculate_pairwise_hamming_distances.py +0 -27
  64. smftools/preprocessing/calculate_position_Youden.py +0 -104
  65. smftools/preprocessing/calculate_read_length_stats.py +0 -86
  66. smftools/preprocessing/clean_NaN.py +0 -38
  67. smftools/preprocessing/filter_converted_reads_on_methylation.py +0 -29
  68. smftools/preprocessing/filter_reads_on_length.py +0 -41
  69. smftools/preprocessing/invert_adata.py +0 -23
  70. smftools/preprocessing/load_sample_sheet.py +0 -24
  71. smftools/preprocessing/make_dirs.py +0 -21
  72. smftools/preprocessing/mark_duplicates.py +0 -134
  73. smftools/preprocessing/min_non_diagonal.py +0 -25
  74. smftools/preprocessing/recipes.py +0 -125
  75. smftools/preprocessing/remove_duplicates.py +0 -21
  76. smftools/readwrite.py +0 -106
  77. smftools/tools/__init__.py +0 -0
  78. smftools/tools/apply_HMM.py +0 -1
  79. smftools/tools/cluster.py +0 -0
  80. smftools/tools/read_HMM.py +0 -1
  81. smftools/tools/subset_adata.py +0 -32
  82. smftools/tools/train_HMM.py +0 -43
  83. smftools-0.1.3.dist-info/RECORD +0 -84
  84. {smftools-0.1.3.dist-info → smftools-0.1.6.dist-info}/WHEEL +0 -0
  85. {smftools-0.1.3.dist-info → smftools-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -1,28 +0,0 @@
1
- ## get_native_references
2
-
3
- # Direct methylation specific
4
def get_native_references(fasta_file):
    """
    Makes a dictionary keyed by record id which points to the record length and record sequence.

    Parameters:
        fasta_file (str): A string representing the path to the FASTA file for the experiment.

    Returns:
        dict: Maps each record id to a two-item list ``[sequence_length, sequence]``,
            where ``sequence`` is the upper-cased sequence string of the record and
            ``sequence_length`` is its length.
    """
    from .. import readwrite
    from Bio import SeqIO

    record_dict = {}
    print('{0}: Opening FASTA file {1}'.format(readwrite.time_string(), fasta_file))
    # Open the FASTA file read-only and walk its records one at a time
    with open(fasta_file, "r") as f:
        for record in SeqIO.parse(f, "fasta"):
            # Normalise case so downstream base comparisons are case-insensitive
            sequence = str(record.seq).upper()
            record_dict[record.id] = [len(sequence), sequence]
    return record_dict
@@ -1,12 +0,0 @@
1
- # index_fasta
2
-
3
def index_fasta(fasta):
    """
    Invoke ``samtools faidx`` on the given FASTA file to produce its index.

    The exit status of the samtools process is not inspected.

    Parameters:
        fasta (str): Path to the input fasta to make an index file for.

    Returns:
        None
    """
    from subprocess import run

    faidx_command = ["samtools", "faidx", fasta]
    run(faidx_command)
@@ -1,21 +0,0 @@
1
- ## make_dirs
2
-
3
- # General
4
def make_dirs(directories):
    """
    Takes a list of directory paths and creates each one that does not already exist.

    Missing parent directories are created as well, so nested paths such as
    ``out/a/b`` work even when ``out`` is not there yet.

    Parameters:
        directories (list): A list of directories to make

    Returns:
        None

    Raises:
        FileExistsError: If a path exists but is not a directory.
    """
    import os

    for directory in directories:
        if os.path.isdir(directory):
            print(f"Directory '{directory}' already exists.")
            continue
        # exist_ok guards against another process creating the directory
        # between the isdir() check above and this call.
        os.makedirs(directory, exist_ok=True)
        print(f"Directory '{directory}' created successfully.")
@@ -1,27 +0,0 @@
1
- ## make_modbed
2
-
3
- # Direct SMF
4
def make_modbed(aligned_sorted_output, thresholds, mod_bed_dir):
    """
    Run ``modkit pileup`` on the overall BAM file, partitioning the output on the ``BC`` tag.

    The working directory of the current process is changed to ``mod_bed_dir``
    before modkit is invoked, and it is not restored afterwards.

    Parameters:
        aligned_sorted_output (str): File path to the aligned_sorted non-split BAM file.
        thresholds (list): Four values, unpacked in the order filter threshold,
            m6A threshold, m5C threshold, hm5C threshold.
        mod_bed_dir (str): Directory that becomes the working directory and is
            passed to modkit as the output location.

    Returns:
        None
    """
    import os
    import subprocess

    os.chdir(mod_bed_dir)
    filter_cutoff, m6A_cutoff, m5C_cutoff, hm5C_cutoff = thresholds

    pileup_cmd = ["modkit", "pileup", aligned_sorted_output, mod_bed_dir]
    pileup_cmd.extend(["--partition-tag", "BC", "--only-tabs"])
    pileup_cmd.extend(["--filter-threshold", f'{filter_cutoff}'])
    # One --mod-thresholds flag per modification code, in the order m, a, h
    per_mod_cutoffs = (("m", m5C_cutoff), ("a", m6A_cutoff), ("h", hm5C_cutoff))
    for mod_code, cutoff in per_mod_cutoffs:
        pileup_cmd.extend(["--mod-thresholds", f"{mod_code}:{cutoff}"])

    subprocess.run(pileup_cmd)
@@ -1,27 +0,0 @@
1
- ## modQC
2
-
3
- # Direct SMF
4
def modQC(aligned_sorted_output, thresholds):
    """
    Run modkit quality-control commands on the overall BAM file.

    Two commands are executed in sequence: ``modkit sample-probs`` and then
    ``modkit summary`` with the supplied call thresholds. It is generally good
    to look at these outputs on positive and negative controls.

    Parameters:
        aligned_sorted_output (str): File path of the aligned_sorted non-split BAM file.
        thresholds (list): Four call thresholds, unpacked in the order filter
            threshold, m6A threshold, m5C threshold, hm5C threshold.

    Returns:
        None
    """
    import subprocess

    filter_cutoff, m6A_cutoff, m5C_cutoff, hm5C_cutoff = thresholds

    probs_cmd = ["modkit", "sample-probs", aligned_sorted_output]
    subprocess.run(probs_cmd)

    summary_cmd = ["modkit", "summary", aligned_sorted_output]
    summary_cmd.extend(["--filter-threshold", f"{filter_cutoff}"])
    # One --mod-thresholds flag per modification code, in the order m, a, h
    per_mod_cutoffs = (("m", m5C_cutoff), ("a", m6A_cutoff), ("h", hm5C_cutoff))
    for mod_code, cutoff in per_mod_cutoffs:
        summary_cmd.extend(["--mod-thresholds", f"{mod_code}:{cutoff}"])
    subprocess.run(summary_cmd)
@@ -1,28 +0,0 @@
1
- ## modcall
2
-
3
- # Direct methylation specific
4
def modcall(model, pod5_dir, barcode_kit, mod_list, bam, bam_suffix):
    """
    Wrapper around ``dorado basecaller`` with modified base calling enabled.

    The standard output of the dorado process is written to the file named
    ``bam + bam_suffix``. The assembled command is printed before it runs.

    Parameters:
        model (str): File path to the dorado basecalling model.
        pod5_dir (str): File path to the experiment directory containing the POD5 files.
        barcode_kit (str): The barcoding kit used in the experiment.
        mod_list (list): Modification types, appended after ``--modified-bases``.
        bam (str): File path of the BAM file to output, without the suffix.
        bam_suffix (str): The suffix appended to ``bam`` to form the output path.

    Returns:
        None
    """
    import subprocess

    output_path = bam + bam_suffix

    dorado_cmd = ["dorado", "basecaller", model, pod5_dir]
    dorado_cmd.extend(["--kit-name", barcode_kit, "-Y", "--modified-bases"])
    dorado_cmd.extend(mod_list)

    print(f'Running: {" ".join(dorado_cmd)}')
    with open(output_path, "w") as bam_handle:
        subprocess.run(dorado_cmd, stdout=bam_handle)