smftools 0.1.3__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +5 -1
- smftools/_version.py +1 -1
- smftools/informatics/__init__.py +2 -0
- smftools/informatics/archived/print_bam_query_seq.py +29 -0
- smftools/informatics/basecall_pod5s.py +80 -0
- smftools/informatics/conversion_smf.py +63 -10
- smftools/informatics/direct_smf.py +66 -18
- smftools/informatics/helpers/LoadExperimentConfig.py +1 -0
- smftools/informatics/helpers/__init__.py +16 -2
- smftools/informatics/helpers/align_and_sort_BAM.py +27 -16
- smftools/informatics/helpers/aligned_BAM_to_bed.py +49 -48
- smftools/informatics/helpers/bam_qc.py +66 -0
- smftools/informatics/helpers/binarize_converted_base_identities.py +69 -21
- smftools/informatics/helpers/canoncall.py +12 -3
- smftools/informatics/helpers/concatenate_fastqs_to_bam.py +5 -4
- smftools/informatics/helpers/converted_BAM_to_adata.py +34 -22
- smftools/informatics/helpers/converted_BAM_to_adata_II.py +369 -0
- smftools/informatics/helpers/demux_and_index_BAM.py +52 -0
- smftools/informatics/helpers/extract_base_identities.py +33 -46
- smftools/informatics/helpers/extract_mods.py +55 -23
- smftools/informatics/helpers/extract_read_features_from_bam.py +31 -0
- smftools/informatics/helpers/extract_read_lengths_from_bed.py +25 -0
- smftools/informatics/helpers/find_conversion_sites.py +33 -44
- smftools/informatics/helpers/generate_converted_FASTA.py +87 -86
- smftools/informatics/helpers/modcall.py +13 -5
- smftools/informatics/helpers/modkit_extract_to_adata.py +762 -396
- smftools/informatics/helpers/ohe_batching.py +65 -41
- smftools/informatics/helpers/ohe_layers_decode.py +32 -0
- smftools/informatics/helpers/one_hot_decode.py +27 -0
- smftools/informatics/helpers/one_hot_encode.py +45 -9
- smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +1 -0
- smftools/informatics/helpers/run_multiqc.py +28 -0
- smftools/informatics/helpers/split_and_index_BAM.py +3 -8
- smftools/informatics/load_adata.py +58 -3
- smftools/plotting/__init__.py +15 -0
- smftools/plotting/classifiers.py +355 -0
- smftools/plotting/general_plotting.py +205 -0
- smftools/plotting/position_stats.py +462 -0
- smftools/preprocessing/__init__.py +6 -7
- smftools/preprocessing/append_C_context.py +22 -9
- smftools/preprocessing/{mark_duplicates.py → archives/mark_duplicates.py} +38 -26
- smftools/preprocessing/binarize_on_Youden.py +35 -32
- smftools/preprocessing/binary_layers_to_ohe.py +13 -3
- smftools/preprocessing/calculate_complexity.py +3 -2
- smftools/preprocessing/calculate_converted_read_methylation_stats.py +44 -46
- smftools/preprocessing/calculate_coverage.py +26 -25
- smftools/preprocessing/calculate_pairwise_differences.py +49 -0
- smftools/preprocessing/calculate_position_Youden.py +18 -7
- smftools/preprocessing/calculate_read_length_stats.py +39 -46
- smftools/preprocessing/clean_NaN.py +33 -25
- smftools/preprocessing/filter_adata_by_nan_proportion.py +31 -0
- smftools/preprocessing/filter_converted_reads_on_methylation.py +20 -5
- smftools/preprocessing/filter_reads_on_length.py +14 -4
- smftools/preprocessing/flag_duplicate_reads.py +149 -0
- smftools/preprocessing/invert_adata.py +18 -11
- smftools/preprocessing/load_sample_sheet.py +30 -16
- smftools/preprocessing/recipes.py +22 -20
- smftools/preprocessing/subsample_adata.py +58 -0
- smftools/readwrite.py +105 -13
- smftools/tools/__init__.py +49 -0
- smftools/tools/apply_hmm.py +202 -0
- smftools/tools/apply_hmm_batched.py +241 -0
- smftools/tools/archived/classify_methylated_features.py +66 -0
- smftools/tools/archived/classify_non_methylated_features.py +75 -0
- smftools/tools/archived/subset_adata_v1.py +32 -0
- smftools/tools/archived/subset_adata_v2.py +46 -0
- smftools/tools/calculate_distances.py +18 -0
- smftools/tools/calculate_umap.py +62 -0
- smftools/tools/call_hmm_peaks.py +105 -0
- smftools/tools/classifiers.py +787 -0
- smftools/tools/cluster_adata_on_methylation.py +105 -0
- smftools/tools/data/__init__.py +2 -0
- smftools/tools/data/anndata_data_module.py +90 -0
- smftools/tools/data/preprocessing.py +6 -0
- smftools/tools/display_hmm.py +18 -0
- smftools/tools/general_tools.py +69 -0
- smftools/tools/hmm_readwrite.py +16 -0
- smftools/tools/inference/__init__.py +1 -0
- smftools/tools/inference/lightning_inference.py +41 -0
- smftools/tools/models/__init__.py +9 -0
- smftools/tools/models/base.py +14 -0
- smftools/tools/models/cnn.py +34 -0
- smftools/tools/models/lightning_base.py +41 -0
- smftools/tools/models/mlp.py +17 -0
- smftools/tools/models/positional.py +17 -0
- smftools/tools/models/rnn.py +16 -0
- smftools/tools/models/sklearn_models.py +40 -0
- smftools/tools/models/transformer.py +133 -0
- smftools/tools/models/wrappers.py +20 -0
- smftools/tools/nucleosome_hmm_refinement.py +104 -0
- smftools/tools/position_stats.py +239 -0
- smftools/tools/read_stats.py +70 -0
- smftools/tools/subset_adata.py +19 -23
- smftools/tools/train_hmm.py +78 -0
- smftools/tools/training/__init__.py +1 -0
- smftools/tools/training/train_lightning_model.py +47 -0
- smftools/tools/utils/__init__.py +2 -0
- smftools/tools/utils/device.py +10 -0
- smftools/tools/utils/grl.py +14 -0
- {smftools-0.1.3.dist-info → smftools-0.1.7.dist-info}/METADATA +47 -11
- smftools-0.1.7.dist-info/RECORD +136 -0
- smftools/tools/apply_HMM.py +0 -1
- smftools/tools/read_HMM.py +0 -1
- smftools/tools/train_HMM.py +0 -43
- smftools-0.1.3.dist-info/RECORD +0 -84
- /smftools/preprocessing/{remove_duplicates.py → archives/remove_duplicates.py} +0 -0
- /smftools/tools/{cluster.py → evaluation/__init__.py} +0 -0
- {smftools-0.1.3.dist-info → smftools-0.1.7.dist-info}/WHEEL +0 -0
- {smftools-0.1.3.dist-info → smftools-0.1.7.dist-info}/licenses/LICENSE +0 -0
|
# extract_read_lengths_from_bed

def extract_read_lengths_from_bed(file_path):
    """
    Load a dict mapping read names to read lengths from a BED file.

    Params:
        file_path (str): File path to a BED file with tab-separated columns
            chrom, start, end, length, name and no header row.
            Lines starting with '#' are skipped.

    Returns:
        read_dict (dict): Maps read name -> read length. If a name occurs
            more than once, the last occurrence wins (same as the original
            row-by-row loop).
    """
    import pandas as pd
    columns = ['chrom', 'start', 'end', 'length', 'name']
    df = pd.read_csv(file_path, sep='\t', header=None, names=columns, comment='#')
    # Vectorized build instead of iterrows(): identical result, much faster,
    # and drops the unused chrom/start/end locals of the per-row loop.
    return dict(zip(df['name'], df['length']))
|
def find_conversion_sites(fasta_file, modification_type, conversion_types):
    """
    Locate positions of potentially modified bases (5mC or 6mA) in a reference FASTA.

    Parameters:
        fasta_file (str): Path to the converted reference FASTA.
        modification_type (str): '5mC', '6mA', or the unconverted record type.
        conversion_types (list): List of conversion types; the first element
            names the unconverted record type.

    Returns:
        dict: record id -> [sequence length, top strand coordinates,
            bottom strand coordinates, sequence, complement sequence].
            Coordinates are 0-indexed; they are empty lists for the
            unconverted case.
    """
    from Bio import SeqIO

    # Which base marks each strand for a given modification type.
    strand_bases = {
        '5mC': ('C', 'G'),  # Cytosine / Guanine
        '6mA': ('A', 'T')   # Adenine / Thymine
    }

    unconverted = conversion_types[0]
    record_dict = {}

    with open(fasta_file, "r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            # Only records whose id carries the unconverted tag are references.
            if unconverted not in record.id:
                continue

            sequence = str(record.seq).upper()
            complement = str(record.seq.complement()).upper()
            sequence_length = len(sequence)

            if modification_type == unconverted:
                # Unconverted case: keep the full sequence, no coordinates.
                record_dict[record.id] = [sequence_length, [], [], sequence, complement]
            elif modification_type in strand_bases:
                top_base, bottom_base = strand_bases[modification_type]
                top_coords = [i for i, base in enumerate(sequence) if base == top_base]
                bottom_coords = [i for i, base in enumerate(sequence) if base == bottom_base]
                record_dict[record.id] = [sequence_length, top_coords, bottom_coords, sequence, complement]
            else:
                raise ValueError(f"Invalid modification_type: {modification_type}. Choose '5mC', '6mA', or 'unconverted'.")

    return record_dict
import numpy as np
import gzip
import os
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from concurrent.futures import ProcessPoolExecutor
from itertools import chain


def convert_FASTA_record(record, modification_type, strand, unconverted):
    """ Return a strand-converted copy of one FASTA record for a modification type. """
    # (modification, strand) -> (base to replace, replacement base)
    substitutions = {
        ('5mC', 'top'): ('C', 'T'),
        ('5mC', 'bottom'): ('G', 'A'),
        ('6mA', 'top'): ('A', 'G'),
        ('6mA', 'bottom'): ('T', 'C')
    }

    upper_seq = str(record.seq).upper()

    # The unconverted reference passes through unchanged, tagged as 'top'.
    if modification_type == unconverted:
        return SeqRecord(Seq(upper_seq), id=f"{record.id}_{modification_type}_top", description=record.description)

    key = (modification_type, strand)
    if key not in substitutions:
        raise ValueError(f"Invalid combination: {modification_type}, {strand}")

    source_base, target_base = substitutions[key]
    converted_seq = upper_seq.replace(source_base, target_base)

    return SeqRecord(Seq(converted_seq), id=f"{record.id}_{modification_type}_{strand}", description=record.description)


def process_fasta_record(args):
    """
    Process a single FASTA record for parallel execution.

    Args:
        args (tuple): (record, modification_types, strands, unconverted)
    Returns:
        list of modified SeqRecord objects.
    """
    record, modification_types, strands, unconverted = args
    converted = []

    for mod_type in modification_types:
        for strand_index, strand in enumerate(strands):
            # The unconverted reference must appear exactly once in the output.
            if mod_type == unconverted and strand_index > 0:
                continue
            converted.append(convert_FASTA_record(record, mod_type, strand, unconverted))

    return converted


def generate_converted_FASTA(input_fasta, modification_types, strands, output_fasta, num_threads=4, chunk_size=500):
    """
    Convert an input FASTA file and write the converted records to a new FASTA.

    Parameters:
        input_fasta (str): Path to the unconverted FASTA file (may be gzipped).
        modification_types (list): Modification types ('5mC', '6mA', or the
            unconverted tag); the first element names the unconverted type.
        strands (list): Strands to convert ('top', 'bottom').
        output_fasta (str): Path to the converted FASTA output file.
        num_threads (int): Number of worker processes.
        chunk_size (int): Number of records buffered per write batch.

    Returns:
        None (writes the converted FASTA file).
    """
    unconverted = modification_types[0]

    # Transparently support gzipped input.
    is_gzipped = input_fasta.endswith('.gz')
    opener = gzip.open if is_gzipped else open
    mode = 'rt' if is_gzipped else 'r'

    def fasta_record_generator():
        """ Lazily yield records so the whole file never sits in memory. """
        with opener(input_fasta, mode) as handle:
            yield from SeqIO.parse(handle, 'fasta')

    # One task tuple per record, produced lazily for the worker pool.
    task_args = ((rec, modification_types, strands, unconverted) for rec in fasta_record_generator())

    with open(output_fasta, 'w') as out_handle, ProcessPoolExecutor(max_workers=num_threads) as pool:
        pending = []
        for record_batch in pool.map(process_fasta_record, task_args):
            pending.extend(record_batch)

            # Flush in chunks to bound memory use.
            if len(pending) >= chunk_size:
                SeqIO.write(pending, out_handle, 'fasta')
                pending = []

        # Write any remaining records.
        if pending:
            SeqIO.write(pending, out_handle, 'fasta')
## modcall

# Direct methylation specific
def modcall(model_dir, model, pod5_dir, barcode_kit, mod_list, bam, bam_suffix, barcode_both_ends=True, trim=False, device='auto'):
    """
    Wrapper function for dorado modified base calling.

    Parameters:
        model_dir (str): a string representing the file path to the dorado basecalling model directory.
        model (str): a string representing the dorado basecalling model.
        pod5_dir (str): a string representing the file path to the experiment directory containing the POD5 files.
        barcode_kit (str): A string representing the barcoding kit used in the experiment.
        mod_list (list): A list of modification types to use in the analysis.
        bam (str): File path to the BAM file to output.
        bam_suffix (str): The suffix to use for the BAM file.
        barcode_both_ends (bool): Whether to require a barcode detection on both ends for demultiplexing.
        trim (bool): Whether to trim barcodes, adapters, and primers from read ends.
        device (str): Device to use for basecalling. auto, metal, cpu, cuda.

    Returns:
        None

    Raises:
        subprocess.CalledProcessError: If dorado exits with a non-zero status.
    """
    import subprocess
    output = bam + bam_suffix
    command = ["dorado", "basecaller", "--models-directory", model_dir, "--kit-name", barcode_kit, "--modified-bases"]
    command += mod_list
    # --batchsize 0 lets dorado auto-select a batch size for the device.
    command += ["--device", device, "--batchsize", "0"]
    if barcode_both_ends:
        command.append("--barcode-both-ends")
    if not trim:
        command.append("--no-trim")
    command += [model, pod5_dir]
    print(f'Running: {" ".join(command)}')
    with open(output, "w") as outfile:
        # check=True surfaces basecalling failures instead of silently
        # leaving behind an empty or truncated BAM on dorado errors.
        subprocess.run(command, stdout=outfile, check=True)