smftools-0.3.0-py3-none-any.whl → smftools-0.3.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/_version.py +1 -1
- smftools/cli/chimeric_adata.py +1563 -0
- smftools/cli/helpers.py +49 -7
- smftools/cli/hmm_adata.py +250 -32
- smftools/cli/latent_adata.py +773 -0
- smftools/cli/load_adata.py +78 -74
- smftools/cli/preprocess_adata.py +122 -58
- smftools/cli/recipes.py +26 -0
- smftools/cli/spatial_adata.py +74 -112
- smftools/cli/variant_adata.py +423 -0
- smftools/cli_entry.py +52 -4
- smftools/config/conversion.yaml +1 -1
- smftools/config/deaminase.yaml +3 -0
- smftools/config/default.yaml +85 -12
- smftools/config/experiment_config.py +146 -1
- smftools/constants.py +69 -0
- smftools/hmm/HMM.py +88 -0
- smftools/hmm/call_hmm_peaks.py +1 -1
- smftools/informatics/__init__.py +6 -0
- smftools/informatics/bam_functions.py +358 -8
- smftools/informatics/binarize_converted_base_identities.py +2 -89
- smftools/informatics/converted_BAM_to_adata.py +636 -175
- smftools/informatics/h5ad_functions.py +198 -2
- smftools/informatics/modkit_extract_to_adata.py +1007 -425
- smftools/informatics/sequence_encoding.py +72 -0
- smftools/logging_utils.py +21 -2
- smftools/metadata.py +1 -1
- smftools/plotting/__init__.py +26 -3
- smftools/plotting/autocorrelation_plotting.py +22 -4
- smftools/plotting/chimeric_plotting.py +1893 -0
- smftools/plotting/classifiers.py +28 -14
- smftools/plotting/general_plotting.py +62 -1583
- smftools/plotting/hmm_plotting.py +1670 -8
- smftools/plotting/latent_plotting.py +804 -0
- smftools/plotting/plotting_utils.py +243 -0
- smftools/plotting/position_stats.py +16 -8
- smftools/plotting/preprocess_plotting.py +281 -0
- smftools/plotting/qc_plotting.py +8 -3
- smftools/plotting/spatial_plotting.py +1134 -0
- smftools/plotting/variant_plotting.py +1231 -0
- smftools/preprocessing/__init__.py +4 -0
- smftools/preprocessing/append_base_context.py +18 -18
- smftools/preprocessing/append_mismatch_frequency_sites.py +187 -0
- smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
- smftools/preprocessing/append_variant_call_layer.py +480 -0
- smftools/preprocessing/calculate_consensus.py +1 -1
- smftools/preprocessing/calculate_read_modification_stats.py +6 -1
- smftools/preprocessing/flag_duplicate_reads.py +4 -4
- smftools/preprocessing/invert_adata.py +1 -0
- smftools/readwrite.py +159 -99
- smftools/schema/anndata_schema_v1.yaml +15 -1
- smftools/tools/__init__.py +10 -0
- smftools/tools/calculate_knn.py +121 -0
- smftools/tools/calculate_leiden.py +57 -0
- smftools/tools/calculate_nmf.py +130 -0
- smftools/tools/calculate_pca.py +180 -0
- smftools/tools/calculate_umap.py +79 -80
- smftools/tools/position_stats.py +4 -4
- smftools/tools/rolling_nn_distance.py +872 -0
- smftools/tools/sequence_alignment.py +140 -0
- smftools/tools/tensor_factorization.py +217 -0
- {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/METADATA +9 -5
- {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/RECORD +66 -45
- {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
- {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
- {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
smftools/cli/preprocess_adata.py
CHANGED
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Optional, Tuple
|
|
5
6
|
|
|
6
7
|
import anndata as ad
|
|
7
8
|
|
|
8
|
-
from smftools.
|
|
9
|
+
from smftools.constants import (
|
|
10
|
+
BASE_QUALITY_SCORES,
|
|
11
|
+
DEMUX_TYPE,
|
|
12
|
+
LOGGING_DIR,
|
|
13
|
+
PREPROCESS_DIR,
|
|
14
|
+
READ_SPAN_MASK,
|
|
15
|
+
)
|
|
16
|
+
from smftools.logging_utils import get_logger, setup_logging
|
|
9
17
|
|
|
10
18
|
logger = get_logger(__name__)
|
|
11
19
|
|
|
@@ -36,30 +44,23 @@ def preprocess_adata(
|
|
|
36
44
|
Path to preprocessed, duplicate-removed AnnData.
|
|
37
45
|
"""
|
|
38
46
|
from ..readwrite import safe_read_h5ad
|
|
39
|
-
from .helpers import get_adata_paths
|
|
40
|
-
from .load_adata import load_adata
|
|
47
|
+
from .helpers import get_adata_paths, load_experiment_config
|
|
41
48
|
|
|
42
49
|
# 1) Ensure config is loaded and at least *some* AnnData stage exists
|
|
43
|
-
|
|
50
|
+
cfg = load_experiment_config(config_path)
|
|
44
51
|
|
|
45
52
|
# 2) Compute canonical paths
|
|
46
53
|
paths = get_adata_paths(cfg)
|
|
47
54
|
raw_path = paths.raw
|
|
48
55
|
pp_path = paths.pp
|
|
49
56
|
pp_dedup_path = paths.pp_dedup
|
|
50
|
-
spatial_path = paths.spatial
|
|
51
|
-
hmm_path = paths.hmm
|
|
52
57
|
|
|
53
58
|
raw_exists = raw_path.exists()
|
|
54
59
|
pp_exists = pp_path.exists()
|
|
55
60
|
pp_dedup_exists = pp_dedup_path.exists()
|
|
56
|
-
spatial_exists = spatial_path.exists()
|
|
57
|
-
hmm_exists = hmm_path.exists()
|
|
58
61
|
|
|
59
|
-
# Helper:
|
|
62
|
+
# Helper: read from disk
|
|
60
63
|
def _load(path: Path):
|
|
61
|
-
if loaded_adata is not None and loaded_path == path:
|
|
62
|
-
return loaded_adata
|
|
63
64
|
adata, _ = safe_read_h5ad(path)
|
|
64
65
|
return adata
|
|
65
66
|
|
|
@@ -67,20 +68,8 @@ def preprocess_adata(
|
|
|
67
68
|
# Case A: full redo of preprocessing
|
|
68
69
|
# -----------------------------
|
|
69
70
|
if getattr(cfg, "force_redo_preprocessing", False):
|
|
70
|
-
logger.info(
|
|
71
|
-
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
if hmm_exists:
|
|
75
|
-
adata = _load(hmm_path)
|
|
76
|
-
source_path = hmm_path
|
|
77
|
-
elif spatial_exists:
|
|
78
|
-
adata = _load(spatial_path)
|
|
79
|
-
source_path = spatial_path
|
|
80
|
-
elif pp_dedup_exists:
|
|
81
|
-
adata = _load(pp_dedup_path)
|
|
82
|
-
source_path = pp_dedup_path
|
|
83
|
-
elif pp_exists:
|
|
71
|
+
logger.info("Forcing full redo of preprocessing workflow.")
|
|
72
|
+
if pp_exists:
|
|
84
73
|
adata = _load(pp_path)
|
|
85
74
|
source_path = pp_path
|
|
86
75
|
elif raw_exists:
|
|
@@ -135,26 +124,16 @@ def preprocess_adata(
|
|
|
135
124
|
# Case C: normal behavior (no explicit redo flags)
|
|
136
125
|
# -----------------------------
|
|
137
126
|
|
|
138
|
-
# If HMM exists, preprocessing is considered “done enough”
|
|
139
|
-
if hmm_exists:
|
|
140
|
-
logger.debug(f"Skipping preprocessing. HMM AnnData found: {hmm_path}")
|
|
141
|
-
return (None, None, None, None)
|
|
142
|
-
|
|
143
|
-
# If spatial exists, also skip re-preprocessing by default
|
|
144
|
-
if spatial_exists:
|
|
145
|
-
logger.debug(f"Skipping preprocessing. Spatial AnnData found: {spatial_path}")
|
|
146
|
-
return (None, None, None, None)
|
|
147
|
-
|
|
148
127
|
# If pp_dedup exists, just return paths (no recomputation)
|
|
149
128
|
if pp_dedup_exists:
|
|
150
|
-
logger.
|
|
129
|
+
logger.info(
|
|
151
130
|
f"Skipping preprocessing. Preprocessed deduplicated AnnData found: {pp_dedup_path}"
|
|
152
131
|
)
|
|
153
132
|
return (None, pp_path, None, pp_dedup_path)
|
|
154
133
|
|
|
155
134
|
# If pp exists but pp_dedup does not, load pp and run core
|
|
156
135
|
if pp_exists:
|
|
157
|
-
logger.
|
|
136
|
+
logger.info(f"Preprocessed AnnData found: {pp_path}")
|
|
158
137
|
adata = _load(pp_path)
|
|
159
138
|
source_path = pp_path
|
|
160
139
|
pp_adata, pp_adata_path, pp_dedup_adata, pp_dedup_adata_path = preprocess_adata_core(
|
|
@@ -202,10 +181,6 @@ def preprocess_adata_core(
|
|
|
202
181
|
- `pp_adata_path` and `pp_dup_rem_adata_path` are the target output paths for
|
|
203
182
|
preprocessed and preprocessed+deduplicated AnnData.
|
|
204
183
|
|
|
205
|
-
Does NOT:
|
|
206
|
-
- Decide which stage to load from (that's the wrapper's job).
|
|
207
|
-
- Decide whether to skip entirely; it always runs its steps, but individual
|
|
208
|
-
sub-steps may skip based on `cfg.bypass_*` or directory existence.
|
|
209
184
|
|
|
210
185
|
Returns
|
|
211
186
|
-------
|
|
@@ -218,10 +193,14 @@ def preprocess_adata_core(
|
|
|
218
193
|
pp_dup_rem_adata_path : Path
|
|
219
194
|
Path where pp_dedup_adata was written.
|
|
220
195
|
"""
|
|
196
|
+
from datetime import datetime
|
|
221
197
|
from pathlib import Path
|
|
222
198
|
|
|
223
199
|
from ..metadata import record_smftools_metadata
|
|
224
|
-
from ..plotting import
|
|
200
|
+
from ..plotting import (
|
|
201
|
+
plot_read_qc_histograms,
|
|
202
|
+
plot_read_span_quality_clustermaps,
|
|
203
|
+
)
|
|
225
204
|
from ..preprocessing import (
|
|
226
205
|
append_base_context,
|
|
227
206
|
append_binary_layer_by_base_context,
|
|
@@ -235,22 +214,39 @@ def preprocess_adata_core(
|
|
|
235
214
|
filter_reads_on_length_quality_mapping,
|
|
236
215
|
filter_reads_on_modification_thresholds,
|
|
237
216
|
flag_duplicate_reads,
|
|
217
|
+
invert_adata,
|
|
238
218
|
load_sample_sheet,
|
|
219
|
+
reindex_references_adata,
|
|
239
220
|
)
|
|
240
221
|
from ..readwrite import make_dirs
|
|
241
222
|
from .helpers import write_gz_h5ad
|
|
242
223
|
|
|
243
224
|
################################### 1) Load existing ###################################
|
|
225
|
+
date_str = datetime.today().strftime("%y%m%d")
|
|
226
|
+
now = datetime.now()
|
|
227
|
+
time_str = now.strftime("%H%M%S")
|
|
228
|
+
|
|
229
|
+
log_level = getattr(logging, cfg.log_level.upper(), logging.INFO)
|
|
230
|
+
|
|
244
231
|
# General config variable init - Necessary user passed inputs
|
|
245
232
|
smf_modality = cfg.smf_modality # needed for specifying if the data is conversion SMF or direct methylation detection SMF. Or deaminase smf Necessary.
|
|
246
233
|
output_directory = Path(
|
|
247
234
|
cfg.output_directory
|
|
248
235
|
) # Path to the output directory to make for the analysis. Necessary.
|
|
249
|
-
|
|
236
|
+
preprocess_directory = output_directory / PREPROCESS_DIR
|
|
237
|
+
logging_directory = preprocess_directory / LOGGING_DIR
|
|
250
238
|
|
|
251
|
-
|
|
252
|
-
|
|
239
|
+
make_dirs([output_directory, preprocess_directory])
|
|
240
|
+
|
|
241
|
+
if cfg.emit_log_file:
|
|
242
|
+
log_file = logging_directory / f"{date_str}_{time_str}_log.log"
|
|
243
|
+
make_dirs([logging_directory])
|
|
244
|
+
else:
|
|
245
|
+
log_file = None
|
|
246
|
+
|
|
247
|
+
setup_logging(level=log_level, log_file=log_file, reconfigure=log_file is not None)
|
|
253
248
|
|
|
249
|
+
######### Begin Preprocessing #########
|
|
254
250
|
## Load sample sheet metadata based on barcode mapping ##
|
|
255
251
|
if getattr(cfg, "sample_sheet_path", None):
|
|
256
252
|
load_sample_sheet(
|
|
@@ -264,12 +260,12 @@ def preprocess_adata_core(
|
|
|
264
260
|
pass
|
|
265
261
|
|
|
266
262
|
# Adding read length, read quality, reference length, mapped_length, and mapping quality metadata to adata object.
|
|
267
|
-
pp_length_qc_dir =
|
|
263
|
+
pp_length_qc_dir = preprocess_directory / "01_Read_length_and_quality_QC_metrics"
|
|
268
264
|
|
|
269
265
|
if pp_length_qc_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
270
266
|
logger.debug(f"{pp_length_qc_dir} already exists. Skipping read level QC plotting.")
|
|
271
267
|
else:
|
|
272
|
-
make_dirs([
|
|
268
|
+
make_dirs([preprocess_directory, pp_length_qc_dir])
|
|
273
269
|
plot_read_qc_histograms(
|
|
274
270
|
adata,
|
|
275
271
|
pp_length_qc_dir,
|
|
@@ -292,12 +288,12 @@ def preprocess_adata_core(
|
|
|
292
288
|
)
|
|
293
289
|
print(adata.shape)
|
|
294
290
|
|
|
295
|
-
pp_length_qc_dir =
|
|
291
|
+
pp_length_qc_dir = preprocess_directory / "02_Read_length_and_quality_QC_metrics_post_filtering"
|
|
296
292
|
|
|
297
293
|
if pp_length_qc_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
298
294
|
logger.debug(f"{pp_length_qc_dir} already exists. Skipping read level QC plotting.")
|
|
299
295
|
else:
|
|
300
|
-
make_dirs([
|
|
296
|
+
make_dirs([preprocess_directory, pp_length_qc_dir])
|
|
301
297
|
plot_read_qc_histograms(
|
|
302
298
|
adata,
|
|
303
299
|
pp_length_qc_dir,
|
|
@@ -310,7 +306,7 @@ def preprocess_adata_core(
|
|
|
310
306
|
if smf_modality == "direct":
|
|
311
307
|
native = True
|
|
312
308
|
if cfg.fit_position_methylation_thresholds:
|
|
313
|
-
pp_Youden_dir =
|
|
309
|
+
pp_Youden_dir = preprocess_directory / "02B_Position_wide_Youden_threshold_performance"
|
|
314
310
|
make_dirs([pp_Youden_dir])
|
|
315
311
|
# Calculate positional methylation thresholds for mod calls
|
|
316
312
|
calculate_position_Youden(
|
|
@@ -359,7 +355,6 @@ def preprocess_adata_core(
|
|
|
359
355
|
)
|
|
360
356
|
|
|
361
357
|
############### Add base context to each position for each Reference_strand and calculate read level methylation/deamination stats ###############
|
|
362
|
-
# Additionally, store base_context level binary modification arrays in adata.obsm
|
|
363
358
|
append_base_context(
|
|
364
359
|
adata,
|
|
365
360
|
ref_column=cfg.reference_column,
|
|
@@ -378,17 +373,18 @@ def preprocess_adata_core(
|
|
|
378
373
|
cfg.mod_target_bases,
|
|
379
374
|
bypass=cfg.bypass_calculate_read_modification_stats,
|
|
380
375
|
force_redo=cfg.force_redo_calculate_read_modification_stats,
|
|
376
|
+
smf_modality=cfg.smf_modality,
|
|
381
377
|
)
|
|
382
378
|
|
|
383
379
|
### Make a dir for outputting sample level read modification metrics before filtering ###
|
|
384
|
-
pp_meth_qc_dir =
|
|
380
|
+
pp_meth_qc_dir = preprocess_directory / "03_read_modification_QC_metrics"
|
|
385
381
|
|
|
386
382
|
if pp_meth_qc_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
387
383
|
logger.debug(
|
|
388
384
|
f"{pp_meth_qc_dir} already exists. Skipping read level methylation QC plotting."
|
|
389
385
|
)
|
|
390
386
|
else:
|
|
391
|
-
make_dirs([
|
|
387
|
+
make_dirs([preprocess_directory, pp_meth_qc_dir])
|
|
392
388
|
obs_to_plot = ["Raw_modification_signal"]
|
|
393
389
|
if any(base in cfg.mod_target_bases for base in ["GpC", "CpG", "C"]):
|
|
394
390
|
obs_to_plot += [
|
|
@@ -422,14 +418,14 @@ def preprocess_adata_core(
|
|
|
422
418
|
force_redo=cfg.force_redo_filter_reads_on_modification_thresholds,
|
|
423
419
|
)
|
|
424
420
|
|
|
425
|
-
pp_meth_qc_dir =
|
|
421
|
+
pp_meth_qc_dir = preprocess_directory / "04_read_modification_QC_metrics_post_filtering"
|
|
426
422
|
|
|
427
423
|
if pp_meth_qc_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
428
424
|
logger.debug(
|
|
429
425
|
f"{pp_meth_qc_dir} already exists. Skipping read level methylation QC plotting."
|
|
430
426
|
)
|
|
431
427
|
else:
|
|
432
|
-
make_dirs([
|
|
428
|
+
make_dirs([preprocess_directory, pp_meth_qc_dir])
|
|
433
429
|
obs_to_plot = ["Raw_modification_signal"]
|
|
434
430
|
if any(base in cfg.mod_target_bases for base in ["GpC", "CpG", "C"]):
|
|
435
431
|
obs_to_plot += [
|
|
@@ -480,6 +476,22 @@ def preprocess_adata_core(
|
|
|
480
476
|
from_valid_sites_only=True,
|
|
481
477
|
)
|
|
482
478
|
|
|
479
|
+
# -----------------------------
|
|
480
|
+
# Optional inversion along positions axis
|
|
481
|
+
# -----------------------------
|
|
482
|
+
if getattr(cfg, "invert_adata", False):
|
|
483
|
+
adata = invert_adata(adata)
|
|
484
|
+
|
|
485
|
+
# -----------------------------
|
|
486
|
+
# Optional reindexing by reference
|
|
487
|
+
# -----------------------------
|
|
488
|
+
reindex_references_adata(
|
|
489
|
+
adata,
|
|
490
|
+
reference_col=cfg.reference_column,
|
|
491
|
+
offsets=cfg.reindexing_offsets,
|
|
492
|
+
new_col=cfg.reindexed_var_suffix,
|
|
493
|
+
)
|
|
494
|
+
|
|
483
495
|
############### Duplicate detection for conversion/deamination SMF ###############
|
|
484
496
|
if smf_modality != "direct":
|
|
485
497
|
references = adata.obs[cfg.reference_column].cat.categories
|
|
@@ -489,7 +501,7 @@ def preprocess_adata_core(
|
|
|
489
501
|
for site_type in cfg.duplicate_detection_site_types:
|
|
490
502
|
var_filters_sets += [[f"{ref}_{site_type}_site", f"position_in_{ref}"]]
|
|
491
503
|
|
|
492
|
-
pp_dup_qc_dir =
|
|
504
|
+
pp_dup_qc_dir = preprocess_directory / "05_read_duplication_QC_metrics"
|
|
493
505
|
|
|
494
506
|
make_dirs([pp_dup_qc_dir])
|
|
495
507
|
|
|
@@ -514,8 +526,8 @@ def preprocess_adata_core(
|
|
|
514
526
|
hierarchical_linkage=cfg.duplicate_detection_hierarchical_linkage,
|
|
515
527
|
hierarchical_metric="euclidean",
|
|
516
528
|
hierarchical_window=cfg.duplicate_detection_window_size_for_hamming_neighbors,
|
|
517
|
-
demux_types=
|
|
518
|
-
demux_col=
|
|
529
|
+
demux_types=cfg.duplicate_detection_demux_types_to_use,
|
|
530
|
+
demux_col=DEMUX_TYPE,
|
|
519
531
|
)
|
|
520
532
|
|
|
521
533
|
# Use the flagged duplicate read groups and perform complexity analysis
|
|
@@ -541,6 +553,58 @@ def preprocess_adata_core(
|
|
|
541
553
|
adata_unique = adata
|
|
542
554
|
########################################################################################################################
|
|
543
555
|
|
|
556
|
+
############################################### Plot read span mask + base quality clustermaps ###############################################
|
|
557
|
+
quality_layer = None
|
|
558
|
+
if BASE_QUALITY_SCORES in adata.layers:
|
|
559
|
+
quality_layer = BASE_QUALITY_SCORES
|
|
560
|
+
elif "base_qualities" in adata.layers:
|
|
561
|
+
quality_layer = "base_qualities"
|
|
562
|
+
|
|
563
|
+
if READ_SPAN_MASK not in adata.layers or quality_layer is None:
|
|
564
|
+
logger.debug(
|
|
565
|
+
"read_span_mask and base quality layers not found; skipping read span/base quality clustermaps."
|
|
566
|
+
)
|
|
567
|
+
else:
|
|
568
|
+
pp_span_quality_dir = preprocess_directory / "06_read_span_and_quality_clustermaps"
|
|
569
|
+
if pp_span_quality_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
570
|
+
logger.debug(
|
|
571
|
+
f"{pp_span_quality_dir} already exists. Skipping read span/base quality clustermaps."
|
|
572
|
+
)
|
|
573
|
+
else:
|
|
574
|
+
make_dirs([pp_span_quality_dir])
|
|
575
|
+
plot_read_span_quality_clustermaps(
|
|
576
|
+
adata,
|
|
577
|
+
sample_col=cfg.sample_name_col_for_plotting,
|
|
578
|
+
reference_col=cfg.reference_column,
|
|
579
|
+
quality_layer=quality_layer,
|
|
580
|
+
read_span_layer=READ_SPAN_MASK,
|
|
581
|
+
demux_types=cfg.clustermap_demux_types_to_plot,
|
|
582
|
+
save_path=pp_span_quality_dir,
|
|
583
|
+
show_position_axis=True,
|
|
584
|
+
max_nan_fraction=0.5,
|
|
585
|
+
)
|
|
586
|
+
|
|
587
|
+
pp_dedup_span_quality_dir = (
|
|
588
|
+
preprocess_directory / "deduplicated" / "06_read_span_and_quality_clustermaps"
|
|
589
|
+
)
|
|
590
|
+
if pp_dedup_span_quality_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
591
|
+
logger.debug(
|
|
592
|
+
f"{pp_dedup_span_quality_dir} already exists. Skipping read span/base quality clustermaps."
|
|
593
|
+
)
|
|
594
|
+
elif quality_layer in adata_unique.layers and READ_SPAN_MASK in adata_unique.layers:
|
|
595
|
+
make_dirs([pp_dedup_span_quality_dir])
|
|
596
|
+
plot_read_span_quality_clustermaps(
|
|
597
|
+
adata_unique,
|
|
598
|
+
sample_col=cfg.sample_name_col_for_plotting,
|
|
599
|
+
reference_col=cfg.reference_column,
|
|
600
|
+
quality_layer=quality_layer,
|
|
601
|
+
read_span_layer=READ_SPAN_MASK,
|
|
602
|
+
demux_types=cfg.clustermap_demux_types_to_plot,
|
|
603
|
+
save_path=pp_dedup_span_quality_dir,
|
|
604
|
+
show_position_axis=True,
|
|
605
|
+
max_nan_fraction=0.5,
|
|
606
|
+
)
|
|
607
|
+
|
|
544
608
|
############################################### Save preprocessed adata with duplicate detection ###############################################
|
|
545
609
|
if not pp_adata_path.exists() or cfg.force_redo_preprocessing:
|
|
546
610
|
logger.info("Saving preprocessed adata.")
|
smftools/cli/recipes.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Tuple
|
|
5
|
+
|
|
6
|
+
import anndata as ad
|
|
7
|
+
|
|
8
|
+
from ..cli.chimeric_adata import chimeric_adata
|
|
9
|
+
from ..cli.hmm_adata import hmm_adata
|
|
10
|
+
from ..cli.latent_adata import latent_adata
|
|
11
|
+
from ..cli.load_adata import load_adata
|
|
12
|
+
from ..cli.preprocess_adata import preprocess_adata
|
|
13
|
+
from ..cli.spatial_adata import spatial_adata
|
|
14
|
+
from ..cli.variant_adata import variant_adata
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def full_flow(
    config_path: str,
) -> None:
    """Run every smftools CLI stage, in order, for one experiment config.

    The same ``config_path`` is forwarded unchanged to each stage. The
    stages run strictly sequentially in this order: load, preprocess,
    spatial, variant, chimeric, HMM, latent. An exception raised by any
    stage propagates to the caller and the remaining stages do not run.

    Parameters
    ----------
    config_path : str
        Path to the experiment configuration handed to every stage.

    Returns
    -------
    None
        The values returned by the individual stages are discarded. The
        previous annotation advertised an ``(AnnData, Path)`` tuple, but the
        function never returned one, so the annotation now matches the
        actual behavior.
    """
    # NOTE(review): presumably each stage picks up the artifacts written by
    # the stages before it, so the order below matters — confirm before
    # reordering or parallelizing.
    load_adata(config_path)
    preprocess_adata(config_path)
    spatial_adata(config_path)
    variant_adata(config_path)
    chimeric_adata(config_path)
    hmm_adata(config_path)
    latent_adata(config_path)
|