smftools 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. smftools/_version.py +1 -1
  2. smftools/cli/chimeric_adata.py +1563 -0
  3. smftools/cli/helpers.py +49 -7
  4. smftools/cli/hmm_adata.py +250 -32
  5. smftools/cli/latent_adata.py +773 -0
  6. smftools/cli/load_adata.py +78 -74
  7. smftools/cli/preprocess_adata.py +122 -58
  8. smftools/cli/recipes.py +26 -0
  9. smftools/cli/spatial_adata.py +74 -112
  10. smftools/cli/variant_adata.py +423 -0
  11. smftools/cli_entry.py +52 -4
  12. smftools/config/conversion.yaml +1 -1
  13. smftools/config/deaminase.yaml +3 -0
  14. smftools/config/default.yaml +85 -12
  15. smftools/config/experiment_config.py +146 -1
  16. smftools/constants.py +69 -0
  17. smftools/hmm/HMM.py +88 -0
  18. smftools/hmm/call_hmm_peaks.py +1 -1
  19. smftools/informatics/__init__.py +6 -0
  20. smftools/informatics/bam_functions.py +358 -8
  21. smftools/informatics/binarize_converted_base_identities.py +2 -89
  22. smftools/informatics/converted_BAM_to_adata.py +636 -175
  23. smftools/informatics/h5ad_functions.py +198 -2
  24. smftools/informatics/modkit_extract_to_adata.py +1007 -425
  25. smftools/informatics/sequence_encoding.py +72 -0
  26. smftools/logging_utils.py +21 -2
  27. smftools/metadata.py +1 -1
  28. smftools/plotting/__init__.py +26 -3
  29. smftools/plotting/autocorrelation_plotting.py +22 -4
  30. smftools/plotting/chimeric_plotting.py +1893 -0
  31. smftools/plotting/classifiers.py +28 -14
  32. smftools/plotting/general_plotting.py +62 -1583
  33. smftools/plotting/hmm_plotting.py +1670 -8
  34. smftools/plotting/latent_plotting.py +804 -0
  35. smftools/plotting/plotting_utils.py +243 -0
  36. smftools/plotting/position_stats.py +16 -8
  37. smftools/plotting/preprocess_plotting.py +281 -0
  38. smftools/plotting/qc_plotting.py +8 -3
  39. smftools/plotting/spatial_plotting.py +1134 -0
  40. smftools/plotting/variant_plotting.py +1231 -0
  41. smftools/preprocessing/__init__.py +4 -0
  42. smftools/preprocessing/append_base_context.py +18 -18
  43. smftools/preprocessing/append_mismatch_frequency_sites.py +187 -0
  44. smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
  45. smftools/preprocessing/append_variant_call_layer.py +480 -0
  46. smftools/preprocessing/calculate_consensus.py +1 -1
  47. smftools/preprocessing/calculate_read_modification_stats.py +6 -1
  48. smftools/preprocessing/flag_duplicate_reads.py +4 -4
  49. smftools/preprocessing/invert_adata.py +1 -0
  50. smftools/readwrite.py +159 -99
  51. smftools/schema/anndata_schema_v1.yaml +15 -1
  52. smftools/tools/__init__.py +10 -0
  53. smftools/tools/calculate_knn.py +121 -0
  54. smftools/tools/calculate_leiden.py +57 -0
  55. smftools/tools/calculate_nmf.py +130 -0
  56. smftools/tools/calculate_pca.py +180 -0
  57. smftools/tools/calculate_umap.py +79 -80
  58. smftools/tools/position_stats.py +4 -4
  59. smftools/tools/rolling_nn_distance.py +872 -0
  60. smftools/tools/sequence_alignment.py +140 -0
  61. smftools/tools/tensor_factorization.py +217 -0
  62. {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/METADATA +9 -5
  63. {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/RECORD +66 -45
  64. {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
  65. {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
  66. {smftools-0.3.0.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,423 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Optional, Tuple
6
+
7
+ import anndata as ad
8
+
9
+ from smftools.constants import LOGGING_DIR, VARIANT_DIR
10
+ from smftools.logging_utils import get_logger, setup_logging
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
def variant_adata(
    config_path: str,
) -> Tuple[Optional[ad.AnnData], Optional[Path]]:
    """
    CLI-facing wrapper for variant analyses.

    Called by: `smftools variant <config_path>`

    Responsibilities:
    - Load the experiment config and resolve the per-stage AnnData paths.
    - Skip when the variant AnnData already exists on disk, unless
      ``cfg.force_redo_variant_analyses`` is truthy.
    - Otherwise pick the first existing AnnData, searching in the order
      hmm, latent, spatial, chimeric, variant, pp_dedup, pp.
    - Call `variant_adata_core(...)` on the AnnData that was found.

    Parameters:
        config_path: Path to the experiment configuration file.

    Returns:
        ``(adata, variant_path)`` as returned by `variant_adata_core` when the
        core ran; ``(None, variant_path)`` when the stage was skipped because
        the variant AnnData already exists; ``(None, None)`` when no starting
        AnnData could be found.
    """
    from ..readwrite import safe_read_h5ad
    from .helpers import get_adata_paths, load_experiment_config

    # 1) Load the config and resolve every stage path up front.
    cfg = load_experiment_config(config_path)
    paths = get_adata_paths(cfg)
    variant_path = paths.variant

    # 2) Stage-skipping logic. Report the path of the AnnData that actually
    #    triggered the skip (the variant file), not the spatial one.
    if not getattr(cfg, "force_redo_variant_analyses", False) and variant_path.exists():
        logger.info(f"Variant AnnData found: {variant_path}\nSkipping smftools variant")
        return None, variant_path

    # 3) Decide which AnnData to use as the *starting point* for analyses.
    #    The tuple order is the preference order; the first existing file wins.
    candidate_paths = (
        paths.hmm,
        paths.latent,
        paths.spatial,
        paths.chimeric,
        variant_path,
        paths.pp_dedup,
        paths.pp,
    )
    source_path = next((path for path in candidate_paths if path.exists()), None)
    if source_path is None:
        logger.warning(
            "No suitable AnnData found for variant analyses (need at least preprocessed)."
        )
        return None, None

    # safe_read_h5ad returns a 2-tuple; only the AnnData is needed here.
    start_adata, _ = safe_read_h5ad(source_path)

    # 4) Run the core
    adata_variant, variant_path = variant_adata_core(
        adata=start_adata,
        cfg=cfg,
        paths=paths,
        source_adata_path=source_path,
        config_path=config_path,
    )

    return adata_variant, variant_path
94
+
95
+
96
def _find_converted_column(unconverted_col: str, var_columns) -> str | None:
    """Find the converted FASTA column corresponding to an unconverted one.

    Unconverted columns follow the pattern ``{chromosome}_{strand}_strand_FASTA_base``.
    Converted columns follow ``{chromosome}_{conversion}_{strand}_{strand}_strand_FASTA_base``
    (e.g. ``6B6_5mC_top_top_strand_FASTA_base`` for unconverted ``6B6_top_strand_FASTA_base``).

    Parameters:
        unconverted_col: Name of the unconverted FASTA base column.
        var_columns: Iterable of column names to search.

    Returns:
        The first matching converted column name, or ``None`` when the input
        does not follow the unconverted pattern or nothing matches.
    """
    suffix = "_strand_FASTA_base"
    if not unconverted_col.endswith(suffix):
        return None
    stem = unconverted_col[: -len(suffix)]  # e.g. "6B6_top"
    # Parse strand from end of stem: "6B6_top" -> strand="top", chrom="6B6"
    for strand in ("top", "bottom"):
        strand_tag = f"_{strand}"
        if not stem.endswith(strand_tag):
            continue
        chrom = stem[: -len(strand_tag)]
        # The strand appears twice in a converted column: once in the record
        # name, once in the suffix.
        prefix = f"{chrom}_"
        end = f"_{strand}_{strand}{suffix}"
        candidates = [
            c
            for c in var_columns
            if c.startswith(prefix) and c.endswith(end) and c != unconverted_col
        ]
        if len(candidates) > 1:
            logger.info(
                "Multiple converted column candidates for '%s': %s",
                unconverted_col,
                candidates,
            )
        # Only one strand tag can terminate the stem, so stop searching here.
        return candidates[0] if candidates else None
    return None


def variant_adata_core(
    adata: ad.AnnData,
    cfg,
    paths: AdataPaths,
    source_adata_path: Optional[Path] = None,
    config_path: Optional[str] = None,
) -> Tuple[ad.AnnData, Path]:
    """
    Core variant analysis pipeline.

    Assumes:
    - `cfg` is the ExperimentConfig.

    Does:
    - Create the variant output directory and configure logging.
    - Optionally load sample sheet metadata onto `adata`.
    - Append sequence mismatch annotations (when two references are configured)
      and mismatch frequency sites.
    - Append the variant call and variant segment layers (when two references
      are configured).
    - Write mismatch base frequency plots, integer encoding clustermaps and
      variant segment clustermaps, skipping plot directories that already exist.
    - Save AnnData to `paths.variant` when that file does not exist yet or when
      `cfg.force_redo_variant_analyses` is truthy.

    Parameters:
        adata: AnnData to annotate; it is passed to every helper and returned.
        cfg: Experiment configuration object.
        paths: Object exposing the per-stage AnnData paths; only `paths.variant`
            is read here.
        source_adata_path: Path `adata` was loaded from, recorded as metadata.
        config_path: Path of the configuration file, recorded as metadata.

    Returns:
        ``(adata, paths.variant)``.

    Raises:
        ValueError: If `cfg.references_to_align_for_variant_annotation` is set
            but does not contain exactly two entries.
    """
    from datetime import datetime

    from ..metadata import record_smftools_metadata
    from ..plotting import (
        plot_mismatch_base_frequency_by_position,
        plot_sequence_integer_encoding_clustermaps,
        plot_variant_segment_clustermaps,
    )
    from ..preprocessing import (
        append_mismatch_frequency_sites,
        append_sequence_mismatch_annotations,
        append_variant_call_layer,
        append_variant_segment_layer,
        load_sample_sheet,
    )
    from ..readwrite import make_dirs
    from .helpers import write_gz_h5ad

    # -----------------------------
    # General setup
    # -----------------------------
    # Sample the clock once so the date and time parts of the log file name
    # cannot disagree across midnight.
    now = datetime.now()
    date_str = now.strftime("%y%m%d")
    time_str = now.strftime("%H%M%S")
    log_level = getattr(logging, cfg.log_level.upper(), logging.INFO)

    output_directory = Path(cfg.output_directory)
    variant_directory = output_directory / VARIANT_DIR
    logging_directory = variant_directory / LOGGING_DIR
    plot_root = variant_directory / "deduplicated"

    make_dirs([output_directory, variant_directory])

    if cfg.emit_log_file:
        log_file = logging_directory / f"{date_str}_{time_str}_log.log"
        make_dirs([logging_directory])
    else:
        log_file = None

    setup_logging(level=log_level, log_file=log_file, reconfigure=log_file is not None)

    # -----------------------------
    # Optional sample sheet metadata
    # -----------------------------
    if getattr(cfg, "sample_sheet_path", None):
        load_sample_sheet(
            adata,
            cfg.sample_sheet_path,
            mapping_key_column=cfg.sample_sheet_mapping_column,
            as_category=True,
            force_reload=cfg.force_reload_sample_sheet,
        )

    # ============================================================
    # 1) Reference variant position annotation
    # ============================================================
    # The attribute may be missing, None or empty; all of those mean "no
    # reference pair configured" and disable the pairwise steps below.
    reference_pair = getattr(cfg, "references_to_align_for_variant_annotation", None) or (
        None,
        None,
    )
    if len(reference_pair) != 2:
        raise ValueError(
            "references_to_align_for_variant_annotation must contain exactly two "
            f"entries, got {reference_pair!r}"
        )
    seq1_col, seq2_col = reference_pair
    have_reference_pair = bool(seq1_col and seq2_col)

    if have_reference_pair:
        append_sequence_mismatch_annotations(adata, seq1_col, seq2_col)

    # Append mismatch frequency per position
    append_mismatch_frequency_sites(
        adata,
        ref_column=cfg.reference_column,
        mismatch_layer=cfg.mismatch_frequency_layer,
        read_span_layer=cfg.mismatch_frequency_read_span_layer,
        mismatch_frequency_range=cfg.mismatch_frequency_range,
        bypass=cfg.bypass_append_mismatch_frequency_sites,
        force_redo=cfg.force_redo_append_mismatch_frequency_sites,
    )

    # ============================================================
    # 2) Per-read variant call layer at reference mismatch sites
    # ============================================================
    if have_reference_pair:
        # For conversion SMF, derive converted column names so variant calling
        # compares read bases against the converted reference (which reads are mapped to).
        seq1_conv = _find_converted_column(seq1_col, adata.var.columns)
        seq2_conv = _find_converted_column(seq2_col, adata.var.columns)
        if seq1_conv and seq2_conv:
            logger.info("Using converted columns: '%s', '%s'", seq1_conv, seq2_conv)

        append_variant_call_layer(
            adata,
            seq1_column=seq1_col,
            seq2_column=seq2_col,
            seq1_converted_column=seq1_conv,
            seq2_converted_column=seq2_conv,
            read_span_layer=cfg.mismatch_frequency_read_span_layer,
            reference_col=cfg.reference_column,
        )

        append_variant_segment_layer(
            adata,
            seq1_column=seq1_col,
            seq2_column=seq2_col,
            read_span_layer=cfg.mismatch_frequency_read_span_layer,
            reference_col=cfg.reference_column,
        )

    # ============================================================
    # 3) Mismatch and sequence encoding plots
    # ============================================================
    # Plot mismatch base frequencies
    if cfg.mismatch_frequency_layer not in adata.layers:
        logger.debug(
            "Mismatch layer '%s' not found; skipping mismatch base frequency plots.",
            cfg.mismatch_frequency_layer,
        )
    elif not adata.uns.get("mismatch_integer_encoding_map"):
        logger.debug("Mismatch encoding map not found; skipping mismatch base frequency plots.")
    else:
        mismatch_base_freq_dir = plot_root / "01_mismatch_base_frequency_plots"
        # NOTE(review): this gate reads cfg.force_redo_preprocessing rather than a
        # variant-specific flag; kept as in the original.
        if mismatch_base_freq_dir.is_dir() and not cfg.force_redo_preprocessing:
            logger.debug(
                f"{mismatch_base_freq_dir} already exists. Skipping mismatch base frequency plots."
            )
        else:
            make_dirs([mismatch_base_freq_dir])
            plot_mismatch_base_frequency_by_position(
                adata,
                sample_col=cfg.sample_name_col_for_plotting,
                reference_col=cfg.reference_column,
                mismatch_layer=cfg.mismatch_frequency_layer,
                read_span_layer=cfg.mismatch_frequency_read_span_layer,
                # Hard-coded here rather than read from the config.
                exclude_mod_sites=True,
                mod_site_bases=cfg.mod_target_bases,
                save_path=mismatch_base_freq_dir,
                plot_zscores=True,
            )

    # Plot integer sequence encoding clustermaps
    if "sequence_integer_encoding" not in adata.layers:
        logger.debug(
            "sequence_integer_encoding layer not found; skipping integer encoding clustermaps."
        )
    else:
        seq_clustermap_dir = plot_root / "02_sequence_integer_encoding_clustermaps"
        if seq_clustermap_dir.is_dir() and not cfg.force_redo_preprocessing:
            logger.debug(
                f"{seq_clustermap_dir} already exists. Skipping sequence integer encoding clustermaps."
            )
        else:
            make_dirs([seq_clustermap_dir])
            plot_sequence_integer_encoding_clustermaps(
                adata,
                sample_col=cfg.sample_name_col_for_plotting,
                reference_col=cfg.reference_column,
                demux_types=cfg.clustermap_demux_types_to_plot,
                min_quality=None,
                min_length=None,
                min_mapped_length_to_reference_length_ratio=None,
                sort_by="none",
                max_unknown_fraction=0.5,
                save_path=seq_clustermap_dir,
                show_position_axis=True,
            )

        # This second set of clustermaps is only attempted when the
        # sequence_integer_encoding layer is present (it lives in this branch).
        if "mismatch_integer_encoding" in adata.layers:
            mismatch_clustermap_dir = (
                plot_root / "03_mismatch_integer_encoding_clustermaps_no_mod_sites"
            )
            if mismatch_clustermap_dir.is_dir():
                logger.debug(
                    f"{mismatch_clustermap_dir} already exists. "
                    "Skipping mismatch clustermaps without mod sites."
                )
            else:
                make_dirs([mismatch_clustermap_dir])
                plot_sequence_integer_encoding_clustermaps(
                    adata,
                    sample_col=cfg.sample_name_col_for_plotting,
                    reference_col=cfg.reference_column,
                    demux_types=cfg.clustermap_demux_types_to_plot,
                    min_quality=None,
                    min_length=None,
                    min_mapped_length_to_reference_length_ratio=None,
                    sort_by="none",
                    max_unknown_fraction=0.5,
                    save_path=mismatch_clustermap_dir,
                    show_position_axis=True,
                    exclude_mod_sites=True,
                    mod_site_bases=cfg.mod_target_bases,
                )

    # ============================================================
    # 4) Variant segment clustermaps
    # ============================================================
    if have_reference_pair:
        segment_layer_name = f"{seq1_col}__{seq2_col}_variant_segments"
        if segment_layer_name in adata.layers:
            # Keyword arguments shared by both clustermap variants below.
            segment_plot_kwargs = dict(
                seq1_column=seq1_col,
                seq2_column=seq2_col,
                sample_col=cfg.sample_name_col_for_plotting,
                reference_col=cfg.reference_column,
                variant_segment_layer=segment_layer_name,
                read_span_layer=cfg.mismatch_frequency_read_span_layer,
                ref1_marker_color=getattr(cfg, "variant_overlay_seq1_color", "white"),
                ref2_marker_color=getattr(cfg, "variant_overlay_seq2_color", "black"),
                marker_size=getattr(cfg, "variant_overlay_marker_size", 4.0),
                show_position_axis=True,
            )

            segment_dir = plot_root / "04_variant_segment_clustermaps"
            if segment_dir.exists():
                logger.info(
                    "Variant segment clustermaps already exist at %s; skipping.",
                    segment_dir,
                )
            else:
                make_dirs([segment_dir])
                plot_variant_segment_clustermaps(
                    adata,
                    save_path=segment_dir,
                    **segment_plot_kwargs,
                )

            segment_type_dir = plot_root / "05_variant_segment_clustermaps_with_mismatch_type"
            if segment_type_dir.exists():
                logger.info(
                    "Variant segment mismatch-type clustermaps already exist at %s; skipping.",
                    segment_type_dir,
                )
            else:
                make_dirs([segment_type_dir])
                plot_variant_segment_clustermaps(
                    adata,
                    save_path=segment_type_dir,
                    mismatch_type_obs_col="chimeric_variant_sites_type",
                    **segment_plot_kwargs,
                )

    # ============================================================
    # 5) Save AnnData
    # ============================================================
    # A forced redo must persist its results even when a variant AnnData is
    # already on disk; otherwise the rerun would be silently discarded.
    force_redo = getattr(cfg, "force_redo_variant_analyses", False)
    if force_redo or not paths.variant.exists():
        logger.info("Saving variant AnnData")
        record_smftools_metadata(
            adata,
            step_name="variant",
            cfg=cfg,
            config_path=config_path,
            input_paths=[source_adata_path] if source_adata_path else None,
            output_path=paths.variant,
        )
        write_gz_h5ad(adata, paths.variant)

    return adata, paths.variant
smftools/cli_entry.py CHANGED
@@ -7,10 +7,14 @@ from typing import Sequence
7
7
  import click
8
8
  import pandas as pd
9
9
 
10
+ from .cli.chimeric_adata import chimeric_adata
10
11
  from .cli.hmm_adata import hmm_adata
12
+ from .cli.latent_adata import latent_adata
11
13
  from .cli.load_adata import load_adata
12
14
  from .cli.preprocess_adata import preprocess_adata
15
+ from .cli.recipes import full_flow
13
16
  from .cli.spatial_adata import spatial_adata
17
+ from .cli.variant_adata import variant_adata
14
18
  from .informatics.pod5_functions import subsample_pod5
15
19
  from .logging_utils import get_logger, setup_logging
16
20
  from .readwrite import concatenate_h5ads
@@ -63,7 +67,7 @@ def cli(log_file: Path | None, log_level: str):
63
67
  @cli.command()
64
68
  @click.argument("config_path", type=click.Path(exists=True))
65
69
  def load(config_path):
66
- """Load and process data from CONFIG_PATH."""
70
+ """Load raw data into AnnData."""
67
71
  load_adata(config_path)
68
72
 
69
73
 
@@ -74,7 +78,7 @@ def load(config_path):
74
78
  @cli.command()
75
79
  @click.argument("config_path", type=click.Path(exists=True))
76
80
  def preprocess(config_path):
77
- """Preprocess data from CONFIG_PATH."""
81
+ """Preprocessing."""
78
82
  preprocess_adata(config_path)
79
83
 
80
84
 
@@ -85,7 +89,7 @@ def preprocess(config_path):
85
89
  @cli.command()
86
90
  @click.argument("config_path", type=click.Path(exists=True))
87
91
  def spatial(config_path):
88
- """Process data from CONFIG_PATH."""
92
+ """Spatial signal analysis"""
89
93
  spatial_adata(config_path)
90
94
 
91
95
 
@@ -96,13 +100,57 @@ def spatial(config_path):
96
100
  @cli.command()
97
101
  @click.argument("config_path", type=click.Path(exists=True))
98
102
  def hmm(config_path):
99
- """Process data from CONFIG_PATH."""
103
+ """HMM feature annotations and plotting"""
100
104
  hmm_adata(config_path)
101
105
 
102
106
 
103
107
  ##########################################
104
108
 
105
109
 
110
+ ####### Latent ###########
111
@cli.command()
@click.argument("config_path", type=click.Path(exists=True))
def latent(config_path):
    """Latent representations of signal"""
    # The docstring above doubles as this command's click help text, so it is
    # kept short. click.Path(exists=True) has already rejected a missing
    # CONFIG_PATH; the value returned by latent_adata is discarded.
    latent_adata(config_path)
116
+
117
+
118
+ ##########################################
119
+
120
+
121
+ ####### Variant ###########
122
@cli.command()
@click.argument("config_path", type=click.Path(exists=True))
def variant(config_path):
    """Sequence variation analyses"""
    # The docstring above doubles as this command's click help text, so it is
    # kept short. click.Path(exists=True) has already rejected a missing
    # CONFIG_PATH; the value returned by variant_adata is discarded.
    variant_adata(config_path)
127
+
128
+
129
+ ##########################################
130
+
131
+
132
+ ####### Chimeric ###########
133
@cli.command()
@click.argument("config_path", type=click.Path(exists=True))
def chimeric(config_path):
    """Finding putative PCR chimeras"""
    # The docstring above doubles as this command's click help text, so it is
    # kept short. click.Path(exists=True) has already rejected a missing
    # CONFIG_PATH; the value returned by chimeric_adata is discarded.
    chimeric_adata(config_path)
138
+
139
+
140
+ ##########################################
141
+
142
+
143
+ ####### Recipes ###########
144
@cli.command()
@click.argument("config_path", type=click.Path(exists=True))
def full(config_path):
    """Workflow: load preprocess spatial variant chimeric hmm latent"""
    # The docstring above doubles as this command's click help text, so it is
    # kept short. click.Path(exists=True) has already rejected a missing
    # CONFIG_PATH; the whole workflow is delegated to full_flow and its
    # return value is discarded.
    full_flow(config_path)
149
+
150
+
151
+ ##########################################
152
+
153
+
106
154
  ####### batch command ###########
107
155
  @cli.command()
108
156
  @click.argument(
@@ -46,4 +46,4 @@ hmm_feature_sets:
46
46
  cpg_patch: [0, inf]
47
47
 
48
48
  hmm_merge_layer_features:
49
- - ["all_accessible_features", 60]
49
+ - ["all_accessible_features", 60]
@@ -39,6 +39,9 @@ autocorr_site_types:
39
39
  correlation_matrix_site_types:
40
40
  - "C_site"
41
41
 
42
+ rolling_nn_site_types:
43
+ - "C"
44
+
42
45
  # ######## smftools hmm params #########
43
46
  cpg: False # whether to use the default HMM endogenous CpG patch params
44
47
  hmm_methbases: