smftools 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. smftools/__init__.py +2 -6
  2. smftools/_version.py +1 -1
  3. smftools/cli/__init__.py +0 -0
  4. smftools/cli/archived/cli_flows.py +94 -0
  5. smftools/cli/helpers.py +48 -0
  6. smftools/cli/hmm_adata.py +361 -0
  7. smftools/cli/load_adata.py +637 -0
  8. smftools/cli/preprocess_adata.py +455 -0
  9. smftools/cli/spatial_adata.py +697 -0
  10. smftools/cli_entry.py +434 -0
  11. smftools/config/conversion.yaml +18 -6
  12. smftools/config/deaminase.yaml +18 -11
  13. smftools/config/default.yaml +151 -36
  14. smftools/config/direct.yaml +28 -1
  15. smftools/config/discover_input_files.py +115 -0
  16. smftools/config/experiment_config.py +225 -27
  17. smftools/hmm/HMM.py +12 -1
  18. smftools/hmm/__init__.py +0 -6
  19. smftools/hmm/archived/call_hmm_peaks.py +106 -0
  20. smftools/hmm/call_hmm_peaks.py +318 -90
  21. smftools/informatics/__init__.py +13 -7
  22. smftools/informatics/archived/fast5_to_pod5.py +43 -0
  23. smftools/informatics/archived/helpers/archived/__init__.py +71 -0
  24. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
  25. smftools/informatics/{helpers → archived/helpers/archived}/aligned_BAM_to_bed.py +6 -4
  26. smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
  27. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
  28. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
  29. smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
  30. smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
  31. smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +1 -1
  32. smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
  33. smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
  34. smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
  35. smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
  36. smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
  37. smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
  38. smftools/informatics/{helpers → archived/helpers/archived}/plot_bed_histograms.py +0 -19
  39. smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +6 -5
  40. smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +7 -7
  41. smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
  42. smftools/informatics/bam_functions.py +811 -0
  43. smftools/informatics/basecalling.py +67 -0
  44. smftools/informatics/bed_functions.py +366 -0
  45. smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +42 -30
  46. smftools/informatics/fasta_functions.py +255 -0
  47. smftools/informatics/h5ad_functions.py +197 -0
  48. smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +142 -59
  49. smftools/informatics/modkit_functions.py +129 -0
  50. smftools/informatics/ohe.py +160 -0
  51. smftools/informatics/pod5_functions.py +224 -0
  52. smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
  53. smftools/plotting/autocorrelation_plotting.py +1 -3
  54. smftools/plotting/general_plotting.py +1084 -363
  55. smftools/plotting/position_stats.py +3 -3
  56. smftools/preprocessing/__init__.py +4 -4
  57. smftools/preprocessing/append_base_context.py +35 -26
  58. smftools/preprocessing/append_binary_layer_by_base_context.py +6 -6
  59. smftools/preprocessing/binarize.py +17 -0
  60. smftools/preprocessing/binarize_on_Youden.py +11 -9
  61. smftools/preprocessing/calculate_complexity_II.py +1 -1
  62. smftools/preprocessing/calculate_coverage.py +16 -13
  63. smftools/preprocessing/calculate_position_Youden.py +42 -26
  64. smftools/preprocessing/calculate_read_modification_stats.py +2 -2
  65. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +1 -1
  66. smftools/preprocessing/filter_reads_on_modification_thresholds.py +20 -20
  67. smftools/preprocessing/flag_duplicate_reads.py +2 -2
  68. smftools/preprocessing/invert_adata.py +1 -1
  69. smftools/preprocessing/load_sample_sheet.py +1 -1
  70. smftools/preprocessing/reindex_references_adata.py +37 -0
  71. smftools/readwrite.py +360 -140
  72. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/METADATA +26 -19
  73. smftools-0.2.4.dist-info/RECORD +176 -0
  74. smftools-0.2.4.dist-info/entry_points.txt +2 -0
  75. smftools/cli.py +0 -184
  76. smftools/informatics/fast5_to_pod5.py +0 -24
  77. smftools/informatics/helpers/__init__.py +0 -73
  78. smftools/informatics/helpers/align_and_sort_BAM.py +0 -86
  79. smftools/informatics/helpers/bam_qc.py +0 -66
  80. smftools/informatics/helpers/bed_to_bigwig.py +0 -39
  81. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -378
  82. smftools/informatics/helpers/discover_input_files.py +0 -100
  83. smftools/informatics/helpers/index_fasta.py +0 -12
  84. smftools/informatics/helpers/make_dirs.py +0 -21
  85. smftools/informatics/readwrite.py +0 -106
  86. smftools/informatics/subsample_fasta_from_bed.py +0 -47
  87. smftools/load_adata.py +0 -1346
  88. smftools-0.2.1.dist-info/RECORD +0 -161
  89. smftools-0.2.1.dist-info/entry_points.txt +0 -2
  90. /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
  91. /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
  92. /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
  93. /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
  94. /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
  95. /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
  96. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +0 -0
  97. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
  98. /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
  99. /smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +0 -0
  100. /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
  101. /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
  102. /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
  103. /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
  104. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
  105. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
  106. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
  107. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
  108. /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
  109. /smftools/informatics/{helpers/binarize_converted_base_identities.py → binarize_converted_base_identities.py} +0 -0
  110. /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
  111. /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archives/add_read_length_and_mapping_qc.py} +0 -0
  112. /smftools/preprocessing/{calculate_complexity.py → archives/calculate_complexity.py} +0 -0
  113. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/WHEEL +0 -0
  114. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/licenses/LICENSE +0 -0
smftools/cli_entry.py ADDED
@@ -0,0 +1,434 @@
+ import click
+ import pandas as pd
+ from pathlib import Path
+ from typing import Dict, Optional, Sequence
+
+ from .cli.load_adata import load_adata
+ from .cli.preprocess_adata import preprocess_adata
+ from .cli.spatial_adata import spatial_adata
+ from .cli.hmm_adata import hmm_adata
+
+ from .readwrite import safe_read_h5ad, safe_write_h5ad, concatenate_h5ads
+
+ @click.group()
+ def cli():
+     """Command-line interface for smftools."""
+     pass
+
+ ####### Load anndata from raw data ###########
+ @cli.command()
+ @click.argument("config_path", type=click.Path(exists=True))
+ def load(config_path):
+     """Load and process data from CONFIG_PATH."""
+     load_adata(config_path)
+ ##########################################
+
+ ####### Preprocessing ###########
+ @cli.command()
+ @click.argument("config_path", type=click.Path(exists=True))
+ def preprocess(config_path):
+     """Preprocess data from CONFIG_PATH."""
+     preprocess_adata(config_path)
+ ##########################################
+
+ ####### Spatial ###########
+ @cli.command()
+ @click.argument("config_path", type=click.Path(exists=True))
+ def spatial(config_path):
+     """Process data from CONFIG_PATH."""
+     spatial_adata(config_path)
+ ##########################################
+
+ ####### HMM ###########
+ @cli.command()
+ @click.argument("config_path", type=click.Path(exists=True))
+ def hmm(config_path):
+     """Process data from CONFIG_PATH."""
+     hmm_adata(config_path)
+ ##########################################
+
+ ####### batch command ###########
+ @cli.command()
+ @click.argument(
+     "task",
+     type=click.Choice(["load", "preprocess", "spatial", "hmm"], case_sensitive=False),
+ )
+ @click.argument(
+     "config_table",
+     type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path),
+ )
+ @click.option(
+     "--column",
+     "-c",
+     default="config_path",
+     show_default=True,
+     help="Column name containing config paths (ignored for plain TXT).",
+ )
+ @click.option(
+     "--sep",
+     default=None,
+     help="Field separator: default auto-detect (.tsv -> '\\t', .csv -> ',', others treated as TXT).",
+ )
+ def batch(task, config_table: Path, column: str, sep: str | None):
+     """
+     Run a TASK (load, preprocess, spatial, hmm) on multiple CONFIG_PATHs
+     listed in a CSV/TSV or plain TXT file.
+
+     Plain text format: one config path per line, no header.
+     """
+
+     # ----------------------------
+     # Decide file type
+     # ----------------------------
+     suffix = config_table.suffix.lower()
+
+     # TXT mode → each line is a config path
+     if suffix in {".txt", ".list"}:
+         paths = []
+         with config_table.open() as f:
+             for line in f:
+                 line = line.strip()
+                 if line:
+                     paths.append(Path(line).expanduser())
+
+         if not paths:
+             raise click.ClickException(f"No config paths found in text file: {config_table}")
+
+         config_paths = paths
+
+     else:
+         # CSV / TSV mode
+         # auto-detect separator if not provided
+         if sep is None:
+             if suffix in {".tsv", ".tab"}:
+                 sep = "\t"
+             else:
+                 sep = ","
+
+         try:
+             df = pd.read_csv(config_table, sep=sep, dtype=str)
+         except Exception as e:
+             raise click.ClickException(f"Failed to read table {config_table}: {e}") from e
+
+         if df.empty:
+             raise click.ClickException(f"Config table is empty: {config_table}")
+
+         # If table has no header or only one column, treat it as raw paths
+         if df.shape[1] == 1 and column not in df.columns:
+             # re-read as headerless single-column list, so we don't drop the first path
+             try:
+                 df = pd.read_csv(
+                     config_table,
+                     sep=sep,
+                     header=None,
+                     names=[column],
+                     dtype=str,
+                 )
+             except Exception as e:
+                 raise click.ClickException(f"Failed to read {config_table} as headerless list: {e}") from e
+
+             config_series = df[column]
+         else:
+             if column not in df.columns:
+                 raise click.ClickException(
+                     f"Column '{column}' not found in {config_table}. "
+                     f"Available columns: {', '.join(df.columns)}"
+                 )
+             config_series = df[column]
+
+         config_paths = (
+             config_series.dropna()
+             .map(str)
+             .map(lambda p: Path(p).expanduser())
+             .tolist()
+         )
+
+     # ----------------------------
+     # Validate config paths
+     # ----------------------------
+     if not config_paths:
+         raise click.ClickException("No config paths found.")
+
+     # ----------------------------
+     # Map task to function
+     # ----------------------------
+     task = task.lower()
+     task_funcs = {
+         "load": load_adata,
+         "preprocess": preprocess_adata,
+         "spatial": spatial_adata,
+         "hmm": hmm_adata,
+     }
+
+     func = task_funcs[task]
+
+     click.echo(
+         f"Running task '{task}' on {len(config_paths)} config paths from {config_table}"
+     )
+
+     # ----------------------------
+     # Loop over paths
+     # ----------------------------
+     for i, cfg in enumerate(config_paths, start=1):
+         if not cfg.exists():
+             click.echo(f"[{i}/{len(config_paths)}] SKIP (missing): {cfg}")
+             continue
+
+         click.echo(f"[{i}/{len(config_paths)}] {task} → {cfg}")
+
+         try:
+             func(str(cfg))  # underlying functions take a string path
+         except Exception as e:
+             click.echo(f"  ERROR on {cfg}: {e}")
+
+     click.echo("Batch processing complete.")
+ ##########################################
+
+ ####### concatenate command ###########
+ @cli.command("concatenate")
+ @click.argument(
+     "output_path",
+     type=click.Path(path_type=Path, dir_okay=False),
+ )
+ @click.option(
+     "--input-dir",
+     "-d",
+     type=click.Path(path_type=Path, file_okay=False),
+     default=None,
+     help="Directory containing .h5ad/.h5ad.gz files to concatenate.",
+ )
+ @click.option(
+     "--csv-path",
+     "-c",
+     type=click.Path(path_type=Path, dir_okay=False),
+     default=None,
+     help="CSV/TSV/TXT containing file paths of h5ad files.",
+ )
+ @click.option(
+     "--csv-column",
+     "-C",
+     default="h5ad_path",
+     help="Column in the CSV containing file paths (ignored for TXT).",
+     show_default=True,
+ )
+ @click.option(
+     "--suffix",
+     "-s",
+     multiple=True,
+     default=[".h5ad", ".h5ad.gz"],
+     help="Allowed file suffixes (repeatable).",
+     show_default=True,
+ )
+ @click.option(
+     "--delete",
+     is_flag=True,
+     help="Delete input .h5ad files after concatenation.",
+ )
+ @click.option(
+     "--restore",
+     is_flag=True,
+     help="Restore .h5ad backups during reading.",
+ )
+ def concatenate_cmd(
+     output_path: Path,
+     input_dir: Path | None,
+     csv_path: Path | None,
+     csv_column: str,
+     suffix: Sequence[str],
+     delete: bool,
+     restore: bool,
+ ):
+     """
+     Concatenate multiple .h5ad files into a single output file.
+
+     Two modes:
+
+         smftools concatenate out.h5ad.gz --input-dir ./dir
+
+         smftools concatenate out.h5ad.gz --csv-path paths.csv --csv-column h5ad_path
+
+     TXT input also works (one file path per line).
+
+     Uses safe_read_h5ad() and safe_write_h5ad().
+     """
+
+     if input_dir and csv_path:
+         raise click.ClickException("Provide only ONE of --input-dir or --csv-path.")
+
+     try:
+         out = concatenate_h5ads(
+             output_path=output_path,
+             input_dir=input_dir,
+             csv_path=csv_path,
+             csv_column=csv_column,
+             file_suffixes=tuple(suffix),
+             delete_inputs=delete,
+             restore_backups=restore,
+         )
+         click.echo(f"Concatenated file written to: {out}")
+
+     except Exception as e:
+         raise click.ClickException(str(e)) from e
+ ##########################################
+
+ ####### Merging existing anndatas from an experiment that used two different demultiplexing rules #######
+ # REQUIRED_KEYS = ("adata_single_path", "adata_double_path")
+ # OPTIONAL_KEYS = (
+ #     "adata_single_backups_path",
+ #     "adata_double_backups_path",
+ #     "output_path",
+ #     "merged_filename",
+ # )
+
+ # def _read_config_csv(csv_path: Path) -> Dict[str, str]:
+ #     """
+ #     Read a multi-row, two-column CSV of key,value pairs into a dict.
+
+ #     Supported features:
+ #       - Optional header ("key,value") or none.
+ #       - Comments starting with '#' and blank lines are ignored.
+ #       - If duplicate keys occur, the last one wins.
+ #       - Keys are matched literally against REQUIRED_KEYS/OPTIONAL_KEYS.
+ #     """
+ #     try:
+ #         # Read as two columns regardless of header; comments ignored.
+ #         df = pd.read_csv(
+ #             csv_path,
+ #             dtype=str,
+ #             comment="#",
+ #             header=None,  # treat everything as rows; we'll normalize below
+ #             usecols=[0, 1],
+ #             names=["key", "value"]
+ #         )
+ #     except Exception as e:
+ #         raise click.ClickException(f"Failed to read CSV: {e}") from e
+
+ #     # Drop completely empty rows
+ #     df = df.fillna("").astype(str)
+ #     df["key"] = df["key"].str.strip()
+ #     df["value"] = df["value"].str.strip()
+ #     df = df[(df["key"] != "") & (df["key"].notna())]
+
+ #     if df.empty:
+ #         raise click.ClickException("Config CSV is empty after removing comments/blank lines.")
+
+ #     # Remove an optional header row if present
+ #     if df.iloc[0]["key"].lower() in {"key", "keys"}:
+ #         df = df.iloc[1:]
+ #         df = df[(df["key"] != "") & (df["key"].notna())]
+ #         if df.empty:
+ #             raise click.ClickException("Config CSV contains only a header row.")
+
+ #     # Build dict; last occurrence of a key wins
+ #     cfg = {}
+ #     for k, v in zip(df["key"], df["value"]):
+ #         cfg[k] = v
+
+ #     # Validate required keys
+ #     missing = [k for k in REQUIRED_KEYS if not cfg.get(k)]
+ #     if missing:
+ #         raise click.ClickException(
+ #             "Missing required keys in CSV: "
+ #             + ", ".join(missing)
+ #             + "\nExpected keys:\n - "
+ #             + "\n - ".join(REQUIRED_KEYS)
+ #             + "\nOptional keys:\n - "
+ #             + "\n - ".join(OPTIONAL_KEYS)
+ #         )
+
+ #     return cfg
+
+ # def _resolve_output_path(cfg: Dict[str, str], single_path: Path, double_path: Path) -> Path:
+ #     """Decide on the output .h5ad path based on CSV; create directories if needed."""
+ #     merged_filename = cfg.get("merged_filename") or f"merged_{single_path.stem}__{double_path.stem}.h5ad"
+ #     if not merged_filename.endswith(".h5ad"):
+ #         merged_filename += ".h5ad"
+
+ #     output_path_raw = cfg.get("output_path", "").strip()
+
+ #     if not output_path_raw:
+ #         out_dir = Path.cwd() / "merged_output"
+ #         out_dir.mkdir(parents=True, exist_ok=True)
+ #         return out_dir / merged_filename
+
+ #     output_path = Path(output_path_raw)
+
+ #     if output_path.suffix.lower() == ".h5ad":
+ #         output_path.parent.mkdir(parents=True, exist_ok=True)
+ #         return output_path
+
+ #     # Treat as directory
+ #     output_path.mkdir(parents=True, exist_ok=True)
+ #     return output_path / merged_filename
+
+ # def _maybe_read_adata(label: str, primary: Path, backups: Optional[Path]):
+
+ #     if backups:
+ #         click.echo(f"Loading {label} from {primary} with backups at {backups} ...")
+ #         return safe_read_h5ad(primary, backups_path=backups, restore_backups=True)
+ #     else:
+ #         click.echo(f"Loading {label} from {primary} with backups disabled ...")
+ #         return safe_read_h5ad(primary, restore_backups=False)
+
+
+ # @cli.command()
+ # @click.argument("config_path", type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path))
+ # def merge_barcoded_anndatas(config_path: Path):
+ #     """
+ #     Merge two AnnData objects from the same experiment that were demultiplexed
+ #     under different end-barcoding requirements, using a two-column key,value CSV for config.
+
+ #     CSV must include:
+ #       - adata_single_path
+ #       - adata_double_path
+
+ #     Optional keys:
+ #       - adata_single_backups_path
+ #       - adata_double_backups_path
+ #       - output_path (file or directory; default: ./merged_output/)
+ #       - merged_filename (default: merged_<single>__<double>.h5ad)
+
+ #     Example CSV:
+
+ #         adata_single_path,/path/single.h5ad
+ #         adata_double_path,/path/double.h5ad
+ #         output_path,merged_output
+ #         merged_filename,merged_run.h5ad
+ #     """
+ #     try:
+ #         cfg = _read_config_csv(config_path)
+
+ #         single_path = Path(cfg["adata_single_path"]).expanduser().resolve()
+ #         double_path = Path(cfg["adata_double_path"]).expanduser().resolve()
+
+ #         for p, label in [(single_path, "adata_single_path"), (double_path, "adata_double_path")]:
+ #             if not p.exists():
+ #                 raise click.ClickException(f"{label} does not exist: {p}")
+
+ #         single_backups = Path(cfg["adata_single_backups_path"]).expanduser().resolve() if cfg.get("adata_single_backups_path") else None
+ #         double_backups = Path(cfg["adata_double_backups_path"]).expanduser().resolve() if cfg.get("adata_double_backups_path") else None
+
+ #         if single_backups and not single_backups.exists():
+ #             raise click.ClickException(f"adata_single_backups_path does not exist: {single_backups}")
+ #         if double_backups and not double_backups.exists():
+ #             raise click.ClickException(f"adata_double_backups_path does not exist: {double_backups}")
+
+ #         output_path = _resolve_output_path(cfg, single_path, double_path)
+
+ #         # Load
+ #         adata_single, read_report_single = _maybe_read_adata("single-barcoded AnnData", single_path, single_backups)
+ #         adata_double, read_report_double = _maybe_read_adata("double-barcoded AnnData", double_path, double_backups)
+
+ #         click.echo("Merging AnnDatas ...")
+ #         merged = merge_barcoded_anndatas_core(adata_single, adata_double)
+
+ #         click.echo(f"Writing merged AnnData to: {output_path}")
+ #         backup_dir = output_path.parent / "merged_backups"
+ #         safe_write_h5ad(merged, output_path, backup=True, backup_dir=backup_dir)
+
+ #         click.secho(f"Done. Merged AnnData saved to {output_path}", fg="green")
+
+ #     except click.ClickException:
+ #         raise
+ #     except Exception as e:
+ #         # Surface unexpected errors cleanly
+ #         raise click.ClickException(f"Unexpected error: {e}") from e
+ ################################################################################################################
smftools/config/conversion.yaml CHANGED
@@ -1,11 +1,22 @@
  # Conversion (Bisulfite/APOBEC) footprinting defaults
  extends: default
+
+ ######## smftools load params #########
  conversion_types:
  - '5mC' # 5mC
 
+ ######## smftools preprocess params #########
  # Read QC Params
  read_mod_filtering_use_other_c_as_background: True
 
+ # Spatial Analysis - Clustermap params
+ layer_for_clustermap_plotting: 'nan0_0minus1'
+ clustermap_cmap_c: "coolwarm"
+ clustermap_cmap_gpc: "coolwarm"
+ clustermap_cmap_cpg: "viridis"
+ clustermap_cmap_a: "coolwarm"
+
+ ######## smftools hmm params #########
  # HMM
  cpg: True # whether to use the default HMM endogenous CpG patch params
  hmm_methbases:
@@ -14,16 +25,17 @@ hmm_feature_sets:
    footprint:
      state: "Non-Modified"
      features:
-       small_bound_stretch: [0, 20]
-       medium_bound_stretch: [20, 50]
-       putative_nucleosome: [50, 200]
+       small_bound_stretch: [6, 40]
+       medium_bound_stretch: [40, 100]
+       putative_nucleosome: [100, 200]
        large_bound_stretch: [200, inf]
    accessible:
      state: "Modified"
      features:
-       small_accessible_patch: [0, 20]
-       mid_accessible_patch: [20, 80]
-       large_accessible_patch: [80, inf]
+       small_accessible_patch: [3, 20]
+       mid_accessible_patch: [20, 40]
+       large_accessible_patch: [40, 110]
+       nucleosome_depleted_region: [110, inf]
    cpg:
      state: "Modified"
      features:
smftools/config/deaminase.yaml CHANGED
@@ -1,18 +1,23 @@
  # Deaminase footprinting defaults
  extends: default
+
+ ######## smftools load params #########
  conversion_types:
  - '5mC' # 5mC
 
  mod_target_bases:
  - "C"
+ enzyme_target_bases:
+ - "C"
 
+ ######## smftools preprocess params #########
  read_mod_filtering_gpc_thresholds:
  - null
  - null
  read_mod_filtering_cpg_thresholds:
  - null
  - null
- read_mod_filtering_any_c_thresholds:
+ read_mod_filtering_c_thresholds:
  - 0.01
  - 0.99
  read_mod_filtering_a_thresholds:
@@ -23,17 +28,18 @@ read_mod_filtering_use_other_c_as_background: False
 
  # Duplicate Detection Params
  duplicate_detection_site_types:
- - "any_C"
+ - "C"
 
+ ######## smftools analyze params #########
  # Autocorrelation params
  autocorr_site_types:
- - "any_C"
+ - "C"
 
  # Correlation matrix params
  correlation_matrix_site_types:
- - "any_C_site"
+ - "C_site"
 
- # HMM
+ # ######## smftools hmm params #########
  cpg: False # whether to use the default HMM endogenous CpG patch params
  hmm_methbases:
  - "C"
@@ -41,16 +47,17 @@ hmm_feature_sets:
    footprint:
      state: "Non-Modified"
      features:
-       small_bound_stretch: [0, 25]
-       medium_bound_stretch: [25, 80]
-       putative_nucleosome: [80, 200]
+       small_bound_stretch: [6, 40]
+       medium_bound_stretch: [40, 100]
+       putative_nucleosome: [100, 200]
        large_bound_stretch: [200, inf]
    accessible:
      state: "Modified"
      features:
-       small_accessible_patch: [0, 20]
-       mid_accessible_patch: [20, 100]
-       large_accessible_patch: [100, inf]
+       small_accessible_patch: [3, 20]
+       mid_accessible_patch: [20, 40]
+       large_accessible_patch: [40, 110]
+       nucleosome_depleted_region: [110, inf]
 
  hmm_merge_layer_features:
  - ["C_all_accessible_features", 80]