smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. smftools/__init__.py +6 -8
  2. smftools/_settings.py +4 -6
  3. smftools/_version.py +1 -1
  4. smftools/cli/helpers.py +54 -0
  5. smftools/cli/hmm_adata.py +937 -256
  6. smftools/cli/load_adata.py +448 -268
  7. smftools/cli/preprocess_adata.py +469 -263
  8. smftools/cli/spatial_adata.py +536 -319
  9. smftools/cli_entry.py +97 -182
  10. smftools/config/__init__.py +1 -1
  11. smftools/config/conversion.yaml +17 -6
  12. smftools/config/deaminase.yaml +12 -10
  13. smftools/config/default.yaml +142 -33
  14. smftools/config/direct.yaml +11 -3
  15. smftools/config/discover_input_files.py +19 -5
  16. smftools/config/experiment_config.py +594 -264
  17. smftools/constants.py +37 -0
  18. smftools/datasets/__init__.py +2 -8
  19. smftools/datasets/datasets.py +32 -18
  20. smftools/hmm/HMM.py +2128 -1418
  21. smftools/hmm/__init__.py +2 -9
  22. smftools/hmm/archived/call_hmm_peaks.py +121 -0
  23. smftools/hmm/call_hmm_peaks.py +299 -91
  24. smftools/hmm/display_hmm.py +19 -6
  25. smftools/hmm/hmm_readwrite.py +13 -4
  26. smftools/hmm/nucleosome_hmm_refinement.py +102 -14
  27. smftools/informatics/__init__.py +30 -7
  28. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
  29. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  30. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  31. smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
  32. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
  33. smftools/informatics/archived/print_bam_query_seq.py +7 -1
  34. smftools/informatics/bam_functions.py +397 -175
  35. smftools/informatics/basecalling.py +51 -9
  36. smftools/informatics/bed_functions.py +90 -57
  37. smftools/informatics/binarize_converted_base_identities.py +18 -7
  38. smftools/informatics/complement_base_list.py +7 -6
  39. smftools/informatics/converted_BAM_to_adata.py +265 -122
  40. smftools/informatics/fasta_functions.py +161 -83
  41. smftools/informatics/h5ad_functions.py +196 -30
  42. smftools/informatics/modkit_extract_to_adata.py +609 -270
  43. smftools/informatics/modkit_functions.py +85 -44
  44. smftools/informatics/ohe.py +44 -21
  45. smftools/informatics/pod5_functions.py +112 -73
  46. smftools/informatics/run_multiqc.py +20 -14
  47. smftools/logging_utils.py +51 -0
  48. smftools/machine_learning/__init__.py +2 -7
  49. smftools/machine_learning/data/anndata_data_module.py +143 -50
  50. smftools/machine_learning/data/preprocessing.py +2 -1
  51. smftools/machine_learning/evaluation/__init__.py +1 -1
  52. smftools/machine_learning/evaluation/eval_utils.py +11 -14
  53. smftools/machine_learning/evaluation/evaluators.py +46 -33
  54. smftools/machine_learning/inference/__init__.py +1 -1
  55. smftools/machine_learning/inference/inference_utils.py +7 -4
  56. smftools/machine_learning/inference/lightning_inference.py +9 -13
  57. smftools/machine_learning/inference/sklearn_inference.py +6 -8
  58. smftools/machine_learning/inference/sliding_window_inference.py +35 -25
  59. smftools/machine_learning/models/__init__.py +10 -5
  60. smftools/machine_learning/models/base.py +28 -42
  61. smftools/machine_learning/models/cnn.py +15 -11
  62. smftools/machine_learning/models/lightning_base.py +71 -40
  63. smftools/machine_learning/models/mlp.py +13 -4
  64. smftools/machine_learning/models/positional.py +3 -2
  65. smftools/machine_learning/models/rnn.py +3 -2
  66. smftools/machine_learning/models/sklearn_models.py +39 -22
  67. smftools/machine_learning/models/transformer.py +68 -53
  68. smftools/machine_learning/models/wrappers.py +2 -1
  69. smftools/machine_learning/training/__init__.py +2 -2
  70. smftools/machine_learning/training/train_lightning_model.py +29 -20
  71. smftools/machine_learning/training/train_sklearn_model.py +9 -15
  72. smftools/machine_learning/utils/__init__.py +1 -1
  73. smftools/machine_learning/utils/device.py +7 -4
  74. smftools/machine_learning/utils/grl.py +3 -1
  75. smftools/metadata.py +443 -0
  76. smftools/plotting/__init__.py +19 -5
  77. smftools/plotting/autocorrelation_plotting.py +145 -44
  78. smftools/plotting/classifiers.py +162 -72
  79. smftools/plotting/general_plotting.py +422 -197
  80. smftools/plotting/hmm_plotting.py +42 -13
  81. smftools/plotting/position_stats.py +147 -87
  82. smftools/plotting/qc_plotting.py +20 -12
  83. smftools/preprocessing/__init__.py +10 -12
  84. smftools/preprocessing/append_base_context.py +115 -80
  85. smftools/preprocessing/append_binary_layer_by_base_context.py +77 -39
  86. smftools/preprocessing/{calculate_complexity.py → archived/calculate_complexity.py} +3 -1
  87. smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
  88. smftools/preprocessing/binarize.py +21 -4
  89. smftools/preprocessing/binarize_on_Youden.py +129 -31
  90. smftools/preprocessing/binary_layers_to_ohe.py +17 -11
  91. smftools/preprocessing/calculate_complexity_II.py +86 -59
  92. smftools/preprocessing/calculate_consensus.py +28 -19
  93. smftools/preprocessing/calculate_coverage.py +50 -25
  94. smftools/preprocessing/calculate_pairwise_differences.py +2 -1
  95. smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
  96. smftools/preprocessing/calculate_position_Youden.py +118 -54
  97. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  98. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  99. smftools/preprocessing/clean_NaN.py +38 -28
  100. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  101. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +71 -38
  102. smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
  103. smftools/preprocessing/flag_duplicate_reads.py +689 -272
  104. smftools/preprocessing/invert_adata.py +26 -11
  105. smftools/preprocessing/load_sample_sheet.py +40 -22
  106. smftools/preprocessing/make_dirs.py +8 -3
  107. smftools/preprocessing/min_non_diagonal.py +2 -1
  108. smftools/preprocessing/recipes.py +56 -23
  109. smftools/preprocessing/reindex_references_adata.py +103 -0
  110. smftools/preprocessing/subsample_adata.py +33 -16
  111. smftools/readwrite.py +331 -82
  112. smftools/schema/__init__.py +11 -0
  113. smftools/schema/anndata_schema_v1.yaml +227 -0
  114. smftools/tools/__init__.py +3 -4
  115. smftools/tools/archived/classifiers.py +163 -0
  116. smftools/tools/archived/subset_adata_v1.py +10 -1
  117. smftools/tools/archived/subset_adata_v2.py +12 -1
  118. smftools/tools/calculate_umap.py +54 -15
  119. smftools/tools/cluster_adata_on_methylation.py +115 -46
  120. smftools/tools/general_tools.py +70 -25
  121. smftools/tools/position_stats.py +229 -98
  122. smftools/tools/read_stats.py +50 -29
  123. smftools/tools/spatial_autocorrelation.py +365 -192
  124. smftools/tools/subset_adata.py +23 -21
  125. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/METADATA +17 -39
  126. smftools-0.2.5.dist-info/RECORD +181 -0
  127. smftools-0.2.3.dist-info/RECORD +0 -173
  128. /smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
  129. /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
  130. /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
  131. /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
  132. /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archived/add_read_length_and_mapping_qc.py} +0 -0
  133. /smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
  134. /smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
  135. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
  136. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
  137. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
smftools/cli_entry.py CHANGED
@@ -1,20 +1,38 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Sequence
4
+
1
5
  import click
2
6
  import pandas as pd
3
- from pathlib import Path
4
- from typing import Dict, Optional, Sequence
5
7
 
8
+ from .cli.hmm_adata import hmm_adata
6
9
  from .cli.load_adata import load_adata
7
- from .cli.cli_flows import flow_I
8
10
  from .cli.preprocess_adata import preprocess_adata
9
11
  from .cli.spatial_adata import spatial_adata
10
- from .cli.hmm_adata import hmm_adata
12
+ from .informatics.pod5_functions import subsample_pod5
13
+ from .logging_utils import setup_logging
14
+ from .readwrite import concatenate_h5ads
11
15
 
12
- from .readwrite import merge_barcoded_anndatas_core, safe_read_h5ad, safe_write_h5ad, concatenate_h5ads
13
16
 
14
17
  @click.group()
15
- def cli():
18
+ @click.option(
19
+ "--log-file",
20
+ type=click.Path(dir_okay=False, writable=True, path_type=Path),
21
+ default=None,
22
+ help="Optional file path to write smftools logs.",
23
+ )
24
+ @click.option(
25
+ "--log-level",
26
+ type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"], case_sensitive=False),
27
+ default="INFO",
28
+ show_default=True,
29
+ help="Logging level for smftools output.",
30
+ )
31
+ def cli(log_file: Path | None, log_level: str):
16
32
  """Command-line interface for smftools."""
17
- pass
33
+ level = getattr(logging, log_level.upper(), logging.INFO)
34
+ setup_logging(level=level, log_file=log_file)
35
+
18
36
 
19
37
  ####### Load anndata from raw data ###########
20
38
  @cli.command()
@@ -22,32 +40,44 @@ def cli():
22
40
  def load(config_path):
23
41
  """Load and process data from CONFIG_PATH."""
24
42
  load_adata(config_path)
43
+
44
+
25
45
  ##########################################
26
46
 
47
+
27
48
  ####### Preprocessing ###########
28
49
  @cli.command()
29
50
  @click.argument("config_path", type=click.Path(exists=True))
30
51
  def preprocess(config_path):
31
52
  """Preprocess data from CONFIG_PATH."""
32
53
  preprocess_adata(config_path)
54
+
55
+
33
56
  ##########################################
34
57
 
58
+
35
59
  ####### Spatial ###########
36
60
  @cli.command()
37
61
  @click.argument("config_path", type=click.Path(exists=True))
38
62
  def spatial(config_path):
39
63
  """Process data from CONFIG_PATH."""
40
64
  spatial_adata(config_path)
65
+
66
+
41
67
  ##########################################
42
68
 
69
+
43
70
  ####### HMM ###########
44
71
  @cli.command()
45
72
  @click.argument("config_path", type=click.Path(exists=True))
46
73
  def hmm(config_path):
47
74
  """Process data from CONFIG_PATH."""
48
75
  hmm_adata(config_path)
76
+
77
+
49
78
  ##########################################
50
79
 
80
+
51
81
  ####### batch command ###########
52
82
  @cli.command()
53
83
  @click.argument(
@@ -126,7 +156,9 @@ def batch(task, config_table: Path, column: str, sep: str | None):
126
156
  dtype=str,
127
157
  )
128
158
  except Exception as e:
129
- raise click.ClickException(f"Failed to read {config_table} as headerless list: {e}") from e
159
+ raise click.ClickException(
160
+ f"Failed to read {config_table} as headerless list: {e}"
161
+ ) from e
130
162
 
131
163
  config_series = df[column]
132
164
  else:
@@ -137,12 +169,7 @@ def batch(task, config_table: Path, column: str, sep: str | None):
137
169
  )
138
170
  config_series = df[column]
139
171
 
140
- config_paths = (
141
- config_series.dropna()
142
- .map(str)
143
- .map(lambda p: Path(p).expanduser())
144
- .tolist()
145
- )
172
+ config_paths = config_series.dropna().map(str).map(lambda p: Path(p).expanduser()).tolist()
146
173
 
147
174
  # ----------------------------
148
175
  # Validate config paths
@@ -163,9 +190,7 @@ def batch(task, config_table: Path, column: str, sep: str | None):
163
190
 
164
191
  func = task_funcs[task]
165
192
 
166
- click.echo(
167
- f"Running task '{task}' on {len(config_paths)} config paths from {config_table}"
168
- )
193
+ click.echo(f"Running task '{task}' on {len(config_paths)} config paths from {config_table}")
169
194
 
170
195
  # ----------------------------
171
196
  # Loop over paths
@@ -178,13 +203,16 @@ def batch(task, config_table: Path, column: str, sep: str | None):
178
203
  click.echo(f"[{i}/{len(config_paths)}] {task} → {cfg}")
179
204
 
180
205
  try:
181
- func(str(cfg)) # underlying functions take a string path
206
+ func(str(cfg)) # underlying functions take a string path
182
207
  except Exception as e:
183
208
  click.echo(f" ERROR on {cfg}: {e}")
184
209
 
185
210
  click.echo("Batch processing complete.")
211
+
212
+
186
213
  ##########################################
187
214
 
215
+
188
216
  ####### concatenate command ###########
189
217
  @cli.command("concatenate")
190
218
  @click.argument(
@@ -244,9 +272,9 @@ def concatenate_cmd(
244
272
 
245
273
  Two modes:
246
274
 
247
- smftools concatenate out.h5ad --input-dir ./dir
275
+ smftools concatenate out.h5ad.gz --input-dir ./dir
248
276
 
249
- smftools concatenate out.h5ad --csv-path paths.csv --csv-column h5ad_path
277
+ smftools concatenate out.h5ad.gz --csv-path paths.csv --csv-column h5ad_path
250
278
 
251
279
  TXT input also works (one file path per line).
252
280
 
@@ -266,170 +294,57 @@ def concatenate_cmd(
266
294
  delete_inputs=delete,
267
295
  restore_backups=restore,
268
296
  )
269
- click.echo(f"Concatenated file written to: {out}")
297
+ click.echo(f"Concatenated file written to: {out}")
270
298
 
271
299
  except Exception as e:
272
300
  raise click.ClickException(str(e)) from e
301
+
302
+
273
303
  ##########################################
274
304
 
275
- ####### Merging existing anndatas from an experiment that used two different demultiplexing rules #######
276
- # REQUIRED_KEYS = ("adata_single_path", "adata_double_path")
277
- # OPTIONAL_KEYS = (
278
- # "adata_single_backups_path",
279
- # "adata_double_backups_path",
280
- # "output_path",
281
- # "merged_filename",
282
- # )
283
-
284
- # def _read_config_csv(csv_path: Path) -> Dict[str, str]:
285
- # """
286
- # Read a multi-row, two-column CSV of key,value pairs into a dict.
287
-
288
- # Supported features:
289
- # - Optional header ("key,value") or none.
290
- # - Comments starting with '#' and blank lines are ignored.
291
- # - If duplicate keys occur, the last one wins.
292
- # - Keys are matched literally against REQUIRED_KEYS/OPTIONAL_KEYS.
293
- # """
294
- # try:
295
- # # Read as two columns regardless of header; comments ignored.
296
- # df = pd.read_csv(
297
- # csv_path,
298
- # dtype=str,
299
- # comment="#",
300
- # header=None, # treat everything as rows; we'll normalize below
301
- # usecols=[0, 1],
302
- # names=["key", "value"]
303
- # )
304
- # except Exception as e:
305
- # raise click.ClickException(f"Failed to read CSV: {e}") from e
306
-
307
- # # Drop completely empty rows
308
- # df = df.fillna("").astype(str)
309
- # df["key"] = df["key"].str.strip()
310
- # df["value"] = df["value"].str.strip()
311
- # df = df[(df["key"] != "") & (df["key"].notna())]
312
-
313
- # if df.empty:
314
- # raise click.ClickException("Config CSV is empty after removing comments/blank lines.")
315
-
316
- # # Remove an optional header row if present
317
- # if df.iloc[0]["key"].lower() in {"key", "keys"}:
318
- # df = df.iloc[1:]
319
- # df = df[(df["key"] != "") & (df["key"].notna())]
320
- # if df.empty:
321
- # raise click.ClickException("Config CSV contains only a header row.")
322
-
323
- # # Build dict; last occurrence of a key wins
324
- # cfg = {}
325
- # for k, v in zip(df["key"], df["value"]):
326
- # cfg[k] = v
327
-
328
- # # Validate required keys
329
- # missing = [k for k in REQUIRED_KEYS if not cfg.get(k)]
330
- # if missing:
331
- # raise click.ClickException(
332
- # "Missing required keys in CSV: "
333
- # + ", ".join(missing)
334
- # + "\nExpected keys:\n - "
335
- # + "\n - ".join(REQUIRED_KEYS)
336
- # + "\nOptional keys:\n - "
337
- # + "\n - ".join(OPTIONAL_KEYS)
338
- # )
339
-
340
- # return cfg
341
-
342
- # def _resolve_output_path(cfg: Dict[str, str], single_path: Path, double_path: Path) -> Path:
343
- # """Decide on the output .h5ad path based on CSV; create directories if needed."""
344
- # merged_filename = cfg.get("merged_filename") or f"merged_{single_path.stem}__{double_path.stem}.h5ad"
345
- # if not merged_filename.endswith(".h5ad"):
346
- # merged_filename += ".h5ad"
347
-
348
- # output_path_raw = cfg.get("output_path", "").strip()
349
-
350
- # if not output_path_raw:
351
- # out_dir = Path.cwd() / "merged_output"
352
- # out_dir.mkdir(parents=True, exist_ok=True)
353
- # return out_dir / merged_filename
354
-
355
- # output_path = Path(output_path_raw)
356
-
357
- # if output_path.suffix.lower() == ".h5ad":
358
- # output_path.parent.mkdir(parents=True, exist_ok=True)
359
- # return output_path
360
-
361
- # # Treat as directory
362
- # output_path.mkdir(parents=True, exist_ok=True)
363
- # return output_path / merged_filename
364
-
365
- # def _maybe_read_adata(label: str, primary: Path, backups: Optional[Path]):
366
-
367
- # if backups:
368
- # click.echo(f"Loading {label} from {primary} with backups at {backups} ...")
369
- # return safe_read_h5ad(primary, backups_path=backups, restore_backups=True)
370
- # else:
371
- # click.echo(f"Loading {label} from {primary} with backups disabled ...")
372
- # return safe_read_h5ad(primary, restore_backups=False)
373
-
374
-
375
- # @cli.command()
376
- # @click.argument("config_path", type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path))
377
- # def merge_barcoded_anndatas(config_path: Path):
378
- # """
379
- # Merge two AnnData objects from the same experiment that were demultiplexed
380
- # under different end-barcoding requirements, using a 1-row CSV for config.
381
-
382
- # CSV must include:
383
- # - adata_single_path
384
- # - adata_double_path
385
-
386
- # Optional columns:
387
- # - adata_single_backups_path
388
- # - adata_double_backups_path
389
- # - output_path (file or directory; default: ./merged_output/)
390
- # - merged_filename (default: merged_<single>__<double>.h5ad)
391
-
392
- # Example CSV:
393
-
394
- # adata_single_path,adata_double_path,adata_single_backups_path,adata_double_backups_path,output_path,merged_filename
395
- # /path/single.h5ad,/path/double.h5ad,,,,merged_output,merged_run.h5ad
396
- # """
397
- # try:
398
- # cfg = _read_config_csv(config_path)
399
-
400
- # single_path = Path(cfg["adata_single_path"]).expanduser().resolve()
401
- # double_path = Path(cfg["adata_double_path"]).expanduser().resolve()
402
-
403
- # for p, label in [(single_path, "adata_single_path"), (double_path, "adata_double_path")]:
404
- # if not p.exists():
405
- # raise click.ClickException(f"{label} does not exist: {p}")
406
-
407
- # single_backups = Path(cfg["adata_single_backups_path"]).expanduser().resolve() if cfg.get("adata_single_backups_path") else None
408
- # double_backups = Path(cfg["adata_double_backups_path"]).expanduser().resolve() if cfg.get("adata_double_backups_path") else None
409
-
410
- # if single_backups and not single_backups.exists():
411
- # raise click.ClickException(f"adata_single_backups_path does not exist: {single_backups}")
412
- # if double_backups and not double_backups.exists():
413
- # raise click.ClickException(f"adata_double_backups_path does not exist: {double_backups}")
414
-
415
- # output_path = _resolve_output_path(cfg, single_path, double_path)
416
-
417
- # # Load
418
- # adata_single, read_report_single = _maybe_read_adata("single-barcoded AnnData", single_path, single_backups)
419
- # adata_double, read_report_double = _maybe_read_adata("double-barcoded AnnData", double_path, double_backups)
420
-
421
- # click.echo("Merging AnnDatas ...")
422
- # merged = merge_barcoded_anndatas_core(adata_single, adata_double)
423
-
424
- # click.echo(f"Writing merged AnnData to: {output_path}")
425
- # backup_dir = output_path.cwd() / "merged_backups"
426
- # safe_write_h5ad(merged, output_path, backup=True, backup_dir=backup_dir)
427
-
428
- # click.secho(f"Done. Merged AnnData saved to {output_path}", fg="green")
429
-
430
- # except click.ClickException:
431
- # raise
432
- # except Exception as e:
433
- # # Surface unexpected errors cleanly
434
- # raise click.ClickException(f"Unexpected error: {e}") from e
435
- ################################################################################################################
305
+
306
+ ####### subsample pod5 command ###########
307
+ @cli.command("subsample-pod5")
308
+ @click.argument(
309
+ "pod5_path",
310
+ type=click.Path(exists=True, path_type=Path),
311
+ )
312
+ @click.option(
313
+ "--read-names",
314
+ "-r",
315
+ type=click.Path(exists=True, path_type=Path),
316
+ default=None,
317
+ help="Text file with one read_id per line.",
318
+ )
319
+ @click.option(
320
+ "--n-reads",
321
+ "-n",
322
+ type=int,
323
+ default=None,
324
+ help="Randomly subsample N reads.",
325
+ )
326
+ @click.option(
327
+ "--outdir",
328
+ "-o",
329
+ type=click.Path(path_type=Path, file_okay=False),
330
+ required=True,
331
+ help="Output directory for subsampled POD5.",
332
+ )
333
+ def subsample_pod5_cmd(pod5_path, read_names, n_reads, outdir):
334
+ """
335
+ Subsample POD5 file(s) by read ID list or random sampling.
336
+ """
337
+
338
+ # --- Validate mutually exclusive options ---
339
+ if (read_names is None and n_reads is None) or (read_names and n_reads):
340
+ raise click.UsageError("You must specify exactly ONE of --read-names or --n-reads.")
341
+
342
+ outdir.mkdir(parents=True, exist_ok=True)
343
+
344
+ subsample_arg = str(read_names) if read_names else n_reads
345
+
346
+ subsample_pod5(
347
+ pod5_path=str(pod5_path),
348
+ read_name_path=subsample_arg,
349
+ output_directory=str(outdir),
350
+ )
smftools/config/__init__.py CHANGED
@@ -1 +1 @@
1
- from .experiment_config import LoadExperimentConfig, ExperimentConfig
1
+ from .experiment_config import ExperimentConfig, LoadExperimentConfig
smftools/config/conversion.yaml CHANGED
@@ -9,6 +9,17 @@ conversion_types:
9
9
  # Read QC Params
10
10
  read_mod_filtering_use_other_c_as_background: True
11
11
 
12
+ # Spatial Analysis - Autocorr params
13
+ autocorr_site_types:
14
+ - "GpC"
15
+
16
+ # Spatial Analysis - Clustermap params
17
+ layer_for_clustermap_plotting: 'nan0_0minus1'
18
+ clustermap_cmap_c: "coolwarm"
19
+ clustermap_cmap_gpc: "coolwarm"
20
+ clustermap_cmap_cpg: "viridis"
21
+ clustermap_cmap_a: "coolwarm"
22
+
12
23
  ######## smftools hmm params #########
13
24
  # HMM
14
25
  cpg: True # whether to use the default HMM endogenous CpG patch params
@@ -18,21 +29,21 @@ hmm_feature_sets:
18
29
  footprint:
19
30
  state: "Non-Modified"
20
31
  features:
21
- small_bound_stretch: [10, 30]
22
- medium_bound_stretch: [30, 110]
23
- putative_nucleosome: [110, 200]
32
+ small_bound_stretch: [6, 40]
33
+ medium_bound_stretch: [40, 100]
34
+ putative_nucleosome: [100, 200]
24
35
  large_bound_stretch: [200, inf]
25
36
  accessible:
26
37
  state: "Modified"
27
38
  features:
28
39
  small_accessible_patch: [3, 20]
29
40
  mid_accessible_patch: [20, 40]
30
- mid_large_accessible_patch: [40, 130]
31
- large_accessible_patch: [130, inf]
41
+ large_accessible_patch: [40, 110]
42
+ nucleosome_depleted_region: [110, inf]
32
43
  cpg:
33
44
  state: "Modified"
34
45
  features:
35
46
  cpg_patch: [0, inf]
36
47
 
37
48
  hmm_merge_layer_features:
38
- - ["GpC_all_accessible_features", 80]
49
+ - ["all_accessible_features", 60]
smftools/config/deaminase.yaml CHANGED
@@ -7,6 +7,8 @@ conversion_types:
7
7
 
8
8
  mod_target_bases:
9
9
  - "C"
10
+ enzyme_target_bases:
11
+ - "C"
10
12
 
11
13
  ######## smftools preprocess params #########
12
14
  read_mod_filtering_gpc_thresholds:
@@ -15,7 +17,7 @@ read_mod_filtering_gpc_thresholds:
15
17
  read_mod_filtering_cpg_thresholds:
16
18
  - null
17
19
  - null
18
- read_mod_filtering_any_c_thresholds:
20
+ read_mod_filtering_c_thresholds:
19
21
  - 0.01
20
22
  - 0.99
21
23
  read_mod_filtering_a_thresholds:
@@ -26,16 +28,16 @@ read_mod_filtering_use_other_c_as_background: False
26
28
 
27
29
  # Duplicate Detection Params
28
30
  duplicate_detection_site_types:
29
- - "any_C"
31
+ - "C"
30
32
 
31
33
  ######## smftools analyze params #########
32
34
  # Autocorrelation params
33
35
  autocorr_site_types:
34
- - "any_C"
36
+ - "C"
35
37
 
36
38
  # Correlation matrix params
37
39
  correlation_matrix_site_types:
38
- - "any_C_site"
40
+ - "C_site"
39
41
 
40
42
  # ######## smftools hmm params #########
41
43
  cpg: False # whether to use the default HMM endogenous CpG patch params
@@ -45,17 +47,17 @@ hmm_feature_sets:
45
47
  footprint:
46
48
  state: "Non-Modified"
47
49
  features:
48
- small_bound_stretch: [10, 30]
49
- medium_bound_stretch: [30, 110]
50
- putative_nucleosome: [110, 200]
50
+ small_bound_stretch: [6, 40]
51
+ medium_bound_stretch: [40, 100]
52
+ putative_nucleosome: [100, 200]
51
53
  large_bound_stretch: [200, inf]
52
54
  accessible:
53
55
  state: "Modified"
54
56
  features:
55
57
  small_accessible_patch: [3, 20]
56
58
  mid_accessible_patch: [20, 40]
57
- mid_large_accessible_patch: [40, 130]
58
- large_accessible_patch: [130, inf]
59
+ large_accessible_patch: [40, 110]
60
+ nucleosome_depleted_region: [110, inf]
59
61
 
60
62
  hmm_merge_layer_features:
61
- - ["C_all_accessible_features", 80]
63
+ - ["all_accessible_features", 60]