smftools 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133):
  1. smftools/__init__.py +6 -8
  2. smftools/_settings.py +4 -6
  3. smftools/_version.py +1 -1
  4. smftools/cli/helpers.py +7 -1
  5. smftools/cli/hmm_adata.py +902 -244
  6. smftools/cli/load_adata.py +318 -198
  7. smftools/cli/preprocess_adata.py +285 -171
  8. smftools/cli/spatial_adata.py +137 -53
  9. smftools/cli_entry.py +94 -178
  10. smftools/config/__init__.py +1 -1
  11. smftools/config/conversion.yaml +5 -1
  12. smftools/config/deaminase.yaml +1 -1
  13. smftools/config/default.yaml +22 -17
  14. smftools/config/direct.yaml +8 -3
  15. smftools/config/discover_input_files.py +19 -5
  16. smftools/config/experiment_config.py +505 -276
  17. smftools/constants.py +37 -0
  18. smftools/datasets/__init__.py +2 -8
  19. smftools/datasets/datasets.py +32 -18
  20. smftools/hmm/HMM.py +2125 -1426
  21. smftools/hmm/__init__.py +2 -3
  22. smftools/hmm/archived/call_hmm_peaks.py +16 -1
  23. smftools/hmm/call_hmm_peaks.py +173 -193
  24. smftools/hmm/display_hmm.py +19 -6
  25. smftools/hmm/hmm_readwrite.py +13 -4
  26. smftools/hmm/nucleosome_hmm_refinement.py +102 -14
  27. smftools/informatics/__init__.py +30 -7
  28. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
  29. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  30. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  31. smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
  32. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
  33. smftools/informatics/archived/print_bam_query_seq.py +7 -1
  34. smftools/informatics/bam_functions.py +379 -156
  35. smftools/informatics/basecalling.py +51 -9
  36. smftools/informatics/bed_functions.py +90 -57
  37. smftools/informatics/binarize_converted_base_identities.py +18 -7
  38. smftools/informatics/complement_base_list.py +7 -6
  39. smftools/informatics/converted_BAM_to_adata.py +265 -122
  40. smftools/informatics/fasta_functions.py +161 -83
  41. smftools/informatics/h5ad_functions.py +195 -29
  42. smftools/informatics/modkit_extract_to_adata.py +609 -270
  43. smftools/informatics/modkit_functions.py +85 -44
  44. smftools/informatics/ohe.py +44 -21
  45. smftools/informatics/pod5_functions.py +112 -73
  46. smftools/informatics/run_multiqc.py +20 -14
  47. smftools/logging_utils.py +51 -0
  48. smftools/machine_learning/__init__.py +2 -7
  49. smftools/machine_learning/data/anndata_data_module.py +143 -50
  50. smftools/machine_learning/data/preprocessing.py +2 -1
  51. smftools/machine_learning/evaluation/__init__.py +1 -1
  52. smftools/machine_learning/evaluation/eval_utils.py +11 -14
  53. smftools/machine_learning/evaluation/evaluators.py +46 -33
  54. smftools/machine_learning/inference/__init__.py +1 -1
  55. smftools/machine_learning/inference/inference_utils.py +7 -4
  56. smftools/machine_learning/inference/lightning_inference.py +9 -13
  57. smftools/machine_learning/inference/sklearn_inference.py +6 -8
  58. smftools/machine_learning/inference/sliding_window_inference.py +35 -25
  59. smftools/machine_learning/models/__init__.py +10 -5
  60. smftools/machine_learning/models/base.py +28 -42
  61. smftools/machine_learning/models/cnn.py +15 -11
  62. smftools/machine_learning/models/lightning_base.py +71 -40
  63. smftools/machine_learning/models/mlp.py +13 -4
  64. smftools/machine_learning/models/positional.py +3 -2
  65. smftools/machine_learning/models/rnn.py +3 -2
  66. smftools/machine_learning/models/sklearn_models.py +39 -22
  67. smftools/machine_learning/models/transformer.py +68 -53
  68. smftools/machine_learning/models/wrappers.py +2 -1
  69. smftools/machine_learning/training/__init__.py +2 -2
  70. smftools/machine_learning/training/train_lightning_model.py +29 -20
  71. smftools/machine_learning/training/train_sklearn_model.py +9 -15
  72. smftools/machine_learning/utils/__init__.py +1 -1
  73. smftools/machine_learning/utils/device.py +7 -4
  74. smftools/machine_learning/utils/grl.py +3 -1
  75. smftools/metadata.py +443 -0
  76. smftools/plotting/__init__.py +19 -5
  77. smftools/plotting/autocorrelation_plotting.py +145 -44
  78. smftools/plotting/classifiers.py +162 -72
  79. smftools/plotting/general_plotting.py +347 -168
  80. smftools/plotting/hmm_plotting.py +42 -13
  81. smftools/plotting/position_stats.py +145 -85
  82. smftools/plotting/qc_plotting.py +20 -12
  83. smftools/preprocessing/__init__.py +8 -8
  84. smftools/preprocessing/append_base_context.py +105 -79
  85. smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
  86. smftools/preprocessing/{archives → archived}/calculate_complexity.py +3 -1
  87. smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
  88. smftools/preprocessing/binarize.py +21 -4
  89. smftools/preprocessing/binarize_on_Youden.py +127 -31
  90. smftools/preprocessing/binary_layers_to_ohe.py +17 -11
  91. smftools/preprocessing/calculate_complexity_II.py +86 -59
  92. smftools/preprocessing/calculate_consensus.py +28 -19
  93. smftools/preprocessing/calculate_coverage.py +44 -22
  94. smftools/preprocessing/calculate_pairwise_differences.py +2 -1
  95. smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
  96. smftools/preprocessing/calculate_position_Youden.py +103 -55
  97. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  98. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  99. smftools/preprocessing/clean_NaN.py +38 -28
  100. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  101. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +70 -37
  102. smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
  103. smftools/preprocessing/flag_duplicate_reads.py +688 -271
  104. smftools/preprocessing/invert_adata.py +26 -11
  105. smftools/preprocessing/load_sample_sheet.py +40 -22
  106. smftools/preprocessing/make_dirs.py +8 -3
  107. smftools/preprocessing/min_non_diagonal.py +2 -1
  108. smftools/preprocessing/recipes.py +56 -23
  109. smftools/preprocessing/reindex_references_adata.py +93 -27
  110. smftools/preprocessing/subsample_adata.py +33 -16
  111. smftools/readwrite.py +264 -109
  112. smftools/schema/__init__.py +11 -0
  113. smftools/schema/anndata_schema_v1.yaml +227 -0
  114. smftools/tools/__init__.py +3 -4
  115. smftools/tools/archived/classifiers.py +163 -0
  116. smftools/tools/archived/subset_adata_v1.py +10 -1
  117. smftools/tools/archived/subset_adata_v2.py +12 -1
  118. smftools/tools/calculate_umap.py +54 -15
  119. smftools/tools/cluster_adata_on_methylation.py +115 -46
  120. smftools/tools/general_tools.py +70 -25
  121. smftools/tools/position_stats.py +229 -98
  122. smftools/tools/read_stats.py +50 -29
  123. smftools/tools/spatial_autocorrelation.py +365 -192
  124. smftools/tools/subset_adata.py +23 -21
  125. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/METADATA +15 -43
  126. smftools-0.2.5.dist-info/RECORD +181 -0
  127. smftools-0.2.4.dist-info/RECORD +0 -176
  128. /smftools/preprocessing/{archives → archived}/add_read_length_and_mapping_qc.py +0 -0
  129. /smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
  130. /smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
  131. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
  132. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
  133. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
smftools/metadata.py ADDED
@@ -0,0 +1,443 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import platform
5
+ import subprocess
6
+ import sys
7
+ from datetime import datetime, timezone
8
+ from importlib.metadata import PackageNotFoundError, version
9
+ from pathlib import Path
10
+ from typing import Any, Iterable, Optional
11
+
12
+ from ._version import __version__
13
+ from .schema import SCHEMA_REGISTRY_RESOURCE, SCHEMA_REGISTRY_VERSION
14
+
15
+ _DEPENDENCIES = ("anndata", "numpy", "pandas", "scanpy", "torch")
16
+
17
+
18
+ def _iso_timestamp() -> str:
19
+ return datetime.now(timezone.utc).astimezone().isoformat()
20
+
21
+
22
+ def _safe_version(package_name: str) -> Optional[str]:
23
+ try:
24
+ return version(package_name)
25
+ except PackageNotFoundError:
26
+ return None
27
+
28
+
29
+ def _find_git_root(start: Path) -> Optional[Path]:
30
+ for candidate in [start, *start.parents]:
31
+ if (candidate / ".git").exists():
32
+ return candidate
33
+ return None
34
+
35
+
36
def _get_git_commit() -> Optional[str]:
    """Return the git HEAD commit of the source checkout, or None when not in a repo or git is unavailable."""
    repo_root = _find_git_root(Path(__file__).resolve())
    if repo_root is None:
        return None
    try:
        proc = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=repo_root,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        # git binary missing or not executable.
        return None
    if proc.returncode != 0:
        return None
    commit = proc.stdout.strip()
    return commit if commit else None
53
+
54
+
55
+ def _hash_file(path: Path, *, max_full_bytes: int = 50 * 1024 * 1024) -> dict[str, Any]:
56
+ stat = path.stat()
57
+ size = stat.st_size
58
+ mtime = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat()
59
+ hasher = hashlib.sha256()
60
+ hash_mode = "full"
61
+ hash_bytes = 0
62
+ chunk_size = 1024 * 1024
63
+
64
+ with path.open("rb") as handle:
65
+ if size <= max_full_bytes:
66
+ for chunk in iter(lambda: handle.read(1024 * 1024), b""):
67
+ hasher.update(chunk)
68
+ hash_bytes += len(chunk)
69
+ else:
70
+ hash_mode = "head_tail_1mb"
71
+ head = handle.read(chunk_size)
72
+ hasher.update(head)
73
+ hash_bytes += len(head)
74
+ if size > chunk_size:
75
+ handle.seek(max(size - chunk_size, 0))
76
+ tail = handle.read(chunk_size)
77
+ hasher.update(tail)
78
+ hash_bytes += len(tail)
79
+
80
+ return {
81
+ "size": size,
82
+ "mtime": mtime,
83
+ "hash": hasher.hexdigest(),
84
+ "hash_algorithm": "sha256",
85
+ "hash_mode": hash_mode,
86
+ "hash_bytes": hash_bytes,
87
+ }
88
+
89
+
90
def _path_record(path: Path, role: Optional[str] = None) -> dict[str, Any]:
    """Describe a filesystem path (existence, type, mtime/hash) for provenance records."""
    record: dict[str, Any] = {"path": str(path)}
    if role:
        record["role"] = role

    if not path.exists():
        record["exists"] = False
        return record
    record["exists"] = True

    if path.is_dir():
        # Directories get only an mtime; hashing directory contents would be expensive.
        record["mtime"] = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc).isoformat()
        record["type"] = "directory"
        return record

    record["type"] = "file"
    record.update(_hash_file(path))
    return record
108
+
109
+
110
+ def _normalize_paths(paths: Optional[Iterable[Path | str]]) -> list[Path]:
111
+ if not paths:
112
+ return []
113
+ normalized = []
114
+ for path in paths:
115
+ if path is None:
116
+ continue
117
+ normalized.append(Path(path))
118
+ return normalized
119
+
120
+
121
def _environment_snapshot() -> dict[str, Any]:
    """Capture smftools version, Python/platform info, and installed key-dependency versions.

    The walrus variable is named ``dep_version`` (not ``version``) so it does not
    shadow the ``importlib.metadata.version`` function imported at module level —
    PEP 572 binds walrus targets in the enclosing function scope, so reusing the
    name ``version`` would mask the import inside this function.
    """
    dependencies = {
        name: dep_version
        for name in _DEPENDENCIES
        if (dep_version := _safe_version(name))
    }
    return {
        "smftools_version": __version__,
        "python_version": platform.python_version(),
        "platform": platform.platform(),
        "system": platform.system(),
        "release": platform.release(),
        "machine": platform.machine(),
        "dependencies": dependencies,
    }
132
+
133
+
134
+ def _infer_dtype(value: Any) -> str:
135
+ if hasattr(value, "dtype"):
136
+ return str(value.dtype)
137
+ if hasattr(value, "dtypes"):
138
+ try:
139
+ return ",".join(str(dt) for dt in value.dtypes)
140
+ except TypeError:
141
+ return str(value.dtypes)
142
+ return type(value).__name__
143
+
144
+
145
def _schema_snapshot(adata) -> dict[str, Any]:
    """Summarize an AnnData object's structure (per-slot dtypes/shapes) for ``uns`` metadata.

    The matrix-valued slots (layers/obsm/obsp) share one summary shape, so the
    previously triplicated dict-comprehension is factored into a local helper.
    """

    def summarize(mapping) -> dict[str, Any]:
        # One entry per named matrix: dtype string plus shape as a plain list.
        return {
            name: {"dtype": _infer_dtype(matrix), "shape": list(matrix.shape)}
            for name, matrix in mapping.items()
        }

    return {
        "layers": summarize(adata.layers),
        "obs": {name: {"dtype": str(adata.obs[name].dtype)} for name in adata.obs.columns},
        "var": {name: {"dtype": str(adata.var[name].dtype)} for name in adata.var.columns},
        "obsm": summarize(adata.obsm),
        "obsp": summarize(adata.obsp),
        "uns_keys": sorted(adata.uns.keys()),
    }
166
+
167
+
168
def _runtime_schema_entries(items: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """Build placeholder runtime-schema entries (dtype only) for each named structure."""
    entries: dict[str, dict[str, Any]] = {}
    for key, value in items.items():
        entries[key] = {
            "dtype": _infer_dtype(value),
            "created_by": "runtime_snapshot",
            "modified_by": [],
            "notes": "",
            "requires": [],
            "optional_inputs": [],
        }
    return entries
180
+
181
+
182
def _runtime_schema_dict(adata, step_name: str, output_path: Optional[Path] = None) -> dict:
    """Build a full runtime schema snapshot for one pipeline stage from an AnnData object."""
    obs_items = {name: adata.obs[name] for name in adata.obs.columns}
    var_items = {name: adata.var[name] for name in adata.var.columns}
    stage_block = {
        "stage_requires": [],
        "obs": _runtime_schema_entries(obs_items),
        "var": _runtime_schema_entries(var_items),
        "obsm": _runtime_schema_entries(dict(adata.obsm.items())),
        "varm": _runtime_schema_entries(dict(adata.varm.items())),
        "layers": _runtime_schema_entries(dict(adata.layers.items())),
        "obsp": _runtime_schema_entries(dict(adata.obsp.items())),
        "uns": _runtime_schema_entries(dict(adata.uns.items())),
    }
    return {
        "schema_version": "runtime-1",
        "description": "Runtime AnnData schema snapshot (auto-generated).",
        "generated_at": _iso_timestamp(),
        "output_path": str(output_path) if output_path else None,
        "stages": {step_name: stage_block},
    }
205
+
206
+
207
# Slots whose members we can look up by ``key in slot`` / ``slot[key]``.
_RUNTIME_SLOTS = ("obs", "var", "layers", "obsm", "varm", "obsp", "uns")


def append_runtime_schema_entry(
    adata,
    *,
    stage: str,
    location: str,
    key: str,
    created_by: str,
    used_structures: Optional[list[str]] = None,
    notes: Optional[str] = None,
) -> None:
    """Append a runtime schema entry describing a newly created structure.

    Args:
        adata: AnnData object to annotate.
        stage: Pipeline stage name (e.g. "load", "preprocess").
        location: AnnData slot ("obs", "var", "layers", "obsm", "varm", "obsp", "uns").
        key: Name of the structure within the slot.
        created_by: Function or module responsible for creating the structure.
        used_structures: List of structures consumed to create this structure.
        notes: Optional notes (e.g., first line of a docstring).
    """
    smftools_uns = adata.uns.setdefault("smftools", {})
    runtime_schema = smftools_uns.setdefault(
        "runtime_schema",
        {
            "schema_version": "runtime-1",
            "description": "Runtime AnnData schema annotations (recorded during execution).",
            "generated_at": _iso_timestamp(),
            "stages": {},
        },
    )
    slot_block = (
        runtime_schema.setdefault("stages", {})
        .setdefault(stage, {})
        .setdefault(location, {})
    )

    # Resolve the newly created structure so its dtype can be recorded; an
    # unknown location or missing key leaves value as None ("unknown" dtype).
    value = None
    if location in _RUNTIME_SLOTS:
        container = getattr(adata, location)
        if key in container:
            value = container[key]

    slot_block[key] = {
        "dtype": _infer_dtype(value) if value is not None else "unknown",
        "created_by": created_by,
        "used_structures": used_structures or [],
        "notes": notes or "",
        "recorded_at": _iso_timestamp(),
    }
265
+
266
+
267
+ def _format_yaml_value(value: Any) -> str:
268
+ if value is None:
269
+ return "null"
270
+ if isinstance(value, bool):
271
+ return "true" if value else "false"
272
+ if isinstance(value, (int, float)):
273
+ return str(value)
274
+ if isinstance(value, str):
275
+ escaped = value.replace('"', '\\"')
276
+ return f'"{escaped}"'
277
+ return f'"{str(value)}"'
278
+
279
+
280
def _dump_yaml(data: Any, indent: int = 0) -> str:
    """Minimal recursive YAML emitter for the schema sidecar (one space per indent level)."""
    prefix = " " * indent
    if isinstance(data, dict):
        rendered: list[str] = []
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                # Nested structures go on following lines, one level deeper.
                rendered.append(f"{prefix}{key}:")
                rendered.append(_dump_yaml(value, indent + 1))
            else:
                rendered.append(f"{prefix}{key}: {_format_yaml_value(value)}")
        return "\n".join(rendered)
    if isinstance(data, list):
        rendered = []
        for item in data:
            if isinstance(item, (dict, list)):
                rendered.append(f"{prefix}-")
                rendered.append(_dump_yaml(item, indent + 1))
            else:
                rendered.append(f"{prefix}- {_format_yaml_value(item)}")
        return "\n".join(rendered)
    return f"{prefix}{_format_yaml_value(data)}"
301
+
302
+
303
+ def _schema_sidecar_path(output_path: Path) -> Path:
304
+ name = output_path.name
305
+ if name.endswith(".h5ad.gz"):
306
+ base = name[: -len(".h5ad.gz")]
307
+ elif name.endswith(".h5ad"):
308
+ base = name[: -len(".h5ad")]
309
+ else:
310
+ base = output_path.stem
311
+ return output_path.with_name(f"{base}.schema.yaml")
312
+
313
+
314
def write_runtime_schema_yaml(adata, output_path: Path, step_name: str) -> Path:
    """Write the runtime-schema sidecar YAML next to *output_path* and return its path.

    Prefers annotations recorded in ``adata.uns["smftools"]["runtime_schema"]``;
    falls back to a freshly generated snapshot when none were recorded.
    """
    recorded = adata.uns.get("smftools", {}).get("runtime_schema")
    if isinstance(recorded, dict):
        schema_dict = dict(recorded)
        # Fill only the top-level fields a partially recorded schema may lack.
        defaults = {
            "output_path": str(output_path),
            "generated_at": _iso_timestamp(),
            "schema_version": "runtime-1",
            "description": "Runtime AnnData schema annotations (recorded during execution).",
        }
        for field, fallback in defaults.items():
            schema_dict.setdefault(field, fallback)
    else:
        schema_dict = _runtime_schema_dict(adata, step_name, output_path=output_path)

    sidecar = _schema_sidecar_path(output_path)
    sidecar.write_text(_dump_yaml(schema_dict) + "\n", encoding="utf-8")
    return sidecar
330
+
331
+
332
+ def _append_unique_inputs(existing: list[dict[str, Any]], new_inputs: list[dict[str, Any]]) -> None:
333
+ seen = {
334
+ (item.get("path"), item.get("hash"), item.get("hash_mode"))
335
+ for item in existing
336
+ if item.get("path")
337
+ }
338
+ for item in new_inputs:
339
+ key = (item.get("path"), item.get("hash"), item.get("hash_mode"))
340
+ if key in seen:
341
+ continue
342
+ existing.append(item)
343
+ seen.add(key)
344
+
345
+
346
def record_smftools_metadata(
    adata,
    *,
    step_name: str,
    cfg: Optional[Any] = None,
    config_path: Optional[str | Path] = None,
    input_paths: Optional[Iterable[Path | str]] = None,
    output_path: Optional[Path | str] = None,
    status: str = "ok",
    cli_argv: Optional[list[str]] = None,
) -> None:
    """Record structured smftools metadata into AnnData.uns.

    Args:
        adata: AnnData object to update.
        step_name: Pipeline step name (e.g. "load", "preprocess").
        cfg: Optional ExperimentConfig to capture resolved params.
        config_path: Path to the experiment config file used.
        input_paths: Optional iterable of input artifacts (e.g. h5ad inputs).
        output_path: Optional output path written by this step.
        status: Step status string ("ok" or "failed").
        cli_argv: Optional command argument vector for provenance.
    """
    smftools_uns = adata.uns.setdefault("smftools", {})
    timestamp = _iso_timestamp()

    # Creation info is stamped once; subsequent calls leave it untouched.
    if "created_by" not in smftools_uns:
        smftools_uns["created_by"] = {
            "version": __version__,
            "time": timestamp,
            "git_commit": _get_git_commit(),
        }

    smftools_uns.setdefault("environment", _environment_snapshot())
    smftools_uns.setdefault("schema_version", "1")
    smftools_uns.setdefault("schema_registry_version", SCHEMA_REGISTRY_VERSION)
    smftools_uns.setdefault(
        "schema_registry_resource",
        f"smftools.schema:{SCHEMA_REGISTRY_RESOURCE}",
    )
    # The structural snapshot is always refreshed to reflect the current AnnData.
    smftools_uns["schema"] = _schema_snapshot(adata)

    provenance = smftools_uns.setdefault("provenance", {})
    inputs = provenance.setdefault("inputs", [])

    # Gather provenance records for the config plus every known input artifact.
    input_records: list[dict[str, Any]] = []
    if config_path:
        input_records.append(_path_record(Path(config_path), role="config"))
    if cfg is not None:
        cfg_paths = _normalize_paths(
            [
                cfg.input_data_path,
                cfg.fasta,
                cfg.sample_sheet_path,
                cfg.summary_file,
            ]
        )
        input_records.extend(_path_record(p, role="input") for p in cfg_paths)
        input_records.extend(
            _path_record(p, role="input") for p in _normalize_paths(cfg.input_files)
        )
    if input_paths:
        input_records.extend(_path_record(Path(p), role="input") for p in input_paths)

    _append_unique_inputs(inputs, input_records)

    outputs: dict[str, Any] = {
        "layers": sorted(adata.layers.keys()),
        "obs_columns": sorted(adata.obs.columns),
        "var_columns": sorted(adata.var.columns),
        "uns_keys": sorted(adata.uns.keys()),
        "obsm_keys": sorted(adata.obsm.keys()),
        "obsp_keys": sorted(adata.obsp.keys()),
    }
    if output_path is not None:
        out_path = Path(output_path)
        outputs["h5ad_path"] = str(out_path)
        outputs["schema_yaml_path"] = str(_schema_sidecar_path(out_path))

    runtime = {"device": getattr(cfg, "device", None), "threads": getattr(cfg, "threads", None)}

    step_record = {
        # Step id is a sha1 over name+timestamp — a provenance key, not a security hash.
        "id": hashlib.sha1(f"{step_name}-{timestamp}".encode("utf-8")).hexdigest(),
        "time": timestamp,
        "step": step_name,
        "smftools_version": __version__,
        "params": cfg.to_dict() if cfg is not None else None,
        "inputs": input_records,
        "outputs": outputs,
        "runtime": runtime,
        "status": status,
        "cli_argv": cli_argv if cli_argv is not None else list(sys.argv),
    }

    smftools_uns.setdefault("history", []).append(step_record)
@@ -1,8 +1,22 @@
1
1
  from .autocorrelation_plotting import *
2
+ from .classifiers import (
3
+ plot_feature_importances_or_saliency,
4
+ plot_model_curves_from_adata,
5
+ plot_model_curves_from_adata_with_frequency_grid,
6
+ plot_model_performance,
7
+ )
8
+ from .general_plotting import (
9
+ combined_hmm_raw_clustermap,
10
+ combined_raw_clustermap,
11
+ plot_hmm_layers_rolling_by_sample_ref,
12
+ )
2
13
  from .hmm_plotting import *
3
- from .position_stats import plot_bar_relative_risk, plot_volcano_relative_risk, plot_positionwise_matrix, plot_positionwise_matrix_grid
4
- from .general_plotting import combined_hmm_raw_clustermap, combined_raw_clustermap, plot_hmm_layers_rolling_by_sample_ref
5
- from .classifiers import plot_model_performance, plot_feature_importances_or_saliency, plot_model_curves_from_adata, plot_model_curves_from_adata_with_frequency_grid
14
+ from .position_stats import (
15
+ plot_bar_relative_risk,
16
+ plot_positionwise_matrix,
17
+ plot_positionwise_matrix_grid,
18
+ plot_volcano_relative_risk,
19
+ )
6
20
  from .qc_plotting import *
7
21
 
8
22
  __all__ = [
@@ -14,5 +28,5 @@ __all__ = [
14
28
  "plot_feature_importances_or_saliency",
15
29
  "plot_model_performance",
16
30
  "plot_model_curves_from_adata",
17
- "plot_model_curves_from_adata_with_frequency_grid"
18
- ]
31
+ "plot_model_curves_from_adata_with_frequency_grid",
32
+ ]