smftools 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/_version.py +1 -1
- smftools/cli/helpers.py +48 -0
- smftools/cli/hmm_adata.py +168 -145
- smftools/cli/load_adata.py +155 -95
- smftools/cli/preprocess_adata.py +222 -130
- smftools/cli/spatial_adata.py +441 -308
- smftools/cli_entry.py +4 -5
- smftools/config/conversion.yaml +12 -5
- smftools/config/deaminase.yaml +11 -9
- smftools/config/default.yaml +123 -19
- smftools/config/direct.yaml +3 -0
- smftools/config/experiment_config.py +120 -19
- smftools/hmm/HMM.py +12 -1
- smftools/hmm/__init__.py +0 -6
- smftools/hmm/archived/call_hmm_peaks.py +106 -0
- smftools/hmm/call_hmm_peaks.py +318 -90
- smftools/informatics/bam_functions.py +28 -29
- smftools/informatics/h5ad_functions.py +1 -1
- smftools/plotting/general_plotting.py +97 -51
- smftools/plotting/position_stats.py +3 -3
- smftools/preprocessing/__init__.py +2 -4
- smftools/preprocessing/append_base_context.py +34 -25
- smftools/preprocessing/append_binary_layer_by_base_context.py +2 -2
- smftools/preprocessing/binarize_on_Youden.py +10 -8
- smftools/preprocessing/calculate_complexity_II.py +1 -1
- smftools/preprocessing/calculate_coverage.py +16 -13
- smftools/preprocessing/calculate_position_Youden.py +41 -25
- smftools/preprocessing/calculate_read_modification_stats.py +1 -1
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +1 -1
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +1 -1
- smftools/preprocessing/flag_duplicate_reads.py +1 -1
- smftools/preprocessing/invert_adata.py +1 -1
- smftools/preprocessing/load_sample_sheet.py +1 -1
- smftools/preprocessing/reindex_references_adata.py +37 -0
- smftools/readwrite.py +94 -0
- {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/METADATA +18 -12
- {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/RECORD +46 -43
- /smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
- /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
- /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
- /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
- /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archives/add_read_length_and_mapping_qc.py} +0 -0
- /smftools/preprocessing/{calculate_complexity.py → archives/calculate_complexity.py} +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/WHEEL +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.4.dist-info}/licenses/LICENSE +0 -0
smftools/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.2.3"
|
|
1
|
+
__version__ = "0.2.4"
|
smftools/cli/helpers.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import anndata as ad
|
|
4
|
+
from ..readwrite import safe_write_h5ad
|
|
5
|
+
|
|
6
|
+
@dataclass
|
|
7
|
+
class AdataPaths:
|
|
8
|
+
raw: Path
|
|
9
|
+
pp: Path
|
|
10
|
+
pp_dedup: Path
|
|
11
|
+
spatial: Path
|
|
12
|
+
hmm: Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_adata_paths(cfg) -> AdataPaths:
|
|
16
|
+
"""
|
|
17
|
+
Central helper: given cfg, compute all standard AnnData paths.
|
|
18
|
+
"""
|
|
19
|
+
h5_dir = Path(cfg.output_directory) / "h5ads"
|
|
20
|
+
|
|
21
|
+
raw = h5_dir / f"{cfg.experiment_name}.h5ad.gz"
|
|
22
|
+
|
|
23
|
+
pp = h5_dir / f"{cfg.experiment_name}_preprocessed.h5ad.gz"
|
|
24
|
+
|
|
25
|
+
if cfg.smf_modality == "direct":
|
|
26
|
+
# direct SMF: duplicate-removed path is just preprocessed path
|
|
27
|
+
pp_dedup = pp
|
|
28
|
+
else:
|
|
29
|
+
pp_dedup = h5_dir / f"{cfg.experiment_name}_preprocessed_duplicates_removed.h5ad.gz"
|
|
30
|
+
|
|
31
|
+
pp_dedup_base = pp_dedup.name.removesuffix(".h5ad.gz")
|
|
32
|
+
|
|
33
|
+
spatial = h5_dir / f"{pp_dedup_base}_spatial.h5ad.gz"
|
|
34
|
+
hmm = h5_dir / f"{pp_dedup_base}_spatial_hmm.h5ad.gz"
|
|
35
|
+
|
|
36
|
+
return AdataPaths(
|
|
37
|
+
raw=raw,
|
|
38
|
+
pp=pp,
|
|
39
|
+
pp_dedup=pp_dedup,
|
|
40
|
+
spatial=spatial,
|
|
41
|
+
hmm=hmm,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
def write_gz_h5ad(adata: ad.AnnData, path: Path) -> Path:
|
|
45
|
+
if path.suffix != ".gz":
|
|
46
|
+
path = path.with_name(path.name + ".gz")
|
|
47
|
+
safe_write_h5ad(adata, path, compression="gzip", backup=True)
|
|
48
|
+
return path
|
smftools/cli/hmm_adata.py
CHANGED
|
@@ -63,9 +63,11 @@ def hmm_adata(config_path):
|
|
|
63
63
|
preprocessed_dedup_spatial_version_available = spatial_adata_path.exists()
|
|
64
64
|
preprocessed_dedup_spatial_hmm_version_available = hmm_adata_path.exists()
|
|
65
65
|
|
|
66
|
-
if cfg.force_redo_hmm_fit:
|
|
67
|
-
print(f"Forcing redo of
|
|
68
|
-
if
|
|
66
|
+
if cfg.force_redo_hmm_fit or cfg.force_redo_hmm_apply:
|
|
67
|
+
print(f"Forcing redo of hmm analysis workflow.")
|
|
68
|
+
if preprocessed_dedup_spatial_hmm_version_available:
|
|
69
|
+
adata, load_report = safe_read_h5ad(hmm_adata_path)
|
|
70
|
+
elif preprocessed_dedup_spatial_version_available:
|
|
69
71
|
adata, load_report = safe_read_h5ad(spatial_adata_path)
|
|
70
72
|
elif preprocessed_dup_removed_version_available:
|
|
71
73
|
adata, load_report = safe_read_h5ad(pp_dup_rem_adata_path)
|
|
@@ -74,7 +76,7 @@ def hmm_adata(config_path):
|
|
|
74
76
|
else:
|
|
75
77
|
print(f"Can not redo duplicate detection when there is no compatible adata available: either raw or preprocessed are required")
|
|
76
78
|
elif preprocessed_dedup_spatial_hmm_version_available:
|
|
77
|
-
|
|
79
|
+
adata, load_report = safe_read_h5ad(hmm_adata_path)
|
|
78
80
|
else:
|
|
79
81
|
if preprocessed_dedup_spatial_version_available:
|
|
80
82
|
adata, load_report = safe_read_h5ad(spatial_adata_path)
|
|
@@ -110,96 +112,126 @@ def hmm_adata(config_path):
|
|
|
110
112
|
if adata.uns.get(uns_key) is None:
|
|
111
113
|
adata.uns[uns_key] = []
|
|
112
114
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
mod_label = {'C': 'C'}.get(mod_site, mod_site)
|
|
122
|
-
hmm_path = hmm_dir / f"{sample}_{ref}_{mod_label}_hmm_model.pth"
|
|
123
|
-
|
|
124
|
-
# ensure the input obsm exists
|
|
125
|
-
obsm_key = f'{ref}_{mod_label}_site'
|
|
126
|
-
if obsm_key not in subset.obsm:
|
|
127
|
-
print(f"Skipping {sample} {ref} {mod_label}: missing obsm '{obsm_key}'")
|
|
115
|
+
if adata.uns.get('hmm_annotated', False) and not cfg.force_redo_hmm_fit and not cfg.force_redo_hmm_apply:
|
|
116
|
+
pass
|
|
117
|
+
else:
|
|
118
|
+
for sample in samples:
|
|
119
|
+
for ref in references:
|
|
120
|
+
mask = (adata.obs[cfg.sample_name_col_for_plotting] == sample) & (adata.obs[cfg.reference_column] == ref)
|
|
121
|
+
subset = adata[mask].copy()
|
|
122
|
+
if subset.shape[0] < 1:
|
|
128
123
|
continue
|
|
129
124
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
hmm.print_params()
|
|
134
|
-
else:
|
|
135
|
-
print(f"Fitting HMM for {sample} {ref} {mod_label}")
|
|
136
|
-
hmm = HMM.from_config(cfg)
|
|
137
|
-
# fit expects a list-of-seqs or 2D ndarray in the obsm
|
|
138
|
-
seqs = subset.obsm[obsm_key]
|
|
139
|
-
hmm.fit(seqs)
|
|
140
|
-
hmm.print_params()
|
|
141
|
-
hmm.save(hmm_path)
|
|
142
|
-
|
|
143
|
-
# Apply / annotate on the subset, then copy layers back to final_adata
|
|
144
|
-
if (not cfg.bypass_hmm_apply) or cfg.force_redo_hmm_apply:
|
|
145
|
-
print(f"Applying HMM on subset for {sample} {ref} {mod_label}")
|
|
146
|
-
# Use the new uns_key argument so subset will record appended layer names
|
|
147
|
-
# (annotate_adata modifies subset.obs/layers in-place and should write subset.uns[uns_key])
|
|
148
|
-
hmm.annotate_adata(subset,
|
|
149
|
-
obs_column=cfg.reference_column,
|
|
150
|
-
layer=cfg.layer_for_umap_plotting,
|
|
151
|
-
config=cfg)
|
|
152
|
-
|
|
153
|
-
#to_merge = [("C_all_accessible_features", 80)]
|
|
154
|
-
to_merge = cfg.hmm_merge_layer_features
|
|
155
|
-
for layer_to_merge, merge_distance in to_merge:
|
|
156
|
-
if layer_to_merge:
|
|
157
|
-
hmm.merge_intervals_in_layer(subset,
|
|
158
|
-
layer=layer_to_merge,
|
|
159
|
-
distance_threshold=merge_distance,
|
|
160
|
-
overwrite=True
|
|
161
|
-
)
|
|
162
|
-
else:
|
|
163
|
-
pass
|
|
125
|
+
for mod_site in cfg.hmm_methbases:
|
|
126
|
+
mod_label = {'C': 'C'}.get(mod_site, mod_site)
|
|
127
|
+
hmm_path = hmm_dir / f"{sample}_{ref}_{mod_label}_hmm_model.pth"
|
|
164
128
|
|
|
165
|
-
#
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
# nothing appended for this subset; continue
|
|
129
|
+
# ensure the input obsm exists
|
|
130
|
+
obsm_key = f'{ref}_{mod_label}_site'
|
|
131
|
+
if obsm_key not in subset.obsm:
|
|
132
|
+
print(f"Skipping {sample} {ref} {mod_label}: missing obsm '{obsm_key}'")
|
|
170
133
|
continue
|
|
171
134
|
|
|
172
|
-
#
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
135
|
+
# Fit or load model
|
|
136
|
+
if hmm_path.exists() and not cfg.force_redo_hmm_fit:
|
|
137
|
+
hmm = HMM.load(hmm_path)
|
|
138
|
+
hmm.print_params()
|
|
139
|
+
else:
|
|
140
|
+
print(f"Fitting HMM for {sample} {ref} {mod_label}")
|
|
141
|
+
hmm = HMM.from_config(cfg)
|
|
142
|
+
# fit expects a list-of-seqs or 2D ndarray in the obsm
|
|
143
|
+
seqs = subset.obsm[obsm_key]
|
|
144
|
+
hmm.fit(seqs)
|
|
145
|
+
hmm.print_params()
|
|
146
|
+
hmm.save(hmm_path)
|
|
147
|
+
|
|
148
|
+
# Apply / annotate on the subset, then copy layers back to final_adata
|
|
149
|
+
if cfg.bypass_hmm_apply:
|
|
150
|
+
pass
|
|
151
|
+
else:
|
|
152
|
+
print(f"Applying HMM on subset for {sample} {ref} {mod_label}")
|
|
153
|
+
# Use the new uns_key argument so subset will record appended layer names
|
|
154
|
+
# (annotate_adata modifies subset.obs/layers in-place and should write subset.uns[uns_key])
|
|
155
|
+
if smf_modality == "direct":
|
|
156
|
+
hmm_layer = cfg.output_binary_layer_name
|
|
157
|
+
else:
|
|
158
|
+
hmm_layer = None
|
|
159
|
+
|
|
160
|
+
hmm.annotate_adata(subset,
|
|
161
|
+
obs_column=cfg.reference_column,
|
|
162
|
+
layer=hmm_layer,
|
|
163
|
+
config=cfg,
|
|
164
|
+
force_redo=cfg.force_redo_hmm_apply
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
if adata.uns.get('hmm_annotated', False) and not cfg.force_redo_hmm_apply:
|
|
168
|
+
pass
|
|
169
|
+
else:
|
|
170
|
+
to_merge = cfg.hmm_merge_layer_features
|
|
171
|
+
for layer_to_merge, merge_distance in to_merge:
|
|
172
|
+
if layer_to_merge:
|
|
173
|
+
hmm.merge_intervals_in_layer(subset,
|
|
174
|
+
layer=layer_to_merge,
|
|
175
|
+
distance_threshold=merge_distance,
|
|
176
|
+
overwrite=True
|
|
177
|
+
)
|
|
178
|
+
else:
|
|
179
|
+
pass
|
|
180
|
+
|
|
181
|
+
# collect appended layers from subset.uns
|
|
182
|
+
appended = list(subset.uns.get(uns_key, []))
|
|
183
|
+
print(appended)
|
|
184
|
+
if len(appended) == 0:
|
|
185
|
+
# nothing appended for this subset; continue
|
|
186
|
+
continue
|
|
187
|
+
|
|
188
|
+
# copy each appended layer into adata
|
|
189
|
+
subset_mask_bool = mask.values if hasattr(mask, "values") else np.asarray(mask)
|
|
190
|
+
for layer_name in appended:
|
|
191
|
+
if layer_name not in subset.layers:
|
|
192
|
+
# defensive: skip
|
|
193
|
+
warnings.warn(f"Expected layer {layer_name} in subset but not found; skipping copy.")
|
|
194
|
+
continue
|
|
195
|
+
sub_layer = subset.layers[layer_name]
|
|
196
|
+
# ensure final layer exists and assign rows
|
|
197
|
+
try:
|
|
198
|
+
hmm._ensure_final_layer_and_assign(adata, layer_name, subset_mask_bool, sub_layer)
|
|
199
|
+
except Exception as e:
|
|
200
|
+
warnings.warn(f"Failed to copy layer {layer_name} into adata: {e}", stacklevel=2)
|
|
201
|
+
# fallback: if dense and small, try to coerce
|
|
202
|
+
if issparse(sub_layer):
|
|
203
|
+
arr = sub_layer.toarray()
|
|
204
|
+
else:
|
|
205
|
+
arr = np.asarray(sub_layer)
|
|
206
|
+
adata.layers[layer_name] = adata.layers.get(layer_name, np.zeros((adata.shape[0], arr.shape[1]), dtype=arr.dtype))
|
|
207
|
+
final_idx = np.nonzero(subset_mask_bool)[0]
|
|
208
|
+
adata.layers[layer_name][final_idx, :] = arr
|
|
209
|
+
|
|
210
|
+
# merge appended layer names into adata.uns
|
|
211
|
+
existing = list(adata.uns.get(uns_key, []))
|
|
212
|
+
for ln in appended:
|
|
213
|
+
if ln not in existing:
|
|
214
|
+
existing.append(ln)
|
|
215
|
+
adata.uns[uns_key] = existing
|
|
200
216
|
|
|
201
217
|
else:
|
|
202
218
|
pass
|
|
219
|
+
|
|
220
|
+
from ..hmm import call_hmm_peaks
|
|
221
|
+
hmm_dir = pp_dir / "11_hmm_peak_calling"
|
|
222
|
+
if hmm_dir.is_dir():
|
|
223
|
+
pass
|
|
224
|
+
else:
|
|
225
|
+
make_dirs([pp_dir, hmm_dir])
|
|
226
|
+
|
|
227
|
+
call_hmm_peaks(
|
|
228
|
+
adata,
|
|
229
|
+
feature_configs=cfg.hmm_peak_feature_configs,
|
|
230
|
+
ref_column=cfg.reference_column,
|
|
231
|
+
site_types=cfg.mod_target_bases,
|
|
232
|
+
save_plot=True,
|
|
233
|
+
output_dir=hmm_dir,
|
|
234
|
+
index_col_suffix=cfg.reindexed_var_suffix)
|
|
203
235
|
|
|
204
236
|
## Save HMM annotated adata
|
|
205
237
|
if not hmm_adata_path.exists():
|
|
@@ -215,85 +247,69 @@ def hmm_adata(config_path):
|
|
|
215
247
|
########################################################################################################################
|
|
216
248
|
|
|
217
249
|
############################################### HMM based feature plotting ###############################################
|
|
218
|
-
|
|
219
|
-
hmm_dir = pp_dir / "
|
|
250
|
+
from ..plotting import combined_hmm_raw_clustermap
|
|
251
|
+
hmm_dir = pp_dir / "12_hmm_clustermaps"
|
|
252
|
+
make_dirs([pp_dir, hmm_dir])
|
|
220
253
|
|
|
221
|
-
|
|
222
|
-
print(f'{hmm_dir} already exists.')
|
|
223
|
-
else:
|
|
224
|
-
make_dirs([pp_dir, hmm_dir])
|
|
225
|
-
from ..plotting import combined_hmm_raw_clustermap
|
|
226
|
-
feature_layers = [
|
|
227
|
-
"all_accessible_features",
|
|
228
|
-
"large_accessible_patch",
|
|
229
|
-
"small_bound_stretch",
|
|
230
|
-
"medium_bound_stretch",
|
|
231
|
-
"putative_nucleosome",
|
|
232
|
-
"all_accessible_features_merged",
|
|
233
|
-
]
|
|
234
|
-
|
|
235
|
-
layers: list[str] = []
|
|
236
|
-
|
|
237
|
-
if any(base in ["C", "CpG", "GpC"] for base in cfg.mod_target_bases):
|
|
238
|
-
if smf_modality == 'deaminase':
|
|
239
|
-
layers.extend([f"C_{layer}" for layer in feature_layers])
|
|
240
|
-
elif smf_modality == 'conversion':
|
|
241
|
-
layers.extend([f"GpC_{layer}" for layer in feature_layers])
|
|
242
|
-
|
|
243
|
-
if 'A' in cfg.mod_target_bases:
|
|
244
|
-
layers.extend([f"A_{layer}" for layer in feature_layers])
|
|
245
|
-
|
|
246
|
-
if not layers:
|
|
247
|
-
raise ValueError(
|
|
248
|
-
f"No HMM feature layers matched mod_target_bases={cfg.mod_target_bases} "
|
|
249
|
-
f"and smf_modality={smf_modality}"
|
|
250
|
-
)
|
|
251
|
-
|
|
252
|
-
if smf_modality == 'direct':
|
|
253
|
-
sort_by = "any_a"
|
|
254
|
-
else:
|
|
255
|
-
sort_by = 'gpc'
|
|
254
|
+
layers: list[str] = []
|
|
256
255
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
256
|
+
for base in cfg.hmm_methbases:
|
|
257
|
+
layers.extend([f"{base}_{layer}" for layer in cfg.hmm_clustermap_feature_layers])
|
|
258
|
+
|
|
259
|
+
if cfg.cpg:
|
|
260
|
+
layers.extend(["CpG_cpg_patch"])
|
|
261
|
+
|
|
262
|
+
if not layers:
|
|
263
|
+
raise ValueError(
|
|
264
|
+
f"No HMM feature layers matched mod_target_bases={cfg.mod_target_bases} "
|
|
265
|
+
f"and smf_modality={smf_modality}"
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
for layer in layers:
|
|
269
|
+
hmm_cluster_save_dir = hmm_dir / layer
|
|
270
|
+
if hmm_cluster_save_dir.is_dir():
|
|
271
|
+
pass
|
|
272
|
+
else:
|
|
273
|
+
make_dirs([hmm_cluster_save_dir])
|
|
260
274
|
|
|
261
275
|
combined_hmm_raw_clustermap(
|
|
262
276
|
adata,
|
|
263
277
|
sample_col=cfg.sample_name_col_for_plotting,
|
|
264
278
|
reference_col=cfg.reference_column,
|
|
265
279
|
hmm_feature_layer=layer,
|
|
266
|
-
layer_gpc=
|
|
267
|
-
layer_cpg=
|
|
268
|
-
|
|
269
|
-
layer_a=
|
|
270
|
-
cmap_hmm=
|
|
271
|
-
cmap_gpc=
|
|
272
|
-
cmap_cpg=
|
|
273
|
-
|
|
274
|
-
cmap_a=
|
|
280
|
+
layer_gpc=cfg.layer_for_clustermap_plotting,
|
|
281
|
+
layer_cpg=cfg.layer_for_clustermap_plotting,
|
|
282
|
+
layer_c=cfg.layer_for_clustermap_plotting,
|
|
283
|
+
layer_a=cfg.layer_for_clustermap_plotting,
|
|
284
|
+
cmap_hmm=cfg.clustermap_cmap_hmm,
|
|
285
|
+
cmap_gpc=cfg.clustermap_cmap_gpc,
|
|
286
|
+
cmap_cpg=cfg.clustermap_cmap_cpg,
|
|
287
|
+
cmap_c=cfg.clustermap_cmap_c,
|
|
288
|
+
cmap_a=cfg.clustermap_cmap_a,
|
|
275
289
|
min_quality=cfg.read_quality_filter_thresholds[0],
|
|
276
290
|
min_length=cfg.read_len_filter_thresholds[0],
|
|
277
291
|
min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[0],
|
|
278
292
|
min_position_valid_fraction=1-cfg.position_max_nan_threshold,
|
|
279
|
-
save_path=
|
|
293
|
+
save_path=hmm_cluster_save_dir,
|
|
280
294
|
normalize_hmm=False,
|
|
281
|
-
sort_by=
|
|
295
|
+
sort_by=cfg.hmm_clustermap_sortby, # options: 'gpc', 'cpg', 'gpc_cpg', 'none', or 'obs:<column>'
|
|
282
296
|
bins=None,
|
|
283
297
|
deaminase=deaminase,
|
|
284
|
-
min_signal=0
|
|
298
|
+
min_signal=0,
|
|
299
|
+
index_col_suffix=cfg.reindexed_var_suffix
|
|
285
300
|
)
|
|
286
301
|
|
|
287
|
-
hmm_dir = pp_dir / "
|
|
302
|
+
hmm_dir = pp_dir / "13_hmm_bulk_traces"
|
|
288
303
|
|
|
289
304
|
if hmm_dir.is_dir():
|
|
290
305
|
print(f'{hmm_dir} already exists.')
|
|
291
306
|
else:
|
|
292
307
|
make_dirs([pp_dir, hmm_dir])
|
|
293
308
|
from ..plotting import plot_hmm_layers_rolling_by_sample_ref
|
|
309
|
+
bulk_hmm_layers = [layer for layer in adata.uns['hmm_appended_layers'] if "_lengths" not in layer]
|
|
294
310
|
saved = plot_hmm_layers_rolling_by_sample_ref(
|
|
295
311
|
adata,
|
|
296
|
-
layers=
|
|
312
|
+
layers=bulk_hmm_layers,
|
|
297
313
|
sample_col=cfg.sample_name_col_for_plotting,
|
|
298
314
|
ref_col=cfg.reference_column,
|
|
299
315
|
window=101,
|
|
@@ -304,7 +320,7 @@ def hmm_adata(config_path):
|
|
|
304
320
|
show_raw=False
|
|
305
321
|
)
|
|
306
322
|
|
|
307
|
-
hmm_dir = pp_dir / "
|
|
323
|
+
hmm_dir = pp_dir / "14_hmm_fragment_distributions"
|
|
308
324
|
|
|
309
325
|
if hmm_dir.is_dir():
|
|
310
326
|
print(f'{hmm_dir} already exists.')
|
|
@@ -312,7 +328,14 @@ def hmm_adata(config_path):
|
|
|
312
328
|
make_dirs([pp_dir, hmm_dir])
|
|
313
329
|
from ..plotting import plot_hmm_size_contours
|
|
314
330
|
|
|
315
|
-
|
|
331
|
+
if smf_modality == 'deaminase':
|
|
332
|
+
fragments = [('C_all_accessible_features_lengths', 400), ('C_all_footprint_features_lengths', 250), ('C_all_accessible_features_merged_lengths', 800)]
|
|
333
|
+
elif smf_modality == 'conversion':
|
|
334
|
+
fragments = [('GpC_all_accessible_features_lengths', 400), ('GpC_all_footprint_features_lengths', 250), ('GpC_all_accessible_features_merged_lengths', 800)]
|
|
335
|
+
elif smf_modality == "direct":
|
|
336
|
+
fragments = [('A_all_accessible_features_lengths', 400), ('A_all_footprint_features_lengths', 200), ('A_all_accessible_features_merged_lengths', 800)]
|
|
337
|
+
|
|
338
|
+
for layer, max in fragments:
|
|
316
339
|
save_path = hmm_dir / layer
|
|
317
340
|
make_dirs([save_path])
|
|
318
341
|
|
|
@@ -328,9 +351,9 @@ def hmm_adata(config_path):
|
|
|
328
351
|
save_pdf=False,
|
|
329
352
|
save_each_page=True,
|
|
330
353
|
dpi=200,
|
|
331
|
-
smoothing_sigma=
|
|
332
|
-
normalize_after_smoothing=
|
|
333
|
-
cmap='
|
|
354
|
+
smoothing_sigma=(10, 10),
|
|
355
|
+
normalize_after_smoothing=True,
|
|
356
|
+
cmap='Greens',
|
|
334
357
|
log_scale_z=True
|
|
335
358
|
)
|
|
336
359
|
########################################################################################################################
|