smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/_version.py +1 -1
- smftools/cli/chimeric_adata.py +1563 -0
- smftools/cli/helpers.py +18 -2
- smftools/cli/hmm_adata.py +18 -1
- smftools/cli/latent_adata.py +522 -67
- smftools/cli/load_adata.py +2 -2
- smftools/cli/preprocess_adata.py +32 -93
- smftools/cli/recipes.py +26 -0
- smftools/cli/spatial_adata.py +23 -109
- smftools/cli/variant_adata.py +423 -0
- smftools/cli_entry.py +41 -5
- smftools/config/conversion.yaml +0 -10
- smftools/config/deaminase.yaml +3 -0
- smftools/config/default.yaml +49 -13
- smftools/config/experiment_config.py +96 -3
- smftools/constants.py +4 -0
- smftools/hmm/call_hmm_peaks.py +1 -1
- smftools/informatics/binarize_converted_base_identities.py +2 -89
- smftools/informatics/converted_BAM_to_adata.py +53 -13
- smftools/informatics/h5ad_functions.py +83 -0
- smftools/informatics/modkit_extract_to_adata.py +4 -0
- smftools/plotting/__init__.py +26 -12
- smftools/plotting/autocorrelation_plotting.py +22 -4
- smftools/plotting/chimeric_plotting.py +1893 -0
- smftools/plotting/classifiers.py +28 -14
- smftools/plotting/general_plotting.py +58 -3362
- smftools/plotting/hmm_plotting.py +1586 -2
- smftools/plotting/latent_plotting.py +804 -0
- smftools/plotting/plotting_utils.py +243 -0
- smftools/plotting/position_stats.py +16 -8
- smftools/plotting/preprocess_plotting.py +281 -0
- smftools/plotting/qc_plotting.py +8 -3
- smftools/plotting/spatial_plotting.py +1134 -0
- smftools/plotting/variant_plotting.py +1231 -0
- smftools/preprocessing/__init__.py +3 -0
- smftools/preprocessing/append_base_context.py +1 -1
- smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
- smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
- smftools/preprocessing/append_variant_call_layer.py +480 -0
- smftools/preprocessing/flag_duplicate_reads.py +4 -4
- smftools/preprocessing/invert_adata.py +1 -0
- smftools/readwrite.py +109 -85
- smftools/tools/__init__.py +6 -0
- smftools/tools/calculate_knn.py +121 -0
- smftools/tools/calculate_nmf.py +18 -7
- smftools/tools/calculate_pca.py +180 -0
- smftools/tools/calculate_umap.py +70 -154
- smftools/tools/position_stats.py +4 -4
- smftools/tools/rolling_nn_distance.py +640 -3
- smftools/tools/sequence_alignment.py +140 -0
- smftools/tools/tensor_factorization.py +52 -4
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/METADATA +3 -1
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/RECORD +56 -42
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
smftools/cli/helpers.py
CHANGED
|
@@ -5,7 +5,16 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
import anndata as ad
|
|
7
7
|
|
|
8
|
-
from smftools.constants import
|
|
8
|
+
from smftools.constants import (
|
|
9
|
+
CHIMERIC_DIR,
|
|
10
|
+
H5_DIR,
|
|
11
|
+
HMM_DIR,
|
|
12
|
+
LATENT_DIR,
|
|
13
|
+
LOAD_DIR,
|
|
14
|
+
PREPROCESS_DIR,
|
|
15
|
+
SPATIAL_DIR,
|
|
16
|
+
VARIANT_DIR,
|
|
17
|
+
)
|
|
9
18
|
|
|
10
19
|
from ..metadata import write_runtime_schema_yaml
|
|
11
20
|
from ..readwrite import safe_write_h5ad
|
|
@@ -19,6 +28,8 @@ class AdataPaths:
|
|
|
19
28
|
spatial: Path
|
|
20
29
|
hmm: Path
|
|
21
30
|
latent: Path
|
|
31
|
+
variant: Path
|
|
32
|
+
chimeric: Path
|
|
22
33
|
|
|
23
34
|
|
|
24
35
|
def get_adata_paths(cfg) -> AdataPaths:
|
|
@@ -27,8 +38,8 @@ def get_adata_paths(cfg) -> AdataPaths:
|
|
|
27
38
|
"""
|
|
28
39
|
output_directory = Path(cfg.output_directory)
|
|
29
40
|
|
|
41
|
+
# Raw and preprocessed adata file paths will have set names.
|
|
30
42
|
raw = output_directory / LOAD_DIR / H5_DIR / f"{cfg.experiment_name}.h5ad.gz"
|
|
31
|
-
|
|
32
43
|
pp = output_directory / PREPROCESS_DIR / H5_DIR / f"{cfg.experiment_name}_preprocessed.h5ad.gz"
|
|
33
44
|
|
|
34
45
|
if cfg.smf_modality == "direct":
|
|
@@ -44,9 +55,12 @@ def get_adata_paths(cfg) -> AdataPaths:
|
|
|
44
55
|
|
|
45
56
|
pp_dedup_base = pp_dedup.name.removesuffix(".h5ad.gz")
|
|
46
57
|
|
|
58
|
+
# All of the following just append a new suffix to the preprocessed_deduplicated base name
|
|
47
59
|
spatial = output_directory / SPATIAL_DIR / H5_DIR / f"{pp_dedup_base}_spatial.h5ad.gz"
|
|
48
60
|
hmm = output_directory / HMM_DIR / H5_DIR / f"{pp_dedup_base}_hmm.h5ad.gz"
|
|
49
61
|
latent = output_directory / LATENT_DIR / H5_DIR / f"{pp_dedup_base}_latent.h5ad.gz"
|
|
62
|
+
variant = output_directory / VARIANT_DIR / H5_DIR / f"{pp_dedup_base}_variant.h5ad.gz"
|
|
63
|
+
chimeric = output_directory / CHIMERIC_DIR / H5_DIR / f"{pp_dedup_base}_chimeric.h5ad.gz"
|
|
50
64
|
|
|
51
65
|
return AdataPaths(
|
|
52
66
|
raw=raw,
|
|
@@ -55,6 +69,8 @@ def get_adata_paths(cfg) -> AdataPaths:
|
|
|
55
69
|
spatial=spatial,
|
|
56
70
|
hmm=hmm,
|
|
57
71
|
latent=latent,
|
|
72
|
+
variant=variant,
|
|
73
|
+
chimeric=chimeric,
|
|
58
74
|
)
|
|
59
75
|
|
|
60
76
|
|
smftools/cli/hmm_adata.py
CHANGED
|
@@ -18,7 +18,7 @@ from ..hmm.HMM import _safe_int_coords, _to_dense_np, create_hmm, normalize_hmm_
|
|
|
18
18
|
logger = get_logger(__name__)
|
|
19
19
|
|
|
20
20
|
if TYPE_CHECKING:
|
|
21
|
-
import torch
|
|
21
|
+
import torch
|
|
22
22
|
|
|
23
23
|
torch = require("torch", extra="torch", purpose="HMM CLI")
|
|
24
24
|
mpl = require("matplotlib", extra="plotting", purpose="HMM plotting")
|
|
@@ -586,9 +586,18 @@ def hmm_adata(config_path: str):
|
|
|
586
586
|
if paths.hmm.exists():
|
|
587
587
|
adata, _ = safe_read_h5ad(paths.hmm)
|
|
588
588
|
source_path = paths.hmm
|
|
589
|
+
elif paths.latent.exists():
|
|
590
|
+
adata, _ = safe_read_h5ad(paths.latent)
|
|
591
|
+
source_path = paths.latent
|
|
589
592
|
elif paths.spatial.exists():
|
|
590
593
|
adata, _ = safe_read_h5ad(paths.spatial)
|
|
591
594
|
source_path = paths.spatial
|
|
595
|
+
elif paths.chimeric.exists():
|
|
596
|
+
adata, _ = safe_read_h5ad(paths.chimeric)
|
|
597
|
+
source_path = paths.chimeric
|
|
598
|
+
elif paths.variant.exists():
|
|
599
|
+
adata, _ = safe_read_h5ad(paths.variant)
|
|
600
|
+
source_path = paths.variant
|
|
592
601
|
elif paths.pp_dedup.exists():
|
|
593
602
|
adata, _ = safe_read_h5ad(paths.pp_dedup)
|
|
594
603
|
source_path = paths.pp_dedup
|
|
@@ -1080,6 +1089,10 @@ def hmm_adata_core(
|
|
|
1080
1089
|
deaminase=deaminase,
|
|
1081
1090
|
min_signal=0,
|
|
1082
1091
|
index_col_suffix=cfg.reindexed_var_suffix,
|
|
1092
|
+
overlay_variant_calls=getattr(cfg, "overlay_variant_calls", False),
|
|
1093
|
+
variant_overlay_seq1_color=getattr(cfg, "variant_overlay_seq1_color", "white"),
|
|
1094
|
+
variant_overlay_seq2_color=getattr(cfg, "variant_overlay_seq2_color", "black"),
|
|
1095
|
+
variant_overlay_marker_size=getattr(cfg, "variant_overlay_marker_size", 4.0),
|
|
1083
1096
|
)
|
|
1084
1097
|
|
|
1085
1098
|
hmm_length_dir = hmm_directory / "12b_hmm_length_clustermaps"
|
|
@@ -1136,6 +1149,10 @@ def hmm_adata_core(
|
|
|
1136
1149
|
min_signal=0,
|
|
1137
1150
|
index_col_suffix=cfg.reindexed_var_suffix,
|
|
1138
1151
|
length_feature_ranges=length_feature_ranges,
|
|
1152
|
+
overlay_variant_calls=getattr(cfg, "overlay_variant_calls", False),
|
|
1153
|
+
variant_overlay_seq1_color=getattr(cfg, "variant_overlay_seq1_color", "white"),
|
|
1154
|
+
variant_overlay_seq2_color=getattr(cfg, "variant_overlay_seq2_color", "black"),
|
|
1155
|
+
variant_overlay_marker_size=getattr(cfg, "variant_overlay_marker_size", 4.0),
|
|
1139
1156
|
)
|
|
1140
1157
|
|
|
1141
1158
|
hmm_dir = hmm_directory / "13_hmm_bulk_traces"
|