smftools 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/_version.py +1 -1
- smftools/cli/helpers.py +32 -6
- smftools/cli/hmm_adata.py +232 -31
- smftools/cli/latent_adata.py +318 -0
- smftools/cli/load_adata.py +77 -73
- smftools/cli/preprocess_adata.py +178 -53
- smftools/cli/spatial_adata.py +149 -101
- smftools/cli_entry.py +12 -0
- smftools/config/conversion.yaml +11 -1
- smftools/config/default.yaml +38 -1
- smftools/config/experiment_config.py +53 -1
- smftools/constants.py +65 -0
- smftools/hmm/HMM.py +88 -0
- smftools/informatics/__init__.py +6 -0
- smftools/informatics/bam_functions.py +358 -8
- smftools/informatics/converted_BAM_to_adata.py +584 -163
- smftools/informatics/h5ad_functions.py +115 -2
- smftools/informatics/modkit_extract_to_adata.py +1003 -425
- smftools/informatics/sequence_encoding.py +72 -0
- smftools/logging_utils.py +21 -2
- smftools/metadata.py +1 -1
- smftools/plotting/__init__.py +9 -0
- smftools/plotting/general_plotting.py +2411 -628
- smftools/plotting/hmm_plotting.py +85 -7
- smftools/preprocessing/__init__.py +1 -0
- smftools/preprocessing/append_base_context.py +17 -17
- smftools/preprocessing/append_mismatch_frequency_sites.py +158 -0
- smftools/preprocessing/calculate_consensus.py +1 -1
- smftools/preprocessing/calculate_read_modification_stats.py +6 -1
- smftools/readwrite.py +53 -17
- smftools/schema/anndata_schema_v1.yaml +15 -1
- smftools/tools/__init__.py +4 -0
- smftools/tools/calculate_leiden.py +57 -0
- smftools/tools/calculate_nmf.py +119 -0
- smftools/tools/calculate_umap.py +91 -8
- smftools/tools/rolling_nn_distance.py +235 -0
- smftools/tools/tensor_factorization.py +169 -0
- {smftools-0.3.0.dist-info → smftools-0.3.1.dist-info}/METADATA +8 -6
- {smftools-0.3.0.dist-info → smftools-0.3.1.dist-info}/RECORD +42 -35
- {smftools-0.3.0.dist-info → smftools-0.3.1.dist-info}/WHEEL +0 -0
- {smftools-0.3.0.dist-info → smftools-0.3.1.dist-info}/entry_points.txt +0 -0
- {smftools-0.3.0.dist-info → smftools-0.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, Mapping
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from smftools.constants import (
|
|
8
|
+
MODKIT_EXTRACT_SEQUENCE_BASE_TO_INT,
|
|
9
|
+
MODKIT_EXTRACT_SEQUENCE_INT_TO_BASE,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def encode_sequence_to_int(
    sequence: str | Iterable[str],
    *,
    base_to_int: Mapping[str, int] = MODKIT_EXTRACT_SEQUENCE_BASE_TO_INT,
    unknown_base: str = "N",
) -> np.ndarray:
    """Translate a sequence of base characters into an int16 code array.

    Args:
        sequence: Either a string of bases or any iterable yielding one base
            character at a time.
        base_to_int: Lookup table from base character to integer code.
        unknown_base: Base whose code is substituted for every character that
            is missing from ``base_to_int``.

    Returns:
        np.ndarray: One-dimensional ``np.int16`` array with one code per base.

    Raises:
        ValueError: If ``unknown_base`` is not a key of ``base_to_int``. The
            check runs before any encoding, regardless of sequence content.
    """
    if unknown_base not in base_to_int:
        raise ValueError(f"Unknown base '{unknown_base}' not present in encoding map.")

    # A string already supports len(); any other iterable is materialized so
    # the exact element count can be handed to np.fromiter.
    bases = sequence if isinstance(sequence, str) else list(sequence)

    default_code = base_to_int[unknown_base]
    codes = (base_to_int.get(symbol, default_code) for symbol in bases)
    return np.fromiter(codes, dtype=np.int16, count=len(bases))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def decode_int_sequence(
    encoded_sequence: Iterable[int] | np.ndarray,
    *,
    int_to_base: Mapping[int, str] = MODKIT_EXTRACT_SEQUENCE_INT_TO_BASE,
    unknown_base: str = "N",
) -> list[str]:
    """Translate integer base codes back into base characters.

    Args:
        encoded_sequence: Iterable (or array) of integer codes to decode.
        int_to_base: Lookup table from integer code to base character.
        unknown_base: Character emitted for any code missing from
            ``int_to_base``.

    Returns:
        list[str]: One base character per input code, in input order.

    Raises:
        ValueError: If ``unknown_base`` is not among the values of
            ``int_to_base``.
    """
    if unknown_base not in int_to_base.values():
        raise ValueError(f"Unknown base '{unknown_base}' not present in decoding map.")

    decoded: list[str] = []
    for code in encoded_sequence:
        # int() normalizes NumPy integer scalars to plain Python ints before
        # they are used as lookup keys.
        decoded.append(int_to_base.get(int(code), unknown_base))
    return decoded
|
smftools/logging_utils.py
CHANGED
|
@@ -15,18 +15,37 @@ def setup_logging(
|
|
|
15
15
|
fmt: str = DEFAULT_LOG_FORMAT,
|
|
16
16
|
datefmt: str = DEFAULT_DATE_FORMAT,
|
|
17
17
|
log_file: Optional[Union[str, Path]] = None,
|
|
18
|
+
reconfigure: bool = False,
|
|
18
19
|
) -> None:
|
|
19
20
|
"""
|
|
20
21
|
Configure logging for smftools.
|
|
21
22
|
|
|
22
23
|
Should be called once by the CLI entrypoint.
|
|
23
|
-
Safe to call multiple times.
|
|
24
|
+
Safe to call multiple times, with optional reconfiguration.
|
|
24
25
|
"""
|
|
25
26
|
logger = logging.getLogger("smftools")
|
|
26
27
|
|
|
27
|
-
if logger.handlers:
|
|
28
|
+
if logger.handlers and not reconfigure:
|
|
29
|
+
if log_file is not None:
|
|
30
|
+
log_path = Path(log_file)
|
|
31
|
+
has_file_handler = any(
|
|
32
|
+
isinstance(handler, logging.FileHandler)
|
|
33
|
+
and Path(getattr(handler, "baseFilename", "")) == log_path
|
|
34
|
+
for handler in logger.handlers
|
|
35
|
+
)
|
|
36
|
+
if not has_file_handler:
|
|
37
|
+
log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
38
|
+
file_handler = logging.FileHandler(log_path)
|
|
39
|
+
file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
|
|
40
|
+
logger.addHandler(file_handler)
|
|
41
|
+
logger.setLevel(level)
|
|
28
42
|
return
|
|
29
43
|
|
|
44
|
+
if logger.handlers and reconfigure:
|
|
45
|
+
for handler in list(logger.handlers):
|
|
46
|
+
logger.removeHandler(handler)
|
|
47
|
+
handler.close()
|
|
48
|
+
|
|
30
49
|
formatter = logging.Formatter(fmt=fmt, datefmt=datefmt)
|
|
31
50
|
|
|
32
51
|
# Console handler (stderr)
|
smftools/metadata.py
CHANGED
|
@@ -12,7 +12,7 @@ from typing import Any, Iterable, Optional
|
|
|
12
12
|
from ._version import __version__
|
|
13
13
|
from .schema import SCHEMA_REGISTRY_RESOURCE, SCHEMA_REGISTRY_VERSION
|
|
14
14
|
|
|
15
|
-
_DEPENDENCIES = ("anndata", "numpy", "pandas", "
|
|
15
|
+
_DEPENDENCIES = ("anndata", "numpy", "pandas", "umap-learn", "pynndescent", "torch")
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def _iso_timestamp() -> str:
|
smftools/plotting/__init__.py
CHANGED
|
@@ -3,9 +3,18 @@ from __future__ import annotations
|
|
|
3
3
|
from importlib import import_module
|
|
4
4
|
|
|
5
5
|
_LAZY_ATTRS = {
|
|
6
|
+
"combined_hmm_length_clustermap": "smftools.plotting.general_plotting",
|
|
6
7
|
"combined_hmm_raw_clustermap": "smftools.plotting.general_plotting",
|
|
7
8
|
"combined_raw_clustermap": "smftools.plotting.general_plotting",
|
|
9
|
+
"plot_rolling_nn_and_layer": "smftools.plotting.general_plotting",
|
|
8
10
|
"plot_hmm_layers_rolling_by_sample_ref": "smftools.plotting.general_plotting",
|
|
11
|
+
"plot_nmf_components": "smftools.plotting.general_plotting",
|
|
12
|
+
"plot_cp_sequence_components": "smftools.plotting.general_plotting",
|
|
13
|
+
"plot_embedding": "smftools.plotting.general_plotting",
|
|
14
|
+
"plot_read_span_quality_clustermaps": "smftools.plotting.general_plotting",
|
|
15
|
+
"plot_pca": "smftools.plotting.general_plotting",
|
|
16
|
+
"plot_sequence_integer_encoding_clustermaps": "smftools.plotting.general_plotting",
|
|
17
|
+
"plot_umap": "smftools.plotting.general_plotting",
|
|
9
18
|
"plot_bar_relative_risk": "smftools.plotting.position_stats",
|
|
10
19
|
"plot_positionwise_matrix": "smftools.plotting.position_stats",
|
|
11
20
|
"plot_positionwise_matrix_grid": "smftools.plotting.position_stats",
|