smftools 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. smftools/__init__.py +43 -13
  2. smftools/_settings.py +6 -6
  3. smftools/_version.py +3 -1
  4. smftools/cli/__init__.py +1 -0
  5. smftools/cli/archived/cli_flows.py +2 -0
  6. smftools/cli/helpers.py +9 -1
  7. smftools/cli/hmm_adata.py +905 -242
  8. smftools/cli/load_adata.py +432 -280
  9. smftools/cli/preprocess_adata.py +287 -171
  10. smftools/cli/spatial_adata.py +141 -53
  11. smftools/cli_entry.py +119 -178
  12. smftools/config/__init__.py +3 -1
  13. smftools/config/conversion.yaml +5 -1
  14. smftools/config/deaminase.yaml +1 -1
  15. smftools/config/default.yaml +26 -18
  16. smftools/config/direct.yaml +8 -3
  17. smftools/config/discover_input_files.py +19 -5
  18. smftools/config/experiment_config.py +511 -276
  19. smftools/constants.py +37 -0
  20. smftools/datasets/__init__.py +4 -8
  21. smftools/datasets/datasets.py +32 -18
  22. smftools/hmm/HMM.py +2133 -1428
  23. smftools/hmm/__init__.py +24 -14
  24. smftools/hmm/archived/apply_hmm_batched.py +2 -0
  25. smftools/hmm/archived/calculate_distances.py +2 -0
  26. smftools/hmm/archived/call_hmm_peaks.py +18 -1
  27. smftools/hmm/archived/train_hmm.py +2 -0
  28. smftools/hmm/call_hmm_peaks.py +176 -193
  29. smftools/hmm/display_hmm.py +23 -7
  30. smftools/hmm/hmm_readwrite.py +20 -6
  31. smftools/hmm/nucleosome_hmm_refinement.py +104 -14
  32. smftools/informatics/__init__.py +55 -13
  33. smftools/informatics/archived/bam_conversion.py +2 -0
  34. smftools/informatics/archived/bam_direct.py +2 -0
  35. smftools/informatics/archived/basecall_pod5s.py +2 -0
  36. smftools/informatics/archived/basecalls_to_adata.py +2 -0
  37. smftools/informatics/archived/conversion_smf.py +2 -0
  38. smftools/informatics/archived/deaminase_smf.py +1 -0
  39. smftools/informatics/archived/direct_smf.py +2 -0
  40. smftools/informatics/archived/fast5_to_pod5.py +2 -0
  41. smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  42. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +16 -1
  43. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  44. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  45. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  46. smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  47. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  48. smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  49. smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  50. smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  51. smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  52. smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  53. smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  54. smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  55. smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  56. smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  57. smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  58. smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  59. smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  60. smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  61. smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  62. smftools/informatics/archived/helpers/archived/load_adata.py +5 -3
  63. smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  64. smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  65. smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  66. smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  67. smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  68. smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  69. smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  70. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +5 -1
  71. smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  72. smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  73. smftools/informatics/archived/print_bam_query_seq.py +9 -1
  74. smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  75. smftools/informatics/archived/subsample_pod5.py +2 -0
  76. smftools/informatics/bam_functions.py +1059 -269
  77. smftools/informatics/basecalling.py +53 -9
  78. smftools/informatics/bed_functions.py +357 -114
  79. smftools/informatics/binarize_converted_base_identities.py +21 -7
  80. smftools/informatics/complement_base_list.py +9 -6
  81. smftools/informatics/converted_BAM_to_adata.py +324 -137
  82. smftools/informatics/fasta_functions.py +251 -89
  83. smftools/informatics/h5ad_functions.py +202 -30
  84. smftools/informatics/modkit_extract_to_adata.py +623 -274
  85. smftools/informatics/modkit_functions.py +87 -44
  86. smftools/informatics/ohe.py +46 -21
  87. smftools/informatics/pod5_functions.py +114 -74
  88. smftools/informatics/run_multiqc.py +20 -14
  89. smftools/logging_utils.py +51 -0
  90. smftools/machine_learning/__init__.py +23 -12
  91. smftools/machine_learning/data/__init__.py +2 -0
  92. smftools/machine_learning/data/anndata_data_module.py +157 -50
  93. smftools/machine_learning/data/preprocessing.py +4 -1
  94. smftools/machine_learning/evaluation/__init__.py +3 -1
  95. smftools/machine_learning/evaluation/eval_utils.py +13 -14
  96. smftools/machine_learning/evaluation/evaluators.py +52 -34
  97. smftools/machine_learning/inference/__init__.py +3 -1
  98. smftools/machine_learning/inference/inference_utils.py +9 -4
  99. smftools/machine_learning/inference/lightning_inference.py +14 -13
  100. smftools/machine_learning/inference/sklearn_inference.py +8 -8
  101. smftools/machine_learning/inference/sliding_window_inference.py +37 -25
  102. smftools/machine_learning/models/__init__.py +12 -5
  103. smftools/machine_learning/models/base.py +34 -43
  104. smftools/machine_learning/models/cnn.py +22 -13
  105. smftools/machine_learning/models/lightning_base.py +78 -42
  106. smftools/machine_learning/models/mlp.py +18 -5
  107. smftools/machine_learning/models/positional.py +10 -4
  108. smftools/machine_learning/models/rnn.py +8 -3
  109. smftools/machine_learning/models/sklearn_models.py +46 -24
  110. smftools/machine_learning/models/transformer.py +75 -55
  111. smftools/machine_learning/models/wrappers.py +8 -3
  112. smftools/machine_learning/training/__init__.py +4 -2
  113. smftools/machine_learning/training/train_lightning_model.py +42 -23
  114. smftools/machine_learning/training/train_sklearn_model.py +11 -15
  115. smftools/machine_learning/utils/__init__.py +3 -1
  116. smftools/machine_learning/utils/device.py +12 -5
  117. smftools/machine_learning/utils/grl.py +8 -2
  118. smftools/metadata.py +443 -0
  119. smftools/optional_imports.py +31 -0
  120. smftools/plotting/__init__.py +32 -17
  121. smftools/plotting/autocorrelation_plotting.py +153 -48
  122. smftools/plotting/classifiers.py +175 -73
  123. smftools/plotting/general_plotting.py +350 -168
  124. smftools/plotting/hmm_plotting.py +53 -14
  125. smftools/plotting/position_stats.py +155 -87
  126. smftools/plotting/qc_plotting.py +25 -12
  127. smftools/preprocessing/__init__.py +35 -37
  128. smftools/preprocessing/append_base_context.py +105 -79
  129. smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
  130. smftools/preprocessing/{archives → archived}/add_read_length_and_mapping_qc.py +2 -0
  131. smftools/preprocessing/{archives → archived}/calculate_complexity.py +5 -1
  132. smftools/preprocessing/{archives → archived}/mark_duplicates.py +2 -0
  133. smftools/preprocessing/{archives → archived}/preprocessing.py +10 -6
  134. smftools/preprocessing/{archives → archived}/remove_duplicates.py +2 -0
  135. smftools/preprocessing/binarize.py +21 -4
  136. smftools/preprocessing/binarize_on_Youden.py +127 -31
  137. smftools/preprocessing/binary_layers_to_ohe.py +18 -11
  138. smftools/preprocessing/calculate_complexity_II.py +89 -59
  139. smftools/preprocessing/calculate_consensus.py +28 -19
  140. smftools/preprocessing/calculate_coverage.py +44 -22
  141. smftools/preprocessing/calculate_pairwise_differences.py +4 -1
  142. smftools/preprocessing/calculate_pairwise_hamming_distances.py +7 -3
  143. smftools/preprocessing/calculate_position_Youden.py +110 -55
  144. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  145. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  146. smftools/preprocessing/clean_NaN.py +38 -28
  147. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  148. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +72 -37
  149. smftools/preprocessing/filter_reads_on_modification_thresholds.py +183 -73
  150. smftools/preprocessing/flag_duplicate_reads.py +708 -303
  151. smftools/preprocessing/invert_adata.py +26 -11
  152. smftools/preprocessing/load_sample_sheet.py +40 -22
  153. smftools/preprocessing/make_dirs.py +9 -3
  154. smftools/preprocessing/min_non_diagonal.py +4 -1
  155. smftools/preprocessing/recipes.py +58 -23
  156. smftools/preprocessing/reindex_references_adata.py +93 -27
  157. smftools/preprocessing/subsample_adata.py +33 -16
  158. smftools/readwrite.py +264 -109
  159. smftools/schema/__init__.py +11 -0
  160. smftools/schema/anndata_schema_v1.yaml +227 -0
  161. smftools/tools/__init__.py +25 -18
  162. smftools/tools/archived/apply_hmm.py +2 -0
  163. smftools/tools/archived/classifiers.py +165 -0
  164. smftools/tools/archived/classify_methylated_features.py +2 -0
  165. smftools/tools/archived/classify_non_methylated_features.py +2 -0
  166. smftools/tools/archived/subset_adata_v1.py +12 -1
  167. smftools/tools/archived/subset_adata_v2.py +14 -1
  168. smftools/tools/calculate_umap.py +56 -15
  169. smftools/tools/cluster_adata_on_methylation.py +122 -47
  170. smftools/tools/general_tools.py +70 -25
  171. smftools/tools/position_stats.py +220 -99
  172. smftools/tools/read_stats.py +50 -29
  173. smftools/tools/spatial_autocorrelation.py +365 -192
  174. smftools/tools/subset_adata.py +23 -21
  175. smftools-0.3.0.dist-info/METADATA +147 -0
  176. smftools-0.3.0.dist-info/RECORD +182 -0
  177. smftools-0.2.4.dist-info/METADATA +0 -141
  178. smftools-0.2.4.dist-info/RECORD +0 -176
  179. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
  180. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
  181. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
smftools/hmm/HMM.py CHANGED
@@ -1,1587 +1,2292 @@
- import math
- from typing import List, Optional, Tuple, Union, Any, Dict, Sequence
+ from __future__ import annotations
+
  import ast
  import json
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Union

  import numpy as np
- import pandas as pd
- import torch
- import torch.nn as nn
+ from scipy.sparse import issparse

- def _logsumexp(vec: torch.Tensor, dim: int = -1, keepdim: bool = False) -> torch.Tensor:
-     return torch.logsumexp(vec, dim=dim, keepdim=keepdim)
+ from smftools.logging_utils import get_logger
+ from smftools.optional_imports import require

- class HMM(nn.Module):
-     """
-     Vectorized HMM (Bernoulli emissions) implemented in PyTorch.
-
-     Methods:
-         fit(data, ...) -> trains params in-place
-         predict(data, ...) -> list of (L, K) posterior marginals (gamma) numpy arrays
-         viterbi(seq, ...) -> (path_list, score)
-         batch_viterbi(data, ...) -> list of (path_list, score)
-         score(seq_or_list, ...) -> float or list of floats
-
-     Notes:
-         data: list of sequences (each sequence is iterable of {0,1,np.nan}).
-         impute_strategy: "ignore" (NaN treated as missing), "random" (fill NaNs randomly with 0/1).
-     """
+ if TYPE_CHECKING:
+     import torch as torch_types
+     import torch.nn as nn_types

-     def __init__(
-         self,
-         n_states: int = 2,
-         init_start: Optional[List[float]] = None,
-         init_trans: Optional[List[List[float]]] = None,
-         init_emission: Optional[List[float]] = None,
-         dtype: torch.dtype = torch.float64,
-         eps: float = 1e-8,
-         smf_modality: Optional[str] = None,
-     ):
-         super().__init__()
-         if n_states < 2:
-             raise ValueError("n_states must be >= 2")
-         self.n_states = n_states
-         self.eps = float(eps)
-         self.dtype = dtype
-         self.smf_modality = smf_modality
+ torch = require("torch", extra="torch", purpose="HMM modeling")
+ nn = torch.nn

-         # initialize params (probabilities)
-         if init_start is None:
-             start = np.full((n_states,), 1.0 / n_states, dtype=float)
-         else:
-             start = np.asarray(init_start, dtype=float)
-         if init_trans is None:
-             trans = np.full((n_states, n_states), 1.0 / n_states, dtype=float)
-         else:
-             trans = np.asarray(init_trans, dtype=float)
-         # --- sanitize init_emission so it's a 1-D list of P(obs==1 | state) ---
-         if init_emission is None:
-             emission = np.full((n_states,), 0.5, dtype=float)
-         else:
-             em_arr = np.asarray(init_emission, dtype=float)
-             # case: (K,2) -> pick P(obs==1) from second column
-             if em_arr.ndim == 2 and em_arr.shape[1] == 2 and em_arr.shape[0] == n_states:
-                 emission = em_arr[:, 1].astype(float)
-             # case: maybe shape (1,K,2) etc. -> try to collapse trailing axis of length 2
-             elif em_arr.ndim >= 2 and em_arr.shape[-1] == 2:
-                 emission = em_arr.reshape(-1, 2)[:n_states, 1].astype(float)
-             else:
-                 emission = em_arr.reshape(-1)[:n_states].astype(float)
+ logger = get_logger(__name__)
+ # =============================================================================
+ # Registry / Factory
+ # =============================================================================

-         # store as parameters (not trainable via grad; EM updates .data in-place)
-         self.start = nn.Parameter(torch.tensor(start, dtype=self.dtype), requires_grad=False)
-         self.trans = nn.Parameter(torch.tensor(trans, dtype=self.dtype), requires_grad=False)
-         self.emission = nn.Parameter(torch.tensor(emission, dtype=self.dtype), requires_grad=False)
+ _HMM_REGISTRY: Dict[str, type] = {}

-         self._normalize_params()

-     def _normalize_params(self):
-         with torch.no_grad():
-             # coerce shapes
-             K = self.n_states
-             self.start.data = self.start.data.squeeze()
-             if self.start.data.numel() != K:
-                 self.start.data = torch.full((K,), 1.0 / K, dtype=self.dtype)
+ def register_hmm(name: str):
+     """Decorator to register an HMM backend under a string key."""

-             self.trans.data = self.trans.data.squeeze()
-             if not (self.trans.data.ndim == 2 and self.trans.data.shape == (K, K)):
-                 if K == 2:
-                     self.trans.data = torch.tensor([[0.9,0.1],[0.1,0.9]], dtype=self.dtype)
-                 else:
-                     self.trans.data = torch.full((K, K), 1.0 / K, dtype=self.dtype)
+     def deco(cls):
+         """Register the provided class in the HMM registry."""
+         _HMM_REGISTRY[name] = cls
+         cls.hmm_name = name
+         return cls

-             self.emission.data = self.emission.data.squeeze()
-             if self.emission.data.numel() != K:
-                 self.emission.data = torch.full((K,), 0.5, dtype=self.dtype)
+     return deco

-             # now perform smoothing/normalization
-             self.start.data = (self.start.data + self.eps)
-             self.start.data = self.start.data / self.start.data.sum()

-             self.trans.data = (self.trans.data + self.eps)
-             row_sums = self.trans.data.sum(dim=1, keepdim=True)
-             row_sums[row_sums == 0.0] = 1.0
-             self.trans.data = self.trans.data / row_sums
+ def create_hmm(cfg: Union[dict, Any, None], arch: Optional[str] = None, **kwargs):
+     """
+     Factory: creates an HMM from cfg + arch (override).
+     """
+     key = (
+         arch
+         or getattr(cfg, "hmm_arch", None)
+         or (cfg.get("hmm_arch") if isinstance(cfg, dict) else None)
+         or "single"
+     )
+     if key not in _HMM_REGISTRY:
+         raise KeyError(f"Unknown hmm_arch={key!r}. Known: {sorted(_HMM_REGISTRY.keys())}")
+     return _HMM_REGISTRY[key].from_config(cfg, **kwargs)
+
+
+ # =============================================================================
+ # Small utilities
+ # =============================================================================
+ def _coerce_dtype_for_device(
+     dtype: torch.dtype, device: Optional[Union[str, torch.device]]
+ ) -> torch.dtype:
+     """MPS does not support float64. When targeting MPS, coerce to float32."""
+     dev = torch.device(device) if isinstance(device, str) else device
+     if dev is not None and getattr(dev, "type", None) == "mps" and dtype == torch.float64:
+         return torch.float32
+     return dtype
+
+
+ def _try_json_or_literal(x: Any) -> Any:
+     """Parse a string value as JSON or a Python literal when possible.
+
+     Args:
+         x: Value to parse.
+
+     Returns:
+         The parsed value if possible, otherwise the original value.
+     """
+     if x is None:
+         return None
+     if not isinstance(x, str):
+         return x
+     s = x.strip()
+     if not s:
+         return None
+     try:
+         return json.loads(s)
+     except Exception:
+         pass
+     try:
+         return ast.literal_eval(s)
+     except Exception:
+         return x
+
+
+ def _coerce_bool(x: Any) -> bool:
+     """Coerce a value into a boolean using common truthy strings.
+
+     Args:
+         x: Value to coerce.
+
+     Returns:
+         Boolean interpretation of the input.
+     """
+     if x is None:
+         return False
+     if isinstance(x, bool):
+         return x
+     if isinstance(x, (int, float)):
+         return bool(x)
+     s = str(x).strip().lower()
+     return s in ("1", "true", "t", "yes", "y", "on")
+

-             self.emission.data = self.emission.data.clamp(min=self.eps, max=1.0 - self.eps)
+ def _resolve_dtype(dtype_entry: Any) -> torch.dtype:
+     """Resolve a torch dtype from a config entry.

-     @staticmethod
-     def _resolve_dtype(dtype_entry):
-         """Accept torch.dtype, string ('float32'/'float64') or None -> torch.dtype."""
-         if dtype_entry is None:
-             return torch.float64
-         if isinstance(dtype_entry, torch.dtype):
-             return dtype_entry
-         s = str(dtype_entry).lower()
-         if "32" in s:
-             return torch.float32
-         if "16" in s:
-             return torch.float16
+     Args:
+         dtype_entry: Config value (string or torch.dtype).
+
+     Returns:
+         Resolved torch dtype.
+     """
+     if dtype_entry is None:
          return torch.float64
+     if isinstance(dtype_entry, torch.dtype):
+         return dtype_entry
+     s = str(dtype_entry).lower()
+     if "16" in s:
+         return torch.float16
+     if "32" in s:
+         return torch.float32
+     return torch.float64

-     @classmethod
-     def from_config(cls, cfg: Union[dict, "ExperimentConfig", None], *,
-                     override: Optional[dict] = None,
-                     device: Optional[Union[str, torch.device]] = None) -> "HMM":
-         """
-         Construct an HMM using keys from an ExperimentConfig instance or a plain dict.

-         cfg may be:
-           - an ExperimentConfig (your dataclass instance)
-           - a dict (e.g. loader.var_dict or merged defaults)
-           - None (uses internal defaults)
+ def _safe_int_coords(var_names) -> Tuple[np.ndarray, bool]:
+     """
+     Try to cast var_names to int coordinates. If not possible,
+     fall back to 0..L-1 index coordinates.
+     """
+     try:
+         coords = np.asarray(var_names, dtype=int)
+         return coords, True
+     except Exception:
+         return np.arange(len(var_names), dtype=int), False

-         override: optional dict to override resolved keys (handy for tests).
-         device: optional device string or torch.device to move model to.
-         """
-         # Accept ExperimentConfig dataclass
-         if cfg is None:
-             merged = {}
-         elif hasattr(cfg, "to_dict") and callable(getattr(cfg, "to_dict")):
-             merged = dict(cfg.to_dict())
-         elif isinstance(cfg, dict):
-             merged = dict(cfg)
-         else:
-             # try attr access as fallback
-             try:
-                 merged = {k: getattr(cfg, k) for k in dir(cfg) if k.startswith("hmm_")}
-             except Exception:
-                 merged = {}

-         if override:
-             merged.update(override)
+ def _logsumexp(x: torch.Tensor, dim: int) -> torch.Tensor:
+     """Compute log-sum-exp in a numerically stable way.

-         # basic resolution with fallback
-         n_states = int(merged.get("hmm_n_states", merged.get("n_states", 2)))
-         init_start = merged.get("hmm_init_start_probs", merged.get("hmm_init_start", None))
-         init_trans = merged.get("hmm_init_transition_probs", merged.get("hmm_init_trans", None))
-         init_emission = merged.get("hmm_init_emission_probs", merged.get("hmm_init_emission", None))
-         eps = float(merged.get("hmm_eps", merged.get("eps", 1e-8)))
-         dtype = cls._resolve_dtype(merged.get("hmm_dtype", merged.get("dtype", None)))
+     Args:
+         x: Input tensor.
+         dim: Dimension to reduce.

-         # coerce lists (if present) -> numpy arrays (the HMM constructor already sanitizes)
-         def _coerce_np(x):
-             if x is None:
-                 return None
-             return np.asarray(x, dtype=float)
+     Returns:
+         Reduced tensor.
+     """
+     return torch.logsumexp(x, dim=dim)

-         init_start = _coerce_np(init_start)
-         init_trans = _coerce_np(init_trans)
-         init_emission = _coerce_np(init_emission)

-         model = cls(
-             n_states=n_states,
-             init_start=init_start,
-             init_trans=init_trans,
-             init_emission=init_emission,
-             dtype=dtype,
-             eps=eps,
-             smf_modality=merged.get("smf_modality", None),
-         )
+ def _ensure_layer_full_shape(adata, name: str, dtype, fill_value=0):
+     """
+     Ensure adata.layers[name] exists with shape (n_obs, n_vars).
+     """
+     if name not in adata.layers:
+         arr = np.full((adata.n_obs, adata.n_vars), fill_value=fill_value, dtype=dtype)
+         adata.layers[name] = arr
+     else:
+         arr = _to_dense_np(adata.layers[name])
+         if arr.shape != (adata.n_obs, adata.n_vars):
+             raise ValueError(
+                 f"Layer '{name}' exists but has shape {arr.shape}; expected {(adata.n_obs, adata.n_vars)}"
+             )
+     return adata.layers[name]
+
+
+ def _assign_back_obs(final_adata, sub_adata, cols: List[str]):
+     """
+     Assign obs columns from sub_adata back into final_adata for the matching obs_names.
+     Works for list/object columns too.
+     """
+     idx = final_adata.obs_names.get_indexer(sub_adata.obs_names)
+     if (idx < 0).any():
+         raise ValueError("Some sub_adata.obs_names not found in final_adata.obs_names")

-         # move to device if requested
-         if device is not None:
-             if isinstance(device, str):
-                 device = torch.device(device)
-             model.to(device)
+     for c in cols:
+         final_adata.obs.iloc[idx, final_adata.obs.columns.get_loc(c)] = sub_adata.obs[c].values

-         # persist the config to the hmm class
-         cls.config = cfg

-         return model
+ def _to_dense_np(x):
+     """Convert sparse or array-like input to a dense NumPy array.

-     def update_from_config(self, cfg: Union[dict, "ExperimentConfig", None], *,
-                            override: Optional[dict] = None):
-         """
-         Update existing model parameters from a config or dict (in-place).
-         This will normalize / reinitialize start/trans/emission using same logic as constructor.
-         """
-         if cfg is None:
-             merged = {}
-         elif hasattr(cfg, "to_dict") and callable(getattr(cfg, "to_dict")):
-             merged = dict(cfg.to_dict())
-         elif isinstance(cfg, dict):
-             merged = dict(cfg)
-         else:
-             try:
-                 merged = {k: getattr(cfg, k) for k in dir(cfg) if k.startswith("hmm_")}
-             except Exception:
-                 merged = {}
+     Args:
+         x: Input array or sparse matrix.

-         if override:
-             merged.update(override)
+     Returns:
+         Dense NumPy array or None.
+     """
+     if x is None:
+         return None
+     if issparse(x):
+         return x.toarray()
+     return np.asarray(x)

-         # extract same keys as from_config
-         n_states = int(merged.get("hmm_n_states", self.n_states))
-         init_start = merged.get("hmm_init_start_probs", None)
-         init_trans = merged.get("hmm_init_transition_probs", None)
-         init_emission = merged.get("hmm_init_emission_probs", None)
-         eps = merged.get("hmm_eps", None)
-         dtype = merged.get("hmm_dtype", None)
-
-         # apply dtype/eps if present
-         if eps is not None:
-             self.eps = float(eps)
-         if dtype is not None:
-             self.dtype = self._resolve_dtype(dtype)
-
-         # if n_states changes we need a fresh re-init (easy approach: reconstruct)
-         if int(n_states) != int(self.n_states):
-             # rebuild self in-place: create a new model and copy tensors
-             new_model = HMM.from_config(merged)
-             # copy content
-             with torch.no_grad():
-                 self.n_states = new_model.n_states
-                 self.eps = new_model.eps
-                 self.dtype = new_model.dtype
-                 self.start.data = new_model.start.data.clone().to(self.start.device, dtype=self.dtype)
-                 self.trans.data = new_model.trans.data.clone().to(self.trans.device, dtype=self.dtype)
-                 self.emission.data = new_model.emission.data.clone().to(self.emission.device, dtype=self.dtype)
-             return

-         # else only update provided tensors
-         def _to_tensor(obj, shape_expected=None):
-             if obj is None:
-                 return None
-             arr = np.asarray(obj, dtype=float)
-             if shape_expected is not None:
-                 try:
-                     arr = arr.reshape(shape_expected)
-                 except Exception:
-                     # try to free-form slice/reshape (keep best-effort)
-                     arr = np.reshape(arr, shape_expected) if arr.size >= np.prod(shape_expected) else arr
-             return torch.tensor(arr, dtype=self.dtype, device=self.start.device)
+ def _ensure_2d_np(x):
+     """Ensure an array is 2D, reshaping 1D inputs.

-         with torch.no_grad():
-             if init_start is not None:
-                 t = _to_tensor(init_start, (self.n_states,))
-                 if t.numel() == self.n_states:
-                     self.start.data = t.clone()
-             if init_trans is not None:
-                 t = _to_tensor(init_trans, (self.n_states, self.n_states))
-                 if t.shape == (self.n_states, self.n_states):
-                     self.trans.data = t.clone()
-             if init_emission is not None:
-                 # attempt to extract P(obs==1) if shaped (K,2)
-                 arr = np.asarray(init_emission, dtype=float)
-                 if arr.ndim == 2 and arr.shape[1] == 2 and arr.shape[0] >= self.n_states:
-                     em = arr[: self.n_states, 1]
-                 else:
-                     em = arr.reshape(-1)[: self.n_states]
-                 t = torch.tensor(em, dtype=self.dtype, device=self.start.device)
-                 if t.numel() == self.n_states:
-                     self.emission.data = t.clone()
+     Args:
+         x: Input array-like.

-         # finally normalize
-         self._normalize_params()
+     Returns:
+         2D NumPy array.
+     """
+     x = _to_dense_np(x)
+     if x.ndim == 1:
+         x = x.reshape(1, -1)
+     if x.ndim != 2:
+         raise ValueError(f"Expected 2D array; got shape {x.shape}")
+     return x

-     def _ensure_device_dtype(self, device: Optional[torch.device]):
-         if device is None:
-             device = next(self.parameters()).device
-         self.start.data = self.start.data.to(device=device, dtype=self.dtype)
-         self.trans.data = self.trans.data.to(device=device, dtype=self.dtype)
-         self.emission.data = self.emission.data.to(device=device, dtype=self.dtype)
-         return device

-     @staticmethod
-     def _pad_and_mask(
-         data: List[List],
-         device: torch.device,
-         dtype: torch.dtype,
-         impute_strategy: str = "ignore",
-     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ # =============================================================================
+ # Feature-set normalization
+ # =============================================================================
+
+
+ def normalize_hmm_feature_sets(raw: Any) -> Dict[str, Dict[str, Any]]:
+     """
+     Canonical format:
+         {
+             "footprints": {"state": "Non-Modified", "features": {"small_bound_stretch": [0,50], ...}},
+             "accessible": {"state": "Modified", "features": {"all_accessible_features": [0, inf], ...}},
+             ...
+         }
+     Each feature range is [lo, hi) in genomic bp (or index units if coords aren't ints).
+     """
+     parsed = _try_json_or_literal(raw)
+     if not isinstance(parsed, dict):
+         return {}
+
+     def _coerce_bound(v):
+         """Coerce a bound value into a float or sentinel.
+
+         Args:
+             v: Bound value.
+
+         Returns:
+             Float, np.inf, or None.
          """
-         Pads sequences to shape (B, L). Returns (obs, mask, lengths)
-         - Accepts: list-of-seqs, or 2D ndarray (B, L).
-         - If a sequence element is itself an array (per-timestep feature vector),
-           collapse the last axis by mean (warns once).
+         if v is None:
+             return None
+         if isinstance(v, (int, float)):
+             return float(v)
+         s = str(v).strip().lower()
+         if s in ("inf", "infty", "np.inf", "infinite"):
+             return np.inf
+         if s in ("none", ""):
+             return None
+         try:
+             return float(v)
+         except Exception:
+             return None
+
+     def _coerce_map(feats):
+         """Coerce feature ranges into (lo, hi) tuples.
+
+         Args:
+             feats: Mapping of feature names to ranges.
+
+         Returns:
+             Mapping of feature names to numeric bounds.
          """
-         import warnings
-
-         # If somebody passed a 2-D ndarray directly, convert to list-of-rows
-         if isinstance(data, np.ndarray) and data.ndim == 2:
-             # convert rows -> python lists (scalars per timestep)
-             data = data.tolist()
-
-         B = len(data)
-         lengths = torch.tensor([len(s) for s in data], dtype=torch.long, device=device)
-         L = int(lengths.max().item()) if B > 0 else 0
-         obs = torch.zeros((B, L), dtype=dtype, device=device)
-         mask = torch.zeros((B, L), dtype=torch.bool, device=device)
-
-         warned_collapse = False
-
-         for i, seq in enumerate(data):
-             # seq may be list/ndarray of scalars OR list/ndarray of per-timestep arrays
-             arr = np.asarray(seq, dtype=float)
-
-             # If arr is shape (L,1,1,...) squeeze trailing singletons
-             while arr.ndim > 1 and arr.shape[-1] == 1:
-                 arr = np.squeeze(arr, axis=-1)
-
-             # If arr is still >1D (e.g., (L, F)), collapse the last axis by mean
-             if arr.ndim > 1:
-                 if not warned_collapse:
-                     warnings.warn(
-                         "HMM._pad_and_mask: collapsing per-timestep feature axis by mean "
-                         "(arr had shape {}). If you prefer a different reduction, "
-                         "preprocess your data.".format(arr.shape),
-                         stacklevel=2,
-                     )
-                     warned_collapse = True
-                 # collapse features -> scalar per timestep
-                 arr = np.asarray(arr, dtype=float).mean(axis=-1)
-
-             # now arr should be 1D (T,)
-             if arr.ndim == 0:
-                 # single scalar: treat as length-1 sequence
-                 arr = np.atleast_1d(arr)
-
-             nan_mask = np.isnan(arr)
-             if impute_strategy == "random" and nan_mask.any():
-                 arr[nan_mask] = np.random.choice([0, 1], size=nan_mask.sum())
-                 local_mask = np.ones_like(arr, dtype=bool)
+         out = {}
+         if not isinstance(feats, dict):
+             return out
+         for name, rng in feats.items():
+             if rng is None:
+                 out[name] = (0.0, np.inf)
+                 continue
+             if isinstance(rng, (list, tuple)) and len(rng) >= 2:
+                 lo = _coerce_bound(rng[0])
+                 hi = _coerce_bound(rng[1])
+                 lo = 0.0 if lo is None else float(lo)
+                 hi = np.inf if hi is None else float(hi)
+                 out[name] = (lo, hi)
              else:
-                 local_mask = ~nan_mask
-                 arr = np.where(local_mask, arr, 0.0)
+                 hi = _coerce_bound(rng)
+                 hi = np.inf if hi is None else float(hi)
+                 out[name] = (0.0, hi)
+         return out
+
+     out: Dict[str, Dict[str, Any]] = {}
+     for group, info in parsed.items():
+         if isinstance(info, dict):
+             feats = _coerce_map(info.get("features", info.get("ranges", {})))
+             state = info.get("state", info.get("label", "Modified"))
+         else:
+             feats = _coerce_map(info)
+             state = "Modified"
+         out[group] = {"features": feats, "state": state}
+     return out

-             L_i = arr.shape[0]
-             obs[i, :L_i] = torch.tensor(arr, dtype=dtype, device=device)
-             mask[i, :L_i] = torch.tensor(local_mask, dtype=torch.bool, device=device)

-         return obs, mask, lengths
+ # =============================================================================
+ # BaseHMM: shared decoding + annotation pipeline
+ # =============================================================================

-     def _log_emission(self, obs: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
-         """
-         obs: (B, L)
-         mask: (B, L) bool
-         returns logB (B, L, K)
-         """
-         B, L = obs.shape
-         p = self.emission # (K,)
-         logp = torch.log(p + self.eps)
-         log1mp = torch.log1p(-p + self.eps)
-         obs_expand = obs.unsqueeze(-1) # (B, L, 1)
-         logB = obs_expand * logp.unsqueeze(0).unsqueeze(0) + (1.0 - obs_expand) * log1mp.unsqueeze(0).unsqueeze(0)
-         logB = torch.where(mask.unsqueeze(-1), logB, torch.zeros_like(logB))
-         return logB

-     def fit(
-         self,
-         data: List[List],
-         max_iter: int = 100,
-         tol: float = 1e-4,
-         impute_strategy: str = "ignore",
-         verbose: bool = True,
-         return_history: bool = False,
-         device: Optional[Union[torch.device, str]] = None,
-     ):
-         """
-         Vectorized Baum-Welch EM across a batch of sequences (padded).
-         """
-         if device is None:
-             device = next(self.parameters()).device
-         elif isinstance(device, str):
-             device = torch.device(device)
-         device = self._ensure_device_dtype(device)
+ class BaseHMM(nn.Module):
+     """
+     BaseHMM responsibilities:
+       - config resolution (from_config)
+       - EM fit wrapper (fit / fit_em)
+       - decoding (gamma / viterbi)
+       - AnnData annotation from provided arrays (X + coords)
+       - save/load registry aware
+     Subclasses implement:
+       - _log_emission(...) -> logB
+       - optional distance-aware transition handling
+     """

-         if isinstance(data, np.ndarray):
-             if data.ndim == 2:
-                 # rows are sequences: convert to list of 1D arrays
-                 data = data.tolist()
-             elif data.ndim == 1:
-                 # single sequence
-                 data = [data.tolist()]
-             else:
-                 raise ValueError(f"Expected data to be 1D or 2D ndarray; got array with ndim={data.ndim}")
+     def __init__(self, n_states: int = 2, eps: float = 1e-8, dtype: torch.dtype = torch.float64):
+         """Initialize the base HMM with shared parameters.

-         obs, mask, lengths = self._pad_and_mask(data, device=device, dtype=self.dtype, impute_strategy=impute_strategy)
-         B, L = obs.shape
-         K = self.n_states
-         eps = float(self.eps)
+         Args:
+             n_states: Number of hidden states.
+             eps: Smoothing epsilon for probabilities.
+             dtype: Torch dtype for parameters.
+         """
+         super().__init__()
+         if n_states < 2:
+             raise ValueError("n_states must be >= 2")
+         self.n_states = int(n_states)
+         self.eps = float(eps)
+         self.dtype = dtype

-         if verbose:
-             print(f"[HMM.fit] device={device}, batch={B}, max_len={L}, states={K}")
+         # start probs + transitions (shared across backends)
+         start = np.full((self.n_states,), 1.0 / self.n_states, dtype=float)
+         trans = np.full((self.n_states, self.n_states), 1.0 / self.n_states, dtype=float)

-         loglik_history = []
+         self.start = nn.Parameter(torch.tensor(start, dtype=self.dtype), requires_grad=False)
+         self.trans = nn.Parameter(torch.tensor(trans, dtype=self.dtype), requires_grad=False)
+         self._normalize_params()

-         for it in range(1, max_iter + 1):
-             if verbose:
-                 print(f"[HMM.fit] EM iter {it}")
+     # ------------------------- config -------------------------

-             # compute batched emission logs
-             logB = self._log_emission(obs, mask) # (B, L, K)
+     @classmethod
+     def from_config(
+         cls, cfg: Union[dict, Any, None], *, override: Optional[dict] = None, device=None
+     ):
+         """Create a model from config with optional overrides.

-             # logs for start and transition
-             logA = torch.log(self.trans + eps) # (K, K)
-             logstart = torch.log(self.start + eps) # (K,)
+         Args:
+             cfg: Configuration mapping or object.
+             override: Override values to apply.
+             device: Device specifier.

-             # Forward (batched)
-             alpha = torch.empty((B, L, K), dtype=self.dtype, device=device)
-             alpha[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :] # (B,K)
-             for t in range(1, L):
-                 # prev: (B, i, 1) + (1, i, j) broadcast => (B, i, j)
-                 prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0) # (B, K, K)
-                 alpha[:, t, :] = _logsumexp(prev, dim=1) + logB[:, t, :]
+         Returns:
+             Initialized HMM instance.
+         """
+         merged = cls._cfg_to_dict(cfg)
+         if override:
+             merged.update(override)

-             # Backward (batched)
-             beta = torch.empty((B, L, K), dtype=self.dtype, device=device)
-             beta[:, L - 1, :] = torch.zeros((K,), dtype=self.dtype, device=device).unsqueeze(0).expand(B, K)
-             for t in range(L - 2, -1, -1):
-                 # temp (B, i, j) = logA[i,j] + logB[:,t+1,j] + beta[:,t+1,j]
-                 temp = logA.unsqueeze(0) + (logB[:, t + 1, :].unsqueeze(1) + beta[:, t + 1, :].unsqueeze(1))
-                 beta[:, t, :] = _logsumexp(temp, dim=2)
+         n_states = int(merged.get("hmm_n_states", merged.get("n_states", 2)))
+         eps = float(merged.get("hmm_eps", merged.get("eps", 1e-8)))
+         dtype = _resolve_dtype(merged.get("hmm_dtype", merged.get("dtype", None)))
+         dtype = _coerce_dtype_for_device(dtype, device) # <<< NEW

-             # sequence log-likelihoods (use last real index)
-             last_idx = (lengths - 1).clamp(min=0)
-             idx_range = torch.arange(B, device=device)
-             final_alpha = alpha[idx_range, last_idx, :] # (B, K)
-             seq_loglikes = _logsumexp(final_alpha, dim=1) # (B,)
-             total_loglike = float(seq_loglikes.sum().item())
+         model = cls(n_states=n_states, eps=eps, dtype=dtype)
+         if device is not None:
+             model.to(torch.device(device) if isinstance(device, str) else device)
+         model._persisted_cfg = merged
+         return model

-             # posterior gamma (B, L, K)
-             log_gamma = alpha + beta # (B, L, K)
-             logZ_time = _logsumexp(log_gamma, dim=2, keepdim=True) # (B, L, 1)
-             gamma = (log_gamma - logZ_time).exp() # (B, L, K)
+     @staticmethod
+     def _cfg_to_dict(cfg: Union[dict, Any, None]) -> dict:
+         """Normalize a config object into a dictionary.

-             # accumulators: starts, transitions, emissions
-             gamma_start_accum = gamma[:, 0, :].sum(dim=0) # (K,)
+         Args:
+             cfg: Config mapping or object.

-             # emission accumulators: sum over observed positions only
-             mask_f = mask.unsqueeze(-1) # (B, L, 1)
-             emit_num = (gamma * obs.unsqueeze(-1) * mask_f).sum(dim=(0, 1)) # (K,)
-             emit_den = (gamma * mask_f).sum(dim=(0, 1)) # (K,)
+         Returns:
+             Dictionary of HMM-related config values.
+         """
+         if cfg is None:
+             return {}
+         if isinstance(cfg, dict):
+             return dict(cfg)
+         if hasattr(cfg, "to_dict") and callable(getattr(cfg, "to_dict")):
+             return dict(cfg.to_dict())
+         out = {}
+         for k in dir(cfg):
+             if k.startswith("hmm_") or k in ("smf_modality", "cpg"):
+                 try:
+                     out[k] = getattr(cfg, k)
+                 except Exception:
+                     pass
+         return out

-             # transitions: accumulate xi across t for valid positions
-             trans_accum = torch.zeros((K, K), dtype=self.dtype, device=device)
-             if L >= 2:
-                 time_idx = torch.arange(L - 1, device=device).unsqueeze(0).expand(B, L - 1) # (B, L-1)
-                 valid = time_idx < (lengths.unsqueeze(1) - 1) # (B, L-1) bool
-                 for t in range(L - 1):
-                     a_t = alpha[:, t, :].unsqueeze(2) # (B, i, 1)
-                     b_next = (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1) # (B, 1, j)
-                     log_xi_unnorm = a_t + logA.unsqueeze(0) + b_next # (B, i, j)
-                     log_xi_flat = log_xi_unnorm.view(B, -1) # (B, i*j)
-                     log_norm = _logsumexp(log_xi_flat, dim=1).unsqueeze(1).unsqueeze(2) # (B,1,1)
-                     xi = (log_xi_unnorm - log_norm).exp() # (B,i,j)
-                     valid_t = valid[:, t].float().unsqueeze(1).unsqueeze(2) # (B,1,1)
-                     xi_masked = xi * valid_t
-                     trans_accum += xi_masked.sum(dim=0) # (i,j)
-
-             # M-step: update parameters with smoothing
-             with torch.no_grad():
-                 new_start = gamma_start_accum + eps
-                 new_start = new_start / new_start.sum()
+     # ------------------------- params -------------------------

- new_trans = trans_accum + eps
477
- row_sums = new_trans.sum(dim=1, keepdim=True)
478
- row_sums[row_sums == 0.0] = 1.0
479
- new_trans = new_trans / row_sums
403
+ def _normalize_params(self):
404
+ """Normalize start and transition probabilities in-place."""
405
+ with torch.no_grad():
406
+ K = self.n_states
480
407
 
481
- new_emission = (emit_num + eps) / (emit_den + 2.0 * eps)
482
- new_emission = new_emission.clamp(min=eps, max=1.0 - eps)
408
+ # start
409
+ self.start.data = self.start.data.reshape(-1)
410
+ if self.start.data.numel() != K:
411
+ self.start.data = torch.full(
412
+ (K,), 1.0 / K, dtype=self.dtype, device=self.start.device
413
+ )
414
+ self.start.data = self.start.data + self.eps
415
+ self.start.data = self.start.data / self.start.data.sum()
483
416
 
484
- self.start.data = new_start
485
- self.trans.data = new_trans
486
- self.emission.data = new_emission
417
+ # trans
418
+ self.trans.data = self.trans.data.reshape(K, K)
419
+ self.trans.data = self.trans.data + self.eps
420
+ rs = self.trans.data.sum(dim=1, keepdim=True)
421
+ rs[rs == 0.0] = 1.0
422
+ self.trans.data = self.trans.data / rs
487
423
 
488
- loglik_history.append(total_loglike)
489
- if verbose:
490
- print(f" total loglik = {total_loglike:.6f}")
424
+ def _ensure_device_dtype(
425
+ self, device: Optional[Union[str, torch.device]] = None
426
+ ) -> torch.device:
427
+ """Move parameters to the requested device/dtype.
491
428
 
492
- if len(loglik_history) > 1 and abs(loglik_history[-1] - loglik_history[-2]) < tol:
493
- if verbose:
494
- print(f"[HMM.fit] converged (Δll < {tol}) at iter {it}")
495
- break
429
+ Args:
430
+ device: Device specifier or None to use current device.
496
431
 
497
- return loglik_history if return_history else None
498
-
499
- def get_params(self) -> dict:
432
+ Returns:
433
+ Resolved torch device.
500
434
  """
501
- Return model parameters as numpy arrays on CPU.
502
- """
503
- with torch.no_grad():
504
- return {
505
- "n_states": int(self.n_states),
506
- "start": self.start.detach().cpu().numpy().astype(float).reshape(-1),
507
- "trans": self.trans.detach().cpu().numpy().astype(float),
508
- "emission": self.emission.detach().cpu().numpy().astype(float).reshape(-1),
509
- }
435
+ if device is None:
436
+ device = next(self.parameters()).device
437
+ device = torch.device(device) if isinstance(device, str) else device
438
+ self.start.data = self.start.data.to(device=device, dtype=self.dtype)
439
+ self.trans.data = self.trans.data.to(device=device, dtype=self.dtype)
440
+ return device
510
441
 
511
- def print_params(self, decimals: int = 4):
442
+ # ------------------------- state labeling -------------------------
443
+
444
+ def _state_modified_score(self) -> torch.Tensor:
445
+ """Subclasses return (K,) score; higher => more “Modified/Accessible”."""
446
+ raise NotImplementedError
447
+
448
+ def modified_state_index(self) -> int:
449
+ """Return the index of the most modified/accessible state."""
450
+ scores = self._state_modified_score()
451
+ return int(torch.argmax(scores).item())
452
+
453
+ def resolve_target_state_index(self, state_target: Any) -> int:
512
454
  """
513
- Nicely print start, transition, and emission probabilities.
455
+ Accept:
456
+ - int -> explicit state index
457
+ - "Modified" / "Non-Modified" and aliases
514
458
  """
515
- params = self.get_params()
516
- K = params["n_states"]
517
- fmt = f"{{:.{decimals}f}}"
518
- print(f"HMM params (K={K} states):")
519
- print(" start probs:")
520
- print(" [" + ", ".join(fmt.format(v) for v in params["start"]) + "]")
521
- print(" transition matrix (rows = from-state, cols = to-state):")
522
- for i, row in enumerate(params["trans"]):
523
- print(" s{:d}: [".format(i) + ", ".join(fmt.format(v) for v in row) + "]")
524
- print(" emission P(obs==1 | state):")
525
- for i, v in enumerate(params["emission"]):
526
- print(f" s{i}: {fmt.format(v)}")
527
-
528
- def to_dataframes(self) -> dict:
459
+ if isinstance(state_target, (int, np.integer)):
460
+ idx = int(state_target)
461
+ return max(0, min(idx, self.n_states - 1))
462
+
463
+ s = str(state_target).strip().lower()
464
+ if s in ("modified", "open", "accessible", "1", "pos", "positive"):
465
+ return self.modified_state_index()
466
+ if s in ("non-modified", "closed", "inaccessible", "0", "neg", "negative"):
467
+ scores = self._state_modified_score()
468
+ return int(torch.argmin(scores).item())
469
+ return self.modified_state_index()
470
+
471
+ # ------------------------- emissions -------------------------
472
+
473
+ def _log_emission(self, obs: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
529
474
  """
530
- Return pandas DataFrames for start (Series), trans (DataFrame), emission (Series).
475
+ Return logB:
476
+ - single: obs (N,L), mask (N,L) -> logB (N,L,K)
477
+ - multi : obs (N,L,C), mask (N,L,C) -> logB (N,L,K)
531
478
  """
532
- p = self.get_params()
533
- K = p["n_states"]
534
- state_names = [f"state_{i}" for i in range(K)]
535
- start_s = pd.Series(p["start"], index=state_names, name="start_prob")
536
- trans_df = pd.DataFrame(p["trans"], index=state_names, columns=state_names)
537
- emission_s = pd.Series(p["emission"], index=state_names, name="p_obs1")
538
- return {"start": start_s, "trans": trans_df, "emission": emission_s}
539
-
540
- def predict(self, data: List[List], impute_strategy: str = "ignore", device: Optional[Union[torch.device, str]] = None) -> List[np.ndarray]:
479
+ raise NotImplementedError
480
+
481
+ # ------------------------- decoding core -------------------------
482
+
483
+ def _forward_backward(
484
+ self,
485
+ obs: torch.Tensor,
486
+ mask: torch.Tensor,
487
+ *,
488
+ coords: Optional[np.ndarray] = None,
489
+ ) -> torch.Tensor:
541
490
  """
542
- Return posterior marginals gamma_t(k) for each sequence as list of (L, K) numpy arrays.
491
+ Returns gamma (N,L,K) in probability space.
492
+ Subclasses can override for distance-aware transitions.
543
493
  """
544
- if device is None:
545
- device = next(self.parameters()).device
546
- elif isinstance(device, str):
547
- device = torch.device(device)
548
- device = self._ensure_device_dtype(device)
549
-
550
- obs, mask, lengths = self._pad_and_mask(data, device=device, dtype=self.dtype, impute_strategy=impute_strategy)
551
- B, L = obs.shape
552
- K = self.n_states
494
+ device = obs.device
553
495
  eps = float(self.eps)
496
+ K = self.n_states
554
497
 
555
- logB = self._log_emission(obs, mask) # (B, L, K)
556
- logA = torch.log(self.trans + eps)
557
- logstart = torch.log(self.start + eps)
498
+ logB = self._log_emission(obs, mask) # (N,L,K)
499
+ logA = torch.log(self.trans + eps) # (K,K)
500
+ logstart = torch.log(self.start + eps) # (K,)
501
+
502
+ N, L, _ = logB.shape
558
503
 
559
- # Forward
560
- alpha = torch.empty((B, L, K), dtype=self.dtype, device=device)
504
+ alpha = torch.empty((N, L, K), dtype=self.dtype, device=device)
561
505
  alpha[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :]
506
+
562
507
  for t in range(1, L):
563
- prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0)
508
+ prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0) # (N,K,K)
564
509
  alpha[:, t, :] = _logsumexp(prev, dim=1) + logB[:, t, :]
565
510
 
566
- # Backward
567
- beta = torch.empty((B, L, K), dtype=self.dtype, device=device)
568
- beta[:, L - 1, :] = torch.zeros((K,), dtype=self.dtype, device=device).unsqueeze(0).expand(B, K)
511
+ beta = torch.empty((N, L, K), dtype=self.dtype, device=device)
512
+ beta[:, L - 1, :] = 0.0
569
513
  for t in range(L - 2, -1, -1):
570
- temp = logA.unsqueeze(0) + (logB[:, t + 1, :].unsqueeze(1) + beta[:, t + 1, :].unsqueeze(1))
514
+ temp = logA.unsqueeze(0) + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
571
515
  beta[:, t, :] = _logsumexp(temp, dim=2)
572
516
 
573
- # gamma
574
517
  log_gamma = alpha + beta
575
- logZ_time = _logsumexp(log_gamma, dim=2, keepdim=True)
576
- gamma = (log_gamma - logZ_time).exp() # (B, L, K)
518
+ logZ = _logsumexp(log_gamma, dim=2).unsqueeze(2)
519
+ gamma = (log_gamma - logZ).exp()
520
+ return gamma
577
521
 
578
- results = []
579
- for i in range(B):
580
- L_i = int(lengths[i].item())
581
- results.append(gamma[i, :L_i, :].detach().cpu().numpy())
582
- return results
583
-
584
- def score(self, seq_or_list: Union[List[float], List[List[float]]], impute_strategy: str = "ignore", device: Optional[Union[torch.device, str]] = None) -> Union[float, List[float]]:
522
+ def _viterbi(
523
+ self,
524
+ obs: torch.Tensor,
525
+ mask: torch.Tensor,
526
+ *,
527
+ coords: Optional[np.ndarray] = None,
528
+ ) -> torch.Tensor:
585
529
  """
586
- Compute log-likelihood of a single sequence or list of sequences under current params.
587
- Returns float (single) or list of floats (batch).
530
+ Returns states (N,L) int64. Missing positions (mask False for all channels)
531
+ are still decoded, but you’ll overwrite them to -1 during writing.
532
+ Subclasses can override for distance-aware transitions.
588
533
  """
589
- single = False
590
- if not isinstance(seq_or_list[0], (list, tuple, np.ndarray)):
591
- seqs = [seq_or_list]
592
- single = True
593
- else:
594
- seqs = seq_or_list
534
+ device = obs.device
535
+ eps = float(self.eps)
536
+ K = self.n_states
595
537
 
596
- if device is None:
597
- device = next(self.parameters()).device
598
- elif isinstance(device, str):
599
- device = torch.device(device)
600
- device = self._ensure_device_dtype(device)
538
+ logB = self._log_emission(obs, mask) # (N,L,K)
539
+ logA = torch.log(self.trans + eps) # (K,K)
540
+ logstart = torch.log(self.start + eps) # (K,)
601
541
 
602
- obs, mask, lengths = self._pad_and_mask(seqs, device=device, dtype=self.dtype, impute_strategy=impute_strategy)
603
- B, L = obs.shape
604
- K = self.n_states
605
- eps = float(self.eps)
542
+ N, L, _ = logB.shape
543
+ delta = torch.empty((N, L, K), dtype=self.dtype, device=device)
544
+ psi = torch.empty((N, L, K), dtype=torch.long, device=device)
606
545
 
607
- logB = self._log_emission(obs, mask)
608
- logA = torch.log(self.trans + eps)
609
- logstart = torch.log(self.start + eps)
546
+ delta[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :]
547
+ psi[:, 0, :] = -1
610
548
 
611
- alpha = torch.empty((B, L, K), dtype=self.dtype, device=device)
612
- alpha[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :]
613
549
  for t in range(1, L):
614
- prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0)
615
- alpha[:, t, :] = _logsumexp(prev, dim=1) + logB[:, t, :]
550
+ cand = delta[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0) # (N,K,K)
551
+ best_val, best_idx = cand.max(dim=1)
552
+ delta[:, t, :] = best_val + logB[:, t, :]
553
+ psi[:, t, :] = best_idx
616
554
 
617
- last_idx = (lengths - 1).clamp(min=0)
618
- idx_range = torch.arange(B, device=device)
619
- final_alpha = alpha[idx_range, last_idx, :] # (B, K)
620
- seq_loglikes = _logsumexp(final_alpha, dim=1) # (B,)
621
- seq_loglikes = seq_loglikes.detach().cpu().numpy().tolist()
622
- return seq_loglikes[0] if single else seq_loglikes
555
+ last_state = torch.argmax(delta[:, L - 1, :], dim=1) # (N,)
556
+ states = torch.empty((N, L), dtype=torch.long, device=device)
623
557
 
624
- def viterbi(self, seq: List[float], impute_strategy: str = "ignore", device: Optional[Union[torch.device, str]] = None) -> Tuple[List[int], float]:
625
- """
626
- Viterbi decode a single sequence. Returns (state_path, log_probability_of_path).
627
- """
628
- paths, scores = self.batch_viterbi([seq], impute_strategy=impute_strategy, device=device)
629
- return paths[0], scores[0]
558
+ states[:, L - 1] = last_state
559
+ for t in range(L - 2, -1, -1):
560
+ states[:, t] = psi[torch.arange(N, device=device), t + 1, states[:, t + 1]]
630
561
 
631
- def batch_viterbi(self, data: List[List[float]], impute_strategy: str = "ignore", device: Optional[Union[torch.device, str]] = None) -> Tuple[List[List[int]], List[float]]:
562
+ return states
563
+
564
+ def decode(
565
+ self,
566
+ X: np.ndarray,
567
+ coords: Optional[np.ndarray] = None,
568
+ *,
569
+ decode: str = "marginal",
570
+ device: Optional[Union[str, torch.device]] = None,
571
+ ) -> Tuple[np.ndarray, np.ndarray]:
572
+ """Decode observations into state calls and posterior probabilities.
573
+
574
+ Args:
575
+ X: Observations array (N, L) or (N, L, C).
576
+ coords: Optional coordinates aligned to L.
577
+ decode: Decoding strategy ("marginal" or "viterbi").
578
+ device: Device specifier.
579
+
580
+ Returns:
581
+ Tuple of (states, posterior probabilities).
632
582
  """
633
- Batched Viterbi decoding on padded sequences. Returns (list_of_paths, list_of_scores).
634
- Each path is the length of the original sequence.
583
+ device = self._ensure_device_dtype(device)
584
+
585
+ X = np.asarray(X, dtype=float)
586
+ if X.ndim == 2:
587
+ L = X.shape[1]
588
+ elif X.ndim == 3:
589
+ L = X.shape[1]
590
+ else:
591
+ raise ValueError(f"X must be 2D or 3D; got shape {X.shape}")
592
+
593
+ if coords is None:
594
+ coords = np.arange(L, dtype=int)
595
+ coords = np.asarray(coords, dtype=int)
596
+
597
+ if X.ndim == 2:
598
+ obs = torch.tensor(np.nan_to_num(X, nan=0.0), dtype=self.dtype, device=device)
599
+ mask = torch.tensor(~np.isnan(X), dtype=torch.bool, device=device)
600
+ else:
601
+ obs = torch.tensor(np.nan_to_num(X, nan=0.0), dtype=self.dtype, device=device)
602
+ mask = torch.tensor(~np.isnan(X), dtype=torch.bool, device=device)
603
+
604
+ gamma = self._forward_backward(obs, mask, coords=coords)
605
+
606
+ if str(decode).lower() == "viterbi":
607
+ st = self._viterbi(obs, mask, coords=coords)
608
+ else:
609
+ st = torch.argmax(gamma, dim=2)
610
+
611
+ return st.detach().cpu().numpy(), gamma.detach().cpu().numpy()
612
+
613
+ # ------------------------- EM fit -------------------------
614
+
615
+ def fit(
616
+ self,
617
+ X: np.ndarray,
618
+ coords: Optional[np.ndarray] = None,
619
+ *,
620
+ max_iter: int = 50,
621
+ tol: float = 1e-4,
622
+ device: Optional[Union[str, torch.device]] = None,
623
+ update_start: bool = True,
624
+ update_trans: bool = True,
625
+ update_emission: bool = True,
626
+ verbose: bool = False,
627
+ **kwargs,
628
+ ) -> List[float]:
629
+ """Fit HMM parameters using EM.
630
+
631
+ Args:
632
+ X: Observations array.
633
+ coords: Optional coordinate array.
634
+ max_iter: Maximum EM iterations.
635
+ tol: Convergence tolerance.
636
+ device: Device specifier.
637
+ update_start: Whether to update start probabilities.
638
+ update_trans: Whether to update transition probabilities.
639
+ update_emission: Whether to update emission parameters.
640
+ verbose: Whether to log progress.
641
+ **kwargs: Additional implementation-specific kwargs.
642
+
643
+ Returns:
644
+ List of log-likelihood values across iterations.
635
645
  """
636
- if device is None:
637
- device = next(self.parameters()).device
638
- elif isinstance(device, str):
639
- device = torch.device(device)
646
+ X = np.asarray(X, dtype=float)
647
+ if X.ndim not in (2, 3):
648
+ raise ValueError(f"X must be 2D or 3D; got {X.shape}")
649
+ L = X.shape[1]
650
+
651
+ if coords is None:
652
+ coords = np.arange(L, dtype=int)
653
+ coords = np.asarray(coords, dtype=int)
654
+
640
655
  device = self._ensure_device_dtype(device)
656
+ return self.fit_em(
657
+ X,
658
+ coords,
659
+ device=device,
660
+ max_iter=max_iter,
661
+ tol=tol,
662
+ update_start=update_start,
663
+ update_trans=update_trans,
664
+ update_emission=update_emission,
665
+ verbose=verbose,
666
+ **kwargs,
667
+ )
641
668
 
642
- obs, mask, lengths = self._pad_and_mask(data, device=device, dtype=self.dtype, impute_strategy=impute_strategy)
643
- B, L = obs.shape
644
- K = self.n_states
645
- eps = float(self.eps)
669
+ def adapt_emissions(
670
+ self,
671
+ X: np.ndarray,
672
+ coords: np.ndarray,
673
+ *,
674
+ iters: int = 5,
675
+ device: Optional[Union[str, torch.device]] = None,
676
+ freeze_start: bool = True,
677
+ freeze_trans: bool = True,
678
+ verbose: bool = False,
679
+ **kwargs,
680
+ ) -> List[float]:
681
+ """Adapt emission parameters while keeping shared structure fixed.
682
+
683
+ Args:
684
+ X: Observations array.
685
+ coords: Coordinate array aligned to X.
686
+ iters: Number of EM iterations.
687
+ device: Device specifier.
688
+ freeze_start: Whether to freeze start probabilities.
689
+ freeze_trans: Whether to freeze transitions.
690
+ verbose: Whether to log progress.
691
+ **kwargs: Additional implementation-specific kwargs.
692
+
693
+ Returns:
694
+ List of log-likelihood values across iterations.
695
+ """
696
+ return self.fit(
697
+ X,
698
+ coords,
699
+ max_iter=int(iters),
700
+ tol=0.0,
701
+ device=device,
702
+ update_start=not freeze_start,
703
+ update_trans=not freeze_trans,
704
+ update_emission=True,
705
+ verbose=verbose,
706
+ **kwargs,
707
+ )
646
708
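
`adapt_emissions()` is a thin wrapper over `fit()` with the start and transition updates switched off. A short sketch of the two entry points, under the same assumed import path as above and using the `SingleBernoulliHMM` subclass defined later in this file:

    import numpy as np
    from smftools.hmm.HMM import SingleBernoulliHMM  # assumed import path

    hmm = SingleBernoulliHMM(n_states=2, init_emission=[0.1, 0.9])
    X = np.random.default_rng(0).integers(0, 2, size=(50, 200)).astype(float)
    coords = np.arange(200)

    # Full EM: start, transition, and emission parameters are all re-estimated.
    ll_history = hmm.fit(X, coords, max_iter=20, tol=1e-4)

    # Emission-only adaptation: with the default freeze flags this delegates to
    # fit(..., update_start=False, update_trans=False, update_emission=True).
    ll_history = hmm.adapt_emissions(X, coords, iters=3)
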
 
647
- p = self.emission
648
- logp = torch.log(p + eps)
649
- log1mp = torch.log1p(-p + eps)
650
- logB = obs.unsqueeze(-1) * logp.unsqueeze(0).unsqueeze(0) + (1.0 - obs.unsqueeze(-1)) * log1mp.unsqueeze(0).unsqueeze(0)
651
- logB = torch.where(mask.unsqueeze(-1), logB, torch.zeros_like(logB))
709
+ def fit_em(
710
+ self,
711
+ X: np.ndarray,
712
+ coords: np.ndarray,
713
+ *,
714
+ device: torch.device,
715
+ max_iter: int,
716
+ tol: float,
717
+ update_start: bool,
718
+ update_trans: bool,
719
+ update_emission: bool,
720
+ verbose: bool,
721
+ **kwargs,
722
+ ) -> List[float]:
723
+ """Run the core EM update loop (subclasses implement).
724
+
725
+ Args:
726
+ X: Observations array.
727
+ coords: Coordinate array aligned to X.
728
+ device: Torch device.
729
+ max_iter: Maximum iterations.
730
+ tol: Convergence tolerance.
731
+ update_start: Whether to update start probabilities.
732
+ update_trans: Whether to update transitions.
733
+ update_emission: Whether to update emission parameters.
734
+ verbose: Whether to log progress.
735
+ **kwargs: Additional subclass-specific kwargs.
736
+
737
+ Returns:
738
+ List of log-likelihood values across iterations.
739
+ """
740
+ raise NotImplementedError
652
741
 
653
- logstart = torch.log(self.start + eps)
654
- logA = torch.log(self.trans + eps)
742
+ # ------------------------- save/load -------------------------
655
743
 
656
- # delta (score) and psi (argmax pointers)
657
- delta = torch.empty((B, L, K), dtype=self.dtype, device=device)
658
- psi = torch.zeros((B, L, K), dtype=torch.long, device=device)
744
+ def _extra_save_payload(self) -> dict:
745
+ """Return extra model state to include when saving."""
746
+ return {}
659
747
 
660
- delta[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :]
661
- psi[:, 0, :] = -1 # sentinel
748
+ def _load_extra_payload(self, payload: dict, *, device: torch.device):
749
+ """Load extra model state saved by subclasses.
662
750
 
663
- for t in range(1, L):
664
- # cand shape (B, i, j)
665
- cand = delta[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0) # (B, K, K)
666
- best_val, best_idx = cand.max(dim=1) # best over previous i: results (B, j)
667
- delta[:, t, :] = best_val + logB[:, t, :]
668
- psi[:, t, :] = best_idx # best previous state index for each (B, j)
669
-
670
- # backtrack
671
- last_idx = (lengths - 1).clamp(min=0)
672
- idx_range = torch.arange(B, device=device)
673
- final_delta = delta[idx_range, last_idx, :] # (B, K)
674
- best_last_val, best_last_state = final_delta.max(dim=1) # (B,), (B,)
675
- paths = []
676
- scores = []
677
- for b in range(B):
678
- Lb = int(lengths[b].item())
679
- if Lb == 0:
680
- paths.append([])
681
- scores.append(float("-inf"))
682
- continue
683
- s = int(best_last_state[b].item())
684
- path = [s]
685
- for t in range(Lb - 1, 0, -1):
686
- s = int(psi[b, t, s].item())
687
- path.append(s)
688
- path.reverse()
689
- paths.append(path)
690
- scores.append(float(best_last_val[b].item()))
691
- return paths, scores
692
-
693
- def save(self, path: str) -> None:
751
+ Args:
752
+ payload: Serialized model payload.
753
+ device: Torch device for tensors.
694
754
  """
695
- Save HMM to `path` using torch.save. Stores:
696
- - n_states, eps, dtype (string)
697
- - start, trans, emission (CPU tensors)
755
+ return
756
+
757
+ def save(self, path: Union[str, Path]) -> None:
758
+ """Serialize the model to disk.
759
+
760
+ Args:
761
+ path: Output path for the serialized model.
698
762
  """
763
+ path = str(path)
699
764
  payload = {
765
+ "hmm_name": getattr(self, "hmm_name", self.__class__.__name__),
766
+ "class": self.__class__.__name__,
700
767
  "n_states": int(self.n_states),
701
768
  "eps": float(self.eps),
702
- # store dtype as a string like "torch.float64" (portable)
703
769
  "dtype": str(self.dtype),
704
770
  "start": self.start.detach().cpu(),
705
771
  "trans": self.trans.detach().cpu(),
706
- "emission": self.emission.detach().cpu(),
707
772
  }
773
+ payload.update(self._extra_save_payload())
708
774
  torch.save(payload, path)
709
775
 
710
776
  @classmethod
711
- def load(cls, path: str, device: Optional[Union[torch.device, str]] = None) -> "HMM":
712
- """
713
- Load model from `path`. If `device` is provided (str or torch.device),
714
- parameters will be moved to that device; otherwise they remain on CPU.
715
- Example: model = HMM.load('hmm.pt', device='cuda')
716
- """
717
- payload = torch.load(path, map_location="cpu")
777
+ def load(cls, path: Union[str, Path], device: Optional[Union[str, torch.device]] = None):
778
+ """Load a serialized model from disk.
718
779
 
719
- n_states = int(payload.get("n_states"))
720
- eps = float(payload.get("eps", 1e-8))
721
- dtype_entry = payload.get("dtype", "torch.float64")
780
+ Args:
781
+ path: Path to the serialized model.
782
+ device: Optional device specifier.
722
783
 
723
- # Resolve dtype string robustly:
724
- # Accept "torch.float64" or "float64" or actual torch.dtype (older payloads)
725
- if isinstance(dtype_entry, torch.dtype):
726
- torch_dtype = dtype_entry
727
- else:
728
- # dtype_entry expected to be a string
729
- dtype_str = str(dtype_entry)
730
- # take last part after dot if present: "torch.float64" -> "float64"
731
- name = dtype_str.split(".")[-1]
732
- # map to torch dtype if available, else fallback mapping
733
- if hasattr(torch, name):
734
- torch_dtype = getattr(torch, name)
735
- else:
736
- fallback = {"float64": torch.float64, "float32": torch.float32, "float16": torch.float16}
737
- torch_dtype = fallback.get(name, torch.float64)
738
-
739
- # Build instance (use resolved dtype)
740
- model = cls(n_states=n_states, dtype=torch_dtype, eps=eps)
741
-
742
- # Determine target device
743
- if device is None:
744
- device = torch.device("cpu")
745
- elif isinstance(device, str):
746
- device = torch.device(device)
784
+ Returns:
785
+ Loaded HMM instance.
786
+ """
787
+ payload = torch.load(str(path), map_location="cpu")
788
+ hmm_name = payload.get("hmm_name", None)
789
+ klass = _HMM_REGISTRY.get(hmm_name, cls)
790
+
791
+ dtype_str = str(payload.get("dtype", "torch.float64"))
792
+ torch_dtype = getattr(torch, dtype_str.split(".")[-1], torch.float64)
793
+ torch_dtype = _coerce_dtype_for_device(torch_dtype, device) # <<< NEW
794
+
795
+ model = klass(
796
+ n_states=int(payload["n_states"]),
797
+ eps=float(payload.get("eps", 1e-8)),
798
+ dtype=torch_dtype,
799
+ )
800
+ dev = torch.device(device) if isinstance(device, str) else (device or torch.device("cpu"))
801
+ model.to(dev)
747
802
 
748
- # Load params (they were saved on CPU) and cast to model dtype/device
749
803
  with torch.no_grad():
750
- model.start.data = payload["start"].to(device=device, dtype=model.dtype)
751
- model.trans.data = payload["trans"].to(device=device, dtype=model.dtype)
752
- model.emission.data = payload["emission"].to(device=device, dtype=model.dtype)
804
+ model.start.data = payload["start"].to(device=dev, dtype=model.dtype)
805
+ model.trans.data = payload["trans"].to(device=dev, dtype=model.dtype)
753
806
 
754
- # Normalize / coerce shapes just in case
807
+ model._load_extra_payload(payload, device=dev)
755
808
  model._normalize_params()
756
809
  return model
757
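
`save()` now records `hmm_name` and the class name in the payload, and `load()` looks the name up in `_HMM_REGISTRY` before falling back to the calling class, so subclass state (e.g. the emission table restored by `_load_extra_payload`) survives a round trip. A minimal round-trip sketch, with the import path assumed:

    from smftools.hmm.HMM import SingleBernoulliHMM  # assumed import path

    model = SingleBernoulliHMM(n_states=2, init_emission=[0.05, 0.85])
    model.save("hmm_single.pt")

    restored = SingleBernoulliHMM.load("hmm_single.pt", device="cpu")
    print(type(restored).__name__)   # SingleBernoulliHMM
    print(restored.emission)         # emission table restored via _load_extra_payload
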
-
810
+
811
+ # ------------------------- interval helpers -------------------------
812
+
813
+ @staticmethod
814
+ def _runs_from_bool(mask_1d: np.ndarray) -> List[Tuple[int, int]]:
815
+ """
816
+ Return runs as (start_idx, end_idx_exclusive) for True segments.
817
+ """
818
+ idx = np.nonzero(mask_1d)[0]
819
+ if idx.size == 0:
820
+ return []
821
+ breaks = np.where(np.diff(idx) > 1)[0]
822
+ starts = np.r_[idx[0], idx[breaks + 1]]
823
+ ends = np.r_[idx[breaks] + 1, idx[-1] + 1]
824
+ return list(zip(starts, ends))
825
+
826
+ @staticmethod
827
+ def _interval_length(coords: np.ndarray, s: int, e: int) -> int:
828
+ """Genomic length for [s,e) on coords."""
829
+ if e <= s:
830
+ return 0
831
+ return int(coords[e - 1]) - int(coords[s]) + 1
832
+
833
+ @staticmethod
834
+ def _write_lengths_for_binary_layer(bin_mat: np.ndarray) -> np.ndarray:
835
+ """
836
+ For each row, each True-run gets its run-length assigned across that run.
837
+ Output same shape as bin_mat, int32.
838
+ """
839
+ n, L = bin_mat.shape
840
+ out = np.zeros((n, L), dtype=np.int32)
841
+ for i in range(n):
842
+ runs = BaseHMM._runs_from_bool(bin_mat[i].astype(bool))
843
+ for s, e in runs:
844
+ out[i, s:e] = e - s
845
+ return out
846
+
847
+ @staticmethod
848
+ def _write_lengths_for_state_layer(states: np.ndarray) -> np.ndarray:
849
+ """
850
+ For each row, each constant-state run gets run-length assigned across run.
851
+ Missing values should be -1 and will get 0 length.
852
+ """
853
+ n, L = states.shape
854
+ out = np.zeros((n, L), dtype=np.int32)
855
+ for i in range(n):
856
+ row = states[i]
857
+ valid = row >= 0
858
+ if not np.any(valid):
859
+ continue
860
+ # scan runs
861
+ s = 0
862
+ while s < L:
863
+ if row[s] < 0:
864
+ s += 1
865
+ continue
866
+ v = row[s]
867
+ e = s + 1
868
+ while e < L and row[e] == v:
869
+ e += 1
870
+ out[i, s:e] = e - s
871
+ s = e
872
+ return out
873
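
A small worked example of the run helpers above (both are static, so they can be exercised directly; only the import path is an assumption):

    import numpy as np
    from smftools.hmm.HMM import BaseHMM  # assumed import path

    row = np.array([0, 1, 1, 0, 0, 1, 1, 1, 0], dtype=bool)
    runs = BaseHMM._runs_from_bool(row)
    print([(int(s), int(e)) for s, e in runs])   # [(1, 3), (5, 8)] -- half-open [start, end) runs of True

    bin_mat = np.array([[0, 1, 1, 0, 1, 1, 1]], dtype=np.uint8)
    print(BaseHMM._write_lengths_for_binary_layer(bin_mat))
    # [[0 2 2 0 3 3 3]] -- every position inside a run carries that run's length
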
+
874
+ # ------------------------- merging -------------------------
875
+
876
+ def merge_intervals_to_new_layer(
877
+ self,
878
+ adata,
879
+ base_layer: str,
880
+ *,
881
+ distance_threshold: int,
882
+ suffix: str = "_merged",
883
+ overwrite: bool = True,
884
+ ) -> str:
885
+ """
886
+ Merge adjacent 1-intervals in a binary layer if gaps <= distance_threshold (in coords space),
887
+ writing:
888
+ - {base_layer}{suffix}
889
+ - {base_layer}{suffix}_lengths (run-length in index units)
890
+ """
891
+ if base_layer not in adata.layers:
892
+ raise KeyError(f"Layer '{base_layer}' not found.")
893
+
894
+ coords, coords_are_ints = _safe_int_coords(adata.var_names)
895
+ arr = np.asarray(adata.layers[base_layer])
896
+ arr = (arr > 0).astype(np.uint8)
897
+
898
+ merged_name = f"{base_layer}{suffix}"
899
+ merged_len_name = f"{merged_name}_lengths"
900
+
901
+ if (merged_name in adata.layers or merged_len_name in adata.layers) and not overwrite:
902
+ raise KeyError(f"Merged outputs exist (use overwrite=True): {merged_name}")
903
+
904
+ n, L = arr.shape
905
+ out = np.zeros_like(arr, dtype=np.uint8)
906
+
907
+ dt = int(distance_threshold)
908
+
909
+ for i in range(n):
910
+ ones = np.nonzero(arr[i] != 0)[0]
911
+ runs = self._runs_from_bool(arr[i] != 0)
912
+ if not runs:
913
+ continue
914
+ ms, me = runs[0]
915
+ merged_runs = []
916
+ for s, e in runs[1:]:
917
+ if coords_are_ints:
918
+ gap = int(coords[s]) - int(coords[me - 1]) - 1
919
+ else:
920
+ gap = s - me
921
+ if gap <= dt:
922
+ me = e
923
+ else:
924
+ merged_runs.append((ms, me))
925
+ ms, me = s, e
926
+ merged_runs.append((ms, me))
927
+
928
+ for s, e in merged_runs:
929
+ out[i, s:e] = 1
930
+
931
+ adata.layers[merged_name] = out
932
+ adata.layers[merged_len_name] = self._write_lengths_for_binary_layer(out)
933
+
934
+ # bookkeeping
935
+ key = "hmm_appended_layers"
936
+ if adata.uns.get(key) is None:
937
+ adata.uns[key] = []
938
+ for nm in (merged_name, merged_len_name):
939
+ if nm not in adata.uns[key]:
940
+ adata.uns[key].append(nm)
941
+
942
+ return merged_name
943
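
The gap rule above merges two called runs when the number of bases strictly between them (in `var_names` coordinate space, falling back to index distance for non-integer names) is at most `distance_threshold`. A sketch on a toy AnnData, with the import path assumed:

    import anndata as ad
    import numpy as np
    from smftools.hmm.HMM import SingleBernoulliHMM  # assumed import path

    adata = ad.AnnData(X=np.zeros((1, 8)))
    adata.var_names = [str(c) for c in range(10, 18)]
    adata.layers["footprint"] = np.array([[1, 1, 0, 0, 1, 1, 0, 0]], dtype=np.uint8)

    hmm = SingleBernoulliHMM(n_states=2)
    merged = hmm.merge_intervals_to_new_layer(adata, "footprint", distance_threshold=2)
    print(adata.layers[merged])                # [[1 1 1 1 1 1 0 0]] -- the 2-base gap is <= threshold, so the runs fuse
    print(adata.layers[f"{merged}_lengths"])   # [[6 6 6 6 6 6 0 0]]
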
+
944
+ def write_size_class_layers_from_binary(
945
+ self,
946
+ adata,
947
+ base_layer: str,
948
+ *,
949
+ out_prefix: str,
950
+ feature_ranges: Dict[str, Tuple[float, float]],
951
+ suffix: str = "",
952
+ overwrite: bool = True,
953
+ ) -> List[str]:
954
+ """
955
+ Take an existing binary layer (runs represent features) and write size-class layers:
956
+ - {out_prefix}_{feature}{suffix}
957
+ - plus lengths layers
958
+
959
+ feature_ranges: name -> (lo, hi) in genomic bp.
960
+ """
961
+ if base_layer not in adata.layers:
962
+ raise KeyError(f"Layer '{base_layer}' not found.")
963
+
964
+ coords, coords_are_ints = _safe_int_coords(adata.var_names)
965
+ bin_arr = (np.asarray(adata.layers[base_layer]) > 0).astype(np.uint8)
966
+ n, L = bin_arr.shape
967
+
968
+ created: List[str] = []
969
+ for feat_name in feature_ranges.keys():
970
+ nm = f"{out_prefix}_{feat_name}{suffix}"
971
+ ln = f"{nm}_lengths"
972
+ if (nm in adata.layers or ln in adata.layers) and not overwrite:
973
+ continue
974
+ adata.layers[nm] = np.zeros((n, L), dtype=np.uint8)
975
+ adata.layers[ln] = np.zeros((n, L), dtype=np.int32)
976
+ created.extend([nm, ln])
977
+
978
+ for i in range(n):
979
+ runs = self._runs_from_bool(bin_arr[i] != 0)
980
+ for s, e in runs:
981
+ length_bp = self._interval_length(coords, s, e) if coords_are_ints else (e - s)
982
+ for feat_name, (lo, hi) in feature_ranges.items():
983
+ if float(lo) <= float(length_bp) < float(hi):
984
+ nm = f"{out_prefix}_{feat_name}{suffix}"
985
+ adata.layers[nm][i, s:e] = 1
986
+ adata.layers[f"{nm}_lengths"][i, s:e] = e - s
987
+ break
988
+
989
+ # fill lengths for each size layer (consistent, even if overlaps)
990
+ for feat_name in feature_ranges.keys():
991
+ nm = f"{out_prefix}_{feat_name}{suffix}"
992
+ adata.layers[f"{nm}_lengths"] = self._write_lengths_for_binary_layer(
993
+ np.asarray(adata.layers[nm])
994
+ )
995
+
996
+ key = "hmm_appended_layers"
997
+ if adata.uns.get(key) is None:
998
+ adata.uns[key] = []
999
+ for nm in created:
1000
+ if nm not in adata.uns[key]:
1001
+ adata.uns[key].append(nm)
1002
+
1003
+ return created
1004
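
`feature_ranges` bins each run by its genomic length in bp, testing the half-open interval [lo, hi) in insertion order. The bin names below ("small"/"large") are placeholders, as is the import path:

    import anndata as ad
    import numpy as np
    from smftools.hmm.HMM import SingleBernoulliHMM  # assumed import path

    adata = ad.AnnData(X=np.zeros((1, 10)))
    adata.var_names = [str(c) for c in range(100, 110)]
    adata.layers["patches"] = np.array([[1, 1, 0, 0, 1, 1, 1, 1, 1, 0]], dtype=np.uint8)

    hmm = SingleBernoulliHMM(n_states=2)
    created = hmm.write_size_class_layers_from_binary(
        adata,
        "patches",
        out_prefix="patch",
        feature_ranges={"small": (0, 4), "large": (4, np.inf)},
    )
    print(created)                       # ['patch_small', 'patch_small_lengths', 'patch_large', 'patch_large_lengths']
    print(adata.layers["patch_small"])   # [[1 1 0 0 0 0 0 0 0 0]] -- the 2 bp run
    print(adata.layers["patch_large"])   # [[0 0 0 0 1 1 1 1 1 0]] -- the 5 bp run
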
+
1005
+ # ------------------------- AnnData annotation -------------------------
1006
+
1007
+ @staticmethod
1008
+ def _resolve_pos_mask_for_methbase(subset, ref: str, methbase: str) -> Optional[np.ndarray]:
1009
+ """
1010
+ Local helper to resolve per-base masks from subset.var.* columns.
1011
+ Returns a boolean np.ndarray of length subset.n_vars or None.
1012
+ """
1013
+ key = str(methbase).strip().lower()
1014
+ var = subset.var
1015
+
1016
+ def _has(col: str) -> bool:
1017
+ """Return True when a column exists on subset.var."""
1018
+ return col in var.columns
1019
+
1020
+ if key in ("a",):
1021
+ col = f"{ref}_strand_FASTA_base"
1022
+ if not _has(col):
1023
+ return None
1024
+ return np.asarray(var[col] == "A")
1025
+
1026
+ if key in ("c", "any_c", "anyc", "any-c"):
1027
+ for col in (f"{ref}_any_C_site", f"{ref}_C_site"):
1028
+ if _has(col):
1029
+ return np.asarray(var[col])
1030
+ return None
1031
+
1032
+ if key in ("gpc", "gpc_site", "gpc-site"):
1033
+ col = f"{ref}_GpC_site"
1034
+ if not _has(col):
1035
+ return None
1036
+ return np.asarray(var[col])
1037
+
1038
+ if key in ("cpg", "cpg_site", "cpg-site"):
1039
+ col = f"{ref}_CpG_site"
1040
+ if not _has(col):
1041
+ return None
1042
+ return np.asarray(var[col])
1043
+
1044
+ alt = f"{ref}_{methbase}_site"
1045
+ if not _has(alt):
1046
+ return None
1047
+ return np.asarray(var[alt])
1048
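
For reference, the lookups above map a methbase token onto `subset.var` columns: "a" -> `{ref}_strand_FASTA_base == "A"`, "c"/"any_c" -> `{ref}_any_C_site` (falling back to `{ref}_C_site`), "gpc" -> `{ref}_GpC_site`, "cpg" -> `{ref}_CpG_site`, and anything else -> `{ref}_{methbase}_site`. A tiny check on a toy object, with the import path assumed:

    import anndata as ad
    import numpy as np
    import pandas as pd
    from smftools.hmm.HMM import BaseHMM  # assumed import path

    subset = ad.AnnData(
        X=np.zeros((1, 4)),
        var=pd.DataFrame({"ref1_GpC_site": [True, False, True, False]}),
    )
    print(BaseHMM._resolve_pos_mask_for_methbase(subset, ref="ref1", methbase="GpC"))
    # [ True False  True False]
    print(BaseHMM._resolve_pos_mask_for_methbase(subset, ref="ref1", methbase="6mA"))
    # None -- falls back to ref1_6mA_site, which this toy object does not carry
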
+
758
1049
  def annotate_adata(
759
1050
  self,
760
1051
  adata,
761
- obs_column: str,
762
- layer: Optional[str] = None,
763
- footprints: Optional[bool] = None,
764
- accessible_patches: Optional[bool] = None,
765
- cpg: Optional[bool] = None,
766
- methbases: Optional[List[str]] = None,
767
- threshold: Optional[float] = None,
768
- device: Optional[Union[str, torch.device]] = None,
769
- batch_size: Optional[int] = None,
770
- use_viterbi: Optional[bool] = None,
771
- in_place: bool = True,
772
- verbose: bool = True,
1052
+ *,
1053
+ prefix: str,
1054
+ X: np.ndarray,
1055
+ coords: np.ndarray,
1056
+ var_mask: np.ndarray,
1057
+ span_fill: bool = True,
1058
+ config=None,
1059
+ decode: str = "marginal",
1060
+ write_posterior: bool = True,
1061
+ posterior_state: str = "Modified",
1062
+ feature_sets: Optional[Dict[str, Dict[str, Any]]] = None,
1063
+ prob_threshold: float = 0.5,
773
1064
  uns_key: str = "hmm_appended_layers",
774
- config: Optional[Union[dict, "ExperimentConfig"]] = None, # NEW: config/dict accepted
775
1065
  uns_flag: str = "hmm_annotated",
776
- force_redo: bool = False
1066
+ force_redo: bool = False,
1067
+ device: Optional[Union[str, torch.device]] = None,
1068
+ **kwargs,
777
1069
  ):
1070
+ """Decode and annotate an AnnData object with HMM-derived layers.
1071
+
1072
+ Args:
1073
+ adata: AnnData to annotate.
1074
+ prefix: Prefix for newly written layers.
1075
+ X: Observations array for decoding.
1076
+ coords: Coordinate array aligned to X.
1077
+ var_mask: Boolean mask for positions in adata.var.
1078
+ span_fill: Whether to fill missing spans.
1079
+ config: Optional config for naming and state selection.
1080
+ decode: Decode method ("marginal" or "viterbi").
1081
+ write_posterior: Whether to write posterior probabilities.
1082
+ posterior_state: State label to write posterior for.
1083
+ feature_sets: Optional feature set definition for size classes.
1084
+ prob_threshold: Posterior probability threshold for binary calls.
1085
+ uns_key: .uns key to track appended layers.
1086
+ uns_flag: .uns flag to mark annotations.
1087
+ force_redo: Whether to overwrite existing layers.
1088
+ device: Device specifier.
1089
+ **kwargs: Additional parameters for specialized workflows.
1090
+
1091
+ Returns:
1092
+ List of created layer names or None if skipped.
778
1093
  """
779
- Annotate an AnnData with HMM-derived features (in adata.obs and adata.layers).
780
-
781
- Parameters
782
- ----------
783
- config : optional ExperimentConfig instance or plain dict
784
- When provided, the following keys (if present) are used to override defaults:
785
- - hmm_feature_sets : dict (canonical feature set structure) OR a JSON/string repr
786
- - hmm_annotation_threshold : float
787
- - hmm_batch_size : int
788
- - hmm_use_viterbi : bool
789
- - hmm_methbases : list
790
- - footprints / accessible_patches / cpg (booleans)
791
- Other keyword args override config values if explicitly provided.
792
- """
793
- import json, ast, warnings
794
- import numpy as _np
795
- import torch as _torch
796
- from tqdm import trange, tqdm as _tqdm
797
-
798
- # Only run if not already performed
799
- already = bool(adata.uns.get(uns_flag, False))
800
- if (already and not force_redo):
801
- # QC already performed; nothing to do
802
- return None if in_place else adata
803
-
804
- # small helpers
805
- def _try_json_or_literal(s):
806
- if s is None:
807
- return None
808
- if not isinstance(s, str):
809
- return s
810
- s0 = s.strip()
811
- if s0 == "":
812
- return None
813
- try:
814
- return json.loads(s0)
815
- except Exception:
816
- pass
817
- try:
818
- return ast.literal_eval(s0)
819
- except Exception:
820
- return s
821
-
822
- def _coerce_bool(x):
823
- if x is None:
824
- return False
825
- if isinstance(x, bool):
826
- return x
827
- if isinstance(x, (int, float)):
828
- return bool(x)
829
- s = str(x).strip().lower()
830
- return s in ("1", "true", "t", "yes", "y", "on")
831
-
832
- def normalize_hmm_feature_sets(raw):
833
- if raw is None:
834
- return {}
835
- parsed = raw
836
- if isinstance(raw, str):
837
- parsed = _try_json_or_literal(raw)
838
- if not isinstance(parsed, dict):
839
- return {}
840
-
841
- def _coerce_bound(x):
842
- if x is None:
843
- return None
844
- if isinstance(x, (int, float)):
845
- return float(x)
846
- s = str(x).strip().lower()
847
- if s in ("inf", "infty", "infinite", "np.inf"):
848
- return _np.inf
849
- if s in ("none", ""):
850
- return None
851
- try:
852
- return float(x)
853
- except Exception:
854
- return None
855
-
856
- def _coerce_feature_map(feats):
857
- out = {}
858
- if not isinstance(feats, dict):
859
- return out
860
- for fname, rng in feats.items():
861
- if rng is None:
862
- out[fname] = (0.0, _np.inf)
1094
+ # skip logic
1095
+ if bool(adata.uns.get(uns_flag, False)) and not force_redo:
1096
+ return None
1097
+
1098
+ if adata.uns.get(uns_key) is None:
1099
+ adata.uns[uns_key] = []
1100
+ appended = list(adata.uns.get(uns_key, [])) if adata.uns.get(uns_key) is not None else []
1101
+
1102
+ X = np.asarray(X, dtype=float)
1103
+ coords = np.asarray(coords, dtype=int)
1104
+ var_mask = np.asarray(var_mask, dtype=bool)
1105
+ if var_mask.shape[0] != adata.n_vars:
1106
+ raise ValueError(f"var_mask length {var_mask.shape[0]} != adata.n_vars {adata.n_vars}")
1107
+
1108
+ # decode
1109
+ states, gamma = self.decode(
1110
+ X, coords, decode=decode, device=device
1111
+ ) # states (N,L), gamma (N,L,K)
1112
+ N, L = states.shape
1113
+ if N != adata.n_obs:
1114
+ raise ValueError(f"X has N={N} rows but adata.n_obs={adata.n_obs}")
1115
+
1116
+ # map coords -> full-var indices for span_fill
1117
+ full_coords, full_int = _safe_int_coords(adata.var_names)
1118
+
1119
+ # ---- write posterior + states on masked columns only ----
1120
+ masked_idx = np.nonzero(var_mask)[0]
1121
+ masked_coords, _ = _safe_int_coords(adata.var_names[var_mask])
1122
+
1123
+ # build mapping from coords order -> masked column order
1124
+ coord_to_pos_in_decoded = {int(c): i for i, c in enumerate(coords.tolist())}
1125
+ take = np.array(
1126
+ [coord_to_pos_in_decoded.get(int(c), -1) for c in masked_coords.tolist()], dtype=int
1127
+ )
1128
+ good = take >= 0
1129
+ masked_idx = masked_idx[good]
1130
+ take = take[good]
1131
+
1132
+ # states layer
1133
+ states_name = f"{prefix}_states"
1134
+ if states_name not in adata.layers:
1135
+ adata.layers[states_name] = np.full((adata.n_obs, adata.n_vars), -1, dtype=np.int8)
1136
+ adata.layers[states_name][:, masked_idx] = states[:, take].astype(np.int8)
1137
+ if states_name not in appended:
1138
+ appended.append(states_name)
1139
+
1140
+ # posterior layer (requested state)
1141
+ if write_posterior:
1142
+ t_idx = self.resolve_target_state_index(posterior_state)
1143
+ post = gamma[:, :, t_idx].astype(np.float32)
1144
+ post_name = f"{prefix}_posterior_{str(posterior_state).strip().lower().replace(' ', '_').replace('-', '_')}"
1145
+ if post_name not in adata.layers:
1146
+ adata.layers[post_name] = np.zeros((adata.n_obs, adata.n_vars), dtype=np.float32)
1147
+ adata.layers[post_name][:, masked_idx] = post[:, take]
1148
+ if post_name not in appended:
1149
+ appended.append(post_name)
1150
+
1151
+ # ---- feature layers ----
1152
+ if feature_sets is None:
1153
+ cfgd = self._cfg_to_dict(config)
1154
+ feature_sets = normalize_hmm_feature_sets(cfgd.get("hmm_feature_sets", None))
1155
+
1156
+ if not feature_sets:
1157
+ adata.uns[uns_key] = appended
1158
+ adata.uns[uns_flag] = True
1159
+ return None
1160
+
1161
+ # allocate outputs
1162
+ for group, fs in feature_sets.items():
1163
+ fmap = fs.get("features", {}) or {}
1164
+ if not fmap:
1165
+ continue
1166
+
1167
+ all_layer = f"{prefix}_all_{group}_features"
1168
+ if all_layer not in adata.layers:
1169
+ adata.layers[all_layer] = np.zeros((adata.n_obs, adata.n_vars), dtype=np.uint8)
1170
+ if f"{all_layer}_lengths" not in adata.layers:
1171
+ adata.layers[f"{all_layer}_lengths"] = np.zeros(
1172
+ (adata.n_obs, adata.n_vars), dtype=np.int32
1173
+ )
1174
+ for nm in (all_layer, f"{all_layer}_lengths"):
1175
+ if nm not in appended:
1176
+ appended.append(nm)
1177
+
1178
+ for feat in fmap.keys():
1179
+ nm = f"{prefix}_{feat}"
1180
+ if nm not in adata.layers:
1181
+ adata.layers[nm] = np.zeros(
1182
+ (adata.n_obs, adata.n_vars),
1183
+ dtype=np.int32 if nm.endswith("_lengths") else np.uint8,
1184
+ )
1185
+ if f"{nm}_lengths" not in adata.layers:
1186
+ adata.layers[f"{nm}_lengths"] = np.zeros(
1187
+ (adata.n_obs, adata.n_vars), dtype=np.int32
1188
+ )
1189
+ for outnm in (nm, f"{nm}_lengths"):
1190
+ if outnm not in appended:
1191
+ appended.append(outnm)
1192
+
1193
+ # classify runs per row
1194
+ target_idx = self.resolve_target_state_index(fs.get("state", "Modified"))
1195
+ membership = (
1196
+ (states == target_idx)
1197
+ if str(decode).lower() == "viterbi"
1198
+ else (gamma[:, :, target_idx] >= float(prob_threshold))
1199
+ )
1200
+
1201
+ for i in range(N):
1202
+ runs = self._runs_from_bool(membership[i].astype(bool))
1203
+ for s, e in runs:
1204
+ # genomic length in coords space
1205
+ glen = int(coords[e - 1]) - int(coords[s]) + 1 if e > s else 0
1206
+ if glen <= 0:
1207
+ continue
1208
+
1209
+ # pick feature bin
1210
+ chosen = None
1211
+ for feat_name, (lo, hi) in fmap.items():
1212
+ if float(lo) <= float(glen) < float(hi):
1213
+ chosen = feat_name
1214
+ break
1215
+ if chosen is None:
863
1216
  continue
864
- if isinstance(rng, (list, tuple)) and len(rng) >= 2:
865
- lo = _coerce_bound(rng[0]) or 0.0
866
- hi = _coerce_bound(rng[1])
867
- if hi is None:
868
- hi = _np.inf
869
- out[fname] = (float(lo), float(hi) if not _np.isinf(hi) else _np.inf)
1217
+
1218
+ # convert span to indices in full var grid
1219
+ if span_fill and full_int:
1220
+ left = int(np.searchsorted(full_coords, int(coords[s]), side="left"))
1221
+ right = int(np.searchsorted(full_coords, int(coords[e - 1]), side="right"))
1222
+ if left >= right:
1223
+ continue
1224
+ adata.layers[f"{prefix}_{chosen}"][i, left:right] = 1
1225
+ adata.layers[f"{prefix}_all_{group}_features"][i, left:right] = 1
870
1226
  else:
871
- val = _coerce_bound(rng)
872
- out[fname] = (0.0, float(val) if val is not None else _np.inf)
873
- return out
874
-
875
- canonical = {}
876
- for grp, info in parsed.items():
877
- if not isinstance(info, dict):
878
- feats = _coerce_feature_map(info)
879
- canonical[grp] = {"features": feats, "state": "Modified"}
880
- continue
881
- feats = _coerce_feature_map(info.get("features", info.get("ranges", {})))
882
- state = info.get("state", info.get("label", "Modified"))
883
- canonical[grp] = {"features": feats, "state": state}
884
- return canonical
885
-
886
- # ---------- resolve config dict ----------
887
- merged_cfg = {}
888
- if config is not None:
889
- if hasattr(config, "to_dict") and callable(getattr(config, "to_dict")):
890
- merged_cfg = dict(config.to_dict())
891
- elif isinstance(config, dict):
892
- merged_cfg = dict(config)
893
- else:
894
- try:
895
- merged_cfg = {k: getattr(config, k) for k in dir(config) if k.startswith("hmm_")}
896
- except Exception:
897
- merged_cfg = {}
898
-
899
- def _pick(key, local_val, fallback=None):
900
- if local_val is not None:
901
- return local_val
902
- if key in merged_cfg and merged_cfg[key] is not None:
903
- return merged_cfg[key]
904
- alt = f"hmm_{key}"
905
- if alt in merged_cfg and merged_cfg[alt] is not None:
906
- return merged_cfg[alt]
907
- return fallback
908
-
909
- # coerce booleans robustly
910
- footprints = _coerce_bool(_pick("footprints", footprints, merged_cfg.get("footprints", False)))
911
- accessible_patches = _coerce_bool(_pick("accessible_patches", accessible_patches, merged_cfg.get("accessible_patches", False)))
912
- cpg = _coerce_bool(_pick("cpg", cpg, merged_cfg.get("cpg", False)))
913
-
914
- threshold = float(_pick("threshold", threshold, merged_cfg.get("hmm_annotation_threshold", 0.5)))
915
- batch_size = int(_pick("batch_size", batch_size, merged_cfg.get("hmm_batch_size", 1024)))
916
- use_viterbi = _coerce_bool(_pick("use_viterbi", use_viterbi, merged_cfg.get("hmm_use_viterbi", False)))
917
-
918
- methbases = merged_cfg.get("hmm_methbases", None)
919
-
920
- # normalize whitespace/case for human-friendly inputs (but keep original tokens as given)
921
- methbases = [str(m).strip() for m in methbases if m is not None]
922
- if verbose:
923
- print("DEBUG: final methbases list =", methbases)
924
-
925
- # resolve feature sets: prefer canonical if it yields non-empty mapping, otherwise fall back to boolean defaults
926
- feature_sets = {}
927
- if "hmm_feature_sets" in merged_cfg and merged_cfg.get("hmm_feature_sets") is not None:
928
- cand = normalize_hmm_feature_sets(merged_cfg.get("hmm_feature_sets"))
929
- if isinstance(cand, dict) and len(cand) > 0:
930
- feature_sets = cand
1227
+ # only fill at masked indices
1228
+ cols = masked_idx[
1229
+ (masked_coords >= coords[s]) & (masked_coords <= coords[e - 1])
1230
+ ]
1231
+ if cols.size == 0:
1232
+ continue
1233
+ adata.layers[f"{prefix}_{chosen}"][i, cols] = 1
1234
+ adata.layers[f"{prefix}_all_{group}_features"][i, cols] = 1
1235
+
1236
+ # lengths derived from binary
1237
+ adata.layers[f"{prefix}_all_{group}_features_lengths"] = (
1238
+ self._write_lengths_for_binary_layer(
1239
+ np.asarray(adata.layers[f"{prefix}_all_{group}_features"])
1240
+ )
1241
+ )
1242
+ for feat in fmap.keys():
1243
+ nm = f"{prefix}_{feat}"
1244
+ adata.layers[f"{nm}_lengths"] = self._write_lengths_for_binary_layer(
1245
+ np.asarray(adata.layers[nm])
1246
+ )
1247
+
1248
+ adata.uns[uns_key] = appended
1249
+ adata.uns[uns_flag] = True
1250
+ return None
1251
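
Compared with the removed per-`obs_column` flow, the rewritten `annotate_adata()` expects the caller to hand in the already-subset observation matrix `X`, its `coords`, and a `var_mask` selecting the matching columns of `adata.var`. A usage sketch with placeholder names (prefix, feature group, site mask, and import path are all assumptions of this sketch):

    import anndata as ad
    import numpy as np
    from smftools.hmm.HMM import SingleBernoulliHMM  # assumed import path

    rng = np.random.default_rng(0)
    n_obs, n_vars = 4, 30
    adata = ad.AnnData(X=rng.integers(0, 2, size=(n_obs, n_vars)).astype(float))
    adata.var_names = [str(c) for c in range(1000, 1000 + n_vars)]

    var_mask = np.zeros(n_vars, dtype=bool)
    var_mask[::2] = True                                   # placeholder site mask
    coords = np.asarray(adata.var_names[var_mask], dtype=int)
    X = np.asarray(adata.X)[:, var_mask]

    hmm = SingleBernoulliHMM(n_states=2, init_emission=[0.1, 0.9])
    hmm.annotate_adata(
        adata,
        prefix="gpc_hmm",                                  # placeholder layer prefix
        X=X,
        coords=coords,
        var_mask=var_mask,
        decode="marginal",
        posterior_state="Modified",
        feature_sets={
            "accessible": {                                # placeholder feature group
                "state": "Modified",
                "features": {"small_patch": (0.0, 15.0), "large_patch": (15.0, np.inf)},
            }
        },
    )
    print(adata.uns["hmm_appended_layers"])
    # ['gpc_hmm_states', 'gpc_hmm_posterior_modified', 'gpc_hmm_all_accessible_features', ...]
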
+
1252
+ # ------------------------- row-copy helper (workflow uses it) -------------------------
1253
+
1254
+ def _ensure_final_layer_and_assign(
1255
+ self, final_adata, layer_name: str, subset_idx_mask: np.ndarray, sub_data
1256
+ ):
1257
+ """
1258
+ Assign rows from sub_data into final_adata.layers[layer_name] for rows where subset_idx_mask is True.
1259
+ Handles dense arrays. If you want sparse support, add it here.
1260
+ """
1261
+ n_final_obs, n_vars = final_adata.shape
1262
+ final_rows = np.nonzero(np.asarray(subset_idx_mask).astype(bool))[0]
1263
+ sub_arr = np.asarray(sub_data)
1264
+
1265
+ if layer_name not in final_adata.layers:
1266
+ final_adata.layers[layer_name] = np.zeros((n_final_obs, n_vars), dtype=sub_arr.dtype)
1267
+
1268
+ final_arr = np.asarray(final_adata.layers[layer_name])
1269
+ if sub_arr.shape[0] != final_rows.size:
1270
+ raise ValueError(f"Sub rows {sub_arr.shape[0]} != mask sum {final_rows.size}")
1271
+ final_arr[final_rows, :] = sub_arr
1272
+ final_adata.layers[layer_name] = final_arr
1273
+
1274
+
1275
+ # =============================================================================
1276
+ # Single-channel Bernoulli HMM
1277
+ # =============================================================================
1278
+
1279
+
1280
+ @register_hmm("single")
1281
+ class SingleBernoulliHMM(BaseHMM):
1282
+ """
1283
+ Bernoulli emission per state:
1284
+ emission[k] = P(obs==1 | state=k)
1285
+ """
1286
+
1287
+ def __init__(
1288
+ self,
1289
+ n_states: int = 2,
1290
+ init_emission: Optional[Sequence[float]] = None,
1291
+ eps: float = 1e-8,
1292
+ dtype: torch.dtype = torch.float64,
1293
+ ):
1294
+ """Initialize a single-channel Bernoulli HMM.
1295
+
1296
+ Args:
1297
+ n_states: Number of hidden states.
1298
+ init_emission: Initial emission probabilities per state.
1299
+ eps: Smoothing epsilon for probabilities.
1300
+ dtype: Torch dtype for parameters.
1301
+ """
1302
+ super().__init__(n_states=n_states, eps=eps, dtype=dtype)
1303
+ if init_emission is None:
1304
+ em = np.full((self.n_states,), 0.5, dtype=float)
1305
+ else:
1306
+ em = np.asarray(init_emission, dtype=float).reshape(-1)[: self.n_states]
1307
+ if em.size != self.n_states:
1308
+ em = np.full((self.n_states,), 0.5, dtype=float)
1309
+
1310
+ self.emission = nn.Parameter(torch.tensor(em, dtype=self.dtype), requires_grad=False)
1311
+ self._normalize_emission()
1312
+
1313
+ @classmethod
1314
+ def from_config(cls, cfg, *, override=None, device=None):
1315
+ """Create a single-channel Bernoulli HMM from config.
1316
+
1317
+ Args:
1318
+ cfg: Configuration mapping or object.
1319
+ override: Override values to apply.
1320
+ device: Optional device specifier.
1321
+
1322
+ Returns:
1323
+ Initialized SingleBernoulliHMM instance.
1324
+ """
1325
+ merged = cls._cfg_to_dict(cfg)
1326
+ if override:
1327
+ merged.update(override)
1328
+ n_states = int(merged.get("hmm_n_states", 2))
1329
+ eps = float(merged.get("hmm_eps", 1e-8))
1330
+ dtype = _resolve_dtype(merged.get("hmm_dtype", None))
1331
+ dtype = _coerce_dtype_for_device(dtype, device) # <<< NEW
1332
+ init_em = merged.get("hmm_init_emission_probs", merged.get("hmm_init_emission", None))
1333
+ model = cls(n_states=n_states, init_emission=init_em, eps=eps, dtype=dtype)
1334
+ if device is not None:
1335
+ model.to(torch.device(device) if isinstance(device, str) else device)
1336
+ model._persisted_cfg = merged
1337
+ return model
1338
+
1339
+ def _normalize_emission(self):
1340
+ """Normalize and clamp emission probabilities in-place."""
1341
+ with torch.no_grad():
1342
+ self.emission.data = self.emission.data.reshape(-1)
1343
+ if self.emission.data.numel() != self.n_states:
1344
+ self.emission.data = torch.full(
1345
+ (self.n_states,), 0.5, dtype=self.dtype, device=self.emission.device
1346
+ )
1347
+ self.emission.data = self.emission.data.clamp(min=self.eps, max=1.0 - self.eps)
1348
+
1349
+ def _ensure_device_dtype(self, device=None) -> torch.device:
1350
+ """Move emission parameters to the requested device/dtype."""
1351
+ device = super()._ensure_device_dtype(device)
1352
+ self.emission.data = self.emission.data.to(device=device, dtype=self.dtype)
1353
+ return device
1354
+
1355
+ def _state_modified_score(self) -> torch.Tensor:
1356
+ """Return per-state modified scores for ranking."""
1357
+ return self.emission.detach()
1358
+
1359
+ def _log_emission(self, obs: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
1360
+ """
1361
+ obs: (N,L), mask: (N,L) -> logB: (N,L,K)
1362
+ """
1363
+ p = self.emission # (K,)
1364
+ logp = torch.log(p + self.eps)
1365
+ log1mp = torch.log1p(-p + self.eps)
1366
+
1367
+ o = obs.unsqueeze(-1) # (N,L,1)
1368
+ logB = o * logp.view(1, 1, -1) + (1.0 - o) * log1mp.view(1, 1, -1)
1369
+ logB = torch.where(mask.unsqueeze(-1), logB, torch.zeros_like(logB))
1370
+ return logB
1371
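
The helper above is the standard Bernoulli log-likelihood, log B[n, l, k] = x * log(p_k) + (1 - x) * log(1 - p_k), with masked (uncovered) positions zeroed so they drop out of the forward/backward and Viterbi recursions. A tiny numeric illustration (values made up):

    import numpy as np

    p = np.array([0.2, 0.9])     # per-state P(obs == 1)
    print(np.log(p))             # log-likelihood of observing 1 in each state
    print(np.log1p(-p))          # log-likelihood of observing 0 in each state
    # A NaN/masked position contributes log B = 0 for every state, i.e. it is
    # uninformative rather than being treated as an unmethylated call.
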
+
1372
+ def _extra_save_payload(self) -> dict:
1373
+ """Return extra payload data for serialization."""
1374
+ return {"emission": self.emission.detach().cpu()}
1375
+
1376
+ def _load_extra_payload(self, payload: dict, *, device: torch.device):
1377
+ """Load serialized emission parameters.
1378
+
1379
+ Args:
1380
+ payload: Serialized payload dictionary.
1381
+ device: Target torch device.
1382
+ """
1383
+ with torch.no_grad():
1384
+ self.emission.data = payload["emission"].to(device=device, dtype=self.dtype)
1385
+ self._normalize_emission()
1386
+
1387
+ def fit_em(
1388
+ self,
1389
+ X: np.ndarray,
1390
+ coords: np.ndarray,
1391
+ *,
1392
+ device: torch.device,
1393
+ max_iter: int,
1394
+ tol: float,
1395
+ update_start: bool,
1396
+ update_trans: bool,
1397
+ update_emission: bool,
1398
+ verbose: bool,
1399
+ **kwargs,
1400
+ ) -> List[float]:
1401
+ """Run EM updates for a single-channel Bernoulli HMM.
1402
+
1403
+ Args:
1404
+ X: Observations array (N, L).
1405
+ coords: Coordinate array aligned to X.
1406
+ device: Torch device.
1407
+ max_iter: Maximum iterations.
1408
+ tol: Convergence tolerance.
1409
+ update_start: Whether to update start probabilities.
1410
+ update_trans: Whether to update transitions.
1411
+ update_emission: Whether to update emission parameters.
1412
+ verbose: Whether to log progress.
1413
+ **kwargs: Additional implementation-specific kwargs.
1414
+
1415
+ Returns:
1416
+ List of log-likelihood proxy values.
1417
+ """
1418
+ X = np.asarray(X, dtype=float)
1419
+ if X.ndim != 2:
1420
+ raise ValueError("SingleBernoulliHMM expects X shape (N,L).")
1421
+ obs = torch.tensor(np.nan_to_num(X, nan=0.0), dtype=self.dtype, device=device)
1422
+ mask = torch.tensor(~np.isnan(X), dtype=torch.bool, device=device)
1423
+
1424
+ eps = float(self.eps)
1425
+ K = self.n_states
1426
+ N, L = obs.shape
1427
+
1428
+ hist: List[float] = []
1429
+ for it in range(1, int(max_iter) + 1):
1430
+ gamma = self._forward_backward(obs, mask) # (N,L,K)
1431
+
1432
+ # log-likelihood proxy
1433
+ ll_proxy = float(torch.sum(torch.log(torch.clamp(gamma.sum(dim=2), min=eps))).item())
1434
+ hist.append(ll_proxy)
1435
+
1436
+ # expected start
1437
+ start_acc = gamma[:, 0, :].sum(dim=0) # (K,)
1438
+
1439
+ # expected transitions xi
1440
+ logB = self._log_emission(obs, mask)
1441
+ logA = torch.log(self.trans + eps)
1442
+ alpha = torch.empty((N, L, K), dtype=self.dtype, device=device)
1443
+ alpha[:, 0, :] = torch.log(self.start + eps).unsqueeze(0) + logB[:, 0, :]
1444
+ for t in range(1, L):
1445
+ prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0)
1446
+ alpha[:, t, :] = _logsumexp(prev, dim=1) + logB[:, t, :]
1447
+ beta = torch.empty((N, L, K), dtype=self.dtype, device=device)
1448
+ beta[:, L - 1, :] = 0.0
1449
+ for t in range(L - 2, -1, -1):
1450
+ temp = logA.unsqueeze(0) + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
1451
+ beta[:, t, :] = _logsumexp(temp, dim=2)
1452
+
1453
+ trans_acc = torch.zeros((K, K), dtype=self.dtype, device=device)
1454
+ for t in range(L - 1):
1455
+ valid_t = (mask[:, t] & mask[:, t + 1]).float().view(N, 1, 1)
1456
+ log_xi = (
1457
+ alpha[:, t, :].unsqueeze(2)
1458
+ + logA.unsqueeze(0)
1459
+ + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
1460
+ )
1461
+ log_norm = _logsumexp(log_xi.view(N, -1), dim=1).view(N, 1, 1)
1462
+ xi = (log_xi - log_norm).exp() * valid_t
1463
+ trans_acc += xi.sum(dim=0)
1464
+
1465
+ # emission update
1466
+ mask_f = mask.float().unsqueeze(-1) # (N,L,1)
1467
+ emit_num = (gamma * obs.unsqueeze(-1) * mask_f).sum(dim=(0, 1)) # (K,)
1468
+ emit_den = (gamma * mask_f).sum(dim=(0, 1)) # (K,)
1469
+
1470
+ with torch.no_grad():
1471
+ if update_start:
1472
+ new_start = start_acc + eps
1473
+ self.start.data = new_start / new_start.sum()
1474
+
1475
+ if update_trans:
1476
+ new_trans = trans_acc + eps
1477
+ rs = new_trans.sum(dim=1, keepdim=True)
1478
+ rs[rs == 0.0] = 1.0
1479
+ self.trans.data = new_trans / rs
1480
+
1481
+ if update_emission:
1482
+ new_em = (emit_num + eps) / (emit_den + 2.0 * eps)
1483
+ self.emission.data = new_em.clamp(min=eps, max=1.0 - eps)
1484
+
1485
+ self._normalize_params()
1486
+ self._normalize_emission()
931
1487
 
932
- if not feature_sets:
933
1488
  if verbose:
934
- print("[HMM.annotate_adata] no feature sets configured; nothing to append.")
935
- return None if in_place else adata
936
-
937
- if verbose:
938
- print("[HMM.annotate_adata] resolved feature sets:", list(feature_sets.keys()))
939
-
940
- # copy vs in-place
941
- if not in_place:
942
- adata = adata.copy()
943
-
944
- # prepare column names
945
- all_features = []
946
- combined_prefix = "Combined"
947
- for key, fs in feature_sets.items():
948
- feats = fs.get("features", {})
949
- if key == "cpg":
950
- all_features += [f"CpG_{f}" for f in feats]
951
- all_features.append(f"CpG_all_{key}_features")
952
- else:
953
- for methbase in methbases:
954
- all_features += [f"{methbase}_{f}" for f in feats]
955
- all_features.append(f"{methbase}_all_{key}_features")
956
- if len(methbases) > 1:
957
- all_features += [f"{combined_prefix}_{f}" for f in feats]
958
- all_features.append(f"{combined_prefix}_all_{key}_features")
959
-
960
- # initialize obs columns (unique lists per row)
961
- n_rows = adata.shape[0]
962
- for feature in all_features:
963
- if feature not in adata.obs.columns:
964
- adata.obs[feature] = [[] for _ in range(n_rows)]
965
- if f"{feature}_distances" not in adata.obs.columns:
966
- adata.obs[f"{feature}_distances"] = [None] * n_rows
967
- if f"n_{feature}" not in adata.obs.columns:
968
- adata.obs[f"n_{feature}"] = -1
969
-
970
- appended_layers: List[str] = []
971
-
972
- # device management
973
- if device is None:
974
- device = next(self.parameters()).device
975
- elif isinstance(device, str):
976
- device = _torch.device(device)
977
- self.to(device)
1489
+ logger.info(
1490
+ "[SingleBernoulliHMM.fit] iter=%s ll_proxy=%.6f",
1491
+ it,
1492
+ hist[-1],
1493
+ )
1494
+
1495
+ if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
1496
+ break
1497
+
1498
+ return hist
1499
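
The emission M-step in the loop above is the responsibility-weighted frequency of 1s per state, lightly smoothed by `eps`. A small numpy illustration of that update with made-up responsibilities:

    import numpy as np

    eps = 1e-8
    gamma = np.array([[[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]],
                      [[0.6, 0.4], [0.1, 0.9], [0.5, 0.5]]])   # (N, L, K) posteriors
    obs = np.array([[1.0, 1.0, 0.0],
                    [0.0, 1.0, 1.0]])                          # (N, L) binary calls
    mask = np.ones_like(obs, dtype=bool)                       # all positions covered

    num = (gamma * obs[..., None] * mask[..., None]).sum(axis=(0, 1))
    den = (gamma * mask[..., None]).sum(axis=(0, 1))
    print((num + eps) / (den + 2.0 * eps))   # updated P(obs == 1 | state), one value per state
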
+
1500
+ def adapt_emissions(
1501
+ self,
1502
+ X: np.ndarray,
1503
+ coords: Optional[np.ndarray] = None,
1504
+ *,
1505
+ device: Optional[Union[str, torch.device]] = None,
1506
+ iters: Optional[int] = None,
1507
+ max_iter: Optional[int] = None, # alias for your trainer
1508
+ verbose: bool = False,
1509
+ **kwargs,
1510
+ ):
1511
+ """Adapt emissions with legacy parameter names.
1512
+
1513
+ Args:
1514
+ X: Observations array.
1515
+ coords: Optional coordinate array.
1516
+ device: Device specifier.
1517
+ iters: Number of iterations.
1518
+ max_iter: Alias for iters.
1519
+ verbose: Whether to log progress.
1520
+ **kwargs: Additional kwargs forwarded to BaseHMM.adapt_emissions.
1521
+
1522
+ Returns:
1523
+ List of log-likelihood values.
1524
+ """
1525
+ if iters is None:
1526
+ iters = int(max_iter) if max_iter is not None else int(kwargs.pop("iters", 5))
1527
+ return super().adapt_emissions(
1528
+ np.asarray(X, dtype=float),
1529
+ coords if coords is not None else None,
1530
+ iters=int(iters),
1531
+ device=device,
1532
+ verbose=verbose,
1533
+ )
1534
+
1535
+
1536
+ # =============================================================================
1537
+ # Multi-channel Bernoulli HMM (union coordinate grid)
1538
+ # =============================================================================
1539
+
1540
+
1541
+ @register_hmm("multi")
1542
+ class MultiBernoulliHMM(BaseHMM):
1543
+ """
1544
+ Multi-channel independent Bernoulli:
1545
+ emission[k,c] = P(obs_c==1 | state=k)
1546
+ X must be (N,L,C) on a union coordinate grid; NaN per-channel allowed.
1547
+ """
1548
+
1549
+ def __init__(
1550
+ self,
1551
+ n_states: int = 2,
1552
+ n_channels: int = 2,
1553
+ init_emission: Optional[Any] = None,
1554
+ eps: float = 1e-8,
1555
+ dtype: torch.dtype = torch.float64,
1556
+ ):
1557
+ """Initialize a multi-channel Bernoulli HMM.
1558
+
1559
+ Args:
1560
+ n_states: Number of hidden states.
1561
+ n_channels: Number of observed channels.
1562
+ init_emission: Initial emission probabilities.
1563
+ eps: Smoothing epsilon for probabilities.
1564
+ dtype: Torch dtype for parameters.
1565
+ """
1566
+ super().__init__(n_states=n_states, eps=eps, dtype=dtype)
1567
+ self.n_channels = int(n_channels)
1568
+ if self.n_channels < 1:
1569
+ raise ValueError("n_channels must be >=1")
978
1570
 
979
- # helpers ---------------------------------------------------------------
980
- def _ensure_2d_array_like(matrix):
981
- arr = _np.asarray(matrix)
1571
+ if init_emission is None:
1572
+ em = np.full((self.n_states, self.n_channels), 0.5, dtype=float)
1573
+ else:
1574
+ arr = np.asarray(init_emission, dtype=float)
982
1575
  if arr.ndim == 1:
983
- arr = arr[_np.newaxis, :]
984
- elif arr.ndim > 2:
985
- # squeeze trailing singletons
986
- while arr.ndim > 2 and arr.shape[-1] == 1:
987
- arr = _np.squeeze(arr, axis=-1)
988
- if arr.ndim != 2:
989
- raise ValueError(f"Expected 2D sequence matrix; got array with shape {arr.shape}")
990
- return arr
991
-
992
- def calculate_batch_distances(intervals_list, threshold_local=0.9):
993
- results_local = []
994
- for intervals in intervals_list:
995
- if not isinstance(intervals, list) or len(intervals) == 0:
996
- results_local.append([])
997
- continue
998
- valid = [iv for iv in intervals if iv[2] > threshold_local]
999
- if len(valid) <= 1:
1000
- results_local.append([])
1001
- continue
1002
- valid = sorted(valid, key=lambda x: x[0])
1003
- dists = [(valid[i + 1][0] - (valid[i][0] + valid[i][1])) for i in range(len(valid) - 1)]
1004
- results_local.append(dists)
1005
- return results_local
1006
-
1007
- def classify_batch_local(predicted_states_batch, probabilities_batch, coordinates, classification_mapping, target_state="Modified"):
1008
- # Accept numpy arrays or torch tensors
1009
- if isinstance(predicted_states_batch, _torch.Tensor):
1010
- pred_np = predicted_states_batch.detach().cpu().numpy()
1576
+ arr = arr.reshape(-1, 1)
1577
+ em = np.repeat(arr[: self.n_states, :], self.n_channels, axis=1)
1011
1578
  else:
1012
- pred_np = _np.asarray(predicted_states_batch)
1013
- if isinstance(probabilities_batch, _torch.Tensor):
1014
- probs_np = probabilities_batch.detach().cpu().numpy()
1015
- else:
1016
- probs_np = _np.asarray(probabilities_batch)
1017
-
1018
- batch_size, L = pred_np.shape
1019
- all_classifications_local = []
1020
- # allow caller to pass arbitrary state labels mapping; default two-state mapping:
1021
- state_labels = ["Non-Modified", "Modified"]
1022
- try:
1023
- target_idx = state_labels.index(target_state)
1024
- except ValueError:
1025
- target_idx = 1 # fallback
1026
-
1027
- for b in range(batch_size):
1028
- predicted_states = pred_np[b]
1029
- probabilities = probs_np[b]
1030
- regions = []
1031
- current_start, current_length, current_probs = None, 0, []
1032
- for i, state_index in enumerate(predicted_states):
1033
- state_prob = float(probabilities[i][state_index])
1034
- if state_index == target_idx:
1035
- if current_start is None:
1036
- current_start = i
1037
- current_length += 1
1038
- current_probs.append(state_prob)
1039
- elif current_start is not None:
1040
- regions.append((current_start, current_length, float(_np.mean(current_probs))))
1041
- current_start, current_length, current_probs = None, 0, []
1042
- if current_start is not None:
1043
- regions.append((current_start, current_length, float(_np.mean(current_probs))))
1044
-
1045
- final = []
1046
- for start, length, prob in regions:
1047
- # compute genomic length try/catch
1048
- try:
1049
- feature_length = int(coordinates[start + length - 1]) - int(coordinates[start]) + 1
1050
- except Exception:
1051
- feature_length = int(length)
1052
-
1053
- # classification_mapping values are (lo, hi) tuples or lists
1054
- label = None
1055
- for ftype, rng in classification_mapping.items():
1056
- lo, hi = rng[0], rng[1]
1057
- try:
1058
- if lo <= feature_length < hi:
1059
- label = ftype
1060
- break
1061
- except Exception:
1062
- continue
1063
- if label is None:
1064
- # fallback to first mapping key or 'unknown'
1065
- label = next(iter(classification_mapping.keys()), "feature")
1066
-
1067
- # Store reported start coordinate in same coordinate system as `coordinates`.
1068
- try:
1069
- genomic_start = int(coordinates[start])
1070
- except Exception:
1071
- genomic_start = int(start)
1072
- final.append((genomic_start, feature_length, label, prob))
1073
- all_classifications_local.append(final)
1074
- return all_classifications_local
1075
-
1076
- # -----------------------------------------------------------------------
1077
-
1078
- # Ensure obs_column is categorical-like for iteration
1079
- sseries = adata.obs[obs_column]
1080
- if not pd.api.types.is_categorical_dtype(sseries):
1081
- sseries = sseries.astype("category")
1082
- references = list(sseries.cat.categories)
1083
-
1084
- ref_iter = references if not verbose else _tqdm(references, desc="Processing References")
1085
- for ref in ref_iter:
1086
- # subset reads with this obs_column value
1087
- ref_mask = adata.obs[obs_column] == ref
1088
- ref_subset = adata[ref_mask].copy()
1089
- combined_mask = None
1090
-
1091
- # per-methbase processing
1092
- for methbase in methbases:
1093
- key_lower = methbase.strip().lower()
1094
-
1095
- # map several common synonyms -> canonical lookup
1096
- if key_lower in ("a",):
1097
- pos_mask = ref_subset.var.get(f"{ref}_strand_FASTA_base") == "A"
1098
- elif key_lower in ("c", "any_c", "anyc", "any-c"):
1099
- # unify 'C' or 'any_C' names to the any_C var column
1100
- pos_mask = ref_subset.var.get(f"{ref}_any_C_site") == True
1101
- elif key_lower in ("gpc", "gpc_site", "gpc-site"):
1102
- pos_mask = ref_subset.var.get(f"{ref}_GpC_site") == True
1103
- elif key_lower in ("cpg", "cpg_site", "cpg-site"):
1104
- pos_mask = ref_subset.var.get(f"{ref}_CpG_site") == True
1105
- else:
1106
- # try a best-effort: if a column named f"{ref}_{methbase}_site" exists, use it
1107
- alt_col = f"{ref}_{methbase}_site"
1108
- pos_mask = ref_subset.var.get(alt_col, None)
1579
+ em = arr[: self.n_states, : self.n_channels]
1580
+ if em.shape != (self.n_states, self.n_channels):
1581
+ em = np.full((self.n_states, self.n_channels), 0.5, dtype=float)
1109
1582
 
1110
- if pos_mask is None:
1111
- continue
1112
- combined_mask = pos_mask if combined_mask is None else (combined_mask | pos_mask)
1583
+ self.emission = nn.Parameter(torch.tensor(em, dtype=self.dtype), requires_grad=False)
1584
+ self._normalize_emission()
1113
1585
 
1114
- if pos_mask.sum() == 0:
1115
- continue
1586
+ @classmethod
1587
+ def from_config(cls, cfg, *, override=None, device=None):
1588
+ """Create a multi-channel Bernoulli HMM from config.
1116
1589
 
1117
- sub = ref_subset[:, pos_mask]
1118
- # choose matrix
1119
- matrix = sub.layers[layer] if (layer and layer in sub.layers) else sub.X
1120
- matrix = _ensure_2d_array_like(matrix)
1121
- n_reads = matrix.shape[0]
1590
+ Args:
1591
+ cfg: Configuration mapping or object.
1592
+ override: Override values to apply.
1593
+ device: Optional device specifier.
1122
1594
 
1123
- # coordinates for this sub (try to convert to ints, else fallback to indices)
1124
- try:
1125
- coords = _np.asarray(sub.var_names, dtype=int)
1126
- except Exception:
1127
- coords = _np.arange(sub.shape[1], dtype=int)
1128
-
1129
- # chunked processing
1130
- chunk_iter = range(0, n_reads, batch_size)
1131
- if verbose:
1132
- chunk_iter = _tqdm(list(chunk_iter), desc=f"{ref}:{methbase} chunks")
1133
- for start_idx in chunk_iter:
1134
- stop_idx = min(n_reads, start_idx + batch_size)
1135
- chunk = matrix[start_idx:stop_idx]
1136
- seqs = chunk.tolist()
1137
- # posterior marginals
1138
- gammas = self.predict(seqs, impute_strategy="ignore", device=device)
1139
- if len(gammas) == 0:
1140
- continue
1141
- probs_batch = _np.stack(gammas, axis=0) # (B, L, K)
1142
- if use_viterbi:
1143
- paths, _scores = self.batch_viterbi(seqs, impute_strategy="ignore", device=device)
1144
- pred_states = _np.asarray(paths)
1145
- else:
1146
- pred_states = _np.argmax(probs_batch, axis=2)
1595
+ Returns:
1596
+ Initialized MultiBernoulliHMM instance.
1597
+ """
1598
+ merged = cls._cfg_to_dict(cfg)
1599
+ if override:
1600
+ merged.update(override)
1601
+ n_states = int(merged.get("hmm_n_states", 2))
1602
+ eps = float(merged.get("hmm_eps", 1e-8))
1603
+ dtype = _resolve_dtype(merged.get("hmm_dtype", None))
1604
+ dtype = _coerce_dtype_for_device(dtype, device) # <<< NEW
1605
+ n_channels = int(merged.get("hmm_n_channels", merged.get("n_channels", 2)))
1606
+ init_em = merged.get("hmm_init_emission_probs", None)
1607
+ model = cls(
1608
+ n_states=n_states, n_channels=n_channels, init_emission=init_em, eps=eps, dtype=dtype
1609
+ )
1610
+ if device is not None:
1611
+ model.to(torch.device(device) if isinstance(device, str) else device)
1612
+ model._persisted_cfg = merged
1613
+ return model
1147
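
As read from `from_config()` above, the recognized config keys are `hmm_n_states`, `hmm_eps`, `hmm_dtype`, `hmm_n_channels` (or `n_channels`), and `hmm_init_emission_probs`. A construction sketch from a plain dict; the import path is assumed and the channel interpretation is only an example:

    from smftools.hmm.HMM import MultiBernoulliHMM  # assumed import path

    cfg = {
        "hmm_n_states": 2,
        "hmm_n_channels": 2,                                   # e.g. a GpC and a CpG channel
        "hmm_eps": 1e-8,
        "hmm_init_emission_probs": [[0.1, 0.1], [0.9, 0.8]],   # (n_states, n_channels)
    }
    model = MultiBernoulliHMM.from_config(cfg, device="cpu")
    print(model.emission)   # (2, 2) tensor, clamped into (eps, 1 - eps)
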
1614
 
1148
- # For each feature group, classify separately and write back
1149
- for key, fs in feature_sets.items():
1150
- if key == "cpg":
1151
- continue
1152
- state_target = fs.get("state", "Modified")
1153
- feature_map = fs.get("features", {})
1154
- classifications = classify_batch_local(pred_states, probs_batch, coords, feature_map, target_state=state_target)
1155
-
1156
- # write results to adata.obs rows (use original index names)
1157
- row_indices = list(sub.obs.index[start_idx:stop_idx])
1158
- for i_local, idx in enumerate(row_indices):
1159
- for start, length, label, prob in classifications[i_local]:
1160
- col_name = f"{methbase}_{label}"
1161
- all_col = f"{methbase}_all_{key}_features"
1162
- adata.obs.at[idx, col_name].append([start, length, prob])
1163
- adata.obs.at[idx, all_col].append([start, length, prob])
1164
-
1165
- # Combined subset (if multiple methbases)
1166
- if len(methbases) > 1 and (combined_mask is not None) and (combined_mask.sum() > 0):
1167
- comb = ref_subset[:, combined_mask]
1168
- if comb.shape[1] > 0:
1169
- matrix = comb.layers[layer] if (layer and layer in comb.layers) else comb.X
1170
- matrix = _ensure_2d_array_like(matrix)
1171
- n_reads_comb = matrix.shape[0]
1172
- try:
1173
- coords_comb = _np.asarray(comb.var_names, dtype=int)
1174
- except Exception:
1175
- coords_comb = _np.arange(comb.shape[1], dtype=int)
1176
-
1177
- chunk_iter = range(0, n_reads_comb, batch_size)
1178
- if verbose:
1179
- chunk_iter = _tqdm(list(chunk_iter), desc=f"{ref}:Combined chunks")
1180
- for start_idx in chunk_iter:
1181
- stop_idx = min(n_reads_comb, start_idx + batch_size)
1182
- chunk = matrix[start_idx:stop_idx]
1183
- seqs = chunk.tolist()
1184
- gammas = self.predict(seqs, impute_strategy="ignore", device=device)
1185
- if len(gammas) == 0:
1186
- continue
1187
- probs_batch = _np.stack(gammas, axis=0)
1188
- if use_viterbi:
1189
- paths, _scores = self.batch_viterbi(seqs, impute_strategy="ignore", device=device)
1190
- pred_states = _np.asarray(paths)
1191
- else:
1192
- pred_states = _np.argmax(probs_batch, axis=2)
1193
-
1194
- for key, fs in feature_sets.items():
1195
- if key == "cpg":
1196
- continue
1197
- state_target = fs.get("state", "Modified")
1198
- feature_map = fs.get("features", {})
1199
- classifications = classify_batch_local(pred_states, probs_batch, coords_comb, feature_map, target_state=state_target)
1200
- row_indices = list(comb.obs.index[start_idx:stop_idx])
1201
- for i_local, idx in enumerate(row_indices):
1202
- for start, length, label, prob in classifications[i_local]:
1203
- adata.obs.at[idx, f"{combined_prefix}_{label}"].append([start, length, prob])
1204
- adata.obs.at[idx, f"{combined_prefix}_all_{key}_features"].append([start, length, prob])
1205
-
1206
- # CpG special handling
1207
- if "cpg" in feature_sets and feature_sets.get("cpg") is not None:
1208
- cpg_iter = references if not verbose else _tqdm(references, desc="Processing CpG")
1209
- for ref in cpg_iter:
1210
- ref_mask = adata.obs[obs_column] == ref
1211
- ref_subset = adata[ref_mask].copy()
1212
- pos_mask = ref_subset.var[f"{ref}_CpG_site"] == True
1213
- if pos_mask.sum() == 0:
1214
- continue
1215
- cpg_sub = ref_subset[:, pos_mask]
1216
- matrix = cpg_sub.layers[layer] if (layer and layer in cpg_sub.layers) else cpg_sub.X
1217
- matrix = _ensure_2d_array_like(matrix)
1218
- n_reads = matrix.shape[0]
1219
- try:
1220
- coords_cpg = _np.asarray(cpg_sub.var_names, dtype=int)
1221
- except Exception:
1222
- coords_cpg = _np.arange(cpg_sub.shape[1], dtype=int)
1223
-
1224
- chunk_iter = range(0, n_reads, batch_size)
1225
- if verbose:
1226
- chunk_iter = _tqdm(list(chunk_iter), desc=f"{ref}:CpG chunks")
1227
- for start_idx in chunk_iter:
1228
- stop_idx = min(n_reads, start_idx + batch_size)
1229
- chunk = matrix[start_idx:stop_idx]
1230
- seqs = chunk.tolist()
1231
- gammas = self.predict(seqs, impute_strategy="ignore", device=device)
1232
- if len(gammas) == 0:
1233
- continue
1234
- probs_batch = _np.stack(gammas, axis=0)
1235
- if use_viterbi:
1236
- paths, _scores = self.batch_viterbi(seqs, impute_strategy="ignore", device=device)
1237
- pred_states = _np.asarray(paths)
1238
- else:
1239
- pred_states = _np.argmax(probs_batch, axis=2)
1240
-
1241
- fs = feature_sets["cpg"]
1242
- state_target = fs.get("state", "Modified")
1243
- feature_map = fs.get("features", {})
1244
- classifications = classify_batch_local(pred_states, probs_batch, coords_cpg, feature_map, target_state=state_target)
1245
- row_indices = list(cpg_sub.obs.index[start_idx:stop_idx])
1246
- for i_local, idx in enumerate(row_indices):
1247
- for start, length, label, prob in classifications[i_local]:
1248
- adata.obs.at[idx, f"CpG_{label}"].append([start, length, prob])
1249
- adata.obs.at[idx, f"CpG_all_cpg_features"].append([start, length, prob])
1250
-
1251
- # finalize: convert intervals into binary layers and distances
1252
- try:
1253
- coordinates = _np.asarray(adata.var_names, dtype=int)
1254
- coords_are_ints = True
1255
- except Exception:
1256
- coordinates = _np.arange(adata.shape[1], dtype=int)
1257
- coords_are_ints = False
1258
-
1259
- features_iter = all_features if not verbose else _tqdm(all_features, desc="Finalizing Layers")
1260
- for feature in features_iter:
1261
- bin_matrix = _np.zeros((adata.shape[0], adata.shape[1]), dtype=int)
1262
- counts = _np.zeros(adata.shape[0], dtype=int)
1263
-
1264
- # new: integer-length layer (0 where not inside a feature)
1265
- len_matrix = _np.zeros((adata.shape[0], adata.shape[1]), dtype=int)
1266
-
1267
- for row_idx, intervals in enumerate(adata.obs[feature]):
1268
- if not isinstance(intervals, list):
1269
- intervals = []
1270
- for start, length, prob in intervals:
1271
- if prob > threshold:
1272
- if coords_are_ints:
1273
- # map genomic start/length into index interval [start_idx, end_idx)
1274
- start_idx = _np.searchsorted(coordinates, int(start), side="left")
1275
- end_idx = _np.searchsorted(coordinates, int(start) + int(length) - 1, side="right")
1276
- else:
1277
- start_idx = int(start)
1278
- end_idx = start_idx + int(length)
1279
-
1280
- start_idx = max(0, min(start_idx, adata.shape[1]))
1281
- end_idx = max(0, min(end_idx, adata.shape[1]))
1282
-
1283
- if start_idx < end_idx:
1284
- span = end_idx - start_idx # number of positions covered
1285
- # set binary mask
1286
- bin_matrix[row_idx, start_idx:end_idx] = 1
1287
- # set length mask: use maximum in case of overlaps
1288
- existing = len_matrix[row_idx, start_idx:end_idx]
1289
- len_matrix[row_idx, start_idx:end_idx] = _np.maximum(existing, span)
1290
- counts[row_idx] += 1
1291
-
1292
- # write binary layer and length layer, track appended names
1293
- adata.layers[feature] = bin_matrix
1294
- appended_layers.append(feature)
1295
-
1296
- # name the integer-length layer (choose suffix you like)
1297
- length_layer_name = f"{feature}_lengths"
1298
- adata.layers[length_layer_name] = len_matrix
1299
- appended_layers.append(length_layer_name)
1300
-
1301
- adata.obs[f"n_{feature}"] = counts
1302
- adata.obs[f"{feature}_distances"] = calculate_batch_distances(adata.obs[feature].tolist(), threshold)
1303
-
1304
- # Merge appended_layers into adata.uns[uns_key] (preserve pre-existing and avoid duplicates)
1305
- existing = list(adata.uns.get(uns_key, [])) if adata.uns.get(uns_key) is not None else []
1306
- new_list = existing + [l for l in appended_layers if l not in existing]
1307
- adata.uns[uns_key] = new_list
1308
-
1309
- # Mark that the annotation has been completed
1310
- adata.uns[uns_flag] = True
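The removed finalization loop above maps each stored (start, length, prob) interval onto column indices before writing the binary and length layers; a minimal sketch of that coordinate-to-index mapping, assuming integer genomic coordinates in adata.var_names (values below are toy placeholders):

import numpy as np

coordinates = np.array([100, 105, 110, 120, 130])  # integer var_names (genomic positions)
start, length = 105, 20                            # one feature interval: positions 105..124

# Half-open index interval [start_idx, end_idx), as in the loop above.
start_idx = np.searchsorted(coordinates, start, side="left")              # -> 1
end_idx = np.searchsorted(coordinates, start + length - 1, side="right")  # -> 4

bin_row = np.zeros(coordinates.size, dtype=int)
bin_row[start_idx:end_idx] = 1                     # -> [0 1 1 1 0]

When var_names are not integers, the original code falls back to treating start and length as index positions directly.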
1615
+ def _normalize_emission(self):
1616
+ """Normalize and clamp emission probabilities in-place."""
1617
+ with torch.no_grad():
1618
+ self.emission.data = self.emission.data.reshape(self.n_states, self.n_channels)
1619
+ self.emission.data = self.emission.data.clamp(min=self.eps, max=1.0 - self.eps)
1620
+
1621
+ def _ensure_device_dtype(self, device=None) -> torch.device:
1622
+ """Move emission parameters to the requested device/dtype."""
1623
+ device = super()._ensure_device_dtype(device)
1624
+ self.emission.data = self.emission.data.to(device=device, dtype=self.dtype)
1625
+ return device
1626
+
1627
+ def _state_modified_score(self) -> torch.Tensor:
1628
+ """Return per-state modified scores for ranking."""
1629
+ # more “modified” = higher mean P(1) across channels
1630
+ return self.emission.detach().mean(dim=1)
1631
+
1632
+ def _log_emission(self, obs: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
1633
+ """
1634
+ obs: (N,L,C), mask: (N,L,C) -> logB: (N,L,K)
1635
+ """
1636
+ N, L, C = obs.shape
1637
+ K = self.n_states
1638
+
1639
+ p = self.emission # (K,C)
1640
+ logp = torch.log(p + self.eps).view(1, 1, K, C)
1641
+ log1mp = torch.log1p(-p + self.eps).view(1, 1, K, C)
1642
+
1643
+ o = obs.unsqueeze(2) # (N,L,1,C)
1644
+ m = mask.unsqueeze(2) # (N,L,1,C)
1311
1645
 
1312
- return None if in_place else adata
1646
+ logBC = o * logp + (1.0 - o) * log1mp
1647
+ logBC = torch.where(m, logBC, torch.zeros_like(logBC))
1648
+ return logBC.sum(dim=3) # sum channels -> (N,L,K)
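A small self-contained check of the masked Bernoulli log-emission term computed above, on toy tensors (shapes and values are illustrative only):

import torch

obs = torch.tensor([[[1.0, 0.0], [1.0, 1.0]]])        # (N=1, L=2, C=2)
mask = torch.tensor([[[True, True], [True, False]]])   # one missing channel at the second position
p = torch.tensor([[0.9, 0.8], [0.1, 0.2]])             # per-state P(obs=1), (K=2, C=2)
eps = 1e-8

logp = torch.log(p + eps).view(1, 1, 2, 2)
log1mp = torch.log1p(-p + eps).view(1, 1, 2, 2)
o, m = obs.unsqueeze(2), mask.unsqueeze(2)             # (N, L, 1, C)

logBC = o * logp + (1.0 - o) * log1mp                  # per-channel Bernoulli log-likelihood
logBC = torch.where(m, logBC, torch.zeros_like(logBC)) # masked channels contribute 0
logB = logBC.sum(dim=3)                                # (N, L, K)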
1313
1649
 
1314
- def merge_intervals_in_layer(
1650
+ def _extra_save_payload(self) -> dict:
1651
+ """Return extra payload data for serialization."""
1652
+ return {"n_channels": int(self.n_channels), "emission": self.emission.detach().cpu()}
1653
+
1654
+ def _load_extra_payload(self, payload: dict, *, device: torch.device):
1655
+ """Load serialized emission parameters.
1656
+
1657
+ Args:
1658
+ payload: Serialized payload dictionary.
1659
+ device: Target torch device.
1660
+ """
1661
+ self.n_channels = int(payload.get("n_channels", self.n_channels))
1662
+ with torch.no_grad():
1663
+ self.emission.data = payload["emission"].to(device=device, dtype=self.dtype)
1664
+ self._normalize_emission()
1665
+
1666
+ def fit_em(
1315
1667
  self,
1316
- adata,
1317
- layer: str,
1318
- distance_threshold: int = 0,
1319
- merged_suffix: str = "_merged",
1320
- length_layer_suffix: str = "_lengths",
1321
- update_obs: bool = True,
1322
- prob_strategy: str = "mean", # 'mean'|'max'|'orig_first'
1323
- inplace: bool = True,
1324
- overwrite: bool = False,
1668
+ X: np.ndarray,
1669
+ coords: np.ndarray,
1670
+ *,
1671
+ device: torch.device,
1672
+ max_iter: int,
1673
+ tol: float,
1674
+ update_start: bool,
1675
+ update_trans: bool,
1676
+ update_emission: bool,
1677
+ verbose: bool,
1678
+ **kwargs,
1679
+ ) -> List[float]:
1680
+ """Run EM updates for a multi-channel Bernoulli HMM.
1681
+
1682
+ Args:
1683
+ X: Observations array (N, L, C).
1684
+ coords: Coordinate array aligned to X.
1685
+ device: Torch device.
1686
+ max_iter: Maximum iterations.
1687
+ tol: Convergence tolerance.
1688
+ update_start: Whether to update start probabilities.
1689
+ update_trans: Whether to update transitions.
1690
+ update_emission: Whether to update emission parameters.
1691
+ verbose: Whether to log progress.
1692
+ **kwargs: Additional implementation-specific kwargs.
1693
+
1694
+ Returns:
1695
+ List of log-likelihood proxy values.
1696
+ """
1697
+ X = np.asarray(X, dtype=float)
1698
+ if X.ndim != 3:
1699
+ raise ValueError("MultiBernoulliHMM expects X shape (N,L,C).")
1700
+
1701
+ obs = torch.tensor(np.nan_to_num(X, nan=0.0), dtype=self.dtype, device=device)
1702
+ mask = torch.tensor(~np.isnan(X), dtype=torch.bool, device=device)
1703
+
1704
+ eps = float(self.eps)
1705
+ K = self.n_states
1706
+ N, L, C = obs.shape
1707
+
1708
+ self._ensure_n_channels(C, device)
1709
+
1710
+ hist: List[float] = []
1711
+ for it in range(1, int(max_iter) + 1):
1712
+ gamma = self._forward_backward(obs, mask) # (N,L,K)
1713
+
1714
+ ll_proxy = float(torch.sum(torch.log(torch.clamp(gamma.sum(dim=2), min=eps))).item())
1715
+ hist.append(ll_proxy)
1716
+
1717
+ # expected start
1718
+ start_acc = gamma[:, 0, :].sum(dim=0) # (K,)
1719
+
1720
+ # transitions xi
1721
+ logB = self._log_emission(obs, mask)
1722
+ logA = torch.log(self.trans + eps)
1723
+ alpha = torch.empty((N, L, K), dtype=self.dtype, device=device)
1724
+ alpha[:, 0, :] = torch.log(self.start + eps).unsqueeze(0) + logB[:, 0, :]
1725
+ for t in range(1, L):
1726
+ prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0)
1727
+ alpha[:, t, :] = _logsumexp(prev, dim=1) + logB[:, t, :]
1728
+ beta = torch.empty((N, L, K), dtype=self.dtype, device=device)
1729
+ beta[:, L - 1, :] = 0.0
1730
+ for t in range(L - 2, -1, -1):
1731
+ temp = logA.unsqueeze(0) + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
1732
+ beta[:, t, :] = _logsumexp(temp, dim=2)
1733
+
1734
+ trans_acc = torch.zeros((K, K), dtype=self.dtype, device=device)
1735
+ # valid timestep if at least one channel observed at both positions
1736
+ valid_pos = mask.any(dim=2) # (N,L)
1737
+ for t in range(L - 1):
1738
+ valid_t = (valid_pos[:, t] & valid_pos[:, t + 1]).float().view(N, 1, 1)
1739
+ log_xi = (
1740
+ alpha[:, t, :].unsqueeze(2)
1741
+ + logA.unsqueeze(0)
1742
+ + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
1743
+ )
1744
+ log_norm = _logsumexp(log_xi.view(N, -1), dim=1).view(N, 1, 1)
1745
+ xi = (log_xi - log_norm).exp() * valid_t
1746
+ trans_acc += xi.sum(dim=0)
1747
+
1748
+ # emission update per channel
1749
+ gamma_k = gamma.unsqueeze(-1) # (N,L,K,1)
1750
+ obs_c = obs.unsqueeze(2) # (N,L,1,C)
1751
+ mask_c = mask.unsqueeze(2).float() # (N,L,1,C)
1752
+
1753
+ emit_num = (gamma_k * obs_c * mask_c).sum(dim=(0, 1)) # (K,C)
1754
+ emit_den = (gamma_k * mask_c).sum(dim=(0, 1)) # (K,C)
1755
+
1756
+ with torch.no_grad():
1757
+ if update_start:
1758
+ new_start = start_acc + eps
1759
+ self.start.data = new_start / new_start.sum()
1760
+
1761
+ if update_trans:
1762
+ new_trans = trans_acc + eps
1763
+ rs = new_trans.sum(dim=1, keepdim=True)
1764
+ rs[rs == 0.0] = 1.0
1765
+ self.trans.data = new_trans / rs
1766
+
1767
+ if update_emission:
1768
+ new_em = (emit_num + eps) / (emit_den + 2.0 * eps)
1769
+ self.emission.data = new_em.clamp(min=eps, max=1.0 - eps)
1770
+
1771
+ self._normalize_params()
1772
+ self._normalize_emission()
1773
+
1774
+ if verbose:
1775
+ logger.info(
1776
+ "[MultiBernoulliHMM.fit] iter=%s ll_proxy=%.6f",
1777
+ it,
1778
+ hist[-1],
1779
+ )
1780
+
1781
+ if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
1782
+ break
1783
+
1784
+ return hist
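The emission M-step above is a posterior-weighted average of the observations per state and channel, with eps smoothing; a NumPy sketch of the same update on toy arrays (names and values are only illustrative):

import numpy as np

eps = 1e-8
gamma = np.array([[[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]]])   # posteriors, (N=1, L=3, K=2)
obs = np.array([[[1.0], [0.0], [1.0]]])                     # observations, (N=1, L=3, C=1)
mask = np.ones_like(obs, dtype=bool)                        # everything observed

gamma_k = gamma[..., None]                                  # (N, L, K, 1)
obs_c = obs[:, :, None, :]                                  # (N, L, 1, C)
mask_c = mask[:, :, None, :].astype(float)

emit_num = (gamma_k * obs_c * mask_c).sum(axis=(0, 1))      # (K, C)
emit_den = (gamma_k * mask_c).sum(axis=(0, 1))              # (K, C)
new_emission = (emit_num + eps) / (emit_den + 2.0 * eps)    # smoothed P(obs=1 | state), ~[[0.889], [0.333]]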
1785
+
1786
+ def adapt_emissions(
1787
+ self,
1788
+ X: np.ndarray,
1789
+ coords: Optional[np.ndarray] = None,
1790
+ *,
1791
+ device: Optional[Union[str, torch.device]] = None,
1792
+ iters: Optional[int] = None,
1793
+ max_iter: Optional[int] = None, # alias for iters, kept for trainer compatibility
1325
1794
  verbose: bool = False,
1795
+ **kwargs,
1326
1796
  ):
1797
+ """Adapt emissions with legacy parameter names.
1798
+
1799
+ Args:
1800
+ X: Observations array.
1801
+ coords: Optional coordinate array.
1802
+ device: Device specifier.
1803
+ iters: Number of iterations.
1804
+ max_iter: Alias for iters.
1805
+ verbose: Whether to log progress.
1806
+ **kwargs: Additional kwargs forwarded to BaseHMM.adapt_emissions.
1807
+
1808
+ Returns:
1809
+ List of log-likelihood values.
1327
1810
  """
1328
- Merge intervals in `adata.layers[layer]` that are within `distance_threshold`.
1329
- Writes new merged binary layer named f"{layer}{merged_suffix}" and length layer
1330
- f"{layer}{merged_suffix}{length_layer_suffix}". Optionally updates adata.obs for merged intervals.
1331
-
1332
- Parameters
1333
- ----------
1334
- layer : str
1335
- Name of original binary layer (0/1 mask).
1336
- distance_threshold : int
1337
- Merge intervals whose gap <= this threshold (genomic coords if adata.var_names are ints).
1338
- merged_suffix : str
1339
- Suffix appended to original layer for the merged binary layer (default "_merged").
1340
- length_layer_suffix : str
1341
- Suffix appended after merged suffix for the lengths layer (default "_lengths").
1342
- update_obs : bool
1343
- If True, create/update adata.obs[f"{layer}{merged_suffix}"] with merged intervals.
1344
- prob_strategy : str
1345
- How to combine probs when merging ('mean', 'max', 'orig_first').
1346
- inplace : bool
1347
- If False, returns a new AnnData with changes (original untouched).
1348
- overwrite : bool
1349
- If True, will overwrite existing merged layers / obs entries; otherwise will error if they exist.
1350
- """
1351
- import numpy as _np
1352
- from scipy.sparse import issparse
1811
+ if iters is None:
1812
+ iters = int(max_iter) if max_iter is not None else int(kwargs.pop("iters", 5))
1813
+ return super().adapt_emissions(
1814
+ np.asarray(X, dtype=float),
1815
+ coords if coords is not None else None,
1816
+ iters=int(iters),
1817
+ device=device,
1818
+ verbose=verbose,
1819
+ )
1353
1820
 
1354
- if not inplace:
1355
- adata = adata.copy()
1821
+ def _ensure_n_channels(self, C: int, device: torch.device):
1822
+ """Expand emission parameters when channel count changes.
1356
1823
 
1357
- merged_bin_name = f"{layer}{merged_suffix}"
1358
- merged_len_name = f"{layer}{merged_suffix}{length_layer_suffix}"
1824
+ Args:
1825
+ C: Target channel count.
1826
+ device: Torch device for the new parameters.
1827
+ """
1828
+ C = int(C)
1829
+ if C == self.n_channels:
1830
+ return
1831
+ with torch.no_grad():
1832
+ old = self.emission.detach().cpu().numpy() # (K, Cold)
1833
+ K = old.shape[0]
1834
+ new = np.full((K, C), 0.5, dtype=float)
1835
+ m = min(old.shape[1], C)
1836
+ new[:, :m] = old[:, :m]
1837
+ if C > old.shape[1]:
1838
+ fill = old.mean(axis=1, keepdims=True)
1839
+ new[:, m:] = fill
1840
+ self.n_channels = C
1841
+ self.emission = nn.Parameter(
1842
+ torch.tensor(new, dtype=self.dtype, device=device), requires_grad=False
1843
+ )
1844
+ self._normalize_emission()
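The channel expansion above keeps existing per-channel probabilities and seeds any new channels at the per-state mean; a toy NumPy illustration:

import numpy as np

old = np.array([[0.9, 0.7],
                [0.1, 0.3]])                       # (K=2, C_old=2) per-state P(obs=1)
C = 3                                              # new channel count
new = np.full((old.shape[0], C), 0.5)
m = min(old.shape[1], C)
new[:, :m] = old[:, :m]
if C > old.shape[1]:
    new[:, m:] = old.mean(axis=1, keepdims=True)   # new channel starts at the row mean
# new -> [[0.9, 0.7, 0.8],
#         [0.1, 0.3, 0.2]]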
1845
+
1846
+
1847
+ # =============================================================================
1848
+ # Distance-binned transitions (single-channel only)
1849
+ # =============================================================================
1850
+
1851
+
1852
+ @register_hmm("single_distance_binned")
1853
+ class DistanceBinnedSingleBernoulliHMM(SingleBernoulliHMM):
1854
+ """
1855
+ Transition matrix depends on binned distances between consecutive coords.
1359
1856
 
1360
- if (merged_bin_name in adata.layers or merged_len_name in adata.layers or
1361
- (update_obs and merged_bin_name in adata.obs.columns)) and not overwrite:
1362
- raise KeyError(f"Merged outputs exist (use overwrite=True to replace): {merged_bin_name} / {merged_len_name}")
1857
+ Config keys:
1858
+ hmm_distance_bins: list[int] edges (bp)
1859
+ hmm_init_transitions_by_bin: optional (n_bins,K,K)
1860
+ """
1363
1861
 
1364
- if layer not in adata.layers:
1365
- raise KeyError(f"Layer '{layer}' not found in adata.layers")
1862
+ def __init__(
1863
+ self,
1864
+ n_states: int = 2,
1865
+ init_emission: Optional[Sequence[float]] = None,
1866
+ distance_bins: Optional[Sequence[int]] = None,
1867
+ init_trans_by_bin: Optional[Any] = None,
1868
+ eps: float = 1e-8,
1869
+ dtype: torch.dtype = torch.float64,
1870
+ ):
1871
+ """Initialize a distance-binned transition HMM.
1872
+
1873
+ Args:
1874
+ n_states: Number of hidden states.
1875
+ init_emission: Initial emission probabilities per state.
1876
+ distance_bins: Distance bin edges in base pairs.
1877
+ init_trans_by_bin: Initial transition matrices per bin.
1878
+ eps: Smoothing epsilon for probabilities.
1879
+ dtype: Torch dtype for parameters.
1880
+ """
1881
+ super().__init__(n_states=n_states, init_emission=init_emission, eps=eps, dtype=dtype)
1882
+
1883
+ self.distance_bins = np.asarray(
1884
+ distance_bins if distance_bins is not None else [1, 5, 10, 25, 50, 100], dtype=int
1885
+ )
1886
+ self.n_bins = int(len(self.distance_bins) + 1)
1366
1887
 
1367
- bin_layer = adata.layers[layer]
1368
- if issparse(bin_layer):
1369
- bin_arr = bin_layer.toarray().astype(int)
1888
+ if init_trans_by_bin is None:
1889
+ base = self.trans.detach().cpu().numpy()
1890
+ tb = np.stack([base for _ in range(self.n_bins)], axis=0)
1370
1891
  else:
1371
- bin_arr = _np.asarray(bin_layer, dtype=int)
1892
+ tb = np.asarray(init_trans_by_bin, dtype=float)
1893
+ if tb.shape != (self.n_bins, self.n_states, self.n_states):
1894
+ base = self.trans.detach().cpu().numpy()
1895
+ tb = np.stack([base for _ in range(self.n_bins)], axis=0)
1372
1896
 
1373
- n_rows, n_cols = bin_arr.shape
1897
+ self.trans_by_bin = nn.Parameter(torch.tensor(tb, dtype=self.dtype), requires_grad=False)
1898
+ self._normalize_trans_by_bin()
1374
1899
 
1375
- # coordinates in genomic units if possible
1376
- try:
1377
- coords = _np.asarray(adata.var_names, dtype=int)
1378
- coords_are_ints = True
1379
- except Exception:
1380
- coords = _np.arange(n_cols, dtype=int)
1381
- coords_are_ints = False
1382
-
1383
- # helper: contiguous runs of 1s -> list of (start_idx, end_idx) (end exclusive)
1384
- def _runs_from_mask(mask_1d):
1385
- idx = _np.nonzero(mask_1d)[0]
1386
- if idx.size == 0:
1387
- return []
1388
- runs = []
1389
- start = idx[0]
1390
- prev = idx[0]
1391
- for i in idx[1:]:
1392
- if i == prev + 1:
1393
- prev = i
1394
- continue
1395
- runs.append((start, prev + 1))
1396
- start = i
1397
- prev = i
1398
- runs.append((start, prev + 1))
1399
- return runs
1400
-
1401
- # read original obs intervals/probs if available (for combining probs)
1402
- orig_obs = None
1403
- if update_obs and (layer in adata.obs.columns):
1404
- orig_obs = list(adata.obs[layer]) # might be non-list entries
1405
-
1406
- # prepare outputs
1407
- merged_bin = _np.zeros_like(bin_arr, dtype=int)
1408
- merged_len = _np.zeros_like(bin_arr, dtype=int)
1409
- merged_obs_col = [[] for _ in range(n_rows)]
1410
- merged_counts = _np.zeros(n_rows, dtype=int)
1411
-
1412
- for r in range(n_rows):
1413
- mask = bin_arr[r, :] != 0
1414
- runs = _runs_from_mask(mask)
1415
- if not runs:
1416
- merged_obs_col[r] = []
1417
- continue
1900
+ @classmethod
1901
+ def from_config(cls, cfg, *, override=None, device=None):
1902
+ """Create a distance-binned HMM from config.
1418
1903
 
1419
- # merge runs where gap <= distance_threshold (gap in genomic coords when possible)
1420
- merged_runs = []
1421
- cur_s, cur_e = runs[0]
1422
- for (s, e) in runs[1:]:
1423
- if coords_are_ints:
1424
- end_coord = int(coords[cur_e - 1])
1425
- next_start_coord = int(coords[s])
1426
- gap = next_start_coord - end_coord - 1
1427
- else:
1428
- gap = s - cur_e
1429
- if gap <= distance_threshold:
1430
- # extend
1431
- cur_e = e
1432
- else:
1433
- merged_runs.append((cur_s, cur_e))
1434
- cur_s, cur_e = s, e
1435
- merged_runs.append((cur_s, cur_e))
1436
-
1437
- # assemble merged mask/lengths and obs entries
1438
- row_entries = []
1439
- for (s_idx, e_idx) in merged_runs:
1440
- if e_idx <= s_idx:
1441
- continue
1442
- span_positions = e_idx - s_idx
1443
- if coords_are_ints:
1444
- try:
1445
- length_val = int(coords[e_idx - 1]) - int(coords[s_idx]) + 1
1446
- except Exception:
1447
- length_val = span_positions
1448
- else:
1449
- length_val = span_positions
1450
-
1451
- # set binary and length masks
1452
- merged_bin[r, s_idx:e_idx] = 1
1453
- existing_segment = merged_len[r, s_idx:e_idx]
1454
- # set to max(existing, length_val)
1455
- if existing_segment.size > 0:
1456
- merged_len[r, s_idx:e_idx] = _np.maximum(existing_segment, length_val)
1457
- else:
1458
- merged_len[r, s_idx:e_idx] = length_val
1459
-
1460
- # determine prob from overlapping original obs (if present)
1461
- prob_val = 1.0
1462
- if update_obs and orig_obs is not None:
1463
- overlaps = []
1464
- for orig in (orig_obs[r] or []):
1465
- try:
1466
- ostart, olen, opro = orig[0], int(orig[1]), float(orig[2])
1467
- except Exception:
1468
- continue
1469
- if coords_are_ints:
1470
- ostart_idx = _np.searchsorted(coords, int(ostart), side="left")
1471
- oend_idx = ostart_idx + olen
1472
- else:
1473
- ostart_idx = int(ostart)
1474
- oend_idx = ostart_idx + olen
1475
- # overlap test in index space
1476
- if not (oend_idx <= s_idx or ostart_idx >= e_idx):
1477
- overlaps.append(opro)
1478
- if overlaps:
1479
- if prob_strategy == "mean":
1480
- prob_val = float(_np.mean(overlaps))
1481
- elif prob_strategy == "max":
1482
- prob_val = float(_np.max(overlaps))
1483
- else:
1484
- prob_val = float(overlaps[0])
1485
-
1486
- start_coord = int(coords[s_idx]) if coords_are_ints else int(s_idx)
1487
- row_entries.append((start_coord, int(length_val), float(prob_val)))
1488
-
1489
- merged_obs_col[r] = row_entries
1490
- merged_counts[r] = len(row_entries)
1491
-
1492
- # write merged layers (do not overwrite originals unless overwrite=True was set above)
1493
- adata.layers[merged_bin_name] = merged_bin
1494
- adata.layers[merged_len_name] = merged_len
1495
-
1496
- if update_obs:
1497
- adata.obs[merged_bin_name] = merged_obs_col
1498
- adata.obs[f"n_{merged_bin_name}"] = merged_counts
1499
-
1500
- # recompute distances list per-row (gaps between adjacent merged intervals)
1501
- def _calc_distances(obs_list):
1502
- out = []
1503
- for intervals in obs_list:
1504
- if not intervals:
1505
- out.append([])
1506
- continue
1507
- iv = sorted(intervals, key=lambda x: int(x[0]))
1508
- if len(iv) <= 1:
1509
- out.append([])
1510
- continue
1511
- dlist = []
1512
- for i in range(len(iv) - 1):
1513
- endi = int(iv[i][0]) + int(iv[i][1]) - 1
1514
- startn = int(iv[i + 1][0])
1515
- dlist.append(startn - endi - 1)
1516
- out.append(dlist)
1517
- return out
1518
-
1519
- adata.obs[f"{merged_bin_name}_distances"] = _calc_distances(merged_obs_col)
1520
-
1521
- # update uns appended list
1522
- uns_key = "hmm_appended_layers"
1523
- existing = list(adata.uns.get(uns_key, [])) if adata.uns.get(uns_key, None) is not None else []
1524
- for nm in (merged_bin_name, merged_len_name):
1525
- if nm not in existing:
1526
- existing.append(nm)
1527
- adata.uns[uns_key] = existing
1528
-
1529
- if verbose:
1530
- print(f"Created merged binary layer: {merged_bin_name}")
1531
- print(f"Created merged length layer: {merged_len_name}")
1532
- if update_obs:
1533
- print(f"Updated adata.obs columns: {merged_bin_name}, n_{merged_bin_name}, {merged_bin_name}_distances")
1534
-
1535
- return None if inplace else adata
1536
-
1537
- def _ensure_final_layer_and_assign(self, final_adata, layer_name: str, subset_idx_mask: np.ndarray, sub_data):
1904
+ Args:
1905
+ cfg: Configuration mapping or object.
1906
+ override: Override values to apply.
1907
+ device: Optional device specifier.
1908
+
1909
+ Returns:
1910
+ Initialized DistanceBinnedSingleBernoulliHMM instance.
1911
+ """
1912
+ merged = cls._cfg_to_dict(cfg)
1913
+ if override:
1914
+ merged.update(override)
1915
+
1916
+ n_states = int(merged.get("hmm_n_states", 2))
1917
+ eps = float(merged.get("hmm_eps", 1e-8))
1918
+ dtype = _resolve_dtype(merged.get("hmm_dtype", None))
1919
+ dtype = _coerce_dtype_for_device(dtype, device) # coerce dtype to one supported on the target device
1920
+ init_em = merged.get("hmm_init_emission_probs", None)
1921
+
1922
+ bins = merged.get("hmm_distance_bins", [1, 5, 10, 25, 50, 100])
1923
+ init_tb = merged.get("hmm_init_transitions_by_bin", None)
1924
+
1925
+ model = cls(
1926
+ n_states=n_states,
1927
+ init_emission=init_em,
1928
+ distance_bins=bins,
1929
+ init_trans_by_bin=init_tb,
1930
+ eps=eps,
1931
+ dtype=dtype,
1932
+ )
1933
+ if device is not None:
1934
+ model.to(torch.device(device) if isinstance(device, str) else device)
1935
+ model._persisted_cfg = merged
1936
+ return model
1937
+
1938
+ def _ensure_device_dtype(self, device=None) -> torch.device:
1939
+ """Move transition-by-bin parameters to the requested device/dtype."""
1940
+ device = super()._ensure_device_dtype(device)
1941
+ self.trans_by_bin.data = self.trans_by_bin.data.to(device=device, dtype=self.dtype)
1942
+ return device
1943
+
1944
+ def _normalize_trans_by_bin(self):
1945
+ """Normalize transition matrices per distance bin in-place."""
1946
+ with torch.no_grad():
1947
+ tb = self.trans_by_bin.data.reshape(self.n_bins, self.n_states, self.n_states)
1948
+ tb = tb + self.eps
1949
+ rs = tb.sum(dim=2, keepdim=True)
1950
+ rs[rs == 0.0] = 1.0
1951
+ self.trans_by_bin.data = tb / rs
1952
+
1953
+ def _extra_save_payload(self) -> dict:
1954
+ """Return extra payload data for serialization."""
1955
+ p = super()._extra_save_payload()
1956
+ p.update(
1957
+ {
1958
+ "distance_bins": torch.tensor(self.distance_bins, dtype=torch.long),
1959
+ "trans_by_bin": self.trans_by_bin.detach().cpu(),
1960
+ }
1961
+ )
1962
+ return p
1963
+
1964
+ def _load_extra_payload(self, payload: dict, *, device: torch.device):
1965
+ """Load serialized distance-bin parameters.
1966
+
1967
+ Args:
1968
+ payload: Serialized payload dictionary.
1969
+ device: Target torch device.
1538
1970
  """
1539
- Ensure final_adata.layers[layer_name] exists and assign rows corresponding to subset_idx_mask.
1540
- sub_data has shape (n_subset_rows, n_vars).
1541
- subset_idx_mask: boolean array of length final_adata.n_obs with True where rows belong to subset.
1971
+ super()._load_extra_payload(payload, device=device)
1972
+ self.distance_bins = (
1973
+ payload.get("distance_bins", torch.tensor([1, 5, 10, 25, 50, 100]))
1974
+ .cpu()
1975
+ .numpy()
1976
+ .astype(int)
1977
+ )
1978
+ self.n_bins = int(len(self.distance_bins) + 1)
1979
+ with torch.no_grad():
1980
+ self.trans_by_bin.data = payload["trans_by_bin"].to(device=device, dtype=self.dtype)
1981
+ self._normalize_trans_by_bin()
1982
+
1983
+ def _bin_index(self, coords: np.ndarray) -> np.ndarray:
1984
+ """Return per-step distance bin indices for coordinates.
1985
+
1986
+ Args:
1987
+ coords: Coordinate array.
1988
+
1989
+ Returns:
1990
+ Array of bin indices (length L-1).
1542
1991
  """
1543
- from scipy.sparse import issparse, csr_matrix
1544
- import warnings
1992
+ d = np.diff(np.asarray(coords, dtype=int))
1993
+ return np.digitize(d, self.distance_bins, right=True) # length L-1
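A quick worked example of the binning performed above, using the default edges (the gap values are arbitrary):

import numpy as np

distance_bins = np.array([1, 5, 10, 25, 50, 100])    # edges in bp
coords = np.array([100, 101, 103, 150, 400])
gaps = np.diff(coords)                                # [  1   2  47 250]
bins = np.digitize(gaps, distance_bins, right=True)   # [0 1 4 6] -> one transition matrix per bin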
1545
1994
 
1546
- n_final_obs, n_vars = final_adata.shape
1547
- n_sub_rows = int(subset_idx_mask.sum())
1548
-
1549
- # prepare row indices in final_adata
1550
- final_row_indices = np.nonzero(subset_idx_mask)[0]
1551
-
1552
- # if sub_data is sparse, work with sparse
1553
- if issparse(sub_data):
1554
- sub_csr = sub_data.tocsr()
1555
- # if final layer not present, create sparse CSR with zero rows and same n_vars
1556
- if layer_name not in final_adata.layers:
1557
- # create an empty CSR of shape (n_final_obs, n_vars)
1558
- final_adata.layers[layer_name] = csr_matrix((n_final_obs, n_vars), dtype=sub_csr.dtype)
1559
- final_csr = final_adata.layers[layer_name]
1560
- if not issparse(final_csr):
1561
- # convert dense final to sparse first
1562
- final_csr = csr_matrix(final_csr)
1563
- # replace the block of rows: easiest is to build a new csr by stacking pieces
1564
- # (efficient for moderate sizes; for huge data you might want an in-place approach)
1565
- # Build list of blocks: rows before, the subset rows (from final where mask False -> zeros), rows after
1566
- # We'll convert final to LIL for row assignment (mutable), then back to CSR.
1567
- final_lil = final_csr.tolil()
1568
- for i_local, r in enumerate(final_row_indices):
1569
- final_lil.rows[r] = sub_csr.getrow(i_local).indices.tolist()
1570
- final_lil.data[r] = sub_csr.getrow(i_local).data.tolist()
1571
- final_csr = final_lil.tocsr()
1572
- final_adata.layers[layer_name] = final_csr
1573
- else:
1574
- # dense numpy array
1575
- sub_arr = np.asarray(sub_data)
1576
- if sub_arr.shape[0] != n_sub_rows:
1577
- raise ValueError(f"Sub data rows ({sub_arr.shape[0]}) != mask selected rows ({n_sub_rows})")
1578
- if layer_name not in final_adata.layers:
1579
- # create zero array with small dtype
1580
- final_adata.layers[layer_name] = np.zeros((n_final_obs, n_vars), dtype=sub_arr.dtype)
1581
- final_arr = final_adata.layers[layer_name]
1582
- if issparse(final_arr):
1583
- # convert sparse final to dense (or convert sub to sparse); we'll convert final to dense here
1584
- final_arr = final_arr.toarray()
1585
- # assign
1586
- final_arr[final_row_indices, :] = sub_arr
1587
- final_adata.layers[layer_name] = final_arr
1995
+ def _forward_backward(
1996
+ self, obs: torch.Tensor, mask: torch.Tensor, *, coords: Optional[np.ndarray] = None
1997
+ ) -> torch.Tensor:
1998
+ """Run forward-backward using distance-binned transitions.
1999
+
2000
+ Args:
2001
+ obs: Observation tensor.
2002
+ mask: Observation mask.
2003
+ coords: Coordinate array.
2004
+
2005
+ Returns:
2006
+ Posterior probabilities (gamma).
2007
+ """
2008
+ if coords is None:
2009
+ raise ValueError("Distance-binned HMM requires coords.")
2010
+ device = obs.device
2011
+ eps = float(self.eps)
2012
+ K = self.n_states
2013
+
2014
+ coords = np.asarray(coords, dtype=int)
2015
+ bins = torch.tensor(self._bin_index(coords), dtype=torch.long, device=device) # (L-1,)
2016
+
2017
+ logB = self._log_emission(obs, mask) # (N,L,K)
2018
+ logstart = torch.log(self.start + eps)
2019
+ logA_by_bin = torch.log(self.trans_by_bin + eps) # (nb,K,K)
2020
+
2021
+ N, L, _ = logB.shape
2022
+ alpha = torch.empty((N, L, K), dtype=self.dtype, device=device)
2023
+ alpha[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :]
2024
+
2025
+ for t in range(1, L):
2026
+ b = int(bins[t - 1].item()) if (t - 1) < bins.numel() else 0
2027
+ logA = logA_by_bin[b]
2028
+ prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0)
2029
+ alpha[:, t, :] = _logsumexp(prev, dim=1) + logB[:, t, :]
2030
+
2031
+ beta = torch.empty((N, L, K), dtype=self.dtype, device=device)
2032
+ beta[:, L - 1, :] = 0.0
2033
+ for t in range(L - 2, -1, -1):
2034
+ b = int(bins[t].item()) if t < bins.numel() else 0
2035
+ logA = logA_by_bin[b]
2036
+ temp = logA.unsqueeze(0) + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
2037
+ beta[:, t, :] = _logsumexp(temp, dim=2)
2038
+
2039
+ log_gamma = alpha + beta
2040
+ logZ = _logsumexp(log_gamma, dim=2).unsqueeze(2)
2041
+ return (log_gamma - logZ).exp()
2042
+
2043
+ def _viterbi(
2044
+ self, obs: torch.Tensor, mask: torch.Tensor, *, coords: Optional[np.ndarray] = None
2045
+ ) -> torch.Tensor:
2046
+ """Run Viterbi decoding using distance-binned transitions.
2047
+
2048
+ Args:
2049
+ obs: Observation tensor.
2050
+ mask: Observation mask.
2051
+ coords: Coordinate array.
2052
+
2053
+ Returns:
2054
+ Decoded state sequence tensor.
2055
+ """
2056
+ if coords is None:
2057
+ raise ValueError("Distance-binned HMM requires coords.")
2058
+ device = obs.device
2059
+ eps = float(self.eps)
2060
+ K = self.n_states
2061
+
2062
+ coords = np.asarray(coords, dtype=int)
2063
+ bins = torch.tensor(self._bin_index(coords), dtype=torch.long, device=device) # (L-1,)
2064
+
2065
+ logB = self._log_emission(obs, mask)
2066
+ logstart = torch.log(self.start + eps)
2067
+ logA_by_bin = torch.log(self.trans_by_bin + eps)
2068
+
2069
+ N, L, _ = logB.shape
2070
+ delta = torch.empty((N, L, K), dtype=self.dtype, device=device)
2071
+ psi = torch.empty((N, L, K), dtype=torch.long, device=device)
2072
+
2073
+ delta[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :]
2074
+ psi[:, 0, :] = -1
2075
+
2076
+ for t in range(1, L):
2077
+ b = int(bins[t - 1].item()) if (t - 1) < bins.numel() else 0
2078
+ logA = logA_by_bin[b]
2079
+ cand = delta[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0)
2080
+ best_val, best_idx = cand.max(dim=1)
2081
+ delta[:, t, :] = best_val + logB[:, t, :]
2082
+ psi[:, t, :] = best_idx
2083
+
2084
+ last_state = torch.argmax(delta[:, L - 1, :], dim=1)
2085
+ states = torch.empty((N, L), dtype=torch.long, device=device)
2086
+ states[:, L - 1] = last_state
2087
+ for t in range(L - 2, -1, -1):
2088
+ states[:, t] = psi[torch.arange(N, device=device), t + 1, states[:, t + 1]]
2089
+ return states
2090
+
2091
+ def fit_em(
2092
+ self,
2093
+ X: np.ndarray,
2094
+ coords: np.ndarray,
2095
+ *,
2096
+ device: torch.device,
2097
+ max_iter: int,
2098
+ tol: float,
2099
+ update_start: bool,
2100
+ update_trans: bool,
2101
+ update_emission: bool,
2102
+ verbose: bool,
2103
+ **kwargs,
2104
+ ) -> List[float]:
2105
+ """Run EM updates for distance-binned transitions.
2106
+
2107
+ Args:
2108
+ X: Observations array (N, L).
2109
+ coords: Coordinate array aligned to X.
2110
+ device: Torch device.
2111
+ max_iter: Maximum iterations.
2112
+ tol: Convergence tolerance.
2113
+ update_start: Whether to update start probabilities.
2114
+ update_trans: Whether to update transitions.
2115
+ update_emission: Whether to update emission parameters.
2116
+ verbose: Whether to log progress.
2117
+ **kwargs: Additional implementation-specific kwargs.
2118
+
2119
+ Returns:
2120
+ List of log-likelihood proxy values.
2121
+ """
2122
+ # Keep this simple: use gamma for emissions; transitions-by-bin updated via xi (same pattern).
2123
+ X = np.asarray(X, dtype=float)
2124
+ if X.ndim != 2:
2125
+ raise ValueError("DistanceBinnedSingleBernoulliHMM expects X shape (N,L).")
2126
+
2127
+ coords = np.asarray(coords, dtype=int)
2128
+ bins_np = self._bin_index(coords) # (L-1,)
2129
+
2130
+ obs = torch.tensor(np.nan_to_num(X, nan=0.0), dtype=self.dtype, device=device)
2131
+ mask = torch.tensor(~np.isnan(X), dtype=torch.bool, device=device)
2132
+
2133
+ eps = float(self.eps)
2134
+ K = self.n_states
2135
+ N, L = obs.shape
2136
+
2137
+ hist: List[float] = []
2138
+ for it in range(1, int(max_iter) + 1):
2139
+ gamma = self._forward_backward(obs, mask, coords=coords) # (N,L,K)
2140
+ ll_proxy = float(torch.sum(torch.log(torch.clamp(gamma.sum(dim=2), min=eps))).item())
2141
+ hist.append(ll_proxy)
2142
+
2143
+ # expected start
2144
+ start_acc = gamma[:, 0, :].sum(dim=0)
2145
+
2146
+ # compute alpha/beta for xi
2147
+ logB = self._log_emission(obs, mask)
2148
+ logstart = torch.log(self.start + eps)
2149
+ logA_by_bin = torch.log(self.trans_by_bin + eps)
2150
+
2151
+ alpha = torch.empty((N, L, K), dtype=self.dtype, device=device)
2152
+ alpha[:, 0, :] = logstart.unsqueeze(0) + logB[:, 0, :]
2153
+
2154
+ for t in range(1, L):
2155
+ b = int(bins_np[t - 1])
2156
+ logA = logA_by_bin[b]
2157
+ prev = alpha[:, t - 1, :].unsqueeze(2) + logA.unsqueeze(0)
2158
+ alpha[:, t, :] = _logsumexp(prev, dim=1) + logB[:, t, :]
2159
+
2160
+ beta = torch.empty((N, L, K), dtype=self.dtype, device=device)
2161
+ beta[:, L - 1, :] = 0.0
2162
+ for t in range(L - 2, -1, -1):
2163
+ b = int(bins_np[t])
2164
+ logA = logA_by_bin[b]
2165
+ temp = logA.unsqueeze(0) + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
2166
+ beta[:, t, :] = _logsumexp(temp, dim=2)
2167
+
2168
+ trans_acc_by_bin = torch.zeros((self.n_bins, K, K), dtype=self.dtype, device=device)
2169
+ for t in range(L - 1):
2170
+ b = int(bins_np[t])
2171
+ logA = logA_by_bin[b]
2172
+ valid_t = (mask[:, t] & mask[:, t + 1]).float().view(N, 1, 1)
2173
+ log_xi = (
2174
+ alpha[:, t, :].unsqueeze(2)
2175
+ + logA.unsqueeze(0)
2176
+ + (logB[:, t + 1, :] + beta[:, t + 1, :]).unsqueeze(1)
2177
+ )
2178
+ log_norm = _logsumexp(log_xi.view(N, -1), dim=1).view(N, 1, 1)
2179
+ xi = (log_xi - log_norm).exp() * valid_t
2180
+ trans_acc_by_bin[b] += xi.sum(dim=0)
2181
+
2182
+ mask_f = mask.float().unsqueeze(-1)
2183
+ emit_num = (gamma * obs.unsqueeze(-1) * mask_f).sum(dim=(0, 1))
2184
+ emit_den = (gamma * mask_f).sum(dim=(0, 1))
2185
+
2186
+ with torch.no_grad():
2187
+ if update_start:
2188
+ new_start = start_acc + eps
2189
+ self.start.data = new_start / new_start.sum()
2190
+
2191
+ if update_trans:
2192
+ tb = trans_acc_by_bin + eps
2193
+ rs = tb.sum(dim=2, keepdim=True)
2194
+ rs[rs == 0.0] = 1.0
2195
+ self.trans_by_bin.data = tb / rs
2196
+
2197
+ if update_emission:
2198
+ new_em = (emit_num + eps) / (emit_den + 2.0 * eps)
2199
+ self.emission.data = new_em.clamp(min=eps, max=1.0 - eps)
2200
+
2201
+ self._normalize_params()
2202
+ self._normalize_emission()
2203
+ self._normalize_trans_by_bin()
2204
+
2205
+ if verbose:
2206
+ logger.info(
2207
+ "[DistanceBinnedSingle.fit] iter=%s ll_proxy=%.6f",
2208
+ it,
2209
+ hist[-1],
2210
+ )
2211
+
2212
+ if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
2213
+ break
2214
+
2215
+ return hist
2216
+
2217
+ def adapt_emissions(
2218
+ self,
2219
+ X: np.ndarray,
2220
+ coords: Optional[np.ndarray] = None,
2221
+ *,
2222
+ device: Optional[Union[str, torch.device]] = None,
2223
+ iters: Optional[int] = None,
2224
+ max_iter: Optional[int] = None,
2225
+ verbose: bool = False,
2226
+ **kwargs,
2227
+ ):
2228
+ """Adapt emissions with legacy parameter names.
2229
+
2230
+ Args:
2231
+ X: Observations array.
2232
+ coords: Optional coordinate array.
2233
+ device: Device specifier.
2234
+ iters: Number of iterations.
2235
+ max_iter: Alias for iters.
2236
+ verbose: Whether to log progress.
2237
+ **kwargs: Additional kwargs forwarded to BaseHMM.adapt_emissions.
2238
+
2239
+ Returns:
2240
+ List of log-likelihood values.
2241
+ """
2242
+ if iters is None:
2243
+ iters = int(max_iter) if max_iter is not None else int(kwargs.pop("iters", 5))
2244
+ return super().adapt_emissions(
2245
+ np.asarray(X, dtype=float),
2246
+ coords if coords is not None else None,
2247
+ iters=int(iters),
2248
+ device=device,
2249
+ verbose=verbose,
2250
+ )
2251
+
2252
+
2253
+ # =============================================================================
2254
+ # Facade class to match workflow import style
2255
+ # =============================================================================
2256
+
2257
+
2258
+ class HMM:
2259
+ """
2260
+ Facade so workflow can do:
2261
+ from ..hmm.HMM import HMM
2262
+ hmm = HMM.from_config(cfg, arch="single")
2263
+ hmm.save(...)
2264
+ hmm = HMM.load(...)
2265
+ """
2266
+
2267
+ @staticmethod
2268
+ def from_config(cfg, arch: Optional[str] = None, **kwargs) -> BaseHMM:
2269
+ """Create an HMM instance from configuration.
2270
+
2271
+ Args:
2272
+ cfg: Configuration mapping or object.
2273
+ arch: Optional HMM architecture name.
2274
+ **kwargs: Additional parameters passed to the factory.
2275
+
2276
+ Returns:
2277
+ Initialized HMM instance.
2278
+ """
2279
+ return create_hmm(cfg, arch=arch, **kwargs)
2280
+
2281
+ @staticmethod
2282
+ def load(path: Union[str, Path], device: Optional[Union[str, torch.device]] = None) -> BaseHMM:
2283
+ """Load an HMM instance from disk.
2284
+
2285
+ Args:
2286
+ path: Path to the serialized model.
2287
+ device: Optional device specifier.
2288
+
2289
+ Returns:
2290
+ Loaded HMM instance.
2291
+ """
2292
+ return BaseHMM.load(path, device=device)
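A minimal round-trip sketch of the facade usage shown in its docstring; the config keys and file path are placeholders, and save's exact signature lives in BaseHMM rather than in this section:

cfg = {"hmm_n_states": 2, "hmm_init_emission_probs": [0.9, 0.1]}   # placeholder config

hmm = HMM.from_config(cfg, arch="single")     # dispatches to the registered "single" architecture
hmm.save("hmm_model.pt")                      # serialization handled by BaseHMM
restored = HMM.load("hmm_model.pt", device="cpu")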