PyPI - pysteer-adaptation - Versions diffs - 0.1.1__py3-none-any.whl - Mend

pysteer-adaptation 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

activation_manager/ActivationExtractor.py +407 -0
activation_manager/SteeredModelWrapper.py +1327 -0
activation_manager/VectorMediator.py +161 -0
activation_manager/__init__.py +3 -0
enums/ApplyFromModeEnum.py +9 -0
enums/MediatorGroupRepresentativeEnum.py +9 -0
enums/ModelFamilyEnum.py +12 -0
enums/ModelTypeEnum.py +9 -0
enums/TaskTypeEnum.py +8 -0
enums/__init__.py +15 -0
executor.py +775 -0
prompt_generator/BasePromptGenerator.py +63 -0
prompt_generator/Gemma3PromptGenerator.py +22 -0
prompt_generator/Llama3Point1PromptGenerator.py +23 -0
prompt_generator/MistralV0Point3PromptGenerator.py +21 -0
prompt_generator/OLMo2PromptGenerator.py +23 -0
prompt_generator/Qwen2Point5PromptGenerator.py +23 -0
prompt_generator/__init__.py +17 -0
pysteer/__init__.py +15 -0
pysteer_adaptation-0.1.1.dist-info/METADATA +283 -0
pysteer_adaptation-0.1.1.dist-info/RECORD +50 -0
pysteer_adaptation-0.1.1.dist-info/WHEEL +5 -0
pysteer_adaptation-0.1.1.dist-info/licenses/LICENSE.txt +373 -0
pysteer_adaptation-0.1.1.dist-info/top_level.txt +9 -0
steering_engine/__init__.py +65 -0
steering_engine/components.py +191 -0
steering_engine/defaults.py +401 -0
steering_engine/domain.py +165 -0
steering_engine/executor_services.py +1074 -0
steering_engine/registry.py +151 -0
steering_engine/runtime.py +47 -0
steering_strategy/ActsSteeringStrategy.py +440 -0
steering_strategy/AdaptiveActivationSteeringStrategy.py +222 -0
steering_strategy/AngularSteeringStrategy.py +122 -0
steering_strategy/BaseSteeringStrategy.py +37 -0
steering_strategy/GeneralSteeringStrategy.py +146 -0
steering_strategy/MbsSteeringStrategy.py +34 -0
steering_strategy/__init__.py +3 -0
utils/ModelUtils.py +374 -0
utils/StringUtils.py +28 -0
utils/__init__.py +17 -0
vector_update_strategy/ActsVectorMediator.py +607 -0
vector_update_strategy/AdaptiveActivationVectorMediator.py +223 -0
vector_update_strategy/AngularVectorMediator.py +178 -0
vector_update_strategy/BaseVectorUpdateStrategy.py +38 -0
vector_update_strategy/CmdVectorMediator.py +294 -0
vector_update_strategy/ColdKernelGradientMediator.py +96 -0
vector_update_strategy/CpcaVectorMediator.py +214 -0
vector_update_strategy/MbsCmdVectorMediator.py +118 -0
vector_update_strategy/__init__.py +3 -0

activation_manager/ActivationExtractor.py ADDED Viewed

@@ -0,0 +1,407 @@
+"""Forward-hook based activation collection for transformer residual streams."""
+import contextlib
+import logging
+import warnings
+from typing import Any, Dict, Iterable, List, Optional, Union
+import torch
+import torch.nn as nn
+from torch import Tensor
+from torch.utils.hooks import RemovableHandle
+from utils.ModelUtils import ModelUtils
+logger = logging.getLogger(__name__)
+class ActivationExtractor:
+    """Collect residual-stream activations from selected transformer layers.
+    The extractor registers forward hooks only while attached. Captured tensors
+    are normalized to ``[batch, sequence, hidden]`` layout, optionally offloaded
+    to CPU, and concatenated across prefill/decode chunks by :meth:`finalize`.
+    """
+    def __init__(
+            self,
+            model: Any,
+            layers_to_extract: Union[Iterable[int], int],
+            *,
+            offload_to_cpu: bool = True,
+            decode_chunk_max: int = 1,
+    ):
+        """Initialize the extractor.
+        Args:
+            model: PyTorch model whose transformer blocks should be hooked.
+            layers_to_extract: Layer index or indexes to collect.
+            offload_to_cpu: Move captured activations to CPU before storing.
+            decode_chunk_max: Maximum sequence length treated as one decode
+                chunk after prefill.
+        Raises:
+            TypeError: If ``model`` is not a ``torch.nn.Module``.
+            IndexError: If a requested layer is outside the discovered stack.
+        """
+        if not isinstance(model, nn.Module):
+            raise TypeError("ActivationExtractor expects 'model' to be a torch.nn.Module")
+        self._model = model
+        self.layers_to_extract = layers_to_extract
+        self.handles: List[RemovableHandle] = []
+        self._offload_to_cpu = bool(offload_to_cpu)
+        base = ModelUtils.unwrap_model(self._model)
+        self._hidden_size = ModelUtils.get_hidden_size(base.config)
+        self._layers = list(ModelUtils.find_transformer_layers(self._model))
+        self._hook_errors: List[tuple[int, Exception]] = []
+        self._chunks: Dict[int, List[Tensor]] = {}
+        self._activations: Dict[int, Tensor] = {}
+        self._seen_seq_len: Dict[int, int] = {}
+        self._phase: Dict[int, str] = {}
+        self._prefill_len: Dict[int, int] = {}
+        self._decode_chunk_max: int = max(1, int(decode_chunk_max))
+        for idx in self.layers_to_extract:
+            if idx < 0 or idx >= len(self._layers):
+                raise IndexError(
+                    f"Layer index {idx} out of range for model with {len(self._layers)} layers."
+                )
+    def _reset_layer_state(self, idx: int) -> None:
+        """Reset layer state."""
+        self._chunks.pop(idx, None)
+        self._activations.pop(idx, None)
+        self._seen_seq_len.pop(idx, None)
+        self._phase.pop(idx, None)
+        self._prefill_len.pop(idx, None)
+    @property
+    def model(self) -> Any:
+        """Model."""
+        return self._model
+    @model.setter
+    def model(self, value: Any) -> None:
+        """Set the model value after validation."""
+        raise AttributeError("ActivationExtractor.model is immutable after initialization.")
+    @property
+    def layers_to_extract(self) -> List[int]:
+        """Layers to extract."""
+        return self._layers_to_extract
+    @layers_to_extract.setter
+    def layers_to_extract(self, value: Union[Iterable[int], int]) -> None:
+        """Set the layers_to_extract value after validation."""
+        if isinstance(value, int):
+            value = [value]
+        self._layers_to_extract = sorted({int(i) for i in value})
+        if not self._layers_to_extract:
+            raise ValueError("At least one layer must be specified for extraction")
+    @property
+    def handles(self) -> List[RemovableHandle]:
+        """Handles."""
+        return self._handles
+    @handles.setter
+    def handles(self, value: List[RemovableHandle]) -> None:
+        """Set the handles value after validation."""
+        self._handles = value
+    @property
+    def activations(self) -> Dict[int, Tensor]:
+        """Activations."""
+        return self._activations
+    def _make_hook(self, idx: int):
+        """Create hook helper data."""
+        @torch.inference_mode()
+        def hook(_module, inputs, output):
+            """Forward hook used to capture or replace hidden states."""
+            try:
+                if hasattr(output, "last_hidden_state"):
+                    t = output.last_hidden_state
+                elif isinstance(output, (tuple, list)) and len(output) > 0:
+                    t = output[0]
+                else:
+                    t = output
+                if not torch.is_tensor(t):
+                    raise RuntimeError(
+                        f"ActivationExtractor: layer {idx} hook output is not a tensor "
+                        f"(type={type(t).__name__})."
+                    )
+                t = ModelUtils.ensure_bsh(
+                    t,
+                    self._hidden_size,
+                    from_layout="BSH",
+                )
+                if not (torch.is_tensor(t) and t.dim() == 3):
+                    raise RuntimeError(
+                        f"ActivationExtractor: layer {idx} expected 3D tensor (B,S,H), "
+                        f"got {None if not torch.is_tensor(t) else tuple(t.shape)}."
+                    )
+                s = int(t.shape[1])
+                already_have = bool(self._chunks.get(idx)) or (idx in self._activations)
+                if already_have and s > self._decode_chunk_max:
+                    msg = (
+                        f"ActivationExtractor: detected seq_len={s} (> decode_chunk_max={self._decode_chunk_max}) "
+                        f"on layer {idx} while previous activations are still accumulated. "
+                        "This often means a new prefill happened without clear(). "
+                        "Resetting accumulation for this layer to avoid mixing."
+                    )
+                    warnings.warn(msg, RuntimeWarning, stacklevel=2)
+                    logger.warning("%s", msg)
+                    self._reset_layer_state(idx)
+                phase = self._phase.get(idx)
+                if phase is None:
+                    if s > 1:
+                        self._phase[idx] = "prefill"
+                        self._prefill_len[idx] = s
+                    else:
+                        self._phase[idx] = "decode"
+                    self._seen_seq_len[idx] = s
+                else:
+                    if phase == "prefill":
+                        prefill_s = self._prefill_len.get(idx, self._seen_seq_len.get(idx, s))
+                        if s == prefill_s:
+                            self._seen_seq_len[idx] = s
+                        elif s <= self._decode_chunk_max:
+                            self._phase[idx] = "decode"
+                            self._seen_seq_len[idx] = s
+                        else:
+                            raise RuntimeError(
+                                f"ActivationExtractor: layer {idx} saw unexpected seq_len change during prefill: "
+                                f"prefill_seq_len={prefill_s} new_seq_len={s}. "
+                                "Call clear() between independent forwards/generations."
+                            )
+                    else:
+                        if s <= self._decode_chunk_max:
+                            self._seen_seq_len[idx] = s
+                        else:
+                            msg = (
+                                f"ActivationExtractor: layer {idx} saw seq_len={s} during decode phase; "
+                                f"expected <= {self._decode_chunk_max}. "
+                                "This likely indicates an accidental new prefill without clear(). "
+                                "Resetting this layer state and treating this as a new prefill."
+                            )
+                            warnings.warn(msg, RuntimeWarning, stacklevel=2)
+                            logger.warning("%s", msg)
+                            self._reset_layer_state(idx)
+                            self._phase[idx] = "prefill" if s > 1 else "decode"
+                            if s > 1:
+                                self._prefill_len[idx] = s
+                            self._seen_seq_len[idx] = s
+                t = torch.nan_to_num(t, nan=0.0, posinf=0.0, neginf=0.0).detach()
+                if self._offload_to_cpu:
+                    if t.device.type == "cuda":
+                        t = t.to("cpu", non_blocking=True)
+                    else:
+                        t = t.to("cpu")
+                chunks = self._chunks.setdefault(idx, [])
+                prev = chunks[-1] if chunks else self._activations.get(idx, None)
+                if (
+                        prev is not None
+                        and (
+                        (not torch.is_tensor(prev))
+                        or prev.dim() != 3
+                        or t.shape[0] != prev.shape[0]
+                        or t.shape[2] != prev.shape[2]
+                )
+                ):
+                    msg = (
+                        f"ActivationExtractor: shape mismatch on layer {idx}: "
+                        f"prev={None if prev is None else tuple(prev.shape)} new={tuple(t.shape)}."
+                    )
+                    logger.warning("%s Resetting accumulation.", msg)
+                    self._reset_layer_state(idx)
+                    raise RuntimeError(msg)
+                chunks.append(t)
+            except Exception as e:
+                self._reset_layer_state(idx)
+                self._hook_errors.append((idx, e))
+                raise
+        return hook
+    def assert_ok(self) -> None:
+        """Raise the first stored hook or update error, if any."""
+        if self._hook_errors:
+            idx, e = self._hook_errors[0]
+            raise RuntimeError(f"Activation hook failed on layer {idx}: {e}") from e
+    def finalize(self, *, clear_chunks: bool = True) -> Dict[int, Tensor]:
+        """Concatenate captured chunks into one tensor per layer.
+        Args:
+            clear_chunks: Clear chunk buffers after finalization.
+        Returns:
+            Mapping from layer index to captured activation tensor.
+        Raises:
+            RuntimeError: If chunks for a layer have incompatible shapes.
+        """
+        for idx, chunks in self._chunks.copy().items():
+            if not chunks:
+                continue
+            if idx in self._activations:
+                pieces: List[Tensor] = [self._activations.pop(idx)] + chunks
+            else:
+                pieces = chunks
+            try:
+                b0 = int(pieces[0].shape[0])
+                h0 = int(pieces[0].shape[2])
+            except Exception:
+                if clear_chunks:
+                    self._chunks.pop(idx, None)
+                raise RuntimeError(f"ActivationExtractor.finalize: bad tensor shape at layer {idx}.")
+            ok_mask: List[bool] = []
+            for x in pieces:
+                ok_mask.append(
+                    torch.is_tensor(x)
+                    and x.dim() == 3
+                    and int(x.shape[0]) == b0
+                    and int(x.shape[2]) == h0
+                )
+            ok = [pieces[i] for i, good in enumerate(ok_mask) if good]
+            if len(ok) != len(pieces):
+                bad_shapes = [
+                    None if not torch.is_tensor(pieces[i]) else tuple(pieces[i].shape)
+                    for i, good in enumerate(ok_mask) if not good
+                ]
+                raise RuntimeError(
+                    f"ActivationExtractor.finalize: incompatible chunks at layer {idx}: {bad_shapes}"
+                )
+            if not ok:
+                if clear_chunks:
+                    self._chunks.pop(idx, None)
+                continue
+            self._activations[idx] = torch.cat(ok, dim=1) if len(ok) > 1 else ok[0]
+            if clear_chunks:
+                self._chunks.pop(idx, None)
+            else:
+                self._chunks[idx] = []
+        if clear_chunks:
+            self._chunks.clear()
+        return self._activations
+    def attach(self, *, clear_activations: bool = True) -> None:
+        """Attach hooks to the configured model layers.
+        Args:
+            clear_activations: Clear previous activations before attaching.
+        Raises:
+            IndexError: If layer discovery changed and a requested layer is now
+                out of range.
+            RuntimeError: If any hook cannot be registered.
+        """
+        self._detach_hooks_only()
+        if clear_activations:
+            self.clear()
+        self._layers = list(ModelUtils.find_transformer_layers(self._model))
+        try:
+            base = ModelUtils.unwrap_model(self._model)
+            cfg = getattr(base, "config", None)
+            expected = None
+            for k in ("num_hidden_layers", "n_layer", "num_layers", "n_layers"):
+                if cfg is not None and hasattr(cfg, k):
+                    expected = int(getattr(cfg, k))
+                    break
+            if expected is not None and 0 < expected != len(self._layers):
+                logger.warning(
+                    "ActivationExtractor: find_transformer_layers() found %d layers, "
+                    "but config expects %d. Hooks may target wrong modules (pre/post norm mismatch risk).",
+                    len(self._layers), expected
+                )
+        except Exception:
+            pass
+        n = len(self._layers)
+        bad = [i for i in self.layers_to_extract if i < 0 or i >= n]
+        if bad:
+            raise IndexError(f"Layer index(es) {bad} out of range for model with {n} layers.")
+        new_handles: List[RemovableHandle] = []
+        try:
+            for idx in self.layers_to_extract:
+                block = self._layers[idx]
+                h = block.register_forward_hook(self._make_hook(idx))
+                new_handles.append(h)
+            self.handles = new_handles
+        except Exception as e:
+            self.remove()
+            raise RuntimeError(f"Failed to attach activation hooks: {e}") from e
+    def clear(self) -> None:
+        """Clear accumulated tensors and error state."""
+        self._chunks.clear()
+        self._activations.clear()
+        self._hook_errors.clear()
+        self._seen_seq_len.clear()
+        self._phase.clear()
+        self._prefill_len.clear()
+    def _detach_hooks_only(self) -> None:
+        """Detach hooks only."""
+        for h in self.handles.copy():
+            try:
+                h.remove()
+            except Exception:
+                pass
+        self.handles.clear()
+    def remove(self) -> None:
+        """Remove hooks and clear accumulated state."""
+        self._detach_hooks_only()
+        self.clear()
+    def close(self) -> None:
+        """Close the object by releasing managed hooks."""
+        self.remove()
+    def __enter__(self):
+        """Enter the context manager and activate managed resources."""
+        self.attach(clear_activations=True)
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and release managed resources."""
+        self.remove()
+        return False
+    @contextlib.contextmanager
+    def temporarily_disabled(self):
+        """Temporarily detach hooks and restore them afterward."""
+        was_on = bool(self.handles)
+        if was_on:
+            self._detach_hooks_only()
+        try:
+            yield
+        finally:
+            if was_on:
+                self.attach(clear_activations=False)