mi-crow 0.1.2__py3-none-any.whl → 1.0.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mi_crow/datasets/base_dataset.py +71 -1
- mi_crow/datasets/classification_dataset.py +136 -30
- mi_crow/datasets/text_dataset.py +165 -24
- mi_crow/hooks/controller.py +12 -7
- mi_crow/hooks/implementations/layer_activation_detector.py +30 -34
- mi_crow/hooks/implementations/model_input_detector.py +87 -87
- mi_crow/hooks/implementations/model_output_detector.py +43 -42
- mi_crow/hooks/utils.py +74 -0
- mi_crow/language_model/activations.py +174 -77
- mi_crow/language_model/device_manager.py +119 -0
- mi_crow/language_model/inference.py +18 -5
- mi_crow/language_model/initialization.py +10 -6
- mi_crow/language_model/language_model.py +67 -97
- mi_crow/language_model/layers.py +16 -13
- mi_crow/language_model/persistence.py +4 -2
- mi_crow/language_model/utils.py +5 -5
- mi_crow/mechanistic/sae/concepts/autoencoder_concepts.py +157 -95
- mi_crow/mechanistic/sae/concepts/concept_dictionary.py +12 -2
- mi_crow/mechanistic/sae/concepts/text_heap.py +161 -0
- mi_crow/mechanistic/sae/modules/topk_sae.py +29 -22
- mi_crow/mechanistic/sae/sae.py +3 -1
- mi_crow/mechanistic/sae/sae_trainer.py +362 -29
- mi_crow/store/local_store.py +11 -5
- mi_crow/store/store.py +34 -1
- {mi_crow-0.1.2.dist-info → mi_crow-1.0.0.post1.dist-info}/METADATA +2 -1
- {mi_crow-0.1.2.dist-info → mi_crow-1.0.0.post1.dist-info}/RECORD +28 -26
- {mi_crow-0.1.2.dist-info → mi_crow-1.0.0.post1.dist-info}/WHEEL +1 -1
- {mi_crow-0.1.2.dist-info → mi_crow-1.0.0.post1.dist-info}/top_level.txt +0 -0
mi_crow/hooks/utils.py
CHANGED
@@ -74,3 +74,77 @@ def extract_tensor_from_output(output: HOOK_FUNCTION_OUTPUT) -> torch.Tensor | None:
 
     return None
 
+
+def apply_modification_to_output(
+    output: HOOK_FUNCTION_OUTPUT,
+    modified_tensor: torch.Tensor,
+    target_device: torch.device | None = None
+) -> None:
+    """
+    Apply a modified tensor to an output object in-place.
+
+    Handles various output formats:
+    - Plain tensors: modifies the tensor directly (in-place)
+    - Tuples/lists of tensors: replaces first tensor
+    - Objects with last_hidden_state attribute: sets last_hidden_state
+
+    If target_device is provided, output tensors are moved to target_device first,
+    ensuring consistency with the desired device (e.g., context.device).
+    Otherwise, modified_tensor is moved to match output's current device.
+
+    Args:
+        output: Output object to modify
+        modified_tensor: Modified tensor to apply
+        target_device: Optional target device. If provided, output tensors are moved
+            to this device before applying modification. If None, uses output's current device.
+    """
+    if output is None:
+        return
+
+    if isinstance(output, torch.Tensor):
+        if target_device is not None:
+            if output.device != target_device:
+                output = output.to(target_device)
+            if modified_tensor.device != target_device:
+                modified_tensor = modified_tensor.to(target_device)
+        else:
+            if modified_tensor.device != output.device:
+                modified_tensor = modified_tensor.to(output.device)
+        output.data.copy_(modified_tensor.data)
+        return
+
+    if isinstance(output, (tuple, list)):
+        for i, item in enumerate(output):
+            if isinstance(item, torch.Tensor):
+                if target_device is not None:
+                    if item.device != target_device:
+                        item = item.to(target_device)
+                        if isinstance(output, list):
+                            output[i] = item
+                    if modified_tensor.device != target_device or modified_tensor.dtype != item.dtype:
+                        modified_tensor = modified_tensor.to(device=target_device, dtype=item.dtype)
+                else:
+                    if modified_tensor.device != item.device or modified_tensor.dtype != item.dtype:
+                        modified_tensor = modified_tensor.to(device=item.device, dtype=item.dtype)
+                if isinstance(output, tuple):
+                    item.data.copy_(modified_tensor.data)
+                else:
+                    output[i] = modified_tensor
+                break
+        return
+
+    if hasattr(output, "last_hidden_state"):
+        original_tensor = output.last_hidden_state
+        if isinstance(original_tensor, torch.Tensor):
+            if target_device is not None:
+                if original_tensor.device != target_device:
+                    output.last_hidden_state = original_tensor.to(target_device)
+                    original_tensor = output.last_hidden_state
+                if modified_tensor.device != target_device:
+                    modified_tensor = modified_tensor.to(target_device)
+            else:
+                if modified_tensor.device != original_tensor.device:
+                    modified_tensor = modified_tensor.to(original_tensor.device)
+            output.last_hidden_state = modified_tensor
+        return
+
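A minimal usage sketch of the new helper (assuming the package is installed; the tuple case mirrors a transformer block that returns a tuple whose first element is hidden states):

    import torch
    from mi_crow.hooks.utils import apply_modification_to_output

    # Plain tensor output: storage is overwritten in place via .data.copy_()
    out = torch.zeros(2, 3)
    apply_modification_to_output(out, torch.ones(2, 3))
    assert torch.equal(out, torch.ones(2, 3))

    # Tuple output: the first tensor found is updated in place, then the loop breaks
    hidden = torch.zeros(2, 3)
    apply_modification_to_output((hidden, None), torch.ones(2, 3))
    assert torch.equal(hidden, torch.ones(2, 3))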
mi_crow/language_model/activations.py
CHANGED
@@ -1,4 +1,5 @@
 import datetime
+import gc
 from typing import TYPE_CHECKING, Any, Dict, Sequence
 
 import torch
@@ -10,7 +11,6 @@ from mi_crow.hooks.implementations.layer_activation_detector import LayerActivationDetector
 from mi_crow.hooks.implementations.model_input_detector import ModelInputDetector
 from mi_crow.store.store import Store
 from mi_crow.utils import get_logger
-from mi_crow.language_model.utils import get_device_from_model
 
 if TYPE_CHECKING:
     from mi_crow.language_model.context import LanguageModelContext
@@ -30,18 +30,25 @@ class LanguageModelActivations:
         """
         self.context = context
 
-    def _setup_detector(self, layer_signature: str | int, hook_id_suffix: str) -> tuple[LayerActivationDetector, str]:
+    def _setup_detector(
+        self, layer_signature: str | int, hook_id_suffix: str, dtype: torch.dtype | None = None
+    ) -> tuple[LayerActivationDetector, str]:
         """
         Create and register an activation detector.
 
         Args:
             layer_signature: Layer to attach detector to
             hook_id_suffix: Suffix for hook ID
+            dtype: Optional dtype for activations
 
         Returns:
             Tuple of (detector, hook_id)
         """
-        detector = LayerActivationDetector(layer_signature=layer_signature, hook_id=f"detector_{hook_id_suffix}")
+        detector = LayerActivationDetector(
+            layer_signature=layer_signature,
+            hook_id=f"detector_{hook_id_suffix}",
+            target_dtype=dtype,
+        )
 
         hook_id = self.context.language_model.layers.register_hook(layer_signature, detector, HookType.FORWARD)
 
@@ -71,24 +78,115 @@ class LanguageModelActivations:
         """
         attention_mask_layer_sig = "attention_masks"
         root_model = self.context.model
-
-        # Add layer signature to registry for root model
+
         if attention_mask_layer_sig not in self.context.language_model.layers.name_to_layer:
             self.context.language_model.layers.name_to_layer[attention_mask_layer_sig] = root_model
-
+
         detector = ModelInputDetector(
             layer_signature=attention_mask_layer_sig,
             hook_id=f"attention_mask_detector_{run_name}",
             save_input_ids=False,
             save_attention_mask=True,
         )
-
+
         hook_id = self.context.language_model.layers.register_hook(
             attention_mask_layer_sig, detector, HookType.PRE_FORWARD
         )
-
+
         return detector, hook_id
 
+    def _setup_activation_hooks(
+        self,
+        layer_sig_list: list[str],
+        run_name: str,
+        save_attention_mask: bool,
+        dtype: torch.dtype | None = None,
+    ) -> tuple[list[str], str | None]:
+        """
+        Setup activation hooks for saving.
+
+        Args:
+            layer_sig_list: List of layer signatures to hook
+            run_name: Run name for hook IDs
+            save_attention_mask: Whether to setup attention mask detector
+            dtype: Optional dtype for activations
+
+        Returns:
+            Tuple of (hook_ids list, attention_mask_hook_id or None)
+        """
+        hook_ids: list[str] = []
+        for sig in layer_sig_list:
+            _, hook_id = self._setup_detector(sig, f"save_{run_name}_{sig}", dtype=dtype)
+            hook_ids.append(hook_id)
+
+        attention_mask_hook_id: str | None = None
+        if save_attention_mask:
+            _, attention_mask_hook_id = self._setup_attention_mask_detector(run_name)
+
+        return hook_ids, attention_mask_hook_id
+
+    def _teardown_activation_hooks(
+        self,
+        hook_ids: list[str],
+        attention_mask_hook_id: str | None,
+    ) -> None:
+        """
+        Teardown activation hooks.
+
+        Args:
+            hook_ids: List of hook IDs to cleanup
+            attention_mask_hook_id: Optional attention mask hook ID to cleanup
+        """
+        for hook_id in hook_ids:
+            self._cleanup_detector(hook_id)
+        if attention_mask_hook_id is not None:
+            self._cleanup_detector(attention_mask_hook_id)
+
+    def _validate_save_prerequisites(self) -> tuple[nn.Module, Store]:
+        """
+        Validate prerequisites for saving activations.
+
+        Returns:
+            Tuple of (model, store)
+
+        Raises:
+            ValueError: If model or store is not initialized
+        """
+        model: nn.Module | None = self.context.model
+        if model is None:
+            raise ValueError("Model must be initialized before running")
+
+        store = self.context.store
+        if store is None:
+            raise ValueError("Store must be provided or set on the language model")
+
+        return model, store
+
+    def _prepare_save_metadata(
+        self,
+        layer_signature: str | int | list[str | int],
+        dataset: BaseDataset | None,
+        run_name: str | None,
+        options: Dict[str, Any],
+    ) -> tuple[str, Dict[str, Any], list[str]]:
+        """
+        Prepare metadata for activation saving.
+
+        Args:
+            layer_signature: Layer signature(s) to save
+            dataset: Optional dataset
+            run_name: Optional run name
+            options: Options dictionary
+
+        Returns:
+            Tuple of (run_name, metadata, layer_sig_list)
+        """
+        _, layer_sig_list = self._normalize_layer_signatures(layer_signature)
+        run_name, meta = self._prepare_run_metadata(
+            layer_signature, dataset=dataset, run_name=run_name, options=options
+        )
+        return run_name, meta, layer_sig_list
+
     def _normalize_layer_signatures(
         self, layer_signatures: str | int | list[str | int] | None
     ) -> tuple[str | None, list[str]]:
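The new helpers factor the register-capture-unhook lifecycle out of the two save paths. A self-contained analogue of that lifecycle in plain PyTorch (the generic pattern, not the mi_crow API):

    import torch
    from torch import nn

    model = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 2))
    captured: dict[str, torch.Tensor] = {}

    def make_hook(name: str):
        def hook(module, inputs, output):
            # Detach and move to CPU, roughly what an activation detector does
            captured[name] = output.detach().to("cpu")
        return hook

    handles = [model[0].register_forward_hook(make_hook("layer0"))]
    try:
        with torch.inference_mode():
            model(torch.randn(2, 4))
    finally:
        # Mirrors _teardown_activation_hooks: hooks come off even on error
        for h in handles:
            h.remove()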
@@ -171,6 +269,7 @@ class LanguageModelActivations:
             verbose: Whether to log
         """
         from mi_crow.language_model.inference import InferenceEngine
+
         InferenceEngine._save_run_metadata(store, run_name, meta, verbose)
 
     def _process_batch(
@@ -184,6 +283,7 @@
         dtype: torch.dtype | None,
         verbose: bool,
         save_in_batches: bool = True,
+        stop_after_layer: str | int | None = None,
     ) -> None:
         """Process a single batch of texts.
 
@@ -196,6 +296,7 @@
             autocast_dtype: Optional dtype for autocast
             dtype: Optional dtype to convert activations to
             verbose: Whether to log progress
+            stop_after_layer: Optional layer signature to stop after (name or index)
         """
         if not texts:
             return
@@ -209,31 +310,48 @@
             tok_kwargs=tok_kwargs,
             autocast=autocast,
             autocast_dtype=autocast_dtype,
+            stop_after_layer=stop_after_layer,
         )
 
-        if dtype is not None:
-            self._convert_activations_to_dtype(dtype)
-
         self.context.language_model.save_detector_metadata(
             run_name,
             batch_index,
             unified=not save_in_batches,
         )
 
+        # Synchronize CUDA to ensure async CPU transfers from detector hooks complete
+        # Only synchronize if CUDA is actually available and initialized
+        try:
+            if torch.cuda.is_available():
+                torch.cuda.synchronize()
+        except (AssertionError, RuntimeError):
+            # CUDA not available or not initialized (e.g., in test environment)
+            pass
+
+        gc.collect()
+        if torch.cuda.is_available():
+            try:
+                torch.cuda.empty_cache()
+            except (AssertionError, RuntimeError):
+                # CUDA not available or not initialized
+                pass
+
         if verbose:
             logger.info(f"Saved batch {batch_index} for run={run_name}")
 
     def _convert_activations_to_dtype(self, dtype: torch.dtype) -> None:
         """
-        Convert captured activations to specified dtype.
+        Convert all captured activations in detectors to the specified dtype.
 
         Args:
-            dtype: Target dtype
+            dtype: Target dtype to convert activations to
         """
         detectors = self.context.language_model.layers.get_detectors()
         for detector in detectors:
-            if "activations" in detector.tensor_metadata:
-                detector.tensor_metadata["activations"] = detector.tensor_metadata["activations"].to(dtype)
+            if hasattr(detector, "tensor_metadata") and "activations" in detector.tensor_metadata:
+                tensor = detector.tensor_metadata["activations"]
+                if tensor.dtype != dtype:
+                    detector.tensor_metadata["activations"] = tensor.to(dtype)
 
     def _manage_cuda_cache(
         self, batch_counter: int, free_cuda_cache_every: int | None, device_type: str, verbose: bool
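The guarded synchronize-and-flush added to _process_batch can be read as a standalone pattern; a sketch (the helper name is illustrative, not part of the package):

    import gc

    import torch

    def flush_device_memory() -> None:
        # Wait for pending async device-to-host copies before buffers are reused;
        # guarded so it is a no-op on CPU-only or uninitialized-CUDA machines.
        try:
            if torch.cuda.is_available():
                torch.cuda.synchronize()
        except (AssertionError, RuntimeError):
            pass
        gc.collect()
        if torch.cuda.is_available():
            try:
                torch.cuda.empty_cache()
            except (AssertionError, RuntimeError):
                pass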
@@ -264,10 +382,11 @@ class LanguageModelActivations:
         max_length: int | None = None,
         autocast: bool = True,
         autocast_dtype: torch.dtype | None = None,
-        free_cuda_cache_every: int | None =
+        free_cuda_cache_every: int | None = None,
         verbose: bool = False,
         save_in_batches: bool = True,
         save_attention_mask: bool = False,
+        stop_after_last_layer: bool = True,
     ) -> str:
         """
         Save activations from a dataset.
@@ -281,9 +400,11 @@ class LanguageModelActivations:
             max_length: Optional max length for tokenization
             autocast: Whether to use autocast
             autocast_dtype: Optional dtype for autocast
-            free_cuda_cache_every: Clear CUDA cache every N batches (
+            free_cuda_cache_every: Clear CUDA cache every N batches (None to auto-detect, 0 to disable)
             verbose: Whether to log progress
             save_attention_mask: Whether to also save attention masks (automatically attaches ModelInputDetector)
+            stop_after_last_layer: Whether to stop model forward pass after the last requested layer
+                to save memory and time. Defaults to True.
 
         Returns:
             Run name used for saving
@@ -291,29 +412,22 @@ class LanguageModelActivations:
         Raises:
             ValueError: If model or store is not initialized
         """
-        model: nn.Module | None = self.context.model
-        if model is None:
-            raise ValueError("Model must be initialized before running")
+        model, store = self._validate_save_prerequisites()
 
-
-
-        store = self.context.store
-        if store is None:
-            raise ValueError("Store must be provided or set on the language model")
-
-
-        device = get_device_from_model(model)
+        device = torch.device(self.context.device)
         device_type = str(device.type)
 
+        if free_cuda_cache_every is None:
+            free_cuda_cache_every = 5 if device_type == "cuda" else 0
+
         options = {
             "dtype": str(dtype) if dtype is not None else None,
             "max_length": max_length,
             "batch_size": int(batch_size),
+            "stop_after_last_layer": stop_after_last_layer,
         }
 
-        run_name, meta = self._prepare_run_metadata(
-            layer_signature, dataset=dataset, run_name=run_name, options=options
-        )
+        run_name, meta, layer_sig_list = self._prepare_save_metadata(layer_signature, dataset, run_name, options)
 
         if verbose:
             logger.info(
@@ -323,17 +437,13 @@ class LanguageModelActivations:
 
         self._save_run_metadata(store, run_name, meta, verbose)
 
-        hook_ids: list[str] = []
-        for sig in layer_sig_list:
-            _, hook_id = self._setup_detector(sig, f"save_{run_name}_{sig}")
-            hook_ids.append(hook_id)
-
-        # Setup attention mask detector if requested
-        attention_mask_hook_id: str | None = None
-        if save_attention_mask:
-            _, attention_mask_hook_id = self._setup_attention_mask_detector(run_name)
+        hook_ids, attention_mask_hook_id = self._setup_activation_hooks(
+            layer_sig_list, run_name, save_attention_mask, dtype=dtype
+        )
 
         batch_counter = 0
+        # Stop after last hooked layer if requested
+        stop_after = layer_sig_list[-1] if (layer_sig_list and stop_after_last_layer) else None
 
         try:
             with torch.inference_mode():
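The diff does not show how InferenceEngine honors stop_after_layer; one common implementation raises from a forward hook on the last requested layer so later layers never run. A standalone sketch of that technique (an assumption about the mechanism, not confirmed by this diff):

    import torch
    from torch import nn

    class _StopForward(Exception):
        pass

    model = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 8), nn.Linear(8, 2))

    def stop_hook(module, inputs, output):
        # Abort the forward pass once this layer has produced its output
        raise _StopForward

    handle = model[1].register_forward_hook(stop_hook)
    try:
        model(torch.randn(1, 4))
    except _StopForward:
        pass  # layers after model[1] were never executed
    finally:
        handle.remove()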
@@ -349,19 +459,18 @@ class LanguageModelActivations:
                         dtype,
                         verbose,
                         save_in_batches=save_in_batches,
+                        stop_after_layer=stop_after,
                     )
                     batch_counter += 1
+
                     self._manage_cuda_cache(batch_counter, free_cuda_cache_every, device_type, verbose)
         finally:
-            for hook_id in hook_ids:
-                self._cleanup_detector(hook_id)
-            if attention_mask_hook_id is not None:
-                self._cleanup_detector(attention_mask_hook_id)
+            self._teardown_activation_hooks(hook_ids, attention_mask_hook_id)
             if verbose:
                 logger.info(f"Completed save_activations_dataset: run={run_name}, batches_saved={batch_counter}")
-
+
         return run_name
-
+
     def save_activations(
         self,
         texts: Sequence[str],
@@ -377,6 +486,7 @@ class LanguageModelActivations:
         verbose: bool = False,
         save_in_batches: bool = True,
         save_attention_mask: bool = False,
+        stop_after_last_layer: bool = True,
     ) -> str:
         """
         Save activations from a list of texts.
@@ -393,6 +503,8 @@ class LanguageModelActivations:
             free_cuda_cache_every: Clear CUDA cache every N batches (0 or None to disable)
             verbose: Whether to log progress
             save_attention_mask: Whether to also save attention masks (automatically attaches ModelInputDetector)
+            stop_after_last_layer: Whether to stop model forward pass after the last requested layer
+                to save memory and time. Defaults to True.
 
         Returns:
             Run name used for saving
@@ -400,20 +512,12 @@ class LanguageModelActivations:
         Raises:
             ValueError: If model or store is not initialized
         """
-        model: nn.Module | None = self.context.model
-        if model is None:
-            raise ValueError("Model must be initialized before running")
-
-        _, layer_sig_list = self._normalize_layer_signatures(layer_signature)
-
-        store = self.context.store
-        if store is None:
-            raise ValueError("Store must be provided or set on the language model")
-
         if not texts:
             raise ValueError("Texts list cannot be empty")
 
-        device = get_device_from_model(model)
+        model, store = self._validate_save_prerequisites()
+
+        device = torch.device(self.context.device)
         device_type = str(device.type)
 
         if batch_size is None:
@@ -423,11 +527,10 @@ class LanguageModelActivations:
             "dtype": str(dtype) if dtype is not None else None,
             "max_length": max_length,
             "batch_size": int(batch_size),
+            "stop_after_last_layer": stop_after_last_layer,
         }
 
-        run_name, meta = self._prepare_run_metadata(
-            layer_signature, dataset=None, run_name=run_name, options=options
-        )
+        run_name, meta, layer_sig_list = self._prepare_save_metadata(layer_signature, None, run_name, options)
 
         if verbose:
             logger.info(
@@ -437,24 +540,20 @@ class LanguageModelActivations:
 
         self._save_run_metadata(store, run_name, meta, verbose)
 
-        hook_ids: list[str] = []
-        for sig in layer_sig_list:
-            _, hook_id = self._setup_detector(sig, f"save_{run_name}_{sig}")
-            hook_ids.append(hook_id)
-
-        # Setup attention mask detector if requested
-        attention_mask_hook_id: str | None = None
-        if save_attention_mask:
-            _, attention_mask_hook_id = self._setup_attention_mask_detector(run_name)
+        hook_ids, attention_mask_hook_id = self._setup_activation_hooks(
+            layer_sig_list, run_name, save_attention_mask, dtype=dtype
+        )
 
         batch_counter = 0
+        # Stop after last hooked layer if requested
+        stop_after = layer_sig_list[-1] if (layer_sig_list and stop_after_last_layer) else None
 
         try:
             with torch.inference_mode():
                 for i in range(0, len(texts), batch_size):
-                    batch_texts = texts[i:i + batch_size]
+                    batch_texts = texts[i : i + batch_size]
                     batch_index = i // batch_size
-
+
                     self._process_batch(
                         batch_texts,
                         run_name,
@@ -465,15 +564,13 @@ class LanguageModelActivations:
                         dtype,
                         verbose,
                         save_in_batches=save_in_batches,
+                        stop_after_layer=stop_after,
                     )
                     batch_counter += 1
                     self._manage_cuda_cache(batch_counter, free_cuda_cache_every, device_type, verbose)
         finally:
-            for hook_id in hook_ids:
-                self._cleanup_detector(hook_id)
-            if attention_mask_hook_id is not None:
-                self._cleanup_detector(attention_mask_hook_id)
+            self._teardown_activation_hooks(hook_ids, attention_mask_hook_id)
             if verbose:
                 logger.info(f"Completed save_activations: run={run_name}, batches_saved={batch_counter}")
-
-        return run_name
+
+        return run_name
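Putting the new parameters together, a hypothetical call site (the lm.activations accessor and the layer path are assumptions for illustration; the keyword arguments are the ones added in this diff):

    # Assumes an already-initialized LanguageModel whose LanguageModelActivations
    # instance is reachable as lm.activations, and a GPT-2-style module path.
    run_name = lm.activations.save_activations(
        texts=["hello world", "mechanistic interpretability"],
        layer_signature="transformer.h.5",
        batch_size=2,
        save_attention_mask=True,      # attaches a ModelInputDetector pre-forward hook
        stop_after_last_layer=True,    # forward pass halts after the last hooked layer
    )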
mi_crow/language_model/device_manager.py
ADDED
@@ -0,0 +1,119 @@
+"""Centralized device management utilities for LanguageModel operations.
+
+This module provides shared device handling logic to ensure consistent
+device management across the codebase.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import torch
+
+if TYPE_CHECKING:
+    from mi_crow.language_model.language_model import LanguageModel
+
+logger = logging.getLogger(__name__)
+
+
+def normalize_device(device: str | torch.device | None) -> str:
+    """
+    Normalize and validate device specification.
+
+    Ensures the device is available and normalizes generic device strings.
+    - None → "cpu"
+    - "cuda" → "cuda:0" (if available)
+    - Validates CUDA/MPS availability
+
+    Args:
+        device: Device specification as string, torch.device, or None
+
+    Returns:
+        Normalized device string such as "cpu", "cuda:0", or "mps"
+
+    Raises:
+        ValueError: If requested device is not available
+    """
+    if device is None:
+        return "cpu"
+
+    if isinstance(device, torch.device):
+        device_str = str(device)
+    else:
+        device_str = str(device)
+
+    if device_str.startswith("cuda"):
+        if not torch.cuda.is_available():
+            raise ValueError(
+                "Requested device 'cuda' but CUDA is not available. "
+                "Install a CUDA-enabled PyTorch build or use device='cpu'."
+            )
+        if device_str == "cuda":
+            device_str = "cuda:0"
+
+    if device_str == "mps":
+        mps_backend = getattr(torch.backends, "mps", None)
+        mps_available = bool(mps_backend and mps_backend.is_available())
+        if not mps_available:
+            raise ValueError(
+                "Requested device 'mps' but MPS is not available. "
+                "Ensure PyTorch is built with MPS support or use device='cpu'."
+            )
+
+    return device_str
+
+
+def ensure_context_device(lm: LanguageModel) -> torch.device:
+    """
+    Ensure LanguageModel has valid context.device and return it.
+
+    Args:
+        lm: LanguageModel instance
+
+    Returns:
+        torch.device from context
+
+    Raises:
+        ValueError: If context.device is not properly set
+    """
+    if not hasattr(lm, "context") or not hasattr(lm.context, "device") or lm.context.device is None:
+        raise ValueError(
+            "LanguageModel must have context.device set. "
+            "Ensure LanguageModel is properly initialized with a device."
+        )
+    return torch.device(lm.context.device)
+
+
+def sync_model_to_context_device(lm: LanguageModel) -> None:
+    """
+    Ensure model is on the device specified by context.device.
+
+    Moves the model if there's a mismatch between current location
+    and context.device. This is the primary device synchronization
+    function that should be called before any model operations.
+
+    Args:
+        lm: LanguageModel instance with context.device set
+
+    Raises:
+        ValueError: If context.device is not set
+        RuntimeError: If model cannot be moved to target device
+    """
+    from mi_crow.language_model.utils import get_device_from_model
+
+    target_device = ensure_context_device(lm)
+    model_device = get_device_from_model(lm.context.model)
+
+    if model_device != target_device:
+        try:
+            lm.context.model = lm.context.model.to(target_device)
+            logger.debug(
+                "Moved model from %s to %s to match context.device",
+                model_device,
+                target_device,
+            )
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to move model from {model_device} to {target_device}: {e}"
+            ) from e
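A quick sketch of the normalization rules (safe to run on a CPU-only machine):

    import torch
    from mi_crow.language_model.device_manager import normalize_device

    assert normalize_device(None) == "cpu"
    assert normalize_device(torch.device("cpu")) == "cpu"
    # On a CUDA host, a bare "cuda" is pinned to an explicit index:
    #   normalize_device("cuda") == "cuda:0"
    # Requesting "cuda" or "mps" without that backend raises ValueError.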