mi_crow-0.1.1.post12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. amber/__init__.py +15 -0
  2. amber/datasets/__init__.py +11 -0
  3. amber/datasets/base_dataset.py +640 -0
  4. amber/datasets/classification_dataset.py +566 -0
  5. amber/datasets/loading_strategy.py +29 -0
  6. amber/datasets/text_dataset.py +488 -0
  7. amber/hooks/__init__.py +20 -0
  8. amber/hooks/controller.py +171 -0
  9. amber/hooks/detector.py +95 -0
  10. amber/hooks/hook.py +218 -0
  11. amber/hooks/implementations/__init__.py +0 -0
  12. amber/hooks/implementations/function_controller.py +93 -0
  13. amber/hooks/implementations/layer_activation_detector.py +96 -0
  14. amber/hooks/implementations/model_input_detector.py +250 -0
  15. amber/hooks/implementations/model_output_detector.py +132 -0
  16. amber/hooks/utils.py +76 -0
  17. amber/language_model/__init__.py +0 -0
  18. amber/language_model/activations.py +479 -0
  19. amber/language_model/context.py +33 -0
  20. amber/language_model/contracts.py +13 -0
  21. amber/language_model/hook_metadata.py +38 -0
  22. amber/language_model/inference.py +525 -0
  23. amber/language_model/initialization.py +126 -0
  24. amber/language_model/language_model.py +390 -0
  25. amber/language_model/layers.py +460 -0
  26. amber/language_model/persistence.py +177 -0
  27. amber/language_model/tokenizer.py +203 -0
  28. amber/language_model/utils.py +97 -0
  29. amber/mechanistic/__init__.py +0 -0
  30. amber/mechanistic/sae/__init__.py +0 -0
  31. amber/mechanistic/sae/autoencoder_context.py +40 -0
  32. amber/mechanistic/sae/concepts/__init__.py +0 -0
  33. amber/mechanistic/sae/concepts/autoencoder_concepts.py +332 -0
  34. amber/mechanistic/sae/concepts/concept_dictionary.py +206 -0
  35. amber/mechanistic/sae/concepts/concept_models.py +9 -0
  36. amber/mechanistic/sae/concepts/input_tracker.py +68 -0
  37. amber/mechanistic/sae/modules/__init__.py +5 -0
  38. amber/mechanistic/sae/modules/l1_sae.py +409 -0
  39. amber/mechanistic/sae/modules/topk_sae.py +459 -0
  40. amber/mechanistic/sae/sae.py +166 -0
  41. amber/mechanistic/sae/sae_trainer.py +604 -0
  42. amber/mechanistic/sae/training/wandb_logger.py +222 -0
  43. amber/store/__init__.py +5 -0
  44. amber/store/local_store.py +437 -0
  45. amber/store/store.py +276 -0
  46. amber/store/store_dataloader.py +124 -0
  47. amber/utils.py +46 -0
  48. mi_crow-0.1.1.post12.dist-info/METADATA +124 -0
  49. mi_crow-0.1.1.post12.dist-info/RECORD +51 -0
  50. mi_crow-0.1.1.post12.dist-info/WHEEL +5 -0
  51. mi_crow-0.1.1.post12.dist-info/top_level.txt +1 -0
amber/hooks/implementations/model_input_detector.py ADDED
@@ -0,0 +1,250 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Dict, Set, List, Optional
+ import torch
+
+ from amber.hooks.detector import Detector
+ from amber.hooks.hook import HookType, HOOK_FUNCTION_INPUT, HOOK_FUNCTION_OUTPUT
+
+ if TYPE_CHECKING:
+     from torch import nn
+
+
+ class ModelInputDetector(Detector):
+     """
+     Detector hook that captures and saves tokenized inputs from the model's forward pass.
+
+     This detector is designed to be attached to the root model module and captures:
+     - Tokenized inputs (input_ids) from the model's forward pass
+     - Attention masks (optional) that exclude both padding and special tokens
+
+     Uses a PRE_FORWARD hook to capture inputs before they are processed.
+     Useful for saving tokenized inputs for analysis or training.
+     """
+
+     def __init__(
+         self,
+         layer_signature: str | int | None = None,
+         hook_id: str | None = None,
+         save_input_ids: bool = True,
+         save_attention_mask: bool = False,
+         special_token_ids: Optional[List[int] | Set[int]] = None
+     ):
+         """
+         Initialize the model input detector.
+
+         Args:
+             layer_signature: Layer to capture from (typically the root model; can be None)
+             hook_id: Unique identifier for this hook
+             save_input_ids: Whether to save the input_ids tensor
+             save_attention_mask: Whether to save the attention_mask tensor (excludes padding and special tokens)
+             special_token_ids: Optional list/set of special token IDs. If None, they are taken from the LanguageModel context.
+         """
+         super().__init__(
+             hook_type=HookType.PRE_FORWARD,
+             hook_id=hook_id,
+             store=None,
+             layer_signature=layer_signature
+         )
+         self.save_input_ids = save_input_ids
+         self.save_attention_mask = save_attention_mask
+         self.special_token_ids = set(special_token_ids) if special_token_ids is not None else None
+
+     def _extract_input_ids(self, input: HOOK_FUNCTION_INPUT) -> torch.Tensor | None:
+         """
+         Extract input_ids from the model input.
+
+         Handles various input formats:
+         - Dict with an 'input_ids' key (most common for HuggingFace models)
+         - Tuple with a dict as the first element
+         - Tuple with a tensor as the first element
+
+         Args:
+             input: Input to the model forward pass
+
+         Returns:
+             input_ids tensor, or None if not found
+         """
+         if not input or len(input) == 0:
+             return None
+
+         first_item = input[0]
+
+         if isinstance(first_item, dict):
+             if 'input_ids' in first_item:
+                 return first_item['input_ids']
+             return None
+
+         if isinstance(first_item, torch.Tensor):
+             return first_item
+
+         return None
+
+     def _extract_attention_mask(self, input: HOOK_FUNCTION_INPUT) -> torch.Tensor | None:
+         """
+         Extract attention_mask from the model input.
+
+         Args:
+             input: Input to the model forward pass
+
+         Returns:
+             attention_mask tensor, or None if not found
+         """
+         if not input or len(input) == 0:
+             return None
+
+         first_item = input[0]
+
+         if isinstance(first_item, dict):
+             if 'attention_mask' in first_item:
+                 return first_item['attention_mask']
+
+         return None
+
+     def _get_special_token_ids(self, module: torch.nn.Module) -> Set[int]:
+         """
+         Get special token IDs from the user-provided set or from the LanguageModel context.
+
+         Priority order:
+         1. self.special_token_ids (user-provided during initialization)
+         2. self.context.special_token_ids (from LanguageModel initialization)
+
+         Args:
+             module: The PyTorch module being hooked (unused, kept for API compatibility)
+
+         Returns:
+             Set of special token IDs, or an empty set if none are available
+         """
+         if self.special_token_ids is not None:
+             return self.special_token_ids
+
+         if self.context is not None and self.context.special_token_ids is not None:
+             return self.context.special_token_ids
+
+         return set()
+
+     def _create_combined_attention_mask(
+         self,
+         input_ids: torch.Tensor,
+         attention_mask: torch.Tensor | None,
+         module: torch.nn.Module
+     ) -> torch.Tensor:
+         """
+         Create a combined attention mask that excludes both padding and special tokens.
+
+         Args:
+             input_ids: Input token IDs tensor (batch_size × sequence_length)
+             attention_mask: Original attention mask from the tokenizer (None if not provided)
+             module: The PyTorch module being hooked
+
+         Returns:
+             Boolean mask tensor with the same shape as input_ids (True for regular tokens, False for padding/special tokens)
+         """
+         if attention_mask is None:
+             attention_mask = torch.ones_like(input_ids, dtype=torch.bool)
+         else:
+             attention_mask = attention_mask.bool()
+
+         special_token_ids = self._get_special_token_ids(module)
+
+         if special_token_ids:
+             special_ids_tensor = torch.tensor(list(special_token_ids), device=input_ids.device, dtype=input_ids.dtype)
+             expanded_input = input_ids.unsqueeze(-1)
+             expanded_special = special_ids_tensor.unsqueeze(0).unsqueeze(0)
+             is_special = (expanded_input == expanded_special).any(dim=-1)
+             attention_mask = attention_mask & ~is_special
+
+         return attention_mask.to(torch.bool)
+
+     def set_inputs_from_encodings(self, encodings: Dict[str, torch.Tensor], module: Optional[torch.nn.Module] = None) -> None:
+         """
+         Manually set inputs from an encodings dictionary.
+
+         This is useful when the model is called with keyword arguments,
+         as PyTorch's pre_forward hook does not receive kwargs.
+
+         Args:
+             encodings: Dictionary of encoded inputs (e.g., from lm.forwards() or lm.tokenize())
+             module: Optional module for extracting special token IDs. If None, a DummyModule is used.
+
+         Raises:
+             RuntimeError: If tensor extraction or storage fails
+         """
+         try:
+             if self.save_input_ids and 'input_ids' in encodings:
+                 input_ids = encodings['input_ids']
+                 self.tensor_metadata['input_ids'] = input_ids.detach().to("cpu")
+                 self.metadata['input_ids_shape'] = tuple(input_ids.shape)
+
+             if self.save_attention_mask and 'input_ids' in encodings:
+                 input_ids = encodings['input_ids']
+                 if module is None:
+                     class DummyModule:
+                         pass
+                     module = DummyModule()
+
+                 original_attention_mask = encodings.get('attention_mask')
+                 combined_mask = self._create_combined_attention_mask(input_ids, original_attention_mask, module)
+                 self.tensor_metadata['attention_mask'] = combined_mask.detach().to("cpu")
+                 self.metadata['attention_mask_shape'] = tuple(combined_mask.shape)
+         except Exception as e:
+             raise RuntimeError(
+                 f"Error setting inputs from encodings in ModelInputDetector {self.id}: {e}"
+             ) from e
+
+     def process_activations(
+         self,
+         module: torch.nn.Module,
+         input: HOOK_FUNCTION_INPUT,
+         output: HOOK_FUNCTION_OUTPUT
+     ) -> None:
+         """
+         Extract and store tokenized inputs.
+
+         Note: For HuggingFace models called with **kwargs, the input tuple may be empty.
+         In such cases, use set_inputs_from_encodings() to manually set inputs from
+         the encodings dictionary returned by lm.forwards().
+
+         Args:
+             module: The PyTorch module being hooked (typically the root model)
+             input: Tuple of input tensors/dicts to the module
+             output: Output from the module (None for PRE_FORWARD hooks)
+
+         Raises:
+             RuntimeError: If tensor extraction or storage fails
+         """
+         try:
+             if self.save_input_ids:
+                 input_ids = self._extract_input_ids(input)
+                 if input_ids is not None:
+                     self.tensor_metadata['input_ids'] = input_ids.detach().to("cpu")
+                     self.metadata['input_ids_shape'] = tuple(input_ids.shape)
+
+             if self.save_attention_mask:
+                 input_ids = self._extract_input_ids(input)
+                 if input_ids is not None:
+                     original_attention_mask = self._extract_attention_mask(input)
+                     combined_mask = self._create_combined_attention_mask(input_ids, original_attention_mask, module)
+                     self.tensor_metadata['attention_mask'] = combined_mask.detach().to("cpu")
+                     self.metadata['attention_mask_shape'] = tuple(combined_mask.shape)
+
+         except Exception as e:
+             raise RuntimeError(
+                 f"Error extracting inputs in ModelInputDetector {self.id}: {e}"
+             ) from e
+
+     def get_captured_input_ids(self) -> torch.Tensor | None:
+         """Get the captured input_ids from the current batch."""
+         return self.tensor_metadata.get('input_ids')
+
+     def get_captured_attention_mask(self) -> torch.Tensor | None:
+         """Get the captured attention_mask from the current batch (excludes padding and special tokens)."""
+         return self.tensor_metadata.get('attention_mask')
+
+     def clear_captured(self) -> None:
+         """Clear all captured inputs for the current batch."""
+         keys_to_remove = ['input_ids', 'attention_mask']
+         for key in keys_to_remove:
+             self.tensor_metadata.pop(key, None)
+             self.metadata.pop(f'{key}_shape', None)
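A minimal usage sketch of ModelInputDetector, based only on the methods shown above. It assumes the Detector base class initializes the tensor_metadata and metadata dictionaries these methods write to, and the token IDs are invented purely for illustration.

import torch
from amber.hooks.implementations.model_input_detector import ModelInputDetector

# Suppose token id 0 is PAD and 1 is BOS (illustrative values only).
detector = ModelInputDetector(
    save_input_ids=True,
    save_attention_mask=True,
    special_token_ids={0, 1},
)

# When the model is called with **kwargs, PyTorch pre_forward hooks see an empty
# positional tuple, so the encodings are handed to the detector manually.
encodings = {
    "input_ids": torch.tensor([[1, 17, 42, 0], [1, 99, 0, 0]]),
    "attention_mask": torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]]),
}
detector.set_inputs_from_encodings(encodings)

print(detector.get_captured_input_ids().shape)   # torch.Size([2, 4])
print(detector.get_captured_attention_mask())    # False at the PAD/BOS positions
detector.clear_captured()

The combined mask starts from the tokenizer's attention_mask and additionally zeroes out every position whose token id appears in special_token_ids, which is exactly what the broadcasted comparison in _create_combined_attention_mask computes.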
amber/hooks/implementations/model_output_detector.py ADDED
@@ -0,0 +1,132 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+ import torch
+
+ from amber.hooks.detector import Detector
+ from amber.hooks.hook import HookType, HOOK_FUNCTION_INPUT, HOOK_FUNCTION_OUTPUT
+
+ if TYPE_CHECKING:
+     from torch import nn
+
+
+ class ModelOutputDetector(Detector):
+     """
+     Detector hook that captures and saves model outputs.
+
+     This detector is designed to be attached to the root model module and captures:
+     - Model outputs (logits) from the model's forward pass
+     - Hidden states (optional) from the model's forward pass
+
+     Uses a FORWARD hook to capture outputs after they are computed.
+     Useful for saving model outputs for analysis or training.
+     """
+
+     def __init__(
+         self,
+         layer_signature: str | int | None = None,
+         hook_id: str | None = None,
+         save_output_logits: bool = True,
+         save_output_hidden_state: bool = False
+     ):
+         """
+         Initialize the model output detector.
+
+         Args:
+             layer_signature: Layer to capture from (typically the root model; can be None)
+             hook_id: Unique identifier for this hook
+             save_output_logits: Whether to save output logits (if available)
+             save_output_hidden_state: Whether to save last_hidden_state (if available)
+         """
+         super().__init__(
+             hook_type=HookType.FORWARD,
+             hook_id=hook_id,
+             store=None,
+             layer_signature=layer_signature
+         )
+         self.save_output_logits = save_output_logits
+         self.save_output_hidden_state = save_output_hidden_state
+
+     def _extract_output_tensor(self, output: HOOK_FUNCTION_OUTPUT) -> tuple[torch.Tensor | None, torch.Tensor | None]:
+         """
+         Extract logits and last_hidden_state from the model output.
+
+         Args:
+             output: Output from the model forward pass
+
+         Returns:
+             Tuple of (logits, last_hidden_state); either can be None
+         """
+         logits = None
+         hidden_state = None
+
+         if output is None:
+             return None, None
+
+         # Handle HuggingFace output objects
+         if hasattr(output, "logits"):
+             logits = output.logits
+         if hasattr(output, "last_hidden_state"):
+             hidden_state = output.last_hidden_state
+
+         # Handle tuple output (logits might be the first element)
+         if isinstance(output, (tuple, list)) and len(output) > 0:
+             first_item = output[0]
+             if isinstance(first_item, torch.Tensor) and logits is None:
+                 logits = first_item
+
+         # Handle direct tensor output
+         if isinstance(output, torch.Tensor) and logits is None:
+             logits = output
+
+         return logits, hidden_state
+
+     def process_activations(
+         self,
+         module: torch.nn.Module,
+         input: HOOK_FUNCTION_INPUT,
+         output: HOOK_FUNCTION_OUTPUT
+     ) -> None:
+         """
+         Extract and store model outputs.
+
+         Args:
+             module: The PyTorch module being hooked (typically the root model)
+             input: Tuple of input tensors/dicts to the module
+             output: Output from the module
+
+         Raises:
+             RuntimeError: If tensor extraction or storage fails
+         """
+         try:
+             # Extract and save outputs
+             logits, hidden_state = self._extract_output_tensor(output)
+
+             if self.save_output_logits and logits is not None:
+                 self.tensor_metadata['output_logits'] = logits.detach().to("cpu")
+                 self.metadata['output_logits_shape'] = tuple(logits.shape)
+
+             if self.save_output_hidden_state and hidden_state is not None:
+                 self.tensor_metadata['output_hidden_state'] = hidden_state.detach().to("cpu")
+                 self.metadata['output_hidden_state_shape'] = tuple(hidden_state.shape)
+
+         except Exception as e:
+             raise RuntimeError(
+                 f"Error extracting outputs in ModelOutputDetector {self.id}: {e}"
+             ) from e
+
+     def get_captured_output_logits(self) -> torch.Tensor | None:
+         """Get the captured output logits from the current batch."""
+         return self.tensor_metadata.get('output_logits')
+
+     def get_captured_output_hidden_state(self) -> torch.Tensor | None:
+         """Get the captured output hidden state from the current batch."""
+         return self.tensor_metadata.get('output_hidden_state')
+
+     def clear_captured(self) -> None:
+         """Clear all captured outputs for the current batch."""
+         keys_to_remove = ['output_logits', 'output_hidden_state']
+         for key in keys_to_remove:
+             self.tensor_metadata.pop(key, None)
+             self.metadata.pop(f'{key}_shape', None)
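A similar sketch for ModelOutputDetector: the hook body is invoked directly with a plain tensor output, the simplest of the shapes _extract_output_tensor handles. The same assumption about the Detector base class providing tensor_metadata and metadata applies, and the Linear layer is just a stand-in module.

import torch
from torch import nn
from amber.hooks.implementations.model_output_detector import ModelOutputDetector

detector = ModelOutputDetector(save_output_logits=True, save_output_hidden_state=False)

# Drive the FORWARD-hook body directly with a raw tensor output.
layer = nn.Linear(8, 4)
x = torch.randn(2, 8)
out = layer(x)
detector.process_activations(module=layer, input=(x,), output=out)

print(detector.get_captured_output_logits().shape)   # torch.Size([2, 4])
print(detector.metadata["output_logits_shape"])      # (2, 4)
detector.clear_captured()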
amber/hooks/utils.py ADDED
@@ -0,0 +1,76 @@
+ """Utility functions for hook implementations."""
+
+ from __future__ import annotations
+
+ from typing import Any
+
+ import torch
+
+ from amber.hooks.hook import HOOK_FUNCTION_INPUT, HOOK_FUNCTION_OUTPUT
+
+
+ def extract_tensor_from_input(input: HOOK_FUNCTION_INPUT) -> torch.Tensor | None:
+     """
+     Extract the first tensor from input sequence.
+
+     Handles various input formats:
+     - Direct tensor in first position
+     - Tuple/list of tensors in first position
+     - Empty or None inputs
+
+     Args:
+         input: Input sequence (tuple/list of tensors)
+
+     Returns:
+         First tensor found, or None if no tensor found
+     """
+     if not input or len(input) == 0:
+         return None
+
+     first_item = input[0]
+     if isinstance(first_item, torch.Tensor):
+         return first_item
+
+     if isinstance(first_item, (tuple, list)):
+         for item in first_item:
+             if isinstance(item, torch.Tensor):
+                 return item
+
+     return None
+
+
+ def extract_tensor_from_output(output: HOOK_FUNCTION_OUTPUT) -> torch.Tensor | None:
+     """
+     Extract tensor from output (handles various output types).
+
+     Handles various output formats:
+     - Plain tensors
+     - Tuples/lists of tensors (takes first tensor)
+     - Objects with last_hidden_state attribute (e.g., HuggingFace outputs)
+     - None outputs
+
+     Args:
+         output: Output from module (tensor, tuple, or object with attributes)
+
+     Returns:
+         First tensor found, or None if no tensor found
+     """
+     if output is None:
+         return None
+
+     if isinstance(output, torch.Tensor):
+         return output
+
+     if isinstance(output, (tuple, list)):
+         for item in output:
+             if isinstance(item, torch.Tensor):
+                 return item
+
+     # Try common HuggingFace output objects
+     if hasattr(output, "last_hidden_state"):
+         maybe = getattr(output, "last_hidden_state")
+         if isinstance(maybe, torch.Tensor):
+             return maybe
+
+     return None
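A few self-contained checks of the two helpers above, covering the input and output shapes they are written to handle; FakeHFOutput is a stand-in for a HuggingFace-style output object, used only for illustration.

import torch
from amber.hooks.utils import extract_tensor_from_input, extract_tensor_from_output

t = torch.randn(2, 3)

# Positional-args tuple, as a forward hook receives it
assert extract_tensor_from_input((t,)) is t
# Nested tuple/list in the first position
assert extract_tensor_from_input(((None, t),)) is t
# Empty input (e.g., the model was called with **kwargs only)
assert extract_tensor_from_input(()) is None

# Plain tensor, tuple, and HuggingFace-style outputs
assert extract_tensor_from_output(t) is t
assert extract_tensor_from_output((t, None)) is t

class FakeHFOutput:
    def __init__(self, hidden):
        self.last_hidden_state = hidden

assert extract_tensor_from_output(FakeHFOutput(t)) is t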