PyPI - representation-geometry - Versions diffs - 0.1.0__py3-none-any.whl - Mend

representation-geometry 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

representation_geometry/__init__.py +12 -0
representation_geometry/api.py +366 -0
representation_geometry/diagnostics.py +76 -0
representation_geometry/hooks.py +215 -0
representation_geometry/metrics.py +216 -0
representation_geometry/moe.py +57 -0
representation_geometry/online.py +65 -0
representation_geometry/results.py +87 -0
representation_geometry/smoke.py +77 -0
representation_geometry-0.1.0.dist-info/METADATA +417 -0
representation_geometry-0.1.0.dist-info/RECORD +15 -0
representation_geometry-0.1.0.dist-info/WHEEL +5 -0
representation_geometry-0.1.0.dist-info/entry_points.txt +2 -0
representation_geometry-0.1.0.dist-info/licenses/LICENSE +21 -0
representation_geometry-0.1.0.dist-info/top_level.txt +1 -0

representation_geometry/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+from representation_geometry.api import analyze_model
+from representation_geometry.metrics import metric_from_eigenvalues, subspace_novelty
+from representation_geometry.online import RunningCovariance
+from representation_geometry.results import AnalysisResults
+# Names re-exported as the package's public API (all are imported above).
+__all__ = [
+    "AnalysisResults",
+    "RunningCovariance",
+    "analyze_model",
+    "metric_from_eigenvalues",
+    "subspace_novelty",
+]

representation_geometry/api.py ADDED Viewed

@@ -0,0 +1,366 @@
+from __future__ import annotations
+import importlib.metadata
+import platform
+import time
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Any
+import pandas as pd
+import torch
+from representation_geometry.diagnostics import DIAGNOSTIC_REGISTRY, available_diagnostics
+from representation_geometry.hooks import (
+    ResidualStatsStore,
+    get_input_device,
+    move_to_device,
+    register_activation_hooks,
+    resolve_hook_targets,
+)
+from representation_geometry.metrics import compute_metrics_from_stats
+from representation_geometry.moe import MoERouterStats
+from representation_geometry.results import AnalysisResults
+# Recorded under "artifact_schema_version" in the metadata of every analysis result.
+ARTIFACT_SCHEMA_VERSION = "0.1"
+def analyze_model(
+    model_name: str | torch.nn.Module | None = None,
+    dataloader: Iterable[Any] | None = None,
+    *,
+    model: torch.nn.Module | None = None,
+    max_tokens: int | None = 100_000,
+    metrics: str | Sequence[str] | None = "default",
+    diagnostics: Sequence[str] | None = None,
+    layers: str | Sequence[int] = "all",
+    hook_point: str = "residual_input",
+    save_mode: str | None = None,
+    save_dir: str | None = None,
+    sample_limit: int = 512,
+    novelty_k: int = 32,
+    device: str | torch.device | None = None,
+    model_kwargs: Mapping[str, Any] | None = None,
+    trust_remote_code: bool = False,
+    output_router_logits: bool | str = "auto",
+    run_name: str | None = None,
+    tokenizer_name: str | None = None,
+    dataset_name: str | None = None,
+    run_metadata: Mapping[str, Any] | None = None,
+) -> AnalysisResults:
+    """Analyze residual-stream covariance geometry for a transformer model.
+    Parameters
+    ----------
+    model_name:
+        Either a HuggingFace model id or an already loaded ``torch.nn.Module``.
+        Passing the model object here is supported to match the rough API used
+        in the paper workflow.
+    dataloader:
+        Iterable of batches. Dict batches with ``input_ids`` are preferred.
+    model:
+        Explicit loaded model. If supplied, it takes precedence over
+        ``model_name``.
+    max_tokens:
+        Maximum token budget. Full batches are processed until this budget is
+        reached; the recorded metadata reports the actual observed tokens.
+    metrics:
+        ``"default"``, ``None``, or ``[]`` computes the default spectral metrics.
+        The list form is reserved for future metric subsets.
+    diagnostics:
+        Optional diagnostics. Currently supports ``"normalization_ablation"``.
+    layers:
+        ``"all"`` or a sequence of integer block indices for residual hooks.
+    hook_point:
+        ``"residual_input"``, ``"residual_output"``, ``"module_input:<path>"``,
+        or ``"module_output:<path>"``.
+    save_mode:
+        ``None``, ``"json"``, ``"csv"``, or ``"bundle"``.
+    save_dir:
+        Output directory for saved artifacts.
+    """
+    if dataloader is None:
+        raise ValueError("dataloader is required; pass tokenized batches for the model.")
+    metric_names = _normalize_metrics(metrics)
+    diagnostic_names = set(diagnostics or [])
+    unknown_diagnostics = diagnostic_names - set(available_diagnostics())
+    if unknown_diagnostics:
+        raise ValueError(f"Unknown diagnostics: {sorted(unknown_diagnostics)}")
+    model_obj, model_id = _resolve_model(model_name, model, model_kwargs, trust_remote_code)
+    model_obj.eval()
+    input_device = torch.device(device) if device is not None else get_input_device(model_obj)
+    if device is not None and not _has_device_map(model_obj):
+        model_obj.to(input_device)
+    hook_targets, hook_metadata = resolve_hook_targets(
+        model_obj,
+        hook_point=hook_point,
+        layers=layers,
+    )
+    store = ResidualStatsStore(sample_limit=sample_limit)
+    handles = register_activation_hooks(hook_targets, store, hook_point=hook_point)
+    router_stats = _make_router_stats(model_obj, len(hook_targets), output_router_logits)
+    ask_router_logits = router_stats is not None and output_router_logits is not False
+    started = time.time()
+    batches_seen = 0
+    tokens_seen = 0
+    try:
+        with torch.no_grad():
+            for batch in dataloader:
+                if max_tokens is not None and tokens_seen >= max_tokens:
+                    break
+                batch_tokens = _count_tokens(batch)
+                moved_batch = move_to_device(batch, input_device)
+                outputs = _call_model(
+                    model_obj,
+                    moved_batch,
+                    output_router_logits=ask_router_logits,
+                )
+                if router_stats is not None:
+                    router_stats.update_from_outputs(outputs)
+                batches_seen += 1
+                tokens_seen += batch_tokens
+    finally:
+        for handle in handles:
+            handle.remove()
+    metrics_df, novelty_df, eigenvalues = compute_metrics_from_stats(
+        store.stats,
+        model_key=_safe_model_key(model_id),
+        model_label=model_id,
+        novelty_k=novelty_k,
+        include_spectrum_metrics="spectrum" in metric_names or "default" in metric_names,
+        include_sample_metrics="sample" in metric_names or "default" in metric_names,
+        include_novelty_metrics="novelty" in metric_names or "default" in metric_names,
+    )
+    router_df = router_stats.to_frame() if router_stats is not None else pd.DataFrame()
+    diagnostic_tables: dict[str, pd.DataFrame] = {}
+    for diagnostic_name in sorted(diagnostic_names):
+        diagnostic_tables[diagnostic_name] = DIAGNOSTIC_REGISTRY[diagnostic_name](
+            store.stats,
+            model_key=_safe_model_key(model_id),
+            model_label=model_id,
+        )
+    runtime_sec = time.time() - started
+    metadata = {
+        "artifact_schema_version": ARTIFACT_SCHEMA_VERSION,
+        "model_id": model_id,
+        "model_class": type(model_obj).__name__,
+        "max_tokens": max_tokens,
+        "tokens_observed": int(tokens_seen),
+        "batches_observed": int(batches_seen),
+        "num_blocks": hook_metadata.get("num_blocks"),
+        "layers": hook_metadata.get("layers"),
+        "hook_point": hook_point,
+        "hook_targets": hook_metadata.get("hook_targets", []),
+        "sample_limit": sample_limit,
+        "novelty_k": novelty_k,
+        "metrics": sorted(metric_names),
+        "diagnostics": sorted(diagnostic_names),
+        "input_device": str(input_device),
+        "runtime_sec": runtime_sec,
+        "save_mode": save_mode,
+        "run_name": run_name,
+        "tokenizer": tokenizer_name,
+        "dataset": dataset_name,
+        "model_config": _model_config_summary(model_obj),
+        "software": _software_versions(),
+    }
+    if run_metadata:
+        metadata["run_metadata"] = dict(run_metadata)
+    results = AnalysisResults(
+        metrics=metrics_df,
+        novelty=novelty_df,
+        router=router_df,
+        diagnostics=diagnostic_tables,
+        eigenvalues=eigenvalues,
+        metadata=metadata,
+    )
+    if save_mode is not None:
+        results.save(save_dir=save_dir, save_mode=save_mode)
+    return results
+def _normalize_metrics(metrics: str | Sequence[str] | None) -> set[str]:
+    if metrics is None or metrics == "default" or metrics == []:
+        return {"default", "spectrum", "novelty", "sample"}
+    if isinstance(metrics, str):
+        return {metrics}
+    metric_names = set(metrics)
+    unknown = metric_names - {"default", "spectrum", "novelty", "sample", "router"}
+    if unknown:
+        raise ValueError(f"Unknown metrics: {sorted(unknown)}")
+    return metric_names or {"default", "spectrum", "novelty", "sample"}
+def _resolve_model(
+    model_name: str | torch.nn.Module | None,
+    model: torch.nn.Module | None,
+    model_kwargs: Mapping[str, Any] | None,
+    trust_remote_code: bool,
+) -> tuple[torch.nn.Module, str]:
+    if model is not None:
+        return model, str(model_name or _infer_model_id(model))
+    if isinstance(model_name, torch.nn.Module):
+        return model_name, _infer_model_id(model_name)
+    if isinstance(model_name, str):
+        try:
+            from transformers import AutoModelForCausalLM
+        except ImportError as exc:
+            raise ImportError(
+                "Install HuggingFace support with: python -m pip install -e '.[hf]'"
+            ) from exc
+        kwargs = {"trust_remote_code": trust_remote_code}
+        kwargs.update(dict(model_kwargs or {}))
+        loaded = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
+        return loaded, model_name
+    raise ValueError(
+        "Pass either model_name=<hf id>, model_name=<model object>, or model=<model object>."
+    )
+def _has_device_map(model: torch.nn.Module) -> bool:
+    return hasattr(model, "hf_device_map") and bool(model.hf_device_map)
+def _make_router_stats(
+    model: torch.nn.Module,
+    num_layers: int,
+    output_router_logits: bool | str,
+) -> MoERouterStats | None:
+    if output_router_logits is False:
+        return None
+    config = getattr(model, "config", None)
+    if config is None:
+        return None
+    num_experts = getattr(config, "num_local_experts", getattr(config, "num_experts", None))
+    if num_experts is None:
+        return None
+    top_k = int(getattr(config, "num_experts_per_tok", 2))
+    return MoERouterStats(num_layers=num_layers, num_experts=int(num_experts), top_k=top_k)
+def _call_model(model: torch.nn.Module, batch: Any, *, output_router_logits: bool) -> Any:
+    extra = {"use_cache": False}
+    if output_router_logits:
+        extra["output_router_logits"] = True
+    if isinstance(batch, Mapping):
+        try:
+            return model(**batch, **extra)
+        except TypeError:
+            try:
+                return model(**batch, use_cache=False)
+            except TypeError:
+                return model(**batch)
+    if isinstance(batch, tuple):
+        try:
+            return model(*batch, **extra)
+        except TypeError:
+            return model(*batch)
+    if isinstance(batch, list):
+        try:
+            return model(*batch, **extra)
+        except TypeError:
+            return model(*batch)
+    try:
+        return model(batch, **extra)
+    except TypeError:
+        return model(batch)
+def _count_tokens(batch: Any) -> int:
+    if isinstance(batch, Mapping):
+        input_ids = batch.get("input_ids")
+        if isinstance(input_ids, torch.Tensor):
+            return int(input_ids.numel())
+        for value in batch.values():
+            if isinstance(value, torch.Tensor):
+                return int(value.shape[0])
+        return 0
+    if isinstance(batch, torch.Tensor):
+        return int(batch.numel()) if batch.ndim <= 2 else int(batch.shape[0] * batch.shape[1])
+    if isinstance(batch, (tuple, list)):
+        for value in batch:
+            if isinstance(value, torch.Tensor):
+                if value.ndim <= 2:
+                    return int(value.numel())
+                return int(value.shape[0] * value.shape[1])
+    return 0
+def _safe_model_key(model_id: str) -> str:
+    return (
+        model_id.replace("/", "_")
+        .replace("\\", "_")
+        .replace(" ", "_")
+        .replace(":", "_")
+        .lower()
+    )
+def _infer_model_id(model: torch.nn.Module) -> str:
+    name_or_path = getattr(model, "name_or_path", None)
+    if name_or_path:
+        return str(name_or_path)
+    config = getattr(model, "config", None)
+    config_name = getattr(config, "_name_or_path", None)
+    if config_name:
+        return str(config_name)
+    return type(model).__name__
+def _model_config_summary(model: torch.nn.Module) -> dict[str, Any]:
+    config = getattr(model, "config", None)
+    if config is None:
+        return {}
+    fields = [
+        "model_type",
+        "architectures",
+        "hidden_size",
+        "n_embd",
+        "num_hidden_layers",
+        "n_layer",
+        "num_attention_heads",
+        "n_head",
+        "intermediate_size",
+        "vocab_size",
+    ]
+    summary: dict[str, Any] = {}
+    for field in fields:
+        value = getattr(config, field, None)
+        if value is not None:
+            summary[field] = value
+    return summary
+def _software_versions() -> dict[str, str | None]:
+    return {
+        "python": platform.python_version(),
+        "torch": torch.__version__,
+        "pandas": pd.__version__,
+        "representation_geometry": _distribution_version("representation-geometry"),
+        "transformers": _distribution_version("transformers"),
+    }
+def _distribution_version(package: str) -> str | None:
+    try:
+        return importlib.metadata.version(package)
+    except importlib.metadata.PackageNotFoundError:
+        return None

representation_geometry/diagnostics.py ADDED Viewed

@@ -0,0 +1,76 @@
+from __future__ import annotations
+from typing import Any
+import pandas as pd
+import torch
+import torch.nn.functional as F
+from representation_geometry.metrics import metric_from_eigenvalues
+from representation_geometry.online import RunningCovariance
+# Alias used to annotate the values of DIAGNOSTIC_REGISTRY; currently just ``Any``.
+DiagnosticFn = Any
+def transform_activation(x: torch.Tensor, variant: str) -> torch.Tensor:
+    x = x.float()
+    if variant == "raw":
+        return x
+    if variant == "token_l2":
+        return F.normalize(x, dim=-1)
+    if variant == "token_rms":
+        rms = x.pow(2).mean(dim=-1, keepdim=True).sqrt().clamp(min=1e-12)
+        return x / rms
+    if variant == "feature_standardized":
+        return (x - x.mean(dim=0, keepdim=True)) / x.std(dim=0, keepdim=True).clamp(min=1e-12)
+    raise ValueError(f"Unknown activation transform: {variant}")
+def spectrum_metrics_from_activations(x: torch.Tensor) -> dict[str, Any]:
+    if x.shape[0] < 2:
+        return {}
+    x = x.float()
+    x = x - x.mean(dim=0, keepdim=True)
+    singular_values = torch.linalg.svdvals(x)
+    eigen_like = (singular_values**2).clamp_min(0).cpu().numpy()
+    return metric_from_eigenvalues(eigen_like)
+def normalization_ablation_metrics(
+    stats: dict[int, RunningCovariance],
+    *,
+    model_key: str,
+    model_label: str,
+    variants: tuple[str, ...] = ("raw", "token_l2", "token_rms", "feature_standardized"),
+) -> pd.DataFrame:
+    rows: list[dict[str, Any]] = []
+    for layer_idx, stat in sorted(stats.items()):
+        sample = stat.sample()
+        if sample.numel() == 0:
+            continue
+        for variant in variants:
+            transformed = transform_activation(sample, variant)
+            row = spectrum_metrics_from_activations(transformed)
+            if not row:
+                continue
+            row.update(
+                {
+                    "model_key": model_key,
+                    "model_label": model_label,
+                    "layer": layer_idx,
+                    "variant": variant,
+                    "tokens": int(sample.shape[0]),
+                    "hidden_dim": int(sample.shape[1]),
+                }
+            )
+            rows.append(row)
+    return pd.DataFrame(rows)
+# Maps each public diagnostic name to the function that computes its table.
+DIAGNOSTIC_REGISTRY: dict[str, DiagnosticFn] = {
+    "normalization_ablation": normalization_ablation_metrics,
+}
+def available_diagnostics() -> tuple[str, ...]:
+    return tuple(sorted(DIAGNOSTIC_REGISTRY))

representation_geometry/hooks.py ADDED Viewed

@@ -0,0 +1,215 @@
+from __future__ import annotations
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass
+from typing import Any
+import torch
+from representation_geometry.online import RunningCovariance
+class ResidualStatsStore:
+    """Lazy per-layer streaming covariance store."""
+    def __init__(self, *, sample_limit: int = 512, dtype: torch.dtype = torch.float32):
+        self.sample_limit = sample_limit
+        self.dtype = dtype
+        self.stats: dict[int, RunningCovariance] = {}
+    @torch.no_grad()
+    def update(self, layer_idx: int, value: Any) -> None:
+        tensor = first_tensor(value)
+        if tensor is None or tensor.ndim < 2:
+            return
+        dim = int(tensor.shape[-1])
+        stat = self.stats.get(layer_idx)
+        if stat is None:
+            stat = RunningCovariance(dim=dim, dtype=self.dtype, sample_limit=self.sample_limit)
+            self.stats[layer_idx] = stat
+        stat.update(tensor)
+# Immutable record describing one module to hook and how its activations are labelled.
+@dataclass(frozen=True)
+class HookTarget:
+    layer: int  # key under which the registered hook stores activations
+    module: torch.nn.Module  # module the forward (pre-)hook is attached to
+    name: str  # label reported under "hook_targets" in the resolver metadata
+def resolve_hook_targets(
+    model: torch.nn.Module,
+    *,
+    hook_point: str,
+    layers: str | Sequence[int],
+) -> tuple[list[HookTarget], dict[str, Any]]:
+    """Resolve a public hook-point string into concrete PyTorch modules."""
+    if hook_point in {"residual_input", "residual_output"}:
+        blocks = get_transformer_blocks(model)
+        layer_ids = parse_layers(layers, len(blocks))
+        targets = [
+            HookTarget(layer=layer_idx, module=blocks[layer_idx], name=f"block_{layer_idx}")
+            for layer_idx in layer_ids
+        ]
+        return targets, {
+            "num_blocks": len(blocks),
+            "layers": layer_ids,
+            "hook_targets": [target.name for target in targets],
+        }
+    module_prefixes = {
+        "module_input:": "input",
+        "module_output:": "output",
+    }
+    for prefix, capture in module_prefixes.items():
+        if hook_point.startswith(prefix):
+            if layers != "all":
+                raise ValueError(
+                    "layers is only supported with residual_input and residual_output hooks."
+                )
+            module_name = hook_point.removeprefix(prefix)
+            if not module_name:
+                raise ValueError(f"Expected a module path after {prefix!r}.")
+            modules = dict(model.named_modules())
+            if module_name not in modules:
+                available = ", ".join(name for name in modules if name) or "<root only>"
+                raise ValueError(
+                    f"Unknown module path {module_name!r}. "
+                    f"Available modules include: {available}"
+                )
+            target = HookTarget(layer=0, module=modules[module_name], name=module_name)
+            return [target], {
+                "num_blocks": None,
+                "layers": [0],
+                "hook_targets": [module_name],
+                "module_capture": capture,
+            }
+    raise ValueError(
+        "hook_point must be one of 'residual_input', 'residual_output', "
+        "'module_input:<module_path>', or 'module_output:<module_path>'."
+    )
+def register_activation_hooks(
+    targets: Sequence[HookTarget],
+    store: ResidualStatsStore,
+    *,
+    hook_point: str,
+) -> list[torch.utils.hooks.RemovableHandle]:
+    capture_input = hook_point == "residual_input" or hook_point.startswith("module_input:")
+    handles: list[torch.utils.hooks.RemovableHandle] = []
+    for target in targets:
+        if capture_input:
+            handles.append(
+                target.module.register_forward_pre_hook(_make_input_hook(target.layer, store))
+            )
+        else:
+            handles.append(
+                target.module.register_forward_hook(_make_output_hook(target.layer, store))
+            )
+    return handles
+def register_residual_input_hooks(
+    blocks: Sequence[torch.nn.Module],
+    layers: Sequence[int],
+    store: ResidualStatsStore,
+) -> list[torch.utils.hooks.RemovableHandle]:
+    """Backward-compatible wrapper for the original residual-input collector."""
+    targets = [
+        HookTarget(layer=idx, module=block, name=f"block_{idx}")
+        for idx, block in enumerate(blocks)
+        if idx in set(layers)
+    ]
+    return register_activation_hooks(targets, store, hook_point="residual_input")
+def _make_input_hook(layer_idx: int, store: ResidualStatsStore):
+    def hook(module: torch.nn.Module, inputs: tuple[Any, ...]) -> None:
+        if inputs:
+            store.update(layer_idx, inputs[0])
+    return hook
+def _make_output_hook(layer_idx: int, store: ResidualStatsStore):
+    """Build a forward hook that records a module's output under ``layer_idx``."""
+    def hook(module: torch.nn.Module, inputs: tuple[Any, ...], output: Any) -> None:
+        # The module and its inputs are ignored; only the output is passed to the store.
+        store.update(layer_idx, output)
+    return hook
+def get_transformer_blocks(model: torch.nn.Module) -> list[torch.nn.Module]:
+    """Locate common transformer block containers."""
+    candidates = [
+        ("transformer", "h"),
+        ("model", "layers"),
+        ("gpt_neox", "layers"),
+        ("decoder", "layers"),
+    ]
+    for parent_name, child_name in candidates:
+        parent = getattr(model, parent_name, None)
+        if parent is not None and hasattr(parent, child_name):
+            return list(getattr(parent, child_name))
+    for attr in ["layers", "blocks", "h"]:
+        value = getattr(model, attr, None)
+        if value is not None:
+            return list(value)
+    raise ValueError(
+        f"Could not locate transformer blocks for {type(model).__name__}. "
+        "Pass a model with .transformer.h, .model.layers, .gpt_neox.layers, .layers, or .blocks."
+    )
+def get_input_device(model: torch.nn.Module) -> torch.device:
+    for parameter in model.parameters():
+        if parameter.device.type != "meta":
+            return parameter.device
+    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+def parse_layers(layers: str | Sequence[int], num_blocks: int) -> list[int]:
+    if layers == "all":
+        return list(range(num_blocks))
+    if isinstance(layers, str):
+        return [int(part.strip()) for part in layers.split(",") if part.strip()]
+    out = [int(layer) for layer in layers]
+    invalid = [layer for layer in out if layer < 0 or layer >= num_blocks]
+    if invalid:
+        raise ValueError(f"Layer indices out of range for {num_blocks} blocks: {invalid}")
+    return out
+def first_tensor(value: Any) -> torch.Tensor | None:
+    if isinstance(value, torch.Tensor):
+        return value
+    if isinstance(value, Mapping):
+        for item in value.values():
+            tensor = first_tensor(item)
+            if tensor is not None:
+                return tensor
+    if isinstance(value, (tuple, list)):
+        for item in value:
+            tensor = first_tensor(item)
+            if tensor is not None:
+                return tensor
+    return None
+def move_to_device(value: Any, device: torch.device) -> Any:
+    if isinstance(value, torch.Tensor):
+        return value.to(device)
+    if isinstance(value, Mapping):
+        return {key: move_to_device(item, device) for key, item in value.items()}
+    if isinstance(value, tuple):
+        return tuple(move_to_device(item, device) for item in value)
+    if isinstance(value, list):
+        return [move_to_device(item, device) for item in value]
+    return value