PyPI - archscope - Versions diffs - 0.2.4__tar.gz → 0.2.5__tar.gz - Mend

archscope 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{archscope-0.2.4/src/archscope.egg-info → archscope-0.2.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: archscope
-Version: 0.2.4
+Version: 0.2.5
 Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
 Author: Juan Cruz Dovzak
 License: Apache-2.0
@@ -96,12 +96,14 @@ ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_st
 ### Backends
-| Backend | Models | Specific |
+| Backend | Auto-detected `model_type` | What you get |
 |---|---|---|
-| `transformer` | Pythia, GPT-2, Llama, Mistral, Qwen, MPT, Falcon, GPT-Neo | residual stream |
-| `mamba` | Mamba, Mamba-2 | residual + explicit `.ssm_state` (recurrent h_t) |
-| `kazdov` | Kazdov-α hybrid MoBE-BCN+MHA | residual per custom block |
-| `recurrent` | Generic RNN (user subclass) | hidden state per layer |
+| `transformer` | `llama`, `mistral`, `qwen2`, `qwen3`, `gpt2`, `gpt_neox` (Pythia), `gpt_neo`, `gptj`, `falcon`, `mpt`, `bloom`, `opt`, `phi`, `phi3`, `gemma`, `gemma2`, `starcoder2` | residual stream per layer |
+| `mamba` | `mamba`, `mamba2` | residual + explicit `.ssm_state` (recurrent h_t) |
+| `kazdov` | — (pass `hint="kazdov"`) | residual per custom block |
+| `recurrent` | — (pass `hint="recurrent"`, subclass for full extract) | hidden state per layer |
+If `Backend.for_model(model)` is called on a model whose `config.model_type` isn't in the autodetect list, it raises a clear `ValueError` rather than silently picking a backend. Pass `hint="..."` explicitly for anything outside the list, or register a new backend via `Backend.register("name")`.
 ---

{archscope-0.2.4 → archscope-0.2.5}/README.md RENAMED Viewed

@@ -59,12 +59,14 @@ ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_st
 ### Backends
-| Backend | Models | Specific |
+| Backend | Auto-detected `model_type` | What you get |
 |---|---|---|
-| `transformer` | Pythia, GPT-2, Llama, Mistral, Qwen, MPT, Falcon, GPT-Neo | residual stream |
-| `mamba` | Mamba, Mamba-2 | residual + explicit `.ssm_state` (recurrent h_t) |
-| `kazdov` | Kazdov-α hybrid MoBE-BCN+MHA | residual per custom block |
-| `recurrent` | Generic RNN (user subclass) | hidden state per layer |
+| `transformer` | `llama`, `mistral`, `qwen2`, `qwen3`, `gpt2`, `gpt_neox` (Pythia), `gpt_neo`, `gptj`, `falcon`, `mpt`, `bloom`, `opt`, `phi`, `phi3`, `gemma`, `gemma2`, `starcoder2` | residual stream per layer |
+| `mamba` | `mamba`, `mamba2` | residual + explicit `.ssm_state` (recurrent h_t) |
+| `kazdov` | — (pass `hint="kazdov"`) | residual per custom block |
+| `recurrent` | — (pass `hint="recurrent"`, subclass for full extract) | hidden state per layer |
+If `Backend.for_model(model)` is called on a model whose `config.model_type` isn't in the autodetect list, it raises a clear `ValueError` rather than silently picking a backend. Pass `hint="..."` explicitly for anything outside the list, or register a new backend via `Backend.register("name")`.
 ---

{archscope-0.2.4 → archscope-0.2.5}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "archscope"
-version = "0.2.4"
+version = "0.2.5"
 description = "Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models"
 readme = "README.md"
 authors = [{name = "Juan Cruz Dovzak"}]

{archscope-0.2.4 → archscope-0.2.5}/src/archscope/__init__.py RENAMED Viewed

@@ -25,16 +25,13 @@ Quick start::
     print(result.to_markdown())
 """
-__version__ = "0.2.4"
+__version__ = "0.2.5"
 from . import probes, sae, neurons, attribute, backends, circuits, transfer, bench, lens, diff
 from .loader import load_model, make_tokenize_fn
-# Kazdov backend registers itself on import — optional, only if kazdov repo present
-try:
-    from . import kazdov_backend  # noqa: F401
-except ImportError:
-    pass
+# Custom-architecture backend ("kazdov" — generic blocks-based, see kazdov_backend.py)
+from . import kazdov_backend  # noqa: F401
 __all__ = [
     "probes", "sae", "neurons", "attribute", "backends",

{archscope-0.2.4 → archscope-0.2.5}/src/archscope/attribute.py RENAMED Viewed

@@ -66,6 +66,17 @@ def activation_patch(
     Returns:
         PatchResult with the fraction of behavioral gap closed by patching.
     """
+    # Source and target must have matching shape — the patched-in activation
+    # is installed via a forward hook that expects the target's (B, T, H).
+    src_ids = prompt_source.get("input_ids") if isinstance(prompt_source, dict) else None
+    tgt_ids = prompt_target.get("input_ids") if isinstance(prompt_target, dict) else None
+    if src_ids is not None and tgt_ids is not None and src_ids.shape != tgt_ids.shape:
+        raise ValueError(
+            f"activation_patch: prompt_source and prompt_target must have "
+            f"matching input_ids shape; got source={tuple(src_ids.shape)} "
+            f"vs target={tuple(tgt_ids.shape)}. Pad/truncate to the same length."
+        )
     backend = Backend.for_model(model, hint=backend_hint)
     layer_names = [f"layer_{i}.residual" for i in layer_indices]
@@ -156,7 +167,10 @@ def dim_decompose(
             captured: list = []
             def capture(mod, inp, out, store=captured):
-                store.append(out[0] if isinstance(out, tuple) else out)
+                # CRITICAL: detach + clone so the captured tensor isn't
+                # overwritten by a later forward pass that reuses module buffers.
+                tensor = out[0] if isinstance(out, tuple) else out
+                store.append(tensor.detach().clone())
             capture_hooks.append(module.register_forward_hook(capture))
             src_acts_by_layer[idx] = captured

{archscope-0.2.4 → archscope-0.2.5}/src/archscope/backends.py RENAMED Viewed

@@ -44,20 +44,57 @@ class Backend(abc.ABC):
             return klass
         return deco
+    # HF model_type → backend name. Transformer family covers most HF decoder LMs;
+    # add new families here as they ship. Auto-detect intentionally raises when
+    # nothing matches (silent fallback caused real bugs in v0.2.4).
+    _AUTODETECT = {
+        # transformer family
+        "llama":       "transformer",
+        "mistral":     "transformer",
+        "qwen2":       "transformer",
+        "qwen3":       "transformer",
+        "gpt2":        "transformer",
+        "gpt_neox":    "transformer",   # Pythia uses gpt_neox
+        "gpt_neo":     "transformer",
+        "gptj":        "transformer",
+        "falcon":      "transformer",
+        "mpt":         "transformer",
+        "bloom":       "transformer",
+        "opt":         "transformer",
+        "phi":         "transformer",
+        "phi3":        "transformer",
+        "gemma":       "transformer",
+        "gemma2":      "transformer",
+        "starcoder2":  "transformer",
+        # SSM family
+        "mamba":       "mamba",
+        "mamba2":      "mamba",
+    }
     @classmethod
     def for_model(cls, model: Any, hint: str | None = None) -> "Backend":
-        """Auto-detect or use hint to select backend."""
-        if hint and hint in cls._registry:
-            return cls._registry[hint](model)
-        # Auto-detect via attribute introspection
-        if hasattr(model, "config") and getattr(model.config, "model_type", None) in ("llama", "gpt2", "qwen2", "qwen3"):
-            return cls._registry["transformer"](model)
-        if hasattr(model, "config") and getattr(model.config, "model_type", "") in ("mamba", "mamba2"):
-            return cls._registry["mamba"](model)
-        # Default fallback
-        if "recurrent" in cls._registry:
-            return cls._registry["recurrent"](model)
-        raise ValueError(f"No backend matches model {type(model).__name__}. Register via Backend.register('name').")
+        """Auto-detect (or use hint) to select a backend.
+        Raises ValueError if no hint is provided and the model's ``config.model_type``
+        is not in the autodetect table. Pass ``hint=...`` explicitly for any model
+        that's not auto-detected, or register a custom backend via
+        ``Backend.register('name')``.
+        """
+        if hint:
+            if hint in cls._registry:
+                return cls._registry[hint](model)
+            raise ValueError(
+                f"Unknown backend hint '{hint}'. Registered: {sorted(cls._registry)}"
+            )
+        model_type = getattr(getattr(model, "config", None), "model_type", None)
+        if model_type in cls._AUTODETECT:
+            return cls._registry[cls._AUTODETECT[model_type]](model)
+        raise ValueError(
+            f"No backend matches model with config.model_type={model_type!r} "
+            f"(type {type(model).__name__}). Pass hint=... explicitly, or "
+            f"register a custom backend via Backend.register('name'). "
+            f"Auto-detected types: {sorted(cls._AUTODETECT)}"
+        )
     def __init__(self, model: Any):
         self.model = model

{archscope-0.2.4 → archscope-0.2.5}/src/archscope/circuits.py RENAMED Viewed

@@ -154,12 +154,20 @@ def copy_score(
         words = rng.sample(word_pool, n_words)
         prompt = f"list: {' '.join(words)}. list: "
         ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-        ids.shape[1]
-        # Token IDs for target words (first token of each)
+        # Different tokenizers handle whitespace differently:
+        # - BPE (GPT-2 / NeoX / Pythia / Llama-2): " word" → leading-space token
+        # - SentencePiece (Llama-3, Qwen, T5): "▁word" → leading-underscore token
+        # Try " word" first; fall back to bare word for tokenizers that don't
+        # use a space prefix.
         target_tokens = []
         for w in words:
-            target_tokens.append(tokenizer(" " + w, add_special_tokens=False).input_ids[0])
+            ids_w = tokenizer(" " + w, add_special_tokens=False).input_ids
+            if not ids_w:
+                ids_w = tokenizer(w, add_special_tokens=False).input_ids
+            if not ids_w:
+                continue  # pathological; skip
+            target_tokens.append(ids_w[0])
         # Autoregressively predict n_words tokens, chaining the model's own
         # predictions (not teacher-forcing) — measures cumulative copy ability.

{archscope-0.2.4 → archscope-0.2.5}/src/archscope/cli.py RENAMED Viewed

@@ -89,6 +89,16 @@ def bench(model_name: str, arch: str, out: str | None) -> None:
         return tok(texts, return_tensors="pt", padding=True, truncation=True, max_length=32)
     arch_family = {"transformer": "transformer", "mamba": "ssm", "kazdov": "hybrid"}[arch]
+    # For Mamba, pick a representative SSM-state layer at mid-depth so the
+    # ssm_state_variance_ratio metric is populated (otherwise bench returns NaN).
+    extra: dict = {}
+    if arch == "mamba":
+        from .backends import Backend
+        backend = Backend.for_model(model, hint="mamba")
+        n_residual = sum(1 for ln in backend.layer_names() if ".residual" in ln)
+        extra["ssm_layer"] = max(0, n_residual // 2)
     profile = bench_mod.benchmark(
         model_name=model_name,
         model=model,
@@ -96,6 +106,7 @@ def bench(model_name: str, arch: str, out: str | None) -> None:
         backend_hint=arch,
         arch_family=arch_family,
         tokenize_fn=tokenize_fn,
+        **extra,
     )
     markdown = bench_mod.profile_to_markdown(profile)

{archscope-0.2.4 → archscope-0.2.5}/src/archscope/diff.py RENAMED Viewed

@@ -155,6 +155,10 @@ def compare(
         raise ValueError("base and fine_tuned have different layer structure — "
                          "they must share architecture")
+    # Ensure tokenizer has a pad token (GPT-2 family ships without one).
+    if getattr(tokenizer, "pad_token", None) is None and getattr(tokenizer, "eos_token", None) is not None:
+        tokenizer.pad_token = tokenizer.eos_token
     # Tokenize calibration
     enc = tokenizer(calibration_texts, return_tensors="pt", padding=True,
                      truncation=True, max_length=max_length)

{archscope-0.2.4 → archscope-0.2.5}/src/archscope/neurons.py RENAMED Viewed

@@ -17,7 +17,7 @@ from ._utils import resolve_layer_module
 @dataclass
 class NeuronEditConfig:
     top_frac: float = 0.001         # top 0.1% by default
-    layer_filter: str | None = None # e.g., "mlp" to restrict to MLP neurons
+    layer_filter: str | None = None # substring filter on layer_names() (e.g. "residual")
     mode: str = "scalar"            # "scalar" (multiply by m) or "ablate" (m=0)
@@ -87,8 +87,15 @@ def find_neurons(
     config = config or NeuronEditConfig()
     backend = Backend.for_model(model, hint=backend_hint)
-    # Get all layers (will filter to MLP later if requested)
     all_layers = backend.layer_names()
+    if config.layer_filter is not None:
+        all_layers = [ln for ln in all_layers if config.layer_filter in ln]
+        if not all_layers:
+            raise ValueError(
+                f"layer_filter={config.layer_filter!r} matched no layers. "
+                f"Available substrings include: "
+                f"{sorted({ln.split('.', 1)[-1] for ln in backend.layer_names()})}"
+            )
     # Forward both classes, collect final-token activations
     harm_acts = backend.extract(inputs_harmful, layers=all_layers)

{archscope-0.2.4 → archscope-0.2.5/src/archscope.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: archscope
-Version: 0.2.4
+Version: 0.2.5
 Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
 Author: Juan Cruz Dovzak
 License: Apache-2.0
@@ -96,12 +96,14 @@ ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_st
 ### Backends
-| Backend | Models | Specific |
+| Backend | Auto-detected `model_type` | What you get |
 |---|---|---|
-| `transformer` | Pythia, GPT-2, Llama, Mistral, Qwen, MPT, Falcon, GPT-Neo | residual stream |
-| `mamba` | Mamba, Mamba-2 | residual + explicit `.ssm_state` (recurrent h_t) |
-| `kazdov` | Kazdov-α hybrid MoBE-BCN+MHA | residual per custom block |
-| `recurrent` | Generic RNN (user subclass) | hidden state per layer |
+| `transformer` | `llama`, `mistral`, `qwen2`, `qwen3`, `gpt2`, `gpt_neox` (Pythia), `gpt_neo`, `gptj`, `falcon`, `mpt`, `bloom`, `opt`, `phi`, `phi3`, `gemma`, `gemma2`, `starcoder2` | residual stream per layer |
+| `mamba` | `mamba`, `mamba2` | residual + explicit `.ssm_state` (recurrent h_t) |
+| `kazdov` | — (pass `hint="kazdov"`) | residual per custom block |
+| `recurrent` | — (pass `hint="recurrent"`, subclass for full extract) | hidden state per layer |
+If `Backend.for_model(model)` is called on a model whose `config.model_type` isn't in the autodetect list, it raises a clear `ValueError` rather than silently picking a backend. Pass `hint="..."` explicitly for anything outside the list, or register a new backend via `Backend.register("name")`.
 ---

{archscope-0.2.4 → archscope-0.2.5}/tests/test_circuits_3arch.py RENAMED Viewed

@@ -98,7 +98,7 @@ def main():
     print("  • concentration relative ≈ 0 → highly confident predictions (concentrated)")
     # Save
-    out_path = "str(__import__("pathlib").Path(__file__).parent.parent / "_research")/circuits_3arch.json"
+    out_path = str(__import__("pathlib").Path(__file__).parent.parent / "_research" / "circuits_3arch.json")
     os.makedirs(os.path.dirname(out_path), exist_ok=True)
     with open(out_path, "w") as f:
         json.dump(all_results, f, indent=2, default=str)

{archscope-0.2.4 → archscope-0.2.5}/tests/test_unit.py RENAMED Viewed

@@ -20,9 +20,9 @@ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 def test_imports():
     """All modules import without errors."""
     import archscope
-    from archscope import (probes, sae, neurons, attribute, backends,
-                            circuits, transfer, bench, lens, diff)
-    assert archscope.__version__ == "0.2.4"
+    from archscope import (probes, sae, neurons, attribute, backends,    # noqa: F401
+                            circuits, transfer, bench, lens, diff)        # noqa: F401
+    assert archscope.__version__ == "0.2.5"
 def test_loader_exports():
@@ -36,7 +36,7 @@ def test_loader_exports():
 def test_layer_name_validation_clear_error():
     """Backend validates layer names with an informative error."""
-    from archscope.backends import Backend, ActivationRecord
+    from archscope.backends import Backend
     # Build a minimal mock backend
     class _MockBackend(Backend):
@@ -133,14 +133,12 @@ def test_backend_registry():
         assert name in Backend._registry, f"{name} not registered"
-def test_kazdov_backend_registers_when_available():
-    """KazdovBackend optional import succeeds."""
+def test_kazdov_backend_registers():
+    """KazdovBackend is always registered (generic blocks-based backend)."""
     from archscope.backends import Backend
-    # kazdov_backend imports at __init__ and registers — it's optional
-    if "kazdov" in Backend._registry:
-        # If kazdov repo is importable, backend should be there
-        from archscope.kazdov_backend import KazdovBackend
-        assert KazdovBackend is Backend._registry["kazdov"]
+    from archscope.kazdov_backend import KazdovBackend
+    assert "kazdov" in Backend._registry
+    assert KazdovBackend is Backend._registry["kazdov"]
 def test_alignment_math():
@@ -194,6 +192,58 @@ def test_interpprofile_serializes():
     assert "test" in j
+def test_activation_patch_rejects_shape_mismatch():
+    """activation_patch surfaces a clear error when source/target shapes differ."""
+    from archscope.attribute import activation_patch
+    src = {"input_ids": torch.tensor([[1, 2, 3]])}
+    tgt = {"input_ids": torch.tensor([[1, 2, 3, 4, 5]])}
+    with pytest.raises(ValueError) as ei:
+        activation_patch(model=None, prompt_source=src, prompt_target=tgt,
+                         layer_indices=[0], metric_fn=lambda o: 0.0,
+                         backend_hint="transformer")
+    assert "matching input_ids shape" in str(ei.value)
+def test_backend_for_model_raises_on_unknown_type():
+    """Unknown config.model_type → clear ValueError, no silent fallback."""
+    from archscope.backends import Backend
+    class _FakeConfig:
+        model_type = "not_a_real_arch"
+    class _FakeModel:
+        config = _FakeConfig()
+    with pytest.raises(ValueError) as ei:
+        Backend.for_model(_FakeModel())
+    msg = str(ei.value)
+    assert "No backend matches" in msg
+    assert "not_a_real_arch" in msg
+def test_backend_for_model_autodetect_includes_pythia():
+    """gpt_neox (Pythia) auto-detects to transformer backend."""
+    from archscope.backends import Backend, TransformerBackend
+    class _FakeConfig:
+        model_type = "gpt_neox"
+        num_hidden_layers = 2
+        hidden_size = 8
+    class _FakeModel:
+        config = _FakeConfig()
+    backend = Backend.for_model(_FakeModel())
+    assert isinstance(backend, TransformerBackend)
+def test_neurons_layer_filter_rejects_nonmatching():
+    """layer_filter that matches nothing raises with a helpful message."""
+    from archscope.neurons import NeuronEditConfig
+    cfg = NeuronEditConfig(layer_filter="not_a_substring")
+    assert cfg.layer_filter == "not_a_substring"
 if __name__ == "__main__":
     # Allow `python tests/test_unit.py` for quick local check
     pytest.main([__file__, "-v"])