PyPI - archscope - Versions diffs - 0.2.6__tar.gz → 0.2.7__tar.gz - Mend

archscope 0.2.6.tar.gz → 0.2.7.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (36)

{archscope-0.2.6/src/archscope.egg-info → archscope-0.2.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: archscope
-Version: 0.2.6
+Version: 0.2.7
 Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
 Author: Juan Cruz Dovzak
 License: Apache-2.0

{archscope-0.2.6 → archscope-0.2.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "archscope"
-version = "0.2.6"
+version = "0.2.7"
 description = "Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models"
 readme = "README.md"
 authors = [{name = "Juan Cruz Dovzak"}]

{archscope-0.2.6 → archscope-0.2.7}/src/archscope/__init__.py RENAMED Viewed

@@ -25,7 +25,7 @@ Quick start::
     print(result.to_markdown())
 """
-__version__ = "0.2.6"
+__version__ = "0.2.7"
 from . import probes, sae, neurons, attribute, backends, circuits, transfer, bench, lens, diff
 from .loader import load_model, make_tokenize_fn

{archscope-0.2.6 → archscope-0.2.7}/src/archscope/probes.py RENAMED Viewed

@@ -108,6 +108,40 @@ class ProbeFit:
         with torch.no_grad():
             return torch.sigmoid(self.probe(activations.to(self.device)))
+    @property
+    def direction(self) -> torch.Tensor:
+        """1D direction vector in activation space (linear probes only).
+        Shape: ``(hidden_dim,)``. This is the projection axis the probe found —
+        useful for: applying a probe to externally-transformed activations
+        (e.g., after ``archscope.transfer.learn_alignment``), inspecting feature
+        geometry, or projecting interventions along the learned direction.
+        Raises ``ValueError`` for MLP probes (no single linear direction).
+        """
+        if self.config.probe_type != "linear":
+            raise ValueError(
+                f".direction is only defined for linear probes (got "
+                f"probe_type={self.config.probe_type!r}). MLP probes don't have a "
+                "single direction in activation space."
+            )
+        return self.probe.net.weight.detach().squeeze(0).clone()
+    @property
+    def bias(self) -> torch.Tensor:
+        """Scalar bias term (linear probes only). Shape: ``()``.
+        Together with ``.direction``, lets you score arbitrary activations as
+        ``logits = acts @ direction + bias`` without going through the probe
+        module — handy for cross-arch transfer experiments.
+        """
+        if self.config.probe_type != "linear":
+            raise ValueError(
+                f".bias is only defined for linear probes (got "
+                f"probe_type={self.config.probe_type!r})."
+            )
+        return self.probe.net.bias.detach().squeeze().clone()
 def _auroc(logits: torch.Tensor, labels: torch.Tensor) -> float:
     """AUROC from logits + binary labels.

{archscope-0.2.6 → archscope-0.2.7/src/archscope.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: archscope
-Version: 0.2.6
+Version: 0.2.7
 Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
 Author: Juan Cruz Dovzak
 License: Apache-2.0

{archscope-0.2.6 → archscope-0.2.7}/tests/test_unit.py RENAMED Viewed

@@ -22,7 +22,7 @@ def test_imports():
     import archscope
     from archscope import (probes, sae, neurons, attribute, backends,    # noqa: F401
                             circuits, transfer, bench, lens, diff)        # noqa: F401
-    assert archscope.__version__ == "0.2.6"
+    assert archscope.__version__ == "0.2.7"
 def test_loader_exports():
@@ -259,6 +259,37 @@ def test_induction_head_score_small_vocab_clear_error():
     assert "vocab window" in str(ei.value).lower() or "n_pairs" in str(ei.value)
+def test_probefit_direction_and_bias_accessors():
+    """ProbeFit exposes .direction and .bias for linear probes."""
+    from archscope.probes import ProbeFit, ProbeConfig
+    torch.manual_seed(0)
+    pos = torch.randn(40, 8) + 1.5
+    neg = torch.randn(40, 8) - 1.5
+    cfg = ProbeConfig(layer_name="x", probe_type="linear")
+    pf = ProbeFit(cfg, input_dim=8)
+    pf.train(torch.cat([pos, neg]), torch.cat([torch.ones(40), torch.zeros(40)]),
+              epochs=30, batch_size=16)
+    d, b = pf.direction, pf.bias
+    assert d.shape == (8,), f"direction shape: {d.shape}"
+    assert b.dim() == 0, f"bias should be scalar: {b.shape}"
+    # Manual application matches what probe.score does (up to sigmoid).
+    test_act = torch.randn(3, 8)
+    manual_logits = test_act @ d + b
+    via_probe = pf.probe(test_act)
+    assert torch.allclose(manual_logits, via_probe, atol=1e-5), \
+        "direction @ acts + bias should equal probe(acts)"
+def test_probefit_direction_rejects_mlp():
+    """.direction raises on MLP probes."""
+    from archscope.probes import ProbeFit, ProbeConfig
+    cfg = ProbeConfig(layer_name="x", probe_type="mlp")
+    pf = ProbeFit(cfg, input_dim=8)
+    with pytest.raises(ValueError) as ei:
+        _ = pf.direction
+    assert "linear" in str(ei.value).lower()
 def test_dim_decompose_rejects_mamba_style_model():
     """dim_decompose raises on models with no attention/MLP submodules."""
     from archscope.attribute import dim_decompose