archscope 0.2.6__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {archscope-0.2.6/src/archscope.egg-info → archscope-0.2.7}/PKG-INFO +1 -1
  2. {archscope-0.2.6 → archscope-0.2.7}/pyproject.toml +1 -1
  3. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/__init__.py +1 -1
  4. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/probes.py +34 -0
  5. {archscope-0.2.6 → archscope-0.2.7/src/archscope.egg-info}/PKG-INFO +1 -1
  6. {archscope-0.2.6 → archscope-0.2.7}/tests/test_unit.py +32 -1
  7. {archscope-0.2.6 → archscope-0.2.7}/LICENSE +0 -0
  8. {archscope-0.2.6 → archscope-0.2.7}/README.md +0 -0
  9. {archscope-0.2.6 → archscope-0.2.7}/setup.cfg +0 -0
  10. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/_utils.py +0 -0
  11. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/attribute.py +0 -0
  12. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/backends.py +0 -0
  13. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/bench.py +0 -0
  14. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/circuits.py +0 -0
  15. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/cli.py +0 -0
  16. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/diff.py +0 -0
  17. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/kazdov_backend.py +0 -0
  18. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/lens.py +0 -0
  19. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/loader.py +0 -0
  20. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/neurons.py +0 -0
  21. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/py.typed +0 -0
  22. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/sae.py +0 -0
  23. {archscope-0.2.6 → archscope-0.2.7}/src/archscope/transfer.py +0 -0
  24. {archscope-0.2.6 → archscope-0.2.7}/src/archscope.egg-info/SOURCES.txt +0 -0
  25. {archscope-0.2.6 → archscope-0.2.7}/src/archscope.egg-info/dependency_links.txt +0 -0
  26. {archscope-0.2.6 → archscope-0.2.7}/src/archscope.egg-info/entry_points.txt +0 -0
  27. {archscope-0.2.6 → archscope-0.2.7}/src/archscope.egg-info/requires.txt +0 -0
  28. {archscope-0.2.6 → archscope-0.2.7}/src/archscope.egg-info/top_level.txt +0 -0
  29. {archscope-0.2.6 → archscope-0.2.7}/tests/test_circuits_3arch.py +0 -0
  30. {archscope-0.2.6 → archscope-0.2.7}/tests/test_diff.py +0 -0
  31. {archscope-0.2.6 → archscope-0.2.7}/tests/test_kazdov_integration.py +0 -0
  32. {archscope-0.2.6 → archscope-0.2.7}/tests/test_lens.py +0 -0
  33. {archscope-0.2.6 → archscope-0.2.7}/tests/test_mamba_integration.py +0 -0
  34. {archscope-0.2.6 → archscope-0.2.7}/tests/test_mamba_ssm_state.py +0 -0
  35. {archscope-0.2.6 → archscope-0.2.7}/tests/test_probe_transfer.py +0 -0
  36. {archscope-0.2.6 → archscope-0.2.7}/tests/test_pythia_end_to_end.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: archscope
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
5
5
  Author: Juan Cruz Dovzak
6
6
  License: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "archscope"
3
- version = "0.2.6"
3
+ version = "0.2.7"
4
4
  description = "Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models"
5
5
  readme = "README.md"
6
6
  authors = [{name = "Juan Cruz Dovzak"}]
@@ -25,7 +25,7 @@ Quick start::
25
25
  print(result.to_markdown())
26
26
  """
27
27
 
28
- __version__ = "0.2.6"
28
+ __version__ = "0.2.7"
29
29
 
30
30
  from . import probes, sae, neurons, attribute, backends, circuits, transfer, bench, lens, diff
31
31
  from .loader import load_model, make_tokenize_fn
@@ -108,6 +108,40 @@ class ProbeFit:
108
108
  with torch.no_grad():
109
109
  return torch.sigmoid(self.probe(activations.to(self.device)))
110
110
 
111
+ @property
112
+ def direction(self) -> torch.Tensor:
113
+ """1D direction vector in activation space (linear probes only).
114
+
115
+ Shape: ``(hidden_dim,)``. This is the projection axis the probe found —
116
+ useful for: applying a probe to externally-transformed activations
117
+ (e.g., after ``archscope.transfer.learn_alignment``), inspecting feature
118
+ geometry, or projecting interventions along the learned direction.
119
+
120
+ Raises ``ValueError`` for MLP probes (no single linear direction).
121
+ """
122
+ if self.config.probe_type != "linear":
123
+ raise ValueError(
124
+ f".direction is only defined for linear probes (got "
125
+ f"probe_type={self.config.probe_type!r}). MLP probes don't have a "
126
+ "single direction in activation space."
127
+ )
128
+ return self.probe.net.weight.detach().squeeze(0).clone()
129
+
130
+ @property
131
+ def bias(self) -> torch.Tensor:
132
+ """Scalar bias term (linear probes only). Shape: ``()``.
133
+
134
+ Together with ``.direction``, lets you score arbitrary activations as
135
+ ``logits = acts @ direction + bias`` without going through the probe
136
+ module — handy for cross-arch transfer experiments.
137
+ """
138
+ if self.config.probe_type != "linear":
139
+ raise ValueError(
140
+ f".bias is only defined for linear probes (got "
141
+ f"probe_type={self.config.probe_type!r})."
142
+ )
143
+ return self.probe.net.bias.detach().squeeze().clone()
144
+
111
145
 
112
146
  def _auroc(logits: torch.Tensor, labels: torch.Tensor) -> float:
113
147
  """AUROC from logits + binary labels.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: archscope
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
5
5
  Author: Juan Cruz Dovzak
6
6
  License: Apache-2.0
@@ -22,7 +22,7 @@ def test_imports():
22
22
  import archscope
23
23
  from archscope import (probes, sae, neurons, attribute, backends, # noqa: F401
24
24
  circuits, transfer, bench, lens, diff) # noqa: F401
25
- assert archscope.__version__ == "0.2.6"
25
+ assert archscope.__version__ == "0.2.7"
26
26
 
27
27
 
28
28
  def test_loader_exports():
@@ -259,6 +259,37 @@ def test_induction_head_score_small_vocab_clear_error():
259
259
  assert "vocab window" in str(ei.value).lower() or "n_pairs" in str(ei.value)
260
260
 
261
261
 
262
+ def test_probefit_direction_and_bias_accessors():
263
+ """ProbeFit exposes .direction and .bias for linear probes."""
264
+ from archscope.probes import ProbeFit, ProbeConfig
265
+ torch.manual_seed(0)
266
+ pos = torch.randn(40, 8) + 1.5
267
+ neg = torch.randn(40, 8) - 1.5
268
+ cfg = ProbeConfig(layer_name="x", probe_type="linear")
269
+ pf = ProbeFit(cfg, input_dim=8)
270
+ pf.train(torch.cat([pos, neg]), torch.cat([torch.ones(40), torch.zeros(40)]),
271
+ epochs=30, batch_size=16)
272
+ d, b = pf.direction, pf.bias
273
+ assert d.shape == (8,), f"direction shape: {d.shape}"
274
+ assert b.dim() == 0, f"bias should be scalar: {b.shape}"
275
+ # Manual application matches what probe.score does (up to sigmoid).
276
+ test_act = torch.randn(3, 8)
277
+ manual_logits = test_act @ d + b
278
+ via_probe = pf.probe(test_act)
279
+ assert torch.allclose(manual_logits, via_probe, atol=1e-5), \
280
+ "direction @ acts + bias should equal probe(acts)"
281
+
282
+
283
+ def test_probefit_direction_rejects_mlp():
284
+ """.direction raises on MLP probes."""
285
+ from archscope.probes import ProbeFit, ProbeConfig
286
+ cfg = ProbeConfig(layer_name="x", probe_type="mlp")
287
+ pf = ProbeFit(cfg, input_dim=8)
288
+ with pytest.raises(ValueError) as ei:
289
+ _ = pf.direction
290
+ assert "linear" in str(ei.value).lower()
291
+
292
+
262
293
  def test_dim_decompose_rejects_mamba_style_model():
263
294
  """dim_decompose raises on models with no attention/MLP submodules."""
264
295
  from archscope.attribute import dim_decompose
File without changes
File without changes
File without changes
File without changes
File without changes