archscope 0.2.5__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {archscope-0.2.5/src/archscope.egg-info → archscope-0.2.6}/PKG-INFO +25 -6
- {archscope-0.2.5 → archscope-0.2.6}/README.md +24 -5
- {archscope-0.2.5 → archscope-0.2.6}/pyproject.toml +1 -1
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/__init__.py +1 -1
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/attribute.py +20 -1
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/backends.py +4 -1
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/circuits.py +12 -2
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/lens.py +26 -4
- {archscope-0.2.5 → archscope-0.2.6/src/archscope.egg-info}/PKG-INFO +25 -6
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_unit.py +35 -1
- {archscope-0.2.5 → archscope-0.2.6}/LICENSE +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/setup.cfg +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/_utils.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/bench.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/cli.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/diff.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/kazdov_backend.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/loader.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/neurons.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/probes.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/py.typed +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/sae.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope/transfer.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope.egg-info/SOURCES.txt +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope.egg-info/dependency_links.txt +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope.egg-info/entry_points.txt +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope.egg-info/requires.txt +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/src/archscope.egg-info/top_level.txt +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_circuits_3arch.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_diff.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_kazdov_integration.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_lens.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_mamba_integration.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_mamba_ssm_state.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_probe_transfer.py +0 -0
- {archscope-0.2.5 → archscope-0.2.6}/tests/test_pythia_end_to_end.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: archscope
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
|
|
5
5
|
Author: Juan Cruz Dovzak
|
|
6
6
|
License: Apache-2.0
|
|
@@ -58,18 +58,17 @@ It is **not**: a competitor to `transformer_lens` or `nnsight` (both are broader
|
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
60
|
import archscope as mi
|
|
61
|
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
62
|
-
|
|
63
|
-
tok = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
|
|
64
|
-
model = AutoModelForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")
|
|
65
61
|
|
|
66
|
-
|
|
62
|
+
# One call → HuggingFace model + tokenizer + the right backend
|
|
63
|
+
model, tok, backend = mi.load_model("state-spaces/mamba-130m-hf", arch="mamba")
|
|
67
64
|
|
|
68
65
|
# Extract Mamba's recurrent SSM state h_t (in addition to residual stream)
|
|
69
66
|
ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_state"])[0]
|
|
70
67
|
# Shape: (B, intermediate_size, ssm_state_size) = (B, 1536, 16) for mamba-130m
|
|
71
68
|
```
|
|
72
69
|
|
|
70
|
+
`load_model` handles `pad_token` setup, `model.eval()`, and backend auto-detection. If you'd rather drive `transformers` yourself, every method also accepts `backend_hint=...`.
|
|
71
|
+
|
|
73
72
|
---
|
|
74
73
|
|
|
75
74
|
## What's inside
|
|
@@ -105,6 +104,26 @@ ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_st
|
|
|
105
104
|
|
|
106
105
|
If `Backend.for_model(model)` is called on a model whose `config.model_type` isn't in the autodetect list, it raises a clear `ValueError` rather than silently picking a backend. Pass `hint="..."` explicitly for anything outside the list, or register a new backend via `Backend.register("name")`.
|
|
107
106
|
|
|
107
|
+
### Method × backend support
|
|
108
|
+
|
|
109
|
+
Not every method works on every architecture. The cross-product:
|
|
110
|
+
|
|
111
|
+
| Method | transformer | mamba | kazdov | recurrent |
|
|
112
|
+
|---|:---:|:---:|:---:|:---:|
|
|
113
|
+
| `probes.fit_probe` | ✅ | ✅ | ✅ | ✅ |
|
|
114
|
+
| `sae.fit_sae` (Dense / Rank-1) | ✅ | ✅ | ✅ | ✅ |
|
|
115
|
+
| `neurons.find_neurons` | ✅ | ✅ | ✅ | ✅ |
|
|
116
|
+
| `attribute.activation_patch` | ✅ | ✅ residual only | ✅ | ⚠️ subclass needed |
|
|
117
|
+
| `attribute.dim_decompose` | ✅ | ❌ no attention/MLP submods | ✅ | ❌ |
|
|
118
|
+
| `circuits.*` (behavioural) | ✅ | ✅ | ✅ | ✅ |
|
|
119
|
+
| `lens.logit_lens` | ✅ | ⚠️ degrades with depth — use `TunedLens` | ✅ | ⚠️ |
|
|
120
|
+
| `lens.TunedLens.fit` | ✅ | ✅ | ✅ | ⚠️ |
|
|
121
|
+
| `diff.compare` | ✅ | ✅ | ✅ | ✅ |
|
|
122
|
+
| `transfer.evaluate_transfer` | ✅ ↔ any | ✅ ↔ any | ✅ ↔ any | ✅ ↔ any |
|
|
123
|
+
| `bench.benchmark` | ✅ | ✅ | ✅ | partial |
|
|
124
|
+
|
|
125
|
+
❌ entries raise a clear `ValueError` rather than silently degrading.
|
|
126
|
+
|
|
108
127
|
---
|
|
109
128
|
|
|
110
129
|
## Install
|
|
@@ -21,18 +21,17 @@ It is **not**: a competitor to `transformer_lens` or `nnsight` (both are broader
|
|
|
21
21
|
|
|
22
22
|
```python
|
|
23
23
|
import archscope as mi
|
|
24
|
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
25
|
-
|
|
26
|
-
tok = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
|
|
27
|
-
model = AutoModelForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")
|
|
28
24
|
|
|
29
|
-
|
|
25
|
+
# One call → HuggingFace model + tokenizer + the right backend
|
|
26
|
+
model, tok, backend = mi.load_model("state-spaces/mamba-130m-hf", arch="mamba")
|
|
30
27
|
|
|
31
28
|
# Extract Mamba's recurrent SSM state h_t (in addition to residual stream)
|
|
32
29
|
ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_state"])[0]
|
|
33
30
|
# Shape: (B, intermediate_size, ssm_state_size) = (B, 1536, 16) for mamba-130m
|
|
34
31
|
```
|
|
35
32
|
|
|
33
|
+
`load_model` handles `pad_token` setup, `model.eval()`, and backend auto-detection. If you'd rather drive `transformers` yourself, every method also accepts `backend_hint=...`.
|
|
34
|
+
|
|
36
35
|
---
|
|
37
36
|
|
|
38
37
|
## What's inside
|
|
@@ -68,6 +67,26 @@ ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_st
|
|
|
68
67
|
|
|
69
68
|
If `Backend.for_model(model)` is called on a model whose `config.model_type` isn't in the autodetect list, it raises a clear `ValueError` rather than silently picking a backend. Pass `hint="..."` explicitly for anything outside the list, or register a new backend via `Backend.register("name")`.
|
|
70
69
|
|
|
70
|
+
### Method × backend support
|
|
71
|
+
|
|
72
|
+
Not every method works on every architecture. The cross-product:
|
|
73
|
+
|
|
74
|
+
| Method | transformer | mamba | kazdov | recurrent |
|
|
75
|
+
|---|:---:|:---:|:---:|:---:|
|
|
76
|
+
| `probes.fit_probe` | ✅ | ✅ | ✅ | ✅ |
|
|
77
|
+
| `sae.fit_sae` (Dense / Rank-1) | ✅ | ✅ | ✅ | ✅ |
|
|
78
|
+
| `neurons.find_neurons` | ✅ | ✅ | ✅ | ✅ |
|
|
79
|
+
| `attribute.activation_patch` | ✅ | ✅ residual only | ✅ | ⚠️ subclass needed |
|
|
80
|
+
| `attribute.dim_decompose` | ✅ | ❌ no attention/MLP submods | ✅ | ❌ |
|
|
81
|
+
| `circuits.*` (behavioural) | ✅ | ✅ | ✅ | ✅ |
|
|
82
|
+
| `lens.logit_lens` | ✅ | ⚠️ degrades with depth — use `TunedLens` | ✅ | ⚠️ |
|
|
83
|
+
| `lens.TunedLens.fit` | ✅ | ✅ | ✅ | ⚠️ |
|
|
84
|
+
| `diff.compare` | ✅ | ✅ | ✅ | ✅ |
|
|
85
|
+
| `transfer.evaluate_transfer` | ✅ ↔ any | ✅ ↔ any | ✅ ↔ any | ✅ ↔ any |
|
|
86
|
+
| `bench.benchmark` | ✅ | ✅ | ✅ | partial |
|
|
87
|
+
|
|
88
|
+
❌ entries raise a clear `ValueError` rather than silently degrading.
|
|
89
|
+
|
|
71
90
|
---
|
|
72
91
|
|
|
73
92
|
## Install
|
|
@@ -95,7 +95,9 @@ def activation_patch(
|
|
|
95
95
|
module = resolve_layer_module(model, f"layer_{idx}.residual")
|
|
96
96
|
if module is None:
|
|
97
97
|
continue
|
|
98
|
-
|
|
98
|
+
# detach+clone for the same reason dim_decompose does: avoid aliasing
|
|
99
|
+
# a tensor that could be overwritten when the patched forward runs.
|
|
100
|
+
src_h = src_rec.activations.detach().clone()
|
|
99
101
|
|
|
100
102
|
def hook(mod, inp, out, replacement=src_h):
|
|
101
103
|
if isinstance(out, tuple):
|
|
@@ -155,6 +157,23 @@ def dim_decompose(
|
|
|
155
157
|
metric_b = metric_fn(out_b)
|
|
156
158
|
total_gap = metric_a - metric_b
|
|
157
159
|
|
|
160
|
+
# Sanity check: at least one component must be resolvable for at least one
|
|
161
|
+
# requested layer. Architectures without attention/MLP submodules (Mamba,
|
|
162
|
+
# pure SSMs, custom recurrent blocks) would otherwise silently return an
|
|
163
|
+
# empty DIMResult.
|
|
164
|
+
resolvable = any(
|
|
165
|
+
resolve_subcomponent_module(model, idx, comp) is not None
|
|
166
|
+
for idx in layer_indices for comp in components
|
|
167
|
+
)
|
|
168
|
+
if not resolvable:
|
|
169
|
+
raise ValueError(
|
|
170
|
+
f"dim_decompose: none of components={components} were found on this "
|
|
171
|
+
f"model (type {type(model).__name__}). This method expects "
|
|
172
|
+
"attention/MLP submodules — it's transformer-style only. For "
|
|
173
|
+
"SSM/recurrent architectures, use activation_patch on the residual "
|
|
174
|
+
"stream instead."
|
|
175
|
+
)
|
|
176
|
+
|
|
158
177
|
contributions: dict[str, float] = {}
|
|
159
178
|
for comp in components:
|
|
160
179
|
# 1) Capture component outputs during prompt_a.
|
|
@@ -135,7 +135,10 @@ class TransformerBackend(Backend):
|
|
|
135
135
|
"""HuggingFace transformers backend — extracts residual stream per layer."""
|
|
136
136
|
|
|
137
137
|
def layer_names(self) -> list[str]:
|
|
138
|
-
#
|
|
138
|
+
# Layer names are virtual handles consumed by .extract(), which uses
|
|
139
|
+
# HF's `output_hidden_states=True` to retrieve the residual stream
|
|
140
|
+
# (no direct attribute walk into model.model.layers[i] needed —
|
|
141
|
+
# so this works across HF decoder LM families).
|
|
139
142
|
n_layers = getattr(self.model.config, "num_hidden_layers", 0)
|
|
140
143
|
return [f"layer_{i}.residual" for i in range(n_layers)]
|
|
141
144
|
|
|
@@ -74,12 +74,22 @@ def induction_head_score(
|
|
|
74
74
|
else:
|
|
75
75
|
vocab_size = 50257 # GPT-2 default
|
|
76
76
|
|
|
77
|
+
# Adaptive vocab window — defaults to [100, 40000) for full-size LMs but
|
|
78
|
+
# tightens for small-vocab toy models so we don't sample outside the range.
|
|
79
|
+
lo = min(100, max(1, vocab_size // 4))
|
|
80
|
+
hi = min(vocab_size, 40000)
|
|
81
|
+
if hi - lo < 2 * n_pairs:
|
|
82
|
+
raise ValueError(
|
|
83
|
+
f"induction_head_score: vocab window [{lo}, {hi}) has only "
|
|
84
|
+
f"{hi - lo} tokens but n_pairs={n_pairs} requires {2 * n_pairs} distinct ids. "
|
|
85
|
+
f"Lower n_pairs or pass a model with vocab_size >= {2 * n_pairs + 100}."
|
|
86
|
+
)
|
|
87
|
+
|
|
77
88
|
successes = 0
|
|
78
89
|
rank_sum = 0.0
|
|
79
90
|
prob_target_sum = 0.0
|
|
80
91
|
for trial in range(n_trials):
|
|
81
|
-
|
|
82
|
-
tokens = rng.sample(range(100, min(vocab_size, 40000)), 2 * n_pairs)
|
|
92
|
+
tokens = rng.sample(range(lo, hi), 2 * n_pairs)
|
|
83
93
|
seq = []
|
|
84
94
|
pairs = []
|
|
85
95
|
for i in range(n_pairs):
|
|
@@ -218,14 +218,36 @@ class TunedLens(nn.Module):
|
|
|
218
218
|
|
|
219
219
|
opt = torch.optim.AdamW(tl.translators.parameters(), lr=lr)
|
|
220
220
|
|
|
221
|
-
# Pre-extract all activations + target logits once
|
|
221
|
+
# Pre-extract all activations + target logits once.
|
|
222
|
+
# Ensure tokenizer has a pad token (GPT-2 family ships without one).
|
|
223
|
+
if getattr(tokenizer, "pad_token", None) is None and getattr(tokenizer, "eos_token", None) is not None:
|
|
224
|
+
tokenizer.pad_token = tokenizer.eos_token
|
|
225
|
+
|
|
222
226
|
enc = tokenizer(calibration_texts, return_tensors="pt", padding=True,
|
|
223
227
|
truncation=True, max_length=max_len)
|
|
224
228
|
inputs = {"input_ids": enc["input_ids"].to(device)}
|
|
229
|
+
if "attention_mask" in enc:
|
|
230
|
+
inputs["attention_mask"] = enc["attention_mask"].to(device)
|
|
231
|
+
|
|
232
|
+
# Per-row index of the last REAL (non-pad) token. If no attention_mask
|
|
233
|
+
# (single, unpadded sequence), the conventional last-position is fine.
|
|
234
|
+
if "attention_mask" in enc:
|
|
235
|
+
real_lengths = enc["attention_mask"].sum(dim=1).to(device) # (B,)
|
|
236
|
+
last_idx = (real_lengths - 1).clamp(min=0)
|
|
237
|
+
else:
|
|
238
|
+
B = inputs["input_ids"].shape[0]
|
|
239
|
+
last_idx = torch.full((B,), inputs["input_ids"].shape[1] - 1,
|
|
240
|
+
dtype=torch.long, device=device)
|
|
241
|
+
|
|
242
|
+
def gather_last(acts: torch.Tensor) -> torch.Tensor:
|
|
243
|
+
# acts: (B, T, H) → (B, H) at each row's real last position.
|
|
244
|
+
B = acts.shape[0]
|
|
245
|
+
return acts[torch.arange(B, device=acts.device), last_idx]
|
|
246
|
+
|
|
225
247
|
with torch.no_grad():
|
|
226
248
|
records = backend.extract(inputs, layers=layer_names)
|
|
227
|
-
# Target: model's actual final logits at last position
|
|
228
|
-
final_residual = records[-1].activations
|
|
249
|
+
# Target: model's actual final logits at last REAL position per row.
|
|
250
|
+
final_residual = gather_last(records[-1].activations)
|
|
229
251
|
if norm is not None:
|
|
230
252
|
final_residual = norm(final_residual)
|
|
231
253
|
target_logits = unembed(final_residual).detach() # (B, vocab)
|
|
@@ -235,7 +257,7 @@ class TunedLens(nn.Module):
|
|
|
235
257
|
opt.zero_grad()
|
|
236
258
|
total_loss = 0.0
|
|
237
259
|
for i, rec in enumerate(records):
|
|
238
|
-
last = rec.activations
|
|
260
|
+
last = gather_last(rec.activations).detach()
|
|
239
261
|
translated = tl.translators[i](last)
|
|
240
262
|
if norm is not None:
|
|
241
263
|
translated = norm(translated)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: archscope
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Lightweight workbench for cross-architecture mechanistic interpretability experiments on small models
|
|
5
5
|
Author: Juan Cruz Dovzak
|
|
6
6
|
License: Apache-2.0
|
|
@@ -58,18 +58,17 @@ It is **not**: a competitor to `transformer_lens` or `nnsight` (both are broader
|
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
60
|
import archscope as mi
|
|
61
|
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
62
|
-
|
|
63
|
-
tok = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
|
|
64
|
-
model = AutoModelForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")
|
|
65
61
|
|
|
66
|
-
|
|
62
|
+
# One call → HuggingFace model + tokenizer + the right backend
|
|
63
|
+
model, tok, backend = mi.load_model("state-spaces/mamba-130m-hf", arch="mamba")
|
|
67
64
|
|
|
68
65
|
# Extract Mamba's recurrent SSM state h_t (in addition to residual stream)
|
|
69
66
|
ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_state"])[0]
|
|
70
67
|
# Shape: (B, intermediate_size, ssm_state_size) = (B, 1536, 16) for mamba-130m
|
|
71
68
|
```
|
|
72
69
|
|
|
70
|
+
`load_model` handles `pad_token` setup, `model.eval()`, and backend auto-detection. If you'd rather drive `transformers` yourself, every method also accepts `backend_hint=...`.
|
|
71
|
+
|
|
73
72
|
---
|
|
74
73
|
|
|
75
74
|
## What's inside
|
|
@@ -105,6 +104,26 @@ ssm = backend.extract(tok("text", return_tensors="pt"), layers=["layer_12.ssm_st
|
|
|
105
104
|
|
|
106
105
|
If `Backend.for_model(model)` is called on a model whose `config.model_type` isn't in the autodetect list, it raises a clear `ValueError` rather than silently picking a backend. Pass `hint="..."` explicitly for anything outside the list, or register a new backend via `Backend.register("name")`.
|
|
107
106
|
|
|
107
|
+
### Method × backend support
|
|
108
|
+
|
|
109
|
+
Not every method works on every architecture. The cross-product:
|
|
110
|
+
|
|
111
|
+
| Method | transformer | mamba | kazdov | recurrent |
|
|
112
|
+
|---|:---:|:---:|:---:|:---:|
|
|
113
|
+
| `probes.fit_probe` | ✅ | ✅ | ✅ | ✅ |
|
|
114
|
+
| `sae.fit_sae` (Dense / Rank-1) | ✅ | ✅ | ✅ | ✅ |
|
|
115
|
+
| `neurons.find_neurons` | ✅ | ✅ | ✅ | ✅ |
|
|
116
|
+
| `attribute.activation_patch` | ✅ | ✅ residual only | ✅ | ⚠️ subclass needed |
|
|
117
|
+
| `attribute.dim_decompose` | ✅ | ❌ no attention/MLP submods | ✅ | ❌ |
|
|
118
|
+
| `circuits.*` (behavioural) | ✅ | ✅ | ✅ | ✅ |
|
|
119
|
+
| `lens.logit_lens` | ✅ | ⚠️ degrades with depth — use `TunedLens` | ✅ | ⚠️ |
|
|
120
|
+
| `lens.TunedLens.fit` | ✅ | ✅ | ✅ | ⚠️ |
|
|
121
|
+
| `diff.compare` | ✅ | ✅ | ✅ | ✅ |
|
|
122
|
+
| `transfer.evaluate_transfer` | ✅ ↔ any | ✅ ↔ any | ✅ ↔ any | ✅ ↔ any |
|
|
123
|
+
| `bench.benchmark` | ✅ | ✅ | ✅ | partial |
|
|
124
|
+
|
|
125
|
+
❌ entries raise a clear `ValueError` rather than silently degrading.
|
|
126
|
+
|
|
108
127
|
---
|
|
109
128
|
|
|
110
129
|
## Install
|
|
@@ -22,7 +22,7 @@ def test_imports():
|
|
|
22
22
|
import archscope
|
|
23
23
|
from archscope import (probes, sae, neurons, attribute, backends, # noqa: F401
|
|
24
24
|
circuits, transfer, bench, lens, diff) # noqa: F401
|
|
25
|
-
assert archscope.__version__ == "0.2.5"
|
|
25
|
+
assert archscope.__version__ == "0.2.6"
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def test_loader_exports():
|
|
@@ -244,6 +244,40 @@ def test_neurons_layer_filter_rejects_nonmatching():
|
|
|
244
244
|
assert cfg.layer_filter == "not_a_substring"
|
|
245
245
|
|
|
246
246
|
|
|
247
|
+
def test_induction_head_score_small_vocab_clear_error():
|
|
248
|
+
"""induction_head_score raises a clear error when vocab is too small."""
|
|
249
|
+
from archscope.circuits import induction_head_score
|
|
250
|
+
|
|
251
|
+
class _TinyModel:
|
|
252
|
+
class config:
|
|
253
|
+
vocab_size = 40 # << 2*n_pairs + 100
|
|
254
|
+
def __call__(self, ids):
|
|
255
|
+
return torch.zeros(1, ids.shape[1], 40)
|
|
256
|
+
|
|
257
|
+
with pytest.raises(ValueError) as ei:
|
|
258
|
+
induction_head_score(_TinyModel(), n_pairs=20, n_trials=1)
|
|
259
|
+
assert "vocab window" in str(ei.value).lower() or "n_pairs" in str(ei.value)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def test_dim_decompose_rejects_mamba_style_model():
|
|
263
|
+
"""dim_decompose raises on models with no attention/MLP submodules."""
|
|
264
|
+
from archscope.attribute import dim_decompose
|
|
265
|
+
|
|
266
|
+
class _NoSubmods(torch.nn.Module):
|
|
267
|
+
def forward(self, **kwargs):
|
|
268
|
+
class Out:
|
|
269
|
+
logits = torch.zeros(1, 3, 8)
|
|
270
|
+
return Out()
|
|
271
|
+
|
|
272
|
+
with pytest.raises(ValueError) as ei:
|
|
273
|
+
dim_decompose(_NoSubmods(),
|
|
274
|
+
prompt_a={"input_ids": torch.tensor([[1, 2, 3]])},
|
|
275
|
+
prompt_b={"input_ids": torch.tensor([[4, 5, 6]])},
|
|
276
|
+
layer_indices=[0, 1],
|
|
277
|
+
metric_fn=lambda o: 0.0)
|
|
278
|
+
assert "attention" in str(ei.value).lower() or "submod" in str(ei.value).lower()
|
|
279
|
+
|
|
280
|
+
|
|
247
281
|
if __name__ == "__main__":
|
|
248
282
|
# Allow `python tests/test_unit.py` for quick local check
|
|
249
283
|
pytest.main([__file__, "-v"])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|