archscope 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {archscope-0.2.3/src/archscope.egg-info → archscope-0.2.4}/PKG-INFO +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/pyproject.toml +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/__init__.py +1 -1
- archscope-0.2.4/src/archscope/kazdov_backend.py +99 -0
- {archscope-0.2.3 → archscope-0.2.4/src/archscope.egg-info}/PKG-INFO +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_circuits_3arch.py +5 -4
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_diff.py +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_kazdov_integration.py +4 -3
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_lens.py +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_mamba_integration.py +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_mamba_ssm_state.py +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_probe_transfer.py +4 -3
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_pythia_end_to_end.py +1 -1
- {archscope-0.2.3 → archscope-0.2.4}/tests/test_unit.py +1 -1
- archscope-0.2.3/src/archscope/kazdov_backend.py +0 -142
- {archscope-0.2.3 → archscope-0.2.4}/LICENSE +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/README.md +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/setup.cfg +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/_utils.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/attribute.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/backends.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/bench.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/circuits.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/cli.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/diff.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/lens.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/loader.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/neurons.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/probes.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/py.typed +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/sae.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope/transfer.py +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope.egg-info/SOURCES.txt +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope.egg-info/dependency_links.txt +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope.egg-info/entry_points.txt +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope.egg-info/requires.txt +0 -0
- {archscope-0.2.3 → archscope-0.2.4}/src/archscope.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Backend for custom architectures that expose layers via ``model.blocks``.
|
|
2
|
+
|
|
3
|
+
Originally written for kazdov-α (a transformer-style decoder LM with hybrid
|
|
4
|
+
MoBE-BCN + MHA attention) — but the backend is generic. It works for ANY
|
|
5
|
+
PyTorch model where:
|
|
6
|
+
|
|
7
|
+
- residual blocks are exposed as ``model.blocks`` (a ``nn.ModuleList``)
|
|
8
|
+
- ``model.d_model`` (or ``model.hidden_size``) is set on the model
|
|
9
|
+
- forward signature is ``model(input_ids, attention_mask=None, ...)``
|
|
10
|
+
|
|
11
|
+
This is the simplest pattern for registering a custom architecture with
|
|
12
|
+
archscope. If your model uses a different convention (e.g., ``model.layers``
|
|
13
|
+
under another parent), subclass ``Backend`` directly — this module is a
|
|
14
|
+
working example.
|
|
15
|
+
|
|
16
|
+
The backend registers under the name ``"kazdov"`` for historical reasons.
|
|
17
|
+
It used to be coupled to a private model-loading function; that function
|
|
18
|
+
was moved out of the shipped package since it depended on a private
|
|
19
|
+
repository. To load your own custom model, do it yourself and then call
|
|
20
|
+
``Backend.for_model(model, hint="kazdov")``.
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
import torch
|
|
24
|
+
|
|
25
|
+
from .backends import Backend, ActivationRecord
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@Backend.register("kazdov")
|
|
29
|
+
class KazdovBackend(Backend):
|
|
30
|
+
"""Generic backend for models exposing layers via ``model.blocks``.
|
|
31
|
+
|
|
32
|
+
Captures the output of each block via forward hooks (the model is
|
|
33
|
+
expected to not implement ``output_hidden_states=True`` natively).
|
|
34
|
+
|
|
35
|
+
Requirements on the model:
|
|
36
|
+
- ``model.blocks`` is a ``nn.ModuleList`` of residual blocks.
|
|
37
|
+
- ``model.d_model`` or ``model.hidden_size`` is set.
|
|
38
|
+
- ``model(input_ids, attention_mask=...)`` is the forward signature.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def layer_names(self) -> list[str]:
|
|
42
|
+
n_layers = len(self.model.blocks)
|
|
43
|
+
return [f"layer_{i}.residual" for i in range(n_layers)]
|
|
44
|
+
|
|
45
|
+
def extract(self, inputs, layers=None):
|
|
46
|
+
layers = layers or self.layer_names()
|
|
47
|
+
self._validate_layers(layers)
|
|
48
|
+
captures: dict[str, torch.Tensor] = {}
|
|
49
|
+
|
|
50
|
+
# Register a forward hook on each requested block.
|
|
51
|
+
hooks = []
|
|
52
|
+
for layer_name in layers:
|
|
53
|
+
idx = int(layer_name.split("_")[1].split(".")[0])
|
|
54
|
+
if idx >= len(self.model.blocks):
|
|
55
|
+
continue
|
|
56
|
+
block = self.model.blocks[idx]
|
|
57
|
+
|
|
58
|
+
def make_hook(name):
|
|
59
|
+
def hook(module, inp, out):
|
|
60
|
+
tensor = out if isinstance(out, torch.Tensor) else out[0]
|
|
61
|
+
captures[name] = tensor.detach()
|
|
62
|
+
return hook
|
|
63
|
+
hooks.append(block.register_forward_hook(make_hook(layer_name)))
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
with torch.no_grad():
|
|
67
|
+
if isinstance(inputs, dict):
|
|
68
|
+
input_ids = inputs["input_ids"]
|
|
69
|
+
attn = inputs.get("attention_mask")
|
|
70
|
+
else:
|
|
71
|
+
input_ids = inputs
|
|
72
|
+
attn = None
|
|
73
|
+
self.model(input_ids, attention_mask=attn)
|
|
74
|
+
finally:
|
|
75
|
+
for h in hooks:
|
|
76
|
+
h.remove()
|
|
77
|
+
|
|
78
|
+
records = []
|
|
79
|
+
for layer_name in layers:
|
|
80
|
+
if layer_name not in captures:
|
|
81
|
+
continue
|
|
82
|
+
records.append(ActivationRecord(
|
|
83
|
+
layer_name=layer_name,
|
|
84
|
+
activations=captures[layer_name],
|
|
85
|
+
meta={"kind": "residual", "arch": "kazdov-blocks"},
|
|
86
|
+
))
|
|
87
|
+
return records
|
|
88
|
+
|
|
89
|
+
def hidden_dim(self, layer_name: str) -> int:
|
|
90
|
+
# Some custom models expose this as `d_model`, others as `hidden_size`.
|
|
91
|
+
for attr in ("d_model", "hidden_size"):
|
|
92
|
+
v = getattr(self.model, attr, None)
|
|
93
|
+
if v is not None:
|
|
94
|
+
return v
|
|
95
|
+
raise ValueError(
|
|
96
|
+
f"Cannot infer hidden_dim for {type(self.model).__name__}: "
|
|
97
|
+
f"set model.d_model or model.hidden_size, or subclass KazdovBackend "
|
|
98
|
+
f"and override hidden_dim()."
|
|
99
|
+
)
|
|
@@ -11,13 +11,14 @@ import os
|
|
|
11
11
|
|
|
12
12
|
import torch
|
|
13
13
|
|
|
14
|
-
sys.path.insert(0, "/
|
|
14
|
+
sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "src"))
|
|
15
15
|
|
|
16
16
|
from archscope import circuits
|
|
17
|
-
|
|
17
|
+
import sys as _sys; _sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "scripts"))
|
|
18
|
+
from _kazdov_loader import load_kazdov_checkpoint
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
CHECKPOINT_KAZDOV = "/Users/kazdov/code/OriginalKazdov/_models/kazdov-98m-alpha"
|
|
21
|
+
CHECKPOINT_KAZDOV = __import__("os").environ.get("KAZDOV_CHECKPOINT", "/Users/kazdov/code/OriginalKazdov/_models/kazdov-98m-alpha")
|
|
21
22
|
PYTHIA_NAME = "EleutherAI/pythia-160m"
|
|
22
23
|
MAMBA_NAME = "state-spaces/mamba-130m-hf"
|
|
23
24
|
|
|
@@ -97,7 +98,7 @@ def main():
|
|
|
97
98
|
print(" • concentration relative ≈ 0 → highly confident predictions (concentrated)")
|
|
98
99
|
|
|
99
100
|
# Save
|
|
100
|
-
out_path = "/
|
|
101
|
+
out_path = str(__import__("pathlib").Path(__file__).parent.parent / "_research" / "circuits_3arch.json")
|
|
101
102
|
os.makedirs(os.path.dirname(out_path), exist_ok=True)
|
|
102
103
|
with open(out_path, "w") as f:
|
|
103
104
|
json.dump(all_results, f, indent=2, default=str)
|
|
@@ -9,14 +9,15 @@ import sys
|
|
|
9
9
|
import time
|
|
10
10
|
import torch
|
|
11
11
|
|
|
12
|
-
sys.path.insert(0, "/
|
|
12
|
+
sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "src"))
|
|
13
13
|
|
|
14
14
|
from archscope import probes, sae, neurons
|
|
15
15
|
from archscope.backends import Backend
|
|
16
|
-
|
|
16
|
+
import sys as _sys; _sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "scripts"))
|
|
17
|
+
from _kazdov_loader import load_kazdov_checkpoint
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
CHECKPOINT = "/Users/kazdov/code/OriginalKazdov/_models/kazdov-98m-alpha"
|
|
20
|
+
CHECKPOINT = __import__("os").environ.get("KAZDOV_CHECKPOINT", "/Users/kazdov/code/OriginalKazdov/_models/kazdov-98m-alpha")
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
def tokenize(tokenizer, texts: list[str]) -> dict:
|
|
@@ -10,7 +10,7 @@ import sys
|
|
|
10
10
|
import time
|
|
11
11
|
import torch
|
|
12
12
|
|
|
13
|
-
sys.path.insert(0, "/
|
|
13
|
+
sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "src"))
|
|
14
14
|
|
|
15
15
|
from archscope import probes, sae, neurons
|
|
16
16
|
from archscope.backends import Backend
|
|
@@ -11,7 +11,7 @@ import sys
|
|
|
11
11
|
import time
|
|
12
12
|
import torch
|
|
13
13
|
|
|
14
|
-
sys.path.insert(0, "/
|
|
14
|
+
sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "src"))
|
|
15
15
|
|
|
16
16
|
from archscope import sae
|
|
17
17
|
from archscope.backends import Backend
|
|
@@ -14,14 +14,15 @@ import sys
|
|
|
14
14
|
import time
|
|
15
15
|
import torch
|
|
16
16
|
|
|
17
|
-
sys.path.insert(0, "/
|
|
17
|
+
sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "src"))
|
|
18
18
|
|
|
19
19
|
from archscope import transfer
|
|
20
20
|
from archscope.backends import Backend
|
|
21
|
-
|
|
21
|
+
import sys as _sys; _sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "scripts"))
|
|
22
|
+
from _kazdov_loader import load_kazdov_checkpoint
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
CHECKPOINT_KAZDOV = "/Users/kazdov/code/OriginalKazdov/_models/kazdov-98m-alpha"
|
|
25
|
+
CHECKPOINT_KAZDOV = __import__("os").environ.get("KAZDOV_CHECKPOINT", "/Users/kazdov/code/OriginalKazdov/_models/kazdov-98m-alpha")
|
|
25
26
|
PYTHIA_NAME = "EleutherAI/pythia-160m"
|
|
26
27
|
|
|
27
28
|
|
|
@@ -13,7 +13,7 @@ import sys
|
|
|
13
13
|
import time
|
|
14
14
|
import torch
|
|
15
15
|
|
|
16
|
-
sys.path.insert(0, "/
|
|
16
|
+
sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent / "src"))
|
|
17
17
|
|
|
18
18
|
from archscope import probes, sae, neurons, attribute
|
|
19
19
|
from archscope.backends import Backend
|
|
@@ -22,7 +22,7 @@ def test_imports():
|
|
|
22
22
|
import archscope
|
|
23
23
|
from archscope import (probes, sae, neurons, attribute, backends,
|
|
24
24
|
circuits, transfer, bench, lens, diff)
|
|
25
|
-
assert archscope.__version__ == "0.2.3"
|
|
25
|
+
assert archscope.__version__ == "0.2.4"
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def test_loader_exports():
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
"""Backend for kazdov-α (and related Kazdov family models).
|
|
2
|
-
|
|
3
|
-
Kazdov-α is a transformer-style decoder LM with hybrid attention (MoBE-BCN
|
|
4
|
-
mixture of bilinear experts + standard MHA in parallel). Architecturally
|
|
5
|
-
closer to standard transformer than to pure RNN/SSM — but the BCN attention
|
|
6
|
-
branch makes it a distinct architecture family for cross-arch interp.
|
|
7
|
-
|
|
8
|
-
Differences from HF transformer:
|
|
9
|
-
- No HF AutoModelForCausalLM interface (custom forward signature)
|
|
10
|
-
- Layers exposed as `model.blocks` (ModuleList)
|
|
11
|
-
- No `output_hidden_states=True` argument — we capture via forward hooks
|
|
12
|
-
- Forward signature: (input_ids, attention_mask=None, labels=None)
|
|
13
|
-
"""
|
|
14
|
-
from __future__ import annotations
|
|
15
|
-
import sys
|
|
16
|
-
from pathlib import Path
|
|
17
|
-
import torch
|
|
18
|
-
|
|
19
|
-
from .backends import Backend, ActivationRecord
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
KAZDOV_REPO = Path.home() / "code" / "OriginalKazdov" / "kazdov"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _ensure_kazdov_importable():
|
|
26
|
-
"""Add kazdov repo to sys.path so we can import KazdovLM."""
|
|
27
|
-
p = str(KAZDOV_REPO)
|
|
28
|
-
if p not in sys.path:
|
|
29
|
-
sys.path.insert(0, p)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def load_kazdov_checkpoint(checkpoint_path: str | Path, device: str = "cpu"):
|
|
33
|
-
"""Load kazdov-α from a checkpoint directory.
|
|
34
|
-
|
|
35
|
-
Expects: config.json + final.pt (or latest.pt) in the directory.
|
|
36
|
-
Returns: (model in eval mode, tokenizer wrapper).
|
|
37
|
-
"""
|
|
38
|
-
_ensure_kazdov_importable()
|
|
39
|
-
from kazdov.kazdov_lm import KazdovLM
|
|
40
|
-
import json
|
|
41
|
-
|
|
42
|
-
ckpt_dir = Path(checkpoint_path)
|
|
43
|
-
config = json.loads((ckpt_dir / "config.json").read_text())
|
|
44
|
-
model_cfg = config["model_cfg"]
|
|
45
|
-
|
|
46
|
-
model = KazdovLM(
|
|
47
|
-
vocab_size=model_cfg["vocab_size"],
|
|
48
|
-
d_model=model_cfg["d_model"],
|
|
49
|
-
n_layers=model_cfg["n_layers"],
|
|
50
|
-
n_heads=model_cfg["n_heads"],
|
|
51
|
-
rank=model_cfg["rank"],
|
|
52
|
-
mlp_dim=model_cfg.get("mlp_dim"),
|
|
53
|
-
max_len=model_cfg.get("max_len", 256),
|
|
54
|
-
use_trilinear=model_cfg.get("use_trilinear", False),
|
|
55
|
-
use_bi_bcn=model_cfg.get("use_bi_bcn", False),
|
|
56
|
-
use_hybrid_mha=model_cfg.get("use_hybrid_mha", True),
|
|
57
|
-
use_mobe=model_cfg.get("use_mobe", False),
|
|
58
|
-
n_experts=model_cfg.get("n_experts", 1),
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
# Try final.pt then latest.pt
|
|
62
|
-
for fname in ("final.pt", "latest.pt"):
|
|
63
|
-
f = ckpt_dir / fname
|
|
64
|
-
if f.exists():
|
|
65
|
-
state = torch.load(f, map_location=device, weights_only=False)
|
|
66
|
-
if isinstance(state, dict) and "model" in state:
|
|
67
|
-
state = state["model"]
|
|
68
|
-
model.load_state_dict(state, strict=False)
|
|
69
|
-
break
|
|
70
|
-
else:
|
|
71
|
-
raise FileNotFoundError(f"No final.pt or latest.pt in {ckpt_dir}")
|
|
72
|
-
|
|
73
|
-
model.to(device).eval()
|
|
74
|
-
|
|
75
|
-
# Tokenizer: kazdov used GPT-2 tokenizer per memory
|
|
76
|
-
from transformers import GPT2Tokenizer
|
|
77
|
-
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
|
78
|
-
if tokenizer.pad_token is None:
|
|
79
|
-
tokenizer.pad_token = tokenizer.eos_token
|
|
80
|
-
|
|
81
|
-
return model, tokenizer
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
@Backend.register("kazdov")
|
|
85
|
-
class KazdovBackend(Backend):
|
|
86
|
-
"""Backend for kazdov-family models (KazdovLM, MoBE-BCN variants).
|
|
87
|
-
|
|
88
|
-
Uses forward hooks to capture residual stream after each KazdovBlock,
|
|
89
|
-
since the model doesn't expose output_hidden_states.
|
|
90
|
-
"""
|
|
91
|
-
|
|
92
|
-
def layer_names(self) -> list[str]:
|
|
93
|
-
n_layers = len(self.model.blocks)
|
|
94
|
-
return [f"layer_{i}.residual" for i in range(n_layers)]
|
|
95
|
-
|
|
96
|
-
def extract(self, inputs, layers=None):
|
|
97
|
-
layers = layers or self.layer_names()
|
|
98
|
-
self._validate_layers(layers)
|
|
99
|
-
captures: dict[str, torch.Tensor] = {}
|
|
100
|
-
|
|
101
|
-
# Register a forward hook on each requested block.
|
|
102
|
-
hooks = []
|
|
103
|
-
for layer_name in layers:
|
|
104
|
-
idx = int(layer_name.split("_")[1].split(".")[0])
|
|
105
|
-
if idx >= len(self.model.blocks):
|
|
106
|
-
continue
|
|
107
|
-
block = self.model.blocks[idx]
|
|
108
|
-
|
|
109
|
-
def make_hook(name):
|
|
110
|
-
def hook(module, inp, out):
|
|
111
|
-
tensor = out if isinstance(out, torch.Tensor) else out[0]
|
|
112
|
-
captures[name] = tensor.detach()
|
|
113
|
-
return hook
|
|
114
|
-
hooks.append(block.register_forward_hook(make_hook(layer_name)))
|
|
115
|
-
|
|
116
|
-
try:
|
|
117
|
-
# Kazdov forward signature: model(input_ids, attention_mask=None)
|
|
118
|
-
with torch.no_grad():
|
|
119
|
-
if isinstance(inputs, dict):
|
|
120
|
-
input_ids = inputs["input_ids"]
|
|
121
|
-
attn = inputs.get("attention_mask")
|
|
122
|
-
else:
|
|
123
|
-
input_ids = inputs
|
|
124
|
-
attn = None
|
|
125
|
-
self.model(input_ids, attention_mask=attn)
|
|
126
|
-
finally:
|
|
127
|
-
for h in hooks:
|
|
128
|
-
h.remove()
|
|
129
|
-
|
|
130
|
-
records = []
|
|
131
|
-
for layer_name in layers:
|
|
132
|
-
if layer_name not in captures:
|
|
133
|
-
continue
|
|
134
|
-
records.append(ActivationRecord(
|
|
135
|
-
layer_name=layer_name,
|
|
136
|
-
activations=captures[layer_name],
|
|
137
|
-
meta={"kind": "residual", "arch": "kazdov-mobe-bcn"},
|
|
138
|
-
))
|
|
139
|
-
return records
|
|
140
|
-
|
|
141
|
-
def hidden_dim(self, layer_name: str) -> int:
|
|
142
|
-
return self.model.d_model
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|