model-unfolder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_unfolder/__init__.py +58 -0
- model_unfolder/adapters/__init__.py +15 -0
- model_unfolder/adapters/custom/__init__.py +8 -0
- model_unfolder/adapters/diffusor/__init__.py +8 -0
- model_unfolder/adapters/transformer/__init__.py +5 -0
- model_unfolder/adapters/transformer/assembly.py +57 -0
- model_unfolder/adapters/transformer/blocks.py +238 -0
- model_unfolder/adapters/transformer/common.py +35 -0
- model_unfolder/adapters/transformer/families/__init__.py +12 -0
- model_unfolder/adapters/transformer/families/deepseek.py +107 -0
- model_unfolder/adapters/transformer/families/gemma4.py +202 -0
- model_unfolder/adapters/transformer/families/llama.py +91 -0
- model_unfolder/adapters/transformer/special_parts/__init__.py +2 -0
- model_unfolder/adapters/transformer/special_parts/per_layer_embedding.py +220 -0
- model_unfolder/diagram.py +95 -0
- model_unfolder/html_renderer.py +5 -0
- model_unfolder/ir.py +163 -0
- model_unfolder/labels.py +166 -0
- model_unfolder/params.py +119 -0
- model_unfolder/parser.py +137 -0
- model_unfolder/renderers/__init__.py +1 -0
- model_unfolder/renderers/html/__init__.py +5 -0
- model_unfolder/renderers/html/block_views/__init__.py +20 -0
- model_unfolder/renderers/html/block_views/attention.py +91 -0
- model_unfolder/renderers/html/block_views/feed_forward.py +213 -0
- model_unfolder/renderers/html/block_views/per_layer_embedding.py +199 -0
- model_unfolder/renderers/html/cards.py +130 -0
- model_unfolder/renderers/html/document.py +157 -0
- model_unfolder/renderers/html/interactions.py +64 -0
- model_unfolder/renderers/html/metadata.py +265 -0
- model_unfolder/renderers/html/sections.py +60 -0
- model_unfolder/renderers/html/styles.py +283 -0
- model_unfolder/renderers/html/svg.py +349 -0
- model_unfolder/renderers/html/theme.py +24 -0
- model_unfolder/renderers/html/utils.py +28 -0
- model_unfolder/renderers/html/views.py +461 -0
- model_unfolder-0.2.0.dist-info/METADATA +122 -0
- model_unfolder-0.2.0.dist-info/RECORD +41 -0
- model_unfolder-0.2.0.dist-info/WHEEL +5 -0
- model_unfolder-0.2.0.dist-info/licenses/LICENSE +201 -0
- model_unfolder-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Inspect-card content for attention blocks."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from ....labels import describe_attention, kv_shared, mask_long
|
|
5
|
+
from ..utils import _html
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def attention_card(ir: dict, info: dict, meta_for: callable) -> str:
    """Build the inspect-card HTML for the attention block.

    Groups in ``info["groups"]`` whose spec has a truthy ``"attention"`` entry
    count as attention variants. With more than one variant the card lists one
    row per variant; otherwise it shows the title and description returned by
    ``meta_for("attn")``.
    """
    variants = []
    for candidate in info.get("groups", []):
        if candidate.get("spec", {}).get("attention"):
            variants.append(candidate)

    opening = '<div class="uf-card-detail uf-card-attn">'

    if len(variants) > 1:
        row_html = "".join(
            _attention_row_for_group(variant, ir) for variant in variants
        )
        summary = (
            f"{len(variants)} attention variants in this model — each row is one variant."
        )
        pieces = [
            opening,
            '<div class="uf-card-title">Attention layers</div>',
            '<div class="uf-card-desc">',
            summary,
            "</div>",
            f'<div class="uf-attn-rows">{row_html}</div>',
            "</div>",
        ]
        return "".join(pieces)

    # Zero or one variant: fall back to the generic "attn" metadata entry.
    heading, blurb = meta_for("attn")
    pieces = [
        opening,
        f'<div class="uf-card-title">{_html(heading)}</div>',
        f'<div class="uf-card-desc">{_html(blurb)}</div>',
        "</div>",
    ]
    return "".join(pieces)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _attention_row_for_group(group: dict, ir: dict) -> str:
    """Render one attention-variant row for ``group``.

    Counts how many of the group's layer indices point at a layer in
    ``ir["layers"]`` for which ``kv_shared`` is truthy, then delegates the
    HTML to ``_attention_row``. Indices outside ``ir["layers"]`` are ignored
    for the shared count but still count towards the group size.
    """
    attention_spec = group["spec"]["attention"]
    layer_ids = group["indices"]
    group_size = len(layer_ids)
    all_layers = ir.get("layers", [])

    shared_total = 0
    for idx in layer_ids:
        if not 0 <= idx < len(all_layers):
            continue
        if kv_shared(all_layers[idx].get("attention") or {}):
            shared_total += 1

    return _attention_row(attention_spec, group_size, shared_total)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _attention_row(attn: dict, n_layers: int, n_shared: int) -> str:
    """Render the HTML for a single attention-variant row.

    Args:
        attn: Attention spec; ``window_size`` is shown when truthy, and the
            title is built from ``mask_long`` and ``describe_attention``.
        n_layers: Number of layers that use this variant.
        n_shared: Number of those layers flagged as sharing K/V.

    Returns:
        A ``uf-attn-row`` ``<div>`` with a title line and a detail line.
    """
    title = f"{mask_long(attn)} · {describe_attention(attn)}"

    bits: list[str] = []
    if attn.get("window_size"):
        bits.append(f"window {attn['window_size']}")
    if n_shared:
        bits.append(f"{n_shared} of {n_layers} reuse K/V from earlier layers")
    else:
        # Avoid the ungrammatical "1 layers" for single-layer variants.
        noun = "layer" if n_layers == 1 else "layers"
        bits.append(f"{n_layers} {noun}")
    detail = " · ".join(bits)

    return (
        '<div class="uf-attn-row">'
        f'<div class="uf-attn-row-title">{_html(title)}</div>'
        f'<div class="uf-attn-row-detail">{_html(detail)}</div>'
        "</div>"
    )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def attention_card_css(mount_id: str, theme: dict) -> str:
    """Return the CSS rules for the multi-variant attention rows.

    Every selector is prefixed with ``#<mount_id>``; colours and font
    families are read from ``theme`` (keys ``bg_card``, ``border``, ``block``,
    ``font_head``, ``text``, ``muted`` and ``font_mono``).
    """
    scope = f"#{mount_id}"
    rules = (
        (
            ".uf-attn-rows",
            (
                "margin-top:10px;",
                "display:flex;",
                "flex-direction:column;",
                "gap:8px;",
            ),
        ),
        (
            ".uf-attn-row",
            (
                "padding:9px 12px;",
                f"background:{theme['bg_card']};",
                f"border:0.5px solid {theme['border']};",
                f"border-left:3px solid {theme['block']};",
                "border-radius:8px;",
            ),
        ),
        (
            ".uf-attn-row-title",
            (
                f"font-family:{theme['font_head']};",
                "font-size:16px;",
                f"color:{theme['text']};",
                "line-height:1.15;",
            ),
        ),
        (
            ".uf-attn-row-detail",
            (
                "margin-top:3px;",
                "font-size:12px;",
                f"color:{theme['muted']};",
                f"font-family:{theme['font_mono']};",
            ),
        ),
    )

    # Leading and trailing empty entries give the sheet its surrounding newlines.
    lines = [""]
    for selector, declarations in rules:
        lines.append(f"{scope} {selector} {{")
        lines.extend(declarations)
        lines.append("}")
    lines.append("")
    return "\n".join(lines)
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Detail SVGs for feed-forward blocks."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from ....labels import activation_label
|
|
5
|
+
from ..svg import (
|
|
6
|
+
_defs,
|
|
7
|
+
_elbow_hv,
|
|
8
|
+
_elbow_vh,
|
|
9
|
+
_ids,
|
|
10
|
+
_plus_block,
|
|
11
|
+
_rect_block,
|
|
12
|
+
_region_rect,
|
|
13
|
+
_svg,
|
|
14
|
+
_svg_tag,
|
|
15
|
+
_svg_text,
|
|
16
|
+
_v_line,
|
|
17
|
+
_v_seg,
|
|
18
|
+
)
|
|
19
|
+
from ..theme import C, FONT_BODY, FONT_HEAD, FONT_MONO, GAP
|
|
20
|
+
from ..utils import _fmt_int
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def build_moe_view(ir: dict, info: dict, mount_id: str) -> str:
    """Draw the mixture-of-experts detail SVG for the dominant layer spec.

    The drawing holds a router box, four representative expert boxes
    ("Expert 1", "Expert k", "Expert k+1" and the last expert), a summing
    node, and in/out arrows. When both ``num_experts`` and
    ``num_experts_per_tok`` are set on the FFN spec, a call-out box reports
    the share of experts active per token.
    """
    width, height = 720, 620
    arrow_id, shadow_id = _ids(mount_id, "moe")
    parts = [
        _defs(arrow_id, shadow_id),
        _region_rect(40, 30, width - 80, height - 60, C["bg_outer"]),
    ]

    ffn = info["dominant"]["spec"]["ffn"]
    expert_total = ffn.get("num_experts")
    hidden = _fmt_int(ir.get("hidden_size"))
    inter = _fmt_int(ffn.get("expert_intermediate_size") or ffn.get("intermediate_size"))
    n_experts = _fmt_int(expert_total) if expert_total else "N"

    mid_x = width / 2
    router_w = 540
    router = _rect_block(
        parts, info, shadow_id, "router",
        (width - router_w) / 2, height - 130, router_w, 50, "Router",
    )
    sum_node = _plus_block(parts, info, shadow_id, "add_moe", mid_x, 100)

    # Four evenly spaced expert boxes between the side paddings.
    expert_w, expert_h = 116, 54
    expert_y = 235
    side_pad = 60
    gap = (width - 2 * side_pad - 4 * expert_w) / 3
    pitch = expert_w + gap
    last_label = str(expert_total) if expert_total else "N"
    captions = (
        ("Expert 1", "expert_1"),
        ("Expert k", "expert_k"),
        ("Expert k+1", "expert_kp1"),
        (f"Expert {last_label}", "expert_n"),
    )
    experts = []
    for slot, (caption, node_id) in enumerate(captions):
        experts.append(
            _rect_block(
                parts, info, shadow_id, node_id,
                side_pad + slot * pitch, expert_y, expert_w, expert_h,
                caption, font_size=15,
            )
        )

    _dim_label(parts, router["cx"], router["top"] - 14, f"router: {hidden} -> {n_experts}", anchor="middle")
    _dim_label(parts, mid_x, expert_y - 18, f"each expert: {hidden} -> {inter} -> {hidden}", anchor="middle")

    # Ellipsis dots between the second and third expert boxes.
    dots_x = (experts[1]["right"] + experts[2]["left"]) / 2
    dots_y = expert_y + expert_h / 2
    for step in range(-2, 3):
        dot = {"cx": dots_x + step * 7, "cy": dots_y, "r": 2.5, "fill": C["muted"]}
        parts.append(_svg_tag("circle", dot))

    # Router -> each expert.
    for expert in experts:
        parts.append(_v_seg(expert["cx"], router["top"], expert["bottom"] + GAP, arrow_id))

    # Each expert -> the side of the summing node that faces it.
    for expert in experts:
        if expert["cx"] < sum_node["cx"]:
            offset = -sum_node["r"] - GAP
        else:
            offset = sum_node["r"] + GAP
        parts.append(
            _elbow_vh(expert["cx"], expert["top"], sum_node["cx"] + offset, sum_node["cy"], arrow_id)
        )

    active = ffn.get("num_experts_per_tok")
    if expert_total and active:
        sparsity = 100 * active / expert_total
        cg_x, cg_y, cg_w, cg_h = width - 244, 58, 188, 58
        box_attrs = {
            "x": cg_x,
            "y": cg_y,
            "width": cg_w,
            "height": cg_h,
            "rx": 10,
            "ry": 10,
            "fill": C["bg_card"],
            "stroke": C["border"],
            "stroke-width": 0.5,
        }
        parts.append(_svg_tag("rect", box_attrs))
        heading_attrs = {
            "fill": C["muted"],
            "font-family": FONT_BODY,
            "font-size": 10,
            "letter-spacing": "0.12em",
            "font-weight": 600,
        }
        parts.append(_svg_text(cg_x + 12, cg_y + 18, "ACTIVE PER TOKEN", heading_attrs))
        value_attrs = {"fill": C["text"], "font-family": FONT_HEAD, "font-size": 22}
        parts.append(
            _svg_text(cg_x + 12, cg_y + 44, f"{active} / {expert_total} - {sparsity:.1f}%", value_attrs)
        )

    arrow_style = {
        "stroke": C["arrow"],
        "stroke-width": 1.6,
        "stroke-linecap": "round",
        "marker-end": f"url(#{arrow_id})",
        "fill": "none",
    }
    caption_style = {
        "text-anchor": "middle",
        "fill": C["muted"],
        "font-family": FONT_MONO,
        "font-size": 11,
    }

    # Output arrow leaving the summing node, with its caption.
    out_x, out_y = sum_node["cx"], sum_node["top"]
    parts.append(
        _svg_tag("line", {"x1": out_x, "y1": out_y, "x2": out_x, "y2": out_y - 36, **arrow_style})
    )
    parts.append(_svg_text(out_x, out_y - 46, f"out ({hidden})", dict(caption_style)))

    # Input arrow entering the router, with its caption.
    in_y = router["bottom"]
    parts.append(
        _svg_tag("line", {"x1": mid_x, "y1": in_y + 36, "x2": mid_x, "y2": in_y + GAP, **arrow_style})
    )
    parts.append(_svg_text(mid_x, in_y + 50, f"in ({hidden})", dict(caption_style)))

    return _svg(width, height, f"{ir.get('name', 'model')} mixture of experts", parts)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def build_ffn_view(ir: dict, info: dict, mount_id: str) -> str:
    """Draw the gated feed-forward detail SVG for the dominant layer spec.

    The input branches into a gate projection (followed by the activation
    box) and an up projection; both meet at a multiply node whose result
    goes through the down projection. Dimension labels are taken from
    ``ir["hidden_size"]`` and the FFN spec's intermediate size.
    """
    width, height = 720, 660
    arrow_id, shadow_id = _ids(mount_id, "ffn")
    parts = [
        _defs(arrow_id, shadow_id),
        _region_rect(40, 30, width - 80, height - 60, C["bg_outer"]),
    ]

    ffn = info["dominant"]["spec"]["ffn"]
    hidden = _fmt_int(ir.get("hidden_size"))
    inter = _fmt_int(ffn.get("expert_intermediate_size") or ffn.get("intermediate_size"))
    mid_x = width / 2
    act_name = activation_label(ffn.get("activation") or "silu")

    box_w, box_h = 180, 50
    left_x = mid_x - 270
    right_x = mid_x + 90

    down_proj = _rect_block(parts, info, shadow_id, "down_proj", mid_x - 90, 110, box_w, box_h, "Linear (down)")
    mul_node = _plus_block(parts, info, shadow_id, "mul", mid_x, 230, "×")
    silu = _rect_block(parts, info, shadow_id, "silu", left_x, 330, box_w, box_h, act_name)
    up_proj = _rect_block(parts, info, shadow_id, "up_proj", right_x, 330, box_w, box_h, "Linear (up)")
    gate_proj = _rect_block(parts, info, shadow_id, "gate_proj", left_x, 460, box_w, box_h, "Linear (gate)")

    # Branch point where the input splits towards gate and up projections.
    branch_y = height - 110
    reach = mul_node["r"] + GAP
    parts.extend(
        [
            _svg_tag("circle", {"cx": mid_x, "cy": branch_y, "r": 4, "fill": C["arrow"]}),
            _elbow_hv(mid_x, branch_y, gate_proj["cx"], gate_proj["bottom"] + GAP, arrow_id),
            _elbow_hv(mid_x, branch_y, up_proj["cx"], up_proj["bottom"] + GAP, arrow_id),
            _v_line(gate_proj, silu, arrow_id),
            _elbow_vh(silu["cx"], silu["top"], mul_node["cx"] - reach, mul_node["cy"], arrow_id),
            _elbow_vh(up_proj["cx"], up_proj["top"], mul_node["cx"] + reach, mul_node["cy"], arrow_id),
            _v_line(mul_node, down_proj, arrow_id),
        ]
    )

    dim_labels = (
        (down_proj["right"] + 14, down_proj["cy"], f"{inter} -> {hidden}", "start"),
        (gate_proj["right"] + 14, gate_proj["cy"], f"{hidden} -> {inter}", "start"),
        (up_proj["cx"], up_proj["bottom"] + 18, f"{hidden} -> {inter}", "middle"),
        (mul_node["cx"], mul_node["cy"] + 36, f"{inter} x {inter}", "middle"),
    )
    for label_x, label_y, label_text, label_anchor in dim_labels:
        _dim_label(parts, label_x, label_y, label_text, anchor=label_anchor)

    arrow_style = {
        "stroke": C["arrow"],
        "stroke-width": 1.6,
        "stroke-linecap": "round",
        "marker-end": f"url(#{arrow_id})",
        "fill": "none",
    }
    caption_style = {
        "text-anchor": "middle",
        "fill": C["muted"],
        "font-family": FONT_MONO,
        "font-size": 11,
    }

    # Output arrow leaving the down projection, with its caption.
    out_y = down_proj["top"]
    parts.append(
        _svg_tag("line", {"x1": mid_x, "y1": out_y, "x2": mid_x, "y2": out_y - 36, **arrow_style})
    )
    parts.append(_svg_text(mid_x, out_y - 46, f"out ({hidden})", dict(caption_style)))

    # Input arrow arriving at the branch point, with its caption.
    parts.append(
        _svg_tag("line", {"x1": mid_x, "y1": branch_y + 38, "x2": mid_x, "y2": branch_y + 8, **arrow_style})
    )
    parts.append(_svg_text(mid_x, height - 48, f"in ({hidden})", dict(caption_style)))

    return _svg(width, height, f"{ir.get('name', 'model')} feed-forward block", parts)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _dim_label(parts: list[str], x: float, y: float, text: str, *, anchor: str = "start") -> None:
    """Append a small, muted, monospace annotation to ``parts``.

    ``anchor`` is used as the SVG ``text-anchor`` value; the text is
    vertically centred on ``y`` via ``dominant-baseline: central``.
    """
    attrs = {
        "text-anchor": anchor,
        "dominant-baseline": "central",
        "fill": C["muted"],
        "font-family": FONT_MONO,
        "font-size": 10,
    }
    parts.append(_svg_text(x, y, text, attrs))
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Detail SVG for reusable Per-Layer Embedding blocks."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from ..metadata import _block_label
|
|
5
|
+
from ..svg import (
|
|
6
|
+
_defs,
|
|
7
|
+
_ids,
|
|
8
|
+
_plus_block,
|
|
9
|
+
_rect_block,
|
|
10
|
+
_region_rect,
|
|
11
|
+
_svg,
|
|
12
|
+
_svg_tag,
|
|
13
|
+
_svg_text,
|
|
14
|
+
_v_line,
|
|
15
|
+
)
|
|
16
|
+
from ..theme import C, FONT_MONO, GAP
|
|
17
|
+
from ..utils import _fmt_int
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def build_per_layer_embedding_view(ir: dict, info: dict, mount_id: str, block: dict) -> str:
    """Render the canonical PLE gate -> multiply -> projection detail view.

    Node ids, dimensions and optional labels are read from
    ``block['detail']``. Missing node ids fall back to the defaults from
    ``_node_ids``, and missing dimensions fall back to values found on ``ir``.
    """
    w, h = 720, 660
    detail = block.get("detail") or {}
    view_id = detail.get("view_id") or block.get("id") or "ple"
    arrow_id, shadow_id = _ids(mount_id, view_id)
    parts = [
        _defs(arrow_id, shadow_id),
        _region_rect(40, 30, w - 80, h - 60, C["bg_outer"]),
    ]

    ids = _node_ids(block)
    hidden_size = detail.get("hidden_size") or ir.get("hidden_size")
    embedding_dim = detail.get("embedding_dim")
    if not embedding_dim:
        ple_extras = (ir.get("extras") or {}).get("per_layer_embeddings") or {}
        embedding_dim = ple_extras.get("hidden")

    cx = 300
    y_shift = -24
    baseline = h + y_shift

    def framed(role: str, box_w: int, box_h: int, rise: int, fallback: str) -> dict:
        # One labelled rectangle centred on cx, ``rise`` pixels above the baseline.
        node_id = ids[role]
        return _rect_block(
            parts, info, shadow_id, node_id,
            cx - box_w // 2, baseline - rise, box_w, box_h,
            _label(info, block, node_id, fallback),
        )

    gate = framed("gate", 220, 50, 160, "Linear (gate)")
    act = framed("activation", 180, 44, 250, "Activation")
    mul = _plus_block(parts, info, shadow_id, ids["multiply"], cx, baseline - 320, "×")
    proj = framed("projection", 220, 50, 410, "Linear (up)")
    norm = framed("norm", 180, 44, 500, "RMSNorm")

    chain = (gate, act, mul, proj, norm)
    for src, dst in zip(chain, chain[1:]):
        parts.append(_v_line(src, dst, arrow_id))

    arrow_style = {
        "stroke": C["arrow"],
        "stroke-width": 1.6,
        "stroke-linecap": "round",
        "marker-end": f"url(#{arrow_id})",
        "fill": "none",
    }
    caption_style = {
        "text-anchor": "middle",
        "fill": C["muted"],
        "font-family": FONT_MONO,
        "font-size": 11,
    }

    # Output arrow leaving the norm box, with its caption.
    out_y = norm["top"]
    parts.append(_svg_tag("line", {"x1": cx, "y1": out_y, "x2": cx, "y2": out_y - 36, **arrow_style}))
    parts.append(
        _svg_text(cx, out_y - 46, detail.get("output_label") or "out -> add (residual)", dict(caption_style))
    )

    # Input arrow entering the gate box, with its caption.
    in_y = gate["bottom"]
    parts.append(_svg_tag("line", {"x1": cx, "y1": in_y + 38, "x2": cx, "y2": in_y + 8, **arrow_style}))
    parts.append(_svg_text(cx, in_y + 56, detail.get("input_label") or "in (hidden)", dict(caption_style)))

    # Dashed box for the tensor fed into the multiply node from the side.
    external = _external_tensor_block(
        parts,
        shadow_id,
        detail.get("pathway_id") or "per_layer_input",
        382,
        mul["cy"] - 21,
        178,
        42,
        detail.get("external_label") or "per_layer_input[L]",
    )
    feed_attrs = {
        "x1": external["left"] - GAP,
        "y1": external["cy"],
        "x2": mul["cx"] + mul["r"] + GAP,
        "y2": mul["cy"],
        "stroke": "#1F9E78",
        "stroke-width": 1.6,
        "stroke-linecap": "round",
        "stroke-dasharray": "5 4",
        "marker-end": f"url(#{arrow_id})",
    }
    parts.append(_svg_tag("line", feed_attrs))
    note_attrs = {"text-anchor": "middle", "fill": C["muted"], "font-family": FONT_MONO, "font-size": 9}
    parts.append(_svg_text(external["cx"], external["bottom"] + 16, "(outside stack)", note_attrs))

    def dims(src_size, dst_size) -> str:
        # The label is left empty when no embedding dimension is known.
        if not embedding_dim:
            return ""
        return f"{_fmt_int(src_size)} -> {_fmt_int(dst_size)}"

    side_style = {
        "dominant-baseline": "central",
        "fill": C["muted"],
        "font-family": FONT_MONO,
        "font-size": 10,
    }
    parts.append(
        _svg_text(gate["right"] + 14, gate["cy"], dims(hidden_size, embedding_dim), dict(side_style))
    )
    parts.append(
        _svg_text(proj["right"] + 14, proj["cy"], dims(embedding_dim, hidden_size), dict(side_style))
    )

    return _svg(w, h, f"{ir.get('name', 'model')} per-layer embeddings block", parts)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _node_ids(block: dict) -> dict[str, str]:
|
|
130
|
+
ids = ((block.get("detail") or {}).get("nodes") or {}).copy()
|
|
131
|
+
block_id = block.get("id") or "ple"
|
|
132
|
+
ids.setdefault("gate", f"{block_id}_gate")
|
|
133
|
+
ids.setdefault("activation", f"{block_id}_act")
|
|
134
|
+
ids.setdefault("multiply", f"{block_id}_mul")
|
|
135
|
+
ids.setdefault("projection", f"{block_id}_proj")
|
|
136
|
+
ids.setdefault("norm", f"{block_id}_norm")
|
|
137
|
+
return ids
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _external_tensor_block(
    parts: list[str],
    shadow_id: str,
    node_id: str,
    x: float,
    y: float,
    w: float,
    h: float,
    label: str,
) -> dict:
    """Append a dashed, labelled box to ``parts`` and return its geometry.

    The rectangle and its centred label are wrapped in a ``<g>`` carrying
    ``data-id=node_id``. The returned dict holds the box edges, centre and
    size under the keys ``left``, ``right``, ``top``, ``bottom``, ``cx``,
    ``cy``, ``w`` and ``h``.
    """
    right = x + w
    bottom = y + h
    mid_x = x + w / 2
    mid_y = y + h / 2

    frame = _svg_tag(
        "rect",
        {
            "x": x,
            "y": y,
            "width": w,
            "height": h,
            "rx": 10,
            "ry": 10,
            "fill": C["badge_bg"],
            "stroke": "#1F9E78",
            "stroke-width": 1,
            "stroke-dasharray": "4 3",
            "filter": f"url(#{shadow_id})",
        },
    )
    caption = _svg_text(
        mid_x,
        mid_y,
        label,
        {
            "text-anchor": "middle",
            "dominant-baseline": "central",
            "fill": "#0F6E56",
            "font-family": FONT_MONO,
            "font-size": 12,
            "font-weight": 700,
            "pointer-events": "none",
        },
    )
    group_attrs = {"class": "uf-node uf-external-tensor", "data-id": node_id}
    parts.append(_svg_tag("g", group_attrs, "".join((frame, caption))))

    return {
        "left": x,
        "right": right,
        "top": y,
        "bottom": bottom,
        "cx": mid_x,
        "cy": mid_y,
        "w": w,
        "h": h,
    }
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _label(info: dict, block: dict, node_id: str, default: str) -> str:
    """Pick the display label for a child node of ``block``.

    Order of preference: a truthy result from ``_block_label``, then the
    ``label`` of the first entry in ``block["children"]`` whose ``id`` equals
    ``node_id``, then ``default``.
    """
    from_info = _block_label(info, node_id, None)
    if from_info:
        return from_info

    siblings = block.get("children") or []
    match = next((child for child in siblings if child.get("id") == node_id), None)
    if match is None:
        return default
    return match.get("label") or default
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Inspect-card HTML for architecture block clicks."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from ...labels import activation_label
|
|
5
|
+
from .block_views import attention_card, block_detail_svg
|
|
6
|
+
from .utils import _attr, _fmt_int, _html
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _build_inspect_cards(ir: dict, info: dict, mount_id: str) -> str:
    """Cards-only HTML for the L2 inspect panel.

    Emits, in order: the default hint card, cards for ``tok_text`` and
    ``embed``, one card per block of the dominant spec, then cards for
    ``final_rms`` and ``lm_head``. Attention blocks use ``attention_card``;
    other blocks get a rich card when ``block_detail_svg`` returns something
    truthy and a plain card otherwise.
    """

    def meta_for(nid: str) -> tuple[str, str]:
        return _meta(info, nid)

    def plain(nid: str) -> str:
        return _simple_card(nid, *_meta(info, nid))

    cards: list[str] = [_hint_card("default", "Click a block above to inspect it")]
    dominant_spec = info["dominant"]["spec"]
    blocks = dominant_spec.get("blocks") or []

    cards.extend(plain(nid) for nid in ("tok_text", "embed"))

    for block in blocks:
        kind = block.get("kind")
        node_id = block["id"]
        if kind == "attention":
            cards.append(attention_card(ir, info, meta_for))
            continue

        svg = block_detail_svg(ir, info, mount_id, block)
        if not svg:
            cards.append(plain(node_id))
            continue
        title, desc = _meta(info, node_id)
        cards.append(_rich_card(node_id, title, desc, svg))

    cards.extend(plain(nid) for nid in ("final_rms", "lm_head"))

    return "".join(cards)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _build_sub_inspect_cards(ir: dict, info: dict, mount_id: str) -> str:
    """Cards-only HTML for the L3 sub-inspect panel.

    Starts with an empty ``default`` card. When the dominant spec's blocks
    declare children, one card is emitted per distinct child id (children
    without an id and repeated ids are skipped). Otherwise the generic
    FFN/MoE fallback cards are used.

    Args:
        ir: Model description passed on to the fallback card builder.
        info: Layout info; ``info["dominant"]["spec"]`` is read.
        mount_id: Unused here; kept for a signature parallel to
            ``_build_inspect_cards``.

    Raises:
        KeyError: If no children are declared and the dominant spec has no
            ``"ffn"`` entry.
    """
    panels: list[str] = [_l3_card("default", "", "")]
    children = _sub_inspect_children(info)

    if not children:
        # The FFN spec is only needed for the fallback cards, so it is looked
        # up here and not up front: specs that declare children but carry no
        # "ffn" entry must not fail.
        ffn = info["dominant"]["spec"]["ffn"]
        panels.extend(_fallback_sub_inspect_cards(ir, ffn))
        return "".join(panels)

    seen: set[str] = set()
    for child in children:
        child_id = child.get("id")
        if not child_id or child_id in seen:
            continue
        seen.add(child_id)
        panels.append(_l3_card(child_id, child.get("title", child_id), child.get("description", "")))

    return "".join(panels)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _meta(info: dict, node_id: str) -> tuple[str, str]:
|
|
60
|
+
return info.get("meta", {}).get(node_id, (node_id, ""))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _simple_card(node_id: str, title: str, desc: str) -> str:
    """Return a title-plus-description card tagged ``uf-card-<node_id>``."""
    pieces = [
        f'<div class="uf-card-detail uf-card-{_attr(node_id)}">',
        f'<div class="uf-card-title">{_html(title)}</div>',
        f'<div class="uf-card-desc">{_html(desc)}</div>',
        "</div>",
    ]
    return "".join(pieces)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _hint_card(node_id: str, hint: str) -> str:
    """Return a text-only hint card tagged ``uf-card-hint uf-card-<node_id>``."""
    css_classes = f"uf-card-detail uf-card-hint uf-card-{_attr(node_id)}"
    body = _html(hint)
    return f'<div class="{css_classes}">{body}</div>'
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _l3_card(node_id: str, title: str, desc: str) -> str:
    """Return a title-plus-description card tagged ``uf-l3-<node_id>``."""
    wrapper_open = f'<div class="uf-card-detail uf-l3-{_attr(node_id)}">'
    title_html = f'<div class="uf-card-title">{_html(title)}</div>'
    desc_html = f'<div class="uf-card-desc">{_html(desc)}</div>'
    return wrapper_open + title_html + desc_html + "</div>"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _rich_card(node_id: str, title: str, desc: str, svg: str) -> str:
    """Return a card with title, description and an embedded detail SVG.

    ``svg`` is inserted as-is inside the ``uf-card-svg`` wrapper; the title
    and description go through ``_html``.
    """
    sections = (
        f'<div class="uf-card-title">{_html(title)}</div>',
        f'<div class="uf-card-desc">{_html(desc)}</div>',
        f'<div class="uf-card-svg">{svg}</div>',
    )
    wrapper_open = f'<div class="uf-card-detail uf-card-{_attr(node_id)}">'
    return "".join((wrapper_open, *sections, "</div>"))
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _sub_inspect_children(info: dict) -> list[dict]:
|
|
100
|
+
children: list[dict] = []
|
|
101
|
+
for block in (info["dominant"]["spec"].get("blocks") or []):
|
|
102
|
+
children.extend(block.get("children") or [])
|
|
103
|
+
return children
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _fallback_sub_inspect_cards(ir: dict, ffn: dict) -> list[str]:
    """Build the generic L3 cards used when no block declares children.

    Always returns the five gated-FFN cards (gate, up, activation, multiply,
    down). When ``ffn["kind"]`` is ``"moe"``, cards for the router, the four
    representative experts and the weighted sum are appended.
    """
    hidden = _fmt_int(ir.get("hidden_size"))
    inter = _fmt_int(ffn.get("expert_intermediate_size") or ffn.get("intermediate_size"))
    activation = activation_label(ffn.get("activation") or "silu")

    dense_rows = (
        ("gate_proj", "Gate projection", f"Linear · {hidden} → {inter} (gated path through {activation})"),
        ("up_proj", "Up projection", f"Linear · {hidden} → {inter}"),
        ("silu", f"{activation} activation", "Element-wise non-linearity applied to the gate path"),
        ("mul", "Element-wise multiply", f"{activation}(gate) × up — combines the gated and ungated paths"),
        ("down_proj", "Down projection", f"Linear · {inter} → {hidden}"),
    )
    cards = [_l3_card(*row) for row in dense_rows]

    if ffn.get("kind") != "moe":
        return cards

    expert_total = ffn.get("num_experts")
    n_experts = _fmt_int(expert_total) if expert_total else "N"
    n_active = ffn.get("num_experts_per_tok") or "k"
    n_shared = ffn.get("num_shared_experts") or 0

    cards.append(
        _l3_card("router", "Router", f"Linear · {hidden} → {n_experts} (selects top-{n_active} experts per token)")
    )

    shared_note = f" · plus {n_shared} shared expert(s) always active" if n_shared else ""
    expert_desc = (
        f"Dense FFN with same shape as above · {hidden} → {inter} → {hidden} · "
        f"only top-{n_active} of {n_experts} active per token"
        + shared_note
    )
    for expert_id in ("expert_1", "expert_k", "expert_kp1", "expert_n"):
        cards.append(_l3_card(expert_id, "Expert FFN", expert_desc))

    cards.append(
        _l3_card("add_moe", "Weighted sum", f"Combines top-{n_active} expert outputs weighted by router probabilities")
    )
    return cards
|