model-unfolder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_unfolder/__init__.py +58 -0
- model_unfolder/adapters/__init__.py +15 -0
- model_unfolder/adapters/custom/__init__.py +8 -0
- model_unfolder/adapters/diffusor/__init__.py +8 -0
- model_unfolder/adapters/transformer/__init__.py +5 -0
- model_unfolder/adapters/transformer/assembly.py +57 -0
- model_unfolder/adapters/transformer/blocks.py +238 -0
- model_unfolder/adapters/transformer/common.py +35 -0
- model_unfolder/adapters/transformer/families/__init__.py +12 -0
- model_unfolder/adapters/transformer/families/deepseek.py +107 -0
- model_unfolder/adapters/transformer/families/gemma4.py +202 -0
- model_unfolder/adapters/transformer/families/llama.py +91 -0
- model_unfolder/adapters/transformer/special_parts/__init__.py +2 -0
- model_unfolder/adapters/transformer/special_parts/per_layer_embedding.py +220 -0
- model_unfolder/diagram.py +95 -0
- model_unfolder/html_renderer.py +5 -0
- model_unfolder/ir.py +163 -0
- model_unfolder/labels.py +166 -0
- model_unfolder/params.py +119 -0
- model_unfolder/parser.py +137 -0
- model_unfolder/renderers/__init__.py +1 -0
- model_unfolder/renderers/html/__init__.py +5 -0
- model_unfolder/renderers/html/block_views/__init__.py +20 -0
- model_unfolder/renderers/html/block_views/attention.py +91 -0
- model_unfolder/renderers/html/block_views/feed_forward.py +213 -0
- model_unfolder/renderers/html/block_views/per_layer_embedding.py +199 -0
- model_unfolder/renderers/html/cards.py +130 -0
- model_unfolder/renderers/html/document.py +157 -0
- model_unfolder/renderers/html/interactions.py +64 -0
- model_unfolder/renderers/html/metadata.py +265 -0
- model_unfolder/renderers/html/sections.py +60 -0
- model_unfolder/renderers/html/styles.py +283 -0
- model_unfolder/renderers/html/svg.py +349 -0
- model_unfolder/renderers/html/theme.py +24 -0
- model_unfolder/renderers/html/utils.py +28 -0
- model_unfolder/renderers/html/views.py +461 -0
- model_unfolder-0.2.0.dist-info/METADATA +122 -0
- model_unfolder-0.2.0.dist-info/RECORD +41 -0
- model_unfolder-0.2.0.dist-info/WHEEL +5 -0
- model_unfolder-0.2.0.dist-info/licenses/LICENSE +201 -0
- model_unfolder-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
"""Top-level SVG views for architecture and layer maps."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from ...labels import kind_short, mask_short
|
|
5
|
+
from .metadata import _block_label, _indices_summary, _signature
|
|
6
|
+
from .svg import (
|
|
7
|
+
_branch_dot,
|
|
8
|
+
_defs,
|
|
9
|
+
_elbow_hv,
|
|
10
|
+
_input_tap,
|
|
11
|
+
_ids,
|
|
12
|
+
_plus_block,
|
|
13
|
+
_rect_block,
|
|
14
|
+
_region_rect,
|
|
15
|
+
_residual_loop_right,
|
|
16
|
+
_svg,
|
|
17
|
+
_svg_tag,
|
|
18
|
+
_svg_text,
|
|
19
|
+
_v_line,
|
|
20
|
+
)
|
|
21
|
+
from .theme import C, FONT_BODY, FONT_HEAD, FONT_MONO, GAP
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# --- Layout vocabulary for the data-driven architecture view ----------------
# Every block ``kind`` maps to a glyph description: its shape (rect / circle),
# its nominal width and height, and either a font size (rects) or the symbol
# drawn inside it (circles). Rendering a new architectural feature means adding
# a kind here and tagging the relevant blocks in the adapter; the layout code
# itself stays untouched.
_KIND_LAYOUT = {
    "norm": dict(shape="rect", w=160, h=36, font=16),
    "linear": dict(shape="rect", w=200, h=38, font=15),
    "activation": dict(shape="rect", w=150, h=36, font=15),
    "attention": dict(shape="rect", w=230, h=60, font=17),
    "ffn": dict(shape="rect", w=160, h=44, font=17),
    "ple": dict(shape="rect", w=160, h=44, font=17),
    "residual_add": dict(shape="circle", w=28, h=28, sym="+"),
    "gate_mul": dict(shape="circle", w=28, h=28, sym="×"),
}

# Vertical gap between consecutive layer-body blocks. Kept larger than the
# arrow padding (`GAP` ×2) so the chain arrow shows a visible stem between
# blocks instead of collapsing to just an arrowhead.
_BLOCK_GAP = 32
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _build_architecture_view(ir: dict, info: dict, mount_id: str) -> str:
    """Assemble the data-driven decoder architecture view.

    The layer body is read from ``info['dominant']['spec']['blocks']``, an
    ordered list of typed blocks. Blocks without a ``lane`` are stacked
    bottom-up on the central column; each block's ``kind`` picks a glyph from
    :data:`_KIND_LAYOUT` (unknown kinds use the ``"norm"`` entry). Blocks that
    declare a ``lane`` are delegated to :func:`_draw_side_block`. A
    ``residual_from`` reference adds a bypass loop unless the referenced block
    is the immediate predecessor on the central chain, in which case the chain
    arrow already represents it.

    The inner region grows with the stacked height of the central blocks and
    never shrinks below 490 units.

    Args:
        ir: Model IR; this function reads ``layers`` (for the repeat badge)
            and ``name`` (for the label handed to ``_svg``).
        info: Render metadata; supplies the block list and is forwarded to the
            block and label helpers.
        mount_id: Forwarded to ``_ids`` to obtain the arrow and shadow ids.

    Returns:
        The result of ``_svg`` for the assembled parts.
    """
    all_blocks = list(info["dominant"]["spec"].get("blocks") or [])

    # Blocks with a lane live OFF the central column; everything else forms
    # the central chain.
    chain_blocks: list[dict] = []
    side_blocks: list[dict] = []
    for candidate in all_blocks:
        (side_blocks if candidate.get("lane") else chain_blocks).append(candidate)

    w = 720
    cx = 360
    inner_x, inner_w = 110, 500
    inner_y = 200

    # --- 1. Heights follow from the central chain ---
    inner_padding = 60
    stack_h = _layer_stack_height(chain_blocks)
    inner_h = max(490, stack_h + 2 * inner_padding)
    h = inner_y + inner_h + 232  # 232 = embed + tok_text + bottom padding

    arrow_id, shadow_id = _ids(mount_id, "arch")
    parts = [
        _defs(arrow_id, shadow_id),
        _region_rect(40, 26, w - 80, h - 52, C["bg_outer"]),
        _region_rect(inner_x, inner_y, inner_w, inner_h, C["bg_inner"]),
    ]

    # --- 2. Model-level scaffold: (id, x, y, width, height, fallback label, font) ---
    scaffold_specs = (
        ("tok_text", cx - 110, h - 100, 220, 44, "Tokenized text", 17),
        ("embed", cx - 130, h - 168, 260, 44, "Token Embedding layer", 17),
        ("final_rms", cx - 90, 140, 180, 36, "Final RMSNorm", 16),
        ("lm_head", cx - 130, 70, 260, 44, "Linear output layer", 17),
    )
    scaffold: dict[str, dict] = {}
    for part_id, x, y, part_w, part_h, fallback, size in scaffold_specs:
        scaffold[part_id] = _rect_block(
            parts, info, shadow_id, part_id,
            x, y, part_w, part_h,
            _block_label(info, part_id, fallback), font_size=size,
        )
    lm_head = scaffold["lm_head"]

    # --- 3. Layer body: walk the chain, moving the cursor upwards ---
    block_pos: dict[str, dict] = {}
    free = inner_h - stack_h
    y_cursor = inner_y + inner_h - free / 2
    for entry in chain_blocks:
        layout = _KIND_LAYOUT.get(entry["kind"]) or _KIND_LAYOUT["norm"]
        block_h = layout["h"]
        top = y_cursor - block_h
        if layout["shape"] != "rect":
            geom = _plus_block(
                parts, info, shadow_id, entry["id"],
                cx, top + block_h / 2, sym=layout.get("sym", "+"),
            )
        else:
            geom = _rect_block(
                parts, info, shadow_id, entry["id"],
                cx - layout["w"] / 2, top, layout["w"], block_h,
                _block_label(info, entry["id"], entry.get("label")),
                font_size=layout["font"],
            )
        block_pos[entry["id"]] = geom
        y_cursor = top - _BLOCK_GAP

    # --- 4. Straight arrows between consecutive stops of the main path ---
    chain = [scaffold["tok_text"], scaffold["embed"]]
    chain += [block_pos[entry["id"]] for entry in chain_blocks]
    chain += [scaffold["final_rms"], lm_head]
    parts += [_v_line(lower, upper, arrow_id) for lower, upper in zip(chain, chain[1:])]

    # Output arrow above lm_head.
    output_arrow = {
        "x1": cx, "y1": lm_head["top"], "x2": cx, "y2": lm_head["top"] - 32,
        "stroke": C["arrow"], "stroke-width": 1.6, "stroke-linecap": "round",
        "marker-end": f"url(#{arrow_id})", "fill": "none",
    }
    parts.append(_svg_tag("line", output_arrow))

    # --- 5. Residual loops (declared via residual_from) ---
    chain_ids = [entry["id"] for entry in chain_blocks]
    # Maps each chain block to the block directly before it on the chain.
    chain_prev = dict(zip(chain_ids[1:], chain_ids))
    branch_taps: set[tuple[float, float]] = set()
    lane = inner_x + inner_w - 28
    for entry in all_blocks:
        src_id = entry.get("residual_from")
        if not src_id or src_id not in block_pos or entry["id"] not in block_pos:
            continue
        if chain_prev.get(entry["id"]) == src_id:
            # The bypass coincides with the chain arrow; no extra loop.
            continue
        src_geom = block_pos[src_id]
        dst_geom = block_pos[entry["id"]]
        parts.append(_residual_loop_right(src_geom, dst_geom, lane, arrow_id))
        # Junction dot at the tap point on the input-arrow stem so the bypass
        # visually originates from the arrow, not from the block.
        _mark_branch_tap(parts, branch_taps, _input_tap(src_geom))

    # --- 6. Side blocks, placed off the central column ---
    for entry in side_blocks:
        _draw_side_block(
            parts, info, shadow_id,
            entry, block_pos,
            inner_x, inner_w, arrow_id, branch_taps,
        )

    # --- 7. Repeat-count badge over the inner region ---
    badge_box = {
        "x": inner_x + inner_w - 78, "y": inner_y + 12,
        "width": 66, "height": 26, "rx": 13, "ry": 13,
        "fill": "rgba(255,255,255,0.65)", "stroke": C["border"], "stroke-width": 0.5,
    }
    parts.append(_svg_tag("rect", badge_box))
    badge_style = {
        "text-anchor": "middle", "dominant-baseline": "central",
        "fill": C["text"], "font-family": FONT_HEAD, "font-size": 20,
    }
    parts.append(_svg_text(
        inner_x + inner_w - 45, inner_y + 25,
        f"x {len(ir.get('layers', []))}",
        badge_style,
    ))

    return _svg(w, h, f"{ir.get('name', 'model')} architecture", parts)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _layer_stack_height(layer_blocks: list[dict]) -> int:
    """Return the vertical extent of ``layer_blocks`` stacked with gaps.

    The result is the sum of each block's ``"h"`` from :data:`_KIND_LAYOUT`
    (kinds missing from the table use the ``"norm"`` entry) plus one
    ``_BLOCK_GAP`` between every pair of neighbours. An empty list yields 0.
    """
    if not layer_blocks:
        return 0
    fallback = _KIND_LAYOUT["norm"]
    heights = [_KIND_LAYOUT.get(entry["kind"], fallback)["h"] for entry in layer_blocks]
    gap_total = _BLOCK_GAP * (len(heights) - 1)
    return sum(heights) + gap_total
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _draw_side_block(
    parts: list[str],
    info: dict,
    shadow_id: str,
    block: dict,
    block_pos: dict,
    inner_x: float,
    inner_w: float,
    arrow_id: str,
    branch_taps: set[tuple[float, float]],
) -> None:
    """Draw one block that sits beside the central chain.

    The block is vertically centred on the block named by ``feeds`` and pushed
    towards the left or right edge of the inner region according to ``lane``
    (any value other than ``"left"`` means right). Its input is an elbow arrow
    starting at the ``_input_tap`` point of the ``tap_from`` block; its output
    is a horizontal arrow into the ``feeds`` block. The drawn geometry is
    recorded in ``block_pos`` under the block's id.

    Nothing is drawn when ``feeds`` or ``tap_from`` is missing or does not
    resolve to an entry in ``block_pos``.

    Args:
        parts: SVG fragment list, appended to in place.
        info: Render metadata forwarded to the block and label helpers.
        shadow_id: Forwarded to ``_rect_block``.
        block: The side block's declaration (``kind``, ``id``, ``lane``,
            ``feeds``, ``tap_from``, optional ``label``).
        block_pos: Geometry of the blocks drawn so far, keyed by block id.
        inner_x: Left edge of the inner region.
        inner_w: Width of the inner region.
        arrow_id: Id of the arrowhead marker referenced by the arrows.
        branch_taps: Tap points that already carry a junction dot.
    """
    # NOTE(review): the circle kinds in _KIND_LAYOUT carry no "font" entry, so
    # a side block of such a kind would raise KeyError at the _rect_block call
    # below — confirm adapters never put those kinds in a lane.
    layout = _KIND_LAYOUT.get(block["kind"]) or _KIND_LAYOUT["norm"]
    block_w, block_h = layout["w"], layout["h"]
    on_left = block.get("lane", "left") == "left"

    anchors = []
    for ref_key in ("feeds", "tap_from"):
        ref_id = block.get(ref_key)
        anchors.append(block_pos.get(ref_id) if ref_id else None)
    feeds_geom, tap_geom = anchors
    if not (feeds_geom and tap_geom):
        return  # mis-declared; nothing to anchor to

    # Same row as the fed block, shifted towards the chosen edge.
    cy = feeds_geom["cy"]
    block_x = inner_x + 30 if on_left else inner_x + inner_w - 30 - block_w
    top = cy - block_h / 2

    geom = _rect_block(
        parts, info, shadow_id, block["id"],
        block_x, top, block_w, block_h,
        _block_label(info, block["id"], block.get("label")),
        font_size=layout["font"],
    )
    block_pos[block["id"]] = geom

    # --- Input: elbow from tap_from's input tap to just below this block, so
    #     the picture reads "the value entering the layer also feeds this
    #     side block".
    tap_x, tap_y = _input_tap(tap_geom)
    rail_x = geom["cx"]
    parts.append(_elbow_hv(tap_x, tap_y, rail_x, geom["bottom"] + GAP, arrow_id))
    _mark_branch_tap(parts, branch_taps, (tap_x, tap_y))

    # --- Output: short horizontal arrow into the fed block.
    if on_left:
        x1, x2 = geom["right"], feeds_geom["left"] - GAP
    else:
        x1, x2 = geom["left"], feeds_geom["right"] + GAP
    output_arrow = {
        "x1": x1, "y1": cy, "x2": x2, "y2": cy,
        "stroke": C["arrow"], "stroke-width": 1.6, "stroke-linecap": "round",
        "marker-end": f"url(#{arrow_id})", "fill": "none",
    }
    parts.append(_svg_tag("line", output_arrow))
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _mark_branch_tap(
    parts: list[str],
    branch_taps: set[tuple[float, float]],
    tap: tuple[float, float],
) -> None:
    """Append a junction dot for ``tap`` unless that point already has one.

    Points are compared after rounding both coordinates to three decimals;
    the rounded pair is what gets stored in ``branch_taps``. The dot itself is
    produced by ``_branch_dot`` from the unrounded coordinates.
    """
    x, y = tap[0], tap[1]
    dedupe_key = (round(x, 3), round(y, 3))
    if dedupe_key not in branch_taps:
        branch_taps.add(dedupe_key)
        parts.append(_branch_dot(*tap))
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _build_layer_map(ir: dict, info: dict, mount_id: str) -> str:
    """Render the layer map: one coloured cell per layer plus a legend.

    Each layer becomes a cell in a horizontal strip, coloured by the group
    its signature belongs to. Layers whose ``attention.kv_source_layer`` is
    set get a hatch overlay, a bracket with a summary above the strip, and an
    extra legend row.

    Args:
        ir: Model IR; reads ``layers``, ``cross_layer_edges`` and ``name``.
        info: Render metadata; reads ``groups`` (each with ``sig`` and
            ``spec``) and the optional ``layer_sigs``.
        mount_id: Used for the definition ids and the hatch pattern id.

    Returns:
        The result of ``_svg`` for the assembled parts.
    """
    w = 720
    # ``or []`` also covers an explicit ``"layers": None`` in the IR.
    layers = ir.get("layers") or []
    groups = info["groups"]
    hatch_fill = f"url(#uf-{mount_id}-hatch)"

    kv_shared_indices = [
        i for i, layer in enumerate(layers)
        if (layer.get("attention") or {}).get("kv_source_layer") is not None
    ]
    has_kv_share = bool(kv_shared_indices)
    n_legend_rows = len(groups) + (1 if has_kv_share else 0)
    # Reserve extra room for the KV-share bracket, its annotation and legend row.
    extra = 56 if has_kv_share else 0
    h = max(240, 160 + extra + 22 * n_legend_rows)

    arrow_id, shadow_id = _ids(mount_id, "map")
    parts = [_defs(arrow_id, shadow_id)]
    parts.append(_hatch_pattern(mount_id))
    parts.append(_region_rect(40, 30, w - 80, h - 60, C["bg_card"], stroke=C["border"], stroke_width=0.5))

    # Green-family palette so the layer map shares the diagram's theme.
    palette = ["#0F6E56", "#1F9E78", "#5BB89A", "#0A4F3F", "#7FCFB4", "#0E5C48", "#A0E3CD"]
    sig_to_color = {group["sig"]: palette[i % len(palette)] for i, group in enumerate(groups)}

    strip_x, strip_y, strip_w, strip_h = 80, 90, w - 160, 36
    n = len(layers)
    col_w = strip_w / max(n, 1)
    # Leave a hairline between neighbouring cells, but never go below 1 unit.
    cell_w = max(col_w - 0.5, 1)

    def cell_attrs(index: int, fill: str) -> dict:
        """Attributes shared by a layer cell and its hatch overlay."""
        return {
            "x": strip_x + index * col_w,
            "y": strip_y,
            "width": cell_w,
            "height": strip_h,
            "fill": fill,
        }

    layer_sigs = info.get("layer_sigs") or [_signature(layer) for layer in layers]
    for i, sig in enumerate(layer_sigs):
        attrs = cell_attrs(i, sig_to_color.get(sig, palette[0]))
        attrs["opacity"] = 0.95
        parts.append(_svg_tag("rect", attrs))

    # KV-share overlay: diagonal hatch on layers that carry a kv_source_layer.
    for i in kv_shared_indices:
        attrs = cell_attrs(i, hatch_fill)
        attrs["pointer-events"] = "none"
        parts.append(_svg_tag("rect", attrs))

    # Outline around the whole strip.
    parts.append(
        _svg_tag(
            "rect",
            {
                "x": strip_x,
                "y": strip_y,
                "width": strip_w,
                "height": strip_h,
                "fill": "none",
                "stroke": C["text"],
                "stroke-width": 0.4,
                "rx": 4,
                "ry": 4,
            },
        )
    )

    if n:
        # First and last layer index; the set collapses them for a
        # single-layer model so "L0" is not drawn twice.
        for idx in sorted({0, n - 1}):
            parts.append(
                _svg_text(
                    strip_x + (idx + 0.5) * col_w,
                    strip_y + strip_h + 16,
                    f"L{idx}",
                    {"text-anchor": "middle", "fill": C["muted"], "font-family": FONT_MONO, "font-size": 10},
                )
            )

    type_word = "type" if len(groups) == 1 else "types"
    parts.append(
        _svg_text(
            strip_x,
            70,
            f"{n} layers - {len(groups)} {type_word}",
            {"fill": C["text"], "font-family": FONT_BODY, "font-size": 12, "font-weight": 600},
        )
    )

    legend_y = strip_y + strip_h + 44

    if has_kv_share:
        first = kv_shared_indices[0]
        last = kv_shared_indices[-1]
        # Bracket above the strip spanning the first to the last KV-shared layer.
        bracket_y = strip_y - 8
        x_start = strip_x + first * col_w
        x_end = strip_x + (last + 1) * col_w - 0.5
        parts.append(
            _svg_tag(
                "path",
                {
                    "d": f"M {x_start} {bracket_y - 6} L {x_start} {bracket_y} L {x_end} {bracket_y} L {x_end} {bracket_y - 6}",
                    "fill": "none",
                    "stroke": C["muted"],
                    "stroke-width": 1.0,
                    "stroke-linecap": "round",
                },
            )
        )
        # Sources of the K/V tensors, collected from cross-layer edges. Edges
        # without a ``from_layer`` are skipped: sorting None together with
        # ints raises TypeError, and a lone None would render as "LNone".
        edges = ir.get("cross_layer_edges") or []
        kv_sources = sorted(
            {
                e["from_layer"]
                for e in edges
                if e.get("kind") == "kv_share" and e.get("from_layer") is not None
            }
        )
        if len(kv_sources) > 1:
            src_summary = f"L{kv_sources[0]}–L{kv_sources[-1]}"
        elif kv_sources:
            src_summary = f"L{kv_sources[0]}"
        else:
            src_summary = "earlier layer"
        share_label = (
            f"K/V reused: L{first}–L{last} ({len(kv_shared_indices)} layers) ← {src_summary}"
        )
        parts.append(
            _svg_text(
                (x_start + x_end) / 2,
                bracket_y - 12,
                share_label,
                {"text-anchor": "middle", "fill": C["muted"], "font-family": FONT_MONO, "font-size": 10},
            )
        )
        legend_y += 8

    # Legend: one colour chip and description per group.
    lx, ly = strip_x, legend_y
    legend_text_style = {
        "dominant-baseline": "central",
        "fill": C["text"],
        "font-family": FONT_BODY,
        "font-size": 12,
    }
    for group in groups:
        spec = group["spec"]
        ffn_kind = "MoE" if spec["ffn"].get("kind") == "moe" else "Dense"
        attn = spec.get("attention", {})
        label = (
            f"{kind_short(attn)} + {ffn_kind} ({mask_short(attn)})"
            f" · {_indices_summary(group, info)}"
        )
        color = sig_to_color[group["sig"]]
        parts.append(_svg_tag("rect", {"x": lx, "y": ly - 9, "width": 12, "height": 12, "fill": color, "rx": 2}))
        parts.append(_svg_text(lx + 18, ly, label, dict(legend_text_style)))
        ly += 20

    if has_kv_share:
        # Hatched chip in the legend: solid base colour with the hatch on top.
        for chip_fill in (palette[0], hatch_fill):
            parts.append(
                _svg_tag(
                    "rect",
                    {"x": lx, "y": ly - 9, "width": 12, "height": 12, "fill": chip_fill, "rx": 2},
                )
            )
        parts.append(
            _svg_text(
                lx + 18,
                ly,
                f"K/V reused (no own K/V projections) · {len(kv_shared_indices)} layers",
                dict(legend_text_style),
            )
        )

    return _svg(w, h, f"{ir.get('name', 'model')} layer map", parts)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def _hatch_pattern(mount_id: str) -> str:
    """Return the ``<defs>`` markup of the diagonal-stripe pattern for KV-shared layers.

    The pattern id is ``uf-<mount_id>-hatch``; it is a 6x6 tile rotated by
    45 degrees that holds a single translucent white line.
    """
    pattern_open = (
        f'<pattern id="uf-{mount_id}-hatch" patternUnits="userSpaceOnUse"'
        ' width="6" height="6" patternTransform="rotate(45)">'
    )
    pieces = [
        '<defs>',
        pattern_open,
        '<rect width="6" height="6" fill="none"/>',
        '<line x1="0" y1="0" x2="0" y2="6" stroke="rgba(255,255,255,0.55)" stroke-width="2"/>',
        '</pattern>',
        '</defs>',
    ]
    return "".join(pieces)
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: model-unfolder
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Unfold any HuggingFace transformer into an interactive architecture diagram, inline in Jupyter.
|
|
5
|
+
Author: model-unfolder contributors
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: transformers,visualization,llm,architecture,jupyter
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Framework :: Jupyter
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Provides-Extra: hf
|
|
16
|
+
Requires-Dist: transformers>=4.40; extra == "hf"
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
19
|
+
Requires-Dist: transformers>=4.40; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# MODEL UNFOLDER
|
|
23
|
+
|
|
24
|
+
> Your one-click model unfolder
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from model_unfolder import unfold
|
|
28
|
+
unfold("meta-llama/Meta-Llama-3-8B")
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
<p align="center">
|
|
32
|
+
<a href="examples/llama-3-8b.html">
|
|
33
|
+
<img src="examples/images/llama-3-8b.png" width="540" alt="Meta-Llama-3-8B architecture diagram">
|
|
34
|
+
</a>
|
|
35
|
+
</p>
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install model-unfolder
|
|
43
|
+
|
|
44
|
+
# for local development
|
|
45
|
+
pip install -e .
|
|
46
|
+
pip install transformers # only required to load by model ID
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Three ways to call it
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from model_unfolder import unfold
|
|
53
|
+
|
|
54
|
+
# 1) by HuggingFace model ID — only config.json is downloaded, never weights
|
|
55
|
+
unfold("meta-llama/Meta-Llama-3-8B")
|
|
56
|
+
unfold("deepseek-ai/DeepSeek-V3")
|
|
57
|
+
|
|
58
|
+
# 2) from a transformers AutoConfig
|
|
59
|
+
from transformers import AutoConfig
|
|
60
|
+
unfold(AutoConfig.from_pretrained("Qwen/Qwen2.5-7B", trust_remote_code=True))
|
|
61
|
+
|
|
62
|
+
# 3) from a raw config.json dict — no transformers install needed
|
|
63
|
+
import json
|
|
64
|
+
unfold(json.load(open("config.json")))
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Built on `transformers`
|
|
68
|
+
|
|
69
|
+
Pass a model ID and `unfold` calls `transformers.AutoConfig.from_pretrained(model_id)` under the hood ([parser.py](model_unfolder/parser.py)). It only retries with `trust_remote_code=True` when Transformers says the config requires remote code.
|
|
70
|
+
|
|
71
|
+
## Auth-token from your environment
|
|
72
|
+
|
|
73
|
+
Gated models (Llama-3, Mistral, Gemma, …) need a HuggingFace token. `unfold` reuses whatever `transformers` / `huggingface_hub` already see:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Either set an env var
|
|
77
|
+
export HF_TOKEN="hf_xxxxxxxx" # also accepted: HUGGING_FACE_HUB_TOKEN
|
|
78
|
+
|
|
79
|
+
# or use the CLI cache (persists across sessions)
|
|
80
|
+
huggingface-cli login
|
|
81
|
+
|
|
82
|
+
# or load a .env in your notebook
|
|
83
|
+
# >>> from dotenv import load_dotenv; load_dotenv()
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
No extra configuration is needed in `model_unfolder` itself.
|
|
87
|
+
|
|
88
|
+
## Save / export
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
diagram = unfold(cfg)
|
|
92
|
+
diagram.save("model.html") # standalone interactive HTML
|
|
93
|
+
diagram.save("model.json") # IR (no rendering)
|
|
94
|
+
diagram.param_count() # {"total": ..., "active": ..., "per_layer": [...]}
|
|
95
|
+
diagram.to_ir() # full IR dict
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Param estimates are close to published numbers — DeepSeek-V3 reports `~675B (~41B active)`, Llama-3-8B reports `8.03B`.
|
|
99
|
+
|
|
100
|
+
## Live demos
|
|
101
|
+
|
|
102
|
+
Open in any browser to interact (click blocks, expand sub-blocks, toggle layer types):
|
|
103
|
+
|
|
104
|
+
| Model | Highlights | Demo |
|
|
105
|
+
|---|---|---|
|
|
106
|
+
| Llama-3-8B | GQA + dense baseline | [examples/llama-3-8b.html](examples/llama-3-8b.html) |
|
|
107
|
+
| Mistral-7B-v0.3 | GQA + dense, 32k context | [examples/mistral-7b-v0.3.html](examples/mistral-7b-v0.3.html) |
|
|
108
|
+
| DeepSeek-V3 | MLA + dense → MoE phase change | [examples/deepseek-v3.html](examples/deepseek-v3.html) |
|
|
109
|
+
| Kimi K2 | MLA + 384-expert MoE, ~1T params | [examples/kimi-k2.html](examples/kimi-k2.html) |
|
|
110
|
+
|
|
111
|
+
## Supported architectures
|
|
112
|
+
|
|
113
|
+
| Family | Adapter | Notes |
|
|
114
|
+
|---|---|---|
|
|
115
|
+
| DeepSeek-V2 / V3 / Kimi K2 | [families/deepseek.py](model_unfolder/adapters/transformer/families/deepseek.py) | MLA + dense → MoE phase change |
|
|
116
|
+
| Llama / Mistral / Qwen2 / Qwen3 / Phi-3 | [families/llama.py](model_unfolder/adapters/transformer/families/llama.py) | GQA / MQA / MHA + dense FFN |
|
|
117
|
+
| Gemma 4 | [families/gemma4.py](model_unfolder/adapters/transformer/families/gemma4.py) | sliding/global layers, KV sharing, PLE |
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
## License
|
|
121
|
+
|
|
122
|
+
[Apache 2.0](LICENSE).
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
model_unfolder/__init__.py,sha256=tHQqiyBxVcnfLXdLzj9nVl_e1tZXeLJrSZ0h9FXgBkU,1436
|
|
2
|
+
model_unfolder/diagram.py,sha256=IqxzaCkIlUxqubQ15tTG_VYyq0fPJjEDrDoF43GvvR0,3312
|
|
3
|
+
model_unfolder/html_renderer.py,sha256=BApMwMhik8DXRs30B-Q6yxPs9wauCZRq5-34kIo2rdU,175
|
|
4
|
+
model_unfolder/ir.py,sha256=odlJo9qZMP_x-umbAvyyVw2bIxwxg5A0bAtSpg15YWc,5543
|
|
5
|
+
model_unfolder/labels.py,sha256=Sgk77kVHkNQdwutE445XLmbpZT05mDV8PgOqBSAxE18,5752
|
|
6
|
+
model_unfolder/params.py,sha256=HOgXh4P9HxTwiAjwhuIhj4ajjTitRK7rYK3JGQz_ul0,3682
|
|
7
|
+
model_unfolder/parser.py,sha256=3pST8OUzur6AdrNLRK808vkA0Kvn0dh36U3fhwfdAWM,4023
|
|
8
|
+
model_unfolder/adapters/__init__.py,sha256=hat_sdbjeFQYorLiOFzzUjnwcE_6f0CWqRfiOLsimDs,319
|
|
9
|
+
model_unfolder/adapters/custom/__init__.py,sha256=b67NtIumgv2H4mcoUi7vllXAEJPobFLccyNax3t_-gM,209
|
|
10
|
+
model_unfolder/adapters/diffusor/__init__.py,sha256=IRCb0ZqRTg6mGQE4IGmXDq5zfXOCTkIzvVcoOE7bdWI,203
|
|
11
|
+
model_unfolder/adapters/transformer/__init__.py,sha256=3u716Sm4JHKlQsg-RUNSFdTzcWA2p86Re0OTRlGOGC8,121
|
|
12
|
+
model_unfolder/adapters/transformer/assembly.py,sha256=RWdlIdv7Kw8iFVw1rd_M-tmRn_KvpuVGEg8_nd1yQuc,1534
|
|
13
|
+
model_unfolder/adapters/transformer/blocks.py,sha256=C0hcbdFKzgJDkkf3mNXjBJ3E3sltrItok8CuDsKFneU,9003
|
|
14
|
+
model_unfolder/adapters/transformer/common.py,sha256=rfjUlHrR4ro-megaQZLv3h3fbjotn1PNBjoXLSqZ7b4,1073
|
|
15
|
+
model_unfolder/adapters/transformer/families/__init__.py,sha256=HKd6kn3ZegP0Fvd_bBt2o-okMek10xgC33zmSE4-Ils,442
|
|
16
|
+
model_unfolder/adapters/transformer/families/deepseek.py,sha256=Ayd34wHNGaWnw-yVc3xv_GiS75Aafy2D72YQvSTHIF4,3874
|
|
17
|
+
model_unfolder/adapters/transformer/families/gemma4.py,sha256=1jMzdgnpacrApalsVHBmancJOp4dTrlmn-oARmMX3fk,7308
|
|
18
|
+
model_unfolder/adapters/transformer/families/llama.py,sha256=t3YBkng1y5Ue-J0m9xfa0546nObvB_sUBfyv2-KDJxY,3079
|
|
19
|
+
model_unfolder/adapters/transformer/special_parts/__init__.py,sha256=UwsfIcLipJlrXRAmzrBF6mhsVDQTaMksvWZ_0bC-bEU,41
|
|
20
|
+
model_unfolder/adapters/transformer/special_parts/per_layer_embedding.py,sha256=gYlV3W6j1W1H0Vyfn2SLiKlhUETs_skjFqO1rOMQUTU,6818
|
|
21
|
+
model_unfolder/renderers/__init__.py,sha256=1ykvzunxvgh2Isb8rfaxVuL7cU4z5uft0xL7y-MbS2c,54
|
|
22
|
+
model_unfolder/renderers/html/__init__.py,sha256=m40f45B1vwhchu6YIoFPKF_put6bVeEMxvhoQXsKwMQ,140
|
|
23
|
+
model_unfolder/renderers/html/cards.py,sha256=0kCAtoFSOb2yynMrktvQW5JFoWDCerZYxmgnsP9ftpA,5109
|
|
24
|
+
model_unfolder/renderers/html/document.py,sha256=oWJDIRYM6q2vU0qzWocq2BV2b_Wu3Pk_z6gFoRDenEo,5750
|
|
25
|
+
model_unfolder/renderers/html/interactions.py,sha256=UoA1J2DHPTxsWMDy1DcwEl1FoPA99PO34fU_1Qjz6qE,2076
|
|
26
|
+
model_unfolder/renderers/html/metadata.py,sha256=KNIUYO9I4TIxRer_9Vw98L8Lj2YS7QsLFTg_Fk4grVM,10046
|
|
27
|
+
model_unfolder/renderers/html/sections.py,sha256=Ahp_C_JykGBWIVx-g0FSQeaFb_tQ89pJLIOemJkYe88,2082
|
|
28
|
+
model_unfolder/renderers/html/styles.py,sha256=FmC7ottzXNRfJPCoPkismEOBo-cGqlxegmyWBSPbHfY,6322
|
|
29
|
+
model_unfolder/renderers/html/svg.py,sha256=wnOglNvFmcsG29sULBmz2soZesLF5P2FQRmQyikOoXs,10907
|
|
30
|
+
model_unfolder/renderers/html/theme.py,sha256=90mxafJl8H_EaUZESo6t8Lz9P_G71xY4POBjdxr5XUo,821
|
|
31
|
+
model_unfolder/renderers/html/utils.py,sha256=mOiKht9RcxdWIypw1-ZVyEPDd4vtDWtNFFQEfucQegI,602
|
|
32
|
+
model_unfolder/renderers/html/views.py,sha256=CsOaa7UMaO2yE5z-Q3a9UUEl00_Y-MSrP2h6vJSHUu0,17477
|
|
33
|
+
model_unfolder/renderers/html/block_views/__init__.py,sha256=Bku0AMi-1UIBJe1FUqQMqWelwTzys71N7vdpIRdWPsU,815
|
|
34
|
+
model_unfolder/renderers/html/block_views/attention.py,sha256=z8jOK5BzdDU_FU0kh9rpNb5EL9Gg3y1aGEC51VneboI,2769
|
|
35
|
+
model_unfolder/renderers/html/block_views/feed_forward.py,sha256=MZeH_X_4FTxr_25PZvpPXX_ExlAIYmiGKyC1PIoLv5c,8465
|
|
36
|
+
model_unfolder/renderers/html/block_views/per_layer_embedding.py,sha256=oRljarBSuWLu6ybbMwubUvfh9VaLUfneGX8yTi8kyxE,6569
|
|
37
|
+
model_unfolder-0.2.0.dist-info/licenses/LICENSE,sha256=yMO60lTafQiwR2JxgndvctUowU2R-x1qNTYdTCL8B-8,11317
|
|
38
|
+
model_unfolder-0.2.0.dist-info/METADATA,sha256=WJB3K7GFKyTZ-nYQSa2PCYSWxtkXdNawZvZYjEMYYBw,4027
|
|
39
|
+
model_unfolder-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
40
|
+
model_unfolder-0.2.0.dist-info/top_level.txt,sha256=bVwVm4fshXNJ6gUZhj1atwhytYmjm8zpoM5ijBRkrRo,15
|
|
41
|
+
model_unfolder-0.2.0.dist-info/RECORD,,
|