model-unfolder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. model_unfolder/__init__.py +58 -0
  2. model_unfolder/adapters/__init__.py +15 -0
  3. model_unfolder/adapters/custom/__init__.py +8 -0
  4. model_unfolder/adapters/diffusor/__init__.py +8 -0
  5. model_unfolder/adapters/transformer/__init__.py +5 -0
  6. model_unfolder/adapters/transformer/assembly.py +57 -0
  7. model_unfolder/adapters/transformer/blocks.py +238 -0
  8. model_unfolder/adapters/transformer/common.py +35 -0
  9. model_unfolder/adapters/transformer/families/__init__.py +12 -0
  10. model_unfolder/adapters/transformer/families/deepseek.py +107 -0
  11. model_unfolder/adapters/transformer/families/gemma4.py +202 -0
  12. model_unfolder/adapters/transformer/families/llama.py +91 -0
  13. model_unfolder/adapters/transformer/special_parts/__init__.py +2 -0
  14. model_unfolder/adapters/transformer/special_parts/per_layer_embedding.py +220 -0
  15. model_unfolder/diagram.py +95 -0
  16. model_unfolder/html_renderer.py +5 -0
  17. model_unfolder/ir.py +163 -0
  18. model_unfolder/labels.py +166 -0
  19. model_unfolder/params.py +119 -0
  20. model_unfolder/parser.py +137 -0
  21. model_unfolder/renderers/__init__.py +1 -0
  22. model_unfolder/renderers/html/__init__.py +5 -0
  23. model_unfolder/renderers/html/block_views/__init__.py +20 -0
  24. model_unfolder/renderers/html/block_views/attention.py +91 -0
  25. model_unfolder/renderers/html/block_views/feed_forward.py +213 -0
  26. model_unfolder/renderers/html/block_views/per_layer_embedding.py +199 -0
  27. model_unfolder/renderers/html/cards.py +130 -0
  28. model_unfolder/renderers/html/document.py +157 -0
  29. model_unfolder/renderers/html/interactions.py +64 -0
  30. model_unfolder/renderers/html/metadata.py +265 -0
  31. model_unfolder/renderers/html/sections.py +60 -0
  32. model_unfolder/renderers/html/styles.py +283 -0
  33. model_unfolder/renderers/html/svg.py +349 -0
  34. model_unfolder/renderers/html/theme.py +24 -0
  35. model_unfolder/renderers/html/utils.py +28 -0
  36. model_unfolder/renderers/html/views.py +461 -0
  37. model_unfolder-0.2.0.dist-info/METADATA +122 -0
  38. model_unfolder-0.2.0.dist-info/RECORD +41 -0
  39. model_unfolder-0.2.0.dist-info/WHEEL +5 -0
  40. model_unfolder-0.2.0.dist-info/licenses/LICENSE +201 -0
  41. model_unfolder-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,461 @@
1
+ """Top-level SVG views for architecture and layer maps."""
2
+ from __future__ import annotations
3
+
4
+ from ...labels import kind_short, mask_short
5
+ from .metadata import _block_label, _indices_summary, _signature
6
+ from .svg import (
7
+ _branch_dot,
8
+ _defs,
9
+ _elbow_hv,
10
+ _input_tap,
11
+ _ids,
12
+ _plus_block,
13
+ _rect_block,
14
+ _region_rect,
15
+ _residual_loop_right,
16
+ _svg,
17
+ _svg_tag,
18
+ _svg_text,
19
+ _v_line,
20
+ )
21
+ from .theme import C, FONT_BODY, FONT_HEAD, FONT_MONO, GAP
22
+
23
+
24
# --- Layout vocabulary for the data-driven architecture view ----------------
# Each ``kind`` declares its glyph (rect / circle), nominal size, and font.
# A new architectural feature gets rendered by adding a kind here and tagging
# the relevant blocks in the adapter — no edits to the layout engine itself.
#
# Fields, as consumed by the layout code in this module:
#   "shape" — "rect" draws a labelled rectangle; anything else is drawn with
#             the circular operator glyph.
#   "w"/"h" — nominal width / height in SVG user units. Central-chain circle
#             glyphs only use "h" (for vertical stacking).
#   "font"  — label font size; present on rect kinds only.
#   "sym"   — operator symbol for circle kinds (defaults to "+" when absent).
# Unknown kinds fall back to the "norm" entry.
_KIND_LAYOUT = {
    "norm": {"shape": "rect", "w": 160, "h": 36, "font": 16},
    "linear": {"shape": "rect", "w": 200, "h": 38, "font": 15},
    "activation": {"shape": "rect", "w": 150, "h": 36, "font": 15},
    "attention": {"shape": "rect", "w": 230, "h": 60, "font": 17},
    "ffn": {"shape": "rect", "w": 160, "h": 44, "font": 17},
    "ple": {"shape": "rect", "w": 160, "h": 44, "font": 17},
    "residual_add": {"shape": "circle", "w": 28, "h": 28, "sym": "+"},
    "gate_mul": {"shape": "circle", "w": 28, "h": 28, "sym": "×"},
}
_BLOCK_GAP = 32  # vertical gap between consecutive layer-body blocks
# Larger than the arrow padding (`GAP` ×2) so the chain arrow has a visible
# stem between blocks rather than collapsing to just an arrowhead.
41
+
42
+
43
def _build_architecture_view(ir: dict, info: dict, mount_id: str) -> str:
    """Build the decoder architecture diagram and return it as SVG markup.

    The per-layer body is read from ``info['dominant']['spec']['blocks']``,
    an ordered list of typed blocks. Blocks without a ``lane`` tag are
    stacked bottom-up on the central column, each drawn with the glyph its
    ``kind`` maps to in :data:`_KIND_LAYOUT`. Blocks with a ``lane`` tag are
    drawn off-column by :func:`_draw_side_block`. A ``residual_from`` entry
    adds a bypass loop on the right, unless the source is the block directly
    below on the chain (the chain arrow already shows that connection).

    The inner region grows with the chain height, so longer layer bodies get
    a taller canvas while short ones keep the minimum size.

    Args:
        ir: Model IR; ``layers`` (for the repeat badge) and ``name`` (for
            the SVG title) are read here.
        info: Render metadata; supplies the dominant layer spec and is
            forwarded to the block/label helpers.
        mount_id: Identifier forwarded to ``_ids`` to derive the arrow and
            shadow definition ids.

    Returns:
        The SVG document produced by ``_svg``.
    """
    blocks = list(info["dominant"]["spec"].get("blocks") or [])

    # Partition on the ``lane`` tag: untagged blocks form the central chain,
    # tagged ones sit beside the block they feed.
    chain_blocks: list[dict] = []
    side_blocks: list[dict] = []
    for entry in blocks:
        (side_blocks if entry.get("lane") else chain_blocks).append(entry)

    # --- 1. Geometry: fixed width, height derived from the chain stack ---
    canvas_w = 720
    cx = 360
    inner_x, inner_w = 110, 500
    inner_y = 200
    inner_padding = 60
    stack_h = _layer_stack_height(chain_blocks)
    inner_h = max(490, stack_h + 2 * inner_padding)
    canvas_h = inner_y + inner_h + 232  # 232 = embed + tok_text + bottom padding

    arrow_id, shadow_id = _ids(mount_id, "arch")
    parts = [
        _defs(arrow_id, shadow_id),
        _region_rect(40, 26, canvas_w - 80, canvas_h - 52, C["bg_outer"]),
        _region_rect(inner_x, inner_y, inner_w, inner_h, C["bg_inner"]),
    ]

    # --- 2. Model-level scaffold: two blocks below the layer region
    # (anchored to the canvas bottom) and two above it (fixed y). ---
    scaffold_specs = (
        # (block id, x, y, width, height, fallback label, font size)
        ("tok_text", cx - 110, canvas_h - 100, 220, 44, "Tokenized text", 17),
        ("embed", cx - 130, canvas_h - 168, 260, 44, "Token Embedding layer", 17),
        ("final_rms", cx - 90, 140, 180, 36, "Final RMSNorm", 16),
        ("lm_head", cx - 130, 70, 260, 44, "Linear output layer", 17),
    )
    scaffold: dict[str, dict] = {}
    for block_id, x, y, box_w, box_h, fallback, font in scaffold_specs:
        scaffold[block_id] = _rect_block(
            parts, info, shadow_id, block_id,
            x, y, box_w, box_h,
            _block_label(info, block_id, fallback), font_size=font,
        )

    # --- 3. Layer body: stack chain blocks bottom-up, centred vertically ---
    block_pos: dict[str, dict] = {}
    y_cursor = inner_y + inner_h - (inner_h - stack_h) / 2
    for entry in chain_blocks:
        layout = _KIND_LAYOUT.get(entry["kind"]) or _KIND_LAYOUT["norm"]
        block_h = layout["h"]
        top = y_cursor - block_h
        if layout["shape"] != "rect":
            geom = _plus_block(
                parts, info, shadow_id, entry["id"],
                cx, top + block_h / 2, sym=layout.get("sym", "+"),
            )
        else:
            geom = _rect_block(
                parts, info, shadow_id, entry["id"],
                cx - layout["w"] / 2, top, layout["w"], block_h,
                _block_label(info, entry["id"], entry.get("label")),
                font_size=layout["font"],
            )
        block_pos[entry["id"]] = geom
        y_cursor = top - _BLOCK_GAP

    # --- 4. Straight arrows along the whole bottom-to-top chain ---
    chain = [scaffold["tok_text"], scaffold["embed"]]
    chain.extend(block_pos[entry["id"]] for entry in chain_blocks)
    chain += [scaffold["final_rms"], scaffold["lm_head"]]
    for lower, upper in zip(chain, chain[1:]):
        parts.append(_v_line(lower, upper, arrow_id))

    # Output arrow leaving the top of lm_head.
    head_top = scaffold["lm_head"]["top"]
    parts.append(_svg_tag("line", {
        "x1": cx, "y1": head_top, "x2": cx, "y2": head_top - 32,
        "stroke": C["arrow"], "stroke-width": 1.6, "stroke-linecap": "round",
        "marker-end": f"url(#{arrow_id})", "fill": "none",
    }))

    # --- 5. Residual loops declared via ``residual_from`` ---
    chain_ids = [entry["id"] for entry in chain_blocks]
    chain_prev = dict(zip(chain_ids[1:], chain_ids))  # block id -> id directly below it
    branch_taps: set[tuple[float, float]] = set()
    loop_x = inner_x + inner_w - 28
    for entry in blocks:
        src_id = entry.get("residual_from")
        if not src_id or src_id not in block_pos or entry["id"] not in block_pos:
            continue
        if chain_prev.get(entry["id"]) == src_id:
            # Adjacent on the chain: the chain arrow already carries this link.
            continue
        src_geom = block_pos[src_id]
        dst_geom = block_pos[entry["id"]]
        parts.append(_residual_loop_right(src_geom, dst_geom, loop_x, arrow_id))
        # Junction dot at the tap point on the input-arrow stem so the
        # bypass visually originates from the arrow, not from the block.
        _mark_branch_tap(parts, branch_taps, _input_tap(src_geom))

    # --- 6. Side blocks, drawn off the central column ---
    for entry in side_blocks:
        _draw_side_block(
            parts, info, shadow_id,
            entry, block_pos,
            inner_x, inner_w, arrow_id, branch_taps,
        )

    # --- 7. Repeat-count badge in the inner region's top-right corner ---
    badge_right = inner_x + inner_w
    parts.append(_svg_tag("rect", {
        "x": badge_right - 78, "y": inner_y + 12,
        "width": 66, "height": 26, "rx": 13, "ry": 13,
        "fill": "rgba(255,255,255,0.65)", "stroke": C["border"], "stroke-width": 0.5,
    }))
    parts.append(_svg_text(
        badge_right - 45, inner_y + 25,
        f"x {len(ir.get('layers', []))}",
        {"text-anchor": "middle", "dominant-baseline": "central",
         "fill": C["text"], "font-family": FONT_HEAD, "font-size": 20},
    ))

    return _svg(canvas_w, canvas_h, f"{ir.get('name', 'model')} architecture", parts)
169
+
170
+
171
+ def _layer_stack_height(layer_blocks: list[dict]) -> int:
172
+ if not layer_blocks:
173
+ return 0
174
+ total = sum(_KIND_LAYOUT.get(b["kind"], _KIND_LAYOUT["norm"])["h"] for b in layer_blocks)
175
+ total += _BLOCK_GAP * (len(layer_blocks) - 1)
176
+ return total
177
+
178
+
179
def _draw_side_block(
    parts: list[str],
    info: dict,
    shadow_id: str,
    block: dict,
    block_pos: dict,
    inner_x: float,
    inner_w: float,
    arrow_id: str,
    branch_taps: set[tuple[float, float]],
) -> None:
    """Render a block that lives OFF the central chain.

    The block is drawn at the y-row of whatever it ``feeds``, offset to the
    declared ``lane`` ("left", or anything else for the right side). Its
    input is an elbow arrow starting at the input tap of the ``tap_from``
    block; its output is a short horizontal arrow into the ``feeds`` target.

    Args:
        parts: SVG fragment list; new fragments are appended in place.
        info: Render metadata forwarded to the block/label helpers.
        shadow_id: Shadow definition id forwarded to ``_rect_block``.
        block: Side-block declaration; reads ``kind``, ``id``, ``lane``,
            ``feeds``, ``tap_from`` and ``label``.
        block_pos: Geometry of already-placed blocks keyed by block id; the
            new block's geometry is stored here as well.
        inner_x: Left edge of the inner (layer) region.
        inner_w: Width of the inner (layer) region.
        arrow_id: Arrowhead marker id used for both connectors.
        branch_taps: Tap points that already have a junction dot.

    Returns:
        None. If ``feeds`` or ``tap_from`` does not resolve to a placed
        block, nothing is drawn.
    """
    layout = _KIND_LAYOUT.get(block["kind"]) or _KIND_LAYOUT["norm"]
    block_w = layout["w"]
    block_h = layout["h"]
    # Circle kinds carry no "font" entry; fall back to the default rect font
    # so a side-lane block of such a kind is drawn instead of raising KeyError.
    font_size = layout.get("font", _KIND_LAYOUT["norm"]["font"])
    lane = block.get("lane", "left")
    feeds_id = block.get("feeds")
    tap_id = block.get("tap_from")

    feeds_geom = block_pos.get(feeds_id) if feeds_id else None
    tap_geom = block_pos.get(tap_id) if tap_id else None
    if not feeds_geom or not tap_geom:
        return  # mis-declared; nothing to anchor to

    # Side block sits at the same y as the block it feeds, shifted left/right.
    cy = feeds_geom["cy"]
    if lane == "left":
        block_x = inner_x + 30
    else:
        block_x = inner_x + inner_w - 30 - block_w
    top = cy - block_h / 2

    geom = _rect_block(
        parts, info, shadow_id, block["id"],
        block_x, top, block_w, block_h,
        _block_label(info, block["id"], block.get("label")),
        font_size=font_size,
    )
    block_pos[block["id"]] = geom

    # --- Input: long arrow up the side, tapping the chain at tap_from's input
    # stem (so the visual reads "the same x flowing into the layer also
    # feeds this side block"). Routed as a rounded L-bend.
    rail_x = geom["cx"]
    tap_x, tap_y = _input_tap(tap_geom)
    parts.append(_elbow_hv(tap_x, tap_y, rail_x, geom["bottom"] + GAP, arrow_id))
    _mark_branch_tap(parts, branch_taps, (tap_x, tap_y))

    # --- Output: short horizontal arrow into feeds target, stopping GAP
    # short of the target's edge so the arrowhead does not overlap it.
    if lane == "left":
        x1 = geom["right"]
        x2 = feeds_geom["left"] - GAP
    else:
        x1 = geom["left"]
        x2 = feeds_geom["right"] + GAP
    parts.append(_svg_tag("line", {
        "x1": x1, "y1": cy, "x2": x2, "y2": cy,
        "stroke": C["arrow"], "stroke-width": 1.6, "stroke-linecap": "round",
        "marker-end": f"url(#{arrow_id})", "fill": "none",
    }))
245
+
246
+
247
+ def _mark_branch_tap(
248
+ parts: list[str],
249
+ branch_taps: set[tuple[float, float]],
250
+ tap: tuple[float, float],
251
+ ) -> None:
252
+ key = (round(tap[0], 3), round(tap[1], 3))
253
+ if key in branch_taps:
254
+ return
255
+ branch_taps.add(key)
256
+ parts.append(_branch_dot(*tap))
257
+
258
+
259
def _build_layer_map(ir: dict, info: dict, mount_id: str) -> str:
    """Build the layer-map strip (one coloured column per layer) as SVG markup.

    Each layer is coloured by its signature group. Layers whose attention
    declares a ``kv_source_layer`` get a hatch overlay, a bracket above the
    strip and an extra legend row. A legend below the strip lists every
    group.

    Args:
        ir: Model IR; reads ``layers``, ``cross_layer_edges`` and ``name``.
        info: Render metadata; reads ``groups`` (each with ``sig`` and
            ``spec``) and the optional ``layer_sigs``.
        mount_id: Identifier used to derive definition ids, including the
            hatch pattern id.

    Returns:
        The SVG document produced by ``_svg``.
    """
    w = 720
    layers = ir.get("layers", [])
    groups = info["groups"]
    kv_shared_indices = [
        i for i, layer in enumerate(layers)
        if (layer.get("attention") or {}).get("kv_source_layer") is not None
    ]
    has_kv_share = bool(kv_shared_indices)
    n_legend_rows = len(groups) + (1 if has_kv_share else 0)

    strip_x, strip_y, strip_w, strip_h = 80, 90, w - 160, 36
    legend_row_h = 20
    legend_y = strip_y + strip_h + 44
    if has_kv_share:
        legend_y += 8  # extra breathing room below the strip

    # Reserve extra room for the optional "KV CACHE" sub-strip and its annotation.
    extra = 56 if has_kv_share else 0
    # The card background spans y = 30 .. h - 30. Legend rows advance by
    # ``legend_row_h`` and each chip extends 3px below its row centre, so the
    # height must also cover the last legend row (plus 16px padding);
    # otherwise legends with three or more rows spill outside the card.
    legend_bottom = legend_y + legend_row_h * max(n_legend_rows - 1, 0) + 3
    h = max(240, 160 + extra + 22 * n_legend_rows, legend_bottom + 16 + 30)

    arrow_id, shadow_id = _ids(mount_id, "map")
    parts = [_defs(arrow_id, shadow_id)]
    parts.append(_hatch_pattern(mount_id))
    parts.append(_region_rect(40, 30, w - 80, h - 60, C["bg_card"], stroke=C["border"], stroke_width=0.5))

    # Green-family palette so the layer map shares the diagram's theme.
    # Ordered dark → light so consecutive groups read like a gradient step.
    palette = ["#0F6E56", "#1F9E78", "#5BB89A", "#0A4F3F", "#7FCFB4", "#0E5C48", "#A0E3CD"]
    sig_to_color = {group["sig"]: palette[i % len(palette)] for i, group in enumerate(groups)}

    n = len(layers)
    col_w = strip_w / max(n, 1)  # guard against division by zero for empty models

    layer_sigs = info.get("layer_sigs") or [_signature(layer) for layer in layers]
    for i, sig in enumerate(layer_sigs):
        parts.append(
            _svg_tag(
                "rect",
                {
                    "x": strip_x + i * col_w,
                    "y": strip_y,
                    "width": max(col_w - 0.5, 1),
                    "height": strip_h,
                    "fill": sig_to_color.get(sig, palette[0]),
                    "opacity": 0.95,
                },
            )
        )

    # KV-share overlay — diagonal hatch on layers that don't compute their own K/V.
    for i in kv_shared_indices:
        parts.append(
            _svg_tag(
                "rect",
                {
                    "x": strip_x + i * col_w,
                    "y": strip_y,
                    "width": max(col_w - 0.5, 1),
                    "height": strip_h,
                    "fill": f"url(#uf-{mount_id}-hatch)",
                    "pointer-events": "none",
                },
            )
        )

    # Thin outline around the whole strip.
    parts.append(
        _svg_tag(
            "rect",
            {
                "x": strip_x,
                "y": strip_y,
                "width": strip_w,
                "height": strip_h,
                "fill": "none",
                "stroke": C["text"],
                "stroke-width": 0.4,
                "rx": 4,
                "ry": 4,
            },
        )
    )

    if n:
        # Label the first and last layer; de-duplicated so a single-layer
        # model does not get the same label drawn twice.
        for idx in dict.fromkeys((0, n - 1)):
            x = strip_x + (idx + 0.5) * col_w
            parts.append(
                _svg_text(
                    x,
                    strip_y + strip_h + 16,
                    f"L{idx}",
                    {"text-anchor": "middle", "fill": C["muted"], "font-family": FONT_MONO, "font-size": 10},
                )
            )

    type_word = "type" if len(groups) == 1 else "types"
    parts.append(
        _svg_text(
            strip_x,
            70,
            f"{n} layers - {len(groups)} {type_word}",
            {"fill": C["text"], "font-family": FONT_BODY, "font-size": 12, "font-weight": 600},
        )
    )

    if has_kv_share:
        first = kv_shared_indices[0]
        last = kv_shared_indices[-1]
        # Bracket above the strip marking where KV reuse kicks in.
        bracket_y = strip_y - 8
        x_start = strip_x + first * col_w
        x_end = strip_x + (last + 1) * col_w - 0.5
        parts.append(
            _svg_tag(
                "path",
                {
                    "d": f"M {x_start} {bracket_y - 6} L {x_start} {bracket_y} L {x_end} {bracket_y} L {x_end} {bracket_y - 6}",
                    "fill": "none",
                    "stroke": C["muted"],
                    "stroke-width": 1.0,
                    "stroke-linecap": "round",
                },
            )
        )
        # Sources of the K/V tensors — collected from cross-layer edges.
        # Edges without a ``from_layer`` are skipped: sorting a mix of None
        # and ints would raise TypeError.
        edges = ir.get("cross_layer_edges") or []
        kv_sources = sorted({
            e.get("from_layer") for e in edges
            if e.get("kind") == "kv_share" and e.get("from_layer") is not None
        })
        src_summary = (
            f"L{kv_sources[0]}–L{kv_sources[-1]}" if len(kv_sources) > 1
            else (f"L{kv_sources[0]}" if kv_sources else "earlier layer")
        )
        share_label = (
            f"K/V reused: L{first}–L{last} ({len(kv_shared_indices)} layers) ← {src_summary}"
        )
        parts.append(
            _svg_text(
                (x_start + x_end) / 2,
                bracket_y - 12,
                share_label,
                {"text-anchor": "middle", "fill": C["muted"], "font-family": FONT_MONO, "font-size": 10},
            )
        )

    lx, ly = strip_x, legend_y
    for group in groups:
        spec = group["spec"]
        # Read ``ffn`` as tolerantly as ``attention``: a spec without one is
        # labelled "Dense" rather than raising KeyError.
        ffn_kind = "MoE" if (spec.get("ffn") or {}).get("kind") == "moe" else "Dense"
        attn = spec.get("attention", {})
        label = (
            f"{kind_short(attn)} + {ffn_kind} ({mask_short(attn)})"
            f" · {_indices_summary(group, info)}"
        )
        color = sig_to_color[group["sig"]]
        parts.append(_svg_tag("rect", {"x": lx, "y": ly - 9, "width": 12, "height": 12, "fill": color, "rx": 2}))
        parts.append(
            _svg_text(
                lx + 18,
                ly,
                label,
                {"dominant-baseline": "central", "fill": C["text"], "font-family": FONT_BODY, "font-size": 12},
            )
        )
        ly += legend_row_h

    if has_kv_share:
        # Hatched chip in the legend: solid base colour with the hatch on top.
        parts.append(
            _svg_tag(
                "rect",
                {"x": lx, "y": ly - 9, "width": 12, "height": 12, "fill": palette[0], "rx": 2},
            )
        )
        parts.append(
            _svg_tag(
                "rect",
                {
                    "x": lx,
                    "y": ly - 9,
                    "width": 12,
                    "height": 12,
                    "fill": f"url(#uf-{mount_id}-hatch)",
                    "rx": 2,
                },
            )
        )
        parts.append(
            _svg_text(
                lx + 18,
                ly,
                f"K/V reused (no own K/V projections) · {len(kv_shared_indices)} layers",
                {"dominant-baseline": "central", "fill": C["text"], "font-family": FONT_BODY, "font-size": 12},
            )
        )

    return _svg(w, h, f"{ir.get('name', 'model')} layer map", parts)
449
+
450
+
451
+ def _hatch_pattern(mount_id: str) -> str:
452
+ """Diagonal-stripe pattern used to mark KV-shared layers."""
453
+ pid = f"uf-{mount_id}-hatch"
454
+ return (
455
+ '<defs>'
456
+ f'<pattern id="{pid}" patternUnits="userSpaceOnUse" width="6" height="6" patternTransform="rotate(45)">'
457
+ '<rect width="6" height="6" fill="none"/>'
458
+ '<line x1="0" y1="0" x2="0" y2="6" stroke="rgba(255,255,255,0.55)" stroke-width="2"/>'
459
+ '</pattern>'
460
+ '</defs>'
461
+ )
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.4
2
+ Name: model-unfolder
3
+ Version: 0.2.0
4
+ Summary: Unfold any HuggingFace transformer into an interactive architecture diagram, inline in Jupyter.
5
+ Author: model-unfolder contributors
6
+ License: Apache-2.0
7
+ Keywords: transformers,visualization,llm,architecture,jupyter
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Framework :: Jupyter
11
+ Classifier: Topic :: Scientific/Engineering :: Visualization
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Provides-Extra: hf
16
+ Requires-Dist: transformers>=4.40; extra == "hf"
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=7; extra == "dev"
19
+ Requires-Dist: transformers>=4.40; extra == "dev"
20
+ Dynamic: license-file
21
+
22
+ # MODEL UNFOLDER
23
+
24
+ > Your one-click model unfolder
25
+
26
+ ```python
27
+ from model_unfolder import unfold
28
+ unfold("meta-llama/Meta-Llama-3-8B")
29
+ ```
30
+
31
+ <p align="center">
32
+ <a href="examples/llama-3-8b.html">
33
+ <img src="examples/images/llama-3-8b.png" width="540" alt="Meta-Llama-3-8B architecture diagram">
34
+ </a>
35
+ </p>
36
+
37
+ ---
38
+
39
+ ## Install
40
+
41
+ ```bash
42
+ pip install model-unfolder
43
+
44
+ # for local development
45
+ pip install -e .
46
+ pip install transformers # only required to load by model ID
47
+ ```
48
+
49
+ ## Three ways to call it
50
+
51
+ ```python
52
+ from model_unfolder import unfold
53
+
54
+ # 1) by HuggingFace model ID — only config.json is downloaded, never weights
55
+ unfold("meta-llama/Meta-Llama-3-8B")
56
+ unfold("deepseek-ai/DeepSeek-V3")
57
+
58
+ # 2) from a transformers AutoConfig
59
+ from transformers import AutoConfig
60
+ unfold(AutoConfig.from_pretrained("Qwen/Qwen2.5-7B", trust_remote_code=True))
61
+
62
+ # 3) from a raw config.json dict — no transformers install needed
63
+ import json
64
+ unfold(json.load(open("config.json")))
65
+ ```
66
+
67
+ ## Built on `transformers`
68
+
69
+ Pass a model ID and `unfold` calls `transformers.AutoConfig.from_pretrained(model_id)` under the hood ([parser.py](model_unfolder/parser.py)). It only retries with `trust_remote_code=True` when Transformers says the config requires remote code.
70
+
71
+ ## Auth-token from your environment
72
+
73
+ Gated models (Llama-3, Mistral, Gemma, …) need a HuggingFace token. `unfold` reuses whatever `transformers` / `huggingface_hub` already see:
74
+
75
+ ```bash
76
+ # Either set an env var
77
+ export HF_TOKEN="hf_xxxxxxxx" # also accepted: HUGGING_FACE_HUB_TOKEN
78
+
79
+ # or use the CLI cache (persists across sessions)
80
+ huggingface-cli login
81
+
82
+ # or load a .env in your notebook
83
+ # >>> from dotenv import load_dotenv; load_dotenv()
84
+ ```
85
+
86
+ No extra config in `model_unfolder` itself.
87
+
88
+ ## Save / export
89
+
90
+ ```python
91
+ diagram = unfold(cfg)
92
+ diagram.save("model.html") # standalone interactive HTML
93
+ diagram.save("model.json") # IR (no rendering)
94
+ diagram.param_count() # {"total": ..., "active": ..., "per_layer": [...]}
95
+ diagram.to_ir() # full IR dict
96
+ ```
97
+
98
+ Parameter estimates are close to published numbers: the tool reports `~675B (~41B active)` for DeepSeek-V3 and `8.03B` for Llama-3-8B.
99
+
100
+ ## Live demos
101
+
102
+ Open in any browser to interact (click blocks, expand sub-blocks, toggle layer types):
103
+
104
+ | Model | Highlights | Demo |
105
+ |---|---|---|
106
+ | Llama-3-8B | GQA + dense baseline | [examples/llama-3-8b.html](examples/llama-3-8b.html) |
107
+ | Mistral-7B-v0.3 | GQA + dense, 32k context | [examples/mistral-7b-v0.3.html](examples/mistral-7b-v0.3.html) |
108
+ | DeepSeek-V3 | MLA + dense → MoE phase change | [examples/deepseek-v3.html](examples/deepseek-v3.html) |
109
+ | Kimi K2 | MLA + 384-expert MoE, ~1T params | [examples/kimi-k2.html](examples/kimi-k2.html) |
110
+
111
+ ## Supported architectures
112
+
113
+ | Family | Adapter | Notes |
114
+ |---|---|---|
115
+ | DeepSeek-V2 / V3 / Kimi K2 | [families/deepseek.py](model_unfolder/adapters/transformer/families/deepseek.py) | MLA + dense → MoE phase change |
116
+ | Llama / Mistral / Qwen2 / Qwen3 / Phi-3 | [families/llama.py](model_unfolder/adapters/transformer/families/llama.py) | GQA / MQA / MHA + dense FFN |
117
+ | Gemma 4 | [families/gemma4.py](model_unfolder/adapters/transformer/families/gemma4.py) | sliding/global layers, KV sharing, PLE |
118
+
119
+
120
+ ## License
121
+
122
+ [Apache 2.0](LICENSE).
@@ -0,0 +1,41 @@
1
+ model_unfolder/__init__.py,sha256=tHQqiyBxVcnfLXdLzj9nVl_e1tZXeLJrSZ0h9FXgBkU,1436
2
+ model_unfolder/diagram.py,sha256=IqxzaCkIlUxqubQ15tTG_VYyq0fPJjEDrDoF43GvvR0,3312
3
+ model_unfolder/html_renderer.py,sha256=BApMwMhik8DXRs30B-Q6yxPs9wauCZRq5-34kIo2rdU,175
4
+ model_unfolder/ir.py,sha256=odlJo9qZMP_x-umbAvyyVw2bIxwxg5A0bAtSpg15YWc,5543
5
+ model_unfolder/labels.py,sha256=Sgk77kVHkNQdwutE445XLmbpZT05mDV8PgOqBSAxE18,5752
6
+ model_unfolder/params.py,sha256=HOgXh4P9HxTwiAjwhuIhj4ajjTitRK7rYK3JGQz_ul0,3682
7
+ model_unfolder/parser.py,sha256=3pST8OUzur6AdrNLRK808vkA0Kvn0dh36U3fhwfdAWM,4023
8
+ model_unfolder/adapters/__init__.py,sha256=hat_sdbjeFQYorLiOFzzUjnwcE_6f0CWqRfiOLsimDs,319
9
+ model_unfolder/adapters/custom/__init__.py,sha256=b67NtIumgv2H4mcoUi7vllXAEJPobFLccyNax3t_-gM,209
10
+ model_unfolder/adapters/diffusor/__init__.py,sha256=IRCb0ZqRTg6mGQE4IGmXDq5zfXOCTkIzvVcoOE7bdWI,203
11
+ model_unfolder/adapters/transformer/__init__.py,sha256=3u716Sm4JHKlQsg-RUNSFdTzcWA2p86Re0OTRlGOGC8,121
12
+ model_unfolder/adapters/transformer/assembly.py,sha256=RWdlIdv7Kw8iFVw1rd_M-tmRn_KvpuVGEg8_nd1yQuc,1534
13
+ model_unfolder/adapters/transformer/blocks.py,sha256=C0hcbdFKzgJDkkf3mNXjBJ3E3sltrItok8CuDsKFneU,9003
14
+ model_unfolder/adapters/transformer/common.py,sha256=rfjUlHrR4ro-megaQZLv3h3fbjotn1PNBjoXLSqZ7b4,1073
15
+ model_unfolder/adapters/transformer/families/__init__.py,sha256=HKd6kn3ZegP0Fvd_bBt2o-okMek10xgC33zmSE4-Ils,442
16
+ model_unfolder/adapters/transformer/families/deepseek.py,sha256=Ayd34wHNGaWnw-yVc3xv_GiS75Aafy2D72YQvSTHIF4,3874
17
+ model_unfolder/adapters/transformer/families/gemma4.py,sha256=1jMzdgnpacrApalsVHBmancJOp4dTrlmn-oARmMX3fk,7308
18
+ model_unfolder/adapters/transformer/families/llama.py,sha256=t3YBkng1y5Ue-J0m9xfa0546nObvB_sUBfyv2-KDJxY,3079
19
+ model_unfolder/adapters/transformer/special_parts/__init__.py,sha256=UwsfIcLipJlrXRAmzrBF6mhsVDQTaMksvWZ_0bC-bEU,41
20
+ model_unfolder/adapters/transformer/special_parts/per_layer_embedding.py,sha256=gYlV3W6j1W1H0Vyfn2SLiKlhUETs_skjFqO1rOMQUTU,6818
21
+ model_unfolder/renderers/__init__.py,sha256=1ykvzunxvgh2Isb8rfaxVuL7cU4z5uft0xL7y-MbS2c,54
22
+ model_unfolder/renderers/html/__init__.py,sha256=m40f45B1vwhchu6YIoFPKF_put6bVeEMxvhoQXsKwMQ,140
23
+ model_unfolder/renderers/html/cards.py,sha256=0kCAtoFSOb2yynMrktvQW5JFoWDCerZYxmgnsP9ftpA,5109
24
+ model_unfolder/renderers/html/document.py,sha256=oWJDIRYM6q2vU0qzWocq2BV2b_Wu3Pk_z6gFoRDenEo,5750
25
+ model_unfolder/renderers/html/interactions.py,sha256=UoA1J2DHPTxsWMDy1DcwEl1FoPA99PO34fU_1Qjz6qE,2076
26
+ model_unfolder/renderers/html/metadata.py,sha256=KNIUYO9I4TIxRer_9Vw98L8Lj2YS7QsLFTg_Fk4grVM,10046
27
+ model_unfolder/renderers/html/sections.py,sha256=Ahp_C_JykGBWIVx-g0FSQeaFb_tQ89pJLIOemJkYe88,2082
28
+ model_unfolder/renderers/html/styles.py,sha256=FmC7ottzXNRfJPCoPkismEOBo-cGqlxegmyWBSPbHfY,6322
29
+ model_unfolder/renderers/html/svg.py,sha256=wnOglNvFmcsG29sULBmz2soZesLF5P2FQRmQyikOoXs,10907
30
+ model_unfolder/renderers/html/theme.py,sha256=90mxafJl8H_EaUZESo6t8Lz9P_G71xY4POBjdxr5XUo,821
31
+ model_unfolder/renderers/html/utils.py,sha256=mOiKht9RcxdWIypw1-ZVyEPDd4vtDWtNFFQEfucQegI,602
32
+ model_unfolder/renderers/html/views.py,sha256=CsOaa7UMaO2yE5z-Q3a9UUEl00_Y-MSrP2h6vJSHUu0,17477
33
+ model_unfolder/renderers/html/block_views/__init__.py,sha256=Bku0AMi-1UIBJe1FUqQMqWelwTzys71N7vdpIRdWPsU,815
34
+ model_unfolder/renderers/html/block_views/attention.py,sha256=z8jOK5BzdDU_FU0kh9rpNb5EL9Gg3y1aGEC51VneboI,2769
35
+ model_unfolder/renderers/html/block_views/feed_forward.py,sha256=MZeH_X_4FTxr_25PZvpPXX_ExlAIYmiGKyC1PIoLv5c,8465
36
+ model_unfolder/renderers/html/block_views/per_layer_embedding.py,sha256=oRljarBSuWLu6ybbMwubUvfh9VaLUfneGX8yTi8kyxE,6569
37
+ model_unfolder-0.2.0.dist-info/licenses/LICENSE,sha256=yMO60lTafQiwR2JxgndvctUowU2R-x1qNTYdTCL8B-8,11317
38
+ model_unfolder-0.2.0.dist-info/METADATA,sha256=WJB3K7GFKyTZ-nYQSa2PCYSWxtkXdNawZvZYjEMYYBw,4027
39
+ model_unfolder-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
40
+ model_unfolder-0.2.0.dist-info/top_level.txt,sha256=bVwVm4fshXNJ6gUZhj1atwhytYmjm8zpoM5ijBRkrRo,15
41
+ model_unfolder-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+