wasm-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +46 -0
  2. package/LICENSE +21 -0
  3. package/README.md +108 -0
  4. package/build/wasm-proposals-main.json +1 -0
  5. package/build/wasm-sections-js-api-main.json +1 -0
  6. package/build/wasm-sections-web-api-main.json +1 -0
  7. package/build/wasm-spec-core-main.json +1 -0
  8. package/dist/mcp/_args.d.ts +22 -0
  9. package/dist/mcp/_args.js +25 -0
  10. package/dist/mcp/instructions.d.ts +1 -0
  11. package/dist/mcp/instructions.js +67 -0
  12. package/dist/mcp/server.d.ts +2 -0
  13. package/dist/mcp/server.js +63 -0
  14. package/dist/mcp/tool_meta.d.ts +32 -0
  15. package/dist/mcp/tool_meta.js +100 -0
  16. package/dist/mcp/tools/instruction_get.d.ts +32 -0
  17. package/dist/mcp/tools/instruction_get.js +39 -0
  18. package/dist/mcp/tools/instruction_list.d.ts +67 -0
  19. package/dist/mcp/tools/instruction_list.js +52 -0
  20. package/dist/mcp/tools/instruction_search.d.ts +28 -0
  21. package/dist/mcp/tools/instruction_search.js +33 -0
  22. package/dist/mcp/tools/proposal_list.d.ts +51 -0
  23. package/dist/mcp/tools/proposal_list.js +44 -0
  24. package/dist/mcp/tools/section_get.d.ts +29 -0
  25. package/dist/mcp/tools/section_get.js +32 -0
  26. package/dist/mcp/tools/section_list.d.ts +49 -0
  27. package/dist/mcp/tools/section_list.js +56 -0
  28. package/dist/mcp/tools/spec_search.d.ts +35 -0
  29. package/dist/mcp/tools/spec_search.js +34 -0
  30. package/dist/mcp/tools/spec_version.d.ts +28 -0
  31. package/dist/mcp/tools/spec_version.js +30 -0
  32. package/dist/mcp/tools/type_get.d.ts +22 -0
  33. package/dist/mcp/tools/type_get.js +31 -0
  34. package/dist/parser/bikeshed.d.ts +8 -0
  35. package/dist/parser/bikeshed.js +106 -0
  36. package/dist/parser/instructions.d.ts +171 -0
  37. package/dist/parser/instructions.js +241 -0
  38. package/dist/parser/proposals.d.ts +30 -0
  39. package/dist/parser/proposals.js +188 -0
  40. package/dist/parser/sections.d.ts +27 -0
  41. package/dist/parser/sections.js +213 -0
  42. package/dist/parser/types.d.ts +37 -0
  43. package/dist/parser/types.js +116 -0
  44. package/dist/parser/upstream.d.ts +7 -0
  45. package/dist/parser/upstream.js +230 -0
  46. package/dist/paths.d.ts +3 -0
  47. package/dist/paths.js +12 -0
  48. package/dist/spec/catalog.d.ts +10 -0
  49. package/dist/spec/catalog.js +20 -0
  50. package/dist/spec/instructions_query.d.ts +46 -0
  51. package/dist/spec/instructions_query.js +120 -0
  52. package/dist/spec/pin.d.ts +13 -0
  53. package/dist/spec/pin.js +39 -0
  54. package/dist/spec/proposals_query.d.ts +15 -0
  55. package/dist/spec/proposals_query.js +23 -0
  56. package/dist/spec/sections_query.d.ts +43 -0
  57. package/dist/spec/sections_query.js +89 -0
  58. package/dist/spec/spec_data.d.ts +46 -0
  59. package/dist/spec/spec_data.js +92 -0
  60. package/dist/spec/tool_inventory.d.ts +5 -0
  61. package/dist/spec/tool_inventory.js +17 -0
  62. package/dist/versions.d.ts +12 -0
  63. package/dist/versions.js +22 -0
  64. package/package.json +76 -0
@@ -0,0 +1,213 @@
1
+ // Parse the WebAssembly core spec's reStructuredText sources into a
2
+ // flat, anchor-addressable clause index.
3
+ //
4
+ // Since March 2025 the spec is authored in SpecTec: the `.rst` files
5
+ // carry hand-written prose interleaved with SpecTec splice macros
6
+ // (`$${syntax: numtype}`, `$${rule-prose: Step_pure/nop}`,
7
+ // `${:I32}`). The OCaml SpecTec tool expands those into the formal
8
+ // grammar / typing / reduction notation at build time. We do NOT run
9
+ // SpecTec here — that keeps the build deterministic over the pinned
10
+ // SHA without an OCaml toolchain. Instead we:
11
+ //
12
+ // - keep the hand-written prose (rich for syntax/binary/text/intro/
13
+ // appendix sections),
14
+ // - record the SpecTec rule / syntax names the splices reference
15
+ // (`formal_refs`) so callers can see which formal rule a clause
16
+ // defines and follow the rendered URL for the notation itself,
17
+ // - resolve every `:ref:` cross-reference target.
18
+ //
19
+ // Each clause is addressable by any of the `.. _anchor:` labels that
20
+ // attach to it, mirroring the stable fragment ids in the rendered
21
+ // spec.
22
const SPEC_BASE = "https://webassembly.github.io/spec/core";
/**
 * Build the rendered-spec URL for a clause.
 *
 * @param path   Source-relative path without extension (e.g. `syntax/types`).
 * @param anchor Fragment id to append; any falsy value (null, undefined,
 *               empty string) yields the bare page URL.
 * @returns The page URL under SPEC_BASE, with `#anchor` when one is given.
 */
export function clauseUrl(path, anchor) {
    const pageUrl = `${SPEC_BASE}/${path}.html`;
    if (!anchor) {
        return pageUrl;
    }
    return `${pageUrl}#${anchor}`;
}
28
/**
 * Turn a heading title into a lowercase, dash-separated slug: every run
 * of characters outside `a-z0-9` becomes a single `-`, with no leading
 * or trailing dash.
 */
function slugify(title) {
    const words = title.toLowerCase().split(/[^a-z0-9]+/);
    // Splitting leaves an empty string at either edge when the title
    // starts or ends with a separator run; dropping those is the same as
    // trimming the dashes afterwards.
    return words.filter((word) => word !== "").join("-");
}
34
/**
 * Decide whether `underline` is a valid RST heading adornment for
 * `title`: a non-empty run of one single adornment character that is
 * at least as long as the trimmed, non-empty title.
 */
function isUnderline(underline, title) {
    if (underline.length === 0) {
        return false;
    }
    const mark = underline.charAt(0);
    if ("=-~^\"`+*#:.'_".indexOf(mark) === -1) {
        return false;
    }
    // Every remaining character must repeat the first one.
    for (let k = 1; k < underline.length; k++) {
        if (underline.charAt(k) !== mark) {
            return false;
        }
    }
    // RST requires the underline to be at least as long as the title.
    const titleWidth = title.trim().length;
    return titleWidth > 0 && underline.length >= titleWidth;
}
46
/**
 * Strip SpecTec splice macros and RST inline markup from a prose block.
 *
 * Side effects: every `:ref:` target found is added to `crossrefs`, and
 * every SpecTec rule / syntax / atom name found in a splice is added to
 * `formalRefs`.
 *
 * @param raw        The raw RST text of one clause body.
 * @param crossrefs  Set that receives cross-reference targets.
 * @param formalRefs Set that receives SpecTec names.
 * @returns The cleaned prose: block splices removed, inline atoms and
 *          roles reduced to their readable content, runs of blanks
 *          collapsed, trailing blanks removed from every line, and
 *          three or more consecutive newlines reduced to two.
 */
function cleanProse(raw, crossrefs, formalRefs) {
    let text = raw;
    const collectRefs = (body) => {
        // body like "rule-prose: Step_pure/nop", "syntax: numtype",
        // "rule: {Step_pure/select-*}", or ":I32" (inline atom).
        // Everything after the first colon is the list of names.
        const colon = body.indexOf(":");
        const payload = (colon >= 0 ? body.slice(colon + 1) : body).trim();
        const names = [];
        for (const ref of payload.split(/[\s,]+/)) {
            const name = ref.replace(/[{}]/g, "").trim();
            // Require at least one letter so bare punctuation or numbers
            // are not recorded as names.
            if (name && /[A-Za-z]/.test(name)) {
                formalRefs.add(name);
                names.push(name);
            }
        }
        return names;
    };
    // Block-level SpecTec splices `$${...}` — whole formal blocks
    // (grammar, typing/reduction rules). Record the referenced names
    // and drop the block from prose. The brace body may nest one level
    // (`$${rule: {Step_pure/nop}}`).
    text = text.replace(/\$\$\{(?:[^{}]|\{[^{}]*\})*\}/g, (m) => {
        collectRefs(m.slice(3, -1));
        return "";
    });
    // Inline SpecTec atoms `${...}` — a single type / keyword / value
    // reference rendered inline. Record the name and keep it readable
    // (`${:I32}` → `I32`, `${:SELECT}` → `SELECT`).
    text = text.replace(/\$\{(?:[^{}]|\{[^{}]*\})*\}/g, (m) => {
        const names = collectRefs(m.slice(2, -1));
        return names.join(" ");
    });
    // :ref:`text <target>` and :ref:`target` → keep text, record target.
    text = text.replace(/:ref:`([^`<]+?)\s*<([^>]+)>`/g, (_m, label, target) => {
        crossrefs.add(target.trim());
        return label.trim();
    });
    text = text.replace(/:ref:`([^`]+)`/g, (_m, target) => {
        crossrefs.add(target.trim());
        return target.trim();
    });
    // Other RST roles: :math:`x`, :code:`x`, :token:`x`, :superscript:`x`
    // → keep the inner content.
    text = text.replace(/:[a-z]+:`([^`]*)`/g, "$1");
    // Substitution refs: |IEEE754|_ or |foo| → strip the bars.
    text = text.replace(/\|([A-Za-z0-9_]+)\|_?/g, "$1");
    // Emphasis markers (keep the words).
    text = text.replace(/\*\*([^*]+)\*\*/g, "$1").replace(/\*([^*]+)\*/g, "$1");
    text = text.replace(/``([^`]+)``/g, "$1");
    // Normalise whitespace. Trailing blanks are removed per line FIRST:
    // a dropped block splice on an indented line leaves a whitespace-only
    // line behind, and unless that line is emptied the `\n{3,}` collapse
    // below cannot see the surrounding blank lines as one run.
    return text
        .replace(/[ \t]+$/gm, "")
        .replace(/[ \t]+/g, " ")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
}
103
/**
 * Parse one RST document into clauses.
 *
 * A clause starts at every heading (a non-blank line followed by a valid
 * adornment line) and at every run of `.. _anchor:` labels that is
 * followed by non-heading content. Lines that appear before the first
 * heading or anchor belong to no clause and are dropped.
 *
 * @param source Full text of the `.rst` file (LF or CRLF line endings).
 * @param path   Source-relative path without extension (e.g.
 *               `syntax/types`); used for ids and URLs.
 * @returns One object per clause with `id`, `anchors`, `title` (null for
 *          anchor-only clauses), `level` (0 for anchor-only clauses),
 *          `path`, `prose`, `crossrefs`, `formal_refs` and `url`.
 */
export function parseRst(source, path) {
    // Split on LF or CRLF so a checkout with Windows line endings does
    // not leak stray `\r` characters into clause prose (where they would
    // also stop consecutive blank lines from being collapsed).
    const lines = source.split(/\r?\n/);
    // First pass: discover the order in which underline chars appear so
    // we can assign heading levels the RST way (first char seen = level 1).
    const charLevels = new Map();
    for (let i = 0; i + 1 < lines.length; i++) {
        const title = lines[i];
        const underline = lines[i + 1];
        if (title.trim() !== "" && isUnderline(underline.trim(), title)) {
            const ch = underline.trim()[0];
            if (!charLevels.has(ch))
                charLevels.set(ch, charLevels.size + 1);
        }
    }
    const blocks = [];
    // Anchor labels seen since the last block was opened; they attach to
    // whichever block starts next.
    let pendingAnchors = [];
    let current = null;
    const flush = () => {
        if (current)
            blocks.push(current);
        current = null;
    };
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        // Anchor label: `.. _name:`
        const anchorMatch = trimmed.match(/^\.\.\s+_([A-Za-z0-9_.-]+):$/);
        if (anchorMatch) {
            pendingAnchors.push(anchorMatch[1]);
            continue;
        }
        // Heading: a non-blank line followed by an underline.
        const next = lines[i + 1];
        if (trimmed !== "" && next !== undefined && isUnderline(next.trim(), line)) {
            flush();
            current = {
                anchors: pendingAnchors,
                title: trimmed,
                level: charLevels.get(next.trim()[0]) ?? 1,
                bodyLines: [],
            };
            pendingAnchors = [];
            i++; // consume the underline line
            continue;
        }
        // Other `.. directive::` lines (index, note, etc.). An anchor's
        // content can be a bare splice/paragraph, so anchors that were
        // pending and are now hitting non-heading content start an
        // anchor-only block.
        if (pendingAnchors.length > 0 && trimmed !== "") {
            flush();
            current = { anchors: pendingAnchors, title: null, level: 0, bodyLines: [] };
            pendingAnchors = [];
        }
        if (current)
            current.bodyLines.push(line);
    }
    flush();
    // Drop pure `.. index::` / directive noise from body and build clauses.
    const clauses = [];
    for (const block of blocks) {
        const crossrefs = new Set();
        const formalRefs = new Set();
        // Remove standalone directive blocks we don't surface as prose:
        // `.. index::`, `.. toctree::`, `.. only::` and `.. math::`
        // together with their indented continuation lines. For any other
        // directive (e.g. `.. note::` / `.. warning::`) only the marker
        // line itself is dropped and the following body is kept as prose.
        const kept = [];
        let skippingDirective = false;
        for (const raw of block.bodyLines) {
            const directive = raw.match(/^\.\.\s+([a-z-]+)::/);
            if (directive) {
                const name = directive[1];
                skippingDirective = ["index", "toctree", "only", "math"].includes(name);
                // The marker line is never kept, whichever directive it is.
                continue;
            }
            // Indented continuation of a skipped directive.
            if (skippingDirective) {
                if (raw.trim() === "" || /^\s/.test(raw))
                    continue;
                // First unindented, non-blank line ends the directive.
                skippingDirective = false;
            }
            kept.push(raw);
        }
        const prose = cleanProse(kept.join("\n"), crossrefs, formalRefs);
        const anchors = block.anchors;
        // Prefer the first explicit anchor; otherwise derive an id from
        // the path and title, falling back to the bare path.
        const id = anchors[0] ?? (block.title ? `${path}-${slugify(block.title)}` : path);
        // Skip empty connective blocks (no anchor, no title, no prose).
        if (anchors.length === 0 && block.title === null && prose === "")
            continue;
        clauses.push({
            id,
            anchors,
            title: block.title,
            level: block.level,
            path,
            prose,
            crossrefs: [...crossrefs],
            formal_refs: [...formalRefs],
            url: clauseUrl(path, anchors[0] ?? null),
        });
    }
    return clauses;
}
@@ -0,0 +1,37 @@
1
+ import type { SpecClause } from "./sections.js";
2
+ export type TypeKind = "number" | "vector" | "reference" | "form";
3
/** One entry of the type catalog: either a concrete value type or a
 * type form, joined with the clause that defines it. */
export interface TypeEntry {
    /** Type or type-form name, e.g. `i32`, `funcref`, `functype`. */
    name: string;
    /** Classification of this entry. */
    kind: TypeKind;
    /** Defining clause anchor, e.g. `syntax-numtype`, `syntax-functype`.
     * For concrete value types this is the anchor of the category
     * clause the type is grouped under. */
    anchor: string;
    /** For concrete value types: the OTHER accepted types of the same
     * category, sorted, excluding this entry itself (e.g. for `i32` the
     * remaining number types). Empty for type forms. */
    members: string[];
    /** Defining clause title, e.g. `Number Types`. `null` when the
     * clause has no title or could not be found. */
    title: string | null;
    /** Defining clause prose (empty string when the clause could not be
     * found). */
    prose: string;
    /** SpecTec rule / syntax names referenced by the defining clause. */
    formal_refs: string[];
    /** Rendered spec URL for the defining clause. */
    url: string;
}
22
/** Raw macro shape as emitted by src/parser/upstream.ts. */
interface RawMacro {
    /** Cleaned `\K{...}` body of the macro, e.g. `i32` or `local.get`. */
    body: string;
    /** `"instruction"` when the anchor starts with `syntax-instr-`,
     * `"type"` when the anchor is one of the known type anchors,
     * `"other"` otherwise. */
    kind: "instruction" | "type" | "other";
    /** For instruction macros, the anchor text after `syntax-instr-`;
     * `null` for every other kind. */
    category: string | null;
    /** First `\xref{...}` argument of the macro definition. */
    section: string;
    /** Second `\xref{...}` argument of the macro definition. */
    anchor: string;
}
30
+ /**
31
+ * Build the type catalog. `macros` is the dumped macro table;
32
+ * `typeClauses` is the parsed `syntax/types` section.
33
+ */
34
+ export declare function buildTypeCatalog(macros: Record<string, RawMacro>, typeClauses: SpecClause[]): TypeEntry[];
35
+ /** Look up one type by name (case-insensitive, exact). */
36
+ export declare function getType(catalog: TypeEntry[], name: string): TypeEntry | null;
37
+ export {};
@@ -0,0 +1,116 @@
1
+ // Build a type catalog from the macro table (concrete value-type
2
+ // names) joined with the parsed `syntax/types` section clauses
3
+ // (the prose + anchors that define each type and type form).
4
+ //
5
+ // Two kinds of entry:
6
+ // - concrete value types — `i32`, `f64`, `v128`, `funcref`, … —
7
+ // grouped under their category section (number / vector /
8
+ // reference types);
9
+ // - type forms — `functype`, `limits`, `memtype`, `rectype`, … —
10
+ // each its own clause in the types section.
11
// Lookup table keyed by the anchors that concrete value-type macros
// carry. Each row gives the catalog `kind`, the canonical category
// clause anchor, and an `accept` predicate that admits only well-formed
// concrete type names.
//
// The predicates exist because the macro table files other things under
// the same anchors: meta-variable shorthands (`\INX` → `i`, `\FNX` →
// `f`, `\VNX` → `v`), lane widths (`\I128` → `i128`), and reference
// constructors / keywords (`\REF` → `ref`, `\NULL` → `null`). None of
// those are value types, so `accept` rejects them.
const isNumberTypeName = (body) => /^[if](32|64)$/.test(body);
const isVectorTypeName = (body) => body === "v128";
const isReferenceTypeName = (body) => body.endsWith("ref") && body !== "ref";
const VALUE_TYPE_ANCHORS = {
    "syntax-numtype": { kind: "number", clause: "syntax-numtype", accept: isNumberTypeName },
    "syntax-vectype": { kind: "vector", clause: "syntax-vectype", accept: isVectorTypeName },
    "syntax-reftype": { kind: "reference", clause: "syntax-reftype", accept: isReferenceTypeName },
};
37
/**
 * Return the first clause whose `anchors` list contains `anchor`, or
 * `undefined` when no clause carries it.
 */
function findClause(clauses, anchor) {
    for (const candidate of clauses) {
        if (candidate.anchors.includes(anchor)) {
            return candidate;
        }
    }
    return undefined;
}
40
/**
 * Build the type catalog.
 *
 * @param macros      The dumped macro table (macro name → raw macro).
 *                    Only macros with `kind === "type"` whose anchor is a
 *                    key of VALUE_TYPE_ANCHORS and whose body passes that
 *                    row's `accept` predicate become value-type entries.
 * @param typeClauses The parsed `syntax/types` section clauses.
 * @returns Catalog entries sorted by name: one per accepted concrete
 *          value type, plus one `kind: "form"` entry per `syntax-<name>`
 *          clause anchor not already covered.
 */
export function buildTypeCatalog(macros, typeClauses) {
    const entries = [];
    const seen = new Set();
    // Own-property test: a plain `obj[key]` / `key in obj` lookup would
    // also hit inherited names such as `constructor` or `toString`.
    const isValueTypeAnchor = (anchor) => Object.prototype.hasOwnProperty.call(VALUE_TYPE_ANCHORS, anchor);
    // 1. Concrete value types, grouped by category anchor.
    const byCategory = new Map();
    for (const macro of Object.values(macros)) {
        if (macro.kind !== "type")
            continue;
        if (!isValueTypeAnchor(macro.anchor))
            continue; // skip syntax-shape / syntax-valtype aliases here
        const mapping = VALUE_TYPE_ANCHORS[macro.anchor];
        if (!mapping.accept(macro.body))
            continue; // drop meta-vars / keywords
        const list = byCategory.get(mapping.clause) ?? [];
        if (!list.includes(macro.body))
            list.push(macro.body);
        byCategory.set(mapping.clause, list);
    }
    for (const [clauseAnchor, mapping] of Object.entries(VALUE_TYPE_ANCHORS)) {
        const members = (byCategory.get(clauseAnchor) ?? []).sort();
        const clause = findClause(typeClauses, mapping.clause);
        for (const name of members) {
            if (seen.has(name))
                continue;
            seen.add(name);
            entries.push({
                name,
                kind: mapping.kind,
                anchor: clauseAnchor,
                members: members.filter((m) => m !== name),
                title: clause?.title ?? null,
                prose: clause?.prose ?? "",
                // Copy: all members of a category are built from the same
                // clause, and must not share one mutable array.
                formal_refs: [...(clause?.formal_refs ?? [])],
                url: clause?.url ??
                    `https://webassembly.github.io/spec/core/syntax/types.html#${clauseAnchor}`,
            });
        }
    }
    // 2. Type forms — every `syntax-<form>` clause in the types section
    //    that isn't already a concrete value type. Covers functype,
    //    limits, memtype, tabletype, globaltype, rectype, heaptype,
    //    resulttype, blocktype, externtype, etc.
    for (const clause of typeClauses) {
        for (const anchor of clause.anchors) {
            const m = anchor.match(/^syntax-([a-z0-9]+)$/);
            if (!m)
                continue;
            const name = m[1];
            if (seen.has(name))
                continue;
            // Skip the category clauses already represented by their members.
            if (isValueTypeAnchor(anchor))
                continue;
            seen.add(name);
            entries.push({
                name,
                kind: "form",
                anchor,
                members: [],
                title: clause.title,
                prose: clause.prose,
                // Copy so the entry does not alias the clause's own array.
                formal_refs: [...(clause.formal_refs ?? [])],
                url: clause.url,
            });
        }
    }
    // Pin the collation locale: without it `localeCompare` follows the
    // machine's default locale, so the catalog order (and any artifact
    // built from it) could differ between build hosts.
    return entries.sort((a, b) => a.name.localeCompare(b.name, "en"));
}
112
/**
 * Find a catalog entry by exact name, ignoring case and surrounding
 * whitespace in `name`. Returns the first match, or `null` if none.
 */
export function getType(catalog, name) {
    const wanted = name.trim().toLowerCase();
    for (const entry of catalog) {
        if (entry.name.toLowerCase() === wanted) {
            return entry;
        }
    }
    return null;
}
@@ -0,0 +1,7 @@
1
+ import type { RawInstruction, RawMacro, RawDump } from "./instructions.js";
2
+ export type { RawInstruction, RawMacro, RawDump } from "./instructions.js";
3
+ export declare function parseInstructions(source: string): RawInstruction[];
4
+ export declare function parseMacros(text: string): Record<string, RawMacro>;
5
+ /** Read both upstream files from a vendored checkout and produce the
6
+ * combined raw dump. */
7
+ export declare function extractRawDump(snapshotDir: string): RawDump;
@@ -0,0 +1,230 @@
1
+ // Extract the structured instruction index + macro table from the
2
+ // upstream WebAssembly/spec sources — a pure-TypeScript replacement
3
+ // for the old scripts/dump-instructions.py, so the build needs no
4
+ // Python toolchain.
5
+ //
6
+ // Two upstream files are read:
7
+ //
8
+ // document/core/appendix/index-instructions.py
9
+ // The structured source for the appendix instruction table. We do
10
+ // NOT execute it; we parse the literal `INSTRUCTIONS = [ ... ]`
11
+ // list of `Instruction(...)` calls. Each entry's args are simple
12
+ // Python literals — raw strings (`r'...'`), floats, `None`, and
13
+ // keyword args — with no nested calls, escaped quotes, or
14
+ // in-string commas (verified against the pinned source).
15
+ //
16
+ // document/core/util/macros.def
17
+ // reStructuredText `|MACRO| mathdef:: \xref{section}{anchor}
18
+ // {\K{body}}` lines. The `\K{...}` body is the rendered mnemonic
19
+ // / type string; the anchor gives the category.
20
+ import { readFileSync } from "node:fs";
21
+ import { resolve } from "node:path";
22
// Field names of one parsed `Instruction(...)` record, in positional
// order: parseInstructions stores the i-th positional argument of a
// call under INSTRUCTION_KEYS[i]. The same names are the fields a
// keyword argument may set.
const INSTRUCTION_KEYS = [
    "version",
    "name",
    "opcode",
    "type",
    "validation",
    "execution",
    "operator",
    "validation2",
    "execution2",
];
33
+ // ─── index-instructions.py ──────────────────────────────────────────
34
const QUOTES = new Set(["'", '"']);
/**
 * Return the text between the `[` of `INSTRUCTIONS = [` and its
 * matching `]`.
 *
 * Brackets inside string literals are ignored. Upstream uses both
 * `r'...'` and `r"..."` raw strings (the latter when the content
 * contains a single quote), so a string ends only at the same quote
 * character that opened it.
 *
 * Throws an Error when the marker is missing or the list never closes.
 */
function sliceInstructionsList(source) {
    const marker = "INSTRUCTIONS = [";
    const markerAt = source.indexOf(marker);
    if (markerAt < 0) {
        throw new Error("INSTRUCTIONS list not found in index-instructions.py");
    }
    const bodyStart = markerAt + marker.length;
    let openBrackets = 1; // the `[` that is part of the marker
    let activeQuote = null;
    let pos = bodyStart;
    while (pos < source.length) {
        const ch = source[pos];
        if (activeQuote !== null) {
            // Inside a string: only the opening quote character ends it.
            if (ch === activeQuote) {
                activeQuote = null;
            }
        }
        else if (QUOTES.has(ch)) {
            activeQuote = ch;
        }
        else if (ch === "[") {
            openBrackets += 1;
        }
        else if (ch === "]") {
            openBrackets -= 1;
            if (openBrackets === 0) {
                return source.slice(bodyStart, pos);
            }
        }
        pos += 1;
    }
    throw new Error("Unterminated INSTRUCTIONS list");
}
69
/**
 * Extract the inner-argument text of each `Instruction(...)` call.
 *
 * Parentheses inside string literals (single- or double-quoted) are
 * ignored when looking for the call's closing `)`.
 *
 * @param listBody Text of the `INSTRUCTIONS` list body.
 * @returns The text between the parentheses of every call, in order.
 * @throws Error when a call is never closed. A truncated call would
 *         otherwise be returned as if it were complete and produce a
 *         silently wrong record.
 */
function instructionCallArgs(listBody) {
    const calls = [];
    const marker = "Instruction(";
    let idx = 0;
    while ((idx = listBody.indexOf(marker, idx)) >= 0) {
        const argStart = idx + marker.length;
        let i = argStart;
        let depth = 1; // the `(` that is part of the marker
        let quote = null;
        for (; i < listBody.length; i++) {
            const c = listBody[i];
            if (quote) {
                // Inside a string: only the opening quote character ends it.
                if (c === quote)
                    quote = null;
                continue;
            }
            if (c === "'" || c === '"')
                quote = c;
            else if (c === "(")
                depth++;
            else if (c === ")") {
                depth--;
                if (depth === 0)
                    break;
            }
        }
        if (depth !== 0)
            throw new Error("Unterminated Instruction(...) call");
        calls.push(listBody.slice(argStart, i));
        idx = i + 1;
    }
    return calls;
}
101
/**
 * Split an argument list on top-level commas.
 *
 * A comma is a separator only when it is outside every string literal
 * (either quote style) and outside every `()`, `[]` or `{}` group, so a
 * tuple or list literal stays in one piece. Pieces are returned
 * untrimmed. A trailing piece that is empty or whitespace-only (e.g.
 * after a trailing comma) is dropped; empty pieces between two commas
 * are kept.
 */
function splitArgs(argText) {
    const out = [];
    let cur = "";
    let quote = null;
    let depth = 0;
    for (let i = 0; i < argText.length; i++) {
        const c = argText[i];
        if (quote) {
            cur += c;
            // Inside a string: only the opening quote character ends it.
            if (c === quote)
                quote = null;
            continue;
        }
        if (c === "'" || c === '"') {
            quote = c;
            cur += c;
        }
        else if (c === "," && depth === 0) {
            out.push(cur);
            cur = "";
        }
        else {
            if (c === "(" || c === "[" || c === "{")
                depth++;
            else if ((c === ")" || c === "]" || c === "}") && depth > 0)
                depth--; // never below zero: a stray closer must not hide later commas
            cur += c;
        }
    }
    if (cur.trim() !== "")
        out.push(cur);
    return out;
}
131
/**
 * Parse a single Python literal arg value.
 *
 * @param tokenRaw The argument text; surrounding whitespace is ignored.
 * @returns `null` for `None`; the text between the quotes for a string
 *          literal (optional `r` prefix, either quote style, no escape
 *          processing); a number for anything `Number()` can convert.
 * @throws Error for an empty token or any other text.
 */
function parseValue(tokenRaw) {
    const token = tokenRaw.trim();
    if (token === "None")
        return null;
    // String: optional `r` prefix, then '...' or "...".
    const m = token.match(/^r?(['"])([\s\S]*)\1$/);
    if (m)
        return m[2];
    // `Number("")` is 0, not NaN, so an empty token has to be rejected
    // before the numeric conversion or it would parse as zero.
    if (token !== "") {
        const num = Number(token);
        if (!Number.isNaN(num))
            return num;
    }
    throw new Error(`Unparseable Instruction arg: ${tokenRaw}`);
}
146
/**
 * Parse the `INSTRUCTIONS = [ ... ]` list of an `index-instructions.py`
 * source into one record per `Instruction(...)` call.
 *
 * Every record has all INSTRUCTION_KEYS fields, defaulting to `null`.
 * Positional arguments fill the fields in INSTRUCTION_KEYS order;
 * `name=value` arguments set the named field. Keyword arguments with an
 * unknown name and positional arguments beyond the known fields are
 * ignored.
 *
 * @throws Error when the list or a value cannot be parsed (raised by
 *         the helpers this function calls).
 */
export function parseInstructions(source) {
    const listBody = sliceInstructionsList(source);
    const out = [];
    for (const callArgs of instructionCallArgs(listBody)) {
        const rec = {
            version: null,
            name: null,
            opcode: null,
            type: null,
            validation: null,
            execution: null,
            operator: null,
            validation2: null,
            execution2: null,
        };
        let positional = 0;
        for (const argRaw of splitArgs(callArgs)) {
            const arg = argRaw.trim();
            if (arg === "")
                continue;
            // Keyword arg? `name=value` where name is a bare identifier.
            // A string literal can never match: the identifier must be
            // followed directly by `=`, and a literal has a quote there.
            const kw = arg.match(/^([A-Za-z_][A-Za-z0-9_]*)\s*=\s*([\s\S]+)$/);
            if (kw) {
                const key = kw[1];
                // Own fields only, so a keyword such as `toString=` cannot
                // shadow an inherited Object member on the record.
                if (Object.prototype.hasOwnProperty.call(rec, key))
                    rec[key] = parseValue(kw[2]);
                continue;
            }
            const key = INSTRUCTION_KEYS[positional++];
            if (key)
                rec[key] = parseValue(arg);
        }
        out.push(rec);
    }
    return out;
}
182
+ // ─── macros.def ─────────────────────────────────────────────────────
183
// `.. |NAME| mathdef:: \xref{section}{anchor}{\K{body}}` — body may
// nest one level of braces (e.g. `\K{local{.}get}`).
// Capture groups: 1 = NAME, 2 = section, 3 = anchor, 4 = body.
// The body alternation consumes ONE non-brace character per iteration;
// putting a `+` on it inside the repeated group would let a run of
// characters be partitioned in exponentially many ways, which blows up
// on lines that start like a macro but do not end in `}}`.
const MACRO_RE = /^\.\.\s+\|([A-Z0-9]+)\|\s+mathdef::\s+\\xref\{([^}]+)\}\{([^}]+)\}\{\\K\{((?:[^{}]|\{[^{}]*\})+)\}\}\s*$/;
186
// `\xref` anchors whose macros parseMacros classifies as kind "type".
const TYPE_ANCHORS = new Set(["syntax-numtype", "syntax-vectype", "syntax-reftype", "syntax-valtype", "syntax-shape"]);
193
/**
 * Reduce a raw `\K{...}` macro body to plain text by applying, in
 * order: `\_` → `_`, `{.}` → `.`, removal of `\scriptstyle` (plus any
 * following whitespace), removal of `\;`, and unwrapping of braces
 * around a run of digits (`{16}` → `16`).
 */
function cleanBody(body) {
    const rewrites = [
        [/\\_/g, "_"],
        [/\{\.\}/g, "."],
        [/\\scriptstyle\s*/g, ""],
        [/\\;/g, ""],
        [/\{(\d+)\}/g, "$1"],
    ];
    return rewrites.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), body);
}
200
/**
 * Parse the text of `macros.def` into a table keyed by macro name.
 *
 * Only lines matching MACRO_RE contribute; all others are ignored. Each
 * value holds the cleaned body, a `kind` ("instruction" when the anchor
 * starts with `syntax-instr-`, "type" when the anchor is in
 * TYPE_ANCHORS, "other" otherwise), a `category` (the anchor text after
 * `syntax-instr-` for instructions, otherwise null), and the `section`
 * and `anchor` taken from the `\xref`. A later line with the same macro
 * name replaces an earlier one.
 */
export function parseMacros(text) {
    const INSTR_PREFIX = "syntax-instr-";
    const table = {};
    text.split("\n").forEach((line) => {
        const hit = line.match(MACRO_RE);
        if (!hit) {
            return;
        }
        const name = hit[1];
        const section = hit[2];
        const anchor = hit[3];
        const bodyRaw = hit[4];
        const isInstruction = anchor.startsWith(INSTR_PREFIX);
        const kind = isInstruction ? "instruction" : TYPE_ANCHORS.has(anchor) ? "type" : "other";
        const category = isInstruction ? anchor.slice(INSTR_PREFIX.length) : null;
        table[name] = { body: cleanBody(bodyRaw), kind, category, section, anchor };
    });
    return table;
}
220
+ // ─── driver ─────────────────────────────────────────────────────────
221
/**
 * Build the combined raw dump from a vendored checkout.
 *
 * Reads `document/core/appendix/index-instructions.py` and
 * `document/core/util/macros.def` (both as UTF-8, resolved against
 * `snapshotDir`) and returns `{ instructions, macros }` with the parsed
 * content of each.
 */
export function extractRawDump(snapshotDir) {
    const readUpstream = (relativePath) => readFileSync(resolve(snapshotDir, relativePath), "utf8");
    const instructions = parseInstructions(readUpstream("document/core/appendix/index-instructions.py"));
    const macros = parseMacros(readUpstream("document/core/util/macros.def"));
    return { instructions, macros };
}
@@ -0,0 +1,3 @@
1
+ export declare const REPO_ROOT: string;
2
+ export declare const BUILD_DIR: string;
3
+ export declare const VENDOR_ROOT: string;
package/dist/paths.js ADDED
@@ -0,0 +1,12 @@
1
+ // Resolved at module load. Used by build-time scripts and the runtime
2
+ // server to locate the baked `build/` artifacts relative to the
3
+ // repository root (in dev) or the published package (in dist).
4
+ import { fileURLToPath } from "node:url";
5
+ import { dirname, resolve } from "node:path";
6
// Directory containing this module file.
const HERE = dirname(fileURLToPath(import.meta.url));
// In dev: src/paths.ts → repo-root is `../`.
// In dist: dist/paths.js → repo-root is `../`.
// Either way one `..` lands us at the project root.
export const REPO_ROOT = resolve(HERE, "..");
// Directory holding the baked build artifacts, directly under the root.
export const BUILD_DIR = resolve(REPO_ROOT, "build");
// `vendor/` directly under the root.
// NOTE(review): presumably only present in a development checkout and
// used by build-time scripts — confirm it is not needed at runtime in
// the published package.
export const VENDOR_ROOT = resolve(REPO_ROOT, "vendor");
@@ -0,0 +1,10 @@
1
+ import type { SpecVersion } from "../versions.js";
2
+ export declare const SPEC_NAMES: readonly ["core", "js-api", "web-api"];
3
+ export type SpecName = (typeof SPEC_NAMES)[number];
4
+ /** Specs that have a section index (all of them). */
5
+ export declare const SECTION_SPECS: readonly ["core", "js-api", "web-api"];
6
+ /** Filename (within build/) for the unified core snapshot. */
7
+ export declare function buildArtifactName(spec: "core", version: SpecVersion): string;
8
+ /** Filename (within build/) for an auxiliary spec's section index
9
+ * (js-api / web-api). */
10
+ export declare function sectionsArtifactName(spec: SpecName, version: SpecVersion): string;
@@ -0,0 +1,20 @@
1
+ // Spec catalog — what (spec, version) pairs the server claims to
2
+ // support, and what build artifact each one corresponds to. Kept
3
+ // dependency-free so the Cloudflare Worker can bundle it.
4
+ // The WebAssembly/spec repo carries three specifications under
5
+ // /document/. `core` is the first-class target (instructions, types,
6
+ // validation, execution, formats); `js-api` and `web-api` are the
7
+ // JavaScript + Web embedding specs, covered by the section/search
8
+ // tools.
9
// Names of the specifications the server knows about.
export const SPEC_NAMES = ["core", "js-api", "web-api"];
/** Specs that have a section index (all of them). This is the same
 * array instance as SPEC_NAMES, not a copy. */
export const SECTION_SPECS = SPEC_NAMES;
12
/**
 * Name of the unified snapshot file inside build/ for a spec and
 * version, in the form `wasm-spec-<spec>-<version>.json`.
 */
export function buildArtifactName(spec, version) {
    const stem = `wasm-spec-${spec}-${version}`;
    return `${stem}.json`;
}
16
/**
 * Name of a spec's section-index file inside build/, in the form
 * `wasm-sections-<spec>-<version>.json` (e.g. for js-api / web-api).
 */
export function sectionsArtifactName(spec, version) {
    const stem = `wasm-sections-${spec}-${version}`;
    return `${stem}.json`;
}