wasm-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +108 -0
- package/build/wasm-proposals-main.json +1 -0
- package/build/wasm-sections-js-api-main.json +1 -0
- package/build/wasm-sections-web-api-main.json +1 -0
- package/build/wasm-spec-core-main.json +1 -0
- package/dist/mcp/_args.d.ts +22 -0
- package/dist/mcp/_args.js +25 -0
- package/dist/mcp/instructions.d.ts +1 -0
- package/dist/mcp/instructions.js +67 -0
- package/dist/mcp/server.d.ts +2 -0
- package/dist/mcp/server.js +63 -0
- package/dist/mcp/tool_meta.d.ts +32 -0
- package/dist/mcp/tool_meta.js +100 -0
- package/dist/mcp/tools/instruction_get.d.ts +32 -0
- package/dist/mcp/tools/instruction_get.js +39 -0
- package/dist/mcp/tools/instruction_list.d.ts +67 -0
- package/dist/mcp/tools/instruction_list.js +52 -0
- package/dist/mcp/tools/instruction_search.d.ts +28 -0
- package/dist/mcp/tools/instruction_search.js +33 -0
- package/dist/mcp/tools/proposal_list.d.ts +51 -0
- package/dist/mcp/tools/proposal_list.js +44 -0
- package/dist/mcp/tools/section_get.d.ts +29 -0
- package/dist/mcp/tools/section_get.js +32 -0
- package/dist/mcp/tools/section_list.d.ts +49 -0
- package/dist/mcp/tools/section_list.js +56 -0
- package/dist/mcp/tools/spec_search.d.ts +35 -0
- package/dist/mcp/tools/spec_search.js +34 -0
- package/dist/mcp/tools/spec_version.d.ts +28 -0
- package/dist/mcp/tools/spec_version.js +30 -0
- package/dist/mcp/tools/type_get.d.ts +22 -0
- package/dist/mcp/tools/type_get.js +31 -0
- package/dist/parser/bikeshed.d.ts +8 -0
- package/dist/parser/bikeshed.js +106 -0
- package/dist/parser/instructions.d.ts +171 -0
- package/dist/parser/instructions.js +241 -0
- package/dist/parser/proposals.d.ts +30 -0
- package/dist/parser/proposals.js +188 -0
- package/dist/parser/sections.d.ts +27 -0
- package/dist/parser/sections.js +213 -0
- package/dist/parser/types.d.ts +37 -0
- package/dist/parser/types.js +116 -0
- package/dist/parser/upstream.d.ts +7 -0
- package/dist/parser/upstream.js +230 -0
- package/dist/paths.d.ts +3 -0
- package/dist/paths.js +12 -0
- package/dist/spec/catalog.d.ts +10 -0
- package/dist/spec/catalog.js +20 -0
- package/dist/spec/instructions_query.d.ts +46 -0
- package/dist/spec/instructions_query.js +120 -0
- package/dist/spec/pin.d.ts +13 -0
- package/dist/spec/pin.js +39 -0
- package/dist/spec/proposals_query.d.ts +15 -0
- package/dist/spec/proposals_query.js +23 -0
- package/dist/spec/sections_query.d.ts +43 -0
- package/dist/spec/sections_query.js +89 -0
- package/dist/spec/spec_data.d.ts +46 -0
- package/dist/spec/spec_data.js +92 -0
- package/dist/spec/tool_inventory.d.ts +5 -0
- package/dist/spec/tool_inventory.js +17 -0
- package/dist/versions.d.ts +12 -0
- package/dist/versions.js +22 -0
- package/package.json +76 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
// Parse the WebAssembly core spec's reStructuredText sources into a
|
|
2
|
+
// flat, anchor-addressable clause index.
|
|
3
|
+
//
|
|
4
|
+
// Since March 2025 the spec is authored in SpecTec: the `.rst` files
|
|
5
|
+
// carry hand-written prose interleaved with SpecTec splice macros
|
|
6
|
+
// (`$${syntax: numtype}`, `$${rule-prose: Step_pure/nop}`,
|
|
7
|
+
// `${:I32}`). The OCaml SpecTec tool expands those into the formal
|
|
8
|
+
// grammar / typing / reduction notation at build time. We do NOT run
|
|
9
|
+
// SpecTec here — that keeps the build deterministic over the pinned
|
|
10
|
+
// SHA without an OCaml toolchain. Instead we:
|
|
11
|
+
//
|
|
12
|
+
// - keep the hand-written prose (rich for syntax/binary/text/intro/
|
|
13
|
+
// appendix sections),
|
|
14
|
+
// - record the SpecTec rule / syntax names the splices reference
|
|
15
|
+
// (`formal_refs`) so callers can see which formal rule a clause
|
|
16
|
+
// defines and follow the rendered URL for the notation itself,
|
|
17
|
+
// - resolve every `:ref:` cross-reference target.
|
|
18
|
+
//
|
|
19
|
+
// Each clause is addressable by any of the `.. _anchor:` labels that
|
|
20
|
+
// attach to it, mirroring the stable fragment ids in the rendered
|
|
21
|
+
// spec.
|
|
22
|
+
const SPEC_BASE = "https://webassembly.github.io/spec/core";
/**
 * Build the URL of a rendered core-spec page.
 *
 * @param path   Source-relative document path without extension
 *               (e.g. `syntax/types`).
 * @param anchor Fragment id to append; any falsy value yields the bare
 *               page URL.
 * @returns `<SPEC_BASE>/<path>.html`, followed by `#<anchor>` when an
 *          anchor was supplied.
 */
export function clauseUrl(path, anchor) {
    const pageUrl = `${SPEC_BASE}/${path}.html`;
    if (!anchor) {
        return pageUrl;
    }
    return `${pageUrl}#${anchor}`;
}
|
|
28
|
+
/**
 * Turn a heading title into a lowercase, hyphen-separated slug: every
 * run of characters outside `a-z0-9` becomes a single `-`, and leading
 * / trailing hyphens are removed.
 */
function slugify(title) {
    const lowered = title.toLowerCase();
    const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
    return hyphenated.replace(/^-+|-+$/g, "");
}
|
|
34
|
+
/**
 * Decide whether `underline` can serve as the RST heading underline of
 * `title`.
 *
 * The underline must be non-empty, consist of one repeated adornment
 * character from the accepted set, and be at least as long as the
 * trimmed title, which itself must not be empty.
 */
function isUnderline(underline, title) {
    if (underline.length === 0)
        return false;
    const marker = underline[0];
    const isAdornment = "=-~^\"`+*#:.'_".includes(marker);
    if (!isAdornment)
        return false;
    for (const c of underline) {
        if (c !== marker)
            return false;
    }
    // RST requires the underline to be at least as long as the title.
    const titleLength = title.trim().length;
    return titleLength > 0 && underline.length >= titleLength;
}
|
|
46
|
+
/**
 * Reduce a raw RST prose block to plain text.
 *
 * SpecTec splices and RST inline markup are removed or unwrapped in a
 * fixed order. As a side effect, every name referenced by a splice is
 * added to `formalRefs` and every `:ref:` target to `crossrefs`.
 *
 * @param raw        Raw prose text.
 * @param crossrefs  Set receiving `:ref:` targets.
 * @param formalRefs Set receiving SpecTec rule / syntax names.
 * @returns The cleaned prose, whitespace-collapsed and trimmed.
 */
function cleanProse(raw, crossrefs, formalRefs) {
    // Record the names inside a splice body such as
    // "rule-prose: Step_pure/nop", "syntax: numtype",
    // "rule: {Step_pure/select-*}", or ":I32" (inline atom), and
    // return them in source order.
    function harvestNames(spliceBody) {
        const sep = spliceBody.indexOf(":");
        const payload = (sep < 0 ? spliceBody : spliceBody.slice(sep + 1)).trim();
        const found = [];
        payload.split(/[\s,]+/).forEach((piece) => {
            const candidate = piece.replace(/[{}]/g, "").trim();
            if (candidate === "" || !/[A-Za-z]/.test(candidate))
                return;
            formalRefs.add(candidate);
            found.push(candidate);
        });
        return found;
    }
    // Block-level splices `$${...}` stand for whole formal blocks: note
    // their names and remove them from the prose. One level of nested
    // braces is tolerated (`$${rule: {Step_pure/nop}}`).
    const withoutBlocks = raw.replace(/\$\$\{(?:[^{}]|\{[^{}]*\})*\}/g, (splice) => {
        harvestNames(splice.slice(3, -1));
        return "";
    });
    // Inline atoms `${...}` are kept readable by substituting the bare
    // names (`${:I32}` → `I32`).
    const withAtoms = withoutBlocks.replace(/\$\{(?:[^{}]|\{[^{}]*\})*\}/g, (atom) => harvestNames(atom.slice(2, -1)).join(" "));
    // :ref:`text <target>` → text; the target is recorded.
    const withLabelledRefs = withAtoms.replace(/:ref:`([^`<]+?)\s*<([^>]+)>`/g, (_whole, label, target) => {
        crossrefs.add(target.trim());
        return label.trim();
    });
    // :ref:`target` → target; the target is recorded.
    const withBareRefs = withLabelledRefs.replace(/:ref:`([^`]+)`/g, (_whole, target) => {
        const cleaned = target.trim();
        crossrefs.add(cleaned);
        return cleaned;
    });
    return withBareRefs
        // Remaining roles (:math:`x`, :code:`x`, …) keep their content.
        .replace(/:[a-z]+:`([^`]*)`/g, "$1")
        // Substitution references |foo| / |foo|_ lose their bars.
        .replace(/\|([A-Za-z0-9_]+)\|_?/g, "$1")
        // Strong, then plain, emphasis markers are dropped.
        .replace(/\*\*([^*]+)\*\*/g, "$1")
        .replace(/\*([^*]+)\*/g, "$1")
        // Inline literals ``x`` keep their content.
        .replace(/``([^`]+)``/g, "$1")
        // Collapse runs of spaces / tabs and excess blank lines.
        .replace(/[ \t]+/g, " ")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
}
|
|
103
|
+
/**
 * Parse one RST document into a flat list of clauses.
 *
 * A clause starts at every heading (a non-blank line followed by a
 * valid underline) and at the first non-blank, non-heading line that
 * follows one or more `.. _anchor:` labels. Each clause carries the
 * anchors that immediately preceded it, its cleaned prose, the
 * cross-reference targets and SpecTec names found in that prose, and
 * the rendered URL.
 *
 * @param source Full text of the `.rst` file; LF and CRLF line endings
 *               are both accepted.
 * @param path   Source-relative path without extension
 *               (e.g. `syntax/types`).
 * @returns The clauses in document order.
 */
export function parseRst(source, path) {
    // Split on LF or CRLF. Splitting on "\n" alone would leave a
    // trailing "\r" on every line of a CRLF checkout, which ends up in
    // the prose and defeats the blank-line collapsing in cleanProse.
    const lines = source.split(/\r?\n/);
    // First pass: discover the order in which underline chars appear so
    // we can assign heading levels the RST way (first char seen = level 1).
    const charLevels = new Map();
    for (let i = 0; i + 1 < lines.length; i++) {
        const title = lines[i];
        const underline = lines[i + 1];
        if (title.trim() !== "" && isUnderline(underline.trim(), title)) {
            const ch = underline.trim()[0];
            if (!charLevels.has(ch))
                charLevels.set(ch, charLevels.size + 1);
        }
    }
    // Second pass: cut the document into blocks.
    const blocks = [];
    let pendingAnchors = [];
    let current = null;
    const flush = () => {
        if (current)
            blocks.push(current);
        current = null;
    };
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        // Anchor label: `.. _name:` — remembered until the next block starts.
        const anchorMatch = trimmed.match(/^\.\.\s+_([A-Za-z0-9_.-]+):$/);
        if (anchorMatch) {
            pendingAnchors.push(anchorMatch[1]);
            continue;
        }
        // Heading: a non-blank line followed by an underline.
        const next = lines[i + 1];
        if (trimmed !== "" && next !== undefined && isUnderline(next.trim(), line)) {
            flush();
            current = {
                anchors: pendingAnchors,
                title: trimmed,
                level: charLevels.get(next.trim()[0]) ?? 1,
                bodyLines: [],
            };
            pendingAnchors = [];
            i++; // consume the underline line
            continue;
        }
        // An anchor's content can be a bare splice/paragraph, so anchors
        // that were pending and now hit non-heading content start an
        // anchor-only block (no title, level 0).
        if (pendingAnchors.length > 0 && trimmed !== "") {
            flush();
            current = { anchors: pendingAnchors, title: null, level: 0, bodyLines: [] };
            pendingAnchors = [];
        }
        // Lines seen before the first block are discarded.
        if (current)
            current.bodyLines.push(line);
    }
    flush();
    // Third pass: drop directive noise from each body and build clauses.
    const clauses = [];
    for (const block of blocks) {
        const crossrefs = new Set();
        const formalRefs = new Set();
        // `.. index::`, `.. toctree::`, `.. only::` and `.. math::` are
        // removed together with their indented continuation lines. Any
        // other directive loses only its marker line, so the body of a
        // `.. note::` / `.. warning::` stays in the prose.
        const kept = [];
        let skippingDirective = false;
        for (const raw of block.bodyLines) {
            const directive = raw.match(/^\.\.\s+([a-z-]+)::/);
            if (directive) {
                skippingDirective = ["index", "toctree", "only", "math"].includes(directive[1]);
                continue;
            }
            if (skippingDirective) {
                // Blank or indented lines still belong to the skipped directive.
                if (raw.trim() === "" || /^\s/.test(raw))
                    continue;
                skippingDirective = false;
            }
            kept.push(raw);
        }
        const prose = cleanProse(kept.join("\n"), crossrefs, formalRefs);
        const anchors = block.anchors;
        // Prefer the first anchor as id; otherwise derive one from the
        // title, falling back to the document path itself.
        const id = anchors[0] ?? (block.title ? `${path}-${slugify(block.title)}` : path);
        // Skip empty connective blocks (no anchor, no title, no prose).
        if (anchors.length === 0 && block.title === null && prose === "")
            continue;
        clauses.push({
            id,
            anchors,
            title: block.title,
            level: block.level,
            path,
            prose,
            crossrefs: [...crossrefs],
            formal_refs: [...formalRefs],
            url: clauseUrl(path, anchors[0] ?? null),
        });
    }
    return clauses;
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { SpecClause } from "./sections.js";
|
|
2
|
+
/** Classification of a catalog entry: one of the three value-type
 *  categories, or `form` for a type form. */
export type TypeKind = "number" | "vector" | "reference" | "form";
/** One entry of the type catalog. */
export interface TypeEntry {
    /** Name of the type or type form, e.g. `i32`, `funcref`, `functype`. */
    name: string;
    /** Which kind of entry this is. */
    kind: TypeKind;
    /** Anchor of the defining clause, e.g. `syntax-numtype`, `syntax-functype`. */
    anchor: string;
    /** For concrete value types: the other concrete types of the same
     *  category (e.g. the remaining number types). Empty for type forms. */
    members: string[];
    /** Title of the defining clause, e.g. `Number Types`. */
    title: string | null;
    /** Prose of the defining clause. */
    prose: string;
    /** SpecTec rule / syntax names the defining clause references. */
    formal_refs: string[];
    /** URL of the defining clause in the rendered spec. */
    url: string;
}
|
|
22
|
+
/** Shape of one macro record as emitted by src/parser/upstream.ts. */
interface RawMacro {
    /** Cleaned macro body (the rendered mnemonic / type string). */
    body: string;
    /** Coarse classification derived from the macro's anchor. */
    kind: "instruction" | "type" | "other";
    /** Instruction category, or `null` when the macro has none. */
    category: string | null;
    /** First `\xref` argument of the macro definition. */
    section: string;
    /** Second `\xref` argument of the macro definition. */
    anchor: string;
}
|
|
30
|
+
/**
 * Build the type catalog.
 *
 * @param macros      The dumped macro table, keyed by macro name.
 * @param typeClauses The parsed clauses of the `syntax/types` section.
 * @returns One entry per concrete value type and per type form.
 */
export declare function buildTypeCatalog(macros: Record<string, RawMacro>, typeClauses: SpecClause[]): TypeEntry[];
/**
 * Look up one type by name. Matching is exact and case-insensitive.
 *
 * @returns The matching entry, or `null` when there is none.
 */
export declare function getType(catalog: TypeEntry[], name: string): TypeEntry | null;
|
|
37
|
+
export {};
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
// Build a type catalog from the macro table (concrete value-type
|
|
2
|
+
// names) joined with the parsed `syntax/types` section clauses
|
|
3
|
+
// (the prose + anchors that define each type and type form).
|
|
4
|
+
//
|
|
5
|
+
// Two kinds of entry:
|
|
6
|
+
// - concrete value types — `i32`, `f64`, `v128`, `funcref`, … —
|
|
7
|
+
// grouped under their category section (number / vector /
|
|
8
|
+
// reference types);
|
|
9
|
+
// - type forms — `functype`, `limits`, `memtype`, `rectype`, … —
|
|
10
|
+
// each its own clause in the types section.
|
|
11
|
+
// Concrete value-type macros are tagged with these anchors. Each maps
|
|
12
|
+
// to a catalog `kind`, the canonical category clause anchor, and an
|
|
13
|
+
// `accept` predicate that keeps only well-formed concrete type names.
|
|
14
|
+
//
|
|
15
|
+
// The macro table also defines meta-variable shorthands under these
|
|
16
|
+
// same anchors (`\INX` → `i`, `\FNX` → `f`, `\VNX` → `v`), lane
|
|
17
|
+
// widths (`\I128` → `i128`), and reference constructors / keywords
|
|
18
|
+
// (`\REF` → `ref`, `\NULL` → `null`). Those aren't value types, so
|
|
19
|
+
// `accept` filters them out.
|
|
20
|
+
const VALUE_TYPE_ANCHORS = {
    "syntax-numtype": {
        kind: "number",
        clause: "syntax-numtype",
        accept: (b) => /^[if](32|64)$/.test(b),
    },
    "syntax-vectype": {
        kind: "vector",
        clause: "syntax-vectype",
        accept: (b) => b === "v128",
    },
    "syntax-reftype": {
        kind: "reference",
        clause: "syntax-reftype",
        accept: (b) => b.endsWith("ref") && b !== "ref",
    },
};
/**
 * Return the value-type mapping registered for `anchor`, or `undefined`.
 * Only own keys count, so anchors that happen to equal an inherited
 * `Object.prototype` member (`constructor`, `toString`, …) are not
 * mistaken for a mapping.
 */
function valueTypeMapping(anchor) {
    return Object.prototype.hasOwnProperty.call(VALUE_TYPE_ANCHORS, anchor)
        ? VALUE_TYPE_ANCHORS[anchor]
        : undefined;
}
/** Find the first clause that carries `anchor`, or `undefined`. */
function findClause(clauses, anchor) {
    return clauses.find((c) => c.anchors.includes(anchor));
}
/**
 * Build the type catalog.
 *
 * @param macros      The dumped macro table, keyed by macro name. Only
 *                    macros of kind `"type"` whose anchor is a known
 *                    value-type category and whose body passes that
 *                    category's `accept` predicate contribute entries.
 * @param typeClauses The parsed `syntax/types` section.
 * @returns Entries for the concrete value types followed by the type
 *          forms, sorted by name.
 */
export function buildTypeCatalog(macros, typeClauses) {
    const entries = [];
    const seen = new Set();
    // 1. Concrete value types, grouped by category anchor.
    const byCategory = new Map();
    for (const macro of Object.values(macros)) {
        if (macro.kind !== "type")
            continue;
        const mapping = valueTypeMapping(macro.anchor);
        if (!mapping)
            continue; // skip syntax-shape / syntax-valtype aliases here
        if (!mapping.accept(macro.body))
            continue; // drop meta-vars / keywords
        const list = byCategory.get(mapping.clause) ?? [];
        if (!list.includes(macro.body))
            list.push(macro.body);
        byCategory.set(mapping.clause, list);
    }
    for (const [clauseAnchor, mapping] of Object.entries(VALUE_TYPE_ANCHORS)) {
        // Sort a copy so the list held by `byCategory` is left untouched.
        const members = [...(byCategory.get(clauseAnchor) ?? [])].sort();
        const clause = findClause(typeClauses, mapping.clause);
        for (const name of members) {
            if (seen.has(name))
                continue;
            seen.add(name);
            entries.push({
                name,
                kind: mapping.kind,
                anchor: clauseAnchor,
                // Siblings only: the entry itself is left out.
                members: members.filter((m) => m !== name),
                title: clause?.title ?? null,
                prose: clause?.prose ?? "",
                formal_refs: clause?.formal_refs ?? [],
                // Fall back to the expected URL when the category clause
                // is missing from `typeClauses`.
                url: clause?.url ??
                    `https://webassembly.github.io/spec/core/syntax/types.html#${clauseAnchor}`,
            });
        }
    }
    // 2. Type forms — every `syntax-<form>` clause in the types section
    //    that isn't already a concrete value type. Covers functype,
    //    limits, memtype, tabletype, globaltype, rectype, heaptype,
    //    resulttype, blocktype, externtype, etc.
    for (const clause of typeClauses) {
        for (const anchor of clause.anchors) {
            const m = anchor.match(/^syntax-([a-z0-9]+)$/);
            if (!m)
                continue;
            const name = m[1];
            if (seen.has(name))
                continue;
            // Skip the category clauses already represented by their members.
            if (valueTypeMapping(anchor))
                continue;
            seen.add(name);
            entries.push({
                name,
                kind: "form",
                anchor,
                members: [],
                title: clause.title,
                prose: clause.prose,
                formal_refs: clause.formal_refs,
                url: clause.url,
            });
        }
    }
    return entries.sort((a, b) => a.name.localeCompare(b.name));
}
|
|
112
|
+
/**
 * Find the catalog entry whose name equals `name`, ignoring case and
 * surrounding whitespace in `name`.
 *
 * @returns The first matching entry, or `null` when none matches.
 */
export function getType(catalog, name) {
    const wanted = name.trim().toLowerCase();
    for (const entry of catalog) {
        if (entry.name.toLowerCase() === wanted)
            return entry;
    }
    return null;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { RawInstruction, RawMacro, RawDump } from "./instructions.js";
|
|
2
|
+
export type { RawInstruction, RawMacro, RawDump } from "./instructions.js";
|
|
3
|
+
/** Parse the `Instruction(...)` entries of the literal
 *  `INSTRUCTIONS = [ ... ]` list found in `source`. */
export declare function parseInstructions(source: string): RawInstruction[];
/** Parse the `|NAME| mathdef::` lines of a macro definition file into
 *  a table keyed by macro name. */
export declare function parseMacros(text: string): Record<string, RawMacro>;
/** Read both upstream files from the vendored checkout at
 *  `snapshotDir` and return the combined raw dump. */
export declare function extractRawDump(snapshotDir: string): RawDump;
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
// Extract the structured instruction index + macro table from the
|
|
2
|
+
// upstream WebAssembly/spec sources — a pure-TypeScript replacement
|
|
3
|
+
// for the old scripts/dump-instructions.py, so the build needs no
|
|
4
|
+
// Python toolchain.
|
|
5
|
+
//
|
|
6
|
+
// Two upstream files are read:
|
|
7
|
+
//
|
|
8
|
+
// document/core/appendix/index-instructions.py
|
|
9
|
+
// The structured source for the appendix instruction table. We do
|
|
10
|
+
// NOT execute it; we parse the literal `INSTRUCTIONS = [ ... ]`
|
|
11
|
+
// list of `Instruction(...)` calls. Each entry's args are simple
|
|
12
|
+
// Python literals — raw strings (`r'...'`), floats, `None`, and
|
|
13
|
+
// keyword args — with no nested calls, escaped quotes, or
|
|
14
|
+
// in-string commas (verified against the pinned source).
|
|
15
|
+
//
|
|
16
|
+
// document/core/util/macros.def
|
|
17
|
+
// reStructuredText `|MACRO| mathdef:: \xref{section}{anchor}
|
|
18
|
+
// {\K{body}}` lines. The `\K{...}` body is the rendered mnemonic
|
|
19
|
+
// / type string; the anchor gives the category.
|
|
20
|
+
import { readFileSync } from "node:fs";
|
|
21
|
+
import { resolve } from "node:path";
|
|
22
|
+
// Record keys in positional order: the i-th positional argument of an
// `Instruction(...)` call is stored under the i-th key.
const INSTRUCTION_KEYS = [
    "version", "name", "opcode",
    "type", "validation", "execution",
    "operator", "validation2", "execution2",
];
|
|
33
|
+
// ─── index-instructions.py ──────────────────────────────────────────
|
|
34
|
+
const QUOTES = new Set(["'", '"']);
/**
 * Return the text between `INSTRUCTIONS = [` and its matching `]`.
 *
 * Brackets inside string literals are ignored. A string may be opened
 * by either quote character and is closed by the next occurrence of
 * that same character.
 *
 * @throws Error when the marker is absent or the list is never closed.
 */
function sliceInstructionsList(source) {
    const marker = "INSTRUCTIONS = [";
    const markerAt = source.indexOf(marker);
    if (markerAt < 0)
        throw new Error("INSTRUCTIONS list not found in index-instructions.py");
    const bodyStart = markerAt + marker.length;
    let depth = 1; // the marker's own `[` is already open
    let openQuote = null;
    let pos = bodyStart;
    while (pos < source.length) {
        const ch = source[pos];
        if (openQuote !== null) {
            // Inside a string: only the matching quote is significant.
            if (ch === openQuote)
                openQuote = null;
        }
        else if (QUOTES.has(ch)) {
            openQuote = ch;
        }
        else if (ch === "[") {
            depth += 1;
        }
        else if (ch === "]") {
            depth -= 1;
            if (depth === 0)
                return source.slice(bodyStart, pos);
        }
        pos += 1;
    }
    throw new Error("Unterminated INSTRUCTIONS list");
}
|
|
69
|
+
/**
 * Extract the inner-argument text of each `Instruction(...)` call.
 *
 * Parentheses inside string literals (either quote style) are ignored
 * when looking for the call's closing `)`.
 *
 * @param listBody Body of the `INSTRUCTIONS` list.
 * @returns The text between the parentheses of every call, in order.
 * @throws Error when a call is never closed, instead of silently
 *         treating the rest of the input as its arguments.
 */
function instructionCallArgs(listBody) {
    const calls = [];
    const marker = "Instruction(";
    let idx = 0;
    while ((idx = listBody.indexOf(marker, idx)) >= 0) {
        const argStart = idx + marker.length;
        let i = argStart;
        let depth = 1; // the marker's own `(` is already open
        let quote = null;
        for (; i < listBody.length; i++) {
            const c = listBody[i];
            if (quote) {
                if (c === quote)
                    quote = null;
                continue;
            }
            if (QUOTES.has(c))
                quote = c;
            else if (c === "(")
                depth++;
            else if (c === ")") {
                depth--;
                if (depth === 0)
                    break;
            }
        }
        // Running off the end means the closing `)` was never found.
        if (depth !== 0)
            throw new Error("Unterminated Instruction(...) call in INSTRUCTIONS list");
        calls.push(listBody.slice(argStart, i));
        idx = i + 1;
    }
    return calls;
}
|
|
101
|
+
/**
 * Split an argument list on every comma that lies outside a string
 * literal (either quote style). Pieces are returned untrimmed; a final
 * piece that is empty or whitespace-only is dropped.
 */
function splitArgs(argText) {
    const pieces = [];
    let pending = "";
    let openQuote = null;
    for (const ch of argText) {
        // A comma outside any string ends the current piece.
        if (openQuote === null && ch === ",") {
            pieces.push(pending);
            pending = "";
            continue;
        }
        pending += ch;
        if (openQuote !== null) {
            if (ch === openQuote)
                openQuote = null;
        }
        else if (QUOTES.has(ch)) {
            openQuote = ch;
        }
    }
    if (pending.trim() !== "")
        pieces.push(pending);
    return pieces;
}
|
|
131
|
+
/**
 * Parse a single Python literal argument value.
 *
 * @param tokenRaw The argument text; surrounding whitespace is ignored.
 * @returns `null` for `None`; the inner text for a plain or `r`-prefixed
 *          string in either quote style; a number for anything
 *          `Number()` can convert.
 * @throws Error for an empty token or any other unrecognised text.
 */
function parseValue(tokenRaw) {
    const token = tokenRaw.trim();
    if (token === "None")
        return null;
    // String: optional `r` prefix, then '...' or "...".
    const m = token.match(/^r?(['"])([\s\S]*)\1$/);
    if (m)
        return m[2];
    // `Number("")` is 0, so an empty token has to be rejected explicitly
    // rather than being read as the number zero.
    if (token !== "") {
        const num = Number(token);
        if (!Number.isNaN(num))
            return num;
    }
    throw new Error(`Unparseable Instruction arg: ${tokenRaw}`);
}
|
|
146
|
+
/**
 * Parse the `INSTRUCTIONS = [ ... ]` list of `index-instructions.py`
 * into one record per `Instruction(...)` call.
 *
 * Positional arguments are assigned to `INSTRUCTION_KEYS` in order and
 * keyword arguments by name. Keyword arguments with an unknown name,
 * and positional arguments beyond the known keys, are ignored. Fields
 * that are not supplied stay `null`.
 *
 * @param source Full text of the Python file.
 * @throws Error when the list cannot be located or an argument value
 *         cannot be parsed.
 */
export function parseInstructions(source) {
    const listBody = sliceInstructionsList(source);
    const out = [];
    for (const callArgs of instructionCallArgs(listBody)) {
        const rec = {
            version: null,
            name: null,
            opcode: null,
            type: null,
            validation: null,
            execution: null,
            operator: null,
            validation2: null,
            execution2: null,
        };
        let positional = 0;
        for (const argRaw of splitArgs(callArgs)) {
            const arg = argRaw.trim();
            if (arg === "")
                continue;
            // Keyword arg? `name=value` where name is a bare identifier.
            // A string literal can never match: the identifier would have
            // to be followed directly by `=`, not by a quote.
            const kw = arg.match(/^([A-Za-z_][A-Za-z0-9_]*)\s*=\s*([\s\S]+)$/);
            if (kw) {
                const key = kw[1];
                // Own keys only, so a keyword such as `toString` cannot
                // overwrite an inherited member on the record.
                if (Object.prototype.hasOwnProperty.call(rec, key))
                    rec[key] = parseValue(kw[2]);
                continue;
            }
            const key = INSTRUCTION_KEYS[positional++];
            if (key)
                rec[key] = parseValue(arg);
        }
        out.push(rec);
    }
    return out;
}
|
|
182
|
+
// ─── macros.def ─────────────────────────────────────────────────────
|
|
183
|
+
// `.. |NAME| mathdef:: \xref{section}{anchor}{\K{body}}` — body may
|
|
184
|
+
// nest one level of braces (e.g. `\K{local{.}get}`).
|
|
185
|
+
// Capture groups: 1 = macro name, 2 = xref section, 3 = xref anchor,
// 4 = the `\K{...}` body.
const MACRO_RE = /^\.\.\s+\|([A-Z0-9]+)\|\s+mathdef::\s+\\xref\{([^}]+)\}\{([^}]+)\}\{\\K\{((?:[^{}]+|\{[^{}]*\})+)\}\}\s*$/;
// Anchors whose macros are classified as kind "type".
const TYPE_ANCHORS = new Set(["syntax-numtype", "syntax-vectype", "syntax-reftype", "syntax-valtype", "syntax-shape"]);
|
|
193
|
+
/**
 * Strip TeX markup from a macro body: `\_` becomes `_`, `{.}` becomes
 * `.`, `\scriptstyle` (with trailing whitespace) and `\;` are removed,
 * and braces around a run of digits are dropped.
 */
function cleanBody(body) {
    return body
        .replace(/\\_/g, "_")
        .replace(/\{\.\}/g, ".")
        .replace(/\\scriptstyle\s*/g, "")
        .replace(/\\;/g, "")
        .replace(/\{(\d+)\}/g, "$1");
}
|
|
200
|
+
/**
 * Parse a macro definition file into a table keyed by macro name.
 *
 * Only lines matching `MACRO_RE` contribute. A macro whose anchor
 * starts with `syntax-instr-` is an `"instruction"` whose category is
 * the rest of the anchor; one whose anchor is in `TYPE_ANCHORS` is a
 * `"type"`; everything else is `"other"`. When a name occurs more than
 * once, the last definition wins.
 */
export function parseMacros(text) {
    const INSTR_PREFIX = "syntax-instr-";
    const macros = {};
    text.split("\n").forEach((line) => {
        const hit = MACRO_RE.exec(line);
        if (hit === null)
            return;
        const name = hit[1];
        const section = hit[2];
        const anchor = hit[3];
        const isInstruction = anchor.startsWith(INSTR_PREFIX);
        let kind;
        if (isInstruction)
            kind = "instruction";
        else if (TYPE_ANCHORS.has(anchor))
            kind = "type";
        else
            kind = "other";
        const category = isInstruction ? anchor.slice(INSTR_PREFIX.length) : null;
        macros[name] = { body: cleanBody(hit[4]), kind, category, section, anchor };
    });
    return macros;
}
|
|
220
|
+
// ─── driver ─────────────────────────────────────────────────────────
|
|
221
|
+
/**
 * Read the two upstream source files from the vendored checkout at
 * `snapshotDir` and return the parsed instruction list together with
 * the parsed macro table.
 */
export function extractRawDump(snapshotDir) {
    // Both files are read as UTF-8 text, relative to the checkout root.
    const readUpstream = (relativePath) => readFileSync(resolve(snapshotDir, relativePath), "utf8");
    const instructions = parseInstructions(readUpstream("document/core/appendix/index-instructions.py"));
    const macros = parseMacros(readUpstream("document/core/util/macros.def"));
    return { instructions, macros };
}
|
package/dist/paths.d.ts
ADDED
package/dist/paths.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// Resolved at module load. Used by build-time scripts and the runtime
|
|
2
|
+
// server to locate the baked `build/` artifacts relative to the
|
|
3
|
+
// repository root (in dev) or the published package (in dist).
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { dirname, resolve } from "node:path";
|
|
6
|
+
// Directory that contains this module file.
const HERE = dirname(fileURLToPath(import.meta.url));
// This module sits one level below the project root in both layouts
// (src/paths.ts in dev, dist/paths.js when published), so a single
// `..` reaches the root either way.
export const REPO_ROOT = resolve(HERE, "..");
// Baked build artifacts.
export const BUILD_DIR = resolve(REPO_ROOT, "build");
// Vendored upstream sources.
export const VENDOR_ROOT = resolve(REPO_ROOT, "vendor");
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { SpecVersion } from "../versions.js";
|
|
2
|
+
/** Names of the specifications the server knows about. */
export declare const SPEC_NAMES: readonly ["core", "js-api", "web-api"];
/** Union of the known specification names. */
export type SpecName = (typeof SPEC_NAMES)[number];
/** Specs that have a section index (all of them). */
export declare const SECTION_SPECS: readonly ["core", "js-api", "web-api"];
/** Filename (within build/) of the unified core snapshot for `version`. */
export declare function buildArtifactName(spec: "core", version: SpecVersion): string;
/** Filename (within build/) of a spec's section index for `version`;
 *  used for the auxiliary specs (js-api / web-api). */
export declare function sectionsArtifactName(spec: SpecName, version: SpecVersion): string;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// Spec catalog — what (spec, version) pairs the server claims to
|
|
2
|
+
// support, and what build artifact each one corresponds to. Kept
|
|
3
|
+
// dependency-free so the Cloudflare Worker can bundle it.
|
|
4
|
+
// The WebAssembly/spec repo carries three specifications under
|
|
5
|
+
// /document/. `core` is the first-class target (instructions, types,
|
|
6
|
+
// validation, execution, formats); `js-api` and `web-api` are the
|
|
7
|
+
// JavaScript + Web embedding specs, covered by the section/search
|
|
8
|
+
// tools.
|
|
9
|
+
export const SPEC_NAMES = [
    "core",
    "js-api",
    "web-api",
];
/** Specs that have a section index — currently every spec, so this is
 *  the same array as `SPEC_NAMES`. */
export const SECTION_SPECS = SPEC_NAMES;
|
|
12
|
+
/**
 * Filename (within build/) of the unified snapshot for a spec and
 * version: `wasm-spec-<spec>-<version>.json`.
 */
export function buildArtifactName(spec, version) {
    const stem = `wasm-spec-${spec}-${version}`;
    return `${stem}.json`;
}
|
|
16
|
+
/**
 * Filename (within build/) of a spec's section index for a version:
 * `wasm-sections-<spec>-<version>.json`. Used for the auxiliary specs
 * (js-api / web-api).
 */
export function sectionsArtifactName(spec, version) {
    const stem = `wasm-sections-${spec}-${version}`;
    return `${stem}.json`;
}
|