@sanity/ailf 4.2.0 → 4.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/config/package-surface.ts +37 -0
  2. package/config/preflight-scoring.ts +26 -0
  3. package/dist/_vendor/ailf-core/artifact-registry.d.ts +1 -1
  4. package/dist/_vendor/ailf-core/artifact-registry.js +47 -0
  5. package/dist/_vendor/ailf-core/config-helpers.d.ts +35 -0
  6. package/dist/_vendor/ailf-core/config-helpers.js +67 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +1 -1
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +18 -0
  10. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +30 -0
  11. package/dist/_vendor/ailf-core/ports/index.d.ts +3 -1
  12. package/dist/_vendor/ailf-core/ports/index.js +1 -0
  13. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +23 -0
  14. package/dist/_vendor/ailf-core/ports/package-surface-resolver.d.ts +71 -0
  15. package/dist/_vendor/ailf-core/ports/package-surface-resolver.js +36 -0
  16. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +6 -0
  17. package/dist/_vendor/ailf-core/schemas/eval-config.js +14 -0
  18. package/dist/_vendor/ailf-core/schemas/index.d.ts +1 -0
  19. package/dist/_vendor/ailf-core/schemas/index.js +1 -0
  20. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -0
  21. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +7 -0
  22. package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.d.ts +51 -0
  23. package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.js +57 -0
  24. package/dist/_vendor/ailf-core/types/index.d.ts +12 -0
  25. package/dist/_vendor/ailf-core/types/index.js +1 -0
  26. package/dist/_vendor/ailf-core/types/package-surface.d.ts +36 -0
  27. package/dist/_vendor/ailf-core/types/package-surface.js +13 -0
  28. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
  29. package/dist/_vendor/ailf-core/types/preflight-scoring.d.ts +52 -0
  30. package/dist/_vendor/ailf-core/types/preflight-scoring.js +18 -0
  31. package/dist/_vendor/ailf-core/types/repo-config.d.ts +14 -0
  32. package/dist/_vendor/ailf-core/types/symbol-preflight-report.d.ts +66 -0
  33. package/dist/_vendor/ailf-core/types/symbol-preflight-report.js +25 -0
  34. package/dist/adapters/api-client/build-request.d.ts +1 -0
  35. package/dist/adapters/api-client/build-request.js +3 -0
  36. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  37. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +4 -0
  38. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +159 -82
  39. package/dist/adapters/index.d.ts +1 -0
  40. package/dist/adapters/index.js +1 -0
  41. package/dist/adapters/package-surface/dts-package-surface.d.ts +46 -0
  42. package/dist/adapters/package-surface/dts-package-surface.js +173 -0
  43. package/dist/adapters/package-surface/in-memory-package-surface.d.ts +15 -0
  44. package/dist/adapters/package-surface/in-memory-package-surface.js +28 -0
  45. package/dist/adapters/package-surface/index.d.ts +9 -0
  46. package/dist/adapters/package-surface/index.js +8 -0
  47. package/dist/adapters/package-surface/parse-dts-exports.d.ts +31 -0
  48. package/dist/adapters/package-surface/parse-dts-exports.js +54 -0
  49. package/dist/adapters/task-sources/repo-schemas.d.ts +6 -0
  50. package/dist/adapters/task-sources/repo-schemas.js +15 -0
  51. package/dist/commands/pipeline-action.d.ts +2 -0
  52. package/dist/commands/pipeline-action.js +12 -0
  53. package/dist/commands/remote-pipeline.js +10 -2
  54. package/dist/commands/remote-results.d.ts +12 -1
  55. package/dist/commands/remote-results.js +25 -5
  56. package/dist/composition-root.js +9 -0
  57. package/dist/config/package-surface.ts +37 -0
  58. package/dist/config/preflight-scoring.ts +26 -0
  59. package/dist/index.d.ts +2 -2
  60. package/dist/index.js +1 -1
  61. package/dist/orchestration/build-app-context.js +1 -0
  62. package/dist/orchestration/pipeline-orchestrator.d.ts +19 -1
  63. package/dist/orchestration/pipeline-orchestrator.js +38 -0
  64. package/dist/orchestration/steps/calculate-scores-step.js +11 -0
  65. package/dist/orchestration/steps/generate-configs-step.js +16 -1
  66. package/dist/orchestration/steps/run-eval-step.js +27 -0
  67. package/dist/pipeline/calculate-scores.d.ts +66 -5
  68. package/dist/pipeline/calculate-scores.js +141 -27
  69. package/dist/pipeline/compiler/index.d.ts +1 -1
  70. package/dist/pipeline/compiler/index.js +1 -1
  71. package/dist/pipeline/compiler/literacy-bridge.d.ts +9 -0
  72. package/dist/pipeline/compiler/literacy-bridge.js +2 -0
  73. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +1 -1
  74. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +31 -4
  75. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +146 -1
  76. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +2 -0
  77. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +17 -2
  78. package/dist/pipeline/compiler/rubric-resolution.d.ts +17 -1
  79. package/dist/pipeline/compiler/rubric-resolution.js +78 -2
  80. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -2
  81. package/dist/pipeline/compiler/scoring-bridge.js +104 -10
  82. package/dist/pipeline/eval-fingerprint.d.ts +9 -0
  83. package/dist/pipeline/eval-fingerprint.js +7 -1
  84. package/dist/pipeline/map-request-to-config.js +1 -0
  85. package/dist/pipeline/preflight/compute-preflight.d.ts +67 -0
  86. package/dist/pipeline/preflight/compute-preflight.js +118 -0
  87. package/dist/pipeline/preflight/emit-symbol-preflight.d.ts +51 -0
  88. package/dist/pipeline/preflight/emit-symbol-preflight.js +102 -0
  89. package/dist/pipeline/preflight/load-package-surface.d.ts +14 -0
  90. package/dist/pipeline/preflight/load-package-surface.js +19 -0
  91. package/dist/pipeline/preflight/load-preflight-context.d.ts +13 -0
  92. package/dist/pipeline/preflight/load-preflight-context.js +25 -0
  93. package/dist/pipeline/preflight/load-preflight-scoring.d.ts +12 -0
  94. package/dist/pipeline/preflight/load-preflight-scoring.js +17 -0
  95. package/dist/pipeline/preflight/parse-imports.d.ts +62 -0
  96. package/dist/pipeline/preflight/parse-imports.js +125 -0
  97. package/dist/report-store.d.ts +8 -0
  98. package/dist/report-store.js +55 -6
  99. package/dist/sanity/document-renderers.d.ts +45 -7
  100. package/dist/sanity/document-renderers.js +99 -13
  101. package/dist/sanity/queries.d.ts +11 -11
  102. package/dist/sanity/queries.js +7 -0
  103. package/dist/sanity/symbol-index.d.ts +98 -0
  104. package/dist/sanity/symbol-index.js +615 -0
  105. package/package.json +2 -1
@@ -0,0 +1,615 @@
1
+ /**
2
+ * symbol-index — Programmatic extractor that produces a flat list of
3
+ * identifiers the canonical reference legitimizes, each with a one-line
4
+ * provenance snippet.
5
+ *
6
+ * Used by the grader-context pathway (W0196 / W0197) so the LLM judge sees
7
+ * a compact, deterministic recognition reference instead of the full
8
+ * narrative doc — addresses the DOC-2117 prior-collision failure mode
9
+ * where a grader claimed `useEditDocument` did not exist.
10
+ *
11
+ * Two upstream sources, both fully programmatic (no LLM in the extractor):
12
+ *
13
+ * - `extractSymbolIndex(blocks)` — walks Sanity Portable Text content
14
+ * (article docs).
15
+ * - `extractSymbolsFromTypedoc(json, packageName)` — parses typedoc
16
+ * JSON (typesReference docs).
17
+ *
18
+ * `mergeSymbolIndexes(indexes)` combines indexes from multiple references
19
+ * into a single deduped index for a task.
20
+ *
21
+ * Source precedence (higher wins on dedup):
22
+ * 1. type-def — typedoc declarations: literal authoritative type
23
+ * surface, no editorial layer between extracted
24
+ * symbol and the package's actual exports.
25
+ * 2. heading — Section headings (`block` style h1..h4), including
26
+ * inline `code` marks within heading spans.
27
+ * 3. inline-code — Inline `code` marks within non-heading `block` spans.
28
+ * 4. code-block — Identifiers from `import` statements in `codeBlock`
29
+ * bodies. Body identifiers (`const foo = ...`) are
30
+ * intentionally not extracted — those are usage demos.
31
+ */
32
// Portable Text block styles that are treated as section headings.
const HEADING_STYLES = new Set(["h1", "h2", "h3", "h4"]);

// Words that are never emitted as symbols: JS/TS reserved words, contextual
// keywords, and built-in type / literal names. The list is kept short on
// purpose so that anything that could plausibly be a library export still
// passes through. Entries are alphabetical.
const JS_KEYWORDS = new Set([
    "abstract", "any", "as", "async", "await",
    "boolean", "break",
    "case", "catch", "class", "const", "constructor", "continue",
    "debugger", "declare", "default", "delete", "do",
    "else", "enum", "export", "extends",
    "false", "finally", "for", "from", "function",
    "get",
    "if", "implements", "import", "in", "instanceof", "interface", "is",
    "keyof",
    "let",
    "module",
    "namespace", "never", "new", "null", "number",
    "of",
    "package", "private", "protected", "public",
    "readonly", "require", "return",
    "set", "static", "string", "super", "switch", "symbol",
    "this", "throw", "true", "try", "type", "typeof",
    "undefined", "unknown",
    "var", "void",
    "while", "with",
    "yield",
]);
108
+ // ---------------------------------------------------------------------------
109
+ // Public API
110
+ // ---------------------------------------------------------------------------
111
/**
 * Build a symbol index from an array of Portable Text nodes.
 *
 * Nodes with `_type === "block"` feed the heading and inline-code tiers;
 * nodes with `_type === "codeBlock"` feed the code-block tier. Every other
 * node, and every entry that is not an object, is ignored. The type-def
 * tier is always empty for this source.
 *
 * @param {unknown} blocks Portable Text content; anything that is not an
 *   array yields an empty index.
 * @returns {{symbols: Array}} Merged, sorted index.
 */
export function extractSymbolIndex(blocks) {
    if (!Array.isArray(blocks)) {
        return { symbols: [] };
    }
    const buckets = {
        typeDefHits: new Map(),
        headingHits: new Map(),
        inlineHits: new Map(),
        codeBlockHits: new Map(),
    };
    for (const node of blocks) {
        if (!node || typeof node !== "object") {
            continue;
        }
        switch (node._type) {
            case "block":
                collectFromBlock(node, buckets.headingHits, buckets.inlineHits);
                break;
            case "codeBlock":
                collectFromCodeBlock(node, buckets.codeBlockHits);
                break;
            default:
                break;
        }
    }
    return mergeAndSort(buckets);
}
135
/**
 * Extract a symbol index from a typedoc JSON document (schema 2.x — the
 * shape produced by `typedoc --json`). Each top-level declaration whose
 * `kind` has a label in `TYPEDOC_KIND_TO_LABEL` becomes a `type-def`
 * provenance entry carrying the declaration kind and a one-line snippet
 * (the JSDoc summary when present, otherwise a synthesized
 * "<kind> <name> (from <package>)" line).
 *
 * Declarations are read from the first root child whose kind equals
 * `TYPEDOC_KIND.namespace` when that child carries a `children` array;
 * otherwise from the root's own `children`.
 *
 * This is best-effort recognition material: any input that cannot be
 * interpreted — invalid JSON, a non-object root, or a `children` value that
 * is not an array — yields an empty index instead of throwing, so callers
 * can fall back to full-doc injection.
 *
 * @param {string} body Raw JSON text of the typedoc output.
 * @param {string} [packageName] Package name recorded on each entry and
 *   used in the synthesized snippet.
 * @returns {{symbols: Array}} Sorted index containing only `type-def` entries.
 */
export function extractSymbolsFromTypedoc(body, packageName) {
    let parsed;
    try {
        parsed = JSON.parse(body);
    }
    catch {
        return { symbols: [] };
    }
    if (!parsed || typeof parsed !== "object") {
        return { symbols: [] };
    }
    // Well-formed JSON can still have the wrong shape; treat a non-array
    // `children` as "no declarations" rather than letting `.find` throw.
    const rootChildren = Array.isArray(parsed.children) ? parsed.children : [];
    const exportsNamespace = rootChildren.find((c) => c?.kind === TYPEDOC_KIND.namespace);
    const declarations = Array.isArray(exportsNamespace?.children)
        ? exportsNamespace.children
        : rootChildren;
    if (declarations.length === 0) {
        return { symbols: [] };
    }
    const typeDefHits = new Map();
    for (const node of declarations) {
        if (!node || typeof node.name !== "string") {
            continue;
        }
        const declarationKind = TYPEDOC_KIND_TO_LABEL[node.kind ?? -1];
        // Labels are strings; anything else (including an inherited
        // prototype member reached through a non-numeric `kind`) is skipped.
        if (typeof declarationKind !== "string") {
            continue;
        }
        const summary = typedocNodeSummary(node);
        const snippet = summary
            ? clipSnippet(summary)
            : `${declarationKind} ${node.name}${packageName ? ` (from ${packageName})` : ""}`;
        addIfFirst(typeDefHits, node.name, {
            kind: "type-def",
            snippet,
            declarationKind,
            ...(packageName ? { package: packageName } : {}),
        });
    }
    return mergeAndSort({
        typeDefHits,
        headingHits: new Map(),
        inlineHits: new Map(),
        codeBlockHits: new Map(),
    });
}
192
/**
 * Collapse the four provenance buckets into one sorted index.
 *
 * Buckets are folded in from lowest to highest precedence so that, when
 * the same symbol appears in several buckets, the last write — the
 * highest-precedence provenance — is the one that survives.
 *
 * @param {{typeDefHits: Map, headingHits: Map, inlineHits: Map, codeBlockHits: Map}} buckets
 * @returns {{symbols: Array<{symbol: string, provenance: object}>}}
 */
function mergeAndSort(buckets) {
    const lowestToHighest = [
        buckets.codeBlockHits,
        buckets.inlineHits,
        buckets.headingHits,
        buckets.typeDefHits,
    ];
    const winners = new Map();
    for (const bucket of lowestToHighest) {
        for (const [name, provenance] of bucket) {
            winners.set(name, provenance);
        }
    }
    const symbols = Array.from(winners, ([symbol, provenance]) => ({ symbol, provenance }));
    symbols.sort(compareEntries);
    return { symbols };
}
209
/**
 * Count how many entries of a SymbolIndex fall into each provenance tier.
 * Used by the fetcher to populate the per-task manifest entry's
 * `tierBreakdown` field.
 *
 * Any provenance kind other than "type-def", "heading" or "inline-code"
 * is counted under `codeBlock`.
 *
 * @param {{symbols: Array<{provenance: {kind: string}}>}} index
 * @returns {{typeDef: number, heading: number, inlineCode: number, codeBlock: number}}
 */
export function symbolIndexTierBreakdown(index) {
    const counts = { typeDef: 0, heading: 0, inlineCode: 0, codeBlock: 0 };
    for (const { provenance } of index.symbols) {
        switch (provenance.kind) {
            case "type-def":
                counts.typeDef += 1;
                break;
            case "heading":
                counts.heading += 1;
                break;
            case "inline-code":
                counts.inlineCode += 1;
                break;
            default:
                counts.codeBlock += 1;
        }
    }
    return counts;
}
227
/**
 * Merge several `SymbolIndex` values (typically one per canonical
 * reference of a task) into a single deduplicated index.
 *
 * Each entry is routed to the bucket matching its provenance kind; within
 * a bucket the first index to mention a symbol keeps its provenance. The
 * buckets are then merged by precedence and sorted.
 *
 * @param {Iterable<{symbols: Array<{symbol: string, provenance: {kind: string}}>}>} indexes
 * @returns {{symbols: Array}} Combined index.
 */
export function mergeSymbolIndexes(indexes) {
    const buckets = {
        typeDefHits: new Map(),
        headingHits: new Map(),
        inlineHits: new Map(),
        codeBlockHits: new Map(),
    };
    for (const { symbols } of indexes) {
        for (const { symbol, provenance } of symbols) {
            const target = bucketForKind(provenance.kind, buckets);
            addIfFirst(target, symbol, provenance);
        }
    }
    return mergeAndSort(buckets);
}
247
/**
 * Pick the bucket that collects entries of the given provenance kind.
 * Unrecognized kinds fall through to the code-block bucket.
 *
 * @param {string} kind Provenance kind.
 * @param {{typeDefHits: Map, headingHits: Map, inlineHits: Map, codeBlockHits: Map}} buckets
 * @returns {Map} The matching bucket (the same Map instance held by `buckets`).
 */
function bucketForKind(kind, buckets) {
    switch (kind) {
        case "type-def":
            return buckets.typeDefHits;
        case "heading":
            return buckets.headingHits;
        case "inline-code":
            return buckets.inlineHits;
        default:
            return buckets.codeBlockHits;
    }
}
256
// Numeric typedoc reflection kinds this extractor recognizes, keyed by the
// label used in emitted provenance.
// NOTE(review): TypeDoc's published ReflectionKind enum appears to list
// Module = 2 and Namespace = 4 — confirm that `namespace: 2` is the intended
// value for the exports container lookup.
const TYPEDOC_KIND = {
    namespace: 2,
    enum: 8,
    variable: 32,
    function: 64,
    class: 128,
    interface: 256,
    type: 2_097_152,
};

// Reverse lookup: numeric kind -> label. Every label is identical to its
// key in TYPEDOC_KIND, so the table is derived rather than repeated.
const TYPEDOC_KIND_TO_LABEL = Object.fromEntries(
    Object.entries(TYPEDOC_KIND).map(([label, kind]) => [kind, label]),
);
274
/**
 * One-line summary for a typedoc declaration node.
 *
 * The comment attached directly to the declaration is preferred; when it
 * renders to nothing, the comment on the first signature is used instead
 * (function declarations carry their docs on the signature, and overload 0
 * is taken as the canonical one).
 *
 * @param {object} node typedoc declaration node.
 * @returns {string|null} Rendered summary line, or null when neither
 *   comment produces text.
 */
function typedocNodeSummary(node) {
    return (renderTypedocSummary(node.comment?.summary) ||
        renderTypedocSummary(node.signatures?.[0]?.comment?.summary));
}
283
/**
 * Flatten a typedoc comment `summary` (an array of parts, each with an
 * optional `text` field) into a single line.
 *
 * The parts' text is concatenated and the first line that still has
 * content after trimming is returned, so a summary that opens with a
 * blank line is not mistaken for an empty one. Missing parts (null /
 * undefined entries) and parts without `text` contribute nothing.
 *
 * @param {unknown} parts The `summary` array of a typedoc comment.
 * @returns {string|null} The first non-blank line, or null when `parts`
 *   is not a non-empty array or contains no visible text.
 */
function renderTypedocSummary(parts) {
    if (!Array.isArray(parts) || parts.length === 0) {
        return null;
    }
    const text = parts.map((part) => part?.text ?? "").join("");
    for (const line of text.split("\n")) {
        const trimmed = line.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return null;
}
290
/**
 * Render a SymbolIndex as a compact markdown reference suitable for
 * injection into a grader's `rubricPrompt` as ground-truth recognition
 * material.
 *
 * Output layout: a level-1 heading (mentioning `title` when one is given),
 * a fixed explanatory paragraph, then one bullet per entry in the order
 * the index already holds them.
 *
 * @param {{symbols: Array}} index Index to render.
 * @param {string} [title] Optional name of the referenced document.
 * @returns {string} Markdown text, or "" when the index has no symbols.
 */
export function renderSymbolIndex(index, title) {
    const { symbols } = index;
    if (symbols.length === 0) {
        return "";
    }
    const heading = title ? `# Symbols referenced in ${title}` : "# Canonical symbols";
    const preamble = [
        "The following identifiers are referenced in the canonical documentation",
        "for this task. Each entry is tagged with its source (type-def, heading,",
        "inline, code) — type-def and heading entries are authoritative names",
        "from the documented surface. Inline and code entries may include",
        "field names or local variable names from example usage; use the",
        "tag to weight your confidence.",
        "",
    ];
    const bullets = symbols.map((entry) => formatEntry(entry));
    return [heading, "", ...preamble, ...bullets].join("\n");
}
310
+ // ---------------------------------------------------------------------------
311
+ // Heading + inline collection (block children)
312
+ // ---------------------------------------------------------------------------
313
/**
 * Harvest symbols from one Portable Text `block` node.
 *
 * Blocks whose span text is empty after trimming contribute nothing.
 * For heading blocks (style in HEADING_STYLES) the heading text itself is
 * recorded when it looks like a code symbol (e.g. "useEditDocument" as an
 * h3), followed by every inline-code symbol in the heading; all share one
 * `heading` provenance whose snippet is the full heading text. For every
 * other block the inline-code symbols are recorded with an `inline-code`
 * provenance whose snippet is the clipped block text.
 *
 * @param {object} block Portable Text block node.
 * @param {Map} headingHits Bucket receiving heading-tier symbols.
 * @param {Map} inlineHits Bucket receiving inline-code-tier symbols.
 */
function collectFromBlock(block, headingHits, inlineHits) {
    const spans = block.children ?? [];
    const plain = blockPlainText(spans).trim();
    if (plain.length === 0) {
        return;
    }
    // Symbols named by spans carrying the `code` decorator mark.
    const codeSymbols = collectInlineCodeSymbols(spans);
    const isHeading = Boolean(block.style) && HEADING_STYLES.has(block.style);
    if (!isHeading) {
        const provenance = { kind: "inline-code", snippet: clipSnippet(plain) };
        for (const name of codeSymbols) {
            addIfFirst(inlineHits, name, provenance);
        }
        return;
    }
    const provenance = { kind: "heading", snippet: plain, style: block.style };
    // The heading text goes first so it wins over an identical inline symbol.
    const names = looksLikeSymbolIdentifier(plain) ? [plain, ...codeSymbols] : codeSymbols;
    for (const name of names) {
        addIfFirst(headingHits, name, provenance);
    }
}
349
/**
 * Concatenate the text of every `span` child, in order, with no separator.
 * Non-span children and empty entries are skipped; a span without `text`
 * contributes an empty string.
 *
 * @param {Array} children Children of a Portable Text block.
 * @returns {string} The block's plain text.
 */
function blockPlainText(children) {
    const pieces = [];
    for (const child of children) {
        if (child && child._type === "span") {
            pieces.push(child.text ?? "");
        }
    }
    return pieces.join("");
}
355
/**
 * List the symbol named by each inline-code span of a block, in document
 * order (duplicates are kept; deduplication happens in the buckets).
 *
 * Only `span` children whose `marks` include "code" and whose trimmed text
 * is non-empty are considered. From each such span only the rightmost
 * identifier is kept: in a member expression like `client.create()` the
 * right-hand name is the operation being demonstrated, while the leftmost
 * name is typically a local variable from example code (`client`,
 * `result`, `editor`) and would only add noise.
 *
 * @param {Array} children Children of a Portable Text block.
 * @returns {string[]} One identifier per qualifying span.
 */
function collectInlineCodeSymbols(children) {
    return children
        .filter((child) => child && child._type === "span" && (child.marks ?? []).includes("code"))
        .map((child) => (child.text ?? "").trim())
        .filter((codeText) => codeText.length > 0)
        .map((codeText) => rightmostIdentifier(codeText))
        .filter((name) => Boolean(name));
}
380
/**
 * Return the last identifier-shaped token of a short inline-code span.
 *
 *   `client.create()`   → `create`
 *   `useFoo().bar`      → `bar`
 *   `useEditDocument`   → `useEditDocument`
 *   `Array<string>`     → `string` (acceptable: the doc is referencing it
 *                          even if as a type parameter)
 *
 * The choice is purely positional, so for a call written with arguments
 * (`fn(arg)`) the token returned is the last argument name.
 *
 * @param {string} text Trimmed inline-code text.
 * @returns {string|null} The final identifier, or null if there is none.
 */
function rightmostIdentifier(text) {
    const tokens = tokenizeIdentifiers(text);
    return tokens.length > 0 ? tokens[tokens.length - 1] : null;
}
395
+ // ---------------------------------------------------------------------------
396
+ // codeBlock body lexing
397
+ // ---------------------------------------------------------------------------
398
/**
 * Harvest import-legitimized symbols from a `codeBlock` node.
 *
 * Extraction is anchored on `import` / re-`export ... from` statements
 * only: those name the surface the doc claims you can reach for from a
 * package. Body identifiers (`const foo = ...`, ad-hoc variable names,
 * prop names) are usage demos and are intentionally not extracted. A
 * symbol that only appears in prose-with-backticks is picked up by the
 * inline tier instead.
 *
 * The node is read as `cb.blocks[]`, each entry ("tab") holding its source
 * at `tab.code.code`, with optional `tab.filename` / `tab.code.filename`
 * and `tab.code.language`. Missing tabs and tabs whose code is absent,
 * empty, or not a string are skipped rather than raising.
 *
 * @param {object} cb The codeBlock node.
 * @param {Map} hits Bucket receiving code-block-tier symbols.
 */
function collectFromCodeBlock(cb, hits) {
    const tabs = Array.isArray(cb.blocks) ? cb.blocks : [];
    for (const tab of tabs) {
        const code = tab?.code?.code;
        if (typeof code !== "string" || code.length === 0) {
            continue;
        }
        // Strip comments so commented-out imports don't legitimize symbols.
        // Strings stay intact: the import matcher anchors on the quoted
        // source-package string.
        const decommented = stripComments(code);
        const filename = tab.filename ?? tab.code.filename;
        const language = tab.code.language;
        for (const importMatch of extractImportSymbols(decommented)) {
            const provenance = {
                kind: "code-block",
                snippet: clipSnippet(importMatch.snippet),
                ...(filename ? { filename } : {}),
                ...(language ? { language } : {}),
            };
            for (const symbol of importMatch.symbols) {
                // addIfFirst already rejects JS keywords.
                addIfFirst(hits, symbol, provenance);
            }
        }
    }
}
435
/**
 * Extract symbols from JS/TS `import` and re-`export` statements.
 *
 * Handles:
 *   import foo from "pkg"                → ["foo"]
 *   import * as foo from "pkg"           → ["foo"]
 *   import { a, b as c } from "pkg"      → ["a", "c"]
 *   import { type T, fn } from "pkg"     → ["T", "fn"]
 *   import type { T } from "pkg"         → ["T"]
 *   import foo, { a } from "pkg"         → ["foo", "a"]
 *   export { a, b as c } from "pkg"      → ["a", "c"]
 *   export * as ns from "pkg"            → ["ns"]
 *   export * from "pkg"                  → [] (no named bindings, skipped)
 *
 * Re-exports are common in barrel files and contribute as much
 * legitimization as imports — both name the same export surface.
 * `export` without a `from` clause is a local declaration and is ignored.
 *
 * A clause never spans a statement boundary: it cannot contain `;`, a
 * quote, `=`, parentheses, or another `import` / `export` keyword. Without
 * that limit a side-effect import (`import "./styles.css"`) or a local
 * `export const x = ...` would absorb the text up to the next `from "..."`
 * and the real import that follows would be lost.
 *
 * @param {string} code Source text with comments already removed.
 * @returns {Array<{symbols: string[], snippet: string}>} One entry per
 *   statement that binds at least one name; `snippet` is the statement
 *   flattened to a single line so the grader sees the source package
 *   alongside the symbol.
 */
function extractImportSymbols(code) {
    const stmtRe = /\b(import|export)\s+((?:(?!\b(?:import|export)\b)[^;"'`=()])+?)\s+from\s+["']([^"']+)["']/g;
    const matches = [];
    for (const [, keyword, clause, source] of code.matchAll(stmtRe)) {
        const symbols = parseImportClause(clause);
        if (symbols.length === 0) {
            continue;
        }
        matches.push({
            symbols,
            snippet: `${keyword} ${flattenWhitespace(clause)} from "${source}"`,
        });
    }
    return matches;
}
/**
 * Turn the clause between `import` / `export` and `from` into the list of
 * names it binds locally.
 *
 * Permitted shapes (optionally preceded by a statement-level `type`
 * modifier, which is not itself a binding):
 *   <default>
 *   * as <ns>
 *   { ... }
 *   <default>, { ... }
 *   <default>, * as <ns>
 *
 * Inside braces, `a as b` yields `b` — the local name a candidate would
 * actually use — and a per-item `type` modifier is dropped. Pieces that do
 * not have one of these shapes are ignored.
 *
 * @param {string} clause Text between the keyword and `from`.
 * @returns {string[]} Bound names in source order (default / namespace
 *   first, then named bindings).
 */
function parseImportClause(clause) {
    // `ident` or `* as ident`; the capture is the bound name.
    const outerBindingRe = /^(?:\*\s+as\s+)?([A-Za-z_$][A-Za-z0-9_$]*)$/;
    // `ident` or `ident as ident`; the capture is the bound (local) name.
    const namedBindingRe = /^(?:[A-Za-z_$][A-Za-z0-9_$]*\s+as\s+)?([A-Za-z_$][A-Za-z0-9_$]*)$/;
    const out = [];
    // `import type Foo`, `import type { X }`, `export type { X }`: strip the
    // modifier. A bare `type` binding (`import type from "x"`) has nothing
    // after it and is left alone.
    const body = clause.trim().replace(/^type\s+(?=[A-Za-z_$*{])/, "");
    const namedMatch = body.match(/\{([^}]*)\}/);
    const beforeBraces = namedMatch ? body.slice(0, namedMatch.index) : body;
    for (const piece of beforeBraces.split(",")) {
        const binding = piece.trim().match(outerBindingRe);
        if (binding) {
            out.push(binding[1]);
        }
    }
    if (namedMatch) {
        for (const item of namedMatch[1].split(",")) {
            const binding = item.trim().replace(/^type\s+/, "").match(namedBindingRe);
            if (binding) {
                out.push(binding[1]);
            }
        }
    }
    return out;
}
/**
 * Collapse every run of whitespace (including newlines) to one space and
 * trim both ends.
 *
 * @param {string} s
 * @returns {string}
 */
function flattenWhitespace(s) {
    return s.replace(/\s+/g, " ").trim();
}
521
/**
 * Replace every line comment and block comment with a single space.
 *
 * String and template literals are matched first and copied through
 * unchanged, so comment-like text inside a string — the `//` in
 * `"https://esm.sh/pkg"`, the `/*` in a glob such as `"src/**"` — is not
 * mistaken for a comment. The import matcher relies on the quoted
 * source-package string surviving intact.
 *
 * This is a lexical approximation, not a parser: regex literals and
 * nested template expressions are not modelled.
 *
 * @param {string} code Source text.
 * @returns {string} The text with comments blanked out.
 */
function stripComments(code) {
    const stringOrComment = /("(?:\\[\s\S]|[^"\\\n])*"|'(?:\\[\s\S]|[^'\\\n])*'|`(?:\\[\s\S]|[^`\\])*`)|\/\*[\s\S]*?\*\/|\/\/[^\n]*/g;
    return code.replace(stringOrComment, (match, stringLiteral) => (stringLiteral === undefined ? " " : stringLiteral));
}
531
/**
 * Split text into its ASCII identifier-shaped tokens (a letter, `_` or `$`
 * followed by letters, digits, `_` or `$`), in order of appearance.
 *
 * @param {string} text
 * @returns {string[]} The tokens; empty when the text contains none.
 */
function tokenizeIdentifiers(text) {
    return text.match(/[A-Za-z_$][A-Za-z0-9_$]*/g) ?? [];
}
537
/**
 * True when the whole text is a single ASCII identifier: a letter, `_` or
 * `$` followed by any number of letters, digits, `_` or `$`.
 *
 * @param {string} text
 * @returns {boolean}
 */
function looksLikeIdentifier(text) {
    const identifierShape = /^[A-Za-z_$][A-Za-z0-9_$]*$/;
    return identifierShape.test(text);
}
540
/**
 * Stricter variant of `looksLikeIdentifier` used for heading text: the
 * text must be a lexical identifier AND have a shape that plausibly names
 * a code symbol. A bare title-cased word such as "Summary" or
 * "Prerequisites" is a valid identifier but is almost certainly a section
 * label, and admitting it would only add noise to the grader's
 * recognition surface.
 *
 * Accepted when at least one of these holds:
 *   - a lowercase letter is directly followed by an uppercase one
 *     (`useEditDocument`, `RegExp`)
 *   - it contains an underscore, a digit, or `$`
 *     (`URL_PATTERN`, `_ailfPrivate`, `S3Client`)
 *   - it starts with two or more uppercase letters
 *     (`URL`, `API`, `JSON`, `HTMLElement`)
 *
 * Rejected: single-capital or all-lowercase words such as `Summary`,
 * `Actions`, `Array`, `Promise`, `Date`.
 *
 * @param {string} text Trimmed heading text.
 * @returns {boolean}
 */
function looksLikeSymbolIdentifier(text) {
    const symbolShape = /[a-z][A-Z]|[_$0-9]|^[A-Z]{2,}/;
    return looksLikeIdentifier(text) && symbolShape.test(text);
}
564
+ // ---------------------------------------------------------------------------
565
+ // Helpers
566
+ // ---------------------------------------------------------------------------
567
// Maximum length, in UTF-16 code units, of a provenance snippet.
const SNIPPET_MAX = 160;
/**
 * Shorten a snippet to at most SNIPPET_MAX code units.
 *
 * Text that already fits is returned unchanged. Longer text is cut so
 * that, with the trailing ellipsis, it does not exceed the limit; trailing
 * whitespace before the ellipsis is removed. The cut never lands between
 * the two halves of a surrogate pair, so no lone surrogate is emitted.
 *
 * @param {string} text
 * @returns {string}
 */
function clipSnippet(text) {
    if (text.length <= SNIPPET_MAX) {
        return text;
    }
    let end = SNIPPET_MAX - 1;
    const lastUnit = text.charCodeAt(end - 1);
    // A high surrogate at the cut point has lost its low half: drop it too.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return text.slice(0, end).trimEnd() + "…";
}
573
/**
 * Record `provenance` for `symbol` unless the symbol is a JS keyword or
 * the bucket already holds an entry for it (first mention wins).
 *
 * @param {Map} bucket Bucket to update in place.
 * @param {string} symbol
 * @param {object} provenance
 */
function addIfFirst(bucket, symbol, provenance) {
    const isKeyword = JS_KEYWORDS.has(symbol);
    if (isKeyword || bucket.has(symbol)) {
        return;
    }
    bucket.set(symbol, provenance);
}
579
/**
 * Sort rank of a provenance kind; lower ranks sort first.
 * type-def = 0, heading = 1, inline-code = 2, anything else = 3.
 *
 * @param {string} kind
 * @returns {number}
 */
function precedenceRank(kind) {
    switch (kind) {
        case "type-def":
            return 0;
        case "heading":
            return 1;
        case "inline-code":
            return 2;
        default:
            return 3;
    }
}
588
/**
 * Ordering for index entries: by provenance precedence first, then
 * alphabetically by symbol name.
 *
 * The name comparison is pinned to the "en" locale. Without an explicit
 * locale `localeCompare` follows the host's default locale, so the same
 * index could be ordered differently on different machines.
 *
 * @param {{symbol: string, provenance: {kind: string}}} a
 * @param {{symbol: string, provenance: {kind: string}}} b
 * @returns {number} Negative, zero, or positive, as `Array.prototype.sort` expects.
 */
function compareEntries(a, b) {
    const rankDiff = precedenceRank(a.provenance.kind) - precedenceRank(b.provenance.kind);
    if (rankDiff !== 0) {
        return rankDiff;
    }
    return a.symbol.localeCompare(b.symbol, "en");
}
594
/**
 * Render one index entry as a markdown bullet:
 * the symbol in backticks, an em dash, its provenance tag, a colon, and
 * the snippet.
 *
 * @param {{symbol: string, provenance: object}} entry
 * @returns {string}
 */
function formatEntry(entry) {
    const label = provenanceTag(entry.provenance);
    const { snippet } = entry.provenance;
    return `- \`${entry.symbol}\` — ${label}: ${snippet}`;
}
599
/**
 * Short human-readable tag for a provenance record.
 *
 *   type-def    → the declaration kind (default "type"), followed by the
 *                 package in parentheses when one is recorded
 *   heading     → the upper-cased block style (e.g. "H3"), or "heading"
 *   inline-code → "inline"
 *   otherwise   → "code", qualified by the filename if present, else by
 *                 the language if present
 *
 * @param {object} provenance
 * @returns {string}
 */
function provenanceTag(provenance) {
    switch (provenance.kind) {
        case "type-def": {
            const declKind = provenance.declarationKind ?? "type";
            if (provenance.package) {
                return `${declKind} (${provenance.package})`;
            }
            return declKind;
        }
        case "heading":
            if (provenance.style) {
                return provenance.style.toUpperCase();
            }
            return "heading";
        case "inline-code":
            return "inline";
        default: {
            // code-block: prefer the filename, then the language.
            const qualifier = provenance.filename || provenance.language;
            return qualifier ? `code (${qualifier})` : "code";
        }
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "4.2.0",
3
+ "version": "4.3.1",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -43,6 +43,7 @@
43
43
  "dotenv-cli": "^11.0.0",
44
44
  "jiti": "^2.6.1",
45
45
  "js-yaml": "^4.1.0",
46
+ "oxc-parser": "^0.129.0",
46
47
  "promptfoo": "^0.120.24",
47
48
  "zod": "^4.3.6"
48
49
  },