@adia-ai/a2ui-mcp 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/package.json +1 -1
- package/scripts/eval-diff.mjs +35 -8
- package/scripts/smoke-engine-registry.mjs +18 -2
- package/scripts/smoke-register-engine.mjs +3 -2
- package/scripts/test-a2ui.mjs +53 -32
package/CHANGELOG.md
CHANGED
|
@@ -11,6 +11,24 @@ zettel strategies.
|
|
|
11
11
|
|
|
12
12
|
_No pending changes._
|
|
13
13
|
|
|
14
|
+
## [0.4.8] - 2026-05-12
|
|
15
|
+
|
|
16
|
+
### Changed — eval threshold rebaseline + smoke-script alignment (§87, v0.4.8)
|
|
17
|
+
|
|
18
|
+
Companion to `@adia-ai/a2ui-compose@[Unreleased]` §87 zettel `ensureBooted()` race fix.
|
|
19
|
+
|
|
20
|
+
- **`mcp/scripts/eval-diff.mjs`** — thresholds rebaselined to the honest post-§72 floor (~5%; they were previously pinned to the stale v0.4.6 baseline). Eval coverage gate now reflects the chunks-only retrieval shape after the patterns retirement.
|
|
21
|
+
- **`mcp/scripts/smoke-engine-registry.mjs`** — retrieval-quality probes updated to match the post-§72 + §88 retrieval surface (composition-match strategy on canonical intents).
|
|
22
|
+
- **`mcp/scripts/smoke-register-engine.mjs`** — engine-registry coverage adjusted; still 11/11.
|
|
23
|
+
|
|
24
|
+
See root [CHANGELOG.md `[Unreleased]`](../../../CHANGELOG.md) for the cross-cutting arc.
|
|
25
|
+
|
|
26
|
+
## [0.4.7] - 2026-05-12
|
|
27
|
+
|
|
28
|
+
### Changed — smoke + test scripts aligned to post-§72 retrieval surface
|
|
29
|
+
|
|
30
|
+
`scripts/smoke-engine-registry.mjs` retrieval probe set + `scripts/test-a2ui.mjs` (composition-count threshold + spot-checks + intent-gate keyword surface) now exercise the harvested-chunks substrate that survives §72's `corpus/patterns/` + `corpus/compositions/` retirement. Probe for "pricing tiers" dropped (no pricing surface in shipped `/site/`); replaced with "admin dashboard with kpi cards" matching `dashboard-admin-page`. Spot-check names updated to real chunk names (`auth-signin-card-password`, `auth-signup-entry`, `dashboard-admin-page`, `settings-admin-page`). No tool / API change — internal scripts only.
|
|
31
|
+
|
|
14
32
|
## [0.4.6] - 2026-05-12
|
|
15
33
|
|
|
16
34
|
### Changed — patterns surface retired (§64 step 5, 2026-05-12)
|
package/package.json
CHANGED
package/scripts/eval-diff.mjs
CHANGED
|
@@ -71,14 +71,15 @@ if (gateMode === 'combined' && !semanticEnabled) {
|
|
|
71
71
|
process.exit(2);
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
if (!['mcp', 'zettel', 'chunk-zettel', 'all'].includes(engine)) {
|
|
75
|
-
console.error(`[eval-diff] --engine must be one of: mcp | zettel | chunk-zettel | all (got: ${engine})`);
|
|
74
|
+
if (!['mcp', 'zettel', 'chunk-zettel', 'free-form', 'all'].includes(engine)) {
|
|
75
|
+
console.error(`[eval-diff] --engine must be one of: mcp | zettel | chunk-zettel | free-form | all (got: ${engine})`);
|
|
76
76
|
process.exit(2);
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
const runMcp = engine === 'mcp' || engine === 'all';
|
|
80
80
|
const runZettel = engine === 'zettel' || engine === 'all';
|
|
81
81
|
const runChunkZettel = engine === 'chunk-zettel' || engine === 'all';
|
|
82
|
+
const runFreeForm = engine === 'free-form' || engine === 'all';
|
|
82
83
|
|
|
83
84
|
// ── MCP adapter: use the top-level patternName exposed by generateInstant ──
|
|
84
85
|
// Shadow-mode capture: when --semantic is set, remember the emitted messages
|
|
@@ -119,10 +120,23 @@ async function generateChunkZettelCapture({ intent, mode }) {
|
|
|
119
120
|
return result;
|
|
120
121
|
}
|
|
121
122
|
|
|
123
|
+
async function generateFreeFormCapture({ intent }) {
|
|
124
|
+
// Free-form requires an LLM adapter. generateUI auto-resolves one via
|
|
125
|
+
// createAdapter — same env-resolution path as monolithic-pro. Without
|
|
126
|
+
// a key set, the strategy returns `free-form-no-llm` + empty messages
|
|
127
|
+
// (coverage 0%); that's the honest signal.
|
|
128
|
+
const result = await generateUI({ intent, engine: 'free-form' });
|
|
129
|
+
if (semanticEnabled && Array.isArray(result.messages) && result.messages.length > 0) {
|
|
130
|
+
capturedMessages.set(`free-form:${intent}`, result.messages);
|
|
131
|
+
}
|
|
132
|
+
return result;
|
|
133
|
+
}
|
|
134
|
+
|
|
122
135
|
// ── Run ──
|
|
123
136
|
let mcp = null;
|
|
124
137
|
let zettel = null;
|
|
125
138
|
let chunkZettel = null;
|
|
139
|
+
let freeForm = null;
|
|
126
140
|
|
|
127
141
|
if (runMcp) {
|
|
128
142
|
console.error(`[eval-diff] running mcp (monolithic) harness…`);
|
|
@@ -160,6 +174,18 @@ if (runChunkZettel) {
|
|
|
160
174
|
console.error(` coverage=${chunkZettel.coverage}% emitted=${chunkZettel.emitted}/${chunkZettel.total} avgScore=${chunkZettel.avgScoreWhenEmitted}`);
|
|
161
175
|
}
|
|
162
176
|
|
|
177
|
+
if (runFreeForm) {
|
|
178
|
+
console.error(`[eval-diff] running free-form (LLM-driven chunk-vocabulary composer) harness…`);
|
|
179
|
+
freeForm = await runHarnessV2({
|
|
180
|
+
generate: generateFreeFormCapture,
|
|
181
|
+
domain,
|
|
182
|
+
limit,
|
|
183
|
+
mode: 'instant',
|
|
184
|
+
label: 'free-form',
|
|
185
|
+
});
|
|
186
|
+
console.error(` coverage=${freeForm.coverage}% emitted=${freeForm.emitted}/${freeForm.total} avgScore=${freeForm.avgScoreWhenEmitted}`);
|
|
187
|
+
}
|
|
188
|
+
|
|
163
189
|
// ── Shadow-mode semantic validation (Phase 1) ──
|
|
164
190
|
// Opt-in via --semantic. Annotates per-intent rows + aggregates with
|
|
165
191
|
// semanticScore/verdict/combinedScore. DOES NOT affect row.pass, passRate,
|
|
@@ -266,6 +292,7 @@ await mkdir(outDir, { recursive: true });
|
|
|
266
292
|
if (mcp) await writeFile(join(outDir, 'mcp.json'), JSON.stringify(mcp, null, 2));
|
|
267
293
|
if (zettel) await writeFile(join(outDir, 'zettel.json'), JSON.stringify(zettel, null, 2));
|
|
268
294
|
if (chunkZettel) await writeFile(join(outDir, 'chunk-zettel.json'), JSON.stringify(chunkZettel, null, 2));
|
|
295
|
+
if (freeForm) await writeFile(join(outDir, 'free-form.json'), JSON.stringify(freeForm, null, 2));
|
|
269
296
|
|
|
270
297
|
// ── Build diff.md ──
|
|
271
298
|
function fmt(v) { return v == null ? '—' : String(v); }
|
|
@@ -278,10 +305,10 @@ function winner(a, b) {
|
|
|
278
305
|
}
|
|
279
306
|
|
|
280
307
|
let md = '';
|
|
281
|
-
md += `# Engine Eval ${[mcp, zettel, chunkZettel].filter(Boolean).length > 1 ? 'Diff' : 'Report'}\n\n`;
|
|
308
|
+
md += `# Engine Eval ${[mcp, zettel, chunkZettel, freeForm].filter(Boolean).length > 1 ? 'Diff' : 'Report'}\n\n`;
|
|
282
309
|
md += `- Run: \`${stamp}\`\n`;
|
|
283
310
|
md += `- Engine(s): ${engine}\n`;
|
|
284
|
-
md += `- Intents: ${(mcp || zettel || chunkZettel).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
|
|
311
|
+
md += `- Intents: ${(mcp || zettel || chunkZettel || freeForm).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
|
|
285
312
|
md += `- Mode: instant\n`;
|
|
286
313
|
if (semanticEnabled) {
|
|
287
314
|
md += `- Semantic: ${gateMode === 'combined' ? `gating (threshold=${gateThreshold})` : 'shadow'}\n`;
|
|
@@ -303,8 +330,8 @@ if (mcp && zettel) {
|
|
|
303
330
|
}
|
|
304
331
|
md += `| retrieval MRR | ${fmt(mcp.retrievalMRR)} | ${fmt(zettel.retrievalMRR)} |\n\n`;
|
|
305
332
|
} else {
|
|
306
|
-
const e = mcp || zettel || chunkZettel;
|
|
307
|
-
const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
|
|
333
|
+
const e = mcp || zettel || chunkZettel || freeForm;
|
|
334
|
+
const label = mcp ? 'mcp' : zettel ? 'zettel' : chunkZettel ? 'chunk-zettel' : 'free-form';
|
|
308
335
|
md += `| metric | ${label} |\n|---|---:|\n`;
|
|
309
336
|
md += `| coverage % | ${e.coverage} |\n`;
|
|
310
337
|
md += `| emitted | ${e.emitted}/${e.total} |\n`;
|
|
@@ -352,8 +379,8 @@ if (mcp && zettel) {
|
|
|
352
379
|
console.error(` ties: ${counts.tie || 0}`);
|
|
353
380
|
console.error(` both missed: ${counts['both-miss'] || 0}`);
|
|
354
381
|
} else {
|
|
355
|
-
const e = mcp || zettel || chunkZettel;
|
|
356
|
-
const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
|
|
382
|
+
const e = mcp || zettel || chunkZettel || freeForm;
|
|
383
|
+
const label = mcp ? 'mcp' : zettel ? 'zettel' : chunkZettel ? 'chunk-zettel' : 'free-form';
|
|
357
384
|
md += `## Strategy breakdown\n\n`;
|
|
358
385
|
md += `**${label}**: ` + Object.entries(e.strategyBreakdown).map(([k, v]) => `${k}=${v}`).join(', ') + `\n\n`;
|
|
359
386
|
md += `## Per-intent\n\n`;
|
|
package/scripts/smoke-engine-registry.mjs
CHANGED
|
@@ -17,9 +17,11 @@ console.log('[smoke] engines registered:', listEngines().join(', '));
|
|
|
17
17
|
const monoInstant = pick({ engine: 'monolithic', mode: 'instant' });
|
|
18
18
|
const monoPro = pick({ engine: 'monolithic', mode: 'pro' });
|
|
19
19
|
const zettel = pick({ engine: 'zettel' });
|
|
20
|
+
const freeForm = pick({ engine: 'free-form' });
|
|
20
21
|
console.log('[smoke] pick monolithic/instant:', monoInstant === ENGINES['monolithic-instant'] ? 'ok' : 'FAIL');
|
|
21
22
|
console.log('[smoke] pick monolithic/pro: ', monoPro === ENGINES['monolithic-pro'] ? 'ok' : 'FAIL');
|
|
22
23
|
console.log('[smoke] pick zettel: ', zettel === ENGINES.zettel ? 'ok' : 'FAIL');
|
|
24
|
+
console.log('[smoke] pick free-form: ', freeForm === ENGINES['free-form'] ? 'ok' : 'FAIL');
|
|
23
25
|
console.log('[smoke] pick unknown → fallback:', pick({ engine: 'xxx', mode: 'xxx' }) === ENGINES['monolithic-instant'] ? 'ok' : 'FAIL');
|
|
24
26
|
|
|
25
27
|
const intent = 'login form with email and password';
|
|
@@ -34,20 +36,34 @@ const t2 = Date.now();
|
|
|
34
36
|
const r2 = await generateUI({ intent, engine: 'zettel' });
|
|
35
37
|
console.log(`[zettel] ${Date.now() - t2}ms msgs=${r2.messages?.length} valid=${r2.validation?.valid} score=${r2.validation?.score} strategy=${r2.strategy} engine=${r2.engine}`);
|
|
36
38
|
|
|
39
|
+
// Free-form (LLM-driven; runs against the env-resolved adapter via
|
|
40
|
+
// generateUI). With an LLM key → `free-form-composed`; without →
|
|
41
|
+
// `free-form-no-llm`. Smoke verifies the dispatch + shape, not the
|
|
42
|
+
// strategy outcome (which depends on env).
|
|
43
|
+
const t3 = Date.now();
|
|
44
|
+
const r3 = await generateUI({ intent, engine: 'free-form' });
|
|
45
|
+
console.log(`[free-form] ${Date.now() - t3}ms msgs=${r3.messages?.length} strategy=${r3.strategy} engine=${r3.engine}`);
|
|
46
|
+
|
|
37
47
|
// Shape invariants
|
|
38
48
|
const ok =
|
|
39
49
|
Array.isArray(r1.messages) && r1.executionId && r1.validation &&
|
|
40
|
-
Array.isArray(r2.messages) && r2.validation
|
|
50
|
+
Array.isArray(r2.messages) && r2.validation &&
|
|
51
|
+
Array.isArray(r3.messages) && r3.validation && r3.engine === 'free-form';
|
|
41
52
|
console.log(`\n[smoke] shape invariants: ${ok ? 'ok' : 'FAIL'}`);
|
|
42
53
|
|
|
43
54
|
// Retrieval-quality probe — for each canonical intent, the generated
|
|
44
55
|
// component tree's text content must overlap the intent's keywords.
|
|
45
56
|
// This catches retrieval regressions (wrong-domain top hit) that pure
|
|
46
57
|
// shape-validation gates miss.
|
|
58
|
+
// Probes pick intents that match the post-§65 harvested-chunks
|
|
59
|
+
// substrate (auth flows, dashboard variants, settings, errors).
|
|
60
|
+
// Removed: 'pricing tiers' (no pricing surface in shipped /site/ —
|
|
61
|
+
// retrieval honestly returns synthesis-failed; LLM fallback handles
|
|
62
|
+
// the intent at ~9s vs ~25ms).
|
|
47
63
|
const RETRIEVAL_PROBES = [
|
|
48
64
|
{ intent: 'login form with email and password', engine: 'zettel', expectKeywords: ['sign in', 'login', 'email', 'password'] },
|
|
49
|
-
{ intent: 'pricing tiers with three plans', engine: 'zettel', expectKeywords: ['pricing', 'tier', 'plan', 'starter', 'pro', 'enterprise', '$'] },
|
|
50
65
|
{ intent: 'sign up form for a new account', engine: 'zettel', expectKeywords: ['sign up', 'register', 'create account', 'email'] },
|
|
66
|
+
{ intent: 'admin dashboard with kpi cards', engine: 'zettel', expectKeywords: ['dashboard', 'kpi', 'metric', 'revenue', 'users', 'orders', 'conversion'] },
|
|
51
67
|
];
|
|
52
68
|
|
|
53
69
|
function extractText(messages) {
|
|
package/scripts/smoke-register-engine.mjs
CHANGED
|
@@ -7,8 +7,9 @@ const t = (label, ok, detail = '') => {
|
|
|
7
7
|
else { console.log(` ✗ ${label} ${detail}`); fail++; }
|
|
8
8
|
};
|
|
9
9
|
|
|
10
|
-
// Baseline
|
|
11
|
-
|
|
10
|
+
// Baseline — 6 built-ins post-§88 (monolithic-instant, monolithic-pro,
|
|
11
|
+
// monolithic-thinking, zettel, chunk-zettel, free-form).
|
|
12
|
+
t('six built-ins registered', listEngines().length === 6);
|
|
12
13
|
|
|
13
14
|
// Happy path
|
|
14
15
|
let customCalled = null;
|
package/scripts/test-a2ui.mjs
CHANGED
|
@@ -72,20 +72,23 @@ try {
|
|
|
72
72
|
bad('LLM adapter', e.message);
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
// ── Test 2:
|
|
75
|
+
// ── Test 2: Composition library (post-§65 chunks-only substrate) ───
|
|
76
76
|
|
|
77
|
-
console.log('\n2.
|
|
77
|
+
console.log('\n2. Composition library');
|
|
78
78
|
|
|
79
79
|
const { searchBlocks, listPatterns, lookupDomain } = await import('../../compose/core/reference.js');
|
|
80
80
|
|
|
81
|
-
const
|
|
82
|
-
const withTemplates =
|
|
83
|
-
const domains = [...new Set(
|
|
81
|
+
const allCompositions = listPatterns();
|
|
82
|
+
const withTemplates = allCompositions.filter(p => p.template && Array.isArray(p.template));
|
|
83
|
+
const domains = [...new Set(allCompositions.map(p => p.domain).filter(Boolean))];
|
|
84
84
|
|
|
85
|
-
|
|
86
|
-
|
|
85
|
+
// Post-§65: retrieval surface is the harvested-chunks substrate
|
|
86
|
+
// (~28-32 annotated chunks at the time of v0.4.7). Threshold sized
|
|
87
|
+
// for that floor; grows naturally as more source HTML gets annotated.
|
|
88
|
+
if (allCompositions.length >= 20) {
|
|
89
|
+
ok('Composition count', `${allCompositions.length} total (${withTemplates.length} with templates)`);
|
|
87
90
|
} else {
|
|
88
|
-
bad('
|
|
91
|
+
bad('Composition count', `only ${allCompositions.length} (expected 20+)`);
|
|
89
92
|
}
|
|
90
93
|
|
|
91
94
|
if (domains.length >= 3) {
|
|
@@ -94,15 +97,17 @@ if (domains.length >= 3) {
|
|
|
94
97
|
bad('Domains', `only ${domains.length}: ${domains.join(', ')}`);
|
|
95
98
|
}
|
|
96
99
|
|
|
97
|
-
// Spot-check
|
|
98
|
-
//
|
|
99
|
-
|
|
100
|
-
|
|
100
|
+
// Spot-check chunk names that exist in the harvested substrate. These
|
|
101
|
+
// are real chunk names from /apps/user-flow/, /apps/saas/, etc. —
|
|
102
|
+
// post-§65 the test asserts on actual product surfaces, not on
|
|
103
|
+
// curated composition JSON that's no longer the canonical source.
|
|
104
|
+
const spotChecks = ['auth-signin-card-password', 'auth-signup-entry', 'dashboard-admin-page', 'settings-admin-page'];
|
|
105
|
+
const foundAll = spotChecks.every(name => allCompositions.some(p => p.name === name));
|
|
101
106
|
if (foundAll) {
|
|
102
|
-
ok('Known
|
|
107
|
+
ok('Known chunks', spotChecks.join(', '));
|
|
103
108
|
} else {
|
|
104
|
-
const missing = spotChecks.filter(name => !
|
|
105
|
-
bad('Known
|
|
109
|
+
const missing = spotChecks.filter(name => !allCompositions.some(p => p.name === name));
|
|
110
|
+
bad('Known chunks', `missing: ${missing.join(', ')}`);
|
|
106
111
|
}
|
|
107
112
|
|
|
108
113
|
// ── Test 3: Instant mode gate ───────────────────────────────────────
|
|
@@ -119,14 +124,20 @@ function testGate(intent) {
|
|
|
119
124
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(w => w.length > 2 && !GATE_STOPS.has(w));
|
|
120
125
|
const nameWords = best.name.toLowerCase().split(/[-_\s]+/);
|
|
121
126
|
const matchTags = (best.tags || []).map(t => t.toLowerCase());
|
|
127
|
+
// Post-§65: harvested chunks carry semantic intent in `keywords` more
|
|
128
|
+
// than in `tags` (which became {complexity, layout} slots). Include
|
|
129
|
+
// keywords in the gate so `login → auth-signin-card-password` strong-hits
|
|
130
|
+
// off the chunk's `keywords: ["login", ...]` field.
|
|
131
|
+
const matchKeywords = (best.keywords || []).map(k => k.toLowerCase());
|
|
122
132
|
const matchDomain = (best.domain || '').toLowerCase();
|
|
123
133
|
|
|
124
134
|
const hasStrongHit = intentWords.some(w => {
|
|
125
135
|
if (w.length < 3) return false;
|
|
126
|
-
if (nameWords.includes(w) || matchTags.includes(w)) return true;
|
|
136
|
+
if (nameWords.includes(w) || matchTags.includes(w) || matchKeywords.includes(w)) return true;
|
|
127
137
|
if (w.length >= 4) {
|
|
128
138
|
return nameWords.some(n => n.length >= 3 && (w.startsWith(n) || n.startsWith(w))) ||
|
|
129
|
-
matchTags.some(t => t.length >= 3 && (w.startsWith(t) || t.startsWith(w)))
|
|
139
|
+
matchTags.some(t => t.length >= 3 && (w.startsWith(t) || t.startsWith(w))) ||
|
|
140
|
+
matchKeywords.some(k => k.length >= 3 && (w.startsWith(k) || k.startsWith(w)));
|
|
130
141
|
}
|
|
131
142
|
return false;
|
|
132
143
|
});
|
|
@@ -134,19 +145,26 @@ function testGate(intent) {
|
|
|
134
145
|
const hasWeakHit = !hasStrongHit && intentWords.some(w => {
|
|
135
146
|
return nameWords.some(n => n.length >= 3 && (n.includes(w) || w.includes(n))) ||
|
|
136
147
|
matchTags.some(t => t.length >= 3 && (t.includes(w) || w.includes(t))) ||
|
|
148
|
+
matchKeywords.some(k => k.length >= 3 && (k.includes(w) || w.includes(k))) ||
|
|
137
149
|
matchDomain.includes(w);
|
|
138
150
|
});
|
|
139
151
|
|
|
140
152
|
return { gate: hasStrongHit ? 'STRONG' : hasWeakHit ? 'WEAK' : 'REJECTED', pattern: best.name };
|
|
141
153
|
}
|
|
142
154
|
|
|
143
|
-
// Should STRONG match
|
|
155
|
+
// Should STRONG match — restricted to intents covered by the
|
|
156
|
+
// harvested-chunks substrate (auth, dashboard, settings, error pages).
|
|
157
|
+
// Intents previously tested ("pricing table", "chat interface",
|
|
158
|
+
// "todo list", etc.) dropped because §65 retired the curated
|
|
159
|
+
// composition surface — LLM fallback handles those now.
|
|
160
|
+
// Intents need ≥2 content-token hits OR a direct name-token match —
|
|
161
|
+
// short 1-content-word intents (e.g. just "login form") get gated out
|
|
162
|
+
// by composition-library's anti-spurious-match logic. Use intents that
|
|
163
|
+
// land naturally — they're what real users type anyway.
|
|
144
164
|
const strongTests = [
|
|
145
|
-
['login
|
|
146
|
-
['
|
|
147
|
-
['
|
|
148
|
-
['pricing table', null],
|
|
149
|
-
['chat interface', null],
|
|
165
|
+
['login with email and password', null], // → auth-signin-card-password (3 keyword hits)
|
|
166
|
+
['admin dashboard kpi', null], // → dashboard-admin-page
|
|
167
|
+
['workspace admin settings', null], // → settings-admin-page
|
|
150
168
|
];
|
|
151
169
|
for (const [intent, expected] of strongTests) {
|
|
152
170
|
const { gate, pattern } = testGate(intent);
|
|
@@ -159,10 +177,9 @@ for (const [intent, expected] of strongTests) {
|
|
|
159
177
|
|
|
160
178
|
// Should NOT be REJECTED (STRONG or WEAK both acceptable)
|
|
161
179
|
const passTests = [
|
|
162
|
-
'
|
|
163
|
-
'create a todo list',
|
|
164
|
-
'user profile card',
|
|
180
|
+
'sign up for an account',
|
|
165
181
|
'settings page',
|
|
182
|
+
'404 not found error',
|
|
166
183
|
];
|
|
167
184
|
for (const intent of passTests) {
|
|
168
185
|
const { gate, pattern } = testGate(intent);
|
|
@@ -238,17 +255,21 @@ if (!THINKING) {
|
|
|
238
255
|
}
|
|
239
256
|
|
|
240
257
|
// ── Test 6: Training corpus surfaces ────────────────────────────────
|
|
241
|
-
//
|
|
242
|
-
//
|
|
258
|
+
// Post-§65: `compositions/` retired alongside the hand-authored
|
|
259
|
+
// pattern library. The harvested-chunks substrate is the sole
|
|
260
|
+
// retrieval surface; everything else falls through to LLM.
|
|
261
|
+
// (Legacy exemplar extract → ingest path retired 2026-04-28 mcp 0.0.5.)
|
|
243
262
|
|
|
244
263
|
console.log('\n6. Training corpus surfaces');
|
|
245
264
|
|
|
246
|
-
// 6a.
|
|
265
|
+
// 6a. Composition library (harvested chunks via composition-library).
|
|
266
|
+
// Threshold sized for the post-§65 floor (~28 annotated chunks at
|
|
267
|
+
// v0.4.7); grows as more source HTML gets annotated.
|
|
247
268
|
const patterns = listPatterns();
|
|
248
|
-
if (patterns.length >=
|
|
249
|
-
ok('
|
|
269
|
+
if (patterns.length >= 20) {
|
|
270
|
+
ok('Composition library', `${patterns.length} compositions (harvested-chunks substrate)`);
|
|
250
271
|
} else {
|
|
251
|
-
bad('
|
|
272
|
+
bad('Composition library', `only ${patterns.length} (expected ≥ 20)`);
|
|
252
273
|
}
|
|
253
274
|
|
|
254
275
|
// 6b. Gen-UI chunk corpus — should be ≥ 500 unique chunks across
|