@adia-ai/a2ui-mcp 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -11,6 +11,26 @@ zettel strategies.
11
11
 
12
12
  _No pending changes._
13
13
 
14
+ ## [0.4.3] - 2026-05-11
15
+
16
+ ### Ride-along (no source changes)
17
+
18
+ Lockstep PATCH cut alongside `@adia-ai/web-components@0.4.3` (input-ui type=number locale + thousands grouping + hold-to-repeat) + `@adia-ai/a2ui-compose@0.4.3` + `@adia-ai/a2ui-retrieval@0.4.3` (process.env browser-compat fix) + `@adia-ai/a2ui-corpus@0.4.3` (catalog regen + chunks re-harvest with new settings-appearance pattern). Source byte-identical to v0.4.2.
19
+
20
+ Internal `@adia-ai/*` dep ranges stay at `^0.4.0` (patch-cut asymmetry — `^0.4.0` covers `0.4.x` under semver). See root [CHANGELOG.md `## [0.4.3]`](../../../CHANGELOG.md) for the cut narrative.
21
+
22
+ ## [0.4.2] - 2026-05-11
23
+
24
+ ### Added
25
+
26
+ - `scripts/shadow-compare-classify.mjs` — Phase 3 shadow-compare classifier tool (139 lines). Compares the deterministic `classifyIntent` (from `@adia-ai/a2ui-validator` Phase 3 foundation, shipped in v0.4.1) against the LLM judge baseline; `scoreAgainstSpec` from the same foundation is not exercised yet because eval-diff archives do not store the emitted A2UI tree. Used to identify IntentSpecs eligible for fast-path promotion (`≥ 95%` agreement threshold per spec § 3c). First report at `docs/reports/semantic-shadow-compare-2026-05-10.md` flagged `auth.signin` + `auth.signup` as eligible. Surfaces alongside `npm run semantic:shadow-compare`.
27
+
28
+ ### Ride-along (otherwise no source changes)
29
+
30
+ Lockstep PATCH cut alongside `@adia-ai/web-components@0.4.2` (`<input-ui type="number">` rewrite drops native `<input type=number>` wrapping) + `@adia-ai/web-modules@0.4.2` (`<editor-sidebar>` grid-track width-mirror fix + new `<theme-panel>` module). Apart from the script above, source byte-identical to v0.4.1.
31
+
32
+ Internal `@adia-ai/*` dep ranges stay at `^0.4.0` (patch-cut asymmetry — `^0.4.0` covers `0.4.x` under semver). See root [CHANGELOG.md `## [0.4.2]`](../../../CHANGELOG.md) for the cut narrative.
33
+
14
34
  ## [0.4.1] - 2026-05-10
15
35
 
16
36
  ### Ride-along (no source changes)
package/README.md CHANGED
@@ -11,12 +11,22 @@ other MCP-speaking host.
11
11
  > [`@adia-ai/a2ui-runtime`](../runtime); corpus in
12
12
  > [`@adia-ai/a2ui-corpus`](../corpus).
13
13
 
14
+ ## Install
15
+
16
+ ```bash
17
+ npm install -g @adia-ai/a2ui-mcp # global — exposes the `adiaui-mcp` bin
18
+ # OR
19
+ npm install @adia-ai/a2ui-mcp # local — invoke via npx
20
+ ```
21
+
22
+ The package ships an `adiaui-mcp` executable + a `server.js` entry point. Most MCP hosts (Claude Desktop, Claude Code) invoke the binary directly via `command` + `args` in their MCP config.
23
+
14
24
  ## Quick start
15
25
 
16
26
  ```bash
17
- node packages/a2ui/mcp/server.js
18
- # or, if linked:
19
- adiaui-mcp
27
+ adiaui-mcp # if globally installed
28
+ npx @adia-ai/a2ui-mcp # via npx
29
+ node packages/a2ui/mcp/server.js # from a local checkout
20
30
  ```
21
31
 
22
32
  Register with Claude Code (`.claude/settings.json`):
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adia-ai/a2ui-mcp",
3
- "version": "0.4.1",
3
+ "version": "0.4.3",
4
4
  "description": "AdiaUI A2UI MCP server. Exposes the compose engine over MCP with an engine selector for monolithic + zettel strategies.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * shadow-compare-classify.mjs — Phase 3 shadow-compare tooling
4
+ *
5
+ * For each row in an eval-diff run JSON that has semanticVerdict (LLM judge):
6
+ * 1. Run classifyIntent() from the rule classifier on the row's intent.
7
+ * 2. Bucket the LLM verdict + rule verdict + intent-spec match.
8
+ * 3. Report agreement rate per spec + overall.
9
+ *
10
+ * Goal: identify which IntentSpecs the rule classifier agrees with the LLM
11
+ * judge on ≥ 95% of the time. Those graduate to fast-path per
12
+ * docs/specs/semantic-validator.md § Phase 3 exit criteria.
13
+ *
14
+ * Note: the rule SCORER (scoreAgainstSpec) needs the emitted A2UI tree,
15
+ * which is not stored in eval-diff archives. This tool measures only
16
+ * **intent classification agreement** — does the regex hit the intent
17
+ * class, and is that class consistent with the LLM's verdict on the row?
18
+ *
19
+ * Future: when eval-diff starts archiving emitted_a2ui, extend this script
20
+ * to score the tree against the spec + compare full verdicts.
21
+ *
22
+ * Usage:
23
+ * node packages/a2ui/mcp/scripts/shadow-compare-classify.mjs <run.json>
24
+ *
25
+ * # Or multiple runs aggregated:
26
+ * node packages/a2ui/mcp/scripts/shadow-compare-classify.mjs \
27
+ * evals/mcp/runs/2026-04-19T-mcp.json evals/mcp/runs/2026-05-10T-mcp.json
28
+ *
29
+ * Output: markdown report to stdout — agreement table per spec + overall.
30
+ * Exit 0 on success (this is read-only diagnostics); exit 2 when no run file is given.
31
+ */
32
+ import { readFile } from 'node:fs/promises';
33
+ import { classifyIntent } from '../../validator/semantic/classify-intent.js';
34
+
35
+ const args = process.argv.slice(2);
36
+ if (args.length === 0) {
37
+ console.error('Usage: shadow-compare-classify.mjs <run.json> [<run2.json> ...]');
38
+ process.exit(2);
39
+ }
40
+
41
+ /** Aggregator: { specId → { agreed, disagreed, partial, total, examples } } */
42
+ const stats = new Map();
43
+ let totalRows = 0;
44
+ let rowsWithSemantic = 0;
45
+ let rowsWithIntentMatch = 0;
46
+
47
+ function ensureBucket(id) {
48
+ if (!stats.has(id)) {
49
+ stats.set(id, { agreed: 0, disagreed: 0, partial: 0, total: 0, examples: [] });
50
+ }
51
+ return stats.get(id);
52
+ }
53
+
54
+ for (const path of args) {
55
+ const raw = await readFile(path, 'utf8');
56
+ const data = JSON.parse(raw);
57
+ if (!data.results) continue;
58
+
59
+ for (const row of data.results) {
60
+ totalRows += 1;
61
+ if (!row.semanticVerdict) continue; // no LLM verdict to compare
62
+ rowsWithSemantic += 1;
63
+
64
+ const { spec, confidence } = classifyIntent(row.intent);
65
+ if (!spec || confidence < 0.9) {
66
+ // Rule classifier didn't recognize the intent — count under "unmatched"
67
+ const b = ensureBucket('__UNMATCHED__');
68
+ b.total += 1;
69
+ if (row.semanticVerdict === 'aligned') b.agreed += 1;
70
+ else if (row.semanticVerdict === 'misaligned') b.disagreed += 1;
71
+ else b.partial += 1;
72
+ continue;
73
+ }
74
+
75
+ rowsWithIntentMatch += 1;
76
+ const b = ensureBucket(spec.id);
77
+ b.total += 1;
78
+
79
+ // Agreement heuristic:
80
+ // - If the rule classifier recognized the intent class, the rule
81
+ // "votes" pass (the intent is a known shape with known structural
82
+ // requirements). Agreement with LLM = LLM verdict is "aligned".
83
+ // - Disagreement = LLM verdict is "misaligned".
84
+ // - "partial" is neither — counted separately.
85
+ if (row.semanticVerdict === 'aligned') b.agreed += 1;
86
+ else if (row.semanticVerdict === 'misaligned') {
87
+ b.disagreed += 1;
88
+ if (b.examples.length < 3) {
89
+ b.examples.push({ id: row.id, intent: row.intent.slice(0, 80), rationale: (row.semanticRationale || '').slice(0, 200) });
90
+ }
91
+ } else b.partial += 1;
92
+ }
93
+ }
94
+
95
+ // ── Render report ───────────────────────────────────────────────────
96
+ console.log(`# Semantic-validator Phase 3 — shadow-compare report\n`);
97
+ console.log(`Compared **${args.length}** eval run(s); ${rowsWithSemantic}/${totalRows} rows had semanticVerdict; ${rowsWithIntentMatch} rows matched a known IntentSpec.\n`);
98
+
99
+ const overallAgreed = [...stats.values()].reduce((a, b) => a + b.agreed, 0);
100
+ const overallTotal = [...stats.values()].reduce((a, b) => a + b.total, 0);
101
+ const overallPct = overallTotal > 0 ? Math.round((overallAgreed / overallTotal) * 100) : 0;
102
+ console.log(`**Overall classifier agreement** (matched + unmatched): ${overallAgreed}/${overallTotal} = **${overallPct}%**\n`);
103
+
104
+ console.log(`## Per-spec agreement\n`);
105
+ console.log(`| spec | agreed | disagreed | partial | total | agreement % | promotion eligible (≥95%)? |`);
106
+ console.log(`|---|---:|---:|---:|---:|---:|:---:|`);
107
+
108
+ const sorted = [...stats.entries()].sort((a, b) => {
109
+ // Unmatched last; otherwise descending by total
110
+ if (a[0] === '__UNMATCHED__') return 1;
111
+ if (b[0] === '__UNMATCHED__') return -1;
112
+ return b[1].total - a[1].total;
113
+ });
114
+
115
+ for (const [id, b] of sorted) {
116
+ const pct = b.total > 0 ? Math.round((b.agreed / b.total) * 100) : 0;
117
+ const eligible = id !== '__UNMATCHED__' && b.total >= 5 && pct >= 95 ? '✅ promote' : (b.total < 5 ? '— insufficient data' : '—');
118
+ console.log(`| \`${id === '__UNMATCHED__' ? '(no spec match)' : id}\` | ${b.agreed} | ${b.disagreed} | ${b.partial} | ${b.total} | ${pct}% | ${eligible} |`);
119
+ }
120
+
121
+ // ── Disagreement samples ────────────────────────────────────────────
122
+ const withDisagreements = sorted.filter(([id, b]) => id !== '__UNMATCHED__' && b.examples.length > 0);
123
+ if (withDisagreements.length > 0) {
124
+ console.log(`\n## Disagreement samples\n`);
125
+ console.log(`Rows where the rule classifier matched a known IntentSpec but the LLM judged the row 'misaligned'. Investigate before promoting these specs.\n`);
126
+ for (const [id, b] of withDisagreements) {
127
+ console.log(`### ${id}\n`);
128
+ for (const ex of b.examples) {
129
+ console.log(`- **${ex.id}** \`"${ex.intent}"\``);
130
+ if (ex.rationale) console.log(` - LLM rationale: ${ex.rationale}`);
131
+ }
132
+ console.log();
133
+ }
134
+ }
135
+
136
+ console.log(`\n---\n`);
137
+ console.log(`*Generated by \`shadow-compare-classify.mjs\` from \`packages/a2ui/validator/semantic/classify-intent.js\`. See [docs/specs/semantic-validator.md § Phase 3](../../../docs/specs/semantic-validator.md) for exit criteria.*`);
138
+
139
+ process.exit(0);