@adia-ai/a2ui-validator 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -0
- package/README.md +54 -0
- package/catalog-validator.js +162 -0
- package/index.js +11 -0
- package/package.json +35 -0
- package/semantic/cache.js +54 -0
- package/semantic/index.js +163 -0
- package/semantic/judge.js +180 -0
- package/validator.js +1074 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Changelog — @adia-ai/a2ui-validator
|
|
2
|
+
|
|
3
|
+
Follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and
|
|
4
|
+
[Semantic Versioning](https://semver.org/).
|
|
5
|
+
|
|
6
|
+
## [Unreleased]
|
|
7
|
+
|
|
8
|
+
_Nothing yet._
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## [0.0.1] - 2026-04-24
|
|
13
|
+
|
|
14
|
+
First public release. Extracted from
|
|
15
|
+
[`@adia-ai/a2ui-compose`](../compose/) during the 2026-04-24
|
|
16
|
+
consolidation.
|
|
17
|
+
|
|
18
|
+
### Included
|
|
19
|
+
|
|
20
|
+
- **Structural validator** (`validator.js`) — JSON Schema validation
|
|
21
|
+
against the A2UI protocol schema via `ajv` + `ajv-formats`. Returns
|
|
22
|
+
`{ valid, errors }`.
|
|
23
|
+
- **Catalog validator** (`catalog-validator.js`) — semantic checks:
|
|
24
|
+
component-exists, props-match-YAML, references-resolve. Consumed by
|
|
25
|
+
the compose engine's post-generation validator pass and by CI
|
|
26
|
+
drift gates.
|
|
27
|
+
- **Semantic validator** (`semantic/`) — optional LLM-judged rubric
|
|
28
|
+
scoring in shadow mode. Disk-cached by content hash; gated behind
|
|
29
|
+
`--semantic` flag in `eval-diff.mjs`. See
|
|
30
|
+
[`docs/specs/semantic-validator.md`](https://github.com/adiahealth/gen-ui-kit/blob/main/docs/specs/semantic-validator.md).
|
|
31
|
+
|
|
32
|
+
### Dependencies
|
|
33
|
+
|
|
34
|
+
- `@adia-ai/a2ui-utils` — A2UI registry + runtime primitives.
|
|
35
|
+
- `ajv` ^8 — JSON Schema validator.
|
|
36
|
+
- `ajv-formats` ^3 — standard format validators (uri, date, etc.).
|
package/README.md
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# `@adia-ai/a2ui-validator`
|
|
2
|
+
|
|
3
|
+
JSON Schema structural validation + catalog-aware semantic validation
|
|
4
|
+
for A2UI (Agent-to-UI) protocol messages. Extracted from
|
|
5
|
+
[`@adia-ai/a2ui-compose`](../compose/) so non-compose tooling (tests,
|
|
6
|
+
MCP validator tools, CI gates) can depend on validation without
|
|
7
|
+
pulling the full generator graph.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install @adia-ai/a2ui-validator
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
```js
|
|
18
|
+
import { validateSchema } from '@adia-ai/a2ui-validator';
|
|
19
|
+
|
|
20
|
+
const messages = [/* A2UI protocol messages */];
|
|
21
|
+
const result = validateSchema(messages);
|
|
22
|
+
if (!result.valid) {
|
|
23
|
+
console.error(result.errors);
|
|
24
|
+
}
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Catalog-aware validation (component exists + props match YAML):
|
|
28
|
+
|
|
29
|
+
```js
|
|
30
|
+
import { validateMessages } from '@adia-ai/a2ui-validator/catalog';
|
|
31
|
+
|
|
32
|
+
const result = await validateMessages(messages);
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## What's here
|
|
36
|
+
|
|
37
|
+
- **Structural validator** — JSON Schema validation against the A2UI
|
|
38
|
+
protocol schema (message shape, required fields, enum constraints).
|
|
39
|
+
- **Catalog validator** — semantic checks: does this component exist in
|
|
40
|
+
the catalog? Do its props match the YAML contract? Are references
|
|
41
|
+
resolvable?
|
|
42
|
+
- **Semantic validator** (optional, shadow-mode) — LLM-judged output
|
|
43
|
+
quality against a rubric; cached on disk by content hash.
|
|
44
|
+
|
|
45
|
+
## Runtime
|
|
46
|
+
|
|
47
|
+
- `ajv` + `ajv-formats` for structural validation.
|
|
48
|
+
- `@adia-ai/a2ui-utils` for the registry shape.
|
|
49
|
+
|
|
50
|
+
## Links
|
|
51
|
+
|
|
52
|
+
- Repo: [`adiahealth/gen-ui-kit`](https://github.com/adiahealth/gen-ui-kit)
|
|
53
|
+
- Spec: [`docs/specs/semantic-validator.md`](https://github.com/adiahealth/gen-ui-kit/blob/main/docs/specs/semantic-validator.md)
|
|
54
|
+
- CHANGELOG: [`CHANGELOG.md`](./CHANGELOG.md)
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Catalog-backed structural validator — checks that every emitted A2UI
|
|
3
|
+
* component conforms to its catalog schema (allOf: ComponentCommon +
|
|
4
|
+
* CatalogComponentCommon + component-specific props, with
|
|
5
|
+
* unevaluatedProperties: false so unknown props are rejected).
|
|
6
|
+
*
|
|
7
|
+
* The catalog at a2ui/corpus/catalog-a2ui_0_9.json is a freestanding
|
|
8
|
+
* JSON Schema draft-2020-12 document produced by scripts/build-components.mjs.
|
|
9
|
+
* At first use we compile one validator per component type and cache them,
|
|
10
|
+
* then dispatch per-component by the `component` discriminator.
|
|
11
|
+
*
|
|
12
|
+
* Fast path when the catalog is missing (e.g. browser bundle without assets):
|
|
13
|
+
* return { valid: true, skipped: true } so the scoring validator still runs.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
17
|
+
import addFormats from 'ajv-formats';
|
|
18
|
+
|
|
19
|
+
// True when a `process` global exposing `versions.node` is present; _loadCatalog
// uses it to choose between the filesystem loader and the bundler-resolved import.
const IS_NODE = typeof process !== 'undefined' && !!process.versions?.node;
|
|
20
|
+
|
|
21
|
+
// Parsed catalog document; stays null until _loadCatalog succeeds (and after a failed load).
let _catalog = null;
|
|
22
|
+
// Shared Ajv instance, created on first use by _getAjv.
let _ajv = null;
|
|
23
|
+
// Per-component compiled validators, populated once by _getValidatorFor.
let _validators = null; // Map<componentName, compiledValidator>
|
|
24
|
+
// Promise of the single catalog load attempt; reused so the load runs at most once.
let _loadPromise = null;
|
|
25
|
+
|
|
26
|
+
/**
 * Load the catalog document once and memoize the outcome.
 *
 * Under Node the JSON file is read from `../corpus/catalog-a2ui_0_9.json`
 * relative to this module; elsewhere it is pulled in through a dynamic JSON
 * import. The attempt is made at most once: the promise is cached, so a
 * failed load keeps resolving to `null` and callers fall back to
 * `{ valid: true, skipped: true }`.
 *
 * A missing file (ENOENT) is the expected "no catalog shipped" case and stays
 * silent. Any other failure (unreadable file, malformed JSON, …) still
 * resolves to `null` but is reported with console.warn, so a broken catalog
 * does not silently disable validation.
 *
 * @returns {Promise<object|null>} the parsed catalog, or null when unavailable
 */
async function _loadCatalog() {
  if (_catalog) return _catalog;
  if (_loadPromise) return _loadPromise;

  _loadPromise = (async () => {
    try {
      if (IS_NODE) {
        // The three built-ins are independent, so fetch them together.
        const [fs, path, url] = await Promise.all([
          import(/* @vite-ignore */ 'node:fs/promises'),
          import(/* @vite-ignore */ 'node:path'),
          import(/* @vite-ignore */ 'node:url'),
        ]);
        const here = path.dirname(url.fileURLToPath(import.meta.url));
        // a2ui/validator → a2ui/corpus/catalog-a2ui_0_9.json
        const catalogPath = path.resolve(here, '../corpus/catalog-a2ui_0_9.json');
        _catalog = JSON.parse(await fs.readFile(catalogPath, 'utf8'));
      } else {
        // Browser: Vite glob-resolves the JSON at build time.
        const mod = await import('../corpus/catalog-a2ui_0_9.json',
          { with: { type: 'json' } }).catch(() => null);
        _catalog = mod?.default ?? null;
      }
    } catch (err) {
      _catalog = null; // missing catalog → validator no-ops; caller sees skipped: true
      if (err?.code !== 'ENOENT') {
        console.warn(`[catalog-validator] failed to load catalog: ${err?.message ?? String(err)}`);
      }
    }
    return _catalog;
  })();

  return _loadPromise;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Return the shared Ajv (draft 2020-12) instance, building it on first call.
 *
 * @returns {object} the memoized Ajv instance
 */
function _getAjv() {
  if (!_ajv) {
    const options = {
      strict: false,          // our schemas are authored, not hardened; silence warnings
      allErrors: true,        // return every violation, not just the first
      verbose: false,
      allowUnionTypes: true,
      validateFormats: false, // formats cost compilation time and aren't strictly needed
    };
    _ajv = new Ajv2020(options);
    addFormats(_ajv);
  }
  return _ajv;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Look up the compiled validator for one component type.
 *
 * On the first call with a loaded catalog, the whole catalog is registered
 * with Ajv under its `$id` (or 'adiaui-catalog') and one validator per entry
 * of `catalog.components` is fetched by JSON Pointer and cached.
 *
 * @param {string} componentName — value of the `component` discriminator
 * @returns {Promise<Function|null>} compiled validator, or null when the
 *   catalog is unavailable, registration failed, or the type is unknown
 */
async function _getValidatorFor(componentName) {
  if (_validators?.has(componentName)) {
    return _validators.get(componentName);
  }

  const catalog = await _loadCatalog();
  if (!catalog) {
    return null;
  }

  if (!_validators) {
    _validators = new Map();
    const ajv = _getAjv();
    // The catalog is registered as one addressable document, so intra-catalog
    // `$ref`s (#/$defs/X) resolve against it as the root.
    const catalogId = catalog.$id || 'adiaui-catalog';

    try {
      ajv.addSchema(catalog, catalogId);
    } catch (e) {
      console.warn(`[catalog-validator] failed to register catalog: ${e.message}`);
      return null;
    }

    Object.keys(catalog.components || {}).forEach((name) => {
      try {
        // Sub-schema at #/components/<name>, resolved against the registered catalog.
        const compiled = ajv.getSchema(`${catalogId}#/components/${name}`);
        if (compiled) {
          _validators.set(name, compiled);
        }
      } catch (e) {
        console.warn(`[catalog-validator] skip ${name}: ${e.message}`);
      }
    });
  }

  return _validators.get(componentName) || null;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Check a single A2UI component object against the catalog schema for its type.
 *
 * When no validator is available (catalog missing, or the type is not in the
 * catalog) the component is reported as `{ valid: true, skipped: true }`.
 *
 * @param {object} component — A2UI component ({ id, component, ...props })
 * @returns {Promise<{ valid: boolean, errors?: string[], skipped?: boolean }>}
 */
export async function validateComponent(component) {
  const isObject = component !== null && typeof component === 'object';
  if (!isObject) {
    return { valid: false, errors: ['not an object'] };
  }

  const { component: typeName } = component;
  if (!typeName) {
    return { valid: false, errors: ['missing `component` discriminator'] };
  }

  const check = await _getValidatorFor(typeName);
  if (!check) {
    // Catalog missing OR component type not in catalog → structural-only fallback.
    return { valid: true, skipped: true };
  }

  if (check(component)) {
    return { valid: true };
  }

  // Render each Ajv error as "<instancePath> <message>[ (<params JSON>)]".
  const describe = (issue) => {
    const where = issue.instancePath || '/';
    const hasParams = issue.params && Object.keys(issue.params).length;
    const detail = hasParams ? ` (${JSON.stringify(issue.params)})` : '';
    return `${where} ${issue.message}${detail}`;
  };
  return { valid: false, errors: (check.errors || []).map((issue) => describe(issue)) };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Validate an array of A2UI messages ({ type: 'updateComponents', components: [...] }).
 * Flattens all updateComponents.components[] across messages and runs per-component validation.
 *
 * Messages of any other type are ignored. `totalChecked` counts every visited
 * component, including those whose validation was skipped. Entries that are
 * not objects (e.g. `null`) are reported as failures with placeholder
 * id/component labels instead of crashing the whole run.
 *
 * @param {Array<object>|null|undefined} messages — A2UI protocol messages
 * @returns {Promise<{ valid: boolean, totalChecked: number, failures: Array<{ id, component, errors }> }>}
 */
export async function validateMessages(messages) {
  const failures = [];
  let totalChecked = 0;

  for (const msg of messages || []) {
    if (msg?.type !== 'updateComponents') continue;
    for (const comp of msg.components || []) {
      totalChecked++;
      const { valid, errors, skipped } = await validateComponent(comp);
      if (skipped) continue;
      if (!valid) {
        failures.push({
          // `comp` may be null/undefined or a primitive here, so read defensively.
          id: comp?.id || '(no id)',
          component: comp?.component || '(no component)',
          errors: errors || ['unknown validation failure'],
        });
      }
    }
  }

  return { valid: failures.length === 0, totalChecked, failures };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Diagnostics helper: trigger (or await) the catalog load and report whether
 * a catalog document is now held in memory.
 *
 * @returns {Promise<boolean>} true when the catalog loaded successfully
 */
export async function isCatalogLoaded() {
  await _loadCatalog();
  const loaded = _catalog !== null;
  return loaded;
}
|
package/index.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @adia-ai/a2ui-validator — barrel.
|
|
3
|
+
*
|
|
4
|
+
* Schema + semantic validation for A2UI protocol messages. Use
|
|
5
|
+
* `validateSchema` for structural checks (shape, types, required
|
|
6
|
+
* fields); use `validateMessages` for catalog-aware checks (component
|
|
7
|
+
* exists, props match the YAML contract).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export * from './validator.js';
|
|
11
|
+
export * from './catalog-validator.js';
|
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@adia-ai/a2ui-validator",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "AdiaUI A2UI validator — JSON Schema structural validation plus catalog-aware semantic validation (component exists, props match YAML). Split out from the compose engine so non-compose tooling (tests, MCP validator tools, CI gates) can depend on validation without pulling the whole generator graph.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./index.js",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./index.js",
|
|
9
|
+
"./validator": "./validator.js",
|
|
10
|
+
"./catalog": "./catalog-validator.js"
|
|
11
|
+
},
|
|
12
|
+
"files": [
|
|
13
|
+
"index.js",
|
|
14
|
+
"validator.js",
|
|
15
|
+
"catalog-validator.js",
|
|
16
|
+
"semantic/",
|
|
17
|
+
"README.md",
|
|
18
|
+
"CHANGELOG.md"
|
|
19
|
+
],
|
|
20
|
+
"license": "MIT",
|
|
21
|
+
"publishConfig": {
|
|
22
|
+
"access": "public",
|
|
23
|
+
"registry": "https://registry.npmjs.org"
|
|
24
|
+
},
|
|
25
|
+
"repository": {
|
|
26
|
+
"type": "git",
|
|
27
|
+
"url": "git+https://github.com/adiahealth/gen-ui-kit.git",
|
|
28
|
+
"directory": "packages/a2ui/validator"
|
|
29
|
+
},
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"@adia-ai/a2ui-utils": "^0.0.2",
|
|
32
|
+
"ajv": "^8.0.0",
|
|
33
|
+
"ajv-formats": "^3.0.0"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-hash cache for semantic verdicts.
|
|
3
|
+
*
|
|
4
|
+
* Key: (rubricVersion, intent, a2ui-hash).
|
|
5
|
+
* Store: one JSON file per key under evals/mcp/runs/.semantic-cache/.
|
|
6
|
+
*
|
|
7
|
+
* Phase 1 scope: read/write; no eviction. Bump rubricVersion to invalidate.
|
|
8
|
+
*/
|
|
9
|
+
import { createHash } from 'node:crypto';
|
|
10
|
+
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
11
|
+
import { existsSync } from 'node:fs';
|
|
12
|
+
import { dirname, join } from 'node:path';
|
|
13
|
+
import { fileURLToPath } from 'node:url';
|
|
14
|
+
|
|
15
|
+
// Directory containing this module (ESM has no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
// Four levels above this directory. NOTE(review): presumably the monorepo root when this
// file sits at packages/a2ui/validator/semantic/; in an npm install it resolves elsewhere — confirm.
const repoRoot = join(__dirname, '..', '..', '..', '..');
|
|
17
|
+
// Default on-disk location for cached verdicts; makeCache({ dir }) overrides it.
const DEFAULT_DIR = join(repoRoot, 'evals', 'mcp', 'runs', '.semantic-cache');
|
|
18
|
+
|
|
19
|
+
/**
 * Fingerprint a message list: SHA-256 of its JSON serialization, truncated
 * to the first 16 hex characters. `null`/`undefined` hash like an empty array.
 *
 * @param {Array<object>|null|undefined} messages — A2UI messages
 * @returns {string} 16-character lowercase hex digest prefix
 */
export function hashA2UI(messages) {
  const serialized = JSON.stringify(messages ?? []);
  return createHash('sha256').update(serialized).digest('hex').slice(0, 16);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Derive the cache key for one (rubricVersion, intent, messages) triple.
 *
 * The three parts are joined with the ASCII unit separator (0x1f), hashed
 * with SHA-256 and truncated to 32 hex characters. A falsy rubricVersion
 * counts as 'v1' and a falsy intent as the empty string.
 *
 * @param {{ rubricVersion?: any, intent?: any, messages?: Array<object> }} parts
 * @returns {string} 32-character lowercase hex key
 */
export function cacheKey({ rubricVersion, intent, messages }) {
  const separator = '\x1f';
  const fields = [
    String(rubricVersion || 'v1'),
    String(intent || ''),
    hashA2UI(messages),
  ];
  const digest = createHash('sha256').update(fields.join(separator)).digest('hex');
  return digest.slice(0, 32);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Build a tiny file-backed verdict store: one `<key>.json` file per entry
 * inside `dir`. There is no eviction.
 *
 * @param {{ dir?: string }} [options] — storage directory (defaults to DEFAULT_DIR)
 * @returns {{ dir: string, get(key: string): Promise<object|null>, set(key: string, verdict: object): Promise<void> }}
 */
export function makeCache({ dir = DEFAULT_DIR } = {}) {
  const fileFor = (key) => join(dir, `${key}.json`);

  // Resolves to the stored verdict, or null when the entry is absent or unreadable.
  async function get(key) {
    const file = fileFor(key);
    if (!existsSync(file)) {
      return null;
    }
    try {
      return JSON.parse(await readFile(file, 'utf8'));
    } catch {
      return null;
    }
  }

  // Creates the directory on demand, then writes the verdict as pretty-printed JSON.
  async function set(key, verdict) {
    await mkdir(dir, { recursive: true });
    await writeFile(fileFor(key), JSON.stringify(verdict, null, 2));
  }

  return { dir, get, set };
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic validator — public API (Phase 1: LLM-only, shadow mode).
|
|
3
|
+
*
|
|
4
|
+
* See docs/specs/semantic-validator.md §5.2 for the SemanticVerdict shape.
|
|
5
|
+
* Phase 1: LLM-judge only. Hybrid rule path is Phase 3.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { validateSemantics } from 'packages/a2ui/validator/semantic/index.js';
|
|
9
|
+
* const v = await validateSemantics({ intent, messages }, { cache: true });
|
|
10
|
+
*/
|
|
11
|
+
import { cacheKey, makeCache } from './cache.js';
|
|
12
|
+
import { callJudge, getRubric, summarizeA2UI } from './judge.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Round `n` to the nearest integer and confine it to [lo, hi].
 * Anything that is not a finite number (NaN, ±Infinity, strings, undefined)
 * is treated as 0 before rounding.
 *
 * @param {*} n — candidate score
 * @param {number} [lo=0] — lower bound
 * @param {number} [hi=100] — upper bound
 * @returns {number} the clamped integer
 */
function clamp(n, lo = 0, hi = 100) {
  const rounded = Math.round(Number.isFinite(n) ? n : 0);
  const capped = Math.min(hi, rounded);
  return Math.max(lo, capped);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Normalize the judge's parsed JSON into the verdict object returned to callers.
 *
 * Axis scores are clamped to integers in 0..100. The overall score is the
 * judge's own when it is a finite number within 0..100; otherwise it is
 * recomputed as 0.5·dominantPattern + 0.35·requiredCapabilities +
 * 0.15·forbiddenNoise. A verdict label outside the four known values is
 * re-derived from the scores. Rationale is capped at 240 characters and
 * evidence at 10 entries.
 *
 * @param {object|null|undefined} parsed — judge output
 * @param {{ rubricVersion: string, cost: object }} meta — passed through unchanged
 * @returns {object} normalized verdict
 */
function shapeVerdict(parsed, { rubricVersion, cost }) {
  const knownVerdicts = ['aligned', 'partial', 'misaligned', 'off-topic'];
  const toStrings = (value) => (Array.isArray(value) ? value.map(String) : []);

  const rawAxes = parsed?.axes || {};
  const pattern = rawAxes.dominantPattern || {};
  const capabilities = rawAxes.requiredCapabilities || {};
  const noise = rawAxes.forbiddenNoise || {};

  const patternScore = clamp(pattern.score);
  const capabilityScore = clamp(capabilities.score);
  const noiseScore = clamp(noise.score);

  // Trust the judge's score if sane; otherwise recompute from axes.
  const judged = parsed?.score;
  const judgedIsSane = Number.isFinite(judged) && judged >= 0 && judged <= 100;
  const score = judgedIsSane
    ? clamp(judged)
    : Math.round(0.5 * patternScore + 0.35 * capabilityScore + 0.15 * noiseScore);

  const claimed = String(parsed?.verdict || '').toLowerCase();
  let verdict;
  if (knownVerdicts.includes(claimed)) {
    verdict = claimed;
  } else if (patternScore < 15) {
    verdict = 'off-topic';
  } else {
    const weakAxes = [patternScore, capabilityScore, noiseScore].filter((s) => s < 60).length;
    const everyAxisStrong = patternScore >= 75 && capabilityScore >= 75 && noiseScore >= 75;
    if (patternScore < 40 || weakAxes >= 2) {
      verdict = 'misaligned';
    } else if (score >= 75 && everyAxisStrong) {
      verdict = 'aligned';
    } else {
      verdict = 'partial';
    }
  }

  return {
    score,
    verdict,
    path: 'llm-only',
    axes: {
      dominantPattern: {
        expected: String(pattern.expected || ''),
        observed: String(pattern.observed || ''),
        score: patternScore,
      },
      requiredCapabilities: {
        expected: toStrings(capabilities.expected),
        missing: toStrings(capabilities.missing),
        score: capabilityScore,
      },
      forbiddenNoise: {
        observed: toStrings(noise.observed),
        score: noiseScore,
      },
    },
    rationale: String(parsed?.rationale || '').slice(0, 240),
    evidence: toStrings(parsed?.evidence).slice(0, 10),
    cost,
    rubricVersion,
  };
}
|
|
73
|
+
|
|
74
|
+
/**
 * validateSemantics — Phase 1 implementation (LLM-only + cache).
 *
 * Flow: reject empty input → look up the on-disk cache → ask the judge →
 * normalize with shapeVerdict → store the verdict. The function never throws
 * on judge failure; it returns a zero-score verdict carrying `error` instead.
 * Fallback verdicts (missing input, judge failure) are not cached.
 *
 * @param {{ intent?: string, messages?: Array<object> }} input — what the user
 *   asked for and the A2UI messages produced for it
 * @param {object} [opts]
 * @param {boolean} [opts.cache=true] — read/write the disk cache
 * @param {number} [opts.timeoutMs=15000] — forwarded to the judge
 * @param {string} [opts.model] — forwarded to the judge only when set
 * @param {string} [opts.rubricVersion] — defaults to `getRubric().version`
 * @returns {Promise<object>} verdict object; `error` is 'no-messages' when the
 *   intent or the messages are missing, and 'judge-failed' when the judge call rejected
 */
export async function validateSemantics(input, opts = {}) {
  const { intent, messages } = input || {};
  const {
    cache = true,
    timeoutMs = 15000,
    model,
    rubricVersion = getRubric().version,
  } = opts ?? {}; // tolerate an explicit `null` options argument

  const hasMessages = Array.isArray(messages) && messages.length > 0;
  if (!intent || !hasMessages) {
    return {
      score: 0,
      verdict: 'off-topic',
      path: 'llm-only',
      axes: {
        dominantPattern: { expected: '', observed: 'none', score: 0 },
        requiredCapabilities: { expected: [], missing: [], score: 0 },
        forbiddenNoise: { observed: [], score: 100 },
      },
      // Name the actual gap; the `error` code is unchanged for existing callers.
      rationale: hasMessages
        ? 'No intent provided; nothing to score against.'
        : 'No messages emitted; nothing to score.',
      evidence: [],
      cost: { cached: false, latencyMs: 0 },
      rubricVersion,
      error: 'no-messages',
    };
  }

  const key = cacheKey({ rubricVersion, intent, messages });
  const store = cache ? makeCache() : null;

  if (store) {
    const hit = await store.get(key);
    if (hit) {
      return { ...hit, cost: { ...(hit.cost || {}), cached: true } };
    }
  }

  const summary = summarizeA2UI(messages);
  let judgeRes;
  try {
    judgeRes = await callJudge({
      intent,
      componentSummary: summary,
      rubricVersion,
      timeoutMs,
      ...(model ? { model } : {}),
    });
  } catch (err) {
    return {
      score: 0,
      verdict: 'partial',
      path: 'llm-only',
      axes: {
        dominantPattern: { expected: '', observed: '', score: 0 },
        requiredCapabilities: { expected: [], missing: [], score: 0 },
        forbiddenNoise: { observed: [], score: 0 },
      },
      // `err` may be any thrown value (including null), so read it defensively.
      rationale: `Judge error: ${err?.message || String(err)}`.slice(0, 240),
      evidence: [],
      cost: { cached: false, latencyMs: 0 },
      rubricVersion,
      error: 'judge-failed',
    };
  }

  // Token usage is optional: a judge result without it must not crash here.
  const usage = judgeRes.usage ?? {};
  const cost = {
    provider: judgeRes.provider,
    model: judgeRes.model,
    inputTokens: usage.inputTokens,
    outputTokens: usage.outputTokens,
    cached: false,
    latencyMs: judgeRes.latencyMs,
  };

  const verdict = shapeVerdict(judgeRes.parsed, { rubricVersion, cost });

  if (store) {
    try {
      await store.set(key, verdict);
    } catch {
      /* cache write failure is non-fatal */
    }
  }
  return verdict;
}
|
|
162
|
+
|
|
163
|
+
export { getRubric, summarizeA2UI };
|