@xera-ai/core 0.1.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/internal.ts +1 -0
- package/dist/adapter/types.d.ts +1 -1
- package/dist/adapter/types.d.ts.map +1 -1
- package/dist/artifact/meta.d.ts +2 -28
- package/dist/artifact/meta.d.ts.map +1 -1
- package/dist/artifact/status.d.ts +49 -74
- package/dist/artifact/status.d.ts.map +1 -1
- package/dist/auth/key.d.ts.map +1 -1
- package/dist/auth/refresh.d.ts.map +1 -1
- package/dist/auth/state.d.ts +5 -14
- package/dist/auth/state.d.ts.map +1 -1
- package/dist/bin/internal.js +10037 -746
- package/dist/bin-internal/doctor.d.ts +5 -0
- package/dist/bin-internal/doctor.d.ts.map +1 -0
- package/dist/bin-internal/eval-deterministic.d.ts +5 -0
- package/dist/bin-internal/eval-deterministic.d.ts.map +1 -0
- package/dist/bin-internal/eval-prepare.d.ts +7 -0
- package/dist/bin-internal/eval-prepare.d.ts.map +1 -0
- package/dist/bin-internal/eval-report.d.ts +5 -0
- package/dist/bin-internal/eval-report.d.ts.map +1 -0
- package/dist/bin-internal/exec.d.ts.map +1 -1
- package/dist/bin-internal/fetch.d.ts.map +1 -1
- package/dist/bin-internal/graph-backfill.d.ts +2 -0
- package/dist/bin-internal/graph-backfill.d.ts.map +1 -0
- package/dist/bin-internal/graph-query.d.ts +2 -0
- package/dist/bin-internal/graph-query.d.ts.map +1 -0
- package/dist/bin-internal/graph-record-script.d.ts +2 -0
- package/dist/bin-internal/graph-record-script.d.ts.map +1 -0
- package/dist/bin-internal/graph-record.d.ts +3 -0
- package/dist/bin-internal/graph-record.d.ts.map +1 -0
- package/dist/bin-internal/graph-snapshot.d.ts +2 -0
- package/dist/bin-internal/graph-snapshot.d.ts.map +1 -0
- package/dist/bin-internal/heal-prepare.d.ts +19 -0
- package/dist/bin-internal/heal-prepare.d.ts.map +1 -0
- package/dist/bin-internal/index.d.ts.map +1 -1
- package/dist/bin-internal/lint.d.ts.map +1 -1
- package/dist/bin-internal/normalize.d.ts.map +1 -1
- package/dist/bin-internal/post.d.ts.map +1 -1
- package/dist/bin-internal/status-cmd.d.ts.map +1 -1
- package/dist/bin-internal/typecheck.d.ts.map +1 -1
- package/dist/bin-internal/unlock.d.ts.map +1 -1
- package/dist/bin-internal/validate-feature.d.ts.map +1 -1
- package/dist/bin-internal/verify-prompts.d.ts +7 -0
- package/dist/bin-internal/verify-prompts.d.ts.map +1 -0
- package/dist/classifier/aggregate.d.ts.map +1 -1
- package/dist/config/define.d.ts.map +1 -1
- package/dist/config/load.d.ts.map +1 -1
- package/dist/config/schema.d.ts +38 -298
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/eval/paths.d.ts +15 -0
- package/dist/eval/paths.d.ts.map +1 -0
- package/dist/eval/run-id.d.ts +6 -0
- package/dist/eval/run-id.d.ts.map +1 -0
- package/dist/eval/types.d.ts +203 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/graph/cost.d.ts +21 -0
- package/dist/graph/cost.d.ts.map +1 -0
- package/dist/graph/index.d.ts +8 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/paths.d.ts +10 -0
- package/dist/graph/paths.d.ts.map +1 -0
- package/dist/graph/schema.d.ts +177 -0
- package/dist/graph/schema.d.ts.map +1 -0
- package/dist/graph/store.d.ts +14 -0
- package/dist/graph/store.d.ts.map +1 -0
- package/dist/graph/types.d.ts +151 -0
- package/dist/graph/types.d.ts.map +1 -0
- package/dist/graph/ulid.d.ts +2 -0
- package/dist/graph/ulid.d.ts.map +1 -0
- package/dist/index.d.ts +11 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/jira/client.d.ts.map +1 -1
- package/dist/jira/fields.d.ts.map +1 -1
- package/dist/jira/rest-backend.d.ts.map +1 -1
- package/dist/reporter/jira-comment.d.ts.map +1 -1
- package/dist/reporter/status-writer.d.ts.map +1 -1
- package/dist/src/index.js +349 -321
- package/package.json +19 -13
- package/src/adapter/types.ts +5 -2
- package/src/artifact/meta.ts +1 -1
- package/src/artifact/status.ts +1 -1
- package/src/auth/encrypt.ts +2 -2
- package/src/auth/key.ts +1 -2
- package/src/auth/refresh.ts +5 -1
- package/src/auth/state.ts +2 -2
- package/src/bin-internal/doctor.ts +169 -0
- package/src/bin-internal/eval-deterministic.ts +149 -0
- package/src/bin-internal/eval-prepare.ts +214 -0
- package/src/bin-internal/eval-report.ts +177 -0
- package/src/bin-internal/exec.ts +28 -15
- package/src/bin-internal/fetch.ts +21 -10
- package/src/bin-internal/graph-backfill.ts +43 -0
- package/src/bin-internal/graph-query.ts +43 -0
- package/src/bin-internal/graph-record-script.ts +191 -0
- package/src/bin-internal/graph-record.ts +243 -0
- package/src/bin-internal/graph-snapshot.ts +23 -0
- package/src/bin-internal/heal-prepare.ts +230 -0
- package/src/bin-internal/index.ts +33 -11
- package/src/bin-internal/lint.ts +11 -4
- package/src/bin-internal/normalize.ts +23 -9
- package/src/bin-internal/post.ts +10 -4
- package/src/bin-internal/report.ts +3 -3
- package/src/bin-internal/status-cmd.ts +11 -3
- package/src/bin-internal/typecheck.ts +9 -3
- package/src/bin-internal/unlock.ts +12 -4
- package/src/bin-internal/validate-feature.ts +14 -5
- package/src/bin-internal/verify-prompts.ts +60 -0
- package/src/classifier/aggregate.ts +13 -6
- package/src/config/define.ts +3 -1
- package/src/config/load.ts +1 -1
- package/src/config/schema.ts +43 -37
- package/src/eval/paths.ts +32 -0
- package/src/eval/run-id.ts +30 -0
- package/src/eval/types.ts +101 -0
- package/src/graph/cost.ts +59 -0
- package/src/graph/index.ts +15 -0
- package/src/graph/paths.ts +27 -0
- package/src/graph/schema.ts +135 -0
- package/src/graph/store.ts +231 -0
- package/src/graph/types.ts +174 -0
- package/src/graph/ulid.ts +58 -0
- package/src/index.ts +11 -11
- package/src/jira/client.ts +4 -2
- package/src/jira/fields.ts +4 -2
- package/src/jira/mcp-backend.ts +1 -1
- package/src/jira/rest-backend.ts +18 -6
- package/src/jira/retry.ts +2 -2
- package/src/lock/file-lock.ts +2 -2
- package/src/logging/ndjson-logger.ts +2 -2
- package/src/reporter/jira-comment.ts +13 -7
- package/src/reporter/status-writer.ts +2 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xera-ai/core",
|
|
3
|
-
"version": "0.1.7",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -17,16 +17,22 @@
|
|
|
17
17
|
}
|
|
18
18
|
},
|
|
19
19
|
"bin": {
|
|
20
|
-
|
|
21
|
-
},
|
|
22
|
-
"files": [
|
|
23
|
-
"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
20
|
+
"xera-internal": "./dist/bin/internal.js"
|
|
21
|
+
},
|
|
22
|
+
"files": [
|
|
23
|
+
"dist",
|
|
24
|
+
"src",
|
|
25
|
+
"bin"
|
|
26
|
+
],
|
|
27
|
+
"scripts": {
|
|
28
|
+
"build": "bun build ./src/index.ts ./bin/internal.ts --outdir ./dist --target bun --external @playwright/test --external @xera-ai/web --external zod",
|
|
29
|
+
"typecheck": "tsc --noEmit"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"zod": "4.4.3",
|
|
33
|
+
"@xera-ai/web": "^0.2.0",
|
|
34
|
+
"@playwright/test": "1.60.0",
|
|
35
|
+
"fflate": "0.8.3",
|
|
36
|
+
"yaml": "2.9.0"
|
|
37
|
+
}
|
|
32
38
|
}
|
package/src/adapter/types.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { XeraConfig } from '../config/schema';
|
|
2
1
|
import type { Classification } from '../artifact/status';
|
|
2
|
+
import type { XeraConfig } from '../config/schema';
|
|
3
3
|
|
|
4
4
|
export interface GenerateInput {
|
|
5
5
|
ticketDir: string;
|
|
@@ -57,6 +57,9 @@ export interface TestAdapter {
|
|
|
57
57
|
readonly id: string;
|
|
58
58
|
generate(input: GenerateInput): Promise<GenerateResult>;
|
|
59
59
|
execute(input: ExecuteInput): Promise<RunResult>;
|
|
60
|
-
classify?(
|
|
60
|
+
classify?(
|
|
61
|
+
run: RunResult,
|
|
62
|
+
ctx: ClassifyContext,
|
|
63
|
+
): Partial<{ class: Classification; rationale: string }>;
|
|
61
64
|
doctor(): Promise<DoctorReport>;
|
|
62
65
|
}
|
package/src/artifact/meta.ts
CHANGED
package/src/artifact/status.ts
CHANGED
package/src/auth/encrypt.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { createCipheriv, createDecipheriv, randomBytes } from 'node:crypto';
|
|
2
2
|
|
|
3
3
|
const ALGO = 'aes-256-gcm';
|
|
4
4
|
const KEY_LEN = 32; // bytes (256 bits)
|
|
5
|
-
const IV_LEN = 12;
|
|
5
|
+
const IV_LEN = 12; // recommended for GCM
|
|
6
6
|
const TAG_LEN = 16;
|
|
7
7
|
const VERSION = 'v1';
|
|
8
8
|
|
package/src/auth/key.ts
CHANGED
|
@@ -4,8 +4,7 @@ export function resolveAuthKey(): string {
|
|
|
4
4
|
const key = process.env[AUTH_KEY_ENV];
|
|
5
5
|
if (!key) {
|
|
6
6
|
throw new Error(
|
|
7
|
-
`${AUTH_KEY_ENV} not set. It is auto-generated by \`xera init\` and saved to .env.
|
|
8
|
-
`If you deleted .env, regenerate it by running \`xera init --update\` — note that any cached auth state will be invalidated.`,
|
|
7
|
+
`${AUTH_KEY_ENV} not set. It is auto-generated by \`xera init\` and saved to .env. If you deleted .env, regenerate it by running \`xera init --update\` — note that any cached auth state will be invalidated.`,
|
|
9
8
|
);
|
|
10
9
|
}
|
|
11
10
|
if (!/^[0-9a-f]{64}$/i.test(key)) {
|
package/src/auth/refresh.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { AuthStateEntry } from './state';
|
|
2
|
+
|
|
2
3
|
export type { AuthStateEntry } from './state';
|
|
3
4
|
|
|
4
5
|
const RE = /^(\d+)([hms])$/;
|
|
@@ -13,7 +14,10 @@ export function parseDuration(d: string): number {
|
|
|
13
14
|
return n * 1000;
|
|
14
15
|
}
|
|
15
16
|
|
|
16
|
-
export interface RefreshPolicy {
|
|
17
|
+
export interface RefreshPolicy {
|
|
18
|
+
ttl: string;
|
|
19
|
+
refreshBuffer: string;
|
|
20
|
+
}
|
|
17
21
|
|
|
18
22
|
export function needsRefresh(
|
|
19
23
|
entry: AuthStateEntry | null,
|
package/src/auth/state.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { existsSync, readFileSync, writeFileSync
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
3
|
import { z } from 'zod';
|
|
4
|
-
import {
|
|
4
|
+
import { decrypt, encrypt } from './encrypt';
|
|
5
5
|
import { resolveAuthKey } from './key';
|
|
6
6
|
|
|
7
7
|
export const AuthStateEntrySchema = z.object({
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import { existsSync, readdirSync, readFileSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import type { Stage } from '../eval/types';
|
|
4
|
+
import { summarizeCost } from '../graph/cost';
|
|
5
|
+
import { loadAllEvents } from '../graph/store';
|
|
6
|
+
import { verifyPrompts } from './verify-prompts';
|
|
7
|
+
|
|
8
|
+
export interface DoctorOpts {
|
|
9
|
+
cwd?: string;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
interface CheckResult {
|
|
13
|
+
ok: boolean;
|
|
14
|
+
message: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
const REQUIRED_FILES_PER_STAGE: Record<Stage, string[]> = {
|
|
18
|
+
'feature-from-story': ['golden/test.feature'],
|
|
19
|
+
'script-from-feature': ['golden/spec-requirements.md'],
|
|
20
|
+
'diagnose-failure': [],
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
const REQUIRED_SCRIPTS = [
|
|
24
|
+
'xera:eval-prepare',
|
|
25
|
+
'xera:eval-deterministic',
|
|
26
|
+
'xera:eval-report',
|
|
27
|
+
'xera:verify-prompts',
|
|
28
|
+
'xera:doctor',
|
|
29
|
+
];
|
|
30
|
+
|
|
31
|
+
/**
 * Looks up a `field: value` entry in a markdown document.
 *
 * The search is line-anchored (multiline mode) over the whole of `content`; it is not
 * restricted to a delimited frontmatter section. The value is the first run of
 * non-whitespace characters after the colon.
 *
 * `field` is matched literally: regex metacharacters in it are escaped, so a name
 * such as `a.b` does not also match `axb`.
 *
 * @param content - Full text of the document to search.
 * @param field - Field name to look for at the start of a line.
 * @returns The value token, or `null` when no matching line exists.
 */
function frontmatterField(content: string, field: string): string | null {
  const literalField = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const m = content.match(new RegExp(`^${literalField}:\\s*(\\S+)\\s*$`, 'm'));
  return m?.[1] ?? null;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Validates the layout of `fixtures/golden-eval/` under `repoRoot`.
 *
 * Reported failures:
 * - the directory itself is missing (returned immediately as the only result);
 * - fewer than 3 non-hidden ticket directories;
 * - per ticket directory: `meta.json` missing, unparsable, or not a JSON object;
 *   `meta.stages` absent/empty; `story.md` missing; a file required by a declared
 *   stage (see `REQUIRED_FILES_PER_STAGE`) missing.
 *
 * @param repoRoot - Repository root that contains `fixtures/golden-eval/`.
 * @returns One failed `CheckResult` per problem; an empty array when everything passes.
 */
function checkGoldenEvalDir(repoRoot: string): CheckResult[] {
  const root = join(repoRoot, 'fixtures/golden-eval');
  if (!existsSync(root)) return [{ ok: false, message: 'fixtures/golden-eval/ does not exist' }];

  // Dot-prefixed directories are not counted as ticket fixtures.
  const dirs = readdirSync(root, { withFileTypes: true }).filter(
    (e) => e.isDirectory() && !e.name.startsWith('.'),
  );
  const results: CheckResult[] = [];
  if (dirs.length < 3) {
    results.push({
      ok: false,
      message: `fixtures/golden-eval/ has ${dirs.length} ticket dir(s); need ≥ 3`,
    });
  }

  for (const entry of dirs) {
    const dir = join(root, entry.name);
    const metaPath = join(dir, 'meta.json');
    if (!existsSync(metaPath)) {
      results.push({ ok: false, message: `${entry.name}: meta.json missing` });
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(metaPath, 'utf8'));
    } catch (err) {
      results.push({
        ok: false,
        message: `${entry.name}: meta.json parse error: ${err instanceof Error ? err.message : String(err)}`,
      });
      continue;
    }
    // Valid JSON is not necessarily an object (`null`, `42`, `"x"`); reading
    // `.stages` off `null` would throw, so report it as a failed check instead.
    if (typeof parsed !== 'object' || parsed === null) {
      results.push({ ok: false, message: `${entry.name}: meta.json must contain a JSON object` });
      continue;
    }
    const meta = parsed as { id?: string; stages?: unknown[] };

    const stages = Array.isArray(meta.stages) ? (meta.stages as Stage[]) : [];
    if (stages.length === 0)
      results.push({ ok: false, message: `${entry.name}: meta.stages is empty` });
    if (!existsSync(join(dir, 'story.md')))
      results.push({ ok: false, message: `${entry.name}: story.md missing` });

    for (const stage of stages) {
      // Stage names that are not keys of the table contribute no required files.
      const required = REQUIRED_FILES_PER_STAGE[stage] ?? [];
      for (const rel of required) {
        if (!existsSync(join(dir, rel))) {
          results.push({
            ok: false,
            message: `${meta.id ?? entry.name}: stage "${stage}" declared but ${rel} missing`,
          });
        }
      }
    }
  }
  return results;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Checks that `packages/prompts/eval-rubric.md` exists and carries the expected
 * `id` ("eval-rubric") and a non-empty `version` field.
 *
 * @param repoRoot - Repository root containing `packages/prompts/`.
 * @returns A single failed result for the first problem found, or an empty array.
 */
function checkRubricPrompt(repoRoot: string): CheckResult[] {
  const fail = (message: string): CheckResult[] => [{ ok: false, message }];

  const rubricFile = join(repoRoot, 'packages/prompts/eval-rubric.md');
  if (!existsSync(rubricFile)) {
    return fail('packages/prompts/eval-rubric.md missing');
  }

  const body = readFileSync(rubricFile, 'utf8');
  if (frontmatterField(body, 'id') !== 'eval-rubric') {
    return fail('eval-rubric.md frontmatter "id" must be "eval-rubric"');
  }
  if (!frontmatterField(body, 'version')) {
    return fail('eval-rubric.md frontmatter "version" missing');
  }
  return [];
}
|
|
97
|
+
|
|
98
|
+
/**
 * Checks that `packages/skills/xera-eval.md` exists and declares a `name` field.
 *
 * @param repoRoot - Repository root containing `packages/skills/`.
 * @returns A single failed result describing the problem, or an empty array.
 */
function checkEvalSkill(repoRoot: string): CheckResult[] {
  const skillFile = join(repoRoot, 'packages/skills/xera-eval.md');

  if (!existsSync(skillFile)) {
    return [{ ok: false, message: 'packages/skills/xera-eval.md missing' }];
  }

  const hasName = Boolean(frontmatterField(readFileSync(skillFile, 'utf8'), 'name'));
  return hasName ? [] : [{ ok: false, message: 'xera-eval.md frontmatter "name" missing' }];
}
|
|
106
|
+
|
|
107
|
+
/**
 * Runs the prompt verification check by delegating to `verifyPrompts`.
 *
 * NOTE(review): judging by the name, this presumably verifies a prompt-injection
 * preamble in the prompt files — confirm against `./verify-prompts`.
 *
 * @param repoRoot - Repository root passed through to `verifyPrompts`.
 * @returns Whatever `verifyPrompts` reports for `repoRoot`.
 */
function checkPromptInjectionPreamble(repoRoot: string): CheckResult[] {
  const findings: CheckResult[] = verifyPrompts(repoRoot);
  return findings;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Checks that the root `package.json` defines every script listed in `REQUIRED_SCRIPTS`.
 *
 * A `package.json` that cannot be read or parsed is reported as a failed check rather
 * than thrown, so the doctor command can still print the results of its other checks.
 *
 * @param repoRoot - Repository root containing `package.json`.
 * @returns One failed result per missing script (or one for a missing/unparsable
 *   file); an empty array when all required scripts are present.
 */
function checkRootScripts(repoRoot: string): CheckResult[] {
  const path = join(repoRoot, 'package.json');
  if (!existsSync(path)) return [{ ok: false, message: 'root package.json missing' }];

  let pkg: unknown;
  try {
    pkg = JSON.parse(readFileSync(path, 'utf8'));
  } catch (err) {
    return [
      {
        ok: false,
        message: `root package.json parse error: ${err instanceof Error ? err.message : String(err)}`,
      },
    ];
  }

  // Anything other than an object-valued "scripts" is treated as "no scripts defined".
  const rawScripts =
    typeof pkg === 'object' && pkg !== null ? (pkg as { scripts?: unknown }).scripts : undefined;
  const scripts: Record<string, unknown> =
    typeof rawScripts === 'object' && rawScripts !== null
      ? (rawScripts as Record<string, unknown>)
      : {};

  return REQUIRED_SCRIPTS.filter((s) => typeof scripts[s] !== 'string').map((s) => ({
    ok: false,
    message: `root package.json missing script: ${s}`,
  }));
}
|
|
119
|
+
|
|
120
|
+
/** Prints the LLM cost summary for the past 7 days; prints nothing when no calls were recorded. */
function reportCostSummary(repoRoot: string): void {
  const cost = summarizeCost(repoRoot, 7);
  if (!(cost.totalCalls > 0)) return;

  console.log('');
  console.log('LLM cost (past 7 days):');
  console.log(` Total calls: ${cost.totalCalls}`);
  console.log(` Estimated: $${cost.totalUsd.toFixed(2)} USD`);
  // Most expensive skill first.
  const top = Object.entries(cost.bySkill).sort((a, b) => b[1].usd - a[1].usd)[0];
  if (top)
    console.log(` Top skill: ${top[0]} (${top[1].calls} calls, $${top[1].usd.toFixed(2)})`);
}

/**
 * Warns about ticket directories under `.xera/` (names like `ABC-123`) for which the
 * graph holds no `ticket.fetched` event, and suggests running the backfill command.
 */
function reportBackfillGaps(repoRoot: string): void {
  const xeraDir = join(repoRoot, '.xera');
  if (!existsSync(xeraDir)) return;

  const ticketDirs = readdirSync(xeraDir, { withFileTypes: true }).filter(
    (e) => e.isDirectory() && /^[A-Z]+-\d+$/.test(e.name),
  );
  if (ticketDirs.length === 0) return;

  const events = loadAllEvents(repoRoot);
  const fetchedTickets = new Set(
    events.filter((e) => e.type === 'ticket.fetched').map((e) => e.payload.ticketId),
  );
  const unbackfilled = ticketDirs.map((d) => d.name).filter((t) => !fetchedTickets.has(t));
  if (unbackfilled.length === 0) return;

  console.log('');
  console.log(`⚠ Graph: ${unbackfilled.length} ticket(s) not yet in graph.`);
  console.log(` These won't participate in v0.6.1+ features (TEST_OUTDATED, /xera-impact).`);
  console.log(` Run: bun run xera:graph-backfill`);
  console.log(` (Use --dry-run to preview.)`);
}

/**
 * Entry point of the `doctor` command.
 *
 * Runs all repository checks, then prints two informational sections to stdout (cost
 * summary and graph backfill hint — neither affects the exit code), and finally
 * reports the check outcome.
 *
 * @param _argv - Command-line arguments; unused.
 * @param opts - `cwd` overrides the repository root (defaults to `process.cwd()`).
 * @returns `0` when every check passed (prints `[xera:doctor] ok`); `1` otherwise,
 *   after writing one line per failure to stderr.
 */
export async function doctorCmd(_argv: string[], opts: DoctorOpts = {}): Promise<number> {
  const repoRoot = opts.cwd ?? process.cwd();
  const results: CheckResult[] = [
    ...checkGoldenEvalDir(repoRoot),
    ...checkRubricPrompt(repoRoot),
    ...checkEvalSkill(repoRoot),
    ...checkPromptInjectionPreamble(repoRoot),
    ...checkRootScripts(repoRoot),
  ];

  reportCostSummary(repoRoot);
  reportBackfillGaps(repoRoot);

  if (results.length === 0) {
    console.log('[xera:doctor] ok');
    return 0;
  }
  for (const r of results) console.error(`[xera:doctor] ${r.message}`);
  return 1;
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { validateGherkin } from '@xera-ai/web';
|
|
4
|
+
import { resolveEvalPaths } from '../eval/paths';
|
|
5
|
+
import {
|
|
6
|
+
type DeterministicEntry,
|
|
7
|
+
type DeterministicScores,
|
|
8
|
+
DeterministicScoresSchema,
|
|
9
|
+
ManifestSchema,
|
|
10
|
+
} from '../eval/types';
|
|
11
|
+
|
|
12
|
+
export interface EvalDeterministicOpts {
|
|
13
|
+
cwd?: string;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
interface ClassifierScenario {
|
|
17
|
+
name: string;
|
|
18
|
+
class: string;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/**
 * Deterministic check for the `feature-from-story` stage: the generated feature file
 * must exist and be accepted by `validateGherkin`.
 *
 * @param actualFeaturePath - Path of the generated `test.feature`.
 * @returns `passed: true` when validation succeeds; otherwise `passed: false` with an
 *   `error` naming the missing file, the validation errors (`line N: message`, joined
 *   with `; `), or the message of an exception thrown while reading/validating.
 */
function checkFeatureFromStory(actualFeaturePath: string): {
  passed: boolean;
  checks: string[];
  error?: string;
} {
  const failed = (error: string) => ({ passed: false, checks: ['validate-feature'], error });

  if (!existsSync(actualFeaturePath)) return failed('actual missing: test.feature');

  try {
    const verdict = validateGherkin(readFileSync(actualFeaturePath, 'utf8'));
    return verdict.ok
      ? { passed: true, checks: ['validate-feature'] }
      : failed(verdict.errors.map((e) => `line ${e.line}: ${e.message}`).join('; '));
  } catch (err) {
    return failed((err as Error).message);
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Deterministic check for the `script-from-feature` stage.
 *
 * v0.2 deviation: this only verifies that `spec.ts` exists. Hooking up the full
 * typecheck/lint is deferred to v0.2.1 because v0.1's lintTicket/typecheckTicket
 * resolve paths from .xera/<TICKET>/ (consumer project layout), not from
 * .xera/eval/<run-id>/actual/<ticket>/. The judge dimensions "Requirements satisfied",
 * "Wait strategy" and "No dead code" cover the lint surface.
 *
 * @param actualTicketDir - Directory holding the generated artifacts for one ticket.
 * @returns `passed: true` when `spec.ts` is present, otherwise a failure with `error`.
 */
function checkScriptFromFeature(actualTicketDir: string): {
  passed: boolean;
  checks: string[];
  error?: string;
} {
  const hasSpec = existsSync(join(actualTicketDir, 'spec.ts'));
  return hasSpec
    ? { passed: true, checks: ['file-presence'] }
    : { passed: false, checks: ['file-presence'], error: 'actual missing: spec.ts' };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Reads the `scenarios` array of a classifier JSON file.
 *
 * A missing `scenarios` key yields an empty list. Unreadable/unparsable files, a
 * non-object document, or a non-array `scenarios` yield `{ error }` (prefixed with
 * `label`) instead of throwing. Non-object array entries are dropped.
 */
function readScenarioList(
  filePath: string,
  label: string,
): { scenarios: { name: string; class: string }[] } | { error: string } {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(filePath, 'utf8'));
  } catch (err) {
    return { error: `${label}: ${err instanceof Error ? err.message : String(err)}` };
  }
  if (typeof parsed !== 'object' || parsed === null) {
    return { error: `${label}: expected a JSON object` };
  }
  const raw = (parsed as { scenarios?: unknown }).scenarios;
  if (raw === undefined || raw === null) return { scenarios: [] };
  if (!Array.isArray(raw)) return { error: `${label}: "scenarios" must be an array` };
  // Entries are assumed to carry string `name`/`class` fields; they are only compared
  // with `===` and interpolated into messages, so other value types cannot crash here.
  const scenarios = raw.filter(
    (s): s is { name: string; class: string } => typeof s === 'object' && s !== null,
  );
  return { scenarios };
}

/**
 * Deterministic check for the `diagnose-failure` stage: every scenario listed in the
 * golden `classifier-input.json` must appear, matched by `name`, in the generated
 * `classification.json` with the same `class`.
 *
 * Scenarios present only in the actual output are ignored. Missing or malformed files
 * produce a failed result with an explanatory `error`; this function does not throw
 * for bad file contents.
 *
 * @param inputsTicketDir - Directory containing `classifier-input.json`.
 * @param actualTicketDir - Directory containing the generated `classification.json`.
 * @returns `passed: true` when all golden scenarios match, otherwise `passed: false`
 *   with `error` set.
 */
function checkDiagnoseFailure(
  inputsTicketDir: string,
  actualTicketDir: string,
): { passed: boolean; checks: string[]; error?: string } {
  const fail = (error: string) => ({ passed: false, checks: ['bucket-match'], error });

  const inputPath = join(inputsTicketDir, 'classifier-input.json');
  const actualPath = join(actualTicketDir, 'classification.json');
  if (!existsSync(actualPath)) return fail('actual missing: classification.json');
  if (!existsSync(inputPath)) return fail('inputs missing: classifier-input.json');

  const golden = readScenarioList(inputPath, 'classifier-input.json');
  if ('error' in golden) return fail(golden.error);
  const actual = readScenarioList(actualPath, 'classification.json');
  if ('error' in actual) return fail(actual.error);

  const mismatches: string[] = [];
  for (const g of golden.scenarios) {
    const a = actual.scenarios.find((s) => s.name === g.name);
    if (!a) {
      mismatches.push(`missing scenario "${g.name}"`);
      continue;
    }
    if (a.class !== g.class)
      mismatches.push(`scenario "${g.name}": expected class ${g.class}, got ${a.class}`);
  }

  if (mismatches.length > 0) return fail(`bucket mismatch — ${mismatches.join('; ')}`);
  return { passed: true, checks: ['bucket-match'] };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Entry point of the `eval-deterministic` command.
 *
 * Loads the manifest of the given eval run, runs the deterministic check for every
 * (ticket, stage) pair in `manifest.ticket_stages`, validates the collected entries
 * against `DeterministicScoresSchema` and writes them to `paths.deterministicScores`.
 *
 * Individual check failures are recorded in the output file; they do not change the
 * exit code.
 *
 * @param argv - `argv[0]` is the run id.
 * @param opts - `cwd` overrides the working directory (defaults to `process.cwd()`).
 * @returns `0` after the scores file was written; `1` when the run id is missing or
 *   the manifest is missing, unreadable or invalid.
 */
export async function evalDeterministicCmd(
  argv: string[],
  opts: EvalDeterministicOpts = {},
): Promise<number> {
  const cwd = opts.cwd ?? process.cwd();
  const runId = argv[0];
  if (!runId) {
    console.error('[xera:eval-deterministic] usage: eval-deterministic <run-id>');
    return 1;
  }
  const paths = resolveEvalPaths(cwd, runId);
  if (!existsSync(paths.manifest)) {
    console.error(`[xera:eval-deterministic] missing manifest.json at ${paths.manifest}`);
    return 1;
  }

  // A corrupt or schema-invalid manifest is a precondition failure like the two above:
  // report it and exit with 1 rather than letting the exception escape.
  let manifest: ReturnType<typeof ManifestSchema.parse>;
  try {
    manifest = ManifestSchema.parse(JSON.parse(readFileSync(paths.manifest, 'utf8')));
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(
      `[xera:eval-deterministic] invalid manifest.json at ${paths.manifest}: ${reason}`,
    );
    return 1;
  }

  const entries: DeterministicEntry[] = [];
  for (const [ticket, ticketStages] of Object.entries(manifest.ticket_stages)) {
    for (const stage of ticketStages) {
      const inputsDir = paths.ticketInputsDir(ticket);
      const actualDir = paths.ticketActualDir(ticket);
      let result: { passed: boolean; checks: string[]; error?: string };
      if (stage === 'feature-from-story') {
        result = checkFeatureFromStory(join(actualDir, 'test.feature'));
      } else if (stage === 'script-from-feature') {
        result = checkScriptFromFeature(actualDir);
      } else {
        result = checkDiagnoseFailure(inputsDir, actualDir);
      }

      const entry: DeterministicEntry = {
        ticket,
        stage,
        passed: result.passed,
        checks: result.checks,
      };
      // Only set `error` when there is one, so the key is absent on passing entries.
      if (result.error !== undefined) entry.error = result.error;
      entries.push(entry);
    }
  }

  const scores: DeterministicScores = { run_id: runId, entries };
  // Validate before writing so a malformed scores file is never persisted.
  DeterministicScoresSchema.parse(scores);
  writeFileSync(paths.deterministicScores, JSON.stringify(scores, null, 2));
  console.log(`[xera:eval-deterministic] wrote ${entries.length} entries`);
  return 0;
}
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import {
|
|
2
|
+
copyFileSync,
|
|
3
|
+
existsSync,
|
|
4
|
+
mkdirSync,
|
|
5
|
+
readdirSync,
|
|
6
|
+
readFileSync,
|
|
7
|
+
writeFileSync,
|
|
8
|
+
} from 'node:fs';
|
|
9
|
+
import { join } from 'node:path';
|
|
10
|
+
import { resolveEvalPaths } from '../eval/paths';
|
|
11
|
+
import { generateRunId } from '../eval/run-id';
|
|
12
|
+
import { type Manifest, ManifestSchema, STAGES, type Stage } from '../eval/types';
|
|
13
|
+
import { acquireLock } from '../lock/file-lock';
|
|
14
|
+
|
|
15
|
+
export interface EvalPrepareOpts {
|
|
16
|
+
cwd?: string;
|
|
17
|
+
now?: () => Date;
|
|
18
|
+
getGitSha?: () => string | null;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
interface ParsedFlags {
|
|
22
|
+
force: boolean;
|
|
23
|
+
only_prompt: Stage | null;
|
|
24
|
+
only_ticket: string | null;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
 * Parses the `eval-prepare` command-line arguments.
 *
 * Recognized arguments:
 * - `--force`
 * - `--prompt=<stage>` — `<stage>` must be one of `STAGES`
 * - `--ticket=<id>` — `<id>` must be non-empty
 *
 * @param argv - Raw arguments, one flag per element.
 * @returns The parsed flags, or `{ error }` describing the first invalid argument.
 */
function parseFlags(argv: string[]): ParsedFlags | { error: string } {
  const flags: ParsedFlags = { force: false, only_prompt: null, only_ticket: null };
  for (const arg of argv) {
    if (arg === '--force') flags.force = true;
    else if (arg.startsWith('--prompt=')) {
      const v = arg.slice('--prompt='.length);
      if (!STAGES.includes(v as Stage)) {
        return { error: `Unknown stage: ${v}. Valid: ${STAGES.join(', ')}.` };
      }
      flags.only_prompt = v as Stage;
    } else if (arg.startsWith('--ticket=')) {
      const v = arg.slice('--ticket='.length);
      // An empty id would be stored as '' and later treated as "no filter" by
      // truthiness checks, silently selecting every ticket. Reject it instead.
      if (v === '') {
        return { error: '--ticket requires a value (e.g. --ticket=ABC-123)' };
      }
      flags.only_ticket = v;
    } else {
      return { error: `Unknown argument: ${arg}` };
    }
  }
  return flags;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Reads the `version:` field of `packages/prompts/<name>.md`.
 *
 * @param repoRoot - Repository root containing `packages/prompts/`.
 * @param name - Prompt file name without the `.md` extension.
 * @returns The version token, or `'0.0.0'` when the file or the field is missing.
 */
function readPromptVersion(repoRoot: string, name: string): string {
  const fallback = '0.0.0';
  const promptFile = join(repoRoot, 'packages/prompts', `${name}.md`);
  if (!existsSync(promptFile)) {
    return fallback;
  }
  const found = readFileSync(promptFile, 'utf8').match(/^version:\s*(\S+)\s*$/m);
  return found?.[1] ?? fallback;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Discovers eval fixtures under `fixtures/golden-eval/`.
 *
 * Each non-hidden sub-directory with a `meta.json` containing a string `id` and an
 * array `stages` yields one entry. Directories without `meta.json` are skipped
 * silently; directories whose `meta.json` is unparsable or lacks those fields are
 * skipped with a warning on stderr (previously they crashed discovery, either in
 * `JSON.parse` or when sorting by a non-string `id`).
 *
 * @param repoRoot - Repository root containing `fixtures/golden-eval/`.
 * @returns Fixtures sorted by `id`; an empty array when the directory does not exist.
 */
function discoverEvalTickets(repoRoot: string): { id: string; dir: string; stages: Stage[] }[] {
  const root = join(repoRoot, 'fixtures/golden-eval');
  if (!existsSync(root)) return [];
  const out: { id: string; dir: string; stages: Stage[] }[] = [];
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    if (!entry.isDirectory()) continue;
    if (entry.name === 'README.md' || entry.name.startsWith('.')) continue;
    const dir = join(root, entry.name);
    const metaPath = join(dir, 'meta.json');
    if (!existsSync(metaPath)) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(metaPath, 'utf8'));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[xera:eval-prepare] skipping ${entry.name}: meta.json parse error: ${reason}`);
      continue;
    }
    const meta =
      typeof parsed === 'object' && parsed !== null
        ? (parsed as { id?: unknown; stages?: unknown })
        : {};
    if (typeof meta.id !== 'string' || !Array.isArray(meta.stages)) {
      console.warn(
        `[xera:eval-prepare] skipping ${entry.name}: meta.json needs a string "id" and an array "stages"`,
      );
      continue;
    }
    // Stage names are not validated here; they are taken as declared in meta.json.
    out.push({ id: meta.id, dir, stages: meta.stages as Stage[] });
  }
  return out.sort((a, b) => a.id.localeCompare(b.id));
}
|
|
69
|
+
|
|
70
|
+
/**
 * Discovers classifier fixtures: `*.json` files directly under
 * `fixtures/golden-tickets/` whose top-level `ticket` field is a string.
 *
 * Files without a string `ticket` are ignored. Files that cannot be read or parsed
 * are skipped with a warning on stderr instead of aborting discovery.
 *
 * @param repoRoot - Repository root containing `fixtures/golden-tickets/`.
 * @returns `{ id, path }` entries sorted by `id`; an empty array when the directory
 *   does not exist.
 */
function discoverClassifierTickets(repoRoot: string): { id: string; path: string }[] {
  const root = join(repoRoot, 'fixtures/golden-tickets');
  if (!existsSync(root)) return [];
  const out: { id: string; path: string }[] = [];
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    if (!entry.isFile() || !entry.name.endsWith('.json')) continue;
    const path = join(root, entry.name);

    let data: unknown;
    try {
      data = JSON.parse(readFileSync(path, 'utf8'));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[xera:eval-prepare] skipping ${entry.name}: ${reason}`);
      continue;
    }
    // Valid JSON may be `null` or a primitive; only objects can carry a `ticket` field.
    const ticket =
      typeof data === 'object' && data !== null ? (data as { ticket?: unknown }).ticket : undefined;
    if (typeof ticket === 'string') out.push({ id: ticket, path });
  }
  return out.sort((a, b) => a.id.localeCompare(b.id));
}
|
|
82
|
+
|
|
83
|
+
/**
 * Entry point of the `eval-prepare` command.
 *
 * Selects golden fixtures according to the flags, creates the run directory, copies
 * each selected ticket's inputs into it, writes a schema-validated `manifest.json`
 * and finally acquires the run lock. On success the run id is printed as
 * `RUN_ID=<id>` on stdout.
 *
 * @param argv - Command-line flags (`--force`, `--prompt=<stage>`, `--ticket=<id>`).
 * @param opts - `cwd` overrides the repository root; `now` and `getGitSha` are
 *   forwarded to `generateRunId` (and `now` is also used for `started_at`).
 * @returns `0` on success; `1` for invalid flags, no matching tickets, a missing
 *   `story.md`, or an existing run directory without `--force`; `4` when the lock
 *   cannot be acquired.
 */
export async function evalPrepareCmd(argv: string[], opts: EvalPrepareOpts = {}): Promise<number> {
  const repoRoot = opts.cwd ?? process.cwd();

  const flags = parseFlags(argv);
  if ('error' in flags) {
    console.error(`[xera:eval-prepare] ${flags.error}`);
    return 1;
  }

  const evalTickets = discoverEvalTickets(repoRoot);
  const classifierTickets = discoverClassifierTickets(repoRoot);

  // Index fixtures by id once. On duplicate ids the first occurrence wins, which is
  // the same entry a linear search over the discovery result would return.
  const evalById = new Map<string, (typeof evalTickets)[number]>();
  for (const t of evalTickets) if (!evalById.has(t.id)) evalById.set(t.id, t);
  const classifierById = new Map<string, (typeof classifierTickets)[number]>();
  for (const t of classifierTickets) if (!classifierById.has(t.id)) classifierById.set(t.id, t);

  // Determine which stages to run.
  const stages: Stage[] = flags.only_prompt ? [flags.only_prompt] : [...STAGES];

  // Determine which tickets are relevant.
  const wantsEval = stages.some((s) => s !== 'diagnose-failure');
  const wantsClassifier = stages.includes('diagnose-failure');

  let selectedTickets: string[] = [];
  if (wantsEval) selectedTickets.push(...evalTickets.map((t) => t.id));
  if (wantsClassifier) selectedTickets.push(...classifierTickets.map((t) => t.id));
  selectedTickets = [...new Set(selectedTickets)].sort();

  // Compare against null rather than truthiness so an empty id is reported as an
  // unknown fixture instead of being silently ignored.
  if (flags.only_ticket !== null) {
    if (!selectedTickets.includes(flags.only_ticket)) {
      console.error(`[xera:eval-prepare] No golden fixture for ${flags.only_ticket}`);
      return 1;
    }
    selectedTickets = [flags.only_ticket];
  }

  if (selectedTickets.length === 0) {
    console.error('[xera:eval-prepare] No tickets selected (after filters).');
    return 1;
  }

  // Compute per-ticket stages by intersecting the ticket's declared stages with the
  // global stages array, then filter out tickets with an empty intersection.
  const ticket_stages: Record<string, Stage[]> = {};
  for (const ticket of selectedTickets) {
    const evalT = evalById.get(ticket);
    // A ticket without an eval fixture is a classifier ticket: always diagnose-failure.
    const ticketDeclared: Stage[] = evalT ? evalT.stages : ['diagnose-failure'];
    const intersection = ticketDeclared.filter((s) => stages.includes(s));
    if (intersection.length > 0) {
      ticket_stages[ticket] = intersection;
    }
  }

  // Filter selectedTickets to only those with applicable stages.
  selectedTickets = selectedTickets.filter((t) => ticket_stages[t] !== undefined);

  if (selectedTickets.length === 0) {
    console.error('[xera:eval-prepare] No tickets applicable to requested stages.');
    return 1;
  }

  // Verify required inputs before anything is created on disk: copying a missing
  // story.md would otherwise throw after the run directory was partially populated.
  for (const ticket of selectedTickets) {
    const evalT = evalById.get(ticket);
    if (!evalT) continue;
    const storyPath = join(evalT.dir, 'story.md');
    if (!existsSync(storyPath)) {
      console.error(`[xera:eval-prepare] ${ticket}: story.md missing at ${storyPath}`);
      return 1;
    }
  }

  const runId = generateRunId({
    ...(opts.now ? { now: opts.now } : {}),
    ...(opts.getGitSha ? { getGitSha: opts.getGitSha } : {}),
  });
  const paths = resolveEvalPaths(repoRoot, runId);

  if (existsSync(paths.root) && !flags.force) {
    console.error(
      `[xera:eval-prepare] run dir already exists: ${paths.root}. Pass --force to re-run.`,
    );
    return 1;
  }
  mkdirSync(paths.inputsDir, { recursive: true });
  mkdirSync(paths.actualDir, { recursive: true });

  // Copy inputs.
  for (const ticket of selectedTickets) {
    const ticketInputs = paths.ticketInputsDir(ticket);
    mkdirSync(ticketInputs, { recursive: true });
    const evalT = evalById.get(ticket);
    const classT = classifierById.get(ticket);
    if (evalT) {
      copyFileSync(join(evalT.dir, 'story.md'), join(ticketInputs, 'story.md'));
      // The golden feature file is optional; copy it only when the fixture has one.
      const featurePath = join(evalT.dir, 'golden/test.feature');
      if (existsSync(featurePath)) copyFileSync(featurePath, join(ticketInputs, 'test.feature'));
    }
    if (classT) {
      copyFileSync(classT.path, join(ticketInputs, 'classifier-input.json'));
    }
  }

  // Build manifest.
  const now = (opts.now ?? (() => new Date()))();
  const manifest: Manifest = {
    run_id: runId,
    started_at: now.toISOString(),
    // Third dash-separated segment of the run id.
    // NOTE(review): presumably the git sha embedded by generateRunId — confirm.
    git_sha: runId.split('-')[2] ?? 'nogit',
    tickets: selectedTickets,
    stages,
    ticket_stages,
    prompt_versions: {
      'feature-from-story': readPromptVersion(repoRoot, 'feature-from-story'),
      'script-from-feature': readPromptVersion(repoRoot, 'script-from-feature'),
      'diagnose-failure': readPromptVersion(repoRoot, 'diagnose-failure'),
      'eval-rubric': readPromptVersion(repoRoot, 'eval-rubric'),
    },
    flags: {
      force: flags.force,
      only_prompt: flags.only_prompt,
      only_ticket: flags.only_ticket,
      judge_only: false,
    },
  };

  // Validate before writing.
  ManifestSchema.parse(manifest);
  writeFileSync(paths.manifest, JSON.stringify(manifest, null, 2));

  // NOTE(review): the lock is taken only after inputs and manifest are written;
  // presumably it guards the later stages of the run rather than preparation — confirm.
  if (!acquireLock(paths.lock, runId)) {
    console.error(`[xera:eval-prepare] failed to acquire lock at ${paths.lock}`);
    return 4;
  }

  console.log(
    `[xera:eval-prepare] prepared ${selectedTickets.length} ticket(s) for stages: ${stages.join(', ')}`,
  );
  console.log(`RUN_ID=${runId}`);
  return 0;
}
|