@tangle-network/agent-eval 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -11
- package/dist/chunk-ITN4YOZY.js +215 -0
- package/dist/chunk-ITN4YOZY.js.map +1 -0
- package/dist/chunk-OZPRSK4A.js +594 -0
- package/dist/chunk-OZPRSK4A.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +104 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +597 -4
- package/dist/index.js +908 -241
- package/dist/index.js.map +1 -1
- package/dist/sink-fetch-C0B8ximv.d.ts +101 -0
- package/dist/telemetry/file.d.ts +19 -0
- package/dist/telemetry/file.js +40 -0
- package/dist/telemetry/file.js.map +1 -0
- package/dist/telemetry/index.d.ts +38 -0
- package/dist/telemetry/index.js +128 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/wire/index.d.ts +211 -0
- package/dist/wire/index.js +56 -0
- package/dist/wire/index.js.map +1 -0
- package/package.json +27 -3
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fleet telemetry envelope — agent-eval's portable observability shape.
|
|
3
|
+
*
|
|
4
|
+
* Designed so any consumer (Node CLI, Cloudflare Worker, Lambda, browser
|
|
5
|
+
* extension) can emit structured rows describing one unit of work — a page
|
|
6
|
+
* audit, a tool call, an evolve round, a full agent run — to a central sink.
|
|
7
|
+
*
|
|
8
|
+
* The schema is intentionally a strict superset of agent-eval's `Run` shape
|
|
9
|
+
* so a future TraceStore adapter can promote envelopes into traces without
|
|
10
|
+
* translation.
|
|
11
|
+
*/
|
|
12
|
+
declare const TELEMETRY_SCHEMA_VERSION = 1;
|
|
13
|
+
/** Discriminator for the unit of work this envelope describes. */
|
|
14
|
+
type TelemetryKind = 'agent-run' | 'design-audit-page' | 'design-audit-run' | 'design-evolve-round' | 'design-evolve-run' | 'gepa-trial' | 'gepa-generation' | 'tool-call' | 'judge-verdict' | 'custom';
|
|
15
|
+
interface TelemetryEnvelope {
|
|
16
|
+
schemaVersion: typeof TELEMETRY_SCHEMA_VERSION;
|
|
17
|
+
envelopeId: string;
|
|
18
|
+
runId: string;
|
|
19
|
+
timestamp: string;
|
|
20
|
+
parentRunId?: string;
|
|
21
|
+
source: TelemetrySource;
|
|
22
|
+
model?: TelemetryModel;
|
|
23
|
+
kind: TelemetryKind;
|
|
24
|
+
ok: boolean;
|
|
25
|
+
durationMs: number;
|
|
26
|
+
data: Record<string, unknown>;
|
|
27
|
+
metrics: Record<string, number>;
|
|
28
|
+
tags?: Record<string, string>;
|
|
29
|
+
error?: string;
|
|
30
|
+
}
|
|
31
|
+
interface TelemetrySource {
|
|
32
|
+
/** Repo identity — basename of cwd plus git remote if discoverable. */
|
|
33
|
+
repo: string;
|
|
34
|
+
cwd: string;
|
|
35
|
+
gitSha?: string;
|
|
36
|
+
gitBranch?: string;
|
|
37
|
+
cliVersion: string;
|
|
38
|
+
/** What was invoked, e.g. `design-audit`, `bad run`, `gepa --target`. */
|
|
39
|
+
invocation: string;
|
|
40
|
+
/** Sanitised argv minus secrets. */
|
|
41
|
+
argv?: string[];
|
|
42
|
+
/**
|
|
43
|
+
* Multi-tenant identity. Set when the consumer runs inside a hosted
|
|
44
|
+
* product so a fleet rollup can group by tenant without leaking customer
|
|
45
|
+
* URLs or PII.
|
|
46
|
+
*/
|
|
47
|
+
tenantId?: string;
|
|
48
|
+
/** Optional sub-tenant identity (project, suite, walkthrough, customer). */
|
|
49
|
+
customerId?: string;
|
|
50
|
+
/** SHA-256 (12 hex chars) of the API key used to authenticate this run, when applicable. */
|
|
51
|
+
apiKeyHash?: string;
|
|
52
|
+
}
|
|
53
|
+
interface TelemetryModel {
|
|
54
|
+
provider: string;
|
|
55
|
+
name: string;
|
|
56
|
+
/** SHA-256 (12 hex chars) of the prompt(s) used. */
|
|
57
|
+
promptHash?: string;
|
|
58
|
+
/** SHA-256 (12 hex chars) of the composed rubric body, if applicable. */
|
|
59
|
+
rubricHash?: string;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Workers-safe telemetry sinks — only `fetch` and pure JS. No `fs`, no
|
|
64
|
+
* `child_process`. Safe to import from a Cloudflare Worker, Lambda, edge
|
|
65
|
+
* function, or browser extension.
|
|
66
|
+
*
|
|
67
|
+
* For Node-only file persistence, import from './sink-file' (built as `dist/telemetry/file.js`) instead.
|
|
68
|
+
*/
|
|
69
|
+
|
|
70
|
+
interface TelemetrySink {
|
|
71
|
+
emit(envelope: TelemetryEnvelope): Promise<void> | void;
|
|
72
|
+
close?(): Promise<void> | void;
|
|
73
|
+
}
|
|
74
|
+
/** Best-effort POST to a remote collector. Fire-and-forget; never throws. */
|
|
75
|
+
declare class HttpTelemetrySink implements TelemetrySink {
|
|
76
|
+
private readonly endpoint;
|
|
77
|
+
private readonly bearer?;
|
|
78
|
+
private inflight;
|
|
79
|
+
constructor(endpoint: string, bearer?: string | undefined);
|
|
80
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
81
|
+
close(): Promise<void>;
|
|
82
|
+
}
|
|
83
|
+
/** Fanout to multiple sinks — failures in one do not affect others. */
|
|
84
|
+
declare class FanoutTelemetrySink implements TelemetrySink {
|
|
85
|
+
private readonly sinks;
|
|
86
|
+
constructor(sinks: TelemetrySink[]);
|
|
87
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
88
|
+
close(): Promise<void>;
|
|
89
|
+
}
|
|
90
|
+
/** No-op sink — used when telemetry is explicitly disabled. */
|
|
91
|
+
declare class NullTelemetrySink implements TelemetrySink {
|
|
92
|
+
emit(): void;
|
|
93
|
+
}
|
|
94
|
+
/** In-memory sink — useful for tests + downstream adapters. */
|
|
95
|
+
declare class InMemoryTelemetrySink implements TelemetrySink {
|
|
96
|
+
readonly envelopes: TelemetryEnvelope[];
|
|
97
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
98
|
+
clear(): void;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
export { FanoutTelemetrySink as F, HttpTelemetrySink as H, InMemoryTelemetrySink as I, NullTelemetrySink as N, type TelemetryKind as T, type TelemetryModel as a, type TelemetrySource as b, type TelemetrySink as c, TELEMETRY_SCHEMA_VERSION as d, type TelemetryEnvelope as e };
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { c as TelemetrySink, e as TelemetryEnvelope } from '../sink-fetch-C0B8ximv.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Node-only file sink. Imports `node:fs` — DO NOT import this from a Worker
|
|
5
|
+
* or edge runtime; use `./sink-fetch` instead.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** Append envelopes to a JSONL file, partitioned by repo + date. */
|
|
9
|
+
declare class FileTelemetrySink implements TelemetrySink {
|
|
10
|
+
private readonly baseDir;
|
|
11
|
+
private streams;
|
|
12
|
+
constructor(baseDir: string);
|
|
13
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
14
|
+
close(): Promise<void>;
|
|
15
|
+
}
|
|
16
|
+
/** Default location for local telemetry, mirroring bad CLI's convention. */
|
|
17
|
+
declare function defaultTelemetryDir(homeDir: string, override?: string): string;
|
|
18
|
+
|
|
19
|
+
export { FileTelemetrySink, defaultTelemetryDir };
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// src/telemetry/sink-file.ts
|
|
2
|
+
import * as fs from "fs";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
var FileTelemetrySink = class {
|
|
5
|
+
constructor(baseDir) {
|
|
6
|
+
this.baseDir = baseDir;
|
|
7
|
+
fs.mkdirSync(baseDir, { recursive: true });
|
|
8
|
+
}
|
|
9
|
+
baseDir;
|
|
10
|
+
streams = /* @__PURE__ */ new Map();
|
|
11
|
+
emit(envelope) {
|
|
12
|
+
const date = envelope.timestamp.slice(0, 10);
|
|
13
|
+
const repo = envelope.source.repo || "unknown";
|
|
14
|
+
const key = `${repo}/${date}`;
|
|
15
|
+
let stream = this.streams.get(key);
|
|
16
|
+
if (!stream) {
|
|
17
|
+
const dir = path.join(this.baseDir, repo);
|
|
18
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
19
|
+
stream = fs.createWriteStream(path.join(dir, `${date}.jsonl`), { flags: "a", encoding: "utf-8" });
|
|
20
|
+
this.streams.set(key, stream);
|
|
21
|
+
}
|
|
22
|
+
stream.write(`${JSON.stringify(envelope)}
|
|
23
|
+
`);
|
|
24
|
+
}
|
|
25
|
+
async close() {
|
|
26
|
+
const closes = Array.from(this.streams.values()).map(
|
|
27
|
+
(s) => new Promise((resolve) => s.end(() => resolve()))
|
|
28
|
+
);
|
|
29
|
+
this.streams.clear();
|
|
30
|
+
await Promise.all(closes);
|
|
31
|
+
}
|
|
32
|
+
};
|
|
33
|
+
/**
 * Pick the directory used for local telemetry files.
 *
 * @param homeDir Home directory that the default location is placed under.
 * @param override Explicit directory; used as-is whenever it is truthy.
 * @returns `override` when truthy, otherwise `<homeDir>/.agent-eval/telemetry`.
 */
function defaultTelemetryDir(homeDir, override) {
  if (override) {
    return override;
  }
  const segments = [homeDir, ".agent-eval", "telemetry"];
  return path.join(...segments);
}
|
|
36
|
+
export {
|
|
37
|
+
FileTelemetrySink,
|
|
38
|
+
defaultTelemetryDir
|
|
39
|
+
};
|
|
40
|
+
//# sourceMappingURL=file.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/telemetry/sink-file.ts"],"sourcesContent":["/**\n * Node-only file sink. Imports `node:fs` — DO NOT import this from a Worker\n * or edge runtime; use `./sink-fetch` instead.\n */\n\nimport * as fs from 'node:fs'\nimport * as path from 'node:path'\nimport type { TelemetryEnvelope } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\n/** Append envelopes to a JSONL file, partitioned by repo + date. */\nexport class FileTelemetrySink implements TelemetrySink {\n private streams = new Map<string, fs.WriteStream>()\n\n constructor(private readonly baseDir: string) {\n fs.mkdirSync(baseDir, { recursive: true })\n }\n\n emit(envelope: TelemetryEnvelope): void {\n const date = envelope.timestamp.slice(0, 10) // YYYY-MM-DD\n const repo = envelope.source.repo || 'unknown'\n const key = `${repo}/${date}`\n let stream = this.streams.get(key)\n if (!stream) {\n const dir = path.join(this.baseDir, repo)\n fs.mkdirSync(dir, { recursive: true })\n stream = fs.createWriteStream(path.join(dir, `${date}.jsonl`), { flags: 'a', encoding: 'utf-8' })\n this.streams.set(key, stream)\n }\n stream.write(`${JSON.stringify(envelope)}\\n`)\n }\n\n async close(): Promise<void> {\n const closes = Array.from(this.streams.values()).map(\n (s) => new Promise<void>((resolve) => s.end(() => resolve())),\n )\n this.streams.clear()\n await Promise.all(closes)\n }\n}\n\n/** Default location for local telemetry, mirroring bad CLI's convention. 
*/\nexport function defaultTelemetryDir(homeDir: string, override?: string): string {\n return override || path.join(homeDir, '.agent-eval', 'telemetry')\n}\n"],"mappings":";AAKA,YAAY,QAAQ;AACpB,YAAY,UAAU;AAKf,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YAA6B,SAAiB;AAAjB;AAC3B,IAAG,aAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AAAA,EAC3C;AAAA,EAF6B;AAAA,EAFrB,UAAU,oBAAI,IAA4B;AAAA,EAMlD,KAAK,UAAmC;AACtC,UAAM,OAAO,SAAS,UAAU,MAAM,GAAG,EAAE;AAC3C,UAAM,OAAO,SAAS,OAAO,QAAQ;AACrC,UAAM,MAAM,GAAG,IAAI,IAAI,IAAI;AAC3B,QAAI,SAAS,KAAK,QAAQ,IAAI,GAAG;AACjC,QAAI,CAAC,QAAQ;AACX,YAAM,MAAW,UAAK,KAAK,SAAS,IAAI;AACxC,MAAG,aAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AACrC,eAAY,qBAAuB,UAAK,KAAK,GAAG,IAAI,QAAQ,GAAG,EAAE,OAAO,KAAK,UAAU,QAAQ,CAAC;AAChG,WAAK,QAAQ,IAAI,KAAK,MAAM;AAAA,IAC9B;AACA,WAAO,MAAM,GAAG,KAAK,UAAU,QAAQ,CAAC;AAAA,CAAI;AAAA,EAC9C;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ,OAAO,CAAC,EAAE;AAAA,MAC/C,CAAC,MAAM,IAAI,QAAc,CAAC,YAAY,EAAE,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,IAC9D;AACA,SAAK,QAAQ,MAAM;AACnB,UAAM,QAAQ,IAAI,MAAM;AAAA,EAC1B;AACF;AAGO,SAAS,oBAAoB,SAAiB,UAA2B;AAC9E,SAAO,YAAiB,UAAK,SAAS,eAAe,WAAW;AAClE;","names":[]}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { T as TelemetryKind, a as TelemetryModel, b as TelemetrySource, c as TelemetrySink } from '../sink-fetch-C0B8ximv.js';
|
|
2
|
+
export { F as FanoutTelemetrySink, H as HttpTelemetrySink, I as InMemoryTelemetrySink, N as NullTelemetrySink, d as TELEMETRY_SCHEMA_VERSION, e as TelemetryEnvelope } from '../sink-fetch-C0B8ximv.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Telemetry client — thin wrapper that builds envelopes from `EmitArgs` and
|
|
6
|
+
* delegates to a `TelemetrySink`. Pure logic; no I/O. Use this from any
|
|
7
|
+
* runtime — Workers, Node, browser — and choose the sink accordingly.
|
|
8
|
+
*
|
|
9
|
+
* For an opinionated singleton with env-var-driven sink wiring (the bad CLI
|
|
10
|
+
* pattern), see `./node-client.ts`.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
interface EmitArgs {
|
|
14
|
+
kind: TelemetryKind;
|
|
15
|
+
runId: string;
|
|
16
|
+
parentRunId?: string;
|
|
17
|
+
ok: boolean;
|
|
18
|
+
durationMs: number;
|
|
19
|
+
data?: Record<string, unknown>;
|
|
20
|
+
metrics?: Record<string, number>;
|
|
21
|
+
tags?: Record<string, string>;
|
|
22
|
+
model?: TelemetryModel;
|
|
23
|
+
error?: string;
|
|
24
|
+
/** Override the source for this envelope. Falls back to `defaultSource`. */
|
|
25
|
+
source?: TelemetrySource;
|
|
26
|
+
}
|
|
27
|
+
declare class TelemetryClient {
|
|
28
|
+
private readonly sink;
|
|
29
|
+
private readonly defaultSource;
|
|
30
|
+
constructor(sink: TelemetrySink, defaultSource: TelemetrySource);
|
|
31
|
+
emit(args: EmitArgs): void;
|
|
32
|
+
close(): Promise<void>;
|
|
33
|
+
}
|
|
34
|
+
declare const SECRET_FLAGS: Set<string>;
|
|
35
|
+
/** Strip likely-secret values from argv, preserving structure. */
|
|
36
|
+
declare function sanitiseArgv(argv: string[]): string[];
|
|
37
|
+
|
|
38
|
+
export { type EmitArgs, SECRET_FLAGS, TelemetryClient, TelemetryKind, TelemetryModel, TelemetrySink, TelemetrySource, sanitiseArgv };
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// src/telemetry/schema.ts
|
|
2
|
+
var TELEMETRY_SCHEMA_VERSION = 1;
|
|
3
|
+
|
|
4
|
+
// src/telemetry/sink-fetch.ts
|
|
5
|
+
var HttpTelemetrySink = class {
|
|
6
|
+
constructor(endpoint, bearer) {
|
|
7
|
+
this.endpoint = endpoint;
|
|
8
|
+
this.bearer = bearer;
|
|
9
|
+
}
|
|
10
|
+
endpoint;
|
|
11
|
+
bearer;
|
|
12
|
+
inflight = /* @__PURE__ */ new Set();
|
|
13
|
+
emit(envelope) {
|
|
14
|
+
const body = JSON.stringify(envelope);
|
|
15
|
+
const headers = { "content-type": "application/json" };
|
|
16
|
+
if (this.bearer) headers.authorization = `Bearer ${this.bearer}`;
|
|
17
|
+
const promise = fetch(this.endpoint, { method: "POST", headers, body }).then(() => void 0).catch(() => void 0);
|
|
18
|
+
this.inflight.add(promise);
|
|
19
|
+
promise.finally(() => this.inflight.delete(promise));
|
|
20
|
+
}
|
|
21
|
+
async close() {
|
|
22
|
+
await Promise.allSettled(Array.from(this.inflight));
|
|
23
|
+
}
|
|
24
|
+
};
|
|
25
|
+
var FanoutTelemetrySink = class {
|
|
26
|
+
constructor(sinks) {
|
|
27
|
+
this.sinks = sinks;
|
|
28
|
+
}
|
|
29
|
+
sinks;
|
|
30
|
+
emit(envelope) {
|
|
31
|
+
for (const sink of this.sinks) {
|
|
32
|
+
try {
|
|
33
|
+
const result = sink.emit(envelope);
|
|
34
|
+
if (result && typeof result.catch === "function") {
|
|
35
|
+
;
|
|
36
|
+
result.catch(() => void 0);
|
|
37
|
+
}
|
|
38
|
+
} catch {
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
async close() {
|
|
43
|
+
await Promise.allSettled(this.sinks.map((s) => Promise.resolve(s.close?.())));
|
|
44
|
+
}
|
|
45
|
+
};
|
|
46
|
+
/** Sink that discards everything; stands in when telemetry is switched off. */
var NullTelemetrySink = class {
  /** Does nothing and returns `undefined`. */
  emit() {
    return void 0;
  }
};
|
|
50
|
+
/** Sink that keeps every envelope in an array, for tests and adapters. */
var InMemoryTelemetrySink = class {
  // Envelopes in emission order; the same array instance is kept for the
  // lifetime of the sink.
  envelopes = [];
  /**
   * Record one envelope at the end of `envelopes`.
   *
   * @param envelope Envelope to store (by reference, not copied).
   */
  emit(envelope) {
    const next = this.envelopes.length;
    this.envelopes[next] = envelope;
  }
  /** Empty `envelopes` in place, keeping the array identity. */
  clear() {
    this.envelopes.splice(0, this.envelopes.length);
  }
};
|
|
59
|
+
|
|
60
|
+
// src/telemetry/client.ts
|
|
61
|
+
/**
 * Builds telemetry envelopes from emit arguments and hands them to a sink.
 *
 * `emit` is best-effort: neither a synchronous throw nor an asynchronous
 * rejection from the sink reaches the caller.
 */
var TelemetryClient = class {
  /**
   * @param sink Destination for every envelope built by this client.
   * @param defaultSource Source used when `emit` is called without `args.source`.
   */
  constructor(sink, defaultSource) {
    this.sink = sink;
    this.defaultSource = defaultSource;
  }
  sink;
  defaultSource;
  /**
   * Build one envelope and pass it to the sink.
   *
   * The envelope gets a fresh id and the current time as an ISO-8601 string.
   * `data` and `metrics` default to empty objects. `parentRunId`, `model`,
   * `tags` and `error` are included only when truthy.
   *
   * @param args Fields describing the unit of work being reported.
   */
  emit(args) {
    const envelope = {
      schemaVersion: TELEMETRY_SCHEMA_VERSION,
      envelopeId: makeEnvelopeId(),
      runId: args.runId,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      source: args.source ?? this.defaultSource,
      kind: args.kind,
      ok: args.ok,
      durationMs: args.durationMs,
      data: args.data ?? {},
      metrics: args.metrics ?? {},
      ...(args.parentRunId ? { parentRunId: args.parentRunId } : {}),
      ...(args.model ? { model: args.model } : {}),
      ...(args.tags ? { tags: args.tags } : {}),
      ...(args.error ? { error: args.error } : {})
    };
    try {
      const pending = this.sink.emit(envelope);
      // A sink may return a promise; without a handler its rejection would
      // become an unhandled rejection in the calling process.
      if (pending && typeof pending.catch === "function") {
        pending.catch(() => void 0);
      }
    } catch {
      // Telemetry must never break the calling code path.
    }
  }
  /**
   * Close the underlying sink when it defines `close`.
   *
   * @returns Promise that settles with the sink's `close`; a failure there propagates.
   */
  async close() {
    await this.sink.close?.();
  }
};
|
|
94
|
+
/**
 * Produce an id for a new envelope.
 *
 * @returns `crypto.randomUUID()` when a global `crypto` offers it; otherwise
 *   `env-<time>-<random>`, built from base-36 `Date.now()` and up to eight
 *   base-36 characters of `Math.random()`.
 */
function makeEnvelopeId() {
  const hasNativeUuid = typeof crypto !== "undefined" && typeof crypto.randomUUID === "function";
  if (!hasNativeUuid) {
    // Fallback for runtimes without randomUUID.
    const stamp = Date.now().toString(36);
    const noise = Math.random().toString(36).slice(2, 10);
    return "env-" + stamp + "-" + noise;
  }
  return crypto.randomUUID();
}
|
|
100
|
+
/**
 * Flags whose values are treated as secrets by `sanitiseArgv`. The set is
 * mutable: flags added here are redacted in both the `--flag value` and
 * `--flag=value` forms.
 */
var SECRET_FLAGS = /* @__PURE__ */ new Set(["--api-key", "--bearer", "--token", "--password"]);
/**
 * Copy an argv array with the values of secret flags replaced by `<redacted>`.
 *
 * Two forms are handled, and the output always has the same length as the input:
 * - `--flag value`: the element after the flag is replaced. A flag that is
 *   the last element has no value, so nothing is added after it.
 * - `--flag=value`: everything after the first `=` is replaced, including
 *   values that contain line breaks.
 *
 * @param argv Raw argument list; not modified.
 * @returns New array with secret values redacted.
 */
function sanitiseArgv(argv) {
  const out = [];
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (SECRET_FLAGS.has(arg)) {
      out.push(arg);
      // Consume the following element only if there is one to redact.
      if (i + 1 < argv.length) {
        out.push("<redacted>");
        i++;
      }
      continue;
    }
    // Look the flag name up in SECRET_FLAGS instead of a fixed pattern, so
    // the inline form honours the same set as the spaced form.
    const eq = arg.indexOf("=");
    if (eq > 0 && SECRET_FLAGS.has(arg.slice(0, eq))) {
      out.push(`${arg.slice(0, eq)}=<redacted>`);
      continue;
    }
    out.push(arg);
  }
  return out;
}
|
|
118
|
+
export {
|
|
119
|
+
FanoutTelemetrySink,
|
|
120
|
+
HttpTelemetrySink,
|
|
121
|
+
InMemoryTelemetrySink,
|
|
122
|
+
NullTelemetrySink,
|
|
123
|
+
SECRET_FLAGS,
|
|
124
|
+
TELEMETRY_SCHEMA_VERSION,
|
|
125
|
+
TelemetryClient,
|
|
126
|
+
sanitiseArgv
|
|
127
|
+
};
|
|
128
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/telemetry/schema.ts","../../src/telemetry/sink-fetch.ts","../../src/telemetry/client.ts"],"sourcesContent":["/**\n * Fleet telemetry envelope — agent-eval's portable observability shape.\n *\n * Designed so any consumer (Node CLI, Cloudflare Worker, Lambda, browser\n * extension) can emit structured rows describing one unit of work — a page\n * audit, a tool call, an evolve round, a full agent run — to a central sink.\n *\n * The schema is intentionally a strict superset of agent-eval's `Run` shape\n * so a future TraceStore adapter can promote envelopes into traces without\n * translation.\n */\n\nexport const TELEMETRY_SCHEMA_VERSION = 1\n\n/** Discriminator for the unit of work this envelope describes. */\nexport type TelemetryKind =\n | 'agent-run'\n | 'design-audit-page'\n | 'design-audit-run'\n | 'design-evolve-round'\n | 'design-evolve-run'\n | 'gepa-trial'\n | 'gepa-generation'\n | 'tool-call'\n | 'judge-verdict'\n | 'custom'\n\nexport interface TelemetryEnvelope {\n schemaVersion: typeof TELEMETRY_SCHEMA_VERSION\n envelopeId: string\n runId: string\n timestamp: string\n parentRunId?: string\n\n source: TelemetrySource\n model?: TelemetryModel\n kind: TelemetryKind\n ok: boolean\n durationMs: number\n\n data: Record<string, unknown>\n metrics: Record<string, number>\n tags?: Record<string, string>\n\n error?: string\n}\n\nexport interface TelemetrySource {\n /** Repo identity — basename of cwd plus git remote if discoverable. */\n repo: string\n cwd: string\n gitSha?: string\n gitBranch?: string\n cliVersion: string\n /** What was invoked, e.g. `design-audit`, `bad run`, `gepa --target`. */\n invocation: string\n /** Sanitised argv minus secrets. */\n argv?: string[]\n /**\n * Multi-tenant identity. 
Set when the consumer runs inside a hosted\n * product so a fleet rollup can group by tenant without leaking customer\n * URLs or PII.\n */\n tenantId?: string\n /** Optional sub-tenant identity (project, suite, walkthrough, customer). */\n customerId?: string\n /** SHA-256 (12 hex) of the API key used to authenticate this run, when applicable. */\n apiKeyHash?: string\n}\n\nexport interface TelemetryModel {\n provider: string\n name: string\n /** SHA-256 (12 hex chars) of the prompt(s) used. */\n promptHash?: string\n /** SHA-256 (12 hex chars) of the composed rubric body, if applicable. */\n rubricHash?: string\n}\n","/**\n * Workers-safe telemetry sinks — only `fetch` and pure JS. No `fs`, no\n * `child_process`. Safe to import from a Cloudflare Worker, Lambda, edge\n * function, or browser extension.\n *\n * For Node-only file persistence, import from './sink-file' instead.\n */\n\nimport type { TelemetryEnvelope } from './schema'\n\nexport interface TelemetrySink {\n emit(envelope: TelemetryEnvelope): Promise<void> | void\n close?(): Promise<void> | void\n}\n\n/** Best-effort POST to a remote collector. Fire-and-forget; never throws. */\nexport class HttpTelemetrySink implements TelemetrySink {\n private inflight = new Set<Promise<void>>()\n\n constructor(\n private readonly endpoint: string,\n private readonly bearer?: string,\n ) {}\n\n emit(envelope: TelemetryEnvelope): void {\n const body = JSON.stringify(envelope)\n const headers: Record<string, string> = { 'content-type': 'application/json' }\n if (this.bearer) headers.authorization = `Bearer ${this.bearer}`\n const promise = fetch(this.endpoint, { method: 'POST', headers, body })\n .then(() => undefined)\n .catch(() => undefined)\n this.inflight.add(promise)\n promise.finally(() => this.inflight.delete(promise))\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(Array.from(this.inflight))\n }\n}\n\n/** Fanout to multiple sinks — failures in one do not affect others. 
*/\nexport class FanoutTelemetrySink implements TelemetrySink {\n constructor(private readonly sinks: TelemetrySink[]) {}\n\n emit(envelope: TelemetryEnvelope): void {\n for (const sink of this.sinks) {\n try {\n const result = sink.emit(envelope)\n if (result && typeof (result as Promise<unknown>).catch === 'function') {\n ;(result as Promise<unknown>).catch(() => undefined)\n }\n } catch {\n // swallow — telemetry must never break a run\n }\n }\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(this.sinks.map((s) => Promise.resolve(s.close?.())))\n }\n}\n\n/** No-op sink — used when telemetry is explicitly disabled. */\nexport class NullTelemetrySink implements TelemetrySink {\n emit(): void {}\n}\n\n/** In-memory sink — useful for tests + downstream adapters. */\nexport class InMemoryTelemetrySink implements TelemetrySink {\n readonly envelopes: TelemetryEnvelope[] = []\n emit(envelope: TelemetryEnvelope): void {\n this.envelopes.push(envelope)\n }\n clear(): void { this.envelopes.length = 0 }\n}\n","/**\n * Telemetry client — thin wrapper that builds envelopes from `EmitArgs` and\n * delegates to a `TelemetrySink`. Pure logic; no I/O. Use this from any\n * runtime — Workers, Node, browser — and choose the sink accordingly.\n *\n * For an opinionated singleton with env-var-driven sink wiring (the bad CLI\n * pattern), see `./node-client.ts`.\n */\n\nimport type { TelemetryEnvelope, TelemetryKind, TelemetryModel, TelemetrySource } from './schema'\nimport { TELEMETRY_SCHEMA_VERSION } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\nexport interface EmitArgs {\n kind: TelemetryKind\n runId: string\n parentRunId?: string\n ok: boolean\n durationMs: number\n data?: Record<string, unknown>\n metrics?: Record<string, number>\n tags?: Record<string, string>\n model?: TelemetryModel\n error?: string\n /** Override the source for this envelope. Falls back to `defaultSource`. 
*/\n source?: TelemetrySource\n}\n\nexport class TelemetryClient {\n constructor(\n private readonly sink: TelemetrySink,\n private readonly defaultSource: TelemetrySource,\n ) {}\n\n emit(args: EmitArgs): void {\n const envelope: TelemetryEnvelope = {\n schemaVersion: TELEMETRY_SCHEMA_VERSION,\n envelopeId: makeEnvelopeId(),\n runId: args.runId,\n timestamp: new Date().toISOString(),\n source: args.source ?? this.defaultSource,\n kind: args.kind,\n ok: args.ok,\n durationMs: args.durationMs,\n data: args.data ?? {},\n metrics: args.metrics ?? {},\n ...(args.parentRunId ? { parentRunId: args.parentRunId } : {}),\n ...(args.model ? { model: args.model } : {}),\n ...(args.tags ? { tags: args.tags } : {}),\n ...(args.error ? { error: args.error } : {}),\n }\n try {\n this.sink.emit(envelope)\n } catch {\n // swallow — telemetry never breaks the calling code path\n }\n }\n\n async close(): Promise<void> {\n await this.sink.close?.()\n }\n}\n\n/** Generate a UUIDv4 with whatever crypto is available (Node, Workers, browsers). */\nfunction makeEnvelopeId(): string {\n if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {\n return crypto.randomUUID()\n }\n // Last-resort fallback. Lower entropy but never throws.\n return 'env-' + Date.now().toString(36) + '-' + Math.random().toString(36).slice(2, 10)\n}\n\nexport const SECRET_FLAGS = new Set(['--api-key', '--bearer', '--token', '--password'])\n\n/** Strip likely-secret values from argv, preserving structure. 
*/\nexport function sanitiseArgv(argv: string[]): string[] {\n const out: string[] = []\n for (let i = 0; i < argv.length; i++) {\n const a = argv[i]!\n if (SECRET_FLAGS.has(a)) {\n out.push(a, '<redacted>')\n i++\n continue\n }\n if (/^(?:--api-key|--bearer|--token|--password)=/.test(a)) {\n out.push(a.replace(/=.*$/, '=<redacted>'))\n continue\n }\n out.push(a)\n }\n return out\n}\n"],"mappings":";AAYO,IAAM,2BAA2B;;;ACIjC,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YACmB,UACA,QACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAJX,WAAW,oBAAI,IAAmB;AAAA,EAO1C,KAAK,UAAmC;AACtC,UAAM,OAAO,KAAK,UAAU,QAAQ;AACpC,UAAM,UAAkC,EAAE,gBAAgB,mBAAmB;AAC7E,QAAI,KAAK,OAAQ,SAAQ,gBAAgB,UAAU,KAAK,MAAM;AAC9D,UAAM,UAAU,MAAM,KAAK,UAAU,EAAE,QAAQ,QAAQ,SAAS,KAAK,CAAC,EACnE,KAAK,MAAM,MAAS,EACpB,MAAM,MAAM,MAAS;AACxB,SAAK,SAAS,IAAI,OAAO;AACzB,YAAQ,QAAQ,MAAM,KAAK,SAAS,OAAO,OAAO,CAAC;AAAA,EACrD;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,MAAM,KAAK,KAAK,QAAQ,CAAC;AAAA,EACpD;AACF;AAGO,IAAM,sBAAN,MAAmD;AAAA,EACxD,YAA6B,OAAwB;AAAxB;AAAA,EAAyB;AAAA,EAAzB;AAAA,EAE7B,KAAK,UAAmC;AACtC,eAAW,QAAQ,KAAK,OAAO;AAC7B,UAAI;AACF,cAAM,SAAS,KAAK,KAAK,QAAQ;AACjC,YAAI,UAAU,OAAQ,OAA4B,UAAU,YAAY;AACtE;AAAC,UAAC,OAA4B,MAAM,MAAM,MAAS;AAAA,QACrD;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,KAAK,MAAM,IAAI,CAAC,MAAM,QAAQ,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAAA,EAC9E;AACF;AAGO,IAAM,oBAAN,MAAiD;AAAA,EACtD,OAAa;AAAA,EAAC;AAChB;AAGO,IAAM,wBAAN,MAAqD;AAAA,EACjD,YAAiC,CAAC;AAAA,EAC3C,KAAK,UAAmC;AACtC,SAAK,UAAU,KAAK,QAAQ;AAAA,EAC9B;AAAA,EACA,QAAc;AAAE,SAAK,UAAU,SAAS;AAAA,EAAE;AAC5C;;;AC9CO,IAAM,kBAAN,MAAsB;AAAA,EAC3B,YACmB,MACA,eACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAGnB,KAAK,MAAsB;AACzB,UAAM,WAA8B;AAAA,MAClC,eAAe;AAAA,MACf,YAAY,eAAe;AAAA,MAC3B,OAAO,KAAK;AAAA,MACZ,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,MAClC,QAAQ,KAAK,UAAU,KAAK;AAAA,MAC5B,MAAM,KAAK;AAAA,MACX,IAAI,KAAK;AAAA,MACT,YAAY,KAAK;AAAA,MACjB,MAAM,KAAK,QAAQ,CAAC;AAAA,MACpB,SAAS,KAAK,WAAW,CAAC;AAAA,MAC1B,GAAI,KAAK,cAAc,EAAE,aAAa,KAAK,YAAY,IAAI,
CAAC;AAAA,MAC5D,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,MAC1C,GAAI,KAAK,OAAO,EAAE,MAAM,KAAK,KAAK,IAAI,CAAC;AAAA,MACvC,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,IAC5C;AACA,QAAI;AACF,WAAK,KAAK,KAAK,QAAQ;AAAA,IACzB,QAAQ;AAAA,IAER;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,QAAQ;AAAA,EAC1B;AACF;AAGA,SAAS,iBAAyB;AAChC,MAAI,OAAO,WAAW,eAAe,OAAO,OAAO,eAAe,YAAY;AAC5E,WAAO,OAAO,WAAW;AAAA,EAC3B;AAEA,SAAO,SAAS,KAAK,IAAI,EAAE,SAAS,EAAE,IAAI,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACxF;AAEO,IAAM,eAAe,oBAAI,IAAI,CAAC,aAAa,YAAY,WAAW,YAAY,CAAC;AAG/E,SAAS,aAAa,MAA0B;AACrD,QAAM,MAAgB,CAAC;AACvB,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,IAAI,KAAK,CAAC;AAChB,QAAI,aAAa,IAAI,CAAC,GAAG;AACvB,UAAI,KAAK,GAAG,YAAY;AACxB;AACA;AAAA,IACF;AACA,QAAI,8CAA8C,KAAK,CAAC,GAAG;AACzD,UAAI,KAAK,EAAE,QAAQ,QAAQ,aAAa,CAAC;AACzC;AAAA,IACF;AACA,QAAI,KAAK,CAAC;AAAA,EACZ;AACA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { OpenAPIObject } from 'openapi3-ts/oas31';
|
|
3
|
+
import * as hono_types from 'hono/types';
|
|
4
|
+
import { ServerType } from '@hono/node-server';
|
|
5
|
+
import { Hono } from 'hono';
|
|
6
|
+
|
|
7
|
+
declare const RubricDimensionSchema: z.ZodObject<{
|
|
8
|
+
id: z.ZodString;
|
|
9
|
+
description: z.ZodString;
|
|
10
|
+
weight: z.ZodDefault<z.ZodNumber>;
|
|
11
|
+
min: z.ZodDefault<z.ZodNumber>;
|
|
12
|
+
max: z.ZodDefault<z.ZodNumber>;
|
|
13
|
+
}, z.core.$strip>;
|
|
14
|
+
declare const FailureModeSchema: z.ZodObject<{
|
|
15
|
+
id: z.ZodString;
|
|
16
|
+
description: z.ZodString;
|
|
17
|
+
}, z.core.$strip>;
|
|
18
|
+
declare const RubricSchema: z.ZodObject<{
|
|
19
|
+
name: z.ZodString;
|
|
20
|
+
description: z.ZodString;
|
|
21
|
+
systemPrompt: z.ZodString;
|
|
22
|
+
dimensions: z.ZodArray<z.ZodObject<{
|
|
23
|
+
id: z.ZodString;
|
|
24
|
+
description: z.ZodString;
|
|
25
|
+
weight: z.ZodDefault<z.ZodNumber>;
|
|
26
|
+
min: z.ZodDefault<z.ZodNumber>;
|
|
27
|
+
max: z.ZodDefault<z.ZodNumber>;
|
|
28
|
+
}, z.core.$strip>>;
|
|
29
|
+
failureModes: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
30
|
+
id: z.ZodString;
|
|
31
|
+
description: z.ZodString;
|
|
32
|
+
}, z.core.$strip>>>;
|
|
33
|
+
wins: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
34
|
+
id: z.ZodString;
|
|
35
|
+
description: z.ZodString;
|
|
36
|
+
}, z.core.$strip>>>;
|
|
37
|
+
}, z.core.$strip>;
|
|
38
|
+
declare const JudgeRequestSchema: z.ZodObject<{
|
|
39
|
+
rubricName: z.ZodOptional<z.ZodString>;
|
|
40
|
+
rubric: z.ZodOptional<z.ZodObject<{
|
|
41
|
+
name: z.ZodString;
|
|
42
|
+
description: z.ZodString;
|
|
43
|
+
systemPrompt: z.ZodString;
|
|
44
|
+
dimensions: z.ZodArray<z.ZodObject<{
|
|
45
|
+
id: z.ZodString;
|
|
46
|
+
description: z.ZodString;
|
|
47
|
+
weight: z.ZodDefault<z.ZodNumber>;
|
|
48
|
+
min: z.ZodDefault<z.ZodNumber>;
|
|
49
|
+
max: z.ZodDefault<z.ZodNumber>;
|
|
50
|
+
}, z.core.$strip>>;
|
|
51
|
+
failureModes: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
52
|
+
id: z.ZodString;
|
|
53
|
+
description: z.ZodString;
|
|
54
|
+
}, z.core.$strip>>>;
|
|
55
|
+
wins: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
56
|
+
id: z.ZodString;
|
|
57
|
+
description: z.ZodString;
|
|
58
|
+
}, z.core.$strip>>>;
|
|
59
|
+
}, z.core.$strip>>;
|
|
60
|
+
content: z.ZodString;
|
|
61
|
+
context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
62
|
+
model: z.ZodOptional<z.ZodString>;
|
|
63
|
+
}, z.core.$strip>;
|
|
64
|
+
declare const JudgeResultSchema: z.ZodObject<{
|
|
65
|
+
composite: z.ZodNumber;
|
|
66
|
+
dimensions: z.ZodRecord<z.ZodString, z.ZodNumber>;
|
|
67
|
+
failureModes: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
68
|
+
wins: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
69
|
+
rationale: z.ZodString;
|
|
70
|
+
rubricVersion: z.ZodString;
|
|
71
|
+
model: z.ZodString;
|
|
72
|
+
durationMs: z.ZodNumber;
|
|
73
|
+
}, z.core.$strip>;
|
|
74
|
+
declare const RubricInfoSchema: z.ZodObject<{
|
|
75
|
+
name: z.ZodString;
|
|
76
|
+
description: z.ZodString;
|
|
77
|
+
dimensions: z.ZodArray<z.ZodObject<{
|
|
78
|
+
id: z.ZodString;
|
|
79
|
+
description: z.ZodString;
|
|
80
|
+
weight: z.ZodNumber;
|
|
81
|
+
}, z.core.$strip>>;
|
|
82
|
+
failureModes: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
83
|
+
rubricVersion: z.ZodString;
|
|
84
|
+
}, z.core.$strip>;
|
|
85
|
+
declare const ListRubricsResponseSchema: z.ZodObject<{
|
|
86
|
+
rubrics: z.ZodArray<z.ZodObject<{
|
|
87
|
+
name: z.ZodString;
|
|
88
|
+
description: z.ZodString;
|
|
89
|
+
dimensions: z.ZodArray<z.ZodObject<{
|
|
90
|
+
id: z.ZodString;
|
|
91
|
+
description: z.ZodString;
|
|
92
|
+
weight: z.ZodNumber;
|
|
93
|
+
}, z.core.$strip>>;
|
|
94
|
+
failureModes: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
95
|
+
rubricVersion: z.ZodString;
|
|
96
|
+
}, z.core.$strip>>;
|
|
97
|
+
}, z.core.$strip>;
|
|
98
|
+
declare const VersionResponseSchema: z.ZodObject<{
|
|
99
|
+
package: z.ZodString;
|
|
100
|
+
version: z.ZodString;
|
|
101
|
+
wireVersion: z.ZodString;
|
|
102
|
+
apiSurface: z.ZodArray<z.ZodString>;
|
|
103
|
+
}, z.core.$strip>;
|
|
104
|
+
declare const HealthResponseSchema: z.ZodObject<{
|
|
105
|
+
status: z.ZodLiteral<"ok">;
|
|
106
|
+
uptimeSec: z.ZodNumber;
|
|
107
|
+
}, z.core.$strip>;
|
|
108
|
+
declare const ErrorResponseSchema: z.ZodObject<{
|
|
109
|
+
error: z.ZodObject<{
|
|
110
|
+
code: z.ZodString;
|
|
111
|
+
message: z.ZodString;
|
|
112
|
+
details: z.ZodOptional<z.ZodUnknown>;
|
|
113
|
+
}, z.core.$strip>;
|
|
114
|
+
}, z.core.$strip>;
|
|
115
|
+
/**
 * Static TypeScript types derived from the zod schemas via `z.infer`, one
 * alias per schema named in the `typeof` operand.
 *
 * NOTE(review): HealthResponseSchema has no matching inferred alias in this
 * group; confirm that the omission is intentional.
 */
type RubricDimension = z.infer<typeof RubricDimensionSchema>;
|
|
116
|
+
type FailureMode = z.infer<typeof FailureModeSchema>;
|
|
117
|
+
type Rubric = z.infer<typeof RubricSchema>;
|
|
118
|
+
type JudgeRequest = z.infer<typeof JudgeRequestSchema>;
|
|
119
|
+
type JudgeResult = z.infer<typeof JudgeResultSchema>;
|
|
120
|
+
type RubricInfo = z.infer<typeof RubricInfoSchema>;
|
|
121
|
+
type ListRubricsResponse = z.infer<typeof ListRubricsResponseSchema>;
|
|
122
|
+
type VersionResponse = z.infer<typeof VersionResponseSchema>;
|
|
123
|
+
type ErrorResponse = z.infer<typeof ErrorResponseSchema>;
|
|
124
|
+
/**
* Version string of the wire contract (the request/response schemas).
*
|
|
125
|
+
* Bump on any breaking change to a request/response schema.
|
|
126
|
+
* Non-breaking (additive) changes don't require a bump.
|
|
127
|
+
*/
|
|
128
|
+
declare const WIRE_VERSION = "1.0.0";
|
|
129
|
+
/**
|
|
130
|
+
* Stable hash of a rubric. Used to make scores comparable across runs:
|
|
131
|
+
* if the rubricVersion matches, the rubric was identical.
*
* @param rubric - The rubric to hash.
* @returns The hash as a string (presumably the value surfaced as
*   `rubricVersion`; confirm against the implementation).
|
|
132
|
+
*/
|
|
133
|
+
declare function hashRubric(rubric: Rubric): string;
|
|
134
|
+
|
|
135
|
+
/**
 * Caller-fixable error. The transport renders this to 4xx + ErrorResponse.
 *
 * The `code`, `message`, and `details` members line up with the fields of
 * the `error` object in ErrorResponseSchema.
 */
|
|
136
|
+
declare class WireError extends Error {
|
|
137
|
+
/** Error code string supplied to the constructor. */
readonly code: string;
|
|
138
|
+
/** Numeric status (presumably the HTTP status the transport responds with; confirm). */
readonly status: number;
|
|
139
|
+
/** Optional extra payload of unknown shape. */
readonly details?: unknown | undefined;
|
|
140
|
+
/**
 * @param code - Error code string.
 * @param message - Human-readable message passed along as the Error message.
 * @param status - Optional status; the default applied when omitted is not visible in this declaration.
 * @param details - Optional extra payload.
 */
constructor(code: string, message: string, status?: number, details?: unknown | undefined);
|
|
141
|
+
}
|
|
142
|
+
/** Handles one judge request; the returned promise resolves with the JudgeResult. */
declare function handleJudge(req: JudgeRequest): Promise<JudgeResult>;
|
|
143
|
+
/** Returns the ListRubricsResponse payload; synchronous and takes no arguments. */
declare function handleListRubrics(): ListRubricsResponse;
|
|
144
|
+
/** Returns the VersionResponse payload; synchronous and takes no arguments. */
declare function handleVersion(): VersionResponse;
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Built-in rubrics shipped with agent-eval.
|
|
148
|
+
*
|
|
149
|
+
* A rubric is a set of scoring axes plus a system prompt that tells the
|
|
150
|
+
* judging LLM how to grade against those axes. Built-in rubrics are
|
|
151
|
+
* curated for use cases that recur across Tangle projects — call them
|
|
152
|
+
* by name from any client.
|
|
153
|
+
*
|
|
154
|
+
* Adding a rubric:
|
|
155
|
+
* 1. Define the Rubric object below with a clear `description` and
|
|
156
|
+
* named `dimensions`.
|
|
157
|
+
* 2. Register it in `BUILTIN_RUBRICS` at the bottom.
|
|
158
|
+
* 3. Add a test in `tests/wire/rubrics.test.ts`.
|
|
159
|
+
*
|
|
160
|
+
* Custom rubrics: callers pass `rubric` inline to /v1/judge instead of
|
|
161
|
+
* `rubricName` — see schemas.ts.
|
|
162
|
+
*/
|
|
163
|
+
|
|
164
|
+
/** Registry of built-in rubrics, keyed by string (presumably the rubric name; confirm against the registration code). */
declare const BUILTIN_RUBRICS: Record<string, Rubric>;
|
|
165
|
+
/**
 * Get a built-in rubric by name, or undefined.
 *
 * @param name - Name of the built-in rubric to look up.
 * @returns The matching Rubric, or `undefined` when there is none.
 */
|
|
166
|
+
declare function getBuiltinRubric(name: string): Rubric | undefined;
|
|
167
|
+
/**
 * List built-in rubrics with their stable versions.
 *
 * @returns An array with one entry per rubric. Each entry has `name`,
 *   `description`, `dimensions` (`id`, `description`, `weight`),
 *   `failureModes`, and `rubricVersion`, the same field names that
 *   RubricInfoSchema declares.
 */
|
|
168
|
+
declare function listBuiltinRubrics(): {
|
|
169
|
+
name: string;
|
|
170
|
+
description: string;
|
|
171
|
+
dimensions: {
|
|
172
|
+
id: string;
|
|
173
|
+
description: string;
|
|
174
|
+
weight: number;
|
|
175
|
+
}[];
|
|
176
|
+
failureModes: string[];
|
|
177
|
+
rubricVersion: string;
|
|
178
|
+
}[];
|
|
179
|
+
|
|
180
|
+
/**
 * Builds an OpenAPIObject document.
 *
 * @param packageVersion - Package version string (presumably embedded in the document metadata; confirm).
 */
declare function buildOpenApi(packageVersion: string): OpenAPIObject;
|
|
181
|
+
|
|
182
|
+
/** Creates a Hono application typed with a blank env, a blank schema, and base path "/". */
declare function createApp(): Hono<hono_types.BlankEnv, hono_types.BlankSchema, "/">;
|
|
183
|
+
/** Options accepted by `startServer`; every field is optional. */
interface ServeOptions {
|
|
184
|
+
/** Port for the server. Default 5005. */
|
|
185
|
+
port?: number;
|
|
186
|
+
/** Host/interface to bind. Default '127.0.0.1'. Set to '0.0.0.0' to listen on all interfaces. */
|
|
187
|
+
host?: string;
|
|
188
|
+
}
|
|
189
|
+
/**
 * Starts the server and returns its ServerType handle.
 *
 * @param opts - Optional port/host overrides; see ServeOptions for the documented defaults.
 */
declare function startServer(opts?: ServeOptions): ServerType;
|
|
190
|
+
|
|
191
|
+
/** One RPC request: `method` names the operation and `params` carries its untyped argument. */
interface RpcRequest {
|
|
192
|
+
/** Operation to invoke; limited to these three literals. */
method: 'judge' | 'listRubrics' | 'version';
|
|
193
|
+
/** Optional payload, typed `unknown` at this boundary. */
params?: unknown;
|
|
194
|
+
}
|
|
195
|
+
/** Successful RPC reply; the payload sits under `result` and is typed `unknown`. */
interface RpcSuccess {
|
|
196
|
+
result: unknown;
|
|
197
|
+
}
|
|
198
|
+
/**
 * Failed RPC reply. The nested `error` object has the same field names
 * (`code`, `message`, optional `details`) as the `error` object in
 * ErrorResponseSchema.
 */
interface RpcError {
|
|
199
|
+
error: {
|
|
200
|
+
code: string;
|
|
201
|
+
message: string;
|
|
202
|
+
details?: unknown;
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
/**
 * Dispatches one RpcRequest; the returned promise resolves with either an
 * RpcSuccess or an RpcError.
 *
 * NOTE(review): the union return type suggests failures are reported as
 * RpcError values rather than rejections; confirm in the implementation.
 */
declare function dispatchRpc(req: RpcRequest): Promise<RpcSuccess | RpcError>;
|
|
206
|
+
/**
 * Read one JSON request from stdin, write one JSON response to stdout.
 *
 * @param method - Optional method name; how it combines with the request read from stdin is not visible here (TODO confirm).
 * @returns A promise of a number (presumably a process exit code; confirm).
 */
|
|
207
|
+
declare function runRpcOnce(method?: string): Promise<number>;
|
|
208
|
+
/**
 * Read JSONL requests from stdin, write JSONL responses to stdout.
 *
 * @param method - Optional method name; how it combines with each request line is not visible here (TODO confirm).
 * @returns A promise of a number (presumably a process exit code; confirm).
 */
|
|
209
|
+
declare function runRpcBatch(method?: string): Promise<number>;
|
|
210
|
+
|
|
211
|
+
export { BUILTIN_RUBRICS, type ErrorResponse, ErrorResponseSchema, type FailureMode, FailureModeSchema, HealthResponseSchema, type JudgeRequest, JudgeRequestSchema, type JudgeResult, JudgeResultSchema, type ListRubricsResponse, ListRubricsResponseSchema, type Rubric, type RubricDimension, RubricDimensionSchema, type RubricInfo, RubricInfoSchema, RubricSchema, type ServeOptions, type VersionResponse, VersionResponseSchema, WIRE_VERSION, WireError, buildOpenApi, createApp, dispatchRpc, getBuiltinRubric, handleJudge, handleListRubrics, handleVersion, hashRubric, listBuiltinRubrics, runRpcBatch, runRpcOnce, startServer };
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BUILTIN_RUBRICS,
|
|
3
|
+
ErrorResponseSchema,
|
|
4
|
+
FailureModeSchema,
|
|
5
|
+
HealthResponseSchema,
|
|
6
|
+
JudgeRequestSchema,
|
|
7
|
+
JudgeResultSchema,
|
|
8
|
+
ListRubricsResponseSchema,
|
|
9
|
+
RubricDimensionSchema,
|
|
10
|
+
RubricInfoSchema,
|
|
11
|
+
RubricSchema,
|
|
12
|
+
VersionResponseSchema,
|
|
13
|
+
WIRE_VERSION,
|
|
14
|
+
WireError,
|
|
15
|
+
buildOpenApi,
|
|
16
|
+
createApp,
|
|
17
|
+
dispatchRpc,
|
|
18
|
+
getBuiltinRubric,
|
|
19
|
+
handleJudge,
|
|
20
|
+
handleListRubrics,
|
|
21
|
+
handleVersion,
|
|
22
|
+
hashRubric,
|
|
23
|
+
listBuiltinRubrics,
|
|
24
|
+
runRpcBatch,
|
|
25
|
+
runRpcOnce,
|
|
26
|
+
startServer
|
|
27
|
+
} from "../chunk-OZPRSK4A.js";
|
|
28
|
+
import "../chunk-ITN4YOZY.js";
|
|
29
|
+
export {
|
|
30
|
+
BUILTIN_RUBRICS,
|
|
31
|
+
ErrorResponseSchema,
|
|
32
|
+
FailureModeSchema,
|
|
33
|
+
HealthResponseSchema,
|
|
34
|
+
JudgeRequestSchema,
|
|
35
|
+
JudgeResultSchema,
|
|
36
|
+
ListRubricsResponseSchema,
|
|
37
|
+
RubricDimensionSchema,
|
|
38
|
+
RubricInfoSchema,
|
|
39
|
+
RubricSchema,
|
|
40
|
+
VersionResponseSchema,
|
|
41
|
+
WIRE_VERSION,
|
|
42
|
+
WireError,
|
|
43
|
+
buildOpenApi,
|
|
44
|
+
createApp,
|
|
45
|
+
dispatchRpc,
|
|
46
|
+
getBuiltinRubric,
|
|
47
|
+
handleJudge,
|
|
48
|
+
handleListRubrics,
|
|
49
|
+
handleVersion,
|
|
50
|
+
hashRubric,
|
|
51
|
+
listBuiltinRubrics,
|
|
52
|
+
runRpcBatch,
|
|
53
|
+
runRpcOnce,
|
|
54
|
+
startServer
|
|
55
|
+
};
|
|
56
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|