@tangle-network/agent-eval 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +498 -4
- package/dist/index.js +786 -38
- package/dist/index.js.map +1 -1
- package/dist/sink-fetch-C0B8ximv.d.ts +101 -0
- package/dist/telemetry/file.d.ts +19 -0
- package/dist/telemetry/file.js +40 -0
- package/dist/telemetry/file.js.map +1 -0
- package/dist/telemetry/index.d.ts +38 -0
- package/dist/telemetry/index.js +128 -0
- package/dist/telemetry/index.js.map +1 -0
- package/package.json +18 -9
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fleet telemetry envelope — agent-eval's portable observability shape.
|
|
3
|
+
*
|
|
4
|
+
* Designed so any consumer (Node CLI, Cloudflare Worker, Lambda, browser
|
|
5
|
+
* extension) can emit structured rows describing one unit of work — a page
|
|
6
|
+
* audit, a tool call, an evolve round, a full agent run — to a central sink.
|
|
7
|
+
*
|
|
8
|
+
* The schema is intentionally a strict superset of agent-eval's `Run` shape
|
|
9
|
+
* so a future TraceStore adapter can promote envelopes into traces without
|
|
10
|
+
* translation.
|
|
11
|
+
*/
|
|
12
|
+
declare const TELEMETRY_SCHEMA_VERSION = 1;
|
|
13
|
+
/** Discriminator for the unit of work this envelope describes. */
|
|
14
|
+
type TelemetryKind = 'agent-run' | 'design-audit-page' | 'design-audit-run' | 'design-evolve-round' | 'design-evolve-run' | 'gepa-trial' | 'gepa-generation' | 'tool-call' | 'judge-verdict' | 'custom';
|
|
15
|
+
/**
 * One telemetry row describing a single unit of work. `TelemetryClient.emit`
 * builds these from `EmitArgs`; sinks receive them unchanged.
 */
interface TelemetryEnvelope {
|
|
16
|
+
/** Schema revision of this envelope; always `TELEMETRY_SCHEMA_VERSION`. */
schemaVersion: typeof TELEMETRY_SCHEMA_VERSION;
|
|
17
|
+
/**
 * Identifier for this envelope. `TelemetryClient` uses `crypto.randomUUID()`
 * when available, otherwise an `env-<time>-<random>` fallback string.
 */
envelopeId: string;
|
|
18
|
+
/** Run identifier supplied by the caller (copied from `EmitArgs.runId`). */
runId: string;
|
|
19
|
+
/**
 * Creation time. `TelemetryClient` sets it with `new Date().toISOString()`;
 * `FileTelemetrySink` uses the first 10 characters as the file's date.
 */
timestamp: string;
|
|
20
|
+
/** Parent run identifier; present only when the caller supplied one. */
parentRunId?: string;
|
|
21
|
+
/** Where the envelope came from: `EmitArgs.source`, else the client's default source. */
source: TelemetrySource;
|
|
22
|
+
/** Model details; present only when the caller supplied them. */
model?: TelemetryModel;
|
|
23
|
+
/** Which unit of work this envelope describes. */
kind: TelemetryKind;
|
|
24
|
+
/** Caller-reported outcome flag (copied from `EmitArgs.ok`). */
ok: boolean;
|
|
25
|
+
/** Caller-reported duration (copied from `EmitArgs.durationMs`). */
durationMs: number;
|
|
26
|
+
/** Free-form payload; `TelemetryClient` defaults it to `{}`. */
data: Record<string, unknown>;
|
|
27
|
+
/** Numeric measurements; `TelemetryClient` defaults it to `{}`. */
metrics: Record<string, number>;
|
|
28
|
+
/** String labels; present only when the caller supplied them. */
tags?: Record<string, string>;
|
|
29
|
+
/** Error text; `TelemetryClient` omits it when absent or empty. */
error?: string;
|
|
30
|
+
}
|
|
31
|
+
interface TelemetrySource {
|
|
32
|
+
/** Repo identity — basename of cwd plus git remote if discoverable. */
|
|
33
|
+
repo: string;
|
|
34
|
+
cwd: string;
|
|
35
|
+
gitSha?: string;
|
|
36
|
+
gitBranch?: string;
|
|
37
|
+
cliVersion: string;
|
|
38
|
+
/** What was invoked, e.g. `design-audit`, `bad run`, `gepa --target`. */
|
|
39
|
+
invocation: string;
|
|
40
|
+
/** Sanitised argv minus secrets. */
|
|
41
|
+
argv?: string[];
|
|
42
|
+
/**
|
|
43
|
+
* Multi-tenant identity. Set when the consumer runs inside a hosted
|
|
44
|
+
* product so a fleet rollup can group by tenant without leaking customer
|
|
45
|
+
* URLs or PII.
|
|
46
|
+
*/
|
|
47
|
+
tenantId?: string;
|
|
48
|
+
/** Optional sub-tenant identity (project, suite, walkthrough, customer). */
|
|
49
|
+
customerId?: string;
|
|
50
|
+
/** SHA-256 (12 hex) of the API key used to authenticate this run, when applicable. */
|
|
51
|
+
apiKeyHash?: string;
|
|
52
|
+
}
|
|
53
|
+
interface TelemetryModel {
|
|
54
|
+
provider: string;
|
|
55
|
+
name: string;
|
|
56
|
+
/** SHA-256 (12 hex chars) of the prompt(s) used. */
|
|
57
|
+
promptHash?: string;
|
|
58
|
+
/** SHA-256 (12 hex chars) of the composed rubric body, if applicable. */
|
|
59
|
+
rubricHash?: string;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Workers-safe telemetry sinks — only `fetch` and pure JS. No `fs`, no
|
|
64
|
+
* `child_process`. Safe to import from a Cloudflare Worker, Lambda, edge
|
|
65
|
+
* function, or browser extension.
|
|
66
|
+
*
|
|
67
|
+
* For Node-only file persistence, import from './sink-file' instead.
|
|
68
|
+
*/
|
|
69
|
+
|
|
70
|
+
/** Destination for telemetry envelopes; every sink class in this module implements it. */
interface TelemetrySink {
|
|
71
|
+
/**
 * Accept one envelope. May complete synchronously or return a promise, so
 * callers that must not fail have to guard both a synchronous throw and a
 * rejected promise.
 */
emit(envelope: TelemetryEnvelope): Promise<void> | void;
|
|
72
|
+
/** Optional shutdown hook; `TelemetryClient` and `FanoutTelemetrySink` invoke it as `close?.()`. */
close?(): Promise<void> | void;
|
|
73
|
+
}
|
|
74
|
+
/** Best-effort POST to a remote collector. Fire-and-forget; never throws. */
|
|
75
|
+
declare class HttpTelemetrySink implements TelemetrySink {
|
|
76
|
+
private readonly endpoint;
|
|
77
|
+
private readonly bearer?;
|
|
78
|
+
private inflight;
|
|
79
|
+
constructor(endpoint: string, bearer?: string | undefined);
|
|
80
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
81
|
+
close(): Promise<void>;
|
|
82
|
+
}
|
|
83
|
+
/** Fanout to multiple sinks — failures in one do not affect others. */
|
|
84
|
+
declare class FanoutTelemetrySink implements TelemetrySink {
|
|
85
|
+
private readonly sinks;
|
|
86
|
+
constructor(sinks: TelemetrySink[]);
|
|
87
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
88
|
+
close(): Promise<void>;
|
|
89
|
+
}
|
|
90
|
+
/** No-op sink — used when telemetry is explicitly disabled. */
|
|
91
|
+
declare class NullTelemetrySink implements TelemetrySink {
|
|
92
|
+
emit(): void;
|
|
93
|
+
}
|
|
94
|
+
/** In-memory sink — useful for tests + downstream adapters. */
|
|
95
|
+
declare class InMemoryTelemetrySink implements TelemetrySink {
|
|
96
|
+
readonly envelopes: TelemetryEnvelope[];
|
|
97
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
98
|
+
clear(): void;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
export { FanoutTelemetrySink as F, HttpTelemetrySink as H, InMemoryTelemetrySink as I, NullTelemetrySink as N, type TelemetryKind as T, type TelemetryModel as a, type TelemetrySource as b, type TelemetrySink as c, TELEMETRY_SCHEMA_VERSION as d, type TelemetryEnvelope as e };
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { c as TelemetrySink, e as TelemetryEnvelope } from '../sink-fetch-C0B8ximv.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Node-only file sink. Imports `node:fs` — DO NOT import this from a Worker
|
|
5
|
+
* or edge runtime; use `./sink-fetch` instead.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** Append envelopes to a JSONL file, partitioned by repo + date. */
|
|
9
|
+
declare class FileTelemetrySink implements TelemetrySink {
|
|
10
|
+
private readonly baseDir;
|
|
11
|
+
private streams;
|
|
12
|
+
constructor(baseDir: string);
|
|
13
|
+
emit(envelope: TelemetryEnvelope): void;
|
|
14
|
+
close(): Promise<void>;
|
|
15
|
+
}
|
|
16
|
+
/** Default location for local telemetry, mirroring bad CLI's convention. */
|
|
17
|
+
declare function defaultTelemetryDir(homeDir: string, override?: string): string;
|
|
18
|
+
|
|
19
|
+
export { FileTelemetrySink, defaultTelemetryDir };
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// src/telemetry/sink-file.ts
|
|
2
|
+
import * as fs from "fs";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
var FileTelemetrySink = class {
|
|
5
|
+
constructor(baseDir) {
|
|
6
|
+
this.baseDir = baseDir;
|
|
7
|
+
fs.mkdirSync(baseDir, { recursive: true });
|
|
8
|
+
}
|
|
9
|
+
baseDir;
|
|
10
|
+
streams = /* @__PURE__ */ new Map();
|
|
11
|
+
emit(envelope) {
|
|
12
|
+
const date = envelope.timestamp.slice(0, 10);
|
|
13
|
+
const repo = envelope.source.repo || "unknown";
|
|
14
|
+
const key = `${repo}/${date}`;
|
|
15
|
+
let stream = this.streams.get(key);
|
|
16
|
+
if (!stream) {
|
|
17
|
+
const dir = path.join(this.baseDir, repo);
|
|
18
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
19
|
+
stream = fs.createWriteStream(path.join(dir, `${date}.jsonl`), { flags: "a", encoding: "utf-8" });
|
|
20
|
+
this.streams.set(key, stream);
|
|
21
|
+
}
|
|
22
|
+
stream.write(`${JSON.stringify(envelope)}
|
|
23
|
+
`);
|
|
24
|
+
}
|
|
25
|
+
async close() {
|
|
26
|
+
const closes = Array.from(this.streams.values()).map(
|
|
27
|
+
(s) => new Promise((resolve) => s.end(() => resolve()))
|
|
28
|
+
);
|
|
29
|
+
this.streams.clear();
|
|
30
|
+
await Promise.all(closes);
|
|
31
|
+
}
|
|
32
|
+
};
|
|
33
|
+
/**
 * Pick the directory for local telemetry files.
 *
 * @param homeDir Home directory used to build the default location.
 * @param override Explicit directory; used as-is whenever it is truthy.
 * @returns `override` when truthy, otherwise `<homeDir>/.agent-eval/telemetry`.
 */
function defaultTelemetryDir(homeDir, override) {
  if (override) {
    return override;
  }
  const segments = [homeDir, ".agent-eval", "telemetry"];
  return path.join(...segments);
}
|
|
36
|
+
export {
|
|
37
|
+
FileTelemetrySink,
|
|
38
|
+
defaultTelemetryDir
|
|
39
|
+
};
|
|
40
|
+
//# sourceMappingURL=file.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/telemetry/sink-file.ts"],"sourcesContent":["/**\n * Node-only file sink. Imports `node:fs` — DO NOT import this from a Worker\n * or edge runtime; use `./sink-fetch` instead.\n */\n\nimport * as fs from 'node:fs'\nimport * as path from 'node:path'\nimport type { TelemetryEnvelope } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\n/** Append envelopes to a JSONL file, partitioned by repo + date. */\nexport class FileTelemetrySink implements TelemetrySink {\n private streams = new Map<string, fs.WriteStream>()\n\n constructor(private readonly baseDir: string) {\n fs.mkdirSync(baseDir, { recursive: true })\n }\n\n emit(envelope: TelemetryEnvelope): void {\n const date = envelope.timestamp.slice(0, 10) // YYYY-MM-DD\n const repo = envelope.source.repo || 'unknown'\n const key = `${repo}/${date}`\n let stream = this.streams.get(key)\n if (!stream) {\n const dir = path.join(this.baseDir, repo)\n fs.mkdirSync(dir, { recursive: true })\n stream = fs.createWriteStream(path.join(dir, `${date}.jsonl`), { flags: 'a', encoding: 'utf-8' })\n this.streams.set(key, stream)\n }\n stream.write(`${JSON.stringify(envelope)}\\n`)\n }\n\n async close(): Promise<void> {\n const closes = Array.from(this.streams.values()).map(\n (s) => new Promise<void>((resolve) => s.end(() => resolve())),\n )\n this.streams.clear()\n await Promise.all(closes)\n }\n}\n\n/** Default location for local telemetry, mirroring bad CLI's convention. 
*/\nexport function defaultTelemetryDir(homeDir: string, override?: string): string {\n return override || path.join(homeDir, '.agent-eval', 'telemetry')\n}\n"],"mappings":";AAKA,YAAY,QAAQ;AACpB,YAAY,UAAU;AAKf,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YAA6B,SAAiB;AAAjB;AAC3B,IAAG,aAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AAAA,EAC3C;AAAA,EAF6B;AAAA,EAFrB,UAAU,oBAAI,IAA4B;AAAA,EAMlD,KAAK,UAAmC;AACtC,UAAM,OAAO,SAAS,UAAU,MAAM,GAAG,EAAE;AAC3C,UAAM,OAAO,SAAS,OAAO,QAAQ;AACrC,UAAM,MAAM,GAAG,IAAI,IAAI,IAAI;AAC3B,QAAI,SAAS,KAAK,QAAQ,IAAI,GAAG;AACjC,QAAI,CAAC,QAAQ;AACX,YAAM,MAAW,UAAK,KAAK,SAAS,IAAI;AACxC,MAAG,aAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AACrC,eAAY,qBAAuB,UAAK,KAAK,GAAG,IAAI,QAAQ,GAAG,EAAE,OAAO,KAAK,UAAU,QAAQ,CAAC;AAChG,WAAK,QAAQ,IAAI,KAAK,MAAM;AAAA,IAC9B;AACA,WAAO,MAAM,GAAG,KAAK,UAAU,QAAQ,CAAC;AAAA,CAAI;AAAA,EAC9C;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ,OAAO,CAAC,EAAE;AAAA,MAC/C,CAAC,MAAM,IAAI,QAAc,CAAC,YAAY,EAAE,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,IAC9D;AACA,SAAK,QAAQ,MAAM;AACnB,UAAM,QAAQ,IAAI,MAAM;AAAA,EAC1B;AACF;AAGO,SAAS,oBAAoB,SAAiB,UAA2B;AAC9E,SAAO,YAAiB,UAAK,SAAS,eAAe,WAAW;AAClE;","names":[]}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { T as TelemetryKind, a as TelemetryModel, b as TelemetrySource, c as TelemetrySink } from '../sink-fetch-C0B8ximv.js';
|
|
2
|
+
export { F as FanoutTelemetrySink, H as HttpTelemetrySink, I as InMemoryTelemetrySink, N as NullTelemetrySink, d as TELEMETRY_SCHEMA_VERSION, e as TelemetryEnvelope } from '../sink-fetch-C0B8ximv.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Telemetry client — thin wrapper that builds envelopes from `EmitArgs` and
|
|
6
|
+
* delegates to a `TelemetrySink`. Pure logic; no I/O. Use this from any
|
|
7
|
+
* runtime — Workers, Node, browser — and choose the sink accordingly.
|
|
8
|
+
*
|
|
9
|
+
* For an opinionated singleton with env-var-driven sink wiring (the bad CLI
|
|
10
|
+
* pattern), see `./node-client.ts`.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/** Caller-facing input to `TelemetryClient.emit`; the client turns it into a `TelemetryEnvelope`. */
interface EmitArgs {
|
|
14
|
+
/** Which unit of work is being reported. */
kind: TelemetryKind;
|
|
15
|
+
/** Run identifier, copied to the envelope unchanged. */
runId: string;
|
|
16
|
+
/** Parent run identifier; left off the envelope when absent or empty. */
parentRunId?: string;
|
|
17
|
+
/** Outcome flag, copied to the envelope unchanged. */
ok: boolean;
|
|
18
|
+
/** Duration, copied to the envelope unchanged. */
durationMs: number;
|
|
19
|
+
/** Free-form payload; the envelope gets `{}` when omitted. */
data?: Record<string, unknown>;
|
|
20
|
+
/** Numeric measurements; the envelope gets `{}` when omitted. */
metrics?: Record<string, number>;
|
|
21
|
+
/** String labels; left off the envelope when omitted. */
tags?: Record<string, string>;
|
|
22
|
+
/** Model details; left off the envelope when omitted. */
model?: TelemetryModel;
|
|
23
|
+
/** Error text; left off the envelope when absent or empty. */
error?: string;
|
|
24
|
+
/** Override the source for this envelope. Falls back to `defaultSource`. */
|
|
25
|
+
source?: TelemetrySource;
|
|
26
|
+
}
|
|
27
|
+
declare class TelemetryClient {
|
|
28
|
+
private readonly sink;
|
|
29
|
+
private readonly defaultSource;
|
|
30
|
+
constructor(sink: TelemetrySink, defaultSource: TelemetrySource);
|
|
31
|
+
emit(args: EmitArgs): void;
|
|
32
|
+
close(): Promise<void>;
|
|
33
|
+
}
|
|
34
|
+
declare const SECRET_FLAGS: Set<string>;
|
|
35
|
+
/** Strip likely-secret values from argv, preserving structure. */
|
|
36
|
+
declare function sanitiseArgv(argv: string[]): string[];
|
|
37
|
+
|
|
38
|
+
export { type EmitArgs, SECRET_FLAGS, TelemetryClient, TelemetryKind, TelemetryModel, TelemetrySink, TelemetrySource, sanitiseArgv };
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// src/telemetry/schema.ts
|
|
2
|
+
var TELEMETRY_SCHEMA_VERSION = 1;
|
|
3
|
+
|
|
4
|
+
// src/telemetry/sink-fetch.ts
|
|
5
|
+
var HttpTelemetrySink = class {
|
|
6
|
+
constructor(endpoint, bearer) {
|
|
7
|
+
this.endpoint = endpoint;
|
|
8
|
+
this.bearer = bearer;
|
|
9
|
+
}
|
|
10
|
+
endpoint;
|
|
11
|
+
bearer;
|
|
12
|
+
inflight = /* @__PURE__ */ new Set();
|
|
13
|
+
emit(envelope) {
|
|
14
|
+
const body = JSON.stringify(envelope);
|
|
15
|
+
const headers = { "content-type": "application/json" };
|
|
16
|
+
if (this.bearer) headers.authorization = `Bearer ${this.bearer}`;
|
|
17
|
+
const promise = fetch(this.endpoint, { method: "POST", headers, body }).then(() => void 0).catch(() => void 0);
|
|
18
|
+
this.inflight.add(promise);
|
|
19
|
+
promise.finally(() => this.inflight.delete(promise));
|
|
20
|
+
}
|
|
21
|
+
async close() {
|
|
22
|
+
await Promise.allSettled(Array.from(this.inflight));
|
|
23
|
+
}
|
|
24
|
+
};
|
|
25
|
+
var FanoutTelemetrySink = class {
|
|
26
|
+
constructor(sinks) {
|
|
27
|
+
this.sinks = sinks;
|
|
28
|
+
}
|
|
29
|
+
sinks;
|
|
30
|
+
emit(envelope) {
|
|
31
|
+
for (const sink of this.sinks) {
|
|
32
|
+
try {
|
|
33
|
+
const result = sink.emit(envelope);
|
|
34
|
+
if (result && typeof result.catch === "function") {
|
|
35
|
+
;
|
|
36
|
+
result.catch(() => void 0);
|
|
37
|
+
}
|
|
38
|
+
} catch {
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
async close() {
|
|
43
|
+
await Promise.allSettled(this.sinks.map((s) => Promise.resolve(s.close?.())));
|
|
44
|
+
}
|
|
45
|
+
};
|
|
46
|
+
var NullTelemetrySink = class {
|
|
47
|
+
emit() {
|
|
48
|
+
}
|
|
49
|
+
};
|
|
50
|
+
var InMemoryTelemetrySink = class {
|
|
51
|
+
envelopes = [];
|
|
52
|
+
emit(envelope) {
|
|
53
|
+
this.envelopes.push(envelope);
|
|
54
|
+
}
|
|
55
|
+
clear() {
|
|
56
|
+
this.envelopes.length = 0;
|
|
57
|
+
}
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
// src/telemetry/client.ts
|
|
61
|
+
var TelemetryClient = class {
|
|
62
|
+
constructor(sink, defaultSource) {
|
|
63
|
+
this.sink = sink;
|
|
64
|
+
this.defaultSource = defaultSource;
|
|
65
|
+
}
|
|
66
|
+
sink;
|
|
67
|
+
defaultSource;
|
|
68
|
+
emit(args) {
|
|
69
|
+
const envelope = {
|
|
70
|
+
schemaVersion: TELEMETRY_SCHEMA_VERSION,
|
|
71
|
+
envelopeId: makeEnvelopeId(),
|
|
72
|
+
runId: args.runId,
|
|
73
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
74
|
+
source: args.source ?? this.defaultSource,
|
|
75
|
+
kind: args.kind,
|
|
76
|
+
ok: args.ok,
|
|
77
|
+
durationMs: args.durationMs,
|
|
78
|
+
data: args.data ?? {},
|
|
79
|
+
metrics: args.metrics ?? {},
|
|
80
|
+
...args.parentRunId ? { parentRunId: args.parentRunId } : {},
|
|
81
|
+
...args.model ? { model: args.model } : {},
|
|
82
|
+
...args.tags ? { tags: args.tags } : {},
|
|
83
|
+
...args.error ? { error: args.error } : {}
|
|
84
|
+
};
|
|
85
|
+
try {
|
|
86
|
+
this.sink.emit(envelope);
|
|
87
|
+
} catch {
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
async close() {
|
|
91
|
+
await this.sink.close?.();
|
|
92
|
+
}
|
|
93
|
+
};
|
|
94
|
+
/**
 * Produce an identifier for a new envelope.
 *
 * @returns `crypto.randomUUID()` when the runtime provides it; otherwise a
 *   lower-entropy `env-<time>-<random>` string built from `Date.now()` and
 *   `Math.random()`. Never throws.
 */
function makeEnvelopeId() {
  const hasRandomUUID = typeof crypto !== "undefined" && typeof crypto.randomUUID === "function";
  if (!hasRandomUUID) {
    const stamp = Date.now().toString(36);
    const noise = Math.random().toString(36).slice(2, 10);
    return "env-" + stamp + "-" + noise;
  }
  return crypto.randomUUID();
}
|
|
100
|
+
/**
 * Flags whose values must never be recorded. `sanitiseArgv` reads this set at
 * call time, so entries added to it are honoured for both the `--flag value`
 * and the `--flag=value` spelling.
 */
var SECRET_FLAGS = /* @__PURE__ */ new Set(["--api-key", "--bearer", "--token", "--password"]);
/**
 * Copy `argv`, replacing the value of every flag in `SECRET_FLAGS` with
 * `<redacted>`. The input array is not modified and the output has the same
 * number of elements.
 *
 * @param argv Raw argument list.
 * @returns A new array with secret values redacted.
 */
function sanitiseArgv(argv) {
  const out = [];
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (SECRET_FLAGS.has(arg)) {
      out.push(arg);
      // Redact the following value only if there is one; a trailing flag
      // must not grow the argument list.
      if (i + 1 < argv.length) {
        out.push("<redacted>");
        i++;
      }
      continue;
    }
    // `--flag=value`: split on the first "=" rather than matching with a
    // regex, so values containing newlines are redacted too.
    const eq = arg.indexOf("=");
    if (eq > 0 && SECRET_FLAGS.has(arg.slice(0, eq))) {
      out.push(`${arg.slice(0, eq)}=<redacted>`);
      continue;
    }
    out.push(arg);
  }
  return out;
}
|
|
118
|
+
export {
|
|
119
|
+
FanoutTelemetrySink,
|
|
120
|
+
HttpTelemetrySink,
|
|
121
|
+
InMemoryTelemetrySink,
|
|
122
|
+
NullTelemetrySink,
|
|
123
|
+
SECRET_FLAGS,
|
|
124
|
+
TELEMETRY_SCHEMA_VERSION,
|
|
125
|
+
TelemetryClient,
|
|
126
|
+
sanitiseArgv
|
|
127
|
+
};
|
|
128
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/telemetry/schema.ts","../../src/telemetry/sink-fetch.ts","../../src/telemetry/client.ts"],"sourcesContent":["/**\n * Fleet telemetry envelope — agent-eval's portable observability shape.\n *\n * Designed so any consumer (Node CLI, Cloudflare Worker, Lambda, browser\n * extension) can emit structured rows describing one unit of work — a page\n * audit, a tool call, an evolve round, a full agent run — to a central sink.\n *\n * The schema is intentionally a strict superset of agent-eval's `Run` shape\n * so a future TraceStore adapter can promote envelopes into traces without\n * translation.\n */\n\nexport const TELEMETRY_SCHEMA_VERSION = 1\n\n/** Discriminator for the unit of work this envelope describes. */\nexport type TelemetryKind =\n | 'agent-run'\n | 'design-audit-page'\n | 'design-audit-run'\n | 'design-evolve-round'\n | 'design-evolve-run'\n | 'gepa-trial'\n | 'gepa-generation'\n | 'tool-call'\n | 'judge-verdict'\n | 'custom'\n\nexport interface TelemetryEnvelope {\n schemaVersion: typeof TELEMETRY_SCHEMA_VERSION\n envelopeId: string\n runId: string\n timestamp: string\n parentRunId?: string\n\n source: TelemetrySource\n model?: TelemetryModel\n kind: TelemetryKind\n ok: boolean\n durationMs: number\n\n data: Record<string, unknown>\n metrics: Record<string, number>\n tags?: Record<string, string>\n\n error?: string\n}\n\nexport interface TelemetrySource {\n /** Repo identity — basename of cwd plus git remote if discoverable. */\n repo: string\n cwd: string\n gitSha?: string\n gitBranch?: string\n cliVersion: string\n /** What was invoked, e.g. `design-audit`, `bad run`, `gepa --target`. */\n invocation: string\n /** Sanitised argv minus secrets. */\n argv?: string[]\n /**\n * Multi-tenant identity. 
Set when the consumer runs inside a hosted\n * product so a fleet rollup can group by tenant without leaking customer\n * URLs or PII.\n */\n tenantId?: string\n /** Optional sub-tenant identity (project, suite, walkthrough, customer). */\n customerId?: string\n /** SHA-256 (12 hex) of the API key used to authenticate this run, when applicable. */\n apiKeyHash?: string\n}\n\nexport interface TelemetryModel {\n provider: string\n name: string\n /** SHA-256 (12 hex chars) of the prompt(s) used. */\n promptHash?: string\n /** SHA-256 (12 hex chars) of the composed rubric body, if applicable. */\n rubricHash?: string\n}\n","/**\n * Workers-safe telemetry sinks — only `fetch` and pure JS. No `fs`, no\n * `child_process`. Safe to import from a Cloudflare Worker, Lambda, edge\n * function, or browser extension.\n *\n * For Node-only file persistence, import from './sink-file' instead.\n */\n\nimport type { TelemetryEnvelope } from './schema'\n\nexport interface TelemetrySink {\n emit(envelope: TelemetryEnvelope): Promise<void> | void\n close?(): Promise<void> | void\n}\n\n/** Best-effort POST to a remote collector. Fire-and-forget; never throws. */\nexport class HttpTelemetrySink implements TelemetrySink {\n private inflight = new Set<Promise<void>>()\n\n constructor(\n private readonly endpoint: string,\n private readonly bearer?: string,\n ) {}\n\n emit(envelope: TelemetryEnvelope): void {\n const body = JSON.stringify(envelope)\n const headers: Record<string, string> = { 'content-type': 'application/json' }\n if (this.bearer) headers.authorization = `Bearer ${this.bearer}`\n const promise = fetch(this.endpoint, { method: 'POST', headers, body })\n .then(() => undefined)\n .catch(() => undefined)\n this.inflight.add(promise)\n promise.finally(() => this.inflight.delete(promise))\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(Array.from(this.inflight))\n }\n}\n\n/** Fanout to multiple sinks — failures in one do not affect others. 
*/\nexport class FanoutTelemetrySink implements TelemetrySink {\n constructor(private readonly sinks: TelemetrySink[]) {}\n\n emit(envelope: TelemetryEnvelope): void {\n for (const sink of this.sinks) {\n try {\n const result = sink.emit(envelope)\n if (result && typeof (result as Promise<unknown>).catch === 'function') {\n ;(result as Promise<unknown>).catch(() => undefined)\n }\n } catch {\n // swallow — telemetry must never break a run\n }\n }\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(this.sinks.map((s) => Promise.resolve(s.close?.())))\n }\n}\n\n/** No-op sink — used when telemetry is explicitly disabled. */\nexport class NullTelemetrySink implements TelemetrySink {\n emit(): void {}\n}\n\n/** In-memory sink — useful for tests + downstream adapters. */\nexport class InMemoryTelemetrySink implements TelemetrySink {\n readonly envelopes: TelemetryEnvelope[] = []\n emit(envelope: TelemetryEnvelope): void {\n this.envelopes.push(envelope)\n }\n clear(): void { this.envelopes.length = 0 }\n}\n","/**\n * Telemetry client — thin wrapper that builds envelopes from `EmitArgs` and\n * delegates to a `TelemetrySink`. Pure logic; no I/O. Use this from any\n * runtime — Workers, Node, browser — and choose the sink accordingly.\n *\n * For an opinionated singleton with env-var-driven sink wiring (the bad CLI\n * pattern), see `./node-client.ts`.\n */\n\nimport type { TelemetryEnvelope, TelemetryKind, TelemetryModel, TelemetrySource } from './schema'\nimport { TELEMETRY_SCHEMA_VERSION } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\nexport interface EmitArgs {\n kind: TelemetryKind\n runId: string\n parentRunId?: string\n ok: boolean\n durationMs: number\n data?: Record<string, unknown>\n metrics?: Record<string, number>\n tags?: Record<string, string>\n model?: TelemetryModel\n error?: string\n /** Override the source for this envelope. Falls back to `defaultSource`. 
*/\n source?: TelemetrySource\n}\n\nexport class TelemetryClient {\n constructor(\n private readonly sink: TelemetrySink,\n private readonly defaultSource: TelemetrySource,\n ) {}\n\n emit(args: EmitArgs): void {\n const envelope: TelemetryEnvelope = {\n schemaVersion: TELEMETRY_SCHEMA_VERSION,\n envelopeId: makeEnvelopeId(),\n runId: args.runId,\n timestamp: new Date().toISOString(),\n source: args.source ?? this.defaultSource,\n kind: args.kind,\n ok: args.ok,\n durationMs: args.durationMs,\n data: args.data ?? {},\n metrics: args.metrics ?? {},\n ...(args.parentRunId ? { parentRunId: args.parentRunId } : {}),\n ...(args.model ? { model: args.model } : {}),\n ...(args.tags ? { tags: args.tags } : {}),\n ...(args.error ? { error: args.error } : {}),\n }\n try {\n this.sink.emit(envelope)\n } catch {\n // swallow — telemetry never breaks the calling code path\n }\n }\n\n async close(): Promise<void> {\n await this.sink.close?.()\n }\n}\n\n/** Generate a UUIDv4 with whatever crypto is available (Node, Workers, browsers). */\nfunction makeEnvelopeId(): string {\n if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {\n return crypto.randomUUID()\n }\n // Last-resort fallback. Lower entropy but never throws.\n return 'env-' + Date.now().toString(36) + '-' + Math.random().toString(36).slice(2, 10)\n}\n\nexport const SECRET_FLAGS = new Set(['--api-key', '--bearer', '--token', '--password'])\n\n/** Strip likely-secret values from argv, preserving structure. 
*/\nexport function sanitiseArgv(argv: string[]): string[] {\n const out: string[] = []\n for (let i = 0; i < argv.length; i++) {\n const a = argv[i]!\n if (SECRET_FLAGS.has(a)) {\n out.push(a, '<redacted>')\n i++\n continue\n }\n if (/^(?:--api-key|--bearer|--token|--password)=/.test(a)) {\n out.push(a.replace(/=.*$/, '=<redacted>'))\n continue\n }\n out.push(a)\n }\n return out\n}\n"],"mappings":";AAYO,IAAM,2BAA2B;;;ACIjC,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YACmB,UACA,QACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAJX,WAAW,oBAAI,IAAmB;AAAA,EAO1C,KAAK,UAAmC;AACtC,UAAM,OAAO,KAAK,UAAU,QAAQ;AACpC,UAAM,UAAkC,EAAE,gBAAgB,mBAAmB;AAC7E,QAAI,KAAK,OAAQ,SAAQ,gBAAgB,UAAU,KAAK,MAAM;AAC9D,UAAM,UAAU,MAAM,KAAK,UAAU,EAAE,QAAQ,QAAQ,SAAS,KAAK,CAAC,EACnE,KAAK,MAAM,MAAS,EACpB,MAAM,MAAM,MAAS;AACxB,SAAK,SAAS,IAAI,OAAO;AACzB,YAAQ,QAAQ,MAAM,KAAK,SAAS,OAAO,OAAO,CAAC;AAAA,EACrD;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,MAAM,KAAK,KAAK,QAAQ,CAAC;AAAA,EACpD;AACF;AAGO,IAAM,sBAAN,MAAmD;AAAA,EACxD,YAA6B,OAAwB;AAAxB;AAAA,EAAyB;AAAA,EAAzB;AAAA,EAE7B,KAAK,UAAmC;AACtC,eAAW,QAAQ,KAAK,OAAO;AAC7B,UAAI;AACF,cAAM,SAAS,KAAK,KAAK,QAAQ;AACjC,YAAI,UAAU,OAAQ,OAA4B,UAAU,YAAY;AACtE;AAAC,UAAC,OAA4B,MAAM,MAAM,MAAS;AAAA,QACrD;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,KAAK,MAAM,IAAI,CAAC,MAAM,QAAQ,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAAA,EAC9E;AACF;AAGO,IAAM,oBAAN,MAAiD;AAAA,EACtD,OAAa;AAAA,EAAC;AAChB;AAGO,IAAM,wBAAN,MAAqD;AAAA,EACjD,YAAiC,CAAC;AAAA,EAC3C,KAAK,UAAmC;AACtC,SAAK,UAAU,KAAK,QAAQ;AAAA,EAC9B;AAAA,EACA,QAAc;AAAE,SAAK,UAAU,SAAS;AAAA,EAAE;AAC5C;;;AC9CO,IAAM,kBAAN,MAAsB;AAAA,EAC3B,YACmB,MACA,eACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAGnB,KAAK,MAAsB;AACzB,UAAM,WAA8B;AAAA,MAClC,eAAe;AAAA,MACf,YAAY,eAAe;AAAA,MAC3B,OAAO,KAAK;AAAA,MACZ,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,MAClC,QAAQ,KAAK,UAAU,KAAK;AAAA,MAC5B,MAAM,KAAK;AAAA,MACX,IAAI,KAAK;AAAA,MACT,YAAY,KAAK;AAAA,MACjB,MAAM,KAAK,QAAQ,CAAC;AAAA,MACpB,SAAS,KAAK,WAAW,CAAC;AAAA,MAC1B,GAAI,KAAK,cAAc,EAAE,aAAa,KAAK,YAAY,IAAI,
CAAC;AAAA,MAC5D,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,MAC1C,GAAI,KAAK,OAAO,EAAE,MAAM,KAAK,KAAK,IAAI,CAAC;AAAA,MACvC,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,IAC5C;AACA,QAAI;AACF,WAAK,KAAK,KAAK,QAAQ;AAAA,IACzB,QAAQ;AAAA,IAER;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,QAAQ;AAAA,EAC1B;AACF;AAGA,SAAS,iBAAyB;AAChC,MAAI,OAAO,WAAW,eAAe,OAAO,OAAO,eAAe,YAAY;AAC5E,WAAO,OAAO,WAAW;AAAA,EAC3B;AAEA,SAAO,SAAS,KAAK,IAAI,EAAE,SAAS,EAAE,IAAI,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACxF;AAEO,IAAM,eAAe,oBAAI,IAAI,CAAC,aAAa,YAAY,WAAW,YAAY,CAAC;AAG/E,SAAS,aAAa,MAA0B;AACrD,QAAM,MAAgB,CAAC;AACvB,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,IAAI,KAAK,CAAC;AAChB,QAAI,aAAa,IAAI,CAAC,GAAG;AACvB,UAAI,KAAK,GAAG,YAAY;AACxB;AACA;AAAA,IACF;AACA,QAAI,8CAA8C,KAAK,CAAC,GAAG;AACzD,UAAI,KAAK,EAAE,QAAQ,QAAQ,aAAa,CAAC;AACzC;AAAA,IACF;AACA,QAAI,KAAK,CAAC;AAAA,EACZ;AACA,SAAO;AACT;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-eval",
|
|
3
|
-
"version": "0.11.0",
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "Trace-first evaluation framework for Tangle agents. Core (spans, pipelines, sandbox harness, OTLP export), trust (dataset, red-team, calibration, behavior DSL), builder-of-builders (three-layer eval, resumable sessions, meta-runtime correlation), and frontier (meta-eval correlation study, Process Reward Modeling, bisector).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -10,6 +10,16 @@
|
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
11
|
"import": "./dist/index.js",
|
|
12
12
|
"default": "./dist/index.js"
|
|
13
|
+
},
|
|
14
|
+
"./telemetry": {
|
|
15
|
+
"types": "./dist/telemetry/index.d.ts",
|
|
16
|
+
"import": "./dist/telemetry/index.js",
|
|
17
|
+
"default": "./dist/telemetry/index.js"
|
|
18
|
+
},
|
|
19
|
+
"./telemetry/file": {
|
|
20
|
+
"types": "./dist/telemetry/file.d.ts",
|
|
21
|
+
"import": "./dist/telemetry/file.js",
|
|
22
|
+
"default": "./dist/telemetry/file.js"
|
|
13
23
|
}
|
|
14
24
|
},
|
|
15
25
|
"files": [
|
|
@@ -18,13 +28,6 @@
|
|
|
18
28
|
"publishConfig": {
|
|
19
29
|
"access": "public"
|
|
20
30
|
},
|
|
21
|
-
"scripts": {
|
|
22
|
-
"build": "tsup",
|
|
23
|
-
"dev": "tsup --watch",
|
|
24
|
-
"test": "vitest run",
|
|
25
|
-
"test:watch": "vitest",
|
|
26
|
-
"typecheck": "tsc --noEmit"
|
|
27
|
-
},
|
|
28
31
|
"dependencies": {
|
|
29
32
|
"@ax-llm/ax": "^19.0.25",
|
|
30
33
|
"@tangle-network/tcloud": "^0.2.0"
|
|
@@ -39,5 +42,11 @@
|
|
|
39
42
|
"node": ">=20"
|
|
40
43
|
},
|
|
41
44
|
"license": "MIT",
|
|
42
|
-
"
|
|
45
|
+
"scripts": {
|
|
46
|
+
"build": "tsup",
|
|
47
|
+
"dev": "tsup --watch",
|
|
48
|
+
"test": "vitest run",
|
|
49
|
+
"test:watch": "vitest",
|
|
50
|
+
"typecheck": "tsc --noEmit"
|
|
51
|
+
}
|
|
43
52
|
}
|