@tangle-network/agent-eval 0.14.2 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ import "../chunk-PZ5AY32C.js";
2
+
1
3
  // src/telemetry/sink-file.ts
2
4
  import * as fs from "fs";
3
5
  import * as path from "path";
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/telemetry/sink-file.ts"],"sourcesContent":["/**\n * Node-only file sink. Imports `node:fs` — DO NOT import this from a Worker\n * or edge runtime; use `./sink-fetch` instead.\n */\n\nimport * as fs from 'node:fs'\nimport * as path from 'node:path'\nimport type { TelemetryEnvelope } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\n/** Append envelopes to a JSONL file, partitioned by repo + date. */\nexport class FileTelemetrySink implements TelemetrySink {\n private streams = new Map<string, fs.WriteStream>()\n\n constructor(private readonly baseDir: string) {\n fs.mkdirSync(baseDir, { recursive: true })\n }\n\n emit(envelope: TelemetryEnvelope): void {\n const date = envelope.timestamp.slice(0, 10) // YYYY-MM-DD\n const repo = envelope.source.repo || 'unknown'\n const key = `${repo}/${date}`\n let stream = this.streams.get(key)\n if (!stream) {\n const dir = path.join(this.baseDir, repo)\n fs.mkdirSync(dir, { recursive: true })\n stream = fs.createWriteStream(path.join(dir, `${date}.jsonl`), { flags: 'a', encoding: 'utf-8' })\n this.streams.set(key, stream)\n }\n stream.write(`${JSON.stringify(envelope)}\\n`)\n }\n\n async close(): Promise<void> {\n const closes = Array.from(this.streams.values()).map(\n (s) => new Promise<void>((resolve) => s.end(() => resolve())),\n )\n this.streams.clear()\n await Promise.all(closes)\n }\n}\n\n/** Default location for local telemetry, mirroring bad CLI's convention. */\nexport function defaultTelemetryDir(homeDir: string, override?: string): string {\n return override || path.join(homeDir, '.agent-eval', 'telemetry')\n}\n"],"mappings":";AAKA,YAAY,QAAQ;AACpB,YAAY,UAAU;AAKf,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YAA6B,SAAiB;AAAjB;AAC3B,IAAG,aAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AAAA,EAC3C;AAAA,EAF6B;AAAA,EAFrB,UAAU,oBAAI,IAA4B;AAAA,EAMlD,KAAK,UAAmC;AACtC,UAAM,OAAO,SAAS,UAAU,MAAM,GAAG,EAAE;AAC3C,UAAM,OAAO,SAAS,OAAO,QAAQ;AACrC,UAAM,MAAM,GAAG,IAAI,IAAI,IAAI;AAC3B,QAAI,SAAS,KAAK,QAAQ,IAAI,GAAG;AACjC,QAAI,CAAC,QAAQ;AACX,YAAM,MAAW,UAAK,KAAK,SAAS,IAAI;AACxC,MAAG,aAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AACrC,eAAY,qBAAuB,UAAK,KAAK,GAAG,IAAI,QAAQ,GAAG,EAAE,OAAO,KAAK,UAAU,QAAQ,CAAC;AAChG,WAAK,QAAQ,IAAI,KAAK,MAAM;AAAA,IAC9B;AACA,WAAO,MAAM,GAAG,KAAK,UAAU,QAAQ,CAAC;AAAA,CAAI;AAAA,EAC9C;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ,OAAO,CAAC,EAAE;AAAA,MAC/C,CAAC,MAAM,IAAI,QAAc,CAAC,YAAY,EAAE,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,IAC9D;AACA,SAAK,QAAQ,MAAM;AACnB,UAAM,QAAQ,IAAI,MAAM;AAAA,EAC1B;AACF;AAGO,SAAS,oBAAoB,SAAiB,UAA2B;AAC9E,SAAO,YAAiB,UAAK,SAAS,eAAe,WAAW;AAClE;","names":[]}
1
+ {"version":3,"sources":["../../src/telemetry/sink-file.ts"],"sourcesContent":["/**\n * Node-only file sink. Imports `node:fs` — DO NOT import this from a Worker\n * or edge runtime; use `./sink-fetch` instead.\n */\n\nimport * as fs from 'node:fs'\nimport * as path from 'node:path'\nimport type { TelemetryEnvelope } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\n/** Append envelopes to a JSONL file, partitioned by repo + date. */\nexport class FileTelemetrySink implements TelemetrySink {\n private streams = new Map<string, fs.WriteStream>()\n\n constructor(private readonly baseDir: string) {\n fs.mkdirSync(baseDir, { recursive: true })\n }\n\n emit(envelope: TelemetryEnvelope): void {\n const date = envelope.timestamp.slice(0, 10) // YYYY-MM-DD\n const repo = envelope.source.repo || 'unknown'\n const key = `${repo}/${date}`\n let stream = this.streams.get(key)\n if (!stream) {\n const dir = path.join(this.baseDir, repo)\n fs.mkdirSync(dir, { recursive: true })\n stream = fs.createWriteStream(path.join(dir, `${date}.jsonl`), { flags: 'a', encoding: 'utf-8' })\n this.streams.set(key, stream)\n }\n stream.write(`${JSON.stringify(envelope)}\\n`)\n }\n\n async close(): Promise<void> {\n const closes = Array.from(this.streams.values()).map(\n (s) => new Promise<void>((resolve) => s.end(() => resolve())),\n )\n this.streams.clear()\n await Promise.all(closes)\n }\n}\n\n/** Default location for local telemetry, mirroring bad CLI's convention. */\nexport function defaultTelemetryDir(homeDir: string, override?: string): string {\n return override || path.join(homeDir, '.agent-eval', 'telemetry')\n}\n"],"mappings":";;;AAKA,YAAY,QAAQ;AACpB,YAAY,UAAU;AAKf,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YAA6B,SAAiB;AAAjB;AAC3B,IAAG,aAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AAAA,EAC3C;AAAA,EAF6B;AAAA,EAFrB,UAAU,oBAAI,IAA4B;AAAA,EAMlD,KAAK,UAAmC;AACtC,UAAM,OAAO,SAAS,UAAU,MAAM,GAAG,EAAE;AAC3C,UAAM,OAAO,SAAS,OAAO,QAAQ;AACrC,UAAM,MAAM,GAAG,IAAI,IAAI,IAAI;AAC3B,QAAI,SAAS,KAAK,QAAQ,IAAI,GAAG;AACjC,QAAI,CAAC,QAAQ;AACX,YAAM,MAAW,UAAK,KAAK,SAAS,IAAI;AACxC,MAAG,aAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AACrC,eAAY,qBAAuB,UAAK,KAAK,GAAG,IAAI,QAAQ,GAAG,EAAE,OAAO,KAAK,UAAU,QAAQ,CAAC;AAChG,WAAK,QAAQ,IAAI,KAAK,MAAM;AAAA,IAC9B;AACA,WAAO,MAAM,GAAG,KAAK,UAAU,QAAQ,CAAC;AAAA,CAAI;AAAA,EAC9C;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ,OAAO,CAAC,EAAE;AAAA,MAC/C,CAAC,MAAM,IAAI,QAAc,CAAC,YAAY,EAAE,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,IAC9D;AACA,SAAK,QAAQ,MAAM;AACnB,UAAM,QAAQ,IAAI,MAAM;AAAA,EAC1B;AACF;AAGO,SAAS,oBAAoB,SAAiB,UAA2B;AAC9E,SAAO,YAAiB,UAAK,SAAS,eAAe,WAAW;AAClE;","names":[]}
@@ -1,3 +1,5 @@
1
+ import "../chunk-PZ5AY32C.js";
2
+
1
3
  // src/telemetry/schema.ts
2
4
  var TELEMETRY_SCHEMA_VERSION = 1;
3
5
 
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/telemetry/schema.ts","../../src/telemetry/sink-fetch.ts","../../src/telemetry/client.ts"],"sourcesContent":["/**\n * Fleet telemetry envelope — agent-eval's portable observability shape.\n *\n * Designed so any consumer (Node CLI, Cloudflare Worker, Lambda, browser\n * extension) can emit structured rows describing one unit of work — a page\n * audit, a tool call, an evolve round, a full agent run — to a central sink.\n *\n * The schema is intentionally a strict superset of agent-eval's `Run` shape\n * so a future TraceStore adapter can promote envelopes into traces without\n * translation.\n */\n\nexport const TELEMETRY_SCHEMA_VERSION = 1\n\n/** Discriminator for the unit of work this envelope describes. */\nexport type TelemetryKind =\n | 'agent-run'\n | 'design-audit-page'\n | 'design-audit-run'\n | 'design-evolve-round'\n | 'design-evolve-run'\n | 'gepa-trial'\n | 'gepa-generation'\n | 'tool-call'\n | 'judge-verdict'\n | 'custom'\n\nexport interface TelemetryEnvelope {\n schemaVersion: typeof TELEMETRY_SCHEMA_VERSION\n envelopeId: string\n runId: string\n timestamp: string\n parentRunId?: string\n\n source: TelemetrySource\n model?: TelemetryModel\n kind: TelemetryKind\n ok: boolean\n durationMs: number\n\n data: Record<string, unknown>\n metrics: Record<string, number>\n tags?: Record<string, string>\n\n error?: string\n}\n\nexport interface TelemetrySource {\n /** Repo identity — basename of cwd plus git remote if discoverable. */\n repo: string\n cwd: string\n gitSha?: string\n gitBranch?: string\n cliVersion: string\n /** What was invoked, e.g. `design-audit`, `bad run`, `gepa --target`. */\n invocation: string\n /** Sanitised argv minus secrets. */\n argv?: string[]\n /**\n * Multi-tenant identity. Set when the consumer runs inside a hosted\n * product so a fleet rollup can group by tenant without leaking customer\n * URLs or PII.\n */\n tenantId?: string\n /** Optional sub-tenant identity (project, suite, walkthrough, customer). */\n customerId?: string\n /** SHA-256 (12 hex) of the API key used to authenticate this run, when applicable. */\n apiKeyHash?: string\n}\n\nexport interface TelemetryModel {\n provider: string\n name: string\n /** SHA-256 (12 hex chars) of the prompt(s) used. */\n promptHash?: string\n /** SHA-256 (12 hex chars) of the composed rubric body, if applicable. */\n rubricHash?: string\n}\n","/**\n * Workers-safe telemetry sinks — only `fetch` and pure JS. No `fs`, no\n * `child_process`. Safe to import from a Cloudflare Worker, Lambda, edge\n * function, or browser extension.\n *\n * For Node-only file persistence, import from './sink-file' instead.\n */\n\nimport type { TelemetryEnvelope } from './schema'\n\nexport interface TelemetrySink {\n emit(envelope: TelemetryEnvelope): Promise<void> | void\n close?(): Promise<void> | void\n}\n\n/** Best-effort POST to a remote collector. Fire-and-forget; never throws. */\nexport class HttpTelemetrySink implements TelemetrySink {\n private inflight = new Set<Promise<void>>()\n\n constructor(\n private readonly endpoint: string,\n private readonly bearer?: string,\n ) {}\n\n emit(envelope: TelemetryEnvelope): void {\n const body = JSON.stringify(envelope)\n const headers: Record<string, string> = { 'content-type': 'application/json' }\n if (this.bearer) headers.authorization = `Bearer ${this.bearer}`\n const promise = fetch(this.endpoint, { method: 'POST', headers, body })\n .then(() => undefined)\n .catch(() => undefined)\n this.inflight.add(promise)\n promise.finally(() => this.inflight.delete(promise))\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(Array.from(this.inflight))\n }\n}\n\n/** Fanout to multiple sinks — failures in one do not affect others. */\nexport class FanoutTelemetrySink implements TelemetrySink {\n constructor(private readonly sinks: TelemetrySink[]) {}\n\n emit(envelope: TelemetryEnvelope): void {\n for (const sink of this.sinks) {\n try {\n const result = sink.emit(envelope)\n if (result && typeof (result as Promise<unknown>).catch === 'function') {\n ;(result as Promise<unknown>).catch(() => undefined)\n }\n } catch {\n // swallow — telemetry must never break a run\n }\n }\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(this.sinks.map((s) => Promise.resolve(s.close?.())))\n }\n}\n\n/** No-op sink — used when telemetry is explicitly disabled. */\nexport class NullTelemetrySink implements TelemetrySink {\n emit(): void {}\n}\n\n/** In-memory sink — useful for tests + downstream adapters. */\nexport class InMemoryTelemetrySink implements TelemetrySink {\n readonly envelopes: TelemetryEnvelope[] = []\n emit(envelope: TelemetryEnvelope): void {\n this.envelopes.push(envelope)\n }\n clear(): void { this.envelopes.length = 0 }\n}\n","/**\n * Telemetry client — thin wrapper that builds envelopes from `EmitArgs` and\n * delegates to a `TelemetrySink`. Pure logic; no I/O. Use this from any\n * runtime — Workers, Node, browser — and choose the sink accordingly.\n *\n * For an opinionated singleton with env-var-driven sink wiring (the bad CLI\n * pattern), see `./node-client.ts`.\n */\n\nimport type { TelemetryEnvelope, TelemetryKind, TelemetryModel, TelemetrySource } from './schema'\nimport { TELEMETRY_SCHEMA_VERSION } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\nexport interface EmitArgs {\n kind: TelemetryKind\n runId: string\n parentRunId?: string\n ok: boolean\n durationMs: number\n data?: Record<string, unknown>\n metrics?: Record<string, number>\n tags?: Record<string, string>\n model?: TelemetryModel\n error?: string\n /** Override the source for this envelope. Falls back to `defaultSource`. */\n source?: TelemetrySource\n}\n\nexport class TelemetryClient {\n constructor(\n private readonly sink: TelemetrySink,\n private readonly defaultSource: TelemetrySource,\n ) {}\n\n emit(args: EmitArgs): void {\n const envelope: TelemetryEnvelope = {\n schemaVersion: TELEMETRY_SCHEMA_VERSION,\n envelopeId: makeEnvelopeId(),\n runId: args.runId,\n timestamp: new Date().toISOString(),\n source: args.source ?? this.defaultSource,\n kind: args.kind,\n ok: args.ok,\n durationMs: args.durationMs,\n data: args.data ?? {},\n metrics: args.metrics ?? {},\n ...(args.parentRunId ? { parentRunId: args.parentRunId } : {}),\n ...(args.model ? { model: args.model } : {}),\n ...(args.tags ? { tags: args.tags } : {}),\n ...(args.error ? { error: args.error } : {}),\n }\n try {\n this.sink.emit(envelope)\n } catch {\n // swallow — telemetry never breaks the calling code path\n }\n }\n\n async close(): Promise<void> {\n await this.sink.close?.()\n }\n}\n\n/** Generate a UUIDv4 with whatever crypto is available (Node, Workers, browsers). */\nfunction makeEnvelopeId(): string {\n if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {\n return crypto.randomUUID()\n }\n // Last-resort fallback. Lower entropy but never throws.\n return 'env-' + Date.now().toString(36) + '-' + Math.random().toString(36).slice(2, 10)\n}\n\nexport const SECRET_FLAGS = new Set(['--api-key', '--bearer', '--token', '--password'])\n\n/** Strip likely-secret values from argv, preserving structure. */\nexport function sanitiseArgv(argv: string[]): string[] {\n const out: string[] = []\n for (let i = 0; i < argv.length; i++) {\n const a = argv[i]!\n if (SECRET_FLAGS.has(a)) {\n out.push(a, '<redacted>')\n i++\n continue\n }\n if (/^(?:--api-key|--bearer|--token|--password)=/.test(a)) {\n out.push(a.replace(/=.*$/, '=<redacted>'))\n continue\n }\n out.push(a)\n }\n return out\n}\n"],"mappings":";AAYO,IAAM,2BAA2B;;;ACIjC,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YACmB,UACA,QACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAJX,WAAW,oBAAI,IAAmB;AAAA,EAO1C,KAAK,UAAmC;AACtC,UAAM,OAAO,KAAK,UAAU,QAAQ;AACpC,UAAM,UAAkC,EAAE,gBAAgB,mBAAmB;AAC7E,QAAI,KAAK,OAAQ,SAAQ,gBAAgB,UAAU,KAAK,MAAM;AAC9D,UAAM,UAAU,MAAM,KAAK,UAAU,EAAE,QAAQ,QAAQ,SAAS,KAAK,CAAC,EACnE,KAAK,MAAM,MAAS,EACpB,MAAM,MAAM,MAAS;AACxB,SAAK,SAAS,IAAI,OAAO;AACzB,YAAQ,QAAQ,MAAM,KAAK,SAAS,OAAO,OAAO,CAAC;AAAA,EACrD;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,MAAM,KAAK,KAAK,QAAQ,CAAC;AAAA,EACpD;AACF;AAGO,IAAM,sBAAN,MAAmD;AAAA,EACxD,YAA6B,OAAwB;AAAxB;AAAA,EAAyB;AAAA,EAAzB;AAAA,EAE7B,KAAK,UAAmC;AACtC,eAAW,QAAQ,KAAK,OAAO;AAC7B,UAAI;AACF,cAAM,SAAS,KAAK,KAAK,QAAQ;AACjC,YAAI,UAAU,OAAQ,OAA4B,UAAU,YAAY;AACtE;AAAC,UAAC,OAA4B,MAAM,MAAM,MAAS;AAAA,QACrD;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,KAAK,MAAM,IAAI,CAAC,MAAM,QAAQ,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAAA,EAC9E;AACF;AAGO,IAAM,oBAAN,MAAiD;AAAA,EACtD,OAAa;AAAA,EAAC;AAChB;AAGO,IAAM,wBAAN,MAAqD;AAAA,EACjD,YAAiC,CAAC;AAAA,EAC3C,KAAK,UAAmC;AACtC,SAAK,UAAU,KAAK,QAAQ;AAAA,EAC9B;AAAA,EACA,QAAc;AAAE,SAAK,UAAU,SAAS;AAAA,EAAE;AAC5C;;;AC9CO,IAAM,kBAAN,MAAsB;AAAA,EAC3B,YACmB,MACA,eACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAGnB,KAAK,MAAsB;AACzB,UAAM,WAA8B;AAAA,MAClC,eAAe;AAAA,MACf,YAAY,eAAe;AAAA,MAC3B,OAAO,KAAK;AAAA,MACZ,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,MAClC,QAAQ,KAAK,UAAU,KAAK;AAAA,MAC5B,MAAM,KAAK;AAAA,MACX,IAAI,KAAK;AAAA,MACT,YAAY,KAAK;AAAA,MACjB,MAAM,KAAK,QAAQ,CAAC;AAAA,MACpB,SAAS,KAAK,WAAW,CAAC;AAAA,MAC1B,GAAI,KAAK,cAAc,EAAE,aAAa,KAAK,YAAY,IAAI,CAAC;AAAA,MAC5D,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,MAC1C,GAAI,KAAK,OAAO,EAAE,MAAM,KAAK,KAAK,IAAI,CAAC;AAAA,MACvC,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,IAC5C;AACA,QAAI;AACF,WAAK,KAAK,KAAK,QAAQ;AAAA,IACzB,QAAQ;AAAA,IAER;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,QAAQ;AAAA,EAC1B;AACF;AAGA,SAAS,iBAAyB;AAChC,MAAI,OAAO,WAAW,eAAe,OAAO,OAAO,eAAe,YAAY;AAC5E,WAAO,OAAO,WAAW;AAAA,EAC3B;AAEA,SAAO,SAAS,KAAK,IAAI,EAAE,SAAS,EAAE,IAAI,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACxF;AAEO,IAAM,eAAe,oBAAI,IAAI,CAAC,aAAa,YAAY,WAAW,YAAY,CAAC;AAG/E,SAAS,aAAa,MAA0B;AACrD,QAAM,MAAgB,CAAC;AACvB,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,IAAI,KAAK,CAAC;AAChB,QAAI,aAAa,IAAI,CAAC,GAAG;AACvB,UAAI,KAAK,GAAG,YAAY;AACxB;AACA;AAAA,IACF;AACA,QAAI,8CAA8C,KAAK,CAAC,GAAG;AACzD,UAAI,KAAK,EAAE,QAAQ,QAAQ,aAAa,CAAC;AACzC;AAAA,IACF;AACA,QAAI,KAAK,CAAC;AAAA,EACZ;AACA,SAAO;AACT;","names":[]}
1
+ {"version":3,"sources":["../../src/telemetry/schema.ts","../../src/telemetry/sink-fetch.ts","../../src/telemetry/client.ts"],"sourcesContent":["/**\n * Fleet telemetry envelope — agent-eval's portable observability shape.\n *\n * Designed so any consumer (Node CLI, Cloudflare Worker, Lambda, browser\n * extension) can emit structured rows describing one unit of work — a page\n * audit, a tool call, an evolve round, a full agent run — to a central sink.\n *\n * The schema is intentionally a strict superset of agent-eval's `Run` shape\n * so a future TraceStore adapter can promote envelopes into traces without\n * translation.\n */\n\nexport const TELEMETRY_SCHEMA_VERSION = 1\n\n/** Discriminator for the unit of work this envelope describes. */\nexport type TelemetryKind =\n | 'agent-run'\n | 'design-audit-page'\n | 'design-audit-run'\n | 'design-evolve-round'\n | 'design-evolve-run'\n | 'gepa-trial'\n | 'gepa-generation'\n | 'tool-call'\n | 'judge-verdict'\n | 'custom'\n\nexport interface TelemetryEnvelope {\n schemaVersion: typeof TELEMETRY_SCHEMA_VERSION\n envelopeId: string\n runId: string\n timestamp: string\n parentRunId?: string\n\n source: TelemetrySource\n model?: TelemetryModel\n kind: TelemetryKind\n ok: boolean\n durationMs: number\n\n data: Record<string, unknown>\n metrics: Record<string, number>\n tags?: Record<string, string>\n\n error?: string\n}\n\nexport interface TelemetrySource {\n /** Repo identity — basename of cwd plus git remote if discoverable. */\n repo: string\n cwd: string\n gitSha?: string\n gitBranch?: string\n cliVersion: string\n /** What was invoked, e.g. `design-audit`, `bad run`, `gepa --target`. */\n invocation: string\n /** Sanitised argv minus secrets. */\n argv?: string[]\n /**\n * Multi-tenant identity. Set when the consumer runs inside a hosted\n * product so a fleet rollup can group by tenant without leaking customer\n * URLs or PII.\n */\n tenantId?: string\n /** Optional sub-tenant identity (project, suite, walkthrough, customer). */\n customerId?: string\n /** SHA-256 (12 hex) of the API key used to authenticate this run, when applicable. */\n apiKeyHash?: string\n}\n\nexport interface TelemetryModel {\n provider: string\n name: string\n /** SHA-256 (12 hex chars) of the prompt(s) used. */\n promptHash?: string\n /** SHA-256 (12 hex chars) of the composed rubric body, if applicable. */\n rubricHash?: string\n}\n","/**\n * Workers-safe telemetry sinks — only `fetch` and pure JS. No `fs`, no\n * `child_process`. Safe to import from a Cloudflare Worker, Lambda, edge\n * function, or browser extension.\n *\n * For Node-only file persistence, import from './sink-file' instead.\n */\n\nimport type { TelemetryEnvelope } from './schema'\n\nexport interface TelemetrySink {\n emit(envelope: TelemetryEnvelope): Promise<void> | void\n close?(): Promise<void> | void\n}\n\n/** Best-effort POST to a remote collector. Fire-and-forget; never throws. */\nexport class HttpTelemetrySink implements TelemetrySink {\n private inflight = new Set<Promise<void>>()\n\n constructor(\n private readonly endpoint: string,\n private readonly bearer?: string,\n ) {}\n\n emit(envelope: TelemetryEnvelope): void {\n const body = JSON.stringify(envelope)\n const headers: Record<string, string> = { 'content-type': 'application/json' }\n if (this.bearer) headers.authorization = `Bearer ${this.bearer}`\n const promise = fetch(this.endpoint, { method: 'POST', headers, body })\n .then(() => undefined)\n .catch(() => undefined)\n this.inflight.add(promise)\n promise.finally(() => this.inflight.delete(promise))\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(Array.from(this.inflight))\n }\n}\n\n/** Fanout to multiple sinks — failures in one do not affect others. */\nexport class FanoutTelemetrySink implements TelemetrySink {\n constructor(private readonly sinks: TelemetrySink[]) {}\n\n emit(envelope: TelemetryEnvelope): void {\n for (const sink of this.sinks) {\n try {\n const result = sink.emit(envelope)\n if (result && typeof (result as Promise<unknown>).catch === 'function') {\n ;(result as Promise<unknown>).catch(() => undefined)\n }\n } catch {\n // swallow — telemetry must never break a run\n }\n }\n }\n\n async close(): Promise<void> {\n await Promise.allSettled(this.sinks.map((s) => Promise.resolve(s.close?.())))\n }\n}\n\n/** No-op sink — used when telemetry is explicitly disabled. */\nexport class NullTelemetrySink implements TelemetrySink {\n emit(): void {}\n}\n\n/** In-memory sink — useful for tests + downstream adapters. */\nexport class InMemoryTelemetrySink implements TelemetrySink {\n readonly envelopes: TelemetryEnvelope[] = []\n emit(envelope: TelemetryEnvelope): void {\n this.envelopes.push(envelope)\n }\n clear(): void { this.envelopes.length = 0 }\n}\n","/**\n * Telemetry client — thin wrapper that builds envelopes from `EmitArgs` and\n * delegates to a `TelemetrySink`. Pure logic; no I/O. Use this from any\n * runtime — Workers, Node, browser — and choose the sink accordingly.\n *\n * For an opinionated singleton with env-var-driven sink wiring (the bad CLI\n * pattern), see `./node-client.ts`.\n */\n\nimport type { TelemetryEnvelope, TelemetryKind, TelemetryModel, TelemetrySource } from './schema'\nimport { TELEMETRY_SCHEMA_VERSION } from './schema'\nimport type { TelemetrySink } from './sink-fetch'\n\nexport interface EmitArgs {\n kind: TelemetryKind\n runId: string\n parentRunId?: string\n ok: boolean\n durationMs: number\n data?: Record<string, unknown>\n metrics?: Record<string, number>\n tags?: Record<string, string>\n model?: TelemetryModel\n error?: string\n /** Override the source for this envelope. Falls back to `defaultSource`. */\n source?: TelemetrySource\n}\n\nexport class TelemetryClient {\n constructor(\n private readonly sink: TelemetrySink,\n private readonly defaultSource: TelemetrySource,\n ) {}\n\n emit(args: EmitArgs): void {\n const envelope: TelemetryEnvelope = {\n schemaVersion: TELEMETRY_SCHEMA_VERSION,\n envelopeId: makeEnvelopeId(),\n runId: args.runId,\n timestamp: new Date().toISOString(),\n source: args.source ?? this.defaultSource,\n kind: args.kind,\n ok: args.ok,\n durationMs: args.durationMs,\n data: args.data ?? {},\n metrics: args.metrics ?? {},\n ...(args.parentRunId ? { parentRunId: args.parentRunId } : {}),\n ...(args.model ? { model: args.model } : {}),\n ...(args.tags ? { tags: args.tags } : {}),\n ...(args.error ? { error: args.error } : {}),\n }\n try {\n this.sink.emit(envelope)\n } catch {\n // swallow — telemetry never breaks the calling code path\n }\n }\n\n async close(): Promise<void> {\n await this.sink.close?.()\n }\n}\n\n/** Generate a UUIDv4 with whatever crypto is available (Node, Workers, browsers). */\nfunction makeEnvelopeId(): string {\n if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {\n return crypto.randomUUID()\n }\n // Last-resort fallback. Lower entropy but never throws.\n return 'env-' + Date.now().toString(36) + '-' + Math.random().toString(36).slice(2, 10)\n}\n\nexport const SECRET_FLAGS = new Set(['--api-key', '--bearer', '--token', '--password'])\n\n/** Strip likely-secret values from argv, preserving structure. */\nexport function sanitiseArgv(argv: string[]): string[] {\n const out: string[] = []\n for (let i = 0; i < argv.length; i++) {\n const a = argv[i]!\n if (SECRET_FLAGS.has(a)) {\n out.push(a, '<redacted>')\n i++\n continue\n }\n if (/^(?:--api-key|--bearer|--token|--password)=/.test(a)) {\n out.push(a.replace(/=.*$/, '=<redacted>'))\n continue\n }\n out.push(a)\n }\n return out\n}\n"],"mappings":";;;AAYO,IAAM,2BAA2B;;;ACIjC,IAAM,oBAAN,MAAiD;AAAA,EAGtD,YACmB,UACA,QACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAJX,WAAW,oBAAI,IAAmB;AAAA,EAO1C,KAAK,UAAmC;AACtC,UAAM,OAAO,KAAK,UAAU,QAAQ;AACpC,UAAM,UAAkC,EAAE,gBAAgB,mBAAmB;AAC7E,QAAI,KAAK,OAAQ,SAAQ,gBAAgB,UAAU,KAAK,MAAM;AAC9D,UAAM,UAAU,MAAM,KAAK,UAAU,EAAE,QAAQ,QAAQ,SAAS,KAAK,CAAC,EACnE,KAAK,MAAM,MAAS,EACpB,MAAM,MAAM,MAAS;AACxB,SAAK,SAAS,IAAI,OAAO;AACzB,YAAQ,QAAQ,MAAM,KAAK,SAAS,OAAO,OAAO,CAAC;AAAA,EACrD;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,MAAM,KAAK,KAAK,QAAQ,CAAC;AAAA,EACpD;AACF;AAGO,IAAM,sBAAN,MAAmD;AAAA,EACxD,YAA6B,OAAwB;AAAxB;AAAA,EAAyB;AAAA,EAAzB;AAAA,EAE7B,KAAK,UAAmC;AACtC,eAAW,QAAQ,KAAK,OAAO;AAC7B,UAAI;AACF,cAAM,SAAS,KAAK,KAAK,QAAQ;AACjC,YAAI,UAAU,OAAQ,OAA4B,UAAU,YAAY;AACtE;AAAC,UAAC,OAA4B,MAAM,MAAM,MAAS;AAAA,QACrD;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,QAAQ,WAAW,KAAK,MAAM,IAAI,CAAC,MAAM,QAAQ,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAAA,EAC9E;AACF;AAGO,IAAM,oBAAN,MAAiD;AAAA,EACtD,OAAa;AAAA,EAAC;AAChB;AAGO,IAAM,wBAAN,MAAqD;AAAA,EACjD,YAAiC,CAAC;AAAA,EAC3C,KAAK,UAAmC;AACtC,SAAK,UAAU,KAAK,QAAQ;AAAA,EAC9B;AAAA,EACA,QAAc;AAAE,SAAK,UAAU,SAAS;AAAA,EAAE;AAC5C;;;AC9CO,IAAM,kBAAN,MAAsB;AAAA,EAC3B,YACmB,MACA,eACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAGnB,KAAK,MAAsB;AACzB,UAAM,WAA8B;AAAA,MAClC,eAAe;AAAA,MACf,YAAY,eAAe;AAAA,MAC3B,OAAO,KAAK;AAAA,MACZ,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,MAClC,QAAQ,KAAK,UAAU,KAAK;AAAA,MAC5B,MAAM,KAAK;AAAA,MACX,IAAI,KAAK;AAAA,MACT,YAAY,KAAK;AAAA,MACjB,MAAM,KAAK,QAAQ,CAAC;AAAA,MACpB,SAAS,KAAK,WAAW,CAAC;AAAA,MAC1B,GAAI,KAAK,cAAc,EAAE,aAAa,KAAK,YAAY,IAAI,CAAC;AAAA,MAC5D,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,MAC1C,GAAI,KAAK,OAAO,EAAE,MAAM,KAAK,KAAK,IAAI,CAAC;AAAA,MACvC,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,IAC5C;AACA,QAAI;AACF,WAAK,KAAK,KAAK,QAAQ;AAAA,IACzB,QAAQ;AAAA,IAER;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,QAAQ;AAAA,EAC1B;AACF;AAGA,SAAS,iBAAyB;AAChC,MAAI,OAAO,WAAW,eAAe,OAAO,OAAO,eAAe,YAAY;AAC5E,WAAO,OAAO,WAAW;AAAA,EAC3B;AAEA,SAAO,SAAS,KAAK,IAAI,EAAE,SAAS,EAAE,IAAI,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACxF;AAEO,IAAM,eAAe,oBAAI,IAAI,CAAC,aAAa,YAAY,WAAW,YAAY,CAAC;AAG/E,SAAS,aAAa,MAA0B;AACrD,QAAM,MAAgB,CAAC;AACvB,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,IAAI,KAAK,CAAC;AAChB,QAAI,aAAa,IAAI,CAAC,GAAG;AACvB,UAAI,KAAK,GAAG,YAAY;AACxB;AACA;AAAA,IACF;AACA,QAAI,8CAA8C,KAAK,CAAC,GAAG;AACzD,UAAI,KAAK,EAAE,QAAQ,QAAQ,aAAa,CAAC;AACzC;AAAA,IACF;AACA,QAAI,KAAK,CAAC;AAAA,EACZ;AACA,SAAO;AACT;","names":[]}
@@ -26,6 +26,7 @@ import {
26
26
  startServer
27
27
  } from "../chunk-OZPRSK4A.js";
28
28
  import "../chunk-ITN4YOZY.js";
29
+ import "../chunk-PZ5AY32C.js";
29
30
  export {
30
31
  BUILTIN_RUBRICS,
31
32
  ErrorResponseSchema,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tangle-network/agent-eval",
3
- "version": "0.14.2",
3
+ "version": "0.16.1",
4
4
  "description": "Trace-first evaluation framework for Tangle agents. Core (spans, pipelines, sandbox harness, OTLP export), trust (dataset, red-team, calibration, behavior DSL), builder-of-builders (three-layer eval, resumable sessions, meta-runtime correlation), and frontier (meta-eval correlation study, Process Reward Modeling, bisector).",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -36,15 +36,6 @@
36
36
  "publishConfig": {
37
37
  "access": "public"
38
38
  },
39
- "scripts": {
40
- "build": "tsup",
41
- "dev": "tsup --watch",
42
- "prepare": "tsup",
43
- "test": "vitest run",
44
- "test:watch": "vitest",
45
- "typecheck": "tsc --noEmit",
46
- "openapi": "node dist/cli.js openapi --out dist/openapi.json"
47
- },
48
39
  "dependencies": {
49
40
  "@asteasolutions/zod-to-openapi": "^8.5.0",
50
41
  "@ax-llm/ax": "^19.0.25",
@@ -64,5 +55,12 @@
64
55
  "node": ">=20"
65
56
  },
66
57
  "license": "MIT",
67
- "packageManager": "pnpm@10.22.0"
68
- }
58
+ "scripts": {
59
+ "build": "tsup",
60
+ "dev": "tsup --watch",
61
+ "test": "vitest run",
62
+ "test:watch": "vitest",
63
+ "typecheck": "tsc --noEmit",
64
+ "openapi": "node dist/cli.js openapi --out dist/openapi.json"
65
+ }
66
+ }