@tangle-network/agent-eval 0.55.0 → 0.57.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/campaign/index.js +3 -3
  2. package/dist/{chunk-MAOZCN36.js → chunk-5GLYP2IQ.js} +207 -1
  3. package/dist/chunk-5GLYP2IQ.js.map +1 -0
  4. package/dist/{chunk-J3EIOI3O.js → chunk-74Y2EMNH.js} +2 -2
  5. package/dist/{chunk-UBQGWD3O.js → chunk-AIXHUIHG.js} +2 -2
  6. package/dist/{chunk-LYL4SOKT.js → chunk-GM476SZU.js} +2 -2
  7. package/dist/{chunk-YXD7GWJI.js → chunk-JB4UWIM6.js} +3 -3
  8. package/dist/{chunk-EGIPWXHL.js → chunk-OLIBRKRD.js} +2 -2
  9. package/dist/{chunk-H4TOS272.js → chunk-QDOSODID.js} +2 -2
  10. package/dist/{chunk-WP7SY7AI.js → chunk-S3SDD56V.js} +48 -1
  11. package/dist/chunk-S3SDD56V.js.map +1 -0
  12. package/dist/contract/index.d.ts +98 -1
  13. package/dist/contract/index.js +78 -4
  14. package/dist/contract/index.js.map +1 -1
  15. package/dist/index.d.ts +109 -4
  16. package/dist/index.js +144 -6
  17. package/dist/index.js.map +1 -1
  18. package/dist/openapi.json +1 -1
  19. package/dist/pipelines/index.js +2 -2
  20. package/dist/{release-report-B6l5fi7T.d.ts → release-report-DmPjIce3.d.ts} +44 -1
  21. package/dist/reporting.d.ts +1 -1
  22. package/dist/reporting.js +3 -3
  23. package/dist/rl.js +3 -3
  24. package/dist/{run-campaign-6UEVBPP3.js → run-campaign-ZURVWMMI.js} +3 -3
  25. package/dist/traces.d.ts +86 -3
  26. package/dist/traces.js +5 -1
  27. package/package.json +1 -1
  28. package/dist/chunk-MAOZCN36.js.map +0 -1
  29. package/dist/chunk-WP7SY7AI.js.map +0 -1
  30. /package/dist/{chunk-J3EIOI3O.js.map → chunk-74Y2EMNH.js.map} +0 -0
  31. /package/dist/{chunk-UBQGWD3O.js.map → chunk-AIXHUIHG.js.map} +0 -0
  32. /package/dist/{chunk-LYL4SOKT.js.map → chunk-GM476SZU.js.map} +0 -0
  33. /package/dist/{chunk-YXD7GWJI.js.map → chunk-JB4UWIM6.js.map} +0 -0
  34. /package/dist/{chunk-EGIPWXHL.js.map → chunk-OLIBRKRD.js.map} +0 -0
  35. /package/dist/{chunk-H4TOS272.js.map → chunk-QDOSODID.js.map} +0 -0
  36. /package/dist/{run-campaign-6UEVBPP3.js.map → run-campaign-ZURVWMMI.js.map} +0 -0
@@ -11,15 +11,15 @@ import {
11
11
  runImprovementLoop,
12
12
  runOptimization,
13
13
  surfaceHash
14
- } from "../chunk-YXD7GWJI.js";
14
+ } from "../chunk-JB4UWIM6.js";
15
15
  import {
16
16
  fsCampaignStorage,
17
17
  inMemoryCampaignStorage,
18
18
  runCampaign
19
- } from "../chunk-J3EIOI3O.js";
19
+ } from "../chunk-74Y2EMNH.js";
20
20
  import "../chunk-N4SBKEPJ.js";
21
21
  import "../chunk-YV7J7X5N.js";
22
- import "../chunk-WP7SY7AI.js";
22
+ import "../chunk-S3SDD56V.js";
23
23
  import "../chunk-GGE4NNQT.js";
24
24
  import "../chunk-VXNVVBZO.js";
25
25
  import "../chunk-PC4UYEBM.js";
@@ -2,6 +2,10 @@ import {
2
2
  canonicalize,
3
3
  hashJson
4
4
  } from "./chunk-VSMTAMNK.js";
5
+ import {
6
+ defaultProviderRedactor,
7
+ providerFromBaseUrl
8
+ } from "./chunk-PC4UYEBM.js";
5
9
  import {
6
10
  NotFoundError,
7
11
  ReplayError
@@ -1282,6 +1286,86 @@ ${JSON.stringify(
1282
1286
  Use the trace tools. Do not invent facts. Cite task ids. Separate customer-facing claims from internal harness/model findings.`;
1283
1287
  }
1284
1288
 
1289
+ // src/trace-analyst/otlp-flatten.ts
1290
/**
 * Numeric span-kind code -> span-kind enum name.
 * The position of each name in the list below is its numeric code (0..5).
 * `flattenOtlpExportToNdjson` spreads this table first and then `opts.kindMap`,
 * so caller-supplied entries win.
 */
var DEFAULT_KIND_MAP = Object.fromEntries(
  [
    "SPAN_KIND_UNSPECIFIED",
    "SPAN_KIND_INTERNAL",
    "SPAN_KIND_SERVER",
    "SPAN_KIND_CLIENT",
    "SPAN_KIND_PRODUCER",
    "SPAN_KIND_CONSUMER"
  ].map((kindName, code) => [code, kindName])
);
1298
/**
 * Numeric span status code -> status enum name.
 * The position of each name in the list below is its numeric code (0..2).
 */
var STATUS_MAP = Object.fromEntries(
  ["STATUS_CODE_UNSET", "STATUS_CODE_OK", "STATUS_CODE_ERROR"].map(
    (statusName, code) => [code, statusName]
  )
);
1303
/**
 * Unwrap one OTLP/JSON `AnyValue` wrapper into a plain JavaScript value.
 *
 * Handled variants, checked in this order:
 *   - `stringValue`  -> returned as-is
 *   - `intValue`     -> coerced with `Number()` (OTLP/JSON may carry it as a string)
 *   - `doubleValue`  -> returned as-is
 *   - `boolValue`    -> returned as-is
 *   - `bytesValue`   -> returned as-is (no decoding is attempted here)
 *   - `arrayValue`   -> array of recursively unwrapped elements
 *   - `kvlistValue`  -> plain object of recursively unwrapped entries
 *
 * @param {object|null|undefined} v - The `AnyValue` wrapper; may be absent.
 * @returns {*} The unwrapped value, or `""` when `v` is missing, is not an
 *   object, or carries none of the variants above.
 */
function attrValue(v) {
  // An attribute whose `value` is absent must not crash the whole flatten.
  if (v == null || typeof v !== "object") return "";
  if (v.stringValue !== void 0) return v.stringValue;
  if (v.intValue !== void 0) return Number(v.intValue);
  if (v.doubleValue !== void 0) return v.doubleValue;
  if (v.boolValue !== void 0) return v.boolValue;
  if (v.bytesValue !== void 0) return v.bytesValue;
  if (Array.isArray(v.arrayValue?.values)) {
    return v.arrayValue.values.map((element) => attrValue(element));
  }
  if (Array.isArray(v.kvlistValue?.values)) {
    // Object.fromEntries defines own data properties, so a key such as
    // "__proto__" is stored as data instead of hitting the prototype setter.
    return Object.fromEntries(
      v.kvlistValue.values
        .filter((entry) => entry != null && typeof entry.key === "string")
        .map((entry) => [entry.key, attrValue(entry.value)])
    );
  }
  return "";
}
1310
/**
 * Convert a list of `{ key, value }` attribute entries into a plain object
 * keyed by attribute name, unwrapping each value with `attrValue`.
 *
 * When the same key appears more than once, the last entry wins.
 * Entries that are `null`/`undefined` or have a non-string `key` are skipped
 * instead of throwing or producing an `"undefined"` key.
 *
 * @param {Iterable<{key: string, value?: object}>|null|undefined} attrs
 * @returns {Record<string, *>} A new object; empty when `attrs` is absent.
 */
function attrsToRecord(attrs) {
  const out = {};
  if (attrs == null) return out;
  for (const a of attrs) {
    if (a == null || typeof a.key !== "string") continue;
    // defineProperty (not assignment) so an attribute literally named
    // "__proto__" becomes an own data property rather than touching the
    // object's prototype.
    Object.defineProperty(out, a.key, {
      value: attrValue(a.value),
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
  return out;
}
1315
/**
 * Convert a Unix timestamp in nanoseconds to an ISO-8601 string with
 * millisecond precision.
 *
 * Decimal-digit strings and bigints are divided with BigInt arithmetic.
 * Present-day nanosecond timestamps (~1.7e18) are above 2^53, so going through
 * `Number()` first rounds them to a multiple of 256 ns, which can land just
 * below a millisecond boundary and yield a result 1 ms too early.
 *
 * @param {string|number|bigint|null|undefined} nano - Nanoseconds since epoch.
 * @returns {string} ISO timestamp. The Unix epoch is returned when the input is
 *   not numeric or falls outside the range a `Date` can represent (the latter
 *   previously threw `RangeError` from `toISOString`).
 */
function nanoToIso(nano) {
  let ms;
  if (typeof nano === "bigint") {
    ms = Number(nano / 1000000n);
  } else if (typeof nano === "string" && /^\d+$/.test(nano)) {
    ms = Number(BigInt(nano) / 1000000n);
  } else {
    ms = Number(nano) / 1e6;
  }
  const date = new Date(ms);
  // An invalid Date (NaN, +/-Infinity, or |ms| > 8.64e15) reports NaN here.
  return Number.isNaN(date.getTime()) ? new Date(0).toISOString() : date.toISOString();
}
1319
/**
 * Add OpenInference-style alias keys to an attribute record, in place.
 *
 * For each (source, alias) pair below, the alias is written only when the
 * source key is present and the alias key is not already present, so existing
 * alias values are never overwritten.
 *
 * @param {Record<string, *>} attrs - Attribute record; mutated.
 * @returns {void}
 */
function applyOpenInference(attrs) {
  const keepAsIs = (value) => value;
  const aliasRules = [
    ["llm.model", "llm.model_name", keepAsIs],
    ["tool.name", "inference.tool.name", keepAsIs],
    // The span kind alias is stringified and upper-cased.
    ["span.kind", "openinference.span.kind", (value) => String(value).toUpperCase()]
  ];
  for (const [sourceKey, aliasKey, convert] of aliasRules) {
    if (!(sourceKey in attrs) || aliasKey in attrs) continue;
    attrs[aliasKey] = convert(attrs[sourceKey]);
  }
}
1330
/**
 * Flatten an OTLP export (`resourceSpans -> scopeSpans -> spans`) into one
 * flat record per span.
 *
 * Note: despite the name, this returns an array of plain objects; it does not
 * serialise them. Each record carries snake_case ids, ISO start/end times, a
 * `status` object, the (shared) flattened resource attributes, the span's
 * flattened attributes, and `events` when the span has any.
 *
 * @param {object|null|undefined} otlpExport - Object with a `resourceSpans`
 *   array. A missing export or missing nested arrays yield no records.
 * @param {object} [opts]
 * @param {string} [opts.attributeVocabulary="openinference"] - When equal to
 *   `"openinference"`, alias keys are added via `applyOpenInference`.
 * @param {Record<string|number, string>} [opts.kindMap] - Entries merged over
 *   `DEFAULT_KIND_MAP`.
 * @returns {object[]} One record per span, in input order.
 */
function flattenOtlpExportToNdjson(otlpExport, opts = {}) {
  const vocab = opts.attributeVocabulary ?? "openinference";
  const kindMap = { ...DEFAULT_KIND_MAP, ...opts.kindMap };
  const kindNames = new Set(Object.values(kindMap));
  const statusNames = new Set(Object.values(STATUS_MAP));
  // Resolve an enum field that may arrive as a numeric code or, from some
  // exporters, already as its string name. Own-property lookup only, so a
  // value like "constructor" cannot resolve through Object.prototype.
  const resolveEnum = (raw, table, knownNames, fallback) => {
    if (raw != null && Object.prototype.hasOwnProperty.call(table, raw)) {
      const mapped = table[raw];
      if (mapped != null) return mapped;
    }
    // A string that is already one of the table's names is passed through
    // instead of being silently downgraded to the fallback.
    if (typeof raw === "string" && knownNames.has(raw)) return raw;
    return fallback;
  };
  const lines = [];
  for (const rs of otlpExport?.resourceSpans ?? []) {
    // One resource object per resourceSpans entry, shared by all of its spans.
    const resource = { attributes: attrsToRecord(rs.resource?.attributes ?? []) };
    for (const scope of rs.scopeSpans ?? []) {
      for (const span of scope.spans ?? []) {
        const attributes = attrsToRecord(span.attributes ?? []);
        if (vocab === "openinference") applyOpenInference(attributes);
        const line = {
          trace_id: span.traceId,
          span_id: span.spanId,
          parent_span_id: span.parentSpanId ?? null,
          name: span.name,
          kind: resolveEnum(span.kind, kindMap, kindNames, "SPAN_KIND_UNSPECIFIED"),
          start_time: nanoToIso(span.startTimeUnixNano),
          end_time: nanoToIso(span.endTimeUnixNano),
          status: {
            code: resolveEnum(span.status?.code ?? 0, STATUS_MAP, statusNames, "STATUS_CODE_UNSET"),
            // `message` is only emitted when it is non-empty.
            ...span.status?.message ? { message: span.status.message } : {}
          },
          resource,
          attributes
        };
        if (span.events && span.events.length > 0) {
          // Event timestamps are passed through unchanged (not converted to ISO).
          line.events = span.events.map((e) => ({
            name: e.name,
            timeUnixNano: e.timeUnixNano,
            ...e.attributes ? { attributes: attrsToRecord(e.attributes) } : {}
          }));
        }
        lines.push(line);
      }
    }
  }
  return lines;
}
1368
+
1285
1369
  // src/trace/store.ts
1286
1370
  var InMemoryTraceStore = class {
1287
1371
  runs = /* @__PURE__ */ new Map();
@@ -1509,6 +1593,126 @@ var FileSystemTraceStore = class {
1509
1593
  }
1510
1594
  };
1511
1595
 
1596
+ // src/trace/capture-fetch.ts
1597
// Default limit for a captured response body: 2 MiB (2 * 1024^2 = 2097152).
// Used by captureFetchToRawSink when `opts.responseBodyByteCap` is not given.
var DEFAULT_BODY_CAP = 2 * 1024 ** 2;
1598
/**
 * Copy a headers collection into a plain object with lower-cased names.
 *
 * @param {{forEach: Function}|null|undefined} headers - Anything exposing a
 *   `forEach((value, key) => ...)` method, e.g. a `Headers` instance.
 * @returns {Record<string, string>|undefined} The header record, or
 *   `undefined` when `headers` is falsy or yields no entries.
 */
function headersToRecord(headers) {
  if (!headers) return undefined;
  const record = {};
  headers.forEach((headerValue, headerName) => {
    const normalisedName = headerName.toLowerCase();
    record[normalisedName] = headerValue;
  });
  const isEmpty = Object.keys(record).length === 0;
  return isEmpty ? undefined : record;
}
1606
/**
 * Parse `text` as JSON when possible.
 *
 * @param {string} text
 * @returns {*} `undefined` for an empty string, the parsed value when `text`
 *   is valid JSON, otherwise `text` itself unchanged.
 */
function parseMaybeJson(text) {
  if (text.length === 0) return undefined;
  let result;
  try {
    result = JSON.parse(text);
  } catch {
    // Not JSON: keep the raw text so nothing is lost.
    result = text;
  }
  return result;
}
1614
/**
 * Best-effort extraction of the outgoing request body for capture.
 *
 * Resolution order:
 *   1. `init.body` is a string   -> parsed with `parseMaybeJson`.
 *   2. `init.body` is any other non-null value -> `undefined` (not captured).
 *   3. `input` is a `Request`    -> its body text is read from a clone, so the
 *      original request stays consumable, then parsed with `parseMaybeJson`.
 *   4. otherwise                 -> `undefined`.
 *
 * @param {*} input - First argument passed to fetch.
 * @param {{body?: *}} [init] - Second argument passed to fetch.
 * @returns {Promise<*>} Parsed body, raw text, or `undefined`; never rejects
 *   because of a failure while cloning/reading the Request.
 */
async function readRequestBody(input, init) {
  const explicitBody = init?.body;
  if (typeof explicitBody === "string") return parseMaybeJson(explicitBody);
  if (explicitBody != null) return undefined;
  if (!(input instanceof Request)) return undefined;
  try {
    const bodyText = await input.clone().text();
    return parseMaybeJson(bodyText);
  } catch {
    // Cloning or reading failed; treat the body as unavailable.
    return undefined;
  }
}
1626
/**
 * Derive the endpoint (the part of `url` after `baseUrl`) for a captured call.
 *
 * The base is only treated as a prefix when the match ends on a boundary:
 * the remainder must be empty or start with `/`, `?` or `#`. Without that
 * check a base of `https://host/v1` would "match" `https://host/v10/chat` and
 * produce `0/chat`, and `https://host` would match `https://host.evil.com/x`.
 *
 * @param {string} url - Full request URL (or a relative path).
 * @param {string} baseUrl - Configured base URL; trailing slashes are ignored.
 * @returns {string} The remainder after the base (`"/"` when nothing remains);
 *   otherwise the URL's pathname; otherwise `url` itself when it cannot be
 *   parsed as an absolute URL.
 */
function endpointFromUrl(url, baseUrl) {
  const normalisedBase = String(baseUrl ?? "").replace(/\/+$/, "");
  if (url.startsWith(normalisedBase)) {
    const remainder = url.slice(normalisedBase.length);
    if (remainder === "") return "/";
    const boundary = remainder[0];
    if (boundary === "/" || boundary === "?" || boundary === "#") return remainder;
  }
  try {
    return new URL(url).pathname;
  } catch {
    return url;
  }
}
1635
/**
 * Wrap a fetch implementation so every call is mirrored into `sink` as raw
 * events: one `"request"` event before the call, then either a `"response"`
 * event or, when the underlying fetch rejects, an `"error"` event (the
 * rejection is re-thrown unchanged).
 *
 * Every event passes through the redactor before `sink.record`. A failing
 * sink is best-effort by default: the first failure is logged with
 * `console.warn` and later ones are ignored; with `opts.failClosed` the sink
 * error is thrown to the caller instead.
 *
 * NOTE(review): the response body is read in full from a clone before the
 * response is returned, so a caller consuming a streamed body only receives
 * the Response once the body has completed.
 *
 * @param {Function} fetch2 - The fetch implementation to delegate to.
 * @param {{record: Function}} sink - Receives each (redacted) event.
 * @param {object} ctx - Supplies `runId`, `spanId`, `model`, `baseUrl` and
 *   optionally `provider` (otherwise derived via `providerFromBaseUrl`).
 * @param {object} [opts]
 * @param {Function} [opts.redactor] - Defaults to `defaultProviderRedactor`.
 * @param {number} [opts.responseBodyByteCap] - Maximum captured response body
 *   size in UTF-8 bytes; defaults to `DEFAULT_BODY_CAP`.
 * @param {boolean} [opts.failClosed] - Propagate sink failures when truthy.
 * @returns {Function} An async `(input, init) => Response` fetch replacement.
 */
function captureFetchToRawSink(fetch2, sink, ctx, opts = {}) {
  const provider = ctx.provider ?? providerFromBaseUrl(ctx.baseUrl);
  const redactor = opts.redactor ?? defaultProviderRedactor;
  const bodyCap = opts.responseBodyByteCap ?? DEFAULT_BODY_CAP;
  let warned = false;
  const baseEvent = (direction, endpoint) => ({
    eventId: crypto.randomUUID(),
    runId: ctx.runId,
    spanId: ctx.spanId,
    provider,
    model: ctx.model,
    endpoint,
    baseUrl: ctx.baseUrl,
    attemptIndex: 0,
    // retries are re-invocations one layer up; documented in 0.x
    direction,
    timestamp: Date.now(),
    redactedFields: []
  });
  const record = async (event) => {
    try {
      await sink.record(redactor(event));
    } catch (err) {
      if (opts.failClosed) throw err;
      // Warn once only, so a broken sink does not flood the log.
      if (!warned) {
        warned = true;
        console.warn(
          `captureFetchToRawSink: sink.record failed (capture is best-effort) \u2014 ${err instanceof Error ? err.message : String(err)}`
        );
      }
    }
  };
  // Returns `text` cut down to at most `bodyCap` UTF-8 bytes, or undefined
  // when it already fits. The cap is a byte cap, so it must be measured on the
  // encoded form: `text.length` counts UTF-16 code units and can under-count
  // bytes by up to 3x for non-ASCII bodies.
  const truncateToByteCap = (text) => {
    // Each UTF-16 code unit encodes to at most 3 UTF-8 bytes, so this cheap
    // bound lets the common (small body) case skip the encode entirely.
    if (text.length * 3 <= bodyCap) return void 0;
    const bytes = new TextEncoder().encode(text);
    if (!(bytes.byteLength > bodyCap)) return void 0;
    let end = Math.max(0, Math.floor(bodyCap));
    // Step back off UTF-8 continuation bytes (10xxxxxx) so the cut never
    // lands in the middle of a multi-byte character.
    while (end > 0 && (bytes[end] & 192) === 128) end -= 1;
    return new TextDecoder().decode(bytes.subarray(0, end));
  };
  return async (input, init) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
    const method = (init?.method ?? (input instanceof Request ? input.method : "GET")).toUpperCase();
    const endpoint = endpointFromUrl(url, ctx.baseUrl);
    const reqHeaders = new Headers(
      init?.headers ?? (input instanceof Request ? input.headers : void 0)
    );
    await record({
      ...baseEvent("request", endpoint),
      // The HTTP method is carried as a synthetic "x-http-method" header entry.
      requestHeaders: { ...headersToRecord(reqHeaders), "x-http-method": method },
      requestBody: await readRequestBody(input, init)
    });
    const start = Date.now();
    let response;
    try {
      response = await fetch2(input, init);
    } catch (err) {
      await record({
        ...baseEvent("error", endpoint),
        durationMs: Date.now() - start,
        errorMessage: err instanceof Error ? err.message : String(err)
      });
      throw err;
    }
    let responseBody;
    const redactedFields = [];
    try {
      // Read from a clone so the caller can still consume the original body.
      const raw = await response.clone().text();
      const truncated = truncateToByteCap(raw);
      if (truncated !== void 0) {
        // Over the cap: keep the truncated text (unparsed) and flag it.
        responseBody = truncated;
        redactedFields.push("body_truncated");
      } else {
        responseBody = parseMaybeJson(raw);
      }
    } catch {
      responseBody = void 0;
    }
    await record({
      ...baseEvent("response", endpoint),
      durationMs: Date.now() - start,
      statusCode: response.status,
      responseHeaders: headersToRecord(response.headers),
      responseBody,
      redactedFields
    });
    return response;
  };
}
1715
+
1512
1716
  // src/trace/otel.ts
1513
1717
  var OTEL_AGENT_EVAL_SCOPE = { name: "@tangle-network/agent-eval", version: "0.3.0" };
1514
1718
  async function exportRunAsOtlp(store, runId, resourceAttrs = {}) {
@@ -1996,8 +2200,10 @@ export {
1996
2200
  scoreTraceInsightReadiness,
1997
2201
  defaultTraceInsightPanel,
1998
2202
  buildTraceInsightPrompt,
2203
+ flattenOtlpExportToNdjson,
1999
2204
  InMemoryTraceStore,
2000
2205
  FileSystemTraceStore,
2206
+ captureFetchToRawSink,
2001
2207
  OTEL_AGENT_EVAL_SCOPE,
2002
2208
  exportRunAsOtlp,
2003
2209
  otelRunCompleteHook,
@@ -2008,4 +2214,4 @@ export {
2008
2214
  createReplayFetch,
2009
2215
  iterateRawCalls
2010
2216
  };
2011
- //# sourceMappingURL=chunk-MAOZCN36.js.map
2217
+ //# sourceMappingURL=chunk-5GLYP2IQ.js.map