@tangle-network/agent-eval 0.72.3 → 0.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1323,6 +1323,330 @@ function padTraceId(id) {
1323
1323
  return cleaned.slice(0, 32).padEnd(32, "0");
1324
1324
  }
1325
1325
 
1326
+ // src/trace/store-to-otlp.ts
1327
+ import { readdirSync, readFileSync, statSync, writeFileSync } from "fs";
1328
+ import { join } from "path";
1329
/**
 * Convert one or more on-disk trace stores into a single NDJSON file of
 * OTLP-style span lines.
 *
 * @param {string|object|object[]} source - A root directory path (treated as a
 *   "celled" store), a single source descriptor, or an array of descriptors.
 *   A descriptor is read for `root`, `layout` ("flat" means the root itself is
 *   the only cell; anything else means one cell per sub-directory) and an
 *   optional `serviceName`.
 * @param {string} outPath - File that receives the NDJSON output.
 * @param {object} [opts]
 * @param {string} [opts.serviceName="agent-eval"] - Service name used when a
 *   source does not carry its own.
 * @param {Function} [opts.resourceAttributes] - Called with each run; the
 *   returned object is merged into the resource attributes.
 * @param {Function} [opts.runAttributes] - Called with each run; the returned
 *   object is merged into the root run span attributes.
 * @returns {{spanCount: number, runCount: number, cellCount: number, cellErrorCount: number}}
 *   Totals across all sources. Cells whose projection throws are logged with
 *   `console.warn`, counted in `cellErrorCount`, and contribute no lines.
 */
function convertTraceStoresToOtlp(source, outPath, opts = {}) {
  let stores;
  if (Array.isArray(source)) {
    stores = [...source];
  } else if (typeof source === "string") {
    stores = [{ root: source, layout: "celled" }];
  } else {
    stores = [source];
  }

  const fallbackServiceName = opts.serviceName ?? "agent-eval";
  const resourceAttributes = opts.resourceAttributes ?? (() => ({}));
  const runAttributes = opts.runAttributes ?? (() => ({}));

  const output = [];
  const totals = { spanCount: 0, runCount: 0, cellCount: 0, cellErrorCount: 0 };

  for (const store of stores) {
    const serviceName = store.serviceName ?? fallbackServiceName;

    let cells;
    if (store.layout === "flat") {
      cells = [{ label: "<root>", dir: store.root }];
    } else {
      cells = listCells(store.root).map((name) => ({ label: name, dir: join(store.root, name) }));
    }

    for (const { label, dir } of cells) {
      try {
        const projected = projectCell({
          cellDir: dir,
          serviceName,
          resourceAttributes,
          runAttributes
        });
        // Plain loop rather than push(...spread) so very large cells cannot
        // overflow the argument list.
        for (const entry of projected.lines) output.push(entry);
        totals.spanCount += projected.spanCount;
        totals.runCount += projected.runCount;
        totals.cellCount += 1;
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(`[traces-to-otlp] cell ${label} (${dir}) skipped: ${reason}`);
        totals.cellErrorCount += 1;
      }
    }
  }

  // NDJSON: newline-terminated when there is at least one line, otherwise empty.
  const trailer = output.length > 0 ? "\n" : "";
  writeFileSync(outPath, output.join("\n") + trailer);
  return totals;
}
1365
/**
 * Project a single cell directory into OTLP-style NDJSON lines.
 *
 * Reads the `runs`, `spans` and `events` shards of the cell, replays
 * `state_mutation` events on top of the shard rows (entity `run` / `span`
 * replace a record, `run.update` / `span.update` shallow-merge a patch into an
 * already-known record), then emits one root line per run followed by one line
 * per span belonging to that run.
 *
 * @param {object} args
 * @param {string} args.cellDir - Directory holding the cell's `*.ndjson` shards.
 * @param {string} args.serviceName - Value for `service.name` and `agent.workflow.name`.
 * @param {Function} args.resourceAttributes - Called with a run; result is merged into resource attributes.
 * @param {Function} args.runAttributes - Called with a run; result is merged into the run span attributes.
 * @returns {{lines: string[], runCount: number, spanCount: number}} Serialized
 *   lines plus the number of runs and spans emitted.
 */
function projectCell(args) {
  const { cellDir, serviceName, resourceAttributes, runAttributes } = args;
  const lines = [];
  let runCount = 0;
  let spanCount = 0;
  const runs = readMergedShards(cellDir, "runs", "runId");
  const spans = readMergedShards(cellDir, "spans", "spanId");
  const events = readShards(cellDir, "events");
  const runByRunId = new Map();
  for (const r of runs) runByRunId.set(r.runId, r);
  const spanBySpanId = new Map();
  for (const s of spans) spanBySpanId.set(s.spanId, s);
  const eventsBySpanId = new Map();
  for (const e of events) {
    // A shard line holding a non-object JSON value (e.g. `null`) must not abort the whole cell.
    if (e === null || typeof e !== "object") continue;
    if (e.kind === "state_mutation" && e.payload && typeof e.payload === "object") {
      const entity = e.payload.entity;
      if (entity === "run") {
        const run = e.payload.run;
        if (run?.runId) runByRunId.set(run.runId, run);
        continue;
      }
      if (entity === "run.update") {
        const patch = e.payload.patch;
        if (patch && e.runId) {
          // Patches for runs that have not been seen yet are dropped.
          const prior = runByRunId.get(e.runId);
          if (prior) runByRunId.set(e.runId, { ...prior, ...patch });
        }
        continue;
      }
      if (entity === "span") {
        const span = e.payload.span;
        if (span?.spanId) spanBySpanId.set(span.spanId, span);
        continue;
      }
      if (entity === "span.update") {
        const spanId = e.payload.spanId;
        const patch = e.payload.patch;
        if (spanId && patch) {
          // Patches for spans that have not been seen yet are dropped.
          const prior = spanBySpanId.get(spanId);
          if (prior) spanBySpanId.set(spanId, { ...prior, ...patch });
        }
        continue;
      }
    }
    // Everything else is attached to its span (when it names one) for summary attributes.
    if (!e.spanId) continue;
    const arr = eventsBySpanId.get(e.spanId) ?? [];
    arr.push(e);
    eventsBySpanId.set(e.spanId, arr);
  }

  // Group spans by run once, instead of rescanning every span for every run.
  // Insertion order is preserved, so the emitted line order is unchanged.
  const spansByRunId = new Map();
  for (const span of spanBySpanId.values()) {
    const bucket = spansByRunId.get(span.runId);
    if (bucket) {
      bucket.push(span);
    } else {
      spansByRunId.set(span.runId, [span]);
    }
  }

  for (const run of runByRunId.values()) {
    const traceId = padTraceId2(run.runId);
    const agentName = run.variantId ?? run.scenarioId;
    const sharedResource = {
      attributes: {
        "service.name": serviceName,
        "agent.name": agentName,
        "run.id": run.runId,
        "run.status": run.status,
        ...resourceAttributes(run)
      }
    };
    // Synthetic root span for the run; spans without a parent hang off it.
    const runSpanId = padSpanId3(`run-${run.runId}`);
    const runStart = msToIso(run.startedAt);
    const runEnd = msToIso(run.endedAt ?? run.startedAt);
    const runStatus = run.outcome?.failureClass && run.outcome.failureClass !== "success" ? "STATUS_CODE_ERROR" : "STATUS_CODE_OK";
    const runAttrs = {
      "openinference.span.kind": "AGENT",
      "agent.name": agentName,
      "agent.workflow.name": serviceName,
      ...runAttributes(run)
    };
    lines.push(
      JSON.stringify(
        toLine({
          traceId,
          spanId: runSpanId,
          parentSpanId: "",
          name: `run.${agentName}`,
          kind: "SPAN_KIND_INTERNAL",
          startTime: runStart,
          endTime: runEnd,
          statusCode: runStatus,
          statusMessage: run.outcome?.notes ?? "",
          resource: sharedResource,
          attributes: runAttrs
        })
      )
    );
    runCount += 1;
    for (const span of spansByRunId.get(run.runId) ?? []) {
      const spanAttrs = spanToAttributes(span, eventsBySpanId.get(span.spanId) ?? []);
      const statusCode = span.status === "error" ? "STATUS_CODE_ERROR" : "STATUS_CODE_OK";
      lines.push(
        JSON.stringify(
          toLine({
            traceId,
            spanId: padSpanId3(span.spanId),
            parentSpanId: span.parentSpanId ? padSpanId3(span.parentSpanId) : runSpanId,
            name: span.name,
            kind: spanKindToOtlpKind(span.kind),
            startTime: msToIso(span.startedAt),
            endTime: msToIso(span.endedAt ?? span.startedAt),
            statusCode,
            statusMessage: span.error ?? "",
            resource: sharedResource,
            attributes: spanAttrs
          })
        )
      );
      spanCount += 1;
    }
  }
  return { lines, runCount, spanCount };
}
1480
/**
 * List the names of the immediate sub-directories of `root`, sorted with the
 * default string ordering.
 *
 * @param {string} root - Directory to scan.
 * @returns {string[]} Sub-directory names, or an empty array when `root`
 *   cannot be read (best-effort: the error is intentionally not propagated).
 */
function listCells(root) {
  try {
    const names = [];
    for (const entry of readdirSync(root, { withFileTypes: true })) {
      if (entry.isDirectory()) names.push(entry.name);
    }
    names.sort();
    return names;
  } catch {
    return [];
  }
}
1487
/**
 * Read every `<name>.ndjson` / `<name>.*.ndjson` shard in `cellDir` and return
 * the parsed records in shard order.
 *
 * Shards are ordered by modification time (oldest first), with the file name
 * as a tie-breaker. Reading is best-effort: an unreadable directory yields an
 * empty array, an unreadable shard is skipped, and lines that are blank or not
 * valid JSON are skipped.
 *
 * Only plain-object records are returned. A line holding another JSON value
 * (`null`, a number, a string, an array) is skipped as well, because callers
 * read properties off every row and a `null` row would otherwise throw.
 *
 * @param {string} cellDir - Directory containing the shard files.
 * @param {string} name - Shard base name, e.g. "runs", "spans" or "events".
 * @returns {object[]} Parsed records.
 */
function readShards(cellDir, name) {
  let entries;
  try {
    entries = readdirSync(cellDir);
  } catch {
    return [];
  }
  const prefix = `${name}.`;
  const shards = entries
    // `${name}.ndjson` itself also starts with the prefix, so one check covers both forms.
    .filter((f) => f.startsWith(prefix) && f.endsWith(".ndjson"))
    .map((f) => {
      const path = join(cellDir, f);
      let mtime = 0;
      try {
        mtime = statSync(path).mtimeMs;
      } catch {
        // Best-effort: a shard that cannot be stat'ed sorts first (mtime 0).
      }
      return { file: f, path, mtime };
    })
    .sort((a, b) => a.mtime - b.mtime || a.file.localeCompare(b.file));
  const rows = [];
  for (const shard of shards) {
    let text;
    try {
      text = readFileSync(shard.path, "utf-8");
    } catch {
      continue;
    }
    for (const line of text.split("\n")) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      let row;
      try {
        row = JSON.parse(trimmed);
      } catch {
        // Malformed line (e.g. a partially written record): skip it.
        continue;
      }
      if (row !== null && typeof row === "object" && !Array.isArray(row)) {
        rows.push(row);
      }
    }
  }
  return rows;
}
1521
/**
 * Read the shards for `name` and collapse them to one record per id.
 *
 * Rows are applied in shard order. A row flagged with a truthy `_update` is
 * shallow-merged over the record already stored under the same id (the merged
 * record's `_update` is reset to `undefined`); any other row replaces the
 * stored record outright. Rows with a falsy id are ignored.
 *
 * @param {string} cellDir - Directory containing the shard files.
 * @param {string} name - Shard base name.
 * @param {string} idKey - Property that identifies a record, e.g. "runId".
 * @returns {object[]} Merged records in first-seen id order.
 */
function readMergedShards(cellDir, name, idKey) {
  const merged = new Map();
  for (const record of readShards(cellDir, name)) {
    const key = record[idKey];
    if (!key) continue;
    const existing = merged.get(key);
    const isPatch = Boolean(existing && record._update);
    merged.set(key, isPatch ? { ...existing, ...record, _update: void 0 } : record);
  }
  return Array.from(merged.values());
}
1536
/**
 * Build the attribute bag for one span line.
 *
 * Always sets `openinference.span.kind`; adds kind-specific keys for "llm",
 * "tool" and "judge" spans; copies every entry of `span.attributes` under an
 * `agent_eval.` prefix; and, when the span has events, records their count
 * and the JSON-encoded list of their kinds.
 *
 * @param {object} span - Span record.
 * @param {object[]} events - Events attached to this span.
 * @returns {object} Attribute map.
 */
function spanToAttributes(span, events) {
  const out = {
    "openinference.span.kind": spanKindToOpenInferenceKind(span.kind)
  };

  switch (span.kind) {
    case "llm": {
      out["llm.model_name"] = span.model;
      if (span.inputTokens !== void 0) out["llm.token_count.prompt"] = span.inputTokens;
      if (span.outputTokens !== void 0) out["llm.token_count.completion"] = span.outputTokens;
      if (span.costUsd !== void 0) out["llm.cost_usd"] = span.costUsd;
      if (span.finishReason) out["llm.finish_reason"] = span.finishReason;
      if (Array.isArray(span.messages)) {
        // Only the last six messages are exported.
        const recent = span.messages.slice(-6);
        out["llm.input_messages"] = JSON.stringify(recent);
      }
      if (typeof span.output === "string") {
        const reply = [{ role: "assistant", content: span.output }];
        out["llm.output_messages"] = JSON.stringify(reply);
      }
      break;
    }
    case "tool": {
      out["tool.name"] = span.toolName;
      if (span.latencyMs !== void 0) out["tool.latency_ms"] = span.latencyMs;
      out["input.value"] = safeStringify(span.args);
      if (span.result !== void 0) out["output.value"] = safeStringify(span.result);
      break;
    }
    case "judge": {
      out["judge.id"] = span.judgeId;
      out["judge.dimension"] = span.dimension;
      out["judge.score"] = span.score;
      out["judge.target_span_id"] = span.targetSpanId;
      break;
    }
    default:
      break;
  }

  if (span.attributes) {
    Object.entries(span.attributes).forEach(([key, value]) => {
      out[`agent_eval.${key}`] = value;
    });
  }

  if (events.length > 0) {
    const kinds = events.map((evt) => evt.kind);
    out["agent_eval.event_count"] = events.length;
    out["agent_eval.event_kinds"] = JSON.stringify(kinds);
  }
  return out;
}
1574
/**
 * Map an internal span kind to an OTLP span-kind name.
 *
 * @param {string} kind - Internal span kind.
 * @returns {string} "SPAN_KIND_CLIENT" for "llm" and "retrieval" spans,
 *   "SPAN_KIND_INTERNAL" for everything else.
 */
function spanKindToOtlpKind(kind) {
  const isClientCall = kind === "llm" || kind === "retrieval";
  return isClientCall ? "SPAN_KIND_CLIENT" : "SPAN_KIND_INTERNAL";
}
1584
/**
 * Map an internal span kind to the value written to `openinference.span.kind`.
 *
 * @param {string} kind - Internal span kind.
 * @returns {string} The mapped kind; "SPAN" for any kind without a mapping.
 */
function spanKindToOpenInferenceKind(kind) {
  // A Map (not an object literal) so keys like "constructor" cannot hit the prototype.
  const mapping = new Map([
    ["llm", "LLM"],
    ["tool", "TOOL"],
    ["retrieval", "CHAIN"],
    ["judge", "GUARDRAIL"],
    ["sandbox", "CHAIN"],
    ["agent", "AGENT"]
  ]);
  return mapping.has(kind) ? mapping.get(kind) : "SPAN";
}
1602
/**
 * Shape one span into the snake_case record that is serialized as an NDJSON line.
 *
 * @param {object} args - Camel-cased span fields: `traceId`, `spanId`,
 *   `parentSpanId`, `name`, `kind`, `startTime`, `endTime`, `statusCode`,
 *   `statusMessage`, `resource`, `attributes`.
 * @returns {object} Record with `trace_id`, `span_id`, `parent_span_id`,
 *   `name`, `kind`, `start_time`, `end_time`, `status`, `resource` and
 *   `attributes`, in that key order.
 */
function toLine(args) {
  const {
    traceId,
    spanId,
    parentSpanId,
    name,
    kind,
    startTime,
    endTime,
    statusCode,
    statusMessage,
    resource,
    attributes
  } = args;
  const status = { code: statusCode, message: statusMessage };
  return {
    trace_id: traceId,
    span_id: spanId,
    parent_span_id: parentSpanId,
    name,
    kind,
    start_time: startTime,
    end_time: endTime,
    status,
    resource,
    attributes
  };
}
1616
/**
 * Convert an epoch-milliseconds timestamp to an ISO-8601 string.
 *
 * Any value that cannot be represented falls back to the Unix epoch: non-numbers,
 * NaN, infinities, zero or negative values, and finite values beyond the range
 * a `Date` can hold (for which `toISOString()` would otherwise throw a
 * `RangeError`).
 *
 * @param {number} ms - Milliseconds since the Unix epoch.
 * @returns {string} ISO-8601 timestamp.
 */
function msToIso(ms) {
  const epochIso = new Date(0).toISOString();
  if (!Number.isFinite(ms) || ms <= 0) return epochIso;
  const date = new Date(ms);
  // Finite values above 8.64e15 ms yield an Invalid Date.
  return Number.isNaN(date.getTime()) ? epochIso : date.toISOString();
}
1620
/**
 * Derive a 16-hex-character span id from an arbitrary id string.
 *
 * When the id contains at least 16 hexadecimal characters, the first 16 of
 * them (lower-cased, all other characters removed) are used; otherwise the id
 * is hashed with `foldTo16Hex`.
 *
 * @param {string} id - Source span id.
 * @returns {string} 16-character lowercase hex id.
 */
function padSpanId3(id) {
  const hexOnly = id.replace(/[^a-f0-9]/gi, "").toLowerCase();
  return hexOnly.length < 16 ? foldTo16Hex(id) : hexOnly.slice(0, 16);
}
1625
/**
 * Derive a 32-hex-character trace id from an arbitrary id string.
 *
 * When the id contains at least 32 hexadecimal characters, the first 32 of
 * them (lower-cased, all other characters removed) are used; otherwise the id
 * is hashed with `foldTo32Hex`.
 *
 * @param {string} id - Source run id.
 * @returns {string} 32-character lowercase hex id.
 */
function padTraceId2(id) {
  const hexOnly = id.replace(/[^a-f0-9]/gi, "").toLowerCase();
  return hexOnly.length < 32 ? foldTo32Hex(id) : hexOnly.slice(0, 32);
}
1630
/**
 * Hash a string to 16 lowercase hex characters.
 *
 * Runs a 32-bit FNV-1a style fold (offset basis 2166136261, prime 16777619)
 * over the string's code points and repeats the 8-hex-digit result twice, so
 * the output carries 32 bits of entropy.
 *
 * The fold reads full code points. Iterating a string yields whole code
 * points, so reading only the first UTF-16 unit of each would make characters
 * that share a high surrogate (e.g. two different emoji) hash identically.
 * Output for strings without astral characters is unaffected.
 *
 * @param {string} s - Input string.
 * @returns {string} 16-character lowercase hex digest.
 */
function foldTo16Hex(s) {
  let hash = 2166136261;
  for (const ch of s) {
    hash ^= ch.codePointAt(0);
    // Math.imul keeps the multiply in 32-bit space; >>> 0 makes it unsigned.
    hash = Math.imul(hash, 16777619) >>> 0;
  }
  const part = hash.toString(16).padStart(8, "0");
  return part.repeat(2);
}
1639
/**
 * Hash a string to 32 hex characters by concatenating two 16-character folds:
 * one of the string itself and one of the string with a "::trace" suffix.
 *
 * @param {string} s - Input string.
 * @returns {string} 32-character lowercase hex digest.
 */
function foldTo32Hex(s) {
  const head = foldTo16Hex(s);
  const tail = foldTo16Hex(`${s}::trace`).slice(0, 16);
  return head + tail;
}
1642
/**
 * Render a value as text without throwing on values JSON cannot encode.
 *
 * Strings are returned unchanged. Other values go through `JSON.stringify`;
 * if that throws (e.g. circular structures or BigInt), `String(value)` is used
 * instead. Values JSON.stringify maps to `undefined` (such as `undefined`
 * itself) are returned as `undefined`.
 *
 * @param {*} value - Value to render.
 * @returns {string|undefined} Text form of the value.
 */
function safeStringify(value) {
  if (typeof value === "string") return value;
  try {
    return JSON.stringify(value);
  } catch {
    return String(value);
  }
}
1649
+
1326
1650
  // src/replay.ts
1327
1651
  var ReplayCacheMissError = class extends ReplayError {
1328
1652
  constructor(url, requestKey2, message) {
@@ -1488,9 +1812,10 @@ export {
1488
1812
  otelRunCompleteHook,
1489
1813
  createOtelTracingStore,
1490
1814
  createOtelExporter,
1815
+ convertTraceStoresToOtlp,
1491
1816
  ReplayCacheMissError,
1492
1817
  ReplayCache,
1493
1818
  createReplayFetch,
1494
1819
  iterateRawCalls
1495
1820
  };
1496
- //# sourceMappingURL=chunk-JHA3ZGSO.js.map
1821
+ //# sourceMappingURL=chunk-XGNCBAVZ.js.map