@tangle-network/agent-eval 0.72.3 → 0.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/campaign/index.d.ts +34 -1
- package/dist/campaign/index.js +75 -0
- package/dist/campaign/index.js.map +1 -1
- package/dist/{chunk-JHA3ZGSO.js → chunk-XGNCBAVZ.js} +326 -1
- package/dist/chunk-XGNCBAVZ.js.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/openapi.json +1 -1
- package/dist/traces.d.ts +75 -2
- package/dist/traces.js +3 -1
- package/package.json +1 -1
- package/dist/chunk-JHA3ZGSO.js.map +0 -1
|
@@ -1323,6 +1323,330 @@ function padTraceId(id) {
|
|
|
1323
1323
|
return cleaned.slice(0, 32).padEnd(32, "0");
|
|
1324
1324
|
}
|
|
1325
1325
|
|
|
1326
|
+
// src/trace/store-to-otlp.ts
|
|
1327
|
+
import { readdirSync, readFileSync, statSync, writeFileSync } from "fs";
|
|
1328
|
+
import { join } from "path";
|
|
1329
|
+
/**
 * Projects one or more on-disk trace stores into a single NDJSON file of
 * span records, one JSON document per line.
 *
 * @param {string | object | object[]} source - A root directory path (treated
 *   as a "celled" layout), a single source descriptor, or an array of
 *   descriptors. Descriptors are read for `root`, `layout` and an optional
 *   `serviceName`.
 * @param {string} outPath - File the joined output is written to.
 * @param {object} [opts]
 * @param {string} [opts.serviceName] - Service name used when a source does
 *   not carry its own; "agent-eval" when omitted.
 * @param {Function} [opts.resourceAttributes] - Called with each run; the
 *   result is merged into that run's resource attributes.
 * @param {Function} [opts.runAttributes] - Called with each run; the result is
 *   merged into the run span's attributes.
 * @returns {{ spanCount: number, runCount: number, cellCount: number, cellErrorCount: number }}
 *   Totals across all sources; `cellErrorCount` counts cells whose projection threw.
 */
function convertTraceStoresToOtlp(source, outPath, opts = {}) {
  let sources;
  if (Array.isArray(source)) {
    sources = [...source];
  } else if (typeof source === "string") {
    sources = [{ root: source, layout: "celled" }];
  } else {
    sources = [source];
  }

  const fallbackServiceName = opts.serviceName ?? "agent-eval";
  const resourceAttributes = opts.resourceAttributes ?? (() => ({}));
  const runAttributes = opts.runAttributes ?? (() => ({}));

  const output = [];
  const totals = { spanCount: 0, runCount: 0, cellCount: 0, cellErrorCount: 0 };

  for (const src of sources) {
    const serviceName = src.serviceName ?? fallbackServiceName;

    // A "flat" source is itself the only cell; otherwise every sub-directory is one.
    let cells;
    if (src.layout === "flat") {
      cells = [{ label: "<root>", dir: src.root }];
    } else {
      cells = listCells(src.root).map((name) => ({ label: name, dir: join(src.root, name) }));
    }

    for (const { label, dir } of cells) {
      try {
        const projected = projectCell({
          cellDir: dir,
          serviceName,
          resourceAttributes,
          runAttributes
        });
        for (const entry of projected.lines) output.push(entry);
        totals.spanCount += projected.spanCount;
        totals.runCount += projected.runCount;
        totals.cellCount += 1;
      } catch (err) {
        // One unreadable cell must not abort the whole export: report it and move on.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(
          `[traces-to-otlp] cell ${label} (${dir}) skipped: ${reason}`
        );
        totals.cellErrorCount += 1;
      }
    }
  }

  // Terminate the file with a newline only when there is at least one record.
  const trailer = output.length > 0 ? "\n" : "";
  writeFileSync(outPath, output.join("\n") + trailer);
  return totals;
}
|
|
1365
|
+
/**
 * Projects a single cell directory into serialized span records.
 *
 * Runs and spans are loaded from their shard files, then the event log is
 * replayed on top: `state_mutation` events whose payload entity is `run`,
 * `run.update`, `span` or `span.update` insert or patch the in-memory records,
 * and every other event carrying a `spanId` is grouped under that span.
 * For each run one root span record is emitted, followed by the records of the
 * spans whose `runId` matches that run.
 *
 * @param {object} args
 * @param {string} args.cellDir - Directory holding the cell's shard files.
 * @param {string} args.serviceName - Value for `service.name` and `agent.workflow.name`.
 * @param {Function} args.resourceAttributes - Called with each run; result is merged into the resource attributes.
 * @param {Function} args.runAttributes - Called with each run; result is merged into the run span attributes.
 * @returns {{ lines: string[], runCount: number, spanCount: number }}
 *   JSON-serialized records plus the number of runs and spans emitted.
 */
function projectCell(args) {
  const { cellDir, serviceName, resourceAttributes, runAttributes } = args;
  const lines = [];
  let runCount = 0;
  let spanCount = 0;
  const runs = readMergedShards(cellDir, "runs", "runId");
  const spans = readMergedShards(cellDir, "spans", "spanId");
  const events = readShards(cellDir, "events");
  const runByRunId = /* @__PURE__ */ new Map();
  for (const r of runs) runByRunId.set(r.runId, r);
  const spanBySpanId = /* @__PURE__ */ new Map();
  for (const s of spans) spanBySpanId.set(s.spanId, s);
  const eventsBySpanId = /* @__PURE__ */ new Map();
  for (const e of events) {
    // A shard line may parse to a non-object (e.g. `null`); dereferencing it
    // would throw and make the caller discard the whole cell.
    if (e === null || typeof e !== "object") continue;
    if (e.kind === "state_mutation" && e.payload && typeof e.payload === "object") {
      const entity = e.payload.entity;
      if (entity === "run") {
        const run = e.payload.run;
        if (run?.runId) runByRunId.set(run.runId, run);
        continue;
      }
      if (entity === "run.update") {
        const patch = e.payload.patch;
        if (patch && e.runId) {
          // Patches for runs that were never recorded are dropped.
          const prior = runByRunId.get(e.runId);
          if (prior) runByRunId.set(e.runId, { ...prior, ...patch });
        }
        continue;
      }
      if (entity === "span") {
        const span = e.payload.span;
        if (span?.spanId) spanBySpanId.set(span.spanId, span);
        continue;
      }
      if (entity === "span.update") {
        const spanId = e.payload.spanId;
        const patch = e.payload.patch;
        if (spanId && patch) {
          // Patches for spans that were never recorded are dropped.
          const prior = spanBySpanId.get(spanId);
          if (prior) spanBySpanId.set(spanId, { ...prior, ...patch });
        }
        continue;
      }
    }
    if (!e.spanId) continue;
    const arr = eventsBySpanId.get(e.spanId) ?? [];
    arr.push(e);
    eventsBySpanId.set(e.spanId, arr);
  }

  // Group spans by run once, instead of rescanning every span for every run.
  // Buckets keep Map insertion order, so the emitted order is unchanged.
  const spansByRunId = /* @__PURE__ */ new Map();
  for (const span of spanBySpanId.values()) {
    const bucket = spansByRunId.get(span.runId);
    if (bucket) bucket.push(span);
    else spansByRunId.set(span.runId, [span]);
  }

  for (const run of runByRunId.values()) {
    const traceId = padTraceId2(run.runId);
    const agentName = run.variantId ?? run.scenarioId;
    const sharedResource = {
      attributes: {
        "service.name": serviceName,
        "agent.name": agentName,
        "run.id": run.runId,
        "run.status": run.status,
        ...resourceAttributes(run)
      }
    };
    // Synthetic root span for the run; spans without a parent hang off it.
    const runSpanId = padSpanId3(`run-${run.runId}`);
    const runStart = msToIso(run.startedAt);
    const runEnd = msToIso(run.endedAt ?? run.startedAt);
    // Any failure class other than "success" marks the run span as an error.
    const runStatus = run.outcome?.failureClass && run.outcome.failureClass !== "success" ? "STATUS_CODE_ERROR" : "STATUS_CODE_OK";
    const runAttrs = {
      "openinference.span.kind": "AGENT",
      "agent.name": agentName,
      "agent.workflow.name": serviceName,
      ...runAttributes(run)
    };
    lines.push(
      JSON.stringify(
        toLine({
          traceId,
          spanId: runSpanId,
          parentSpanId: "",
          name: `run.${agentName}`,
          kind: "SPAN_KIND_INTERNAL",
          startTime: runStart,
          endTime: runEnd,
          statusCode: runStatus,
          statusMessage: run.outcome?.notes ?? "",
          resource: sharedResource,
          attributes: runAttrs
        })
      )
    );
    runCount += 1;
    for (const span of spansByRunId.get(run.runId) ?? []) {
      const spanAttrs = spanToAttributes(span, eventsBySpanId.get(span.spanId) ?? []);
      const statusCode = span.status === "error" ? "STATUS_CODE_ERROR" : "STATUS_CODE_OK";
      lines.push(
        JSON.stringify(
          toLine({
            traceId,
            spanId: padSpanId3(span.spanId),
            parentSpanId: span.parentSpanId ? padSpanId3(span.parentSpanId) : runSpanId,
            name: span.name,
            kind: spanKindToOtlpKind(span.kind),
            startTime: msToIso(span.startedAt),
            endTime: msToIso(span.endedAt ?? span.startedAt),
            statusCode,
            statusMessage: span.error ?? "",
            resource: sharedResource,
            attributes: spanAttrs
          })
        )
      );
      spanCount += 1;
    }
  }
  return { lines, runCount, spanCount };
}
|
|
1480
|
+
/**
 * Lists the names of the immediate sub-directories of `root`, sorted.
 *
 * @param {string} root - Directory to scan.
 * @returns {string[]} Sorted directory names; empty when `root` cannot be read.
 */
function listCells(root) {
  try {
    const names = [];
    for (const entry of readdirSync(root, { withFileTypes: true })) {
      if (entry.isDirectory()) names.push(entry.name);
    }
    names.sort();
    return names;
  } catch {
    // Best effort: an unreadable or missing root simply contributes no cells.
    return [];
  }
}
|
|
1487
|
+
/**
 * Reads every NDJSON shard of one record family from a cell directory.
 *
 * A shard is any file named `<name>.ndjson` or `<name>.<anything>.ndjson`.
 * Shards are read oldest-first (by mtime, ties broken by file name) and their
 * rows concatenated in that order.
 *
 * Reading is best effort: an unreadable directory yields no rows, an
 * unreadable shard is skipped, and blank or malformed lines are ignored.
 * Lines that parse to something other than a plain object (`null`, numbers,
 * strings, arrays) are ignored too, because callers dereference each row as a
 * record.
 *
 * @param {string} cellDir - Directory containing the shard files.
 * @param {string} name - Record family prefix, e.g. "runs", "spans", "events".
 * @returns {object[]} Parsed rows in shard order.
 */
function readShards(cellDir, name) {
  let entries;
  try {
    entries = readdirSync(cellDir);
  } catch {
    return [];
  }
  const prefix = `${name}.`;
  const shards = entries
    .filter((f) => f.startsWith(prefix) && f.endsWith(".ndjson"))
    .map((f) => {
      const path = join(cellDir, f);
      let mtime = 0;
      try {
        mtime = statSync(path).mtimeMs;
      } catch {
        // The file vanished or is unreadable; keep mtime 0 so it sorts first
        // and let the read below decide whether it is usable.
      }
      return { file: f, path, mtime };
    })
    .sort((a, b) => a.mtime - b.mtime || a.file.localeCompare(b.file));
  const rows = [];
  for (const shard of shards) {
    let text;
    try {
      text = readFileSync(shard.path, "utf-8");
    } catch {
      continue;
    }
    for (const line of text.split("\n")) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      let parsed;
      try {
        parsed = JSON.parse(trimmed);
      } catch {
        // Malformed line (e.g. a partially written record): skip it.
        continue;
      }
      // Valid JSON is not necessarily a record; only keep plain objects.
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        rows.push(parsed);
      }
    }
  }
  return rows;
}
|
|
1521
|
+
/**
 * Reads a record family and collapses it to one record per id.
 *
 * Rows are applied in the order `readShards` returns them. A row flagged with
 * a truthy `_update` is shallow-merged over the record already stored for its
 * id (and the flag is reset to `undefined`); any other row replaces the stored
 * record outright. Rows without a truthy id are ignored.
 *
 * @param {string} cellDir - Directory containing the shard files.
 * @param {string} name - Record family prefix passed to `readShards`.
 * @param {string} idKey - Property that identifies a record, e.g. "runId".
 * @returns {object[]} Merged records, in order of first appearance of each id.
 */
function readMergedShards(cellDir, name, idKey) {
  const merged = /* @__PURE__ */ new Map();
  readShards(cellDir, name).forEach((record) => {
    const key = record[idKey];
    if (!key) return;
    const existing = merged.get(key);
    if (existing && record._update) {
      merged.set(key, { ...existing, ...record, _update: void 0 });
      return;
    }
    merged.set(key, record);
  });
  return Array.from(merged.values());
}
|
|
1536
|
+
/**
 * Builds the attribute bag for one span record.
 *
 * Always sets `openinference.span.kind`; then adds kind-specific keys for
 * "llm", "tool" and "judge" spans, copies every entry of `span.attributes`
 * under an `agent_eval.` prefix, and finally summarizes the span's events.
 *
 * @param {object} span - Span record; read for `kind`, kind-specific fields and `attributes`.
 * @param {object[]} events - Events grouped under this span.
 * @returns {object} Attribute map keyed by attribute name.
 */
function spanToAttributes(span, events) {
  const attrs = {
    "openinference.span.kind": spanKindToOpenInferenceKind(span.kind)
  };
  // Writes `key` only when the source field was actually present.
  const setIfDefined = (key, value) => {
    if (value !== void 0) attrs[key] = value;
  };

  switch (span.kind) {
    case "llm":
      attrs["llm.model_name"] = span.model;
      setIfDefined("llm.token_count.prompt", span.inputTokens);
      setIfDefined("llm.token_count.completion", span.outputTokens);
      setIfDefined("llm.cost_usd", span.costUsd);
      if (span.finishReason) attrs["llm.finish_reason"] = span.finishReason;
      if (Array.isArray(span.messages)) {
        // Only the last six messages are exported.
        attrs["llm.input_messages"] = JSON.stringify(span.messages.slice(-6));
      }
      if (typeof span.output === "string") {
        attrs["llm.output_messages"] = JSON.stringify([{ role: "assistant", content: span.output }]);
      }
      break;
    case "tool":
      attrs["tool.name"] = span.toolName;
      setIfDefined("tool.latency_ms", span.latencyMs);
      attrs["input.value"] = safeStringify(span.args);
      if (span.result !== void 0) attrs["output.value"] = safeStringify(span.result);
      break;
    case "judge":
      attrs["judge.id"] = span.judgeId;
      attrs["judge.dimension"] = span.dimension;
      attrs["judge.score"] = span.score;
      attrs["judge.target_span_id"] = span.targetSpanId;
      break;
    default:
      break;
  }

  if (span.attributes) {
    Object.entries(span.attributes).forEach(([key, value]) => {
      attrs[`agent_eval.${key}`] = value;
    });
  }

  if (events.length > 0) {
    attrs["agent_eval.event_count"] = events.length;
    attrs["agent_eval.event_kinds"] = JSON.stringify(events.map((evt) => evt.kind));
  }
  return attrs;
}
|
|
1574
|
+
/**
 * Maps a span kind to the span-kind string written to the `kind` field.
 *
 * @param {string} kind - Span kind from the trace store.
 * @returns {string} "SPAN_KIND_CLIENT" for "llm" and "retrieval",
 *   "SPAN_KIND_INTERNAL" for everything else.
 */
function spanKindToOtlpKind(kind) {
  const mapsToClient = kind === "llm" || kind === "retrieval";
  return mapsToClient ? "SPAN_KIND_CLIENT" : "SPAN_KIND_INTERNAL";
}
|
|
1584
|
+
/**
 * Maps a span kind to the value written to `openinference.span.kind`.
 *
 * @param {string} kind - Span kind from the trace store.
 * @returns {string} The mapped kind, or "SPAN" for any kind without a mapping.
 */
function spanKindToOpenInferenceKind(kind) {
  // A Map (not a plain object) so inherited keys such as "constructor" never match.
  const mapping = new Map([
    ["llm", "LLM"],
    ["tool", "TOOL"],
    ["retrieval", "CHAIN"],
    ["judge", "GUARDRAIL"],
    ["sandbox", "CHAIN"],
    ["agent", "AGENT"]
  ]);
  return mapping.get(kind) ?? "SPAN";
}
|
|
1602
|
+
/**
 * Reshapes camelCase span fields into the snake_case record that is
 * serialized as one output line.
 *
 * @param {object} args - Span fields: `traceId`, `spanId`, `parentSpanId`,
 *   `name`, `kind`, `startTime`, `endTime`, `statusCode`, `statusMessage`,
 *   `resource`, `attributes`.
 * @returns {object} Record with `status` nested as `{ code, message }`.
 */
function toLine(args) {
  const {
    traceId,
    spanId,
    parentSpanId,
    name,
    kind,
    startTime,
    endTime,
    statusCode,
    statusMessage,
    resource,
    attributes
  } = args;
  // Key order is the serialized field order, so it is kept stable.
  const record = {};
  record.trace_id = traceId;
  record.span_id = spanId;
  record.parent_span_id = parentSpanId;
  record.name = name;
  record.kind = kind;
  record.start_time = startTime;
  record.end_time = endTime;
  record.status = { code: statusCode, message: statusMessage };
  record.resource = resource;
  record.attributes = attributes;
  return record;
}
|
|
1616
|
+
/**
 * Converts an epoch-millisecond timestamp to an ISO-8601 string.
 *
 * Anything that is not a usable timestamp falls back to the Unix epoch
 * ("1970-01-01T00:00:00.000Z"): non-numbers, NaN, infinities, zero, negative
 * values, and finite values outside the range a Date can represent.
 *
 * @param {number} ms - Milliseconds since the Unix epoch.
 * @returns {string} ISO-8601 timestamp; never throws.
 */
function msToIso(ms) {
  const epochIso = (/* @__PURE__ */ new Date(0)).toISOString();
  if (!Number.isFinite(ms) || ms <= 0) return epochIso;
  const date = new Date(ms);
  // Finite values beyond the Date range (about 8.64e15 ms) give an Invalid Date,
  // and calling toISOString() on that throws RangeError.
  if (Number.isNaN(date.getTime())) return epochIso;
  return date.toISOString();
}
|
|
1620
|
+
/**
 * Derives a 16-character lowercase hex span id from an arbitrary id string.
 *
 * When the id already contains at least 16 hex digits, the first 16 of them
 * are used; otherwise the original id is hashed with `foldTo16Hex`.
 *
 * @param {string} id - Source identifier.
 * @returns {string} 16 hex characters.
 */
function padSpanId3(id) {
  const hexDigits = id.replace(/[^a-f0-9]/gi, "").toLowerCase();
  return hexDigits.length < 16 ? foldTo16Hex(id) : hexDigits.slice(0, 16);
}
|
|
1625
|
+
/**
 * Derives a 32-character lowercase hex trace id from an arbitrary id string.
 *
 * When the id already contains at least 32 hex digits, the first 32 of them
 * are used; otherwise the original id is hashed with `foldTo32Hex`.
 *
 * @param {string} id - Source identifier.
 * @returns {string} 32 hex characters.
 */
function padTraceId2(id) {
  const hexDigits = id.replace(/[^a-f0-9]/gi, "").toLowerCase();
  return hexDigits.length < 32 ? foldTo32Hex(id) : hexDigits.slice(0, 32);
}
|
|
1630
|
+
/**
 * Hashes a string to 16 hex characters.
 *
 * Starting from 2166136261, each iterated character's first UTF-16 code unit
 * is XOR-ed in and the state multiplied by 16777619 in 32-bit arithmetic. The
 * resulting 8 hex digits are written twice, so the output carries 32 bits of
 * hash state, not 64.
 *
 * @param {string} s - Input string.
 * @returns {string} 16 lowercase hex characters.
 */
function foldTo16Hex(s) {
  const hash = Array.from(s).reduce(
    (state, ch) => Math.imul(state ^ ch.charCodeAt(0), 16777619) >>> 0,
    2166136261
  );
  const half = hash.toString(16).padStart(8, "0");
  // `half` is exactly 8 digits, so doubling it gives the 16-character id.
  return half.repeat(2).slice(0, 16);
}
|
|
1639
|
+
/**
 * Hashes a string to 32 hex characters by concatenating `foldTo16Hex` of the
 * string with `foldTo16Hex` of the string suffixed by "::trace".
 *
 * @param {string} s - Input string.
 * @returns {string} 32 lowercase hex characters.
 */
function foldTo32Hex(s) {
  const head = foldTo16Hex(s);
  const tail = foldTo16Hex(`${s}::trace`).slice(0, 16);
  return head + tail;
}
|
|
1642
|
+
/**
 * Serializes a value for use as an attribute without throwing on values JSON
 * cannot encode.
 *
 * Strings are returned unchanged. Other values go through `JSON.stringify`;
 * if that throws (for example on a circular structure or a BigInt) the result
 * of `String(value)` is returned instead. Values `JSON.stringify` maps to
 * `undefined` (such as `undefined` itself) are returned as `undefined`.
 *
 * @param {*} value - Value to serialize.
 * @returns {string | undefined} Serialized form.
 */
function safeStringify(value) {
  if (typeof value === "string") return value;
  try {
    return JSON.stringify(value);
  } catch {
    return String(value);
  }
}
|
|
1649
|
+
|
|
1326
1650
|
// src/replay.ts
|
|
1327
1651
|
var ReplayCacheMissError = class extends ReplayError {
|
|
1328
1652
|
constructor(url, requestKey2, message) {
|
|
@@ -1488,9 +1812,10 @@ export {
|
|
|
1488
1812
|
otelRunCompleteHook,
|
|
1489
1813
|
createOtelTracingStore,
|
|
1490
1814
|
createOtelExporter,
|
|
1815
|
+
convertTraceStoresToOtlp,
|
|
1491
1816
|
ReplayCacheMissError,
|
|
1492
1817
|
ReplayCache,
|
|
1493
1818
|
createReplayFetch,
|
|
1494
1819
|
iterateRawCalls
|
|
1495
1820
|
};
|
|
1496
|
-
//# sourceMappingURL=chunk-
|
|
1821
|
+
//# sourceMappingURL=chunk-XGNCBAVZ.js.map
|