@ramarivera/coding-agent-langfuse 0.1.27 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backfill.d.ts +6 -1
- package/dist/backfill.js +151 -25
- package/package.json +1 -1
package/dist/backfill.d.ts
CHANGED
|
@@ -40,6 +40,9 @@ type BackfillOptions = {
|
|
|
40
40
|
sinceMs?: number;
|
|
41
41
|
untilMs?: number;
|
|
42
42
|
batchSize: number;
|
|
43
|
+
maxRequestBytes: number;
|
|
44
|
+
maxFieldBytes: number;
|
|
45
|
+
postDelayMs: number;
|
|
43
46
|
};
|
|
44
47
|
type RunSummary = {
|
|
45
48
|
discovered: Record<string, number>;
|
|
@@ -68,7 +71,9 @@ declare function opencodeEvents(homeDir: string, options?: {
|
|
|
68
71
|
untilMs?: number;
|
|
69
72
|
}): BackfillEvent[];
|
|
70
73
|
declare function fingerprint(event: BackfillEvent): string;
|
|
71
|
-
declare function toOtlp(events: BackfillEvent[]
|
|
74
|
+
declare function toOtlp(events: BackfillEvent[], options?: {
|
|
75
|
+
maxFieldBytes?: number;
|
|
76
|
+
}): Record<string, unknown>;
|
|
72
77
|
declare function discoverEvents(options: BackfillOptions): BackfillEvent[];
|
|
73
78
|
declare function run(options: BackfillOptions): Promise<RunSummary>;
|
|
74
79
|
declare function follow(options: BackfillOptions): Promise<FollowSummary>;
|
package/dist/backfill.js
CHANGED
|
@@ -15,6 +15,8 @@ const importIdentityVersions = {
|
|
|
15
15
|
};
|
|
16
16
|
const defaultEndpoint = "https://langfuse.ai.roxasroot.net/otel/v1/traces";
|
|
17
17
|
const deadRemoteEndpoint = "http://langfuse.ai.roxasroot.net:14318/v1/traces";
|
|
18
|
+
const defaultMaxRequestBytes = 12 * 1024 * 1024;
|
|
19
|
+
const defaultMaxFieldBytes = 512 * 1024;
|
|
18
20
|
const defaultStatePath = join(homedir(), ".local/state/coding-agent-langfuse/backfill-v6.json");
|
|
19
21
|
const currentHost = hostname();
|
|
20
22
|
function usage() {
|
|
@@ -29,6 +31,9 @@ Options:
|
|
|
29
31
|
--until ISO_OR_MS Only import events before or at this timestamp
|
|
30
32
|
--limit N Stop after N unsent events
|
|
31
33
|
--batch-size N OTLP spans per POST (default: 50)
|
|
34
|
+
--max-request-bytes N Split OTLP POSTs below this JSON byte size (default: ${defaultMaxRequestBytes})
|
|
35
|
+
--max-field-bytes N Truncate individual input/output fields above this byte size (default: ${defaultMaxFieldBytes})
|
|
36
|
+
--post-delay-ms N Delay after each successful OTLP POST (default: 0)
|
|
32
37
|
--follow Keep scanning and sending newly written events
|
|
33
38
|
--poll-interval-ms N Delay between --follow scans (default: 5000)
|
|
34
39
|
--idle-exit-after-ms N Stop --follow after this much time without new sends
|
|
@@ -45,6 +50,15 @@ function parseArgs(argv) {
|
|
|
45
50
|
let sinceMs;
|
|
46
51
|
let untilMs;
|
|
47
52
|
let batchSize = 50;
|
|
53
|
+
let maxRequestBytes = Number.parseInt(process.env.LANGFUSE_BACKFILL_MAX_REQUEST_BYTES ?? "", 10);
|
|
54
|
+
if (!Number.isFinite(maxRequestBytes))
|
|
55
|
+
maxRequestBytes = defaultMaxRequestBytes;
|
|
56
|
+
let maxFieldBytes = Number.parseInt(process.env.LANGFUSE_BACKFILL_MAX_FIELD_BYTES ?? "", 10);
|
|
57
|
+
if (!Number.isFinite(maxFieldBytes))
|
|
58
|
+
maxFieldBytes = defaultMaxFieldBytes;
|
|
59
|
+
let postDelayMs = Number.parseInt(process.env.LANGFUSE_BACKFILL_POST_DELAY_MS ?? "", 10);
|
|
60
|
+
if (!Number.isFinite(postDelayMs))
|
|
61
|
+
postDelayMs = 0;
|
|
48
62
|
let follow = false;
|
|
49
63
|
let pollIntervalMs = 5_000;
|
|
50
64
|
let idleExitAfterMs;
|
|
@@ -89,6 +103,15 @@ function parseArgs(argv) {
|
|
|
89
103
|
else if (arg === "--batch-size") {
|
|
90
104
|
batchSize = Number.parseInt(next(), 10);
|
|
91
105
|
}
|
|
106
|
+
else if (arg === "--max-request-bytes") {
|
|
107
|
+
maxRequestBytes = Number.parseInt(next(), 10);
|
|
108
|
+
}
|
|
109
|
+
else if (arg === "--max-field-bytes") {
|
|
110
|
+
maxFieldBytes = Number.parseInt(next(), 10);
|
|
111
|
+
}
|
|
112
|
+
else if (arg === "--post-delay-ms") {
|
|
113
|
+
postDelayMs = Number.parseInt(next(), 10);
|
|
114
|
+
}
|
|
92
115
|
else if (arg === "--follow") {
|
|
93
116
|
follow = true;
|
|
94
117
|
}
|
|
@@ -111,6 +134,15 @@ function parseArgs(argv) {
|
|
|
111
134
|
if (!Number.isFinite(batchSize) || batchSize < 1) {
|
|
112
135
|
throw new Error("--batch-size must be a positive integer");
|
|
113
136
|
}
|
|
137
|
+
if (!Number.isFinite(maxRequestBytes) || maxRequestBytes < 1024) {
|
|
138
|
+
throw new Error("--max-request-bytes must be an integer >= 1024");
|
|
139
|
+
}
|
|
140
|
+
if (!Number.isFinite(maxFieldBytes) || maxFieldBytes < 1024) {
|
|
141
|
+
throw new Error("--max-field-bytes must be an integer >= 1024");
|
|
142
|
+
}
|
|
143
|
+
if (!Number.isFinite(postDelayMs) || postDelayMs < 0) {
|
|
144
|
+
throw new Error("--post-delay-ms must be a non-negative integer");
|
|
145
|
+
}
|
|
114
146
|
if (!Number.isFinite(pollIntervalMs) || pollIntervalMs < 1) {
|
|
115
147
|
throw new Error("--poll-interval-ms must be a positive integer");
|
|
116
148
|
}
|
|
@@ -134,6 +166,9 @@ function parseArgs(argv) {
|
|
|
134
166
|
sinceMs,
|
|
135
167
|
untilMs,
|
|
136
168
|
batchSize,
|
|
169
|
+
maxRequestBytes,
|
|
170
|
+
maxFieldBytes,
|
|
171
|
+
postDelayMs,
|
|
137
172
|
};
|
|
138
173
|
}
|
|
139
174
|
function normalizeEndpoint(endpoint) {
|
|
@@ -1124,9 +1159,40 @@ function attr(key, value) {
|
|
|
1124
1159
|
return { key, value: { stringValue: value } };
|
|
1125
1160
|
return { key, value: { stringValue: JSON.stringify(value).slice(0, 8000) } };
|
|
1126
1161
|
}
|
|
1127
|
-
function
|
|
1162
|
+
/**
 * Returns the number of bytes `value` occupies when encoded as UTF-8.
 */
function utf8Bytes(value) {
    return Buffer.byteLength(value, "utf8");
}
/**
 * Shortens `value` so that the kept prefix fits in `maxBytes` UTF-8 bytes and
 * appends a marker line recording the original size and the limit.
 *
 * The marker is appended after the cut, so a truncated result is longer than
 * `maxBytes` by the length of the marker.
 *
 * The cut is made on code-point boundaries: a surrogate pair is either kept
 * whole or dropped whole, never split. The prefix is measured in a single pass
 * instead of repeatedly re-measuring a shrinking string.
 *
 * @param value    String to limit.
 * @param maxBytes Maximum UTF-8 byte size of the kept prefix.
 * @returns `value` unchanged when it already fits, otherwise prefix + marker.
 */
function truncateStringByBytes(value, maxBytes) {
    const originalBytes = utf8Bytes(value);
    if (originalBytes <= maxBytes)
        return value;
    // Non-finite or negative limits keep nothing, matching an empty prefix.
    const limit = Number.isFinite(maxBytes) ? Math.max(0, maxBytes) : 0;
    let keptBytes = 0;
    let keptUnits = 0;
    // Iterating a string yields whole code points (surrogate pairs stay together).
    for (const ch of value) {
        const codePoint = ch.codePointAt(0) ?? 0;
        // UTF-8 width per code point; an unpaired surrogate is encoded as
        // U+FFFD, which is 3 bytes, so it falls into the 3-byte bucket too.
        const size = codePoint < 0x80 ? 1 : codePoint < 0x800 ? 2 : codePoint < 0x10000 ? 3 : 4;
        if (keptBytes + size > limit)
            break;
        keptBytes += size;
        keptUnits += ch.length;
    }
    const out = value.slice(0, keptUnits);
    return `${out}\n[truncated by coding-agent-langfuse: original_bytes=${originalBytes} limit_bytes=${maxBytes}]`;
}
/**
 * Applies the byte limit to an arbitrary payload value.
 *
 * - `undefined` / `null` are returned as-is.
 * - Strings are truncated directly.
 * - Any other value is measured by its JSON serialisation; if that fits, the
 *   original value is returned untouched, otherwise the truncated JSON text
 *   (a string) is returned in its place.
 *
 * @param value    Payload value to limit.
 * @param maxBytes Maximum UTF-8 byte size before truncation kicks in.
 */
function limitValueBytes(value, maxBytes) {
    if (value === undefined || value === null)
        return value;
    if (typeof value === "string")
        return truncateStringByBytes(value, maxBytes);
    const json = JSON.stringify(value);
    // JSON.stringify yields undefined for values with no JSON form (functions,
    // symbols); there is nothing to measure, so pass the value through.
    if (json === undefined)
        return value;
    if (utf8Bytes(json) <= maxBytes)
        return value;
    return truncateStringByBytes(json, maxBytes);
}
/**
 * Returns a shallow copy of `event` whose `input` and `output` fields have
 * each been limited to `maxFieldBytes`. All other fields are copied unchanged.
 */
function limitEventPayload(event, maxFieldBytes) {
    return {
        ...event,
        input: limitValueBytes(event.input, maxFieldBytes),
        output: limitValueBytes(event.output, maxFieldBytes),
    };
}
|
|
1191
|
+
function toOtlp(events, options = {}) {
|
|
1192
|
+
const maxFieldBytes = options.maxFieldBytes ?? defaultMaxFieldBytes;
|
|
1128
1193
|
const spansByTrace = new Map();
|
|
1129
|
-
for (const
|
|
1194
|
+
for (const rawEvent of events) {
|
|
1195
|
+
const event = limitEventPayload(rawEvent, maxFieldBytes);
|
|
1130
1196
|
const id = traceId(event);
|
|
1131
1197
|
spansByTrace.set(id, [...(spansByTrace.get(id) ?? []), event]);
|
|
1132
1198
|
}
|
|
@@ -1137,10 +1203,7 @@ function toOtlp(events) {
|
|
|
1137
1203
|
const sortedEvents = [...traceEventsForSession].sort((a, b) => a.startMs - b.startMs);
|
|
1138
1204
|
const traceStartMs = sortedEvents[0]?.startMs ?? Date.now();
|
|
1139
1205
|
const traceEndMs = Math.max(...sortedEvents.map((event) => event.endMs ?? event.startMs + 1), traceStartMs + 1);
|
|
1140
|
-
const
|
|
1141
|
-
const lastOutputEvent = [...sortedEvents]
|
|
1142
|
-
.reverse()
|
|
1143
|
-
.find((event) => event.output !== undefined);
|
|
1206
|
+
const shouldEmitRootSpan = sortedEvents.some((event) => event.recordId === "session");
|
|
1144
1207
|
const rootAttributes = [
|
|
1145
1208
|
attr("service.name", `agent.${first.agent}`),
|
|
1146
1209
|
attr("deployment.environment", "local"),
|
|
@@ -1160,8 +1223,6 @@ function toOtlp(events) {
|
|
|
1160
1223
|
attr("langfuse.trace.metadata.machine", currentHost),
|
|
1161
1224
|
attr("langfuse.trace.metadata.source_path", first.sourcePath),
|
|
1162
1225
|
attr("langfuse.trace.metadata.cwd", first.cwd),
|
|
1163
|
-
attr("langfuse.trace.input", firstInputEvent?.input),
|
|
1164
|
-
attr("langfuse.trace.output", lastOutputEvent?.output),
|
|
1165
1226
|
attr("langfuse.observation.metadata.agent", first.agent),
|
|
1166
1227
|
attr("langfuse.observation.metadata.host", currentHost),
|
|
1167
1228
|
attr("langfuse.observation.metadata.machine", currentHost),
|
|
@@ -1220,13 +1281,6 @@ function toOtlp(events) {
|
|
|
1220
1281
|
attr("agent.record_id", event.recordId),
|
|
1221
1282
|
attr("agent.original_start_time", new Date(event.startMs).toISOString()),
|
|
1222
1283
|
attr("agent.original_end_time", event.endMs === undefined ? undefined : new Date(event.endMs).toISOString()),
|
|
1223
|
-
attr("langfuse.trace.metadata.agent", event.agent),
|
|
1224
|
-
attr("langfuse.trace.metadata.host", currentHost),
|
|
1225
|
-
attr("langfuse.trace.metadata.machine", currentHost),
|
|
1226
|
-
attr("langfuse.trace.metadata.source_path", event.sourcePath),
|
|
1227
|
-
attr("langfuse.trace.metadata.cwd", event.cwd),
|
|
1228
|
-
attr("langfuse.trace.metadata.model", event.model),
|
|
1229
|
-
attr("langfuse.trace.metadata.provider", event.provider),
|
|
1230
1284
|
attr("langfuse.observation.metadata.agent", event.agent),
|
|
1231
1285
|
attr("langfuse.observation.metadata.host", currentHost),
|
|
1232
1286
|
attr("langfuse.observation.metadata.machine", currentHost),
|
|
@@ -1237,8 +1291,6 @@ function toOtlp(events) {
|
|
|
1237
1291
|
attr("langfuse.observation.metadata.model", modelName ?? event.model),
|
|
1238
1292
|
attr("langfuse.observation.metadata.provider", event.provider),
|
|
1239
1293
|
attr("langfuse.observation.metadata.cost_source", cost?.source),
|
|
1240
|
-
attr("langfuse.trace.input", event.input),
|
|
1241
|
-
attr("langfuse.trace.output", event.output),
|
|
1242
1294
|
attr("langfuse.observation.input", event.input),
|
|
1243
1295
|
attr("langfuse.observation.output", event.output),
|
|
1244
1296
|
attr("source.path", event.sourcePath),
|
|
@@ -1265,7 +1317,7 @@ function toOtlp(events) {
|
|
|
1265
1317
|
status: { code: 1 },
|
|
1266
1318
|
};
|
|
1267
1319
|
});
|
|
1268
|
-
return [rootSpan, ...childSpans];
|
|
1320
|
+
return shouldEmitRootSpan ? [rootSpan, ...childSpans] : childSpans;
|
|
1269
1321
|
});
|
|
1270
1322
|
return {
|
|
1271
1323
|
resourceSpans: [
|
|
@@ -1292,13 +1344,55 @@ function toOtlp(events) {
|
|
|
1292
1344
|
],
|
|
1293
1345
|
};
|
|
1294
1346
|
}
|
|
1295
|
-
|
|
1347
|
+
/**
 * Measures the UTF-8 byte size of the JSON body that `toOtlp` produces for
 * `events` with the given `options`.
 */
function otlpBodyBytes(events, options) {
    const payload = toOtlp(events, options);
    const serialized = JSON.stringify(payload);
    return Buffer.byteLength(serialized, "utf8");
}
|
|
1350
|
+
/**
 * Groups `events` into consecutive batches for sending.
 *
 * A batch is closed when it already holds `options.batchSize` events, or when
 * adding the next event would make the serialised OTLP body larger than
 * `options.maxRequestBytes`. Event order is preserved.
 *
 * @param events  Events to split, in send order.
 * @param options `batchSize`, `maxRequestBytes`, plus whatever `otlpBodyBytes`
 *                forwards to `toOtlp` (e.g. `maxFieldBytes`).
 * @returns Array of `{ events, bytes }`, where `bytes` is the serialised body
 *          size measured for that batch.
 * @throws Error when a single event on its own exceeds `maxRequestBytes`.
 */
function splitSendBatches(events, options) {
    const batches = [];
    let current = [];
    let currentBytes = 0;
    const flush = () => {
        if (current.length === 0)
            return;
        batches.push({ events: current, bytes: currentBytes });
        current = [];
        currentBytes = 0;
    };
    for (const event of events) {
        const singleBytes = otlpBodyBytes([event], options);
        if (singleBytes > options.maxRequestBytes) {
            // No flush here: the partial result is discarded when we throw.
            throw new Error(`Single OTLP event ${event.agent}/${event.sessionId}/${event.recordId} is ${singleBytes} bytes after field truncation, above --max-request-bytes ${options.maxRequestBytes}. Lower --max-field-bytes and rerun.`);
        }
        // Close a full batch before measuring, so we never serialise a
        // combined body whose result would be thrown away.
        if (current.length >= options.batchSize) {
            flush();
        }
        if (current.length > 0) {
            const nextEvents = [...current, event];
            const nextBytes = otlpBodyBytes(nextEvents, options);
            if (nextBytes > options.maxRequestBytes) {
                flush();
            }
            else {
                current = nextEvents;
                currentBytes = nextBytes;
                continue;
            }
        }
        // Either the first event overall or the first event after a flush.
        current = [event];
        currentBytes = singleBytes;
    }
    flush();
    return batches;
}
|
|
1388
|
+
async function postOtlp(endpoint, events, options) {
|
|
1389
|
+
const body = JSON.stringify(toOtlp(events, options));
|
|
1296
1390
|
let response;
|
|
1297
1391
|
try {
|
|
1298
1392
|
response = await fetch(endpoint, {
|
|
1299
1393
|
method: "POST",
|
|
1300
1394
|
headers: { "content-type": "application/json" },
|
|
1301
|
-
body
|
|
1395
|
+
body,
|
|
1302
1396
|
});
|
|
1303
1397
|
}
|
|
1304
1398
|
catch (error) {
|
|
@@ -1361,22 +1455,54 @@ async function run(options) {
|
|
|
1361
1455
|
let aborted = false;
|
|
1362
1456
|
let lastError;
|
|
1363
1457
|
if (!options.dryRun) {
|
|
1364
|
-
|
|
1365
|
-
|
|
1458
|
+
let batches;
|
|
1459
|
+
try {
|
|
1460
|
+
batches = splitSendBatches(selected, {
|
|
1461
|
+
batchSize: options.batchSize,
|
|
1462
|
+
maxRequestBytes: options.maxRequestBytes,
|
|
1463
|
+
maxFieldBytes: options.maxFieldBytes,
|
|
1464
|
+
});
|
|
1465
|
+
}
|
|
1466
|
+
catch (error) {
|
|
1467
|
+
aborted = true;
|
|
1468
|
+
lastError = describeError(error);
|
|
1469
|
+
failed = selected.length;
|
|
1470
|
+
console.error(lastError);
|
|
1471
|
+
return {
|
|
1472
|
+
discovered,
|
|
1473
|
+
sent,
|
|
1474
|
+
skipped: events.length - selected.length,
|
|
1475
|
+
failed,
|
|
1476
|
+
notAttempted: 0,
|
|
1477
|
+
aborted,
|
|
1478
|
+
error: lastError,
|
|
1479
|
+
dryRun: options.dryRun,
|
|
1480
|
+
endpoint: options.endpoint,
|
|
1481
|
+
statePath: options.statePath,
|
|
1482
|
+
};
|
|
1483
|
+
}
|
|
1484
|
+
for (let index = 0; index < batches.length; index += 1) {
|
|
1485
|
+
const batch = batches[index]?.events ?? [];
|
|
1366
1486
|
try {
|
|
1367
|
-
await postOtlp(options.endpoint, batch
|
|
1487
|
+
await postOtlp(options.endpoint, batch, {
|
|
1488
|
+
maxFieldBytes: options.maxFieldBytes,
|
|
1489
|
+
});
|
|
1368
1490
|
for (const event of batch) {
|
|
1369
1491
|
state.sent[fingerprint(event)] = new Date().toISOString();
|
|
1370
1492
|
}
|
|
1371
1493
|
saveState(options.statePath, state);
|
|
1372
1494
|
sent += batch.length;
|
|
1495
|
+
if (options.postDelayMs > 0)
|
|
1496
|
+
await sleep(options.postDelayMs);
|
|
1373
1497
|
}
|
|
1374
1498
|
catch (error) {
|
|
1375
1499
|
failed += batch.length;
|
|
1376
1500
|
aborted = true;
|
|
1377
1501
|
lastError = describeError(error);
|
|
1378
|
-
console.error(`OTLP POST failed for batch ${
|
|
1379
|
-
const remaining =
|
|
1502
|
+
console.error(`OTLP POST failed for batch ${index + 1} (${batch.length} event(s)) to ${options.endpoint}: ${lastError}`);
|
|
1503
|
+
const remaining = batches
|
|
1504
|
+
.slice(index + 1)
|
|
1505
|
+
.reduce((sum, item) => sum + item.events.length, 0);
|
|
1380
1506
|
if (remaining > 0) {
|
|
1381
1507
|
console.error(`Aborting backfill after first failed POST; ${remaining} selected event(s) were not attempted. Fix the endpoint and rerun to resume from the local dedupe state.`);
|
|
1382
1508
|
}
|