@ramarivera/coding-agent-langfuse 0.1.27 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,6 +40,9 @@ type BackfillOptions = {
40
40
  sinceMs?: number;
41
41
  untilMs?: number;
42
42
  batchSize: number;
43
+ maxRequestBytes: number;
44
+ maxFieldBytes: number;
45
+ postDelayMs: number;
43
46
  };
44
47
  type RunSummary = {
45
48
  discovered: Record<string, number>;
@@ -68,7 +71,9 @@ declare function opencodeEvents(homeDir: string, options?: {
68
71
  untilMs?: number;
69
72
  }): BackfillEvent[];
70
73
  declare function fingerprint(event: BackfillEvent): string;
71
- declare function toOtlp(events: BackfillEvent[]): Record<string, unknown>;
74
+ declare function toOtlp(events: BackfillEvent[], options?: {
75
+ maxFieldBytes?: number;
76
+ }): Record<string, unknown>;
72
77
  declare function discoverEvents(options: BackfillOptions): BackfillEvent[];
73
78
  declare function run(options: BackfillOptions): Promise<RunSummary>;
74
79
  declare function follow(options: BackfillOptions): Promise<FollowSummary>;
package/dist/backfill.js CHANGED
@@ -15,6 +15,8 @@ const importIdentityVersions = {
15
15
  };
16
16
  const defaultEndpoint = "https://langfuse.ai.roxasroot.net/otel/v1/traces";
17
17
  const deadRemoteEndpoint = "http://langfuse.ai.roxasroot.net:14318/v1/traces";
18
+ const defaultMaxRequestBytes = 12 * 1024 * 1024;
19
+ const defaultMaxFieldBytes = 512 * 1024;
18
20
  const defaultStatePath = join(homedir(), ".local/state/coding-agent-langfuse/backfill-v6.json");
19
21
  const currentHost = hostname();
20
22
  function usage() {
@@ -29,6 +31,9 @@ Options:
29
31
  --until ISO_OR_MS Only import events before or at this timestamp
30
32
  --limit N Stop after N unsent events
31
33
  --batch-size N OTLP spans per POST (default: 50)
34
+ --max-request-bytes N Split OTLP POSTs below this JSON byte size (default: ${defaultMaxRequestBytes})
35
+ --max-field-bytes N Truncate individual input/output fields above this byte size (default: ${defaultMaxFieldBytes})
36
+ --post-delay-ms N Delay after each successful OTLP POST (default: 0)
32
37
  --follow Keep scanning and sending newly written events
33
38
  --poll-interval-ms N Delay between --follow scans (default: 5000)
34
39
  --idle-exit-after-ms N Stop --follow after this much time without new sends
@@ -45,6 +50,15 @@ function parseArgs(argv) {
45
50
  let sinceMs;
46
51
  let untilMs;
47
52
  let batchSize = 50;
53
+ let maxRequestBytes = Number.parseInt(process.env.LANGFUSE_BACKFILL_MAX_REQUEST_BYTES ?? "", 10);
54
+ if (!Number.isFinite(maxRequestBytes))
55
+ maxRequestBytes = defaultMaxRequestBytes;
56
+ let maxFieldBytes = Number.parseInt(process.env.LANGFUSE_BACKFILL_MAX_FIELD_BYTES ?? "", 10);
57
+ if (!Number.isFinite(maxFieldBytes))
58
+ maxFieldBytes = defaultMaxFieldBytes;
59
+ let postDelayMs = Number.parseInt(process.env.LANGFUSE_BACKFILL_POST_DELAY_MS ?? "", 10);
60
+ if (!Number.isFinite(postDelayMs))
61
+ postDelayMs = 0;
48
62
  let follow = false;
49
63
  let pollIntervalMs = 5_000;
50
64
  let idleExitAfterMs;
@@ -89,6 +103,15 @@ function parseArgs(argv) {
89
103
  else if (arg === "--batch-size") {
90
104
  batchSize = Number.parseInt(next(), 10);
91
105
  }
106
+ else if (arg === "--max-request-bytes") {
107
+ maxRequestBytes = Number.parseInt(next(), 10);
108
+ }
109
+ else if (arg === "--max-field-bytes") {
110
+ maxFieldBytes = Number.parseInt(next(), 10);
111
+ }
112
+ else if (arg === "--post-delay-ms") {
113
+ postDelayMs = Number.parseInt(next(), 10);
114
+ }
92
115
  else if (arg === "--follow") {
93
116
  follow = true;
94
117
  }
@@ -111,6 +134,15 @@ function parseArgs(argv) {
111
134
  if (!Number.isFinite(batchSize) || batchSize < 1) {
112
135
  throw new Error("--batch-size must be a positive integer");
113
136
  }
137
+ if (!Number.isFinite(maxRequestBytes) || maxRequestBytes < 1024) {
138
+ throw new Error("--max-request-bytes must be an integer >= 1024");
139
+ }
140
+ if (!Number.isFinite(maxFieldBytes) || maxFieldBytes < 1024) {
141
+ throw new Error("--max-field-bytes must be an integer >= 1024");
142
+ }
143
+ if (!Number.isFinite(postDelayMs) || postDelayMs < 0) {
144
+ throw new Error("--post-delay-ms must be a non-negative integer");
145
+ }
114
146
  if (!Number.isFinite(pollIntervalMs) || pollIntervalMs < 1) {
115
147
  throw new Error("--poll-interval-ms must be a positive integer");
116
148
  }
@@ -134,6 +166,9 @@ function parseArgs(argv) {
134
166
  sinceMs,
135
167
  untilMs,
136
168
  batchSize,
169
+ maxRequestBytes,
170
+ maxFieldBytes,
171
+ postDelayMs,
137
172
  };
138
173
  }
139
174
  function normalizeEndpoint(endpoint) {
@@ -1124,9 +1159,40 @@ function attr(key, value) {
1124
1159
  return { key, value: { stringValue: value } };
1125
1160
  return { key, value: { stringValue: JSON.stringify(value).slice(0, 8000) } };
1126
1161
  }
1127
- function toOtlp(events) {
1162
/**
 * Size of a string once encoded as UTF-8.
 *
 * @param value String to measure.
 * @returns Number of bytes in the UTF-8 encoding of `value`.
 */
function utf8Bytes(value) {
    return Buffer.byteLength(value, "utf8");
}
/**
 * Cut a string so the kept text encodes to at most `maxBytes` UTF-8 bytes.
 *
 * Strings that already fit are returned unchanged. Otherwise the longest
 * prefix of whole code points that fits is kept and a marker line recording
 * the original and limit sizes is appended. The marker is added on top of the
 * limit, so a truncated result is slightly larger than `maxBytes`.
 *
 * @param value String to limit.
 * @param maxBytes Byte budget for the kept prefix.
 * @returns `value` itself, or the fitting prefix followed by the marker.
 */
function truncateStringByBytes(value, maxBytes) {
    const originalBytes = utf8Bytes(value);
    if (originalBytes <= maxBytes)
        return value;
    // Walk code points once and add up their UTF-8 widths. This is linear in
    // the kept prefix (no re-encoding per trimmed character) and can never
    // stop between the two halves of a surrogate pair.
    let keptBytes = 0;
    let end = 0;
    while (end < value.length) {
        const codePoint = value.codePointAt(end);
        // Lone surrogates fall in the 3-byte range, which matches the
        // replacement character they are encoded as.
        const width = codePoint <= 0x7f ? 1 : codePoint <= 0x7ff ? 2 : codePoint <= 0xffff ? 3 : 4;
        if (keptBytes + width > maxBytes)
            break;
        keptBytes += width;
        // Only 4-byte code points occupy two UTF-16 code units.
        end += width === 4 ? 2 : 1;
    }
    return `${value.slice(0, end)}\n[truncated by coding-agent-langfuse: original_bytes=${originalBytes} limit_bytes=${maxBytes}]`;
}
/**
 * Apply the byte limit to an arbitrary payload value.
 *
 * - `null` / `undefined` are returned as-is.
 * - Strings are truncated directly.
 * - Any other value is measured through its JSON serialization: if that fits,
 *   the original value is returned untouched; if not, the truncated JSON text
 *   (a string) is returned in its place.
 *
 * @param value Payload value to limit.
 * @param maxBytes Byte budget.
 * @returns The original value, or a truncated string replacement.
 */
function limitValueBytes(value, maxBytes) {
    if (value === undefined || value === null)
        return value;
    if (typeof value === "string")
        return truncateStringByBytes(value, maxBytes);
    const json = JSON.stringify(value);
    // JSON.stringify yields undefined for values such as functions or symbols;
    // there is no serialized form to measure, so leave them unchanged instead
    // of failing inside Buffer.byteLength.
    if (json === undefined)
        return value;
    if (utf8Bytes(json) <= maxBytes)
        return value;
    return truncateStringByBytes(json, maxBytes);
}
/**
 * Build a shallow copy of an event whose `input` and `output` fields have been
 * passed through `limitValueBytes`. The event passed in is not modified.
 *
 * @param event Event to copy.
 * @param maxFieldBytes Byte budget applied to each of the two fields.
 * @returns New event object with limited `input` and `output`.
 */
function limitEventPayload(event, maxFieldBytes) {
    return {
        ...event,
        input: limitValueBytes(event.input, maxFieldBytes),
        output: limitValueBytes(event.output, maxFieldBytes),
    };
}
1191
+ function toOtlp(events, options = {}) {
1192
+ const maxFieldBytes = options.maxFieldBytes ?? defaultMaxFieldBytes;
1128
1193
  const spansByTrace = new Map();
1129
- for (const event of events) {
1194
+ for (const rawEvent of events) {
1195
+ const event = limitEventPayload(rawEvent, maxFieldBytes);
1130
1196
  const id = traceId(event);
1131
1197
  spansByTrace.set(id, [...(spansByTrace.get(id) ?? []), event]);
1132
1198
  }
@@ -1137,10 +1203,7 @@ function toOtlp(events) {
1137
1203
  const sortedEvents = [...traceEventsForSession].sort((a, b) => a.startMs - b.startMs);
1138
1204
  const traceStartMs = sortedEvents[0]?.startMs ?? Date.now();
1139
1205
  const traceEndMs = Math.max(...sortedEvents.map((event) => event.endMs ?? event.startMs + 1), traceStartMs + 1);
1140
- const firstInputEvent = sortedEvents.find((event) => event.input !== undefined);
1141
- const lastOutputEvent = [...sortedEvents]
1142
- .reverse()
1143
- .find((event) => event.output !== undefined);
1206
+ const shouldEmitRootSpan = sortedEvents.some((event) => event.recordId === "session");
1144
1207
  const rootAttributes = [
1145
1208
  attr("service.name", `agent.${first.agent}`),
1146
1209
  attr("deployment.environment", "local"),
@@ -1160,8 +1223,6 @@ function toOtlp(events) {
1160
1223
  attr("langfuse.trace.metadata.machine", currentHost),
1161
1224
  attr("langfuse.trace.metadata.source_path", first.sourcePath),
1162
1225
  attr("langfuse.trace.metadata.cwd", first.cwd),
1163
- attr("langfuse.trace.input", firstInputEvent?.input),
1164
- attr("langfuse.trace.output", lastOutputEvent?.output),
1165
1226
  attr("langfuse.observation.metadata.agent", first.agent),
1166
1227
  attr("langfuse.observation.metadata.host", currentHost),
1167
1228
  attr("langfuse.observation.metadata.machine", currentHost),
@@ -1220,13 +1281,6 @@ function toOtlp(events) {
1220
1281
  attr("agent.record_id", event.recordId),
1221
1282
  attr("agent.original_start_time", new Date(event.startMs).toISOString()),
1222
1283
  attr("agent.original_end_time", event.endMs === undefined ? undefined : new Date(event.endMs).toISOString()),
1223
- attr("langfuse.trace.metadata.agent", event.agent),
1224
- attr("langfuse.trace.metadata.host", currentHost),
1225
- attr("langfuse.trace.metadata.machine", currentHost),
1226
- attr("langfuse.trace.metadata.source_path", event.sourcePath),
1227
- attr("langfuse.trace.metadata.cwd", event.cwd),
1228
- attr("langfuse.trace.metadata.model", event.model),
1229
- attr("langfuse.trace.metadata.provider", event.provider),
1230
1284
  attr("langfuse.observation.metadata.agent", event.agent),
1231
1285
  attr("langfuse.observation.metadata.host", currentHost),
1232
1286
  attr("langfuse.observation.metadata.machine", currentHost),
@@ -1237,8 +1291,6 @@ function toOtlp(events) {
1237
1291
  attr("langfuse.observation.metadata.model", modelName ?? event.model),
1238
1292
  attr("langfuse.observation.metadata.provider", event.provider),
1239
1293
  attr("langfuse.observation.metadata.cost_source", cost?.source),
1240
- attr("langfuse.trace.input", event.input),
1241
- attr("langfuse.trace.output", event.output),
1242
1294
  attr("langfuse.observation.input", event.input),
1243
1295
  attr("langfuse.observation.output", event.output),
1244
1296
  attr("source.path", event.sourcePath),
@@ -1265,7 +1317,7 @@ function toOtlp(events) {
1265
1317
  status: { code: 1 },
1266
1318
  };
1267
1319
  });
1268
- return [rootSpan, ...childSpans];
1320
+ return shouldEmitRootSpan ? [rootSpan, ...childSpans] : childSpans;
1269
1321
  });
1270
1322
  return {
1271
1323
  resourceSpans: [
@@ -1292,13 +1344,55 @@ function toOtlp(events) {
1292
1344
  ],
1293
1345
  };
1294
1346
  }
1295
- async function postOtlp(endpoint, events) {
1347
/**
 * Measure how many UTF-8 bytes the JSON request body for `events` occupies.
 *
 * @param events Events to convert with `toOtlp`.
 * @param options Options forwarded unchanged to `toOtlp`.
 * @returns Byte length of the serialized payload.
 */
function otlpBodyBytes(events, options) {
    const payload = toOtlp(events, options);
    const serialized = JSON.stringify(payload);
    return utf8Bytes(serialized);
}
1350
/**
 * Group events into batches that respect both the per-batch event count and
 * the serialized request size.
 *
 * Events keep their order. A batch is closed when it already holds
 * `options.batchSize` events or when adding the next event would make the
 * serialized body larger than `options.maxRequestBytes`.
 *
 * @param events Events to split, in send order.
 * @param options Carries `batchSize` and `maxRequestBytes`; the whole object is
 *   also forwarded to `otlpBodyBytes` when measuring.
 * @returns Array of `{ events, bytes }`, where `bytes` is the measured body
 *   size of that batch.
 * @throws Error when a single event on its own exceeds `maxRequestBytes`.
 */
function splitSendBatches(events, options) {
    const batches = [];
    let current = [];
    let currentBytes = 0;
    // Close the batch being built, if it has any events.
    const flush = () => {
        if (current.length === 0)
            return;
        batches.push({ events: current, bytes: currentBytes });
        current = [];
        currentBytes = 0;
    };
    for (const event of events) {
        const singleBytes = otlpBodyBytes([event], options);
        if (singleBytes > options.maxRequestBytes) {
            // Nothing is flushed here: the batches built so far are discarded
            // together with this call when the error propagates.
            throw new Error(`Single OTLP event ${event.agent}/${event.sessionId}/${event.recordId} is ${singleBytes} bytes after field truncation, above --max-request-bytes ${options.maxRequestBytes}. Lower --max-field-bytes and rerun.`);
        }
        // Check the event-count cap first so a batch that is already full does
        // not pay for serializing a candidate that would be rejected anyway.
        const batchIsFull = current.length >= options.batchSize;
        if (current.length > 0 && !batchIsFull) {
            // The combined size is measured by serializing the candidate batch
            // as a whole rather than by summing per-event sizes.
            const nextEvents = [...current, event];
            const nextBytes = otlpBodyBytes(nextEvents, options);
            if (nextBytes <= options.maxRequestBytes) {
                current = nextEvents;
                currentBytes = nextBytes;
                continue;
            }
        }
        // Either there is no open batch, it is full, or the event does not
        // fit: close what we have (no-op when empty) and start a new batch.
        flush();
        current = [event];
        currentBytes = singleBytes;
    }
    flush();
    return batches;
}
1388
+ async function postOtlp(endpoint, events, options) {
1389
+ const body = JSON.stringify(toOtlp(events, options));
1296
1390
  let response;
1297
1391
  try {
1298
1392
  response = await fetch(endpoint, {
1299
1393
  method: "POST",
1300
1394
  headers: { "content-type": "application/json" },
1301
- body: JSON.stringify(toOtlp(events)),
1395
+ body,
1302
1396
  });
1303
1397
  }
1304
1398
  catch (error) {
@@ -1361,22 +1455,54 @@ async function run(options) {
1361
1455
  let aborted = false;
1362
1456
  let lastError;
1363
1457
  if (!options.dryRun) {
1364
- for (let index = 0; index < selected.length; index += options.batchSize) {
1365
- const batch = selected.slice(index, index + options.batchSize);
1458
+ let batches;
1459
+ try {
1460
+ batches = splitSendBatches(selected, {
1461
+ batchSize: options.batchSize,
1462
+ maxRequestBytes: options.maxRequestBytes,
1463
+ maxFieldBytes: options.maxFieldBytes,
1464
+ });
1465
+ }
1466
+ catch (error) {
1467
+ aborted = true;
1468
+ lastError = describeError(error);
1469
+ failed = selected.length;
1470
+ console.error(lastError);
1471
+ return {
1472
+ discovered,
1473
+ sent,
1474
+ skipped: events.length - selected.length,
1475
+ failed,
1476
+ notAttempted: 0,
1477
+ aborted,
1478
+ error: lastError,
1479
+ dryRun: options.dryRun,
1480
+ endpoint: options.endpoint,
1481
+ statePath: options.statePath,
1482
+ };
1483
+ }
1484
+ for (let index = 0; index < batches.length; index += 1) {
1485
+ const batch = batches[index]?.events ?? [];
1366
1486
  try {
1367
- await postOtlp(options.endpoint, batch);
1487
+ await postOtlp(options.endpoint, batch, {
1488
+ maxFieldBytes: options.maxFieldBytes,
1489
+ });
1368
1490
  for (const event of batch) {
1369
1491
  state.sent[fingerprint(event)] = new Date().toISOString();
1370
1492
  }
1371
1493
  saveState(options.statePath, state);
1372
1494
  sent += batch.length;
1495
+ if (options.postDelayMs > 0)
1496
+ await sleep(options.postDelayMs);
1373
1497
  }
1374
1498
  catch (error) {
1375
1499
  failed += batch.length;
1376
1500
  aborted = true;
1377
1501
  lastError = describeError(error);
1378
- console.error(`OTLP POST failed for batch ${Math.floor(index / options.batchSize) + 1} (${batch.length} event(s)) to ${options.endpoint}: ${lastError}`);
1379
- const remaining = selected.length - index - batch.length;
1502
+ console.error(`OTLP POST failed for batch ${index + 1} (${batch.length} event(s)) to ${options.endpoint}: ${lastError}`);
1503
+ const remaining = batches
1504
+ .slice(index + 1)
1505
+ .reduce((sum, item) => sum + item.events.length, 0);
1380
1506
  if (remaining > 0) {
1381
1507
  console.error(`Aborting backfill after first failed POST; ${remaining} selected event(s) were not attempted. Fix the endpoint and rerun to resume from the local dedupe state.`);
1382
1508
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ramarivera/coding-agent-langfuse",
3
- "version": "0.1.27",
3
+ "version": "0.1.29",
4
4
  "description": "Universal coding-agent Langfuse backfiller and live OTLP helpers",
5
5
  "type": "module",
6
6
  "license": "MIT",