@ramarivera/coding-agent-langfuse 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1000 @@
1
+ #!/usr/bin/env node
2
+ import { execFileSync } from "node:child_process";
3
+ import { createHash } from "node:crypto";
4
+ import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync, } from "node:fs";
5
+ import { homedir } from "node:os";
6
+ import { dirname, join } from "node:path";
7
// Agent identifiers accepted by --agents; discovery iterates them in this order.
const allAgents = ["claude", "codex", "grok", "opencode", "pi"];
// Prefix mixed into every fingerprint, so changing it changes all trace/span ids.
const importIdentityVersion = "v6-session-traces-with-generations-and-costs";
// Endpoint used when neither --endpoint nor LANGFUSE_BACKFILL_ENDPOINT is given.
const defaultEndpoint = "https://langfuse.ai.roxasroot.net/otel/v1/traces";
// Endpoint that normalizeEndpoint() rewrites to defaultEndpoint.
const deadRemoteEndpoint = "http://langfuse.ai.roxasroot.net:14318/v1/traces";
// Dedupe state file used when neither --state nor LANGFUSE_BACKFILL_STATE is given.
const defaultStatePath = join(homedir(), ".local", "state", "langfuse-backfill", "state.json");
12
/**
 * Builds the command-line help text.
 *
 * @returns {string} Multi-line usage message ending with a newline.
 */
function usage() {
    const optionLines = [
        `--endpoint URL OTLP HTTP traces endpoint (default: ${defaultEndpoint})`,
        "--agents LIST Comma-separated agents: claude,codex,grok,opencode,pi",
        `--state PATH Dedupe state file (default: ${defaultStatePath})`,
        "--home PATH Home directory to scan (default: current user home)",
        "--since ISO_OR_MS Only import events at or after this timestamp",
        "--until ISO_OR_MS Only import events before or at this timestamp",
        "--limit N Stop after N unsent events",
        "--batch-size N OTLP spans per POST (default: 50)",
        "--dry-run Discover and dedupe without sending or mutating state",
        "--help Show this help",
    ];
    // The trailing empty entry reproduces the final newline of the help text.
    return [
        "Usage: node --experimental-strip-types backfill.ts [options]",
        "",
        "Options:",
        ...optionLines,
        "",
    ].join("\n");
}
28
/**
 * Turns the CLI argument list into the options object used by the backfill.
 *
 * Defaults come from LANGFUSE_BACKFILL_ENDPOINT, LANGFUSE_BACKFILL_STATE and
 * HOME, falling back to the built-in defaults. `--help`/`-h` prints the usage
 * text and exits the process with status 0.
 *
 * @param {string[]} argv Arguments after the script name.
 * @returns {{agents: Set<string>, endpoint: string, statePath: string,
 *   homeDir: string, dryRun: boolean, limit: (number|undefined),
 *   sinceMs: (number|undefined), untilMs: (number|undefined),
 *   batchSize: number}} Parsed options.
 * @throws {Error} On an unknown flag, a flag missing its value, an unknown
 *   agent name, an invalid timestamp, or a non-positive --limit/--batch-size.
 */
function parseArgs(argv) {
    const options = {
        agents: new Set(allAgents),
        endpoint: normalizeEndpoint(process.env.LANGFUSE_BACKFILL_ENDPOINT ?? defaultEndpoint),
        statePath: process.env.LANGFUSE_BACKFILL_STATE ?? defaultStatePath,
        homeDir: process.env.HOME ?? homedir(),
        dryRun: false,
        limit: undefined,
        sinceMs: undefined,
        untilMs: undefined,
        batchSize: 50,
    };
    let cursor = 0;
    // Consumes the argument following the current flag and advances the cursor.
    const takeValue = (flag) => {
        cursor += 1;
        const value = argv[cursor];
        if (!value) {
            throw new Error(`Missing value for ${flag}`);
        }
        return value;
    };
    for (; cursor < argv.length; cursor += 1) {
        const flag = argv[cursor];
        if (flag === "--help" || flag === "-h") {
            console.log(usage());
            process.exit(0);
        }
        switch (flag) {
            case "--dry-run":
                options.dryRun = true;
                break;
            case "--endpoint":
                options.endpoint = normalizeEndpoint(takeValue(flag));
                break;
            case "--state":
                options.statePath = takeValue(flag);
                break;
            case "--home":
                options.homeDir = takeValue(flag);
                break;
            case "--agents": {
                // An explicit list replaces the default "all agents" selection.
                options.agents.clear();
                for (const item of takeValue(flag).split(",")) {
                    const agent = item.trim();
                    if (!allAgents.includes(agent)) {
                        throw new Error(`Unknown agent '${item}'`);
                    }
                    options.agents.add(agent);
                }
                break;
            }
            case "--limit":
                options.limit = Number.parseInt(takeValue(flag), 10);
                break;
            case "--batch-size":
                options.batchSize = Number.parseInt(takeValue(flag), 10);
                break;
            case "--since":
                options.sinceMs = parseTime(takeValue(flag));
                break;
            case "--until":
                options.untilMs = parseTime(takeValue(flag));
                break;
            default:
                throw new Error(`Unknown argument '${flag}'`);
        }
    }
    const isPositive = (n) => Number.isFinite(n) && n >= 1;
    if (!isPositive(options.batchSize)) {
        throw new Error("--batch-size must be a positive integer");
    }
    if (options.limit !== undefined && !isPositive(options.limit)) {
        throw new Error("--limit must be a positive integer");
    }
    return options;
}
106
/**
 * Replaces the known-dead endpoint with the default one, warning on stderr.
 *
 * @param {string} endpoint Endpoint requested by the user or environment.
 * @returns {string} `defaultEndpoint` when `endpoint` equals
 *   `deadRemoteEndpoint`, otherwise `endpoint` unchanged.
 */
function normalizeEndpoint(endpoint) {
    const isDeadEndpoint = endpoint === deadRemoteEndpoint;
    if (isDeadEndpoint) {
        console.warn(`${deadRemoteEndpoint} is not reachable from remote hosts; using ${defaultEndpoint} instead.`);
        return defaultEndpoint;
    }
    return endpoint;
}
112
/**
 * Parses a --since/--until value.
 *
 * @param {string} value Either a run of decimal digits (returned as that
 *   integer) or any string `Date.parse` understands.
 * @returns {number} The parsed timestamp.
 * @throws {Error} When the value is neither all digits nor a parseable date.
 */
function parseTime(value) {
    const isAllDigits = /^\d+$/.test(value);
    if (isAllDigits) {
        return Number.parseInt(value, 10);
    }
    const parsedMs = Date.parse(value);
    if (Number.isFinite(parsedMs)) {
        return parsedMs;
    }
    throw new Error(`Invalid timestamp '${value}'`);
}
120
/**
 * Loads the dedupe state file.
 *
 * A missing file yields an empty state. A file whose JSON is not an object, or
 * whose `sent` member is not a plain (non-array) object, also yields an empty
 * `sent` map instead of throwing a TypeError or leaking a malformed value.
 * Invalid JSON still throws, so a corrupt state file is never silently treated
 * as "nothing sent yet".
 *
 * @param {string} path Location of the state file.
 * @returns {{version: number, sent: object}} State with `version` fixed to 1.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 */
function loadState(path) {
    if (!existsSync(path)) {
        return { version: 1, sent: {} };
    }
    const parsed = JSON.parse(readFileSync(path, "utf8"));
    const isObject = (candidate) => candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
    // Valid JSON may still be `null`, a number, an array, ...: guard before reading `.sent`.
    const sent = isObject(parsed) ? parsed.sent : undefined;
    return { version: 1, sent: isObject(sent) ? sent : {} };
}
126
/**
 * Writes the dedupe state as pretty-printed JSON followed by a newline,
 * creating the parent directory chain first.
 *
 * @param {string} path Destination file.
 * @param {object} state State object to serialize.
 */
function saveState(path, state) {
    const parentDir = dirname(path);
    mkdirSync(parentDir, { recursive: true });
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(path, `${serialized}\n`);
}
130
/**
 * Collects every regular file below `root` that satisfies `predicate`.
 *
 * Directories that cannot be read and entries that cannot be stat'ed are
 * skipped. A missing `root` yields an empty list.
 *
 * @param {string} root Directory to walk.
 * @param {(path: string) => boolean} predicate Filter applied to file paths.
 * @returns {string[]} Matching paths, sorted with the default string order.
 */
function listFiles(root, predicate) {
    // Runs a filesystem call and maps any failure to `undefined`.
    const attempt = (operation) => {
        try {
            return operation();
        }
        catch {
            return undefined;
        }
    };
    const matches = [];
    if (!existsSync(root)) {
        return matches;
    }
    const pending = [root];
    while (pending.length > 0) {
        const directory = pending.pop();
        if (!directory) {
            continue;
        }
        const names = attempt(() => readdirSync(directory));
        if (names === undefined) {
            continue;
        }
        for (const name of names) {
            const fullPath = join(directory, name);
            const info = attempt(() => statSync(fullPath));
            if (info === undefined) {
                continue;
            }
            if (info.isDirectory()) {
                pending.push(fullPath);
                continue;
            }
            if (info.isFile() && predicate(fullPath)) {
                matches.push(fullPath);
            }
        }
    }
    return matches.sort();
}
163
/**
 * Reads a JSON Lines file and returns one parsed value per non-blank line.
 *
 * Lines that are not valid JSON (for example a trailing line that is still
 * being written) are reported with `console.warn` and skipped, so one bad
 * line does not abort the import of the whole file.
 *
 * @param {string} path File to read as UTF-8.
 * @returns {unknown[]} Parsed values in file order, malformed lines omitted.
 * @throws {Error} When the file itself cannot be read.
 */
function parseJsonl(path) {
    const rows = [];
    const lines = readFileSync(path, "utf8").split(/\r?\n/);
    for (const [index, line] of lines.entries()) {
        if (line.trim().length === 0) {
            continue;
        }
        try {
            rows.push(JSON.parse(line));
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            console.warn(`Skipping malformed JSONL line ${index + 1} in ${path}: ${reason}`);
        }
    }
    return rows;
}
169
/**
 * Returns `value` when it is a non-null object (arrays included); otherwise
 * returns a fresh empty object so property reads are always safe.
 *
 * @param {unknown} value Candidate value.
 * @returns {object} `value` itself or `{}`.
 */
function asRecord(value) {
    const isObjectLike = typeof value === "object" && value !== null;
    return isObjectLike ? value : {};
}
174
/**
 * Walks a chain of property names, treating any non-object step as empty.
 *
 * @param {unknown} value Starting value.
 * @param {string[]} keys Property names to follow in order.
 * @returns {unknown} The value at the end of the chain, or `undefined` when a
 *   step is missing. With no keys, `value` itself is returned.
 */
function getPath(value, keys) {
    return keys.reduce((node, key) => asRecord(node)[key], value);
}
181
/**
 * Narrows a value to a string.
 *
 * @param {unknown} value Candidate value.
 * @returns {string|undefined} `value` when it is a string, else `undefined`.
 */
function asString(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    return value;
}
184
/**
 * Narrows a value to a finite number.
 *
 * @param {unknown} value Candidate value.
 * @returns {number|undefined} `value` when it is a finite number; `undefined`
 *   for NaN, infinities and every non-number.
 */
function asNumber(value) {
    const isUsable = typeof value === "number" && Number.isFinite(value);
    return isUsable ? value : undefined;
}
189
/**
 * Converts a loosely typed timestamp into epoch milliseconds.
 *
 * Finite numbers above 10_000_000_000 are returned as-is; smaller numbers are
 * multiplied by 1000. Non-empty strings go through `Date.parse`. Anything else,
 * or an unparseable string, yields `fallback`.
 *
 * @param {unknown} value Raw timestamp.
 * @param {number} [fallback=Date.now()] Value used when `value` is unusable.
 * @returns {number} Timestamp in milliseconds.
 */
function getTimestampMs(value, fallback = Date.now()) {
    const numeric = asNumber(value);
    if (numeric !== undefined) {
        const alreadyMilliseconds = numeric > 10_000_000_000;
        return alreadyMilliseconds ? numeric : numeric * 1000;
    }
    const text = asString(value);
    if (text) {
        const parsed = Date.parse(text);
        if (Number.isFinite(parsed)) {
            return parsed;
        }
    }
    return fallback;
}
200
/**
 * Pulls display text out of a message content value.
 *
 * Strings are truncated to `maxLength`. For arrays, each item's string `text`
 * (or, failing that, string `content`) is collected, empty pieces are dropped,
 * and the rest is joined with newlines before truncation.
 *
 * @param {unknown} value Message content.
 * @param {number} [maxLength=4000] Maximum number of characters returned.
 * @returns {string|undefined} Extracted text, or `undefined` when there is
 *   none or `value` is neither a string nor an array.
 */
function extractText(value, maxLength = 4000) {
    if (typeof value === "string") {
        return value.slice(0, maxLength);
    }
    if (!Array.isArray(value)) {
        return undefined;
    }
    const pieces = [];
    for (const item of value) {
        const record = asRecord(item);
        const piece = asString(record.text) ?? asString(record.content) ?? "";
        if (piece) {
            pieces.push(piece);
        }
    }
    const joined = pieces.join("\n");
    return joined ? joined.slice(0, maxLength) : undefined;
}
215
/**
 * Maps the differently named token/cost fields of a usage record onto one
 * shape: `input`, `output`, `reasoning`, `cacheRead`, `cacheWrite`, `total`
 * and `cost`.
 *
 * For each field the first candidate that is a finite number wins. When no
 * total is recorded, it is derived as the sum of the five token counts, but
 * only if that sum is positive.
 *
 * @param {unknown} value Raw usage record.
 * @returns {object|undefined} Normalized usage, or `undefined` when no field
 *   could be filled.
 */
function normalizeUsage(value) {
    const record = asRecord(value);
    const costRecord = asRecord(record.cost);
    const cacheRecord = asRecord(record.cache);
    const inputDetails = asRecord(record.input_tokens_details);
    const outputDetails = asRecord(record.output_tokens_details);
    // Returns the first candidate that is a finite number.
    const firstNumber = (...candidates) => {
        for (const candidate of candidates) {
            const numeric = asNumber(candidate);
            if (numeric !== undefined) {
                return numeric;
            }
        }
        return undefined;
    };
    const usage = {
        input: firstNumber(record.input, record.input_tokens, record.prompt_tokens),
        output: firstNumber(record.output, record.output_tokens, record.completion_tokens),
        reasoning: firstNumber(record.reasoning, record.reasoning_tokens, outputDetails.reasoning_tokens),
        cacheRead: firstNumber(record.cacheRead, record.cache_read_input_tokens, record.cached_tokens, inputDetails.cached_tokens, cacheRecord.read),
        cacheWrite: firstNumber(record.cacheWrite, record.cache_creation_input_tokens, cacheRecord.write),
        total: firstNumber(record.totalTokens, record.total_tokens, record.total),
        cost: firstNumber(costRecord.total, record.totalCostUSD, record.costUSD, record.total_cost, record.cost),
    };
    if (usage.total === undefined) {
        const derivedTotal = (usage.input ?? 0) +
            (usage.output ?? 0) +
            (usage.reasoning ?? 0) +
            (usage.cacheRead ?? 0) +
            (usage.cacheWrite ?? 0);
        if (derivedTotal > 0) {
            usage.total = derivedTotal;
        }
    }
    const hasAnyField = Object.values(usage).some((field) => field !== undefined);
    return hasAnyField ? usage : undefined;
}
260
/**
 * Canonicalizes a model name.
 *
 * @param {string|undefined} model Raw model name.
 * @returns {string|undefined} `undefined` for a missing name or for
 *   "gpt-5.4-mini"; "kimi-for-coding" for any name starting with that prefix;
 *   otherwise the name unchanged.
 */
function normalizeModelName(model) {
    const isSuppressed = !model || model === "gpt-5.4-mini";
    if (isSuppressed) {
        return undefined;
    }
    return model.startsWith("kimi-for-coding") ? "kimi-for-coding" : model;
}
269
/**
 * Renames normalized usage fields to the keys emitted in the
 * `langfuse.observation.usage_details` attribute.
 *
 * @param {object|undefined} usage Normalized usage (see `normalizeUsage`).
 * @returns {object|undefined} Object holding only the defined token counts,
 *   or `undefined` when `usage` is missing or has none. `cost` is not copied.
 */
function usageDetails(usage) {
    if (!usage) {
        return undefined;
    }
    // [source field, emitted key] pairs, in emission order.
    const fieldMap = [
        ["input", "input"],
        ["output", "output"],
        ["reasoning", "output_reasoning"],
        ["cacheRead", "input_cached_tokens"],
        ["cacheWrite", "input_cache_creation"],
        ["total", "total"],
    ];
    const details = {};
    for (const [sourceField, targetKey] of fieldMap) {
        const amount = usage[sourceField];
        if (amount !== undefined) {
            details[targetKey] = amount;
        }
    }
    return Object.keys(details).length > 0 ? details : undefined;
}
287
/**
 * Looks up the built-in rate card for a model.
 *
 * Rates are divided by 1_000_000 per token in `costDetails`. The first
 * matching rule wins; matching is case-sensitive.
 *
 * @param {string|undefined} model Model name.
 * @returns {{input: number, output: number, cacheRead: number,
 *   cacheWrite: number}|undefined} Rates, or `undefined` for an unknown or
 *   missing model.
 */
function pricingForModel(model) {
    if (!model) {
        return undefined;
    }
    const normalized = normalizeModelName(model) ?? model;
    // Ordered rules: the exact kimi-for-coding match first, then substring matches.
    const rateCard = [
        [(name) => name === "kimi-for-coding", { input: 0.95, output: 4.0, cacheRead: 0.16, cacheWrite: 0 }],
        [(name) => name.includes("accounts/fireworks/routers/kimi-k2p6-turbo"), { input: 2.0, output: 8.0, cacheRead: 0.30, cacheWrite: 0 }],
        [(name) => name.includes("accounts/fireworks/models/deepseek-v4-pro"), { input: 1.74, output: 3.48, cacheRead: 0.15, cacheWrite: 0 }],
        [(name) => name.includes("DeepSeek-V4-Pro"), { input: 2.1, output: 4.4, cacheRead: 0.2, cacheWrite: 0 }],
        [(name) => name.includes("Kimi-K2.6"), { input: 1.2, output: 4.5, cacheRead: 0.2, cacheWrite: 0 }],
        [(name) => name.includes("MiniMax-M2.7"), { input: 0.3, output: 1.2, cacheRead: 0.06, cacheWrite: 0 }],
    ];
    for (const [matches, rates] of rateCard) {
        if (matches(normalized)) {
            return rates;
        }
    }
    return undefined;
}
311
/**
 * Produces cost figures for a usage record.
 *
 * A positive recorded `usage.cost` is reported as-is with source "recorded".
 * Otherwise the cost is estimated from `pricingForModel(model)`; reasoning
 * tokens are billed at the output rate.
 *
 * @param {object|undefined} usage Normalized usage.
 * @param {string|undefined} model Model name used for the rate lookup.
 * @returns {object|undefined} `{ total, source }` for recorded costs, a
 *   per-category breakdown with source "estimated" otherwise, or `undefined`
 *   when there is no usage, no rate card, or the estimate is not positive.
 */
function costDetails(usage, model) {
    if (!usage) {
        return undefined;
    }
    const recorded = usage.cost;
    if (recorded !== undefined && recorded > 0) {
        return { total: recorded, source: "recorded" };
    }
    const rates = pricingForModel(model);
    if (!rates) {
        return undefined;
    }
    const priced = (tokens, rate) => ((tokens ?? 0) * rate) / 1_000_000;
    const input = priced(usage.input, rates.input);
    const output = priced((usage.output ?? 0) + (usage.reasoning ?? 0), rates.output);
    const cache_read = priced(usage.cacheRead, rates.cacheRead);
    const cache_write = priced(usage.cacheWrite, rates.cacheWrite);
    const total = input + output + cache_read + cache_write;
    return total > 0
        ? { input, output, cache_read, cache_write, total, source: "estimated" }
        : undefined;
}
330
/**
 * Decides whether an event is exported as a "generation" observation: it must
 * carry usage data and must not have the "user" role.
 *
 * @param {object} event Discovered event.
 * @returns {boolean} True for generation events.
 */
function isGenerationEvent(event) {
    if (event.usage === undefined) {
        return false;
    }
    return event.role !== "user";
}
333
/**
 * Discovers events from Codex session logs (`.codex/sessions/**.jsonl` under
 * `homeDir`).
 *
 * Each file yields one "session" event followed by one event per
 * `turn_context` row, per `response_item` message or function call, and per
 * `event_msg` token count. The model and cwd of the latest `turn_context` are
 * carried forward onto later events.
 *
 * @param {string} homeDir Home directory to scan.
 * @returns {object[]} Events in file order, then row order.
 */
function codexEvents(homeDir) {
    const sessionFiles = listFiles(join(homeDir, ".codex/sessions"), (path) => path.endsWith(".jsonl"));
    return sessionFiles.flatMap((path) => {
        const rows = parseJsonl(path).map(asRecord);
        const metaRow = rows.find((row) => row.type === "session_meta");
        const metaPayload = asRecord(metaRow?.payload);
        const sessionId = asString(metaPayload.id) ?? stableId(path);
        const sessionTime = getTimestampMs(metaPayload.timestamp ?? rows[0]?.timestamp);
        const initialCwd = asString(metaPayload.cwd);
        const initialModel = asString(metaPayload.model) ??
            asString(getPath(metaPayload, ["model"]));
        // Fields shared by every event from this file.
        const shared = { agent: "codex", sourcePath: path, sessionId };
        const events = [
            {
                ...shared,
                recordId: "session",
                name: "codex session",
                cwd: initialCwd,
                model: initialModel,
                provider: asString(metaPayload.model_provider),
                startMs: sessionTime,
                metadata: pick(metaPayload, [
                    "originator",
                    "cli_version",
                    "source",
                    "thread_source",
                ]),
            },
        ];
        let activeModel = initialModel;
        let activeCwd = initialCwd;
        rows.forEach((row, index) => {
            const body = asRecord(row.payload);
            const rowType = asString(row.type);
            const itemType = asString(body.type);
            // Rows without a usable timestamp are spread 1 ms apart after the session start.
            const timestamp = getTimestampMs(row.timestamp, sessionTime + index);
            if (rowType === "turn_context") {
                activeModel = asString(body.model) ?? activeModel;
                activeCwd = asString(body.cwd) ?? activeCwd;
                events.push({
                    ...shared,
                    recordId: `turn-${asString(body.turn_id) ?? index}`,
                    name: "codex turn",
                    cwd: activeCwd,
                    model: activeModel,
                    startMs: timestamp,
                    parentRecordId: "session",
                    metadata: pick(body, ["turn_id", "approval_policy"]),
                });
                return;
            }
            if (rowType === "response_item" && itemType === "message") {
                events.push({
                    ...shared,
                    recordId: `message-${asString(body.id) ?? index}`,
                    name: `codex ${asString(body.role) ?? "message"}`,
                    role: asString(body.role),
                    model: activeModel,
                    cwd: activeCwd,
                    startMs: timestamp,
                    parentRecordId: "session",
                    output: extractText(body.content),
                    usage: normalizeUsage(body.usage),
                });
                return;
            }
            if (rowType === "response_item" && itemType === "function_call") {
                events.push({
                    ...shared,
                    recordId: `tool-${asString(body.call_id) ?? index}`,
                    name: `codex tool ${asString(body.name) ?? "call"}`,
                    model: activeModel,
                    cwd: activeCwd,
                    startMs: timestamp,
                    parentRecordId: "session",
                    input: body.arguments,
                    metadata: pick(body, ["name", "call_id"]),
                });
                return;
            }
            if (rowType === "event_msg" && body.type === "token_count") {
                const info = asRecord(body.info);
                // Prefer the usage of the last call; fall back to the running total.
                const usage = normalizeUsage(asRecord(info.last_token_usage)) ??
                    normalizeUsage(asRecord(info.total_token_usage));
                events.push({
                    ...shared,
                    recordId: `token-count-${index}`,
                    name: "codex token usage",
                    model: asString(info.model) ?? activeModel,
                    cwd: activeCwd,
                    startMs: timestamp,
                    parentRecordId: "session",
                    usage,
                    metadata: pick(info, ["model_context_window"]),
                });
            }
        });
        return events;
    });
}
439
/**
 * Discovers Claude events from the `.jsonl` files under
 * `.config/claude/projects` and `.claude/projects` in `homeDir`.
 *
 * @param {string} homeDir Home directory to scan.
 * @returns {object[]} Events produced by `genericJsonlEvents`.
 */
function claudeEvents(homeDir) {
    const isJsonl = (path) => path.endsWith(".jsonl");
    const configDirFiles = listFiles(join(homeDir, ".config/claude/projects"), isJsonl);
    const dotDirFiles = listFiles(join(homeDir, ".claude/projects"), isJsonl);
    return genericJsonlEvents("claude", [...configDirFiles, ...dotDirFiles], "claude session");
}
447
/**
 * Discovers events from pi agent history: `.jsonl` files under
 * `.pi/agent/sessions` plus any `run-history.jsonl` under `.pi/agent`.
 *
 * Each file yields one "session" event, then one event per `message` row and
 * one per tool call found in that message's content.
 *
 * @param {string} homeDir Home directory to scan.
 * @returns {object[]} Events in file order, then row order.
 */
function piEvents(homeDir) {
    const sessionFiles = listFiles(join(homeDir, ".pi/agent/sessions"), (path) => path.endsWith(".jsonl"));
    const historyFiles = listFiles(join(homeDir, ".pi/agent"), (path) => path.endsWith("run-history.jsonl"));
    return [...sessionFiles, ...historyFiles].flatMap((path) => {
        const rows = parseJsonl(path).map(asRecord);
        const sessionRow = rows.find((row) => row.type === "session");
        const sessionId = asString(sessionRow?.id) ?? stableId(path);
        const startMs = getTimestampMs(sessionRow?.timestamp ?? rows[0]?.timestamp);
        const cwd = asString(sessionRow?.cwd);
        // Fields shared by every event from this file.
        const shared = { agent: "pi", sourcePath: path, sessionId };
        const events = [
            {
                ...shared,
                recordId: "session",
                name: "pi session",
                cwd,
                startMs,
                metadata: pick(sessionRow ?? {}, ["version"]),
            },
        ];
        rows.forEach((row, index) => {
            if (row.type !== "message") {
                return;
            }
            const message = asRecord(row.message);
            const usage = normalizeUsage(message.usage);
            const isUser = message.role === "user";
            const isAssistant = message.role === "assistant";
            events.push({
                ...shared,
                recordId: asString(row.id) ?? `message-${index}`,
                name: `pi ${asString(message.role) ?? "message"}`,
                role: asString(message.role),
                model: asString(message.model) ?? asString(row.model),
                provider: asString(message.provider),
                cwd,
                // Missing timestamps fall back to 1 ms steps after the session start.
                startMs: getTimestampMs(row.timestamp ?? message.timestamp, startMs + index),
                endMs: getTimestampMs(message.completed, startMs + index + 1),
                parentRecordId: asString(row.parentId) ?? "session",
                input: isUser ? extractText(message.content) : undefined,
                output: isAssistant ? extractText(message.content) : undefined,
                usage,
            });
            for (const tool of toolCallsFromContent(message.content)) {
                events.push({
                    ...shared,
                    recordId: tool.id,
                    name: `pi tool ${tool.name}`,
                    cwd,
                    model: asString(message.model),
                    startMs: getTimestampMs(row.timestamp, startMs + index),
                    parentRecordId: asString(row.id),
                    input: tool.arguments,
                });
            }
        });
        return events;
    });
}
514
/**
 * Discovers Grok events from the `.jsonl` files under `.grok/sessions` in
 * `homeDir`.
 *
 * @param {string} homeDir Home directory to scan.
 * @returns {object[]} Events produced by `genericJsonlEvents`.
 */
function grokEvents(homeDir) {
    const sessionsRoot = join(homeDir, ".grok/sessions");
    const isJsonl = (path) => path.endsWith(".jsonl");
    const sessionFiles = listFiles(sessionsRoot, isJsonl);
    return genericJsonlEvents("grok", sessionFiles, "grok session");
}
518
/**
 * Discovers events from the OpenCode SQLite database at
 * `.local/share/opencode/opencode.db` under `homeDir`.
 *
 * Session rows become "session" events; message rows (whose `data` is at most
 * 1,000,000 characters/bytes as measured by SQLite `length`) become message
 * events. Any query failure is logged and results in an empty list.
 *
 * @param {string} homeDir Home directory to scan.
 * @param {number} [rowLimit] When given, caps both the session query and the
 *   message query at `max(rowLimit, 1)` rows each.
 * @returns {object[]} Session events followed by message events.
 */
function opencodeEvents(homeDir, rowLimit) {
    const db = join(homeDir, ".local/share/opencode/opencode.db");
    if (!existsSync(db)) {
        return [];
    }
    let sessionRows;
    let messageRows;
    try {
        const limitClause = rowLimit === undefined
            ? ""
            : ` limit ${Math.max(rowLimit, 1)}`;
        sessionRows = sqliteJson(db, `select * from session order by time_created${limitClause};`);
        messageRows = sqliteJson(db, `select * from message where length(data) <= 1000000 order by time_created${limitClause};`);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`Skipping OpenCode history from ${db}: ${reason}`);
        return [];
    }
    const sessionsById = new Map();
    for (const row of sessionRows) {
        sessionsById.set(asString(row.id), row);
    }
    // Sessions without a string id are dropped.
    const sessionEvents = sessionRows.flatMap((session) => {
        const sessionId = asString(session.id);
        if (!sessionId) {
            return [];
        }
        return [
            {
                agent: "opencode",
                sourcePath: db,
                sessionId,
                recordId: "session",
                name: "opencode session",
                cwd: asString(session.directory),
                startMs: getTimestampMs(session.time_created),
                endMs: getTimestampMs(session.time_updated),
                metadata: pick(session, ["title", "version", "slug", "project_id"]),
            },
        ];
    });
    const messageEvents = messageRows.map((message) => {
        const sessionId = asString(message.session_id);
        const owningSession = sessionsById.get(sessionId);
        const dataRecord = asRecord(parseMaybeJson(message.data));
        const tokens = normalizeUsage(dataRecord.tokens);
        // A `tokens` record takes precedence; its cost is overridden by data.cost when present.
        const usage = tokens
            ? { ...tokens, cost: asNumber(dataRecord.cost) ?? tokens.cost }
            : normalizeUsage(dataRecord.usage);
        return {
            agent: "opencode",
            sourcePath: db,
            sessionId: sessionId ?? stableId(db),
            recordId: asString(message.id) ?? stableId(JSON.stringify(message)),
            name: `opencode ${asString(dataRecord.role) ?? "message"}`,
            role: asString(dataRecord.role),
            model: asString(dataRecord.modelID) ??
                asString(getPath(dataRecord, ["model", "modelID"])),
            provider: asString(dataRecord.providerID) ??
                asString(getPath(dataRecord, ["model", "providerID"])),
            cwd: asString(getPath(dataRecord, ["path", "cwd"])) ??
                asString(asRecord(owningSession).directory),
            startMs: getTimestampMs(message.time_created),
            endMs: getTimestampMs(message.time_updated),
            parentRecordId: asString(dataRecord.parentID) ?? "session",
            usage,
            metadata: pick(dataRecord, ["agent", "mode", "error"]),
        };
    });
    return [...sessionEvents, ...messageEvents];
}
584
/**
 * Runs one SQL statement through the `sqlite3` executable with the
 * `-readonly` and `-json` flags and parses its standard output.
 *
 * Standard error is discarded and empty output is treated as an empty array.
 *
 * @param {string} db Path of the database file.
 * @param {string} sql Statement to execute.
 * @returns {unknown} Parsed JSON output.
 * @throws {Error} When the command fails or its output is not valid JSON.
 */
function sqliteJson(db, sql) {
    const cliArgs = ["-readonly", "-json", db, sql];
    const execOptions = {
        encoding: "utf8",
        maxBuffer: 128 * 1024 * 1024,
        stdio: ["ignore", "pipe", "ignore"],
    };
    const stdout = execFileSync("sqlite3", cliArgs, execOptions);
    return JSON.parse(stdout || "[]");
}
592
/**
 * Converts generic JSONL session logs into events.
 *
 * Each file yields one "session" event whose id, start time and cwd come from
 * the first row, then one event per row plus one event per tool call found in
 * that row's message content.
 *
 * Tool-call events use the same session id and cwd as the row they came from
 * and name that row's event as their parent. Previously they used the
 * file-level session id, so a tool call could be filed under a different
 * session than its own message when the row carried its own `sessionId`.
 *
 * @param {string} agent Agent name stamped on every event.
 * @param {string[]} files JSONL files to read.
 * @param {string} sessionName Name of the per-file session event.
 * @returns {object[]} Events in file order, then row order.
 */
function genericJsonlEvents(agent, files, sessionName) {
    return files.flatMap((path) => {
        const rows = parseJsonl(path).map(asRecord);
        const first = rows[0] ?? {};
        const sessionId = asString(first.sessionId) ??
            asString(first.session_id) ??
            asString(first.id) ??
            stableId(path);
        const startMs = getTimestampMs(first.timestamp ?? first.time_created);
        const cwd = asString(first.cwd);
        const events = [
            {
                agent,
                sourcePath: path,
                sessionId,
                recordId: "session",
                name: sessionName,
                cwd,
                startMs,
            },
        ];
        for (const [index, row] of rows.entries()) {
            const message = asRecord(row.message);
            const role = asString(message.role) ?? asString(row.type);
            // Rows without a usable timestamp are spread 1 ms apart after the session start.
            const timestamp = getTimestampMs(row.timestamp ?? row.time_created, startMs + index);
            const usage = normalizeUsage(message.usage ?? row.usage);
            // Resolved once so the row event and its tool calls always agree.
            const rowSessionId = asString(row.sessionId) ??
                asString(row.session_id) ??
                sessionId;
            const rowCwd = asString(row.cwd) ?? cwd;
            const rowRecordId = asString(row.uuid) ??
                asString(row.id) ??
                asString(row.toolUseID) ??
                `row-${index}`;
            events.push({
                agent,
                sourcePath: path,
                sessionId: rowSessionId,
                recordId: rowRecordId,
                name: `${agent} ${role ?? "event"}`,
                role,
                model: asString(getPath(message, ["model"])) ?? asString(row.model),
                cwd: rowCwd,
                startMs: timestamp,
                parentRecordId: asString(row.parentUuid) ?? asString(row.parentId) ??
                    "session",
                input: role === "user"
                    ? extractText(message.content ?? row.content)
                    : undefined,
                output: role === "assistant"
                    ? extractText(message.content ?? row.content)
                    : undefined,
                usage,
                metadata: pick(row, [
                    "type",
                    "entrypoint",
                    "version",
                    "gitBranch",
                    "error",
                ]),
            });
            for (const tool of toolCallsFromContent(message.content)) {
                events.push({
                    agent,
                    sourcePath: path,
                    sessionId: rowSessionId,
                    recordId: tool.id,
                    name: `${agent} tool ${tool.name}`,
                    cwd: rowCwd,
                    startMs: timestamp,
                    parentRecordId: rowRecordId,
                    input: tool.arguments,
                });
            }
        }
        return events;
    });
}
666
/**
 * Extracts tool calls from a message content array.
 *
 * Items whose `type` is "toolCall" or "tool_use" are kept. A missing id falls
 * back to `tool_use_id` and then to `tool-<index in content>`; a missing name
 * becomes "call"; arguments come from `arguments` or else `input`.
 *
 * @param {unknown} content Message content.
 * @returns {{id: string, name: string, arguments: unknown}[]} Tool calls in
 *   content order; empty when `content` is not an array.
 */
function toolCallsFromContent(content) {
    const calls = [];
    if (!Array.isArray(content)) {
        return calls;
    }
    content.forEach((item, index) => {
        const record = asRecord(item);
        const kind = asString(record.type);
        const isToolCall = kind === "toolCall" || kind === "tool_use";
        if (!isToolCall) {
            return;
        }
        calls.push({
            id: asString(record.id) ?? asString(record.tool_use_id) ??
                `tool-${index}`,
            name: asString(record.name) ?? "call",
            arguments: record.arguments ?? record.input,
        });
    });
    return calls;
}
684
/**
 * Copies the listed properties that are not `undefined` into a new object.
 *
 * @param {object} source Object to read from.
 * @param {string[]} keys Property names to copy, in output order.
 * @returns {object} New object with the defined properties (`null` is kept).
 */
function pick(source, keys) {
    const selected = {};
    keys.forEach((key) => {
        const candidate = source[key];
        if (candidate !== undefined) {
            selected[key] = candidate;
        }
    });
    return selected;
}
692
/**
 * Parses a value as JSON when it is a string holding valid JSON.
 *
 * @param {unknown} value Candidate value.
 * @returns {unknown} The parsed JSON, or `value` unchanged when it is not a
 *   string or cannot be parsed.
 */
function parseMaybeJson(value) {
    if (typeof value === "string") {
        try {
            return JSON.parse(value);
        }
        catch {
            // Not JSON: fall through and hand the string back untouched.
        }
    }
    return value;
}
702
/**
 * Derives a deterministic 32-character hex id from the input.
 *
 * @param {string} input Text to hash.
 * @returns {string} First 32 hex characters of the SHA-256 digest.
 */
function stableId(input) {
    const hasher = createHash("sha256");
    hasher.update(input);
    const hexDigest = hasher.digest("hex");
    return hexDigest.slice(0, 32);
}
705
/**
 * Builds the identity string of a single event: the trace identity followed
 * by the event's record id.
 *
 * @param {object} event Discovered event.
 * @returns {string} `<version>:<agent>:<sourcePath>:<sessionId>:<recordId>`.
 */
function fingerprint(event) {
    const traceKey = traceFingerprint(event);
    return `${traceKey}:${event.recordId}`;
}
708
/**
 * Builds the identity string shared by all events of one session in one
 * source file.
 *
 * @param {object} event Discovered event.
 * @returns {string} `<version>:<agent>:<sourcePath>:<sessionId>`.
 */
function traceFingerprint(event) {
    const { agent, sourcePath, sessionId } = event;
    // String() mirrors template-literal coercion, so missing fields render as "undefined".
    const parts = [importIdentityVersion, agent, sourcePath, sessionId];
    return parts.map((part) => String(part)).join(":");
}
711
/**
 * Computes the trace id of an event (32 hex characters).
 *
 * @param {object} event Discovered event.
 * @returns {string} `stableId` of the event's trace fingerprint.
 */
function traceId(event) {
    const traceKey = traceFingerprint(event);
    return stableId(traceKey);
}
714
/**
 * Computes the span id of an event (16 hex characters).
 *
 * @param {object} event Discovered event.
 * @returns {string} First 16 characters of `stableId` of the fingerprint.
 */
function spanId(event) {
    const fullId = stableId(fingerprint(event));
    return fullId.slice(0, 16);
}
717
/**
 * Computes the id of the synthetic root span of an event's trace
 * (16 hex characters).
 *
 * @param {object} event Any event of the trace.
 * @returns {string} First 16 characters of `stableId` of the trace
 *   fingerprint suffixed with ":root".
 */
function rootSpanId(event) {
    const rootKey = `${traceFingerprint(event)}:root`;
    const fullId = stableId(rootKey);
    return fullId.slice(0, 16);
}
720
/**
 * Converts milliseconds to a decimal nanosecond string.
 *
 * The fractional part of `ms` is truncated first; BigInt arithmetic keeps the
 * result exact beyond Number's safe integer range.
 *
 * @param {number} ms Finite timestamp in milliseconds.
 * @returns {string} Nanoseconds as a base-10 string.
 */
function ns(ms) {
    const wholeMs = BigInt(Math.trunc(ms));
    const nanos = wholeMs * 1000000n;
    return nanos.toString();
}
723
/**
 * Builds one OTLP key/value attribute.
 *
 * Finite integers become `intValue`, other finite numbers `doubleValue`,
 * booleans `boolValue`, strings `stringValue`. Everything else (objects,
 * arrays, non-finite numbers) is JSON-serialized and cut to 8000 characters.
 *
 * @param {string} key Attribute key.
 * @param {unknown} value Attribute value.
 * @returns {{key: string, value: object}|undefined} The attribute, or
 *   `undefined` when `value` is `null` or `undefined`.
 */
function attr(key, value) {
    if (value === undefined || value === null) {
        return undefined;
    }
    switch (typeof value) {
        case "number":
            if (Number.isFinite(value)) {
                return Number.isInteger(value)
                    ? { key, value: { intValue: value } }
                    : { key, value: { doubleValue: value } };
            }
            // Non-finite numbers fall through to the JSON fallback below.
            break;
        case "boolean":
            return { key, value: { boolValue: value } };
        case "string":
            return { key, value: { stringValue: value } };
        default:
            break;
    }
    const serialized = JSON.stringify(value).slice(0, 8000);
    return { key, value: { stringValue: serialized } };
}
737
/**
 * Converts discovered events into an OTLP/JSON trace export payload.
 *
 * Events are grouped by trace id. Each group produces one synthetic root span
 * followed by one child span per event, ordered by start time; every child is
 * parented directly to the root span.
 *
 * Grouping appends to per-trace buckets in place and the trace end time is
 * folded with a reduce, so cost stays linear in the number of events and does
 * not depend on the engine's argument-count limit.
 *
 * @param {object[]} events Events to export.
 * @returns {{resourceSpans: object[]}} Payload with a single resource and a
 *   single scope containing all spans.
 */
function toOtlp(events) {
    // Bucket events per trace, preserving first-seen order of traces and events.
    const eventsByTrace = new Map();
    for (const event of events) {
        const id = traceId(event);
        const bucket = eventsByTrace.get(id);
        if (bucket) {
            bucket.push(event);
        }
        else {
            eventsByTrace.set(id, [event]);
        }
    }
    // Builds the synthetic session-level span. `first` is the first event in
    // input order (not start-time order) and supplies the trace-wide fields.
    const buildRootSpan = (first, sortedEvents) => {
        const traceStartMs = sortedEvents[0]?.startMs ?? Date.now();
        // The root must end no earlier than 1 ms after it starts.
        const traceEndMs = sortedEvents.reduce((latest, event) => Math.max(latest, event.endMs ?? event.startMs + 1), traceStartMs + 1);
        const firstInputEvent = sortedEvents.find((event) => event.input !== undefined);
        let lastOutputEvent;
        for (let i = sortedEvents.length - 1; i >= 0; i -= 1) {
            if (sortedEvents[i].output !== undefined) {
                lastOutputEvent = sortedEvents[i];
                break;
            }
        }
        const attributes = [
            attr("service.name", `agent.${first.agent}`),
            attr("deployment.environment", "local"),
            attr("langfuse.environment", "local"),
            attr("langfuse.internal.as_root", true),
            attr("langfuse.trace.name", `${first.agent} session`),
            attr("langfuse.session.id", first.sessionId),
            attr("session.id", first.sessionId),
            attr("langfuse.observation.type", "span"),
            attr("agent.name", first.agent),
            attr("agent.session_id", first.sessionId),
            attr("agent.record_id", "session-root"),
            attr("agent.event_count", sortedEvents.length),
            attr("langfuse.trace.metadata.agent", first.agent),
            attr("langfuse.trace.metadata.source_path", first.sourcePath),
            attr("langfuse.trace.metadata.cwd", first.cwd),
            attr("langfuse.trace.input", firstInputEvent?.input),
            attr("langfuse.trace.output", lastOutputEvent?.output),
            attr("langfuse.observation.metadata.agent", first.agent),
            attr("langfuse.observation.metadata.record_id", "session-root"),
            attr("source.path", first.sourcePath),
            attr("cwd", first.cwd),
        ].filter((item) => Boolean(item));
        return {
            traceId: traceId(first),
            spanId: rootSpanId(first),
            name: `${first.agent} session`,
            kind: 1,
            startTimeUnixNano: ns(traceStartMs),
            endTimeUnixNano: ns(traceEndMs),
            attributes,
            status: { code: 1 },
        };
    };
    // Builds the span for one event.
    const buildChildSpan = (event) => {
        const startMs = event.startMs;
        // Every span lasts at least 1 ms, even without a recorded end time.
        const durationMs = Math.max(1, (event.endMs ?? event.startMs + 1) - event.startMs);
        const modelName = normalizeModelName(event.model);
        const generation = isGenerationEvent(event);
        const usage = usageDetails(event.usage);
        const cost = costDetails(event.usage, modelName);
        // Same as `cost` minus `source`, which is reported as metadata instead.
        const costForLangfuse = cost === undefined
            ? undefined
            : {
                ...(cost.input !== undefined ? { input: cost.input } : {}),
                ...(cost.output !== undefined ? { output: cost.output } : {}),
                ...(cost.cache_read !== undefined ? { cache_read: cost.cache_read } : {}),
                ...(cost.cache_write !== undefined ? { cache_write: cost.cache_write } : {}),
                total: cost.total,
            };
        const attributes = [
            attr("service.name", `agent.${event.agent}`),
            attr("deployment.environment", "local"),
            attr("langfuse.environment", "local"),
            attr("langfuse.session.id", event.sessionId),
            attr("session.id", event.sessionId),
            attr("langfuse.observation.type", generation ? "generation" : "span"),
            attr("langfuse.observation.model.name", generation ? modelName : undefined),
            attr("langfuse.observation.usage_details", usage),
            attr("langfuse.observation.cost_details", costForLangfuse),
            attr("gen_ai.response.model", generation ? modelName : undefined),
            attr("gen_ai.usage.input_tokens", usage?.input),
            attr("gen_ai.usage.output_tokens", usage?.output),
            attr("gen_ai.usage.total_tokens", usage?.total),
            attr("gen_ai.usage.cost", cost?.total),
            attr("agent.name", event.agent),
            attr("agent.session_id", event.sessionId),
            attr("agent.record_id", event.recordId),
            attr("agent.original_start_time", new Date(event.startMs).toISOString()),
            attr("agent.original_end_time", event.endMs === undefined ? undefined : new Date(event.endMs).toISOString()),
            attr("langfuse.trace.metadata.agent", event.agent),
            attr("langfuse.trace.metadata.source_path", event.sourcePath),
            attr("langfuse.trace.metadata.model", event.model),
            attr("langfuse.trace.metadata.provider", event.provider),
            attr("langfuse.observation.metadata.agent", event.agent),
            attr("langfuse.observation.metadata.record_id", event.recordId),
            attr("langfuse.observation.metadata.model", modelName ?? event.model),
            attr("langfuse.observation.metadata.provider", event.provider),
            attr("langfuse.observation.metadata.cost_source", cost?.source),
            attr("langfuse.trace.input", event.input),
            attr("langfuse.trace.output", event.output),
            attr("langfuse.observation.input", event.input),
            attr("langfuse.observation.output", event.output),
            attr("source.path", event.sourcePath),
            attr("cwd", event.cwd),
            attr("role", event.role),
            attr("agent.model", event.model),
            attr("agent.provider", event.provider),
            attr("input.value", event.input),
            attr("output.value", event.output),
            attr("metadata", event.metadata),
        ].filter((item) => Boolean(item));
        return {
            traceId: traceId(event),
            spanId: spanId(event),
            parentSpanId: rootSpanId(event),
            name: event.name,
            kind: 1,
            startTimeUnixNano: ns(startMs),
            endTimeUnixNano: ns(startMs + durationMs),
            attributes,
            status: { code: 1 },
        };
    };
    const spans = [];
    for (const traceEvents of eventsByTrace.values()) {
        const first = traceEvents[0];
        if (!first) {
            continue;
        }
        const sortedEvents = [...traceEvents].sort((a, b) => a.startMs - b.startMs);
        spans.push(buildRootSpan(first, sortedEvents));
        for (const event of sortedEvents) {
            spans.push(buildChildSpan(event));
        }
    }
    return {
        resourceSpans: [
            {
                resource: {
                    attributes: [
                        {
                            key: "service.name",
                            value: { stringValue: "agent-history-backfill" },
                        },
                        {
                            key: "telemetry.sdk.name",
                            value: { stringValue: "toolbox-langfuse-backfill" },
                        },
                    ],
                },
                scopeSpans: [
                    {
                        scope: { name: "toolbox.langfuse.backfill", version: "1" },
                        spans,
                    },
                ],
            },
        ],
    };
}
884
/**
 * Sends a batch of events to the OTLP HTTP endpoint as JSON.
 *
 * @param {string} endpoint URL to POST to.
 * @param {object[]} events Events to convert with `toOtlp` and send.
 * @returns {Promise<void>} Resolves when the endpoint answers with a 2xx status.
 * @throws {Error} When the request cannot be made (message from
 *   `describeError`, original error attached as `cause`) or when the response
 *   status is not ok (message includes status and response body).
 */
async function postOtlp(endpoint, events) {
    let response;
    try {
        response = await fetch(endpoint, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify(toOtlp(events)),
        });
    }
    catch (error) {
        // Keep the original error reachable for debugging instead of flattening it to text only.
        throw new Error(describeError(error), { cause: error });
    }
    if (!response.ok) {
        throw new Error(`OTLP POST failed: ${response.status} ${await response.text()}`);
    }
}
900
/**
 * Builds a one-line, human-readable description of a thrown value.
 *
 * For an `Error` with a `cause`, the cause's `code`, `syscall`,
 * `address:port` and `message` (when present with the expected types) are
 * appended in parentheses after the error's own message.
 *
 * @param {unknown} error - The thrown value to describe.
 * @returns {string} `String(error)` for non-Error values, otherwise the error
 *   message, optionally followed by `(detail; detail; ...)` from the cause.
 */
function describeError(error) {
    if (!(error instanceof Error))
        return String(error);
    const cause = error.cause;
    if (!cause)
        return error.message;
    // A cause may be a plain string (`new Error(msg, { cause: "text" })`);
    // it has no structured fields, so report the text itself.
    if (typeof cause === "string")
        return `${error.message} (cause: ${cause})`;
    const parts = [
        typeof cause.code === "string" ? cause.code : undefined,
        typeof cause.syscall === "string" ? cause.syscall : undefined,
        typeof cause.address === "string" && typeof cause.port === "number"
            ? `${cause.address}:${cause.port}`
            : undefined,
        // Skip empty messages: a cause that only carries a code would
        // otherwise add a dangling "cause: " fragment to the description.
        typeof cause.message === "string" && cause.message !== ""
            ? `cause: ${cause.message}`
            : undefined,
    ].filter(Boolean);
    return parts.length > 0
        ? `${error.message} (${parts.join("; ")})`
        : error.message;
}
918
/**
 * Collects events from every enabled agent, applies the optional time window
 * and returns them ordered by start time.
 *
 * @param {object} options - Run options; reads `agents` (a set-like object
 *   with `has`), `homeDir`, `limit`, `sinceMs` and `untilMs`.
 * @returns {Array<object>} Events whose `startMs` lies within
 *   [`sinceMs`, `untilMs`] (each bound ignored when undefined), sorted by
 *   ascending `startMs`.
 */
function discoverEvents(options) {
    // One reader per agent; each receives the full options object.
    const readers = {
        claude: ({ homeDir }) => claudeEvents(homeDir),
        codex: ({ homeDir }) => codexEvents(homeDir),
        grok: ({ homeDir }) => grokEvents(homeDir),
        opencode: ({ homeDir, limit }) => opencodeEvents(homeDir, limit),
        pi: ({ homeDir }) => piEvents(homeDir),
    };
    const notBeforeSince = (event) => {
        if (options.sinceMs === undefined) {
            return true;
        }
        return event.startMs >= options.sinceMs;
    };
    const notAfterUntil = (event) => {
        if (options.untilMs === undefined) {
            return true;
        }
        return event.startMs <= options.untilMs;
    };
    // Iterating allAgents (not options.agents) keeps a fixed agent order.
    const enabledAgents = allAgents.filter((agent) => options.agents.has(agent));
    const gathered = enabledAgents.flatMap((agent) => readers[agent](options));
    const inWindow = gathered.filter(notBeforeSince).filter(notAfterUntil);
    return inWindow.sort((left, right) => left.startMs - right.startMs);
}
933
/**
 * Executes one backfill pass: discovers events, drops those already recorded
 * in the dedupe state, and (unless `dryRun`) POSTs the rest in batches,
 * persisting the state after every successful batch.
 *
 * The first failed batch aborts the pass; later batches are not attempted.
 *
 * @param {object} options - Run options; reads `statePath`, `endpoint`,
 *   `batchSize`, `limit`, `dryRun`, plus whatever `discoverEvents` reads.
 * @returns {Promise<object>} Summary with `discovered` (per-agent counts),
 *   `sent`, `skipped`, `failed`, `notAttempted`, `aborted`, an optional
 *   `error` description, `dryRun`, `endpoint` and `statePath`.
 */
async function run(options) {
    const state = loadState(options.statePath);
    const events = discoverEvents(options);
    // Start every known agent at zero so the summary always lists all of them.
    const discovered = {};
    for (const agent of allAgents) {
        discovered[agent] = 0;
    }
    for (const { agent } of events) {
        discovered[agent] = (discovered[agent] ?? 0) + 1;
    }
    const isUnsent = (event) => state.sent[fingerprint(event)] === undefined;
    const unsent = events.filter(isUnsent);
    let selected = unsent;
    if (options.limit !== undefined) {
        selected = unsent.slice(0, options.limit);
    }
    const outcome = { sent: 0, failed: 0, aborted: false, lastError: undefined };
    if (!options.dryRun) {
        let index = 0;
        while (index < selected.length) {
            const batch = selected.slice(index, index + options.batchSize);
            try {
                await postOtlp(options.endpoint, batch);
                batch.forEach((event) => {
                    state.sent[fingerprint(event)] = new Date().toISOString();
                });
                // Persist after each batch so a later failure can resume here.
                saveState(options.statePath, state);
                outcome.sent += batch.length;
            }
            catch (error) {
                outcome.failed += batch.length;
                outcome.aborted = true;
                outcome.lastError = describeError(error);
                const batchNumber = Math.floor(index / options.batchSize) + 1;
                console.error(`OTLP POST failed for batch ${batchNumber} (${batch.length} event(s)) to ${options.endpoint}: ${outcome.lastError}`);
                const remaining = selected.length - index - batch.length;
                if (remaining > 0) {
                    console.error(`Aborting backfill after first failed POST; ${remaining} selected event(s) were not attempted. Fix the endpoint and rerun to resume from the local dedupe state.`);
                }
                break;
            }
            index += options.batchSize;
        }
    }
    const { sent, failed, aborted, lastError } = outcome;
    // Key order is kept stable because the summary is printed as JSON.
    return {
        discovered,
        sent: options.dryRun ? 0 : sent,
        skipped: events.length - selected.length,
        failed,
        notAttempted: selected.length - sent - failed,
        aborted,
        ...(lastError ? { error: lastError } : {}),
        dryRun: options.dryRun,
        endpoint: options.endpoint,
        statePath: options.statePath,
    };
}
985
/**
 * CLI entry: parses arguments, runs the backfill and prints the summary as
 * indented JSON on stdout.
 *
 * @param {string[]} [argv] - Command-line arguments; defaults to the process
 *   arguments after the script path.
 * @returns {Promise<object>} The summary object produced by `run`.
 */
async function main(argv = process.argv.slice(2)) {
    const options = parseArgs(argv);
    const summary = await run(options);
    const rendered = JSON.stringify(summary, null, 2);
    console.log(rendered);
    return summary;
}
990
// Run the CLI only when this module is the process entry point, not when it
// is imported (for example by tests using the named exports).
{
    const entryPath = process.argv[1];
    // Original check, kept first so existing invocations behave the same.
    let isEntryPoint = import.meta.url === `file://${entryPath}`;
    if (!isEntryPoint && typeof entryPath === "string") {
        // The literal comparison misses launches through a symlink (such as
        // an npm bin shim), paths with characters that are percent-encoded
        // in URLs, and Windows paths. Compare against the file URL of the
        // resolved real path to cover those cases as well.
        const { realpathSync } = await import("node:fs");
        const { pathToFileURL } = await import("node:url");
        try {
            isEntryPoint = import.meta.url === pathToFileURL(realpathSync(entryPath)).href;
        }
        catch {
            // argv[1] does not resolve to an existing file, so this module
            // cannot be the entry script; keep treating it as imported.
        }
    }
    if (isEntryPoint) {
        try {
            const summary = await main();
            // A non-zero exit code signals that at least one event failed.
            process.exit(summary.failed > 0 ? 1 : 0);
        }
        catch (error) {
            console.error(error instanceof Error ? error.message : String(error));
            process.exit(1);
        }
    }
}
1000
+ export { claudeEvents, codexEvents, discoverEvents, fingerprint, grokEvents, main, opencodeEvents, parseArgs, piEvents, run, toOtlp, };