@bububuger/spanory 0.1.18 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1016 +0,0 @@
1
- // @ts-nocheck
2
- import { createHash } from 'node:crypto';
3
- import { calibratedEstimate, calibrate, CONTEXT_SOURCE_KINDS, estimateTokens, pollutionScoreV1, } from '@bububuger/core';
4
/**
 * Coerces a loosely typed value to a finite number.
 *
 * `null`, `undefined` and blank strings are treated as "absent" instead of
 * being coerced to 0 by `Number()`, so a missing counter is never reported as
 * a real zero.
 *
 * @param {unknown} value - Raw value (number, numeric string, ...).
 * @returns {number | undefined} The finite number, or `undefined` when the
 *   value is absent or not numeric.
 */
function toNumber(value) {
    if (value === null || value === undefined)
        return undefined;
    // Number('') and Number('   ') are 0, which would fabricate a count.
    if (typeof value === 'string' && value.trim() === '')
        return undefined;
    const n = Number(value);
    return Number.isFinite(n) ? n : undefined;
}
8
/**
 * Normalizes a usage payload into the canonical snake_case token fields.
 *
 * Both the `input_tokens`/`output_tokens` and the
 * `prompt_tokens`/`completion_tokens` spellings are accepted. When
 * `total_tokens` does not resolve to a number it is derived from
 * input + output; a derived total of 0 is omitted.
 *
 * @param {object} raw - Usage object to normalize.
 * @returns {object | undefined} Only the fields that resolved to a number, or
 *   `undefined` when `raw` is not an object or nothing resolved.
 */
export function pickUsage(raw) {
    if (!raw || typeof raw !== 'object') {
        return undefined;
    }
    const input = toNumber(raw.input_tokens ?? raw.prompt_tokens);
    const output = toNumber(raw.output_tokens ?? raw.completion_tokens);
    let total = toNumber(raw.total_tokens);
    if (total === undefined) {
        const summed = (input ?? 0) + (output ?? 0);
        total = summed || undefined;
    }
    // Entry order fixes the key order of the returned object.
    const candidates = [
        ['input_tokens', input],
        ['output_tokens', output],
        ['total_tokens', total],
        ['cache_read_input_tokens', toNumber(raw.cache_read_input_tokens)],
        ['cache_creation_input_tokens', toNumber(raw.cache_creation_input_tokens)],
    ];
    const present = candidates.filter(([, amount]) => amount !== undefined);
    return present.length > 0 ? Object.fromEntries(present) : undefined;
}
29
/**
 * Accumulates one usage record into a running total, in place.
 *
 * Values that do not coerce to a finite number are skipped so a single
 * malformed field cannot turn a running counter into NaN.
 *
 * @param {object} total - Mutable accumulator keyed by usage field name.
 * @param {object | undefined} usage - Usage record to add; falsy is a no-op.
 * @returns {void}
 */
function addUsage(total, usage) {
    if (!usage)
        return;
    for (const [key, value] of Object.entries(usage)) {
        const amount = Number(value);
        if (!Number.isFinite(amount))
            continue;
        total[key] = (total[key] ?? 0) + amount;
    }
}
36
/**
 * Converts a normalized usage record into `gen_ai.usage.*` attributes.
 *
 * Input and output counts are written under two attribute names each. A cache
 * hit rate (cache reads / (input + cache reads), rounded to 6 decimals) is
 * always included for a truthy `usage`.
 *
 * @param {object | undefined} usage - Normalized usage record.
 * @returns {object} Attribute map; empty when `usage` is falsy.
 */
function usageAttributes(usage) {
    if (!usage) {
        return {};
    }
    const fieldToAttributeNames = [
        ['input_tokens', ['gen_ai.usage.input_tokens', 'gen_ai.usage.prompt_tokens']],
        ['output_tokens', ['gen_ai.usage.output_tokens', 'gen_ai.usage.completion_tokens']],
        ['total_tokens', ['gen_ai.usage.total_tokens']],
        ['cache_read_input_tokens', ['gen_ai.usage.cache_read.input_tokens']],
        ['cache_creation_input_tokens', ['gen_ai.usage.cache_creation.input_tokens']],
    ];
    const attrs = {};
    for (const [field, attributeNames] of fieldToAttributeNames) {
        const amount = usage[field];
        if (amount === undefined) {
            continue;
        }
        for (const attributeName of attributeNames) {
            attrs[attributeName] = amount;
        }
    }
    const cachedTokens = usage.cache_read_input_tokens ?? 0;
    const promptTotal = (usage.input_tokens ?? 0) + cachedTokens;
    const hitRate = promptTotal > 0 ? cachedTokens / promptTotal : 0;
    attrs['gen_ai.usage.details.cache_hit_rate'] = Number(hitRate.toFixed(6));
    return attrs;
}
63
/**
 * Builds the model-name attributes for an event.
 *
 * @param {string | undefined} model - Model identifier.
 * @returns {object} Both model attributes, or an empty object when `model`
 *   is falsy.
 */
function modelAttributes(model) {
    return model
        ? { 'langfuse.observation.model.name': model, 'gen_ai.request.model': model }
        : {};
}
71
/**
 * Flattens message content into plain text.
 *
 * A string is returned unchanged. For an array, string entries and the `text`
 * of `{ type: 'text' }` blocks are kept, empty pieces are dropped, and the
 * rest are joined with newlines. Anything else yields ''.
 *
 * @param {unknown} content - String or array of content blocks.
 * @returns {string} The extracted text.
 */
function extractText(content) {
    if (typeof content === 'string') {
        return content;
    }
    if (!Array.isArray(content)) {
        return '';
    }
    const pieces = [];
    for (const block of content) {
        let piece = '';
        if (typeof block === 'string') {
            piece = block;
        }
        else if (block && typeof block === 'object' && block.type === 'text') {
            piece = String(block.text ?? '');
        }
        if (piece) {
            pieces.push(piece);
        }
    }
    return pieces.join('\n');
}
87
/**
 * Selects the `tool_use` blocks from message content.
 *
 * @param {unknown} content - Message content; non-arrays yield [].
 * @returns {object[]} Object blocks whose `type` is 'tool_use'.
 */
function extractToolUses(content) {
    const blocks = Array.isArray(content) ? content : [];
    return blocks.filter((block) => typeof block === 'object' && block !== null && block.type === 'tool_use');
}
92
/**
 * Selects the `tool_result` blocks from message content.
 *
 * @param {unknown} content - Message content; non-arrays yield [].
 * @returns {object[]} Object blocks whose `type` is 'tool_result'.
 */
function extractToolResults(content) {
    const blocks = Array.isArray(content) ? content : [];
    return blocks.filter((block) => typeof block === 'object' && block !== null && block.type === 'tool_result');
}
97
/**
 * Selects the `reasoning` blocks from message content.
 *
 * @param {unknown} content - Message content; non-arrays yield [].
 * @returns {object[]} Object blocks whose `type` is 'reasoning'.
 */
function extractReasoningBlocks(content) {
    const blocks = Array.isArray(content) ? content : [];
    return blocks.filter((block) => typeof block === 'object' && block !== null && block.type === 'reasoning');
}
102
/**
 * Renders a timestamp of unknown shape as an ISO-8601 string.
 *
 * @param {unknown} value - Date, or anything accepted by `new Date()`;
 *   null/undefined are treated as unparseable.
 * @param {Date} fallback - Date used when `value` does not parse.
 * @returns {string} ISO string of the parsed value or of `fallback`.
 */
function isoFromUnknownTimestamp(value, fallback) {
    const parsed = value instanceof Date ? value : new Date(value ?? '');
    const resolved = Number.isNaN(parsed.getTime()) ? fallback : parsed;
    return resolved.toISOString();
}
108
/**
 * Tells whether content is a non-empty array made only of `tool_result`
 * blocks.
 *
 * @param {unknown} content - Message content.
 * @returns {boolean} True only for a non-empty, all-`tool_result` array.
 */
function isToolResultOnlyContent(content) {
    if (!Array.isArray(content) || content.length === 0) {
        return false;
    }
    const hasOtherBlock = content.some((block) => !block || typeof block !== 'object' || block.type !== 'tool_result');
    return !hasOtherBlock;
}
113
/**
 * Tells whether a message is a user prompt.
 *
 * The message must have role 'user' and must not be flagged `isMeta`. String
 * content must be non-blank; array content must be non-empty and must not
 * consist solely of tool results.
 *
 * @param {object} message - Message with `role`, `isMeta` and `content`.
 * @returns {boolean} True when the message qualifies as a prompt.
 */
function isPromptUserMessage(message) {
    const isVisibleUser = Boolean(message) && message.role === 'user' && !message.isMeta;
    if (!isVisibleUser) {
        return false;
    }
    const body = message.content;
    if (typeof body === 'string') {
        return body.trim().length > 0;
    }
    if (!Array.isArray(body)) {
        return false;
    }
    return !isToolResultOnlyContent(body) && body.length > 0;
}
125
/**
 * Matches a "Conversation info (untrusted metadata):" header followed by a
 * fenced json block, plus any whitespace after the closing fence.
 * Capture group 1 is the text between the fences.
 */
const GATEWAY_INPUT_METADATA_BLOCK_RE = /Conversation info \(untrusted metadata\):\s*```json\s*([\s\S]*?)\s*```\s*/i;
126
/**
 * Builds the runtime-version attribute.
 *
 * @param {unknown} version - Version value; stringified and trimmed.
 * @returns {object} `{ 'agentic.runtime.version': <trimmed> }`, or an empty
 *   object when the version is null/undefined or blank.
 */
function runtimeVersionAttributes(version) {
    const label = String(version ?? '').trim();
    return label ? { 'agentic.runtime.version': label } : {};
}
136
/**
 * Separates a leading "Conversation info" metadata block from user input.
 *
 * When the block is found and its JSON is a plain object, the object is
 * exposed as `agentic.input.metadata` (re-serialized) together with
 * `message_id` and `sender` when present. Malformed JSON is ignored and only
 * the wrapper is stripped. The input is the text after the block, or the
 * whole trimmed text if nothing follows it.
 *
 * @param {string} text - Raw user text.
 * @returns {{ input: string, attributes: object }} Cleaned input and the
 *   attributes derived from the metadata block.
 */
function extractGatewayInputMetadata(text) {
    const attributes = {};
    if (!text) {
        return { input: '', attributes };
    }
    const found = text.match(GATEWAY_INPUT_METADATA_BLOCK_RE);
    if (!found) {
        return { input: text.trim(), attributes };
    }
    const [wrapper, metadataJson] = found;
    try {
        const metadata = JSON.parse(metadataJson);
        const isPlainObject = Boolean(metadata) && typeof metadata === 'object' && !Array.isArray(metadata);
        if (isPlainObject) {
            attributes['agentic.input.metadata'] = JSON.stringify(metadata);
            if (metadata.message_id !== undefined) {
                attributes['agentic.input.message_id'] = String(metadata.message_id);
            }
            if (metadata.sender !== undefined) {
                attributes['agentic.input.sender'] = String(metadata.sender);
            }
        }
    }
    catch {
        // ignore malformed metadata JSON and only strip wrapper text
    }
    const remainder = text.slice(found.index + wrapper.length).trim();
    return { input: remainder || text.trim(), attributes };
}
160
/**
 * Turns user message content into an input string plus input attributes.
 *
 * Non-blank extracted text goes through gateway metadata extraction. Array
 * content with no text is serialized as JSON. Anything else yields an empty
 * input.
 *
 * @param {unknown} content - User message content.
 * @returns {{ input: string, attributes: object }} Normalized input.
 */
function normalizeUserInput(content) {
    const plainText = extractText(content).trim();
    if (plainText) {
        return extractGatewayInputMetadata(plainText);
    }
    if (Array.isArray(content)) {
        return { input: JSON.stringify(content), attributes: {} };
    }
    return typeof content === 'string'
        ? extractGatewayInputMetadata(content)
        : { input: '', attributes: {} };
}
170
/**
 * Resolves the textual output of a tool result block.
 *
 * Order of preference: non-blank string content; text extracted from array
 * content (or the array as JSON when it has no text); object content as JSON;
 * then the enclosing message's `toolUseResult.stdout`, then `.stderr`.
 *
 * @param {object} block - Tool result block; `content` is inspected.
 * @param {object} [message] - Enclosing message, used for stdout/stderr.
 * @returns {string} The resolved text, or '' when nothing is available.
 */
function extractToolResultText(block, message) {
    const payload = block?.content;
    if (typeof payload === 'string' && payload.trim()) {
        return payload;
    }
    if (Array.isArray(payload)) {
        return extractText(payload).trim() || JSON.stringify(payload);
    }
    if (payload && typeof payload === 'object') {
        return JSON.stringify(payload);
    }
    const captured = message?.toolUseResult;
    for (const stream of [captured?.stdout, captured?.stderr]) {
        if (typeof stream === 'string' && stream.length > 0) {
            return stream;
        }
    }
    return '';
}
190
/**
 * Parses a slash command out of user text.
 *
 * Two forms are recognized: the tagged form
 * (`<command-name>/name</command-name>` with optional `<command-args>`), and
 * a plain message that starts with `/name`.
 *
 * The input is normalized to a string first, so null/undefined and
 * non-string values return `null` instead of throwing.
 *
 * @param {unknown} text - User text.
 * @returns {{ name: string, args: string } | null} Parsed command, or `null`
 *   when the text is not a slash command.
 */
function parseSlashCommand(text) {
    const source = String(text ?? '');
    const tagged = source.match(/<command-name>\s*\/([^<\s]+)\s*<\/command-name>/i);
    if (tagged) {
        const argsMatch = source.match(/<command-args>([\s\S]*?)<\/command-args>/i);
        return { name: tagged[1].trim(), args: argsMatch ? argsMatch[1].trim() : '' };
    }
    // fallback for plain slash commands like "/compact please summarize"
    const plain = source.trim().match(/^\/([a-zA-Z0-9._:-]+)(?:\s+([\s\S]*))?$/);
    if (!plain)
        return null;
    return {
        name: plain[1].trim(),
        args: plain[2] ? plain[2].trim() : '',
    };
}
205
/**
 * Derives command attributes from a shell command line.
 *
 * The line is split on single `|` pipe characters. The first whitespace token
 * of the first segment is the command name and the rest of that segment is
 * the args. `||` (logical OR) is not a pipe: neither of its bars is used as a
 * split point, so it no longer inflates `pipe_count` or truncates the args.
 * Quoting is not interpreted, so a `|` inside quotes still counts.
 *
 * @param {unknown} commandLine - Raw command line; null/undefined become ''.
 * @returns {object} `agentic.command.name`, `agentic.command.args` and
 *   `agentic.command.pipe_count`.
 */
function parseBashCommandAttributes(commandLine) {
    const raw = String(commandLine ?? '').trim();
    if (!raw) {
        return {
            'agentic.command.name': '',
            'agentic.command.args': '',
            'agentic.command.pipe_count': 0,
        };
    }
    // A pipe is a bar with no bar directly before or after it.
    const segments = raw.split(/(?<!\|)\|(?!\|)/);
    const firstSegment = String(segments[0] ?? '').trim();
    const tokens = firstSegment ? firstSegment.split(/\s+/) : [];
    const name = String(tokens[0] ?? '').trim();
    const args = tokens.length > 1 ? tokens.slice(1).join(' ') : '';
    return {
        'agentic.command.name': name,
        'agentic.command.args': args,
        'agentic.command.pipe_count': Math.max(segments.length - 1, 0),
    };
}
225
/**
 * Tells whether a tool name denotes an MCP tool.
 *
 * Matching is case-insensitive: the name is exactly 'mcp', or starts with
 * 'mcp__' or 'mcp-'.
 *
 * @param {unknown} name - Tool name; falsy values are treated as ''.
 * @returns {boolean} True for MCP tool names.
 */
function isMcpToolName(name) {
    const lowered = String(name || '').toLowerCase();
    if (lowered === 'mcp') {
        return true;
    }
    return ['mcp__', 'mcp-'].some((prefix) => lowered.startsWith(prefix));
}
229
/**
 * Computes the SHA-256 digest of a value's string form.
 *
 * @param {unknown} text - Value to hash; null/undefined hash as ''.
 * @returns {string} Lowercase hex digest.
 */
function hashText(text) {
    const hasher = createHash('sha256');
    hasher.update(String(text ?? ''));
    return hasher.digest('hex');
}
232
/**
 * Counts the lines in a value's string form.
 *
 * @param {unknown} text - Value to measure; null/undefined count as ''.
 * @returns {number} 0 for an empty string, otherwise the number of LF/CRLF
 *   line breaks plus one.
 */
function lineCount(text) {
    const source = String(text ?? '');
    if (source === '') {
        return 0;
    }
    const breaks = source.match(/\r?\n/g) ?? [];
    return breaks.length + 1;
}
238
/**
 * Splits a value's string form into its distinct whitespace-separated tokens.
 *
 * @param {unknown} text - Value to tokenize; null/undefined act as ''.
 * @returns {Set<string>} Unique tokens in first-seen order.
 */
function tokenSet(text) {
    const words = String(text ?? '').match(/\S+/g) ?? [];
    return new Set(words);
}
242
/**
 * Scores the similarity of two texts as the Jaccard index of their token
 * sets, rounded to 6 decimals.
 *
 * @param {unknown} a - First text.
 * @param {unknown} b - Second text.
 * @returns {number} 1 for identical inputs or two token-less inputs, 0 when
 *   exactly one side has no tokens, otherwise |A ∩ B| / |A ∪ B|.
 */
function similarityScore(a, b) {
    if (a === b) {
        return 1;
    }
    const left = tokenSet(a);
    const right = tokenSet(b);
    const leftEmpty = left.size === 0;
    const rightEmpty = right.size === 0;
    if (leftEmpty && rightEmpty) {
        return 1;
    }
    if (leftEmpty || rightEmpty) {
        return 0;
    }
    const shared = [...left].filter((token) => right.has(token)).length;
    // Both sets are non-empty here, so the union is at least 1.
    const union = left.size + right.size - shared;
    return Number((shared / union).toFixed(6));
}
261
/** Context window size, in tokens, used when no valid override is set. */
const DEFAULT_CONTEXT_WINDOW_TOKENS = 200_000;
/** Set of runtime identifiers listed as context-enabled. */
const CONTEXT_ENABLED_RUNTIMES = new Set([
    'claude-code',
    'codex',
    'openclaw',
    'opencode',
]);
/** False only when the SPANORY_CONTEXT_ENABLED environment variable is exactly '0'. */
const CONTEXT_PARSING_ENABLED = process.env.SPANORY_CONTEXT_ENABLED !== '0';
264
/**
 * Resolves the context window size in tokens.
 *
 * Reads `SPANORY_CONTEXT_WINDOW_TOKENS` and floors it. The override is only
 * accepted when the floored value is a finite integer >= 1, so a fractional
 * setting such as `0.5` can no longer produce a zero-token window. Any other
 * value falls back to `DEFAULT_CONTEXT_WINDOW_TOKENS`.
 *
 * @returns {number} Positive integer window size.
 */
function contextWindowTokens() {
    const configured = Math.floor(Number(process.env.SPANORY_CONTEXT_WINDOW_TOKENS));
    if (Number.isFinite(configured) && configured >= 1)
        return configured;
    return DEFAULT_CONTEXT_WINDOW_TOKENS;
}
270
/**
 * Restricts a number to the inclusive range [min, max].
 *
 * @param {number} value - Number to restrict.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} `value` limited to the range.
 */
function clamp(value, min, max) {
    const capped = Math.min(max, value);
    return Math.max(min, capped);
}
273
/**
 * Rounds a number to at most six decimal places.
 *
 * @param {number} value - Number to round.
 * @returns {number} The rounded number.
 */
function round6(value) {
    const fixed = value.toFixed(6);
    return Number(fixed);
}
276
/**
 * Infers a compaction from a drop in estimated tokens between two turns.
 *
 * @param {number} currentTokens - Estimate for the current turn.
 * @param {number} previousTokens - Estimate for the previous turn.
 * @returns {{ detected: boolean, compactionRatio: number }} `detected` is
 *   true when the previous estimate is positive and the relative drop exceeds
 *   0.4; `compactionRatio` is that drop rounded to 6 decimals, else 0.
 */
function detectCompactInferred(currentTokens, previousTokens) {
    const notDetected = { detected: false, compactionRatio: 0 };
    if (!(previousTokens > 0)) {
        return notDetected;
    }
    const dropRatio = (previousTokens - currentTokens) / previousTokens;
    return dropRatio > 0.4
        ? { detected: true, compactionRatio: round6(dropRatio) }
        : notDetected;
}
284
/**
 * Creates a fresh accumulator with one zeroed entry per context source kind.
 *
 * @returns {object} Object keyed by every entry of `CONTEXT_SOURCE_KINDS`,
 *   each set to 0.
 */
function createSourceDeltaMap() {
    const zeroed = {};
    [...CONTEXT_SOURCE_KINDS].forEach((kind) => {
        zeroed[kind] = 0;
    });
    return zeroed;
}
290
/**
 * Adds a positive token amount to one source kind, in place.
 *
 * The call is a no-op when the kind is falsy or not an own key of `map`, or
 * when `delta` is not a finite number greater than 0.
 *
 * @param {object} map - Accumulator keyed by source kind.
 * @param {string} kind - Source kind to credit.
 * @param {unknown} delta - Amount to add (coerced with `Number`).
 * @returns {void}
 */
function addSourceDelta(map, kind, delta) {
    const isKnownKind = Boolean(kind) && Object.prototype.hasOwnProperty.call(map, kind);
    if (!isKnownKind) {
        return;
    }
    const amount = Number(delta);
    if (Number.isFinite(amount) && amount > 0) {
        map[kind] += amount;
    }
}
298
/**
 * Moves tokens from one source kind to another, in place.
 *
 * At most the amount currently held by `fromKind` is moved. Nothing happens
 * when `delta` is not a finite positive number, when either kind is not an
 * own key of `map`, or when there is nothing to move.
 *
 * @param {object} map - Accumulator keyed by source kind.
 * @param {string} fromKind - Kind to debit.
 * @param {string} toKind - Kind to credit.
 * @param {unknown} delta - Requested amount (coerced with `Number`).
 * @returns {void}
 */
function moveSourceDelta(map, fromKind, toKind, delta) {
    const requested = Number(delta);
    if (!Number.isFinite(requested) || requested <= 0) {
        return;
    }
    const owns = (kind) => Object.prototype.hasOwnProperty.call(map, kind);
    if (!owns(fromKind) || !owns(toKind)) {
        return;
    }
    const transferable = Math.min(map[fromKind], requested);
    if (transferable > 0) {
        map[fromKind] -= transferable;
        map[toKind] += transferable;
    }
}
312
/**
 * Parses text as a JSON object.
 *
 * @param {unknown} raw - Candidate JSON text; null/undefined act as ''.
 * @returns {object | null} The parsed value when the trimmed text starts with
 *   '{' and parses to a non-array object; otherwise `null`. Parse errors are
 *   swallowed.
 */
function parseJsonObject(raw) {
    const source = String(raw ?? '').trim();
    if (source === '' || !source.startsWith('{')) {
        return null;
    }
    let candidate;
    try {
        candidate = JSON.parse(source);
    }
    catch {
        // ignore parse errors
        return null;
    }
    const isPlainObject = Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate);
    return isPlainObject ? candidate : null;
}
326
/**
 * Finds file-like mentions in text.
 *
 * A mention is a run of characters containing no whitespace or quote
 * characters that ends in a dot followed by 1-8 alphanumerics.
 *
 * @param {unknown} text - Text to scan; null/undefined act as ''.
 * @returns {string[]} Distinct mentions in first-seen order.
 */
function extractMentionFileSignals(text) {
    const source = String(text ?? '');
    if (source === '') {
        return [];
    }
    const unique = new Set();
    for (const [hit] of source.matchAll(/(?:\/|\b)[^\s"'`]+?\.[a-zA-Z0-9]{1,8}\b/g)) {
        const mention = hit.trim();
        if (mention) {
            unique.add(mention);
        }
    }
    return [...unique];
}
333
/**
 * Picks a display name for the source of an event.
 *
 * @param {object} event - Event with optional `attributes` and `name`.
 * @returns {string} Trimmed `gen_ai.tool.name`, else `agentic.command.name`,
 *   else the event name, else ''.
 */
function extractSourceName(event) {
    const attributes = event?.attributes ?? {};
    const candidate = attributes['gen_ai.tool.name']
        ?? attributes['agentic.command.name']
        ?? event?.name
        ?? '';
    return String(candidate).trim();
}
337
/**
 * Attributes the estimated tokens of one turn's events to context source
 * kinds and detects compact/restore commands.
 *
 * Each event is handled by category:
 * - 'turn': input + output go to 'turn'; file mentions, a 'system' sender and
 *   coordination keywords shift part of that to other kinds.
 * - 'agent_command': counted as 'skill'; the commands 'compact' and
 *   'restore'/'resume' set the corresponding flags.
 * - 'agent_task': counted as 'subagent', with the input also credited to
 *   'team_coordination'.
 * - 'tool' / 'mcp' / 'shell_command': input and output counted separately,
 *   with file mentions and memory-related input re-attributed.
 * Other categories are ignored. When no known kind received tokens, 'unknown'
 * is set to 1.
 *
 * @param {Iterable<object>} turnEvents - Events belonging to one turn.
 * @returns {{ sourceDelta: object, sourceNames: Map<string, Set<string>>,
 *   compactRequested: boolean, restoreRequested: boolean }}
 */
function classifyContextSignals(turnEvents) {
    const sourceDelta = createSourceDeltaMap();
    const sourceNames = new Map();
    let compactRequested = false;
    let restoreRequested = false;
    const instructionFileRe = /(?:^|\/)(?:claude|agents)\.md$/i;

    // Records a non-blank display name under a source kind.
    const rememberName = (kind, name) => {
        const label = String(name ?? '').trim();
        if (!label) {
            return;
        }
        const known = sourceNames.get(kind) ?? new Set();
        known.add(label);
        sourceNames.set(kind, known);
    };

    // Credits file mentions found in `text`, taking the tokens from `fromKind`.
    const creditMentionedFiles = (text, fromKind) => {
        const mentions = extractMentionFileSignals(text);
        if (!mentions.length) {
            return;
        }
        const mentionTokens = estimateTokens(mentions.join('\n'));
        addSourceDelta(sourceDelta, 'mention_file', mentionTokens);
        moveSourceDelta(sourceDelta, fromKind, 'mention_file', mentionTokens);
        rememberName('mention_file', mentions[0]);
        if (mentions.some((item) => instructionFileRe.test(item))) {
            addSourceDelta(sourceDelta, 'claude_md', mentionTokens);
            moveSourceDelta(sourceDelta, 'mention_file', 'claude_md', mentionTokens);
            rememberName('claude_md', mentions.find((item) => instructionFileRe.test(item)));
        }
    };

    for (const event of turnEvents) {
        const category = String(event?.category ?? '');
        const attrs = event?.attributes ?? {};
        const input = String(event?.input ?? '');
        const output = String(event?.output ?? '');
        const inputTokens = estimateTokens(input);
        const outputTokens = estimateTokens(output);
        const sourceName = extractSourceName(event);

        switch (category) {
            case 'turn': {
                addSourceDelta(sourceDelta, 'turn', inputTokens + outputTokens);
                creditMentionedFiles(input, 'turn');
                const sender = String(attrs['agentic.input.sender'] ?? '').toLowerCase();
                if (sender === 'system') {
                    addSourceDelta(sourceDelta, 'system_prompt', inputTokens);
                    moveSourceDelta(sourceDelta, 'turn', 'system_prompt', inputTokens);
                    rememberName('system_prompt', 'system_input');
                }
                if (/\b(delegate|handoff|coordinate|sync)\b/i.test(input)) {
                    const halfOfInput = Math.max(1, Math.floor(inputTokens / 2));
                    const coordinationTokens = Math.min(estimateTokens(input), halfOfInput);
                    addSourceDelta(sourceDelta, 'team_coordination', coordinationTokens);
                    moveSourceDelta(sourceDelta, 'turn', 'team_coordination', coordinationTokens);
                    rememberName('team_coordination', 'turn_coordination');
                }
                break;
            }
            case 'agent_command': {
                const commandName = String(attrs['agentic.command.name'] ?? '').trim().toLowerCase();
                addSourceDelta(sourceDelta, 'skill', inputTokens + outputTokens);
                rememberName('skill', sourceName ? `/${sourceName}` : 'slash_command');
                if (commandName === 'compact') {
                    compactRequested = true;
                }
                if (commandName === 'restore' || commandName === 'resume') {
                    restoreRequested = true;
                }
                break;
            }
            case 'agent_task': {
                addSourceDelta(sourceDelta, 'subagent', inputTokens + outputTokens);
                addSourceDelta(sourceDelta, 'team_coordination', inputTokens);
                rememberName('subagent', sourceName || 'Task');
                rememberName('team_coordination', sourceName || 'Task');
                break;
            }
            case 'tool':
            case 'mcp':
            case 'shell_command': {
                addSourceDelta(sourceDelta, 'tool_input', inputTokens);
                addSourceDelta(sourceDelta, 'tool_output', outputTokens);
                if (sourceName) {
                    rememberName('tool_input', sourceName);
                    rememberName('tool_output', sourceName);
                }
                creditMentionedFiles(input, 'tool_input');
                const toolName = String(attrs['gen_ai.tool.name'] ?? '').toLowerCase();
                if (toolName.includes('memory') || /\bmemory\b/i.test(input)) {
                    const memoryTokens = Math.max(1, Math.floor(inputTokens * 0.6));
                    addSourceDelta(sourceDelta, 'memory', memoryTokens);
                    moveSourceDelta(sourceDelta, 'tool_input', 'memory', memoryTokens);
                    rememberName('memory', sourceName || 'memory');
                }
                break;
            }
            default:
                break;
        }
    }

    let knownTotal = 0;
    for (const kind of CONTEXT_SOURCE_KINDS.filter((candidate) => candidate !== 'unknown')) {
        knownTotal += Number(sourceDelta[kind] ?? 0);
    }
    if (knownTotal <= 0) {
        sourceDelta.unknown = 1;
        rememberName('unknown', 'unclassified');
    }
    return { sourceDelta, sourceNames, compactRequested, restoreRequested };
}
434
/**
 * Builds the context events for one turn.
 *
 * Steps:
 * 1. Estimate the context size: reported usage (input + cache reads) when
 *    positive, else a calibrated estimate once the calibration state has at
 *    least 2 samples, else the raw heuristic over the turn input and output.
 * 2. Classify the turn's events into source kinds.
 * 3. Emit one 'Context Snapshot', optional 'Context Boundary' events
 *    (compact_before, compact_after, restore) and one
 *    'Context Source Attribution' per source kind with a positive delta.
 *
 * @param {object} params
 * @param {string} params.runtime - Runtime identifier copied onto each event.
 * @param {string} params.projectId - Project identifier copied onto each event.
 * @param {string} params.sessionId - Session identifier copied onto each event.
 * @param {object} params.turnEvent - Turn event supplying ids, timestamps,
 *   input/output and usage attributes.
 * @param {object[]} params.turnEvents - Events passed to the classifier.
 * @param {number} params.previousEstimatedTotal - Previous turn's estimate.
 * @param {string[][]} params.recentSourceKindsWindow - Active kinds of recent
 *   turns; the returned window keeps the last 5 rows.
 * @param {object} [params.calibrationState] - Calibration state.
 * @returns {{ events: object[], estimatedTotalTokens: number,
 *   recentSourceKindsWindow: string[][], calibrationState: object }}
 */
function composeContextEvents({ runtime, projectId, sessionId, turnEvent, turnEvents, previousEstimatedTotal, recentSourceKindsWindow, calibrationState, }) {
    const turnAttrs = turnEvent?.attributes ?? {};

    // 1. Estimate the total context size for this turn.
    const reportedInput = Number(turnAttrs['gen_ai.usage.input_tokens'] ?? 0);
    const reportedCacheRead = Number(turnAttrs['gen_ai.usage.cache_read.input_tokens'] ?? 0);
    const reportedTokens = Math.max(0, reportedInput + reportedCacheRead);
    const heuristicTokens = estimateTokens(`${turnEvent?.input ?? ''}\n${turnEvent?.output ?? ''}`);
    let estimate = { method: 'heuristic', confidence: 0.4, tokens: Math.max(0, heuristicTokens) };
    let nextCalibrationState = calibrationState;
    if (reportedTokens > 0) {
        nextCalibrationState = calibrate(calibrationState, reportedTokens, Math.max(heuristicTokens, 1));
        estimate = { method: 'usage', confidence: 1, tokens: reportedTokens };
    }
    else if (Number(calibrationState?.sampleCount ?? 0) >= 2) {
        estimate = {
            method: 'calibrated',
            confidence: round6(0.7 + 0.03 * Math.min(Number(calibrationState.sampleCount ?? 0), 10)),
            tokens: Math.max(0, calibratedEstimate(heuristicTokens, calibrationState)),
        };
    }
    const estimatedTotalTokens = estimate.tokens;

    // 2. Classify sources and derive the per-turn statistics.
    const { sourceDelta, sourceNames, compactRequested, restoreRequested } = classifyContextSignals(turnEvents);
    const deltaTokens = previousEstimatedTotal > 0 ? (estimatedTotalTokens - previousEstimatedTotal) : 0;
    const windowLimitTokens = contextWindowTokens();
    const fillRatio = round6(clamp(estimatedTotalTokens / windowLimitTokens, 0, 1));
    const rankedSources = Object.entries(sourceDelta)
        .filter(([, amount]) => Number(amount) > 0)
        .sort((left, right) => Number(right[1]) - Number(left[1]));
    const activeKinds = rankedSources.map(([kind]) => kind);
    const nextRecentSourceKindsWindow = [...recentSourceKindsWindow, activeKinds].slice(-5);
    const repeatCountByKind = {};
    for (const kind of CONTEXT_SOURCE_KINDS) {
        repeatCountByKind[kind] = nextRecentSourceKindsWindow.filter((row) => row.includes(kind)).length;
    }
    const totalDelta = rankedSources.reduce((sum, [, amount]) => sum + Number(amount), 0);

    // Attributes copied verbatim from the turn event onto every context event.
    const passthroughAttrs = {};
    const passthroughKeys = [
        'gen_ai.agent.id',
        'agentic.parent.session_id',
        'agentic.parent.turn_id',
        'agentic.parent.tool_call_id',
        'agentic.parent.link.confidence',
        'agentic.runtime.version',
    ];
    for (const key of passthroughKeys) {
        if (turnAttrs[key] !== undefined) {
            passthroughAttrs[key] = turnAttrs[key];
        }
    }

    // Shared event shape; only name, end time and specific attributes vary.
    const contextEvent = (name, endedAt, attributes) => ({
        runtime,
        projectId,
        sessionId,
        turnId: turnEvent.turnId,
        category: 'context',
        name,
        startedAt: turnEvent.startedAt,
        endedAt,
        input: '',
        output: '',
        attributes: {
            'agentic.event.category': 'context',
            ...attributes,
            ...passthroughAttrs,
        },
    });
    const boundaryEvent = (boundaryKind, endedAt, compactionRatio, detectionMethod) => contextEvent('Context Boundary', endedAt, {
        'agentic.context.event_type': 'context_boundary',
        'agentic.context.boundary_kind': boundaryKind,
        'agentic.context.compaction_ratio': compactionRatio,
        'agentic.context.detection_method': detectionMethod,
    });

    // 3. Emit the events.
    const events = [
        contextEvent('Context Snapshot', turnEvent.endedAt, {
            'agentic.context.event_type': 'context_snapshot',
            'agentic.context.estimated_total_tokens': estimatedTotalTokens,
            'agentic.context.fill_ratio': fillRatio,
            'agentic.context.delta_tokens': deltaTokens,
            'agentic.context.composition': JSON.stringify(Object.fromEntries(rankedSources)),
            'agentic.context.top_sources': JSON.stringify(activeKinds.slice(0, 3)),
            'agentic.context.estimation_method': estimate.method,
            'agentic.context.estimation_confidence': estimate.confidence,
        }),
    ];
    if (compactRequested && previousEstimatedTotal > 0) {
        events.push(boundaryEvent('compact_before', turnEvent.startedAt, 0, 'hook'));
    }
    const inferred = detectCompactInferred(estimatedTotalTokens, previousEstimatedTotal);
    if (inferred.detected) {
        events.push(boundaryEvent('compact_after', turnEvent.endedAt, inferred.compactionRatio, 'inferred'));
    }
    if (restoreRequested) {
        events.push(boundaryEvent('restore', turnEvent.startedAt, 0, 'hook'));
    }
    for (const [kind, amount] of rankedSources) {
        const tokenDelta = Number(amount);
        if (tokenDelta <= 0) {
            continue;
        }
        const sourceShare = totalDelta > 0 ? round6(tokenDelta / totalDelta) : 0;
        const repeatCountRecent = Number(repeatCountByKind[kind] ?? 0);
        const [firstName] = [...(sourceNames.get(kind) ?? [])];
        events.push(contextEvent('Context Source Attribution', turnEvent.endedAt, {
            'agentic.context.event_type': 'context_source_attribution',
            'agentic.context.source_kind': kind,
            'agentic.context.source_name': firstName ?? kind,
            'agentic.context.token_delta': tokenDelta,
            'agentic.context.source_share': sourceShare,
            'agentic.context.repeat_count_recent': repeatCountRecent,
            'agentic.context.pollution_score': pollutionScoreV1({
                tokenDelta,
                windowLimitTokens,
                sourceShare,
                repeatCountRecent,
                sourceKind: kind,
            }),
            'agentic.context.score_version': 'pollution_score_v1',
        }));
    }
    return {
        events,
        estimatedTotalTokens,
        recentSourceKindsWindow: nextRecentSourceKindsWindow,
        calibrationState: nextCalibrationState,
    };
}
623
/**
 * Guesses the actor role of a turn from its messages.
 *
 * @param {object[]} messages - Messages of the turn.
 * @returns {{ role: string, confidence: number }} 'unknown' (0.6) when any
 *   message has `isSidechain === true` or a non-blank string `agentId`;
 *   otherwise 'main' (0.95).
 */
function actorHeuristic(messages) {
    const carriesSidechainSignal = (message) => {
        if (message?.isSidechain === true) {
            return true;
        }
        return typeof message?.agentId === 'string' && message.agentId.trim().length > 0;
    };
    return messages.some(carriesSidechainSignal)
        ? { role: 'unknown', confidence: 0.6 }
        : { role: 'main', confidence: 0.95 };
}
629
/**
 * Returns the first value whose trimmed string form is non-empty.
 *
 * @param {Iterable<unknown>} values - Candidates; null/undefined act as ''.
 * @returns {string} The trimmed string, or '' when none qualifies.
 */
function firstNonEmptyString(values) {
    for (const candidate of values) {
        const trimmed = String(candidate ?? '').trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return '';
}
637
/**
 * Derives agent and parent-link attributes from a turn's messages.
 *
 * For each field the first non-blank value across all messages wins. Every
 * field is looked up under camelCase and snake_case spellings; parent fields
 * are also read from `parent.*` and from `session_meta` / `sessionMeta`.
 *
 * Link confidence is the explicit value when one is present, otherwise
 * 'exact' when any parent id was found, otherwise 'unknown' when only an
 * agent id was found; with none of these it is omitted.
 *
 * @param {object[]} messages - Messages of the turn.
 * @returns {object} Attribute map containing only the resolved fields.
 */
function inferParentLinkAttributes(messages) {
    // First non-blank value produced by `read` across all messages.
    const firstOf = (read) => firstNonEmptyString(messages.map((m) => read(m)));
    const agentId = firstOf((m) => m?.agentId ?? m?.agent_id ?? m?.message?.agentId ?? m?.message?.agent_id);
    const parentSessionId = firstOf((m) => m?.parentSessionId
        ?? m?.parent_session_id
        ?? m?.parent?.sessionId
        ?? m?.parent?.session_id
        ?? m?.session_meta?.parent_session_id
        ?? m?.sessionMeta?.parentSessionId);
    const parentTurnId = firstOf((m) => m?.parentTurnId
        ?? m?.parent_turn_id
        ?? m?.parent?.turnId
        ?? m?.parent?.turn_id
        ?? m?.session_meta?.parent_turn_id
        ?? m?.sessionMeta?.parentTurnId);
    const parentToolCallId = firstOf((m) => m?.parentToolCallId
        ?? m?.parent_tool_call_id
        ?? m?.parent?.toolCallId
        ?? m?.parent?.tool_call_id
        ?? m?.session_meta?.parent_tool_call_id
        ?? m?.sessionMeta?.parentToolCallId);
    const explicitConfidence = firstOf((m) => m?.parentLinkConfidence ?? m?.parent_link_confidence);

    let confidence = '';
    if (explicitConfidence) {
        confidence = explicitConfidence;
    }
    else if (parentSessionId || parentTurnId || parentToolCallId) {
        confidence = 'exact';
    }
    else if (agentId) {
        confidence = 'unknown';
    }

    // Entry order fixes the key order of the returned attributes.
    const resolved = [
        ['gen_ai.agent.id', agentId],
        ['agentic.parent.session_id', parentSessionId],
        ['agentic.parent.turn_id', parentTurnId],
        ['agentic.parent.tool_call_id', parentToolCallId],
        ['agentic.parent.link.confidence', confidence],
    ];
    const attrs = {};
    for (const [key, value] of resolved) {
        if (value) {
            attrs[key] = value;
        }
    }
    return attrs;
}
679
- function createTurn(messages, turnId, projectId, sessionId, runtime) {
680
- const user = messages.find(isPromptUserMessage) ?? messages.find((m) => m.role === 'user' && !m.isMeta) ?? messages[0];
681
- const assistantsRaw = messages.filter((m) => m.role === 'assistant');
682
- const assistantOrder = [];
683
- const assistantLatest = new Map();
684
- for (let i = 0; i < assistantsRaw.length; i += 1) {
685
- const msg = assistantsRaw[i];
686
- const key = msg.messageId ? `id:${msg.messageId}` : `idx:${i}`;
687
- if (!assistantLatest.has(key))
688
- assistantOrder.push(key);
689
- assistantLatest.set(key, msg);
690
- }
691
- const assistants = assistantOrder.map((key) => assistantLatest.get(key)).filter(Boolean);
692
- const start = user?.timestamp ?? messages[0]?.timestamp ?? new Date();
693
- const end = messages[messages.length - 1]?.timestamp ?? start;
694
- const output = assistants.map((m) => extractText(m.content)).filter(Boolean).join('\n');
695
- const runtimeVersion = [...messages]
696
- .map((m) => String(m.runtimeVersion ?? '').trim())
697
- .filter(Boolean)
698
- .at(-1);
699
- const runtimeAttrs = runtimeVersionAttributes(runtimeVersion);
700
- const normalizedInput = normalizeUserInput(user?.content);
701
- const totalUsage = {};
702
- let latestModel;
703
- for (const msg of assistants) {
704
- if (msg.model)
705
- latestModel = msg.model;
706
- addUsage(totalUsage, msg.usage);
707
- }
708
- const usage = Object.keys(totalUsage).length ? totalUsage : undefined;
709
- const actor = actorHeuristic(messages);
710
- const parentLinkAttrs = inferParentLinkAttributes(messages);
711
- const sharedAttrs = { ...runtimeAttrs, ...parentLinkAttrs };
712
- const events = [
713
- {
714
- runtime,
715
- projectId,
716
- sessionId,
717
- turnId,
718
- category: 'turn',
719
- name: `${runtime} - Turn ${turnId}`,
720
- startedAt: start.toISOString(),
721
- endedAt: end.toISOString(),
722
- input: normalizedInput.input,
723
- output,
724
- attributes: {
725
- 'agentic.event.category': 'turn',
726
- 'langfuse.observation.type': 'agent',
727
- 'gen_ai.operation.name': 'invoke_agent',
728
- ...sharedAttrs,
729
- ...modelAttributes(latestModel),
730
- 'agentic.actor.role': actor.role,
731
- 'agentic.actor.role_confidence': actor.confidence,
732
- ...normalizedInput.attributes,
733
- ...usageAttributes(usage),
734
- },
735
- },
736
- ];
737
- const resultByToolId = new Map();
738
- for (const msg of messages) {
739
- if (msg.role !== 'user')
740
- continue;
741
- const resultAt = isoFromUnknownTimestamp(msg.timestamp, end);
742
- for (const tr of extractToolResults(msg.content)) {
743
- const toolUseId = String(tr.tool_use_id ?? tr.toolUseId ?? '');
744
- if (!toolUseId)
745
- continue;
746
- const content = extractToolResultText(tr, msg);
747
- if (!resultByToolId.has(toolUseId) || !resultByToolId.get(toolUseId)?.content) {
748
- resultByToolId.set(toolUseId, { content, endedAt: resultAt });
749
- }
750
- }
751
- if (msg.sourceToolUseId) {
752
- const fallback = extractToolResultText({}, msg);
753
- if (fallback
754
- && (!resultByToolId.has(msg.sourceToolUseId) || !resultByToolId.get(msg.sourceToolUseId)?.content)) {
755
- resultByToolId.set(msg.sourceToolUseId, { content: fallback, endedAt: resultAt });
756
- }
757
- }
758
- }
759
- if (user?.role === 'user') {
760
- const slash = parseSlashCommand(extractText(user.content));
761
- if (slash) {
762
- const isMcp = slash.name.toLowerCase() === 'mcp' || slash.name.toLowerCase().startsWith('mcp:');
763
- events.push({
764
- runtime,
765
- projectId,
766
- sessionId,
767
- turnId,
768
- category: isMcp ? 'mcp' : 'agent_command',
769
- name: isMcp ? 'MCP Slash Command' : `Agent Command: /${slash.name}`,
770
- startedAt: start.toISOString(),
771
- endedAt: start.toISOString(),
772
- input: extractText(user.content),
773
- output: '',
774
- attributes: {
775
- 'agentic.event.category': isMcp ? 'mcp' : 'agent_command',
776
- 'langfuse.observation.type': isMcp ? 'tool' : 'event',
777
- ...sharedAttrs,
778
- 'agentic.command.name': slash.name,
779
- 'agentic.command.args': slash.args,
780
- 'gen_ai.operation.name': isMcp ? 'execute_tool' : 'invoke_agent',
781
- },
782
- });
783
- }
784
- }
785
- for (const assistant of assistants) {
786
- const reasoningBlocks = extractReasoningBlocks(assistant.content);
787
- for (const reasoning of reasoningBlocks) {
788
- const reasoningText = String(reasoning?.text ?? '').trim();
789
- if (!reasoningText)
790
- continue;
791
- const reasoningAt = isoFromUnknownTimestamp(reasoning?.timestamp, assistant.timestamp);
792
- events.push({
793
- runtime,
794
- projectId,
795
- sessionId,
796
- turnId,
797
- category: 'reasoning',
798
- name: 'Assistant Reasoning',
799
- startedAt: reasoningAt,
800
- endedAt: reasoningAt,
801
- input: '',
802
- output: reasoningText,
803
- attributes: {
804
- 'agentic.event.category': 'reasoning',
805
- 'langfuse.observation.type': 'span',
806
- ...sharedAttrs,
807
- 'gen_ai.operation.name': 'invoke_agent',
808
- ...modelAttributes(assistant.model),
809
- },
810
- });
811
- }
812
- const toolUses = extractToolUses(assistant.content);
813
- for (const tu of toolUses) {
814
- const toolName = String(tu.name ?? '');
815
- const toolId = String(tu.id ?? '');
816
- const toolInput = tu.input ?? {};
817
- const toolResult = resultByToolId.get(toolId);
818
- const toolOutput = toolResult?.content ?? '';
819
- const t = assistant.timestamp.toISOString();
820
- const toolEndedAt = toolResult?.endedAt ?? t;
821
- if (toolName === 'Bash') {
822
- const commandLine = String(toolInput.command ?? '');
823
- events.push({
824
- runtime,
825
- projectId,
826
- sessionId,
827
- turnId,
828
- category: 'shell_command',
829
- name: 'Tool: Bash',
830
- startedAt: t,
831
- endedAt: toolEndedAt,
832
- input: commandLine,
833
- output: toolOutput,
834
- attributes: {
835
- 'agentic.event.category': 'shell_command',
836
- 'langfuse.observation.type': 'tool',
837
- ...sharedAttrs,
838
- 'process.command_line': commandLine,
839
- ...parseBashCommandAttributes(commandLine),
840
- 'gen_ai.tool.name': 'Bash',
841
- 'gen_ai.tool.call.id': toolId,
842
- 'gen_ai.operation.name': 'execute_tool',
843
- ...modelAttributes(assistant.model),
844
- ...usageAttributes(assistant.usage),
845
- },
846
- });
847
- continue;
848
- }
849
- if (isMcpToolName(toolName)) {
850
- const serverName = toolName.startsWith('mcp__') ? toolName.split('__')[1] : undefined;
851
- events.push({
852
- runtime,
853
- projectId,
854
- sessionId,
855
- turnId,
856
- category: 'mcp',
857
- name: `Tool: ${toolName}`,
858
- startedAt: t,
859
- endedAt: toolEndedAt,
860
- input: JSON.stringify(toolInput),
861
- output: toolOutput,
862
- attributes: {
863
- 'agentic.event.category': 'mcp',
864
- 'langfuse.observation.type': 'tool',
865
- ...sharedAttrs,
866
- 'gen_ai.tool.name': toolName,
867
- 'mcp.request.id': toolId,
868
- 'gen_ai.operation.name': 'execute_tool',
869
- ...(serverName ? { 'agentic.mcp.server.name': serverName } : {}),
870
- ...modelAttributes(assistant.model),
871
- ...usageAttributes(assistant.usage),
872
- },
873
- });
874
- continue;
875
- }
876
- if (toolName === 'Task') {
877
- events.push({
878
- runtime,
879
- projectId,
880
- sessionId,
881
- turnId,
882
- category: 'agent_task',
883
- name: 'Tool: Task',
884
- startedAt: t,
885
- endedAt: toolEndedAt,
886
- input: JSON.stringify(toolInput),
887
- output: toolOutput,
888
- attributes: {
889
- 'agentic.event.category': 'agent_task',
890
- 'langfuse.observation.type': 'agent',
891
- ...sharedAttrs,
892
- 'gen_ai.tool.name': 'Task',
893
- 'gen_ai.tool.call.id': toolId,
894
- 'gen_ai.operation.name': 'invoke_agent',
895
- ...modelAttributes(assistant.model),
896
- ...usageAttributes(assistant.usage),
897
- },
898
- });
899
- continue;
900
- }
901
- if (toolName) {
902
- events.push({
903
- runtime,
904
- projectId,
905
- sessionId,
906
- turnId,
907
- category: 'tool',
908
- name: `Tool: ${toolName}`,
909
- startedAt: t,
910
- endedAt: toolEndedAt,
911
- input: JSON.stringify(toolInput),
912
- output: toolOutput,
913
- attributes: {
914
- 'agentic.event.category': 'tool',
915
- 'langfuse.observation.type': 'tool',
916
- ...sharedAttrs,
917
- 'gen_ai.tool.name': toolName,
918
- 'gen_ai.tool.call.id': toolId,
919
- 'gen_ai.operation.name': 'execute_tool',
920
- ...modelAttributes(assistant.model),
921
- ...usageAttributes(assistant.usage),
922
- },
923
- });
924
- }
925
- }
926
- }
927
- const turnInput = String(events[0].input ?? '').trim();
928
- const turnOutput = String(events[0].output ?? '').trim();
929
- if (!turnInput && !turnOutput && events.length === 1) {
930
- return [];
931
- }
932
- events[0].attributes['agentic.subagent.calls'] = events.filter((e) => e.category === 'agent_task').length;
933
- return events;
934
- }
935
/**
 * Partition a flat message sequence into turns.
 *
 * A new turn begins at every message for which `isPromptUserMessage` returns
 * a truthy value; each following message is appended to that turn until the
 * next prompt message is seen. Messages that appear before the first prompt
 * message are not placed in any turn.
 *
 * @param {Iterable<object>} messages - Messages in transcript order.
 * @returns {object[][]} One array per turn, each starting with its prompt message.
 */
export function groupByTurns(messages) {
    const grouped = [];
    // The turn currently being filled; stays null until the first prompt shows up.
    let openTurn = null;
    for (const message of messages) {
        if (isPromptUserMessage(message)) {
            // Register the turn as soon as it opens so no final flush is needed.
            openTurn = [message];
            grouped.push(openTurn);
        }
        else if (openTurn !== null) {
            openTurn.push(message);
        }
    }
    return grouped;
}
952
/**
 * Convert a transcript's messages into a flat list of events.
 *
 * Messages are grouped into turns with `groupByTurns`, and each turn is
 * expanded by `createTurn` under the id `turn-<n>` (1-based position of the
 * turn, counted before empty turns are dropped). Turns for which `createTurn`
 * returns no events are skipped. The first event of every kept turn receives
 * `agentic.turn.input.*` / `agentic.turn.diff.*` attributes that compare its
 * input with the input of the previously kept turn. When context parsing is
 * enabled for `runtime`, the events returned by `composeContextEvents` are
 * appended right after the turn's own events.
 *
 * @param {object} params
 * @param {string} params.runtime - Runtime identifier forwarded to every event.
 * @param {string} params.projectId - Project identifier forwarded to every event.
 * @param {string} params.sessionId - Session identifier forwarded to every event.
 * @param {Iterable<object>} params.messages - Transcript messages in order.
 * @returns {object[]} All events, in turn order.
 */
export function normalizeTranscriptMessages({ runtime, projectId, sessionId, messages }) {
    const collected = [];
    // Input text and hash of the last kept turn; null until one has been kept.
    let prior = null;
    // State handed to composeContextEvents and replaced by what it returns.
    let contextState = {
        previousEstimatedTotal: 0,
        recentSourceKindsWindow: [],
        calibrationState: { ema: 1, sampleCount: 0 },
    };
    for (const [index, turnMessages] of groupByTurns(messages).entries()) {
        const turnEvents = createTurn(turnMessages, `turn-${index + 1}`, projectId, sessionId, runtime);
        if (!turnEvents.length) {
            continue;
        }
        const turnEvent = turnEvents[0];
        const input = String(turnEvent.input ?? '');
        const inputHash = hashText(input);
        // The first kept turn has nothing to be compared with, so it reports a
        // neutral diff instead of being measured against an empty string.
        const diff = prior === null
            ? { prevHash: '', charDelta: 0, lineDelta: 0, similarity: 1, changed: false }
            : {
                prevHash: prior.hash,
                charDelta: input.length - prior.input.length,
                lineDelta: lineCount(input) - lineCount(prior.input),
                similarity: similarityScore(prior.input, input),
                changed: inputHash !== prior.hash,
            };
        Object.assign(turnEvent.attributes, {
            'agentic.turn.input.hash': inputHash,
            'agentic.turn.input.prev_hash': diff.prevHash,
            'agentic.turn.diff.char_delta': diff.charDelta,
            'agentic.turn.diff.line_delta': diff.lineDelta,
            'agentic.turn.diff.similarity': diff.similarity,
            'agentic.turn.diff.changed': diff.changed,
        });
        prior = { input, hash: inputHash };
        collected.push(...turnEvents);
        if (CONTEXT_PARSING_ENABLED && CONTEXT_ENABLED_RUNTIMES.has(runtime)) {
            const contextResult = composeContextEvents({
                runtime,
                projectId,
                sessionId,
                turnEvent,
                turnEvents,
                ...contextState,
            });
            collected.push(...contextResult.events);
            contextState = {
                previousEstimatedTotal: contextResult.estimatedTotalTokens,
                recentSourceKindsWindow: contextResult.recentSourceKindsWindow,
                calibrationState: contextResult.calibrationState,
            };
        }
    }
    return collected;
}
1007
/**
 * Extract the project id from a transcript file path.
 *
 * The project id is the path segment that immediately follows the first
 * occurrence of `marker`. Backslashes in both the path and the marker are
 * converted to forward slashes before searching, so either separator style
 * can be used for either argument.
 *
 * @param {string} transcriptPath - Path of the transcript file.
 * @param {string} marker - Path fragment that directly precedes the project id.
 * @returns {string|undefined} The segment after the marker, or `undefined`
 *   when an argument is missing or not a string, the marker is not found, or
 *   the segment after it is empty.
 */
export function parseProjectIdFromTranscriptPath(transcriptPath, marker) {
    // Non-string arguments cannot be searched; report "no project id" rather than throwing.
    if (typeof transcriptPath !== 'string' || typeof marker !== 'string' || !transcriptPath) {
        return undefined;
    }
    const toForwardSlashes = (text) => text.replace(/\\/g, '/');
    const normalizedPath = toForwardSlashes(transcriptPath);
    // The marker gets the same treatment as the path; otherwise a marker
    // written with backslashes could never match the normalised path.
    const normalizedMarker = toForwardSlashes(marker);
    const markerIndex = normalizedPath.indexOf(normalizedMarker);
    if (markerIndex === -1) {
        return undefined;
    }
    const afterMarker = normalizedPath.slice(markerIndex + normalizedMarker.length);
    // An empty first segment (marker at the very end, or followed by '/') means no id.
    return afterMarker.split('/')[0] || undefined;
}