@respan/cli 0.6.8 → 0.7.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -142,7 +142,7 @@ function detectModel(hookData) {
142
142
  return String(llmReq.model ?? '') || 'gemini-cli';
143
143
  }
144
144
  // ── Span construction ─────────────────────────────────────────────
145
- function buildSpans(hookData, outputText, tokens, config, startTimeIso, toolTurns, toolDetails, thoughtsTokens) {
145
+ function buildSpans(hookData, outputText, tokens, config, startTimeIso, toolTurns, toolDetails, thoughtsTokens, textRounds, roundStartTimes) {
146
146
  const spans = [];
147
147
  const sessionId = String(hookData.session_id ?? '');
148
148
  const model = detectModel(hookData);
@@ -151,21 +151,17 @@ function buildSpans(hookData, outputText, tokens, config, startTimeIso, toolTurn
151
151
  const beginTime = startTimeIso || endTime;
152
152
  const lat = latencySeconds(beginTime, endTime);
153
153
  const promptMessages = extractMessages(hookData);
154
- const completionMessage = { role: 'assistant', content: truncate(outputText, MAX_CHARS) };
155
154
  const { workflowName, spanName, customerId } = resolveSpanFields(config, {
156
155
  workflowName: 'gemini-cli',
157
156
  spanName: 'gemini-cli',
158
157
  });
159
158
  const safeId = sessionId.replace(/[/\\]/g, '_').slice(0, 50);
160
- // Use first chunk timestamp to differentiate turns within the same session
161
159
  const turnTs = beginTime.replace(/[^0-9]/g, '').slice(0, 14);
162
160
  const traceUniqueId = `gcli_${safeId}_${turnTs}`;
163
161
  const rootSpanId = `gcli_${safeId}_${turnTs}_root`;
164
162
  const threadId = `gcli_${sessionId}`;
165
- // LLM config
166
163
  const llmReq = (hookData.llm_request ?? {});
167
164
  const reqConfig = (llmReq.config ?? {});
168
- // Metadata
169
165
  const baseMeta = { source: 'gemini-cli' };
170
166
  if (toolTurns > 0)
171
167
  baseMeta.tool_turns = toolTurns;
@@ -190,55 +186,99 @@ function buildSpans(hookData, outputText, tokens, config, startTimeIso, toolTurn
190
186
  metadata,
191
187
  ...(lat !== undefined ? { latency: lat } : {}),
192
188
  });
193
- // Generation child span
194
- const genSpan = {
195
- trace_unique_id: traceUniqueId,
196
- span_unique_id: `gcli_${safeId}_${turnTs}_gen`,
197
- span_parent_id: rootSpanId,
198
- span_name: 'gemini.chat',
199
- span_workflow_name: workflowName,
200
- span_path: 'gemini_chat',
201
- model,
202
- provider_id: 'google',
203
- metadata: {},
204
- input: promptMessages.length ? JSON.stringify(promptMessages) : '',
205
- output: truncate(outputText, MAX_CHARS),
206
- timestamp: endTime,
207
- start_time: beginTime,
208
- prompt_tokens: tokens.prompt_tokens,
209
- completion_tokens: tokens.completion_tokens,
210
- total_tokens: tokens.total_tokens,
211
- ...(lat !== undefined ? { latency: lat } : {}),
212
- };
213
- if (reqConfig.temperature != null)
214
- genSpan.temperature = reqConfig.temperature;
215
- if (reqConfig.maxOutputTokens != null)
216
- genSpan.max_tokens = reqConfig.maxOutputTokens;
217
- spans.push(genSpan);
218
- // Reasoning span
219
- if (thoughtsTokens > 0) {
220
- spans.push({
221
- trace_unique_id: traceUniqueId,
222
- span_unique_id: `gcli_${safeId}_${turnTs}_reasoning`,
223
- span_parent_id: rootSpanId,
224
- span_name: 'Reasoning',
225
- span_workflow_name: workflowName,
226
- span_path: 'reasoning',
227
- provider_id: '',
228
- metadata: { reasoning_tokens: thoughtsTokens },
229
- input: '',
230
- output: `[Reasoning: ${thoughtsTokens} tokens]`,
231
- timestamp: endTime,
232
- start_time: beginTime,
233
- });
189
+ // Build interleaved LLM + Tool spans in chronological order.
190
+ // If we have text rounds, create one gemini.chat per round with tools between them.
191
+ // Otherwise fall back to a single gemini.chat span.
192
+ const rounds = textRounds.length > 0 ? textRounds : [outputText];
193
+ const roundStarts = roundStartTimes.length > 0 ? roundStartTimes : [beginTime];
194
+ let toolIdx = 0;
195
+ for (let r = 0; r < rounds.length; r++) {
196
+ const roundText = rounds[r];
197
+ const roundStart = roundStarts[r] || beginTime;
198
+ // Round end: next tool start, or endTime for last round
199
+ const nextTool = toolIdx < toolDetails.length ? toolDetails[toolIdx] : null;
200
+ const roundEnd = (r < rounds.length - 1 && nextTool?.start_time) ? nextTool.start_time : endTime;
201
+ const roundLat = latencySeconds(roundStart, roundEnd);
202
+ // LLM generation span for this round
203
+ if (roundText) {
204
+ const genSpan = {
205
+ trace_unique_id: traceUniqueId,
206
+ span_unique_id: `gcli_${safeId}_${turnTs}_gen_${r}`,
207
+ span_parent_id: rootSpanId,
208
+ span_name: 'gemini.chat',
209
+ span_workflow_name: workflowName,
210
+ span_path: 'gemini_chat',
211
+ model,
212
+ provider_id: 'google',
213
+ metadata: {},
214
+ input: r === 0 && promptMessages.length ? JSON.stringify(promptMessages) : '',
215
+ output: truncate(roundText, MAX_CHARS),
216
+ timestamp: roundEnd,
217
+ start_time: roundStart,
218
+ ...(roundLat !== undefined ? { latency: roundLat } : {}),
219
+ // Only attach tokens to the first round (aggregate usage from Gemini)
220
+ ...(r === 0 ? {
221
+ prompt_tokens: tokens.prompt_tokens,
222
+ completion_tokens: tokens.completion_tokens,
223
+ total_tokens: tokens.total_tokens,
224
+ } : {}),
225
+ };
226
+ if (r === 0) {
227
+ if (reqConfig.temperature != null)
228
+ genSpan.temperature = reqConfig.temperature;
229
+ if (reqConfig.maxOutputTokens != null)
230
+ genSpan.max_tokens = reqConfig.maxOutputTokens;
231
+ }
232
+ spans.push(genSpan);
233
+ }
234
+ // Tool spans that come after this round (before next round)
235
+ if (r < rounds.length - 1) {
236
+ // Emit all tools between this round and the next
237
+ while (toolIdx < toolDetails.length) {
238
+ const detail = toolDetails[toolIdx];
239
+ const toolName = detail?.name ?? '';
240
+ const toolArgs = detail?.args ?? detail?.input ?? {};
241
+ const toolOutput = detail?.output ?? '';
242
+ const displayName = toolName ? toolDisplayName(toolName) : `Call ${toolIdx + 1}`;
243
+ const toolInputStr = toolName ? formatToolInput(toolName, toolArgs) : '';
244
+ const toolMeta = {};
245
+ if (toolName)
246
+ toolMeta.tool_name = toolName;
247
+ if (detail?.error)
248
+ toolMeta.error = detail.error;
249
+ const toolStart = detail?.start_time ?? beginTime;
250
+ const toolEnd = detail?.end_time ?? endTime;
251
+ const toolLat = latencySeconds(toolStart, toolEnd);
252
+ spans.push({
253
+ trace_unique_id: traceUniqueId,
254
+ span_unique_id: `gcli_${safeId}_${turnTs}_tool_${toolIdx + 1}`,
255
+ span_parent_id: rootSpanId,
256
+ span_name: `Tool: ${displayName}`,
257
+ span_workflow_name: workflowName,
258
+ span_path: toolName ? `tool_${toolName}` : 'tool_call',
259
+ provider_id: '',
260
+ metadata: toolMeta,
261
+ input: toolInputStr,
262
+ output: truncate(toolOutput, MAX_CHARS),
263
+ timestamp: toolEnd,
264
+ start_time: toolStart,
265
+ ...(toolLat !== undefined ? { latency: toolLat } : {}),
266
+ });
267
+ toolIdx++;
268
+ // If next tool starts after next round's start time, break — it belongs to a later gap
269
+ const nextDetail = toolDetails[toolIdx];
270
+ if (nextDetail && roundStarts[r + 1] && nextDetail.start_time && nextDetail.start_time > roundStarts[r + 1])
271
+ break;
272
+ }
273
+ }
234
274
  }
235
- // Tool child spans
236
- for (let i = 0; i < toolTurns; i++) {
237
- const detail = toolDetails[i] ?? null;
275
+ // Any remaining tools not yet emitted (e.g. only one round but tools exist)
276
+ while (toolIdx < toolDetails.length) {
277
+ const detail = toolDetails[toolIdx];
238
278
  const toolName = detail?.name ?? '';
239
279
  const toolArgs = detail?.args ?? detail?.input ?? {};
240
280
  const toolOutput = detail?.output ?? '';
241
- const displayName = toolName ? toolDisplayName(toolName) : `Call ${i + 1}`;
281
+ const displayName = toolName ? toolDisplayName(toolName) : `Call ${toolIdx + 1}`;
242
282
  const toolInputStr = toolName ? formatToolInput(toolName, toolArgs) : '';
243
283
  const toolMeta = {};
244
284
  if (toolName)
@@ -250,7 +290,7 @@ function buildSpans(hookData, outputText, tokens, config, startTimeIso, toolTurn
250
290
  const toolLat = latencySeconds(toolStart, toolEnd);
251
291
  spans.push({
252
292
  trace_unique_id: traceUniqueId,
253
- span_unique_id: `gcli_${safeId}_${turnTs}_tool_${i + 1}`,
293
+ span_unique_id: `gcli_${safeId}_${turnTs}_tool_${toolIdx + 1}`,
254
294
  span_parent_id: rootSpanId,
255
295
  span_name: `Tool: ${displayName}`,
256
296
  span_workflow_name: workflowName,
@@ -263,6 +303,24 @@ function buildSpans(hookData, outputText, tokens, config, startTimeIso, toolTurn
263
303
  start_time: toolStart,
264
304
  ...(toolLat !== undefined ? { latency: toolLat } : {}),
265
305
  });
306
+ toolIdx++;
307
+ }
308
+ // Reasoning span
309
+ if (thoughtsTokens > 0) {
310
+ spans.push({
311
+ trace_unique_id: traceUniqueId,
312
+ span_unique_id: `gcli_${safeId}_${turnTs}_reasoning`,
313
+ span_parent_id: rootSpanId,
314
+ span_name: 'Reasoning',
315
+ span_workflow_name: workflowName,
316
+ span_path: 'reasoning',
317
+ provider_id: '',
318
+ metadata: { reasoning_tokens: thoughtsTokens },
319
+ input: '',
320
+ output: `[Reasoning: ${thoughtsTokens} tokens]`,
321
+ timestamp: endTime,
322
+ start_time: beginTime,
323
+ });
266
324
  }
267
325
  return addDefaultsToAll(spans);
268
326
  }
@@ -390,7 +448,6 @@ function processBeforeTool(hookData) {
390
448
  // Increment send_version to cancel any pending delayed sends —
391
449
  // the turn isn't done yet, a tool is about to execute.
392
450
  state.send_version = (state.send_version ?? 0) + 1;
393
- state.tool_turns = (state.tool_turns ?? 0) + 1;
394
451
  saveStreamState(sessionId, state);
395
452
  }
396
453
  function processAfterTool(hookData) {
@@ -482,11 +539,13 @@ function processChunk(hookData) {
482
539
  state.tool_turns = (state.tool_turns ?? 0) + 1;
483
540
  state.send_version = (state.send_version ?? 0) + 1;
484
541
  toolCallDetected = true;
485
- debug(`Tool call detected via msg_count (${savedMsgCount} ${currentMsgCount}), tool_turns=${state.tool_turns}`);
542
+ // Start a new text round after tool completes
543
+ state.current_round = (state.current_round ?? 0) + 1;
544
+ debug(`Tool call detected via msg_count (${savedMsgCount} → ${currentMsgCount}), tool_turns=${state.tool_turns}, round=${state.current_round}`);
486
545
  }
487
546
  }
488
547
  state.msg_count = currentMsgCount;
489
- // Accumulate text and grounding tool details
548
+ // Accumulate text into both total and per-round tracking
490
549
  if (chunkText) {
491
550
  if (!state.first_chunk_time)
492
551
  state.first_chunk_time = nowISO();
@@ -494,10 +553,21 @@ function processChunk(hookData) {
494
553
  state.last_tokens = completionTokens || state.last_tokens;
495
554
  if (thoughtsTokens > 0)
496
555
  state.thoughts_tokens = thoughtsTokens;
497
- }
498
- if (chunkText) {
556
+ // Track text per round
557
+ const round = state.current_round ?? 0;
558
+ if (!state.text_rounds)
559
+ state.text_rounds = [];
560
+ if (!state.round_start_times)
561
+ state.round_start_times = [];
562
+ while (state.text_rounds.length <= round)
563
+ state.text_rounds.push('');
564
+ while (state.round_start_times.length <= round)
565
+ state.round_start_times.push('');
566
+ state.text_rounds[round] += chunkText;
567
+ if (!state.round_start_times[round])
568
+ state.round_start_times[round] = nowISO();
499
569
  saveStreamState(sessionId, state);
500
- debug(`Accumulated chunk: +${chunkText.length} chars, total=${state.accumulated_text.length}`);
570
+ debug(`Accumulated chunk: +${chunkText.length} chars, total=${state.accumulated_text.length}, round=${round}`);
501
571
  }
502
572
  // Tool call in response parts
503
573
  const isToolTurn = hasToolCall || ['TOOL_CALLS', 'FUNCTION_CALL', 'TOOL_USE'].includes(finishReason);
@@ -537,7 +607,7 @@ function processChunk(hookData) {
537
607
  const finalTotal = Number(usage.totalTokenCount ?? 0) || (finalPrompt + finalCompletion);
538
608
  const tok = { prompt_tokens: finalPrompt, completion_tokens: finalCompletion, total_tokens: finalTotal };
539
609
  const config = loadRespanConfig(path.join(os.homedir(), '.gemini', 'respan.json'));
540
- const spans = buildSpans(hookData, state.accumulated_text, tok, config, state.first_chunk_time || undefined, state.tool_turns ?? 0, state.tool_details ?? [], state.thoughts_tokens ?? 0);
610
+ const spans = buildSpans(hookData, state.accumulated_text, tok, config, state.first_chunk_time || undefined, state.tool_turns ?? 0, state.tool_details ?? [], state.thoughts_tokens ?? 0, state.text_rounds ?? [], state.round_start_times ?? []);
541
611
  // Method b: text + STOP → send immediately
542
612
  if (isFinished && chunkText) {
543
613
  debug(`Immediate send (text+STOP, tool_turns=${state.tool_turns ?? 0}), ${state.accumulated_text.length} chars`);
@@ -553,66 +623,87 @@ function processChunk(hookData) {
553
623
  launchDelayedSend(sessionId, state.send_version, spans, creds.apiKey, creds.baseUrl);
554
624
  }
555
625
  // ── Main ──────────────────────────────────────────────────────────
556
- function mainWorker(raw) {
626
+ function processChunkInWorker(dataFile) {
557
627
  try {
628
+ const raw = fs.readFileSync(dataFile, 'utf-8');
629
+ fs.unlinkSync(dataFile);
558
630
  if (!raw.trim())
559
631
  return;
560
632
  const hookData = JSON.parse(raw);
561
- const event = String(hookData.hook_event_name ?? '');
562
633
  const unlock = acquireLock(LOCK_PATH);
563
634
  try {
564
- if (event === 'BeforeTool') {
565
- processBeforeTool(hookData);
566
- }
567
- else if (event === 'AfterTool') {
568
- processAfterTool(hookData);
569
- }
570
- else {
571
- processChunk(hookData);
572
- }
635
+ processChunk(hookData);
573
636
  }
574
637
  finally {
575
638
  unlock?.();
576
639
  }
577
640
  }
578
641
  catch (e) {
579
- if (e instanceof SyntaxError) {
580
- log('ERROR', `Invalid JSON from stdin: ${e}`);
581
- }
582
- else {
583
- log('ERROR', `Hook error: ${e}`);
642
+ log('ERROR', `Worker error: ${e}`);
643
+ try {
644
+ fs.unlinkSync(dataFile);
584
645
  }
646
+ catch { }
585
647
  }
586
648
  }
587
649
  function main() {
588
- // Worker mode: re-invoked as detached subprocess
650
+ // Worker mode: process chunk from temp file
589
651
  if (process.env._RESPAN_GEM_WORKER === '1') {
590
- const raw = process.env._RESPAN_GEM_DATA ?? '';
591
- mainWorker(raw);
652
+ const dataFile = process.env._RESPAN_GEM_FILE ?? '';
653
+ if (dataFile)
654
+ processChunkInWorker(dataFile);
592
655
  return;
593
656
  }
594
- // Read stdin synchronously, respond immediately, fork worker, exit
595
657
  let raw = '';
596
658
  try {
597
659
  raw = fs.readFileSync(0, 'utf-8');
598
660
  }
599
661
  catch { }
662
+ // Respond immediately so Gemini CLI doesn't block
600
663
  process.stdout.write('{}\n');
601
664
  if (!raw.trim()) {
602
665
  process.exit(0);
603
666
  }
604
667
  try {
605
- const scriptPath = __filename || process.argv[1];
606
- const child = execFile('node', [scriptPath], {
607
- env: { ...process.env, _RESPAN_GEM_WORKER: '1', _RESPAN_GEM_DATA: raw },
608
- stdio: 'ignore',
609
- detached: true,
610
- });
611
- child.unref();
668
+ const hookData = JSON.parse(raw);
669
+ const event = String(hookData.hook_event_name ?? '');
670
+ if (event === 'BeforeTool' || event === 'AfterTool') {
671
+ // Tool events are fast (just state updates) and must run in order.
672
+ // Process inline, don't fork.
673
+ const unlock = acquireLock(LOCK_PATH);
674
+ try {
675
+ if (event === 'BeforeTool')
676
+ processBeforeTool(hookData);
677
+ else
678
+ processAfterTool(hookData);
679
+ }
680
+ finally {
681
+ unlock?.();
682
+ }
683
+ }
684
+ else {
685
+ // AfterModel chunks: fork to background so Gemini CLI doesn't block.
686
+ // Write data to temp file (avoids env var size limits).
687
+ const dataFile = path.join(STATE_DIR, `respan_chunk_${process.pid}.json`);
688
+ fs.mkdirSync(STATE_DIR, { recursive: true });
689
+ fs.writeFileSync(dataFile, raw);
690
+ try {
691
+ const scriptPath = __filename || process.argv[1];
692
+ const child = execFile('node', [scriptPath], {
693
+ env: { ...process.env, _RESPAN_GEM_WORKER: '1', _RESPAN_GEM_FILE: dataFile },
694
+ stdio: 'ignore',
695
+ detached: true,
696
+ });
697
+ child.unref();
698
+ }
699
+ catch (e) {
700
+ // Fallback: run inline
701
+ processChunkInWorker(dataFile);
702
+ }
703
+ }
612
704
  }
613
705
  catch (e) {
614
- // Fallback: run inline
615
- mainWorker(raw);
706
+ log('ERROR', `Hook error: ${e}`);
616
707
  }
617
708
  process.exit(0);
618
709
  }
@@ -395,7 +395,7 @@ export function toOtlpPayload(spans) {
395
395
  }),
396
396
  },
397
397
  scopeSpans: [{
398
- scope: { name: 'respan-cli-hooks', version: '0.5.3' },
398
+ scope: { name: 'respan-cli-hooks', version: '0.7.0' },
399
399
  spans: otlpSpans,
400
400
  }],
401
401
  }],