@respan/cli 0.4.1 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,6 +81,38 @@ def save_state(state: Dict[str, Any]) -> None:
81
81
  STATE_FILE.write_text(json.dumps(state, indent=2), encoding="utf-8")
82
82
 
83
83
 
84
# Known config keys in respan.json that map to span fields.
# Anything else is treated as a custom property (goes into metadata).
KNOWN_CONFIG_KEYS = {"customer_id", "span_name", "workflow_name"}


def load_respan_config(cwd: str) -> Dict[str, Any]:
    """Load ``.claude/respan.json`` from the project directory.

    Args:
        cwd: Directory expected to contain a ``.claude`` sub-directory.

    Returns:
        A dict with two keys:

        - ``"fields"``: top-level entries whose key is in
          ``KNOWN_CONFIG_KEYS`` (customer_id, span_name, workflow_name)
        - ``"properties"``: every other top-level entry (custom properties)

        Both are empty when the file is missing, cannot be read, is not
        valid UTF-8 encoded JSON, or its top-level value is not an object.
        This function never raises for a bad config file; read and parse
        failures are reported through ``debug`` only.
    """
    config_path = Path(cwd) / ".claude" / "respan.json"
    if not config_path.exists():
        return {"fields": {}, "properties": {}}

    # Keep the try body limited to the read + parse.  ValueError covers both
    # json.JSONDecodeError and the UnicodeDecodeError that read_text() raises
    # for a file that is not valid UTF-8; OSError covers unreadable paths
    # (permissions, directory in place of a file, file removed after the
    # exists() check).
    try:
        raw = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        debug(f"Failed to load respan.json from {config_path}: {e}")
        return {"fields": {}, "properties": {}}

    if not isinstance(raw, dict):
        return {"fields": {}, "properties": {}}

    fields: Dict[str, Any] = {}
    properties: Dict[str, Any] = {}
    for key, value in raw.items():
        # Route each entry to exactly one bucket based on the known-key set.
        target = fields if key in KNOWN_CONFIG_KEYS else properties
        target[key] = value
    return {"fields": fields, "properties": properties}
114
+
115
+
84
116
  def get_content(msg: Dict[str, Any]) -> Any:
85
117
  """Extract content from a message."""
86
118
  if isinstance(msg, dict):
@@ -317,27 +349,33 @@ def create_respan_spans(
317
349
  user_msg: Dict[str, Any],
318
350
  assistant_msgs: List[Dict[str, Any]],
319
351
  tool_results: List[Dict[str, Any]],
352
+ config: Optional[Dict[str, Any]] = None,
320
353
  ) -> List[Dict[str, Any]]:
321
- """Create Respan span logs for a single turn with all available metadata."""
354
+ """Create Respan span logs for a single turn with all available metadata.
355
+
356
+ Produces a proper span tree so that the Respan UI renders nested children:
357
+ Root (agent container)
358
+ ├── claude.chat (generation – carries model, tokens, messages)
359
+ ├── Thinking 1 (generation, if extended thinking is present)
360
+ ├── Tool: Read (tool, if tool use occurred)
361
+ └── Tool: Write (tool, if tool use occurred)
362
+ """
322
363
  spans = []
323
-
324
- # Extract user text and timestamp
364
+
365
+ # ------------------------------------------------------------------
366
+ # 1. Extract data from the transcript messages
367
+ # ------------------------------------------------------------------
325
368
  user_text = get_text_content(user_msg)
326
369
  user_timestamp = user_msg.get("timestamp")
327
370
  user_time = parse_timestamp(user_timestamp) if user_timestamp else None
328
-
329
- # Extract assistant text from ALL messages in the turn (tool-using turns
330
- # have multiple assistant messages: text before tool, then text after).
371
+
372
+ # Collect assistant text across all messages in the turn
331
373
  final_output = ""
332
- first_assistant_msg = None
333
374
  if assistant_msgs:
334
375
  text_parts = [get_text_content(m) for m in assistant_msgs]
335
376
  final_output = "\n".join(p for p in text_parts if p)
336
- first_assistant_msg = assistant_msgs[0]
337
-
338
- # Get model, usage, and timing info from assistant messages.
339
- # For tool-using turns there are multiple assistant messages (multiple API
340
- # calls), so we aggregate usage and take the *last* timestamp as end time.
377
+
378
+ # Aggregate model, usage, timing from (possibly multiple) API calls
341
379
  model = "claude"
342
380
  usage = None
343
381
  request_id = None
@@ -360,7 +398,6 @@ def create_respan_spans(
360
398
  last_assistant_timestamp = ts
361
399
  last_assistant_time = parse_timestamp(ts)
362
400
 
363
- # Aggregate usage across all API calls in the turn
364
401
  msg_usage = msg_obj.get("usage")
365
402
  if msg_usage:
366
403
  if usage is None:
@@ -371,187 +408,187 @@ def create_respan_spans(
371
408
  "cache_read_input_tokens"):
372
409
  if key in msg_usage:
373
410
  usage[key] = usage.get(key, 0) + msg_usage[key]
374
- # Keep last service_tier
375
411
  if "service_tier" in msg_usage:
376
412
  usage["service_tier"] = msg_usage["service_tier"]
377
413
 
378
- # Calculate timing
379
- start_time_str = user_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
380
- timestamp_str = last_assistant_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
414
+ # Timing
415
+ now_str = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
416
+ start_time_str = user_timestamp or first_assistant_timestamp or now_str
417
+ timestamp_str = last_assistant_timestamp or first_assistant_timestamp or now_str
381
418
 
382
- # Calculate latency from user message to final assistant response
383
419
  latency = None
384
420
  if user_time and last_assistant_time:
385
421
  latency = (last_assistant_time - user_time).total_seconds()
386
-
387
- # Extract messages for chat span
388
- prompt_messages = []
422
+
423
+ # Messages
424
+ prompt_messages: List[Dict[str, Any]] = []
389
425
  if user_text:
390
426
  prompt_messages.append({"role": "user", "content": user_text})
391
-
392
- completion_message = None
427
+ completion_message: Optional[Dict[str, Any]] = None
393
428
  if final_output:
394
429
  completion_message = {"role": "assistant", "content": final_output}
395
-
396
- # Create trace ID for this turn
430
+
431
+ # IDs — env vars take precedence, then respan.json fields, then built-in defaults
432
+ cfg_fields = (config or {}).get("fields", {})
433
+ cfg_props = (config or {}).get("properties", {})
434
+
397
435
  trace_unique_id = f"{session_id}_turn_{turn_num}"
398
-
399
- # Naming: human-readable workflow + span names
400
- workflow_name = "claude-code"
401
- # Use first ~60 chars of user message as span name for readability
402
- user_preview = (user_text[:60] + "...") if user_text and len(user_text) > 60 else (user_text or f"turn_{turn_num}")
403
- root_span_name = f"Turn {turn_num}: {user_preview}"
436
+ workflow_name = os.environ.get("RESPAN_WORKFLOW_NAME") or cfg_fields.get("workflow_name") or "claude-code"
437
+ root_span_name = os.environ.get("RESPAN_SPAN_NAME") or cfg_fields.get("span_name") or "claude-code"
404
438
  thread_id = f"claudecode_{session_id}"
405
-
406
- # Build metadata with additional info
407
- metadata = {
408
- "claude_code_turn": turn_num,
409
- }
439
+ customer_id = os.environ.get("RESPAN_CUSTOMER_ID") or cfg_fields.get("customer_id") or ""
440
+
441
+ # Metadata — custom properties from respan.json, then env overrides
442
+ metadata: Dict[str, Any] = {"claude_code_turn": turn_num}
443
+ if cfg_props:
444
+ metadata.update(cfg_props)
410
445
  if request_id:
411
446
  metadata["request_id"] = request_id
412
447
  if stop_reason:
413
448
  metadata["stop_reason"] = stop_reason
414
-
415
- # Merge user-provided metadata from env var
416
449
  env_metadata = os.environ.get("RESPAN_METADATA")
417
450
  if env_metadata:
418
451
  try:
419
452
  extra = json.loads(env_metadata)
420
453
  if isinstance(extra, dict):
421
454
  metadata.update(extra)
422
- else:
423
- debug("RESPAN_METADATA is not a JSON object, skipping")
424
- except json.JSONDecodeError as e:
425
- debug(f"Invalid JSON in RESPAN_METADATA, skipping: {e}")
426
-
427
- # Build usage object with cache details
428
- usage_obj = None
455
+ except json.JSONDecodeError:
456
+ pass
457
+
458
+ # Usage
459
+ usage_fields: Dict[str, Any] = {}
429
460
  if usage:
430
- usage_obj = {
431
- "prompt_tokens": usage.get("input_tokens", 0),
432
- "completion_tokens": usage.get("output_tokens", 0),
433
- }
434
- total_tokens = usage_obj["prompt_tokens"] + usage_obj["completion_tokens"]
435
- if total_tokens > 0:
436
- usage_obj["total_tokens"] = total_tokens
437
-
438
- # Add cache details
439
- prompt_tokens_details = {}
461
+ prompt_tokens = usage.get("input_tokens", 0)
462
+ completion_tokens = usage.get("output_tokens", 0)
463
+ usage_fields["prompt_tokens"] = prompt_tokens
464
+ usage_fields["completion_tokens"] = completion_tokens
465
+ total = prompt_tokens + completion_tokens
466
+ if total > 0:
467
+ usage_fields["total_tokens"] = total
440
468
  cache_creation = usage.get("cache_creation_input_tokens", 0)
441
469
  cache_read = usage.get("cache_read_input_tokens", 0)
470
+ if cache_creation > 0:
471
+ usage_fields["cache_creation_prompt_tokens"] = cache_creation
472
+ prompt_tokens_details: Dict[str, int] = {}
442
473
  if cache_creation > 0:
443
474
  prompt_tokens_details["cache_creation_tokens"] = cache_creation
444
- usage_obj["cache_creation_prompt_tokens"] = cache_creation
445
475
  if cache_read > 0:
446
476
  prompt_tokens_details["cached_tokens"] = cache_read
447
-
448
477
  if prompt_tokens_details:
449
- usage_obj["prompt_tokens_details"] = prompt_tokens_details
450
-
451
- # Add service tier to metadata
478
+ usage_fields["prompt_tokens_details"] = prompt_tokens_details
452
479
  service_tier = usage.get("service_tier")
453
480
  if service_tier:
454
481
  metadata["service_tier"] = service_tier
455
-
456
- # Create chat span (root)
457
- chat_span_id = f"claudecode_{trace_unique_id}_chat"
458
- customer_id = os.environ.get("RESPAN_CUSTOMER_ID", "claude-code")
459
- chat_span = {
482
+
483
+ # ------------------------------------------------------------------
484
+ # 2. Root span – pure agent container (no model / token info)
485
+ # ------------------------------------------------------------------
486
+ root_span_id = f"claudecode_{trace_unique_id}_root"
487
+ root_span: Dict[str, Any] = {
460
488
  "trace_unique_id": trace_unique_id,
461
489
  "thread_identifier": thread_id,
462
490
  "customer_identifier": customer_id,
463
- "span_unique_id": chat_span_id,
464
- "span_parent_id": None,
491
+ "span_unique_id": root_span_id,
465
492
  "span_name": root_span_name,
466
493
  "span_workflow_name": workflow_name,
467
- "log_type": "agent",
494
+ "model": model,
495
+ "span_path": "",
468
496
  "input": json.dumps(prompt_messages) if prompt_messages else "",
469
497
  "output": json.dumps(completion_message) if completion_message else "",
470
- "prompt_messages": prompt_messages,
471
- "completion_message": completion_message,
472
- "model": model,
473
498
  "timestamp": timestamp_str,
474
499
  "start_time": start_time_str,
475
500
  "metadata": metadata,
476
501
  }
477
-
478
- # Add usage if available
479
- if usage_obj:
480
- chat_span["prompt_tokens"] = usage_obj["prompt_tokens"]
481
- chat_span["completion_tokens"] = usage_obj["completion_tokens"]
482
- if "total_tokens" in usage_obj:
483
- chat_span["total_tokens"] = usage_obj["total_tokens"]
484
- if "cache_creation_prompt_tokens" in usage_obj:
485
- chat_span["cache_creation_prompt_tokens"] = usage_obj["cache_creation_prompt_tokens"]
486
- if "prompt_tokens_details" in usage_obj:
487
- chat_span["prompt_tokens_details"] = usage_obj["prompt_tokens_details"]
488
-
489
- # Add latency if calculated
490
502
  if latency is not None:
491
- chat_span["latency"] = latency
492
-
493
- spans.append(chat_span)
494
-
495
- # Extract thinking blocks and create spans for them
496
- thinking_spans = []
497
- for idx, assistant_msg in enumerate(assistant_msgs):
498
- if isinstance(assistant_msg, dict) and "message" in assistant_msg:
499
- content = assistant_msg["message"].get("content", [])
500
- if isinstance(content, list):
501
- for item in content:
502
- if isinstance(item, dict) and item.get("type") == "thinking":
503
- thinking_text = item.get("thinking", "")
504
- if thinking_text:
505
- thinking_span_id = f"claudecode_{trace_unique_id}_thinking_{len(thinking_spans) + 1}"
506
- thinking_timestamp = assistant_msg.get("timestamp", timestamp_str)
507
- thinking_spans.append({
508
- "trace_unique_id": trace_unique_id,
509
- "span_unique_id": thinking_span_id,
510
- "span_parent_id": chat_span_id,
511
- "span_name": f"Thinking {len(thinking_spans) + 1}",
512
- "span_workflow_name": workflow_name,
513
- "log_type": "generation",
514
- "input": "",
515
- "output": thinking_text,
516
- "timestamp": thinking_timestamp,
517
- "start_time": thinking_timestamp,
518
- })
519
-
520
- spans.extend(thinking_spans)
521
-
522
- # Collect all tool calls and results with metadata
523
- tool_call_map = {}
503
+ root_span["latency"] = latency
504
+ spans.append(root_span)
505
+
506
+ # ------------------------------------------------------------------
507
+ # 3. LLM generation child span (always created → every turn has ≥1 child)
508
+ # ------------------------------------------------------------------
509
+ gen_span_id = f"claudecode_{trace_unique_id}_gen"
510
+ gen_start = first_assistant_timestamp or start_time_str
511
+ gen_end = last_assistant_timestamp or timestamp_str
512
+ gen_latency = None
513
+ gen_start_dt = parse_timestamp(gen_start) if gen_start else None
514
+ gen_end_dt = parse_timestamp(gen_end) if gen_end else None
515
+ if gen_start_dt and gen_end_dt:
516
+ gen_latency = (gen_end_dt - gen_start_dt).total_seconds()
517
+
518
+ gen_span: Dict[str, Any] = {
519
+ "trace_unique_id": trace_unique_id,
520
+ "span_unique_id": gen_span_id,
521
+ "span_parent_id": root_span_id,
522
+ "span_name": "claude.chat",
523
+ "span_workflow_name": workflow_name,
524
+ "span_path": "claude_chat",
525
+ "model": model,
526
+ "provider_id": "anthropic",
527
+ "input": json.dumps(prompt_messages) if prompt_messages else "",
528
+ "output": json.dumps(completion_message) if completion_message else "",
529
+ "prompt_messages": prompt_messages,
530
+ "completion_message": completion_message,
531
+ "timestamp": gen_end,
532
+ "start_time": gen_start,
533
+ }
534
+ if gen_latency is not None:
535
+ gen_span["latency"] = gen_latency
536
+ gen_span.update(usage_fields)
537
+ spans.append(gen_span)
538
+
539
+ # ------------------------------------------------------------------
540
+ # 4. Thinking child spans
541
+ # ------------------------------------------------------------------
542
+ thinking_num = 0
524
543
  for assistant_msg in assistant_msgs:
525
- tool_calls = get_tool_calls(assistant_msg)
526
- for tool_call in tool_calls:
527
- tool_name = tool_call.get("name", "unknown")
528
- tool_input = tool_call.get("input", {})
544
+ if not (isinstance(assistant_msg, dict) and "message" in assistant_msg):
545
+ continue
546
+ content = assistant_msg["message"].get("content", [])
547
+ if not isinstance(content, list):
548
+ continue
549
+ for item in content:
550
+ if isinstance(item, dict) and item.get("type") == "thinking":
551
+ thinking_text = item.get("thinking", "")
552
+ if not thinking_text:
553
+ continue
554
+ thinking_num += 1
555
+ thinking_ts = assistant_msg.get("timestamp", timestamp_str)
556
+ spans.append({
557
+ "trace_unique_id": trace_unique_id,
558
+ "span_unique_id": f"claudecode_{trace_unique_id}_thinking_{thinking_num}",
559
+ "span_parent_id": root_span_id,
560
+ "span_name": f"Thinking {thinking_num}",
561
+ "span_workflow_name": workflow_name,
562
+ "span_path": "thinking",
563
+ "input": "",
564
+ "output": thinking_text,
565
+ "timestamp": thinking_ts,
566
+ "start_time": thinking_ts,
567
+ })
568
+
569
+ # ------------------------------------------------------------------
570
+ # 5. Tool child spans
571
+ # ------------------------------------------------------------------
572
+ tool_call_map: Dict[str, Dict[str, Any]] = {}
573
+ for assistant_msg in assistant_msgs:
574
+ for tool_call in get_tool_calls(assistant_msg):
529
575
  tool_id = tool_call.get("id", "")
530
576
  tool_call_map[tool_id] = {
531
- "name": tool_name,
532
- "input": tool_input,
577
+ "name": tool_call.get("name", "unknown"),
578
+ "input": tool_call.get("input", {}),
533
579
  "id": tool_id,
534
580
  "timestamp": assistant_msg.get("timestamp") if isinstance(assistant_msg, dict) else None,
535
581
  }
536
-
537
- # Find matching tool results with metadata
582
+
538
583
  for tr in tool_results:
539
584
  tr_content = get_content(tr)
540
- tool_result_metadata = {}
541
-
542
- # Extract tool result metadata
585
+ tool_result_metadata: Dict[str, Any] = {}
543
586
  if isinstance(tr, dict):
544
- tool_use_result = tr.get("toolUseResult", {})
545
- if tool_use_result:
546
- if "durationMs" in tool_use_result:
547
- tool_result_metadata["duration_ms"] = tool_use_result["durationMs"]
548
- if "numFiles" in tool_use_result:
549
- tool_result_metadata["num_files"] = tool_use_result["numFiles"]
550
- if "filenames" in tool_use_result:
551
- tool_result_metadata["filenames"] = tool_use_result["filenames"]
552
- if "truncated" in tool_use_result:
553
- tool_result_metadata["truncated"] = tool_use_result["truncated"]
554
-
587
+ tur = tr.get("toolUseResult") or {}
588
+ for src, dst in [("durationMs", "duration_ms"), ("numFiles", "num_files"),
589
+ ("filenames", "filenames"), ("truncated", "truncated")]:
590
+ if src in tur:
591
+ tool_result_metadata[dst] = tur[src]
555
592
  if isinstance(tr_content, list):
556
593
  for item in tr_content:
557
594
  if isinstance(item, dict) and item.get("type") == "tool_result":
@@ -560,44 +597,51 @@ def create_respan_spans(
560
597
  tool_call_map[tool_use_id]["output"] = item.get("content")
561
598
  tool_call_map[tool_use_id]["result_metadata"] = tool_result_metadata
562
599
  tool_call_map[tool_use_id]["result_timestamp"] = tr.get("timestamp")
563
-
564
- # Create tool spans (children)
600
+
565
601
  tool_num = 0
566
- for tool_id, tool_data in tool_call_map.items():
602
+ for tool_id, td in tool_call_map.items():
567
603
  tool_num += 1
568
- tool_span_id = f"claudecode_{trace_unique_id}_tool_{tool_num}"
569
-
570
- # Use tool result timestamp if available, otherwise use tool call timestamp
571
- tool_timestamp = tool_data.get("result_timestamp") or tool_data.get("timestamp") or timestamp_str
572
- tool_start_time = tool_data.get("timestamp") or start_time_str
573
-
574
- # Format input and output for better readability
575
- formatted_input = format_tool_input(tool_data['name'], tool_data["input"])
576
- formatted_output = format_tool_output(tool_data['name'], tool_data.get("output"))
577
-
578
- tool_span = {
604
+ tool_ts = td.get("result_timestamp") or td.get("timestamp") or timestamp_str
605
+ tool_start = td.get("timestamp") or start_time_str
606
+ tool_span: Dict[str, Any] = {
579
607
  "trace_unique_id": trace_unique_id,
580
- "span_unique_id": tool_span_id,
581
- "span_parent_id": chat_span_id,
582
- "span_name": f"Tool: {tool_data['name']}",
608
+ "span_unique_id": f"claudecode_{trace_unique_id}_tool_{tool_num}",
609
+ "span_parent_id": root_span_id,
610
+ "span_name": f"Tool: {td['name']}",
583
611
  "span_workflow_name": workflow_name,
584
- "log_type": "tool",
585
- "input": formatted_input,
586
- "output": formatted_output,
587
- "timestamp": tool_timestamp,
588
- "start_time": tool_start_time,
612
+ "span_path": f"tool_{td['name'].lower()}",
613
+ "input": format_tool_input(td["name"], td["input"]),
614
+ "output": format_tool_output(td["name"], td.get("output")),
615
+ "timestamp": tool_ts,
616
+ "start_time": tool_start,
589
617
  }
590
-
591
- # Add tool result metadata if available
592
- if tool_data.get("result_metadata"):
593
- tool_span["metadata"] = tool_data["result_metadata"]
594
- # Calculate latency if duration_ms is available
595
- duration_ms = tool_data["result_metadata"].get("duration_ms")
618
+ if td.get("result_metadata"):
619
+ tool_span["metadata"] = td["result_metadata"]
620
+ duration_ms = td["result_metadata"].get("duration_ms")
596
621
  if duration_ms:
597
- tool_span["latency"] = duration_ms / 1000.0 # Convert ms to seconds
598
-
622
+ tool_span["latency"] = duration_ms / 1000.0
599
623
  spans.append(tool_span)
600
-
624
+
625
+ # Add required Respan platform fields to every span.
626
+ # The backend expects these on all spans (per official SDK examples).
627
+ respan_defaults = {
628
+ "warnings": "",
629
+ "encoding_format": "float",
630
+ "disable_fallback": False,
631
+ "respan_params": {
632
+ "has_webhook": False,
633
+ "environment": os.environ.get("RESPAN_ENVIRONMENT", "prod"),
634
+ },
635
+ "field_name": "data: ",
636
+ "delimiter": "\n\n",
637
+ "disable_log": False,
638
+ "request_breakdown": False,
639
+ }
640
+ for span in spans:
641
+ for key, value in respan_defaults.items():
642
+ if key not in span:
643
+ span[key] = value
644
+
601
645
  return spans
602
646
 
603
647
 
@@ -607,36 +651,49 @@ def send_spans(
607
651
  base_url: str,
608
652
  turn_num: int,
609
653
  ) -> None:
610
- """Send spans to Respan with timeout and one retry on transient errors."""
654
+ """Send spans to Respan as a single batch (matches official SDK behaviour).
655
+
656
+ The official Respan tracing SDK sends all spans for a trace in one
657
+ POST request to ``/v1/traces/ingest``. We do the same here, with
658
+ simple retry logic for transient server errors.
659
+ """
611
660
  url = f"{base_url}/v1/traces/ingest"
612
661
  headers = {"Authorization": f"Bearer {api_key}"}
613
662
 
614
- for attempt in range(2):
663
+ span_names = [s.get("span_name", "?") for s in spans]
664
+ payload_json = json.dumps(spans)
665
+ payload_size = len(payload_json)
666
+ debug(f"Sending {len(spans)} spans ({payload_size} bytes) for turn {turn_num}: {span_names}")
667
+ if DEBUG:
668
+ debug_file = LOG_FILE.parent / f"respan_spans_turn_{turn_num}.json"
669
+ debug_file.write_text(payload_json, encoding="utf-8")
670
+ debug(f"Dumped spans to {debug_file}")
671
+
672
+ for attempt in range(3):
615
673
  try:
616
674
  response = requests.post(url, json=spans, headers=headers, timeout=30)
617
675
  if response.status_code < 400:
618
- debug(f"Sent {len(spans)} spans for turn {turn_num}")
676
+ resp_text = response.text[:300] if response.text else ""
677
+ debug(f"Sent {len(spans)} spans for turn {turn_num} "
678
+ f"(attempt {attempt + 1}): {resp_text}")
619
679
  return
620
680
  if response.status_code < 500:
621
- # 4xx not retryable
622
- log("ERROR", f"Failed to send spans for turn {turn_num}: HTTP {response.status_code}")
681
+ log("ERROR", f"Spans rejected for turn {turn_num}: "
682
+ f"HTTP {response.status_code} - {response.text[:200]}")
623
683
  return
624
- # 5xx — retryable
625
- if attempt == 0:
626
- debug(f"Server error {response.status_code} for turn {turn_num}, retrying...")
627
- time.sleep(1)
628
- continue
629
- log("ERROR", f"Failed to send spans for turn {turn_num} after retry: HTTP {response.status_code}")
630
- except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
631
- if attempt == 0:
632
- debug(f"Transient error for turn {turn_num}: {e}, retrying...")
633
- time.sleep(1)
634
- continue
635
- log("ERROR", f"Failed to send spans for turn {turn_num} after retry: {e}")
684
+ # 5xx — retry after short delay
685
+ debug(f"Server error for turn {turn_num} "
686
+ f"(attempt {attempt + 1}), retrying...")
687
+ time.sleep(1.0)
688
+ except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
689
+ time.sleep(1.0)
636
690
  except Exception as e:
637
691
  log("ERROR", f"Failed to send spans for turn {turn_num}: {e}")
638
692
  return
639
693
 
694
+ log("ERROR", f"Failed to send {len(spans)} spans for turn {turn_num} "
695
+ f"after 3 attempts")
696
+
640
697
 
641
698
  def process_transcript(
642
699
  session_id: str,
@@ -644,6 +701,7 @@ def process_transcript(
644
701
  state: Dict[str, Any],
645
702
  api_key: str,
646
703
  base_url: str,
704
+ config: Optional[Dict[str, Any]] = None,
647
705
  ) -> int:
648
706
  """Process a transcript file and create traces for new turns."""
649
707
  # Get previous state for this session
@@ -693,7 +751,8 @@ def process_transcript(
693
751
  turns_processed += 1
694
752
  turn_num = turn_count + turns_processed
695
753
  spans = create_respan_spans(
696
- session_id, turn_num, current_user, current_assistants, current_tool_results
754
+ session_id, turn_num, current_user, current_assistants, current_tool_results,
755
+ config=config,
697
756
  )
698
757
  send_spans(spans, api_key, base_url, turn_num)
699
758
  last_committed_line = total_lines # safe default, refined below
@@ -755,8 +814,17 @@ def process_transcript(
755
814
  current_assistants.append(merged)
756
815
 
757
816
  if current_user and current_assistants:
758
- _commit_turn()
759
- last_committed_line = total_lines
817
+ # Check if the turn has actual text output. The Stop hook can fire
818
+ # before the final assistant text block is flushed to disk, leaving
819
+ # only thinking/tool_use blocks. If no text content is found, treat
820
+ # the turn as incomplete so the retry logic re-reads it.
821
+ has_text = any(get_text_content(m) for m in current_assistants)
822
+ if has_text:
823
+ _commit_turn()
824
+ last_committed_line = total_lines
825
+ else:
826
+ last_committed_line = current_user_line
827
+ debug(f"Turn has assistant msgs but no text output yet (likely not flushed), will retry")
760
828
  else:
761
829
  # Incomplete turn — rewind so the next run re-reads from the
762
830
  # unmatched user message (or from where we left off if no user).
@@ -869,13 +937,32 @@ def main():
869
937
  debug("Tracing disabled (TRACE_TO_RESPAN != true)")
870
938
  sys.exit(0)
871
939
 
872
- # Check for required environment variables
940
+ # Resolve API key: env var > ~/.config/respan/credentials.json
873
941
  api_key = os.getenv("RESPAN_API_KEY")
874
- # Default: api.respan.ai | Enterprise: endpoint.respan.ai (set RESPAN_BASE_URL)
875
942
  base_url = os.getenv("RESPAN_BASE_URL", "https://api.respan.ai/api")
876
943
 
877
944
  if not api_key:
878
- log("ERROR", "Respan API key not set (RESPAN_API_KEY)")
945
+ creds_file = Path.home() / ".config" / "respan" / "credentials.json"
946
+ if creds_file.exists():
947
+ try:
948
+ creds = json.loads(creds_file.read_text(encoding="utf-8"))
949
+ # Find the active profile's credential
950
+ config_file = Path.home() / ".config" / "respan" / "config.json"
951
+ profile = "default"
952
+ if config_file.exists():
953
+ cfg = json.loads(config_file.read_text(encoding="utf-8"))
954
+ profile = cfg.get("activeProfile", "default")
955
+ cred = creds.get(profile, {})
956
+ api_key = cred.get("apiKey") or cred.get("accessToken")
957
+ if not base_url or base_url == "https://api.respan.ai/api":
958
+ base_url = cred.get("baseUrl", base_url)
959
+ if api_key:
960
+ debug(f"Using API key from credentials.json (profile: {profile})")
961
+ except (json.JSONDecodeError, IOError) as e:
962
+ debug(f"Failed to read credentials.json: {e}")
963
+
964
+ if not api_key:
965
+ log("ERROR", "No API key found. Run: respan auth login")
879
966
  sys.exit(0)
880
967
 
881
968
  # Try stdin payload first, fall back to filesystem scan
@@ -894,11 +981,49 @@ def main():
894
981
 
895
982
  debug(f"Processing session: {session_id}")
896
983
 
897
- # Process the transcript under file lock
984
+ # Load respan.json config from the project directory.
985
+ # Extract the project CWD from the first user message in the transcript.
986
+ config: Dict[str, Any] = {"fields": {}, "properties": {}}
898
987
  try:
899
- with state_lock():
900
- state = load_state()
901
- turns = process_transcript(session_id, transcript_file, state, api_key, base_url)
988
+ first_line = transcript_file.read_text(encoding="utf-8").split("\n")[0]
989
+ if first_line:
990
+ first_msg = json.loads(first_line)
991
+ cwd = first_msg.get("cwd")
992
+ if not cwd:
993
+ # Try second line (first is often file-history-snapshot)
994
+ lines = transcript_file.read_text(encoding="utf-8").split("\n")
995
+ for line in lines[:5]:
996
+ if line.strip():
997
+ msg = json.loads(line)
998
+ cwd = msg.get("cwd")
999
+ if cwd:
1000
+ break
1001
+ if cwd:
1002
+ config = load_respan_config(cwd)
1003
+ debug(f"Loaded respan.json config from {cwd}: {config}")
1004
+ except Exception as e:
1005
+ debug(f"Failed to extract CWD or load config: {e}")
1006
+
1007
+ # Process the transcript under file lock.
1008
+ # Retry up to 3 times with a short delay — the Stop hook can fire
1009
+ # before Claude Code finishes flushing the assistant response to
1010
+ # the transcript file, causing an incomplete turn on the first read.
1011
+ max_attempts = 3
1012
+ turns = 0
1013
+ try:
1014
+ for attempt in range(max_attempts):
1015
+ with state_lock():
1016
+ state = load_state()
1017
+ turns = process_transcript(session_id, transcript_file, state, api_key, base_url, config=config)
1018
+
1019
+ if turns > 0:
1020
+ break
1021
+
1022
+ if attempt < max_attempts - 1:
1023
+ delay = 0.5 * (attempt + 1)
1024
+ debug(f"No turns processed (attempt {attempt + 1}/{max_attempts}), "
1025
+ f"retrying in {delay}s...")
1026
+ time.sleep(delay)
902
1027
 
903
1028
  # Log execution time
904
1029
  duration = (datetime.now() - script_start).total_seconds()