@respan/cli 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,6 +81,38 @@ def save_state(state: Dict[str, Any]) -> None:
81
81
  STATE_FILE.write_text(json.dumps(state, indent=2), encoding="utf-8")
82
82
 
83
83
 
84
+ # Known config keys in respan.json that map to span fields.
85
+ # Anything else is treated as a custom property (goes into metadata).
86
+ KNOWN_CONFIG_KEYS = {"customer_id", "span_name", "workflow_name"}
87
+
88
+
89
+ def load_respan_config(cwd: str) -> Dict[str, Any]:
90
+ """Load .claude/respan.json from the project directory.
91
+
92
+ Returns a dict with two keys:
93
+ - "fields": known span fields (customer_id, span_name, workflow_name)
94
+ - "properties": everything else (custom properties → metadata)
95
+ """
96
+ config_path = Path(cwd) / ".claude" / "respan.json"
97
+ if not config_path.exists():
98
+ return {"fields": {}, "properties": {}}
99
+ try:
100
+ raw = json.loads(config_path.read_text(encoding="utf-8"))
101
+ if not isinstance(raw, dict):
102
+ return {"fields": {}, "properties": {}}
103
+ fields = {}
104
+ properties = {}
105
+ for k, v in raw.items():
106
+ if k in KNOWN_CONFIG_KEYS:
107
+ fields[k] = v
108
+ else:
109
+ properties[k] = v
110
+ return {"fields": fields, "properties": properties}
111
+ except (json.JSONDecodeError, IOError) as e:
112
+ debug(f"Failed to load respan.json from {config_path}: {e}")
113
+ return {"fields": {}, "properties": {}}
114
+
115
+
84
116
  def get_content(msg: Dict[str, Any]) -> Any:
85
117
  """Extract content from a message."""
86
118
  if isinstance(msg, dict):
@@ -317,27 +349,33 @@ def create_respan_spans(
317
349
  user_msg: Dict[str, Any],
318
350
  assistant_msgs: List[Dict[str, Any]],
319
351
  tool_results: List[Dict[str, Any]],
352
+ config: Optional[Dict[str, Any]] = None,
320
353
  ) -> List[Dict[str, Any]]:
321
- """Create Respan span logs for a single turn with all available metadata."""
354
+ """Create Respan span logs for a single turn with all available metadata.
355
+
356
+ Produces a proper span tree so that the Respan UI renders nested children:
357
+ Root (agent container)
358
+ ├── claude.chat (generation – carries model, tokens, messages)
359
+ ├── Thinking 1 (generation, if extended thinking is present)
360
+ ├── Tool: Read (tool, if tool use occurred)
361
+ └── Tool: Write (tool, if tool use occurred)
362
+ """
322
363
  spans = []
323
-
324
- # Extract user text and timestamp
364
+
365
+ # ------------------------------------------------------------------
366
+ # 1. Extract data from the transcript messages
367
+ # ------------------------------------------------------------------
325
368
  user_text = get_text_content(user_msg)
326
369
  user_timestamp = user_msg.get("timestamp")
327
370
  user_time = parse_timestamp(user_timestamp) if user_timestamp else None
328
-
329
- # Extract assistant text from ALL messages in the turn (tool-using turns
330
- # have multiple assistant messages: text before tool, then text after).
371
+
372
+ # Collect assistant text across all messages in the turn
331
373
  final_output = ""
332
- first_assistant_msg = None
333
374
  if assistant_msgs:
334
375
  text_parts = [get_text_content(m) for m in assistant_msgs]
335
376
  final_output = "\n".join(p for p in text_parts if p)
336
- first_assistant_msg = assistant_msgs[0]
337
-
338
- # Get model, usage, and timing info from assistant messages.
339
- # For tool-using turns there are multiple assistant messages (multiple API
340
- # calls), so we aggregate usage and take the *last* timestamp as end time.
377
+
378
+ # Aggregate model, usage, timing from (possibly multiple) API calls
341
379
  model = "claude"
342
380
  usage = None
343
381
  request_id = None
@@ -360,7 +398,6 @@ def create_respan_spans(
360
398
  last_assistant_timestamp = ts
361
399
  last_assistant_time = parse_timestamp(ts)
362
400
 
363
- # Aggregate usage across all API calls in the turn
364
401
  msg_usage = msg_obj.get("usage")
365
402
  if msg_usage:
366
403
  if usage is None:
@@ -371,187 +408,191 @@ def create_respan_spans(
371
408
  "cache_read_input_tokens"):
372
409
  if key in msg_usage:
373
410
  usage[key] = usage.get(key, 0) + msg_usage[key]
374
- # Keep last service_tier
375
411
  if "service_tier" in msg_usage:
376
412
  usage["service_tier"] = msg_usage["service_tier"]
377
413
 
378
- # Calculate timing
379
- start_time_str = user_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
380
- timestamp_str = last_assistant_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
414
+ # Timing
415
+ now_str = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
416
+ start_time_str = user_timestamp or first_assistant_timestamp or now_str
417
+ timestamp_str = last_assistant_timestamp or first_assistant_timestamp or now_str
381
418
 
382
- # Calculate latency from user message to final assistant response
383
419
  latency = None
384
420
  if user_time and last_assistant_time:
385
421
  latency = (last_assistant_time - user_time).total_seconds()
386
-
387
- # Extract messages for chat span
388
- prompt_messages = []
422
+
423
+ # Messages
424
+ prompt_messages: List[Dict[str, Any]] = []
389
425
  if user_text:
390
426
  prompt_messages.append({"role": "user", "content": user_text})
391
-
392
- completion_message = None
427
+ completion_message: Optional[Dict[str, Any]] = None
393
428
  if final_output:
394
429
  completion_message = {"role": "assistant", "content": final_output}
395
-
396
- # Create trace ID for this turn
430
+
431
+ # IDs: respan.json fields, then env var overrides
432
+ cfg_fields = (config or {}).get("fields", {})
433
+ cfg_props = (config or {}).get("properties", {})
434
+
397
435
  trace_unique_id = f"{session_id}_turn_{turn_num}"
398
-
399
- # Naming: human-readable workflow + span names
400
- workflow_name = "claude-code"
401
- # Use first ~60 chars of user message as span name for readability
402
- user_preview = (user_text[:60] + "...") if user_text and len(user_text) > 60 else (user_text or f"turn_{turn_num}")
403
- root_span_name = f"Turn {turn_num}: {user_preview}"
436
+ workflow_name = os.environ.get("RESPAN_WORKFLOW_NAME") or cfg_fields.get("workflow_name") or "claude-code"
437
+ root_span_name = os.environ.get("RESPAN_SPAN_NAME") or cfg_fields.get("span_name") or "claude-code"
404
438
  thread_id = f"claudecode_{session_id}"
405
-
406
- # Build metadata with additional info
407
- metadata = {
408
- "claude_code_turn": turn_num,
409
- }
439
+ customer_id = os.environ.get("RESPAN_CUSTOMER_ID") or cfg_fields.get("customer_id") or ""
440
+
441
+ # Metadata — custom properties from respan.json, then env overrides
442
+ metadata: Dict[str, Any] = {"claude_code_turn": turn_num}
443
+ if cfg_props:
444
+ metadata.update(cfg_props)
410
445
  if request_id:
411
446
  metadata["request_id"] = request_id
412
447
  if stop_reason:
413
448
  metadata["stop_reason"] = stop_reason
414
-
415
- # Merge user-provided metadata from env var
416
449
  env_metadata = os.environ.get("RESPAN_METADATA")
417
450
  if env_metadata:
418
451
  try:
419
452
  extra = json.loads(env_metadata)
420
453
  if isinstance(extra, dict):
421
454
  metadata.update(extra)
422
- else:
423
- debug("RESPAN_METADATA is not a JSON object, skipping")
424
- except json.JSONDecodeError as e:
425
- debug(f"Invalid JSON in RESPAN_METADATA, skipping: {e}")
426
-
427
- # Build usage object with cache details
428
- usage_obj = None
455
+ except json.JSONDecodeError:
456
+ pass
457
+
458
+ # Usage
459
+ usage_fields: Dict[str, Any] = {}
429
460
  if usage:
430
- usage_obj = {
431
- "prompt_tokens": usage.get("input_tokens", 0),
432
- "completion_tokens": usage.get("output_tokens", 0),
433
- }
434
- total_tokens = usage_obj["prompt_tokens"] + usage_obj["completion_tokens"]
435
- if total_tokens > 0:
436
- usage_obj["total_tokens"] = total_tokens
437
-
438
- # Add cache details
439
- prompt_tokens_details = {}
461
+ prompt_tokens = usage.get("input_tokens", 0)
462
+ completion_tokens = usage.get("output_tokens", 0)
463
+ usage_fields["prompt_tokens"] = prompt_tokens
464
+ usage_fields["completion_tokens"] = completion_tokens
465
+ total = prompt_tokens + completion_tokens
466
+ if total > 0:
467
+ usage_fields["total_tokens"] = total
440
468
  cache_creation = usage.get("cache_creation_input_tokens", 0)
441
469
  cache_read = usage.get("cache_read_input_tokens", 0)
470
+ if cache_creation > 0:
471
+ usage_fields["cache_creation_prompt_tokens"] = cache_creation
472
+ prompt_tokens_details: Dict[str, int] = {}
442
473
  if cache_creation > 0:
443
474
  prompt_tokens_details["cache_creation_tokens"] = cache_creation
444
- usage_obj["cache_creation_prompt_tokens"] = cache_creation
445
475
  if cache_read > 0:
446
476
  prompt_tokens_details["cached_tokens"] = cache_read
447
-
448
477
  if prompt_tokens_details:
449
- usage_obj["prompt_tokens_details"] = prompt_tokens_details
450
-
451
- # Add service tier to metadata
478
+ usage_fields["prompt_tokens_details"] = prompt_tokens_details
452
479
  service_tier = usage.get("service_tier")
453
480
  if service_tier:
454
481
  metadata["service_tier"] = service_tier
455
-
456
- # Create chat span (root)
457
- chat_span_id = f"claudecode_{trace_unique_id}_chat"
458
- customer_id = os.environ.get("RESPAN_CUSTOMER_ID", "claude-code")
459
- chat_span = {
482
+
483
+ # ------------------------------------------------------------------
484
+ # 2. Root span – pure agent container (no model / token info)
485
+ # ------------------------------------------------------------------
486
+ root_span_id = f"claudecode_{trace_unique_id}_root"
487
+ root_span: Dict[str, Any] = {
460
488
  "trace_unique_id": trace_unique_id,
461
489
  "thread_identifier": thread_id,
462
490
  "customer_identifier": customer_id,
463
- "span_unique_id": chat_span_id,
464
- "span_parent_id": None,
491
+ "span_unique_id": root_span_id,
465
492
  "span_name": root_span_name,
466
493
  "span_workflow_name": workflow_name,
467
- "log_type": "agent",
494
+ "model": model,
495
+ "provider_id": "",
496
+ "span_path": "",
468
497
  "input": json.dumps(prompt_messages) if prompt_messages else "",
469
498
  "output": json.dumps(completion_message) if completion_message else "",
470
- "prompt_messages": prompt_messages,
471
- "completion_message": completion_message,
472
- "model": model,
473
499
  "timestamp": timestamp_str,
474
500
  "start_time": start_time_str,
475
501
  "metadata": metadata,
476
502
  }
477
-
478
- # Add usage if available
479
- if usage_obj:
480
- chat_span["prompt_tokens"] = usage_obj["prompt_tokens"]
481
- chat_span["completion_tokens"] = usage_obj["completion_tokens"]
482
- if "total_tokens" in usage_obj:
483
- chat_span["total_tokens"] = usage_obj["total_tokens"]
484
- if "cache_creation_prompt_tokens" in usage_obj:
485
- chat_span["cache_creation_prompt_tokens"] = usage_obj["cache_creation_prompt_tokens"]
486
- if "prompt_tokens_details" in usage_obj:
487
- chat_span["prompt_tokens_details"] = usage_obj["prompt_tokens_details"]
488
-
489
- # Add latency if calculated
490
503
  if latency is not None:
491
- chat_span["latency"] = latency
492
-
493
- spans.append(chat_span)
494
-
495
- # Extract thinking blocks and create spans for them
496
- thinking_spans = []
497
- for idx, assistant_msg in enumerate(assistant_msgs):
498
- if isinstance(assistant_msg, dict) and "message" in assistant_msg:
499
- content = assistant_msg["message"].get("content", [])
500
- if isinstance(content, list):
501
- for item in content:
502
- if isinstance(item, dict) and item.get("type") == "thinking":
503
- thinking_text = item.get("thinking", "")
504
- if thinking_text:
505
- thinking_span_id = f"claudecode_{trace_unique_id}_thinking_{len(thinking_spans) + 1}"
506
- thinking_timestamp = assistant_msg.get("timestamp", timestamp_str)
507
- thinking_spans.append({
508
- "trace_unique_id": trace_unique_id,
509
- "span_unique_id": thinking_span_id,
510
- "span_parent_id": chat_span_id,
511
- "span_name": f"Thinking {len(thinking_spans) + 1}",
512
- "span_workflow_name": workflow_name,
513
- "log_type": "generation",
514
- "input": "",
515
- "output": thinking_text,
516
- "timestamp": thinking_timestamp,
517
- "start_time": thinking_timestamp,
518
- })
519
-
520
- spans.extend(thinking_spans)
521
-
522
- # Collect all tool calls and results with metadata
523
- tool_call_map = {}
504
+ root_span["latency"] = latency
505
+ spans.append(root_span)
506
+
507
+ # ------------------------------------------------------------------
508
+ # 3. LLM generation child span (always created → every turn has ≥1 child)
509
+ # ------------------------------------------------------------------
510
+ gen_span_id = f"claudecode_{trace_unique_id}_gen"
511
+ gen_start = first_assistant_timestamp or start_time_str
512
+ gen_end = last_assistant_timestamp or timestamp_str
513
+ gen_latency = None
514
+ gen_start_dt = parse_timestamp(gen_start) if gen_start else None
515
+ gen_end_dt = parse_timestamp(gen_end) if gen_end else None
516
+ if gen_start_dt and gen_end_dt:
517
+ gen_latency = (gen_end_dt - gen_start_dt).total_seconds()
518
+
519
+ gen_span: Dict[str, Any] = {
520
+ "trace_unique_id": trace_unique_id,
521
+ "span_unique_id": gen_span_id,
522
+ "span_parent_id": root_span_id,
523
+ "span_name": "claude.chat",
524
+ "span_workflow_name": workflow_name,
525
+ "span_path": "claude_chat",
526
+ "model": model,
527
+ "provider_id": "anthropic",
528
+ "metadata": {},
529
+ "input": json.dumps(prompt_messages) if prompt_messages else "",
530
+ "output": json.dumps(completion_message) if completion_message else "",
531
+ "prompt_messages": prompt_messages,
532
+ "completion_message": completion_message,
533
+ "timestamp": gen_end,
534
+ "start_time": gen_start,
535
+ }
536
+ if gen_latency is not None:
537
+ gen_span["latency"] = gen_latency
538
+ gen_span.update(usage_fields)
539
+ spans.append(gen_span)
540
+
541
+ # ------------------------------------------------------------------
542
+ # 4. Thinking child spans
543
+ # ------------------------------------------------------------------
544
+ thinking_num = 0
524
545
  for assistant_msg in assistant_msgs:
525
- tool_calls = get_tool_calls(assistant_msg)
526
- for tool_call in tool_calls:
527
- tool_name = tool_call.get("name", "unknown")
528
- tool_input = tool_call.get("input", {})
546
+ if not (isinstance(assistant_msg, dict) and "message" in assistant_msg):
547
+ continue
548
+ content = assistant_msg["message"].get("content", [])
549
+ if not isinstance(content, list):
550
+ continue
551
+ for item in content:
552
+ if isinstance(item, dict) and item.get("type") == "thinking":
553
+ thinking_text = item.get("thinking", "")
554
+ if not thinking_text:
555
+ continue
556
+ thinking_num += 1
557
+ thinking_ts = assistant_msg.get("timestamp", timestamp_str)
558
+ spans.append({
559
+ "trace_unique_id": trace_unique_id,
560
+ "span_unique_id": f"claudecode_{trace_unique_id}_thinking_{thinking_num}",
561
+ "span_parent_id": root_span_id,
562
+ "span_name": f"Thinking {thinking_num}",
563
+ "span_workflow_name": workflow_name,
564
+ "span_path": "thinking",
565
+ "provider_id": "",
566
+ "metadata": {},
567
+ "input": "",
568
+ "output": thinking_text,
569
+ "timestamp": thinking_ts,
570
+ "start_time": thinking_ts,
571
+ })
572
+
573
+ # ------------------------------------------------------------------
574
+ # 5. Tool child spans
575
+ # ------------------------------------------------------------------
576
+ tool_call_map: Dict[str, Dict[str, Any]] = {}
577
+ for assistant_msg in assistant_msgs:
578
+ for tool_call in get_tool_calls(assistant_msg):
529
579
  tool_id = tool_call.get("id", "")
530
580
  tool_call_map[tool_id] = {
531
- "name": tool_name,
532
- "input": tool_input,
581
+ "name": tool_call.get("name", "unknown"),
582
+ "input": tool_call.get("input", {}),
533
583
  "id": tool_id,
534
584
  "timestamp": assistant_msg.get("timestamp") if isinstance(assistant_msg, dict) else None,
535
585
  }
536
-
537
- # Find matching tool results with metadata
586
+
538
587
  for tr in tool_results:
539
588
  tr_content = get_content(tr)
540
- tool_result_metadata = {}
541
-
542
- # Extract tool result metadata
589
+ tool_result_metadata: Dict[str, Any] = {}
543
590
  if isinstance(tr, dict):
544
- tool_use_result = tr.get("toolUseResult", {})
545
- if tool_use_result:
546
- if "durationMs" in tool_use_result:
547
- tool_result_metadata["duration_ms"] = tool_use_result["durationMs"]
548
- if "numFiles" in tool_use_result:
549
- tool_result_metadata["num_files"] = tool_use_result["numFiles"]
550
- if "filenames" in tool_use_result:
551
- tool_result_metadata["filenames"] = tool_use_result["filenames"]
552
- if "truncated" in tool_use_result:
553
- tool_result_metadata["truncated"] = tool_use_result["truncated"]
554
-
591
+ tur = tr.get("toolUseResult") or {}
592
+ for src, dst in [("durationMs", "duration_ms"), ("numFiles", "num_files"),
593
+ ("filenames", "filenames"), ("truncated", "truncated")]:
594
+ if src in tur:
595
+ tool_result_metadata[dst] = tur[src]
555
596
  if isinstance(tr_content, list):
556
597
  for item in tr_content:
557
598
  if isinstance(item, dict) and item.get("type") == "tool_result":
@@ -560,44 +601,52 @@ def create_respan_spans(
560
601
  tool_call_map[tool_use_id]["output"] = item.get("content")
561
602
  tool_call_map[tool_use_id]["result_metadata"] = tool_result_metadata
562
603
  tool_call_map[tool_use_id]["result_timestamp"] = tr.get("timestamp")
563
-
564
- # Create tool spans (children)
604
+
565
605
  tool_num = 0
566
- for tool_id, tool_data in tool_call_map.items():
606
+ for tool_id, td in tool_call_map.items():
567
607
  tool_num += 1
568
- tool_span_id = f"claudecode_{trace_unique_id}_tool_{tool_num}"
569
-
570
- # Use tool result timestamp if available, otherwise use tool call timestamp
571
- tool_timestamp = tool_data.get("result_timestamp") or tool_data.get("timestamp") or timestamp_str
572
- tool_start_time = tool_data.get("timestamp") or start_time_str
573
-
574
- # Format input and output for better readability
575
- formatted_input = format_tool_input(tool_data['name'], tool_data["input"])
576
- formatted_output = format_tool_output(tool_data['name'], tool_data.get("output"))
577
-
578
- tool_span = {
608
+ tool_ts = td.get("result_timestamp") or td.get("timestamp") or timestamp_str
609
+ tool_start = td.get("timestamp") or start_time_str
610
+ tool_span: Dict[str, Any] = {
579
611
  "trace_unique_id": trace_unique_id,
580
- "span_unique_id": tool_span_id,
581
- "span_parent_id": chat_span_id,
582
- "span_name": f"Tool: {tool_data['name']}",
612
+ "span_unique_id": f"claudecode_{trace_unique_id}_tool_{tool_num}",
613
+ "span_parent_id": root_span_id,
614
+ "span_name": f"Tool: {td['name']}",
583
615
  "span_workflow_name": workflow_name,
584
- "log_type": "tool",
585
- "input": formatted_input,
586
- "output": formatted_output,
587
- "timestamp": tool_timestamp,
588
- "start_time": tool_start_time,
616
+ "span_path": f"tool_{td['name'].lower()}",
617
+ "provider_id": "",
618
+ "metadata": td.get("result_metadata") or {},
619
+ "input": format_tool_input(td["name"], td["input"]),
620
+ "output": format_tool_output(td["name"], td.get("output")),
621
+ "timestamp": tool_ts,
622
+ "start_time": tool_start,
589
623
  }
590
-
591
- # Add tool result metadata if available
592
- if tool_data.get("result_metadata"):
593
- tool_span["metadata"] = tool_data["result_metadata"]
594
- # Calculate latency if duration_ms is available
595
- duration_ms = tool_data["result_metadata"].get("duration_ms")
624
+ if td.get("result_metadata"):
625
+ duration_ms = td["result_metadata"].get("duration_ms")
596
626
  if duration_ms:
597
- tool_span["latency"] = duration_ms / 1000.0 # Convert ms to seconds
598
-
627
+ tool_span["latency"] = duration_ms / 1000.0
599
628
  spans.append(tool_span)
600
-
629
+
630
+ # Add required Respan platform fields to every span.
631
+ # The backend expects these on all spans (per official SDK examples).
632
+ respan_defaults = {
633
+ "warnings": "",
634
+ "encoding_format": "float",
635
+ "disable_fallback": False,
636
+ "respan_params": {
637
+ "has_webhook": False,
638
+ "environment": os.environ.get("RESPAN_ENVIRONMENT", "prod"),
639
+ },
640
+ "field_name": "data: ",
641
+ "delimiter": "\n\n",
642
+ "disable_log": False,
643
+ "request_breakdown": False,
644
+ }
645
+ for span in spans:
646
+ for key, value in respan_defaults.items():
647
+ if key not in span:
648
+ span[key] = value
649
+
601
650
  return spans
602
651
 
603
652
 
@@ -607,36 +656,49 @@ def send_spans(
607
656
  base_url: str,
608
657
  turn_num: int,
609
658
  ) -> None:
610
- """Send spans to Respan with timeout and one retry on transient errors."""
659
+ """Send spans to Respan as a single batch (matches official SDK behaviour).
660
+
661
+ The official Respan tracing SDK sends all spans for a trace in one
662
+ POST request to ``/v1/traces/ingest``. We do the same here, with
663
+ simple retry logic for transient server errors.
664
+ """
611
665
  url = f"{base_url}/v1/traces/ingest"
612
666
  headers = {"Authorization": f"Bearer {api_key}"}
613
667
 
614
- for attempt in range(2):
668
+ span_names = [s.get("span_name", "?") for s in spans]
669
+ payload_json = json.dumps(spans)
670
+ payload_size = len(payload_json)
671
+ debug(f"Sending {len(spans)} spans ({payload_size} bytes) for turn {turn_num}: {span_names}")
672
+ if DEBUG:
673
+ debug_file = LOG_FILE.parent / f"respan_spans_turn_{turn_num}.json"
674
+ debug_file.write_text(payload_json, encoding="utf-8")
675
+ debug(f"Dumped spans to {debug_file}")
676
+
677
+ for attempt in range(3):
615
678
  try:
616
679
  response = requests.post(url, json=spans, headers=headers, timeout=30)
617
680
  if response.status_code < 400:
618
- debug(f"Sent {len(spans)} spans for turn {turn_num}")
681
+ resp_text = response.text[:300] if response.text else ""
682
+ debug(f"Sent {len(spans)} spans for turn {turn_num} "
683
+ f"(attempt {attempt + 1}): {resp_text}")
619
684
  return
620
685
  if response.status_code < 500:
621
- # 4xx not retryable
622
- log("ERROR", f"Failed to send spans for turn {turn_num}: HTTP {response.status_code}")
686
+ log("ERROR", f"Spans rejected for turn {turn_num}: "
687
+ f"HTTP {response.status_code} - {response.text[:200]}")
623
688
  return
624
- # 5xx — retryable
625
- if attempt == 0:
626
- debug(f"Server error {response.status_code} for turn {turn_num}, retrying...")
627
- time.sleep(1)
628
- continue
629
- log("ERROR", f"Failed to send spans for turn {turn_num} after retry: HTTP {response.status_code}")
630
- except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
631
- if attempt == 0:
632
- debug(f"Transient error for turn {turn_num}: {e}, retrying...")
633
- time.sleep(1)
634
- continue
635
- log("ERROR", f"Failed to send spans for turn {turn_num} after retry: {e}")
689
+ # 5xx — retry after short delay
690
+ debug(f"Server error for turn {turn_num} "
691
+ f"(attempt {attempt + 1}), retrying...")
692
+ time.sleep(1.0)
693
+ except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
694
+ time.sleep(1.0)
636
695
  except Exception as e:
637
696
  log("ERROR", f"Failed to send spans for turn {turn_num}: {e}")
638
697
  return
639
698
 
699
+ log("ERROR", f"Failed to send {len(spans)} spans for turn {turn_num} "
700
+ f"after 3 attempts")
701
+
640
702
 
641
703
  def process_transcript(
642
704
  session_id: str,
@@ -644,6 +706,7 @@ def process_transcript(
644
706
  state: Dict[str, Any],
645
707
  api_key: str,
646
708
  base_url: str,
709
+ config: Optional[Dict[str, Any]] = None,
647
710
  ) -> int:
648
711
  """Process a transcript file and create traces for new turns."""
649
712
  # Get previous state for this session
@@ -693,7 +756,8 @@ def process_transcript(
693
756
  turns_processed += 1
694
757
  turn_num = turn_count + turns_processed
695
758
  spans = create_respan_spans(
696
- session_id, turn_num, current_user, current_assistants, current_tool_results
759
+ session_id, turn_num, current_user, current_assistants, current_tool_results,
760
+ config=config,
697
761
  )
698
762
  send_spans(spans, api_key, base_url, turn_num)
699
763
  last_committed_line = total_lines # safe default, refined below
@@ -755,8 +819,17 @@ def process_transcript(
755
819
  current_assistants.append(merged)
756
820
 
757
821
  if current_user and current_assistants:
758
- _commit_turn()
759
- last_committed_line = total_lines
822
+ # Check if the turn has actual text output. The Stop hook can fire
823
+ # before the final assistant text block is flushed to disk, leaving
824
+ # only thinking/tool_use blocks. If no text content is found, treat
825
+ # the turn as incomplete so the retry logic re-reads it.
826
+ has_text = any(get_text_content(m) for m in current_assistants)
827
+ if has_text:
828
+ _commit_turn()
829
+ last_committed_line = total_lines
830
+ else:
831
+ last_committed_line = current_user_line
832
+ debug(f"Turn has assistant msgs but no text output yet (likely not flushed), will retry")
760
833
  else:
761
834
  # Incomplete turn — rewind so the next run re-reads from the
762
835
  # unmatched user message (or from where we left off if no user).
@@ -869,13 +942,35 @@ def main():
869
942
  debug("Tracing disabled (TRACE_TO_RESPAN != true)")
870
943
  sys.exit(0)
871
944
 
872
- # Check for required environment variables
945
+ # Resolve API key: env var > ~/.respan/credentials.json
873
946
  api_key = os.getenv("RESPAN_API_KEY")
874
- # Default: api.respan.ai | Enterprise: endpoint.respan.ai (set RESPAN_BASE_URL)
875
947
  base_url = os.getenv("RESPAN_BASE_URL", "https://api.respan.ai/api")
876
948
 
877
949
  if not api_key:
878
- log("ERROR", "Respan API key not set (RESPAN_API_KEY)")
950
+ creds_file = Path.home() / ".respan" / "credentials.json"
951
+ if creds_file.exists():
952
+ try:
953
+ creds = json.loads(creds_file.read_text(encoding="utf-8"))
954
+ # Find the active profile's credential
955
+ config_file = Path.home() / ".respan" / "config.json"
956
+ profile = "default"
957
+ if config_file.exists():
958
+ cfg = json.loads(config_file.read_text(encoding="utf-8"))
959
+ profile = cfg.get("activeProfile", "default")
960
+ cred = creds.get(profile, {})
961
+ api_key = cred.get("apiKey") or cred.get("accessToken")
962
+ if not base_url or base_url == "https://api.respan.ai/api":
963
+ base_url = cred.get("baseUrl", base_url)
964
+ # Ensure base_url ends with /api (credentials store the host only)
965
+ if base_url and not base_url.rstrip("/").endswith("/api"):
966
+ base_url = base_url.rstrip("/") + "/api"
967
+ if api_key:
968
+ debug(f"Using API key from credentials.json (profile: {profile})")
969
+ except (json.JSONDecodeError, IOError) as e:
970
+ debug(f"Failed to read credentials.json: {e}")
971
+
972
+ if not api_key:
973
+ log("ERROR", "No API key found. Run: respan auth login")
879
974
  sys.exit(0)
880
975
 
881
976
  # Try stdin payload first, fall back to filesystem scan
@@ -894,11 +989,49 @@ def main():
894
989
 
895
990
  debug(f"Processing session: {session_id}")
896
991
 
897
- # Process the transcript under file lock
992
+ # Load respan.json config from the project directory.
993
+ # Extract the project CWD from the first user message in the transcript.
994
+ config: Dict[str, Any] = {"fields": {}, "properties": {}}
898
995
  try:
899
- with state_lock():
900
- state = load_state()
901
- turns = process_transcript(session_id, transcript_file, state, api_key, base_url)
996
+ first_line = transcript_file.read_text(encoding="utf-8").split("\n")[0]
997
+ if first_line:
998
+ first_msg = json.loads(first_line)
999
+ cwd = first_msg.get("cwd")
1000
+ if not cwd:
1001
+ # Try second line (first is often file-history-snapshot)
1002
+ lines = transcript_file.read_text(encoding="utf-8").split("\n")
1003
+ for line in lines[:5]:
1004
+ if line.strip():
1005
+ msg = json.loads(line)
1006
+ cwd = msg.get("cwd")
1007
+ if cwd:
1008
+ break
1009
+ if cwd:
1010
+ config = load_respan_config(cwd)
1011
+ debug(f"Loaded respan.json config from {cwd}: {config}")
1012
+ except Exception as e:
1013
+ debug(f"Failed to extract CWD or load config: {e}")
1014
+
1015
+ # Process the transcript under file lock.
1016
+ # Retry up to 3 times with a short delay — the Stop hook can fire
1017
+ # before Claude Code finishes flushing the assistant response to
1018
+ # the transcript file, causing an incomplete turn on the first read.
1019
+ max_attempts = 3
1020
+ turns = 0
1021
+ try:
1022
+ for attempt in range(max_attempts):
1023
+ with state_lock():
1024
+ state = load_state()
1025
+ turns = process_transcript(session_id, transcript_file, state, api_key, base_url, config=config)
1026
+
1027
+ if turns > 0:
1028
+ break
1029
+
1030
+ if attempt < max_attempts - 1:
1031
+ delay = 0.5 * (attempt + 1)
1032
+ debug(f"No turns processed (attempt {attempt + 1}/{max_attempts}), "
1033
+ f"retrying in {delay}s...")
1034
+ time.sleep(delay)
902
1035
 
903
1036
  # Log execution time
904
1037
  duration = (datetime.now() - script_start).total_seconds()