@respan/cli 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1044 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Respan Hook for Claude Code
4
-
5
- Sends Claude Code conversation traces to Respan after each response.
6
- Uses Claude Code's Stop hook to capture transcripts and convert them to Respan spans.
7
-
8
- Usage:
9
- Copy this file to ~/.claude/hooks/respan_hook.py
10
- Configure in ~/.claude/settings.json (see .claude/settings.json.example)
11
- Enable per-project in .claude/settings.local.json (see .claude/settings.local.json.example)
12
- """
13
-
14
- import contextlib
15
- import json
16
- import os
17
- import sys
18
- import tempfile
19
- import time
20
- import requests
21
- from datetime import datetime, timezone
22
- from pathlib import Path
23
- from typing import Any, Dict, List, Optional, Tuple
24
-
25
- try:
26
- import fcntl
27
- except ImportError:
28
- fcntl = None # Not available on Windows
29
-
30
# Configuration
# All hook bookkeeping lives under ~/.claude/state/ so it survives across
# Claude Code sessions without touching any project directory.
LOG_FILE = Path.home() / ".claude" / "state" / "respan_hook.log"
STATE_FILE = Path.home() / ".claude" / "state" / "respan_state.json"
LOCK_FILE = Path.home() / ".claude" / "state" / "respan_hook.lock"
# Verbose logging is opt-in via the CC_RESPAN_DEBUG environment variable.
DEBUG = os.environ.get("CC_RESPAN_DEBUG", "").lower() == "true"

try:
    # Truncation limit (in characters) for formatted tool input/output;
    # falls back to 4000 when the override is missing or not an integer.
    MAX_CHARS = int(os.environ.get("CC_RESPAN_MAX_CHARS", "4000"))
except (ValueError, TypeError):
    MAX_CHARS = 4000
40
-
41
-
42
def log(level: str, message: str) -> None:
    """Append a timestamped entry to the hook's log file.

    The state directory is created on demand so logging never fails on a
    fresh installation.
    """
    LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
    entry = "{} [{}] {}\n".format(
        datetime.now().strftime("%Y-%m-%d %H:%M:%S"), level, message
    )
    with open(LOG_FILE, "a", encoding="utf-8") as handle:
        handle.write(entry)
48
-
49
-
50
def debug(message: str) -> None:
    """Forward *message* to the log at DEBUG level when debugging is enabled."""
    if not DEBUG:
        return
    log("DEBUG", message)
54
-
55
-
56
def load_state() -> Dict[str, Any]:
    """Return the persisted session-tracking state, or {} when unavailable.

    A missing, unreadable, or corrupt state file is treated as empty state
    rather than raised, so the hook always starts cleanly.
    """
    try:
        raw = STATE_FILE.read_text(encoding="utf-8")
    except (IOError, OSError):
        # Covers the file-not-found case as well as permission errors.
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {}
64
-
65
-
66
def save_state(state: Dict[str, Any]) -> None:
    """Save the state file atomically via write-to-temp + rename.

    Writing to a temp file in the same directory and renaming over the
    target means concurrent readers never observe a half-written file.
    On OS-level failure the temp file is removed and the state is written
    directly (non-atomically) as a last resort.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    try:
        # The temp file must live in the same directory so os.rename()
        # stays on one filesystem (cross-device renames raise OSError).
        fd, tmp_path = tempfile.mkstemp(dir=STATE_FILE.parent, suffix=".tmp")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(state, f, indent=2)
            os.rename(tmp_path, STATE_FILE)
        except BaseException:
            # BaseException so the orphaned temp file is cleaned up even on
            # KeyboardInterrupt/SystemExit; the error is re-raised below.
            with contextlib.suppress(OSError):
                os.unlink(tmp_path)
            raise
    except OSError as e:
        log("ERROR", f"Failed to save state atomically, falling back: {e}")
        STATE_FILE.write_text(json.dumps(state, indent=2), encoding="utf-8")
82
-
83
-
84
# Known config keys in respan.json that map to span fields.
# Anything else is treated as a custom property (goes into metadata).
# See load_respan_config() for how the split is applied.
KNOWN_CONFIG_KEYS = {"customer_id", "span_name", "workflow_name"}
87
-
88
-
89
def load_respan_config(cwd: str) -> Dict[str, Any]:
    """Load .claude/respan.json from the project directory.

    Returns a dict with two keys:
        - "fields": keys listed in KNOWN_CONFIG_KEYS (customer_id,
          span_name, workflow_name) that map directly onto span fields
        - "properties": every other key (custom properties → metadata)

    A missing, malformed, or non-object config yields empty dicts for both.
    """
    empty: Dict[str, Any] = {"fields": {}, "properties": {}}
    config_path = Path(cwd) / ".claude" / "respan.json"
    if not config_path.exists():
        return empty

    try:
        raw = json.loads(config_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, IOError) as e:
        debug(f"Failed to load respan.json from {config_path}: {e}")
        return empty

    if not isinstance(raw, dict):
        return empty

    fields: Dict[str, Any] = {}
    properties: Dict[str, Any] = {}
    for key, value in raw.items():
        bucket = fields if key in KNOWN_CONFIG_KEYS else properties
        bucket[key] = value
    return {"fields": fields, "properties": properties}
114
-
115
-
116
def get_content(msg: Dict[str, Any]) -> Any:
    """Return the content payload of a transcript message.

    Supports both the wrapped shape ({"message": {"content": ...}}) and
    the flat shape ({"content": ...}). Non-dict input yields None.
    """
    if not isinstance(msg, dict):
        return None
    if "message" in msg:
        return msg["message"].get("content")
    return msg.get("content")
123
-
124
-
125
def is_tool_result(msg: Dict[str, Any]) -> bool:
    """Return True when the message carries at least one tool_result block."""
    blocks = get_content(msg)
    if not isinstance(blocks, list):
        return False
    for block in blocks:
        if isinstance(block, dict) and block.get("type") == "tool_result":
            return True
    return False
134
-
135
-
136
def get_tool_calls(msg: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return every tool_use block found in the message's content list."""
    blocks = get_content(msg)
    calls: List[Dict[str, Any]] = []
    if isinstance(blocks, list):
        for block in blocks:
            if isinstance(block, dict) and block.get("type") == "tool_use":
                calls.append(block)
    return calls
145
-
146
-
147
def get_text_content(msg: Dict[str, Any]) -> str:
    """Concatenate all text found in a message's content.

    Plain-string content is returned as-is. List content yields the
    "text" field of each text block plus any bare strings, joined with
    newlines. Any other content shape produces an empty string.
    """
    content = get_content(msg)
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    pieces = []
    for block in content:
        if isinstance(block, str):
            pieces.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            pieces.append(block.get("text", ""))
    return "\n".join(pieces)
161
-
162
-
163
def format_tool_input(tool_name: str, tool_input: Any, max_length: int = MAX_CHARS) -> str:
    """Render a tool's input as a human-readable string.

    Known tools (file edits, Read, Bash/Shell) get compact, purpose-built
    renderings; everything else is pretty-printed JSON. The result is
    truncated to roughly *max_length* characters.
    """
    if not tool_input:
        return ""

    if isinstance(tool_input, dict):
        # File-editing tools: show the path plus a bounded content preview.
        if tool_name in ("Write", "Edit", "MultiEdit"):
            path = tool_input.get("file_path", tool_input.get("path", ""))
            body = tool_input.get("content", "")
            text = f"File: {path}\n"
            if body:
                preview = body if len(body) <= 2000 else body[:2000] + "..."
                text += f"Content:\n{preview}"
            return text[:max_length]

        # Read tool: the path alone is enough.
        if tool_name == "Read":
            path = tool_input.get("file_path", tool_input.get("path", ""))
            return f"File: {path}"

        # Shell tools: just the command line.
        if tool_name in ("Bash", "Shell"):
            return f"Command: {tool_input.get('command', '')}"

    # Anything else: pretty-printed JSON with truncation.
    try:
        rendered = json.dumps(tool_input, indent=2)
    except (TypeError, ValueError):
        return str(tool_input)[:max_length]
    if len(rendered) > max_length:
        rendered = rendered[:max_length] + "\n... (truncated)"
    return rendered
200
-
201
-
202
def format_tool_output(tool_name: str, tool_output: Any, max_length: int = MAX_CHARS) -> str:
    """Format tool output for better readability.

    Args:
        tool_name: Name of the tool that produced the output (currently
            unused in the body; kept for symmetry with format_tool_input).
        tool_output: Raw output — a string, a list of content blocks, or
            a dict, depending on the tool.
        max_length: Soft cap on the length of the returned string.

    Returns:
        A human-readable rendering truncated to roughly max_length.
    """
    if not tool_output:
        return ""

    # Handle string output directly
    if isinstance(tool_output, str):
        if len(tool_output) > max_length:
            return tool_output[:max_length] + "\n... (truncated)"
        return tool_output

    # Handle list of content blocks (common in Claude Code tool results)
    if isinstance(tool_output, list):
        parts = []
        total_length = 0

        for item in tool_output:
            if isinstance(item, dict):
                # Text content block
                if item.get("type") == "text":
                    text = item.get("text", "")
                    if total_length + len(text) > max_length:
                        remaining = max_length - total_length
                        # Only keep a partial block when it is big enough
                        # to be useful; otherwise drop it entirely.
                        if remaining > 100:
                            parts.append(text[:remaining] + "... (truncated)")
                        break
                    parts.append(text)
                    total_length += len(text)
                # Image or other type
                elif item.get("type") == "image":
                    parts.append("[Image output]")
                else:
                    # Try to extract any text-like content
                    text = str(item)[:500]
                    parts.append(text)
                    total_length += len(text)
            elif isinstance(item, str):
                if total_length + len(item) > max_length:
                    remaining = max_length - total_length
                    if remaining > 100:
                        parts.append(item[:remaining] + "... (truncated)")
                    break
                parts.append(item)
                total_length += len(item)

        return "\n".join(parts)

    # Handle dict output
    if isinstance(tool_output, dict):
        # Special handling for Write tool success/error
        if "success" in tool_output:
            return f"Success: {tool_output.get('success')}\n{tool_output.get('message', '')}"

        # Default JSON formatting
        try:
            result = json.dumps(tool_output, indent=2)
            if len(result) > max_length:
                result = result[:max_length] + "\n... (truncated)"
            return result
        except (TypeError, ValueError):
            return str(tool_output)[:max_length]

    # Fallback for any other type (numbers, custom objects, ...).
    return str(tool_output)[:max_length]
265
-
266
-
267
def merge_assistant_parts(parts: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Collapse several assistant message parts into a single message.

    Content lists are concatenated in order; scalar content is wrapped in
    a text block. The first part supplies the message envelope, shallow-
    copied so the caller's input is not mutated.
    """
    if not parts:
        return {}

    combined: List[Any] = []
    for part in parts:
        payload = get_content(part)
        if isinstance(payload, list):
            combined.extend(payload)
        elif payload:
            combined.append({"type": "text", "text": str(payload)})

    merged = parts[0].copy()
    if "message" in merged:
        merged["message"] = merged["message"].copy()
        merged["message"]["content"] = combined
    else:
        merged["content"] = combined
    return merged
289
-
290
-
291
def find_latest_transcript() -> Optional[Tuple[str, Path]]:
    """Find the most recently modified transcript file.

    Claude Code stores transcripts as *.jsonl files directly in the project
    directory. Main conversation files have UUID names, agent files have
    agent-*.jsonl names. The session ID is stored inside each JSON line.

    Returns:
        (session_id, path) for the newest transcript, or None when no
        usable transcript is found.
    """
    projects_dir = Path.home() / ".claude" / "projects"

    if not projects_dir.exists():
        debug(f"Projects directory not found: {projects_dir}")
        return None

    latest_file = None
    latest_mtime = 0

    for project_dir in projects_dir.iterdir():
        if not project_dir.is_dir():
            continue

        # Look for all .jsonl files directly in the project directory.
        for transcript_file in project_dir.glob("*.jsonl"):
            try:
                mtime = transcript_file.stat().st_mtime
            except OSError:
                # The file may have been removed between glob() and stat().
                continue
            if mtime > latest_mtime:
                latest_mtime = mtime
                latest_file = transcript_file

    if latest_file:
        # Extract the session ID from the first line only — transcripts can
        # grow large, so avoid reading the whole file into memory.
        try:
            with open(latest_file, encoding="utf-8") as f:
                first_line = f.readline().strip()
            if first_line:
                first_msg = json.loads(first_line)
                session_id = first_msg.get("sessionId", latest_file.stem)
                debug(f"Found transcript: {latest_file}, session: {session_id}")
                return (session_id, latest_file)
        except (json.JSONDecodeError, IOError, UnicodeDecodeError) as e:
            debug(f"Error reading transcript {latest_file}: {e}")
            return None

    debug("No transcript files found")
    return None
333
-
334
-
335
def parse_timestamp(ts_str: str) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp string into a datetime.

    Accepts both naive timestamps and timezone-aware ones, including the
    trailing-"Z" (UTC) form, which datetime.fromisoformat() only accepts
    natively from Python 3.11 onward.

    Args:
        ts_str: Timestamp string, e.g. "2024-01-01T12:00:00Z".

    Returns:
        The parsed datetime, or None for malformed or non-string input.
    """
    try:
        # Normalize a trailing "Z" to an explicit UTC offset once; the
        # original code did this twice (strip + redundant replace).
        if ts_str.endswith("Z"):
            ts_str = ts_str[:-1] + "+00:00"
        return datetime.fromisoformat(ts_str)
    except (ValueError, AttributeError):
        return None
344
-
345
-
346
def create_respan_spans(
    session_id: str,
    turn_num: int,
    user_msg: Dict[str, Any],
    assistant_msgs: List[Dict[str, Any]],
    tool_results: List[Dict[str, Any]],
    config: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Create Respan span logs for a single turn with all available metadata.

    Produces a proper span tree so that the Respan UI renders nested children:
        Root (agent container)
        ├── claude.chat (generation – carries model, tokens, messages)
        ├── Thinking 1 (generation, if extended thinking is present)
        ├── Tool: Read (tool, if tool use occurred)
        └── Tool: Write (tool, if tool use occurred)

    Args:
        session_id: Claude Code session identifier; drives the trace and
            thread identifiers.
        turn_num: 1-based turn number within the session.
        user_msg: The user message that started the turn.
        assistant_msgs: All (merged) assistant messages for the turn.
        tool_results: Tool-result messages matched against the assistant's
            tool_use blocks by tool_use_id.
        config: Parsed respan.json as {"fields": ..., "properties": ...},
            or None when no project config exists.

    Returns:
        A list of span dicts ready to be POSTed to the ingest endpoint;
        the root span is always first.
    """
    spans = []

    # ------------------------------------------------------------------
    # 1. Extract data from the transcript messages
    # ------------------------------------------------------------------
    user_text = get_text_content(user_msg)
    user_timestamp = user_msg.get("timestamp")
    user_time = parse_timestamp(user_timestamp) if user_timestamp else None

    # Collect assistant text across all messages in the turn
    final_output = ""
    if assistant_msgs:
        text_parts = [get_text_content(m) for m in assistant_msgs]
        final_output = "\n".join(p for p in text_parts if p)

    # Aggregate model, usage, timing from (possibly multiple) API calls
    model = "claude"
    usage = None
    request_id = None
    stop_reason = None
    first_assistant_timestamp = None
    last_assistant_timestamp = None
    last_assistant_time = None

    for a_msg in assistant_msgs:
        if not (isinstance(a_msg, dict) and "message" in a_msg):
            continue
        msg_obj = a_msg["message"]
        # Later messages overwrite model/request_id; stop_reason keeps the
        # last non-empty value seen.
        model = msg_obj.get("model", model)
        request_id = a_msg.get("requestId", request_id)
        stop_reason = msg_obj.get("stop_reason") or stop_reason
        ts = a_msg.get("timestamp")
        if ts:
            if first_assistant_timestamp is None:
                first_assistant_timestamp = ts
            last_assistant_timestamp = ts
            last_assistant_time = parse_timestamp(ts)

        msg_usage = msg_obj.get("usage")
        if msg_usage:
            if usage is None:
                usage = dict(msg_usage)
            else:
                # Sum token counters across the turn's API calls.
                for key in ("input_tokens", "output_tokens",
                            "cache_creation_input_tokens",
                            "cache_read_input_tokens"):
                    if key in msg_usage:
                        usage[key] = usage.get(key, 0) + msg_usage[key]
                if "service_tier" in msg_usage:
                    usage["service_tier"] = msg_usage["service_tier"]

    # Timing
    now_str = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    start_time_str = user_timestamp or first_assistant_timestamp or now_str
    timestamp_str = last_assistant_timestamp or first_assistant_timestamp or now_str

    latency = None
    if user_time and last_assistant_time:
        latency = (last_assistant_time - user_time).total_seconds()

    # Messages
    prompt_messages: List[Dict[str, Any]] = []
    if user_text:
        prompt_messages.append({"role": "user", "content": user_text})
    completion_message: Optional[Dict[str, Any]] = None
    if final_output:
        completion_message = {"role": "assistant", "content": final_output}

    # IDs — respan.json fields, then env var overrides
    cfg_fields = (config or {}).get("fields", {})
    cfg_props = (config or {}).get("properties", {})

    trace_unique_id = f"{session_id}_turn_{turn_num}"
    # Precedence: environment variable > respan.json > hard-coded default.
    workflow_name = os.environ.get("RESPAN_WORKFLOW_NAME") or cfg_fields.get("workflow_name") or "claude-code"
    root_span_name = os.environ.get("RESPAN_SPAN_NAME") or cfg_fields.get("span_name") or "claude-code"
    thread_id = f"claudecode_{session_id}"
    customer_id = os.environ.get("RESPAN_CUSTOMER_ID") or cfg_fields.get("customer_id") or ""

    # Metadata — custom properties from respan.json, then env overrides
    metadata: Dict[str, Any] = {"claude_code_turn": turn_num}
    if cfg_props:
        metadata.update(cfg_props)
    if request_id:
        metadata["request_id"] = request_id
    if stop_reason:
        metadata["stop_reason"] = stop_reason
    env_metadata = os.environ.get("RESPAN_METADATA")
    if env_metadata:
        try:
            extra = json.loads(env_metadata)
            if isinstance(extra, dict):
                metadata.update(extra)
        except json.JSONDecodeError:
            # Malformed RESPAN_METADATA is ignored rather than fatal.
            pass

    # Usage
    usage_fields: Dict[str, Any] = {}
    if usage:
        prompt_tokens = usage.get("input_tokens", 0)
        completion_tokens = usage.get("output_tokens", 0)
        usage_fields["prompt_tokens"] = prompt_tokens
        usage_fields["completion_tokens"] = completion_tokens
        total = prompt_tokens + completion_tokens
        if total > 0:
            usage_fields["total_tokens"] = total
        cache_creation = usage.get("cache_creation_input_tokens", 0)
        cache_read = usage.get("cache_read_input_tokens", 0)
        if cache_creation > 0:
            usage_fields["cache_creation_prompt_tokens"] = cache_creation
        prompt_tokens_details: Dict[str, int] = {}
        if cache_creation > 0:
            prompt_tokens_details["cache_creation_tokens"] = cache_creation
        if cache_read > 0:
            prompt_tokens_details["cached_tokens"] = cache_read
        if prompt_tokens_details:
            usage_fields["prompt_tokens_details"] = prompt_tokens_details
        service_tier = usage.get("service_tier")
        if service_tier:
            metadata["service_tier"] = service_tier

    # ------------------------------------------------------------------
    # 2. Root span – pure agent container (no model / token info)
    # ------------------------------------------------------------------
    root_span_id = f"claudecode_{trace_unique_id}_root"
    root_span: Dict[str, Any] = {
        "trace_unique_id": trace_unique_id,
        "thread_identifier": thread_id,
        "customer_identifier": customer_id,
        "span_unique_id": root_span_id,
        "span_name": root_span_name,
        "span_workflow_name": workflow_name,
        "model": model,
        "span_path": "",
        "input": json.dumps(prompt_messages) if prompt_messages else "",
        "output": json.dumps(completion_message) if completion_message else "",
        "timestamp": timestamp_str,
        "start_time": start_time_str,
        "metadata": metadata,
    }
    if latency is not None:
        root_span["latency"] = latency
    spans.append(root_span)

    # ------------------------------------------------------------------
    # 3. LLM generation child span (always created → every turn has ≥1 child)
    # ------------------------------------------------------------------
    gen_span_id = f"claudecode_{trace_unique_id}_gen"
    gen_start = first_assistant_timestamp or start_time_str
    gen_end = last_assistant_timestamp or timestamp_str
    gen_latency = None
    gen_start_dt = parse_timestamp(gen_start) if gen_start else None
    gen_end_dt = parse_timestamp(gen_end) if gen_end else None
    if gen_start_dt and gen_end_dt:
        gen_latency = (gen_end_dt - gen_start_dt).total_seconds()

    gen_span: Dict[str, Any] = {
        "trace_unique_id": trace_unique_id,
        "span_unique_id": gen_span_id,
        "span_parent_id": root_span_id,
        "span_name": "claude.chat",
        "span_workflow_name": workflow_name,
        "span_path": "claude_chat",
        "model": model,
        "provider_id": "anthropic",
        "input": json.dumps(prompt_messages) if prompt_messages else "",
        "output": json.dumps(completion_message) if completion_message else "",
        "prompt_messages": prompt_messages,
        "completion_message": completion_message,
        "timestamp": gen_end,
        "start_time": gen_start,
    }
    if gen_latency is not None:
        gen_span["latency"] = gen_latency
    # Token counts live on the generation span, not the root container.
    gen_span.update(usage_fields)
    spans.append(gen_span)

    # ------------------------------------------------------------------
    # 4. Thinking child spans
    # ------------------------------------------------------------------
    thinking_num = 0
    for assistant_msg in assistant_msgs:
        if not (isinstance(assistant_msg, dict) and "message" in assistant_msg):
            continue
        content = assistant_msg["message"].get("content", [])
        if not isinstance(content, list):
            continue
        for item in content:
            if isinstance(item, dict) and item.get("type") == "thinking":
                thinking_text = item.get("thinking", "")
                if not thinking_text:
                    continue
                thinking_num += 1
                thinking_ts = assistant_msg.get("timestamp", timestamp_str)
                spans.append({
                    "trace_unique_id": trace_unique_id,
                    "span_unique_id": f"claudecode_{trace_unique_id}_thinking_{thinking_num}",
                    "span_parent_id": root_span_id,
                    "span_name": f"Thinking {thinking_num}",
                    "span_workflow_name": workflow_name,
                    "span_path": "thinking",
                    "input": "",
                    "output": thinking_text,
                    "timestamp": thinking_ts,
                    "start_time": thinking_ts,
                })

    # ------------------------------------------------------------------
    # 5. Tool child spans
    # ------------------------------------------------------------------
    # First index every tool_use block by its id ...
    tool_call_map: Dict[str, Dict[str, Any]] = {}
    for assistant_msg in assistant_msgs:
        for tool_call in get_tool_calls(assistant_msg):
            tool_id = tool_call.get("id", "")
            tool_call_map[tool_id] = {
                "name": tool_call.get("name", "unknown"),
                "input": tool_call.get("input", {}),
                "id": tool_id,
                "timestamp": assistant_msg.get("timestamp") if isinstance(assistant_msg, dict) else None,
            }

    # ... then attach each tool_result to its originating call.
    for tr in tool_results:
        tr_content = get_content(tr)
        tool_result_metadata: Dict[str, Any] = {}
        if isinstance(tr, dict):
            tur = tr.get("toolUseResult") or {}
            for src, dst in [("durationMs", "duration_ms"), ("numFiles", "num_files"),
                             ("filenames", "filenames"), ("truncated", "truncated")]:
                if src in tur:
                    tool_result_metadata[dst] = tur[src]
        if isinstance(tr_content, list):
            for item in tr_content:
                if isinstance(item, dict) and item.get("type") == "tool_result":
                    tool_use_id = item.get("tool_use_id")
                    if tool_use_id in tool_call_map:
                        tool_call_map[tool_use_id]["output"] = item.get("content")
                        tool_call_map[tool_use_id]["result_metadata"] = tool_result_metadata
                        tool_call_map[tool_use_id]["result_timestamp"] = tr.get("timestamp")

    tool_num = 0
    for tool_id, td in tool_call_map.items():
        tool_num += 1
        tool_ts = td.get("result_timestamp") or td.get("timestamp") or timestamp_str
        tool_start = td.get("timestamp") or start_time_str
        tool_span: Dict[str, Any] = {
            "trace_unique_id": trace_unique_id,
            "span_unique_id": f"claudecode_{trace_unique_id}_tool_{tool_num}",
            "span_parent_id": root_span_id,
            "span_name": f"Tool: {td['name']}",
            "span_workflow_name": workflow_name,
            "span_path": f"tool_{td['name'].lower()}",
            "input": format_tool_input(td["name"], td["input"]),
            "output": format_tool_output(td["name"], td.get("output")),
            "timestamp": tool_ts,
            "start_time": tool_start,
        }
        if td.get("result_metadata"):
            tool_span["metadata"] = td["result_metadata"]
            duration_ms = td["result_metadata"].get("duration_ms")
            if duration_ms:
                tool_span["latency"] = duration_ms / 1000.0
        spans.append(tool_span)

    # Add required Respan platform fields to every span.
    # The backend expects these on all spans (per official SDK examples).
    respan_defaults = {
        "warnings": "",
        "encoding_format": "float",
        "disable_fallback": False,
        "respan_params": {
            "has_webhook": False,
            "environment": os.environ.get("RESPAN_ENVIRONMENT", "prod"),
        },
        "field_name": "data: ",
        "delimiter": "\n\n",
        "disable_log": False,
        "request_breakdown": False,
    }
    for span in spans:
        for key, value in respan_defaults.items():
            if key not in span:
                span[key] = value

    return spans
646
-
647
-
648
def send_spans(
    spans: List[Dict[str, Any]],
    api_key: str,
    base_url: str,
    turn_num: int,
) -> None:
    """Send spans to Respan as a single batch (matches official SDK behaviour).

    The official Respan tracing SDK sends all spans for a trace in one
    POST request to ``/v1/traces/ingest``. We do the same here, with
    simple retry logic for transient server errors.

    Args:
        spans: Fully-built span dicts for one turn.
        api_key: Respan API key, sent as a Bearer token.
        base_url: Respan API base URL (joined with the ingest path).
        turn_num: Turn number, used only in log messages.
    """
    url = f"{base_url}/v1/traces/ingest"
    headers = {"Authorization": f"Bearer {api_key}"}

    span_names = [s.get("span_name", "?") for s in spans]
    payload_json = json.dumps(spans)
    payload_size = len(payload_json)
    debug(f"Sending {len(spans)} spans ({payload_size} bytes) for turn {turn_num}: {span_names}")
    if DEBUG:
        # Keep a copy of the exact payload on disk for troubleshooting.
        debug_file = LOG_FILE.parent / f"respan_spans_turn_{turn_num}.json"
        debug_file.write_text(payload_json, encoding="utf-8")
        debug(f"Dumped spans to {debug_file}")

    # Up to 3 attempts: retry on 5xx and on transient network errors.
    for attempt in range(3):
        try:
            response = requests.post(url, json=spans, headers=headers, timeout=30)
            if response.status_code < 400:
                resp_text = response.text[:300] if response.text else ""
                debug(f"Sent {len(spans)} spans for turn {turn_num} "
                      f"(attempt {attempt + 1}): {resp_text}")
                return
            if response.status_code < 500:
                # 4xx — client error; retrying would not help.
                log("ERROR", f"Spans rejected for turn {turn_num}: "
                    f"HTTP {response.status_code} - {response.text[:200]}")
                return
            # 5xx — retry after short delay
            debug(f"Server error for turn {turn_num} "
                  f"(attempt {attempt + 1}), retrying...")
            time.sleep(1.0)
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            # Transient network problem — retry after a short delay.
            time.sleep(1.0)
        except Exception as e:
            # Anything unexpected: log and give up for this turn.
            log("ERROR", f"Failed to send spans for turn {turn_num}: {e}")
            return

    log("ERROR", f"Failed to send {len(spans)} spans for turn {turn_num} "
        f"after 3 attempts")
696
-
697
-
698
def process_transcript(
    session_id: str,
    transcript_file: Path,
    state: Dict[str, Any],
    api_key: str,
    base_url: str,
    config: Optional[Dict[str, Any]] = None,
) -> int:
    """Process a transcript file and create traces for new turns.

    Groups new transcript lines into turns (user → assistant(s) →
    tool_results), sends one span batch per completed turn, and persists
    per-session progress (last processed line + turn count) so repeated
    hook invocations never re-send completed turns.

    Args:
        session_id: Session whose progress is tracked in *state*.
        transcript_file: JSONL transcript to read.
        state: Mutable state mapping; updated and saved before returning.
        api_key: Respan API key, forwarded to send_spans.
        base_url: Respan API base URL, forwarded to send_spans.
        config: Parsed respan.json, forwarded to create_respan_spans.

    Returns:
        The number of turns committed during this invocation.
    """
    # Get previous state for this session
    session_state = state.get(session_id, {})
    last_line = session_state.get("last_line", 0)
    turn_count = session_state.get("turn_count", 0)

    # Read transcript - need ALL messages to build conversation history
    lines = transcript_file.read_text(encoding="utf-8").strip().split("\n")
    total_lines = len(lines)

    if last_line >= total_lines:
        debug(f"No new lines to process (last: {last_line}, total: {total_lines})")
        return 0

    # Parse new messages, tracking their line indices
    new_messages = []
    for i in range(last_line, total_lines):
        try:
            if lines[i].strip():
                msg = json.loads(lines[i])
                # Remember the source line so incomplete turns can be
                # rewound to exactly the right place.
                msg["_line_idx"] = i
                new_messages.append(msg)
        except json.JSONDecodeError:
            continue

    if not new_messages:
        return 0

    debug(f"Processing {len(new_messages)} new messages")

    # Group messages into turns (user -> assistant(s) -> tool_results)
    turns_processed = 0
    # Track the line after the last fully-processed turn so we can
    # re-read incomplete turns on the next invocation.
    last_committed_line = last_line
    current_user = None
    current_user_line = last_line
    current_assistants = []
    current_assistant_parts = []
    current_msg_id = None
    current_tool_results = []

    def _commit_turn() -> None:
        """Send the current turn and update last_committed_line."""
        nonlocal turns_processed, last_committed_line
        turns_processed += 1
        turn_num = turn_count + turns_processed
        spans = create_respan_spans(
            session_id, turn_num, current_user, current_assistants, current_tool_results,
            config=config,
        )
        send_spans(spans, api_key, base_url, turn_num)
        last_committed_line = total_lines  # safe default, refined below

    for msg in new_messages:
        line_idx = msg.pop("_line_idx", last_line)
        role = msg.get("type") or (msg.get("message", {}).get("role"))

        if role == "user":
            # Check if this is a tool result
            if is_tool_result(msg):
                current_tool_results.append(msg)
                continue

            # New user message - finalize previous turn
            if current_msg_id and current_assistant_parts:
                merged = merge_assistant_parts(current_assistant_parts)
                current_assistants.append(merged)
                current_assistant_parts = []
                current_msg_id = None

            if current_user and current_assistants:
                _commit_turn()
                # Advance committed line to just before this new user msg
                last_committed_line = line_idx

            # Start new turn
            current_user = msg
            current_user_line = line_idx
            current_assistants = []
            current_assistant_parts = []
            current_msg_id = None
            current_tool_results = []

        elif role == "assistant":
            # Streaming can split one assistant message across several
            # transcript lines sharing the same message id; merge them.
            msg_id = None
            if isinstance(msg, dict) and "message" in msg:
                msg_id = msg["message"].get("id")

            if not msg_id:
                # No message ID, treat as continuation
                current_assistant_parts.append(msg)
            elif msg_id == current_msg_id:
                # Same message ID, add to current parts
                current_assistant_parts.append(msg)
            else:
                # New message ID - finalize previous message
                if current_msg_id and current_assistant_parts:
                    merged = merge_assistant_parts(current_assistant_parts)
                    current_assistants.append(merged)

                # Start new assistant message
                current_msg_id = msg_id
                current_assistant_parts = [msg]

    # Process final turn
    if current_msg_id and current_assistant_parts:
        merged = merge_assistant_parts(current_assistant_parts)
        current_assistants.append(merged)

    if current_user and current_assistants:
        # Check if the turn has actual text output. The Stop hook can fire
        # before the final assistant text block is flushed to disk, leaving
        # only thinking/tool_use blocks. If no text content is found, treat
        # the turn as incomplete so the retry logic re-reads it.
        has_text = any(get_text_content(m) for m in current_assistants)
        if has_text:
            _commit_turn()
            last_committed_line = total_lines
        else:
            last_committed_line = current_user_line
            debug(f"Turn has assistant msgs but no text output yet (likely not flushed), will retry")
    else:
        # Incomplete turn — rewind so the next run re-reads from the
        # unmatched user message (or from where we left off if no user).
        if current_user:
            last_committed_line = current_user_line
            debug(f"Incomplete turn at line {current_user_line}, will retry next run")
        # else: no pending user, advance past non-turn lines
        elif last_committed_line == last_line:
            last_committed_line = total_lines

    # Update state
    state[session_id] = {
        "last_line": last_committed_line,
        "turn_count": turn_count + turns_processed,
        "updated": datetime.now(timezone.utc).isoformat(),
    }
    save_state(state)

    return turns_processed
847
-
848
-
849
def read_stdin_payload() -> Optional[Tuple[str, Path]]:
    """Read session_id and transcript_path from the stdin JSON payload.

    Claude Code hooks pipe a JSON object on stdin carrying at least
    ``session_id`` and ``transcript_path``. Returns ``None`` when stdin
    is a TTY, empty, or contains invalid data.
    """
    if sys.stdin.isatty():
        debug("stdin is a TTY, skipping stdin payload")
        return None

    try:
        raw = sys.stdin.read()
    except Exception as e:
        debug(f"Failed to read stdin: {e}")
        return None

    if not raw or not raw.strip():
        debug("stdin is empty")
        return None

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as e:
        debug(f"Invalid JSON on stdin: {e}")
        return None

    session_id = payload.get("session_id")
    path_str = payload.get("transcript_path")
    if not (session_id and path_str):
        debug("stdin payload missing session_id or transcript_path")
        return None

    transcript_path = Path(path_str)
    if not transcript_path.exists():
        debug(f"transcript_path from stdin does not exist: {transcript_path}")
        return None

    debug(f"Got transcript from stdin: session={session_id}, path={transcript_path}")
    return (session_id, transcript_path)
889
-
890
-
891
@contextlib.contextmanager
def state_lock(timeout: float = 5.0):
    """Acquire an advisory file lock around state operations.

    Tries to ``flock`` LOCK_FILE for up to *timeout* seconds. Locking is
    best-effort: on timeout, on any lock error, or when fcntl is
    unavailable (Windows), the body still runs, just without the lock.

    Bug fix: the previous version placed its ``yield`` statements inside
    a broad ``except Exception`` block. An exception raised by the
    caller's ``with`` body was thrown into the ``yield``, swallowed by
    that handler, and caused a *second* ``yield`` — which makes a
    ``@contextmanager`` generator raise ``RuntimeError`` and lose the
    caller's original exception. Acquisition is now separated from a
    single unguarded ``yield`` so caller exceptions propagate untouched.
    """
    if fcntl is None:
        # Windows: fcntl is unavailable; run unlocked.
        yield
        return

    lock_fd = None
    locked = False
    try:
        LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
        lock_fd = open(LOCK_FILE, "w")
        deadline = time.monotonic() + timeout
        while True:
            try:
                # Non-blocking attempt so we can enforce our own deadline.
                fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                locked = True
                break
            except (IOError, OSError):
                if time.monotonic() >= deadline:
                    debug("Could not acquire state lock within timeout, proceeding without lock")
                    break
                time.sleep(0.1)
    except Exception as e:
        debug(f"Lock error, proceeding without lock: {e}")

    try:
        # Exactly one yield, outside any except block: exceptions from
        # the caller's body propagate instead of being swallowed.
        yield
    finally:
        if lock_fd is not None:
            if locked:
                with contextlib.suppress(Exception):
                    fcntl.flock(lock_fd, fcntl.LOCK_UN)
            with contextlib.suppress(Exception):
                lock_fd.close()
929
-
930
-
931
def _resolve_api_key(base_url: str) -> Tuple[Optional[str], str]:
    """Resolve the Respan API key: env var first, then credentials file.

    Looks up ``RESPAN_API_KEY``; when unset, reads the active profile's
    credential from ``~/.config/respan/credentials.json`` (the profile
    comes from ``activeProfile`` in ``config.json``, defaulting to
    "default"). The credentials file may also override *base_url* when
    the caller did not configure one explicitly.

    Returns:
        ``(api_key, base_url)``; ``api_key`` is ``None`` when no
        credential could be found.
    """
    api_key = os.getenv("RESPAN_API_KEY")
    if api_key:
        return api_key, base_url

    creds_file = Path.home() / ".config" / "respan" / "credentials.json"
    if not creds_file.exists():
        return None, base_url

    try:
        creds = json.loads(creds_file.read_text(encoding="utf-8"))
        # Find the active profile's credential
        config_file = Path.home() / ".config" / "respan" / "config.json"
        profile = "default"
        if config_file.exists():
            cfg = json.loads(config_file.read_text(encoding="utf-8"))
            profile = cfg.get("activeProfile", "default")
        cred = creds.get(profile, {})
        api_key = cred.get("apiKey") or cred.get("accessToken")
        # Only honor the stored baseUrl when the env var left the default.
        if not base_url or base_url == "https://api.respan.ai/api":
            base_url = cred.get("baseUrl", base_url)
        if api_key:
            debug(f"Using API key from credentials.json (profile: {profile})")
    except (json.JSONDecodeError, IOError) as e:
        debug(f"Failed to read credentials.json: {e}")
    return api_key, base_url


def _load_project_config(transcript_file: Path) -> Dict[str, Any]:
    """Load respan.json config for the project the transcript belongs to.

    The project directory is the ``cwd`` recorded in the transcript's
    first few messages (the very first line is often a
    file-history-snapshot without a ``cwd``). Any failure falls back to
    an empty config.

    Fix: the previous version read the whole transcript file twice
    (once to parse the first line, then again for the 5-line scan);
    it is now read exactly once.
    """
    config: Dict[str, Any] = {"fields": {}, "properties": {}}
    try:
        lines = transcript_file.read_text(encoding="utf-8").split("\n")
        cwd = None
        for line in lines[:5]:
            if line.strip():
                msg = json.loads(line)
                cwd = msg.get("cwd")
                if cwd:
                    break
        if cwd:
            config = load_respan_config(cwd)
            debug(f"Loaded respan.json config from {cwd}: {config}")
    except Exception as e:
        debug(f"Failed to extract CWD or load config: {e}")
    return config


def main():
    """Hook entry point: send new transcript turns to Respan.

    Steps: check the TRACE_TO_RESPAN switch, resolve credentials, locate
    the transcript (stdin payload, then filesystem scan), load per-project
    config, then process the transcript under the state lock with up to
    three retries. Always exits 0 so a hook failure never blocks
    Claude Code.
    """
    script_start = datetime.now()
    debug("Hook started")

    # Check if tracing is enabled
    if os.environ.get("TRACE_TO_RESPAN", "").lower() != "true":
        debug("Tracing disabled (TRACE_TO_RESPAN != true)")
        sys.exit(0)

    # Resolve API key: env var > ~/.config/respan/credentials.json
    base_url = os.getenv("RESPAN_BASE_URL", "https://api.respan.ai/api")
    api_key, base_url = _resolve_api_key(base_url)

    if not api_key:
        log("ERROR", "No API key found. Run: respan auth login")
        sys.exit(0)

    # Try stdin payload first, fall back to filesystem scan
    result = read_stdin_payload()
    if not result:
        result = find_latest_transcript()
    if not result:
        debug("No transcript file found")
        sys.exit(0)

    session_id, transcript_file = result

    if not transcript_file:
        debug("No transcript file found")
        sys.exit(0)

    debug(f"Processing session: {session_id}")

    config = _load_project_config(transcript_file)

    # Process the transcript under file lock.
    # Retry up to 3 times with a short delay — the Stop hook can fire
    # before Claude Code finishes flushing the assistant response to
    # the transcript file, causing an incomplete turn on the first read.
    max_attempts = 3
    turns = 0
    try:
        for attempt in range(max_attempts):
            with state_lock():
                state = load_state()
                turns = process_transcript(session_id, transcript_file, state, api_key, base_url, config=config)

            if turns > 0:
                break

            if attempt < max_attempts - 1:
                delay = 0.5 * (attempt + 1)
                debug(f"No turns processed (attempt {attempt + 1}/{max_attempts}), "
                      f"retrying in {delay}s...")
                time.sleep(delay)

        # Log execution time
        duration = (datetime.now() - script_start).total_seconds()
        log("INFO", f"Processed {turns} turns in {duration:.1f}s")

        if duration > 180:
            log("WARN", f"Hook took {duration:.1f}s (>3min), consider optimizing")

    except Exception as e:
        log("ERROR", f"Failed to process transcript: {e}")
        import traceback
        debug(traceback.format_exc())

    sys.exit(0)
1041
-
1042
-
1043
# Script entry point: run the hook only when executed directly, not on import.
if __name__ == "__main__":
    main()