coze_lab 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1303 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ CozeLoop Hook for Claude Code
6
+
7
+ This hook integrates Claude Code with CozeLoop for tracing and observability.
8
+ It captures conversation interactions from the local .jsonl file and sends them
9
+ as traces to the CozeLoop platform.
10
+
11
+ Usage:
12
+ 1. Place this script in `~/.claude/hooks/cozeloop_hook.py`.
13
+ 2. Configure the hook in `~/.claude/settings.json`.
14
+ 3. Set environment variables `COZELOOP_WORKSPACE_ID` and `COZELOOP_API_TOKEN`
15
+ in your project's `.claude/settings.local.json`.
16
+ 4. Run Claude Code as normal - traces will be sent automatically.
17
+ """
18
+
19
+ import json
20
+ import os
21
+ import sys
22
+ import glob
23
+ import hashlib
24
+ import time
25
+ import urllib.request
26
+ import urllib.error
27
+ from datetime import datetime
28
+ from pathlib import Path
29
+ from typing import Optional, List, Dict, Any
30
+
31
+ # --- SDK Import ---
32
+ try:
33
+ import cozeloop
34
+ from cozeloop.spec.tracespec import (
35
+ Runtime, ModelInput, ModelMessage, ModelToolChoice,
36
+ ModelOutput, ModelChoice, ModelToolCall, ModelToolCallFunction,
37
+ ModelMessagePart, ModelMessagePartType
38
+ )
39
+ except ImportError:
40
+ print("Error: cozeloop SDK not found. Please install it with: pip install cozeloop", file=sys.stderr)
41
+ sys.exit(1)
42
+
43
+ # --- Configuration ---
44
+ DEBUG = os.environ.get("CC_COZELOOP_DEBUG", "").lower() == "true"  # debug logging is on only when the variable equals "true" (case-insensitive)
45
+ _COZELOOP_CLIENT_ID = "56089404009908161803155625287505.app.coze"  # sent as "client_id" in the refresh-token request body
46
+ _COZE_API = "https://api.coze.cn"  # base URL the refresh-token endpoint path is appended to
47
+ _REFRESH_THRESHOLD = 10 * 60 # seconds; refresh when < 10 minutes remain
48
+ _DEFAULT_WORKSPACE_ID = "7644910356078837760" # hardcoded spaceID fallback, used when neither the credentials file nor COZELOOP_WORKSPACE_ID provides one
49
+
50
+
51
+ # --- coze-context parsing -------------------------------------------------
52
+ # User messages may embed a block like:
53
+ # <coze-context>
54
+ # account_id: 0
55
+ # agent_id: 7644920552473395499
56
+ # session_id: 7644919579054997796
57
+ # message_id: 04dd5246-...
58
+ # </coze-context>
59
+ # We parse its key:value pairs and inject them into the trace.
60
+
61
+ _COZE_CTX_OPEN = "<coze-context>"  # opening delimiter located by parse_coze_context
62
+ _COZE_CTX_CLOSE = "</coze-context>"  # closing delimiter searched for after the last opening delimiter
63
+
64
+
65
+ def _content_to_text(content: Any) -> str:
66
+ """Flatten Claude message content (str | list[dict] | dict) to plain text."""
67
+ if content is None:
68
+ return ""
69
+ if isinstance(content, str):
70
+ return content
71
+ if isinstance(content, dict):
72
+ return content.get("text", "") if content.get("type") == "text" else ""
73
+ if isinstance(content, list):
74
+ parts = []
75
+ for item in content:
76
+ if isinstance(item, str):
77
+ parts.append(item)
78
+ elif isinstance(item, dict) and item.get("type") == "text":
79
+ parts.append(item.get("text", ""))
80
+ return "\n".join(parts)
81
+ return ""
82
+
83
+
84
def parse_coze_context(text: str) -> Dict[str, str]:
    """Return the key/value pairs of the final ``<coze-context>`` block in *text*.

    The block searched for starts at the last opening delimiter and ends at
    the first closing delimiter after it.  An empty dict is returned when
    *text* is empty, has no opening delimiter, or has no closing delimiter
    after the last opening one.  Lines are split on the first ``:``; lines
    without a colon or with an empty key are ignored, and a repeated key
    keeps its last value.
    """
    if not text:
        return {}
    start = text.rfind(_COZE_CTX_OPEN)
    if start == -1:
        return {}
    end = text.find(_COZE_CTX_CLOSE, start)
    if end == -1:
        return {}

    block = text[start + len(_COZE_CTX_OPEN):end]
    # The block may carry real newlines or literal backslash sequences (for
    # example when the message is a JSON string that was never un-escaped).
    # "\r\n" must be rewritten first so it does not become two line breaks.
    for escaped in ("\\r\\n", "\\n", "\\r"):
        block = block.replace(escaped, "\n")

    pairs: Dict[str, str] = {}
    for raw_line in block.splitlines():
        name, colon, val = raw_line.strip().partition(":")
        name = name.strip()
        if colon and name:
            pairs[name] = val.strip()
    return pairs
108
+
109
+
110
def coze_context_tags(content: Any) -> Dict[str, str]:
    """Return the coze-context pairs found in *content*, keys prefixed with ``coze_``.

    *content* is flattened to text first, then the last ``<coze-context>``
    block in that text is parsed.
    """
    flattened = _content_to_text(content)
    tags: Dict[str, str] = {}
    for key, value in parse_coze_context(flattened).items():
        tags[f"coze_{key}"] = value
    return tags
113
+
114
+
115
+ # --- trace upload failure / logid capture ---------------------------------
116
+ def _extract_logid(msg: str) -> str:
117
+ """Pull the server logid out of an SDK error message ('logid=XXXX')."""
118
+ if not msg:
119
+ return ""
120
+ marker = "logid="
121
+ idx = msg.find(marker)
122
+ if idx == -1:
123
+ return ""
124
+ rest = msg[idx + len(marker):]
125
+ logid = []
126
+ for ch in rest:
127
+ if ch.isalnum():
128
+ logid.append(ch)
129
+ else:
130
+ break
131
+ return "".join(logid)
132
+
133
+
134
+ def _make_finish_event_processor():
135
+ """Return a trace_finish_event_processor that surfaces failures + logid.
136
+
137
+ The CozeLoop SDK calls this for each flush event; on failure we print the
138
+ server logid to stderr so it can be handed to platform support (e.g. via
139
+ `bytedcli log get-logid-log <logid>`).
140
+ """
141
+ def _processor(info):
142
+ try:
143
+ if not getattr(info, "is_event_fail", False):
144
+ return
145
+ detail = getattr(info, "detail_msg", "") or ""
146
+ logid = _extract_logid(detail)
147
+ if logid:
148
+ print(f"[CozeLoop] 上报失败 logid={logid} (可用 bytedcli log get-logid-log {logid} 排查)", file=sys.stderr)
149
+ else:
150
+ print(f"[CozeLoop] 上报失败: {detail[:300]}", file=sys.stderr)
151
+ except Exception:
152
+ pass
153
+ return _processor
154
+
155
+
156
+
157
def debug_log(message: str):
    """Write a timestamped debug line to stderr; a no-op unless DEBUG is set."""
    if not DEBUG:
        return
    stamp = datetime.now().isoformat()
    print(f"[COZELOOP_HOOK_DEBUG] {stamp} - {message}", file=sys.stderr)
161
+
162
+ # --- Token refresh --------------------------------------------------------
163
+
164
+ def _get_credentials_path() -> Path:
165
+ return Path.home() / ".cozeloop" / "credentials.json"
166
+
167
def _load_credentials() -> Optional[Dict]:
    """Load the cached credentials from disk.

    Returns:
        The parsed JSON object, or ``None`` when the file is missing or
        unreadable, is not valid UTF-8/JSON, or holds a JSON value that is
        not an object (callers use ``.get`` on the result).
    """
    path = _get_credentials_path()
    try:
        creds = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None
    return creds if isinstance(creds, dict) else None
175
+
176
def _save_credentials(creds: Dict):
    """Persist *creds* as JSON in the credentials file, readable only by its owner.

    The data is written to a temporary file created with mode ``0o600`` in the
    same directory and then moved over the real file with ``os.replace``.
    This way the tokens are never on disk with default (possibly group/world
    readable) permissions, and a crash mid-write cannot leave a truncated
    credentials file behind.

    Raises:
        OSError: if the directory or file cannot be created or written.
        TypeError/ValueError: if *creds* is not JSON-serialisable.
    """
    path = _get_credentials_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialise first so a bad payload never leaves a stray temp file.
    payload = json.dumps(creds, indent=2)

    tmp_path = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            # The creation mode is ignored if a stale temp file already
            # existed, so tighten it explicitly before writing any secret.
            os.chmod(tmp_path, 0o600)
            fh.write(payload)
        os.replace(tmp_path, path)
    except BaseException:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
181
+
182
def _refresh_token(refresh_token: str) -> Optional[str]:
    """Exchange *refresh_token* for a new access token and cache the result.

    Sends a JSON ``refresh_token`` grant to the token endpoint under
    ``_COZE_API`` with a 10 second timeout.  When the reply contains an
    ``access_token`` the credentials file is rewritten, keeping the previously
    stored ``workspace_id`` and falling back to the old refresh token if the
    reply does not carry a new one.

    Returns:
        The new access token, or ``None`` when the reply has no access token
        or any step fails (failures are only reported through ``debug_log``).
    """
    body = {
        "grant_type": "refresh_token",
        "client_id": _COZELOOP_CLIENT_ID,
        "refresh_token": refresh_token,
    }
    try:
        request = urllib.request.Request(
            f"{_COZE_API}/api/permission/oauth2/token",
            data=json.dumps(body).encode(),
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read())
        if not reply.get("access_token"):
            return None

        previous = _load_credentials() or {}
        updated = {
            "access_token": reply["access_token"],
            "refresh_token": reply.get("refresh_token", refresh_token),
            # Stored multiplied by 1000; get_fresh_token divides by 1000 again.
            # NOTE(review): this assumes the API's expires_in is an absolute
            # Unix time in seconds rather than a lifetime - confirm.
            "expires_at": reply.get("expires_in", 0) * 1000,
            "workspace_id": previous.get("workspace_id", ""),
        }
        _save_credentials(updated)
        debug_log("Token refreshed successfully.")
        return updated["access_token"]
    except Exception as e:
        debug_log(f"Token refresh failed: {e}")
    return None
211
+
212
def get_fresh_token() -> Optional[str]:
    """Return a usable access token, refreshing the cached one when needed.

    Order of preference:
      1. the cached access token, if it has more than ``_REFRESH_THRESHOLD``
         seconds left (``expires_at`` is stored in milliseconds);
      2. a token obtained with the cached refresh token;
      3. the ``COZELOOP_API_TOKEN`` environment variable (may be ``None``).

    A credentials file that lacks ``access_token`` or carries an unusable
    ``expires_at`` is treated as expired instead of raising.
    """
    creds = _load_credentials()
    if isinstance(creds, dict) and creds:
        try:
            remaining = float(creds.get("expires_at") or 0) / 1000 - time.time()
            remaining_s = int(remaining)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, NaN or infinite expiry: behave as if already expired.
            remaining, remaining_s = 0.0, 0

        cached = creds.get("access_token")
        if cached and remaining > _REFRESH_THRESHOLD:
            debug_log(f"Cached token valid, expires in {remaining_s}s.")
            return cached
        if creds.get("refresh_token"):
            debug_log(f"Token expiring in {remaining_s}s, refreshing...")
            new_token = _refresh_token(creds["refresh_token"])
            if new_token:
                return new_token
            debug_log("Refresh failed, falling back to env var.")
    return os.environ.get("COZELOOP_API_TOKEN")
228
+
229
+ # -------------------------------------------------------------------------
230
+
231
+ # --- State Management ---
232
+
233
def get_state_file_path(conversation_file: str) -> str:
    """Return the state-file path that tracks progress for *conversation_file*.

    The state directory ``~/.claude/cozeloop_state`` is created if needed.
    The file name is derived from the first 12 hex digits of the MD5 of the
    conversation path, so each conversation file maps to a stable name.
    """
    directory = Path.home().joinpath(".claude", "cozeloop_state")
    directory.mkdir(parents=True, exist_ok=True)
    digest = hashlib.md5(conversation_file.encode()).hexdigest()
    return str(directory / f"state_{digest[:12]}.json")
239
+
240
def load_state(state_file: str) -> Dict[str, Any]:
    """Load the processing state stored in *state_file*.

    Returns:
        The JSON object stored in the file.  The default state
        ``{"last_processed_line": 0, "session_id": None}`` is returned when
        the file does not exist, cannot be read or decoded, or contains a
        JSON value that is not an object.
    """
    if os.path.exists(state_file):
        try:
            with open(state_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            debug_log(f"Error loading state: {e}")
        else:
            if isinstance(loaded, dict):
                return loaded
            # Valid JSON of the wrong shape (e.g. null or a list) is treated
            # like a missing file so callers always receive a dict.
    return {"last_processed_line": 0, "session_id": None}
249
+
250
def save_state(state_file: str, state: Dict[str, Any]):
    """Write *state* to *state_file* as indented JSON.

    I/O errors are reported through ``debug_log`` and otherwise ignored, so a
    failure to persist state never interrupts the caller.
    """
    try:
        with open(state_file, 'w') as out:
            json.dump(state, out, indent=2)
    except IOError as err:
        debug_log(f"Error saving state: {err}")
257
+
258
+ # --- Conversation File Handling ---
259
+
260
def find_latest_conversation_file() -> Optional[str]:
    """Find the most recently modified ``*.jsonl`` file under ``~/.claude/projects``.

    Returns:
        The path of the newest file as a string, or ``None`` when the
        projects directory does not exist or holds no readable ``*.jsonl``
        file.
    """
    claude_dir = Path.home() / ".claude" / "projects"
    if not claude_dir.exists():
        debug_log(f"Claude projects directory not found: {claude_dir}")
        return None

    candidates = []
    for path in claude_dir.rglob("*.jsonl"):
        try:
            candidates.append((path.stat().st_mtime, path))
        except OSError:
            # The file vanished or became unreadable between listing and
            # stat(); skip it instead of aborting the whole lookup.
            continue
    if not candidates:
        debug_log("No conversation files (*.jsonl) found.")
        return None

    # max() keeps the first of several equally recent files.
    latest_file = max(candidates, key=lambda pair: pair[0])[1]
    debug_log(f"Found latest conversation file: {latest_file}")
    return str(latest_file)
275
+
276
def read_new_messages(file_path: str, start_line: int = 0) -> List[Dict[str, Any]]:
    """Read the JSON objects stored on lines ``start_line`` and later of a .jsonl file.

    Args:
        file_path: conversation file to read (decoded as UTF-8).
        start_line: zero-based index of the first line to consider.

    Returns:
        The parsed objects in file order, each tagged with a
        ``_line_number`` key holding its zero-based line index.  Blank lines,
        malformed JSON and JSON values that are not objects are skipped.  If
        the file cannot be read or decoded, whatever was parsed before the
        error is returned.
    """
    messages = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f):
                if i < start_line:
                    continue
                line = line.strip()
                if not line:
                    continue
                try:
                    msg = json.loads(line)
                except json.JSONDecodeError:
                    debug_log(f"Skipping malformed JSON on line {i+1}")
                    continue
                if not isinstance(msg, dict):
                    # A bare list/number/string cannot take the
                    # _line_number tag and is not a conversation record.
                    debug_log(f"Skipping non-object JSON on line {i+1}")
                    continue
                msg['_line_number'] = i
                messages.append(msg)
    except (OSError, UnicodeDecodeError) as e:
        debug_log(f"Error reading conversation file: {e}")
    return messages
295
+
296
+ # --- Content Helpers ---
297
+
298
def is_empty_content(content: Any) -> bool:
    """Tell whether *content* carries no meaningful data.

    Empty means: ``None``, a blank/whitespace-only string, an empty list, or
    a list holding exactly one ``"text"`` item whose text is blank.  Every
    other value, including non-list, non-string objects, counts as non-empty.
    """
    if content is None:
        return True
    if isinstance(content, str):
        return not content.strip()
    if not isinstance(content, list):
        return False
    if not content:
        return True
    if len(content) > 1:
        return False
    only = content[0]
    return (
        isinstance(only, dict)
        and only.get("type") == "text"
        and only.get("text", "").strip() == ""
    )
310
+
311
def format_content(content: Any, truncate: int = 4096) -> str:
    """Render message content as a string cut to at most *truncate* characters.

    Strings are used as they are, dicts and lists are serialised as JSON
    (non-ASCII characters kept verbatim), and anything else goes through
    ``str()``.
    """
    if isinstance(content, str):
        rendered = content
    elif isinstance(content, (dict, list)):
        rendered = json.dumps(content, ensure_ascii=False)
    else:
        rendered = str(content)
    return rendered[:truncate]
320
+
321
+
322
+ # --- Message Parsing and Grouping ---
323
+
324
def is_tool_result_message(msg: Dict[str, Any]) -> bool:
    """Tell whether *msg* carries tool results rather than real user input.

    True when ``msg["message"]["content"]`` is a list containing at least one
    dict whose ``type`` is ``"tool_result"``.
    """
    content = msg.get("message", {}).get("content", [])
    if not isinstance(content, list):
        return False
    for item in content:
        if isinstance(item, dict) and item.get("type") == "tool_result":
            return True
    return False
331
+
332
def extract_tool_result_from_message(msg: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the ``tool_result`` items of a user message, in their original order.

    An empty list is returned when the message content is not a list or
    contains no ``tool_result`` dicts.
    """
    content = msg.get("message", {}).get("content", [])
    if not isinstance(content, list):
        return []
    results = []
    for item in content:
        if isinstance(item, dict) and item.get("type") == "tool_result":
            results.append(item)
    return results
338
+
339
+
340
+ def _extract_progress_inner_message(msg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
341
+ """Extract the inner conversation message from a progress (sub-agent) message.
342
+
343
+ Progress messages have the inner message nested at data.message.message.
344
+ Returns a dict with keys: role, content, id, parentToolUseID, or None if not valid.
345
+ """
346
+ data = msg.get("data", {})
347
+ outer_msg = data.get("message", {})
348
+ inner_msg = outer_msg.get("message", {})
349
+ if not inner_msg:
350
+ return None
351
+
352
+ role = inner_msg.get("role")
353
+ content = inner_msg.get("content")
354
+ if not role or content is None:
355
+ return None
356
+
357
+ return {
358
+ "role": role,
359
+ "content": content,
360
+ "id": inner_msg.get("id"),
361
+ "usage": inner_msg.get("usage", {}),
362
+ "model": inner_msg.get("model"),
363
+ "parentToolUseID": msg.get("parentToolUseID"),
364
+ "agentId": data.get("agentId", ""),
365
+ }
366
+
367
+
368
+ def _group_subagent_steps(progress_msgs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
369
+ """Group sub-agent progress messages into steps (same logic as top-level).
370
+
371
+ Each step is an assistant message (model call) + its tool_calls + tool_results.
372
+ Returns list of steps in the same format as turn["steps"], but with simplified
373
+ assistant_message structure.
374
+ """
375
+ steps = []
376
+
377
+ for pmsg in progress_msgs:
378
+ role = pmsg.get("role")
379
+ content = pmsg.get("content", [])
380
+
381
+ if role == "user":
382
+ # Could be tool_result or user input for the sub-agent
383
+ if isinstance(content, list):
384
+ has_tool_result = any(
385
+ isinstance(item, dict) and item.get("type") == "tool_result"
386
+ for item in content
387
+ )
388
+ if has_tool_result and steps:
389
+ for item in content:
390
+ if isinstance(item, dict) and item.get("type") == "tool_result":
391
+ steps[-1]["tool_results"].append(item)
392
+ # Skip non-tool-result user messages (sub-agent prompt)
393
+ continue
394
+
395
+ if role == "assistant":
396
+ tool_calls = []
397
+ if isinstance(content, list):
398
+ for item in content:
399
+ if isinstance(item, dict) and item.get("type") == "tool_use":
400
+ tool_calls.append(item)
401
+
402
+ msg_id = pmsg.get("id")
403
+ last_step = steps[-1] if steps else None
404
+ last_msg_id = last_step.get("_msg_id") if last_step else None
405
+
406
+ if last_step and msg_id and msg_id == last_msg_id:
407
+ # Same API response — merge
408
+ existing = last_step["assistant_message"].get("message", {}).get("content", [])
409
+ if isinstance(existing, list) and isinstance(content, list):
410
+ existing.extend(content)
411
+ last_step["tool_calls"].extend(tool_calls)
412
+ usage = pmsg.get("usage", {})
413
+ if usage.get("input_tokens", 0) > 0 or usage.get("output_tokens", 0) > 0:
414
+ last_step["assistant_message"]["message"]["usage"] = usage
415
+ else:
416
+ steps.append({
417
+ "assistant_message": {
418
+ "message": {
419
+ "role": "assistant",
420
+ "content": content,
421
+ "id": msg_id,
422
+ "model": pmsg.get("model", ""),
423
+ "usage": pmsg.get("usage", {}),
424
+ }
425
+ },
426
+ "tool_calls": tool_calls,
427
+ "tool_results": [],
428
+ "_msg_id": msg_id,
429
+ })
430
+
431
+ return steps
432
+
433
+
434
def group_messages_into_turns(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Group messages into conversation turns (user -> assistant -> tool_results).

    A turn starts at a real user input (a user message without tool results)
    and collects "steps".  Each step is a single model invocation:

      - ``assistant_message``: the assistant's response (one API call)
      - ``tool_calls``: ``tool_use`` items of that response
      - ``tool_results``: ``tool_result`` items from the following user
        message(s)

    Claude Code writes text and tool_use from the same API response as
    separate lines sharing one ``message.id``; such lines are merged into a
    single step.  Assistant messages and tool results that arrive before any
    user input are dropped.  Messages of type ``progress``, ``system`` and
    ``file-history-snapshot`` never start or extend a turn.

    Sub-agent progress messages are grouped by ``parentToolUseID`` and
    attached to the matching tool call as ``_sub_steps`` (plus ``_agent_id``);
    ``toolUseResult.usage`` is attached as ``_total_usage``.  These three keys
    are written onto the ``tool_use`` dicts themselves.

    The ``assistant_message`` of a step is a shallow copy of the input line
    with its own content list, so merging never alters *messages* and the
    function can safely be called again on the same input.

    Returns:
        A list of turns, each a dict with ``user_message``, ``steps`` and
        ``start_line`` (the ``_line_number`` of the user message, default 0).
    """
    turns = []
    current_turn = None

    # First pass: collect progress messages grouped by parentToolUseID,
    # and toolUseResult usage keyed by tool_use_id.
    subagent_progress: Dict[str, List[Dict[str, Any]]] = {}
    tool_use_result_usage: Dict[str, Dict[str, Any]] = {}
    for msg in messages:
        if msg.get("type") == "progress":
            inner = _extract_progress_inner_message(msg)
            if inner and inner.get("parentToolUseID"):
                subagent_progress.setdefault(inner["parentToolUseID"], []).append(inner)
        tur = msg.get("toolUseResult")
        if isinstance(tur, dict) and tur.get("usage"):
            result_body = msg.get("message")
            result_items = result_body.get("content", []) if isinstance(result_body, dict) else []
            if isinstance(result_items, list):
                for item in result_items:
                    if isinstance(item, dict) and item.get("type") == "tool_result":
                        tid = item.get("tool_use_id", "")
                        if tid:
                            tool_use_result_usage[tid] = tur["usage"]

    # Second pass: build turns from user/assistant messages.
    for msg in messages:
        msg_type = msg.get("type")
        if msg_type in ("progress", "system", "file-history-snapshot"):
            continue

        role = msg.get("role")
        message = msg.get("message")
        if not isinstance(message, dict):
            # Tolerate lines whose "message" is missing or null.
            message = {}
        message_role = message.get("role", "")
        content = message.get("content", [])

        if msg_type == "user" or role == "user" or message_role == "user":
            tool_results = []
            if isinstance(content, list):
                tool_results = [
                    item for item in content
                    if isinstance(item, dict) and item.get("type") == "tool_result"
                ]
            if tool_results:
                # Tool results belong to the last step; they never open a turn.
                if current_turn and current_turn["steps"]:
                    current_turn["steps"][-1]["tool_results"].extend(tool_results)
            else:
                # A real user input starts a new turn.
                if current_turn:
                    turns.append(current_turn)
                current_turn = {
                    "user_message": msg,
                    "steps": [],
                    "start_line": msg.get("_line_number", 0)
                }
        elif msg_type == "assistant" or role == "assistant" or message_role == "assistant":
            if not current_turn:
                continue

            tool_calls = []
            if isinstance(content, list):
                tool_calls = [
                    item for item in content
                    if isinstance(item, dict) and item.get("type") == "tool_use"
                ]

            msg_id = message.get("id")
            steps = current_turn["steps"]
            last_step = steps[-1] if steps else None
            last_message = last_step["assistant_message"].get("message") if last_step else None
            if not isinstance(last_message, dict):
                last_message = {}

            if last_step and msg_id and msg_id == last_message.get("id"):
                # Same API response: merge content into the existing step.
                existing_content = last_message.get("content", [])
                if isinstance(existing_content, list) and isinstance(content, list):
                    existing_content.extend(content)
                last_step["tool_calls"].extend(tool_calls)
                # Carry over usage from the later line (earlier line typically has zeros).
                usage = message.get("usage")
                if not isinstance(usage, dict):
                    usage = {}
                if (usage.get("input_tokens") or 0) > 0 or (usage.get("output_tokens") or 0) > 0:
                    last_message["usage"] = usage
            else:
                # New API response: create a new step that owns its own
                # message dict and content list, leaving the input untouched.
                step_msg = dict(msg)
                if isinstance(msg.get("message"), dict):
                    step_message = dict(message)
                    if isinstance(content, list):
                        step_message["content"] = list(content)
                    step_msg["message"] = step_message
                steps.append({
                    "assistant_message": step_msg,
                    "tool_calls": tool_calls,
                    "tool_results": [],
                })

    # Don't forget the last turn.
    if current_turn:
        turns.append(current_turn)

    # Third pass: attach sub-agent steps, agentId and total usage to their
    # parent tool calls.
    for turn in turns:
        for step in turn["steps"]:
            for tc in step["tool_calls"]:
                tool_id = tc.get("id", "")
                if tool_id in subagent_progress:
                    progress_msgs = subagent_progress[tool_id]
                    tc["_sub_steps"] = _group_subagent_steps(progress_msgs)
                    # The first non-empty agentId found under this parent is used.
                    for pm in progress_msgs:
                        if pm.get("agentId"):
                            tc["_agent_id"] = pm["agentId"]
                            break
                if tool_id in tool_use_result_usage:
                    tc["_total_usage"] = tool_use_result_usage[tool_id]

    return turns
568
+
569
+
570
+ # --- CozeLoop Message Helpers ---
571
+
572
def _make_message(role: str, content: str = "", tool_calls: list = None,
                  tool_call_id: str = "", parts: list = None) -> ModelMessage:
    """Build a CozeLoop ``ModelMessage`` with every field populated.

    Missing ``parts``/``tool_calls`` become empty lists and a falsy
    ``tool_call_id`` becomes ``""``; ``reasoning_content`` and ``name`` are
    always empty strings and ``metadata`` an empty dict.
    """
    fields = {
        "role": role,
        "content": content,
        "reasoning_content": "",
        "parts": parts or [],
        "name": "",
        "tool_calls": tool_calls or [],
        "tool_call_id": tool_call_id or "",
        "metadata": {},
    }
    return ModelMessage(**fields)
585
+
586
+
587
+ def _format_tool_output(result_content: Any, max_len: int = 2000) -> str:
588
+ """Format tool result content for span output.
589
+
590
+ When content is a list (e.g. Task tool results with multiple text blocks),
591
+ extract and join text parts instead of dumping raw JSON.
592
+ """
593
+ if isinstance(result_content, str):
594
+ if len(result_content) > max_len:
595
+ return result_content[:max_len] + "..."
596
+ return result_content
597
+
598
+ if isinstance(result_content, list):
599
+ text_parts = []
600
+ for item in result_content:
601
+ if isinstance(item, dict):
602
+ if item.get("type") == "text":
603
+ text_parts.append(item.get("text", ""))
604
+ else:
605
+ # Non-text items: serialize compactly
606
+ text_parts.append(json.dumps(item, ensure_ascii=False))
607
+ elif isinstance(item, str):
608
+ text_parts.append(item)
609
+ joined = "\n".join(text_parts)
610
+ if len(joined) > max_len:
611
+ return joined[:max_len] + "..."
612
+ return joined
613
+
614
+ s = str(result_content)
615
+ if len(s) > max_len:
616
+ return s[:max_len] + "..."
617
+ return s
618
+
619
+
620
def _make_tool_result_message(result_content: Any, tool_call_id: str = "") -> ModelMessage:
    """Build a ``role="tool"`` ModelMessage for use in a model input.

    For list content every item becomes a TEXT part and ``content`` stays
    empty, so raw JSON is not dumped into the content field: strings are used
    verbatim, dicts that are ``"text"`` typed (or have no ``type``) contribute
    their ``text``, other dicts their JSON cut to 4096 characters, and
    anything else is ignored.  Non-list content is rendered with
    ``format_content`` into ``content``.
    """
    if not isinstance(result_content, list):
        # String or other scalar
        return _make_message(
            role="tool",
            content=format_content(result_content),
            tool_call_id=tool_call_id
        )

    parts_list = []
    for item in result_content:
        if isinstance(item, str):
            part_text = item
        elif isinstance(item, dict):
            if item.get("type", "text") == "text":
                part_text = item.get("text", "")
            else:
                part_text = json.dumps(item, ensure_ascii=False)[:4096]
        else:
            continue
        parts_list.append(ModelMessagePart(type=ModelMessagePartType.TEXT, text=part_text))

    return _make_message(
        role="tool",
        content="",
        tool_call_id=tool_call_id,
        parts=parts_list
    )
653
+
654
+
655
def _raw_content_to_input_message(raw_content: Any, role: str) -> List[ModelMessage]:
    """Convert raw Claude content into CozeLoop ModelMessage(s) for a model input.

    Non-list content (a string or anything else) becomes one message whose
    ``content`` is ``format_content(raw_content)``.

    List content is handled in one of two ways (non-dict items are ignored):

    - If there is at least one dict item and every dict item is a
      ``tool_result``, one ``role="tool"`` message is returned per result.
    - Otherwise a single message with the given *role* is returned:
      ``tool_use`` items go to ``tool_calls``; non-empty ``text`` items go to
      both ``parts`` and the newline-joined ``content``; ``thinking`` and
      ``redacted_thinking`` items are skipped; any other item becomes a TEXT
      part holding its JSON cut to 4096 characters.
    """
    if not isinstance(raw_content, list):
        return [_make_message(role, format_content(raw_content))]

    dict_items = [item for item in raw_content if isinstance(item, dict)]

    only_tool_results = bool(dict_items) and all(
        item.get("type") == "tool_result" for item in dict_items
    )
    if only_tool_results:
        return [
            _make_tool_result_message(
                item.get("content", ""),
                tool_call_id=item.get("tool_use_id", "")
            )
            for item in dict_items
        ]

    # Mixed content: split into tool_calls, parts, and text
    calls = []
    parts = []
    texts = []
    for item in dict_items:
        kind = item.get("type", "")
        if kind == "tool_use":
            tool_input = item.get("input", "")
            if isinstance(tool_input, dict):
                arguments = json.dumps(tool_input, ensure_ascii=False)
            else:
                arguments = str(tool_input)
            calls.append(ModelToolCall(
                id=item.get("id", ""),
                type="function",
                function=ModelToolCallFunction(
                    name=item.get("name", ""),
                    arguments=arguments
                )
            ))
        elif kind == "text":
            body = item.get("text", "")
            if body:
                texts.append(body)
                parts.append(ModelMessagePart(type=ModelMessagePartType.TEXT, text=body))
        elif kind in ("thinking", "redacted_thinking"):
            continue  # skip internal thinking from input history
        else:
            parts.append(ModelMessagePart(
                type=ModelMessagePartType.TEXT,
                text=json.dumps(item, ensure_ascii=False)[:4096]
            ))

    return [_make_message(
        role=role,
        content="\n".join(texts),
        tool_calls=calls or None,
        parts=parts or None
    )]
732
+
733
+
734
def _build_history_messages(history_turns: List[Dict[str, Any]]) -> list:
    """Flatten previously processed turns into an ordered list of input messages.

    For each turn: the user message (when it has non-empty content), then for
    every step the assistant content converted with
    ``_raw_content_to_input_message`` (when non-empty) followed by one tool
    message per tool result.  ``None`` or an empty list yields ``[]``.
    """
    collected = []
    for past_turn in history_turns or []:
        user_body = past_turn.get("user_message", {}).get("message", {})
        if user_body:
            user_content = user_body.get("content")
            if not is_empty_content(user_content):
                collected.append(_make_message("user", format_content(user_content)))

        for step in past_turn.get("steps", []):
            assistant_body = step.get("assistant_message", {}).get("message", {})
            assistant_content = assistant_body.get("content")
            if not is_empty_content(assistant_content):
                collected.extend(_raw_content_to_input_message(assistant_content, "assistant"))
            collected.extend(
                _make_tool_result_message(
                    tool_result.get("content", ""),
                    tool_call_id=tool_result.get("tool_use_id", "")
                )
                for tool_result in step.get("tool_results", [])
            )
    return collected
754
+
755
+
756
+ # --- CozeLoop Trace Reporting ---
757
+
758
+ def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, history_turns: Optional[List[Dict[str, Any]]] = None):
759
+ """Send conversation turns to CozeLoop.
760
+
761
+ Span hierarchy:
762
+ root_span (claude_code_request) [input=user_input, output=final_response]
763
+ +-- turn_span
764
+ |-- model_span (1st model call)
765
+ |-- tool_span / agent_span (tool call from 1st model response)
766
+ |-- model_span (2nd model call, after receiving tool result)
767
+ |-- tool_span / agent_span (tool call from 2nd model response)
768
+ |-- ...
769
+ +-- model_span (Nth model call, final text response)
770
+ """
771
+ if not turns:
772
+ return
773
+
774
+ debug_log(f"Initializing CozeLoop client for session: {session_id}")
775
+ token = get_fresh_token()
776
+ if token:
777
+ os.environ["COZELOOP_API_TOKEN"] = token
778
+ print(f"[CozeLoop] Token 获取成功 ({token[:12]}...)", file=sys.stderr)
779
+ else:
780
+ print("[CozeLoop] 警告: 未找到有效 Token,上报可能失败", file=sys.stderr)
781
+ creds = _load_credentials()
782
+ workspace_id = (creds or {}).get("workspace_id") or os.environ.get("COZELOOP_WORKSPACE_ID", "") or _DEFAULT_WORKSPACE_ID
783
+ os.environ["COZELOOP_WORKSPACE_ID"] = workspace_id
784
+ client_kwargs = {
785
+ "ultra_large_report": True,
786
+ "upload_timeout": 120,
787
+ "trace_finish_event_processor": _make_finish_event_processor(),
788
+ }
789
+ if workspace_id:
790
+ client_kwargs["workspace_id"] = workspace_id
791
+ if token:
792
+ client_kwargs["api_token"] = token
793
+ client = cozeloop.new_client(**client_kwargs)
794
+
795
+ try:
796
+ with client.start_span(name="claude_code_request", span_type="main") as root_span:
797
+ root_span.set_runtime(Runtime(library="claude-code"))
798
+ root_tags = {
799
+ "thread_id": session_id,
800
+ "total_turns": len(turns),
801
+ "source": "claude_code"
802
+ }
803
+ root_baggage = {
804
+ "thread_id": session_id,
805
+ }
806
+ # Inject coze-context kv (last occurrence across turns wins).
807
+ coze_tags = {}
808
+ for turn in turns:
809
+ um = turn.get("user_message", {}).get("message", {})
810
+ t = coze_context_tags(um.get("content") if um else None)
811
+ if t:
812
+ coze_tags = t
813
+ if coze_tags:
814
+ root_tags.update(coze_tags)
815
+ root_baggage.update(coze_tags)
816
+ root_span.set_tags(root_tags)
817
+ root_span.set_baggage(root_baggage)
818
+
819
+ # Set root span input: first user message across all turns
820
+ first_user_content = None
821
+ for turn in turns:
822
+ um = turn.get("user_message", {}).get("message", {})
823
+ uc = um.get("content") if um else None
824
+ if not is_empty_content(uc):
825
+ first_user_content = uc
826
+ break
827
+ if first_user_content is not None:
828
+ root_span.set_input(format_content(first_user_content))
829
+
830
+ # Build cumulative history from previously processed turns
831
+ history_messages = _build_history_messages(history_turns)
832
+
833
+ # Process each turn as a child span under the root
834
+ for i, turn in enumerate(turns):
835
+ try:
836
+ steps = turn.get("steps", [])
837
+ total_steps = len(steps)
838
+
839
+ with client.start_span(name=f"turn_{i}", span_type="main") as turn_span:
840
+ turn_span.set_runtime(Runtime(library="claude-code"))
841
+ turn_span.set_tags({
842
+ "thread_id": session_id,
843
+ "turn_index": i,
844
+ "total_steps": total_steps,
845
+ "source": "claude_code",
846
+ })
847
+
848
+ # Extract user input for this turn
849
+ user_message = turn.get("user_message", {}).get("message", {})
850
+ user_raw_content = user_message.get("content") if user_message else None
851
+
852
+ # Build input context for the first model call in this turn
853
+ input_messages = list(history_messages)
854
+ if not is_empty_content(user_raw_content):
855
+ input_messages.append(_make_message("user", format_content(user_raw_content)))
856
+
857
+ # Process each step: model_span + tool_spans
858
+ for j, step in enumerate(steps):
859
+ assistant_msg = step.get("assistant_message", {})
860
+ assistant_message_obj = assistant_msg.get("message", {})
861
+ raw_content = assistant_message_obj.get("content", [])
862
+ model_name = assistant_message_obj.get("model", "claude-code")
863
+
864
+ # --- Create model span for this step ---
865
+ with client.start_span(name=f"model_call_{j}", span_type="model") as model_span:
866
+ model_span.set_runtime(Runtime(library="claude-code"))
867
+ model_span.set_model_name(model_name)
868
+
869
+ # Set input: accumulated context up to this point
870
+ model_span.set_input(ModelInput(
871
+ messages=list(input_messages),
872
+ tools=[],
873
+ tool_choice=ModelToolChoice(type="", function=None)
874
+ ))
875
+
876
+ # Build output: text -> parts, tool_use -> tool_calls, thinking -> reasoning_content
877
+ text_parts = []
878
+ tool_call_list = []
879
+ parts_list = []
880
+ thinking_parts = []
881
+ if isinstance(raw_content, list):
882
+ for item in raw_content:
883
+ if not isinstance(item, dict):
884
+ continue
885
+ item_type = item.get("type", "")
886
+ if item_type == "text":
887
+ text = item.get("text", "")
888
+ if text:
889
+ text_parts.append(text)
890
+ parts_list.append(ModelMessagePart(type=ModelMessagePartType.TEXT, text=text))
891
+ elif item_type == "thinking":
892
+ thinking = item.get("thinking", "")
893
+ if thinking:
894
+ thinking_parts.append(thinking)
895
+ elif item_type == "redacted_thinking":
896
+ pass # encrypted, cannot extract
897
+ elif item_type == "tool_use":
898
+ tool_call_list.append(ModelToolCall(
899
+ id=item.get("id", ""),
900
+ type="function",
901
+ function=ModelToolCallFunction(
902
+ name=item.get("name", ""),
903
+ arguments=json.dumps(item.get("input", {}), ensure_ascii=False) if isinstance(item.get("input"), dict) else str(item.get("input", ""))
904
+ )
905
+ ))
906
+ else:
907
+ parts_list.append(ModelMessagePart(
908
+ type=ModelMessagePartType.TEXT,
909
+ text=json.dumps(item, ensure_ascii=False)[:4096]
910
+ ))
911
+ elif isinstance(raw_content, str) and raw_content:
912
+ text_parts.append(raw_content)
913
+
914
+ content_text = "\n".join(text_parts) if text_parts else ""
915
+ reasoning_text = "\n".join(thinking_parts) if thinking_parts else ""
916
+ finish_reason = "tool_calls" if tool_call_list else "stop"
917
+
918
+ output_choice = ModelChoice(
919
+ finish_reason=finish_reason,
920
+ index=0,
921
+ message=ModelMessage(
922
+ role="assistant",
923
+ content=content_text,
924
+ reasoning_content=reasoning_text,
925
+ parts=parts_list,
926
+ name="",
927
+ tool_calls=tool_call_list if tool_call_list else [],
928
+ tool_call_id="",
929
+ metadata={}
930
+ )
931
+ )
932
+
933
+ model_span.set_output(ModelOutput(choices=[output_choice]))
934
+
935
+ # Set token usage for this specific model call
936
+ usage = assistant_message_obj.get("usage", {})
937
+ input_tokens = usage.get("input_tokens", 0)
938
+ output_tokens = usage.get("output_tokens", 0)
939
+ cache_creation = usage.get("cache_creation_input_tokens", 0)
940
+ cache_read = usage.get("cache_read_input_tokens", 0)
941
+ if input_tokens > 0 or cache_creation > 0 or cache_read > 0:
942
+ model_span.set_input_tokens(input_tokens + cache_creation + cache_read)
943
+ if output_tokens > 0:
944
+ model_span.set_output_tokens(output_tokens)
945
+
946
+ # Add this assistant message to context for subsequent steps
947
+ if not is_empty_content(raw_content):
948
+ input_messages.extend(_raw_content_to_input_message(raw_content, "assistant"))
949
+
950
+ # --- Create tool spans for each tool call in this step ---
951
+ for tool_call in step.get("tool_calls", []):
952
+ tool_name = tool_call.get('name', 'unknown')
953
+ sub_steps = tool_call.get("_sub_steps", [])
954
+ agent_id = tool_call.get("_agent_id", "")
955
+ is_agent = bool(sub_steps)
956
+
957
+ # Task tool with sub-agent steps uses "agent" span type
958
+ span_type = "agent" if is_agent else "tool"
959
+ span_name = f"agent_{tool_name}" if is_agent else f"tool_{tool_name}"
960
+
961
+ with client.start_span(name=span_name, span_type=span_type) as tool_span:
962
+ tool_span.set_runtime(Runtime(library="claude-code"))
963
+ tags = {
964
+ "tool_name": tool_name,
965
+ "tool_call_id": tool_call.get("id"),
966
+ "step_index": j,
967
+ }
968
+ if is_agent:
969
+ tags["agent_name"] = agent_id
970
+ tool_span.set_tags(tags)
971
+ tool_span.set_input(
972
+ json.dumps(tool_call.get("input", {}), ensure_ascii=False)[:2000]
973
+ )
974
+
975
+ # Find matching tool result
976
+ tool_id = tool_call.get("id")
977
+ for result in step.get("tool_results", []):
978
+ if result.get("tool_use_id") == tool_id:
979
+ result_content = result.get("content", "")
980
+ tool_span.set_output(_format_tool_output(result_content))
981
+ break
982
+
983
+ # If this tool call has sub-agent steps (e.g. Task tool),
984
+ # create child spans for each sub-agent model call and tool call.
985
+ if sub_steps:
986
+ # Initialize sub-agent input with the prompt (first user message)
987
+ sub_input_messages = []
988
+ task_prompt = tool_call.get("input", {}).get("prompt", "")
989
+ if task_prompt:
990
+ sub_input_messages.append(_make_message("user", format_content(task_prompt)))
991
+
992
+ # Distribute total usage evenly across sub-agent model steps.
993
+ total_usage = tool_call.get("_total_usage", {})
994
+ total_in = (total_usage.get("input_tokens", 0)
995
+ + total_usage.get("cache_creation_input_tokens", 0)
996
+ + total_usage.get("cache_read_input_tokens", 0))
997
+ total_out = total_usage.get("output_tokens", 0)
998
+ n_model_steps = len(sub_steps)
999
+ per_step_in = total_in // n_model_steps if n_model_steps > 0 else 0
1000
+ per_step_out = total_out // n_model_steps if n_model_steps > 0 else 0
1001
+ # Give remainder to the last step
1002
+ remainder_in = total_in - per_step_in * n_model_steps if n_model_steps > 0 else 0
1003
+ remainder_out = total_out - per_step_out * n_model_steps if n_model_steps > 0 else 0
1004
+
1005
+ for sk, sub_step in enumerate(sub_steps):
1006
+ sub_asst = sub_step.get("assistant_message", {}).get("message", {})
1007
+ sub_content = sub_asst.get("content", [])
1008
+ sub_model = sub_asst.get("model") or "claude-code"
1009
+
1010
+ # Sub-agent model span
1011
+ with client.start_span(name=f"subagent_model_{sk}", span_type="model") as sub_model_span:
1012
+ sub_model_span.set_runtime(Runtime(library="claude-code"))
1013
+ sub_model_span.set_model_name(sub_model)
1014
+ sub_model_span.set_tags({"agent_name": agent_id})
1015
+
1016
+ # Set input: accumulated sub-agent context
1017
+ sub_model_span.set_input(ModelInput(
1018
+ messages=list(sub_input_messages),
1019
+ tools=[],
1020
+ tool_choice=ModelToolChoice(type="", function=None)
1021
+ ))
1022
+
1023
+ # Build output for sub-agent model call
1024
+ sub_text_parts = []
1025
+ sub_tc_list = []
1026
+ sub_parts_list = []
1027
+ sub_thinking_parts = []
1028
+ if isinstance(sub_content, list):
1029
+ for item in sub_content:
1030
+ if not isinstance(item, dict):
1031
+ continue
1032
+ item_type = item.get("type", "")
1033
+ if item_type == "text":
1034
+ t = item.get("text", "")
1035
+ if t:
1036
+ sub_text_parts.append(t)
1037
+ sub_parts_list.append(ModelMessagePart(type=ModelMessagePartType.TEXT, text=t))
1038
+ elif item_type == "thinking":
1039
+ t = item.get("thinking", "")
1040
+ if t:
1041
+ sub_thinking_parts.append(t)
1042
+ elif item_type == "redacted_thinking":
1043
+ pass
1044
+ elif item_type == "tool_use":
1045
+ sub_tc_list.append(ModelToolCall(
1046
+ id=item.get("id", ""),
1047
+ type="function",
1048
+ function=ModelToolCallFunction(
1049
+ name=item.get("name", ""),
1050
+ arguments=json.dumps(item.get("input", {}), ensure_ascii=False) if isinstance(item.get("input"), dict) else str(item.get("input", ""))
1051
+ )
1052
+ ))
1053
+ else:
1054
+ sub_parts_list.append(ModelMessagePart(
1055
+ type=ModelMessagePartType.TEXT,
1056
+ text=json.dumps(item, ensure_ascii=False)[:4096]
1057
+ ))
1058
+
1059
+ sub_content_text = "\n".join(sub_text_parts) if sub_text_parts else ""
1060
+ sub_reasoning_text = "\n".join(sub_thinking_parts) if sub_thinking_parts else ""
1061
+ sub_finish = "tool_calls" if sub_tc_list else "stop"
1062
+ sub_model_span.set_output(ModelOutput(choices=[ModelChoice(
1063
+ finish_reason=sub_finish,
1064
+ index=0,
1065
+ message=ModelMessage(
1066
+ role="assistant",
1067
+ content=sub_content_text,
1068
+ reasoning_content=sub_reasoning_text,
1069
+ parts=sub_parts_list,
1070
+ name="",
1071
+ tool_calls=sub_tc_list if sub_tc_list else [],
1072
+ tool_call_id="",
1073
+ metadata={}
1074
+ )
1075
+ )]))
1076
+
1077
+ # Distribute tokens evenly; remainder goes to last step
1078
+ step_in = per_step_in + (remainder_in if sk == n_model_steps - 1 else 0)
1079
+ step_out = per_step_out + (remainder_out if sk == n_model_steps - 1 else 0)
1080
+ if step_in > 0:
1081
+ sub_model_span.set_input_tokens(step_in)
1082
+ if step_out > 0:
1083
+ sub_model_span.set_output_tokens(step_out)
1084
+
1085
+ # Add assistant output to sub-agent context
1086
+ if not is_empty_content(sub_content):
1087
+ sub_input_messages.extend(
1088
+ _raw_content_to_input_message(sub_content, "assistant")
1089
+ )
1090
+
1091
+ # Sub-agent tool spans
1092
+ for sub_tc in sub_step.get("tool_calls", []):
1093
+ with client.start_span(name=f"tool_{sub_tc.get('name', 'unknown')}", span_type="tool") as sub_tool_span:
1094
+ sub_tool_span.set_tags({
1095
+ "tool_name": sub_tc.get("name"),
1096
+ "tool_call_id": sub_tc.get("id"),
1097
+ "agent_name": agent_id,
1098
+ })
1099
+ sub_tool_span.set_runtime(Runtime(library="claude-code"))
1100
+ sub_tool_span.set_input(
1101
+ json.dumps(sub_tc.get("input", {}), ensure_ascii=False)[:2000]
1102
+ )
1103
+
1104
+ sub_tool_id = sub_tc.get("id")
1105
+ for sub_result in sub_step.get("tool_results", []):
1106
+ if sub_result.get("tool_use_id") == sub_tool_id:
1107
+ sr_content = sub_result.get("content", "")
1108
+ sub_tool_span.set_output(_format_tool_output(sr_content))
1109
+ break
1110
+
1111
+ # Add tool results to sub-agent context
1112
+ for sub_result in sub_step.get("tool_results", []):
1113
+ sr_content = sub_result.get("content", "")
1114
+ sub_input_messages.append(_make_tool_result_message(
1115
+ sr_content,
1116
+ tool_call_id=sub_result.get("tool_use_id", "")
1117
+ ))
1118
+
1119
+ # Add tool results to context for subsequent model calls
1120
+ for result in step.get("tool_results", []):
1121
+ result_content = result.get("content", "")
1122
+ input_messages.append(_make_tool_result_message(
1123
+ result_content,
1124
+ tool_call_id=result.get("tool_use_id", "")
1125
+ ))
1126
+
1127
+ # Append this turn's messages to history for subsequent turns
1128
+ if user_message and not is_empty_content(user_message.get("content")):
1129
+ history_messages.append(_make_message(
1130
+ "user", format_content(user_message.get("content"))
1131
+ ))
1132
+ for step in steps:
1133
+ msg = step.get("assistant_message", {})
1134
+ asst_content = msg.get("message", {}).get("content")
1135
+ if not is_empty_content(asst_content):
1136
+ history_messages.extend(_raw_content_to_input_message(asst_content, "assistant"))
1137
+ for tr in step.get("tool_results", []):
1138
+ tr_content = tr.get("content", "")
1139
+ history_messages.append(_make_tool_result_message(
1140
+ tr_content,
1141
+ tool_call_id=tr.get("tool_use_id", "")
1142
+ ))
1143
+
1144
+ except Exception as e:
1145
+ debug_log(f"Error processing turn {i}: {e}")
1146
+ continue
1147
+
1148
+ # Set root span output: last assistant text from the last step of the last turn
1149
+ last_output = None
1150
+ for turn in reversed(turns):
1151
+ for step in reversed(turn.get("steps", [])):
1152
+ asst = step.get("assistant_message", {}).get("message", {})
1153
+ content = asst.get("content", [])
1154
+ if isinstance(content, list):
1155
+ text_parts = [
1156
+ item.get("text", "")
1157
+ for item in content
1158
+ if isinstance(item, dict) and item.get("type") == "text" and item.get("text")
1159
+ ]
1160
+ if text_parts:
1161
+ last_output = "\n".join(text_parts)
1162
+ break
1163
+ elif isinstance(content, str) and content.strip():
1164
+ last_output = content
1165
+ break
1166
+ if last_output:
1167
+ break
1168
+ if last_output:
1169
+ root_span.set_output(format_content(last_output))
1170
+
1171
+ debug_log(f"Successfully processed {len(turns)} turn(s) for session {session_id}")
1172
+
1173
+ except Exception as e:
1174
+ print(f"[CozeLoop] 上报失败 ✗ {e}", file=sys.stderr)
1175
+ debug_log(f"An error occurred while sending traces to CozeLoop: {e}")
1176
+ finally:
1177
+ # Crucial: close the client to ensure all buffered traces are sent.
1178
+ client.close()
1179
+ debug_log("CozeLoop client closed.")
1180
+
1181
+
1182
+ # --- Hook Input ---
1183
+
1184
def read_hook_stdin() -> Dict[str, Any]:
    """Read the JSON hook payload that Claude Code writes to stdin.

    Claude Code passes a JSON object via stdin to hooks, containing fields like
    ``transcript_path``, ``session_id``, ``hook_event_name``, etc.

    The read is skipped when stdin is missing or attached to a terminal, so an
    interactive run does not wait for keyboard input. Otherwise ``read()``
    blocks until the writing side closes the pipe (EOF).

    Returns:
        The decoded payload. An empty dict is returned when stdin is
        unavailable, is a TTY, is empty, cannot be read, is not valid JSON,
        or holds valid JSON that is not an object.
    """
    stream = sys.stdin
    if stream is None:
        # Some launchers start Python without a stdin stream at all.
        debug_log("Hook stdin is not available.")
        return {}

    try:
        if stream.isatty():
            return {}
        raw = stream.read().strip()
        if not raw:
            return {}
        payload = json.loads(raw)
    except Exception as e:
        # Best effort: a broken or malformed stdin must never crash the hook.
        debug_log(f"Error reading hook stdin: {e}")
        return {}

    # Callers use dict methods (.get) on the result, so reject lists, strings,
    # numbers, etc. explicitly rather than relying on an AttributeError.
    if not isinstance(payload, dict):
        debug_log(
            "Ignoring hook stdin: expected a JSON object, "
            f"got {type(payload).__name__}"
        )
        return {}

    debug_log(f"Read hook stdin: keys={list(payload.keys())}")
    return payload
1201
+
1202
+
1203
+ # --- Main Execution ---
1204
+
1205
def main():
    """Run the hook once: find the transcript, upload unseen turns, save progress.

    Steps, in order:
      1. Stop immediately when ``TRACE_TO_COZELOOP`` is set to ``false``.
      2. Pick the conversation file: ``transcript_path`` from the stdin payload
         if it exists on disk, otherwise ``find_latest_conversation_file()``.
      3. Read the messages at or after the stored ``last_processed_line``.
      4. Resolve the session id from stdin, then the new messages, then the
         saved state, and finally a generated timestamp/pid value.
      5. Upload the new turns only if at least one carries coze-context.
      6. Persist the next line offset so the same lines are not re-sent.
    """
    print("[CozeLoop] Hook triggered.", file=sys.stderr)
    debug_log("Hook started.")

    # Tracing can be switched off through the environment.
    tracing_flag = os.environ.get("TRACE_TO_COZELOOP", "")
    if tracing_flag.lower() == "false":
        debug_log("TRACE_TO_COZELOOP is set to 'false', skipping")
        return

    # Claude Code supplies transcript_path, session_id, etc. on stdin.
    hook_input = read_hook_stdin()

    # Conversation file: stdin value first, directory scan as the fallback.
    conversation_file = hook_input.get("transcript_path")
    if conversation_file:
        conversation_file = os.path.expanduser(conversation_file)
        if not os.path.exists(conversation_file):
            debug_log(f"transcript_path from stdin does not exist: {conversation_file}")
            conversation_file = None
    conversation_file = conversation_file or find_latest_conversation_file()
    if not conversation_file:
        debug_log("Execution skipped: No conversation file found.")
        return

    debug_log(f"Using conversation file: {conversation_file}")
    print(f"[CozeLoop] 读取会话文件: {conversation_file}", file=sys.stderr)

    # The saved state records where the previous run stopped reading.
    state_file = get_state_file_path(conversation_file)
    state = load_state(state_file)
    last_processed_line = state.get("last_processed_line", 0)

    new_messages = read_new_messages(conversation_file, last_processed_line)

    # Session id precedence: stdin > new messages > saved state > generated.
    session_id = hook_input.get("session_id")
    if not session_id:
        session_id = next(
            (m.get("sessionId") for m in new_messages if m.get("sessionId")),
            None,
        )
    if not session_id:
        stored_session_id = state.get("session_id")
        if stored_session_id:
            session_id = stored_session_id
        else:
            stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
            session_id = f"claude-code-{stamp}-{os.getpid()}"
            debug_log(f"Generated new session ID: {session_id}")

    state["session_id"] = session_id
    debug_log(f"Session ID: {session_id}")

    if not new_messages:
        debug_log("No new messages to process.")
        return

    debug_log(f"Found {len(new_messages)} new messages.")

    # Already-processed lines are re-read only to give model spans their context.
    history_turns = []
    if last_processed_line > 0:
        earlier_messages = [
            m
            for m in read_new_messages(conversation_file, 0)
            if m.get("_line_number", 0) < last_processed_line
        ]
        history_turns = group_messages_into_turns(earlier_messages)
        debug_log(f"Loaded {len(history_turns)} historical turn(s) for context.")

    def _carries_coze_context(turn):
        # True when the turn's user message content yields coze-context tags.
        user_msg = turn.get("user_message", {}).get("message", {}) or {}
        return coze_context_tags(user_msg.get("content"))

    # Upload is gated on coze-context being present in at least one new turn.
    turns = group_messages_into_turns(new_messages)
    if turns:
        if not any(_carries_coze_context(turn) for turn in turns):
            debug_log("No coze-context found in any turn, skipping upload.")
            return
        print(f"[CozeLoop] 开始上报: session={session_id}, turns={len(turns)}", file=sys.stderr)
        send_turns_to_cozeloop(turns, session_id, history_turns)

    # Advance the stored offset to the line after the last one in this batch.
    highest_line = max(m.get("_line_number", 0) for m in new_messages)
    state["last_processed_line"] = highest_line + 1
    save_state(state_file, state)
    print(f"[CozeLoop] 上报完成 ✓ session={session_id}, turns={len(turns)}", file=sys.stderr)
    debug_log(f"State updated. Last processed line: {state['last_processed_line']}")

    debug_log("Hook finished.")
1298
+
1299
# Run the hook only when this file is executed as a script, so importing the
# module (e.g. for testing) has no side effects.
if __name__ == "__main__":
    main()
1301
+
1302
+
1303
+