coze_lab 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1051 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CozeLoop Hook for Codex CLI
4
+
5
+ This hook integrates OpenAI Codex CLI with CozeLoop for tracing and observability.
6
+ It captures conversation interactions from the rollout JSONL file and sends them
7
+ to the CozeLoop platform for analysis.
8
+
9
+ Usage:
10
+ 1. Copy this script to ~/.codex/hooks/cozeloop_hook.py
11
+ 2. Register the hook in ~/.codex/hooks.json
12
+ 3. Set environment variables: COZELOOP_WORKSPACE_ID, COZELOOP_API_TOKEN
13
+ 4. Run Codex CLI as normal - traces will be sent automatically on each turn end
14
+
15
+ Hook input (via stdin):
16
+ {
17
+ "hook_event_name": "Stop",
18
+ "session_id": "...",
19
+ "turn_id": "...",
20
+ "transcript_path": "/Users/.../.codex/sessions/YYYY/MM/DD/rollout-xxx.jsonl"
21
+ }
22
+
23
+ Subagent support:
24
+ When Codex spawns subagents, each subagent gets its own rollout file with:
25
+ session_meta.source = {"subagent": {"thread_spawn": {"parent_thread_id": "..."}}}
26
+ Subagent hooks do NOT report traces directly. Instead they save their
27
+ processed turn data to a per-agent file under ~/.codex/cozeloop_state/.
28
+ When the parent session's hook runs, it reads those saved files and includes
29
+ the subagent spans inside the same trace, producing a single trace per
30
+ conversation that contains both the main agent and all of its subagents.
31
+ """
32
+
33
+ import json
34
+ import os
35
+ import sys
36
+ import hashlib
37
+ import time
38
+ import urllib.request
39
+ import urllib.error
40
+ from datetime import datetime
41
+ from pathlib import Path
42
+ from typing import Optional, List, Dict, Any
43
+
44
# --- Token refresh --------------------------------------------------------
# OAuth client id sent as "client_id" in the refresh-token request.
_COZELOOP_CLIENT_ID = "56089404009908161803155625287505.app.coze"
# Base URL of the Coze API; the token endpoint path is appended to it.
_COZE_API = "https://api.coze.cn"
# Remaining validity (seconds) a cached access token must exceed to be reused
# without refreshing; compared against `expires_at / 1000 - time.time()`.
_REFRESH_THRESHOLD = 10 * 60
_DEFAULT_WORKSPACE_ID = "7644910356078837760"  # hardcoded spaceID fallback
49
+
50
+
51
+ # --- coze-context parsing -------------------------------------------------
52
+ # User messages may embed a block like:
53
+ # <coze-context>
54
+ # account_id: 0
55
+ # agent_id: 7644920552473395499
56
+ # session_id: 7644919579054997796
57
+ # message_id: 04dd5246-...
58
+ # </coze-context>
59
+ # We parse its key:value pairs and inject them into the trace.
60
+
61
_COZE_CTX_OPEN = "<coze-context>"
_COZE_CTX_CLOSE = "</coze-context>"


def parse_coze_context(text: str) -> Dict[str, str]:
    """Parse the key:value pairs of the LAST <coze-context> block in *text*.

    An empty dict is returned when *text* is empty, contains no opening tag,
    or when the last opening tag is not followed by a closing tag. Keys are
    returned exactly as written; prefixing them is the caller's job.
    """
    if not text:
        return {}

    # Everything after the last opening tag (the latest context wins).
    _, opener, tail = text.rpartition(_COZE_CTX_OPEN)
    if not opener:
        return {}

    block, closer, _ = tail.partition(_COZE_CTX_CLOSE)
    if not closer:
        return {}

    # The block may contain real newlines or literal backslash escapes
    # (an embedded JSON string that was never un-escaped); normalise the
    # escaped forms to real newlines, longest escape first.
    for escaped in ("\\r\\n", "\\n", "\\r"):
        block = block.replace(escaped, "\n")

    pairs: Dict[str, str] = {}
    for raw_line in block.splitlines():
        name, colon, val = raw_line.strip().partition(":")
        name = name.strip()
        # Lines without a colon or with an empty key are ignored.
        if colon and name:
            pairs[name] = val.strip()
    return pairs
94
+
95
+
96
def coze_context_tags(text: str) -> Dict[str, str]:
    """Build trace tags from the coze-context block, each key prefixed 'coze_'."""
    tags: Dict[str, str] = {}
    for raw_key, raw_value in parse_coze_context(text).items():
        tags["coze_" + raw_key] = raw_value
    return tags
99
+
100
+
101
+ # --- trace upload failure / logid capture ---------------------------------
102
+ def _extract_logid(msg: str) -> str:
103
+ """Pull the server logid out of an SDK error message, if present.
104
+
105
+ SDK failure messages embed it as 'logid=XXXX' (sometimes within brackets).
106
+ """
107
+ if not msg:
108
+ return ""
109
+ marker = "logid="
110
+ idx = msg.find(marker)
111
+ if idx == -1:
112
+ return ""
113
+ rest = msg[idx + len(marker):]
114
+ logid = []
115
+ for ch in rest:
116
+ if ch.isalnum():
117
+ logid.append(ch)
118
+ else:
119
+ break
120
+ return "".join(logid)
121
+
122
+
123
+ def _make_finish_event_processor():
124
+ """Return a trace_finish_event_processor that surfaces failures + logid.
125
+
126
+ The CozeLoop SDK calls this for each flush event; on failure we print the
127
+ server logid to stderr so it can be handed to platform support for tracing
128
+ the root cause (e.g. via `bytedcli log get-logid-log <logid>`).
129
+ """
130
+ def _processor(info):
131
+ try:
132
+ if not getattr(info, "is_event_fail", False):
133
+ return
134
+ detail = getattr(info, "detail_msg", "") or ""
135
+ logid = _extract_logid(detail)
136
+ if logid:
137
+ print(f"[CozeLoop] 上报失败 logid={logid} (可用 bytedcli log get-logid-log {logid} 排查)", file=sys.stderr)
138
+ else:
139
+ print(f"[CozeLoop] 上报失败: {detail[:300]}", file=sys.stderr)
140
+ except Exception:
141
+ pass
142
+ return _processor
143
+
144
+
145
+
146
+ def _get_credentials_path() -> Path:
147
+ return Path.home() / ".cozeloop" / "credentials.json"
148
+
149
def _load_credentials():
    """Load the cached credentials from the credentials file.

    Returns:
        The parsed JSON object as a dict, or None when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    path = _get_credentials_path()
    if not path.exists():
        return None
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        # Best effort: any read/parse problem means "no cached credentials".
        return None
    # Callers use dict methods (.get / [...]) on the result, so anything
    # other than a JSON object is treated as absent credentials.
    return data if isinstance(data, dict) else None
157
+
158
def _save_credentials(creds):
    """Write *creds* as indented JSON to the credentials file with mode 0o600.

    The file is created with owner-only permissions from the start, and an
    already existing file is tightened before the secrets are written, so
    the tokens are never stored in a file with wider permissions.

    Args:
        creds: JSON-serialisable credentials mapping.

    Raises:
        OSError: if the directory or file cannot be created or written.
        TypeError: if *creds* is not JSON-serialisable.
    """
    path = _get_credentials_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialise first so a serialisation error cannot truncate the file.
    payload = json.dumps(creds, indent=2)
    fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        # The mode passed to os.open only applies to newly created files;
        # restrict a pre-existing file before writing the tokens into it.
        os.chmod(path, 0o600)
        handle.write(payload)
163
+
164
def _refresh_token(refresh_tok: str):
    """Exchange a refresh token for a new access token and cache the result.

    Posts a `refresh_token` grant to the Coze OAuth token endpoint. On
    success the new access token, refresh token and expiry are merged into
    the cached credentials (other stored fields, such as `workspace_id`, are
    kept) and the access token is returned.

    Args:
        refresh_tok: The refresh token to exchange.

    Returns:
        The new access token, or None when the request fails or the response
        carries no access token.
    """
    try:
        payload = json.dumps({
            "grant_type": "refresh_token",
            "client_id": _COZELOOP_CLIENT_ID,
            "refresh_token": refresh_tok,
        }).encode()
        req = urllib.request.Request(
            f"{_COZE_API}/api/permission/oauth2/token",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
    except Exception:
        # Network / HTTP / JSON problems: the caller falls back to other tokens.
        return None

    if not isinstance(data, dict) or not data.get("access_token"):
        return None

    # `expires_in` is treated as an absolute Unix timestamp in seconds and is
    # stored in milliseconds; get_fresh_token divides `expires_at` by 1000.
    try:
        expires_at = data.get("expires_in", 0) * 1000
    except TypeError:
        expires_at = 0  # unknown expiry: forces a refresh on the next run

    # Merge into the existing credentials instead of replacing them, so
    # fields this function does not own (e.g. workspace_id) are not lost.
    try:
        stored = _load_credentials()
    except Exception:
        stored = None
    creds = dict(stored) if isinstance(stored, dict) else {}
    creds.update({
        "access_token": data["access_token"],
        "refresh_token": data.get("refresh_token", refresh_tok),
        "expires_at": expires_at,
    })

    try:
        _save_credentials(creds)
    except Exception as e:
        # The token is still valid for this run even if it cannot be cached.
        print(f"[CozeLoop] Failed to persist refreshed credentials: {e}", file=sys.stderr)
    return creds["access_token"]
189
+
190
def get_fresh_token():
    """Return an API token, refreshing the cached one when it is about to expire.

    Order of preference: the cached access token while it remains valid for
    more than `_REFRESH_THRESHOLD` seconds, then a token obtained through the
    cached refresh token, then the `COZELOOP_API_TOKEN` environment variable
    (None when unset).
    """
    cached = _load_credentials()
    if not cached:
        return os.environ.get("COZELOOP_API_TOKEN")

    # `expires_at` is stored in milliseconds.
    seconds_left = cached.get("expires_at", 0) / 1000 - time.time()
    if seconds_left > _REFRESH_THRESHOLD:
        return cached["access_token"]

    renewed = None
    if cached.get("refresh_token"):
        renewed = _refresh_token(cached["refresh_token"])
    if renewed:
        return renewed
    return os.environ.get("COZELOOP_API_TOKEN")
201
+ # -------------------------------------------------------------------------
202
+
203
+ # --- SDK Import ---
204
+ try:
205
+ import cozeloop
206
+ from cozeloop.spec.tracespec import (
207
+ Runtime, ModelInput, ModelMessage, ModelToolChoice,
208
+ ModelOutput, ModelChoice, ModelToolCall, ModelToolCallFunction,
209
+ ModelMessagePart, ModelMessagePartType
210
+ )
211
+ except ImportError:
212
+ print("Error: cozeloop SDK not found. Please install it with: pip install cozeloop", file=sys.stderr)
213
+ sys.exit(1)
214
+
215
+ # --- Configuration ---
216
DEBUG = os.environ.get("CC_COZELOOP_DEBUG", "").lower() == "true"


def debug_log(message: str):
    """Write a timestamped debug line to stderr; a no-op unless DEBUG is set."""
    if not DEBUG:
        return
    stamp = datetime.now().isoformat()
    print(f"[COZELOOP_HOOK_DEBUG] {stamp} - {message}", file=sys.stderr)
223
+
224
+
225
+ # --- State Management ---
226
+
227
def get_state_file_path(transcript_path: str) -> str:
    """Return the per-transcript state file path, creating its directory.

    The file lives in ~/.codex/cozeloop_state/ and is named after the first
    12 hex characters of the MD5 digest of the transcript path.
    """
    directory = Path.home().joinpath(".codex", "cozeloop_state")
    directory.mkdir(parents=True, exist_ok=True)
    digest = hashlib.md5(transcript_path.encode()).hexdigest()
    return str(directory / ("state_" + digest[:12] + ".json"))
233
+
234
+
235
def get_subagent_data_file(agent_session_id: str) -> str:
    """Return the path of the saved turn data for one subagent session.

    The file is ~/.codex/cozeloop_state/subagent_<agent_session_id>.json;
    the directory is created when missing.
    """
    directory = Path.home().joinpath(".codex", "cozeloop_state")
    directory.mkdir(parents=True, exist_ok=True)
    filename = f"subagent_{agent_session_id}.json"
    return str(directory / filename)
240
+
241
+
242
def save_subagent_data(agent_session_id: str, data: Dict[str, Any]):
    """Save subagent turn data for later inclusion by the parent hook.

    The JSON is written with `ensure_ascii=False`, so the file is opened as
    UTF-8 explicitly; with the platform default encoding, non-ASCII text
    could fail to encode and the data would be lost.

    Args:
        agent_session_id: Session id of the subagent; selects the file.
        data: JSON-serialisable turn data.

    Errors are logged through debug_log and never raised.
    """
    path = get_subagent_data_file(agent_session_id)
    try:
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False)
        debug_log(f"Saved subagent data for {agent_session_id}")
    except Exception as e:
        debug_log(f"Error saving subagent data for {agent_session_id}: {e}")


def load_subagent_data(agent_session_id: str) -> Optional[Dict[str, Any]]:
    """Load previously saved subagent turn data.

    Args:
        agent_session_id: Session id of the subagent; selects the file.

    Returns:
        The saved dict, or None when no file exists, it cannot be read or
        parsed, or it does not contain a JSON object.
    """
    path = get_subagent_data_file(agent_session_id)
    if os.path.exists(path):
        try:
            # Read with the same encoding save_subagent_data writes.
            with open(path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            # The caller uses dict methods on the result.
            if isinstance(loaded, dict):
                return loaded
            debug_log(f"Error loading subagent data for {agent_session_id}: not a JSON object")
        except Exception as e:
            debug_log(f"Error loading subagent data for {agent_session_id}: {e}")
    return None
263
+
264
+
265
def load_state(state_file: str) -> Dict[str, Any]:
    """Load the processing state from *state_file*.

    The result always contains the keys `last_processed_line`, `session_id`
    and `conversation_history`; values stored in the file override the
    defaults, and extra stored keys are preserved.

    Args:
        state_file: Path of the JSON state file.

    Returns:
        The state dict. Defaults are returned when the file is missing,
        unreadable, not valid JSON, or not a JSON object.
    """
    state: Dict[str, Any] = {"last_processed_line": 0, "session_id": None, "conversation_history": []}
    if os.path.exists(state_file):
        try:
            with open(state_file, 'r', encoding='utf-8') as f:
                stored = json.load(f)
        except Exception as e:
            debug_log(f"Error loading state: {e}")
        else:
            # A partial or non-object file must not leave the caller without
            # the keys it indexes directly, so merge over the defaults.
            if isinstance(stored, dict):
                state.update(stored)
    return state
274
+
275
+
276
def save_state(state_file: str, state: Dict[str, Any]):
    """Persist *state* as JSON to *state_file*.

    Failures are reported through debug_log only; nothing is raised.
    """
    try:
        with open(state_file, "w") as handle:
            json.dump(state, handle)
    except Exception as err:
        debug_log(f"Error saving state: {err}")
283
+
284
+
285
+ # --- Rollout File Parsing ---
286
+
287
def read_rollout_messages(transcript_path: str, start_line: int = 0) -> List[Dict[str, Any]]:
    """Read raw JSONL entries from the rollout file starting from a given line.

    Args:
        transcript_path: Path of the rollout JSONL file.
        start_line: Zero-based index of the first line to read.

    Returns:
        The parsed JSON objects in file order, each with an added
        `_line_number` key holding its zero-based line index. Blank lines,
        lines that are not valid JSON and lines that are not JSON objects
        are skipped. If the file cannot be read, the entries collected so
        far (possibly none) are returned.
    """
    entries = []
    try:
        # Decode as UTF-8 regardless of the platform locale; undecodable
        # bytes are replaced instead of aborting the whole read.
        with open(transcript_path, 'r', encoding='utf-8', errors='replace') as f:
            for i, line in enumerate(f):
                if i < start_line:
                    continue
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError as e:
                    debug_log(f"Error parsing line {i}: {e}")
                    continue
                # Only objects can carry `_line_number`; a scalar or array
                # line would otherwise raise and stop reading later lines.
                if not isinstance(entry, dict):
                    debug_log(f"Skipping non-object entry on line {i}")
                    continue
                entry['_line_number'] = i
                entries.append(entry)
    except Exception as e:
        debug_log(f"Error reading rollout file: {e}")
    return entries
306
+
307
+
308
def parse_session_meta(entries: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Extract session identity from the first session_meta entry.

    A session is a subagent when its `source` has the shape
    {"subagent": {"thread_spawn": {"parent_thread_id": ...}}} with a
    non-empty parent id. Any other `source` shape (a plain string, or a
    `subagent` value that is not an object) is treated as a non-subagent
    session instead of raising.

    Args:
        entries: Parsed rollout entries.

    Returns:
        A dict with `session_id`, `parent_session_id`, `agent_nickname`,
        `agent_role`, `is_subagent` and `subagent_content_start_line`. The
        last is set only for subagents: the `_line_number` of the second
        session_meta entry plus one, or None when there is no second one.
    """
    result = {
        "session_id": None,
        "parent_session_id": None,
        "agent_nickname": None,
        "agent_role": None,
        "is_subagent": False,
        "subagent_content_start_line": None,
    }
    for entry in entries:
        if entry.get("type") != "session_meta":
            continue
        p = entry.get("payload")
        if not isinstance(p, dict):
            p = {}
        result["session_id"] = p.get("id")
        result["agent_nickname"] = p.get("agent_nickname")
        result["agent_role"] = p.get("agent_role")

        # Walk the nested source one level at a time, checking each level is
        # an object before descending.
        source = p.get("source")
        subagent = source.get("subagent") if isinstance(source, dict) else None
        thread_spawn = subagent.get("thread_spawn") if isinstance(subagent, dict) else None
        parent_id = thread_spawn.get("parent_thread_id") if isinstance(thread_spawn, dict) else None
        if parent_id:
            result["parent_session_id"] = parent_id
            result["is_subagent"] = True
        # Only the first session_meta entry defines the identity.
        break

    if result["is_subagent"]:
        meta_count = 0
        for entry in entries:
            if entry.get("type") == "session_meta":
                meta_count += 1
                if meta_count == 2:
                    result["subagent_content_start_line"] = entry.get("_line_number", 0) + 1
                    break

    return result
345
+
346
+
347
+ # --- Message Content Helpers ---
348
+
349
def is_real_user_message(payload: Dict[str, Any]) -> bool:
    """Tell whether a user-role message payload carries genuine user input.

    True only when the role is "user", the content is a list, and at least
    one `input_text` item has non-blank text that does not start with one of
    the injected markers (<environment_context>, <permissions instructions>,
    <turn_aborted>).
    """
    if payload.get("role") != "user":
        return False
    content = payload.get("content", [])
    if not isinstance(content, list):
        return False

    injected_markers = (
        "<environment_context>",
        "<permissions instructions>",
        "<turn_aborted>",
    )
    for part in content:
        if not (isinstance(part, dict) and part.get("type") == "input_text"):
            continue
        body = part.get("text", "")
        if not body.startswith(injected_markers) and body.strip():
            return True
    return False
373
+
374
+
375
def extract_user_text(payload: Dict[str, Any]) -> str:
    """Join the visible `input_text` parts of a user message with newlines.

    Parts whose text starts with an injected marker (<environment_context>,
    <permissions instructions>, <turn_aborted>) are left out.
    """
    injected_markers = (
        "<environment_context>",
        "<permissions instructions>",
        "<turn_aborted>",
    )
    visible = []
    for part in payload.get("content", []):
        if not isinstance(part, dict) or part.get("type") != "input_text":
            continue
        body = part.get("text", "")
        if body.startswith(injected_markers):
            continue
        visible.append(body)
    return "\n".join(visible)
386
+
387
+
388
def extract_assistant_text(payload: Dict[str, Any]) -> str:
    """Join the text of all `output_text` / `text` parts of an assistant message."""
    text_kinds = ("output_text", "text")
    return "\n".join(
        part.get("text", "")
        for part in payload.get("content", [])
        if isinstance(part, dict) and part.get("type") in text_kinds
    )
395
+
396
+
397
def extract_message_content_text(payload: Dict[str, Any]) -> str:
    """Join every non-empty `text` field of a message's content items.

    Unlike the role-specific extractors, the item type is not checked.
    """
    texts = [
        part.get("text", "")
        for part in payload.get("content", [])
        if isinstance(part, dict)
    ]
    return "\n".join(chunk for chunk in texts if chunk)
407
+
408
+
409
def truncate_text(text: str, limit: int = 12000) -> str:
    """Return *text* unchanged if it fits in *limit* characters, else its
    first *limit* characters followed by "..."."""
    return text if len(text) <= limit else text[:limit] + "..."
414
+
415
+
416
+ # --- Message Grouping ---
417
+
418
def group_messages_into_turns(entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Group raw JSONL entries into conversation turns.

    Turn lifecycle:
      - Opened by: event_msg / task_started
      - Closed by: event_msg / task_complete  (or next task_started)

    Within a turn we collect:
      - user_message      : first real user input
      - assistant_messages : response_item/message role=assistant items
      - tool_calls         : response_item/function_call items (excl. spawn/wait_agent)
      - tool_results       : response_item/function_call_output items (excl. spawn/wait)
      - input_messages     : messages sent as model input up to the user message
      - subagent_calls     : spawn_agent calls with their agent_id and final result
      - token_usage        : `last_token_usage` of the latest token_count event
                             (always a dict, {} when absent)

    Items that arrive while no turn is open are ignored. Entries whose
    payload is missing or not an object are treated as having an empty
    payload. Turns with neither a user message nor an assistant message are
    dropped from the result.

    Args:
        entries: Parsed rollout entries, in file order.

    Returns:
        The list of turn dicts, in order.
    """
    turns = []
    current_turn: Optional[Dict[str, Any]] = None
    # call_id -> how the matching function_call_output must be interpreted.
    pending_calls: Dict[str, Dict[str, Any]] = {}

    for entry in entries:
        entry_type = entry.get("type")
        payload = entry.get("payload")
        if not isinstance(payload, dict):
            payload = {}

        # --- Turn lifecycle events ---
        if entry_type == "event_msg":
            msg_type = payload.get("type")
            if msg_type == "task_started":
                # A new start implicitly closes a turn that never completed.
                if current_turn is not None:
                    turns.append(current_turn)
                current_turn = {
                    "turn_id": payload.get("turn_id"),
                    "user_message": None,
                    "user_message_text": "",
                    "assistant_messages": [],
                    "tool_calls": [],
                    "tool_results": [],
                    "input_messages": [],
                    "subagent_calls": [],
                    "token_usage": {},
                    "start_line": entry.get("_line_number", 0),
                }
                pending_calls = {}
            elif msg_type == "task_complete":
                if current_turn is not None:
                    turns.append(current_turn)
                current_turn = None
                pending_calls = {}
            elif msg_type == "token_count":
                if current_turn is not None:
                    info = payload.get("info")
                    usage = info.get("last_token_usage") if isinstance(info, dict) else None
                    # Consumers call .get() on token_usage, so a null or
                    # malformed value is normalised to an empty dict.
                    current_turn["token_usage"] = usage if isinstance(usage, dict) else {}
            continue

        # --- Content items ---
        if entry_type == "response_item":
            item_type = payload.get("type")

            if item_type == "message":
                role = payload.get("role")
                if role == "user" and is_real_user_message(payload):
                    # Only the first real user message defines the turn input,
                    # but every one is recorded as model input.
                    if current_turn is not None and current_turn["user_message"] is None:
                        current_turn["user_message"] = payload
                        current_turn["user_message_text"] = extract_user_text(payload)
                    if current_turn is not None:
                        current_turn["input_messages"].append({
                            "role": "user",
                            "content": extract_user_text(payload),
                        })
                elif role == "assistant":
                    if current_turn is not None:
                        current_turn["assistant_messages"].append(payload)
                elif role in ("developer", "system"):
                    if current_turn is not None:
                        current_turn["input_messages"].append({
                            "role": role,
                            "content": extract_message_content_text(payload),
                        })
                else:
                    # Injected user-role messages and unknown roles: kept as
                    # model input when they carry any text.
                    if current_turn is not None:
                        text = extract_message_content_text(payload)
                        if text:
                            current_turn["input_messages"].append({
                                "role": role or "user",
                                "content": text,
                            })

            elif item_type == "function_call":
                if current_turn is None:
                    continue
                call_id = payload.get("call_id")
                name = payload.get("name", "")
                args_raw = payload.get("arguments", "{}")
                try:
                    args = json.loads(args_raw)
                except (json.JSONDecodeError, TypeError):
                    args = {"_raw": args_raw}
                # Arguments may decode to a non-object (list, string, ...);
                # keyed lookups below go through this dict view so they
                # cannot raise, while plain tools keep the decoded value.
                arg_map = args if isinstance(args, dict) else {}

                if name == "spawn_agent":
                    subagent_call = {
                        "call_id": call_id,
                        "agent_id": None,
                        "nickname": None,
                        "role": arg_map.get("agent_type"),
                        "message": arg_map.get("message", ""),
                        "model": arg_map.get("model"),
                        "result": None,
                    }
                    current_turn["subagent_calls"].append(subagent_call)
                    pending_calls[call_id] = {"kind": "spawn", "subagent_call": subagent_call}
                elif name == "wait_agent":
                    pending_calls[call_id] = {
                        "kind": "wait",
                        "ids": arg_map.get("ids", []),
                    }
                else:
                    current_turn["tool_calls"].append({
                        "call_id": call_id,
                        "name": name,
                        "input": args,
                    })
                    pending_calls[call_id] = {"kind": "tool"}

            elif item_type == "function_call_output":
                if current_turn is None:
                    continue
                call_id = payload.get("call_id")
                raw_output = payload.get("output", "")

                # Outputs without a recorded call are treated as tool results.
                pending = pending_calls.get(call_id, {})
                kind = pending.get("kind", "tool")

                if kind == "spawn":
                    # The spawn output identifies the new agent.
                    subagent_call = pending.get("subagent_call")
                    if subagent_call is not None:
                        try:
                            out = json.loads(raw_output) if isinstance(raw_output, str) else raw_output
                            subagent_call["agent_id"] = out.get("agent_id")
                            subagent_call["nickname"] = out.get("nickname")
                        except (json.JSONDecodeError, TypeError, AttributeError):
                            pass

                elif kind == "wait":
                    # The wait output maps agent ids to their status; attach
                    # each completed text to the first matching spawn of this
                    # turn that has no result yet.
                    try:
                        out = json.loads(raw_output) if isinstance(raw_output, str) else raw_output
                        status = out.get("status", {}) if isinstance(out, dict) else {}
                        for agent_id, agent_status in status.items():
                            result_text = None
                            if isinstance(agent_status, dict):
                                result_text = agent_status.get("completed")
                            for sc in current_turn["subagent_calls"]:
                                if sc.get("agent_id") == agent_id and sc.get("result") is None:
                                    sc["result"] = result_text
                                    break
                    except (json.JSONDecodeError, TypeError, AttributeError):
                        pass

                else:
                    current_turn["tool_results"].append({
                        "call_id": call_id,
                        "output": raw_output,
                    })

    # A turn still open at the end of the input is kept.
    if current_turn is not None:
        turns.append(current_turn)

    # Drop turns with no user input and no assistant response
    turns = [
        t for t in turns
        if t["user_message"] is not None or t["assistant_messages"]
    ]

    return turns
590
+
591
+
592
+ # --- CozeLoop Trace Reporting ---
593
+
594
def _make_model_message(role: str, content: str = "", tool_calls: list = None,
                        tool_call_id: str = "") -> ModelMessage:
    """Build a CozeLoop ModelMessage with every optional field defaulted.

    A missing `tool_calls` becomes an empty list and a falsy `tool_call_id`
    becomes ""; reasoning content, parts, name and metadata are left empty.
    """
    fields = {
        "role": role,
        "content": content,
        "reasoning_content": "",
        "parts": [],
        "name": "",
        "tool_calls": tool_calls if tool_calls else [],
        "tool_call_id": tool_call_id if tool_call_id else "",
        "metadata": {},
    }
    return ModelMessage(**fields)
607
+
608
+
609
def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, model_name: str = "codex",
                           history_context: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
    """Send conversation turns to CozeLoop for tracing.

    Span hierarchy:
      root_span (codex_request)  [input=user_input, output=final_response]
        +-- turn_span (turn_0, turn_1, ...)
              |-- model_span (assistant_response)
              |-- tool_span (tool calls)
              |-- subagent_span (subagent calls with nested turns)

    Args:
        turns: Turn dicts as produced by group_messages_into_turns.
        session_id: Session id, recorded as the `thread_id` tag/baggage.
        model_name: Model name set on the main agent's model spans.
        history_context: Earlier {"role", "content"} messages that are
            prepended to each model span's input; it is copied, not mutated.

    Returns the updated history_context on success, or None on failure.
    When `turns` is empty, `history_context` is returned unchanged. An
    error inside a single turn is logged and that turn is skipped; it does
    not make the call fail.

    Side effects: sets the COZELOOP_API_TOKEN (when a token is found) and
    COZELOOP_WORKSPACE_ID environment variables and prints status lines to
    stderr.
    """
    if not turns:
        return history_context

    debug_log(f"Initializing CozeLoop client for session: {session_id}")
    token = get_fresh_token()
    if token:
        os.environ["COZELOOP_API_TOKEN"] = token
        # NOTE(review): this prints the first 12 characters of the token.
        print(f"[CozeLoop] Token 获取成功 ({token[:12]}...)", file=sys.stderr)
    else:
        print("[CozeLoop] 警告: 未找到有效 Token,上报可能失败", file=sys.stderr)
    creds = _load_credentials()
    # Workspace precedence: credentials file, then environment, then the
    # hardcoded default.
    workspace_id = (creds or {}).get("workspace_id") or os.environ.get("COZELOOP_WORKSPACE_ID", "") or _DEFAULT_WORKSPACE_ID
    os.environ["COZELOOP_WORKSPACE_ID"] = workspace_id
    client_kwargs = {
        "ultra_large_report": True,
        "upload_timeout": 120,
        "trace_finish_event_processor": _make_finish_event_processor(),
    }
    if workspace_id:
        client_kwargs["workspace_id"] = workspace_id
    if token:
        client_kwargs["api_token"] = token
    client = cozeloop.new_client(**client_kwargs)
    # Working copy of the history; extended after each processed turn.
    ctx: List[Dict[str, Any]] = list(history_context) if history_context else []

    try:
        with client.start_span(name="codex_request", span_type="main") as root_span:
            root_span.set_runtime(Runtime(library="codex-cli"))
            root_tags = {
                "thread_id": session_id,
                "total_turns": len(turns),
                "source": "codex_cli",
            }
            root_baggage = {
                "thread_id": session_id,
            }
            # Inject coze-context kv (last occurrence across turns wins).
            coze_tags = {}
            for turn in turns:
                t = coze_context_tags(turn.get("user_message_text", ""))
                if t:
                    coze_tags = t
            if coze_tags:
                root_tags.update(coze_tags)
                root_baggage.update(coze_tags)
            root_span.set_tags(root_tags)
            root_span.set_baggage(root_baggage)

            # Set root span input: all user messages
            root_input_parts = []
            for turn in turns:
                text = turn.get("user_message_text", "")
                if text:
                    root_input_parts.append(text)
            if root_input_parts:
                root_span.set_input(truncate_text("\n\n".join(root_input_parts)))

            # Set root span output: all assistant messages
            root_output_parts = []
            for turn in turns:
                for assistant_payload in turn.get("assistant_messages", []):
                    assistant_text = extract_assistant_text(assistant_payload)
                    if assistant_text:
                        root_output_parts.append(assistant_text)
            if root_output_parts:
                root_span.set_output(truncate_text("\n\n".join(root_output_parts)))

            # Process each turn
            for i, turn in enumerate(turns):
                try:
                    with client.start_span(name=f"turn_{i}", span_type="main") as turn_span:
                        turn_span.set_runtime(Runtime(library="codex-cli"))
                        turn_span.set_tags({
                            "thread_id": session_id,
                            "turn_index": i,
                            "turn_id": turn.get("turn_id", ""),
                            "source": "codex_cli",
                        })

                        # --- Model span for assistant response ---
                        if turn.get("assistant_messages"):
                            with client.start_span(name="assistant_response", span_type="model") as model_span:
                                model_span.set_runtime(Runtime(library="codex-cli"))
                                model_span.set_model_name(model_name)

                                # Build input messages: history + current turn input
                                turn_input = turn.get("input_messages", [])
                                if not turn_input:
                                    # Fall back to the bare user text when no
                                    # input messages were collected.
                                    turn_input = [{"role": "user", "content": turn.get("user_message_text", "")}]
                                input_messages = ctx + turn_input

                                model_messages = []
                                for msg in input_messages:
                                    model_messages.append(_make_model_message(
                                        role=msg.get("role", "user"),
                                        content=msg.get("content", "")
                                    ))

                                model_span.set_input(ModelInput(
                                    messages=model_messages,
                                    tools=[],
                                    tool_choice=ModelToolChoice(type="", function=None)
                                ))

                                # Build output choices
                                choices = []
                                for assistant_payload in turn["assistant_messages"]:
                                    assistant_text = extract_assistant_text(assistant_payload)
                                    # Extract tool calls from assistant content
                                    tc_list = []
                                    for item in assistant_payload.get("content", []):
                                        if isinstance(item, dict) and item.get("type") == "function_call":
                                            tc_list.append(ModelToolCall(
                                                id=item.get("call_id", ""),
                                                type="function",
                                                function=ModelToolCallFunction(
                                                    name=item.get("name", ""),
                                                    arguments=item.get("arguments", "")
                                                )
                                            ))

                                    finish_reason = "tool_calls" if tc_list else "stop"
                                    choices.append(ModelChoice(
                                        finish_reason=finish_reason,
                                        index=len(choices),
                                        message=ModelMessage(
                                            role="assistant",
                                            content=assistant_text,
                                            reasoning_content="",
                                            parts=[],
                                            name="",
                                            tool_calls=tc_list,
                                            tool_call_id="",
                                            metadata={}
                                        )
                                    ))

                                model_span.set_output(ModelOutput(choices=choices))

                                # Set token usage
                                token_usage = turn.get("token_usage", {})
                                input_tokens = token_usage.get("input_tokens", 0)
                                output_tokens = token_usage.get("output_tokens", 0)
                                if input_tokens > 0:
                                    model_span.set_input_tokens(input_tokens)
                                if output_tokens > 0:
                                    model_span.set_output_tokens(output_tokens)

                        # --- Tool call spans ---
                        for tool_call in turn.get("tool_calls", []):
                            tool_name = tool_call.get("name", "unknown")
                            with client.start_span(name=f"tool_{tool_name}", span_type="tool") as tool_span:
                                tool_span.set_runtime(Runtime(library="codex-cli"))
                                tool_span.set_tags({
                                    "tool_name": tool_name,
                                    "call_id": tool_call.get("call_id"),
                                })
                                # Serialized tool input is capped at 2000 characters.
                                tool_span.set_input(
                                    json.dumps(tool_call.get("input", {}), ensure_ascii=False)[:2000]
                                )
                                # Find matching tool result
                                call_id = tool_call.get("call_id")
                                for result in turn.get("tool_results", []):
                                    if result.get("call_id") == call_id:
                                        output = result.get("output", "")
                                        if isinstance(output, str) and len(output) > 2000:
                                            output = output[:2000] + "..."
                                        tool_span.set_output(str(output))
                                        break

                        # --- Subagent spans ---
                        for sc in turn.get("subagent_calls", []):
                            agent_id = sc.get("agent_id") or "unknown"
                            nickname = sc.get("nickname") or agent_id

                            with client.start_span(name=f"subagent_{nickname}", span_type="agent") as subagent_span:
                                subagent_span.set_runtime(Runtime(library="codex-cli"))
                                subagent_span.set_tags({
                                    "agent_id": agent_id,
                                    "agent_nickname": nickname,
                                    "agent_role": sc.get("role") or "",
                                    "agent_model": sc.get("model") or "",
                                })
                                subagent_span.set_input(sc.get("message", "")[:2000])

                                # Load and include saved subagent turn data
                                sa_data = load_subagent_data(agent_id)
                                if sa_data and sa_data.get("turns"):
                                    sa_turns = sa_data["turns"]
                                    sa_model = sa_data.get("model_name", "codex")

                                    for si, sa_turn in enumerate(sa_turns):
                                        with client.start_span(name=f"turn_{si}", span_type="main") as sa_turn_span:
                                            sa_turn_span.set_runtime(Runtime(library="codex-cli"))
                                            sa_turn_span.set_tags({
                                                "turn_index": si,
                                                "turn_id": sa_turn.get("turn_id", ""),
                                                "agent_name": nickname,
                                            })

                                            # Subagent model span
                                            if sa_turn.get("assistant_messages"):
                                                with client.start_span(name="assistant_response", span_type="model") as sa_model_span:
                                                    sa_model_span.set_runtime(Runtime(library="codex-cli"))
                                                    sa_model_span.set_model_name(sa_model)
                                                    sa_model_span.set_tags({"agent_name": nickname})

                                                    # Unlike the main agent, no history
                                                    # is prepended to the subagent input.
                                                    sa_input = sa_turn.get("input_messages", [])
                                                    if not sa_input:
                                                        sa_input = [{"role": "user", "content": sa_turn.get("user_message_text", "")}]
                                                    sa_model_messages = []
                                                    for msg in sa_input:
                                                        sa_model_messages.append(_make_model_message(
                                                            role=msg.get("role", "user"),
                                                            content=msg.get("content", "")
                                                        ))
                                                    sa_model_span.set_input(ModelInput(
                                                        messages=sa_model_messages,
                                                        tools=[],
                                                        tool_choice=ModelToolChoice(type="", function=None)
                                                    ))

                                                    # Subagent choices always use
                                                    # finish_reason "stop" with no tool calls.
                                                    sa_choices = []
                                                    for ap in sa_turn["assistant_messages"]:
                                                        sa_choices.append(ModelChoice(
                                                            finish_reason="stop",
                                                            index=len(sa_choices),
                                                            message=ModelMessage(
                                                                role="assistant",
                                                                content=extract_assistant_text(ap),
                                                                reasoning_content="",
                                                                parts=[],
                                                                name="",
                                                                tool_calls=[],
                                                                tool_call_id="",
                                                                metadata={}
                                                            )
                                                        ))
                                                    sa_model_span.set_output(ModelOutput(choices=sa_choices))

                                                    sa_token = sa_turn.get("token_usage", {})
                                                    if sa_token.get("input_tokens", 0) > 0:
                                                        sa_model_span.set_input_tokens(sa_token["input_tokens"])
                                                    if sa_token.get("output_tokens", 0) > 0:
                                                        sa_model_span.set_output_tokens(sa_token["output_tokens"])

                                            # Subagent tool spans
                                            for sa_tc in sa_turn.get("tool_calls", []):
                                                sa_tool_name = sa_tc.get("name", "unknown")
                                                with client.start_span(name=f"tool_{sa_tool_name}", span_type="tool") as sa_tool_span:
                                                    sa_tool_span.set_runtime(Runtime(library="codex-cli"))
                                                    sa_tool_span.set_tags({
                                                        "tool_name": sa_tool_name,
                                                        "call_id": sa_tc.get("call_id"),
                                                        "agent_name": nickname,
                                                    })
                                                    sa_tool_span.set_input(
                                                        json.dumps(sa_tc.get("input", {}), ensure_ascii=False)[:2000]
                                                    )
                                                    sa_cid = sa_tc.get("call_id")
                                                    for sa_r in sa_turn.get("tool_results", []):
                                                        if sa_r.get("call_id") == sa_cid:
                                                            sa_out = sa_r.get("output", "")
                                                            if isinstance(sa_out, str) and len(sa_out) > 2000:
                                                                sa_out = sa_out[:2000] + "..."
                                                            sa_tool_span.set_output(str(sa_out))
                                                            break

                                    debug_log(f"Included {len(sa_turns)} subagent turns for {nickname} ({agent_id})")
                                else:
                                    debug_log(f"No saved data found for subagent {nickname} ({agent_id})")

                                # The subagent span output is the result text
                                # recorded for the spawn call, capped at 2000 chars.
                                result_text = sc.get("result") or ""
                                if len(result_text) > 2000:
                                    result_text = result_text[:2000] + "..."
                                subagent_span.set_output(result_text)

                        # Update conversation context for subsequent turns
                        if turn.get("user_message_text"):
                            ctx.append({"role": "user", "content": turn["user_message_text"]})
                        for assistant_payload in turn.get("assistant_messages", []):
                            assistant_text = extract_assistant_text(assistant_payload)
                            if assistant_text:
                                ctx.append({"role": "assistant", "content": assistant_text})

                except Exception as e:
                    # A failing turn is skipped; the remaining turns are still sent.
                    debug_log(f"Error processing turn {i}: {e}")
                    continue

            debug_log(f"Successfully processed {len(turns)} turn(s) for session {session_id}")

    except Exception as e:
        debug_log(f"An error occurred while sending traces to CozeLoop: {e}")
        return None
    finally:
        # The client is closed on both the success and the failure path.
        client.close()
        debug_log("CozeLoop client closed.")

    return ctx
921
+
922
+
923
+ # --- Main Execution ---
924
+
925
def main():
    """Run the Codex CozeLoop hook for a single hook invocation.

    Reads the hook payload (a JSON object) from stdin, loads the incremental
    state kept for the referenced rollout transcript, and handles the rollout
    entries that have not been processed yet:

    * Subagent sessions never upload. Their most recent turn is stored with
      ``save_subagent_data`` so the parent session's hook can include it.
    * Main sessions upload their turns with ``send_turns_to_cozeloop``, but
      only when at least one turn carries a coze-context marker.

    The processed-line cursor in the state file is advanced only after the
    subagent data was handled or after the upload returned a history (a
    ``None`` result is treated as a failed send). Every early exit is reported
    through ``debug_log``; the function always returns ``None``.
    """
    print("[CozeLoop] Hook triggered (Codex).", file=sys.stderr)
    debug_log("Codex CozeLoop hook started.")

    # Tracing can be switched off explicitly through the environment.
    if os.environ.get("TRACE_TO_COZELOOP", "").lower() == "false":
        debug_log("TRACE_TO_COZELOOP is set to 'false', skipping")
        return

    # Read hook input from stdin
    try:
        raw_input = sys.stdin.read().strip()
        if not raw_input:
            debug_log("No input received from stdin")
            return
        hook_input = json.loads(raw_input)
    except Exception as e:
        debug_log(f"Error reading hook input from stdin: {e}")
        return

    # Valid JSON that is not an object (list, string, number) has no .get();
    # stop here instead of failing with AttributeError further down.
    if not isinstance(hook_input, dict):
        debug_log(f"Unexpected hook input type: {type(hook_input).__name__}")
        return

    debug_log(f"Hook input: {json.dumps(hook_input, ensure_ascii=False)}")

    # Get transcript path
    transcript_path = hook_input.get("transcript_path")
    if not transcript_path:
        debug_log("No transcript_path in hook input")
        return

    if not os.path.exists(transcript_path):
        debug_log(f"Transcript file not found: {transcript_path}")
        return

    # Load state
    state_file = get_state_file_path(transcript_path)
    state = load_state(state_file)
    start_line = state["last_processed_line"]

    # Read new entries
    entries = read_rollout_messages(transcript_path, start_line)
    if not entries:
        debug_log("No new entries to process")
        return

    debug_log(f"Read {len(entries)} new entries from line {start_line}")

    # Work out the next cursor position now, while ``entries`` is still the
    # unfiltered (and therefore non-empty) list. Calling max() after the
    # subagent filter below would raise ValueError when the filter removes
    # every entry.
    next_line = max(e.get("_line_number", 0) for e in entries) + 1

    # Parse session identity (session metadata is looked up over the whole
    # transcript, not only over the new entries).
    all_entries_for_meta = read_rollout_messages(transcript_path, 0)
    session_info = parse_session_meta(all_entries_for_meta)

    session_id = session_info["session_id"] or hook_input.get("session_id", "")
    parent_session_id = session_info["parent_session_id"]
    agent_nickname = session_info["agent_nickname"]
    agent_role = session_info["agent_role"]
    is_subagent = session_info["is_subagent"]
    subagent_content_start = session_info.get("subagent_content_start_line")

    # Fall back to a generated identifier when neither source provides one.
    if not session_id:
        session_id = f"codex_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{os.getpid()}"
    state["session_id"] = session_id

    # Filter subagent entries to only include their own content
    if is_subagent and subagent_content_start is not None:
        entries = [e for e in entries if e.get("_line_number", 0) >= subagent_content_start]
        debug_log(f"Filtered subagent entries from line {subagent_content_start}, {len(entries)} remaining")
        if not entries:
            # Everything read so far lies before the subagent's own content.
            # Skip past it without saving, so no empty turn list is written.
            state["last_processed_line"] = next_line
            save_state(state_file, state)
            debug_log("No subagent-owned entries yet, hook completed")
            return

    # Determine model name from the first turn_context among the new entries
    model_name = "codex"
    for entry in entries:
        if entry.get("type") == "turn_context":
            model_name = entry.get("payload", {}).get("model", model_name)
            break

    debug_log(f"Session ID: {session_id}, parent: {parent_session_id}, "
              f"is_subagent: {is_subagent}, nickname: {agent_nickname}, model: {model_name}")

    # Group entries into turns
    turns = group_messages_into_turns(entries)
    debug_log(f"Grouped into {len(turns)} turns")

    # If this is a subagent, save data for parent to include later
    if is_subagent:
        save_subagent_data(session_id, {
            "session_id": session_id,
            "parent_session_id": parent_session_id,
            "agent_nickname": agent_nickname,
            "agent_role": agent_role,
            "model_name": model_name,
            "turns": turns[-1:],  # only the most recent turn is handed over
        })
        state["last_processed_line"] = next_line
        save_state(state_file, state)
        debug_log("Subagent data saved, hook completed")
        return

    # Send turns to CozeLoop — only if at least one turn carries coze-context.
    if turns:
        has_coze_ctx = any(
            # ``or ""`` keeps a present-but-None message from reaching the parser.
            parse_coze_context(t.get("user_message_text") or "")
            for t in turns
        )
        if not has_coze_ctx:
            # State is not advanced on this path, so the same entries are
            # read again on the next invocation.
            debug_log("No coze-context found in any turn, skipping upload.")
            return
        history_context = state.get("conversation_history", [])
        updated_history = send_turns_to_cozeloop(
            turns, session_id, model_name,
            history_context=history_context,
        )
        if updated_history is not None:
            state["last_processed_line"] = next_line
            state["conversation_history"] = updated_history
            save_state(state_file, state)
            debug_log(f"State updated, last processed line: {next_line}")
        else:
            # Keep the cursor where it was so the turns are retried next time.
            debug_log("Send failed, state not advanced")
    else:
        debug_log("No turns to send")

    debug_log("Codex CozeLoop hook completed.")
1047
+
1048
+
1049
# Script entry point: run the hook only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
1051
+