oh-langfuse 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/langfuse_hook.py CHANGED
@@ -4,58 +4,58 @@ Claude Code -> Langfuse hook
4
4
 
5
5
  """
6
6
 
7
- import json
8
- import os
9
- import re
10
- import sys
11
- import time
12
- import hashlib
13
- from dataclasses import dataclass, field
14
- from datetime import datetime, timezone
15
- from pathlib import Path
16
- from typing import Any, Dict, List, Optional, Tuple
17
- from urllib.parse import urlparse
18
-
19
-
20
- def configure_langfuse_no_proxy() -> None:
21
- hosts = ["localhost", "127.0.0.1"]
22
- for key in ("LANGFUSE_HOST", "LANGFUSE_BASEURL", "CC_LANGFUSE_BASE_URL"):
23
- value = os.environ.get(key)
24
- if not value:
25
- continue
26
- parsed = urlparse(value if "://" in value else f"http://{value}")
27
- if parsed.hostname:
28
- hosts.append(parsed.hostname)
29
- if parsed.netloc:
30
- hosts.append(parsed.netloc)
31
- existing = []
32
- for key in ("NO_PROXY", "no_proxy"):
33
- existing.extend([item.strip() for item in os.environ.get(key, "").split(",") if item.strip()])
34
- merged = []
35
- for item in [*existing, *hosts]:
36
- if item and item not in merged:
37
- merged.append(item)
38
- if merged:
39
- value = ",".join(merged)
40
- os.environ["NO_PROXY"] = value
41
- os.environ["no_proxy"] = value
42
-
43
-
44
- configure_langfuse_no_proxy()
45
-
46
- # --- Langfuse import (fail-open) ---
47
- try:
48
- from langfuse import Langfuse, propagate_attributes
49
- except Exception as e:
50
- try:
51
- state_dir = Path.home() / ".claude" / "state"
52
- state_dir.mkdir(parents=True, exist_ok=True)
53
- with open(state_dir / "langfuse_hook.log", "a", encoding="utf-8") as f:
54
- ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
55
- f.write(f"{ts} [ERROR] Failed to import langfuse package: {e}\n")
56
- except Exception:
57
- pass
58
- sys.exit(0)
7
+ import json
8
+ import os
9
+ import re
10
+ import sys
11
+ import time
12
+ import hashlib
13
+ from dataclasses import dataclass, field
14
+ from datetime import datetime, timezone
15
+ from pathlib import Path
16
+ from typing import Any, Dict, List, Optional, Tuple
17
+ from urllib.parse import urlparse
18
+
19
+
20
def configure_langfuse_no_proxy() -> None:
    """Add the configured Langfuse host(s) to ``NO_PROXY`` / ``no_proxy``.

    Reads ``LANGFUSE_HOST``, ``LANGFUSE_BASEURL`` and ``CC_LANGFUSE_BASE_URL``.
    For every variable that is set, its hostname and its ``host[:port]`` are
    appended -- after ``localhost`` and ``127.0.0.1`` -- to the entries already
    present in ``NO_PROXY`` / ``no_proxy``. Existing entries keep their
    position, duplicates are dropped, and both variables are overwritten with
    the same comma-separated value.

    A value that cannot be parsed as a URL is skipped instead of raising, and
    any ``user:password@`` prefix is removed so credentials embedded in the
    base URL are never copied into the proxy-bypass list.
    """
    hosts = ["localhost", "127.0.0.1"]
    for key in ("LANGFUSE_HOST", "LANGFUSE_BASEURL", "CC_LANGFUSE_BASE_URL"):
        value = os.environ.get(key)
        if not value:
            continue
        try:
            # Bare "host:port" values get a scheme so urlparse fills netloc.
            parsed = urlparse(value if "://" in value else f"http://{value}")
            hostname = parsed.hostname
        except ValueError:
            # urlparse rejects e.g. an unbalanced IPv6 bracket ("http://[bad").
            continue
        if hostname:
            hosts.append(hostname)
        # netloc may be "user:password@host:port"; keep only "host:port".
        host_port = parsed.netloc.rpartition("@")[2]
        if host_port:
            hosts.append(host_port)

    existing = [
        item.strip()
        for key in ("NO_PROXY", "no_proxy")
        for item in os.environ.get(key, "").split(",")
        if item.strip()
    ]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    merged = list(dict.fromkeys([*existing, *hosts]))
    if merged:
        value = ",".join(merged)
        os.environ["NO_PROXY"] = value
        os.environ["no_proxy"] = value
42
+
43
+
44
# Extend NO_PROXY / no_proxy before the Langfuse SDK is imported.
configure_langfuse_no_proxy()

# --- Langfuse import (fail-open) ---
# When the SDK cannot be imported, try to append one error line to
# ~/.claude/state/langfuse_hook.log, ignore any failure while doing so,
# and exit with status 0.
try:
    from langfuse import Langfuse, propagate_attributes
except Exception as import_error:
    try:
        log_dir = Path.home() / ".claude" / "state"
        log_dir.mkdir(parents=True, exist_ok=True)
        log_path = log_dir / "langfuse_hook.log"
        with open(log_path, "a", encoding="utf-8") as log_file:
            stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_file.write(f"{stamp} [ERROR] Failed to import langfuse package: {import_error}\n")
    except Exception:
        pass
    sys.exit(0)
59
59
 
60
60
  # --- Paths ---
61
61
  STATE_DIR = Path.home() / ".claude" / "state"
@@ -63,10 +63,10 @@ LOG_FILE = STATE_DIR / "langfuse_hook.log"
63
63
  STATE_FILE = STATE_DIR / "langfuse_state.json"
64
64
  LOCK_FILE = STATE_DIR / "langfuse_state.lock"
65
65
 
66
- DEBUG = os.environ.get("CC_LANGFUSE_DEBUG", "").lower() == "true"
67
- MAX_CHARS = int(os.environ.get("CC_LANGFUSE_MAX_CHARS", "20000"))
68
- METRICS_SCHEMA_VERSION = "1.1"
69
- AGENT_NAME = "claude"
66
# Debug flag: on only when CC_LANGFUSE_DEBUG equals "true" (case-insensitive).
DEBUG = os.environ.get("CC_LANGFUSE_DEBUG", "").lower() == "true"

# Default character limit for truncate_text(). A malformed CC_LANGFUSE_MAX_CHARS
# must not raise while the module is being imported, and a negative limit would
# make the s[:max_chars] slice drop characters from the end instead of capping
# the length, so both cases fall back to the built-in default.
_DEFAULT_MAX_CHARS = 20000
try:
    MAX_CHARS = int(os.environ.get("CC_LANGFUSE_MAX_CHARS", str(_DEFAULT_MAX_CHARS)))
except ValueError:
    MAX_CHARS = _DEFAULT_MAX_CHARS
if MAX_CHARS < 0:
    MAX_CHARS = _DEFAULT_MAX_CHARS

# Value reported as "metrics_schema_version" in interaction metadata.
METRICS_SCHEMA_VERSION = "1.1"
AGENT_NAME = "claude"
70
70
 
71
71
  # ----------------- Logging -----------------
72
72
  def _log(level: str, message: str) -> None:
@@ -268,349 +268,349 @@ def extract_text(content: Any) -> str:
268
268
  return "\n".join([p for p in parts if p])
269
269
  return ""
270
270
 
271
- def truncate_text(s: str, max_chars: int = MAX_CHARS) -> Tuple[str, Dict[str, Any]]:
271
+ def truncate_text(s: str, max_chars: int = MAX_CHARS) -> Tuple[str, Dict[str, Any]]:
272
272
  if s is None:
273
273
  return "", {"truncated": False, "orig_len": 0}
274
274
  orig_len = len(s)
275
275
  if orig_len <= max_chars:
276
276
  return s, {"truncated": False, "orig_len": orig_len}
277
- head = s[:max_chars]
278
- return head, {"truncated": True, "orig_len": orig_len, "kept_len": len(head), "sha256": hashlib.sha256(s.encode("utf-8")).hexdigest()}
279
-
280
- def build_interaction_id(source: str, session_id: str, turn_number: int) -> str:
281
- return f"{source or 'unknown'}:{session_id or 'unknown'}:{int(turn_number or 0)}"
282
-
283
- def _num_or_none(value: Any) -> Optional[int]:
284
- if isinstance(value, bool):
285
- return None
286
- if isinstance(value, int) and value >= 0:
287
- return value
288
- if isinstance(value, float) and value >= 0:
289
- return int(value)
290
- if isinstance(value, str):
291
- try:
292
- n = int(value)
293
- return n if n >= 0 else None
294
- except Exception:
295
- return None
296
- return None
297
-
298
- def _first_num(raw: Dict[str, Any], *keys: str) -> Optional[int]:
299
- for key in keys:
300
- if key in raw:
301
- value = _num_or_none(raw.get(key))
302
- if value is not None:
303
- return value
304
- return None
305
-
306
- def normalize_token_metrics(raw: Optional[Dict[str, Any]]) -> Dict[str, Any]:
307
- if not isinstance(raw, dict) or not raw:
308
- return {
309
- "token_metrics_available": False,
310
- "input_tokens": None,
311
- "output_tokens": None,
312
- "total_tokens": None,
313
- "cache_read_tokens": None,
314
- "reasoning_tokens": None,
315
- }
316
- input_tokens = _first_num(raw, "input", "input_tokens", "inputTokens")
317
- output_tokens = _first_num(raw, "output", "output_tokens", "outputTokens")
318
- total_tokens = _first_num(raw, "total", "total_tokens", "totalTokens")
319
- if total_tokens is None and input_tokens is not None and output_tokens is not None:
320
- total_tokens = input_tokens + output_tokens
321
- cache_read_tokens = _first_num(raw, "cache_read_tokens", "cachedInputTokens", "cacheRead")
322
- reasoning_tokens = _first_num(raw, "reasoning_tokens", "reasoningTokens", "reasoning")
323
- available = any(v is not None for v in [input_tokens, output_tokens, total_tokens, cache_read_tokens, reasoning_tokens])
324
- return {
325
- "token_metrics_available": available,
326
- "input_tokens": input_tokens if available else None,
327
- "output_tokens": output_tokens if available else None,
328
- "total_tokens": total_tokens if available else None,
329
- "cache_read_tokens": cache_read_tokens if available else None,
330
- "reasoning_tokens": reasoning_tokens if available else None,
331
- }
332
-
333
- def _ratio(numerator: Optional[int], denominator: Optional[int]) -> Optional[float]:
334
- if numerator is None or denominator in (None, 0):
335
- return None
336
- return numerator / denominator
337
-
338
- def build_interaction_metadata(
339
- source: str,
340
- user_id: Optional[str],
341
- session_id: str,
342
- turn_number: int,
343
- token_metrics: Optional[Dict[str, Any]],
344
- tool_call_count: int,
345
- tool_result_count: int,
346
- skill_use_count: int,
347
- model: Optional[str],
348
- user_message_count: int = 1,
349
- assistant_message_count: int = 1,
350
- skill_use_events: Optional[List[Dict[str, Any]]] = None,
351
- ) -> Dict[str, Any]:
352
- tokens = normalize_token_metrics(token_metrics)
353
- interaction_id = build_interaction_id(source, session_id, turn_number)
354
- events = list(skill_use_events or [])
355
- skill_names_all = [str(event.get("skill_name") or "") for event in events if event.get("skill_name")]
356
- unique_skill_names = list(dict.fromkeys(skill_names_all))
357
- skill_invocation_modes = [str(event.get("skill_invocation_mode") or "") for event in events if event.get("skill_invocation_mode")]
358
- skill_agent_paths = [str(event.get("skill_agent_path") or "") for event in events if event.get("skill_agent_path")]
359
- effective_skill_count = len(events) if events else int(skill_use_count or 0)
360
- return {
361
- "source": source,
362
- "agent": source,
363
- "user_id": user_id or "",
364
- "session_id": session_id,
365
- "interaction_id": interaction_id,
366
- "metrics_schema_version": METRICS_SCHEMA_VERSION,
367
- "interaction_count": 1,
368
- "user_message_count": user_message_count,
369
- "assistant_message_count": assistant_message_count,
370
- "tool_call_count": int(tool_call_count or 0),
371
- "tool_result_count": int(tool_result_count or 0),
372
- "skill_use_count": effective_skill_count,
373
- "unique_skill_count": len(unique_skill_names),
374
- "repeated_skill_count": max(0, effective_skill_count - len(unique_skill_names)),
375
- **tokens,
376
- "model": model,
377
- "turn_number": int(turn_number or 0),
378
- "efficiency": {
379
- "tokens_per_interaction": tokens.get("total_tokens"),
380
- "tool_calls_per_interaction": int(tool_call_count or 0),
381
- "skills_per_interaction": effective_skill_count,
382
- "output_input_token_ratio": _ratio(tokens.get("output_tokens"), tokens.get("input_tokens")),
383
- "tokens_per_tool_call": _ratio(tokens.get("total_tokens"), int(tool_call_count or 0)),
384
- },
385
- **({
386
- "skill_names": unique_skill_names,
387
- "skill_names_all": skill_names_all,
388
- "skill_invocation_modes": skill_invocation_modes,
389
- "skill_agent_paths": skill_agent_paths,
390
- } if events else {}),
391
- }
392
-
393
- def discover_known_skills(extra_roots: Optional[List[Path]] = None) -> set:
394
- roots = [
395
- Path.home() / ".codex" / "skills",
396
- Path.home() / ".claude" / "skills",
397
- Path.home() / ".config" / "opencode" / "skill",
398
- ]
399
- if extra_roots:
400
- roots.extend(extra_roots)
401
- names = set()
402
- for root in roots:
403
- try:
404
- if not root.exists():
405
- continue
406
- for skill_file in root.rglob("SKILL.md"):
407
- names.add(skill_file.parent.name)
408
- except Exception:
409
- continue
410
- return names
411
-
412
- def _skill_namespace(name: str) -> str:
413
- return name.split(":", 1)[0] if ":" in name else ""
414
-
415
- def _skill_agent_from_interaction_id(interaction_id: str) -> str:
416
- return str(interaction_id or "unknown").split(":", 1)[0] or "unknown"
417
-
418
- def _skill_agent_path(agent: str, detected_by: str) -> str:
419
- if agent == "claude":
420
- if detected_by == "tool_call":
421
- return "claude_skill_tool"
422
- if detected_by == "slash_command":
423
- return "claude_slash_skill"
424
- if detected_by == "attribution_skill":
425
- return "claude_attribution_skill"
426
- if detected_by == "skill_file_path":
427
- return "skill_file_path"
428
- return detected_by or "metadata"
429
-
430
- def _skill_invocation_mode(agent: str, detected_by: str) -> str:
431
- if detected_by in ("slash_command", "attribution_skill"):
432
- return "explicit"
433
- if detected_by in ("tool_call", "plugin_event"):
434
- return "implicit"
435
- return "detected"
436
-
437
- def _skill_event_type(detected_by: str, agent: str = "unknown") -> str:
438
- return "invoked" if detected_by in ("tool_call", "plugin_event", "attribution_skill", "slash_command") else "detected"
439
-
440
- def _skill_id_segment(name: str) -> str:
441
- segment = re.sub(r"[^A-Za-z0-9_.:-]+", "-", str(name or "").strip()).strip("-")
442
- return (segment or "unknown")[:96]
443
-
444
- def detect_skill_usages(tool_calls: List[Dict[str, Any]], known_skills: set) -> List[Dict[str, str]]:
445
- found: List[Dict[str, str]] = []
446
- seen_call_ids: set = set()
447
- for call in tool_calls or []:
448
- tool_name = str(call.get("name") or "")
449
- call_id = str(call.get("id") or call.get("call_id") or call.get("callId") or call.get("tool_call_id") or call.get("toolCallId") or "").strip()
450
- input_obj = call.get("input") if isinstance(call.get("input"), (dict, list, str)) else {}
451
- if tool_name.lower() == "skill" and isinstance(input_obj, dict):
452
- for key in ("skill_name", "skill", "name"):
453
- value = input_obj.get(key)
454
- if isinstance(value, str) and value.strip():
455
- name = value.strip()
456
- if call_id:
457
- dedupe_key = f"call:{call_id}"
458
- if dedupe_key in seen_call_ids:
459
- break
460
- seen_call_ids.add(dedupe_key)
461
- found.append({"name": name, "skill_namespace": _skill_namespace(name), "detected_by": "tool_call", "skill_call_id": call_id})
462
- break
463
- try:
464
- text = json.dumps(input_obj, ensure_ascii=False)
465
- except Exception:
466
- text = str(input_obj)
467
- for match in re.finditer(r"([A-Za-z]:)?[^\"'\n\r]*[\\/]+([^\\/\"'\n\r]+)[\\/]+SKILL\.md", text, re.IGNORECASE):
468
- candidate = match.group(2)
469
- if candidate and (candidate in known_skills or not known_skills):
470
- found.append({"name": candidate, "skill_namespace": _skill_namespace(candidate), "detected_by": "skill_file_path"})
471
- return found
472
-
473
- def _skill_usage(name: str, detected_by: str, skill_call_id: str = "") -> Dict[str, str]:
474
- clean = str(name or "").strip().lstrip("/")
475
- return {
476
- "name": clean,
477
- "skill_namespace": _skill_namespace(clean),
478
- "detected_by": detected_by,
479
- "skill_call_id": str(skill_call_id or "").strip(),
480
- }
481
-
482
- def _accept_skill_candidate(name: Any, known_skills: set, trusted: bool = False) -> str:
483
- clean = str(name or "").strip().lstrip("/")
484
- if not clean:
485
- return ""
486
- if trusted or not known_skills or clean in known_skills:
487
- return clean
488
- return ""
489
-
490
- def _detect_skill_usages_from_text(text: str, known_skills: set) -> List[Dict[str, str]]:
491
- found: List[Dict[str, str]] = []
492
- if not text:
493
- return found
494
-
495
- for pattern in (
496
- r"<command-name>\s*/?([^<\s]+)\s*</command-name>",
497
- r"<command-message>\s*/?([^<\s]+)\s*</command-message>",
498
- ):
499
- for match in re.finditer(pattern, text, re.IGNORECASE):
500
- name = _accept_skill_candidate(match.group(1), known_skills)
501
- if name:
502
- found.append(_skill_usage(name, "slash_command"))
503
-
504
- for match in re.finditer(r"Base directory for this skill:\s*([^\r\n]+)", text, re.IGNORECASE):
505
- path_text = match.group(1)
506
- path_match = re.search(r"[\\/](?:skills|skill)[\\/]([^\\/\"\r\n]+)", path_text, re.IGNORECASE)
507
- if path_match:
508
- name = _accept_skill_candidate(path_match.group(1), known_skills)
509
- if name:
510
- found.append(_skill_usage(name, "skill_file_path"))
511
-
512
- return found
513
-
514
- def _attribution_skill_from_row(row: Dict[str, Any]) -> str:
515
- if not isinstance(row, dict):
516
- return ""
517
- value = row.get("attributionSkill") or row.get("attribution_skill")
518
- if isinstance(value, str) and value.strip():
519
- return value.strip()
520
- message = row.get("message")
521
- if isinstance(message, dict):
522
- value = message.get("attributionSkill") or message.get("attribution_skill")
523
- if isinstance(value, str) and value.strip():
524
- return value.strip()
525
- return ""
526
-
527
- def _dedupe_turn_skill_usages(usages: List[Dict[str, str]]) -> List[Dict[str, str]]:
528
- out: List[Dict[str, str]] = []
529
- seen_call_ids: set = set()
530
- seen_detected_names: set = set()
531
- for usage in usages or []:
532
- name = str(usage.get("name") or "").strip()
533
- if not name:
534
- continue
535
- call_id = str(usage.get("skill_call_id") or "").strip()
536
- if call_id:
537
- key = f"call:{call_id}"
538
- if key in seen_call_ids:
539
- continue
540
- seen_call_ids.add(key)
541
- out.append(usage)
542
- continue
543
-
544
- detected_by = str(usage.get("detected_by") or "")
545
- if detected_by in ("attribution_skill", "slash_command", "skill_file_path"):
546
- key = f"name:{name}"
547
- if key in seen_detected_names:
548
- continue
549
- seen_detected_names.add(key)
550
- out.append(usage)
551
- return out
552
-
553
- def detect_turn_skill_usages(turn: "Turn", tool_calls: List[Dict[str, Any]], known_skills: set) -> List[Dict[str, str]]:
554
- found = list(detect_skill_usages(tool_calls, known_skills))
555
- rows = [turn.user_msg, *getattr(turn, "context_msgs", []), *turn.assistant_msgs]
556
-
557
- for row in rows:
558
- attributed = _accept_skill_candidate(_attribution_skill_from_row(row), known_skills, trusted=True)
559
- if attributed:
560
- found.append(_skill_usage(attributed, "attribution_skill"))
561
- found.extend(_detect_skill_usages_from_text(extract_text(get_content(row)), known_skills))
562
-
563
- return _dedupe_turn_skill_usages(found)
564
-
565
- def build_skill_use_events(interaction_id: str, skill_usages: List[Dict[str, str]]) -> List[Dict[str, Any]]:
566
- events: List[Dict[str, Any]] = []
567
- deduped: List[Dict[str, str]] = []
568
- seen_call_ids: set = set()
569
- agent = _skill_agent_from_interaction_id(interaction_id)
570
- for skill in skill_usages or []:
571
- call_id = str(skill.get("skill_call_id") or "").strip()
572
- if call_id:
573
- dedupe_key = f"call:{call_id}"
574
- if dedupe_key in seen_call_ids:
575
- continue
576
- seen_call_ids.add(dedupe_key)
577
- deduped.append(skill)
578
- total = len(deduped)
579
- for index, skill in enumerate(deduped, start=1):
580
- name = str(skill.get("name") or "").strip()
581
- if not name:
582
- continue
583
- detected_by = str(skill.get("detected_by") or "metadata")
584
- call_id = str(skill.get("skill_call_id") or "").strip()
585
- invocation_mode = _skill_invocation_mode(agent, detected_by)
586
- events.append({
587
- "skill_use_id": f"{interaction_id}:skill:{index}:{_skill_id_segment(name)}",
588
- "skill_use_index": index,
589
- "skill_use_count_in_interaction": total,
590
- "skill_event_type": _skill_event_type(detected_by, agent),
591
- "skill_trigger": invocation_mode,
592
- "skill_invocation_mode": invocation_mode,
593
- "skill_agent_path": _skill_agent_path(agent, detected_by),
594
- "skill_name": name,
595
- "skill_use_count": 1,
596
- "skill_namespace": skill.get("skill_namespace") or _skill_namespace(name),
597
- "detected_by": detected_by,
598
- **({"skill_call_id": call_id} if call_id else {}),
599
- })
600
- return events
601
-
602
- def summarize_skill_usages(skill_usages: List[Dict[str, str]]) -> List[Dict[str, Any]]:
603
- summary: Dict[str, Dict[str, Any]] = {}
604
- for item in skill_usages or []:
605
- name = item.get("name")
606
- if not name:
607
- continue
608
- entry = summary.setdefault(name, {"name": name, "count": 0, "detected_by": item.get("detected_by")})
609
- entry["count"] += 1
610
- detected_by = str(item.get("detected_by") or "metadata")
611
- entry.setdefault("skill_invocation_mode", _skill_invocation_mode("claude", detected_by))
612
- entry.setdefault("skill_agent_path", _skill_agent_path("claude", detected_by))
613
- return list(summary.values())
277
+ head = s[:max_chars]
278
+ return head, {"truncated": True, "orig_len": orig_len, "kept_len": len(head), "sha256": hashlib.sha256(s.encode("utf-8")).hexdigest()}
279
+
280
def build_interaction_id(source: str, session_id: str, turn_number: int) -> str:
    """Compose the ``source:session_id:turn`` identifier of one interaction.

    A falsy ``source`` or ``session_id`` is replaced by ``"unknown"``; a falsy
    ``turn_number`` becomes ``0``. The turn number is passed through ``int()``.
    """
    source_part = source or "unknown"
    session_part = session_id or "unknown"
    turn_part = int(turn_number or 0)
    return f"{source_part}:{session_part}:{turn_part}"
282
+
283
+ def _num_or_none(value: Any) -> Optional[int]:
284
+ if isinstance(value, bool):
285
+ return None
286
+ if isinstance(value, int) and value >= 0:
287
+ return value
288
+ if isinstance(value, float) and value >= 0:
289
+ return int(value)
290
+ if isinstance(value, str):
291
+ try:
292
+ n = int(value)
293
+ return n if n >= 0 else None
294
+ except Exception:
295
+ return None
296
+ return None
297
+
298
def _first_num(raw: Dict[str, Any], *keys: str) -> Optional[int]:
    """Return the first usable count stored in *raw* under any of *keys*.

    Keys are tried in the order given. A key that is absent, or whose value
    ``_num_or_none`` turns into ``None``, is skipped. Returns ``None`` when no
    key yields a number.
    """
    converted = (_num_or_none(raw.get(name)) for name in keys if name in raw)
    return next((number for number in converted if number is not None), None)
305
+
306
def normalize_token_metrics(raw: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Map a raw token-usage dict onto the canonical token fields.

    The result always has the keys ``token_metrics_available``,
    ``input_tokens``, ``output_tokens``, ``total_tokens``,
    ``cache_read_tokens`` and ``reasoning_tokens``. Each count is looked up
    under several alternative key spellings via ``_first_num``. When no total
    is present but both input and output are, the total is their sum.
    ``token_metrics_available`` is true when at least one count was found.
    A non-dict or empty *raw* yields all counts as ``None``.
    """
    if not isinstance(raw, dict) or not raw:
        empty: Dict[str, Any] = {"token_metrics_available": False}
        for field_name in (
            "input_tokens",
            "output_tokens",
            "total_tokens",
            "cache_read_tokens",
            "reasoning_tokens",
        ):
            empty[field_name] = None
        return empty

    counts: Dict[str, Any] = {
        "input_tokens": _first_num(raw, "input", "input_tokens", "inputTokens"),
        "output_tokens": _first_num(raw, "output", "output_tokens", "outputTokens"),
        "total_tokens": _first_num(raw, "total", "total_tokens", "totalTokens"),
        "cache_read_tokens": _first_num(raw, "cache_read_tokens", "cachedInputTokens", "cacheRead"),
        "reasoning_tokens": _first_num(raw, "reasoning_tokens", "reasoningTokens", "reasoning"),
    }
    can_derive_total = (
        counts["total_tokens"] is None
        and counts["input_tokens"] is not None
        and counts["output_tokens"] is not None
    )
    if can_derive_total:
        counts["total_tokens"] = counts["input_tokens"] + counts["output_tokens"]

    # When nothing was found every count is already None, so the counts can be
    # emitted as they are in both cases.
    found_any = any(number is not None for number in counts.values())
    return {"token_metrics_available": found_any, **counts}
332
+
333
+ def _ratio(numerator: Optional[int], denominator: Optional[int]) -> Optional[float]:
334
+ if numerator is None or denominator in (None, 0):
335
+ return None
336
+ return numerator / denominator
337
+
338
def build_interaction_metadata(
    source: str,
    user_id: Optional[str],
    session_id: str,
    turn_number: int,
    token_metrics: Optional[Dict[str, Any]],
    tool_call_count: int,
    tool_result_count: int,
    skill_use_count: int,
    model: Optional[str],
    user_message_count: int = 1,
    assistant_message_count: int = 1,
    skill_use_events: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Assemble the metadata dict describing a single interaction.

    The dict carries identity fields (source/agent, user, session, interaction
    id), message/tool/skill counters, the normalized token counts, the model,
    the turn number and a nested ``efficiency`` dict of derived ratios.

    When *skill_use_events* is non-empty, the skill count is the number of
    events and four list fields are added at the end (``skill_names``,
    ``skill_names_all``, ``skill_invocation_modes``, ``skill_agent_paths``).
    Otherwise the skill count falls back to *skill_use_count*.
    """
    tokens = normalize_token_metrics(token_metrics)
    events = list(skill_use_events or [])

    def collect(field_name: str) -> List[str]:
        # Stringified truthy values of one field, in event order.
        return [str(event.get(field_name)) for event in events if event.get(field_name)]

    all_names = collect("skill_name")
    distinct_names = list(dict.fromkeys(all_names))
    skill_total = len(events) if events else int(skill_use_count or 0)
    tool_calls = int(tool_call_count or 0)

    # NOTE(review): with no events but skill_use_count > 0 there are no names,
    # so repeated_skill_count equals the whole skill count -- confirm intended.
    metadata: Dict[str, Any] = {
        "source": source,
        "agent": source,
        "user_id": user_id or "",
        "session_id": session_id,
        "interaction_id": build_interaction_id(source, session_id, turn_number),
        "metrics_schema_version": METRICS_SCHEMA_VERSION,
        "interaction_count": 1,
        "user_message_count": user_message_count,
        "assistant_message_count": assistant_message_count,
        "tool_call_count": tool_calls,
        "tool_result_count": int(tool_result_count or 0),
        "skill_use_count": skill_total,
        "unique_skill_count": len(distinct_names),
        "repeated_skill_count": max(0, skill_total - len(distinct_names)),
        **tokens,
        "model": model,
        "turn_number": int(turn_number or 0),
        "efficiency": {
            "tokens_per_interaction": tokens.get("total_tokens"),
            "tool_calls_per_interaction": tool_calls,
            "skills_per_interaction": skill_total,
            "output_input_token_ratio": _ratio(tokens.get("output_tokens"), tokens.get("input_tokens")),
            "tokens_per_tool_call": _ratio(tokens.get("total_tokens"), tool_calls),
        },
    }
    if events:
        metadata["skill_names"] = distinct_names
        metadata["skill_names_all"] = all_names
        metadata["skill_invocation_modes"] = collect("skill_invocation_mode")
        metadata["skill_agent_paths"] = collect("skill_agent_path")
    return metadata
392
+
393
def discover_known_skills(extra_roots: Optional[List[Path]] = None) -> set:
    """Collect the names of skills found on disk.

    Searches ``~/.codex/skills``, ``~/.claude/skills``,
    ``~/.config/opencode/skill`` and any *extra_roots* recursively for files
    named ``SKILL.md``; the name of each such file's parent directory is a
    skill name. Roots that do not exist are skipped, and an error while
    scanning one root abandons that root only.
    """
    home = Path.home()
    search_roots = [
        home / ".codex" / "skills",
        home / ".claude" / "skills",
        home / ".config" / "opencode" / "skill",
    ]
    search_roots.extend(extra_roots or [])

    skill_names = set()
    for base in search_roots:
        try:
            if base.exists():
                skill_names.update(marker.parent.name for marker in base.rglob("SKILL.md"))
        except Exception:
            continue
    return skill_names
411
+
412
+ def _skill_namespace(name: str) -> str:
413
+ return name.split(":", 1)[0] if ":" in name else ""
414
+
415
+ def _skill_agent_from_interaction_id(interaction_id: str) -> str:
416
+ return str(interaction_id or "unknown").split(":", 1)[0] or "unknown"
417
+
418
+ def _skill_agent_path(agent: str, detected_by: str) -> str:
419
+ if agent == "claude":
420
+ if detected_by == "tool_call":
421
+ return "claude_skill_tool"
422
+ if detected_by == "slash_command":
423
+ return "claude_slash_skill"
424
+ if detected_by == "attribution_skill":
425
+ return "claude_attribution_skill"
426
+ if detected_by == "skill_file_path":
427
+ return "skill_file_path"
428
+ return detected_by or "metadata"
429
+
430
+ def _skill_invocation_mode(agent: str, detected_by: str) -> str:
431
+ if detected_by in ("slash_command", "attribution_skill"):
432
+ return "explicit"
433
+ if detected_by in ("tool_call", "plugin_event"):
434
+ return "implicit"
435
+ return "detected"
436
+
437
+ def _skill_event_type(detected_by: str, agent: str = "unknown") -> str:
438
+ return "invoked" if detected_by in ("tool_call", "plugin_event", "attribution_skill", "slash_command") else "detected"
439
+
440
+ def _skill_id_segment(name: str) -> str:
441
+ segment = re.sub(r"[^A-Za-z0-9_.:-]+", "-", str(name or "").strip()).strip("-")
442
+ return (segment or "unknown")[:96]
443
+
444
def detect_skill_usages(tool_calls: List[Dict[str, Any]], known_skills: set) -> List[Dict[str, str]]:
    """Find skill uses in a list of tool calls.

    Two detectors run for every call:

    1. A call whose tool name is ``skill`` (case-insensitive) and whose input
       is a dict contributes one ``tool_call`` usage, named by the first
       non-blank string under ``skill_name``, ``skill`` or ``name``. A call id
       that was already seen is not reported again.
    2. The call input is serialized to JSON and scanned for paths ending in
       ``<dir>/SKILL.md`` (``/`` or ``\\`` separators, case-insensitive).
       Each ``<dir>`` contributes a ``skill_file_path`` usage if it is in
       *known_skills*, or unconditionally when *known_skills* is empty.

    Entries of *tool_calls* that are not dicts are ignored.
    """
    found: List[Dict[str, str]] = []
    seen_call_ids: set = set()
    for call in tool_calls or []:
        if not isinstance(call, dict):
            continue
        tool_name = str(call.get("name") or "")
        call_id = str(
            call.get("id")
            or call.get("call_id")
            or call.get("callId")
            or call.get("tool_call_id")
            or call.get("toolCallId")
            or ""
        ).strip()
        raw_input = call.get("input")
        input_obj = raw_input if isinstance(raw_input, (dict, list, str)) else {}

        if tool_name.lower() == "skill" and isinstance(input_obj, dict):
            for key in ("skill_name", "skill", "name"):
                value = input_obj.get(key)
                if not (isinstance(value, str) and value.strip()):
                    continue
                name = value.strip()
                if call_id:
                    dedupe_key = f"call:{call_id}"
                    if dedupe_key in seen_call_ids:
                        break
                    seen_call_ids.add(dedupe_key)
                found.append({
                    "name": name,
                    "skill_namespace": _skill_namespace(name),
                    "detected_by": "tool_call",
                    "skill_call_id": call_id,
                })
                break

        try:
            text = json.dumps(input_obj, ensure_ascii=False)
        except Exception:
            text = str(input_obj)
        # The pattern starts at the separator before the skill directory. A
        # greedy "anything before it" prefix would swallow every earlier
        # SKILL.md path in the same quote-free run (only the last one would be
        # reported) and would backtrack quadratically on long inputs.
        for match in re.finditer(r"[\\/]+([^\\/\"'\n\r]+)[\\/]+SKILL\.md", text, re.IGNORECASE):
            candidate = match.group(1)
            if candidate and (candidate in known_skills or not known_skills):
                found.append({
                    "name": candidate,
                    "skill_namespace": _skill_namespace(candidate),
                    "detected_by": "skill_file_path",
                })
    return found
472
+
473
def _skill_usage(name: str, detected_by: str, skill_call_id: str = "") -> Dict[str, str]:
    """Build one skill-usage record.

    The name is stringified, stripped of surrounding whitespace and of leading
    ``/`` characters; the call id is stringified and stripped. The namespace is
    derived from the cleaned name.
    """
    skill_name = str(name or "").strip().lstrip("/")
    call_id = str(skill_call_id or "").strip()
    usage: Dict[str, str] = {}
    usage["name"] = skill_name
    usage["skill_namespace"] = _skill_namespace(skill_name)
    usage["detected_by"] = detected_by
    usage["skill_call_id"] = call_id
    return usage
481
+
482
+ def _accept_skill_candidate(name: Any, known_skills: set, trusted: bool = False) -> str:
483
+ clean = str(name or "").strip().lstrip("/")
484
+ if not clean:
485
+ return ""
486
+ if trusted or not known_skills or clean in known_skills:
487
+ return clean
488
+ return ""
489
+
490
def _detect_skill_usages_from_text(text: str, known_skills: set) -> List[Dict[str, str]]:
    """Find skill uses mentioned in message text.

    Reports, in this order:

    * ``slash_command`` usages for names inside ``<command-name>`` tags, then
      inside ``<command-message>`` tags;
    * ``skill_file_path`` usages for the directory that follows ``/skills/`` or
      ``/skill/`` on a ``Base directory for this skill:`` line.

    Every candidate must pass ``_accept_skill_candidate``. Empty *text* gives
    an empty list.
    """
    usages: List[Dict[str, str]] = []
    if not text:
        return usages

    command_tag_patterns = (
        r"<command-name>\s*/?([^<\s]+)\s*</command-name>",
        r"<command-message>\s*/?([^<\s]+)\s*</command-message>",
    )
    for tag_pattern in command_tag_patterns:
        for hit in re.finditer(tag_pattern, text, re.IGNORECASE):
            accepted = _accept_skill_candidate(hit.group(1), known_skills)
            if accepted:
                usages.append(_skill_usage(accepted, "slash_command"))

    for hit in re.finditer(r"Base directory for this skill:\s*([^\r\n]+)", text, re.IGNORECASE):
        directory_hit = re.search(
            r"[\\/](?:skills|skill)[\\/]([^\\/\"\r\n]+)", hit.group(1), re.IGNORECASE
        )
        if directory_hit is None:
            continue
        accepted = _accept_skill_candidate(directory_hit.group(1), known_skills)
        if accepted:
            usages.append(_skill_usage(accepted, "skill_file_path"))

    return usages
513
+
514
+ def _attribution_skill_from_row(row: Dict[str, Any]) -> str:
515
+ if not isinstance(row, dict):
516
+ return ""
517
+ value = row.get("attributionSkill") or row.get("attribution_skill")
518
+ if isinstance(value, str) and value.strip():
519
+ return value.strip()
520
+ message = row.get("message")
521
+ if isinstance(message, dict):
522
+ value = message.get("attributionSkill") or message.get("attribution_skill")
523
+ if isinstance(value, str) and value.strip():
524
+ return value.strip()
525
+ return ""
526
+
527
+ def _dedupe_turn_skill_usages(usages: List[Dict[str, str]]) -> List[Dict[str, str]]:
528
+ out: List[Dict[str, str]] = []
529
+ seen_call_ids: set = set()
530
+ seen_detected_names: set = set()
531
+ for usage in usages or []:
532
+ name = str(usage.get("name") or "").strip()
533
+ if not name:
534
+ continue
535
+ call_id = str(usage.get("skill_call_id") or "").strip()
536
+ if call_id:
537
+ key = f"call:{call_id}"
538
+ if key in seen_call_ids:
539
+ continue
540
+ seen_call_ids.add(key)
541
+ out.append(usage)
542
+ continue
543
+
544
+ detected_by = str(usage.get("detected_by") or "")
545
+ if detected_by in ("attribution_skill", "slash_command", "skill_file_path"):
546
+ key = f"name:{name}"
547
+ if key in seen_detected_names:
548
+ continue
549
+ seen_detected_names.add(key)
550
+ out.append(usage)
551
+ return out
552
+
553
def detect_turn_skill_usages(turn: "Turn", tool_calls: List[Dict[str, Any]], known_skills: set) -> List[Dict[str, str]]:
    """Collect every skill usage observable in one turn.

    Starts from the usages found in *tool_calls*, then walks the turn's user
    message, its ``context_msgs`` (if the attribute exists) and its assistant
    messages. Each row can add an ``attribution_skill`` usage (accepted
    without consulting *known_skills*) plus any usages found in its text.
    The combined list is de-duplicated before it is returned.
    """
    usages = list(detect_skill_usages(tool_calls, known_skills))

    transcript_rows = [turn.user_msg]
    transcript_rows.extend(getattr(turn, "context_msgs", []))
    transcript_rows.extend(turn.assistant_msgs)

    for row in transcript_rows:
        attributed_name = _accept_skill_candidate(
            _attribution_skill_from_row(row), known_skills, trusted=True
        )
        if attributed_name:
            usages.append(_skill_usage(attributed_name, "attribution_skill"))
        row_text = extract_text(get_content(row))
        usages.extend(_detect_skill_usages_from_text(row_text, known_skills))

    return _dedupe_turn_skill_usages(usages)
564
+
565
def build_skill_use_events(interaction_id: str, skill_usages: List[Dict[str, str]]) -> List[Dict[str, Any]]:
    """Turn skill-usage records into per-use event dicts for one interaction.

    Usages without a name are dropped, and a call id is reported only once.
    This filtering happens before numbering, so ``skill_use_index`` runs
    1..N without gaps and ``skill_use_count_in_interaction`` equals the number
    of events returned. The agent is taken from the leading segment of
    *interaction_id*; ``skill_call_id`` is included only when it is non-empty.
    """
    agent = _skill_agent_from_interaction_id(interaction_id)

    accepted = []
    seen_call_ids: set = set()
    for skill in skill_usages or []:
        name = str(skill.get("name") or "").strip()
        if not name:
            # Checked first so a nameless record cannot claim a call id.
            continue
        call_id = str(skill.get("skill_call_id") or "").strip()
        if call_id:
            if call_id in seen_call_ids:
                continue
            seen_call_ids.add(call_id)
        accepted.append((name, call_id, skill))

    total = len(accepted)
    events: List[Dict[str, Any]] = []
    for index, (name, call_id, skill) in enumerate(accepted, start=1):
        detected_by = str(skill.get("detected_by") or "metadata")
        invocation_mode = _skill_invocation_mode(agent, detected_by)
        events.append({
            "skill_use_id": f"{interaction_id}:skill:{index}:{_skill_id_segment(name)}",
            "skill_use_index": index,
            "skill_use_count_in_interaction": total,
            "skill_event_type": _skill_event_type(detected_by, agent),
            "skill_trigger": invocation_mode,
            "skill_invocation_mode": invocation_mode,
            "skill_agent_path": _skill_agent_path(agent, detected_by),
            "skill_name": name,
            "skill_use_count": 1,
            "skill_namespace": skill.get("skill_namespace") or _skill_namespace(name),
            "detected_by": detected_by,
            **({"skill_call_id": call_id} if call_id else {}),
        })
    return events
601
+
602
def summarize_skill_usages(skill_usages: List[Dict[str, str]]) -> List[Dict[str, Any]]:
    """Aggregate skill usages into one summary entry per skill name.

    Entries appear in order of first occurrence. ``count`` is the number of
    usages with that name; ``detected_by``, ``skill_invocation_mode`` and
    ``skill_agent_path`` come from the first usage seen for the name (the
    latter two are computed for the ``"claude"`` agent). Usages without a name
    are ignored.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for usage in skill_usages or []:
        skill_name = usage.get("name")
        if not skill_name:
            continue
        if skill_name not in by_name:
            source = str(usage.get("detected_by") or "metadata")
            by_name[skill_name] = {
                "name": skill_name,
                "count": 0,
                "detected_by": usage.get("detected_by"),
                "skill_invocation_mode": _skill_invocation_mode("claude", source),
                "skill_agent_path": _skill_agent_path("claude", source),
            }
        by_name[skill_name]["count"] += 1
    return list(by_name.values())
614
614
 
615
615
  def get_model(msg: Dict[str, Any]) -> str:
616
616
  m = msg.get("message")
@@ -699,48 +699,48 @@ def read_new_jsonl(transcript_path: Path, ss: SessionState) -> Tuple[List[Dict[s
699
699
  text = chunk.decode(errors="replace")
700
700
 
701
701
  combined = ss.buffer + text
702
- lines = combined.split("\n")
703
- tail = lines[-1]
704
- ss.offset = new_offset
705
-
706
- msgs: List[Dict[str, Any]] = []
707
- for line in lines[:-1]:
708
- line = line.strip()
702
+ lines = combined.split("\n")
703
+ tail = lines[-1]
704
+ ss.offset = new_offset
705
+
706
+ msgs: List[Dict[str, Any]] = []
707
+ for line in lines[:-1]:
708
+ line = line.strip()
709
709
  if not line:
710
710
  continue
711
711
  try:
712
712
  msgs.append(json.loads(line))
713
- except Exception:
714
- continue
715
-
716
- tail = tail.strip()
717
- if tail:
718
- try:
719
- msgs.append(json.loads(tail))
720
- ss.buffer = ""
721
- except Exception:
722
- # Keep a genuinely partial final line for the next hook run.
723
- ss.buffer = tail
724
- else:
725
- ss.buffer = ""
726
-
727
- return msgs, ss
713
+ except Exception:
714
+ continue
715
+
716
+ tail = tail.strip()
717
+ if tail:
718
+ try:
719
+ msgs.append(json.loads(tail))
720
+ ss.buffer = ""
721
+ except Exception:
722
+ # Keep a genuinely partial final line for the next hook run.
723
+ ss.buffer = tail
724
+ else:
725
+ ss.buffer = ""
726
+
727
+ return msgs, ss
728
728
 
729
729
  # ----------------- Turn assembly -----------------
730
- @dataclass
731
- class Turn:
732
- user_msg: Dict[str, Any]
733
- assistant_msgs: List[Dict[str, Any]]
734
- tool_results_by_id: Dict[str, Any]
735
- context_msgs: List[Dict[str, Any]] = field(default_factory=list)
736
-
737
- def is_skill_context_user_msg(msg: Dict[str, Any]) -> bool:
738
- if get_role(msg) != "user" or is_tool_result(msg):
739
- return False
740
- text = extract_text(get_content(msg)).lstrip()
741
- return text.startswith("Base directory for this skill:")
742
-
743
- def build_turns(messages: List[Dict[str, Any]]) -> List[Turn]:
730
@dataclass
class Turn:
    """One user message plus the assistant rows and tool results grouped with it."""

    # The non-tool-result user transcript row that opened this turn.
    user_msg: Dict[str, Any]
    # Assistant transcript rows collected for this turn.
    assistant_msgs: List[Dict[str, Any]]
    # tool_use_id -> content of the matching tool_result block.
    tool_results_by_id: Dict[str, Any]
    # User rows recognised as skill context while this turn was open;
    # default_factory gives every Turn its own list.
    context_msgs: List[Dict[str, Any]] = field(default_factory=list)
736
+
737
def is_skill_context_user_msg(msg: Dict[str, Any]) -> bool:
    """Tell whether a transcript row is a user row carrying skill context.

    A row qualifies when its role is ``"user"``, it is not a tool-result row,
    and its extracted text (ignoring leading whitespace) begins with
    ``"Base directory for this skill:"``.

    Args:
        msg: A single transcript row.

    Returns:
        True for a skill-context user row, False otherwise.
    """
    is_plain_user_row = get_role(msg) == "user" and not is_tool_result(msg)
    if not is_plain_user_row:
        return False
    body = extract_text(get_content(msg))
    return body.lstrip().startswith("Base directory for this skill:")
742
+
743
+ def build_turns(messages: List[Dict[str, Any]]) -> List[Turn]:
744
744
  """
745
745
  Groups incremental transcript rows into turns:
746
746
  user (non-tool-result) -> assistant messages -> (tool_result rows, possibly interleaved)
@@ -755,50 +755,50 @@ def build_turns(messages: List[Dict[str, Any]]) -> List[Turn]:
755
755
  assistant_order: List[str] = [] # message ids in order of first appearance (or synthetic)
756
756
  assistant_latest: Dict[str, Dict[str, Any]] = {} # id -> latest msg
757
757
 
758
- tool_results_by_id: Dict[str, Any] = {} # tool_use_id -> content
759
- context_msgs: List[Dict[str, Any]] = []
760
-
761
- def flush_turn():
762
- nonlocal current_user, assistant_order, assistant_latest, tool_results_by_id, context_msgs, turns
763
- if current_user is None:
764
- return
765
- if not assistant_latest:
766
- return
767
- assistants = [assistant_latest[mid] for mid in assistant_order if mid in assistant_latest]
768
- turns.append(Turn(
769
- user_msg=current_user,
770
- assistant_msgs=assistants,
771
- tool_results_by_id=dict(tool_results_by_id),
772
- context_msgs=list(context_msgs),
773
- ))
758
+ tool_results_by_id: Dict[str, Any] = {} # tool_use_id -> content
759
+ context_msgs: List[Dict[str, Any]] = []
760
+
761
+ def flush_turn():
762
+ nonlocal current_user, assistant_order, assistant_latest, tool_results_by_id, context_msgs, turns
763
+ if current_user is None:
764
+ return
765
+ if not assistant_latest:
766
+ return
767
+ assistants = [assistant_latest[mid] for mid in assistant_order if mid in assistant_latest]
768
+ turns.append(Turn(
769
+ user_msg=current_user,
770
+ assistant_msgs=assistants,
771
+ tool_results_by_id=dict(tool_results_by_id),
772
+ context_msgs=list(context_msgs),
773
+ ))
774
774
 
775
775
  for msg in messages:
776
776
  role = get_role(msg)
777
777
 
778
778
  # tool_result rows show up as role=user with content blocks of type tool_result
779
- if is_tool_result(msg):
780
- for tr in iter_tool_results(get_content(msg)):
781
- tid = tr.get("tool_use_id")
782
- if tid:
783
- tool_results_by_id[str(tid)] = tr.get("content")
784
- continue
785
-
786
- if is_skill_context_user_msg(msg):
787
- if current_user is not None:
788
- context_msgs.append(msg)
789
- continue
790
-
791
- if role == "user":
792
- # new user message -> finalize previous turn
793
- flush_turn()
779
+ if is_tool_result(msg):
780
+ for tr in iter_tool_results(get_content(msg)):
781
+ tid = tr.get("tool_use_id")
782
+ if tid:
783
+ tool_results_by_id[str(tid)] = tr.get("content")
784
+ continue
785
+
786
+ if is_skill_context_user_msg(msg):
787
+ if current_user is not None:
788
+ context_msgs.append(msg)
789
+ continue
790
+
791
+ if role == "user":
792
+ # new user message -> finalize previous turn
793
+ flush_turn()
794
794
 
795
795
  # start a new turn
796
796
  current_user = msg
797
- assistant_order = []
798
- assistant_latest = {}
799
- tool_results_by_id = {}
800
- context_msgs = []
801
- continue
797
+ assistant_order = []
798
+ assistant_latest = {}
799
+ tool_results_by_id = {}
800
+ context_msgs = []
801
+ continue
802
802
 
803
803
  if role == "assistant":
804
804
  if current_user is None:
@@ -846,31 +846,31 @@ def emit_turn(
846
846
  assistant_text, assistant_text_meta = truncate_text(assistant_text_raw)
847
847
 
848
848
  model = get_model(turn.assistant_msgs[0])
849
- usage_details = get_usage(last_assistant)
850
-
851
- tool_calls = _tool_calls_from_assistants(turn.assistant_msgs)
852
- skill_usages = detect_turn_skill_usages(turn, tool_calls, discover_known_skills())
853
- interaction_id = build_interaction_id("claude", session_id, turn_num)
854
- skill_use_events = build_skill_use_events(interaction_id, skill_usages)
855
- interaction_meta = build_interaction_metadata(
856
- "claude",
857
- user_id,
858
- session_id,
859
- turn_num,
860
- usage_details,
861
- len(tool_calls),
862
- len(turn.tool_results_by_id),
863
- len(skill_use_events),
864
- model,
865
- user_message_count=1,
866
- assistant_message_count=len(turn.assistant_msgs),
867
- skill_use_events=skill_use_events,
868
- )
869
- skill_summary = summarize_skill_usages(skill_usages)
870
-
871
- # attach tool outputs
872
- for c in tool_calls:
873
- if c["id"] and c["id"] in turn.tool_results_by_id:
849
+ usage_details = get_usage(last_assistant)
850
+
851
+ tool_calls = _tool_calls_from_assistants(turn.assistant_msgs)
852
+ skill_usages = detect_turn_skill_usages(turn, tool_calls, discover_known_skills())
853
+ interaction_id = build_interaction_id("claude", session_id, turn_num)
854
+ skill_use_events = build_skill_use_events(interaction_id, skill_usages)
855
+ interaction_meta = build_interaction_metadata(
856
+ "claude",
857
+ user_id,
858
+ session_id,
859
+ turn_num,
860
+ usage_details,
861
+ len(tool_calls),
862
+ len(turn.tool_results_by_id),
863
+ len(skill_use_events),
864
+ model,
865
+ user_message_count=1,
866
+ assistant_message_count=len(turn.assistant_msgs),
867
+ skill_use_events=skill_use_events,
868
+ )
869
+ skill_summary = summarize_skill_usages(skill_usages)
870
+
871
+ # attach tool outputs
872
+ for c in tool_calls:
873
+ if c["id"] and c["id"] in turn.tool_results_by_id:
874
874
  out_raw = turn.tool_results_by_id[c["id"]]
875
875
  out_str = out_raw if isinstance(out_raw, str) else json.dumps(out_raw, ensure_ascii=False)
876
876
  out_trunc, out_meta = truncate_text(out_str)
@@ -882,49 +882,49 @@ def emit_turn(
882
882
  with propagate_attributes(
883
883
  user_id=user_id,
884
884
  session_id=session_id,
885
- trace_name="Agent Turn",
886
- tags=[AGENT_NAME],
885
+ trace_name="Agent Turn",
886
+ tags=[AGENT_NAME],
887
887
  ):
888
- with langfuse.start_as_current_observation(
889
- name="Agent Turn",
890
- input={"role": "user", "content": user_text},
891
- output={"role": "assistant", "content": assistant_text},
892
- metadata={
893
- **interaction_meta,
894
- "source": AGENT_NAME,
895
- "agent": AGENT_NAME,
896
- "session_id": session_id,
897
- "turn_number": turn_num,
898
- "transcript_path": str(transcript_path),
899
- "user_text": user_text_meta,
900
- "skills": skill_summary,
901
- },
902
- ) as trace_span:
903
- # LLM generation
904
- with langfuse.start_as_current_observation(
905
- name="Agent Response",
888
+ with langfuse.start_as_current_observation(
889
+ name="Agent Turn",
890
+ input={"role": "user", "content": user_text},
891
+ output={"role": "assistant", "content": assistant_text},
892
+ metadata={
893
+ **interaction_meta,
894
+ "source": AGENT_NAME,
895
+ "agent": AGENT_NAME,
896
+ "session_id": session_id,
897
+ "turn_number": turn_num,
898
+ "transcript_path": str(transcript_path),
899
+ "user_text": user_text_meta,
900
+ "skills": skill_summary,
901
+ },
902
+ ) as trace_span:
903
+ # LLM generation
904
+ with langfuse.start_as_current_observation(
905
+ name="Agent Response",
906
906
  as_type="generation",
907
907
  model=model,
908
908
  input={"role": "user", "content": user_text},
909
909
  output={"role": "assistant", "content": assistant_text},
910
910
  usage_details=usage_details or None,
911
911
  metadata={
912
- "assistant_text": assistant_text_meta,
913
- "tool_count": len(tool_calls),
914
- "usage_details": usage_details,
915
- "source": AGENT_NAME,
916
- "agent": AGENT_NAME,
917
- "user_id": user_id or "",
918
- "session_id": session_id,
919
- "interaction_id": interaction_meta["interaction_id"],
920
- "turn_number": turn_num,
921
- },
922
- ):
923
- pass
924
-
925
- # Tool observations
926
- for tc in tool_calls:
927
- in_obj = tc["input"]
912
+ "assistant_text": assistant_text_meta,
913
+ "tool_count": len(tool_calls),
914
+ "usage_details": usage_details,
915
+ "source": AGENT_NAME,
916
+ "agent": AGENT_NAME,
917
+ "user_id": user_id or "",
918
+ "session_id": session_id,
919
+ "interaction_id": interaction_meta["interaction_id"],
920
+ "turn_number": turn_num,
921
+ },
922
+ ):
923
+ pass
924
+
925
+ # Tool observations
926
+ for tc in tool_calls:
927
+ in_obj = tc["input"]
928
928
  # truncate tool input if it's a large string payload
929
929
  if isinstance(in_obj, str):
930
930
  in_obj, in_meta = truncate_text(in_obj)
@@ -932,24 +932,24 @@ def emit_turn(
932
932
  in_meta = None
933
933
 
934
934
  with langfuse.start_as_current_observation(
935
- name="Tool Call",
935
+ name="Tool Call",
936
936
  as_type="tool",
937
937
  input=in_obj,
938
- metadata={
939
- "source": AGENT_NAME,
940
- "agent": AGENT_NAME,
941
- "user_id": user_id or "",
942
- "session_id": session_id,
943
- "interaction_id": interaction_meta["interaction_id"],
944
- "tool_name": tc["name"],
945
- "tool_id": tc["id"],
946
- "turn_number": turn_num,
947
- "input_meta": in_meta,
948
- "output_meta": tc.get("output_meta"),
949
- "metrics_schema_version": METRICS_SCHEMA_VERSION,
950
- },
951
- ) as tool_obs:
952
- tool_obs.update(output=tc.get("output"))
938
+ metadata={
939
+ "source": AGENT_NAME,
940
+ "agent": AGENT_NAME,
941
+ "user_id": user_id or "",
942
+ "session_id": session_id,
943
+ "interaction_id": interaction_meta["interaction_id"],
944
+ "tool_name": tc["name"],
945
+ "tool_id": tc["id"],
946
+ "turn_number": turn_num,
947
+ "input_meta": in_meta,
948
+ "output_meta": tc.get("output_meta"),
949
+ "metrics_schema_version": METRICS_SCHEMA_VERSION,
950
+ },
951
+ ) as tool_obs:
952
+ tool_obs.update(output=tc.get("output"))
953
953
 
954
954
  trace_span.update(output={"role": "assistant", "content": assistant_text})
955
955
 
@@ -958,34 +958,34 @@ def main() -> int:
958
958
  start = time.time()
959
959
  debug("Hook started")
960
960
 
961
- if os.environ.get("TRACE_TO_LANGFUSE", "").lower() != "true":
962
- return 0
963
-
964
- public_key = os.environ.get("CC_LANGFUSE_PUBLIC_KEY") or os.environ.get("LANGFUSE_PUBLIC_KEY")
965
- secret_key = os.environ.get("CC_LANGFUSE_SECRET_KEY") or os.environ.get("LANGFUSE_SECRET_KEY")
966
- host = os.environ.get("CC_LANGFUSE_BASE_URL") or os.environ.get("LANGFUSE_BASEURL") or "https://cloud.langfuse.com"
967
-
968
- if not public_key or not secret_key:
969
- warn("Missing Langfuse public/secret key in hook environment; exiting.")
970
- return 0
971
-
972
- payload = read_hook_payload()
973
- session_id, transcript_path, user_id = extract_session_transcript_and_user(payload)
974
-
975
- if not session_id or not transcript_path:
976
- # No structured payload; fail open (do not guess)
977
- warn("Missing session_id or transcript_path from hook payload; exiting.")
978
- return 0
979
-
980
- if not transcript_path.exists():
981
- warn(f"Transcript path does not exist: {transcript_path}")
982
- return 0
983
-
984
- try:
985
- langfuse = Langfuse(public_key=public_key, secret_key=secret_key, host=host)
986
- except Exception as e:
987
- warn(f"Langfuse init failed: {e}")
988
- return 0
961
+ if os.environ.get("TRACE_TO_LANGFUSE", "").lower() != "true":
962
+ return 0
963
+
964
+ public_key = os.environ.get("CC_LANGFUSE_PUBLIC_KEY") or os.environ.get("LANGFUSE_PUBLIC_KEY")
965
+ secret_key = os.environ.get("CC_LANGFUSE_SECRET_KEY") or os.environ.get("LANGFUSE_SECRET_KEY")
966
+ host = os.environ.get("CC_LANGFUSE_BASE_URL") or os.environ.get("LANGFUSE_BASEURL") or "https://cloud.langfuse.com"
967
+
968
+ if not public_key or not secret_key:
969
+ warn("Missing Langfuse public/secret key in hook environment; exiting.")
970
+ return 0
971
+
972
+ payload = read_hook_payload()
973
+ session_id, transcript_path, user_id = extract_session_transcript_and_user(payload)
974
+
975
+ if not session_id or not transcript_path:
976
+ # No structured payload; fail open (do not guess)
977
+ warn("Missing session_id or transcript_path from hook payload; exiting.")
978
+ return 0
979
+
980
+ if not transcript_path.exists():
981
+ warn(f"Transcript path does not exist: {transcript_path}")
982
+ return 0
983
+
984
+ try:
985
+ langfuse = Langfuse(public_key=public_key, secret_key=secret_key, host=host)
986
+ except Exception as e:
987
+ warn(f"Langfuse init failed: {e}")
988
+ return 0
989
989
 
990
990
  try:
991
991
  with FileLock(LOCK_FILE):
@@ -1010,21 +1010,21 @@ def main() -> int:
1010
1010
  for t in turns:
1011
1011
  emitted += 1
1012
1012
  turn_num = ss.turn_count + emitted
1013
- try:
1014
- emit_turn(langfuse, session_id, user_id, turn_num, t, transcript_path)
1015
- except Exception as e:
1016
- warn(f"emit_turn failed: {e}")
1017
- # continue emitting other turns
1013
+ try:
1014
+ emit_turn(langfuse, session_id, user_id, turn_num, t, transcript_path)
1015
+ except Exception as e:
1016
+ warn(f"emit_turn failed: {e}")
1017
+ # continue emitting other turns
1018
1018
 
1019
1019
  ss.turn_count += emitted
1020
1020
  write_session_state(state, key, ss)
1021
1021
  save_state(state)
1022
1022
 
1023
- try:
1024
- langfuse.flush()
1025
- except Exception as e:
1026
- warn(f"Langfuse flush failed: {e}")
1027
- pass
1023
+ try:
1024
+ langfuse.flush()
1025
+ except Exception as e:
1026
+ warn(f"Langfuse flush failed: {e}")
1027
+ pass
1028
1028
 
1029
1029
  dur = time.time() - start
1030
1030
  info(f"Processed {emitted} turns in {dur:.2f}s (session={session_id})")