oh-langfuse 0.1.25 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +38 -10
- package/codex_langfuse_notify.py +283 -64
- package/langfuse_hook.py +247 -46
- package/package.json +10 -5
- package/scripts/metrics-utils.mjs +126 -0
- package/scripts/opencode-langfuse-setup.mjs +233 -45
- package/scripts/real-self-verify.mjs +148 -8
- package/scripts/update-langfuse-runtime.mjs +178 -0
- package/scripts/update-utils.mjs +20 -0
package/bin/cli.js
CHANGED
|
@@ -712,15 +712,27 @@ async function interactiveMain(options) {
|
|
|
712
712
|
rl,
|
|
713
713
|
"What would you like to configure?",
|
|
714
714
|
[
|
|
715
|
-
{ label: "Setup Langfuse", value: "setup-langfuse", description: "Select one or more targets: Claude Code, OpenCode, Codex." },
|
|
716
|
-
{ label: "
|
|
717
|
-
{ label: "Check
|
|
718
|
-
{ label: "
|
|
715
|
+
{ label: "Setup Langfuse", value: "setup-langfuse", description: "Select one or more targets: Claude Code, OpenCode, Codex." },
|
|
716
|
+
{ label: "Update Installed Runtimes", value: "update", description: "Refresh installed Claude, OpenCode, and Codex Langfuse hooks/plugins." },
|
|
717
|
+
{ label: "Check Environment", value: "check-environment", description: "Verify required local tools before setup." },
|
|
718
|
+
{ label: "Check Configuration", value: "check", description: "Inspect current setup without changing local files." },
|
|
719
|
+
{ label: "Exit", value: "exit", description: "Close the setup console." }
|
|
719
720
|
],
|
|
720
721
|
options
|
|
721
722
|
);
|
|
722
|
-
|
|
723
|
-
if (action === "setup-langfuse") return await setupLangfuseMenu(rl, options);
|
|
723
|
+
|
|
724
|
+
if (action === "setup-langfuse") return await setupLangfuseMenu(rl, options);
|
|
725
|
+
if (action === "update") {
|
|
726
|
+
const config = langfuseConfig(options.configOverrides);
|
|
727
|
+
return runNodeScript("update-langfuse-runtime.mjs", [
|
|
728
|
+
"all",
|
|
729
|
+
...commonLangfuseArgs(config),
|
|
730
|
+
...(hasValue(options.npmRegistry) ? [`--npmRegistry=${options.npmRegistry}`] : []),
|
|
731
|
+
...(hasValue(options.pipIndexUrl) ? [`--pipIndexUrl=${options.pipIndexUrl}`] : []),
|
|
732
|
+
...(options.skipCheck ? ["--skip-check"] : []),
|
|
733
|
+
...(options.skipPluginInstall ? ["--skip-plugin-install"] : []),
|
|
734
|
+
], options);
|
|
735
|
+
}
|
|
724
736
|
if (action === "setup-claude") return await setupClaude(rl, options);
|
|
725
737
|
if (action === "setup-opencode") return await setupOpenCode(rl, options);
|
|
726
738
|
if (action === "setup-codex") return await setupCodex(rl, options);
|
|
@@ -758,7 +770,7 @@ async function setupLangfuseMenu(rl, options) {
|
|
|
758
770
|
return code;
|
|
759
771
|
}
|
|
760
772
|
|
|
761
|
-
function printHelp() {
|
|
773
|
+
function printHelp() {
|
|
762
774
|
renderBrand({ dryRun: false });
|
|
763
775
|
console.log("");
|
|
764
776
|
renderSection("Usage", [
|
|
@@ -773,7 +785,12 @@ function printHelp() {
|
|
|
773
785
|
"oh-langfuse check environment",
|
|
774
786
|
"oh-langfuse check claude",
|
|
775
787
|
"oh-langfuse check opencode",
|
|
776
|
-
"oh-langfuse check codex"
|
|
788
|
+
"oh-langfuse check codex",
|
|
789
|
+
"oh-langfuse update",
|
|
790
|
+
"oh-langfuse update all",
|
|
791
|
+
"oh-langfuse update claude",
|
|
792
|
+
"oh-langfuse update opencode",
|
|
793
|
+
"oh-langfuse update codex"
|
|
777
794
|
]);
|
|
778
795
|
renderSection("Options", [
|
|
779
796
|
`${paint("--dry-run", t.gold)} Preview actions without writing files or installing packages.`,
|
|
@@ -821,8 +838,19 @@ async function main() {
|
|
|
821
838
|
if (cmd === "setup" && target === "claude") return await setupClaude(rl, options);
|
|
822
839
|
if (cmd === "setup" && target === "opencode") return await setupOpenCode(rl, options);
|
|
823
840
|
if (cmd === "setup" && target === "codex") return await setupCodex(rl, options);
|
|
824
|
-
if (cmd === "setup") return await setupLangfuseMenu(rl, options);
|
|
825
|
-
if (cmd === "
|
|
841
|
+
if (cmd === "setup") return await setupLangfuseMenu(rl, options);
|
|
842
|
+
if (cmd === "update") {
|
|
843
|
+
const updateArgs = [
|
|
844
|
+
target || "all",
|
|
845
|
+
...commonLangfuseArgs(langfuseConfig(options.configOverrides)),
|
|
846
|
+
...(hasValue(options.npmRegistry) ? [`--npmRegistry=${options.npmRegistry}`] : []),
|
|
847
|
+
...(hasValue(options.pipIndexUrl) ? [`--pipIndexUrl=${options.pipIndexUrl}`] : []),
|
|
848
|
+
...(options.skipCheck ? ["--skip-check"] : []),
|
|
849
|
+
...(options.skipPluginInstall ? ["--skip-plugin-install"] : []),
|
|
850
|
+
];
|
|
851
|
+
return runNodeScript("update-langfuse-runtime.mjs", updateArgs, options);
|
|
852
|
+
}
|
|
853
|
+
if (cmd === "check" && target === "claude") return checkClaude(options);
|
|
826
854
|
if (cmd === "check" && target === "opencode") return checkOpenCode(options);
|
|
827
855
|
if (cmd === "check" && target === "codex") return checkCodex(options);
|
|
828
856
|
if (cmd === "check" && target === "environment") {
|
package/codex_langfuse_notify.py
CHANGED
|
@@ -7,11 +7,12 @@ uses that signal to incrementally read the matching Codex session JSONL file and
|
|
|
7
7
|
emit the new assistant/user/tool events to Langfuse.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
import json
|
|
11
|
-
import os
|
|
12
|
-
import
|
|
13
|
-
import
|
|
14
|
-
import
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
import sys
|
|
14
|
+
import time
|
|
15
|
+
import hashlib
|
|
15
16
|
from dataclasses import dataclass
|
|
16
17
|
from datetime import datetime, timezone
|
|
17
18
|
from pathlib import Path
|
|
@@ -30,8 +31,9 @@ STATE_FILE = STATE_DIR / "state.json"
|
|
|
30
31
|
LOCK_FILE = STATE_DIR / "state.lock"
|
|
31
32
|
LOG_FILE = STATE_DIR / "codex_langfuse_notify.log"
|
|
32
33
|
|
|
33
|
-
DEBUG = os.environ.get("CODEX_LANGFUSE_DEBUG", "").lower() == "true"
|
|
34
|
-
MAX_CHARS = int(os.environ.get("CODEX_LANGFUSE_MAX_CHARS", "20000"))
|
|
34
|
+
DEBUG = os.environ.get("CODEX_LANGFUSE_DEBUG", "").lower() == "true"
|
|
35
|
+
MAX_CHARS = int(os.environ.get("CODEX_LANGFUSE_MAX_CHARS", "20000"))
|
|
36
|
+
METRICS_SCHEMA_VERSION = "1.0"
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
def log(level: str, message: str) -> None:
|
|
@@ -290,7 +292,7 @@ def extract_text(content: Any) -> str:
|
|
|
290
292
|
return ""
|
|
291
293
|
|
|
292
294
|
|
|
293
|
-
def truncate(value: Any, max_chars: int = MAX_CHARS) -> Tuple[Any, Dict[str, Any]]:
|
|
295
|
+
def truncate(value: Any, max_chars: int = MAX_CHARS) -> Tuple[Any, Dict[str, Any]]:
|
|
294
296
|
if not isinstance(value, str):
|
|
295
297
|
try:
|
|
296
298
|
text = json.dumps(value, ensure_ascii=False)
|
|
@@ -303,12 +305,163 @@ def truncate(value: Any, max_chars: int = MAX_CHARS) -> Tuple[Any, Dict[str, Any
|
|
|
303
305
|
if orig_len <= max_chars:
|
|
304
306
|
return value if isinstance(value, str) else value, {"truncated": False, "orig_len": orig_len}
|
|
305
307
|
kept = text[:max_chars]
|
|
306
|
-
return kept, {
|
|
307
|
-
"truncated": True,
|
|
308
|
-
"orig_len": orig_len,
|
|
309
|
-
"kept_len": len(kept),
|
|
310
|
-
"sha256": hashlib.sha256(text.encode("utf-8")).hexdigest(),
|
|
311
|
-
}
|
|
308
|
+
return kept, {
|
|
309
|
+
"truncated": True,
|
|
310
|
+
"orig_len": orig_len,
|
|
311
|
+
"kept_len": len(kept),
|
|
312
|
+
"sha256": hashlib.sha256(text.encode("utf-8")).hexdigest(),
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def build_interaction_id(source: str, session_id: str, turn_number: int) -> str:
|
|
317
|
+
return f"{source or 'unknown'}:{session_id or 'unknown'}:{int(turn_number or 0)}"
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _num_or_none(value: Any) -> Optional[int]:
|
|
321
|
+
if isinstance(value, bool):
|
|
322
|
+
return None
|
|
323
|
+
if isinstance(value, int) and value >= 0:
|
|
324
|
+
return value
|
|
325
|
+
if isinstance(value, float) and value >= 0:
|
|
326
|
+
return int(value)
|
|
327
|
+
if isinstance(value, str):
|
|
328
|
+
try:
|
|
329
|
+
n = int(value)
|
|
330
|
+
return n if n >= 0 else None
|
|
331
|
+
except Exception:
|
|
332
|
+
return None
|
|
333
|
+
return None
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _first_num(raw: Dict[str, Any], *keys: str) -> Optional[int]:
|
|
337
|
+
for key in keys:
|
|
338
|
+
if key in raw:
|
|
339
|
+
value = _num_or_none(raw.get(key))
|
|
340
|
+
if value is not None:
|
|
341
|
+
return value
|
|
342
|
+
return None
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def normalize_token_metrics(raw: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
346
|
+
if not isinstance(raw, dict) or not raw:
|
|
347
|
+
return {
|
|
348
|
+
"token_metrics_available": False,
|
|
349
|
+
"input_tokens": None,
|
|
350
|
+
"output_tokens": None,
|
|
351
|
+
"total_tokens": None,
|
|
352
|
+
"cache_read_tokens": None,
|
|
353
|
+
"reasoning_tokens": None,
|
|
354
|
+
}
|
|
355
|
+
input_tokens = _first_num(raw, "input", "input_tokens", "inputTokens")
|
|
356
|
+
output_tokens = _first_num(raw, "output", "output_tokens", "outputTokens")
|
|
357
|
+
total_tokens = _first_num(raw, "total", "total_tokens", "totalTokens")
|
|
358
|
+
if total_tokens is None and input_tokens is not None and output_tokens is not None:
|
|
359
|
+
total_tokens = input_tokens + output_tokens
|
|
360
|
+
cache_read_tokens = _first_num(raw, "cache_read_tokens", "cachedInputTokens", "cacheRead")
|
|
361
|
+
reasoning_tokens = _first_num(raw, "reasoning_tokens", "reasoningTokens", "reasoning")
|
|
362
|
+
available = any(v is not None for v in [input_tokens, output_tokens, total_tokens, cache_read_tokens, reasoning_tokens])
|
|
363
|
+
return {
|
|
364
|
+
"token_metrics_available": available,
|
|
365
|
+
"input_tokens": input_tokens if available else None,
|
|
366
|
+
"output_tokens": output_tokens if available else None,
|
|
367
|
+
"total_tokens": total_tokens if available else None,
|
|
368
|
+
"cache_read_tokens": cache_read_tokens if available else None,
|
|
369
|
+
"reasoning_tokens": reasoning_tokens if available else None,
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _ratio(numerator: Optional[int], denominator: Optional[int]) -> Optional[float]:
|
|
374
|
+
if numerator is None or denominator in (None, 0):
|
|
375
|
+
return None
|
|
376
|
+
return numerator / denominator
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def build_interaction_metadata(
|
|
380
|
+
source: str,
|
|
381
|
+
user_id: Optional[str],
|
|
382
|
+
session_id: str,
|
|
383
|
+
turn_number: int,
|
|
384
|
+
token_metrics: Optional[Dict[str, Any]],
|
|
385
|
+
tool_call_count: int,
|
|
386
|
+
tool_result_count: int,
|
|
387
|
+
skill_use_count: int,
|
|
388
|
+
model: Optional[str],
|
|
389
|
+
user_message_count: int = 1,
|
|
390
|
+
assistant_message_count: int = 1,
|
|
391
|
+
) -> Dict[str, Any]:
|
|
392
|
+
tokens = normalize_token_metrics(token_metrics)
|
|
393
|
+
return {
|
|
394
|
+
"source": source,
|
|
395
|
+
"user_id": user_id or "",
|
|
396
|
+
"session_id": session_id,
|
|
397
|
+
"interaction_id": build_interaction_id(source, session_id, turn_number),
|
|
398
|
+
"metrics_schema_version": METRICS_SCHEMA_VERSION,
|
|
399
|
+
"interaction_count": 1,
|
|
400
|
+
"user_message_count": user_message_count,
|
|
401
|
+
"assistant_message_count": assistant_message_count,
|
|
402
|
+
"tool_call_count": int(tool_call_count or 0),
|
|
403
|
+
"tool_result_count": int(tool_result_count or 0),
|
|
404
|
+
"skill_use_count": int(skill_use_count or 0),
|
|
405
|
+
**tokens,
|
|
406
|
+
"model": model,
|
|
407
|
+
"turn_number": int(turn_number or 0),
|
|
408
|
+
"efficiency": {
|
|
409
|
+
"tokens_per_interaction": tokens.get("total_tokens"),
|
|
410
|
+
"tool_calls_per_interaction": int(tool_call_count or 0),
|
|
411
|
+
"skills_per_interaction": int(skill_use_count or 0),
|
|
412
|
+
"output_input_token_ratio": _ratio(tokens.get("output_tokens"), tokens.get("input_tokens")),
|
|
413
|
+
"tokens_per_tool_call": _ratio(tokens.get("total_tokens"), int(tool_call_count or 0)),
|
|
414
|
+
},
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def discover_known_skills(extra_roots: Optional[List[Path]] = None) -> set:
|
|
419
|
+
roots = [
|
|
420
|
+
CODEX_DIR / "skills",
|
|
421
|
+
Path.home() / ".claude" / "skills",
|
|
422
|
+
Path.home() / ".config" / "opencode" / "skill",
|
|
423
|
+
]
|
|
424
|
+
if extra_roots:
|
|
425
|
+
roots.extend(extra_roots)
|
|
426
|
+
names = set()
|
|
427
|
+
for root in roots:
|
|
428
|
+
try:
|
|
429
|
+
if not root.exists():
|
|
430
|
+
continue
|
|
431
|
+
for skill_file in root.rglob("SKILL.md"):
|
|
432
|
+
names.add(skill_file.parent.name)
|
|
433
|
+
except Exception:
|
|
434
|
+
continue
|
|
435
|
+
return names
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _skill_namespace(name: str) -> str:
|
|
439
|
+
return name.split(":", 1)[0] if ":" in name else ""
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def detect_skill_usages(tool_calls: List[Dict[str, Any]], known_skills: set) -> List[Dict[str, str]]:
|
|
443
|
+
found: Dict[str, str] = {}
|
|
444
|
+
for call in tool_calls or []:
|
|
445
|
+
tool_name = str(call.get("name") or "")
|
|
446
|
+
input_obj = call.get("input") if isinstance(call.get("input"), (dict, list, str)) else {}
|
|
447
|
+
if tool_name.lower() == "skill" and isinstance(input_obj, dict):
|
|
448
|
+
for key in ("skill_name", "skill", "name"):
|
|
449
|
+
value = input_obj.get(key)
|
|
450
|
+
if isinstance(value, str) and value.strip():
|
|
451
|
+
found[value.strip()] = "tool_call"
|
|
452
|
+
break
|
|
453
|
+
try:
|
|
454
|
+
text = json.dumps(input_obj, ensure_ascii=False)
|
|
455
|
+
except Exception:
|
|
456
|
+
text = str(input_obj)
|
|
457
|
+
for match in re.finditer(r"([A-Za-z]:)?[^\"'\n\r]*[\\/]+([^\\/\"'\n\r]+)[\\/]+SKILL\.md", text, re.IGNORECASE):
|
|
458
|
+
candidate = match.group(2)
|
|
459
|
+
if candidate and (candidate in known_skills or not known_skills):
|
|
460
|
+
found[candidate] = "skill_file_path"
|
|
461
|
+
return [
|
|
462
|
+
{"name": name, "skill_namespace": _skill_namespace(name), "detected_by": detected_by}
|
|
463
|
+
for name, detected_by in sorted(found.items())
|
|
464
|
+
]
|
|
312
465
|
|
|
313
466
|
|
|
314
467
|
def get_payload(row: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -428,67 +581,133 @@ def emit_codex_turn(
|
|
|
428
581
|
) -> None:
|
|
429
582
|
user_text, user_meta = truncate(material.get("user_text") or "")
|
|
430
583
|
assistant_text, assistant_meta = truncate(material.get("assistant_text") or "")
|
|
431
|
-
usage_details = usage_details_from_codex(usage)
|
|
432
|
-
model = first_string(meta.get("model"), meta.get("model_provider")) or "codex"
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
584
|
+
usage_details = usage_details_from_codex(usage)
|
|
585
|
+
model = first_string(meta.get("model"), meta.get("model_provider")) or "codex"
|
|
586
|
+
tool_calls = material.get("tool_calls") or []
|
|
587
|
+
tool_results = material.get("tool_results") or []
|
|
588
|
+
skill_usages = detect_skill_usages(tool_calls, discover_known_skills())
|
|
589
|
+
interaction_meta = build_interaction_metadata(
|
|
590
|
+
"codex",
|
|
591
|
+
user_id,
|
|
592
|
+
session_id,
|
|
593
|
+
turn_num,
|
|
594
|
+
usage_details,
|
|
595
|
+
len(tool_calls),
|
|
596
|
+
len(tool_results),
|
|
597
|
+
len(skill_usages),
|
|
598
|
+
model,
|
|
599
|
+
user_message_count=1 if material.get("user_text") else 0,
|
|
600
|
+
assistant_message_count=1 if material.get("assistant_text") else 0,
|
|
601
|
+
)
|
|
602
|
+
skill_summary = [
|
|
603
|
+
{"name": item["name"], "count": 1, "detected_by": item["detected_by"]}
|
|
604
|
+
for item in skill_usages
|
|
605
|
+
]
|
|
606
|
+
|
|
607
|
+
with propagate_attributes(
|
|
608
|
+
user_id=user_id,
|
|
436
609
|
session_id=session_id,
|
|
437
610
|
trace_name=f"Codex - Turn {turn_num}",
|
|
438
611
|
tags=["codex"],
|
|
439
612
|
):
|
|
440
613
|
with langfuse.start_as_current_observation(
|
|
441
614
|
name=f"Codex - Turn {turn_num}",
|
|
442
|
-
input={"role": "user", "content": user_text},
|
|
443
|
-
metadata={
|
|
444
|
-
|
|
445
|
-
"
|
|
446
|
-
"
|
|
447
|
-
"
|
|
615
|
+
input={"role": "user", "content": user_text},
|
|
616
|
+
metadata={
|
|
617
|
+
**interaction_meta,
|
|
618
|
+
"source": "codex",
|
|
619
|
+
"session_id": session_id,
|
|
620
|
+
"turn_number": turn_num,
|
|
621
|
+
"session_path": str(session_path),
|
|
448
622
|
"cwd": meta.get("cwd"),
|
|
449
623
|
"originator": meta.get("originator"),
|
|
450
|
-
"cli_version": meta.get("cli_version"),
|
|
451
|
-
"user_text": user_meta,
|
|
452
|
-
"usage": usage,
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
624
|
+
"cli_version": meta.get("cli_version"),
|
|
625
|
+
"user_text": user_meta,
|
|
626
|
+
"usage": usage,
|
|
627
|
+
"skills": skill_summary,
|
|
628
|
+
},
|
|
629
|
+
) as trace_span:
|
|
630
|
+
with langfuse.start_as_current_observation(
|
|
631
|
+
name="AI Interaction",
|
|
632
|
+
input={"role": "user", "content": user_text},
|
|
633
|
+
output={"role": "assistant", "content": assistant_text},
|
|
634
|
+
metadata=interaction_meta,
|
|
635
|
+
):
|
|
636
|
+
pass
|
|
637
|
+
|
|
638
|
+
with langfuse.start_as_current_observation(
|
|
639
|
+
name="Codex Response",
|
|
640
|
+
as_type="generation",
|
|
458
641
|
model=model,
|
|
459
642
|
input={"role": "user", "content": user_text},
|
|
460
|
-
output={"role": "assistant", "content": assistant_text},
|
|
461
|
-
usage_details=usage_details or None,
|
|
462
|
-
metadata={
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
643
|
+
output={"role": "assistant", "content": assistant_text},
|
|
644
|
+
usage_details=usage_details or None,
|
|
645
|
+
metadata={
|
|
646
|
+
"assistant_text": assistant_meta,
|
|
647
|
+
"source": "codex",
|
|
648
|
+
"user_id": user_id or "",
|
|
649
|
+
"session_id": session_id,
|
|
650
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
651
|
+
"turn_number": turn_num,
|
|
652
|
+
},
|
|
653
|
+
):
|
|
654
|
+
pass
|
|
655
|
+
|
|
656
|
+
for skill in skill_usages:
|
|
657
|
+
with langfuse.start_as_current_observation(
|
|
658
|
+
name=f"Skill Use: {skill['name']}",
|
|
659
|
+
metadata={
|
|
660
|
+
"source": "codex",
|
|
661
|
+
"user_id": user_id or "",
|
|
662
|
+
"session_id": session_id,
|
|
663
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
664
|
+
"skill_name": skill["name"],
|
|
665
|
+
"skill_namespace": skill["skill_namespace"],
|
|
666
|
+
"detected_by": skill["detected_by"],
|
|
667
|
+
"turn_number": turn_num,
|
|
668
|
+
"metrics_schema_version": METRICS_SCHEMA_VERSION,
|
|
669
|
+
},
|
|
670
|
+
):
|
|
671
|
+
pass
|
|
672
|
+
|
|
673
|
+
for call in tool_calls:
|
|
674
|
+
tool_input, input_meta = truncate(call.get("input"))
|
|
675
|
+
with langfuse.start_as_current_observation(
|
|
676
|
+
name=f"Tool: {call.get('name') or 'tool'}",
|
|
484
677
|
as_type="tool",
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
"
|
|
488
|
-
"
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
678
|
+
input=tool_input,
|
|
679
|
+
metadata={
|
|
680
|
+
"source": "codex",
|
|
681
|
+
"user_id": user_id or "",
|
|
682
|
+
"session_id": session_id,
|
|
683
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
684
|
+
"tool_id": call.get("id"),
|
|
685
|
+
"tool_name": call.get("name"),
|
|
686
|
+
"turn_number": turn_num,
|
|
687
|
+
"input_meta": input_meta,
|
|
688
|
+
"metrics_schema_version": METRICS_SCHEMA_VERSION,
|
|
689
|
+
},
|
|
690
|
+
):
|
|
691
|
+
pass
|
|
692
|
+
|
|
693
|
+
for result in tool_results:
|
|
694
|
+
output, output_meta = truncate(result.get("output"))
|
|
695
|
+
with langfuse.start_as_current_observation(
|
|
696
|
+
name=f"Tool Result: {result.get('name') or 'tool'}",
|
|
697
|
+
as_type="tool",
|
|
698
|
+
metadata={
|
|
699
|
+
"source": "codex",
|
|
700
|
+
"user_id": user_id or "",
|
|
701
|
+
"session_id": session_id,
|
|
702
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
703
|
+
"tool_id": result.get("id"),
|
|
704
|
+
"tool_name": result.get("name"),
|
|
705
|
+
"turn_number": turn_num,
|
|
706
|
+
"output_meta": output_meta,
|
|
707
|
+
"metrics_schema_version": METRICS_SCHEMA_VERSION,
|
|
708
|
+
},
|
|
709
|
+
) as tool_obs:
|
|
710
|
+
tool_obs.update(output=output)
|
|
492
711
|
|
|
493
712
|
trace_span.update(output={"role": "assistant", "content": assistant_text})
|
|
494
713
|
|