oh-langfuse 0.1.25 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +44 -10
- package/codex_langfuse_notify.py +283 -64
- package/langfuse_hook.py +247 -46
- package/package.json +10 -5
- package/scripts/metrics-utils.mjs +126 -0
- package/scripts/opencode-langfuse-setup.mjs +233 -45
- package/scripts/real-self-verify.mjs +148 -8
- package/scripts/update-langfuse-runtime.mjs +178 -0
- package/scripts/update-utils.mjs +20 -0
package/bin/cli.js
CHANGED
|
@@ -33,6 +33,12 @@ function createPromptInterface(options) {
|
|
|
33
33
|
question(query) {
|
|
34
34
|
return new Promise((resolve) => rl.question(query, resolve));
|
|
35
35
|
},
|
|
36
|
+
pause() {
|
|
37
|
+
rl.pause();
|
|
38
|
+
},
|
|
39
|
+
resume() {
|
|
40
|
+
rl.resume();
|
|
41
|
+
},
|
|
36
42
|
close() {
|
|
37
43
|
rl.close();
|
|
38
44
|
},
|
|
@@ -712,15 +718,27 @@ async function interactiveMain(options) {
|
|
|
712
718
|
rl,
|
|
713
719
|
"What would you like to configure?",
|
|
714
720
|
[
|
|
715
|
-
{ label: "Setup Langfuse", value: "setup-langfuse", description: "Select one or more targets: Claude Code, OpenCode, Codex." },
|
|
716
|
-
{ label: "
|
|
717
|
-
{ label: "Check
|
|
718
|
-
{ label: "
|
|
721
|
+
{ label: "Setup Langfuse", value: "setup-langfuse", description: "Select one or more targets: Claude Code, OpenCode, Codex." },
|
|
722
|
+
{ label: "Update Installed Runtimes", value: "update", description: "Refresh installed Claude, OpenCode, and Codex Langfuse hooks/plugins." },
|
|
723
|
+
{ label: "Check Environment", value: "check-environment", description: "Verify required local tools before setup." },
|
|
724
|
+
{ label: "Check Configuration", value: "check", description: "Inspect current setup without changing local files." },
|
|
725
|
+
{ label: "Exit", value: "exit", description: "Close the setup console." }
|
|
719
726
|
],
|
|
720
727
|
options
|
|
721
728
|
);
|
|
722
|
-
|
|
723
|
-
if (action === "setup-langfuse") return await setupLangfuseMenu(rl, options);
|
|
729
|
+
|
|
730
|
+
if (action === "setup-langfuse") return await setupLangfuseMenu(rl, options);
|
|
731
|
+
if (action === "update") {
|
|
732
|
+
const config = langfuseConfig(options.configOverrides);
|
|
733
|
+
return runNodeScript("update-langfuse-runtime.mjs", [
|
|
734
|
+
"all",
|
|
735
|
+
...commonLangfuseArgs(config),
|
|
736
|
+
...(hasValue(options.npmRegistry) ? [`--npmRegistry=${options.npmRegistry}`] : []),
|
|
737
|
+
...(hasValue(options.pipIndexUrl) ? [`--pipIndexUrl=${options.pipIndexUrl}`] : []),
|
|
738
|
+
...(options.skipCheck ? ["--skip-check"] : []),
|
|
739
|
+
...(options.skipPluginInstall ? ["--skip-plugin-install"] : []),
|
|
740
|
+
], options);
|
|
741
|
+
}
|
|
724
742
|
if (action === "setup-claude") return await setupClaude(rl, options);
|
|
725
743
|
if (action === "setup-opencode") return await setupOpenCode(rl, options);
|
|
726
744
|
if (action === "setup-codex") return await setupCodex(rl, options);
|
|
@@ -758,7 +776,7 @@ async function setupLangfuseMenu(rl, options) {
|
|
|
758
776
|
return code;
|
|
759
777
|
}
|
|
760
778
|
|
|
761
|
-
function printHelp() {
|
|
779
|
+
function printHelp() {
|
|
762
780
|
renderBrand({ dryRun: false });
|
|
763
781
|
console.log("");
|
|
764
782
|
renderSection("Usage", [
|
|
@@ -773,7 +791,12 @@ function printHelp() {
|
|
|
773
791
|
"oh-langfuse check environment",
|
|
774
792
|
"oh-langfuse check claude",
|
|
775
793
|
"oh-langfuse check opencode",
|
|
776
|
-
"oh-langfuse check codex"
|
|
794
|
+
"oh-langfuse check codex",
|
|
795
|
+
"oh-langfuse update",
|
|
796
|
+
"oh-langfuse update all",
|
|
797
|
+
"oh-langfuse update claude",
|
|
798
|
+
"oh-langfuse update opencode",
|
|
799
|
+
"oh-langfuse update codex"
|
|
777
800
|
]);
|
|
778
801
|
renderSection("Options", [
|
|
779
802
|
`${paint("--dry-run", t.gold)} Preview actions without writing files or installing packages.`,
|
|
@@ -821,8 +844,19 @@ async function main() {
|
|
|
821
844
|
if (cmd === "setup" && target === "claude") return await setupClaude(rl, options);
|
|
822
845
|
if (cmd === "setup" && target === "opencode") return await setupOpenCode(rl, options);
|
|
823
846
|
if (cmd === "setup" && target === "codex") return await setupCodex(rl, options);
|
|
824
|
-
if (cmd === "setup") return await setupLangfuseMenu(rl, options);
|
|
825
|
-
if (cmd === "
|
|
847
|
+
if (cmd === "setup") return await setupLangfuseMenu(rl, options);
|
|
848
|
+
if (cmd === "update") {
|
|
849
|
+
const updateArgs = [
|
|
850
|
+
target || "all",
|
|
851
|
+
...commonLangfuseArgs(langfuseConfig(options.configOverrides)),
|
|
852
|
+
...(hasValue(options.npmRegistry) ? [`--npmRegistry=${options.npmRegistry}`] : []),
|
|
853
|
+
...(hasValue(options.pipIndexUrl) ? [`--pipIndexUrl=${options.pipIndexUrl}`] : []),
|
|
854
|
+
...(options.skipCheck ? ["--skip-check"] : []),
|
|
855
|
+
...(options.skipPluginInstall ? ["--skip-plugin-install"] : []),
|
|
856
|
+
];
|
|
857
|
+
return runNodeScript("update-langfuse-runtime.mjs", updateArgs, options);
|
|
858
|
+
}
|
|
859
|
+
if (cmd === "check" && target === "claude") return checkClaude(options);
|
|
826
860
|
if (cmd === "check" && target === "opencode") return checkOpenCode(options);
|
|
827
861
|
if (cmd === "check" && target === "codex") return checkCodex(options);
|
|
828
862
|
if (cmd === "check" && target === "environment") {
|
package/codex_langfuse_notify.py
CHANGED
|
@@ -7,11 +7,12 @@ uses that signal to incrementally read the matching Codex session JSONL file and
|
|
|
7
7
|
emit the new assistant/user/tool events to Langfuse.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
import json
|
|
11
|
-
import os
|
|
12
|
-
import
|
|
13
|
-
import
|
|
14
|
-
import
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
import sys
|
|
14
|
+
import time
|
|
15
|
+
import hashlib
|
|
15
16
|
from dataclasses import dataclass
|
|
16
17
|
from datetime import datetime, timezone
|
|
17
18
|
from pathlib import Path
|
|
@@ -30,8 +31,9 @@ STATE_FILE = STATE_DIR / "state.json"
|
|
|
30
31
|
LOCK_FILE = STATE_DIR / "state.lock"
|
|
31
32
|
LOG_FILE = STATE_DIR / "codex_langfuse_notify.log"
|
|
32
33
|
|
|
33
|
-
DEBUG = os.environ.get("CODEX_LANGFUSE_DEBUG", "").lower() == "true"
|
|
34
|
-
MAX_CHARS = int(os.environ.get("CODEX_LANGFUSE_MAX_CHARS", "20000"))
|
|
34
|
+
DEBUG = os.environ.get("CODEX_LANGFUSE_DEBUG", "").lower() == "true"
|
|
35
|
+
MAX_CHARS = int(os.environ.get("CODEX_LANGFUSE_MAX_CHARS", "20000"))
|
|
36
|
+
METRICS_SCHEMA_VERSION = "1.0"
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
def log(level: str, message: str) -> None:
|
|
@@ -290,7 +292,7 @@ def extract_text(content: Any) -> str:
|
|
|
290
292
|
return ""
|
|
291
293
|
|
|
292
294
|
|
|
293
|
-
def truncate(value: Any, max_chars: int = MAX_CHARS) -> Tuple[Any, Dict[str, Any]]:
|
|
295
|
+
def truncate(value: Any, max_chars: int = MAX_CHARS) -> Tuple[Any, Dict[str, Any]]:
|
|
294
296
|
if not isinstance(value, str):
|
|
295
297
|
try:
|
|
296
298
|
text = json.dumps(value, ensure_ascii=False)
|
|
@@ -303,12 +305,163 @@ def truncate(value: Any, max_chars: int = MAX_CHARS) -> Tuple[Any, Dict[str, Any
|
|
|
303
305
|
if orig_len <= max_chars:
|
|
304
306
|
return value if isinstance(value, str) else value, {"truncated": False, "orig_len": orig_len}
|
|
305
307
|
kept = text[:max_chars]
|
|
306
|
-
return kept, {
|
|
307
|
-
"truncated": True,
|
|
308
|
-
"orig_len": orig_len,
|
|
309
|
-
"kept_len": len(kept),
|
|
310
|
-
"sha256": hashlib.sha256(text.encode("utf-8")).hexdigest(),
|
|
311
|
-
}
|
|
308
|
+
return kept, {
|
|
309
|
+
"truncated": True,
|
|
310
|
+
"orig_len": orig_len,
|
|
311
|
+
"kept_len": len(kept),
|
|
312
|
+
"sha256": hashlib.sha256(text.encode("utf-8")).hexdigest(),
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def build_interaction_id(source: str, session_id: str, turn_number: int) -> str:
    """Compose the identifier of a single turn as ``source:session_id:turn``.

    A falsy ``source`` or ``session_id`` is rendered as ``"unknown"``; a falsy
    ``turn_number`` is rendered as ``0``. The turn is always passed through
    ``int`` before formatting.
    """
    origin = source or "unknown"
    session = session_id or "unknown"
    turn = int(turn_number or 0)
    return "{}:{}:{}".format(origin, session, turn)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _num_or_none(value: Any) -> Optional[int]:
    """Coerce ``value`` to a non-negative ``int``, or return ``None``.

    Accepted inputs are non-negative ints, non-negative finite floats
    (truncated toward zero by ``int``) and strings that ``int`` can parse to a
    non-negative number. Everything else -- including booleans, negatives, NaN
    and infinities -- yields ``None``.
    """
    # bool is a subclass of int; True/False must not be read as token counts.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value if value >= 0 else None
    if isinstance(value, float):
        # NaN fails the comparison and is rejected here.
        if not value >= 0:
            return None
        try:
            return int(value)
        except (OverflowError, ValueError):
            # int(float("inf")) raises OverflowError; treat it as "no value"
            # instead of letting it abort metric normalisation.
            return None
    if isinstance(value, str):
        try:
            parsed = int(value)
        except ValueError:
            return None
        return parsed if parsed >= 0 else None
    return None


def _first_num(raw: Dict[str, Any], *keys: str) -> Optional[int]:
    """Return the first usable count found in ``raw`` under any of ``keys``.

    Keys are tried in the order given. A key that is present but whose value
    ``_num_or_none`` rejects is skipped, so a later alias can still supply the
    number. Returns ``None`` when no key yields a value.
    """
    for key in keys:
        if key not in raw:
            continue
        value = _num_or_none(raw.get(key))
        if value is not None:
            return value
    return None


def normalize_token_metrics(raw: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Map a usage dict with varying key spellings onto a fixed set of fields.

    Returns a dict with ``token_metrics_available`` plus ``input_tokens``,
    ``output_tokens``, ``total_tokens``, ``cache_read_tokens`` and
    ``reasoning_tokens``. Fields that cannot be read are ``None``.
    ``token_metrics_available`` is ``True`` when at least one field was read.
    When ``total_tokens`` is absent but both input and output are known, it is
    derived as their sum.

    A non-dict or empty ``raw`` yields the all-``None`` result with
    ``token_metrics_available`` set to ``False``.
    """
    if not isinstance(raw, dict) or not raw:
        return {
            "token_metrics_available": False,
            "input_tokens": None,
            "output_tokens": None,
            "total_tokens": None,
            "cache_read_tokens": None,
            "reasoning_tokens": None,
        }

    input_tokens = _first_num(raw, "input", "input_tokens", "inputTokens")
    output_tokens = _first_num(raw, "output", "output_tokens", "outputTokens")
    total_tokens = _first_num(raw, "total", "total_tokens", "totalTokens")
    if total_tokens is None and input_tokens is not None and output_tokens is not None:
        total_tokens = input_tokens + output_tokens

    counts = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "total_tokens": total_tokens,
        "cache_read_tokens": _first_num(raw, "cache_read_tokens", "cachedInputTokens", "cacheRead"),
        "reasoning_tokens": _first_num(raw, "reasoning_tokens", "reasoningTokens", "reasoning"),
    }
    # When nothing was readable every count is already None, so the counts can
    # be emitted as-is alongside the availability flag.
    available = any(value is not None for value in counts.values())
    return {"token_metrics_available": available, **counts}
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _ratio(numerator: Optional[int], denominator: Optional[int]) -> Optional[float]:
|
|
374
|
+
if numerator is None or denominator in (None, 0):
|
|
375
|
+
return None
|
|
376
|
+
return numerator / denominator
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def build_interaction_metadata(
    source: str,
    user_id: Optional[str],
    session_id: str,
    turn_number: int,
    token_metrics: Optional[Dict[str, Any]],
    tool_call_count: int,
    tool_result_count: int,
    skill_use_count: int,
    model: Optional[str],
    user_message_count: int = 1,
    assistant_message_count: int = 1,
) -> Dict[str, Any]:
    """Assemble the metadata dict that describes one interaction (turn).

    The result carries the identifying fields (source, user, session,
    interaction id, schema version), the message/tool/skill counts, the
    normalised token fields returned by ``normalize_token_metrics``, the model
    and turn number, and a nested ``efficiency`` dict of derived figures.

    A falsy ``user_id`` is stored as ``""``. Falsy tool/skill counts and a
    falsy ``turn_number`` are stored as ``0``. The two message counts are
    stored exactly as passed.
    """
    tokens = normalize_token_metrics(token_metrics)
    interaction_id = build_interaction_id(source, session_id, turn_number)
    tool_calls = int(tool_call_count or 0)
    tool_results = int(tool_result_count or 0)
    skill_uses = int(skill_use_count or 0)
    total_tokens = tokens.get("total_tokens")

    metadata: Dict[str, Any] = {
        "source": source,
        "user_id": user_id or "",
        "session_id": session_id,
        "interaction_id": interaction_id,
        "metrics_schema_version": METRICS_SCHEMA_VERSION,
        "interaction_count": 1,
        "user_message_count": user_message_count,
        "assistant_message_count": assistant_message_count,
        "tool_call_count": tool_calls,
        "tool_result_count": tool_results,
        "skill_use_count": skill_uses,
    }
    # Token fields sit between the counts and the model, as flat top-level keys.
    metadata.update(tokens)
    metadata["model"] = model
    metadata["turn_number"] = int(turn_number or 0)
    metadata["efficiency"] = {
        "tokens_per_interaction": total_tokens,
        "tool_calls_per_interaction": tool_calls,
        "skills_per_interaction": skill_uses,
        "output_input_token_ratio": _ratio(tokens.get("output_tokens"), tokens.get("input_tokens")),
        "tokens_per_tool_call": _ratio(total_tokens, tool_calls),
    }
    return metadata
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def discover_known_skills(extra_roots: Optional[List[Path]] = None) -> set:
    """Collect the names of skills found under the known skill directories.

    A skill is any directory that directly contains a ``SKILL.md`` file; its
    name is that directory's name. The search walks ``CODEX_DIR / "skills"``,
    ``~/.claude/skills`` and ``~/.config/opencode/skill`` recursively, followed
    by any ``extra_roots``. Roots that do not exist are skipped.

    Returns a set of directory names. It is empty when nothing is found.
    """
    search_roots = [
        CODEX_DIR / "skills",
        Path.home() / ".claude" / "skills",
        Path.home() / ".config" / "opencode" / "skill",
    ]
    if extra_roots:
        search_roots += extra_roots

    discovered = set()
    for base in search_roots:
        try:
            if base.exists():
                for manifest in base.rglob("SKILL.md"):
                    discovered.add(manifest.parent.name)
        except Exception:
            # Best-effort scan: a failing root is abandoned, names gathered so
            # far are kept, and the remaining roots are still visited.
            pass
    return discovered
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _skill_namespace(name: str) -> str:
    """Return the text before the first ``:`` in ``name``, or ``""`` if there is no colon."""
    prefix, colon, _rest = name.partition(":")
    return prefix if colon else ""


# Matches "<sep><directory><sep>SKILL.md" and captures the directory name.
# There is deliberately no leading "anything" prefix: a greedy prefix would
# swallow every path but the last one in a string and backtrack quadratically
# on long inputs.
_SKILL_FILE_RE = re.compile(r"[\\/]+([^\\/\"'\n\r]+)[\\/]+SKILL\.md", re.IGNORECASE)


def detect_skill_usages(tool_calls: List[Dict[str, Any]], known_skills: set) -> List[Dict[str, str]]:
    """Infer which skills a turn used from its tool calls.

    Two signals are recognised for each call:

    * a tool named ``skill`` (case-insensitive) whose dict input has a
      non-blank string under ``skill_name``, ``skill`` or ``name`` (first match
      wins) -- recorded with ``detected_by="tool_call"``;
    * any path ending in ``<dir>/SKILL.md`` inside the JSON-serialised input --
      ``<dir>`` is recorded with ``detected_by="skill_file_path"``, provided it
      is in ``known_skills`` or ``known_skills`` is empty.

    Every such path in an input is reported, not only the last one. When the
    same name is seen more than once, the most recent detection wins. Entries
    of ``tool_calls`` that are not dicts are ignored.

    Returns one dict per distinct skill name, sorted by name, with the keys
    ``name``, ``skill_namespace`` and ``detected_by``.
    """
    found: Dict[str, str] = {}
    for call in tool_calls or []:
        if not isinstance(call, dict):
            continue
        tool_name = str(call.get("name") or "")
        raw_input = call.get("input")
        input_obj = raw_input if isinstance(raw_input, (dict, list, str)) else {}

        if tool_name.lower() == "skill" and isinstance(input_obj, dict):
            for key in ("skill_name", "skill", "name"):
                value = input_obj.get(key)
                if isinstance(value, str) and value.strip():
                    found[value.strip()] = "tool_call"
                    break

        # Serialise the whole input so paths nested anywhere inside it are seen.
        try:
            text = json.dumps(input_obj, ensure_ascii=False)
        except Exception:
            text = str(input_obj)

        for match in _SKILL_FILE_RE.finditer(text):
            candidate = match.group(1)
            if candidate and (candidate in known_skills or not known_skills):
                found[candidate] = "skill_file_path"

    return [
        {"name": name, "skill_namespace": _skill_namespace(name), "detected_by": detected_by}
        for name, detected_by in sorted(found.items())
    ]
|
|
312
465
|
|
|
313
466
|
|
|
314
467
|
def get_payload(row: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -428,67 +581,133 @@ def emit_codex_turn(
|
|
|
428
581
|
) -> None:
|
|
429
582
|
user_text, user_meta = truncate(material.get("user_text") or "")
|
|
430
583
|
assistant_text, assistant_meta = truncate(material.get("assistant_text") or "")
|
|
431
|
-
usage_details = usage_details_from_codex(usage)
|
|
432
|
-
model = first_string(meta.get("model"), meta.get("model_provider")) or "codex"
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
584
|
+
usage_details = usage_details_from_codex(usage)
|
|
585
|
+
model = first_string(meta.get("model"), meta.get("model_provider")) or "codex"
|
|
586
|
+
tool_calls = material.get("tool_calls") or []
|
|
587
|
+
tool_results = material.get("tool_results") or []
|
|
588
|
+
skill_usages = detect_skill_usages(tool_calls, discover_known_skills())
|
|
589
|
+
interaction_meta = build_interaction_metadata(
|
|
590
|
+
"codex",
|
|
591
|
+
user_id,
|
|
592
|
+
session_id,
|
|
593
|
+
turn_num,
|
|
594
|
+
usage_details,
|
|
595
|
+
len(tool_calls),
|
|
596
|
+
len(tool_results),
|
|
597
|
+
len(skill_usages),
|
|
598
|
+
model,
|
|
599
|
+
user_message_count=1 if material.get("user_text") else 0,
|
|
600
|
+
assistant_message_count=1 if material.get("assistant_text") else 0,
|
|
601
|
+
)
|
|
602
|
+
skill_summary = [
|
|
603
|
+
{"name": item["name"], "count": 1, "detected_by": item["detected_by"]}
|
|
604
|
+
for item in skill_usages
|
|
605
|
+
]
|
|
606
|
+
|
|
607
|
+
with propagate_attributes(
|
|
608
|
+
user_id=user_id,
|
|
436
609
|
session_id=session_id,
|
|
437
610
|
trace_name=f"Codex - Turn {turn_num}",
|
|
438
611
|
tags=["codex"],
|
|
439
612
|
):
|
|
440
613
|
with langfuse.start_as_current_observation(
|
|
441
614
|
name=f"Codex - Turn {turn_num}",
|
|
442
|
-
input={"role": "user", "content": user_text},
|
|
443
|
-
metadata={
|
|
444
|
-
|
|
445
|
-
"
|
|
446
|
-
"
|
|
447
|
-
"
|
|
615
|
+
input={"role": "user", "content": user_text},
|
|
616
|
+
metadata={
|
|
617
|
+
**interaction_meta,
|
|
618
|
+
"source": "codex",
|
|
619
|
+
"session_id": session_id,
|
|
620
|
+
"turn_number": turn_num,
|
|
621
|
+
"session_path": str(session_path),
|
|
448
622
|
"cwd": meta.get("cwd"),
|
|
449
623
|
"originator": meta.get("originator"),
|
|
450
|
-
"cli_version": meta.get("cli_version"),
|
|
451
|
-
"user_text": user_meta,
|
|
452
|
-
"usage": usage,
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
624
|
+
"cli_version": meta.get("cli_version"),
|
|
625
|
+
"user_text": user_meta,
|
|
626
|
+
"usage": usage,
|
|
627
|
+
"skills": skill_summary,
|
|
628
|
+
},
|
|
629
|
+
) as trace_span:
|
|
630
|
+
with langfuse.start_as_current_observation(
|
|
631
|
+
name="AI Interaction",
|
|
632
|
+
input={"role": "user", "content": user_text},
|
|
633
|
+
output={"role": "assistant", "content": assistant_text},
|
|
634
|
+
metadata=interaction_meta,
|
|
635
|
+
):
|
|
636
|
+
pass
|
|
637
|
+
|
|
638
|
+
with langfuse.start_as_current_observation(
|
|
639
|
+
name="Codex Response",
|
|
640
|
+
as_type="generation",
|
|
458
641
|
model=model,
|
|
459
642
|
input={"role": "user", "content": user_text},
|
|
460
|
-
output={"role": "assistant", "content": assistant_text},
|
|
461
|
-
usage_details=usage_details or None,
|
|
462
|
-
metadata={
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
643
|
+
output={"role": "assistant", "content": assistant_text},
|
|
644
|
+
usage_details=usage_details or None,
|
|
645
|
+
metadata={
|
|
646
|
+
"assistant_text": assistant_meta,
|
|
647
|
+
"source": "codex",
|
|
648
|
+
"user_id": user_id or "",
|
|
649
|
+
"session_id": session_id,
|
|
650
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
651
|
+
"turn_number": turn_num,
|
|
652
|
+
},
|
|
653
|
+
):
|
|
654
|
+
pass
|
|
655
|
+
|
|
656
|
+
for skill in skill_usages:
|
|
657
|
+
with langfuse.start_as_current_observation(
|
|
658
|
+
name=f"Skill Use: {skill['name']}",
|
|
659
|
+
metadata={
|
|
660
|
+
"source": "codex",
|
|
661
|
+
"user_id": user_id or "",
|
|
662
|
+
"session_id": session_id,
|
|
663
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
664
|
+
"skill_name": skill["name"],
|
|
665
|
+
"skill_namespace": skill["skill_namespace"],
|
|
666
|
+
"detected_by": skill["detected_by"],
|
|
667
|
+
"turn_number": turn_num,
|
|
668
|
+
"metrics_schema_version": METRICS_SCHEMA_VERSION,
|
|
669
|
+
},
|
|
670
|
+
):
|
|
671
|
+
pass
|
|
672
|
+
|
|
673
|
+
for call in tool_calls:
|
|
674
|
+
tool_input, input_meta = truncate(call.get("input"))
|
|
675
|
+
with langfuse.start_as_current_observation(
|
|
676
|
+
name=f"Tool: {call.get('name') or 'tool'}",
|
|
484
677
|
as_type="tool",
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
"
|
|
488
|
-
"
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
678
|
+
input=tool_input,
|
|
679
|
+
metadata={
|
|
680
|
+
"source": "codex",
|
|
681
|
+
"user_id": user_id or "",
|
|
682
|
+
"session_id": session_id,
|
|
683
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
684
|
+
"tool_id": call.get("id"),
|
|
685
|
+
"tool_name": call.get("name"),
|
|
686
|
+
"turn_number": turn_num,
|
|
687
|
+
"input_meta": input_meta,
|
|
688
|
+
"metrics_schema_version": METRICS_SCHEMA_VERSION,
|
|
689
|
+
},
|
|
690
|
+
):
|
|
691
|
+
pass
|
|
692
|
+
|
|
693
|
+
for result in tool_results:
|
|
694
|
+
output, output_meta = truncate(result.get("output"))
|
|
695
|
+
with langfuse.start_as_current_observation(
|
|
696
|
+
name=f"Tool Result: {result.get('name') or 'tool'}",
|
|
697
|
+
as_type="tool",
|
|
698
|
+
metadata={
|
|
699
|
+
"source": "codex",
|
|
700
|
+
"user_id": user_id or "",
|
|
701
|
+
"session_id": session_id,
|
|
702
|
+
"interaction_id": interaction_meta["interaction_id"],
|
|
703
|
+
"tool_id": result.get("id"),
|
|
704
|
+
"tool_name": result.get("name"),
|
|
705
|
+
"turn_number": turn_num,
|
|
706
|
+
"output_meta": output_meta,
|
|
707
|
+
"metrics_schema_version": METRICS_SCHEMA_VERSION,
|
|
708
|
+
},
|
|
709
|
+
) as tool_obs:
|
|
710
|
+
tool_obs.update(output=output)
|
|
492
711
|
|
|
493
712
|
trace_span.update(output={"role": "assistant", "content": assistant_text})
|
|
494
713
|
|