agent-apprenticeship 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +217 -0
- package/bin/agent-apprenticeship.js +131 -0
- package/package.json +30 -0
- package/pyproject.toml +23 -0
- package/src/agent_apprenticeship_trace/__init__.py +2 -0
- package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
- package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
- package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
- package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
- package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
- package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
- package/src/agent_apprenticeship_trace/certification.py +580 -0
- package/src/agent_apprenticeship_trace/cli.py +2979 -0
- package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
- package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
- package/src/agent_apprenticeship_trace/config.py +609 -0
- package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
- package/src/agent_apprenticeship_trace/env.py +46 -0
- package/src/agent_apprenticeship_trace/evaluator.py +64 -0
- package/src/agent_apprenticeship_trace/grader.py +194 -0
- package/src/agent_apprenticeship_trace/integration_status.py +193 -0
- package/src/agent_apprenticeship_trace/io.py +20 -0
- package/src/agent_apprenticeship_trace/learning.py +627 -0
- package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
- package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
- package/src/agent_apprenticeship_trace/loop.py +111 -0
- package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
- package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
- package/src/agent_apprenticeship_trace/progress.py +223 -0
- package/src/agent_apprenticeship_trace/public_run.py +1109 -0
- package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
- package/src/agent_apprenticeship_trace/recipes.py +129 -0
- package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
- package/src/agent_apprenticeship_trace/revision.py +21 -0
- package/src/agent_apprenticeship_trace/role_runners.py +7 -0
- package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
- package/src/agent_apprenticeship_trace/schemas.py +273 -0
- package/src/agent_apprenticeship_trace/session_events.py +99 -0
- package/src/agent_apprenticeship_trace/task_intake.py +112 -0
- package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
- package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
- package/src/agent_apprenticeship_trace/training_signals.py +30 -0
- package/src/agent_apprenticeship_trace/validation.py +210 -0
- package/src/agent_apprenticeship_trace/verifier.py +55 -0
|
@@ -0,0 +1,669 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import json, re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
from .schemas import AgentTrace, ActualOutputs
|
|
7
|
+
from .io import write_json
|
|
8
|
+
|
|
9
|
+
# Field names of the canonical top-level trace document.
CANONICAL_TOP = {'schema_version','trace_id','collection_id','prior_trace_id','trace_mode','task','task_id','task_family_id','attempt_id','attempt_kind','attempt_status','agent_tools','started_at','ended_at','system_prompt','system_prompt_hash','skills','memory','agent_config','learning','termination_reason','steps','actual_outputs','artifacts','metadata_json'}
# Field names of a canonical step. _normalize_step copies any raw key outside
# this set (and outside its short alias list) into metadata_json['original_fields'].
CANONICAL_STEP = {'step','turn','actor','action','operation','tool','execution_mode','parallel_group','observation','input','input_source','output','state_change','reasoning','caused_by','causal_type','causal_note','alternatives_considered','success','step_outcome','error_type','error_message','message_role','feedback_type','feedback_content','started_at','ended_at','retry_of','artifact_refs','metadata_json'}
# Accepted step_outcome values; see _normalize_outcome.
VALID_OUTCOMES={'progress','neutral','blocked','failed','corrected','completed'}
# Accepted action values; see _action.
VALID_ACTIONS={'user_message','agent_step','output','error'}
# Accepted operation values; see _operation.
VALID_OPS={'plan','analyze','search','read','write','edit','execute','verify','download','install','ask_user','answer','select','grade','evaluate','revise','other'}
# Accepted causal_type values; see _normalize_causal_type.
VALID_CAUSAL_TYPES={'user_request','follow_up_user_request','answer_to_agent_question','execution_of_plan','dependency_on_tool_result','retry_after_failure','correction_response','approval_response','verification_of_prior_step','dependency_on_multiple_prior_steps','delegation_to_subagent','delegated_work','used_subagent_result','handoff_from_subagent','parallel_work','other'}
# Non-canonical causal_type spellings and the canonical value they map to.
CAUSAL_MAP={'sequential':'execution_of_plan','sequence':'execution_of_plan','previous_step':'execution_of_plan'}
# Non-canonical operation/action spellings and the canonical operation they map to.
# Lookups in _operation are exact (case-sensitive) matches on the stripped value.
OP_MAP={
    'inspect_workspace_files':'read','inspect_workspace':'read','read_policy':'read','read_input_csvs':'read','create_artifacts_directory':'execute','create_reconciliation_script':'write','run_initial_reconciliation':'execute','inspect_initial_outputs':'read','generate_reconciliation_artifacts':'execute','inspect_reconciled_payments':'read','validate_json_files':'verify','validate_artifacts':'verify','run':'execute','shell':'execute','bash':'execute','create_file':'write','write_file':'write','modify_file':'edit','read_file':'read','inspect':'read','think':'analyze','reason':'analyze','summarize':'analyze','finalize':'answer','final_response':'answer'}
# Operation groups used by _tool and _derive_observation to pick a tool name / observation text.
READ_OPS={'read','search'}; WRITE_OPS={'write','edit'}; EXEC_OPS={'execute','install','download'}; VERIFY_OPS={'verify'}
# Accepted attempt_kind values; see _canonical_attempt_kind.
VALID_ATTEMPT_KINDS={'baseline','revised','apprentice_without_lessons','apprentice_with_lessons','other'}
# Accepted trace_mode values; see _canonical_trace_mode.
VALID_TRACE_MODES={'live','retraced','hybrid'}
# Accepted termination_reason values.
VALID_TERMINATION_REASONS={'task_complete','verifier_passed','verifier_failed','max_iterations_reached','agent_blocked','timeout','error_unrecoverable','partial_then_stopped','provider_usage_limit','other'}
# Accepted attempt_status values; see infer_attempt_status.
VALID_ATTEMPT_STATUSES={'completed','failed','blocked','fallback','partial'}
|
|
23
|
+
|
|
24
|
+
class TraceNormalizationContext(BaseModel):
    # Caller-supplied facts about the attempt whose raw trace is being normalized.
    # (Kept as '#' comments: a class docstring would become the pydantic schema description.)
    task_id: str
    attempt_id: str
    # Fallback used by _canonical_attempt_kind when the raw trace has no valid kind.
    attempt_kind: str
    task: str
    # When set, _valid_actual_outputs returns this instead of the raw trace's actual_outputs.
    actual_outputs: ActualOutputs | None = None
    trace_id_prefix: str = 'trace'
|
|
31
|
+
|
|
32
|
+
class TraceNormalizationReport(BaseModel):
    # Mutable bookkeeping record that the normalization helpers update in place.
    # (Kept as '#' comments: a class docstring would become the pydantic schema description.)
    task_id: str
    attempt_id: str
    attempt_kind: str
    raw_trace_ref: str | None = None
    normalized_trace_ref: str | None = None
    canonical_trace_ref: str | None = None
    # Status flags and step counts below are not written by the helpers visible
    # in this part of the file; note that trace_lossless alone defaults to True.
    trace_schema_valid: bool = False
    trace_normalized: bool = False
    trace_lossless: bool = True
    fallback_trace: bool = False
    raw_step_count: int = 0
    normalized_step_count: int = 0
    discarded_step_count: int = 0
    raw_trace_parse_error: bool = False
    trace_normalization_error: bool = False
    trace_normalization_partial: bool = False
    validation_errors: list[str] = Field(default_factory=list)
    # Appended to by _safe_int (non-numeric values) and _normalize_step
    # (renumbered steps, dropped caused_by references).
    warnings: list[str] = Field(default_factory=list)
    field_mappings: dict[str,str] = Field(default_factory=dict)
    # Incremented by _normalize_step when a dict-valued input / output is present.
    input_dict_to_string_count: int = 0
    output_dict_to_string_count: int = 0
    # Incremented by _normalize_causal_type when the raw value had to be mapped.
    causal_type_repair_count: int = 0
    # Incremented by _operation, _operation_from_step_context and _normalize_step;
    # a single step can contribute more than one increment.
    operation_repair_count: int = 0
    # Incremented by _action when a non-empty, non-canonical action was mapped.
    action_repair_count: int = 0
    metadata_json: dict[str, Any] = Field(default_factory=dict)
|
|
58
|
+
|
|
59
|
+
class NormalizationResult(BaseModel):
    # Pairs a normalized trace with the report describing how it was produced.
    # (Kept as '#' comments: a class docstring would become the pydantic schema description.)
    # May be None; the producer is outside the visible part of this file.
    normalized_trace: dict[str, Any] | None = None
    report: TraceNormalizationReport
    fallback_required: bool = False
    parse_error: str | None = None
|
|
64
|
+
|
|
65
|
+
def _compact(v: Any, max_len: int=1200) -> str:
|
|
66
|
+
if v is None: return ''
|
|
67
|
+
if isinstance(v, str): return v[:max_len]
|
|
68
|
+
try: s=json.dumps(v, sort_keys=True)
|
|
69
|
+
except Exception: s=str(v)
|
|
70
|
+
return s[:max_len]
|
|
71
|
+
|
|
72
|
+
def _as_list(v: Any) -> list[Any]:
|
|
73
|
+
if v is None: return []
|
|
74
|
+
if isinstance(v, list): return v
|
|
75
|
+
return [v]
|
|
76
|
+
|
|
77
|
+
def _operation(raw_action: str | None, raw_op: str | None, meta: dict[str, Any], report: TraceNormalizationReport | None = None) -> str:
    """Map a raw operation (or, failing that, the raw action) to a canonical operation.

    The stripped value is looked up in ``OP_MAP`` first, then accepted as-is
    when it is already in ``VALID_OPS``; everything else becomes ``'other'``.
    When a non-empty value had to be changed, the original is stored in
    ``meta['original_operation']`` and the report's repair counter is bumped.
    """
    candidate = str(raw_op or raw_action or '').strip()
    if candidate in OP_MAP:
        mapped = OP_MAP[candidate]
    elif candidate in VALID_OPS:
        mapped = candidate
    else:
        mapped = 'other'

    was_repaired = bool(candidate) and (candidate != mapped or candidate not in VALID_OPS)
    if was_repaired:
        meta['original_operation'] = candidate
        if report is not None:
            report.operation_repair_count += 1

    if mapped not in VALID_OPS:
        return 'other'
    return mapped
|
|
85
|
+
|
|
86
|
+
def _action(raw_action: str | None, op: str, meta: dict[str, Any], report: TraceNormalizationReport | None = None) -> str:
    """Map a raw action label to one of ``VALID_ACTIONS``.

    A non-empty raw label is always preserved in ``meta['original_action']``.
    Labels that are already canonical (case-insensitively) are returned
    lower-cased. Otherwise the label is classified as ``'error'``,
    ``'output'`` or ``'agent_step'``, and the report's action repair counter
    is bumped when the label was non-empty.
    """
    original = str(raw_action or '').strip()
    lowered = original.lower()
    if original:
        meta['original_action'] = original
    if lowered in VALID_ACTIONS:
        return lowered

    error_named = lowered in {'error', 'failure', 'exception'}
    # Only trust error-ish substrings when no concrete operation was inferred.
    error_hinted = op == 'other' and any(hint in lowered for hint in ('error', 'fail', 'exception'))
    if error_named or error_hinted:
        canonical = 'error'
    elif op == 'answer' or lowered in {'final_response', 'finalize'}:
        canonical = 'output'
    else:
        canonical = 'agent_step'

    if original and report is not None:
        report.action_repair_count += 1
    return canonical
|
|
102
|
+
|
|
103
|
+
def _string_or_none(v: Any, meta: dict[str, Any] | None = None, key: str | None = None) -> str | None:
|
|
104
|
+
if v is None:
|
|
105
|
+
return None
|
|
106
|
+
if isinstance(v, str):
|
|
107
|
+
return v
|
|
108
|
+
if meta is not None and key is not None:
|
|
109
|
+
meta[key]=v
|
|
110
|
+
return _compact(v)
|
|
111
|
+
|
|
112
|
+
def _output_to_string(v: Any) -> str | None:
|
|
113
|
+
if v is None:
|
|
114
|
+
return None
|
|
115
|
+
if isinstance(v, str):
|
|
116
|
+
return v
|
|
117
|
+
if isinstance(v, dict):
|
|
118
|
+
parts=[]
|
|
119
|
+
for k in ['exit_code','stdout','stdout_summary','stderr','stderr_summary']:
|
|
120
|
+
if k in v and v[k] not in (None, ''):
|
|
121
|
+
parts.append(f'{k}: {_compact(v[k], 900)}')
|
|
122
|
+
return '\n'.join(parts) if parts else _compact(v)
|
|
123
|
+
return _compact(v)
|
|
124
|
+
|
|
125
|
+
def _normalize_causal_type(v: Any, meta: dict[str, Any], report: TraceNormalizationReport | None = None) -> str | None:
|
|
126
|
+
if v is None:
|
|
127
|
+
return None
|
|
128
|
+
raw=str(v).strip()
|
|
129
|
+
mapped=CAUSAL_MAP.get(raw, raw if raw in VALID_CAUSAL_TYPES else 'other')
|
|
130
|
+
if raw != mapped or raw not in VALID_CAUSAL_TYPES:
|
|
131
|
+
meta['original_causal_type']=v
|
|
132
|
+
if report is not None:
|
|
133
|
+
report.causal_type_repair_count += 1
|
|
134
|
+
return mapped
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _command_tokens(command: str) -> list[str]:
|
|
138
|
+
return [part.strip().lower() for part in command.replace('&&', '\n').replace(';', '\n').split('\n') if part.strip()]
|
|
139
|
+
|
|
140
|
+
def _first_command_word(part: str) -> str:
|
|
141
|
+
words=part.split()
|
|
142
|
+
while words and ('=' in words[0] or words[0] in {'env','time','uv','poetry','pipenv','xargs','sudo'}):
|
|
143
|
+
words=words[1:]
|
|
144
|
+
return words[0] if words else ''
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _has_phrase(text: str | None, phrase: str) -> bool:
|
|
149
|
+
"""Return true when a normalized phrase appears in text.
|
|
150
|
+
|
|
151
|
+
Multi-word phrases use substring matching. Single-word phrases require
|
|
152
|
+
token/word boundaries so generic language like "next move" does not
|
|
153
|
+
accidentally become a file-write operation.
|
|
154
|
+
"""
|
|
155
|
+
import re
|
|
156
|
+
|
|
157
|
+
if not text or not phrase:
|
|
158
|
+
return False
|
|
159
|
+
|
|
160
|
+
text_l = str(text).lower()
|
|
161
|
+
phrase_l = str(phrase).lower().strip()
|
|
162
|
+
if not phrase_l:
|
|
163
|
+
return False
|
|
164
|
+
|
|
165
|
+
if " " in phrase_l:
|
|
166
|
+
return phrase_l in text_l
|
|
167
|
+
|
|
168
|
+
return re.search(r"(?<![a-z0-9_])" + re.escape(phrase_l) + r"(?![a-z0-9_])", text_l) is not None
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _operation_from_phrase(value: str | None) -> str | None:
    """Infer an operation from descriptive action/operation/tool/command text.

    The text is lower-cased and ``_``/``-`` are replaced by spaces before
    matching. Marker groups are tried in priority order: verify, write,
    execute, read.

    Returns one of ``'verify'``, ``'write'``, ``'execute'``, ``'read'``, or
    ``None`` when the intent is unclear (so command-level inference can still
    run). ``'other'`` is never returned.
    """
    if not value:
        return None

    def _spaced(text: str) -> str:
        # Same separator normalisation for the text and for every marker, so
        # markers that contain '-' or '_' (e.g. "ln -s") can actually match.
        return text.lower().replace("_", " ").replace("-", " ")

    low = _spaced(str(value)).strip()
    if not low or low == "other":
        return None

    # Avoid mapping generic reasoning language to file operations.
    if "next move" in low or low.startswith(("ponder ", "think ", "reason ")):
        return None

    if low in {"read", "write", "execute", "verify"}:
        return low

    verify_markers = (
        "validate", "validation", "verify", "verification", "check", "test",
        "pytest", "compileall", "json tool", "schema", "checksum", "hash",
        "inspect exported", "inspect workbook", "formula scan", "scan workbook",
        "render", "visual qa", "preview", "qa", "review output",
        "spreadsheet validation", "xlsx validation", "csv validation",
        "markdown validation", "artifact validation",
        "attempted spreadsheet library import", "checked local office",
        "package availability", "tool check",
    )
    write_markers = (
        "write", "create", "edit", "patch", "apply patch", "file creation",
        "file edit", "write artifact", "create artifact", "edit artifact",
        "mkdir", "copy", "cp ", "mv ", "builder script",
        "write final trace", "persist", "save", "generated artifacts",
    )
    execute_markers = (
        "execute", "run", "rerun", "re run", "executed", "script execution",
        "builder", "python", "python3", "node", "npm", "bash", "sh ",
        "calculation script", "link workspace", "environment setup",
        "symlink", "ln -s",
    )
    read_markers = (
        "read", "inspect", "inventory", "list", "listing", "file listing",
        "file inspection", "read context", "inspect task", "inspect inputs",
        "inspect workspace", "load", "loaded", "sed", "cat", "head", "tail",
        "less", "ls", "find", "rg", "grep", "git status", "unzip",
    )

    # Order matters: the first group with a matching marker wins.
    ordered_groups = (
        ("verify", verify_markers),
        ("write", write_markers),
        ("execute", execute_markers),
        ("read", read_markers),
    )
    for operation, markers in ordered_groups:
        if any(_has_phrase(low, _spaced(marker)) for marker in markers):
            return operation

    return None
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _operation_from_command(command: str | None, meta: dict[str, Any], report: TraceNormalizationReport | None = None) -> str | None:
    """Infer ``'verify'``, ``'read'``, ``'write'`` or ``'execute'`` from a shell command.

    The command is split with ``_command_tokens`` and the leading word of each
    part is taken with ``_first_command_word``. Rules are applied in priority
    order: substring verify markers, explicit write markers, well-known
    leading commands, then word-level marker matching via ``_has_phrase``.

    ``meta`` and ``report`` are accepted for signature compatibility and are
    not used here. Returns ``None`` for an empty command or when no rule matches.
    """
    c = (command or '').strip()
    if not c:
        return None
    parts = _command_tokens(c) or [c.lower()]
    low = '\n'.join(parts)
    firsts = {_first_command_word(p) for p in parts}

    verify_markers = ['json.tool','pytest','compileall','schema','validate','validation','validator',' check','check ','checksum','sha256sum','shasum','md5sum','openpyxl','xlsx validation','csv validation','json validation','markdown validation','test ']
    # Multi-token markers that are unambiguous enough for raw substring matching.
    explicit_write_markers = ['apply_patch','file edit','edit file','write artifact','write_artifact','create file','file creation']
    write_markers = explicit_write_markers + ['write','create','edit','patch','builder script','write final trace','persist','save']
    read_markers = ['read','inspect','inventory','list','listing','file listing','file inspection','read context','inspect task','inspect inputs','inspect workspace','load','loaded','sed','cat','head','tail','less','ls','find','rg','grep','git status','unzip']
    # NOTE(review): an `execute_markers` list used to be built here but was never
    # consulted; it has been dropped. 'execute' is only inferred from the leading
    # command word below. Confirm whether a word-level execute fallback was intended.

    if any(m in low for m in verify_markers) or firsts & {'pytest', 'py.test'}:
        return 'verify'
    if 'git status' in low:
        return 'read'
    if any(m in low for m in explicit_write_markers):
        return 'write'
    if firsts & {'sed','cat','head','tail','less','ls','find','rg','grep','file','pwd'}:
        return 'read'
    if any('unzip -l' in p or 'zipinfo' in p for p in parts) or ' inventory' in low or 'listing' in low:
        return 'read'
    if firsts & {'mkdir','cp','mv','touch','tee'}:
        return 'write'
    if firsts & {'python','python3','node','npm','npx','bash','sh','zsh'}:
        return 'execute'

    # Word-boundary fallbacks for commands whose leading word was not recognised.
    if any(_has_phrase(low, m) for m in verify_markers):
        return 'verify'
    if any(_has_phrase(low, m) for m in write_markers):
        return 'write'
    if any(_has_phrase(low, m) for m in read_markers):
        return 'read'
    return None
|
|
267
|
+
|
|
268
|
+
def _operation_from_step_context(raw_action: Any, raw_op: Any, tool: Any, command: Any, meta: dict[str, Any], report: TraceNormalizationReport | None = None) -> str | None:
    """Infer an operation from the step's descriptive fields.

    The raw operation, raw action, tool and command are tried in that order
    with ``_operation_from_phrase``. The first hit is returned and counted on
    the report's operation repair counter. Returns ``None`` when nothing
    matches. ``meta`` is not used here.
    """
    for candidate in (raw_op, raw_action, tool, command):
        text = None if candidate is None else str(candidate)
        inferred = _operation_from_phrase(text)
        if not inferred:
            continue
        if report is not None:
            report.operation_repair_count += 1
        return inferred
    return None
|
|
277
|
+
|
|
278
|
+
def infer_attempt_status(trace: dict[str, Any], report: dict[str, Any] | None = None) -> str:
    """Derive the attempt status for a trace dict.

    Precedence:
      1. an existing valid ``attempt_status``;
      2. ``termination_reason`` of ``task_complete`` / ``agent_blocked``;
      3. a failing termination reason or a failing ``actual_outputs.status``;
      4. any fallback marker on the report or the trace metadata;
      5. step-level evidence: a blocked step, then failed steps (``'partial'``
         when at least one step succeeded), then ``'partial'`` when any steps
         exist;
      6. otherwise ``'failed'``.

    Trace values may come from untrusted or malformed input, so set
    membership is only tested on strings: an unhashable value such as a list
    would otherwise raise ``TypeError``.
    """
    existing = trace.get('attempt_status')
    if isinstance(existing, str) and existing in VALID_ATTEMPT_STATUSES:
        return existing

    termination = trace.get('termination_reason')
    md = trace.get('metadata_json') if isinstance(trace.get('metadata_json'), dict) else {}
    report = report or {}

    if termination == 'task_complete':
        return 'completed'
    if termination == 'agent_blocked':
        return 'blocked'

    actual = trace.get('actual_outputs') if isinstance(trace.get('actual_outputs'), dict) else {}
    status = actual.get('status')
    failing_termination = isinstance(termination, str) and termination in {'timeout','error_unrecoverable','provider_usage_limit','verifier_failed'}
    failing_status = isinstance(status, str) and status in {'failed','timeout','error'}
    if failing_termination or failing_status:
        return 'failed'

    report_meta = report.get('metadata_json')
    if (
        report.get('fallback_trace')
        or md.get('fallback_trace')
        or md.get('fallback_trace_created')
        or md.get('fallback_reason')
        or report.get('fallback_reason')
        or (isinstance(report_meta, dict) and report_meta.get('fallback_reason'))
    ):
        return 'fallback'

    steps = trace.get('steps') or []
    dict_steps = [s for s in steps if isinstance(s, dict)]
    if any(s.get('step_outcome') == 'blocked' for s in dict_steps):
        return 'blocked'
    if any(s.get('step_outcome') == 'failed' or s.get('success') is False for s in dict_steps):
        # Some failure was recorded: 'partial' only if something also succeeded.
        return 'failed' if not any(s.get('success') is True for s in dict_steps) else 'partial'
    if steps:
        return 'partial'
    return 'failed'
|
|
302
|
+
|
|
303
|
+
def normalize_trace_for_export(trace: dict[str, Any], report: dict[str, Any] | None = None) -> dict[str, Any]:
    """Return a shallow copy of ``trace`` prepared for export.

    For each dict step whose ``operation`` is missing or ``'other'``, an
    operation is inferred from its ``input`` / ``command`` /
    ``metadata_json['command']`` via ``_operation_from_command``.
    ``user_message`` steps are left alone: ``_normalize_step`` deliberately
    gives them ``operation=None``, and inferring one from the user's text
    would undo that. Non-dict steps are passed through unchanged.

    ``attempt_status`` is then set with ``infer_attempt_status``. The input
    ``trace`` and its step dicts are not mutated. Each dict step's
    ``metadata_json`` is replaced by a copy, or by ``{}`` when it was not a dict.
    """
    out = dict(trace)
    steps = []
    for s in out.get('steps') or []:
        if not isinstance(s, dict):
            steps.append(s)
            continue
        ns = dict(s)
        raw_meta = ns.get('metadata_json')
        meta = dict(raw_meta) if isinstance(raw_meta, dict) else {}
        if ns.get('action') != 'user_message':
            command = ns.get('input') or ns.get('command') or meta.get('command')
            op = _operation_from_command(str(command) if command is not None else None, meta)
            if op and (not ns.get('operation') or ns.get('operation') == 'other'):
                ns['operation'] = op
        ns['metadata_json'] = meta
        steps.append(ns)
    out['steps'] = steps
    out['attempt_status'] = infer_attempt_status(out, report)
    return out
|
|
320
|
+
|
|
321
|
+
def _tool(command: str | None, raw: dict[str,Any], op: str) -> str | None:
|
|
322
|
+
c=(command or '').strip()
|
|
323
|
+
low=c.lower()
|
|
324
|
+
if low.startswith('python ') or low.startswith('python3 ') or low == 'python' or low == 'python3': return 'python'
|
|
325
|
+
if low.startswith('apply_patch') or low == 'apply_patch': return 'apply_patch'
|
|
326
|
+
if c and any(tok in low for tok in ['sed','cat','ls','rg','find','pwd','mkdir','bash','zsh','sh ','python','touch','cp ','mv ','rm ']): return 'Bash'
|
|
327
|
+
if op in READ_OPS: return 'file_read'
|
|
328
|
+
if op in WRITE_OPS: return 'file_write'
|
|
329
|
+
return raw.get('tool')
|
|
330
|
+
|
|
331
|
+
def _derive_observation(step_num: int, op: str) -> str:
|
|
332
|
+
if step_num == 1: return 'Attempt workspace and task inputs were available.'
|
|
333
|
+
if op in READ_OPS: return 'Input file was available for inspection.'
|
|
334
|
+
if op in EXEC_OPS or op in WRITE_OPS: return 'Prior inputs and task requirements were available.'
|
|
335
|
+
if op in VERIFY_OPS: return 'Generated outputs were available for validation.'
|
|
336
|
+
return 'Prior workflow context was available.'
|
|
337
|
+
|
|
338
|
+
def _normalize_outcome(v: Any, meta: dict[str,Any]) -> str | None:
|
|
339
|
+
if v is None: return None
|
|
340
|
+
low=str(v).lower().strip()
|
|
341
|
+
if low in VALID_OUTCOMES: return low
|
|
342
|
+
meta['original_step_outcome']=v
|
|
343
|
+
if low in {'ok','success','succeeded','done'}: return 'completed'
|
|
344
|
+
if low in {'fail','failure','errored','error'}: return 'failed'
|
|
345
|
+
if low in {'block','blocked'}: return 'blocked'
|
|
346
|
+
return 'neutral'
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _safe_int(v: Any, default: int, field: str, meta: dict[str, Any], report: TraceNormalizationReport | None = None) -> int:
|
|
350
|
+
try:
|
|
351
|
+
if isinstance(v, bool):
|
|
352
|
+
raise ValueError('boolean is not numeric')
|
|
353
|
+
if v is None or v == '':
|
|
354
|
+
return default
|
|
355
|
+
return int(v)
|
|
356
|
+
except Exception:
|
|
357
|
+
if v is not None:
|
|
358
|
+
meta[f'original_{field}']=v
|
|
359
|
+
warning=f'{field} value {v!r} was not numeric; defaulted to {default}'
|
|
360
|
+
meta.setdefault('normalization_warnings', []).append(warning)
|
|
361
|
+
if report is not None:
|
|
362
|
+
report.warnings.append(warning)
|
|
363
|
+
return default
|
|
364
|
+
|
|
365
|
+
def _normalize_step(raw_step: Any, ordinal: int, used_steps: set[int], report: TraceNormalizationReport) -> dict[str, Any]:
    """Convert one raw step into a canonical step dict.

    Args:
        raw_step: The raw step; a non-dict is wrapped as ``{'value': raw_step}``.
        ordinal: 1-based position of the step, used as the fallback step number
            and for the default ``caused_by``.
        used_steps: Step numbers already taken; updated in place.
        report: Normalization report; its counters and warnings are updated in place.

    Returns:
        A dict with the canonical step keys. The untouched raw step and every
        value that had to be coerced are preserved under ``metadata_json``.
        For ``user_message`` steps, operation/tool/observation/reasoning/
        success/step_outcome/output are forced to ``None``.
    """
    raw = raw_step if isinstance(raw_step, dict) else {'value': raw_step}
    meta = dict(raw.get('metadata_json') or {}) if isinstance(raw.get('metadata_json'), dict) else {}
    meta['raw_step'] = raw
    unknown = {k: v for k, v in raw.items() if k not in CANONICAL_STEP and k not in {'step_number','index','command','inputs','outputs','state_changes','artifacts','artifact_paths','decision_summary','stdout_summary','exit_code'}}
    if unknown:
        meta['original_fields'] = unknown

    # --- step number -------------------------------------------------------
    orig_step = raw.get('step', raw.get('step_number', raw.get('index')))
    step_num = _safe_int(orig_step, ordinal, 'step', meta, report)
    if step_num < 1 or step_num in used_steps:
        meta['original_step_number'] = orig_step
        step_num = ordinal
        report.warnings.append(f'step {ordinal} renumbered from {orig_step!r}')
    used_steps.add(step_num)

    # --- operation / action ------------------------------------------------
    raw_action = raw.get('action')
    command = raw.get('command')
    inputs = raw.get('inputs')
    raw_input_probe = raw.get('input')
    commands_list = None
    if isinstance(raw_input_probe, dict):
        commands_list = raw_input_probe.get('commands')
    if commands_list is None and isinstance(inputs, dict):
        commands_list = inputs.get('commands')
    inferred_command = command or (raw_input_probe.get('cmd') if isinstance(raw_input_probe, dict) else None) or (raw_input_probe if isinstance(raw_input_probe, str) else None) or (inputs.get('cmd') if isinstance(inputs, dict) else None) or (inputs if isinstance(inputs, str) else None)
    if inferred_command is None and isinstance(commands_list, list):
        inferred_command = '\n'.join(str(c) for c in commands_list)
    op = _operation(str(raw_action) if raw_action is not None else None, raw.get('operation'), meta, report)
    ctx_op = _operation_from_step_context(raw_action, raw.get('operation'), raw.get('tool'), inferred_command, meta, report)
    cmd_op = _operation_from_command(str(inferred_command) if inferred_command is not None else None, meta, report) or ctx_op
    # Inferred operations only replace a missing or unrecognised raw operation.
    if cmd_op and (raw.get('operation') is None or op == 'other'):
        if op != cmd_op and report is not None:
            report.operation_repair_count += 1
        op = cmd_op
    action = _action(str(raw_action) if raw_action is not None else None, op, meta, report)
    # An "error" label on a step that reports success and carries no error details is not an error.
    if action == 'error' and raw.get('success') is True and not raw.get('error_type') and not raw.get('error_message'):
        action = 'agent_step'

    # --- input -------------------------------------------------------------
    raw_input = raw.get('input')
    input_val: str | None
    if command is not None:
        input_val = str(command)
        meta['command'] = command
        if inputs is not None:
            meta['inputs'] = inputs
        if isinstance(raw_input, dict):
            meta.setdefault('input', raw_input)
            report.input_dict_to_string_count += 1
    elif isinstance(raw_input, dict):
        meta['inputs'] = raw_input
        report.input_dict_to_string_count += 1
        input_val = str(raw_input.get('cmd')) if raw_input.get('cmd') is not None else _compact(raw_input)
    elif raw_input is not None:
        input_val = _string_or_none(raw_input, meta, 'input')
    elif inputs is not None:
        meta['inputs'] = inputs
        if isinstance(inputs, dict) and inputs.get('cmd') is not None:
            input_val = str(inputs.get('cmd'))
        else:
            input_val = _compact(inputs)
        # NOTE(review): nesting reconstructed from a flattened listing; counted for
        # every dict-valued `inputs`, matching the dict-valued `input` branch above.
        if isinstance(inputs, dict):
            report.input_dict_to_string_count += 1
    else:
        input_val = None

    # --- output ------------------------------------------------------------
    raw_output = raw.get('output')
    outputs = raw.get('outputs')
    output_val: str | None = None
    if isinstance(raw_output, dict):
        meta['outputs'] = raw_output
        output_val = _output_to_string(raw_output)
        report.output_dict_to_string_count += 1
    elif raw_output is not None:
        output_val = _string_or_none(raw_output, meta, 'output')
    if outputs is not None:
        meta['outputs'] = outputs
        if isinstance(outputs, dict):
            report.output_dict_to_string_count += 1
        # NOTE(review): nesting reconstructed from a flattened listing; applied to
        # any non-None `outputs`, since _output_to_string also handles non-dicts.
        output_val = output_val or _output_to_string(outputs)
    if raw.get('stdout_summary') is not None:
        meta['stdout_summary'] = raw.get('stdout_summary')
        output_val = output_val or str(raw.get('stdout_summary'))

    # --- state change / artifacts -----------------------------------------
    state_change = _string_or_none(raw.get('state_change'), meta, 'state_change')
    if raw.get('state_changes') is not None:
        sc = raw.get('state_changes')
        meta['state_changes'] = sc
        if isinstance(sc, list):
            state_change = state_change or ('; '.join(str(x) for x in sc) if sc else None)
        else:
            state_change = state_change or _compact(sc)

    raw_refs = raw.get('artifact_refs')
    if not raw_refs:
        artifact_refs = []
    elif isinstance(raw_refs, (str, bytes)) or not hasattr(raw_refs, '__iter__'):
        # A bare path must stay one reference; list('a.txt') would split it into characters.
        artifact_refs = [raw_refs]
    else:
        artifact_refs = list(raw_refs)
    if raw.get('artifacts') is not None:
        meta['original_artifacts'] = raw.get('artifacts')
        artifact_refs.extend(str(x) for x in _as_list(raw.get('artifacts')))
    if raw.get('artifact_paths') is not None:
        meta['original_artifact_paths'] = raw.get('artifact_paths')
        artifact_refs.extend(str(x) for x in _as_list(raw.get('artifact_paths')))
    if raw.get('exit_code') is not None:
        meta['exit_code'] = raw.get('exit_code')
    if raw.get('decision_summary') is not None:
        meta['decision_summary'] = raw.get('decision_summary')

    # --- observation / reasoning ------------------------------------------
    observation = _string_or_none(raw.get('observation'), meta, 'observation')
    if observation is None and action != 'user_message':
        observation = _derive_observation(step_num, op)
        meta['observation_derived'] = True
    reasoning = None if action == 'user_message' else _string_or_none(raw.get('reasoning') or raw.get('decision_summary'), meta, 'reasoning')

    # --- causality ---------------------------------------------------------
    caused = raw.get('caused_by')
    warnings = []
    if caused is None:
        # No explicit cause: every step after the first follows the previous one.
        caused_by = None if ordinal == 1 else [ordinal-1]
        causal_type = None if ordinal == 1 else 'execution_of_plan'
        causal_note = None if ordinal == 1 else 'Sequential workflow step following the prior action.'
    else:
        caused_by = [int(x) for x in _as_list(caused) if isinstance(x, int) or str(x).isdigit()]
        # Only references to strictly earlier steps are kept.
        valid = [x for x in caused_by if 1 <= x < step_num]
        if len(valid) != len(caused_by):
            warnings.append('invalid caused_by references removed')
        caused_by = valid or None
        causal_type = _normalize_causal_type(raw.get('causal_type') or ('execution_of_plan' if caused_by else None), meta, report)
        causal_note = _string_or_none(raw.get('causal_note'), meta, 'causal_note')
    if caused is None and causal_type is not None:
        causal_type = _normalize_causal_type(causal_type, meta, report)
    if warnings:
        # Merge rather than overwrite: _safe_int may already have recorded a
        # warning for this step under the same key.
        earlier = meta.get('normalization_warnings')
        meta['normalization_warnings'] = (list(earlier) if isinstance(earlier, list) else []) + warnings
        report.warnings.extend(warnings)

    # --- assemble ----------------------------------------------------------
    actor = raw.get('actor') or ('user' if action == 'user_message' else 'agent:worker')
    if action == 'user_message':
        actor = 'user'
    step = {
        'step': step_num,
        'turn': _safe_int(raw.get('turn'), 1, 'turn', meta, report),
        'actor': actor,
        'action': action,
        'operation': None if action == 'user_message' else op,
        # input_val is the stringified command when one exists, so _tool always receives str | None.
        'tool': None if action == 'user_message' else _tool(input_val, raw, op),
        'execution_mode': raw.get('execution_mode') if raw.get('execution_mode') in {'serial','parallel'} else None,
        'parallel_group': _string_or_none(raw.get('parallel_group'), meta, 'parallel_group'),
        'observation': None if action == 'user_message' else observation,
        'input': input_val,
        'input_source': raw.get('input_source') if isinstance(raw.get('input_source'), dict) else None,
        'output': None if action == 'user_message' else output_val,
        'state_change': state_change,
        'reasoning': None if action == 'user_message' else reasoning,
        'caused_by': caused_by,
        'causal_type': causal_type,
        'causal_note': causal_note,
        'alternatives_considered': _string_or_none(raw.get('alternatives_considered'), meta, 'alternatives_considered'),
        'success': None if action == 'user_message' else raw.get('success') if isinstance(raw.get('success'), bool) else None,
        'step_outcome': None if action == 'user_message' else _normalize_outcome(raw.get('step_outcome'), meta),
        'error_type': _string_or_none(raw.get('error_type'), meta, 'error_type'),
        'error_message': _string_or_none(raw.get('error_message'), meta, 'error_message'),
        'message_role': raw.get('message_role') if action == 'user_message' else None,
        'feedback_type': raw.get('feedback_type') if action == 'user_message' else None,
        'feedback_content': _string_or_none(raw.get('feedback_content'), meta, 'feedback_content') if action == 'user_message' else None,
        'started_at': _string_or_none(raw.get('started_at'), meta, 'started_at'),
        'ended_at': _string_or_none(raw.get('ended_at'), meta, 'ended_at'),
        'retry_of': raw.get('retry_of') if isinstance(raw.get('retry_of'), int) else None,
        'artifact_refs': artifact_refs,
        'metadata_json': meta,
    }
    if action == 'user_message':
        step.update({'operation':None,'tool':None,'execution_mode':None,'observation':None,'reasoning':None,'success':None,'step_outcome':None,'output':None})
    return step
|
|
499
|
+
|
|
500
|
+
def _model_dump(obj: Any) -> dict[str, Any]:
|
|
501
|
+
if obj is None:
|
|
502
|
+
return None
|
|
503
|
+
if hasattr(obj, 'model_dump'):
|
|
504
|
+
try:
|
|
505
|
+
return obj.model_dump(mode='json')
|
|
506
|
+
except TypeError:
|
|
507
|
+
return obj.model_dump()
|
|
508
|
+
return obj
|
|
509
|
+
|
|
510
|
+
def _valid_actual_outputs(raw_actual: Any, context: TraceNormalizationContext, metadata: dict[str, Any]) -> dict[str, Any] | None:
    """Choose the ``actual_outputs`` payload for a normalized trace.

    Outputs carried on ``context`` take precedence over the raw trace value.
    Without a context override, a dict ``raw_actual`` is validated through
    ``ActualOutputs`` and dumped. Whenever a non-``None`` raw value is not
    the one returned, it is stored under
    ``metadata['original_actual_outputs']``.

    Returns:
        The dumped outputs, or ``None`` when nothing valid is available.
    """
    override = context.actual_outputs
    if override is not None:
        if raw_actual is not None:
            metadata['original_actual_outputs'] = raw_actual
        return _model_dump(override)

    if raw_actual is None:
        return None

    if isinstance(raw_actual, dict):
        try:
            return ActualOutputs.model_validate(raw_actual).model_dump()
        except Exception:
            # Invalid payload: fall through so it is preserved below.
            pass

    metadata['original_actual_outputs'] = raw_actual
    return None
|
|
524
|
+
|
|
525
|
+
def _canonical_attempt_kind(raw: dict[str, Any], context: TraceNormalizationContext, metadata: dict[str, Any]) -> str:
    """Resolve the attempt kind for a trace.

    The first truthy value among ``raw['attempt_kind']``, ``raw['attempt']``
    and ``context.attempt_kind`` is the candidate. A candidate found in
    ``VALID_ATTEMPT_KINDS`` is returned directly. Any other non-``None``
    candidate is recorded under ``metadata['original_attempt_kind']`` and the
    result falls back to ``context.attempt_kind`` when that is valid, else
    ``'other'``.
    """
    candidate = raw.get('attempt_kind')
    if not candidate:
        candidate = raw.get('attempt')
    if not candidate:
        candidate = context.attempt_kind

    if candidate in VALID_ATTEMPT_KINDS:
        return candidate

    if candidate is not None:
        metadata['original_attempt_kind'] = candidate

    fallback = context.attempt_kind
    if fallback in VALID_ATTEMPT_KINDS:
        return fallback
    return 'other'
|
|
532
|
+
|
|
533
|
+
def _canonical_trace_mode(raw: dict[str, Any], metadata: dict[str, Any]) -> str:
    """Resolve the trace mode, defaulting to ``'live'``.

    A missing or falsy ``raw['trace_mode']`` is treated as ``'live'``. A
    value outside ``VALID_TRACE_MODES`` is recorded under
    ``metadata['original_trace_mode']`` and replaced by ``'live'``.
    """
    mode = raw.get('trace_mode')
    if not mode:
        mode = 'live'

    if mode not in VALID_TRACE_MODES:
        metadata['original_trace_mode'] = mode
        return 'live'
    return mode
|
|
539
|
+
|
|
540
|
+
def _canonical_termination_reason(raw: dict[str, Any], success_like: bool, metadata: dict[str, Any]) -> str:
    """Resolve the termination reason for a trace.

    A ``raw['termination_reason']`` found in ``VALID_TERMINATION_REASONS`` is
    returned as-is. Any other non-``None`` value is recorded under
    ``metadata['original_termination_reason']``. The fallback is
    ``'task_complete'`` when ``success_like`` is truthy, otherwise ``'other'``.
    """
    reason = raw.get('termination_reason')
    if reason in VALID_TERMINATION_REASONS:
        return reason

    if reason is not None:
        metadata['original_termination_reason'] = reason

    if success_like:
        return 'task_complete'
    return 'other'
|
|
547
|
+
|
|
548
|
+
def _canonical_agent_tools(raw: dict[str, Any], metadata: dict[str, Any]) -> list[str]:
|
|
549
|
+
val=raw.get('agent_tools')
|
|
550
|
+
if isinstance(val, list) and all(isinstance(x, str) for x in val):
|
|
551
|
+
return val
|
|
552
|
+
if val is not None:
|
|
553
|
+
metadata['original_agent_tools']=val
|
|
554
|
+
return ['codex_cli','Bash','python','file_read','file_write','apply_patch']
|
|
555
|
+
|
|
556
|
+
def _canonical_artifacts(raw: dict[str, Any], metadata: dict[str, Any]) -> list[dict[str, Any]]:
|
|
557
|
+
val=raw.get('artifacts')
|
|
558
|
+
if not val:
|
|
559
|
+
return []
|
|
560
|
+
metadata['original_artifacts']=val
|
|
561
|
+
return []
|
|
562
|
+
|
|
563
|
+
def _extract_raw_steps(raw: dict[str, Any]) -> tuple[list[Any], str | None]:
|
|
564
|
+
for key in ['steps','trace_steps','trace','records','events','actions']:
|
|
565
|
+
val=raw.get(key)
|
|
566
|
+
if isinstance(val, list):
|
|
567
|
+
return val, key
|
|
568
|
+
if isinstance(val, dict):
|
|
569
|
+
nested=val.get('steps') or val.get('events') or val.get('actions')
|
|
570
|
+
if isinstance(nested, list):
|
|
571
|
+
return nested, key
|
|
572
|
+
return [], None
|
|
573
|
+
|
|
574
|
+
def normalize_agent_trace(raw: dict[str,Any], context: TraceNormalizationContext) -> dict[str,Any]:
    """Build a canonical trace dict from a loosely structured raw trace.

    Steps are located with ``_extract_raw_steps`` and normalized one by one
    with ``_normalize_step``. If the resulting step numbers are not exactly
    ``1..N`` they are renumbered, keeping the previous number under each
    step's ``metadata_json['original_step_number']``. Unrecognized top-level
    fields, and original values replaced by the canonical helpers, are kept
    in the trace's ``metadata_json``.

    The trace is validated before returning; the outcome is stored in
    ``metadata_json['schema_valid']``. The internal normalization report is
    not returned to the caller.

    Args:
        raw: Raw trace object.
        context: Supplies fallback task/attempt identifiers and an optional
            ``actual_outputs`` override.

    Returns:
        The normalized trace dict, returned even when validation fails.
    """
    report = TraceNormalizationReport(
        task_id=context.task_id,
        attempt_id=context.attempt_id,
        attempt_kind=context.attempt_kind,
    )

    raw_steps, raw_step_source = _extract_raw_steps(raw)
    if not isinstance(raw_steps, list):
        raw_steps = []
        report.warnings.append('raw steps was not a list')

    # Work on a shallow copy so the caller's metadata dict is not mutated.
    incoming_metadata = raw.get('metadata_json')
    metadata = dict(incoming_metadata) if isinstance(incoming_metadata, dict) else {}
    if raw_step_source:
        metadata['raw_trace_step_source'] = raw_step_source

    # Keep every top-level field that is neither canonical nor a known alias.
    alias_keys = {'attempt', 'role', 'schema_name'}
    unknown_top = {}
    for key, value in raw.items():
        if key in CANONICAL_TOP or key in alias_keys:
            continue
        unknown_top[key] = value
    if unknown_top:
        metadata['original_fields'] = unknown_top

    role = raw.get('role')
    if role is not None:
        metadata['original_role'] = role
    schema_name = raw.get('schema_name')
    if schema_name is not None:
        metadata['schema_name'] = schema_name

    used = set()
    steps = [
        _normalize_step(raw_step, position, used, report)
        for position, raw_step in enumerate(raw_steps, 1)
    ]

    # Ensure final monotonic sequence without dropping; preserve original numbers already in metadata when changed.
    expected_numbers = list(range(1, len(steps) + 1))
    if [step['step'] for step in steps] != expected_numbers:
        for position, step in enumerate(steps, 1):
            step['metadata_json'].setdefault('original_step_number', step['step'])
            step['step'] = position
        report.warnings.append('steps renumbered to canonical 1..N order')

    success_like = bool(raw.get('actual_outputs') or any(step.get('success') for step in steps))

    status = 'partial' if report.warnings else 'normalized'
    metadata.update({
        'raw_trace_preserved': True,
        'discarded_step_count': 0,
        'trace_normalization_status': status,
    })

    def _text(key):
        # String-or-None view of a raw field; the helper receives ``metadata``
        # and the field name alongside the value.
        return _string_or_none(raw.get(key), metadata, key)

    skills = raw.get('skills')
    agent_config = raw.get('agent_config')

    # Entries are evaluated top to bottom; the helpers write into ``metadata``
    # in that order, so the order of keys here is significant.
    trace = {
        'schema_version': str(raw.get('schema_version') or 'aa-trace-v0.1'),
        'trace_id': str(raw.get('trace_id') or f'{context.trace_id_prefix}_{context.attempt_id}_normalized'),
        'collection_id': _text('collection_id'),
        'prior_trace_id': _text('prior_trace_id'),
        'trace_mode': _canonical_trace_mode(raw, metadata),
        'task': _text('task') or context.task,
        'task_id': _text('task_id') or context.task_id,
        'task_family_id': _text('task_family_id'),
        'attempt_id': _text('attempt_id') or context.attempt_id,
        'attempt_kind': _canonical_attempt_kind(raw, context, metadata),
        'agent_tools': _canonical_agent_tools(raw, metadata),
        'started_at': _text('started_at'),
        'ended_at': _text('ended_at'),
        'system_prompt': _text('system_prompt'),
        'system_prompt_hash': _text('system_prompt_hash'),
        'skills': skills if isinstance(skills, list) else None,
        'memory': _text('memory'),
        'agent_config': agent_config if isinstance(agent_config, dict) else None,
        'learning': _text('learning'),
        'termination_reason': _canonical_termination_reason(raw, success_like, metadata),
        'steps': steps,
        'actual_outputs': _valid_actual_outputs(raw.get('actual_outputs'), context, metadata),
        'artifacts': _canonical_artifacts(raw, metadata),
        'metadata_json': metadata,
    }
    trace['attempt_status'] = infer_attempt_status(trace)

    report.raw_step_count = len(raw_steps)
    report.normalized_step_count = len(steps)
    report.discarded_step_count = 0

    valid, errors = _validate_normalized_trace(trace)
    report.trace_schema_valid = valid
    trace['metadata_json']['schema_valid'] = valid
    if not valid:
        report.trace_normalization_partial = True
        report.validation_errors.extend(errors)

    report.trace_normalized = True
    report.trace_lossless = (
        report.raw_step_count == report.normalized_step_count
        and report.discarded_step_count == 0
    )
    return trace
|
|
612
|
+
|
|
613
|
+
def _validation_error_messages(exc: Exception) -> list[str]:
|
|
614
|
+
if hasattr(exc, 'errors'):
|
|
615
|
+
try:
|
|
616
|
+
return [_compact(e, 2000) for e in exc.errors()]
|
|
617
|
+
except Exception:
|
|
618
|
+
pass
|
|
619
|
+
return [str(exc)]
|
|
620
|
+
|
|
621
|
+
def _validate_normalized_trace(trace: dict[str, Any]) -> tuple[bool, list[str]]:
    """Validate ``trace`` against the ``AgentTrace`` model.

    Returns:
        ``(True, [])`` when ``AgentTrace.model_validate`` accepts the trace,
        otherwise ``(False, messages)`` where ``messages`` comes from
        ``_validation_error_messages``. Validation failures are reported
        through the return value rather than raised.
    """
    try:
        AgentTrace.model_validate(trace)
    except Exception as exc:
        return False, _validation_error_messages(exc)
    return True, []
|
|
627
|
+
|
|
628
|
+
def repair_agent_trace_file(raw_trace_path: Path, context: TraceNormalizationContext) -> NormalizationResult:
    """Load a raw trace file, normalize it, and report what was repaired.

    The file must contain either a JSON object or a JSON list of steps. A
    bare list is wrapped as ``{'steps': [...]}``. An object whose steps live
    under a key other than ``steps`` gets them copied to ``raw['steps']``,
    with the source key noted in ``metadata_json['raw_trace_shape']``.

    Args:
        raw_trace_path: Path of the raw trace JSON file (read as UTF-8).
        context: Task/attempt identifiers and normalization fallbacks.

    Returns:
        A ``NormalizationResult``. If the file cannot be read or parsed, or
        has an unsupported top-level shape, ``normalized_trace`` is ``None``,
        ``fallback_required`` is ``True`` and ``parse_error`` holds the
        message. Otherwise the normalized trace is returned together with a
        report, even when schema validation failed (the report then carries
        the validation errors and is flagged partial).
    """
    report = TraceNormalizationReport(
        task_id=context.task_id,
        attempt_id=context.attempt_id,
        attempt_kind=context.attempt_kind,
        raw_trace_ref=raw_trace_path.name,
    )
    try:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        raw = json.loads(raw_trace_path.read_text(encoding='utf-8'))
        if isinstance(raw, list):
            raw = {'steps': raw, 'metadata_json': {'raw_trace_shape': 'list', 'raw_trace_list': raw}}
        elif isinstance(raw, dict):
            raw_steps, source = _extract_raw_steps(raw)
            if source and source != 'steps':
                existing = raw.get('metadata_json')
                md = dict(existing) if isinstance(existing, dict) else {}
                md.setdefault('raw_trace_shape', source)
                raw['metadata_json'] = md
                raw['steps'] = raw_steps
        else:
            raise ValueError('raw trace JSON was not an object or step list')
    except Exception as e:
        # Any read/parse/shape failure is reported to the caller, not raised.
        report.raw_trace_parse_error = True
        report.validation_errors.append(str(e))
        return NormalizationResult(normalized_trace=None, report=report, fallback_required=True, parse_error=str(e))

    trace = normalize_agent_trace(raw, context)
    valid, errors = _validate_normalized_trace(trace)
    metadata = trace.setdefault('metadata_json', {})
    # normalize_agent_trace may already have marked the trace partial
    # (for example after renumbering steps); remember that verdict.
    normalizer_flagged_partial = metadata.get('trace_normalization_status') == 'partial'

    raw_steps, _source = _extract_raw_steps(raw)
    raw_step_count = len(raw_steps)
    normalized_step_count = len(trace.get('steps') or [])

    comp = TraceNormalizationReport(
        task_id=context.task_id,
        attempt_id=context.attempt_id,
        attempt_kind=context.attempt_kind,
        raw_trace_ref=raw_trace_path.name,
        normalized_trace_ref='agent_trace.normalized.json',
        canonical_trace_ref='agent_trace.json',
        trace_schema_valid=valid,
        trace_normalized=True,
        trace_lossless=True,
        fallback_trace=False,
        raw_step_count=raw_step_count,
        normalized_step_count=normalized_step_count,
        discarded_step_count=0,
        trace_normalization_partial=(not valid) or normalizer_flagged_partial,
    )

    # normalize_agent_trace does not hand back its internal report, so the
    # steps are normalized a second time into a scratch report purely to
    # recover the repair counters and per-step warnings.
    counter_report = TraceNormalizationReport(
        task_id=context.task_id,
        attempt_id=context.attempt_id,
        attempt_kind=context.attempt_kind,
    )
    used = set()
    for i, step in enumerate(raw_steps, 1):
        _normalize_step(step, i, used, counter_report)
    comp.input_dict_to_string_count = counter_report.input_dict_to_string_count
    comp.output_dict_to_string_count = counter_report.output_dict_to_string_count
    comp.causal_type_repair_count = counter_report.causal_type_repair_count
    comp.operation_repair_count = counter_report.operation_repair_count
    comp.action_repair_count = counter_report.action_repair_count
    comp.warnings.extend(counter_report.warnings)

    comp.trace_lossless = comp.raw_step_count == comp.normalized_step_count and comp.discarded_step_count == 0
    if not comp.trace_lossless:
        comp.validation_errors.append('normalized step count is less than raw step count')
    if not valid:
        comp.validation_errors.extend(errors)

    # Derive the report flag and the metadata status from one value so they
    # cannot disagree. A 'partial' verdict from normalize_agent_trace is kept
    # even when the second pass produced no step warnings.
    partial = comp.trace_normalization_partial or bool(comp.warnings)
    comp.trace_normalization_partial = partial
    metadata['schema_valid'] = valid
    metadata['trace_normalization_status'] = 'partial' if partial else 'normalized'

    return NormalizationResult(normalized_trace=trace, report=comp, fallback_required=False)
|