codeer-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeer_cli/__init__.py +54 -0
- codeer_cli/_validate.py +131 -0
- codeer_cli/agents.py +155 -0
- codeer_cli/chats.py +87 -0
- codeer_cli/cli.py +92 -0
- codeer_cli/client.py +277 -0
- codeer_cli/commands/__init__.py +0 -0
- codeer_cli/commands/_util.py +12 -0
- codeer_cli/commands/agent.py +186 -0
- codeer_cli/commands/check.py +66 -0
- codeer_cli/commands/eval_cmd.py +919 -0
- codeer_cli/commands/history.py +200 -0
- codeer_cli/commands/kb.py +126 -0
- codeer_cli/commands/profile.py +205 -0
- codeer_cli/constants.py +66 -0
- codeer_cli/eval_.py +423 -0
- codeer_cli/histories.py +156 -0
- codeer_cli/kb.py +226 -0
- codeer_cli/parse.py +567 -0
- codeer_cli-0.1.0.dist-info/METADATA +108 -0
- codeer_cli-0.1.0.dist-info/RECORD +23 -0
- codeer_cli-0.1.0.dist-info/WHEEL +4 -0
- codeer_cli-0.1.0.dist-info/entry_points.txt +2 -0
codeer_cli/parse.py
ADDED
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
"""Typed parsers for Codeer API response shapes.
|
|
2
|
+
|
|
3
|
+
Use these instead of digging through dicts in every caller. Each ``parse_*``
|
|
4
|
+
function accepts the raw envelope-unwrapped payload (i.e. what the
|
|
5
|
+
``codeer_cli`` helpers already return) and produces a small frozen dataclass
|
|
6
|
+
plus a few derived rollups that come up over and over again.
|
|
7
|
+
|
|
8
|
+
Parsers are deliberately tolerant: missing/extra fields don't raise, casing
|
|
9
|
+
mismatches (``FILE`` vs ``file``) are normalized to lowercase. They are NOT a
|
|
10
|
+
schema validator.
|
|
11
|
+
|
|
12
|
+
KEY GAP, called out here so you don't waste time:
|
|
13
|
+
|
|
14
|
+
Tool **arguments** (e.g. the regex passed to ``list_kb_files`` or the query
|
|
15
|
+
passed to ``retrieve_context_objs``) and **outputs** are not persisted on the
|
|
16
|
+
Conversation row. They flow over the WebSocket during execution and are
|
|
17
|
+
dropped after the assistant turn is saved. What you CAN recover from a
|
|
18
|
+
history is:
|
|
19
|
+
|
|
20
|
+
- tool name + call id (regex over ``content``: ``<tool id=...>name</tool>``)
|
|
21
|
+
- per-call token usage (``meta.token_usage.tool_calls[]``)
|
|
22
|
+
- retrieved primary sources (``primary_sources[]`` — the end-to-end trace)
|
|
23
|
+
- assistant's final text (``content`` with tool markers stripped)
|
|
24
|
+
|
|
25
|
+
If you need the raw tool args, you must capture them at execution time via
|
|
26
|
+
the chat SSE stream, not from history reads.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import re
|
|
32
|
+
from collections import Counter
|
|
33
|
+
from dataclasses import dataclass, field
|
|
34
|
+
from datetime import datetime
|
|
35
|
+
from typing import Any, Iterable, Optional
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Tool calls (stored as `<tool id=call_xxx>tool_name</tool>` inside `content`)
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
_TOOL_TAG_RE = re.compile(r"<tool\s+id=([A-Za-z0-9_\-]+)>([A-Za-z_][A-Za-z_0-9]*)</tool>")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
|
|
45
|
+
class ToolCall:
|
|
46
|
+
name: str
|
|
47
|
+
call_id: str
|
|
48
|
+
# The token-usage entry for this call (matched positionally to the tool tags
|
|
49
|
+
# in `content`, since the API doesn't link the two by id). May be ``None``
|
|
50
|
+
# if the assistant turn ended before tool execution finished.
|
|
51
|
+
prompt_tokens: Optional[int] = None
|
|
52
|
+
completion_tokens: Optional[int] = None
|
|
53
|
+
total_tokens: Optional[int] = None
|
|
54
|
+
model: Optional[str] = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
|
|
58
|
+
class EvalToolCall:
|
|
59
|
+
name: str
|
|
60
|
+
call_id: Optional[str] = None
|
|
61
|
+
status: Optional[str] = None
|
|
62
|
+
arguments: Any = None
|
|
63
|
+
output: Any = None
|
|
64
|
+
error: Optional[str] = None
|
|
65
|
+
duration_ms: Optional[float] = None
|
|
66
|
+
started_at: Optional[str] = None
|
|
67
|
+
ended_at: Optional[str] = None
|
|
68
|
+
raw: dict = field(default_factory=dict)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
_EVAL_TOOL_CONTAINER_KEYS = (
|
|
72
|
+
"reasoning_steps",
|
|
73
|
+
"tool_calls",
|
|
74
|
+
"tool_call_trace",
|
|
75
|
+
"tool_calling_trace",
|
|
76
|
+
"tool_trace",
|
|
77
|
+
"tool_traces",
|
|
78
|
+
"trace",
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _first_present(raw: dict, keys: Iterable[str]) -> Any:
|
|
83
|
+
for key in keys:
|
|
84
|
+
if key in raw and raw.get(key) not in (None, ""):
|
|
85
|
+
return raw.get(key)
|
|
86
|
+
return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _duration_ms(raw: dict) -> Optional[float]:
|
|
90
|
+
for key in ("duration_ms", "elapsed_ms", "latency_ms", "execution_time_ms", "time_ms"):
|
|
91
|
+
val = raw.get(key)
|
|
92
|
+
if isinstance(val, (int, float)):
|
|
93
|
+
return float(val)
|
|
94
|
+
for key in ("duration_s", "elapsed_s", "latency_s", "execution_time", "execution_time_s"):
|
|
95
|
+
val = raw.get(key)
|
|
96
|
+
if isinstance(val, (int, float)):
|
|
97
|
+
return float(val) * 1000
|
|
98
|
+
started = _first_present(raw, ("start_at", "started_at", "start_time", "created_at"))
|
|
99
|
+
ended = _first_present(raw, ("end_at", "ended_at", "end_time", "completed_at", "finished_at"))
|
|
100
|
+
if isinstance(started, str) and isinstance(ended, str):
|
|
101
|
+
try:
|
|
102
|
+
start_dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
|
|
103
|
+
end_dt = datetime.fromisoformat(ended.replace("Z", "+00:00"))
|
|
104
|
+
except ValueError:
|
|
105
|
+
return None
|
|
106
|
+
return max((end_dt - start_dt).total_seconds() * 1000, 0)
|
|
107
|
+
return None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _tool_name(raw: dict) -> str:
|
|
111
|
+
name = _first_present(raw, ("name", "tool_name", "function_name", "type"))
|
|
112
|
+
if name:
|
|
113
|
+
return str(name)
|
|
114
|
+
function = raw.get("function")
|
|
115
|
+
if isinstance(function, dict) and function.get("name"):
|
|
116
|
+
return str(function["name"])
|
|
117
|
+
tool = raw.get("tool")
|
|
118
|
+
if isinstance(tool, dict) and tool.get("name"):
|
|
119
|
+
return str(tool["name"])
|
|
120
|
+
return ""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _normalize_eval_tool_call(raw: dict) -> EvalToolCall:
    """Build an ``EvalToolCall`` from one trace payload, whatever its key names.

    Each field is taken from the first alias that holds a usable value (see
    ``_first_present``). ``arguments`` additionally falls back to a nested
    ``function.arguments`` value. The untouched payload is kept in ``raw``.
    """
    function = raw.get("function") if isinstance(raw.get("function"), dict) else {}

    arguments = _first_present(raw, ("arguments", "args", "input", "parameters"))
    if not arguments:
        # Prefer the nested value when the top level has nothing useful, but
        # keep an explicit empty top-level value (e.g. ``{}`` for a no-argument
        # call) instead of collapsing it to ``None``.
        nested = function.get("arguments")
        if nested is not None:
            arguments = nested

    return EvalToolCall(
        name=_tool_name(raw),
        call_id=_first_present(raw, ("id", "call_id", "tool_call_id")),
        status=_first_present(raw, ("status", "state")),
        arguments=arguments,
        output=_first_present(raw, ("output", "result", "response")),
        error=_first_present(raw, ("error", "error_message")),
        duration_ms=_duration_ms(raw),
        started_at=_first_present(raw, ("start_at", "started_at", "start_time", "created_at")),
        ended_at=_first_present(raw, ("end_at", "ended_at", "end_time", "completed_at", "finished_at")),
        raw=raw,
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _iter_eval_tool_payloads(value: Any) -> Iterable[dict]:
|
|
140
|
+
if isinstance(value, list):
|
|
141
|
+
for item in value:
|
|
142
|
+
if isinstance(item, dict):
|
|
143
|
+
yield item
|
|
144
|
+
return
|
|
145
|
+
if not isinstance(value, dict):
|
|
146
|
+
return
|
|
147
|
+
for key in ("calls", "tool_calls", "steps", "events", "items"):
|
|
148
|
+
nested = value.get(key)
|
|
149
|
+
if isinstance(nested, list):
|
|
150
|
+
for item in nested:
|
|
151
|
+
if isinstance(item, dict):
|
|
152
|
+
yield item
|
|
153
|
+
return
|
|
154
|
+
if _tool_name(value):
|
|
155
|
+
yield value
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def parse_eval_tool_calls(raw: dict) -> list[EvalToolCall]:
    """Extract tool-call trace details from an eval result row, if present.

    Current eval rows expose this as ``reasoning_steps`` when callers pass
    ``include_reasoning_steps=true``. The parser also accepts earlier trace
    field-name variants while this data was being added.

    The row itself and its ``meta``/``metadata``/``debug``/``execution``/``run``
    sub-dicts are searched for trace containers. Payloads with neither a name
    nor a call id are dropped. If no structured trace is found, the assistant
    text is scanned for ``<tool id=...>name</tool>`` markers instead, which
    recovers name and call id only.
    """
    containers = [raw]
    for key in ("meta", "metadata", "debug", "execution", "run"):
        value = raw.get(key)
        if isinstance(value, dict):
            containers.append(value)

    out: list[EvalToolCall] = []
    for container in containers:
        for key in _EVAL_TOOL_CONTAINER_KEYS:
            if key not in container:
                continue
            for item in _iter_eval_tool_payloads(container[key]):
                call = _normalize_eval_tool_call(item)
                if call.name or call.call_id:
                    out.append(call)
    if out:
        return out

    # Fallback for older rows where only the assistant output/content persisted
    # tool markers. This gives name + call id, but not args/output/timing.
    # Only string candidates are considered: the marker regex cannot scan a
    # dict or list, and a parser that promises tolerance should not raise.
    content = ""
    for key in ("output", "actual_output", "content"):
        candidate = raw.get(key)
        if isinstance(candidate, str) and candidate:
            content = candidate
            break
    return [
        EvalToolCall(name=tc.name, call_id=tc.call_id, raw={"source": "output_tool_marker"})
        for tc in parse_tool_calls(content)
    ]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def summarize_eval_tool_calls(calls: Iterable[EvalToolCall]) -> str:
    """Render tool calls as one comma-separated line.

    Each call is labelled with its name, else its call id, else ``"tool"``.
    When a duration is known it is appended as ``" (<n> ms)"``.
    """

    def _label(call) -> str:
        base = call.name or call.call_id or "tool"
        if call.duration_ms is None:
            return base
        return f"{base} ({call.duration_ms:g} ms)"

    return ", ".join(_label(call) for call in calls)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def parse_tool_calls(content: str, token_usage: Optional[dict] = None) -> list[ToolCall]:
    """Recover the tool calls recorded in an assistant turn's ``content``.

    ``token_usage`` is the turn's ``meta.token_usage`` dict. When given, the
    n-th entry of its ``tool_calls`` list supplies the token counts and model
    for the n-th tool marker. Pass ``None`` to skip that enrichment. Markers
    without a matching usage entry keep ``None`` for those fields.
    """
    if not content:
        return []

    per_call_usage: list[dict] = []
    if isinstance(token_usage, dict):
        per_call_usage = list(token_usage.get("tool_calls") or [])

    calls: list[ToolCall] = []
    for index, marker in enumerate(_TOOL_TAG_RE.finditer(content)):
        usage = per_call_usage[index] if index < len(per_call_usage) else {}
        calls.append(
            ToolCall(
                name=marker.group(2),
                call_id=marker.group(1),
                prompt_tokens=usage.get("prompt_tokens"),
                completion_tokens=usage.get("completion_tokens"),
                total_tokens=usage.get("total_tokens"),
                model=usage.get("model"),
            )
        )
    return calls
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def strip_tool_markers(content: str) -> str:
    """Return ``content`` without its ``<tool ...>`` markers, whitespace-trimmed.

    Falsy input (``None`` or an empty string) yields ``""``.
    """
    return _TOOL_TAG_RE.sub("", content).strip() if content else ""
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# ---------------------------------------------------------------------------
|
|
235
|
+
# Conversation turns (one row per system/user/assistant message)
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
|
|
238
|
+
@dataclass(frozen=True)
class ConversationTurn:
    """One conversation row: a single system, user or assistant message."""

    id: Optional[int]
    # "system" | "user" | "assistant"
    role: str
    group_id: Optional[str]
    # content with tool markers stripped (use for display)
    text: str
    # original content including <tool ...> tags
    raw_content: str
    tool_calls: list[ToolCall]
    primary_source_ids: list[str]
    score: Optional[int]
    # e.g. ["sys_helpful"], ["sys_improve"]
    feedback_tags: list[str]
    feedback_comments: list[str]
    response_time_ms: Optional[int]
    total_tokens: Optional[int]
    cost_credits: Optional[float]
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def parse_conversation_turn(raw: dict) -> ConversationTurn:
    """Parse one raw conversation row into a ``ConversationTurn``.

    Missing fields fall back to ``None``, ``""`` or an empty list. A ``score``
    of ``0`` is reported as ``None``. Entries of ``primary_sources`` and
    ``feedbacks`` that are not dicts are ignored, as are ``meta`` and
    ``meta.token_usage`` values that are not dicts.
    """
    meta = raw.get("meta")
    if not isinstance(meta, dict):
        meta = {}
    token_usage = meta.get("token_usage")
    if not isinstance(token_usage, dict):
        token_usage = {}

    content = raw.get("content") or ""
    # Guard both lists the same way so one malformed entry cannot abort parsing.
    primary = [ps for ps in (raw.get("primary_sources") or []) if isinstance(ps, dict)]
    feedbacks = [fb for fb in (raw.get("feedbacks") or []) if isinstance(fb, dict)]
    score = raw.get("score")

    return ConversationTurn(
        id=raw.get("id"),
        role=raw.get("role") or "",
        group_id=raw.get("group_id"),
        text=strip_tool_markers(content),
        raw_content=content,
        tool_calls=parse_tool_calls(content, token_usage),
        primary_source_ids=[ps.get("id") for ps in primary if ps.get("id")],
        score=score if score not in (0, None) else None,
        feedback_tags=[fb.get("tag") for fb in feedbacks if fb.get("tag")],
        feedback_comments=[fb.get("content") for fb in feedbacks if fb.get("content")],
        response_time_ms=meta.get("response_time_ms"),
        total_tokens=token_usage.get("total_tokens"),
        cost_credits=meta.get("cost_credits"),
    )
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def parse_conversations(raw_list: Iterable[dict]) -> list[ConversationTurn]:
    """Parse every raw conversation row, preserving input order."""
    turns: list[ConversationTurn] = []
    for row in raw_list:
        turns.append(parse_conversation_turn(row))
    return turns
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
# ---------------------------------------------------------------------------
|
|
283
|
+
# History rollup (one history = many conversation turns)
|
|
284
|
+
# ---------------------------------------------------------------------------
|
|
285
|
+
|
|
286
|
+
@dataclass
class HistorySummary:
    """Rollup of one history: its own fields plus aggregates over its turns."""

    id: int
    title: str
    agent_id: Optional[str]
    agent_name: Optional[str]
    external_user_id: Optional[str]
    share_type: Optional[str]
    created_at: Optional[str]
    updated_at: Optional[str]

    n_turns: int
    n_user: int
    n_assistant: int
    n_system: int
    # tool_name -> call count
    tool_counts: Counter
    # distinct primary_source ids referenced
    primary_sources_seen: int
    # summed across turns
    total_tokens: int
    # summed across turns
    total_credits: float
    # e.g. {"sys_helpful": 0, "sys_improve": 1}
    feedback_tags: Counter
    avg_response_time_ms: Optional[float]
    first_user_message: str
    last_assistant_message: str
    turns: list[ConversationTurn] = field(default_factory=list)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def summarize_history(history_raw: dict, conversations_raw: list[dict]) -> HistorySummary:
    """Combine a history row and its conversation rows into a ``HistorySummary``.

    ``history_raw`` must contain ``"id"``; every other history field is
    optional. Token and credit totals skip turns without a value, the average
    response time covers only turns that report one, and the two message
    previews are cut to 200 characters.
    """
    turns = parse_conversations(conversations_raw)

    tool_counts: Counter = Counter(tc.name for turn in turns for tc in turn.tool_calls)
    feedback_counts: Counter = Counter(tag for turn in turns for tag in turn.feedback_tags)
    distinct_sources = {pid for turn in turns for pid in turn.primary_source_ids}
    latencies = [turn.response_time_ms for turn in turns if turn.response_time_ms is not None]
    token_sum = sum(turn.total_tokens for turn in turns if turn.total_tokens)
    credit_sum = sum((turn.cost_credits for turn in turns if turn.cost_credits), 0.0)

    def _count_role(role: str) -> int:
        return sum(1 for turn in turns if turn.role == role)

    first_user = next((turn.text for turn in turns if turn.role == "user"), "")
    last_assistant = next((turn.text for turn in reversed(turns) if turn.role == "assistant"), "")

    return HistorySummary(
        id=history_raw["id"],
        title=history_raw.get("name") or history_raw.get("title") or "",
        agent_id=history_raw.get("agent_id"),
        agent_name=history_raw.get("agent_name"),
        external_user_id=history_raw.get("external_user_id"),
        share_type=history_raw.get("share_type"),
        created_at=history_raw.get("created_at"),
        updated_at=history_raw.get("updated_at"),
        n_turns=len(turns),
        n_user=_count_role("user"),
        n_assistant=_count_role("assistant"),
        n_system=_count_role("system"),
        tool_counts=tool_counts,
        primary_sources_seen=len(distinct_sources),
        total_tokens=token_sum,
        total_credits=credit_sum,
        feedback_tags=feedback_counts,
        avg_response_time_ms=(sum(latencies) / len(latencies)) if latencies else None,
        first_user_message=first_user[:200],
        last_assistant_message=last_assistant[:200],
        turns=turns,
    )
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# ---------------------------------------------------------------------------
|
|
364
|
+
# Agent
|
|
365
|
+
# ---------------------------------------------------------------------------
|
|
366
|
+
|
|
367
|
+
@dataclass
class AgentSummary:
    """Flattened view of an agent payload plus rollups of its ``unified_tools``."""

    id: str
    name: str
    description: Optional[str]
    workspace_id: Optional[str]
    publish_state: Optional[str]
    version: Optional[int]
    latest_version_number: Optional[int]
    published_version_number: Optional[int]
    publish_history_id: Optional[str]
    llm_model: Optional[str]
    use_search: bool
    suggested_questions: list[str]

    # Derived from unified_tools
    # tool type -> tools of that type, e.g. "memory" -> [tool_dict, ...]
    tools_by_type: dict[str, list[dict]]
    # knowledge_node_ids of all knowledge_base tools, concatenated in tool order
    kb_attached_node_ids: list[str]
    # field names from the first request_form tool
    form_field_names: list[str]
    # agent_ids referenced by call_agent tools
    call_agent_targets: list[str]
    # "<method> <url_template>" strings, one per http_request tool
    http_request_endpoints: list[str]

    # The payload this summary was built from; left out of repr().
    raw: dict = field(default_factory=dict, repr=False)


def parse_agent(raw: dict) -> AgentSummary:
    """Parse a raw agent payload into an ``AgentSummary``.

    Tools are grouped by their ``type`` (``"?"`` when absent). Tool entries
    and form fields that are not dicts are skipped. A missing ``id`` becomes
    ``""`` rather than the string ``"None"``, and a missing or null HTTP
    method / URL template is rendered as ``"?"``.
    """
    by_type: dict[str, list[dict]] = {}
    for tool in raw.get("unified_tools") or []:
        if isinstance(tool, dict):
            by_type.setdefault(tool.get("type") or "?", []).append(tool)

    kb_nodes: list = []
    for tool in by_type.get("knowledge_base", []):
        kb_nodes.extend(tool.get("knowledge_node_ids") or [])

    # Only the first request_form tool contributes field names.
    form_fields: list = []
    request_forms = by_type.get("request_form", [])
    if request_forms:
        schema = request_forms[0].get("custom_form_schema")
        fields = schema.get("fields") if isinstance(schema, dict) else None
        form_fields = [
            f.get("name") for f in (fields or []) if isinstance(f, dict) and f.get("name")
        ]

    call_targets = [
        tool.get("agent_id") for tool in by_type.get("call_agent", []) if tool.get("agent_id")
    ]

    http_eps: list[str] = []
    for tool in by_type.get("http_request", []):
        cfg = tool.get("http_request")
        if not isinstance(cfg, dict):
            cfg = {}
        http_eps.append(f"{cfg.get('method') or '?'} {cfg.get('url_template') or '?'}")

    raw_id = raw.get("id")
    return AgentSummary(
        id="" if raw_id is None else str(raw_id),
        name=raw.get("name") or "",
        description=raw.get("description"),
        workspace_id=raw.get("workspace_id"),
        publish_state=raw.get("publish_state"),
        version=raw.get("version"),
        latest_version_number=raw.get("latest_version_number"),
        published_version_number=raw.get("published_version_number"),
        publish_history_id=raw.get("publish_history_id"),
        llm_model=raw.get("llm_model"),
        use_search=bool(raw.get("use_search")),
        suggested_questions=list(raw.get("suggested_questions") or []),
        tools_by_type=by_type,
        kb_attached_node_ids=kb_nodes,
        form_field_names=form_fields,
        call_agent_targets=call_targets,
        http_request_endpoints=http_eps,
        raw=raw,
    )
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
# ---------------------------------------------------------------------------
|
|
438
|
+
# Eval result
|
|
439
|
+
# ---------------------------------------------------------------------------
|
|
440
|
+
|
|
441
|
+
@dataclass(frozen=True)
class EvalResultSummary:
    """Flattened view of one evaluation result row."""

    id: str
    case_id: str
    evaluator_id: str
    agent_history_id: Optional[str]
    # PENDING/RUNNING/READY/FAILED/etc.
    status: str
    # 0.0-1.0
    score: Optional[float]
    # judge's explanation
    reason: Optional[str]
    # the agent response that was scored
    output: Optional[str]
    execution_time_s: Optional[float]
    cost_credits: Optional[int]
    tool_calls: list[EvalToolCall] = field(default_factory=list)
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def parse_eval_result(raw: dict) -> EvalResultSummary:
    """Parse a raw evaluation result row into an ``EvalResultSummary``.

    ``case_id`` is read from ``evaluation_case_id`` or ``case_id``, ``output``
    from ``output`` or ``actual_output``, and the execution time from
    ``execution_time`` or ``execution_time_s``. Missing identifiers and status
    become ``""``; that includes ``id``, which previously surfaced as the
    literal string ``"None"``.
    """
    raw_id = raw.get("id")
    return EvalResultSummary(
        id="" if raw_id is None else str(raw_id),
        case_id=str(raw.get("evaluation_case_id") or raw.get("case_id") or ""),
        evaluator_id=str(raw.get("evaluator_id") or ""),
        agent_history_id=raw.get("agent_history_id"),
        status=str(raw.get("status") or ""),
        score=raw.get("score"),
        reason=raw.get("reason"),
        output=raw.get("output") or raw.get("actual_output"),
        execution_time_s=raw.get("execution_time") or raw.get("execution_time_s"),
        cost_credits=raw.get("cost_credits"),
        tool_calls=parse_eval_tool_calls(raw),
    )
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
# ---------------------------------------------------------------------------
|
|
473
|
+
# Rubric extraction from eval result `reason` text
|
|
474
|
+
# ---------------------------------------------------------------------------
|
|
475
|
+
#
|
|
476
|
+
# The Codeer API has NO GET endpoint for rubrics — they live only as part of
|
|
477
|
+
# completed eval result rows, quoted at the head of each judge's `reason`.
|
|
478
|
+
# This parser reverses that: given a `reason` string, returns the list of
|
|
479
|
+
# rubric rules the judge was checking against.
|
|
480
|
+
#
|
|
481
|
+
# Patterns covered (verified against real production results in 2026-04):
|
|
482
|
+
# 1. Rubric Rule: "<rule>" ✓ (compliant): ...
|
|
483
|
+
# 2. <rule>。✓(符合) - ...
|
|
484
|
+
# 3. 規則:「<rule>」✓(遵守)...
|
|
485
|
+
# 4. <label>:✓(合規) ← falls back to label-only
|
|
486
|
+
# 5. <rule> → ✓ (compliant) ...
|
|
487
|
+
# 6. Content rule: <rule> ✗ (違規): ...
|
|
488
|
+
#
|
|
489
|
+
# Limitation: when the rubric was just a label and the full text only appears
|
|
490
|
+
# in the analysis after the verdict, you get the label only. This is the price
|
|
491
|
+
# of the API not exposing rubrics directly.
|
|
492
|
+
|
|
493
|
+
# One numbered list item ("1. ...") up to the next numbered item or end of text.
# Group 1 is the item number, group 2 the item body (may span several lines).
_NUMBERED_ITEM_RE = re.compile(
    r"(?:^|\n)\s*(\d+)\.\s+(.+?)(?=(?:\n\s*\d+\.\s+)|\Z)",
    re.DOTALL,
)
# The judge's verdict mark, optionally introduced by an arrow.
_VERDICT_MARKER_RE = re.compile(r"(?:→\s*)?[✓✗]")
# Leading "Rubric Rule:", "Rule:", "規則:" ... prefix. The separator may be an
# ASCII colon or a full-width colon (U+FF1A), which CJK text normally uses.
_RULE_LABEL_RE = re.compile(
    r"^(?:Rubric\s+Rule|Rubric|Content[\s_-]+(?:Boundary[\s_-]+)?[Rr]ule|Rule|規則|內容規則|Content\s+rule|Content\s+Rule)\s*[:\uFF1A]\s*",
    re.IGNORECASE,
)


def parse_rubrics_from_reason(reason: str) -> list[str]:
    """Extract per-rule rubric texts from an eval judge's ``reason`` field.

    Each numbered item in ``reason`` contributes the text that precedes its
    verdict mark, with any rule-label prefix, surrounding quotes and trailing
    sentence punctuation / colons (ASCII or full-width) removed.

    Returns the cleaned rule texts in order. Returns ``[]`` when no rules can
    be parsed (including the ``"No content boundary rules were provided"``
    case, which means the rubric was empty when the run was triggered).

    KNOWN LIMITATION: when the original rubric was just a short label
    (e.g. ``"安全性與衛教界線"``) and the verbose rule statement only appears
    in the analysis text *after* the verdict marker, the parser returns only
    the label. The Codeer API doesn't persist rubrics as their own field, so
    the verbose text is genuinely unrecoverable from result rows.
    """
    if not reason:
        return []
    out: list[str] = []
    for _num, body in _NUMBERED_ITEM_RE.findall(reason):
        # Everything before the first verdict mark is the rule (or its label).
        head = _VERDICT_MARKER_RE.split(body, maxsplit=1)[0]
        head = head.rstrip()
        head = _RULE_LABEL_RE.sub("", head)
        # Strip surrounding straight + CJK quotes
        head = head.strip().strip('"\'').strip("「」『』").strip()
        # Trim trailing punctuation that hugs the verdict (sentence-end or
        # the colon-form "<rule>:✓" which leaves a dangling colon, in either
        # its ASCII or full-width spelling)
        head = head.rstrip("。.:\uff1a").strip()
        if not head:
            continue
        if "No content boundary rules" in head or "no rules to violate" in head.lower():
            continue
        out.append(head)
    return out
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
# ---------------------------------------------------------------------------
|
|
538
|
+
# KB node — normalize the uppercase enum quirk
|
|
539
|
+
# ---------------------------------------------------------------------------
|
|
540
|
+
|
|
541
|
+
@dataclass(frozen=True)
class KBNode:
    """One knowledge-base tree node (a folder or a file)."""

    id: str
    name: str
    # 'folder' or 'file' (lowercased)
    node_type: str
    parent_id: Optional[str]
    knowledge_base_id: Optional[str]
    # PENDING/INDEXING/READY/FAILED/None for folders
    status: Optional[str]
    size_bytes: Optional[int]
    content_type: Optional[str]


def parse_kb_node(raw: dict) -> KBNode:
    """Parse a raw knowledge-base node payload into a ``KBNode``.

    ``node_type`` is lowercased so ``FILE`` and ``file`` compare equal; a
    missing value becomes ``""``. A missing ``id`` also becomes ``""`` rather
    than the string ``"None"``.
    """
    raw_id = raw.get("id")
    node_type = raw.get("node_type")
    return KBNode(
        id="" if raw_id is None else str(raw_id),
        name=raw.get("name") or "",
        # str() keeps a non-string enum value from raising on .lower().
        node_type=str(node_type).lower() if node_type else "",
        parent_id=raw.get("parent_id"),
        knowledge_base_id=raw.get("knowledge_base_id"),
        status=raw.get("status"),
        size_bytes=raw.get("size_bytes"),
        content_type=raw.get("content_type"),
    )


def parse_kb_nodes(raw_list: Iterable[dict]) -> list[KBNode]:
    """Parse every raw knowledge-base node, preserving input order."""
    return [parse_kb_node(node) for node in raw_list]
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codeer-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Command line tools for managing Codeer agents over the Codeer API.
|
|
5
|
+
Project-URL: Homepage, https://www.codeer.ai
|
|
6
|
+
Author: Codeer.AI
|
|
7
|
+
License: Proprietary
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Environment :: Console
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Requires-Dist: httpx>=0.27
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# codeer-cli
|
|
20
|
+
|
|
21
|
+
Standalone CLI for managing Codeer agents over the Codeer API.
|
|
22
|
+
|
|
23
|
+
## User install
|
|
24
|
+
|
|
25
|
+
After the package is published to PyPI, install the CLI as an isolated command
|
|
26
|
+
line tool:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
uv tool install codeer-cli
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Until the package is published, install directly from this repository:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
uv tool install 'git+https://github.com/<org>/codeer-skills.git#subdirectory=codeer-cli'
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Replace `<org>` with the GitHub organization or user that hosts this repository.
|
|
39
|
+
|
|
40
|
+
Verify that the command is available:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
codeer --help
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Credentials
|
|
47
|
+
|
|
48
|
+
The CLI expects credentials to be configured outside any skill workspace. Add a
|
|
49
|
+
named profile, select it, then verify the setup:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
codeer profile add work
|
|
53
|
+
codeer profile use work
|
|
54
|
+
codeer check
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
`codeer profile add` prompts for the API key without echoing it. The local
|
|
58
|
+
project stores only the selected profile name in `.codeer/profile`; API keys
|
|
59
|
+
remain in the user-level config file.
|
|
60
|
+
|
|
61
|
+
For a one-off shell session, you can also export an API key directly:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
export CODEER_API_KEY=<admin-workspace-api-key>
|
|
65
|
+
codeer check
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
`CODEER_API_BASE` defaults to `https://api.codeer.ai`. Override it only for
|
|
69
|
+
local, beta, or preview environments:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
export CODEER_API_BASE=http://localhost:8000
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The CLI intentionally does not read repo-root credential files or caller CWD
|
|
76
|
+
`.env`, because those files are often visible to LLM workspace context. Do not
|
|
77
|
+
paste the API key into agent chat or commit it to the repository.
|
|
78
|
+
|
|
79
|
+
Workspace and organization scope are inferred from the workspace API-key
|
|
80
|
+
virtual user's profile. `--workspace`, `--org`, `CODEER_WORKSPACE_ID`, and
|
|
81
|
+
`CODEER_ORGANIZATION_ID` are not used by the CLI.
|
|
82
|
+
|
|
83
|
+
Agent scope is optional and can be set as a non-secret environment variable:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
export CODEER_AGENT_ID=<agent-id>
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Development install
|
|
90
|
+
|
|
91
|
+
Use an editable install while the CLI is changing quickly:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
cd /path/to/codeer-skills/codeer-cli
|
|
95
|
+
uv tool install --editable .
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Reinstall only when dependencies, entry points, or package metadata change:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
uv tool install --reinstall --editable /path/to/codeer-skills/codeer-cli
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Validate setup before API work:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
codeer check
|
|
108
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
codeer_cli/__init__.py,sha256=-0gL8upoSsLAnXAfcRrwqZYJbwG0knzQoFf94O7Nc7c,1817
|
|
2
|
+
codeer_cli/_validate.py,sha256=pKUJa2TyTpERx5xmiYNZRn7tFqDxLZ2fF1rHAf1oz14,5415
|
|
3
|
+
codeer_cli/agents.py,sha256=a1w1WKrxY9jWJNSNT9PONRh9ZA7vEjrYlzdYkALhIC0,5361
|
|
4
|
+
codeer_cli/chats.py,sha256=YVrZJhoa-d67o6tzX6riGXsbA-ehyhOxrZ8zRCcJNro,2675
|
|
5
|
+
codeer_cli/cli.py,sha256=0WECgT5eDWu5iCXoqlxxjI-LJHS37e_f7ydU-EsoWTA,2424
|
|
6
|
+
codeer_cli/client.py,sha256=-FddTZaNPAsVDV4wdgHvdHUIiPyUwgiLCul0-IyJ_-c,9860
|
|
7
|
+
codeer_cli/constants.py,sha256=YthRM77jTh9npRDES0h02AeOKhx6fwAqPQwUjXJxhMY,2304
|
|
8
|
+
codeer_cli/eval_.py,sha256=EsH8f8nT8x9MFlUhpHWSAU4aNPkNceD-Tw5GM15Ae1I,14157
|
|
9
|
+
codeer_cli/histories.py,sha256=tk28git_peX4x703CIDU8u72JtlGaytyrtlHfxlK-7A,5979
|
|
10
|
+
codeer_cli/kb.py,sha256=MWZMWHq2rLoVf-4oBlM7RUN3UcdXrWF0IoXp5Uh2uyM,7196
|
|
11
|
+
codeer_cli/parse.py,sha256=qrjZn0MUTjGfucp4cwxy8Pt7WS-0x15kK5F7kWTY8Ps,21818
|
|
12
|
+
codeer_cli/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
codeer_cli/commands/_util.py,sha256=b9LVtv8pevMI2OBliro633OOTDnjBpo-_CutvFuI6qI,264
|
|
14
|
+
codeer_cli/commands/agent.py,sha256=oLseuSzxTX0RVvIdZYbNF03fjCJHUoSEp8DOqR-y05E,7091
|
|
15
|
+
codeer_cli/commands/check.py,sha256=xg13sY6qUfOVpXJaRmWHv47zSltu8d0wJZcPoaYBr08,2057
|
|
16
|
+
codeer_cli/commands/eval_cmd.py,sha256=FBzij-FsVCc9yRTzVsfIjgR15uaTba7lf6DdphqJNWw,38275
|
|
17
|
+
codeer_cli/commands/history.py,sha256=VoZnCW2w0EovTtDobMzw_lurqS0HvB2_VGR9S95g-GU,7291
|
|
18
|
+
codeer_cli/commands/kb.py,sha256=K5kjfr9NGILgc8DbX7IKDvxl-34rlieVuM3-Nn9s1q4,4850
|
|
19
|
+
codeer_cli/commands/profile.py,sha256=aOc9S4HvyFyQ3vH4yK_BcF4yd7VZd2kVnKyOgtqLP_I,6509
|
|
20
|
+
codeer_cli-0.1.0.dist-info/METADATA,sha256=IP7AMRFCi4NgOV6YFAPzHBhmvE5df4cTaqPnOYzT8n0,2822
|
|
21
|
+
codeer_cli-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
22
|
+
codeer_cli-0.1.0.dist-info/entry_points.txt,sha256=-nXIrlm5SR5r7gg3y8AS0tN66MwmvNHsrlwLNQNGD50,47
|
|
23
|
+
codeer_cli-0.1.0.dist-info/RECORD,,
|