avp-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avp/trajectory.py ADDED
@@ -0,0 +1,530 @@
1
+ """avp.trajectory — Pydantic types for the AVP Trajectory Spec.
2
+
3
+ Defines the agent-emitted event stream: CloudEvents envelopes, typed
4
+ `data` payloads (one per event type), the `Event` discriminated union,
5
+ and the `parse_event` / `event_to_wire` helpers. This module mirrors
6
+ the [Trajectory spec](../../../../spec/v0.1/trajectory.md).
7
+
8
+ Consumers wanting only the event stream can:
9
+
10
+ from avp.trajectory import (
11
+ AgentStartedEvent,
12
+ AssistantMessageEvent,
13
+ parse_event,
14
+ )
15
+
16
+ …without dragging in Commission / Descriptor types they don't use.
17
+
18
+ The wire format is built on:
19
+
20
+ - **CloudEvents 1.0** for the event envelope (`specversion`, `id`,
21
+ `source`, `type`, `subject`, `time`, `datacontenttype`, `data`).
22
+ - **OpenTelemetry span identification** (`trace_id`, `span_id`,
23
+ `parent_span_id`) so downstream consumers reconstruct the run's span
24
+ tree.
25
+
26
+ All AVP-defined `data` attributes (token / cost / model / tool /
27
+ subagent / refusal / step / content) live under the single `avp.*` namespace.
28
+ See `spec/v0.1/trajectory.md` for the normative attribute reference.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ from enum import StrEnum
34
+ from typing import Annotated, Any, Literal
35
+
36
+ from pydantic import BaseModel, Field, TypeAdapter, ValidationError
37
+
38
+ from avp.commission import Commission
39
+ from avp.content import AVPContentBlock, ToolResultBlock
40
+ from avp.descriptor import (
41
+ AgentDescriptor,
42
+ McpServerDecl,
43
+ SkillDecl,
44
+ SubagentDecl,
45
+ ToolDecl,
46
+ )
47
+ from avp.envelope import (
48
+ _OPEN,
49
+ SOURCE_AGENT,
50
+ ZERO_SPAN_ID,
51
+ _CloudEventBase,
52
+ _SpanData,
53
+ new_event_id,
54
+ new_span_id,
55
+ new_trace_id,
56
+ now_iso,
57
+ )
58
+
59
# Reverse-DNS event types per CloudEvents convention. All AVP-defined types
# live under the `avp.` namespace.
#
# Each constant is the default for the `type` field of the matching event
# class further down. Those classes repeat the same string inside a
# `Literal[...]` annotation, so a rename has to be made in both places.
T_RUN_REQUESTED = "avp.run_requested"
T_AGENT_DESCRIBED = "avp.agent_described"
T_AGENT_STARTED = "avp.agent_started"
T_AGENT_STOPPED = "avp.agent_stopped"
T_ASSISTANT_MESSAGE = "avp.assistant_message"
T_TOOL_INVOKED = "avp.tool_invoked"
T_TOOL_RETURNED = "avp.tool_returned"
T_ERROR_OCCURRED = "avp.error_occurred"
T_SUBAGENT_INVOKED = "avp.subagent_invoked"
T_SUBAGENT_RETURNED = "avp.subagent_returned"
71
+
72
+
73
class StopReason(StrEnum):
    """Why a run terminated. v0.1 keeps the enum tight: model said done,
    model declined, agent crashed, or operator interrupted. Cap-driven
    stop reasons (turn / token / cost / duration limits) are not part of
    v0.1; agents that need bounded execution wire it externally
    (subprocess timeouts, supervisor SIGKILL).

    Used as the type of `avp.reason` on `agent_stopped` payloads and of
    `avp.subagent.reason` on `subagent_returned` payloads.
    """

    # Member values are the strings that appear on the wire.
    converged = "converged"
    error = "error"
    interrupted = "interrupted"
    refused = "refused"
84
+
85
+
86
class ErrorCode(StrEnum):
    """Closed set of error categories for `avp.error_occurred` events.

    Used as the type of `avp.error.code` on `ErrorOccurredData`; member
    values are the strings that appear on the wire.
    """

    rate_limit = "rate_limit"
    context_limit = "context_limit"
    auth_error = "auth_error"
    agent_crash = "agent_crash"
    unsupported_model = "unsupported_model"
    unsupported_provider = "unsupported_provider"
    commission_collision = "commission_collision"
    mcp_connect_failed = "mcp_connect_failed"
    unknown = "unknown"
96
+
97
+
98
class Usage(BaseModel):
    """Per-turn token accounting carried on `assistant_message.avp.usage`.

    `input_tokens` is the total input tokens for the turn, INCLUDING
    cache-read tokens. `cache_read_input_tokens` and
    `cache_creation_input_tokens` are informational breakdowns already
    accounted for inside `input_tokens`; consumers MUST NOT double-count
    them when summing. `reasoning_output_tokens` is the subset of
    `output_tokens` the provider attributes to internal reasoning (o-series
    reasoning tokens, Anthropic extended-thinking output).

    `extra="allow"` so provider-specific token categories the spec
    doesn't enumerate (vision tokens, audio output tokens, ...) round-trip
    through `avp.usage` verbatim without requiring spec churn.
    """

    # `_OPEN` is imported from avp.envelope; per the docstring above it is the
    # config that permits extra fields.
    model_config = _OPEN
    # Required totals. Every counter in this model is constrained to be >= 0.
    input_tokens: int = Field(ge=0)
    output_tokens: int = Field(ge=0)
    # Optional breakdowns; they default to None when not supplied.
    cache_read_input_tokens: int | None = Field(default=None, ge=0)
    cache_creation_input_tokens: int | None = Field(default=None, ge=0)
    reasoning_output_tokens: int | None = Field(default=None, ge=0)
120
+
121
+
122
class SubagentUsage(BaseModel):
    """Narrow totals carrier for the in-process subagent rollup.

    Used ONLY on `subagent_returned.data["avp.subagent.usage"]` when the
    parent agent's SDK does not expose the child's per-turn events (e.g.
    Claude Agent SDK's Task tool). `extra="allow"` so SDK-specific fields
    (total_tokens, tool_uses, duration_ms) round-trip verbatim.
    """

    # `_OPEN` is imported from avp.envelope; per the docstring above it is the
    # config that permits extra fields.
    model_config = _OPEN
    # All four totals are required and constrained to be >= 0.
    cost_usd: float = Field(ge=0)
    tokens_input: int = Field(ge=0)
    tokens_output: int = Field(ge=0)
    turns: int = Field(ge=0)
136
+
137
+
138
+ # ── Data payloads (per-event-type) ────────────────────────────────────────────
139
+ #
140
+ # Every AVP event's `data` field carries an OTel span triple plus the
141
+ # event-type-specific attributes. Field names with dots (the OTel/MCP/JSON-RPC
142
+ # wire form) are declared via Pydantic aliases; Python attribute names use
143
+ # underscores. `model_dump(by_alias=True)` produces the wire form on emit.
144
+
145
+
146
class RunRequestedData(_SpanData):
    """Payload of avp.run_requested events.

    Anchors the trajectory. When relaying a Commission, carries the full
    snapshot under `avp.commission` plus `avp.supervisor.*` for attribution,
    making the trajectory self-contained for audit. Without a Commission
    (library-invocation path), those fields are absent — per spec §2.1,
    absence (not `"unknown"`) is the canonical signal.
    """

    # All three fields default to None. `supervisor_name`, when present, must
    # be a non-empty string (min_length=1).
    supervisor_name: str | None = Field(default=None, min_length=1, alias="avp.supervisor.name")
    supervisor_version: str | None = Field(default=None, alias="avp.supervisor.version")
    commission: Commission | None = Field(default=None, alias="avp.commission")
159
+
160
+
161
class AgentDescribedData(_SpanData):
    """Payload of avp.agent_described events.

    The agent's published Descriptor, emitted between `run_requested`
    and `agent_started`. `avp.descriptor` SHOULD be consistent with what
    `<agent> describe` prints to stdout for the same agent build;
    pre-flight describe MAY omit MCP-surfaced tool entries (those whose
    `avp.mcp_server_id` is set) and per-server `mcp_servers[].status`,
    both of which require a startup dial.
    """

    # Required: the field has no default, so the payload must carry
    # `avp.descriptor`.
    descriptor: AgentDescriptor = Field(alias="avp.descriptor")
173
+
174
+
175
class AgentStartedData(_SpanData):
    """Payload of avp.agent_started events.

    Every field declared here is optional and defaults to None.
    """

    provider_name: str | None = Field(default=None, alias="avp.provider.name")
    # When present, restricted to the two literal operation names below.
    operation_name: Literal["invoke_agent", "chat"] | None = Field(
        default=None, alias="avp.operation.name"
    )
    request_model: str | None = Field(default=None, alias="avp.request.model")
    prompt: str | None = Field(default=None, alias="avp.prompt")
    system_prompt: str | None = Field(default=None, alias="avp.system_prompt")
    # Declaration lists reuse the types imported from avp.descriptor.
    tools: list[ToolDecl] | None = Field(default=None, alias="avp.tools")
    mcp_servers: list[McpServerDecl] | None = Field(default=None, alias="avp.mcp_servers")
    skills: list[SkillDecl] | None = Field(default=None, alias="avp.skills")
    subagents: list[SubagentDecl] | None = Field(default=None, alias="avp.subagents")
    thread_id: str | None = Field(default=None, alias="avp.thread_id")
    session_id: str | None = Field(default=None, alias="avp.session_id")
    tags: list[str] | None = Field(default=None, alias="avp.tags")
192
+
193
+
194
class AgentStoppedData(_SpanData):
    """Payload of avp.agent_stopped events. Terminator of the trajectory.

    Carries `avp.reason` (why the run ended) and an optional `avp.output`
    payload. The agent does NOT publish cumulative totals on this event.
    Per-turn deltas live on each `assistant_message`; consumers reduce
    the stream to compute totals.
    """

    # Required; must be one of the StopReason values.
    reason: StopReason = Field(alias="avp.reason")
    # Untyped (`Any`), so whatever value is supplied is accepted as-is.
    output: Any | None = Field(default=None, alias="avp.output")
205
+
206
+
207
class AssistantMessageData(_SpanData):
    """Payload of avp.assistant_message events.

    Carries the full content the model produced this turn under
    `avp.content` (a `list[AVPContentBlock]`) plus per-turn token / cost
    deltas. Reconstructing a provider message array is a direct read of
    `avp.content` per turn, paired with the `avp.tool_result` blocks from
    intervening `tool_returned` events to form the user-role tool-result
    messages.

    Refusal metadata: when the provider declined the turn, the refusal
    text appears as a `RefusalBlock` (or `TextBlock` for providers that
    don't typify it) inside `avp.content`, the upstream finish-reason
    string surfaces on `avp.response.finish_reasons`, and the
    provider's safety category (when given, free-form because every
    provider names them differently) surfaces on `avp.refusal.category`.
    """

    # Required fields: step, duration_ms, content, usage and cost_usd have no
    # default. Numeric ones are constrained to be >= 0.
    step: int = Field(ge=0, alias="avp.step")
    duration_ms: int = Field(ge=0, alias="avp.duration_ms")
    content: list[AVPContentBlock] = Field(alias="avp.content")
    provider_name: str | None = Field(default=None, alias="avp.provider.name")
    request_model: str | None = Field(default=None, alias="avp.request.model")
    response_model: str | None = Field(default=None, alias="avp.response.model")
    response_finish_reasons: list[str] | None = Field(
        default=None, alias="avp.response.finish_reasons"
    )
    # NOTE(review): the unit is not stated in this module — confirm against
    # the spec before comparing it with `duration_ms`.
    response_time_to_first_chunk: float | None = Field(
        default=None, ge=0, alias="avp.response.time_to_first_chunk"
    )
    usage: Usage = Field(alias="avp.usage")
    cost_usd: float = Field(ge=0, alias="avp.cost_usd")
    # When present, restricted to the three literal values below.
    cost_source: Literal["computed", "reported", "unknown"] | None = Field(
        default=None, alias="avp.cost.source"
    )
    refusal_category: str | None = Field(default=None, alias="avp.refusal.category")
243
+
244
+
245
class ToolInvokedData(_SpanData):
    """Payload of avp.tool_invoked events.

    `avp.tool.call_id` uses the same alias as on `ToolReturnedData`.
    """

    step: int = Field(ge=0, alias="avp.step")
    # Must be a non-empty string (min_length=1).
    tool_call_id: str = Field(min_length=1, alias="avp.tool.call_id")
    tool_name: str = Field(alias="avp.tool.name")
    # Required mapping with string keys; values are unconstrained.
    tool_input: dict[str, Any] = Field(alias="avp.tool.input")
    # Optional; when present, restricted to the two literal values below.
    tool_dispatch_target: Literal["mcp_server", "local"] | None = Field(
        default=None, alias="avp.tool.dispatch_target"
    )
253
+
254
+
255
class ToolReturnedData(_SpanData):
    """Payload of avp.tool_returned events: the tool result sent back to
    the model.

    `avp.tool_result` is a `content.ToolResultBlock` carrying
    `tool_use_id`, `content` (string or nested text/image/document
    blocks), and `is_error`. Rejections set `is_error=True` with the
    reason in `content[0].text`. During reconstruction this block
    becomes one entry of the next user-role message's content array.
    """

    # All five fields are required.
    step: int = Field(ge=0, alias="avp.step")
    # Must be a non-empty string (min_length=1).
    tool_call_id: str = Field(min_length=1, alias="avp.tool.call_id")
    tool_name: str = Field(alias="avp.tool.name")
    duration_ms: int = Field(ge=0, alias="avp.duration_ms")
    tool_result: ToolResultBlock = Field(alias="avp.tool_result")
270
+
271
+
272
class SubagentInvokedData(_SpanData):
    """Payload of avp.subagent_invoked events: the parent agent delegates
    to a declared subagent.

    The event's `span_id` IS the subagent's frame span. Events emitted by
    the subagent's sub-loop set `parent_span_id` to this frame (or chain
    through descendants of it), so the trajectory reconstructs as a nested
    tree. The subagent's declared name surfaces on `avp.subagent.name`;
    the event type itself signals an `invoke_agent`-style operation, so no
    separate operation-name field is carried on the wire.

    `avp.subagent.run_id` is reserved for future use when the subagent runs
    as a separate commissioned trajectory. Absent for in-process subagents.
    """

    step: int = Field(ge=0, alias="avp.step")
    subagent_name: str = Field(alias="avp.subagent.name")
    subagent_description: str | None = Field(default=None, alias="avp.subagent.description")
    # Required and non-empty; the same alias appears on SubagentReturnedData.
    subagent_invocation_id: str = Field(min_length=1, alias="avp.subagent.invocation_id")
    # Required mapping with string keys; values are unconstrained.
    subagent_input: dict[str, Any] = Field(alias="avp.subagent.input")
    # Optional; when present it must be non-empty.
    subagent_run_id: str | None = Field(default=None, min_length=1, alias="avp.subagent.run_id")
292
+
293
+
294
class SubagentReturnedData(_SpanData):
    """Payload of avp.subagent_returned events.

    Closes the subagent's frame. `span_id` matches the corresponding
    `subagent_invoked` event so consumers can pair them.

    `avp.subagent.reason` is a `StopReason`; on the error path,
    `reason = error` and `avp.subagent.result.text` carries the error
    string. The paired `tool_returned` mirrors this: `is_error = true`
    when `reason = error`, with the same `Error: ...` content.

    `avp.subagent.usage` is OPTIONAL and intended only for the in-process
    fallback: parent agents whose SDK black-boxes the child loop (no
    per-turn AssistantMessages exposed to the parent) carry the child's
    totals here as the only signal the supervisor receives of the child's
    spend. Agents that emit the child's per-turn events into the parent's
    trajectory with proper span parentage (`parent_span_id` = this event's
    `span_id`) MUST omit this field; the supervisor reconstructs from the
    raw stream. Managed subagents (separate `run_id`, separate trajectory)
    MUST also omit it; the supervisor reads the child's trajectory.
    """

    step: int = Field(ge=0, alias="avp.step")
    subagent_name: str = Field(alias="avp.subagent.name")
    # Required and non-empty; the same alias appears on SubagentInvokedData.
    subagent_invocation_id: str = Field(min_length=1, alias="avp.subagent.invocation_id")
    duration_ms: int = Field(ge=0, alias="avp.duration_ms")
    # The text result is required; the structured result is optional and untyped.
    subagent_result_text: str = Field(alias="avp.subagent.result.text")
    subagent_result_structured: Any | None = Field(
        default=None, alias="avp.subagent.result.structured"
    )
    subagent_reason: StopReason = Field(alias="avp.subagent.reason")
    subagent_usage: SubagentUsage | None = Field(default=None, alias="avp.subagent.usage")
324
+
325
+
326
class ErrorOccurredData(_SpanData):
    """Payload of avp.error_occurred events.

    Both fields are required: a code from the `ErrorCode` enum and a
    message string.
    """

    error_code: ErrorCode = Field(alias="avp.error.code")
    error_message: str = Field(alias="avp.error.message")
329
+
330
+
331
+ # ── CloudEvents 1.0 envelope (event types) ────────────────────────────────────
332
+ #
333
+ # Each event is a CloudEvent. `type` discriminates the union. `source` is the
334
+ # producer URI. `subject` carries the run_id. `data` carries the typed payload.
335
+
336
+
337
class RunRequestedEvent(_CloudEventBase):
    """First event of the trajectory. The agent is the sole producer on the
    wire (spec §8 conformance #1), so `source` is `avp://agent`. Supervisor
    attribution, when a Commission is in use, lives inside `data` as
    `avp.supervisor.*` plus the full `avp.commission` snapshot — that's what
    makes the trajectory self-contained for audit without resort to the
    envelope's `source` field.
    """

    # `type` and `source` are pinned with Literal, so any other value fails
    # validation; the constants supply the defaults.
    type: Literal["avp.run_requested"] = T_RUN_REQUESTED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: RunRequestedData
349
+
350
+
351
class AgentDescribedEvent(_CloudEventBase):
    """Second event of the trajectory. The agent's "whoami":
    self-published manifest of everything triggerable without supervisor
    configuration. Carries the same JSON `<agent> describe` prints to
    stdout for this agent build.
    """

    # `type` and `source` are pinned with Literal, so any other value fails
    # validation; the constants supply the defaults.
    type: Literal["avp.agent_described"] = T_AGENT_DESCRIBED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: AgentDescribedData
361
+
362
+
363
class AgentStartedEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.agent_started`; `data` is an
    `AgentStartedData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.agent_started"] = T_AGENT_STARTED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: AgentStartedData
367
+
368
+
369
class AgentStoppedEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.agent_stopped`; `data` is an
    `AgentStoppedData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.agent_stopped"] = T_AGENT_STOPPED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: AgentStoppedData
373
+
374
+
375
class AssistantMessageEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.assistant_message`; `data` is an
    `AssistantMessageData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.assistant_message"] = T_ASSISTANT_MESSAGE
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: AssistantMessageData
379
+
380
+
381
class ToolInvokedEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.tool_invoked`; `data` is a
    `ToolInvokedData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.tool_invoked"] = T_TOOL_INVOKED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: ToolInvokedData
385
+
386
+
387
class ToolReturnedEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.tool_returned`; `data` is a
    `ToolReturnedData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.tool_returned"] = T_TOOL_RETURNED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: ToolReturnedData
391
+
392
+
393
class SubagentInvokedEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.subagent_invoked`; `data` is a
    `SubagentInvokedData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.subagent_invoked"] = T_SUBAGENT_INVOKED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: SubagentInvokedData
397
+
398
+
399
class SubagentReturnedEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.subagent_returned`; `data` is a
    `SubagentReturnedData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.subagent_returned"] = T_SUBAGENT_RETURNED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: SubagentReturnedData
403
+
404
+
405
class ErrorOccurredEvent(_CloudEventBase):
    """CloudEvents envelope for `avp.error_occurred`; `data` is an
    `ErrorOccurredData`. `type` and `source` are pinned with Literal.
    """

    type: Literal["avp.error_occurred"] = T_ERROR_OCCURRED
    source: Literal["avp://agent"] = SOURCE_AGENT
    data: ErrorOccurredData
409
+
410
+
411
class UnknownEvent(_CloudEventBase):
    """Catch-all for CloudEvents whose `type` is not in the AVP-defined
    union. Validates the CloudEvents 1.0 envelope plus the AVP span
    triple on `data`; the rest of `data` is free-form. Consumers MUST
    pass through unknown event types without error (spec §4), so any
    well-formed CloudEvent — forward-compat AVP additions, vendor-
    namespaced events under `acme.*`, etc. — round-trips through here.

    `id` and `time` are re-declared without `default_factory` so missing
    envelope fields error rather than silently getting fabricated values.
    """

    # No defaults on any of these: each must be supplied and be non-empty.
    id: str = Field(min_length=1)
    time: str = Field(min_length=1)
    # Unlike the known event classes, `source` and `type` are plain strings
    # here rather than being pinned with Literal.
    source: str = Field(min_length=1)
    type: str = Field(min_length=1)
    # Validated against the base span model only.
    # NOTE(review): keeping the other `data` keys depends on `_SpanData`'s
    # config in avp.envelope, which is not visible here — confirm.
    data: _SpanData
428
+
429
+
430
+ # ── Discriminated unions ──────────────────────────────────────────────────────
431
+
432
+
433
# The concrete event classes, one per AVP-defined `type` value.
# NOTE(review): nothing in the visible part of this module reads this tuple;
# it lists the same classes as the `Event` union below, so keep the two in
# sync when adding an event type.
_AGENT_EVENT_TYPES = (
    RunRequestedEvent,
    AgentDescribedEvent,
    AgentStartedEvent,
    AgentStoppedEvent,
    AssistantMessageEvent,
    ToolInvokedEvent,
    ToolReturnedEvent,
    SubagentInvokedEvent,
    SubagentReturnedEvent,
    ErrorOccurredEvent,
)

# Tagged union over the known event classes. Pydantic picks the member by
# the envelope's `type` field, which each class pins with a distinct Literal.
# `UnknownEvent` is deliberately not a member; `parse_event` handles it.
Event = Annotated[
    RunRequestedEvent
    | AgentDescribedEvent
    | AgentStartedEvent
    | AgentStoppedEvent
    | AssistantMessageEvent
    | ToolInvokedEvent
    | ToolReturnedEvent
    | SubagentInvokedEvent
    | SubagentReturnedEvent
    | ErrorOccurredEvent,
    Field(discriminator="type"),
]
459
+
460
+
461
# Built once at import time. Constructing a TypeAdapter compiles the
# validation schema for the whole `Event` union, which is far too expensive
# to repeat for every event in a trajectory stream.
_EVENT_ADAPTER = TypeAdapter(Event)


def parse_event(payload: dict[str, Any]) -> Event | UnknownEvent:
    """Parse an agent-emitted event payload.

    Known types validate via the `Event` discriminated-union TypeAdapter.
    Unknown types validate as `UnknownEvent`: envelope + span triple are
    enforced; the rest of `data` is opaque. Per spec/v0.1/README.md §4,
    consumers MUST pass through unknown types without error.

    Args:
        payload: The decoded wire-form event (a CloudEvents dict).

    Returns:
        The matching known event model, or an `UnknownEvent` when the
        envelope's `type` is not one of the AVP-defined types.

    Raises:
        ValidationError: If a known event type fails validation, if the
            `type` field is missing, or if an unknown-type payload does not
            satisfy `UnknownEvent`.
    """
    try:
        return _EVENT_ADAPTER.validate_python(payload)
    except ValidationError as exc:
        # Fall back only when the *envelope's* tag was unrecognised. That
        # error is reported at the root, i.e. with an empty `loc`. The same
        # error type raised deeper in the payload (a tagged union nested
        # inside a known event's `data`) means a known event is malformed and
        # must surface as an error, not be downgraded to UnknownEvent.
        envelope_type_unknown = any(
            err.get("type") == "union_tag_invalid" and not err.get("loc")
            for err in exc.errors()
        )
        if envelope_type_unknown:
            return UnknownEvent.model_validate(payload)
        raise
476
+
477
+
478
def event_to_wire(event: BaseModel) -> dict[str, Any]:
    """Dump an event model as the dict that goes on the wire.

    Fields are keyed by their aliases (the dotted names such as
    `avp.tool.call_id`) rather than the Python attribute names, fields
    whose value is None are left out, and values are converted using
    Pydantic's JSON mode.
    """
    wire = event.model_dump(mode="json", exclude_none=True, by_alias=True)
    return wire
485
+
486
+
487
# Public API of this module. Includes the envelope helpers re-exported from
# avp.envelope, and the `StopReason` / `ErrorCode` enums that consumers need
# in order to compare against `avp.reason` and `avp.error.code` values.
__all__ = [
    "SOURCE_AGENT",
    "T_AGENT_DESCRIBED",
    "T_AGENT_STARTED",
    "T_AGENT_STOPPED",
    "T_ASSISTANT_MESSAGE",
    "T_ERROR_OCCURRED",
    "T_RUN_REQUESTED",
    "T_SUBAGENT_INVOKED",
    "T_SUBAGENT_RETURNED",
    "T_TOOL_INVOKED",
    "T_TOOL_RETURNED",
    "ZERO_SPAN_ID",
    "AgentDescribedData",
    "AgentDescribedEvent",
    "AgentStartedData",
    "AgentStartedEvent",
    "AgentStoppedData",
    "AgentStoppedEvent",
    "AssistantMessageData",
    "AssistantMessageEvent",
    "ErrorCode",
    "ErrorOccurredData",
    "ErrorOccurredEvent",
    "Event",
    "RunRequestedData",
    "RunRequestedEvent",
    "StopReason",
    "SubagentInvokedData",
    "SubagentInvokedEvent",
    "SubagentReturnedData",
    "SubagentReturnedEvent",
    "SubagentUsage",
    "ToolInvokedData",
    "ToolInvokedEvent",
    "ToolReturnedData",
    "ToolReturnedEvent",
    "UnknownEvent",
    "Usage",
    "event_to_wire",
    "new_event_id",
    "new_span_id",
    "new_trace_id",
    "now_iso",
    "parse_event",
]
avp_cli/__init__.py ADDED
@@ -0,0 +1,82 @@
1
+ """avp_cli — the local AVP CLI (`avp`): build, run, and iterate on Commissions.
2
+
3
+ An eval is a JSON config file (no user code); the CLI is the engine. It loads a
4
+ config, composes a Commission per setup, runs each against a real agent (Goose /
5
+ Claude Code) via the agent's `run --commission --out` manifest contract, scores
6
+ each run, and ranks a board by accuracy / pass-rate / cost / turns.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from avp_cli.agent import load_manifest, run_agent
12
+ from avp_cli.agents import ResolvedAgent, known_agents, preflight, resolve_agent
13
+ from avp_cli.config import EvalConfigError, eval_from_dict, load_eval
14
+ from avp_cli.eval.dataset import Dataset, Item
15
+ from avp_cli.eval.engine import (
16
+ Board,
17
+ Eval,
18
+ RunObserver,
19
+ RunResult,
20
+ SetupRow,
21
+ extract_final_output,
22
+ run_eval,
23
+ run_matrix,
24
+ )
25
+ from avp_cli.eval.report import (
26
+ board_table,
27
+ board_to_dict,
28
+ comparison_table,
29
+ dump_json,
30
+ failures,
31
+ )
32
+ from avp_cli.eval.scoring import (
33
+ ExactMatchScorer,
34
+ FidelityScorer,
35
+ FinalOutput,
36
+ LLMJudgeScorer,
37
+ Score,
38
+ Scorer,
39
+ StructuralMatchScorer,
40
+ )
41
+ from avp_cli.eval.setup import Setup
42
+ from avp_cli.observability import Summary, ToolUsage, render, summarize, tool_tally
43
+
44
+ __all__ = [
45
+ "Board",
46
+ "Dataset",
47
+ "Eval",
48
+ "EvalConfigError",
49
+ "ExactMatchScorer",
50
+ "FidelityScorer",
51
+ "FinalOutput",
52
+ "Item",
53
+ "LLMJudgeScorer",
54
+ "ResolvedAgent",
55
+ "RunObserver",
56
+ "RunResult",
57
+ "Score",
58
+ "Scorer",
59
+ "Setup",
60
+ "SetupRow",
61
+ "StructuralMatchScorer",
62
+ "Summary",
63
+ "ToolUsage",
64
+ "board_table",
65
+ "board_to_dict",
66
+ "comparison_table",
67
+ "dump_json",
68
+ "eval_from_dict",
69
+ "extract_final_output",
70
+ "failures",
71
+ "known_agents",
72
+ "load_eval",
73
+ "load_manifest",
74
+ "preflight",
75
+ "render",
76
+ "resolve_agent",
77
+ "run_agent",
78
+ "run_eval",
79
+ "run_matrix",
80
+ "summarize",
81
+ "tool_tally",
82
+ ]