agent-runtime-kit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_runtime_kit-0.1.0/.gitignore +16 -0
- agent_runtime_kit-0.1.0/.planning/PROJECT.md +140 -0
- agent_runtime_kit-0.1.0/.planning/REQUIREMENTS.md +196 -0
- agent_runtime_kit-0.1.0/.planning/ROADMAP.md +124 -0
- agent_runtime_kit-0.1.0/.planning/STATE.md +100 -0
- agent_runtime_kit-0.1.0/.planning/config.json +68 -0
- agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-CONTEXT.md +71 -0
- agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-PLAN.md +19 -0
- agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-SUMMARY.md +12 -0
- agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-VERIFICATION.md +17 -0
- agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-CONTEXT.md +64 -0
- agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-PLAN.md +20 -0
- agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-SUMMARY.md +11 -0
- agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-VERIFICATION.md +18 -0
- agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-CONTEXT.md +65 -0
- agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-PLAN.md +21 -0
- agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-SUMMARY.md +12 -0
- agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-VERIFICATION.md +18 -0
- agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-CONTEXT.md +62 -0
- agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-PLAN.md +21 -0
- agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-SUMMARY.md +10 -0
- agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-VERIFICATION.md +18 -0
- agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-CONTEXT.md +61 -0
- agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-PLAN.md +19 -0
- agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-SUMMARY.md +10 -0
- agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-VERIFICATION.md +19 -0
- agent_runtime_kit-0.1.0/.planning/research/ARCHITECTURE.md +273 -0
- agent_runtime_kit-0.1.0/.planning/research/FEATURES.md +147 -0
- agent_runtime_kit-0.1.0/.planning/research/PITFALLS.md +233 -0
- agent_runtime_kit-0.1.0/.planning/research/STACK.md +115 -0
- agent_runtime_kit-0.1.0/.planning/research/SUMMARY.md +191 -0
- agent_runtime_kit-0.1.0/.python-version +1 -0
- agent_runtime_kit-0.1.0/AGENTS.md +190 -0
- agent_runtime_kit-0.1.0/LICENSE +21 -0
- agent_runtime_kit-0.1.0/PKG-INFO +118 -0
- agent_runtime_kit-0.1.0/README.md +85 -0
- agent_runtime_kit-0.1.0/docs/capability-matrix.md +19 -0
- agent_runtime_kit-0.1.0/docs/live-smoke.md +27 -0
- agent_runtime_kit-0.1.0/docs/mestre-migration.md +25 -0
- agent_runtime_kit-0.1.0/docs/providers.md +26 -0
- agent_runtime_kit-0.1.0/docs/publish-checklist.md +48 -0
- agent_runtime_kit-0.1.0/docs/quickstart.md +34 -0
- agent_runtime_kit-0.1.0/examples/run_same_task.py +29 -0
- agent_runtime_kit-0.1.0/pyproject.toml +70 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/__init__.py +72 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/_errors.py +34 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/_runtime.py +139 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/_types.py +251 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/__init__.py +26 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/_common.py +123 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/antigravity.py +379 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/claude.py +302 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/codex.py +298 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/diagnostics.py +18 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/events.py +224 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/py.typed +1 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/registry.py +83 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/testing/__init__.py +15 -0
- agent_runtime_kit-0.1.0/src/agent_runtime_kit/testing/fakes.py +164 -0
- agent_runtime_kit-0.1.0/tests/test_antigravity_adapter.py +182 -0
- agent_runtime_kit-0.1.0/tests/test_claude_adapter.py +81 -0
- agent_runtime_kit-0.1.0/tests/test_codex_adapter.py +125 -0
- agent_runtime_kit-0.1.0/tests/test_core.py +53 -0
- agent_runtime_kit-0.1.0/tests/test_events.py +85 -0
- agent_runtime_kit-0.1.0/tests/test_live_smoke.py +53 -0
- agent_runtime_kit-0.1.0/tests/test_mestre_compatibility.py +67 -0
- agent_runtime_kit-0.1.0/tests/test_optional_dependencies.py +12 -0
- agent_runtime_kit-0.1.0/tests/test_provider_diagnostics.py +14 -0
- agent_runtime_kit-0.1.0/uv.lock +1722 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# agent-runtime-kit
|
|
2
|
+
|
|
3
|
+
## What This Is
|
|
4
|
+
|
|
5
|
+
`agent-runtime-kit` is a Python package that gives developers one clean API for running agentic
|
|
6
|
+
coding tasks through Claude Agent SDK, OpenAI Codex SDK, and Google
|
|
7
|
+
Antigravity SDK. It extracts the useful vendor-runtime ideas from Mestre while
|
|
8
|
+
remaining independently useful to the community: install it, choose a runtime,
|
|
9
|
+
run a task, stream/inspect events, and get a typed result back.
|
|
10
|
+
|
|
11
|
+
The package is not a new orchestrator or model router. It is the reusable
|
|
12
|
+
runtime layer that makes vendor agent SDKs feel consistent without hiding the
|
|
13
|
+
capabilities and constraints that make each SDK different.
|
|
14
|
+
|
|
15
|
+
## Core Value
|
|
16
|
+
|
|
17
|
+
Developers can run the same agentic task through Claude, Codex, or Antigravity
|
|
18
|
+
using one small, typed Python API while preserving the vendor-specific
|
|
19
|
+
capabilities needed for real work.
|
|
20
|
+
|
|
21
|
+
## Requirements
|
|
22
|
+
|
|
23
|
+
### Validated
|
|
24
|
+
|
|
25
|
+
(None yet - ship to validate)
|
|
26
|
+
|
|
27
|
+
### Active
|
|
28
|
+
|
|
29
|
+
- [ ] Publish the package as `agent-runtime-kit` on PyPI.
|
|
30
|
+
- [ ] Support Python 3.10+ so the package is broadly usable and aligns with
|
|
31
|
+
current vendor SDK lower bounds.
|
|
32
|
+
- [ ] Provide a clean public API that does not expose Mestre internals.
|
|
33
|
+
- [ ] Keep a low-friction Mestre adoption path through compatibility adapters
|
|
34
|
+
or migration helpers.
|
|
35
|
+
- [ ] Provide a shared runtime contract for agentic work: task input, runtime
|
|
36
|
+
capability metadata, event emission, cancellation, session/resume handles,
|
|
37
|
+
tool-call audit records, structured output, artifacts, cost/usage
|
|
38
|
+
metadata, and typed results.
|
|
39
|
+
- [ ] Implement runnable adapters for Claude Agent SDK, OpenAI Codex SDK, and
|
|
40
|
+
Google Antigravity SDK in the first public release.
|
|
41
|
+
- [ ] Make vendor dependencies optional, with extras such as `claude`, `codex`,
|
|
42
|
+
`antigravity`, and `all`.
|
|
43
|
+
- [ ] Surface vendor capability differences explicitly instead of pretending
|
|
44
|
+
all SDKs support the same features.
|
|
45
|
+
- [ ] Include examples that run the same task through all three runtimes.
|
|
46
|
+
- [ ] Include unit tests with fake SDK surfaces and at least smoke-test paths
|
|
47
|
+
that prove adapter construction and invocation behavior.
|
|
48
|
+
- [ ] Document authentication, permissions, working-directory behavior,
|
|
49
|
+
structured output behavior, MCP support, session behavior, and known
|
|
50
|
+
vendor limitations.
|
|
51
|
+
|
|
52
|
+
### Out of Scope
|
|
53
|
+
|
|
54
|
+
- Full Mestre orchestration, routing, fallback, benchmarking, optimization, and
|
|
55
|
+
self-improvement loops - the package should be useful without becoming
|
|
56
|
+
Mestre.
|
|
57
|
+
- Generic chat/completions abstraction - this package targets agentic SDKs that
|
|
58
|
+
own tool loops and local/runtime context.
|
|
59
|
+
- Scraping or reusing unsupported local account credentials - use each vendor's
|
|
60
|
+
supported authentication path.
|
|
61
|
+
- A hosted service, UI, queue, control plane, or remote execution platform.
|
|
62
|
+
- Non-Python SDKs for the initial release.
|
|
63
|
+
- Hiding vendor differences behind lowest-common-denominator behavior.
|
|
64
|
+
|
|
65
|
+
## Context
|
|
66
|
+
|
|
67
|
+
The project starts from Mestre's live vendor-lane implementation in
|
|
68
|
+
`~/Github/mestre`, especially:
|
|
69
|
+
|
|
70
|
+
- `mestre/vendor_lane/agent_protocol.py` - existing typed contract for
|
|
71
|
+
`AgentTask`, `AgentResult`, capabilities, session resume state, MCP config,
|
|
72
|
+
event sinks, and tool-call audits.
|
|
73
|
+
- `mestre/vendor_lane/events.py` - canonical task/tool/output/vendor-turn event
|
|
74
|
+
vocabulary.
|
|
75
|
+
- `mestre/vendor_lane/backends/claude_sdk.py` - Claude Agent SDK adapter logic.
|
|
76
|
+
- `mestre/vendor_lane/backends/codex_sdk.py` - Codex SDK adapter logic.
|
|
77
|
+
- `mestre/vendor_lane/backends/antigravity_sdk.py` - Antigravity SDK adapter
|
|
78
|
+
logic.
|
|
79
|
+
- `mestre/execution/agent/registry.py` - runtime registry pattern.
|
|
80
|
+
- `mestre/llm/policy.py` - boundary to avoid over-extracting full routing
|
|
81
|
+
policy into this package.
|
|
82
|
+
|
|
83
|
+
Official vendor docs checked during initialization:
|
|
84
|
+
|
|
85
|
+
- Claude Agent SDK: https://docs.anthropic.com/en/docs/claude-code/sdk
|
|
86
|
+
- Codex SDK: https://developers.openai.com/codex/sdk
|
|
87
|
+
- Google Antigravity SDK:
|
|
88
|
+
https://github.com/google-antigravity/antigravity-sdk-python
|
|
89
|
+
|
|
90
|
+
The current PyPI name check found `agent-runtime-kit` available on
|
|
91
|
+
2026-06-10. This availability must be rechecked immediately before publishing.
|
|
92
|
+
|
|
93
|
+
## Constraints
|
|
94
|
+
|
|
95
|
+
- **Language**: Python package first - Mestre and all three target vendor SDK
|
|
96
|
+
integrations are Python-facing for this work.
|
|
97
|
+
- **Python version**: Python 3.10+ - broad community compatibility matters more
|
|
98
|
+
than matching Mestre's current Python 3.14-only project constraint.
|
|
99
|
+
- **Package name**: Use `agent-runtime-kit` unless a later publishing check
|
|
100
|
+
shows the name is no longer available.
|
|
101
|
+
- **Vendor support**: Claude, Codex, and Antigravity must all be runnable in
|
|
102
|
+
v1; partial provider stubs are not enough for a useful community release.
|
|
103
|
+
- **Dependency model**: Vendor SDKs should be optional extras so users can
|
|
104
|
+
install only the runtimes they need.
|
|
105
|
+
- **Architecture**: Extract the runtime/adapters layer from Mestre, not the full
|
|
106
|
+
orchestration and routing system.
|
|
107
|
+
- **API design**: Prefer a clean public API, but keep compatibility adapters or
|
|
108
|
+
migration helpers so Mestre can adopt the package without excessive churn.
|
|
109
|
+
- **Authentication**: Stay within supported vendor SDK authentication
|
|
110
|
+
mechanisms; do not build brittle local credential scraping into the core.
|
|
111
|
+
|
|
112
|
+
## Key Decisions
|
|
113
|
+
|
|
114
|
+
| Decision | Rationale | Outcome |
|
|
115
|
+
|----------|-----------|---------|
|
|
116
|
+
| Publish as `agent-runtime-kit` | The name was available on PyPI during initialization and accurately describes a runtime/adapters library. | Pending |
|
|
117
|
+
| Target Python 3.10+ | Vendor SDK docs support Python 3.10+, and community adoption is more important than mirroring Mestre's Python 3.14 baseline. | Pending |
|
|
118
|
+
| Make all three runtimes runnable in v1 | Community usefulness depends on a real multi-vendor proof, not a single polished adapter plus placeholders. | Pending |
|
|
119
|
+
| Use a clean public API with Mestre migration support | The package should stand on its own while still making future Mestre adoption practical. | Pending |
|
|
120
|
+
| Extract runtime/adapters, not full orchestration | Mestre's routing, fallback, benchmarking, and self-improvement loops would make the package too broad for a first release. | Pending |
|
|
121
|
+
|
|
122
|
+
## Evolution
|
|
123
|
+
|
|
124
|
+
This document evolves at phase transitions and milestone boundaries.
|
|
125
|
+
|
|
126
|
+
**After each phase transition** (via `$gsd-transition`):
|
|
127
|
+
1. Requirements invalidated? -> Move to Out of Scope with reason
|
|
128
|
+
2. Requirements validated? -> Move to Validated with phase reference
|
|
129
|
+
3. New requirements emerged? -> Add to Active
|
|
130
|
+
4. Decisions to log? -> Add to Key Decisions
|
|
131
|
+
5. "What This Is" still accurate? -> Update if drifted
|
|
132
|
+
|
|
133
|
+
**After each milestone** (via `$gsd-complete-milestone`):
|
|
134
|
+
1. Full review of all sections
|
|
135
|
+
2. Core Value check - still the right priority?
|
|
136
|
+
3. Audit Out of Scope - reasons still valid?
|
|
137
|
+
4. Update Context with current state
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
*Last updated: 2026-06-10 after initialization*
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# Requirements: agent-runtime-kit
|
|
2
|
+
|
|
3
|
+
**Defined:** 2026-06-10
|
|
4
|
+
**Core Value:** Developers can run the same agentic task through Claude, Codex,
|
|
5
|
+
or Antigravity using one small, typed Python API while preserving the
|
|
6
|
+
vendor-specific capabilities needed for real work.
|
|
7
|
+
|
|
8
|
+
## v1 Requirements
|
|
9
|
+
|
|
10
|
+
Requirements for the first public release of `agent-runtime-kit`. Each maps to
|
|
11
|
+
roadmap phases.
|
|
12
|
+
|
|
13
|
+
### Packaging
|
|
14
|
+
|
|
15
|
+
- [ ] **PKG-01**: Developer can install the core package from PyPI as
|
|
16
|
+
`agent-runtime-kit` without installing any vendor SDK.
|
|
17
|
+
- [ ] **PKG-02**: Developer can install vendor-specific extras for `claude`,
|
|
18
|
+
`codex`, `antigravity`, and `all`.
|
|
19
|
+
- [ ] **PKG-03**: Package metadata declares Python 3.10+ support.
|
|
20
|
+
- [ ] **PKG-04**: Core package import succeeds when no vendor extras are
|
|
21
|
+
installed.
|
|
22
|
+
- [ ] **PKG-05**: Package publishing checklist includes a fresh PyPI name
|
|
23
|
+
availability check for `agent-runtime-kit`.
|
|
24
|
+
|
|
25
|
+
### Core Runtime API
|
|
26
|
+
|
|
27
|
+
- [ ] **CORE-01**: Developer can create a typed `AgentTask` with goal, optional
|
|
28
|
+
system prompt, working directory, permissions, session/resume data, metadata,
|
|
29
|
+
MCP server config, and output schema.
|
|
30
|
+
- [ ] **CORE-02**: Developer receives a typed `AgentResult` with output,
|
|
31
|
+
finish reason, error, session id, artifacts, tool-call audits, usage/cost
|
|
32
|
+
metadata, and parsed structured output when available.
|
|
33
|
+
- [ ] **CORE-03**: Developer can implement or use an async `AgentRuntime`
|
|
34
|
+
protocol with `run(task)` and `cancel(task_id)`.
|
|
35
|
+
- [ ] **CORE-04**: Developer can inspect each runtime's declared capabilities
|
|
36
|
+
before dispatching a task.
|
|
37
|
+
- [ ] **CORE-05**: Runtime rejects unsupported task inputs with clear typed
|
|
38
|
+
errors instead of silently dropping fields.
|
|
39
|
+
- [ ] **CORE-06**: Developer can register and resolve runtimes through a small
|
|
40
|
+
runtime registry.
|
|
41
|
+
- [ ] **CORE-07**: Runtime availability checks distinguish missing package,
|
|
42
|
+
missing credentials, unsupported model/runtime, and other setup failures.
|
|
43
|
+
|
|
44
|
+
### Events and Observability
|
|
45
|
+
|
|
46
|
+
- [ ] **EVNT-01**: Developer can attach an async event sink to receive
|
|
47
|
+
`agent.task.started` events.
|
|
48
|
+
- [ ] **EVNT-02**: Developer can receive `agent.task.completed` and
|
|
49
|
+
`agent.task.failed` events with normalized task/result metadata.
|
|
50
|
+
- [ ] **EVNT-03**: Developer can receive normalized output-delta events for
|
|
51
|
+
streamed text where a vendor exposes streaming.
|
|
52
|
+
- [ ] **EVNT-04**: Developer can receive normalized tool-requested and
|
|
53
|
+
tool-completed audit events where a vendor exposes tool activity.
|
|
54
|
+
- [ ] **EVNT-05**: Event payloads truncate or summarize sensitive/high-volume
|
|
55
|
+
data by default.
|
|
56
|
+
|
|
57
|
+
### Provider Adapters
|
|
58
|
+
|
|
59
|
+
- [ ] **ADPT-01**: Developer can run a task through Claude Agent SDK using the
|
|
60
|
+
shared runtime API.
|
|
61
|
+
- [ ] **ADPT-02**: Claude adapter supports working directory, permission mode,
|
|
62
|
+
MCP config, allowed/disallowed tools, session resume, structured output, and
|
|
63
|
+
clear missing-SDK/auth diagnostics where supported by the installed SDK.
|
|
64
|
+
- [ ] **ADPT-03**: Developer can run a task through OpenAI Codex SDK using the
|
|
65
|
+
shared runtime API.
|
|
66
|
+
- [ ] **ADPT-04**: Codex adapter supports local app-server/thread lifecycle,
|
|
67
|
+
working directory, approval/sandbox mapping, session resume, structured
|
|
68
|
+
output, and model availability diagnostics.
|
|
69
|
+
- [ ] **ADPT-05**: Developer can run a task through Google Antigravity SDK using
|
|
70
|
+
the shared runtime API.
|
|
71
|
+
- [ ] **ADPT-06**: Antigravity adapter supports API-key diagnostics,
|
|
72
|
+
working-directory/workspace mapping, permission/capability mapping, MCP
|
|
73
|
+
config, structured output, session id, and tool/event translation where
|
|
74
|
+
supported by the installed SDK.
|
|
75
|
+
- [ ] **ADPT-07**: All adapters preserve vendor-specific metadata needed for
|
|
76
|
+
debugging without making that metadata the primary public API.
|
|
77
|
+
|
|
78
|
+
### Testing and Quality
|
|
79
|
+
|
|
80
|
+
- [ ] **TEST-01**: Core tests pass without Claude, Codex, or Antigravity SDKs
|
|
81
|
+
installed.
|
|
82
|
+
- [ ] **TEST-02**: Fake SDK tests cover successful invocation, vendor errors,
|
|
83
|
+
missing dependency, unsupported task input, timeout, session id, structured
|
|
84
|
+
output, and event translation for each adapter.
|
|
85
|
+
- [ ] **TEST-03**: Type-check or static-analysis workflow validates the public
|
|
86
|
+
API surface.
|
|
87
|
+
- [ ] **TEST-04**: Ruff lint/format workflow passes for the package.
|
|
88
|
+
- [ ] **TEST-05**: Optional live smoke tests are documented and skipped unless
|
|
89
|
+
explicit credentials/runtime flags are present.
|
|
90
|
+
- [ ] **TEST-06**: Compatibility tests verify the public API can represent the
|
|
91
|
+
fields Mestre currently needs from its vendor-lane runtime contract.
|
|
92
|
+
|
|
93
|
+
### Documentation and Examples
|
|
94
|
+
|
|
95
|
+
- [ ] **DOCS-01**: README explains what the package is, what it is not, and how
|
|
96
|
+
it differs from vendor SDKs and full agent frameworks.
|
|
97
|
+
- [ ] **DOCS-02**: Quickstart shows installing the package and running one task
|
|
98
|
+
through one runtime.
|
|
99
|
+
- [ ] **DOCS-03**: Example shows the same task running through Claude, Codex,
|
|
100
|
+
and Antigravity with the shared API.
|
|
101
|
+
- [ ] **DOCS-04**: Provider setup docs cover auth, required extras, local
|
|
102
|
+
runtime requirements, and known limitations for each vendor.
|
|
103
|
+
- [ ] **DOCS-05**: Capability matrix documents MCP, working directory, session
|
|
104
|
+
resume, structured output, permissions, streaming, and tool-audit support for
|
|
105
|
+
each runtime.
|
|
106
|
+
- [ ] **DOCS-06**: Migration notes describe how Mestre can adopt
|
|
107
|
+
`agent-runtime-kit` without moving its routing, fallback, benchmarking, or
|
|
108
|
+
self-improvement layers into the package.
|
|
109
|
+
|
|
110
|
+
## v2 Requirements
|
|
111
|
+
|
|
112
|
+
Deferred to a future release. Tracked but not in the current roadmap.
|
|
113
|
+
|
|
114
|
+
### Integrations
|
|
115
|
+
|
|
116
|
+
- **INTG-01**: Package provides optional OpenTelemetry helper functions for
|
|
117
|
+
converting events into spans or span events.
|
|
118
|
+
- **INTG-02**: Package provides a first-class Mestre compatibility module if
|
|
119
|
+
the initial migration notes are not enough.
|
|
120
|
+
- **INTG-03**: Package maintains a generated provider compatibility/version
|
|
121
|
+
matrix.
|
|
122
|
+
|
|
123
|
+
### Additional Runtime Scope
|
|
124
|
+
|
|
125
|
+
- **RUNT-01**: Package evaluates whether direct chat/completions adapters
|
|
126
|
+
belong in a separate package or future major version.
|
|
127
|
+
- **RUNT-02**: Package evaluates support for additional agent SDKs after the
|
|
128
|
+
first three providers are stable.
|
|
129
|
+
- **RUNT-03**: Package evaluates a synchronous convenience wrapper over the
|
|
130
|
+
async core.
|
|
131
|
+
|
|
132
|
+
## Out of Scope
|
|
133
|
+
|
|
134
|
+
Explicitly excluded. Documented to prevent scope creep.
|
|
135
|
+
|
|
136
|
+
| Feature | Reason |
|
|
137
|
+
|---------|--------|
|
|
138
|
+
| Full model routing and fallback | Belongs in applications such as Mestre; too broad for a focused runtime SDK. |
|
|
139
|
+
| Benchmarking and self-optimization loops | Product-specific and not required for a useful public package. |
|
|
140
|
+
| Hosted service, queue, UI, or control plane | The first release is a local Python library. |
|
|
141
|
+
| Unsupported local credential scraping | Security-sensitive and outside supported vendor auth paths. |
|
|
142
|
+
| Non-Python SDKs | Python package first; revisit after v1 adoption. |
|
|
143
|
+
| Lowest-common-denominator provider abstraction | The package must preserve important vendor capability differences. |
|
|
144
|
+
| Mandatory live provider tests in default CI | Would require credentials, cost money, and create flaky contributor workflows. |
|
|
145
|
+
|
|
146
|
+
## Traceability
|
|
147
|
+
|
|
148
|
+
This table shows which phases cover which requirements. It is updated during roadmap creation.
|
|
149
|
+
|
|
150
|
+
| Requirement | Phase | Status |
|
|
151
|
+
|-------------|-------|--------|
|
|
152
|
+
| PKG-01 | Phase 5 | Ready (publish pending) |
|
|
153
|
+
| PKG-02 | Phase 2 | Complete |
|
|
154
|
+
| PKG-03 | Phase 1 | Complete |
|
|
155
|
+
| PKG-04 | Phase 1 | Complete |
|
|
156
|
+
| PKG-05 | Phase 5 | Complete |
|
|
157
|
+
| CORE-01 | Phase 1 | Complete |
|
|
158
|
+
| CORE-02 | Phase 1 | Complete |
|
|
159
|
+
| CORE-03 | Phase 1 | Complete |
|
|
160
|
+
| CORE-04 | Phase 1 | Complete |
|
|
161
|
+
| CORE-05 | Phase 1 | Complete |
|
|
162
|
+
| CORE-06 | Phase 1 | Complete |
|
|
163
|
+
| CORE-07 | Phase 1 | Complete |
|
|
164
|
+
| EVNT-01 | Phase 2 | Complete |
|
|
165
|
+
| EVNT-02 | Phase 2 | Complete |
|
|
166
|
+
| EVNT-03 | Phase 2 | Complete |
|
|
167
|
+
| EVNT-04 | Phase 2 | Complete |
|
|
168
|
+
| EVNT-05 | Phase 2 | Complete |
|
|
169
|
+
| ADPT-01 | Phase 3 | Complete |
|
|
170
|
+
| ADPT-02 | Phase 3 | Complete |
|
|
171
|
+
| ADPT-03 | Phase 3 | Complete |
|
|
172
|
+
| ADPT-04 | Phase 3 | Complete |
|
|
173
|
+
| ADPT-05 | Phase 4 | Complete |
|
|
174
|
+
| ADPT-06 | Phase 4 | Complete |
|
|
175
|
+
| ADPT-07 | Phase 4 | Complete |
|
|
176
|
+
| TEST-01 | Phase 2 | Complete |
|
|
177
|
+
| TEST-02 | Phase 2 | Complete |
|
|
178
|
+
| TEST-03 | Phase 1 | Complete |
|
|
179
|
+
| TEST-04 | Phase 1 | Complete |
|
|
180
|
+
| TEST-05 | Phase 5 | Complete |
|
|
181
|
+
| TEST-06 | Phase 4 | Complete |
|
|
182
|
+
| DOCS-01 | Phase 5 | Complete |
|
|
183
|
+
| DOCS-02 | Phase 3 | Complete |
|
|
184
|
+
| DOCS-03 | Phase 4 | Complete |
|
|
185
|
+
| DOCS-04 | Phase 5 | Complete |
|
|
186
|
+
| DOCS-05 | Phase 5 | Complete |
|
|
187
|
+
| DOCS-06 | Phase 5 | Complete |
|
|
188
|
+
|
|
189
|
+
**Coverage:**
|
|
190
|
+
- v1 requirements: 36 total
|
|
191
|
+
- Mapped to phases: 36
|
|
192
|
+
- Unmapped: 0
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
*Requirements defined: 2026-06-10*
|
|
196
|
+
*Last updated: 2026-06-10 after roadmap traceability mapping*
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Roadmap: agent-runtime-kit
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The v1.0 milestone turns the repository into a publishable Python package
|
|
6
|
+
named `agent-runtime-kit`. The build path starts with a small
|
|
7
|
+
installable core and public runtime contract, adds the fake-SDK harness needed
|
|
8
|
+
to keep vendor drift under control, then delivers Claude/Codex adapters,
|
|
9
|
+
the Antigravity adapter plus a cross-runtime proof, and finally release-ready docs,
|
|
10
|
+
packaging, and smoke-test guidance.
|
|
11
|
+
|
|
12
|
+
## Phases
|
|
13
|
+
|
|
14
|
+
**Phase Numbering:**
|
|
15
|
+
- Integer phases (1, 2, 3): Planned milestone work
|
|
16
|
+
- Decimal phases (2.1, 2.2): Urgent insertions (marked with INSERTED)
|
|
17
|
+
|
|
18
|
+
Decimal phases appear between their surrounding integers in numeric order.
|
|
19
|
+
|
|
20
|
+
- [x] **Phase 1: Core Runtime Skeleton** - Installable core package with public task/result/runtime contracts.
|
|
21
|
+
- [x] **Phase 2: Events and Test Harness** - Optional extras skeleton, event system, and fake SDK contract tests.
|
|
22
|
+
- [x] **Phase 3: Claude and Codex Runtimes** - First two real vendor adapters through the shared API.
|
|
23
|
+
- [x] **Phase 4: Antigravity and Cross-Runtime Proof** - Third adapter plus same-task multi-runtime proof and Mestre compatibility checks.
|
|
24
|
+
- [x] **Phase 5: Public Release Readiness** - Documentation, capability matrix, live-smoke guidance, and PyPI publish checklist.
|
|
25
|
+
|
|
26
|
+
## Phase Details
|
|
27
|
+
|
|
28
|
+
### Phase 1: Core Runtime Skeleton
|
|
29
|
+
**Goal**: A developer can install/import the core package locally and run a fake runtime through the public async API without any vendor SDK installed.
|
|
30
|
+
**Mode:** mvp
|
|
31
|
+
**Depends on**: Nothing (first phase)
|
|
32
|
+
**Requirements**: [PKG-03, PKG-04, CORE-01, CORE-02, CORE-03, CORE-04, CORE-05, CORE-06, CORE-07, TEST-03, TEST-04]
|
|
33
|
+
**Success Criteria** (what must be TRUE):
|
|
34
|
+
1. Developer can import `agent_runtime_kit` in an environment with no vendor SDKs installed.
|
|
35
|
+
2. Developer can create an `AgentTask`, execute a fake runtime, and receive an `AgentResult`.
|
|
36
|
+
3. Runtime capability checks and unsupported-feature errors are typed and covered by tests.
|
|
37
|
+
4. Ruff and static-analysis commands validate the initial public API.
|
|
38
|
+
**Plans**: 3 plans
|
|
39
|
+
|
|
40
|
+
Plans:
|
|
41
|
+
- [x] 01-01: Package scaffolding and Python 3.10+ metadata
|
|
42
|
+
- [x] 01-02: Public task/result/runtime/capability/error models
|
|
43
|
+
- [x] 01-03: Registry, availability diagnostics, lint, and type/static checks
|
|
44
|
+
|
|
45
|
+
### Phase 2: Events and Test Harness
|
|
46
|
+
**Goal**: A developer can observe normalized fake-runtime events and the package has the fake SDK harness required to test real adapters without live credentials.
|
|
47
|
+
**Mode:** mvp
|
|
48
|
+
**Depends on**: Phase 1
|
|
49
|
+
**Requirements**: [PKG-02, EVNT-01, EVNT-02, EVNT-03, EVNT-04, EVNT-05, TEST-01, TEST-02]
|
|
50
|
+
**Success Criteria** (what must be TRUE):
|
|
51
|
+
1. Core tests pass without Claude, Codex, or Antigravity SDKs installed.
|
|
52
|
+
2. Developer can attach an event sink and receive started, completed, failed, output, tool, and vendor-turn events from a fake runtime.
|
|
53
|
+
3. Event payload defaults summarize or truncate high-volume fields.
|
|
54
|
+
4. Fake SDK fixtures can simulate success, failure, missing dependency, unsupported input, timeout, session id, structured output, and tool events.
|
|
55
|
+
**Plans**: 3 plans
|
|
56
|
+
|
|
57
|
+
Plans:
|
|
58
|
+
- [x] 02-01: Optional extras skeleton and dependency isolation tests
|
|
59
|
+
- [x] 02-02: Event vocabulary, event sink, and redaction/truncation defaults
|
|
60
|
+
- [x] 02-03: Fake SDK harness and adapter contract test utilities
|
|
61
|
+
|
|
62
|
+
### Phase 3: Claude and Codex Runtimes
|
|
63
|
+
**Goal**: A developer can run real Claude and Codex agent tasks through the shared runtime API with clear diagnostics and provider-specific capability handling.
|
|
64
|
+
**Mode:** mvp
|
|
65
|
+
**Depends on**: Phase 2
|
|
66
|
+
**Requirements**: [ADPT-01, ADPT-02, ADPT-03, ADPT-04, DOCS-02]
|
|
67
|
+
**Success Criteria** (what must be TRUE):
|
|
68
|
+
1. Developer can install the Claude extra and run a Claude Agent SDK task through `agent-runtime-kit`.
|
|
69
|
+
2. Developer can install the Codex extra and run a Codex SDK task through `agent-runtime-kit`.
|
|
70
|
+
3. Claude and Codex adapters fail clearly for missing SDKs, missing setup, unsupported fields, and unsupported models.
|
|
71
|
+
4. The quickstart demonstrates one runtime end to end through the public API.
|
|
72
|
+
**Plans**: 3 plans
|
|
73
|
+
|
|
74
|
+
Plans:
|
|
75
|
+
- [x] 03-01: Claude Agent SDK adapter and tests
|
|
76
|
+
- [x] 03-02: Codex SDK adapter and tests
|
|
77
|
+
- [x] 03-03: One-runtime quickstart and provider diagnostics docs
|
|
78
|
+
|
|
79
|
+
### Phase 4: Antigravity and Cross-Runtime Proof
|
|
80
|
+
**Goal**: A developer can run the same task through Claude, Codex, and Antigravity, and the public API can represent Mestre's current runtime needs.
|
|
81
|
+
**Mode:** mvp
|
|
82
|
+
**Depends on**: Phase 3
|
|
83
|
+
**Requirements**: [ADPT-05, ADPT-06, ADPT-07, TEST-06, DOCS-03]
|
|
84
|
+
**Success Criteria** (what must be TRUE):
|
|
85
|
+
1. Developer can install the Antigravity extra and run an Antigravity SDK task through `agent-runtime-kit`.
|
|
86
|
+
2. Antigravity adapter maps auth, workspace, permissions, MCP, structured output, sessions, and tool/event behavior where supported.
|
|
87
|
+
3. Same-task example runs through all three runtime kinds with one public API shape.
|
|
88
|
+
4. Compatibility tests prove the public API can represent the fields Mestre currently uses from its vendor-lane contract.
|
|
89
|
+
**Plans**: 3 plans
|
|
90
|
+
|
|
91
|
+
Plans:
|
|
92
|
+
- [x] 04-01: Google Antigravity SDK adapter and tests
|
|
93
|
+
- [x] 04-02: Same-task three-runtime example
|
|
94
|
+
- [x] 04-03: Mestre compatibility field audit and tests
|
|
95
|
+
|
|
96
|
+
### Phase 5: Public Release Readiness
|
|
97
|
+
**Goal**: The package is ready for a first public PyPI release with documentation, capability matrix, optional live smoke tests, and a final publish checklist.
|
|
98
|
+
**Mode:** mvp
|
|
99
|
+
**Depends on**: Phase 4
|
|
100
|
+
**Requirements**: [PKG-01, PKG-05, TEST-05, DOCS-01, DOCS-04, DOCS-05, DOCS-06]
|
|
101
|
+
**Success Criteria** (what must be TRUE):
|
|
102
|
+
1. README and provider docs explain what the package is, what it is not, and how to configure each runtime.
|
|
103
|
+
2. Capability matrix documents MCP, working directory, sessions, structured output, permissions, streaming, and tool-audit behavior for each runtime.
|
|
104
|
+
3. Optional live smoke tests are documented and skipped unless explicit credentials/runtime flags are present.
|
|
105
|
+
4. PyPI publish checklist includes a fresh `agent-runtime-kit` name check and the package can be built for release.
|
|
106
|
+
**Plans**: 3 plans
|
|
107
|
+
|
|
108
|
+
Plans:
|
|
109
|
+
- [x] 05-01: README, provider setup docs, and capability matrix
|
|
110
|
+
- [x] 05-02: Optional live smoke test harness and documentation
|
|
111
|
+
- [x] 05-03: Build, publish checklist, and Mestre migration notes
|
|
112
|
+
|
|
113
|
+
## Progress
|
|
114
|
+
|
|
115
|
+
**Execution Order:**
|
|
116
|
+
Phases execute in numeric order: 1 -> 2 -> 3 -> 4 -> 5
|
|
117
|
+
|
|
118
|
+
| Phase | Plans Complete | Status | Completed |
|
|
119
|
+
|-------|----------------|--------|-----------|
|
|
120
|
+
| 1. Core Runtime Skeleton | 3/3 | Complete | 2026-06-10 |
|
|
121
|
+
| 2. Events and Test Harness | 3/3 | Complete | 2026-06-10 |
|
|
122
|
+
| 3. Claude and Codex Runtimes | 3/3 | Complete | 2026-06-10 |
|
|
123
|
+
| 4. Antigravity and Cross-Runtime Proof | 3/3 | Complete | 2026-06-10 |
|
|
124
|
+
| 5. Public Release Readiness | 3/3 | Complete | 2026-06-10 |
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
---
|
|
2
|
+
gsd_state_version: '1.0'
|
|
3
|
+
status: complete
|
|
4
|
+
progress:
|
|
5
|
+
total_phases: 5
|
|
6
|
+
completed_phases: 5
|
|
7
|
+
total_plans: 15
|
|
8
|
+
completed_plans: 15
|
|
9
|
+
percent: 100
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Project State
|
|
13
|
+
|
|
14
|
+
## Project Reference
|
|
15
|
+
|
|
16
|
+
See: .planning/PROJECT.md (updated 2026-06-10)
|
|
17
|
+
|
|
18
|
+
**Core value:** Developers can run the same agentic task through Claude, Codex,
|
|
19
|
+
or Antigravity using one small, typed Python API while preserving the
|
|
20
|
+
vendor-specific capabilities needed for real work.
|
|
21
|
+
**Current focus:** Publishing the completed roadmap as stacked PRs
|
|
22
|
+
|
|
23
|
+
## Current Position
|
|
24
|
+
|
|
25
|
+
Phase: 5 of 5 (Public Release Readiness)
|
|
26
|
+
Plan: 3 of 3 in current phase
|
|
27
|
+
Status: All phases complete; stacked PR publication pending
|
|
28
|
+
Last activity: 2026-06-10 - Phase 5 completed release readiness and build verification.
|
|
29
|
+
|
|
30
|
+
Progress: [##########] 100%
|
|
31
|
+
|
|
32
|
+
## Performance Metrics
|
|
33
|
+
|
|
34
|
+
**Velocity:**
|
|
35
|
+
- Total plans completed: 15
|
|
36
|
+
- Average duration: n/a
|
|
37
|
+
- Total execution time: 0.0 hours
|
|
38
|
+
|
|
39
|
+
**By Phase:**
|
|
40
|
+
|
|
41
|
+
| Phase | Plans | Total | Avg/Plan |
|
|
42
|
+
|-------|-------|-------|----------|
|
|
43
|
+
| 1. Core Runtime Skeleton | 3 | 3 | n/a |
|
|
44
|
+
| 2. Events and Test Harness | 3 | 3 | n/a |
|
|
45
|
+
| 3. Claude and Codex Runtimes | 3 | 3 | n/a |
|
|
46
|
+
| 4. Antigravity and Cross-Runtime Proof | 3 | 3 | n/a |
|
|
47
|
+
| 5. Public Release Readiness | 3 | 3 | n/a |
|
|
48
|
+
|
|
49
|
+
**Recent Trend:**
|
|
50
|
+
- Last 5 plans: 04-02, 04-03, 05-01, 05-02, 05-03
|
|
51
|
+
- Trend: n/a
|
|
52
|
+
|
|
53
|
+
*Updated after each plan completion*
|
|
54
|
+
|
|
55
|
+
## Accumulated Context
|
|
56
|
+
|
|
57
|
+
### Decisions
|
|
58
|
+
|
|
59
|
+
Decisions are logged in PROJECT.md Key Decisions table.
|
|
60
|
+
Recent decisions affecting current work:
|
|
61
|
+
|
|
62
|
+
- Initialization: Publish package as `agent-runtime-kit`.
|
|
63
|
+
- Initialization: Target Python 3.10+.
|
|
64
|
+
- Initialization: Make Claude, Codex, and Antigravity runnable in v1.
|
|
65
|
+
- Initialization: Keep public API clean while preserving a low-friction Mestre adoption path.
|
|
66
|
+
- Initialization: Extract runtime/adapters, not Mestre's full orchestration stack.
|
|
67
|
+
- Phase 1: Core package stays dependency-free; vendor SDK imports are deferred
|
|
68
|
+
to optional adapter modules.
|
|
69
|
+
- Phase 2: Event payloads use a normalized dictionary shape and sanitize
|
|
70
|
+
sensitive/high-volume attributes before emission.
|
|
71
|
+
- Phase 3: Claude and Codex adapters use lazy imports and fake-injected tests
|
|
72
|
+
so default CI remains credential-free.
|
|
73
|
+
- Phase 4: Antigravity MCP stdio server env values are rejected because the
|
|
74
|
+
SDK config surface does not expose env.
|
|
75
|
+
- Phase 5: Actual PyPI publication remains pending after review/merge; release
|
|
76
|
+
checklist includes a fresh name check.
|
|
77
|
+
|
|
78
|
+
### Pending Todos
|
|
79
|
+
|
|
80
|
+
- Publish stacked PRs.
|
|
81
|
+
- Recheck PyPI name immediately before actual publication.
|
|
82
|
+
|
|
83
|
+
### Blockers/Concerns
|
|
84
|
+
|
|
85
|
+
- PyPI name availability for `agent-runtime-kit` must be rechecked immediately before publishing.
|
|
86
|
+
- Vendor SDK surfaces are moving; rerun live smoke tests before release if credentials are available.
|
|
87
|
+
|
|
88
|
+
## Deferred Items
|
|
89
|
+
|
|
90
|
+
Items acknowledged and carried forward from previous milestone close:
|
|
91
|
+
|
|
92
|
+
| Category | Item | Status | Deferred At |
|
|
93
|
+
|----------|------|--------|-------------|
|
|
94
|
+
| *(none)* | | | |
|
|
95
|
+
|
|
96
|
+
## Session Continuity
|
|
97
|
+
|
|
98
|
+
Last session: 2026-06-10 23:00
|
|
99
|
+
Stopped at: Phase 5 complete; stacked PR publication pending
|
|
100
|
+
Resume file: None
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"model_profile": "quality",
|
|
3
|
+
"commit_docs": true,
|
|
4
|
+
"parallelization": true,
|
|
5
|
+
"search_gitignored": false,
|
|
6
|
+
"brave_search": false,
|
|
7
|
+
"firecrawl": false,
|
|
8
|
+
"exa_search": false,
|
|
9
|
+
"tavily_search": false,
|
|
10
|
+
"ref_search": false,
|
|
11
|
+
"perplexity": false,
|
|
12
|
+
"jina": false,
|
|
13
|
+
"git": {
|
|
14
|
+
"branching_strategy": "none",
|
|
15
|
+
"create_tag": true,
|
|
16
|
+
"phase_branch_template": "gsd/phase-{phase}-{slug}",
|
|
17
|
+
"milestone_branch_template": "gsd/{milestone}-{slug}",
|
|
18
|
+
"quick_branch_template": null
|
|
19
|
+
},
|
|
20
|
+
"workflow": {
|
|
21
|
+
"research": true,
|
|
22
|
+
"plan_check": true,
|
|
23
|
+
"verifier": true,
|
|
24
|
+
"nyquist_validation": false,
|
|
25
|
+
"auto_advance": true,
|
|
26
|
+
"node_repair": true,
|
|
27
|
+
"node_repair_budget": 2,
|
|
28
|
+
"ui_phase": true,
|
|
29
|
+
"ui_safety_gate": true,
|
|
30
|
+
"ai_integration_phase": true,
|
|
31
|
+
"tdd_mode": false,
|
|
32
|
+
"human_verify_mode": "end-of-phase",
|
|
33
|
+
"text_mode": false,
|
|
34
|
+
"research_before_questions": false,
|
|
35
|
+
"discuss_mode": "discuss",
|
|
36
|
+
"skip_discuss": false,
|
|
37
|
+
"code_review": true,
|
|
38
|
+
"code_review_depth": "deep",
|
|
39
|
+
"code_review_command": null,
|
|
40
|
+
"pattern_mapper": true,
|
|
41
|
+
"plan_bounce": false,
|
|
42
|
+
"plan_bounce_script": null,
|
|
43
|
+
"plan_bounce_passes": 2,
|
|
44
|
+
"auto_prune_state": false,
|
|
45
|
+
"post_planning_gaps": true,
|
|
46
|
+
"security_enforcement": true,
|
|
47
|
+
"security_asvs_level": 1,
|
|
48
|
+
"security_block_on": "high",
|
|
49
|
+
"use_worktrees": true
|
|
50
|
+
},
|
|
51
|
+
"ship": {
|
|
52
|
+
"pr_body_sections": []
|
|
53
|
+
},
|
|
54
|
+
"hooks": {
|
|
55
|
+
"context_warnings": true
|
|
56
|
+
},
|
|
57
|
+
"project_code": null,
|
|
58
|
+
"phase_naming": "sequential",
|
|
59
|
+
"agent_skills": {},
|
|
60
|
+
"claude_md_path": "./CLAUDE.md",
|
|
61
|
+
"plan_review": {
|
|
62
|
+
"source_grounding": true,
|
|
63
|
+
"source_grounding_authority": "grep"
|
|
64
|
+
},
|
|
65
|
+
"mode": "yolo",
|
|
66
|
+
"granularity": "coarse",
|
|
67
|
+
"resolve_model_ids": "omit"
|
|
68
|
+
}
|