agent-runtime-kit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. agent_runtime_kit-0.1.0/.gitignore +16 -0
  2. agent_runtime_kit-0.1.0/.planning/PROJECT.md +140 -0
  3. agent_runtime_kit-0.1.0/.planning/REQUIREMENTS.md +196 -0
  4. agent_runtime_kit-0.1.0/.planning/ROADMAP.md +124 -0
  5. agent_runtime_kit-0.1.0/.planning/STATE.md +100 -0
  6. agent_runtime_kit-0.1.0/.planning/config.json +68 -0
  7. agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-CONTEXT.md +71 -0
  8. agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-PLAN.md +19 -0
  9. agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-SUMMARY.md +12 -0
  10. agent_runtime_kit-0.1.0/.planning/phases/01-core-runtime-skeleton/01-VERIFICATION.md +17 -0
  11. agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-CONTEXT.md +64 -0
  12. agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-PLAN.md +20 -0
  13. agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-SUMMARY.md +11 -0
  14. agent_runtime_kit-0.1.0/.planning/phases/02-events-and-test-harness/02-VERIFICATION.md +18 -0
  15. agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-CONTEXT.md +65 -0
  16. agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-PLAN.md +21 -0
  17. agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-SUMMARY.md +12 -0
  18. agent_runtime_kit-0.1.0/.planning/phases/03-claude-and-codex-runtimes/03-VERIFICATION.md +18 -0
  19. agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-CONTEXT.md +62 -0
  20. agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-PLAN.md +21 -0
  21. agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-SUMMARY.md +10 -0
  22. agent_runtime_kit-0.1.0/.planning/phases/04-antigravity-and-cross-runtime-proof/04-VERIFICATION.md +18 -0
  23. agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-CONTEXT.md +61 -0
  24. agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-PLAN.md +19 -0
  25. agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-SUMMARY.md +10 -0
  26. agent_runtime_kit-0.1.0/.planning/phases/05-public-release-readiness/05-VERIFICATION.md +19 -0
  27. agent_runtime_kit-0.1.0/.planning/research/ARCHITECTURE.md +273 -0
  28. agent_runtime_kit-0.1.0/.planning/research/FEATURES.md +147 -0
  29. agent_runtime_kit-0.1.0/.planning/research/PITFALLS.md +233 -0
  30. agent_runtime_kit-0.1.0/.planning/research/STACK.md +115 -0
  31. agent_runtime_kit-0.1.0/.planning/research/SUMMARY.md +191 -0
  32. agent_runtime_kit-0.1.0/.python-version +1 -0
  33. agent_runtime_kit-0.1.0/AGENTS.md +190 -0
  34. agent_runtime_kit-0.1.0/LICENSE +21 -0
  35. agent_runtime_kit-0.1.0/PKG-INFO +118 -0
  36. agent_runtime_kit-0.1.0/README.md +85 -0
  37. agent_runtime_kit-0.1.0/docs/capability-matrix.md +19 -0
  38. agent_runtime_kit-0.1.0/docs/live-smoke.md +27 -0
  39. agent_runtime_kit-0.1.0/docs/mestre-migration.md +25 -0
  40. agent_runtime_kit-0.1.0/docs/providers.md +26 -0
  41. agent_runtime_kit-0.1.0/docs/publish-checklist.md +48 -0
  42. agent_runtime_kit-0.1.0/docs/quickstart.md +34 -0
  43. agent_runtime_kit-0.1.0/examples/run_same_task.py +29 -0
  44. agent_runtime_kit-0.1.0/pyproject.toml +70 -0
  45. agent_runtime_kit-0.1.0/src/agent_runtime_kit/__init__.py +72 -0
  46. agent_runtime_kit-0.1.0/src/agent_runtime_kit/_errors.py +34 -0
  47. agent_runtime_kit-0.1.0/src/agent_runtime_kit/_runtime.py +139 -0
  48. agent_runtime_kit-0.1.0/src/agent_runtime_kit/_types.py +251 -0
  49. agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/__init__.py +26 -0
  50. agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/_common.py +123 -0
  51. agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/antigravity.py +379 -0
  52. agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/claude.py +302 -0
  53. agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/codex.py +298 -0
  54. agent_runtime_kit-0.1.0/src/agent_runtime_kit/adapters/diagnostics.py +18 -0
  55. agent_runtime_kit-0.1.0/src/agent_runtime_kit/events.py +224 -0
  56. agent_runtime_kit-0.1.0/src/agent_runtime_kit/py.typed +1 -0
  57. agent_runtime_kit-0.1.0/src/agent_runtime_kit/registry.py +83 -0
  58. agent_runtime_kit-0.1.0/src/agent_runtime_kit/testing/__init__.py +15 -0
  59. agent_runtime_kit-0.1.0/src/agent_runtime_kit/testing/fakes.py +164 -0
  60. agent_runtime_kit-0.1.0/tests/test_antigravity_adapter.py +182 -0
  61. agent_runtime_kit-0.1.0/tests/test_claude_adapter.py +81 -0
  62. agent_runtime_kit-0.1.0/tests/test_codex_adapter.py +125 -0
  63. agent_runtime_kit-0.1.0/tests/test_core.py +53 -0
  64. agent_runtime_kit-0.1.0/tests/test_events.py +85 -0
  65. agent_runtime_kit-0.1.0/tests/test_live_smoke.py +53 -0
  66. agent_runtime_kit-0.1.0/tests/test_mestre_compatibility.py +67 -0
  67. agent_runtime_kit-0.1.0/tests/test_optional_dependencies.py +12 -0
  68. agent_runtime_kit-0.1.0/tests/test_provider_diagnostics.py +14 -0
  69. agent_runtime_kit-0.1.0/uv.lock +1722 -0
@@ -0,0 +1,16 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+ .mypy_cache/
9
+ .pytest_cache/
10
+ .ruff_cache/
11
+ .coverage
12
+ htmlcov/
13
+ .DS_Store
14
+
15
+ # Virtual environments
16
+ .venv
@@ -0,0 +1,140 @@
1
+ # agent-runtime-kit
2
+
3
+ ## What This Is
4
+
5
+ `agent-runtime-kit` is a Python package that gives developers one clean API for running agentic
6
+ coding tasks through Claude Agent SDK, OpenAI Codex SDK, and Google
7
+ Antigravity SDK. It extracts the useful vendor-runtime ideas from Mestre while
8
+ remaining independently useful to the community: install it, choose a runtime,
9
+ run a task, stream/inspect events, and get a typed result back.
10
+
11
+ The package is not a new orchestrator or model router. It is the reusable
12
+ runtime layer that makes vendor agent SDKs feel consistent without hiding the
13
+ capabilities and constraints that make each SDK different.
14
+
15
+ ## Core Value
16
+
17
+ Developers can run the same agentic task through Claude, Codex, or Antigravity
18
+ using one small, typed Python API while preserving the vendor-specific
19
+ capabilities needed for real work.
20
+
21
+ ## Requirements
22
+
23
+ ### Validated
24
+
25
+ (None yet - ship to validate)
26
+
27
+ ### Active
28
+
29
+ - [ ] Publish the package as `agent-runtime-kit` on PyPI.
30
+ - [ ] Support Python 3.10+ so the package is broadly usable and aligns with
31
+ current vendor SDK lower bounds.
32
+ - [ ] Provide a clean public API that does not expose Mestre internals.
33
+ - [ ] Keep a low-friction Mestre adoption path through compatibility adapters
34
+ or migration helpers.
35
+ - [ ] Provide a shared runtime contract for agentic work: task input, runtime
36
+ capability metadata, event emission, cancellation, session/resume handles,
37
+ tool-call audit records, structured output, artifacts, cost/usage
38
+ metadata, and typed results.
39
+ - [ ] Implement runnable adapters for Claude Agent SDK, OpenAI Codex SDK, and
40
+ Google Antigravity SDK in the first public release.
41
+ - [ ] Make vendor dependencies optional, with extras such as `claude`, `codex`,
42
+ `antigravity`, and `all`.
43
+ - [ ] Surface vendor capability differences explicitly instead of pretending
44
+ all SDKs support the same features.
45
+ - [ ] Include examples that run the same task through all three runtimes.
46
+ - [ ] Include unit tests with fake SDK surfaces and at least smoke-test paths
47
+ that prove adapter construction and invocation behavior.
48
+ - [ ] Document authentication, permissions, working-directory behavior,
49
+ structured output behavior, MCP support, session behavior, and known
50
+ vendor limitations.
51
+
52
+ ### Out of Scope
53
+
54
+ - Full Mestre orchestration, routing, fallback, benchmarking, optimization, and
55
+ self-improvement loops - the package should be useful without becoming
56
+ Mestre.
57
+ - Generic chat/completions abstraction - this package targets agentic SDKs that
58
+ own tool loops and local/runtime context.
59
+ - Scraping or reusing unsupported local account credentials - use each vendor's
60
+ supported authentication path.
61
+ - A hosted service, UI, queue, control plane, or remote execution platform.
62
+ - Non-Python SDKs for the initial release.
63
+ - Hiding vendor differences behind lowest-common-denominator behavior.
64
+
65
+ ## Context
66
+
67
+ The project starts from Mestre's live vendor-lane implementation in
68
+ `~/Github/mestre`, especially:
69
+
70
+ - `mestre/vendor_lane/agent_protocol.py` - existing typed contract for
71
+ `AgentTask`, `AgentResult`, capabilities, session resume state, MCP config,
72
+ event sinks, and tool-call audits.
73
+ - `mestre/vendor_lane/events.py` - canonical task/tool/output/vendor-turn event
74
+ vocabulary.
75
+ - `mestre/vendor_lane/backends/claude_sdk.py` - Claude Agent SDK adapter logic.
76
+ - `mestre/vendor_lane/backends/codex_sdk.py` - Codex SDK adapter logic.
77
+ - `mestre/vendor_lane/backends/antigravity_sdk.py` - Antigravity SDK adapter
78
+ logic.
79
+ - `mestre/execution/agent/registry.py` - runtime registry pattern.
80
+ - `mestre/llm/policy.py` - boundary to avoid over-extracting full routing
81
+ policy into this package.
82
+
83
+ Official vendor docs checked during initialization:
84
+
85
+ - Claude Agent SDK: https://docs.anthropic.com/en/docs/claude-code/sdk
86
+ - Codex SDK: https://developers.openai.com/codex/sdk
87
+ - Google Antigravity SDK:
88
+ https://github.com/google-antigravity/antigravity-sdk-python
89
+
90
+ The current PyPI name check found `agent-runtime-kit` available on
91
+ 2026-06-10. This availability must be rechecked immediately before publishing.
92
+
93
+ ## Constraints
94
+
95
+ - **Language**: Python package first - Mestre and all three target vendor SDK
96
+ integrations are Python-facing for this work.
97
+ - **Python version**: Python 3.10+ - broad community compatibility matters more
98
+ than matching Mestre's current Python 3.14-only project constraint.
99
+ - **Package name**: Use `agent-runtime-kit` unless a later publishing check
100
+ shows the name is no longer available.
101
+ - **Vendor support**: Claude, Codex, and Antigravity must all be runnable in
102
+ v1; partial provider stubs are not enough for a useful community release.
103
+ - **Dependency model**: Vendor SDKs should be optional extras so users can
104
+ install only the runtimes they need.
105
+ - **Architecture**: Extract the runtime/adapters layer from Mestre, not the full
106
+ orchestration and routing system.
107
+ - **API design**: Prefer a clean public API, but keep compatibility adapters or
108
+ migration helpers so Mestre can adopt the package without excessive churn.
109
+ - **Authentication**: Stay within supported vendor SDK authentication
110
+ mechanisms; do not build brittle local credential scraping into the core.
111
+
112
+ ## Key Decisions
113
+
114
+ | Decision | Rationale | Outcome |
115
+ |----------|-----------|---------|
116
+ | Publish as `agent-runtime-kit` | The name was available on PyPI during initialization and accurately describes a runtime/adapters library. | Pending |
117
+ | Target Python 3.10+ | Vendor SDK docs support Python 3.10+, and community adoption is more important than mirroring Mestre's Python 3.14 baseline. | Pending |
118
+ | Make all three runtimes runnable in v1 | Community usefulness depends on a real multi-vendor proof, not a single polished adapter plus placeholders. | Pending |
119
+ | Use a clean public API with Mestre migration support | The package should stand on its own while still making future Mestre adoption practical. | Pending |
120
+ | Extract runtime/adapters, not full orchestration | Mestre's routing, fallback, benchmarking, and self-improvement loops would make the package too broad for a first release. | Pending |
121
+
122
+ ## Evolution
123
+
124
+ This document evolves at phase transitions and milestone boundaries.
125
+
126
+ **After each phase transition** (via `$gsd-transition`):
127
+ 1. Requirements invalidated? -> Move to Out of Scope with reason
128
+ 2. Requirements validated? -> Move to Validated with phase reference
129
+ 3. New requirements emerged? -> Add to Active
130
+ 4. Decisions to log? -> Add to Key Decisions
131
+ 5. "What This Is" still accurate? -> Update if drifted
132
+
133
+ **After each milestone** (via `$gsd-complete-milestone`):
134
+ 1. Full review of all sections
135
+ 2. Core Value check - still the right priority?
136
+ 3. Audit Out of Scope - reasons still valid?
137
+ 4. Update Context with current state
138
+
139
+ ---
140
+ *Last updated: 2026-06-10 after initialization*
@@ -0,0 +1,196 @@
1
+ # Requirements: agent-runtime-kit
2
+
3
+ **Defined:** 2026-06-10
4
+ **Core Value:** Developers can run the same agentic task through Claude, Codex,
5
+ or Antigravity using one small, typed Python API while preserving the
6
+ vendor-specific capabilities needed for real work.
7
+
8
+ ## v1 Requirements
9
+
10
+ Requirements for the first public release of `agent-runtime-kit`. Each maps to
11
+ roadmap phases.
12
+
13
+ ### Packaging
14
+
15
+ - [ ] **PKG-01**: Developer can install the core package from PyPI as
16
+ `agent-runtime-kit` without installing any vendor SDK.
17
+ - [ ] **PKG-02**: Developer can install vendor-specific extras for `claude`,
18
+ `codex`, `antigravity`, and `all`.
19
+ - [ ] **PKG-03**: Package metadata declares Python 3.10+ support.
20
+ - [ ] **PKG-04**: Core package import succeeds when no vendor extras are
21
+ installed.
22
+ - [ ] **PKG-05**: Package publishing checklist includes a fresh PyPI name
23
+ availability check for `agent-runtime-kit`.
24
+
25
+ ### Core Runtime API
26
+
27
+ - [ ] **CORE-01**: Developer can create a typed `AgentTask` with goal, optional
28
+ system prompt, working directory, permissions, session/resume data, metadata,
29
+ MCP server config, and output schema.
30
+ - [ ] **CORE-02**: Developer receives a typed `AgentResult` with output,
31
+ finish reason, error, session id, artifacts, tool-call audits, usage/cost
32
+ metadata, and parsed structured output when available.
33
+ - [ ] **CORE-03**: Developer can implement or use an async `AgentRuntime`
34
+ protocol with `run(task)` and `cancel(task_id)`.
35
+ - [ ] **CORE-04**: Developer can inspect each runtime's declared capabilities
36
+ before dispatching a task.
37
+ - [ ] **CORE-05**: Runtime rejects unsupported task inputs with clear typed
38
+ errors instead of silently dropping fields.
39
+ - [ ] **CORE-06**: Developer can register and resolve runtimes through a small
40
+ runtime registry.
41
+ - [ ] **CORE-07**: Runtime availability checks distinguish missing package,
42
+ missing credentials, unsupported model/runtime, and other setup failures.
43
+
44
+ ### Events and Observability
45
+
46
+ - [ ] **EVNT-01**: Developer can attach an async event sink to receive
47
+ `agent.task.started` events.
48
+ - [ ] **EVNT-02**: Developer can receive `agent.task.completed` and
49
+ `agent.task.failed` events with normalized task/result metadata.
50
+ - [ ] **EVNT-03**: Developer can receive normalized output-delta events for
51
+ streamed text where a vendor exposes streaming.
52
+ - [ ] **EVNT-04**: Developer can receive normalized tool-requested and
53
+ tool-completed audit events where a vendor exposes tool activity.
54
+ - [ ] **EVNT-05**: Event payloads truncate or summarize sensitive/high-volume
55
+ data by default.
56
+
57
+ ### Provider Adapters
58
+
59
+ - [ ] **ADPT-01**: Developer can run a task through Claude Agent SDK using the
60
+ shared runtime API.
61
+ - [ ] **ADPT-02**: Claude adapter supports working directory, permission mode,
62
+ MCP config, allowed/disallowed tools, session resume, structured output, and
63
+ clear missing-SDK/auth diagnostics where supported by the installed SDK.
64
+ - [ ] **ADPT-03**: Developer can run a task through OpenAI Codex SDK using the
65
+ shared runtime API.
66
+ - [ ] **ADPT-04**: Codex adapter supports local app-server/thread lifecycle,
67
+ working directory, approval/sandbox mapping, session resume, structured
68
+ output, and model availability diagnostics.
69
+ - [ ] **ADPT-05**: Developer can run a task through Google Antigravity SDK using
70
+ the shared runtime API.
71
+ - [ ] **ADPT-06**: Antigravity adapter supports API-key diagnostics,
72
+ working-directory/workspace mapping, permission/capability mapping, MCP
73
+ config, structured output, session id, and tool/event translation where
74
+ supported by the installed SDK.
75
+ - [ ] **ADPT-07**: All adapters preserve vendor-specific metadata needed for
76
+ debugging without making that metadata the primary public API.
77
+
78
+ ### Testing and Quality
79
+
80
+ - [ ] **TEST-01**: Core tests pass without Claude, Codex, or Antigravity SDKs
81
+ installed.
82
+ - [ ] **TEST-02**: Fake SDK tests cover successful invocation, vendor errors,
83
+ missing dependency, unsupported task input, timeout, session id, structured
84
+ output, and event translation for each adapter.
85
+ - [ ] **TEST-03**: Type-check or static-analysis workflow validates the public
86
+ API surface.
87
+ - [ ] **TEST-04**: Ruff lint/format workflow passes for the package.
88
+ - [ ] **TEST-05**: Optional live smoke tests are documented and skipped unless
89
+ explicit credentials/runtime flags are present.
90
+ - [ ] **TEST-06**: Compatibility tests verify the public API can represent the
91
+ fields Mestre currently needs from its vendor-lane runtime contract.
92
+
93
+ ### Documentation and Examples
94
+
95
+ - [ ] **DOCS-01**: README explains what the package is, what it is not, and how
96
+ it differs from vendor SDKs and full agent frameworks.
97
+ - [ ] **DOCS-02**: Quickstart shows installing the package and running one task
98
+ through one runtime.
99
+ - [ ] **DOCS-03**: Example shows the same task running through Claude, Codex,
100
+ and Antigravity with the shared API.
101
+ - [ ] **DOCS-04**: Provider setup docs cover auth, required extras, local
102
+ runtime requirements, and known limitations for each vendor.
103
+ - [ ] **DOCS-05**: Capability matrix documents MCP, working directory, session
104
+ resume, structured output, permissions, streaming, and tool-audit support for
105
+ each runtime.
106
+ - [ ] **DOCS-06**: Migration notes describe how Mestre can adopt
107
+ `agent-runtime-kit` without moving its routing, fallback, benchmarking, or
108
+ self-improvement layers into the package.
109
+
110
+ ## v2 Requirements
111
+
112
+ Deferred to a future release. Tracked but not in the current roadmap.
113
+
114
+ ### Integrations
115
+
116
+ - **INTG-01**: Package provides optional OpenTelemetry helper functions for
117
+ converting events into spans or span events.
118
+ - **INTG-02**: Package provides a first-class Mestre compatibility module if
119
+ the initial migration notes are not enough.
120
+ - **INTG-03**: Package maintains a generated provider compatibility/version
121
+ matrix.
122
+
123
+ ### Additional Runtime Scope
124
+
125
+ - **RUNT-01**: Package evaluates whether direct chat/completions adapters
126
+ belong in a separate package or future major version.
127
+ - **RUNT-02**: Package evaluates support for additional agent SDKs after the
128
+ first three providers are stable.
129
+ - **RUNT-03**: Package evaluates a synchronous convenience wrapper over the
130
+ async core.
131
+
132
+ ## Out of Scope
133
+
134
+ Explicitly excluded. Documented to prevent scope creep.
135
+
136
+ | Feature | Reason |
137
+ |---------|--------|
138
+ | Full model routing and fallback | Belongs in applications such as Mestre; too broad for a focused runtime SDK. |
139
+ | Benchmarking and self-optimization loops | Product-specific and not required for a useful public package. |
140
+ | Hosted service, queue, UI, or control plane | The first release is a local Python library. |
141
+ | Unsupported local credential scraping | Security-sensitive and outside supported vendor auth paths. |
142
+ | Non-Python SDKs | Python package first; revisit after v1 adoption. |
143
+ | Lowest-common-denominator provider abstraction | The package must preserve important vendor capability differences. |
144
+ | Mandatory live provider tests in default CI | Would require credentials, cost money, and create flaky contributor workflows. |
145
+
146
+ ## Traceability
147
+
148
+ Which phases cover which requirements. Updated during roadmap creation.
149
+
150
+ | Requirement | Phase | Status |
151
+ |-------------|-------|--------|
152
+ | PKG-01 | Phase 5 | Ready (publish pending) |
153
+ | PKG-02 | Phase 2 | Complete |
154
+ | PKG-03 | Phase 1 | Complete |
155
+ | PKG-04 | Phase 1 | Complete |
156
+ | PKG-05 | Phase 5 | Complete |
157
+ | CORE-01 | Phase 1 | Complete |
158
+ | CORE-02 | Phase 1 | Complete |
159
+ | CORE-03 | Phase 1 | Complete |
160
+ | CORE-04 | Phase 1 | Complete |
161
+ | CORE-05 | Phase 1 | Complete |
162
+ | CORE-06 | Phase 1 | Complete |
163
+ | CORE-07 | Phase 1 | Complete |
164
+ | EVNT-01 | Phase 2 | Complete |
165
+ | EVNT-02 | Phase 2 | Complete |
166
+ | EVNT-03 | Phase 2 | Complete |
167
+ | EVNT-04 | Phase 2 | Complete |
168
+ | EVNT-05 | Phase 2 | Complete |
169
+ | ADPT-01 | Phase 3 | Complete |
170
+ | ADPT-02 | Phase 3 | Complete |
171
+ | ADPT-03 | Phase 3 | Complete |
172
+ | ADPT-04 | Phase 3 | Complete |
173
+ | ADPT-05 | Phase 4 | Complete |
174
+ | ADPT-06 | Phase 4 | Complete |
175
+ | ADPT-07 | Phase 4 | Complete |
176
+ | TEST-01 | Phase 2 | Complete |
177
+ | TEST-02 | Phase 2 | Complete |
178
+ | TEST-03 | Phase 1 | Complete |
179
+ | TEST-04 | Phase 1 | Complete |
180
+ | TEST-05 | Phase 5 | Complete |
181
+ | TEST-06 | Phase 4 | Complete |
182
+ | DOCS-01 | Phase 5 | Complete |
183
+ | DOCS-02 | Phase 3 | Complete |
184
+ | DOCS-03 | Phase 4 | Complete |
185
+ | DOCS-04 | Phase 5 | Complete |
186
+ | DOCS-05 | Phase 5 | Complete |
187
+ | DOCS-06 | Phase 5 | Complete |
188
+
189
+ **Coverage:**
190
+ - v1 requirements: 36 total
191
+ - Mapped to phases: 36
192
+ - Unmapped: 0
193
+
194
+ ---
195
+ *Requirements defined: 2026-06-10*
196
+ *Last updated: 2026-06-10 after roadmap traceability mapping*
@@ -0,0 +1,124 @@
1
+ # Roadmap: agent-runtime-kit
2
+
3
+ ## Overview
4
+
5
+ The v1.0 milestone turns the repository into a publishable Python package
6
+ named `agent-runtime-kit`. The build path starts with a small
7
+ installable core and public runtime contract, adds the fake-SDK harness needed
8
+ to keep vendor drift under control, then delivers Claude/Codex adapters,
9
+ Antigravity plus cross-runtime proof, and finally release-ready docs,
10
+ packaging, and smoke-test guidance.
11
+
12
+ ## Phases
13
+
14
+ **Phase Numbering:**
15
+ - Integer phases (1, 2, 3): Planned milestone work
16
+ - Decimal phases (2.1, 2.2): Urgent insertions (marked with INSERTED)
17
+
18
+ Decimal phases appear between their surrounding integers in numeric order.
19
+
20
+ - [x] **Phase 1: Core Runtime Skeleton** - Installable core package with public task/result/runtime contracts.
21
+ - [x] **Phase 2: Events and Test Harness** - Optional extras skeleton, event system, and fake SDK contract tests.
22
+ - [x] **Phase 3: Claude and Codex Runtimes** - First two real vendor adapters through the shared API.
23
+ - [x] **Phase 4: Antigravity and Cross-Runtime Proof** - Third adapter plus same-task multi-runtime proof and Mestre compatibility checks.
24
+ - [x] **Phase 5: Public Release Readiness** - Documentation, capability matrix, live-smoke guidance, and PyPI publish checklist.
25
+
26
+ ## Phase Details
27
+
28
+ ### Phase 1: Core Runtime Skeleton
29
+ **Goal**: A developer can install/import the core package locally and run a fake runtime through the public async API without any vendor SDK installed.
30
+ **Mode:** mvp
31
+ **Depends on**: Nothing (first phase)
32
+ **Requirements**: [PKG-03, PKG-04, CORE-01, CORE-02, CORE-03, CORE-04, CORE-05, CORE-06, CORE-07, TEST-03, TEST-04]
33
+ **Success Criteria** (what must be TRUE):
34
+ 1. Developer can import `agent_runtime_kit` in an environment with no vendor SDKs installed.
35
+ 2. Developer can create an `AgentTask`, execute a fake runtime, and receive an `AgentResult`.
36
+ 3. Runtime capability checks and unsupported-feature errors are typed and covered by tests.
37
+ 4. Ruff and static-analysis commands validate the initial public API.
38
+ **Plans**: 3 plans
39
+
40
+ Plans:
41
+ - [ ] 01-01: Package scaffolding and Python 3.10+ metadata
42
+ - [ ] 01-02: Public task/result/runtime/capability/error models
43
+ - [ ] 01-03: Registry, availability diagnostics, lint, and type/static checks
44
+
45
+ ### Phase 2: Events and Test Harness
46
+ **Goal**: A developer can observe normalized fake-runtime events and the package has the fake SDK harness required to test real adapters without live credentials.
47
+ **Mode:** mvp
48
+ **Depends on**: Phase 1
49
+ **Requirements**: [PKG-02, EVNT-01, EVNT-02, EVNT-03, EVNT-04, EVNT-05, TEST-01, TEST-02]
50
+ **Success Criteria** (what must be TRUE):
51
+ 1. Core tests pass without Claude, Codex, or Antigravity SDKs installed.
52
+ 2. Developer can attach an event sink and receive started, completed, failed, output, tool, and vendor-turn events from a fake runtime.
53
+ 3. Event payload defaults summarize or truncate high-volume fields.
54
+ 4. Fake SDK fixtures can simulate success, failure, missing dependency, unsupported input, timeout, session id, structured output, and tool events.
55
+ **Plans**: 3 plans
56
+
57
+ Plans:
58
+ - [ ] 02-01: Optional extras skeleton and dependency isolation tests
59
+ - [ ] 02-02: Event vocabulary, event sink, and redaction/truncation defaults
60
+ - [ ] 02-03: Fake SDK harness and adapter contract test utilities
61
+
62
+ ### Phase 3: Claude and Codex Runtimes
63
+ **Goal**: A developer can run real Claude and Codex agent tasks through the shared runtime API with clear diagnostics and provider-specific capability handling.
64
+ **Mode:** mvp
65
+ **Depends on**: Phase 2
66
+ **Requirements**: [ADPT-01, ADPT-02, ADPT-03, ADPT-04, DOCS-02]
67
+ **Success Criteria** (what must be TRUE):
68
+ 1. Developer can install the Claude extra and run a Claude Agent SDK task through `agent-runtime-kit`.
69
+ 2. Developer can install the Codex extra and run a Codex SDK task through `agent-runtime-kit`.
70
+ 3. Claude and Codex adapters fail clearly for missing SDKs, missing setup, unsupported fields, and unsupported models.
71
+ 4. The quickstart demonstrates one runtime end to end through the public API.
72
+ **Plans**: 3 plans
73
+
74
+ Plans:
75
+ - [ ] 03-01: Claude Agent SDK adapter and tests
76
+ - [ ] 03-02: Codex SDK adapter and tests
77
+ - [ ] 03-03: One-runtime quickstart and provider diagnostics docs
78
+
79
+ ### Phase 4: Antigravity and Cross-Runtime Proof
80
+ **Goal**: A developer can run the same task through Claude, Codex, and Antigravity, and the public API can represent Mestre's current runtime needs.
81
+ **Mode:** mvp
82
+ **Depends on**: Phase 3
83
+ **Requirements**: [ADPT-05, ADPT-06, ADPT-07, TEST-06, DOCS-03]
84
+ **Success Criteria** (what must be TRUE):
85
+ 1. Developer can install the Antigravity extra and run an Antigravity SDK task through `agent-runtime-kit`.
86
+ 2. Antigravity adapter maps auth, workspace, permissions, MCP, structured output, sessions, and tool/event behavior where supported.
87
+ 3. Same-task example runs through all three runtime kinds with one public API shape.
88
+ 4. Compatibility tests prove the public API can represent the fields Mestre currently uses from its vendor-lane contract.
89
+ **Plans**: 3 plans
90
+
91
+ Plans:
92
+ - [ ] 04-01: Google Antigravity SDK adapter and tests
93
+ - [ ] 04-02: Same-task three-runtime example
94
+ - [ ] 04-03: Mestre compatibility field audit and tests
95
+
96
+ ### Phase 5: Public Release Readiness
97
+ **Goal**: The package is ready for a first public PyPI release with documentation, capability matrix, optional live smoke tests, and a final publish checklist.
98
+ **Mode:** mvp
99
+ **Depends on**: Phase 4
100
+ **Requirements**: [PKG-01, PKG-05, TEST-05, DOCS-01, DOCS-04, DOCS-05, DOCS-06]
101
+ **Success Criteria** (what must be TRUE):
102
+ 1. README and provider docs explain what the package is, what it is not, and how to configure each runtime.
103
+ 2. Capability matrix documents MCP, working directory, sessions, structured output, permissions, streaming, and tool-audit behavior for each runtime.
104
+ 3. Optional live smoke tests are documented and skipped unless explicit credentials/runtime flags are present.
105
+ 4. PyPI publish checklist includes a fresh `agent-runtime-kit` name check and the package can be built for release.
106
+ **Plans**: 3 plans
107
+
108
+ Plans:
109
+ - [ ] 05-01: README, provider setup docs, and capability matrix
110
+ - [ ] 05-02: Optional live smoke test harness and documentation
111
+ - [ ] 05-03: Build, publish checklist, and Mestre migration notes
112
+
113
+ ## Progress
114
+
115
+ **Execution Order:**
116
+ Phases execute in numeric order: 1 -> 2 -> 3 -> 4 -> 5
117
+
118
+ | Phase | Plans Complete | Status | Completed |
119
+ |-------|----------------|--------|-----------|
120
+ | 1. Core Runtime Skeleton | 3/3 | Complete | 2026-06-10 |
121
+ | 2. Events and Test Harness | 3/3 | Complete | 2026-06-10 |
122
+ | 3. Claude and Codex Runtimes | 3/3 | Complete | 2026-06-10 |
123
+ | 4. Antigravity and Cross-Runtime Proof | 3/3 | Complete | 2026-06-10 |
124
+ | 5. Public Release Readiness | 3/3 | Complete | 2026-06-10 |
@@ -0,0 +1,100 @@
1
+ ---
2
+ gsd_state_version: '1.0'
3
+ status: complete
4
+ progress:
5
+ total_phases: 5
6
+ completed_phases: 5
7
+ total_plans: 15
8
+ completed_plans: 15
9
+ percent: 100
10
+ ---
11
+
12
+ # Project State
13
+
14
+ ## Project Reference
15
+
16
+ See: .planning/PROJECT.md (updated 2026-06-10)
17
+
18
+ **Core value:** Developers can run the same agentic task through Claude, Codex,
19
+ or Antigravity using one small, typed Python API while preserving the
20
+ vendor-specific capabilities needed for real work.
21
+ **Current focus:** Roadmap delivered as stacked PRs
22
+
23
+ ## Current Position
24
+
25
+ Phase: 5 of 5 (Public Release Readiness)
26
+ Plan: 3 of 3 in current phase
27
+ Status: All phases complete; stacked PR publication pending
28
+ Last activity: 2026-06-10 - Phase 5 completed release readiness and build verification.
29
+
30
+ Progress: [##########] 100%
31
+
32
+ ## Performance Metrics
33
+
34
+ **Velocity:**
35
+ - Total plans completed: 15
36
+ - Average duration: n/a
37
+ - Total execution time: 0.0 hours
38
+
39
+ **By Phase:**
40
+
41
+ | Phase | Plans | Total | Avg/Plan |
42
+ |-------|-------|-------|----------|
43
+ | 1. Core Runtime Skeleton | 3 | 3 | n/a |
44
+ | 2. Events and Test Harness | 3 | 3 | n/a |
45
+ | 3. Claude and Codex Runtimes | 3 | 3 | n/a |
46
+ | 4. Antigravity and Cross-Runtime Proof | 3 | 3 | n/a |
47
+ | 5. Public Release Readiness | 3 | 3 | n/a |
48
+
49
+ **Recent Trend:**
50
+ - Last 5 plans: 04-02, 04-03, 05-01, 05-02, 05-03
51
+ - Trend: n/a
52
+
53
+ *Updated after each plan completion*
54
+
55
+ ## Accumulated Context
56
+
57
+ ### Decisions
58
+
59
+ Decisions are logged in PROJECT.md Key Decisions table.
60
+ Recent decisions affecting current work:
61
+
62
+ - Initialization: Publish package as `agent-runtime-kit`.
63
+ - Initialization: Target Python 3.10+.
64
+ - Initialization: Make Claude, Codex, and Antigravity runnable in v1.
65
+ - Initialization: Keep public API clean while preserving a low-friction Mestre adoption path.
66
+ - Initialization: Extract runtime/adapters, not Mestre's full orchestration stack.
67
+ - Phase 1: Core package stays dependency-free; vendor SDK imports are deferred
68
+ to optional adapter modules.
69
+ - Phase 2: Event payloads use a normalized dictionary shape and sanitize
70
+ sensitive/high-volume attributes before emission.
71
+ - Phase 3: Claude and Codex adapters use lazy imports and fake-injected tests
72
+ so default CI remains credential-free.
73
+ - Phase 4: Antigravity MCP stdio server env values are rejected because the
74
+ SDK config surface does not expose env.
75
+ - Phase 5: Actual PyPI publication remains pending after review/merge; release
76
+ checklist includes a fresh name check.
77
+
78
+ ### Pending Todos
79
+
80
+ - Publish stacked PRs.
81
+ - Recheck PyPI name immediately before actual publication.
82
+
83
+ ### Blockers/Concerns
84
+
85
+ - PyPI name availability for `agent-runtime-kit` must be rechecked immediately before publishing.
86
+ - Vendor SDK surfaces are moving; rerun live smoke tests before release if credentials are available.
87
+
88
+ ## Deferred Items
89
+
90
+ Items acknowledged and carried forward from previous milestone close:
91
+
92
+ | Category | Item | Status | Deferred At |
93
+ |----------|------|--------|-------------|
94
+ | *(none)* | | | |
95
+
96
+ ## Session Continuity
97
+
98
+ Last session: 2026-06-10 23:00
99
+ Stopped at: Phase 5 complete; stacked PR publication pending
100
+ Resume file: None
@@ -0,0 +1,68 @@
1
+ {
2
+ "model_profile": "quality",
3
+ "commit_docs": true,
4
+ "parallelization": true,
5
+ "search_gitignored": false,
6
+ "brave_search": false,
7
+ "firecrawl": false,
8
+ "exa_search": false,
9
+ "tavily_search": false,
10
+ "ref_search": false,
11
+ "perplexity": false,
12
+ "jina": false,
13
+ "git": {
14
+ "branching_strategy": "none",
15
+ "create_tag": true,
16
+ "phase_branch_template": "gsd/phase-{phase}-{slug}",
17
+ "milestone_branch_template": "gsd/{milestone}-{slug}",
18
+ "quick_branch_template": null
19
+ },
20
+ "workflow": {
21
+ "research": true,
22
+ "plan_check": true,
23
+ "verifier": true,
24
+ "nyquist_validation": false,
25
+ "auto_advance": true,
26
+ "node_repair": true,
27
+ "node_repair_budget": 2,
28
+ "ui_phase": true,
29
+ "ui_safety_gate": true,
30
+ "ai_integration_phase": true,
31
+ "tdd_mode": false,
32
+ "human_verify_mode": "end-of-phase",
33
+ "text_mode": false,
34
+ "research_before_questions": false,
35
+ "discuss_mode": "discuss",
36
+ "skip_discuss": false,
37
+ "code_review": true,
38
+ "code_review_depth": "deep",
39
+ "code_review_command": null,
40
+ "pattern_mapper": true,
41
+ "plan_bounce": false,
42
+ "plan_bounce_script": null,
43
+ "plan_bounce_passes": 2,
44
+ "auto_prune_state": false,
45
+ "post_planning_gaps": true,
46
+ "security_enforcement": true,
47
+ "security_asvs_level": 1,
48
+ "security_block_on": "high",
49
+ "use_worktrees": true
50
+ },
51
+ "ship": {
52
+ "pr_body_sections": []
53
+ },
54
+ "hooks": {
55
+ "context_warnings": true
56
+ },
57
+ "project_code": null,
58
+ "phase_naming": "sequential",
59
+ "agent_skills": {},
60
+ "claude_md_path": "./CLAUDE.md",
61
+ "plan_review": {
62
+ "source_grounding": true,
63
+ "source_grounding_authority": "grep"
64
+ },
65
+ "mode": "yolo",
66
+ "granularity": "coarse",
67
+ "resolve_model_ids": "omit"
68
+ }