vgxness 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/LICENSE +9 -0
  2. package/README.md +110 -0
  3. package/dist/agents/agent-activation-service.js +144 -0
  4. package/dist/agents/agent-registry-service.js +46 -0
  5. package/dist/agents/agent-resolver.js +249 -0
  6. package/dist/agents/agent-seed-service.js +146 -0
  7. package/dist/agents/manager-profile-overlay-service.js +34 -0
  8. package/dist/agents/profile-model-routing.js +26 -0
  9. package/dist/agents/renderers/claude-renderer.js +98 -0
  10. package/dist/agents/renderers/index.js +16 -0
  11. package/dist/agents/renderers/json-renderer.js +87 -0
  12. package/dist/agents/renderers/opencode-renderer.js +100 -0
  13. package/dist/agents/renderers/provider-adapter.js +6 -0
  14. package/dist/agents/repositories/agents.js +185 -0
  15. package/dist/agents/repositories/manager-profile-overlays.js +81 -0
  16. package/dist/agents/schema.js +1 -0
  17. package/dist/cli/dashboard-operational-read-models.js +153 -0
  18. package/dist/cli/dashboard-renderer.js +109 -0
  19. package/dist/cli/dashboard-screen-renderers.js +332 -0
  20. package/dist/cli/dashboard-tui-read-model.js +71 -0
  21. package/dist/cli/dashboard-tui-state.js +218 -0
  22. package/dist/cli/dispatcher.js +2880 -0
  23. package/dist/cli/index.js +27 -0
  24. package/dist/cli/interactive-dashboard.js +29 -0
  25. package/dist/cli/mcp-start-path.js +21 -0
  26. package/dist/cli/setup-status-renderer.js +29 -0
  27. package/dist/cli/setup-wizard-read-model.js +56 -0
  28. package/dist/cli/setup-wizard-renderer.js +148 -0
  29. package/dist/cli/setup-wizard-state.js +82 -0
  30. package/dist/cli/tui-render-helpers.js +192 -0
  31. package/dist/export/redaction.js +71 -0
  32. package/dist/harness/tools/agents.js +245 -0
  33. package/dist/harness/tools/memory.js +29 -0
  34. package/dist/mcp/client-install-opencode-contract.js +227 -0
  35. package/dist/mcp/client-install-opencode.js +194 -0
  36. package/dist/mcp/client-setup-preview.js +38 -0
  37. package/dist/mcp/control-plane.js +175 -0
  38. package/dist/mcp/doctor.js +193 -0
  39. package/dist/mcp/index.js +10 -0
  40. package/dist/mcp/opencode-default-agent-config.js +156 -0
  41. package/dist/mcp/opencode-visibility.js +102 -0
  42. package/dist/mcp/schema.js +234 -0
  43. package/dist/mcp/stdio-server.js +56 -0
  44. package/dist/mcp/validation.js +761 -0
  45. package/dist/memory/import/dry-run-planner.js +58 -0
  46. package/dist/memory/import/index.js +3 -0
  47. package/dist/memory/import/observation-writer.js +220 -0
  48. package/dist/memory/import/package.js +178 -0
  49. package/dist/memory/memory-service.js +126 -0
  50. package/dist/memory/repositories/artifacts.js +41 -0
  51. package/dist/memory/repositories/observations.js +133 -0
  52. package/dist/memory/repositories/sessions.js +105 -0
  53. package/dist/memory/repositories/traces.js +58 -0
  54. package/dist/memory/schema.js +1 -0
  55. package/dist/memory/search.js +11 -0
  56. package/dist/memory/sqlite/database.js +97 -0
  57. package/dist/memory/sqlite/migrations/001_initial.sql +128 -0
  58. package/dist/memory/sqlite/migrations/002_observation_revisions.sql +14 -0
  59. package/dist/memory/sqlite/migrations/003_agent_registry.sql +26 -0
  60. package/dist/memory/sqlite/migrations/004_run_runtime.sql +62 -0
  61. package/dist/memory/sqlite/migrations/005_run_approvals.sql +20 -0
  62. package/dist/memory/sqlite/migrations/006_run_operation_attempts.sql +32 -0
  63. package/dist/memory/sqlite/migrations/007_abandoned_operation_attempts.sql +46 -0
  64. package/dist/memory/sqlite/migrations/008_run_execution_plan_events.sql +105 -0
  65. package/dist/memory/sqlite/migrations/009_multiple_operation_attempts.sql +73 -0
  66. package/dist/memory/sqlite/migrations/010_skill_registry.sql +66 -0
  67. package/dist/memory/sqlite/migrations/011_skill_usage_resolution_outcomes.sql +21 -0
  68. package/dist/memory/sqlite/migrations/012_skill_improvement_proposals.sql +37 -0
  69. package/dist/memory/sqlite/migrations/013_skill_evaluation_scenarios.sql +43 -0
  70. package/dist/memory/sqlite/migrations/014_manager_profile_overlays.sql +14 -0
  71. package/dist/memory/storage-paths.js +72 -0
  72. package/dist/orchestrator/natural-language-planner.js +191 -0
  73. package/dist/orchestrator/schema.js +1 -0
  74. package/dist/permissions/index.js +2 -0
  75. package/dist/permissions/policy-evaluator.js +109 -0
  76. package/dist/permissions/schema.js +1 -0
  77. package/dist/providers/opencode/injection-preview.js +134 -0
  78. package/dist/providers/opencode/manager-payload.js +129 -0
  79. package/dist/runs/execution-planning.js +117 -0
  80. package/dist/runs/operation-execution.js +1 -0
  81. package/dist/runs/operation-retry.js +124 -0
  82. package/dist/runs/repositories/runs.js +611 -0
  83. package/dist/runs/run-insights.js +145 -0
  84. package/dist/runs/run-service.js +713 -0
  85. package/dist/runs/run-snapshot-export-service.js +31 -0
  86. package/dist/runs/sandbox-process-execution.js +218 -0
  87. package/dist/runs/sandbox-worktree-planning.js +59 -0
  88. package/dist/runs/schema.js +1 -0
  89. package/dist/sdd/artifact-portability-service.js +118 -0
  90. package/dist/sdd/schema.js +17 -0
  91. package/dist/sdd/sdd-workflow-service.js +217 -0
  92. package/dist/setup/backup-rollback-service.js +76 -0
  93. package/dist/setup/index.js +3 -0
  94. package/dist/setup/providers/antigravity-setup-adapter.js +18 -0
  95. package/dist/setup/providers/claude-setup-adapter.js +30 -0
  96. package/dist/setup/providers/custom-setup-adapter.js +18 -0
  97. package/dist/setup/providers/index.js +6 -0
  98. package/dist/setup/providers/opencode-setup-adapter.js +104 -0
  99. package/dist/setup/providers/provider-setup-adapter.js +15 -0
  100. package/dist/setup/providers/provider-setup-registry.js +11 -0
  101. package/dist/setup/schema.js +1 -0
  102. package/dist/setup/setup-defaults.js +11 -0
  103. package/dist/setup/setup-lifecycle-service.js +175 -0
  104. package/dist/setup/setup-plan.js +105 -0
  105. package/dist/skills/repositories/skill-evaluation-scenarios.js +289 -0
  106. package/dist/skills/repositories/skill-improvement-proposals.js +288 -0
  107. package/dist/skills/repositories/skills.js +430 -0
  108. package/dist/skills/schema.js +1 -0
  109. package/dist/skills/skill-payload.js +94 -0
  110. package/dist/skills/skill-registry-service.js +92 -0
  111. package/dist/skills/skill-resolver.js +191 -0
  112. package/dist/workflows/command-allowlist-adapter.js +70 -0
  113. package/dist/workflows/schema.js +4 -0
  114. package/dist/workflows/workflow-executor.js +345 -0
  115. package/dist/workflows/workflow-registry.js +66 -0
  116. package/docs/architecture.md +698 -0
  117. package/docs/cli.md +741 -0
  118. package/docs/funcionamiento-del-sistema.md +868 -0
  119. package/docs/harness-gap-analysis.md +229 -0
  120. package/docs/prd.md +372 -0
  121. package/package.json +57 -0
@@ -0,0 +1,698 @@
1
+ # vgxness Architecture
2
+
3
+ `vgxness` is a local-first, provider-agnostic, Gentle-AI-like harness for agentic development. Its core architecture separates the product domain from provider-specific tooling so agents, skills, memory, SDD workflows, runs, and traces can work across OpenCode, Claude Code, and future adapters such as Pi.
4
+
5
+ The architectural goal is not only to install better prompts or agent configs. `vgxness` should combine an ecosystem configurator with a runtime control plane: configured agents may execute the work, but the product keeps explicit state for phase readiness, artifacts, runs, approvals, checkpoints, and audit evidence.
6
+
7
+ The user-facing shape is deliberately three-surface: **MCP for agents**, **CLI for scriptable operator control**, and **TUI for guided setup and visual local operations**. All three must call the same core services instead of reimplementing workflow rules.
8
+
9
+ ## Architecture decision summary
10
+
11
+ | Area | Decision |
12
+ |---|---|
13
+ | Product shape | Local-first harness for advanced individual developers first; team/cloud features later. |
14
+ | Reference model | Similar product surface to Gentle-AI/`gentle-pi`: agent setup, SDD orchestration, memory wiring, skills, profiles, permissions, verification. |
15
+ | Differentiator | Verifiable runtime state engine: SDD phases, artifacts, runs, approvals, checkpoints, and audit trails are queryable product state, not prompt-only convention. |
16
+ | Core workflow | SDD-first canonical state: explore → proposal → spec → design → tasks → apply-progress → verify → archive. |
17
+ | Interfaces | MCP server for AI tools, CLI for automation/power users, TUI for guided install/status/profile/SDD workflows. |
18
+ | Installation UX | Step-based guided setup with doctor checks, dry-run support, and no manual provider JSON editing on the happy path. |
19
+ | Provider strategy | Provider-agnostic domain model with OpenCode and Claude Code first; Pi/`gentle-pi` compatibility is a future adapter/reference target. |
20
+ | Memory | Project memory plus personal/global memory, backed locally. |
21
+ | Agents | Agents/subagents are registered in a neutral schema, then rendered into provider-specific config. |
22
+ | Skills | Skills are first-class, versioned, attachable to agents/workflows/adapters, and improved through reviewable proposals. |
23
+ | Runtime | Every meaningful operation is captured as an auditable run with traces, artifacts, memory ops, approvals, and verification evidence. |
24
+ | Safety | Permissions and sandbox boundaries are core domain concepts, not provider-specific afterthoughts. |
25
+
26
+ ## System boundaries
27
+
28
+ ```text
29
+ Human operator                    AI coding tool / agent
30
+       │                                      │
31
+       ├───────────────┐                      │
32
+       ▼               ▼                      ▼
33
+  CLI (`vgx`)     TUI (`vgx`)   MCP server (`vgx mcp start`)
34
+       │               │                      │
35
+       └───────────────┴──────────┬───────────┘
36
+
37
+                        vgxness service layer
38
+
39
+               ┌──────────────────┴──────────────────┐
40
+               ▼                                     ▼
41
+      Configurator plane                   Runtime control plane
42
+      ├─ install/sync                      ├─ SDD workflow engine
43
+      ├─ asset rendering                   ├─ Run runtime
44
+      ├─ profile rendering                 ├─ Artifact service
45
+      ├─ config verify                     ├─ Permission policy engine
46
+      └─ backup/rollback                   └─ Trace/observability service
47
+               │                                     │
48
+               └──────────────────┬──────────────────┘
49
+
50
+                         vgxness core domain
51
+                         ├─ Agent registry
52
+                         ├─ Skill registry
53
+                         ├─ Memory service
54
+                         ├─ Adapter contracts
55
+                         └─ Evaluation/verification contracts
56
+
57
+               ┌──────────────────┴──────────────────┐
58
+               ▼                                     ▼
59
+      Provider adapter layer               Local storage
60
+      ├─ OpenCode adapter                  ├─ Project store
61
+      ├─ Claude Code adapter               └─ Personal/global store
62
+      └─ Future adapters such as Pi
63
+ ```
64
+
65
+ This split is the key product boundary:
66
+
67
+ - The **configurator plane** makes external AI tools usable: managed prompts, agents, skills, model profiles, permissions, and safe config rendering.
68
+ - The **runtime control plane** makes SDD auditable: phase state, readiness gates, artifacts, approvals, checkpoints, traces, and run history.
69
+ - The **interface layer** exposes the same capabilities through MCP, CLI, and TUI. MCP is agent-facing; CLI and TUI are human/operator-facing.
70
+
71
+ Gentle-AI/`gentle-pi` are strong references for the configurator and agent-behavior side. `vgxness` should borrow those proven patterns while ensuring the runtime control plane remains explicit and provider-neutral.
72
+
73
+ ## Core domain model
74
+
75
+ | Entity | Responsibility |
76
+ |---|---|
77
+ | `Agent` | Defines who executes work: role, instructions, capabilities, model preference, permissions, and compatible workflows. |
78
+ | `Subagent` | Specialized agent intended for delegated/focused work with constrained scope and tools. |
79
+ | `Skill` | Reusable knowledge/procedure that can be attached to agents, workflows, phases, or adapters. |
80
+ | `Workflow` | A named process such as SDD, with ordered phases, gates, artifacts, and verification expectations. |
81
+ | `Run` | Auditable execution record for an agentic operation. |
82
+ | `Profile` | Model assignment strategy for orchestrators, agents, subagents, or SDD phases. |
83
+ | `ManagedAsset` | Prompt, skill, command, chain, permission template, or config fragment rendered/synced into an external tool. |
84
+ | `McpTool` | Agent-facing operation exported by the MCP server, backed by core services and permission policy. |
85
+ | `DoctorCheck` | Health check for install/config/runtime readiness with actionable recovery guidance. |
86
+ | `Trace` | Structured event stream describing what happened during a run. |
87
+ | `Memory` | Durable observations, decisions, artifacts, preferences, and learnings. |
88
+ | `Artifact` | Structured output such as PRD, spec, design, task list, apply progress, verify report, or archive report. |
89
+ | `Adapter` | Translator between the neutral `vgxness` model and a provider-specific config/runtime. |
90
+ | `PermissionPolicy` | Rules defining what an agent may do, what requires approval, and what must be denied. |
91
+
92
+ ## Storage model
93
+
94
+ `vgxness` should keep local storage split by scope.
95
+
96
+ | Scope | Purpose | Example location |
97
+ |---|---|---|
98
+ | Project | Repo-specific memory, SDD artifacts, run history, adapter config, project agents/skills. | `.vgx/` or project-local SQLite store. |
99
+ | Personal/global | User preferences, reusable skills, cross-project patterns, global agents. | User-level config/data directory. |
100
+
101
+ The exact path format is still open, but the architectural rule is fixed: **project data and personal data must not be collapsed into one scope**.
102
+
103
+ ## SDD workflow engine
104
+
105
+ ### Current implementation boundary
106
+
107
+ SDD is the default workflow for substantial changes. The current workflow engine is provider-agnostic and local-first: it tracks phase artifact presence, readiness, and artifact persistence in SQLite only.
108
+
109
+ ```text
110
+ explore → proposal → spec → design → tasks → apply-progress → verify → archive
111
+ ```
112
+
113
+ Current phase artifacts use one canonical topic key each:
114
+
115
+ ```text
116
+ sdd/{change}/{phase}
117
+ ```
118
+
119
+ Readiness is based on artifact presence only:
120
+
121
+ | Phase | Required artifacts |
122
+ |---|---|
123
+ | `explore` | none |
124
+ | `proposal` | `explore` |
125
+ | `spec` | `proposal` |
126
+ | `design` | `proposal`, `spec` |
127
+ | `tasks` | `proposal`, `spec`, `design` |
128
+ | `apply-progress` | `tasks` |
129
+ | `verify` | `apply-progress` |
130
+ | `archive` | `verify` |
131
+
132
+ Current service API:
133
+
134
+ | API | Purpose |
135
+ |---|---|
136
+ | `getWorkflow(change)` | Return canonical phases, topic keys, and prerequisites. |
137
+ | `getStatus({ project, change })` | Report present/missing phase artifacts and the next ready missing phase. |
138
+ | `getReady({ project, change, phase })` | Report whether a phase is ready, satisfied prerequisites, and missing topic keys. |
139
+ | `saveArtifact({ project, change, phase, content })` | Persist non-empty content under the canonical SDD topic key. |
140
+ | `getArtifact({ project, change, phase })` | Read one full artifact from the local SQLite `artifacts` table by canonical topic key. |
141
+ | `listArtifacts({ project, change })` | List matching artifacts from the local SQLite `artifacts` table in canonical phase order. |
142
+
143
+ Artifact read/list surfaces use the full-content `artifacts` table in the selected local SQLite memory store. They do **not** read Engram search previews, and they do **not** create or modify `openspec/` paths.
144
+
145
+ The CLI exposes the same boundary through `npm run cli -- sdd status|ready|save-artifact|get-artifact|list-artifacts`. These commands do **not** execute providers, continue phases automatically, create `openspec/`, or write `.opencode/`, `.claude/`, or user/global provider config.
146
+
147
+ ### Target architecture
148
+
149
+ Long term, SDD orchestration must combine two mechanisms:
150
+
151
+ 1. **Agent behavior** — orchestrator prompts, SDD subagents/chains, skills, per-phase models, and strict TDD instructions rendered into tools such as OpenCode, Claude Code, or Pi.
152
+ 2. **Product state** — workflow definitions, prerequisites, artifacts, approvals, runs, checkpoints, verification results, and archive status stored locally and queried before advancing.
153
+
154
+ The first mechanism makes agents effective. The second makes the workflow enforceable and inspectable. `vgxness` should not rely on either one alone.
155
+
156
+ ## Configurator and profile plane
157
+
158
+ To be comparable with Gentle-AI-like systems, `vgxness` needs a configurator plane that can install, sync, render, and verify managed assets for supported tools without taking ownership of user files blindly.
159
+
160
+ Planned responsibilities:
161
+
162
+ | Responsibility | Purpose |
163
+ |---|---|
164
+ | Asset catalog | Know which prompts, skills, SDD agents, commands, chains, and permission templates are managed. |
165
+ | Safe rendering | Generate provider-specific artifacts from neutral definitions without mutating source registries. |
166
+ | Profile management | Assign models per orchestrator, agent, subagent, or SDD phase. |
167
+ | File merge strategy | Inject managed blocks without clobbering user-owned config. |
168
+ | Backup/rollback | Snapshot affected config before sync/install and provide restore metadata. |
169
+ | Config verification | Report missing assets, invalid provider config, stale versions, and unsupported adapter capabilities. |
170
+
171
+ This plane should remain separate from runtime execution. Rendering OpenCode, Claude Code, or Pi assets is not the same thing as running an SDD phase.
172
+
173
+ ## Interface surfaces
174
+
175
+ `vgxness` exposes the same local core through three surfaces. This prevents the product from becoming three separate implementations of the same rules.
176
+
177
+ | Surface | Primary user | Job | Must not |
178
+ |---|---|---|---|
179
+ | MCP server | AI coding tools and agents | Query/update workflow state, resolve agents/skills, record runs/checkpoints, request approvals. | Bypass permission policy or mutate provider config directly. |
180
+ | CLI | Human operators, scripts, CI-friendly automation | Initialize, inspect, doctor, sync/render assets, manage MCP installs, inspect SDD/runs/profiles. | Hide dangerous changes or require manual JSON editing for the happy path. |
181
+ | TUI | Humans during setup and local operation | Guide installation, show health/status, configure profiles, inspect SDD progress, surface blockers. | Become a decorative dashboard without next actions. |
182
+
183
+ ### Natural-language planning seam
184
+
185
+ `NaturalLanguagePlanner` is the provider-agnostic front-door classifier for operator text. It maps an intent to exactly one preview flow: `direct`, `plan`, `sdd`, or `diagnose`, with confidence, reasons, signals, safety notes, and preview-only next actions.
186
+
187
+ The CLI exposes this through:
188
+
189
+ ```bash
190
+ npm run cli -- orchestrator preview --project vgxness --intent "..." [--change <id>] [--db <path>]
191
+ ```
192
+
193
+ This seam is intentionally non-executing. It may read SDD status/next context for a supplied change from the selected local SQLite store, but it does not call providers, invoke run preflight, edit code, write provider config, create run records, install global memory, or create `openspec/`. Future live orchestration must build on this explicit preview contract instead of bypassing it with prompt-only routing.
194
+
195
+ ### MCP server boundary
196
+
197
+ The MCP server is the agent-facing control plane. It should expose typed, narrow tools backed by the same service APIs used by CLI and TUI.
198
+
199
+ Current MCP operator verification uses two short-lived CLI commands, `mcp setup --preview` (shown once per supported provider) and `mcp doctor`, before connecting a client:
200
+
201
+ ```bash
202
+ npm run cli -- mcp setup --preview --provider opencode --db <path>
203
+ npm run cli -- mcp setup --preview --provider claude --db <path>
204
+ npm run cli -- mcp doctor --db <path> --project vgxness --change manual-smoke
205
+ ```
206
+
207
+ `mcp setup --preview` returns copyable stdio client snippets only. It is read-only and must not write `.opencode/`, `.claude/`, or provider config. `mcp doctor` verifies local readiness with JSON checks and may prepare/use the selected SQLite DB, but it does not mutate provider configuration.
208
+
209
+ Current MCP tool groups are defined in source by `SUPPORTED_VGX_MCP_TOOL_NAMES`; the table below is representative, not exhaustive:
210
+
211
+ | Group | Example tools | Purpose |
212
+ |---|---|---|
213
+ | SDD | `vgxness_sdd_status`, `vgxness_sdd_next`, `vgxness_sdd_ready`, `vgxness_sdd_save_artifact`, `vgxness_sdd_get_artifact`, `vgxness_sdd_list_artifacts` | Let agents ask what can happen next and retrieve local SQLite SDD artifacts without guessing from prompts. |
214
+ | Runs | `vgxness_run_start`, `vgxness_run_list`, `vgxness_run_get`, `vgxness_run_preflight`, `vgxness_run_checkpoint`, `vgxness_run_finalize` | Keep execution history, safety preflight, and resumability explicit. |
215
+ | Resolution/profile/payload | `vgxness_agent_resolve`, `vgxness_agent_activate`, `vgxness_manager_profile_get`, `vgxness_manager_profile_set`, `vgxness_skill_payload`, `vgxness_opencode_manager_payload` | Give agents the correct role, skills, model/profile context, and read-only OpenCode manager previews. Payload/profile preview tools do not execute providers or mutate provider config by default. |
216
+
217
+ MVP transport should prefer local stdio because it matches common MCP host expectations and avoids opening a network port. A local HTTP transport can be added later for dashboard or background-daemon use cases.
218
+
219
+ ### CLI boundary
220
+
221
+ The CLI is the scriptable operator surface. It should be predictable, dry-run friendly, and explicit about whether a command only inspects state or mutates local/provider config.
222
+
223
+ Command shape should group commands by product concept, with the action verb following the concept it acts on (top-level commands such as `vgx init` are bare verbs):
224
+
225
+ ```bash
226
+ vgx init
227
+ vgx doctor
228
+ vgx status
229
+ vgx mcp install opencode
230
+ vgx mcp status
231
+ vgx sdd status <change>
232
+ vgx sdd next <change>
233
+ vgx profiles list
234
+ vgx profiles set apply <model>
235
+ ```
236
+
237
+ ### TUI boundary
238
+
239
+ The TUI is the guided human surface. It should optimize for the next meaningful decision:
240
+
241
+ | Screen | Main decision/action |
242
+ |---|---|
243
+ | Setup | Select tools, inspect provider readiness, review copy-only external setup guidance, and run doctor-style checks. |
244
+ | Workflows | Understand available workflow paths and safe next actions. |
245
+ | Runs | Inspect run history, current run state, blockers, and resumable execution. |
246
+ | Approvals | Review pending permission requests and safety gates. |
247
+ | Agents | Inspect registered agents/subagents, attached skills, model/profile routing, and provider compatibility. |
248
+ | SDD | See active changes, missing artifacts, next ready phase, blockers, and resumable runs. |
249
+ | Doctor | Fix broken config, missing MCP integration, memory issues, and stale assets. |
250
+ | Settings | Review local preferences and configuration status without silent provider writes. |
251
+
252
+ Every TUI screen must define loading, empty, error, success, blocked, and permission states. The dashboard TUI is read-only: provider config writes/install/apply are external-only, require explicit confirmation outside the dashboard, and are not run by dashboard flows.
253
+
254
+ ### Installation flow
255
+
256
+ The happy path should be guided and step-based:
257
+
258
+ ```text
259
+ 1. Install binary
260
+ 2. Run `vgx`
261
+ 3. Select AI tools
262
+ 4. Install MCP integration
263
+ 5. Configure memory
264
+ 6. Choose SDD mode/profile
265
+ 7. Run doctor
266
+ 8. Open the AI tool and start working
267
+ ```
268
+
269
+ The same flow must be automatable through CLI flags and dry-run output so users can preview provider config changes before applying them.
270
+
271
+ ## Agent and subagent registry
272
+
273
+ The registry stores provider-neutral definitions.
274
+
275
+ Minimum schema:
276
+
277
+ ```yaml
278
+ name: code-reviewer
279
+ description: Reviews code for maintainability, bugs, and security issues
280
+ mode: subagent
281
+ instructions:
282
+   kind: path
283
+   value: ./prompts/code-reviewer.md
284
+ capabilities:
285
+   - code-review
286
+   - security-review
287
+ permissions:
288
+   read: allow
289
+   edit: deny
290
+   shell: ask
291
+ memory:
292
+   scopes:
293
+     - project
294
+     - personal
295
+ skills:
296
+   - code-review
297
+   - secure-coding
298
+ adapters:
299
+   opencode:
300
+     model: anthropic/claude-sonnet-4-20250514
301
+   claude-code:
302
+     model: sonnet
303
+ ```
304
+
305
+ Provider-specific fields are allowed only under adapter overrides.
306
+
307
+ The first implementation slice stores this neutral model locally in SQLite through the `AgentRegistryService` API:
308
+
309
+ | API | Purpose |
310
+ |---|---|
311
+ | `registerAgent(input)` | Upsert a top-level agent by `project + scope + name`. |
312
+ | `registerSubagent(input)` | Upsert a subagent with a required same-project/same-scope parent agent. |
313
+ | `getAgent(id)` / `getAgentByName(project, scope, name)` | Read full provider-neutral configuration. |
314
+ | `listAgents(filters)` / `listSubagents(parentAgentId)` | Read summaries for discovery and delegation relationships. |
315
+ | `resolveAgents(input)` | Rank registered agents/subagents for a task using transparent metadata rules. |
316
+
317
+ Adapter-specific options stay under `adapters.{adapterName}` and are treated as opaque neutral JSON by the core registry.
318
+
319
+ Harness tools expose the same registry through `createAgentRegistryToolHandlers(...)` for tool-facing JSON-ish payloads:
320
+
321
+ | Handler | Purpose |
322
+ |---|---|
323
+ | `registerAgent(input)` / `registerSubagent(input)` | Validate tool payloads and upsert provider-neutral definitions. |
324
+ | `getAgent({ id })` / `getAgentByName({ project, scope, name })` | Read full definitions for execution or adapter rendering. |
325
+ | `listAgents(filters)` / `listSubagents({ parentAgentId })` | Discover available agents and parent-child relationships. |
326
+
327
+ The minimal CLI exposes this same registry for local terminal use through `npm run cli -- agents register|list|get` and `npm run cli -- subagents register|list|get`. It stays thin: flag/JSON-file parsing, SQLite opening, and JSON formatting only; registry rules remain in the service/tool boundary.
328
+
329
+ Current agent resolution v1 is provider-agnostic, deterministic, and rule-based. It accepts project/scope, task text or intent, desired capabilities, workflow/phase, provider adapter, and optional `agent`/`subagent` mode. Resolution reads existing agent definitions only, then scores compatible candidates with transparent reasons:
330
+
331
+ - capability matches
332
+ - workflow/phase matches such as `sdd`, `apply`, or `sdd:apply`
333
+ - provider adapter compatibility, with empty adapter maps treated as provider-neutral
334
+ - skill-name matches from the agent definition `skills` field
335
+ - task text matches against agent name, description, capabilities, skills, and workflows
336
+ - mode and subagent parent relationship signals
337
+
338
+ Incompatible definitions are returned under `skipped` with reasons such as project/scope mismatch, mode mismatch, provider adapter mismatch, workflow mismatch, or capability mismatch. Ordering is deterministic: higher score first, with ties broken by placing top-level agents before subagents and then by agent name/id.
339
+
340
+ ```bash
341
+ npm run cli -- agents resolve --project vgxness --capabilities implementation,typescript --workflow sdd --phase apply --provider opencode --task "Implement the next SDD apply slice"
342
+ npm run cli -- agents resolve --project vgxness --mode subagent --capabilities review --workflow sdd --phase verify
343
+ ```
344
+
345
+ Resolution v1 does **not** call an AI/model ranker, execute agents, mutate provider config, install tools, choose a model dynamically, or create/update provider files. Future orchestration can use these ranked candidates as one explicit input, but current behavior is explainable metadata matching only.
346
+
347
+ ## Skill registry and self-improvement
348
+
349
+ Skills are versioned assets registered independently from agents and provider config. The current v1 foundation is provider-agnostic and local-first: it stores skill identity, versions, source metadata, compatibility hints, attachments, and simple usage outcomes in SQLite.
350
+
351
+ Current v1 APIs:
352
+
353
+ | API | Purpose |
354
+ |---|---|
355
+ | `registerSkill(input)` | Upsert a skill by `project + scope + name`. |
356
+ | `addSkillVersion(input)` / `listSkillVersions(skillId)` | Store version metadata, source info, compatibility hints, and optionally activate the version as current. |
357
+ | `attachSkill(input)` / `detachSkill(...)` | Attach a skill to an agent, subagent, workflow phase, or provider adapter without mutating provider files. |
358
+ | `getSkill(...)` / `getSkillByName(...)` / `getSkillDetails(id)` / `listSkills(filters)` | Read skills, current version, versions, attachments, and usage records. |
359
+ | `resolveSkills(input)` | Resolve active skill versions for an agent/runtime context. |
360
+ | `buildSkillPayload(input, options)` | Resolve skills and return provider-agnostic injection payload v1 without writing provider config. |
361
+ | `recordSkillUsage(input)` | Record whether a skill was selected/injected or later helped, failed, or was neutral during a run. |
362
+ | `createSkillImprovementProposal(input)` | Create a reviewable proposed skill version from trace/failure/correction/memory signals without changing the active skill. |
363
+ | `submit/approve/reject/cancel/applySkillImprovementProposal(...)` | Gate the proposal lifecycle and activate only an approved proposal when apply is explicit. |
364
+
365
+ Source metadata is descriptive at registration time: `path`, `url`, and `inline` sources are stored without installing or rewriting provider files. Payload building can materialize injection-ready content from inline metadata and from safe local paths only.
366
+
367
+ Current skill resolution v1 is provider-agnostic and deterministic. It accepts an agent id or agent name/project/scope, workflow/phase, provider adapter, and an optional run id. Resolution checks the agent definition's `skills` field first, then matching agent/subagent attachments, workflow-phase attachments such as `sdd:apply` or `apply`, and provider-adapter attachments such as `opencode`. Duplicate skills are returned once in first-seen order with all attachment sources preserved. Only active versions are returned: if an attachment pins an explicit version, that version must be active; otherwise the skill's current version must exist and be active. Missing skills, no-current-version skills, draft/deprecated/archived versions, and missing attachment versions are reported in `skipped` instead of failing the whole resolution.
368
+
369
+ The resolver returns enough data for payload building: skill id/name, version id/version, source metadata, inline `summary`/`content` when present, and attachment reason/source. If a run id and explicit resolution outcome are supplied, it records `selected` or `injected` usage for audit. Payload v1 turns resolved skills into deterministic provider-agnostic items with identity, source kind, content availability, attachment reasons, and ordering metadata. Inline content is available directly. URL sources remain metadata-only with `url_fetch_disabled`. Local path sources are read-only and only loaded when the path resolves inside the supplied workspace root; missing, unsafe, non-file, or oversized paths return deterministic unavailable reasons instead of throwing.
370
+
371
+ Payload v1 does **not** inject into a live provider; write `.opencode/`, `.claude/`, or user/global config; fetch from the network or URLs; evaluate quality; or propose autonomous skill changes.
372
+
373
+ Skill improvement proposals are the current controlled foundation for self-improvement. A proposal stores the target skill, base version, proposed source/compatibility metadata, rationale, source signal, and deterministic diff summary. Proposal creation and approval do **not** mutate the active skill. Only `applySkillImprovementProposal(...)` on an `approved` proposal creates a new skill version and makes it current/active, with actor/reason audit fields on the proposal. Rejected or cancelled proposals cannot be applied.
374
+
375
+ CLI examples:
376
+
377
+ ```bash
378
+ npm run cli -- skills register --project vgxness --name sdd-apply --description "Applies SDD tasks"
379
+ npm run cli -- skills add-version --project vgxness --name sdd-apply --version 1.0.0 --source-kind path --source-path .config/opencode/skills/sdd-apply/SKILL.md --activate
380
+ npm run cli -- skills attach --project vgxness --name sdd-apply --target-type workflow-phase --target-key sdd:apply
381
+ npm run cli -- skills resolve --agent apply-agent --project vgxness --workflow sdd --phase apply --provider opencode
382
+ npm run cli -- skills payload --agent apply-agent --project vgxness --workflow sdd --phase apply --provider opencode
383
+ npm run cli -- skills propose --project vgxness --name sdd-apply --proposed-version 1.1.0 --source-kind inline --inline-metadata '{"content":"Updated skill"}' --rationale "Repeated correction from trace"
384
+ npm run cli -- skills submit-proposal --proposal <proposal-id> --actor uziel
385
+ npm run cli -- skills approve-proposal --proposal <proposal-id> --actor uziel --reason "Reviewed diff"
386
+ npm run cli -- skills apply-proposal --proposal <proposal-id> --actor uziel
387
+ ```
388
+
389
+ V1 does **not** autonomously detect trace patterns, mutate skill files, activate autonomous updates, evaluate proposed quality, or write external skill/provider configuration. Proposal creation is reviewable, and activation is gated by explicit approval plus explicit apply.
390
+
391
+ Minimum lifecycle:
392
+
393
+ ```text
394
+ draft → active → proposed-update → approved → active
395
+ └────────────→ rejected
396
+ active → deprecated → archived
397
+ ```
398
+
399
+ Self-improvement must be controlled:
400
+
401
+ 1. Observe traces, failures, corrections, and repeated patterns.
402
+ 2. Detect a candidate skill improvement.
403
+ 3. Generate a reviewable diff/version proposal.
404
+ 4. Run relevant evaluations.
405
+ 5. Ask for human approval.
406
+ 6. Activate approved version with rollback history.
407
+
408
+ No silent mutation of active skills. This is a red line.
409
+
410
+ ## Provider adapter contract
411
+
412
+ Adapters render registry definitions into provider artifacts without changing the registry model.
413
+
414
+ Current contract:
415
+
416
+ | Type | Purpose |
417
+ |---|---|
418
+ | `ProviderRenderer` | Named renderer for one output format/provider. |
419
+ | `ProviderRenderInput` | A root agent plus optional registered subagents. |
420
+ | `ProviderRenderResult` | Generated artifacts, provider name, `installable: false`, and warnings. |
421
+ | `ProviderRenderArtifact` | Relative path, content type, and generated contents. |
422
+
423
+ Renderers currently include:
424
+
425
+ | Provider | Preview artifact | Notes |
426
+ |---|---|---|
427
+ | `json` | `rendered/json/...` | Export/debug shape; includes matching `adapters.json` as `selectedAdapter`. |
428
+ | `opencode` | `rendered/opencode/<project>/<scope>/<agent>/opencode.json` | Single config preview with `$schema` and `agent` object. Top-level agents default to `primary`; subagents render as `subagent`. |
429
+
430
+ ```bash
431
+ npm run cli -- agents render --provider json --project vgxness --name apply-agent
432
+ npm run cli -- agents render --provider opencode --project vgxness --name apply-agent
433
+ ```
434
+
435
+ Rendering is intentionally read-only: it returns generated content in the CLI response. It does **not** write `.opencode/`, `.claude/`, or any user/global provider config.
436
+ OpenCode agent keys are sanitized deterministically from registry names, and rendering rejects key collisions instead of overwriting generated config.
437
+
438
+ Claude Code rendering remains follow-up work after its exact install-safe artifact shape is specified.
439
+
440
+ ### OpenCode injection preview
441
+
442
+ The current OpenCode integration also exposes a preview-only composition seam through `OpenCodeInjectionPreviewService` and the thin CLI command:
443
+
444
+ ```bash
445
+ npm run cli -- opencode preview --provider opencode --agent apply-agent --project vgxness --change checkout-flow --phase apply-progress
446
+ ```
447
+
448
+ This preview lives in the provider adapter/preview layer, not in provider-agnostic core services. It composes existing read-only outputs:
449
+
450
+ | Output | Source boundary |
451
+ |---|---|
452
+ | `providerArtifacts` | OpenCode renderer for the selected agent and registered subagents. |
453
+ | `skillPayload` | Skill registry payload builder for the selected SDD phase and OpenCode adapter. |
454
+ | `sdd` | SDD workflow status and readiness for the selected project/change/phase. |
455
+ | `context` and `safety` | OpenCode preview layer metadata for future OpenCode/MCP/hook callers. |
456
+
457
+ The envelope is always `installable:false` and `readOnly:true`. It does **not** execute OpenCode, install hooks, create MCP servers, create runs, record skill usage, write `.opencode/`, write `.claude/`, or touch user/global provider config. Future live injection should build on this contract only after a separate approved change defines execution, hook, or MCP safety rules.
458
+
459
+ ### OpenCode manager orchestration
460
+
461
+ The checked-in OpenCode default config and `seeds/agents/agent-seed-v1.json` define `vgxness-manager` as an MCP-first orchestrator. The manager should use `vgxness_session_restore` with the project and workspace directory when starting, resuming, or recovering context, then use SDD artifact/status tools, memory tools, agent resolution, run/preflight tools, and read-only payload/profile previews before delegating substantial phase work to exact SDD subagents. Before ending, pausing, handing off, or compacting, it should call `vgxness_session_close` with the current session id, actor `manager`, and an actionable summary; if no current session id is available, it must not invent one and should preserve the summary in its final response. Its OpenCode `permission.task` remains deny-by-default: `*` is denied and only the canonical `vgxness-sdd-*` subagent names are allowed explicitly. The seed/default prompts are self-contained and do not require external `~/.config/opencode/skills/sdd-*` files; such skills are optional registry assets if a project registers them separately.
462
+
463
+ ## Run lifecycle
464
+
465
+ A run is the core unit of execution. The current foundation stores local, provider-neutral run records in SQLite; deeper orchestration and approval enforcement are follow-up work.
466
+
467
+ Current terminal lifecycle rules:
468
+
469
+ ```text
470
+ created → completed | failed | blocked | cancelled
471
+ ```
472
+
473
+ The broader planned lifecycle still includes `planned`, `running`, and `needs-human`, but this slice only enforces safe finalization: terminal runs cannot be finalized again, and final outcomes must match the terminal status.
474
+
475
+ Current run fields:
476
+
477
+ - run id
478
+ - user intent
479
+ - project identity
480
+ - workflow and phase
481
+ - selected agent/subagent id
482
+ - provider adapter
483
+ - model
484
+ - status, outcome, and outcome reason
485
+ - latest checkpoint id
486
+ - created/updated/completed timestamps
487
+
488
+ Timeline events store audit entries such as tool calls, permission decisions, approvals, memory operations, artifact references, and verification evidence. Events may reference memory observation IDs or artifact IDs, but run state remains separate from long-term memory and SDD artifacts.
489
+
490
+ Current operation enforcement is safe-by-default and executor-injected: `RunService.executeOperation(...)` evaluates permission, records the `permission-decision`, then either blocks or calls the supplied `RunOperationExecutor`.
491
+
492
+ Execution isolation is currently **planning-only**. `planExecutionIsolation(...)` and `RunService.planOperationExecution(...)` produce an auditable plan before any real executor exists; they do not create worktrees, launch sandboxes, run shell commands, call providers, or mutate files.
493
+
494
+ | Operation shape | Current planned strategy | Current behavior |
495
+ |---|---|---|
496
+ | In-workspace read | `workspace` | Allowed when permission policy allows it; filesystem plans require realpath/symlink hardening before any future execution. |
497
+ | Edit, git, destructive mutation | `git-worktree` | Planned as isolated mutation work, but still asks/blocks according to permission policy. No worktree is created yet. |
498
+ | Shell, network, provider tool, privileged operation | `process-sandbox` | Planned as sandboxed process/provider work, but approval is still required by default. No sandbox is launched yet. |
499
+ | External directory or out-of-workspace path | `process-sandbox` or blocked workspace plan | Denied by default or requires explicit stronger future approval conditions; external paths are not allowed by current plans. |
500
+
501
+ Every plan includes path constraints, required approvals/conditions, whether realpath hardening must happen before execution, and limitations that state the future boundary. Real sandbox/worktree executors are follow-up work after this planner is connected to a safe execution backend.
502
+
503
+ | Decision | Current behavior |
504
+ |---|---|
505
+ | `deny` | Records an `operation-execution` event with `status: "blocked"`; executor is not invoked. |
506
+ | `ask` | Creates a pending approval, records enough pending operation metadata to resume later, and records `status: "pending-approval"`; executor is not invoked. |
507
+ | `allow` | Invokes the injected executor once and records `status: "succeeded"` or `status: "failed"`. |
508
+
509
+ When the decision is `ask`, the runtime creates a first-class pending approval record linked to the permission-decision event. `allow` and `deny` decisions do not create approvals. Approval records can be listed per run, fetched by id, and resolved once as `approved`, `rejected`, or `cancelled` with actor, reason, and timestamp.
510
+
511
+ Approved operation resume is available only through the service API and an injected executor:
512
+
513
+ ```ts
514
+ runService.resumeApprovedOperation({
515
+ approvalId,
516
+ executor: fakeOrSandboxedExecutor,
517
+ });
518
+ ```
519
+
520
+ Resume does **not** re-run policy. Before calling the injected executor, the runtime creates a durable operation attempt reservation linked to the approval id, original permission-decision event, original pending execution event, operation metadata, executor name, attempt sequence, and `reserved` status. The storage model now supports multiple ordered attempts per approval, but active `reserved` attempts remain exclusive.
521
+
522
+ After the executor returns, the attempt is finalized as `succeeded` or `failed`, then the runtime appends the `operation-execution` event with `policyReevaluated: false` and the attempt id. If appending that event fails, the finalized attempt still prevents duplicate execution on retry.
523
+
524
+ Stuck `reserved` attempts can be explicitly abandoned through `RunService.abandonReservedOperationAttempt({ attemptId, actor, reason })`. Abandonment is recovery-only: it changes the attempt to `abandoned`, appends an `operation-execution` audit event with the actor, reason, approval id, attempt id, and `executorInvoked: false`, and does **not** call an executor or retry anything.
525
+
526
+ Retry admission is storage/admission only. `RunService.admitOperationRetryAttempt(...)` can reserve the next ordered attempt after an earlier terminal attempt only when explicit retry policy evaluation allows the latest status. It appends an audit event with `retryAdmission: true`, `executorInvoked: false`, and `operationExecuted: false`; it does **not** execute the operation, launch a sandbox, call a provider, or invoke an executor. Without a policy, the default remains `never`, so admission is blocked.
527
+
528
+ Resume is safe by default: `RunService.resumeApprovedOperation(...)` uses the `never` retry policy, so any existing `reserved`, `succeeded`, `failed`, or `abandoned` attempt blocks later resume calls before executor invocation. `RunService.evaluateOperationRetry(...)` can evaluate explicit policies without executing anything:
529
+
530
+ | Policy | Allows a new attempt after | Always blocks |
531
+ |---|---|---|
532
+ | `never` | nothing (allowed only when no previous attempt exists) | `reserved`, `succeeded`, `failed`, `abandoned` |
533
+ | `after-abandoned` | latest attempt is `abandoned` | active `reserved`, `succeeded`, `failed` |
534
+ | `after-failure` | latest attempt is `failed` | active `reserved`, `succeeded`, `abandoned` |
535
+ | `after-failure-or-abandoned` | latest attempt is `failed` or `abandoned` | active `reserved`, `succeeded` |
536
+
537
+ The retry decision returns `allowed`, `reasonCode`, a human-readable `reason`, evaluated attempt count, retryable statuses, and latest/active attempt metadata for audit. The CLI exposes this only as `runs retry-check --approval <id> [--policy <json>]`; it is a read-only operator check and does not append events, reserve attempts, invoke executors, or retry. Mutation/admission CLI, explicit retry execution, timeout detection, and sandboxed execution are follow-up work.
538
+
539
+ ```ts
540
+ runService.executeOperation({
541
+ runId,
542
+ operation: { category: 'read', operation: 'preview-file', workspaceRoot, targetPath: 'src/index.ts' },
543
+ executor: fakeOrSandboxedExecutor,
544
+ });
545
+ ```
546
+
547
+ `getRun(...)` includes `operationAttempts` beside events, checkpoints, and approvals so callers can audit reservations even if final event recording failed. `evaluatePermissionForRun(...)` remains available when the caller only wants the audit decision and pending approval record. Approvals created without a pending `executeOperation(...)` event are not resumable because they lack deterministic operation metadata. Real shell/filesystem/network/provider execution is still future work. The current enforcement and resume boundary is intentionally testable with fake or deterministic executors only.
548
+
549
+ Checkpoints store resumable JSON state per run:
550
+
551
+ ```ts
552
+ runService.appendCheckpoint({
553
+ runId,
554
+ label: 'after-plan',
555
+ state: { nextTask: 'implement-runtime', completed: ['schema'] },
556
+ });
557
+ ```
558
+
559
+ Follow-up runtime work:
560
+
561
+ - active state transitions for `planned`, `running`, and `needs-human`
562
+ - real provider/tool invocation behind sandboxed executors
563
+ - CLI or adapter orchestration for resume-after-approval once a safe executor exists outside tests
564
+ - operator UX for retry admission and retry execution, with clear separation between reservation and actual execution
565
+ - sandbox/worktree execution strategies after decision recording is stable
566
+ - richer verification evidence summaries
567
+
568
+ ## Trace model
569
+
570
+ Traces make the harness inspectable.
571
+
572
+ Minimum trace events:
573
+
574
+ - run started/stopped
575
+ - phase started/stopped
576
+ - agent invoked
577
+ - skill injected
578
+ - memory read/write
579
+ - artifact read/write
580
+ - tool call requested/completed/failed
581
+ - permission requested/approved/denied
582
+ - verification command started/completed
583
+ - error/blocker recorded
584
+
585
+ ## Permission model
586
+
587
+ Permissions must be defined in `vgxness` first, then mapped to adapters.
588
+
589
+ Minimum categories:
590
+
591
+ | Category | Examples |
592
+ |---|---|
593
+ | `read` | files, memory, artifacts |
594
+ | `edit` | write, patch, modify files |
595
+ | `shell` | commands, scripts, package managers |
596
+ | `git` | status, diff, branch, commit, push |
597
+ | `network` | web fetch, API calls, package downloads |
598
+ | `memory` | create/update/delete/search memory |
599
+ | `external-directory` | access outside project/user-approved roots |
600
+ | `provider-tool` | opaque adapter/provider tool calls |
601
+ | `secrets` | environment variables, credentials, tokens |
602
+
603
+ Operations can resolve to:
604
+
605
+ - `allow`
606
+ - `ask`
607
+ - `deny`
608
+
609
+ Default stance for destructive or external operations: **ask or deny**, never implicit allow.
610
+
611
+ Current foundation API: `evaluatePermission(request)` in `src/permissions/` returns `allow`, `ask`, or `deny` with a reason. Defaults are intentionally conservative:
612
+
613
+ - workspace reads are allowed only when the target path stays inside `workspaceRoot`
614
+ - edits, shell, git, network, memory writes/searches, and provider-specific tools ask by default
615
+ - secrets and external directory access deny by default
616
+ - destructive, external, privileged, or ambiguous requests require an `ask` decision even when an agent override would otherwise allow the category
617
+ - workspace boundary denials cannot be relaxed by agent/subagent overrides
618
+
619
+ Agent and subagent registry definitions keep neutral `permissions` such as `{ "shell": "ask", "provider-tool": "deny" }`. Provider names and tool details remain opaque metadata; enforcement and sandbox execution are follow-up runtime work.
620
+
621
+ ## Future interface surface
622
+
623
+ Candidate future CLI surface beyond the current minimal local CLI documented in `docs/cli.md`:
624
+
625
+ ```bash
626
+ vgx init
627
+ vgx memory search|get|save|update
628
+ vgx agent list|add|validate|render
629
+ vgx skill list|add|propose|approve|reject
630
+ vgx sdd new|continue|status|archive
631
+ vgx run list|show|resume
632
+ vgx adapter doctor|render
633
+ ```
634
+
635
+ Representative MCP tools mirror the same core services for agent use. For the current exact tool names, use `SUPPORTED_VGX_MCP_TOOL_NAMES`:
636
+
637
+ ```text
638
+ vgxness_sdd_status
639
+ vgxness_sdd_next
640
+ vgxness_sdd_ready
641
+ vgxness_sdd_save_artifact
642
+ vgxness_sdd_get_artifact
643
+ vgxness_sdd_list_artifacts
644
+ vgxness_memory_search
645
+ vgxness_memory_get
646
+ vgxness_memory_save
647
+ vgxness_memory_update
648
+ vgxness_run_start
649
+ vgxness_run_list
650
+ vgxness_run_get
651
+ vgxness_run_preflight
652
+ vgxness_run_checkpoint
653
+ vgxness_run_finalize
654
+ vgxness_agent_resolve
655
+ vgxness_agent_activate
656
+ vgxness_manager_profile_get
657
+ vgxness_manager_profile_set
658
+ vgxness_skill_payload
659
+ vgxness_opencode_manager_payload
660
+ ```
661
+
662
+ The CLI and TUI are human/operator control surfaces. MCP is the agent-facing control surface. Provider integrations are the execution/configuration plane.
663
+
664
+ ## Evaluation strategy
665
+
666
+ Minimum eval/test targets:
667
+
668
+ - Agent resolution selects the expected agent.
669
+ - Skill resolution injects the expected skill.
670
+ - Adapter rendering produces valid provider config.
671
+ - Permission rules block unsafe operations.
672
+ - SDD artifact chains remain complete.
673
+ - Memory upserts preserve revisions.
674
+ - Run resume restores expected state.
675
+ - Skill improvement proposals are versioned and require approval.
676
+ - MCP tools call the same core services as CLI/TUI and return actionable blocked states.
677
+ - TUI setup screens expose loading, empty, error, success, blocked, and permission states.
678
+ - Installation dry-run reports the exact provider config changes before mutation.
679
+
680
+ ## Immediate implementation recommendation
681
+
682
+ The next SDD change should be `harness-runtime-foundation`.
683
+
684
+ Scope:
685
+
686
+ - define schemas for agents, skills, runs, traces, permissions, and adapters
687
+ - add local persistence for these entities where missing
688
+ - add adapter validation/render skeleton
689
+ - add CLI/MCP/TUI interface boundaries for validation, inspection, and guided setup
690
+ - add tests for schema validation, permission decisions, and adapter rendering
691
+
692
+ Out of scope:
693
+
694
+ - cloud sync
695
+ - team collaboration
696
+ - web dashboard
697
+ - distributed workers
698
+ - fully autonomous skill mutation