vgxness 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +9 -0
- package/README.md +110 -0
- package/dist/agents/agent-activation-service.js +144 -0
- package/dist/agents/agent-registry-service.js +46 -0
- package/dist/agents/agent-resolver.js +249 -0
- package/dist/agents/agent-seed-service.js +146 -0
- package/dist/agents/manager-profile-overlay-service.js +34 -0
- package/dist/agents/profile-model-routing.js +26 -0
- package/dist/agents/renderers/claude-renderer.js +98 -0
- package/dist/agents/renderers/index.js +16 -0
- package/dist/agents/renderers/json-renderer.js +87 -0
- package/dist/agents/renderers/opencode-renderer.js +100 -0
- package/dist/agents/renderers/provider-adapter.js +6 -0
- package/dist/agents/repositories/agents.js +185 -0
- package/dist/agents/repositories/manager-profile-overlays.js +81 -0
- package/dist/agents/schema.js +1 -0
- package/dist/cli/dashboard-operational-read-models.js +153 -0
- package/dist/cli/dashboard-renderer.js +109 -0
- package/dist/cli/dashboard-screen-renderers.js +332 -0
- package/dist/cli/dashboard-tui-read-model.js +71 -0
- package/dist/cli/dashboard-tui-state.js +218 -0
- package/dist/cli/dispatcher.js +2880 -0
- package/dist/cli/index.js +27 -0
- package/dist/cli/interactive-dashboard.js +29 -0
- package/dist/cli/mcp-start-path.js +21 -0
- package/dist/cli/setup-status-renderer.js +29 -0
- package/dist/cli/setup-wizard-read-model.js +56 -0
- package/dist/cli/setup-wizard-renderer.js +148 -0
- package/dist/cli/setup-wizard-state.js +82 -0
- package/dist/cli/tui-render-helpers.js +192 -0
- package/dist/export/redaction.js +71 -0
- package/dist/harness/tools/agents.js +245 -0
- package/dist/harness/tools/memory.js +29 -0
- package/dist/mcp/client-install-opencode-contract.js +227 -0
- package/dist/mcp/client-install-opencode.js +194 -0
- package/dist/mcp/client-setup-preview.js +38 -0
- package/dist/mcp/control-plane.js +175 -0
- package/dist/mcp/doctor.js +193 -0
- package/dist/mcp/index.js +10 -0
- package/dist/mcp/opencode-default-agent-config.js +156 -0
- package/dist/mcp/opencode-visibility.js +102 -0
- package/dist/mcp/schema.js +234 -0
- package/dist/mcp/stdio-server.js +56 -0
- package/dist/mcp/validation.js +761 -0
- package/dist/memory/import/dry-run-planner.js +58 -0
- package/dist/memory/import/index.js +3 -0
- package/dist/memory/import/observation-writer.js +220 -0
- package/dist/memory/import/package.js +178 -0
- package/dist/memory/memory-service.js +126 -0
- package/dist/memory/repositories/artifacts.js +41 -0
- package/dist/memory/repositories/observations.js +133 -0
- package/dist/memory/repositories/sessions.js +105 -0
- package/dist/memory/repositories/traces.js +58 -0
- package/dist/memory/schema.js +1 -0
- package/dist/memory/search.js +11 -0
- package/dist/memory/sqlite/database.js +97 -0
- package/dist/memory/sqlite/migrations/001_initial.sql +128 -0
- package/dist/memory/sqlite/migrations/002_observation_revisions.sql +14 -0
- package/dist/memory/sqlite/migrations/003_agent_registry.sql +26 -0
- package/dist/memory/sqlite/migrations/004_run_runtime.sql +62 -0
- package/dist/memory/sqlite/migrations/005_run_approvals.sql +20 -0
- package/dist/memory/sqlite/migrations/006_run_operation_attempts.sql +32 -0
- package/dist/memory/sqlite/migrations/007_abandoned_operation_attempts.sql +46 -0
- package/dist/memory/sqlite/migrations/008_run_execution_plan_events.sql +105 -0
- package/dist/memory/sqlite/migrations/009_multiple_operation_attempts.sql +73 -0
- package/dist/memory/sqlite/migrations/010_skill_registry.sql +66 -0
- package/dist/memory/sqlite/migrations/011_skill_usage_resolution_outcomes.sql +21 -0
- package/dist/memory/sqlite/migrations/012_skill_improvement_proposals.sql +37 -0
- package/dist/memory/sqlite/migrations/013_skill_evaluation_scenarios.sql +43 -0
- package/dist/memory/sqlite/migrations/014_manager_profile_overlays.sql +14 -0
- package/dist/memory/storage-paths.js +72 -0
- package/dist/orchestrator/natural-language-planner.js +191 -0
- package/dist/orchestrator/schema.js +1 -0
- package/dist/permissions/index.js +2 -0
- package/dist/permissions/policy-evaluator.js +109 -0
- package/dist/permissions/schema.js +1 -0
- package/dist/providers/opencode/injection-preview.js +134 -0
- package/dist/providers/opencode/manager-payload.js +129 -0
- package/dist/runs/execution-planning.js +117 -0
- package/dist/runs/operation-execution.js +1 -0
- package/dist/runs/operation-retry.js +124 -0
- package/dist/runs/repositories/runs.js +611 -0
- package/dist/runs/run-insights.js +145 -0
- package/dist/runs/run-service.js +713 -0
- package/dist/runs/run-snapshot-export-service.js +31 -0
- package/dist/runs/sandbox-process-execution.js +218 -0
- package/dist/runs/sandbox-worktree-planning.js +59 -0
- package/dist/runs/schema.js +1 -0
- package/dist/sdd/artifact-portability-service.js +118 -0
- package/dist/sdd/schema.js +17 -0
- package/dist/sdd/sdd-workflow-service.js +217 -0
- package/dist/setup/backup-rollback-service.js +76 -0
- package/dist/setup/index.js +3 -0
- package/dist/setup/providers/antigravity-setup-adapter.js +18 -0
- package/dist/setup/providers/claude-setup-adapter.js +30 -0
- package/dist/setup/providers/custom-setup-adapter.js +18 -0
- package/dist/setup/providers/index.js +6 -0
- package/dist/setup/providers/opencode-setup-adapter.js +104 -0
- package/dist/setup/providers/provider-setup-adapter.js +15 -0
- package/dist/setup/providers/provider-setup-registry.js +11 -0
- package/dist/setup/schema.js +1 -0
- package/dist/setup/setup-defaults.js +11 -0
- package/dist/setup/setup-lifecycle-service.js +175 -0
- package/dist/setup/setup-plan.js +105 -0
- package/dist/skills/repositories/skill-evaluation-scenarios.js +289 -0
- package/dist/skills/repositories/skill-improvement-proposals.js +288 -0
- package/dist/skills/repositories/skills.js +430 -0
- package/dist/skills/schema.js +1 -0
- package/dist/skills/skill-payload.js +94 -0
- package/dist/skills/skill-registry-service.js +92 -0
- package/dist/skills/skill-resolver.js +191 -0
- package/dist/workflows/command-allowlist-adapter.js +70 -0
- package/dist/workflows/schema.js +4 -0
- package/dist/workflows/workflow-executor.js +345 -0
- package/dist/workflows/workflow-registry.js +66 -0
- package/docs/architecture.md +698 -0
- package/docs/cli.md +741 -0
- package/docs/funcionamiento-del-sistema.md +868 -0
- package/docs/harness-gap-analysis.md +229 -0
- package/docs/prd.md +372 -0
- package/package.json +57 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# Harness Systems Gap Analysis
|
|
2
|
+
|
|
3
|
+
This research compares current agent harness patterns against the `vgxness` PRD and identifies what the product still needs before it can become a serious local-first Spec-Driven Development (SDD) harness.
|
|
4
|
+
|
|
5
|
+
## Executive summary
|
|
6
|
+
|
|
7
|
+
The current PRD has the right product direction: local-first, provider-agnostic, memory-backed, SDD-first, and agent/subagent aware.
|
|
8
|
+
|
|
9
|
+
What is still missing is the **runtime contract**: permissions, sandboxing, run state, provider adapters, observability, evaluation, and artifact portability. Without these, `vgxness` risks becoming “memory + prompts” instead of a real harness.
|
|
10
|
+
|
|
11
|
+
## Systems reviewed
|
|
12
|
+
|
|
13
|
+
| System | Relevant lessons for `vgxness` |
|
|
14
|
+
|---|---|
|
|
15
|
+
| Anthropic agent patterns | Keep workflows simple and composable; distinguish predictable workflows from autonomous agents; invest heavily in tool design and transparency. |
|
|
16
|
+
| Claude Code subagents | Subagents need isolated context, explicit tools, permissions, model selection, memory scopes, lifecycle hooks, and clear delegation descriptions. |
|
|
17
|
+
| OpenCode agents | Provider/tool configuration should support primary agents, subagents, per-agent permissions, model routing, task permissions, and markdown/JSON definitions. |
|
|
18
|
+
| OpenAI Agents SDK | Useful primitives: agents, handoffs, agents-as-tools, guardrails, sessions, human-in-the-loop, tracing, MCP, sandbox agents, and resumable workspaces. |
|
|
19
|
+
| LangGraph | Durable execution, checkpoints, streaming, human-in-the-loop, stateful workflows, memory, and deep traces matter for long-running agents. |
|
|
20
|
+
| AutoGen | Multi-agent systems benefit from layers: simple AgentChat, lower-level event-driven Core, extensions, distributed runtimes, and UI/studio tooling. |
|
|
21
|
+
| CrewAI | Productized multi-agent systems commonly include agents, crews, flows, tasks, memory, knowledge, guardrails, observability, persistence, and resume. |
|
|
22
|
+
|
|
23
|
+
## What the PRD already covers well
|
|
24
|
+
|
|
25
|
+
- Local-first memory.
|
|
26
|
+
- Project and personal/global memory scopes.
|
|
27
|
+
- SDD-first workflow.
|
|
28
|
+
- Agent and subagent registry from the MVP.
|
|
29
|
+
- Provider-agnostic model with OpenCode/Claude Code adapters.
|
|
30
|
+
- CLI for setup/configuration and integrations for day-to-day usage.
|
|
31
|
+
- Cloud sync and team workflows correctly deferred until later.
|
|
32
|
+
|
|
33
|
+
## Missing or underdefined areas
|
|
34
|
+
|
|
35
|
+
### 1. Runtime/run model
|
|
36
|
+
|
|
37
|
+
`vgxness` needs a first-class concept of a **run**.
|
|
38
|
+
|
|
39
|
+
Minimum fields:
|
|
40
|
+
|
|
41
|
+
- run id
|
|
42
|
+
- project id/path
|
|
43
|
+
- user intent
|
|
44
|
+
- phase/workflow
|
|
45
|
+
- selected agent/subagent
|
|
46
|
+
- provider adapter
|
|
47
|
+
- model
|
|
48
|
+
- tool calls
|
|
49
|
+
- artifacts read/written
|
|
50
|
+
- memory reads/writes
|
|
51
|
+
- approvals
|
|
52
|
+
- verification evidence
|
|
53
|
+
- final status
|
|
54
|
+
|
|
55
|
+
Why it matters: without runs, the harness cannot resume, debug, audit, or explain agent behavior.
|
|
56
|
+
|
|
57
|
+
### 2. Permission and sandbox model
|
|
58
|
+
|
|
59
|
+
The PRD mentions agents and integrations, but not the security boundary.
|
|
60
|
+
|
|
61
|
+
Needed capabilities:
|
|
62
|
+
|
|
63
|
+
- Read/write/shell/network/git/memory permission categories.
|
|
64
|
+
- Per-agent and per-tool permissions.
|
|
65
|
+
- Human approval gates for destructive, external, or privileged operations.
|
|
66
|
+
- Workspace boundary enforcement.
|
|
67
|
+
- Optional sandbox/worktree strategy for implementation agents.
|
|
68
|
+
|
|
69
|
+
This is NOT optional. A harness that can run agents without strong permissions is a loaded weapon.
|
|
70
|
+
|
|
71
|
+
### 3. Provider adapter contract
|
|
72
|
+
|
|
73
|
+
Provider-agnostic intent is correct, but the PRD needs an adapter interface.
|
|
74
|
+
|
|
75
|
+
Each adapter should declare:
|
|
76
|
+
|
|
77
|
+
- supported agent definition fields
|
|
78
|
+
- supported permissions
|
|
79
|
+
- supported memory injection modes
|
|
80
|
+
- supported subagent/task model
|
|
81
|
+
- supported hooks/lifecycle events
|
|
82
|
+
- config file locations
|
|
83
|
+
- limitations
|
|
84
|
+
- export/render format
|
|
85
|
+
|
|
86
|
+
This prevents `vgxness` from pretending all tools support the same features.
|
|
87
|
+
|
|
88
|
+
### 4. Agent definition schema
|
|
89
|
+
|
|
90
|
+
The agent registry needs a neutral schema, not just “store agents”.
|
|
91
|
+
|
|
92
|
+
Suggested minimum schema:
|
|
93
|
+
|
|
94
|
+
- name
|
|
95
|
+
- description/delegation trigger
|
|
96
|
+
- role/system instructions
|
|
97
|
+
- mode: primary/subagent/workflow-phase
|
|
98
|
+
- capabilities
|
|
99
|
+
- allowed tools
|
|
100
|
+
- denied tools
|
|
101
|
+
- model preference
|
|
102
|
+
- memory scopes
|
|
103
|
+
- SDD phases supported
|
|
104
|
+
- max steps/turns
|
|
105
|
+
- required approvals
|
|
106
|
+
- adapter overrides
|
|
107
|
+
|
|
108
|
+
### 5. Tool/ACI design
|
|
109
|
+
|
|
110
|
+
Agent-computer interface design is a product feature.
|
|
111
|
+
|
|
112
|
+
Needed:
|
|
113
|
+
|
|
114
|
+
- Tool descriptions optimized for model usage.
|
|
115
|
+
- Safe input schemas.
|
|
116
|
+
- Examples and edge cases per tool.
|
|
117
|
+
- Clear boundaries between similar tools.
|
|
118
|
+
- Tool-level tests/evals to catch misuse.
|
|
119
|
+
|
|
120
|
+
Bad tools create bad agents. This is where a lot of harnesses quietly fail.
|
|
121
|
+
|
|
122
|
+
### 6. Durable execution and resume
|
|
123
|
+
|
|
124
|
+
SDD creates long-running work. Long-running work needs checkpoints.
|
|
125
|
+
|
|
126
|
+
Needed:
|
|
127
|
+
|
|
128
|
+
- run checkpoints
|
|
129
|
+
- phase checkpoints
|
|
130
|
+
- apply-progress merge rules
|
|
131
|
+
- resumable interrupted runs
|
|
132
|
+
- idempotency expectations for tools
|
|
133
|
+
- outcome classification: blocked, failed, needs-human, cancelled, completed
|
|
134
|
+
|
|
135
|
+
### 7. Observability and debugging
|
|
136
|
+
|
|
137
|
+
The product needs traces, not just logs.
|
|
138
|
+
|
|
139
|
+
Minimum trace entities:
|
|
140
|
+
|
|
141
|
+
- run
|
|
142
|
+
- phase
|
|
143
|
+
- agent/subagent invocation
|
|
144
|
+
- tool call
|
|
145
|
+
- memory operation
|
|
146
|
+
- artifact operation
|
|
147
|
+
- approval decision
|
|
148
|
+
- verification command/result
|
|
149
|
+
|
|
150
|
+
Nice-to-have later:
|
|
151
|
+
|
|
152
|
+
- token/cost tracking
|
|
153
|
+
- model latency
|
|
154
|
+
- failure heatmap
|
|
155
|
+
- timeline UI/export
|
|
156
|
+
|
|
157
|
+
### 8. Evaluation and quality gates
|
|
158
|
+
|
|
159
|
+
The PRD defines success criteria, but no evals to verify them.
|
|
160
|
+
|
|
161
|
+
Needed MVP evals:
|
|
162
|
+
|
|
163
|
+
- agent resolution chooses the expected agent
|
|
164
|
+
- SDD artifact chain remains complete
|
|
165
|
+
- memory upsert/revision behavior is durable
|
|
166
|
+
- provider adapter renders valid config
|
|
167
|
+
- permission model blocks unsafe operations
|
|
168
|
+
- resume restores the expected run state
|
|
169
|
+
|
|
170
|
+
### 9. Artifact portability
|
|
171
|
+
|
|
172
|
+
Memory-only artifacts are fast, but PRD/review workflows need portability.
|
|
173
|
+
|
|
174
|
+
Needed:
|
|
175
|
+
|
|
176
|
+
- export SDD artifacts to markdown/json
|
|
177
|
+
- import artifacts back into memory
|
|
178
|
+
- snapshot a run for debugging or sharing
|
|
179
|
+
- redact sensitive data during export
|
|
180
|
+
|
|
181
|
+
### 10. CLI surface definition
|
|
182
|
+
|
|
183
|
+
The PRD calls for a CLI, but the initial command set is still undefined.
|
|
184
|
+
|
|
185
|
+
Candidate MVP commands:
|
|
186
|
+
|
|
187
|
+
- `vgx init`
|
|
188
|
+
- `vgx memory search|get|save|update`
|
|
189
|
+
- `vgx agent list|add|render|validate`
|
|
190
|
+
- `vgx sdd new|continue|status|archive`
|
|
191
|
+
- `vgx run list|show|resume`
|
|
192
|
+
- `vgx adapter doctor|render`
|
|
193
|
+
|
|
194
|
+
## Recommended MVP additions to PRD
|
|
195
|
+
|
|
196
|
+
Add these as explicit MVP requirements:
|
|
197
|
+
|
|
198
|
+
1. **Run lifecycle model** — every agentic operation is captured as a resumable/auditable run.
|
|
199
|
+
2. **Permission model** — per-agent tool permissions with human approval gates.
|
|
200
|
+
3. **Provider adapter contract** — adapters translate neutral `vgxness` definitions into provider-specific configs.
|
|
201
|
+
4. **Agent schema** — neutral registry schema for agents/subagents/workflow-phase agents.
|
|
202
|
+
5. **Trace model** — structured trace records for runs, tools, memory, artifacts, approvals, and verification.
|
|
203
|
+
6. **Artifact export/import** — SDD and memory artifacts can be exported for review/debugging and imported back into memory.
|
|
204
|
+
7. **Evaluation harness** — tests/evals for agent resolution, adapters, permissions, memory, and resume.
|
|
205
|
+
|
|
206
|
+
## Suggested next SDD change
|
|
207
|
+
|
|
208
|
+
Create a new SDD change named `harness-runtime-foundation`.
|
|
209
|
+
|
|
210
|
+
Scope it narrowly:
|
|
211
|
+
|
|
212
|
+
- define run lifecycle schema
|
|
213
|
+
- define agent registry schema
|
|
214
|
+
- define permission categories
|
|
215
|
+
- define provider adapter interface
|
|
216
|
+
- add CLI validation/render skeleton
|
|
217
|
+
- add tests for schemas and adapter rendering
|
|
218
|
+
|
|
219
|
+
Do **not** implement full cloud sync, distributed agents, web UI, or team workflows yet.
|
|
220
|
+
|
|
221
|
+
## Sources
|
|
222
|
+
|
|
223
|
+
- Anthropic: Building effective agents
|
|
224
|
+
- Claude Code: subagents documentation
|
|
225
|
+
- OpenCode: agents documentation
|
|
226
|
+
- OpenAI Agents SDK documentation
|
|
227
|
+
- LangGraph overview
|
|
228
|
+
- Microsoft AutoGen documentation
|
|
229
|
+
- CrewAI documentation
|
package/docs/prd.md
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
# vgxness PRD
|
|
2
|
+
|
|
3
|
+
`vgxness` is a local-first, Gentle-AI-like agentic harness for advanced individual developers who want to configure AI coding tools, coordinate agents/subagents, use persistent memory, and run SDD workflows with explicit runtime state.
|
|
4
|
+
|
|
5
|
+
## Product thesis
|
|
6
|
+
|
|
7
|
+
AI coding tools are powerful, but their work is often fragmented: context is lost between sessions, agents are configured differently per tool, and substantial changes lack a repeatable planning/verification workflow.
|
|
8
|
+
|
|
9
|
+
`vgxness` solves this by providing a provider-agnostic harness layer with persistent memory, agent/subagent configuration, model/profile management, MCP tools, an intuitive CLI/TUI, and SDD-first development flows.
|
|
10
|
+
|
|
11
|
+
The product intentionally overlaps with systems such as Gentle-AI and `gentle-pi`: it should configure AI agents, install/sync SDD assets, wire memory, and support per-phase agent/model behavior. Its differentiator is that the workflow must not live only in prompts or agent instructions. `vgxness` should also maintain a **verifiable runtime control plane**: explicit phase state, readiness gates, artifacts, runs, approvals, checkpoints, and audit trails.
|
|
12
|
+
|
|
13
|
+
## Reference positioning
|
|
14
|
+
|
|
15
|
+
| Reference system | What it proves | What `vgxness` should learn | Where `vgxness` differentiates |
|
|
16
|
+
|---|---|---|---|
|
|
17
|
+
| Gentle-AI | AI coding tools can be upgraded through installed prompts, skills, SDD agents, model profiles, permissions, backups, and verification. | Installer/sync discipline, provider adapters, per-phase model routing, safe file merging, backups, golden tests. | Add a first-class local runtime state engine instead of relying only on configured agent behavior. |
|
|
18
|
+
| `gentle-pi` | Pi can own persona, SDD agents/chains, strict TDD support, safety rules, model assignment, and Engram wiring as packages. | Package-owned runtime behavior, startup asset checks, project-local SDD assets, child-agent communication. | Expose provider-neutral APIs for phase readiness, artifact state, run history, approvals, and resumable checkpoints across tools. |
|
|
19
|
+
| Engram | Persistent memory can preserve decisions, discoveries, prompts, SDD artifacts, and sessions across agent runs. | Memory as infrastructure, project detection, topic-key upserts, revisions, session summaries. | Treat memory as one backend behind explicit workflow/run state, not the only source of truth. |
|
|
20
|
+
|
|
21
|
+
## Primary user
|
|
22
|
+
|
|
23
|
+
| Segment | Priority | Description |
|
|
24
|
+
|---|---:|---|
|
|
25
|
+
| Advanced individual developer | MVP | A power user working with AI coding tools who needs durable context, structured workflows, and reusable agent configuration. |
|
|
26
|
+
| Development teams | Future | Teams that need shared workflows, governance, permissions, audit trails, PR coordination, and cloud sync. |
|
|
27
|
+
|
|
28
|
+
## MVP scope
|
|
29
|
+
|
|
30
|
+
The MVP must prove that a single developer can use `vgxness` locally to manage agentic development work with less context loss and more reliable execution. It must feel comparable to a modern agent ecosystem harness while proving one stronger claim: SDD state can be queried, gated, resumed, and audited through product APIs instead of depending only on LLM discipline.
|
|
31
|
+
|
|
32
|
+
### 1. Local-first memory
|
|
33
|
+
|
|
34
|
+
The product must support persistent memory without requiring a cloud account.
|
|
35
|
+
|
|
36
|
+
Minimum capabilities:
|
|
37
|
+
|
|
38
|
+
- Project memory for repository-specific decisions, SDD artifacts, sessions, progress, and codebase discoveries.
|
|
39
|
+
- Personal/global memory for user preferences, reusable patterns, standards, and cross-project learnings.
|
|
40
|
+
- Search, read, create, update, and topic-key upsert flows.
|
|
41
|
+
- Durable revisions for evolving observations and artifacts.
|
|
42
|
+
- Traceability for why memory was created or updated.
|
|
43
|
+
|
|
44
|
+
### 2. SDD-first workflow
|
|
45
|
+
|
|
46
|
+
The product must make Spec-Driven Development the primary development path.
|
|
47
|
+
|
|
48
|
+
Minimum canonical workflow:
|
|
49
|
+
|
|
50
|
+
1. Explore
|
|
51
|
+
2. Proposal
|
|
52
|
+
3. Spec
|
|
53
|
+
4. Design
|
|
54
|
+
5. Tasks
|
|
55
|
+
6. Apply progress
|
|
56
|
+
7. Verify
|
|
57
|
+
8. Archive
|
|
58
|
+
|
|
59
|
+
Canonical artifact phase keys use `proposal` and `apply-progress`. User-facing commands may later expose friendlier verbs such as “propose” or “apply,” but stored workflow state must use one canonical vocabulary.
|
|
60
|
+
|
|
61
|
+
Minimum capabilities:
|
|
62
|
+
|
|
63
|
+
- Persist SDD artifacts in memory.
|
|
64
|
+
- Expose phase status, readiness, and missing prerequisites through CLI/API/MCP calls.
|
|
65
|
+
- Block or warn before a phase runs without required artifacts or approvals.
|
|
66
|
+
- Continue interrupted changes from stored state.
|
|
67
|
+
- Track apply progress without overwriting previous work.
|
|
68
|
+
- Support review workload planning for large changes.
|
|
69
|
+
- Keep verification results linked to specs, tasks, runs, and evidence.
|
|
70
|
+
|
|
71
|
+
### 3. Agent and subagent registry
|
|
72
|
+
|
|
73
|
+
The product must include agent/subagent registration and configuration from the start.
|
|
74
|
+
|
|
75
|
+
Minimum capabilities:
|
|
76
|
+
|
|
77
|
+
- Define agents and subagents in a provider-neutral model.
|
|
78
|
+
- Describe capabilities, instructions, permissions, memory access, and compatible workflows.
|
|
79
|
+
- Resolve which agents should be used for a task.
|
|
80
|
+
- Render or preview provider-specific agent/subagent configuration from neutral definitions.
|
|
81
|
+
- Support future provider adapters without changing the core domain model.
|
|
82
|
+
|
|
83
|
+
### 4. Skill registry and controlled self-improvement
|
|
84
|
+
|
|
85
|
+
The product must manage skills as first-class reusable assets, not just static prompt files.
|
|
86
|
+
|
|
87
|
+
Minimum capabilities:
|
|
88
|
+
|
|
89
|
+
- Register skills independently from agents and workflows.
|
|
90
|
+
- Attach skills to agents, subagents, SDD phases, or provider adapters.
|
|
91
|
+
- Track skill versions, sources, compatibility, and usage history.
|
|
92
|
+
- Evaluate whether a skill helped or failed during a run.
|
|
93
|
+
- Propose skill improvements from traces, failures, repeated corrections, and memory discoveries.
|
|
94
|
+
- Generate reviewable skill diffs instead of silently mutating active skills.
|
|
95
|
+
- Require human approval before a proposed skill improvement becomes active.
|
|
96
|
+
|
|
97
|
+
Self-improvement loop:
|
|
98
|
+
|
|
99
|
+
1. Observe runs, traces, failures, and repeated user corrections.
|
|
100
|
+
2. Detect a candidate improvement.
|
|
101
|
+
3. Draft a skill update as a versioned proposal.
|
|
102
|
+
4. Evaluate the proposal against relevant scenarios.
|
|
103
|
+
5. Ask for human approval.
|
|
104
|
+
6. Activate the approved version and preserve rollback history.
|
|
105
|
+
|
|
106
|
+
### 5. MCP server, CLI, TUI, and integrations
|
|
107
|
+
|
|
108
|
+
The product must provide three first-class interfaces over the same local core:
|
|
109
|
+
|
|
110
|
+
- **MCP server** for agent-facing workflow/state tools.
|
|
111
|
+
- **CLI** for scriptable setup, inspection, and automation.
|
|
112
|
+
- **TUI** for guided installation, onboarding, configuration, and visual status.
|
|
113
|
+
|
|
114
|
+
The core rule: MCP, CLI, and TUI must call the same services and storage. They must not each reimplement workflow rules.
|
|
115
|
+
|
|
116
|
+
#### MCP server
|
|
117
|
+
|
|
118
|
+
The MCP server is the main integration surface for AI coding tools. It should expose safe, typed tools that let agents query and update product state without editing config files directly.
|
|
119
|
+
|
|
120
|
+
Minimum MCP capabilities:
|
|
121
|
+
|
|
122
|
+
- Start a local MCP server through `vgx mcp start` or an installed provider config.
|
|
123
|
+
- Install MCP integration for supported tools through guided setup.
|
|
124
|
+
- Expose SDD status, readiness, next-phase, and artifact operations.
|
|
125
|
+
- Expose run start/checkpoint/finalize and approval-request operations.
|
|
126
|
+
- Expose agent resolution and skill payload operations.
|
|
127
|
+
- Return actionable blocked states instead of relying on prompt interpretation.
|
|
128
|
+
|
|
129
|
+
Candidate MCP tools:
|
|
130
|
+
|
|
131
|
+
```text
|
|
132
|
+
vgxness_sdd_status
|
|
133
|
+
vgxness_sdd_next
|
|
134
|
+
vgxness_sdd_ready
|
|
135
|
+
vgxness_sdd_save_artifact
|
|
136
|
+
vgxness_run_start
|
|
137
|
+
vgxness_run_checkpoint
|
|
138
|
+
vgxness_run_request_approval
|
|
139
|
+
vgxness_agent_resolve
|
|
140
|
+
vgxness_skill_payload
|
|
141
|
+
vgxness_profile_get
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
#### CLI
|
|
145
|
+
|
|
146
|
+
The CLI must be intuitive, predictable, and useful without reading internal docs.
|
|
147
|
+
|
|
148
|
+
Minimum CLI capabilities:
|
|
149
|
+
|
|
150
|
+
- Initialize `vgxness` in a project.
|
|
151
|
+
- Install/sync managed agent assets without clobbering user-owned config.
|
|
152
|
+
- Install, inspect, and remove MCP integration for supported tools.
|
|
153
|
+
- Configure memory scopes and storage.
|
|
154
|
+
- Configure agents and subagents.
|
|
155
|
+
- Configure per-phase model/profile assignments.
|
|
156
|
+
- Inspect SDD artifacts and memory.
|
|
157
|
+
- Export or debug product state.
|
|
158
|
+
- Verify installation/configuration health and report rollback options.
|
|
159
|
+
|
|
160
|
+
Candidate CLI shape:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
vgx init
|
|
164
|
+
vgx doctor
|
|
165
|
+
vgx status
|
|
166
|
+
vgx mcp install opencode
|
|
167
|
+
vgx mcp status
|
|
168
|
+
vgx sdd status <change>
|
|
169
|
+
vgx sdd next <change>
|
|
170
|
+
vgx profiles list
|
|
171
|
+
vgx profiles set apply <model>
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
#### TUI
|
|
175
|
+
|
|
176
|
+
The TUI is the default human onboarding and local operations surface. It should optimize for the next meaningful decision, not decorative metrics.
|
|
177
|
+
|
|
178
|
+
Minimum TUI screens:
|
|
179
|
+
|
|
180
|
+
- **Setup** — detect environment/project state, choose providers, inspect agents, run verification, and show copy-only external setup guidance without silent provider writes.
|
|
181
|
+
- **Workflows** — available workflow paths and safe next actions.
|
|
182
|
+
- **Runs** — run history, current run state, blockers, and resumable execution.
|
|
183
|
+
- **Approvals** — pending permission requests and safety gates.
|
|
184
|
+
- **Agents** — registered agents/subagents, attached skills, model/profile routing, and provider compatibility.
|
|
185
|
+
- **SDD** — change list, phase progress, missing artifacts, next ready phase, blockers, and resumable runs.
|
|
186
|
+
- **Doctor** — health checks, broken config, MCP availability, memory status, adapter issues, and suggested fixes.
|
|
187
|
+
- **Settings** — local preferences and configuration status without silent provider writes.
|
|
188
|
+
|
|
189
|
+
TUI state requirements:
|
|
190
|
+
|
|
191
|
+
- Loading, empty, error, success, blocked, and permission states must be explicit and actionable.
|
|
192
|
+
- Keyboard navigation and visible focus are required.
|
|
193
|
+
- The dashboard TUI is read-only: dashboard flows never write, install, or apply provider configuration. Those operations are external-only and require explicit confirmation outside the dashboard.
|
|
194
|
+
|
|
195
|
+
#### Installation experience
|
|
196
|
+
|
|
197
|
+
Installation should be step-based and avoid manual JSON editing for the happy path.
|
|
198
|
+
|
|
199
|
+
Target flow:
|
|
200
|
+
|
|
201
|
+
```text
|
|
202
|
+
1. Install binary
|
|
203
|
+
2. Run `vgx`
|
|
204
|
+
3. Select AI tools
|
|
205
|
+
4. Install MCP integration
|
|
206
|
+
5. Configure memory
|
|
207
|
+
6. Choose SDD mode/profile
|
|
208
|
+
7. Run doctor
|
|
209
|
+
8. Open the AI tool and start working
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
The same flow must be available through CLI flags for automation and CI-friendly dry runs.
|
|
213
|
+
|
|
214
|
+
Initial integration targets:
|
|
215
|
+
|
|
216
|
+
- OpenCode
|
|
217
|
+
- Claude Code
|
|
218
|
+
|
|
219
|
+
Pi/`gentle-pi` is a design reference and future adapter target, not part of the first integration target list unless the MVP scope is explicitly expanded.
|
|
220
|
+
|
|
221
|
+
The integration model must remain provider-agnostic. Tool-specific behavior belongs in adapters, not in the core domain.
|
|
222
|
+
|
|
223
|
+
### 6. Harness runtime foundation
|
|
224
|
+
|
|
225
|
+
The product must model agentic work as explicit, auditable, resumable runs.
|
|
226
|
+
|
|
227
|
+
Minimum capabilities:
|
|
228
|
+
|
|
229
|
+
- Create a run record for each meaningful agentic operation.
|
|
230
|
+
- Track user intent, project, workflow phase, selected agent, provider adapter, model, artifacts, memory operations, approvals, tool calls, verification evidence, and final status.
|
|
231
|
+
- Support interrupted work through checkpoints and resumable run state.
|
|
232
|
+
- Classify run outcomes: completed, failed, blocked, cancelled, needs-human, or partially-complete.
|
|
233
|
+
- Keep run state separate from long-term memory and SDD artifacts.
|
|
234
|
+
|
|
235
|
+
### 7. Permission and sandbox model
|
|
236
|
+
|
|
237
|
+
The product must treat agent permissions as a core safety feature, not as adapter-specific behavior.
|
|
238
|
+
|
|
239
|
+
Minimum capabilities:
|
|
240
|
+
|
|
241
|
+
- Define permission categories for read, write/edit, shell, network, git, memory, external directories, and provider-specific tools.
|
|
242
|
+
- Support per-agent and per-subagent permissions.
|
|
243
|
+
- Require human approval for destructive, external, privileged, or ambiguous operations.
|
|
244
|
+
- Enforce workspace boundaries.
|
|
245
|
+
- Support future isolated execution strategies, such as worktrees or sandboxes, for implementation agents.
|
|
246
|
+
|
|
247
|
+
### 8. Provider adapter contract
|
|
248
|
+
|
|
249
|
+
The product must translate `vgxness` concepts into provider-specific configuration through adapters.
|
|
250
|
+
|
|
251
|
+
Each adapter must declare:
|
|
252
|
+
|
|
253
|
+
- Supported agent definition fields.
|
|
254
|
+
- Supported skill definition and injection modes.
|
|
255
|
+
- Supported permission categories.
|
|
256
|
+
- Supported memory injection modes.
|
|
257
|
+
- Supported subagent/task delegation model.
|
|
258
|
+
- Supported hooks or lifecycle events.
|
|
259
|
+
- Config file locations.
|
|
260
|
+
- Known limitations.
|
|
261
|
+
- Render/export format.
|
|
262
|
+
|
|
263
|
+
Adapters must not redefine the core product model.
|
|
264
|
+
|
|
265
|
+
### 9. Observability and evaluation
|
|
266
|
+
|
|
267
|
+
The product must make agentic behavior inspectable and testable.
|
|
268
|
+
|
|
269
|
+
Minimum observability capabilities:
|
|
270
|
+
|
|
271
|
+
- Structured traces for runs, phases, agent invocations, tool calls, memory operations, artifact operations, approvals, and verification commands.
|
|
272
|
+
- Debuggable timelines for completed or failed runs.
|
|
273
|
+
- Redaction strategy for sensitive data before export or sharing.
|
|
274
|
+
|
|
275
|
+
Minimum evaluation capabilities:
|
|
276
|
+
|
|
277
|
+
- Agent resolution chooses the expected agent for representative tasks.
|
|
278
|
+
- Skill resolution injects the expected reusable guidance for representative tasks.
|
|
279
|
+
- Skill improvement proposals are reviewable, versioned, and gated by approval.
|
|
280
|
+
- SDD artifact chains remain complete and linked.
|
|
281
|
+
- Memory revision/upsert behavior remains durable.
|
|
282
|
+
- Provider adapters render valid config.
|
|
283
|
+
- Permission rules block unsafe operations.
|
|
284
|
+
- Resume restores expected run state.
|
|
285
|
+
|
|
286
|
+
### 10. Artifact portability
|
|
287
|
+
|
|
288
|
+
The product must support memory-first operation without leaving artifacts trapped exclusively in memory.
|
|
289
|
+
|
|
290
|
+
Minimum capabilities:
|
|
291
|
+
|
|
292
|
+
- Export SDD artifacts to Markdown or JSON.
|
|
293
|
+
- Import exported artifacts back into memory.
|
|
294
|
+
- Snapshot runs for debugging or review.
|
|
295
|
+
- Redact sensitive data during export.
|
|
296
|
+
|
|
297
|
+
## Non-goals for MVP
|
|
298
|
+
|
|
299
|
+
- Cloud sync.
|
|
300
|
+
- Multi-user collaboration.
|
|
301
|
+
- Team permissions and governance.
|
|
302
|
+
- Web dashboard.
|
|
303
|
+
- Hosted memory service.
|
|
304
|
+
- Marketplace for agents.
|
|
305
|
+
- Full provider parity across every AI coding tool.
|
|
306
|
+
- Distributed multi-worker execution.
|
|
307
|
+
- Production cloud-hosted tracing.
|
|
308
|
+
- Fully autonomous, unreviewed skill mutation.
|
|
309
|
+
|
|
310
|
+
These are future expansion areas, not MVP blockers.
|
|
311
|
+
|
|
312
|
+
## Product principles
|
|
313
|
+
|
|
314
|
+
| Principle | Meaning |
|
|
315
|
+
|---|---|
|
|
316
|
+
| Local-first | The developer owns their data and can work offline. |
|
|
317
|
+
| Provider-agnostic | Core concepts must not be coupled to OpenCode, Claude Code, or any single vendor. |
|
|
318
|
+
| SDD-first | Substantial work should move through explicit planning, implementation, verification, and archive phases. |
|
|
319
|
+
| MCP-first for agents | AI tools should interact with the harness through typed MCP tools instead of prompt-only conventions. |
|
|
320
|
+
| Guided by default | The TUI should make setup and diagnosis understandable without manual config editing. |
|
|
321
|
+
| Memory as infrastructure | Memory is not a chat convenience; it is a durable product substrate. |
|
|
322
|
+
| Reviewable work | The system should help keep work units understandable, traceable, and verifiable. |
|
|
323
|
+
| Safe by default | Agents must operate inside explicit permissions and auditable boundaries. |
|
|
324
|
+
| Inspectable runtime | Every meaningful agentic operation should be explainable after it happens. |
|
|
325
|
+
| Controlled self-improvement | The system may propose better skills, but humans approve what becomes active. |
|
|
326
|
+
|
|
327
|
+
## Future roadmap
|
|
328
|
+
|
|
329
|
+
After the MVP proves local individual usage, expand toward:
|
|
330
|
+
|
|
331
|
+
- Cloud sync across machines.
|
|
332
|
+
- Team/shared memory spaces.
|
|
333
|
+
- Permissions, governance, and audit trails.
|
|
334
|
+
- PR and chained-PR coordination.
|
|
335
|
+
- Hosted dashboard for inspection and collaboration.
|
|
336
|
+
- Additional provider adapters.
|
|
337
|
+
- Import/export between local and cloud memory backends.
|
|
338
|
+
- Distributed agent workers.
|
|
339
|
+
- Hosted observability and evaluation dashboard.
|
|
340
|
+
- Skill marketplace or shared skill catalog.
|
|
341
|
+
|
|
342
|
+
## Success criteria
|
|
343
|
+
|
|
344
|
+
The MVP is successful when an advanced individual developer can:
|
|
345
|
+
|
|
346
|
+
- Initialize `vgxness` in a repo.
|
|
347
|
+
- Complete the guided TUI setup without manually editing provider config.
|
|
348
|
+
- Install and verify MCP integration for at least one AI coding tool.
|
|
349
|
+
- Configure personal and project memory.
|
|
350
|
+
- Register agents/subagents in a provider-neutral way.
|
|
351
|
+
- Register skills, attach them to agents/workflows, and review proposed improvements.
|
|
352
|
+
- Run an SDD change from idea through archive.
|
|
353
|
+
- Resume work across sessions without losing critical context.
|
|
354
|
+
- Use at least one external AI coding tool integration through an adapter.
|
|
355
|
+
- Use the CLI to inspect status, run doctor, and debug product state.
|
|
356
|
+
- Inspect a run timeline with agents, tools, memory/artifact operations, approvals, and verification evidence.
|
|
357
|
+
- Validate that unsafe operations are blocked or require approval.
|
|
358
|
+
|
|
359
|
+
## Open questions
|
|
360
|
+
|
|
361
|
+
- What is the first integration adapter: OpenCode or Claude Code?
|
|
362
|
+
- Should memory storage be per-repo by default, with global memory in a user-level directory?
|
|
363
|
+
- What config format should define agents/subagents?
|
|
364
|
+
- What config format should define skills and skill versions?
|
|
365
|
+
- Which skill improvement proposals should require human approval, and which should be rejected automatically?
|
|
366
|
+
- Which commands form the first public CLI surface?
|
|
367
|
+
- Which TUI framework should be used for the first implementation?
|
|
368
|
+
- Should the MCP server run only over stdio for MVP, or also support local HTTP later?
|
|
369
|
+
- What is the safest default install command and update channel?
|
|
370
|
+
- What privacy/export guarantees are required before public release?
|
|
371
|
+
- What is the first sandbox strategy: normal workspace, git worktree, or process/container isolation?
|
|
372
|
+
- What trace format should be used for local inspection and future cloud sync?
|
package/package.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "vgxness",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Alpha CLI and MCP control plane for guided AI-agent workflows, SDD, memory, and OpenCode setup.",
|
|
5
|
+
"license": "SEE LICENSE IN LICENSE",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "git+https://github.com/uzielvgx/vgxness.git"
|
|
9
|
+
},
|
|
10
|
+
"homepage": "https://github.com/uzielvgx/vgxness#readme",
|
|
11
|
+
"bugs": {
|
|
12
|
+
"url": "https://github.com/uzielvgx/vgxness/issues"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"mcp",
|
|
16
|
+
"opencode",
|
|
17
|
+
"agents",
|
|
18
|
+
"sdd",
|
|
19
|
+
"cli"
|
|
20
|
+
],
|
|
21
|
+
"type": "module",
|
|
22
|
+
"scripts": {
|
|
23
|
+
"cli": "tsx src/cli/index.ts",
|
|
24
|
+
"build": "tsc -p tsconfig.build.json && node scripts/copy-migrations.mjs",
|
|
25
|
+
"prepack": "npm run build",
|
|
26
|
+
"package:dry-run": "npm pack --dry-run --json",
|
|
27
|
+
"package:release-check": "node scripts/validate-package-release.mjs",
|
|
28
|
+
"package:smoke:install": "node scripts/smoke-tarball-install.mjs",
|
|
29
|
+
"test": "node --import tsx --test \"test/**/*.test.ts\"",
|
|
30
|
+
"typecheck": "tsc --noEmit"
|
|
31
|
+
},
|
|
32
|
+
"bin": {
|
|
33
|
+
"vgxness": "dist/cli/index.js",
|
|
34
|
+
"vgx": "dist/cli/index.js"
|
|
35
|
+
},
|
|
36
|
+
"files": [
|
|
37
|
+
"dist",
|
|
38
|
+
"package.json",
|
|
39
|
+
"README.md",
|
|
40
|
+
"LICENSE",
|
|
41
|
+
"docs"
|
|
42
|
+
],
|
|
43
|
+
"dependencies": {
|
|
44
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
45
|
+
"better-sqlite3": "^11.10.0",
|
|
46
|
+
"zod": "^4.4.3"
|
|
47
|
+
},
|
|
48
|
+
"devDependencies": {
|
|
49
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
50
|
+
"@types/node": "^22.15.18",
|
|
51
|
+
"tsx": "^4.19.4",
|
|
52
|
+
"typescript": "^5.8.3"
|
|
53
|
+
},
|
|
54
|
+
"engines": {
|
|
55
|
+
"node": ">=22"
|
|
56
|
+
}
|
|
57
|
+
}
|