vgxness 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/LICENSE +9 -0
  2. package/README.md +110 -0
  3. package/dist/agents/agent-activation-service.js +144 -0
  4. package/dist/agents/agent-registry-service.js +46 -0
  5. package/dist/agents/agent-resolver.js +249 -0
  6. package/dist/agents/agent-seed-service.js +146 -0
  7. package/dist/agents/manager-profile-overlay-service.js +34 -0
  8. package/dist/agents/profile-model-routing.js +26 -0
  9. package/dist/agents/renderers/claude-renderer.js +98 -0
  10. package/dist/agents/renderers/index.js +16 -0
  11. package/dist/agents/renderers/json-renderer.js +87 -0
  12. package/dist/agents/renderers/opencode-renderer.js +100 -0
  13. package/dist/agents/renderers/provider-adapter.js +6 -0
  14. package/dist/agents/repositories/agents.js +185 -0
  15. package/dist/agents/repositories/manager-profile-overlays.js +81 -0
  16. package/dist/agents/schema.js +1 -0
  17. package/dist/cli/dashboard-operational-read-models.js +153 -0
  18. package/dist/cli/dashboard-renderer.js +109 -0
  19. package/dist/cli/dashboard-screen-renderers.js +332 -0
  20. package/dist/cli/dashboard-tui-read-model.js +71 -0
  21. package/dist/cli/dashboard-tui-state.js +218 -0
  22. package/dist/cli/dispatcher.js +2880 -0
  23. package/dist/cli/index.js +27 -0
  24. package/dist/cli/interactive-dashboard.js +29 -0
  25. package/dist/cli/mcp-start-path.js +21 -0
  26. package/dist/cli/setup-status-renderer.js +29 -0
  27. package/dist/cli/setup-wizard-read-model.js +56 -0
  28. package/dist/cli/setup-wizard-renderer.js +148 -0
  29. package/dist/cli/setup-wizard-state.js +82 -0
  30. package/dist/cli/tui-render-helpers.js +192 -0
  31. package/dist/export/redaction.js +71 -0
  32. package/dist/harness/tools/agents.js +245 -0
  33. package/dist/harness/tools/memory.js +29 -0
  34. package/dist/mcp/client-install-opencode-contract.js +227 -0
  35. package/dist/mcp/client-install-opencode.js +194 -0
  36. package/dist/mcp/client-setup-preview.js +38 -0
  37. package/dist/mcp/control-plane.js +175 -0
  38. package/dist/mcp/doctor.js +193 -0
  39. package/dist/mcp/index.js +10 -0
  40. package/dist/mcp/opencode-default-agent-config.js +156 -0
  41. package/dist/mcp/opencode-visibility.js +102 -0
  42. package/dist/mcp/schema.js +234 -0
  43. package/dist/mcp/stdio-server.js +56 -0
  44. package/dist/mcp/validation.js +761 -0
  45. package/dist/memory/import/dry-run-planner.js +58 -0
  46. package/dist/memory/import/index.js +3 -0
  47. package/dist/memory/import/observation-writer.js +220 -0
  48. package/dist/memory/import/package.js +178 -0
  49. package/dist/memory/memory-service.js +126 -0
  50. package/dist/memory/repositories/artifacts.js +41 -0
  51. package/dist/memory/repositories/observations.js +133 -0
  52. package/dist/memory/repositories/sessions.js +105 -0
  53. package/dist/memory/repositories/traces.js +58 -0
  54. package/dist/memory/schema.js +1 -0
  55. package/dist/memory/search.js +11 -0
  56. package/dist/memory/sqlite/database.js +97 -0
  57. package/dist/memory/sqlite/migrations/001_initial.sql +128 -0
  58. package/dist/memory/sqlite/migrations/002_observation_revisions.sql +14 -0
  59. package/dist/memory/sqlite/migrations/003_agent_registry.sql +26 -0
  60. package/dist/memory/sqlite/migrations/004_run_runtime.sql +62 -0
  61. package/dist/memory/sqlite/migrations/005_run_approvals.sql +20 -0
  62. package/dist/memory/sqlite/migrations/006_run_operation_attempts.sql +32 -0
  63. package/dist/memory/sqlite/migrations/007_abandoned_operation_attempts.sql +46 -0
  64. package/dist/memory/sqlite/migrations/008_run_execution_plan_events.sql +105 -0
  65. package/dist/memory/sqlite/migrations/009_multiple_operation_attempts.sql +73 -0
  66. package/dist/memory/sqlite/migrations/010_skill_registry.sql +66 -0
  67. package/dist/memory/sqlite/migrations/011_skill_usage_resolution_outcomes.sql +21 -0
  68. package/dist/memory/sqlite/migrations/012_skill_improvement_proposals.sql +37 -0
  69. package/dist/memory/sqlite/migrations/013_skill_evaluation_scenarios.sql +43 -0
  70. package/dist/memory/sqlite/migrations/014_manager_profile_overlays.sql +14 -0
  71. package/dist/memory/storage-paths.js +72 -0
  72. package/dist/orchestrator/natural-language-planner.js +191 -0
  73. package/dist/orchestrator/schema.js +1 -0
  74. package/dist/permissions/index.js +2 -0
  75. package/dist/permissions/policy-evaluator.js +109 -0
  76. package/dist/permissions/schema.js +1 -0
  77. package/dist/providers/opencode/injection-preview.js +134 -0
  78. package/dist/providers/opencode/manager-payload.js +129 -0
  79. package/dist/runs/execution-planning.js +117 -0
  80. package/dist/runs/operation-execution.js +1 -0
  81. package/dist/runs/operation-retry.js +124 -0
  82. package/dist/runs/repositories/runs.js +611 -0
  83. package/dist/runs/run-insights.js +145 -0
  84. package/dist/runs/run-service.js +713 -0
  85. package/dist/runs/run-snapshot-export-service.js +31 -0
  86. package/dist/runs/sandbox-process-execution.js +218 -0
  87. package/dist/runs/sandbox-worktree-planning.js +59 -0
  88. package/dist/runs/schema.js +1 -0
  89. package/dist/sdd/artifact-portability-service.js +118 -0
  90. package/dist/sdd/schema.js +17 -0
  91. package/dist/sdd/sdd-workflow-service.js +217 -0
  92. package/dist/setup/backup-rollback-service.js +76 -0
  93. package/dist/setup/index.js +3 -0
  94. package/dist/setup/providers/antigravity-setup-adapter.js +18 -0
  95. package/dist/setup/providers/claude-setup-adapter.js +30 -0
  96. package/dist/setup/providers/custom-setup-adapter.js +18 -0
  97. package/dist/setup/providers/index.js +6 -0
  98. package/dist/setup/providers/opencode-setup-adapter.js +104 -0
  99. package/dist/setup/providers/provider-setup-adapter.js +15 -0
  100. package/dist/setup/providers/provider-setup-registry.js +11 -0
  101. package/dist/setup/schema.js +1 -0
  102. package/dist/setup/setup-defaults.js +11 -0
  103. package/dist/setup/setup-lifecycle-service.js +175 -0
  104. package/dist/setup/setup-plan.js +105 -0
  105. package/dist/skills/repositories/skill-evaluation-scenarios.js +289 -0
  106. package/dist/skills/repositories/skill-improvement-proposals.js +288 -0
  107. package/dist/skills/repositories/skills.js +430 -0
  108. package/dist/skills/schema.js +1 -0
  109. package/dist/skills/skill-payload.js +94 -0
  110. package/dist/skills/skill-registry-service.js +92 -0
  111. package/dist/skills/skill-resolver.js +191 -0
  112. package/dist/workflows/command-allowlist-adapter.js +70 -0
  113. package/dist/workflows/schema.js +4 -0
  114. package/dist/workflows/workflow-executor.js +345 -0
  115. package/dist/workflows/workflow-registry.js +66 -0
  116. package/docs/architecture.md +698 -0
  117. package/docs/cli.md +741 -0
  118. package/docs/funcionamiento-del-sistema.md +868 -0
  119. package/docs/harness-gap-analysis.md +229 -0
  120. package/docs/prd.md +372 -0
  121. package/package.json +57 -0
@@ -0,0 +1,229 @@
1
+ # Harness Systems Gap Analysis
2
+
3
+ This research compares current agent harness patterns against the `vgxness` PRD and identifies what the product still needs before it can become a serious local-first SDD harness.
4
+
5
+ ## Executive summary
6
+
7
+ The current PRD has the right product direction: local-first, provider-agnostic, memory-backed, SDD-first, and agent/subagent aware.
8
+
9
+ What is still missing is the **runtime contract**: permissions, sandboxing, run state, provider adapters, observability, evaluation, and artifact portability. Without these, `vgxness` risks becoming “memory + prompts” instead of a real harness.
10
+
11
+ ## Systems reviewed
12
+
13
+ | System | Relevant lessons for `vgxness` |
14
+ |---|---|
15
+ | Anthropic agent patterns | Keep workflows simple and composable; distinguish predictable workflows from autonomous agents; invest heavily in tool design and transparency. |
16
+ | Claude Code subagents | Subagents need isolated context, explicit tools, permissions, model selection, memory scopes, lifecycle hooks, and clear delegation descriptions. |
17
+ | OpenCode agents | Provider/tool configuration should support primary agents, subagents, per-agent permissions, model routing, task permissions, and markdown/JSON definitions. |
18
+ | OpenAI Agents SDK | Useful primitives: agents, handoffs, agents-as-tools, guardrails, sessions, human-in-the-loop, tracing, MCP, sandbox agents, and resumable workspaces. |
19
+ | LangGraph | Durable execution, checkpoints, streaming, human-in-the-loop, stateful workflows, memory, and deep traces matter for long-running agents. |
20
+ | AutoGen | Multi-agent systems benefit from layers: simple AgentChat, lower-level event-driven Core, extensions, distributed runtimes, and UI/studio tooling. |
21
+ | CrewAI | Productized multi-agent systems commonly include agents, crews, flows, tasks, memory, knowledge, guardrails, observability, persistence, and resume. |
22
+
23
+ ## What the PRD already covers well
24
+
25
+ - Local-first memory.
26
+ - Project and personal/global memory scopes.
27
+ - SDD-first workflow.
28
+ - Agent and subagent registry from the MVP.
29
+ - Provider-agnostic model with OpenCode/Claude Code adapters.
30
+ - CLI for setup/configuration and integrations for day-to-day usage.
31
+ - Cloud sync and team workflows correctly deferred until later.
32
+
33
+ ## Missing or underdefined areas
34
+
35
+ ### 1. Runtime/run model
36
+
37
+ `vgxness` needs a first-class concept of a **run**.
38
+
39
+ Minimum fields:
40
+
41
+ - run id
42
+ - project id/path
43
+ - user intent
44
+ - phase/workflow
45
+ - selected agent/subagent
46
+ - provider adapter
47
+ - model
48
+ - tool calls
49
+ - artifacts read/written
50
+ - memory reads/writes
51
+ - approvals
52
+ - verification evidence
53
+ - final status
54
+
55
+ Why it matters: without runs, the harness cannot resume, debug, audit, or explain agent behavior.
56
+
57
+ ### 2. Permission and sandbox model
58
+
59
+ The PRD mentions agents and integrations, but not the security boundary.
60
+
61
+ Needed capabilities:
62
+
63
+ - Read/write/shell/network/git/memory permission categories.
64
+ - Per-agent and per-tool permissions.
65
+ - Human approval gates for destructive, external, or privileged operations.
66
+ - Workspace boundary enforcement.
67
+ - Optional sandbox/worktree strategy for implementation agents.
68
+
69
+ This is NOT optional. A harness that can run agents without strong permissions is a loaded weapon.
70
+
71
+ ### 3. Provider adapter contract
72
+
73
+ Provider-agnostic intent is correct, but the PRD needs an adapter interface.
74
+
75
+ Each adapter should declare:
76
+
77
+ - supported agent definition fields
78
+ - supported permissions
79
+ - supported memory injection modes
80
+ - supported subagent/task model
81
+ - supported hooks/lifecycle events
82
+ - config file locations
83
+ - limitations
84
+ - export/render format
85
+
86
+ This prevents `vgxness` from pretending all tools support the same features.
87
+
88
+ ### 4. Agent definition schema
89
+
90
+ The agent registry needs a neutral schema, not just “store agents”.
91
+
92
+ Suggested minimum schema:
93
+
94
+ - name
95
+ - description/delegation trigger
96
+ - role/system instructions
97
+ - mode: primary/subagent/workflow-phase
98
+ - capabilities
99
+ - allowed tools
100
+ - denied tools
101
+ - model preference
102
+ - memory scopes
103
+ - SDD phases supported
104
+ - max steps/turns
105
+ - required approvals
106
+ - adapter overrides
107
+
108
+ ### 5. Tool/ACI design
109
+
110
+ Agent-computer interface design is a product feature.
111
+
112
+ Needed:
113
+
114
+ - Tool descriptions optimized for model usage.
115
+ - Safe input schemas.
116
+ - Examples and edge cases per tool.
117
+ - Clear boundaries between similar tools.
118
+ - Tool-level tests/evals to catch misuse.
119
+
120
+ Bad tools create bad agents. This is where a lot of harnesses quietly fail.
121
+
122
+ ### 6. Durable execution and resume
123
+
124
+ SDD creates long-running work. Long-running work needs checkpoints.
125
+
126
+ Needed:
127
+
128
+ - run checkpoints
129
+ - phase checkpoints
130
+ - apply-progress merge rules
131
+ - resumable interrupted runs
132
+ - idempotency expectations for tools
133
+ - run outcome classification: blocked, failed, needs-human, cancelled, completed
134
+
135
+ ### 7. Observability and debugging
136
+
137
+ The product needs traces, not just logs.
138
+
139
+ Minimum trace entities:
140
+
141
+ - run
142
+ - phase
143
+ - agent/subagent invocation
144
+ - tool call
145
+ - memory operation
146
+ - artifact operation
147
+ - approval decision
148
+ - verification command/result
149
+
150
+ Nice-to-have later:
151
+
152
+ - token/cost tracking
153
+ - model latency
154
+ - failure heatmap
155
+ - timeline UI/export
156
+
157
+ ### 8. Evaluation and quality gates
158
+
159
+ The PRD has success criteria, but not evals.
160
+
161
+ Needed MVP evals:
162
+
163
+ - agent resolution chooses the expected agent
164
+ - SDD artifact chain remains complete
165
+ - memory upsert/revision behavior is durable
166
+ - provider adapter renders valid config
167
+ - permission model blocks unsafe operations
168
+ - resume restores the expected run state
169
+
170
+ ### 9. Artifact portability
171
+
172
+ Memory-only artifacts are fast, but PRD/review workflows need portability.
173
+
174
+ Needed:
175
+
176
+ - export SDD artifacts to markdown/json
177
+ - import artifacts back into memory
178
+ - snapshot a run for debugging or sharing
179
+ - redact sensitive data during export
180
+
181
+ ### 10. CLI surface definition
182
+
183
+ The PRD says CLI, but the first command set is still open.
184
+
185
+ Candidate MVP commands:
186
+
187
+ - `vgx init`
188
+ - `vgx memory search|get|save|update`
189
+ - `vgx agent list|add|render|validate`
190
+ - `vgx sdd new|continue|status|archive`
191
+ - `vgx run list|show|resume`
192
+ - `vgx adapter doctor|render`
193
+
194
+ ## Recommended MVP additions to PRD
195
+
196
+ Add these as explicit MVP requirements:
197
+
198
+ 1. **Run lifecycle model** — every agentic operation is captured as a resumable/auditable run.
199
+ 2. **Permission model** — per-agent tool permissions with human approval gates.
200
+ 3. **Provider adapter contract** — adapters translate neutral `vgxness` definitions into provider-specific configs.
201
+ 4. **Agent schema** — neutral registry schema for agents/subagents/workflow-phase agents.
202
+ 5. **Trace model** — structured trace records for runs, tools, memory, artifacts, approvals, and verification.
203
+ 6. **Artifact export/import** — SDD and memory artifacts can be exported for review/debugging and imported back into memory.
204
+ 7. **Evaluation harness** — tests/evals for agent resolution, adapters, permissions, memory, and resume.
205
+
206
+ ## Suggested next SDD change
207
+
208
+ Create a new SDD change named `harness-runtime-foundation`.
209
+
210
+ Scope it narrowly:
211
+
212
+ - define run lifecycle schema
213
+ - define agent registry schema
214
+ - define permission categories
215
+ - define provider adapter interface
216
+ - add CLI validation/render skeleton
217
+ - add tests for schemas and adapter rendering
218
+
219
+ Do **not** implement full cloud sync, distributed agents, web UI, or team workflows yet.
220
+
221
+ ## Sources
222
+
223
+ - Anthropic: Building effective agents
224
+ - Claude Code: subagents documentation
225
+ - OpenCode: agents documentation
226
+ - OpenAI Agents SDK documentation
227
+ - LangGraph overview
228
+ - Microsoft AutoGen documentation
229
+ - CrewAI documentation
package/docs/prd.md ADDED
@@ -0,0 +1,372 @@
1
+ # vgxness PRD
2
+
3
+ `vgxness` is a local-first, Gentle-AI-like agentic harness for advanced individual developers who want to configure AI coding tools, coordinate agents/subagents, use persistent memory, and run SDD workflows with explicit runtime state.
4
+
5
+ ## Product thesis
6
+
7
+ AI coding tools are powerful, but their work is often fragmented: context is lost between sessions, agents are configured differently per tool, and substantial changes lack a repeatable planning/verification workflow.
8
+
9
+ `vgxness` solves this by providing a provider-agnostic harness layer with persistent memory, agent/subagent configuration, model/profile management, MCP tools, an intuitive CLI/TUI, and SDD-first development flows.
10
+
11
+ The product intentionally overlaps with systems such as Gentle-AI and `gentle-pi`: it should configure AI agents, install/sync SDD assets, wire memory, and support per-phase agent/model behavior. Its differentiator is that the workflow must not live only in prompts or agent instructions. `vgxness` should also maintain a **verifiable runtime control plane**: explicit phase state, readiness gates, artifacts, runs, approvals, checkpoints, and audit trails.
12
+
13
+ ## Reference positioning
14
+
15
+ | Reference system | What it proves | What `vgxness` should learn | Where `vgxness` differentiates |
16
+ |---|---|---|---|
17
+ | Gentle-AI | AI coding tools can be upgraded through installed prompts, skills, SDD agents, model profiles, permissions, backups, and verification. | Installer/sync discipline, provider adapters, per-phase model routing, safe file merging, backups, golden tests. | Add a first-class local runtime state engine instead of relying only on configured agent behavior. |
18
+ | `gentle-pi` | Pi can own persona, SDD agents/chains, strict TDD support, safety rules, model assignment, and Engram wiring as packages. | Package-owned runtime behavior, startup asset checks, project-local SDD assets, child-agent communication. | Expose provider-neutral APIs for phase readiness, artifact state, run history, approvals, and resumable checkpoints across tools. |
19
+ | Engram | Persistent memory can preserve decisions, discoveries, prompts, SDD artifacts, and sessions across agent runs. | Memory as infrastructure, project detection, topic-key upserts, revisions, session summaries. | Treat memory as one backend behind explicit workflow/run state, not the only source of truth. |
20
+
21
+ ## Primary user
22
+
23
+ | Segment | Priority | Description |
24
+ |---|---:|---|
25
+ | Advanced individual developer | MVP | A power user working with AI coding tools who needs durable context, structured workflows, and reusable agent configuration. |
26
+ | Development teams | Future | Teams that need shared workflows, governance, permissions, audit trails, PR coordination, and cloud sync. |
27
+
28
+ ## MVP scope
29
+
30
+ The MVP must prove that a single developer can use `vgxness` locally to manage agentic development work with less context loss and more reliable execution. It must feel comparable to a modern agent ecosystem harness while proving one stronger claim: SDD state can be queried, gated, resumed, and audited through product APIs instead of depending only on LLM discipline.
31
+
32
+ ### 1. Local-first memory
33
+
34
+ The product must support persistent memory without requiring a cloud account.
35
+
36
+ Minimum capabilities:
37
+
38
+ - Project memory for repository-specific decisions, SDD artifacts, sessions, progress, and codebase discoveries.
39
+ - Personal/global memory for user preferences, reusable patterns, standards, and cross-project learnings.
40
+ - Search, read, create, update, and topic-key upsert flows.
41
+ - Durable revisions for evolving observations and artifacts.
42
+ - Traceability for why memory was created or updated.
43
+
44
+ ### 2. SDD-first workflow
45
+
46
+ The product must make Spec-Driven Development the primary development path.
47
+
48
+ Minimum canonical workflow:
49
+
50
+ 1. Explore
51
+ 2. Proposal
52
+ 3. Spec
53
+ 4. Design
54
+ 5. Tasks
55
+ 6. Apply progress
56
+ 7. Verify
57
+ 8. Archive
58
+
59
+ Canonical artifact phase keys use `proposal` and `apply-progress`. User-facing commands may later expose friendlier verbs such as “propose” or “apply,” but stored workflow state must use one canonical vocabulary.
60
+
61
+ Minimum capabilities:
62
+
63
+ - Persist SDD artifacts in memory.
64
+ - Expose phase status, readiness, and missing prerequisites through CLI/API/MCP calls.
65
+ - Block or warn before a phase runs without required artifacts or approvals.
66
+ - Continue interrupted changes from stored state.
67
+ - Track apply progress without overwriting previous work.
68
+ - Support review workload planning for large changes.
69
+ - Keep verification results linked to specs, tasks, runs, and evidence.
70
+
71
+ ### 3. Agent and subagent registry
72
+
73
+ The product must include agent/subagent registration and configuration from the start.
74
+
75
+ Minimum capabilities:
76
+
77
+ - Define agents and subagents in a provider-neutral model.
78
+ - Describe capabilities, instructions, permissions, memory access, and compatible workflows.
79
+ - Resolve which agents should be used for a task.
80
+ - Render or preview provider-specific agent/subagent configuration from neutral definitions.
81
+ - Support future provider adapters without changing the core domain model.
82
+
83
+ ### 4. Skill registry and controlled self-improvement
84
+
85
+ The product must manage skills as first-class reusable assets, not just static prompt files.
86
+
87
+ Minimum capabilities:
88
+
89
+ - Register skills independently from agents and workflows.
90
+ - Attach skills to agents, subagents, SDD phases, or provider adapters.
91
+ - Track skill versions, sources, compatibility, and usage history.
92
+ - Evaluate whether a skill helped or failed during a run.
93
+ - Propose skill improvements from traces, failures, repeated corrections, and memory discoveries.
94
+ - Generate reviewable skill diffs instead of silently mutating active skills.
95
+ - Require human approval before a proposed skill improvement becomes active.
96
+
97
+ Self-improvement loop:
98
+
99
+ 1. Observe runs, traces, failures, and repeated user corrections.
100
+ 2. Detect a candidate improvement.
101
+ 3. Draft a skill update as a versioned proposal.
102
+ 4. Evaluate the proposal against relevant scenarios.
103
+ 5. Ask for human approval.
104
+ 6. Activate the approved version and preserve rollback history.
105
+
106
+ ### 5. MCP server, CLI, TUI, and integrations
107
+
108
+ The product must provide three first-class interfaces over the same local core:
109
+
110
+ - **MCP server** for agent-facing workflow/state tools.
111
+ - **CLI** for scriptable setup, inspection, and automation.
112
+ - **TUI** for guided installation, onboarding, configuration, and visual status.
113
+
114
+ The core rule: MCP, CLI, and TUI must call the same services and storage. They must not each reimplement workflow rules.
115
+
116
+ #### MCP server
117
+
118
+ The MCP server is the main integration surface for AI coding tools. It should expose safe, typed tools that let agents query and update product state without editing config files directly.
119
+
120
+ Minimum MCP capabilities:
121
+
122
+ - Start a local MCP server through `vgx mcp start` or an installed provider config.
123
+ - Install MCP integration for supported tools through guided setup.
124
+ - Expose SDD status, readiness, next-phase, and artifact operations.
125
+ - Expose run start/checkpoint/finalize and approval-request operations.
126
+ - Expose agent resolution and skill payload operations.
127
+ - Return actionable blocked states instead of relying on prompt interpretation.
128
+
129
+ Candidate MCP tools:
130
+
131
+ ```text
132
+ vgxness_sdd_status
133
+ vgxness_sdd_next
134
+ vgxness_sdd_ready
135
+ vgxness_sdd_save_artifact
136
+ vgxness_run_start
137
+ vgxness_run_checkpoint
138
+ vgxness_run_request_approval
139
+ vgxness_agent_resolve
140
+ vgxness_skill_payload
141
+ vgxness_profile_get
142
+ ```
143
+
144
+ #### CLI
145
+
146
+ The CLI must be intuitive, predictable, and useful without reading internal docs.
147
+
148
+ Minimum CLI capabilities:
149
+
150
+ - Initialize `vgxness` in a project.
151
+ - Install/sync managed agent assets without clobbering user-owned config.
152
+ - Install, inspect, and remove MCP integration for supported tools.
153
+ - Configure memory scopes and storage.
154
+ - Configure agents and subagents.
155
+ - Configure per-phase model/profile assignments.
156
+ - Inspect SDD artifacts and memory.
157
+ - Export or debug product state.
158
+ - Verify installation/configuration health and report rollback options.
159
+
160
+ Candidate CLI shape:
161
+
162
+ ```bash
163
+ vgx init
164
+ vgx doctor
165
+ vgx status
166
+ vgx mcp install opencode
167
+ vgx mcp status
168
+ vgx sdd status <change>
169
+ vgx sdd next <change>
170
+ vgx profiles list
171
+ vgx profiles set apply <model>
172
+ ```
173
+
174
+ #### TUI
175
+
176
+ The TUI is the default human onboarding and local operations surface. It should optimize for the next meaningful decision, not decorative metrics.
177
+
178
+ Minimum TUI screens:
179
+
180
+ - **Setup** — detect environment/project state, choose providers, inspect agents, run verification, and show copy-only external setup guidance without silent provider writes.
181
+ - **Workflows** — available workflow paths and safe next actions.
182
+ - **Runs** — run history, current run state, blockers, and resumable execution.
183
+ - **Approvals** — pending permission requests and safety gates.
184
+ - **Agents** — registered agents/subagents, attached skills, model/profile routing, and provider compatibility.
185
+ - **SDD** — change list, phase progress, missing artifacts, next ready phase, blockers, and resumable runs.
186
+ - **Doctor** — health checks, broken config, MCP availability, memory status, adapter issues, and suggested fixes.
187
+ - **Settings** — local preferences and configuration status without silent provider writes.
188
+
189
+ TUI state requirements:
190
+
191
+ - Loading, empty, error, success, blocked, and permission states must be explicit and actionable.
192
+ - Keyboard navigation and visible focus are required.
193
+ - The dashboard TUI is read-only: dashboard flows never write, install, or apply provider config. Those operations run outside the dashboard and require explicit confirmation.
194
+
195
+ #### Installation experience
196
+
197
+ Installation should be step-based and avoid manual JSON editing for the happy path.
198
+
199
+ Target flow:
200
+
201
+ ```text
202
+ 1. Install binary
203
+ 2. Run `vgx`
204
+ 3. Select AI tools
205
+ 4. Install MCP integration
206
+ 5. Configure memory
207
+ 6. Choose SDD mode/profile
208
+ 7. Run doctor
209
+ 8. Open the AI tool and start working
210
+ ```
211
+
212
+ The same flow must be available through CLI flags for automation and CI-friendly dry runs.
213
+
214
+ Initial integration targets:
215
+
216
+ - OpenCode
217
+ - Claude Code
218
+
219
+ Pi/`gentle-pi` is a design reference and future adapter target, not part of the first integration target list unless the MVP scope is explicitly expanded.
220
+
221
+ The integration model must remain provider-agnostic. Tool-specific behavior belongs in adapters, not in the core domain.
222
+
223
+ ### 6. Harness runtime foundation
224
+
225
+ The product must model agentic work as explicit, auditable, resumable runs.
226
+
227
+ Minimum capabilities:
228
+
229
+ - Create a run record for each meaningful agentic operation.
230
+ - Track user intent, project, workflow phase, selected agent, provider adapter, model, artifacts, memory operations, approvals, tool calls, verification evidence, and final status.
231
+ - Support interrupted work through checkpoints and resumable run state.
232
+ - Classify run outcomes: completed, failed, blocked, cancelled, needs-human, or partially-complete.
233
+ - Keep run state separate from long-term memory and SDD artifacts.
234
+
235
+ ### 7. Permission and sandbox model
236
+
237
+ The product must treat agent permissions as a core safety feature, not as adapter-specific behavior.
238
+
239
+ Minimum capabilities:
240
+
241
+ - Define permission categories for read, write/edit, shell, network, git, memory, external directories, and provider-specific tools.
242
+ - Support per-agent and per-subagent permissions.
243
+ - Require human approval for destructive, external, privileged, or ambiguous operations.
244
+ - Enforce workspace boundaries.
245
+ - Support future isolated execution strategies, such as worktrees or sandboxes, for implementation agents.
246
+
247
+ ### 8. Provider adapter contract
248
+
249
+ The product must translate `vgxness` concepts into provider-specific configuration through adapters.
250
+
251
+ Each adapter must declare:
252
+
253
+ - Supported agent definition fields.
254
+ - Supported skill definition and injection modes.
255
+ - Supported permission categories.
256
+ - Supported memory injection modes.
257
+ - Supported subagent/task delegation model.
258
+ - Supported hooks or lifecycle events.
259
+ - Config file locations.
260
+ - Known limitations.
261
+ - Render/export format.
262
+
263
+ Adapters must not redefine the core product model.
264
+
265
+ ### 9. Observability and evaluation
266
+
267
+ The product must make agentic behavior inspectable and testable.
268
+
269
+ Minimum observability capabilities:
270
+
271
+ - Structured traces for runs, phases, agent invocations, tool calls, memory operations, artifact operations, approvals, and verification commands.
272
+ - Debuggable timelines for completed or failed runs.
273
+ - Redaction strategy for sensitive data before export or sharing.
274
+
275
+ Minimum evaluation capabilities:
276
+
277
+ - Agent resolution chooses the expected agent for representative tasks.
278
+ - Skill resolution injects the expected reusable guidance for representative tasks.
279
+ - Skill improvement proposals are reviewable, versioned, and gated by approval.
280
+ - SDD artifact chains remain complete and linked.
281
+ - Memory revision/upsert behavior remains durable.
282
+ - Provider adapters render valid config.
283
+ - Permission rules block unsafe operations.
284
+ - Resume restores expected run state.
285
+
286
+ ### 10. Artifact portability
287
+
288
+ The product must support memory-first operation without trapping artifacts inside memory only.
289
+
290
+ Minimum capabilities:
291
+
292
+ - Export SDD artifacts to Markdown or JSON.
293
+ - Import exported artifacts back into memory.
294
+ - Snapshot runs for debugging or review.
295
+ - Redact sensitive data during export.
296
+
297
+ ## Non-goals for MVP
298
+
299
+ - Cloud sync.
300
+ - Multi-user collaboration.
301
+ - Team permissions and governance.
302
+ - Web dashboard.
303
+ - Hosted memory service.
304
+ - Marketplace for agents.
305
+ - Full provider parity across every AI coding tool.
306
+ - Distributed multi-worker execution.
307
+ - Production cloud-hosted tracing.
308
+ - Fully autonomous, unreviewed skill mutation.
309
+
310
+ These are future expansion areas, not MVP blockers.
311
+
312
+ ## Product principles
313
+
314
+ | Principle | Meaning |
315
+ |---|---|
316
+ | Local-first | The developer owns their data and can work offline. |
317
+ | Provider-agnostic | Core concepts must not be coupled to OpenCode, Claude Code, or any single vendor. |
318
+ | SDD-first | Substantial work should move through explicit planning, implementation, verification, and archive phases. |
319
+ | MCP-first for agents | AI tools should interact with the harness through typed MCP tools instead of prompt-only conventions. |
320
+ | Guided by default | The TUI should make setup and diagnosis understandable without manual config editing. |
321
+ | Memory as infrastructure | Memory is not a chat convenience; it is a durable product substrate. |
322
+ | Reviewable work | The system should help keep work units understandable, traceable, and verifiable. |
323
+ | Safe by default | Agents must operate inside explicit permissions and auditable boundaries. |
324
+ | Inspectable runtime | Every meaningful agentic operation should be explainable after it happens. |
325
+ | Controlled self-improvement | The system may propose better skills, but humans approve what becomes active. |
326
+
327
+ ## Future roadmap
328
+
329
+ After the MVP proves local individual usage, expand toward:
330
+
331
+ - Cloud sync across machines.
332
+ - Team/shared memory spaces.
333
+ - Team-level permissions, governance, and audit trails.
334
+ - PR and chained-PR coordination.
335
+ - Hosted dashboard for inspection and collaboration.
336
+ - Additional provider adapters.
337
+ - Import/export between local and cloud memory backends.
338
+ - Distributed agent workers.
339
+ - Hosted observability and evaluation dashboard.
340
+ - Skill marketplace or shared skill catalog.
341
+
342
+ ## Success criteria
343
+
344
+ The MVP is successful when an advanced individual developer can:
345
+
346
+ - Initialize `vgxness` in a repo.
347
+ - Complete the guided TUI setup without manually editing provider config.
348
+ - Install and verify MCP integration for at least one AI coding tool.
349
+ - Configure personal and project memory.
350
+ - Register agents/subagents in a provider-neutral way.
351
+ - Register skills, attach them to agents/workflows, and review proposed improvements.
352
+ - Run an SDD change from idea through archive.
353
+ - Resume work across sessions without losing critical context.
354
+ - Use at least one external AI coding tool integration through an adapter.
355
+ - Use the CLI to inspect status, run doctor, and debug product state.
356
+ - Inspect a run timeline with agents, tools, memory/artifact operations, approvals, and verification evidence.
357
+ - Validate that unsafe operations are blocked or require approval.
358
+
359
+ ## Open questions
360
+
361
+ - What is the first integration adapter: OpenCode or Claude Code?
362
+ - Should memory storage be per-repo by default, with global memory in a user-level directory?
363
+ - What config format should define agents/subagents?
364
+ - What config format should define skills and skill versions?
365
+ - Which skill improvement proposals should require approval versus automatic rejection?
366
+ - Which commands form the first public CLI surface?
367
+ - Which TUI framework should be used for the first implementation?
368
+ - Should the MCP server run only over stdio for MVP, or also support local HTTP later?
369
+ - What is the safest default install command and update channel?
370
+ - What privacy/export guarantees are required before public release?
371
+ - What is the first sandbox strategy: normal workspace, git worktree, or process/container isolation?
372
+ - What trace format should be used for local inspection and future cloud sync?
package/package.json ADDED
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "vgxness",
3
+ "version": "0.1.0",
4
+ "description": "Alpha CLI and MCP control plane for guided AI-agent workflows, SDD, memory, and OpenCode setup.",
5
+ "license": "SEE LICENSE IN LICENSE",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/uzielvgx/vgxness.git"
9
+ },
10
+ "homepage": "https://github.com/uzielvgx/vgxness#readme",
11
+ "bugs": {
12
+ "url": "https://github.com/uzielvgx/vgxness/issues"
13
+ },
14
+ "keywords": [
15
+ "mcp",
16
+ "opencode",
17
+ "agents",
18
+ "sdd",
19
+ "cli"
20
+ ],
21
+ "type": "module",
22
+ "scripts": {
23
+ "cli": "tsx src/cli/index.ts",
24
+ "build": "tsc -p tsconfig.build.json && node scripts/copy-migrations.mjs",
25
+ "prepack": "npm run build",
26
+ "package:dry-run": "npm pack --dry-run --json",
27
+ "package:release-check": "node scripts/validate-package-release.mjs",
28
+ "package:smoke:install": "node scripts/smoke-tarball-install.mjs",
29
+ "test": "node --import tsx --test \"test/**/*.test.ts\"",
30
+ "typecheck": "tsc --noEmit"
31
+ },
32
+ "bin": {
33
+ "vgxness": "dist/cli/index.js",
34
+ "vgx": "dist/cli/index.js"
35
+ },
36
+ "files": [
37
+ "dist",
38
+ "package.json",
39
+ "README.md",
40
+ "LICENSE",
41
+ "docs"
42
+ ],
43
+ "dependencies": {
44
+ "@modelcontextprotocol/sdk": "^1.29.0",
45
+ "better-sqlite3": "^11.10.0",
46
+ "zod": "^4.4.3"
47
+ },
48
+ "devDependencies": {
49
+ "@types/better-sqlite3": "^7.6.13",
50
+ "@types/node": "^22.15.18",
51
+ "tsx": "^4.19.4",
52
+ "typescript": "^5.8.3"
53
+ },
54
+ "engines": {
55
+ "node": ">=22"
56
+ }
57
+ }