@miller-tech/uap 1.40.0 → 1.40.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -642
- package/docs/INDEX.md +48 -286
- package/docs/architecture/OVERVIEW.md +328 -0
- package/docs/architecture/PROTOCOL.md +204 -0
- package/docs/benchmarks/README.md +17 -192
- package/docs/getting-started/CONFIGURATION.md +237 -0
- package/docs/getting-started/INSTALLATION.md +125 -0
- package/docs/getting-started/QUICKSTART.md +115 -0
- package/docs/guides/COORDINATION.md +162 -0
- package/docs/guides/DELIVER.md +115 -0
- package/docs/guides/DEPLOY_BATCHING.md +212 -0
- package/docs/guides/DROIDS_AND_SKILLS.md +202 -0
- package/docs/guides/LOCAL_MODELS.md +148 -0
- package/docs/guides/MCP_ROUTER.md +195 -0
- package/docs/guides/MEMORY.md +235 -0
- package/docs/guides/MULTI_MODEL.md +223 -0
- package/docs/guides/POLICIES.md +190 -0
- package/docs/guides/WORKTREE_WORKFLOW.md +185 -0
- package/docs/integrations/MCP_ROUTER.md +147 -0
- package/docs/integrations/RTK.md +102 -0
- package/docs/reference/API.md +485 -0
- package/docs/reference/CLI.md +719 -0
- package/docs/reference/CONFIGURATION.md +90 -193
- package/docs/reference/DATABASE_SCHEMA.md +110 -344
- package/docs/reference/FEATURES.md +176 -472
- package/docs/reference/PATTERNS.md +102 -0
- package/docs/reference/PLATFORMS.md +83 -0
- package/package.json +1 -1
- package/docs/AGENTS.md +0 -423
- package/docs/DOCUMENTATION_AUDIT_REPORT.md +0 -131
- package/docs/GETTING_STARTED.md +0 -288
- package/docs/PROJECT_ANALYSIS_REPORT.md +0 -510
- package/docs/architecture/COMPLETE_ARCHITECTURE.md +0 -748
- package/docs/architecture/EXPERT_STACK.md +0 -137
- package/docs/architecture/MULTI_MODEL.md +0 -224
- package/docs/architecture/PLATFORM_GATING.md +0 -68
- package/docs/architecture/SYSTEM_ANALYSIS.md +0 -334
- package/docs/architecture/UAP_COMPLIANCE.md +0 -217
- package/docs/architecture/UAP_PROTOCOL.md +0 -339
- package/docs/architecture/UAP_STRICT_DROIDS.md +0 -172
- package/docs/archive/BALLS_MODE_SELF_ANALYSIS.md +0 -260
- package/docs/archive/BENCHMARK_GAPS_AND_PLAN.md +0 -146
- package/docs/archive/FAILING_TASKS_SOLUTION_PLAN.md +0 -668
- package/docs/archive/JINJA2-SYSTEM-MESSAGE-FIX.md +0 -209
- package/docs/archive/MODEL_ROUTING_IMPLEMENTATION_SUMMARY.md +0 -281
- package/docs/archive/MODEL_ROUTING_OPTIMIZATION_PLAN.md +0 -320
- package/docs/archive/NPM-PUBLISH-V0.9.1.md +0 -240
- package/docs/archive/OPTIMIZATION_OPTIONS.md +0 -334
- package/docs/archive/PARALLELISM_GAPS_AND_OPTIONS.md +0 -422
- package/docs/archive/POLICY_GATE_IMPLEMENTATION.md +0 -245
- package/docs/archive/SETUP_IMPROVEMENTS.md +0 -213
- package/docs/archive/UAP_GENERIC_OPTIMIZATION_PLAN.md +0 -270
- package/docs/archive/UAP_OPTIMIZATION_PLAN.md +0 -701
- package/docs/archive/UAP_V103_PATTERN_DESIGN.md +0 -315
- package/docs/archive/UAP_V104_COMPLIANCE_DESIGN.md +0 -223
- package/docs/archive/changelog/2026-03-10_uap-100-compliance.md +0 -77
- package/docs/archive/changelog/2026-03-10_uap-full-system-verification.md +0 -109
- package/docs/archive/opencode-integration-guide.md +0 -740
- package/docs/archive/opencode-integration-quickref.md +0 -180
- package/docs/benchmarks/OVERNIGHT_RUNNER.md +0 -341
- package/docs/benchmarks/SPECULATIVE_DECODING_JOURNEY_2026-03.md +0 -221
- package/docs/benchmarks/VALIDATION_PLAN.md +0 -568
- package/docs/blog/SPECULATIVE_DECODING_PRODUCTION_PLAYBOOK.md +0 -139
- package/docs/blog/local-coding-agents.md +0 -266
- package/docs/blog/x-thread.md +0 -254
- package/docs/deployment/DEPLOYMENT.md +0 -895
- package/docs/deployment/DEPLOYMENT_STRATEGIES.md +0 -518
- package/docs/deployment/DEPLOY_BATCHER_ANALYSIS.md +0 -224
- package/docs/deployment/DEPLOY_BATCHING.md +0 -273
- package/docs/deployment/DEPLOY_BUCKETING_ANALYSIS.md +0 -420
- package/docs/deployment/QWEN35_LLAMA_CPP.md +0 -426
- package/docs/deployment/UAP_LLAMA_ANTHROPIC_PROXY_BOOTSTRAP.md +0 -279
- package/docs/getting-started/INTEGRATION.md +0 -628
- package/docs/getting-started/OVERVIEW.md +0 -324
- package/docs/getting-started/SETUP.md +0 -377
- package/docs/integrations/MCP_ROUTER_SETUP.md +0 -445
- package/docs/integrations/RTK_INTEGRATION.md +0 -468
- package/docs/operations/TROUBLESHOOTING.md +0 -660
- package/docs/pr/PR_SPECULATIVE_DOCS_TEMPLATE.md +0 -146
- package/docs/pr/UPSTREAM_PRS.md +0 -424
- package/docs/reference/API_REFERENCE.md +0 -903
- package/docs/reference/EXPERT_DROIDS.md +0 -219
- package/docs/reference/HARNESS-MATRIX.md +0 -318
- package/docs/reference/PATTERN_LIBRARY.md +0 -636
- package/docs/reference/UAP_CLI_REFERENCE.md +0 -620
- package/docs/research/BEHAVIORAL_PATTERNS.md +0 -228
- package/docs/research/DOMAIN_STRATEGIES.md +0 -316
- package/docs/research/MEMORY_SYSTEMS_COMPARISON.md +0 -812
- package/docs/research/PATTERN_ANALYSIS_2026-01-18.md +0 -436
- package/docs/research/PERFORMANCE_ANALYSIS_2026-01-18.md +0 -209
- package/docs/research/PERFORMANCE_TEST_PLAN.md +0 -383
- package/docs/research/TERMINAL_BENCH_LEARNINGS.md +0 -217
package/README.md
CHANGED
|
@@ -1,716 +1,183 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
1
3
|
# Universal Agent Protocol (UAP)
|
|
2
4
|
|
|
3
|
-
|
|
4
|
-
[](https://opensource.org/licenses/MIT)
|
|
5
|
+
**Give your AI coding agents memory, judgment, and the discipline to finish the job.**
|
|
5
6
|
|
|
6
|
-
|
|
7
|
+
[![npm version](https://img.shields.io/npm/v/@miller-tech/uap.svg)](https://www.npmjs.com/package/@miller-tech/uap)
|
|
8
|
+
[](https://nodejs.org)
|
|
9
|
+
[](#testing)
|
|
10
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
7
11
|
|
|
8
|
-
|
|
12
|
+
`v1.40.1` · 168 modules · 117 test suites · 9 agent harnesses
|
|
9
13
|
|
|
10
|
-
|
|
14
|
+
[Quickstart](#quickstart) · [Why UAP?](#why-uap) · [`uap deliver`](#the-deliver-harness) · [Architecture](#architecture) · [Benchmarks](#benchmarks) · [Docs](docs/INDEX.md)
|
|
11
15
|
|
|
12
16
|
</div>
|
|
13
17
|
|
|
14
18
|
---
|
|
15
19
|
|
|
16
|
-
##
|
|
17
|
-
|
|
18
|
-
**New:** Delivery Harness (`uap deliver`) — a convergence loop that drives an
|
|
19
|
-
underlying model through execute → apply → verify → feedback against the
|
|
20
|
-
project's real completion gates until delivery is achieved. Best-of-N
|
|
21
|
-
exploration, a structured critic, semantically-recalled best-practice cards,
|
|
22
|
-
and a stagnation-driven escalation ladder turn weaker/local models into
|
|
23
|
-
reliable closers. See [Delivery Harness](#delivery-harness).
|
|
24
|
-
|
|
25
|
-
```bash
|
|
26
|
-
uap deliver "add a parseDuration(str) helper returning seconds" \
|
|
27
|
-
--candidates 3 --critic --practices --escalate
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
**New:** Expert-stack extensions — forward-design droids (strategic/tactical
|
|
31
|
-
architect, implementation-planner), activated `experts.<name>` MCP tools, HALO
|
|
32
|
-
trace-based harness optimization, open-collider divergent ideation, and a real
|
|
33
|
-
expert-review hard gate. See [docs/architecture/EXPERT_STACK.md](docs/architecture/EXPERT_STACK.md).
|
|
20
|
+
## Why UAP?
|
|
34
21
|
|
|
35
|
-
|
|
36
|
-
uap harness analyze -p "systemic failure modes?" # HALO trace analysis
|
|
37
|
-
uap ideate setup <name> # divergent ideation project
|
|
38
|
-
```
|
|
22
|
+
AI coding agents are capable but undisciplined. They forget everything between sessions, burn tokens echoing huge tool outputs, repeat the same mistakes, declare victory on work that doesn't compile, and trip over each other in shared repos. UAP is a production-tested layer that sits **underneath your agent harness** (Claude Code, Factory, Cursor, OpenCode, and more) and fixes these problems at the protocol level — no model change required.
|
|
39
23
|
|
|
40
|
-
|
|
24
|
+
| The problem | What UAP does | Measured impact |
|
|
25
|
+
|---|---|---|
|
|
26
|
+
| Agents forget past sessions | 4-tier memory with semantic recall + write-gates | **49.7% fewer tokens** |
|
|
27
|
+
| Tool output floods the context | MCP Router — tool-hiding + FTS5 output compression | **up to ~98%** on large tool calls |
|
|
28
|
+
| Agents declare done on broken work | `uap deliver` — convergence loop against **real** gates | **+33pp** task success (25% → 58%) |
|
|
29
|
+
| Repetitive mistakes | 23 Terminal-Bench patterns + learning loop | **68% fewer errors** |
|
|
30
|
+
| Wrong model for the job | Multi-model router, 7 profiles | optimal cost/perf per task |
|
|
31
|
+
| Agents step on each other | Worktree isolation + coordination service | conflict-free parallel work |
|
|
32
|
+
| "Guidelines" get ignored | Policy gates as executable hooks, not prose | violations are **blocked**, not suggested |
|
|
41
33
|
|
|
42
|
-
|
|
43
|
-
uap worktree prune --dry-run # Preview
|
|
44
|
-
uap worktree prune --force # Execute
|
|
45
|
-
```
|
|
34
|
+
> Benchmarks below are from Terminal-Bench 2.0 (12 representative tasks). See [docs/benchmarks/](docs/benchmarks/) for the full methodology and raw data.
|
|
46
35
|
|
|
47
36
|
---
|
|
48
37
|
|
|
49
|
-
##
|
|
38
|
+
## Quickstart
|
|
50
39
|
|
|
51
40
|
```bash
|
|
41
|
+
# Install globally
|
|
52
42
|
npm install -g @miller-tech/uap
|
|
53
|
-
cd your-project
|
|
54
|
-
uap init
|
|
55
|
-
uap setup -p all
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
---
|
|
59
|
-
|
|
60
|
-
## Table of Contents
|
|
61
|
-
|
|
62
|
-
- [Feature Overview](#feature-overview)
|
|
63
|
-
- [Memory System](#memory-system)
|
|
64
|
-
- [Multi-Agent Coordination](#multi-agent-coordination)
|
|
65
|
-
- [Deploy Batching](#deploy-batching)
|
|
66
|
-
- [Policy Enforcement](#policy-enforcement)
|
|
67
|
-
- [Browser Automation](#browser-automation)
|
|
68
|
-
- [MCP Router](#mcp-router)
|
|
69
|
-
- [Multi-Model Architecture](#multi-model-architecture)
|
|
70
|
-
- [Delivery Harness](#delivery-harness)
|
|
71
|
-
- [Pattern System](#pattern-system)
|
|
72
|
-
- [Droids and Skills](#droids--skills)
|
|
73
|
-
- [Task Management](#task-management)
|
|
74
|
-
- [Worktree System](#worktree-system)
|
|
75
|
-
- [Hooks System](#hooks-system)
|
|
76
|
-
- [CLI Reference](#cli-reference)
|
|
77
|
-
- [Configuration](#configuration)
|
|
78
|
-
- [Testing](#testing--quality)
|
|
79
|
-
- [Requirements](#requirements)
|
|
80
|
-
|
|
81
|
-
---
|
|
82
|
-
|
|
83
|
-
## Feature Overview
|
|
84
|
-
|
|
85
|
-
| Category | Components | Purpose |
|
|
86
|
-
| ------------------ | -------------- | -------------------------------------------------------------------------------- |
|
|
87
|
-
| Memory | 27 modules | 4-layer persistent memory with embeddings, knowledge graph, hierarchical tiering |
|
|
88
|
-
| Coordination | 8 modules | Multi-agent lifecycle, work claims, messaging, overlap detection |
|
|
89
|
-
| Deploy Batching | 1 module | Squash, merge, parallelize deploy actions across agents |
|
|
90
|
-
| Policy Enforcement | 8 modules | Store, evaluate, and enforce operational policies with audit trail |
|
|
91
|
-
| Browser | 1 module | Stealth web automation via CloakBrowser (Playwright drop-in) |
|
|
92
|
-
| MCP Router | 12 modules | 3-tool meta-router (discover/execute/deliver) + expert-consultation registry (98% token savings) |
|
|
93
|
-
| Models | 10 modules | Multi-model routing, planning, execution, validation, 13 model profiles |
|
|
94
|
-
| Delivery Harness | 14 modules | `uap deliver`: convergence loop, best-of-N explorer, critic, practice recall, escalation, ideation seeds, HALO tracing, coordination + deploy queueing |
|
|
95
|
-
| Patterns | 23 patterns | Battle-tested workflows from Terminal-Bench 2.0 |
|
|
96
|
-
| Droids | 30 experts | Full SDLC expert stack: strategy, design, build, review, release, ops ([reference](docs/reference/EXPERT_DROIDS.md)) |
|
|
97
|
-
| Expert Orchestrator | 1 module | Adaptive droid-chain selection across plan→design→implement→review→release |
|
|
98
|
-
| Skills | 34 skills | Reusable domain expertise (now includes `parallel-expert-review`) |
|
|
99
|
-
| Tasks | 7 modules | Full task lifecycle with dependencies, claims, JSONL sync |
|
|
100
|
-
| Worktrees | 1 module | Isolated git branches per agent, auto-numbered |
|
|
101
|
-
| Hooks | 2 hooks | Session start (memory injection) and pre-compact (preservation) |
|
|
102
|
-
| CLI | 25 commands | Full system management with rich dashboard visualization |
|
|
103
|
-
| Benchmarks | 9 modules | Terminal-Bench adapter, Harbor integration, A/B comparison |
|
|
104
|
-
| LLM Optimization | 5 tools | Qwen3.5 tool call fixes, llama.cpp optimizer, LoRA training |
|
|
105
|
-
| Local LLM Proxy | 1 service | Anthropic Messages API default; OpenAI Chat Completions retained as option |
|
|
106
|
-
| RTK | 1 module | 60-90% token savings on command outputs |
|
|
107
|
-
| Platforms | 10 integrations | Claude, Factory, OpenCode, ForgeCode, VSCode, Cursor, Codex, OMP, Hermes (+ MCP) |
|
|
108
|
-
|
|
109
|
-
---
|
|
110
|
-
|
|
111
|
-
## Memory System
|
|
112
|
-
|
|
113
|
-
### Architecture: 4 Layers
|
|
114
|
-
|
|
115
|
-
```
|
|
116
|
-
+-------------------------------------------------------------------+
|
|
117
|
-
| L1: WORKING | Recent actions | 50 max | SQLite |
|
|
118
|
-
| L2: SESSION | Current session | Per run | SQLite |
|
|
119
|
-
| L3: SEMANTIC | Long-term learnings | Qdrant | Vectors |
|
|
120
|
-
| L4: KNOWLEDGE | Entity relationships | SQLite | Graph |
|
|
121
|
-
+-------------------------------------------------------------------+
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
### Hierarchical Tiers (Hot/Warm/Cold)
|
|
125
|
-
|
|
126
|
-
| Tier | Entries | Behavior |
|
|
127
|
-
| ---- | ------- | ------------------------------------ |
|
|
128
|
-
| Hot | 10 | Always in context, highest relevance |
|
|
129
|
-
| Warm | 50 | Promoted on frequent access |
|
|
130
|
-
| Cold | 500 | Semantic search only, compressed |
|
|
131
|
-
|
|
132
|
-
Time-decay formula: `effective_importance = importance * decayRate^daysSinceAccess`
|
|
133
|
-
|
|
134
|
-
### Components (27 modules)
|
|
135
|
-
|
|
136
|
-
| Component | File | Purpose |
|
|
137
|
-
| ------------------------ | ---------------------------------------- | ----------------------------------------------------------- |
|
|
138
|
-
| Short-Term (SQLite) | `src/memory/short-term/sqlite.ts` | FTS5 full-text search, WAL mode |
|
|
139
|
-
| Short-Term Schema | `src/memory/short-term/schema.ts` | FTS5 triggers, table definitions |
|
|
140
|
-
| Hierarchical Memory | `src/memory/hierarchical-memory.ts` | Hot/warm/cold tiering with auto-promotion/demotion |
|
|
141
|
-
| Dynamic Retrieval | `src/memory/dynamic-retrieval.ts` | Adaptive depth, hierarchical query, 6 memory sources |
|
|
142
|
-
| Embedding Service | `src/memory/embeddings.ts` | 5 providers: LlamaCpp, Ollama, OpenAI, Local, TF-IDF |
|
|
143
|
-
| GitHub Backend | `src/memory/backends/github.ts` | Store memories as JSON files in a GitHub repo |
|
|
144
|
-
| Qdrant Backend | `src/memory/backends/qdrant-cloud.ts` | Vector search with project-isolated collections |
|
|
145
|
-
| Backend Factory | `src/memory/backends/factory.ts` | Backend selection and initialization |
|
|
146
|
-
| Backend Base | `src/memory/backends/base.ts` | Interface definitions |
|
|
147
|
-
| Serverless Qdrant | `src/memory/serverless-qdrant.ts` | Auto-start/stop Docker, cloud fallback, idle shutdown |
|
|
148
|
-
| Write Gate | `src/memory/write-gate.ts` | Quality filter: 5 criteria, minimum score 0.3 |
|
|
149
|
-
| Daily Log | `src/memory/daily-log.ts` | Staging area -- all writes land here first |
|
|
150
|
-
| Correction Propagation | `src/memory/correction-propagator.ts` | Cross-tier updates, old claims marked [superseded] |
|
|
151
|
-
| Memory Maintenance | `src/memory/memory-maintenance.ts` | Prune, decay, archive, deduplicate |
|
|
152
|
-
| Memory Consolidation | `src/memory/memory-consolidator.ts` | Semantic dedup, quality scoring, background consolidation |
|
|
153
|
-
| Context Compression | `src/memory/context-compressor.ts` | 3 levels (light/medium/aggressive), dynamic budget-aware |
|
|
154
|
-
| Semantic Compression | `src/memory/semantic-compression.ts` | Atomic facts extraction, token reduction |
|
|
155
|
-
| Speculative Cache | `src/memory/speculative-cache.ts` | Pre-computes likely queries, LRU with TTL |
|
|
156
|
-
| Knowledge Graph | `src/memory/knowledge-graph.ts` | Entities + relationships in SQLite, recursive CTE traversal |
|
|
157
|
-
| Adaptive Context | `src/memory/adaptive-context.ts` | 21 optimizations, historical benefit tracking |
|
|
158
|
-
| Task Classifier | `src/memory/task-classifier.ts` | 9 categories, suggests droids |
|
|
159
|
-
| Model Router | `src/memory/model-router.ts` | Routes to optimal model by task type and cost |
|
|
160
|
-
| Predictive Memory | `src/memory/predictive-memory.ts` | Cross-session query prediction with SQLite persistence |
|
|
161
|
-
| Ambiguity Detector | `src/memory/ambiguity-detector.ts` | Detects ambiguous task descriptions |
|
|
162
|
-
| Context Pruner | `src/memory/context-pruner.ts` | Token-budget-aware memory pruning |
|
|
163
|
-
| Prepopulation | `src/memory/prepopulate.ts` | Import from docs (markdown) and git history |
|
|
164
|
-
| Terminal-Bench Knowledge | `src/memory/terminal-bench-knowledge.ts` | Domain knowledge from benchmark analysis |
|
|
165
|
-
|
|
166
|
-
---
|
|
167
|
-
|
|
168
|
-
## Multi-Agent Coordination
|
|
169
43
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
Agent A Agent B Agent C
|
|
174
|
-
| | |
|
|
175
|
-
[Register] -> [Heartbeat 30s] -> [Announce: src/auth/]
|
|
176
|
-
| | |
|
|
177
|
-
[Overlap Check] ---------> [Overlap Check] ---------> [Overlap Check]
|
|
178
|
-
| | |
|
|
179
|
-
[Worktree: 001-auth] [Worktree: 002-api] [Worktree: 003-ui]
|
|
180
|
-
| | |
|
|
181
|
-
[Queue deploy] ----------> [Deploy Batcher] -------> [Squash & Execute]
|
|
44
|
+
# One-command setup in your project (memory, patterns, hooks, policies)
|
|
45
|
+
cd your-project
|
|
46
|
+
uap setup
|
|
182
47
|
```
|
|
183
48
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
| Component | File | Purpose |
|
|
187
|
-
| --------------------- | --------------------------------------- | -------------------------------------------------------------- |
|
|
188
|
-
| Coordination Service | `src/coordination/service.ts` | Agent lifecycle, work claims, announcements, messaging |
|
|
189
|
-
| Coordination Database | `src/coordination/database.ts` | SQLite with WAL: agents, claims, announcements, messages |
|
|
190
|
-
| Capability Router | `src/coordination/capability-router.ts` | Routes tasks to droids by 18 capability types |
|
|
191
|
-
| Auto-Agent | `src/coordination/auto-agent.ts` | Automatic registration, heartbeat, graceful shutdown |
|
|
192
|
-
| Pattern Router | `src/coordination/pattern-router.ts` | Loads Terminal-Bench patterns, critical patterns always active |
|
|
193
|
-
| Deploy Batcher | `src/coordination/deploy-batcher.ts` | Squash, merge, parallelize deploy actions |
|
|
194
|
-
| Adaptive Patterns | `src/coordination/adaptive-patterns.ts` | Pattern success tracking with SQLite persistence |
|
|
195
|
-
|
|
196
|
-
### Messaging
|
|
197
|
-
|
|
198
|
-
- **Broadcast** -- all agents
|
|
199
|
-
- **Direct** -- specific agent
|
|
200
|
-
- **Channels** -- broadcast, deploy, review, coordination
|
|
201
|
-
- **Priority** -- normal, high, urgent
|
|
202
|
-
- **Read receipts** -- delivery confirmation
|
|
203
|
-
|
|
204
|
-
---
|
|
205
|
-
|
|
206
|
-
## Deploy Batching
|
|
207
|
-
|
|
208
|
-
Prevents deploy storms when multiple agents finish work simultaneously.
|
|
209
|
-
|
|
210
|
-
### Batch Windows
|
|
211
|
-
|
|
212
|
-
| Action | Default | Urgent |
|
|
213
|
-
| -------- | ------- | ------ |
|
|
214
|
-
| commit | 30s | 3s |
|
|
215
|
-
| push | 5s | 1s |
|
|
216
|
-
| merge | 10s | 2s |
|
|
217
|
-
| workflow | 5s | 1s |
|
|
218
|
-
| deploy | 60s | 5s |
|
|
219
|
-
|
|
220
|
-
### CLI
|
|
49
|
+
That's it. Your agent now has persistent memory, battle-tested patterns, policy gates, and multi-agent coordination wired into every session.
|
|
221
50
|
|
|
222
51
|
```bash
|
|
223
|
-
uap
|
|
224
|
-
uap
|
|
225
|
-
uap
|
|
226
|
-
uap deploy status # View queue
|
|
227
|
-
uap deploy flush # Force-execute all pending
|
|
228
|
-
uap deploy config # View batch config
|
|
229
|
-
uap deploy set-config # Update config
|
|
230
|
-
uap deploy urgent # Enable urgent mode
|
|
52
|
+
uap memory query "how did we handle auth last time?" # semantic recall
|
|
53
|
+
uap deliver "add rate limiting to the API" # drive a model to verified completion
|
|
54
|
+
uap dashboard overview # live task / agent / memory state
|
|
231
55
|
```
|
|
232
56
|
|
|
233
57
|
---
|
|
234
58
|
|
|
235
|
-
## Policy Enforcement
|
|
236
|
-
|
|
237
|
-
### Components (8 modules)
|
|
238
|
-
|
|
239
|
-
| Component | File | Purpose |
|
|
240
|
-
| -------------------- | ------------------------------------------ | ------------------------------------------------ |
|
|
241
|
-
| Policy Schema | `src/policies/schemas/policy.ts` | Zod schemas for policies and executions |
|
|
242
|
-
| Database Manager | `src/policies/database-manager.ts` | SQLite with WAL, JSON serialization |
|
|
243
|
-
| Policy Memory | `src/policies/policy-memory.ts` | CRUD, relevance search, tag/category filtering |
|
|
244
|
-
| Policy Tools | `src/policies/policy-tools.ts` | Store/execute Python enforcement tools |
|
|
245
|
-
| Policy Gate | `src/policies/policy-gate.ts` | Middleware: blocks REQUIRED violations |
|
|
246
|
-
| Enforced Tool Router | `src/policies/enforced-tool-router.ts` | Single entry point for policy-checked tool calls |
|
|
247
|
-
| Policy Converter | `src/policies/convert-policy-to-claude.ts` | Markdown to CLAUDE.md format |
|
|
248
|
-
|
|
249
|
-
### Enforcement Levels
|
|
59
|
+
## The `deliver` harness
|
|
250
60
|
|
|
251
|
-
|
|
252
|
-
| ----------- | ----------------------------------------------- |
|
|
253
|
-
| REQUIRED | Blocks execution, throws `PolicyViolationError` |
|
|
254
|
-
| RECOMMENDED | Logged but does not block |
|
|
255
|
-
| OPTIONAL | Informational only |
|
|
256
|
-
|
|
257
|
-
### CLI (15 subcommands)
|
|
61
|
+
`uap deliver` is the headline of the v1.27–v1.40 line: a **convergence loop that iterates a model against your project's real completion gates until the work is actually delivered** — build passes, tests pass, lint is clean — not until the model *thinks* it's done.
|
|
258
62
|
|
|
259
63
|
```bash
|
|
260
|
-
uap
|
|
261
|
-
uap policy install <name> # Install built-in policy
|
|
262
|
-
uap policy enable <id> # Enable a policy
|
|
263
|
-
uap policy disable <id> # Disable a policy
|
|
264
|
-
uap policy status # Enforcement status
|
|
265
|
-
uap policy add -f <file> # Add from markdown
|
|
266
|
-
uap policy convert -i <id> # Convert to CLAUDE.md format
|
|
267
|
-
uap policy get-relevant -t <task> # Find relevant policies
|
|
268
|
-
uap policy add-tool -p <id> -t <name> -c <file> # Add Python tool
|
|
269
|
-
uap policy check -o <operation> # Check if allowed
|
|
270
|
-
uap policy audit # View audit trail
|
|
271
|
-
uap policy toggle <id> # Toggle on/off
|
|
272
|
-
uap policy stage <id> -s <stage> # Set enforcement stage
|
|
273
|
-
uap policy level <id> -l <level> # Set enforcement level
|
|
274
|
-
```
|
|
275
|
-
|
|
276
|
-
Also available as standalone binary: `uap-policy`
|
|
277
|
-
|
|
278
|
-
---
|
|
279
|
-
|
|
280
|
-
## Browser Automation
|
|
281
|
-
|
|
282
|
-
Stealth web browser via CloakBrowser -- a Playwright drop-in.
|
|
283
|
-
|
|
284
|
-
```typescript
|
|
285
|
-
import { createWebBrowser } from '@miller-tech/uap/browser';
|
|
286
|
-
|
|
287
|
-
const browser = createWebBrowser();
|
|
288
|
-
await browser.launch({ headless: true, humanize: true });
|
|
289
|
-
await browser.goto('https://example.com');
|
|
290
|
-
const content = await browser.getContent();
|
|
291
|
-
await browser.close();
|
|
292
|
-
```
|
|
293
|
-
|
|
294
|
-
---
|
|
295
|
-
|
|
296
|
-
## MCP Router
|
|
297
|
-
|
|
298
|
-
Replaces N tool definitions with 3 meta-tools for 98% token reduction: `discover_tools`, `execute_tool`, and `deliver` (auto-routes a coding task into the `uap deliver` convergence loop, which classifies complexity and drives a model to verified completion against real gates).
|
|
299
|
-
|
|
300
|
-
### Components (11 modules)
|
|
301
|
-
|
|
302
|
-
| Component | File | Purpose |
|
|
303
|
-
| ----------------- | ------------------------------------- | ------------------------------------------- |
|
|
304
|
-
| MCP Server | `src/mcp-router/server.ts` | Exposes `discover_tools`, `execute_tool`, `deliver` |
|
|
305
|
-
| Config Parser | `src/mcp-router/config/parser.ts` | Loads MCP configs from standard paths |
|
|
306
|
-
| Fuzzy Search | `src/mcp-router/search/fuzzy.ts` | Tool discovery with fuzzy matching |
|
|
307
|
-
| Client Pool | `src/mcp-router/executor/client.ts` | Manages connections to MCP servers |
|
|
308
|
-
| Tool Execute | `src/mcp-router/tools/execute.ts` | Tool execution with policy gate |
|
|
309
|
-
| Tool Discover | `src/mcp-router/tools/discover.ts` | Tool discovery definitions |
|
|
310
|
-
| Tool Deliver | `src/mcp-router/tools/deliver.ts` | Routes a task into `uap deliver` (sandbox-confined subprocess) |
|
|
311
|
-
| Output Compressor | `src/mcp-router/output-compressor.ts` | Compresses tool output |
|
|
312
|
-
| Session Stats | `src/mcp-router/session-stats.ts` | Per-tool token consumption tracking |
|
|
313
|
-
|
|
314
|
-
---
|
|
315
|
-
|
|
316
|
-
## Multi-Model Architecture
|
|
317
|
-
|
|
318
|
-
### 3-Tier Execution
|
|
319
|
-
|
|
64
|
+
uap deliver "implement the password reset flow"
|
|
320
65
|
```
|
|
321
|
-
Tier 1: TaskPlanner -- Decomposes task into subtasks
|
|
322
|
-
Tier 2: ModelRouter -- Assigns optimal model per subtask
|
|
323
|
-
Tier 3: TaskExecutor -- Executes with validation, dynamic temperature, rate limiting
|
|
324
|
-
```
|
|
325
|
-
|
|
326
|
-
### Components (11 modules)
|
|
327
66
|
|
|
328
|
-
|
|
329
|
-
| ------------------ | ---------------------------------- | -------------------------------------------- |
|
|
330
|
-
| Model Router | `src/models/router.ts` | Routes by complexity and cost |
|
|
331
|
-
| Task Planner | `src/models/planner.ts` | Decomposition, dependency analysis |
|
|
332
|
-
| Task Executor | `src/models/executor.ts` | Execution with model profiles, rate limiting |
|
|
333
|
-
| Plan Validator | `src/models/plan-validator.ts` | Cycle detection, coherence checks |
|
|
334
|
-
| Profile Loader | `src/models/profile-loader.ts` | Load model profiles from JSON |
|
|
335
|
-
| Execution Profiles | `src/models/execution-profiles.ts` | Runtime profile management |
|
|
336
|
-
| Unified Router | `src/models/unified-router.ts` | Combined routing logic |
|
|
337
|
-
| Analytics | `src/models/analytics.ts` | Model performance tracking |
|
|
67
|
+
What happens under the hood:
|
|
338
68
|
|
|
339
|
-
|
|
69
|
+
1. **Explore → plan → apply** — the model proposes changes; the applier writes them safely (pre-existing tests and gate configs are protected from being overwritten).
|
|
70
|
+
2. **Verify against real gates** — a verifier ladder runs your build, tests, and lint. Nothing is "done" until they're green.
|
|
71
|
+
3. **Critique & iterate** — failures feed back as structured guidance; the loop continues, **persisting until delivered** (extends past `--max-turns` to a ceiling, stopping on genuine stagnation).
|
|
72
|
+
4. **Auto-optimization** — every task is classified by complexity and the matching aids (HALO trace analysis, divergent ideation, coordination, deploy batching) activate automatically.
|
|
73
|
+
5. **Autonomy with a guidance channel** — runs the full mission without stopping to ask, while still accepting operator guidance mid-flight.
|
|
340
74
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
Each profile supports: `dynamic_temperature` (decay per retry), `tool_call_batching` (system prompt suffix), `rate_limits` (requests/tokens per minute).
|
|
75
|
+
It works with frontier models *and* local models (llama.cpp / Qwen) served over the Anthropic Messages API. See **[docs/guides/DELIVER.md](docs/guides/DELIVER.md)**.
|
|
344
76
|
|
|
345
77
|
---
|
|
346
78
|
|
|
347
|
-
## Delivery Harness
|
|
348
|
-
|
|
349
|
-
`uap deliver` forces an underlying model — including weaker or local models —
|
|
350
|
-
to reach a **verified** outcome. Instead of trusting a single generation, it
|
|
351
|
-
loops: the model emits whole files, the harness writes them, runs the
|
|
352
|
-
project's real completion gates, and feeds the failures back until every gate
|
|
353
|
-
passes or the turn budget is exhausted. "Done" is defined by the gates, not by
|
|
354
|
-
the model's say-so.
|
|
355
|
-
|
|
356
|
-
### Pipeline
|
|
357
|
-
|
|
358
|
-
```
|
|
359
|
-
┌─────────────────────────── loop until gates pass ───────────────────────────┐
|
|
360
|
-
│ │
|
|
361
|
-
instruction → build prompt → execute → apply files → verify (gates) → feedback ─────────┘
|
|
362
|
-
(+ practices) (+ critique) model to tree build/typecheck/test/lint
|
|
363
|
-
│ │
|
|
364
|
-
best-of-N candidates pass → done ✓ fail → critic + escalate
|
|
365
|
-
```
|
|
79
|
+
## Features
|
|
366
80
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
11. **Test protection (default)** — pre-existing test/spec files are snapshotted at loop start (case-folded, symlink-alias-aware) and the applier refuses model writes to them, with steering feedback and a prompt warning; test-runner/compiler configs (`vitest.config.*`, `tsconfig*.json`, `jest.config.*`, `pytest.ini`, …) are blocked too, closing gate-rigging by indirection. Protection extends to the spec's **transitive oracle material** — helpers/fixtures/mocks the tests import (by convention or data extension, including through tsconfig path aliases like `@fixtures/*` and `baseUrl` bare imports), quoted fixture paths, reserved missing goldens, and recursive helper chains (the unit under test stays writable). A **runtime integrity guard** hashes every protected file and re-verifies after each gate run: tampering from test code executing during the gates is restored and the gate result discarded. New test files remain allowed. Opt out with `--no-protect-tests`.
|
|
378
|
-
12. **Mission autonomy (default)** — the loop tells the driven model to complete the *entire* task without stopping to ask questions or pausing between phases (state assumptions and proceed; emit a one-line progress note; don't invent unrequested scope). The real gates remain the arbiter. Opt out per-run with `ConvergenceConfig.autonomous: false`. An **operator-guidance channel** lets you steer a running, unattended mission without stopping it: `--guidance-file <path>` is polled each turn and its text is injected as high-priority guidance — write to the file to redirect, clear it to drop the steer. This complements the execution-level `onIteration` directive channel (stop/escalate/switch-model).
|
|
379
|
-
13. **Dynamic optimization (default)** — every instruction is classified for complexity (simple / moderate / complex); non-trivial requests automatically get the aids that improve outcomes (moderate → exploration ×3 + critic + practices + HALO + coordination; complex → the full `--optimize` stack). Any explicit aid flag, `--no-auto`, or `UAP_DELIVER_AUTO=0` disables auto mode. Deploy queueing is never auto-enabled.
|
|
380
|
-
|
|
381
|
-
### Components (14 modules)
|
|
382
|
-
|
|
383
|
-
| Component | File | Purpose |
|
|
384
|
-
| ----------------- | ------------------------------------- | ----------------------------------------------------------------- |
|
|
385
|
-
| Convergence Loop | `src/delivery/convergence-loop.ts` | Turn loop with pluggable seams + mutable run-state for escalation |
|
|
386
|
-
| Verifier Ladder | `src/delivery/verifier-ladder.ts` | Build/typecheck/test/lint gates with fail-fast and diagnostics |
|
|
387
|
-
| Applier | `src/delivery/applier.ts` | Writes ` ```file:path ` blocks; path-safe, rollback-capable |
|
|
388
|
-
| Explorer | `src/delivery/explorer.ts` | Best-of-N candidates with strategy seeds + rollback evaluation |
|
|
389
|
-
| Judge | `src/delivery/judge.ts` | Model tie-break among equally-scored candidates |
|
|
390
|
-
| Critic | `src/delivery/critic.ts` | Gate-persona repair plans from failed turns |
|
|
391
|
-
| Practice Store | `src/delivery/practice.ts` | Provenance-safe best-practice cards with semantic recall |
|
|
392
|
-
| Escalation | `src/delivery/escalation.ts` | Stagnation-driven ladder returning loop directives |
|
|
393
|
-
| Ideation Seeder | `src/delivery/ideation.ts` | Divergent strategy seeds (generated or from curated ideas) |
|
|
394
|
-
| HALO Tracer | `src/delivery/halo-trace.ts` | Run/turn spans for `uap harness analyze` |
|
|
395
|
-
| Run Coordinator | `src/delivery/run-coordinator.ts` | `uap agent` registration/heartbeat + `uap deploy` commit queueing |
|
|
396
|
-
| Auto-Optimizer | `src/delivery/auto-optimizer.ts` | Complexity-classified dynamic activation of convergence aids |
|
|
397
|
-
| Spec Imports | `src/delivery/spec-imports.ts` | Transitive oracle-material discovery for spec protection |
|
|
398
|
-
| Integrity Guard | `src/delivery/integrity.ts` | Hash-verify + restore protected files after every gate run |
|
|
399
|
-
|
|
400
|
-
The model is reached through an OpenAI-compatible client
|
|
401
|
-
(`src/models/openai-compat-client.ts`) — the local inference gateway,
|
|
402
|
-
llama.cpp, vLLM, Ollama, or any `/v1/chat/completions` endpoint.
|
|
403
|
-
|
|
404
|
-
### Usage
|
|
405
|
-
|
|
406
|
-
```bash
|
|
407
|
-
# Single-shot loop against the current project's gates
|
|
408
|
-
uap deliver "implement src/slugify.js exporting slugify(str)"
|
|
81
|
+
- **🧠 4-tier memory** — daily log → working cache → semantic (Qdrant) → long-term archive, with write-gates that block low-quality/duplicate memories and corrections that cascade across tiers.
|
|
82
|
+
- **🗜️ MCP Router** — a token-optimizing tool proxy; large outputs are compressed via FTS5 intent search instead of dumped into context.
|
|
83
|
+
- **🎯 `uap deliver`** — the convergence/delivery harness (above).
|
|
84
|
+
- **🌳 Worktree workflow** — isolated branch-per-feature, auto-PR, safe cleanup; enforced so agents never edit the project root.
|
|
85
|
+
- **🛡️ Policy gates** — 20 executable enforcers (worktree, test, schema-diff, expert-review, memory-before-plan, delivery-enforcement…) that *block* non-compliant tool calls.
|
|
86
|
+
- **🤖 Expert droids & skills** — 38 specialized droids and 32 skills, with an expert-router that recommends a droid chain per task.
|
|
87
|
+
- **🧭 Multi-model routing** — 7 profiles (Claude Opus/Sonnet/Haiku, GPT, Qwen, generic); the router picks by complexity, cost, and performance.
|
|
88
|
+
- **🚦 Deploy batching & coordination** — batched git/deploy actions and overlap detection keep multi-agent work conflict-free.
|
|
89
|
+
- **📊 Dashboard** — rich TUI/web views of tasks, agents, memory, benchmarks, and policy status.
|
|
90
|
+
- **🔌 9 harnesses** — Claude Code, Factory, Cursor, VSCode, OpenCode, Codex, ForgeCode, Oh-My-Pi, Hermes.
|
|
409
91
|
|
|
410
|
-
|
|
411
|
-
uap deliver "add retry-with-backoff to the HTTP client" \
|
|
412
|
-
--candidates 3 --critic --practices --escalate --escalate-model opus-4.6
|
|
413
|
-
|
|
414
|
-
# Preview detected gates and plan without calling the model
|
|
415
|
-
uap deliver "..." --dry-run
|
|
416
|
-
|
|
417
|
-
# Scope to a subset of gates, cap turns, target another project
|
|
418
|
-
uap deliver "..." --gates build,test --max-turns 8 --project-root ../service
|
|
419
|
-
|
|
420
|
-
# Everything on: exploration, critic, practices, escalation, ideation, HALO, coordination
|
|
421
|
-
uap deliver "refactor the cache layer to LRU with TTL" --optimize
|
|
422
|
-
|
|
423
|
-
# Divergent ideation seeds + queue a commit into the deploy batcher on success
|
|
424
|
-
uap deliver "..." --ideate --candidates 4 --deploy
|
|
425
|
-
```
|
|
426
|
-
|
|
427
|
-
### Key flags
|
|
428
|
-
|
|
429
|
-
| Flag | Effect |
|
|
430
|
-
| -------------------------- | ---------------------------------------------------------------------- |
|
|
431
|
-
| `-m, --model <preset>` | Model preset (default `$UAP_DELIVER_MODEL` or `qwen35-a3b`) |
|
|
432
|
-
| `--max-turns <n>` | Maximum execute→verify iterations (default 5) |
|
|
433
|
-
| `--gates <ids>` | Gate subset: `build,typecheck,test,lint` |
|
|
434
|
-
| `--candidates <n>` | Best-of-N exploration (2–8) per turn |
|
|
435
|
-
| `--critic` | Structured repair plans on failed turns |
|
|
436
|
-
| `--practices` | Inject and record best-practice cards |
|
|
437
|
-
| `--no-semantic` | Use keyword (not embedding) practice recall |
|
|
438
|
-
| `--escalate` | Escalation ladder on stagnation |
|
|
439
|
-
| `--escalate-model <preset>`| Stronger model for the final escalation tier |
|
|
440
|
-
| `--ideate` | Divergent ideation: task-specific strategy seeds (implies exploration) |
|
|
441
|
-
| `--ideate-project <name>` | Seed exploration from `projects/<name>` curated ideas (`uap ideate`) |
|
|
442
|
-
| `--halo` | Emit HALO spans; analyze with `uap harness analyze` |
|
|
443
|
-
| `--coordinate` | Register with `uap agent`: announce, heartbeat, overlap detection |
|
|
444
|
-
| `--deploy` | On success, queue a commit into the deploy batcher (`uap deploy`) |
|
|
445
|
-
| `--optimize` | Enable every convergence aid (deploy excluded) |
|
|
446
|
-
| `--no-auto` | Disable dynamic optimization (auto-classified aids are the default) |
|
|
447
|
-
| `--no-protect-tests` | Allow modifying pre-existing test files (protected by default) |
|
|
448
|
-
| `--guidance-file <path>` | Poll this file each turn; steer a running mission without stopping it |
|
|
449
|
-
| `--endpoint <url>` | Override the model endpoint (OpenAI-compatible `/v1`) |
|
|
450
|
-
| `--dry-run` / `--json` | Show the plan only / emit machine-readable result |
|
|
451
|
-
|
|
452
|
-
Model output is never executed — only written as files and checked by the
|
|
453
|
-
gates. The applier refuses writes to executed config (`package.json`,
|
|
454
|
-
lockfiles), `.git`/hooks/CI paths, symlinks that escape the project root,
|
|
455
|
-
and pre-existing test/spec files (gate integrity — the spec defines "done",
|
|
456
|
-
so the model must satisfy it, not rewrite it).
|
|
92
|
+
Full list with code-level detail: **[docs/reference/FEATURES.md](docs/reference/FEATURES.md)**.
|
|
457
93
|
|
|
458
94
|
---
|
|
459
95
|
|
|
460
|
-
## Patterns
|
|
461
|
-
|
|
462
|
-
Battle-tested patterns from Terminal-Bench 2.0, stored in `.factory/patterns/`.
|
|
463
|
-
|
|
464
|
-
| Pattern | ID | What It Prevents |
|
|
465
|
-
| --------------------- | --- | -------------------------------------- |
|
|
466
|
-
| Output Existence | P12 | Missing output files (37% of failures) |
|
|
467
|
-
| Iterative Refinement | P13 | First-attempt acceptance |
|
|
468
|
-
| Output Format | P14 | Wrong format/encoding |
|
|
469
|
-
| Task-First | P16 | Over-planning before doing |
|
|
470
|
-
| Constraint Extraction | P17 | Missing hidden requirements |
|
|
471
|
-
| Impossible Refusal | P19 | Attempting impossible tasks |
|
|
472
|
-
| Adversarial | P20 | Missing attack vectors |
|
|
473
|
-
| Chess Engine | P21 | Reinventing Stockfish |
|
|
474
|
-
| Git Recovery | P22 | Data loss during git ops |
|
|
475
|
-
| Compression Check | P23 | Lossy compression errors |
|
|
476
|
-
| Polyglot | P24 | Single-language thinking |
|
|
477
|
-
| Service Config | P25 | Misconfigured services |
|
|
478
|
-
| Near-Miss | P26 | Almost-correct solutions |
|
|
479
|
-
| Smoke Test | P28 | Untested changes |
|
|
480
|
-
| Performance Threshold | P30 | Missing perf targets |
|
|
481
|
-
| Round-Trip | P31 | Encode/decode mismatches |
|
|
482
|
-
| CLI Verify | P32 | Broken CLI commands |
|
|
483
|
-
| Numerical Stability | P33 | Floating point errors |
|
|
484
|
-
| Image Pipeline | P34 | Image processing errors |
|
|
485
|
-
| Decoder-First | P35 | Wrong problem decomposition |
|
|
486
|
-
| Competition Domain | P36 | Missing domain knowledge |
|
|
487
|
-
| Ambiguity Detection | P37 | Ambiguous task descriptions |
|
|
488
|
-
| IaC Parity | IaC | Config drift |
|
|
489
|
-
|
|
490
|
-
---
|
|
96
|
+
## Architecture
|
|
491
97
|
|
|
492
|
-
|
|
98
|
+
UAP installs hooks into your agent harness, then mediates every tool call through memory, policy, and token-optimization layers.
|
|
493
99
|
|
|
494
|
-
### Expert Droids (38) — full SDLC coverage
|
|
495
|
-
|
|
496
|
-
See [docs/reference/EXPERT_DROIDS.md](docs/reference/EXPERT_DROIDS.md) for the complete roster, and [docs/architecture/EXPERT_STACK.md](docs/architecture/EXPERT_STACK.md) for the forward-design / HALO / ideation extensions.
|
|
497
|
-
|
|
498
|
-
| Phase | Droids |
|
|
499
|
-
|---|---|
|
|
500
|
-
| **Ideation** | ideation-expert *(open-collider divergent ideation)* |
|
|
501
|
-
| **Strategy & Design** | product-strategist, strategic-architect, tactical-architect, implementation-planner, architect-reviewer, api-designer |
|
|
502
|
-
| **Build** | typescript-node-expert, javascript-pro, python-pro, rust-pro, go-pro, cli-design-expert, debug-expert, refactoring-specialist |
|
|
503
|
-
| **Quality** | code-quality-guardian, code-quality-reviewer, security-auditor, security-code-reviewer |
|
|
504
|
-
| **Performance & Cost** | performance-optimizer, performance-reviewer, cost-engineer |
|
|
505
|
-
| **Testing & QA** | test-strategist, test-plan-writer, test-coverage-reviewer, qa-expert |
|
|
506
|
-
| **Documentation** | documentation-expert, documentation-accuracy-reviewer |
|
|
507
|
-
| **Operations** | release-manager, compliance-officer, incident-responder, observability-engineer, dependency-auditor, harness-optimizer *(HALO loop)* |
|
|
508
|
-
| **Specialty** | ml-training-expert, sysadmin-expert, terminal-bench-optimizer, accessibility-tester |
|
|
509
|
-
|
|
510
|
-
```bash
|
|
511
|
-
uap droids list # see what's installed
|
|
512
|
-
uap droids validate # CI-grade integrity check
|
|
513
|
-
uap expert-route "<task>" # recommended droid chain for a task
|
|
514
|
-
uap expert-route "<task>" --json # machine-readable
|
|
515
100
|
```
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
101
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
102
|
+
│ Agent harnesses │
|
|
103
|
+
│ Claude Code · Factory · Cursor · VSCode · OpenCode · … │
|
|
104
|
+
└───────────────────────────┬─────────────────────────────────┘
|
|
105
|
+
│ hooks (PreToolUse / tool.execute.before)
|
|
106
|
+
▼
|
|
107
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
108
|
+
│ UAP CLI (uap) │
|
|
109
|
+
│ setup · memory · deliver · worktree · policy · deploy │
|
|
110
|
+
│ task · droids · model · mcp-router · harness · ideate … │
|
|
111
|
+
└──┬─────────┬──────────┬──────────┬──────────┬───────────────┘
|
|
112
|
+
▼ ▼ ▼ ▼ ▼
|
|
113
|
+
Memory Policy MCP Router Delivery Coordination
|
|
114
|
+
4 tiers 20 gates FTS5 compr. harness + deploy batch
|
|
526
115
|
```
|
|
527
116
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
**Project Skills** (5): codebase-navigator, memory-management, near-miss-iteration, terminal-bench, worktree-workflow
|
|
531
|
-
|
|
532
|
-
**Claude Skills** (6): hooks-session-start, hooks-pre-compact, scripts-tool-router, scripts-preload-memory, session-context-preservation-droid, **parallel-expert-review**
|
|
533
|
-
|
|
534
|
-
**Factory Skills** (22): adversarial, balls-mode, batch-review, chess-engine, cli-design-expert, codebase-navigator, compression, git-forensics, near-miss, polyglot, service-config, terminal-bench-strategies, typescript-node-expert, unreal-engine-developer, tuistory, agent-browser, figma-mcp-promotion, infra-worker, uap-coordination, uap-patterns, uap-tasks, uap-worktree
|
|
117
|
+
- **30+ CLI commands** across 18 source subsystems (168 TypeScript modules).
|
|
118
|
+
- Deep dive: **[docs/architecture/OVERVIEW.md](docs/architecture/OVERVIEW.md)** · protocol spec: **[docs/architecture/PROTOCOL.md](docs/architecture/PROTOCOL.md)**.
|
|
535
119
|
|
|
536
120
|
---
|
|
537
121
|
|
|
538
|
-
## Task Management
|
|
539
|
-
|
|
540
|
-
| Feature | Description |
|
|
541
|
-
| ------------ | -------------------------------------------- |
|
|
542
|
-
| Types | task, bug, feature, epic, chore, story |
|
|
543
|
-
| Statuses | open, in_progress, blocked, done, wont_do |
|
|
544
|
-
| Priorities | P0 (critical) through P4 (low) |
|
|
545
|
-
| Dependencies | blocks, related, discovered_from |
|
|
546
|
-
| Claims | Exclusive claim with worktree + announcement |
|
|
547
|
-
| JSONL Sync | Git-versionable task export |
|
|
548
|
-
| Compaction | Archive old closed tasks |
|
|
549
|
-
|
|
550
|
-
---
|
|
122
|
+
## Benchmarks
|
|
551
123
|
|
|
552
|
-
|
|
124
|
+
Terminal-Bench 2.0, 12 representative tasks, UAP-on vs. baseline:
|
|
553
125
|
|
|
554
|
-
|
|
126
|
+
| Metric | Baseline | With UAP | Δ |
|
|
127
|
+
|---|---|---|---|
|
|
128
|
+
| Tokens consumed | 558,000 | 280,438 | **−49.7%** |
|
|
129
|
+
| Task success rate | 25% | 58% | **+33pp** |
|
|
130
|
+
| Errors per task | 1.17 | 0.42 | **−64%** |
|
|
131
|
+
| Wall-clock (total) | 618s | 266s | **−57%** |
|
|
555
132
|
|
|
556
|
-
|
|
557
|
-
uap worktree create my-feature # Creates .worktrees/001-my-feature/
|
|
558
|
-
uap worktree list # Show all worktrees
|
|
559
|
-
uap worktree pr 001 # Create PR
|
|
560
|
-
uap worktree cleanup 001 # Remove worktree + branch
|
|
561
|
-
uap worktree ensure --strict # Verify inside worktree (CI gate)
|
|
562
|
-
```
|
|
133
|
+
Methodology, raw runs, and cost analysis: **[docs/benchmarks/](docs/benchmarks/)**.
|
|
563
134
|
|
|
564
135
|
---
|
|
565
136
|
|
|
566
|
-
## Hooks
|
|
567
|
-
|
|
568
|
-
### Session Start Hook
|
|
569
|
-
|
|
570
|
-
1. Cleans stale agents (>24h no heartbeat)
|
|
571
|
-
2. Injects UAP compliance checklist
|
|
572
|
-
3. Loads recent memories (last 24h)
|
|
573
|
-
4. Surfaces open loops from session memories
|
|
574
|
-
5. Warns about stale worktrees
|
|
137
|
+
## Supported harnesses
|
|
575
138
|
|
|
576
|
-
|
|
139
|
+
| Harness | Hooks | MCP Router | Policy gates |
|
|
140
|
+
|---|---|---|---|
|
|
141
|
+
| Claude Code | ✅ | ✅ | ✅ |
|
|
142
|
+
| Factory | ✅ | ✅ | ✅ |
|
|
143
|
+
| Cursor | ✅ | ✅ | ✅ |
|
|
144
|
+
| VSCode | ✅ | ✅ | ✅ |
|
|
145
|
+
| OpenCode | ✅ | ✅ | ✅ |
|
|
146
|
+
| Codex | ✅ | ✅ | ✅ |
|
|
147
|
+
| ForgeCode | ✅ | ✅ | ✅ |
|
|
148
|
+
| Oh-My-Pi | ✅ | ✅ | ✅ |
|
|
149
|
+
| Hermes (global) | ✅ | ✅ | ✅ |
|
|
577
150
|
|
|
578
|
-
|
|
579
|
-
2. Checks if lessons were stored
|
|
580
|
-
3. Outputs compliance reminder
|
|
581
|
-
4. Cleans up agents from current session
|
|
582
|
-
|
|
583
|
-
### Supported Platforms
|
|
584
|
-
|
|
585
|
-
```bash
|
|
586
|
-
uap hooks install # all project platforms at once
|
|
587
|
-
uap hooks install claude # Claude Code
|
|
588
|
-
uap hooks install factory # Factory.AI
|
|
589
|
-
uap hooks install cursor # Cursor
|
|
590
|
-
uap hooks install vscode # VSCode
|
|
591
|
-
uap hooks install opencode # OpenCode
|
|
592
|
-
uap hooks install forgecode # ForgeCode
|
|
593
|
-
uap hooks install codex # Codex CLI
|
|
594
|
-
uap hooks install omp # Oh-My-Pi
|
|
595
|
-
uap hooks install -t hermes # Hermes Agent (NousResearch; global ~/.hermes)
|
|
596
|
-
uap hooks doctor # audit policy-gate coverage across platforms
|
|
597
|
-
```
|
|
598
|
-
|
|
599
|
-
The DB-driven **policy gate** is installed and wired on every platform with a
|
|
600
|
-
pre-tool-use mechanism (claude, vscode, cursor, factory, opencode, omp, hermes).
|
|
601
|
-
**Codex** is MCP-gated (no native pre-tool hook); **ForgeCode** is advisory.
|
|
602
|
-
`uap hooks doctor` reports true coverage — see
|
|
603
|
-
[docs/architecture/PLATFORM_GATING.md](docs/architecture/PLATFORM_GATING.md).
|
|
151
|
+
Install into all detected harnesses with `uap hooks install`; audit coverage with `uap hooks doctor`. Matrix: **[docs/reference/PLATFORMS.md](docs/reference/PLATFORMS.md)**.
|
|
604
152
|
|
|
605
153
|
---
|
|
606
154
|
|
|
607
|
-
## CLI Reference
|
|
608
|
-
|
|
609
|
-
### 29 Top-Level Commands
|
|
610
|
-
|
|
611
|
-
| Command | Description |
|
|
612
|
-
| ------------------------- | -------------------------------------------- |
|
|
613
|
-
| `uap init` | Initialize UAP in a project |
|
|
614
|
-
| `uap setup -p all` | Full setup (memory, Qdrant, hooks, patterns) |
|
|
615
|
-
| `uap generate` | Regenerate CLAUDE.md from templates |
|
|
616
|
-
| `uap update` | Update all components |
|
|
617
|
-
| `uap analyze` | Analyze project structure |
|
|
618
|
-
| `uap compliance check` | Verify UAP compliance |
|
|
619
|
-
| `uap dashboard` | Rich terminal dashboard (11 views) |
|
|
620
|
-
| `uap memory <action>` | Memory management (9 subcommands) |
|
|
621
|
-
| `uap patterns <action>` | Pattern RAG management (4 subcommands) |
|
|
622
|
-
| `uap worktree <action>` | Git worktree management (5 subcommands) |
|
|
623
|
-
| `uap agent <action>` | Agent lifecycle (10 subcommands) |
|
|
624
|
-
| `uap coord <action>` | Coordination status (3 subcommands) |
|
|
625
|
-
| `uap deploy <action>` | Deploy batching (8 subcommands) |
|
|
626
|
-
| `uap task <action>` | Task management (15 subcommands) |
|
|
627
|
-
| `uap droids <action>` | Droid management (3 subcommands) |
|
|
628
|
-
| `uap expert-route <task>` | Recommend an expert droid chain for a task |
|
|
629
|
-
| `uap deliver <task>` | Convergence loop: iterate a model against real gates until delivery |
|
|
630
|
-
| `uap harness <action>` | HALO trace analysis (analyze, status) |
|
|
631
|
-
| `uap ideate <action>` | Open-collider ideation (setup, run, ideas) |
|
|
632
|
-
| `uap model <action>` | Multi-model management (8 subcommands) |
|
|
633
|
-
| `uap policy <action>` | Policy management (15 subcommands) |
|
|
634
|
-
| `uap mcp-router <action>` | MCP Router management (4 subcommands) |
|
|
635
|
-
| `uap hooks <action>` | Hook install / status / doctor (3 subcommands) |
|
|
636
|
-
| `uap tool-calls <action>` | Qwen3.5 tool call fixes (4 subcommands) |
|
|
637
|
-
| `uap rtk <action>` | RTK token compression (3 subcommands) |
|
|
638
|
-
| `uap schema-diff` | Detect breaking schema changes |
|
|
639
|
-
| `uap mcp-setup` | Configure MCP Router for AI harnesses |
|
|
640
|
-
| `uap sync` | Sync configuration between platforms |
|
|
641
|
-
| `uap uap-omp <action>` | Oh-My-Pi integration (7 subcommands) |
|
|
642
|
-
|
|
643
|
-
**Total: 118 commands and subcommands.**
|
|
644
|
-
|
|
645
|
-
### Additional Binaries
|
|
646
|
-
|
|
647
|
-
| Binary | Purpose |
|
|
648
|
-
| ----------------------- | ------------------------------------- |
|
|
649
|
-
| `uap-policy` | Standalone policy management |
|
|
650
|
-
| `llama-optimize` | llama.cpp startup parameter generator |
|
|
651
|
-
| `uap-tool-call-test` | Qwen3.5 tool call testing |
|
|
652
|
-
| `uap-tool-call-wrapper` | Qwen3.5 tool call wrapper |
|
|
653
|
-
| `uap-template-verify` | Chat template verification |
|
|
654
|
-
| `generate-lora-data` | LoRA training data generation |
|
|
155
|
+
## Documentation
|
|
655
156
|
|
|
656
|
-
|
|
157
|
+
| | |
|
|
158
|
+
|---|---|
|
|
159
|
+
| **[Getting Started](docs/getting-started/)** | Installation, quickstart, configuration |
|
|
160
|
+
| **[Guides](docs/guides/)** | deliver, memory, MCP router, worktrees, policies, multi-model, local models |
|
|
161
|
+
| **[Architecture](docs/architecture/)** | System overview + the UAP protocol |
|
|
162
|
+
| **[Reference](docs/reference/)** | CLI, API, patterns, database schema, platforms |
|
|
163
|
+
| **[Benchmarks](docs/benchmarks/)** | Methodology and results |
|
|
164
|
+
| **[Contributing](CONTRIBUTING.md)** | Dev setup, gates, conventions |
|
|
657
165
|
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
### .uap.json (Project)
|
|
661
|
-
|
|
662
|
-
```json
|
|
663
|
-
{
|
|
664
|
-
"version": "1.0.0",
|
|
665
|
-
"project": { "name": "my-project", "defaultBranch": "main" },
|
|
666
|
-
"memory": {
|
|
667
|
-
"shortTerm": { "enabled": true, "path": "./agents/data/memory/short_term.db" },
|
|
668
|
-
"longTerm": { "enabled": true, "provider": "qdrant" }
|
|
669
|
-
},
|
|
670
|
-
"multiModel": {
|
|
671
|
-
"enabled": true,
|
|
672
|
-
"models": ["opus-4.6", "qwen35"],
|
|
673
|
-
"roles": { "planner": "opus-4.6", "executor": "qwen35" },
|
|
674
|
-
"routingStrategy": "balanced"
|
|
675
|
-
},
|
|
676
|
-
"worktrees": { "enabled": true, "directory": ".worktrees" }
|
|
677
|
-
}
|
|
678
|
-
```
|
|
166
|
+
Start at the **[documentation index](docs/INDEX.md)**.
|
|
679
167
|
|
|
680
168
|
---
|
|
681
169
|
|
|
682
|
-
## Testing
|
|
170
|
+
## Testing
|
|
683
171
|
|
|
684
172
|
```bash
|
|
685
|
-
npm install
|
|
686
|
-
npm run build
|
|
687
|
-
npm test
|
|
688
|
-
npm run
|
|
689
|
-
npm run test:coverage # Coverage report (50% thresholds)
|
|
173
|
+
npm install
|
|
174
|
+
npm run build # TypeScript compile
|
|
175
|
+
npm test # vitest — 117 suites
|
|
176
|
+
npm run bench # benchmark suite
|
|
690
177
|
```
|
|
691
178
|
|
|
692
179
|
---
|
|
693
180
|
|
|
694
|
-
## Requirements
|
|
695
|
-
|
|
696
|
-
| Dependency | Version | Required | Purpose |
|
|
697
|
-
| ---------- | --------- | -------- | -------------------------- |
|
|
698
|
-
| Node.js | >= 18.0.0 | Yes | Runtime |
|
|
699
|
-
| git | Latest | Yes | Version control, worktrees |
|
|
700
|
-
| Docker | Latest | No | Local Qdrant |
|
|
701
|
-
| Python 3 | Latest | No | Embeddings, Pattern RAG |
|
|
702
|
-
|
|
703
|
-
---
|
|
704
|
-
|
|
705
|
-
## Attribution
|
|
181
|
+
## License
|
|
706
182
|
|
|
707
|
-
|
|
708
|
-
- CloakBrowser from [CloakHQ/CloakBrowser](https://github.com/CloakHQ/CloakBrowser)
|
|
709
|
-
|
|
710
|
-
---
|
|
711
|
-
|
|
712
|
-
<div align="center">
|
|
713
|
-
|
|
714
|
-
**[Documentation](docs/INDEX.md)** | **[npm](https://www.npmjs.com/package/@miller-tech/uap)**
|
|
715
|
-
|
|
716
|
-
</div>
|
|
183
|
+
MIT © Miller Tech. See [LICENSE](LICENSE).
|