@miller-tech/uap 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmarks/benchmark.d.ts +8 -8
- package/dist/benchmarks/improved-benchmark.d.ts.map +1 -1
- package/dist/benchmarks/improved-benchmark.js +10 -23
- package/dist/benchmarks/improved-benchmark.js.map +1 -1
- package/dist/benchmarks/model-integration.d.ts.map +1 -1
- package/dist/benchmarks/model-integration.js +22 -23
- package/dist/benchmarks/model-integration.js.map +1 -1
- package/dist/bin/policy.js +67 -11
- package/dist/bin/policy.js.map +1 -1
- package/dist/cli/dashboard.d.ts +2 -1
- package/dist/cli/dashboard.d.ts.map +1 -1
- package/dist/cli/dashboard.js +399 -10
- package/dist/cli/dashboard.js.map +1 -1
- package/dist/cli/model.js +12 -12
- package/dist/cli/model.js.map +1 -1
- package/dist/cli/setup-wizard.d.ts.map +1 -1
- package/dist/cli/setup-wizard.js +24 -0
- package/dist/cli/setup-wizard.js.map +1 -1
- package/dist/coordination/deploy-batcher.d.ts +1 -0
- package/dist/coordination/deploy-batcher.d.ts.map +1 -1
- package/dist/coordination/deploy-batcher.js +24 -25
- package/dist/coordination/deploy-batcher.js.map +1 -1
- package/dist/dashboard/data-service.d.ts +94 -0
- package/dist/dashboard/data-service.d.ts.map +1 -0
- package/dist/dashboard/data-service.js +286 -0
- package/dist/dashboard/data-service.js.map +1 -0
- package/dist/dashboard/index.d.ts +5 -0
- package/dist/dashboard/index.d.ts.map +1 -0
- package/dist/dashboard/index.js +3 -0
- package/dist/dashboard/index.js.map +1 -0
- package/dist/dashboard/server.d.ts +15 -0
- package/dist/dashboard/server.d.ts.map +1 -0
- package/dist/dashboard/server.js +158 -0
- package/dist/dashboard/server.js.map +1 -0
- package/dist/mcp-router/session-stats.d.ts +9 -0
- package/dist/mcp-router/session-stats.d.ts.map +1 -1
- package/dist/mcp-router/session-stats.js +19 -3
- package/dist/mcp-router/session-stats.js.map +1 -1
- package/dist/memory/adaptive-context.d.ts +1 -0
- package/dist/memory/adaptive-context.d.ts.map +1 -1
- package/dist/memory/adaptive-context.js +4 -0
- package/dist/memory/adaptive-context.js.map +1 -1
- package/dist/memory/embeddings.d.ts.map +1 -1
- package/dist/memory/embeddings.js +4 -4
- package/dist/memory/embeddings.js.map +1 -1
- package/dist/memory/model-router.d.ts +1 -1
- package/dist/memory/model-router.d.ts.map +1 -1
- package/dist/memory/model-router.js +52 -1
- package/dist/memory/model-router.js.map +1 -1
- package/dist/memory/predictive-memory.d.ts.map +1 -1
- package/dist/memory/predictive-memory.js +4 -3
- package/dist/memory/predictive-memory.js.map +1 -1
- package/dist/models/analytics.d.ts +93 -0
- package/dist/models/analytics.d.ts.map +1 -0
- package/dist/models/analytics.js +205 -0
- package/dist/models/analytics.js.map +1 -0
- package/dist/models/execution-profiles.d.ts +6 -0
- package/dist/models/execution-profiles.d.ts.map +1 -1
- package/dist/models/execution-profiles.js +15 -0
- package/dist/models/execution-profiles.js.map +1 -1
- package/dist/models/executor.d.ts.map +1 -1
- package/dist/models/executor.js +51 -17
- package/dist/models/executor.js.map +1 -1
- package/dist/models/index.d.ts +2 -0
- package/dist/models/index.d.ts.map +1 -1
- package/dist/models/index.js +2 -0
- package/dist/models/index.js.map +1 -1
- package/dist/models/router.d.ts +8 -0
- package/dist/models/router.d.ts.map +1 -1
- package/dist/models/router.js +39 -18
- package/dist/models/router.js.map +1 -1
- package/dist/models/types.d.ts +26 -0
- package/dist/models/types.d.ts.map +1 -1
- package/dist/models/types.js +39 -0
- package/dist/models/types.js.map +1 -1
- package/dist/models/unified-router.d.ts.map +1 -1
- package/dist/models/unified-router.js +4 -0
- package/dist/models/unified-router.js.map +1 -1
- package/dist/policies/database-manager.d.ts +1 -0
- package/dist/policies/database-manager.d.ts.map +1 -1
- package/dist/policies/database-manager.js +14 -2
- package/dist/policies/database-manager.js.map +1 -1
- package/dist/policies/policy-gate.d.ts +2 -2
- package/dist/policies/policy-gate.d.ts.map +1 -1
- package/dist/policies/policy-gate.js +6 -4
- package/dist/policies/policy-gate.js.map +1 -1
- package/dist/policies/policy-memory.d.ts +3 -0
- package/dist/policies/policy-memory.d.ts.map +1 -1
- package/dist/policies/policy-memory.js +11 -0
- package/dist/policies/policy-memory.js.map +1 -1
- package/dist/policies/schemas/policy.d.ts +3 -0
- package/dist/policies/schemas/policy.d.ts.map +1 -1
- package/dist/policies/schemas/policy.js +1 -0
- package/dist/policies/schemas/policy.js.map +1 -1
- package/dist/tasks/coordination.d.ts +18 -0
- package/dist/tasks/coordination.d.ts.map +1 -1
- package/dist/tasks/coordination.js +59 -1
- package/dist/tasks/coordination.js.map +1 -1
- package/dist/tasks/event-bus.d.ts +91 -0
- package/dist/tasks/event-bus.d.ts.map +1 -0
- package/dist/tasks/event-bus.js +123 -0
- package/dist/tasks/event-bus.js.map +1 -0
- package/dist/tasks/service.d.ts +5 -0
- package/dist/tasks/service.d.ts.map +1 -1
- package/dist/tasks/service.js +59 -0
- package/dist/tasks/service.js.map +1 -1
- package/dist/telemetry/session-telemetry.d.ts.map +1 -1
- package/dist/telemetry/session-telemetry.js +3 -0
- package/dist/telemetry/session-telemetry.js.map +1 -1
- package/dist/utils/concurrency-pool.d.ts +51 -0
- package/dist/utils/concurrency-pool.d.ts.map +1 -0
- package/dist/utils/concurrency-pool.js +80 -0
- package/dist/utils/concurrency-pool.js.map +1 -0
- package/dist/utils/system-resources.d.ts +47 -0
- package/dist/utils/system-resources.d.ts.map +1 -0
- package/dist/utils/system-resources.js +92 -0
- package/dist/utils/system-resources.js.map +1 -0
- package/docs/BENCHMARK_GAPS_AND_PLAN.md +146 -0
- package/docs/PARALLELISM_GAPS_AND_OPTIONS.md +422 -0
- package/docs/UAP_OPTIMIZATION_PLAN.md +638 -0
- package/package.json +4 -1
- package/templates/hooks/session-start.sh +8 -1
|
@@ -0,0 +1,638 @@
|
|
|
1
|
+
# UAP Optimization & Dashboard Overlay Plan (Validated)
|
|
2
|
+
|
|
3
|
+
> Validated against codebase on 2026-03-17. All references point to real files, types, and services.
|
|
4
|
+
|
|
5
|
+
## Validation Summary
|
|
6
|
+
|
|
7
|
+
### What exists today
|
|
8
|
+
|
|
9
|
+
- **Policy system**: Full CRUD + enforcement gate + audit trail (`src/policies/`), SQLite-backed, 3 enforcement levels (REQUIRED/RECOMMENDED/OPTIONAL), `togglePolicy()` already on `PolicyMemoryManager`
|
|
10
|
+
- **Memory system**: 4-tier (L1-L4), 26 files in `src/memory/`, adaptive context, dynamic retrieval, predictive pre-fetch
|
|
11
|
+
- **Model router**: Rule-based (`src/models/router.ts`) + benchmark-data (`src/memory/model-router.ts`) + unified consensus (`src/models/unified-router.ts`), execution profiles per model family (`src/models/execution-profiles.ts`)
|
|
12
|
+
- **Dashboard**: 1830-line terminal dashboard (`src/cli/dashboard.ts`) with 8 views, 424-line viz library (`src/cli/visualize.ts`), session telemetry (`src/telemetry/session-telemetry.ts`)
|
|
13
|
+
- **No web dashboard exists**. All visualization is chalk-based terminal output.
|
|
14
|
+
- **No enforcement stage concept exists** on policies. Policies have `level` (REQUIRED/RECOMMENDED/OPTIONAL) and `isActive` (boolean) but no stage gating.
|
|
15
|
+
|
|
16
|
+
### What the original plan got wrong
|
|
17
|
+
|
|
18
|
+
1. Proposed `ink`/`blessed` TUI -- unnecessary. The existing chalk-based dashboard + visualize.ts primitives already work and are battle-tested. New panels should extend the existing system.
|
|
19
|
+
2. Proposed web React dashboard from scratch -- premature. Option 3 (embedded) is correct: extend the existing `uap dashboard` CLI with new panels first, add a lightweight HTTP/WebSocket server later.
|
|
20
|
+
3. Missed that `PolicyMemoryManager.togglePolicy()` already exists at `src/policies/policy-memory.ts:91`.
|
|
21
|
+
4. Missed that `unified-router.ts` model maps need updating for opus-4.6 and qwen35.
|
|
22
|
+
5. Missed that the session dashboard already shows policies but only as hardcoded bullet items (`dashboard.ts:1305-1317`), not from the database.
|
|
23
|
+
|
|
24
|
+
### Codebase gaps that must be fixed before dashboard work
|
|
25
|
+
|
|
26
|
+
1. `src/models/unified-router.ts:35-49` -- `BENCHMARK_TO_RULE_MODEL_MAP` and `RULE_TO_BENCHMARK_MODEL_MAP` are missing entries for `opus-4.6` and `qwen35`
|
|
27
|
+
2. `src/cli/model.ts:44-59` -- `getMultiModelConfig()` defaults still reference `opus-4.5` as fallback
|
|
28
|
+
3. `src/cli/dashboard.ts:1305-1317` -- Policies section is hardcoded, not reading from `PolicyMemoryManager`
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Part 1: Model Optimization
|
|
33
|
+
|
|
34
|
+
### A. Immediate Fixes (Prerequisite)
|
|
35
|
+
|
|
36
|
+
#### 1.1 Update unified-router model maps
|
|
37
|
+
|
|
38
|
+
**File**: `src/models/unified-router.ts:35-49`
|
|
39
|
+
|
|
40
|
+
```typescript
|
|
41
|
+
const BENCHMARK_TO_RULE_MODEL_MAP: Record<string, string> = {
|
|
42
|
+
'claude-opus-4.5': 'opus-4.5',
|
|
43
|
+
'claude-opus-4.6': 'opus-4.6', // ADD
|
|
44
|
+
'gpt-5.2': 'gpt-5.2',
|
|
45
|
+
'glm-4.7': 'glm-4.7',
|
|
46
|
+
'gpt-5.2-codex': 'gpt-5.2',
|
|
47
|
+
qwen35: 'qwen35', // ADD
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
const RULE_TO_BENCHMARK_MODEL_MAP: Record<string, ModelId> = {
|
|
51
|
+
'opus-4.5': 'claude-opus-4.5',
|
|
52
|
+
'opus-4.6': 'claude-opus-4.6', // ADD
|
|
53
|
+
'gpt-5.2': 'gpt-5.2',
|
|
54
|
+
'glm-4.7': 'glm-4.7',
|
|
55
|
+
'deepseek-v3.2': 'gpt-5.2',
|
|
56
|
+
'deepseek-v3.2-exp': 'gpt-5.2',
|
|
57
|
+
'qwen35-a3b': 'glm-4.7',
|
|
58
|
+
qwen35: 'qwen35', // ADD
|
|
59
|
+
};
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
#### 1.2 Update CLI model defaults
|
|
63
|
+
|
|
64
|
+
**File**: `src/cli/model.ts:44-59`
|
|
65
|
+
|
|
66
|
+
Change `getMultiModelConfig()` fallback to use `ModelRouter.getDefaultUAPConfig()` instead of hardcoded opus-4.5 defaults.
|
|
67
|
+
|
|
68
|
+
#### 1.3 Add benchmark fingerprints for opus-4.6 and qwen35
|
|
69
|
+
|
|
70
|
+
**File**: `src/memory/model-router.ts`
|
|
71
|
+
|
|
72
|
+
Add `MODEL_FINGERPRINTS` entries for `claude-opus-4.6` and `qwen35` so the benchmark-data router can track them.
|
|
73
|
+
|
|
74
|
+
### B. Qwen 3.5 Optimizations
|
|
75
|
+
|
|
76
|
+
#### 2.1 Dynamic quantization switching
|
|
77
|
+
|
|
78
|
+
**Where**: New function in `src/models/execution-profiles.ts`
|
|
79
|
+
|
|
80
|
+
The `SMALL_MOE_PROFILE` already covers qwen3.5 correctly. Enhancement: add a `quantizationHint` field to `ExecutionProfile` so the llama.cpp server can be told which quant to load.
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
// Add to ExecutionProfile interface
|
|
84
|
+
quantizationHint?: {
|
|
85
|
+
low: string; // e.g. 'iq2_xs' for simple tasks
|
|
86
|
+
medium: string; // e.g. 'iq4_xs' for standard tasks
|
|
87
|
+
high: string; // e.g. 'q5_k_m' for complex tasks
|
|
88
|
+
};
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The router already classifies complexity. Wire the quant hint into the `ModelSelection` result so the agent runner can pass it to the llama.cpp endpoint.
|
|
92
|
+
|
|
93
|
+
#### 2.2 Context window management
|
|
94
|
+
|
|
95
|
+
**Where**: Extend `src/memory/context-compressor.ts` and `src/memory/adaptive-context.ts`
|
|
96
|
+
|
|
97
|
+
These already exist and handle token budgets. Enhancement:
|
|
98
|
+
|
|
99
|
+
- Add a `modelContextBudget` field to `ModelConfig` in `src/models/types.ts` (distinct from `maxContextTokens`) representing the _effective_ context the model handles well
|
|
100
|
+
- For qwen35: `maxContextTokens: 262144` but `modelContextBudget: 32768` (sweet spot for 3B active params)
|
|
101
|
+
- `AdaptiveContext` already selects context level by task type -- wire it to respect `modelContextBudget`
|
|
102
|
+
|
|
103
|
+
#### 2.3 Prompt token budget tracking
|
|
104
|
+
|
|
105
|
+
**Where**: `src/memory/context-compressor.ts` already has `SemanticCompressor` with entropy-aware compression
|
|
106
|
+
|
|
107
|
+
Enhancement: expose a per-session token counter that the dashboard can read. Add to `globalSessionStats` in `src/mcp-router/session-stats.ts`:
|
|
108
|
+
|
|
109
|
+
```typescript
|
|
110
|
+
// Already exists: totalContextBytes, totalRawBytes, savingsRatio
|
|
111
|
+
// Add:
|
|
112
|
+
modelTokenBudget: number; // from modelContextBudget
|
|
113
|
+
modelTokensConsumed: number; // running total
|
|
114
|
+
compressionEvents: number; // how many times compressor fired
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### C. Multi-Model Routing Enhancements
|
|
118
|
+
|
|
119
|
+
#### 3.1 Complexity-based routing matrix
|
|
120
|
+
|
|
121
|
+
**Where**: `src/models/router.ts` -- `selectAdaptiveModel()` already implements this logic
|
|
122
|
+
|
|
123
|
+
Current behavior (validated):
|
|
124
|
+
|
|
125
|
+
- `critical`/`high` -> planner (opus-4.6)
|
|
126
|
+
- `medium` -> executor (qwen35)
|
|
127
|
+
- `low` -> cheapest model (qwen35, $0/1M)
|
|
128
|
+
|
|
129
|
+
This is correct. No change needed for the matrix itself.
|
|
130
|
+
|
|
131
|
+
Enhancement: add a `routingMatrix` config option to `MultiModelConfig` so users can override per-complexity routing without editing code:
|
|
132
|
+
|
|
133
|
+
```typescript
|
|
134
|
+
// Add to MultiModelConfig in src/models/types.ts
|
|
135
|
+
routingMatrix?: Record<TaskComplexity, { planner: string; executor: string }>;
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
#### 3.2 Performance analytics module
|
|
139
|
+
|
|
140
|
+
**Where**: New file `src/models/analytics.ts`
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
export interface TaskOutcome {
|
|
144
|
+
modelId: string;
|
|
145
|
+
taskType: string;
|
|
146
|
+
complexity: TaskComplexity;
|
|
147
|
+
success: boolean;
|
|
148
|
+
durationMs: number;
|
|
149
|
+
tokensUsed: { input: number; output: number };
|
|
150
|
+
cost: number;
|
|
151
|
+
timestamp: string;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
export class ModelAnalytics {
|
|
155
|
+
private db: Database; // SQLite, same pattern as other DBs
|
|
156
|
+
|
|
157
|
+
recordOutcome(outcome: TaskOutcome): void;
|
|
158
|
+
getSuccessRate(modelId: string, taskType?: string): number;
|
|
159
|
+
getAvgLatency(modelId: string, taskType?: string): number;
|
|
160
|
+
getOptimalRouting(): Record<string, string>; // taskType -> modelId
|
|
161
|
+
getCostBreakdown(since?: Date): CostBreakdown[];
|
|
162
|
+
}
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
This feeds into the dashboard cost tracker panel.
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Part 2: Dashboard -- Policies / Memories / Model Active Panels
|
|
170
|
+
|
|
171
|
+
### A. Architecture Decision
|
|
172
|
+
|
|
173
|
+
**Extend the existing `src/cli/dashboard.ts`** with new panels and a new `uap dashboard policies` view. Do NOT build a separate TUI framework. The existing chalk + visualize.ts primitives cover everything needed.
|
|
174
|
+
|
|
175
|
+
For the web overlay (Phase 3), add a thin HTTP + WebSocket server that serves JSON from the same data sources the CLI dashboard reads. A single-page HTML file (like the existing `web/generator.html` pattern) consumes it.
|
|
176
|
+
|
|
177
|
+
### B. New Dashboard Panels
|
|
178
|
+
|
|
179
|
+
#### Panel 1: Policies Active
|
|
180
|
+
|
|
181
|
+
**CLI command**: `uap dashboard policies`
|
|
182
|
+
|
|
183
|
+
Reads from `PolicyMemoryManager.getAllPolicies()` and `PolicyGate.getAuditTrail()`.
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
UAP Policies Dashboard
|
|
187
|
+
──────────────────────────────────────────────────────
|
|
188
|
+
|
|
189
|
+
Active Policies (3)
|
|
190
|
+
──────────────────────────────────────────────────────
|
|
191
|
+
Name Level Category Stage Status
|
|
192
|
+
─────────────────────────────────────────────────────────────────────
|
|
193
|
+
IaC State Parity REQUIRED code pre-exec ON
|
|
194
|
+
Mandatory File Backup REQUIRED code pre-exec ON
|
|
195
|
+
Image Asset Verification RECOMMENDED image pre-exec ON
|
|
196
|
+
|
|
197
|
+
Enforcement Stages
|
|
198
|
+
──────────────────────────────────────────────────────
|
|
199
|
+
pre-exec ████████████████████ 3 policies
|
|
200
|
+
post-exec ░░░░░░░░░░░░░░░░░░░ 0 policies
|
|
201
|
+
review ░░░░░░░░░░░░░░░░░░░ 0 policies
|
|
202
|
+
|
|
203
|
+
Recent Audit Trail (last 10)
|
|
204
|
+
──────────────────────────────────────────────────────
|
|
205
|
+
2026-03-17 14:23 IaC State Parity web_browser ALLOWED
|
|
206
|
+
2026-03-17 14:22 Mandatory File Backup file_write ALLOWED
|
|
207
|
+
2026-03-17 14:20 IaC State Parity terraform BLOCKED "No state file"
|
|
208
|
+
|
|
209
|
+
Toggle: uap policy toggle <id> --off
|
|
210
|
+
Stage: uap policy stage <id> --stage post-exec
|
|
211
|
+
Level: uap policy level <id> --level OPTIONAL
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
#### Panel 2: Memories Active
|
|
215
|
+
|
|
216
|
+
**CLI command**: `uap dashboard memories`
|
|
217
|
+
|
|
218
|
+
Extends the existing memory section in `showSessionDashboard()` (`dashboard.ts:1217-1247`).
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
UAP Memories Dashboard
|
|
222
|
+
──────────────────────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
Memory Tiers
|
|
225
|
+
──────────────────────────────────────────────────────
|
|
226
|
+
L1 Working █████████████████░░░ 42/50 entries 12 KB
|
|
227
|
+
L2 Session ██░░░░░░░░░░░░░░░░░░ 8 entries 3 KB
|
|
228
|
+
L3 Semantic Qdrant: Running (Up 4h 23m) 1,247 vectors
|
|
229
|
+
L4 Knowledge 23 entities 47 relationships
|
|
230
|
+
|
|
231
|
+
Active Memories This Session (by type)
|
|
232
|
+
──────────────────────────────────────────────────────
|
|
233
|
+
decision ████████████ 12
|
|
234
|
+
observation ████████ 8
|
|
235
|
+
pattern ██████ 6
|
|
236
|
+
correction ██ 2
|
|
237
|
+
|
|
238
|
+
Open Loops (3)
|
|
239
|
+
──────────────────────────────────────────────────────
|
|
240
|
+
> TODO: wire dashboard WebSocket to session-stats
|
|
241
|
+
> BLOCKED: Qdrant cloud migration pending API key
|
|
242
|
+
> REVIEW: memory consolidation threshold too aggressive
|
|
243
|
+
|
|
244
|
+
Compression Stats
|
|
245
|
+
──────────────────────────────────────────────────────
|
|
246
|
+
Token budget: 32,768 / 262,144 (12.5%)
|
|
247
|
+
Compressions: 4 this session
|
|
248
|
+
Savings ratio: 73.2%
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
#### Panel 3: Model Active Per Task
|
|
252
|
+
|
|
253
|
+
**CLI command**: `uap dashboard models`
|
|
254
|
+
|
|
255
|
+
Reads from `ModelRouter`, `UnifiedRoutingService`, and the new `ModelAnalytics`.
|
|
256
|
+
|
|
257
|
+
```
|
|
258
|
+
UAP Model Dashboard
|
|
259
|
+
──────────────────────────────────────────────────────
|
|
260
|
+
|
|
261
|
+
Active Configuration
|
|
262
|
+
──────────────────────────────────────────────────────
|
|
263
|
+
Planner: opus-4.6 Claude Opus 4.6 $7.50/$37.50 per 1M
|
|
264
|
+
Executor: qwen35 Qwen 3.5 (local) $0.00/$0.00
|
|
265
|
+
Reviewer: opus-4.6 Claude Opus 4.6
|
|
266
|
+
Fallback: qwen35 Qwen 3.5 (local)
|
|
267
|
+
Strategy: balanced
|
|
268
|
+
|
|
269
|
+
Routing Matrix
|
|
270
|
+
──────────────────────────────────────────────────────
|
|
271
|
+
Complexity Planner Executor
|
|
272
|
+
low qwen35 qwen35 $0.00
|
|
273
|
+
medium opus-4.6 qwen35 $0.04
|
|
274
|
+
high opus-4.6 opus-4.6 $0.22
|
|
275
|
+
critical opus-4.6 opus-4.6 $0.22
|
|
276
|
+
|
|
277
|
+
Session Usage
|
|
278
|
+
──────────────────────────────────────────────────────
|
|
279
|
+
Model Tasks Tokens In Tokens Out Cost Success
|
|
280
|
+
opus-4.6 3 4,521 2,103 $0.11 100%
|
|
281
|
+
qwen35 12 18,432 9,876 $0.00 91.7%
|
|
282
|
+
|
|
283
|
+
Execution Profile: small-moe (Qwen 3.5)
|
|
284
|
+
──────────────────────────────────────────────────────
|
|
285
|
+
domainHints: ON webSearch: OFF reflectionCheckpoints: OFF
|
|
286
|
+
temperature: 0.15 loopEscapeThreshold: 3 toolChoiceForce: required
|
|
287
|
+
softBudget: 35 hardBudget: 50
|
|
288
|
+
|
|
289
|
+
Unified Router Consensus
|
|
290
|
+
──────────────────────────────────────────────────────
|
|
291
|
+
Last 10 decisions: 8 consensus, 1 rule-based, 1 benchmark-data
|
|
292
|
+
Avg confidence: 0.82
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### C. Policy Enforcement Stages & Toggling
|
|
296
|
+
|
|
297
|
+
#### Schema Changes
|
|
298
|
+
|
|
299
|
+
**File**: `src/policies/schemas/policy.ts`
|
|
300
|
+
|
|
301
|
+
Add `enforcementStage` to the policy schema:
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
export const PolicySchema = z.object({
|
|
305
|
+
id: z.string().uuid(),
|
|
306
|
+
name: z.string(),
|
|
307
|
+
category: z.enum(['image', 'code', 'security', 'testing', 'ui', 'automation', 'custom']),
|
|
308
|
+
level: z.enum(['REQUIRED', 'RECOMMENDED', 'OPTIONAL']),
|
|
309
|
+
enforcementStage: z.enum(['pre-exec', 'post-exec', 'review', 'always']).default('pre-exec'), // NEW
|
|
310
|
+
rawMarkdown: z.string(),
|
|
311
|
+
convertedFormat: z.string().optional(),
|
|
312
|
+
executableTools: z.array(z.string()).optional(),
|
|
313
|
+
tags: z.array(z.string()),
|
|
314
|
+
createdAt: z
|
|
315
|
+
.string()
|
|
316
|
+
.refine((d) => !Number.isNaN(Date.parse(d)), { message: 'Invalid ISO date string' }),
|
|
317
|
+
updatedAt: z
|
|
318
|
+
.string()
|
|
319
|
+
.refine((d) => !Number.isNaN(Date.parse(d)), { message: 'Invalid ISO date string' }),
|
|
320
|
+
version: z.number(),
|
|
321
|
+
isActive: z.boolean(),
|
|
322
|
+
priority: z.number().default(50),
|
|
323
|
+
});
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
**File**: `src/policies/database-manager.ts`
|
|
327
|
+
|
|
328
|
+
Add column to `policies` table:
|
|
329
|
+
|
|
330
|
+
```sql
|
|
331
|
+
ALTER TABLE policies ADD COLUMN enforcementStage TEXT NOT NULL DEFAULT 'pre-exec';
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
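Use a migration check pattern: SQLite's `ALTER TABLE ... ADD COLUMN` has no `IF NOT EXISTS` clause, so check whether the column already exists (e.g. via `PRAGMA table_info(policies)`) and only run the `ALTER TABLE` when it is missing.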
|
|
335
|
+
|
|
336
|
+
#### PolicyGate Changes
|
|
337
|
+
|
|
338
|
+
**File**: `src/policies/policy-gate.ts`
|
|
339
|
+
|
|
340
|
+
Add stage-aware enforcement:
|
|
341
|
+
|
|
342
|
+
```typescript
|
|
343
|
+
async executeWithGates<T>(
|
|
344
|
+
operation: string,
|
|
345
|
+
args: Record<string, unknown>,
|
|
346
|
+
executor: () => Promise<T>,
|
|
347
|
+
stage: 'pre-exec' | 'post-exec' | 'review' = 'pre-exec' // NEW param
|
|
348
|
+
): Promise<T> {
|
|
349
|
+
const gateResult = await this.checkPolicies(operation, args, stage);
|
|
350
|
+
// ... existing logic, but only check policies matching this stage
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
async checkPolicies(
|
|
354
|
+
operation: string,
|
|
355
|
+
args: Record<string, unknown>,
|
|
356
|
+
stage: 'pre-exec' | 'post-exec' | 'review' | 'always' = 'pre-exec'
|
|
357
|
+
): Promise<GateResult> {
|
|
358
|
+
const allPolicies = await this.memory.getAllPolicies();
|
|
359
|
+
// Filter to policies matching this stage or 'always'
|
|
360
|
+
const stagePolicies = allPolicies.filter(
|
|
361
|
+
p => p.enforcementStage === stage || p.enforcementStage === 'always'
|
|
362
|
+
);
|
|
363
|
+
// ... evaluate only stagePolicies
|
|
364
|
+
}
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
#### CLI Commands for Toggling
|
|
368
|
+
|
|
369
|
+
**File**: `src/bin/policy.ts` (extend existing)
|
|
370
|
+
|
|
371
|
+
```
|
|
372
|
+
uap policy toggle <id> [--on|--off] # Uses existing PolicyMemoryManager.togglePolicy()
|
|
373
|
+
uap policy stage <id> --stage <stage> # New: change enforcement stage
|
|
374
|
+
uap policy level <id> --level <level> # New: change REQUIRED/RECOMMENDED/OPTIONAL
|
|
375
|
+
uap policy list # New: list all with status/stage/level
|
|
376
|
+
uap policy audit [--policy-id <id>] # New: show audit trail
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
Implementation: `togglePolicy()` already exists. Add `setEnforcementStage()` and `setLevel()` to `PolicyMemoryManager`:
|
|
380
|
+
|
|
381
|
+
```typescript
|
|
382
|
+
// src/policies/policy-memory.ts
|
|
383
|
+
async setEnforcementStage(id: string, stage: 'pre-exec' | 'post-exec' | 'review' | 'always'): Promise<void> {
|
|
384
|
+
this.db.updatePolicy({ id }, { enforcementStage: stage, updatedAt: new Date().toISOString() });
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
async setLevel(id: string, level: 'REQUIRED' | 'RECOMMENDED' | 'OPTIONAL'): Promise<void> {
|
|
388
|
+
this.db.updatePolicy({ id }, { level, updatedAt: new Date().toISOString() });
|
|
389
|
+
}
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
### D. Grouping: Per-Task vs Grouped Display
|
|
393
|
+
|
|
394
|
+
The dashboard supports both views:
|
|
395
|
+
|
|
396
|
+
1. **Grouped view** (default for `uap dashboard policies`, `uap dashboard memories`, `uap dashboard models`): Shows aggregate state -- all active policies, all model assignments, memory tier health.
|
|
397
|
+
|
|
398
|
+
2. **Per-task view** (when a task ID is provided): Shows what was active _for that specific task_.
|
|
399
|
+
|
|
400
|
+
```
|
|
401
|
+
uap dashboard policies # Grouped: all policies, stages, audit
|
|
402
|
+
uap dashboard policies --task <task-id> # Per-task: which policies fired for this task
|
|
403
|
+
uap dashboard models # Grouped: all model assignments, session totals
|
|
404
|
+
uap dashboard models --task <task-id> # Per-task: which model handled this task, tokens, cost
|
|
405
|
+
uap dashboard memories --task <task-id> # Per-task: memories retrieved/stored for this task
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
Per-task view requires linking `policy_executions` and `ModelAnalytics.TaskOutcome` to task IDs. Add a `taskId` column to both:
|
|
409
|
+
|
|
410
|
+
- `policy_executions` table: `taskId TEXT` (nullable, for backward compat)
|
|
411
|
+
- `ModelAnalytics` outcomes table: `taskId TEXT`
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
415
|
+
## Part 3: Phase 3 -- Advanced Features
|
|
416
|
+
|
|
417
|
+
### A. Web Overlay (Option 3: Embedded)
|
|
418
|
+
|
|
419
|
+
Architecture: The CLI dashboard functions already compute all the data. Extract the data-gathering logic into shared service functions, then expose via a lightweight HTTP server.
|
|
420
|
+
|
|
421
|
+
#### 3.1 Data service layer
|
|
422
|
+
|
|
423
|
+
**New file**: `src/dashboard/data-service.ts`
|
|
424
|
+
|
|
425
|
+
```typescript
|
|
426
|
+
export interface DashboardData {
|
|
427
|
+
policies: PolicyDashboardData;
|
|
428
|
+
memories: MemoryDashboardData;
|
|
429
|
+
models: ModelDashboardData;
|
|
430
|
+
tasks: TaskDashboardData;
|
|
431
|
+
coordination: CoordinationDashboardData;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
export async function getDashboardData(): Promise<DashboardData> {
|
|
435
|
+
// Reuse the same DB queries from dashboard.ts but return structured data
|
|
436
|
+
// instead of printing to console
|
|
437
|
+
}
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
#### 3.2 Embedded HTTP + WebSocket server
|
|
441
|
+
|
|
442
|
+
**New file**: `src/dashboard/server.ts`
|
|
443
|
+
|
|
444
|
+
```typescript
|
|
445
|
+
import { createServer } from 'http';
|
|
446
|
+
import { WebSocketServer } from 'ws'; // ws package, already common in Node ecosystem
|
|
447
|
+
import { getDashboardData } from './data-service.js';
|
|
448
|
+
|
|
449
|
+
export function startDashboardServer(port: number = 3847): void {
|
|
450
|
+
const server = createServer(async (req, res) => {
|
|
451
|
+
if (req.url === '/api/dashboard') {
|
|
452
|
+
const data = await getDashboardData();
|
|
453
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
454
|
+
res.end(JSON.stringify(data));
|
|
455
|
+
}
|
|
456
|
+
if (req.url === '/') {
|
|
457
|
+
// Serve the single-page dashboard HTML (like web/generator.html pattern)
|
|
458
|
+
res.writeHead(200, { 'Content-Type': 'text/html' });
|
|
459
|
+
res.end(DASHBOARD_HTML); // Inline or read from file
|
|
460
|
+
}
|
|
461
|
+
});
|
|
462
|
+
|
|
463
|
+
const wss = new WebSocketServer({ server });
|
|
464
|
+
// Push updates every 2s
|
|
465
|
+
setInterval(async () => {
|
|
466
|
+
const data = await getDashboardData();
|
|
467
|
+
for (const client of wss.clients) {
|
|
468
|
+
client.send(JSON.stringify(data));
|
|
469
|
+
}
|
|
470
|
+
}, 2000);
|
|
471
|
+
|
|
472
|
+
server.listen(port);
|
|
473
|
+
}
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
#### 3.3 CLI integration
|
|
477
|
+
|
|
478
|
+
**File**: `src/cli/dashboard.ts`
|
|
479
|
+
|
|
480
|
+
```
|
|
481
|
+
uap dashboard serve [--port 3847] # Start embedded web dashboard
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
Launches as a foreground process. When opencode/claude-code exits, the server dies with it (it runs as a child process of the same shell).
|
|
485
|
+
|
|
486
|
+
#### 3.4 Single-page HTML dashboard
|
|
487
|
+
|
|
488
|
+
**New file**: `web/dashboard.html`
|
|
489
|
+
|
|
490
|
+
Self-contained HTML + CSS + vanilla JS (no build step, same pattern as `web/generator.html`). Connects to the WebSocket server at `ws://localhost:3847` (the default port) and renders:
|
|
491
|
+
|
|
492
|
+
- Policy table with toggle buttons (POST to `/api/policy/:id/toggle`)
|
|
493
|
+
- Memory tier gauges
|
|
494
|
+
- Model routing live view
|
|
495
|
+
- Cost tracker
|
|
496
|
+
- Task timeline
|
|
497
|
+
|
|
498
|
+
### B. Historical session comparison
|
|
499
|
+
|
|
500
|
+
Store session snapshots in SQLite (`agents/data/memory/sessions.db`):
|
|
501
|
+
|
|
502
|
+
```sql
|
|
503
|
+
CREATE TABLE session_snapshots (
|
|
504
|
+
id TEXT PRIMARY KEY,
|
|
505
|
+
timestamp TEXT NOT NULL,
|
|
506
|
+
data TEXT NOT NULL, -- JSON blob from getDashboardData()
|
|
507
|
+
duration_ms INTEGER,
|
|
508
|
+
total_cost REAL,
|
|
509
|
+
tasks_completed INTEGER,
|
|
510
|
+
models_used TEXT -- JSON array
|
|
511
|
+
);
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
CLI: `uap dashboard history [--last 10]`
|
|
515
|
+
|
|
516
|
+
### C. Export
|
|
517
|
+
|
|
518
|
+
`uap dashboard export [--format json|csv] [--output file]`
|
|
519
|
+
|
|
520
|
+
Dumps current dashboard data. JSON is the `DashboardData` object. CSV flattens the key tables (policies, model usage, task outcomes).
|
|
521
|
+
|
|
522
|
+
---
|
|
523
|
+
|
|
524
|
+
## Part 4: Implementation Roadmap
|
|
525
|
+
|
|
526
|
+
### Phase 1: Foundation (Week 1-2)
|
|
527
|
+
|
|
528
|
+
| # | Task | File(s) | Status |
|
|
529
|
+
| --- | ---------------------------------------------------------------- | -------------------------------------------------------------------- | ------ |
|
|
530
|
+
| 1 | Update unified-router model maps for opus-4.6/qwen35 | `src/models/unified-router.ts` | Ready |
|
|
531
|
+
| 2 | Update CLI model defaults | `src/cli/model.ts` | Ready |
|
|
532
|
+
| 3 | Add benchmark fingerprints for new models | `src/memory/model-router.ts` | Ready |
|
|
533
|
+
| 4 | Add `enforcementStage` to policy schema + DB migration | `src/policies/schemas/policy.ts`, `src/policies/database-manager.ts` | Ready |
|
|
534
|
+
| 5 | Add `setEnforcementStage()`, `setLevel()` to PolicyMemoryManager | `src/policies/policy-memory.ts` | Ready |
|
|
535
|
+
| 6 | Add stage-aware filtering to PolicyGate | `src/policies/policy-gate.ts` | Ready |
|
|
536
|
+
| 7 | Add policy CLI commands (toggle/stage/level/list/audit) | `src/bin/policy.ts` | Ready |
|
|
537
|
+
|
|
538
|
+
### Phase 2: Dashboard Panels (Week 3-4)
|
|
539
|
+
|
|
540
|
+
| # | Task | File(s) | Status |
|
|
541
|
+
| --- | ------------------------------------------------------- | ------------------------------------------------ | ------ |
|
|
542
|
+
| 8 | Replace hardcoded policies section with DB-driven panel | `src/cli/dashboard.ts:1305-1317` | Ready |
|
|
543
|
+
| 9 | Build `showPoliciesDashboard()` panel | `src/cli/dashboard.ts` | Ready |
|
|
544
|
+
| 10 | Build `showModelsDashboard()` panel | `src/cli/dashboard.ts` | Ready |
|
|
545
|
+
| 11 | Extend `showMemoryDashboard()` with compression stats | `src/cli/dashboard.ts` | Ready |
|
|
546
|
+
| 12 | Add `--task <id>` per-task filtering to all panels | `src/cli/dashboard.ts` | Ready |
|
|
547
|
+
| 13 | Create `ModelAnalytics` module | `src/models/analytics.ts` | Ready |
|
|
548
|
+
| 14 | Wire `ModelAnalytics` into router + executor | `src/models/router.ts`, `src/models/executor.ts` | Ready |
|
|
549
|
+
|
|
550
|
+
### Phase 3: Web Overlay + Advanced (Week 5-6)
|
|
551
|
+
|
|
552
|
+
| # | Task | File(s) | Status |
|
|
553
|
+
| --- | -------------------------------------------- | ------------------------------- | ------ |
|
|
554
|
+
| 15 | Extract data-service layer from dashboard.ts | `src/dashboard/data-service.ts` | Ready |
|
|
555
|
+
| 16 | Build embedded HTTP + WebSocket server | `src/dashboard/server.ts` | Ready |
|
|
556
|
+
| 17 | Build single-page HTML dashboard | `web/dashboard.html` | Ready |
|
|
557
|
+
| 18 | Add `uap dashboard serve` command | `src/cli/dashboard.ts` | Ready |
|
|
558
|
+
| 19 | Add policy toggle/stage/level API endpoints | `src/dashboard/server.ts` | Ready |
|
|
559
|
+
| 20 | Session snapshot storage + history view | `src/dashboard/data-service.ts` | Ready |
|
|
560
|
+
| 21 | Export command (JSON/CSV) | `src/cli/dashboard.ts` | Ready |
|
|
561
|
+
|
|
562
|
+
### Phase 4: Model Optimization (Week 7-8)
|
|
563
|
+
|
|
564
|
+
| # | Task | File(s) | Status |
|
|
565
|
+
| --- | --------------------------------------------------- | --------------------------------------------- | ------ |
|
|
566
|
+
| 22 | Add `quantizationHint` to ExecutionProfile | `src/models/execution-profiles.ts` | Ready |
|
|
567
|
+
| 23 | Add `modelContextBudget` to ModelConfig | `src/models/types.ts` | Ready |
|
|
568
|
+
| 24 | Wire adaptive context to respect modelContextBudget | `src/memory/adaptive-context.ts` | Ready |
|
|
569
|
+
| 25 | Add token counter to globalSessionStats | `src/mcp-router/session-stats.ts` | Ready |
|
|
570
|
+
| 26 | Add `routingMatrix` config option | `src/models/types.ts`, `src/models/router.ts` | Ready |
|
|
571
|
+
| 27 | Training data collection script | `scripts/collect-training-data.py` | Ready |
|
|
572
|
+
|
|
573
|
+
---
|
|
574
|
+
|
|
575
|
+
## Part 5: Dependency Graph
|
|
576
|
+
|
|
577
|
+
```
|
|
578
|
+
Phase 1 (foundation)
|
|
579
|
+
[1,2,3] unified-router + CLI + fingerprints (parallel, no deps)
|
|
580
|
+
[4,5,6] policy schema + memory + gate (sequential: 4 -> 5 -> 6)
|
|
581
|
+
[7] policy CLI commands (depends on 5,6)
|
|
582
|
+
|
|
583
|
+
Phase 2 (dashboard panels)
|
|
584
|
+
[8,9] policies dashboard (depends on 5,6)
|
|
585
|
+
[10] models dashboard (depends on 1,2,3)
|
|
586
|
+
[11] memory dashboard (no deps)
|
|
587
|
+
[12] per-task filtering (depends on 13)
|
|
588
|
+
[13,14] ModelAnalytics (depends on 1)
|
|
589
|
+
|
|
590
|
+
Phase 3 (web overlay)
|
|
591
|
+
[15] data-service extraction (depends on 8,9,10,11)
|
|
592
|
+
[16,17,18] server + HTML + CLI (depends on 15)
|
|
593
|
+
[19] policy API endpoints (depends on 16, 5,6)
|
|
594
|
+
[20,21] history + export (depends on 15)
|
|
595
|
+
|
|
596
|
+
Phase 4 (model optimization)
|
|
597
|
+
[22-27] all independent of Phase 3, can run in parallel with Phase 2-3
|
|
598
|
+
```
|
|
599
|
+
|
|
600
|
+
---
|
|
601
|
+
|
|
602
|
+
## Part 6: Risk Assessment
|
|
603
|
+
|
|
604
|
+
| Risk | Impact | Mitigation |
|
|
605
|
+
| ------------------------------------------------------- | ------ | ------------------------------------------------------------------------------------ |
|
|
606
|
+
| Policy DB migration breaks existing data | High | Use `ALTER TABLE ADD COLUMN ... DEFAULT` -- backward compatible |
|
|
607
|
+
| WebSocket server port conflicts | Low | Configurable port, default 3847 (unlikely to conflict) |
|
|
608
|
+
| Dashboard overhead slows agent execution | Medium | Data-service reads are read-only SQLite queries (<5ms each); WebSocket push is async |
|
|
609
|
+
| Qwen 3.5 quantization switching requires server restart | Medium | Document as limitation; future: llama.cpp hot-swap support |
|
|
610
|
+
| Unified router model map drift | Low | Add test that validates all ModelPresets have map entries |
|
|
611
|
+
|
|
612
|
+
---
|
|
613
|
+
|
|
614
|
+
## Part 7: Test Strategy
|
|
615
|
+
|
|
616
|
+
### Unit tests needed
|
|
617
|
+
|
|
618
|
+
```
|
|
619
|
+
test/policies/enforcement-stage.test.ts -- stage filtering in PolicyGate
|
|
620
|
+
test/policies/policy-toggle.test.ts -- toggle/level/stage mutations
|
|
621
|
+
test/models/unified-router-maps.test.ts -- all presets have map entries
|
|
622
|
+
test/models/analytics.test.ts -- outcome recording + queries
|
|
623
|
+
test/dashboard/data-service.test.ts -- structured data output
|
|
624
|
+
```
|
|
625
|
+
|
|
626
|
+
### Integration tests
|
|
627
|
+
|
|
628
|
+
```
|
|
629
|
+
test/dashboard/policies-panel.test.ts -- end-to-end: store policy -> toggle -> verify dashboard output
|
|
630
|
+
test/dashboard/models-panel.test.ts -- route task -> verify model usage in dashboard
|
|
631
|
+
test/dashboard/web-server.test.ts -- HTTP + WebSocket connectivity
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
### Validation command
|
|
635
|
+
|
|
636
|
+
```bash
|
|
637
|
+
npm test -- enforcement-stage unified-router-maps analytics
|
|
638
|
+
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@miller-tech/uap",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -66,6 +66,7 @@
|
|
|
66
66
|
"node": ">=18.0.0"
|
|
67
67
|
},
|
|
68
68
|
"dependencies": {
|
|
69
|
+
"@miller-tech/uap": "^1.0.0",
|
|
69
70
|
"@octokit/rest": "^20.0.2",
|
|
70
71
|
"@qdrant/js-client-rest": "^1.11.0",
|
|
71
72
|
"better-sqlite3": "^11.0.0",
|
|
@@ -81,6 +82,7 @@
|
|
|
81
82
|
"ora": "^8.0.1",
|
|
82
83
|
"playwright-core": "^1.58.2",
|
|
83
84
|
"simple-git": "^3.22.0",
|
|
85
|
+
"ws": "^8.19.0",
|
|
84
86
|
"zod": "^3.23.8"
|
|
85
87
|
},
|
|
86
88
|
"devDependencies": {
|
|
@@ -89,6 +91,7 @@
|
|
|
89
91
|
"@types/inquirer": "^9.0.7",
|
|
90
92
|
"@types/js-yaml": "^4.0.9",
|
|
91
93
|
"@types/node": "^20.11.0",
|
|
94
|
+
"@types/ws": "^8.18.1",
|
|
92
95
|
"@typescript-eslint/eslint-plugin": "^6.19.0",
|
|
93
96
|
"@typescript-eslint/parser": "^6.19.0",
|
|
94
97
|
"@vitest/coverage-v8": "^1.6.1",
|