@miller-tech/uap 1.40.0 → 1.40.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -642
- package/docs/INDEX.md +48 -286
- package/docs/architecture/OVERVIEW.md +328 -0
- package/docs/architecture/PROTOCOL.md +204 -0
- package/docs/benchmarks/README.md +17 -192
- package/docs/getting-started/CONFIGURATION.md +237 -0
- package/docs/getting-started/INSTALLATION.md +125 -0
- package/docs/getting-started/QUICKSTART.md +115 -0
- package/docs/guides/COORDINATION.md +162 -0
- package/docs/guides/DELIVER.md +115 -0
- package/docs/guides/DEPLOY_BATCHING.md +212 -0
- package/docs/guides/DROIDS_AND_SKILLS.md +202 -0
- package/docs/guides/LOCAL_MODELS.md +148 -0
- package/docs/guides/MCP_ROUTER.md +195 -0
- package/docs/guides/MEMORY.md +235 -0
- package/docs/guides/MULTI_MODEL.md +223 -0
- package/docs/guides/POLICIES.md +190 -0
- package/docs/guides/WORKTREE_WORKFLOW.md +185 -0
- package/docs/integrations/MCP_ROUTER.md +147 -0
- package/docs/integrations/RTK.md +102 -0
- package/docs/reference/API.md +485 -0
- package/docs/reference/CLI.md +719 -0
- package/docs/reference/CONFIGURATION.md +90 -193
- package/docs/reference/DATABASE_SCHEMA.md +110 -344
- package/docs/reference/FEATURES.md +176 -472
- package/docs/reference/PATTERNS.md +102 -0
- package/docs/reference/PLATFORMS.md +83 -0
- package/package.json +1 -1
- package/docs/AGENTS.md +0 -423
- package/docs/DOCUMENTATION_AUDIT_REPORT.md +0 -131
- package/docs/GETTING_STARTED.md +0 -288
- package/docs/PROJECT_ANALYSIS_REPORT.md +0 -510
- package/docs/architecture/COMPLETE_ARCHITECTURE.md +0 -748
- package/docs/architecture/EXPERT_STACK.md +0 -137
- package/docs/architecture/MULTI_MODEL.md +0 -224
- package/docs/architecture/PLATFORM_GATING.md +0 -68
- package/docs/architecture/SYSTEM_ANALYSIS.md +0 -334
- package/docs/architecture/UAP_COMPLIANCE.md +0 -217
- package/docs/architecture/UAP_PROTOCOL.md +0 -339
- package/docs/architecture/UAP_STRICT_DROIDS.md +0 -172
- package/docs/archive/BALLS_MODE_SELF_ANALYSIS.md +0 -260
- package/docs/archive/BENCHMARK_GAPS_AND_PLAN.md +0 -146
- package/docs/archive/FAILING_TASKS_SOLUTION_PLAN.md +0 -668
- package/docs/archive/JINJA2-SYSTEM-MESSAGE-FIX.md +0 -209
- package/docs/archive/MODEL_ROUTING_IMPLEMENTATION_SUMMARY.md +0 -281
- package/docs/archive/MODEL_ROUTING_OPTIMIZATION_PLAN.md +0 -320
- package/docs/archive/NPM-PUBLISH-V0.9.1.md +0 -240
- package/docs/archive/OPTIMIZATION_OPTIONS.md +0 -334
- package/docs/archive/PARALLELISM_GAPS_AND_OPTIONS.md +0 -422
- package/docs/archive/POLICY_GATE_IMPLEMENTATION.md +0 -245
- package/docs/archive/SETUP_IMPROVEMENTS.md +0 -213
- package/docs/archive/UAP_GENERIC_OPTIMIZATION_PLAN.md +0 -270
- package/docs/archive/UAP_OPTIMIZATION_PLAN.md +0 -701
- package/docs/archive/UAP_V103_PATTERN_DESIGN.md +0 -315
- package/docs/archive/UAP_V104_COMPLIANCE_DESIGN.md +0 -223
- package/docs/archive/changelog/2026-03-10_uap-100-compliance.md +0 -77
- package/docs/archive/changelog/2026-03-10_uap-full-system-verification.md +0 -109
- package/docs/archive/opencode-integration-guide.md +0 -740
- package/docs/archive/opencode-integration-quickref.md +0 -180
- package/docs/benchmarks/OVERNIGHT_RUNNER.md +0 -341
- package/docs/benchmarks/SPECULATIVE_DECODING_JOURNEY_2026-03.md +0 -221
- package/docs/benchmarks/VALIDATION_PLAN.md +0 -568
- package/docs/blog/SPECULATIVE_DECODING_PRODUCTION_PLAYBOOK.md +0 -139
- package/docs/blog/local-coding-agents.md +0 -266
- package/docs/blog/x-thread.md +0 -254
- package/docs/deployment/DEPLOYMENT.md +0 -895
- package/docs/deployment/DEPLOYMENT_STRATEGIES.md +0 -518
- package/docs/deployment/DEPLOY_BATCHER_ANALYSIS.md +0 -224
- package/docs/deployment/DEPLOY_BATCHING.md +0 -273
- package/docs/deployment/DEPLOY_BUCKETING_ANALYSIS.md +0 -420
- package/docs/deployment/QWEN35_LLAMA_CPP.md +0 -426
- package/docs/deployment/UAP_LLAMA_ANTHROPIC_PROXY_BOOTSTRAP.md +0 -279
- package/docs/getting-started/INTEGRATION.md +0 -628
- package/docs/getting-started/OVERVIEW.md +0 -324
- package/docs/getting-started/SETUP.md +0 -377
- package/docs/integrations/MCP_ROUTER_SETUP.md +0 -445
- package/docs/integrations/RTK_INTEGRATION.md +0 -468
- package/docs/operations/TROUBLESHOOTING.md +0 -660
- package/docs/pr/PR_SPECULATIVE_DOCS_TEMPLATE.md +0 -146
- package/docs/pr/UPSTREAM_PRS.md +0 -424
- package/docs/reference/API_REFERENCE.md +0 -903
- package/docs/reference/EXPERT_DROIDS.md +0 -219
- package/docs/reference/HARNESS-MATRIX.md +0 -318
- package/docs/reference/PATTERN_LIBRARY.md +0 -636
- package/docs/reference/UAP_CLI_REFERENCE.md +0 -620
- package/docs/research/BEHAVIORAL_PATTERNS.md +0 -228
- package/docs/research/DOMAIN_STRATEGIES.md +0 -316
- package/docs/research/MEMORY_SYSTEMS_COMPARISON.md +0 -812
- package/docs/research/PATTERN_ANALYSIS_2026-01-18.md +0 -436
- package/docs/research/PERFORMANCE_ANALYSIS_2026-01-18.md +0 -209
- package/docs/research/PERFORMANCE_TEST_PLAN.md +0 -383
- package/docs/research/TERMINAL_BENCH_LEARNINGS.md +0 -217
|
@@ -1,180 +0,0 @@
|
|
|
1
|
-
# OpenCode Integration Quick Reference
|
|
2
|
-
|
|
3
|
-
## File Structure
|
|
4
|
-
|
|
5
|
-
```
|
|
6
|
-
.project/
|
|
7
|
-
├── .opencode/
|
|
8
|
-
│ ├── plugin/
|
|
9
|
-
│ │ ├── your-plugin.ts # Your custom plugin
|
|
10
|
-
│ │ └── index.ts # Optional: aggregate exports
|
|
11
|
-
│ └── package.json # Dependencies (add @opencode-ai/plugin)
|
|
12
|
-
└── opencode.json # OpenCode configuration
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Plugin Template
|
|
16
|
-
|
|
17
|
-
```typescript
|
|
18
|
-
import type { Plugin } from '@opencode-ai/plugin';
|
|
19
|
-
import { tool } from '@opencode-ai/plugin';
|
|
20
|
-
|
|
21
|
-
export const MyPlugin: Plugin = async ({ $, directory }) => {
|
|
22
|
-
return {
|
|
23
|
-
// Define tools
|
|
24
|
-
tool: {
|
|
25
|
-
my_tool: tool({
|
|
26
|
-
description: 'What this tool does',
|
|
27
|
-
args: {
|
|
28
|
-
param: tool.schema.string().describe('Parameter'),
|
|
29
|
-
},
|
|
30
|
-
async execute({ param }) {
|
|
31
|
-
const result = await $`command ${param}`;
|
|
32
|
-
return result.stdout.toString();
|
|
33
|
-
},
|
|
34
|
-
}),
|
|
35
|
-
},
|
|
36
|
-
|
|
37
|
-
// Optional: Event hooks
|
|
38
|
-
event: async ({ event }) => {
|
|
39
|
-
if (event.type === 'session.created') {
|
|
40
|
-
console.log('Session started');
|
|
41
|
-
}
|
|
42
|
-
},
|
|
43
|
-
};
|
|
44
|
-
};
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
## Available Hooks
|
|
48
|
-
|
|
49
|
-
| Hook | Purpose | Example |
|
|
50
|
-
| ------------------------------------ | -------------------------- | ------------------------------ |
|
|
51
|
-
| `tool` | Define new tools | Custom commands for LLM |
|
|
52
|
-
| `event.session.created` | Session initialization | Load context, initialize state |
|
|
53
|
-
| `event.session.compacting` | Before context compression | Preserve important data |
|
|
54
|
-
| `tool.execute.before` | Before tool runs | Validate args, log activity |
|
|
55
|
-
| `tool.execute.after` | After tool completes | Record results, update state |
|
|
56
|
-
| `tool.definition` | Modify tool descriptions | Add policy constraints |
|
|
57
|
-
| `experimental.chat.system.transform` | Inject system context | RAG retrieval, dynamic context |
|
|
58
|
-
| `middleware` | Transform messages | Pre/post processing |
|
|
59
|
-
|
|
60
|
-
## Tool Schema Types
|
|
61
|
-
|
|
62
|
-
```typescript
|
|
63
|
-
// String
|
|
64
|
-
tool.schema.string().describe('Text parameter');
|
|
65
|
-
|
|
66
|
-
// Number with constraints
|
|
67
|
-
tool.schema.number().min(0).max(100).default(50);
|
|
68
|
-
|
|
69
|
-
// Enum
|
|
70
|
-
tool.schema.enum(['read', 'write', 'execute']).default('read');
|
|
71
|
-
|
|
72
|
-
// Array
|
|
73
|
-
tool.schema.array(tool.schema.string());
|
|
74
|
-
|
|
75
|
-
// Optional
|
|
76
|
-
tool.schema.string().optional();
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
## Common Patterns
|
|
80
|
-
|
|
81
|
-
### 1. CLI Wrapper
|
|
82
|
-
|
|
83
|
-
```typescript
|
|
84
|
-
tool({
|
|
85
|
-
description: 'Run external command',
|
|
86
|
-
args: { cmd: tool.schema.string() },
|
|
87
|
-
async execute({ cmd }) {
|
|
88
|
-
return (await $`${cmd}`.quiet()).stdout.toString();
|
|
89
|
-
},
|
|
90
|
-
});
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
### 2. File Operations
|
|
94
|
-
|
|
95
|
-
```typescript
|
|
96
|
-
import { readFile, writeFile } from 'fs/promises';
|
|
97
|
-
|
|
98
|
-
tool({
|
|
99
|
-
description: 'Read project file',
|
|
100
|
-
args: { path: tool.schema.string() },
|
|
101
|
-
async execute({ path }) {
|
|
102
|
-
return await readFile(join(projectDir, path), 'utf-8');
|
|
103
|
-
},
|
|
104
|
-
});
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
### 3. Memory Query
|
|
108
|
-
|
|
109
|
-
```typescript
|
|
110
|
-
tool({
|
|
111
|
-
description: 'Query persistent memory',
|
|
112
|
-
args: { query: tool.schema.string() },
|
|
113
|
-
async execute({ query }) {
|
|
114
|
-
const result = await $`python3 ./scripts/query.py "${query}"`;
|
|
115
|
-
return result.stdout.toString().trim();
|
|
116
|
-
},
|
|
117
|
-
});
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
### 4. Context Injection (RAG)
|
|
121
|
-
|
|
122
|
-
```typescript
|
|
123
|
-
middleware: async (input, next) => {
|
|
124
|
-
const lastMsg = input.messages?.[input.messages.length - 1];
|
|
125
|
-
if (lastMsg?.role === 'user') {
|
|
126
|
-
const context = await queryRAG(lastMsg.content);
|
|
127
|
-
input.messages.push({ role: 'system', content: `<context>${context}</context>` });
|
|
128
|
-
}
|
|
129
|
-
return next(input);
|
|
130
|
-
};
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
## Plugin Examples in This Repo
|
|
134
|
-
|
|
135
|
-
| Plugin | File | Purpose |
|
|
136
|
-
| --------------- | ----------------------------------------- | ----------------------------- |
|
|
137
|
-
| Commands | `.opencode/plugin/uap-commands.ts` | CLI commands as tools |
|
|
138
|
-
| Skills | `.opencode/plugin/uap-skills.ts` | Skill loading system |
|
|
139
|
-
| Droids | `.opencode/plugin/uap-droids.ts` | Specialized agent droids |
|
|
140
|
-
| Pattern RAG | `.opencode/plugin/uap-pattern-rag.ts` | On-demand pattern retrieval |
|
|
141
|
-
| Task Completion | `.opencode/plugin/uap-task-completion.ts` | Track task outcomes |
|
|
142
|
-
| Session Hooks | `.opencode/plugin/uap-session-hooks.ts` | Session lifecycle events |
|
|
143
|
-
| Enforcement | `tools/agents/plugins/uap-enforce.ts` | Loop detection, budget limits |
|
|
144
|
-
|
|
145
|
-
## Dependencies
|
|
146
|
-
|
|
147
|
-
```json
|
|
148
|
-
{
|
|
149
|
-
"dependencies": {
|
|
150
|
-
"@opencode-ai/plugin": "1.2.16"
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
## Debugging
|
|
156
|
-
|
|
157
|
-
```bash
|
|
158
|
-
# Check plugin loads
|
|
159
|
-
opencode run "What tools are available?"
|
|
160
|
-
|
|
161
|
-
# View logs
|
|
162
|
-
tail -f ~/.opencode/logs/*.log
|
|
163
|
-
|
|
164
|
-
# Test TypeScript syntax
|
|
165
|
-
npx tsc --noEmit .opencode/plugin/your-plugin.ts
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
## Best Practices
|
|
169
|
-
|
|
170
|
-
1. **Error Handling**: Always use `.nothrow()` and check exit codes
|
|
171
|
-
2. **Security**: Validate inputs, prevent command injection
|
|
172
|
-
3. **Caching**: Cache expensive operations between tool calls
|
|
173
|
-
4. **Descriptions**: Write clear, comprehensive tool descriptions
|
|
174
|
-
5. **Naming**: Use snake_case, prefix with domain (`mydomain_tool`)
|
|
175
|
-
6. **Context**: Preserve important state across compaction
|
|
176
|
-
7. **Performance**: Use `.quiet()` on shell calls to reduce output noise
|
|
177
|
-
|
|
178
|
-
## Full Example
|
|
179
|
-
|
|
180
|
-
See: `.opencode/plugin/uap-commands.ts` for a complete implementation example.
|
|
@@ -1,341 +0,0 @@
|
|
|
1
|
-
# Overnight Benchmark Runner Guide
|
|
2
|
-
|
|
3
|
-
> **Version:** 1.18.0
|
|
4
|
-
> **Last Updated:** 2026-03-28
|
|
5
|
-
> **Purpose:** Automated overnight benchmark execution
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
## Overview
|
|
10
|
-
|
|
11
|
-
This guide explains how to set up and run the overnight benchmark suite for comprehensive UAP validation.
|
|
12
|
-
|
|
13
|
-
### What Gets Run
|
|
14
|
-
|
|
15
|
-
The overnight suite executes:
|
|
16
|
-
- **10 representative tasks** (short benchmark)
|
|
17
|
-
- **Token tracking** per task
|
|
18
|
-
- **Time measurement** per task
|
|
19
|
-
- **Success/failure** tracking
|
|
20
|
-
- **Error count** per task
|
|
21
|
-
- **Quality scoring** (if enabled)
|
|
22
|
-
|
|
23
|
-
### Expected Duration
|
|
24
|
-
|
|
25
|
-
| Suite | Tasks | Duration |
|
|
26
|
-
|-------|-------|----------|
|
|
27
|
-
| Short | 10 | ~15-20 minutes |
|
|
28
|
-
| Full | 14 | ~25-30 minutes |
|
|
29
|
-
| Overnight | 10 + extended | ~4 hours |
|
|
30
|
-
|
|
31
|
-
---
|
|
32
|
-
|
|
33
|
-
## Quick Start
|
|
34
|
-
|
|
35
|
-
### Manual Run
|
|
36
|
-
|
|
37
|
-
```bash
|
|
38
|
-
# Run short benchmark suite
|
|
39
|
-
npm run benchmark:short
|
|
40
|
-
|
|
41
|
-
# Run full benchmark suite
|
|
42
|
-
npm run benchmark:full
|
|
43
|
-
|
|
44
|
-
# Run overnight suite
|
|
45
|
-
npm run benchmark:overnight
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
### Automated Nightly Run
|
|
49
|
-
|
|
50
|
-
```bash
|
|
51
|
-
# Edit crontab
|
|
52
|
-
crontab -e
|
|
53
|
-
|
|
54
|
-
# Add nightly run at 2:00 AM
|
|
55
|
-
0 2 * * * cd /path/to/uap && npm run benchmark:overnight >> /var/log/uap-benchmark.log 2>&1
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
---
|
|
59
|
-
|
|
60
|
-
## Configuration
|
|
61
|
-
|
|
62
|
-
### Environment Variables
|
|
63
|
-
|
|
64
|
-
```bash
|
|
65
|
-
# Benchmark configuration
|
|
66
|
-
UAP_BENCHMARK_TASKS=T01,T02,T03,T04,T05,T06,T07,T08,T09,T10
|
|
67
|
-
UAP_BENCHMARK_UAP_ENABLED=true
|
|
68
|
-
UAP_BENCHMARK_OPENCODE_ENABLED=true
|
|
69
|
-
UAP_BENCHMARK_TOKEN_TRACKING=true
|
|
70
|
-
UAP_BENCHMARK_QUALITY_SCORING=false
|
|
71
|
-
|
|
72
|
-
# Results location
|
|
73
|
-
UAP_BENCHMARK_RESULTS_DIR=./benchmark-results
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
### Task Selection
|
|
77
|
-
|
|
78
|
-
```typescript
|
|
79
|
-
// scripts/benchmark-quick-suite.ts
|
|
80
|
-
const TASKS = [
|
|
81
|
-
{ id: 'T01', name: 'Git Repository Recovery', category: 'system-admin' },
|
|
82
|
-
{ id: 'T02', name: 'Password Hash Recovery', category: 'security' },
|
|
83
|
-
{ id: 'T03', name: 'mTLS Certificate Setup', category: 'security' },
|
|
84
|
-
{ id: 'T04', name: 'Docker Compose Config', category: 'containers' },
|
|
85
|
-
{ id: 'T05', name: 'ML Model Training', category: 'ml' },
|
|
86
|
-
{ id: 'T06', name: 'Data Compression', category: 'data-processing' },
|
|
87
|
-
{ id: 'T07', name: 'Chess FEN Parser', category: 'games' },
|
|
88
|
-
{ id: 'T08', name: 'SQLite WAL Recovery', category: 'database' },
|
|
89
|
-
{ id: 'T09', name: 'HTTP Server Config', category: 'networking' },
|
|
90
|
-
{ id: 'T10', name: 'Code Compression', category: 'development' },
|
|
91
|
-
];
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
---
|
|
95
|
-
|
|
96
|
-
## Output Format
|
|
97
|
-
|
|
98
|
-
### Results JSON
|
|
99
|
-
|
|
100
|
-
```json
|
|
101
|
-
[
|
|
102
|
-
{
|
|
103
|
-
"taskId": "T01",
|
|
104
|
-
"taskName": "Git Repository Recovery",
|
|
105
|
-
"category": "system-admin",
|
|
106
|
-
"tokens": 19800,
|
|
107
|
-
"time": 12.34,
|
|
108
|
-
"success": true,
|
|
109
|
-
"errors": 0
|
|
110
|
-
}
|
|
111
|
-
]
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
### Markdown Report
|
|
115
|
-
|
|
116
|
-
```markdown
|
|
117
|
-
# UAP Benchmark Report
|
|
118
|
-
|
|
119
|
-
**Generated:** 2026-03-28
|
|
120
|
-
**Version:** 1.18.0
|
|
121
|
-
|
|
122
|
-
## Summary
|
|
123
|
-
|
|
124
|
-
| Metric | Value |
|
|
125
|
-
|--------|-------|
|
|
126
|
-
| Total Tasks | 10 |
|
|
127
|
-
| Successful | 10 |
|
|
128
|
-
| Avg Tokens/Task | 20,000 |
|
|
129
|
-
| Avg Time/Task | 15.5s |
|
|
130
|
-
| Success Rate | 100% |
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
---
|
|
134
|
-
|
|
135
|
-
## Results Location
|
|
136
|
-
|
|
137
|
-
```
|
|
138
|
-
benchmark-results/
|
|
139
|
-
├── overnight-2026-03-28-020000/
|
|
140
|
-
│ ├── benchmark.log
|
|
141
|
-
│ ├── results-2026-03-28.json
|
|
142
|
-
│ └── report-2026-03-28.md
|
|
143
|
-
├── overnight-2026-03-27-020000/
|
|
144
|
-
│ └── ...
|
|
145
|
-
└── ...
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
---
|
|
149
|
-
|
|
150
|
-
## Monitoring
|
|
151
|
-
|
|
152
|
-
### Check Status
|
|
153
|
-
|
|
154
|
-
```bash
|
|
155
|
-
# Check latest results
|
|
156
|
-
ls -lt benchmark-results/overnight-*/ | head -5
|
|
157
|
-
|
|
158
|
-
# View latest report
|
|
159
|
-
cat benchmark-results/overnight-*/report-*.md | tail -50
|
|
160
|
-
|
|
161
|
-
# Check benchmark log
|
|
162
|
-
tail -f benchmark-results/overnight-*/benchmark.log
|
|
163
|
-
```
|
|
164
|
-
|
|
165
|
-
### Alerting
|
|
166
|
-
|
|
167
|
-
```bash
|
|
168
|
-
# Check for failures
|
|
169
|
-
grep -r "Failed\|Error" benchmark-results/overnight-*/benchmark.log
|
|
170
|
-
|
|
171
|
-
# Count failed tasks (0 means a 100% success rate)
|
|
172
|
-
jq -s 'flatten | map(select(.success | not)) | length' benchmark-results/overnight-*/results-*.json
|
|
173
|
-
```
|
|
174
|
-
|
|
175
|
-
---
|
|
176
|
-
|
|
177
|
-
## Troubleshooting
|
|
178
|
-
|
|
179
|
-
### Benchmark Fails
|
|
180
|
-
|
|
181
|
-
```bash
|
|
182
|
-
# Check logs
|
|
183
|
-
cat benchmark-results/overnight-*/benchmark.log
|
|
184
|
-
|
|
185
|
-
# Check Node.js version
|
|
186
|
-
node --version # Should be >= 18.0.0
|
|
187
|
-
|
|
188
|
-
# Check dependencies
|
|
189
|
-
npm install
|
|
190
|
-
|
|
191
|
-
# Rebuild project
|
|
192
|
-
npm run build
|
|
193
|
-
```
|
|
194
|
-
|
|
195
|
-
### Results Not Generated
|
|
196
|
-
|
|
197
|
-
```bash
|
|
198
|
-
# Check results directory permissions
|
|
199
|
-
ls -la benchmark-results/
|
|
200
|
-
|
|
201
|
-
# Create results directory manually
|
|
202
|
-
mkdir -p benchmark-results
|
|
203
|
-
|
|
204
|
-
# Run with verbose output
|
|
205
|
-
npm run benchmark:short -- --verbose
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
### Performance Issues
|
|
209
|
-
|
|
210
|
-
```bash
|
|
211
|
-
# Check system resources
|
|
212
|
-
free -h # Memory
|
|
213
|
-
df -h # Disk space
|
|
214
|
-
top # CPU usage
|
|
215
|
-
|
|
216
|
-
# Reduce concurrent tasks if needed
|
|
217
|
-
export UAP_BENCHMARK_CONCURRENCY=1
|
|
218
|
-
```
|
|
219
|
-
|
|
220
|
-
---
|
|
221
|
-
|
|
222
|
-
## Advanced Usage
|
|
223
|
-
|
|
224
|
-
### Custom Task List
|
|
225
|
-
|
|
226
|
-
```bash
|
|
227
|
-
# Create custom tasks file
|
|
228
|
-
cat > custom-tasks.json << EOF
|
|
229
|
-
[
|
|
230
|
-
{"id": "T01", "name": "Task 1", "category": "test"},
|
|
231
|
-
{"id": "T02", "name": "Task 2", "category": "test"}
|
|
232
|
-
]
|
|
233
|
-
EOF
|
|
234
|
-
|
|
235
|
-
# Run with custom tasks
|
|
236
|
-
node scripts/benchmark-quick-suite.ts --tasks custom-tasks.json
|
|
237
|
-
```
|
|
238
|
-
|
|
239
|
-
### Quality Scoring
|
|
240
|
-
|
|
241
|
-
```bash
|
|
242
|
-
# Enable quality scoring
|
|
243
|
-
export UAP_BENCHMARK_QUALITY_SCORING=true
|
|
244
|
-
|
|
245
|
-
# Quality score is calculated by:
|
|
246
|
-
correctness * 0.3 +
|
|
247
|
-
completeness * 0.25 +
|
|
248
|
-
efficiency * 0.2 +
|
|
249
|
-
security * 0.15 +
|
|
250
|
-
maintainability * 0.1
|
|
251
|
-
```
|
|
252
|
-
|
|
253
|
-
### Compare Results
|
|
254
|
-
|
|
255
|
-
```bash
|
|
256
|
-
# Compare two benchmark runs
|
|
257
|
-
npm run benchmark:compare \
|
|
258
|
-
-- --before benchmark-results/overnight-2026-03-27/results.json \
|
|
259
|
-
--after benchmark-results/overnight-2026-03-28/results.json
|
|
260
|
-
|
|
261
|
-
# Generate comparison report
|
|
262
|
-
npm run benchmark:report \
|
|
263
|
-
-- --input benchmark-results/overnight-2026-03-28/results.json \
|
|
264
|
-
--output benchmark-results/overnight-2026-03-28/comparison.md
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
---
|
|
268
|
-
|
|
269
|
-
## Expected Results
|
|
270
|
-
|
|
271
|
-
### Based on Historical Data
|
|
272
|
-
|
|
273
|
-
| Metric | Target | Status |
|
|
274
|
-
|--------|--------|--------|
|
|
275
|
-
| Success Rate | 100% | ✅ |
|
|
276
|
-
| Avg Tokens/Task | <25,000 | ✅ |
|
|
277
|
-
| Avg Time/Task | <20s | ✅ |
|
|
278
|
-
| Error Rate | 0% | ✅ |
|
|
279
|
-
|
|
280
|
-
### Performance Comparison
|
|
281
|
-
|
|
282
|
-
| Version | Tokens/Task | Time/Task | Success Rate |
|
|
283
|
-
|---------|-------------|-----------|--------------|
|
|
284
|
-
| Baseline | 52,000 | 45s | 75% |
|
|
285
|
-
| UAP v1.17 | 28,500 | 38s | 92% |
|
|
286
|
-
| UAP v1.18 + OpenCode | 23,400 | 32s | 100% |
|
|
287
|
-
|
|
288
|
-
---
|
|
289
|
-
|
|
290
|
-
## Best Practices
|
|
291
|
-
|
|
292
|
-
### 1. Run During Off-Peak Hours
|
|
293
|
-
- Avoid running during business hours
|
|
294
|
-
- Schedule for 2:00 AM local time
|
|
295
|
-
- Ensure no other heavy workloads
|
|
296
|
-
|
|
297
|
-
### 2. Monitor Resources
|
|
298
|
-
- Check disk space before run
|
|
299
|
-
- Ensure sufficient memory
|
|
300
|
-
- Monitor network connectivity
|
|
301
|
-
|
|
302
|
-
### 3. Review Results Daily
|
|
303
|
-
- Check for failures
|
|
304
|
-
- Review token usage trends
|
|
305
|
-
- Monitor success rate
|
|
306
|
-
|
|
307
|
-
### 4. Archive Old Results
|
|
308
|
-
```bash
|
|
309
|
-
# Archive results older than 30 days
|
|
310
|
-
find benchmark-results -maxdepth 1 -name 'overnight-*' -mtime +30 -exec mv {} benchmark-results/archive/ \;
|
|
311
|
-
```
|
|
312
|
-
|
|
313
|
-
### 5. Set Up Alerts
|
|
314
|
-
```bash
|
|
315
|
-
# Alert on failures
|
|
316
|
-
grep -q "Failed" benchmark-results/overnight-*/benchmark.log && \
|
|
317
|
-
echo "Benchmark failures detected!" | mail -s "UAP Benchmark Alert" admin@example.com
|
|
318
|
-
```
|
|
319
|
-
|
|
320
|
-
---
|
|
321
|
-
|
|
322
|
-
## Next Steps
|
|
323
|
-
|
|
324
|
-
After overnight run completes:
|
|
325
|
-
|
|
326
|
-
1. **Review Report**: Check `benchmark-results/overnight-*/report-*.md`
|
|
327
|
-
2. **Verify Success**: Ensure 100% success rate
|
|
328
|
-
3. **Check Tokens**: Confirm token usage is within targets
|
|
329
|
-
4. **Monitor Trends**: Compare with previous runs
|
|
330
|
-
5. **Update Documentation**: If significant changes detected
|
|
331
|
-
|
|
332
|
-
---
|
|
333
|
-
|
|
334
|
-
<div align="center">
|
|
335
|
-
|
|
336
|
-
**Related Documentation:**
|
|
337
|
-
- [Benchmark Results](COMPREHENSIVE_BENCHMARKS.md)
|
|
338
|
-
- [Validation Plan](VALIDATION_PLAN.md)
|
|
339
|
-
- [CLI Reference](../reference/UAP_CLI_REFERENCE.md)
|
|
340
|
-
|
|
341
|
-
</div>
|