@miller-tech/uap 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/dist/benchmarks/benchmark.d.ts +8 -8
  2. package/dist/benchmarks/improved-benchmark.d.ts.map +1 -1
  3. package/dist/benchmarks/improved-benchmark.js +10 -23
  4. package/dist/benchmarks/improved-benchmark.js.map +1 -1
  5. package/dist/benchmarks/model-integration.d.ts.map +1 -1
  6. package/dist/benchmarks/model-integration.js +22 -23
  7. package/dist/benchmarks/model-integration.js.map +1 -1
  8. package/dist/bin/policy.js +67 -11
  9. package/dist/bin/policy.js.map +1 -1
  10. package/dist/cli/dashboard.d.ts +2 -1
  11. package/dist/cli/dashboard.d.ts.map +1 -1
  12. package/dist/cli/dashboard.js +399 -10
  13. package/dist/cli/dashboard.js.map +1 -1
  14. package/dist/cli/model.js +12 -12
  15. package/dist/cli/model.js.map +1 -1
  16. package/dist/cli/setup-wizard.d.ts.map +1 -1
  17. package/dist/cli/setup-wizard.js +24 -0
  18. package/dist/cli/setup-wizard.js.map +1 -1
  19. package/dist/coordination/deploy-batcher.d.ts +1 -0
  20. package/dist/coordination/deploy-batcher.d.ts.map +1 -1
  21. package/dist/coordination/deploy-batcher.js +24 -25
  22. package/dist/coordination/deploy-batcher.js.map +1 -1
  23. package/dist/dashboard/data-service.d.ts +94 -0
  24. package/dist/dashboard/data-service.d.ts.map +1 -0
  25. package/dist/dashboard/data-service.js +286 -0
  26. package/dist/dashboard/data-service.js.map +1 -0
  27. package/dist/dashboard/index.d.ts +5 -0
  28. package/dist/dashboard/index.d.ts.map +1 -0
  29. package/dist/dashboard/index.js +3 -0
  30. package/dist/dashboard/index.js.map +1 -0
  31. package/dist/dashboard/server.d.ts +15 -0
  32. package/dist/dashboard/server.d.ts.map +1 -0
  33. package/dist/dashboard/server.js +158 -0
  34. package/dist/dashboard/server.js.map +1 -0
  35. package/dist/mcp-router/session-stats.d.ts +9 -0
  36. package/dist/mcp-router/session-stats.d.ts.map +1 -1
  37. package/dist/mcp-router/session-stats.js +19 -3
  38. package/dist/mcp-router/session-stats.js.map +1 -1
  39. package/dist/memory/adaptive-context.d.ts +1 -0
  40. package/dist/memory/adaptive-context.d.ts.map +1 -1
  41. package/dist/memory/adaptive-context.js +4 -0
  42. package/dist/memory/adaptive-context.js.map +1 -1
  43. package/dist/memory/embeddings.d.ts.map +1 -1
  44. package/dist/memory/embeddings.js +4 -4
  45. package/dist/memory/embeddings.js.map +1 -1
  46. package/dist/memory/model-router.d.ts +1 -1
  47. package/dist/memory/model-router.d.ts.map +1 -1
  48. package/dist/memory/model-router.js +52 -1
  49. package/dist/memory/model-router.js.map +1 -1
  50. package/dist/memory/predictive-memory.d.ts.map +1 -1
  51. package/dist/memory/predictive-memory.js +4 -3
  52. package/dist/memory/predictive-memory.js.map +1 -1
  53. package/dist/models/analytics.d.ts +93 -0
  54. package/dist/models/analytics.d.ts.map +1 -0
  55. package/dist/models/analytics.js +205 -0
  56. package/dist/models/analytics.js.map +1 -0
  57. package/dist/models/execution-profiles.d.ts +6 -0
  58. package/dist/models/execution-profiles.d.ts.map +1 -1
  59. package/dist/models/execution-profiles.js +15 -0
  60. package/dist/models/execution-profiles.js.map +1 -1
  61. package/dist/models/executor.d.ts.map +1 -1
  62. package/dist/models/executor.js +51 -17
  63. package/dist/models/executor.js.map +1 -1
  64. package/dist/models/index.d.ts +2 -0
  65. package/dist/models/index.d.ts.map +1 -1
  66. package/dist/models/index.js +2 -0
  67. package/dist/models/index.js.map +1 -1
  68. package/dist/models/router.d.ts +8 -0
  69. package/dist/models/router.d.ts.map +1 -1
  70. package/dist/models/router.js +39 -18
  71. package/dist/models/router.js.map +1 -1
  72. package/dist/models/types.d.ts +26 -0
  73. package/dist/models/types.d.ts.map +1 -1
  74. package/dist/models/types.js +39 -0
  75. package/dist/models/types.js.map +1 -1
  76. package/dist/models/unified-router.d.ts.map +1 -1
  77. package/dist/models/unified-router.js +4 -0
  78. package/dist/models/unified-router.js.map +1 -1
  79. package/dist/policies/database-manager.d.ts +1 -0
  80. package/dist/policies/database-manager.d.ts.map +1 -1
  81. package/dist/policies/database-manager.js +14 -2
  82. package/dist/policies/database-manager.js.map +1 -1
  83. package/dist/policies/policy-gate.d.ts +2 -2
  84. package/dist/policies/policy-gate.d.ts.map +1 -1
  85. package/dist/policies/policy-gate.js +6 -4
  86. package/dist/policies/policy-gate.js.map +1 -1
  87. package/dist/policies/policy-memory.d.ts +3 -0
  88. package/dist/policies/policy-memory.d.ts.map +1 -1
  89. package/dist/policies/policy-memory.js +11 -0
  90. package/dist/policies/policy-memory.js.map +1 -1
  91. package/dist/policies/schemas/policy.d.ts +3 -0
  92. package/dist/policies/schemas/policy.d.ts.map +1 -1
  93. package/dist/policies/schemas/policy.js +1 -0
  94. package/dist/policies/schemas/policy.js.map +1 -1
  95. package/dist/tasks/coordination.d.ts +18 -0
  96. package/dist/tasks/coordination.d.ts.map +1 -1
  97. package/dist/tasks/coordination.js +59 -1
  98. package/dist/tasks/coordination.js.map +1 -1
  99. package/dist/tasks/event-bus.d.ts +91 -0
  100. package/dist/tasks/event-bus.d.ts.map +1 -0
  101. package/dist/tasks/event-bus.js +123 -0
  102. package/dist/tasks/event-bus.js.map +1 -0
  103. package/dist/tasks/service.d.ts +5 -0
  104. package/dist/tasks/service.d.ts.map +1 -1
  105. package/dist/tasks/service.js +59 -0
  106. package/dist/tasks/service.js.map +1 -1
  107. package/dist/telemetry/session-telemetry.d.ts.map +1 -1
  108. package/dist/telemetry/session-telemetry.js +3 -0
  109. package/dist/telemetry/session-telemetry.js.map +1 -1
  110. package/dist/utils/concurrency-pool.d.ts +51 -0
  111. package/dist/utils/concurrency-pool.d.ts.map +1 -0
  112. package/dist/utils/concurrency-pool.js +80 -0
  113. package/dist/utils/concurrency-pool.js.map +1 -0
  114. package/dist/utils/system-resources.d.ts +47 -0
  115. package/dist/utils/system-resources.d.ts.map +1 -0
  116. package/dist/utils/system-resources.js +92 -0
  117. package/dist/utils/system-resources.js.map +1 -0
  118. package/docs/BENCHMARK_GAPS_AND_PLAN.md +146 -0
  119. package/docs/PARALLELISM_GAPS_AND_OPTIONS.md +422 -0
  120. package/docs/UAP_OPTIMIZATION_PLAN.md +638 -0
  121. package/package.json +4 -1
  122. package/templates/hooks/session-start.sh +8 -1
@@ -0,0 +1,92 @@
1
+ /**
2
+ * System Resource Detection
3
+ *
4
+ * Provides vCPU, VRAM, and memory detection with caching.
5
+ * Used to auto-tune parallelism across the UAP system.
6
+ *
7
+ * Env override: UAP_MAX_PARALLEL always takes precedence.
8
+ * Precedence within this module: env var → auto-detect from os.cpus().
+ * (Config-level overrides, if configured, are applied by callers upstream.)
9
+ */
10
+ import { cpus, totalmem } from 'os';
11
+ import { execSync } from 'child_process';
12
+ let _cached = null;
13
/**
 * Detect system resources (cached after first call).
 *
 * Probes vCPU count and total memory via `os`, then attempts GPU VRAM
 * detection: first NVIDIA via `nvidia-smi`, then macOS unified memory via
 * `sysctl` (capped at 48 GB since the GPU shares system RAM). Any probe
 * failure or unparsable output leaves vramGB at 0.
 *
 * @returns {{ vCPUs: number, vramGB: number, memoryGB: number }}
 */
export function detectSystemResources() {
    if (_cached)
        return _cached;
    const vCPUs = cpus().length;
    const memoryGB = Math.round(totalmem() / 1024 ** 3);
    let vramGB = 0;
    try {
        // NVIDIA GPU: memory.total is reported in MiB, one line per GPU;
        // we use the first GPU only.
        const out = execSync('nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits', {
            encoding: 'utf-8',
            timeout: 3000,
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        const mib = parseInt(out.trim().split('\n')[0], 10);
        // Guard against unparsable output: Math.round(NaN) would cache NaN forever.
        if (Number.isFinite(mib)) {
            vramGB = Math.round(mib / 1024);
        }
    }
    catch {
        try {
            // macOS unified memory (report as VRAM since GPU shares it)
            const out = execSync('sysctl -n hw.memsize', {
                encoding: 'utf-8',
                timeout: 3000,
                stdio: ['pipe', 'pipe', 'pipe'],
            });
            const bytes = parseInt(out.trim(), 10);
            if (Number.isFinite(bytes)) {
                vramGB = Math.min(Math.round(bytes / 1024 ** 3), 48);
            }
        }
        catch {
            // No GPU detected
        }
    }
    _cached = { vCPUs, vramGB, memoryGB };
    return _cached;
}
48
/**
 * Compute a safe ceiling for concurrent operations.
 *
 * @param mode - 'cpu' for compute-bound work (reserves two cores for the OS
 *               and the inference server), 'io' for IO-bound work such as
 *               API calls, where higher concurrency is safe.
 * @returns Maximum number of concurrent operations (always >= 1)
 *
 * Precedence:
 *   1. UAP_MAX_PARALLEL env var (any positive integer always wins)
 *   2. Auto-detected from os.cpus()
 */
export function getMaxParallel(mode = 'io') {
    const raw = process.env.UAP_MAX_PARALLEL;
    if (raw) {
        const override = parseInt(raw, 10);
        if (Number.isInteger(override) && override > 0) {
            return override;
        }
    }
    const { vCPUs } = detectSystemResources();
    if (mode === 'cpu') {
        // Compute-bound: leave two cores free for the OS and inference server.
        return Math.max(1, vCPUs - 2);
    }
    // IO-bound: more concurrency is fine, but cap at 8 so local inference
    // endpoints are not overwhelmed.
    return Math.max(1, Math.min(vCPUs, 8));
}
76
/**
 * Check whether parallelism is globally enabled.
 *
 * Only the literal string 'false' in UAP_PARALLEL disables parallelism;
 * an unset variable or any other value leaves it on (default: true).
 */
export function isParallelEnabled() {
    const flag = process.env.UAP_PARALLEL;
    return flag !== 'false';
}
86
/**
 * Reset the cached system-resource snapshot (test helper).
 *
 * Clears the module-level `_cached` value so the next call to
 * detectSystemResources() re-probes CPU, memory, and GPU state.
 */
export function resetResourceCache() {
    _cached = null;
}
92
+ //# sourceMappingURL=system-resources.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"system-resources.js","sourceRoot":"","sources":["../../src/utils/system-resources.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAWzC,IAAI,OAAO,GAA2B,IAAI,CAAC;AAE3C;;GAEG;AACH,MAAM,UAAU,qBAAqB;IACnC,IAAI,OAAO;QAAE,OAAO,OAAO,CAAC;IAE5B,MAAM,KAAK,GAAG,IAAI,EAAE,CAAC,MAAM,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC;IAEpD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,CAAC;QACH,aAAa;QACb,MAAM,GAAG,GAAG,QAAQ,CAAC,mEAAmE,EAAE;YACxF,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,IAAI;YACb,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;IACtE,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,CAAC;YACH,4DAA4D;YAC5D,MAAM,GAAG,GAAG,QAAQ,CAAC,sBAAsB,EAAE;gBAC3C,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,IAAI;gBACb,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,GAAG,IAAI,IAAI,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1E,CAAC;QAAC,MAAM,CAAC;YACP,kBAAkB;QACpB,CAAC;IACH,CAAC;IAED,OAAO,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;IACtC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,cAAc,CAAC,OAAqB,IAAI;IACtD,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;IACjD,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,MAAM,GAAG,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC;YAAE,OAAO,MAAM,CAAC;IAClD,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,GAAG,qBAAqB,EAAE,CAAC;IAE1C,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACnB,4CAA4C;QAC5C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;IAChC,CAAC;IAED,4DAA4D;IAC5D,yCAAyC;IACzC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;AACzC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB;IAC/B,OAAO,OAAO,CAAC,GAAG,CAAC,YAAY,KAAK,OAAO,CAAC;AAC9C,CAAC;AAED;;GAE
G;AACH,MAAM,UAAU,kBAAkB;IAChC,OAAO,GAAG,IAAI,CAAC;AACjB,CAAC"}
@@ -0,0 +1,146 @@
1
+ # UAP Benchmark: Actual Gaps & Execution Plan
2
+
3
+ **Generated:** 2026-03-17
4
+ **Benchmark:** Harbor Terminal-Bench 2.0 (89 tasks)
5
+ **Primary Target:** Qwen3.5 35B A3B (IQ4_XS)
6
+
7
+ ---
8
+
9
+ ## What Already Exists (DO NOT REBUILD)
10
+
11
+ | Component | File | Status |
12
+ | -------------------------------- | ---------------------------------------------------------- | ---------------------------------------------------------------------------------------------- |
13
+ | Baseline benchmark (no UAP) | `scripts/benchmarks/benchmark-qwen35-baseline-no-uap.tsx` | 403 lines, 94 tasks |
14
+ | UAP benchmark (full integration) | `scripts/benchmarks/benchmark-qwen35-uap-3.0-opencode.tsx` | 812 lines, 89 tasks |
15
+ | Harbor quick runner (UAP) | `scripts/benchmarks/run-tbench-qwen35-quick.sh` | 459 lines, hybrid-adaptive |
16
+ | Harbor baseline+UAP runner | `scripts/benchmarks/run-harbor-qwen35-benchmark.sh` | Runs both configs sequentially |
17
+ | Harbor YAML configs | `benchmarks/harbor-configs/qwen35_*.yaml` | Baseline + UAP pair |
18
+ | Comparison report generator | `scripts/benchmarks/generate-comparison-report.ts` | 461 lines, p-value tests |
19
+ | Full benchmark harness | `scripts/benchmarks/run-full-benchmark.sh` | 413 lines, multi-model A/B |
20
+ | Multi-turn agent loop | `src/benchmarks/multi-turn-loop.ts` | 213 lines, `executeWithRetry()` |
21
+ | Multi-turn + verification | `src/benchmarks/multi-turn-agent.ts` | Wired to dynamic retrieval |
22
+ | Improved benchmark runner | `src/benchmarks/improved-benchmark.ts` | 794 lines, wires multi-turn + dynamic retrieval + task classification + hierarchical prompting |
23
+ | Dynamic memory retrieval | `src/memory/dynamic-retrieval.ts` | 1168 lines, 6 memory sources, adaptive depth |
24
+ | Task classifier | `src/memory/task-classifier.ts` | 426 lines, 8 categories, ambiguity detection |
25
+ | Qdrant embeddings | `src/memory/embeddings.ts` | Fixed, 5 backends with fallback |
26
+ | Tool call retry (Qwen) | `tools/agents/scripts/qwen_tool_call_wrapper.py` | 686 lines, 6 retry strategies |
27
+ | Harbor UAP agent | `tools/uap_harbor/uap_agent.py` | 379 lines, classified preamble |
28
+ | Qwen3.5 model presets | `src/models/types.ts:136-151` | `qwen35-a3b` and `qwen35` defined |
29
+ | Model router | `src/models/router.ts` | Qwen3.5 as default executor |
30
+
31
+ ---
32
+
33
+ ## Actual Gaps (3 items)
34
+
35
+ ### Gap 1: `improved-benchmark.ts` MODELS array missing Qwen3.5
36
+
37
+ `src/benchmarks/improved-benchmark.ts:95-99` has the fully wired runner (multi-turn + dynamic retrieval + task classification + hierarchical prompting + verification) but its MODELS array only contains:
38
+
39
+ ```typescript
40
+ const MODELS: ModelConfig[] = [
41
+ { id: 'opus-4.5', name: 'Claude Opus 4.5', apiModel: 'claude-opus-4-5-20251101' },
42
+ { id: 'glm-4.7', name: 'GLM 4.7', apiModel: 'glm-4.7' },
43
+ { id: 'gpt-5.2-codex', name: 'GPT 5.2 Codex', apiModel: 'gpt-5.2-codex' },
44
+ ];
45
+ // Qwen3.5 MISSING
46
+ ```
47
+
48
+ **Fix:** Add Qwen3.5 to the MODELS array. The preset already exists in `src/models/types.ts:136-151`.
49
+
50
+ ### Gap 2: `model-integration.ts` MODELS array missing Qwen3.5 + still single-shot
51
+
52
+ `src/benchmarks/model-integration.ts:336-361` is the older benchmark runner. It:
53
+
54
+ - Has no Qwen3.5 in its MODELS array
55
+ - Uses single-shot execution (no multi-turn, no dynamic retrieval)
56
+
57
+ **Fix:** Add Qwen3.5 to its MODELS array. The multi-turn wiring gap is already solved by `improved-benchmark.ts` -- this file can remain as the "legacy single-shot" runner for comparison purposes.
58
+
59
+ ### Gap 3: No benchmark results exist
60
+
61
+ `benchmark-results/` directory does not exist. None of the scripts have been executed.
62
+
63
+ **Fix:** Run the existing scripts.
64
+
65
+ ---
66
+
67
+ ## Execution Plan
68
+
69
+ ### Step 1: Add Qwen3.5 to improved-benchmark.ts MODELS array
70
+
71
+ **File:** `src/benchmarks/improved-benchmark.ts:95-99`
72
+
73
+ ```typescript
74
+ const MODELS: ModelConfig[] = [
75
+ { id: 'opus-4.5', name: 'Claude Opus 4.5', apiModel: 'claude-opus-4-5-20251101' },
76
+ { id: 'glm-4.7', name: 'GLM 4.7', apiModel: 'glm-4.7' },
77
+ { id: 'gpt-5.2-codex', name: 'GPT 5.2 Codex', apiModel: 'gpt-5.2-codex' },
78
+ { id: 'qwen35-a3b', name: 'Qwen 3.5 35B A3B', apiModel: 'qwen35-a3b-iq4xs' },
79
+ ];
80
+ ```
81
+
82
+ ### Step 2: Add Qwen3.5 to model-integration.ts MODELS array
83
+
84
+ **File:** `src/benchmarks/model-integration.ts:336-361`
85
+
86
+ ```typescript
87
+ {
88
+ id: 'qwen35-a3b',
89
+ name: 'Qwen 3.5 35B A3B',
90
+ provider: 'local',
91
+ apiModel: 'qwen35-a3b-iq4xs',
92
+ },
93
+ ```
94
+
95
+ ### Step 3: Run existing benchmarks
96
+
97
+ ```bash
98
+ # Option A: Quick Qwen3.5 baseline + UAP via Harbor (recommended first)
99
+ ./scripts/benchmarks/run-harbor-qwen35-benchmark.sh
100
+
101
+ # Option B: Direct API baseline (no Harbor containers)
102
+ npx tsx scripts/benchmarks/benchmark-qwen35-baseline-no-uap.tsx
103
+
104
+ # Option C: Direct API UAP-enhanced
105
+ npx tsx scripts/benchmarks/benchmark-qwen35-uap-3.0-opencode.tsx
106
+
107
+ # Option D: Improved benchmark with multi-turn + dynamic retrieval (all models)
108
+ npx tsx src/benchmarks/improved-benchmark.ts
109
+
110
+ # Option E: Full Harbor harness (all models, baseline vs UAP)
111
+ ./scripts/benchmarks/run-full-benchmark.sh --model qwen35-a3b-iq4xs
112
+ ```
113
+
114
+ ### Step 4: Generate comparison report
115
+
116
+ ```bash
117
+ npx tsx scripts/benchmarks/generate-comparison-report.ts \
118
+ --baseline benchmark-results/qwen35_baseline_no_uap/ \
119
+ --uap benchmark-results/qwen35_uap_3.0_opencode/
120
+ ```
121
+
122
+ ---
123
+
124
+ ## What This Plan Does NOT Do (because it already exists)
125
+
126
+ - Build a multi-turn agent loop (exists: `src/benchmarks/multi-turn-loop.ts`)
127
+ - Build dynamic memory retrieval (exists: `src/memory/dynamic-retrieval.ts`)
128
+ - Build task classification (exists: `src/memory/task-classifier.ts`)
129
+ - Fix Qdrant embeddings (already fixed: `src/memory/embeddings.ts`)
130
+ - Build Harbor configs (exist: `benchmarks/harbor-configs/qwen35_*.yaml`)
131
+ - Build comparison report generator (exists: `scripts/benchmarks/generate-comparison-report.ts`)
132
+ - Wire multi-turn into benchmark runner (exists: `src/benchmarks/improved-benchmark.ts`)
133
+ - Build tool call retry for Qwen (exists: `tools/agents/scripts/qwen_tool_call_wrapper.py`)
134
+ - Create execution scripts (exist: 6+ scripts in `scripts/benchmarks/`)
135
+
136
+ ---
137
+
138
+ ## Estimated Effort
139
+
140
+ | Step | Effort | Type |
141
+ | ------------------------------------ | -------------- | -------------------------------------- |
142
+ | Add Qwen3.5 to improved-benchmark.ts | 2 minutes | Code change (1 line) |
143
+ | Add Qwen3.5 to model-integration.ts | 2 minutes | Code change (5 lines) |
144
+ | Run benchmarks | 2-8 hours | Execution (depends on model speed) |
145
+ | Review results | 30 minutes | Analysis |
146
+ | **Total** | **~3-9 hours** | Mostly waiting for benchmark execution |
@@ -0,0 +1,422 @@
1
+ # Parallelism & Dependency Notification: Gaps and Options
2
+
3
+ **Generated:** 2026-03-17
4
+
5
+ ---
6
+
7
+ ## Current State
8
+
9
+ ### What exists (DO NOT REBUILD)
10
+
11
+ | Component | File | What it does |
12
+ | ---------------------------- | ------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
13
+ | Executor parallel batching | `src/models/executor.ts:54-162` | `Promise.all` batches capped by `maxParallel` (default 3), topological level grouping |
14
+ | Deploy batcher parallelism | `src/coordination/deploy-batcher.ts:34-591` | Categorizes actions as parallel-safe vs sequential, `Promise.allSettled` with `maxParallelActions` (default 5) |
15
+ | Benchmark model parallelism | `src/benchmarks/model-integration.ts:911-944` + `improved-benchmark.ts:565-603` | Queue-based concurrency pool, `parallelModels` param (default 1) |
16
+ | Planner topological sort | `src/models/planner.ts:379-413` | Returns `string[][]` dependency levels for parallel execution |
17
+ | Task dependency DAG | `src/tasks/database.ts:71-84` + `service.ts:438-538` | `task_dependencies` table, add/remove deps, cycle detection (BFS), blocked/ready queries |
18
+ | Task coordination scoring | `src/tasks/coordination.ts:260-298` | Scores tasks: +5 no deps, +3 per task it unblocks |
19
+ | Pub/sub messaging | `src/coordination/service.ts:608-642` | SQLite-backed broadcast/send/receive on channels |
20
+ | Config schema (unused) | `src/types/config.ts:223-229` | `maxParallelDroids` (4), `maxParallelWorkflows` (3) -- declared but never consumed |
21
+ | VRAM detection | `src/bin/llama-server-optimize.ts:301-331` | nvidia-smi / sysctl, not reusable (private to llama-server) |
22
+ | CPU detection | `src/bin/llama-server-optimize.ts:614` | `os.cpus().length - 2`, not reusable (inline in llama-server) |
23
+ | Harbor N_CONCURRENT | Shell scripts | `N_CONCURRENT` env var (default 4) for Harbor `-n` flag |
24
+ | Python parallel tool calls | `tools/agents/scripts/qwen_tool_call_wrapper.py` | `parallel_tool_calls: True` in every API request |
25
+ | Unbounded memory parallelism | `src/memory/predictive-memory.ts:94-104` + `embeddings.ts:228-230` | `Promise.all` with no concurrency limit |
26
+
27
+ ### What's broken or missing
28
+
29
+ | Gap | Location | Impact |
30
+ | -------------------------------------------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------- |
31
+ | **No env var override for TypeScript parallelism** | `executor.ts`, `improved-benchmark.ts`, `model-integration.ts` | Cannot tune parallelism without code changes |
32
+ | **No vCPU/resource detection shared utility** | `llama-server-optimize.ts` has it but private | Every module hardcodes defaults |
33
+ | **Config schema not wired** | `config.ts:223-229` | `maxParallelDroids`/`maxParallelWorkflows` declared but never read |
34
+ | **No dependency completion notification** | `service.ts:287-302` | `close()` marks task done but does NOT notify blocked dependents |
35
+ | **No auto-unblock on completion** | `coordination.ts:155-200` | `release()` broadcasts generic message, doesn't check newly-unblocked tasks |
36
+ | **Unbounded memory concurrency** | `predictive-memory.ts`, `embeddings.ts` | `Promise.all` on all items, can overwhelm local inference |
37
+ | **Benchmark parallelism defaults to 1** | `improved-benchmark.ts:632`, `model-integration.ts` | Sequential by default, no auto-detection |
38
+
39
+ ---
40
+
41
+ ## Option A: Minimal -- Env Overrides + Wire Config (Recommended)
42
+
43
+ **Effort:** ~2 hours | **Risk:** Low | **Impact:** High
44
+
45
+ No new abstractions. Add env var reads to existing code, wire the existing config schema, and add notification to `close()`.
46
+
47
+ ### A1. Shared resource detection utility
48
+
49
+ **New file:** `src/utils/system-resources.ts`
50
+
51
+ Extract and generalize the detection logic already in `llama-server-optimize.ts`:
52
+
53
+ ```typescript
54
+ import { cpus } from 'os';
55
+ import { execSync } from 'child_process';
56
+
57
+ export interface SystemResources {
58
+ vCPUs: number;
59
+ vramGB: number;
60
+ memoryGB: number;
61
+ }
62
+
63
+ let _cached: SystemResources | null = null;
64
+
65
+ export function detectSystemResources(): SystemResources {
66
+ if (_cached) return _cached;
67
+
68
+ const vCPUs = cpus().length;
69
+
70
+ let vramGB = 0;
71
+ try {
72
+ const out = execSync('nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits', {
73
+ encoding: 'utf-8',
74
+ timeout: 3000,
75
+ });
76
+ vramGB = Math.round(parseInt(out.trim().split('\n')[0]) / 1024);
77
+ } catch {
78
+ try {
79
+ const out = execSync('sysctl -n hw.memsize', { encoding: 'utf-8' });
80
+ vramGB = Math.min(Math.round(parseInt(out.trim()) / 1024 ** 3), 48);
81
+ } catch {
82
+ vramGB = 0;
83
+ }
84
+ }
85
+
86
+   const memoryGB = Math.round(require('os').totalmem() / 1024 ** 3); // NOTE(review): mixes CJS `require` into an ESM example — import `totalmem` from 'os' alongside `cpus` instead
87
+
88
+ _cached = { vCPUs, vramGB, memoryGB };
89
+ return _cached;
90
+ }
91
+
92
+ /**
93
+ * Compute safe parallelism ceiling.
94
+ * For CPU-bound: vCPUs - 2 (reserve for OS + inference).
95
+ * For IO-bound (API calls): min(vCPUs, 8).
96
+ * Env override: UAP_MAX_PARALLEL always wins.
97
+ */
98
+ export function getMaxParallel(mode: 'cpu' | 'io' = 'io'): number {
99
+ const envOverride = process.env.UAP_MAX_PARALLEL;
100
+ if (envOverride) return Math.max(1, parseInt(envOverride, 10) || 1);
101
+
102
+ const { vCPUs } = detectSystemResources();
103
+
104
+ if (mode === 'cpu') {
105
+ return Math.max(1, vCPUs - 2);
106
+ }
107
+ return Math.min(vCPUs, 8);
108
+ }
109
+ ```
110
+
111
+ ### A2. Env var overrides in existing code
112
+
113
+ **File: `src/models/executor.ts:58-65`** -- read env vars into defaults:
114
+
115
+ ```typescript
116
+ const DEFAULT_OPTIONS: ExecutorOptions = {
117
+ maxRetries: 2,
118
+ retryDelayMs: 1000,
119
+ stepTimeout: 120000,
120
+ enableFallback: true,
121
+ parallelExecution: process.env.UAP_PARALLEL !== 'false',
122
+ maxParallel: parseInt(process.env.UAP_MAX_PARALLEL || '', 10) || 3,
123
+ };
124
+ ```
125
+
126
+ **File: `src/benchmarks/improved-benchmark.ts:632`** -- auto-detect:
127
+
128
+ ```typescript
129
+ const parallelModels =
130
+ (options.parallelModels ?? parseInt(process.env.UAP_BENCHMARK_PARALLEL || '', 10)) ||
131
+ getMaxParallel('io');
132
+ ```
133
+
134
+ **File: `src/benchmarks/model-integration.ts`** -- same pattern.
135
+
136
+ **File: `src/coordination/deploy-batcher.ts:78-79`** -- same pattern:
137
+
138
+ ```typescript
139
+ parallelExecution: process.env.UAP_PARALLEL !== 'false',
140
+ maxParallelActions: parseInt(process.env.UAP_MAX_PARALLEL || '', 10) || 5,
141
+ ```
142
+
143
+ ### A3. Wire existing config schema
144
+
145
+ **File: `src/types/config.ts:223-229`** -- already declares `maxParallelDroids` and `maxParallelWorkflows`.
146
+
147
+ Wire into executor by reading config at startup:
148
+
149
+ ```typescript
150
+ // In executor.ts constructor or factory
151
+ const uapConfig = loadUAPConfig(); // existing config loader
152
+ if (uapConfig?.parallelExecution) {
153
+ this.options.parallelExecution = uapConfig.parallelExecution.enabled;
154
+ this.options.maxParallel = uapConfig.parallelExecution.maxParallelDroids;
155
+ }
156
+ ```
157
+
158
+ ### A4. Dependency completion notification
159
+
160
+ **File: `src/tasks/service.ts:287-302`** -- add unblock notification to `close()`:
161
+
162
+ ```typescript
163
+ close(id: string, reason?: string): Task | null {
164
+ const task = this.get(id);
165
+ if (!task) return null;
166
+
167
+ const now = new Date().toISOString();
168
+ const stmt = this.db.prepare(`
169
+ UPDATE tasks SET status = 'done', closed_at = ?, closed_reason = ?, updated_at = ?
170
+ WHERE id = ?
171
+ `);
172
+ stmt.run(now, reason || null, now, id);
173
+
174
+ this.recordHistory(id, 'status', task.status, 'done');
175
+ this.recordActivity(id, 'closed', reason || 'Task completed');
176
+
177
+ // --- NEW: Notify newly-unblocked dependents ---
178
+ this.notifyUnblockedDependents(id);
179
+
180
+ return this.get(id);
181
+ }
182
+
183
+ private notifyUnblockedDependents(completedTaskId: string): void {
184
+ // Find tasks that were blocked by this task
185
+ const dependents = this.db.prepare(`
186
+ SELECT from_task FROM task_dependencies
187
+ WHERE to_task = ? AND dep_type = 'blocks'
188
+ `).all(completedTaskId) as Array<{ from_task: string }>;
189
+
190
+ for (const dep of dependents) {
191
+ const dependent = this.getWithRelations(dep.from_task);
192
+ if (dependent && dependent.isReady) {
193
+ // Task is now unblocked -- record activity
194
+ this.recordActivity(dep.from_task, 'unblocked',
195
+ `Unblocked: dependency "${completedTaskId}" completed`);
196
+
197
+ // If task was in 'blocked' status, move to 'open'
198
+ const raw = this.get(dep.from_task);
199
+ if (raw && raw.status === 'blocked') {
200
+ this.db.prepare(`UPDATE tasks SET status = 'open', updated_at = ? WHERE id = ?`)
201
+ .run(new Date().toISOString(), dep.from_task);
202
+ this.recordHistory(dep.from_task, 'status', 'blocked', 'open');
203
+ }
204
+ }
205
+ }
206
+ }
207
+ ```
208
+
209
+ **File: `src/tasks/coordination.ts:155-200`** -- enhance `release()` to return unblocked tasks:
210
+
211
+ ```typescript
212
+ async release(taskId: string, reason?: string): Promise<ReleaseResult | null> {
213
+ // ... existing code ...
214
+
215
+ // Broadcast task completion
216
+ this.coordService.broadcast(this.agentId, 'coordination', {
217
+ action: 'task_completed',
218
+ resource: taskId,
219
+ data: {
220
+ title: task.title,
221
+ reason,
222
+ },
223
+ });
224
+
225
+ // --- NEW: Find and broadcast newly-unblocked tasks ---
226
+ const nowReady = this.taskService.ready().filter(t =>
227
+ t.blockedBy.length === 0 // was previously blocked, now free
228
+ );
229
+
230
+ if (nowReady.length > 0) {
231
+ this.coordService.broadcast(this.agentId, 'coordination', {
232
+ action: 'tasks_unblocked',
233
+ resource: taskId,
234
+ data: {
235
+ unblockedTasks: nowReady.map(t => ({ id: t.id, title: t.title })),
236
+ count: nowReady.length,
237
+ },
238
+ });
239
+ }
240
+
241
+ // ... rest of existing code ...
242
+
243
+ return {
244
+ task: closedTask,
245
+ completedAnnouncements: 1,
246
+ unblockedTasks: nowReady.map(t => t.id), // NEW field
247
+ };
248
+ }
249
+ ```
250
+
251
+ ### A5. Cap unbounded memory concurrency
252
+
253
+ **File: `src/memory/predictive-memory.ts:94-104`**:
254
+
255
+ ```typescript
256
+ // Before (unbounded):
257
+ const results = await Promise.all(predictions.map((p) => this.query(p)));
258
+
259
+ // After (capped):
260
+ import { getMaxParallel } from '../utils/system-resources.js';
261
+
262
+ const maxConcurrent = getMaxParallel('io');
263
+ const results: MemoryEntry[][] = [];
264
+ for (let i = 0; i < predictions.length; i += maxConcurrent) {
265
+ const batch = predictions.slice(i, i + maxConcurrent);
266
+ const batchResults = await Promise.all(batch.map((p) => this.query(p)));
267
+ results.push(...batchResults);
268
+ }
269
+ ```
270
+
271
+ ### Env Variable Summary
272
+
273
+ | Variable | Default | Scope | Override |
274
+ | ------------------------ | ------------------------ | ---------------------------------- | ---------------------------------- |
275
+ | `UAP_PARALLEL` | `true` | Global on/off | `false` to disable all parallelism |
276
+ | `UAP_MAX_PARALLEL` | auto-detected from vCPUs | Global concurrency cap | Any integer |
277
+ | `UAP_BENCHMARK_PARALLEL` | auto-detected | Benchmark model concurrency | Any integer |
278
+ | `N_CONCURRENT` | `4` | Harbor task concurrency (existing) | Any integer |
279
+
280
+ ### Precedence
281
+
282
+ ```
283
+ UAP_MAX_PARALLEL env var
284
+ → uap.config parallelExecution.maxParallelDroids
285
+ → auto-detected from os.cpus().length
286
+ → hardcoded default (3)
287
+ ```
288
+
289
+ ---
290
+
291
+ ## Option B: Concurrency Pool Utility
292
+
293
+ **Effort:** ~4 hours | **Risk:** Low | **Impact:** Medium
294
+
295
+ Everything in Option A, plus a shared concurrency pool to replace the duplicated `Promise.all` batching pattern across 5+ files.
296
+
297
+ ### B1. Shared concurrency pool
298
+
299
+ **New file:** `src/utils/concurrency-pool.ts`
300
+
301
+ ```typescript
302
+ import { getMaxParallel } from './system-resources.js';
303
+
304
+ export async function concurrentMap<T, R>(
305
+ items: T[],
306
+ fn: (item: T, index: number) => Promise<R>,
307
+ options?: { maxConcurrent?: number; mode?: 'cpu' | 'io' }
308
+ ): Promise<R[]> {
309
+ const max = options?.maxConcurrent ?? getMaxParallel(options?.mode ?? 'io');
310
+ const results: R[] = new Array(items.length);
311
+ let nextIndex = 0;
312
+
313
+ const worker = async () => {
314
+ while (nextIndex < items.length) {
315
+ const i = nextIndex++;
316
+ results[i] = await fn(items[i], i);
317
+ }
318
+ };
319
+
320
+ const workers = Array.from({ length: Math.min(max, items.length) }, () => worker());
321
+ await Promise.all(workers);
322
+ return results;
323
+ }
324
+ ```
325
+
326
+ ### B2. Replace duplicated patterns
327
+
328
+ | File | Current | Replace with |
329
+ | ------------------------------- | -------------------------------------- | ----------------------------------------------------------- |
330
+ | `executor.ts:149-153` | Manual `Promise.all` batch loop | `concurrentMap(batch, taskId => this.executeSubtask(...))` |
331
+ | `deploy-batcher.ts:582-591` | Manual `Promise.allSettled` chunk loop | `concurrentMap(actions, action => this.executeAction(...))` |
332
+ | `improved-benchmark.ts:565-603` | Queue-based pool | `concurrentMap(models, model => runBenchmarkForModel(...))` |
333
+ | `model-integration.ts:911-944` | Queue-based pool | `concurrentMap(models, model => runBenchmarkForModel(...))` |
334
+ | `predictive-memory.ts:94-104` | Unbounded `Promise.all` | `concurrentMap(predictions, p => this.query(p))` |
335
+ | `embeddings.ts:228-230` | Unbounded `Promise.all` | `concurrentMap(texts, t => this.embed(t))` |
336
+
337
+ ---
338
+
339
+ ## Option C: Full Event-Driven Dependency Resolution
340
+
341
+ **Effort:** ~8 hours | **Risk:** Medium | **Impact:** High
342
+
343
+ Everything in Options A+B, plus an event-driven system where task completion automatically triggers dependent task execution.
344
+
345
+ ### C1. TaskEventBus
346
+
347
+ **New file:** `src/tasks/event-bus.ts`
348
+
349
+ ```typescript
350
+ type TaskEvent =
351
+ | { type: 'task_completed'; taskId: string }
352
+ | { type: 'task_unblocked'; taskId: string; unblockedBy: string }
353
+ | { type: 'task_failed'; taskId: string; error: string };
354
+
355
+ type TaskEventHandler = (event: TaskEvent) => void | Promise<void>;
356
+
357
+ export class TaskEventBus {
358
+ private handlers: Map<TaskEvent['type'], TaskEventHandler[]> = new Map();
359
+
360
+ on(type: TaskEvent['type'], handler: TaskEventHandler): void {
361
+ const list = this.handlers.get(type) || [];
362
+ list.push(handler);
363
+ this.handlers.set(type, list);
364
+ }
365
+
366
+ async emit(event: TaskEvent): Promise<void> {
367
+ const handlers = this.handlers.get(event.type) || [];
368
+ await Promise.all(handlers.map((h) => h(event)));
369
+ }
370
+ }
371
+ ```
372
+
373
+ ### C2. Auto-execute unblocked tasks
374
+
375
+ Wire into `TaskCoordinator`:
376
+
377
+ ```typescript
378
+ // In coordinator constructor
379
+ this.eventBus.on('task_unblocked', async (event) => {
380
+ if (event.type !== 'task_unblocked') return;
381
+ const task = this.taskService.getWithRelations(event.taskId);
382
+ if (task && task.isReady && this.autoExecuteEnabled) {
383
+ await this.claim(event.taskId);
384
+ }
385
+ });
386
+ ```
387
+
388
+ ### C3. Executor emits completion events
389
+
390
+ Wire into `TaskExecutor.executePlan()`:
391
+
392
+ ```typescript
393
+ // After subtask completes successfully
394
+ this.eventBus.emit({ type: 'task_completed', taskId: subtaskId });
395
+ ```
396
+
397
+ ---
398
+
399
+ ## Recommendation
400
+
401
+ **Start with Option A** (env overrides + wire config + dependency notification). It addresses every gap with minimal code changes to existing files and no new abstractions. The concurrency pool (Option B) is a nice cleanup but not blocking. The event bus (Option C) is only needed if you want auto-execution of unblocked tasks.
402
+
403
+ | Option | Effort | Files changed | New files | Risk |
404
+ | ------------------- | -------- | ------------- | ------------------------------------------------ | ------ |
405
+ | **A (Recommended)** | ~2 hours | 7 existing    | 1 (`system-resources.ts`)                        | Low    |
406
+ | B | ~4 hours | 8 existing | 2 (`system-resources.ts`, `concurrency-pool.ts`) | Low |
407
+ | C | ~8 hours | 10 existing | 3 (+ `event-bus.ts`) | Medium |
408
+
409
+ ---
410
+
411
+ ## Files to Change (Option A)
412
+
413
+ | File | Change | Lines affected |
414
+ | ------------------------------------------ | -------------------------------------------------------- | -------------- |
415
+ | `src/utils/system-resources.ts` | **NEW** -- vCPU/VRAM detection + `getMaxParallel()` | ~45 lines |
416
+ | `src/models/executor.ts:58-65` | Read `UAP_PARALLEL`, `UAP_MAX_PARALLEL` env vars | 2 lines |
417
+ | `src/benchmarks/improved-benchmark.ts:632` | Read `UAP_BENCHMARK_PARALLEL`, fallback to auto-detect | 3 lines |
418
+ | `src/benchmarks/model-integration.ts` | Same as above | 3 lines |
419
+ | `src/coordination/deploy-batcher.ts:78-79` | Read env vars | 2 lines |
420
+ | `src/tasks/service.ts:287-302` | Add `notifyUnblockedDependents()` after `close()` | ~25 lines |
421
+ | `src/tasks/coordination.ts:155-200` | Broadcast `tasks_unblocked` event, return unblocked list | ~15 lines |
422
+ | `src/memory/predictive-memory.ts:94-104` | Cap concurrency with `getMaxParallel()` | 5 lines |