@miller-tech/uap 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/dist/benchmarks/benchmark.d.ts +8 -8
  2. package/dist/benchmarks/improved-benchmark.d.ts.map +1 -1
  3. package/dist/benchmarks/improved-benchmark.js +10 -23
  4. package/dist/benchmarks/improved-benchmark.js.map +1 -1
  5. package/dist/benchmarks/model-integration.d.ts.map +1 -1
  6. package/dist/benchmarks/model-integration.js +22 -23
  7. package/dist/benchmarks/model-integration.js.map +1 -1
  8. package/dist/bin/policy.js +67 -11
  9. package/dist/bin/policy.js.map +1 -1
  10. package/dist/cli/dashboard.d.ts +2 -1
  11. package/dist/cli/dashboard.d.ts.map +1 -1
  12. package/dist/cli/dashboard.js +399 -10
  13. package/dist/cli/dashboard.js.map +1 -1
  14. package/dist/cli/model.js +12 -12
  15. package/dist/cli/model.js.map +1 -1
  16. package/dist/cli/setup-wizard.d.ts.map +1 -1
  17. package/dist/cli/setup-wizard.js +24 -0
  18. package/dist/cli/setup-wizard.js.map +1 -1
  19. package/dist/coordination/deploy-batcher.d.ts +1 -0
  20. package/dist/coordination/deploy-batcher.d.ts.map +1 -1
  21. package/dist/coordination/deploy-batcher.js +24 -25
  22. package/dist/coordination/deploy-batcher.js.map +1 -1
  23. package/dist/dashboard/data-service.d.ts +94 -0
  24. package/dist/dashboard/data-service.d.ts.map +1 -0
  25. package/dist/dashboard/data-service.js +286 -0
  26. package/dist/dashboard/data-service.js.map +1 -0
  27. package/dist/dashboard/index.d.ts +5 -0
  28. package/dist/dashboard/index.d.ts.map +1 -0
  29. package/dist/dashboard/index.js +3 -0
  30. package/dist/dashboard/index.js.map +1 -0
  31. package/dist/dashboard/server.d.ts +15 -0
  32. package/dist/dashboard/server.d.ts.map +1 -0
  33. package/dist/dashboard/server.js +158 -0
  34. package/dist/dashboard/server.js.map +1 -0
  35. package/dist/mcp-router/session-stats.d.ts +9 -0
  36. package/dist/mcp-router/session-stats.d.ts.map +1 -1
  37. package/dist/mcp-router/session-stats.js +19 -3
  38. package/dist/mcp-router/session-stats.js.map +1 -1
  39. package/dist/memory/adaptive-context.d.ts +1 -0
  40. package/dist/memory/adaptive-context.d.ts.map +1 -1
  41. package/dist/memory/adaptive-context.js +4 -0
  42. package/dist/memory/adaptive-context.js.map +1 -1
  43. package/dist/memory/embeddings.d.ts.map +1 -1
  44. package/dist/memory/embeddings.js +4 -4
  45. package/dist/memory/embeddings.js.map +1 -1
  46. package/dist/memory/model-router.d.ts +1 -1
  47. package/dist/memory/model-router.d.ts.map +1 -1
  48. package/dist/memory/model-router.js +52 -1
  49. package/dist/memory/model-router.js.map +1 -1
  50. package/dist/memory/predictive-memory.d.ts.map +1 -1
  51. package/dist/memory/predictive-memory.js +4 -3
  52. package/dist/memory/predictive-memory.js.map +1 -1
  53. package/dist/models/analytics.d.ts +93 -0
  54. package/dist/models/analytics.d.ts.map +1 -0
  55. package/dist/models/analytics.js +205 -0
  56. package/dist/models/analytics.js.map +1 -0
  57. package/dist/models/execution-profiles.d.ts +6 -0
  58. package/dist/models/execution-profiles.d.ts.map +1 -1
  59. package/dist/models/execution-profiles.js +15 -0
  60. package/dist/models/execution-profiles.js.map +1 -1
  61. package/dist/models/executor.d.ts.map +1 -1
  62. package/dist/models/executor.js +51 -17
  63. package/dist/models/executor.js.map +1 -1
  64. package/dist/models/index.d.ts +2 -0
  65. package/dist/models/index.d.ts.map +1 -1
  66. package/dist/models/index.js +2 -0
  67. package/dist/models/index.js.map +1 -1
  68. package/dist/models/router.d.ts +8 -0
  69. package/dist/models/router.d.ts.map +1 -1
  70. package/dist/models/router.js +39 -18
  71. package/dist/models/router.js.map +1 -1
  72. package/dist/models/types.d.ts +26 -0
  73. package/dist/models/types.d.ts.map +1 -1
  74. package/dist/models/types.js +39 -0
  75. package/dist/models/types.js.map +1 -1
  76. package/dist/models/unified-router.d.ts.map +1 -1
  77. package/dist/models/unified-router.js +4 -0
  78. package/dist/models/unified-router.js.map +1 -1
  79. package/dist/policies/database-manager.d.ts +1 -0
  80. package/dist/policies/database-manager.d.ts.map +1 -1
  81. package/dist/policies/database-manager.js +14 -2
  82. package/dist/policies/database-manager.js.map +1 -1
  83. package/dist/policies/policy-gate.d.ts +2 -2
  84. package/dist/policies/policy-gate.d.ts.map +1 -1
  85. package/dist/policies/policy-gate.js +6 -4
  86. package/dist/policies/policy-gate.js.map +1 -1
  87. package/dist/policies/policy-memory.d.ts +3 -0
  88. package/dist/policies/policy-memory.d.ts.map +1 -1
  89. package/dist/policies/policy-memory.js +11 -0
  90. package/dist/policies/policy-memory.js.map +1 -1
  91. package/dist/policies/schemas/policy.d.ts +3 -0
  92. package/dist/policies/schemas/policy.d.ts.map +1 -1
  93. package/dist/policies/schemas/policy.js +1 -0
  94. package/dist/policies/schemas/policy.js.map +1 -1
  95. package/dist/tasks/coordination.d.ts +18 -0
  96. package/dist/tasks/coordination.d.ts.map +1 -1
  97. package/dist/tasks/coordination.js +59 -1
  98. package/dist/tasks/coordination.js.map +1 -1
  99. package/dist/tasks/event-bus.d.ts +91 -0
  100. package/dist/tasks/event-bus.d.ts.map +1 -0
  101. package/dist/tasks/event-bus.js +123 -0
  102. package/dist/tasks/event-bus.js.map +1 -0
  103. package/dist/tasks/service.d.ts +5 -0
  104. package/dist/tasks/service.d.ts.map +1 -1
  105. package/dist/tasks/service.js +59 -0
  106. package/dist/tasks/service.js.map +1 -1
  107. package/dist/telemetry/session-telemetry.d.ts.map +1 -1
  108. package/dist/telemetry/session-telemetry.js +3 -0
  109. package/dist/telemetry/session-telemetry.js.map +1 -1
  110. package/dist/utils/concurrency-pool.d.ts +51 -0
  111. package/dist/utils/concurrency-pool.d.ts.map +1 -0
  112. package/dist/utils/concurrency-pool.js +80 -0
  113. package/dist/utils/concurrency-pool.js.map +1 -0
  114. package/dist/utils/system-resources.d.ts +47 -0
  115. package/dist/utils/system-resources.d.ts.map +1 -0
  116. package/dist/utils/system-resources.js +92 -0
  117. package/dist/utils/system-resources.js.map +1 -0
  118. package/docs/BENCHMARK_GAPS_AND_PLAN.md +146 -0
  119. package/docs/PARALLELISM_GAPS_AND_OPTIONS.md +422 -0
  120. package/docs/UAP_OPTIMIZATION_PLAN.md +638 -0
  121. package/package.json +4 -1
  122. package/templates/hooks/session-start.sh +8 -1
@@ -0,0 +1,92 @@
1
+ /**
2
+ * System Resource Detection
3
+ *
4
+ * Provides vCPU, VRAM, and memory detection with caching.
5
+ * Used to auto-tune parallelism across the UAP system.
6
+ *
7
+ * Env override: UAP_MAX_PARALLEL always takes precedence.
8
+ * Precedence within this module: env var → auto-detect from os.cpus().
+ * (Config-level overrides, if configured, are applied by callers upstream.)
9
+ */
10
+ import { cpus, totalmem } from 'os';
11
+ import { execSync } from 'child_process';
12
+ let _cached = null;
13
/**
 * Detect system resources (cached after first call).
 *
 * Probes vCPU count and total memory via `os`, then attempts GPU VRAM
 * detection: first NVIDIA via `nvidia-smi`, then macOS unified memory via
 * `sysctl` (capped at 48 GB since the GPU shares system RAM). Any probe
 * failure or unparsable output leaves vramGB at 0.
 *
 * @returns {{ vCPUs: number, vramGB: number, memoryGB: number }}
 */
export function detectSystemResources() {
    if (_cached)
        return _cached;
    const vCPUs = cpus().length;
    const memoryGB = Math.round(totalmem() / 1024 ** 3);
    let vramGB = 0;
    try {
        // NVIDIA GPU: memory.total is reported in MiB, one line per GPU;
        // we use the first GPU only.
        const out = execSync('nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits', {
            encoding: 'utf-8',
            timeout: 3000,
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        const mib = parseInt(out.trim().split('\n')[0], 10);
        // Guard against unparsable output: Math.round(NaN) would cache NaN forever.
        if (Number.isFinite(mib)) {
            vramGB = Math.round(mib / 1024);
        }
    }
    catch {
        try {
            // macOS unified memory (report as VRAM since GPU shares it)
            const out = execSync('sysctl -n hw.memsize', {
                encoding: 'utf-8',
                timeout: 3000,
                stdio: ['pipe', 'pipe', 'pipe'],
            });
            const bytes = parseInt(out.trim(), 10);
            if (Number.isFinite(bytes)) {
                vramGB = Math.min(Math.round(bytes / 1024 ** 3), 48);
            }
        }
        catch {
            // No GPU detected
        }
    }
    _cached = { vCPUs, vramGB, memoryGB };
    return _cached;
}
48
/**
 * Compute a safe ceiling for concurrent operations.
 *
 * @param mode - 'cpu' for compute-bound work (reserves two cores for the OS
 *               and the inference server), 'io' for IO-bound work such as
 *               API calls, where higher concurrency is safe.
 * @returns Maximum number of concurrent operations (always >= 1)
 *
 * Precedence:
 *   1. UAP_MAX_PARALLEL env var (any positive integer always wins)
 *   2. Auto-detected from os.cpus()
 */
export function getMaxParallel(mode = 'io') {
    const raw = process.env.UAP_MAX_PARALLEL;
    if (raw) {
        const override = parseInt(raw, 10);
        if (Number.isInteger(override) && override > 0) {
            return override;
        }
    }
    const { vCPUs } = detectSystemResources();
    if (mode === 'cpu') {
        // Compute-bound: leave two cores free for the OS and inference server.
        return Math.max(1, vCPUs - 2);
    }
    // IO-bound: more concurrency is fine, but cap at 8 so local inference
    // endpoints are not overwhelmed.
    return Math.max(1, Math.min(vCPUs, 8));
}
76
/**
 * Check whether parallelism is globally enabled.
 *
 * Only the literal string 'false' in UAP_PARALLEL disables parallelism;
 * an unset variable or any other value leaves it on (default: true).
 */
export function isParallelEnabled() {
    const flag = process.env.UAP_PARALLEL;
    return flag !== 'false';
}
86
/**
 * Reset the cached system-resource snapshot (test helper).
 *
 * Clears the module-level `_cached` value so the next call to
 * detectSystemResources() re-probes CPU, memory, and GPU state.
 */
export function resetResourceCache() {
    _cached = null;
}
92
+ //# sourceMappingURL=system-resources.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"system-resources.js","sourceRoot":"","sources":["../../src/utils/system-resources.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAWzC,IAAI,OAAO,GAA2B,IAAI,CAAC;AAE3C;;GAEG;AACH,MAAM,UAAU,qBAAqB;IACnC,IAAI,OAAO;QAAE,OAAO,OAAO,CAAC;IAE5B,MAAM,KAAK,GAAG,IAAI,EAAE,CAAC,MAAM,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC;IAEpD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,CAAC;QACH,aAAa;QACb,MAAM,GAAG,GAAG,QAAQ,CAAC,mEAAmE,EAAE;YACxF,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,IAAI;YACb,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;IACtE,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,CAAC;YACH,4DAA4D;YAC5D,MAAM,GAAG,GAAG,QAAQ,CAAC,sBAAsB,EAAE;gBAC3C,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,IAAI;gBACb,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,GAAG,IAAI,IAAI,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1E,CAAC;QAAC,MAAM,CAAC;YACP,kBAAkB;QACpB,CAAC;IACH,CAAC;IAED,OAAO,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;IACtC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,cAAc,CAAC,OAAqB,IAAI;IACtD,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;IACjD,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,MAAM,GAAG,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC;YAAE,OAAO,MAAM,CAAC;IAClD,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,GAAG,qBAAqB,EAAE,CAAC;IAE1C,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACnB,4CAA4C;QAC5C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;IAChC,CAAC;IAED,4DAA4D;IAC5D,yCAAyC;IACzC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;AACzC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB;IAC/B,OAAO,OAAO,CAAC,GAAG,CAAC,YAAY,KAAK,OAAO,CAAC;AAC9C,CAAC;AAED;;GAE
G;AACH,MAAM,UAAU,kBAAkB;IAChC,OAAO,GAAG,IAAI,CAAC;AACjB,CAAC"}
@@ -0,0 +1,146 @@
1
+ # UAP Benchmark: Actual Gaps & Execution Plan
2
+
3
+ **Generated:** 2026-03-17
4
+ **Benchmark:** Harbor Terminal-Bench 2.0 (89 tasks)
5
+ **Primary Target:** Qwen3.5 35B A3B (IQ4_XS)
6
+
7
+ ---
8
+
9
+ ## What Already Exists (DO NOT REBUILD)
10
+
11
+ | Component | File | Status |
12
+ | -------------------------------- | ---------------------------------------------------------- | ---------------------------------------------------------------------------------------------- |
13
+ | Baseline benchmark (no UAP) | `scripts/benchmarks/benchmark-qwen35-baseline-no-uap.tsx` | 403 lines, 94 tasks |
14
+ | UAP benchmark (full integration) | `scripts/benchmarks/benchmark-qwen35-uap-3.0-opencode.tsx` | 812 lines, 89 tasks |
15
+ | Harbor quick runner (UAP) | `scripts/benchmarks/run-tbench-qwen35-quick.sh` | 459 lines, hybrid-adaptive |
16
+ | Harbor baseline+UAP runner | `scripts/benchmarks/run-harbor-qwen35-benchmark.sh` | Runs both configs sequentially |
17
+ | Harbor YAML configs | `benchmarks/harbor-configs/qwen35_*.yaml` | Baseline + UAP pair |
18
+ | Comparison report generator | `scripts/benchmarks/generate-comparison-report.ts` | 461 lines, p-value tests |
19
+ | Full benchmark harness | `scripts/benchmarks/run-full-benchmark.sh` | 413 lines, multi-model A/B |
20
+ | Multi-turn agent loop | `src/benchmarks/multi-turn-loop.ts` | 213 lines, `executeWithRetry()` |
21
+ | Multi-turn + verification | `src/benchmarks/multi-turn-agent.ts` | Wired to dynamic retrieval |
22
+ | Improved benchmark runner | `src/benchmarks/improved-benchmark.ts` | 794 lines, wires multi-turn + dynamic retrieval + task classification + hierarchical prompting |
23
+ | Dynamic memory retrieval | `src/memory/dynamic-retrieval.ts` | 1168 lines, 6 memory sources, adaptive depth |
24
+ | Task classifier | `src/memory/task-classifier.ts` | 426 lines, 8 categories, ambiguity detection |
25
+ | Qdrant embeddings | `src/memory/embeddings.ts` | Fixed, 5 backends with fallback |
26
+ | Tool call retry (Qwen) | `tools/agents/scripts/qwen_tool_call_wrapper.py` | 686 lines, 6 retry strategies |
27
+ | Harbor UAP agent | `tools/uap_harbor/uap_agent.py` | 379 lines, classified preamble |
28
+ | Qwen3.5 model presets | `src/models/types.ts:136-151` | `qwen35-a3b` and `qwen35` defined |
29
+ | Model router | `src/models/router.ts` | Qwen3.5 as default executor |
30
+
31
+ ---
32
+
33
+ ## Actual Gaps (3 items)
34
+
35
+ ### Gap 1: `improved-benchmark.ts` MODELS array missing Qwen3.5
36
+
37
+ `src/benchmarks/improved-benchmark.ts:95-99` has the fully wired runner (multi-turn + dynamic retrieval + task classification + hierarchical prompting + verification) but its MODELS array only contains:
38
+
39
+ ```typescript
40
+ const MODELS: ModelConfig[] = [
41
+ { id: 'opus-4.5', name: 'Claude Opus 4.5', apiModel: 'claude-opus-4-5-20251101' },
42
+ { id: 'glm-4.7', name: 'GLM 4.7', apiModel: 'glm-4.7' },
43
+ { id: 'gpt-5.2-codex', name: 'GPT 5.2 Codex', apiModel: 'gpt-5.2-codex' },
44
+ ];
45
+ // Qwen3.5 MISSING
46
+ ```
47
+
48
+ **Fix:** Add Qwen3.5 to the MODELS array. The preset already exists in `src/models/types.ts:136-151`.
49
+
50
+ ### Gap 2: `model-integration.ts` MODELS array missing Qwen3.5 + still single-shot
51
+
52
+ `src/benchmarks/model-integration.ts:336-361` is the older benchmark runner. It:
53
+
54
+ - Has no Qwen3.5 in its MODELS array
55
+ - Uses single-shot execution (no multi-turn, no dynamic retrieval)
56
+
57
+ **Fix:** Add Qwen3.5 to its MODELS array. The multi-turn wiring gap is already solved by `improved-benchmark.ts` -- this file can remain as the "legacy single-shot" runner for comparison purposes.
58
+
59
+ ### Gap 3: No benchmark results exist
60
+
61
+ `benchmark-results/` directory does not exist. None of the scripts have been executed.
62
+
63
+ **Fix:** Run the existing scripts.
64
+
65
+ ---
66
+
67
+ ## Execution Plan
68
+
69
+ ### Step 1: Add Qwen3.5 to improved-benchmark.ts MODELS array
70
+
71
+ **File:** `src/benchmarks/improved-benchmark.ts:95-99`
72
+
73
+ ```typescript
74
+ const MODELS: ModelConfig[] = [
75
+ { id: 'opus-4.5', name: 'Claude Opus 4.5', apiModel: 'claude-opus-4-5-20251101' },
76
+ { id: 'glm-4.7', name: 'GLM 4.7', apiModel: 'glm-4.7' },
77
+ { id: 'gpt-5.2-codex', name: 'GPT 5.2 Codex', apiModel: 'gpt-5.2-codex' },
78
+ { id: 'qwen35-a3b', name: 'Qwen 3.5 35B A3B', apiModel: 'qwen35-a3b-iq4xs' },
79
+ ];
80
+ ```
81
+
82
+ ### Step 2: Add Qwen3.5 to model-integration.ts MODELS array
83
+
84
+ **File:** `src/benchmarks/model-integration.ts:336-361`
85
+
86
+ ```typescript
87
+ {
88
+ id: 'qwen35-a3b',
89
+ name: 'Qwen 3.5 35B A3B',
90
+ provider: 'local',
91
+ apiModel: 'qwen35-a3b-iq4xs',
92
+ },
93
+ ```
94
+
95
+ ### Step 3: Run existing benchmarks
96
+
97
+ ```bash
98
+ # Option A: Quick Qwen3.5 baseline + UAP via Harbor (recommended first)
99
+ ./scripts/benchmarks/run-harbor-qwen35-benchmark.sh
100
+
101
+ # Option B: Direct API baseline (no Harbor containers)
102
+ npx tsx scripts/benchmarks/benchmark-qwen35-baseline-no-uap.tsx
103
+
104
+ # Option C: Direct API UAP-enhanced
105
+ npx tsx scripts/benchmarks/benchmark-qwen35-uap-3.0-opencode.tsx
106
+
107
+ # Option D: Improved benchmark with multi-turn + dynamic retrieval (all models)
108
+ npx tsx src/benchmarks/improved-benchmark.ts
109
+
110
+ # Option E: Full Harbor harness (all models, baseline vs UAP)
111
+ ./scripts/benchmarks/run-full-benchmark.sh --model qwen35-a3b-iq4xs
112
+ ```
113
+
114
+ ### Step 4: Generate comparison report
115
+
116
+ ```bash
117
+ npx tsx scripts/benchmarks/generate-comparison-report.ts \
118
+ --baseline benchmark-results/qwen35_baseline_no_uap/ \
119
+ --uap benchmark-results/qwen35_uap_3.0_opencode/
120
+ ```
121
+
122
+ ---
123
+
124
+ ## What This Plan Does NOT Do (because it already exists)
125
+
126
+ - Build a multi-turn agent loop (exists: `src/benchmarks/multi-turn-loop.ts`)
127
+ - Build dynamic memory retrieval (exists: `src/memory/dynamic-retrieval.ts`)
128
+ - Build task classification (exists: `src/memory/task-classifier.ts`)
129
+ - Fix Qdrant embeddings (already fixed: `src/memory/embeddings.ts`)
130
+ - Build Harbor configs (exist: `benchmarks/harbor-configs/qwen35_*.yaml`)
131
+ - Build comparison report generator (exists: `scripts/benchmarks/generate-comparison-report.ts`)
132
+ - Wire multi-turn into benchmark runner (exists: `src/benchmarks/improved-benchmark.ts`)
133
+ - Build tool call retry for Qwen (exists: `tools/agents/scripts/qwen_tool_call_wrapper.py`)
134
+ - Create execution scripts (exist: 6+ scripts in `scripts/benchmarks/`)
135
+
136
+ ---
137
+
138
+ ## Estimated Effort
139
+
140
+ | Step | Effort | Type |
141
+ | ------------------------------------ | -------------- | -------------------------------------- |
142
+ | Add Qwen3.5 to improved-benchmark.ts | 2 minutes | Code change (1 line) |
143
+ | Add Qwen3.5 to model-integration.ts | 2 minutes | Code change (5 lines) |
144
+ | Run benchmarks | 2-8 hours | Execution (depends on model speed) |
145
+ | Review results | 30 minutes | Analysis |
146
+ | **Total** | **~3-9 hours** | Mostly waiting for benchmark execution |
@@ -0,0 +1,422 @@
1
+ # Parallelism & Dependency Notification: Gaps and Options
2
+
3
+ **Generated:** 2026-03-17
4
+
5
+ ---
6
+
7
+ ## Current State
8
+
9
+ ### What exists (DO NOT REBUILD)
10
+
11
+ | Component | File | What it does |
12
+ | ---------------------------- | ------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
13
+ | Executor parallel batching | `src/models/executor.ts:54-162` | `Promise.all` batches capped by `maxParallel` (default 3), topological level grouping |
14
+ | Deploy batcher parallelism | `src/coordination/deploy-batcher.ts:34-591` | Categorizes actions as parallel-safe vs sequential, `Promise.allSettled` with `maxParallelActions` (default 5) |
15
+ | Benchmark model parallelism | `src/benchmarks/model-integration.ts:911-944` + `improved-benchmark.ts:565-603` | Queue-based concurrency pool, `parallelModels` param (default 1) |
16
+ | Planner topological sort | `src/models/planner.ts:379-413` | Returns `string[][]` dependency levels for parallel execution |
17
+ | Task dependency DAG | `src/tasks/database.ts:71-84` + `service.ts:438-538` | `task_dependencies` table, add/remove deps, cycle detection (BFS), blocked/ready queries |
18
+ | Task coordination scoring | `src/tasks/coordination.ts:260-298` | Scores tasks: +5 no deps, +3 per task it unblocks |
19
+ | Pub/sub messaging | `src/coordination/service.ts:608-642` | SQLite-backed broadcast/send/receive on channels |
20
+ | Config schema (unused) | `src/types/config.ts:223-229` | `maxParallelDroids` (4), `maxParallelWorkflows` (3) -- declared but never consumed |
21
+ | VRAM detection | `src/bin/llama-server-optimize.ts:301-331` | nvidia-smi / sysctl, not reusable (private to llama-server) |
22
+ | CPU detection | `src/bin/llama-server-optimize.ts:614` | `os.cpus().length - 2`, not reusable (inline in llama-server) |
23
+ | Harbor N_CONCURRENT | Shell scripts | `N_CONCURRENT` env var (default 4) for Harbor `-n` flag |
24
+ | Python parallel tool calls | `tools/agents/scripts/qwen_tool_call_wrapper.py` | `parallel_tool_calls: True` in every API request |
25
+ | Unbounded memory parallelism | `src/memory/predictive-memory.ts:94-104` + `embeddings.ts:228-230` | `Promise.all` with no concurrency limit |
26
+
27
+ ### What's broken or missing
28
+
29
+ | Gap | Location | Impact |
30
+ | -------------------------------------------------- | -------------------------------------------------------------- | --------------------------------------------------------------------------- |
31
+ | **No env var override for TypeScript parallelism** | `executor.ts`, `improved-benchmark.ts`, `model-integration.ts` | Cannot tune parallelism without code changes |
32
+ | **No vCPU/resource detection shared utility** | `llama-server-optimize.ts` has it but private | Every module hardcodes defaults |
33
+ | **Config schema not wired** | `config.ts:223-229` | `maxParallelDroids`/`maxParallelWorkflows` declared but never read |
34
+ | **No dependency completion notification** | `service.ts:287-302` | `close()` marks task done but does NOT notify blocked dependents |
35
+ | **No auto-unblock on completion** | `coordination.ts:155-200` | `release()` broadcasts generic message, doesn't check newly-unblocked tasks |
36
+ | **Unbounded memory concurrency** | `predictive-memory.ts`, `embeddings.ts` | `Promise.all` on all items, can overwhelm local inference |
37
+ | **Benchmark parallelism defaults to 1** | `improved-benchmark.ts:632`, `model-integration.ts` | Sequential by default, no auto-detection |
38
+
39
+ ---
40
+
41
+ ## Option A: Minimal -- Env Overrides + Wire Config (Recommended)
42
+
43
+ **Effort:** ~2 hours | **Risk:** Low | **Impact:** High
44
+
45
+ No new abstractions. Add env var reads to existing code, wire the existing config schema, and add notification to `close()`.
46
+
47
+ ### A1. Shared resource detection utility
48
+
49
+ **New file:** `src/utils/system-resources.ts`
50
+
51
+ Extract and generalize the detection logic already in `llama-server-optimize.ts`:
52
+
53
+ ```typescript
54
+ import { cpus } from 'os';
55
+ import { execSync } from 'child_process';
56
+
57
+ export interface SystemResources {
58
+ vCPUs: number;
59
+ vramGB: number;
60
+ memoryGB: number;
61
+ }
62
+
63
+ let _cached: SystemResources | null = null;
64
+
65
+ export function detectSystemResources(): SystemResources {
66
+ if (_cached) return _cached;
67
+
68
+ const vCPUs = cpus().length;
69
+
70
+ let vramGB = 0;
71
+ try {
72
+ const out = execSync('nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits', {
73
+ encoding: 'utf-8',
74
+ timeout: 3000,
75
+ });
76
+ vramGB = Math.round(parseInt(out.trim().split('\n')[0]) / 1024);
77
+ } catch {
78
+ try {
79
+ const out = execSync('sysctl -n hw.memsize', { encoding: 'utf-8' });
80
+ vramGB = Math.min(Math.round(parseInt(out.trim()) / 1024 ** 3), 48);
81
+ } catch {
82
+ vramGB = 0;
83
+ }
84
+ }
85
+
86
+   const memoryGB = Math.round(require('os').totalmem() / 1024 ** 3); // NOTE(review): mixes CJS `require` into an ESM example — import `totalmem` from 'os' alongside `cpus` instead
87
+
88
+ _cached = { vCPUs, vramGB, memoryGB };
89
+ return _cached;
90
+ }
91
+
92
+ /**
93
+ * Compute safe parallelism ceiling.
94
+ * For CPU-bound: vCPUs - 2 (reserve for OS + inference).
95
+ * For IO-bound (API calls): min(vCPUs, 8).
96
+ * Env override: UAP_MAX_PARALLEL always wins.
97
+ */
98
+ export function getMaxParallel(mode: 'cpu' | 'io' = 'io'): number {
99
+ const envOverride = process.env.UAP_MAX_PARALLEL;
100
+ if (envOverride) return Math.max(1, parseInt(envOverride, 10) || 1);
101
+
102
+ const { vCPUs } = detectSystemResources();
103
+
104
+ if (mode === 'cpu') {
105
+ return Math.max(1, vCPUs - 2);
106
+ }
107
+ return Math.min(vCPUs, 8);
108
+ }
109
+ ```
110
+
111
+ ### A2. Env var overrides in existing code
112
+
113
+ **File: `src/models/executor.ts:58-65`** -- read env vars into defaults:
114
+
115
+ ```typescript
116
+ const DEFAULT_OPTIONS: ExecutorOptions = {
117
+ maxRetries: 2,
118
+ retryDelayMs: 1000,
119
+ stepTimeout: 120000,
120
+ enableFallback: true,
121
+ parallelExecution: process.env.UAP_PARALLEL !== 'false',
122
+ maxParallel: parseInt(process.env.UAP_MAX_PARALLEL || '', 10) || 3,
123
+ };
124
+ ```
125
+
126
+ **File: `src/benchmarks/improved-benchmark.ts:632`** -- auto-detect:
127
+
128
+ ```typescript
129
+ const parallelModels =
130
+ (options.parallelModels ?? parseInt(process.env.UAP_BENCHMARK_PARALLEL || '', 10)) ||
131
+ getMaxParallel('io');
132
+ ```
133
+
134
+ **File: `src/benchmarks/model-integration.ts`** -- same pattern.
135
+
136
+ **File: `src/coordination/deploy-batcher.ts:78-79`** -- same pattern:
137
+
138
+ ```typescript
139
+ parallelExecution: process.env.UAP_PARALLEL !== 'false',
140
+ maxParallelActions: parseInt(process.env.UAP_MAX_PARALLEL || '', 10) || 5,
141
+ ```
142
+
143
+ ### A3. Wire existing config schema
144
+
145
+ **File: `src/types/config.ts:223-229`** -- already declares `maxParallelDroids` and `maxParallelWorkflows`.
146
+
147
+ Wire into executor by reading config at startup:
148
+
149
+ ```typescript
150
+ // In executor.ts constructor or factory
151
+ const uapConfig = loadUAPConfig(); // existing config loader
152
+ if (uapConfig?.parallelExecution) {
153
+ this.options.parallelExecution = uapConfig.parallelExecution.enabled;
154
+ this.options.maxParallel = uapConfig.parallelExecution.maxParallelDroids;
155
+ }
156
+ ```
157
+
158
+ ### A4. Dependency completion notification
159
+
160
+ **File: `src/tasks/service.ts:287-302`** -- add unblock notification to `close()`:
161
+
162
+ ```typescript
163
+ close(id: string, reason?: string): Task | null {
164
+ const task = this.get(id);
165
+ if (!task) return null;
166
+
167
+ const now = new Date().toISOString();
168
+ const stmt = this.db.prepare(`
169
+ UPDATE tasks SET status = 'done', closed_at = ?, closed_reason = ?, updated_at = ?
170
+ WHERE id = ?
171
+ `);
172
+ stmt.run(now, reason || null, now, id);
173
+
174
+ this.recordHistory(id, 'status', task.status, 'done');
175
+ this.recordActivity(id, 'closed', reason || 'Task completed');
176
+
177
+ // --- NEW: Notify newly-unblocked dependents ---
178
+ this.notifyUnblockedDependents(id);
179
+
180
+ return this.get(id);
181
+ }
182
+
183
+ private notifyUnblockedDependents(completedTaskId: string): void {
184
+ // Find tasks that were blocked by this task
185
+ const dependents = this.db.prepare(`
186
+ SELECT from_task FROM task_dependencies
187
+ WHERE to_task = ? AND dep_type = 'blocks'
188
+ `).all(completedTaskId) as Array<{ from_task: string }>;
189
+
190
+ for (const dep of dependents) {
191
+ const dependent = this.getWithRelations(dep.from_task);
192
+ if (dependent && dependent.isReady) {
193
+ // Task is now unblocked -- record activity
194
+ this.recordActivity(dep.from_task, 'unblocked',
195
+ `Unblocked: dependency "${completedTaskId}" completed`);
196
+
197
+ // If task was in 'blocked' status, move to 'open'
198
+ const raw = this.get(dep.from_task);
199
+ if (raw && raw.status === 'blocked') {
200
+ this.db.prepare(`UPDATE tasks SET status = 'open', updated_at = ? WHERE id = ?`)
201
+ .run(new Date().toISOString(), dep.from_task);
202
+ this.recordHistory(dep.from_task, 'status', 'blocked', 'open');
203
+ }
204
+ }
205
+ }
206
+ }
207
+ ```
208
+
209
+ **File: `src/tasks/coordination.ts:155-200`** -- enhance `release()` to return unblocked tasks:
210
+
211
+ ```typescript
212
+ async release(taskId: string, reason?: string): Promise<ReleaseResult | null> {
213
+ // ... existing code ...
214
+
215
+ // Broadcast task completion
216
+ this.coordService.broadcast(this.agentId, 'coordination', {
217
+ action: 'task_completed',
218
+ resource: taskId,
219
+ data: {
220
+ title: task.title,
221
+ reason,
222
+ },
223
+ });
224
+
225
+ // --- NEW: Find and broadcast newly-unblocked tasks ---
226
+ const nowReady = this.taskService.ready().filter(t =>
227
+ t.blockedBy.length === 0 // was previously blocked, now free
228
+ );
229
+
230
+ if (nowReady.length > 0) {
231
+ this.coordService.broadcast(this.agentId, 'coordination', {
232
+ action: 'tasks_unblocked',
233
+ resource: taskId,
234
+ data: {
235
+ unblockedTasks: nowReady.map(t => ({ id: t.id, title: t.title })),
236
+ count: nowReady.length,
237
+ },
238
+ });
239
+ }
240
+
241
+ // ... rest of existing code ...
242
+
243
+ return {
244
+ task: closedTask,
245
+ completedAnnouncements: 1,
246
+ unblockedTasks: nowReady.map(t => t.id), // NEW field
247
+ };
248
+ }
249
+ ```
250
+
251
+ ### A5. Cap unbounded memory concurrency
252
+
253
+ **File: `src/memory/predictive-memory.ts:94-104`**:
254
+
255
+ ```typescript
256
+ // Before (unbounded):
257
+ const results = await Promise.all(predictions.map((p) => this.query(p)));
258
+
259
+ // After (capped):
260
+ import { getMaxParallel } from '../utils/system-resources.js';
261
+
262
+ const maxConcurrent = getMaxParallel('io');
263
+ const results: MemoryEntry[][] = [];
264
+ for (let i = 0; i < predictions.length; i += maxConcurrent) {
265
+ const batch = predictions.slice(i, i + maxConcurrent);
266
+ const batchResults = await Promise.all(batch.map((p) => this.query(p)));
267
+ results.push(...batchResults);
268
+ }
269
+ ```
270
+
271
+ ### Env Variable Summary
272
+
273
+ | Variable | Default | Scope | Override |
274
+ | ------------------------ | ------------------------ | ---------------------------------- | ---------------------------------- |
275
+ | `UAP_PARALLEL` | `true` | Global on/off | `false` to disable all parallelism |
276
+ | `UAP_MAX_PARALLEL` | auto-detected from vCPUs | Global concurrency cap | Any integer |
277
+ | `UAP_BENCHMARK_PARALLEL` | auto-detected | Benchmark model concurrency | Any integer |
278
+ | `N_CONCURRENT` | `4` | Harbor task concurrency (existing) | Any integer |
279
+
280
+ ### Precedence
281
+
282
+ ```
283
+ UAP_MAX_PARALLEL env var
284
+ → uap.config parallelExecution.maxParallelDroids
285
+ → auto-detected from os.cpus().length
286
+ → hardcoded default (3)
287
+ ```
288
+
289
+ ---
290
+
291
+ ## Option B: Concurrency Pool Utility
292
+
293
+ **Effort:** ~4 hours | **Risk:** Low | **Impact:** Medium
294
+
295
+ Everything in Option A, plus a shared concurrency pool to replace the duplicated `Promise.all` batching pattern across 5+ files.
296
+
297
+ ### B1. Shared concurrency pool
298
+
299
+ **New file:** `src/utils/concurrency-pool.ts`
300
+
301
+ ```typescript
302
+ import { getMaxParallel } from './system-resources.js';
303
+
304
+ export async function concurrentMap<T, R>(
305
+ items: T[],
306
+ fn: (item: T, index: number) => Promise<R>,
307
+ options?: { maxConcurrent?: number; mode?: 'cpu' | 'io' }
308
+ ): Promise<R[]> {
309
+ const max = options?.maxConcurrent ?? getMaxParallel(options?.mode ?? 'io');
310
+ const results: R[] = new Array(items.length);
311
+ let nextIndex = 0;
312
+
313
+ const worker = async () => {
314
+ while (nextIndex < items.length) {
315
+ const i = nextIndex++;
316
+ results[i] = await fn(items[i], i);
317
+ }
318
+ };
319
+
320
+ const workers = Array.from({ length: Math.min(max, items.length) }, () => worker());
321
+ await Promise.all(workers);
322
+ return results;
323
+ }
324
+ ```
325
+
326
+ ### B2. Replace duplicated patterns
327
+
328
+ | File | Current | Replace with |
329
+ | ------------------------------- | -------------------------------------- | ----------------------------------------------------------- |
330
+ | `executor.ts:149-153` | Manual `Promise.all` batch loop | `concurrentMap(batch, taskId => this.executeSubtask(...))` |
331
+ | `deploy-batcher.ts:582-591` | Manual `Promise.allSettled` chunk loop | `concurrentMap(actions, action => this.executeAction(...))` |
332
+ | `improved-benchmark.ts:565-603` | Queue-based pool | `concurrentMap(models, model => runBenchmarkForModel(...))` |
333
+ | `model-integration.ts:911-944` | Queue-based pool | `concurrentMap(models, model => runBenchmarkForModel(...))` |
334
+ | `predictive-memory.ts:94-104` | Unbounded `Promise.all` | `concurrentMap(predictions, p => this.query(p))` |
335
+ | `embeddings.ts:228-230` | Unbounded `Promise.all` | `concurrentMap(texts, t => this.embed(t))` |
336
+
337
+ ---
338
+
339
+ ## Option C: Full Event-Driven Dependency Resolution
340
+
341
+ **Effort:** ~8 hours | **Risk:** Medium | **Impact:** High
342
+
343
+ Everything in Options A+B, plus an event-driven system where task completion automatically triggers dependent task execution.
344
+
345
+ ### C1. TaskEventBus
346
+
347
+ **New file:** `src/tasks/event-bus.ts`
348
+
349
+ ```typescript
350
+ type TaskEvent =
351
+ | { type: 'task_completed'; taskId: string }
352
+ | { type: 'task_unblocked'; taskId: string; unblockedBy: string }
353
+ | { type: 'task_failed'; taskId: string; error: string };
354
+
355
+ type TaskEventHandler = (event: TaskEvent) => void | Promise<void>;
356
+
357
+ export class TaskEventBus {
358
+ private handlers: Map<TaskEvent['type'], TaskEventHandler[]> = new Map();
359
+
360
+ on(type: TaskEvent['type'], handler: TaskEventHandler): void {
361
+ const list = this.handlers.get(type) || [];
362
+ list.push(handler);
363
+ this.handlers.set(type, list);
364
+ }
365
+
366
+ async emit(event: TaskEvent): Promise<void> {
367
+ const handlers = this.handlers.get(event.type) || [];
368
+ await Promise.all(handlers.map((h) => h(event)));
369
+ }
370
+ }
371
+ ```
372
+
373
+ ### C2. Auto-execute unblocked tasks
374
+
375
+ Wire into `TaskCoordinator`:
376
+
377
+ ```typescript
378
+ // In coordinator constructor
379
+ this.eventBus.on('task_unblocked', async (event) => {
380
+ if (event.type !== 'task_unblocked') return;
381
+ const task = this.taskService.getWithRelations(event.taskId);
382
+ if (task && task.isReady && this.autoExecuteEnabled) {
383
+ await this.claim(event.taskId);
384
+ }
385
+ });
386
+ ```
387
+
388
+ ### C3. Executor emits completion events
389
+
390
+ Wire into `TaskExecutor.executePlan()`:
391
+
392
+ ```typescript
393
+ // After subtask completes successfully
394
+ this.eventBus.emit({ type: 'task_completed', taskId: subtaskId });
395
+ ```
396
+
397
+ ---
398
+
399
+ ## Recommendation
400
+
401
+ **Start with Option A** (env overrides + wire config + dependency notification). It addresses every gap with minimal code changes to existing files and no new abstractions. The concurrency pool (Option B) is a nice cleanup but not blocking. The event bus (Option C) is only needed if you want auto-execution of unblocked tasks.
402
+
403
+ | Option | Effort | Files changed | New files | Risk |
404
+ | ------------------- | -------- | ------------- | ------------------------------------------------ | ------ |
405
+ | **A (Recommended)** | ~2 hours | 7 existing    | 1 (`system-resources.ts`)                        | Low    |
406
+ | B | ~4 hours | 8 existing | 2 (`system-resources.ts`, `concurrency-pool.ts`) | Low |
407
+ | C | ~8 hours | 10 existing | 3 (+ `event-bus.ts`) | Medium |
408
+
409
+ ---
410
+
411
+ ## Files to Change (Option A)
412
+
413
+ | File | Change | Lines affected |
414
+ | ------------------------------------------ | -------------------------------------------------------- | -------------- |
415
+ | `src/utils/system-resources.ts` | **NEW** -- vCPU/VRAM detection + `getMaxParallel()` | ~45 lines |
416
+ | `src/models/executor.ts:58-65` | Read `UAP_PARALLEL`, `UAP_MAX_PARALLEL` env vars | 2 lines |
417
+ | `src/benchmarks/improved-benchmark.ts:632` | Read `UAP_BENCHMARK_PARALLEL`, fallback to auto-detect | 3 lines |
418
+ | `src/benchmarks/model-integration.ts` | Same as above | 3 lines |
419
+ | `src/coordination/deploy-batcher.ts:78-79` | Read env vars | 2 lines |
420
+ | `src/tasks/service.ts:287-302` | Add `notifyUnblockedDependents()` after `close()` | ~25 lines |
421
+ | `src/tasks/coordination.ts:155-200` | Broadcast `tasks_unblocked` event, return unblocked list | ~15 lines |
422
+ | `src/memory/predictive-memory.ts:94-104` | Cap concurrency with `getMaxParallel()` | 5 lines |