npm - capman - Versions diffs - 0.6.0 → 0.6.2 - Mend

capman 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/CODEBASE.md +6 -5
package/dist/cjs/cache.d.ts +9 -0
package/dist/cjs/cache.d.ts.map +1 -1
package/dist/cjs/cache.js +37 -7
package/dist/cjs/cache.js.map +1 -1
package/dist/cjs/concurrent.d.ts +53 -0
package/dist/cjs/concurrent.d.ts.map +1 -0
package/dist/cjs/concurrent.js +71 -0
package/dist/cjs/concurrent.js.map +1 -0
package/dist/cjs/engine.d.ts +92 -7
package/dist/cjs/engine.d.ts.map +1 -1
package/dist/cjs/engine.js +269 -57
package/dist/cjs/engine.js.map +1 -1
package/dist/cjs/generator.d.ts.map +1 -1
package/dist/cjs/generator.js +28 -6
package/dist/cjs/generator.js.map +1 -1
package/dist/cjs/index.d.ts +3 -1
package/dist/cjs/index.d.ts.map +1 -1
package/dist/cjs/index.js +5 -1
package/dist/cjs/index.js.map +1 -1
package/dist/cjs/learning.d.ts +16 -1
package/dist/cjs/learning.d.ts.map +1 -1
package/dist/cjs/learning.js +95 -14
package/dist/cjs/learning.js.map +1 -1
package/dist/cjs/matcher.d.ts +51 -2
package/dist/cjs/matcher.d.ts.map +1 -1
package/dist/cjs/matcher.js +173 -33
package/dist/cjs/matcher.js.map +1 -1
package/dist/cjs/parser.js +27 -9
package/dist/cjs/parser.js.map +1 -1
package/dist/cjs/resolver.d.ts +2 -2
package/dist/cjs/resolver.d.ts.map +1 -1
package/dist/cjs/resolver.js +66 -26
package/dist/cjs/resolver.js.map +1 -1
package/dist/cjs/schema.d.ts +821 -68
package/dist/cjs/schema.d.ts.map +1 -1
package/dist/cjs/schema.js +62 -13
package/dist/cjs/schema.js.map +1 -1
package/dist/cjs/types.d.ts +156 -9
package/dist/cjs/types.d.ts.map +1 -1
package/dist/cjs/version.d.ts +1 -1
package/dist/cjs/version.js +1 -1
package/dist/esm/cache.d.ts +9 -0
package/dist/esm/cache.js +37 -7
package/dist/esm/concurrent.d.ts +52 -0
package/dist/esm/concurrent.js +66 -0
package/dist/esm/engine.d.ts +92 -7
package/dist/esm/engine.js +270 -58
package/dist/esm/generator.js +28 -6
package/dist/esm/index.d.ts +3 -1
package/dist/esm/index.js +2 -0
package/dist/esm/learning.d.ts +16 -1
package/dist/esm/learning.js +95 -14
package/dist/esm/matcher.d.ts +51 -2
package/dist/esm/matcher.js +170 -33
package/dist/esm/parser.js +27 -9
package/dist/esm/resolver.d.ts +2 -2
package/dist/esm/resolver.js +66 -26
package/dist/esm/schema.d.ts +821 -68
package/dist/esm/schema.js +62 -13
package/dist/esm/types.d.ts +156 -9
package/dist/esm/version.d.ts +1 -1
package/dist/esm/version.js +1 -1
package/package.json +1 -1

package/dist/esm/engine.d.ts CHANGED Viewed

@@ -3,17 +3,34 @@ import type { LLMMatcherOptions } from './matcher';
 import type { ResolveOptions, AuthContext } from './resolver';
 import type { CacheStore } from './cache';
 import type { LearningStore } from './learning';
+import type { EmbeddingProvider } from './types';
 import type { MatchMode } from './types';
 /**
  * Options for constructing a CapmanEngine instance.
  *
- * ⚠️  CONCURRENCY: CapmanEngine is not safe for sharing across concurrent
- * async request handlers. The LLM rate limiter, circuit breaker, and
- * learning index cache are all instance-level mutable state. In an
- * Express/Fastify/etc. server, either:
- *   (a) Create one engine per request — safest, no shared state
- *   (b) Use a single instance only with cheap mode (no LLM calls)
- *   (c) Add an external mutex around LLM calls if sharing is required
+ * ⚠️  CONCURRENCY: CapmanEngine is NOT safe for sharing a single instance
+ * across concurrent async request handlers in a server environment.
+ *
+ * Node.js is single-threaded — classical data races do not apply. What does
+ * apply is async interleaving: two ask() chains can interleave at await
+ * suspension points. The following hazards are real:
+ *
+ *   - Calling loadManifest() while ask() calls are in-flight: mitigated by
+ *     an optimistic manifestVersion guard — in-flight results skip the cache
+ *     write rather than polluting it with stale data.
+ *   - Sharing one instance across concurrent balanced/accurate LLM calls:
+ *     rate limiter and circuit-breaker state can interleave.
+ *
+ * The following are NOT hazards (synchronous within the event loop):
+ *   - MemoryCache Map mutations
+ *   - LLM counter increments (llmCallsThisMinute++ is atomic in Node.js)
+ *   - statsCounter updates
+ *
+ * Safe patterns:
+ *   (a) One engine per request — safest, zero shared state
+ *   (b) Single shared instance in cheap mode only (no LLM calls)
+ *   (c) ConcurrentCapmanEngine wrapper (v0.8.0) — serialises ask() via
+ *       a zero-dependency promise queue
  *
  * @example
  * // Safe — per-request engine
@@ -87,6 +104,15 @@ export interface EngineOptions {
      * @default 60000
      */
     llmCircuitBreakerResetMs?: number;
+    /**
+     * Half-life in days for time-decayed learning weights.
+     * A learning entry that is exactly this many days old retains 50% of its
+     * original weight. Older entries carry progressively less weight; recent ones dominate.
+     * Only applies when the engine creates its own default MemoryLearningStore.
+     * If you pass a custom learning store, configure halfLifeDays on it directly.
+     * @default 30
+     */
+    learningHalfLifeDays?: number;
     /**
      * Enable fuzzy matching using Fuse.js — catches paraphrases, typos,
      * and morphological variants that exact keyword matching misses.
@@ -114,6 +140,44 @@ export interface EngineOptions {
      * When undefined, calibrated automatically from manifest score distribution.
      */
     adaptiveMarginOverride?: number;
+    /**
+     * Target environment for server selection from manifest.servers[].
+     * When manifest.servers is present and this matches a server's environment,
+     * that server's URL is used as baseUrl. If no server matches, the first server is used;
+     * EngineOptions.baseUrl applies only when manifest.servers is absent or empty.
+     */
+    environment?: string;
+    /**
+     * Half-life, in days, for time-decayed learning: a learning signal that is
+     * halfLifeDays old contributes half its original weight.
+     * Only applies when using the engine's default MemoryLearningStore.
+     * For FileLearningStore, pass halfLifeDays directly to its constructor.
+     * NOTE(review): overlaps with learningHalfLifeDays — confirm which option the constructor reads. @default 30
+     */
+    halfLifeDays?: number;
+    /**
+     * Optional embedding provider for semantic similarity matching.
+     * When provided, capability texts are pre-encoded at construction time
+     * and query embeddings are computed on each ask() call. The embedding
+     * signal is fused with BM25 and fuzzy signals via RRF.
+     *
+     * Zero mandatory dependencies — bring your own provider:
+     *
+     * @example
+     * const engine = new CapmanEngine({
+     *   manifest,
+     *   embedding: {
+     *     async encode(texts: string[]) {
+     *       // call your embedding API here
+     *       return texts.map(t => myEmbedModel.embed(t))
+     *     }
+     *   }
+     * })
+     *
+     * Note: embedding is purely additive — if encode() throws, the engine
+     * falls back to BM25 + fuzzy scoring without interrupting operation.
+     */
+    embedding?: EmbeddingProvider;
 }
 export interface EngineResult {
     match: MatchResult;
@@ -135,6 +199,7 @@ export declare class CapmanEngine {
     /** Maximum allowed query length in characters. Queries exceeding this throw RangeError. */
     static readonly MAX_QUERY_LENGTH = 1000;
     private manifest;
+    private manifestVersion;
     private mode;
     private llm?;
     private cache;
@@ -152,6 +217,11 @@ export declare class CapmanEngine {
     private bm25B;
     private marginAwareLLM;
     private adaptiveMargin;
+    private environment?;
+    private embedding?;
+    private capEmbeddings?;
+    /** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
+    private pendingEmbedding;
     private maxLLMCallsPerMinute;
     private llmCooldownMs;
     private llmCircuitBreakerThreshold;
@@ -191,6 +261,12 @@ export declare class CapmanEngine {
      */
     clearCache(): Promise<void>;
     private checkManifestVersion;
+    private checkCapabilityLifecycle;
+    /** Cosine similarity between two equal-length vectors */
+    private cosineSim;
+    /** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
+    private buildEmbeddingScores;
+    private checkMatchHint;
     /**
      * Replaces the active manifest without creating a new engine instance.
      * Useful for hot-reloading manifests in long-running servers without
@@ -252,6 +328,11 @@ export declare class CapmanEngine {
      * score boost — capped at +15 to avoid overriding strong keyword matches.
      */
     private applyLearningBoost;
+    /**
+     * Resolves the effective baseUrl from manifest.servers[] or EngineOptions.baseUrl.
+     * Priority: environment-matched server > first server > explicit baseUrl > undefined
+     */
+    private resolveBaseUrl;
     private resolveOptions;
     private recordLearning;
     private calibrateBM25Ceiling;
@@ -266,6 +347,10 @@ export declare class CapmanEngine {
      * For manifests with ≤100 capabilities this is negligible (<10ms).
      * For very large manifests (500+ capabilities), consider passing
      * `adaptiveMarginOverride` to skip calibration.
+     *
+     * Note: constructor total cost also includes BM25 index build O(capabilities × tokens).
+     * Embedding pre-encoding (if an EmbeddingProvider is configured) is started in the constructor
+     * but not awaited; for 100 capabilities expect ~100–500ms (provider-dependent) before the embedding signal is available.
      */
     private calibrateAdaptiveMargin;
     private computeVerdict;

package/dist/esm/engine.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, scoreCapability as _scoreCapability, sanitizeForPrompt } from './matcher';
+import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, sanitizeForPrompt, calibrateCeiling as _calibrateCeiling } from './matcher';
 import { resolve as _resolve, checkPrivacy } from './resolver';
 import { MemoryLearningStore } from './learning';
 import { logger } from './logger';
@@ -7,6 +7,9 @@ import { VERSION } from './version';
 // ─── CapmanEngine ─────────────────────────────────────────────────────────────
 export class CapmanEngine {
     constructor(options) {
+        this.manifestVersion = 0;
+        /** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
+        this.pendingEmbedding = null;
         // ── LLM rate limiting state ────────────────────────────────────────────────
         this.llmCallsThisMinute = 0;
         this.llmWindowStart = Date.now();
@@ -17,6 +20,7 @@ export class CapmanEngine {
         this.mode = options.mode ?? 'balanced';
         this.llm = options.llm;
         this.baseUrl = options.baseUrl;
+        this.environment = options.environment;
         this.auth = options.auth;
         this.headers = options.headers;
         this.threshold = options.threshold ?? 50;
@@ -42,8 +46,20 @@ export class CapmanEngine {
         // Use FileLearningStore explicitly for persistence across restarts
         this.learning = options.learning === false
             ? null
-            : (options.learning ?? new MemoryLearningStore());
-        logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}`);
+            : (options.learning ?? new MemoryLearningStore(options.learningHalfLifeDays ?? 30));
+        this.embedding = options.embedding;
+        if (this.embedding) {
+            // Pre-encode all capability texts at construction time — one batch call.
+            // Concatenate name + description for richer semantic surface.
+            const texts = this.manifest.capabilities.map(c => `${c.name}: ${c.description}`);
+            this.embedding.encode(texts).then(vecs => {
+                this.capEmbeddings = vecs;
+                logger.info('Capability embeddings pre-encoded');
+            }).catch(err => {
+                logger.warn(`EmbeddingProvider pre-encode failed — embedding signal disabled: ${err instanceof Error ? err.message : String(err)}`);
+            });
+        }
+        logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}, embedding: ${this.embedding ? 'enabled' : 'disabled'}`);
         // ── Manifest version compatibility check ─────────────────────────────────
         this.checkManifestVersion(options.manifest);
     }
@@ -67,6 +83,9 @@ export class CapmanEngine {
         }
         const start = Date.now();
         const steps = [];
+        // Capture manifest version at entry — used to guard the cache write.
+        // If loadManifest() is called mid-flight, we skip writing stale results.
+        const manifestVersion = this.manifestVersion;
         // ── Step 1: Check cache ──────────────────────────────────────────────────
         const cacheStart = Date.now();
         if (this.cache) {
@@ -124,6 +143,7 @@ export class CapmanEngine {
         // ── Step 2.5: Apply learning boost ───────────────────────────────────────
         matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
         // ── Step 3: Privacy check ────────────────────────────────────────────────
+        let privacyFailed = false;
         if (matchResult.capability) {
             const privacyError = checkPrivacy(matchResult.capability, this.auth);
             steps.push({
@@ -132,13 +152,23 @@ export class CapmanEngine {
                 durationMs: 0,
                 detail: privacyError ?? `level: ${matchResult.capability.privacy.level}`,
             });
+            // Warn on deprecated or sunset capabilities — never silently fail
+            this.checkCapabilityLifecycle(matchResult.capability);
+            // Log when engine mode differs from capability's preferred mode
+            this.checkMatchHint(matchResult.capability);
+            // Short-circuit: if privacy fails, skip disambiguation to avoid burning an LLM
+            // call on a request that _resolve() will block anyway. privacyFailed propagates
+            // to Step 4a so the mode guard check is clean and explicit.
+            if (privacyError)
+                privacyFailed = true;
         }
         // ── Step 4a: Compute verdict + optional margin-aware LLM disambiguation ──
         let { verdict, margin } = this.computeVerdict(matchResult);
         if (verdict === 'marginal' &&
             this.marginAwareLLM &&
             this.llm &&
-            this.mode === 'balanced') {
+            !privacyFailed &&
+            (this.mode === 'balanced' || this.mode === 'accurate')) {
             matchResult = await this.disambiguateLLM(query, matchResult, steps);
             // Recompute verdict after disambiguation
             const recomputed = this.computeVerdict(matchResult);
@@ -161,11 +191,19 @@ export class CapmanEngine {
         //    queries that resolve to the same capability share a cache entry
         if (this.cache && resolution.success && matchResult.capability
             && matchResult.capability.privacy.level === 'public') {
-            const queryKey = normalizeQuery(query);
-            const capKey = buildCacheKey(query, matchResult.capability.id, matchResult.extractedParams);
-            await this.cache.set(queryKey, matchResult);
-            await this.cache.set(capKey, matchResult);
-            // capKey always starts with 'cap:' — structurally distinct from queryKey
+            // Optimistic concurrency guard — skip cache write if manifest was swapped
+            // mid-flight. The result was computed against a now-stale manifest and
+            // must not pollute the cache for the new one.
+            if (this.manifestVersion === manifestVersion) {
+                const queryKey = normalizeQuery(query);
+                const capKey = buildCacheKey(query, matchResult.capability.id, matchResult.extractedParams);
+                await this.cache.set(queryKey, matchResult);
+                await this.cache.set(capKey, matchResult);
+                // capKey always starts with 'cap:' — structurally distinct from queryKey
+            }
+            else {
+                logger.warn('loadManifest() called mid-flight — skipping cache write for stale result');
+            }
         }
         // ── Step 5b: Compute missingParams ───────────────────────────────────────
         // Spec: LLM attempts extraction first when available. missingParams is last resort.
@@ -205,8 +243,19 @@ export class CapmanEngine {
                             }
                         }
                     }
-                    catch {
-                        // LLM param extraction failed — fall through to missingParams below
+                    catch (err) {
+                        const isParseError = err instanceof SyntaxError;
+                        if (isParseError) {
+                            // JSON parse failure: refund the rate-limit slot but don't open circuit breaker
+                            // The llm is reachable - the response format was just bad
+                            this.llmCallsThisMinute = Math.max(0, this.llmCallsThisMinute - 1);
+                        }
+                        else {
+                            // Hard failure (timeout, network): refund slot and increment fail counter
+                            this.recordLLMFailure();
+                        }
+                        logger.warn(`LLM param extraction failed: ${err instanceof Error ? err.message : String(err)}`);
+                        // fall through to missingParams below
                     }
                 }
             }
@@ -292,6 +341,20 @@ export class CapmanEngine {
             await this.cache.clear();
     }
     checkManifestVersion(manifest) {
+        // ── Schema version check ─────────────────────────────────────────────────
+        // schemaVersion tracks manifest format — "1" for v0.6+.
+        // Manifests without schemaVersion are pre-v0.6 — warn but allow.
+        const CURRENT_SCHEMA_VERSION = '1';
+        if (!manifest.schemaVersion) {
+            console.warn(`[capman] Manifest is missing schemaVersion — it was generated with capman < 0.6. ` +
+                `Regenerate with: npx capman generate`);
+        }
+        else if (manifest.schemaVersion !== CURRENT_SCHEMA_VERSION) {
+            console.warn(`[capman] Manifest schemaVersion "${manifest.schemaVersion}" differs from ` +
+                `engine's expected "${CURRENT_SCHEMA_VERSION}". ` +
+                `Regenerate with: npx capman generate`);
+        }
+        // ── Package version check ────────────────────────────────────────────────
         if (!manifest.version)
             return;
         const SEMVER_RE = /^\d+\.\d+\.\d+$/;
@@ -299,8 +362,8 @@ export class CapmanEngine {
             const [mMaj, mMin] = manifest.version.split('.').map(Number);
             const [eMaj, eMin] = VERSION.split('.').map(Number);
             if (mMaj !== eMaj || mMin !== eMin) {
-                console.warn(`[capman] Manifest version "${manifest.version}" was generated with a ` +
-                    `different engine version than "${VERSION}". This is usually fine across patch versions. ` +
+                console.warn(`[capman] Manifest was generated with capman "${manifest.version}" ` +
+                    `but engine is "${VERSION}". This is usually fine across patch versions. ` +
                     `If you experience unexpected matching issues, regenerate with: npx capman generate`);
             }
         }
@@ -309,6 +372,80 @@ export class CapmanEngine {
                 `to engine version "${VERSION}" — version strings are not valid semver.`);
         }
     }
+    checkCapabilityLifecycle(capability) {
+        const lc = capability.lifecycle;
+        if (!lc || lc.status === 'stable' || lc.status === 'beta' || lc.status === 'experimental') {
+            if (lc?.status === 'beta') {
+                logger.warn(`Capability "${capability.id}" is in beta — behavior may change`);
+            }
+            if (lc?.status === 'experimental') {
+                logger.warn(`Capability "${capability.id}" is experimental — use with caution`);
+            }
+            return;
+        }
+        if (lc.status === 'deprecated') {
+            const sunsetPassed = lc.sunsetAt && new Date(lc.sunsetAt) < new Date();
+            if (sunsetPassed) {
+                // Sunset date has passed — strongest warning
+                console.warn(`[capman] ⚠️  Capability "${capability.id}" passed its sunset date (${lc.sunsetAt}). ` +
+                    `It may be removed in a future version.` +
+                    (lc.successor ? ` Use "${lc.successor}" instead.` : '') +
+                    (lc.note ? ` Note: ${lc.note}` : ''));
+            }
+            else {
+                logger.warn(`Capability "${capability.id}" is deprecated.` +
+                    (lc.sunsetAt ? ` Sunset: ${lc.sunsetAt}.` : '') +
+                    (lc.successor ? ` Use "${lc.successor}" instead.` : '') +
+                    (lc.note ? ` Note: ${lc.note}` : ''));
+            }
+        }
+    }
+    /** Cosine similarity between two equal-length vectors */
+    cosineSim(a, b) {
+        if (a.length !== b.length || a.length === 0) {
+            logger.warn(`cosineSim: dimension mismatch (${a.length} vs ${b.length}) — returning 0`);
+            return 0;
+        }
+        let dot = 0, normA = 0, normB = 0;
+        for (let i = 0; i < a.length; i++) {
+            dot += a[i] * b[i];
+            normA += a[i] * a[i];
+            normB += b[i] * b[i];
+        }
+        const denom = Math.sqrt(normA) * Math.sqrt(normB);
+        return denom === 0 ? 0 : dot / denom;
+    }
+    /** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
+    async buildEmbeddingScores(query) {
+        if (!this.embedding || !this.capEmbeddings)
+            return undefined;
+        // Wait for any in-flight re-encode from loadManifest() to finish.
+        // Without this, the first ask() after loadManifest returns uses stale embeddings.
+        if (this.pendingEmbedding)
+            await this.pendingEmbedding;
+        try {
+            const [queryVec] = await this.embedding.encode([query]);
+            const scores = new Map();
+            this.manifest.capabilities.forEach((cap, i) => {
+                const sim = this.cosineSim(queryVec, this.capEmbeddings[i]);
+                // Cosine sim is -1..1; map to 0–100, negatives floored to 0
+                scores.set(cap.id, Math.max(0, Math.round(sim * 100)));
+            });
+            return scores;
+        }
+        catch (err) {
+            logger.warn(`Embedding encode failed — skipping embedding signal: ${err instanceof Error ? err.message : String(err)}`);
+            return undefined;
+        }
+    }
+    checkMatchHint(capability) {
+        const hint = capability.matchHint?.preferredMode;
+        if (!hint || hint === this.mode)
+            return;
+        // Advisory only — log but never enforce
+        logger.warn(`Capability "${capability.id}" prefers mode "${hint}" but engine is in "${this.mode}" mode. ` +
+            `Set mode: '${hint}' in EngineOptions to honor this hint.`);
+    }
     /**
      * Replaces the active manifest without creating a new engine instance.
      * Useful for hot-reloading manifests in long-running servers without
@@ -323,11 +460,31 @@ export class CapmanEngine {
      */
     async loadManifest(manifest) {
         this.checkManifestVersion(manifest);
+        // Assign all derived state atomically before any await — an in-flight ask()
+        // must never see a new manifest paired with a stale bm25Index or ceiling.
         this.manifest = manifest;
         this.bm25Index = buildBM25Index(manifest.capabilities);
         this.bm25Ceiling = this.calibrateBM25Ceiling();
         this.adaptiveMargin = this.calibrateAdaptiveMargin();
+        this.manifestVersion++;
+        // server selection updates automatically after loadManifest()
         await this.clearCache();
+        // Re-encode capabilities after manifest swap — stale embeddings misalign with new capabilities
+        if (this.embedding) {
+            const texts = manifest.capabilities.map(c => `${c.name}: ${c.description}`);
+            this.pendingEmbedding = this.embedding.encode(texts).then(vecs => {
+                this.capEmbeddings = vecs;
+                this.pendingEmbedding = null;
+                logger.info('Capability embeddings re-encoded after manifest reload');
+            }).catch(err => {
+                this.capEmbeddings = undefined;
+                this.pendingEmbedding = null;
+                logger.warn(`EmbeddingProvider re-encode failed after loadManifest: ${err instanceof Error ? err.message : String(err)}`);
+            });
+        }
+        else {
+            this.pendingEmbedding = null;
+        }
     }
     /**
      * Explain what would happen for a query — without executing it.
@@ -569,13 +726,15 @@ export class CapmanEngine {
         let matchResult;
         let resolvedVia = 'keyword';
         // Fuzzy options — never applied in cheap mode
+        const embeddingScores = await this.buildEmbeddingScores(query);
         const fuzzyOpts = {
             fuzzyMatch: this.fuzzyMatch,
             fuzzyThreshold: this.fuzzyThreshold,
             bm25Index: this.bm25Index,
-            bm25Ceiling: this.bm25Ceiling,
             bm25K1: this.bm25K1,
             bm25B: this.bm25B,
+            bm25Ceiling: this.bm25Ceiling,
+            embeddingScores,
         };
         switch (this.mode) {
             case 'cheap': {
@@ -598,20 +757,33 @@ export class CapmanEngine {
                     else {
                         const t = Date.now();
                         try {
-                            matchResult = await _matchWithLLM(query, this.manifest, { llm: this.llm });
-                            this.recordLLMSuccess();
-                            resolvedVia = 'llm';
-                            // Merge keyword scores into LLM candidates so boost has real signal for alternatives
-                            const kwResult = _match(query, this.manifest, fuzzyOpts);
-                            matchResult = {
-                                ...matchResult,
-                                candidates: matchResult.candidates.map(c => ({
-                                    ...c,
-                                    score: c.matched
-                                        ? c.score // keep LLM confidence for winner
-                                        : (kwResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
-                                })),
-                            };
+                            const kwResultAccurate = _match(query, this.manifest, fuzzyOpts);
+                            const top3Accurate = kwResultAccurate.candidates
+                                .sort((a, b) => b.score - a.score)
+                                .filter(c => c.score > 0)
+                                .slice(0, 3)
+                                .map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
+                                .filter(Boolean);
+                            // Skip LLM if no candidates scored above zero — no meaningful top-3 to discriminate
+                            if (top3Accurate.length === 0) {
+                                matchResult = kwResultAccurate;
+                            }
+                            else {
+                                const llmResult = await _matchWithLLM(query, top3Accurate, { llm: this.llm, app: this.manifest.app });
+                                this.recordLLMSuccess();
+                                resolvedVia = 'llm';
+                                // If LLM says OOS but keyword had a match, the correct capability may have
+                                // been rank 4+. Fall back to keyword result rather than returning OOS.
+                                matchResult = llmResult.capability === null ? kwResultAccurate : {
+                                    ...llmResult,
+                                    candidates: llmResult.candidates.map(c => ({
+                                        ...c,
+                                        score: c.matched
+                                            ? c.score
+                                            : (kwResultAccurate.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
+                                    })),
+                                };
+                            }
                             steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t, detail: `confidence: ${matchResult.confidence}%` });
                         }
                         catch (err) {
@@ -656,19 +828,32 @@ export class CapmanEngine {
                         logger.debug(`Query escalated to LLM: "${query}"`);
                         const t2 = Date.now();
                         try {
-                            matchResult = await _matchWithLLM(query, this.manifest, { llm: this.llm });
-                            this.recordLLMSuccess();
-                            resolvedVia = 'llm';
-                            // keywordResult already computed above in balanced mode — merge scores
-                            matchResult = {
-                                ...matchResult,
-                                candidates: matchResult.candidates.map(c => ({
-                                    ...c,
-                                    score: c.matched
-                                        ? c.score
-                                        : (keywordResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
-                                })),
-                            };
+                            const top3Balanced = keywordResult.candidates
+                                .sort((a, b) => b.score - a.score)
+                                .filter(c => c.score > 0)
+                                .slice(0, 3)
+                                .map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
+                                .filter(Boolean);
+                            // Balanced mode only escalates when keyword confidence is low but > 0 —
+                            // top3 should always be non-empty here, but guard anyway
+                            if (top3Balanced.length === 0) {
+                                matchResult = keywordResult;
+                            }
+                            else {
+                                const llmResult = await _matchWithLLM(query, top3Balanced, { llm: this.llm, app: this.manifest.app });
+                                this.recordLLMSuccess();
+                                resolvedVia = 'llm';
+                                // If LLM returns OOS but keyword had a scored candidate, fall back to keyword
+                                matchResult = llmResult.capability === null ? keywordResult : {
+                                    ...llmResult,
+                                    candidates: llmResult.candidates.map(c => ({
+                                        ...c,
+                                        score: c.matched
+                                            ? c.score
+                                            : (keywordResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
+                                    })),
+                                };
+                            }
                             steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t2, detail: `confidence: ${matchResult.confidence}%` });
                         }
                         catch (err) {
@@ -684,7 +869,11 @@ export class CapmanEngine {
                 break;
             }
         }
-        return { matchResult: matchResult, resolvedVia };
+        if (matchResult === undefined) {
+            const exhaustive = this.mode;
+            throw new Error(`_runMatch: unhandled MatchMode "${exhaustive}"`);
+        }
+        return { matchResult, resolvedVia };
     }
     /**
      * Applies learning boost to a MatchResult and returns the updated result.
@@ -755,7 +944,15 @@ export class CapmanEngine {
                 const hits = wordIndex[candidate.capabilityId] ?? 0;
                 if (hits > 0) {
                     // Logarithmic boost — diminishing returns after first few hits
-                    boost += Math.min(5, Math.log2(hits + 1) * 2);
+                    const rawBoost = Math.min(5, Math.log2(hits + 1) * 2);
+                    // IDF weighting — common words ("get", "show", "user") appear in many
+                    // capabilities and accumulate learning hits that carry little signal.
+                    // Reuses BM25 df/N so no separate computation is needed.
+                    const df = this.bm25Index.df[word] ?? 0;
+                    const idf = df > 0
+                        ? Math.log((this.bm25Index.N - df + 0.5) / (df + 0.5) + 1)
+                        : 0;
+                    boost += rawBoost * Math.min(1, idf);
                 }
             }
             const cappedBoost = Math.min(15, Math.round(boost));
@@ -769,10 +966,26 @@ export class CapmanEngine {
             };
         });
     }
+    /**
+     * Resolves the effective baseUrl from manifest.servers[] or EngineOptions.baseUrl.
+     * Priority: environment-matched server > first server > explicit baseUrl > undefined
+     */
+    resolveBaseUrl() {
+        const servers = this.manifest.servers;
+        if (!servers?.length)
+            return this.baseUrl;
+        if (this.environment) {
+            const match = servers.find(s => s.environment === this.environment);
+            if (match)
+                return match.url.replace(/\/$/, '');
+        }
+        // Fallback to first server
+        return servers[0].url.replace(/\/$/, '');
+    }
     // ── Private helpers ────────────────────────────────────────────────────────
     resolveOptions(overrides = {}) {
         return {
-            baseUrl: this.baseUrl,
+            baseUrl: this.resolveBaseUrl(),
             auth: this.auth,
             headers: this.headers,
             ...overrides,
@@ -792,16 +1005,7 @@ export class CapmanEngine {
         });
     }
     calibrateBM25Ceiling() {
-        let max = 0;
-        for (const cap of this.manifest.capabilities) {
-            if (!cap.examples?.length)
-                continue;
-            const selfWords = new Set(tokenize(cap.examples[0]));
-            const raw = _scoreCapability(selfWords, cap, this.bm25Index, this.bm25K1, this.bm25B);
-            if (raw > max)
-                max = raw;
-        }
-        return max > 0 ? max : 100;
+        return _calibrateCeiling(this.manifest.capabilities, this.bm25Index, this.bm25K1, this.bm25B);
     }
     /**
      * Calibrates the adaptive margin threshold from the manifest's own score
@@ -814,6 +1018,10 @@ export class CapmanEngine {
      * For manifests with ≤100 capabilities this is negligible (<10ms).
      * For very large manifests (500+ capabilities), consider passing
      * `adaptiveMarginOverride` to skip calibration.
+     *
+     * Note: constructor total cost also includes BM25 index build O(capabilities × tokens)
+     * and embedding pre-encoding O(capabilities) if an EmbeddingProvider is configured.
+     * For 100 capabilities with embeddings, expect ~100–500ms depending on provider latency.
      */
     calibrateAdaptiveMargin() {
         if (this.manifest.capabilities.length < 2)
@@ -829,10 +1037,14 @@ export class CapmanEngine {
         for (const cap of this.manifest.capabilities) {
             if (!cap.examples?.length)
                 continue;
-            const result = _match(cap.examples[0], this.manifest, fuzzyOpts);
-            const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
-            if (sorted.length >= 2) {
-                margins.push(sorted[0].score - sorted[1].score);
+            // Use all examples and take the maximum margin — same rationale as
+            // calibrateBM25Ceiling(): a weak first example skews the calibration.
+            for (const example of cap.examples) {
+                const result = _match(example, this.manifest, fuzzyOpts);
+                const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
+                if (sorted.length >= 2) {
+                    margins.push(sorted[0].score - sorted[1].score);
+                }
             }
         }
         if (margins.length === 0)