capman 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/CODEBASE.md +6 -5
  2. package/dist/cjs/concurrent.d.ts +53 -0
  3. package/dist/cjs/concurrent.d.ts.map +1 -0
  4. package/dist/cjs/concurrent.js +71 -0
  5. package/dist/cjs/concurrent.js.map +1 -0
  6. package/dist/cjs/engine.d.ts +82 -12
  7. package/dist/cjs/engine.d.ts.map +1 -1
  8. package/dist/cjs/engine.js +159 -37
  9. package/dist/cjs/engine.js.map +1 -1
  10. package/dist/cjs/index.d.ts +2 -1
  11. package/dist/cjs/index.d.ts.map +1 -1
  12. package/dist/cjs/index.js +3 -1
  13. package/dist/cjs/index.js.map +1 -1
  14. package/dist/cjs/learning.d.ts +14 -6
  15. package/dist/cjs/learning.d.ts.map +1 -1
  16. package/dist/cjs/learning.js +64 -10
  17. package/dist/cjs/learning.js.map +1 -1
  18. package/dist/cjs/matcher.d.ts +13 -1
  19. package/dist/cjs/matcher.d.ts.map +1 -1
  20. package/dist/cjs/matcher.js +67 -10
  21. package/dist/cjs/matcher.js.map +1 -1
  22. package/dist/cjs/schema.js +1 -1
  23. package/dist/cjs/schema.js.map +1 -1
  24. package/dist/cjs/types.d.ts +9 -0
  25. package/dist/cjs/types.d.ts.map +1 -1
  26. package/dist/cjs/version.d.ts +1 -1
  27. package/dist/cjs/version.js +1 -1
  28. package/dist/esm/concurrent.d.ts +52 -0
  29. package/dist/esm/concurrent.js +66 -0
  30. package/dist/esm/engine.d.ts +82 -12
  31. package/dist/esm/engine.js +159 -37
  32. package/dist/esm/index.d.ts +2 -1
  33. package/dist/esm/index.js +1 -0
  34. package/dist/esm/learning.d.ts +14 -6
  35. package/dist/esm/learning.js +64 -10
  36. package/dist/esm/matcher.d.ts +13 -1
  37. package/dist/esm/matcher.js +66 -10
  38. package/dist/esm/schema.js +1 -1
  39. package/dist/esm/types.d.ts +9 -0
  40. package/dist/esm/version.d.ts +1 -1
  41. package/dist/esm/version.js +1 -1
  42. package/package.json +1 -1
package/CODEBASE.md CHANGED
@@ -169,12 +169,11 @@ Usage analytics and keyword index — incremental, PII-safe.
169
169
 
170
170
  Key exports:
171
171
  - `LearningStore` interface — `record(entry)`, `getStats()`, `getTopCapabilities(limit)`, `getIndex()`, `destroy()`
172
- - `FileLearningStore` — persists to `.capman/learning.json`, caps at 10,000 entries. Saves debounced (5s) with synchronous flush on process exit via `flushSync()`
173
- - `MemoryLearningStore` — in-memory only, used in tests
174
- - `LearningIndex` — internal class shared by both stores. Maintains keyword index and stats counters incrementally. Eliminates ~80 lines of duplication
172
+ - `FileLearningStore(filePath, halfLifeDays)` — persists to `.capman/learning.json`, caps at 10,000 entries. Saves debounced (5s) with synchronous flush on process exit. Migration guard: pre-v0.7 entries without `lastUpdated` use file mtime as fallback
173
+ - `MemoryLearningStore(halfLifeDays)` — in-memory only, used in tests
174
+ - `LearningIndex(halfLifeDays)` — internal class shared by both stores. Maintains keyword index and stats counters incrementally. Time decay applied lazily on `getStats()` read — not on write. `halfLifeDays` must be positive; throws `RangeError` otherwise. Eliminates ~80 lines of duplication
175
175
 
176
- `LearningEntry`:
177
- - `query` — stored as tokenized keywords only, never raw text. PII (emails, names, IDs) stripped before persistence
176
+ - `LearningEntry` — query stored as tokenized keywords only (PII-safe), plus capabilityId, confidence, intent, resolvedVia, timestamp, `lastUpdated` (ms since epoch — used for time decay)
178
177
  - `capabilityId`, `confidence`, `intent`, `extractedParams`
179
178
  - `resolvedVia: 'keyword' | 'llm' | 'cache'`
180
179
  - `timestamp`
@@ -222,6 +221,8 @@ Key exports:
222
221
  - `cacheTtlMs` — optional TTL for cache entries in ms (default: no expiry)
223
222
  - `maxLLMCallsPerMinute` — rate limit (default: 60). Set to 0 to disable LLM entirely
224
223
  - `llmCooldownMs`, `llmCircuitBreakerThreshold`, `llmCircuitBreakerResetMs`
224
+ - `learningHalfLifeDays` — half-life in days for time-decayed learning (default: 30). Only applies when engine creates its own default store
225
+ - `embedding` — optional `EmbeddingProvider` for semantic similarity. Pre-encodes capabilities at construction and after `loadManifest()`. Fused into RRF as third signal. Failures fall back gracefully to BM25+fuzzy
225
226
 
226
227
  Matching pipeline in `ask()`:
227
228
  1. Cache check — return immediately on hit (public capabilities only). Re-extracts params fresh from current query
@@ -0,0 +1,53 @@
1
+ /**
2
+ * ConcurrentCapmanEngine — a thin wrapper around CapmanEngine that serialises
3
+ * ask() and explain() calls via an internal promise queue.
4
+ *
5
+ * Use this when sharing a single CapmanEngine instance across concurrent async
6
+ * request handlers (e.g. a long-lived Express server with balanced/accurate mode).
7
+ *
8
+ * Why a promise queue instead of async-mutex:
9
+ * - Zero external dependencies — no new package.json entries for consumers
10
+ * - Identical serialisation guarantee to a FIFO mutex
11
+ * - Simpler audit surface
12
+ *
13
+ * Why opt-in, not default:
14
+ * - Per-request engine patterns pay zero overhead (recommended for most servers)
15
+ * - Cheap mode shared engines pay zero overhead
16
+ * - Consumer retains full control over their concurrency model
17
+ *
18
+ * @example
19
+ * // Safe shared engine across concurrent requests
20
+ * const engine = new ConcurrentCapmanEngine({ manifest, llm, mode: 'balanced' })
21
+ * app.post('/ask', async (req, res) => {
22
+ * const result = await engine.ask(req.body.query)
23
+ * res.json(result)
24
+ * })
25
+ */
26
+ import { type EngineOptions, type EngineResult } from './engine';
27
+ import type { Manifest } from './types';
28
+ import type { ResolveOptions } from './resolver';
29
+ import type { ExplainResult } from './types';
30
+ export declare class ConcurrentCapmanEngine {
31
+ private engine;
32
+ /**
33
+ * The tail of the promise chain — each new call appends to this.
34
+ * On rejection, the queue resets to a resolved promise so subsequent
35
+ * calls are not permanently blocked by a single failure.
36
+ */
37
+ private queue;
38
+ constructor(options: EngineOptions);
39
+ ask(query: string, overrides?: Partial<ResolveOptions>): Promise<EngineResult>;
40
+ explain(query: string): Promise<ExplainResult>;
41
+ /** Swap the manifest. Safe to call outside the queue — triggers cache clear internally. */
42
+ loadManifest(manifest: Manifest): Promise<void>;
43
+ /** Returns learning stats or null if learning is disabled. */
44
+ getStats(): Promise<import("./learning").KeywordStats | null>;
45
+ /** Returns top-N most frequently matched capabilities. */
46
+ getTopCapabilities(limit?: number): Promise<{
47
+ id: string;
48
+ hits: number;
49
+ }[]>;
50
+ /** Clear the cache. */
51
+ clearCache(): Promise<void>;
52
+ }
53
+ //# sourceMappingURL=concurrent.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"concurrent.d.ts","sourceRoot":"","sources":["../../src/concurrent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EAAgB,KAAK,aAAa,EAAE,KAAK,YAAY,EAAE,MAAM,UAAU,CAAA;AAC9E,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAA;AACvC,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAA;AAE5C,qBAAa,sBAAsB;IACjC,OAAO,CAAC,MAAM,CAAc;IAC5B;;;;OAIG;IACH,OAAO,CAAC,KAAK,CAAsC;gBAEvC,OAAO,EAAE,aAAa;IAIlC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG,OAAO,CAAC,YAAY,CAAC;IAQ9E,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAQ9C,2FAA2F;IAC3F,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IAI/C,8DAA8D;IAC9D,QAAQ;IAIR,0DAA0D;IAC1D,kBAAkB,CAAC,KAAK,CAAC,EAAE,MAAM;;;;IAIjC,uBAAuB;IACvB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAG5B"}
@@ -0,0 +1,71 @@
1
+ "use strict";
2
+ /**
3
+ * ConcurrentCapmanEngine — a thin wrapper around CapmanEngine that serialises
4
+ * ask() and explain() calls via an internal promise queue.
5
+ *
6
+ * Use this when sharing a single CapmanEngine instance across concurrent async
7
+ * request handlers (e.g. a long-lived Express server with balanced/accurate mode).
8
+ *
9
+ * Why a promise queue instead of async-mutex:
10
+ * - Zero external dependencies — no new package.json entries for consumers
11
+ * - Identical serialisation guarantee to a FIFO mutex
12
+ * - Simpler audit surface
13
+ *
14
+ * Why opt-in, not default:
15
+ * - Per-request engine patterns pay zero overhead (recommended for most servers)
16
+ * - Cheap mode shared engines pay zero overhead
17
+ * - Consumer retains full control over their concurrency model
18
+ *
19
+ * @example
20
+ * // Safe shared engine across concurrent requests
21
+ * const engine = new ConcurrentCapmanEngine({ manifest, llm, mode: 'balanced' })
22
+ * app.post('/ask', async (req, res) => {
23
+ * const result = await engine.ask(req.body.query)
24
+ * res.json(result)
25
+ * })
26
+ */
27
+ Object.defineProperty(exports, "__esModule", { value: true });
28
+ exports.ConcurrentCapmanEngine = void 0;
29
+ const engine_1 = require("./engine");
30
+ class ConcurrentCapmanEngine {
31
+ constructor(options) {
32
+ /**
33
+ * The tail of the promise chain — each new call appends to this.
34
+ * On rejection, the queue resets to a resolved promise so subsequent
35
+ * calls are not permanently blocked by a single failure.
36
+ */
37
+ this.queue = Promise.resolve();
38
+ this.engine = new engine_1.CapmanEngine(options);
39
+ }
40
+ ask(query, overrides) {
41
+ const result = this.queue.then(() => this.engine.ask(query, overrides));
42
+ // Reset queue tail to resolved on failure — one bad call must not
43
+ // block all subsequent callers indefinitely.
44
+ this.queue = result.catch(() => { });
45
+ return result;
46
+ }
47
+ explain(query) {
48
+ const result = this.queue.then(() => this.engine.explain(query));
49
+ this.queue = result.catch(() => { });
50
+ return result;
51
+ }
52
+ // ── Delegated methods — safe to call directly, no serialisation needed ──
53
+ /** Swap the manifest. Safe to call outside the queue — triggers cache clear internally. */
54
+ loadManifest(manifest) {
55
+ return this.engine.loadManifest(manifest);
56
+ }
57
+ /** Returns learning stats or null if learning is disabled. */
58
+ getStats() {
59
+ return this.engine.getStats();
60
+ }
61
+ /** Returns top-N most frequently matched capabilities. */
62
+ getTopCapabilities(limit) {
63
+ return this.engine.getTopCapabilities(limit);
64
+ }
65
+ /** Clear the cache. */
66
+ clearCache() {
67
+ return this.engine.clearCache();
68
+ }
69
+ }
70
+ exports.ConcurrentCapmanEngine = ConcurrentCapmanEngine;
71
+ //# sourceMappingURL=concurrent.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"concurrent.js","sourceRoot":"","sources":["../../src/concurrent.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;;;AAEH,qCAA8E;AAK9E,MAAa,sBAAsB;IASjC,YAAY,OAAsB;QAPlC;;;;WAIG;QACK,UAAK,GAAqB,OAAO,CAAC,OAAO,EAAE,CAAA;QAGjD,IAAI,CAAC,MAAM,GAAG,IAAI,qBAAY,CAAC,OAAO,CAAC,CAAA;IACzC,CAAC;IAED,GAAG,CAAC,KAAa,EAAE,SAAmC;QACpD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAA;QACvE,kEAAkE;QAClE,6CAA6C;QAC7C,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;QACnC,OAAO,MAAM,CAAA;IACf,CAAC;IAED,OAAO,CAAC,KAAa;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAA;QAChE,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;QACnC,OAAO,MAAM,CAAA;IACf,CAAC;IAED,2EAA2E;IAE3E,2FAA2F;IAC3F,YAAY,CAAC,QAAkB;QAC7B,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAA;IAC3C,CAAC;IAED,8DAA8D;IAC9D,QAAQ;QACN,OAAO,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAA;IAC/B,CAAC;IAED,0DAA0D;IAC1D,kBAAkB,CAAC,KAAc;QAC/B,OAAO,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAA;IAC9C,CAAC;IAED,uBAAuB;IACvB,UAAU;QACR,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAA;IACjC,CAAC;CACF;AAhDD,wDAgDC"}
@@ -3,17 +3,34 @@ import type { LLMMatcherOptions } from './matcher';
3
3
  import type { ResolveOptions, AuthContext } from './resolver';
4
4
  import type { CacheStore } from './cache';
5
5
  import type { LearningStore } from './learning';
6
+ import type { EmbeddingProvider } from './types';
6
7
  import type { MatchMode } from './types';
7
8
  /**
8
9
  * Options for constructing a CapmanEngine instance.
9
10
  *
10
- * ⚠️ CONCURRENCY: CapmanEngine is not safe for sharing across concurrent
11
- * async request handlers. The LLM rate limiter, circuit breaker, and
12
- * learning index cache are all instance-level mutable state. In an
13
- * Express/Fastify/etc. server, either:
14
- * (a) Create one engine per request safest, no shared state
15
- * (b) Use a single instance only with cheap mode (no LLM calls)
16
- * (c) Add an external mutex around LLM calls if sharing is required
11
+ * ⚠️ CONCURRENCY: CapmanEngine is NOT safe for sharing a single instance
12
+ * across concurrent async request handlers in a server environment.
13
+ *
14
+ * Node.js is single-threaded — classical data races do not apply. What does
15
+ * apply is async interleaving: two ask() chains can interleave at await
16
+ * suspension points. The following hazards are real:
17
+ *
18
+ * - Calling loadManifest() while ask() calls are in-flight: mitigated by
19
+ * an optimistic manifestVersion guard — in-flight results skip the cache
20
+ * write rather than polluting it with stale data.
21
+ * - Sharing one instance across concurrent balanced/accurate LLM calls:
22
+ * rate limiter and circuit-breaker state can interleave.
23
+ *
24
+ * The following are NOT hazards (synchronous within the event loop):
25
+ * - MemoryCache Map mutations
26
+ * - LLM counter increments (llmCallsThisMinute++ is atomic in Node.js)
27
+ * - statsCounter updates
28
+ *
29
+ * Safe patterns:
30
+ * (a) One engine per request — safest, zero shared state
31
+ * (b) Single shared instance in cheap mode only (no LLM calls)
32
+ * (c) ConcurrentCapmanEngine wrapper (v0.8.0) — serialises ask() via
33
+ * a zero-dependency promise queue
17
34
  *
18
35
  * @example
19
36
  * // Safe — per-request engine
@@ -87,6 +104,15 @@ export interface EngineOptions {
87
104
  * @default 60000
88
105
  */
89
106
  llmCircuitBreakerResetMs?: number;
107
+ /**
108
+ * Half-life in days for time-decayed learning weights.
109
+ * A learning entry that is exactly this many days old retains 50% of its
110
+ * original weight. Older entries fade faster; recent ones dominate.
111
+ * Only applies when the engine creates its own default MemoryLearningStore.
112
+ * If you pass a custom learning store, configure halfLifeDays on it directly.
113
+ * @default 30
114
+ */
115
+ learningHalfLifeDays?: number;
90
116
  /**
91
117
  * Enable fuzzy matching using Fuse.js — catches paraphrases, typos,
92
118
  * and morphological variants that exact keyword matching misses.
@@ -115,12 +141,43 @@ export interface EngineOptions {
115
141
  */
116
142
  adaptiveMarginOverride?: number;
117
143
  /**
118
- * Target environment for server selection from manifest.servers[].
119
- * When manifest.servers is present and this matches a server's environment,
120
- * that server's URL is used as baseUrl.
121
- * Falls back to first server, then EngineOptions.baseUrl if no match.
122
- */
144
+ * Target environment for server selection from manifest.servers[].
145
+ * When manifest.servers is present and this matches a server's environment,
146
+ * that server's URL is used as baseUrl.
147
+ * Falls back to first server, then EngineOptions.baseUrl if no match.
148
+ */
123
149
  environment?: string;
150
+ /**
151
+ * Half-life for time-decayed learning in days.
152
+ * A learning signal that is halfLifeDays old contributes half its original weight.
153
+ * Only applies when using the engine's default MemoryLearningStore.
154
+ * For FileLearningStore, pass halfLifeDays directly to its constructor.
155
+ * @default 30
156
+ */
157
+ halfLifeDays?: number;
158
+ /**
159
+ * Optional embedding provider for semantic similarity matching.
160
+ * When provided, capability texts are pre-encoded at construction time
161
+ * and query embeddings are computed on each ask() call. The embedding
162
+ * signal is fused with BM25 and fuzzy signals via RRF.
163
+ *
164
+ * Zero mandatory dependencies — bring your own provider:
165
+ *
166
+ * @example
167
+ * const engine = new CapmanEngine({
168
+ * manifest,
169
+ * embedding: {
170
+ * async encode(texts: string[]) {
171
+ * // call your embedding API here
172
+ * return texts.map(t => myEmbedModel.embed(t))
173
+ * }
174
+ * }
175
+ * })
176
+ *
177
+ * Note: embedding is purely additive — if encode() throws, the engine
178
+ * falls back to BM25 + fuzzy scoring without interrupting operation.
179
+ */
180
+ embedding?: EmbeddingProvider;
124
181
  }
125
182
  export interface EngineResult {
126
183
  match: MatchResult;
@@ -142,6 +199,7 @@ export declare class CapmanEngine {
142
199
  /** Maximum allowed query length in characters. Queries exceeding this throw RangeError. */
143
200
  static readonly MAX_QUERY_LENGTH = 1000;
144
201
  private manifest;
202
+ private manifestVersion;
145
203
  private mode;
146
204
  private llm?;
147
205
  private cache;
@@ -160,6 +218,10 @@ export declare class CapmanEngine {
160
218
  private marginAwareLLM;
161
219
  private adaptiveMargin;
162
220
  private environment?;
221
+ private embedding?;
222
+ private capEmbeddings?;
223
+ /** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
224
+ private pendingEmbedding;
163
225
  private maxLLMCallsPerMinute;
164
226
  private llmCooldownMs;
165
227
  private llmCircuitBreakerThreshold;
@@ -200,6 +262,10 @@ export declare class CapmanEngine {
200
262
  clearCache(): Promise<void>;
201
263
  private checkManifestVersion;
202
264
  private checkCapabilityLifecycle;
265
+ /** Cosine similarity between two equal-length vectors */
266
+ private cosineSim;
267
+ /** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
268
+ private buildEmbeddingScores;
203
269
  private checkMatchHint;
204
270
  /**
205
271
  * Replaces the active manifest without creating a new engine instance.
@@ -281,6 +347,10 @@ export declare class CapmanEngine {
281
347
  * For manifests with ≤100 capabilities this is negligible (<10ms).
282
348
  * For very large manifests (500+ capabilities), consider passing
283
349
  * `adaptiveMarginOverride` to skip calibration.
350
+ *
351
+ * Note: constructor total cost also includes BM25 index build O(capabilities × tokens)
352
+ * and embedding pre-encoding O(capabilities) if an EmbeddingProvider is configured.
353
+ * For 100 capabilities with embeddings, expect ~100–500ms depending on provider latency.
284
354
  */
285
355
  private calibrateAdaptiveMargin;
286
356
  private computeVerdict;
@@ -1 +1 @@
1
- {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/engine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,cAAc,EAAa,aAAa,EAAwG,MAAM,SAAS,CAAA;AACnN,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAA;AAClD,OAAO,KAAK,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;AAC7D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACzC,OAAO,KAAK,EAAE,aAAa,EAAgB,MAAM,YAAY,CAAA;AAK7D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,SAAS,CAAA;AAMxC;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,WAAW,aAAa;IAC5B,qCAAqC;IACrC,QAAQ,EAAE,QAAQ,CAAA;IAClB;;;;;OAKG;IACH,IAAI,CAAC,EAAE,SAAS,CAAA;IAChB,kDAAkD;IAClD,GAAG,CAAC,EAAE,iBAAiB,CAAC,KAAK,CAAC,CAAA;IAC9B,0FAA0F;IAC1F,KAAK,CAAC,EAAE,UAAU,GAAG,KAAK,CAAA;IAC1B,+FAA+F;IAC/F,QAAQ,CAAC,EAAE,aAAa,GAAG,KAAK,CAAA;IAChC,iCAAiC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,mDAAmD;IACnD,IAAI,CAAC,EAAE,WAAW,CAAA;IAClB,mCAAmC;IACnC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAChC,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAA;IAElB,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,0DAA0D;IAC1D,KAAK,CAAC,EAAE,MAAM,CAAA;IAEd;;;;;;;;;;;OAWG;IACH,UAAU,CAAC,EAAE,MAAM,CAAA;IAEnB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAA;IAE7B;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;IAEtB;;;;OAIG;IACH,0BAA0B,CAAC,EAAE,MAAM,CAAA;IAEnC;;;OAGG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAA;IAEjC;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB;;;OAGG;IACH,sBAAsB,CAAC,EAAE,MAAM,CAAA;IAC/B;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAID,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAQ,WAAW,CAAA;IACxB,UAAU,EAAG,aAAa,CAAA;IAC1B,WAAW,EAAE,OAAO,GAAG,SAAS,GAAG,KAAK,CAAA;IACxC,UAAU,EAAG,MAAM,CAAA;IACnB,KAAK,EAAQ,cAAc,CAAA;IAC3B,OAAO,EAAM,OAAO,GAAG,UAAU,GAAG,WAAW,CAAA;IAC/C,MAAM,EAAO,MAAM,CAAA;IACnB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,MAAM,EAAE,CAAA;CACzB;AAID,qBAAa,YAAY;IACvB,2FAA2F;IAC3F,MAAM,CAAC,QAAQ,CAAC,gBAAgB,QAAO;IACvC,OAAO,CAAC,QAAQ,CAAW;IAC3B,OAAO,CAAC,IAAI,CAAgB;IAC5B,OAAO,CAAC,GAAG,CAAC,CAA+B;IAC3C,OAAO,CAAC,KAAK,CAAuB;IACpC,OAAO,CAAC,QAAQ,CAAuB;IACvC
,OAAO,CAAC,OAAO,CAAC,CAAS;IACzB,OAAO,CAAC,IAAI,CAAC,CAAiB;IAC9B,OAAO,CAAC,OAAO,CAAC,CAAyB;IACzC,OAAO,CAAC,SAAS,CAAQ;IACzB,OAAO,CAAC,UAAU,CAAe;IACjC,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,cAAc,CAAQ;IAC9B,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,cAAc,CAAY;IAClC,OAAO,CAAC,cAAc,CAAW;IACjC,OAAO,CAAC,WAAW,CAAC,CAAQ;IAG5B,OAAO,CAAC,oBAAoB,CAAe;IAC3C,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,0BAA0B,CAAS;IAC3C,OAAO,CAAC,wBAAwB,CAAW;IAG3C,OAAO,CAAC,kBAAkB,CAAiB;IAC3C,OAAO,CAAC,cAAc,CAA8B;IACpD,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,mBAAmB,CAAgB;IAC3C,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,OAAO,EAAE,aAAa;IAwClC;;;;;;;;;;OAUG;IACG,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,GAAE,OAAO,CAAC,cAAc,CAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IA+QxF;;;OAGG;IACG,QAAQ;IAKd;;OAEG;IACG,kBAAkB,CAAC,KAAK,SAAI;;;;IAKlC;;OAEG;IACG,UAAU;IAIhB,OAAO,CAAC,oBAAoB;IAuC5B,OAAO,CAAC,wBAAwB;IAkChC,OAAO,CAAC,cAAc;IAWpB;;;;;;;;;;;OAWG;IACC,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IAWrD;;;;;;;;;;;;;;;;OAgBG;IAEI,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IA0JrD;;;OAGG;IACH,OAAO,CAAC,eAAe;IA+CvB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAIxB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAaxB;;;;OAIG;YACa,SAAS;IA+HzB;;;OAGG;YACW,uBAAuB;IA+CrC;;;;OAIG;YACW,kBAAkB;IAyChC;;;OAGG;IACH,OAAO,CAAC,cAAc;IAetB,OAAO,CAAC,cAAc;YASR,cAAc;IAiB5B,OAAO,CAAC,oBAAoB;IAI5B;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,uBAAuB;IAkC/B,OAAO,CAAC,cAAc;IAatB;;;;SAIK;YACW,eAAe;CAsEhC"}
1
+ {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/engine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,cAAc,EAAa,aAAa,EAAwG,MAAM,SAAS,CAAA;AACnN,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAA;AAClD,OAAO,KAAK,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;AAC7D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACzC,OAAO,KAAK,EAAE,aAAa,EAAgB,MAAM,YAAY,CAAA;AAC7D,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAA;AAKhD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,SAAS,CAAA;AAMxC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,MAAM,WAAW,aAAa;IAC5B,qCAAqC;IACrC,QAAQ,EAAE,QAAQ,CAAA;IAClB;;;;;OAKG;IACH,IAAI,CAAC,EAAE,SAAS,CAAA;IAChB,kDAAkD;IAClD,GAAG,CAAC,EAAE,iBAAiB,CAAC,KAAK,CAAC,CAAA;IAC9B,0FAA0F;IAC1F,KAAK,CAAC,EAAE,UAAU,GAAG,KAAK,CAAA;IAC1B,+FAA+F;IAC/F,QAAQ,CAAC,EAAE,aAAa,GAAG,KAAK,CAAA;IAChC,iCAAiC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,mDAAmD;IACnD,IAAI,CAAC,EAAE,WAAW,CAAA;IAClB,mCAAmC;IACnC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAChC,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAA;IAElB,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,0DAA0D;IAC1D,KAAK,CAAC,EAAE,MAAM,CAAA;IAEd;;;;;;;;;;;OAWG;IACH,UAAU,CAAC,EAAE,MAAM,CAAA;IAEnB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAA;IAE7B;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;IAEtB;;;;OAIG;IACH,0BAA0B,CAAC,EAAE,MAAM,CAAA;IAEnC;;;OAGG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAA;IAEjC;;;;;;;OAOG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAA;IAE7B;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB;;;OAGG;IACH,sBAAsB,CAAC,EAAE,MAAM,CAAA;IAC/B;;;;;MAKE;IACF,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB;;;;;;MAME;IACF,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;;;;;;;;;;;;;;;;;;;OAqBG;IACH,SAAS,CAAC,EAAE,iBAAiB,CAAA;CAC9B;AAID,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAQ,WAAW,CAAA;IACxB,UAAU,EAAG,aAAa,CAAA;IAC1B,WAAW,EAAE,OAAO,GAAG,SAAS,GAAG,KAAK,CAAA;IACxC,UAAU,EAAG,MAAM,CAAA;IACnB,KAAK,EAAQ,cAAc,CAAA;IAC3B,OAAO,EAAM,OAAO,GAAG,UAAU,GAAG,WAAW,CAAA;IAC/C,MAAM,EAAO,MAAM,CAAA;IACnB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,MAAM,EAAE
,CAAA;CACzB;AAID,qBAAa,YAAY;IACvB,2FAA2F;IAC3F,MAAM,CAAC,QAAQ,CAAC,gBAAgB,QAAO;IACvC,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,eAAe,CAAY;IACnC,OAAO,CAAC,IAAI,CAAsB;IAClC,OAAO,CAAC,GAAG,CAAC,CAA+B;IAC3C,OAAO,CAAC,KAAK,CAAuB;IACpC,OAAO,CAAC,QAAQ,CAAuB;IACvC,OAAO,CAAC,OAAO,CAAC,CAAS;IACzB,OAAO,CAAC,IAAI,CAAC,CAAiB;IAC9B,OAAO,CAAC,OAAO,CAAC,CAAyB;IACzC,OAAO,CAAC,SAAS,CAAQ;IACzB,OAAO,CAAC,UAAU,CAAe;IACjC,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,cAAc,CAAQ;IAC9B,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,cAAc,CAAY;IAClC,OAAO,CAAC,cAAc,CAAW;IACjC,OAAO,CAAC,WAAW,CAAC,CAAY;IAChC,OAAO,CAAC,SAAS,CAAC,CAAyB;IAC3C,OAAO,CAAC,aAAa,CAAC,CAAc;IACpC,kGAAkG;IAClG,OAAO,CAAC,gBAAgB,CAA6B;IAGrD,OAAO,CAAC,oBAAoB,CAAe;IAC3C,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,0BAA0B,CAAS;IAC3C,OAAO,CAAC,wBAAwB,CAAW;IAG3C,OAAO,CAAC,kBAAkB,CAAiB;IAC3C,OAAO,CAAC,cAAc,CAA8B;IACpD,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,mBAAmB,CAAgB;IAC3C,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,OAAO,EAAE,aAAa;IAoDlC;;;;;;;;;;OAUG;IACG,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,GAAE,OAAO,CAAC,cAAc,CAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAyRxF;;;OAGG;IACG,QAAQ;IAKd;;OAEG;IACG,kBAAkB,CAAC,KAAK,SAAI;;;;IAKlC;;OAEG;IACG,UAAU;IAIhB,OAAO,CAAC,oBAAoB;IAuC5B,OAAO,CAAC,wBAAwB;IAkChC,yDAAyD;IACxD,OAAO,CAAC,SAAS;IAelB,sFAAsF;YACxE,oBAAoB;IAoBlC,OAAO,CAAC,cAAc;IAWpB;;;;;;;;;;;OAWG;IACC,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC;IA4BrD;;;;;;;;;;;;;;;;OAgBG;IAEI,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IA0JrD;;;OAGG;IACH,OAAO,CAAC,eAAe;IA+CvB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAIxB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAaxB;;;;OAIG;YACa,SAAS;IAyJzB;;;OAGG;YACW,uBAAuB;IA+CrC;;;;OAIG;YACW,kBAAkB;IAiDhC;;;OAGG;IACH,OAAO,CAAC,cAAc;IAetB,OAAO,CAAC,cAAc;YASR,cAAc;IAiB5B,OAAO,CAAC,oBAAoB;IAI5B;;;;;;;;;;;;;;;OAeG;IACH,OAAO,CAAC,uBAAuB;IAkC/B,OAAO,CAAC,cAAc;IAatB;;;;SAIK;YACW,eAAe;CAsEhC"}
@@ -10,6 +10,9 @@ const version_1 = require("./version");
10
10
  // ─── CapmanEngine ─────────────────────────────────────────────────────────────
11
11
  class CapmanEngine {
12
12
  constructor(options) {
13
+ this.manifestVersion = 0;
14
+ /** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
15
+ this.pendingEmbedding = null;
13
16
  // ── LLM rate limiting state ────────────────────────────────────────────────
14
17
  this.llmCallsThisMinute = 0;
15
18
  this.llmWindowStart = Date.now();
@@ -46,8 +49,20 @@ class CapmanEngine {
46
49
  // Use FileLearningStore explicitly for persistence across restarts
47
50
  this.learning = options.learning === false
48
51
  ? null
49
- : (options.learning ?? new learning_1.MemoryLearningStore());
50
- logger_1.logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}`);
52
+ : (options.learning ?? new learning_1.MemoryLearningStore(options.learningHalfLifeDays ?? 30));
53
+ this.embedding = options.embedding;
54
+ if (this.embedding) {
55
+ // Pre-encode all capability texts at construction time — one batch call.
56
+ // Concatenate name + description for richer semantic surface.
57
+ const texts = this.manifest.capabilities.map(c => `${c.name}: ${c.description}`);
58
+ this.embedding.encode(texts).then(vecs => {
59
+ this.capEmbeddings = vecs;
60
+ logger_1.logger.info('Capability embeddings pre-encoded');
61
+ }).catch(err => {
62
+ logger_1.logger.warn(`EmbeddingProvider pre-encode failed — embedding signal disabled: ${err instanceof Error ? err.message : String(err)}`);
63
+ });
64
+ }
65
+ logger_1.logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}, embedding: ${this.embedding ? 'enabled' : 'disabled'}`);
51
66
  // ── Manifest version compatibility check ─────────────────────────────────
52
67
  this.checkManifestVersion(options.manifest);
53
68
  }
@@ -71,6 +86,9 @@ class CapmanEngine {
71
86
  }
72
87
  const start = Date.now();
73
88
  const steps = [];
89
+ // Capture manifest version at entry — used to guard the cache write.
90
+ // If loadManifest() is called mid-flight, we skip writing stale results.
91
+ const manifestVersion = this.manifestVersion;
74
92
  // ── Step 1: Check cache ──────────────────────────────────────────────────
75
93
  const cacheStart = Date.now();
76
94
  if (this.cache) {
@@ -176,11 +194,19 @@ class CapmanEngine {
176
194
  // queries that resolve to the same capability share a cache entry
177
195
  if (this.cache && resolution.success && matchResult.capability
178
196
  && matchResult.capability.privacy.level === 'public') {
179
- const queryKey = (0, cache_1.normalizeQuery)(query);
180
- const capKey = (0, cache_1.buildCacheKey)(query, matchResult.capability.id, matchResult.extractedParams);
181
- await this.cache.set(queryKey, matchResult);
182
- await this.cache.set(capKey, matchResult);
183
- // capKey always starts with 'cap:' — structurally distinct from queryKey
197
+ // Optimistic concurrency guard — skip cache write if manifest was swapped
198
+ // mid-flight. The result was computed against a now-stale manifest and
199
+ // must not pollute the cache for the new one.
200
+ if (this.manifestVersion === manifestVersion) {
201
+ const queryKey = (0, cache_1.normalizeQuery)(query);
202
+ const capKey = (0, cache_1.buildCacheKey)(query, matchResult.capability.id, matchResult.extractedParams);
203
+ await this.cache.set(queryKey, matchResult);
204
+ await this.cache.set(capKey, matchResult);
205
+ // capKey always starts with 'cap:' — structurally distinct from queryKey
206
+ }
207
+ else {
208
+ logger_1.logger.warn('loadManifest() called mid-flight — skipping cache write for stale result');
209
+ }
184
210
  }
185
211
  // ── Step 5b: Compute missingParams ───────────────────────────────────────
186
212
  // Spec: LLM attempts extraction first when available. missingParams is last resort.
@@ -377,6 +403,44 @@ class CapmanEngine {
377
403
  }
378
404
  }
379
405
  }
406
+ /** Cosine similarity between two equal-length vectors */
407
+ cosineSim(a, b) {
408
+ if (a.length !== b.length || a.length === 0) {
409
+ logger_1.logger.warn(`cosineSim: dimension mismatch (${a.length} vs ${b.length}) — returning 0`);
410
+ return 0;
411
+ }
412
+ let dot = 0, normA = 0, normB = 0;
413
+ for (let i = 0; i < a.length; i++) {
414
+ dot += a[i] * b[i];
415
+ normA += a[i] * a[i];
416
+ normB += b[i] * b[i];
417
+ }
418
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
419
+ return denom === 0 ? 0 : dot / denom;
420
+ }
421
+ /** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
422
+ async buildEmbeddingScores(query) {
423
+ if (!this.embedding || !this.capEmbeddings)
424
+ return undefined;
425
+ // Wait for any in-flight re-encode from loadManifest() to finish.
426
+ // Without this, the first ask() after loadManifest() returns would use stale embeddings.
427
+ if (this.pendingEmbedding)
428
+ await this.pendingEmbedding;
429
+ try {
430
+ const [queryVec] = await this.embedding.encode([query]);
431
+ const scores = new Map();
432
+ this.manifest.capabilities.forEach((cap, i) => {
433
+ const sim = this.cosineSim(queryVec, this.capEmbeddings[i]);
434
+ // Cosine sim is -1..1; map to 0–100, negatives floored to 0
435
+ scores.set(cap.id, Math.max(0, Math.round(sim * 100)));
436
+ });
437
+ return scores;
438
+ }
439
+ catch (err) {
440
+ logger_1.logger.warn(`Embedding encode failed — skipping embedding signal: ${err instanceof Error ? err.message : String(err)}`);
441
+ return undefined;
442
+ }
443
+ }
380
444
  checkMatchHint(capability) {
381
445
  const hint = capability.matchHint?.preferredMode;
382
446
  if (!hint || hint === this.mode)
@@ -399,13 +463,31 @@ class CapmanEngine {
399
463
  */
400
464
  async loadManifest(manifest) {
401
465
  this.checkManifestVersion(manifest);
466
+ // Assign all derived state atomically before any await — an in-flight ask()
467
+ // must never see a new manifest paired with a stale bm25Index or ceiling.
402
468
  this.manifest = manifest;
403
469
  this.bm25Index = (0, matcher_1.buildBM25Index)(manifest.capabilities);
404
470
  this.bm25Ceiling = this.calibrateBM25Ceiling();
405
471
  this.adaptiveMargin = this.calibrateAdaptiveMargin();
406
- // resolveBaseUrl() reads from this.manifest.servers on each call —
472
+ this.manifestVersion++;
407
473
  // server selection updates automatically after loadManifest()
408
474
  await this.clearCache();
475
+ // Re-encode capabilities after manifest swap — stale embeddings misalign with new capabilities
476
+ if (this.embedding) {
477
+ const texts = manifest.capabilities.map(c => `${c.name}: ${c.description}`);
478
+ this.pendingEmbedding = this.embedding.encode(texts).then(vecs => {
479
+ this.capEmbeddings = vecs;
480
+ this.pendingEmbedding = null;
481
+ logger_1.logger.info('Capability embeddings re-encoded after manifest reload');
482
+ }).catch(err => {
483
+ this.capEmbeddings = undefined;
484
+ this.pendingEmbedding = null;
485
+ logger_1.logger.warn(`EmbeddingProvider re-encode failed after loadManifest: ${err instanceof Error ? err.message : String(err)}`);
486
+ });
487
+ }
488
+ else {
489
+ this.pendingEmbedding = null;
490
+ }
409
491
  }
410
492
  /**
411
493
  * Explain what would happen for a query — without executing it.
@@ -647,13 +729,15 @@ class CapmanEngine {
647
729
  let matchResult;
648
730
  let resolvedVia = 'keyword';
649
731
  // Fuzzy options — never applied in cheap mode
732
+ const embeddingScores = await this.buildEmbeddingScores(query);
650
733
  const fuzzyOpts = {
651
734
  fuzzyMatch: this.fuzzyMatch,
652
735
  fuzzyThreshold: this.fuzzyThreshold,
653
736
  bm25Index: this.bm25Index,
654
- bm25Ceiling: this.bm25Ceiling,
655
737
  bm25K1: this.bm25K1,
656
738
  bm25B: this.bm25B,
739
+ bm25Ceiling: this.bm25Ceiling,
740
+ embeddingScores,
657
741
  };
658
742
  switch (this.mode) {
659
743
  case 'cheap': {
@@ -676,20 +760,33 @@ class CapmanEngine {
676
760
  else {
677
761
  const t = Date.now();
678
762
  try {
679
- matchResult = await (0, matcher_1.matchWithLLM)(query, this.manifest, { llm: this.llm });
680
- this.recordLLMSuccess();
681
- resolvedVia = 'llm';
682
- // Merge keyword scores into LLM candidates so boost has real signal for alternatives
683
- const kwResult = (0, matcher_1.match)(query, this.manifest, fuzzyOpts);
684
- matchResult = {
685
- ...matchResult,
686
- candidates: matchResult.candidates.map(c => ({
687
- ...c,
688
- score: c.matched
689
- ? c.score // keep LLM confidence for winner
690
- : (kwResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
691
- })),
692
- };
763
+ const kwResultAccurate = (0, matcher_1.match)(query, this.manifest, fuzzyOpts);
764
+ const top3Accurate = kwResultAccurate.candidates
765
+ .sort((a, b) => b.score - a.score)
766
+ .filter(c => c.score > 0)
767
+ .slice(0, 3)
768
+ .map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
769
+ .filter(Boolean);
770
+ // Skip LLM if no candidates scored above zero — no meaningful top-3 to discriminate
771
+ if (top3Accurate.length === 0) {
772
+ matchResult = kwResultAccurate;
773
+ }
774
+ else {
775
+ const llmResult = await (0, matcher_1.matchWithLLM)(query, top3Accurate, { llm: this.llm, app: this.manifest.app });
776
+ this.recordLLMSuccess();
777
+ resolvedVia = 'llm';
778
+ // If LLM says OOS but keyword had a match, the correct capability may have
779
+ // been rank 4+. Fall back to keyword result rather than returning OOS.
780
+ matchResult = llmResult.capability === null ? kwResultAccurate : {
781
+ ...llmResult,
782
+ candidates: llmResult.candidates.map(c => ({
783
+ ...c,
784
+ score: c.matched
785
+ ? c.score
786
+ : (kwResultAccurate.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
787
+ })),
788
+ };
789
+ }
693
790
  steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t, detail: `confidence: ${matchResult.confidence}%` });
694
791
  }
695
792
  catch (err) {
@@ -734,19 +831,32 @@ class CapmanEngine {
734
831
  logger_1.logger.debug(`Query escalated to LLM: "${query}"`);
735
832
  const t2 = Date.now();
736
833
  try {
737
- matchResult = await (0, matcher_1.matchWithLLM)(query, this.manifest, { llm: this.llm });
738
- this.recordLLMSuccess();
739
- resolvedVia = 'llm';
740
- // keywordResult already computed above in balanced mode — merge scores
741
- matchResult = {
742
- ...matchResult,
743
- candidates: matchResult.candidates.map(c => ({
744
- ...c,
745
- score: c.matched
746
- ? c.score
747
- : (keywordResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
748
- })),
749
- };
834
+ const top3Balanced = keywordResult.candidates
835
+ .sort((a, b) => b.score - a.score)
836
+ .filter(c => c.score > 0)
837
+ .slice(0, 3)
838
+ .map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
839
+ .filter(Boolean);
840
+ // Balanced mode only escalates when keyword confidence is low but > 0 —
841
+ // top3 should always be non-empty here, but guard anyway
842
+ if (top3Balanced.length === 0) {
843
+ matchResult = keywordResult;
844
+ }
845
+ else {
846
+ const llmResult = await (0, matcher_1.matchWithLLM)(query, top3Balanced, { llm: this.llm, app: this.manifest.app });
847
+ this.recordLLMSuccess();
848
+ resolvedVia = 'llm';
849
+ // If LLM returns OOS but keyword had a scored candidate, fall back to keyword
850
+ matchResult = llmResult.capability === null ? keywordResult : {
851
+ ...llmResult,
852
+ candidates: llmResult.candidates.map(c => ({
853
+ ...c,
854
+ score: c.matched
855
+ ? c.score
856
+ : (keywordResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
857
+ })),
858
+ };
859
+ }
750
860
  steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t2, detail: `confidence: ${matchResult.confidence}%` });
751
861
  }
752
862
  catch (err) {
@@ -837,7 +947,15 @@ class CapmanEngine {
837
947
  const hits = wordIndex[candidate.capabilityId] ?? 0;
838
948
  if (hits > 0) {
839
949
  // Logarithmic boost — diminishing returns after first few hits
840
- boost += Math.min(5, Math.log2(hits + 1) * 2);
950
+ const rawBoost = Math.min(5, Math.log2(hits + 1) * 2);
951
+ // IDF weighting — common words ("get", "show", "user") appear in many
952
+ // capabilities and accumulate learning hits that carry little signal.
953
+ // Reuses BM25 df/N so no separate computation is needed.
954
+ const df = this.bm25Index.df[word] ?? 0;
955
+ const idf = df > 0
956
+ ? Math.log((this.bm25Index.N - df + 0.5) / (df + 0.5) + 1)
957
+ : 0;
958
+ boost += rawBoost * Math.min(1, idf);
841
959
  }
842
960
  }
843
961
  const cappedBoost = Math.min(15, Math.round(boost));
@@ -903,6 +1021,10 @@ class CapmanEngine {
903
1021
  * For manifests with ≤100 capabilities this is negligible (<10ms).
904
1022
  * For very large manifests (500+ capabilities), consider passing
905
1023
  * `adaptiveMarginOverride` to skip calibration.
1024
+ *
1025
+ * Note: constructor total cost also includes BM25 index build O(capabilities × tokens)
1026
+ * and embedding pre-encoding O(capabilities) if an EmbeddingProvider is configured.
1027
+ * For 100 capabilities with embeddings, expect ~100–500ms depending on provider latency.
906
1028
  */
907
1029
  calibrateAdaptiveMargin() {
908
1030
  if (this.manifest.capabilities.length < 2)