capman 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CODEBASE.md +6 -5
  2. package/dist/cjs/cache.d.ts +9 -0
  3. package/dist/cjs/cache.d.ts.map +1 -1
  4. package/dist/cjs/cache.js +37 -7
  5. package/dist/cjs/cache.js.map +1 -1
  6. package/dist/cjs/concurrent.d.ts +53 -0
  7. package/dist/cjs/concurrent.d.ts.map +1 -0
  8. package/dist/cjs/concurrent.js +71 -0
  9. package/dist/cjs/concurrent.js.map +1 -0
  10. package/dist/cjs/engine.d.ts +92 -7
  11. package/dist/cjs/engine.d.ts.map +1 -1
  12. package/dist/cjs/engine.js +269 -57
  13. package/dist/cjs/engine.js.map +1 -1
  14. package/dist/cjs/generator.d.ts.map +1 -1
  15. package/dist/cjs/generator.js +28 -6
  16. package/dist/cjs/generator.js.map +1 -1
  17. package/dist/cjs/index.d.ts +3 -1
  18. package/dist/cjs/index.d.ts.map +1 -1
  19. package/dist/cjs/index.js +5 -1
  20. package/dist/cjs/index.js.map +1 -1
  21. package/dist/cjs/learning.d.ts +16 -1
  22. package/dist/cjs/learning.d.ts.map +1 -1
  23. package/dist/cjs/learning.js +95 -14
  24. package/dist/cjs/learning.js.map +1 -1
  25. package/dist/cjs/matcher.d.ts +51 -2
  26. package/dist/cjs/matcher.d.ts.map +1 -1
  27. package/dist/cjs/matcher.js +173 -33
  28. package/dist/cjs/matcher.js.map +1 -1
  29. package/dist/cjs/parser.js +27 -9
  30. package/dist/cjs/parser.js.map +1 -1
  31. package/dist/cjs/resolver.d.ts +2 -2
  32. package/dist/cjs/resolver.d.ts.map +1 -1
  33. package/dist/cjs/resolver.js +66 -26
  34. package/dist/cjs/resolver.js.map +1 -1
  35. package/dist/cjs/schema.d.ts +821 -68
  36. package/dist/cjs/schema.d.ts.map +1 -1
  37. package/dist/cjs/schema.js +62 -13
  38. package/dist/cjs/schema.js.map +1 -1
  39. package/dist/cjs/types.d.ts +156 -9
  40. package/dist/cjs/types.d.ts.map +1 -1
  41. package/dist/cjs/version.d.ts +1 -1
  42. package/dist/cjs/version.js +1 -1
  43. package/dist/esm/cache.d.ts +9 -0
  44. package/dist/esm/cache.js +37 -7
  45. package/dist/esm/concurrent.d.ts +52 -0
  46. package/dist/esm/concurrent.js +66 -0
  47. package/dist/esm/engine.d.ts +92 -7
  48. package/dist/esm/engine.js +270 -58
  49. package/dist/esm/generator.js +28 -6
  50. package/dist/esm/index.d.ts +3 -1
  51. package/dist/esm/index.js +2 -0
  52. package/dist/esm/learning.d.ts +16 -1
  53. package/dist/esm/learning.js +95 -14
  54. package/dist/esm/matcher.d.ts +51 -2
  55. package/dist/esm/matcher.js +170 -33
  56. package/dist/esm/parser.js +27 -9
  57. package/dist/esm/resolver.d.ts +2 -2
  58. package/dist/esm/resolver.js +66 -26
  59. package/dist/esm/schema.d.ts +821 -68
  60. package/dist/esm/schema.js +62 -13
  61. package/dist/esm/types.d.ts +156 -9
  62. package/dist/esm/version.d.ts +1 -1
  63. package/dist/esm/version.js +1 -1
  64. package/package.json +1 -1
@@ -3,17 +3,34 @@ import type { LLMMatcherOptions } from './matcher';
3
3
  import type { ResolveOptions, AuthContext } from './resolver';
4
4
  import type { CacheStore } from './cache';
5
5
  import type { LearningStore } from './learning';
6
+ import type { EmbeddingProvider } from './types';
6
7
  import type { MatchMode } from './types';
7
8
  /**
8
9
  * Options for constructing a CapmanEngine instance.
9
10
  *
10
- * ⚠️ CONCURRENCY: CapmanEngine is not safe for sharing across concurrent
11
- * async request handlers. The LLM rate limiter, circuit breaker, and
12
- * learning index cache are all instance-level mutable state. In an
13
- * Express/Fastify/etc. server, either:
14
- * (a) Create one engine per request safest, no shared state
15
- * (b) Use a single instance only with cheap mode (no LLM calls)
16
- * (c) Add an external mutex around LLM calls if sharing is required
11
+ * ⚠️ CONCURRENCY: CapmanEngine is NOT safe for sharing a single instance
12
+ * across concurrent async request handlers in a server environment.
13
+ *
14
+ * Node.js is single-threaded — classical data races do not apply. What does
15
+ * apply is async interleaving: two ask() chains can interleave at await
16
+ * suspension points. The following hazards are real:
17
+ *
18
+ * - Calling loadManifest() while ask() calls are in-flight: mitigated by
19
+ * an optimistic manifestVersion guard — in-flight results skip the cache
20
+ * write rather than polluting it with stale data.
21
+ * - Sharing one instance across concurrent balanced/accurate LLM calls:
22
+ * rate limiter and circuit-breaker state can interleave.
23
+ *
24
+ * The following are NOT hazards (synchronous within the event loop):
25
+ * - MemoryCache Map mutations
26
+ * - LLM counter increments (llmCallsThisMinute++ is atomic in Node.js)
27
+ * - statsCounter updates
28
+ *
29
+ * Safe patterns:
30
+ * (a) One engine per request — safest, zero shared state
31
+ * (b) Single shared instance in cheap mode only (no LLM calls)
32
+ * (c) ConcurrentCapmanEngine wrapper (v0.8.0) — serialises ask() via
33
+ * a zero-dependency promise queue
17
34
  *
18
35
  * @example
19
36
  * // Safe — per-request engine
@@ -87,6 +104,15 @@ export interface EngineOptions {
87
104
  * @default 60000
88
105
  */
89
106
  llmCircuitBreakerResetMs?: number;
107
+ /**
108
+ * Half-life in days for time-decayed learning weights.
109
+ * A learning entry that is exactly this many days old retains 50% of its
110
+ * original weight. Older entries fade faster; recent ones dominate.
111
+ * Only applies when the engine creates its own default MemoryLearningStore.
112
+ * If you pass a custom learning store, configure halfLifeDays on it directly.
113
+ * @default 30
114
+ */
115
+ learningHalfLifeDays?: number;
90
116
  /**
91
117
  * Enable fuzzy matching using Fuse.js — catches paraphrases, typos,
92
118
  * and morphological variants that exact keyword matching misses.
@@ -114,6 +140,44 @@ export interface EngineOptions {
114
140
  * When undefined, calibrated automatically from manifest score distribution.
115
141
  */
116
142
  adaptiveMarginOverride?: number;
143
+ /**
144
+ * Target environment for server selection from manifest.servers[].
145
+ * When manifest.servers is present and this matches a server's environment,
146
+ * that server's URL is used as baseUrl.
147
+ * Falls back to first server, then EngineOptions.baseUrl if no match.
148
+ */
149
+ environment?: string;
150
+ /**
151
+ * Half-life for time-decayed learning in days.
152
+ * A learning signal that is halfLifeDays old contributes half its original weight.
153
+ * Only applies when using the engine's default MemoryLearningStore.
154
+ * For FileLearningStore, pass halfLifeDays directly to its constructor.
155
+ * @default 30
156
+ */
157
+ halfLifeDays?: number;
158
+ /**
159
+ * Optional embedding provider for semantic similarity matching.
160
+ * When provided, capability texts are pre-encoded at construction time
161
+ * and query embeddings are computed on each ask() call. The embedding
162
+ * signal is fused with BM25 and fuzzy signals via RRF.
163
+ *
164
+ * Zero mandatory dependencies — bring your own provider:
165
+ *
166
+ * @example
167
+ * const engine = new CapmanEngine({
168
+ * manifest,
169
+ * embedding: {
170
+ * async encode(texts: string[]) {
171
+ * // call your embedding API here
172
+ * return texts.map(t => myEmbedModel.embed(t))
173
+ * }
174
+ * }
175
+ * })
176
+ *
177
+ * Note: embedding is purely additive — if encode() throws, the engine
178
+ * falls back to BM25 + fuzzy scoring without interrupting operation.
179
+ */
180
+ embedding?: EmbeddingProvider;
117
181
  }
118
182
  export interface EngineResult {
119
183
  match: MatchResult;
@@ -135,6 +199,7 @@ export declare class CapmanEngine {
135
199
  /** Maximum allowed query length in characters. Queries exceeding this throw RangeError. */
136
200
  static readonly MAX_QUERY_LENGTH = 1000;
137
201
  private manifest;
202
+ private manifestVersion;
138
203
  private mode;
139
204
  private llm?;
140
205
  private cache;
@@ -152,6 +217,11 @@ export declare class CapmanEngine {
152
217
  private bm25B;
153
218
  private marginAwareLLM;
154
219
  private adaptiveMargin;
220
+ private environment?;
221
+ private embedding?;
222
+ private capEmbeddings?;
223
+ /** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
224
+ private pendingEmbedding;
155
225
  private maxLLMCallsPerMinute;
156
226
  private llmCooldownMs;
157
227
  private llmCircuitBreakerThreshold;
@@ -191,6 +261,12 @@ export declare class CapmanEngine {
191
261
  */
192
262
  clearCache(): Promise<void>;
193
263
  private checkManifestVersion;
264
+ private checkCapabilityLifecycle;
265
+ /** Cosine similarity between two equal-length vectors */
266
+ private cosineSim;
267
+ /** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
268
+ private buildEmbeddingScores;
269
+ private checkMatchHint;
194
270
  /**
195
271
  * Replaces the active manifest without creating a new engine instance.
196
272
  * Useful for hot-reloading manifests in long-running servers without
@@ -252,6 +328,11 @@ export declare class CapmanEngine {
252
328
  * score boost — capped at +15 to avoid overriding strong keyword matches.
253
329
  */
254
330
  private applyLearningBoost;
331
+ /**
332
+ * Resolves the effective baseUrl from manifest.servers[] or EngineOptions.baseUrl.
333
+ * Priority: environment-matched server > first server > explicit baseUrl > undefined
334
+ */
335
+ private resolveBaseUrl;
255
336
  private resolveOptions;
256
337
  private recordLearning;
257
338
  private calibrateBM25Ceiling;
@@ -266,6 +347,10 @@ export declare class CapmanEngine {
266
347
  * For manifests with ≤100 capabilities this is negligible (<10ms).
267
348
  * For very large manifests (500+ capabilities), consider passing
268
349
  * `adaptiveMarginOverride` to skip calibration.
350
+ *
351
+ * Note: constructor total cost also includes BM25 index build O(capabilities × tokens)
352
+ * and embedding pre-encoding O(capabilities) if an EmbeddingProvider is configured.
353
+ * For 100 capabilities with embeddings, expect ~100–500ms depending on provider latency.
269
354
  */
270
355
  private calibrateAdaptiveMargin;
271
356
  private computeVerdict;
@@ -1,4 +1,4 @@
1
- import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, scoreCapability as _scoreCapability, sanitizeForPrompt } from './matcher';
1
+ import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, sanitizeForPrompt, calibrateCeiling as _calibrateCeiling } from './matcher';
2
2
  import { resolve as _resolve, checkPrivacy } from './resolver';
3
3
  import { MemoryLearningStore } from './learning';
4
4
  import { logger } from './logger';
@@ -7,6 +7,9 @@ import { VERSION } from './version';
7
7
  // ─── CapmanEngine ─────────────────────────────────────────────────────────────
8
8
  export class CapmanEngine {
9
9
  constructor(options) {
10
+ this.manifestVersion = 0;
11
+ /** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
12
+ this.pendingEmbedding = null;
10
13
  // ── LLM rate limiting state ────────────────────────────────────────────────
11
14
  this.llmCallsThisMinute = 0;
12
15
  this.llmWindowStart = Date.now();
@@ -17,6 +20,7 @@ export class CapmanEngine {
17
20
  this.mode = options.mode ?? 'balanced';
18
21
  this.llm = options.llm;
19
22
  this.baseUrl = options.baseUrl;
23
+ this.environment = options.environment;
20
24
  this.auth = options.auth;
21
25
  this.headers = options.headers;
22
26
  this.threshold = options.threshold ?? 50;
@@ -42,8 +46,20 @@ export class CapmanEngine {
42
46
  // Use FileLearningStore explicitly for persistence across restarts
43
47
  this.learning = options.learning === false
44
48
  ? null
45
- : (options.learning ?? new MemoryLearningStore());
46
- logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}`);
49
+ : (options.learning ?? new MemoryLearningStore(options.learningHalfLifeDays ?? 30));
50
+ this.embedding = options.embedding;
51
+ if (this.embedding) {
52
+ // Pre-encode all capability texts at construction time — one batch call.
53
+ // Concatenate name + description for richer semantic surface.
54
+ const texts = this.manifest.capabilities.map(c => `${c.name}: ${c.description}`);
55
+ this.embedding.encode(texts).then(vecs => {
56
+ this.capEmbeddings = vecs;
57
+ logger.info('Capability embeddings pre-encoded');
58
+ }).catch(err => {
59
+ logger.warn(`EmbeddingProvider pre-encode failed — embedding signal disabled: ${err instanceof Error ? err.message : String(err)}`);
60
+ });
61
+ }
62
+ logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}, embedding: ${this.embedding ? 'enabled' : 'disabled'}`);
47
63
  // ── Manifest version compatibility check ─────────────────────────────────
48
64
  this.checkManifestVersion(options.manifest);
49
65
  }
@@ -67,6 +83,9 @@ export class CapmanEngine {
67
83
  }
68
84
  const start = Date.now();
69
85
  const steps = [];
86
+ // Capture manifest version at entry — used to guard the cache write.
87
+ // If loadManifest() is called mid-flight, we skip writing stale results.
88
+ const manifestVersion = this.manifestVersion;
70
89
  // ── Step 1: Check cache ──────────────────────────────────────────────────
71
90
  const cacheStart = Date.now();
72
91
  if (this.cache) {
@@ -124,6 +143,7 @@ export class CapmanEngine {
124
143
  // ── Step 2.5: Apply learning boost ───────────────────────────────────────
125
144
  matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
126
145
  // ── Step 3: Privacy check ────────────────────────────────────────────────
146
+ let privacyFailed = false;
127
147
  if (matchResult.capability) {
128
148
  const privacyError = checkPrivacy(matchResult.capability, this.auth);
129
149
  steps.push({
@@ -132,13 +152,23 @@ export class CapmanEngine {
132
152
  durationMs: 0,
133
153
  detail: privacyError ?? `level: ${matchResult.capability.privacy.level}`,
134
154
  });
155
+ // Warn on deprecated or sunset capabilities — never silently fail
156
+ this.checkCapabilityLifecycle(matchResult.capability);
157
+ // Log when engine mode differs from capability's preferred mode
158
+ this.checkMatchHint(matchResult.capability);
159
+ // Short-circuit: if privacy fails, skip disambiguation to avoid burning an LLM
160
+ // call on a request that _resolve() will block anyway. privacyFailed propagates
161
+ // to Step 4a so the mode guard check is clean and explicit.
162
+ if (privacyError)
163
+ privacyFailed = true;
135
164
  }
136
165
  // ── Step 4a: Compute verdict + optional margin-aware LLM disambiguation ──
137
166
  let { verdict, margin } = this.computeVerdict(matchResult);
138
167
  if (verdict === 'marginal' &&
139
168
  this.marginAwareLLM &&
140
169
  this.llm &&
141
- this.mode === 'balanced') {
170
+ !privacyFailed &&
171
+ (this.mode === 'balanced' || this.mode === 'accurate')) {
142
172
  matchResult = await this.disambiguateLLM(query, matchResult, steps);
143
173
  // Recompute verdict after disambiguation
144
174
  const recomputed = this.computeVerdict(matchResult);
@@ -161,11 +191,19 @@ export class CapmanEngine {
161
191
  // queries that resolve to the same capability share a cache entry
162
192
  if (this.cache && resolution.success && matchResult.capability
163
193
  && matchResult.capability.privacy.level === 'public') {
164
- const queryKey = normalizeQuery(query);
165
- const capKey = buildCacheKey(query, matchResult.capability.id, matchResult.extractedParams);
166
- await this.cache.set(queryKey, matchResult);
167
- await this.cache.set(capKey, matchResult);
168
- // capKey always starts with 'cap:' — structurally distinct from queryKey
194
+ // Optimistic concurrency guard — skip cache write if manifest was swapped
195
+ // mid-flight. The result was computed against a now-stale manifest and
196
+ // must not pollute the cache for the new one.
197
+ if (this.manifestVersion === manifestVersion) {
198
+ const queryKey = normalizeQuery(query);
199
+ const capKey = buildCacheKey(query, matchResult.capability.id, matchResult.extractedParams);
200
+ await this.cache.set(queryKey, matchResult);
201
+ await this.cache.set(capKey, matchResult);
202
+ // capKey always starts with 'cap:' — structurally distinct from queryKey
203
+ }
204
+ else {
205
+ logger.warn('loadManifest() called mid-flight — skipping cache write for stale result');
206
+ }
169
207
  }
170
208
  // ── Step 5b: Compute missingParams ───────────────────────────────────────
171
209
  // Spec: LLM attempts extraction first when available. missingParams is last resort.
@@ -205,8 +243,19 @@ export class CapmanEngine {
205
243
  }
206
244
  }
207
245
  }
208
- catch {
209
- // LLM param extraction failed — fall through to missingParams below
246
+ catch (err) {
247
+ const isParseError = err instanceof SyntaxError;
248
+ if (isParseError) {
249
+ // JSON parse failure: refund the rate-limit slot but don't open circuit breaker
250
+ // The llm is reachable - the response format was just bad
251
+ this.llmCallsThisMinute = Math.max(0, this.llmCallsThisMinute - 1);
252
+ }
253
+ else {
254
+ // Hard failure (timeout, network): refund slot and increment fail counter
255
+ this.recordLLMFailure();
256
+ }
257
+ logger.warn(`LLM param extraction failed: ${err instanceof Error ? err.message : String(err)}`);
258
+ // fall through to missingParams below
210
259
  }
211
260
  }
212
261
  }
@@ -292,6 +341,20 @@ export class CapmanEngine {
292
341
  await this.cache.clear();
293
342
  }
294
343
  checkManifestVersion(manifest) {
344
+ // ── Schema version check ─────────────────────────────────────────────────
345
+ // schemaVersion tracks manifest format — "1" for v0.6+.
346
+ // Manifests without schemaVersion are pre-v0.6 — warn but allow.
347
+ const CURRENT_SCHEMA_VERSION = '1';
348
+ if (!manifest.schemaVersion) {
349
+ console.warn(`[capman] Manifest is missing schemaVersion — it was generated with capman < 0.6. ` +
350
+ `Regenerate with: npx capman generate`);
351
+ }
352
+ else if (manifest.schemaVersion !== CURRENT_SCHEMA_VERSION) {
353
+ console.warn(`[capman] Manifest schemaVersion "${manifest.schemaVersion}" differs from ` +
354
+ `engine's expected "${CURRENT_SCHEMA_VERSION}". ` +
355
+ `Regenerate with: npx capman generate`);
356
+ }
357
+ // ── Package version check ────────────────────────────────────────────────
295
358
  if (!manifest.version)
296
359
  return;
297
360
  const SEMVER_RE = /^\d+\.\d+\.\d+$/;
@@ -299,8 +362,8 @@ export class CapmanEngine {
299
362
  const [mMaj, mMin] = manifest.version.split('.').map(Number);
300
363
  const [eMaj, eMin] = VERSION.split('.').map(Number);
301
364
  if (mMaj !== eMaj || mMin !== eMin) {
302
- console.warn(`[capman] Manifest version "${manifest.version}" was generated with a ` +
303
- `different engine version than "${VERSION}". This is usually fine across patch versions. ` +
365
+ console.warn(`[capman] Manifest was generated with capman "${manifest.version}" ` +
366
+ `but engine is "${VERSION}". This is usually fine across patch versions. ` +
304
367
  `If you experience unexpected matching issues, regenerate with: npx capman generate`);
305
368
  }
306
369
  }
@@ -309,6 +372,80 @@ export class CapmanEngine {
309
372
  `to engine version "${VERSION}" — version strings are not valid semver.`);
310
373
  }
311
374
  }
375
+ checkCapabilityLifecycle(capability) {
376
+ const lc = capability.lifecycle;
377
+ if (!lc || lc.status === 'stable' || lc.status === 'beta' || lc.status === 'experimental') {
378
+ if (lc?.status === 'beta') {
379
+ logger.warn(`Capability "${capability.id}" is in beta — behavior may change`);
380
+ }
381
+ if (lc?.status === 'experimental') {
382
+ logger.warn(`Capability "${capability.id}" is experimental — use with caution`);
383
+ }
384
+ return;
385
+ }
386
+ if (lc.status === 'deprecated') {
387
+ const sunsetPassed = lc.sunsetAt && new Date(lc.sunsetAt) < new Date();
388
+ if (sunsetPassed) {
389
+ // Sunset date has passed — strongest warning
390
+ console.warn(`[capman] ⚠️ Capability "${capability.id}" passed its sunset date (${lc.sunsetAt}). ` +
391
+ `It may be removed in a future version.` +
392
+ (lc.successor ? ` Use "${lc.successor}" instead.` : '') +
393
+ (lc.note ? ` Note: ${lc.note}` : ''));
394
+ }
395
+ else {
396
+ logger.warn(`Capability "${capability.id}" is deprecated.` +
397
+ (lc.sunsetAt ? ` Sunset: ${lc.sunsetAt}.` : '') +
398
+ (lc.successor ? ` Use "${lc.successor}" instead.` : '') +
399
+ (lc.note ? ` Note: ${lc.note}` : ''));
400
+ }
401
+ }
402
+ }
403
+ /** Cosine similarity between two equal-length vectors */
404
+ cosineSim(a, b) {
405
+ if (a.length !== b.length || a.length === 0) {
406
+ logger.warn(`cosineSim: dimension mismatch (${a.length} vs ${b.length}) — returning 0`);
407
+ return 0;
408
+ }
409
+ let dot = 0, normA = 0, normB = 0;
410
+ for (let i = 0; i < a.length; i++) {
411
+ dot += a[i] * b[i];
412
+ normA += a[i] * a[i];
413
+ normB += b[i] * b[i];
414
+ }
415
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
416
+ return denom === 0 ? 0 : dot / denom;
417
+ }
418
+ /** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
419
+ async buildEmbeddingScores(query) {
420
+ if (!this.embedding || !this.capEmbeddings)
421
+ return undefined;
422
+ // Wait for any in-flight re-encode from loadManifest() to finish.
423
+ // Without this, the first ask() after loadManifest returns uses stale embeddings.
424
+ if (this.pendingEmbedding)
425
+ await this.pendingEmbedding;
426
+ try {
427
+ const [queryVec] = await this.embedding.encode([query]);
428
+ const scores = new Map();
429
+ this.manifest.capabilities.forEach((cap, i) => {
430
+ const sim = this.cosineSim(queryVec, this.capEmbeddings[i]);
431
+ // Cosine sim is -1..1; map to 0–100, negatives floored to 0
432
+ scores.set(cap.id, Math.max(0, Math.round(sim * 100)));
433
+ });
434
+ return scores;
435
+ }
436
+ catch (err) {
437
+ logger.warn(`Embedding encode failed — skipping embedding signal: ${err instanceof Error ? err.message : String(err)}`);
438
+ return undefined;
439
+ }
440
+ }
441
+ checkMatchHint(capability) {
442
+ const hint = capability.matchHint?.preferredMode;
443
+ if (!hint || hint === this.mode)
444
+ return;
445
+ // Advisory only — log but never enforce
446
+ logger.warn(`Capability "${capability.id}" prefers mode "${hint}" but engine is in "${this.mode}" mode. ` +
447
+ `Set mode: '${hint}' in EngineOptions to honor this hint.`);
448
+ }
312
449
  /**
313
450
  * Replaces the active manifest without creating a new engine instance.
314
451
  * Useful for hot-reloading manifests in long-running servers without
@@ -323,11 +460,31 @@ export class CapmanEngine {
323
460
  */
324
461
  async loadManifest(manifest) {
325
462
  this.checkManifestVersion(manifest);
463
+ // Assign all derived state atomically before any await — an in-flight ask()
464
+ // must never see a new manifest paired with a stale bm25Index or ceiling.
326
465
  this.manifest = manifest;
327
466
  this.bm25Index = buildBM25Index(manifest.capabilities);
328
467
  this.bm25Ceiling = this.calibrateBM25Ceiling();
329
468
  this.adaptiveMargin = this.calibrateAdaptiveMargin();
469
+ this.manifestVersion++;
470
+ // server selection updates automatically after loadManifest()
330
471
  await this.clearCache();
472
+ // Re-encode capabilities after manifest swap — stale embeddings misalign with new capabilities
473
+ if (this.embedding) {
474
+ const texts = manifest.capabilities.map(c => `${c.name}: ${c.description}`);
475
+ this.pendingEmbedding = this.embedding.encode(texts).then(vecs => {
476
+ this.capEmbeddings = vecs;
477
+ this.pendingEmbedding = null;
478
+ logger.info('Capability embeddings re-encoded after manifest reload');
479
+ }).catch(err => {
480
+ this.capEmbeddings = undefined;
481
+ this.pendingEmbedding = null;
482
+ logger.warn(`EmbeddingProvider re-encode failed after loadManifest: ${err instanceof Error ? err.message : String(err)}`);
483
+ });
484
+ }
485
+ else {
486
+ this.pendingEmbedding = null;
487
+ }
331
488
  }
332
489
  /**
333
490
  * Explain what would happen for a query — without executing it.
@@ -569,13 +726,15 @@ export class CapmanEngine {
569
726
  let matchResult;
570
727
  let resolvedVia = 'keyword';
571
728
  // Fuzzy options — never applied in cheap mode
729
+ const embeddingScores = await this.buildEmbeddingScores(query);
572
730
  const fuzzyOpts = {
573
731
  fuzzyMatch: this.fuzzyMatch,
574
732
  fuzzyThreshold: this.fuzzyThreshold,
575
733
  bm25Index: this.bm25Index,
576
- bm25Ceiling: this.bm25Ceiling,
577
734
  bm25K1: this.bm25K1,
578
735
  bm25B: this.bm25B,
736
+ bm25Ceiling: this.bm25Ceiling,
737
+ embeddingScores,
579
738
  };
580
739
  switch (this.mode) {
581
740
  case 'cheap': {
@@ -598,20 +757,33 @@ export class CapmanEngine {
598
757
  else {
599
758
  const t = Date.now();
600
759
  try {
601
- matchResult = await _matchWithLLM(query, this.manifest, { llm: this.llm });
602
- this.recordLLMSuccess();
603
- resolvedVia = 'llm';
604
- // Merge keyword scores into LLM candidates so boost has real signal for alternatives
605
- const kwResult = _match(query, this.manifest, fuzzyOpts);
606
- matchResult = {
607
- ...matchResult,
608
- candidates: matchResult.candidates.map(c => ({
609
- ...c,
610
- score: c.matched
611
- ? c.score // keep LLM confidence for winner
612
- : (kwResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
613
- })),
614
- };
760
+ const kwResultAccurate = _match(query, this.manifest, fuzzyOpts);
761
+ const top3Accurate = kwResultAccurate.candidates
762
+ .sort((a, b) => b.score - a.score)
763
+ .filter(c => c.score > 0)
764
+ .slice(0, 3)
765
+ .map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
766
+ .filter(Boolean);
767
+ // Skip LLM if no candidates scored above zero — no meaningful top-3 to discriminate
768
+ if (top3Accurate.length === 0) {
769
+ matchResult = kwResultAccurate;
770
+ }
771
+ else {
772
+ const llmResult = await _matchWithLLM(query, top3Accurate, { llm: this.llm, app: this.manifest.app });
773
+ this.recordLLMSuccess();
774
+ resolvedVia = 'llm';
775
+ // If LLM says OOS but keyword had a match, the correct capability may have
776
+ // been rank 4+. Fall back to keyword result rather than returning OOS.
777
+ matchResult = llmResult.capability === null ? kwResultAccurate : {
778
+ ...llmResult,
779
+ candidates: llmResult.candidates.map(c => ({
780
+ ...c,
781
+ score: c.matched
782
+ ? c.score
783
+ : (kwResultAccurate.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
784
+ })),
785
+ };
786
+ }
615
787
  steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t, detail: `confidence: ${matchResult.confidence}%` });
616
788
  }
617
789
  catch (err) {
@@ -656,19 +828,32 @@ export class CapmanEngine {
656
828
  logger.debug(`Query escalated to LLM: "${query}"`);
657
829
  const t2 = Date.now();
658
830
  try {
659
- matchResult = await _matchWithLLM(query, this.manifest, { llm: this.llm });
660
- this.recordLLMSuccess();
661
- resolvedVia = 'llm';
662
- // keywordResult already computed above in balanced mode — merge scores
663
- matchResult = {
664
- ...matchResult,
665
- candidates: matchResult.candidates.map(c => ({
666
- ...c,
667
- score: c.matched
668
- ? c.score
669
- : (keywordResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
670
- })),
671
- };
831
+ const top3Balanced = keywordResult.candidates
832
+ .sort((a, b) => b.score - a.score)
833
+ .filter(c => c.score > 0)
834
+ .slice(0, 3)
835
+ .map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
836
+ .filter(Boolean);
837
+ // Balanced mode only escalates when keyword confidence is low but > 0 —
838
+ // top3 should always be non-empty here, but guard anyway
839
+ if (top3Balanced.length === 0) {
840
+ matchResult = keywordResult;
841
+ }
842
+ else {
843
+ const llmResult = await _matchWithLLM(query, top3Balanced, { llm: this.llm, app: this.manifest.app });
844
+ this.recordLLMSuccess();
845
+ resolvedVia = 'llm';
846
+ // If LLM returns OOS but keyword had a scored candidate, fall back to keyword
847
+ matchResult = llmResult.capability === null ? keywordResult : {
848
+ ...llmResult,
849
+ candidates: llmResult.candidates.map(c => ({
850
+ ...c,
851
+ score: c.matched
852
+ ? c.score
853
+ : (keywordResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
854
+ })),
855
+ };
856
+ }
672
857
  steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t2, detail: `confidence: ${matchResult.confidence}%` });
673
858
  }
674
859
  catch (err) {
@@ -684,7 +869,11 @@ export class CapmanEngine {
684
869
  break;
685
870
  }
686
871
  }
687
- return { matchResult: matchResult, resolvedVia };
872
+ if (matchResult === undefined) {
873
+ const exhaustive = this.mode;
874
+ throw new Error(`_runMatch: unhandled MatchMode "${exhaustive}"`);
875
+ }
876
+ return { matchResult, resolvedVia };
688
877
  }
689
878
  /**
690
879
  * Applies learning boost to a MatchResult and returns the updated result.
@@ -755,7 +944,15 @@ export class CapmanEngine {
755
944
  const hits = wordIndex[candidate.capabilityId] ?? 0;
756
945
  if (hits > 0) {
757
946
  // Logarithmic boost — diminishing returns after first few hits
758
- boost += Math.min(5, Math.log2(hits + 1) * 2);
947
+ const rawBoost = Math.min(5, Math.log2(hits + 1) * 2);
948
+ // IDF weighting — common words ("get", "show", "user") appear in many
949
+ // capabilities and accumulate learning hits that carry little signal.
950
+ // Reuses BM25 df/N so no separate computation is needed.
951
+ const df = this.bm25Index.df[word] ?? 0;
952
+ const idf = df > 0
953
+ ? Math.log((this.bm25Index.N - df + 0.5) / (df + 0.5) + 1)
954
+ : 0;
955
+ boost += rawBoost * Math.min(1, idf);
759
956
  }
760
957
  }
761
958
  const cappedBoost = Math.min(15, Math.round(boost));
@@ -769,10 +966,26 @@ export class CapmanEngine {
769
966
  };
770
967
  });
771
968
  }
969
+ /**
970
+ * Resolves the effective baseUrl from manifest.servers[] or EngineOptions.baseUrl.
971
+ * Priority: environment-matched server > first server > explicit baseUrl > undefined
972
+ */
973
+ resolveBaseUrl() {
974
+ const servers = this.manifest.servers;
975
+ if (!servers?.length)
976
+ return this.baseUrl;
977
+ if (this.environment) {
978
+ const match = servers.find(s => s.environment === this.environment);
979
+ if (match)
980
+ return match.url.replace(/\/$/, '');
981
+ }
982
+ // Fallback to first server
983
+ return servers[0].url.replace(/\/$/, '');
984
+ }
772
985
  // ── Private helpers ────────────────────────────────────────────────────────
773
986
  resolveOptions(overrides = {}) {
774
987
  return {
775
- baseUrl: this.baseUrl,
988
+ baseUrl: this.resolveBaseUrl(),
776
989
  auth: this.auth,
777
990
  headers: this.headers,
778
991
  ...overrides,
@@ -792,16 +1005,7 @@ export class CapmanEngine {
792
1005
  });
793
1006
  }
794
1007
  calibrateBM25Ceiling() {
795
- let max = 0;
796
- for (const cap of this.manifest.capabilities) {
797
- if (!cap.examples?.length)
798
- continue;
799
- const selfWords = new Set(tokenize(cap.examples[0]));
800
- const raw = _scoreCapability(selfWords, cap, this.bm25Index, this.bm25K1, this.bm25B);
801
- if (raw > max)
802
- max = raw;
803
- }
804
- return max > 0 ? max : 100;
1008
+ return _calibrateCeiling(this.manifest.capabilities, this.bm25Index, this.bm25K1, this.bm25B);
805
1009
  }
806
1010
  /**
807
1011
  * Calibrates the adaptive margin threshold from the manifest's own score
@@ -814,6 +1018,10 @@ export class CapmanEngine {
814
1018
  * For manifests with ≤100 capabilities this is negligible (<10ms).
815
1019
  * For very large manifests (500+ capabilities), consider passing
816
1020
  * `adaptiveMarginOverride` to skip calibration.
1021
+ *
1022
+ * Note: constructor total cost also includes BM25 index build O(capabilities × tokens)
1023
+ * and embedding pre-encoding O(capabilities) if an EmbeddingProvider is configured.
1024
+ * For 100 capabilities with embeddings, expect ~100–500ms depending on provider latency.
817
1025
  */
818
1026
  calibrateAdaptiveMargin() {
819
1027
  if (this.manifest.capabilities.length < 2)
@@ -829,10 +1037,14 @@ export class CapmanEngine {
829
1037
  for (const cap of this.manifest.capabilities) {
830
1038
  if (!cap.examples?.length)
831
1039
  continue;
832
- const result = _match(cap.examples[0], this.manifest, fuzzyOpts);
833
- const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
834
- if (sorted.length >= 2) {
835
- margins.push(sorted[0].score - sorted[1].score);
1040
+ // Use all examples and take the maximum margin — same rationale as
1041
+ // calibrateBM25Ceiling(): a weak first example skews the calibration.
1042
+ for (const example of cap.examples) {
1043
+ const result = _match(example, this.manifest, fuzzyOpts);
1044
+ const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
1045
+ if (sorted.length >= 2) {
1046
+ margins.push(sorted[0].score - sorted[1].score);
1047
+ }
836
1048
  }
837
1049
  }
838
1050
  if (margins.length === 0)