capman 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODEBASE.md +6 -5
- package/dist/cjs/cache.d.ts +9 -0
- package/dist/cjs/cache.d.ts.map +1 -1
- package/dist/cjs/cache.js +37 -7
- package/dist/cjs/cache.js.map +1 -1
- package/dist/cjs/concurrent.d.ts +53 -0
- package/dist/cjs/concurrent.d.ts.map +1 -0
- package/dist/cjs/concurrent.js +71 -0
- package/dist/cjs/concurrent.js.map +1 -0
- package/dist/cjs/engine.d.ts +92 -7
- package/dist/cjs/engine.d.ts.map +1 -1
- package/dist/cjs/engine.js +269 -57
- package/dist/cjs/engine.js.map +1 -1
- package/dist/cjs/generator.d.ts.map +1 -1
- package/dist/cjs/generator.js +28 -6
- package/dist/cjs/generator.js.map +1 -1
- package/dist/cjs/index.d.ts +3 -1
- package/dist/cjs/index.d.ts.map +1 -1
- package/dist/cjs/index.js +5 -1
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/learning.d.ts +16 -1
- package/dist/cjs/learning.d.ts.map +1 -1
- package/dist/cjs/learning.js +95 -14
- package/dist/cjs/learning.js.map +1 -1
- package/dist/cjs/matcher.d.ts +51 -2
- package/dist/cjs/matcher.d.ts.map +1 -1
- package/dist/cjs/matcher.js +173 -33
- package/dist/cjs/matcher.js.map +1 -1
- package/dist/cjs/parser.js +27 -9
- package/dist/cjs/parser.js.map +1 -1
- package/dist/cjs/resolver.d.ts +2 -2
- package/dist/cjs/resolver.d.ts.map +1 -1
- package/dist/cjs/resolver.js +66 -26
- package/dist/cjs/resolver.js.map +1 -1
- package/dist/cjs/schema.d.ts +821 -68
- package/dist/cjs/schema.d.ts.map +1 -1
- package/dist/cjs/schema.js +62 -13
- package/dist/cjs/schema.js.map +1 -1
- package/dist/cjs/types.d.ts +156 -9
- package/dist/cjs/types.d.ts.map +1 -1
- package/dist/cjs/version.d.ts +1 -1
- package/dist/cjs/version.js +1 -1
- package/dist/esm/cache.d.ts +9 -0
- package/dist/esm/cache.js +37 -7
- package/dist/esm/concurrent.d.ts +52 -0
- package/dist/esm/concurrent.js +66 -0
- package/dist/esm/engine.d.ts +92 -7
- package/dist/esm/engine.js +270 -58
- package/dist/esm/generator.js +28 -6
- package/dist/esm/index.d.ts +3 -1
- package/dist/esm/index.js +2 -0
- package/dist/esm/learning.d.ts +16 -1
- package/dist/esm/learning.js +95 -14
- package/dist/esm/matcher.d.ts +51 -2
- package/dist/esm/matcher.js +170 -33
- package/dist/esm/parser.js +27 -9
- package/dist/esm/resolver.d.ts +2 -2
- package/dist/esm/resolver.js +66 -26
- package/dist/esm/schema.d.ts +821 -68
- package/dist/esm/schema.js +62 -13
- package/dist/esm/types.d.ts +156 -9
- package/dist/esm/version.d.ts +1 -1
- package/dist/esm/version.js +1 -1
- package/package.json +1 -1
package/dist/esm/engine.d.ts
CHANGED
|
@@ -3,17 +3,34 @@ import type { LLMMatcherOptions } from './matcher';
|
|
|
3
3
|
import type { ResolveOptions, AuthContext } from './resolver';
|
|
4
4
|
import type { CacheStore } from './cache';
|
|
5
5
|
import type { LearningStore } from './learning';
|
|
6
|
+
import type { EmbeddingProvider } from './types';
|
|
6
7
|
import type { MatchMode } from './types';
|
|
7
8
|
/**
|
|
8
9
|
* Options for constructing a CapmanEngine instance.
|
|
9
10
|
*
|
|
10
|
-
* ⚠️ CONCURRENCY: CapmanEngine is
|
|
11
|
-
* async request handlers
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
11
|
+
* ⚠️ CONCURRENCY: CapmanEngine is NOT safe for sharing a single instance
|
|
12
|
+
* across concurrent async request handlers in a server environment.
|
|
13
|
+
*
|
|
14
|
+
* Node.js is single-threaded — classical data races do not apply. What does
|
|
15
|
+
* apply is async interleaving: two ask() chains can interleave at await
|
|
16
|
+
* suspension points. The following hazards are real:
|
|
17
|
+
*
|
|
18
|
+
* - Calling loadManifest() while ask() calls are in-flight: mitigated by
|
|
19
|
+
* an optimistic manifestVersion guard — in-flight results skip the cache
|
|
20
|
+
* write rather than polluting it with stale data.
|
|
21
|
+
* - Sharing one instance across concurrent balanced/accurate LLM calls:
|
|
22
|
+
* rate limiter and circuit-breaker state can interleave.
|
|
23
|
+
*
|
|
24
|
+
* The following are NOT hazards (synchronous within the event loop):
|
|
25
|
+
* - MemoryCache Map mutations
|
|
26
|
+
* - LLM counter increments (llmCallsThisMinute++ is atomic in Node.js)
|
|
27
|
+
* - statsCounter updates
|
|
28
|
+
*
|
|
29
|
+
* Safe patterns:
|
|
30
|
+
* (a) One engine per request — safest, zero shared state
|
|
31
|
+
* (b) Single shared instance in cheap mode only (no LLM calls)
|
|
32
|
+
* (c) ConcurrentCapmanEngine wrapper (v0.8.0) — serialises ask() via
|
|
33
|
+
* a zero-dependency promise queue
|
|
17
34
|
*
|
|
18
35
|
* @example
|
|
19
36
|
* // Safe — per-request engine
|
|
@@ -87,6 +104,15 @@ export interface EngineOptions {
|
|
|
87
104
|
* @default 60000
|
|
88
105
|
*/
|
|
89
106
|
llmCircuitBreakerResetMs?: number;
|
|
107
|
+
/**
|
|
108
|
+
* Half-life in days for time-decayed learning weights.
|
|
109
|
+
* A learning entry that is exactly this many days old retains 50% of its
|
|
110
|
+
* original weight. Older entries retain progressively less weight; recent ones dominate.
|
|
111
|
+
* Only applies when the engine creates its own default MemoryLearningStore.
|
|
112
|
+
* If you pass a custom learning store, configure halfLifeDays on it directly.
|
|
113
|
+
* @default 30
|
|
114
|
+
*/
|
|
115
|
+
learningHalfLifeDays?: number;
|
|
90
116
|
/**
|
|
91
117
|
* Enable fuzzy matching using Fuse.js — catches paraphrases, typos,
|
|
92
118
|
* and morphological variants that exact keyword matching misses.
|
|
@@ -114,6 +140,44 @@ export interface EngineOptions {
|
|
|
114
140
|
* When undefined, calibrated automatically from manifest score distribution.
|
|
115
141
|
*/
|
|
116
142
|
adaptiveMarginOverride?: number;
|
|
143
|
+
/**
|
|
144
|
+
* Target environment for server selection from manifest.servers[].
|
|
145
|
+
* When manifest.servers is present and this matches a server's environment,
|
|
146
|
+
* that server's URL is used as baseUrl.
|
|
147
|
+
* Falls back to the first server when no environment matches; EngineOptions.baseUrl is used only when manifest.servers is absent or empty.
|
|
148
|
+
*/
|
|
149
|
+
environment?: string;
|
|
150
|
+
/**
|
|
151
|
+
* Half-life for time-decayed learning in days.
|
|
152
|
+
* A learning signal that is halfLifeDays old contributes half its original weight.
|
|
153
|
+
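* Only applies when using the engine's default MemoryLearningStore. NOTE(review): the constructor passes `learningHalfLifeDays` to that store — confirm this option is also honored.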
|
|
154
|
+
* For FileLearningStore, pass halfLifeDays directly to its constructor.
|
|
155
|
+
* @default 30
|
|
156
|
+
*/
|
|
157
|
+
halfLifeDays?: number;
|
|
158
|
+
/**
|
|
159
|
+
* Optional embedding provider for semantic similarity matching.
|
|
160
|
+
* When provided, capability texts are pre-encoded at construction time
|
|
161
|
+
* and query embeddings are computed on each ask() call. The embedding
|
|
162
|
+
* signal is fused with BM25 and fuzzy signals via RRF.
|
|
163
|
+
*
|
|
164
|
+
* Zero mandatory dependencies — bring your own provider:
|
|
165
|
+
*
|
|
166
|
+
* @example
|
|
167
|
+
* const engine = new CapmanEngine({
|
|
168
|
+
* manifest,
|
|
169
|
+
* embedding: {
|
|
170
|
+
* async encode(texts: string[]) {
|
|
171
|
+
* // call your embedding API here
|
|
172
|
+
* return texts.map(t => myEmbedModel.embed(t))
|
|
173
|
+
* }
|
|
174
|
+
* }
|
|
175
|
+
* })
|
|
176
|
+
*
|
|
177
|
+
* Note: embedding is purely additive — if encode() throws, the engine
|
|
178
|
+
* falls back to BM25 + fuzzy scoring without interrupting operation.
|
|
179
|
+
*/
|
|
180
|
+
embedding?: EmbeddingProvider;
|
|
117
181
|
}
|
|
118
182
|
export interface EngineResult {
|
|
119
183
|
match: MatchResult;
|
|
@@ -135,6 +199,7 @@ export declare class CapmanEngine {
|
|
|
135
199
|
/** Maximum allowed query length in characters. Queries exceeding this throw RangeError. */
|
|
136
200
|
static readonly MAX_QUERY_LENGTH = 1000;
|
|
137
201
|
private manifest;
|
|
202
|
+
private manifestVersion;
|
|
138
203
|
private mode;
|
|
139
204
|
private llm?;
|
|
140
205
|
private cache;
|
|
@@ -152,6 +217,11 @@ export declare class CapmanEngine {
|
|
|
152
217
|
private bm25B;
|
|
153
218
|
private marginAwareLLM;
|
|
154
219
|
private adaptiveMargin;
|
|
220
|
+
private environment?;
|
|
221
|
+
private embedding?;
|
|
222
|
+
private capEmbeddings?;
|
|
223
|
+
/** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
|
|
224
|
+
private pendingEmbedding;
|
|
155
225
|
private maxLLMCallsPerMinute;
|
|
156
226
|
private llmCooldownMs;
|
|
157
227
|
private llmCircuitBreakerThreshold;
|
|
@@ -191,6 +261,12 @@ export declare class CapmanEngine {
|
|
|
191
261
|
*/
|
|
192
262
|
clearCache(): Promise<void>;
|
|
193
263
|
private checkManifestVersion;
|
|
264
|
+
private checkCapabilityLifecycle;
|
|
265
|
+
/** Cosine similarity between two equal-length vectors */
|
|
266
|
+
private cosineSim;
|
|
267
|
+
/** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
|
|
268
|
+
private buildEmbeddingScores;
|
|
269
|
+
private checkMatchHint;
|
|
194
270
|
/**
|
|
195
271
|
* Replaces the active manifest without creating a new engine instance.
|
|
196
272
|
* Useful for hot-reloading manifests in long-running servers without
|
|
@@ -252,6 +328,11 @@ export declare class CapmanEngine {
|
|
|
252
328
|
* score boost — capped at +15 to avoid overriding strong keyword matches.
|
|
253
329
|
*/
|
|
254
330
|
private applyLearningBoost;
|
|
331
|
+
/**
|
|
332
|
+
* Resolves the effective baseUrl from manifest.servers[] or EngineOptions.baseUrl.
|
|
333
|
+
* Priority: environment-matched server > first server > explicit baseUrl > undefined
|
|
334
|
+
*/
|
|
335
|
+
private resolveBaseUrl;
|
|
255
336
|
private resolveOptions;
|
|
256
337
|
private recordLearning;
|
|
257
338
|
private calibrateBM25Ceiling;
|
|
@@ -266,6 +347,10 @@ export declare class CapmanEngine {
|
|
|
266
347
|
* For manifests with ≤100 capabilities this is negligible (<10ms).
|
|
267
348
|
* For very large manifests (500+ capabilities), consider passing
|
|
268
349
|
* `adaptiveMarginOverride` to skip calibration.
|
|
350
|
+
*
|
|
351
|
+
* Note: constructor total cost also includes BM25 index build O(capabilities × tokens)
|
|
352
|
+
* and embedding pre-encoding O(capabilities) if an EmbeddingProvider is configured.
|
|
353
|
+
* For 100 capabilities with embeddings, expect ~100–500ms depending on provider latency (the constructor does not await this encode; the embedding signal is skipped until it resolves).
|
|
269
354
|
*/
|
|
270
355
|
private calibrateAdaptiveMargin;
|
|
271
356
|
private computeVerdict;
|
package/dist/esm/engine.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index,
|
|
1
|
+
import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, sanitizeForPrompt, calibrateCeiling as _calibrateCeiling } from './matcher';
|
|
2
2
|
import { resolve as _resolve, checkPrivacy } from './resolver';
|
|
3
3
|
import { MemoryLearningStore } from './learning';
|
|
4
4
|
import { logger } from './logger';
|
|
@@ -7,6 +7,9 @@ import { VERSION } from './version';
|
|
|
7
7
|
// ─── CapmanEngine ─────────────────────────────────────────────────────────────
|
|
8
8
|
export class CapmanEngine {
|
|
9
9
|
constructor(options) {
|
|
10
|
+
this.manifestVersion = 0;
|
|
11
|
+
/** Resolves when the post-loadManifest re-encode completes. Awaited by buildEmbeddingScores(). */
|
|
12
|
+
this.pendingEmbedding = null;
|
|
10
13
|
// ── LLM rate limiting state ────────────────────────────────────────────────
|
|
11
14
|
this.llmCallsThisMinute = 0;
|
|
12
15
|
this.llmWindowStart = Date.now();
|
|
@@ -17,6 +20,7 @@ export class CapmanEngine {
|
|
|
17
20
|
this.mode = options.mode ?? 'balanced';
|
|
18
21
|
this.llm = options.llm;
|
|
19
22
|
this.baseUrl = options.baseUrl;
|
|
23
|
+
this.environment = options.environment;
|
|
20
24
|
this.auth = options.auth;
|
|
21
25
|
this.headers = options.headers;
|
|
22
26
|
this.threshold = options.threshold ?? 50;
|
|
@@ -42,8 +46,20 @@ export class CapmanEngine {
|
|
|
42
46
|
// Use FileLearningStore explicitly for persistence across restarts
|
|
43
47
|
this.learning = options.learning === false
|
|
44
48
|
? null
|
|
45
|
-
: (options.learning ?? new MemoryLearningStore());
|
|
46
|
-
|
|
49
|
+
: (options.learning ?? new MemoryLearningStore(options.learningHalfLifeDays ?? 30));
|
|
50
|
+
this.embedding = options.embedding;
|
|
51
|
+
if (this.embedding) {
|
|
52
|
+
// Pre-encode all capability texts at construction time — one batch call.
|
|
53
|
+
// Concatenate name + description for richer semantic surface.
|
|
54
|
+
const texts = this.manifest.capabilities.map(c => `${c.name}: ${c.description}`);
|
|
55
|
+
this.embedding.encode(texts).then(vecs => {
|
|
56
|
+
this.capEmbeddings = vecs;
|
|
57
|
+
logger.info('Capability embeddings pre-encoded');
|
|
58
|
+
}).catch(err => {
|
|
59
|
+
logger.warn(`EmbeddingProvider pre-encode failed — embedding signal disabled: ${err instanceof Error ? err.message : String(err)}`);
|
|
60
|
+
});
|
|
61
|
+
}
|
|
62
|
+
logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}, embedding: ${this.embedding ? 'enabled' : 'disabled'}`);
|
|
47
63
|
// ── Manifest version compatibility check ─────────────────────────────────
|
|
48
64
|
this.checkManifestVersion(options.manifest);
|
|
49
65
|
}
|
|
@@ -67,6 +83,9 @@ export class CapmanEngine {
|
|
|
67
83
|
}
|
|
68
84
|
const start = Date.now();
|
|
69
85
|
const steps = [];
|
|
86
|
+
// Capture manifest version at entry — used to guard the cache write.
|
|
87
|
+
// If loadManifest() is called mid-flight, we skip writing stale results.
|
|
88
|
+
const manifestVersion = this.manifestVersion;
|
|
70
89
|
// ── Step 1: Check cache ──────────────────────────────────────────────────
|
|
71
90
|
const cacheStart = Date.now();
|
|
72
91
|
if (this.cache) {
|
|
@@ -124,6 +143,7 @@ export class CapmanEngine {
|
|
|
124
143
|
// ── Step 2.5: Apply learning boost ───────────────────────────────────────
|
|
125
144
|
matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
|
|
126
145
|
// ── Step 3: Privacy check ────────────────────────────────────────────────
|
|
146
|
+
let privacyFailed = false;
|
|
127
147
|
if (matchResult.capability) {
|
|
128
148
|
const privacyError = checkPrivacy(matchResult.capability, this.auth);
|
|
129
149
|
steps.push({
|
|
@@ -132,13 +152,23 @@ export class CapmanEngine {
|
|
|
132
152
|
durationMs: 0,
|
|
133
153
|
detail: privacyError ?? `level: ${matchResult.capability.privacy.level}`,
|
|
134
154
|
});
|
|
155
|
+
// Warn on deprecated or sunset capabilities — never silently fail
|
|
156
|
+
this.checkCapabilityLifecycle(matchResult.capability);
|
|
157
|
+
// Log when engine mode differs from capability's preferred mode
|
|
158
|
+
this.checkMatchHint(matchResult.capability);
|
|
159
|
+
// Short-circuit: if privacy fails, skip disambiguation to avoid burning an LLM
|
|
160
|
+
// call on a request that _resolve() will block anyway. privacyFailed propagates
|
|
161
|
+
// to Step 4a so the mode guard check is clean and explicit.
|
|
162
|
+
if (privacyError)
|
|
163
|
+
privacyFailed = true;
|
|
135
164
|
}
|
|
136
165
|
// ── Step 4a: Compute verdict + optional margin-aware LLM disambiguation ──
|
|
137
166
|
let { verdict, margin } = this.computeVerdict(matchResult);
|
|
138
167
|
if (verdict === 'marginal' &&
|
|
139
168
|
this.marginAwareLLM &&
|
|
140
169
|
this.llm &&
|
|
141
|
-
|
|
170
|
+
!privacyFailed &&
|
|
171
|
+
(this.mode === 'balanced' || this.mode === 'accurate')) {
|
|
142
172
|
matchResult = await this.disambiguateLLM(query, matchResult, steps);
|
|
143
173
|
// Recompute verdict after disambiguation
|
|
144
174
|
const recomputed = this.computeVerdict(matchResult);
|
|
@@ -161,11 +191,19 @@ export class CapmanEngine {
|
|
|
161
191
|
// queries that resolve to the same capability share a cache entry
|
|
162
192
|
if (this.cache && resolution.success && matchResult.capability
|
|
163
193
|
&& matchResult.capability.privacy.level === 'public') {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
194
|
+
// Optimistic concurrency guard — skip cache write if manifest was swapped
|
|
195
|
+
// mid-flight. The result was computed against a now-stale manifest and
|
|
196
|
+
// must not pollute the cache for the new one.
|
|
197
|
+
if (this.manifestVersion === manifestVersion) {
|
|
198
|
+
const queryKey = normalizeQuery(query);
|
|
199
|
+
const capKey = buildCacheKey(query, matchResult.capability.id, matchResult.extractedParams);
|
|
200
|
+
await this.cache.set(queryKey, matchResult);
|
|
201
|
+
await this.cache.set(capKey, matchResult);
|
|
202
|
+
// capKey always starts with 'cap:' — structurally distinct from queryKey
|
|
203
|
+
}
|
|
204
|
+
else {
|
|
205
|
+
logger.warn('loadManifest() called mid-flight — skipping cache write for stale result');
|
|
206
|
+
}
|
|
169
207
|
}
|
|
170
208
|
// ── Step 5b: Compute missingParams ───────────────────────────────────────
|
|
171
209
|
// Spec: LLM attempts extraction first when available. missingParams is last resort.
|
|
@@ -205,8 +243,19 @@ export class CapmanEngine {
|
|
|
205
243
|
}
|
|
206
244
|
}
|
|
207
245
|
}
|
|
208
|
-
catch {
|
|
209
|
-
|
|
246
|
+
catch (err) {
|
|
247
|
+
const isParseError = err instanceof SyntaxError;
|
|
248
|
+
if (isParseError) {
|
|
249
|
+
// JSON parse failure: refund the rate-limit slot but don't open circuit breaker
|
|
250
|
+
// The llm is reachable - the response format was just bad
|
|
251
|
+
this.llmCallsThisMinute = Math.max(0, this.llmCallsThisMinute - 1);
|
|
252
|
+
}
|
|
253
|
+
else {
|
|
254
|
+
// Hard failure (timeout, network): refund slot and increment fail counter
|
|
255
|
+
this.recordLLMFailure();
|
|
256
|
+
}
|
|
257
|
+
logger.warn(`LLM param extraction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
258
|
+
// fall through to missingParams below
|
|
210
259
|
}
|
|
211
260
|
}
|
|
212
261
|
}
|
|
@@ -292,6 +341,20 @@ export class CapmanEngine {
|
|
|
292
341
|
await this.cache.clear();
|
|
293
342
|
}
|
|
294
343
|
checkManifestVersion(manifest) {
|
|
344
|
+
// ── Schema version check ─────────────────────────────────────────────────
|
|
345
|
+
// schemaVersion tracks manifest format — "1" for v0.6+.
|
|
346
|
+
// Manifests without schemaVersion are pre-v0.6 — warn but allow.
|
|
347
|
+
const CURRENT_SCHEMA_VERSION = '1';
|
|
348
|
+
if (!manifest.schemaVersion) {
|
|
349
|
+
console.warn(`[capman] Manifest is missing schemaVersion — it was generated with capman < 0.6. ` +
|
|
350
|
+
`Regenerate with: npx capman generate`);
|
|
351
|
+
}
|
|
352
|
+
else if (manifest.schemaVersion !== CURRENT_SCHEMA_VERSION) {
|
|
353
|
+
console.warn(`[capman] Manifest schemaVersion "${manifest.schemaVersion}" differs from ` +
|
|
354
|
+
`engine's expected "${CURRENT_SCHEMA_VERSION}". ` +
|
|
355
|
+
`Regenerate with: npx capman generate`);
|
|
356
|
+
}
|
|
357
|
+
// ── Package version check ────────────────────────────────────────────────
|
|
295
358
|
if (!manifest.version)
|
|
296
359
|
return;
|
|
297
360
|
const SEMVER_RE = /^\d+\.\d+\.\d+$/;
|
|
@@ -299,8 +362,8 @@ export class CapmanEngine {
|
|
|
299
362
|
const [mMaj, mMin] = manifest.version.split('.').map(Number);
|
|
300
363
|
const [eMaj, eMin] = VERSION.split('.').map(Number);
|
|
301
364
|
if (mMaj !== eMaj || mMin !== eMin) {
|
|
302
|
-
console.warn(`[capman] Manifest
|
|
303
|
-
`
|
|
365
|
+
console.warn(`[capman] Manifest was generated with capman "${manifest.version}" ` +
|
|
366
|
+
`but engine is "${VERSION}". This is usually fine across patch versions. ` +
|
|
304
367
|
`If you experience unexpected matching issues, regenerate with: npx capman generate`);
|
|
305
368
|
}
|
|
306
369
|
}
|
|
@@ -309,6 +372,80 @@ export class CapmanEngine {
|
|
|
309
372
|
`to engine version "${VERSION}" — version strings are not valid semver.`);
|
|
310
373
|
}
|
|
311
374
|
}
|
|
375
|
+
checkCapabilityLifecycle(capability) {
|
|
376
|
+
const lc = capability.lifecycle;
|
|
377
|
+
if (!lc || lc.status === 'stable' || lc.status === 'beta' || lc.status === 'experimental') {
|
|
378
|
+
if (lc?.status === 'beta') {
|
|
379
|
+
logger.warn(`Capability "${capability.id}" is in beta — behavior may change`);
|
|
380
|
+
}
|
|
381
|
+
if (lc?.status === 'experimental') {
|
|
382
|
+
logger.warn(`Capability "${capability.id}" is experimental — use with caution`);
|
|
383
|
+
}
|
|
384
|
+
return;
|
|
385
|
+
}
|
|
386
|
+
if (lc.status === 'deprecated') {
|
|
387
|
+
const sunsetPassed = lc.sunsetAt && new Date(lc.sunsetAt) < new Date();
|
|
388
|
+
if (sunsetPassed) {
|
|
389
|
+
// Sunset date has passed — strongest warning
|
|
390
|
+
console.warn(`[capman] ⚠️ Capability "${capability.id}" passed its sunset date (${lc.sunsetAt}). ` +
|
|
391
|
+
`It may be removed in a future version.` +
|
|
392
|
+
(lc.successor ? ` Use "${lc.successor}" instead.` : '') +
|
|
393
|
+
(lc.note ? ` Note: ${lc.note}` : ''));
|
|
394
|
+
}
|
|
395
|
+
else {
|
|
396
|
+
logger.warn(`Capability "${capability.id}" is deprecated.` +
|
|
397
|
+
(lc.sunsetAt ? ` Sunset: ${lc.sunsetAt}.` : '') +
|
|
398
|
+
(lc.successor ? ` Use "${lc.successor}" instead.` : '') +
|
|
399
|
+
(lc.note ? ` Note: ${lc.note}` : ''));
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
/** Cosine similarity between two equal-length vectors */
|
|
404
|
+
cosineSim(a, b) {
|
|
405
|
+
if (a.length !== b.length || a.length === 0) {
|
|
406
|
+
logger.warn(`cosineSim: dimension mismatch (${a.length} vs ${b.length}) — returning 0`);
|
|
407
|
+
return 0;
|
|
408
|
+
}
|
|
409
|
+
let dot = 0, normA = 0, normB = 0;
|
|
410
|
+
for (let i = 0; i < a.length; i++) {
|
|
411
|
+
dot += a[i] * b[i];
|
|
412
|
+
normA += a[i] * a[i];
|
|
413
|
+
normB += b[i] * b[i];
|
|
414
|
+
}
|
|
415
|
+
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
416
|
+
return denom === 0 ? 0 : dot / denom;
|
|
417
|
+
}
|
|
418
|
+
/** Encode query and return cosine similarity scores (0–100) keyed by capability ID */
|
|
419
|
+
async buildEmbeddingScores(query) {
|
|
420
|
+
if (!this.embedding || !this.capEmbeddings)
|
|
421
|
+
return undefined;
|
|
422
|
+
// Wait for any in-flight re-encode from loadManifest() to finish.
|
|
423
|
+
// Without this, the first ask() issued after loadManifest() returns would score against stale embeddings.
|
|
424
|
+
if (this.pendingEmbedding)
|
|
425
|
+
await this.pendingEmbedding;
|
|
426
|
+
try {
|
|
427
|
+
const [queryVec] = await this.embedding.encode([query]);
|
|
428
|
+
const scores = new Map();
|
|
429
|
+
this.manifest.capabilities.forEach((cap, i) => {
|
|
430
|
+
const sim = this.cosineSim(queryVec, this.capEmbeddings[i]);
|
|
431
|
+
// Cosine sim is -1..1; map to 0–100, negatives floored to 0
|
|
432
|
+
scores.set(cap.id, Math.max(0, Math.round(sim * 100)));
|
|
433
|
+
});
|
|
434
|
+
return scores;
|
|
435
|
+
}
|
|
436
|
+
catch (err) {
|
|
437
|
+
logger.warn(`Embedding encode failed — skipping embedding signal: ${err instanceof Error ? err.message : String(err)}`);
|
|
438
|
+
return undefined;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
checkMatchHint(capability) {
|
|
442
|
+
const hint = capability.matchHint?.preferredMode;
|
|
443
|
+
if (!hint || hint === this.mode)
|
|
444
|
+
return;
|
|
445
|
+
// Advisory only — log but never enforce
|
|
446
|
+
logger.warn(`Capability "${capability.id}" prefers mode "${hint}" but engine is in "${this.mode}" mode. ` +
|
|
447
|
+
`Set mode: '${hint}' in EngineOptions to honor this hint.`);
|
|
448
|
+
}
|
|
312
449
|
/**
|
|
313
450
|
* Replaces the active manifest without creating a new engine instance.
|
|
314
451
|
* Useful for hot-reloading manifests in long-running servers without
|
|
@@ -323,11 +460,31 @@ export class CapmanEngine {
|
|
|
323
460
|
*/
|
|
324
461
|
async loadManifest(manifest) {
|
|
325
462
|
this.checkManifestVersion(manifest);
|
|
463
|
+
// Assign all derived state atomically before any await — an in-flight ask()
|
|
464
|
+
// must never see a new manifest paired with a stale bm25Index or ceiling.
|
|
326
465
|
this.manifest = manifest;
|
|
327
466
|
this.bm25Index = buildBM25Index(manifest.capabilities);
|
|
328
467
|
this.bm25Ceiling = this.calibrateBM25Ceiling();
|
|
329
468
|
this.adaptiveMargin = this.calibrateAdaptiveMargin();
|
|
469
|
+
this.manifestVersion++;
|
|
470
|
+
// Server selection needs no update here — resolveBaseUrl() reads this.manifest.servers each time it is called.
|
|
330
471
|
await this.clearCache();
|
|
472
|
+
// Re-encode capabilities after manifest swap — stale embeddings misalign with new capabilities
|
|
473
|
+
if (this.embedding) {
|
|
474
|
+
const texts = manifest.capabilities.map(c => `${c.name}: ${c.description}`);
|
|
475
|
+
this.pendingEmbedding = this.embedding.encode(texts).then(vecs => {
|
|
476
|
+
this.capEmbeddings = vecs;
|
|
477
|
+
this.pendingEmbedding = null;
|
|
478
|
+
logger.info('Capability embeddings re-encoded after manifest reload');
|
|
479
|
+
}).catch(err => {
|
|
480
|
+
this.capEmbeddings = undefined;
|
|
481
|
+
this.pendingEmbedding = null;
|
|
482
|
+
logger.warn(`EmbeddingProvider re-encode failed after loadManifest: ${err instanceof Error ? err.message : String(err)}`);
|
|
483
|
+
});
|
|
484
|
+
}
|
|
485
|
+
else {
|
|
486
|
+
this.pendingEmbedding = null;
|
|
487
|
+
}
|
|
331
488
|
}
|
|
332
489
|
/**
|
|
333
490
|
* Explain what would happen for a query — without executing it.
|
|
@@ -569,13 +726,15 @@ export class CapmanEngine {
|
|
|
569
726
|
let matchResult;
|
|
570
727
|
let resolvedVia = 'keyword';
|
|
571
728
|
// Fuzzy options — never applied in cheap mode
|
|
729
|
+
const embeddingScores = await this.buildEmbeddingScores(query);
|
|
572
730
|
const fuzzyOpts = {
|
|
573
731
|
fuzzyMatch: this.fuzzyMatch,
|
|
574
732
|
fuzzyThreshold: this.fuzzyThreshold,
|
|
575
733
|
bm25Index: this.bm25Index,
|
|
576
|
-
bm25Ceiling: this.bm25Ceiling,
|
|
577
734
|
bm25K1: this.bm25K1,
|
|
578
735
|
bm25B: this.bm25B,
|
|
736
|
+
bm25Ceiling: this.bm25Ceiling,
|
|
737
|
+
embeddingScores,
|
|
579
738
|
};
|
|
580
739
|
switch (this.mode) {
|
|
581
740
|
case 'cheap': {
|
|
@@ -598,20 +757,33 @@ export class CapmanEngine {
|
|
|
598
757
|
else {
|
|
599
758
|
const t = Date.now();
|
|
600
759
|
try {
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
})
|
|
614
|
-
|
|
760
|
+
const kwResultAccurate = _match(query, this.manifest, fuzzyOpts);
|
|
761
|
+
const top3Accurate = kwResultAccurate.candidates
|
|
762
|
+
.sort((a, b) => b.score - a.score)
|
|
763
|
+
.filter(c => c.score > 0)
|
|
764
|
+
.slice(0, 3)
|
|
765
|
+
.map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
|
|
766
|
+
.filter(Boolean);
|
|
767
|
+
// Skip LLM if no candidates scored above zero — no meaningful top-3 to discriminate
|
|
768
|
+
if (top3Accurate.length === 0) {
|
|
769
|
+
matchResult = kwResultAccurate;
|
|
770
|
+
}
|
|
771
|
+
else {
|
|
772
|
+
const llmResult = await _matchWithLLM(query, top3Accurate, { llm: this.llm, app: this.manifest.app });
|
|
773
|
+
this.recordLLMSuccess();
|
|
774
|
+
resolvedVia = 'llm';
|
|
775
|
+
// If LLM says OOS but keyword had a match, the correct capability may have
|
|
776
|
+
// been rank 4+. Fall back to keyword result rather than returning OOS.
|
|
777
|
+
matchResult = llmResult.capability === null ? kwResultAccurate : {
|
|
778
|
+
...llmResult,
|
|
779
|
+
candidates: llmResult.candidates.map(c => ({
|
|
780
|
+
...c,
|
|
781
|
+
score: c.matched
|
|
782
|
+
? c.score
|
|
783
|
+
: (kwResultAccurate.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
|
|
784
|
+
})),
|
|
785
|
+
};
|
|
786
|
+
}
|
|
615
787
|
steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t, detail: `confidence: ${matchResult.confidence}%` });
|
|
616
788
|
}
|
|
617
789
|
catch (err) {
|
|
@@ -656,19 +828,32 @@ export class CapmanEngine {
|
|
|
656
828
|
logger.debug(`Query escalated to LLM: "${query}"`);
|
|
657
829
|
const t2 = Date.now();
|
|
658
830
|
try {
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
831
|
+
const top3Balanced = keywordResult.candidates
|
|
832
|
+
.sort((a, b) => b.score - a.score)
|
|
833
|
+
.filter(c => c.score > 0)
|
|
834
|
+
.slice(0, 3)
|
|
835
|
+
.map(c => this.manifest.capabilities.find(cap => cap.id === c.capabilityId))
|
|
836
|
+
.filter(Boolean);
|
|
837
|
+
// Balanced mode only escalates when keyword confidence is low but > 0 —
|
|
838
|
+
// top3 should always be non-empty here, but guard anyway
|
|
839
|
+
if (top3Balanced.length === 0) {
|
|
840
|
+
matchResult = keywordResult;
|
|
841
|
+
}
|
|
842
|
+
else {
|
|
843
|
+
const llmResult = await _matchWithLLM(query, top3Balanced, { llm: this.llm, app: this.manifest.app });
|
|
844
|
+
this.recordLLMSuccess();
|
|
845
|
+
resolvedVia = 'llm';
|
|
846
|
+
// If LLM returns OOS but keyword had a scored candidate, fall back to keyword
|
|
847
|
+
matchResult = llmResult.capability === null ? keywordResult : {
|
|
848
|
+
...llmResult,
|
|
849
|
+
candidates: llmResult.candidates.map(c => ({
|
|
850
|
+
...c,
|
|
851
|
+
score: c.matched
|
|
852
|
+
? c.score
|
|
853
|
+
: (keywordResult.candidates.find(kc => kc.capabilityId === c.capabilityId)?.score ?? 0),
|
|
854
|
+
})),
|
|
855
|
+
};
|
|
856
|
+
}
|
|
672
857
|
steps?.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t2, detail: `confidence: ${matchResult.confidence}%` });
|
|
673
858
|
}
|
|
674
859
|
catch (err) {
|
|
@@ -684,7 +869,11 @@ export class CapmanEngine {
|
|
|
684
869
|
break;
|
|
685
870
|
}
|
|
686
871
|
}
|
|
687
|
-
|
|
872
|
+
if (matchResult === undefined) {
|
|
873
|
+
const exhaustive = this.mode;
|
|
874
|
+
throw new Error(`_runMatch: unhandled MatchMode "${exhaustive}"`);
|
|
875
|
+
}
|
|
876
|
+
return { matchResult, resolvedVia };
|
|
688
877
|
}
|
|
689
878
|
/**
|
|
690
879
|
* Applies learning boost to a MatchResult and returns the updated result.
|
|
@@ -755,7 +944,15 @@ export class CapmanEngine {
|
|
|
755
944
|
const hits = wordIndex[candidate.capabilityId] ?? 0;
|
|
756
945
|
if (hits > 0) {
|
|
757
946
|
// Logarithmic boost — diminishing returns after first few hits
|
|
758
|
-
|
|
947
|
+
const rawBoost = Math.min(5, Math.log2(hits + 1) * 2);
|
|
948
|
+
// IDF weighting — common words ("get", "show", "user") appear in many
|
|
949
|
+
// capabilities and accumulate learning hits that carry little signal.
|
|
950
|
+
// Reuses BM25 df/N so no separate computation is needed.
|
|
951
|
+
const df = this.bm25Index.df[word] ?? 0;
|
|
952
|
+
const idf = df > 0
|
|
953
|
+
? Math.log((this.bm25Index.N - df + 0.5) / (df + 0.5) + 1)
|
|
954
|
+
: 0;
|
|
955
|
+
boost += rawBoost * Math.min(1, idf);
|
|
759
956
|
}
|
|
760
957
|
}
|
|
761
958
|
const cappedBoost = Math.min(15, Math.round(boost));
|
|
@@ -769,10 +966,26 @@ export class CapmanEngine {
|
|
|
769
966
|
};
|
|
770
967
|
});
|
|
771
968
|
}
|
|
969
|
+
/**
|
|
970
|
+
* Resolves the effective baseUrl from manifest.servers[] or EngineOptions.baseUrl.
|
|
971
|
+
* Priority: environment-matched server > first server > explicit baseUrl > undefined
|
|
972
|
+
*/
|
|
973
|
+
resolveBaseUrl() {
|
|
974
|
+
const servers = this.manifest.servers;
|
|
975
|
+
if (!servers?.length)
|
|
976
|
+
return this.baseUrl;
|
|
977
|
+
if (this.environment) {
|
|
978
|
+
const match = servers.find(s => s.environment === this.environment);
|
|
979
|
+
if (match)
|
|
980
|
+
return match.url.replace(/\/$/, '');
|
|
981
|
+
}
|
|
982
|
+
// Fallback to first server
|
|
983
|
+
return servers[0].url.replace(/\/$/, '');
|
|
984
|
+
}
|
|
772
985
|
// ── Private helpers ────────────────────────────────────────────────────────
|
|
773
986
|
resolveOptions(overrides = {}) {
|
|
774
987
|
return {
|
|
775
|
-
baseUrl: this.
|
|
988
|
+
baseUrl: this.resolveBaseUrl(),
|
|
776
989
|
auth: this.auth,
|
|
777
990
|
headers: this.headers,
|
|
778
991
|
...overrides,
|
|
@@ -792,16 +1005,7 @@ export class CapmanEngine {
|
|
|
792
1005
|
});
|
|
793
1006
|
}
|
|
794
1007
|
calibrateBM25Ceiling() {
|
|
795
|
-
|
|
796
|
-
for (const cap of this.manifest.capabilities) {
|
|
797
|
-
if (!cap.examples?.length)
|
|
798
|
-
continue;
|
|
799
|
-
const selfWords = new Set(tokenize(cap.examples[0]));
|
|
800
|
-
const raw = _scoreCapability(selfWords, cap, this.bm25Index, this.bm25K1, this.bm25B);
|
|
801
|
-
if (raw > max)
|
|
802
|
-
max = raw;
|
|
803
|
-
}
|
|
804
|
-
return max > 0 ? max : 100;
|
|
1008
|
+
return _calibrateCeiling(this.manifest.capabilities, this.bm25Index, this.bm25K1, this.bm25B);
|
|
805
1009
|
}
|
|
806
1010
|
/**
|
|
807
1011
|
* Calibrates the adaptive margin threshold from the manifest's own score
|
|
@@ -814,6 +1018,10 @@ export class CapmanEngine {
|
|
|
814
1018
|
* For manifests with ≤100 capabilities this is negligible (<10ms).
|
|
815
1019
|
* For very large manifests (500+ capabilities), consider passing
|
|
816
1020
|
* `adaptiveMarginOverride` to skip calibration.
|
|
1021
|
+
*
|
|
1022
|
+
* Note: constructor total cost also includes BM25 index build O(capabilities × tokens)
|
|
1023
|
+
* and embedding pre-encoding O(capabilities) if an EmbeddingProvider is configured.
|
|
1024
|
+
* For 100 capabilities with embeddings, expect ~100–500ms depending on provider latency (the constructor does not await this encode; the embedding signal is skipped until it resolves).
|
|
817
1025
|
*/
|
|
818
1026
|
calibrateAdaptiveMargin() {
|
|
819
1027
|
if (this.manifest.capabilities.length < 2)
|
|
@@ -829,10 +1037,14 @@ export class CapmanEngine {
|
|
|
829
1037
|
for (const cap of this.manifest.capabilities) {
|
|
830
1038
|
if (!cap.examples?.length)
|
|
831
1039
|
continue;
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
1040
|
+
// Use all examples and take the maximum margin — same rationale as
|
|
1041
|
+
// calibrateBM25Ceiling(): a weak first example skews the calibration.
|
|
1042
|
+
for (const example of cap.examples) {
|
|
1043
|
+
const result = _match(example, this.manifest, fuzzyOpts);
|
|
1044
|
+
const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
|
|
1045
|
+
if (sorted.length >= 2) {
|
|
1046
|
+
margins.push(sorted[0].score - sorted[1].score);
|
|
1047
|
+
}
|
|
836
1048
|
}
|
|
837
1049
|
}
|
|
838
1050
|
if (margins.length === 0)
|