capman 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, STOPWORDS, LLMParseError } from './matcher';
1
+ import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, scoreCapability as _scoreCapability, sanitizeForPrompt } from './matcher';
2
2
  import { resolve as _resolve, checkPrivacy } from './resolver';
3
3
  import { MemoryLearningStore } from './learning';
4
4
  import { logger } from './logger';
@@ -27,6 +27,12 @@ export class CapmanEngine {
27
27
  this.llmCircuitBreakerResetMs = options.llmCircuitBreakerResetMs ?? 60_000;
28
28
  this.fuzzyMatch = options.fuzzyMatch ?? false;
29
29
  this.fuzzyThreshold = options.fuzzyThreshold ?? 0.4;
30
+ this.bm25K1 = options.bm25K1 ?? 1.5;
31
+ this.bm25B = options.bm25B ?? 0.75;
32
+ this.bm25Index = buildBM25Index(options.manifest.capabilities);
33
+ this.bm25Ceiling = this.calibrateBM25Ceiling();
34
+ this.marginAwareLLM = options.marginAwareLLM ?? false;
35
+ this.adaptiveMargin = options.adaptiveMarginOverride ?? this.calibrateAdaptiveMargin();
30
36
  // Cache — default MemoryCache (no filesystem writes), or disabled with false
31
37
  // Use FileCache or ComboCache explicitly for persistence across restarts
32
38
  this.cache = options.cache === false
@@ -90,12 +96,16 @@ export class CapmanEngine {
90
96
  resolvedVia: 'cache',
91
97
  totalMs: Date.now() - start,
92
98
  };
99
+ const { verdict: cacheVerdict, margin: cacheMargin } = this.computeVerdict(matchWithFreshParams);
93
100
  const result = {
94
101
  match: matchWithFreshParams,
95
102
  resolution,
96
103
  resolvedVia: 'cache',
97
104
  durationMs: Date.now() - start,
98
105
  trace,
106
+ verdict: cacheVerdict,
107
+ margin: cacheMargin,
108
+ missingParams: undefined
99
109
  };
100
110
  await this.recordLearning(query, matchWithFreshParams, 'cache');
101
111
  return result;
@@ -123,7 +133,19 @@ export class CapmanEngine {
123
133
  detail: privacyError ?? `level: ${matchResult.capability.privacy.level}`,
124
134
  });
125
135
  }
126
- // ── Step 4: Resolve ──────────────────────────────────────────────────────
136
+ // ── Step 4a: Compute verdict + optional margin-aware LLM disambiguation ──
137
+ let { verdict, margin } = this.computeVerdict(matchResult);
138
+ if (verdict === 'marginal' &&
139
+ this.marginAwareLLM &&
140
+ this.llm &&
141
+ this.mode === 'balanced') {
142
+ matchResult = await this.disambiguateLLM(query, matchResult, steps);
143
+ // Recompute verdict after disambiguation
144
+ const recomputed = this.computeVerdict(matchResult);
145
+ verdict = recomputed.verdict;
146
+ margin = recomputed.margin;
147
+ }
148
+ // ── Step 4b: Resolve ──────────────────────────────────────────────────────
127
149
  const resolveStart = Date.now();
128
150
  const resolution = await _resolve(matchResult, matchResult.extractedParams, this.resolveOptions(overrides));
129
151
  steps.push({
@@ -145,6 +167,57 @@ export class CapmanEngine {
145
167
  await this.cache.set(capKey, matchResult);
146
168
  // capKey always starts with 'cap:' — structurally distinct from queryKey
147
169
  }
170
+ // ── Step 5b: Compute missingParams ───────────────────────────────────────
171
+ // Spec: LLM attempts extraction first when available. missingParams is last resort.
172
+ let missingParams;
173
+ if (matchResult.capability && resolvedVia !== 'llm') {
174
+ const cap = matchResult.capability;
175
+ const unresolved = cap.params.filter(p => p.source === 'user_query' && p.required
176
+ && matchResult.extractedParams[p.name] === null);
177
+ if (unresolved.length > 0 && this.llm && this.mode !== 'cheap') {
178
+ // LLM available — attempt targeted param extraction before declaring incomplete
179
+ const skipReason = this.checkLLMAllowed();
180
+ if (!skipReason) {
181
+ try {
182
+ const paramExtractionStart = Date.now();
183
+ const paramDescriptions = unresolved
184
+ .map(p => `- ${p.name}: ${p.description}`)
185
+ .join('\n');
186
+ const paramPrompt = `Extract the following parameters from this user query.\n` +
187
+ `Query: ${JSON.stringify({ user_query: query })}\n\n` +
188
+ `Parameters to extract:\n${paramDescriptions}\n\n` +
189
+ `Respond ONLY with valid JSON: { "params": { "<name>": "<value or null>" } }`;
190
+ const raw = await this.llm(paramPrompt);
191
+ const clean = raw.replace(/```json|```/g, '').trim();
192
+ const parsed = JSON.parse(clean);
193
+ this.recordLLMSuccess();
194
+ steps.push({
195
+ type: 'llm_match',
196
+ status: 'pass',
197
+ durationMs: Date.now() - paramExtractionStart,
198
+ detail: `param extraction: ${unresolved.map(p => p.name).join(', ')}`,
199
+ });
200
+ // Merge LLM-extracted values — validate type before accepting
201
+ for (const p of unresolved) {
202
+ const val = parsed?.params?.[p.name];
203
+ if (val && typeof val === 'string' && val.trim().length > 0) {
204
+ matchResult.extractedParams[p.name] = val.trim();
205
+ }
206
+ }
207
+ }
208
+ catch {
209
+ // LLM param extraction failed — fall through to missingParams below
210
+ }
211
+ }
212
+ }
213
+ // After LLM attempt (or if skipped/unavailable), report what's still missing
214
+ const stillMissing = cap.params
215
+ .filter(p => p.source === 'user_query' && p.required
216
+ && matchResult.extractedParams[p.name] === null)
217
+ .map(p => p.name);
218
+ if (stillMissing.length > 0)
219
+ missingParams = stillMissing;
220
+ }
148
221
  // ── Step 6: Build reasoning array ────────────────────────────────────────
149
222
  const reasoning = [];
150
223
  if (matchResult.candidates.length) {
@@ -189,6 +262,9 @@ export class CapmanEngine {
189
262
  resolvedVia,
190
263
  durationMs: Date.now() - start,
191
264
  trace,
265
+ verdict,
266
+ margin,
267
+ missingParams,
192
268
  };
193
269
  }
194
270
  /**
@@ -248,11 +324,10 @@ export class CapmanEngine {
248
324
  async loadManifest(manifest) {
249
325
  this.checkManifestVersion(manifest);
250
326
  this.manifest = manifest;
327
+ this.bm25Index = buildBM25Index(manifest.capabilities);
328
+ this.bm25Ceiling = this.calibrateBM25Ceiling();
329
+ this.adaptiveMargin = this.calibrateAdaptiveMargin();
251
330
  await this.clearCache();
252
- // Note: LLM rate limiter state (llmCallsThisMinute, llmConsecutiveFails,
253
- // llmCircuitOpenAt) is intentionally preserved across manifest reloads.
254
- // The LLM provider has not changed, so circuit breaker state remains valid.
255
- // If you need a clean rate limiter state, create a new CapmanEngine instance.
256
331
  }
257
332
  /**
258
333
  * Explain what would happen for a query — without executing it.
@@ -291,7 +366,8 @@ export class CapmanEngine {
291
366
  // ── Apply learning boost (same as ask()) ─────────────────────────────────
292
367
  matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
293
368
  // ── Build candidate explanations ─────────────────────────────────────────
294
- const qWordSet = new Set(query.toLowerCase().split(/\W+/).filter(Boolean));
369
+ const qTokens = tokenize(query);
370
+ const qWordSet = new Set(qTokens);
295
371
  const candidates = matchResult.candidates
296
372
  .sort((a, b) => b.score - a.score)
297
373
  .map(c => {
@@ -305,8 +381,8 @@ export class CapmanEngine {
305
381
  }
306
382
  else if (c.score >= 50) {
307
383
  const matchedWords = (cap?.examples ?? [])
308
- .flatMap(e => e.toLowerCase().split(/\s+/))
309
- .filter(w => qWordSet.has(w) && w.length > 2);
384
+ .flatMap(e => tokenize(e))
385
+ .filter(w => qWordSet.has(w));
310
386
  const unique = [...new Set(matchedWords)].slice(0, 3);
311
387
  explanation = unique.length
312
388
  ? `Matched keywords: ${unique.join(', ')} (${c.score}%)`
@@ -496,6 +572,10 @@ export class CapmanEngine {
496
572
  const fuzzyOpts = {
497
573
  fuzzyMatch: this.fuzzyMatch,
498
574
  fuzzyThreshold: this.fuzzyThreshold,
575
+ bm25Index: this.bm25Index,
576
+ bm25Ceiling: this.bm25Ceiling,
577
+ bm25K1: this.bm25K1,
578
+ bm25B: this.bm25B,
499
579
  };
500
580
  switch (this.mode) {
501
581
  case 'cheap': {
@@ -663,7 +743,7 @@ export class CapmanEngine {
663
743
  const stats = await this.learning.getStats();
664
744
  if (!stats || Object.keys(stats.index).length === 0)
665
745
  return candidates;
666
- const qWords = query.toLowerCase().split(/\W+/).filter(w => w.length > 2 && !STOPWORDS.has(w));
746
+ const qWords = tokenize(query);
667
747
  if (qWords.length === 0)
668
748
  return candidates;
669
749
  return candidates.map(candidate => {
@@ -711,6 +791,136 @@ export class CapmanEngine {
711
791
  timestamp: new Date().toISOString(),
712
792
  });
713
793
  }
794
+ calibrateBM25Ceiling() {
795
+ let max = 0;
796
+ for (const cap of this.manifest.capabilities) {
797
+ if (!cap.examples?.length)
798
+ continue;
799
+ const selfWords = new Set(tokenize(cap.examples[0]));
800
+ const raw = _scoreCapability(selfWords, cap, this.bm25Index, this.bm25K1, this.bm25B);
801
+ if (raw > max)
802
+ max = raw;
803
+ }
804
+ return max > 0 ? max : 100;
805
+ }
806
+ /**
807
+ * Calibrates the adaptive margin threshold from the manifest's own score
808
+ * distribution. Runs each capability's first example against all other
809
+ * capabilities to find the typical inter-capability score spread.
810
+ * Dense overlapping vocabulary → lower margin (harder to separate).
811
+ * Sparse vocabulary → higher margin (easier to separate).
812
+ *
813
+ * Complexity: O(capabilities²) — runs at constructor time and on loadManifest().
814
+ * For manifests with ≤100 capabilities this is negligible (<10ms).
815
+ * For very large manifests (500+ capabilities), consider passing
816
+ * `adaptiveMarginOverride` to skip calibration.
817
+ */
818
+ calibrateAdaptiveMargin() {
819
+ if (this.manifest.capabilities.length < 2)
820
+ return 20;
821
+ const margins = [];
822
+ const fuzzyOpts = {
823
+ fuzzyMatch: false, // calibration uses keyword only — deterministic
824
+ bm25Index: this.bm25Index,
825
+ bm25Ceiling: this.bm25Ceiling,
826
+ bm25K1: this.bm25K1,
827
+ bm25B: this.bm25B,
828
+ };
829
+ for (const cap of this.manifest.capabilities) {
830
+ if (!cap.examples?.length)
831
+ continue;
832
+ const result = _match(cap.examples[0], this.manifest, fuzzyOpts);
833
+ const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
834
+ if (sorted.length >= 2) {
835
+ margins.push(sorted[0].score - sorted[1].score);
836
+ }
837
+ }
838
+ if (margins.length === 0)
839
+ return 20;
840
+ // Use 25th percentile of margins as the threshold — manifests where
841
+ // capabilities are naturally close together get a tighter threshold
842
+ margins.sort((a, b) => a - b);
843
+ const p25 = margins[Math.floor(margins.length * 0.25)];
844
+ return Math.max(10, Math.min(30, Math.round(p25 * 0.6)));
845
+ }
846
+ computeVerdict(matchResult) {
847
+ if (!matchResult.capability)
848
+ return { verdict: 'uncertain', margin: 0 };
849
+ const sorted = [...matchResult.candidates].sort((a, b) => b.score - a.score);
850
+ const best = sorted[0]?.score ?? 0;
851
+ const second = sorted[1]?.score ?? 0;
852
+ const margin = best - second;
853
+ if (best < 60)
854
+ return { verdict: 'uncertain', margin };
855
+ if (margin < this.adaptiveMargin)
856
+ return { verdict: 'marginal', margin };
857
+ return { verdict: 'clear', margin };
858
+ }
859
+ /**
860
+ * Targeted disambiguation between top-2 candidates.
861
+ * Sends ~200 tokens instead of full manifest (~4000 tokens) — 93% cost reduction.
862
+ * Returns updated matchResult with LLM-preferred winner, or original on failure.
863
+ */
864
+ async disambiguateLLM(query, matchResult, steps) {
865
+ if (!this.llm)
866
+ return matchResult;
867
+ const sorted = [...matchResult.candidates]
868
+ .sort((a, b) => b.score - a.score)
869
+ .slice(0, 2);
870
+ if (sorted.length < 2)
871
+ return matchResult;
872
+ const capA = this.manifest.capabilities.find(c => c.id === sorted[0].capabilityId);
873
+ const capB = this.manifest.capabilities.find(c => c.id === sorted[1].capabilityId);
874
+ if (!capA || !capB)
875
+ return matchResult;
876
+ const skipReason = this.checkLLMAllowed();
877
+ if (skipReason) {
878
+ logger.warn(`Disambiguation LLM skipped — ${skipReason}`);
879
+ steps.push({ type: 'llm_match', status: 'skip', durationMs: 0, detail: `disambiguation skipped: ${skipReason}` });
880
+ return matchResult;
881
+ }
882
+ const prompt = `Two capabilities are close matches for this query. Pick the best one.
883
+
884
+ Query: ${JSON.stringify({ user_query: query })}
885
+
886
+ Option A: ${capA.id} — ${sanitizeForPrompt(capA.description, 150)}
887
+ Option B: ${capB.id} — ${sanitizeForPrompt(capB.description, 150)}
888
+
889
+ Respond ONLY with valid JSON:
890
+ { "winner": "<capability_id>", "confidence": <0-100>, "reasoning": "<one sentence>" }`;
891
+ const t = Date.now();
892
+ try {
893
+ const raw = await this.llm(prompt);
894
+ const clean = raw.replace(/```json|```/g, '').trim();
895
+ const parsed = JSON.parse(clean);
896
+ this.recordLLMSuccess();
897
+ const winner = this.manifest.capabilities.find(c => c.id === parsed.winner);
898
+ if (!winner) {
899
+ steps.push({ type: 'llm_match', status: 'fail', durationMs: Date.now() - t, detail: 'disambiguation returned unknown id' });
900
+ return matchResult;
901
+ }
902
+ steps.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t, detail: `disambiguation: ${winner.id} (${parsed.confidence}%)` });
903
+ const confidence = typeof parsed.confidence === 'number' && !isNaN(parsed.confidence)
904
+ ? Math.min(100, Math.max(0, Math.round(parsed.confidence)))
905
+ : matchResult.confidence; // fallback to original if LLM returned bad value
906
+ return {
907
+ ...matchResult,
908
+ capability: winner,
909
+ confidence,
910
+ intent: resolverToIntent(winner),
911
+ extractedParams: extractParams(query, winner),
912
+ candidates: matchResult.candidates.map(c => ({ ...c, matched: c.capabilityId === winner.id })),
913
+ reasoning: parsed.reasoning ?? `Disambiguated to "${winner.id}"`,
914
+ };
915
+ }
916
+ catch (err) {
917
+ const isParseError = err instanceof LLMParseError;
918
+ if (!isParseError)
919
+ this.recordLLMFailure();
920
+ steps.push({ type: 'llm_match', status: 'fail', durationMs: Date.now() - t, detail: String(err) });
921
+ return matchResult;
922
+ }
923
+ }
714
924
  }
715
925
  /** Maximum allowed query length in characters. Queries exceeding this throw RangeError. */
716
926
  CapmanEngine.MAX_QUERY_LENGTH = 1000;
@@ -5,6 +5,7 @@ export { generate, loadConfig, writeManifest, readManifest, validate, generateSt
5
5
  export { match, matchWithLLM, extractParams, } from './matcher';
6
6
  export { LLMParseError } from './matcher';
7
7
  export type { LLMMatcherOptions } from './matcher';
8
+ export { TYPE_PATTERNS } from './matcher';
8
9
  export { resolve } from './resolver';
9
10
  export type { ResolveOptions, AuthContext } from './resolver';
10
11
  export { CapmanEngine } from './engine';
package/dist/esm/index.js CHANGED
@@ -2,6 +2,7 @@ export { setLogLevel } from './logger';
2
2
  export { generate, loadConfig, writeManifest, readManifest, validate, generateStarterConfig, } from './generator';
3
3
  export { match, matchWithLLM, extractParams, } from './matcher';
4
4
  export { LLMParseError } from './matcher';
5
+ export { TYPE_PATTERNS } from './matcher';
5
6
  export { resolve } from './resolver';
6
7
  // ─── Engine (recommended API) ─────────────────────────────────────────────────
7
8
  export { CapmanEngine } from './engine';
@@ -2,7 +2,7 @@ import * as fs from 'fs';
2
2
  import * as path from 'path';
3
3
  import { logger } from './logger';
4
4
  const MAX_LEARNING_ENTRIES = 10_000;
5
- import { STOPWORDS } from './matcher';
5
+ import { tokenize } from './matcher';
6
6
  // Module-level registry — tracks all active FileLearningStore instances
7
7
  // for process exit flushing. Handlers registered once to avoid accumulation.
8
8
  const activeStores = new Set();
@@ -71,13 +71,15 @@ class LearningIndex {
71
71
  if (!entry.capabilityId)
72
72
  this.statsCounter.outOfScope++;
73
73
  if (entry.capabilityId) {
74
- const words = entry.query.toLowerCase()
75
- .split(/\W+/)
76
- .filter(w => w.length > 2 && !STOPWORDS.has(w));
74
+ // Confidence-weighted contribution — a 95% match contributes roughly 1.9×
75
+ // more signal than a 51% borderline match. Floor of 0.1 ensures
76
+ // borderline matches still contribute, just proportionally less.
77
+ const weight = Math.max(0.1, entry.confidence / 100);
78
+ const words = tokenize(entry.query);
77
79
  for (const word of words) {
78
80
  this.index[word] ??= {};
79
81
  this.index[word][entry.capabilityId] =
80
- (this.index[word][entry.capabilityId] ?? 0) + 1;
82
+ (this.index[word][entry.capabilityId] ?? 0) + weight;
81
83
  }
82
84
  }
83
85
  }
@@ -93,14 +95,14 @@ class LearningIndex {
93
95
  return;
94
96
  }
95
97
  // Keyword index cleanup
96
- const words = entry.query.toLowerCase()
97
- .split(/\W+/)
98
- .filter(w => w.length > 2 && !STOPWORDS.has(w));
98
+ const words = tokenize(entry.query);
99
99
  for (const word of words) {
100
100
  if (!this.index[word])
101
101
  continue;
102
+ // Subtract estimated weight (0.5 average) — exact weight not stored.
103
+ // Minor drift on prune is acceptable; index is rebuilt when drift matters.
102
104
  this.index[word][entry.capabilityId] =
103
- (this.index[word][entry.capabilityId] ?? 1) - 1;
105
+ (this.index[word][entry.capabilityId] ?? 0.5) - 0.5;
104
106
  if (this.index[word][entry.capabilityId] <= 0) {
105
107
  delete this.index[word][entry.capabilityId];
106
108
  }
@@ -255,11 +257,7 @@ export class FileLearningStore {
255
257
  // not be persisted to disk under GDPR/CCPA data retention requirements.
256
258
  const sanitized = {
257
259
  ...entry,
258
- query: entry.query
259
- .toLowerCase()
260
- .split(/\W+/)
261
- .filter(w => w.length > 2 && !STOPWORDS.has(w))
262
- .join(' '),
260
+ query: tokenize(entry.query).join(' '),
263
261
  };
264
262
  this.entries.push(sanitized);
265
263
  this.learningIndex.update(sanitized);
@@ -308,11 +306,7 @@ export class MemoryLearningStore {
308
306
  async record(entry) {
309
307
  const sanitized = {
310
308
  ...entry,
311
- query: entry.query
312
- .toLowerCase()
313
- .split(/\W+/)
314
- .filter(w => w.length > 2 && !STOPWORDS.has(w))
315
- .join(' '),
309
+ query: tokenize(entry.query).join(' '),
316
310
  };
317
311
  this.entries.push(sanitized);
318
312
  this.learningIndex.update(sanitized);
@@ -3,7 +3,58 @@ export declare class LLMParseError extends Error {
3
3
  constructor(message: string);
4
4
  }
5
5
  export declare const STOPWORDS: Set<string>;
6
+ /**
7
+ * Regex patterns for common param types.
8
+ * Used when a CapabilityParam has `pattern` set to a named type.
9
+ */
10
+ export declare const TYPE_PATTERNS: Record<string, RegExp>;
11
+ /**
12
+ * Simplified suffix-stripping stemmer — 10 most common English morphological
13
+ * patterns covering ~80% of benefit at ~25% the complexity of Porter stemmer.
14
+ * Applied symmetrically to both query words and capability index words.
15
+ */
16
+ export declare function stem(word: string): string;
17
+ /**
18
+ * Shared tokenizer — used by scorer, learning index, and boost system.
19
+ * Applies stopword filtering AND stemming symmetrically.
20
+ * Any site that tokenizes text for matching MUST use this function
21
+ * to avoid silent mismatches between query and index tokens.
22
+ */
23
+ export declare function tokenize(text: string): string[];
24
+ export interface BM25Index {
25
+ /** Document frequency — how many capabilities contain each term */
26
+ df: Record<string, number>;
27
+ /** Average field length per field type */
28
+ avgdl: {
29
+ examples: number;
30
+ description: number;
31
+ name: number;
32
+ };
33
+ /** Total number of capabilities */
34
+ N: number;
35
+ /** Bigram sets per capability — post-stopword, post-stem, examples only */
36
+ bigrams: Record<string, Set<string>>;
37
+ }
38
+ /** Build a BM25 index over all capabilities. Call once at manifest load. */
39
+ export declare function buildBM25Index(capabilities: Capability[]): BM25Index;
40
+ /**
41
+ * BM25 scoring with field weights.
42
+ * k1 = 1.5 (TF saturation), b = 0.75 (length normalization)
43
+ * Field weights: examples 0.6, description 0.3, name 0.1
44
+ */
45
+ export declare function scoreCapability(qWordSet: Set<string>, cap: Capability, index: BM25Index, k1?: number, b?: number): number;
46
+ /**
47
+ * Extracts bigrams from a token array as "token1__token2" strings.
48
+ * Input must already be post-stopword and post-stem (use tokenize() first).
49
+ */
50
+ export declare function extractBigrams(tokens: string[]): Set<string>;
6
51
  export declare function resolverToIntent(cap: Capability): MatchResult['intent'];
52
+ /**
53
+ * Strips characters that could break LLM prompt structure from
54
+ * capability field values before injection into the system prompt.
55
+ * Removes control characters, newlines, and delimiter-like sequences.
56
+ */
57
+ export declare function sanitizeForPrompt(value: string, maxLen: number): string;
7
58
  /**
8
59
  * Extracts parameter values from a user query using keyword heuristics.
9
60
  *
@@ -22,6 +73,10 @@ export declare function extractParams(query: string, cap: Capability): Record<st
22
73
  export interface MatchOptions {
23
74
  fuzzyMatch?: boolean;
24
75
  fuzzyThreshold?: number;
76
+ bm25Index?: BM25Index;
77
+ bm25K1?: number;
78
+ bm25B?: number;
79
+ bm25Ceiling?: number;
25
80
  }
26
81
  export declare function match(query: string, manifest: Manifest, options?: MatchOptions): MatchResult;
27
82
  export interface LLMMatcherOptions {