capman 0.5.5 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/CHANGELOG.md +1 -1
  2. package/bin/lib/cmd-generate.js +156 -12
  3. package/bin/lib/cmd-help.js +3 -0
  4. package/dist/cjs/cache.d.ts +9 -0
  5. package/dist/cjs/cache.d.ts.map +1 -1
  6. package/dist/cjs/cache.js +37 -7
  7. package/dist/cjs/cache.js.map +1 -1
  8. package/dist/cjs/engine.d.ts +68 -1
  9. package/dist/cjs/engine.d.ts.map +1 -1
  10. package/dist/cjs/engine.js +313 -13
  11. package/dist/cjs/engine.js.map +1 -1
  12. package/dist/cjs/generator.d.ts.map +1 -1
  13. package/dist/cjs/generator.js +28 -6
  14. package/dist/cjs/generator.js.map +1 -1
  15. package/dist/cjs/index.d.ts +3 -1
  16. package/dist/cjs/index.d.ts.map +1 -1
  17. package/dist/cjs/index.js +5 -1
  18. package/dist/cjs/index.js.map +1 -1
  19. package/dist/cjs/learning.d.ts +7 -0
  20. package/dist/cjs/learning.d.ts.map +1 -1
  21. package/dist/cjs/learning.js +44 -23
  22. package/dist/cjs/learning.js.map +1 -1
  23. package/dist/cjs/matcher.d.ts +92 -0
  24. package/dist/cjs/matcher.d.ts.map +1 -1
  25. package/dist/cjs/matcher.js +354 -35
  26. package/dist/cjs/matcher.js.map +1 -1
  27. package/dist/cjs/parser.js +27 -9
  28. package/dist/cjs/parser.js.map +1 -1
  29. package/dist/cjs/resolver.d.ts +2 -2
  30. package/dist/cjs/resolver.d.ts.map +1 -1
  31. package/dist/cjs/resolver.js +66 -26
  32. package/dist/cjs/resolver.js.map +1 -1
  33. package/dist/cjs/schema.d.ts +865 -94
  34. package/dist/cjs/schema.d.ts.map +1 -1
  35. package/dist/cjs/schema.js +62 -12
  36. package/dist/cjs/schema.js.map +1 -1
  37. package/dist/cjs/types.d.ts +153 -9
  38. package/dist/cjs/types.d.ts.map +1 -1
  39. package/dist/cjs/version.d.ts +1 -1
  40. package/dist/cjs/version.js +1 -1
  41. package/dist/esm/cache.d.ts +9 -0
  42. package/dist/esm/cache.js +37 -7
  43. package/dist/esm/engine.d.ts +68 -1
  44. package/dist/esm/engine.js +314 -14
  45. package/dist/esm/generator.js +28 -6
  46. package/dist/esm/index.d.ts +3 -1
  47. package/dist/esm/index.js +2 -0
  48. package/dist/esm/learning.d.ts +7 -0
  49. package/dist/esm/learning.js +45 -24
  50. package/dist/esm/matcher.d.ts +92 -0
  51. package/dist/esm/matcher.js +346 -35
  52. package/dist/esm/parser.js +27 -9
  53. package/dist/esm/resolver.d.ts +2 -2
  54. package/dist/esm/resolver.js +66 -26
  55. package/dist/esm/schema.d.ts +865 -94
  56. package/dist/esm/schema.js +62 -12
  57. package/dist/esm/types.d.ts +153 -9
  58. package/dist/esm/version.d.ts +1 -1
  59. package/dist/esm/version.js +1 -1
  60. package/package.json +1 -1
@@ -2,7 +2,7 @@ import * as fs from 'fs';
2
2
  import * as path from 'path';
3
3
  import { logger } from './logger';
4
4
  const MAX_LEARNING_ENTRIES = 10_000;
5
- import { STOPWORDS } from './matcher';
5
+ import { tokenize } from './matcher';
6
6
  // Module-level registry — tracks all active FileLearningStore instances
7
7
  // for process exit flushing. Handlers registered once to avoid accumulation.
8
8
  const activeStores = new Set();
@@ -71,13 +71,19 @@ class LearningIndex {
71
71
  if (!entry.capabilityId)
72
72
  this.statsCounter.outOfScope++;
73
73
  if (entry.capabilityId) {
74
- const words = entry.query.toLowerCase()
75
- .split(/\W+/)
76
- .filter(w => w.length > 2 && !STOPWORDS.has(w));
74
+ // Confidence-weighted contribution — a 95% match contributes ~1.9×
75
+ // more signal than a 51% borderline match. Floor of 0.1 ensures
76
+ // borderline matches still contribute, just proportionally less.
77
+ const weight = Math.max(0.1, entry.confidence / 100);
78
+ // Store weight on the entry so subtract() can reverse the exact amount.
79
+ // Without this, subtract() would have to use a hardcoded estimate (0.5)
80
+ // that causes index drift after pruning high-confidence entries.
81
+ entry.weight = weight;
82
+ const words = tokenize(entry.query);
77
83
  for (const word of words) {
78
84
  this.index[word] ??= {};
79
85
  this.index[word][entry.capabilityId] =
80
- (this.index[word][entry.capabilityId] ?? 0) + 1;
86
+ (this.index[word][entry.capabilityId] ?? 0) + weight;
81
87
  }
82
88
  }
83
89
  }
@@ -93,14 +99,16 @@ class LearningIndex {
93
99
  return;
94
100
  }
95
101
  // Keyword index cleanup
96
- const words = entry.query.toLowerCase()
97
- .split(/\W+/)
98
- .filter(w => w.length > 2 && !STOPWORDS.has(w));
102
+ const words = tokenize(entry.query);
99
103
  for (const word of words) {
100
104
  if (!this.index[word])
101
105
  continue;
106
+ // Use the weight stored at record time for exact symmetric subtraction.
107
+ // Fallback recalculates from confidence for entries persisted before the
108
+ // weight field was added (backwards-compatible with older learning.json files).
109
+ const weight = entry.weight ?? Math.max(0.1, entry.confidence / 100);
102
110
  this.index[word][entry.capabilityId] =
103
- (this.index[word][entry.capabilityId] ?? 1) - 1;
111
+ (this.index[word][entry.capabilityId] ?? weight) - weight;
104
112
  if (this.index[word][entry.capabilityId] <= 0) {
105
113
  delete this.index[word][entry.capabilityId];
106
114
  }
@@ -166,8 +174,10 @@ export class FileLearningStore {
166
174
  fs.writeFileSync(tmp, payload);
167
175
  fs.renameSync(tmp, this.filePath);
168
176
  }
169
- catch {
170
- // Best-effort in exit handler
177
+ catch (err) {
178
+ // Use process.stderr.write — never console.error in an exit handler,
179
+ // as stdout may already be flushed or closed at this point.
180
+ process.stderr.write(`[capman] Failed to flush learning store to ${this.filePath}: ${err}\n`);
171
181
  }
172
182
  }
173
183
  /**
@@ -200,7 +210,26 @@ export class FileLearningStore {
200
210
  const raw = await fs.promises.readFile(this.filePath, 'utf-8');
201
211
  const parsed = JSON.parse(raw);
202
212
  if (parsed && typeof parsed === 'object' && !Array.isArray(parsed) && Array.isArray(parsed.entries)) {
203
- this.entries = parsed.entries;
213
+ // Validate each entry — corrupted entries (null or non-object items, wrong field types) must
214
+ // not propagate into the engine where they cause runtime errors deep in matching.
215
+ const validEntries = [];
216
+ let skipped = 0;
217
+ for (const entry of parsed.entries) {
218
+ if (entry !== null && typeof entry === 'object' &&
219
+ typeof entry.query === 'string' &&
220
+ (entry.capabilityId === null || typeof entry.capabilityId === 'string') &&
221
+ typeof entry.confidence === 'number' &&
222
+ typeof entry.resolvedVia === 'string') {
223
+ validEntries.push(entry);
224
+ }
225
+ else {
226
+ skipped++;
227
+ }
228
+ }
229
+ if (skipped > 0) {
230
+ logger.warn(`Learning store: skipped ${skipped} invalid entries during load`);
231
+ }
232
+ this.entries = validEntries;
204
233
  this.learningIndex.rebuild(this.entries);
205
234
  logger.debug(`Learning store loaded: ${this.entries.length} entries`);
206
235
  }
@@ -255,11 +284,7 @@ export class FileLearningStore {
255
284
  // not be persisted to disk under GDPR/CCPA data retention requirements.
256
285
  const sanitized = {
257
286
  ...entry,
258
- query: entry.query
259
- .toLowerCase()
260
- .split(/\W+/)
261
- .filter(w => w.length > 2 && !STOPWORDS.has(w))
262
- .join(' '),
287
+ query: tokenize(entry.query).join(' '),
263
288
  };
264
289
  this.entries.push(sanitized);
265
290
  this.learningIndex.update(sanitized);
@@ -308,19 +333,15 @@ export class MemoryLearningStore {
308
333
  async record(entry) {
309
334
  const sanitized = {
310
335
  ...entry,
311
- query: entry.query
312
- .toLowerCase()
313
- .split(/\W+/)
314
- .filter(w => w.length > 2 && !STOPWORDS.has(w))
315
- .join(' '),
336
+ query: tokenize(entry.query).join(' '),
316
337
  };
317
338
  this.entries.push(sanitized);
318
339
  this.learningIndex.update(sanitized);
319
340
  if (this.entries.length > MAX_LEARNING_ENTRIES) {
320
341
  const excess = this.entries.length - MAX_LEARNING_ENTRIES;
321
342
  const pruned = this.entries.splice(0, excess);
322
- for (const entry of pruned) {
323
- this.learningIndex.subtract(entry);
343
+ for (const staleEntry of pruned) {
344
+ this.learningIndex.subtract(staleEntry);
324
345
  }
325
346
  }
326
347
  }
@@ -3,7 +3,89 @@ export declare class LLMParseError extends Error {
3
3
  constructor(message: string);
4
4
  }
5
5
  export declare const STOPWORDS: Set<string>;
6
+ /**
7
+ * Regex patterns for common param types.
8
+ * Used when a CapabilityParam has `pattern` set to a named type.
9
+ */
10
+ export declare const TYPE_PATTERNS: Record<string, RegExp>;
11
+ /**
12
+ * Simplified suffix-stripping stemmer — 11 common English morphological
13
+ * patterns covering ~80% of benefit at ~25% the complexity of Porter stemmer.
14
+ * Applied symmetrically to both query words and capability index words.
15
+ */
16
+ export declare function stem(word: string): string;
17
+ /**
18
+ * Shared tokenizer — used by scorer, learning index, and boost system.
19
+ * Applies stopword filtering AND stemming symmetrically.
20
+ * Any site that tokenizes text for matching MUST use this function
21
+ * to avoid silent mismatches between query and index tokens.
22
+ */
23
+ export declare function tokenize(text: string): string[];
24
+ export interface BM25Index {
25
+ /** Document frequency — how many capabilities contain each term */
26
+ df: Record<string, number>;
27
+ /** Average field length per field type */
28
+ avgdl: {
29
+ examples: number;
30
+ description: number;
31
+ name: number;
32
+ };
33
+ /** Total number of capabilities */
34
+ N: number;
35
+ /** Bigram sets per capability — post-stopword, post-stem, examples only */
36
+ bigrams: Record<string, Set<string>>;
37
+ /**
38
+ * Pre-computed token arrays per capability, per field.
39
+ * Avoids re-tokenizing capability text on every scoreCapability() call.
40
+ * At 50 capabilities × 100 req/s, that is 5,000 redundant tokenization
41
+ * calls per second — each involving stem() and split/filter chains.
42
+ */
43
+ capTokens: Record<string, {
44
+ examples: string[];
45
+ description: string[];
46
+ name: string[];
47
+ }>;
48
+ }
49
+ /** Build a BM25 index over all capabilities. Call once at manifest load. */
50
+ export declare function buildBM25Index(capabilities: Capability[]): BM25Index;
51
+ /**
52
+ * BM25 scoring with field weights.
53
+ * k1 = 1.5 (TF saturation), b = 0.75 (length normalization)
54
+ * Field weights: examples 0.6, description 0.3, name 0.1
55
+ */
56
+ export declare function scoreCapability(qWordSet: Set<string>, cap: Capability, index: BM25Index, k1?: number, b?: number): number;
57
+ /**
58
+ * Extracts bigrams from a token array as "token1__token2" strings.
59
+ * Input must already be post-stopword and post-stem (use tokenize() first).
60
+ */
61
+ export declare function extractBigrams(tokens: string[]): Set<string>;
62
+ /**
63
+ * Returns a sub-manifest containing only capabilities that match ALL provided tags.
64
+ * Capabilities without tags are excluded when tags filter is active.
65
+ * Enables token-efficient LLM prompts for large manifests:
66
+ *
67
+ * @example
68
+ * // Only send order-related capabilities to LLM
69
+ * const orderManifest = filterByTags(manifest, ['orders'])
70
+ * const result = await matchWithLLM(query, orderManifest, { llm })
71
+ *
72
+ * @example
73
+ * // Match by any of multiple tags (union) — call filterByTags per tag and merge (de-duplicate by id: a capability carrying both tags appears twice)
74
+ * const ordersOrPayments = [
75
+ * ...filterByTags(manifest, ['orders']).capabilities,
76
+ * ...filterByTags(manifest, ['payments']).capabilities,
77
+ * ]
78
+ */
79
+ export declare function filterByTags(manifest: Manifest, tags: string[]): Manifest;
6
80
  export declare function resolverToIntent(cap: Capability): MatchResult['intent'];
81
+ /**
82
+ * Strips characters that could break LLM prompt structure from
83
+ * capability field values before injection into the system prompt.
84
+ * Removes control characters, newlines, delimiter sequences, and braces
85
+ * anywhere in the string (not just at line starts) to resist prompt injection
86
+ * from third-party OpenAPI spec content ingested via parseOpenAPI().
87
+ */
88
+ export declare function sanitizeForPrompt(value: string, maxLen: number): string;
7
89
  /**
8
90
  * Extracts parameter values from a user query using keyword heuristics.
9
91
  *
@@ -22,7 +104,17 @@ export declare function extractParams(query: string, cap: Capability): Record<st
22
104
  export interface MatchOptions {
23
105
  fuzzyMatch?: boolean;
24
106
  fuzzyThreshold?: number;
107
+ bm25Index?: BM25Index;
108
+ bm25K1?: number;
109
+ bm25B?: number;
110
+ bm25Ceiling?: number;
25
111
  }
112
+ /**
113
+ * Calibrates a BM25 normalization ceiling from the manifest.
114
+ * Scores each capability against all of its own examples and returns the maximum.
115
+ * Call once at manifest load time — O(capabilities × examples).
116
+ */
117
+ export declare function calibrateCeiling(capabilities: Capability[], bm25Index: BM25Index, k1: number, b: number): number;
26
118
  export declare function match(query: string, manifest: Manifest, options?: MatchOptions): MatchResult;
27
119
  export interface LLMMatcherOptions {
28
120
  llm: (prompt: string) => Promise<string>;
@@ -18,40 +18,265 @@ export const STOPWORDS = new Set([
18
18
  'it', 'its', 'how', 'when', 'where', 'who', 'which', 'all',
19
19
  'just', 'some', 'any', 'there', 'their', 'them', 'they',
20
20
  ]);
21
- function filterStopwords(words) {
22
- return words.filter(w => !STOPWORDS.has(w.toLowerCase()) && w.length > 1);
21
+ // ─── Type Patterns ────────────────────────────────────────────────────────────
22
+ /**
23
+ * Regex patterns for common param types.
24
+ * Used when a CapabilityParam has `pattern` set to a named type.
25
+ */
26
+ export const TYPE_PATTERNS = {
27
+ email: /\b[\w.+-]+@[\w-]+\.[a-zA-Z]{2,}\b/,
28
+ date: /\b\d{4}-\d{2}-\d{2}\b|\b(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d{1,2}\b/i,
29
+ orderId: /\b[A-Z]{2,}-?\d{4,}\b|\b\d{6,}\b/,
30
+ url: /https?:\/\/[^\s]+/,
31
+ };
32
+ /**
33
+ * Extracts a value from a query using an example template pattern.
34
+ * e.g. template "order {orderId}", query "track order 12345" → "12345"
35
+ * e.g. template "booking {ref}", query "cancel booking ABC-001" → "ABC-001"
36
+ */
37
+ function extractFromTemplate(query, template, paramName) {
38
+ // Split template on {paramName} to get prefix and suffix
39
+ const placeholder = `{${paramName}}`;
40
+ const idx = template.indexOf(placeholder);
41
+ if (idx === -1)
42
+ return null;
43
+ const prefix = template.slice(0, idx).trim().toLowerCase();
44
+ const suffix = template.slice(idx + placeholder.length).trim().toLowerCase();
45
+ const q = query.toLowerCase();
46
+ if (prefix) {
47
+ const prefixIdx = q.indexOf(prefix);
48
+ if (prefixIdx === -1)
49
+ return null;
50
+ const after = query.slice(prefixIdx + prefix.length).trim();
51
+ const tokens = after.split(/\s+/).filter(t => t.length > 0);
52
+ if (!tokens.length)
53
+ return null;
54
+ // If there's a suffix, find it and take what's between
55
+ if (suffix) {
56
+ const suffixIdx = after.toLowerCase().indexOf(suffix);
57
+ if (suffixIdx > 0) {
58
+ return after.slice(0, suffixIdx).trim().split(/\s+/)[0] ?? null;
59
+ }
60
+ }
61
+ return tokens[0].replace(/[^a-zA-Z0-9\-_.@]/g, '') || null;
62
+ }
63
+ // Prefix is empty — placeholder is at start of template e.g. "{email} unsubscribe"
64
+ if (!prefix) {
65
+ if (suffix) {
66
+ // Find suffix in query — take what comes before it
67
+ const suffixIdx = query.toLowerCase().indexOf(suffix);
68
+ if (suffixIdx > 0) {
69
+ return query.slice(0, suffixIdx).trim().split(/\s+/).pop()
70
+ ?.replace(/[^a-zA-Z0-9\-_.@]/g, '') || null;
71
+ }
72
+ }
73
+ // No prefix and no usable suffix match (e.g. template is just "{paramName}") — fall back to the last whitespace-separated word
74
+ const words = query.trim().split(/\s+/);
75
+ return words[words.length - 1]?.replace(/[^a-zA-Z0-9\-_.@]/g, '') || null;
76
+ }
77
+ return null;
23
78
  }
24
- function scoreCapability(qWordSet, cap) {
79
+ // ─── Stem cache ───────────────────────────────────────────────────────────────
80
+ // Each word stemmed exactly once per process — O(1) on repeat lookups.
81
+ // Module-level — persists for the process lifetime. Vocabulary in production
82
+ // is finite (capability names + user query vocabulary) so growth is bounded
83
+ // in practice. In test environments with synthetic random strings, this may
84
+ // grow larger but remains functionally harmless.
85
+ const stemCache = new Map();
86
+ /**
87
+ * Simplified suffix-stripping stemmer — 11 common English morphological
88
+ * patterns covering ~80% of benefit at ~25% the complexity of Porter stemmer.
89
+ * Applied symmetrically to both query words and capability index words.
90
+ */
91
+ export function stem(word) {
92
+ const cached = stemCache.get(word);
93
+ if (cached !== undefined)
94
+ return cached;
95
+ let s = word;
96
+ if (s.length > 7 && s.endsWith('ation'))
97
+ s = s.slice(0, -5); // cancellation → cancell
98
+ else if (s.length > 6 && s.endsWith('tion'))
99
+ s = s.slice(0, -4); // completion → comple
100
+ else if (s.length > 6 && s.endsWith('ing'))
101
+ s = s.slice(0, -3); // tracking → track
102
+ else if (s.length > 6 && s.endsWith('ity'))
103
+ s = s.slice(0, -3); // availability → availabil
104
+ else if (s.length > 5 && s.endsWith('ion'))
105
+ s = s.slice(0, -3); // version → vers
106
+ else if (s.length > 6 && s.endsWith('est'))
107
+ s = s.slice(0, -3); // fastest → fast
108
+ else if (s.length > 4 && s.endsWith('er'))
109
+ s = s.slice(0, -2); // tracker → track
110
+ else if (s.length > 4 && s.endsWith('ed'))
111
+ s = s.slice(0, -2); // ordered → order
112
+ else if (s.length > 4 && s.endsWith('ly'))
113
+ s = s.slice(0, -2); // quickly → quick
114
+ else if (s.length > 4 && s.endsWith('es'))
115
+ s = s.slice(0, -2); // fetches → fetch
116
+ else if (s.length > 3 && s.endsWith('s') &&
117
+ !s.endsWith('ss'))
118
+ s = s.slice(0, -1); // orders → order
119
+ stemCache.set(word, s);
120
+ return s;
121
+ }
122
+ /**
123
+ * Shared tokenizer — used by scorer, learning index, and boost system.
124
+ * Applies stopword filtering AND stemming symmetrically.
125
+ * Any site that tokenizes text for matching MUST use this function
126
+ * to avoid silent mismatches between query and index tokens.
127
+ */
128
+ export function tokenize(text) {
129
+ return text
130
+ .toLowerCase()
131
+ .split(/\W+/)
132
+ .filter(w => w.length > 2 && !STOPWORDS.has(w))
133
+ .map(stem);
134
+ }
135
+ /** Build a BM25 index over all capabilities. Call once at manifest load. */
136
+ export function buildBM25Index(capabilities) {
137
+ const N = capabilities.length;
138
+ if (N === 0)
139
+ return { df: {}, avgdl: { examples: 0, description: 0, name: 0 }, N: 0, bigrams: {}, capTokens: {}, };
140
+ const df = {};
141
+ let totalExLen = 0;
142
+ let totalDescLen = 0;
143
+ let totalNameLen = 0;
144
+ // Pre-compute token arrays for every capability in a single pass.
145
+ // scoreCapability() reads from capTokens instead of re-tokenizing on every call.
146
+ const capTokens = {};
147
+ for (const cap of capabilities) {
148
+ const exTokens = tokenize((cap.examples ?? []).join(' '));
149
+ const descTokens = tokenize(cap.description);
150
+ const nameTokens = tokenize(cap.name);
151
+ capTokens[cap.id] = { examples: exTokens, description: descTokens, name: nameTokens };
152
+ totalExLen += exTokens.length;
153
+ totalDescLen += descTokens.length;
154
+ totalNameLen += nameTokens.length;
155
+ // Count document frequency — each term counted once per capability
156
+ const seen = new Set();
157
+ for (const t of [...exTokens, ...descTokens, ...nameTokens]) {
158
+ if (!seen.has(t)) {
159
+ df[t] = (df[t] ?? 0) + 1;
160
+ seen.add(t);
161
+ }
162
+ }
163
+ }
164
+ // Build bigram sets per capability — examples field only
165
+ // Clean bigrams only: post-stopword, post-stem tokens
166
+ const bigrams = {};
167
+ for (const cap of capabilities) {
168
+ const set = new Set();
169
+ for (const example of cap.examples ?? []) {
170
+ for (const bg of extractBigrams(tokenize(example)))
171
+ set.add(bg);
172
+ }
173
+ bigrams[cap.id] = set;
174
+ }
175
+ return {
176
+ df,
177
+ avgdl: {
178
+ examples: totalExLen / N,
179
+ description: totalDescLen / N,
180
+ name: totalNameLen / N,
181
+ },
182
+ N,
183
+ bigrams,
184
+ capTokens,
185
+ };
186
+ }
187
+ /**
188
+ * BM25 scoring with field weights.
189
+ * k1 = 1.5 (TF saturation), b = 0.75 (length normalization)
190
+ * Field weights: examples 0.6, description 0.3, name 0.1
191
+ */
192
+ export function scoreCapability(qWordSet, cap, index, k1 = 1.5, b = 0.75) {
193
+ if (index.N === 0)
194
+ return 0;
195
+ // Use pre-computed token arrays from the index — avoids re-tokenizing
196
+ // capability text on every call. Falls back to live tokenization only when
197
+ // scoreCapability() is called outside CapmanEngine (e.g. unit tests that
198
+ // build a BM25Index manually without capTokens populated).
199
+ const tokens = index.capTokens[cap.id];
200
+ const exTokens = tokens?.examples ?? tokenize((cap.examples ?? []).join(' '));
201
+ const descTokens = tokens?.description ?? tokenize(cap.description);
202
+ const nameTokens = tokens?.name ?? tokenize(cap.name);
203
+ const score = bm25Field(qWordSet, exTokens, index, 'examples', k1, b) * 0.6
204
+ + bm25Field(qWordSet, descTokens, index, 'description', k1, b) * 0.3
205
+ + bm25Field(qWordSet, nameTokens, index, 'name', k1, b) * 0.1;
206
+ return score;
207
+ }
208
+ function bm25Field(queryTerms, fieldTokens, index, field, k1, b) {
209
+ if (fieldTokens.length === 0)
210
+ return 0;
211
+ const avgdl = index.avgdl[field] || 1;
212
+ const dl = fieldTokens.length;
213
+ const tf = new Map();
214
+ for (const t of fieldTokens) {
215
+ tf.set(t, (tf.get(t) ?? 0) + 1);
216
+ }
25
217
  let score = 0;
26
- // Check examples take the best single example match, not the sum.
27
- // Accumulating across examples rewards bloated example lists over precise ones:
28
- // 10 examples at 50% overlap = 300 points (clamped to 60) beats 1 perfect example at 60.
29
- // Taking Math.max means quality of examples matters, not quantity.
30
- let bestExampleScore = 0;
31
- for (const example of cap.examples ?? []) {
32
- const exWords = filterStopwords(example.toLowerCase().split(/\s+/));
33
- if (exWords.length === 0)
218
+ for (const term of queryTerms) {
219
+ const termTf = tf.get(term) ?? 0;
220
+ if (termTf === 0)
34
221
  continue;
35
- const overlap = exWords.filter(w => qWordSet.has(w)).length;
36
- const contribution = (overlap / exWords.length) * 60;
37
- bestExampleScore = Math.max(bestExampleScore, contribution);
222
+ const df = index.df[term] ?? 0;
223
+ const idf = Math.log((index.N - df + 0.5) / (df + 0.5) + 1);
224
+ const tfNorm = (termTf * (k1 + 1)) / (termTf + k1 * (1 - b + b * (dl / avgdl)));
225
+ score += idf * tfNorm;
38
226
  }
39
- score += bestExampleScore;
40
- // Check description words — normalize against min(length, 10) to avoid
41
- // penalizing rich documentation (many words = lower ratio) while also
42
- // preventing single-word descriptions from maxing out on any match.
43
- const descWords = filterStopwords(cap.description.toLowerCase().split(/\W+/).filter(Boolean));
44
- if (descWords.length > 0) {
45
- const descOverlap = descWords.filter(w => qWordSet.has(w)).length;
46
- score += Math.min((descOverlap / Math.min(descWords.length, 10)) * 30, 30);
227
+ return score;
228
+ }
229
+ /**
230
+ * Extracts bigrams from a token array as "token1__token2" strings.
231
+ * Input must already be post-stopword and post-stem (use tokenize() first).
232
+ */
233
+ export function extractBigrams(tokens) {
234
+ const bigrams = new Set();
235
+ for (let i = 0; i < tokens.length - 1; i++) {
236
+ bigrams.add(`${tokens[i]}__${tokens[i + 1]}`);
47
237
  }
48
- // Check name words
49
- const nameWords = filterStopwords(cap.name.toLowerCase().split(/\W+/).filter(Boolean));
50
- if (nameWords.length > 0) {
51
- const nameOverlap = nameWords.filter(w => qWordSet.has(w)).length;
52
- score += (nameOverlap / nameWords.length) * 10;
238
+ return bigrams;
239
+ }
240
+ /**
241
+ * Returns a sub-manifest containing only capabilities that match ALL provided tags.
242
+ * Capabilities without tags are excluded when tags filter is active.
243
+ * Enables token-efficient LLM prompts for large manifests:
244
+ *
245
+ * @example
246
+ * // Only send order-related capabilities to LLM
247
+ * const orderManifest = filterByTags(manifest, ['orders'])
248
+ * const result = await matchWithLLM(query, orderManifest, { llm })
249
+ *
250
+ * @example
251
+ * // Match by any of multiple tags (union) — call filterByTags per tag and merge (de-duplicate by id: a capability carrying both tags appears twice)
252
+ * const ordersOrPayments = [
253
+ * ...filterByTags(manifest, ['orders']).capabilities,
254
+ * ...filterByTags(manifest, ['payments']).capabilities,
255
+ * ]
256
+ */
257
+ export function filterByTags(manifest, tags) {
258
+ if (tags.length === 0)
259
+ return manifest;
260
+ const tagSet = new Set(tags);
261
+ return {
262
+ ...manifest,
263
+ capabilities: manifest.capabilities.filter(cap => cap.tags?.length && tags.every(t => cap.tags.includes(t))),
264
+ };
265
+ }
266
+ /**
267
+ * Returns a fixed bonus in normalized points (0–15), applied after BM25 normalization.
268
+ * 5 points per matching bigram, saturates at 3 bigrams (15 points).
269
+ * Fixed point value regardless of manifest size — ceiling-independent.
270
+ */
271
+ function bigramBonus(queryBigrams, capBigrams) {
272
+ if (queryBigrams.size === 0 || capBigrams.size === 0)
273
+ return 0;
274
+ let overlap = 0;
275
+ for (const bigram of queryBigrams) {
276
+ if (capBigrams.has(bigram))
277
+ overlap++;
53
278
  }
54
- return Math.min(Math.round(score), 100);
279
+ return Math.min(overlap * 5, 15); // normalized points — 3 bigrams saturate at 15
55
280
  }
56
281
  export function resolverToIntent(cap) {
57
282
  const t = cap.resolver.type;
@@ -66,13 +291,18 @@ export function resolverToIntent(cap) {
66
291
  /**
67
292
  * Strips characters that could break LLM prompt structure from
68
293
  * capability field values before injection into the system prompt.
69
- * Removes control characters, newlines, and delimiter-like sequences.
294
+ * Removes control characters, newlines, delimiter sequences, and braces
295
+ * anywhere in the string (not just at line starts) to resist prompt injection
296
+ * from third-party OpenAPI spec content ingested via parseOpenAPI().
70
297
  */
71
- function sanitizeForPrompt(value, maxLen) {
298
+ export function sanitizeForPrompt(value, maxLen) {
72
299
  return value
73
- .replace(/[\r\n\t]/g, ' ') // newlines → space
300
+ .replace(/[\r\n\t]/g, ' ') // newlines/tabs → space
74
301
  .replace(/---+/g, '—') // horizontal rules → em dash
75
- .replace(/^\s*[{}\[\]]/gm, ' ') // leading braces/brackets → space
302
+ .replace(/[{}\[\]]/g, ' ') // all braces/brackets anywhere → space (was: leading only)
303
+ .split(' ') // per-word cap — limits injection payload per token
304
+ .map(w => w.slice(0, 200)) // no single token longer than 200 chars
305
+ .join(' ')
76
306
  .replace(/\s+/g, ' ') // collapse whitespace
77
307
  .trim()
78
308
  .slice(0, maxLen);
@@ -104,6 +334,42 @@ export function extractParams(query, cap) {
104
334
  result[param.name] = null;
105
335
  continue;
106
336
  }
337
+ // ── Type-implied pattern extraction ───────────────────────────────────
338
+ // param.type implies a TYPE_PATTERNS match — no need to set pattern explicitly
339
+ if (param.type && !param.pattern) {
340
+ // Map param types that have direct regex equivalents
341
+ const typeToPattern = {
342
+ email: TYPE_PATTERNS.email,
343
+ date: TYPE_PATTERNS.date,
344
+ url: TYPE_PATTERNS.url,
345
+ };
346
+ const impliedPattern = typeToPattern[param.type];
347
+ if (impliedPattern) {
348
+ const match = query.match(impliedPattern);
349
+ if (match) {
350
+ result[param.name] = match[0];
351
+ continue;
352
+ }
353
+ }
354
+ }
355
+ // ── Explicit pattern extraction (highest priority when set) ───────────
356
+ if (param.pattern) {
357
+ const namedPattern = TYPE_PATTERNS[param.pattern];
358
+ if (namedPattern) {
359
+ const match = query.match(namedPattern);
360
+ if (match) {
361
+ result[param.name] = match[0];
362
+ continue;
363
+ }
364
+ }
365
+ else if (param.pattern.includes(`{${param.name}}`)) {
366
+ const extracted = extractFromTemplate(query, param.pattern, param.name);
367
+ if (extracted) {
368
+ result[param.name] = extracted;
369
+ continue;
370
+ }
371
+ }
372
+ }
107
373
  // Try to extract value after known keywords
108
374
  // e.g. "profile for johndoe" → johndoe
109
375
  // "articles by jane" → jane
@@ -157,10 +423,36 @@ export function extractParams(query, cap) {
157
423
  extracted = candidate;
158
424
  }
159
425
  }
426
+ // ── Enum validation ───────────────────────────────────────────────────
427
+ if (extracted !== null && param.type === 'enum' && param.enum?.length) {
428
+ if (!param.enum.includes(extracted)) {
429
+ // Extracted value not in allowed list — treat as not found
430
+ extracted = null;
431
+ }
432
+ }
160
433
  result[param.name] = extracted;
161
434
  }
162
435
  return result;
163
436
  }
437
+ /**
438
+ * Calibrates a BM25 normalization ceiling from the manifest.
439
+ * Scores each capability against all of its own examples and returns the maximum.
440
+ * Call once at manifest load time — O(capabilities × examples).
441
+ */
442
+ export function calibrateCeiling(capabilities, bm25Index, k1, b) {
443
+ let max = 0;
444
+ for (const cap of capabilities) {
445
+ if (!cap.examples?.length)
446
+ continue;
447
+ for (const example of cap.examples) {
448
+ const selfWords = new Set(tokenize(example));
449
+ const raw = scoreCapability(selfWords, cap, bm25Index, k1, b);
450
+ if (raw > max)
451
+ max = raw;
452
+ }
453
+ }
454
+ return max > 0 ? max : 100;
455
+ }
164
456
  export function match(query, manifest, options = {}) {
165
457
  if (!query?.trim()) {
166
458
  logger.warn('Empty query received');
@@ -225,10 +517,23 @@ export function match(query, manifest, options = {}) {
225
517
  }
226
518
  // ── Score all capabilities ────────────────────────────────────────────────
227
519
  // Build qWordSet once — O(1) lookups instead of O(n) Array.includes per word
228
- const qWordSet = new Set(filterStopwords(query.toLowerCase().split(/\W+/).filter(Boolean)));
520
+ const qTokens = tokenize(query);
521
+ const qWordSet = new Set(qTokens);
522
+ // Build query bigrams for phrase bonus
523
+ const qBigrams = extractBigrams(qTokens);
524
+ // Build BM25 index for this manifest — O(capabilities × tokens)
525
+ // In CapmanEngine this is pre-built; for direct match() calls it's built per-call
526
+ const bm25Index = options.bm25Index ?? buildBM25Index(manifest.capabilities);
527
+ const k1 = options.bm25K1 ?? 1.5;
528
+ const b = options.bm25B ?? 0.75;
529
+ // Calibrate ceiling — max self-score for normalization
530
+ const ceiling = options.bm25Ceiling ?? calibrateCeiling(manifest.capabilities, bm25Index, k1, b);
229
531
  const allScores = [];
230
532
  for (const cap of manifest.capabilities) {
231
- const keywordScore = scoreCapability(qWordSet, cap);
533
+ const rawBM25 = scoreCapability(qWordSet, cap, bm25Index, k1, b);
534
+ const bm25Score = Math.min(100, Math.round((rawBM25 / ceiling) * 100));
535
+ const bonusPoints = bigramBonus(qBigrams, bm25Index.bigrams[cap.id] ?? new Set());
536
+ const keywordScore = Math.min(100, bm25Score + bonusPoints);
232
537
  const fuzzyScore = fuzzyScoreMap.get(cap.id) ?? 0;
233
538
  const via = fuzzyScore > keywordScore ? 'fuzzy' : 'keyword';
234
539
  const score = Math.min(100, Math.round(Math.max(keywordScore, fuzzyScore)));
@@ -345,7 +650,13 @@ ${JSON.stringify({ user_query: query })}
345
650
  // Build full candidate list — all capabilities scored, LLM winner marked as matched.
346
651
  // This aligns the shape with keyword match results and allows the learning boost
347
652
  // to surface alternatives if the LLM made a wrong call.
348
- const llmConfidence = effectivelyOOS ? 0 : parsed.confidence;
653
+ // Clamp and round confidence — LLM may return values outside 0–100 with
654
+ // misconfigured models or prompt drift. Unclamped values corrupt learning
655
+ // weights (weight = confidence/100 can exceed 1.0) and verdict margins.
656
+ // disambiguateLLM() already does this; apply the same treatment here.
657
+ const llmConfidence = effectivelyOOS
658
+ ? 0
659
+ : Math.min(100, Math.max(0, Math.round(parsed.confidence)));
349
660
  const allCandidates = manifest.capabilities.map(c => ({
350
661
  capabilityId: c.id,
351
662
  score: c.id === capability?.id ? llmConfidence : 0,