capman 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -1
- package/bin/lib/cmd-generate.js +156 -12
- package/bin/lib/cmd-help.js +3 -0
- package/dist/cjs/engine.d.ts +53 -1
- package/dist/cjs/engine.d.ts.map +1 -1
- package/dist/cjs/engine.js +219 -9
- package/dist/cjs/engine.js.map +1 -1
- package/dist/cjs/index.d.ts +1 -0
- package/dist/cjs/index.d.ts.map +1 -1
- package/dist/cjs/index.js +3 -1
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/learning.d.ts.map +1 -1
- package/dist/cjs/learning.js +12 -18
- package/dist/cjs/learning.js.map +1 -1
- package/dist/cjs/matcher.d.ts +55 -0
- package/dist/cjs/matcher.d.ts.map +1 -1
- package/dist/cjs/matcher.js +267 -31
- package/dist/cjs/matcher.js.map +1 -1
- package/dist/cjs/schema.d.ts +46 -28
- package/dist/cjs/schema.d.ts.map +1 -1
- package/dist/cjs/schema.js +1 -0
- package/dist/cjs/schema.js.map +1 -1
- package/dist/cjs/types.d.ts +7 -1
- package/dist/cjs/types.d.ts.map +1 -1
- package/dist/esm/engine.d.ts +53 -1
- package/dist/esm/engine.js +220 -10
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.js +1 -0
- package/dist/esm/learning.js +13 -19
- package/dist/esm/matcher.d.ts +55 -0
- package/dist/esm/matcher.js +261 -31
- package/dist/esm/schema.d.ts +46 -28
- package/dist/esm/schema.js +1 -0
- package/dist/esm/types.d.ts +7 -1
- package/package.json +1 -1
package/dist/esm/engine.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams,
|
|
1
|
+
import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, scoreCapability as _scoreCapability, sanitizeForPrompt } from './matcher';
|
|
2
2
|
import { resolve as _resolve, checkPrivacy } from './resolver';
|
|
3
3
|
import { MemoryLearningStore } from './learning';
|
|
4
4
|
import { logger } from './logger';
|
|
@@ -27,6 +27,12 @@ export class CapmanEngine {
|
|
|
27
27
|
this.llmCircuitBreakerResetMs = options.llmCircuitBreakerResetMs ?? 60_000;
|
|
28
28
|
this.fuzzyMatch = options.fuzzyMatch ?? false;
|
|
29
29
|
this.fuzzyThreshold = options.fuzzyThreshold ?? 0.4;
|
|
30
|
+
this.bm25K1 = options.bm25K1 ?? 1.5;
|
|
31
|
+
this.bm25B = options.bm25B ?? 0.75;
|
|
32
|
+
this.bm25Index = buildBM25Index(options.manifest.capabilities);
|
|
33
|
+
this.bm25Ceiling = this.calibrateBM25Ceiling();
|
|
34
|
+
this.marginAwareLLM = options.marginAwareLLM ?? false;
|
|
35
|
+
this.adaptiveMargin = options.adaptiveMarginOverride ?? this.calibrateAdaptiveMargin();
|
|
30
36
|
// Cache — default MemoryCache (no filesystem writes), or disabled with false
|
|
31
37
|
// Use FileCache or ComboCache explicitly for persistence across restarts
|
|
32
38
|
this.cache = options.cache === false
|
|
@@ -90,12 +96,16 @@ export class CapmanEngine {
|
|
|
90
96
|
resolvedVia: 'cache',
|
|
91
97
|
totalMs: Date.now() - start,
|
|
92
98
|
};
|
|
99
|
+
const { verdict: cacheVerdict, margin: cacheMargin } = this.computeVerdict(matchWithFreshParams);
|
|
93
100
|
const result = {
|
|
94
101
|
match: matchWithFreshParams,
|
|
95
102
|
resolution,
|
|
96
103
|
resolvedVia: 'cache',
|
|
97
104
|
durationMs: Date.now() - start,
|
|
98
105
|
trace,
|
|
106
|
+
verdict: cacheVerdict,
|
|
107
|
+
margin: cacheMargin,
|
|
108
|
+
missingParams: undefined
|
|
99
109
|
};
|
|
100
110
|
await this.recordLearning(query, matchWithFreshParams, 'cache');
|
|
101
111
|
return result;
|
|
@@ -123,7 +133,19 @@ export class CapmanEngine {
|
|
|
123
133
|
detail: privacyError ?? `level: ${matchResult.capability.privacy.level}`,
|
|
124
134
|
});
|
|
125
135
|
}
|
|
126
|
-
// ── Step
|
|
136
|
+
// ── Step 4a: Compute verdict + optional margin-aware LLM disambiguation ──
|
|
137
|
+
let { verdict, margin } = this.computeVerdict(matchResult);
|
|
138
|
+
if (verdict === 'marginal' &&
|
|
139
|
+
this.marginAwareLLM &&
|
|
140
|
+
this.llm &&
|
|
141
|
+
this.mode === 'balanced') {
|
|
142
|
+
matchResult = await this.disambiguateLLM(query, matchResult, steps);
|
|
143
|
+
// Recompute verdict after disambiguation
|
|
144
|
+
const recomputed = this.computeVerdict(matchResult);
|
|
145
|
+
verdict = recomputed.verdict;
|
|
146
|
+
margin = recomputed.margin;
|
|
147
|
+
}
|
|
148
|
+
// ── Step 4b: Resolve ──────────────────────────────────────────────────────
|
|
127
149
|
const resolveStart = Date.now();
|
|
128
150
|
const resolution = await _resolve(matchResult, matchResult.extractedParams, this.resolveOptions(overrides));
|
|
129
151
|
steps.push({
|
|
@@ -145,6 +167,57 @@ export class CapmanEngine {
|
|
|
145
167
|
await this.cache.set(capKey, matchResult);
|
|
146
168
|
// capKey always starts with 'cap:' — structurally distinct from queryKey
|
|
147
169
|
}
|
|
170
|
+
// ── Step 5b: Compute missingParams ───────────────────────────────────────
|
|
171
|
+
// Spec: LLM attempts extraction first when available. missingParams is last resort.
|
|
172
|
+
let missingParams;
|
|
173
|
+
if (matchResult.capability && resolvedVia !== 'llm') {
|
|
174
|
+
const cap = matchResult.capability;
|
|
175
|
+
const unresolved = cap.params.filter(p => p.source === 'user_query' && p.required
|
|
176
|
+
&& matchResult.extractedParams[p.name] === null);
|
|
177
|
+
if (unresolved.length > 0 && this.llm && this.mode !== 'cheap') {
|
|
178
|
+
// LLM available — attempt targeted param extraction before declaring incomplete
|
|
179
|
+
const skipReason = this.checkLLMAllowed();
|
|
180
|
+
if (!skipReason) {
|
|
181
|
+
try {
|
|
182
|
+
const paramExtractionStart = Date.now();
|
|
183
|
+
const paramDescriptions = unresolved
|
|
184
|
+
.map(p => `- ${p.name}: ${p.description}`)
|
|
185
|
+
.join('\n');
|
|
186
|
+
const paramPrompt = `Extract the following parameters from this user query.\n` +
|
|
187
|
+
`Query: ${JSON.stringify({ user_query: query })}\n\n` +
|
|
188
|
+
`Parameters to extract:\n${paramDescriptions}\n\n` +
|
|
189
|
+
`Respond ONLY with valid JSON: { "params": { "<name>": "<value or null>" } }`;
|
|
190
|
+
const raw = await this.llm(paramPrompt);
|
|
191
|
+
const clean = raw.replace(/```json|```/g, '').trim();
|
|
192
|
+
const parsed = JSON.parse(clean);
|
|
193
|
+
this.recordLLMSuccess();
|
|
194
|
+
steps.push({
|
|
195
|
+
type: 'llm_match',
|
|
196
|
+
status: 'pass',
|
|
197
|
+
durationMs: Date.now() - paramExtractionStart,
|
|
198
|
+
detail: `param extraction: ${unresolved.map(p => p.name).join(', ')}`,
|
|
199
|
+
});
|
|
200
|
+
// Merge LLM-extracted values — validate type before accepting
|
|
201
|
+
for (const p of unresolved) {
|
|
202
|
+
const val = parsed?.params?.[p.name];
|
|
203
|
+
if (val && typeof val === 'string' && val.trim().length > 0) {
|
|
204
|
+
matchResult.extractedParams[p.name] = val.trim();
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
catch {
|
|
209
|
+
// LLM param extraction failed — fall through to missingParams below
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
// After LLM attempt (or if skipped/unavailable), report what's still missing
|
|
214
|
+
const stillMissing = cap.params
|
|
215
|
+
.filter(p => p.source === 'user_query' && p.required
|
|
216
|
+
&& matchResult.extractedParams[p.name] === null)
|
|
217
|
+
.map(p => p.name);
|
|
218
|
+
if (stillMissing.length > 0)
|
|
219
|
+
missingParams = stillMissing;
|
|
220
|
+
}
|
|
148
221
|
// ── Step 6: Build reasoning array ────────────────────────────────────────
|
|
149
222
|
const reasoning = [];
|
|
150
223
|
if (matchResult.candidates.length) {
|
|
@@ -189,6 +262,9 @@ export class CapmanEngine {
|
|
|
189
262
|
resolvedVia,
|
|
190
263
|
durationMs: Date.now() - start,
|
|
191
264
|
trace,
|
|
265
|
+
verdict,
|
|
266
|
+
margin,
|
|
267
|
+
missingParams,
|
|
192
268
|
};
|
|
193
269
|
}
|
|
194
270
|
/**
|
|
@@ -248,11 +324,10 @@ export class CapmanEngine {
|
|
|
248
324
|
async loadManifest(manifest) {
|
|
249
325
|
this.checkManifestVersion(manifest);
|
|
250
326
|
this.manifest = manifest;
|
|
327
|
+
this.bm25Index = buildBM25Index(manifest.capabilities);
|
|
328
|
+
this.bm25Ceiling = this.calibrateBM25Ceiling();
|
|
329
|
+
this.adaptiveMargin = this.calibrateAdaptiveMargin();
|
|
251
330
|
await this.clearCache();
|
|
252
|
-
// Note: LLM rate limiter state (llmCallsThisMinute, llmConsecutiveFails,
|
|
253
|
-
// llmCircuitOpenAt) is intentionally preserved across manifest reloads.
|
|
254
|
-
// The LLM provider has not changed, so circuit breaker state remains valid.
|
|
255
|
-
// If you need a clean rate limiter state, create a new CapmanEngine instance.
|
|
256
331
|
}
|
|
257
332
|
/**
|
|
258
333
|
* Explain what would happen for a query — without executing it.
|
|
@@ -291,7 +366,8 @@ export class CapmanEngine {
|
|
|
291
366
|
// ── Apply learning boost (same as ask()) ─────────────────────────────────
|
|
292
367
|
matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
|
|
293
368
|
// ── Build candidate explanations ─────────────────────────────────────────
|
|
294
|
-
const
|
|
369
|
+
const qTokens = tokenize(query);
|
|
370
|
+
const qWordSet = new Set(qTokens);
|
|
295
371
|
const candidates = matchResult.candidates
|
|
296
372
|
.sort((a, b) => b.score - a.score)
|
|
297
373
|
.map(c => {
|
|
@@ -305,8 +381,8 @@ export class CapmanEngine {
|
|
|
305
381
|
}
|
|
306
382
|
else if (c.score >= 50) {
|
|
307
383
|
const matchedWords = (cap?.examples ?? [])
|
|
308
|
-
.flatMap(e => e
|
|
309
|
-
.filter(w => qWordSet.has(w)
|
|
384
|
+
.flatMap(e => tokenize(e))
|
|
385
|
+
.filter(w => qWordSet.has(w));
|
|
310
386
|
const unique = [...new Set(matchedWords)].slice(0, 3);
|
|
311
387
|
explanation = unique.length
|
|
312
388
|
? `Matched keywords: ${unique.join(', ')} (${c.score}%)`
|
|
@@ -496,6 +572,10 @@ export class CapmanEngine {
|
|
|
496
572
|
const fuzzyOpts = {
|
|
497
573
|
fuzzyMatch: this.fuzzyMatch,
|
|
498
574
|
fuzzyThreshold: this.fuzzyThreshold,
|
|
575
|
+
bm25Index: this.bm25Index,
|
|
576
|
+
bm25Ceiling: this.bm25Ceiling,
|
|
577
|
+
bm25K1: this.bm25K1,
|
|
578
|
+
bm25B: this.bm25B,
|
|
499
579
|
};
|
|
500
580
|
switch (this.mode) {
|
|
501
581
|
case 'cheap': {
|
|
@@ -663,7 +743,7 @@ export class CapmanEngine {
|
|
|
663
743
|
const stats = await this.learning.getStats();
|
|
664
744
|
if (!stats || Object.keys(stats.index).length === 0)
|
|
665
745
|
return candidates;
|
|
666
|
-
const qWords = query
|
|
746
|
+
const qWords = tokenize(query);
|
|
667
747
|
if (qWords.length === 0)
|
|
668
748
|
return candidates;
|
|
669
749
|
return candidates.map(candidate => {
|
|
@@ -711,6 +791,136 @@ export class CapmanEngine {
|
|
|
711
791
|
timestamp: new Date().toISOString(),
|
|
712
792
|
});
|
|
713
793
|
}
|
|
794
|
+
/**
 * Derives the BM25 ceiling value for the currently loaded manifest.
 *
 * Every capability that has at least one example is scored against the
 * token set of its own first example; the largest raw score observed is
 * returned. Capabilities without examples are ignored.
 *
 * The result is stored as `bm25Ceiling` and handed to the matcher —
 * presumably to normalise raw BM25 scores (confirm against the matcher).
 *
 * @returns The highest self-match score, or 100 when no capability
 *          produces a positive score.
 */
calibrateBM25Ceiling() {
    const selfScores = this.manifest.capabilities
        .filter(capability => capability.examples?.length)
        .map(capability => {
            const ownTokens = new Set(tokenize(capability.examples[0]));
            return _scoreCapability(ownTokens, capability, this.bm25Index, this.bm25K1, this.bm25B);
        });
    // Start from 0 so non-positive scores never become the ceiling.
    const ceiling = selfScores.reduce((highest, score) => (score > highest ? score : highest), 0);
    return ceiling > 0 ? ceiling : 100;
}
|
|
806
|
+
/**
|
|
807
|
+
* Calibrates the adaptive margin threshold from the manifest's own score
|
|
808
|
+
* distribution. Runs each capability's first example against all other
|
|
809
|
+
* capabilities to find the typical inter-capability score spread.
|
|
810
|
+
* Dense overlapping vocabulary → lower margin (harder to separate).
|
|
811
|
+
* Sparse vocabulary → higher margin (easier to separate).
|
|
812
|
+
*
|
|
813
|
+
* Complexity: O(capabilities²) — runs at constructor time and on loadManifest().
|
|
814
|
+
* For manifests with ≤100 capabilities this is negligible (<10ms).
|
|
815
|
+
* For very large manifests (500+ capabilities), consider passing
|
|
816
|
+
* `adaptiveMarginOverride` to skip calibration.
|
|
817
|
+
*/
|
|
818
|
+
calibrateAdaptiveMargin() {
|
|
819
|
+
if (this.manifest.capabilities.length < 2)
|
|
820
|
+
return 20;
|
|
821
|
+
const margins = [];
|
|
822
|
+
const fuzzyOpts = {
|
|
823
|
+
fuzzyMatch: false, // calibration uses keyword only — deterministic
|
|
824
|
+
bm25Index: this.bm25Index,
|
|
825
|
+
bm25Ceiling: this.bm25Ceiling,
|
|
826
|
+
bm25K1: this.bm25K1,
|
|
827
|
+
bm25B: this.bm25B,
|
|
828
|
+
};
|
|
829
|
+
for (const cap of this.manifest.capabilities) {
|
|
830
|
+
if (!cap.examples?.length)
|
|
831
|
+
continue;
|
|
832
|
+
const result = _match(cap.examples[0], this.manifest, fuzzyOpts);
|
|
833
|
+
const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
|
|
834
|
+
if (sorted.length >= 2) {
|
|
835
|
+
margins.push(sorted[0].score - sorted[1].score);
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
if (margins.length === 0)
|
|
839
|
+
return 20;
|
|
840
|
+
// Use 25th percentile of margins as the threshold — manifests where
|
|
841
|
+
// capabilities are naturally close together get a tighter threshold
|
|
842
|
+
margins.sort((a, b) => a - b);
|
|
843
|
+
const p25 = margins[Math.floor(margins.length * 0.25)];
|
|
844
|
+
return Math.max(10, Math.min(30, Math.round(p25 * 0.6)));
|
|
845
|
+
}
|
|
846
|
+
computeVerdict(matchResult) {
|
|
847
|
+
if (!matchResult.capability)
|
|
848
|
+
return { verdict: 'uncertain', margin: 0 };
|
|
849
|
+
const sorted = [...matchResult.candidates].sort((a, b) => b.score - a.score);
|
|
850
|
+
const best = sorted[0]?.score ?? 0;
|
|
851
|
+
const second = sorted[1]?.score ?? 0;
|
|
852
|
+
const margin = best - second;
|
|
853
|
+
if (best < 60)
|
|
854
|
+
return { verdict: 'uncertain', margin };
|
|
855
|
+
if (margin < this.adaptiveMargin)
|
|
856
|
+
return { verdict: 'marginal', margin };
|
|
857
|
+
return { verdict: 'clear', margin };
|
|
858
|
+
}
|
|
859
|
+
/**
 * Targeted disambiguation between the two highest-scoring candidates.
 *
 * Builds a short prompt containing only those two capabilities (id plus a
 * sanitized, length-limited description) instead of the full manifest, and
 * asks the LLM to pick one of them.
 *
 * The original `matchResult` is returned unchanged when:
 *  - no LLM is configured, fewer than two candidates exist, or either
 *    candidate id is not present in the manifest;
 *  - `checkLLMAllowed()` reports a reason to skip the call;
 *  - the LLM call throws, the reply is not a JSON object, or the reply names
 *    an id that is not one of the two offered options.
 *
 * NOTE(review): in `ask()` the privacy step appears to run before this
 * method. If the LLM switches the winner to the runner-up, confirm that the
 * new capability is privacy-checked before resolution.
 *
 * @param query - user query; embedded in the prompt via `JSON.stringify`.
 * @param matchResult - current match result; not mutated.
 * @param steps - trace array; exactly one `llm_match` step is appended for
 *                each skip, pass, or fail outcome.
 * @returns A new match result pointing at the LLM-preferred capability, or
 *          the original `matchResult` on any of the conditions above.
 */
async disambiguateLLM(query, matchResult, steps) {
    if (!this.llm)
        return matchResult;
    const topTwo = [...matchResult.candidates]
        .sort((a, b) => b.score - a.score)
        .slice(0, 2);
    if (topTwo.length < 2)
        return matchResult;
    const capA = this.manifest.capabilities.find(c => c.id === topTwo[0].capabilityId);
    const capB = this.manifest.capabilities.find(c => c.id === topTwo[1].capabilityId);
    if (!capA || !capB)
        return matchResult;
    const skipReason = this.checkLLMAllowed();
    if (skipReason) {
        logger.warn(`Disambiguation LLM skipped — ${skipReason}`);
        steps.push({ type: 'llm_match', status: 'skip', durationMs: 0, detail: `disambiguation skipped: ${skipReason}` });
        return matchResult;
    }
    const prompt = [
        'Two capabilities are close matches for this query. Pick the best one.',
        '',
        `Query: ${JSON.stringify({ user_query: query })}`,
        '',
        `Option A: ${capA.id} — ${sanitizeForPrompt(capA.description, 150)}`,
        `Option B: ${capB.id} — ${sanitizeForPrompt(capB.description, 150)}`,
        '',
        'Respond ONLY with valid JSON:',
        '{ "winner": "<capability_id>", "confidence": <0-100>, "reasoning": "<one sentence>" }',
    ].join('\n');
    const t = Date.now();
    try {
        const raw = await this.llm(prompt);
        const clean = raw.replace(/```json|```/g, '').trim();
        // JSON.parse throws SyntaxError; convert it so the catch below can
        // tell a malformed reply apart from a failed LLM call.
        let parsed;
        try {
            parsed = JSON.parse(clean);
        }
        catch (parseErr) {
            throw new LLMParseError(`disambiguation response was not valid JSON: ${String(parseErr)}`);
        }
        if (parsed === null || typeof parsed !== 'object') {
            throw new LLMParseError('disambiguation response was not a JSON object');
        }
        this.recordLLMSuccess();
        // Only the two options that were actually offered are acceptable —
        // the reply must not be able to route to an arbitrary capability.
        const winner = [capA, capB].find(c => c.id === parsed.winner);
        if (!winner) {
            steps.push({ type: 'llm_match', status: 'fail', durationMs: Date.now() - t, detail: 'disambiguation returned unknown id' });
            return matchResult;
        }
        // Validate confidence before it is logged or returned; fall back to
        // the original confidence when the LLM returned a non-numeric value.
        const confidence = typeof parsed.confidence === 'number' && !Number.isNaN(parsed.confidence)
            ? Math.min(100, Math.max(0, Math.round(parsed.confidence)))
            : matchResult.confidence;
        const reasoning = typeof parsed.reasoning === 'string' && parsed.reasoning.trim().length > 0
            ? parsed.reasoning
            : `Disambiguated to "${winner.id}"`;
        steps.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t, detail: `disambiguation: ${winner.id} (${confidence}%)` });
        return {
            ...matchResult,
            capability: winner,
            confidence,
            intent: resolverToIntent(winner),
            extractedParams: extractParams(query, winner),
            candidates: matchResult.candidates.map(c => ({ ...c, matched: c.capabilityId === winner.id })),
            reasoning,
        };
    }
    catch (err) {
        // A malformed reply is not a provider failure, so it is not recorded
        // via recordLLMFailure(); every other error is.
        if (!(err instanceof LLMParseError))
            this.recordLLMFailure();
        steps.push({ type: 'llm_match', status: 'fail', durationMs: Date.now() - t, detail: String(err) });
        return matchResult;
    }
}
|
|
714
924
|
}
|
|
715
925
|
/** Maximum allowed query length in characters. Queries exceeding this throw RangeError. */
|
|
716
926
|
CapmanEngine.MAX_QUERY_LENGTH = 1000;
|
package/dist/esm/index.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ export { generate, loadConfig, writeManifest, readManifest, validate, generateSt
|
|
|
5
5
|
export { match, matchWithLLM, extractParams, } from './matcher';
|
|
6
6
|
export { LLMParseError } from './matcher';
|
|
7
7
|
export type { LLMMatcherOptions } from './matcher';
|
|
8
|
+
export { TYPE_PATTERNS } from './matcher';
|
|
8
9
|
export { resolve } from './resolver';
|
|
9
10
|
export type { ResolveOptions, AuthContext } from './resolver';
|
|
10
11
|
export { CapmanEngine } from './engine';
|
package/dist/esm/index.js
CHANGED
|
@@ -2,6 +2,7 @@ export { setLogLevel } from './logger';
|
|
|
2
2
|
export { generate, loadConfig, writeManifest, readManifest, validate, generateStarterConfig, } from './generator';
|
|
3
3
|
export { match, matchWithLLM, extractParams, } from './matcher';
|
|
4
4
|
export { LLMParseError } from './matcher';
|
|
5
|
+
export { TYPE_PATTERNS } from './matcher';
|
|
5
6
|
export { resolve } from './resolver';
|
|
6
7
|
// ─── Engine (recommended API) ─────────────────────────────────────────────────
|
|
7
8
|
export { CapmanEngine } from './engine';
|
package/dist/esm/learning.js
CHANGED
|
@@ -2,7 +2,7 @@ import * as fs from 'fs';
|
|
|
2
2
|
import * as path from 'path';
|
|
3
3
|
import { logger } from './logger';
|
|
4
4
|
const MAX_LEARNING_ENTRIES = 10_000;
|
|
5
|
-
import {
|
|
5
|
+
import { tokenize } from './matcher';
|
|
6
6
|
// Module-level registry — tracks all active FileLearningStore instances
|
|
7
7
|
// for process exit flushing. Handlers registered once to avoid accumulation.
|
|
8
8
|
const activeStores = new Set();
|
|
@@ -71,13 +71,15 @@ class LearningIndex {
|
|
|
71
71
|
if (!entry.capabilityId)
|
|
72
72
|
this.statsCounter.outOfScope++;
|
|
73
73
|
if (entry.capabilityId) {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
// Confidence-weighted contribution — a 95% match contributes ~1.9×
|
|
75
|
+
// more signal than a 51% borderline match. Floor of 0.1 ensures
|
|
76
|
+
// borderline matches still contribute, just proportionally less.
|
|
77
|
+
const weight = Math.max(0.1, entry.confidence / 100);
|
|
78
|
+
const words = tokenize(entry.query);
|
|
77
79
|
for (const word of words) {
|
|
78
80
|
this.index[word] ??= {};
|
|
79
81
|
this.index[word][entry.capabilityId] =
|
|
80
|
-
(this.index[word][entry.capabilityId] ?? 0) +
|
|
82
|
+
(this.index[word][entry.capabilityId] ?? 0) + weight;
|
|
81
83
|
}
|
|
82
84
|
}
|
|
83
85
|
}
|
|
@@ -93,14 +95,14 @@ class LearningIndex {
|
|
|
93
95
|
return;
|
|
94
96
|
}
|
|
95
97
|
// Keyword index cleanup
|
|
96
|
-
const words = entry.query
|
|
97
|
-
.split(/\W+/)
|
|
98
|
-
.filter(w => w.length > 2 && !STOPWORDS.has(w));
|
|
98
|
+
const words = tokenize(entry.query);
|
|
99
99
|
for (const word of words) {
|
|
100
100
|
if (!this.index[word])
|
|
101
101
|
continue;
|
|
102
|
+
// Subtract estimated weight (0.5 average) — exact weight not stored.
|
|
103
|
+
// Minor drift on prune is acceptable; index is rebuilt when drift matters.
|
|
102
104
|
this.index[word][entry.capabilityId] =
|
|
103
|
-
(this.index[word][entry.capabilityId] ??
|
|
105
|
+
(this.index[word][entry.capabilityId] ?? 0.5) - 0.5;
|
|
104
106
|
if (this.index[word][entry.capabilityId] <= 0) {
|
|
105
107
|
delete this.index[word][entry.capabilityId];
|
|
106
108
|
}
|
|
@@ -255,11 +257,7 @@ export class FileLearningStore {
|
|
|
255
257
|
// not be persisted to disk under GDPR/CCPA data retention requirements.
|
|
256
258
|
const sanitized = {
|
|
257
259
|
...entry,
|
|
258
|
-
query: entry.query
|
|
259
|
-
.toLowerCase()
|
|
260
|
-
.split(/\W+/)
|
|
261
|
-
.filter(w => w.length > 2 && !STOPWORDS.has(w))
|
|
262
|
-
.join(' '),
|
|
260
|
+
query: tokenize(entry.query).join(' '),
|
|
263
261
|
};
|
|
264
262
|
this.entries.push(sanitized);
|
|
265
263
|
this.learningIndex.update(sanitized);
|
|
@@ -308,11 +306,7 @@ export class MemoryLearningStore {
|
|
|
308
306
|
async record(entry) {
|
|
309
307
|
const sanitized = {
|
|
310
308
|
...entry,
|
|
311
|
-
query: entry.query
|
|
312
|
-
.toLowerCase()
|
|
313
|
-
.split(/\W+/)
|
|
314
|
-
.filter(w => w.length > 2 && !STOPWORDS.has(w))
|
|
315
|
-
.join(' '),
|
|
309
|
+
query: tokenize(entry.query).join(' '),
|
|
316
310
|
};
|
|
317
311
|
this.entries.push(sanitized);
|
|
318
312
|
this.learningIndex.update(sanitized);
|
package/dist/esm/matcher.d.ts
CHANGED
|
@@ -3,7 +3,58 @@ export declare class LLMParseError extends Error {
|
|
|
3
3
|
constructor(message: string);
|
|
4
4
|
}
|
|
5
5
|
export declare const STOPWORDS: Set<string>;
|
|
6
|
+
/**
|
|
7
|
+
* Regex patterns for common param types.
|
|
8
|
+
* Used when a CapabilityParam has `pattern` set to a named type.
|
|
9
|
+
*/
|
|
10
|
+
export declare const TYPE_PATTERNS: Record<string, RegExp>;
|
|
11
|
+
/**
|
|
12
|
+
* Simplified suffix-stripping stemmer — 10 most common English morphological
|
|
13
|
+
* patterns covering ~80% of benefit at ~25% the complexity of Porter stemmer.
|
|
14
|
+
* Applied symmetrically to both query words and capability index words.
|
|
15
|
+
*/
|
|
16
|
+
export declare function stem(word: string): string;
|
|
17
|
+
/**
|
|
18
|
+
* Shared tokenizer — used by scorer, learning index, and boost system.
|
|
19
|
+
* Applies stopword filtering AND stemming symmetrically.
|
|
20
|
+
* Any site that tokenizes text for matching MUST use this function
|
|
21
|
+
* to avoid silent mismatches between query and index tokens.
|
|
22
|
+
*/
|
|
23
|
+
export declare function tokenize(text: string): string[];
|
|
24
|
+
export interface BM25Index {
|
|
25
|
+
/** Document frequency — how many capabilities contain each term */
|
|
26
|
+
df: Record<string, number>;
|
|
27
|
+
/** Average field length per field type */
|
|
28
|
+
avgdl: {
|
|
29
|
+
examples: number;
|
|
30
|
+
description: number;
|
|
31
|
+
name: number;
|
|
32
|
+
};
|
|
33
|
+
/** Total number of capabilities */
|
|
34
|
+
N: number;
|
|
35
|
+
/** Bigram sets per capability — post-stopword, post-stem, examples only */
|
|
36
|
+
bigrams: Record<string, Set<string>>;
|
|
37
|
+
}
|
|
38
|
+
/** Build a BM25 index over all capabilities. Call once at manifest load. */
|
|
39
|
+
export declare function buildBM25Index(capabilities: Capability[]): BM25Index;
|
|
40
|
+
/**
|
|
41
|
+
* BM25 scoring with field weights.
|
|
42
|
+
* k1 = 1.5 (TF saturation), b = 0.75 (length normalization)
|
|
43
|
+
* Field weights: examples 0.6, description 0.3, name 0.1
|
|
44
|
+
*/
|
|
45
|
+
export declare function scoreCapability(qWordSet: Set<string>, cap: Capability, index: BM25Index, k1?: number, b?: number): number;
|
|
46
|
+
/**
|
|
47
|
+
* Extracts bigrams from a token array as "token1__token2" strings.
|
|
48
|
+
* Input must already be post-stopword and post-stem (use tokenize() first).
|
|
49
|
+
*/
|
|
50
|
+
export declare function extractBigrams(tokens: string[]): Set<string>;
|
|
6
51
|
export declare function resolverToIntent(cap: Capability): MatchResult['intent'];
|
|
52
|
+
/**
|
|
53
|
+
* Strips characters that could break LLM prompt structure from
|
|
54
|
+
* capability field values before injection into the system prompt.
|
|
55
|
+
* Removes control characters, newlines, and delimiter-like sequences.
|
|
56
|
+
*/
|
|
57
|
+
export declare function sanitizeForPrompt(value: string, maxLen: number): string;
|
|
7
58
|
/**
|
|
8
59
|
* Extracts parameter values from a user query using keyword heuristics.
|
|
9
60
|
*
|
|
@@ -22,6 +73,10 @@ export declare function extractParams(query: string, cap: Capability): Record<st
|
|
|
22
73
|
export interface MatchOptions {
|
|
23
74
|
fuzzyMatch?: boolean;
|
|
24
75
|
fuzzyThreshold?: number;
|
|
76
|
+
bm25Index?: BM25Index;
|
|
77
|
+
bm25K1?: number;
|
|
78
|
+
bm25B?: number;
|
|
79
|
+
bm25Ceiling?: number;
|
|
25
80
|
}
|
|
26
81
|
export declare function match(query: string, manifest: Manifest, options?: MatchOptions): MatchResult;
|
|
27
82
|
export interface LLMMatcherOptions {
|