npm - capman - Versions diffs - 0.5.4 → 0.6.0 - Mend

capman 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

package/CHANGELOG.md +46 -0
package/CODEBASE.md +111 -66
package/README.md +45 -4
package/bin/lib/cmd-generate.js +200 -40
package/bin/lib/cmd-help.js +3 -0
package/dist/cjs/cache.d.ts.map +1 -1
package/dist/cjs/cache.js +22 -5
package/dist/cjs/cache.js.map +1 -1
package/dist/cjs/engine.d.ts +53 -1
package/dist/cjs/engine.d.ts.map +1 -1
package/dist/cjs/engine.js +252 -17
package/dist/cjs/engine.js.map +1 -1
package/dist/cjs/generator.d.ts.map +1 -1
package/dist/cjs/generator.js +7 -1
package/dist/cjs/generator.js.map +1 -1
package/dist/cjs/index.d.ts +1 -0
package/dist/cjs/index.d.ts.map +1 -1
package/dist/cjs/index.js +3 -1
package/dist/cjs/index.js.map +1 -1
package/dist/cjs/learning.d.ts.map +1 -1
package/dist/cjs/learning.js +51 -30
package/dist/cjs/learning.js.map +1 -1
package/dist/cjs/matcher.d.ts +69 -9
package/dist/cjs/matcher.d.ts.map +1 -1
package/dist/cjs/matcher.js +328 -43
package/dist/cjs/matcher.js.map +1 -1
package/dist/cjs/parser.d.ts.map +1 -1
package/dist/cjs/parser.js +15 -8
package/dist/cjs/parser.js.map +1 -1
package/dist/cjs/resolver.d.ts +1 -0
package/dist/cjs/resolver.d.ts.map +1 -1
package/dist/cjs/resolver.js +16 -5
package/dist/cjs/resolver.js.map +1 -1
package/dist/cjs/schema.d.ts +64 -46
package/dist/cjs/schema.d.ts.map +1 -1
package/dist/cjs/schema.js +2 -1
package/dist/cjs/schema.js.map +1 -1
package/dist/cjs/types.d.ts +8 -2
package/dist/cjs/types.d.ts.map +1 -1
package/dist/cjs/version.d.ts +1 -1
package/dist/cjs/version.js +1 -1
package/dist/esm/cache.js +22 -5
package/dist/esm/engine.d.ts +53 -1
package/dist/esm/engine.js +255 -20
package/dist/esm/generator.js +7 -1
package/dist/esm/index.d.ts +1 -0
package/dist/esm/index.js +1 -0
package/dist/esm/learning.js +52 -31
package/dist/esm/matcher.d.ts +69 -9
package/dist/esm/matcher.js +321 -42
package/dist/esm/parser.js +15 -8
package/dist/esm/resolver.d.ts +1 -0
package/dist/esm/resolver.js +16 -6
package/dist/esm/schema.d.ts +64 -46
package/dist/esm/schema.js +2 -1
package/dist/esm/types.d.ts +8 -2
package/dist/esm/version.d.ts +1 -1
package/dist/esm/version.js +1 -1
package/package.json +1 -1

package/dist/esm/engine.js CHANGED Viewed

@@ -1,8 +1,8 @@
-import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, STOPWORDS, LLMParseError } from './matcher';
-import { resolve as _resolve } from './resolver';
+import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, LLMParseError, tokenize, buildBM25Index, scoreCapability as _scoreCapability, sanitizeForPrompt } from './matcher';
+import { resolve as _resolve, checkPrivacy } from './resolver';
 import { MemoryLearningStore } from './learning';
 import { logger } from './logger';
-import { MemoryCache, normalizeQuery } from './cache';
+import { MemoryCache, normalizeQuery, buildCacheKey } from './cache';
 import { VERSION } from './version';
 // ─── CapmanEngine ─────────────────────────────────────────────────────────────
 export class CapmanEngine {
@@ -27,6 +27,12 @@ export class CapmanEngine {
         this.llmCircuitBreakerResetMs = options.llmCircuitBreakerResetMs ?? 60_000;
         this.fuzzyMatch = options.fuzzyMatch ?? false;
         this.fuzzyThreshold = options.fuzzyThreshold ?? 0.4;
+        this.bm25K1 = options.bm25K1 ?? 1.5;
+        this.bm25B = options.bm25B ?? 0.75;
+        this.bm25Index = buildBM25Index(options.manifest.capabilities);
+        this.bm25Ceiling = this.calibrateBM25Ceiling();
+        this.marginAwareLLM = options.marginAwareLLM ?? false;
+        this.adaptiveMargin = options.adaptiveMarginOverride ?? this.calibrateAdaptiveMargin();
         // Cache — default MemoryCache (no filesystem writes), or disabled with false
         // Use FileCache or ComboCache explicitly for persistence across restarts
         this.cache = options.cache === false
@@ -90,12 +96,16 @@ export class CapmanEngine {
                     resolvedVia: 'cache',
                     totalMs: Date.now() - start,
                 };
+                const { verdict: cacheVerdict, margin: cacheMargin } = this.computeVerdict(matchWithFreshParams);
                 const result = {
                     match: matchWithFreshParams,
                     resolution,
                     resolvedVia: 'cache',
                     durationMs: Date.now() - start,
                     trace,
+                    verdict: cacheVerdict,
+                    margin: cacheMargin,
+                    missingParams: undefined
                 };
                 await this.recordLearning(query, matchWithFreshParams, 'cache');
                 return result;
@@ -107,20 +117,35 @@ export class CapmanEngine {
         }
         // ── Step 2: Match ────────────────────────────────────────────────────────
         let { matchResult, resolvedVia } = await this._runMatch(query, steps);
-        const preBoostMatchResult = matchResult; // kept for learning recording only — prevents feedback loop
+        // Shallow copy with candidates slice — not a reference alias.
+        // applyBoostToMatchResult() returns a new object today, but an explicit
+        // copy makes the invariant clear and safe against future in-place mutation.
+        const preBoostMatchResult = { ...matchResult, candidates: matchResult.candidates.slice() };
         // ── Step 2.5: Apply learning boost ───────────────────────────────────────
-        matchResult = await this.applyBoostToMatchResult(query, matchResult);
+        matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
         // ── Step 3: Privacy check ────────────────────────────────────────────────
         if (matchResult.capability) {
-            const privacyLevel = matchResult.capability.privacy.level;
+            const privacyError = checkPrivacy(matchResult.capability, this.auth);
             steps.push({
                 type: 'privacy_check',
-                status: 'pass',
+                status: privacyError ? 'fail' : 'pass',
                 durationMs: 0,
-                detail: `level: ${privacyLevel}`,
+                detail: privacyError ?? `level: ${matchResult.capability.privacy.level}`,
             });
         }
-        // ── Step 4: Resolve ──────────────────────────────────────────────────────
+        // ── Step 4a: Compute verdict + optional margin-aware LLM disambiguation ──
+        let { verdict, margin } = this.computeVerdict(matchResult);
+        if (verdict === 'marginal' &&
+            this.marginAwareLLM &&
+            this.llm &&
+            this.mode === 'balanced') {
+            matchResult = await this.disambiguateLLM(query, matchResult, steps);
+            // Recompute verdict after disambiguation
+            const recomputed = this.computeVerdict(matchResult);
+            verdict = recomputed.verdict;
+            margin = recomputed.margin;
+        }
+        // ── Step 4b: Resolve ──────────────────────────────────────────────────────
         const resolveStart = Date.now();
         const resolution = await _resolve(matchResult, matchResult.extractedParams, this.resolveOptions(overrides));
         steps.push({
@@ -130,14 +155,68 @@ export class CapmanEngine {
             detail: resolution.error ?? `via ${resolution.resolverType}`,
         });
         // ── Step 5: Cache after successful resolution ────────────────────────────
-        // Only cache when resolution succeeded — a failed resolution (network error,
-        // auth failure, bad params) must not poison the cache. A cached failed match
-        // would cause every subsequent cache hit to attempt the same failing resolution
-        // until TTL expires.
+        // Write under two keys:
+        // 1. normalizeQuery — exact phrasing lookup for this query
+        // 2. buildCacheKey — semantic key (capability + params) so differently-phrased
+        //    queries that resolve to the same capability share a cache entry
         if (this.cache && resolution.success && matchResult.capability
             && matchResult.capability.privacy.level === 'public') {
             const queryKey = normalizeQuery(query);
+            const capKey = buildCacheKey(query, matchResult.capability.id, matchResult.extractedParams);
             await this.cache.set(queryKey, matchResult);
+            await this.cache.set(capKey, matchResult);
+            // capKey always starts with 'cap:' — structurally distinct from queryKey
+        }
+        // ── Step 5b: Compute missingParams ───────────────────────────────────────
+        // Spec: LLM attempts extraction first when available. missingParams is last resort.
+        let missingParams;
+        if (matchResult.capability && resolvedVia !== 'llm') {
+            const cap = matchResult.capability;
+            const unresolved = cap.params.filter(p => p.source === 'user_query' && p.required
+                && matchResult.extractedParams[p.name] === null);
+            if (unresolved.length > 0 && this.llm && this.mode !== 'cheap') {
+                // LLM available — attempt targeted param extraction before declaring incomplete
+                const skipReason = this.checkLLMAllowed();
+                if (!skipReason) {
+                    try {
+                        const paramExtractionStart = Date.now();
+                        const paramDescriptions = unresolved
+                            .map(p => `- ${p.name}: ${p.description}`)
+                            .join('\n');
+                        const paramPrompt = `Extract the following parameters from this user query.\n` +
+                            `Query: ${JSON.stringify({ user_query: query })}\n\n` +
+                            `Parameters to extract:\n${paramDescriptions}\n\n` +
+                            `Respond ONLY with valid JSON: { "params": { "<name>": "<value or null>" } }`;
+                        const raw = await this.llm(paramPrompt);
+                        const clean = raw.replace(/```json|```/g, '').trim();
+                        const parsed = JSON.parse(clean);
+                        this.recordLLMSuccess();
+                        steps.push({
+                            type: 'llm_match',
+                            status: 'pass',
+                            durationMs: Date.now() - paramExtractionStart,
+                            detail: `param extraction: ${unresolved.map(p => p.name).join(', ')}`,
+                        });
+                        // Merge LLM-extracted values — validate type before accepting
+                        for (const p of unresolved) {
+                            const val = parsed?.params?.[p.name];
+                            if (val && typeof val === 'string' && val.trim().length > 0) {
+                                matchResult.extractedParams[p.name] = val.trim();
+                            }
+                        }
+                    }
+                    catch {
+                        // LLM param extraction failed — fall through to missingParams below
+                    }
+                }
+            }
+            // After LLM attempt (or if skipped/unavailable), report what's still missing
+            const stillMissing = cap.params
+                .filter(p => p.source === 'user_query' && p.required
+                && matchResult.extractedParams[p.name] === null)
+                .map(p => p.name);
+            if (stillMissing.length > 0)
+                missingParams = stillMissing;
         }
         // ── Step 6: Build reasoning array ────────────────────────────────────────
         const reasoning = [];
@@ -183,6 +262,9 @@ export class CapmanEngine {
             resolvedVia,
             durationMs: Date.now() - start,
             trace,
+            verdict,
+            margin,
+            missingParams,
         };
     }
     /**
@@ -242,6 +324,9 @@ export class CapmanEngine {
     async loadManifest(manifest) {
         this.checkManifestVersion(manifest);
         this.manifest = manifest;
+        this.bm25Index = buildBM25Index(manifest.capabilities);
+        this.bm25Ceiling = this.calibrateBM25Ceiling();
+        this.adaptiveMargin = this.calibrateAdaptiveMargin();
         await this.clearCache();
     }
     /**
@@ -279,8 +364,10 @@ export class CapmanEngine {
         }
         let resolvedVia = _resolvedVia;
         // ── Apply learning boost (same as ask()) ─────────────────────────────────
-        matchResult = await this.applyBoostToMatchResult(query, matchResult);
+        matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
         // ── Build candidate explanations ─────────────────────────────────────────
+        const qTokens = tokenize(query);
+        const qWordSet = new Set(qTokens);
         const candidates = matchResult.candidates
             .sort((a, b) => b.score - a.score)
             .map(c => {
@@ -293,10 +380,9 @@ export class CapmanEngine {
                 explanation = `Strong match (${c.score}%) — query closely matches examples`;
             }
             else if (c.score >= 50) {
-                const qWords = query.toLowerCase().split(/\W+/).filter(Boolean);
                 const matchedWords = (cap?.examples ?? [])
-                    .flatMap(e => e.toLowerCase().split(/\s+/))
-                    .filter(w => qWords.includes(w) && w.length > 2);
+                    .flatMap(e => tokenize(e))
+                    .filter(w => qWordSet.has(w));
                 const unique = [...new Set(matchedWords)].slice(0, 3);
                 explanation = unique.length
                     ? `Matched keywords: ${unique.join(', ')} (${c.score}%)`
@@ -442,8 +528,10 @@ export class CapmanEngine {
             this.llmCallsThisMinute = 0;
             this.llmWindowStart = now;
         }
+        if (this.maxLLMCallsPerMinute === 0) {
+            return 'LLM disabled — maxLLMCallsPerMinute is 0';
+        }
         if (this.llmCallsThisMinute >= this.maxLLMCallsPerMinute) {
-            // Recalculate elapsed after possible window reset above
             const resetIn = Math.ceil((60_000 - (now - this.llmWindowStart)) / 1000);
             return `rate limit reached (${this.maxLLMCallsPerMinute}/min) — resets in ${Math.max(0, resetIn)}s`;
         }
@@ -462,6 +550,10 @@ export class CapmanEngine {
      * Records a failed LLM call — may open the circuit breaker.
      */
     recordLLMFailure() {
+        // Refund the rate-limit slot — the call failed so it shouldn't count
+        // against the per-minute quota. Without this, sustained failures
+        // exhaust the limit prematurely and silently degrade to keyword-only.
+        this.llmCallsThisMinute = Math.max(0, this.llmCallsThisMinute - 1);
         this.llmConsecutiveFails++;
         if (this.llmConsecutiveFails >= this.llmCircuitBreakerThreshold) {
             this.llmCircuitOpenAt = Date.now();
@@ -480,6 +572,10 @@ export class CapmanEngine {
         const fuzzyOpts = {
             fuzzyMatch: this.fuzzyMatch,
             fuzzyThreshold: this.fuzzyThreshold,
+            bm25Index: this.bm25Index,
+            bm25Ceiling: this.bm25Ceiling,
+            bm25K1: this.bm25K1,
+            bm25B: this.bm25B,
         };
         switch (this.mode) {
             case 'cheap': {
@@ -490,6 +586,8 @@ export class CapmanEngine {
             }
             case 'accurate': {
                 if (this.llm) {
+                    // Rate limiter shared between ask() and explain() — explain() counts
+                    // against the same quota since it makes real LLM calls.
                     const skipReason = this.checkLLMAllowed();
                     if (skipReason) {
                         logger.warn(`LLM skipped — ${skipReason} — falling back to keyword`);
@@ -545,6 +643,8 @@ export class CapmanEngine {
                     matchResult = keywordResult;
                 }
                 else {
+                    // Rate limiter shared between ask() and explain() — explain() counts
+                    // against the same quota since it makes real LLM calls.
                     const skipReason = this.checkLLMAllowed();
                     if (skipReason) {
                         logger.warn(`LLM skipped — ${skipReason}`);
@@ -590,7 +690,12 @@ export class CapmanEngine {
      * Applies learning boost to a MatchResult and returns the updated result.
      * Shared by ask() and explain() to avoid logic divergence.
      */
-    async applyBoostToMatchResult(query, matchResult) {
+    async applyBoostToMatchResult(query, matchResult, resolvedVia = 'keyword') {
+        // Skip boost when LLM matched with high confidence — learning signal is
+        // less reliable than a strong LLM result and could incorrectly override it.
+        // Threshold 80% leaves room for boost to help on borderline LLM matches.
+        if (resolvedVia === 'llm' && matchResult.confidence > 80)
+            return matchResult;
         const hasKeywordSignal = matchResult.candidates.some(c => c.score > 0);
         if (!hasKeywordSignal || matchResult.candidates.length === 0 || !this.learning || this.mode === 'cheap') {
             return matchResult;
@@ -638,7 +743,7 @@ export class CapmanEngine {
         const stats = await this.learning.getStats();
         if (!stats || Object.keys(stats.index).length === 0)
             return candidates;
-        const qWords = query.toLowerCase().split(/\W+/).filter(w => w.length > 2 && !STOPWORDS.has(w));
+        const qWords = tokenize(query);
         if (qWords.length === 0)
             return candidates;
         return candidates.map(candidate => {
@@ -686,6 +791,136 @@ export class CapmanEngine {
             timestamp: new Date().toISOString(),
         });
     }
+    calibrateBM25Ceiling() {
+        let max = 0;
+        for (const cap of this.manifest.capabilities) {
+            if (!cap.examples?.length)
+                continue;
+            const selfWords = new Set(tokenize(cap.examples[0]));
+            const raw = _scoreCapability(selfWords, cap, this.bm25Index, this.bm25K1, this.bm25B);
+            if (raw > max)
+                max = raw;
+        }
+        return max > 0 ? max : 100;
+    }
+    /**
+     * Calibrates the adaptive margin threshold from the manifest's own score
+     * distribution. Runs each capability's first example against all other
+     * capabilities to find the typical inter-capability score spread.
+     * Dense overlapping vocabulary → lower margin (harder to separate).
+     * Sparse vocabulary → higher margin (easier to separate).
+     *
+     * Complexity: O(capabilities²) — runs at constructor time and on loadManifest().
+     * For manifests with ≤100 capabilities this is negligible (<10ms).
+     * For very large manifests (500+ capabilities), consider passing
+     * `adaptiveMarginOverride` to skip calibration.
+     */
+    calibrateAdaptiveMargin() {
+        if (this.manifest.capabilities.length < 2)
+            return 20;
+        const margins = [];
+        const fuzzyOpts = {
+            fuzzyMatch: false, // calibration uses keyword only — deterministic
+            bm25Index: this.bm25Index,
+            bm25Ceiling: this.bm25Ceiling,
+            bm25K1: this.bm25K1,
+            bm25B: this.bm25B,
+        };
+        for (const cap of this.manifest.capabilities) {
+            if (!cap.examples?.length)
+                continue;
+            const result = _match(cap.examples[0], this.manifest, fuzzyOpts);
+            const sorted = [...result.candidates].sort((a, b) => b.score - a.score);
+            if (sorted.length >= 2) {
+                margins.push(sorted[0].score - sorted[1].score);
+            }
+        }
+        if (margins.length === 0)
+            return 20;
+        // Derive the threshold from the 25th percentile of margins, scaled by 0.6 and
+        // clamped to [10, 30] — manifests with closely spaced capabilities get a tighter threshold
+        margins.sort((a, b) => a - b);
+        const p25 = margins[Math.floor(margins.length * 0.25)];
+        return Math.max(10, Math.min(30, Math.round(p25 * 0.6)));
+    }
+    computeVerdict(matchResult) {
+        if (!matchResult.capability)
+            return { verdict: 'uncertain', margin: 0 };
+        const sorted = [...matchResult.candidates].sort((a, b) => b.score - a.score);
+        const best = sorted[0]?.score ?? 0;
+        const second = sorted[1]?.score ?? 0;
+        const margin = best - second;
+        if (best < 60)
+            return { verdict: 'uncertain', margin };
+        if (margin < this.adaptiveMargin)
+            return { verdict: 'marginal', margin };
+        return { verdict: 'clear', margin };
+    }
+    /**
+     * Targeted disambiguation between the top-2 candidates.
+     * Sends ~200 tokens instead of the full manifest (~4000 tokens) — roughly a 95% cost reduction.
+     * Returns an updated matchResult with the LLM-preferred winner, or the original matchResult when the call is skipped or fails.
+     */
+    async disambiguateLLM(query, matchResult, steps) {
+        if (!this.llm)
+            return matchResult;
+        const sorted = [...matchResult.candidates]
+            .sort((a, b) => b.score - a.score)
+            .slice(0, 2);
+        if (sorted.length < 2)
+            return matchResult;
+        const capA = this.manifest.capabilities.find(c => c.id === sorted[0].capabilityId);
+        const capB = this.manifest.capabilities.find(c => c.id === sorted[1].capabilityId);
+        if (!capA || !capB)
+            return matchResult;
+        const skipReason = this.checkLLMAllowed();
+        if (skipReason) {
+            logger.warn(`Disambiguation LLM skipped — ${skipReason}`);
+            steps.push({ type: 'llm_match', status: 'skip', durationMs: 0, detail: `disambiguation skipped: ${skipReason}` });
+            return matchResult;
+        }
+        const prompt = `Two capabilities are close matches for this query. Pick the best one.
+  Query: ${JSON.stringify({ user_query: query })}
+  Option A: ${capA.id} — ${sanitizeForPrompt(capA.description, 150)}
+  Option B: ${capB.id} — ${sanitizeForPrompt(capB.description, 150)}
+  Respond ONLY with valid JSON:
+  { "winner": "<capability_id>", "confidence": <0-100>, "reasoning": "<one sentence>" }`;
+        const t = Date.now();
+        try {
+            const raw = await this.llm(prompt);
+            const clean = raw.replace(/```json|```/g, '').trim();
+            const parsed = JSON.parse(clean);
+            this.recordLLMSuccess();
+            const winner = this.manifest.capabilities.find(c => c.id === parsed.winner);
+            if (!winner) {
+                steps.push({ type: 'llm_match', status: 'fail', durationMs: Date.now() - t, detail: 'disambiguation returned unknown id' });
+                return matchResult;
+            }
+            steps.push({ type: 'llm_match', status: 'pass', durationMs: Date.now() - t, detail: `disambiguation: ${winner.id} (${parsed.confidence}%)` });
+            const confidence = typeof parsed.confidence === 'number' && !isNaN(parsed.confidence)
+                ? Math.min(100, Math.max(0, Math.round(parsed.confidence)))
+                : matchResult.confidence; // fallback to original if LLM returned bad value
+            return {
+                ...matchResult,
+                capability: winner,
+                confidence,
+                intent: resolverToIntent(winner),
+                extractedParams: extractParams(query, winner),
+                candidates: matchResult.candidates.map(c => ({ ...c, matched: c.capabilityId === winner.id })),
+                reasoning: parsed.reasoning ?? `Disambiguated to "${winner.id}"`,
+            };
+        }
+        catch (err) {
+            const isParseError = err instanceof LLMParseError;
+            if (!isParseError)
+                this.recordLLMFailure();
+            steps.push({ type: 'llm_match', status: 'fail', durationMs: Date.now() - t, detail: String(err) });
+            return matchResult;
+        }
+    }
 }
 /** Maximum allowed query length in characters. Queries exceeding this throw RangeError. */
 CapmanEngine.MAX_QUERY_LENGTH = 1000;

package/dist/esm/generator.js CHANGED Viewed

@@ -73,7 +73,13 @@ export function loadConfig(configPath) {
         `Run: node bin/capman.js init  to create one.`);
 }
 export function writeManifest(manifest, outputPath = 'manifest.json') {
-    const resolved = path.resolve(process.cwd(), outputPath);
+    const cwd = process.cwd();
+    const resolved = path.resolve(cwd, outputPath);
+    const allowedPrefix = cwd === '/' ? '/' : cwd + path.sep;
+    if (!resolved.startsWith(allowedPrefix)) {
+        throw new Error(`writeManifest: output path "${outputPath}" resolves outside the working directory.\n` +
+            `Resolved: ${resolved}\nAllowed:  ${cwd}`);
+    }
     fs.writeFileSync(resolved, JSON.stringify(manifest, null, 2));
     return resolved;
 }

package/dist/esm/index.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@ export { generate, loadConfig, writeManifest, readManifest, validate, generateSt
 export { match, matchWithLLM, extractParams, } from './matcher';
 export { LLMParseError } from './matcher';
 export type { LLMMatcherOptions } from './matcher';
+export { TYPE_PATTERNS } from './matcher';
 export { resolve } from './resolver';
 export type { ResolveOptions, AuthContext } from './resolver';
 export { CapmanEngine } from './engine';

package/dist/esm/index.js CHANGED Viewed

@@ -2,6 +2,7 @@ export { setLogLevel } from './logger';
 export { generate, loadConfig, writeManifest, readManifest, validate, generateStarterConfig, } from './generator';
 export { match, matchWithLLM, extractParams, } from './matcher';
 export { LLMParseError } from './matcher';
+export { TYPE_PATTERNS } from './matcher';
 export { resolve } from './resolver';
 // ─── Engine (recommended API) ─────────────────────────────────────────────────
 export { CapmanEngine } from './engine';

package/dist/esm/learning.js CHANGED Viewed

@@ -2,16 +2,42 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { logger } from './logger';
 const MAX_LEARNING_ENTRIES = 10_000;
-import { STOPWORDS } from './matcher';
+import { tokenize } from './matcher';
 // Module-level registry — tracks all active FileLearningStore instances
 // for process exit flushing. Handlers registered once to avoid accumulation.
 const activeStores = new Set();
-let exitHandlersRegistered = false;
+// Module-level handler references — stored so they can be removed
+// when all stores are destroyed. Never call process.exit() in a library.
+let exitHandler = null;
+let sigTermHandler = null;
+let sigIntHandler = null;
 function flushAllStores() {
     for (const store of activeStores) {
         store.flushSync();
     }
 }
+function registerExitHandlers() {
+    if (exitHandler)
+        return; // already registered
+    exitHandler = flushAllStores;
+    sigTermHandler = flushAllStores;
+    sigIntHandler = flushAllStores;
+    process.on('exit', exitHandler);
+    process.on('SIGTERM', sigTermHandler);
+    process.on('SIGINT', sigIntHandler);
+}
+function unregisterExitHandlers() {
+    if (!exitHandler)
+        return; // nothing registered
+    if (activeStores.size > 0)
+        return; // other stores still active
+    process.off('exit', exitHandler);
+    process.off('SIGTERM', sigTermHandler);
+    process.off('SIGINT', sigIntHandler);
+    exitHandler = null;
+    sigTermHandler = null;
+    sigIntHandler = null;
+}
 // ─── Shared computation helpers ───────────────────────────────────────────────
 function computeTopCapabilities(entries, limit) {
     const counts = {};
@@ -45,13 +71,15 @@ class LearningIndex {
         if (!entry.capabilityId)
             this.statsCounter.outOfScope++;
         if (entry.capabilityId) {
-            const words = entry.query.toLowerCase()
-                .split(/\W+/)
-                .filter(w => w.length > 2 && !STOPWORDS.has(w));
+            // Confidence-weighted contribution — a 95% match (weight 0.95) contributes
+            // about 1.9× the signal of a 51% borderline match (weight 0.51). The 0.1 floor
+            // ensures entries with confidence ≤ 10% still contribute, just far less.
+            const weight = Math.max(0.1, entry.confidence / 100);
+            const words = tokenize(entry.query);
             for (const word of words) {
                 this.index[word] ??= {};
                 this.index[word][entry.capabilityId] =
-                    (this.index[word][entry.capabilityId] ?? 0) + 1;
+                    (this.index[word][entry.capabilityId] ?? 0) + weight;
             }
         }
     }
@@ -67,14 +95,14 @@ class LearningIndex {
             return;
         }
         // Keyword index cleanup
-        const words = entry.query.toLowerCase()
-            .split(/\W+/)
-            .filter(w => w.length > 2 && !STOPWORDS.has(w));
+        const words = tokenize(entry.query);
         for (const word of words) {
             if (!this.index[word])
                 continue;
+            // Subtract estimated weight (0.5 average) — exact weight not stored.
+            // Minor drift on prune is acceptable; index is rebuilt when drift matters.
             this.index[word][entry.capabilityId] =
-                (this.index[word][entry.capabilityId] ?? 1) - 1;
+                (this.index[word][entry.capabilityId] ?? 0.5) - 0.5;
             if (this.index[word][entry.capabilityId] <= 0) {
                 delete this.index[word][entry.capabilityId];
             }
@@ -120,12 +148,7 @@ export class FileLearningStore {
         this.filePath = resolved;
         logger.info(`FileLearningStore initialized — writing to: ${this.filePath}`);
         activeStores.add(this);
-        if (!exitHandlersRegistered) {
-            exitHandlersRegistered = true;
-            process.on('exit', flushAllStores);
-            process.on('SIGTERM', () => { flushAllStores(); process.exit(0); });
-            process.on('SIGINT', () => { flushAllStores(); process.exit(0); });
-        }
+        registerExitHandlers();
     }
     flushSync() {
         // Cancel pending timer — prevents scheduleSave firing after sync write
@@ -160,15 +183,17 @@ export class FileLearningStore {
         }
         if (this.dirty) {
             this.dirty = false;
-            // Await final flush before removing from registry —
-            // ensures data is written before the store becomes unreachable
             await this.save();
         }
         activeStores.delete(this);
+        unregisterExitHandlers(); // remove handlers if no stores remain
     }
     load() {
         if (!this.loadPromise) {
-            this.loadPromise = this._doLoad();
+            this.loadPromise = this._doLoad().catch(err => {
+                this.loadPromise = null; // allow retry on next call
+                throw err;
+            });
         }
         return this.loadPromise;
     }
@@ -185,8 +210,12 @@ export class FileLearningStore {
                 logger.warn(`Learning store at ${this.filePath} contained unexpected format — starting fresh`);
             }
         }
-        catch {
-            // File doesn't exist yet — start fresh
+        catch (err) {
+            const code = err.code;
+            if (code !== 'ENOENT') {
+                logger.warn(`Failed to load learning store from ${this.filePath} (${code ?? 'unknown error'}) — starting fresh`);
+            }
+            // ENOENT = file doesn't exist yet — expected on first run, no warning needed
         }
     }
     scheduleSave(urgencyMs = 5_000) {
@@ -228,11 +257,7 @@ export class FileLearningStore {
         // not be persisted to disk under GDPR/CCPA data retention requirements.
         const sanitized = {
             ...entry,
-            query: entry.query
-                .toLowerCase()
-                .split(/\W+/)
-                .filter(w => w.length > 2 && !STOPWORDS.has(w))
-                .join(' '),
+            query: tokenize(entry.query).join(' '),
         };
         this.entries.push(sanitized);
         this.learningIndex.update(sanitized);
@@ -281,11 +306,7 @@ export class MemoryLearningStore {
     async record(entry) {
         const sanitized = {
             ...entry,
-            query: entry.query
-                .toLowerCase()
-                .split(/\W+/)
-                .filter(w => w.length > 2 && !STOPWORDS.has(w))
-                .join(' '),
+            query: tokenize(entry.query).join(' '),
         };
         this.entries.push(sanitized);
         this.learningIndex.update(sanitized);