npm - capman - Versions diffs - 0.5.3 → 0.5.5 - Mend

capman 0.5.3 → 0.5.5

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (54)

package/CHANGELOG.md +61 -0
package/CODEBASE.md +115 -65
package/README.md +45 -4
package/bin/lib/cmd-explain.js +2 -2
package/bin/lib/cmd-generate.js +44 -28
package/bin/lib/cmd-run.js +2 -2
package/bin/lib/shared.js +8 -2
package/dist/cjs/cache.d.ts.map +1 -1
package/dist/cjs/cache.js +22 -5
package/dist/cjs/cache.js.map +1 -1
package/dist/cjs/engine.d.ts +30 -0
package/dist/cjs/engine.d.ts.map +1 -1
package/dist/cjs/engine.js +87 -36
package/dist/cjs/engine.js.map +1 -1
package/dist/cjs/generator.d.ts.map +1 -1
package/dist/cjs/generator.js +7 -1
package/dist/cjs/generator.js.map +1 -1
package/dist/cjs/learning.d.ts.map +1 -1
package/dist/cjs/learning.js +39 -12
package/dist/cjs/learning.js.map +1 -1
package/dist/cjs/matcher.d.ts +18 -10
package/dist/cjs/matcher.d.ts.map +1 -1
package/dist/cjs/matcher.js +140 -29
package/dist/cjs/matcher.js.map +1 -1
package/dist/cjs/parser.d.ts.map +1 -1
package/dist/cjs/parser.js +15 -8
package/dist/cjs/parser.js.map +1 -1
package/dist/cjs/resolver.d.ts +1 -0
package/dist/cjs/resolver.d.ts.map +1 -1
package/dist/cjs/resolver.js +16 -5
package/dist/cjs/resolver.js.map +1 -1
package/dist/cjs/schema.d.ts +18 -18
package/dist/cjs/schema.js +1 -1
package/dist/cjs/schema.js.map +1 -1
package/dist/cjs/types.d.ts +1 -1
package/dist/cjs/types.d.ts.map +1 -1
package/dist/cjs/version.d.ts +1 -1
package/dist/cjs/version.js +1 -1
package/dist/esm/cache.js +22 -5
package/dist/esm/engine.d.ts +30 -0
package/dist/esm/engine.js +89 -38
package/dist/esm/generator.js +7 -1
package/dist/esm/learning.js +39 -12
package/dist/esm/matcher.d.ts +18 -10
package/dist/esm/matcher.js +137 -29
package/dist/esm/parser.js +15 -8
package/dist/esm/resolver.d.ts +1 -0
package/dist/esm/resolver.js +16 -6
package/dist/esm/schema.d.ts +18 -18
package/dist/esm/schema.js +1 -1
package/dist/esm/types.d.ts +1 -1
package/dist/esm/version.d.ts +1 -1
package/dist/esm/version.js +1 -1
package/package.json +11 -10

package/dist/esm/engine.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { match as _match, matchWithLLM as _matchWithLLM, resolverToIntent, extractParams, STOPWORDS, LLMParseError } from './matcher';
-import { resolve as _resolve } from './resolver';
+import { resolve as _resolve, checkPrivacy } from './resolver';
 import { MemoryLearningStore } from './learning';
 import { logger } from './logger';
-import { MemoryCache, normalizeQuery } from './cache';
+import { MemoryCache, normalizeQuery, buildCacheKey } from './cache';
 import { VERSION } from './version';
 // ─── CapmanEngine ─────────────────────────────────────────────────────────────
 export class CapmanEngine {
@@ -25,6 +25,8 @@ export class CapmanEngine {
         this.llmCooldownMs = options.llmCooldownMs ?? 0;
         this.llmCircuitBreakerThreshold = options.llmCircuitBreakerThreshold ?? 3;
         this.llmCircuitBreakerResetMs = options.llmCircuitBreakerResetMs ?? 60_000;
+        this.fuzzyMatch = options.fuzzyMatch ?? false;
+        this.fuzzyThreshold = options.fuzzyThreshold ?? 0.4;
         // Cache — default MemoryCache (no filesystem writes), or disabled with false
         // Use FileCache or ComboCache explicitly for persistence across restarts
         this.cache = options.cache === false
@@ -37,23 +39,7 @@ export class CapmanEngine {
             : (options.learning ?? new MemoryLearningStore());
         logger.info(`CapmanEngine initialized — mode: ${this.mode}, cache: ${this.cache ? 'enabled' : 'disabled'}, learning: ${this.learning ? 'enabled' : 'disabled'}`);
         // ── Manifest version compatibility check ─────────────────────────────────
-        if (options.manifest.version) {
-            const SEMVER_RE = /^\d+\.\d+\.\d+$/;
-            if (SEMVER_RE.test(options.manifest.version) && SEMVER_RE.test(VERSION)) {
-                const [mMaj, mMin] = options.manifest.version.split('.').map(Number);
-                const [eMaj, eMin] = VERSION.split('.').map(Number);
-                if (mMaj !== eMaj || mMin !== eMin) {
-                    console.warn(`[capman] Manifest version "${options.manifest.version}" was generated with a ` +
-                        `different engine version than "${VERSION}". This is usually fine across patch versions. ` +
-                        `If you experience unexpected matching issues, regenerate with: npx capman generate`);
-                }
-            }
-            else if (options.manifest.version !== VERSION) {
-                //console.warn is used instead of logger.warn to avoid the warning being logged to the console
-                console.warn(`[capman] Manifest version "${options.manifest.version}" could not be compared ` +
-                    `to engine version "${VERSION}" — version strings are not valid semver.`);
-            }
-        }
+        this.checkManifestVersion(options.manifest);
     }
     /**
      * Ask the engine a natural language query.
@@ -121,17 +107,20 @@ export class CapmanEngine {
         }
         // ── Step 2: Match ────────────────────────────────────────────────────────
         let { matchResult, resolvedVia } = await this._runMatch(query, steps);
-        const preBoostMatchResult = matchResult; // kept for learning recording only — prevents feedback loop
+        // Shallow copy with candidates slice — not a reference alias.
+        // applyBoostToMatchResult() returns a new object today, but an explicit
+        // copy makes the invariant clear and safe against future in-place mutation.
+        const preBoostMatchResult = { ...matchResult, candidates: matchResult.candidates.slice() };
         // ── Step 2.5: Apply learning boost ───────────────────────────────────────
-        matchResult = await this.applyBoostToMatchResult(query, matchResult);
+        matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
         // ── Step 3: Privacy check ────────────────────────────────────────────────
         if (matchResult.capability) {
-            const privacyLevel = matchResult.capability.privacy.level;
+            const privacyError = checkPrivacy(matchResult.capability, this.auth);
             steps.push({
                 type: 'privacy_check',
-                status: 'pass',
+                status: privacyError ? 'fail' : 'pass',
                 durationMs: 0,
-                detail: `level: ${privacyLevel}`,
+                detail: privacyError ?? `level: ${matchResult.capability.privacy.level}`,
             });
         }
         // ── Step 4: Resolve ──────────────────────────────────────────────────────
@@ -144,14 +133,17 @@ export class CapmanEngine {
             detail: resolution.error ?? `via ${resolution.resolverType}`,
         });
         // ── Step 5: Cache after successful resolution ────────────────────────────
-        // Only cache when resolution succeeded — a failed resolution (network error,
-        // auth failure, bad params) must not poison the cache. A cached failed match
-        // would cause every subsequent cache hit to attempt the same failing resolution
-        // until TTL expires.
+        // Write under two keys:
+        // 1. normalizeQuery — exact phrasing lookup for this query
+        // 2. buildCacheKey — semantic key (capability + params) so differently-phrased
+        //    queries that resolve to the same capability share a cache entry
         if (this.cache && resolution.success && matchResult.capability
             && matchResult.capability.privacy.level === 'public') {
             const queryKey = normalizeQuery(query);
+            const capKey = buildCacheKey(query, matchResult.capability.id, matchResult.extractedParams);
             await this.cache.set(queryKey, matchResult);
+            await this.cache.set(capKey, matchResult);
+            // capKey always starts with 'cap:' — structurally distinct from queryKey
         }
         // ── Step 6: Build reasoning array ────────────────────────────────────────
         const reasoning = [];
@@ -223,6 +215,45 @@ export class CapmanEngine {
         if (this.cache)
             await this.cache.clear();
     }
+    checkManifestVersion(manifest) {
+        if (!manifest.version)
+            return;
+        const SEMVER_RE = /^\d+\.\d+\.\d+$/;
+        if (SEMVER_RE.test(manifest.version) && SEMVER_RE.test(VERSION)) {
+            const [mMaj, mMin] = manifest.version.split('.').map(Number);
+            const [eMaj, eMin] = VERSION.split('.').map(Number);
+            if (mMaj !== eMaj || mMin !== eMin) {
+                console.warn(`[capman] Manifest version "${manifest.version}" was generated with a ` +
+                    `different engine version than "${VERSION}". This is usually fine across patch versions. ` +
+                    `If you experience unexpected matching issues, regenerate with: npx capman generate`);
+            }
+        }
+        else if (manifest.version !== VERSION) {
+            console.warn(`[capman] Manifest version "${manifest.version}" could not be compared ` +
+                `to engine version "${VERSION}" — version strings are not valid semver.`);
+        }
+    }
+    /**
+     * Replaces the active manifest without creating a new engine instance.
+     * Useful for hot-reloading manifests in long-running servers without
+     * losing cache, learning history, or rate limiter state.
+     *
+     * Note: clears the cache automatically — cached results from the old
+     * manifest are no longer valid after the manifest changes.
+     *
+     * @example
+     * const newManifest = generate(updatedConfig)
+     * await engine.loadManifest(newManifest)
+     */
+    async loadManifest(manifest) {
+        this.checkManifestVersion(manifest);
+        this.manifest = manifest;
+        await this.clearCache();
+        // Note: LLM rate limiter state (llmCallsThisMinute, llmConsecutiveFails,
+        // llmCircuitOpenAt) is intentionally preserved across manifest reloads.
+        // The LLM provider has not changed, so circuit breaker state remains valid.
+        // If you need a clean rate limiter state, create a new CapmanEngine instance.
+    }
     /**
      * Explain what would happen for a query — without executing it.
      * Shows matched capability, all candidate scores with reasoning,
@@ -258,8 +289,9 @@ export class CapmanEngine {
         }
         let resolvedVia = _resolvedVia;
         // ── Apply learning boost (same as ask()) ─────────────────────────────────
-        matchResult = await this.applyBoostToMatchResult(query, matchResult);
+        matchResult = await this.applyBoostToMatchResult(query, matchResult, resolvedVia);
         // ── Build candidate explanations ─────────────────────────────────────────
+        const qWordSet = new Set(query.toLowerCase().split(/\W+/).filter(Boolean));
         const candidates = matchResult.candidates
             .sort((a, b) => b.score - a.score)
             .map(c => {
@@ -272,10 +304,9 @@ export class CapmanEngine {
                 explanation = `Strong match (${c.score}%) — query closely matches examples`;
             }
             else if (c.score >= 50) {
-                const qWords = query.toLowerCase().split(/\W+/).filter(Boolean);
                 const matchedWords = (cap?.examples ?? [])
                     .flatMap(e => e.toLowerCase().split(/\s+/))
-                    .filter(w => qWords.includes(w) && w.length > 2);
+                    .filter(w => qWordSet.has(w) && w.length > 2);
                 const unique = [...new Set(matchedWords)].slice(0, 3);
                 explanation = unique.length
                     ? `Matched keywords: ${unique.join(', ')} (${c.score}%)`
@@ -421,8 +452,10 @@ export class CapmanEngine {
             this.llmCallsThisMinute = 0;
             this.llmWindowStart = now;
         }
+        if (this.maxLLMCallsPerMinute === 0) {
+            return 'LLM disabled — maxLLMCallsPerMinute is 0';
+        }
         if (this.llmCallsThisMinute >= this.maxLLMCallsPerMinute) {
-            // Recalculate elapsed after possible window reset above
             const resetIn = Math.ceil((60_000 - (now - this.llmWindowStart)) / 1000);
             return `rate limit reached (${this.maxLLMCallsPerMinute}/min) — resets in ${Math.max(0, resetIn)}s`;
         }
@@ -441,6 +474,10 @@ export class CapmanEngine {
      * Records a failed LLM call — may open the circuit breaker.
      */
     recordLLMFailure() {
+        // Refund the rate-limit slot — the call failed so it shouldn't count
+        // against the per-minute quota. Without this, sustained failures
+        // exhaust the limit prematurely and silently degrade to keyword-only.
+        this.llmCallsThisMinute = Math.max(0, this.llmCallsThisMinute - 1);
         this.llmConsecutiveFails++;
         if (this.llmConsecutiveFails >= this.llmCircuitBreakerThreshold) {
             this.llmCircuitOpenAt = Date.now();
@@ -455,6 +492,11 @@ export class CapmanEngine {
     async _runMatch(query, steps) {
         let matchResult;
         let resolvedVia = 'keyword';
+        // Fuzzy options — never applied in cheap mode
+        const fuzzyOpts = {
+            fuzzyMatch: this.fuzzyMatch,
+            fuzzyThreshold: this.fuzzyThreshold,
+        };
         switch (this.mode) {
             case 'cheap': {
                 const t = Date.now();
@@ -464,11 +506,13 @@ export class CapmanEngine {
             }
             case 'accurate': {
                 if (this.llm) {
+                    // Rate limiter shared between ask() and explain() — explain() counts
+                    // against the same quota since it makes real LLM calls.
                     const skipReason = this.checkLLMAllowed();
                     if (skipReason) {
                         logger.warn(`LLM skipped — ${skipReason} — falling back to keyword`);
                         const t = Date.now();
-                        matchResult = _match(query, this.manifest);
+                        matchResult = _match(query, this.manifest, fuzzyOpts);
                         steps?.push({ type: 'keyword_match', status: 'pass', durationMs: Date.now() - t, detail: `llm skipped: ${skipReason}` });
                     }
                     else {
@@ -478,7 +522,7 @@ export class CapmanEngine {
                             this.recordLLMSuccess();
                             resolvedVia = 'llm';
                             // Merge keyword scores into LLM candidates so boost has real signal for alternatives
-                            const kwResult = _match(query, this.manifest);
+                            const kwResult = _match(query, this.manifest, fuzzyOpts);
                             matchResult = {
                                 ...matchResult,
                                 candidates: matchResult.candidates.map(c => ({
@@ -496,7 +540,7 @@ export class CapmanEngine {
                                 this.recordLLMFailure();
                             logger.warn(`LLM call failed — falling back to keyword: ${err instanceof Error ? err.message : String(err)}`);
                             const t2 = Date.now();
-                            matchResult = _match(query, this.manifest);
+                            matchResult = _match(query, this.manifest, fuzzyOpts);
                             steps?.push({ type: 'llm_match', status: 'fail', durationMs: Date.now() - t, detail: String(err) });
                             steps?.push({ type: 'keyword_match', status: 'pass', durationMs: Date.now() - t2, detail: 'fallback after llm failure' });
                         }
@@ -505,7 +549,7 @@ export class CapmanEngine {
                 else {
                     logger.warn('accurate mode requires llm — falling back to keyword');
                     const t = Date.now();
-                    matchResult = _match(query, this.manifest);
+                    matchResult = _match(query, this.manifest, fuzzyOpts);
                     steps?.push({ type: 'keyword_match', status: 'pass', durationMs: Date.now() - t, detail: 'llm not provided, used keyword' });
                 }
                 break;
@@ -513,12 +557,14 @@ export class CapmanEngine {
             case 'balanced':
             default: {
                 const t1 = Date.now();
-                const keywordResult = _match(query, this.manifest);
+                const keywordResult = _match(query, this.manifest, fuzzyOpts);
                 steps?.push({ type: 'keyword_match', status: 'pass', durationMs: Date.now() - t1, detail: `confidence: ${keywordResult.confidence}%` });
                 if (keywordResult.confidence >= this.threshold || !this.llm) {
                     matchResult = keywordResult;
                 }
                 else {
+                    // Rate limiter shared between ask() and explain() — explain() counts
+                    // against the same quota since it makes real LLM calls.
                     const skipReason = this.checkLLMAllowed();
                     if (skipReason) {
                         logger.warn(`LLM skipped — ${skipReason}`);
@@ -564,7 +610,12 @@ export class CapmanEngine {
      * Applies learning boost to a MatchResult and returns the updated result.
      * Shared by ask() and explain() to avoid logic divergence.
      */
-    async applyBoostToMatchResult(query, matchResult) {
+    async applyBoostToMatchResult(query, matchResult, resolvedVia = 'keyword') {
+        // Skip boost when LLM matched with high confidence — learning signal is
+        // less reliable than a strong LLM result and could incorrectly override it.
+        // Threshold 80% leaves room for boost to help on borderline LLM matches.
+        if (resolvedVia === 'llm' && matchResult.confidence > 80)
+            return matchResult;
         const hasKeywordSignal = matchResult.candidates.some(c => c.score > 0);
         if (!hasKeywordSignal || matchResult.candidates.length === 0 || !this.learning || this.mode === 'cheap') {
             return matchResult;

package/dist/esm/generator.js CHANGED Viewed

@@ -73,7 +73,13 @@ export function loadConfig(configPath) {
         `Run: node bin/capman.js init  to create one.`);
 }
 export function writeManifest(manifest, outputPath = 'manifest.json') {
-    const resolved = path.resolve(process.cwd(), outputPath);
+    const cwd = process.cwd();
+    const resolved = path.resolve(cwd, outputPath);
+    const allowedPrefix = cwd === '/' ? '/' : cwd + path.sep;
+    if (!resolved.startsWith(allowedPrefix)) {
+        throw new Error(`writeManifest: output path "${outputPath}" resolves outside the working directory.\n` +
+            `Resolved: ${resolved}\nAllowed:  ${cwd}`);
+    }
     fs.writeFileSync(resolved, JSON.stringify(manifest, null, 2));
     return resolved;
 }

package/dist/esm/learning.js CHANGED Viewed

@@ -6,12 +6,38 @@ import { STOPWORDS } from './matcher';
 // Module-level registry — tracks all active FileLearningStore instances
 // for process exit flushing. Handlers registered once to avoid accumulation.
 const activeStores = new Set();
-let exitHandlersRegistered = false;
+// Module-level handler references — stored so they can be removed
+// when all stores are destroyed. Never call process.exit() in a library.
+let exitHandler = null;
+let sigTermHandler = null;
+let sigIntHandler = null;
 function flushAllStores() {
     for (const store of activeStores) {
         store.flushSync();
     }
 }
+function registerExitHandlers() {
+    if (exitHandler)
+        return; // already registered
+    exitHandler = flushAllStores;
+    sigTermHandler = flushAllStores;
+    sigIntHandler = flushAllStores;
+    process.on('exit', exitHandler);
+    process.on('SIGTERM', sigTermHandler);
+    process.on('SIGINT', sigIntHandler);
+}
+function unregisterExitHandlers() {
+    if (!exitHandler)
+        return; // nothing registered
+    if (activeStores.size > 0)
+        return; // other stores still active
+    process.off('exit', exitHandler);
+    process.off('SIGTERM', sigTermHandler);
+    process.off('SIGINT', sigIntHandler);
+    exitHandler = null;
+    sigTermHandler = null;
+    sigIntHandler = null;
+}
 // ─── Shared computation helpers ───────────────────────────────────────────────
 function computeTopCapabilities(entries, limit) {
     const counts = {};
@@ -120,12 +146,7 @@ export class FileLearningStore {
         this.filePath = resolved;
         logger.info(`FileLearningStore initialized — writing to: ${this.filePath}`);
         activeStores.add(this);
-        if (!exitHandlersRegistered) {
-            exitHandlersRegistered = true;
-            process.on('exit', flushAllStores);
-            process.on('SIGTERM', () => { flushAllStores(); process.exit(0); });
-            process.on('SIGINT', () => { flushAllStores(); process.exit(0); });
-        }
+        registerExitHandlers();
     }
     flushSync() {
         // Cancel pending timer — prevents scheduleSave firing after sync write
@@ -160,15 +181,17 @@ export class FileLearningStore {
         }
         if (this.dirty) {
             this.dirty = false;
-            // Await final flush before removing from registry —
-            // ensures data is written before the store becomes unreachable
             await this.save();
         }
         activeStores.delete(this);
+        unregisterExitHandlers(); // remove handlers if no stores remain
     }
     load() {
         if (!this.loadPromise) {
-            this.loadPromise = this._doLoad();
+            this.loadPromise = this._doLoad().catch(err => {
+                this.loadPromise = null; // allow retry on next call
+                throw err;
+            });
         }
         return this.loadPromise;
     }
@@ -185,8 +208,12 @@ export class FileLearningStore {
                 logger.warn(`Learning store at ${this.filePath} contained unexpected format — starting fresh`);
             }
         }
-        catch {
-            // File doesn't exist yet — start fresh
+        catch (err) {
+            const code = err.code;
+            if (code !== 'ENOENT') {
+                logger.warn(`Failed to load learning store from ${this.filePath} (${code ?? 'unknown error'}) — starting fresh`);
+            }
+            // ENOENT = file doesn't exist yet — expected on first run, no warning needed
         }
     }
     scheduleSave(urgencyMs = 5_000) {

package/dist/esm/matcher.d.ts CHANGED Viewed

@@ -11,23 +11,31 @@ export declare function resolverToIntent(cap: Capability): MatchResult['intent']
  * - Extracts single tokens only — "jane smith" would extract "jane"
  * - Keyword matching is positional — "articles from authors I follow"
  *   may extract "authors" instead of nothing, since "from" is a keyword
- * - For complex or ambiguous queries, use matchWithLLM() which handles
- *   param extraction more accurately via the LLM prompt
+ * - Required param fallback grabs the last meaningful word — "list all
+ *   recent orders" may extract "orders" even with the denylist extended.
+ *   For precise extraction of complex queries, use matchWithLLM() which
+ *   handles param extraction via structured LLM prompt.
+ * - To support richer extraction patterns, add a `pattern` field to
+ *   CapabilityParam in a future version.
  */
 export declare function extractParams(query: string, cap: Capability): Record<string, string | null>;
-export declare function match(query: string, manifest: Manifest): MatchResult;
+export interface MatchOptions {
+    fuzzyMatch?: boolean;
+    fuzzyThreshold?: number;
+}
+export declare function match(query: string, manifest: Manifest, options?: MatchOptions): MatchResult;
 export interface LLMMatcherOptions {
     llm: (prompt: string) => Promise<string>;
 }
 /**
  * Matches a query to a capability using an LLM.
  *
- * ⚠️  SECURITY NOTE: Capability `description` and `examples` fields from the
- * manifest are injected verbatim into the LLM prompt (system portion).
- * In a solo deployment with a developer-controlled manifest this is safe.
- * If your manifest is generated from third-party OpenAPI specs, user-controlled
- * sources, or any external input, sanitize `description` and `examples` fields
- * before passing the manifest to this function — adversarial content in those
- * fields can influence LLM routing decisions.
+ * ⚠️  SECURITY NOTE: Capability fields are sanitized before injection into
+ * the LLM prompt (newlines stripped, delimiters neutralized, length capped).
+ * However, the current interface passes a single prompt string — it cannot
+ * provide true system/user message separation that some LLM APIs support.
+ * For maximum injection resistance in high-security deployments, use an LLM
+ * wrapper that maps the prompt to a proper system message, keeping user query
+ * data in the user turn only.
  */
 export declare function matchWithLLM(query: string, manifest: Manifest, options: LLMMatcherOptions): Promise<MatchResult>;