npm - @seanhogg/builderforce-memory - Versions diffs - 2026.6.18 - Mend

@seanhogg/builderforce-memory 2026.6.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (163) hide show

package/LICENSE +21 -0
package/README.md +582 -0
package/dist/agent/SSMAgent.d.ts +146 -0
package/dist/agent/SSMAgent.d.ts.map +1 -0
package/dist/agent/SSMAgent.js +231 -0
package/dist/agent/SSMAgent.js.map +1 -0
package/dist/agent/index.d.ts +3 -0
package/dist/agent/index.d.ts.map +1 -0
package/dist/agent/index.js +2 -0
package/dist/agent/index.js.map +1 -0
package/dist/bridges/AnthropicBridge.d.ts +47 -0
package/dist/bridges/AnthropicBridge.d.ts.map +1 -0
package/dist/bridges/AnthropicBridge.js +120 -0
package/dist/bridges/AnthropicBridge.js.map +1 -0
package/dist/bridges/CachingBridge.d.ts +44 -0
package/dist/bridges/CachingBridge.d.ts.map +1 -0
package/dist/bridges/CachingBridge.js +62 -0
package/dist/bridges/CachingBridge.js.map +1 -0
package/dist/bridges/FetchBridge.d.ts +30 -0
package/dist/bridges/FetchBridge.d.ts.map +1 -0
package/dist/bridges/FetchBridge.js +24 -0
package/dist/bridges/FetchBridge.js.map +1 -0
package/dist/bridges/OpenAIBridge.d.ts +33 -0
package/dist/bridges/OpenAIBridge.d.ts.map +1 -0
package/dist/bridges/OpenAIBridge.js +110 -0
package/dist/bridges/OpenAIBridge.js.map +1 -0
package/dist/bridges/ResponseCache.d.ts +65 -0
package/dist/bridges/ResponseCache.d.ts.map +1 -0
package/dist/bridges/ResponseCache.js +97 -0
package/dist/bridges/ResponseCache.js.map +1 -0
package/dist/bridges/SemanticCachingBridge.d.ts +31 -0
package/dist/bridges/SemanticCachingBridge.d.ts.map +1 -0
package/dist/bridges/SemanticCachingBridge.js +44 -0
package/dist/bridges/SemanticCachingBridge.js.map +1 -0
package/dist/bridges/TransformerBridge.d.ts +35 -0
package/dist/bridges/TransformerBridge.d.ts.map +1 -0
package/dist/bridges/TransformerBridge.js +10 -0
package/dist/bridges/TransformerBridge.js.map +1 -0
package/dist/bridges/index.d.ts +14 -0
package/dist/bridges/index.d.ts.map +1 -0
package/dist/bridges/index.js +7 -0
package/dist/bridges/index.js.map +1 -0
package/dist/cache/FetchSemanticCacheBackend.d.ts +40 -0
package/dist/cache/FetchSemanticCacheBackend.d.ts.map +1 -0
package/dist/cache/FetchSemanticCacheBackend.js +61 -0
package/dist/cache/FetchSemanticCacheBackend.js.map +1 -0
package/dist/cache/SemanticCache.d.ts +105 -0
package/dist/cache/SemanticCache.d.ts.map +1 -0
package/dist/cache/SemanticCache.js +130 -0
package/dist/cache/SemanticCache.js.map +1 -0
package/dist/cache/index.d.ts +5 -0
package/dist/cache/index.d.ts.map +1 -0
package/dist/cache/index.js +3 -0
package/dist/cache/index.js.map +1 -0
package/dist/distillation/DistillationEngine.d.ts +107 -0
package/dist/distillation/DistillationEngine.d.ts.map +1 -0
package/dist/distillation/DistillationEngine.js +152 -0
package/dist/distillation/DistillationEngine.js.map +1 -0
package/dist/distillation/index.d.ts +3 -0
package/dist/distillation/index.d.ts.map +1 -0
package/dist/distillation/index.js +2 -0
package/dist/distillation/index.js.map +1 -0
package/dist/errors/SSMError.d.ts +14 -0
package/dist/errors/SSMError.d.ts.map +1 -0
package/dist/errors/SSMError.js +18 -0
package/dist/errors/SSMError.js.map +1 -0
package/dist/errors/index.d.ts +3 -0
package/dist/errors/index.d.ts.map +1 -0
package/dist/errors/index.js +2 -0
package/dist/errors/index.js.map +1 -0
package/dist/index.d.ts +65 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +59 -0
package/dist/index.js.map +1 -0
package/dist/memory/MemoryStore.d.ts +152 -0
package/dist/memory/MemoryStore.d.ts.map +1 -0
package/dist/memory/MemoryStore.js +290 -0
package/dist/memory/MemoryStore.js.map +1 -0
package/dist/memory/index.d.ts +3 -0
package/dist/memory/index.d.ts.map +1 -0
package/dist/memory/index.js +2 -0
package/dist/memory/index.js.map +1 -0
package/dist/router/InferenceRouter.d.ts +92 -0
package/dist/router/InferenceRouter.d.ts.map +1 -0
package/dist/router/InferenceRouter.js +113 -0
package/dist/router/InferenceRouter.js.map +1 -0
package/dist/router/index.d.ts +3 -0
package/dist/router/index.d.ts.map +1 -0
package/dist/router/index.js +2 -0
package/dist/router/index.js.map +1 -0
package/dist/runtime/SSMRuntime.d.ts +167 -0
package/dist/runtime/SSMRuntime.d.ts.map +1 -0
package/dist/runtime/SSMRuntime.js +199 -0
package/dist/runtime/SSMRuntime.js.map +1 -0
package/dist/runtime/index.d.ts +3 -0
package/dist/runtime/index.d.ts.map +1 -0
package/dist/runtime/index.js +2 -0
package/dist/runtime/index.js.map +1 -0
package/dist/session/errors.d.ts +10 -0
package/dist/session/errors.d.ts.map +1 -0
package/dist/session/errors.js +14 -0
package/dist/session/errors.js.map +1 -0
package/dist/session/index.d.ts +11 -0
package/dist/session/index.d.ts.map +1 -0
package/dist/session/index.js +7 -0
package/dist/session/index.js.map +1 -0
package/dist/session/persistence.d.ts +14 -0
package/dist/session/persistence.d.ts.map +1 -0
package/dist/session/persistence.js +100 -0
package/dist/session/persistence.js.map +1 -0
package/dist/session/presets.d.ts +31 -0
package/dist/session/presets.d.ts.map +1 -0
package/dist/session/presets.js +91 -0
package/dist/session/presets.js.map +1 -0
package/dist/session/session.d.ts +186 -0
package/dist/session/session.d.ts.map +1 -0
package/dist/session/session.js +358 -0
package/dist/session/session.js.map +1 -0
package/dist/session/streaming.d.ts +13 -0
package/dist/session/streaming.d.ts.map +1 -0
package/dist/session/streaming.js +74 -0
package/dist/session/streaming.js.map +1 -0
package/dist/session/tokenizer.d.ts +18 -0
package/dist/session/tokenizer.d.ts.map +1 -0
package/dist/session/tokenizer.js +11 -0
package/dist/session/tokenizer.js.map +1 -0
package/dist/similarity/index.d.ts +19 -0
package/dist/similarity/index.d.ts.map +1 -0
package/dist/similarity/index.js +42 -0
package/dist/similarity/index.js.map +1 -0
package/package.json +120 -0
package/src/agent/SSMAgent.ts +327 -0
package/src/agent/index.ts +2 -0
package/src/bridges/AnthropicBridge.ts +166 -0
package/src/bridges/CachingBridge.ts +79 -0
package/src/bridges/FetchBridge.ts +41 -0
package/src/bridges/OpenAIBridge.ts +143 -0
package/src/bridges/ResponseCache.ts +131 -0
package/src/bridges/SemanticCachingBridge.ts +60 -0
package/src/bridges/TransformerBridge.ts +38 -0
package/src/bridges/index.ts +13 -0
package/src/cache/FetchSemanticCacheBackend.ts +79 -0
package/src/cache/SemanticCache.ts +196 -0
package/src/cache/index.ts +9 -0
package/src/distillation/DistillationEngine.ts +248 -0
package/src/distillation/index.ts +2 -0
package/src/errors/SSMError.ts +26 -0
package/src/errors/index.ts +2 -0
package/src/index.ts +128 -0
package/src/memory/MemoryStore.ts +408 -0
package/src/memory/index.ts +2 -0
package/src/router/InferenceRouter.ts +201 -0
package/src/router/index.ts +2 -0
package/src/runtime/SSMRuntime.ts +309 -0
package/src/runtime/index.ts +2 -0
package/src/session/errors.ts +24 -0
package/src/session/index.ts +25 -0
package/src/session/persistence.ts +142 -0
package/src/session/presets.ts +122 -0
package/src/session/session.ts +657 -0
package/src/session/streaming.ts +97 -0
package/src/session/tokenizer.ts +18 -0
package/src/similarity/index.ts +42 -0

package/src/bridges/OpenAIBridge.ts ADDED Viewed

@@ -0,0 +1,143 @@
+/**
+ * OpenAIBridge – TransformerBridge implementation for the OpenAI Chat API.
+ *
+ * Supports both non-streaming and streaming (SSE) completions.
+ * Compatible with any OpenAI-compatible endpoint via the `baseUrl` option.
+ */
+import { SSMError } from '../errors/SSMError.js';
+import type { TransformerBridge, BridgeGenerateOptions } from './TransformerBridge.js';
/** Construction options for {@link OpenAIBridge}. */
export interface OpenAIBridgeOptions {
    /** OpenAI API key (or compatible service key). */
    apiKey        : string;
    /** Model to use. Default: 'gpt-4o-mini'. */
    model?        : string;
    /** API base URL. Default: 'https://api.openai.com/v1'. Trailing slashes are trimmed. */
    baseUrl?      : string;
    /** Default system prompt sent with every request. */
    systemPrompt? : string;
    /** Default max tokens. Default: 512. */
    maxTokens?    : number;
}
/**
 * TransformerBridge that talks to `POST {baseUrl}/chat/completions`.
 *
 * Every request carries a single user message (preceded by a system message
 * when a system prompt is configured or passed per call) and authenticates
 * with a bearer token built from `apiKey`.
 */
export class OpenAIBridge implements TransformerBridge {
    readonly supportsStreaming = true as const;
    private readonly _apiKey      : string;
    private readonly _model       : string;
    private readonly _baseUrl     : string;
    private readonly _systemPrompt: string;
    private readonly _maxTokens   : number;
    constructor(opts: OpenAIBridgeOptions) {
        this._apiKey       = opts.apiKey;
        this._model        = opts.model        ?? 'gpt-4o-mini';
        // Strip every trailing slash so `${baseUrl}/chat/completions` never contains `//`.
        this._baseUrl      = (opts.baseUrl     ?? 'https://api.openai.com/v1').replace(/\/+$/, '');
        this._systemPrompt = opts.systemPrompt ?? '';
        this._maxTokens    = opts.maxTokens    ?? 512;
    }
    /**
     * Requests a non-streaming completion and resolves to the first choice's
     * message content.
     *
     * @throws SSMError `BRIDGE_REQUEST_FAILED` when the HTTP status is not 2xx
     *         (the response text is appended to the message).
     * @throws SSMError `BRIDGE_RESPONSE_INVALID` when the body is not JSON or
     *         does not contain a string at `choices[0].message.content`.
     */
    async generate(prompt: string, opts: BridgeGenerateOptions = {}): Promise<string> {
        const body = this._buildBody(prompt, opts, false);
        const res  = await this._fetch(body);
        if (!res.ok) {
            const text = await res.text().catch(() => '');
            throw new SSMError(
                'BRIDGE_REQUEST_FAILED',
                `OpenAI API returned ${res.status}: ${text}`,
            );
        }
        // A 2xx with a non-JSON body (e.g. an HTML page from an intermediary)
        // must surface as a bridge error, not as a bare SyntaxError.
        let json: unknown;
        try {
            json = await res.json();
        } catch {
            throw new SSMError('BRIDGE_RESPONSE_INVALID', 'OpenAI response body is not valid JSON.');
        }
        // Narrow from `unknown`; optional chaining also covers a JSON `null` body.
        const shaped  = json as { choices?: ({ message?: { content?: unknown } | null } | null)[] } | null | undefined;
        const content = shaped?.choices?.[0]?.message?.content;
        if (typeof content !== 'string') {
            throw new SSMError('BRIDGE_RESPONSE_INVALID', 'Unexpected OpenAI response shape.');
        }
        return content;
    }
    /**
     * Requests a streaming completion and yields each non-empty content delta
     * as it arrives.
     *
     * @throws SSMError `BRIDGE_REQUEST_FAILED` when the HTTP status is not 2xx.
     * @throws SSMError `BRIDGE_RESPONSE_INVALID` when the response has no body.
     */
    async *stream(prompt: string, opts: BridgeGenerateOptions = {}): AsyncIterable<string> {
        const body = this._buildBody(prompt, opts, true);
        const res  = await this._fetch(body);
        if (!res.ok) {
            const text = await res.text().catch(() => '');
            throw new SSMError(
                'BRIDGE_REQUEST_FAILED',
                `OpenAI streaming API returned ${res.status}: ${text}`,
            );
        }
        if (!res.body) {
            throw new SSMError('BRIDGE_RESPONSE_INVALID', 'OpenAI streaming response has no body.');
        }
        yield* parseOpenAIStream(res.body);
    }
    /**
     * Serialises the chat-completions request. Per-call options win over the
     * constructor defaults; temperature and top_p fall back to 0.7 and 0.9.
     */
    private _buildBody(prompt: string, opts: BridgeGenerateOptions, stream: boolean): string {
        const sys = opts.systemPrompt ?? this._systemPrompt;
        const messages: { role: string; content: string }[] = [];
        // An empty system prompt is omitted rather than sent as an empty message.
        if (sys) messages.push({ role: 'system', content: sys });
        messages.push({ role: 'user', content: prompt });
        return JSON.stringify({
            model      : opts.model     ?? this._model,
            messages,
            max_tokens : opts.maxTokens ?? this._maxTokens,
            temperature: opts.temperature ?? 0.7,
            top_p      : opts.topP        ?? 0.9,
            stream,
        });
    }
    /** POSTs an already-serialised body to the chat-completions endpoint. */
    private _fetch(body: string): Promise<Response> {
        return fetch(`${this._baseUrl}/chat/completions`, {
            method : 'POST',
            headers: {
                'Content-Type' : 'application/json',
                'Authorization': `Bearer ${this._apiKey}`,
            },
            body,
        });
    }
}
+// ── SSE parser ────────────────────────────────────────────────────────────────
/**
 * Parses an OpenAI-style server-sent-event body and yields each non-empty
 * `choices[0].delta.content` string in arrival order.
 *
 * - Lines are split on `\n`; a partial trailing line is carried over to the
 *   next chunk, and whatever is still buffered when the stream ends is parsed
 *   too (a final event without a trailing newline is not lost).
 * - Only `data:` lines are considered; the space after the colon is optional.
 * - `data: [DONE]` ends iteration. Lines whose payload is not valid JSON are
 *   skipped.
 * - If iteration stops before the stream is exhausted (`[DONE]`, a read error,
 *   or the consumer abandoning the iterator) the stream is cancelled so the
 *   underlying source can be released, then the reader lock is dropped.
 */
async function* parseOpenAIStream(body: ReadableStream<Uint8Array>): AsyncIterable<string> {
    const reader  = body.getReader();
    const decoder = new TextDecoder();
    let buffer    = '';
    let exhausted = false;
    try {
        while (!exhausted) {
            const { done, value } = await reader.read();
            exhausted = done;
            // On the final read, flush any bytes the decoder is still holding.
            buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
            const lines = buffer.split('\n');
            // Mid-stream the last element may be a partial line; at end of
            // stream everything left is final and must be processed.
            buffer = done ? '' : (lines.pop() ?? '');
            for (const line of lines) {
                const trimmed = line.trim();
                if (!trimmed.startsWith('data:')) continue;
                const data = trimmed.slice(5).trimStart();
                if (data === '[DONE]') return;
                let chunk: unknown;
                try {
                    chunk = JSON.parse(data);
                } catch {
                    // Malformed JSON in stream — skip this line only.
                    continue;
                }
                const shaped = chunk as { choices?: ({ delta?: { content?: unknown } | null } | null)[] } | null;
                const delta  = shaped?.choices?.[0]?.delta?.content;
                if (typeof delta === 'string' && delta.length > 0) yield delta;
            }
        }
    } finally {
        // Early exit: tell the source we are done. Cancelling an errored stream
        // rejects, which is irrelevant here.
        if (!exhausted) await reader.cancel().catch(() => undefined);
        reader.releaseLock();
    }
}

package/src/bridges/ResponseCache.ts ADDED Viewed

@@ -0,0 +1,131 @@
+/**
+ * ResponseCache – a small, dependency-free read-through cache for transformer
+ * bridge completions.
+ *
+ * This is the canonical cache for this library: a single bounded LRU with an
+ * optional TTL, not an ad-hoc Map inlined at a call site. It exists because an
+ * external LLM call is the most expensive thing the runtime does — identical
+ * (model, system, prompt, sampling) requests should never be billed twice.
+ *
+ * Scope is in-process by design: this package targets the browser and Node, so
+ * there is no shared KV / cross-isolate tier to propagate to (unlike the
+ * BuilderForce.ai gateway, whose read-through cache is L1 Map + L2 KV). A
+ * consumer that needs cross-process sharing can wrap a bridge with its own
+ * distributed cache using the same `CachingBridge` shape.
+ */
export interface ResponseCacheOptions {
    /**
     * Maximum number of entries retained. Oldest-accessed entries are evicted
     * first once the bound is reached. Default: 500. Negative values are
     * treated as 0 (nothing is retained).
     */
    maxEntries? : number;
    /**
     * Optional time-to-live in milliseconds. Entries older than this are treated
     * as misses and dropped on access. Omit for no expiry (cache until evicted).
     */
    ttlMs?      : number;
}
/** One stored value together with the clock reading taken when it was written. */
interface CacheRecord {
    value     : string;
    timestamp : number;
}
const DEFAULT_MAX_ENTRIES = 500;
/**
 * Bounded least-recently-used string cache with an optional TTL.
 *
 * Recency is tracked through Map insertion order: a hit or a write moves the
 * key to the newest position, and eviction removes from the oldest end.
 */
export class ResponseCache {
    private readonly _maxEntries : number;
    private readonly _ttlMs      : number | undefined;
    // Map preserves insertion order; re-insertion on hit gives us LRU ordering.
    private readonly _store = new Map<string, CacheRecord>();
    private _hits   = 0;
    private _misses = 0;
    constructor(opts: ResponseCacheOptions = {}) {
        // Clamp: with a negative bound `size > maxEntries` stays true on an
        // empty map and the eviction loop in set() would never terminate.
        this._maxEntries = Math.max(0, opts.maxEntries ?? DEFAULT_MAX_ENTRIES);
        this._ttlMs      = opts.ttlMs;
    }
    /**
     * Returns the cached value for `key`, or `undefined` on a miss (including an
     * expired entry, which is also evicted). A hit refreshes recency.
     *
     * @param now Clock reading (ms) used for the TTL check. Defaults to
     *            `Date.now()`; pass it when `set()` is driven by a custom clock
     *            so both sides agree.
     */
    get(key: string, now?: number): string | undefined {
        const record = this._store.get(key);
        if (!record) {
            this._misses++;
            return undefined;
        }
        if (this._isExpired(record, now ?? Date.now())) {
            this._store.delete(key);
            this._misses++;
            return undefined;
        }
        // Refresh recency: delete + re-insert moves the key to the newest slot.
        this._store.delete(key);
        this._store.set(key, record);
        this._hits++;
        return record.value;
    }
    /**
     * Stores `value` under `key`, evicting the least-recently-used entry if full.
     *
     * @param now Timestamp (ms) recorded for the entry. Defaults to `Date.now()`.
     */
    set(key: string, value: string, now: number = Date.now()): void {
        // Delete first so an overwrite also moves the key to the newest slot.
        this._store.delete(key);
        this._store.set(key, { value, timestamp: now });
        while (this._store.size > this._maxEntries) {
            const oldest = this._store.keys().next();
            if (oldest.done) break; // defensive: nothing left to evict
            this._store.delete(oldest.value);
        }
    }
    /** Drops all cached entries. Hit/miss counters are left untouched. */
    clear(): void {
        this._store.clear();
    }
    /** Current entry count (including not-yet-evicted expired entries). */
    get size(): number {
        return this._store.size;
    }
    /** Cumulative hit / miss counters, for observability and cache-tuning. */
    get stats(): { hits: number; misses: number } {
        return { hits: this._hits, misses: this._misses };
    }
    /** True when a TTL is configured and `record` is older than it at `now`. */
    private _isExpired(record: CacheRecord, now: number): boolean {
        if (this._ttlMs == null) return false;
        return now > record.timestamp + this._ttlMs;
    }
}
/**
 * Derives the cache key for a completion request. Every field that can change
 * the model's output takes part in the key, so two requests share a key only
 * when all of them match.
 */
export function buildCacheKey(parts: {
    prompt       : string;
    model?       : string;
    systemPrompt? : string;
    maxTokens?   : number;
    temperature? : number;
    topP?        : number;
}): string {
    // Absent optional fields are normalised to '' so each position is always
    // present; the prompt goes last.
    const optionalFields = [
        parts.model,
        parts.systemPrompt,
        parts.maxTokens,
        parts.temperature,
        parts.topP,
    ].map((field) => field ?? '');
    // A positional JSON array keeps field boundaries unambiguous, which a
    // hand-joined delimiter string would not.
    return JSON.stringify([...optionalFields, parts.prompt]);
}

package/src/bridges/SemanticCachingBridge.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * SemanticCachingBridge – a read-through *semantic* caching decorator for any
+ * TransformerBridge. The semantic sibling of CachingBridge: where CachingBridge
+ * only reuses byte-identical prompts, this reuses a prior answer when the new
+ * prompt is within `threshold` cosine similarity of one already answered.
+ *
+ *   const bridge = new SemanticCachingBridge(new AnthropicBridge({ apiKey }), {
+ *     embed: (t) => runtime.embed(t),        // on-device SSM, free
+ *     l2: new FetchSemanticCacheBackend({ baseUrl, apiKey }),  // shared via gateway
+ *   });
+ *
+ * Streaming is delegated straight through and never cached.
+ */
+import type { TransformerBridge, BridgeGenerateOptions } from './TransformerBridge.js';
+import { SemanticCache, type SemanticCacheOptions } from '../cache/SemanticCache.js';
/**
 * Options for {@link SemanticCachingBridge}: everything a SemanticCache
 * accepts, plus the ability to hand in an existing cache.
 */
export interface SemanticCachingBridgeOptions extends Omit<SemanticCacheOptions, never> {
    /** Provide a shared SemanticCache instance instead of constructing one. */
    cache? : SemanticCache;
}
/**
 * Decorator that answers `generate()` through a SemanticCache and forwards
 * `stream()` to the wrapped bridge untouched.
 */
export class SemanticCachingBridge implements TransformerBridge {
    private readonly _inner : TransformerBridge;
    private readonly _cache : SemanticCache;
    /**
     * @param inner Bridge that produces the answer on a cache miss.
     * @param opts  Cache configuration; when `opts.cache` is set it is used
     *              as-is, otherwise a new SemanticCache is built from `opts`.
     */
    constructor(inner: TransformerBridge, opts: SemanticCachingBridgeOptions) {
        this._inner = inner;
        this._cache = opts.cache ?? new SemanticCache(opts);
    }
    /** Mirrors the wrapped bridge's streaming capability. */
    get supportsStreaming(): boolean {
        return this._inner.supportsStreaming;
    }
    /** The underlying SemanticCache — exposed for stats inspection. */
    get cache(): SemanticCache {
        return this._cache;
    }
    /**
     * Resolves to the cache's answer for this request, calling the wrapped
     * bridge only when the cache asks for a fresh generation.
     */
    async generate(prompt: string, opts: BridgeGenerateOptions = {}): Promise<string> {
        const { systemPrompt, model } = opts;
        // The system prompt is part of the text that gets matched, so the same
        // question under a different system context is a different query.
        const queryText = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
        // The model name, when given, is only passed along as entry metadata.
        const meta = model ? { model } : undefined;
        const outcome = await this._cache.getOrGenerate(
            queryText,
            () => this._inner.generate(prompt, opts),
            meta,
        );
        return outcome.response;
    }
    /**
     * Streaming is delegated unchanged and never cached.
     *
     * @throws Error when the wrapped bridge has no `stream` method.
     */
    stream(prompt: string, opts?: BridgeGenerateOptions): AsyncIterable<string> {
        if (this._inner.stream) {
            return this._inner.stream(prompt, opts);
        }
        throw new Error('Wrapped bridge does not support streaming.');
    }
}

package/src/bridges/TransformerBridge.ts ADDED Viewed

@@ -0,0 +1,38 @@
+/**
+ * TransformerBridge – pluggable interface for any transformer LLM backend.
+ *
+ * Implementations (OpenAIBridge, AnthropicBridge, FetchBridge) satisfy this
+ * interface and are passed to SSMRuntime to enable hybrid inference and
+ * distillation.  The interface is structural — any object with the right
+ * shape works, no base class required.
+ */
/** Per-request overrides accepted by every bridge; all fields are optional. */
export interface BridgeGenerateOptions {
    /** Upper bound on generated tokens. Each adapter picks its own default (typically 512). */
    maxTokens?: number;
    /** Sampling temperature. Each adapter picks its own default (typically 0.7). */
    temperature?: number;
    /** Nucleus-sampling probability mass. Each adapter picks its own default (typically 0.9). */
    topP?: number;
    /** System prompt for this request; takes precedence over the adapter's default. */
    systemPrompt?: string;
    /** Model identifier for this request; takes precedence over the adapter's default. */
    model?: string;
}
/** Contract every transformer backend adapter fulfils. */
export interface TransformerBridge {
    /**
     * Produces a completion for `prompt`.
     * The resolved string must be the assistant's reply only — the prompt is
     * not echoed back.
     */
    generate(prompt: string, opts?: BridgeGenerateOptions): Promise<string>;
    /**
     * Optional incremental variant that yields tokens as they are produced.
     * Callers should consult `supportsStreaming` before invoking it.
     */
    stream?(prompt: string, opts?: BridgeGenerateOptions): AsyncIterable<string>;
    /** Whether `stream()` is available on this bridge. */
    readonly supportsStreaming: boolean;
}

package/src/bridges/index.ts ADDED Viewed

@@ -0,0 +1,13 @@
+export type { TransformerBridge, BridgeGenerateOptions } from './TransformerBridge.js';
+export { OpenAIBridge }    from './OpenAIBridge.js';
+export { AnthropicBridge } from './AnthropicBridge.js';
+export { FetchBridge }     from './FetchBridge.js';
+export { CachingBridge }   from './CachingBridge.js';
+export { SemanticCachingBridge } from './SemanticCachingBridge.js';
+export { ResponseCache, buildCacheKey } from './ResponseCache.js';
+export type { OpenAIBridgeOptions }    from './OpenAIBridge.js';
+export type { AnthropicBridgeOptions } from './AnthropicBridge.js';
+export type { FetchBridgeOptions }     from './FetchBridge.js';
+export type { CachingBridgeOptions }   from './CachingBridge.js';
+export type { SemanticCachingBridgeOptions } from './SemanticCachingBridge.js';
+export type { ResponseCacheOptions }   from './ResponseCache.js';

package/src/cache/FetchSemanticCacheBackend.ts ADDED Viewed

@@ -0,0 +1,79 @@
+/**
+ * FetchSemanticCacheBackend – the shared (L2) tier of the SemanticCache, backed
+ * by the BuilderForce.ai gateway's vector store over HTTP.
+ *
+ * One client used by both consumers (browser + agent) so a semantic hit on one
+ * surface is reusable by the other. Pure `fetch` — no environment-specific deps;
+ * inject `fetchImpl` in tests.
+ *
+ * Wire protocol (gateway `/v1/semantic-cache`):
+ *   POST /lookup  { embedding: number[], threshold, namespace? } → { hit?: { response, score } }
+ *   POST /store   { embedding: number[], response, namespace?, meta? } → 2xx
+ */
+import type { SemanticCacheBackend } from './SemanticCache.js';
export interface FetchSemanticCacheBackendOptions {
    /** Gateway base URL, e.g. 'https://api.builderforce.ai'. Trailing slashes are trimmed. */
    baseUrl  : string;
    /** Tenant API key (sent as a bearer token). */
    apiKey   : string;
    /**
     * Optional cache partition. Scope hits to a tenant/model/agent so unrelated
     * traffic can't cross-hit. Defaults to the gateway's per-tenant default.
     */
    namespace? : string;
    /** Injectable fetch (defaults to global fetch). */
    fetchImpl? : typeof fetch;
}
/**
 * HTTP client for the `/v1/semantic-cache` lookup and store endpoints.
 *
 * Both calls are JSON POSTs authenticated with `Authorization: Bearer <apiKey>`;
 * `namespace` is included in the payload only when one was configured.
 */
export class FetchSemanticCacheBackend implements SemanticCacheBackend {
    private readonly _base      : string;
    private readonly _apiKey    : string;
    private readonly _namespace : string | undefined;
    private readonly _fetch     : typeof fetch;
    constructor(opts: FetchSemanticCacheBackendOptions) {
        this._base      = opts.baseUrl.replace(/\/+$/, '');
        this._apiKey    = opts.apiKey;
        this._namespace = opts.namespace;
        // Wrap instead of storing the function directly: a native browser
        // `fetch` called as `this._fetch(...)` would run with this instance as
        // its receiver and fail with "Illegal invocation".
        const impl      = opts.fetchImpl ?? fetch;
        this._fetch     = (...args: Parameters<typeof fetch>) => impl(...args);
    }
    /**
     * Asks the gateway for a stored entry matching `embedding` at or above
     * `threshold`.
     *
     * @returns The hit, or `undefined` when the status is not 2xx, the body is
     *          not JSON, or `hit.response` / `hit.score` have the wrong type.
     */
    async lookup(embedding: Float32Array, threshold: number): Promise<{ response: string; score: number } | undefined> {
        const res = await this._fetch(`${this._base}/v1/semantic-cache/lookup`, {
            method : 'POST',
            headers: this._headers(),
            body   : JSON.stringify({
                embedding: Array.from(embedding),
                threshold,
                ...(this._namespace ? { namespace: this._namespace } : {}),
            }),
        });
        if (!res.ok) return undefined;
        const json = await res.json().catch(() => null) as { hit?: { response?: unknown; score?: unknown } } | null;
        const hit = json?.hit;
        // Validate the wire shape before trusting it.
        if (!hit || typeof hit.response !== 'string' || typeof hit.score !== 'number') return undefined;
        return { response: hit.response, score: hit.score };
    }
    /**
     * Sends an embedding → response association (plus optional `meta`) to the
     * gateway. The HTTP status is not inspected; only a rejected fetch
     * propagates to the caller.
     */
    async store(embedding: Float32Array, response: string, meta?: Record<string, unknown>): Promise<void> {
        await this._fetch(`${this._base}/v1/semantic-cache/store`, {
            method : 'POST',
            headers: this._headers(),
            body   : JSON.stringify({
                embedding: Array.from(embedding),
                response,
                ...(this._namespace ? { namespace: this._namespace } : {}),
                ...(meta ? { meta } : {}),
            }),
        });
    }
    /** Headers shared by both endpoints. */
    private _headers(): Record<string, string> {
        return {
            'Content-Type' : 'application/json',
            Authorization  : `Bearer ${this._apiKey}`,
        };
    }
}

package/src/cache/SemanticCache.ts ADDED Viewed

@@ -0,0 +1,196 @@
+/**
+ * SemanticCache – an embedding-keyed read-through cache for LLM completions.
+ *
+ * Unlike the exact-match ResponseCache (which keys on the byte-identical prompt),
+ * this keys on the *meaning* of the query: it embeds the query and serves a
+ * cached answer when a stored entry is within `threshold` cosine similarity.
+ * That catches paraphrases — "fix the auth bug" ≈ "login is broken" — which is
+ * where real frontier-call avoidance (and token savings) comes from.
+ *
+ * Two tiers, mirroring the project's L1-in-process / L2-shared read-through
+ * pattern:
+ *   - L1: an in-process vector list, scanned locally (fast, offline-capable).
+ *   - L2: an optional shared backend (e.g. the BuilderForce.ai gateway vector
+ *         store) so a hit on one surface — web or agent — benefits the other.
+ *
+ * Fully portable: the embedder and the L2 backend are injected, so the same
+ * class runs in the browser (WebGPU SSM + native fetch) and in Node (the agent's
+ * `@webgpu/node` SSM + fetch) with no environment-specific forks.
+ */
+import { cosineSimilarity } from '../similarity/index.js';
/** Produces an embedding vector for a piece of text (the on-device SSM, typically). */
export type Embedder = (text: string) => Promise<Float32Array>;
/**
 * The shared (L2) cache tier. Implemented by `FetchSemanticCacheBackend` against
 * the gateway, but any store satisfying this shape can be injected.
 */
export interface SemanticCacheBackend {
    /** Returns the best stored entry at/above `threshold` cosine similarity, or undefined. */
    lookup(embedding: Float32Array, threshold: number): Promise<{ response: string; score: number } | undefined>;
    /** Persists an embedding → response association. */
    store(embedding: Float32Array, response: string, meta?: Record<string, unknown>): Promise<void>;
}
export interface SemanticCacheHit {
    response: string;
    /** Cosine similarity of the matched entry to the query. */
    score: number;
    /** Which tier served the hit. */
    tier: 'l1' | 'l2';
}
export interface SemanticCacheOptions {
    /** Embeds queries. Required — this is what makes the cache semantic. */
    embed: Embedder;
    /**
     * Cosine similarity at/above which a stored entry counts as a hit.
     * Higher = stricter (fewer false hits, lower hit rate). Default: 0.92.
     */
    threshold?: number;
    /**
     * Max L1 entries retained (oldest evicted first). Default: 500. Negative
     * values are treated as 0 (L1 retains nothing).
     */
    maxEntries?: number;
    /** Optional TTL (ms) for L1 entries. Omit for no expiry. */
    ttlMs?: number;
    /** Optional shared L2 backend (e.g. the gateway). */
    l2?: SemanticCacheBackend;
    /**
     * When true (default), an answer served by L2 is also written into L1 so the
     * next local lookup is a fast hit — read-through cache warming.
     */
    warmL1FromL2?: boolean;
}
/** One locally cached answer with the embedding it was stored under. */
interface L1Entry { embedding: Float32Array; response: string; timestamp: number; }
const DEFAULT_THRESHOLD   = 0.92;
const DEFAULT_MAX_ENTRIES = 500;
/**
 * Two-tier semantic cache: an in-process list of (embedding, response) pairs
 * scanned by cosine similarity, optionally backed by a shared L2 store.
 */
export class SemanticCache {
    private readonly _embed      : Embedder;
    private readonly _threshold  : number;
    private readonly _maxEntries : number;
    private readonly _ttlMs      : number | undefined;
    private readonly _l2         : SemanticCacheBackend | undefined;
    private readonly _warmL1     : boolean;
    private readonly _l1         : L1Entry[] = [];
    private _l1Hits = 0;
    private _l2Hits = 0;
    private _misses = 0;
    constructor(opts: SemanticCacheOptions) {
        this._embed      = opts.embed;
        this._threshold  = opts.threshold  ?? DEFAULT_THRESHOLD;
        // Clamp: with a negative bound `length > maxEntries` stays true on an
        // empty list and the trim loop in _addL1() would never terminate.
        this._maxEntries = Math.max(0, opts.maxEntries ?? DEFAULT_MAX_ENTRIES);
        this._ttlMs      = opts.ttlMs;
        this._l2         = opts.l2;
        this._warmL1     = opts.warmL1FromL2 ?? true;
    }
    /**
     * Read-through entry point: returns a cached answer for a semantically-similar
     * prior query, otherwise runs `generate()`, stores the result in both tiers,
     * and returns it. Embeds the query exactly once (lookup + store share it).
     *
     * A rejection from `embed` or `generate` propagates to the caller; nothing is
     * stored in that case.
     */
    async getOrGenerate(
        query: string,
        generate: () => Promise<string>,
        meta?: Record<string, unknown>,
    ): Promise<{ response: string; cached: boolean; tier?: 'l1' | 'l2'; score?: number }> {
        const qv  = await this._embed(query);
        const hit = await this._lookupVec(qv);
        if (hit) return { response: hit.response, cached: true, tier: hit.tier, score: hit.score };
        const response = await generate();
        await this._storeVec(qv, response, meta);
        return { response, cached: false };
    }
    /** Looks up a semantically-similar cached answer without generating on a miss. */
    async lookup(query: string): Promise<SemanticCacheHit | undefined> {
        return this._lookupVec(await this._embed(query));
    }
    /** Stores a query → response association in both tiers. */
    async store(query: string, response: string, meta?: Record<string, unknown>): Promise<void> {
        await this._storeVec(await this._embed(query), response, meta);
    }
    /** Drops all L1 entries. Does not touch the shared L2 backend or the counters. */
    clear(): void {
        this._l1.length = 0;
    }
    /** Current L1 entry count. */
    get size(): number {
        return this._l1.length;
    }
    /** Cumulative hit/miss counters across both tiers — for measuring savings. */
    get stats(): { l1Hits: number; l2Hits: number; misses: number } {
        return { l1Hits: this._l1Hits, l2Hits: this._l2Hits, misses: this._misses };
    }
    // ── Internals (operate on a precomputed embedding) ────────────────────────
    /** Tries L1 first, then L2; counts exactly one hit or one miss per call. */
    private async _lookupVec(qv: Float32Array): Promise<SemanticCacheHit | undefined> {
        const local = this._searchL1(qv);
        if (local) {
            this._l1Hits++;
            return { response: local.response, score: local.score, tier: 'l1' };
        }
        if (this._l2) {
            // L2 is best-effort: a rejected lookup degrades to local-only.
            const remote = await this._l2.lookup(qv, this._threshold).catch(() => undefined);
            // Re-check the score locally rather than trusting the backend to
            // have applied the threshold.
            if (remote && remote.score >= this._threshold) {
                if (this._warmL1) this._addL1(qv, remote.response);
                this._l2Hits++;
                return { response: remote.response, score: remote.score, tier: 'l2' };
            }
        }
        this._misses++;
        return undefined;
    }
    /** Writes to L1, then to L2 when configured; `meta` is forwarded to L2 only. */
    private async _storeVec(qv: Float32Array, response: string, meta?: Record<string, unknown>): Promise<void> {
        this._addL1(qv, response);
        if (this._l2) {
            // Best-effort: failing to share to L2 must not fail the caller's request.
            await this._l2.store(qv, response, meta).catch(() => { /* swallow — local copy still cached */ });
        }
    }
    /**
     * Linear cosine scan over L1, dropping expired entries en route. Returns the
     * highest-scoring entry if it reaches the threshold; on equal scores the most
     * recently added entry wins because the scan runs newest → oldest.
     */
    private _searchL1(qv: Float32Array): { response: string; score: number } | undefined {
        const now = Date.now();
        let best: L1Entry | undefined;
        let bestScore = -Infinity;
        // Iterating backwards keeps indices valid while splicing.
        for (let i = this._l1.length - 1; i >= 0; i--) {
            const entry = this._l1[i]!;
            if (this._ttlMs != null && now > entry.timestamp + this._ttlMs) {
                this._l1.splice(i, 1);
                continue;
            }
            const score = cosineSimilarity(qv, entry.embedding);
            if (score > bestScore) {
                bestScore = score;
                best = entry;
            }
        }
        return best && bestScore >= this._threshold
            ? { response: best.response, score: bestScore }
            : undefined;
    }
    /** Appends an entry stamped with the current time and trims the oldest beyond the bound. */
    private _addL1(qv: Float32Array, response: string): void {
        this._l1.push({ embedding: qv, response, timestamp: Date.now() });
        while (this._l1.length > this._maxEntries) this._l1.shift();
    }
}

package/src/cache/index.ts ADDED Viewed

@@ -0,0 +1,9 @@
+export { SemanticCache } from './SemanticCache.js';
+export type {
+    Embedder,
+    SemanticCacheBackend,
+    SemanticCacheHit,
+    SemanticCacheOptions,
+} from './SemanticCache.js';
+export { FetchSemanticCacheBackend } from './FetchSemanticCacheBackend.js';
+export type { FetchSemanticCacheBackendOptions } from './FetchSemanticCacheBackend.js';