@betterdb/semantic-cache 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/errors.js ADDED
@@ -0,0 +1,43 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = void 0;
4
+ /**
5
+ * Thrown when the caller does something wrong — e.g. calling check()
6
+ * before initialize(), or providing an embedding with the wrong dimension.
7
+ * The message is always actionable: it tells the caller what to fix.
8
+ */
9
+ class SemanticCacheUsageError extends Error {
10
+ constructor(message) {
11
+ super(message);
12
+ this.name = 'SemanticCacheUsageError';
13
+ }
14
+ }
15
+ exports.SemanticCacheUsageError = SemanticCacheUsageError;
16
+ /**
17
+ * Thrown when the embedding function fails.
18
+ * Check the underlying cause for the original error from the embedding provider.
19
+ */
20
+ class EmbeddingError extends Error {
21
+ cause;
22
+ constructor(message, cause) {
23
+ super(message);
24
+ this.cause = cause;
25
+ this.name = 'EmbeddingError';
26
+ }
27
+ }
28
+ exports.EmbeddingError = EmbeddingError;
29
+ /**
30
+ * Thrown when a Valkey command fails unexpectedly.
31
+ * Includes the command name and the underlying error.
32
+ */
33
+ class ValkeyCommandError extends Error {
34
+ command;
35
+ cause;
36
+ constructor(command, cause) {
37
+ super(`Valkey command '${command}' failed: ${cause instanceof Error ? cause.message : String(cause)}`);
38
+ this.command = command;
39
+ this.cause = cause;
40
+ this.name = 'ValkeyCommandError';
41
+ }
42
+ }
43
+ exports.ValkeyCommandError = ValkeyCommandError;
@@ -0,0 +1,3 @@
1
+ export { SemanticCache } from './SemanticCache';
2
+ export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, } from './types';
3
+ export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
package/dist/index.js ADDED
@@ -0,0 +1,9 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.SemanticCache = void 0;
4
+ var SemanticCache_1 = require("./SemanticCache");
5
+ Object.defineProperty(exports, "SemanticCache", { enumerable: true, get: function () { return SemanticCache_1.SemanticCache; } });
6
+ var errors_1 = require("./errors");
7
+ Object.defineProperty(exports, "SemanticCacheUsageError", { enumerable: true, get: function () { return errors_1.SemanticCacheUsageError; } });
8
+ Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: function () { return errors_1.EmbeddingError; } });
9
+ Object.defineProperty(exports, "ValkeyCommandError", { enumerable: true, get: function () { return errors_1.ValkeyCommandError; } });
@@ -0,0 +1,19 @@
1
+ import { type Tracer } from '@opentelemetry/api';
2
+ import { Counter, Histogram, Registry } from 'prom-client';
3
+ interface TelemetryFactoryOptions { // Options accepted by createTelemetry().
4
+ prefix: string; // Prepended to every metric name, e.g. `${prefix}_requests_total`.
5
+ tracerName: string; // Name passed to the OpenTelemetry trace.getTracer() call.
6
+ registry?: Registry; // Registry the metrics are registered on; the prom-client default `register` is used when omitted.
7
+ }
8
+ interface CacheMetrics { // Prometheus metrics built by createTelemetry(); every name starts with the configured prefix.
9
+ requestsTotal: Counter; // `<prefix>_requests_total`, labels: cache_name, result, category.
10
+ similarityScore: Histogram; // `<prefix>_similarity_score`, labels: cache_name, category.
11
+ operationDuration: Histogram; // `<prefix>_operation_duration_seconds`, labels: cache_name, operation.
12
+ embeddingDuration: Histogram; // `<prefix>_embedding_duration_seconds`, label: cache_name.
13
+ }
14
+ export interface Telemetry { // Value returned by createTelemetry().
15
+ tracer: Tracer; // OpenTelemetry tracer obtained for the configured tracer name.
16
+ metrics: CacheMetrics; // The request counter and the three histograms.
17
+ }
18
+ export declare function createTelemetry(opts: TelemetryFactoryOptions): Telemetry; // Builds the tracer and the Prometheus metrics; metrics already registered under the same name are reused.
19
+ export {};
@@ -0,0 +1,54 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createTelemetry = createTelemetry;
4
+ const api_1 = require("@opentelemetry/api");
5
+ const prom_client_1 = require("prom-client");
6
+ function getOrCreateCounter(registry, config) {
7
+ const existing = registry.getSingleMetric(config.name);
8
+ if (existing)
9
+ return existing;
10
+ return new prom_client_1.Counter({ ...config, registers: [registry] });
11
+ }
12
+ function getOrCreateHistogram(registry, config) {
13
+ const existing = registry.getSingleMetric(config.name);
14
+ if (existing)
15
+ return existing;
16
+ return new prom_client_1.Histogram({ ...config, registers: [registry] });
17
+ }
18
+ function createTelemetry(opts) {
19
+ const registry = opts.registry ?? prom_client_1.register;
20
+ const tracer = api_1.trace.getTracer(opts.tracerName);
21
+ const operationBuckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0];
22
+ const requestsTotal = getOrCreateCounter(registry, {
23
+ name: `${opts.prefix}_requests_total`,
24
+ help: 'Total number of semantic cache requests',
25
+ labelNames: ['cache_name', 'result', 'category'],
26
+ });
27
+ const similarityScore = getOrCreateHistogram(registry, {
28
+ name: `${opts.prefix}_similarity_score`,
29
+ help: 'Cosine distance similarity scores for cache lookups',
30
+ labelNames: ['cache_name', 'category'],
31
+ buckets: [0.02, 0.05, 0.08, 0.1, 0.12, 0.15, 0.2, 0.3, 0.5, 1.0, 2.0],
32
+ });
33
+ const operationDuration = getOrCreateHistogram(registry, {
34
+ name: `${opts.prefix}_operation_duration_seconds`,
35
+ help: 'Duration of semantic cache operations in seconds',
36
+ labelNames: ['cache_name', 'operation'],
37
+ buckets: operationBuckets,
38
+ });
39
+ const embeddingDuration = getOrCreateHistogram(registry, {
40
+ name: `${opts.prefix}_embedding_duration_seconds`,
41
+ help: 'Duration of embedding function calls in seconds',
42
+ labelNames: ['cache_name'],
43
+ buckets: operationBuckets,
44
+ });
45
+ return {
46
+ tracer,
47
+ metrics: {
48
+ requestsTotal,
49
+ similarityScore,
50
+ operationDuration,
51
+ embeddingDuration,
52
+ },
53
+ };
54
+ }
@@ -0,0 +1,142 @@
1
+ import type Valkey from 'iovalkey';
2
+ import type { Registry } from 'prom-client';
3
+ export type { Valkey };
4
+ export type EmbedFn = (text: string) => Promise<number[]>;
5
+ export interface SemanticCacheOptions {
6
+ /** Index name prefix used for Valkey keys. Default: 'betterdb_scache'. */
7
+ name?: string;
8
+ /** iovalkey client instance. Required. */
9
+ client: Valkey;
10
+ /** Async function that returns a float embedding vector for a text string. Required. */
11
+ embedFn: EmbedFn;
12
+ /**
13
+ * Default similarity threshold as cosine DISTANCE (0–2 scale, lower = more similar).
14
+ * A lookup is a hit when score <= threshold. Default: 0.1.
15
+ * NOTE: this is cosine DISTANCE not cosine SIMILARITY.
16
+ * Distance 0 = identical, distance 2 = opposite.
17
+ */
18
+ defaultThreshold?: number;
19
+ /** Default TTL in seconds for stored entries. undefined = no expiry. */
20
+ defaultTtl?: number;
21
+ /**
22
+ * Per-category threshold overrides (cosine distance, 0–2).
23
+ * Applied when CacheCheckOptions.category matches a key here.
24
+ * Example: { faq: 0.08, search: 0.15 }
25
+ */
26
+ categoryThresholds?: Record<string, number>;
27
+ /**
28
+ * Width of the "uncertainty band" below the threshold.
29
+ * A hit whose cosine distance falls within (threshold - band, threshold]
30
+ * is returned with confidence 'uncertain' instead of 'high'.
31
+ *
32
+ * What to do with an uncertain hit:
33
+ * - Use the cached response but flag it for downstream review
34
+ * - Fall back to the LLM and optionally update the cache entry
35
+ * - Collect uncertain hits via Prometheus/OTel and review them to tune
36
+ * your threshold — a high rate of uncertain hits suggests your threshold
37
+ * is too loose
38
+ *
39
+ * Default: 0.05. Set to 0 to disable uncertainty flagging (all hits are 'high').
40
+ */
41
+ uncertaintyBand?: number;
42
+ telemetry?: {
43
+ /** OTel tracer name. Default: '@betterdb/semantic-cache'. */
44
+ tracerName?: string;
45
+ /** Prefix for Prometheus metric names. Default: 'semantic_cache'. */
46
+ metricsPrefix?: string;
47
+ /**
48
+ * prom-client Registry to register metrics on.
49
+ * If omitted, uses the prom-client default registry.
50
+ * Pass a custom Registry in library/multi-tenant contexts to avoid
51
+ * polluting the host application's default registry.
52
+ */
53
+ registry?: Registry;
54
+ };
55
+ }
56
+ export interface CacheCheckOptions {
57
+ /** Per-request threshold override (cosine distance 0–2). Highest priority. */
58
+ threshold?: number;
59
+ /** Category tag — used for per-category threshold lookup and metric labels. */
60
+ category?: string;
61
+ /**
62
+ * Additional FT.SEARCH pre-filter expression.
63
+ * Example: '@model:{gpt-4o}'
64
+ * Applied as: "({filter})=>[KNN {k} @embedding $vec AS __score]"
65
+ *
66
+ * **Security note:** this string is interpolated directly into the FT.SEARCH
67
+ * query. Only pass trusted, programmatically-constructed expressions — never
68
+ * unsanitised user input.
69
+ */
70
+ filter?: string;
71
+ /**
72
+ * Number of nearest neighbours to fetch via KNN. Default: 1.
73
+ * Currently only the closest result is evaluated for hit/miss.
74
+ * Values > 1 are reserved for future multi-candidate support.
75
+ */
76
+ k?: number;
77
+ }
78
+ export interface CacheStoreOptions {
79
+ /** Per-entry TTL in seconds. Overrides SemanticCacheOptions.defaultTtl. */
80
+ ttl?: number;
81
+ /** Category tag stored with the entry. */
82
+ category?: string;
83
+ /** Model name stored with the entry (e.g. 'gpt-4o'). Enables invalidation by model. */
84
+ model?: string;
85
+ /**
86
+ * Arbitrary metadata stored as JSON alongside the entry.
87
+ * Stored for external consumption (e.g. BetterDB Monitor) — not returned by check().
88
+ */
89
+ metadata?: Record<string, string | number>;
90
+ }
91
+ export type CacheConfidence = 'high' | 'uncertain' | 'miss';
92
+ export interface CacheCheckResult {
93
+ hit: boolean;
94
+ response?: string;
95
+ /**
96
+ * Cosine distance score (0–2). Present when a nearest neighbour was found,
97
+ * regardless of whether it was a hit or miss.
98
+ */
99
+ similarity?: number;
100
+ /**
101
+ * Confidence classification for the result.
102
+ *
103
+ * - 'high': similarity score is comfortably below the threshold (distance <= threshold - uncertaintyBand).
104
+ * Safe to return directly.
105
+ * - 'uncertain': similarity score is close to the threshold boundary
106
+ * (threshold - uncertaintyBand < distance <= threshold).
107
+ * Consider falling back to the LLM or flagging for review.
108
+ * - 'miss': no hit. response is undefined.
109
+ */
110
+ confidence: CacheConfidence;
111
+ /** Valkey key of the matched entry. Present on hit only. */
112
+ matchedKey?: string;
113
+ /**
114
+ * On a miss where a candidate existed but didn't clear the threshold,
115
+ * describes how close it was. Useful for threshold tuning.
116
+ */
117
+ nearestMiss?: {
118
+ similarity: number;
119
+ deltaToThreshold: number;
120
+ };
121
+ }
122
+ export interface InvalidateResult {
123
+ /** Number of entries deleted in this call. */
124
+ deleted: number;
125
+ /**
126
+ * True if the result set was truncated at 1000 entries.
127
+ * If true, call invalidate() again with the same filter until truncated is false.
128
+ */
129
+ truncated: boolean;
130
+ }
131
+ export interface CacheStats {
132
+ hits: number;
133
+ misses: number;
134
+ total: number;
135
+ hitRate: number;
136
+ }
137
+ export interface IndexInfo {
138
+ name: string;
139
+ numDocs: number;
140
+ dimension: number;
141
+ indexingState: string;
142
+ }
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,25 @@
1
+ /** SHA-256 hex digest of a string. */
2
+ export declare function sha256(text: string): string;
3
+ /**
4
+ * Encode number[] as a little-endian Float32 Buffer.
5
+ * Used to store embeddings as binary HSET field values.
6
+ */
7
+ export declare function encodeFloat32(vec: number[]): Buffer;
8
+ /**
9
+ * Parse a raw FT.SEARCH response from iovalkey's client.call().
10
+ *
11
+ * iovalkey returns FT.SEARCH results in the following shape:
12
+ * [totalCount, key1, [field1, val1, field2, val2, ...], key2, [...], ...]
13
+ *
14
+ * - totalCount is a string (e.g. "2")
15
+ * - Each key is a string
16
+ * - Each field list is a flat string array: [fieldName, value, fieldName, value, ...]
17
+ *
18
+ * Returns an array of { key: string, fields: Record<string, string> }.
19
+ * Returns [] if totalCount is "0" or the response is empty/malformed.
20
+ * Never throws — on any parse error, returns [].
21
+ */
22
+ export declare function parseFtSearchResponse(raw: unknown): Array<{
23
+ key: string;
24
+ fields: Record<string, string>;
25
+ }>;
package/dist/utils.js ADDED
@@ -0,0 +1,77 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.sha256 = sha256;
4
+ exports.encodeFloat32 = encodeFloat32;
5
+ exports.parseFtSearchResponse = parseFtSearchResponse;
6
+ const node_crypto_1 = require("node:crypto");
7
/**
 * Hash the given text with SHA-256 and return the digest as a hex string.
 */
function sha256(text) {
    const hasher = (0, node_crypto_1.createHash)('sha256');
    hasher.update(text);
    return hasher.digest('hex');
}
11
/**
 * Pack a number[] into a Buffer of consecutive little-endian 32-bit floats
 * (4 bytes per element, in input order).
 * Used to store embeddings as binary HSET field values.
 */
function encodeFloat32(vec) {
    const FLOAT32_BYTES = 4;
    const encoded = Buffer.alloc(vec.length * FLOAT32_BYTES);
    let offset = 0;
    for (let idx = 0; idx < vec.length; idx += 1) {
        // writeFloatLE returns the offset just past the bytes it wrote.
        offset = encoded.writeFloatLE(vec[idx], offset);
    }
    return encoded;
}
22
/**
 * Turn the raw FT.SEARCH reply from iovalkey's client.call() into a list of
 * `{ key, fields }` records.
 *
 * Expected reply shape:
 *   [totalCount, key1, [field1, val1, field2, val2, ...], key2, [...], ...]
 *
 * - totalCount may be a string (e.g. "2") or a number.
 * - Keys are strings; any non-string item found where a key is expected is skipped.
 * - A field list is a flat array of name/value pairs; a trailing unpaired
 *   name is ignored. Names and values are converted with String().
 * - A key that is not followed by an array (e.g. RETURN 0 mode) yields an
 *   entry with empty `fields`.
 *
 * Returns [] when the reply is not an array, is empty, or reports no results.
 * Never throws: any error raised while parsing also results in [].
 */
function parseFtSearchResponse(raw) {
    try {
        if (!Array.isArray(raw) || raw.length === 0) {
            return [];
        }
        const head = raw[0];
        const total = typeof head === 'string' ? parseInt(head, 10) : Number(head);
        // Rejects NaN, zero and negative counts in one comparison.
        if (!(total > 0)) {
            return [];
        }
        const entries = [];
        let cursor = 1;
        while (cursor < raw.length) {
            const key = raw[cursor];
            cursor += 1;
            if (typeof key !== 'string') {
                continue;
            }
            const fields = {};
            const candidate = raw[cursor];
            if (Array.isArray(candidate)) {
                // The array belongs to this key: consume it as name/value pairs.
                cursor += 1;
                const pairCount = Math.floor(candidate.length / 2);
                for (let pair = 0; pair < pairCount; pair += 1) {
                    fields[String(candidate[pair * 2])] = String(candidate[pair * 2 + 1]);
                }
            }
            entries.push({ key, fields });
        }
        return entries;
    }
    catch {
        return [];
    }
}
package/package.json ADDED
@@ -0,0 +1,69 @@
1
+ {
2
+ "name": "@betterdb/semantic-cache",
3
+ "version": "0.1.0",
4
+ "description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
5
+ "keywords": [
6
+ "valkey",
7
+ "redis",
8
+ "semantic-cache",
9
+ "llm",
10
+ "opentelemetry",
11
+ "prometheus"
12
+ ],
13
+ "license": "MIT",
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "https://github.com/BetterDB-inc/monitor",
17
+ "directory": "packages/semantic-cache"
18
+ },
19
+ "main": "./dist/index.js",
20
+ "types": "./dist/index.d.ts",
21
"exports": {
  ".": {
    "types": "./dist/index.d.ts",
    "import": "./dist/index.js",
    "require": "./dist/index.js"
  },
  "./langchain": {
    "types": "./dist/adapters/langchain.d.ts",
    "import": "./dist/adapters/langchain.js",
    "require": "./dist/adapters/langchain.js"
  },
  "./ai": {
    "types": "./dist/adapters/ai.d.ts",
    "import": "./dist/adapters/ai.js",
    "require": "./dist/adapters/ai.js"
  }
},
38
+ "files": [
39
+ "dist",
40
+ "README.md"
41
+ ],
42
+ "scripts": {
43
+ "build": "tsc",
44
+ "typecheck": "tsc --noEmit",
45
+ "test": "vitest run",
46
+ "test:watch": "vitest",
47
+ "clean": "rm -rf dist"
48
+ },
49
+ "dependencies": {
50
+ "@opentelemetry/api": "^1.9.0",
51
+ "prom-client": "^15.1.3"
52
+ },
53
+ "engines": {
54
+ "node": ">=20.0.0"
55
+ },
56
+ "peerDependencies": {
57
+ "iovalkey": ">=0.3.0",
58
+ "@langchain/core": ">=0.3.0",
59
+ "ai": ">=4.0.0"
60
+ },
61
+ "peerDependenciesMeta": {
62
+ "@langchain/core": {
63
+ "optional": true
64
+ },
65
+ "ai": {
66
+ "optional": true
67
+ }
68
+ }
69
+ }