@betterdb/semantic-cache 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +233 -124
- package/dist/SemanticCache.d.ts +127 -7
- package/dist/SemanticCache.js +867 -48
- package/dist/adapters/ai.js +6 -1
- package/dist/adapters/anthropic.d.ts +32 -0
- package/dist/adapters/anthropic.js +94 -0
- package/dist/adapters/langchain.js +6 -1
- package/dist/adapters/langgraph.d.ts +104 -0
- package/dist/adapters/langgraph.js +271 -0
- package/dist/adapters/llamaindex.d.ts +32 -0
- package/dist/adapters/llamaindex.js +76 -0
- package/dist/adapters/openai-responses.d.ts +31 -0
- package/dist/adapters/openai-responses.js +112 -0
- package/dist/adapters/openai.d.ts +42 -0
- package/dist/adapters/openai.js +97 -0
- package/dist/analytics.d.ts +24 -0
- package/dist/analytics.js +116 -0
- package/dist/cluster.d.ts +10 -0
- package/dist/cluster.js +43 -0
- package/dist/defaultCostTable.d.ts +11 -0
- package/dist/defaultCostTable.js +1976 -0
- package/dist/discovery.d.ts +67 -0
- package/dist/discovery.js +140 -0
- package/dist/embed/bedrock.d.ts +32 -0
- package/dist/embed/bedrock.js +109 -0
- package/dist/embed/cohere.d.ts +34 -0
- package/dist/embed/cohere.js +37 -0
- package/dist/embed/ollama.d.ts +30 -0
- package/dist/embed/ollama.js +24 -0
- package/dist/embed/openai.d.ts +31 -0
- package/dist/embed/openai.js +66 -0
- package/dist/embed/voyage.d.ts +31 -0
- package/dist/embed/voyage.js +32 -0
- package/dist/index.d.ts +8 -1
- package/dist/index.js +13 -1
- package/dist/normalizer.d.ts +68 -0
- package/dist/normalizer.js +102 -0
- package/dist/telemetry.d.ts +5 -0
- package/dist/telemetry.js +30 -0
- package/dist/types.d.ts +128 -7
- package/dist/utils.d.ts +58 -0
- package/dist/utils.js +30 -0
- package/package.json +81 -6
package/dist/index.js
CHANGED
|
@@ -1,9 +1,21 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.SemanticCache = void 0;
|
|
3
|
+
// Package entry point (CommonJS build output).
// Every public name is first assigned `undefined`; the real values are then
// exposed through enumerable getters that read from the required module each
// time the property is accessed.
exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
// Main cache class.
var SemanticCache_1 = require("./SemanticCache");
Object.defineProperty(exports, "SemanticCache", { enumerable: true, get: function () { return SemanticCache_1.SemanticCache; } });
// Bundled default model cost table.
var defaultCostTable_1 = require("./defaultCostTable");
Object.defineProperty(exports, "DEFAULT_COST_TABLE", { enumerable: true, get: function () { return defaultCostTable_1.DEFAULT_COST_TABLE; } });
// Error classes.
var errors_1 = require("./errors");
Object.defineProperty(exports, "SemanticCacheUsageError", { enumerable: true, get: function () { return errors_1.SemanticCacheUsageError; } });
Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: function () { return errors_1.EmbeddingError; } });
Object.defineProperty(exports, "ValkeyCommandError", { enumerable: true, get: function () { return errors_1.ValkeyCommandError; } });
// TAG-filter escaping helper.
var utils_1 = require("./utils");
Object.defineProperty(exports, "escapeTag", { enumerable: true, get: function () { return utils_1.escapeTag; } });
// Binary-content normalizer helpers.
var normalizer_1 = require("./normalizer");
Object.defineProperty(exports, "hashBase64", { enumerable: true, get: function () { return normalizer_1.hashBase64; } });
Object.defineProperty(exports, "hashBytes", { enumerable: true, get: function () { return normalizer_1.hashBytes; } });
Object.defineProperty(exports, "hashUrl", { enumerable: true, get: function () { return normalizer_1.hashUrl; } });
Object.defineProperty(exports, "fetchAndHash", { enumerable: true, get: function () { return normalizer_1.fetchAndHash; } });
Object.defineProperty(exports, "passthrough", { enumerable: true, get: function () { return normalizer_1.passthrough; } });
Object.defineProperty(exports, "composeNormalizer", { enumerable: true, get: function () { return normalizer_1.composeNormalizer; } });
Object.defineProperty(exports, "defaultNormalizer", { enumerable: true, get: function () { return normalizer_1.defaultNormalizer; } });
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
 * Description of one piece of binary content (image, audio, or document)
 * that a BinaryNormalizer converts into a string reference.
 */
export interface BinaryRef {
    /** Media category; composeNormalizer checks it first to pick a `byKind` handler. */
    kind: "image" | "audio" | "document";
    /** Where the content comes from; discriminated on the literal `type` field. */
    source: {
        /** Base64 text (hashBase64 also accepts a "data:<mime>;base64," prefix). */
        type: "base64";
        data: string;
        mediaType?: string;
    } | {
        /** Content addressed by URL. */
        type: "url";
        url: string;
    } | {
        /** Content addressed by a provider-specific file identifier. */
        type: "fileId";
        fileId: string;
        provider: string;
    } | {
        /** Raw bytes held in memory. */
        type: "bytes";
        data: Uint8Array | Buffer;
    };
    /**
     * Free-form extra data. The built-in per-source dispatch does not read it;
     * custom `byKind` handlers receive the whole ref and may use it.
     */
    context?: Record<string, unknown>;
}
|
|
20
|
+
/** Async function that maps a BinaryRef to its string reference. */
export type BinaryNormalizer = (ref: BinaryRef) => Promise<string>;
|
|
21
|
+
/**
 * Handlers accepted by composeNormalizer. Every field is optional; a source
 * type without a handler falls back to passthrough.
 */
export interface NormalizerConfig {
    /** Handler for `source.type === "base64"`; receives the base64 string. */
    base64?: (data: string) => string | Promise<string>;
    /** Handler for `source.type === "url"`; receives the URL string. */
    url?: (urlStr: string) => string | Promise<string>;
    /** Handler for `source.type === "fileId"`; receives the file id and provider. */
    fileId?: (fileId: string, provider: string) => string | Promise<string>;
    /** Handler for `source.type === "bytes"`; receives the raw bytes. */
    bytes?: (data: Uint8Array | Buffer) => string | Promise<string>;
    /**
     * Per-kind overrides. When a handler exists for `ref.kind` it is called
     * with the full BinaryRef and the per-source handlers above are skipped.
     */
    byKind?: {
        image?: BinaryNormalizer;
        audio?: BinaryNormalizer;
        document?: BinaryNormalizer;
    };
}
|
|
32
|
+
/**
|
|
33
|
+
* Strip any "data:<mime>;base64," prefix, decode the base64 bytes,
|
|
34
|
+
* and return "sha256:<hex>" of the decoded bytes.
|
|
35
|
+
*/
|
|
36
|
+
export declare function hashBase64(data: string): string;
|
|
37
|
+
/**
|
|
38
|
+
* Return "sha256:<hex>" of the raw bytes.
|
|
39
|
+
*/
|
|
40
|
+
export declare function hashBytes(data: Uint8Array | Buffer): string;
|
|
41
|
+
/**
|
|
42
|
+
* Normalize a URL: lowercase scheme+host, drop default ports (80/443),
|
|
43
|
+
* sort query params, return "url:<normalized>".
|
|
44
|
+
*/
|
|
45
|
+
export declare function hashUrl(urlStr: string): string;
|
|
46
|
+
/**
|
|
47
|
+
* Fetch a URL, throw if response is not ok, and return "sha256:<hex>"
|
|
48
|
+
* of the response body bytes.
|
|
49
|
+
*/
|
|
50
|
+
export declare function fetchAndHash(url: string): Promise<string>;
|
|
51
|
+
/**
|
|
52
|
+
* Return a scheme-prefixed reference without any normalization:
|
|
53
|
+
* - base64 source -> "base64:<data>"
|
|
54
|
+
* - url source -> "url:<url>"
|
|
55
|
+
* - fileId source -> "fileid:<provider>:<fileId>"
|
|
56
|
+
* - bytes source -> "sha256:<hex>" (hashes the bytes)
|
|
57
|
+
*/
|
|
58
|
+
export declare function passthrough(ref: BinaryRef): string;
|
|
59
|
+
/**
|
|
60
|
+
* Build a BinaryNormalizer from a config.
|
|
61
|
+
*
|
|
62
|
+
* Dispatch order:
|
|
63
|
+
* 1. If cfg.byKind[ref.kind] is defined, call it with the full BinaryRef.
|
|
64
|
+
* 2. Otherwise dispatch on ref.source.type using the per-source handlers.
|
|
65
|
+
* 3. Fall back to passthrough for any unhandled source types.
|
|
66
|
+
*/
|
|
67
|
+
export declare function composeNormalizer(cfg?: NormalizerConfig): BinaryNormalizer;
|
|
68
|
+
/**
 * Ready-made normalizer built with composeNormalizer: base64 sources go
 * through hashBase64 and bytes sources through hashBytes; url and fileId
 * sources have no handler and therefore use passthrough.
 */
export declare const defaultNormalizer: BinaryNormalizer;
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.defaultNormalizer = void 0;
|
|
4
|
+
exports.hashBase64 = hashBase64;
|
|
5
|
+
exports.hashBytes = hashBytes;
|
|
6
|
+
exports.hashUrl = hashUrl;
|
|
7
|
+
exports.fetchAndHash = fetchAndHash;
|
|
8
|
+
exports.passthrough = passthrough;
|
|
9
|
+
exports.composeNormalizer = composeNormalizer;
|
|
10
|
+
const node_crypto_1 = require("node:crypto");
|
|
11
|
+
// --- Normalizer functions ---
|
|
12
|
+
/**
 * Hash base64-encoded content.
 *
 * Accepts bare base64 text or a data URI. When the input contains
 * ";base64," only the segment right after that marker is decoded.
 *
 * @param data Base64 string, optionally prefixed with "data:<mime>;base64,".
 * @returns "sha256:<hex>" digest of the decoded bytes.
 */
function hashBase64(data) {
    const marker = ";base64,";
    let payload = data;
    if (data.includes(marker)) {
        payload = data.split(marker)[1];
    }
    const hasher = (0, node_crypto_1.createHash)("sha256");
    hasher.update(Buffer.from(payload, "base64"));
    return "sha256:" + hasher.digest("hex");
}
|
|
21
|
+
/**
 * Hash raw bytes with SHA-256.
 *
 * @param data Bytes to hash (Uint8Array or Buffer).
 * @returns "sha256:<hex>" digest of the input.
 */
function hashBytes(data) {
    const hasher = (0, node_crypto_1.createHash)("sha256");
    hasher.update(data);
    const hex = hasher.digest("hex");
    return "sha256:" + hex;
}
|
|
27
|
+
/**
 * Produce a canonical string for a URL.
 *
 * Parsing with the URL class lowercases the scheme and hostname and removes
 * default ports; the query parameters are then sorted by name.
 *
 * @param urlStr Absolute URL; an unparsable value makes the URL constructor throw.
 * @returns "url:<normalized>".
 */
function hashUrl(urlStr) {
    const parsed = new URL(urlStr);
    const query = parsed.searchParams;
    // Sorting the live searchParams object rewrites the URL's query string.
    query.sort();
    const normalized = parsed.toString();
    return "url:" + normalized;
}
|
|
37
|
+
/**
 * Download a resource and hash its body.
 *
 * @param url Location passed to the global fetch().
 * @returns Promise resolving to "sha256:<hex>" of the response body bytes.
 * @throws Error when the response status is not ok.
 */
async function fetchAndHash(url) {
    const response = await fetch(url);
    if (response.ok) {
        const body = Buffer.from(await response.arrayBuffer());
        const hasher = (0, node_crypto_1.createHash)("sha256");
        return "sha256:" + hasher.update(body).digest("hex");
    }
    throw new Error(`fetchAndHash: HTTP ${response.status} for ${url}`);
}
|
|
49
|
+
/**
 * Return a scheme-prefixed reference without any normalization:
 * - base64 source -> "base64:<data>"
 * - url source    -> "url:<url>"
 * - fileId source -> "fileid:<provider>:<fileId>"
 * - bytes source  -> "sha256:<hex>" (hashes the bytes)
 *
 * @param ref Binary reference whose `source.type` selects the output form.
 * @returns The reference string for the source.
 * @throws TypeError when `source.type` is not one of the four known values.
 *   Without this guard the function returned `undefined`, which callers
 *   would then use as if it were a valid reference string.
 */
function passthrough(ref) {
    const { source } = ref;
    switch (source.type) {
        case "base64":
            return "base64:" + source.data;
        case "url":
            return "url:" + source.url;
        case "fileId":
            return "fileid:" + source.provider + ":" + source.fileId;
        case "bytes":
            return hashBytes(source.data);
        default:
            // Reachable only from untyped callers; fail loudly instead of
            // handing back undefined.
            throw new TypeError(`passthrough: unsupported source type "${String(source.type)}"`);
    }
}
|
|
69
|
+
// --- Factory ---
|
|
70
|
+
/**
|
|
71
|
+
* Build a BinaryNormalizer from a config.
|
|
72
|
+
*
|
|
73
|
+
* Dispatch order:
|
|
74
|
+
* 1. If cfg.byKind[ref.kind] is defined, call it with the full BinaryRef.
|
|
75
|
+
* 2. Otherwise dispatch on ref.source.type using the per-source handlers.
|
|
76
|
+
* 3. Fall back to passthrough for any unhandled source types.
|
|
77
|
+
*/
|
|
78
|
+
function composeNormalizer(cfg = {}) {
|
|
79
|
+
return async (ref) => {
|
|
80
|
+
// byKind takes priority
|
|
81
|
+
const kindFn = cfg.byKind?.[ref.kind];
|
|
82
|
+
if (kindFn)
|
|
83
|
+
return kindFn(ref);
|
|
84
|
+
const { source } = ref;
|
|
85
|
+
switch (source.type) {
|
|
86
|
+
case "base64":
|
|
87
|
+
return cfg.base64 ? cfg.base64(source.data) : passthrough(ref);
|
|
88
|
+
case "url":
|
|
89
|
+
return cfg.url ? cfg.url(source.url) : passthrough(ref);
|
|
90
|
+
case "fileId":
|
|
91
|
+
return cfg.fileId
|
|
92
|
+
? cfg.fileId(source.fileId, source.provider)
|
|
93
|
+
: passthrough(ref);
|
|
94
|
+
case "bytes":
|
|
95
|
+
return cfg.bytes ? cfg.bytes(source.data) : passthrough(ref);
|
|
96
|
+
}
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
// Default normalizer: base64 and raw-byte sources are reduced to a SHA-256
// digest. No url or fileId handler is configured, so composeNormalizer sends
// those sources to passthrough.
exports.defaultNormalizer = composeNormalizer({
    base64: hashBase64,
    bytes: hashBytes,
});
|
package/dist/telemetry.d.ts
CHANGED
|
@@ -10,6 +10,11 @@ interface CacheMetrics {
|
|
|
10
10
|
similarityScore: Histogram;
|
|
11
11
|
operationDuration: Histogram;
|
|
12
12
|
embeddingDuration: Histogram;
|
|
13
|
+
costSavedTotal: Counter;
|
|
14
|
+
embeddingCacheTotal: Counter;
|
|
15
|
+
staleModelEvictions: Counter;
|
|
16
|
+
discoveryWriteFailed: Counter;
|
|
17
|
+
configRefreshFailed: Counter;
|
|
13
18
|
}
|
|
14
19
|
export interface Telemetry {
|
|
15
20
|
tracer: Tracer;
|
package/dist/telemetry.js
CHANGED
|
@@ -42,6 +42,31 @@ function createTelemetry(opts) {
|
|
|
42
42
|
labelNames: ['cache_name'],
|
|
43
43
|
buckets: operationBuckets,
|
|
44
44
|
});
|
|
45
|
+
const costSavedTotal = getOrCreateCounter(registry, {
|
|
46
|
+
name: `${opts.prefix}_cost_saved_total`,
|
|
47
|
+
help: 'Estimated cost saved in dollars from semantic cache hits',
|
|
48
|
+
labelNames: ['cache_name', 'category'],
|
|
49
|
+
});
|
|
50
|
+
const embeddingCacheTotal = getOrCreateCounter(registry, {
|
|
51
|
+
name: `${opts.prefix}_embedding_cache_total`,
|
|
52
|
+
help: 'Total embedding cache lookups (hit or miss)',
|
|
53
|
+
labelNames: ['cache_name', 'result'],
|
|
54
|
+
});
|
|
55
|
+
const staleModelEvictions = getOrCreateCounter(registry, {
|
|
56
|
+
name: `${opts.prefix}_stale_model_evictions_total`,
|
|
57
|
+
help: 'Entries evicted due to staleAfterModelChange detection',
|
|
58
|
+
labelNames: ['cache_name'],
|
|
59
|
+
});
|
|
60
|
+
const discoveryWriteFailed = getOrCreateCounter(registry, {
|
|
61
|
+
name: `${opts.prefix}_discovery_write_failed_total`,
|
|
62
|
+
help: 'Count of failed discovery-marker writes (best-effort HGET/HSET/SET operations against __betterdb:* keys)',
|
|
63
|
+
labelNames: ['cache_name'],
|
|
64
|
+
});
|
|
65
|
+
const configRefreshFailed = getOrCreateCounter(registry, {
|
|
66
|
+
name: `${opts.prefix}_config_refresh_failed_total`,
|
|
67
|
+
help: 'Count of failed periodic config refreshes (HGETALL on __config).',
|
|
68
|
+
labelNames: ['cache_name'],
|
|
69
|
+
});
|
|
45
70
|
return {
|
|
46
71
|
tracer,
|
|
47
72
|
metrics: {
|
|
@@ -49,6 +74,11 @@ function createTelemetry(opts) {
|
|
|
49
74
|
similarityScore,
|
|
50
75
|
operationDuration,
|
|
51
76
|
embeddingDuration,
|
|
77
|
+
costSavedTotal,
|
|
78
|
+
embeddingCacheTotal,
|
|
79
|
+
staleModelEvictions,
|
|
80
|
+
discoveryWriteFailed,
|
|
81
|
+
configRefreshFailed,
|
|
52
82
|
},
|
|
53
83
|
};
|
|
54
84
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,7 +1,18 @@
|
|
|
1
1
|
import type Valkey from 'iovalkey';
|
|
2
2
|
import type { Registry } from 'prom-client';
|
|
3
|
+
import type { DiscoveryOptions } from './discovery';
|
|
3
4
|
export type { Valkey };
|
|
5
|
+
/** Settings for the periodic re-read of cache configuration from Valkey. */
export interface ConfigRefreshOptions {
    /** Enable periodic config refresh from Valkey. Default: true. */
    enabled?: boolean;
    /** Refresh interval in milliseconds. Default: 30000. Minimum: 1000. */
    intervalMs?: number;
}
|
|
4
11
|
export type EmbedFn = (text: string) => Promise<number[]>;
|
|
12
|
+
/**
 * Pricing for a single model.
 * NOTE(review): the currency is presumably US dollars, since costSaved is
 * documented in dollars elsewhere in this file; confirm.
 */
export interface ModelCost {
    /** Cost per 1,000 input tokens. */
    inputPer1k: number;
    /** Cost per 1,000 output tokens. */
    outputPer1k: number;
}
|
|
5
16
|
export interface SemanticCacheOptions {
|
|
6
17
|
/** Index name prefix used for Valkey keys. Default: 'betterdb_scache'. */
|
|
7
18
|
name?: string;
|
|
@@ -9,6 +20,16 @@ export interface SemanticCacheOptions {
|
|
|
9
20
|
client: Valkey;
|
|
10
21
|
/** Async function that returns a float embedding vector for a text string. Required. */
|
|
11
22
|
embedFn: EmbedFn;
|
|
23
|
+
/**
|
|
24
|
+
* Model pricing for cost savings tracking. Optional.
|
|
25
|
+
* Keys are model names (e.g. 'gpt-4o'), values are per-1k-token costs.
|
|
26
|
+
*/
|
|
27
|
+
costTable?: Record<string, ModelCost>;
|
|
28
|
+
/**
|
|
29
|
+
* Use bundled default cost table from LiteLLM. User costTable entries override defaults.
|
|
30
|
+
* Default: true.
|
|
31
|
+
*/
|
|
32
|
+
useDefaultCostTable?: boolean;
|
|
12
33
|
/**
|
|
13
34
|
* Default similarity threshold as cosine DISTANCE (0–2 scale, lower = more similar).
|
|
14
35
|
* A lookup is a hit when score <= threshold. Default: 0.1.
|
|
@@ -39,6 +60,22 @@ export interface SemanticCacheOptions {
|
|
|
39
60
|
* Default: 0.05. Set to 0 to disable uncertainty flagging (all hits are 'high').
|
|
40
61
|
*/
|
|
41
62
|
uncertaintyBand?: number;
|
|
63
|
+
/**
|
|
64
|
+
* Pluggable binary content normalizer for stable hashing of images, audio, and documents.
|
|
65
|
+
* Default: passthrough (uses the ref string as-is).
|
|
66
|
+
* Pass this to adapter prepareSemanticParams() calls to share the same normalization strategy.
|
|
67
|
+
*/
|
|
68
|
+
normalizer?: import('./normalizer').BinaryNormalizer;
|
|
69
|
+
/**
|
|
70
|
+
* Embedding cache configuration. When enabled, computed embeddings are stored in Valkey
|
|
71
|
+
* so that repeated check() calls on the same text skip the embedFn call.
|
|
72
|
+
*/
|
|
73
|
+
embeddingCache?: {
|
|
74
|
+
/** Enable embedding caching. Default: true. */
|
|
75
|
+
enabled?: boolean;
|
|
76
|
+
/** TTL for cached embeddings in seconds. Default: 86400 (24 hours). */
|
|
77
|
+
ttl?: number;
|
|
78
|
+
};
|
|
42
79
|
telemetry?: {
|
|
43
80
|
/** OTel tracer name. Default: '@betterdb/semantic-cache'. */
|
|
44
81
|
tracerName?: string;
|
|
@@ -52,11 +89,50 @@ export interface SemanticCacheOptions {
|
|
|
52
89
|
*/
|
|
53
90
|
registry?: Registry;
|
|
54
91
|
};
|
|
92
|
+
analytics?: {
|
|
93
|
+
/** PostHog API key. Overrides the build-time baked key if set. */
|
|
94
|
+
apiKey?: string;
|
|
95
|
+
/** PostHog host. Overrides the build-time baked host if set. */
|
|
96
|
+
host?: string;
|
|
97
|
+
/** Disable analytics. Also controlled by BETTERDB_TELEMETRY env var. */
|
|
98
|
+
disabled?: boolean;
|
|
99
|
+
/** Interval in ms for periodic stats snapshots. Default: 300_000 (5 min). 0 to disable. */
|
|
100
|
+
statsIntervalMs?: number;
|
|
101
|
+
};
|
|
102
|
+
/**
|
|
103
|
+
* Discovery-marker protocol controls. See
|
|
104
|
+
* docs/plans/specs/spec-semantic-cache-discovery-markers.md.
|
|
105
|
+
* Defaults: enabled=true, heartbeatIntervalMs=30000, includeCategories=true.
|
|
106
|
+
*/
|
|
107
|
+
discovery?: DiscoveryOptions;
|
|
108
|
+
/**
|
|
109
|
+
* Periodic refresh of in-memory threshold config from Valkey.
|
|
110
|
+
* When enabled, the cache re-reads `{name}:__config` on the configured
|
|
111
|
+
* interval. Field `threshold` updates `defaultThreshold`; fields named
|
|
112
|
+
* `threshold:{category}` update `categoryThresholds[category]`.
|
|
113
|
+
* Defaults: enabled=true, intervalMs=30000.
|
|
114
|
+
*/
|
|
115
|
+
configRefresh?: ConfigRefreshOptions;
|
|
116
|
+
}
|
|
117
|
+
/** Configuration for a rerank hook: how many candidates to fetch and how to choose among them. */
export interface RerankOptions {
    /**
     * Number of top-k candidates to retrieve before reranking.
     * A higher k gives the rerankFn more candidates to choose from.
     */
    k: number;
    /**
     * Function that receives the query text and ranked candidates, and returns
     * the index of the best candidate. Return -1 to reject all candidates (miss).
     */
    rerankFn: (query: string, candidates: Array<{
        response: string;
        similarity: number;
    }>) => Promise<number>;
}
|
|
56
132
|
export interface CacheCheckOptions {
|
|
57
|
-
/** Per-request threshold override (cosine distance 0–2). Highest priority. */
|
|
133
|
+
/** Per-request threshold override (cosine distance 0-2). Highest priority. */
|
|
58
134
|
threshold?: number;
|
|
59
|
-
/** Category tag — used for per-category threshold lookup and metric labels. */
|
|
135
|
+
/** Category tag - used for per-category threshold lookup and metric labels. */
|
|
60
136
|
category?: string;
|
|
61
137
|
/**
|
|
62
138
|
* Additional FT.SEARCH pre-filter expression.
|
|
@@ -64,16 +140,33 @@ export interface CacheCheckOptions {
|
|
|
64
140
|
* Applied as: "({filter})=>[KNN {k} @embedding $vec AS __score]"
|
|
65
141
|
*
|
|
66
142
|
* **Security note:** this string is interpolated directly into the FT.SEARCH
|
|
67
|
-
* query. Only pass trusted, programmatically-constructed expressions — never
|
|
143
|
+
* query. Only pass trusted, programmatically-constructed expressions - never
|
|
68
144
|
* unsanitised user input.
|
|
69
145
|
*/
|
|
70
146
|
filter?: string;
|
|
71
147
|
/**
|
|
72
148
|
* Number of nearest neighbours to fetch via KNN. Default: 1.
|
|
73
|
-
*
|
|
74
|
-
* Values > 1 are reserved for future multi-candidate support.
|
|
149
|
+
* Ignored when rerank is set (rerank.k takes precedence).
|
|
75
150
|
*/
|
|
76
151
|
k?: number;
|
|
152
|
+
/**
|
|
153
|
+
* When true, a cache hit whose stored model differs from currentModel is
|
|
154
|
+
* treated as a miss and the stale entry is deleted. Useful for automatically
|
|
155
|
+
* evicting cache entries when you upgrade the model you use for a given prompt.
|
|
156
|
+
* Requires currentModel to be set.
|
|
157
|
+
* Default: false.
|
|
158
|
+
*/
|
|
159
|
+
staleAfterModelChange?: boolean;
|
|
160
|
+
/** The model name to compare against stored entries when staleAfterModelChange is true. */
|
|
161
|
+
currentModel?: string;
|
|
162
|
+
/**
|
|
163
|
+
* Optional rerank hook. When set, FT.SEARCH retrieves rerank.k candidates
|
|
164
|
+
* and passes them to rerank.rerankFn. The function returns the index of the
|
|
165
|
+
* best candidate, or -1 to treat all as a miss.
|
|
166
|
+
* The threshold is NOT applied to the reranked pick unless you filter candidates
|
|
167
|
+
* in rerankFn yourself.
|
|
168
|
+
*/
|
|
169
|
+
rerank?: RerankOptions;
|
|
77
170
|
}
|
|
78
171
|
export interface CacheStoreOptions {
|
|
79
172
|
/** Per-entry TTL in seconds. Overrides SemanticCacheOptions.defaultTtl. */
|
|
@@ -84,16 +177,33 @@ export interface CacheStoreOptions {
|
|
|
84
177
|
model?: string;
|
|
85
178
|
/**
|
|
86
179
|
* Arbitrary metadata stored as JSON alongside the entry.
|
|
87
|
-
* Stored for external consumption (e.g. BetterDB Monitor) — not returned by check().
|
|
180
|
+
* Stored for external consumption (e.g. BetterDB Monitor) - not returned by check().
|
|
88
181
|
*/
|
|
89
182
|
metadata?: Record<string, string | number>;
|
|
183
|
+
/**
|
|
184
|
+
* Number of input tokens used to generate the cached response.
|
|
185
|
+
* When provided along with outputTokens and model, the cost is computed and stored.
|
|
186
|
+
* On future cache hits, the stored cost is reported as costSaved in CacheCheckResult.
|
|
187
|
+
*/
|
|
188
|
+
inputTokens?: number;
|
|
189
|
+
/**
|
|
190
|
+
* Number of output tokens in the cached response.
|
|
191
|
+
* See inputTokens for full description.
|
|
192
|
+
*/
|
|
193
|
+
outputTokens?: number;
|
|
194
|
+
/** LLM sampling temperature stored as a NUMERIC field for opt-in filtering. */
|
|
195
|
+
temperature?: number;
|
|
196
|
+
/** Top-p nucleus sampling parameter stored as a NUMERIC field for opt-in filtering. */
|
|
197
|
+
topP?: number;
|
|
198
|
+
/** Random seed stored as a NUMERIC field for opt-in filtering. */
|
|
199
|
+
seed?: number;
|
|
90
200
|
}
|
|
91
201
|
export type CacheConfidence = 'high' | 'uncertain' | 'miss';
|
|
92
202
|
export interface CacheCheckResult {
|
|
93
203
|
hit: boolean;
|
|
94
204
|
response?: string;
|
|
95
205
|
/**
|
|
96
|
-
* Cosine distance score (0–2). Present when a nearest neighbour was found,
|
|
206
|
+
* Cosine distance score (0-2). Present when a nearest neighbour was found,
|
|
97
207
|
* regardless of whether it was a hit or miss.
|
|
98
208
|
*/
|
|
99
209
|
similarity?: number;
|
|
@@ -118,6 +228,15 @@ export interface CacheCheckResult {
|
|
|
118
228
|
similarity: number;
|
|
119
229
|
deltaToThreshold: number;
|
|
120
230
|
};
|
|
231
|
+
/**
|
|
232
|
+
* Estimated cost saved (in dollars) by returning this cached result instead of calling the LLM.
|
|
233
|
+
* Present on hit when the original store() call included inputTokens/outputTokens and model.
|
|
234
|
+
*/
|
|
235
|
+
costSaved?: number;
|
|
236
|
+
/**
|
|
237
|
+
* Structured response content blocks. Present on hit when the entry was stored via storeMultipart().
|
|
238
|
+
*/
|
|
239
|
+
contentBlocks?: import('./utils').ContentBlock[];
|
|
121
240
|
}
|
|
122
241
|
export interface InvalidateResult {
|
|
123
242
|
/** Number of entries deleted in this call. */
|
|
@@ -133,6 +252,8 @@ export interface CacheStats {
|
|
|
133
252
|
misses: number;
|
|
134
253
|
total: number;
|
|
135
254
|
hitRate: number;
|
|
255
|
+
/** Accumulated cost saved in microdollars (divide by 1_000_000 for dollars). */
|
|
256
|
+
costSavedMicros: number;
|
|
136
257
|
}
|
|
137
258
|
export interface IndexInfo {
|
|
138
259
|
name: string;
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,5 +1,63 @@
|
|
|
1
1
|
/** SHA-256 hex digest of a string. */
|
|
2
2
|
export declare function sha256(text: string): string;
|
|
3
|
+
/** Escape a string for safe use as a Valkey Search TAG filter value.
|
|
4
|
+
* Spaces are included because Valkey Search treats unescaped spaces as term
|
|
5
|
+
* separators (OR semantics), which would broaden the filter unintentionally.
|
|
6
|
+
*/
|
|
7
|
+
export declare function escapeTag(value: string): string;
|
|
8
|
+
/** Union of all content block shapes; discriminated by the literal `type` field. */
export type ContentBlock = TextBlock | BinaryBlock | ToolCallBlock | ToolResultBlock | ReasoningBlock;
/** Plain text block. extractText() joins the `text` of these blocks. */
export interface TextBlock {
    type: 'text';
    text: string;
    hints?: BlockHints;
}
/** Binary attachment block. extractBinaryRefs() collects the `ref` of these blocks. */
export interface BinaryBlock {
    type: 'binary';
    kind: 'image' | 'audio' | 'document';
    mediaType: string;
    /** String reference to the content. NOTE(review): presumably produced by a BinaryNormalizer; confirm against callers. */
    ref: string;
    detail?: 'auto' | 'low' | 'high' | 'original';
    filename?: string;
    hints?: BlockHints;
}
/** Tool invocation block carrying the call id, tool name, and untyped arguments. */
export interface ToolCallBlock {
    type: 'tool_call';
    id: string;
    name: string;
    args: unknown;
    hints?: BlockHints;
}
/** Result of a tool call; `content` is limited to text and binary blocks. */
export interface ToolResultBlock {
    type: 'tool_result';
    /** NOTE(review): presumably matches ToolCallBlock.id of the originating call; confirm. */
    toolCallId: string;
    content: Array<TextBlock | BinaryBlock>;
    isError?: boolean;
    hints?: BlockHints;
}
/** Reasoning block with optional signature and redaction flag. */
export interface ReasoningBlock {
    type: 'reasoning';
    text: string;
    opaqueSignature?: string;
    redacted?: boolean;
    hints?: BlockHints;
}
/** Optional per-block hints; any additional keys are allowed via the index signature. */
export interface BlockHints {
    anthropicCacheControl?: {
        type: 'ephemeral';
        ttl?: '5m' | '1h';
    };
    [k: string]: unknown;
}
|
|
51
|
+
/**
|
|
52
|
+
* Extract all text from a ContentBlock array, joining TextBlock.text values with a space.
|
|
53
|
+
* Used to derive the embedding text from a multi-modal prompt.
|
|
54
|
+
*/
|
|
55
|
+
export declare function extractText(blocks: ContentBlock[]): string;
|
|
56
|
+
/**
|
|
57
|
+
* Extract all binary refs from a ContentBlock array, sorted for stability.
|
|
58
|
+
* Used for the binary_refs TAG field on cache entries.
|
|
59
|
+
*/
|
|
60
|
+
export declare function extractBinaryRefs(blocks: ContentBlock[]): string[];
|
|
3
61
|
/**
|
|
4
62
|
* Encode number[] as a little-endian Float32 Buffer.
|
|
5
63
|
* Used to store embeddings as binary HSET field values.
|
package/dist/utils.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.sha256 = sha256;
|
|
4
|
+
exports.escapeTag = escapeTag;
|
|
5
|
+
exports.extractText = extractText;
|
|
6
|
+
exports.extractBinaryRefs = extractBinaryRefs;
|
|
4
7
|
exports.encodeFloat32 = encodeFloat32;
|
|
5
8
|
exports.parseFtSearchResponse = parseFtSearchResponse;
|
|
6
9
|
const node_crypto_1 = require("node:crypto");
|
|
@@ -8,6 +11,33 @@ const node_crypto_1 = require("node:crypto");
|
|
|
8
11
|
function sha256(text) {
    // Feed the string to a SHA-256 hasher and render the digest as hex.
    const hasher = (0, node_crypto_1.createHash)('sha256');
    hasher.update(text);
    return hasher.digest('hex');
}
|
|
14
|
+
/**
 * Backslash-escape a value so it can be used inside a Valkey Search TAG filter.
 *
 * Punctuation and the space character are each prefixed with a backslash.
 * Spaces are escaped too because an unescaped space acts as a term separator
 * (OR semantics) and would widen the filter.
 *
 * @param value Raw tag value.
 * @returns The value with every special character escaped.
 */
function escapeTag(value) {
    const specialChars = /[,.<>{}[\]"':;!@#$%^&*()\-+=~|/\\ ]/g;
    const escaped = value.replace(specialChars, '\\$&');
    return escaped;
}
|
|
21
|
+
/**
|
|
22
|
+
* Extract all text from a ContentBlock array, joining TextBlock.text values with a space.
|
|
23
|
+
* Used to derive the embedding text from a multi-modal prompt.
|
|
24
|
+
*/
|
|
25
|
+
function extractText(blocks) {
|
|
26
|
+
return blocks
|
|
27
|
+
.filter((b) => b.type === 'text')
|
|
28
|
+
.map((b) => b.text)
|
|
29
|
+
.join(' ');
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Extract all binary refs from a ContentBlock array, sorted for stability.
|
|
33
|
+
* Used for the binary_refs TAG field on cache entries.
|
|
34
|
+
*/
|
|
35
|
+
function extractBinaryRefs(blocks) {
|
|
36
|
+
return blocks
|
|
37
|
+
.filter((b) => b.type === 'binary')
|
|
38
|
+
.map((b) => b.ref)
|
|
39
|
+
.sort();
|
|
40
|
+
}
|
|
11
41
|
/**
|
|
12
42
|
* Encode number[] as a little-endian Float32 Buffer.
|
|
13
43
|
* Used to store embeddings as binary HSET field values.
|