@relayplane/proxy 1.5.46 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +251 -15
  2. package/assets/relayplane-proxy.service +20 -0
  3. package/dist/alerts.d.ts +72 -0
  4. package/dist/alerts.d.ts.map +1 -0
  5. package/dist/alerts.js +290 -0
  6. package/dist/alerts.js.map +1 -0
  7. package/dist/anomaly.d.ts +65 -0
  8. package/dist/anomaly.d.ts.map +1 -0
  9. package/dist/anomaly.js +193 -0
  10. package/dist/anomaly.js.map +1 -0
  11. package/dist/budget.d.ts +98 -0
  12. package/dist/budget.d.ts.map +1 -0
  13. package/dist/budget.js +356 -0
  14. package/dist/budget.js.map +1 -0
  15. package/dist/cli.js +512 -93
  16. package/dist/cli.js.map +1 -1
  17. package/dist/config.d.ts +28 -2
  18. package/dist/config.d.ts.map +1 -1
  19. package/dist/config.js +122 -24
  20. package/dist/config.js.map +1 -1
  21. package/dist/downgrade.d.ts +37 -0
  22. package/dist/downgrade.d.ts.map +1 -0
  23. package/dist/downgrade.js +79 -0
  24. package/dist/downgrade.js.map +1 -0
  25. package/dist/mesh/capture.d.ts +11 -0
  26. package/dist/mesh/capture.d.ts.map +1 -0
  27. package/dist/mesh/capture.js +43 -0
  28. package/dist/mesh/capture.js.map +1 -0
  29. package/dist/mesh/fitness.d.ts +14 -0
  30. package/dist/mesh/fitness.d.ts.map +1 -0
  31. package/dist/mesh/fitness.js +40 -0
  32. package/dist/mesh/fitness.js.map +1 -0
  33. package/dist/mesh/index.d.ts +39 -0
  34. package/dist/mesh/index.d.ts.map +1 -0
  35. package/dist/mesh/index.js +118 -0
  36. package/dist/mesh/index.js.map +1 -0
  37. package/dist/mesh/store.d.ts +30 -0
  38. package/dist/mesh/store.d.ts.map +1 -0
  39. package/dist/mesh/store.js +174 -0
  40. package/dist/mesh/store.js.map +1 -0
  41. package/dist/mesh/sync.d.ts +37 -0
  42. package/dist/mesh/sync.d.ts.map +1 -0
  43. package/dist/mesh/sync.js +154 -0
  44. package/dist/mesh/sync.js.map +1 -0
  45. package/dist/mesh/types.d.ts +57 -0
  46. package/dist/mesh/types.d.ts.map +1 -0
  47. package/dist/mesh/types.js +7 -0
  48. package/dist/mesh/types.js.map +1 -0
  49. package/dist/rate-limiter.d.ts +64 -0
  50. package/dist/rate-limiter.d.ts.map +1 -0
  51. package/dist/rate-limiter.js +159 -0
  52. package/dist/rate-limiter.js.map +1 -0
  53. package/dist/relay-config.d.ts +9 -0
  54. package/dist/relay-config.d.ts.map +1 -1
  55. package/dist/relay-config.js +2 -0
  56. package/dist/relay-config.js.map +1 -1
  57. package/dist/response-cache.d.ts +139 -0
  58. package/dist/response-cache.d.ts.map +1 -0
  59. package/dist/response-cache.js +515 -0
  60. package/dist/response-cache.js.map +1 -0
  61. package/dist/server.d.ts.map +1 -1
  62. package/dist/server.js +5 -1
  63. package/dist/server.js.map +1 -1
  64. package/dist/standalone-proxy.d.ts +2 -1
  65. package/dist/standalone-proxy.d.ts.map +1 -1
  66. package/dist/standalone-proxy.js +662 -26
  67. package/dist/standalone-proxy.js.map +1 -1
  68. package/dist/telemetry.d.ts.map +1 -1
  69. package/dist/telemetry.js +8 -5
  70. package/dist/telemetry.js.map +1 -1
  71. package/dist/utils/model-suggestions.d.ts.map +1 -1
  72. package/dist/utils/model-suggestions.js +19 -2
  73. package/dist/utils/model-suggestions.js.map +1 -1
  74. package/dist/utils/version-status.d.ts +9 -0
  75. package/dist/utils/version-status.d.ts.map +1 -0
  76. package/dist/utils/version-status.js +28 -0
  77. package/dist/utils/version-status.js.map +1 -0
  78. package/package.json +7 -3
@@ -0,0 +1,139 @@
1
+ /**
2
+ * RelayPlane Response Cache — Phase 1: Exact Match
3
+ *
4
+ * Caches LLM API responses locally to avoid duplicate API calls.
5
+ * SHA-256 hash of canonical request → cached response.
6
+ *
7
+ * Features:
8
+ * - In-memory LRU + disk persistence (~/.relayplane/cache/)
9
+ * - SQLite index for metadata (hit counts, cost tracking, TTL)
10
+ * - Gzipped response bodies on disk
11
+ * - Configurable TTL with task-type overrides
12
+ * - Exact mode only caches deterministic requests (temperature 0 or unset) by default
13
+ * - Tool-call responses are cacheable; responseHasToolCalls() lets callers opt out
14
+ *
15
+ * @packageDocumentation
16
+ */
17
+ export interface CacheConfig {
18
+ enabled?: boolean; // Master switch (default: true); while false, shouldBypass() is true for every request and set() is a no-op
19
+ /** Max in-memory cache size in MB (default: 100). Least-recently-used entries are evicted first; a response larger than this is never held in memory. */
20
+ maxSizeMb?: number;
21
+ /** Default TTL in seconds (default: 3600 = 1 hour). Used when ttlByTaskType has no entry for the task type. */
22
+ defaultTtlSeconds?: number;
23
+ /** Per-task-type TTL overrides in seconds, keyed by task type (entries stored without a task type use 'general'). */
24
+ ttlByTaskType?: Record<string, number>;
25
+ /** Only cache when temperature is 0 or unset (default: true). Not consulted in aggressive mode. */
26
+ onlyWhenDeterministic?: boolean;
27
+ /** Cache directory (default: ~/.relayplane/cache). Holds the SQLite index (index.db) and a responses/ subdirectory of gzipped bodies. */
28
+ cacheDir?: string;
29
+ /** Cache mode: "exact" (default) or "aggressive". Aggressive keys use only model, system, tools and the last user message. */
30
+ mode?: 'exact' | 'aggressive';
31
+ /** TTL for aggressive mode in seconds (default: 1800 = 30 min). NOTE(review): ResponseCache only exposes this through a getter; presumably the caller enforces it — confirm. */
32
+ aggressiveMaxAge?: number;
33
+ }
34
+ export interface CacheSetOptions { // Metadata recorded next to a cached response by ResponseCache.set()
35
+ model: string; // Stored in the index `model` column; getStats().byModel groups on it
36
+ tokensIn: number; // Stored in the index as tokens_in
37
+ tokensOut: number; // Stored in the index as tokens_out
38
+ costUsd: number; // Stored as cost_usd; per-model / per-task savings are computed as cost_usd * hit_count
39
+ taskType?: string; // Defaults to 'general'; selects the ttlByTaskType override for this entry
40
+ }
41
+ export interface CacheStats { // Snapshot returned by ResponseCache.getStats()
42
+ totalEntries: number; // Row count of the SQLite index when it is open, otherwise the in-memory entry count
43
+ totalSizeBytes: number; // Sum of uncompressed UTF-8 response sizes (index total when open, otherwise in-memory total)
44
+ hits: number; // recordHit() calls since construction or the last clear()
45
+ misses: number; // recordMiss() calls since construction or the last clear()
46
+ bypasses: number; // recordBypass() calls since construction or the last clear()
47
+ hitRate: number; // hits / (hits + misses) as a 0..1 fraction; bypasses are not counted; 0 when nothing was recorded
48
+ savedCostUsd: number; // Sum of the savedCostUsd values passed to recordHit()
49
+ savedRequests: number; // Same value as `hits`
50
+ byModel: Record<string, { // Per-model aggregates read from the SQLite index; empty when no index is open
51
+ hits: number; // Sum of persisted hit_count for the model
52
+ entries: number; // Number of index rows for the model
53
+ savedCostUsd: number; // Sum of cost_usd * hit_count for the model
54
+ }>;
55
+ byTaskType: Record<string, { // Per-task-type aggregates read from the SQLite index; empty when no index is open
56
+ hits: number; // Sum of persisted hit_count for the task type
57
+ entries: number; // Number of index rows for the task type
58
+ savedCostUsd: number; // Sum of cost_usd * hit_count for the task type
59
+ }>;
60
+ }
61
+ /**
62
+ * Generate a SHA-256 cache key (lowercase hex digest) from a request body.
63
+ * Only includes fields that affect the response content.
64
+ * Excluded: stream, provider headers, API keys.
65
+ */
66
+ export declare function computeCacheKey(requestBody: Record<string, unknown>): string;
67
+ /**
68
+ * Generate an aggressive cache key (SHA-256, lowercase hex digest) from a request body.
69
+ * Uses: system prompt + last user message + model + tools.
70
+ * Ignores: full conversation history, temperature, max_tokens, etc.
71
+ */
72
+ export declare function computeAggressiveCacheKey(requestBody: Record<string, unknown>): string; // Requests differing only in earlier turns or sampling parameters share a key
73
+ /**
74
+ * Check if a request is deterministic (temperature is exactly 0, null, or unset).
75
+ */
76
+ export declare function isDeterministic(requestBody: Record<string, unknown>): boolean;
77
+ /**
78
+ * Check if a response contains tool calls (OpenAI `choices[].message.tool_calls`, Anthropic `tool_use` content blocks or `stop_reason`).
79
+ * Note: We still cache tool call responses — agent workloads are almost
80
+ * entirely tool calls, and identical requests should return cached results.
81
+ * The caller decides whether to use this check.
82
+ */
83
+ export declare function responseHasToolCalls(responseBody: Record<string, unknown>): boolean;
84
+ export declare class ResponseCache {
85
+ private config; // Fully resolved CacheConfig (defaults applied in the constructor)
86
+ private memory; // In-memory LRU of response strings, capped at maxSizeMb
87
+ private db; // SQLite index handle, or null while running memory-only
88
+ private responsesDir; // <cacheDir>/responses, where gzipped bodies are written
89
+ private _initialized; // Set once init() has run with the cache enabled
90
+ private _hits; // Runtime counter behind getStats().hits
91
+ private _misses; // Runtime counter behind getStats().misses
92
+ private _bypasses; // Runtime counter behind getStats().bypasses
93
+ private _savedCostUsd; // Running total of the savings reported to recordHit()
94
+ constructor(config?: Partial<CacheConfig>); // Only resolves config and builds the LRU; no disk or SQLite access until init()
95
+ /** Initialize disk storage + SQLite. Safe to call multiple times. Does nothing while the cache is disabled; falls back to memory-only mode if SQLite cannot be opened. */
96
+ init(): void;
97
+ /** Returns true if the request should bypass the cache: always when disabled; in exact mode also when onlyWhenDeterministic is set and the request is not deterministic. Aggressive mode never bypasses while enabled. */
98
+ shouldBypass(requestBody: Record<string, unknown>): boolean;
99
+ /** Get the cache mode */
100
+ get mode(): 'exact' | 'aggressive';
101
+ /** Compute cache key based on current mode (computeAggressiveCacheKey in aggressive mode, computeCacheKey otherwise) */
102
+ computeKey(requestBody: Record<string, unknown>): string;
103
+ /** Get aggressive mode max age in seconds */
104
+ get aggressiveMaxAge(): number;
105
+ /** Look up a cached response: memory first, then the gzipped on-disk copy via the SQLite index. Returns the response string or null. Does not touch the hit/miss counters. */
106
+ get(hash: string): string | null;
107
+ /** Store a response in cache. No-op while disabled. TTL is ttlByTaskType[taskType] when present, otherwise defaultTtlSeconds. */
108
+ set(hash: string, responseJson: string, opts: CacheSetOptions): void;
109
+ /** Record a cache hit (for stats). `_savedLatencyMs` is accepted but currently ignored. */
110
+ recordHit(savedCostUsd: number, _savedLatencyMs: number): void;
111
+ /** Record a cache miss (for stats). */
112
+ recordMiss(): void;
113
+ /** Record a cache bypass (for stats). */
114
+ recordBypass(): void;
115
+ /** Remove a single entry from memory, the SQLite index and disk. */
116
+ evict(hash: string): void;
117
+ /** Clear all cached entries (memory, index rows, files in the responses directory) and reset the runtime counters. */
118
+ clear(): void;
119
+ /** Clean expired entries. Returns count removed. */
120
+ cleanup(): number;
121
+ /** Get detailed stats. Entry totals and the per-model / per-task breakdowns come from the SQLite index when it is open. */
122
+ getStats(): CacheStats;
123
+ /** Simple status for CLI. */
124
+ getStatus(): {
125
+ enabled: boolean;
126
+ entries: number;
127
+ sizeMb: number; // Total size in MiB, rounded to 2 decimals
128
+ hitRate: string; // Percentage string with one decimal, e.g. "12.5%"
129
+ savedCostUsd: number; // Rounded to 4 decimals
130
+ };
131
+ setEnabled(enabled: boolean): void; // Flips config.enabled at runtime; does not call init()
132
+ get enabled(): boolean; // Current value of config.enabled
133
+ close(): void; // Closes the SQLite handle if one is open; in-memory entries are left in place
134
+ private loadFromDisk; // Reads and gunzips <hash>.gz; yields null on any error
135
+ private saveToDisk; // Gzips and writes <hash>.gz; write errors are swallowed
136
+ }
137
+ export declare function getResponseCache(config?: Partial<CacheConfig>): ResponseCache; // Process-wide singleton; `config` is only used by the call that creates the instance
138
+ export declare function resetResponseCache(): void; // Closes and discards the singleton so the next getResponseCache() builds a fresh one
139
+ //# sourceMappingURL=response-cache.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"response-cache.d.ts","sourceRoot":"","sources":["../src/response-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAUH,MAAM,WAAW,WAAW;IAC1B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sDAAsD;IACtD,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,6CAA6C;IAC7C,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,6DAA6D;IAC7D,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,qDAAqD;IACrD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oDAAoD;IACpD,IAAI,CAAC,EAAE,OAAO,GAAG,YAAY,CAAC;IAC9B,kEAAkE;IAClE,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAqCD,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,UAAU;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACjF,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACrF;AAoBD;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAa5E;AAQD;;;;GAIG;AACH,wBAAgB,yBAAyB,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CA0BtF;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAG7E;AAED;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAAC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAwBnF;AAyFD,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,EAAE,CAAyB;IACnC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,YAAY,CAAS;IAG7B,OAAO,CAAC,KAAK,CAAK;IAClB,OAAO,CAAC,OAAO,CAAK;IACpB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,aAAa,CAAK;gBAEd,MAAM,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC;IAMzC,qEAAqE;IACrE,IAAI,IAAI,IAAI;IAmCZ,2DAA2D;IAC3D,YAAY,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO;IAQ3D,yBAAyB;IACzB,IAAI,IA
AI,IAAI,OAAO,GAAG,YAAY,CAA6B;IAE/D,8CAA8C;IAC9C,UAAU,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM;IAOxD,6CAA6C;IAC7C,IAAI,gBAAgB,IAAI,MAAM,CAAyC;IAEvE,sEAAsE;IACtE,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAoChC,iCAAiC;IACjC,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,EAAE,IAAI,EAAE,eAAe,GAAG,IAAI;IAwBpE,sCAAsC;IACtC,SAAS,CAAC,YAAY,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM,GAAG,IAAI;IAK9D,uCAAuC;IACvC,UAAU,IAAI,IAAI;IAIlB,yCAAyC;IACzC,YAAY,IAAI,IAAI;IAIpB,6BAA6B;IAC7B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAMzB,gCAAgC;IAChC,KAAK,IAAI,IAAI;IAcb,oDAAoD;IACpD,OAAO,IAAI,MAAM;IAQjB,0BAA0B;IAC1B,QAAQ,IAAI,UAAU;IAkCtB,6BAA6B;IAC7B,SAAS,IAAI;QAAE,OAAO,EAAE,OAAO,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE;IAWzG,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAClC,IAAI,OAAO,IAAI,OAAO,CAAgC;IAEtD,KAAK,IAAI,IAAI;IAMb,OAAO,CAAC,YAAY;IAOpB,OAAO,CAAC,UAAU;CAMnB;AAMD,wBAAgB,gBAAgB,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GAAG,aAAa,CAK7E;AAED,wBAAgB,kBAAkB,IAAI,IAAI,CAEzC"}
@@ -0,0 +1,515 @@
1
+ "use strict";
2
+ /**
3
+ * RelayPlane Response Cache — Phase 1: Exact Match
4
+ *
5
+ * Caches LLM API responses locally to avoid duplicate API calls.
6
+ * SHA-256 hash of canonical request → cached response.
7
+ *
8
+ * Features:
9
+ * - In-memory LRU + disk persistence (~/.relayplane/cache/)
10
+ * - SQLite index for metadata (hit counts, cost tracking, TTL)
11
+ * - Gzipped response bodies on disk
12
+ * - Configurable TTL with task-type overrides
13
+ * - Exact mode only caches deterministic requests (temperature 0 or unset) by default
14
+ * - Tool-call responses are cacheable; responseHasToolCalls() lets callers opt out
15
+ *
16
+ * @packageDocumentation
17
+ */
18
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
19
+ if (k2 === undefined) k2 = k;
20
+ var desc = Object.getOwnPropertyDescriptor(m, k);
21
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
22
+ desc = { enumerable: true, get: function() { return m[k]; } };
23
+ }
24
+ Object.defineProperty(o, k2, desc);
25
+ }) : (function(o, m, k, k2) {
26
+ if (k2 === undefined) k2 = k;
27
+ o[k2] = m[k];
28
+ }));
29
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
30
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
31
+ }) : function(o, v) {
32
+ o["default"] = v;
33
+ });
34
+ var __importStar = (this && this.__importStar) || (function () {
35
+ var ownKeys = function(o) {
36
+ ownKeys = Object.getOwnPropertyNames || function (o) {
37
+ var ar = [];
38
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
39
+ return ar;
40
+ };
41
+ return ownKeys(o);
42
+ };
43
+ return function (mod) {
44
+ if (mod && mod.__esModule) return mod;
45
+ var result = {};
46
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
47
+ __setModuleDefault(result, mod);
48
+ return result;
49
+ };
50
+ })();
51
+ Object.defineProperty(exports, "__esModule", { value: true });
52
+ exports.ResponseCache = void 0;
53
+ exports.computeCacheKey = computeCacheKey;
54
+ exports.computeAggressiveCacheKey = computeAggressiveCacheKey;
55
+ exports.isDeterministic = isDeterministic;
56
+ exports.responseHasToolCalls = responseHasToolCalls;
57
+ exports.getResponseCache = getResponseCache;
58
+ exports.resetResponseCache = resetResponseCache;
59
+ const crypto = __importStar(require("node:crypto"));
60
+ const fs = __importStar(require("node:fs"));
61
+ const path = __importStar(require("node:path"));
62
+ const os = __importStar(require("node:os"));
63
+ const zlib = __importStar(require("node:zlib"));
64
/** Built-in value for every cache option; used wherever the caller leaves an option unset. */
const DEFAULTS = {
    enabled: true,
    maxSizeMb: 100,
    defaultTtlSeconds: 3600,
    ttlByTaskType: {},
    onlyWhenDeterministic: true,
    cacheDir: path.join(os.homedir(), '.relayplane', 'cache'),
    mode: 'exact',
    aggressiveMaxAge: 1800,
};
/**
 * Produce a complete cache configuration from a partial one.
 *
 * Every option named in DEFAULTS is taken from `cfg` unless it is missing,
 * `undefined` or `null`, in which case the default is used. Falsy but defined
 * values such as `false` or `0` are kept. Properties of `cfg` that are not
 * known options are dropped.
 *
 * @param cfg Partial configuration, or nothing at all.
 * @returns A new object holding a value for every option.
 */
function resolveCache(cfg) {
    const overrides = cfg ?? {};
    const resolvedPairs = Object.entries(DEFAULTS).map(([option, fallback]) => [option, overrides[option] ?? fallback]);
    return Object.fromEntries(resolvedPairs);
}
86
+ // ─── Cache Key Generation ────────────────────────────────────────────
87
/**
 * Request fields that feed the exact-match cache key.
 *
 * Covers the Anthropic Messages parameters plus the OpenAI-style parameters
 * that change what the model returns (`stop`, `response_format`, `n`, ...).
 * Leaving one of those out would let two requests that differ only in that
 * parameter share a cached response. A field is used only when the request
 * actually sets it, so requests that use none of the additions keep the key
 * they had before.
 */
const CACHE_KEY_FIELDS = [
    'frequency_penalty',
    'logit_bias',
    'max_completion_tokens',
    'max_tokens',
    'messages',
    'model',
    'n',
    'presence_penalty',
    'response_format',
    'seed',
    'stop',
    'stop_sequences',
    'system',
    'temperature',
    'thinking',
    'tool_choice',
    'tools',
    'top_k',
    'top_p',
];
/**
 * Rebuild a JSON-like value with the keys of every object, at every depth,
 * in sorted order, so that JSON.stringify no longer depends on property
 * insertion order. Array element order is preserved.
 * Assumes plain JSON data (no cycles, no class instances).
 */
function canonicalizeForCacheKey(value) {
    if (Array.isArray(value)) {
        return value.map(canonicalizeForCacheKey);
    }
    if (value !== null && typeof value === 'object') {
        // Null prototype: a "__proto__" key stays an ordinary own property.
        const sorted = Object.create(null);
        for (const key of Object.keys(value).sort()) {
            sorted[key] = canonicalizeForCacheKey(value[key]);
        }
        return sorted;
    }
    return value;
}
/** SHA-256 (lowercase hex) of the canonical JSON form of `fields`. */
function hashCanonicalJson(fields) {
    const json = JSON.stringify(canonicalizeForCacheKey(fields));
    return crypto.createHash('sha256').update(json).digest('hex');
}
/**
 * Generate a SHA-256 cache key from a request body.
 * Only includes fields that affect the response content.
 * Excluded: stream, provider headers, API keys.
 *
 * Object keys are sorted at every nesting level before hashing, so two
 * requests that carry the same data with a different property order (for
 * example `{role, content}` vs `{content, role}` inside `messages`) get the
 * same key.
 *
 * @param requestBody Parsed request body.
 * @returns 64-character lowercase hex digest.
 */
function computeCacheKey(requestBody) {
    const canonical = Object.create(null);
    for (const field of CACHE_KEY_FIELDS) {
        if (requestBody[field] !== undefined) {
            canonical[field] = requestBody[field];
        }
    }
    return hashCanonicalJson(canonical);
}
/**
 * Fields included in the aggressive cache key.
 * Ignores conversation history — only uses system prompt + last user message + model + tools.
 */
const AGGRESSIVE_KEY_FIELDS = ['model', 'system', 'tools'];
/**
 * Generate an aggressive cache key from a request body.
 * Uses: system prompt + last user message + model + tools.
 * Ignores: full conversation history, temperature, max_tokens, etc.
 *
 * A `messages` value that is not an array, and array elements that are not
 * objects, are skipped rather than dereferenced.
 *
 * @param requestBody Parsed request body.
 * @returns 64-character lowercase hex digest.
 */
function computeAggressiveCacheKey(requestBody) {
    const canonical = Object.create(null);
    for (const field of AGGRESSIVE_KEY_FIELDS) {
        if (requestBody[field] !== undefined) {
            canonical[field] = requestBody[field];
        }
    }
    const messages = requestBody['messages'];
    if (Array.isArray(messages)) {
        // Walk backwards: only the most recent user turn takes part in the key.
        for (let i = messages.length - 1; i >= 0; i--) {
            const message = messages[i];
            if (message !== null && typeof message === 'object' && message['role'] === 'user') {
                canonical['last_user_message'] = message['content'];
                break;
            }
        }
    }
    return hashCanonicalJson(canonical);
}
157
/**
 * Tell whether a request asks for deterministic sampling.
 *
 * A request counts as deterministic when its `temperature` is exactly 0 or
 * is absent (undefined or null).
 *
 * @param requestBody Parsed request body.
 * @returns true when the request is deterministic.
 */
function isDeterministic(requestBody) {
    const { temperature } = requestBody;
    if (temperature === undefined || temperature === null) {
        return true;
    }
    return temperature === 0;
}
164
/**
 * Check if a response contains tool calls.
 * Note: We still cache tool call responses — agent workloads are almost
 * entirely tool calls, and identical requests should return cached results.
 * The caller decides whether to use this check.
 *
 * Recognised shapes:
 *  - OpenAI: a non-empty `choices[].message.tool_calls` array
 *  - Anthropic: a `content[]` block whose `type` is 'tool_use'
 *  - Anthropic: `stop_reason === 'tool_use'`
 *
 * Anything that does not have the expected shape (a non-array `choices` or
 * `content`, null elements, a non-object body) is treated as "no tool calls"
 * instead of raising a TypeError.
 *
 * @param responseBody Parsed response body.
 * @returns true when a tool call is present.
 */
function responseHasToolCalls(responseBody) {
    if (responseBody === null || typeof responseBody !== 'object') {
        return false;
    }
    // OpenAI format: choices[].message.tool_calls
    const choices = responseBody['choices'];
    if (Array.isArray(choices)) {
        for (const choice of choices) {
            const toolCalls = choice?.['message']?.['tool_calls'];
            if (Array.isArray(toolCalls) && toolCalls.length > 0) {
                return true;
            }
        }
    }
    // Anthropic format: content[].type === 'tool_use'
    const content = responseBody['content'];
    if (Array.isArray(content)) {
        for (const block of content) {
            if (block?.['type'] === 'tool_use') {
                return true;
            }
        }
    }
    // Anthropic stop_reason
    return responseBody['stop_reason'] === 'tool_use';
}
194
/**
 * Size-bounded, least-recently-used store of response strings.
 *
 * Recency is tracked through Map insertion order: a read re-inserts the
 * entry at the end, so the first key of the map is always the least recently
 * used one. Sizes are UTF-8 byte lengths of the stored strings.
 */
class MemoryLRU {
    entries = new Map();
    currentSizeBytes = 0;
    maxSizeBytes;
    /** @param maxSizeMb Capacity in MiB. */
    constructor(maxSizeMb) {
        this.maxSizeBytes = maxSizeMb * 1024 * 1024;
    }
    /** Return the stored response and mark it most recently used, or null when absent. */
    get(hash) {
        const entry = this.entries.get(hash);
        if (!entry)
            return null;
        // Move to end (most recently used)
        this.entries.delete(hash);
        this.entries.set(hash, entry);
        return entry.response;
    }
    /**
     * Store `response` under `hash`, evicting least-recently-used entries
     * until it fits. Any previous value for `hash` is discarded first.
     *
     * A response larger than the whole capacity is not stored. That case is
     * detected before any eviction, so an unstorable value no longer flushes
     * every other entry out of the cache.
     */
    set(hash, response) {
        const sizeBytes = Buffer.byteLength(response, 'utf-8');
        // The old value is superseded even when the new one cannot be kept.
        this.delete(hash);
        if (sizeBytes > this.maxSizeBytes)
            return; // too big
        // Evict LRU until we have space
        while (this.currentSizeBytes + sizeBytes > this.maxSizeBytes && this.entries.size > 0) {
            const oldest = this.entries.keys().next().value;
            if (oldest === undefined)
                break;
            this.delete(oldest);
        }
        this.entries.set(hash, { response, sizeBytes });
        this.currentSizeBytes += sizeBytes;
    }
    /** Remove `hash` if present and give its bytes back to the budget. */
    delete(hash) {
        const entry = this.entries.get(hash);
        if (entry) {
            this.currentSizeBytes -= entry.sizeBytes;
            this.entries.delete(hash);
        }
    }
    /** Drop every entry. */
    clear() {
        this.entries.clear();
        this.currentSizeBytes = 0;
    }
    /** Number of stored entries. */
    get size() { return this.entries.size; }
    /** Total UTF-8 byte length of the stored responses. */
    get sizeBytes() { return this.currentSizeBytes; }
}
247
/**
 * Open the SQLite index at `dbPath` and apply the journal and sync settings
 * used by the cache.
 *
 * `better-sqlite3` is required inside the function rather than at module
 * load, so a failure to load it surfaces here, where the caller can catch it.
 *
 * @param dbPath Path of the database file.
 * @returns The opened database handle.
 */
function openDatabase(dbPath) {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const BetterSqlite3 = require('better-sqlite3');
    const handle = new BetterSqlite3(dbPath);
    for (const setting of ['journal_mode = WAL', 'synchronous = NORMAL']) {
        handle.pragma(setting);
    }
    return handle;
}
255
+ // ─── ResponseCache ──────────────────────────────────────────────────
256
/**
 * Two-tier cache for serialized LLM responses.
 *
 * Entries live in an in-memory LRU and, once init() has run, as gzipped
 * files under `<cacheDir>/responses/`, indexed by a SQLite table that holds
 * expiry, cost and hit-count metadata. When the cache directory or SQLite is
 * unavailable the cache keeps working from memory only.
 */
class ResponseCache {
    config;
    memory;
    db = null;
    responsesDir;
    _initialized = false;
    // Runtime counters
    _hits = 0;
    _misses = 0;
    _bypasses = 0;
    _savedCostUsd = 0;
    // hash -> expiry time (epoch ms). Filled only while no SQLite index is
    // open, so that TTLs are still honoured in memory-only mode.
    _memoryExpiry = new Map();
    /** @param config Partial configuration; unset options take their defaults. */
    constructor(config) {
        this.config = resolveCache(config);
        this.memory = new MemoryLRU(this.config.maxSizeMb);
        this.responsesDir = path.join(this.config.cacheDir, 'responses');
    }
    /**
     * Initialize disk storage + SQLite. Safe to call multiple times.
     *
     * Does nothing while the cache is disabled. Failure to create the cache
     * directories or to open the index is logged and leaves the cache in
     * memory-only mode; it is never thrown to the caller.
     */
    init() {
        if (this._initialized || !this.config.enabled)
            return;
        this._initialized = true;
        try {
            fs.mkdirSync(this.config.cacheDir, { recursive: true });
            fs.mkdirSync(this.responsesDir, { recursive: true });
        }
        catch (err) {
            console.warn('[RelayPlane Cache] Cache directory unavailable, memory-only mode:', err instanceof Error ? err.message : String(err));
            return;
        }
        try {
            const dbPath = path.join(this.config.cacheDir, 'index.db');
            this.db = openDatabase(dbPath);
            this.db.exec(`
                CREATE TABLE IF NOT EXISTS cache_entries (
                    hash TEXT PRIMARY KEY,
                    model TEXT NOT NULL,
                    task_type TEXT NOT NULL DEFAULT 'general',
                    tokens_in INTEGER NOT NULL DEFAULT 0,
                    tokens_out INTEGER NOT NULL DEFAULT 0,
                    cost_usd REAL NOT NULL DEFAULT 0,
                    created_at INTEGER NOT NULL,
                    expires_at INTEGER NOT NULL,
                    hit_count INTEGER NOT NULL DEFAULT 0,
                    response_size_bytes INTEGER NOT NULL DEFAULT 0
                );
                CREATE INDEX IF NOT EXISTS idx_expires_at ON cache_entries(expires_at);
                CREATE INDEX IF NOT EXISTS idx_model ON cache_entries(model);
            `);
            // Clean expired on startup. Going through cleanup() removes the
            // gzipped files together with the rows; deleting only the rows
            // would leave the files on disk with nothing pointing at them.
            this.cleanup();
        }
        catch (err) {
            console.warn('[RelayPlane Cache] SQLite unavailable, memory-only mode:', err instanceof Error ? err.message : String(err));
            // Do not leak a handle that opened but could not be set up.
            try {
                this.db?.close();
            }
            catch { /* handle already unusable */ }
            this.db = null;
        }
    }
    /**
     * Returns true if the request should bypass the cache.
     * Always true while disabled. Aggressive mode never bypasses while
     * enabled; exact mode bypasses non-deterministic requests when
     * `onlyWhenDeterministic` is set.
     */
    shouldBypass(requestBody) {
        if (!this.config.enabled)
            return true;
        // In aggressive mode, bypass only if disabled
        if (this.config.mode === 'aggressive')
            return false;
        if (this.config.onlyWhenDeterministic && !isDeterministic(requestBody))
            return true;
        return false;
    }
    /** Get the cache mode */
    get mode() { return this.config.mode; }
    /** Compute cache key based on current mode */
    computeKey(requestBody) {
        if (this.config.mode === 'aggressive') {
            return computeAggressiveCacheKey(requestBody);
        }
        return computeCacheKey(requestBody);
    }
    /** Get aggressive mode max age in seconds */
    get aggressiveMaxAge() { return this.config.aggressiveMaxAge; }
    /**
     * Look up a cached response. Returns the response string or null.
     *
     * Memory is consulted first, then the gzipped copy on disk (only when the
     * SQLite index is open). An expired entry is evicted and reported as
     * absent. The hit/miss counters are not touched here.
     */
    get(hash) {
        const now = Date.now();
        // Memory first
        const memHit = this.memory.get(hash);
        if (memHit !== null) {
            if (this.isExpired(hash, now)) {
                this.evict(hash);
                return null;
            }
            if (this.db) {
                this.db.prepare('UPDATE cache_entries SET hit_count = hit_count + 1 WHERE hash = ?').run(hash);
            }
            return memHit;
        }
        // Not resident in memory any more: its expiry record is stale.
        this._memoryExpiry.delete(hash);
        // Disk fallback
        if (this.db) {
            const row = this.db.prepare('SELECT hash FROM cache_entries WHERE hash = ? AND expires_at > ?').get(hash, now);
            if (row) {
                const diskResponse = this.loadFromDisk(hash);
                if (diskResponse) {
                    this.memory.set(hash, diskResponse);
                    this.db.prepare('UPDATE cache_entries SET hit_count = hit_count + 1 WHERE hash = ?').run(hash);
                    return diskResponse;
                }
                // Broken disk entry
                this.db.prepare('DELETE FROM cache_entries WHERE hash = ?').run(hash);
            }
        }
        return null;
    }
    /**
     * Store a response in cache. No-op while the cache is disabled.
     *
     * The TTL is `ttlByTaskType[taskType]` when configured, otherwise
     * `defaultTtlSeconds`; `taskType` defaults to 'general'.
     */
    set(hash, responseJson, opts) {
        if (!this.config.enabled)
            return;
        const taskType = opts.taskType || 'general';
        const ttlSec = this.config.ttlByTaskType[taskType] ?? this.config.defaultTtlSeconds;
        const now = Date.now();
        const expiresAt = now + ttlSec * 1000;
        const sizeBytes = Buffer.byteLength(responseJson, 'utf-8');
        // Memory
        this.memory.set(hash, responseJson);
        // Disk
        this.saveToDisk(hash, responseJson);
        // SQLite
        if (this.db) {
            this.db.prepare(`
                INSERT OR REPLACE INTO cache_entries
                (hash, model, task_type, tokens_in, tokens_out, cost_usd, created_at, expires_at, hit_count, response_size_bytes)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, ?)
            `).run(hash, opts.model, taskType, opts.tokensIn, opts.tokensOut, opts.costUsd, now, expiresAt, sizeBytes);
        }
        else {
            // No index to hold the expiry: remember it in memory instead.
            this._memoryExpiry.set(hash, expiresAt);
            // Expiry records outlive entries the LRU pushed out; sweep once
            // they clearly outnumber what is resident.
            if (this._memoryExpiry.size > this.memory.size + 256) {
                this.cleanup();
            }
        }
    }
    /** Record a cache hit (for stats). `_savedLatencyMs` is currently unused. */
    recordHit(savedCostUsd, _savedLatencyMs) {
        this._hits++;
        this._savedCostUsd += savedCostUsd;
    }
    /** Record a cache miss (for stats). */
    recordMiss() {
        this._misses++;
    }
    /** Record a cache bypass (for stats). */
    recordBypass() {
        this._bypasses++;
    }
    /** Remove a single entry from memory, the index and disk. */
    evict(hash) {
        this.memory.delete(hash);
        this._memoryExpiry.delete(hash);
        if (this.db)
            this.db.prepare('DELETE FROM cache_entries WHERE hash = ?').run(hash);
        try {
            fs.unlinkSync(path.join(this.responsesDir, `${hash}.gz`));
        }
        catch { /* ok */ }
    }
    /** Clear all cached entries and reset the runtime counters. */
    clear() {
        this.memory.clear();
        this._memoryExpiry.clear();
        if (this.db)
            this.db.exec('DELETE FROM cache_entries');
        try {
            for (const f of fs.readdirSync(this.responsesDir)) {
                try {
                    fs.unlinkSync(path.join(this.responsesDir, f));
                }
                catch { /* ok */ }
            }
        }
        catch { /* ok */ }
        this._hits = 0;
        this._misses = 0;
        this._bypasses = 0;
        this._savedCostUsd = 0;
    }
    /**
     * Clean expired entries. Returns count removed.
     * Expiry comes from the SQLite index when it is open, otherwise from the
     * in-memory expiry records.
     */
    cleanup() {
        const now = Date.now();
        const expired = this.db
            ? this.db.prepare('SELECT hash FROM cache_entries WHERE expires_at <= ?').all(now).map((row) => row.hash)
            : Array.from(this._memoryExpiry).filter(([, expiresAt]) => expiresAt <= now).map(([hash]) => hash);
        for (const hash of expired)
            this.evict(hash);
        return expired.length;
    }
    /**
     * Get detailed stats. Entry totals and the per-model / per-task
     * breakdowns are read from the SQLite index when it is open; otherwise
     * the totals describe the in-memory LRU and the breakdowns are empty.
     */
    getStats() {
        const total = this._hits + this._misses;
        const result = {
            totalEntries: this.memory.size,
            totalSizeBytes: this.memory.sizeBytes,
            hits: this._hits,
            misses: this._misses,
            bypasses: this._bypasses,
            hitRate: total > 0 ? this._hits / total : 0,
            savedCostUsd: this._savedCostUsd,
            savedRequests: this._hits,
            byModel: {},
            byTaskType: {},
        };
        if (this.db) {
            const countRow = this.db.prepare('SELECT COUNT(*) as c, COALESCE(SUM(response_size_bytes),0) as s FROM cache_entries').get();
            result.totalEntries = countRow.c;
            result.totalSizeBytes = countRow.s;
            const modelRows = this.db.prepare('SELECT model, SUM(hit_count) as h, COUNT(*) as e, SUM(cost_usd * hit_count) as sv FROM cache_entries GROUP BY model').all();
            for (const r of modelRows)
                result.byModel[r.model] = { hits: r.h, entries: r.e, savedCostUsd: r.sv };
            const taskRows = this.db.prepare('SELECT task_type, SUM(hit_count) as h, COUNT(*) as e, SUM(cost_usd * hit_count) as sv FROM cache_entries GROUP BY task_type').all();
            for (const r of taskRows)
                result.byTaskType[r.task_type] = { hits: r.h, entries: r.e, savedCostUsd: r.sv };
        }
        return result;
    }
    /** Simple status for CLI: size in MiB (2 decimals), hit rate as a percentage string, savings rounded to 4 decimals. */
    getStatus() {
        const s = this.getStats();
        return {
            enabled: this.config.enabled,
            entries: s.totalEntries,
            sizeMb: parseFloat((s.totalSizeBytes / (1024 * 1024)).toFixed(2)),
            hitRate: `${(s.hitRate * 100).toFixed(1)}%`,
            savedCostUsd: parseFloat(s.savedCostUsd.toFixed(4)),
        };
    }
    /** Turn the cache on or off at runtime. Does not call init(). */
    setEnabled(enabled) { this.config.enabled = enabled; }
    /** Whether the cache is currently enabled. */
    get enabled() { return this.config.enabled; }
    /**
     * Close the SQLite index if it is open. In-memory entries are kept.
     * Also clears the initialized flag so that a later init() can reopen the
     * index instead of returning early.
     */
    close() {
        if (this.db) {
            this.db.close();
            this.db = null;
        }
        this._initialized = false;
    }
    // ─── Private ──────────────────────────────────────────────────────
    /**
     * Whether the entry for `hash` has passed its expiry at time `now`.
     * With an index open, a missing row counts as expired. Without one, an
     * entry that has no recorded expiry is treated as still valid.
     */
    isExpired(hash, now) {
        if (this.db) {
            const row = this.db.prepare('SELECT expires_at FROM cache_entries WHERE hash = ?').get(hash);
            return !row || row.expires_at <= now;
        }
        const expiresAt = this._memoryExpiry.get(hash);
        return expiresAt !== undefined && expiresAt <= now;
    }
    /** Read and gunzip `<hash>.gz`; null when the file is missing or unreadable. */
    loadFromDisk(hash) {
        try {
            const compressed = fs.readFileSync(path.join(this.responsesDir, `${hash}.gz`));
            return zlib.gunzipSync(compressed).toString('utf-8');
        }
        catch {
            return null;
        }
    }
    /** Gzip `response` into `<hash>.gz`. Best effort: write errors are ignored. */
    saveToDisk(hash, response) {
        try {
            const compressed = zlib.gzipSync(Buffer.from(response, 'utf-8'));
            fs.writeFileSync(path.join(this.responsesDir, `${hash}.gz`), compressed);
        }
        catch { /* non-fatal */ }
    }
}
500
+ exports.ResponseCache = ResponseCache;
501
+ // ─── Singleton ──────────────────────────────────────────────────────
502
/** The process-wide cache instance, created on first use. */
let _instance = null;
/**
 * Return the shared ResponseCache, creating it on the first call.
 * `config` is only used by the call that creates the instance; later calls
 * return the existing instance unchanged.
 */
function getResponseCache(config) {
    _instance = _instance || new ResponseCache(config);
    return _instance;
}
/**
 * Close and discard the shared instance, if there is one, so that the next
 * getResponseCache() call builds a fresh cache.
 */
function resetResponseCache() {
    if (!_instance) {
        return;
    }
    _instance.close();
    _instance = null;
}
515
+ //# sourceMappingURL=response-cache.js.map