responses-proxy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161):
  1. package/README.md +56 -0
  2. package/cli.js +118 -0
  3. package/dist/anthropic-messages.js +383 -0
  4. package/dist/anthropic-messages.test.js +209 -0
  5. package/dist/audit-log.js +138 -0
  6. package/dist/audit-log.test.js +480 -0
  7. package/dist/billing-expiration.js +70 -0
  8. package/dist/billing-expiration.test.js +114 -0
  9. package/dist/billing.js +716 -0
  10. package/dist/billing.test.js +228 -0
  11. package/dist/chatgpt-oauth-store.js +240 -0
  12. package/dist/chatgpt-oauth-store.test.js +88 -0
  13. package/dist/chatgpt-oauth.js +118 -0
  14. package/dist/chatgpt-oauth.test.js +63 -0
  15. package/dist/chatgpt-provider-auth.js +60 -0
  16. package/dist/chatgpt-provider-auth.test.js +101 -0
  17. package/dist/client/app-icon.svg +17 -0
  18. package/dist/client/assets/index-C7Vvhst8.js +14 -0
  19. package/dist/client/assets/index-DpqgYK3L.css +1 -0
  20. package/dist/client/favicon.svg +17 -0
  21. package/dist/client/index.html +31 -0
  22. package/dist/client-config-apply.js +345 -0
  23. package/dist/client-config-apply.test.js +185 -0
  24. package/dist/client-token-limits.js +111 -0
  25. package/dist/client-token-limits.test.js +129 -0
  26. package/dist/codex-config.js +47 -0
  27. package/dist/codex-setup.js +87 -0
  28. package/dist/codex-setup.test.js +30 -0
  29. package/dist/config.js +314 -0
  30. package/dist/cost-analytics.js +31 -0
  31. package/dist/cost-analytics.test.js +38 -0
  32. package/dist/customer-key-access.js +126 -0
  33. package/dist/customer-key-access.test.js +178 -0
  34. package/dist/customer-keys.js +209 -0
  35. package/dist/customer-keys.test.js +68 -0
  36. package/dist/customer-usage.js +18 -0
  37. package/dist/customer-usage.test.js +55 -0
  38. package/dist/dashboard-auth.js +318 -0
  39. package/dist/dashboard-auth.test.js +133 -0
  40. package/dist/dashboard-serving.test.js +235 -0
  41. package/dist/error-response.js +174 -0
  42. package/dist/error-response.test.js +88 -0
  43. package/dist/forward.js +357 -0
  44. package/dist/health-websocket-manager.js +174 -0
  45. package/dist/http-rate-limit.js +36 -0
  46. package/dist/http-rate-limit.test.js +62 -0
  47. package/dist/kiro-auth.js +136 -0
  48. package/dist/kiro-auth.test.js +234 -0
  49. package/dist/kiro-codewhisperer.js +646 -0
  50. package/dist/kiro-codewhisperer.test.js +219 -0
  51. package/dist/kiro-device-login.js +338 -0
  52. package/dist/kiro-eventstream.js +219 -0
  53. package/dist/kiro-eventstream.test.js +79 -0
  54. package/dist/kiro-forward.js +401 -0
  55. package/dist/kiro-import-cli.js +69 -0
  56. package/dist/kiro-import.js +94 -0
  57. package/dist/kiro-import.test.js +125 -0
  58. package/dist/kiro-token-store.js +196 -0
  59. package/dist/kiro-token-store.test.js +207 -0
  60. package/dist/krouter-usage.js +243 -0
  61. package/dist/model-combo-repository.js +147 -0
  62. package/dist/model-routing.js +69 -0
  63. package/dist/model-routing.test.js +41 -0
  64. package/dist/normalize-request.js +531 -0
  65. package/dist/normalize-request.test.js +277 -0
  66. package/dist/omv-public-firewall.test.js +11 -0
  67. package/dist/package.json +17 -0
  68. package/dist/prompt-cache-state.js +146 -0
  69. package/dist/prompt-cache-state.test.js +71 -0
  70. package/dist/prompt-cache.js +229 -0
  71. package/dist/provider-health-service.js +404 -0
  72. package/dist/provider-request-parameters.js +107 -0
  73. package/dist/provider-request-parameters.test.js +26 -0
  74. package/dist/provider-routing.js +114 -0
  75. package/dist/provider-routing.test.js +64 -0
  76. package/dist/provider-usage.js +314 -0
  77. package/dist/request-timeout-policy.js +61 -0
  78. package/dist/request-timeout-policy.test.js +40 -0
  79. package/dist/response-cache.js +69 -0
  80. package/dist/response-cache.test.js +28 -0
  81. package/dist/routing-combo-repository.js +300 -0
  82. package/dist/routing-engine.js +377 -0
  83. package/dist/routing-integration.js +155 -0
  84. package/dist/routing-simulation-engine.js +326 -0
  85. package/dist/rtk-layer.js +483 -0
  86. package/dist/rtk-layer.test.js +198 -0
  87. package/dist/runtime-provider-repository.js +1742 -0
  88. package/dist/runtime-provider-repository.test.js +1177 -0
  89. package/dist/schema.js +118 -0
  90. package/dist/schema.test.js +16 -0
  91. package/dist/sepay-webhook.js +87 -0
  92. package/dist/sepay-webhook.test.js +142 -0
  93. package/dist/server-body-limit.test.js +35 -0
  94. package/dist/server-client-token-limits.test.js +161 -0
  95. package/dist/server-codex-config-setup.test.js +76 -0
  96. package/dist/server-http-rate-limit.test.js +80 -0
  97. package/dist/server-response-cache.test.js +105 -0
  98. package/dist/server-routes-alias.test.js +39 -0
  99. package/dist/server-sepay-webhook-security.test.js +59 -0
  100. package/dist/server.js +5906 -0
  101. package/dist/session-log.js +178 -0
  102. package/dist/tailnet-funnel-script.test.js +33 -0
  103. package/dist/telegram-bot/actions.js +118 -0
  104. package/dist/telegram-bot/admin-actions.js +103 -0
  105. package/dist/telegram-bot/auth.js +46 -0
  106. package/dist/telegram-bot/auth.test.js +1 -0
  107. package/dist/telegram-bot/bot-identity-repository.js +189 -0
  108. package/dist/telegram-bot/bot-identity-repository.test.js +78 -0
  109. package/dist/telegram-bot/callbacks.js +30 -0
  110. package/dist/telegram-bot/codex-config-delivery.js +38 -0
  111. package/dist/telegram-bot/codex-config-delivery.test.js +75 -0
  112. package/dist/telegram-bot/commands/accounts.js +140 -0
  113. package/dist/telegram-bot/commands/apikey.js +737 -0
  114. package/dist/telegram-bot/commands/apply.js +265 -0
  115. package/dist/telegram-bot/commands/clients.js +13 -0
  116. package/dist/telegram-bot/commands/customer-billing.test.js +271 -0
  117. package/dist/telegram-bot/commands/grant.js +138 -0
  118. package/dist/telegram-bot/commands/grant.test.js +217 -0
  119. package/dist/telegram-bot/commands/help.js +52 -0
  120. package/dist/telegram-bot/commands/me.js +53 -0
  121. package/dist/telegram-bot/commands/models.js +6 -0
  122. package/dist/telegram-bot/commands/oauth.js +64 -0
  123. package/dist/telegram-bot/commands/plans.js +96 -0
  124. package/dist/telegram-bot/commands/providers.js +27 -0
  125. package/dist/telegram-bot/commands/quota.js +10 -0
  126. package/dist/telegram-bot/commands/renew-user.js +139 -0
  127. package/dist/telegram-bot/commands/renew-user.test.js +184 -0
  128. package/dist/telegram-bot/commands/renew.js +1369 -0
  129. package/dist/telegram-bot/commands/renew.test.js +1633 -0
  130. package/dist/telegram-bot/commands/start.js +212 -0
  131. package/dist/telegram-bot/commands/start.test.js +280 -0
  132. package/dist/telegram-bot/commands/status.js +6 -0
  133. package/dist/telegram-bot/commands/tailscale.js +15 -0
  134. package/dist/telegram-bot/commands/tailscale.test.js +76 -0
  135. package/dist/telegram-bot/commands/test.js +51 -0
  136. package/dist/telegram-bot/commands/test.test.js +14 -0
  137. package/dist/telegram-bot/commands/usage.js +10 -0
  138. package/dist/telegram-bot/config.js +98 -0
  139. package/dist/telegram-bot/config.test.js +42 -0
  140. package/dist/telegram-bot/customer-actions.js +160 -0
  141. package/dist/telegram-bot/customer-api-keys.js +68 -0
  142. package/dist/telegram-bot/customer-billing.js +72 -0
  143. package/dist/telegram-bot/customer-workspace-repository.js +134 -0
  144. package/dist/telegram-bot/customer-workspace-repository.test.js +47 -0
  145. package/dist/telegram-bot/dashboard-login.js +39 -0
  146. package/dist/telegram-bot/format.js +140 -0
  147. package/dist/telegram-bot/grants.js +370 -0
  148. package/dist/telegram-bot/grants.test.js +290 -0
  149. package/dist/telegram-bot/index.js +85 -0
  150. package/dist/telegram-bot/message-cleanup.js +55 -0
  151. package/dist/telegram-bot/message-cleanup.test.js +77 -0
  152. package/dist/telegram-bot/message-format.js +45 -0
  153. package/dist/telegram-bot/message-format.test.js +10 -0
  154. package/dist/telegram-bot/proxy-client.js +174 -0
  155. package/dist/telegram-bot/rate-limit.js +95 -0
  156. package/dist/telegram-bot/rate-limit.test.js +58 -0
  157. package/dist/telegram-bot/sessions.js +171 -0
  158. package/dist/telegram-bot/sessions.test.js +107 -0
  159. package/dist/telegram-bot/telegram-adapter.js +126 -0
  160. package/dist/telegram-bot/worker.js +63 -0
  161. package/package.json +39 -0
@@ -0,0 +1,277 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { normalizeResponsesRequestWithCache } from "./normalize-request.js";
4
// Two payloads that differ only in tool order, object key order, instruction
// whitespace and per-request metadata ids must map to the same cache family
// and the same static key.
test("equivalent requests reuse the same static key despite tool order and volatile metadata", () => {
    const redesignOptions = () => ({ promptCacheRedesignEnabled: true });
    // Fresh array per call so the two requests never share input objects.
    const sharedTurns = () => [
        { role: "assistant", content: "Existing context" },
        { role: "user", content: "Latest user turn" },
    ];
    const functionTool = (name, parameters) => ({ type: "function", name, parameters });
    const { cacheLayout: layoutA } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        input: sharedTurns(),
        instructions: "You are a helpful agent.\n\nUse tools when needed.\n",
        tools: [
            functionTool("b_tool", { type: "object", properties: { q: { type: "string" } } }),
            functionTool("a_tool", { properties: { id: { type: "string" } }, type: "object" }),
        ],
        metadata: {
            request_id: "req-1",
            trace_id: "trace-1",
            tenant: "alpha",
        },
    }, redesignOptions());
    const { cacheLayout: layoutB } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        input: sharedTurns(),
        instructions: "You are a helpful agent.\nUse tools when needed.",
        tools: [
            functionTool("a_tool", { type: "object", properties: { id: { type: "string" } } }),
            functionTool("b_tool", { properties: { q: { type: "string" } }, type: "object" }),
        ],
        metadata: {
            tenant: "alpha",
            request_id: "req-2",
            trace_id: "trace-2",
        },
    }, redesignOptions());
    assert.equal(layoutA.familyId, layoutB.familyId);
    assert.equal(layoutA.staticKey, layoutB.staticKey);
});
62
// Editing just the final user message must yield a new request key while the
// static key stays the same.
test("changing only the latest user turn changes request key but preserves static key", () => {
    const cacheOptions = () => ({ promptCacheRedesignEnabled: true });
    const baseRequest = {
        model: "cx/gpt-5.4-xhigh",
        input: [
            { role: "assistant", content: "Earlier answer" },
            { role: "user", content: "What is the plan?" },
        ],
        instructions: "You are a helpful agent.",
    };
    const revisedRequest = Object.assign({}, baseRequest, {
        input: [
            { role: "assistant", content: "Earlier answer" },
            { role: "user", content: "What is the revised plan?" },
        ],
    });
    const original = normalizeResponsesRequestWithCache(baseRequest, cacheOptions());
    const revised = normalizeResponsesRequestWithCache(revisedRequest, cacheOptions());
    assert.equal(original.cacheLayout.staticKey, revised.cacheLayout.staticKey);
    assert.notEqual(original.cacheLayout.requestKey, revised.cacheLayout.requestKey);
});
86
// `previous_response_id` varies from turn to turn; it must not influence the
// static key.
test("changing previous response id does not fragment the static key", () => {
    const baseRequest = {
        model: "cx/gpt-5.4-xhigh",
        input: [
            { role: "assistant", content: "Earlier answer" },
            { role: "user", content: "Continue" },
        ],
        instructions: "You are a helpful agent.",
    };
    const normalizeWithPrevious = (responseId) => normalizeResponsesRequestWithCache({
        ...baseRequest,
        previous_response_id: responseId,
    }, {
        promptCacheRedesignEnabled: true,
    });
    const afterFirstResponse = normalizeWithPrevious("resp-1");
    const afterSecondResponse = normalizeWithPrevious("resp-2");
    assert.equal(afterFirstResponse.cacheLayout.staticKey, afterSecondResponse.cacheLayout.staticKey);
});
109
// With a long transcript and only the two most recent items kept verbatim,
// swapping the newest user turn must change the request key but not the
// static key.
test("long chat history does not fragment static key when only transcript grows", () => {
    const normalizeWithLatestTurn = (latestTurn) => normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        instructions: "You are a helpful agent.",
        input: [
            { role: "user", content: "Turn 1" },
            { role: "assistant", content: "Reply 1" },
            { role: "user", content: "Turn 2" },
            { role: "assistant", content: "Reply 2" },
            { role: "user", content: latestTurn },
        ],
    }, {
        promptCacheRedesignEnabled: true,
        promptCacheSummaryKeepRecentItems: 2,
    });
    const variantA = normalizeWithLatestTurn("Latest turn A");
    const variantB = normalizeWithLatestTurn("Latest turn B");
    assert.equal(variantA.cacheLayout.staticKey, variantB.cacheLayout.staticKey);
    assert.notEqual(variantA.cacheLayout.requestKey, variantB.cacheLayout.requestKey);
});
141
// When `preserveMessagesPayload` is set, a chat-style `messages` array is
// expected to come through as `input` unchanged, with no `instructions` set.
test("preserves messages payload when provider capability flag is enabled", () => {
    const chatMessages = [
        { role: "system", content: "You are Codex." },
        { role: "user", content: "Audit RTK flow" },
    ];
    const { request } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        messages: chatMessages,
    }, {
        preserveMessagesPayload: true,
    });
    assert.deepEqual(request.input, [
        { role: "system", content: "You are Codex." },
        { role: "user", content: "Audit RTK flow" },
    ]);
    assert.equal(request.instructions, undefined);
});
157
// A family-prefix retention rule must override the default retention.
test("family retention rules apply by family prefix", () => {
    const familyRules = [
        {
            prefix: "family:cx-gpt-5-4-xhigh",
            retention: "72h",
        },
    ];
    const { request } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        input: [{ role: "user", content: "Hello" }],
    }, {
        promptCacheRedesignEnabled: true,
        defaultPromptCacheRetention: "24h",
        promptCacheRetentionByFamilyEnabled: true,
        promptCacheRetentionByFamilyRules: familyRules,
    });
    assert.equal(request.prompt_cache_retention, "72h");
});
174
// When both a family rule and a static-key rule match, the static-key rule's
// retention must be the one applied.
test("static key retention rules take precedence over family retention rules", () => {
    const requestEndingWith = (latestTurn) => ({
        model: "cx/gpt-5.4-xhigh",
        input: [
            { role: "assistant", content: "Stable context" },
            { role: "user", content: latestTurn },
        ],
    });
    // First pass only discovers the family id and static key to build rules from.
    const base = normalizeResponsesRequestWithCache(requestEndingWith("Hello"), {
        promptCacheRedesignEnabled: true,
    });
    const familyRule = { prefix: base.cacheLayout.familyId, retention: "48h" };
    const staticKeyRule = { prefix: base.cacheLayout.staticKey, retention: "96h" };
    const normalized = normalizeResponsesRequestWithCache(requestEndingWith("Different latest turn"), {
        promptCacheRedesignEnabled: true,
        defaultPromptCacheRetention: "24h",
        promptCacheRetentionByFamilyEnabled: true,
        promptCacheRetentionByFamilyRules: [familyRule],
        promptCacheRetentionByStaticKeyEnabled: true,
        promptCacheRetentionByStaticKeyRules: [staticKeyRule],
    });
    assert.equal(normalized.cacheLayout.staticKey, base.cacheLayout.staticKey);
    assert.equal(normalized.request.prompt_cache_retention, "96h");
});
211
// With the "strip" policy the outgoing request must carry no
// `max_output_tokens` key, even though a default value is configured.
test("strip policy removes max_output_tokens including injected defaults", () => {
    const stripPolicy = { mode: "strip" };
    const { request } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        input: [{ role: "user", content: "Hello" }],
    }, {
        openClawTokenOptimizationEnabled: true,
        defaultMaxOutputTokens: 1200,
        maxOutputTokensPolicy: stripPolicy,
    });
    assert.equal("max_output_tokens" in request, false);
});
224
// A chat-completions style `role: "tool"` item carrying a `tool_call_id` must
// be rewritten into a `function_call_output` item.
test("direct input with legacy tool role is normalized before forwarding", () => {
    const assistantTurn = () => ({
        role: "assistant",
        content: "Calling tool",
    });
    const { request } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        input: [
            assistantTurn(),
            {
                role: "tool",
                tool_call_id: "call_123",
                content: "tool result payload",
            },
        ],
    });
    const expectedInput = [
        assistantTurn(),
        {
            type: "function_call_output",
            call_id: "call_123",
            output: "tool result payload",
        },
    ];
    assert.deepEqual(request.input, expectedInput);
});
251
// A `role: "tool"` item with no `tool_call_id` is expected to be forwarded as
// an assistant message with the same content.
test("legacy tool role without tool_call_id falls back to assistant content", () => {
    const orphanedToolItem = {
        role: "tool",
        content: "orphaned tool output",
    };
    const { request } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        input: [orphanedToolItem],
    });
    const expectedInput = [
        {
            role: "assistant",
            content: "orphaned tool output",
        },
    ];
    assert.deepEqual(request.input, expectedInput);
});
268
// Explicit `null` values for `reasoning` and `text` must not appear as keys
// on the normalized request.
test("ignores nullable reasoning and text fields", () => {
    const { request } = normalizeResponsesRequestWithCache({
        model: "cx/gpt-5.4-xhigh",
        input: [{ role: "user", content: "Hello" }],
        reasoning: null,
        text: null,
    });
    for (const field of ["reasoning", "text"]) {
        assert.equal(field in request, false);
    }
});
@@ -0,0 +1,11 @@
1
+ import assert from "node:assert/strict";
2
+ import { readFileSync } from "node:fs";
3
+ import path from "node:path";
4
+ import test from "node:test";
5
// Location of the firewall script, resolved against the directory the test
// runner was started from.
const scriptPath = path.resolve(process.cwd(), "scripts", "omv-public-firewall.sh");
// Checks the script text itself: it must delete the guard table by name and
// must not mention `nft list ruleset` or `BACKUP_FILE`.
test("omv firewall rollback only deletes guard table", () => {
    // Read inside the test body so a missing or unreadable script is reported
    // as a failure of this test rather than as a crash while loading the module.
    const script = readFileSync(scriptPath, "utf8");
    assert.match(script, /nft delete table inet "\$TABLE"/);
    assert.doesNotMatch(script, /nft list ruleset/);
    assert.doesNotMatch(script, /BACKUP_FILE/);
});
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "responses-proxy-runtime",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "dependencies": {
6
+ "@fastify/websocket": "^11.2.0",
7
+ "better-sqlite3": "^12.9.0",
8
+ "chart.js": "^4.5.1",
9
+ "date-fns": "^4.4.0",
10
+ "fastify": "^5.6.1",
11
+ "grammy": "^1.42.0",
12
+ "react": "^19.2.5",
13
+ "react-chartjs-2": "^5.3.1",
14
+ "react-dom": "^19.2.5",
15
+ "zod": "^4.1.5"
16
+ }
17
+ }
@@ -0,0 +1,146 @@
1
+ import { mkdirSync } from "node:fs";
2
+ import path from "node:path";
3
+ import BetterSqlite3 from "better-sqlite3";
4
/** Default idle window (6 hours) after which a session's cache-hit streak no longer counts. */
const DEFAULT_CACHE_HIT_STREAK_TTL_MS = 6 * 60 * 60 * 1000;
/** Default cap on the number of rows kept in `prompt_cache_sessions`. */
const DEFAULT_MAX_CACHE_HIT_STREAK_SESSIONS = 2000;
/**
 * SQLite-backed persistence for prompt-cache bookkeeping.
 *
 * Two tables are used:
 *  - `prompt_cache_observations`: one JSON payload per scope (`latest` and
 *    `provider:<providerId>`), holding the most recently saved observation.
 *  - `prompt_cache_sessions`: per-session counter of consecutive cache hits.
 */
export class PromptCacheStateStore {
    /** Database handle; `create` supplies a better-sqlite3 `Database`. */
    db;
    /**
     * @param db Database handle exposing `prepare` and `transaction`
     *   (the better-sqlite3 `Database` API).
     */
    constructor(db) {
        this.db = db;
    }
    /**
     * Opens (creating if necessary) the SQLite file, ensures the schema exists
     * and returns a store bound to it.
     *
     * @param {string} dbFile Path of the SQLite database file; parent
     *   directories are created when missing.
     * @returns {PromptCacheStateStore}
     */
    static create(dbFile) {
        mkdirSync(path.dirname(dbFile), { recursive: true });
        const db = new BetterSqlite3(dbFile);
        try {
            ensureSchema(db);
        }
        catch (error) {
            // Do not leak the open handle when schema setup fails.
            db.close();
            throw error;
        }
        return new PromptCacheStateStore(db);
    }
    /**
     * Reads every stored observation and derives the newest one overall and
     * the newest one per provider.
     *
     * Rows whose payload cannot be parsed into an observation are skipped.
     *
     * @returns {{ latest: object | undefined, byProvider: Map<string, object> }}
     */
    loadLatestObservations() {
        const rows = this.db
            .prepare(`SELECT scope, payload, updated_at
                 FROM prompt_cache_observations
                 ORDER BY updated_at DESC, scope ASC`)
            .all();
        const byProvider = new Map();
        let latest;
        for (const row of rows) {
            const observation = parseObservation(row.payload);
            if (!observation) {
                continue;
            }
            // "Newest" is decided by the observation's own timestamp, not by row order.
            if (!latest || observation.timestamp > latest.timestamp) {
                latest = observation;
            }
            if (row.scope.startsWith("provider:") && observation.providerId) {
                const existing = byProvider.get(observation.providerId);
                if (!existing || observation.timestamp > existing.timestamp) {
                    byProvider.set(observation.providerId, observation);
                }
            }
        }
        return {
            latest,
            byProvider,
        };
    }
    /**
     * Stores the observation under the `latest` scope and, when it names a
     * provider, under `provider:<providerId>` as well.
     *
     * Both writes happen in one transaction. `db.transaction` is used instead
     * of hand-written BEGIN/COMMIT/ROLLBACK so the call also works when a
     * transaction is already open and so a failed rollback can never replace
     * the error that caused it.
     *
     * @param observation Object serialised with `JSON.stringify`; its
     *   `timestamp` is stored as the row's `updated_at`.
     */
    saveLatestObservation(observation) {
        const payload = JSON.stringify(observation);
        const upsert = this.db.prepare(`
            INSERT INTO prompt_cache_observations (scope, payload, updated_at)
            VALUES (?, ?, ?)
            ON CONFLICT(scope) DO UPDATE SET
              payload = excluded.payload,
              updated_at = excluded.updated_at
        `);
        const writeScopes = this.db.transaction(() => {
            upsert.run("latest", payload, observation.timestamp);
            if (observation.providerId) {
                upsert.run(`provider:${observation.providerId}`, payload, observation.timestamp);
            }
        });
        writeScopes();
    }
    /**
     * Updates the consecutive cache-hit counter for a session.
     *
     * A positive `cachedTokens` extends the streak, any other number resets it
     * to zero. Expired rows are pruned first and, after the write, the oldest
     * rows beyond `maxSessions` are removed. All statements run in a single
     * transaction so the read-modify-write of the counter is atomic.
     *
     * @param {string} sessionKey Identifier of the session row.
     * @param {number | null | undefined} cachedTokens Cached token count; when
     *   `undefined` or `null` the result is unknown and nothing is recorded.
     * @param {{ nowMs?: number, ttlMs?: number, maxSessions?: number }} [options]
     *   `nowMs` defaults to `Date.now()`, `ttlMs` and `maxSessions` to the
     *   module defaults.
     * @returns {{ cacheHit: boolean, consecutiveCacheHits: number } | undefined}
     */
    recordCacheResult(sessionKey, cachedTokens, options = {}) {
        // `== null` covers both undefined and null: neither is a real measurement,
        // so neither may reset an existing streak.
        if (cachedTokens == null) {
            return undefined;
        }
        const nowMs = options.nowMs ?? Date.now();
        const ttlMs = options.ttlMs ?? DEFAULT_CACHE_HIT_STREAK_TTL_MS;
        const maxSessions = options.maxSessions ?? DEFAULT_MAX_CACHE_HIT_STREAK_SESSIONS;
        const cacheHit = cachedTokens > 0;
        const applyResult = this.db.transaction(() => {
            this.pruneExpiredSessionStates(nowMs, ttlMs);
            const existing = this.db
                .prepare(`SELECT session_key, consecutive_cache_hits, updated_at
                   FROM prompt_cache_sessions
                   WHERE session_key = ?`)
                .get(sessionKey);
            const previousHits = existing && nowMs - existing.updated_at <= ttlMs ? existing.consecutive_cache_hits : 0;
            const nextHits = cacheHit ? previousHits + 1 : 0;
            this.db
                .prepare(`INSERT INTO prompt_cache_sessions (session_key, consecutive_cache_hits, updated_at)
                   VALUES (?, ?, ?)
                   ON CONFLICT(session_key) DO UPDATE SET
                     consecutive_cache_hits = excluded.consecutive_cache_hits,
                     updated_at = excluded.updated_at`)
                .run(sessionKey, nextHits, nowMs);
            this.trimOverflowSessions(maxSessions);
            return nextHits;
        });
        const consecutiveCacheHits = applyResult();
        return {
            cacheHit,
            consecutiveCacheHits,
        };
    }
    /**
     * Deletes session rows last updated more than `ttlMs` before `nowMs`.
     *
     * @param {number} nowMs Current time in milliseconds.
     * @param {number} ttlMs Maximum age of a row in milliseconds.
     */
    pruneExpiredSessionStates(nowMs, ttlMs) {
        this.db
            .prepare("DELETE FROM prompt_cache_sessions WHERE updated_at < ?")
            .run(nowMs - ttlMs);
    }
    /**
     * Removes the least recently updated session rows until at most
     * `maxSessions` remain; ties are broken by `session_key`.
     *
     * @param {number} maxSessions Maximum number of rows to keep.
     */
    trimOverflowSessions(maxSessions) {
        const row = this.db
            .prepare("SELECT COUNT(*) AS total FROM prompt_cache_sessions")
            .get();
        const overflow = row.total - maxSessions;
        if (overflow <= 0) {
            return;
        }
        this.db
            .prepare(`DELETE FROM prompt_cache_sessions
                 WHERE session_key IN (
                   SELECT session_key
                   FROM prompt_cache_sessions
                   ORDER BY updated_at ASC, session_key ASC
                   LIMIT ?
                 )`)
            .run(overflow);
    }
}
120
/**
 * Creates the observation and session tables, plus the `updated_at` index on
 * sessions, when they do not exist yet.
 *
 * @param db Database handle exposing `exec`.
 */
function ensureSchema(db) {
    const schemaSql = `
    CREATE TABLE IF NOT EXISTS prompt_cache_observations (
      scope TEXT PRIMARY KEY,
      payload TEXT NOT NULL,
      updated_at TEXT NOT NULL
    );

    CREATE TABLE IF NOT EXISTS prompt_cache_sessions (
      session_key TEXT PRIMARY KEY,
      consecutive_cache_hits INTEGER NOT NULL DEFAULT 0,
      updated_at INTEGER NOT NULL
    );

    CREATE INDEX IF NOT EXISTS idx_prompt_cache_sessions_updated_at
      ON prompt_cache_sessions(updated_at);
  `;
    db.exec(schemaSql);
}
138
/**
 * Decodes a stored observation payload.
 *
 * @param {string} value JSON text read from the `payload` column.
 * @returns The parsed value when it is truthy and has a string `requestId`;
 *   `undefined` for invalid JSON or any other shape.
 */
function parseObservation(value) {
    let candidate;
    try {
        candidate = JSON.parse(value);
    }
    catch {
        return undefined;
    }
    if (!candidate || typeof candidate.requestId !== "string") {
        return undefined;
    }
    return candidate;
}
@@ -0,0 +1,71 @@
1
+ import assert from "node:assert/strict";
2
+ import { mkdtempSync, rmSync } from "node:fs";
3
+ import os from "node:os";
4
+ import path from "node:path";
5
+ import test from "node:test";
6
+ import { PromptCacheStateStore } from "./prompt-cache-state.js";
7
// Saves an observation through one store instance and reads it back through a
// second instance opened on the same SQLite file.
test("persists latest prompt cache observation across store recreation", () => {
    const tempDir = mkdtempSync(path.join(os.tmpdir(), "responses-proxy-cache-state-"));
    const dbFile = path.join(tempDir, "app.sqlite");
    // Every store opened here is tracked so its database handle can be closed
    // before the temp directory is removed; open handles leak file descriptors
    // and block directory removal on platforms that lock open files.
    const openedStores = [];
    const openStore = () => {
        const store = PromptCacheStateStore.create(dbFile);
        openedStores.push(store);
        return store;
    };
    try {
        const firstStore = openStore();
        firstStore.saveLatestObservation({
            requestId: "req-1",
            providerId: "provider-a",
            model: "cx/gpt-5.4",
            familyId: "family:model:core:abc",
            staticKey: "static:family:model:core:abc:def",
            requestKey: "request:static:family:model:core:abc:def:ghi",
            promptCacheKey: "request:static:family:model:core:abc:def:ghi",
            promptCacheRetention: "24h",
            cacheHit: true,
            cachedTokens: 123,
            cacheSavedPercent: 61.5,
            stream: false,
            timestamp: "2026-04-18T08:00:00.000Z",
        });
        const secondStore = openStore();
        const loaded = secondStore.loadLatestObservations();
        assert.equal(loaded.latest?.requestId, "req-1");
        assert.equal(loaded.latest?.providerId, "provider-a");
        assert.equal(loaded.byProvider.get("provider-a")?.staticKey, "static:family:model:core:abc:def");
    }
    finally {
        try {
            for (const store of openedStores) {
                store.db.close();
            }
        }
        finally {
            rmSync(tempDir, { recursive: true, force: true });
        }
    }
});
37
// Records a hit through one store instance, then continues the streak and
// finally resets it through a second instance opened on the same SQLite file.
test("persists cache hit streak across store recreation", () => {
    const tempDir = mkdtempSync(path.join(os.tmpdir(), "responses-proxy-cache-state-"));
    const dbFile = path.join(tempDir, "app.sqlite");
    // Every store opened here is tracked so its database handle can be closed
    // before the temp directory is removed; open handles leak file descriptors
    // and block directory removal on platforms that lock open files.
    const openedStores = [];
    const openStore = () => {
        const store = PromptCacheStateStore.create(dbFile);
        openedStores.push(store);
        return store;
    };
    try {
        const firstStore = openStore();
        const first = firstStore.recordCacheResult("session-a", 20, {
            nowMs: 1_000,
            ttlMs: 10_000,
        });
        assert.deepEqual(first, {
            cacheHit: true,
            consecutiveCacheHits: 1,
        });
        const secondStore = openStore();
        const second = secondStore.recordCacheResult("session-a", 30, {
            nowMs: 2_000,
            ttlMs: 10_000,
        });
        assert.deepEqual(second, {
            cacheHit: true,
            consecutiveCacheHits: 2,
        });
        const third = secondStore.recordCacheResult("session-a", 0, {
            nowMs: 3_000,
            ttlMs: 10_000,
        });
        assert.deepEqual(third, {
            cacheHit: false,
            consecutiveCacheHits: 0,
        });
    }
    finally {
        try {
            for (const store of openedStores) {
                store.db.close();
            }
        }
        finally {
            rmSync(tempDir, { recursive: true, force: true });
        }
    }
});