@premai/api-sdk 1.0.41 → 1.0.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,25 +45,127 @@ function getGatewayErrorMessage(err) {
45
45
  return err.kind.message;
46
46
  return null;
47
47
  }
48
- async function attest(apiKey, options = { enabled: true }) {
49
- if (!options.enabled)
50
- return null;
48
// Lifetime of a cached attestation session id, in milliseconds; attest()
// adds this to the insertion time to compute an entry's `expires`.
var ATTEST_TTL_MS = 30 * 1000;
// Once the cache grows past this size, attest() evicts the first-inserted key.
var ATTEST_CACHE_MAX = 500;
// Upper bound on attemptAttest() calls made by a single runAttest().
var ATTEST_MAX_ATTEMPTS = 4;
// Retry backoff: the base is doubled per attempt and capped at the max
// before jitter is added (see backoffDelayMs).
var ATTEST_RETRY_BASE_MS = 250;
var ATTEST_RETRY_MAX_MS = 2 * 1000;
// Error-message patterns that isTransientError() treats as retryable.
var TRANSIENT_PATTERNS = [
  /EOF while parsing/i,
  /error decoding response body/i,
  /connection (reset|closed|refused)/i,
  /socket hang up/i,
  /ETIMEDOUT/i
];
// Cache key -> { sessionId, expires }.
var attestCache = new Map();
// Cache key -> promise of an attestation that is still running.
var attestInflight = new Map();
62
/**
 * Builds the lookup key used by the attestation cache and in-flight map.
 *
 * The pair is serialized as a JSON array rather than joined with a
 * delimiter, so two different (apiKey, model) pairs can never produce the
 * same key even when either value contains the delimiter character.
 *
 * @param {string} apiKey - API key the attestation is performed with.
 * @param {string | null | undefined} model - Optional model name; a nullish
 *   value is keyed the same as the empty string.
 * @returns {string} A non-empty string key.
 */
function attestCacheKey(apiKey, model) {
  return JSON.stringify([apiKey, model ?? ""]);
}
65
/**
 * Removes expired entries from the front of `attestCache`.
 *
 * Entries are visited in Map insertion order; the scan stops at the first
 * entry whose `expires` is still later than `now`, so entries behind it are
 * not examined.
 *
 * @param {number} now - Timestamp compared against each entry's `expires`.
 */
function pruneExpired(now) {
  for (const [cacheKey, { expires }] of attestCache) {
    if (expires > now) {
      return;
    }
    attestCache.delete(cacheKey);
  }
}
74
/**
 * Decides whether a failed attestation attempt is worth retrying.
 *
 * Collects `err.message` (when `err` is an Error) plus every element of
 * `err.cause` (when `err` is an attestation error whose cause is an array)
 * and reports whether any of them matches one of TRANSIENT_PATTERNS.
 *
 * @param {unknown} err - The value thrown by the attempt.
 * @returns {boolean} true when at least one collected message matches.
 */
function isTransientError(err) {
  const candidates = err instanceof Error ? [err.message] : [];
  if (isAttestationError(err) && Array.isArray(err.cause)) {
    candidates.push(...err.cause);
  }
  for (const text of candidates) {
    for (const pattern of TRANSIENT_PATTERNS) {
      if (pattern.test(text)) {
        return true;
      }
    }
  }
  return false;
}
84
/**
 * Computes the wait before the next retry: exponential backoff, capped,
 * plus random jitter.
 *
 * The base delay doubles with every attempt (attempt 1 -> baseMs), is capped
 * at `maxMs`, and then a jitter in [0, capped / 2) is added on top, so the
 * result can exceed `maxMs` by up to half of it.
 *
 * @param {number} attempt - 1-based number of the attempt that just failed.
 * @param {number} [baseMs=ATTEST_RETRY_BASE_MS] - Delay for the first retry.
 * @param {number} [maxMs=ATTEST_RETRY_MAX_MS] - Cap applied before jitter.
 * @param {() => number} [random=Math.random] - Source of values in [0, 1);
 *   injectable so callers and tests can get deterministic delays.
 * @returns {number} Delay in milliseconds.
 */
function backoffDelayMs(attempt, baseMs = ATTEST_RETRY_BASE_MS, maxMs = ATTEST_RETRY_MAX_MS, random = Math.random) {
  const exponential = baseMs * 2 ** (attempt - 1);
  const capped = Math.min(exponential, maxMs);
  const jitter = Math.floor(random() * (capped / 2));
  return capped + jitter;
}
90
/**
 * Returns a promise that is resolved by a timer after `ms` milliseconds.
 *
 * @param {number} ms - Timeout passed to setTimeout.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise(function schedule(done) {
    setTimeout(done, ms);
  });
}
93
/**
 * Calls `obj.free()` when `obj` has a callable `free`, ignoring any error it
 * throws. Nullish values and objects without a `free` function are skipped.
 *
 * @param {unknown} obj - Candidate handle; may be undefined or null.
 * @returns {void}
 */
function safeFree(obj) {
  const canFree = typeof obj?.free === "function";
  if (canFree) {
    try {
      obj.free();
    } catch {
      // Best-effort cleanup: a failing free() is ignored on purpose.
    }
  }
}
100
/**
 * Performs one attestation round-trip and extracts its session id.
 *
 * A client is built through `prem.ClientBuilder` for `endpoints.proxy`
 * (falling back to "") and authorized with `apiKey`. When `options.model` is
 * truthy it is sent as the `model` query parameter. The `x-session-id` is
 * read from the CPU headers first; the GPU headers are consulted only when
 * the CPU lookup is nullish.
 *
 * Whatever handles were obtained (headers, attested result, client) are
 * passed to safeFree() in that order, whether or not the attempt succeeded.
 *
 * @param {string} apiKey - Key passed to `with_authorization`.
 * @param {{ model?: string }} options - Only `model` is read here.
 * @returns {Promise<*>} The `x-session-id` value found in the headers.
 * @throws {Error} "missing x-session-id issued by attestation" when neither
 *   header set carries the id; errors from building or attesting propagate.
 */
async function attemptAttest(apiKey, options) {
  const prem = await loadPrem();
  let client;
  let attested;
  let headers;
  let sessionId;
  try {
    const builder = new prem.ClientBuilder(endpoints.proxy ?? "");
    client = await builder.with_authorization(apiKey).build();
    if (options.model) {
      const query = new prem.QueryParams().with("model", options.model);
      client.set_query(query);
    }
    attested = await client.attest();
    headers = attested.headers();
    const cpuSessionId = headers.cpu()?.get("x-session-id");
    sessionId = cpuSessionId ?? headers.gpu()?.get("x-session-id") ?? null;
  } finally {
    // Release in reverse order of acquisition; safeFree skips unset handles.
    for (const handle of [headers, attested, client]) {
      safeFree(handle);
    }
  }
  if (sessionId !== null) {
    return sessionId;
  }
  throw new Error("missing x-session-id issued by attestation");
}
124
/**
 * Runs attemptAttest() with retries.
 *
 * Up to ATTEST_MAX_ATTEMPTS attempts are made. A failure is rethrown
 * immediately when it is the last allowed attempt or when
 * isTransientError() rejects it; otherwise the function waits for
 * backoffDelayMs(attempt) and tries again.
 *
 * @param {string} apiKey - Forwarded to attemptAttest.
 * @param {{ model?: string }} options - Forwarded to attemptAttest.
 * @returns {Promise<*>} The session id from the first successful attempt.
 * @throws The error of the attempt that ended the retry loop.
 */
async function runAttest(apiKey, options) {
  let failure;
  let attempt = 1;
  while (attempt <= ATTEST_MAX_ATTEMPTS) {
    try {
      return await attemptAttest(apiKey, options);
    } catch (err) {
      failure = err;
      const outOfAttempts = attempt === ATTEST_MAX_ATTEMPTS;
      if (outOfAttempts || !isTransientError(err)) {
        throw err;
      }
    }
    // Only reached after a retryable failure.
    await delay(backoffDelayMs(attempt));
    attempt += 1;
  }
  throw failure;
}
139
/**
 * Returns an attestation session id for (apiKey, options.model), reusing a
 * cached or in-flight result when one exists.
 *
 * Resolution order:
 *   1. `options.enabled` falsy -> null, nothing else happens.
 *   2. Unexpired cache entry -> its session id; an expired entry is removed.
 *   3. An attestation already running for the same key -> that promise.
 *   4. Otherwise runAttest() is started and registered as in-flight. On
 *      success the result is cached for ATTEST_TTL_MS, after expired entries
 *      are pruned; if the cache then exceeds ATTEST_CACHE_MAX the
 *      first-inserted key is evicted. The in-flight registration is removed
 *      when the run settles, successfully or not.
 *
 * @param {string} apiKey - Key used for the attestation and the cache key.
 * @param {{ enabled?: boolean, model?: string }} [options] - Defaults to
 *   `{ enabled: true }`.
 * @returns {Promise<*>} The session id, or null when disabled.
 */
async function attest(apiKey, options = { enabled: true }) {
  if (!options.enabled) {
    return null;
  }
  const key = attestCacheKey(apiKey, options.model);
  const hit = attestCache.get(key);
  if (hit) {
    if (hit.expires > Date.now()) {
      return hit.sessionId;
    }
    attestCache.delete(key);
  }
  const pending = attestInflight.get(key);
  if (pending) {
    return pending;
  }
  const remember = (sessionId) => {
    const insertedAt = Date.now();
    pruneExpired(insertedAt);
    attestCache.set(key, { sessionId, expires: insertedAt + ATTEST_TTL_MS });
    if (attestCache.size > ATTEST_CACHE_MAX) {
      const eldest = attestCache.keys().next().value;
      if (eldest) {
        attestCache.delete(eldest);
      }
    }
    return sessionId;
  };
  const forget = () => {
    attestInflight.delete(key);
  };
  const work = runAttest(apiKey, options).then(remember).finally(forget);
  attestInflight.set(key, work);
  return work;
}
68
170
 
69
171
  // src/utils/crypto.ts
@@ -1064,10 +1166,14 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
1064
1166
  }
1065
1167
  clearTimeout(timeoutId);
1066
1168
  if (isStreaming) {
1067
- return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
1068
- } else {
1069
- return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
1169
+ const contentType = response.headers.get("content-type") ?? "";
1170
+ if (contentType.includes("text/event-stream")) {
1171
+ return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
1172
+ }
1173
+ const completion = await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
1174
+ return completionToChunkStream(completion);
1070
1175
  }
1176
+ return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
1071
1177
  } catch (error) {
1072
1178
  clearTimeout(timeoutId);
1073
1179
  if (error instanceof Error && error.name === "AbortError") {
@@ -1078,6 +1184,39 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
1078
1184
  };
1079
1185
  return client;
1080
1186
  }
1187
/**
 * Adapts a complete (non-streamed) chat completion into an async stream
 * that yields exactly one "chat.completion.chunk" object.
 *
 * Every entry of `completion.choices` is converted, so completions with
 * several choices keep all of them. When `choices` is missing or empty a
 * single placeholder choice is emitted (index 0, empty content,
 * finish_reason "stop").
 *
 * Per choice:
 *   - `delta.content` is the message content when it is a string, else "".
 *   - `delta.tool_calls` is included only when the message has at least one
 *     tool call of type "function"; their `index` counts within that
 *     filtered list.
 *   - `logprobs` is always null.
 *
 * @param {object} completion - Object with `id`, `created`, `model`, and
 *   optionally `choices` and `usage`.
 * @yields {object} The single chunk; `usage` is null when the completion has
 *   none.
 */
async function* completionToChunkStream(completion) {
  const toChunkChoice = (choice, position) => {
    const message = choice?.message;
    const content = typeof message?.content === "string" ? message.content : "";
    const toolCalls = message?.tool_calls
      ?.filter((tc) => tc.type === "function")
      .map((tc, i) => ({
        index: i,
        id: tc.id,
        type: "function",
        function: {
          name: tc.function.name,
          arguments: tc.function.arguments
        }
      }));
    const hasToolCalls = Array.isArray(toolCalls) && toolCalls.length > 0;
    return {
      // Fall back to the array position so several choices stay distinct.
      index: choice?.index ?? position,
      delta: {
        role: "assistant",
        content,
        ...(hasToolCalls ? { tool_calls: toolCalls } : {})
      },
      finish_reason: choice?.finish_reason ?? "stop",
      logprobs: null
    };
  };
  const hasChoices = Array.isArray(completion.choices) && completion.choices.length > 0;
  // A lone `undefined` entry produces the placeholder choice.
  const sourceChoices = hasChoices ? completion.choices : [undefined];
  yield {
    id: completion.id,
    object: "chat.completion.chunk",
    created: completion.created,
    model: completion.model,
    choices: sourceChoices.map(toChunkChoice),
    usage: completion.usage ?? null
  };
}
1081
1220
  async function* createDecryptedStreamGenerator(reader, sharedSecret, nonce, maxBufferSize) {
1082
1221
  const decoder = new TextDecoder;
1083
1222
  let buffer = "";
@@ -1258,7 +1397,7 @@ async function callFileOutputTool(toolName, params, apiKey, dekStore, clientKEK,
1258
1397
  };
1259
1398
  const response = await callToolRequest(toolName, body, apiKey, timeoutMs, attest2);
1260
1399
  const result = await downloadAndDecryptFile(response, dek, apiKey, timeoutMs);
1261
- if (result && result.fileId) {
1400
+ if (result?.fileId) {
1262
1401
  if (!dekStore.fileDEKs) {
1263
1402
  dekStore.fileDEKs = new Map;
1264
1403
  }
@@ -1315,7 +1454,7 @@ async function callRagTool(toolName, params, apiKey, dekStore, clientKEK, timeou
1315
1454
  }
1316
1455
  const _clientKEK = clientKEK ? hexToBytes6(clientKEK) : getClientKEK();
1317
1456
  const encryptedFileDEKs = fileIds.reduce((acc, fileId) => {
1318
- const fileDEK = dekStore.fileDEKs.get(fileId);
1457
+ const fileDEK = dekStore.fileDEKs?.get(fileId);
1319
1458
  if (!fileDEK) {
1320
1459
  return acc;
1321
1460
  }
@@ -276,25 +276,127 @@ function getGatewayErrorMessage(err) {
276
276
  return err.kind.message;
277
277
  return null;
278
278
  }
279
- async function attest(apiKey, options = { enabled: true }) {
280
- if (!options.enabled)
281
- return null;
279
// Lifetime of a cached attestation session id, in milliseconds; attest()
// adds this to the insertion time to compute an entry's `expires`.
var ATTEST_TTL_MS = 30 * 1000;
// Once the cache grows past this size, attest() evicts the first-inserted key.
var ATTEST_CACHE_MAX = 500;
// Upper bound on attemptAttest() calls made by a single runAttest().
var ATTEST_MAX_ATTEMPTS = 4;
// Retry backoff: the base is doubled per attempt and capped at the max
// before jitter is added (see backoffDelayMs).
var ATTEST_RETRY_BASE_MS = 250;
var ATTEST_RETRY_MAX_MS = 2 * 1000;
// Error-message patterns that isTransientError() treats as retryable.
var TRANSIENT_PATTERNS = [
  /EOF while parsing/i,
  /error decoding response body/i,
  /connection (reset|closed|refused)/i,
  /socket hang up/i,
  /ETIMEDOUT/i
];
// Cache key -> { sessionId, expires }.
var attestCache = new Map();
// Cache key -> promise of an attestation that is still running.
var attestInflight = new Map();
293
/**
 * Builds the lookup key used by the attestation cache and in-flight map.
 *
 * The pair is serialized as a JSON array rather than joined with a
 * delimiter, so two different (apiKey, model) pairs can never produce the
 * same key even when either value contains the delimiter character.
 *
 * @param {string} apiKey - API key the attestation is performed with.
 * @param {string | null | undefined} model - Optional model name; a nullish
 *   value is keyed the same as the empty string.
 * @returns {string} A non-empty string key.
 */
function attestCacheKey(apiKey, model) {
  return JSON.stringify([apiKey, model ?? ""]);
}
296
/**
 * Removes expired entries from the front of `attestCache`.
 *
 * Entries are visited in Map insertion order; the scan stops at the first
 * entry whose `expires` is still later than `now`, so entries behind it are
 * not examined.
 *
 * @param {number} now - Timestamp compared against each entry's `expires`.
 */
function pruneExpired(now) {
  for (const [cacheKey, { expires }] of attestCache) {
    if (expires > now) {
      return;
    }
    attestCache.delete(cacheKey);
  }
}
305
/**
 * Decides whether a failed attestation attempt is worth retrying.
 *
 * Collects `err.message` (when `err` is an Error) plus every element of
 * `err.cause` (when `err` is an attestation error whose cause is an array)
 * and reports whether any of them matches one of TRANSIENT_PATTERNS.
 *
 * @param {unknown} err - The value thrown by the attempt.
 * @returns {boolean} true when at least one collected message matches.
 */
function isTransientError(err) {
  const candidates = err instanceof Error ? [err.message] : [];
  if (isAttestationError(err) && Array.isArray(err.cause)) {
    candidates.push(...err.cause);
  }
  for (const text of candidates) {
    for (const pattern of TRANSIENT_PATTERNS) {
      if (pattern.test(text)) {
        return true;
      }
    }
  }
  return false;
}
315
/**
 * Computes the wait before the next retry: exponential backoff, capped,
 * plus random jitter.
 *
 * The base delay doubles with every attempt (attempt 1 -> baseMs), is capped
 * at `maxMs`, and then a jitter in [0, capped / 2) is added on top, so the
 * result can exceed `maxMs` by up to half of it.
 *
 * @param {number} attempt - 1-based number of the attempt that just failed.
 * @param {number} [baseMs=ATTEST_RETRY_BASE_MS] - Delay for the first retry.
 * @param {number} [maxMs=ATTEST_RETRY_MAX_MS] - Cap applied before jitter.
 * @param {() => number} [random=Math.random] - Source of values in [0, 1);
 *   injectable so callers and tests can get deterministic delays.
 * @returns {number} Delay in milliseconds.
 */
function backoffDelayMs(attempt, baseMs = ATTEST_RETRY_BASE_MS, maxMs = ATTEST_RETRY_MAX_MS, random = Math.random) {
  const exponential = baseMs * 2 ** (attempt - 1);
  const capped = Math.min(exponential, maxMs);
  const jitter = Math.floor(random() * (capped / 2));
  return capped + jitter;
}
321
/**
 * Returns a promise that is resolved by a timer after `ms` milliseconds.
 *
 * @param {number} ms - Timeout passed to setTimeout.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise(function schedule(done) {
    setTimeout(done, ms);
  });
}
324
/**
 * Calls `obj.free()` when `obj` has a callable `free`, ignoring any error it
 * throws. Nullish values and objects without a `free` function are skipped.
 *
 * @param {unknown} obj - Candidate handle; may be undefined or null.
 * @returns {void}
 */
function safeFree(obj) {
  const canFree = typeof obj?.free === "function";
  if (canFree) {
    try {
      obj.free();
    } catch {
      // Best-effort cleanup: a failing free() is ignored on purpose.
    }
  }
}
331
/**
 * Performs one attestation round-trip and extracts its session id.
 *
 * A client is built through `prem.ClientBuilder` for `endpoints.proxy`
 * (falling back to "") and authorized with `apiKey`. When `options.model` is
 * truthy it is sent as the `model` query parameter. The `x-session-id` is
 * read from the CPU headers first; the GPU headers are consulted only when
 * the CPU lookup is nullish.
 *
 * Whatever handles were obtained (headers, attested result, client) are
 * passed to safeFree() in that order, whether or not the attempt succeeded.
 *
 * @param {string} apiKey - Key passed to `with_authorization`.
 * @param {{ model?: string }} options - Only `model` is read here.
 * @returns {Promise<*>} The `x-session-id` value found in the headers.
 * @throws {Error} "missing x-session-id issued by attestation" when neither
 *   header set carries the id; errors from building or attesting propagate.
 */
async function attemptAttest(apiKey, options) {
  const prem = await loadPrem();
  let client;
  let attested;
  let headers;
  let sessionId;
  try {
    const builder = new prem.ClientBuilder(endpoints.proxy ?? "");
    client = await builder.with_authorization(apiKey).build();
    if (options.model) {
      const query = new prem.QueryParams().with("model", options.model);
      client.set_query(query);
    }
    attested = await client.attest();
    headers = attested.headers();
    const cpuSessionId = headers.cpu()?.get("x-session-id");
    sessionId = cpuSessionId ?? headers.gpu()?.get("x-session-id") ?? null;
  } finally {
    // Release in reverse order of acquisition; safeFree skips unset handles.
    for (const handle of [headers, attested, client]) {
      safeFree(handle);
    }
  }
  if (sessionId !== null) {
    return sessionId;
  }
  throw new Error("missing x-session-id issued by attestation");
}
355
/**
 * Runs attemptAttest() with retries.
 *
 * Up to ATTEST_MAX_ATTEMPTS attempts are made. A failure is rethrown
 * immediately when it is the last allowed attempt or when
 * isTransientError() rejects it; otherwise the function waits for
 * backoffDelayMs(attempt) and tries again.
 *
 * @param {string} apiKey - Forwarded to attemptAttest.
 * @param {{ model?: string }} options - Forwarded to attemptAttest.
 * @returns {Promise<*>} The session id from the first successful attempt.
 * @throws The error of the attempt that ended the retry loop.
 */
async function runAttest(apiKey, options) {
  let failure;
  let attempt = 1;
  while (attempt <= ATTEST_MAX_ATTEMPTS) {
    try {
      return await attemptAttest(apiKey, options);
    } catch (err) {
      failure = err;
      const outOfAttempts = attempt === ATTEST_MAX_ATTEMPTS;
      if (outOfAttempts || !isTransientError(err)) {
        throw err;
      }
    }
    // Only reached after a retryable failure.
    await delay(backoffDelayMs(attempt));
    attempt += 1;
  }
  throw failure;
}
370
/**
 * Returns an attestation session id for (apiKey, options.model), reusing a
 * cached or in-flight result when one exists.
 *
 * Resolution order:
 *   1. `options.enabled` falsy -> null, nothing else happens.
 *   2. Unexpired cache entry -> its session id; an expired entry is removed.
 *   3. An attestation already running for the same key -> that promise.
 *   4. Otherwise runAttest() is started and registered as in-flight. On
 *      success the result is cached for ATTEST_TTL_MS, after expired entries
 *      are pruned; if the cache then exceeds ATTEST_CACHE_MAX the
 *      first-inserted key is evicted. The in-flight registration is removed
 *      when the run settles, successfully or not.
 *
 * @param {string} apiKey - Key used for the attestation and the cache key.
 * @param {{ enabled?: boolean, model?: string }} [options] - Defaults to
 *   `{ enabled: true }`.
 * @returns {Promise<*>} The session id, or null when disabled.
 */
async function attest(apiKey, options = { enabled: true }) {
  if (!options.enabled) {
    return null;
  }
  const key = attestCacheKey(apiKey, options.model);
  const hit = attestCache.get(key);
  if (hit) {
    if (hit.expires > Date.now()) {
      return hit.sessionId;
    }
    attestCache.delete(key);
  }
  const pending = attestInflight.get(key);
  if (pending) {
    return pending;
  }
  const remember = (sessionId) => {
    const insertedAt = Date.now();
    pruneExpired(insertedAt);
    attestCache.set(key, { sessionId, expires: insertedAt + ATTEST_TTL_MS });
    if (attestCache.size > ATTEST_CACHE_MAX) {
      const eldest = attestCache.keys().next().value;
      if (eldest) {
        attestCache.delete(eldest);
      }
    }
    return sessionId;
  };
  const forget = () => {
    attestInflight.delete(key);
  };
  const work = runAttest(apiKey, options).then(remember).finally(forget);
  attestInflight.set(key, work);
  return work;
}
299
401
 
300
402
  // node_modules/@noble/ciphers/aes.js
@@ -25335,10 +25437,14 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
25335
25437
  }
25336
25438
  clearTimeout(timeoutId);
25337
25439
  if (isStreaming) {
25338
- return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
25339
- } else {
25340
- return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
25440
+ const contentType = response.headers.get("content-type") ?? "";
25441
+ if (contentType.includes("text/event-stream")) {
25442
+ return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
25443
+ }
25444
+ const completion = await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
25445
+ return completionToChunkStream(completion);
25341
25446
  }
25447
+ return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
25342
25448
  } catch (error47) {
25343
25449
  clearTimeout(timeoutId);
25344
25450
  if (error47 instanceof Error && error47.name === "AbortError") {
@@ -25349,6 +25455,39 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
25349
25455
  };
25350
25456
  return client;
25351
25457
  }
25458
/**
 * Adapts a complete (non-streamed) chat completion into an async stream
 * that yields exactly one "chat.completion.chunk" object.
 *
 * Every entry of `completion.choices` is converted, so completions with
 * several choices keep all of them. When `choices` is missing or empty a
 * single placeholder choice is emitted (index 0, empty content,
 * finish_reason "stop").
 *
 * Per choice:
 *   - `delta.content` is the message content when it is a string, else "".
 *   - `delta.tool_calls` is included only when the message has at least one
 *     tool call of type "function"; their `index` counts within that
 *     filtered list.
 *   - `logprobs` is always null.
 *
 * @param {object} completion - Object with `id`, `created`, `model`, and
 *   optionally `choices` and `usage`.
 * @yields {object} The single chunk; `usage` is null when the completion has
 *   none.
 */
async function* completionToChunkStream(completion) {
  const toChunkChoice = (choice, position) => {
    const message = choice?.message;
    const content = typeof message?.content === "string" ? message.content : "";
    const toolCalls = message?.tool_calls
      ?.filter((tc) => tc.type === "function")
      .map((tc, i) => ({
        index: i,
        id: tc.id,
        type: "function",
        function: {
          name: tc.function.name,
          arguments: tc.function.arguments
        }
      }));
    const hasToolCalls = Array.isArray(toolCalls) && toolCalls.length > 0;
    return {
      // Fall back to the array position so several choices stay distinct.
      index: choice?.index ?? position,
      delta: {
        role: "assistant",
        content,
        ...(hasToolCalls ? { tool_calls: toolCalls } : {})
      },
      finish_reason: choice?.finish_reason ?? "stop",
      logprobs: null
    };
  };
  const hasChoices = Array.isArray(completion.choices) && completion.choices.length > 0;
  // A lone `undefined` entry produces the placeholder choice.
  const sourceChoices = hasChoices ? completion.choices : [undefined];
  yield {
    id: completion.id,
    object: "chat.completion.chunk",
    created: completion.created,
    model: completion.model,
    choices: sourceChoices.map(toChunkChoice),
    usage: completion.usage ?? null
  };
}
25352
25491
  async function* createDecryptedStreamGenerator(reader, sharedSecret, nonce, maxBufferSize) {
25353
25492
  const decoder = new TextDecoder;
25354
25493
  let buffer = "";
@@ -25528,7 +25667,7 @@ async function callFileOutputTool(toolName, params, apiKey, dekStore, clientKEK,
25528
25667
  };
25529
25668
  const response = await callToolRequest(toolName, body, apiKey, timeoutMs, attest2);
25530
25669
  const result = await downloadAndDecryptFile(response, dek, apiKey, timeoutMs);
25531
- if (result && result.fileId) {
25670
+ if (result?.fileId) {
25532
25671
  if (!dekStore.fileDEKs) {
25533
25672
  dekStore.fileDEKs = new Map;
25534
25673
  }
@@ -25585,7 +25724,7 @@ async function callRagTool(toolName, params, apiKey, dekStore, clientKEK, timeou
25585
25724
  }
25586
25725
  const _clientKEK = clientKEK ? hexToBytes(clientKEK) : getClientKEK();
25587
25726
  const encryptedFileDEKs = fileIds.reduce((acc, fileId) => {
25588
- const fileDEK = dekStore.fileDEKs.get(fileId);
25727
+ const fileDEK = dekStore.fileDEKs?.get(fileId);
25589
25728
  if (!fileDEK) {
25590
25729
  return acc;
25591
25730
  }