@premai/api-sdk 1.0.40 → 1.0.42

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,25 +45,127 @@ function getGatewayErrorMessage(err) {
45
45
  return err.kind.message;
46
46
  return null;
47
47
  }
48
- async function attest(apiKey, options = { enabled: true }) {
49
- if (!options.enabled)
50
- return null;
48
+ var ATTEST_TTL_MS = 30000;
49
+ var ATTEST_CACHE_MAX = 500;
50
+ var ATTEST_MAX_ATTEMPTS = 4;
51
+ var ATTEST_RETRY_BASE_MS = 250;
52
+ var ATTEST_RETRY_MAX_MS = 2000;
53
+ var TRANSIENT_PATTERNS = [
54
+ /EOF while parsing/i,
55
+ /error decoding response body/i,
56
+ /connection (reset|closed|refused)/i,
57
+ /socket hang up/i,
58
+ /ETIMEDOUT/i
59
+ ];
60
+ var attestCache = new Map;
61
+ var attestInflight = new Map;
62
+ function attestCacheKey(apiKey, model) {
63
+ return `${apiKey}|${model ?? ""}`;
64
+ }
65
+ function pruneExpired(now) {
66
+ for (const [key, entry] of attestCache) {
67
+ if (entry.expires <= now) {
68
+ attestCache.delete(key);
69
+ } else {
70
+ break;
71
+ }
72
+ }
73
+ }
74
+ function isTransientError(err) {
75
+ const messages = [];
76
+ if (err instanceof Error) {
77
+ messages.push(err.message);
78
+ }
79
+ if (isAttestationError(err) && Array.isArray(err.cause)) {
80
+ messages.push(...err.cause);
81
+ }
82
+ return messages.some((m) => TRANSIENT_PATTERNS.some((re) => re.test(m)));
83
+ }
84
+ function backoffDelayMs(attempt) {
85
+ const exp = ATTEST_RETRY_BASE_MS * 2 ** (attempt - 1);
86
+ const capped = Math.min(exp, ATTEST_RETRY_MAX_MS);
87
+ const jitter = Math.floor(Math.random() * (capped / 2));
88
+ return capped + jitter;
89
+ }
90
+ function delay(ms) {
91
+ return new Promise((resolve) => setTimeout(resolve, ms));
92
+ }
93
+ function safeFree(obj) {
94
+ if (typeof obj?.free !== "function")
95
+ return;
96
+ try {
97
+ obj.free();
98
+ } catch {}
99
+ }
100
+ async function attemptAttest(apiKey, options) {
51
101
  const prem = await loadPrem();
52
- const client = await new prem.ClientBuilder(endpoints.proxy ?? "").with_authorization(apiKey).build();
53
- let query = new prem.QueryParams;
54
- if (options.model)
55
- query = query.with("model", options.model);
102
+ let client;
103
+ let attested;
104
+ let headers;
105
+ let sessionId;
56
106
  try {
57
- client.set_query(query);
58
- const attested = await client.attest();
59
- const headers = attested.headers();
60
- const sessionId = attested.headers().gpu()?.get("x-session-id") ?? null;
61
- headers.free();
62
- attested.free();
63
- return sessionId;
107
+ client = await new prem.ClientBuilder(endpoints.proxy ?? "").with_authorization(apiKey).build();
108
+ if (options.model) {
109
+ client.set_query(new prem.QueryParams().with("model", options.model));
110
+ }
111
+ attested = await client.attest();
112
+ headers = attested.headers();
113
+ sessionId = headers.cpu()?.get("x-session-id") ?? headers.gpu()?.get("x-session-id") ?? null;
64
114
  } finally {
65
- client.free();
115
+ safeFree(headers);
116
+ safeFree(attested);
117
+ safeFree(client);
118
+ }
119
+ if (sessionId === null) {
120
+ throw new Error("missing x-session-id issued by attestation");
121
+ }
122
+ return sessionId;
123
+ }
124
+ async function runAttest(apiKey, options) {
125
+ let lastErr;
126
+ for (let attempt = 1;attempt <= ATTEST_MAX_ATTEMPTS; attempt++) {
127
+ try {
128
+ return await attemptAttest(apiKey, options);
129
+ } catch (err) {
130
+ lastErr = err;
131
+ if (attempt === ATTEST_MAX_ATTEMPTS || !isTransientError(err)) {
132
+ throw err;
133
+ }
134
+ await delay(backoffDelayMs(attempt));
135
+ }
66
136
  }
137
+ throw lastErr;
138
+ }
139
+ async function attest(apiKey, options = { enabled: true }) {
140
+ if (!options.enabled)
141
+ return null;
142
+ const key = attestCacheKey(apiKey, options.model);
143
+ const now = Date.now();
144
+ const cached = attestCache.get(key);
145
+ if (cached) {
146
+ if (cached.expires > now)
147
+ return cached.sessionId;
148
+ attestCache.delete(key);
149
+ }
150
+ const inflight = attestInflight.get(key);
151
+ if (inflight) {
152
+ return inflight;
153
+ }
154
+ const work = runAttest(apiKey, options).then((sessionId) => {
155
+ const insertTime = Date.now();
156
+ pruneExpired(insertTime);
157
+ attestCache.set(key, { sessionId, expires: insertTime + ATTEST_TTL_MS });
158
+ if (attestCache.size > ATTEST_CACHE_MAX) {
159
+ const oldest = attestCache.keys().next().value;
160
+ if (oldest)
161
+ attestCache.delete(oldest);
162
+ }
163
+ return sessionId;
164
+ }).finally(() => {
165
+ attestInflight.delete(key);
166
+ });
167
+ attestInflight.set(key, work);
168
+ return work;
67
169
  }
68
170
 
69
171
  // src/utils/crypto.ts
@@ -243,7 +345,8 @@ async function preprocessAudioRequest(body, encryptionKeys) {
243
345
  const isDeepgram = body.model.startsWith("deepgram/");
244
346
  const requestBody = isDeepgram ? {
245
347
  model: body.model,
246
- diarize: body.diarize
348
+ diarize: body.diarize,
349
+ smart_format: body.smart_format
247
350
  } : {
248
351
  model: body.model,
249
352
  language: body.language,
@@ -1063,10 +1166,14 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
1063
1166
  }
1064
1167
  clearTimeout(timeoutId);
1065
1168
  if (isStreaming) {
1066
- return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
1067
- } else {
1068
- return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
1169
+ const contentType = response.headers.get("content-type") ?? "";
1170
+ if (contentType.includes("text/event-stream")) {
1171
+ return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
1172
+ }
1173
+ const completion = await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
1174
+ return completionToChunkStream(completion);
1069
1175
  }
1176
+ return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
1070
1177
  } catch (error) {
1071
1178
  clearTimeout(timeoutId);
1072
1179
  if (error instanceof Error && error.name === "AbortError") {
@@ -1077,6 +1184,39 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
1077
1184
  };
1078
1185
  return client;
1079
1186
  }
1187
+ async function* completionToChunkStream(completion) {
1188
+ const choice = completion.choices[0];
1189
+ const message = choice?.message;
1190
+ const content = typeof message?.content === "string" ? message.content : "";
1191
+ const toolCalls = message?.tool_calls?.filter((tc) => tc.type === "function").map((tc, i) => ({
1192
+ index: i,
1193
+ id: tc.id,
1194
+ type: "function",
1195
+ function: {
1196
+ name: tc.function.name,
1197
+ arguments: tc.function.arguments
1198
+ }
1199
+ }));
1200
+ yield {
1201
+ id: completion.id,
1202
+ object: "chat.completion.chunk",
1203
+ created: completion.created,
1204
+ model: completion.model,
1205
+ choices: [
1206
+ {
1207
+ index: choice?.index ?? 0,
1208
+ delta: {
1209
+ role: "assistant",
1210
+ content,
1211
+ ...toolCalls && toolCalls.length > 0 && { tool_calls: toolCalls }
1212
+ },
1213
+ finish_reason: choice?.finish_reason ?? "stop",
1214
+ logprobs: null
1215
+ }
1216
+ ],
1217
+ usage: completion.usage ?? null
1218
+ };
1219
+ }
1080
1220
  async function* createDecryptedStreamGenerator(reader, sharedSecret, nonce, maxBufferSize) {
1081
1221
  const decoder = new TextDecoder;
1082
1222
  let buffer = "";
@@ -1257,7 +1397,7 @@ async function callFileOutputTool(toolName, params, apiKey, dekStore, clientKEK,
1257
1397
  };
1258
1398
  const response = await callToolRequest(toolName, body, apiKey, timeoutMs, attest2);
1259
1399
  const result = await downloadAndDecryptFile(response, dek, apiKey, timeoutMs);
1260
- if (result && result.fileId) {
1400
+ if (result?.fileId) {
1261
1401
  if (!dekStore.fileDEKs) {
1262
1402
  dekStore.fileDEKs = new Map;
1263
1403
  }
@@ -1314,7 +1454,7 @@ async function callRagTool(toolName, params, apiKey, dekStore, clientKEK, timeou
1314
1454
  }
1315
1455
  const _clientKEK = clientKEK ? hexToBytes6(clientKEK) : getClientKEK();
1316
1456
  const encryptedFileDEKs = fileIds.reduce((acc, fileId) => {
1317
- const fileDEK = dekStore.fileDEKs.get(fileId);
1457
+ const fileDEK = dekStore.fileDEKs?.get(fileId);
1318
1458
  if (!fileDEK) {
1319
1459
  return acc;
1320
1460
  }
@@ -276,25 +276,127 @@ function getGatewayErrorMessage(err) {
276
276
  return err.kind.message;
277
277
  return null;
278
278
  }
279
- async function attest(apiKey, options = { enabled: true }) {
280
- if (!options.enabled)
281
- return null;
279
+ var ATTEST_TTL_MS = 30000;
280
+ var ATTEST_CACHE_MAX = 500;
281
+ var ATTEST_MAX_ATTEMPTS = 4;
282
+ var ATTEST_RETRY_BASE_MS = 250;
283
+ var ATTEST_RETRY_MAX_MS = 2000;
284
+ var TRANSIENT_PATTERNS = [
285
+ /EOF while parsing/i,
286
+ /error decoding response body/i,
287
+ /connection (reset|closed|refused)/i,
288
+ /socket hang up/i,
289
+ /ETIMEDOUT/i
290
+ ];
291
+ var attestCache = new Map;
292
+ var attestInflight = new Map;
293
+ function attestCacheKey(apiKey, model) {
294
+ return `${apiKey}|${model ?? ""}`;
295
+ }
296
+ function pruneExpired(now) {
297
+ for (const [key, entry] of attestCache) {
298
+ if (entry.expires <= now) {
299
+ attestCache.delete(key);
300
+ } else {
301
+ break;
302
+ }
303
+ }
304
+ }
305
+ function isTransientError(err) {
306
+ const messages = [];
307
+ if (err instanceof Error) {
308
+ messages.push(err.message);
309
+ }
310
+ if (isAttestationError(err) && Array.isArray(err.cause)) {
311
+ messages.push(...err.cause);
312
+ }
313
+ return messages.some((m) => TRANSIENT_PATTERNS.some((re) => re.test(m)));
314
+ }
315
+ function backoffDelayMs(attempt) {
316
+ const exp = ATTEST_RETRY_BASE_MS * 2 ** (attempt - 1);
317
+ const capped = Math.min(exp, ATTEST_RETRY_MAX_MS);
318
+ const jitter = Math.floor(Math.random() * (capped / 2));
319
+ return capped + jitter;
320
+ }
321
+ function delay(ms) {
322
+ return new Promise((resolve) => setTimeout(resolve, ms));
323
+ }
324
+ function safeFree(obj) {
325
+ if (typeof obj?.free !== "function")
326
+ return;
327
+ try {
328
+ obj.free();
329
+ } catch {}
330
+ }
331
+ async function attemptAttest(apiKey, options) {
282
332
  const prem = await loadPrem();
283
- const client = await new prem.ClientBuilder(endpoints.proxy ?? "").with_authorization(apiKey).build();
284
- let query = new prem.QueryParams;
285
- if (options.model)
286
- query = query.with("model", options.model);
333
+ let client;
334
+ let attested;
335
+ let headers;
336
+ let sessionId;
287
337
  try {
288
- client.set_query(query);
289
- const attested = await client.attest();
290
- const headers = attested.headers();
291
- const sessionId = attested.headers().gpu()?.get("x-session-id") ?? null;
292
- headers.free();
293
- attested.free();
294
- return sessionId;
338
+ client = await new prem.ClientBuilder(endpoints.proxy ?? "").with_authorization(apiKey).build();
339
+ if (options.model) {
340
+ client.set_query(new prem.QueryParams().with("model", options.model));
341
+ }
342
+ attested = await client.attest();
343
+ headers = attested.headers();
344
+ sessionId = headers.cpu()?.get("x-session-id") ?? headers.gpu()?.get("x-session-id") ?? null;
295
345
  } finally {
296
- client.free();
346
+ safeFree(headers);
347
+ safeFree(attested);
348
+ safeFree(client);
349
+ }
350
+ if (sessionId === null) {
351
+ throw new Error("missing x-session-id issued by attestation");
297
352
  }
353
+ return sessionId;
354
+ }
355
+ async function runAttest(apiKey, options) {
356
+ let lastErr;
357
+ for (let attempt = 1;attempt <= ATTEST_MAX_ATTEMPTS; attempt++) {
358
+ try {
359
+ return await attemptAttest(apiKey, options);
360
+ } catch (err) {
361
+ lastErr = err;
362
+ if (attempt === ATTEST_MAX_ATTEMPTS || !isTransientError(err)) {
363
+ throw err;
364
+ }
365
+ await delay(backoffDelayMs(attempt));
366
+ }
367
+ }
368
+ throw lastErr;
369
+ }
370
+ async function attest(apiKey, options = { enabled: true }) {
371
+ if (!options.enabled)
372
+ return null;
373
+ const key = attestCacheKey(apiKey, options.model);
374
+ const now = Date.now();
375
+ const cached = attestCache.get(key);
376
+ if (cached) {
377
+ if (cached.expires > now)
378
+ return cached.sessionId;
379
+ attestCache.delete(key);
380
+ }
381
+ const inflight = attestInflight.get(key);
382
+ if (inflight) {
383
+ return inflight;
384
+ }
385
+ const work = runAttest(apiKey, options).then((sessionId) => {
386
+ const insertTime = Date.now();
387
+ pruneExpired(insertTime);
388
+ attestCache.set(key, { sessionId, expires: insertTime + ATTEST_TTL_MS });
389
+ if (attestCache.size > ATTEST_CACHE_MAX) {
390
+ const oldest = attestCache.keys().next().value;
391
+ if (oldest)
392
+ attestCache.delete(oldest);
393
+ }
394
+ return sessionId;
395
+ }).finally(() => {
396
+ attestInflight.delete(key);
397
+ });
398
+ attestInflight.set(key, work);
399
+ return work;
298
400
  }
299
401
 
300
402
  // node_modules/@noble/ciphers/aes.js
@@ -5253,7 +5355,8 @@ async function preprocessAudioRequest(body, encryptionKeys) {
5253
5355
  const isDeepgram = body.model.startsWith("deepgram/");
5254
5356
  const requestBody = isDeepgram ? {
5255
5357
  model: body.model,
5256
- diarize: body.diarize
5358
+ diarize: body.diarize,
5359
+ smart_format: body.smart_format
5257
5360
  } : {
5258
5361
  model: body.model,
5259
5362
  language: body.language,
@@ -25334,10 +25437,14 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
25334
25437
  }
25335
25438
  clearTimeout(timeoutId);
25336
25439
  if (isStreaming) {
25337
- return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
25338
- } else {
25339
- return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
25440
+ const contentType = response.headers.get("content-type") ?? "";
25441
+ if (contentType.includes("text/event-stream")) {
25442
+ return await postprocessStreamingResponse(response, encryptedRequest.sharedSecret, encryptedRequest.nonce, maxBufferSize);
25443
+ }
25444
+ const completion = await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
25445
+ return completionToChunkStream(completion);
25340
25446
  }
25447
+ return await postprocessNonStreamingResponse(response, encryptedRequest.sharedSecret);
25341
25448
  } catch (error47) {
25342
25449
  clearTimeout(timeoutId);
25343
25450
  if (error47 instanceof Error && error47.name === "AbortError") {
@@ -25348,6 +25455,39 @@ function createRvencChatClient(apiKey, encryptionKeys, requestTimeoutMs = DEFAUL
25348
25455
  };
25349
25456
  return client;
25350
25457
  }
25458
+ async function* completionToChunkStream(completion) {
25459
+ const choice = completion.choices[0];
25460
+ const message = choice?.message;
25461
+ const content = typeof message?.content === "string" ? message.content : "";
25462
+ const toolCalls = message?.tool_calls?.filter((tc) => tc.type === "function").map((tc, i) => ({
25463
+ index: i,
25464
+ id: tc.id,
25465
+ type: "function",
25466
+ function: {
25467
+ name: tc.function.name,
25468
+ arguments: tc.function.arguments
25469
+ }
25470
+ }));
25471
+ yield {
25472
+ id: completion.id,
25473
+ object: "chat.completion.chunk",
25474
+ created: completion.created,
25475
+ model: completion.model,
25476
+ choices: [
25477
+ {
25478
+ index: choice?.index ?? 0,
25479
+ delta: {
25480
+ role: "assistant",
25481
+ content,
25482
+ ...toolCalls && toolCalls.length > 0 && { tool_calls: toolCalls }
25483
+ },
25484
+ finish_reason: choice?.finish_reason ?? "stop",
25485
+ logprobs: null
25486
+ }
25487
+ ],
25488
+ usage: completion.usage ?? null
25489
+ };
25490
+ }
25351
25491
  async function* createDecryptedStreamGenerator(reader, sharedSecret, nonce, maxBufferSize) {
25352
25492
  const decoder = new TextDecoder;
25353
25493
  let buffer = "";
@@ -25527,7 +25667,7 @@ async function callFileOutputTool(toolName, params, apiKey, dekStore, clientKEK,
25527
25667
  };
25528
25668
  const response = await callToolRequest(toolName, body, apiKey, timeoutMs, attest2);
25529
25669
  const result = await downloadAndDecryptFile(response, dek, apiKey, timeoutMs);
25530
- if (result && result.fileId) {
25670
+ if (result?.fileId) {
25531
25671
  if (!dekStore.fileDEKs) {
25532
25672
  dekStore.fileDEKs = new Map;
25533
25673
  }
@@ -25584,7 +25724,7 @@ async function callRagTool(toolName, params, apiKey, dekStore, clientKEK, timeou
25584
25724
  }
25585
25725
  const _clientKEK = clientKEK ? hexToBytes(clientKEK) : getClientKEK();
25586
25726
  const encryptedFileDEKs = fileIds.reduce((acc, fileId) => {
25587
- const fileDEK = dekStore.fileDEKs.get(fileId);
25727
+ const fileDEK = dekStore.fileDEKs?.get(fileId);
25588
25728
  if (!fileDEK) {
25589
25729
  return acc;
25590
25730
  }