@adaline/gateway 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { z } from 'zod';
2
2
  import { ChatModelV1, EmbeddingModelV1 } from '@adaline/provider';
3
+ import { GatewayBaseError } from '@adaline/types';
3
4
  import { AxiosInstance } from 'axios';
4
5
 
5
6
  declare class GatewayError extends Error {
@@ -27,18 +28,36 @@ interface HttpClientResponse<T> {
27
28
  };
28
29
  }
29
30
 
31
+ declare class HttpClientError extends GatewayBaseError {
32
+ readonly info: string;
33
+ readonly cause: unknown;
34
+ constructor({ info, cause }: {
35
+ info: string;
36
+ cause: unknown;
37
+ });
38
+ static isHttpClientError(error: unknown): error is HttpClientError;
39
+ }
40
+ declare class HttpRequestError extends GatewayBaseError {
41
+ readonly info: string;
42
+ readonly cause: {
43
+ status: number;
44
+ headers: Record<string, string>;
45
+ data: unknown;
46
+ };
47
+ constructor(message: string, status: number | undefined, headers: Record<string, string>, data: unknown);
48
+ static isHttpRequestError(error: unknown): error is HttpRequestError;
49
+ }
50
+
30
51
  interface HttpClientConfig {
31
52
  timeoutInMilliseconds?: number;
32
53
  axiosInstance?: AxiosInstance;
33
- }
34
- declare class IsomorphicHttpClientError extends Error {
35
- status: number;
36
- data: unknown;
37
- constructor(message: string, status?: number, data?: unknown);
54
+ proxyUrl?: string;
38
55
  }
39
56
  declare class IsomorphicHttpClient implements HttpClient {
40
57
  private defaultTimeout?;
41
58
  private client;
59
+ private httpProxyAgent?;
60
+ private httpsProxyAgent?;
42
61
  constructor(config: HttpClientConfig);
43
62
  isNodeEnvironment: () => boolean;
44
63
  private makeRequest;
@@ -51,57 +70,308 @@ declare class IsomorphicHttpClient implements HttpClient {
51
70
  put<T>(url: string, data?: Record<string, unknown>, headers?: Record<string, string | undefined>): Promise<HttpClientResponse<T>>;
52
71
  delete<T>(url: string, params?: Record<string, unknown>, headers?: Record<string, string | undefined>): Promise<HttpClientResponse<T>>;
53
72
  patch<T>(url: string, data?: Record<string, unknown>, headers?: Record<string, string | undefined>): Promise<HttpClientResponse<T>>;
54
- static IsomorphicHttpClientError: typeof IsomorphicHttpClientError;
55
73
  }
56
74
 
57
- type Task<Request, Response> = {
58
- tryCount: number;
59
- modelIndex: number;
60
- inputTokens: number;
61
- queueTime: number;
62
- attemptTime: number;
75
+ declare class QueueTaskTimeoutError extends GatewayBaseError {
76
+ readonly info: string;
77
+ readonly cause: unknown;
78
+ constructor({ info, cause }: {
79
+ info: string;
80
+ cause: unknown;
81
+ });
82
+ static isQueueTaskTimeoutError(error: unknown): error is QueueTaskTimeoutError;
83
+ }
84
+
85
+ interface Cache<T> {
86
+ get(key: string): T | undefined;
87
+ set(key: string, value: T): void;
88
+ delete(key: string): void;
89
+ clear(): void;
90
+ }
91
+
92
+ declare class LRUCache<T extends object> implements Cache<T> {
93
+ private cache;
94
+ constructor(maxEntries?: number);
95
+ get(key: string): T | undefined;
96
+ set(key: string, value: T): void;
97
+ delete(key: string): void;
98
+ clear(): void;
99
+ }
100
+
101
+ type QueueTask<Request, Response> = {
63
102
  request: Request;
64
- queuePriority: number | undefined;
103
+ cache: Cache<Response>;
65
104
  resolve: (value: Response) => void;
66
- execute: (request: {
67
- modelIndex: number;
68
- data: Request;
69
- resolve: (value: Response) => void;
70
- reject: (error: any) => void;
71
- }) => Promise<void>;
72
105
  reject: (error: any) => void;
106
+ execute: (request: Request) => Promise<Response>;
73
107
  };
74
108
  interface Queue<Request, Response> {
75
- enqueue(task: Task<Request, Response>): void;
109
+ enqueue(task: QueueTask<Request, Response>): void;
76
110
  }
111
+ declare const QueueOptions: z.ZodObject<{
112
+ maxConcurrentTasks: z.ZodNumber;
113
+ retryCount: z.ZodNumber;
114
+ timeout: z.ZodNumber;
115
+ retry: z.ZodObject<{
116
+ initialDelay: z.ZodNumber;
117
+ exponentialFactor: z.ZodNumber;
118
+ }, "strip", z.ZodTypeAny, {
119
+ initialDelay: number;
120
+ exponentialFactor: number;
121
+ }, {
122
+ initialDelay: number;
123
+ exponentialFactor: number;
124
+ }>;
125
+ }, "strip", z.ZodTypeAny, {
126
+ timeout: number;
127
+ maxConcurrentTasks: number;
128
+ retryCount: number;
129
+ retry: {
130
+ initialDelay: number;
131
+ exponentialFactor: number;
132
+ };
133
+ }, {
134
+ timeout: number;
135
+ maxConcurrentTasks: number;
136
+ retryCount: number;
137
+ retry: {
138
+ initialDelay: number;
139
+ exponentialFactor: number;
140
+ };
141
+ }>;
142
+ type QueueOptionsType = z.infer<typeof QueueOptions>;
77
143
 
78
- type QueueOptions = {
79
- maxRetries?: number;
80
- tokensPerMinute?: number;
81
- timeout?: number;
82
- backOffInitialDelay?: number;
83
- backOffMultiplier?: number;
84
- rateLimitRetryTimeout?: number;
85
- shouldRetry?: (status: number) => boolean;
86
- };
87
- declare class SimpleQueue<Req, Res> implements Queue<Req, Res> {
144
+ declare class SimpleQueue<Request, Response> implements Queue<Request, Response> {
145
+ private options;
146
+ private activeTasks;
88
147
  private queue;
89
- private isProcessing;
90
- private maxRetries;
91
- private tokensPerMinute;
92
- private timeout;
93
- private backOffInitialDelay;
94
- private backOffMultiplier;
95
- private rateLimitRetryTimeout;
96
- private shouldRetry;
97
- constructor(options: QueueOptions);
98
- enqueue(task: Task<Req, Res>): void;
99
- private calculateBackoff;
148
+ constructor(options: QueueOptionsType);
149
+ enqueue(task: QueueTask<Request, Response>): void;
150
+ private withTimeout;
151
+ private executeWithRetry;
100
152
  private processQueue;
101
- private execute;
102
153
  }
103
154
 
104
155
  declare const CompleteChatHandlerRequest: z.ZodObject<{
156
+ cache: z.ZodType<Cache<{
157
+ request: {
158
+ config: Record<string, any>;
159
+ messages: {
160
+ role: "system" | "user" | "assistant" | "tool";
161
+ content: ({
162
+ value: string;
163
+ modality: "text";
164
+ metadata?: undefined;
165
+ } | {
166
+ value: {
167
+ type: "base64";
168
+ base64: string;
169
+ media_type: "png" | "jpeg" | "webp" | "gif";
170
+ } | {
171
+ type: "url";
172
+ url: string;
173
+ };
174
+ modality: "image";
175
+ detail: "low" | "medium" | "high" | "auto";
176
+ metadata?: undefined;
177
+ } | {
178
+ name: string;
179
+ modality: "tool-call";
180
+ index: number;
181
+ id: string;
182
+ arguments: string;
183
+ metadata?: undefined;
184
+ } | {
185
+ data: string;
186
+ name: string;
187
+ modality: "tool-response";
188
+ index: number;
189
+ id: string;
190
+ metadata?: undefined;
191
+ })[];
192
+ metadata?: undefined;
193
+ }[];
194
+ tools?: {
195
+ type: "function";
196
+ definition: {
197
+ schema: {
198
+ name: string;
199
+ description: string;
200
+ strict?: boolean | undefined;
201
+ parameters?: any;
202
+ };
203
+ };
204
+ metadata?: any;
205
+ }[] | undefined;
206
+ };
207
+ response: {
208
+ messages: {
209
+ role: "system" | "user" | "assistant" | "tool";
210
+ content: ({
211
+ value: string;
212
+ modality: "text";
213
+ metadata?: undefined;
214
+ } | {
215
+ value: {
216
+ type: "base64";
217
+ base64: string;
218
+ media_type: "png" | "jpeg" | "webp" | "gif";
219
+ } | {
220
+ type: "url";
221
+ url: string;
222
+ };
223
+ modality: "image";
224
+ detail: "low" | "medium" | "high" | "auto";
225
+ metadata?: undefined;
226
+ } | {
227
+ name: string;
228
+ modality: "tool-call";
229
+ index: number;
230
+ id: string;
231
+ arguments: string;
232
+ metadata?: undefined;
233
+ } | {
234
+ name: string;
235
+ modality: "tool-response";
236
+ index: number;
237
+ id: string;
238
+ data: string;
239
+ metadata?: undefined;
240
+ })[];
241
+ metadata?: undefined;
242
+ }[];
243
+ usage?: {
244
+ totalTokens: number;
245
+ promptTokens: number;
246
+ completionTokens: number;
247
+ } | undefined;
248
+ logProbs?: {
249
+ token: string;
250
+ logProb: number;
251
+ bytes: number[] | null;
252
+ topLogProbs: {
253
+ token: string;
254
+ logProb: number;
255
+ bytes: number[] | null;
256
+ }[];
257
+ }[] | undefined;
258
+ };
259
+ latencyInMs: number;
260
+ provider: {
261
+ request?: any;
262
+ response?: any;
263
+ };
264
+ metadataForCallbacks?: any;
265
+ }>, z.ZodTypeDef, Cache<{
266
+ request: {
267
+ config: Record<string, any>;
268
+ messages: {
269
+ role: "system" | "user" | "assistant" | "tool";
270
+ content: ({
271
+ value: string;
272
+ modality: "text";
273
+ metadata?: undefined;
274
+ } | {
275
+ value: {
276
+ type: "base64";
277
+ base64: string;
278
+ media_type: "png" | "jpeg" | "webp" | "gif";
279
+ } | {
280
+ type: "url";
281
+ url: string;
282
+ };
283
+ modality: "image";
284
+ detail: "low" | "medium" | "high" | "auto";
285
+ metadata?: undefined;
286
+ } | {
287
+ name: string;
288
+ modality: "tool-call";
289
+ index: number;
290
+ id: string;
291
+ arguments: string;
292
+ metadata?: undefined;
293
+ } | {
294
+ data: string;
295
+ name: string;
296
+ modality: "tool-response";
297
+ index: number;
298
+ id: string;
299
+ metadata?: undefined;
300
+ })[];
301
+ metadata?: undefined;
302
+ }[];
303
+ tools?: {
304
+ type: "function";
305
+ definition: {
306
+ schema: {
307
+ name: string;
308
+ description: string;
309
+ strict?: boolean | undefined;
310
+ parameters?: any;
311
+ };
312
+ };
313
+ metadata?: any;
314
+ }[] | undefined;
315
+ };
316
+ response: {
317
+ messages: {
318
+ role: "system" | "user" | "assistant" | "tool";
319
+ content: ({
320
+ value: string;
321
+ modality: "text";
322
+ metadata?: undefined;
323
+ } | {
324
+ value: {
325
+ type: "base64";
326
+ base64: string;
327
+ media_type: "png" | "jpeg" | "webp" | "gif";
328
+ } | {
329
+ type: "url";
330
+ url: string;
331
+ };
332
+ modality: "image";
333
+ detail: "low" | "medium" | "high" | "auto";
334
+ metadata?: undefined;
335
+ } | {
336
+ name: string;
337
+ modality: "tool-call";
338
+ index: number;
339
+ id: string;
340
+ arguments: string;
341
+ metadata?: undefined;
342
+ } | {
343
+ name: string;
344
+ modality: "tool-response";
345
+ index: number;
346
+ id: string;
347
+ data: string;
348
+ metadata?: undefined;
349
+ })[];
350
+ metadata?: undefined;
351
+ }[];
352
+ usage?: {
353
+ totalTokens: number;
354
+ promptTokens: number;
355
+ completionTokens: number;
356
+ } | undefined;
357
+ logProbs?: {
358
+ token: string;
359
+ logProb: number;
360
+ bytes: number[] | null;
361
+ topLogProbs: {
362
+ token: string;
363
+ logProb: number;
364
+ bytes: number[] | null;
365
+ }[];
366
+ }[] | undefined;
367
+ };
368
+ latencyInMs: number;
369
+ provider: {
370
+ request?: any;
371
+ response?: any;
372
+ };
373
+ metadataForCallbacks?: any;
374
+ }>>;
105
375
  model: z.ZodType<ChatModelV1<{
106
376
  name: string;
107
377
  description: string;
@@ -515,6 +785,116 @@ declare const CompleteChatHandlerRequest: z.ZodObject<{
515
785
  })[];
516
786
  metadata?: undefined;
517
787
  }[];
788
+ cache: Cache<{
789
+ request: {
790
+ config: Record<string, any>;
791
+ messages: {
792
+ role: "system" | "user" | "assistant" | "tool";
793
+ content: ({
794
+ value: string;
795
+ modality: "text";
796
+ metadata?: undefined;
797
+ } | {
798
+ value: {
799
+ type: "base64";
800
+ base64: string;
801
+ media_type: "png" | "jpeg" | "webp" | "gif";
802
+ } | {
803
+ type: "url";
804
+ url: string;
805
+ };
806
+ modality: "image";
807
+ detail: "low" | "medium" | "high" | "auto";
808
+ metadata?: undefined;
809
+ } | {
810
+ name: string;
811
+ modality: "tool-call";
812
+ index: number;
813
+ id: string;
814
+ arguments: string;
815
+ metadata?: undefined;
816
+ } | {
817
+ data: string;
818
+ name: string;
819
+ modality: "tool-response";
820
+ index: number;
821
+ id: string;
822
+ metadata?: undefined;
823
+ })[];
824
+ metadata?: undefined;
825
+ }[];
826
+ tools?: {
827
+ type: "function";
828
+ definition: {
829
+ schema: {
830
+ name: string;
831
+ description: string;
832
+ strict?: boolean | undefined;
833
+ parameters?: any;
834
+ };
835
+ };
836
+ metadata?: any;
837
+ }[] | undefined;
838
+ };
839
+ response: {
840
+ messages: {
841
+ role: "system" | "user" | "assistant" | "tool";
842
+ content: ({
843
+ value: string;
844
+ modality: "text";
845
+ metadata?: undefined;
846
+ } | {
847
+ value: {
848
+ type: "base64";
849
+ base64: string;
850
+ media_type: "png" | "jpeg" | "webp" | "gif";
851
+ } | {
852
+ type: "url";
853
+ url: string;
854
+ };
855
+ modality: "image";
856
+ detail: "low" | "medium" | "high" | "auto";
857
+ metadata?: undefined;
858
+ } | {
859
+ name: string;
860
+ modality: "tool-call";
861
+ index: number;
862
+ id: string;
863
+ arguments: string;
864
+ metadata?: undefined;
865
+ } | {
866
+ name: string;
867
+ modality: "tool-response";
868
+ index: number;
869
+ id: string;
870
+ data: string;
871
+ metadata?: undefined;
872
+ })[];
873
+ metadata?: undefined;
874
+ }[];
875
+ usage?: {
876
+ totalTokens: number;
877
+ promptTokens: number;
878
+ completionTokens: number;
879
+ } | undefined;
880
+ logProbs?: {
881
+ token: string;
882
+ logProb: number;
883
+ bytes: number[] | null;
884
+ topLogProbs: {
885
+ token: string;
886
+ logProb: number;
887
+ bytes: number[] | null;
888
+ }[];
889
+ }[] | undefined;
890
+ };
891
+ latencyInMs: number;
892
+ provider: {
893
+ request?: any;
894
+ response?: any;
895
+ };
896
+ metadataForCallbacks?: any;
897
+ }>;
518
898
  tools?: {
519
899
  type: "function";
520
900
  definition: {
@@ -527,8 +907,8 @@ declare const CompleteChatHandlerRequest: z.ZodObject<{
527
907
  };
528
908
  metadata?: any;
529
909
  }[] | undefined;
530
- callbacks?: [CompleteChatCallbackType<any>, ...CompleteChatCallbackType<any>[]] | undefined;
531
910
  metadataForCallbacks?: any;
911
+ callbacks?: [CompleteChatCallbackType<any>, ...CompleteChatCallbackType<any>[]] | undefined;
532
912
  }, {
533
913
  config: Record<string, any>;
534
914
  model: ChatModelV1<{
@@ -612,6 +992,116 @@ declare const CompleteChatHandlerRequest: z.ZodObject<{
612
992
  })[];
613
993
  metadata?: undefined;
614
994
  }[];
995
+ cache: Cache<{
996
+ request: {
997
+ config: Record<string, any>;
998
+ messages: {
999
+ role: "system" | "user" | "assistant" | "tool";
1000
+ content: ({
1001
+ value: string;
1002
+ modality: "text";
1003
+ metadata?: undefined;
1004
+ } | {
1005
+ value: {
1006
+ type: "base64";
1007
+ base64: string;
1008
+ media_type: "png" | "jpeg" | "webp" | "gif";
1009
+ } | {
1010
+ type: "url";
1011
+ url: string;
1012
+ };
1013
+ modality: "image";
1014
+ detail: "low" | "medium" | "high" | "auto";
1015
+ metadata?: undefined;
1016
+ } | {
1017
+ name: string;
1018
+ modality: "tool-call";
1019
+ index: number;
1020
+ id: string;
1021
+ arguments: string;
1022
+ metadata?: undefined;
1023
+ } | {
1024
+ data: string;
1025
+ name: string;
1026
+ modality: "tool-response";
1027
+ index: number;
1028
+ id: string;
1029
+ metadata?: undefined;
1030
+ })[];
1031
+ metadata?: undefined;
1032
+ }[];
1033
+ tools?: {
1034
+ type: "function";
1035
+ definition: {
1036
+ schema: {
1037
+ name: string;
1038
+ description: string;
1039
+ strict?: boolean | undefined;
1040
+ parameters?: any;
1041
+ };
1042
+ };
1043
+ metadata?: any;
1044
+ }[] | undefined;
1045
+ };
1046
+ response: {
1047
+ messages: {
1048
+ role: "system" | "user" | "assistant" | "tool";
1049
+ content: ({
1050
+ value: string;
1051
+ modality: "text";
1052
+ metadata?: undefined;
1053
+ } | {
1054
+ value: {
1055
+ type: "base64";
1056
+ base64: string;
1057
+ media_type: "png" | "jpeg" | "webp" | "gif";
1058
+ } | {
1059
+ type: "url";
1060
+ url: string;
1061
+ };
1062
+ modality: "image";
1063
+ detail: "low" | "medium" | "high" | "auto";
1064
+ metadata?: undefined;
1065
+ } | {
1066
+ name: string;
1067
+ modality: "tool-call";
1068
+ index: number;
1069
+ id: string;
1070
+ arguments: string;
1071
+ metadata?: undefined;
1072
+ } | {
1073
+ name: string;
1074
+ modality: "tool-response";
1075
+ index: number;
1076
+ id: string;
1077
+ data: string;
1078
+ metadata?: undefined;
1079
+ })[];
1080
+ metadata?: undefined;
1081
+ }[];
1082
+ usage?: {
1083
+ totalTokens: number;
1084
+ promptTokens: number;
1085
+ completionTokens: number;
1086
+ } | undefined;
1087
+ logProbs?: {
1088
+ token: string;
1089
+ logProb: number;
1090
+ bytes: number[] | null;
1091
+ topLogProbs: {
1092
+ token: string;
1093
+ logProb: number;
1094
+ bytes: number[] | null;
1095
+ }[];
1096
+ }[] | undefined;
1097
+ };
1098
+ latencyInMs: number;
1099
+ provider: {
1100
+ request?: any;
1101
+ response?: any;
1102
+ };
1103
+ metadataForCallbacks?: any;
1104
+ }>;
615
1105
  tools?: {
616
1106
  type: "function";
617
1107
  definition: {
@@ -624,8 +1114,8 @@ declare const CompleteChatHandlerRequest: z.ZodObject<{
624
1114
  };
625
1115
  metadata?: any;
626
1116
  }[] | undefined;
627
- callbacks?: [CompleteChatCallbackType<any>, ...CompleteChatCallbackType<any>[]] | undefined;
628
1117
  metadataForCallbacks?: any;
1118
+ callbacks?: [CompleteChatCallbackType<any>, ...CompleteChatCallbackType<any>[]] | undefined;
629
1119
  }>;
630
1120
  type CompleteChatHandlerRequestType = z.infer<typeof CompleteChatHandlerRequest>;
631
1121
  declare const CompleteChatHandlerResponse: z.ZodObject<{
@@ -1039,17 +1529,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1039
1529
  arguments: z.ZodString;
1040
1530
  metadata: z.ZodUndefined;
1041
1531
  }, "strip", z.ZodTypeAny, {
1532
+ name: string;
1042
1533
  modality: "tool-call";
1043
1534
  index: number;
1044
1535
  id: string;
1045
- name: string;
1046
1536
  arguments: string;
1047
1537
  metadata?: undefined;
1048
1538
  }, {
1539
+ name: string;
1049
1540
  modality: "tool-call";
1050
1541
  index: number;
1051
1542
  id: string;
1052
- name: string;
1053
1543
  arguments: string;
1054
1544
  metadata?: undefined;
1055
1545
  }>, z.ZodObject<{
@@ -1060,17 +1550,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1060
1550
  data: z.ZodString;
1061
1551
  metadata: z.ZodUndefined;
1062
1552
  }, "strip", z.ZodTypeAny, {
1553
+ name: string;
1063
1554
  modality: "tool-response";
1064
1555
  index: number;
1065
1556
  id: string;
1066
- name: string;
1067
1557
  data: string;
1068
1558
  metadata?: undefined;
1069
1559
  }, {
1560
+ name: string;
1070
1561
  modality: "tool-response";
1071
1562
  index: number;
1072
1563
  id: string;
1073
- name: string;
1074
1564
  data: string;
1075
1565
  metadata?: undefined;
1076
1566
  }>]>, "many">;
@@ -1094,17 +1584,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1094
1584
  detail: "low" | "medium" | "high" | "auto";
1095
1585
  metadata?: undefined;
1096
1586
  } | {
1587
+ name: string;
1097
1588
  modality: "tool-call";
1098
1589
  index: number;
1099
1590
  id: string;
1100
- name: string;
1101
1591
  arguments: string;
1102
1592
  metadata?: undefined;
1103
1593
  } | {
1594
+ name: string;
1104
1595
  modality: "tool-response";
1105
1596
  index: number;
1106
1597
  id: string;
1107
- name: string;
1108
1598
  data: string;
1109
1599
  metadata?: undefined;
1110
1600
  })[];
@@ -1128,17 +1618,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1128
1618
  detail: "low" | "medium" | "high" | "auto";
1129
1619
  metadata?: undefined;
1130
1620
  } | {
1621
+ name: string;
1131
1622
  modality: "tool-call";
1132
1623
  index: number;
1133
1624
  id: string;
1134
- name: string;
1135
1625
  arguments: string;
1136
1626
  metadata?: undefined;
1137
1627
  } | {
1628
+ name: string;
1138
1629
  modality: "tool-response";
1139
1630
  index: number;
1140
1631
  id: string;
1141
- name: string;
1142
1632
  data: string;
1143
1633
  metadata?: undefined;
1144
1634
  })[];
@@ -1214,17 +1704,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1214
1704
  detail: "low" | "medium" | "high" | "auto";
1215
1705
  metadata?: undefined;
1216
1706
  } | {
1707
+ name: string;
1217
1708
  modality: "tool-call";
1218
1709
  index: number;
1219
1710
  id: string;
1220
- name: string;
1221
1711
  arguments: string;
1222
1712
  metadata?: undefined;
1223
1713
  } | {
1714
+ name: string;
1224
1715
  modality: "tool-response";
1225
1716
  index: number;
1226
1717
  id: string;
1227
- name: string;
1228
1718
  data: string;
1229
1719
  metadata?: undefined;
1230
1720
  })[];
@@ -1265,17 +1755,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1265
1755
  detail: "low" | "medium" | "high" | "auto";
1266
1756
  metadata?: undefined;
1267
1757
  } | {
1758
+ name: string;
1268
1759
  modality: "tool-call";
1269
1760
  index: number;
1270
1761
  id: string;
1271
- name: string;
1272
1762
  arguments: string;
1273
1763
  metadata?: undefined;
1274
1764
  } | {
1765
+ name: string;
1275
1766
  modality: "tool-response";
1276
1767
  index: number;
1277
1768
  id: string;
1278
- name: string;
1279
1769
  data: string;
1280
1770
  metadata?: undefined;
1281
1771
  })[];
@@ -1380,17 +1870,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1380
1870
  detail: "low" | "medium" | "high" | "auto";
1381
1871
  metadata?: undefined;
1382
1872
  } | {
1873
+ name: string;
1383
1874
  modality: "tool-call";
1384
1875
  index: number;
1385
1876
  id: string;
1386
- name: string;
1387
1877
  arguments: string;
1388
1878
  metadata?: undefined;
1389
1879
  } | {
1880
+ name: string;
1390
1881
  modality: "tool-response";
1391
1882
  index: number;
1392
1883
  id: string;
1393
- name: string;
1394
1884
  data: string;
1395
1885
  metadata?: undefined;
1396
1886
  })[];
@@ -1489,17 +1979,17 @@ declare const CompleteChatHandlerResponse: z.ZodObject<{
1489
1979
  detail: "low" | "medium" | "high" | "auto";
1490
1980
  metadata?: undefined;
1491
1981
  } | {
1982
+ name: string;
1492
1983
  modality: "tool-call";
1493
1984
  index: number;
1494
1985
  id: string;
1495
- name: string;
1496
1986
  arguments: string;
1497
1987
  metadata?: undefined;
1498
1988
  } | {
1989
+ name: string;
1499
1990
  modality: "tool-response";
1500
1991
  index: number;
1501
1992
  id: string;
1502
- name: string;
1503
1993
  data: string;
1504
1994
  metadata?: undefined;
1505
1995
  })[];
@@ -1533,12 +2023,89 @@ type CompleteChatCallbackType<M = any> = {
1533
2023
  onChatStart?: (metadata?: M) => Promise<void> | void;
1534
2024
  onChatCached?: (metadata?: M, response?: CompleteChatHandlerResponseType) => Promise<void> | void;
1535
2025
  onChatComplete?: (metadata?: M, response?: CompleteChatHandlerResponseType) => Promise<void> | void;
1536
- onChatError?: (metadata?: M, error?: GatewayError) => Promise<void> | void;
2026
+ onChatError?: (metadata?: M, error?: GatewayError | HttpRequestError) => Promise<void> | void;
1537
2027
  };
1538
2028
 
1539
2029
  declare function handleCompleteChat(request: CompleteChatHandlerRequestType, client: HttpClient): Promise<CompleteChatHandlerResponseType>;
1540
2030
 
1541
2031
  declare const GetEmbeddingsHandlerRequest: z.ZodObject<{
2032
+ cache: z.ZodType<Cache<{
2033
+ request: {
2034
+ config: Record<string, any>;
2035
+ embeddingRequests: {
2036
+ modality: "text";
2037
+ requests: string[];
2038
+ metadata?: undefined;
2039
+ } | {
2040
+ modality: "token";
2041
+ requests: number[][];
2042
+ metadata?: undefined;
2043
+ };
2044
+ };
2045
+ response: {
2046
+ encodingFormat: "float";
2047
+ embeddings: {
2048
+ index: number;
2049
+ embedding: number[];
2050
+ }[];
2051
+ usage?: {
2052
+ totalTokens: number;
2053
+ } | undefined;
2054
+ } | {
2055
+ encodingFormat: "base64";
2056
+ embeddings: {
2057
+ index: number;
2058
+ embedding: string;
2059
+ }[];
2060
+ usage?: {
2061
+ totalTokens: number;
2062
+ } | undefined;
2063
+ };
2064
+ latencyInMs: number;
2065
+ provider: {
2066
+ request?: any;
2067
+ response?: any;
2068
+ };
2069
+ metadataForCallbacks?: any;
2070
+ }>, z.ZodTypeDef, Cache<{
2071
+ request: {
2072
+ config: Record<string, any>;
2073
+ embeddingRequests: {
2074
+ modality: "text";
2075
+ requests: string[];
2076
+ metadata?: undefined;
2077
+ } | {
2078
+ modality: "token";
2079
+ requests: number[][];
2080
+ metadata?: undefined;
2081
+ };
2082
+ };
2083
+ response: {
2084
+ encodingFormat: "float";
2085
+ embeddings: {
2086
+ index: number;
2087
+ embedding: number[];
2088
+ }[];
2089
+ usage?: {
2090
+ totalTokens: number;
2091
+ } | undefined;
2092
+ } | {
2093
+ encodingFormat: "base64";
2094
+ embeddings: {
2095
+ index: number;
2096
+ embedding: string;
2097
+ }[];
2098
+ usage?: {
2099
+ totalTokens: number;
2100
+ } | undefined;
2101
+ };
2102
+ latencyInMs: number;
2103
+ provider: {
2104
+ request?: any;
2105
+ response?: any;
2106
+ };
2107
+ metadataForCallbacks?: any;
2108
+ }>>;
1542
2109
  model: z.ZodType<EmbeddingModelV1<{
1543
2110
  description: string;
1544
2111
  name: string;
@@ -1703,6 +2270,45 @@ declare const GetEmbeddingsHandlerRequest: z.ZodObject<{
1703
2270
  schema: z.ZodObject<z.ZodRawShape, z.UnknownKeysParam, z.ZodTypeAny, unknown, unknown>;
1704
2271
  };
1705
2272
  }>;
2273
+ cache: Cache<{
2274
+ request: {
2275
+ config: Record<string, any>;
2276
+ embeddingRequests: {
2277
+ modality: "text";
2278
+ requests: string[];
2279
+ metadata?: undefined;
2280
+ } | {
2281
+ modality: "token";
2282
+ requests: number[][];
2283
+ metadata?: undefined;
2284
+ };
2285
+ };
2286
+ response: {
2287
+ encodingFormat: "float";
2288
+ embeddings: {
2289
+ index: number;
2290
+ embedding: number[];
2291
+ }[];
2292
+ usage?: {
2293
+ totalTokens: number;
2294
+ } | undefined;
2295
+ } | {
2296
+ encodingFormat: "base64";
2297
+ embeddings: {
2298
+ index: number;
2299
+ embedding: string;
2300
+ }[];
2301
+ usage?: {
2302
+ totalTokens: number;
2303
+ } | undefined;
2304
+ };
2305
+ latencyInMs: number;
2306
+ provider: {
2307
+ request?: any;
2308
+ response?: any;
2309
+ };
2310
+ metadataForCallbacks?: any;
2311
+ }>;
1706
2312
  embeddingRequests: {
1707
2313
  modality: "text";
1708
2314
  requests: string[];
@@ -1712,8 +2318,8 @@ declare const GetEmbeddingsHandlerRequest: z.ZodObject<{
1712
2318
  requests: number[][];
1713
2319
  metadata?: undefined;
1714
2320
  };
1715
- callbacks?: [GetEmbeddingsCallbackType<any>, ...GetEmbeddingsCallbackType<any>[]] | undefined;
1716
2321
  metadataForCallbacks?: any;
2322
+ callbacks?: [GetEmbeddingsCallbackType<any>, ...GetEmbeddingsCallbackType<any>[]] | undefined;
1717
2323
  }, {
1718
2324
  config: Record<string, any>;
1719
2325
  model: EmbeddingModelV1<{
@@ -1761,6 +2367,45 @@ declare const GetEmbeddingsHandlerRequest: z.ZodObject<{
1761
2367
  schema: z.ZodObject<z.ZodRawShape, z.UnknownKeysParam, z.ZodTypeAny, unknown, unknown>;
1762
2368
  };
1763
2369
  }>;
2370
+ cache: Cache<{
2371
+ request: {
2372
+ config: Record<string, any>;
2373
+ embeddingRequests: {
2374
+ modality: "text";
2375
+ requests: string[];
2376
+ metadata?: undefined;
2377
+ } | {
2378
+ modality: "token";
2379
+ requests: number[][];
2380
+ metadata?: undefined;
2381
+ };
2382
+ };
2383
+ response: {
2384
+ encodingFormat: "float";
2385
+ embeddings: {
2386
+ index: number;
2387
+ embedding: number[];
2388
+ }[];
2389
+ usage?: {
2390
+ totalTokens: number;
2391
+ } | undefined;
2392
+ } | {
2393
+ encodingFormat: "base64";
2394
+ embeddings: {
2395
+ index: number;
2396
+ embedding: string;
2397
+ }[];
2398
+ usage?: {
2399
+ totalTokens: number;
2400
+ } | undefined;
2401
+ };
2402
+ latencyInMs: number;
2403
+ provider: {
2404
+ request?: any;
2405
+ response?: any;
2406
+ };
2407
+ metadataForCallbacks?: any;
2408
+ }>;
1764
2409
  embeddingRequests: {
1765
2410
  modality: "text";
1766
2411
  requests: string[];
@@ -1770,8 +2415,8 @@ declare const GetEmbeddingsHandlerRequest: z.ZodObject<{
1770
2415
  requests: number[][];
1771
2416
  metadata?: undefined;
1772
2417
  };
1773
- callbacks?: [GetEmbeddingsCallbackType<any>, ...GetEmbeddingsCallbackType<any>[]] | undefined;
1774
2418
  metadataForCallbacks?: any;
2419
+ callbacks?: [GetEmbeddingsCallbackType<any>, ...GetEmbeddingsCallbackType<any>[]] | undefined;
1775
2420
  }>;
1776
2421
  type GetEmbeddingsHandlerRequestType = z.infer<typeof GetEmbeddingsHandlerRequest>;
1777
2422
  declare const GetEmbeddingsHandlerResponse: z.ZodObject<{
@@ -1994,7 +2639,7 @@ type GetEmbeddingsCallbackType<M = any> = {
1994
2639
  onGetEmbeddingsStart?: (metadata?: M) => Promise<void> | void;
1995
2640
  onGetEmbeddingsCached?: (metadata?: M, response?: GetEmbeddingsHandlerResponseType) => Promise<void> | void;
1996
2641
  onGetEmbeddingsComplete?: (metadata?: M, response?: GetEmbeddingsHandlerResponseType) => Promise<void> | void;
1997
- onGetEmbeddingsError?: (metadata?: M, error?: GatewayError) => Promise<void> | void;
2642
+ onGetEmbeddingsError?: (metadata?: M, error?: GatewayError | HttpRequestError) => Promise<void> | void;
1998
2643
  };
1999
2644
 
2000
2645
  declare function handleGetEmbeddings(request: GetEmbeddingsHandlerRequestType, client: HttpClient): Promise<GetEmbeddingsHandlerResponseType>;
@@ -2425,8 +3070,8 @@ declare const StreamChatHandlerRequest: z.ZodObject<{
2425
3070
  };
2426
3071
  metadata?: any;
2427
3072
  }[] | undefined;
2428
- callbacks?: [StreamChatCallbackType<any>, ...StreamChatCallbackType<any>[]] | undefined;
2429
3073
  metadataForCallbacks?: any;
3074
+ callbacks?: [StreamChatCallbackType<any>, ...StreamChatCallbackType<any>[]] | undefined;
2430
3075
  }, {
2431
3076
  config: Record<string, any>;
2432
3077
  model: ChatModelV1<{
@@ -2522,8 +3167,8 @@ declare const StreamChatHandlerRequest: z.ZodObject<{
2522
3167
  };
2523
3168
  metadata?: any;
2524
3169
  }[] | undefined;
2525
- callbacks?: [StreamChatCallbackType<any>, ...StreamChatCallbackType<any>[]] | undefined;
2526
3170
  metadataForCallbacks?: any;
3171
+ callbacks?: [StreamChatCallbackType<any>, ...StreamChatCallbackType<any>[]] | undefined;
2527
3172
  }>;
2528
3173
  type StreamChatHandlerRequestType = z.infer<typeof StreamChatHandlerRequest>;
2529
3174
  declare const StreamChatHandlerResponse: z.ZodObject<{
@@ -2889,16 +3534,16 @@ declare const StreamChatHandlerResponse: z.ZodObject<{
2889
3534
  }, "strip", z.ZodTypeAny, {
2890
3535
  modality: "partial-tool-call";
2891
3536
  index: number;
3537
+ name?: string | undefined;
2892
3538
  metadata?: undefined;
2893
3539
  id?: string | undefined;
2894
- name?: string | undefined;
2895
3540
  arguments?: string | undefined;
2896
3541
  }, {
2897
3542
  modality: "partial-tool-call";
2898
3543
  index: number;
3544
+ name?: string | undefined;
2899
3545
  metadata?: undefined;
2900
3546
  id?: string | undefined;
2901
- name?: string | undefined;
2902
3547
  arguments?: string | undefined;
2903
3548
  }>]>;
2904
3549
  metadata: z.ZodUndefined;
@@ -2911,9 +3556,9 @@ declare const StreamChatHandlerResponse: z.ZodObject<{
2911
3556
  } | {
2912
3557
  modality: "partial-tool-call";
2913
3558
  index: number;
3559
+ name?: string | undefined;
2914
3560
  metadata?: undefined;
2915
3561
  id?: string | undefined;
2916
- name?: string | undefined;
2917
3562
  arguments?: string | undefined;
2918
3563
  };
2919
3564
  metadata?: undefined;
@@ -2926,9 +3571,9 @@ declare const StreamChatHandlerResponse: z.ZodObject<{
2926
3571
  } | {
2927
3572
  modality: "partial-tool-call";
2928
3573
  index: number;
3574
+ name?: string | undefined;
2929
3575
  metadata?: undefined;
2930
3576
  id?: string | undefined;
2931
- name?: string | undefined;
2932
3577
  arguments?: string | undefined;
2933
3578
  };
2934
3579
  metadata?: undefined;
@@ -2993,9 +3638,9 @@ declare const StreamChatHandlerResponse: z.ZodObject<{
2993
3638
  } | {
2994
3639
  modality: "partial-tool-call";
2995
3640
  index: number;
3641
+ name?: string | undefined;
2996
3642
  metadata?: undefined;
2997
3643
  id?: string | undefined;
2998
- name?: string | undefined;
2999
3644
  arguments?: string | undefined;
3000
3645
  };
3001
3646
  metadata?: undefined;
@@ -3025,9 +3670,9 @@ declare const StreamChatHandlerResponse: z.ZodObject<{
3025
3670
  } | {
3026
3671
  modality: "partial-tool-call";
3027
3672
  index: number;
3673
+ name?: string | undefined;
3028
3674
  metadata?: undefined;
3029
3675
  id?: string | undefined;
3030
- name?: string | undefined;
3031
3676
  arguments?: string | undefined;
3032
3677
  };
3033
3678
  metadata?: undefined;
@@ -3120,9 +3765,9 @@ declare const StreamChatHandlerResponse: z.ZodObject<{
3120
3765
  } | {
3121
3766
  modality: "partial-tool-call";
3122
3767
  index: number;
3768
+ name?: string | undefined;
3123
3769
  metadata?: undefined;
3124
3770
  id?: string | undefined;
3125
- name?: string | undefined;
3126
3771
  arguments?: string | undefined;
3127
3772
  };
3128
3773
  metadata?: undefined;
@@ -3209,9 +3854,9 @@ declare const StreamChatHandlerResponse: z.ZodObject<{
3209
3854
  } | {
3210
3855
  modality: "partial-tool-call";
3211
3856
  index: number;
3857
+ name?: string | undefined;
3212
3858
  metadata?: undefined;
3213
3859
  id?: string | undefined;
3214
- name?: string | undefined;
3215
3860
  arguments?: string | undefined;
3216
3861
  };
3217
3862
  metadata?: undefined;
@@ -3249,19 +3894,667 @@ type StreamChatCallbackType<M = any> = {
3249
3894
 
3250
3895
  declare function handleStreamChat<M>(request: StreamChatHandlerRequestType, client: HttpClient): AsyncGenerator<StreamChatHandlerResponseType, void, unknown>;
3251
3896
 
3252
- type GatewayCallbackType<M = any> = CompleteChatCallbackType<M> | StreamChatCallbackType<M>;
3253
3897
  declare const GatewayOptions: z.ZodObject<{
3898
+ queueOptions: z.ZodOptional<z.ZodObject<{
3899
+ maxConcurrentTasks: z.ZodOptional<z.ZodNumber>;
3900
+ retryCount: z.ZodOptional<z.ZodNumber>;
3901
+ timeout: z.ZodOptional<z.ZodNumber>;
3902
+ retry: z.ZodOptional<z.ZodObject<{
3903
+ initialDelay: z.ZodNumber;
3904
+ exponentialFactor: z.ZodNumber;
3905
+ }, "strip", z.ZodTypeAny, {
3906
+ initialDelay: number;
3907
+ exponentialFactor: number;
3908
+ }, {
3909
+ initialDelay: number;
3910
+ exponentialFactor: number;
3911
+ }>>;
3912
+ }, "strip", z.ZodTypeAny, {
3913
+ timeout?: number | undefined;
3914
+ maxConcurrentTasks?: number | undefined;
3915
+ retryCount?: number | undefined;
3916
+ retry?: {
3917
+ initialDelay: number;
3918
+ exponentialFactor: number;
3919
+ } | undefined;
3920
+ }, {
3921
+ timeout?: number | undefined;
3922
+ maxConcurrentTasks?: number | undefined;
3923
+ retryCount?: number | undefined;
3924
+ retry?: {
3925
+ initialDelay: number;
3926
+ exponentialFactor: number;
3927
+ } | undefined;
3928
+ }>>;
3254
3929
  dangerouslyAllowBrowser: z.ZodOptional<z.ZodBoolean>;
3255
3930
  httpClient: z.ZodOptional<z.ZodType<HttpClient, z.ZodTypeDef, HttpClient>>;
3256
- callbacks: z.ZodOptional<z.ZodArray<z.ZodType<GatewayCallbackType<any>, z.ZodTypeDef, GatewayCallbackType<any>>, "atleastone">>;
3931
+ completeChatCache: z.ZodOptional<z.ZodType<Cache<{
3932
+ request: {
3933
+ config: Record<string, any>;
3934
+ messages: {
3935
+ role: "system" | "user" | "assistant" | "tool";
3936
+ content: ({
3937
+ value: string;
3938
+ modality: "text";
3939
+ metadata?: undefined;
3940
+ } | {
3941
+ value: {
3942
+ type: "base64";
3943
+ base64: string;
3944
+ media_type: "png" | "jpeg" | "webp" | "gif";
3945
+ } | {
3946
+ type: "url";
3947
+ url: string;
3948
+ };
3949
+ modality: "image";
3950
+ detail: "low" | "medium" | "high" | "auto";
3951
+ metadata?: undefined;
3952
+ } | {
3953
+ name: string;
3954
+ modality: "tool-call";
3955
+ index: number;
3956
+ id: string;
3957
+ arguments: string;
3958
+ metadata?: undefined;
3959
+ } | {
3960
+ data: string;
3961
+ name: string;
3962
+ modality: "tool-response";
3963
+ index: number;
3964
+ id: string;
3965
+ metadata?: undefined;
3966
+ })[];
3967
+ metadata?: undefined;
3968
+ }[];
3969
+ tools?: {
3970
+ type: "function";
3971
+ definition: {
3972
+ schema: {
3973
+ name: string;
3974
+ description: string;
3975
+ strict?: boolean | undefined;
3976
+ parameters?: any;
3977
+ };
3978
+ };
3979
+ metadata?: any;
3980
+ }[] | undefined;
3981
+ };
3982
+ response: {
3983
+ messages: {
3984
+ role: "system" | "user" | "assistant" | "tool";
3985
+ content: ({
3986
+ value: string;
3987
+ modality: "text";
3988
+ metadata?: undefined;
3989
+ } | {
3990
+ value: {
3991
+ type: "base64";
3992
+ base64: string;
3993
+ media_type: "png" | "jpeg" | "webp" | "gif";
3994
+ } | {
3995
+ type: "url";
3996
+ url: string;
3997
+ };
3998
+ modality: "image";
3999
+ detail: "low" | "medium" | "high" | "auto";
4000
+ metadata?: undefined;
4001
+ } | {
4002
+ name: string;
4003
+ modality: "tool-call";
4004
+ index: number;
4005
+ id: string;
4006
+ arguments: string;
4007
+ metadata?: undefined;
4008
+ } | {
4009
+ name: string;
4010
+ modality: "tool-response";
4011
+ index: number;
4012
+ id: string;
4013
+ data: string;
4014
+ metadata?: undefined;
4015
+ })[];
4016
+ metadata?: undefined;
4017
+ }[];
4018
+ usage?: {
4019
+ totalTokens: number;
4020
+ promptTokens: number;
4021
+ completionTokens: number;
4022
+ } | undefined;
4023
+ logProbs?: {
4024
+ token: string;
4025
+ logProb: number;
4026
+ bytes: number[] | null;
4027
+ topLogProbs: {
4028
+ token: string;
4029
+ logProb: number;
4030
+ bytes: number[] | null;
4031
+ }[];
4032
+ }[] | undefined;
4033
+ };
4034
+ latencyInMs: number;
4035
+ provider: {
4036
+ request?: any;
4037
+ response?: any;
4038
+ };
4039
+ metadataForCallbacks?: any;
4040
+ }>, z.ZodTypeDef, Cache<{
4041
+ request: {
4042
+ config: Record<string, any>;
4043
+ messages: {
4044
+ role: "system" | "user" | "assistant" | "tool";
4045
+ content: ({
4046
+ value: string;
4047
+ modality: "text";
4048
+ metadata?: undefined;
4049
+ } | {
4050
+ value: {
4051
+ type: "base64";
4052
+ base64: string;
4053
+ media_type: "png" | "jpeg" | "webp" | "gif";
4054
+ } | {
4055
+ type: "url";
4056
+ url: string;
4057
+ };
4058
+ modality: "image";
4059
+ detail: "low" | "medium" | "high" | "auto";
4060
+ metadata?: undefined;
4061
+ } | {
4062
+ name: string;
4063
+ modality: "tool-call";
4064
+ index: number;
4065
+ id: string;
4066
+ arguments: string;
4067
+ metadata?: undefined;
4068
+ } | {
4069
+ data: string;
4070
+ name: string;
4071
+ modality: "tool-response";
4072
+ index: number;
4073
+ id: string;
4074
+ metadata?: undefined;
4075
+ })[];
4076
+ metadata?: undefined;
4077
+ }[];
4078
+ tools?: {
4079
+ type: "function";
4080
+ definition: {
4081
+ schema: {
4082
+ name: string;
4083
+ description: string;
4084
+ strict?: boolean | undefined;
4085
+ parameters?: any;
4086
+ };
4087
+ };
4088
+ metadata?: any;
4089
+ }[] | undefined;
4090
+ };
4091
+ response: {
4092
+ messages: {
4093
+ role: "system" | "user" | "assistant" | "tool";
4094
+ content: ({
4095
+ value: string;
4096
+ modality: "text";
4097
+ metadata?: undefined;
4098
+ } | {
4099
+ value: {
4100
+ type: "base64";
4101
+ base64: string;
4102
+ media_type: "png" | "jpeg" | "webp" | "gif";
4103
+ } | {
4104
+ type: "url";
4105
+ url: string;
4106
+ };
4107
+ modality: "image";
4108
+ detail: "low" | "medium" | "high" | "auto";
4109
+ metadata?: undefined;
4110
+ } | {
4111
+ name: string;
4112
+ modality: "tool-call";
4113
+ index: number;
4114
+ id: string;
4115
+ arguments: string;
4116
+ metadata?: undefined;
4117
+ } | {
4118
+ name: string;
4119
+ modality: "tool-response";
4120
+ index: number;
4121
+ id: string;
4122
+ data: string;
4123
+ metadata?: undefined;
4124
+ })[];
4125
+ metadata?: undefined;
4126
+ }[];
4127
+ usage?: {
4128
+ totalTokens: number;
4129
+ promptTokens: number;
4130
+ completionTokens: number;
4131
+ } | undefined;
4132
+ logProbs?: {
4133
+ token: string;
4134
+ logProb: number;
4135
+ bytes: number[] | null;
4136
+ topLogProbs: {
4137
+ token: string;
4138
+ logProb: number;
4139
+ bytes: number[] | null;
4140
+ }[];
4141
+ }[] | undefined;
4142
+ };
4143
+ latencyInMs: number;
4144
+ provider: {
4145
+ request?: any;
4146
+ response?: any;
4147
+ };
4148
+ metadataForCallbacks?: any;
4149
+ }>>>;
4150
+ completeChatCallbacks: z.ZodOptional<z.ZodArray<z.ZodType<CompleteChatCallbackType, z.ZodTypeDef, CompleteChatCallbackType>, "atleastone">>;
4151
+ getEmbeddingsCache: z.ZodOptional<z.ZodType<Cache<{
4152
+ request: {
4153
+ config: Record<string, any>;
4154
+ embeddingRequests: {
4155
+ modality: "text";
4156
+ requests: string[];
4157
+ metadata?: undefined;
4158
+ } | {
4159
+ modality: "token";
4160
+ requests: number[][];
4161
+ metadata?: undefined;
4162
+ };
4163
+ };
4164
+ response: {
4165
+ encodingFormat: "float";
4166
+ embeddings: {
4167
+ index: number;
4168
+ embedding: number[];
4169
+ }[];
4170
+ usage?: {
4171
+ totalTokens: number;
4172
+ } | undefined;
4173
+ } | {
4174
+ encodingFormat: "base64";
4175
+ embeddings: {
4176
+ index: number;
4177
+ embedding: string;
4178
+ }[];
4179
+ usage?: {
4180
+ totalTokens: number;
4181
+ } | undefined;
4182
+ };
4183
+ latencyInMs: number;
4184
+ provider: {
4185
+ request?: any;
4186
+ response?: any;
4187
+ };
4188
+ metadataForCallbacks?: any;
4189
+ }>, z.ZodTypeDef, Cache<{
4190
+ request: {
4191
+ config: Record<string, any>;
4192
+ embeddingRequests: {
4193
+ modality: "text";
4194
+ requests: string[];
4195
+ metadata?: undefined;
4196
+ } | {
4197
+ modality: "token";
4198
+ requests: number[][];
4199
+ metadata?: undefined;
4200
+ };
4201
+ };
4202
+ response: {
4203
+ encodingFormat: "float";
4204
+ embeddings: {
4205
+ index: number;
4206
+ embedding: number[];
4207
+ }[];
4208
+ usage?: {
4209
+ totalTokens: number;
4210
+ } | undefined;
4211
+ } | {
4212
+ encodingFormat: "base64";
4213
+ embeddings: {
4214
+ index: number;
4215
+ embedding: string;
4216
+ }[];
4217
+ usage?: {
4218
+ totalTokens: number;
4219
+ } | undefined;
4220
+ };
4221
+ latencyInMs: number;
4222
+ provider: {
4223
+ request?: any;
4224
+ response?: any;
4225
+ };
4226
+ metadataForCallbacks?: any;
4227
+ }>>>;
4228
+ getEmbeddingsCallbacks: z.ZodOptional<z.ZodArray<z.ZodType<GetEmbeddingsCallbackType, z.ZodTypeDef, GetEmbeddingsCallbackType>, "atleastone">>;
4229
+ streamChatCallbacks: z.ZodOptional<z.ZodArray<z.ZodType<StreamChatCallbackType, z.ZodTypeDef, StreamChatCallbackType>, "atleastone">>;
3257
4230
  }, "strip", z.ZodTypeAny, {
3258
- callbacks?: [GatewayCallbackType<any>, ...GatewayCallbackType<any>[]] | undefined;
4231
+ queueOptions?: {
4232
+ timeout?: number | undefined;
4233
+ maxConcurrentTasks?: number | undefined;
4234
+ retryCount?: number | undefined;
4235
+ retry?: {
4236
+ initialDelay: number;
4237
+ exponentialFactor: number;
4238
+ } | undefined;
4239
+ } | undefined;
3259
4240
  dangerouslyAllowBrowser?: boolean | undefined;
3260
4241
  httpClient?: HttpClient | undefined;
4242
+ completeChatCache?: Cache<{
4243
+ request: {
4244
+ config: Record<string, any>;
4245
+ messages: {
4246
+ role: "system" | "user" | "assistant" | "tool";
4247
+ content: ({
4248
+ value: string;
4249
+ modality: "text";
4250
+ metadata?: undefined;
4251
+ } | {
4252
+ value: {
4253
+ type: "base64";
4254
+ base64: string;
4255
+ media_type: "png" | "jpeg" | "webp" | "gif";
4256
+ } | {
4257
+ type: "url";
4258
+ url: string;
4259
+ };
4260
+ modality: "image";
4261
+ detail: "low" | "medium" | "high" | "auto";
4262
+ metadata?: undefined;
4263
+ } | {
4264
+ name: string;
4265
+ modality: "tool-call";
4266
+ index: number;
4267
+ id: string;
4268
+ arguments: string;
4269
+ metadata?: undefined;
4270
+ } | {
4271
+ data: string;
4272
+ name: string;
4273
+ modality: "tool-response";
4274
+ index: number;
4275
+ id: string;
4276
+ metadata?: undefined;
4277
+ })[];
4278
+ metadata?: undefined;
4279
+ }[];
4280
+ tools?: {
4281
+ type: "function";
4282
+ definition: {
4283
+ schema: {
4284
+ name: string;
4285
+ description: string;
4286
+ strict?: boolean | undefined;
4287
+ parameters?: any;
4288
+ };
4289
+ };
4290
+ metadata?: any;
4291
+ }[] | undefined;
4292
+ };
4293
+ response: {
4294
+ messages: {
4295
+ role: "system" | "user" | "assistant" | "tool";
4296
+ content: ({
4297
+ value: string;
4298
+ modality: "text";
4299
+ metadata?: undefined;
4300
+ } | {
4301
+ value: {
4302
+ type: "base64";
4303
+ base64: string;
4304
+ media_type: "png" | "jpeg" | "webp" | "gif";
4305
+ } | {
4306
+ type: "url";
4307
+ url: string;
4308
+ };
4309
+ modality: "image";
4310
+ detail: "low" | "medium" | "high" | "auto";
4311
+ metadata?: undefined;
4312
+ } | {
4313
+ name: string;
4314
+ modality: "tool-call";
4315
+ index: number;
4316
+ id: string;
4317
+ arguments: string;
4318
+ metadata?: undefined;
4319
+ } | {
4320
+ name: string;
4321
+ modality: "tool-response";
4322
+ index: number;
4323
+ id: string;
4324
+ data: string;
4325
+ metadata?: undefined;
4326
+ })[];
4327
+ metadata?: undefined;
4328
+ }[];
4329
+ usage?: {
4330
+ totalTokens: number;
4331
+ promptTokens: number;
4332
+ completionTokens: number;
4333
+ } | undefined;
4334
+ logProbs?: {
4335
+ token: string;
4336
+ logProb: number;
4337
+ bytes: number[] | null;
4338
+ topLogProbs: {
4339
+ token: string;
4340
+ logProb: number;
4341
+ bytes: number[] | null;
4342
+ }[];
4343
+ }[] | undefined;
4344
+ };
4345
+ latencyInMs: number;
4346
+ provider: {
4347
+ request?: any;
4348
+ response?: any;
4349
+ };
4350
+ metadataForCallbacks?: any;
4351
+ }> | undefined;
4352
+ completeChatCallbacks?: [CompleteChatCallbackType, ...CompleteChatCallbackType[]] | undefined;
4353
+ getEmbeddingsCache?: Cache<{
4354
+ request: {
4355
+ config: Record<string, any>;
4356
+ embeddingRequests: {
4357
+ modality: "text";
4358
+ requests: string[];
4359
+ metadata?: undefined;
4360
+ } | {
4361
+ modality: "token";
4362
+ requests: number[][];
4363
+ metadata?: undefined;
4364
+ };
4365
+ };
4366
+ response: {
4367
+ encodingFormat: "float";
4368
+ embeddings: {
4369
+ index: number;
4370
+ embedding: number[];
4371
+ }[];
4372
+ usage?: {
4373
+ totalTokens: number;
4374
+ } | undefined;
4375
+ } | {
4376
+ encodingFormat: "base64";
4377
+ embeddings: {
4378
+ index: number;
4379
+ embedding: string;
4380
+ }[];
4381
+ usage?: {
4382
+ totalTokens: number;
4383
+ } | undefined;
4384
+ };
4385
+ latencyInMs: number;
4386
+ provider: {
4387
+ request?: any;
4388
+ response?: any;
4389
+ };
4390
+ metadataForCallbacks?: any;
4391
+ }> | undefined;
4392
+ getEmbeddingsCallbacks?: [GetEmbeddingsCallbackType, ...GetEmbeddingsCallbackType[]] | undefined;
4393
+ streamChatCallbacks?: [StreamChatCallbackType, ...StreamChatCallbackType[]] | undefined;
3261
4394
  }, {
3262
- callbacks?: [GatewayCallbackType<any>, ...GatewayCallbackType<any>[]] | undefined;
4395
+ queueOptions?: {
4396
+ timeout?: number | undefined;
4397
+ maxConcurrentTasks?: number | undefined;
4398
+ retryCount?: number | undefined;
4399
+ retry?: {
4400
+ initialDelay: number;
4401
+ exponentialFactor: number;
4402
+ } | undefined;
4403
+ } | undefined;
3263
4404
  dangerouslyAllowBrowser?: boolean | undefined;
3264
4405
  httpClient?: HttpClient | undefined;
4406
+ completeChatCache?: Cache<{
4407
+ request: {
4408
+ config: Record<string, any>;
4409
+ messages: {
4410
+ role: "system" | "user" | "assistant" | "tool";
4411
+ content: ({
4412
+ value: string;
4413
+ modality: "text";
4414
+ metadata?: undefined;
4415
+ } | {
4416
+ value: {
4417
+ type: "base64";
4418
+ base64: string;
4419
+ media_type: "png" | "jpeg" | "webp" | "gif";
4420
+ } | {
4421
+ type: "url";
4422
+ url: string;
4423
+ };
4424
+ modality: "image";
4425
+ detail: "low" | "medium" | "high" | "auto";
4426
+ metadata?: undefined;
4427
+ } | {
4428
+ name: string;
4429
+ modality: "tool-call";
4430
+ index: number;
4431
+ id: string;
4432
+ arguments: string;
4433
+ metadata?: undefined;
4434
+ } | {
4435
+ data: string;
4436
+ name: string;
4437
+ modality: "tool-response";
4438
+ index: number;
4439
+ id: string;
4440
+ metadata?: undefined;
4441
+ })[];
4442
+ metadata?: undefined;
4443
+ }[];
4444
+ tools?: {
4445
+ type: "function";
4446
+ definition: {
4447
+ schema: {
4448
+ name: string;
4449
+ description: string;
4450
+ strict?: boolean | undefined;
4451
+ parameters?: any;
4452
+ };
4453
+ };
4454
+ metadata?: any;
4455
+ }[] | undefined;
4456
+ };
4457
+ response: {
4458
+ messages: {
4459
+ role: "system" | "user" | "assistant" | "tool";
4460
+ content: ({
4461
+ value: string;
4462
+ modality: "text";
4463
+ metadata?: undefined;
4464
+ } | {
4465
+ value: {
4466
+ type: "base64";
4467
+ base64: string;
4468
+ media_type: "png" | "jpeg" | "webp" | "gif";
4469
+ } | {
4470
+ type: "url";
4471
+ url: string;
4472
+ };
4473
+ modality: "image";
4474
+ detail: "low" | "medium" | "high" | "auto";
4475
+ metadata?: undefined;
4476
+ } | {
4477
+ name: string;
4478
+ modality: "tool-call";
4479
+ index: number;
4480
+ id: string;
4481
+ arguments: string;
4482
+ metadata?: undefined;
4483
+ } | {
4484
+ name: string;
4485
+ modality: "tool-response";
4486
+ index: number;
4487
+ id: string;
4488
+ data: string;
4489
+ metadata?: undefined;
4490
+ })[];
4491
+ metadata?: undefined;
4492
+ }[];
4493
+ usage?: {
4494
+ totalTokens: number;
4495
+ promptTokens: number;
4496
+ completionTokens: number;
4497
+ } | undefined;
4498
+ logProbs?: {
4499
+ token: string;
4500
+ logProb: number;
4501
+ bytes: number[] | null;
4502
+ topLogProbs: {
4503
+ token: string;
4504
+ logProb: number;
4505
+ bytes: number[] | null;
4506
+ }[];
4507
+ }[] | undefined;
4508
+ };
4509
+ latencyInMs: number;
4510
+ provider: {
4511
+ request?: any;
4512
+ response?: any;
4513
+ };
4514
+ metadataForCallbacks?: any;
4515
+ }> | undefined;
4516
+ completeChatCallbacks?: [CompleteChatCallbackType, ...CompleteChatCallbackType[]] | undefined;
4517
+ getEmbeddingsCache?: Cache<{
4518
+ request: {
4519
+ config: Record<string, any>;
4520
+ embeddingRequests: {
4521
+ modality: "text";
4522
+ requests: string[];
4523
+ metadata?: undefined;
4524
+ } | {
4525
+ modality: "token";
4526
+ requests: number[][];
4527
+ metadata?: undefined;
4528
+ };
4529
+ };
4530
+ response: {
4531
+ encodingFormat: "float";
4532
+ embeddings: {
4533
+ index: number;
4534
+ embedding: number[];
4535
+ }[];
4536
+ usage?: {
4537
+ totalTokens: number;
4538
+ } | undefined;
4539
+ } | {
4540
+ encodingFormat: "base64";
4541
+ embeddings: {
4542
+ index: number;
4543
+ embedding: string;
4544
+ }[];
4545
+ usage?: {
4546
+ totalTokens: number;
4547
+ } | undefined;
4548
+ };
4549
+ latencyInMs: number;
4550
+ provider: {
4551
+ request?: any;
4552
+ response?: any;
4553
+ };
4554
+ metadataForCallbacks?: any;
4555
+ }> | undefined;
4556
+ getEmbeddingsCallbacks?: [GetEmbeddingsCallbackType, ...GetEmbeddingsCallbackType[]] | undefined;
4557
+ streamChatCallbacks?: [StreamChatCallbackType, ...StreamChatCallbackType[]] | undefined;
3265
4558
  }>;
3266
4559
  type GatewayOptionsType = z.infer<typeof GatewayOptions>;
3267
4560
  declare const GatewayCompleteChatRequest: z.ZodObject<{
@@ -3594,14 +4887,11 @@ declare const GatewayCompleteChatRequest: z.ZodObject<{
3594
4887
  metadata?: any;
3595
4888
  }>]>, "many">>;
3596
4889
  options: z.ZodOptional<z.ZodObject<{
3597
- queuePriority: z.ZodOptional<z.ZodNumber>;
3598
4890
  metadataForCallbacks: z.ZodOptional<z.ZodAny>;
3599
4891
  }, "strip", z.ZodTypeAny, {
3600
4892
  metadataForCallbacks?: any;
3601
- queuePriority?: number | undefined;
3602
4893
  }, {
3603
4894
  metadataForCallbacks?: any;
3604
- queuePriority?: number | undefined;
3605
4895
  }>>;
3606
4896
  }, "strip", z.ZodTypeAny, {
3607
4897
  config: Record<string, any>;
@@ -3688,7 +4978,6 @@ declare const GatewayCompleteChatRequest: z.ZodObject<{
3688
4978
  }[];
3689
4979
  options?: {
3690
4980
  metadataForCallbacks?: any;
3691
- queuePriority?: number | undefined;
3692
4981
  } | undefined;
3693
4982
  tools?: {
3694
4983
  type: "function";
@@ -3787,7 +5076,6 @@ declare const GatewayCompleteChatRequest: z.ZodObject<{
3787
5076
  }[];
3788
5077
  options?: {
3789
5078
  metadataForCallbacks?: any;
3790
- queuePriority?: number | undefined;
3791
5079
  } | undefined;
3792
5080
  tools?: {
3793
5081
  type: "function";
@@ -4133,14 +5421,11 @@ declare const GatewayStreamChatRequest: z.ZodObject<{
4133
5421
  metadata?: any;
4134
5422
  }>]>, "many">>;
4135
5423
  options: z.ZodOptional<z.ZodObject<{
4136
- queuePriority: z.ZodOptional<z.ZodNumber>;
4137
5424
  metadataForCallbacks: z.ZodOptional<z.ZodAny>;
4138
5425
  }, "strip", z.ZodTypeAny, {
4139
5426
  metadataForCallbacks?: any;
4140
- queuePriority?: number | undefined;
4141
5427
  }, {
4142
5428
  metadataForCallbacks?: any;
4143
- queuePriority?: number | undefined;
4144
5429
  }>>;
4145
5430
  }, "strip", z.ZodTypeAny, {
4146
5431
  config: Record<string, any>;
@@ -4227,7 +5512,6 @@ declare const GatewayStreamChatRequest: z.ZodObject<{
4227
5512
  }[];
4228
5513
  options?: {
4229
5514
  metadataForCallbacks?: any;
4230
- queuePriority?: number | undefined;
4231
5515
  } | undefined;
4232
5516
  tools?: {
4233
5517
  type: "function";
@@ -4326,7 +5610,6 @@ declare const GatewayStreamChatRequest: z.ZodObject<{
4326
5610
  }[];
4327
5611
  options?: {
4328
5612
  metadataForCallbacks?: any;
4329
- queuePriority?: number | undefined;
4330
5613
  } | undefined;
4331
5614
  tools?: {
4332
5615
  type: "function";
@@ -4594,15 +5877,15 @@ type GatewayGetEmbeddingsRequestType = z.infer<typeof GatewayGetEmbeddingsReques
4594
5877
  declare class Gateway {
4595
5878
  private options;
4596
5879
  private httpClient;
4597
- private completeQueue;
4598
- private embeddingQueue;
5880
+ private queues;
5881
+ private caches;
4599
5882
  constructor(options: GatewayOptionsType);
4600
5883
  completeChat(request: GatewayCompleteChatRequestType): Promise<CompleteChatHandlerResponseType>;
4601
- private executeCompleteChatTask;
5884
+ private executeCompleteChat;
4602
5885
  streamChat(request: GatewayStreamChatRequestType): AsyncGenerator<StreamChatHandlerResponseType, void, unknown>;
4603
5886
  getEmbeddings(request: GatewayGetEmbeddingsRequestType): Promise<GetEmbeddingsHandlerResponseType>;
4604
5887
  private executeGetEmbeddingsTask;
4605
5888
  static GatewayError: typeof GatewayError;
4606
5889
  }
4607
5890
 
4608
- export { type CompleteChatCallbackType, CompleteChatHandlerRequest, type CompleteChatHandlerRequestType, CompleteChatHandlerResponse, type CompleteChatHandlerResponseType, Gateway, type GatewayCallbackType, GatewayError, type GatewayOptionsType, type GetEmbeddingsCallbackType, GetEmbeddingsHandlerRequest, type GetEmbeddingsHandlerRequestType, GetEmbeddingsHandlerResponse, type GetEmbeddingsHandlerResponseType, type HttpClient, type HttpClientResponse, IsomorphicHttpClient, IsomorphicHttpClientError, type Queue, SimpleQueue, type StreamChatCallbackType, StreamChatHandlerRequest, type StreamChatHandlerRequestType, StreamChatHandlerResponse, type StreamChatHandlerResponseType, type Task, handleCompleteChat, handleGetEmbeddings, handleStreamChat };
5891
+ export { type Cache, type CompleteChatCallbackType, CompleteChatHandlerRequest, type CompleteChatHandlerRequestType, CompleteChatHandlerResponse, type CompleteChatHandlerResponseType, Gateway, GatewayError, type GatewayOptionsType, type GetEmbeddingsCallbackType, GetEmbeddingsHandlerRequest, type GetEmbeddingsHandlerRequestType, GetEmbeddingsHandlerResponse, type GetEmbeddingsHandlerResponseType, type HttpClient, HttpClientError, type HttpClientResponse, HttpRequestError, IsomorphicHttpClient, LRUCache, type Queue, QueueOptions, type QueueOptionsType, type QueueTask, QueueTaskTimeoutError, SimpleQueue, type StreamChatCallbackType, StreamChatHandlerRequest, type StreamChatHandlerRequestType, StreamChatHandlerResponse, type StreamChatHandlerResponseType, handleCompleteChat, handleGetEmbeddings, handleStreamChat };