@elsium-ai/gateway 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -1
- package/dist/gateway.d.ts +7 -0
- package/dist/gateway.d.ts.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +223 -19
- package/dist/providers/openai-compatible.d.ts +10 -0
- package/dist/providers/openai-compatible.d.ts.map +1 -0
- package/dist/router.d.ts.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -65,6 +65,7 @@ interface Gateway {
|
|
|
65
65
|
data: T
|
|
66
66
|
response: LLMResponse
|
|
67
67
|
}>
|
|
68
|
+
extract<T>(schema: z.ZodType<T>, input: string, options?: ExtractOptions): Promise<T>
|
|
68
69
|
readonly provider: LLMProvider
|
|
69
70
|
lastCall(): XRayData | null
|
|
70
71
|
callHistory(limit?: number): XRayData[]
|
|
@@ -76,6 +77,7 @@ interface Gateway {
|
|
|
76
77
|
| `complete(request)` | Send a completion request and return the full response. |
|
|
77
78
|
| `stream(request)` | Stream a completion request, returning an async-iterable `ElsiumStream`. |
|
|
78
79
|
| `generate<T>(request)` | Structured output -- sends a Zod schema, parses and validates the LLM's JSON response. |
|
|
80
|
+
| `extract<T>(schema, input, options?)` | Structured extraction -- takes a Zod schema and text input, returns typed data with auto-retry on validation failure. |
|
|
79
81
|
| `provider` | Read-only reference to the underlying `LLMProvider` instance. |
|
|
80
82
|
| `lastCall()` | Returns the most recent `XRayData` entry, or `null` if X-Ray is disabled. |
|
|
81
83
|
| `callHistory(limit?)` | Returns up to `limit` (default 10) recent `XRayData` entries. |
|
|
@@ -141,6 +143,58 @@ const { data } = await llm.generate({
|
|
|
141
143
|
console.log(data.name) // "Mars"
|
|
142
144
|
```
|
|
143
145
|
|
|
146
|
+
#### Structured Extraction
|
|
147
|
+
|
|
148
|
+
`extract()` provides a simpler API for pulling typed data out of text. It takes a Zod schema and input text, returns the parsed object directly, and auto-retries on validation failure.
|
|
149
|
+
|
|
150
|
+
```ts
|
|
151
|
+
interface Gateway {
|
|
152
|
+
extract<T>(
|
|
153
|
+
schema: z.ZodType<T>,
|
|
154
|
+
input: string,
|
|
155
|
+
options?: ExtractOptions,
|
|
156
|
+
): Promise<T>
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**`ExtractOptions`**
|
|
161
|
+
|
|
162
|
+
```ts
|
|
163
|
+
interface ExtractOptions {
|
|
164
|
+
maxRetries?: number // Default: 3
|
|
165
|
+
temperature?: number
|
|
166
|
+
system?: string
|
|
167
|
+
model?: string
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
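On validation failure, `extract()` feeds the validation error message back to the LLM and retries up to `maxRetries` times (so at most `maxRetries + 1` requests in total). If every attempt fails validation, the last validation error is thrown; any other error is rethrown immediately without retrying. The return type is inferred from the schema.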
|
|
172
|
+
|
|
173
|
+
```ts
|
|
174
|
+
import { gateway } from '@elsium-ai/gateway'
|
|
175
|
+
import { z } from 'zod'
|
|
176
|
+
|
|
177
|
+
const llm = gateway({
|
|
178
|
+
provider: 'anthropic',
|
|
179
|
+
apiKey: process.env.ANTHROPIC_API_KEY!,
|
|
180
|
+
})
|
|
181
|
+
|
|
182
|
+
const ContactInfo = z.object({
|
|
183
|
+
name: z.string(),
|
|
184
|
+
email: z.string().email(),
|
|
185
|
+
role: z.string(),
|
|
186
|
+
})
|
|
187
|
+
|
|
188
|
+
const contact = await llm.extract(
|
|
189
|
+
ContactInfo,
|
|
190
|
+
'Reach out to Jane Smith (jane@acme.com), she is the VP of Engineering.',
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
console.log(contact.name) // "Jane Smith"
|
|
194
|
+
console.log(contact.email) // "jane@acme.com"
|
|
195
|
+
console.log(contact.role) // "VP of Engineering"
|
|
196
|
+
```
|
|
197
|
+
|
|
144
198
|
#### X-Ray Mode
|
|
145
199
|
|
|
146
200
|
```ts
|
|
@@ -295,6 +349,46 @@ const response = await provider.complete({
|
|
|
295
349
|
})
|
|
296
350
|
```
|
|
297
351
|
|
|
352
|
+
### `createOpenAICompatibleProvider(config)`
|
|
353
|
+
|
|
354
|
+
Creates an LLM provider for any API that follows the OpenAI chat completions format (e.g. Groq, Together, Ollama, LM Studio, Azure OpenAI).
|
|
355
|
+
|
|
356
|
+
```ts
|
|
357
|
+
function createOpenAICompatibleProvider(config: {
|
|
358
|
+
baseUrl: string
|
|
359
|
+
apiKey: string
|
|
360
|
+
name?: string
|
|
361
|
+
defaultModel?: string
|
|
362
|
+
capabilities?: string[]
|
|
363
|
+
}): LLMProvider
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
| Parameter | Type | Default | Description |
|
|
367
|
+
|---|---|---|---|
|
|
368
|
+
| `config.baseUrl` | `string` | **(required)** | Base URL of the OpenAI-compatible API. |
|
|
369
|
+
| `config.apiKey` | `string` | **(required)** | API key for the provider. |
|
|
370
|
+
| `config.name` | `string` | `'openai-compatible'` | Provider name used in logging and routing. |
|
|
371
|
+
| `config.defaultModel` | `string` | `'default'` | Default model when none is specified per-request. |
|
|
372
|
+
| `config.capabilities` | `string[]` | `['tools', 'streaming', 'system']` | Capabilities to advertise (used by `capability-aware` routing). |
|
|
373
|
+
|
|
374
|
+
**Returns:** An `LLMProvider` that sends requests to the given base URL using the OpenAI request/response format.
|
|
375
|
+
|
|
376
|
+
```ts
|
|
377
|
+
import { createOpenAICompatibleProvider } from '@elsium-ai/gateway'
|
|
378
|
+
|
|
379
|
+
const provider = createOpenAICompatibleProvider({
|
|
380
|
+
baseUrl: 'https://api.groq.com/openai',
|
|
381
|
+
apiKey: process.env.GROQ_API_KEY!,
|
|
382
|
+
name: 'groq',
|
|
383
|
+
defaultModel: 'llama-3.3-70b-versatile',
|
|
384
|
+
capabilities: ['tools', 'streaming'],
|
|
385
|
+
})
|
|
386
|
+
|
|
387
|
+
const response = await provider.complete({
|
|
388
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
389
|
+
})
|
|
390
|
+
```
|
|
391
|
+
|
|
298
392
|
### `createGoogleProvider(config)`
|
|
299
393
|
|
|
300
394
|
Creates an LLM provider for the Google Gemini API.
|
|
@@ -962,7 +1056,7 @@ interface ProviderMesh {
|
|
|
962
1056
|
| Member | Description |
|
|
963
1057
|
|---|---|
|
|
964
1058
|
| `complete(request)` | Routes a completion request according to the configured strategy. |
|
|
965
|
-
| `stream(request)` | Streams from the first available provider (respects circuit breaker state). |
|
|
1059
|
+
| `stream(request)` | Streams from the first available provider with automatic failover across all four routing strategies (respects circuit breaker state). |
|
|
966
1060
|
| `providers` | List of provider names in the mesh. |
|
|
967
1061
|
| `strategy` | The active routing strategy. |
|
|
968
1062
|
|
package/dist/gateway.d.ts
CHANGED
|
@@ -17,6 +17,12 @@ export interface GatewayConfig {
|
|
|
17
17
|
maxMessages?: number;
|
|
18
18
|
maxInputTokens?: number;
|
|
19
19
|
}
|
|
20
|
+
export interface ExtractOptions {
|
|
21
|
+
model?: string;
|
|
22
|
+
system?: string;
|
|
23
|
+
maxRetries?: number;
|
|
24
|
+
temperature?: number;
|
|
25
|
+
}
|
|
20
26
|
export interface Gateway {
|
|
21
27
|
complete(request: CompletionRequest): Promise<LLMResponse>;
|
|
22
28
|
stream(request: CompletionRequest): ElsiumStream;
|
|
@@ -26,6 +32,7 @@ export interface Gateway {
|
|
|
26
32
|
data: T;
|
|
27
33
|
response: LLMResponse;
|
|
28
34
|
}>;
|
|
35
|
+
extract<T>(schema: z.ZodType<T>, input: string, options?: ExtractOptions): Promise<T>;
|
|
29
36
|
readonly provider: LLMProvider;
|
|
30
37
|
lastCall(): XRayData | null;
|
|
31
38
|
callHistory(limit?: number): XRayData[];
|
package/dist/gateway.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gateway.d.ts","sourceRoot":"","sources":["../src/gateway.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACX,iBAAiB,EACjB,WAAW,EACX,UAAU,EAEV,cAAc,EAEd,gBAAgB,EAChB,QAAQ,EACR,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAEN,KAAK,YAAY,EAIjB,MAAM,iBAAiB,CAAA;AACxB,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAI5B,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;
|
|
1
|
+
{"version":3,"file":"gateway.d.ts","sourceRoot":"","sources":["../src/gateway.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACX,iBAAiB,EACjB,WAAW,EACX,UAAU,EAEV,cAAc,EAEd,gBAAgB,EAChB,QAAQ,EACR,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAEN,KAAK,YAAY,EAIjB,MAAM,iBAAiB,CAAA;AACxB,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAI5B,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;AAY7C,MAAM,WAAW,aAAa;IAC7B,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,UAAU,EAAE,CAAA;IACzB,gBAAgB,CAAC,EAAE,gBAAgB,EAAE,CAAA;IACrC,IAAI,CAAC,EAAE,OAAO,GAAG;QAAE,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;IACxC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,cAAc,CAAC,EAAE,MAAM,CAAA;CACvB;AAED,MAAM,WAAW,cAAc;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,OAAO;IACvB,QAAQ,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAC1D,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,YAAY,CAAA;IAChD,QAAQ,CAAC,CAAC,EAAE,OAAO,EAAE,iBAAiB,GAAG;QAAE,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;KAAE,GAAG,OAAO,CAAC;QAC3E,IAAI,EAAE,CAAC,CAAA;QACP,QAAQ,EAAE,WAAW,CAAA;KACrB,CAAC,CAAA;IACF,OAAO,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,CAAC,CAAC,CAAA;IACrF,QAAQ,CAAC,QAAQ,EAAE,WAAW,CAAA;IAC9B,QAAQ,IAAI,QAAQ,GAAG,IAAI,CAAA;IAC3B,WAAW,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,QAAQ,EAAE,CAAA;CACvC;AA4BD,wBAAgB,uBAAuB,CACtC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,CAAC,MAAM,EAAE,cAAc,KAAK,WAAW,GAC9C,IAAI,CAGN;AAwJD,wBAAgB,OAAO,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAsKtD"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
export { gateway, registerProviderFactory } from './gateway';
|
|
2
|
-
export type { GatewayConfig, Gateway } from './gateway';
|
|
2
|
+
export type { GatewayConfig, Gateway, ExtractOptions } from './gateway';
|
|
3
3
|
export type { LLMProvider, ProviderFactory, ProviderMetadata, ModelPricing, ModelTier, } from './provider';
|
|
4
4
|
export { registerProvider, getProviderFactory, listProviders, registerProviderMetadata, getProviderMetadata, } from './provider';
|
|
5
5
|
export { createAnthropicProvider } from './providers/anthropic';
|
|
6
6
|
export { createOpenAIProvider } from './providers/openai';
|
|
7
7
|
export { createGoogleProvider } from './providers/google';
|
|
8
|
+
export { createOpenAICompatibleProvider } from './providers/openai-compatible';
|
|
9
|
+
export type { OpenAICompatibleConfig } from './providers/openai-compatible';
|
|
8
10
|
export { composeMiddleware, composeStreamMiddleware, loggingMiddleware, costTrackingMiddleware, xrayMiddleware, } from './middleware';
|
|
9
11
|
export type { XRayStore } from './middleware';
|
|
10
12
|
export { securityMiddleware, detectPromptInjection, detectJailbreak, redactSecrets, checkBlockedPatterns, classifyContent, } from './security';
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,uBAAuB,EAAE,MAAM,WAAW,CAAA;AAC5D,YAAY,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,uBAAuB,EAAE,MAAM,WAAW,CAAA;AAC5D,YAAY,EAAE,aAAa,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAGvE,YAAY,EACX,WAAW,EACX,eAAe,EACf,gBAAgB,EAChB,YAAY,EACZ,SAAS,GACT,MAAM,YAAY,CAAA;AACnB,OAAO,EACN,gBAAgB,EAChB,kBAAkB,EAClB,aAAa,EACb,wBAAwB,EACxB,mBAAmB,GACnB,MAAM,YAAY,CAAA;AAGnB,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAA;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AACzD,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AACzD,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAA;AAC9E,YAAY,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAG3E,OAAO,EACN,iBAAiB,EACjB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,GACd,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,SAAS,EAAE,MAAM,cAAc,CAAA;AAG7C,OAAO,EACN,kBAAkB,EAClB,qBAAqB,EACrB,eAAe,EACf,aAAa,EACb,oBAAoB,EACpB,eAAe,GACf,MAAM,YAAY,CAAA;AACnB,YAAY,EACX,wBAAwB,EACxB,iBAAiB,EACjB,cAAc,EACd,kBAAkB,EAClB,oBAAoB,GACpB,MAAM,YAAY,CAAA;AAGnB,OAAO,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAA;AAC/D,YAAY,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AAG1D,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAA;AAC9D,YAAY,EAAE,YAAY,EAAE,UAAU,EAAE,qBAAqB,EAAE,MAAM,SAAS,CAAA;AAG9E,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAA;AAC/D,YAAY,EACX,qBAAqB,EACrB,mBAAmB,EACnB,eAAe,GACf,MAAM,qBAAqB,CAAA;AAG5B,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,WAAW,CAAA;AAGxE,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAA;AACrC,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAGxE,OAAO,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AAC7C,YAAY,EACX,kBAAkB,EAClB,aAAa,EACb,eAAe,EACf,YAAY,EACZ,eAAe,GACf,MAAM,UAAU,CAAA"}
|
package/dist/index.js
CHANGED
|
@@ -1991,11 +1991,44 @@ async function processOpenAISSEStream(body, emit) {
|
|
|
1991
1991
|
}
|
|
1992
1992
|
}
|
|
1993
1993
|
|
|
1994
|
+
// src/providers/openai-compatible.ts
|
|
1995
|
+
function createOpenAICompatibleProvider(config) {
|
|
1996
|
+
const providerName = config.name ?? "openai-compatible";
|
|
1997
|
+
const model = config.defaultModel ?? "default";
|
|
1998
|
+
const inner = createOpenAIProvider({
|
|
1999
|
+
apiKey: config.apiKey,
|
|
2000
|
+
baseUrl: config.baseUrl,
|
|
2001
|
+
timeout: config.timeout,
|
|
2002
|
+
maxRetries: config.maxRetries
|
|
2003
|
+
});
|
|
2004
|
+
const metadata = {
|
|
2005
|
+
baseUrl: `${config.baseUrl}/v1/chat/completions`,
|
|
2006
|
+
capabilities: config.capabilities ?? ["tools", "streaming", "system"],
|
|
2007
|
+
authStyle: "bearer"
|
|
2008
|
+
};
|
|
2009
|
+
return {
|
|
2010
|
+
name: providerName,
|
|
2011
|
+
defaultModel: model,
|
|
2012
|
+
metadata,
|
|
2013
|
+
async complete(request) {
|
|
2014
|
+
const response = await inner.complete(request);
|
|
2015
|
+
return { ...response, provider: providerName };
|
|
2016
|
+
},
|
|
2017
|
+
stream(request) {
|
|
2018
|
+
return inner.stream(request);
|
|
2019
|
+
},
|
|
2020
|
+
async listModels() {
|
|
2021
|
+
return inner.listModels();
|
|
2022
|
+
}
|
|
2023
|
+
};
|
|
2024
|
+
}
|
|
2025
|
+
|
|
1994
2026
|
// src/gateway.ts
|
|
1995
2027
|
var PROVIDER_FACTORIES = {
|
|
1996
2028
|
anthropic: createAnthropicProvider,
|
|
1997
2029
|
openai: createOpenAIProvider,
|
|
1998
|
-
google: createGoogleProvider
|
|
2030
|
+
google: createGoogleProvider,
|
|
2031
|
+
"openai-compatible": (cfg) => createOpenAICompatibleProvider({ ...cfg, baseUrl: cfg.baseUrl ?? "" })
|
|
1999
2032
|
};
|
|
2000
2033
|
registerProviderMetadata("anthropic", {
|
|
2001
2034
|
baseUrl: "https://api.anthropic.com/v1/messages",
|
|
@@ -2218,6 +2251,38 @@ function gateway(config) {
|
|
|
2218
2251
|
});
|
|
2219
2252
|
}
|
|
2220
2253
|
return { data: result.data, response };
|
|
2254
|
+
},
|
|
2255
|
+
async extract(schema, input, options) {
|
|
2256
|
+
const maxRetries = options?.maxRetries ?? 3;
|
|
2257
|
+
const messages = [{ role: "user", content: input }];
|
|
2258
|
+
let lastError;
|
|
2259
|
+
for (let attempt = 0;attempt <= maxRetries; attempt++) {
|
|
2260
|
+
try {
|
|
2261
|
+
const result = await this.generate({
|
|
2262
|
+
messages: [...messages],
|
|
2263
|
+
schema,
|
|
2264
|
+
model: options?.model,
|
|
2265
|
+
system: options?.system,
|
|
2266
|
+
temperature: options?.temperature
|
|
2267
|
+
});
|
|
2268
|
+
return result.data;
|
|
2269
|
+
} catch (e) {
|
|
2270
|
+
if (e instanceof ElsiumError && e.code === "VALIDATION_ERROR") {
|
|
2271
|
+
lastError = e;
|
|
2272
|
+
messages.push({
|
|
2273
|
+
role: "assistant",
|
|
2274
|
+
content: "Invalid output"
|
|
2275
|
+
});
|
|
2276
|
+
messages.push({
|
|
2277
|
+
role: "user",
|
|
2278
|
+
content: `The previous response failed validation: ${e.message}. Please try again and return valid JSON matching the schema.`
|
|
2279
|
+
});
|
|
2280
|
+
continue;
|
|
2281
|
+
}
|
|
2282
|
+
throw e;
|
|
2283
|
+
}
|
|
2284
|
+
}
|
|
2285
|
+
throw lastError;
|
|
2221
2286
|
}
|
|
2222
2287
|
};
|
|
2223
2288
|
}
|
|
@@ -3154,6 +3219,151 @@ function createProviderMesh(config) {
|
|
|
3154
3219
|
return ["streaming"];
|
|
3155
3220
|
}
|
|
3156
3221
|
}
|
|
3222
|
+
function errorStream(message) {
|
|
3223
|
+
return new ElsiumStream(async function* () {
|
|
3224
|
+
yield {
|
|
3225
|
+
type: "error",
|
|
3226
|
+
error: new ElsiumError({
|
|
3227
|
+
code: "PROVIDER_ERROR",
|
|
3228
|
+
message,
|
|
3229
|
+
retryable: false
|
|
3230
|
+
})
|
|
3231
|
+
};
|
|
3232
|
+
}());
|
|
3233
|
+
}
|
|
3234
|
+
function logStreamFailover(provider, toProvider, error) {
|
|
3235
|
+
audit?.log("provider_failover", {
|
|
3236
|
+
fromProvider: provider,
|
|
3237
|
+
toProvider,
|
|
3238
|
+
strategy: config.strategy,
|
|
3239
|
+
reason: error?.message
|
|
3240
|
+
});
|
|
3241
|
+
}
|
|
3242
|
+
async function tryStreamProvider(entry, request, emit) {
|
|
3243
|
+
const gw = getGateway(entry.name);
|
|
3244
|
+
const providerStream = await callWithCircuitBreaker(entry.name, async () => gw.stream({ ...request, model: request.model ?? entry.model }));
|
|
3245
|
+
let hasEmittedContent = false;
|
|
3246
|
+
for await (const event of providerStream) {
|
|
3247
|
+
if (event.type === "error") {
|
|
3248
|
+
const err2 = event.error instanceof Error ? event.error : new Error(String(event.error));
|
|
3249
|
+
if (hasEmittedContent) {
|
|
3250
|
+
emit(event);
|
|
3251
|
+
return { success: true };
|
|
3252
|
+
}
|
|
3253
|
+
return { success: false, error: err2 };
|
|
3254
|
+
}
|
|
3255
|
+
hasEmittedContent = true;
|
|
3256
|
+
emit(event);
|
|
3257
|
+
}
|
|
3258
|
+
return { success: true };
|
|
3259
|
+
}
|
|
3260
|
+
async function runStreamFallbackLoop(available, request, emit) {
|
|
3261
|
+
let lastError = null;
|
|
3262
|
+
let failedProvider = null;
|
|
3263
|
+
for (let i = 0;i < available.length; i++) {
|
|
3264
|
+
const entry = available[i];
|
|
3265
|
+
const nextProvider = i + 1 < available.length ? available[i + 1].name : "none";
|
|
3266
|
+
try {
|
|
3267
|
+
const result = await tryStreamProvider(entry, request, emit);
|
|
3268
|
+
if (result.success) {
|
|
3269
|
+
if (failedProvider)
|
|
3270
|
+
logFailover(failedProvider, entry.name, lastError?.message);
|
|
3271
|
+
return;
|
|
3272
|
+
}
|
|
3273
|
+
lastError = result.error ?? null;
|
|
3274
|
+
failedProvider = entry.name;
|
|
3275
|
+
logStreamFailover(entry.name, nextProvider, result.error);
|
|
3276
|
+
} catch (err2) {
|
|
3277
|
+
failedProvider = entry.name;
|
|
3278
|
+
lastError = toError2(err2);
|
|
3279
|
+
logStreamFailover(entry.name, nextProvider, lastError);
|
|
3280
|
+
}
|
|
3281
|
+
}
|
|
3282
|
+
emit({
|
|
3283
|
+
type: "error",
|
|
3284
|
+
error: lastError ?? new ElsiumError({
|
|
3285
|
+
code: "PROVIDER_ERROR",
|
|
3286
|
+
message: "All providers failed during streaming",
|
|
3287
|
+
retryable: false
|
|
3288
|
+
})
|
|
3289
|
+
});
|
|
3290
|
+
}
|
|
3291
|
+
function streamWithFallback(providers, request) {
|
|
3292
|
+
const available = providers.filter((e) => isProviderAvailable(e.name));
|
|
3293
|
+
if (available.length === 0) {
|
|
3294
|
+
return errorStream("All providers unavailable");
|
|
3295
|
+
}
|
|
3296
|
+
return createStream(async (emit) => {
|
|
3297
|
+
await runStreamFallbackLoop(available, request, emit);
|
|
3298
|
+
});
|
|
3299
|
+
}
|
|
3300
|
+
function streamCostOptimized(request) {
|
|
3301
|
+
const optimizer = config.costOptimizer;
|
|
3302
|
+
if (!optimizer) {
|
|
3303
|
+
return streamWithFallback(sortedProviders, request);
|
|
3304
|
+
}
|
|
3305
|
+
const complexity = estimateComplexity(request);
|
|
3306
|
+
const threshold = optimizer.complexityThreshold ?? 0.5;
|
|
3307
|
+
const target = complexity < threshold ? optimizer.simpleModel : optimizer.complexModel;
|
|
3308
|
+
return createStream(async (emit) => {
|
|
3309
|
+
try {
|
|
3310
|
+
const gw = getGateway(target.provider);
|
|
3311
|
+
const providerStream = gw.stream({ ...request, model: target.model });
|
|
3312
|
+
for await (const event of providerStream) {
|
|
3313
|
+
emit(event);
|
|
3314
|
+
}
|
|
3315
|
+
} catch {
|
|
3316
|
+
const fallbackStream = streamWithFallback(sortedProviders, request);
|
|
3317
|
+
for await (const event of fallbackStream) {
|
|
3318
|
+
emit(event);
|
|
3319
|
+
}
|
|
3320
|
+
}
|
|
3321
|
+
});
|
|
3322
|
+
}
|
|
3323
|
+
function streamLatencyOptimized(request) {
|
|
3324
|
+
const available = sortedProviders.filter((e) => isProviderAvailable(e.name));
|
|
3325
|
+
if (available.length === 0) {
|
|
3326
|
+
return errorStream("All providers unavailable");
|
|
3327
|
+
}
|
|
3328
|
+
return createStream(async (emit) => {
|
|
3329
|
+
const controller = new AbortController;
|
|
3330
|
+
const racePromises = available.map(async (entry) => {
|
|
3331
|
+
const gw = getGateway(entry.name);
|
|
3332
|
+
return callWithCircuitBreaker(entry.name, async () => ({
|
|
3333
|
+
entry,
|
|
3334
|
+
stream: gw.stream({
|
|
3335
|
+
...request,
|
|
3336
|
+
model: request.model ?? entry.model,
|
|
3337
|
+
signal: controller.signal
|
|
3338
|
+
})
|
|
3339
|
+
}));
|
|
3340
|
+
});
|
|
3341
|
+
try {
|
|
3342
|
+
const winner = await Promise.any(racePromises);
|
|
3343
|
+
controller.abort();
|
|
3344
|
+
for await (const event of winner.stream) {
|
|
3345
|
+
emit(event);
|
|
3346
|
+
}
|
|
3347
|
+
} catch {
|
|
3348
|
+
emit({
|
|
3349
|
+
type: "error",
|
|
3350
|
+
error: new ElsiumError({
|
|
3351
|
+
code: "PROVIDER_ERROR",
|
|
3352
|
+
message: "All providers failed during streaming",
|
|
3353
|
+
retryable: false
|
|
3354
|
+
})
|
|
3355
|
+
});
|
|
3356
|
+
}
|
|
3357
|
+
});
|
|
3358
|
+
}
|
|
3359
|
+
function streamCapabilityAware(request) {
|
|
3360
|
+
const capabilities = detectRequiredCapabilities(request);
|
|
3361
|
+
const capable = filterCapableProviders(capabilities);
|
|
3362
|
+
if (capable.length === 0) {
|
|
3363
|
+
return streamWithFallback(sortedProviders, request);
|
|
3364
|
+
}
|
|
3365
|
+
return streamWithFallback(capable, request);
|
|
3366
|
+
}
|
|
3157
3367
|
return {
|
|
3158
3368
|
providers: sortedProviders.map((p) => p.name),
|
|
3159
3369
|
strategy: config.strategy,
|
|
@@ -3172,25 +3382,18 @@ function createProviderMesh(config) {
|
|
|
3172
3382
|
}
|
|
3173
3383
|
},
|
|
3174
3384
|
stream(request) {
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3179
|
-
|
|
3180
|
-
|
|
3181
|
-
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
message: "Circuit breaker is open",
|
|
3187
|
-
retryable: true
|
|
3188
|
-
});
|
|
3189
|
-
return new ElsiumStream(async function* () {
|
|
3190
|
-
yield { type: "error", error: err2 };
|
|
3191
|
-
}());
|
|
3385
|
+
switch (config.strategy) {
|
|
3386
|
+
case "fallback":
|
|
3387
|
+
return streamWithFallback(sortedProviders, request);
|
|
3388
|
+
case "cost-optimized":
|
|
3389
|
+
return streamCostOptimized(request);
|
|
3390
|
+
case "latency-optimized":
|
|
3391
|
+
return streamLatencyOptimized(request);
|
|
3392
|
+
case "capability-aware":
|
|
3393
|
+
return streamCapabilityAware(request);
|
|
3394
|
+
default:
|
|
3395
|
+
return streamWithFallback(sortedProviders, request);
|
|
3192
3396
|
}
|
|
3193
|
-
return resolvedStream;
|
|
3194
3397
|
}
|
|
3195
3398
|
};
|
|
3196
3399
|
}
|
|
@@ -3213,6 +3416,7 @@ export {
|
|
|
3213
3416
|
detectJailbreak,
|
|
3214
3417
|
createProviderMesh,
|
|
3215
3418
|
createOpenAIProvider,
|
|
3419
|
+
createOpenAICompatibleProvider,
|
|
3216
3420
|
createInMemoryCache,
|
|
3217
3421
|
createGoogleProvider,
|
|
3218
3422
|
createBulkhead,
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { ProviderConfig } from '@elsium-ai/core';
|
|
2
|
+
import type { LLMProvider } from '../provider';
|
|
3
|
+
export interface OpenAICompatibleConfig extends ProviderConfig {
|
|
4
|
+
baseUrl: string;
|
|
5
|
+
name?: string;
|
|
6
|
+
defaultModel?: string;
|
|
7
|
+
capabilities?: string[];
|
|
8
|
+
}
|
|
9
|
+
export declare function createOpenAICompatibleProvider(config: OpenAICompatibleConfig): LLMProvider;
|
|
10
|
+
//# sourceMappingURL=openai-compatible.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-compatible.d.ts","sourceRoot":"","sources":["../../src/providers/openai-compatible.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AACrD,OAAO,KAAK,EAAE,WAAW,EAAoB,MAAM,aAAa,CAAA;AAGhE,MAAM,WAAW,sBAAuB,SAAQ,cAAc;IAC7D,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAA;CACvB;AAED,wBAAgB,8BAA8B,CAAC,MAAM,EAAE,sBAAsB,GAAG,WAAW,CAmC1F"}
|
package/dist/router.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../src/router.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAA;AACrE,OAAO,EAEN,KAAK,oBAAoB,EAEzB,YAAY,
|
|
1
|
+
{"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../src/router.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAA;AACrE,OAAO,EAEN,KAAK,oBAAoB,EAEzB,YAAY,EAGZ,MAAM,iBAAiB,CAAA;AAKxB,MAAM,MAAM,eAAe,GACxB,UAAU,GACV,gBAAgB,GAChB,mBAAmB,GACnB,kBAAkB,CAAA;AAErB,MAAM,WAAW,aAAa;IAC7B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,YAAY,CAAC,EAAE,MAAM,EAAE,CAAA;CACvB;AAED,MAAM,WAAW,mBAAmB;IACnC,WAAW,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAA;IAChD,YAAY,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAA;IACjD,mBAAmB,CAAC,EAAE,MAAM,CAAA;CAC5B;AAED,MAAM,WAAW,eAAe;IAC/B,GAAG,CACF,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC7B,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAC5C,IAAI,CAAA;CACP;AAED,MAAM,WAAW,kBAAkB;IAClC,SAAS,EAAE,aAAa,EAAE,CAAA;IAC1B,QAAQ,EAAE,eAAe,CAAA;IACzB,aAAa,CAAC,EAAE,mBAAmB,CAAA;IACnC,cAAc,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAA;IAC/C,KAAK,CAAC,EAAE,eAAe,CAAA;CACvB;AAED,MAAM,WAAW,YAAY;IAC5B,QAAQ,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAC1D,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,YAAY,CAAA;IAChD,QAAQ,CAAC,SAAS,EAAE,MAAM,EAAE,CAAA;IAC5B,QAAQ,CAAC,QAAQ,EAAE,eAAe,CAAA;CAClC;AAoDD,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,kBAAkB,GAAG,YAAY,CAsb3E"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@elsium-ai/gateway",
|
|
3
|
-
"version": "0.7.0",
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "Multi-provider LLM gateway for ElsiumAI",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Eric Utrera <ebutrera9103@gmail.com>",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"dev": "bun --watch src/index.ts"
|
|
27
27
|
},
|
|
28
28
|
"dependencies": {
|
|
29
|
-
"@elsium-ai/core": "^0.
|
|
29
|
+
"@elsium-ai/core": "^0.9.0",
|
|
30
30
|
"zod": "^3.24.0"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|