gavio 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/config.js +106 -0
- package/dist/cjs/errors.js +29 -1
- package/dist/cjs/gateway.js +42 -0
- package/dist/cjs/interceptors/audit/index.js +4 -1
- package/dist/cjs/interceptors/audit/interceptor.js +7 -0
- package/dist/cjs/interceptors/audit/trace.js +43 -0
- package/dist/cjs/interceptors/cache/embedding.js +53 -0
- package/dist/cjs/interceptors/cache/index.js +9 -5
- package/dist/cjs/interceptors/cache/interceptor.js +80 -0
- package/dist/cjs/interceptors/cache/vector.js +35 -0
- package/dist/cjs/interceptors/governance/budget.js +45 -0
- package/dist/cjs/interceptors/governance/index.js +10 -0
- package/dist/cjs/interceptors/governance/model-policy.js +18 -0
- package/dist/cjs/interceptors/governance/rate-limit.js +46 -0
- package/dist/cjs/interceptors/guardrails/index.js +11 -0
- package/dist/cjs/interceptors/guardrails/interceptor.js +40 -0
- package/dist/cjs/interceptors/guardrails/validator.js +8 -0
- package/dist/cjs/interceptors/guardrails/validators/regex.js +32 -0
- package/dist/cjs/interceptors/guardrails/validators/schema.js +63 -0
- package/dist/cjs/interceptors/injection.js +62 -0
- package/dist/cjs/interceptors/reliability/circuit-breaker.js +82 -0
- package/dist/cjs/interceptors/reliability/index.js +6 -1
- package/dist/cjs/interceptors/reliability/load-balancer.js +38 -0
- package/dist/cjs/pricing.js +5 -1
- package/dist/cjs/providers/azure-openai.js +56 -0
- package/dist/cjs/providers/gemini.js +73 -0
- package/dist/cjs/providers/index.js +22 -6
- package/dist/cjs/providers/ollama.js +41 -0
- package/dist/cjs/shim/openai.js +57 -0
- package/dist/esm/config.d.ts +12 -0
- package/dist/esm/config.js +102 -0
- package/dist/esm/errors.d.ts +17 -0
- package/dist/esm/errors.js +24 -0
- package/dist/esm/gateway.d.ts +5 -0
- package/dist/esm/gateway.js +9 -0
- package/dist/esm/interceptors/audit/index.d.ts +2 -0
- package/dist/esm/interceptors/audit/index.js +1 -0
- package/dist/esm/interceptors/audit/interceptor.d.ts +2 -0
- package/dist/esm/interceptors/audit/interceptor.js +7 -0
- package/dist/esm/interceptors/audit/trace.d.ts +19 -0
- package/dist/esm/interceptors/audit/trace.js +39 -0
- package/dist/esm/interceptors/cache/embedding.d.ts +14 -0
- package/dist/esm/interceptors/cache/embedding.js +49 -0
- package/dist/esm/interceptors/cache/index.d.ts +7 -4
- package/dist/esm/interceptors/cache/index.js +4 -4
- package/dist/esm/interceptors/cache/interceptor.d.ts +19 -0
- package/dist/esm/interceptors/cache/interceptor.js +77 -0
- package/dist/esm/interceptors/cache/vector.d.ts +9 -0
- package/dist/esm/interceptors/cache/vector.js +32 -0
- package/dist/esm/interceptors/governance/budget.d.ts +11 -0
- package/dist/esm/interceptors/governance/budget.js +42 -0
- package/dist/esm/interceptors/governance/index.d.ts +7 -0
- package/dist/esm/interceptors/governance/index.js +4 -0
- package/dist/esm/interceptors/governance/model-policy.d.ts +8 -0
- package/dist/esm/interceptors/governance/model-policy.js +15 -0
- package/dist/esm/interceptors/governance/rate-limit.d.ts +9 -0
- package/dist/esm/interceptors/governance/rate-limit.js +43 -0
- package/dist/esm/interceptors/guardrails/index.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/index.js +4 -0
- package/dist/esm/interceptors/guardrails/interceptor.d.ts +15 -0
- package/dist/esm/interceptors/guardrails/interceptor.js +37 -0
- package/dist/esm/interceptors/guardrails/validator.d.ts +11 -0
- package/dist/esm/interceptors/guardrails/validator.js +3 -0
- package/dist/esm/interceptors/guardrails/validators/regex.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/validators/regex.js +28 -0
- package/dist/esm/interceptors/guardrails/validators/schema.d.ts +5 -0
- package/dist/esm/interceptors/guardrails/validators/schema.js +60 -0
- package/dist/esm/interceptors/injection.d.ts +17 -0
- package/dist/esm/interceptors/injection.js +59 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.d.ts +15 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.js +78 -0
- package/dist/esm/interceptors/reliability/index.d.ts +4 -0
- package/dist/esm/interceptors/reliability/index.js +2 -0
- package/dist/esm/interceptors/reliability/load-balancer.d.ts +8 -0
- package/dist/esm/interceptors/reliability/load-balancer.js +35 -0
- package/dist/esm/pricing.js +5 -1
- package/dist/esm/providers/azure-openai.d.ts +28 -0
- package/dist/esm/providers/azure-openai.js +53 -0
- package/dist/esm/providers/gemini.d.ts +36 -0
- package/dist/esm/providers/gemini.js +69 -0
- package/dist/esm/providers/index.d.ts +7 -1
- package/dist/esm/providers/index.js +18 -5
- package/dist/esm/providers/ollama.d.ts +21 -0
- package/dist/esm/providers/ollama.js +38 -0
- package/dist/esm/shim/openai.d.ts +56 -0
- package/dist/esm/shim/openai.js +53 -0
- package/package.json +31 -2
- package/src/config.ts +125 -0
- package/src/errors.ts +28 -0
- package/src/gateway.ts +10 -0
- package/src/interceptors/audit/index.ts +2 -0
- package/src/interceptors/audit/interceptor.ts +9 -0
- package/src/interceptors/audit/trace.ts +47 -0
- package/src/interceptors/cache/embedding.ts +53 -0
- package/src/interceptors/cache/index.ts +7 -4
- package/src/interceptors/cache/interceptor.ts +111 -0
- package/src/interceptors/cache/vector.ts +45 -0
- package/src/interceptors/governance/budget.ts +59 -0
- package/src/interceptors/governance/index.ts +8 -0
- package/src/interceptors/governance/model-policy.ts +25 -0
- package/src/interceptors/governance/rate-limit.ts +63 -0
- package/src/interceptors/guardrails/index.ts +7 -0
- package/src/interceptors/guardrails/interceptor.ts +56 -0
- package/src/interceptors/guardrails/validator.ts +14 -0
- package/src/interceptors/guardrails/validators/regex.ts +29 -0
- package/src/interceptors/guardrails/validators/schema.ts +62 -0
- package/src/interceptors/injection.ts +72 -0
- package/src/interceptors/reliability/circuit-breaker.ts +102 -0
- package/src/interceptors/reliability/index.ts +4 -0
- package/src/interceptors/reliability/load-balancer.ts +56 -0
- package/src/pricing.ts +5 -1
- package/src/providers/azure-openai.ts +77 -0
- package/src/providers/gemini.ts +95 -0
- package/src/providers/index.ts +21 -5
- package/src/providers/ollama.ts +61 -0
- package/src/shim/openai.ts +76 -0
|
@@ -6,3 +6,7 @@ export { timeoutPolicy, timeout } from './timeout.js'
|
|
|
6
6
|
export type { TimeoutPolicyOptions } from './timeout.js'
|
|
7
7
|
export { fallbackChain } from './fallback.js'
|
|
8
8
|
export type { FallbackChainOptions } from './fallback.js'
|
|
9
|
+
export { circuitBreaker, CircuitState } from './circuit-breaker.js'
|
|
10
|
+
export type { CircuitBreakerOptions } from './circuit-breaker.js'
|
|
11
|
+
export { loadBalancer } from './load-balancer.js'
|
|
12
|
+
export type { LoadBalancerOptions } from './load-balancer.js'
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/** loadBalancer (F-REL-04) — weighted round-robin across provider adapters. */
|
|
2
|
+
|
|
3
|
+
import type { InterceptorContext } from '../../context.js'
|
|
4
|
+
import type { ProviderAdapter } from '../../providers/base.js'
|
|
5
|
+
import type { GavioRequest } from '../../request.js'
|
|
6
|
+
import type { GavioResponse } from '../../response.js'
|
|
7
|
+
import { coerceProvider } from '../../types.js'
|
|
8
|
+
import type { Executor, ExecutorPolicy } from '../base.js'
|
|
9
|
+
|
|
10
|
+
/** Optional settings accepted by the `loadBalancer` factory. */
export interface LoadBalancerOptions {
  /**
   * Relative weight of each adapter, listed in the same order as the adapters
   * array (the lengths must match). When omitted, every adapter weighs 1.
   */
  weights?: Array<number>
}
|
|
13
|
+
|
|
14
|
+
/**
 * Executor policy that spreads requests across a pool of provider adapters
 * using weighted round-robin.
 *
 * Each adapter is repeated in an internal pool once per weight unit (at least
 * once, fractional weights round up) and the pool is cycled one entry per call.
 */
class LoadBalancer implements ExecutorPolicy {
  readonly name = 'load_balancer'
  readonly isExecutorPolicy = true as const

  /** Adapters expanded by weight; never empty after construction. */
  private readonly pool: ProviderAdapter[]
  /** Position of the next adapter to use; always within `pool` bounds. */
  private index = 0

  /**
   * @param adapters Non-empty list of adapters to balance across.
   * @param options  Optional per-adapter weights (same length and order as `adapters`).
   * @throws Error when `adapters` is empty, when the weights length differs
   *   from the adapters length, or when any weight is not a finite number.
   */
  constructor(adapters: ProviderAdapter[], options: LoadBalancerOptions = {}) {
    if (adapters.length === 0) {
      throw new Error('loadBalancer requires at least one adapter')
    }
    const weights = options.weights ?? adapters.map(() => 1)
    if (weights.length !== adapters.length) {
      throw new Error('weights must match adapters length')
    }
    // Expand by weight, then cycle for round-robin.
    const pool: ProviderAdapter[] = []
    adapters.forEach((adapter, i) => {
      const weight = weights[i]
      // NaN (or an array hole) would contribute zero slots and could leave the
      // pool empty; Infinity would make the expansion loop run forever.
      if (typeof weight !== 'number' || !Number.isFinite(weight)) {
        throw new Error(`weights[${i}] must be a finite number`)
      }
      // Same slot count as before: at least one, fractional weights round up.
      const slots = Math.max(1, Math.ceil(weight))
      for (let k = 0; k < slots; k++) pool.push(adapter)
    })
    this.pool = pool
  }

  /**
   * Pick the next adapter in the rotation and complete the request with it.
   *
   * The request is copied, its `provider` is set from the chosen adapter's
   * `providerName`, and the adapter is called directly; `_callNext` is not used.
   */
  async around(
    request: GavioRequest,
    ctx: InterceptorContext,
    _callNext: Executor,
  ): Promise<GavioResponse> {
    ctx.markFired(this.name)
    const adapter = this.pool[this.index]!
    // Wrap instead of counting forever so the cursor stays a small exact integer.
    this.index = (this.index + 1) % this.pool.length
    const rerouted = request.copyWithMessages(request.messages)
    rerouted.provider = coerceProvider(adapter.providerName)
    return adapter.complete(rerouted)
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Factory for a load-balancing executor policy over `adapters`.
 *
 * @param adapters Adapters to distribute requests across.
 * @param options  Optional settings such as per-adapter weights.
 * @returns The policy, typed as the generic `ExecutorPolicy` interface.
 */
export function loadBalancer(
  adapters: ProviderAdapter[],
  options: LoadBalancerOptions = {},
): ExecutorPolicy {
  const policy: ExecutorPolicy = new LoadBalancer(adapters, options)
  return policy
}
|
package/src/pricing.ts
CHANGED
|
@@ -20,7 +20,11 @@ const DEFAULT_PRICES: Record<string, [number, number]> = {
|
|
|
20
20
|
'claude-sonnet-4-20250514': [0.003, 0.015],
|
|
21
21
|
'claude-haiku-4-5': [0.0008, 0.004],
|
|
22
22
|
'claude-opus-4-1': [0.015, 0.075],
|
|
23
|
-
//
|
|
23
|
+
// Gemini (approximate public pricing; override via config)
|
|
24
|
+
'gemini-2.0-flash': [0.0001, 0.0004],
|
|
25
|
+
'gemini-1.5-flash': [0.000075, 0.0003],
|
|
26
|
+
'gemini-1.5-pro': [0.00125, 0.005],
|
|
27
|
+
// Local (Ollama) / mock are free.
|
|
24
28
|
mock: [0.0, 0.0],
|
|
25
29
|
}
|
|
26
30
|
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/** azureOpenaiAdapter — Azure OpenAI deployment-based chat completions. */
|
|
2
|
+
|
|
3
|
+
import { ConfigurationError } from '../errors.js'
|
|
4
|
+
import type { PricingProvider } from '../pricing.js'
|
|
5
|
+
import type { GavioRequest } from '../request.js'
|
|
6
|
+
import type { GavioResponse } from '../response.js'
|
|
7
|
+
import { TokenUsage } from '../types.js'
|
|
8
|
+
import { BaseProviderAdapter } from './base.js'
|
|
9
|
+
import { postJson } from './http.js'
|
|
10
|
+
|
|
11
|
+
/** `api-version` query value used when the adapter options do not set `apiVersion`. */
const DEFAULT_API_VERSION = '2024-06-01'
|
|
12
|
+
|
|
13
|
+
/** Options for `azureOpenaiAdapter`; every field is optional. */
export interface AzureOpenAIAdapterOptions {
  /** API key; the adapter falls back to the `AZURE_OPENAI_API_KEY` env var. */
  apiKey?: string
  /** Resource endpoint URL; falls back to the `AZURE_OPENAI_ENDPOINT` env var. */
  endpoint?: string
  /** Deployment name; falls back to the `AZURE_OPENAI_DEPLOYMENT` env var. */
  deployment?: string
  /** REST API version placed in the `api-version` query parameter. */
  apiVersion?: string
  /** Request timeout in milliseconds (the adapter defaults to 30 000). */
  timeoutMs?: number
  /** Pricing provider handed to the base adapter. */
  pricing?: PricingProvider
}
|
|
21
|
+
|
|
22
|
+
/**
 * Provider adapter for Azure OpenAI chat completions, addressed by deployment.
 *
 * The API key, endpoint and deployment come from the constructor options or,
 * when absent, from `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_ENDPOINT` and
 * `AZURE_OPENAI_DEPLOYMENT`.
 */
class AzureOpenAIAdapter extends BaseProviderAdapter {
  private readonly apiKey: string | undefined
  /** Resource endpoint with trailing slashes removed; empty when unconfigured. */
  readonly endpoint: string
  private readonly deployment: string | undefined
  private readonly apiVersion: string
  /** Request timeout in seconds, derived from `timeoutMs` (default 30 000 ms). */
  private readonly timeoutSeconds: number

  constructor(options: AzureOpenAIAdapterOptions = {}) {
    super(options.pricing)
    this.apiKey = options.apiKey ?? process.env['AZURE_OPENAI_API_KEY']
    this.endpoint = (options.endpoint ?? process.env['AZURE_OPENAI_ENDPOINT'] ?? '').replace(
      /\/+$/,
      '',
    )
    this.deployment = options.deployment ?? process.env['AZURE_OPENAI_DEPLOYMENT']
    this.apiVersion = options.apiVersion ?? DEFAULT_API_VERSION
    this.timeoutSeconds = (options.timeoutMs ?? 30_000) / 1000
  }

  get providerName(): string {
    return 'azure_openai'
  }

  /**
   * Build the chat-completions URL for a request.
   *
   * The configured deployment is used when set; otherwise the request's model
   * doubles as the deployment name.
   */
  url(request: GavioRequest): string {
    // `||` rather than `??`: an empty AZURE_OPENAI_DEPLOYMENT means "not set"
    // and must not produce an empty path segment.
    const deployment = encodeURIComponent(this.deployment || request.model)
    // Encode both dynamic parts so characters such as `/`, `?` or `&` cannot
    // alter the path or inject extra query parameters.
    const apiVersion = encodeURIComponent(this.apiVersion)
    return `${this.endpoint}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`
  }

  /**
   * Send the request's messages to the deployment and map the reply to a
   * `GavioResponse`.
   *
   * Missing `choices`, `message.content` or `usage` fields in the reply fall
   * back to an empty string / zero token counts.
   *
   * @throws ConfigurationError when the API key or the endpoint is not configured.
   */
  async complete(request: GavioRequest): Promise<GavioResponse> {
    if (!this.apiKey || !this.endpoint) {
      throw new ConfigurationError('AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT must be set')
    }
    const started = performance.now()
    const payload = {
      messages: request.messages,
      temperature: request.temperature,
      max_tokens: request.maxTokens,
    }
    const data = await postJson(this.url(request), payload, { 'api-key': this.apiKey }, this.timeoutSeconds)
    const choices = (data['choices'] as Array<Record<string, unknown>>) ?? []
    const message = (choices[0]?.['message'] as Record<string, unknown>) ?? {}
    const content = (message['content'] as string) ?? ''
    const usageData = (data['usage'] as Record<string, number>) ?? {}
    const usage = new TokenUsage(usageData['prompt_tokens'] ?? 0, usageData['completion_tokens'] ?? 0)
    return this.buildResponse(request, content, usage, (data['model'] as string) ?? request.model, started)
  }

  /** Reports whether both the API key and the endpoint are configured; makes no network call. */
  async healthCheck(): Promise<boolean> {
    return !!(this.apiKey && this.endpoint)
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Factory for the Azure OpenAI provider adapter.
 *
 * @param options Credentials, endpoint, deployment and timeout overrides.
 */
export function azureOpenaiAdapter(options: AzureOpenAIAdapterOptions = {}): AzureOpenAIAdapter {
  const adapter = new AzureOpenAIAdapter(options)
  return adapter
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/** geminiAdapter — Google Generative Language API (generateContent). */
|
|
2
|
+
|
|
3
|
+
import { ConfigurationError } from '../errors.js'
|
|
4
|
+
import type { PricingProvider } from '../pricing.js'
|
|
5
|
+
import type { GavioRequest } from '../request.js'
|
|
6
|
+
import type { GavioResponse } from '../response.js'
|
|
7
|
+
import { TokenUsage, type Message } from '../types.js'
|
|
8
|
+
import { BaseProviderAdapter } from './base.js'
|
|
9
|
+
import { postJson } from './http.js'
|
|
10
|
+
|
|
11
|
+
/** Base URL used by the Gemini adapter when the options do not set `baseUrl`. */
const DEFAULT_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta'
|
|
12
|
+
|
|
13
|
+
/** Options for `geminiAdapter`; every field is optional. */
export interface GeminiAdapterOptions {
  /** API key; the adapter falls back to `GEMINI_API_KEY`, then `GOOGLE_API_KEY`. */
  apiKey?: string
  /** Override for the API base URL; trailing slashes are stripped by the adapter. */
  baseUrl?: string
  /** Request timeout in milliseconds (the adapter defaults to 30 000). */
  timeoutMs?: number
  /** Pricing provider handed to the base adapter. */
  pricing?: PricingProvider
}
|
|
19
|
+
|
|
20
|
+
/** One entry of the `contents` array sent to Gemini. */
interface GeminiContent {
  /** Either `'model'` or `'user'` as produced by `geminiToContents`. */
  role: string
  /** Text fragments that make up the message. */
  parts: Array<{ text: string }>
}
|
|
24
|
+
|
|
25
|
+
/**
 * Split Gavio messages into Gemini `contents` plus a single system instruction.
 *
 * System messages are removed from the conversation and joined with newlines;
 * `assistant` messages become role `model`, and every other role becomes `user`.
 *
 * @returns `system` is `null` when no system message was present.
 */
export function geminiToContents(messages: Message[]): {
  system: string | null
  contents: GeminiContent[]
} {
  const contents: GeminiContent[] = []
  let system: string | null = null
  messages.forEach((message) => {
    const text = message.content
    if (message.role !== 'system') {
      const role = message.role === 'assistant' ? 'model' : 'user'
      contents.push({ role, parts: [{ text }] })
      return
    }
    // Append to an existing (non-empty) instruction, otherwise start one.
    system = system ? `${system}\n${text}` : text
  })
  return { system, contents }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Provider adapter for the Google Generative Language API (`generateContent`).
 *
 * The API key comes from the constructor options or, when absent, from
 * `GEMINI_API_KEY` and then `GOOGLE_API_KEY`.
 */
class GeminiAdapter extends BaseProviderAdapter {
  private readonly apiKey: string | undefined
  /** API base URL with trailing slashes removed. */
  private readonly baseUrl: string
  /** Request timeout in seconds, derived from `timeoutMs` (default 30 000 ms). */
  private readonly timeoutSeconds: number

  constructor(options: GeminiAdapterOptions = {}) {
    super(options.pricing)
    this.apiKey =
      options.apiKey ?? process.env['GEMINI_API_KEY'] ?? process.env['GOOGLE_API_KEY']
    this.baseUrl = (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, '')
    this.timeoutSeconds = (options.timeoutMs ?? 30_000) / 1000
  }

  get providerName(): string {
    return 'gemini'
  }

  /** Build the `generateContent` body; `systemInstruction` is only set when a system prompt exists. */
  private payload(request: GavioRequest): Record<string, unknown> {
    const { system, contents } = geminiToContents(request.messages)
    const payload: Record<string, unknown> = {
      contents,
      generationConfig: {
        temperature: request.temperature,
        maxOutputTokens: request.maxTokens,
      },
    }
    if (system) payload['systemInstruction'] = { parts: [{ text: system }] }
    return payload
  }

  /**
   * Call `generateContent` for the request's model and map the reply to a
   * `GavioResponse`.
   *
   * The text of all parts of the first candidate is concatenated; missing
   * candidates or usage metadata fall back to an empty string / zero counts.
   *
   * @throws ConfigurationError when no API key is configured.
   */
  async complete(request: GavioRequest): Promise<GavioResponse> {
    if (!this.apiKey) throw new ConfigurationError('GEMINI_API_KEY not set')
    const started = performance.now()
    // The key travels in the `x-goog-api-key` header instead of the query
    // string so it cannot leak through logged or error-reported URLs.
    // The model is encoded so it cannot alter the request path.
    const url = `${this.baseUrl}/models/${encodeURIComponent(request.model)}:generateContent`
    const data = await postJson(
      url,
      this.payload(request),
      { 'x-goog-api-key': this.apiKey },
      this.timeoutSeconds,
    )
    const candidates = (data['candidates'] as Array<Record<string, unknown>>) ?? [{}]
    const contentObj = (candidates[0]?.['content'] as Record<string, unknown>) ?? {}
    const parts = (contentObj['parts'] as Array<{ text?: string }>) ?? []
    const content = parts.map((p) => p.text ?? '').join('')
    const um = (data['usageMetadata'] as Record<string, number>) ?? {}
    const usage = new TokenUsage(um['promptTokenCount'] ?? 0, um['candidatesTokenCount'] ?? 0)
    return this.buildResponse(request, content, usage, request.model, started)
  }

  /** Reports whether an API key is configured; makes no network call. */
  async healthCheck(): Promise<boolean> {
    return !!this.apiKey
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Factory for the Gemini provider adapter.
 *
 * @param options API key, base URL, timeout and pricing overrides.
 */
export function geminiAdapter(options: GeminiAdapterOptions = {}): GeminiAdapter {
  const adapter = new GeminiAdapter(options)
  return adapter
}
|
package/src/providers/index.ts
CHANGED
|
@@ -4,8 +4,11 @@ import { ConfigurationError } from '../errors.js'
|
|
|
4
4
|
import type { PricingProvider } from '../pricing.js'
|
|
5
5
|
import { Provider, coerceProvider } from '../types.js'
|
|
6
6
|
import { anthropicAdapter } from './anthropic.js'
|
|
7
|
+
import { azureOpenaiAdapter } from './azure-openai.js'
|
|
7
8
|
import type { ProviderAdapter } from './base.js'
|
|
9
|
+
import { geminiAdapter } from './gemini.js'
|
|
8
10
|
import { mockProvider } from './mock.js'
|
|
11
|
+
import { ollamaAdapter } from './ollama.js'
|
|
9
12
|
import { openaiAdapter } from './openai.js'
|
|
10
13
|
|
|
11
14
|
export type { ProviderAdapter } from './base.js'
|
|
@@ -16,24 +19,37 @@ export { openaiAdapter } from './openai.js'
|
|
|
16
19
|
export type { OpenAIAdapterOptions } from './openai.js'
|
|
17
20
|
export { anthropicAdapter } from './anthropic.js'
|
|
18
21
|
export type { AnthropicAdapterOptions } from './anthropic.js'
|
|
22
|
+
export { geminiAdapter } from './gemini.js'
|
|
23
|
+
export type { GeminiAdapterOptions } from './gemini.js'
|
|
24
|
+
export { azureOpenaiAdapter } from './azure-openai.js'
|
|
25
|
+
export type { AzureOpenAIAdapterOptions } from './azure-openai.js'
|
|
26
|
+
export { ollamaAdapter } from './ollama.js'
|
|
27
|
+
export type { OllamaAdapterOptions } from './ollama.js'
|
|
19
28
|
export { Provider } from '../types.js'
|
|
20
29
|
|
|
21
|
-
/** Instantiate the default adapter for a provider id.
|
|
30
|
+
/**
 * Create the default adapter for a provider id.
 *
 * @param provider Provider enum member or its string form; normalised with `coerceProvider`.
 * @param pricing  Optional pricing provider forwarded to the adapter factory.
 * @throws ConfigurationError when the provider has no adapter in this version.
 */
export function buildAdapter(
  provider: Provider | string,
  pricing?: PricingProvider,
): ProviderAdapter {
  const resolved = coerceProvider(provider)
  // Only pass a `pricing` key when the caller actually supplied one.
  const adapterOptions = pricing ? { pricing } : {}

  if (resolved === Provider.OPENAI) return openaiAdapter(adapterOptions)
  if (resolved === Provider.ANTHROPIC) return anthropicAdapter(adapterOptions)
  if (resolved === Provider.GEMINI) return geminiAdapter(adapterOptions)
  if (resolved === Provider.AZURE_OPENAI) return azureOpenaiAdapter(adapterOptions)
  if (resolved === Provider.OLLAMA) return ollamaAdapter(adapterOptions)
  if (resolved === Provider.MOCK) return mockProvider(adapterOptions)

  throw new ConfigurationError(
    `Provider '${resolved}' is not available (v0.3.0 adds bedrock, cohere)`,
  )
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/** ollamaAdapter — local models via the Ollama chat API. */
|
|
2
|
+
|
|
3
|
+
import type { PricingProvider } from '../pricing.js'
|
|
4
|
+
import type { GavioRequest } from '../request.js'
|
|
5
|
+
import type { GavioResponse } from '../response.js'
|
|
6
|
+
import { TokenUsage } from '../types.js'
|
|
7
|
+
import { BaseProviderAdapter } from './base.js'
|
|
8
|
+
import { postJson } from './http.js'
|
|
9
|
+
|
|
10
|
+
/** Base URL used by the Ollama adapter when neither `baseUrl` nor `OLLAMA_HOST` is set. */
const DEFAULT_BASE_URL = 'http://localhost:11434'
|
|
11
|
+
|
|
12
|
+
/** Options for `ollamaAdapter`; every field is optional. */
export interface OllamaAdapterOptions {
  /** Server base URL; the adapter falls back to the `OLLAMA_HOST` env var. */
  baseUrl?: string
  /** Request timeout in milliseconds (the adapter defaults to 60 000). */
  timeoutMs?: number
  /** Pricing provider handed to the base adapter. */
  pricing?: PricingProvider
}
|
|
17
|
+
|
|
18
|
+
/**
 * Provider adapter for local models served through the Ollama chat API.
 *
 * The server address comes from `options.baseUrl`, then the `OLLAMA_HOST`
 * environment variable, then `http://localhost:11434`.
 */
class OllamaAdapter extends BaseProviderAdapter {
  /** Server base URL including a scheme, with trailing slashes removed. */
  private readonly baseUrl: string
  /** Request timeout in seconds, derived from `timeoutMs` (default 60 000 ms). */
  private readonly timeoutSeconds: number

  constructor(options: OllamaAdapterOptions = {}) {
    super(options.pricing)
    // An empty or whitespace-only value is treated as "not configured".
    const configured =
      (options.baseUrl ?? process.env['OLLAMA_HOST'] ?? '').trim() || DEFAULT_BASE_URL
    // OLLAMA_HOST is commonly a bare `host:port`; without a scheme the request
    // URL would be invalid, so default to plain HTTP.
    const withScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(configured)
      ? configured
      : `http://${configured}`
    this.baseUrl = withScheme.replace(/\/+$/, '')
    this.timeoutSeconds = (options.timeoutMs ?? 60_000) / 1000
  }

  get providerName(): string {
    return 'ollama'
  }

  /**
   * Send a non-streaming chat request and map the reply to a `GavioResponse`.
   *
   * Missing `message.content`, `prompt_eval_count` or `eval_count` fields in
   * the reply fall back to an empty string / zero token counts.
   */
  async complete(request: GavioRequest): Promise<GavioResponse> {
    const started = performance.now()
    const payload = {
      model: request.model,
      messages: request.messages,
      stream: false,
      options: {
        temperature: request.temperature,
        // Forward the token cap like the other adapters do; `num_predict` is
        // Ollama's name for it. An undefined value is dropped by JSON encoding.
        num_predict: request.maxTokens,
      },
    }
    const data = await postJson(`${this.baseUrl}/api/chat`, payload, {}, this.timeoutSeconds)
    const message = (data['message'] as Record<string, unknown>) ?? {}
    const content = (message['content'] as string) ?? ''
    const usage = new TokenUsage(
      (data['prompt_eval_count'] as number) ?? 0,
      (data['eval_count'] as number) ?? 0,
    )
    return this.buildResponse(request, content, usage, (data['model'] as string) ?? request.model, started)
  }

  /** Always reports healthy; the server is not contacted. */
  async healthCheck(): Promise<boolean> {
    return true
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Factory for the Ollama provider adapter.
 *
 * @param options Base URL, timeout and pricing overrides.
 */
export function ollamaAdapter(options: OllamaAdapterOptions = {}): OllamaAdapter {
  const adapter = new OllamaAdapter(options)
  return adapter
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI drop-in shim (F-DX-04) — point existing OpenAI SDK code at Gavio.
|
|
3
|
+
*
|
|
4
|
+
* import { Gateway } from 'gavio'
|
|
5
|
+
* import { GavioOpenAI } from 'gavio/shim/openai'
|
|
6
|
+
*
|
|
7
|
+
* const client = new GavioOpenAI(new Gateway({ provider: 'openai', model: 'gpt-4o' }))
|
|
8
|
+
* const resp = await client.chat.completions.create({
|
|
9
|
+
* model: 'gpt-4o', messages: [{ role: 'user', content: 'hi' }],
|
|
10
|
+
* })
|
|
11
|
+
* console.log(resp.choices[0].message.content)
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { Gateway } from '../gateway.js'
|
|
15
|
+
import type { GavioResponse } from '../response.js'
|
|
16
|
+
import type { Message } from '../types.js'
|
|
17
|
+
|
|
18
|
+
/** OpenAI-style chat completion object returned by the shim. */
export interface ChatCompletion {
  /** Completion id; the shim fills it with the Gavio trace id. */
  id: string
  object: 'chat.completion'
  model: string
  /** Generated choices; the shim always produces exactly one. */
  choices: Array<{
    index: number
    message: { role: string; content: string }
    finish_reason: string
  }>
  /** Token accounting in OpenAI's snake_case field names. */
  usage: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
  /** Gavio-specific extras that have no OpenAI counterpart. */
  gavio: {
    costUsd: number
    cacheHit: boolean
    interceptorsFired: string[]
  }
}
|
|
26
|
+
|
|
27
|
+
/** Subset of OpenAI `chat.completions.create` parameters understood by the shim. */
export interface CreateParams {
  /** Conversation to send. */
  messages: Message[]
  /** Model to request; passed through to the gateway as-is. */
  model?: string
  /** Sampling temperature; the shim uses 0.7 when omitted. */
  temperature?: number
  /** Completion token cap; the shim uses 1024 when omitted. */
  // eslint-disable-next-line @typescript-eslint/naming-convention
  max_tokens?: number
}
|
|
34
|
+
|
|
35
|
+
/**
 * Convert a Gavio response into an OpenAI-shaped chat completion.
 *
 * The result always has a single `assistant` choice with `finish_reason`
 * `'stop'`; `model` prefers `modelVersion` and falls back to `model` when that
 * is empty.
 */
function toCompletion(resp: GavioResponse): ChatCompletion {
  const assistantMessage = { role: 'assistant', content: resp.content }
  const tokenUsage = {
    prompt_tokens: resp.usage.promptTokens,
    completion_tokens: resp.usage.completionTokens,
    total_tokens: resp.usage.totalTokens,
  }
  const extras = {
    costUsd: resp.costUsd,
    cacheHit: resp.cacheHit,
    interceptorsFired: resp.interceptorsFired,
  }
  return {
    id: resp.traceId,
    object: 'chat.completion',
    model: resp.modelVersion || resp.model,
    choices: [{ index: 0, message: assistantMessage, finish_reason: 'stop' }],
    usage: tokenUsage,
    gavio: extras,
  }
}
|
|
55
|
+
|
|
56
|
+
/** Implements `chat.completions` of the OpenAI-shaped facade on top of a Gateway. */
class Completions {
  constructor(private readonly gw: Gateway) {}

  /**
   * Run a chat completion through the gateway and return it in OpenAI shape.
   *
   * `temperature` defaults to 0.7 and `max_tokens` to 1024 when not supplied.
   */
  async create(params: CreateParams): Promise<ChatCompletion> {
    const temperature = params.temperature ?? 0.7
    const maxTokens = params.max_tokens ?? 1024
    const gatewayResponse = await this.gw.complete({
      messages: params.messages,
      model: params.model,
      options: { temperature, maxTokens },
    })
    return toCompletion(gatewayResponse)
  }
}
|
|
68
|
+
|
|
69
|
+
/**
 * Facade with the shape of the OpenAI client (`client.chat.completions.create`)
 * that routes calls through a Gavio Gateway.
 */
export class GavioOpenAI {
  readonly chat: { completions: Completions }

  /** @param gateway Gateway that will execute every completion request. */
  constructor(gateway: Gateway) {
    const completions = new Completions(gateway)
    this.chat = { completions }
  }
}
|