gavio 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/config.js +106 -0
- package/dist/cjs/errors.js +29 -1
- package/dist/cjs/gateway.js +42 -0
- package/dist/cjs/interceptors/audit/index.js +4 -1
- package/dist/cjs/interceptors/audit/interceptor.js +7 -0
- package/dist/cjs/interceptors/audit/trace.js +43 -0
- package/dist/cjs/interceptors/cache/embedding.js +53 -0
- package/dist/cjs/interceptors/cache/index.js +9 -5
- package/dist/cjs/interceptors/cache/interceptor.js +80 -0
- package/dist/cjs/interceptors/cache/vector.js +35 -0
- package/dist/cjs/interceptors/governance/budget.js +45 -0
- package/dist/cjs/interceptors/governance/index.js +10 -0
- package/dist/cjs/interceptors/governance/model-policy.js +18 -0
- package/dist/cjs/interceptors/governance/rate-limit.js +46 -0
- package/dist/cjs/interceptors/guardrails/index.js +11 -0
- package/dist/cjs/interceptors/guardrails/interceptor.js +40 -0
- package/dist/cjs/interceptors/guardrails/validator.js +8 -0
- package/dist/cjs/interceptors/guardrails/validators/regex.js +32 -0
- package/dist/cjs/interceptors/guardrails/validators/schema.js +63 -0
- package/dist/cjs/interceptors/injection.js +62 -0
- package/dist/cjs/interceptors/reliability/circuit-breaker.js +82 -0
- package/dist/cjs/interceptors/reliability/index.js +6 -1
- package/dist/cjs/interceptors/reliability/load-balancer.js +38 -0
- package/dist/cjs/pricing.js +5 -1
- package/dist/cjs/providers/azure-openai.js +56 -0
- package/dist/cjs/providers/gemini.js +73 -0
- package/dist/cjs/providers/index.js +22 -6
- package/dist/cjs/providers/ollama.js +41 -0
- package/dist/cjs/shim/openai.js +57 -0
- package/dist/esm/config.d.ts +12 -0
- package/dist/esm/config.js +102 -0
- package/dist/esm/errors.d.ts +17 -0
- package/dist/esm/errors.js +24 -0
- package/dist/esm/gateway.d.ts +5 -0
- package/dist/esm/gateway.js +9 -0
- package/dist/esm/interceptors/audit/index.d.ts +2 -0
- package/dist/esm/interceptors/audit/index.js +1 -0
- package/dist/esm/interceptors/audit/interceptor.d.ts +2 -0
- package/dist/esm/interceptors/audit/interceptor.js +7 -0
- package/dist/esm/interceptors/audit/trace.d.ts +19 -0
- package/dist/esm/interceptors/audit/trace.js +39 -0
- package/dist/esm/interceptors/cache/embedding.d.ts +14 -0
- package/dist/esm/interceptors/cache/embedding.js +49 -0
- package/dist/esm/interceptors/cache/index.d.ts +7 -4
- package/dist/esm/interceptors/cache/index.js +4 -4
- package/dist/esm/interceptors/cache/interceptor.d.ts +19 -0
- package/dist/esm/interceptors/cache/interceptor.js +77 -0
- package/dist/esm/interceptors/cache/vector.d.ts +9 -0
- package/dist/esm/interceptors/cache/vector.js +32 -0
- package/dist/esm/interceptors/governance/budget.d.ts +11 -0
- package/dist/esm/interceptors/governance/budget.js +42 -0
- package/dist/esm/interceptors/governance/index.d.ts +7 -0
- package/dist/esm/interceptors/governance/index.js +4 -0
- package/dist/esm/interceptors/governance/model-policy.d.ts +8 -0
- package/dist/esm/interceptors/governance/model-policy.js +15 -0
- package/dist/esm/interceptors/governance/rate-limit.d.ts +9 -0
- package/dist/esm/interceptors/governance/rate-limit.js +43 -0
- package/dist/esm/interceptors/guardrails/index.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/index.js +4 -0
- package/dist/esm/interceptors/guardrails/interceptor.d.ts +15 -0
- package/dist/esm/interceptors/guardrails/interceptor.js +37 -0
- package/dist/esm/interceptors/guardrails/validator.d.ts +11 -0
- package/dist/esm/interceptors/guardrails/validator.js +3 -0
- package/dist/esm/interceptors/guardrails/validators/regex.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/validators/regex.js +28 -0
- package/dist/esm/interceptors/guardrails/validators/schema.d.ts +5 -0
- package/dist/esm/interceptors/guardrails/validators/schema.js +60 -0
- package/dist/esm/interceptors/injection.d.ts +17 -0
- package/dist/esm/interceptors/injection.js +59 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.d.ts +15 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.js +78 -0
- package/dist/esm/interceptors/reliability/index.d.ts +4 -0
- package/dist/esm/interceptors/reliability/index.js +2 -0
- package/dist/esm/interceptors/reliability/load-balancer.d.ts +8 -0
- package/dist/esm/interceptors/reliability/load-balancer.js +35 -0
- package/dist/esm/pricing.js +5 -1
- package/dist/esm/providers/azure-openai.d.ts +28 -0
- package/dist/esm/providers/azure-openai.js +53 -0
- package/dist/esm/providers/gemini.d.ts +36 -0
- package/dist/esm/providers/gemini.js +69 -0
- package/dist/esm/providers/index.d.ts +7 -1
- package/dist/esm/providers/index.js +18 -5
- package/dist/esm/providers/ollama.d.ts +21 -0
- package/dist/esm/providers/ollama.js +38 -0
- package/dist/esm/shim/openai.d.ts +56 -0
- package/dist/esm/shim/openai.js +53 -0
- package/package.json +31 -2
- package/src/config.ts +125 -0
- package/src/errors.ts +28 -0
- package/src/gateway.ts +10 -0
- package/src/interceptors/audit/index.ts +2 -0
- package/src/interceptors/audit/interceptor.ts +9 -0
- package/src/interceptors/audit/trace.ts +47 -0
- package/src/interceptors/cache/embedding.ts +53 -0
- package/src/interceptors/cache/index.ts +7 -4
- package/src/interceptors/cache/interceptor.ts +111 -0
- package/src/interceptors/cache/vector.ts +45 -0
- package/src/interceptors/governance/budget.ts +59 -0
- package/src/interceptors/governance/index.ts +8 -0
- package/src/interceptors/governance/model-policy.ts +25 -0
- package/src/interceptors/governance/rate-limit.ts +63 -0
- package/src/interceptors/guardrails/index.ts +7 -0
- package/src/interceptors/guardrails/interceptor.ts +56 -0
- package/src/interceptors/guardrails/validator.ts +14 -0
- package/src/interceptors/guardrails/validators/regex.ts +29 -0
- package/src/interceptors/guardrails/validators/schema.ts +62 -0
- package/src/interceptors/injection.ts +72 -0
- package/src/interceptors/reliability/circuit-breaker.ts +102 -0
- package/src/interceptors/reliability/index.ts +4 -0
- package/src/interceptors/reliability/load-balancer.ts +56 -0
- package/src/pricing.ts +5 -1
- package/src/providers/azure-openai.ts +77 -0
- package/src/providers/gemini.ts +95 -0
- package/src/providers/index.ts +21 -5
- package/src/providers/ollama.ts +61 -0
- package/src/shim/openai.ts +76 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/** geminiAdapter — Google Generative Language API (generateContent). */
|
|
2
|
+
import { ConfigurationError } from '../errors.js';
|
|
3
|
+
import { TokenUsage } from '../types.js';
|
|
4
|
+
import { BaseProviderAdapter } from './base.js';
|
|
5
|
+
import { postJson } from './http.js';
|
|
6
|
+
const DEFAULT_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta';
|
|
7
|
+
/**
 * Translate Gavio chat messages into the shape Gemini's `generateContent` expects.
 *
 * Every `system` message is pulled out of the conversation and merged, in
 * order, into one newline-separated instruction string (`null` when there is
 * none). All remaining messages become `contents` entries: `assistant` maps to
 * the Gemini role `model`, any other role maps to `user`.
 *
 * @param messages Array of `{ role, content }` objects.
 * @returns `{ system, contents }`.
 */
export function geminiToContents(messages) {
    const isSystem = (message) => message.role === 'system';
    // A falsy accumulator (null or '') is replaced rather than joined.
    const system = messages
        .filter(isSystem)
        .reduce((merged, message) => (merged ? `${merged}\n${message.content}` : message.content), null);
    const contents = messages
        .filter((message) => !isSystem(message))
        .map((message) => {
            const role = message.role === 'assistant' ? 'model' : 'user';
            return { role, parts: [{ text: message.content }] };
        });
    return { system, contents };
}
|
|
21
|
+
/**
 * Provider adapter for the Google Generative Language API (`generateContent`).
 *
 * The API key comes from `options.apiKey`, then the `GEMINI_API_KEY`
 * environment variable, then `GOOGLE_API_KEY`.
 */
class GeminiAdapter extends BaseProviderAdapter {
    apiKey;
    baseUrl;
    timeoutSeconds;
    /**
     * @param options Optional `apiKey`, `baseUrl`, `timeoutMs` (default 30 000)
     *   and `pricing` (forwarded to the base adapter).
     */
    constructor(options = {}) {
        super(options.pricing);
        this.apiKey =
            options.apiKey ?? process.env['GEMINI_API_KEY'] ?? process.env['GOOGLE_API_KEY'];
        // Strip trailing slashes so the path can be appended with a single '/'.
        this.baseUrl = (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, '');
        this.timeoutSeconds = (options.timeoutMs ?? 30_000) / 1000;
    }
    get providerName() {
        return 'gemini';
    }
    /** Build the `generateContent` request body for a Gavio request. */
    payload(request) {
        const { system, contents } = geminiToContents(request.messages);
        const payload = {
            contents,
            generationConfig: {
                temperature: request.temperature,
                maxOutputTokens: request.maxTokens,
            },
        };
        // Only attach a system instruction when one was actually supplied.
        if (system)
            payload['systemInstruction'] = { parts: [{ text: system }] };
        return payload;
    }
    /**
     * Send the request to Gemini and convert the reply into a Gavio response.
     *
     * @throws ConfigurationError when no API key is configured.
     */
    async complete(request) {
        if (!this.apiKey)
            throw new ConfigurationError('GEMINI_API_KEY not set');
        const started = performance.now();
        // Escape both interpolated values: an unescaped '?', '#' or '/' in the
        // model id (or key) would otherwise change the request target.
        const model = encodeURIComponent(request.model);
        const key = encodeURIComponent(this.apiKey);
        // NOTE(review): the key travels in the query string and can end up in
        // logs; Google also accepts an `x-goog-api-key` header. Consider moving
        // it there if postJson's third argument is a header map (confirm).
        const url = `${this.baseUrl}/models/${model}:generateContent?key=${key}`;
        const data = await postJson(url, this.payload(request), {}, this.timeoutSeconds);
        // Missing candidates/content/parts all degrade to empty content.
        const candidates = data['candidates'] ?? [{}];
        const contentObj = candidates[0]?.['content'] ?? {};
        const parts = contentObj['parts'] ?? [];
        const content = parts.map((p) => p.text ?? '').join('');
        const um = data['usageMetadata'] ?? {};
        const usage = new TokenUsage(um['promptTokenCount'] ?? 0, um['candidatesTokenCount'] ?? 0);
        return this.buildResponse(request, content, usage, request.model, started);
    }
    /** Reports only whether an API key is configured; makes no network call. */
    async healthCheck() {
        return !!this.apiKey;
    }
}
/** Factory: build a Gemini provider adapter. */
export function geminiAdapter(options = {}) {
    return new GeminiAdapter(options);
}
|
|
@@ -10,6 +10,12 @@ export { openaiAdapter } from './openai.js';
|
|
|
10
10
|
export type { OpenAIAdapterOptions } from './openai.js';
|
|
11
11
|
export { anthropicAdapter } from './anthropic.js';
|
|
12
12
|
export type { AnthropicAdapterOptions } from './anthropic.js';
|
|
13
|
+
export { geminiAdapter } from './gemini.js';
|
|
14
|
+
export type { GeminiAdapterOptions } from './gemini.js';
|
|
15
|
+
export { azureOpenaiAdapter } from './azure-openai.js';
|
|
16
|
+
export type { AzureOpenAIAdapterOptions } from './azure-openai.js';
|
|
17
|
+
export { ollamaAdapter } from './ollama.js';
|
|
18
|
+
export type { OllamaAdapterOptions } from './ollama.js';
|
|
13
19
|
export { Provider } from '../types.js';
|
|
14
|
-
/** Instantiate the default adapter for a provider id.
|
|
20
|
+
/** Instantiate the default adapter for a provider id. */
|
|
15
21
|
export declare function buildAdapter(provider: Provider | string, pricing?: PricingProvider): ProviderAdapter;
|
|
@@ -2,24 +2,37 @@
|
|
|
2
2
|
import { ConfigurationError } from '../errors.js';
|
|
3
3
|
import { Provider, coerceProvider } from '../types.js';
|
|
4
4
|
import { anthropicAdapter } from './anthropic.js';
|
|
5
|
+
import { azureOpenaiAdapter } from './azure-openai.js';
|
|
6
|
+
import { geminiAdapter } from './gemini.js';
|
|
5
7
|
import { mockProvider } from './mock.js';
|
|
8
|
+
import { ollamaAdapter } from './ollama.js';
|
|
6
9
|
import { openaiAdapter } from './openai.js';
|
|
7
10
|
export { BaseProviderAdapter } from './base.js';
|
|
8
11
|
export { mockProvider } from './mock.js';
|
|
9
12
|
export { openaiAdapter } from './openai.js';
|
|
10
13
|
export { anthropicAdapter } from './anthropic.js';
|
|
14
|
+
export { geminiAdapter } from './gemini.js';
|
|
15
|
+
export { azureOpenaiAdapter } from './azure-openai.js';
|
|
16
|
+
export { ollamaAdapter } from './ollama.js';
|
|
11
17
|
export { Provider } from '../types.js';
|
|
12
|
-
/** Instantiate the default adapter for a provider id.
|
|
18
|
+
/** Instantiate the default adapter for a provider id. */
|
|
13
19
|
export function buildAdapter(provider, pricing) {
|
|
14
20
|
const p = coerceProvider(provider);
|
|
21
|
+
const opts = pricing ? { pricing } : {};
|
|
15
22
|
switch (p) {
|
|
16
23
|
case Provider.OPENAI:
|
|
17
|
-
return openaiAdapter(
|
|
24
|
+
return openaiAdapter(opts);
|
|
18
25
|
case Provider.ANTHROPIC:
|
|
19
|
-
return anthropicAdapter(
|
|
26
|
+
return anthropicAdapter(opts);
|
|
27
|
+
case Provider.GEMINI:
|
|
28
|
+
return geminiAdapter(opts);
|
|
29
|
+
case Provider.AZURE_OPENAI:
|
|
30
|
+
return azureOpenaiAdapter(opts);
|
|
31
|
+
case Provider.OLLAMA:
|
|
32
|
+
return ollamaAdapter(opts);
|
|
20
33
|
case Provider.MOCK:
|
|
21
|
-
return mockProvider(
|
|
34
|
+
return mockProvider(opts);
|
|
22
35
|
default:
|
|
23
|
-
throw new ConfigurationError(`Provider '${p}' is not available
|
|
36
|
+
throw new ConfigurationError(`Provider '${p}' is not available (v0.3.0 adds bedrock, cohere)`);
|
|
24
37
|
}
|
|
25
38
|
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/** ollamaAdapter — local models via the Ollama chat API. */
|
|
2
|
+
import type { PricingProvider } from '../pricing.js';
|
|
3
|
+
import type { GavioRequest } from '../request.js';
|
|
4
|
+
import type { GavioResponse } from '../response.js';
|
|
5
|
+
import { BaseProviderAdapter } from './base.js';
|
|
6
|
+
export interface OllamaAdapterOptions {
|
|
7
|
+
baseUrl?: string;
|
|
8
|
+
timeoutMs?: number;
|
|
9
|
+
pricing?: PricingProvider;
|
|
10
|
+
}
|
|
11
|
+
declare class OllamaAdapter extends BaseProviderAdapter {
|
|
12
|
+
private readonly baseUrl;
|
|
13
|
+
private readonly timeoutSeconds;
|
|
14
|
+
constructor(options?: OllamaAdapterOptions);
|
|
15
|
+
get providerName(): string;
|
|
16
|
+
complete(request: GavioRequest): Promise<GavioResponse>;
|
|
17
|
+
healthCheck(): Promise<boolean>;
|
|
18
|
+
}
|
|
19
|
+
/** Factory: build an Ollama provider adapter. */
|
|
20
|
+
export declare function ollamaAdapter(options?: OllamaAdapterOptions): OllamaAdapter;
|
|
21
|
+
export {};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/** ollamaAdapter — local models via the Ollama chat API. */
|
|
2
|
+
import { TokenUsage } from '../types.js';
|
|
3
|
+
import { BaseProviderAdapter } from './base.js';
|
|
4
|
+
import { postJson } from './http.js';
|
|
5
|
+
const DEFAULT_BASE_URL = 'http://localhost:11434';
|
|
6
|
+
/**
 * Provider adapter for a local Ollama server (`POST /api/chat`, non-streaming).
 *
 * The base URL comes from `options.baseUrl`, then the `OLLAMA_HOST`
 * environment variable, then `http://localhost:11434`.
 */
class OllamaAdapter extends BaseProviderAdapter {
    baseUrl;
    timeoutSeconds;
    /**
     * @param options Optional `baseUrl`, `timeoutMs` (default 60 000) and
     *   `pricing` (forwarded to the base adapter).
     */
    constructor(options = {}) {
        super(options.pricing);
        // Treat empty strings as "not set" so they fall through to the default.
        const configured = (options.baseUrl || process.env['OLLAMA_HOST'] || DEFAULT_BASE_URL).trim();
        // OLLAMA_HOST is commonly a bare `host:port`; without a scheme the
        // request URL would be invalid, so default to http://.
        const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(configured);
        const withScheme = hasScheme ? configured : `http://${configured}`;
        this.baseUrl = withScheme.replace(/\/+$/, '');
        this.timeoutSeconds = (options.timeoutMs ?? 60_000) / 1000;
    }
    get providerName() {
        return 'ollama';
    }
    /** Send the chat request to Ollama and convert the reply into a Gavio response. */
    async complete(request) {
        const started = performance.now();
        const options = { temperature: request.temperature };
        // Forward the caller's token limit; Ollama names it `num_predict`.
        if (request.maxTokens != null)
            options['num_predict'] = request.maxTokens;
        const payload = {
            model: request.model,
            messages: request.messages,
            stream: false,
            options,
        };
        const data = await postJson(`${this.baseUrl}/api/chat`, payload, {}, this.timeoutSeconds);
        const message = data['message'] ?? {};
        const content = message['content'] ?? '';
        const usage = new TokenUsage(data['prompt_eval_count'] ?? 0, data['eval_count'] ?? 0);
        // Prefer the model name echoed by the server, else the requested one.
        return this.buildResponse(request, content, usage, data['model'] ?? request.model, started);
    }
    /**
     * Always reports healthy; no request is made to the server.
     * NOTE(review): consider probing the server if a real liveness signal is needed.
     */
    async healthCheck() {
        return true;
    }
}
/** Factory: build an Ollama provider adapter. */
export function ollamaAdapter(options = {}) {
    return new OllamaAdapter(options);
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI drop-in shim (F-DX-04) — point existing OpenAI SDK code at Gavio.
|
|
3
|
+
*
|
|
4
|
+
* import { Gateway } from 'gavio'
|
|
5
|
+
* import { GavioOpenAI } from 'gavio/shim/openai'
|
|
6
|
+
*
|
|
7
|
+
* const client = new GavioOpenAI(new Gateway({ provider: 'openai', model: 'gpt-4o' }))
|
|
8
|
+
* const resp = await client.chat.completions.create({
|
|
9
|
+
* model: 'gpt-4o', messages: [{ role: 'user', content: 'hi' }],
|
|
10
|
+
* })
|
|
11
|
+
* console.log(resp.choices[0].message.content)
|
|
12
|
+
*/
|
|
13
|
+
import type { Gateway } from '../gateway.js';
|
|
14
|
+
import type { Message } from '../types.js';
|
|
15
|
+
export interface ChatCompletion {
|
|
16
|
+
id: string;
|
|
17
|
+
object: 'chat.completion';
|
|
18
|
+
model: string;
|
|
19
|
+
choices: {
|
|
20
|
+
index: number;
|
|
21
|
+
message: {
|
|
22
|
+
role: string;
|
|
23
|
+
content: string;
|
|
24
|
+
};
|
|
25
|
+
finish_reason: string;
|
|
26
|
+
}[];
|
|
27
|
+
usage: {
|
|
28
|
+
prompt_tokens: number;
|
|
29
|
+
completion_tokens: number;
|
|
30
|
+
total_tokens: number;
|
|
31
|
+
};
|
|
32
|
+
gavio: {
|
|
33
|
+
costUsd: number;
|
|
34
|
+
cacheHit: boolean;
|
|
35
|
+
interceptorsFired: string[];
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
export interface CreateParams {
|
|
39
|
+
messages: Message[];
|
|
40
|
+
model?: string;
|
|
41
|
+
temperature?: number;
|
|
42
|
+
max_tokens?: number;
|
|
43
|
+
}
|
|
44
|
+
declare class Completions {
|
|
45
|
+
private readonly gw;
|
|
46
|
+
constructor(gw: Gateway);
|
|
47
|
+
create(params: CreateParams): Promise<ChatCompletion>;
|
|
48
|
+
}
|
|
49
|
+
/** OpenAI-client-shaped facade over a Gavio Gateway. */
|
|
50
|
+
export declare class GavioOpenAI {
|
|
51
|
+
readonly chat: {
|
|
52
|
+
completions: Completions;
|
|
53
|
+
};
|
|
54
|
+
constructor(gateway: Gateway);
|
|
55
|
+
}
|
|
56
|
+
export {};
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI drop-in shim (F-DX-04) — point existing OpenAI SDK code at Gavio.
|
|
3
|
+
*
|
|
4
|
+
* import { Gateway } from 'gavio'
|
|
5
|
+
* import { GavioOpenAI } from 'gavio/shim/openai'
|
|
6
|
+
*
|
|
7
|
+
* const client = new GavioOpenAI(new Gateway({ provider: 'openai', model: 'gpt-4o' }))
|
|
8
|
+
* const resp = await client.chat.completions.create({
|
|
9
|
+
* model: 'gpt-4o', messages: [{ role: 'user', content: 'hi' }],
|
|
10
|
+
* })
|
|
11
|
+
* console.log(resp.choices[0].message.content)
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Reshape a Gavio response into an OpenAI-style `chat.completion` object.
 *
 * The result always has exactly one choice with role `assistant` and
 * `finish_reason: 'stop'`. Gavio-specific metadata (cost, cache hit,
 * interceptors fired) is carried under the extra `gavio` key.
 */
function toCompletion(resp) {
    const { promptTokens, completionTokens, totalTokens } = resp.usage;
    const onlyChoice = {
        index: 0,
        message: { role: 'assistant', content: resp.content },
        finish_reason: 'stop',
    };
    const usage = {
        prompt_tokens: promptTokens,
        completion_tokens: completionTokens,
        total_tokens: totalTokens,
    };
    const gavio = {
        costUsd: resp.costUsd,
        cacheHit: resp.cacheHit,
        interceptorsFired: resp.interceptorsFired,
    };
    return {
        id: resp.traceId,
        object: 'chat.completion',
        // Fall back to the requested model when no version is reported.
        model: resp.modelVersion || resp.model,
        choices: [onlyChoice],
        usage,
        gavio,
    };
}
|
|
33
|
+
/** `chat.completions` facade: forwards `create()` calls to a Gavio gateway. */
class Completions {
    gw;
    constructor(gw) {
        this.gw = gw;
    }
    /**
     * Run one chat completion through the gateway.
     *
     * `temperature` defaults to 0.7 and `max_tokens` to 1024 when the caller
     * leaves them null or undefined.
     */
    async create(params) {
        const options = {
            temperature: params.temperature ?? 0.7,
            maxTokens: params.max_tokens ?? 1024,
        };
        const request = { messages: params.messages, model: params.model, options };
        const gatewayResponse = await this.gw.complete(request);
        return toCompletion(gatewayResponse);
    }
}
/** OpenAI-client-shaped facade over a Gavio Gateway. */
export class GavioOpenAI {
    chat;
    constructor(gateway) {
        const completions = new Completions(gateway);
        this.chat = { completions };
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gavio",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "The open standard AI gateway for production systems — PII guarding, audit, reliability, and cost tracking.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -65,6 +65,26 @@
|
|
|
65
65
|
"import": "./dist/esm/interceptors/reliability/index.js",
|
|
66
66
|
"require": "./dist/cjs/interceptors/reliability/index.js"
|
|
67
67
|
},
|
|
68
|
+
"./interceptors/governance": {
|
|
69
|
+
"types": "./dist/esm/interceptors/governance/index.d.ts",
|
|
70
|
+
"import": "./dist/esm/interceptors/governance/index.js",
|
|
71
|
+
"require": "./dist/cjs/interceptors/governance/index.js"
|
|
72
|
+
},
|
|
73
|
+
"./interceptors/guardrails": {
|
|
74
|
+
"types": "./dist/esm/interceptors/guardrails/index.d.ts",
|
|
75
|
+
"import": "./dist/esm/interceptors/guardrails/index.js",
|
|
76
|
+
"require": "./dist/cjs/interceptors/guardrails/index.js"
|
|
77
|
+
},
|
|
78
|
+
"./interceptors/injection": {
|
|
79
|
+
"types": "./dist/esm/interceptors/injection.d.ts",
|
|
80
|
+
"import": "./dist/esm/interceptors/injection.js",
|
|
81
|
+
"require": "./dist/cjs/interceptors/injection.js"
|
|
82
|
+
},
|
|
83
|
+
"./shim/openai": {
|
|
84
|
+
"types": "./dist/esm/shim/openai.d.ts",
|
|
85
|
+
"import": "./dist/esm/shim/openai.js",
|
|
86
|
+
"require": "./dist/cjs/shim/openai.js"
|
|
87
|
+
},
|
|
68
88
|
"./providers": {
|
|
69
89
|
"types": "./dist/esm/providers/index.d.ts",
|
|
70
90
|
"import": "./dist/esm/providers/index.js",
|
|
@@ -99,11 +119,20 @@
|
|
|
99
119
|
},
|
|
100
120
|
"keywords": [
|
|
101
121
|
"ai",
|
|
102
|
-
"gateway",
|
|
122
|
+
"ai-gateway",
|
|
103
123
|
"llm",
|
|
124
|
+
"llm-gateway",
|
|
125
|
+
"llmops",
|
|
126
|
+
"gateway",
|
|
104
127
|
"pii",
|
|
128
|
+
"pii-redaction",
|
|
105
129
|
"audit",
|
|
130
|
+
"observability",
|
|
106
131
|
"reliability",
|
|
132
|
+
"retry",
|
|
133
|
+
"fallback",
|
|
134
|
+
"cost-tracking",
|
|
135
|
+
"interceptor",
|
|
107
136
|
"openai",
|
|
108
137
|
"anthropic"
|
|
109
138
|
],
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config loader (F-DX-05) — build a Gateway from an object or a JSON file.
|
|
3
|
+
*
|
|
4
|
+
* const gw = await Gateway.fromConfig('gateway.json')
|
|
5
|
+
*
|
|
6
|
+
* JSON is supported out of the box; string values expand ${ENV_VAR}.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { readFileSync } from 'node:fs'
|
|
10
|
+
import { ConfigurationError } from './errors.js'
|
|
11
|
+
import { Gateway } from './gateway.js'
|
|
12
|
+
import { auditInterceptor } from './interceptors/audit/index.js'
|
|
13
|
+
import { hashingEmbedder, semanticCache } from './interceptors/cache/index.js'
|
|
14
|
+
import { costControl, modelPolicy, rateLimiter } from './interceptors/governance/index.js'
|
|
15
|
+
import { promptInjectionGuard } from './interceptors/injection.js'
|
|
16
|
+
import { piiGuard } from './interceptors/pii/index.js'
|
|
17
|
+
import { retryInterceptor, timeoutPolicy } from './interceptors/reliability/index.js'
|
|
18
|
+
|
|
19
|
+
type Cfg = Record<string, unknown>
|
|
20
|
+
|
|
21
|
+
/**
 * Read a JSON config file and expand `${ENV_VAR}` references in its strings.
 *
 * @param path Path to a `.json` file.
 * @returns The parsed, env-expanded config object.
 * @throws ConfigurationError when the path does not end in `.json`, the file
 *   is not valid JSON, or the JSON root is not an object. Filesystem errors
 *   from reading the file propagate unchanged.
 */
export function loadConfig(path: string): Cfg {
  // Reject unsupported formats before touching the filesystem.
  if (!path.endsWith('.json')) {
    throw new ConfigurationError('JS config loader supports JSON only (use .json)')
  }
  const text = readFileSync(path, 'utf8')

  let parsed: unknown
  try {
    parsed = JSON.parse(text)
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    throw new ConfigurationError(`invalid JSON in config file ${path}: ${reason}`)
  }
  // The rest of the loader indexes the config by key, so the root must be an object.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new ConfigurationError(`config file ${path} must contain a JSON object at the top level`)
  }
  return expand(parsed) as Cfg
}
|
|
28
|
+
|
|
29
|
+
/**
 * Recursively replace `${NAME}` placeholders in every string of a JSON-like value.
 *
 * Arrays and objects are rebuilt (the input is not mutated); strings get each
 * `${NAME}` swapped for `process.env.NAME`, or for the empty string when that
 * variable is unset; every other value is returned as-is.
 */
function expand(obj: unknown): unknown {
  if (typeof obj === 'string') {
    return obj.replace(/\$\{(\w+)\}/g, (_match, name: string) => process.env[name] ?? '')
  }
  if (obj === null || typeof obj !== 'object') return obj
  if (Array.isArray(obj)) return obj.map((item) => expand(item))

  const pairs: [string, unknown][] = []
  for (const [key, value] of Object.entries(obj)) {
    pairs.push([key, expand(value)])
  }
  return Object.fromEntries(pairs)
}
|
|
39
|
+
|
|
40
|
+
/**
 * Assemble a Gateway from an already-parsed config object.
 *
 * Top-level keys `provider`, `model`, `devMode`/`dev_mode` and
 * `dryRun`/`dry_run` become gateway options. Each entry under `interceptors`
 * (`audit`, `prompt_injection`, `pii_guard`, `cost_control`, `rate_limiter`,
 * `model_policy`, `semantic_cache`, `timeout`, `retry`) is registered, in that
 * fixed order, unless it is absent or has `enabled: false`. Option names are
 * accepted in camelCase or snake_case.
 *
 * Numeric options may arrive as strings (e.g. after `${ENV_VAR}` expansion)
 * and are coerced; an empty string counts as "not set".
 *
 * @throws ConfigurationError when a numeric option is not a finite number, or
 *   when `cost_control` is enabled without a hard cap.
 */
export function buildFromConfig(config: Cfg): Gateway {
  const gatewayOptions: Cfg = {}
  if (config['provider']) gatewayOptions['provider'] = config['provider']
  if (config['model']) gatewayOptions['model'] = config['model']
  if (config['devMode'] ?? config['dev_mode']) gatewayOptions['devMode'] = true
  if (config['dryRun'] ?? config['dry_run']) gatewayOptions['dryRun'] = true

  let gw = new Gateway(gatewayOptions)
  const ic = (config['interceptors'] as Record<string, Cfg> | undefined) ?? {}

  // Returns the section for an interceptor, or null when absent or disabled.
  const cfg = (name: string): Cfg | null => {
    const entry = ic[name]
    return entry && entry['enabled'] !== false ? entry : null
  }

  // Coerce an optional numeric option. Unset (undefined, null, '') yields
  // undefined; anything that is not a finite number is a configuration error
  // rather than a silent NaN.
  const num = (section: string, key: string, raw: unknown): number | undefined => {
    if (raw === undefined || raw === null || raw === '') return undefined
    const value = typeof raw === 'number' ? raw : Number(raw)
    if (!Number.isFinite(value)) {
      throw new ConfigurationError(
        `interceptors.${section}.${key} must be a finite number, got ${JSON.stringify(raw)}`,
      )
    }
    return value
  }

  let c: Cfg | null
  if ((c = cfg('audit'))) {
    gw = gw.use(
      auditInterceptor({
        sink: (c['sink'] as 'stdout') ?? 'stdout',
        hashChain: Boolean(c['hashChain'] ?? c['hash_chain']),
      }),
    )
  }
  if ((c = cfg('prompt_injection'))) {
    gw = gw.use(promptInjectionGuard({ action: (c['action'] as 'block' | 'flag') ?? 'block' }))
  }
  if ((c = cfg('pii_guard'))) {
    gw = gw.use(
      piiGuard({
        sensitivity: (c['sensitivity'] as never) ?? 'strict',
        mode: (c['mode'] as never) ?? 'redact',
      }),
    )
  }
  if ((c = cfg('cost_control'))) {
    const hardCapUsd = num('cost_control', 'hardCapUsd', c['hardCapUsd'] ?? c['hard_cap_usd'])
    // Without this check a missing cap became NaN via Number(undefined).
    if (hardCapUsd === undefined) {
      throw new ConfigurationError('interceptors.cost_control requires hardCapUsd (or hard_cap_usd)')
    }
    gw = gw.use(
      costControl({
        hardCapUsd,
        softCapUsd: num('cost_control', 'softCapUsd', c['softCapUsd'] ?? c['soft_cap_usd']),
        scope: (c['scope'] as never) ?? 'global',
        window: (c['window'] as never) ?? 'day',
      }),
    )
  }
  if ((c = cfg('rate_limiter'))) {
    gw = gw.use(
      rateLimiter({
        maxRequestsPerMinute: num(
          'rate_limiter',
          'maxRequestsPerMinute',
          c['maxRequestsPerMinute'] ?? c['max_requests_per_minute'],
        ),
        maxTokensPerMinute: num(
          'rate_limiter',
          'maxTokensPerMinute',
          c['maxTokensPerMinute'] ?? c['max_tokens_per_minute'],
        ),
        scope: (c['scope'] as never) ?? 'global',
      }),
    )
  }
  if ((c = cfg('model_policy'))) {
    gw = gw.use(modelPolicy({ roles: (c['roles'] as Record<string, string[]>) ?? {} }))
  }
  if ((c = cfg('semantic_cache'))) {
    // The built-in hashing embedder is only attached when semantic matching is requested.
    const embedder = (c['enableSemantic'] ?? c['enable_semantic']) ? hashingEmbedder() : undefined
    gw = gw.use(
      semanticCache({
        embedder,
        similarityThreshold:
          num(
            'semantic_cache',
            'similarityThreshold',
            c['similarityThreshold'] ?? c['similarity_threshold'],
          ) ?? 0.95,
      }),
    )
  }
  if ((c = cfg('timeout'))) {
    gw = gw.use(
      timeoutPolicy({
        timeoutSeconds:
          num('timeout', 'timeoutSeconds', c['timeoutSeconds'] ?? c['timeout_seconds']) ?? 30,
      }),
    )
  }
  if ((c = cfg('retry'))) {
    gw = gw.use(
      retryInterceptor({
        maxAttempts: num('retry', 'maxAttempts', c['maxAttempts'] ?? c['max_attempts']) ?? 3,
        baseDelayMs: num('retry', 'baseDelayMs', c['baseDelayMs'] ?? c['base_delay_ms']) ?? 500,
      }),
    )
  }

  return gw
}
|
package/src/errors.ts
CHANGED
|
@@ -43,5 +43,33 @@ export class PiiBlockedError extends GavioError {
|
|
|
43
43
|
/** A hard budget cap was exceeded. Never swallow this — surface to user. */
|
|
44
44
|
export class BudgetExceededError extends GavioError {}
|
|
45
45
|
|
|
46
|
+
/** The circuit breaker is open; the call was rejected without hitting the provider. */
|
|
47
|
+
export class CircuitOpenError extends ProviderUnavailableError {}
|
|
48
|
+
|
|
49
|
+
/** A local rate limit (requests/tokens per minute) was exceeded. */
|
|
50
|
+
export class RateLimitExceededError extends GavioError {}
|
|
51
|
+
|
|
52
|
+
/** The caller's role is not permitted to use the requested model (RBAC). */
|
|
53
|
+
export class ModelNotAllowedError extends GavioError {
|
|
54
|
+
readonly role: string
|
|
55
|
+
readonly model: string
|
|
56
|
+
|
|
57
|
+
constructor(role: string, model: string) {
|
|
58
|
+
super(`role ${JSON.stringify(role)} may not use model ${JSON.stringify(model)}`)
|
|
59
|
+
this.role = role
|
|
60
|
+
this.model = model
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
46
64
|
/** Output failed a guardrail validator with onFailure='error'. */
|
|
47
65
|
export class GuardrailViolationError extends GavioError {}
|
|
66
|
+
|
|
67
|
+
/** A prompt-injection attempt was detected and the guard is in block mode. */
|
|
68
|
+
export class PromptInjectionError extends GavioError {
|
|
69
|
+
readonly patterns: string[]
|
|
70
|
+
|
|
71
|
+
constructor(patterns: string[]) {
|
|
72
|
+
super(`prompt injection detected: ${patterns.join(', ')}`)
|
|
73
|
+
this.patterns = patterns
|
|
74
|
+
}
|
|
75
|
+
}
|
package/src/gateway.ts
CHANGED
|
@@ -66,6 +66,16 @@ export class Gateway {
|
|
|
66
66
|
this.pricing = options.pricing ?? new PricingProvider()
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
+
/**
|
|
70
|
+
* Build a Gateway from a config object or a JSON file path (F-DX-05).
|
|
71
|
+
* Async so the config module loads lazily (avoids a circular import).
|
|
72
|
+
*/
|
|
73
|
+
static async fromConfig(config: string | Record<string, unknown>): Promise<Gateway> {
|
|
74
|
+
const mod = await import('./config.js')
|
|
75
|
+
const data = typeof config === 'string' ? mod.loadConfig(config) : config
|
|
76
|
+
return mod.buildFromConfig(data)
|
|
77
|
+
}
|
|
78
|
+
|
|
69
79
|
/** Register an interceptor or executor policy. First-registered = outermost. */
|
|
70
80
|
use(interceptor: Interceptor): this {
|
|
71
81
|
this.interceptors.push(interceptor)
|
|
@@ -5,3 +5,5 @@ export type { AuditRecordInit } from './record.js'
|
|
|
5
5
|
export type { AuditSink } from './sink.js'
|
|
6
6
|
export { stdoutSink } from './sinks/stdout.js'
|
|
7
7
|
export type { StdoutSinkOptions } from './sinks/stdout.js'
|
|
8
|
+
export { verifyChain, buildCallGraph } from './trace.js'
|
|
9
|
+
export type { TraceNode } from './trace.js'
|
|
@@ -14,6 +14,8 @@ export const AUDIT_NAME = 'audit'
|
|
|
14
14
|
|
|
15
15
|
export interface AuditInterceptorOptions {
|
|
16
16
|
sink?: AuditSink | 'stdout'
|
|
17
|
+
/** F-OBS-02: link each record via previousHash into a tamper-evident chain. */
|
|
18
|
+
hashChain?: boolean
|
|
17
19
|
}
|
|
18
20
|
|
|
19
21
|
/**
|
|
@@ -29,9 +31,12 @@ class AuditInterceptor implements Interceptor {
|
|
|
29
31
|
readonly dryRunSafe = true // auditing is observation-only, so it always runs
|
|
30
32
|
|
|
31
33
|
private readonly sink: AuditSink
|
|
34
|
+
private readonly hashChain: boolean
|
|
35
|
+
private lastHash = ''
|
|
32
36
|
|
|
33
37
|
constructor(options: AuditInterceptorOptions = {}) {
|
|
34
38
|
this.sink = resolveSink(options.sink)
|
|
39
|
+
this.hashChain = options.hashChain ?? false
|
|
35
40
|
}
|
|
36
41
|
|
|
37
42
|
async before(request: GavioRequest, ctx: InterceptorContext): Promise<GavioRequest> {
|
|
@@ -65,6 +70,10 @@ class AuditInterceptor implements Interceptor {
|
|
|
65
70
|
guardrailOutcome: ctx.guardrailOutcome,
|
|
66
71
|
riskScore: ctx.riskScore,
|
|
67
72
|
})
|
|
73
|
+
if (this.hashChain) {
|
|
74
|
+
record.previousHash = this.lastHash
|
|
75
|
+
this.lastHash = record.contentHash()
|
|
76
|
+
}
|
|
68
77
|
response.audit = record
|
|
69
78
|
try {
|
|
70
79
|
await this.sink.write(record)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/** Audit-chain verification (F-OBS-02) and multi-agent DAG trace (F-OBS-03). */
|
|
2
|
+
|
|
3
|
+
import type { AuditRecord } from './record.js'
|
|
4
|
+
|
|
5
|
+
/**
 * Check that a sequence of audit records is an unbroken hash chain.
 *
 * The first record must carry an empty `previousHash`; every later record's
 * `previousHash` must equal `contentHash()` of the record just before it. An
 * empty list is trivially intact. The walk stops at the first mismatch.
 *
 * Note: this compares neighbouring links only, so records removed from the
 * END of the list cannot be detected by this check alone.
 *
 * @returns true when every link matches, false otherwise.
 */
export function verifyChain(records: AuditRecord[]): boolean {
  let expectedPrevious = ''
  for (let index = 0; index < records.length; index += 1) {
    const current = records[index]!
    if (current.previousHash !== expectedPrevious) {
      return false
    }
    expectedPrevious = current.contentHash()
  }
  return true
}
|
|
18
|
+
|
|
19
|
+
/** One call in the reconstructed multi-agent trace, with its nested child calls. */
export interface TraceNode {
  traceId: string
  agentId: string | null
  parentTraceId: string | null
  children: TraceNode[]
}

/**
 * Reconstruct the multi-agent call graph from audit records using
 * `parentTraceId` + `traceId`.
 *
 * Each record becomes one node (a later record with the same `traceId`
 * replaces an earlier one). A node is attached under its parent when that
 * parent is present in `records`; otherwise it is returned as a root.
 *
 * Malformed input is kept acyclic: a record that names itself as parent, or a
 * link that would close a parent cycle, is not attached and that node is
 * returned as a root instead. Without this, such records would form a circular
 * structure with no root and silently disappear from the result.
 *
 * @returns The root nodes, in first-seen order.
 */
export function buildCallGraph(records: AuditRecord[]): TraceNode[] {
  const nodes = new Map<string, TraceNode>()
  for (const rec of records) {
    nodes.set(rec.traceId, {
      traceId: rec.traceId,
      agentId: rec.agentId,
      parentTraceId: rec.parentTraceId,
      children: [],
    })
  }

  // traceId -> parent node for links actually made. Only acyclic links are
  // ever added, so walking up this map always terminates.
  const attachedTo = new Map<string, TraceNode>()
  const wouldCloseCycle = (child: TraceNode, parent: TraceNode): boolean => {
    let ancestor: TraceNode | undefined = parent
    while (ancestor) {
      if (ancestor === child) return true
      ancestor = attachedTo.get(ancestor.traceId)
    }
    return false
  }

  const roots: TraceNode[] = []
  for (const node of nodes.values()) {
    const parent = node.parentTraceId ? nodes.get(node.parentTraceId) : undefined
    if (parent && !wouldCloseCycle(node, parent)) {
      parent.children.push(node)
      attachedTo.set(node.traceId, parent)
    } else {
      roots.push(node)
    }
  }
  return roots
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings for the semantic cache (F-CACHE-02).
|
|
3
|
+
*
|
|
4
|
+
* Zero-dependency hashed bag-of-words embedder (L2-normalised) — good enough to
|
|
5
|
+
* dedup near-identical prompts. Plug in a real embedder implementing `Embedder`
|
|
6
|
+
* for production semantic matching.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { createHash } from 'node:crypto'
|
|
10
|
+
|
|
11
|
+
/** Turns text into a fixed-length numeric vector for similarity comparison. */
export interface Embedder {
  embed(text: string): number[]
}

// Tokens are runs of ASCII lowercase letters/digits (input is lowercased first).
const TOKEN = /[a-z0-9]+/g

/**
 * Deterministic hashed bag-of-words embedder.
 *
 * Each token is hashed into one of `dim` buckets; the bucket counts are then
 * L2-normalised. Text with no tokens yields the all-zero vector.
 *
 * @param dim Vector length; must be a positive integer (default 256).
 * @throws RangeError when `dim` is not a positive integer. Previously a zero
 *   `dim` failed later with "Division by zero" and a fractional one with an
 *   opaque array-length error.
 */
export function hashingEmbedder(dim = 256): Embedder {
  if (!Number.isInteger(dim) || dim <= 0) {
    throw new RangeError(`hashingEmbedder: dim must be a positive integer, got ${dim}`)
  }
  // Hoisted: the modulus is the same for every token.
  const modulus = BigInt(dim)
  return {
    embed(text: string): number[] {
      const vec = new Array<number>(dim).fill(0)
      const tokens = text.toLowerCase().match(TOKEN) ?? []
      for (const token of tokens) {
        // Parity note: Python uses blake2b(digest_size=8); here we take the
        // first 8 bytes of blake2b512. Both are deterministic; the JS cache is
        // per-process so cross-language byte-parity is not required.
        const digest = createHash('blake2b512').update(token).digest()
        // First 8 digest bytes as a big-endian unsigned integer.
        const bucket = Number(digest.readBigUInt64BE(0) % modulus)
        vec[bucket]! += 1
      }
      const norm = Math.sqrt(vec.reduce((s, x) => s + x * x, 0))
      if (norm === 0) return vec
      return vec.map((x) => x / norm)
    },
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Cosine of the angle between two equal-length vectors.
 *
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when either vector has zero
 *   magnitude (this includes two empty vectors).
 * @throws Error when the vectors differ in length.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) throw new Error('vectors must have equal length')

  let dot = 0
  let na = 0
  let nb = 0
  for (const [i, x] of a.entries()) {
    const y = b[i]!
    dot += x * y
    na += x * x
    nb += y * y
  }

  if (na !== 0 && nb !== 0) {
    return dot / (Math.sqrt(na) * Math.sqrt(nb))
  }
  return 0
}
|