gavio 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/config.js +106 -0
- package/dist/cjs/errors.js +29 -1
- package/dist/cjs/gateway.js +88 -0
- package/dist/cjs/index.js +4 -2
- package/dist/cjs/interceptors/audit/index.js +4 -1
- package/dist/cjs/interceptors/audit/interceptor.js +11 -0
- package/dist/cjs/interceptors/audit/record.js +17 -3
- package/dist/cjs/interceptors/audit/trace.js +43 -0
- package/dist/cjs/interceptors/cache/embedding.js +53 -0
- package/dist/cjs/interceptors/cache/index.js +9 -5
- package/dist/cjs/interceptors/cache/interceptor.js +80 -0
- package/dist/cjs/interceptors/cache/vector.js +35 -0
- package/dist/cjs/interceptors/governance/budget.js +45 -0
- package/dist/cjs/interceptors/governance/index.js +10 -0
- package/dist/cjs/interceptors/governance/model-policy.js +18 -0
- package/dist/cjs/interceptors/governance/rate-limit.js +46 -0
- package/dist/cjs/interceptors/guardrails/index.js +11 -0
- package/dist/cjs/interceptors/guardrails/interceptor.js +40 -0
- package/dist/cjs/interceptors/guardrails/validator.js +8 -0
- package/dist/cjs/interceptors/guardrails/validators/regex.js +32 -0
- package/dist/cjs/interceptors/guardrails/validators/schema.js +63 -0
- package/dist/cjs/interceptors/injection.js +62 -0
- package/dist/cjs/interceptors/metrics/index.js +9 -0
- package/dist/cjs/interceptors/metrics/interceptor.js +37 -0
- package/dist/cjs/interceptors/metrics/registry.js +0 -0
- package/dist/cjs/interceptors/quality/index.js +7 -0
- package/dist/cjs/interceptors/quality/risk.js +49 -0
- package/dist/cjs/interceptors/reliability/circuit-breaker.js +82 -0
- package/dist/cjs/interceptors/reliability/index.js +8 -1
- package/dist/cjs/interceptors/reliability/load-balancer.js +38 -0
- package/dist/cjs/interceptors/reliability/stream-buffer.js +28 -0
- package/dist/cjs/pricing.js +5 -1
- package/dist/cjs/providers/azure-openai.js +56 -0
- package/dist/cjs/providers/base.js +9 -0
- package/dist/cjs/providers/gemini.js +73 -0
- package/dist/cjs/providers/index.js +22 -6
- package/dist/cjs/providers/ollama.js +41 -0
- package/dist/cjs/request.js +3 -0
- package/dist/cjs/shim/openai.js +57 -0
- package/dist/cjs/types.js +53 -1
- package/dist/esm/config.d.ts +12 -0
- package/dist/esm/config.js +102 -0
- package/dist/esm/errors.d.ts +17 -0
- package/dist/esm/errors.js +24 -0
- package/dist/esm/gateway.d.ts +18 -1
- package/dist/esm/gateway.js +55 -0
- package/dist/esm/index.d.ts +3 -3
- package/dist/esm/index.js +2 -2
- package/dist/esm/interceptors/audit/index.d.ts +2 -0
- package/dist/esm/interceptors/audit/index.js +1 -0
- package/dist/esm/interceptors/audit/interceptor.d.ts +2 -0
- package/dist/esm/interceptors/audit/interceptor.js +11 -0
- package/dist/esm/interceptors/audit/record.d.ts +4 -2
- package/dist/esm/interceptors/audit/record.js +18 -4
- package/dist/esm/interceptors/audit/trace.d.ts +19 -0
- package/dist/esm/interceptors/audit/trace.js +39 -0
- package/dist/esm/interceptors/cache/embedding.d.ts +14 -0
- package/dist/esm/interceptors/cache/embedding.js +49 -0
- package/dist/esm/interceptors/cache/index.d.ts +7 -4
- package/dist/esm/interceptors/cache/index.js +4 -4
- package/dist/esm/interceptors/cache/interceptor.d.ts +19 -0
- package/dist/esm/interceptors/cache/interceptor.js +77 -0
- package/dist/esm/interceptors/cache/vector.d.ts +9 -0
- package/dist/esm/interceptors/cache/vector.js +32 -0
- package/dist/esm/interceptors/governance/budget.d.ts +11 -0
- package/dist/esm/interceptors/governance/budget.js +42 -0
- package/dist/esm/interceptors/governance/index.d.ts +7 -0
- package/dist/esm/interceptors/governance/index.js +4 -0
- package/dist/esm/interceptors/governance/model-policy.d.ts +8 -0
- package/dist/esm/interceptors/governance/model-policy.js +15 -0
- package/dist/esm/interceptors/governance/rate-limit.d.ts +9 -0
- package/dist/esm/interceptors/governance/rate-limit.js +43 -0
- package/dist/esm/interceptors/guardrails/index.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/index.js +4 -0
- package/dist/esm/interceptors/guardrails/interceptor.d.ts +15 -0
- package/dist/esm/interceptors/guardrails/interceptor.js +37 -0
- package/dist/esm/interceptors/guardrails/validator.d.ts +11 -0
- package/dist/esm/interceptors/guardrails/validator.js +3 -0
- package/dist/esm/interceptors/guardrails/validators/regex.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/validators/regex.js +28 -0
- package/dist/esm/interceptors/guardrails/validators/schema.d.ts +5 -0
- package/dist/esm/interceptors/guardrails/validators/schema.js +60 -0
- package/dist/esm/interceptors/injection.d.ts +17 -0
- package/dist/esm/interceptors/injection.js +59 -0
- package/dist/esm/interceptors/metrics/index.d.ts +5 -0
- package/dist/esm/interceptors/metrics/index.js +3 -0
- package/dist/esm/interceptors/metrics/interceptor.d.ts +22 -0
- package/dist/esm/interceptors/metrics/interceptor.js +33 -0
- package/dist/esm/interceptors/metrics/registry.d.ts +31 -0
- package/dist/esm/interceptors/metrics/registry.js +0 -0
- package/dist/esm/interceptors/quality/index.d.ts +3 -0
- package/dist/esm/interceptors/quality/index.js +2 -0
- package/dist/esm/interceptors/quality/risk.d.ts +32 -0
- package/dist/esm/interceptors/quality/risk.js +44 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.d.ts +15 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.js +78 -0
- package/dist/esm/interceptors/reliability/index.d.ts +5 -0
- package/dist/esm/interceptors/reliability/index.js +3 -0
- package/dist/esm/interceptors/reliability/load-balancer.d.ts +8 -0
- package/dist/esm/interceptors/reliability/load-balancer.js +35 -0
- package/dist/esm/interceptors/reliability/stream-buffer.d.ts +18 -0
- package/dist/esm/interceptors/reliability/stream-buffer.js +24 -0
- package/dist/esm/pricing.js +5 -1
- package/dist/esm/providers/azure-openai.d.ts +28 -0
- package/dist/esm/providers/azure-openai.js +53 -0
- package/dist/esm/providers/base.d.ts +7 -0
- package/dist/esm/providers/base.js +9 -1
- package/dist/esm/providers/gemini.d.ts +36 -0
- package/dist/esm/providers/gemini.js +69 -0
- package/dist/esm/providers/index.d.ts +7 -1
- package/dist/esm/providers/index.js +18 -5
- package/dist/esm/providers/ollama.d.ts +21 -0
- package/dist/esm/providers/ollama.js +38 -0
- package/dist/esm/request.d.ts +4 -1
- package/dist/esm/request.js +4 -1
- package/dist/esm/shim/openai.d.ts +56 -0
- package/dist/esm/shim/openai.js +53 -0
- package/dist/esm/types.d.ts +54 -0
- package/dist/esm/types.js +50 -0
- package/package.json +41 -2
- package/src/config.ts +125 -0
- package/src/errors.ts +28 -0
- package/src/gateway.ts +62 -1
- package/src/index.ts +4 -2
- package/src/interceptors/audit/index.ts +2 -0
- package/src/interceptors/audit/interceptor.ts +13 -0
- package/src/interceptors/audit/record.ts +18 -4
- package/src/interceptors/audit/trace.ts +47 -0
- package/src/interceptors/cache/embedding.ts +53 -0
- package/src/interceptors/cache/index.ts +7 -4
- package/src/interceptors/cache/interceptor.ts +111 -0
- package/src/interceptors/cache/vector.ts +45 -0
- package/src/interceptors/governance/budget.ts +59 -0
- package/src/interceptors/governance/index.ts +8 -0
- package/src/interceptors/governance/model-policy.ts +25 -0
- package/src/interceptors/governance/rate-limit.ts +63 -0
- package/src/interceptors/guardrails/index.ts +7 -0
- package/src/interceptors/guardrails/interceptor.ts +56 -0
- package/src/interceptors/guardrails/validator.ts +14 -0
- package/src/interceptors/guardrails/validators/regex.ts +29 -0
- package/src/interceptors/guardrails/validators/schema.ts +62 -0
- package/src/interceptors/injection.ts +72 -0
- package/src/interceptors/metrics/index.ts +6 -0
- package/src/interceptors/metrics/interceptor.ts +46 -0
- package/src/interceptors/metrics/registry.ts +0 -0
- package/src/interceptors/quality/index.ts +4 -0
- package/src/interceptors/quality/risk.ts +64 -0
- package/src/interceptors/reliability/circuit-breaker.ts +102 -0
- package/src/interceptors/reliability/index.ts +5 -0
- package/src/interceptors/reliability/load-balancer.ts +56 -0
- package/src/interceptors/reliability/stream-buffer.ts +27 -0
- package/src/pricing.ts +5 -1
- package/src/providers/azure-openai.ts +77 -0
- package/src/providers/base.ts +21 -1
- package/src/providers/gemini.ts +95 -0
- package/src/providers/index.ts +21 -5
- package/src/providers/ollama.ts +61 -0
- package/src/request.ts +6 -2
- package/src/shim/openai.ts +76 -0
- package/src/types.ts +77 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RiskScorer (F-QUA-06) — a composite risk score from per-request signals.
|
|
3
|
+
*
|
|
4
|
+
* Folds the signals other interceptors leave on the {@link InterceptorContext}
|
|
5
|
+
* — PII entities found, guardrail outcome, and the prompt-injection risk — into
|
|
6
|
+
* a single score in `[0, 1]` written to `ctx.riskScore` (and thus the audit
|
|
7
|
+
* record). Register it *inside* the audit interceptor so audit sees the composite.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { InterceptorContext } from '../../context.js'
|
|
11
|
+
import type { GavioResponse } from '../../response.js'
|
|
12
|
+
import type { Interceptor } from '../base.js'
|
|
13
|
+
|
|
14
|
+
/** Optional tuning knobs for the risk scorer; omitted fields fall back to the scorer's defaults. */
export interface RiskWeights {
  /** Weight of the PII signal in the composite (scorer default: 0.3). */
  pii?: number
  /** Weight of the guardrail-outcome signal in the composite (scorer default: 0.4). */
  guardrail?: number
  /** Weight of the prompt-injection signal in the composite (scorer default: 0.3). */
  injection?: number
  /**
   * PII entity count at which the PII signal saturates to 1.0
   * (<= 0 → any PII = 1.0; scorer default: 4).
   */
  piiSaturation?: number
}
|
|
21
|
+
|
|
22
|
+
// Guardrail outcome → its contribution before weighting.
// Kept in a Map rather than an object literal: indexing a plain object with an
// arbitrary outcome string such as "toString" or "constructor" resolves to an
// inherited function, which `?? 0` does not catch and which would turn the
// whole composite into NaN.
const GUARDRAIL_SIGNAL: ReadonlyMap<string, number> = new Map([
  ['FAIL', 1.0],
  ['HITL', 0.6],
])

/**
 * Interceptor that folds the PII, guardrail and prompt-injection signals found
 * on the context into one weighted score clamped to `[0, 1]`.
 */
export class RiskScorer implements Interceptor {
  readonly name = 'risk_scorer'
  readonly dryRunSafe = true

  private readonly pii: number
  private readonly guardrail: number
  private readonly injection: number
  private readonly piiSaturation: number

  /**
   * @param weights Optional overrides. Defaults: pii 0.3, guardrail 0.4,
   *   injection 0.3, piiSaturation 4.
   */
  constructor(weights: RiskWeights = {}) {
    this.pii = weights.pii ?? 0.3
    this.guardrail = weights.guardrail ?? 0.4
    this.injection = weights.injection ?? 0.3
    this.piiSaturation = weights.piiSaturation ?? 4
  }

  /**
   * Compute the composite risk score from the three raw signals.
   *
   * @param piiCount Total number of PII entities found; values <= 0 contribute nothing.
   * @param guardrailOutcome Guardrail result; only `FAIL` (1.0) and `HITL` (0.6)
   *   contribute, any other value (including `null`) counts as 0.
   * @param injectionScore Prompt-injection score; `null` or NaN counts as 0.
   * @returns The weighted sum of the three signals, clamped to `[0, 1]`.
   */
  score(piiCount: number, guardrailOutcome: string | null, injectionScore: number | null): number {
    let piiSignal = 0
    if (piiCount > 0) {
      // Testing `> 0` (instead of `<= 0`) also sends a NaN saturation to the
      // "any PII = 1.0" branch rather than producing a NaN signal.
      piiSignal = this.piiSaturation > 0 ? Math.min(1, piiCount / this.piiSaturation) : 1
    }
    const guardrailSignal = GUARDRAIL_SIGNAL.get(guardrailOutcome ?? '') ?? 0
    // A NaN score would survive the clamp below (Math.min/Math.max propagate NaN).
    const injectionSignal =
      typeof injectionScore === 'number' && !Number.isNaN(injectionScore) ? injectionScore : 0
    const composite =
      this.pii * piiSignal + this.guardrail * guardrailSignal + this.injection * injectionSignal
    return Math.max(0, Math.min(1, composite))
  }

  /**
   * Post-interceptor hook: sums `ctx.piiEntityCounts`, reads `ctx.guardrailOutcome`
   * and the current `ctx.riskScore` (used as the injection input), then
   * overwrites `ctx.riskScore` with the composite. The response is returned
   * untouched.
   */
  async after(response: GavioResponse, ctx: InterceptorContext): Promise<GavioResponse> {
    const piiCount = Object.values(ctx.piiEntityCounts).reduce((a, b) => a + b, 0)
    ctx.riskScore = this.score(piiCount, ctx.guardrailOutcome, ctx.riskScore)
    return response
  }
}
|
|
60
|
+
|
|
61
|
+
/** Factory: create a {@link RiskScorer}, optionally overriding its weights. */
export function riskScorer(weights: RiskWeights = {}): RiskScorer {
  const scorer = new RiskScorer(weights)
  return scorer
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/** circuitBreaker (F-REL-03) — open/half-open/closed state machine. */
|
|
2
|
+
|
|
3
|
+
import type { InterceptorContext } from '../../context.js'
|
|
4
|
+
import { CircuitOpenError, ProviderError } from '../../errors.js'
|
|
5
|
+
import type { GavioRequest } from '../../request.js'
|
|
6
|
+
import type { GavioResponse } from '../../response.js'
|
|
7
|
+
import type { Executor, ExecutorPolicy } from '../base.js'
|
|
8
|
+
|
|
9
|
+
/** String constants naming the three states of the circuit-breaker state machine. */
export const CircuitState = {
  CLOSED: 'closed',
  OPEN: 'open',
  HALF_OPEN: 'half_open',
} as const

/** Union of the string values held by the `CircuitState` constant object. */
export type CircuitState = (typeof CircuitState)[keyof typeof CircuitState]
|
|
15
|
+
|
|
16
|
+
/** Tuning options for the circuit breaker; every field is optional. */
export interface CircuitBreakerOptions {
  /** Number of provider failures since the last success that trips the circuit (default 5). */
  failureThreshold?: number
  /** Seconds the circuit stays open before a probe call is allowed through (default 30). */
  recoveryTimeoutSeconds?: number
  /** Maximum number of calls admitted while the circuit is half-open (default 2). */
  halfOpenMaxCalls?: number
}
|
|
21
|
+
|
|
22
|
+
/**
 * Executor policy implementing a closed → open → half-open circuit breaker.
 *
 * - CLOSED: calls pass through; each `ProviderError` is counted and the circuit
 *   trips to OPEN once `failureThreshold` is reached. Any success resets the count.
 * - OPEN: calls are rejected with `CircuitOpenError` until the recovery timeout
 *   has elapsed since the circuit tripped.
 * - HALF_OPEN: at most `halfOpenMaxCalls` probe calls are admitted; a success
 *   closes the circuit, a `ProviderError` re-opens it.
 */
class CircuitBreaker implements ExecutorPolicy {
  readonly name = 'circuit_breaker'
  readonly isExecutorPolicy = true as const

  private state: CircuitState = CircuitState.CLOSED
  private failures = 0
  private openedAt = 0
  private halfOpenCalls = 0

  private readonly failureThreshold: number
  private readonly recoveryMs: number
  private readonly halfOpenMaxCalls: number

  /**
   * @param options Optional overrides. Defaults: failureThreshold 5,
   *   recoveryTimeoutSeconds 30, halfOpenMaxCalls 2.
   */
  constructor(options: CircuitBreakerOptions = {}) {
    this.failureThreshold = options.failureThreshold ?? 5
    this.recoveryMs = (options.recoveryTimeoutSeconds ?? 30) * 1000
    this.halfOpenMaxCalls = options.halfOpenMaxCalls ?? 2
  }

  /** The breaker's current state (not re-evaluated against the recovery timeout). */
  get currentState(): CircuitState {
    return this.state
  }

  /**
   * Run `callNext` under the breaker.
   *
   * @throws CircuitOpenError when the circuit is open or the half-open probe
   *   limit has been reached. Errors thrown by `callNext` are re-thrown as-is.
   */
  async around(
    request: GavioRequest,
    ctx: InterceptorContext,
    callNext: Executor,
  ): Promise<GavioResponse> {
    ctx.markFired(this.name)
    const isProbe = this.admit() // throws CircuitOpenError if not allowed through
    try {
      const response = await callNext(request)
      this.onSuccess()
      return response
    } catch (error) {
      if (error instanceof ProviderError) {
        this.onFailure()
      } else if (isProbe) {
        // A probe that failed for a non-provider reason says nothing about the
        // provider's health. Give its slot back; otherwise a few such errors
        // would exhaust the probe budget and leave the breaker rejecting every
        // call in HALF_OPEN with no way to leave that state.
        this.releaseProbe()
      }
      throw error
    }
  }

  /**
   * Decide whether a call may proceed, moving OPEN → HALF_OPEN once the
   * recovery timeout has elapsed.
   *
   * @returns `true` when the call was admitted as a half-open probe (and
   *   therefore holds a probe slot), `false` when the circuit is closed.
   * @throws CircuitOpenError when the call must be rejected.
   */
  private admit(): boolean {
    if (this.state === CircuitState.OPEN) {
      if (Date.now() - this.openedAt >= this.recoveryMs) {
        this.state = CircuitState.HALF_OPEN
        this.halfOpenCalls = 0
      } else {
        throw new CircuitOpenError('circuit is open')
      }
    }
    if (this.state === CircuitState.HALF_OPEN) {
      if (this.halfOpenCalls >= this.halfOpenMaxCalls) {
        throw new CircuitOpenError('circuit half-open probe limit reached')
      }
      this.halfOpenCalls += 1
      return true
    }
    return false
  }

  /** Return a probe slot taken by `admit()` if the breaker is still half-open. */
  private releaseProbe(): void {
    if (this.state === CircuitState.HALF_OPEN && this.halfOpenCalls > 0) {
      this.halfOpenCalls -= 1
    }
  }

  /** Any success closes the circuit and clears the failure count. */
  private onSuccess(): void {
    this.state = CircuitState.CLOSED
    this.failures = 0
  }

  /** A failed probe re-opens immediately; otherwise count towards the threshold. */
  private onFailure(): void {
    if (this.state === CircuitState.HALF_OPEN) {
      this.trip()
      return
    }
    this.failures += 1
    if (this.failures >= this.failureThreshold) this.trip()
  }

  /** Open the circuit and record when it opened. */
  private trip(): void {
    this.state = CircuitState.OPEN
    this.openedAt = Date.now()
  }
}
|
|
98
|
+
|
|
99
|
+
/** Factory: create a circuit-breaker executor policy from the given options. */
export function circuitBreaker(options: CircuitBreakerOptions = {}): ExecutorPolicy {
  const policy: ExecutorPolicy = new CircuitBreaker(options)
  return policy
}
|
|
@@ -6,3 +6,8 @@ export { timeoutPolicy, timeout } from './timeout.js'
|
|
|
6
6
|
export type { TimeoutPolicyOptions } from './timeout.js'
|
|
7
7
|
export { fallbackChain } from './fallback.js'
|
|
8
8
|
export type { FallbackChainOptions } from './fallback.js'
|
|
9
|
+
export { circuitBreaker, CircuitState } from './circuit-breaker.js'
|
|
10
|
+
export type { CircuitBreakerOptions } from './circuit-breaker.js'
|
|
11
|
+
export { loadBalancer } from './load-balancer.js'
|
|
12
|
+
export type { LoadBalancerOptions } from './load-balancer.js'
|
|
13
|
+
export { StreamBuffer } from './stream-buffer.js'
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/** loadBalancer (F-REL-04) — weighted round-robin across provider adapters. */
|
|
2
|
+
|
|
3
|
+
import type { InterceptorContext } from '../../context.js'
|
|
4
|
+
import type { ProviderAdapter } from '../../providers/base.js'
|
|
5
|
+
import type { GavioRequest } from '../../request.js'
|
|
6
|
+
import type { GavioResponse } from '../../response.js'
|
|
7
|
+
import { coerceProvider } from '../../types.js'
|
|
8
|
+
import type { Executor, ExecutorPolicy } from '../base.js'
|
|
9
|
+
|
|
10
|
+
/** Options for the load balancer. */
export interface LoadBalancerOptions {
  /**
   * One weight per adapter, in the same order as the adapters (the lengths
   * must match). Weights below 1 are treated as 1; when omitted every adapter
   * gets weight 1.
   */
  weights?: number[]
}
|
|
13
|
+
|
|
14
|
+
/**
 * Executor policy that distributes requests across a pool of provider adapters
 * using weighted round-robin. The chosen adapter is called directly; the
 * `callNext` executor handed to `around` is not invoked.
 */
class LoadBalancer implements ExecutorPolicy {
  readonly name = 'load_balancer'
  readonly isExecutorPolicy = true as const

  /** Adapters repeated according to their weight; cycled through by `index`. */
  private readonly pool: ProviderAdapter[]
  /** Position of the next adapter in `pool`; always within `[0, pool.length)`. */
  private index = 0

  /**
   * @param adapters Non-empty list of adapters to balance across.
   * @param options Optional per-adapter weights (same length as `adapters`).
   * @throws Error when `adapters` is empty, the weights length does not match,
   *   or a weight is not a finite number.
   */
  constructor(adapters: ProviderAdapter[], options: LoadBalancerOptions = {}) {
    if (adapters.length === 0) {
      throw new Error('loadBalancer requires at least one adapter')
    }
    const weights = options.weights ?? adapters.map(() => 1)
    if (weights.length !== adapters.length) {
      throw new Error('weights must match adapters length')
    }
    // Expand by weight, then cycle for round-robin.
    this.pool = []
    adapters.forEach((adapter, i) => {
      const weight = weights[i]
      // Fail fast: a NaN/missing weight would silently contribute zero slots
      // (possibly leaving the pool empty) and an infinite one would never
      // finish expanding.
      if (typeof weight !== 'number' || !Number.isFinite(weight)) {
        throw new Error('weights must be finite numbers')
      }
      // Same slot count as looping `k < Math.max(1, weight)`: at least one
      // slot, fractional weights rounded up.
      const slots = Math.ceil(Math.max(1, weight))
      for (let k = 0; k < slots; k++) this.pool.push(adapter)
    })
  }

  /**
   * Pick the next adapter in the rotation, copy the request with its provider
   * set to that adapter's provider, and complete it on the adapter.
   */
  async around(
    request: GavioRequest,
    ctx: InterceptorContext,
    _callNext: Executor,
  ): Promise<GavioResponse> {
    ctx.markFired(this.name)
    const adapter = this.pool[this.index]!
    // Wrap instead of counting up forever so the cursor stays a small exact integer.
    this.index = (this.index + 1) % this.pool.length
    const rerouted = request.copyWithMessages(request.messages)
    rerouted.provider = coerceProvider(adapter.providerName)
    return adapter.complete(rerouted)
  }
}
|
|
49
|
+
|
|
50
|
+
/** Factory: create a load-balancing executor policy over the given adapters. */
export function loadBalancer(
  adapters: ProviderAdapter[],
  options: LoadBalancerOptions = {},
): ExecutorPolicy {
  const policy: ExecutorPolicy = new LoadBalancer(adapters, options)
  return policy
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* StreamBuffer (F-REL-06) — accumulate a provider stream for post-interceptors.
|
|
3
|
+
*
|
|
4
|
+
* Post-interceptors (guardrails, PII restore, audit) need the *complete*
|
|
5
|
+
* response, so a streamed reply is buffered in full before the post pipeline
|
|
6
|
+
* runs and before any chunk reaches the caller. This trades first-token latency
|
|
7
|
+
* for the guarantee that every interceptor sees — and can rewrite or block — the
|
|
8
|
+
* whole response.
|
|
9
|
+
*/
|
|
10
|
+
export class StreamBuffer {
  /** Chunks in arrival order; joined lazily by `text()`. */
  private readonly parts: string[] = []
  /** Running sum of chunk lengths so `length` does not re-walk every chunk. */
  private total = 0

  /** Add one streamed chunk. */
  append(chunk: string): void {
    this.parts.push(chunk)
    this.total += chunk.length
  }

  /** The full buffered response so far. */
  text(): string {
    return this.parts.join('')
  }

  /** Total buffered length in characters (UTF-16 code units, as `String.length`). */
  get length(): number {
    return this.total
  }
}
|
package/src/pricing.ts
CHANGED
|
@@ -20,7 +20,11 @@ const DEFAULT_PRICES: Record<string, [number, number]> = {
|
|
|
20
20
|
'claude-sonnet-4-20250514': [0.003, 0.015],
|
|
21
21
|
'claude-haiku-4-5': [0.0008, 0.004],
|
|
22
22
|
'claude-opus-4-1': [0.015, 0.075],
|
|
23
|
-
//
|
|
23
|
+
// Gemini (approximate public pricing; override via config)
|
|
24
|
+
'gemini-2.0-flash': [0.0001, 0.0004],
|
|
25
|
+
'gemini-1.5-flash': [0.000075, 0.0003],
|
|
26
|
+
'gemini-1.5-pro': [0.00125, 0.005],
|
|
27
|
+
// Local (Ollama) / mock are free.
|
|
24
28
|
mock: [0.0, 0.0],
|
|
25
29
|
}
|
|
26
30
|
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/** azureOpenaiAdapter — Azure OpenAI deployment-based chat completions. */
|
|
2
|
+
|
|
3
|
+
import { ConfigurationError } from '../errors.js'
|
|
4
|
+
import type { PricingProvider } from '../pricing.js'
|
|
5
|
+
import type { GavioRequest } from '../request.js'
|
|
6
|
+
import type { GavioResponse } from '../response.js'
|
|
7
|
+
import { TokenUsage } from '../types.js'
|
|
8
|
+
import { BaseProviderAdapter } from './base.js'
|
|
9
|
+
import { postJson } from './http.js'
|
|
10
|
+
|
|
11
|
+
/** `api-version` query value used when the caller does not supply one. */
const DEFAULT_API_VERSION = '2024-06-01'

/** Construction options for the Azure OpenAI adapter; every field is optional. */
export interface AzureOpenAIAdapterOptions {
  /** API key; falls back to the `AZURE_OPENAI_API_KEY` environment variable. */
  apiKey?: string
  /** Resource endpoint; falls back to `AZURE_OPENAI_ENDPOINT`. Trailing slashes are stripped. */
  endpoint?: string
  /** Deployment name; falls back to `AZURE_OPENAI_DEPLOYMENT`, then to the request's model. */
  deployment?: string
  /** API version sent as `api-version`; defaults to `DEFAULT_API_VERSION`. */
  apiVersion?: string
  /** Request timeout in milliseconds (default 30 000). */
  timeoutMs?: number
  /** Pricing provider handed to the base adapter. */
  pricing?: PricingProvider
}
|
|
21
|
+
|
|
22
|
+
/**
 * Provider adapter for Azure OpenAI chat completions, addressed by deployment.
 * Credentials and endpoint come from the options or the `AZURE_OPENAI_*`
 * environment variables.
 */
class AzureOpenAIAdapter extends BaseProviderAdapter {
  private readonly apiKey: string | undefined
  readonly endpoint: string
  private readonly deployment: string | undefined
  private readonly apiVersion: string
  private readonly timeoutSeconds: number

  /**
   * @param options Optional overrides; unset fields fall back to
   *   `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_ENDPOINT` and
   *   `AZURE_OPENAI_DEPLOYMENT`, the default API version and a 30 s timeout.
   */
  constructor(options: AzureOpenAIAdapterOptions = {}) {
    super(options.pricing)
    this.apiKey = options.apiKey ?? process.env['AZURE_OPENAI_API_KEY']
    // Strip trailing slashes so the path can be appended with a single "/".
    this.endpoint = (options.endpoint ?? process.env['AZURE_OPENAI_ENDPOINT'] ?? '').replace(
      /\/+$/,
      '',
    )
    this.deployment = options.deployment ?? process.env['AZURE_OPENAI_DEPLOYMENT']
    this.apiVersion = options.apiVersion ?? DEFAULT_API_VERSION
    this.timeoutSeconds = (options.timeoutMs ?? 30_000) / 1000
  }

  get providerName(): string {
    return 'azure_openai'
  }

  /**
   * Build the chat-completions URL for a request.
   *
   * The deployment is the configured one, or the request's model when none is
   * configured. Both the deployment and the API version are percent-encoded so
   * that unusual characters cannot alter the path or query string.
   */
  url(request: GavioRequest): string {
    // `||` rather than `??`: an empty deployment (e.g. AZURE_OPENAI_DEPLOYMENT="")
    // must also fall back to the model instead of producing ".../deployments//chat/...".
    const deployment = this.deployment || request.model
    const path = `/openai/deployments/${encodeURIComponent(deployment)}/chat/completions`
    return `${this.endpoint}${path}?api-version=${encodeURIComponent(this.apiVersion)}`
  }

  /**
   * Send the request's messages to the deployment and build a response from
   * the first choice. Missing content or usage fields default to `''` / `0`.
   *
   * @throws ConfigurationError when the API key or endpoint is not configured.
   */
  async complete(request: GavioRequest): Promise<GavioResponse> {
    if (!this.apiKey || !this.endpoint) {
      throw new ConfigurationError('AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT must be set')
    }
    const started = performance.now()
    const payload = {
      messages: request.messages,
      temperature: request.temperature,
      max_tokens: request.maxTokens,
    }
    const data = await postJson(this.url(request), payload, { 'api-key': this.apiKey }, this.timeoutSeconds)
    const choices = (data['choices'] as Array<Record<string, unknown>>) ?? []
    const message = (choices[0]?.['message'] as Record<string, unknown>) ?? {}
    const content = (message['content'] as string) ?? ''
    const usageData = (data['usage'] as Record<string, number>) ?? {}
    const usage = new TokenUsage(usageData['prompt_tokens'] ?? 0, usageData['completion_tokens'] ?? 0)
    return this.buildResponse(request, content, usage, (data['model'] as string) ?? request.model, started)
  }

  /** Reports whether an API key and endpoint are configured; makes no network call. */
  async healthCheck(): Promise<boolean> {
    return !!(this.apiKey && this.endpoint)
  }
}
|
|
73
|
+
|
|
74
|
+
/** Factory: create an Azure OpenAI provider adapter from the given options. */
export function azureOpenaiAdapter(options: AzureOpenAIAdapterOptions = {}): AzureOpenAIAdapter {
  const adapter = new AzureOpenAIAdapter(options)
  return adapter
}
|
package/src/providers/base.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/** ProviderAdapter interface and shared response-building helpers. */
|
|
2
2
|
|
|
3
|
-
import { PricingProvider } from '../pricing.js'
|
|
3
|
+
import { PricingProvider, estimateTokens } from '../pricing.js'
|
|
4
4
|
import { GavioRequest } from '../request.js'
|
|
5
5
|
import { GavioResponse } from '../response.js'
|
|
6
6
|
import { TokenUsage } from '../types.js'
|
|
@@ -10,6 +10,8 @@ export interface ProviderAdapter {
|
|
|
10
10
|
readonly providerName: string
|
|
11
11
|
complete(request: GavioRequest): Promise<GavioResponse>
|
|
12
12
|
stream?(request: GavioRequest): AsyncIterable<string>
|
|
13
|
+
/** Build a response from a fully buffered stream (F-REL-06). */
|
|
14
|
+
buildStreamResponse?(request: GavioRequest, content: string, startedAt: number): GavioResponse
|
|
13
15
|
healthCheck(): Promise<boolean>
|
|
14
16
|
readonly reportedModelVersion?: string | null
|
|
15
17
|
}
|
|
@@ -30,6 +32,24 @@ export abstract class BaseProviderAdapter implements ProviderAdapter {
|
|
|
30
32
|
return null
|
|
31
33
|
}
|
|
32
34
|
|
|
35
|
+
/**
 * Turn the complete text of a buffered stream into a response (F-REL-06).
 * Streamed chunks carry text only, so both token counts are estimated: the
 * prompt side from the request's prompt text, the completion side from
 * `content`. The model is the adapter's reported version when it has one,
 * otherwise the requested model.
 */
buildStreamResponse(request: GavioRequest, content: string, startedAt: number): GavioResponse {
  const promptTokens = estimateTokens(request.promptText())
  const completionTokens = estimateTokens(content)
  const estimatedUsage = new TokenUsage(promptTokens, completionTokens)
  const model = this.reportedModelVersion ?? request.model
  return this.buildResponse(request, content, estimatedUsage, model, startedAt)
}
|
|
52
|
+
|
|
33
53
|
protected buildResponse(
|
|
34
54
|
request: GavioRequest,
|
|
35
55
|
content: string,
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/** geminiAdapter — Google Generative Language API (generateContent). */
|
|
2
|
+
|
|
3
|
+
import { ConfigurationError } from '../errors.js'
|
|
4
|
+
import type { PricingProvider } from '../pricing.js'
|
|
5
|
+
import type { GavioRequest } from '../request.js'
|
|
6
|
+
import type { GavioResponse } from '../response.js'
|
|
7
|
+
import { TokenUsage, type Message } from '../types.js'
|
|
8
|
+
import { BaseProviderAdapter } from './base.js'
|
|
9
|
+
import { postJson } from './http.js'
|
|
10
|
+
|
|
11
|
+
/** Base URL of the Generative Language API used when none is configured. */
const DEFAULT_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta'

/** Construction options for the Gemini adapter; every field is optional. */
export interface GeminiAdapterOptions {
  /** API key; falls back to `GEMINI_API_KEY`, then `GOOGLE_API_KEY`, from the environment. */
  apiKey?: string
  /** API base URL; defaults to `DEFAULT_BASE_URL`. Trailing slashes are stripped. */
  baseUrl?: string
  /** Request timeout in milliseconds (default 30 000). */
  timeoutMs?: number
  /** Pricing provider handed to the base adapter. */
  pricing?: PricingProvider
}
|
|
19
|
+
|
|
20
|
+
/** One entry of the `contents` array in a Gemini request payload. */
interface GeminiContent {
  /** Speaker of the turn; the mapper in this file emits `'user'` or `'model'`. */
  role: string
  /** Text fragments making up the turn. */
  parts: { text: string }[]
}
|
|
24
|
+
|
|
25
|
+
/**
 * Convert Gavio messages into the pieces of a Gemini request.
 *
 * System messages are pulled out and joined with newlines into a single
 * instruction (`null` when there are none). Every other message becomes one
 * content entry: `assistant` maps to the `model` role, anything else to `user`.
 */
export function geminiToContents(messages: Message[]): {
  system: string | null
  contents: GeminiContent[]
} {
  let instruction: string | null = null
  const turns: GeminiContent[] = []
  for (const { role, content } of messages) {
    if (role !== 'system') {
      const geminiRole = role === 'assistant' ? 'model' : 'user'
      turns.push({ role: geminiRole, parts: [{ text: content }] })
      continue
    }
    // An empty accumulated instruction is replaced rather than extended.
    if (instruction === null || instruction === '') {
      instruction = content
    } else {
      instruction = `${instruction}\n${content}`
    }
  }
  return { system: instruction, contents: turns }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Provider adapter for the Google Generative Language API (`generateContent`).
 * The API key comes from the options or from `GEMINI_API_KEY` /
 * `GOOGLE_API_KEY` in the environment.
 */
class GeminiAdapter extends BaseProviderAdapter {
  private readonly apiKey: string | undefined
  private readonly baseUrl: string
  private readonly timeoutSeconds: number

  /**
   * @param options Optional overrides; unset fields fall back to the
   *   environment API key, the default base URL and a 30 s timeout.
   */
  constructor(options: GeminiAdapterOptions = {}) {
    super(options.pricing)
    this.apiKey =
      options.apiKey ?? process.env['GEMINI_API_KEY'] ?? process.env['GOOGLE_API_KEY']
    // Strip trailing slashes so the path can be appended with a single "/".
    this.baseUrl = (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, '')
    this.timeoutSeconds = (options.timeoutMs ?? 30_000) / 1000
  }

  get providerName(): string {
    return 'gemini'
  }

  /**
   * Build the `generateContent` request body: mapped contents, generation
   * config, and a `systemInstruction` when the request has system text.
   */
  private payload(request: GavioRequest): Record<string, unknown> {
    const { system, contents } = geminiToContents(request.messages)
    const payload: Record<string, unknown> = {
      contents,
      generationConfig: {
        temperature: request.temperature,
        maxOutputTokens: request.maxTokens,
      },
    }
    if (system) payload['systemInstruction'] = { parts: [{ text: system }] }
    return payload
  }

  /**
   * Call `generateContent` for the request's model and build a response from
   * the first candidate's text parts. Missing candidates, parts or usage
   * metadata default to empty text / zero tokens.
   *
   * @throws ConfigurationError when no API key is configured.
   */
  async complete(request: GavioRequest): Promise<GavioResponse> {
    if (!this.apiKey) throw new ConfigurationError('GEMINI_API_KEY not set')
    const started = performance.now()
    // The key travels in the `x-goog-api-key` header instead of a `?key=` query
    // parameter so it does not end up in URLs that get logged or echoed in
    // error messages. The model is percent-encoded so it cannot alter the path.
    const url = `${this.baseUrl}/models/${encodeURIComponent(request.model)}:generateContent`
    const data = await postJson(
      url,
      this.payload(request),
      { 'x-goog-api-key': this.apiKey },
      this.timeoutSeconds,
    )
    const candidates = (data['candidates'] as Array<Record<string, unknown>>) ?? [{}]
    const contentObj = (candidates[0]?.['content'] as Record<string, unknown>) ?? {}
    const parts = (contentObj['parts'] as Array<{ text?: string }>) ?? []
    const content = parts.map((p) => p.text ?? '').join('')
    const um = (data['usageMetadata'] as Record<string, number>) ?? {}
    const usage = new TokenUsage(um['promptTokenCount'] ?? 0, um['candidatesTokenCount'] ?? 0)
    return this.buildResponse(request, content, usage, request.model, started)
  }

  /** Reports whether an API key is configured; makes no network call. */
  async healthCheck(): Promise<boolean> {
    return !!this.apiKey
  }
}
|
|
91
|
+
|
|
92
|
+
/** Factory: create a Gemini provider adapter from the given options. */
export function geminiAdapter(options: GeminiAdapterOptions = {}): GeminiAdapter {
  const adapter = new GeminiAdapter(options)
  return adapter
}
|
package/src/providers/index.ts
CHANGED
|
@@ -4,8 +4,11 @@ import { ConfigurationError } from '../errors.js'
|
|
|
4
4
|
import type { PricingProvider } from '../pricing.js'
|
|
5
5
|
import { Provider, coerceProvider } from '../types.js'
|
|
6
6
|
import { anthropicAdapter } from './anthropic.js'
|
|
7
|
+
import { azureOpenaiAdapter } from './azure-openai.js'
|
|
7
8
|
import type { ProviderAdapter } from './base.js'
|
|
9
|
+
import { geminiAdapter } from './gemini.js'
|
|
8
10
|
import { mockProvider } from './mock.js'
|
|
11
|
+
import { ollamaAdapter } from './ollama.js'
|
|
9
12
|
import { openaiAdapter } from './openai.js'
|
|
10
13
|
|
|
11
14
|
export type { ProviderAdapter } from './base.js'
|
|
@@ -16,24 +19,37 @@ export { openaiAdapter } from './openai.js'
|
|
|
16
19
|
export type { OpenAIAdapterOptions } from './openai.js'
|
|
17
20
|
export { anthropicAdapter } from './anthropic.js'
|
|
18
21
|
export type { AnthropicAdapterOptions } from './anthropic.js'
|
|
22
|
+
export { geminiAdapter } from './gemini.js'
|
|
23
|
+
export type { GeminiAdapterOptions } from './gemini.js'
|
|
24
|
+
export { azureOpenaiAdapter } from './azure-openai.js'
|
|
25
|
+
export type { AzureOpenAIAdapterOptions } from './azure-openai.js'
|
|
26
|
+
export { ollamaAdapter } from './ollama.js'
|
|
27
|
+
export type { OllamaAdapterOptions } from './ollama.js'
|
|
19
28
|
export { Provider } from '../types.js'
|
|
20
29
|
|
|
21
|
-
/** Instantiate the default adapter for a provider id.
|
|
30
|
+
/**
 * Instantiate the default adapter for a provider id.
 *
 * @param provider - Provider member or raw string id; normalized through `coerceProvider`.
 * @param pricing - Optional pricing provider, forwarded to the adapter factory when supplied.
 * @returns The adapter produced by the factory registered for the provider.
 * @throws ConfigurationError when no factory is wired up for the resolved provider.
 */
export function buildAdapter(
  provider: Provider | string,
  pricing?: PricingProvider,
): ProviderAdapter {
  const resolved = coerceProvider(provider)
  // Only include a `pricing` key when a value was actually supplied.
  const factoryOptions = pricing ? { pricing } : {}

  if (resolved === Provider.OPENAI) {
    return openaiAdapter(factoryOptions)
  }
  if (resolved === Provider.ANTHROPIC) {
    return anthropicAdapter(factoryOptions)
  }
  if (resolved === Provider.GEMINI) {
    return geminiAdapter(factoryOptions)
  }
  if (resolved === Provider.AZURE_OPENAI) {
    return azureOpenaiAdapter(factoryOptions)
  }
  if (resolved === Provider.OLLAMA) {
    return ollamaAdapter(factoryOptions)
  }
  if (resolved === Provider.MOCK) {
    return mockProvider(factoryOptions)
  }

  throw new ConfigurationError(
    `Provider '${resolved}' is not available (v0.3.0 adds bedrock, cohere)`,
  )
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/** ollamaAdapter — local models via the Ollama chat API. */
|
|
2
|
+
|
|
3
|
+
import type { PricingProvider } from '../pricing.js'
|
|
4
|
+
import type { GavioRequest } from '../request.js'
|
|
5
|
+
import type { GavioResponse } from '../response.js'
|
|
6
|
+
import { TokenUsage } from '../types.js'
|
|
7
|
+
import { BaseProviderAdapter } from './base.js'
|
|
8
|
+
import { postJson } from './http.js'
|
|
9
|
+
|
|
10
|
+
const DEFAULT_BASE_URL = 'http://localhost:11434'
|
|
11
|
+
|
|
12
|
+
/** Construction options for the Ollama provider adapter. All fields are optional. */
export interface OllamaAdapterOptions {
  /**
   * Base URL of the Ollama server. When omitted, the adapter reads the
   * `OLLAMA_HOST` environment variable and then falls back to
   * `http://localhost:11434`.
   */
  baseUrl?: string
  /** Request timeout in milliseconds; the adapter uses 60 000 when omitted. */
  timeoutMs?: number
  /** Pricing provider handed to the base adapter constructor. */
  pricing?: PricingProvider
}
|
|
17
|
+
|
|
18
|
+
/**
 * Provider adapter for local models served through the Ollama chat API.
 *
 * Requests are sent as non-streaming POSTs to `<baseUrl>/api/chat`.
 */
class OllamaAdapter extends BaseProviderAdapter {
  private readonly baseUrl: string
  private readonly timeoutSeconds: number

  /**
   * @param options - Optional base URL, timeout (milliseconds) and pricing provider.
   *   The server address is taken from `options.baseUrl`, then the `OLLAMA_HOST`
   *   environment variable, then `DEFAULT_BASE_URL`; blank values are skipped.
   */
  constructor(options: OllamaAdapterOptions = {}) {
    super(options.pricing)
    // `??` alone would accept an empty string (e.g. `OLLAMA_HOST=`) and yield an
    // empty base URL, so pick the first candidate that is non-blank after trimming.
    const configured = [options.baseUrl, process.env['OLLAMA_HOST']]
      .map((candidate) => candidate?.trim() ?? '')
      .find((candidate) => candidate !== '')
    this.baseUrl = OllamaAdapter.normalizeBaseUrl(configured ?? DEFAULT_BASE_URL)
    // postJson is given the timeout in seconds.
    this.timeoutSeconds = (options.timeoutMs ?? 60_000) / 1000
  }

  /**
   * Turn a configured server address into an absolute base URL without
   * trailing slashes.
   *
   * Addresses that already carry a scheme (`http://…`, `https://…`) are only
   * stripped of trailing slashes. `OLLAMA_HOST` is commonly written as a bare
   * `host` or `host:port`; for those the `http` scheme is assumed, and the
   * Ollama default port 11434 is appended when no port is present.
   *
   * @param value - Non-blank, trimmed address.
   * @returns Absolute base URL with no trailing slash.
   */
  private static normalizeBaseUrl(value: string): string {
    const defaultPort = 11434
    const stripTrailingSlashes = (url: string): string => url.replace(/\/+$/, '')

    if (/^[a-z][a-z\d+.-]*:\/\//i.test(value)) {
      return stripTrailingSlashes(value)
    }

    // Scheme-less form: separate `host[:port]` from any path that follows it.
    const pathStart = value.indexOf('/')
    const authority = pathStart === -1 ? value : value.slice(0, pathStart)
    const path = pathStart === -1 ? '' : value.slice(pathStart)
    const portSuffix = /:\d+$/.test(authority) ? '' : `:${defaultPort}`
    return stripTrailingSlashes(`http://${authority}${portSuffix}${path}`)
  }

  /** Identifier of this provider: always `'ollama'`. */
  get providerName(): string {
    return 'ollama'
  }

  /**
   * Run a single, non-streaming chat completion.
   *
   * @param request - Canonical request; its model, messages and temperature are forwarded.
   * @returns The response assembled by `buildResponse` from the reply content,
   *   token counts and model name.
   */
  async complete(request: GavioRequest): Promise<GavioResponse> {
    const started = performance.now()
    const payload = {
      model: request.model,
      messages: request.messages,
      stream: false,
      options: { temperature: request.temperature },
    }
    const data = await postJson(`${this.baseUrl}/api/chat`, payload, {}, this.timeoutSeconds)
    // Missing fields degrade to empty content / zero counts instead of throwing.
    const message = (data['message'] as Record<string, unknown>) ?? {}
    const content = (message['content'] as string) ?? ''
    const usage = new TokenUsage(
      (data['prompt_eval_count'] as number) ?? 0,
      (data['eval_count'] as number) ?? 0,
    )
    // Prefer the model name echoed by the server; fall back to the requested one.
    return this.buildResponse(request, content, usage, (data['model'] as string) ?? request.model, started)
  }

  /** Always resolves to `true`; no request is made to the server. */
  async healthCheck(): Promise<boolean> {
    return true
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Factory: build an Ollama provider adapter.
 *
 * @param options - Adapter options; an empty object is used when omitted.
 * @returns A newly constructed `OllamaAdapter`.
 */
export function ollamaAdapter(options: OllamaAdapterOptions = {}): OllamaAdapter {
  const adapter = new OllamaAdapter(options)
  return adapter
}
|
package/src/request.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/** GavioRequest — the canonical, provider-agnostic request model. */
|
|
2
2
|
|
|
3
3
|
import { newTraceId } from './ids.js'
|
|
4
|
-
import { coerceProvider } from './types.js'
|
|
5
|
-
import type { Message, Provider } from './types.js'
|
|
4
|
+
import { coerceProvider, PromptLineage } from './types.js'
|
|
5
|
+
import type { Message, PromptLineageInit, Provider } from './types.js'
|
|
6
6
|
|
|
7
7
|
export interface GavioRequestInit {
|
|
8
8
|
messages: Message[]
|
|
@@ -14,6 +14,7 @@ export interface GavioRequestInit {
|
|
|
14
14
|
sessionId?: string | null
|
|
15
15
|
options?: Record<string, unknown>
|
|
16
16
|
metadata?: Record<string, unknown>
|
|
17
|
+
lineage?: PromptLineage | PromptLineageInit | null
|
|
17
18
|
}
|
|
18
19
|
|
|
19
20
|
/**
|
|
@@ -31,6 +32,7 @@ export class GavioRequest {
|
|
|
31
32
|
sessionId: string | null
|
|
32
33
|
options: Record<string, unknown>
|
|
33
34
|
metadata: Record<string, unknown>
|
|
35
|
+
lineage: PromptLineage | null
|
|
34
36
|
|
|
35
37
|
constructor(init: GavioRequestInit) {
|
|
36
38
|
this.messages = init.messages
|
|
@@ -42,6 +44,7 @@ export class GavioRequest {
|
|
|
42
44
|
this.sessionId = init.sessionId ?? null
|
|
43
45
|
this.options = init.options ?? {}
|
|
44
46
|
this.metadata = init.metadata ?? {}
|
|
47
|
+
this.lineage = init.lineage != null ? PromptLineage.from(init.lineage) : null
|
|
45
48
|
}
|
|
46
49
|
|
|
47
50
|
get temperature(): number {
|
|
@@ -71,6 +74,7 @@ export class GavioRequest {
|
|
|
71
74
|
sessionId: this.sessionId,
|
|
72
75
|
options: { ...this.options },
|
|
73
76
|
metadata: { ...this.metadata },
|
|
77
|
+
lineage: this.lineage,
|
|
74
78
|
})
|
|
75
79
|
}
|
|
76
80
|
}
|