gavio 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/config.js +106 -0
- package/dist/cjs/errors.js +29 -1
- package/dist/cjs/gateway.js +88 -0
- package/dist/cjs/index.js +4 -2
- package/dist/cjs/interceptors/audit/index.js +4 -1
- package/dist/cjs/interceptors/audit/interceptor.js +11 -0
- package/dist/cjs/interceptors/audit/record.js +17 -3
- package/dist/cjs/interceptors/audit/trace.js +43 -0
- package/dist/cjs/interceptors/cache/embedding.js +53 -0
- package/dist/cjs/interceptors/cache/index.js +9 -5
- package/dist/cjs/interceptors/cache/interceptor.js +80 -0
- package/dist/cjs/interceptors/cache/vector.js +35 -0
- package/dist/cjs/interceptors/governance/budget.js +45 -0
- package/dist/cjs/interceptors/governance/index.js +10 -0
- package/dist/cjs/interceptors/governance/model-policy.js +18 -0
- package/dist/cjs/interceptors/governance/rate-limit.js +46 -0
- package/dist/cjs/interceptors/guardrails/index.js +11 -0
- package/dist/cjs/interceptors/guardrails/interceptor.js +40 -0
- package/dist/cjs/interceptors/guardrails/validator.js +8 -0
- package/dist/cjs/interceptors/guardrails/validators/regex.js +32 -0
- package/dist/cjs/interceptors/guardrails/validators/schema.js +63 -0
- package/dist/cjs/interceptors/injection.js +62 -0
- package/dist/cjs/interceptors/metrics/index.js +9 -0
- package/dist/cjs/interceptors/metrics/interceptor.js +37 -0
- package/dist/cjs/interceptors/metrics/registry.js +0 -0
- package/dist/cjs/interceptors/quality/index.js +7 -0
- package/dist/cjs/interceptors/quality/risk.js +49 -0
- package/dist/cjs/interceptors/reliability/circuit-breaker.js +82 -0
- package/dist/cjs/interceptors/reliability/index.js +8 -1
- package/dist/cjs/interceptors/reliability/load-balancer.js +38 -0
- package/dist/cjs/interceptors/reliability/stream-buffer.js +28 -0
- package/dist/cjs/pricing.js +5 -1
- package/dist/cjs/providers/azure-openai.js +56 -0
- package/dist/cjs/providers/base.js +9 -0
- package/dist/cjs/providers/gemini.js +73 -0
- package/dist/cjs/providers/index.js +22 -6
- package/dist/cjs/providers/ollama.js +41 -0
- package/dist/cjs/request.js +3 -0
- package/dist/cjs/shim/openai.js +57 -0
- package/dist/cjs/types.js +53 -1
- package/dist/esm/config.d.ts +12 -0
- package/dist/esm/config.js +102 -0
- package/dist/esm/errors.d.ts +17 -0
- package/dist/esm/errors.js +24 -0
- package/dist/esm/gateway.d.ts +18 -1
- package/dist/esm/gateway.js +55 -0
- package/dist/esm/index.d.ts +3 -3
- package/dist/esm/index.js +2 -2
- package/dist/esm/interceptors/audit/index.d.ts +2 -0
- package/dist/esm/interceptors/audit/index.js +1 -0
- package/dist/esm/interceptors/audit/interceptor.d.ts +2 -0
- package/dist/esm/interceptors/audit/interceptor.js +11 -0
- package/dist/esm/interceptors/audit/record.d.ts +4 -2
- package/dist/esm/interceptors/audit/record.js +18 -4
- package/dist/esm/interceptors/audit/trace.d.ts +19 -0
- package/dist/esm/interceptors/audit/trace.js +39 -0
- package/dist/esm/interceptors/cache/embedding.d.ts +14 -0
- package/dist/esm/interceptors/cache/embedding.js +49 -0
- package/dist/esm/interceptors/cache/index.d.ts +7 -4
- package/dist/esm/interceptors/cache/index.js +4 -4
- package/dist/esm/interceptors/cache/interceptor.d.ts +19 -0
- package/dist/esm/interceptors/cache/interceptor.js +77 -0
- package/dist/esm/interceptors/cache/vector.d.ts +9 -0
- package/dist/esm/interceptors/cache/vector.js +32 -0
- package/dist/esm/interceptors/governance/budget.d.ts +11 -0
- package/dist/esm/interceptors/governance/budget.js +42 -0
- package/dist/esm/interceptors/governance/index.d.ts +7 -0
- package/dist/esm/interceptors/governance/index.js +4 -0
- package/dist/esm/interceptors/governance/model-policy.d.ts +8 -0
- package/dist/esm/interceptors/governance/model-policy.js +15 -0
- package/dist/esm/interceptors/governance/rate-limit.d.ts +9 -0
- package/dist/esm/interceptors/governance/rate-limit.js +43 -0
- package/dist/esm/interceptors/guardrails/index.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/index.js +4 -0
- package/dist/esm/interceptors/guardrails/interceptor.d.ts +15 -0
- package/dist/esm/interceptors/guardrails/interceptor.js +37 -0
- package/dist/esm/interceptors/guardrails/validator.d.ts +11 -0
- package/dist/esm/interceptors/guardrails/validator.js +3 -0
- package/dist/esm/interceptors/guardrails/validators/regex.d.ts +6 -0
- package/dist/esm/interceptors/guardrails/validators/regex.js +28 -0
- package/dist/esm/interceptors/guardrails/validators/schema.d.ts +5 -0
- package/dist/esm/interceptors/guardrails/validators/schema.js +60 -0
- package/dist/esm/interceptors/injection.d.ts +17 -0
- package/dist/esm/interceptors/injection.js +59 -0
- package/dist/esm/interceptors/metrics/index.d.ts +5 -0
- package/dist/esm/interceptors/metrics/index.js +3 -0
- package/dist/esm/interceptors/metrics/interceptor.d.ts +22 -0
- package/dist/esm/interceptors/metrics/interceptor.js +33 -0
- package/dist/esm/interceptors/metrics/registry.d.ts +31 -0
- package/dist/esm/interceptors/metrics/registry.js +0 -0
- package/dist/esm/interceptors/quality/index.d.ts +3 -0
- package/dist/esm/interceptors/quality/index.js +2 -0
- package/dist/esm/interceptors/quality/risk.d.ts +32 -0
- package/dist/esm/interceptors/quality/risk.js +44 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.d.ts +15 -0
- package/dist/esm/interceptors/reliability/circuit-breaker.js +78 -0
- package/dist/esm/interceptors/reliability/index.d.ts +5 -0
- package/dist/esm/interceptors/reliability/index.js +3 -0
- package/dist/esm/interceptors/reliability/load-balancer.d.ts +8 -0
- package/dist/esm/interceptors/reliability/load-balancer.js +35 -0
- package/dist/esm/interceptors/reliability/stream-buffer.d.ts +18 -0
- package/dist/esm/interceptors/reliability/stream-buffer.js +24 -0
- package/dist/esm/pricing.js +5 -1
- package/dist/esm/providers/azure-openai.d.ts +28 -0
- package/dist/esm/providers/azure-openai.js +53 -0
- package/dist/esm/providers/base.d.ts +7 -0
- package/dist/esm/providers/base.js +9 -1
- package/dist/esm/providers/gemini.d.ts +36 -0
- package/dist/esm/providers/gemini.js +69 -0
- package/dist/esm/providers/index.d.ts +7 -1
- package/dist/esm/providers/index.js +18 -5
- package/dist/esm/providers/ollama.d.ts +21 -0
- package/dist/esm/providers/ollama.js +38 -0
- package/dist/esm/request.d.ts +4 -1
- package/dist/esm/request.js +4 -1
- package/dist/esm/shim/openai.d.ts +56 -0
- package/dist/esm/shim/openai.js +53 -0
- package/dist/esm/types.d.ts +54 -0
- package/dist/esm/types.js +50 -0
- package/package.json +41 -2
- package/src/config.ts +125 -0
- package/src/errors.ts +28 -0
- package/src/gateway.ts +62 -1
- package/src/index.ts +4 -2
- package/src/interceptors/audit/index.ts +2 -0
- package/src/interceptors/audit/interceptor.ts +13 -0
- package/src/interceptors/audit/record.ts +18 -4
- package/src/interceptors/audit/trace.ts +47 -0
- package/src/interceptors/cache/embedding.ts +53 -0
- package/src/interceptors/cache/index.ts +7 -4
- package/src/interceptors/cache/interceptor.ts +111 -0
- package/src/interceptors/cache/vector.ts +45 -0
- package/src/interceptors/governance/budget.ts +59 -0
- package/src/interceptors/governance/index.ts +8 -0
- package/src/interceptors/governance/model-policy.ts +25 -0
- package/src/interceptors/governance/rate-limit.ts +63 -0
- package/src/interceptors/guardrails/index.ts +7 -0
- package/src/interceptors/guardrails/interceptor.ts +56 -0
- package/src/interceptors/guardrails/validator.ts +14 -0
- package/src/interceptors/guardrails/validators/regex.ts +29 -0
- package/src/interceptors/guardrails/validators/schema.ts +62 -0
- package/src/interceptors/injection.ts +72 -0
- package/src/interceptors/metrics/index.ts +6 -0
- package/src/interceptors/metrics/interceptor.ts +46 -0
- package/src/interceptors/metrics/registry.ts +0 -0
- package/src/interceptors/quality/index.ts +4 -0
- package/src/interceptors/quality/risk.ts +64 -0
- package/src/interceptors/reliability/circuit-breaker.ts +102 -0
- package/src/interceptors/reliability/index.ts +5 -0
- package/src/interceptors/reliability/load-balancer.ts +56 -0
- package/src/interceptors/reliability/stream-buffer.ts +27 -0
- package/src/pricing.ts +5 -1
- package/src/providers/azure-openai.ts +77 -0
- package/src/providers/base.ts +21 -1
- package/src/providers/gemini.ts +95 -0
- package/src/providers/index.ts +21 -5
- package/src/providers/ollama.ts +61 -0
- package/src/request.ts +6 -2
- package/src/shim/openai.ts +76 -0
- package/src/types.ts +77 -0
package/dist/esm/types.js
CHANGED
|
@@ -54,3 +54,53 @@ export class TokenUsage {
|
|
|
54
54
|
};
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
|
+
/**
|
|
58
|
+
* A single retrieved source that contributed to a prompt. Carries a *reference*
|
|
59
|
+
* to the source — never the retrieved text — so prompt lineage stays within the
|
|
60
|
+
* audit record's metadata-only contract.
|
|
61
|
+
*/
|
|
62
|
+
export class RagChunk {
|
|
63
|
+
source;
|
|
64
|
+
chunkId;
|
|
65
|
+
score;
|
|
66
|
+
constructor(init) {
|
|
67
|
+
this.source = init.source;
|
|
68
|
+
this.chunkId = init.chunkId ?? null;
|
|
69
|
+
this.score = init.score ?? null;
|
|
70
|
+
}
|
|
71
|
+
toJSON() {
|
|
72
|
+
return { source: this.source, chunkId: this.chunkId, score: this.score };
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Provenance for a rendered prompt (F-OBS-04): the template, the variable
|
|
77
|
+
* bindings interpolated into it, and the RAG chunk sources retrieved for it.
|
|
78
|
+
*
|
|
79
|
+
* Attached to a GavioRequest by the caller and copied into the AuditRecord so
|
|
80
|
+
* any prompt can be reconstructed and debugged. RAG chunk text is never stored
|
|
81
|
+
* — only source references (see {@link RagChunk}).
|
|
82
|
+
*/
|
|
83
|
+
export class PromptLineage {
|
|
84
|
+
templateId;
|
|
85
|
+
templateVersion;
|
|
86
|
+
variables;
|
|
87
|
+
ragChunks;
|
|
88
|
+
constructor(init = {}) {
|
|
89
|
+
this.templateId = init.templateId ?? null;
|
|
90
|
+
this.templateVersion = init.templateVersion ?? null;
|
|
91
|
+
this.variables = init.variables ?? {};
|
|
92
|
+
this.ragChunks = (init.ragChunks ?? []).map((c) => c instanceof RagChunk ? c : new RagChunk(c));
|
|
93
|
+
}
|
|
94
|
+
/** Coerce a PromptLineage instance or plain init object into a PromptLineage. */
|
|
95
|
+
static from(value) {
|
|
96
|
+
return value instanceof PromptLineage ? value : new PromptLineage(value);
|
|
97
|
+
}
|
|
98
|
+
toJSON() {
|
|
99
|
+
return {
|
|
100
|
+
templateId: this.templateId,
|
|
101
|
+
templateVersion: this.templateVersion,
|
|
102
|
+
variables: this.variables,
|
|
103
|
+
ragChunks: this.ragChunks.map((c) => c.toJSON()),
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gavio",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "The open standard AI gateway for production systems — PII guarding, audit, reliability, and cost tracking.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -65,6 +65,36 @@
|
|
|
65
65
|
"import": "./dist/esm/interceptors/reliability/index.js",
|
|
66
66
|
"require": "./dist/cjs/interceptors/reliability/index.js"
|
|
67
67
|
},
|
|
68
|
+
"./interceptors/governance": {
|
|
69
|
+
"types": "./dist/esm/interceptors/governance/index.d.ts",
|
|
70
|
+
"import": "./dist/esm/interceptors/governance/index.js",
|
|
71
|
+
"require": "./dist/cjs/interceptors/governance/index.js"
|
|
72
|
+
},
|
|
73
|
+
"./interceptors/guardrails": {
|
|
74
|
+
"types": "./dist/esm/interceptors/guardrails/index.d.ts",
|
|
75
|
+
"import": "./dist/esm/interceptors/guardrails/index.js",
|
|
76
|
+
"require": "./dist/cjs/interceptors/guardrails/index.js"
|
|
77
|
+
},
|
|
78
|
+
"./interceptors/injection": {
|
|
79
|
+
"types": "./dist/esm/interceptors/injection.d.ts",
|
|
80
|
+
"import": "./dist/esm/interceptors/injection.js",
|
|
81
|
+
"require": "./dist/cjs/interceptors/injection.js"
|
|
82
|
+
},
|
|
83
|
+
"./interceptors/metrics": {
|
|
84
|
+
"types": "./dist/esm/interceptors/metrics/index.d.ts",
|
|
85
|
+
"import": "./dist/esm/interceptors/metrics/index.js",
|
|
86
|
+
"require": "./dist/cjs/interceptors/metrics/index.js"
|
|
87
|
+
},
|
|
88
|
+
"./interceptors/quality": {
|
|
89
|
+
"types": "./dist/esm/interceptors/quality/index.d.ts",
|
|
90
|
+
"import": "./dist/esm/interceptors/quality/index.js",
|
|
91
|
+
"require": "./dist/cjs/interceptors/quality/index.js"
|
|
92
|
+
},
|
|
93
|
+
"./shim/openai": {
|
|
94
|
+
"types": "./dist/esm/shim/openai.d.ts",
|
|
95
|
+
"import": "./dist/esm/shim/openai.js",
|
|
96
|
+
"require": "./dist/cjs/shim/openai.js"
|
|
97
|
+
},
|
|
68
98
|
"./providers": {
|
|
69
99
|
"types": "./dist/esm/providers/index.d.ts",
|
|
70
100
|
"import": "./dist/esm/providers/index.js",
|
|
@@ -99,11 +129,20 @@
|
|
|
99
129
|
},
|
|
100
130
|
"keywords": [
|
|
101
131
|
"ai",
|
|
102
|
-
"gateway",
|
|
132
|
+
"ai-gateway",
|
|
103
133
|
"llm",
|
|
134
|
+
"llm-gateway",
|
|
135
|
+
"llmops",
|
|
136
|
+
"gateway",
|
|
104
137
|
"pii",
|
|
138
|
+
"pii-redaction",
|
|
105
139
|
"audit",
|
|
140
|
+
"observability",
|
|
106
141
|
"reliability",
|
|
142
|
+
"retry",
|
|
143
|
+
"fallback",
|
|
144
|
+
"cost-tracking",
|
|
145
|
+
"interceptor",
|
|
107
146
|
"openai",
|
|
108
147
|
"anthropic"
|
|
109
148
|
],
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config loader (F-DX-05) — build a Gateway from an object or a JSON file.
|
|
3
|
+
*
|
|
4
|
+
* const gw = await Gateway.fromConfig('gateway.json')
|
|
5
|
+
*
|
|
6
|
+
* JSON is supported out of the box; string values expand ${ENV_VAR}.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { readFileSync } from 'node:fs'
|
|
10
|
+
import { ConfigurationError } from './errors.js'
|
|
11
|
+
import { Gateway } from './gateway.js'
|
|
12
|
+
import { auditInterceptor } from './interceptors/audit/index.js'
|
|
13
|
+
import { hashingEmbedder, semanticCache } from './interceptors/cache/index.js'
|
|
14
|
+
import { costControl, modelPolicy, rateLimiter } from './interceptors/governance/index.js'
|
|
15
|
+
import { promptInjectionGuard } from './interceptors/injection.js'
|
|
16
|
+
import { piiGuard } from './interceptors/pii/index.js'
|
|
17
|
+
import { retryInterceptor, timeoutPolicy } from './interceptors/reliability/index.js'
|
|
18
|
+
|
|
19
|
+
type Cfg = Record<string, unknown>
|
|
20
|
+
|
|
21
|
+
export function loadConfig(path: string): Cfg {
|
|
22
|
+
const text = readFileSync(path, 'utf8')
|
|
23
|
+
if (!path.endsWith('.json')) {
|
|
24
|
+
throw new ConfigurationError('JS config loader supports JSON only (use .json)')
|
|
25
|
+
}
|
|
26
|
+
return expand(JSON.parse(text)) as Cfg
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function expand(obj: unknown): unknown {
|
|
30
|
+
if (Array.isArray(obj)) return obj.map(expand)
|
|
31
|
+
if (obj && typeof obj === 'object') {
|
|
32
|
+
return Object.fromEntries(Object.entries(obj).map(([k, v]) => [k, expand(v)]))
|
|
33
|
+
}
|
|
34
|
+
if (typeof obj === 'string') {
|
|
35
|
+
return obj.replace(/\$\{(\w+)\}/g, (_, v: string) => process.env[v] ?? '')
|
|
36
|
+
}
|
|
37
|
+
return obj
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export function buildFromConfig(config: Cfg): Gateway {
|
|
41
|
+
const gatewayOptions: Cfg = {}
|
|
42
|
+
if (config['provider']) gatewayOptions['provider'] = config['provider']
|
|
43
|
+
if (config['model']) gatewayOptions['model'] = config['model']
|
|
44
|
+
if (config['devMode'] ?? config['dev_mode']) gatewayOptions['devMode'] = true
|
|
45
|
+
if (config['dryRun'] ?? config['dry_run']) gatewayOptions['dryRun'] = true
|
|
46
|
+
|
|
47
|
+
let gw = new Gateway(gatewayOptions)
|
|
48
|
+
const ic = (config['interceptors'] as Record<string, Cfg> | undefined) ?? {}
|
|
49
|
+
|
|
50
|
+
const cfg = (name: string): Cfg | null => {
|
|
51
|
+
const entry = ic[name]
|
|
52
|
+
return entry && entry['enabled'] !== false ? entry : null
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
let c: Cfg | null
|
|
56
|
+
if ((c = cfg('audit'))) {
|
|
57
|
+
gw = gw.use(
|
|
58
|
+
auditInterceptor({
|
|
59
|
+
sink: (c['sink'] as 'stdout') ?? 'stdout',
|
|
60
|
+
hashChain: Boolean(c['hashChain'] ?? c['hash_chain']),
|
|
61
|
+
}),
|
|
62
|
+
)
|
|
63
|
+
}
|
|
64
|
+
if ((c = cfg('prompt_injection'))) {
|
|
65
|
+
gw = gw.use(promptInjectionGuard({ action: (c['action'] as 'block' | 'flag') ?? 'block' }))
|
|
66
|
+
}
|
|
67
|
+
if ((c = cfg('pii_guard'))) {
|
|
68
|
+
gw = gw.use(
|
|
69
|
+
piiGuard({
|
|
70
|
+
sensitivity: (c['sensitivity'] as never) ?? 'strict',
|
|
71
|
+
mode: (c['mode'] as never) ?? 'redact',
|
|
72
|
+
}),
|
|
73
|
+
)
|
|
74
|
+
}
|
|
75
|
+
if ((c = cfg('cost_control'))) {
|
|
76
|
+
gw = gw.use(
|
|
77
|
+
costControl({
|
|
78
|
+
hardCapUsd: Number(c['hardCapUsd'] ?? c['hard_cap_usd']),
|
|
79
|
+
softCapUsd: (c['softCapUsd'] ?? c['soft_cap_usd']) as number | undefined,
|
|
80
|
+
scope: (c['scope'] as never) ?? 'global',
|
|
81
|
+
window: (c['window'] as never) ?? 'day',
|
|
82
|
+
}),
|
|
83
|
+
)
|
|
84
|
+
}
|
|
85
|
+
if ((c = cfg('rate_limiter'))) {
|
|
86
|
+
gw = gw.use(
|
|
87
|
+
rateLimiter({
|
|
88
|
+
maxRequestsPerMinute: (c['maxRequestsPerMinute'] ?? c['max_requests_per_minute']) as
|
|
89
|
+
| number
|
|
90
|
+
| undefined,
|
|
91
|
+
maxTokensPerMinute: (c['maxTokensPerMinute'] ?? c['max_tokens_per_minute']) as
|
|
92
|
+
| number
|
|
93
|
+
| undefined,
|
|
94
|
+
scope: (c['scope'] as never) ?? 'global',
|
|
95
|
+
}),
|
|
96
|
+
)
|
|
97
|
+
}
|
|
98
|
+
if ((c = cfg('model_policy'))) {
|
|
99
|
+
gw = gw.use(modelPolicy({ roles: (c['roles'] as Record<string, string[]>) ?? {} }))
|
|
100
|
+
}
|
|
101
|
+
if ((c = cfg('semantic_cache'))) {
|
|
102
|
+
const embedder = (c['enableSemantic'] ?? c['enable_semantic']) ? hashingEmbedder() : undefined
|
|
103
|
+
gw = gw.use(
|
|
104
|
+
semanticCache({
|
|
105
|
+
embedder,
|
|
106
|
+
similarityThreshold: Number(c['similarityThreshold'] ?? c['similarity_threshold'] ?? 0.95),
|
|
107
|
+
}),
|
|
108
|
+
)
|
|
109
|
+
}
|
|
110
|
+
if ((c = cfg('timeout'))) {
|
|
111
|
+
gw = gw.use(
|
|
112
|
+
timeoutPolicy({ timeoutSeconds: Number(c['timeoutSeconds'] ?? c['timeout_seconds'] ?? 30) }),
|
|
113
|
+
)
|
|
114
|
+
}
|
|
115
|
+
if ((c = cfg('retry'))) {
|
|
116
|
+
gw = gw.use(
|
|
117
|
+
retryInterceptor({
|
|
118
|
+
maxAttempts: Number(c['maxAttempts'] ?? c['max_attempts'] ?? 3),
|
|
119
|
+
baseDelayMs: Number(c['baseDelayMs'] ?? c['base_delay_ms'] ?? 500),
|
|
120
|
+
}),
|
|
121
|
+
)
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
return gw
|
|
125
|
+
}
|
package/src/errors.ts
CHANGED
|
@@ -43,5 +43,33 @@ export class PiiBlockedError extends GavioError {
|
|
|
43
43
|
/** A hard budget cap was exceeded. Never swallow this — surface to user. */
|
|
44
44
|
export class BudgetExceededError extends GavioError {}
|
|
45
45
|
|
|
46
|
+
/** The circuit breaker is open; the call was rejected without hitting the provider. */
|
|
47
|
+
export class CircuitOpenError extends ProviderUnavailableError {}
|
|
48
|
+
|
|
49
|
+
/** A local rate limit (requests/tokens per minute) was exceeded. */
|
|
50
|
+
export class RateLimitExceededError extends GavioError {}
|
|
51
|
+
|
|
52
|
+
/** The caller's role is not permitted to use the requested model (RBAC). */
|
|
53
|
+
export class ModelNotAllowedError extends GavioError {
|
|
54
|
+
readonly role: string
|
|
55
|
+
readonly model: string
|
|
56
|
+
|
|
57
|
+
constructor(role: string, model: string) {
|
|
58
|
+
super(`role ${JSON.stringify(role)} may not use model ${JSON.stringify(model)}`)
|
|
59
|
+
this.role = role
|
|
60
|
+
this.model = model
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
46
64
|
/** Output failed a guardrail validator with onFailure='error'. */
|
|
47
65
|
export class GuardrailViolationError extends GavioError {}
|
|
66
|
+
|
|
67
|
+
/** A prompt-injection attempt was detected and the guard is in block mode. */
|
|
68
|
+
export class PromptInjectionError extends GavioError {
|
|
69
|
+
readonly patterns: string[]
|
|
70
|
+
|
|
71
|
+
constructor(patterns: string[]) {
|
|
72
|
+
super(`prompt injection detected: ${patterns.join(', ')}`)
|
|
73
|
+
this.patterns = patterns
|
|
74
|
+
}
|
|
75
|
+
}
|
package/src/gateway.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { auditInterceptor, isAuditInterceptor } from './interceptors/audit/index
|
|
|
6
6
|
import { isExecutorPolicy } from './interceptors/base.js'
|
|
7
7
|
import type { Executor, ExecutorPolicy, Interceptor } from './interceptors/base.js'
|
|
8
8
|
import { InterceptorChain } from './interceptors/chain.js'
|
|
9
|
+
import { StreamBuffer } from './interceptors/reliability/stream-buffer.js'
|
|
9
10
|
import { PricingProvider } from './pricing.js'
|
|
10
11
|
import { buildAdapter } from './providers/index.js'
|
|
11
12
|
import type { ProviderAdapter } from './providers/base.js'
|
|
@@ -13,7 +14,7 @@ import { mockProvider } from './providers/mock.js'
|
|
|
13
14
|
import { GavioRequest } from './request.js'
|
|
14
15
|
import type { GavioResponse } from './response.js'
|
|
15
16
|
import { Provider, coerceProvider } from './types.js'
|
|
16
|
-
import type { Message } from './types.js'
|
|
17
|
+
import type { Message, PromptLineage, PromptLineageInit } from './types.js'
|
|
17
18
|
|
|
18
19
|
export interface GatewayOptions {
|
|
19
20
|
provider?: Provider | string
|
|
@@ -33,6 +34,8 @@ export interface CompleteOptions {
|
|
|
33
34
|
metadata?: Record<string, unknown>
|
|
34
35
|
/** Provider sampling options (temperature, maxTokens, etc.). */
|
|
35
36
|
options?: Record<string, unknown>
|
|
37
|
+
/** Prompt provenance (F-OBS-04): template, variables, and RAG chunk sources. */
|
|
38
|
+
lineage?: PromptLineage | PromptLineageInit | null
|
|
36
39
|
}
|
|
37
40
|
|
|
38
41
|
const DEFAULT_MODELS: Record<string, string> = {
|
|
@@ -66,6 +69,16 @@ export class Gateway {
|
|
|
66
69
|
this.pricing = options.pricing ?? new PricingProvider()
|
|
67
70
|
}
|
|
68
71
|
|
|
72
|
+
/**
|
|
73
|
+
* Build a Gateway from a config object or a JSON file path (F-DX-05).
|
|
74
|
+
* Async so the config module loads lazily (avoids a circular import).
|
|
75
|
+
*/
|
|
76
|
+
static async fromConfig(config: string | Record<string, unknown>): Promise<Gateway> {
|
|
77
|
+
const mod = await import('./config.js')
|
|
78
|
+
const data = typeof config === 'string' ? mod.loadConfig(config) : config
|
|
79
|
+
return mod.buildFromConfig(data)
|
|
80
|
+
}
|
|
81
|
+
|
|
69
82
|
/** Register an interceptor or executor policy. First-registered = outermost. */
|
|
70
83
|
use(interceptor: Interceptor): this {
|
|
71
84
|
this.interceptors.push(interceptor)
|
|
@@ -99,6 +112,7 @@ export class Gateway {
|
|
|
99
112
|
sessionId: opts.sessionId ?? null,
|
|
100
113
|
options: opts.options ?? {},
|
|
101
114
|
metadata: opts.metadata ?? {},
|
|
115
|
+
lineage: opts.lineage ?? null,
|
|
102
116
|
})
|
|
103
117
|
const ctx = new InterceptorContext({
|
|
104
118
|
traceId: request.traceId,
|
|
@@ -112,6 +126,53 @@ export class Gateway {
|
|
|
112
126
|
return chain.execute(request, ctx, executor)
|
|
113
127
|
}
|
|
114
128
|
|
|
129
|
+
/**
|
|
130
|
+
* Stream a completion, buffering the provider stream (F-REL-06).
|
|
131
|
+
*
|
|
132
|
+
* The provider stream is buffered in full so the post-interceptor pipeline
|
|
133
|
+
* (guardrails, PII restore, audit) runs on the complete response before any
|
|
134
|
+
* chunk reaches the caller. Pre/post interceptors run via the chain; executor
|
|
135
|
+
* policies (retry, circuit breaker, cache) are not applied to the streaming
|
|
136
|
+
* path.
|
|
137
|
+
*/
|
|
138
|
+
async *stream(opts: CompleteOptions): AsyncGenerator<string> {
|
|
139
|
+
const adapter = this.resolveAdapter()
|
|
140
|
+
if (adapter.stream === undefined || adapter.buildStreamResponse === undefined) {
|
|
141
|
+
throw new ConfigurationError(`${adapter.providerName} does not support streaming`)
|
|
142
|
+
}
|
|
143
|
+
const model = opts.model ?? this.modelHint ?? this.resolveModel(adapter)
|
|
144
|
+
|
|
145
|
+
const request = new GavioRequest({
|
|
146
|
+
messages: opts.messages,
|
|
147
|
+
model,
|
|
148
|
+
provider: coerceProvider(adapter.providerName),
|
|
149
|
+
agentId: opts.agentId ?? null,
|
|
150
|
+
parentTraceId: opts.parentTraceId ?? null,
|
|
151
|
+
sessionId: opts.sessionId ?? null,
|
|
152
|
+
options: opts.options ?? {},
|
|
153
|
+
metadata: opts.metadata ?? {},
|
|
154
|
+
})
|
|
155
|
+
const ctx = new InterceptorContext({
|
|
156
|
+
traceId: request.traceId,
|
|
157
|
+
agentId: request.agentId,
|
|
158
|
+
parentTraceId: request.parentTraceId,
|
|
159
|
+
sessionId: request.sessionId,
|
|
160
|
+
dryRun: this.dryRunMode,
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
const startedAt = performance.now()
|
|
164
|
+
const buffer = new StreamBuffer()
|
|
165
|
+
const { chain } = this.buildPipeline(adapter, ctx)
|
|
166
|
+
const bufferingExecutor: Executor = async (req) => {
|
|
167
|
+
for await (const chunk of adapter.stream!(req)) buffer.append(chunk)
|
|
168
|
+
return adapter.buildStreamResponse!(req, buffer.text(), startedAt)
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
const response = await chain.execute(request, ctx, bufferingExecutor)
|
|
172
|
+
// Post-interceptors have run on the fully buffered response; emit it now.
|
|
173
|
+
yield response.content
|
|
174
|
+
}
|
|
175
|
+
|
|
115
176
|
async healthCheck(): Promise<boolean> {
|
|
116
177
|
return this.resolveAdapter().healthCheck()
|
|
117
178
|
}
|
package/src/index.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* See https://gavio.io for documentation. MIT licensed.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
export const VERSION = '0.1.0'
|
|
11
|
+
export const VERSION = '0.3.0'
|
|
12
12
|
|
|
13
13
|
export { Gateway } from './gateway.js'
|
|
14
14
|
export type { GatewayOptions, CompleteOptions } from './gateway.js'
|
|
@@ -30,9 +30,11 @@ export {
|
|
|
30
30
|
Sensitivity,
|
|
31
31
|
GuardrailOutcome,
|
|
32
32
|
TokenUsage,
|
|
33
|
+
PromptLineage,
|
|
34
|
+
RagChunk,
|
|
33
35
|
coerceProvider,
|
|
34
36
|
} from './types.js'
|
|
35
|
-
export type { Message } from './types.js'
|
|
37
|
+
export type { Message, PromptLineageInit, RagChunkInit } from './types.js'
|
|
36
38
|
|
|
37
39
|
export type { Interceptor, Executor, ExecutorPolicy } from './interceptors/base.js'
|
|
38
40
|
export { InterceptorChain } from './interceptors/chain.js'
|
|
@@ -5,3 +5,5 @@ export type { AuditRecordInit } from './record.js'
|
|
|
5
5
|
export type { AuditSink } from './sink.js'
|
|
6
6
|
export { stdoutSink } from './sinks/stdout.js'
|
|
7
7
|
export type { StdoutSinkOptions } from './sinks/stdout.js'
|
|
8
|
+
export { verifyChain, buildCallGraph } from './trace.js'
|
|
9
|
+
export type { TraceNode } from './trace.js'
|
|
@@ -3,17 +3,21 @@
|
|
|
3
3
|
import type { InterceptorContext } from '../../context.js'
|
|
4
4
|
import type { GavioRequest } from '../../request.js'
|
|
5
5
|
import type { GavioResponse } from '../../response.js'
|
|
6
|
+
import type { PromptLineage } from '../../types.js'
|
|
6
7
|
import type { Interceptor } from '../base.js'
|
|
7
8
|
import { AuditRecord } from './record.js'
|
|
8
9
|
import type { AuditSink } from './sink.js'
|
|
9
10
|
import { stdoutSink } from './sinks/stdout.js'
|
|
10
11
|
|
|
11
12
|
const PROMPT_HASH_KEY = 'audit_prompt_hash'
|
|
13
|
+
const LINEAGE_KEY = 'audit_lineage'
|
|
12
14
|
|
|
13
15
|
export const AUDIT_NAME = 'audit'
|
|
14
16
|
|
|
15
17
|
export interface AuditInterceptorOptions {
|
|
16
18
|
sink?: AuditSink | 'stdout'
|
|
19
|
+
/** F-OBS-02: link each record via previousHash into a tamper-evident chain. */
|
|
20
|
+
hashChain?: boolean
|
|
17
21
|
}
|
|
18
22
|
|
|
19
23
|
/**
|
|
@@ -29,13 +33,17 @@ class AuditInterceptor implements Interceptor {
|
|
|
29
33
|
readonly dryRunSafe = true // auditing is observation-only, so it always runs
|
|
30
34
|
|
|
31
35
|
private readonly sink: AuditSink
|
|
36
|
+
private readonly hashChain: boolean
|
|
37
|
+
private lastHash = ''
|
|
32
38
|
|
|
33
39
|
constructor(options: AuditInterceptorOptions = {}) {
|
|
34
40
|
this.sink = resolveSink(options.sink)
|
|
41
|
+
this.hashChain = options.hashChain ?? false
|
|
35
42
|
}
|
|
36
43
|
|
|
37
44
|
async before(request: GavioRequest, ctx: InterceptorContext): Promise<GavioRequest> {
|
|
38
45
|
ctx.state[PROMPT_HASH_KEY] = AuditRecord.hashText(request.promptText())
|
|
46
|
+
if (request.lineage != null) ctx.state[LINEAGE_KEY] = request.lineage
|
|
39
47
|
return request
|
|
40
48
|
}
|
|
41
49
|
|
|
@@ -64,7 +72,12 @@ class AuditInterceptor implements Interceptor {
|
|
|
64
72
|
cacheType: response.cacheType,
|
|
65
73
|
guardrailOutcome: ctx.guardrailOutcome,
|
|
66
74
|
riskScore: ctx.riskScore,
|
|
75
|
+
lineage: (ctx.state[LINEAGE_KEY] as PromptLineage | undefined) ?? null,
|
|
67
76
|
})
|
|
77
|
+
if (this.hashChain) {
|
|
78
|
+
record.previousHash = this.lastHash
|
|
79
|
+
this.lastHash = record.contentHash()
|
|
80
|
+
}
|
|
68
81
|
response.audit = record
|
|
69
82
|
try {
|
|
70
83
|
await this.sink.write(record)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/** AuditRecord — the immutable, per-request audit entry. */
|
|
2
2
|
|
|
3
3
|
import { createHash } from 'node:crypto'
|
|
4
|
-
import { TokenUsage } from '../../types.js'
|
|
4
|
+
import { PromptLineage, TokenUsage } from '../../types.js'
|
|
5
5
|
|
|
6
6
|
export const SCHEMA_VERSION = '1.0'
|
|
7
7
|
|
|
@@ -9,6 +9,17 @@ function sha256(text: string): string {
|
|
|
9
9
|
return createHash('sha256').update(text, 'utf-8').digest('hex')
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
+
/** Deterministic JSON with keys sorted at every nesting level. */
|
|
13
|
+
function stableStringify(value: unknown): string {
|
|
14
|
+
if (value === null || typeof value !== 'object') return JSON.stringify(value) ?? 'null'
|
|
15
|
+
if (Array.isArray(value)) return `[${value.map(stableStringify).join(',')}]`
|
|
16
|
+
const obj = value as Record<string, unknown>
|
|
17
|
+
const parts = Object.keys(obj)
|
|
18
|
+
.sort()
|
|
19
|
+
.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`)
|
|
20
|
+
return `{${parts.join(',')}}`
|
|
21
|
+
}
|
|
22
|
+
|
|
12
23
|
export interface AuditRecordInit {
|
|
13
24
|
traceId: string
|
|
14
25
|
provider: string
|
|
@@ -30,6 +41,7 @@ export interface AuditRecordInit {
|
|
|
30
41
|
cacheType?: string | null
|
|
31
42
|
guardrailOutcome?: string | null
|
|
32
43
|
riskScore?: number | null
|
|
44
|
+
lineage?: PromptLineage | null
|
|
33
45
|
previousHash?: string
|
|
34
46
|
schemaVersion?: string
|
|
35
47
|
}
|
|
@@ -62,6 +74,7 @@ export class AuditRecord {
|
|
|
62
74
|
cacheType: string | null
|
|
63
75
|
guardrailOutcome: string | null
|
|
64
76
|
riskScore: number | null
|
|
77
|
+
lineage: PromptLineage | null
|
|
65
78
|
previousHash: string
|
|
66
79
|
schemaVersion: string
|
|
67
80
|
|
|
@@ -86,6 +99,7 @@ export class AuditRecord {
|
|
|
86
99
|
this.cacheType = init.cacheType ?? null
|
|
87
100
|
this.guardrailOutcome = init.guardrailOutcome ?? null
|
|
88
101
|
this.riskScore = init.riskScore ?? null
|
|
102
|
+
this.lineage = init.lineage ?? null
|
|
89
103
|
this.previousHash = init.previousHash ?? ''
|
|
90
104
|
this.schemaVersion = init.schemaVersion ?? SCHEMA_VERSION
|
|
91
105
|
}
|
|
@@ -120,15 +134,15 @@ export class AuditRecord {
|
|
|
120
134
|
cacheType: this.cacheType,
|
|
121
135
|
guardrailOutcome: this.guardrailOutcome,
|
|
122
136
|
riskScore: this.riskScore,
|
|
137
|
+
lineage: this.lineage ? this.lineage.toJSON() : null,
|
|
123
138
|
previousHash: this.previousHash,
|
|
124
139
|
schemaVersion: this.schemaVersion,
|
|
125
140
|
}
|
|
126
141
|
}
|
|
127
142
|
|
|
128
|
-
/** Stable JSON with sorted keys — used for the v0.2.0 hash chain. */
|
|
143
|
+
/** Stable JSON with recursively sorted keys — used for the v0.2.0 hash chain. */
|
|
129
144
|
toCanonicalJson(): string {
|
|
130
|
-
|
|
131
|
-
return JSON.stringify(data, Object.keys(data).sort())
|
|
145
|
+
return stableStringify(this.toJSON())
|
|
132
146
|
}
|
|
133
147
|
|
|
134
148
|
/** Hash of this record's content — used to build the v0.2.0 chain. */
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/** Audit-chain verification (F-OBS-02) and multi-agent DAG trace (F-OBS-03). */
|
|
2
|
+
|
|
3
|
+
import type { AuditRecord } from './record.js'
|
|
4
|
+
|
|
5
|
+
/**
 * Check that `records`, taken in order, link together as a hash chain.
 *
 * The first record's `previousHash` must be the empty string, and every later
 * record's `previousHash` must equal `contentHash()` of the record directly
 * before it. An empty list is treated as intact.
 *
 * Note that the hash of the final record is computed but never compared to
 * anything, so this check on its own does not notice removal of trailing
 * records.
 *
 * @param records - Audit records in chain order.
 * @returns `true` when every link matches, `false` at the first mismatch.
 */
export function verifyChain(records: AuditRecord[]): boolean {
  let expectedPrevious = ''
  for (let index = 0; index < records.length; index++) {
    const record = records[index]!
    const linked = record.previousHash === expectedPrevious
    if (!linked) {
      return false
    }
    // The next record must point at this record's content hash.
    expectedPrevious = record.contentHash()
  }
  return true
}
|
|
18
|
+
|
|
19
|
+
/** One node of the reconstructed call graph, built from a single audit record. */
export interface TraceNode {
  /** Trace id copied from the record; used as the node's lookup key. */
  traceId: string
  /** Agent id copied from the record. */
  agentId: string | null
  /** Trace id of the parent call as stored on the record, if any. */
  parentTraceId: string | null
  /** Nodes whose `parentTraceId` resolved to this node. */
  children: TraceNode[]
}

/**
 * Rebuild the multi-agent call graph from audit records by linking each
 * record's `parentTraceId` to the node carrying that `traceId`.
 *
 * Nodes are keyed by `traceId`; when several records share one, the values of
 * the last such record are kept. A node becomes a root when its
 * `parentTraceId` is empty/null or does not match any record in `records`.
 *
 * @param records - Audit records to link together.
 * @returns The root nodes, each carrying its descendants under `children`.
 */
export function buildCallGraph(records: AuditRecord[]): TraceNode[] {
  // Pass 1: one node per trace id.
  const byTraceId = new Map<string, TraceNode>()
  for (const { traceId, agentId, parentTraceId } of records) {
    byTraceId.set(traceId, { traceId, agentId, parentTraceId, children: [] })
  }

  // Pass 2: hang every node under its parent, or collect it as a root when no
  // parent node is known.
  const roots: TraceNode[] = []
  byTraceId.forEach((node) => {
    const parentId = node.parentTraceId
    const parent = parentId ? byTraceId.get(parentId) : undefined
    const siblings = parent === undefined ? roots : parent.children
    siblings.push(node)
  })
  return roots
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings for the semantic cache (F-CACHE-02).
|
|
3
|
+
*
|
|
4
|
+
* Zero-dependency hashed bag-of-words embedder (L2-normalised) — good enough to
|
|
5
|
+
* dedup near-identical prompts. Plug in a real embedder implementing `Embedder`
|
|
6
|
+
* for production semantic matching.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { createHash } from 'node:crypto'
|
|
10
|
+
|
|
11
|
+
/** Anything that can turn a piece of text into a numeric vector. */
export interface Embedder {
  /** Produce the embedding vector for `text`. */
  embed(text: string): number[]
}

/** Matches each maximal run of lowercase ASCII letters and digits. */
const TOKEN = /[a-z0-9]+/g

/**
 * Create a deterministic hashed bag-of-words embedder.
 *
 * The text is lowercased and split into `[a-z0-9]+` tokens. Each token is
 * hashed into one of `dim` buckets and the bucket counts are scaled to unit
 * L2 length. Text with no tokens yields the all-zero vector.
 *
 * @param dim - Number of buckets, i.e. the length of every returned vector.
 * @returns An `Embedder` whose `embed` returns a vector of length `dim`.
 */
export function hashingEmbedder(dim = 256): Embedder {
  const embed = (text: string): number[] => {
    const counts = new Array<number>(dim).fill(0)
    const words = text.toLowerCase().match(TOKEN) ?? []
    words.forEach((word) => {
      // Parity note: Python uses blake2b(digest_size=8); here the first
      // 8 bytes of blake2b512 are read as a big-endian unsigned integer. Both
      // are deterministic; the JS cache is per-process so cross-language
      // byte-parity is not required.
      const digest = createHash('blake2b512').update(word).digest()
      const bucket = Number(digest.readBigUInt64BE(0) % BigInt(dim))
      counts[bucket]! += 1
    })

    let sumOfSquares = 0
    for (const count of counts) sumOfSquares += count * count
    const norm = Math.sqrt(sumOfSquares)

    // A zero vector cannot be normalised; hand it back untouched.
    return norm === 0 ? counts : counts.map((count) => count / norm)
  }
  return { embed }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Cosine similarity of two equally sized vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns The dot product divided by the product of both Euclidean norms, or
 *   `0` when either vector has zero magnitude (including two empty vectors).
 * @throws Error when the vectors differ in length.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new Error('vectors must have equal length')
  }

  let dotProduct = 0
  let squaredNormA = 0
  let squaredNormB = 0
  for (const [index, x] of a.entries()) {
    const y = b[index]!
    dotProduct += x * y
    squaredNormA += x * x
    squaredNormB += y * y
  }

  // Guard against dividing by zero for zero-magnitude input.
  const hasZeroVector = squaredNormA === 0 || squaredNormB === 0
  if (hasZeroVector) {
    return 0
  }
  return dotProduct / (Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB))
}
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Caching substrate. The SemanticCache interceptor ships in v0.2.0; v0.1.0
|
|
3
|
-
* exposes the CacheBackend interface and the in-memory backend only.
|
|
4
|
-
*/
|
|
1
|
+
/** Caching (F-CACHE-01 exact, F-CACHE-02 semantic, F-CACHE-03 in-memory). */
|
|
5
2
|
|
|
6
3
|
export type { CacheBackend } from './backend.js'
|
|
7
4
|
export { memoryCacheBackend } from './backends/memory.js'
|
|
8
5
|
export type { MemoryCacheBackendOptions } from './backends/memory.js'
|
|
6
|
+
export { semanticCache } from './interceptor.js'
|
|
7
|
+
export type { SemanticCacheOptions } from './interceptor.js'
|
|
8
|
+
export { hashingEmbedder, cosineSimilarity } from './embedding.js'
|
|
9
|
+
export type { Embedder } from './embedding.js'
|
|
10
|
+
export { inMemoryVectorBackend } from './vector.js'
|
|
11
|
+
export type { VectorBackend } from './vector.js'
|