gavio 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/gateway.js +46 -0
- package/dist/cjs/index.js +4 -2
- package/dist/cjs/interceptors/audit/interceptor.js +4 -0
- package/dist/cjs/interceptors/audit/record.js +17 -3
- package/dist/cjs/interceptors/metrics/index.js +9 -0
- package/dist/cjs/interceptors/metrics/interceptor.js +37 -0
- package/dist/cjs/interceptors/metrics/registry.js +0 -0
- package/dist/cjs/interceptors/quality/index.js +7 -0
- package/dist/cjs/interceptors/quality/risk.js +49 -0
- package/dist/cjs/interceptors/reliability/index.js +3 -1
- package/dist/cjs/interceptors/reliability/stream-buffer.js +28 -0
- package/dist/cjs/providers/base.js +9 -0
- package/dist/cjs/request.js +3 -0
- package/dist/cjs/types.js +53 -1
- package/dist/esm/gateway.d.ts +13 -1
- package/dist/esm/gateway.js +46 -0
- package/dist/esm/index.d.ts +3 -3
- package/dist/esm/index.js +2 -2
- package/dist/esm/interceptors/audit/interceptor.js +4 -0
- package/dist/esm/interceptors/audit/record.d.ts +4 -2
- package/dist/esm/interceptors/audit/record.js +18 -4
- package/dist/esm/interceptors/metrics/index.d.ts +5 -0
- package/dist/esm/interceptors/metrics/index.js +3 -0
- package/dist/esm/interceptors/metrics/interceptor.d.ts +22 -0
- package/dist/esm/interceptors/metrics/interceptor.js +33 -0
- package/dist/esm/interceptors/metrics/registry.d.ts +31 -0
- package/dist/esm/interceptors/metrics/registry.js +0 -0
- package/dist/esm/interceptors/quality/index.d.ts +3 -0
- package/dist/esm/interceptors/quality/index.js +2 -0
- package/dist/esm/interceptors/quality/risk.d.ts +32 -0
- package/dist/esm/interceptors/quality/risk.js +44 -0
- package/dist/esm/interceptors/reliability/index.d.ts +1 -0
- package/dist/esm/interceptors/reliability/index.js +1 -0
- package/dist/esm/interceptors/reliability/stream-buffer.d.ts +18 -0
- package/dist/esm/interceptors/reliability/stream-buffer.js +24 -0
- package/dist/esm/providers/base.d.ts +7 -0
- package/dist/esm/providers/base.js +9 -1
- package/dist/esm/request.d.ts +4 -1
- package/dist/esm/request.js +4 -1
- package/dist/esm/types.d.ts +54 -0
- package/dist/esm/types.js +50 -0
- package/package.json +11 -1
- package/src/gateway.ts +52 -1
- package/src/index.ts +4 -2
- package/src/interceptors/audit/interceptor.ts +4 -0
- package/src/interceptors/audit/record.ts +18 -4
- package/src/interceptors/metrics/index.ts +6 -0
- package/src/interceptors/metrics/interceptor.ts +46 -0
- package/src/interceptors/metrics/registry.ts +0 -0
- package/src/interceptors/quality/index.ts +4 -0
- package/src/interceptors/quality/risk.ts +64 -0
- package/src/interceptors/reliability/index.ts +1 -0
- package/src/interceptors/reliability/stream-buffer.ts +27 -0
- package/src/providers/base.ts +21 -1
- package/src/request.ts +6 -2
- package/src/types.ts +77 -0
|
@@ -1,10 +1,22 @@
|
|
|
1
1
|
/** AuditRecord — the immutable, per-request audit entry. */
|
|
2
2
|
import { createHash } from 'node:crypto';
|
|
3
|
-
import { TokenUsage } from '../../types.js';
|
|
3
|
+
import { PromptLineage, TokenUsage } from '../../types.js';
|
|
4
4
|
export const SCHEMA_VERSION = '1.0';
|
|
5
5
|
function sha256(text) {
|
|
6
6
|
return createHash('sha256').update(text, 'utf-8').digest('hex');
|
|
7
7
|
}
|
|
8
|
+
/** Deterministic JSON with keys sorted at every nesting level. */
|
|
9
|
+
function stableStringify(value) {
|
|
10
|
+
if (value === null || typeof value !== 'object')
|
|
11
|
+
return JSON.stringify(value) ?? 'null';
|
|
12
|
+
if (Array.isArray(value))
|
|
13
|
+
return `[${value.map(stableStringify).join(',')}]`;
|
|
14
|
+
const obj = value;
|
|
15
|
+
const parts = Object.keys(obj)
|
|
16
|
+
.sort()
|
|
17
|
+
.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`);
|
|
18
|
+
return `{${parts.join(',')}}`;
|
|
19
|
+
}
|
|
8
20
|
/**
|
|
9
21
|
* One append-only audit entry. Carries metadata only — never raw content.
|
|
10
22
|
*
|
|
@@ -33,6 +45,7 @@ export class AuditRecord {
|
|
|
33
45
|
cacheType;
|
|
34
46
|
guardrailOutcome;
|
|
35
47
|
riskScore;
|
|
48
|
+
lineage;
|
|
36
49
|
previousHash;
|
|
37
50
|
schemaVersion;
|
|
38
51
|
constructor(init) {
|
|
@@ -56,6 +69,7 @@ export class AuditRecord {
|
|
|
56
69
|
this.cacheType = init.cacheType ?? null;
|
|
57
70
|
this.guardrailOutcome = init.guardrailOutcome ?? null;
|
|
58
71
|
this.riskScore = init.riskScore ?? null;
|
|
72
|
+
this.lineage = init.lineage ?? null;
|
|
59
73
|
this.previousHash = init.previousHash ?? '';
|
|
60
74
|
this.schemaVersion = init.schemaVersion ?? SCHEMA_VERSION;
|
|
61
75
|
}
|
|
@@ -87,14 +101,14 @@ export class AuditRecord {
|
|
|
87
101
|
cacheType: this.cacheType,
|
|
88
102
|
guardrailOutcome: this.guardrailOutcome,
|
|
89
103
|
riskScore: this.riskScore,
|
|
104
|
+
lineage: this.lineage ? this.lineage.toJSON() : null,
|
|
90
105
|
previousHash: this.previousHash,
|
|
91
106
|
schemaVersion: this.schemaVersion,
|
|
92
107
|
};
|
|
93
108
|
}
|
|
94
|
-
/** Stable JSON with sorted keys — used for the v0.2.0 hash chain. */
|
|
109
|
+
/** Stable JSON with recursively sorted keys — used for the v0.2.0 hash chain. */
|
|
95
110
|
toCanonicalJson() {
|
|
96
|
-
|
|
97
|
-
return JSON.stringify(data, Object.keys(data).sort());
|
|
111
|
+
return stableStringify(this.toJSON());
|
|
98
112
|
}
|
|
99
113
|
/** Hash of this record's content — used to build the v0.2.0 chain. */
|
|
100
114
|
contentHash() {
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
/** Prometheus metrics (F-OBS-08). */
|
|
2
|
+
export { PrometheusMetrics } from './registry.js';
|
|
3
|
+
export type { RecordSample } from './registry.js';
|
|
4
|
+
export { metricsInterceptor, METRICS_NAME } from './interceptor.js';
|
|
5
|
+
export type { MetricsInterceptor } from './interceptor.js';
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/** metricsInterceptor (F-OBS-08) — records Prometheus metrics per request. */
|
|
2
|
+
import type { Interceptor } from '../base.js';
|
|
3
|
+
import { PrometheusMetrics } from './registry.js';
|
|
4
|
+
export declare const METRICS_NAME = "metrics";
|
|
5
|
+
/** An interceptor that also exposes the registry it records into. */
|
|
6
|
+
export interface MetricsInterceptor extends Interceptor {
|
|
7
|
+
readonly metrics: PrometheusMetrics;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Build a metrics interceptor. Pass a shared {@link PrometheusMetrics} registry
|
|
11
|
+
* (or let it create one) and scrape it via `.metrics.render()`:
|
|
12
|
+
*
|
|
13
|
+
* ```ts
|
|
14
|
+
* const m = metricsInterceptor()
|
|
15
|
+
* const gw = new Gateway({ devMode: true }).use(m)
|
|
16
|
+
* // ...
|
|
17
|
+
* console.log(m.metrics.render())
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* Observation-only, so it always runs (including in dry-run).
|
|
21
|
+
*/
|
|
22
|
+
export declare function metricsInterceptor(metrics?: PrometheusMetrics): MetricsInterceptor;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/** metricsInterceptor (F-OBS-08) — records Prometheus metrics per request. */
|
|
2
|
+
import { PrometheusMetrics } from './registry.js';
|
|
3
|
+
export const METRICS_NAME = 'metrics';
|
|
4
|
+
/**
 * Create the interceptor that feeds finished responses into a
 * {@link PrometheusMetrics} registry. Supply a registry to share it between
 * gateways, or omit it to get a fresh one; either way it is reachable as
 * `.metrics` for scraping:
 *
 * ```ts
 * const m = metricsInterceptor()
 * const gw = new Gateway({ devMode: true }).use(m)
 * // ...
 * console.log(m.metrics.render())
 * ```
 *
 * The interceptor only observes, and is flagged `dryRunSafe` so it is eligible
 * to run in dry-run mode as well.
 */
export function metricsInterceptor(metrics = new PrometheusMetrics()) {
    /** Record one sample for the response, then hand it back unchanged. */
    async function after(response, _ctx) {
        const { provider, model } = response;
        const sample = {
            promptTokens: response.usage.promptTokens,
            completionTokens: response.usage.completionTokens,
            costUsd: response.costUsd,
            latencyMs: response.latencyMs,
            cacheHit: response.cacheHit,
        };
        metrics.record(provider, model, sample);
        return response;
    }
    return { name: METRICS_NAME, dryRunSafe: true, metrics, after };
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prometheus metrics registry (F-OBS-08) — zero-dependency exposition.
|
|
3
|
+
*
|
|
4
|
+
* Holds counters and a latency histogram keyed by `provider|model` and renders
|
|
5
|
+
* them in the Prometheus text exposition format. No client library — the format
|
|
6
|
+
* is hand-rolled so the core stays dependency-free.
|
|
7
|
+
*/
|
|
8
|
+
export interface RecordSample {
|
|
9
|
+
promptTokens?: number;
|
|
10
|
+
completionTokens?: number;
|
|
11
|
+
costUsd?: number;
|
|
12
|
+
latencyMs?: number;
|
|
13
|
+
cacheHit?: boolean;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Thread-free, in-process metrics rendered as Prometheus exposition text. All
|
|
17
|
+
* series are labelled by `provider` and `model`.
|
|
18
|
+
*/
|
|
19
|
+
export declare class PrometheusMetrics {
|
|
20
|
+
private readonly ns;
|
|
21
|
+
private readonly requests;
|
|
22
|
+
private readonly tokens;
|
|
23
|
+
private readonly cost;
|
|
24
|
+
private readonly cacheHits;
|
|
25
|
+
private readonly latency;
|
|
26
|
+
private readonly keyLabels;
|
|
27
|
+
constructor(namespace?: string);
|
|
28
|
+
record(provider: string, model: string, sample?: RecordSample): void;
|
|
29
|
+
/** Return the Prometheus text exposition of all metrics. */
|
|
30
|
+
render(): string;
|
|
31
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RiskScorer (F-QUA-06) — a composite risk score from per-request signals.
|
|
3
|
+
*
|
|
4
|
+
* Folds the signals other interceptors leave on the {@link InterceptorContext}
|
|
5
|
+
* — PII entities found, guardrail outcome, and the prompt-injection risk — into
|
|
6
|
+
* a single score in `[0, 1]` written to `ctx.riskScore` (and thus the audit
|
|
7
|
+
* record). Register it *inside* the audit interceptor so audit sees the composite.
|
|
8
|
+
*/
|
|
9
|
+
import type { InterceptorContext } from '../../context.js';
|
|
10
|
+
import type { GavioResponse } from '../../response.js';
|
|
11
|
+
import type { Interceptor } from '../base.js';
|
|
12
|
+
export interface RiskWeights {
|
|
13
|
+
pii?: number;
|
|
14
|
+
guardrail?: number;
|
|
15
|
+
injection?: number;
|
|
16
|
+
/** PII entity count at which the PII signal saturates to 1.0 (<= 0 → any PII = 1.0). */
|
|
17
|
+
piiSaturation?: number;
|
|
18
|
+
}
|
|
19
|
+
export declare class RiskScorer implements Interceptor {
|
|
20
|
+
readonly name = "risk_scorer";
|
|
21
|
+
readonly dryRunSafe = true;
|
|
22
|
+
private readonly pii;
|
|
23
|
+
private readonly guardrail;
|
|
24
|
+
private readonly injection;
|
|
25
|
+
private readonly piiSaturation;
|
|
26
|
+
constructor(weights?: RiskWeights);
|
|
27
|
+
/** Compute the composite risk score from the three raw signals. */
|
|
28
|
+
score(piiCount: number, guardrailOutcome: string | null, injectionScore: number | null): number;
|
|
29
|
+
after(response: GavioResponse, ctx: InterceptorContext): Promise<GavioResponse>;
|
|
30
|
+
}
|
|
31
|
+
/** Build a risk scorer. */
|
|
32
|
+
export declare function riskScorer(weights?: RiskWeights): RiskScorer;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RiskScorer (F-QUA-06) — a composite risk score from per-request signals.
|
|
3
|
+
*
|
|
4
|
+
* Folds the signals other interceptors leave on the {@link InterceptorContext}
|
|
5
|
+
* — PII entities found, guardrail outcome, and the prompt-injection risk — into
|
|
6
|
+
* a single score in `[0, 1]` written to `ctx.riskScore` (and thus the audit
|
|
7
|
+
* record). Register it *inside* the audit interceptor so audit sees the composite.
|
|
8
|
+
*/
|
|
9
|
+
// Guardrail outcome → its contribution before weighting.
|
|
10
|
+
const GUARDRAIL_SIGNAL = { FAIL: 1.0, HITL: 0.6 };
|
|
11
|
+
export class RiskScorer {
|
|
12
|
+
name = 'risk_scorer';
|
|
13
|
+
dryRunSafe = true;
|
|
14
|
+
pii;
|
|
15
|
+
guardrail;
|
|
16
|
+
injection;
|
|
17
|
+
piiSaturation;
|
|
18
|
+
constructor(weights = {}) {
|
|
19
|
+
this.pii = weights.pii ?? 0.3;
|
|
20
|
+
this.guardrail = weights.guardrail ?? 0.4;
|
|
21
|
+
this.injection = weights.injection ?? 0.3;
|
|
22
|
+
this.piiSaturation = weights.piiSaturation ?? 4;
|
|
23
|
+
}
|
|
24
|
+
/** Compute the composite risk score from the three raw signals. */
|
|
25
|
+
score(piiCount, guardrailOutcome, injectionScore) {
|
|
26
|
+
let piiSignal = 0;
|
|
27
|
+
if (piiCount > 0) {
|
|
28
|
+
piiSignal = this.piiSaturation <= 0 ? 1 : Math.min(1, piiCount / this.piiSaturation);
|
|
29
|
+
}
|
|
30
|
+
const guardrailSignal = GUARDRAIL_SIGNAL[guardrailOutcome ?? ''] ?? 0;
|
|
31
|
+
const injectionSignal = injectionScore ?? 0;
|
|
32
|
+
const composite = this.pii * piiSignal + this.guardrail * guardrailSignal + this.injection * injectionSignal;
|
|
33
|
+
return Math.max(0, Math.min(1, composite));
|
|
34
|
+
}
|
|
35
|
+
async after(response, ctx) {
|
|
36
|
+
const piiCount = Object.values(ctx.piiEntityCounts).reduce((a, b) => a + b, 0);
|
|
37
|
+
ctx.riskScore = this.score(piiCount, ctx.guardrailOutcome, ctx.riskScore);
|
|
38
|
+
return response;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
/** Build a risk scorer. */
|
|
42
|
+
export function riskScorer(weights = {}) {
|
|
43
|
+
return new RiskScorer(weights);
|
|
44
|
+
}
|
|
@@ -9,3 +9,4 @@ export { circuitBreaker, CircuitState } from './circuit-breaker.js';
|
|
|
9
9
|
export type { CircuitBreakerOptions } from './circuit-breaker.js';
|
|
10
10
|
export { loadBalancer } from './load-balancer.js';
|
|
11
11
|
export type { LoadBalancerOptions } from './load-balancer.js';
|
|
12
|
+
export { StreamBuffer } from './stream-buffer.js';
|
|
@@ -4,3 +4,4 @@ export { timeoutPolicy, timeout } from './timeout.js';
|
|
|
4
4
|
export { fallbackChain } from './fallback.js';
|
|
5
5
|
export { circuitBreaker, CircuitState } from './circuit-breaker.js';
|
|
6
6
|
export { loadBalancer } from './load-balancer.js';
|
|
7
|
+
export { StreamBuffer } from './stream-buffer.js';
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* StreamBuffer (F-REL-06) — accumulate a provider stream for post-interceptors.
|
|
3
|
+
*
|
|
4
|
+
* Post-interceptors (guardrails, PII restore, audit) need the *complete*
|
|
5
|
+
* response, so a streamed reply is buffered in full before the post pipeline
|
|
6
|
+
* runs and before any chunk reaches the caller. This trades first-token latency
|
|
7
|
+
* for the guarantee that every interceptor sees — and can rewrite or block — the
|
|
8
|
+
* whole response.
|
|
9
|
+
*/
|
|
10
|
+
export declare class StreamBuffer {
|
|
11
|
+
private readonly parts;
|
|
12
|
+
/** Add one streamed chunk. */
|
|
13
|
+
append(chunk: string): void;
|
|
14
|
+
/** The full buffered response so far. */
|
|
15
|
+
text(): string;
|
|
16
|
+
/** Total buffered length in characters. */
|
|
17
|
+
get length(): number;
|
|
18
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* StreamBuffer (F-REL-06) — accumulate a provider stream for post-interceptors.
|
|
3
|
+
*
|
|
4
|
+
* Post-interceptors (guardrails, PII restore, audit) need the *complete*
|
|
5
|
+
* response, so a streamed reply is buffered in full before the post pipeline
|
|
6
|
+
* runs and before any chunk reaches the caller. This trades first-token latency
|
|
7
|
+
* for the guarantee that every interceptor sees — and can rewrite or block — the
|
|
8
|
+
* whole response.
|
|
9
|
+
*/
|
|
10
|
+
export class StreamBuffer {
|
|
11
|
+
parts = [];
|
|
12
|
+
/** Add one streamed chunk. */
|
|
13
|
+
append(chunk) {
|
|
14
|
+
this.parts.push(chunk);
|
|
15
|
+
}
|
|
16
|
+
/** The full buffered response so far. */
|
|
17
|
+
text() {
|
|
18
|
+
return this.parts.join('');
|
|
19
|
+
}
|
|
20
|
+
/** Total buffered length in characters. */
|
|
21
|
+
get length() {
|
|
22
|
+
return this.parts.reduce((n, p) => n + p.length, 0);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
@@ -8,6 +8,8 @@ export interface ProviderAdapter {
|
|
|
8
8
|
readonly providerName: string;
|
|
9
9
|
complete(request: GavioRequest): Promise<GavioResponse>;
|
|
10
10
|
stream?(request: GavioRequest): AsyncIterable<string>;
|
|
11
|
+
/** Build a response from a fully buffered stream (F-REL-06). */
|
|
12
|
+
buildStreamResponse?(request: GavioRequest, content: string, startedAt: number): GavioResponse;
|
|
11
13
|
healthCheck(): Promise<boolean>;
|
|
12
14
|
readonly reportedModelVersion?: string | null;
|
|
13
15
|
}
|
|
@@ -19,5 +21,10 @@ export declare abstract class BaseProviderAdapter implements ProviderAdapter {
|
|
|
19
21
|
abstract complete(request: GavioRequest): Promise<GavioResponse>;
|
|
20
22
|
abstract healthCheck(): Promise<boolean>;
|
|
21
23
|
get reportedModelVersion(): string | null;
|
|
24
|
+
/**
|
|
25
|
+
* Build a response from a fully buffered stream (F-REL-06). Streamed chunks
|
|
26
|
+
* carry text only, so token usage is estimated from prompt + content.
|
|
27
|
+
*/
|
|
28
|
+
buildStreamResponse(request: GavioRequest, content: string, startedAt: number): GavioResponse;
|
|
22
29
|
protected buildResponse(request: GavioRequest, content: string, usage: TokenUsage, modelVersion: string, startedAt: number): GavioResponse;
|
|
23
30
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/** ProviderAdapter interface and shared response-building helpers. */
|
|
2
|
-
import { PricingProvider } from '../pricing.js';
|
|
2
|
+
import { PricingProvider, estimateTokens } from '../pricing.js';
|
|
3
3
|
import { GavioRequest } from '../request.js';
|
|
4
4
|
import { GavioResponse } from '../response.js';
|
|
5
5
|
import { TokenUsage } from '../types.js';
|
|
@@ -12,6 +12,14 @@ export class BaseProviderAdapter {
|
|
|
12
12
|
get reportedModelVersion() {
|
|
13
13
|
return null;
|
|
14
14
|
}
|
|
15
|
+
/**
 * Build a response from a fully buffered stream (F-REL-06). Streamed chunks
 * carry text only, so token usage is estimated from prompt + content.
 *
 * @param request - the streamed request; its `promptText()` feeds the first
 *   token estimate and its `model` is the fallback model version.
 * @param content - the complete buffered response text; feeds the second
 *   token estimate.
 * @param startedAt - start timestamp passed through to `buildResponse`, which
 *   computes latency as `performance.now() - startedAt`.
 * @returns whatever `buildResponse` produces for these inputs.
 */
buildStreamResponse(request, content, startedAt) {
    // Both counts are estimates derived from text, not provider-reported usage.
    const usage = new TokenUsage(estimateTokens(request.promptText()), estimateTokens(content));
    // Prefer the adapter's reported model version; fall back to the requested model.
    return this.buildResponse(request, content, usage, this.reportedModelVersion ?? request.model, startedAt);
}
|
|
15
23
|
buildResponse(request, content, usage, modelVersion, startedAt) {
|
|
16
24
|
const latencyMs = Math.floor(performance.now() - startedAt);
|
|
17
25
|
return new GavioResponse({
|
package/dist/esm/request.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/** GavioRequest — the canonical, provider-agnostic request model. */
|
|
2
|
-
import type { Message, Provider } from './types.js';
|
|
2
|
+
import { PromptLineage } from './types.js';
|
|
3
|
+
import type { Message, PromptLineageInit, Provider } from './types.js';
|
|
3
4
|
export interface GavioRequestInit {
|
|
4
5
|
messages: Message[];
|
|
5
6
|
model: string;
|
|
@@ -10,6 +11,7 @@ export interface GavioRequestInit {
|
|
|
10
11
|
sessionId?: string | null;
|
|
11
12
|
options?: Record<string, unknown>;
|
|
12
13
|
metadata?: Record<string, unknown>;
|
|
14
|
+
lineage?: PromptLineage | PromptLineageInit | null;
|
|
13
15
|
}
|
|
14
16
|
/**
|
|
15
17
|
* A single gateway call. A `traceId` (UUID v7, time-sortable) is assigned
|
|
@@ -26,6 +28,7 @@ export declare class GavioRequest {
|
|
|
26
28
|
sessionId: string | null;
|
|
27
29
|
options: Record<string, unknown>;
|
|
28
30
|
metadata: Record<string, unknown>;
|
|
31
|
+
lineage: PromptLineage | null;
|
|
29
32
|
constructor(init: GavioRequestInit);
|
|
30
33
|
get temperature(): number;
|
|
31
34
|
get maxTokens(): number;
|
package/dist/esm/request.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/** GavioRequest — the canonical, provider-agnostic request model. */
|
|
2
2
|
import { newTraceId } from './ids.js';
|
|
3
|
-
import { coerceProvider } from './types.js';
|
|
3
|
+
import { coerceProvider, PromptLineage } from './types.js';
|
|
4
4
|
/**
|
|
5
5
|
* A single gateway call. A `traceId` (UUID v7, time-sortable) is assigned
|
|
6
6
|
* automatically if not supplied. `parentTraceId` links calls into a
|
|
@@ -16,6 +16,7 @@ export class GavioRequest {
|
|
|
16
16
|
sessionId;
|
|
17
17
|
options;
|
|
18
18
|
metadata;
|
|
19
|
+
lineage;
|
|
19
20
|
constructor(init) {
|
|
20
21
|
this.messages = init.messages;
|
|
21
22
|
this.model = init.model;
|
|
@@ -26,6 +27,7 @@ export class GavioRequest {
|
|
|
26
27
|
this.sessionId = init.sessionId ?? null;
|
|
27
28
|
this.options = init.options ?? {};
|
|
28
29
|
this.metadata = init.metadata ?? {};
|
|
30
|
+
this.lineage = init.lineage != null ? PromptLineage.from(init.lineage) : null;
|
|
29
31
|
}
|
|
30
32
|
get temperature() {
|
|
31
33
|
const t = this.options['temperature'];
|
|
@@ -51,6 +53,7 @@ export class GavioRequest {
|
|
|
51
53
|
sessionId: this.sessionId,
|
|
52
54
|
options: { ...this.options },
|
|
53
55
|
metadata: { ...this.metadata },
|
|
56
|
+
lineage: this.lineage,
|
|
54
57
|
});
|
|
55
58
|
}
|
|
56
59
|
}
|
package/dist/esm/types.d.ts
CHANGED
|
@@ -56,3 +56,57 @@ export declare class TokenUsage {
|
|
|
56
56
|
totalTokens: number;
|
|
57
57
|
};
|
|
58
58
|
}
|
|
59
|
+
export interface RagChunkInit {
|
|
60
|
+
source: string;
|
|
61
|
+
chunkId?: string | null;
|
|
62
|
+
score?: number | null;
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* A single retrieved source that contributed to a prompt. Carries a *reference*
|
|
66
|
+
* to the source — never the retrieved text — so prompt lineage stays within the
|
|
67
|
+
* audit record's metadata-only contract.
|
|
68
|
+
*/
|
|
69
|
+
export declare class RagChunk {
|
|
70
|
+
readonly source: string;
|
|
71
|
+
readonly chunkId: string | null;
|
|
72
|
+
readonly score: number | null;
|
|
73
|
+
constructor(init: RagChunkInit);
|
|
74
|
+
toJSON(): {
|
|
75
|
+
source: string;
|
|
76
|
+
chunkId: string | null;
|
|
77
|
+
score: number | null;
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
export interface PromptLineageInit {
|
|
81
|
+
templateId?: string | null;
|
|
82
|
+
templateVersion?: string | null;
|
|
83
|
+
variables?: Record<string, unknown>;
|
|
84
|
+
ragChunks?: Array<RagChunk | RagChunkInit>;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Provenance for a rendered prompt (F-OBS-04): the template, the variable
|
|
88
|
+
* bindings interpolated into it, and the RAG chunk sources retrieved for it.
|
|
89
|
+
*
|
|
90
|
+
* Attached to a GavioRequest by the caller and copied into the AuditRecord so
|
|
91
|
+
* any prompt can be reconstructed and debugged. RAG chunk text is never stored
|
|
92
|
+
* — only source references (see {@link RagChunk}).
|
|
93
|
+
*/
|
|
94
|
+
export declare class PromptLineage {
|
|
95
|
+
readonly templateId: string | null;
|
|
96
|
+
readonly templateVersion: string | null;
|
|
97
|
+
readonly variables: Record<string, unknown>;
|
|
98
|
+
readonly ragChunks: RagChunk[];
|
|
99
|
+
constructor(init?: PromptLineageInit);
|
|
100
|
+
/** Coerce a PromptLineage instance or plain init object into a PromptLineage. */
|
|
101
|
+
static from(value: PromptLineage | PromptLineageInit): PromptLineage;
|
|
102
|
+
toJSON(): {
|
|
103
|
+
templateId: string | null;
|
|
104
|
+
templateVersion: string | null;
|
|
105
|
+
variables: Record<string, unknown>;
|
|
106
|
+
ragChunks: Array<{
|
|
107
|
+
source: string;
|
|
108
|
+
chunkId: string | null;
|
|
109
|
+
score: number | null;
|
|
110
|
+
}>;
|
|
111
|
+
};
|
|
112
|
+
}
|
package/dist/esm/types.js
CHANGED
|
@@ -54,3 +54,53 @@ export class TokenUsage {
|
|
|
54
54
|
};
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
|
+
/**
|
|
58
|
+
* A single retrieved source that contributed to a prompt. Carries a *reference*
|
|
59
|
+
* to the source — never the retrieved text — so prompt lineage stays within the
|
|
60
|
+
* audit record's metadata-only contract.
|
|
61
|
+
*/
|
|
62
|
+
export class RagChunk {
|
|
63
|
+
source;
|
|
64
|
+
chunkId;
|
|
65
|
+
score;
|
|
66
|
+
constructor(init) {
|
|
67
|
+
this.source = init.source;
|
|
68
|
+
this.chunkId = init.chunkId ?? null;
|
|
69
|
+
this.score = init.score ?? null;
|
|
70
|
+
}
|
|
71
|
+
toJSON() {
|
|
72
|
+
return { source: this.source, chunkId: this.chunkId, score: this.score };
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Provenance for a rendered prompt (F-OBS-04): the template, the variable
|
|
77
|
+
* bindings interpolated into it, and the RAG chunk sources retrieved for it.
|
|
78
|
+
*
|
|
79
|
+
* Attached to a GavioRequest by the caller and copied into the AuditRecord so
|
|
80
|
+
* any prompt can be reconstructed and debugged. RAG chunk text is never stored
|
|
81
|
+
* — only source references (see {@link RagChunk}).
|
|
82
|
+
*/
|
|
83
|
+
export class PromptLineage {
|
|
84
|
+
templateId;
|
|
85
|
+
templateVersion;
|
|
86
|
+
variables;
|
|
87
|
+
ragChunks;
|
|
88
|
+
constructor(init = {}) {
|
|
89
|
+
this.templateId = init.templateId ?? null;
|
|
90
|
+
this.templateVersion = init.templateVersion ?? null;
|
|
91
|
+
this.variables = init.variables ?? {};
|
|
92
|
+
this.ragChunks = (init.ragChunks ?? []).map((c) => c instanceof RagChunk ? c : new RagChunk(c));
|
|
93
|
+
}
|
|
94
|
+
/** Coerce a PromptLineage instance or plain init object into a PromptLineage. */
|
|
95
|
+
static from(value) {
|
|
96
|
+
return value instanceof PromptLineage ? value : new PromptLineage(value);
|
|
97
|
+
}
|
|
98
|
+
toJSON() {
|
|
99
|
+
return {
|
|
100
|
+
templateId: this.templateId,
|
|
101
|
+
templateVersion: this.templateVersion,
|
|
102
|
+
variables: this.variables,
|
|
103
|
+
ragChunks: this.ragChunks.map((c) => c.toJSON()),
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gavio",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "The open standard AI gateway for production systems — PII guarding, audit, reliability, and cost tracking.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -80,6 +80,16 @@
|
|
|
80
80
|
"import": "./dist/esm/interceptors/injection.js",
|
|
81
81
|
"require": "./dist/cjs/interceptors/injection.js"
|
|
82
82
|
},
|
|
83
|
+
"./interceptors/metrics": {
|
|
84
|
+
"types": "./dist/esm/interceptors/metrics/index.d.ts",
|
|
85
|
+
"import": "./dist/esm/interceptors/metrics/index.js",
|
|
86
|
+
"require": "./dist/cjs/interceptors/metrics/index.js"
|
|
87
|
+
},
|
|
88
|
+
"./interceptors/quality": {
|
|
89
|
+
"types": "./dist/esm/interceptors/quality/index.d.ts",
|
|
90
|
+
"import": "./dist/esm/interceptors/quality/index.js",
|
|
91
|
+
"require": "./dist/cjs/interceptors/quality/index.js"
|
|
92
|
+
},
|
|
83
93
|
"./shim/openai": {
|
|
84
94
|
"types": "./dist/esm/shim/openai.d.ts",
|
|
85
95
|
"import": "./dist/esm/shim/openai.js",
|
package/src/gateway.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { auditInterceptor, isAuditInterceptor } from './interceptors/audit/index
|
|
|
6
6
|
import { isExecutorPolicy } from './interceptors/base.js'
|
|
7
7
|
import type { Executor, ExecutorPolicy, Interceptor } from './interceptors/base.js'
|
|
8
8
|
import { InterceptorChain } from './interceptors/chain.js'
|
|
9
|
+
import { StreamBuffer } from './interceptors/reliability/stream-buffer.js'
|
|
9
10
|
import { PricingProvider } from './pricing.js'
|
|
10
11
|
import { buildAdapter } from './providers/index.js'
|
|
11
12
|
import type { ProviderAdapter } from './providers/base.js'
|
|
@@ -13,7 +14,7 @@ import { mockProvider } from './providers/mock.js'
|
|
|
13
14
|
import { GavioRequest } from './request.js'
|
|
14
15
|
import type { GavioResponse } from './response.js'
|
|
15
16
|
import { Provider, coerceProvider } from './types.js'
|
|
16
|
-
import type { Message } from './types.js'
|
|
17
|
+
import type { Message, PromptLineage, PromptLineageInit } from './types.js'
|
|
17
18
|
|
|
18
19
|
export interface GatewayOptions {
|
|
19
20
|
provider?: Provider | string
|
|
@@ -33,6 +34,8 @@ export interface CompleteOptions {
|
|
|
33
34
|
metadata?: Record<string, unknown>
|
|
34
35
|
/** Provider sampling options (temperature, maxTokens, etc.). */
|
|
35
36
|
options?: Record<string, unknown>
|
|
37
|
+
/** Prompt provenance (F-OBS-04): template, variables, and RAG chunk sources. */
|
|
38
|
+
lineage?: PromptLineage | PromptLineageInit | null
|
|
36
39
|
}
|
|
37
40
|
|
|
38
41
|
const DEFAULT_MODELS: Record<string, string> = {
|
|
@@ -109,6 +112,7 @@ export class Gateway {
|
|
|
109
112
|
sessionId: opts.sessionId ?? null,
|
|
110
113
|
options: opts.options ?? {},
|
|
111
114
|
metadata: opts.metadata ?? {},
|
|
115
|
+
lineage: opts.lineage ?? null,
|
|
112
116
|
})
|
|
113
117
|
const ctx = new InterceptorContext({
|
|
114
118
|
traceId: request.traceId,
|
|
@@ -122,6 +126,53 @@ export class Gateway {
|
|
|
122
126
|
return chain.execute(request, ctx, executor)
|
|
123
127
|
}
|
|
124
128
|
|
|
129
|
+
/**
 * Stream a completion, buffering the provider stream (F-REL-06).
 *
 * The provider stream is buffered in full so the post-interceptor pipeline
 * (guardrails, PII restore, audit) runs on the complete response before any
 * chunk reaches the caller. Pre/post interceptors run via the chain; executor
 * policies (retry, circuit breaker, cache) are not applied to the streaming
 * path.
 *
 * The request is built from the same fields as a non-streamed completion,
 * including `opts.lineage`, so prompt provenance is carried on streamed
 * requests as well.
 *
 * @param opts - the same options accepted by a non-streamed completion.
 * @returns an async generator that yields the final content once, as a single chunk.
 * @throws ConfigurationError when the adapter lacks `stream` or `buildStreamResponse`.
 */
async *stream(opts: CompleteOptions): AsyncGenerator<string> {
  const adapter = this.resolveAdapter()
  if (adapter.stream === undefined || adapter.buildStreamResponse === undefined) {
    throw new ConfigurationError(`${adapter.providerName} does not support streaming`)
  }
  const model = opts.model ?? this.modelHint ?? this.resolveModel(adapter)

  const request = new GavioRequest({
    messages: opts.messages,
    model,
    provider: coerceProvider(adapter.providerName),
    agentId: opts.agentId ?? null,
    parentTraceId: opts.parentTraceId ?? null,
    sessionId: opts.sessionId ?? null,
    options: opts.options ?? {},
    metadata: opts.metadata ?? {},
    // Forward lineage just like the non-streaming path; omitting it drops
    // the caller's prompt provenance from the request.
    lineage: opts.lineage ?? null,
  })
  const ctx = new InterceptorContext({
    traceId: request.traceId,
    agentId: request.agentId,
    parentTraceId: request.parentTraceId,
    sessionId: request.sessionId,
    dryRun: this.dryRunMode,
  })

  const startedAt = performance.now()
  const buffer = new StreamBuffer()
  const { chain } = this.buildPipeline(adapter, ctx)
  // Drain the whole provider stream, then build one response from the buffer.
  // The `!` assertions are covered by the guard at the top of this method.
  const bufferingExecutor: Executor = async (req) => {
    for await (const chunk of adapter.stream!(req)) buffer.append(chunk)
    return adapter.buildStreamResponse!(req, buffer.text(), startedAt)
  }

  const response = await chain.execute(request, ctx, bufferingExecutor)
  // Post-interceptors have run on the fully buffered response; emit it now.
  yield response.content
}
|
|
175
|
+
|
|
125
176
|
async healthCheck(): Promise<boolean> {
|
|
126
177
|
return this.resolveAdapter().healthCheck()
|
|
127
178
|
}
|
package/src/index.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* See https://gavio.io for documentation. MIT licensed.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
export const VERSION = '0.2.0'
|
|
11
|
+
export const VERSION = '0.3.0'
|
|
12
12
|
|
|
13
13
|
export { Gateway } from './gateway.js'
|
|
14
14
|
export type { GatewayOptions, CompleteOptions } from './gateway.js'
|
|
@@ -30,9 +30,11 @@ export {
|
|
|
30
30
|
Sensitivity,
|
|
31
31
|
GuardrailOutcome,
|
|
32
32
|
TokenUsage,
|
|
33
|
+
PromptLineage,
|
|
34
|
+
RagChunk,
|
|
33
35
|
coerceProvider,
|
|
34
36
|
} from './types.js'
|
|
35
|
-
export type { Message } from './types.js'
|
|
37
|
+
export type { Message, PromptLineageInit, RagChunkInit } from './types.js'
|
|
36
38
|
|
|
37
39
|
export type { Interceptor, Executor, ExecutorPolicy } from './interceptors/base.js'
|
|
38
40
|
export { InterceptorChain } from './interceptors/chain.js'
|