gavio 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/dist/cjs/config.js +106 -0
  2. package/dist/cjs/errors.js +29 -1
  3. package/dist/cjs/gateway.js +42 -0
  4. package/dist/cjs/interceptors/audit/index.js +4 -1
  5. package/dist/cjs/interceptors/audit/interceptor.js +7 -0
  6. package/dist/cjs/interceptors/audit/trace.js +43 -0
  7. package/dist/cjs/interceptors/cache/embedding.js +53 -0
  8. package/dist/cjs/interceptors/cache/index.js +9 -5
  9. package/dist/cjs/interceptors/cache/interceptor.js +80 -0
  10. package/dist/cjs/interceptors/cache/vector.js +35 -0
  11. package/dist/cjs/interceptors/governance/budget.js +45 -0
  12. package/dist/cjs/interceptors/governance/index.js +10 -0
  13. package/dist/cjs/interceptors/governance/model-policy.js +18 -0
  14. package/dist/cjs/interceptors/governance/rate-limit.js +46 -0
  15. package/dist/cjs/interceptors/guardrails/index.js +11 -0
  16. package/dist/cjs/interceptors/guardrails/interceptor.js +40 -0
  17. package/dist/cjs/interceptors/guardrails/validator.js +8 -0
  18. package/dist/cjs/interceptors/guardrails/validators/regex.js +32 -0
  19. package/dist/cjs/interceptors/guardrails/validators/schema.js +63 -0
  20. package/dist/cjs/interceptors/injection.js +62 -0
  21. package/dist/cjs/interceptors/reliability/circuit-breaker.js +82 -0
  22. package/dist/cjs/interceptors/reliability/index.js +6 -1
  23. package/dist/cjs/interceptors/reliability/load-balancer.js +38 -0
  24. package/dist/cjs/pricing.js +5 -1
  25. package/dist/cjs/providers/azure-openai.js +56 -0
  26. package/dist/cjs/providers/gemini.js +73 -0
  27. package/dist/cjs/providers/index.js +22 -6
  28. package/dist/cjs/providers/ollama.js +41 -0
  29. package/dist/cjs/shim/openai.js +57 -0
  30. package/dist/esm/config.d.ts +12 -0
  31. package/dist/esm/config.js +102 -0
  32. package/dist/esm/errors.d.ts +17 -0
  33. package/dist/esm/errors.js +24 -0
  34. package/dist/esm/gateway.d.ts +5 -0
  35. package/dist/esm/gateway.js +9 -0
  36. package/dist/esm/interceptors/audit/index.d.ts +2 -0
  37. package/dist/esm/interceptors/audit/index.js +1 -0
  38. package/dist/esm/interceptors/audit/interceptor.d.ts +2 -0
  39. package/dist/esm/interceptors/audit/interceptor.js +7 -0
  40. package/dist/esm/interceptors/audit/trace.d.ts +19 -0
  41. package/dist/esm/interceptors/audit/trace.js +39 -0
  42. package/dist/esm/interceptors/cache/embedding.d.ts +14 -0
  43. package/dist/esm/interceptors/cache/embedding.js +49 -0
  44. package/dist/esm/interceptors/cache/index.d.ts +7 -4
  45. package/dist/esm/interceptors/cache/index.js +4 -4
  46. package/dist/esm/interceptors/cache/interceptor.d.ts +19 -0
  47. package/dist/esm/interceptors/cache/interceptor.js +77 -0
  48. package/dist/esm/interceptors/cache/vector.d.ts +9 -0
  49. package/dist/esm/interceptors/cache/vector.js +32 -0
  50. package/dist/esm/interceptors/governance/budget.d.ts +11 -0
  51. package/dist/esm/interceptors/governance/budget.js +42 -0
  52. package/dist/esm/interceptors/governance/index.d.ts +7 -0
  53. package/dist/esm/interceptors/governance/index.js +4 -0
  54. package/dist/esm/interceptors/governance/model-policy.d.ts +8 -0
  55. package/dist/esm/interceptors/governance/model-policy.js +15 -0
  56. package/dist/esm/interceptors/governance/rate-limit.d.ts +9 -0
  57. package/dist/esm/interceptors/governance/rate-limit.js +43 -0
  58. package/dist/esm/interceptors/guardrails/index.d.ts +6 -0
  59. package/dist/esm/interceptors/guardrails/index.js +4 -0
  60. package/dist/esm/interceptors/guardrails/interceptor.d.ts +15 -0
  61. package/dist/esm/interceptors/guardrails/interceptor.js +37 -0
  62. package/dist/esm/interceptors/guardrails/validator.d.ts +11 -0
  63. package/dist/esm/interceptors/guardrails/validator.js +3 -0
  64. package/dist/esm/interceptors/guardrails/validators/regex.d.ts +6 -0
  65. package/dist/esm/interceptors/guardrails/validators/regex.js +28 -0
  66. package/dist/esm/interceptors/guardrails/validators/schema.d.ts +5 -0
  67. package/dist/esm/interceptors/guardrails/validators/schema.js +60 -0
  68. package/dist/esm/interceptors/injection.d.ts +17 -0
  69. package/dist/esm/interceptors/injection.js +59 -0
  70. package/dist/esm/interceptors/reliability/circuit-breaker.d.ts +15 -0
  71. package/dist/esm/interceptors/reliability/circuit-breaker.js +78 -0
  72. package/dist/esm/interceptors/reliability/index.d.ts +4 -0
  73. package/dist/esm/interceptors/reliability/index.js +2 -0
  74. package/dist/esm/interceptors/reliability/load-balancer.d.ts +8 -0
  75. package/dist/esm/interceptors/reliability/load-balancer.js +35 -0
  76. package/dist/esm/pricing.js +5 -1
  77. package/dist/esm/providers/azure-openai.d.ts +28 -0
  78. package/dist/esm/providers/azure-openai.js +53 -0
  79. package/dist/esm/providers/gemini.d.ts +36 -0
  80. package/dist/esm/providers/gemini.js +69 -0
  81. package/dist/esm/providers/index.d.ts +7 -1
  82. package/dist/esm/providers/index.js +18 -5
  83. package/dist/esm/providers/ollama.d.ts +21 -0
  84. package/dist/esm/providers/ollama.js +38 -0
  85. package/dist/esm/shim/openai.d.ts +56 -0
  86. package/dist/esm/shim/openai.js +53 -0
  87. package/package.json +31 -2
  88. package/src/config.ts +125 -0
  89. package/src/errors.ts +28 -0
  90. package/src/gateway.ts +10 -0
  91. package/src/interceptors/audit/index.ts +2 -0
  92. package/src/interceptors/audit/interceptor.ts +9 -0
  93. package/src/interceptors/audit/trace.ts +47 -0
  94. package/src/interceptors/cache/embedding.ts +53 -0
  95. package/src/interceptors/cache/index.ts +7 -4
  96. package/src/interceptors/cache/interceptor.ts +111 -0
  97. package/src/interceptors/cache/vector.ts +45 -0
  98. package/src/interceptors/governance/budget.ts +59 -0
  99. package/src/interceptors/governance/index.ts +8 -0
  100. package/src/interceptors/governance/model-policy.ts +25 -0
  101. package/src/interceptors/governance/rate-limit.ts +63 -0
  102. package/src/interceptors/guardrails/index.ts +7 -0
  103. package/src/interceptors/guardrails/interceptor.ts +56 -0
  104. package/src/interceptors/guardrails/validator.ts +14 -0
  105. package/src/interceptors/guardrails/validators/regex.ts +29 -0
  106. package/src/interceptors/guardrails/validators/schema.ts +62 -0
  107. package/src/interceptors/injection.ts +72 -0
  108. package/src/interceptors/reliability/circuit-breaker.ts +102 -0
  109. package/src/interceptors/reliability/index.ts +4 -0
  110. package/src/interceptors/reliability/load-balancer.ts +56 -0
  111. package/src/pricing.ts +5 -1
  112. package/src/providers/azure-openai.ts +77 -0
  113. package/src/providers/gemini.ts +95 -0
  114. package/src/providers/index.ts +21 -5
  115. package/src/providers/ollama.ts +61 -0
  116. package/src/shim/openai.ts +76 -0
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Config loader (F-DX-05) — build a Gateway from an object or a JSON file.
3
+ *
4
+ * const gw = await Gateway.fromConfig('gateway.json')
5
+ *
6
+ * JSON is supported out of the box; string values expand ${ENV_VAR}.
7
+ */
8
+ import { readFileSync } from 'node:fs';
9
+ import { ConfigurationError } from './errors.js';
10
+ import { Gateway } from './gateway.js';
11
+ import { auditInterceptor } from './interceptors/audit/index.js';
12
+ import { hashingEmbedder, semanticCache } from './interceptors/cache/index.js';
13
+ import { costControl, modelPolicy, rateLimiter } from './interceptors/governance/index.js';
14
+ import { promptInjectionGuard } from './interceptors/injection.js';
15
+ import { piiGuard } from './interceptors/pii/index.js';
16
+ import { retryInterceptor, timeoutPolicy } from './interceptors/reliability/index.js';
17
/**
 * Read a gateway configuration from a JSON file and expand `${ENV_VAR}`
 * placeholders in every string value.
 *
 * The extension is validated before the file is touched, so an unsupported
 * path is always reported as a configuration problem instead of surfacing as
 * a file-system or JSON parse error. Errors raised while reading or parsing a
 * `.json` file propagate unchanged.
 *
 * @param path - Path to a `.json` configuration file.
 * @returns The parsed configuration with environment variables expanded.
 * @throws ConfigurationError if `path` does not end in `.json`.
 */
export function loadConfig(path) {
    if (!path.endsWith('.json')) {
        throw new ConfigurationError('JS config loader supports JSON only (use .json)');
    }
    const text = readFileSync(path, 'utf8');
    return expand(JSON.parse(text));
}
24
/**
 * Recursively copy a parsed JSON value, replacing every `${NAME}` placeholder
 * inside strings with the environment variable of that name (the empty string
 * when the variable is unset). Arrays and objects are rebuilt element by
 * element; every other value is returned untouched.
 */
function expand(obj) {
    if (typeof obj === 'string') {
        return obj.replace(/\$\{(\w+)\}/g, (_match, name) => process.env[name] ?? '');
    }
    if (Array.isArray(obj)) {
        return obj.map((item) => expand(item));
    }
    if (obj === null || typeof obj !== 'object') {
        return obj;
    }
    return Object.fromEntries(Object.entries(obj).map(([key, value]) => [key, expand(value)]));
}
35
/**
 * Assemble a Gateway from an already-parsed configuration object.
 *
 * Top-level keys `provider`, `model`, `devMode`/`dev_mode` and
 * `dryRun`/`dry_run` become gateway options. Each entry under `interceptors`
 * is registered unless it is absent or has `enabled: false`, always in this
 * order: audit, prompt_injection, pii_guard, cost_control, rate_limiter,
 * model_policy, semantic_cache, timeout, retry. Option names are accepted in
 * camelCase or snake_case; the camelCase spelling is consulted first.
 *
 * @param config - Parsed configuration (for example the result of loadConfig).
 * @returns The configured Gateway.
 * @throws ConfigurationError if `cost_control` has no hard cap, or if a budget
 *   or rate-limit setting is present but is not a number.
 */
export function buildFromConfig(config) {
    const gatewayOptions = {};
    if (config['provider'])
        gatewayOptions['provider'] = config['provider'];
    if (config['model'])
        gatewayOptions['model'] = config['model'];
    if (config['devMode'] ?? config['dev_mode'])
        gatewayOptions['devMode'] = true;
    if (config['dryRun'] ?? config['dry_run'])
        gatewayOptions['dryRun'] = true;
    let gw = new Gateway(gatewayOptions);
    const ic = config['interceptors'] ?? {};
    // An interceptor section is active when present and not explicitly disabled.
    const cfg = (name) => {
        const entry = ic[name];
        return entry && entry['enabled'] !== false ? entry : null;
    };
    // Coerce an optional numeric setting. Values may arrive as strings after
    // ${ENV_VAR} expansion; absent values stay undefined, and anything that is
    // not a number (including the empty string an unset variable expands to)
    // is rejected instead of silently becoming NaN or 0.
    const numeric = (label, value) => {
        if (value === undefined || value === null)
            return undefined;
        const parsed = typeof value === 'string' && value.trim() === '' ? NaN : Number(value);
        if (Number.isNaN(parsed)) {
            throw new ConfigurationError(`${label} must be a number (got ${JSON.stringify(value)})`);
        }
        return parsed;
    };
    let c;
    if ((c = cfg('audit'))) {
        gw = gw.use(auditInterceptor({
            sink: c['sink'] ?? 'stdout',
            hashChain: Boolean(c['hashChain'] ?? c['hash_chain']),
        }));
    }
    if ((c = cfg('prompt_injection'))) {
        gw = gw.use(promptInjectionGuard({ action: c['action'] ?? 'block' }));
    }
    if ((c = cfg('pii_guard'))) {
        gw = gw.use(piiGuard({
            sensitivity: c['sensitivity'] ?? 'strict',
            mode: c['mode'] ?? 'redact',
        }));
    }
    if ((c = cfg('cost_control'))) {
        // A missing hard cap used to become NaN, which never compares as
        // exceeded and therefore disabled the budget without any warning.
        const hardCapUsd = numeric('cost_control.hardCapUsd', c['hardCapUsd'] ?? c['hard_cap_usd']);
        if (hardCapUsd === undefined) {
            throw new ConfigurationError('cost_control requires hardCapUsd (or hard_cap_usd)');
        }
        gw = gw.use(costControl({
            hardCapUsd,
            softCapUsd: numeric('cost_control.softCapUsd', c['softCapUsd'] ?? c['soft_cap_usd']),
            scope: c['scope'] ?? 'global',
            window: c['window'] ?? 'day',
        }));
    }
    if ((c = cfg('rate_limiter'))) {
        gw = gw.use(rateLimiter({
            maxRequestsPerMinute: numeric('rate_limiter.maxRequestsPerMinute', c['maxRequestsPerMinute'] ?? c['max_requests_per_minute']),
            maxTokensPerMinute: numeric('rate_limiter.maxTokensPerMinute', c['maxTokensPerMinute'] ?? c['max_tokens_per_minute']),
            scope: c['scope'] ?? 'global',
        }));
    }
    if ((c = cfg('model_policy'))) {
        gw = gw.use(modelPolicy({ roles: c['roles'] ?? {} }));
    }
    if ((c = cfg('semantic_cache'))) {
        // Without enableSemantic only the exact-match level of the cache is used.
        const embedder = (c['enableSemantic'] ?? c['enable_semantic']) ? hashingEmbedder() : undefined;
        gw = gw.use(semanticCache({
            embedder,
            similarityThreshold: Number(c['similarityThreshold'] ?? c['similarity_threshold'] ?? 0.95),
        }));
    }
    if ((c = cfg('timeout'))) {
        gw = gw.use(timeoutPolicy({ timeoutSeconds: Number(c['timeoutSeconds'] ?? c['timeout_seconds'] ?? 30) }));
    }
    if ((c = cfg('retry'))) {
        gw = gw.use(retryInterceptor({
            maxAttempts: Number(c['maxAttempts'] ?? c['max_attempts'] ?? 3),
            baseDelayMs: Number(c['baseDelayMs'] ?? c['base_delay_ms'] ?? 500),
        }));
    }
    return gw;
}
@@ -31,6 +31,23 @@ export declare class PiiBlockedError extends GavioError {
31
31
  /** A hard budget cap was exceeded. Never swallow this — surface to user. */
32
32
  export declare class BudgetExceededError extends GavioError {
33
33
  }
34
+ /** The circuit breaker is open; the call was rejected without hitting the provider. */
35
+ export declare class CircuitOpenError extends ProviderUnavailableError {
36
+ }
37
+ /** A local rate limit (requests/tokens per minute) was exceeded. */
38
+ export declare class RateLimitExceededError extends GavioError {
39
+ }
40
+ /** The caller's role is not permitted to use the requested model (RBAC). */
41
+ export declare class ModelNotAllowedError extends GavioError {
42
+ readonly role: string;
43
+ readonly model: string;
44
+ constructor(role: string, model: string);
45
+ }
34
46
  /** Output failed a guardrail validator with onFailure='error'. */
35
47
  export declare class GuardrailViolationError extends GavioError {
36
48
  }
49
+ /** A prompt-injection attempt was detected and the guard is in block mode. */
50
+ export declare class PromptInjectionError extends GavioError {
51
+ readonly patterns: string[];
52
+ constructor(patterns: string[]);
53
+ }
@@ -39,6 +39,30 @@ export class PiiBlockedError extends GavioError {
39
39
  /** A hard budget cap was exceeded. Never swallow this — surface to user. */
40
40
  export class BudgetExceededError extends GavioError {
41
41
  }
42
+ /** The circuit breaker is open; the call was rejected without hitting the provider. */
43
+ export class CircuitOpenError extends ProviderUnavailableError {
44
+ }
45
+ /** A local rate limit (requests/tokens per minute) was exceeded. */
46
+ export class RateLimitExceededError extends GavioError {
47
+ }
48
+ /** The caller's role is not permitted to use the requested model (RBAC). */
49
+ export class ModelNotAllowedError extends GavioError {
50
+ role;
51
+ model;
52
+ constructor(role, model) {
53
+ super(`role ${JSON.stringify(role)} may not use model ${JSON.stringify(model)}`);
54
+ this.role = role;
55
+ this.model = model;
56
+ }
57
+ }
42
58
  /** Output failed a guardrail validator with onFailure='error'. */
43
59
  export class GuardrailViolationError extends GavioError {
44
60
  }
61
+ /** A prompt-injection attempt was detected and the guard is in block mode. */
62
+ export class PromptInjectionError extends GavioError {
63
+ patterns;
64
+ constructor(patterns) {
65
+ super(`prompt injection detected: ${patterns.join(', ')}`);
66
+ this.patterns = patterns;
67
+ }
68
+ }
@@ -39,6 +39,11 @@ export declare class Gateway {
39
39
  private readonly pricing;
40
40
  private readonly interceptors;
41
41
  constructor(options?: GatewayOptions);
42
+ /**
43
+ * Build a Gateway from a config object or a JSON file path (F-DX-05).
44
+ * Async so the config module loads lazily (avoids a circular import).
45
+ */
46
+ static fromConfig(config: string | Record<string, unknown>): Promise<Gateway>;
42
47
  /** Register an interceptor or executor policy. First-registered = outermost. */
43
48
  use(interceptor: Interceptor): this;
44
49
  /** Supply a provider adapter explicitly (overrides `provider`). */
@@ -37,6 +37,15 @@ export class Gateway {
37
37
  this.dryRunMode = options.dryRun ?? false;
38
38
  this.pricing = options.pricing ?? new PricingProvider();
39
39
  }
40
+ /**
41
+ * Build a Gateway from a config object or a JSON file path (F-DX-05).
42
+ * Async so the config module loads lazily (avoids a circular import).
43
+ */
44
+ static async fromConfig(config) {
45
+ const mod = await import('./config.js');
46
+ const data = typeof config === 'string' ? mod.loadConfig(config) : config;
47
+ return mod.buildFromConfig(data);
48
+ }
40
49
  /** Register an interceptor or executor policy. First-registered = outermost. */
41
50
  use(interceptor) {
42
51
  this.interceptors.push(interceptor);
@@ -5,3 +5,5 @@ export type { AuditRecordInit } from './record.js';
5
5
  export type { AuditSink } from './sink.js';
6
6
  export { stdoutSink } from './sinks/stdout.js';
7
7
  export type { StdoutSinkOptions } from './sinks/stdout.js';
8
+ export { verifyChain, buildCallGraph } from './trace.js';
9
+ export type { TraceNode } from './trace.js';
@@ -1,3 +1,4 @@
1
1
  export { auditInterceptor, isAuditInterceptor, AUDIT_NAME } from './interceptor.js';
2
2
  export { AuditRecord, SCHEMA_VERSION } from './record.js';
3
3
  export { stdoutSink } from './sinks/stdout.js';
4
+ export { verifyChain, buildCallGraph } from './trace.js';
@@ -4,6 +4,8 @@ import type { AuditSink } from './sink.js';
4
4
  export declare const AUDIT_NAME = "audit";
5
5
  export interface AuditInterceptorOptions {
6
6
  sink?: AuditSink | 'stdout';
7
+ /** F-OBS-02: link each record via previousHash into a tamper-evident chain. */
8
+ hashChain?: boolean;
7
9
  }
8
10
  /** Factory: build an audit interceptor. */
9
11
  export declare function auditInterceptor(options?: AuditInterceptorOptions): Interceptor;
@@ -15,8 +15,11 @@ class AuditInterceptor {
15
15
  name = AUDIT_NAME;
16
16
  dryRunSafe = true; // auditing is observation-only, so it always runs
17
17
  sink;
18
+ hashChain;
19
+ lastHash = '';
18
20
  constructor(options = {}) {
19
21
  this.sink = resolveSink(options.sink);
22
+ this.hashChain = options.hashChain ?? false;
20
23
  }
21
24
  async before(request, ctx) {
22
25
  ctx.state[PROMPT_HASH_KEY] = AuditRecord.hashText(request.promptText());
@@ -45,6 +48,10 @@ class AuditInterceptor {
45
48
  guardrailOutcome: ctx.guardrailOutcome,
46
49
  riskScore: ctx.riskScore,
47
50
  });
51
+ if (this.hashChain) {
52
+ record.previousHash = this.lastHash;
53
+ this.lastHash = record.contentHash();
54
+ }
48
55
  response.audit = record;
49
56
  try {
50
57
  await this.sink.write(record);
@@ -0,0 +1,19 @@
1
+ /** Audit-chain verification (F-OBS-02) and multi-agent DAG trace (F-OBS-03). */
2
+ import type { AuditRecord } from './record.js';
3
+ /**
4
+ * Return true if the records form an intact hash chain. Each record's
5
+ * previousHash must equal the content hash of the record before it; the first
6
+ * must be empty. Any edit, reorder, or deletion breaks the chain.
7
+ */
8
+ export declare function verifyChain(records: AuditRecord[]): boolean;
9
+ export interface TraceNode {
10
+ traceId: string;
11
+ agentId: string | null;
12
+ parentTraceId: string | null;
13
+ children: TraceNode[];
14
+ }
15
+ /**
16
+ * Reconstruct the multi-agent DAG from audit records using parentTraceId +
17
+ * traceId. Returns the root nodes (those with no known parent).
18
+ */
19
+ export declare function buildCallGraph(records: AuditRecord[]): TraceNode[];
@@ -0,0 +1,39 @@
1
+ /** Audit-chain verification (F-OBS-02) and multi-agent DAG trace (F-OBS-03). */
2
+ /**
3
+ * Return true if the records form an intact hash chain. Each record's
4
+ * previousHash must equal the content hash of the record before it; the first
5
+ * must be empty. Any edit, reorder, or deletion breaks the chain.
6
+ */
7
+ export function verifyChain(records) {
8
+ let prevHash = '';
9
+ for (const rec of records) {
10
+ if (rec.previousHash !== prevHash)
11
+ return false;
12
+ prevHash = rec.contentHash();
13
+ }
14
+ return true;
15
+ }
16
+ /**
17
+ * Reconstruct the multi-agent DAG from audit records using parentTraceId +
18
+ * traceId. Returns the root nodes (those with no known parent).
19
+ */
20
+ export function buildCallGraph(records) {
21
+ const nodes = new Map();
22
+ for (const rec of records) {
23
+ nodes.set(rec.traceId, {
24
+ traceId: rec.traceId,
25
+ agentId: rec.agentId,
26
+ parentTraceId: rec.parentTraceId,
27
+ children: [],
28
+ });
29
+ }
30
+ const roots = [];
31
+ for (const node of nodes.values()) {
32
+ const parent = node.parentTraceId ? nodes.get(node.parentTraceId) : undefined;
33
+ if (parent)
34
+ parent.children.push(node);
35
+ else
36
+ roots.push(node);
37
+ }
38
+ return roots;
39
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Embeddings for the semantic cache (F-CACHE-02).
3
+ *
4
+ * Zero-dependency hashed bag-of-words embedder (L2-normalised) — good enough to
5
+ * dedup near-identical prompts. Plug in a real embedder implementing `Embedder`
6
+ * for production semantic matching.
7
+ */
8
+ export interface Embedder {
9
+ embed(text: string): number[];
10
+ }
11
+ /** Deterministic hashed bag-of-words embedder. */
12
+ export declare function hashingEmbedder(dim?: number): Embedder;
13
+ /** Cosine similarity; safe for zero vectors. */
14
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Embeddings for the semantic cache (F-CACHE-02).
3
+ *
4
+ * Zero-dependency hashed bag-of-words embedder (L2-normalised) — good enough to
5
+ * dedup near-identical prompts. Plug in a real embedder implementing `Embedder`
6
+ * for production semantic matching.
7
+ */
8
+ import { createHash } from 'node:crypto';
9
+ const TOKEN = /[a-z0-9]+/g;
10
+ /** Deterministic hashed bag-of-words embedder. */
11
+ export function hashingEmbedder(dim = 256) {
12
+ return {
13
+ embed(text) {
14
+ const vec = new Array(dim).fill(0);
15
+ const tokens = text.toLowerCase().match(TOKEN) ?? [];
16
+ for (const token of tokens) {
17
+ // Parity note: Python uses blake2b(digest_size=8); here we take the
18
+ // first 8 bytes of blake2b512. Both are deterministic; the JS cache is
19
+ // per-process so cross-language byte-parity is not required.
20
+ const digest = createHash('blake2b512').update(token).digest();
21
+ let n = 0n;
22
+ for (let i = 0; i < 8; i++)
23
+ n = (n << 8n) | BigInt(digest[i]);
24
+ const bucket = Number(n % BigInt(dim));
25
+ vec[bucket] += 1;
26
+ }
27
+ const norm = Math.sqrt(vec.reduce((s, x) => s + x * x, 0));
28
+ if (norm === 0)
29
+ return vec;
30
+ return vec.map((x) => x / norm);
31
+ },
32
+ };
33
+ }
34
/**
 * Cosine similarity of two equal-length vectors.
 * Returns 0 when either vector has zero magnitude; throws when the lengths
 * differ.
 */
export function cosineSimilarity(a, b) {
    const size = a.length;
    if (size !== b.length) {
        throw new Error('vectors must have equal length');
    }
    let dot = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (const [index, x] of a.entries()) {
        const y = b[index];
        dot += x * y;
        squaredA += x * x;
        squaredB += y * y;
    }
    const degenerate = squaredA === 0 || squaredB === 0;
    return degenerate ? 0 : dot / (Math.sqrt(squaredA) * Math.sqrt(squaredB));
}
@@ -1,7 +1,10 @@
1
- /**
2
- * Caching substrate. The SemanticCache interceptor ships in v0.2.0; v0.1.0
3
- * exposes the CacheBackend interface and the in-memory backend only.
4
- */
1
+ /** Caching (F-CACHE-01 exact, F-CACHE-02 semantic, F-CACHE-03 in-memory). */
5
2
  export type { CacheBackend } from './backend.js';
6
3
  export { memoryCacheBackend } from './backends/memory.js';
7
4
  export type { MemoryCacheBackendOptions } from './backends/memory.js';
5
+ export { semanticCache } from './interceptor.js';
6
+ export type { SemanticCacheOptions } from './interceptor.js';
7
+ export { hashingEmbedder, cosineSimilarity } from './embedding.js';
8
+ export type { Embedder } from './embedding.js';
9
+ export { inMemoryVectorBackend } from './vector.js';
10
+ export type { VectorBackend } from './vector.js';
@@ -1,5 +1,5 @@
1
- /**
2
- * Caching substrate. The SemanticCache interceptor ships in v0.2.0; v0.1.0
3
- * exposes the CacheBackend interface and the in-memory backend only.
4
- */
1
+ /** Caching (F-CACHE-01 exact, F-CACHE-02 semantic, F-CACHE-03 in-memory). */
5
2
  export { memoryCacheBackend } from './backends/memory.js';
3
+ export { semanticCache } from './interceptor.js';
4
+ export { hashingEmbedder, cosineSimilarity } from './embedding.js';
5
+ export { inMemoryVectorBackend } from './vector.js';
@@ -0,0 +1,19 @@
1
+ /**
2
+ * semanticCache (F-CACHE-01, F-CACHE-02) — two-level cache as an ExecutorPolicy.
3
+ *
4
+ * Exact SHA-256 cache, then optional semantic cosine cache; a hit returns the
5
+ * cached response and skips the provider. Register outermost.
6
+ */
7
+ import type { ExecutorPolicy } from '../base.js';
8
+ import type { CacheBackend } from './backend.js';
9
+ import type { Embedder } from './embedding.js';
10
+ import { type VectorBackend } from './vector.js';
11
+ export interface SemanticCacheOptions {
12
+ backend?: CacheBackend;
13
+ embedder?: Embedder;
14
+ vectorBackend?: VectorBackend;
15
+ exactTtlSeconds?: number;
16
+ semanticTtlSeconds?: number;
17
+ similarityThreshold?: number;
18
+ }
19
+ export declare function semanticCache(options?: SemanticCacheOptions): ExecutorPolicy;
@@ -0,0 +1,77 @@
1
+ /**
2
+ * semanticCache (F-CACHE-01, F-CACHE-02) — two-level cache as an ExecutorPolicy.
3
+ *
4
+ * Exact SHA-256 cache, then optional semantic cosine cache; a hit returns the
5
+ * cached response and skips the provider. Register outermost.
6
+ */
7
+ import { createHash } from 'node:crypto';
8
+ import { GavioResponse } from '../../response.js';
9
+ import { CacheType, TokenUsage } from '../../types.js';
10
+ import { memoryCacheBackend } from './backends/memory.js';
11
+ import { inMemoryVectorBackend } from './vector.js';
12
/**
 * Build the two-level cache policy.
 *
 * Level 1 is an exact cache keyed by a SHA-256 of provider, model, messages
 * and (key-sorted) options. Level 2, enabled only when an `embedder` is
 * supplied, looks up the prompt embedding in a vector backend. A hit at
 * either level returns a zero-cost response and skips `callNext`.
 *
 * Semantic hits are only served when the stored entry was produced by the
 * same provider and model as the current request; otherwise the answer of one
 * model would be returned, relabelled, for a request addressed to another.
 * Entries that carry no provider/model (written by an earlier version into a
 * persistent vector backend) are still accepted. Because the backend returns
 * only its nearest entry, a nearest entry from another model is treated as a
 * miss even if a farther same-model entry exists.
 *
 * @param options - Backends, embedder, TTLs (seconds) and similarity threshold.
 * @returns An executor policy named `semantic_cache`.
 */
export function semanticCache(options = {}) {
    const backend = options.backend ?? memoryCacheBackend();
    const embedder = options.embedder;
    const semantic = embedder != null;
    const vector = options.vectorBackend ?? (semantic ? inMemoryVectorBackend() : null);
    const exactTtl = options.exactTtlSeconds ?? 3600;
    const semanticTtl = options.semanticTtlSeconds ?? 86400;
    const threshold = options.similarityThreshold ?? 0.95;
    // Deterministic key: options are serialised in sorted key order so that
    // property order in the caller's object does not change the hash.
    function exactKey(request) {
        const opts = request.options ?? {};
        const sorted = {};
        for (const k of Object.keys(opts).sort())
            sorted[k] = opts[k];
        const payload = JSON.stringify({
            provider: String(request.provider),
            model: request.model,
            messages: request.messages,
            options: sorted,
        });
        return 'gavio:exact:' + createHash('sha256').update(payload).digest('hex');
    }
    // True when a semantically matched entry may answer this request.
    function sameTarget(request, entry) {
        if (entry.provider === undefined && entry.model === undefined)
            return true;
        return entry.provider === String(request.provider) && entry.model === request.model;
    }
    // Mark the context as a cache hit and rebuild a response from the entry.
    function hit(request, ctx, entry, type) {
        ctx.cacheHit = true;
        ctx.cacheType = type;
        return new GavioResponse({
            traceId: request.traceId,
            content: entry.content,
            model: request.model,
            provider: String(request.provider),
            modelVersion: entry.modelVersion,
            usage: new TokenUsage(entry.promptTokens, entry.completionTokens),
            costUsd: 0,
            cacheHit: true,
            cacheType: type,
        });
    }
    return {
        name: 'semantic_cache',
        isExecutorPolicy: true,
        async around(request, ctx, callNext) {
            ctx.markFired('semantic_cache');
            const key = exactKey(request);
            const cached = (await backend.get(key));
            if (cached)
                return hit(request, ctx, cached, CacheType.EXACT);
            let embedding = null;
            if (semantic && vector && embedder) {
                embedding = embedder.embed(request.promptText());
                const semHit = (await vector.query(embedding, threshold));
                if (semHit && sameTarget(request, semHit))
                    return hit(request, ctx, semHit, CacheType.SEMANTIC);
            }
            const response = await callNext(request);
            const entry = {
                content: response.content,
                modelVersion: response.modelVersion,
                promptTokens: response.usage.promptTokens,
                completionTokens: response.usage.completionTokens,
                // Recorded so semantic lookups can be scoped to the producer.
                provider: String(request.provider),
                model: request.model,
            };
            await backend.set(key, entry, exactTtl);
            if (embedding && vector)
                await vector.add(embedding, entry, semanticTtl);
            return response;
        },
    };
}
@@ -0,0 +1,9 @@
1
+ /** VectorBackend — nearest-neighbour store for the semantic cache (F-CACHE-02). */
2
+ export interface VectorBackend {
3
+ add(vector: number[], value: unknown, ttlSeconds?: number | null): Promise<void>;
4
+ /** Return the value of the nearest entry with similarity >= threshold. */
5
+ query(vector: number[], threshold: number): Promise<unknown | null>;
6
+ clear(): Promise<void>;
7
+ }
8
+ /** Bounded, brute-force in-memory vector store (default dev backend). */
9
+ export declare function inMemoryVectorBackend(maxSize?: number): VectorBackend;
@@ -0,0 +1,32 @@
1
+ /** VectorBackend — nearest-neighbour store for the semantic cache (F-CACHE-02). */
2
+ import { cosineSimilarity } from './embedding.js';
3
/**
 * Default development vector store: a plain array scanned linearly on every
 * query. At most `maxSize` entries are kept; adding beyond that drops the
 * oldest entry. A falsy `ttlSeconds` means the entry never expires.
 */
export function inMemoryVectorBackend(maxSize = 1000) {
    const entries = [];
    async function add(vector, value, ttlSeconds) {
        let expiresAt = null;
        if (ttlSeconds) {
            expiresAt = Date.now() + ttlSeconds * 1000;
        }
        entries.push({ vector, value, expiresAt });
        if (entries.length > maxSize) {
            entries.shift();
        }
    }
    async function query(vector, threshold) {
        const now = Date.now();
        let winner = null;
        let winnerScore = threshold;
        for (const entry of entries) {
            const expired = entry.expiresAt !== null && now > entry.expiresAt;
            if (expired) {
                continue;
            }
            const score = cosineSimilarity(vector, entry.vector);
            // `>=` lets a later entry replace an earlier one with equal score.
            if (score >= winnerScore) {
                winnerScore = score;
                winner = entry.value;
            }
        }
        return winner;
    }
    async function clear() {
        entries.length = 0;
    }
    return { add, query, clear };
}
@@ -0,0 +1,11 @@
1
+ /** costControl (F-GOV-02) — soft/hard budget caps per scope and window. */
2
+ import type { Interceptor } from '../base.js';
3
+ export type Scope = 'agent' | 'session' | 'global';
4
+ export type Window = 'day' | 'month' | 'total';
5
+ export interface CostControlOptions {
6
+ hardCapUsd: number;
7
+ softCapUsd?: number;
8
+ scope?: Scope;
9
+ window?: Window;
10
+ }
11
+ export declare function costControl(options: CostControlOptions): Interceptor;
@@ -0,0 +1,42 @@
1
+ /** costControl (F-GOV-02) — soft/hard budget caps per scope and window. */
2
+ import { BudgetExceededError } from '../../errors.js';
3
/**
 * Name the spending bucket for a scope: `agent:<agentId>`,
 * `session:<sessionId>` (with `unknown` when the id is missing), or `global`
 * for any other scope.
 */
function scopeKey(scope, ctx) {
    switch (scope) {
        case 'agent':
            return `agent:${ctx.agentId ?? 'unknown'}`;
        case 'session':
            return `session:${ctx.sessionId ?? 'unknown'}`;
        default:
            return 'global';
    }
}
10
/**
 * Label the current accounting window from the ISO-8601 timestamp of "now":
 * `YYYY-MM-DD` for 'day', `YYYY-MM` for 'month', and the constant `total` for
 * anything else.
 */
function windowBucket(window) {
    const stamp = new Date().toISOString();
    return window === 'day'
        ? stamp.slice(0, 10)
        : window === 'month'
            ? stamp.slice(0, 7)
            : 'total';
}
18
/**
 * costControl (F-GOV-02): track spend per scope and window, refuse calls once
 * the hard cap is reached, and warn on the console once the soft cap is
 * reached.
 *
 * Spend is accumulated in memory from `response.costUsd` after each call and
 * checked before the next one, keyed by `<scope key>|<window bucket>`.
 * NOTE(review): buckets of past windows are never removed from the map, so
 * long-running processes with many agents/sessions keep old entries.
 *
 * @param options - `hardCapUsd` (required), optional `softCapUsd`, `scope`
 *   (default 'global') and `window` (default 'day').
 * @returns An interceptor named `cost_control`.
 * @throws TypeError if `hardCapUsd` is not a number or is NaN.
 * @throws BudgetExceededError from `before` once spend has reached the cap.
 */
export function costControl(options) {
    const { hardCapUsd, softCapUsd, scope = 'global', window = 'day' } = options;
    // `spent >= NaN` is always false, so a NaN/non-numeric cap would disable
    // enforcement without any signal. Fail at construction instead.
    if (typeof hardCapUsd !== 'number' || Number.isNaN(hardCapUsd)) {
        throw new TypeError('costControl: hardCapUsd must be a number');
    }
    const spend = new Map();
    const key = (ctx) => `${scopeKey(scope, ctx)}|${windowBucket(window)}`;
    return {
        name: 'cost_control',
        before(request, ctx) {
            const spent = spend.get(key(ctx)) ?? 0;
            if (spent >= hardCapUsd) {
                throw new BudgetExceededError(`budget hard cap $${hardCapUsd.toFixed(2)} reached (spent $${spent.toFixed(4)})`);
            }
            return request;
        },
        after(response, ctx) {
            const k = key(ctx);
            // A missing or non-finite cost must not turn the running total
            // into NaN, which would make every later cap check pass.
            const cost = Number.isFinite(response.costUsd) ? response.costUsd : 0;
            const total = (spend.get(k) ?? 0) + cost;
            spend.set(k, total);
            if (softCapUsd !== undefined && total >= softCapUsd) {
                // eslint-disable-next-line no-console
                console.warn(`[gavio:budget] soft cap: $${total.toFixed(4)} of $${softCapUsd} for ${k}`);
            }
            return response;
        },
    };
}
@@ -0,0 +1,7 @@
1
+ /** Cost & governance (F-GOV-02 budget, F-GOV-03 rate limit, F-GOV-04 RBAC). */
2
+ export { costControl } from './budget.js';
3
+ export type { CostControlOptions, Scope, Window } from './budget.js';
4
+ export { rateLimiter } from './rate-limit.js';
5
+ export type { RateLimiterOptions } from './rate-limit.js';
6
+ export { modelPolicy } from './model-policy.js';
7
+ export type { ModelPolicyOptions } from './model-policy.js';
@@ -0,0 +1,4 @@
1
+ /** Cost & governance (F-GOV-02 budget, F-GOV-03 rate limit, F-GOV-04 RBAC). */
2
+ export { costControl } from './budget.js';
3
+ export { rateLimiter } from './rate-limit.js';
4
+ export { modelPolicy } from './model-policy.js';
@@ -0,0 +1,8 @@
1
+ /** modelPolicy (F-GOV-04) — per-role model allowlists (RBAC). */
2
+ import type { Interceptor } from '../base.js';
3
+ export interface ModelPolicyOptions {
4
+ roles: Record<string, string[]>;
5
+ defaultRole?: string;
6
+ roleKey?: string;
7
+ }
8
+ export declare function modelPolicy(options: ModelPolicyOptions): Interceptor;
@@ -0,0 +1,15 @@
1
+ /** modelPolicy (F-GOV-04) — per-role model allowlists (RBAC). */
2
+ import { ModelNotAllowedError } from '../../errors.js';
3
/**
 * modelPolicy (F-GOV-04): allow a request only when its model is on the
 * allowlist of the caller's role.
 *
 * The role is read from `request.metadata[roleKey]` and falls back to
 * `defaultRole`. A role's list may contain `'*'` to allow every model. Roles
 * that are not configured — including names that merely exist on
 * Object.prototype such as `constructor` — have an empty allowlist.
 *
 * @param options - `roles` map plus optional `defaultRole` (default
 *   'default') and `roleKey` (default 'role').
 * @returns An interceptor named `model_policy`.
 * @throws ModelNotAllowedError from `before` when the model is not permitted.
 */
export function modelPolicy(options) {
    const { roles, defaultRole = 'default', roleKey = 'role' } = options;
    // Only own, array-valued entries count: a plain `roles[role]` lookup would
    // resolve inherited properties and crash with a TypeError on `.includes`.
    const allowlistFor = (role) => {
        if (!Object.prototype.hasOwnProperty.call(roles, role))
            return [];
        const list = roles[role];
        return Array.isArray(list) ? list : [];
    };
    return {
        name: 'model_policy',
        before(request, _ctx) {
            const role = String(request.metadata?.[roleKey] ?? defaultRole);
            const allowed = allowlistFor(role);
            if (allowed.includes('*') || allowed.includes(request.model))
                return request;
            throw new ModelNotAllowedError(role, request.model);
        },
    };
}
@@ -0,0 +1,9 @@
1
+ /** rateLimiter (F-GOV-03) — fixed-window requests/tokens per minute per scope. */
2
+ import type { Interceptor } from '../base.js';
3
+ import type { Scope } from './budget.js';
4
+ export interface RateLimiterOptions {
5
+ maxRequestsPerMinute?: number;
6
+ maxTokensPerMinute?: number;
7
+ scope?: Scope;
8
+ }
9
+ export declare function rateLimiter(options?: RateLimiterOptions): Interceptor;