gateia 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -126
- package/dist/engine/contract.d.ts +0 -1
- package/dist/engine/contract.js +0 -4
- package/dist/engine/policy.js +27 -17
- package/dist/index.d.ts +2 -3
- package/dist/index.js +4 -202
- package/dist/policies/finance.d.ts +1 -1
- package/dist/types.d.ts +21 -54
- package/dist/verify.d.ts +3 -0
- package/dist/verify.js +129 -0
- package/package.json +7 -7
package/README.md
CHANGED
|
@@ -1,160 +1,159 @@
|
|
|
1
1
|
# Gateia
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
<div align="center">
|
|
4
4
|
|
|
5
|
-
Gateia is a TypeScript-first SDK that sits between your application and LLMs. It enforces structured contracts (Zod), applies compliance policies, and provides detailed enforcement reports.
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
**The Deterministic Verification Layer for Enterprise AI.**
|
|
8
7
|
|
|
9
|
-
|
|
8
|
+
[](https://www.npmjs.com/package/gateia)
|
|
9
|
+
[](https://opensource.org/licenses/MIT)
|
|
10
|
+
[]()
|
|
11
|
+
[]()
|
|
10
12
|
|
|
11
|
-
|
|
12
|
-
- **Contract Enforcement**: Validators ensure outputs match your Zod schemas. An auto-repair loop is included.
|
|
13
|
-
- **Policy Engine**: Deterministic rules to block or rewrite unsafe content.
|
|
14
|
-
- **Enforcement Reporting**: Every call returns a traceId and a full report of what happened.
|
|
13
|
+
</div>
|
|
15
14
|
|
|
16
|
-
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
**Gateia** is the standard for implementing **Deterministic Guardrails** in AI applications. It acts as a final, immutable security layer between your AI models and your customers.
|
|
18
|
+
|
|
19
|
+
Unlike "AI-judging-AI" solutions, Gateia enables you to enforce **strict, code-based contracts**, ensuring that your application never hallucinates, leaks PII, or violates business rules—regardless of the underlying model (OpenAI, Anthropic, or Llama).
|
|
20
|
+
|
|
21
|
+
## 🚀 Why Gateia?
|
|
22
|
+
|
|
23
|
+
In production, **probability is a liability.** Gateia restores deterministic control.
|
|
24
|
+
|
|
25
|
+
* **🛡️ Zero Hallucination Policy**: Gateia does not use LLMs to verify. It uses deterministic logic and regex engines. It is impossible for the verifier to hallucinate.
|
|
26
|
+
* **🏗️ Contract-First Architecture**: Define your data requirements with Zod schemas. If the output doesn't match, it doesn't ship.
|
|
27
|
+
* **📋 Audit-Ready Logging**: Every decision is traced, logged, and categorized by severity, making compliance (SOC2, HIPAA) audits straightforward.
|
|
28
|
+
* **🔒 Fail-Closed Security**: If a policy returns a block signal (even with malformed data), Gateia defaults to blocking. Security is never compromised by runtime errors.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## 📦 Installation
|
|
17
33
|
|
|
18
34
|
```bash
|
|
19
35
|
npm install gateia zod
|
|
20
36
|
```
|
|
21
37
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## ⚡️ Quick Start
|
|
41
|
+
|
|
42
|
+
Secure a loan processing agent in 30 seconds.
|
|
25
43
|
|
|
26
|
-
|
|
27
|
-
|
|
44
|
+
```typescript
|
|
45
|
+
import { verify } from 'gateia';
|
|
46
|
+
import { z } from 'zod';
|
|
47
|
+
|
|
48
|
+
// 1. Define the Business Contract
|
|
49
|
+
// The AI *must* return data in this shape.
|
|
50
|
+
const LoanDecisionContract = z.object({
|
|
51
|
+
approved: z.boolean(),
|
|
52
|
+
rate: z.number().min(2.5).max(10.0), // Business Logic
|
|
28
53
|
reason: z.string(),
|
|
29
|
-
|
|
54
|
+
risk_level: z.enum(['low', 'medium', 'high'])
|
|
30
55
|
});
|
|
31
56
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
57
|
+
// 2. The Verification Step
|
|
58
|
+
// Run this *after* your LLM generates content.
|
|
59
|
+
const result = await verify({
|
|
60
|
+
output: llmResponse,
|
|
61
|
+
contract: LoanDecisionContract,
|
|
62
|
+
policies: ['finance-safe', 'pii-safe', 'secrets-safe'],
|
|
63
|
+
mode: 'enforce'
|
|
37
64
|
});
|
|
38
65
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
console.
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
console.log(result.
|
|
46
|
-
|
|
66
|
+
// 3. Deterministic Decision
|
|
67
|
+
if (!result.allowed) {
|
|
68
|
+
// Blocked. Do not show to user.
|
|
69
|
+
console.error("Security Violation:", result.enforcement.violations);
|
|
70
|
+
} else {
|
|
71
|
+
// Safe. Proceed to database/frontend.
|
|
72
|
+
console.log("Verified Data:", result.safeOutput);
|
|
73
|
+
}
|
|
47
74
|
```
|
|
48
75
|
|
|
49
|
-
|
|
76
|
+
---
|
|
50
77
|
|
|
51
|
-
|
|
52
|
-
```env
|
|
53
|
-
OPENAI_API_KEY=sk-...
|
|
54
|
-
GEMINI_API_KEY=AIza...
|
|
55
|
-
```
|
|
78
|
+
## 🛡️ Policy Library
|
|
56
79
|
|
|
57
|
-
|
|
58
|
-
```env
|
|
59
|
-
GATEIA_MOCK_ADAPTERS=true
|
|
60
|
-
```
|
|
80
|
+
Gateia ships with battle-tested policies for common enterprise risks.
|
|
61
81
|
|
|
62
|
-
|
|
82
|
+
| Policy ID | Risk Category | Description | Severity |
|
|
83
|
+
|-----------|---------------|-------------|----------|
|
|
84
|
+
| `finance-safe` | **Compliance** | Blocks non-compliant guarantee language (e.g., "100% no risk", "guaranteed return"). | High |
|
|
85
|
+
| `pii-safe` | **Privacy** | Redacts or blocks Personally Identifiable Information (Emails, Phone Numbers). | High |
|
|
86
|
+
| `secrets-safe` | **Security** | Detects leaked API keys (AWS, Stripe, OpenAI, Slack) and private keys. | High |
|
|
87
|
+
| `markup-safe` | **Security** | Prevents XSS by blocking `<script>`, `iframe`, and other HTML injection vectors. | High |
|
|
63
88
|
|
|
64
|
-
|
|
89
|
+
---
|
|
65
90
|
|
|
66
|
-
|
|
67
|
-
- `prompt`: String or chat object.
|
|
68
|
-
- `contract`: Zod schema.
|
|
69
|
-
- `policies`: Array of policy IDs (strings) or Policy objects.
|
|
70
|
-
- `behavior`: Logic for repair, retries, and blocking.
|
|
91
|
+
## 🧩 Advanced Usage
|
|
71
92
|
|
|
72
|
-
|
|
93
|
+
### Type-Safe Custom Policies
|
|
94
|
+
Gateia leverages TypeScript generics to ensure your security policies are strictly typed against your contracts.
|
|
73
95
|
|
|
74
96
|
```typescript
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
actions: Array<{ type: "rewrite" | "block"; policyId?: string; }>;
|
|
93
|
-
violations: Array<{
|
|
94
|
-
policyId: string;
|
|
95
|
-
code: string;
|
|
96
|
-
severity: "low"|"med"|"high";
|
|
97
|
-
message: string;
|
|
98
|
-
}>;
|
|
99
|
-
};
|
|
100
|
-
|
|
101
|
-
usage: {
|
|
102
|
-
model: string;
|
|
103
|
-
provider: string;
|
|
104
|
-
latencyMs: number;
|
|
105
|
-
tokens?: { prompt: number; completion: number; total: number };
|
|
106
|
-
costUsd?: number;
|
|
107
|
-
};
|
|
108
|
-
};
|
|
97
|
+
// Your contract expects { score: number }
|
|
98
|
+
const Contract = z.object({ score: z.number() });
|
|
99
|
+
|
|
100
|
+
// TypeScript knows 'output' is { score: number }
|
|
101
|
+
const result = await verify({
|
|
102
|
+
output: data,
|
|
103
|
+
contract: Contract,
|
|
104
|
+
policies: [{
|
|
105
|
+
id: 'check-score',
|
|
106
|
+
mode: 'enforce',
|
|
107
|
+
// Compile Error if you access invalid properties
|
|
108
|
+
check: (output) => {
|
|
109
|
+
if (output.score < 0) return { outcome: 'block', violations: [...] }
|
|
110
|
+
return { outcome: 'pass' }
|
|
111
|
+
}
|
|
112
|
+
}]
|
|
113
|
+
});
|
|
109
114
|
```
|
|
110
115
|
|
|
111
|
-
###
|
|
116
|
+
### Audit Mode (Passive Monitoring)
|
|
117
|
+
Deploy policies without disrupting user flow. Policy violations are recorded but do not block: `allowed` remains `true` as long as the output still satisfies the contract.
|
|
118
|
+
```typescript
|
|
119
|
+
const result = await verify({
|
|
120
|
+
output: output,
|
|
121
|
+
contract: z.any(),
|
|
122
|
+
policies: ['finance-safe'],
|
|
123
|
+
mode: 'audit' // Logs violations, does not block.
|
|
124
|
+
});
|
|
125
|
+
```
|
|
112
126
|
|
|
113
|
-
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## 📊 The Enforcement Report
|
|
130
|
+
|
|
131
|
+
Every call to `verify()` returns a result whose `enforcement` field holds a comprehensive `EnforcementReport`. Use this for your internal dashboards and compliance logs.
|
|
132
|
+
|
|
133
|
+
```json
|
|
134
|
+
{
|
|
135
|
+
"allowed": false,
|
|
136
|
+
"traceId": "123e4567-e89b-12d3-a456-426614174000",
|
|
137
|
+
"enforcement": {
|
|
138
|
+
"contract": { "outcome": "pass" },
|
|
139
|
+
"appliedPolicies": [
|
|
140
|
+
{ "id": "finance-safe", "outcome": "block" },
|
|
141
|
+
{ "id": "pii-safe", "outcome": "pass" }
|
|
142
|
+
],
|
|
143
|
+
"violations": [
|
|
144
|
+
{
|
|
145
|
+
"policyId": "finance-safe",
|
|
146
|
+
"code": "FIN_GUARANTEE",
|
|
147
|
+
"message": "Contains forbidden guarantee language: 'no risk'",
|
|
148
|
+
"severity": "high"
|
|
149
|
+
}
|
|
150
|
+
]
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
```
|
|
114
154
|
|
|
115
|
-
|
|
155
|
+
---
|
|
116
156
|
|
|
117
|
-
|
|
118
|
-
|-----------|----------|------------|---------|
|
|
119
|
-
| **`finance-safe`** | Financial Compliance | "Guaranteed returns", "No risk", "Guaranteed approval", "100% guaranteed" | `BLOCK` |
|
|
120
|
-
| **`support-safe`** | Support Safety (Alias) | Same as above. Useful for refund processing agents. | `BLOCK` |
|
|
121
|
-
| **`pii-safe`** | Data Privacy | Email addresses (`x@y.com`), Phone numbers (Format: `123-456-7890`) | `BLOCK` |
|
|
122
|
-
| **`secrets-safe`** | Security | API Keys (OpenAI, AWS, GitHub), Private Keys | `BLOCK` |
|
|
123
|
-
| **`markup-safe`** | XSS Prevention | `<script>`, `<iframe>`, `javascript:` URIs | `BLOCK` |
|
|
157
|
+
## License
|
|
124
158
|
|
|
125
|
-
|
|
126
|
-
```typescript
|
|
127
|
-
import { gate, Policy } from 'gateia';
|
|
128
|
-
|
|
129
|
-
// 1. Define your custom policy
|
|
130
|
-
const noCompetitors: Policy = {
|
|
131
|
-
id: 'no-competitors',
|
|
132
|
-
mode: 'enforce', // 'audit' to just warn
|
|
133
|
-
check: (output) => {
|
|
134
|
-
// output is strict typed from your Contract (or string if simple)
|
|
135
|
-
const text = JSON.stringify(output).toLowerCase();
|
|
136
|
-
|
|
137
|
-
if (text.includes('acme corp')) {
|
|
138
|
-
return {
|
|
139
|
-
outcome: 'block',
|
|
140
|
-
violations: [{
|
|
141
|
-
policyId: 'no-competitors',
|
|
142
|
-
code: 'COMPETITOR_MENTION',
|
|
143
|
-
message: 'Mentioned competitor Acme Corp',
|
|
144
|
-
severity: 'high',
|
|
145
|
-
evidence: { snippet: 'acme corp' }
|
|
146
|
-
}]
|
|
147
|
-
};
|
|
148
|
-
}
|
|
149
|
-
return { outcome: 'pass' };
|
|
150
|
-
}
|
|
151
|
-
};
|
|
152
|
-
|
|
153
|
-
// 2. Use it in the gate
|
|
154
|
-
await gate({
|
|
155
|
-
model: 'gpt-4',
|
|
156
|
-
prompt: 'Who is the best provider?',
|
|
157
|
-
contract: z.string(),
|
|
158
|
-
policies: ['finance-safe', noCompetitors] // Mix built-in ID strings and custom objects
|
|
159
|
-
});
|
|
160
|
-
```
|
|
159
|
+
MIT
|
package/dist/engine/contract.js
CHANGED
|
@@ -18,9 +18,5 @@ class ContractEngine {
|
|
|
18
18
|
return { success: false, error: errorMsg, errors: formattedErrors };
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
|
-
// Helper to format error for the LLM repair prompt
|
|
22
|
-
formatRepairInstruction(error) {
|
|
23
|
-
return `The previous response failed schema validation:\n${error}\nPlease fix the JSON to match the schema. Return ONLY valid JSON.`;
|
|
24
|
-
}
|
|
25
21
|
}
|
|
26
22
|
exports.ContractEngine = ContractEngine;
|
package/dist/engine/policy.js
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.PolicyEngine = void 0;
|
|
4
|
+
const zod_1 = require("zod");
|
|
5
|
+
const types_1 = require("../types");
|
|
6
|
+
// Runtime schema to validate Policy returns (Defense against bad JS)
|
|
7
|
+
const ViolationSchema = zod_1.z.object({
|
|
8
|
+
policyId: zod_1.z.string(),
|
|
9
|
+
code: zod_1.z.string(),
|
|
10
|
+
message: zod_1.z.string(),
|
|
11
|
+
severity: zod_1.z.enum(['low', 'med', 'high']),
|
|
12
|
+
evidence: zod_1.z.object({ snippet: zod_1.z.string() }).optional()
|
|
13
|
+
});
|
|
14
|
+
const PolicyResultSchema = zod_1.z.object({
|
|
15
|
+
outcome: zod_1.z.enum(['pass', 'block', 'warn', 'rewrite']),
|
|
16
|
+
violations: zod_1.z.array(ViolationSchema).optional(),
|
|
17
|
+
rewriteFn: zod_1.z.function().optional()
|
|
18
|
+
});
|
|
4
19
|
class PolicyEngine {
|
|
5
20
|
async evaluate(policies, output, context) {
|
|
6
21
|
const violations = [];
|
|
@@ -9,15 +24,15 @@ class PolicyEngine {
|
|
|
9
24
|
let hasRewrite = false;
|
|
10
25
|
// Policies are applied in order
|
|
11
26
|
for (const policy of policies) {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
27
|
+
// Execute Policy
|
|
28
|
+
const rawResult = await policy.check(currentOutput, context);
|
|
29
|
+
// FATAL: Runtime Validation of Policy Result
|
|
30
|
+
// If the user made a typo (e.g. 'outcomee') or forgot severity, we MUST crash/alert.
|
|
31
|
+
const parse = PolicyResultSchema.safeParse(rawResult);
|
|
32
|
+
if (!parse.success) {
|
|
33
|
+
throw new types_1.GateiaError(`Invalid Policy Result from '${policy.id}': ${parse.error.issues.map(i => i.path.join('.') + ' ' + i.message).join(', ')}`, context.traceId);
|
|
19
34
|
}
|
|
20
|
-
const result =
|
|
35
|
+
const result = parse.data; // safe now
|
|
21
36
|
// If violations, add them
|
|
22
37
|
if (result.violations) {
|
|
23
38
|
violations.push(...result.violations);
|
|
@@ -36,19 +51,14 @@ class PolicyEngine {
|
|
|
36
51
|
}
|
|
37
52
|
}
|
|
38
53
|
else if (result.outcome === 'rewrite') {
|
|
39
|
-
|
|
40
|
-
|
|
54
|
+
// Re-attach the function from raw result because Zod strips functions sometimes or we just want the original reference
|
|
55
|
+
// Actually Zod schema above allows function pass-through if configured, but let's be safe:
|
|
56
|
+
if (rawResult.rewriteFn && policy.mode !== 'audit') {
|
|
57
|
+
currentOutput = rawResult.rewriteFn(currentOutput);
|
|
41
58
|
hasRewrite = true;
|
|
42
|
-
// If it was pass but now rewrite, outcome is pass (with rewrite)
|
|
43
|
-
}
|
|
44
|
-
else if (policy.mode === 'audit') {
|
|
45
|
-
// record that it WOULD have rewritten
|
|
46
59
|
}
|
|
47
60
|
}
|
|
48
61
|
}
|
|
49
|
-
// If any blocking violation occurred (non-audit), result is block
|
|
50
|
-
// We need to filter violations to check if any critical/enforced ones exist
|
|
51
|
-
// Actually simplicity: if `finalOutcome` was set to 'block', return block.
|
|
52
62
|
return {
|
|
53
63
|
outcome: finalOutcome,
|
|
54
64
|
violations,
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
import { GateParams, GateResult } from './types';
|
|
3
|
-
export declare function gate<T extends z.ZodTypeAny>(params: GateParams<T>): Promise<GateResult<z.infer<T>>>;
|
|
1
|
+
export { verify } from './verify';
|
|
4
2
|
export * from './types';
|
|
3
|
+
export * from './policies';
|
package/dist/index.js
CHANGED
|
@@ -14,206 +14,8 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
exports.
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
const uuid_1 = require("uuid");
|
|
21
|
-
const registry_1 = require("./adapters/registry");
|
|
22
|
-
const contract_1 = require("./engine/contract");
|
|
23
|
-
const policy_1 = require("./engine/policy");
|
|
24
|
-
const policies_1 = require("./policies");
|
|
25
|
-
// Global instances
|
|
26
|
-
const registry = new registry_1.ModelRegistry();
|
|
27
|
-
const contractEngine = new contract_1.ContractEngine();
|
|
28
|
-
const policyEngine = new policy_1.PolicyEngine();
|
|
29
|
-
async function gate(params) {
|
|
30
|
-
const traceId = (0, uuid_1.v4)();
|
|
31
|
-
const { model, prompt, contract, policies = [], behavior, options } = params;
|
|
32
|
-
try {
|
|
33
|
-
// 1. Resolve Adapter
|
|
34
|
-
const adapter = registry.getAdapter(model);
|
|
35
|
-
// 2. Resolve Policies
|
|
36
|
-
const activePolicies = policies.map(p => {
|
|
37
|
-
if (typeof p === 'string') {
|
|
38
|
-
const found = policies_1.policyLibrary[p];
|
|
39
|
-
if (!found)
|
|
40
|
-
throw new types_1.GateiaError(`Unknown policy: ${p}`, traceId);
|
|
41
|
-
return found;
|
|
42
|
-
}
|
|
43
|
-
return p;
|
|
44
|
-
});
|
|
45
|
-
// 3. Execution Loop (Simple Retry Logic for Contract)
|
|
46
|
-
let attempts = 0;
|
|
47
|
-
let maxRetries = behavior?.contract?.maxRetries ?? 0;
|
|
48
|
-
// Bounded auto repair
|
|
49
|
-
if (behavior?.contract?.repair === 'auto' && maxRetries === 0) {
|
|
50
|
-
maxRetries = 3; // Default
|
|
51
|
-
}
|
|
52
|
-
else if (behavior?.contract?.repair === 'off') {
|
|
53
|
-
maxRetries = 0;
|
|
54
|
-
}
|
|
55
|
-
let currentPrompt = prompt;
|
|
56
|
-
let lastError;
|
|
57
|
-
let finalSafeOutput;
|
|
58
|
-
let contractOutcome = 'fail';
|
|
59
|
-
// Track usage
|
|
60
|
-
let totalTokens = { prompt: 0, completion: 0, total: 0 };
|
|
61
|
-
let finalLatency = 0;
|
|
62
|
-
let rawResult;
|
|
63
|
-
while (attempts <= maxRetries) {
|
|
64
|
-
attempts++;
|
|
65
|
-
// Call Model
|
|
66
|
-
const result = await adapter.generate(currentPrompt, {
|
|
67
|
-
// If contract is object/schema, hint json mode if supported by adapter?
|
|
68
|
-
// For now, adapter ignores options in stub
|
|
69
|
-
});
|
|
70
|
-
if (result.tokens) {
|
|
71
|
-
totalTokens.prompt += result.tokens.prompt;
|
|
72
|
-
totalTokens.completion += result.tokens.completion;
|
|
73
|
-
totalTokens.total += result.tokens.total;
|
|
74
|
-
}
|
|
75
|
-
finalLatency = result.latencyMs || 0;
|
|
76
|
-
// Parse Output (assume text for now, try to parse JSON if schema expects object)
|
|
77
|
-
// Usually LLM returns string. If Schema is object, we try JSON.parse
|
|
78
|
-
let outputToValidate = result.text;
|
|
79
|
-
let isJsonSchema = contract instanceof zod_1.z.ZodObject || contract instanceof zod_1.z.ZodArray;
|
|
80
|
-
if (result.structured) {
|
|
81
|
-
outputToValidate = result.structured;
|
|
82
|
-
}
|
|
83
|
-
else if (isJsonSchema) {
|
|
84
|
-
try {
|
|
85
|
-
// Simple extraction of JSON if wrapped in markdown
|
|
86
|
-
const jsonMatch = result.text.match(/```json\n([\s\S]*?)\n```/) || result.text.match(/\{[\s\S]*\}/);
|
|
87
|
-
if (jsonMatch) {
|
|
88
|
-
outputToValidate = JSON.parse(jsonMatch[0].replace(/```json|```/g, ''));
|
|
89
|
-
}
|
|
90
|
-
else {
|
|
91
|
-
outputToValidate = JSON.parse(result.text);
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
catch (e) {
|
|
95
|
-
// Parsing failed
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
rawResult = outputToValidate;
|
|
99
|
-
// Validate Contract
|
|
100
|
-
const contractRes = contractEngine.validate(outputToValidate, contract);
|
|
101
|
-
if (contractRes.success) {
|
|
102
|
-
finalSafeOutput = contractRes.data;
|
|
103
|
-
contractOutcome = attempts === 1 ? 'pass' : 'repaired';
|
|
104
|
-
break; // Success!
|
|
105
|
-
}
|
|
106
|
-
else {
|
|
107
|
-
lastError = contractRes.error;
|
|
108
|
-
if (attempts <= maxRetries) {
|
|
109
|
-
// Prepare repair prompt
|
|
110
|
-
const repairMsg = contractEngine.formatRepairInstruction(lastError || "Invalid Format");
|
|
111
|
-
// Append to prompt? Or new message?
|
|
112
|
-
// Simple "chat" append if prompt is array-like?
|
|
113
|
-
// For MVP, if prompt is string, we append.
|
|
114
|
-
if (typeof currentPrompt === 'string') {
|
|
115
|
-
currentPrompt = `${currentPrompt}\n\nUser: ${repairMsg}`; // Very naive chat history
|
|
116
|
-
}
|
|
117
|
-
else {
|
|
118
|
-
// If it's object, assumes single turn. We can't easily extend without a chat structure.
|
|
119
|
-
// For MVP assume simple string prompt works best for repair loop.
|
|
120
|
-
// Or just append to user message.
|
|
121
|
-
currentPrompt = {
|
|
122
|
-
...currentPrompt,
|
|
123
|
-
user: `${currentPrompt.user}\n\n(System: Previous output invalid: ${lastError}. Fix it.)`
|
|
124
|
-
};
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
// Construct usage object
|
|
130
|
-
const finalUsage = {
|
|
131
|
-
provider: registry.getAdapter(model).constructor.name,
|
|
132
|
-
model: params.model,
|
|
133
|
-
latencyMs: finalLatency,
|
|
134
|
-
inputTokens: totalTokens.prompt,
|
|
135
|
-
outputTokens: totalTokens.completion,
|
|
136
|
-
totalTokens: totalTokens.total,
|
|
137
|
-
// costUsd: ... // TODO: Add cost calculation logic based on provider/model
|
|
138
|
-
};
|
|
139
|
-
if (!finalSafeOutput) {
|
|
140
|
-
// Block/Throw
|
|
141
|
-
// Construct report
|
|
142
|
-
const report = {
|
|
143
|
-
appliedPolicies: [], // Policies not applied due to contract failure
|
|
144
|
-
contract: {
|
|
145
|
-
outcome: 'fail',
|
|
146
|
-
errors: contractEngine.validate(rawResult, contract).errors // re-running valid logic or grabbing from lastError
|
|
147
|
-
},
|
|
148
|
-
actions: [],
|
|
149
|
-
violations: [{ policyId: 'contract', code: 'CONTRACT_FAIL', message: lastError || "Contract Validation Failed", severity: 'high' }]
|
|
150
|
-
};
|
|
151
|
-
// For accurate errors we might want to capture them better above instead of re-running or assuming lastError string
|
|
152
|
-
// But for now this matches structure.
|
|
153
|
-
throw new types_1.GateiaError("Contract Check Failed", traceId, report);
|
|
154
|
-
}
|
|
155
|
-
// 4. Apply Policies
|
|
156
|
-
// Note: Policies applied on safeOutput? Or raw text?
|
|
157
|
-
// Usually policies check content. If we have safe object, we check that.
|
|
158
|
-
const policyCtx = { model: params.model, prompt: params.prompt, traceId };
|
|
159
|
-
const policyResult = await policyEngine.evaluate(activePolicies, finalSafeOutput, policyCtx);
|
|
160
|
-
let finalViolations = policyResult.violations || [];
|
|
161
|
-
let enforcementActions = [];
|
|
162
|
-
// Detailed policy reporting
|
|
163
|
-
const appliedRecs = activePolicies.map(p => {
|
|
164
|
-
const policyViolations = finalViolations.filter(v => v.policyId === p.id);
|
|
165
|
-
let outcome = 'pass';
|
|
166
|
-
if (policyViolations.length > 0) {
|
|
167
|
-
const isBlock = policyViolations.some(v => v.severity === 'high');
|
|
168
|
-
outcome = isBlock ? 'block' : 'warn';
|
|
169
|
-
}
|
|
170
|
-
return {
|
|
171
|
-
id: p.id,
|
|
172
|
-
version: p.version,
|
|
173
|
-
mode: p.mode || 'enforce',
|
|
174
|
-
outcome,
|
|
175
|
-
reasons: policyViolations.map(v => v.message)
|
|
176
|
-
};
|
|
177
|
-
});
|
|
178
|
-
if (policyResult.rewrittenOutput) {
|
|
179
|
-
finalSafeOutput = policyResult.rewrittenOutput;
|
|
180
|
-
enforcementActions.push({ type: 'rewrite', policyId: 'policy-engine', note: 'Content rewritten by policy' });
|
|
181
|
-
}
|
|
182
|
-
const report = {
|
|
183
|
-
appliedPolicies: appliedRecs,
|
|
184
|
-
contract: {
|
|
185
|
-
outcome: contractOutcome,
|
|
186
|
-
// If repaired, we could list repairs here if we tracked them detailly
|
|
187
|
-
},
|
|
188
|
-
actions: enforcementActions,
|
|
189
|
-
violations: finalViolations
|
|
190
|
-
};
|
|
191
|
-
if (policyResult.outcome === 'block') {
|
|
192
|
-
const onBlock = behavior?.onBlock || 'throw'; // Default throw?
|
|
193
|
-
if (onBlock === 'throw') {
|
|
194
|
-
throw new types_1.GateiaError("Policy Blocked Response", traceId, report);
|
|
195
|
-
}
|
|
196
|
-
// Return with no safeOutput? Or just raw?
|
|
197
|
-
return {
|
|
198
|
-
safeOutput: undefined,
|
|
199
|
-
traceId,
|
|
200
|
-
enforcement: report,
|
|
201
|
-
usage: finalUsage,
|
|
202
|
-
rawOutput: options?.includeRawOutput ? rawResult : undefined
|
|
203
|
-
};
|
|
204
|
-
}
|
|
205
|
-
return {
|
|
206
|
-
safeOutput: finalSafeOutput,
|
|
207
|
-
traceId,
|
|
208
|
-
enforcement: report,
|
|
209
|
-
usage: finalUsage,
|
|
210
|
-
rawOutput: options?.includeRawOutput ? rawResult : undefined
|
|
211
|
-
};
|
|
212
|
-
}
|
|
213
|
-
catch (error) {
|
|
214
|
-
if (error instanceof types_1.GateiaError)
|
|
215
|
-
throw error;
|
|
216
|
-
throw new types_1.GateiaError(error.message, traceId, undefined, error);
|
|
217
|
-
}
|
|
218
|
-
}
|
|
17
|
+
exports.verify = void 0;
|
|
18
|
+
var verify_1 = require("./verify");
|
|
19
|
+
Object.defineProperty(exports, "verify", { enumerable: true, get: function () { return verify_1.verify; } });
|
|
219
20
|
__exportStar(require("./types"), exports);
|
|
21
|
+
__exportStar(require("./policies"), exports);
|
|
@@ -4,5 +4,5 @@ export declare const supportSafe: {
|
|
|
4
4
|
id: string;
|
|
5
5
|
version?: string;
|
|
6
6
|
mode?: "enforce" | "audit";
|
|
7
|
-
check: (output: any, context: import("../types").PolicyContext) => import("../types").PolicyResult | Promise<import("../types").PolicyResult
|
|
7
|
+
check: (output: any, context: import("../types").PolicyContext) => import("../types").PolicyResult<any> | Promise<import("../types").PolicyResult<any>>;
|
|
8
8
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -15,46 +15,19 @@ export interface Violation {
|
|
|
15
15
|
};
|
|
16
16
|
}
|
|
17
17
|
export interface PolicyContext {
|
|
18
|
-
model: string;
|
|
19
|
-
prompt: any;
|
|
20
18
|
traceId: string;
|
|
19
|
+
metadata?: Record<string, any>;
|
|
21
20
|
}
|
|
22
|
-
export interface PolicyResult {
|
|
21
|
+
export interface PolicyResult<T = any> {
|
|
23
22
|
outcome: GateOutcome;
|
|
24
23
|
violations?: Violation[];
|
|
25
|
-
rewriteFn?: (output:
|
|
24
|
+
rewriteFn?: (output: T) => T;
|
|
26
25
|
}
|
|
27
|
-
export interface Policy {
|
|
26
|
+
export interface Policy<T = any> {
|
|
28
27
|
id: string;
|
|
29
28
|
version?: string;
|
|
30
29
|
mode?: 'enforce' | 'audit';
|
|
31
|
-
check: (output:
|
|
32
|
-
}
|
|
33
|
-
export interface GateOptions {
|
|
34
|
-
includeRawOutput?: boolean;
|
|
35
|
-
}
|
|
36
|
-
export interface GateBehavior {
|
|
37
|
-
mode?: 'enforce' | 'audit';
|
|
38
|
-
contract?: {
|
|
39
|
-
repair?: 'off' | 'auto';
|
|
40
|
-
maxRetries?: number;
|
|
41
|
-
maxRepairAttempts?: number;
|
|
42
|
-
};
|
|
43
|
-
policy?: {
|
|
44
|
-
rewrite?: 'off' | 'allowed';
|
|
45
|
-
};
|
|
46
|
-
onBlock?: 'throw' | 'return';
|
|
47
|
-
}
|
|
48
|
-
export interface GateParams<T extends z.ZodTypeAny> {
|
|
49
|
-
model: string;
|
|
50
|
-
prompt: string | {
|
|
51
|
-
system?: string;
|
|
52
|
-
user: string;
|
|
53
|
-
};
|
|
54
|
-
contract: T;
|
|
55
|
-
policies?: (string | Policy)[];
|
|
56
|
-
behavior?: GateBehavior;
|
|
57
|
-
options?: GateOptions;
|
|
30
|
+
check: (output: T, context: PolicyContext) => PolicyResult<T> | Promise<PolicyResult<T>>;
|
|
58
31
|
}
|
|
59
32
|
export interface AppliedPolicyRec {
|
|
60
33
|
id: string;
|
|
@@ -64,43 +37,37 @@ export interface AppliedPolicyRec {
|
|
|
64
37
|
reasons?: string[];
|
|
65
38
|
}
|
|
66
39
|
export interface ContractEnforcement {
|
|
67
|
-
|
|
68
|
-
outcome: 'pass' | 'fail' | 'repaired';
|
|
40
|
+
outcome: 'pass' | 'fail';
|
|
69
41
|
errors?: {
|
|
70
42
|
path: string;
|
|
71
43
|
message: string;
|
|
72
44
|
}[];
|
|
73
|
-
repairs?: {
|
|
74
|
-
op: 'add' | 'remove' | 'replace';
|
|
75
|
-
path: string;
|
|
76
|
-
note?: string;
|
|
77
|
-
}[];
|
|
78
45
|
}
|
|
79
|
-
export interface
|
|
46
|
+
export interface VerifyParams<T extends z.ZodTypeAny> {
|
|
47
|
+
output: unknown;
|
|
48
|
+
contract: T;
|
|
49
|
+
policies?: (string | Policy<z.infer<T>>)[];
|
|
50
|
+
mode?: 'enforce' | 'audit';
|
|
51
|
+
options?: {
|
|
52
|
+
includeRawOutput?: boolean;
|
|
53
|
+
};
|
|
54
|
+
}
|
|
55
|
+
export interface EnforcementReport {
|
|
80
56
|
appliedPolicies: AppliedPolicyRec[];
|
|
81
57
|
contract: ContractEnforcement;
|
|
82
58
|
actions: PolicyAction[];
|
|
83
59
|
violations: Violation[];
|
|
84
60
|
}
|
|
85
|
-
export interface
|
|
86
|
-
|
|
87
|
-
model: string;
|
|
88
|
-
latencyMs: number;
|
|
89
|
-
inputTokens?: number;
|
|
90
|
-
outputTokens?: number;
|
|
91
|
-
totalTokens?: number;
|
|
92
|
-
costUsd?: number;
|
|
93
|
-
}
|
|
94
|
-
export interface GateResult<T> {
|
|
61
|
+
export interface VerifyResult<T> {
|
|
62
|
+
allowed: boolean;
|
|
95
63
|
safeOutput?: T;
|
|
96
64
|
traceId: string;
|
|
97
|
-
enforcement:
|
|
98
|
-
usage: GateUsage;
|
|
65
|
+
enforcement: EnforcementReport;
|
|
99
66
|
rawOutput?: any;
|
|
100
67
|
}
|
|
101
68
|
export declare class GateiaError extends Error {
|
|
102
69
|
traceId: string;
|
|
103
|
-
report?:
|
|
70
|
+
report?: EnforcementReport;
|
|
104
71
|
originalError?: unknown;
|
|
105
|
-
constructor(message: string, traceId: string, report?:
|
|
72
|
+
constructor(message: string, traceId: string, report?: EnforcementReport, originalError?: unknown);
|
|
106
73
|
}
|
package/dist/verify.d.ts
ADDED
package/dist/verify.js
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.verify = verify;
|
|
4
|
+
const zod_1 = require("zod");
|
|
5
|
+
const uuid_1 = require("uuid");
|
|
6
|
+
const types_1 = require("./types");
|
|
7
|
+
const contract_1 = require("./engine/contract");
|
|
8
|
+
const policy_1 = require("./engine/policy");
|
|
9
|
+
const policies_1 = require("./policies");
|
|
10
|
+
const contractEngine = new contract_1.ContractEngine();
|
|
11
|
+
const policyEngine = new policy_1.PolicyEngine();
|
|
12
|
+
async function verify(params) {
|
|
13
|
+
const traceId = (0, uuid_1.v4)();
|
|
14
|
+
const { output, contract, policies = [], mode = 'enforce' } = params;
|
|
15
|
+
try {
|
|
16
|
+
// --- 1. Contract Validation ---
|
|
17
|
+
// (If output is string and contract is Object, we might want to try parsing it?
|
|
18
|
+
// The pivot request says "Gateia works with outputs generated by ANY provider".
|
|
19
|
+
// Users might pass raw JSON string. Should we auto-parse?
|
|
20
|
+
// For now, assume user handles parsing OR we do simple string->object if schema demands it.)
|
|
21
|
+
let outputToValidate = output;
|
|
22
|
+
// Auto-parse JSON string if schema is object/array but output is string
|
|
23
|
+
if (typeof output === 'string' && (contract instanceof zod_1.z.ZodObject || contract instanceof zod_1.z.ZodArray)) {
|
|
24
|
+
try {
|
|
25
|
+
// 1. Try to extract from markdown code blocks
|
|
26
|
+
const jsonBlockMatch = output.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
|
|
27
|
+
if (jsonBlockMatch && jsonBlockMatch[1]) {
|
|
28
|
+
outputToValidate = JSON.parse(jsonBlockMatch[1].trim());
|
|
29
|
+
}
|
|
30
|
+
else {
|
|
31
|
+
// 2. Try parsing the raw string (maybe it's just JSON)
|
|
32
|
+
outputToValidate = JSON.parse(output.trim());
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
catch (e) {
|
|
36
|
+
// Failed to parse, validation will likely fail next
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
const contractRes = contractEngine.validate(outputToValidate, contract);
|
|
40
|
+
let safeOutput = contractRes.success ? contractRes.data : undefined;
|
|
41
|
+
const contractErrors = contractRes.success ? undefined : contractRes.errors;
|
|
42
|
+
const contractOutcome = contractRes.success ? 'pass' : 'fail';
|
|
43
|
+
// --- 2. Policy Enforcement ---
|
|
44
|
+
// Resolve policies
|
|
45
|
+
const activePolicies = policies.map(p => {
|
|
46
|
+
if (typeof p === 'string') {
|
|
47
|
+
const found = policies_1.policyLibrary[p];
|
|
48
|
+
if (!found)
|
|
49
|
+
throw new types_1.GateiaError(`Unknown policy: ${p}`, traceId);
|
|
50
|
+
return found;
|
|
51
|
+
}
|
|
52
|
+
return p;
|
|
53
|
+
});
|
|
54
|
+
// Run policies
|
|
55
|
+
// Note: We run policies on the *safeOutput* if valid, or original *output* if not?
|
|
56
|
+
// Ideally we check specific fields if valid. If invalid, we might still check the raw string?
|
|
57
|
+
// Let's run on `currentOutput` which is `safeOutput` ?? `output`.
|
|
58
|
+
const contentToCheck = safeOutput !== undefined ? safeOutput : output;
|
|
59
|
+
const policyCtx = { traceId };
|
|
60
|
+
const policyResult = await policyEngine.evaluate(activePolicies, contentToCheck, policyCtx);
|
|
61
|
+
let finalViolations = policyResult.violations || [];
|
|
62
|
+
let enforcementActions = [];
|
|
63
|
+
// Apply Rewrites if allowed
|
|
64
|
+
if (policyResult.rewrittenOutput) {
|
|
65
|
+
safeOutput = policyResult.rewrittenOutput;
|
|
66
|
+
enforcementActions.push({ type: 'rewrite', policyId: 'policy-engine', note: 'Content rewritten' });
|
|
67
|
+
}
|
|
68
|
+
// Build Applied Report
|
|
69
|
+
const appliedRecs = activePolicies.map(p => {
|
|
70
|
+
const pViolations = finalViolations.filter(v => v.policyId === p.id);
|
|
71
|
+
let outcome = 'pass';
|
|
72
|
+
if (pViolations.length > 0) {
|
|
73
|
+
const isBlock = pViolations.some(v => v.severity === 'high');
|
|
74
|
+
outcome = isBlock ? 'block' : 'warn';
|
|
75
|
+
}
|
|
76
|
+
return {
|
|
77
|
+
id: p.id,
|
|
78
|
+
version: p.version,
|
|
79
|
+
mode: p.mode || 'enforce',
|
|
80
|
+
outcome,
|
|
81
|
+
reasons: pViolations.map(v => v.message)
|
|
82
|
+
};
|
|
83
|
+
});
|
|
84
|
+
// --- 3. Decision ---
|
|
85
|
+
// BLOCK if:
|
|
86
|
+
// 1. Contract Failed (Invalid structure)
|
|
87
|
+
// 2. Any High Severity Violation (unless mode=audit)
|
|
88
|
+
const contractFailed = !contractRes.success;
|
|
89
|
+
// Check both explicit violations AND the aggregated outcome from policy engine
|
|
90
|
+
// This handles cases where a user returns { outcome: 'block' } but omits violations
|
|
91
|
+
const blockedByPolicy = finalViolations.some(v => v.severity === 'high') ||
|
|
92
|
+
(policyResult.outcome === 'block' && mode === 'enforce');
|
|
93
|
+
// Safety Fallback: If blocked but no violations, inject one for reporting
|
|
94
|
+
if (blockedByPolicy && finalViolations.length === 0) {
|
|
95
|
+
finalViolations.push({
|
|
96
|
+
policyId: 'gateia-core',
|
|
97
|
+
code: 'IMPLICIT_BLOCK',
|
|
98
|
+
message: 'A policy returned BLOCK status but provided no violation details.',
|
|
99
|
+
severity: 'high'
|
|
100
|
+
});
|
|
101
|
+
}
|
|
102
|
+
let allowed = true;
|
|
103
|
+
if (contractFailed)
|
|
104
|
+
allowed = false;
|
|
105
|
+
if (blockedByPolicy && mode === 'enforce')
|
|
106
|
+
allowed = false;
|
|
107
|
+
const report = {
|
|
108
|
+
appliedPolicies: appliedRecs,
|
|
109
|
+
contract: {
|
|
110
|
+
outcome: contractOutcome,
|
|
111
|
+
errors: contractErrors
|
|
112
|
+
},
|
|
113
|
+
actions: enforcementActions,
|
|
114
|
+
violations: finalViolations
|
|
115
|
+
};
|
|
116
|
+
return {
|
|
117
|
+
allowed,
|
|
118
|
+
safeOutput: allowed ? safeOutput : undefined,
|
|
119
|
+
traceId,
|
|
120
|
+
enforcement: report,
|
|
121
|
+
rawOutput: params.options?.includeRawOutput ? output : undefined
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
catch (error) {
|
|
125
|
+
if (error instanceof types_1.GateiaError)
|
|
126
|
+
throw error;
|
|
127
|
+
throw new types_1.GateiaError(error.message, traceId, undefined, error);
|
|
128
|
+
}
|
|
129
|
+
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gateia",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "The
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "The Deterministic Verification Layer for Enterprise AI.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
7
7
|
"llm",
|
|
@@ -31,14 +31,14 @@
|
|
|
31
31
|
"test": "vitest run"
|
|
32
32
|
},
|
|
33
33
|
"dependencies": {
|
|
34
|
-
"
|
|
35
|
-
"
|
|
34
|
+
"uuid": "^13.0.0",
|
|
35
|
+
"zod": "^3.22.4"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
38
|
-
"typescript": "^5.3.3",
|
|
39
|
-
"vitest": "^1.2.1",
|
|
40
38
|
"@types/node": "^20.11.0",
|
|
41
|
-
"@types/uuid": "^9.0.7"
|
|
39
|
+
"@types/uuid": "^9.0.7",
|
|
40
|
+
"typescript": "^5.3.3",
|
|
41
|
+
"vitest": "^1.2.1"
|
|
42
42
|
},
|
|
43
43
|
"engines": {
|
|
44
44
|
"node": ">=18"
|