ai-agenttrace 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_agenttrace-1.0.0/.gitignore +7 -0
- ai_agenttrace-1.0.0/PKG-INFO +332 -0
- ai_agenttrace-1.0.0/README.md +309 -0
- ai_agenttrace-1.0.0/agenttrace/__init__.py +25 -0
- ai_agenttrace-1.0.0/agenttrace/explainer.py +74 -0
- ai_agenttrace-1.0.0/agenttrace/guard.py +124 -0
- ai_agenttrace-1.0.0/agenttrace/rules.py +276 -0
- ai_agenttrace-1.0.0/agenttrace/store.py +42 -0
- ai_agenttrace-1.0.0/agenttrace/types.py +73 -0
- ai_agenttrace-1.0.0/pyproject.toml +40 -0
- ai_agenttrace-1.0.0/tests/test_guard.py +64 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ai-agenttrace
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: The accountability layer for AI agents. Trace, explain, and control agent actions.
|
|
5
|
+
Project-URL: Homepage, https://github.com/kalash33/agenttrace
|
|
6
|
+
Project-URL: Issues, https://github.com/kalash33/agenttrace/issues
|
|
7
|
+
Author-email: AgentTrace Contributors <hello@agenttrace.ai>
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Requires-Dist: openai>=1.0.0
|
|
15
|
+
Requires-Dist: pydantic>=2.0.0
|
|
16
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
19
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# AgentTrace 🛡️
|
|
25
|
+
|
|
26
|
+
> **Block dangerous AI agent actions. Explain every decision. One line of code.**
|
|
27
|
+
|
|
28
|
+
[npm package](https://www.npmjs.com/package/agenttrace)
|
|
29
|
+
[PyPI package](https://pypi.org/project/ai-agenttrace/)
|
|
30
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
31
|
+
[](#)
|
|
32
|
+
|
|
33
|
+
*Your AI agent's conscience — blocks harm, explains reasoning, logs everything.*
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## The Problem
|
|
38
|
+
|
|
39
|
+
AI agents are making autonomous decisions. **Nobody knows why.**
|
|
40
|
+
|
|
41
|
+
When they go wrong, nobody can explain what happened.
|
|
42
|
+
|
|
43
|
+
- 🔴 **51%** of enterprises have AI agents in production ([Ringly.io, 2026](https://www.ringly.io/blog/ai-agent-statistics-2026))
|
|
44
|
+
- 🔴 **75%** have experienced negative consequences from GenAI ([McKinsey, 2025](https://tianpan.co/blog/2026-04-20-ai-audit-trail-user-trust-agent-transparency))
|
|
45
|
+
- 🔴 **42%** abandoned AI projects due to reliability issues ([S&P Global, 2025](https://galileo.ai/blog/best-agent-observability-platforms-scaling-generative-ai))
|
|
46
|
+
- 🔴 **"AI Accountability"** is now the #1 enterprise requirement for new AI tools ([GlobeNewsWire/Jitterbit, May 2026](https://www.globenewswire.com/news-release/2026/05/06/3288602/0/en/AI-Accountability-tops-list-of-enterprise-requirements-for-new-AI-tools.html))
|
|
47
|
+
|
|
48
|
+
The EU AI Act mandates explainability by December 2027. Boards want decision logs. Your customers want to trust your AI.
|
|
49
|
+
|
|
50
|
+
**Nobody else provides this combination: open-source + real-time blocking + plain-English explanations + full trace.**
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## What You Get
|
|
55
|
+
|
|
56
|
+
| Feature | AgentTrace | Langfuse | Portkey | Lakera |
|
|
57
|
+
|---------|-----------|---------|--------|--------|
|
|
58
|
+
| Blocks dangerous actions | ✅ | ❌ | ⚠️ Partial | ✅ (LLM only) |
|
|
59
|
+
| Explains WHY in plain English | ✅ | ❌ | ❌ | ❌ |
|
|
60
|
+
| Native AI agent support | ✅ | ✅ | ⚠️ Partial | ❌ |
|
|
61
|
+
| Open-source & self-hosted | ✅ | ✅ | ❌ | ❌ |
|
|
62
|
+
| Full audit trail | ✅ | ✅ | ⚠️ | ❌ |
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Quick Start
|
|
67
|
+
|
|
68
|
+
### TypeScript / Node.js
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
npm install agenttrace
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
import { AgentTrace } from 'agenttrace';
|
|
76
|
+
|
|
77
|
+
const guard = new AgentTrace({
|
|
78
|
+
rules: [
|
|
79
|
+
'block_pii_leakage', // Stop PII leaking to users
|
|
80
|
+
'block_financial_advice', // No unqualified investment advice
|
|
81
|
+
'block_harmful_content', // Violence, illegal activities, self-harm
|
|
82
|
+
'require_human_approval', // Gate high-value transactions
|
|
83
|
+
],
|
|
84
|
+
explain: true, // Generate plain-English explanations
|
|
85
|
+
humanApproval: {
|
|
86
|
+
threshold: 1000, // Require approval for actions > $1,000
|
|
87
|
+
onApprovalRequired: async ({ description, amount }) => {
|
|
88
|
+
// Send Slack alert, email, UI prompt — whatever you need
|
|
89
|
+
return await myApprovalSystem.request(description, amount);
|
|
90
|
+
},
|
|
91
|
+
},
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
// Wrap your agent — same interface, now accountable
|
|
95
|
+
const safeAgent = guard.wrap(myAgent);
|
|
96
|
+
|
|
97
|
+
const result = await safeAgent.run("Process this customer refund");
|
|
98
|
+
|
|
99
|
+
// If BLOCKED:
|
|
100
|
+
// result.blocked → true
|
|
101
|
+
// result.reason → "Agent action BLOCKED. Violated rule(s): require_human_approval..."
|
|
102
|
+
// result.violations → [{ rule, description, severity, evidence }]
|
|
103
|
+
|
|
104
|
+
// If ALLOWED:
|
|
105
|
+
// result.blocked → false
|
|
106
|
+
// result.explanation → "Agent processed a $50 refund because the customer's..."
|
|
107
|
+
// result.riskLevel → 'LOW'
|
|
108
|
+
// result.auditTrail → [step1, step2, ...] — full reasoning chain
|
|
109
|
+
// result.auditId → 'uuid-...' — look it up later
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Python
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install ai-agenttrace
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from agenttrace import AgentTrace, AgentTraceOptions
|
|
120
|
+
|
|
121
|
+
guard = AgentTrace(AgentTraceOptions(
|
|
122
|
+
rules=["block_pii_leakage", "block_harmful_content", "block_financial_advice"],
|
|
123
|
+
debug=True,
|
|
124
|
+
))
|
|
125
|
+
|
|
126
|
+
safe_agent = guard.wrap(my_langchain_agent)
|
|
127
|
+
result = safe_agent.invoke("Process customer request")
|
|
128
|
+
|
|
129
|
+
print(result.blocked) # True/False
|
|
130
|
+
print(result.risk_level) # 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL'
|
|
131
|
+
print(result.audit_id) # UUID for audit trail lookup
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Built-in Rules
|
|
137
|
+
|
|
138
|
+
AgentTrace ships with 13 built-in rules designed to enforce enterprise-grade accountability.
|
|
139
|
+
|
|
140
|
+
| Rule | Category | What it blocks | Severity |
|
|
141
|
+
|------|----------|---------------|----------|
|
|
142
|
+
| `block_pii_leakage` | **Privacy** | Emails, phones, SSNs, credit card numbers, Aadhaar, API Keys. | HIGH–CRITICAL |
|
|
143
|
+
| `block_special_category_data` | **Privacy** | GDPR Art 9 data: health, genetics, sexual orientation, political views. | HIGH–CRITICAL |
|
|
144
|
+
| `block_manipulation` | **EU AI Act** | Art 5 prohibited practices: artificial urgency, dark patterns, gaslighting. | HIGH–CRITICAL |
|
|
145
|
+
| `block_discriminatory_output` | **Fairness** | EU Charter Art 21: Bias on race, gender, age, religion, nationality, disability. | CRITICAL |
|
|
146
|
+
| `block_ai_identity_deception` | **Transparency** | EU AI Act Art 50: Agents claiming to be human or denying being AI. | CRITICAL |
|
|
147
|
+
| `block_medical_advice` | **Professional** | Unqualified diagnosis, treatment recommendations, dosage instructions. | CRITICAL |
|
|
148
|
+
| `block_legal_advice` | **Professional** | Unauthorized Practice of Law (UPL): specific legal strategy advice. | HIGH |
|
|
149
|
+
| `block_financial_advice` | **Professional** | Investment recommendations, guaranteed returns, loan guidance. | HIGH |
|
|
150
|
+
| `block_prompt_injection` | **Security** | OWASP LLM01: Detects instruction overrides, persona hijacking, data exfil. | CRITICAL |
|
|
151
|
+
| `block_system_prompt_leakage` | **Security** | OWASP LLM07: Agent exposing its internal configuration or instructions. | HIGH |
|
|
152
|
+
| `block_harmful_content` | **Safety** | Violence, illegal instructions, self-harm, hate speech. | HIGH–CRITICAL |
|
|
153
|
+
| `require_human_approval` | **Oversight** | Actions above a $ threshold, irreversible/destructive operations. | HIGH–CRITICAL |
|
|
154
|
+
| `block_hallucination` | **Quality** | Factual claims not supported by your RAG context documents. | HIGH |
|
|
155
|
+
|
|
156
|
+
All rules run **in parallel**, so the happy path adds negligible latency (typically <5ms). You can group these rules with pre-configured bundles such as `COMPLIANCE_BUNDLES.EU_AI_ACT` or `COMPLIANCE_BUNDLES.OWASP_LLM`.
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Custom Rules
|
|
161
|
+
|
|
162
|
+
Write your own rules in 5 lines:
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
import { createRule, AgentTrace } from 'agenttrace';
|
|
166
|
+
|
|
167
|
+
const noCompetitorMentions = createRule(
|
|
168
|
+
'no_competitor_mentions',
|
|
169
|
+
async ({ result }) => {
|
|
170
|
+
const text = JSON.stringify(result);
|
|
171
|
+
if (text.toLowerCase().includes('rival-corp')) {
|
|
172
|
+
return [{ rule: 'no_competitor_mentions', description: 'Competitor mentioned', severity: 'MEDIUM' }];
|
|
173
|
+
}
|
|
174
|
+
return [];
|
|
175
|
+
}
|
|
176
|
+
);
|
|
177
|
+
|
|
178
|
+
const guard = new AgentTrace({ rules: [noCompetitorMentions, 'block_pii_leakage'] });
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Audit Trail
|
|
184
|
+
|
|
185
|
+
Every agent run is automatically stored in a local SQLite database:
|
|
186
|
+
|
|
187
|
+
```typescript
|
|
188
|
+
// Query your audit trail
|
|
189
|
+
const recent = guard.storage?.getRecent(20);
|
|
190
|
+
const blocked = guard.storage?.getBlocked();
|
|
191
|
+
const stats = guard.storage?.stats();
|
|
192
|
+
// → { total: 142, blocked: 3, byRiskLevel: { LOW: 138, HIGH: 3, CRITICAL: 1 } }
|
|
193
|
+
|
|
194
|
+
// Look up a specific run
|
|
195
|
+
const run = guard.storage?.getById('audit-uuid-here');
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Works With
|
|
201
|
+
|
|
202
|
+
- ✅ **OpenAI** — Assistants, Responses API, Chat Completions
|
|
203
|
+
- ✅ **LangChain / LangGraph** — any `.invoke()` or `.run()` agent
|
|
204
|
+
- ✅ **CrewAI** — crew.kickoff()
|
|
205
|
+
- ✅ **Anthropic** — tool use agents
|
|
206
|
+
- ✅ **Any async function** — use `guard.guardFn()`
|
|
207
|
+
|
|
208
|
+
```typescript
|
|
209
|
+
// Works with any async function — no agent object needed
|
|
210
|
+
const result = await guard.guardFn(
|
|
211
|
+
async () => await myCustomAgent.process(input),
|
|
212
|
+
input // original task for tracing
|
|
213
|
+
);
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## Explanation Engine
|
|
219
|
+
|
|
220
|
+
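Set `explain: true` and add `ANTHROPIC_API_KEY` to get plain-English explanations (the Python package's `OpenAICompatibleExplainer` reads `FEATHERLESS_API_KEY` or `OPENAI_API_KEY` instead):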
|
|
221
|
+
|
|
222
|
+
```
|
|
223
|
+
Agent processed a $50 refund for customer #12345 because:
|
|
224
|
+
(1) The purchase was within the 30-day return window,
|
|
225
|
+
(2) The amount was below the $100 automatic-approval threshold,
|
|
226
|
+
(3) The customer's account is in good standing.
|
|
227
|
+
Risk: LOW. Confidence: HIGH.
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
No API key? Explanations gracefully fall back to a shorter canned message. **AgentTrace never crashes because of a missing API key.**
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Architecture
|
|
235
|
+
|
|
236
|
+
```
|
|
237
|
+
Your Agent
|
|
238
|
+
│
|
|
239
|
+
▼ (Proxy intercept)
|
|
240
|
+
┌─────────────────────────────────────────┐
|
|
241
|
+
│ AgentTrace │
|
|
242
|
+
│ │
|
|
243
|
+
│ ┌─────────┐ ┌─────────────────────┐ │
|
|
244
|
+
│ │ Tracer │ │ Rule Engine │ │
|
|
245
|
+
│ │ │ │ (runs in parallel) │ │
|
|
246
|
+
│ │ Step 1 │ │ • block_pii │ │
|
|
247
|
+
│ │ Step 2 │ │ • block_financial │ │
|
|
248
|
+
│ │ Step 3 │ │ • block_harmful │ │
|
|
249
|
+
│ └─────────┘ │ • human_approval │ │
|
|
250
|
+
│ │ • hallucination │ │
|
|
251
|
+
│ │ • custom rules... │ │
|
|
252
|
+
│ └─────────────────────┘ │
|
|
253
|
+
│ │
|
|
254
|
+
│ ┌──────────────┐ ┌────────────────┐ │
|
|
255
|
+
│ │ Explainer │ │ Store │ │
|
|
256
|
+
│ │ (Anthropic │ │ (SQLite WAL) │ │
|
|
257
|
+
│ │ claude-3) │ │ │ │
|
|
258
|
+
│ └──────────────┘ └────────────────┘ │
|
|
259
|
+
└─────────────────────────────────────────┘
|
|
260
|
+
│
|
|
261
|
+
▼
|
|
262
|
+
GuardedResult {
|
|
263
|
+
blocked, reason, explanation,
|
|
264
|
+
riskLevel, auditId, auditTrail,
|
|
265
|
+
violations, result
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## Self-Hosted (Free Forever)
|
|
272
|
+
|
|
273
|
+
AgentTrace stores everything locally in SQLite. Zero cloud dependency. Zero data leaves your machine.
|
|
274
|
+
|
|
275
|
+
```
|
|
276
|
+
.agenttrace/
|
|
277
|
+
└── traces.db ← all your audit trails, WAL mode, fast
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
## Cloud Dashboard (Coming Soon)
|
|
281
|
+
|
|
282
|
+
- Real-time monitoring dashboard
|
|
283
|
+
- Team access and alerts
|
|
284
|
+
- Compliance reports (EU AI Act, SOC2)
|
|
285
|
+
- 1-year retention with search
|
|
286
|
+
|
|
287
|
+
→ [Join the waitlist](#)
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## FAQ
|
|
292
|
+
|
|
293
|
+
**Q: Does this add latency?**
|
|
294
|
+
A: Rules run in parallel. For the happy path (no violations), the overhead is typically <5ms. Explanation generation (optional) adds ~500-800ms via Anthropic's API.
|
|
295
|
+
|
|
296
|
+
**Q: What if my agent isn't an object with a `.run()` method?**
|
|
297
|
+
A: Use `guard.guardFn(async () => myFn(input), input)`.
|
|
298
|
+
|
|
299
|
+
**Q: Can I use this without an Anthropic API key?**
|
|
300
|
+
A: Yes. All rules work without any API key. The `explain: true` feature requires `ANTHROPIC_API_KEY` but falls back gracefully.
|
|
301
|
+
|
|
302
|
+
**Q: Is the audit trail tamper-proof?**
|
|
303
|
+
A: Not yet. The audit trail is currently stored in an append-only SQLite database in WAL mode; true cryptographic signing (a hash chain) is on the roadmap.
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## Contributing
|
|
308
|
+
|
|
309
|
+
PRs welcome! See [CONTRIBUTING.md](./CONTRIBUTING.md) for guidelines.
|
|
310
|
+
|
|
311
|
+
Key areas for contribution:
|
|
312
|
+
- New built-in rules (domain-specific)
|
|
313
|
+
- Agent framework integrations (AutoGen, Semantic Kernel, etc.)
|
|
314
|
+
- Better hallucination detection (semantic similarity, vector search)
|
|
315
|
+
- Cloud dashboard
|
|
316
|
+
- Hash-chain audit trail (tamper-proof)
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## License
|
|
321
|
+
|
|
322
|
+
MIT © 2026 AgentTrace Contributors
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## Why "Accountability" and not "Guardrails"?
|
|
327
|
+
|
|
328
|
+
> "Intelligence may be scalable, but accountability is not." — Accenture/Wharton, 2026
|
|
329
|
+
|
|
330
|
+
Guardrails are a feature. Accountability is a principle. Guardrails prevent bad outputs. Accountability explains every output — blocked or allowed — and creates a chain of evidence that stands up to audit.
|
|
331
|
+
|
|
332
|
+
We believe every AI agent action should be traceable, explainable, and controllable. **Not just the bad ones.**
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
# AgentTrace 🛡️
|
|
2
|
+
|
|
3
|
+
> **Block dangerous AI agent actions. Explain every decision. One line of code.**
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/agenttrace)
|
|
6
|
+
[PyPI package](https://pypi.org/project/ai-agenttrace/)
|
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
|
+
[](#)
|
|
9
|
+
|
|
10
|
+
*Your AI agent's conscience — blocks harm, explains reasoning, logs everything.*
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## The Problem
|
|
15
|
+
|
|
16
|
+
AI agents are making autonomous decisions. **Nobody knows why.**
|
|
17
|
+
|
|
18
|
+
When they go wrong, nobody can explain what happened.
|
|
19
|
+
|
|
20
|
+
- 🔴 **51%** of enterprises have AI agents in production ([Ringly.io, 2026](https://www.ringly.io/blog/ai-agent-statistics-2026))
|
|
21
|
+
- 🔴 **75%** have experienced negative consequences from GenAI ([McKinsey, 2025](https://tianpan.co/blog/2026-04-20-ai-audit-trail-user-trust-agent-transparency))
|
|
22
|
+
- 🔴 **42%** abandoned AI projects due to reliability issues ([S&P Global, 2025](https://galileo.ai/blog/best-agent-observability-platforms-scaling-generative-ai))
|
|
23
|
+
- 🔴 **"AI Accountability"** is now the #1 enterprise requirement for new AI tools ([GlobeNewsWire/Jitterbit, May 2026](https://www.globenewswire.com/news-release/2026/05/06/3288602/0/en/AI-Accountability-tops-list-of-enterprise-requirements-for-new-AI-tools.html))
|
|
24
|
+
|
|
25
|
+
The EU AI Act mandates explainability by December 2027. Boards want decision logs. Your customers want to trust your AI.
|
|
26
|
+
|
|
27
|
+
**Nobody else provides this combination: open-source + real-time blocking + plain-English explanations + full trace.**
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## What You Get
|
|
32
|
+
|
|
33
|
+
| Feature | AgentTrace | Langfuse | Portkey | Lakera |
|
|
34
|
+
|---------|-----------|---------|--------|--------|
|
|
35
|
+
| Blocks dangerous actions | ✅ | ❌ | ⚠️ Partial | ✅ (LLM only) |
|
|
36
|
+
| Explains WHY in plain English | ✅ | ❌ | ❌ | ❌ |
|
|
37
|
+
| Native AI agent support | ✅ | ✅ | ⚠️ Partial | ❌ |
|
|
38
|
+
| Open-source & self-hosted | ✅ | ✅ | ❌ | ❌ |
|
|
39
|
+
| Full audit trail | ✅ | ✅ | ⚠️ | ❌ |
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
### TypeScript / Node.js
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npm install agenttrace
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
```typescript
|
|
52
|
+
import { AgentTrace } from 'agenttrace';
|
|
53
|
+
|
|
54
|
+
const guard = new AgentTrace({
|
|
55
|
+
rules: [
|
|
56
|
+
'block_pii_leakage', // Stop PII leaking to users
|
|
57
|
+
'block_financial_advice', // No unqualified investment advice
|
|
58
|
+
'block_harmful_content', // Violence, illegal activities, self-harm
|
|
59
|
+
'require_human_approval', // Gate high-value transactions
|
|
60
|
+
],
|
|
61
|
+
explain: true, // Generate plain-English explanations
|
|
62
|
+
humanApproval: {
|
|
63
|
+
threshold: 1000, // Require approval for actions > $1,000
|
|
64
|
+
onApprovalRequired: async ({ description, amount }) => {
|
|
65
|
+
// Send Slack alert, email, UI prompt — whatever you need
|
|
66
|
+
return await myApprovalSystem.request(description, amount);
|
|
67
|
+
},
|
|
68
|
+
},
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
// Wrap your agent — same interface, now accountable
|
|
72
|
+
const safeAgent = guard.wrap(myAgent);
|
|
73
|
+
|
|
74
|
+
const result = await safeAgent.run("Process this customer refund");
|
|
75
|
+
|
|
76
|
+
// If BLOCKED:
|
|
77
|
+
// result.blocked → true
|
|
78
|
+
// result.reason → "Agent action BLOCKED. Violated rule(s): require_human_approval..."
|
|
79
|
+
// result.violations → [{ rule, description, severity, evidence }]
|
|
80
|
+
|
|
81
|
+
// If ALLOWED:
|
|
82
|
+
// result.blocked → false
|
|
83
|
+
// result.explanation → "Agent processed a $50 refund because the customer's..."
|
|
84
|
+
// result.riskLevel → 'LOW'
|
|
85
|
+
// result.auditTrail → [step1, step2, ...] — full reasoning chain
|
|
86
|
+
// result.auditId → 'uuid-...' — look it up later
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Python
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pip install ai-agenttrace
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from agenttrace import AgentTrace, AgentTraceOptions
|
|
97
|
+
|
|
98
|
+
guard = AgentTrace(AgentTraceOptions(
|
|
99
|
+
rules=["block_pii_leakage", "block_harmful_content", "block_financial_advice"],
|
|
100
|
+
debug=True,
|
|
101
|
+
))
|
|
102
|
+
|
|
103
|
+
safe_agent = guard.wrap(my_langchain_agent)
|
|
104
|
+
result = safe_agent.invoke("Process customer request")
|
|
105
|
+
|
|
106
|
+
print(result.blocked) # True/False
|
|
107
|
+
print(result.risk_level) # 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL'
|
|
108
|
+
print(result.audit_id) # UUID for audit trail lookup
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Built-in Rules
|
|
114
|
+
|
|
115
|
+
AgentTrace ships with 13 built-in rules designed to enforce enterprise-grade accountability.
|
|
116
|
+
|
|
117
|
+
| Rule | Category | What it blocks | Severity |
|
|
118
|
+
|------|----------|---------------|----------|
|
|
119
|
+
| `block_pii_leakage` | **Privacy** | Emails, phones, SSNs, credit card numbers, Aadhaar, API Keys. | HIGH–CRITICAL |
|
|
120
|
+
| `block_special_category_data` | **Privacy** | GDPR Art 9 data: health, genetics, sexual orientation, political views. | HIGH–CRITICAL |
|
|
121
|
+
| `block_manipulation` | **EU AI Act** | Art 5 prohibited practices: artificial urgency, dark patterns, gaslighting. | HIGH–CRITICAL |
|
|
122
|
+
| `block_discriminatory_output` | **Fairness** | EU Charter Art 21: Bias on race, gender, age, religion, nationality, disability. | CRITICAL |
|
|
123
|
+
| `block_ai_identity_deception` | **Transparency** | EU AI Act Art 50: Agents claiming to be human or denying being AI. | CRITICAL |
|
|
124
|
+
| `block_medical_advice` | **Professional** | Unqualified diagnosis, treatment recommendations, dosage instructions. | CRITICAL |
|
|
125
|
+
| `block_legal_advice` | **Professional** | Unauthorized Practice of Law (UPL): specific legal strategy advice. | HIGH |
|
|
126
|
+
| `block_financial_advice` | **Professional** | Investment recommendations, guaranteed returns, loan guidance. | HIGH |
|
|
127
|
+
| `block_prompt_injection` | **Security** | OWASP LLM01: Detects instruction overrides, persona hijacking, data exfil. | CRITICAL |
|
|
128
|
+
| `block_system_prompt_leakage` | **Security** | OWASP LLM07: Agent exposing its internal configuration or instructions. | HIGH |
|
|
129
|
+
| `block_harmful_content` | **Safety** | Violence, illegal instructions, self-harm, hate speech. | HIGH–CRITICAL |
|
|
130
|
+
| `require_human_approval` | **Oversight** | Actions above a $ threshold, irreversible/destructive operations. | HIGH–CRITICAL |
|
|
131
|
+
| `block_hallucination` | **Quality** | Factual claims not supported by your RAG context documents. | HIGH |
|
|
132
|
+
|
|
133
|
+
All rules run **in parallel**, so the happy path adds negligible latency (typically <5ms). You can group these rules with pre-configured bundles such as `COMPLIANCE_BUNDLES.EU_AI_ACT` or `COMPLIANCE_BUNDLES.OWASP_LLM`.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Custom Rules
|
|
138
|
+
|
|
139
|
+
Write your own rules in 5 lines:
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
import { createRule, AgentTrace } from 'agenttrace';
|
|
143
|
+
|
|
144
|
+
const noCompetitorMentions = createRule(
|
|
145
|
+
'no_competitor_mentions',
|
|
146
|
+
async ({ result }) => {
|
|
147
|
+
const text = JSON.stringify(result);
|
|
148
|
+
if (text.toLowerCase().includes('rival-corp')) {
|
|
149
|
+
return [{ rule: 'no_competitor_mentions', description: 'Competitor mentioned', severity: 'MEDIUM' }];
|
|
150
|
+
}
|
|
151
|
+
return [];
|
|
152
|
+
}
|
|
153
|
+
);
|
|
154
|
+
|
|
155
|
+
const guard = new AgentTrace({ rules: [noCompetitorMentions, 'block_pii_leakage'] });
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Audit Trail
|
|
161
|
+
|
|
162
|
+
Every agent run is automatically stored in a local SQLite database:
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
// Query your audit trail
|
|
166
|
+
const recent = guard.storage?.getRecent(20);
|
|
167
|
+
const blocked = guard.storage?.getBlocked();
|
|
168
|
+
const stats = guard.storage?.stats();
|
|
169
|
+
// → { total: 142, blocked: 3, byRiskLevel: { LOW: 138, HIGH: 3, CRITICAL: 1 } }
|
|
170
|
+
|
|
171
|
+
// Look up a specific run
|
|
172
|
+
const run = guard.storage?.getById('audit-uuid-here');
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## Works With
|
|
178
|
+
|
|
179
|
+
- ✅ **OpenAI** — Assistants, Responses API, Chat Completions
|
|
180
|
+
- ✅ **LangChain / LangGraph** — any `.invoke()` or `.run()` agent
|
|
181
|
+
- ✅ **CrewAI** — crew.kickoff()
|
|
182
|
+
- ✅ **Anthropic** — tool use agents
|
|
183
|
+
- ✅ **Any async function** — use `guard.guardFn()`
|
|
184
|
+
|
|
185
|
+
```typescript
|
|
186
|
+
// Works with any async function — no agent object needed
|
|
187
|
+
const result = await guard.guardFn(
|
|
188
|
+
async () => await myCustomAgent.process(input),
|
|
189
|
+
input // original task for tracing
|
|
190
|
+
);
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Explanation Engine
|
|
196
|
+
|
|
197
|
+
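Set `explain: true` and add `ANTHROPIC_API_KEY` to get plain-English explanations (the Python package's `OpenAICompatibleExplainer` reads `FEATHERLESS_API_KEY` or `OPENAI_API_KEY` instead):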
|
|
198
|
+
|
|
199
|
+
```
|
|
200
|
+
Agent processed a $50 refund for customer #12345 because:
|
|
201
|
+
(1) The purchase was within the 30-day return window,
|
|
202
|
+
(2) The amount was below the $100 automatic-approval threshold,
|
|
203
|
+
(3) The customer's account is in good standing.
|
|
204
|
+
Risk: LOW. Confidence: HIGH.
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
No API key? Explanations gracefully fall back to a shorter canned message. **AgentTrace never crashes because of a missing API key.**
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Architecture
|
|
212
|
+
|
|
213
|
+
```
|
|
214
|
+
Your Agent
|
|
215
|
+
│
|
|
216
|
+
▼ (Proxy intercept)
|
|
217
|
+
┌─────────────────────────────────────────┐
|
|
218
|
+
│ AgentTrace │
|
|
219
|
+
│ │
|
|
220
|
+
│ ┌─────────┐ ┌─────────────────────┐ │
|
|
221
|
+
│ │ Tracer │ │ Rule Engine │ │
|
|
222
|
+
│ │ │ │ (runs in parallel) │ │
|
|
223
|
+
│ │ Step 1 │ │ • block_pii │ │
|
|
224
|
+
│ │ Step 2 │ │ • block_financial │ │
|
|
225
|
+
│ │ Step 3 │ │ • block_harmful │ │
|
|
226
|
+
│ └─────────┘ │ • human_approval │ │
|
|
227
|
+
│ │ • hallucination │ │
|
|
228
|
+
│ │ • custom rules... │ │
|
|
229
|
+
│ └─────────────────────┘ │
|
|
230
|
+
│ │
|
|
231
|
+
│ ┌──────────────┐ ┌────────────────┐ │
|
|
232
|
+
│ │ Explainer │ │ Store │ │
|
|
233
|
+
│ │ (Anthropic │ │ (SQLite WAL) │ │
|
|
234
|
+
│ │ claude-3) │ │ │ │
|
|
235
|
+
│ └──────────────┘ └────────────────┘ │
|
|
236
|
+
└─────────────────────────────────────────┘
|
|
237
|
+
│
|
|
238
|
+
▼
|
|
239
|
+
GuardedResult {
|
|
240
|
+
blocked, reason, explanation,
|
|
241
|
+
riskLevel, auditId, auditTrail,
|
|
242
|
+
violations, result
|
|
243
|
+
}
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## Self-Hosted (Free Forever)
|
|
249
|
+
|
|
250
|
+
AgentTrace stores everything locally in SQLite. Zero cloud dependency. Zero data leaves your machine.
|
|
251
|
+
|
|
252
|
+
```
|
|
253
|
+
.agenttrace/
|
|
254
|
+
└── traces.db ← all your audit trails, WAL mode, fast
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Cloud Dashboard (Coming Soon)
|
|
258
|
+
|
|
259
|
+
- Real-time monitoring dashboard
|
|
260
|
+
- Team access and alerts
|
|
261
|
+
- Compliance reports (EU AI Act, SOC2)
|
|
262
|
+
- 1-year retention with search
|
|
263
|
+
|
|
264
|
+
→ [Join the waitlist](#)
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## FAQ
|
|
269
|
+
|
|
270
|
+
**Q: Does this add latency?**
|
|
271
|
+
A: Rules run in parallel. For the happy path (no violations), the overhead is typically <5ms. Explanation generation (optional) adds ~500-800ms via Anthropic's API.
|
|
272
|
+
|
|
273
|
+
**Q: What if my agent isn't an object with a `.run()` method?**
|
|
274
|
+
A: Use `guard.guardFn(async () => myFn(input), input)`.
|
|
275
|
+
|
|
276
|
+
**Q: Can I use this without an Anthropic API key?**
|
|
277
|
+
A: Yes. All rules work without any API key. The `explain: true` feature requires `ANTHROPIC_API_KEY` but falls back gracefully.
|
|
278
|
+
|
|
279
|
+
**Q: Is the audit trail tamper-proof?**
|
|
280
|
+
A: Not yet. The audit trail is currently stored in an append-only SQLite database in WAL mode; true cryptographic signing (a hash chain) is on the roadmap.
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## Contributing
|
|
285
|
+
|
|
286
|
+
PRs welcome! See [CONTRIBUTING.md](./CONTRIBUTING.md) for guidelines.
|
|
287
|
+
|
|
288
|
+
Key areas for contribution:
|
|
289
|
+
- New built-in rules (domain-specific)
|
|
290
|
+
- Agent framework integrations (AutoGen, Semantic Kernel, etc.)
|
|
291
|
+
- Better hallucination detection (semantic similarity, vector search)
|
|
292
|
+
- Cloud dashboard
|
|
293
|
+
- Hash-chain audit trail (tamper-proof)
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
## License
|
|
298
|
+
|
|
299
|
+
MIT © 2026 AgentTrace Contributors
|
|
300
|
+
|
|
301
|
+
---
|
|
302
|
+
|
|
303
|
+
## Why "Accountability" and not "Guardrails"?
|
|
304
|
+
|
|
305
|
+
> "Intelligence may be scalable, but accountability is not." — Accenture/Wharton, 2026
|
|
306
|
+
|
|
307
|
+
Guardrails are a feature. Accountability is a principle. Guardrails prevent bad outputs. Accountability explains every output — blocked or allowed — and creates a chain of evidence that stands up to audit.
|
|
308
|
+
|
|
309
|
+
We believe every AI agent action should be traceable, explainable, and controllable. **Not just the bad ones.**
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from .types import (
|
|
2
|
+
AgentTraceOptions,
|
|
3
|
+
GuardedResult,
|
|
4
|
+
RuleContext,
|
|
5
|
+
Violation,
|
|
6
|
+
Trace,
|
|
7
|
+
TraceStep,
|
|
8
|
+
RiskLevel,
|
|
9
|
+
Rule
|
|
10
|
+
)
|
|
11
|
+
from .guard import AgentTrace
|
|
12
|
+
from .rules import CustomRule
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"AgentTrace",
|
|
16
|
+
"AgentTraceOptions",
|
|
17
|
+
"GuardedResult",
|
|
18
|
+
"RuleContext",
|
|
19
|
+
"Violation",
|
|
20
|
+
"Trace",
|
|
21
|
+
"TraceStep",
|
|
22
|
+
"RiskLevel",
|
|
23
|
+
"Rule",
|
|
24
|
+
"CustomRule"
|
|
25
|
+
]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
from typing import Any, List, Optional
|
|
4
|
+
from openai import AsyncOpenAI
|
|
5
|
+
from .types import ExplainerProvider, Trace, Violation
|
|
6
|
+
|
|
7
|
+
class NoOpExplainer(ExplainerProvider):
    """Explainer that performs no model calls and answers with fixed, canned text."""

    async def explain_allow(self, result: Any, trace: Trace) -> str:
        """Return the canned message for an allowed action.

        Neither ``result`` nor ``trace`` is inspected; the same sentence is
        returned for every call.
        """
        canned_message = "Action completed successfully and passed all safety checks."
        return canned_message

    async def explain_block(self, violations: List[Violation], trace: Trace) -> str:
        """Return the canned message for a blocked action.

        The ``rule`` attribute of each violation is listed, comma-separated,
        in the order given. ``trace`` is not inspected.
        """
        violated_rules = [violation.rule for violation in violations]
        joined_rules = ", ".join(violated_rules)
        return f"Action BLOCKED. Violated rule(s): {joined_rules}. Human review required."
|
|
14
|
+
|
|
15
|
+
class OpenAICompatibleExplainer(ExplainerProvider):
    """Explainer backed by an OpenAI-compatible chat-completions endpoint.

    Every failure mode (no API key, request error, empty completion) degrades
    to the canned ``NoOpExplainer`` text, so explaining never raises into the
    guarded call.

    NOTE(review): the prompts include the trace input/steps/output and the
    violations (including their ``evidence``), so flagged content is sent to
    the configured endpoint — confirm this is acceptable for the deployment.
    """

    _FEATHERLESS_BASE_URL = "https://api.featherless.ai/v1"
    _FEATHERLESS_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
    _DEFAULT_MODEL = "gpt-3.5-turbo"
    _ALLOW_SYSTEM_PROMPT = "You are an AI decision auditor. Explain WHY the agent produced this output in 2-3 sentences. Mention key factors, reasoning pattern, and confidence level. Write for a non-technical person."
    _BLOCK_SYSTEM_PROMPT = "You are an AI compliance officer. Explain WHY this agent action was BLOCKED. Be clear, authoritative, and mention the specific rule violation."

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None, model: Optional[str] = None):
        """Resolve credentials and endpoint.

        Args:
            api_key: Explicit key; falls back to ``FEATHERLESS_API_KEY`` and
                then ``OPENAI_API_KEY`` from the environment.
            base_url: Explicit endpoint; defaults to Featherless only when the
                Featherless key is the key actually in use.
            model: Explicit model name; defaults follow the chosen endpoint.
        """
        featherless_key = os.environ.get("FEATHERLESS_API_KEY")
        openai_key = os.environ.get("OPENAI_API_KEY")

        self.api_key = api_key or featherless_key or openai_key
        # Apply the Featherless defaults only when the Featherless key is the
        # one in use. Previously an explicitly passed key was routed to
        # Featherless whenever FEATHERLESS_API_KEY happened to be set.
        use_featherless = bool(featherless_key) and self.api_key == featherless_key
        self.base_url = base_url or (self._FEATHERLESS_BASE_URL if use_featherless else None)
        self.model = model or (self._FEATHERLESS_MODEL if use_featherless else self._DEFAULT_MODEL)

        # Without a key there is no client; both explain_* methods then use the no-op text.
        self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url) if self.api_key else None

    async def _complete(self, system_prompt: str, user_prompt: str) -> Optional[str]:
        """Send one system+user exchange and return the completion text (may be None/empty)."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=300,
        )
        return response.choices[0].message.content

    async def explain_allow(self, result: Any, trace: Trace) -> str:
        """Explain why an allowed action produced ``result``; falls back to the canned text on any failure."""
        fallback = NoOpExplainer()
        if not self.client:
            return await fallback.explain_allow(result, trace)

        try:
            # default=str keeps non-JSON-native inputs/outputs from raising and
            # silently forcing the canned fallback.
            task = json.dumps(trace.original_input, default=str)
            steps = json.dumps([s.model_dump() for s in trace.steps], default=str)
            output = json.dumps(result, default=str)
            content = await self._complete(
                self._ALLOW_SYSTEM_PROMPT,
                f"TASK: {task}\nSTEPS TAKEN: {steps}\nFINAL OUTPUT: {output}",
            )
        except Exception:
            # Best effort by design: an explainer failure must not break the guarded call.
            content = None
        return content or await fallback.explain_allow(result, trace)

    async def explain_block(self, violations: List[Violation], trace: Trace) -> str:
        """Explain why an action was blocked; falls back to the canned text on any failure."""
        fallback = NoOpExplainer()
        if not self.client:
            return await fallback.explain_block(violations, trace)

        try:
            details = json.dumps([v.model_dump() for v in violations], default=str)
            content = await self._complete(
                self._BLOCK_SYSTEM_PROMPT,
                f"VIOLATIONS: {details}\nATTEMPTED ACTION: {trace.last_action}",
            )
        except Exception:
            # Best effort by design: an explainer failure must not break the guarded call.
            content = None
        return content or await fallback.explain_block(violations, trace)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
import inspect
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Any, Callable, Coroutine, Dict, Optional
|
|
5
|
+
|
|
6
|
+
from .types import AgentTraceOptions, GuardedResult, RuleContext, Trace, TraceStep
|
|
7
|
+
from .rules import resolve_rules, run_all_rules
|
|
8
|
+
from .explainer import NoOpExplainer, OpenAICompatibleExplainer
|
|
9
|
+
from .store import Store
|
|
10
|
+
|
|
11
|
+
class AgentTrace:
    """Runs an async agent callable, checks its output against rules, and records the outcome.

    The explainer and the optional on-disk store are chosen from ``options``
    at construction time.
    """

    # Severities from least to most severe; used to rank violations.
    _SEVERITY_ORDER = ("LOW", "MEDIUM", "HIGH", "CRITICAL")
    # Attribute names that ``wrap`` intercepts when they are coroutine functions.
    _GUARDED_METHODS = ("invoke", "run", "chat", "generate", "call", "__call__")

    def __init__(self, options: AgentTraceOptions):
        """Resolve rules and set up the explainer/store described by ``options``."""
        self.options = options
        self.rules = resolve_rules(options.rules)
        self.explainer = OpenAICompatibleExplainer() if options.explain else NoOpExplainer()
        self.store = Store(options.storage_path) if options.persist else None

    async def guard_fn(self, func: Callable[..., Coroutine[Any, Any, Any]], original_input: Any, *args, **kwargs) -> GuardedResult:
        """Call ``func(*args, **kwargs)``, evaluate the rules on its result, and build a GuardedResult.

        Args:
            func: Async callable to run.
            original_input: Value recorded in the trace as the input.
            *args, **kwargs: Forwarded to ``func`` unchanged.

        Returns:
            A GuardedResult. With violations in 'enforce' mode it is blocked
            and carries no ``result``; in 'shadow' mode it is not blocked and
            carries the result alongside the violations.

        Raises:
            Whatever ``func`` raises; agent exceptions propagate unchanged.
        """
        # Callable objects (functools.partial, instances) may lack __name__.
        action_name = getattr(func, "__name__", type(func).__name__)
        trace = Trace(
            id=str(uuid.uuid4()),
            started_at=datetime.utcnow().isoformat() + "Z",
            original_input=original_input,
            last_action=action_name,
        )

        start_time = datetime.utcnow()
        result = await func(*args, **kwargs)
        finished_at = datetime.utcnow()

        trace.steps.append(TraceStep(
            step_index=1,
            timestamp=finished_at.isoformat() + "Z",
            action=action_name,
            input=original_input,
            output=result,
            duration_ms=int((finished_at - start_time).total_seconds() * 1000),
        ))

        ctx = RuleContext(result=result, trace=trace, guard_options=self.options)
        violations = await run_all_rules(self.rules, ctx)

        if violations:
            # Shadow mode reports violations but still hands the result back.
            is_shadow = self.options.enforcementMode == 'shadow'
            explanation = await self.explainer.explain_block(violations, trace)

            # Report the most severe violation. Previously anything short of
            # CRITICAL was reported as HIGH, even if every violation was LOW or MEDIUM.
            rank = {level: position for position, level in enumerate(self._SEVERITY_ORDER)}
            highest_severity = max(
                (v.severity for v in violations),
                key=lambda level: rank.get(level, rank["HIGH"]),
            )

            guarded_result = GuardedResult(
                audit_id=trace.id,
                blocked=not is_shadow,
                reason=explanation,
                explanation=explanation,
                risk_level=highest_severity,  # type: ignore
                audit_trail=trace.steps,
                violations=violations,
                result=result if is_shadow else None,
                timestamp=datetime.utcnow().isoformat() + "Z",
                metadata=self.options.metadata,
            )
        else:
            explanation = await self.explainer.explain_allow(result, trace)
            guarded_result = GuardedResult(
                audit_id=trace.id,
                blocked=False,
                explanation=explanation,
                risk_level="LOW",
                audit_trail=trace.steps,
                result=result,
                timestamp=datetime.utcnow().isoformat() + "Z",
                metadata=self.options.metadata,
            )

        if self.store:
            self.store.save(guarded_result)

        return guarded_result

    def wrap(self, agent: Any) -> Any:
        """
        Creates a proxy-like wrapper around an agent instance.
        Intercepts 'invoke', 'run', 'chat', etc. when they are coroutine
        functions; every other attribute is passed through untouched.
        Calling the proxy itself guards an async callable target.
        """
        guarded_names = self._GUARDED_METHODS

        class AgentProxy:
            def __init__(self, target, guard):
                self._target = target
                self._guard = guard

            def __getattr__(self, name):
                # __getattr__ only runs when normal lookup fails. If the proxy's
                # own slots are missing (e.g. during copy/unpickle), delegating
                # through self._target would recurse forever.
                if name in ("_target", "_guard"):
                    raise AttributeError(name)

                target_attr = getattr(self._target, name)
                if name in guarded_names and inspect.iscoroutinefunction(target_attr):
                    async def wrapper(*args, **kwargs):
                        # Extract first arg as the input for tracing if possible
                        original_input = args[0] if args else kwargs
                        return await self._guard.guard_fn(target_attr, original_input, *args, **kwargs)
                    return wrapper
                return target_attr

            async def __call__(self, *args, **kwargs):
                if inspect.iscoroutinefunction(self._target):
                    target_func = self._target
                elif hasattr(self._target, '__call__') and inspect.iscoroutinefunction(self._target.__call__):
                    target_func = self._target.__call__
                else:
                    raise TypeError("Wrapped target is not an async callable.")
                original_input = args[0] if args else kwargs
                return await self._guard.guard_fn(target_func, original_input, *args, **kwargs)

        return AgentProxy(agent, self)
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
from typing import Any, List, Protocol
|
|
4
|
+
from .types import RuleContext, Violation, Rule
|
|
5
|
+
|
|
6
|
+
def _extract_text(value: Any) -> str:
|
|
7
|
+
if isinstance(value, str):
|
|
8
|
+
return value
|
|
9
|
+
try:
|
|
10
|
+
return json.dumps(value)
|
|
11
|
+
except Exception:
|
|
12
|
+
return str(value)
|
|
13
|
+
|
|
14
|
+
# --- Base Custom Rule ---
|
|
15
|
+
class CustomRule:
    """User-defined rule that delegates its check to a supplied callable.

    Args:
        name: Rule identifier.
        check_func: Callable taking a RuleContext and returning a list of
            Violation objects. It may be an ``async def`` function or a plain
            function.
        description: Optional human-readable summary.
    """

    def __init__(self, name: str, check_func, description: str = ""):
        self.name = name
        self.description = description
        self.check_func = check_func

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Run ``check_func`` on ``ctx`` and return its violations (empty list if it returns None)."""
        outcome = self.check_func(ctx)
        # Await only when the callable produced an awaitable, so synchronous
        # check functions work too instead of failing on ``await``.
        if hasattr(outcome, "__await__"):
            outcome = await outcome
        # A check function that simply falls off the end means "no violations".
        return outcome if outcome is not None else []
|
|
23
|
+
|
|
24
|
+
# --- 1. PII Leakage ---
|
|
25
|
+
class PiiRule:
    """Flags output that contains PII-like or secret-like strings.

    Each entry in PATTERNS is ``(compiled regex, label, severity)``.
    """

    name = "block_pii_leakage"
    description = "Detects sensitive PII leakage."

    PATTERNS = [
        # TLD class was "[A-Z|a-z]", which also accepted a literal "|".
        (re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'), 'Email address', 'HIGH'),
        (re.compile(r'\b(?:\+?1[-.\s]?)?\(?[2-9]\d{2}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'), 'Phone number', 'HIGH'),
        (re.compile(r'\b\d{3}-\d{2}-\d{4}\b'), 'Social Security Number', 'CRITICAL'),
        (re.compile(r'\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|6(?:011|5[0-9]{2})[0-9]{12}|(?:2131|1800|35\d{3})\d{11})\b'), 'Credit Card Number', 'CRITICAL'),
        (re.compile(r'\b[2-9]{1}[0-9]{3}\s[0-9]{4}\s[0-9]{4}\b'), 'Aadhaar Number', 'CRITICAL'),
        (re.compile(r'\b(sk-proj-[A-Za-z0-9_-]+|sk-ant-[A-Za-z0-9_-]+|xox[baprs]-[A-Za-z0-9_-]+)\b'), 'API Key', 'CRITICAL'),
    ]

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return one violation per PII category found in the textual form of ``ctx.result``.

        Every category is checked. Stopping at the first hit let a HIGH match
        (e.g. an email) hide a CRITICAL one (e.g. an SSN or API key) that
        appears in the same output.
        """
        text = _extract_text(ctx.result)
        violations = []
        for pattern, desc, severity in self.PATTERNS:
            match = pattern.search(text)
            if match is None:
                continue
            violations.append(Violation(
                rule=self.name,
                description=f"Output contains {desc}",
                # NOTE(review): evidence is the matched sensitive value itself
                # (truncated to 80 chars); consider masking before it is stored
                # or forwarded anywhere.
                evidence=match.group(0)[:80],
                severity=severity,  # type: ignore
                remediation="Redact the sensitive data."
            ))
        return violations
|
|
53
|
+
|
|
54
|
+
# --- 2. Financial Advice ---
|
|
55
|
+
class FinancialAdviceRule:
    """Flags output that reads like a specific investment recommendation or a guaranteed-return claim."""

    name = "block_financial_advice"
    description = "Blocks unqualified financial advice."

    PATTERNS = [
        re.compile(r'\b(you should (buy|sell|invest|short|hold)|I (recommend|suggest) (buying|selling|investing)).{0,30}(stock|crypto|bitcoin|shares|options)\b', re.I),
        re.compile(r'\b(guaranteed (return|profit)|risk-free|100% (return|profit|safe))\b', re.I),
    ]

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single HIGH violation for the first pattern that matches, else an empty list."""
        haystack = _extract_text(ctx.result)
        # Lazily try each pattern in order and keep the first hit.
        first_hit = next(
            (found for found in (regex.search(haystack) for regex in self.PATTERNS) if found),
            None,
        )
        if first_hit is None:
            return []
        return [Violation(
            rule=self.name,
            description="Output contains specific investment recommendations",
            evidence=first_hit.group(0)[:80],
            severity="HIGH",
            remediation="Replace with general information."
        )]
|
|
77
|
+
|
|
78
|
+
# --- 3. Harmful Content ---
|
|
79
|
+
class HarmfulContentRule:
    """Flags output containing instructions for dangerous items or self-harm encouragement phrases."""

    name = "block_harmful_content"
    description = "Blocks harmful or illegal content."

    PATTERNS = [
        re.compile(r'\b(how to (build|make|create) a (bomb|weapon|meth|drug|poison))\b', re.I),
        re.compile(r'\b(kill yourself|commit suicide|end your life)\b', re.I),
    ]

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single CRITICAL violation for the first matching pattern, else an empty list."""
        haystack = _extract_text(ctx.result)
        for regex in self.PATTERNS:
            found = regex.search(haystack)
            if not found:
                continue
            violation = Violation(
                rule=self.name,
                description="Output contains severe harmful content.",
                evidence=found.group(0)[:80],
                severity="CRITICAL",
            )
            return [violation]
        return []
|
|
100
|
+
|
|
101
|
+
# --- 4. Special Category Data (GDPR Art 9) ---
|
|
102
|
+
class SpecialCategoryRule:
    """Flags output mentioning health or sexual-orientation keywords.

    Each PATTERNS entry is ``(compiled regex, category label, severity)``.
    """

    name = "block_special_category_data"
    description = "Blocks GDPR Art 9 special category data."

    PATTERNS = [
        (re.compile(r'\b(diagnosis|medical condition|HIV|cancer|diabetes)\b', re.I), 'health data', 'CRITICAL'),
        (re.compile(r'\b(sexual orientation|transgender|non-binary|gay)\b', re.I), 'sexual orientation', 'CRITICAL'),
    ]

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return one violation for the first category whose pattern matches, else an empty list."""
        haystack = _extract_text(ctx.result)
        for regex, category, level in self.PATTERNS:
            found = regex.search(haystack)
            if not found:
                continue
            return [Violation(
                rule=self.name,
                description=f"Output contains {category}",
                evidence=found.group(0)[:80],
                severity=level,  # type: ignore
            )]
        return []
|
|
123
|
+
|
|
124
|
+
# --- 5. Manipulation (EU AI Act Art 5) ---
|
|
125
|
+
class ManipulationRule:
    """Flags urgency-pressure and gaslighting phrases.

    Each PATTERNS entry is ``(compiled regex, technique label, severity)``.
    """

    name = "block_manipulation"
    description = "Blocks manipulation / dark patterns."

    PATTERNS = [
        (re.compile(r'\b(act now|limited time|hurry|only \d+ left)\b', re.I), 'artificial urgency', 'HIGH'),
        (re.compile(r'\b(you\'re (imagining|wrong|overreacting)|that didn\'t happen)\b', re.I), 'gaslighting', 'CRITICAL'),
    ]

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return one violation for the first technique whose pattern matches, else an empty list."""
        haystack = _extract_text(ctx.result)
        # Lazily evaluate patterns in order; stop at the first one that matches.
        candidates = (
            (regex.search(haystack), technique, level)
            for regex, technique, level in self.PATTERNS
        )
        first = next((entry for entry in candidates if entry[0]), None)
        if first is None:
            return []
        found, technique, level = first
        return [Violation(
            rule=self.name,
            description=f"Output uses {technique}",
            evidence=found.group(0)[:80],
            severity=level,  # type: ignore
        )]
|
|
146
|
+
|
|
147
|
+
# --- 6. Discriminatory Output (EU Charter Art 21) ---
|
|
148
|
+
class DiscriminatoryRule:
    """Flags "because he/she/they is|are <protected attribute>" style justifications."""

    name = "block_discriminatory_output"
    description = "Blocks discriminatory output."

    PATTERNS = [
        # "(is|are)" so the grammatical "because they are ..." is caught as
        # well as "because he is ...".
        re.compile(r'\b(because (he|she|they) (is|are) (a )?(woman|man|old|young|disabled|Muslim|Jewish|Black|White|Asian))\b', re.I)
    ]

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single CRITICAL violation for the first matching pattern, else an empty list."""
        text = _extract_text(ctx.result)
        for pattern in self.PATTERNS:
            match = pattern.search(text)
            if match:
                return [Violation(
                    rule=self.name,
                    description="Output contains discriminatory language",
                    # Include the matched text, as the other pattern-based rules do.
                    evidence=match.group(0)[:80],
                    severity="CRITICAL",
                )]
        return []
|
|
167
|
+
|
|
168
|
+
# --- 7. AI Identity Deception (EU AI Act Art 50) ---
|
|
169
|
+
class IdentityDeceptionRule:
    """Flags output in which the agent claims to be human or denies being an AI."""

    name = "block_ai_identity_deception"
    description = "Blocks AI agents claiming to be human."

    PATTERNS = [
        re.compile(r'\b(I am (a |an )?(human|real person|person|human being))\b', re.I),
        re.compile(r'\b(I(\'m| am) not (an |a )?(AI|bot|language model))\b', re.I),
    ]

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single CRITICAL violation for the first matching pattern, else an empty list.

        Output that contains an explicit AI self-disclosure is exempt.
        """
        text = _extract_text(ctx.result)
        # NOTE(review): this exemption is case-sensitive and applies to the whole
        # output, so any text containing "I am an AI" skips the check even if it
        # also claims to be human elsewhere — confirm this is intended.
        if "I am an AI" in text or "I'm an AI" in text:
            return []

        for pattern in self.PATTERNS:
            match = pattern.search(text)
            if match:
                return [Violation(
                    rule=self.name,
                    description="Agent output claims to be human or denies being an AI",
                    # Include the matched text, as the other pattern-based rules do.
                    evidence=match.group(0)[:80],
                    severity="CRITICAL",
                )]
        return []
|
|
192
|
+
|
|
193
|
+
# --- Professional Advice ---
|
|
194
|
+
class MedicalAdviceRule:
    """Flags first-person diagnose/recommend statements about taking or using medication."""

    name = "block_medical_advice"
    description = "Blocks medical advice."

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single CRITICAL violation when the advice pattern matches, else an empty list."""
        output_text = _extract_text(ctx.result)
        found = re.search(
            r'\b(I (diagnose|recommend) (taking|using) (medication|drug|dose))\b',
            output_text,
            re.I,
        )
        if not found:
            return []
        return [Violation(rule=self.name, description="Medical advice", severity="CRITICAL")]
|
|
201
|
+
|
|
202
|
+
class LegalAdviceRule:
    """Flags output that advises suing or filing a lawsuit."""

    name = "block_legal_advice"
    description = "Blocks legal advice."

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single HIGH violation when the advice pattern matches, else an empty list."""
        output_text = _extract_text(ctx.result)
        found = re.search(r'\b(my advice is (to )?sue|file a lawsuit)\b', output_text, re.I)
        if not found:
            return []
        return [Violation(rule=self.name, description="Legal advice", severity="HIGH")]
|
|
209
|
+
|
|
210
|
+
# --- Security ---
|
|
211
|
+
class PromptInjectionRule:
    """Flags output that echoes common prompt-injection phrases."""

    name = "block_prompt_injection"
    description = "Blocks prompt injection leakage in output."

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single CRITICAL violation when an injection phrase is found, else an empty list."""
        # "system prompt:" gets no trailing \b. A word boundary after ":" only
        # exists when a word character follows, so the original pattern missed
        # the usual "system prompt: ..." form (colon followed by a space,
        # newline, or end of text).
        if re.search(
            r'\b(?:ignore previous instructions|bypass filter)\b|\bsystem prompt:',
            _extract_text(ctx.result),
            re.I,
        ):
            return [Violation(rule=self.name, description="Prompt injection", severity="CRITICAL")]
        return []
|
|
218
|
+
|
|
219
|
+
class SystemPromptLeakageRule:
    """Flags output in which the agent talks about its system prompt or instructions."""

    name = "block_system_prompt_leakage"
    description = "Blocks system prompt leakage."

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Return a single HIGH violation when a leakage phrase is found, else an empty list."""
        output_text = _extract_text(ctx.result)
        found = re.search(r'\b(my system prompt (is|instructs)|I was instructed to)\b', output_text, re.I)
        if not found:
            return []
        return [Violation(rule=self.name, description="System prompt leakage", severity="HIGH")]
|
|
226
|
+
|
|
227
|
+
# --- Quality ---
|
|
228
|
+
class HallucinationRule:
    """Placeholder rule: currently never reports a violation."""

    name = "block_hallucination"
    description = "Checks RAG context."

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Always return an empty list; the real check is not implemented yet."""
        # Placeholder, real impl requires vector check
        nothing_found: List[Violation] = []
        return nothing_found
|
|
233
|
+
|
|
234
|
+
class HumanApprovalRule:
    """Placeholder rule: currently never reports a violation."""

    name = "require_human_approval"
    description = "Requires human approval for thresholds."

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Always return an empty list; the threshold check is not implemented yet."""
        # Implementation depends on trace action parsing
        nothing_found: List[Violation] = []
        return nothing_found
|
|
240
|
+
|
|
241
|
+
# --- Registry ---
|
|
242
|
+
# Maps each built-in rule name to one shared rule instance created at import
# time. resolve_rules() looks string specs up here.
BUILT_IN_RULES = {
    'block_pii_leakage': PiiRule(),
    'block_special_category_data': SpecialCategoryRule(),
    'block_manipulation': ManipulationRule(),
    'block_harmful_content': HarmfulContentRule(),
    'block_discriminatory_output': DiscriminatoryRule(),
    'block_ai_identity_deception': IdentityDeceptionRule(),
    'block_financial_advice': FinancialAdviceRule(),
    'block_medical_advice': MedicalAdviceRule(),
    'block_legal_advice': LegalAdviceRule(),
    'block_prompt_injection': PromptInjectionRule(),
    'block_system_prompt_leakage': SystemPromptLeakageRule(),
    'block_hallucination': HallucinationRule(),
    'require_human_approval': HumanApprovalRule(),
}
|
|
257
|
+
|
|
258
|
+
def resolve_rules(rule_specs: List[Any]) -> List[Rule]:
    """Turn a list of rule specs into rule objects, preserving order.

    String specs are looked up in BUILT_IN_RULES; any other spec is used as-is.

    Raises:
        ValueError: if a string spec is not a known built-in rule name.
    """
    def _resolve_one(spec: Any) -> Any:
        if not isinstance(spec, str):
            return spec
        if spec not in BUILT_IN_RULES:
            raise ValueError(f"Unknown built-in rule: {spec}")
        return BUILT_IN_RULES[spec]

    return [_resolve_one(spec) for spec in rule_specs]
|
|
269
|
+
|
|
270
|
+
async def run_all_rules(rules: List[Rule], ctx: RuleContext) -> List[Violation]:
    """Run every rule against ``ctx`` one after another and return all violations in rule order."""
    collected = []
    # Sequential on purpose for now; asyncio.gather would be the concurrent alternative.
    for current_rule in rules:
        collected.extend(await current_rule.check(ctx))
    return collected
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
from .types import GuardedResult
|
|
5
|
+
|
|
6
|
+
class Store:
    """Append-only NDJSON store for guarded results (one JSON object per line)."""

    def __init__(self, storage_path: str):
        """Remember ``storage_path`` and create its parent directory if needed."""
        self.storage_path = storage_path
        self._ensure_dir()

    def _ensure_dir(self):
        """Create the parent directory of ``storage_path`` when it has one and it does not exist."""
        directory = os.path.dirname(self.storage_path)
        if directory and not os.path.exists(directory):
            os.makedirs(directory, exist_ok=True)

    def save(self, result: GuardedResult) -> None:
        """Append ``result`` as one JSON line. Never raises; failures are logged as warnings."""
        try:
            # We must serialize the Pydantic models correctly
            data = result.model_dump()
            # default=str: one non-JSON-native value in result/metadata should
            # not cause the whole audit record to be dropped.
            line = json.dumps(data, default=str)
            with open(self.storage_path, "a", encoding="utf-8") as f:
                f.write(line + "\n")
        except Exception as exc:
            # Don't fail the agent run just because logging failed, but leave a
            # visible trace instead of losing the record silently.
            import logging
            logging.getLogger(__name__).warning("Failed to persist trace to %s: %s", self.storage_path, exc)

    def get_recent(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Return up to ``limit`` most recent records, newest first.

        Returns an empty list when ``limit`` is not positive, the file does
        not exist, or it cannot be read. Blank and unparseable lines are skipped.
        """
        # limit <= 0 previously still returned one record, because the length
        # check only ran after the first append.
        if limit <= 0 or not os.path.exists(self.storage_path):
            return []

        try:
            with open(self.storage_path, "r", encoding="utf-8") as f:
                lines = f.readlines()
        except (OSError, ValueError):
            # Unreadable or undecodable file: best effort, report nothing.
            return []

        results = []
        for line in reversed(lines):
            if not line.strip():
                continue
            try:
                results.append(json.loads(line))
            except ValueError:
                # Skip a corrupt line (e.g. a torn write) rather than hiding
                # every older record behind it.
                continue
            if len(results) >= limit:
                break
        return results
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Literal, Optional, Protocol, Union
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
RiskLevel = Literal['LOW', 'MEDIUM', 'HIGH', 'CRITICAL']
|
|
6
|
+
|
|
7
|
+
# One recorded action inside a trace.
class TraceStep(BaseModel):
    step_index: int  # position of the step within the trace
    timestamp: str  # when the step was recorded, as a string
    action: str  # name of the action that ran
    input: Any  # value recorded as the step's input
    output: Any  # value the action returned
    duration_ms: int  # elapsed time in milliseconds
    metadata: Optional[Dict[str, Any]] = None  # optional free-form extras
|
|
15
|
+
|
|
16
|
+
# Record of a single guarded run: its input plus the steps taken.
class Trace(BaseModel):
    id: str  # unique identifier of the trace
    started_at: str  # start time, as a string
    original_input: Any  # input the run was started with
    steps: List[TraceStep] = Field(default_factory=list)  # recorded steps; starts empty
    last_action: str = ""  # name of the most recent action
    token_usage: Optional[Dict[str, int]] = None  # optional token counters
|
|
23
|
+
|
|
24
|
+
# Configuration for an AgentTrace guard.
class AgentTraceOptions(BaseModel):
    # Lets fields hold arbitrary (non-pydantic) objects, e.g. rule instances.
    model_config = {"arbitrary_types_allowed": True}

    rules: List[Any] = Field(default_factory=list)  # built-in rule names and/or rule objects
    enforcementMode: Literal['enforce', 'shadow'] = 'enforce'  # 'shadow' reports violations without blocking
    explain: bool = False  # use the model-backed explainer instead of canned messages
    persist: bool = True  # write results to storage_path
    storage_path: str = ".agenttrace/traces.ndjson"  # file results are appended to
    # NOTE(review): the next three options are not read by the guard, rules, or
    # store code alongside this module — confirm where they are consumed.
    human_approval_threshold: Optional[float] = 1000.0
    context: Optional[List[str]] = None
    debug: bool = False
    metadata: Optional[Dict[str, Any]] = None  # copied onto every GuardedResult
|
|
36
|
+
|
|
37
|
+
# Everything a rule receives when it is checked.
class RuleContext(BaseModel):
    result: Any  # output of the guarded call, as returned
    trace: Trace  # trace of the run that produced the result
    guard_options: AgentTraceOptions  # options of the guard running the rule
|
|
41
|
+
|
|
42
|
+
# A single rule finding.
class Violation(BaseModel):
    rule: str  # name of the rule that fired
    description: str  # short human-readable summary
    evidence: Optional[str] = None  # optional excerpt that triggered the rule
    severity: RiskLevel  # one of LOW / MEDIUM / HIGH / CRITICAL
    remediation: Optional[str] = None  # optional suggested fix
|
|
48
|
+
|
|
49
|
+
class Rule(Protocol):
    """Structural interface for rules: a name, a description, and an async ``check``."""

    name: str
    description: str

    async def check(self, ctx: RuleContext) -> List[Violation]:
        """Inspect ``ctx`` and return the violations found (empty list when none)."""
        ...
|
|
55
|
+
|
|
56
|
+
class ExplainerProvider(Protocol):
    """Structural interface for components that turn a guard decision into text."""

    async def explain_allow(self, result: Any, trace: Trace) -> str:
        """Return an explanation for an action that was allowed."""
        ...

    async def explain_block(self, violations: List[Violation], trace: Trace) -> str:
        """Return an explanation for an action that triggered ``violations``."""
        ...
|
|
62
|
+
|
|
63
|
+
# Outcome of one guarded call: the decision plus its audit data.
class GuardedResult(BaseModel):
    audit_id: str  # identifier tying the result to its trace
    blocked: bool  # True when the action was blocked
    reason: Optional[str] = None  # why the action was flagged, when applicable
    explanation: Optional[str] = None  # explainer output
    risk_level: RiskLevel  # one of LOW / MEDIUM / HIGH / CRITICAL
    audit_trail: List[TraceStep] = Field(default_factory=list)  # steps recorded during the run
    violations: Optional[List[Violation]] = None  # rule findings, if any
    result: Optional[Any] = None  # the agent's output, when it is handed back
    timestamp: str  # when the result was produced, as a string
    metadata: Optional[Dict[str, Any]] = None  # optional caller-supplied extras
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ai-agenttrace"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "The accountability layer for AI agents. Trace, explain, and control agent actions."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "AgentTrace Contributors", email = "hello@agenttrace.ai" }
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"openai>=1.0.0",
|
|
23
|
+
"pydantic>=2.0.0",
|
|
24
|
+
"typing-extensions>=4.0.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
dev = [
|
|
29
|
+
"pytest>=7.0.0",
|
|
30
|
+
"pytest-asyncio>=0.21.0",
|
|
31
|
+
"black>=23.0.0",
|
|
32
|
+
"mypy>=1.0.0",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/kalash33/agenttrace"
|
|
37
|
+
Issues = "https://github.com/kalash33/agenttrace/issues"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["agenttrace"]
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import os
|
|
3
|
+
from agenttrace import AgentTraceOptions, AgentTrace, RuleContext, Trace
|
|
4
|
+
|
|
5
|
+
@pytest.fixture
def base_options():
    """Options with only the PII rule enabled, no persistence, and no model-backed explainer."""
    settings = {
        "rules": ["block_pii_leakage"],
        "persist": False,
        "explain": False,
    }
    return AgentTraceOptions(**settings)
|
|
12
|
+
|
|
13
|
+
@pytest.mark.asyncio
async def test_agent_trace_allow_happy_path(base_options):
    """A clean output is allowed, rated LOW, returned intact, and recorded as one trace step."""
    async def fake_agent(input_text: str):
        return {"status": "success", "message": f"Processed: {input_text}"}

    guarded_agent = AgentTrace(base_options).wrap(fake_agent)
    outcome = await guarded_agent("Hello World")

    assert outcome.blocked is False
    assert outcome.risk_level == "LOW"
    assert outcome.result["message"] == "Processed: Hello World"

    trail = outcome.audit_trail
    assert len(trail) == 1
    assert trail[0].action == "fake_agent"
|
|
28
|
+
|
|
29
|
+
@pytest.mark.asyncio
async def test_agent_trace_blocks_pii(base_options):
    """An output containing an email address is blocked with one HIGH PII violation."""
    async def leaky_agent(input_text: str):
        return "Here is the user email: john.doe@example.com"

    guarded_agent = AgentTrace(base_options).wrap(leaky_agent)
    outcome = await guarded_agent("Get user info")

    assert outcome.blocked is True
    assert outcome.risk_level == "HIGH"

    found = outcome.violations
    assert found is not None
    assert len(found) == 1
    assert found[0].rule == "block_pii_leakage"
|
|
44
|
+
|
|
45
|
+
@pytest.mark.asyncio
async def test_agent_trace_explainer_noop(base_options, monkeypatch):
    """With explain=True but no API keys, the canned no-op explanation is used."""
    # Set explain to True but without API keys, it should use NoOpExplainer
    base_options.explain = True
    # Remove the keys through monkeypatch so they are restored after the test.
    # Popping from os.environ directly changed the environment for every test
    # that ran afterwards in the same process.
    monkeypatch.delenv("FEATHERLESS_API_KEY", raising=False)
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)

    guard = AgentTrace(base_options)

    async def leaky_agent(input_text: str):
        return "Here is the user email: john.doe@example.com"

    safe_agent = guard.wrap(leaky_agent)
    result = await safe_agent("Get user info")

    assert result.blocked is True
    assert result.explanation is not None
    assert "Action BLOCKED" in result.explanation
    assert "block_pii_leakage" in result.explanation
|