truthguard-ai 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/README.npm.md +0 -144
- /package/{README.full.md → README.full.bak} +0 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "truthguard-ai",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "TruthGuard — Standardized grounding validation for tool-calling AI agents. Detect, diagnose, and prevent grounding failures.",
|
|
5
5
|
"main": "dist-npm/thin.js",
|
|
6
6
|
"types": "dist-npm/thin.d.ts",
|
package/README.npm.md
DELETED
|
@@ -1,144 +0,0 @@
|
|
|
1
|
-
# TruthGuard
|
|
2
|
-
|
|
3
|
-
**Grounding validation for tool-calling AI agents.**
|
|
4
|
-
|
|
5
|
-
> Detect when an agent's response contradicts the data returned by the tools it called — without LLM-as-judge overhead.
|
|
6
|
-
|
|
7
|
-
[npm package](https://www.npmjs.com/package/truthguard-ai)
|
|
8
|
-
[License: MIT](LICENSE)
|
|
9
|
-
|
|
10
|
-
---
|
|
11
|
-
|
|
12
|
-
## The Problem
|
|
13
|
-
|
|
14
|
-
Most "hallucinations" in tool-calling agents are **grounding failures** — the agent calls a tool, gets accurate data, then ignores it, miscalculates, or fabricates from empty results.
|
|
15
|
-
|
|
16
|
-
## The Solution
|
|
17
|
-
|
|
18
|
-
TruthGuard extracts factual claims from the agent's response and cross-references them against tool outputs. Deterministic. No LLM calls. Runs in <50ms.
|
|
19
|
-
|
|
20
|
-
```bash
|
|
21
|
-
npm install truthguard-ai
|
|
22
|
-
```
|
|
23
|
-
|
|
24
|
-
---
|
|
25
|
-
|
|
26
|
-
## Quick Start
|
|
27
|
-
|
|
28
|
-
### Evaluate a trace
|
|
29
|
-
|
|
30
|
-
```typescript
|
|
31
|
-
import { TraceBuilder, GroundingEngine, generateReport } from 'truthguard-ai';
|
|
32
|
-
|
|
33
|
-
const trace = new TraceBuilder({ traceId: 'run-001' })
|
|
34
|
-
.addUserInput('How many employees are on leave today?')
|
|
35
|
-
.addToolCall('getLeaveRecords', { date: '2024-03-15' })
|
|
36
|
-
.addToolOutput('getLeaveRecords', [
|
|
37
|
-
{ employeeId: 'E01', name: 'Ana Jovic', status: 'on_leave' },
|
|
38
|
-
{ employeeId: 'E02', name: 'Ivan Petrovic', status: 'on_leave' },
|
|
39
|
-
])
|
|
40
|
-
.addFinalResponse('There are 3 employees on leave today.')
|
|
41
|
-
.build();
|
|
42
|
-
|
|
43
|
-
const engine = new GroundingEngine();
|
|
44
|
-
const report = engine.evaluate(trace);
|
|
45
|
-
|
|
46
|
-
console.log(report.groundingScore); // 0.5
|
|
47
|
-
console.log(report.detectedFailures[0]); // { type: 'grounding.data_ignored', severity: 'high' }
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### SDK Wrappers (OpenAI & Anthropic)
|
|
51
|
-
|
|
52
|
-
```typescript
|
|
53
|
-
import { wrapOpenAI } from 'truthguard-ai';
|
|
54
|
-
import OpenAI from 'openai';
|
|
55
|
-
|
|
56
|
-
const openai = wrapOpenAI(new OpenAI(), {
|
|
57
|
-
mode: 'block',
|
|
58
|
-
threshold: 0.85,
|
|
59
|
-
});
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
```typescript
|
|
63
|
-
import { wrapAnthropic } from 'truthguard-ai';
|
|
64
|
-
import Anthropic from '@anthropic-ai/sdk';
|
|
65
|
-
|
|
66
|
-
const anthropic = wrapAnthropic(new Anthropic(), {
|
|
67
|
-
mode: 'warn',
|
|
68
|
-
threshold: 0.80,
|
|
69
|
-
});
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
### Production Monitoring (Proxy Mode)
|
|
73
|
-
|
|
74
|
-
Works with **any language** — PHP, Python, Go, Java, Ruby, C#:
|
|
75
|
-
|
|
76
|
-
```bash
|
|
77
|
-
npx truthguard-ai observe --port 3001
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
Point your AI SDK base URL to the proxy:
|
|
81
|
-
```
|
|
82
|
-
# OpenAI
|
|
83
|
-
OPENAI_BASE_URL=http://localhost:3001/proxy/openai
|
|
84
|
-
|
|
85
|
-
# Anthropic
|
|
86
|
-
ANTHROPIC_BASE_URL=http://localhost:3001/proxy/anthropic
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Your app works identically. TruthGuard evaluates grounding in the background.
|
|
90
|
-
|
|
91
|
-
### CI Quality Gate
|
|
92
|
-
|
|
93
|
-
```bash
|
|
94
|
-
npx truthguard-ai run test-cases.jsonl --gate .ai-rcp-gate.yml
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
### MCP Server (VS Code, Cursor)
|
|
98
|
-
|
|
99
|
-
Use TruthGuard from your IDE — add to `.vscode/mcp.json`:
|
|
100
|
-
|
|
101
|
-
```json
|
|
102
|
-
{
|
|
103
|
-
"servers": {
|
|
104
|
-
"truthguard": {
|
|
105
|
-
"type": "stdio",
|
|
106
|
-
"command": "npx",
|
|
107
|
-
"args": ["-y", "truthguard-ai", "mcp"]
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
### Express Middleware
|
|
114
|
-
|
|
115
|
-
```typescript
|
|
116
|
-
import { groundingMiddleware, FileStore } from 'truthguard-ai';
|
|
117
|
-
|
|
118
|
-
app.post('/api/chat', groundingMiddleware({
|
|
119
|
-
mode: 'warn',
|
|
120
|
-
store: new FileStore('./traces/grounding.jsonl'),
|
|
121
|
-
extractTrace: (req, res, body) => body.trace,
|
|
122
|
-
}));
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
---
|
|
126
|
-
|
|
127
|
-
## Key Features
|
|
128
|
-
|
|
129
|
-
- **30+ deterministic failure detectors** — fabrication from empty results, math errors, ignored tool data, entity mismatches, and more
|
|
130
|
-
- **Policy engine** — per-failure enforcement (block / warn / observe)
|
|
131
|
-
- **Diagnostic reports** — actionable failure reports with severity levels
|
|
132
|
-
- **Baseline regression** — snapshot comparison to catch quality regressions
|
|
133
|
-
- **Multi-language support** — works with responses in 13+ languages
|
|
134
|
-
- **Configurable thresholds** — tune sensitivity to match your use case
|
|
135
|
-
|
|
136
|
-
---
|
|
137
|
-
|
|
138
|
-
## Documentation
|
|
139
|
-
|
|
140
|
-
Full docs, guides, and architecture details: [truthguard.dev](https://truthguard.dev)
|
|
141
|
-
|
|
142
|
-
## License
|
|
143
|
-
|
|
144
|
-
MIT
|
|
File without changes
|