truthguard-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +363 -0
  3. package/dist/Advisor/index.d.ts +78 -0
  4. package/dist/Advisor/index.d.ts.map +1 -0
  5. package/dist/Advisor/index.js +539 -0
  6. package/dist/Advisor/index.js.map +1 -0
  7. package/dist/Alerting/index.d.ts +35 -0
  8. package/dist/Alerting/index.d.ts.map +1 -0
  9. package/dist/Alerting/index.js +76 -0
  10. package/dist/Alerting/index.js.map +1 -0
  11. package/dist/Auth/index.d.ts +82 -0
  12. package/dist/Auth/index.d.ts.map +1 -0
  13. package/dist/Auth/index.js +242 -0
  14. package/dist/Auth/index.js.map +1 -0
  15. package/dist/Baseline/index.d.ts +43 -0
  16. package/dist/Baseline/index.d.ts.map +1 -0
  17. package/dist/Baseline/index.js +195 -0
  18. package/dist/Baseline/index.js.map +1 -0
  19. package/dist/Claims/index.d.ts +73 -0
  20. package/dist/Claims/index.d.ts.map +1 -0
  21. package/dist/Claims/index.js +1669 -0
  22. package/dist/Claims/index.js.map +1 -0
  23. package/dist/Client/index.d.ts +90 -0
  24. package/dist/Client/index.d.ts.map +1 -0
  25. package/dist/Client/index.js +186 -0
  26. package/dist/Client/index.js.map +1 -0
  27. package/dist/Config/index.d.ts +41 -0
  28. package/dist/Config/index.d.ts.map +1 -0
  29. package/dist/Config/index.js +129 -0
  30. package/dist/Config/index.js.map +1 -0
  31. package/dist/Coverage/index.d.ts +28 -0
  32. package/dist/Coverage/index.d.ts.map +1 -0
  33. package/dist/Coverage/index.js +134 -0
  34. package/dist/Coverage/index.js.map +1 -0
  35. package/dist/Demo/index.d.ts +16 -0
  36. package/dist/Demo/index.d.ts.map +1 -0
  37. package/dist/Demo/index.js +189 -0
  38. package/dist/Demo/index.js.map +1 -0
  39. package/dist/Gate/index.d.ts +39 -0
  40. package/dist/Gate/index.d.ts.map +1 -0
  41. package/dist/Gate/index.js +207 -0
  42. package/dist/Gate/index.js.map +1 -0
  43. package/dist/Grounding/index.d.ts +40 -0
  44. package/dist/Grounding/index.d.ts.map +1 -0
  45. package/dist/Grounding/index.js +1433 -0
  46. package/dist/Grounding/index.js.map +1 -0
  47. package/dist/L2/index.d.ts +93 -0
  48. package/dist/L2/index.d.ts.map +1 -0
  49. package/dist/L2/index.js +1773 -0
  50. package/dist/L2/index.js.map +1 -0
  51. package/dist/MCP/index.d.ts +139 -0
  52. package/dist/MCP/index.d.ts.map +1 -0
  53. package/dist/MCP/index.js +1250 -0
  54. package/dist/MCP/index.js.map +1 -0
  55. package/dist/Matchers/index.d.ts +101 -0
  56. package/dist/Matchers/index.d.ts.map +1 -0
  57. package/dist/Matchers/index.js +690 -0
  58. package/dist/Matchers/index.js.map +1 -0
  59. package/dist/Middleware/index.d.ts +146 -0
  60. package/dist/Middleware/index.d.ts.map +1 -0
  61. package/dist/Middleware/index.js +239 -0
  62. package/dist/Middleware/index.js.map +1 -0
  63. package/dist/Mode/index.d.ts +87 -0
  64. package/dist/Mode/index.d.ts.map +1 -0
  65. package/dist/Mode/index.js +117 -0
  66. package/dist/Mode/index.js.map +1 -0
  67. package/dist/Policy/index.d.ts +89 -0
  68. package/dist/Policy/index.d.ts.map +1 -0
  69. package/dist/Policy/index.js +143 -0
  70. package/dist/Policy/index.js.map +1 -0
  71. package/dist/Proxy/SessionStore.d.ts +94 -0
  72. package/dist/Proxy/SessionStore.d.ts.map +1 -0
  73. package/dist/Proxy/SessionStore.js +225 -0
  74. package/dist/Proxy/SessionStore.js.map +1 -0
  75. package/dist/Proxy/index.d.ts +166 -0
  76. package/dist/Proxy/index.d.ts.map +1 -0
  77. package/dist/Proxy/index.js +531 -0
  78. package/dist/Proxy/index.js.map +1 -0
  79. package/dist/Registry/index.d.ts +93 -0
  80. package/dist/Registry/index.d.ts.map +1 -0
  81. package/dist/Registry/index.js +818 -0
  82. package/dist/Registry/index.js.map +1 -0
  83. package/dist/Reports/index.d.ts +38 -0
  84. package/dist/Reports/index.d.ts.map +1 -0
  85. package/dist/Reports/index.js +149 -0
  86. package/dist/Reports/index.js.map +1 -0
  87. package/dist/Rules/index.d.ts +587 -0
  88. package/dist/Rules/index.d.ts.map +1 -0
  89. package/dist/Rules/index.js +6236 -0
  90. package/dist/Rules/index.js.map +1 -0
  91. package/dist/Rules/intents.d.ts +22 -0
  92. package/dist/Rules/intents.d.ts.map +1 -0
  93. package/dist/Rules/intents.js +242 -0
  94. package/dist/Rules/intents.js.map +1 -0
  95. package/dist/Runner/index.d.ts +39 -0
  96. package/dist/Runner/index.d.ts.map +1 -0
  97. package/dist/Runner/index.js +185 -0
  98. package/dist/Runner/index.js.map +1 -0
  99. package/dist/SDK/anthropic.d.ts +102 -0
  100. package/dist/SDK/anthropic.d.ts.map +1 -0
  101. package/dist/SDK/anthropic.js +425 -0
  102. package/dist/SDK/anthropic.js.map +1 -0
  103. package/dist/SDK/openai.d.ts +164 -0
  104. package/dist/SDK/openai.d.ts.map +1 -0
  105. package/dist/SDK/openai.js +557 -0
  106. package/dist/SDK/openai.js.map +1 -0
  107. package/dist/Store/index.d.ts +72 -0
  108. package/dist/Store/index.d.ts.map +1 -0
  109. package/dist/Store/index.js +136 -0
  110. package/dist/Store/index.js.map +1 -0
  111. package/dist/Telemetry/index.d.ts +84 -0
  112. package/dist/Telemetry/index.d.ts.map +1 -0
  113. package/dist/Telemetry/index.js +239 -0
  114. package/dist/Telemetry/index.js.map +1 -0
  115. package/dist/Trace/index.d.ts +219 -0
  116. package/dist/Trace/index.d.ts.map +1 -0
  117. package/dist/Trace/index.js +763 -0
  118. package/dist/Trace/index.js.map +1 -0
  119. package/dist/TraceReadiness/index.d.ts +42 -0
  120. package/dist/TraceReadiness/index.d.ts.map +1 -0
  121. package/dist/TraceReadiness/index.js +169 -0
  122. package/dist/TraceReadiness/index.js.map +1 -0
  123. package/dist/cli/index.d.ts +15 -0
  124. package/dist/cli/index.d.ts.map +1 -0
  125. package/dist/cli/index.js +807 -0
  126. package/dist/cli/index.js.map +1 -0
  127. package/dist/i18n/index.d.ts +44 -0
  128. package/dist/i18n/index.d.ts.map +1 -0
  129. package/dist/i18n/index.js +124 -0
  130. package/dist/i18n/index.js.map +1 -0
  131. package/dist/index.d.ts +55 -0
  132. package/dist/index.d.ts.map +1 -0
  133. package/dist/index.js +218 -0
  134. package/dist/index.js.map +1 -0
  135. package/dist/thin.d.ts +39 -0
  136. package/dist/thin.d.ts.map +1 -0
  137. package/dist/thin.js +120 -0
  138. package/dist/thin.js.map +1 -0
  139. package/dist/types/index.d.ts +498 -0
  140. package/dist/types/index.d.ts.map +1 -0
  141. package/dist/types/index.js +17 -0
  142. package/dist/types/index.js.map +1 -0
  143. package/dist-npm/Alerting/index.d.ts +35 -0
  144. package/dist-npm/Alerting/index.d.ts.map +1 -0
  145. package/dist-npm/Alerting/index.js +76 -0
  146. package/dist-npm/Alerting/index.js.map +1 -0
  147. package/dist-npm/Auth/index.d.ts +82 -0
  148. package/dist-npm/Auth/index.d.ts.map +1 -0
  149. package/dist-npm/Auth/index.js +242 -0
  150. package/dist-npm/Auth/index.js.map +1 -0
  151. package/dist-npm/Client/index.d.ts +90 -0
  152. package/dist-npm/Client/index.d.ts.map +1 -0
  153. package/dist-npm/Client/index.js +186 -0
  154. package/dist-npm/Client/index.js.map +1 -0
  155. package/dist-npm/Demo/index.d.ts +16 -0
  156. package/dist-npm/Demo/index.d.ts.map +1 -0
  157. package/dist-npm/Demo/index.js +189 -0
  158. package/dist-npm/Demo/index.js.map +1 -0
  159. package/dist-npm/Middleware/index.d.ts +146 -0
  160. package/dist-npm/Middleware/index.d.ts.map +1 -0
  161. package/dist-npm/Middleware/index.js +239 -0
  162. package/dist-npm/Middleware/index.js.map +1 -0
  163. package/dist-npm/Proxy/SessionStore.d.ts +94 -0
  164. package/dist-npm/Proxy/SessionStore.d.ts.map +1 -0
  165. package/dist-npm/Proxy/SessionStore.js +225 -0
  166. package/dist-npm/Proxy/SessionStore.js.map +1 -0
  167. package/dist-npm/Proxy/index.d.ts +166 -0
  168. package/dist-npm/Proxy/index.d.ts.map +1 -0
  169. package/dist-npm/Proxy/index.js +531 -0
  170. package/dist-npm/Proxy/index.js.map +1 -0
  171. package/dist-npm/SDK/anthropic.d.ts +102 -0
  172. package/dist-npm/SDK/anthropic.d.ts.map +1 -0
  173. package/dist-npm/SDK/anthropic.js +425 -0
  174. package/dist-npm/SDK/anthropic.js.map +1 -0
  175. package/dist-npm/SDK/openai.d.ts +164 -0
  176. package/dist-npm/SDK/openai.d.ts.map +1 -0
  177. package/dist-npm/SDK/openai.js +557 -0
  178. package/dist-npm/SDK/openai.js.map +1 -0
  179. package/dist-npm/Store/index.d.ts +72 -0
  180. package/dist-npm/Store/index.d.ts.map +1 -0
  181. package/dist-npm/Store/index.js +136 -0
  182. package/dist-npm/Store/index.js.map +1 -0
  183. package/dist-npm/Telemetry/index.d.ts +84 -0
  184. package/dist-npm/Telemetry/index.d.ts.map +1 -0
  185. package/dist-npm/Telemetry/index.js +239 -0
  186. package/dist-npm/Telemetry/index.js.map +1 -0
  187. package/dist-npm/Trace/index.d.ts +219 -0
  188. package/dist-npm/Trace/index.d.ts.map +1 -0
  189. package/dist-npm/Trace/index.js +763 -0
  190. package/dist-npm/Trace/index.js.map +1 -0
  191. package/dist-npm/thin.d.ts +39 -0
  192. package/dist-npm/thin.d.ts.map +1 -0
  193. package/dist-npm/thin.js +120 -0
  194. package/dist-npm/thin.js.map +1 -0
  195. package/dist-npm/types/index.d.ts +498 -0
  196. package/dist-npm/types/index.d.ts.map +1 -0
  197. package/dist-npm/types/index.js +17 -0
  198. package/dist-npm/types/index.js.map +1 -0
  199. package/package.json +114 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 TruthGuard
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,363 @@
1
+ # TruthGuard
2
+
3
+ **Standardized grounding validation for tool-calling AI agents.**
4
+
5
+ > Detect when an agent's response contradicts the data returned by the tools it called — deterministically, without LLM-as-judge overhead.
6
+
7
+ [![npm version](https://img.shields.io/npm/v/truthguard-ai.svg)](https://www.npmjs.com/package/truthguard-ai)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
9
+
10
+ ---
11
+
12
+ ## The Problem
13
+
14
+ Most "hallucinations" in tool-calling agents are **grounding failures** — the agent calls a tool, gets accurate data, and then ignores it, miscalculates, or fabricates from empty results. The source of truth is already in the trace.
15
+
16
+ ## The Solution
17
+
18
+ TruthGuard extracts factual claims from the agent's response, cross-references them against tool outputs, and reports grounding failures with standardized codes — like OBD diagnostic codes for AI.
19
+
20
+ ```
21
+ npm install truthguard-ai
22
+ ```
23
+
24
+ **Zero LLM calls.** Deterministic regex extraction + fuzzy matching. 30 detection rules across 4 categories. Runs in <50ms.
25
+
26
+ ---
27
+
28
+ ## Quick Start — 3 Minutes
29
+
30
+ ### 1. Evaluate a trace
31
+
32
+ ```typescript
33
+ import { TraceBuilder, GroundingEngine, generateReport } from 'truthguard';
34
+
35
+ const trace = new TraceBuilder({ traceId: 'run-001' })
36
+ .addUserInput('How many employees are on leave today?')
37
+ .addToolCall('getLeaveRecords', { date: '2024-03-15' })
38
+ .addToolOutput('getLeaveRecords', [
39
+ { employeeId: 'E01', name: 'Ana Jovic', status: 'on_leave' },
40
+ { employeeId: 'E02', name: 'Ivan Petrovic', status: 'on_leave' },
41
+ ])
42
+ .addFinalResponse('There are 3 employees on leave today.') // ← Bug: says 3, data shows 2
43
+ .build();
44
+
45
+ const engine = new GroundingEngine();
46
+ const report = engine.evaluate(trace);
47
+
48
+ console.log(report.groundingScore); // 0.5
49
+ console.log(report.detectedFailures[0]); // { type: 'grounding.data_ignored', severity: 'high' }
50
+
51
+ const { text } = generateReport(report);
52
+ console.log(text);
53
+ ```
54
+
55
+ ### 2. Add a CI quality gate
56
+
57
+ ```typescript
58
+ import { loadDataset, runDataset, evaluateGate, loadGateConfig } from 'truthguard';
59
+
60
+ const entries = loadDataset('./test-cases.jsonl');
61
+ const result = runDataset(entries);
62
+ const gate = loadGateConfig('.ai-rcp-gate.yml');
63
+ const verdict = evaluateGate(result, gate);
64
+
65
+ if (!verdict.pass) {
66
+ console.error(verdict.report);
67
+ process.exit(1);
68
+ }
69
+ ```
70
+
71
+ ### 3. Monitor in production (proxy mode)
72
+
73
+ Works with **any language** — PHP, Python, Go, Java, Ruby, C#:
74
+
75
+ ```bash
76
+ npx truthguard observe --port 3001
77
+ ```
78
+
79
+ Change your AI base URL:
80
+ ```php
81
+ // Before: ANTHROPIC_BASE_URL=https://api.anthropic.com
82
+ // After:
83
+ ANTHROPIC_BASE_URL=http://localhost:3001/proxy/anthropic
84
+ ```
85
+
86
+ Your app works exactly the same. TruthGuard transparently proxies requests and evaluates grounding in the background.
87
+
88
+ ---
89
+
90
+ ## Detection Rules (30)
91
+
92
+ ### Grounding (16 rules)
93
+
94
+ | Code | Description |
95
+ |------|-------------|
96
+ | `empty_fabrication` | Tool returned `[]`, agent fabricated results |
97
+ | `no_tool_call` | Factual question answered without calling any tool |
98
+ | `math_error` | Incorrect calculation from correct tool data |
99
+ | `data_ignored` | Tool data altered or ignored in response |
100
+ | `wrong_query` | Tool called with incorrect parameters |
101
+ | `entity_mismatch` | Agent mixed up entities from results |
102
+ | `hallucinated_entity` | Agent invented entity not in tool data |
103
+ | `partial_answer` | Only part of the question answered |
104
+ | `question_not_answered` | Core question not addressed |
105
+ | `selective_omission` | Some tool results selectively excluded |
106
+ | `tool_error_ignored` | Tool error not handled |
107
+ | `stale_knowledge` | Used outdated data instead of tool results |
108
+ | `incomplete_response` | Empty or fallback response despite having data |
109
+ | `irrelevant_context` | Used unrelated data from different context |
110
+ | `contradictory_claims` | Response contains self-contradicting statements |
111
+ | `unverified_value` | Factual values with no tool data to verify against |
112
+
113
+ ### Orchestration (8 rules)
114
+
115
+ | Code | Description |
116
+ |------|-------------|
117
+ | `malformed_tool_input` | Bad parameter format in tool call |
118
+ | `raw_output_leak` | XML/JSON markup leaked into response |
119
+ | `intermediate_response_leak` | "Let me check..." text shown to user |
120
+ | `excessive_tool_calls` | Redundant repeated tool invocations |
121
+ | `token_limit_truncation` | Response cut off by token limit |
122
+ | `rate_limit_degradation` | Quality degraded due to rate limiting |
123
+ | `quota_exhaustion` | API quota exceeded |
124
+ | `model_fallback` | Unexpected model fallback |
125
+
126
+ ### Reasoning (4) & Safety (2)
127
+
128
+ `scope_mismatch`, `overconfident_language`, `language_mismatch`, `duplicate_user_input`, `prompt_leak`, `sensitive_data_exposure`
129
+
130
+ ---
131
+
132
+ ## Features
133
+
134
+ ### Diagnostic Advisor
135
+
136
+ Every detected failure includes root cause analysis, evidence from the trace, and two remediation paths:
137
+
138
+ ```typescript
139
+ import { generateAdvisorReport, formatAdvisorReport } from 'truthguard';
140
+
141
+ const advisor = generateAdvisorReport(report, trace);
142
+ console.log(formatAdvisorReport(advisor));
143
+ // REPAIR ORDER:
144
+ // 1. Fix grounding.no_tool_call (root cause)
145
+ // 2. Fix grounding.unverified_value (likely resolves after #1)
146
+ // PROMPT HINT: "Always call the relevant tool before answering factual questions"
147
+ // CODE GUARD: if (!trace.hasToolCall()) return forceToolCall(query);
148
+ ```
149
+
150
+ ### Policy Engine
151
+
152
+ Configure per-failure actions — block, warn, or observe:
153
+
154
+ ```typescript
155
+ import { wrapOpenAI, GroundingError } from 'truthguard';
156
+ import OpenAI from 'openai';
157
+
158
+ const openai = wrapOpenAI(new OpenAI(), {
159
+ mode: 'block',
160
+ threshold: 0.85,
161
+ policy: {
162
+ rules: {
163
+ 'grounding.empty_fabrication': 'block',
164
+ 'grounding.math_error': 'warn',
165
+ 'reasoning.overconfident_language': 'observe',
166
+ },
167
+ },
168
+ });
169
+ ```
170
+
171
+ ### Baseline Regression Detection
172
+
173
+ ```typescript
174
+ import { createSnapshot, saveBaseline, loadBaseline, compareToBaseline } from 'truthguard';
175
+
176
+ // Save after a known-good run
177
+ const snapshot = createSnapshot(result, 'v1.2-main');
178
+ saveBaseline('.ai-rcp-baseline.json', snapshot);
179
+
180
+ // Compare after changes
181
+ const comparison = compareToBaseline(newResult, snapshot);
182
+ if (!comparison.withinTolerance) {
183
+ console.error('Regression detected:', comparison.report);
184
+ }
185
+ ```
186
+
187
+ ### MCP Server (VS Code, Cursor)
188
+
189
+ Use TruthGuard directly from your IDE — no terminal needed.
190
+
191
+ **Setup (one time):**
192
+ 1. In VS Code: `Ctrl+Shift+P` → **"MCP: Open User Configuration"**
193
+ 2. Add this to `mcp.json`:
194
+
195
+ ```json
196
+ {
197
+ "servers": {
198
+ "truthguard": {
199
+ "type": "stdio",
200
+ "command": "npx",
201
+ "args": ["-y", "truthguard", "mcp"]
202
+ }
203
+ }
204
+ }
205
+ ```
206
+
207
+ 3. Restart VS Code
208
+
209
+ **Usage:** In Copilot Chat, say: *"Call truthguard verify_response with this trace: {...}"*
210
+
211
+ 8 tools available: `verify_response`, `quick_check`, `check_trace_quality`, `list_rules`, `get_failure_info`, `evaluate_with_policy`, `get_live_traces`, `get_trace_report`
212
+
213
+ The last two tools bridge proxy results to your IDE — ask Copilot *"Call get_live_traces"* to see recent production evaluations.
214
+
215
+ Full setup guide: [docs/getting-started.md](docs/getting-started.md#ide--mcp-server-vs-code-cursor)
216
+
217
+ ### Express Middleware
218
+
219
+ ```typescript
220
+ import express from 'express';
221
+ import { groundingMiddleware, FileStore } from 'truthguard';
222
+
223
+ const app = express();
224
+ app.post('/api/chat', groundingMiddleware({
225
+ mode: 'warn',
226
+ store: new FileStore('./traces/grounding.jsonl'),
227
+ extractTrace: (req, res, body) => body.trace,
228
+ }));
229
+ ```
230
+
231
+ ---
232
+
233
+ ## CLI
234
+
235
+ ```bash
236
+ npx truthguard debug trace.json # Evaluate one trace
237
+ npx truthguard run dataset.jsonl # Batch dataset evaluation
238
+ npx truthguard run dataset.jsonl --gate gate.yml # CI quality gate
239
+ npx truthguard observe --port 3001 # Start observe server + proxy
240
+ ```
241
+
242
+ ---
243
+
244
+ ## CI/CD Integration
245
+
246
+ ### GitHub Actions
247
+
248
+ ```yaml
249
+ # .github/workflows/truthguard-gate.yml
250
+ name: TruthGuard Quality Gate
251
+ on: [push, pull_request]
252
+
253
+ jobs:
254
+   grounding-gate:
255
+     runs-on: ubuntu-latest
256
+     steps:
257
+       - uses: actions/checkout@v4
258
+       - uses: actions/setup-node@v4
259
+         with:
260
+           node-version: '20'
261
+       - run: npm ci
262
+       - run: npx truthguard run test-cases.jsonl --gate .ai-rcp-gate.yml
263
+ ```
264
+
265
+ ### Gate config (`.ai-rcp-gate.yml`)
266
+
267
+ ```yaml
268
+ name: "Grounding Quality Gate"
269
+ assertions:
270
+   - metric: grounding_score
271
+     operator: ">="
272
+     threshold: 0.90
273
+   - metric: failure_count
274
+     operator: "<="
275
+     threshold: 0
276
+   - metric: pass_rate
277
+     operator: ">="
278
+     threshold: 1.0
279
+ ```
280
+
281
+ ---
282
+
283
+ ## How It Works
284
+
285
+ ```
286
+ Agent Response → Claim Extraction → Matcher   → Rules  → Report
287
+                  (regex: numbers,   (numeric,   (30      (score,
288
+                   dates, names,      count,      rules)   failures,
289
+                   counts)            date,                advisor)
290
+                                      name)
291
+ ```
292
+
293
+ 1. **Extract** factual claims from the agent's text response (numbers, dates, names, counts)
294
+ 2. **Match** each claim against values in tool outputs (with configurable tolerances)
295
+ 3. **Detect** failure patterns using 30 rules across 4 categories
296
+ 4. **Score** grounding quality with severity-weighted formula
297
+ 5. **Diagnose** root causes and suggest repair sequence
298
+
299
+ **No LLM calls.** 100% deterministic. ~55% claim coverage (numbers, dates, names, counts). L2 structured matching (booleans, enums, key-values) extends to ~70-75%.
300
+
301
+ ---
302
+
303
+ ## Configurable Tolerances
304
+
305
+ ```yaml
306
+ # .ai-rcp.yml
307
+ tolerances:
308
+   numeric:
309
+     relative_tolerance: 0.05 # ±5% for numbers
310
+     rounding_allowed: true
311
+   count:
312
+     exact_match: true
313
+   date:
314
+     exact_match: true
315
+   name:
316
+     fuzzy_match: true
317
+     threshold: 0.85 # Jaro-Winkler similarity
318
+ ```
319
+
320
+ ---
321
+
322
+ ## Language Support
323
+
324
+ - **Claim extraction:** Numbers, dates (7 formats incl. European DD.MM.YYYY), Serbian months (januar–decembar), relative dates (yesterday/juče, pre N dana)
325
+ - **Unit conversion:** 13 languages (EN, SR, ES, FR, PT, RU, HI, AR, BN, ZH, JA...)
326
+ - **Vague qualifier guard:** English + Serbian (oko, otprilike, negde)
327
+ - **Name matching:** Diacritics-aware (ć→c, š→s) via Jaro-Winkler
328
+
329
+ ---
330
+
331
+ ## Architecture
332
+
333
+ ```
334
+ src/
335
+ ├── Trace/ TraceBuilder SDK + multi-turn support
336
+ ├── Claims/ Claim extraction (regex, multilingual)
337
+ ├── Matchers/ Numeric, count, date, name matchers
338
+ ├── Rules/ 30 detection rules (4 categories)
339
+ ├── Grounding/ Engine orchestration + entity-aware grounding
340
+ ├── Advisor/ Diagnostic advisor (RCA, repair sequence, hints)
341
+ ├── Registry/ Failure registry (severity, suppression graph)
342
+ ├── Policy/ Per-failure enforcement (block/warn/observe)
343
+ ├── Reports/ JSON + text report generators
344
+ ├── Config/ YAML tolerance configuration
345
+ ├── Gate/ CI/CD quality gate
346
+ ├── Baseline/ Snapshot regression detection
347
+ ├── Runner/ JSONL dataset batch evaluation
348
+ ├── Mode/ Pipeline (debug/ci/observe/warn/block)
349
+ ├── Store/ FileStore + InMemoryStore
350
+ ├── Alerting/ Console, Webhook, Callback dispatchers
351
+ ├── Middleware/ Express middleware factory
352
+ ├── SDK/ OpenAI + Anthropic wrappers (auto trace capture)
353
+ ├── Proxy/ Transparent AI API proxy builders
354
+ ├── MCP/ MCP Server (8 IDE tools)
355
+ ├── L2/ Structured context matching (boolean, enum, key-value)
356
+ └── cli/ CLI commands
357
+ ```
358
+
359
+ ---
360
+
361
+ ## License
362
+
363
+ MIT
package/dist/Advisor/index.d.ts ADDED
@@ -0,0 +1,78 @@
1
+ /**
2
+ * TruthGuard Diagnostic Advisor
3
+ *
4
+ * Generates actionable diagnostic advice for detected failures.
5
+ * Two types of hints per failure:
6
+ * - prompt_hint: suggest a prompt change (fast but unreliable — LLM may ignore)
7
+ * - code_guard: suggest a programmatic fix (slower to implement but deterministic)
8
+ *
9
+ * Does NOT auto-fix or generate patch-ready code snippets with "confidence" scores.
10
+ * Instead, gives honest, evidence-based guidance and re-evaluates after the fix.
11
+ */
12
+ import type { FailureType, FailureSeverity, GroundingReport, Trace } from '../types';
13
+ /** A single remediation hint — either prompt-level or code-level. */
14
+ export interface RemediationHint {
15
+ /** Whether this is a prompt-level or code-level suggestion. */
16
+ type: 'prompt_hint' | 'code_guard';
17
+ /** What direction to take. */
18
+ direction: string;
19
+ /** A concrete example of the fix. */
20
+ example: string;
21
+ /** Why this hint might not be sufficient. */
22
+ caveat: string;
23
+ }
24
+ /** Evidence extracted from the trace proving the failure. */
25
+ export interface DiagnosisEvidence {
26
+ /** What the trace showed. */
27
+ observation: string;
28
+ /** Where in the trace this was found (step ID or description). */
29
+ source: string;
30
+ }
31
+ /** Full diagnostic advice for a single detected failure. */
32
+ export interface DiagnosticAdvice {
33
+ /** The failure type this advice addresses. */
34
+ failureType: FailureType;
35
+ /** Severity from the detection. */
36
+ severity: FailureSeverity;
37
+ /** One-line summary of the problem. */
38
+ what: string;
39
+ /** Explanation of why this happened. */
40
+ why: string;
41
+ /** Evidence from the trace proving the failure. */
42
+ evidence: DiagnosisEvidence[];
43
+ /** Remediation hints — always includes both prompt_hint and code_guard. */
44
+ hints: RemediationHint[];
45
+ }
46
+ /** Result of running the diagnostic advisor on a full grounding report. */
47
+ export interface AdvisorReport {
48
+ /** The trace that was evaluated. */
49
+ traceId: string;
50
+ /** Total detected failures that were diagnosed. */
51
+ totalDiagnosed: number;
52
+ /** Diagnostic advice per detected failure. */
53
+ advice: DiagnosticAdvice[];
54
+ /** Ordered repair steps derived from failure dependencies. */
55
+ repairSequence?: {
56
+ type: FailureType;
57
+ reason: string;
58
+ }[];
59
+ /** High-level synthesized advice when multiple failures share a common root cause. */
60
+ synthesizedAdvice?: string;
61
+ }
62
+ /**
63
+ * Generate a full diagnostic advisor report from a grounding report.
64
+ *
65
+ * Only advises on primary and secondary failures (not suppressed).
66
+ * Hypotheses (low-confidence) are excluded — they need human review, not auto-advice.
67
+ */
68
+ export declare function generateAdvisorReport(report: GroundingReport, trace: Trace): AdvisorReport;
69
+ /**
70
+ * Generate diagnostic advice for a single failure type.
71
+ * Useful for looking up hints without running full evaluation.
72
+ */
73
+ export declare function getHintsForFailureType(failureType: FailureType): RemediationHint[];
74
+ /**
75
+ * Format the advisor report as human-readable text.
76
+ */
77
+ export declare function formatAdvisorReport(advisor: AdvisorReport): string;
78
+ //# sourceMappingURL=index.d.ts.map
package/dist/Advisor/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/Advisor/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,eAAe,EAEf,eAAe,EACf,KAAK,EAEN,MAAM,UAAU,CAAC;AAOlB,qEAAqE;AACrE,MAAM,WAAW,eAAe;IAC9B,+DAA+D;IAC/D,IAAI,EAAE,aAAa,GAAG,YAAY,CAAC;IACnC,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,qCAAqC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,6CAA6C;IAC7C,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,6DAA6D;AAC7D,MAAM,WAAW,iBAAiB;IAChC,6BAA6B;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,kEAAkE;IAClE,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,4DAA4D;AAC5D,MAAM,WAAW,gBAAgB;IAC/B,8CAA8C;IAC9C,WAAW,EAAE,WAAW,CAAC;IACzB,mCAAmC;IACnC,QAAQ,EAAE,eAAe,CAAC;IAC1B,uCAAuC;IACvC,IAAI,EAAE,MAAM,CAAC;IACb,wCAAwC;IACxC,GAAG,EAAE,MAAM,CAAC;IACZ,mDAAmD;IACnD,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,2EAA2E;IAC3E,KAAK,EAAE,eAAe,EAAE,CAAC;CAC1B;AAED,2EAA2E;AAC3E,MAAM,WAAW,aAAa;IAC5B,oCAAoC;IACpC,OAAO,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,cAAc,EAAE,MAAM,CAAC;IACvB,8CAA8C;IAC9C,MAAM,EAAE,gBAAgB,EAAE,CAAC;IAC3B,8DAA8D;IAC9D,cAAc,CAAC,EAAE;QAAE,IAAI,EAAE,WAAW,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACzD,sFAAsF;IACtF,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAghBD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,eAAe,EACvB,KAAK,EAAE,KAAK,GACX,aAAa,CAyBf;AAwFD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,WAAW,EAAE,WAAW,GAAG,eAAe,EAAE,CAMlF;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,aAAa,GAAG,MAAM,CAsDlE"}