truthguard-ai 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truthguard-ai might be problematic. Click here for more details.

Files changed (53)
  1. package/dist-npm/Claims/index.d.ts +73 -0
  2. package/dist-npm/Claims/index.d.ts.map +1 -0
  3. package/dist-npm/Claims/index.js +1669 -0
  4. package/dist-npm/Claims/index.js.map +1 -0
  5. package/dist-npm/Config/index.d.ts +41 -0
  6. package/dist-npm/Config/index.d.ts.map +1 -0
  7. package/dist-npm/Config/index.js +129 -0
  8. package/dist-npm/Config/index.js.map +1 -0
  9. package/dist-npm/Grounding/index.d.ts +40 -0
  10. package/dist-npm/Grounding/index.d.ts.map +1 -0
  11. package/dist-npm/Grounding/index.js +1433 -0
  12. package/dist-npm/Grounding/index.js.map +1 -0
  13. package/dist-npm/L2/index.d.ts +93 -0
  14. package/dist-npm/L2/index.d.ts.map +1 -0
  15. package/dist-npm/L2/index.js +1773 -0
  16. package/dist-npm/L2/index.js.map +1 -0
  17. package/dist-npm/Matchers/index.d.ts +101 -0
  18. package/dist-npm/Matchers/index.d.ts.map +1 -0
  19. package/dist-npm/Matchers/index.js +690 -0
  20. package/dist-npm/Matchers/index.js.map +1 -0
  21. package/dist-npm/Mode/index.d.ts +87 -0
  22. package/dist-npm/Mode/index.d.ts.map +1 -0
  23. package/dist-npm/Mode/index.js +117 -0
  24. package/dist-npm/Mode/index.js.map +1 -0
  25. package/dist-npm/Policy/index.d.ts +89 -0
  26. package/dist-npm/Policy/index.d.ts.map +1 -0
  27. package/dist-npm/Policy/index.js +143 -0
  28. package/dist-npm/Policy/index.js.map +1 -0
  29. package/dist-npm/Registry/index.d.ts +93 -0
  30. package/dist-npm/Registry/index.d.ts.map +1 -0
  31. package/dist-npm/Registry/index.js +818 -0
  32. package/dist-npm/Registry/index.js.map +1 -0
  33. package/dist-npm/Rules/index.d.ts +587 -0
  34. package/dist-npm/Rules/index.d.ts.map +1 -0
  35. package/dist-npm/Rules/index.js +6236 -0
  36. package/dist-npm/Rules/index.js.map +1 -0
  37. package/dist-npm/Rules/intents.d.ts +22 -0
  38. package/dist-npm/Rules/intents.d.ts.map +1 -0
  39. package/dist-npm/Rules/intents.js +242 -0
  40. package/dist-npm/Rules/intents.js.map +1 -0
  41. package/dist-npm/TraceReadiness/index.d.ts +42 -0
  42. package/dist-npm/TraceReadiness/index.d.ts.map +1 -0
  43. package/dist-npm/TraceReadiness/index.js +169 -0
  44. package/dist-npm/TraceReadiness/index.js.map +1 -0
  45. package/dist-npm/i18n/index.d.ts +44 -0
  46. package/dist-npm/i18n/index.d.ts.map +1 -0
  47. package/dist-npm/i18n/index.js +124 -0
  48. package/dist-npm/i18n/index.js.map +1 -0
  49. package/package.json +5 -17
  50. package/dist/cli/index.d.ts +0 -15
  51. package/dist/cli/index.d.ts.map +0 -1
  52. package/dist/cli/index.js +0 -807
  53. package/dist/cli/index.js.map +0 -1
package/dist/cli/index.js DELETED
@@ -1,807 +0,0 @@
1
- #!/usr/bin/env node
2
- "use strict";
3
- /**
4
- * TruthGuard CLI
5
- *
6
- * Usage:
7
- * npx truthguard debug <trace.json> Pretty-print grounding verdicts
8
- * npx truthguard run <dataset.jsonl> Run grounding on a dataset
9
- * npx truthguard run <dataset.jsonl> --gate gate.yml Run + evaluate gate
10
- * npx truthguard observe <trace.json> --store traces.jsonl Evaluate + persist
11
- * npx truthguard baseline save <dataset.jsonl> -o base.json -l main
12
- * npx truthguard baseline compare <dataset.jsonl> --baseline base.json
13
- * npx truthguard evaluate <trace.json> Evaluate a single trace (alias for debug)
14
- */
15
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
16
- if (k2 === undefined) k2 = k;
17
- var desc = Object.getOwnPropertyDescriptor(m, k);
18
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
19
- desc = { enumerable: true, get: function() { return m[k]; } };
20
- }
21
- Object.defineProperty(o, k2, desc);
22
- }) : (function(o, m, k, k2) {
23
- if (k2 === undefined) k2 = k;
24
- o[k2] = m[k];
25
- }));
26
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
27
- Object.defineProperty(o, "default", { enumerable: true, value: v });
28
- }) : function(o, v) {
29
- o["default"] = v;
30
- });
31
- var __importStar = (this && this.__importStar) || (function () {
32
- var ownKeys = function(o) {
33
- ownKeys = Object.getOwnPropertyNames || function (o) {
34
- var ar = [];
35
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
36
- return ar;
37
- };
38
- return ownKeys(o);
39
- };
40
- return function (mod) {
41
- if (mod && mod.__esModule) return mod;
42
- var result = {};
43
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
44
- __setModuleDefault(result, mod);
45
- return result;
46
- };
47
- })();
48
- Object.defineProperty(exports, "__esModule", { value: true });
49
- exports.main = main;
50
- const fs = __importStar(require("fs"));
51
- const path = __importStar(require("path"));
52
- const readline = __importStar(require("readline"));
53
- const Grounding_1 = require("../Grounding");
54
- const Runner_1 = require("../Runner");
55
- const Gate_1 = require("../Gate");
56
- const Baseline_1 = require("../Baseline");
57
- const Mode_1 = require("../Mode");
58
- const Store_1 = require("../Store");
59
- const Alerting_1 = require("../Alerting");
60
- const Auth_1 = require("../Auth");
61
- const Telemetry_1 = require("../Telemetry");
62
- const TraceReadiness_1 = require("../TraceReadiness");
63
- const MCP_1 = require("../MCP");
64
- const Demo_1 = require("../Demo");
65
- const VERSION = '0.1.0';
66
- function parseArgs(argv) {
67
- const args = argv.slice(2); // skip node + script
68
- const positional = [];
69
- const flags = {};
70
- let i = 0;
71
- while (i < args.length) {
72
- const arg = args[i];
73
- if (arg.startsWith('--')) {
74
- const key = arg.slice(2);
75
- const next = args[i + 1];
76
- if (next && !next.startsWith('--')) {
77
- flags[key] = next;
78
- i += 2;
79
- }
80
- else {
81
- flags[key] = 'true';
82
- i++;
83
- }
84
- }
85
- else if (arg.startsWith('-') && arg.length === 2) {
86
- const key = arg.slice(1);
87
- const next = args[i + 1];
88
- if (next && !next.startsWith('-')) {
89
- flags[key] = next;
90
- i += 2;
91
- }
92
- else {
93
- flags[key] = 'true';
94
- i++;
95
- }
96
- }
97
- else {
98
- positional.push(arg);
99
- i++;
100
- }
101
- }
102
- return {
103
- command: positional[0] ?? '',
104
- subCommand: positional[1],
105
- positional,
106
- flags,
107
- };
108
- }
109
- // ---------------------------------------------------------------------------
110
- // ANSI colors (no external deps)
111
- // ---------------------------------------------------------------------------
112
- const c = {
113
- reset: '\x1b[0m',
114
- bold: '\x1b[1m',
115
- dim: '\x1b[2m',
116
- red: '\x1b[31m',
117
- green: '\x1b[32m',
118
- yellow: '\x1b[33m',
119
- blue: '\x1b[34m',
120
- magenta: '\x1b[35m',
121
- cyan: '\x1b[36m',
122
- white: '\x1b[37m',
123
- bgRed: '\x1b[41m',
124
- bgGreen: '\x1b[42m',
125
- bgYellow: '\x1b[43m',
126
- };
127
- function colorScore(score) {
128
- const pct = (score * 100).toFixed(1) + '%';
129
- if (score >= 0.9)
130
- return `${c.bgGreen}${c.bold} ${pct} ${c.reset}`;
131
- if (score >= 0.7)
132
- return `${c.bgYellow}${c.bold} ${pct} ${c.reset}`;
133
- return `${c.bgRed}${c.bold} ${pct} ${c.reset}`;
134
- }
135
- function colorVerdict(verdict) {
136
- switch (verdict) {
137
- case 'GROUNDED': return `${c.green}✓ GROUNDED${c.reset}`;
138
- case 'APPROXIMATE_MATCH': return `${c.yellow}≈ APPROXIMATE${c.reset}`;
139
- case 'UNGROUNDED': return `${c.red}✗ UNGROUNDED${c.reset}`;
140
- case 'UNVERIFIABLE': return `${c.dim}? UNVERIFIABLE${c.reset}`;
141
- case 'MULTI_STEP': return `${c.blue}⇉ MULTI_STEP${c.reset}`;
142
- default: return verdict;
143
- }
144
- }
145
- function printDebugReport(report) {
146
- console.log('');
147
- // Trace quality assessment — show BEFORE the report when quality is not HIGH
148
- const tq = report.traceQuality;
149
- if (tq && tq.quality !== 'HIGH') {
150
- const qualityColor = tq.quality === 'LOW' ? c.red : c.yellow;
151
- console.log(`${c.bold}┌──────────────────────────────────────────────────┐${c.reset}`);
152
- console.log(`${c.bold}│${c.reset} ${qualityColor}⚠ Trace Quality: ${tq.quality}${c.reset} (${tq.completeness}% complete)${' '.repeat(Math.max(0, 20 - tq.quality.length))}${c.bold}│${c.reset}`);
153
- console.log(`${c.bold}└──────────────────────────────────────────────────┘${c.reset}`);
154
- console.log('');
155
- for (const item of tq.checklist) {
156
- const icon = item.present ? `${c.green}✔${c.reset}` : `${c.red}✘${c.reset}`;
157
- const req = item.required ? `${c.dim}(required)${c.reset}` : '';
158
- console.log(` ${icon} ${item.element} ${req}`);
159
- }
160
- if (tq.cannotVerify.length > 0) {
161
- console.log('');
162
- console.log(` ${c.bold}${c.yellow}Impact:${c.reset}`);
163
- for (const cv of tq.cannotVerify) {
164
- console.log(` ${c.dim}• ${cv}${c.reset}`);
165
- }
166
- }
167
- if (tq.recommendations.length > 0) {
168
- console.log('');
169
- console.log(` ${c.bold}Recommendation:${c.reset}`);
170
- for (const rec of tq.recommendations) {
171
- console.log(` ${c.cyan}→ ${rec}${c.reset}`);
172
- }
173
- }
174
- console.log('');
175
- }
176
- console.log(`${c.bold}╔══════════════════════════════════════════════════╗${c.reset}`);
177
- console.log(`${c.bold}║ TruthGuard Grounding Report ║${c.reset}`);
178
- console.log(`${c.bold}╚══════════════════════════════════════════════════╝${c.reset}`);
179
- console.log('');
180
- console.log(` Trace ID: ${c.cyan}${report.traceId}${c.reset}`);
181
- if (report.turnCount) {
182
- console.log(` Turns: ${c.cyan}${report.turnCount}${c.reset} (evaluating turn ${report.activeTurnIndex + 1})`);
183
- }
184
- console.log(` Score: ${colorScore(report.groundingScore)}`);
185
- console.log(` Claims: ${report.claimsTotal} total, ${c.green}${report.claimsVerified} verified${c.reset}, ${c.red}${report.claimsFailed} failed${c.reset}, ${c.dim}${report.claimsSkipped} skipped${c.reset}`);
186
- console.log('');
187
- // Verdict table
188
- if (report.verdicts.length > 0) {
189
- console.log(` ${c.bold}Claims:${c.reset}`);
190
- console.log(` ${'─'.repeat(70)}`);
191
- for (const v of report.verdicts) {
192
- const type = v.claim.type.toUpperCase().padEnd(6);
193
- const val = String(v.claim.value).slice(0, 25).padEnd(25);
194
- console.log(` ${c.dim}${type}${c.reset} ${val} ${colorVerdict(v.verdict)}`);
195
- if (v.verdict === 'UNGROUNDED' || v.verdict === 'APPROXIMATE_MATCH') {
196
- console.log(` ${c.dim}${v.explanation}${c.reset}`);
197
- if (v.deviation !== undefined) {
198
- console.log(` ${c.dim}deviation: ${(v.deviation * 100).toFixed(1)}%${c.reset}`);
199
- }
200
- }
201
- }
202
- console.log(` ${'─'.repeat(70)}`);
203
- }
204
- // Failures
205
- if (report.detectedFailures.length > 0) {
206
- console.log('');
207
- console.log(` ${c.bold}${c.red}Failures:${c.reset}`);
208
- for (const f of report.detectedFailures) {
209
- const conf = f.confidence === 'high' ? c.red : f.confidence === 'medium' ? c.yellow : c.dim;
210
- console.log(` ${conf}● [${f.confidence}] ${f.type}${c.reset}`);
211
- console.log(` ${f.description}`);
212
- }
213
- }
214
- // Hypotheses
215
- if (report.hypotheses.length > 0) {
216
- console.log('');
217
- console.log(` ${c.bold}${c.yellow}Hypotheses (low confidence):${c.reset}`);
218
- for (const h of report.hypotheses) {
219
- console.log(` ${c.dim}○ ${h.type}: ${h.description}${c.reset}`);
220
- }
221
- }
222
- console.log('');
223
- }
224
- // ---------------------------------------------------------------------------
225
- // Commands
226
- // ---------------------------------------------------------------------------
227
- function cmdDebug(args) {
228
- const tracePath = args.positional[1];
229
- if (!tracePath) {
230
- console.error('Usage: truthguard debug <trace.json>');
231
- console.error(` ${c.cyan}→ No trace file? Run: truthguard demo${c.reset}`);
232
- console.error(` ${c.cyan}→ Create one with: truthguard init${c.reset}`);
233
- process.exit(1);
234
- }
235
- if (!fs.existsSync(tracePath)) {
236
- console.error(`File not found: ${tracePath}`);
237
- console.error(` ${c.cyan}→ Create a sample trace: truthguard init${c.reset}`);
238
- console.error(` ${c.cyan}→ Check trace format: truthguard demo${c.reset}`);
239
- process.exit(1);
240
- }
241
- let content;
242
- try {
243
- content = fs.readFileSync(tracePath, 'utf-8');
244
- }
245
- catch {
246
- console.error(`Cannot read file: ${tracePath}`);
247
- process.exit(1);
248
- return;
249
- }
250
- let trace;
251
- try {
252
- const parsed = JSON.parse(content);
253
- trace = (parsed.trace && parsed.trace.steps) ? parsed.trace : parsed;
254
- }
255
- catch {
256
- console.error(`Invalid JSON in: ${tracePath}`);
257
- console.error(` ${c.cyan}→ Check your trace format: truthguard check-trace ${tracePath}${c.reset}`);
258
- console.error(` ${c.cyan}→ See a valid example: truthguard demo${c.reset}`);
259
- process.exit(1);
260
- return;
261
- }
262
- // Auto trace quality warning
263
- const tq = (0, TraceReadiness_1.assessTraceQuality)(trace);
264
- if (tq.quality === 'LOW') {
265
- console.log('');
266
- console.log(` ${c.red}⚠ Trace quality is LOW${c.reset} — evaluation may not be reliable.`);
267
- console.log(` ${c.cyan}→ Run: truthguard check-trace ${tracePath}${c.reset}`);
268
- console.log('');
269
- }
270
- const start = Date.now();
271
- const engine = new Grounding_1.GroundingEngine();
272
- const report = engine.evaluate(trace);
273
- const durationMs = Date.now() - start;
274
- printDebugReport(report);
275
- (0, Telemetry_1.trackEvaluation)(report, durationMs);
276
- }
277
- function cmdCheckTrace(args) {
278
- const tracePath = args.positional[1];
279
- if (!tracePath) {
280
- console.error('Usage: truthguard check-trace <trace.json>');
281
- console.error(` ${c.cyan}→ No trace? Create one: truthguard init${c.reset}`);
282
- process.exit(1);
283
- }
284
- if (!fs.existsSync(tracePath)) {
285
- console.error(`File not found: ${tracePath}`);
286
- console.error(` ${c.cyan}→ Create a sample trace: truthguard init${c.reset}`);
287
- process.exit(1);
288
- }
289
- const content = fs.readFileSync(tracePath, 'utf-8');
290
- const parsed = JSON.parse(content);
291
- const trace = (parsed.trace && parsed.trace.steps) ? parsed.trace : parsed;
292
- const tq = (0, TraceReadiness_1.assessTraceQuality)(trace);
293
- const qualityColor = tq.quality === 'HIGH' ? c.green : tq.quality === 'MEDIUM' ? c.yellow : c.red;
294
- console.log('');
295
- console.log(`${c.bold}Trace Quality: ${qualityColor}${tq.quality}${c.reset} (${tq.completeness}% complete)`);
296
- console.log('');
297
- for (const item of tq.checklist) {
298
- const icon = item.present ? `${c.green}✔${c.reset}` : `${c.red}✘${c.reset}`;
299
- const req = item.required ? `${c.dim}(required)${c.reset}` : '';
300
- console.log(` ${icon} ${item.element} ${req}`);
301
- }
302
- console.log('');
303
- console.log(` Grounding ready: ${tq.groundingReady ? `${c.green}yes${c.reset}` : `${c.red}no${c.reset}`}`);
304
- if (tq.cannotVerify.length > 0) {
305
- console.log('');
306
- console.log(` ${c.bold}${c.yellow}Cannot verify:${c.reset}`);
307
- for (const cv of tq.cannotVerify) {
308
- console.log(` ${c.dim}• ${cv}${c.reset}`);
309
- }
310
- }
311
- if (tq.recommendations.length > 0) {
312
- console.log('');
313
- console.log(` ${c.bold}Recommendation:${c.reset}`);
314
- for (const rec of tq.recommendations) {
315
- console.log(` ${c.cyan}→ ${rec}${c.reset}`);
316
- }
317
- }
318
- console.log('');
319
- if (!tq.groundingReady) {
320
- process.exit(1);
321
- }
322
- }
323
- async function cmdObserve(args) {
324
- const tracePath = args.positional[1];
325
- if (!tracePath) {
326
- console.error('Usage: truthguard observe <trace.json> --store <traces.jsonl> [--threshold 0.8] [--webhook <url>]');
327
- process.exit(1);
328
- }
329
- if (!fs.existsSync(tracePath)) {
330
- console.error(`File not found: ${tracePath}`);
331
- process.exit(1);
332
- }
333
- const content = fs.readFileSync(tracePath, 'utf-8');
334
- const trace = JSON.parse(content);
335
- const mode = args.flags['warn'] ? 'warn'
336
- : args.flags['block'] ? 'block'
337
- : 'observe';
338
- const storePath = args.flags['store'] ?? args.flags['s'];
339
- const threshold = args.flags['threshold'] ? parseFloat(args.flags['threshold']) : 0.75;
340
- const webhookUrl = args.flags['webhook'];
341
- const alerters = [new Alerting_1.ConsoleAlertDispatcher()];
342
- if (webhookUrl) {
343
- alerters.push(new Alerting_1.WebhookAlertDispatcher({ url: webhookUrl }));
344
- }
345
- const pipeline = new Mode_1.Pipeline({
346
- mode,
347
- threshold,
348
- store: storePath ? new Store_1.FileStore(storePath) : undefined,
349
- alerters: mode !== 'observe' ? alerters : undefined,
350
- });
351
- const result = await pipeline.evaluate(trace);
352
- printDebugReport(result.report);
353
- (0, Telemetry_1.trackEvaluation)(result.report);
354
- if (storePath) {
355
- console.log(` ${c.dim}Persisted to ${storePath}${c.reset}`);
356
- }
357
- if (result.alertsSent > 0) {
358
- console.log(` ${c.yellow}Alerts sent: ${result.alertsSent}${c.reset}`);
359
- }
360
- if (!result.allowed) {
361
- console.log(` ${c.bgRed}${c.bold} BLOCKED ${c.reset} ${result.fallbackResponse}`);
362
- process.exit(1);
363
- }
364
- }
365
- // ---------------------------------------------------------------------------
366
- // Existing commands (enhanced)
367
- // ---------------------------------------------------------------------------
368
- function cmdRun(args) {
369
- const datasetPath = args.positional[1];
370
- if (!datasetPath) {
371
- console.error('Usage: truthguard run <dataset.jsonl> [--gate gate.yml] [--tolerance 0.05]');
372
- process.exit(1);
373
- }
374
- if (!fs.existsSync(datasetPath)) {
375
- console.error(`File not found: ${datasetPath}`);
376
- process.exit(1);
377
- }
378
- const toleranceOverride = args.flags['tolerance']
379
- ? { numericRelativeTolerance: parseFloat(args.flags['tolerance']) }
380
- : undefined;
381
- const entries = (0, Runner_1.loadDataset)(datasetPath);
382
- const start = Date.now();
383
- const result = (0, Runner_1.runDataset)(entries, toleranceOverride ? { tolerances: toleranceOverride } : {});
384
- const durationMs = Date.now() - start;
385
- console.log(`\nDataset: ${path.basename(datasetPath)}`);
386
- console.log(`Entries: ${result.totalEntries} | Passed: ${result.passedEntries} | Failed: ${result.failedEntries}`);
387
- console.log(`Average grounding score: ${(result.averageScore * 100).toFixed(1)}%\n`);
388
- // Show per-entry summary
389
- for (const entry of result.entries) {
390
- const icon = entry.passed ? '✅' : '❌';
391
- const score = (entry.report.groundingScore * 100).toFixed(1);
392
- const failures = entry.report.detectedFailures.length;
393
- console.log(` ${icon} ${entry.name} — score: ${score}%, failures: ${failures}`);
394
- for (const a of entry.assertions) {
395
- if (!a.passed) {
396
- console.log(` ↳ ${a.check}: ${a.detail ?? 'FAILED'}`);
397
- }
398
- }
399
- }
400
- // Track telemetry for each entry
401
- for (const entry of result.entries) {
402
- (0, Telemetry_1.trackEvaluation)(entry.report, Math.round(durationMs / result.totalEntries));
403
- }
404
- // Gate evaluation
405
- if (args.flags['gate']) {
406
- const gatePath = args.flags['gate'];
407
- if (!fs.existsSync(gatePath)) {
408
- console.error(`\nGate config not found: ${gatePath}`);
409
- process.exit(1);
410
- }
411
- const gateConfig = (0, Gate_1.loadGateConfig)(gatePath);
412
- const gateResult = (0, Gate_1.evaluateGate)(gateConfig, result);
413
- console.log('\n' + (0, Gate_1.formatGateReport)(gateResult));
414
- if (!gateResult.passed) {
415
- process.exit(1);
416
- }
417
- }
418
- // Baseline comparison
419
- if (args.flags['baseline']) {
420
- const baselinePath = args.flags['baseline'];
421
- if (!fs.existsSync(baselinePath)) {
422
- console.error(`\nBaseline file not found: ${baselinePath}`);
423
- process.exit(1);
424
- }
425
- const baseline = (0, Baseline_1.loadBaseline)(baselinePath);
426
- const tolerance = args.flags['regression-tolerance']
427
- ? parseFloat(args.flags['regression-tolerance'])
428
- : 0.02;
429
- const comparison = (0, Baseline_1.compareToBaseline)(result, baseline, tolerance);
430
- console.log('\n' + (0, Baseline_1.formatBaselineReport)(comparison));
431
- if (!comparison.withinTolerance) {
432
- process.exit(1);
433
- }
434
- }
435
- }
436
- function cmdBaselineSave(args) {
437
- const datasetPath = args.positional[2];
438
- if (!datasetPath) {
439
- console.error('Usage: truthguard baseline save <dataset.jsonl> -o baseline.json -l main');
440
- process.exit(1);
441
- }
442
- if (!fs.existsSync(datasetPath)) {
443
- console.error(`File not found: ${datasetPath}`);
444
- process.exit(1);
445
- }
446
- const outputPath = args.flags['o'] ?? args.flags['output'] ?? '.ai-rcp-baseline.json';
447
- const label = args.flags['l'] ?? args.flags['label'] ?? 'baseline';
448
- const entries = (0, Runner_1.loadDataset)(datasetPath);
449
- const result = (0, Runner_1.runDataset)(entries);
450
- const snapshot = (0, Baseline_1.createSnapshot)(result, label);
451
- (0, Baseline_1.saveBaseline)(outputPath, snapshot);
452
- console.log(`Baseline saved to ${outputPath}`);
453
- console.log(` Label: ${label}`);
454
- console.log(` Score: ${(snapshot.averageScore * 100).toFixed(1)}%`);
455
- console.log(` Entries: ${snapshot.entryScores.length}`);
456
- console.log(` Failures: ${snapshot.totalFailures}`);
457
- }
458
- function cmdBaselineCompare(args) {
459
- const datasetPath = args.positional[2];
460
- if (!datasetPath) {
461
- console.error('Usage: truthguard baseline compare <dataset.jsonl> --baseline base.json [--tolerance 0.02]');
462
- process.exit(1);
463
- }
464
- if (!fs.existsSync(datasetPath)) {
465
- console.error(`File not found: ${datasetPath}`);
466
- process.exit(1);
467
- }
468
- const baselinePath = args.flags['baseline'] ?? '.ai-rcp-baseline.json';
469
- if (!fs.existsSync(baselinePath)) {
470
- console.error(`Baseline file not found: ${baselinePath}`);
471
- process.exit(1);
472
- }
473
- const tolerance = args.flags['tolerance']
474
- ? parseFloat(args.flags['tolerance'])
475
- : 0.02;
476
- const entries = (0, Runner_1.loadDataset)(datasetPath);
477
- const result = (0, Runner_1.runDataset)(entries);
478
- const baseline = (0, Baseline_1.loadBaseline)(baselinePath);
479
- const comparison = (0, Baseline_1.compareToBaseline)(result, baseline, tolerance);
480
- console.log((0, Baseline_1.formatBaselineReport)(comparison));
481
- if (!comparison.withinTolerance) {
482
- process.exit(1);
483
- }
484
- }
485
- function cmdEvaluate(args) {
486
- // evaluate is now an alias for debug
487
- cmdDebug(args);
488
- }
489
- // ---------------------------------------------------------------------------
490
- // Onboarding commands
491
- // ---------------------------------------------------------------------------
492
- function cmdDemo() {
493
- const engine = new Grounding_1.GroundingEngine();
494
- // Part 1: Good trace
495
- const good = (0, Demo_1.buildGoldenTrace)();
496
- const goodReport = engine.evaluate(good);
497
- console.log('');
498
- console.log(`${c.bold}${c.green}━━━ DEMO: What a GOOD trace looks like ━━━${c.reset}`);
499
- console.log('');
500
- console.log(`${c.dim}Trace has: system_prompt → user_input → tool_call → tool_output → final_response${c.reset}`);
501
- console.log(`${c.dim}The agent's response correctly cites the tool's data.${c.reset}`);
502
- printDebugReport(goodReport);
503
- // Part 2: Bad trace
504
- const bad = (0, Demo_1.buildBadTrace)();
505
- const badReport = engine.evaluate(bad);
506
- console.log(`${c.bold}${c.red}━━━ DEMO: What a BAD trace looks like ━━━${c.reset}`);
507
- console.log('');
508
- console.log(`${c.dim}The tool returned EMPTY data, but the agent fabricated 5 absences.${c.reset}`);
509
- console.log(`${c.dim}TruthGuard detects this automatically:${c.reset}`);
510
- printDebugReport(badReport);
511
- // Part 3: Next steps
512
- console.log(`${c.bold}━━━ What's next? ━━━${c.reset}`);
513
- console.log('');
514
- console.log(` ${c.cyan}1.${c.reset} Create your first trace: ${c.bold}truthguard init${c.reset}`);
515
- console.log(` ${c.cyan}2.${c.reset} Evaluate your trace: ${c.bold}truthguard debug my-trace.json${c.reset}`);
516
- console.log(` ${c.cyan}3.${c.reset} Check trace quality: ${c.bold}truthguard check-trace my-trace.json${c.reset}`);
517
- console.log(` ${c.cyan}4.${c.reset} Run in CI: ${c.bold}truthguard run dataset.jsonl --gate gate.yml${c.reset}`);
518
- console.log(` ${c.cyan}5.${c.reset} Use in IDE (MCP): ${c.bold}truthguard mcp${c.reset}`);
519
- console.log('');
520
- console.log(` ${c.dim}Docs: https://github.com/truthguard/truthguard#readme${c.reset}`);
521
- console.log('');
522
- }
523
- function cmdInit(args) {
524
- const lang = args.flags['language'] ?? args.flags['l'] ?? 'json';
525
- const output = args.flags['output'] ?? args.flags['o'];
526
- if (lang === 'ts' || lang === 'typescript') {
527
- const filePath = output ?? 'truthguard-example.ts';
528
- if (fs.existsSync(filePath)) {
529
- console.error(`File already exists: ${filePath}`);
530
- console.error(` ${c.cyan}→ Choose a different name with: truthguard init -o another-name.ts${c.reset}`);
531
- process.exit(1);
532
- }
533
- fs.writeFileSync(filePath, Demo_1.TRACE_TEMPLATE_TS, 'utf-8');
534
- console.log('');
535
- console.log(`${c.green}✓${c.reset} Created ${c.bold}${filePath}${c.reset}`);
536
- console.log('');
537
- console.log(` ${c.bold}Next steps:${c.reset}`);
538
- console.log(` 1. Edit ${filePath} — replace YOUR placeholders with real data`);
539
- console.log(` 2. Run: ${c.cyan}npx ts-node ${filePath}${c.reset}`);
540
- console.log('');
541
- }
542
- else {
543
- const filePath = output ?? 'my-trace.json';
544
- if (fs.existsSync(filePath)) {
545
- console.error(`File already exists: ${filePath}`);
546
- console.error(` ${c.cyan}→ Choose a different name with: truthguard init -o another-name.json${c.reset}`);
547
- process.exit(1);
548
- }
549
- fs.writeFileSync(filePath, Demo_1.TRACE_TEMPLATE_JSON, 'utf-8');
550
- console.log('');
551
- console.log(`${c.green}✓${c.reset} Created ${c.bold}${filePath}${c.reset}`);
552
- console.log('');
553
- console.log(` ${c.bold}Next steps:${c.reset}`);
554
- console.log(` 1. Edit ${filePath} — replace YOUR placeholders with real data`);
555
- console.log(` 2. Check: ${c.cyan}truthguard check-trace ${filePath}${c.reset}`);
556
- console.log(` 3. Run: ${c.cyan}truthguard debug ${filePath}${c.reset}`);
557
- console.log('');
558
- }
559
- }
560
- function cmdMcp() {
561
- (0, MCP_1.startMCPServer)();
562
- }
563
- function printHelp() {
564
- console.log(`
565
- ${c.bold}TruthGuard${c.reset} v${VERSION} — AI Reliability Control Plane
566
-
567
- ${c.bold}Quick Start:${c.reset}
568
- ${c.green}demo${c.reset} See TruthGuard in action (no setup needed)
569
- ${c.green}init${c.reset} Create a sample trace file
570
- --language <json|ts> Language (default: json)
571
- -o, --output <path> Output file path
572
-
573
- ${c.bold}Authentication:${c.reset}
574
- ${c.cyan}login${c.reset} Authenticate with TruthGuard cloud
575
- --key <api-key> API key (or enter interactively)
576
- --no-telemetry Disable anonymous usage data
577
- ${c.dim}logout${c.reset} Remove stored credentials
578
- ${c.dim}status${c.reset} Show current auth & telemetry status
579
-
580
- ${c.bold}Funnel Modes:${c.reset}
581
- ${c.green}debug${c.reset} <trace.json> Pretty-print grounding verdicts (local dev)
582
- ${c.cyan}observe${c.reset} <trace.json> Evaluate + persist (production monitoring)
583
- --store <traces.jsonl> Persistence file
584
- --threshold <0.75> Score threshold
585
- --warn Enable alerts when below threshold
586
- --block Block response when below threshold
587
- --webhook <url> Send alerts to webhook URL
588
-
589
- ${c.bold}Trace Quality:${c.reset}
590
- ${c.magenta}check-trace${c.reset} <trace.json> Check trace completeness before evaluation
591
- Shows what can/cannot be verified
592
-
593
- ${c.bold}Dataset Testing:${c.reset}
594
- run <dataset.jsonl> Run grounding on a golden dataset
595
- --gate <gate.yml> Also evaluate a CI gate
596
- --baseline <baseline.json> Also compare against baseline
597
- --tolerance <0.05> Override numeric tolerance
598
- --regression-tolerance <0.02> Max allowed score drop from baseline
599
-
600
- ${c.bold}Baseline Management:${c.reset}
601
- baseline save <dataset.jsonl> Save current results as baseline
602
- -o, --output <path> Output file (default: .ai-rcp-baseline.json)
603
- -l, --label <name> Label (e.g. "main", commit SHA)
604
-
605
- baseline compare <dataset.jsonl> Compare current run against a baseline
606
- --baseline <path> Baseline file to compare against
607
- --tolerance <0.02> Max allowed score regression
608
-
609
- ${c.bold}MCP Server (IDE Integration):${c.reset}
610
- ${c.magenta}mcp${c.reset} Start MCP server on stdio (for Copilot, Cursor, etc.)
611
- Use in VS Code mcp.json:
612
- { "command": "npx", "args": ["-y", "truthguard", "mcp"] }
613
-
614
- ${c.bold}Other:${c.reset}
615
- evaluate <trace.json> Evaluate a single trace (alias for debug)
616
- --version Show version
617
- help Show this help message
618
-
619
- ${c.bold}Environment:${c.reset}
620
- TRUTHGUARD_API_KEY API key (alternative to login)
621
- TRUTHGUARD_TELEMETRY=0 Disable telemetry
622
- `.trim());
623
- }
624
- // ---------------------------------------------------------------------------
625
- // Main
626
- // ---------------------------------------------------------------------------
627
- // ---------------------------------------------------------------------------
628
- // Auth commands
629
- // ---------------------------------------------------------------------------
630
- function prompt(question) {
631
- const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
632
- return new Promise((resolve) => {
633
- rl.question(question, (answer) => {
634
- rl.close();
635
- resolve(answer.trim());
636
- });
637
- });
638
- }
639
/**
 * `login` command: obtain an API key (from --key / -k or interactively),
 * authenticate through Auth_1.login and print the outcome.
 *
 * Exits with status 1 when no key is supplied or the key is reported invalid.
 * When the login call returns `validation === null`, a warning about the
 * unreachable cloud service is printed and the command returns early.
 * If a benchmark is returned it is rendered after the account details.
 *
 * @param {object} args - Parsed CLI arguments; only `args.flags` is read.
 * @returns {Promise<void>}
 */
async function cmdLogin(args) {
    const flags = args.flags;
    let key = flags['key'] ?? flags['k'];
    const telemetryOff = flags['no-telemetry'] === 'true';

    // No key on the command line: fall back to an interactive prompt.
    if (!key) {
        console.log(`\n${c.bold}TruthGuard Login${c.reset}\n`);
        console.log(`Get your API key from your project administrator\n`);
        key = await prompt(`${c.bold}API Key:${c.reset} `);
    }
    if (!key) {
        console.error('No API key provided.');
        process.exit(1);
    }

    console.log(`\n ${c.dim}Authenticating...${c.reset}`);
    const result = await (0, Auth_1.login)(key, { telemetry: !telemetryOff });
    const validation = result.validation;
    const benchmark = result.benchmark;

    // A null validation is reported as "cloud service unreachable".
    if (validation === null) {
        const offlineTelemetry = telemetryOff ? 'disabled' : 'enabled (opt-in)';
        console.log(` ${c.yellow}⚠ Cloud service unreachable — credentials saved for offline use.${c.reset}`);
        console.log(` ${c.dim}Telemetry: ${offlineTelemetry}${c.reset}`);
        console.log('');
        return;
    }

    if (!validation.valid) {
        console.error(` ${c.red}✗ Invalid API key.${c.reset}`);
        process.exit(1);
    }

    console.log(` ${c.green}✓ Authenticated${c.reset}`);
    if (validation.email) {
        console.log(` ${c.dim}Email: ${validation.email}${c.reset}`);
    }
    if (validation.plan) {
        console.log(` ${c.dim}Plan: ${validation.plan}${c.reset}`);
    }
    const telemetryLabel = telemetryOff ? 'disabled' : 'enabled';
    console.log(` ${c.dim}Telemetry: ${telemetryLabel}${c.reset}`);

    // Nothing more to show without a benchmark.
    if (!benchmark) {
        console.log('');
        return;
    }

    // Benchmark summary, framed by a 40-character rule.
    const divider = ` ${'─'.repeat(40)}`;
    console.log('');
    console.log(` ${c.bold}📊 Your Benchmark${c.reset}`);
    console.log(divider);
    if (benchmark.userAvgScore !== undefined) {
        console.log(` Your avg score: ${colorScore(benchmark.userAvgScore)}`);
    }
    console.log(` Global avg score: ${colorScore(benchmark.globalAvgScore)}`);
    if (benchmark.percentile !== undefined) {
        console.log(` Percentile: ${c.bold}top ${100 - benchmark.percentile}%${c.reset}`);
    }
    if (benchmark.topFailures.length > 0) {
        console.log(`\n ${c.bold}Top failures across users:${c.reset}`);
        // Only the first three failures are listed.
        for (const failure of benchmark.topFailures.slice(0, 3)) {
            console.log(` ${c.dim}● ${failure}${c.reset}`);
        }
    }
    if (benchmark.recommendations.length > 0) {
        console.log(`\n ${c.bold}💡 Recommendations:${c.reset}`);
        for (const tip of benchmark.recommendations) {
            console.log(` ${c.cyan}→ ${tip}${c.reset}`);
        }
    }
    console.log(divider);
    console.log('');
}
697
/**
 * `logout` command: call Auth_1.logout and report whether anything was removed.
 *
 * A truthy result prints the "Logged out" confirmation; a falsy one prints
 * "Not logged in."
 */
function cmdLogout() {
    const hadCredentials = (0, Auth_1.logout)();
    const message = hadCredentials
        ? `${c.green}✓ Logged out. Credentials removed.${c.reset}`
        : `${c.dim}Not logged in.${c.reset}`;
    console.log(message);
}
706
/**
 * `status` command: print the current authentication state.
 *
 * Looks first at the config returned by Auth_1.loadAuthConfig, then at the
 * TRUTHGUARD_API_KEY environment variable. The API key is never printed in
 * full: only a masked form is shown.
 */
function cmdStatus() {
    const HEAD_VISIBLE = 8;
    const TAIL_VISIBLE = 4;
    const MIN_HIDDEN = 4;
    /**
     * Mask a key as `<first 8>...<last 4>`.
     * Keys too short to keep at least MIN_HIDDEN characters hidden (and
     * non-string values) are fully masked; otherwise head and tail would
     * overlap and the whole key would be echoed to the terminal.
     */
    const maskApiKey = (key) => {
        if (typeof key !== 'string' || key.length < HEAD_VISIBLE + TAIL_VISIBLE + MIN_HIDDEN) {
            return '*'.repeat(8);
        }
        return `${key.slice(0, HEAD_VISIBLE)}...${key.slice(-TAIL_VISIBLE)}`;
    };

    const config = (0, Auth_1.loadAuthConfig)();
    const envKey = process.env['TRUTHGUARD_API_KEY'];
    console.log(`\n${c.bold}TruthGuard Status${c.reset}\n`);
    if (config) {
        // Stored credentials take precedence over the environment variable.
        console.log(` API Key: ${c.green}${maskApiKey(config.apiKey)}${c.reset}`);
        console.log(` API URL: ${c.dim}${config.apiUrl}${c.reset}`);
        console.log(` Telemetry: ${config.telemetryEnabled ? `${c.green}enabled${c.reset}` : `${c.dim}disabled${c.reset}`}`);
        console.log(` Logged in: ${c.dim}${config.createdAt}${c.reset}`);
    }
    else if (envKey) {
        console.log(` API Key: ${c.green}${maskApiKey(envKey)}${c.reset} ${c.dim}(from TRUTHGUARD_API_KEY)${c.reset}`);
        // In this branch telemetry is shown as disabled only when TRUTHGUARD_TELEMETRY is exactly '0'.
        console.log(` Telemetry: ${process.env['TRUTHGUARD_TELEMETRY'] === '0' ? `${c.dim}disabled${c.reset}` : `${c.green}enabled${c.reset}`}`);
    }
    else {
        console.log(` ${c.dim}Not logged in. Run: truthguard login${c.reset}`);
    }
    console.log('');
}
727
/**
 * CLI entry point: parse the argument vector and dispatch to the matching
 * command handler.
 *
 * `--version` / `-v` print the version and return before any command runs.
 * Unknown commands (and unknown `baseline` subcommands) print an error plus
 * the help text and exit with status 1.
 *
 * @param {string[]} [argv=process.argv] - Raw argument vector handed to parseArgs.
 */
function main(argv = process.argv) {
    const args = parseArgs(argv);

    // parseArgs reports --version / -v as flags rather than as a command.
    if (args.flags['version'] || args.flags['v']) {
        console.log(`truthguard v${VERSION}`);
        return;
    }

    // Fire-and-forget telemetry shutdown used after synchronous commands;
    // a failure here is deliberately ignored.
    const flushTelemetry = () => {
        (0, Telemetry_1.shutdown)().catch(() => { });
    };

    // Promise-returning commands: shut telemetry down on success, report the
    // error message and exit 1 on failure.
    const runAsync = (pending) => {
        pending.then(() => (0, Telemetry_1.shutdown)()).catch((err) => {
            console.error('Error:', err.message);
            process.exit(1);
        });
    };

    const runBaseline = () => {
        switch (args.subCommand) {
            case 'save':
                cmdBaselineSave(args);
                break;
            case 'compare':
                cmdBaselineCompare(args);
                break;
            default:
                console.error(`Unknown baseline subcommand: ${args.subCommand}`);
                printHelp();
                process.exit(1);
        }
    };

    const showHelp = () => {
        printHelp();
    };

    // Command name -> action. A Map is used so that only the names listed
    // here match, never inherited object properties.
    const commands = new Map([
        ['login', () => { runAsync(cmdLogin(args)); }],
        ['logout', () => { cmdLogout(); }],
        ['status', () => { cmdStatus(); }],
        ['demo', () => { cmdDemo(); }],
        ['init', () => { cmdInit(args); }],
        ['mcp', () => { cmdMcp(); }],
        ['debug', () => { cmdDebug(args); flushTelemetry(); }],
        ['check-trace', () => { cmdCheckTrace(args); }],
        ['observe', () => { runAsync(cmdObserve(args)); }],
        ['run', () => { cmdRun(args); flushTelemetry(); }],
        ['baseline', runBaseline],
        ['evaluate', () => { cmdEvaluate(args); flushTelemetry(); }],
        ['help', showHelp],
        ['--help', showHelp],
        ['-h', showHelp],
        ['', showHelp],
    ]);

    const action = commands.get(args.command);
    if (action === undefined) {
        console.error(`Unknown command: ${args.command}`);
        console.error(` ${c.cyan}→ Try: truthguard demo${c.reset}`);
        printHelp();
        process.exit(1);
        return;
    }
    action();
}
803
// Start the CLI only when this file is the process entry point, not when it
// is loaded via require() from another module.
const isEntryPoint = require.main === module;
if (isEntryPoint) {
    main();
}
807
- //# sourceMappingURL=index.js.map