truthguard-ai 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truthguard-ai might be problematic. Click here for more details.

Files changed (53) hide show
  1. package/dist-npm/Claims/index.d.ts +73 -0
  2. package/dist-npm/Claims/index.d.ts.map +1 -0
  3. package/dist-npm/Claims/index.js +1669 -0
  4. package/dist-npm/Claims/index.js.map +1 -0
  5. package/dist-npm/Config/index.d.ts +41 -0
  6. package/dist-npm/Config/index.d.ts.map +1 -0
  7. package/dist-npm/Config/index.js +129 -0
  8. package/dist-npm/Config/index.js.map +1 -0
  9. package/dist-npm/Grounding/index.d.ts +40 -0
  10. package/dist-npm/Grounding/index.d.ts.map +1 -0
  11. package/dist-npm/Grounding/index.js +1433 -0
  12. package/dist-npm/Grounding/index.js.map +1 -0
  13. package/dist-npm/L2/index.d.ts +93 -0
  14. package/dist-npm/L2/index.d.ts.map +1 -0
  15. package/dist-npm/L2/index.js +1773 -0
  16. package/dist-npm/L2/index.js.map +1 -0
  17. package/dist-npm/Matchers/index.d.ts +101 -0
  18. package/dist-npm/Matchers/index.d.ts.map +1 -0
  19. package/dist-npm/Matchers/index.js +690 -0
  20. package/dist-npm/Matchers/index.js.map +1 -0
  21. package/dist-npm/Mode/index.d.ts +87 -0
  22. package/dist-npm/Mode/index.d.ts.map +1 -0
  23. package/dist-npm/Mode/index.js +117 -0
  24. package/dist-npm/Mode/index.js.map +1 -0
  25. package/dist-npm/Policy/index.d.ts +89 -0
  26. package/dist-npm/Policy/index.d.ts.map +1 -0
  27. package/dist-npm/Policy/index.js +143 -0
  28. package/dist-npm/Policy/index.js.map +1 -0
  29. package/dist-npm/Registry/index.d.ts +93 -0
  30. package/dist-npm/Registry/index.d.ts.map +1 -0
  31. package/dist-npm/Registry/index.js +818 -0
  32. package/dist-npm/Registry/index.js.map +1 -0
  33. package/dist-npm/Rules/index.d.ts +587 -0
  34. package/dist-npm/Rules/index.d.ts.map +1 -0
  35. package/dist-npm/Rules/index.js +6236 -0
  36. package/dist-npm/Rules/index.js.map +1 -0
  37. package/dist-npm/Rules/intents.d.ts +22 -0
  38. package/dist-npm/Rules/intents.d.ts.map +1 -0
  39. package/dist-npm/Rules/intents.js +242 -0
  40. package/dist-npm/Rules/intents.js.map +1 -0
  41. package/dist-npm/TraceReadiness/index.d.ts +42 -0
  42. package/dist-npm/TraceReadiness/index.d.ts.map +1 -0
  43. package/dist-npm/TraceReadiness/index.js +169 -0
  44. package/dist-npm/TraceReadiness/index.js.map +1 -0
  45. package/dist-npm/i18n/index.d.ts +44 -0
  46. package/dist-npm/i18n/index.d.ts.map +1 -0
  47. package/dist-npm/i18n/index.js +124 -0
  48. package/dist-npm/i18n/index.js.map +1 -0
  49. package/package.json +5 -17
  50. package/dist/cli/index.d.ts +0 -15
  51. package/dist/cli/index.d.ts.map +0 -1
  52. package/dist/cli/index.js +0 -807
  53. package/dist/cli/index.js.map +0 -1
@@ -0,0 +1,1433 @@
1
+ "use strict";
2
+ /**
3
+ * Grounding Engine
4
+ *
5
+ * Orchestrates the full grounding validation pipeline:
6
+ *
7
+ * 1. Extract claims from the final response.
8
+ * 2. For each claim, search tool outputs for a matching source value.
9
+ * 3. Produce a per-claim ClaimVerdict.
10
+ * 4. Run the grounding failure rules.
11
+ * 5. Return a GroundingReport.
12
+ */
13
+ Object.defineProperty(exports, "__esModule", { value: true });
14
+ exports.GroundingEngine = void 0;
15
+ const types_1 = require("../types");
16
+ const Config_1 = require("../Config");
17
+ const Claims_1 = require("../Claims");
18
+ const node_crypto_1 = require("node:crypto");
19
+ const Matchers_1 = require("../Matchers");
20
+ const Rules_1 = require("../Rules");
21
+ const Trace_1 = require("../Trace");
22
+ const Registry_1 = require("../Registry");
23
+ const TraceReadiness_1 = require("../TraceReadiness");
24
+ const L2_1 = require("../L2");
25
// ---------------------------------------------------------------------------
// GroundingEngine options
// ---------------------------------------------------------------------------
/** Fields whose string values are likely person/entity names. */
// Use (?:^|[_\W]) / (?:$|[_\W]) instead of \b so that underscore-separated
// keys like "employee_name" or "full_name" still match individual words.
const NAME_FIELD_RE = /(?:^|[_\W])(name|first_name|last_name|full_name|employee|agent|assignee|customer|user|patient|client|manager|supervisor|owner)(?:$|[_\W])/i;
/** Fields whose string values are likely dates/timestamps. */
// "expir" is a deliberate stem so it also matches "expires"/"expiry"/"expiration";
// "datum" covers the Serbian/German word for "date".
const DATE_FIELD_RE = /(?:^|[_\W])(date|datum|created|updated|modified|timestamp|time|start|end|due|deadline|expir|birth|dob|hired|joined|scheduled|completed|resolved|closed|opened)(?:$|[_\W])/i;
/**
 * Extract the leaf field name from a dotted key path.
 * E.g. "tardiness_from_daily_stats.by_date.2026-03-15.people[0].name" → "name".
 * Keeps parent path segments (like "by_date") from triggering field-type
 * heuristics that are meant for the leaf key only.
 *
 * @param {string | undefined} path - Dotted key path, possibly with array indices.
 * @returns {string | undefined} The final path segment; the original path when
 *   the last segment is empty; undefined for a falsy input.
 */
function leafFieldName(path) {
    if (!path) {
        return undefined;
    }
    // Drop array indices such as "[0]" before splitting on dots.
    const segments = path.replace(/\[\d+\]/g, '').split('.');
    const leaf = segments.at(-1);
    return leaf || path;
}
// ---------------------------------------------------------------------------
// Error response detection — early exit for API failures (HTTP 529, etc.)
// ---------------------------------------------------------------------------
const ERROR_RESPONSE_PATTERNS = [
    /^\[API Error\]/i,
    /\b(?:HTTP|status)\s+[45]\d{2}\b/i,
    /\b(?:overloaded|preoptere[cć]en|service unavailable)\b/i,
    /\bAI servis.*(?:nije dostupan|preoptere[cć]en)\b/i,
    /\b(?:server|service)\s+(?:is\s+)?(?:unavailable|down|error)\b/i,
];
/**
 * Decide whether a final-response step is an error message rather than a
 * genuine AI answer.
 *
 * @param {{metadata?: object, content?: string}} step - Final-response trace step.
 * @returns {boolean} true when the step carries the explicit apiError metadata
 *   flag, or its (short) content matches a known error-message pattern.
 */
function isErrorResponse(step) {
    // Explicit metadata flag set by buildErrorTrace / the proxy wins outright.
    if (step.metadata?.apiError === true) {
        return true;
    }
    const text = (step.content ?? '').trim();
    // Error responses may include long URLs / org IDs / headers, so allow up
    // to 800 chars. Beyond that, pattern hits are too likely incidental.
    if (text.length >= 800) {
        return false;
    }
    return ERROR_RESPONSE_PATTERNS.some((pattern) => pattern.test(text));
}
/**
 * Build a DetectedFailure of type 'orchestration.ai_unavailable' from an
 * error step.
 *
 * @param {{content?: string, metadata?: {statusCode?: number, errorType?: string}}} step
 *   The error-bearing final-response step.
 * @returns {object} A critical/high-confidence primary failure whose
 *   description uses the HTTP status code when present, otherwise the first
 *   120 chars of the step content.
 */
function detectAIUnavailableFromStep(step) {
    const content = (step.content ?? '').trim();
    const statusCode = step.metadata?.statusCode;
    const errorType = step.metadata?.errorType ?? 'unknown';
    // Prefer the precise status code; fall back to a truncated message excerpt.
    const description = statusCode
        ? `AI service returned HTTP ${statusCode} error instead of a response.`
        : `AI service error: ${content.substring(0, 120)}`;
    const failure = {
        type: 'orchestration.ai_unavailable',
        description,
        confidence: 'high',
        severity: 'critical',
        role: 'primary',
        claimIds: [],
        diagnosis: `The AI provider did not return a usable response (${errorType}). No claims can be extracted or verified.`,
        suggestedFix: 'Retry the request. If persistent, check provider status page or switch to a fallback model.',
    };
    return failure;
}
/**
 * Extract single-word proper names from tool outputs that are mentioned
 * in the response but were not captured by the 2+-word NAME_REGEX in Claims.
 * This is a context-aware pass: we only create name claims for words that
 * actually exist as person-name values in tool data.
 *
 * @param trace - Trace whose tool-output steps supply candidate names.
 * @param responseText - Final response text to search for name mentions.
 * @param stepId - Step id recorded as the source of any new claim.
 * @param existingClaims - Claims already extracted; used for de-duplication.
 * @returns Array of new 'name' claims (possibly empty).
 */
function extractToolContextNameClaims(trace, responseText, stepId, existingClaims) {
    const toolSteps = Trace_1.TraceUtils.getToolOutputSteps(trace);
    if (!toolSteps.length)
        return [];
    // Collect candidate name strings from name-like fields
    const nameStrings = new Set();
    const fullNameStrings = new Set();
    for (const step of toolSteps) {
        for (const to of step.toolOutputs ?? []) {
            collectNameStrings(to.output, nameStrings, fullNameStrings);
        }
    }
    if (nameStrings.size === 0)
        return [];
    // Already-claimed name values (lowercase). Both the full value and its
    // individual words are indexed so "Ana" is skipped when "Ana Jović" exists.
    const existingNames = new Set(existingClaims
        .filter((c) => c.type === 'name')
        .flatMap((c) => {
        const v = String(c.value).toLowerCase();
        return [v, ...v.split(/\s+/)];
    }));
    // Build per-word ambiguity count: how many distinct full names contain each word.
    // Words that appear in 2+ different full names (e.g. shared surnames like "Tasić"
    // in "Filip Tasić" and "Slađana Tasić") are ambiguous and should NOT become
    // standalone claims — they could match the wrong person.
    const wordFullNameCount = new Map();
    for (const fullName of fullNameStrings) {
        const tokens = fullName.toLowerCase().split(/\s+/);
        for (const t of tokens) {
            wordFullNameCount.set(t, (wordFullNameCount.get(t) ?? 0) + 1);
        }
    }
    const lowResp = responseText.toLowerCase();
    const claims = [];
    for (const name of nameStrings) {
        const low = name.toLowerCase();
        // Skip if already covered by an existing claim
        if (existingNames.has(low))
            continue;
        // Must be a single word (multi-word names are caught by NAME_REGEX)
        if (name.includes(' '))
            continue;
        // Must be at least 3 chars to avoid matching "Mr", "Dr" etc.
        if (name.length < 3)
            continue;
        // Skip words that appear in 2+ different full names (ambiguous — shared surname)
        if ((wordFullNameCount.get(low) ?? 0) > 1)
            continue;
        // Check if the name appears in the response as a whole word.
        // NOTE(review): \b boundaries are ASCII-based — names starting/ending
        // with non-ASCII letters (e.g. "Đorđe") may not anchor as intended; confirm.
        const re = new RegExp(`\\b${escapeRegExp(low)}\\b`, 'i');
        if (!re.test(lowResp))
            continue;
        // Find the raw match position for rawText (original casing from the response)
        const match = new RegExp(`\\b${escapeRegExp(name)}\\b`, 'i').exec(responseText);
        const rawText = match ? match[0] : name;
        claims.push({
            claimId: (0, node_crypto_1.randomUUID)(),
            type: 'name',
            value: rawText,
            rawText,
            source: {
                stepId,
                role: 'final_response',
                rawText,
            },
        });
        // Mark as claimed so duplicate mentions don't produce duplicate claims.
        existingNames.add(low);
    }
    return claims;
}
/**
 * Recursively walk a tool-output value and collect capitalized words found in
 * name-like string fields (per NAME_FIELD_RE).
 *
 * "Ana Jović" under a name-like key contributes "Ana" and "Jović" to `names`
 * and the whole "Ana Jović" to `fullNames` (used for shared-surname
 * ambiguity detection by the caller).
 *
 * @param {*} output - Arbitrary JSON value from a tool output.
 * @param {Set<string>} names - Accumulator for individual capitalized words.
 * @param {Set<string>} fullNames - Accumulator for multi-word name values.
 * @param {string} [key] - Field key under which `output` was found.
 */
function collectNameStrings(output, names, fullNames, key) {
    if (output === null || output === undefined) {
        return;
    }
    if (typeof output === 'string') {
        // Only harvest strings that sit under a name-like field key.
        if (!key || !NAME_FIELD_RE.test(key)) {
            return;
        }
        const trimmed = output.trim();
        // Ignore degenerate (<2 chars) and implausibly long (>=100 chars) values.
        if (trimmed.length < 2 || trimmed.length >= 100) {
            return;
        }
        // Track the full name string for ambiguity detection.
        if (trimmed.includes(' ')) {
            fullNames.add(trimmed);
        }
        for (const word of trimmed.split(/\s+/)) {
            // Keep only words starting with an uppercase letter (Unicode-aware).
            if (word.length >= 2 && /^[\p{Lu}]/u.test(word)) {
                names.add(word);
            }
        }
        return;
    }
    if (Array.isArray(output)) {
        // Array items inherit the parent key (e.g. "employees": ["Ana", ...]).
        for (const item of output) {
            collectNameStrings(item, names, fullNames, key);
        }
        return;
    }
    if (typeof output === 'object') {
        for (const [childKey, childValue] of Object.entries(output)) {
            collectNameStrings(childValue, names, fullNames, childKey);
        }
    }
}
/**
 * Escape regex metacharacters so a string can be embedded verbatim in a
 * dynamically-built RegExp.
 *
 * @param {string} s - Raw text to escape.
 * @returns {string} Input with every regex special character backslash-escaped.
 */
function escapeRegExp(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, (ch) => `\\${ch}`);
}
+ // ---------------------------------------------------------------------------
195
+ // Verdict builder helpers
196
+ // ---------------------------------------------------------------------------
197
/**
 * Build a ClaimVerdict marking a claim as UNVERIFIABLE.
 *
 * @param claim - The claim that could not be checked against any source.
 * @param reason - Human-readable explanation of why verification was impossible.
 * @returns A verdict with verdict 'UNVERIFIABLE' and ruleUsed 'unverifiable'.
 */
function buildUnverifiable(claim, reason) {
    const verdict = {
        claim,
        verdict: 'UNVERIFIABLE',
        explanation: reason,
        ruleUsed: 'unverifiable',
    };
    return verdict;
}
/**
 * Compute severity based on claim type and verdict.
 *
 * - HIGH:   UNGROUNDED count or date (exact match required, any deviation is serious)
 * - MEDIUM: UNGROUNDED number (tolerance-based, deviation is meaningful),
 *           UNGROUNDED name, and any other UNGROUNDED claim type
 * - LOW:    GROUNDED, APPROXIMATE_MATCH, UNVERIFIABLE, MULTI_STEP
 *
 * NOTE: the previous doc comment claimed UNGROUNDED names were LOW; the code
 * has always returned MEDIUM for them — the comment was wrong, not the code.
 *
 * @param {{verdict: string, claim: {type: string}}} v - Verdict under evaluation.
 * @returns {'LOW'|'MEDIUM'|'HIGH'} Severity bucket used for scoring.
 */
function computeSeverity(v) {
    // Every benign verdict is LOW regardless of claim type.
    const BENIGN_VERDICTS = ['GROUNDED', 'APPROXIMATE_MATCH', 'UNVERIFIABLE', 'MULTI_STEP'];
    if (BENIGN_VERDICTS.includes(v.verdict)) {
        return 'LOW';
    }
    // UNGROUNDED (or any other failing verdict): severity depends on how
    // strict the claim type's matching is.
    switch (v.claim.type) {
        case 'count':
        case 'date':
            return 'HIGH'; // exact match required — any deviation is serious
        default:
            return 'MEDIUM'; // number, name, and all other claim types
    }
}
+ // ---------------------------------------------------------------------------
227
+ // Empty-result output detection
228
+ // ---------------------------------------------------------------------------
229
/**
 * Detect tool outputs that represent "no data found" results —
 * e.g. {total: 0, data: []} or {count: 0, results: []}.
 * These should NOT count as "substantive data" for scoring purposes.
 *
 * @param {*} output - Arbitrary tool output value.
 * @returns {boolean} true for null/undefined, empty arrays, empty objects,
 *   zero-count + empty-data-array objects, and objects whose every value is
 *   zero, null/undefined, '', or an empty array.
 */
function isEmptyResultOutput(output) {
    if (output == null) {
        return true;
    }
    if (Array.isArray(output)) {
        return output.length === 0;
    }
    if (typeof output !== 'object') {
        // Primitives (strings, numbers, booleans) are never "empty results".
        return false;
    }
    const record = output;
    if (Object.keys(record).length === 0) {
        return true;
    }
    // Pattern: explicit zero count alongside an empty data array, e.g. {total: 0, data: []}.
    const COUNT_KEYS = ['total', 'count', 'length', 'total_count', 'totalCount'];
    const DATA_KEYS = ['data', 'results', 'items', 'records', 'rows', 'employees', 'entries'];
    const zeroCount = COUNT_KEYS.some((k) => record[k] === 0);
    const emptyData = DATA_KEYS.some((k) => Array.isArray(record[k]) && record[k].length === 0);
    if (zeroCount && emptyData) {
        return true;
    }
    // Pattern: every value is zero, null/undefined, empty string, or empty array.
    const isBlank = (v) => v === 0 || v == null || v === '' || (Array.isArray(v) && v.length === 0);
    return Object.values(record).every(isBlank);
}
/**
 * Walk a flat JSON record and check if any name-like field matches the given
 * entity (delegating the fuzzy comparison to Claims entitiesMatch).
 *
 * @param {*} record - Candidate record; only plain objects are inspected.
 * @param {string} entityName - Entity to match against name-like field values.
 * @returns {boolean} true when some string field under a NAME_FIELD_RE key
 *   matches the entity; false for null/undefined, arrays, and non-objects.
 */
function recordBelongsToEntity(record, entityName) {
    if (record == null || typeof record !== 'object' || Array.isArray(record)) {
        return false;
    }
    return Object.entries(record).some(([fieldKey, fieldValue]) => typeof fieldValue === 'string' &&
        NAME_FIELD_RE.test(fieldKey) &&
        (0, Claims_1.entitiesMatch)(fieldValue, entityName));
}
/**
 * Extract SourceEntries from tool outputs, filtered to only records
 * that belong to the given entity. Used for entity-aware grounding —
 * prevents cross-entity false groundings (e.g. one person's value matching
 * another person's claim).
 *
 * @param trace - Trace whose tool-output steps are scanned.
 * @param entityName - Entity whose records should be kept (matched via
 *   recordBelongsToEntity on name-like fields).
 * @param claimType - Claim type being grounded; for 'count' claims the
 *   record-level count (via Matchers extractCountFromOutput) is also emitted.
 * @returns Array of {stepId, value, toolName, fieldName?} source entries.
 */
function extractEntityScopedValues(trace, entityName, claimType) {
    const entries = [];
    const toolOutputSteps = Trace_1.TraceUtils.getToolOutputSteps(trace);
    for (const step of toolOutputSteps) {
        for (const to of step.toolOutputs ?? []) {
            const output = to.output;
            // Keep only records that belong to the target entity. Top-level
            // arrays are filtered item-by-item; a bare object is kept whole.
            const records = [];
            if (Array.isArray(output)) {
                for (const item of output) {
                    if (recordBelongsToEntity(item, entityName))
                        records.push(item);
                }
            }
            else if (typeof output === 'object' && output !== null) {
                if (recordBelongsToEntity(output, entityName))
                    records.push(output);
            }
            for (const record of records) {
                // For count claims, the record's own count is a grounding source too.
                if (claimType === 'count') {
                    const cnt = (0, Matchers_1.extractCountFromOutput)(record);
                    if (cnt !== null) {
                        entries.push({ stepId: step.stepId, value: cnt, toolName: to.toolName });
                    }
                }
                // Always emit the record's individual field values with their keys.
                const vals = (0, Matchers_1.extractValuesWithKeys)(record);
                for (const v of vals) {
                    entries.push({
                        stepId: step.stepId,
                        value: v.value,
                        toolName: to.toolName,
                        fieldName: v.fieldName,
                    });
                }
            }
        }
    }
    return entries;
}
/**
 * Extract grounding-relevant values from the system prompt step.
 * The system prompt is free text, so the claim extractor is used to parse
 * individual numbers, names, and dates from it — their values are then
 * exposed as potential grounding sources tagged with toolName
 * 'system_prompt'.
 *
 * @param trace - Trace to read the system prompt from.
 * @returns Array of {stepId, value, toolName} entries (empty when the trace
 *   has no system prompt step).
 */
function extractSystemPromptValues(trace) {
    const sysStep = Trace_1.TraceUtils.getSystemPrompt(trace);
    if (!sysStep) {
        return [];
    }
    const promptClaims = (0, Claims_1.extractClaims)(sysStep.content, {
        sourceStepId: sysStep.stepId,
        sourceRole: 'system_prompt',
    });
    return promptClaims.map((promptClaim) => ({
        stepId: sysStep.stepId,
        value: promptClaim.value,
        toolName: 'system_prompt',
    }));
}
/**
 * Extract grounding values from conversation history (previous turns).
 *
 * In proxy mode, each HTTP request is a separate trace. The AI resends the
 * full conversation history in messages, but tool outputs from earlier proxy
 * requests are lost. When the AI references data from a previous turn
 * (e.g., correcting a user's claim about "6738 hours"), those values exist
 * only in earlier steps of the trace.
 *
 * Only PRIOR final_response steps are mined. The last final_response is the
 * text under verification, never a source; user_input steps are deliberately
 * skipped because grounding against the user's own question would create
 * circular "user_input_echo" matches.
 *
 * @param trace - Trace whose steps are scanned.
 * @returns Array of {stepId, value, toolName} entries; toolName is
 *   "conversation_" + the source step role.
 */
function extractConversationHistoryValues(trace) {
    const steps = trace.steps ?? [];
    // Locate the last final_response: everything at or after it is excluded.
    let lastFinalIdx = steps.length;
    for (let i = steps.length - 1; i >= 0; i--) {
        if (steps[i].role === 'final_response') {
            lastFinalIdx = i;
            break;
        }
    }
    const entries = [];
    steps.forEach((step, idx) => {
        if (step.role !== 'final_response' || idx >= lastFinalIdx || !step.content) {
            return;
        }
        const stepClaims = (0, Claims_1.extractClaims)(step.content, {
            sourceStepId: step.stepId,
            sourceRole: step.role,
        });
        for (const stepClaim of stepClaims) {
            entries.push({
                stepId: step.stepId,
                value: stepClaim.value,
                toolName: `conversation_${step.role}`,
            });
        }
    });
    return entries;
}
// ---------------------------------------------------------------------------
// Text-level date search fallback
// ---------------------------------------------------------------------------
/**
 * Search for an ISO date (YYYY-MM-DD) in the full text of all tool outputs.
 * Also checks European (DD.MM.YYYY) and slash (MM/DD/YYYY) variants, plus a
 * final generic pass that parses any date-like token via Claims.tryParseDate.
 *
 * @param {string} isoValue - ISO date or datetime; only the first 10 chars are used.
 * @param {Array} toolOutputSteps - Steps whose content and toolOutputs are searched.
 * @returns {string | null} Explanation string if the date is found, null otherwise.
 */
function dateTextFallback(isoValue, toolOutputSteps) {
    const isoDate = isoValue.substring(0, 10);
    if (!/^\d{4}-\d{2}-\d{2}$/.test(isoDate))
        return null;
    // Build full text from all tool output content and serialized output objects
    const parts = [];
    for (const step of toolOutputSteps) {
        if (step.content)
            parts.push(step.content);
        for (const to of step.toolOutputs ?? []) {
            if (typeof to.output === 'string') {
                parts.push(to.output);
            }
            else if (to.output != null) {
                parts.push(JSON.stringify(to.output));
            }
        }
    }
    const fullText = parts.join(' ');
    // 1. ISO format (2026-03-01)
    if (fullText.includes(isoDate)) {
        return `Date "${isoDate}" found in tool output text.`;
    }
    const [y, m, d] = isoDate.split('-');
    // 2. European DD.MM.YYYY (01.03.2026 or 1.3.2026). Use Number.parseInt
    // with an explicit radix — bare parseInt without radix is a lint trap.
    const euFull = `${d}.${m}.${y}`;
    const euShort = `${Number.parseInt(d, 10)}.${Number.parseInt(m, 10)}.${y}`;
    if (fullText.includes(euFull) || fullText.includes(euShort)) {
        const fmt = fullText.includes(euFull) ? euFull : euShort;
        return `Date "${isoDate}" found as "${fmt}" in tool output text.`;
    }
    // 3. Slash MM/DD/YYYY
    const slashMDY = `${m}/${d}/${y}`;
    if (fullText.includes(slashMDY)) {
        return `Date "${isoDate}" found as "${slashMDY}" in tool output text.`;
    }
    // 4. Extract all date-like patterns from text and try parsing them
    const datePatterns = fullText.match(/\b\d{1,2}[.\-/]\d{1,2}[.\-/]\d{2,4}\b/g);
    if (datePatterns) {
        for (const dp of datePatterns) {
            const parsed = (0, Claims_1.tryParseDate)(dp);
            if (parsed && parsed.substring(0, 10) === isoDate) {
                return `Date "${isoDate}" found as "${dp}" in tool output text.`;
            }
        }
    }
    return null;
}
// ---------------------------------------------------------------------------
// Per-claim grounding logic
// ---------------------------------------------------------------------------
/**
 * Check whether a date claim can be grounded via user input echo, relative
 * date word resolution, or tool call parameter context — i.e. the date is
 * "contextual" rather than a factual data claim from tool output.
 *
 * Four checks, evaluated in order of priority:
 *   1. The claim's raw text or ISO value appears literally in user input.
 *   2. A relative date word (danas/today, juče/yesterday, sutra/tomorrow) in
 *      user input or the claim's raw text resolves to the claimed date.
 *   3. The date appears in tool-call parameters (query context).
 *   4. The date is a period boundary (1st/7th/15th/16th/28th+) matching a
 *      "first half" / "second half" / "first week" style request.
 *
 * Returns a ClaimVerdict if the claim is contextually grounded, or null.
 */
function tryDateContextGrounding(claim, trace) {
    if (claim.type !== 'date' || typeof claim.value !== 'string')
        return null;
    const userInputSteps = (trace.steps ?? []).filter((s) => s.role === 'user_input');
    const claimDateLower = claim.value.toLowerCase();
    const claimRawLower = (claim.rawText ?? '').toLowerCase();
    // Check 1: literal match in user input
    const isEchoedFromUser = userInputSteps.some((s) => {
        const uLower = (s.content ?? '').toLowerCase();
        return ((claimRawLower && uLower.includes(claimRawLower)) ||
            uLower.includes(claimDateLower));
    });
    // Check 2: relative date words in user input resolve to the claim date
    const RELATIVE_TODAY = /\b(danas|today)\b/i;
    const RELATIVE_YESTERDAY = /\b(ju[čc]e|yesterday)\b/i;
    const RELATIVE_TOMORROW = /\b(sutra|tomorrow)\b/i;
    // Anchor "today" to the trace's start time when available.
    const traceDate = trace.startedAt ? new Date(trace.startedAt) : new Date();
    // NOTE(review): toISOString() is UTC — "today" may differ from the user's
    // local calendar day near midnight; confirm this is the intended anchor.
    const toISODay = (d) => d.toISOString().substring(0, 10);
    const todayISO = toISODay(traceDate);
    const yesterdayD = new Date(traceDate);
    yesterdayD.setDate(yesterdayD.getDate() - 1);
    const tomorrowD = new Date(traceDate);
    tomorrowD.setDate(tomorrowD.getDate() + 1);
    let isRelativeDateMatch = false;
    const allUserText = userInputSteps.map((s) => s.content ?? '').join(' ');
    // Check both user input AND claim rawText for relative date words
    const textToCheck = allUserText + ' ' + claimRawLower;
    if (RELATIVE_TODAY.test(textToCheck) && claimDateLower === todayISO) {
        isRelativeDateMatch = true;
    }
    else if (RELATIVE_YESTERDAY.test(textToCheck) && claimDateLower === toISODay(yesterdayD)) {
        isRelativeDateMatch = true;
    }
    else if (RELATIVE_TOMORROW.test(textToCheck) && claimDateLower === toISODay(tomorrowD)) {
        isRelativeDateMatch = true;
    }
    // Check 3: date appears in tool_call parameters (query context dates)
    const toolCallSteps = (trace.steps ?? []).filter((s) => s.role === 'tool_call');
    const isInToolCallParams = toolCallSteps.some((s) => (s.toolCalls ?? []).some((tc) => {
        const paramStr = JSON.stringify(tc.parameters ?? {}).toLowerCase();
        return paramStr.includes(claimDateLower);
    }));
    // Check 4: period-reference patterns — user says "first half of March" and
    // the AI echoes "1-15. mart" → the 15th is a logical derivation, not a data claim.
    // Only attempted when the cheaper checks above all failed.
    let isPeriodRangeEcho = false;
    if (!isEchoedFromUser && !isRelativeDateMatch && !isInToolCallParams) {
        // Patterns that imply a date-range boundary the AI would derive
        // (Serbian adjective endings [aueoi] cover case/gender variants).
        const FIRST_HALF = /\b(prv[aueoi]\s+polovin[aueoi]|first\s+half)\b/i;
        const SECOND_HALF = /\b(drug[aueoi]\s+polovin[aueoi]|second\s+half)\b/i;
        const FIRST_WEEK = /\b(prv[aueoi]\s+nedelj[aueoi]|prv[aueoi]\s+sedmic[aueoi]|first\s+week)\b/i;
        // Day-of-month from the ISO claim value (YYYY-MM-DD → DD).
        const claimDay = parseInt(claimDateLower.split('-')[2], 10);
        if (FIRST_HALF.test(allUserText) && (claimDay === 15 || claimDay === 1)) {
            isPeriodRangeEcho = true;
        }
        else if (SECOND_HALF.test(allUserText) && (claimDay === 16 || claimDay >= 28)) {
            isPeriodRangeEcho = true;
        }
        else if (FIRST_WEEK.test(allUserText) && (claimDay === 7 || claimDay === 1)) {
            isPeriodRangeEcho = true;
        }
    }
    if (isEchoedFromUser || isRelativeDateMatch || isInToolCallParams || isPeriodRangeEcho) {
        // Pick the explanation for the highest-priority check that fired.
        const reason = isEchoedFromUser
            ? 'appears in the user\'s question'
            : isRelativeDateMatch
                ? 'is a relative date word that resolves to the correct date'
                : isInToolCallParams
                    ? 'appears in tool call parameters (query context)'
                    : 'is a period boundary derived from the user\'s time-range request';
        const v = {
            claim,
            verdict: 'GROUNDED',
            ruleUsed: 'user_input_echo',
            explanation: `Date "${claim.rawText ?? claim.value}" ${reason} — ` +
                `AI is echoing context, not making a data claim.`,
        };
        v.severity = computeSeverity(v);
        return v;
    }
    return null;
}
/**
 * If a name claim appears (wholly or partially) in user input, the AI is
 * echoing the user's context rather than hallucinating. Returns a GROUNDED
 * verdict (ruleUsed 'user_input_echo') so these don't penalise the score
 * when there is no tool data to verify; returns null when the claim is not
 * a name, there is no user input, or no substantial word (>2 chars,
 * diacritics-normalized) of the name occurs in the user text.
 */
function tryNameContextGrounding(claim, trace) {
    if (claim.type !== 'name' || typeof claim.value !== 'string') {
        return null;
    }
    const userInputSteps = (trace.steps ?? []).filter((s) => s.role === 'user_input');
    if (userInputSteps.length === 0) {
        return null;
    }
    // Normalize diacritics on both sides so "Jovic" matches "Jović".
    const userText = (0, Claims_1.normalizeDiacritics)(userInputSteps.map((s) => (s.content ?? '').toLowerCase()).join(' '));
    const nameWords = (0, Claims_1.normalizeDiacritics)(claim.value.toLowerCase()).split(/\s+/);
    // At least one substantial word of the name (>2 chars) must appear in user input.
    const hasEcho = nameWords.some((w) => w.length > 2 && userText.includes(w));
    if (!hasEcho) {
        return null;
    }
    const verdict = {
        claim,
        verdict: 'GROUNDED',
        ruleUsed: 'user_input_echo',
        explanation: `Name "${claim.value}" appears in the user's conversation context — ` +
            `AI is referencing a name the user mentioned, not fabricating one.`,
    };
    verdict.severity = computeSeverity(verdict);
    return verdict;
}
+ /**
562
+ * Try to ground a single claim against all available tool outputs.
563
+ * Returns the best ClaimVerdict found.
564
+ */
565
+ function groundClaim(claim, trace, tolerances) {
566
+ const toolOutputSteps = Trace_1.TraceUtils.getToolOutputSteps(trace);
567
+ // Also collect values from system prompt as a secondary grounding source
568
+ const systemPromptValues = extractSystemPromptValues(trace);
569
+ // Cross-turn grounding: extract values from prior conversation turns
570
+ // (user_input + previous final_response steps) so the AI can reference
571
+ // data discussed earlier without being penalised.
572
+ const conversationValues = extractConversationHistoryValues(trace);
573
+ if (toolOutputSteps.length === 0 && systemPromptValues.length === 0 && conversationValues.length === 0) {
574
+ return buildUnverifiable(claim, 'No tool outputs or system prompt found in trace.');
575
+ }
576
+ // Collect all (stepId, value) pairs from tool outputs
577
+ const allValues = [];
578
+ for (const step of toolOutputSteps) {
579
+ for (const to of step.toolOutputs ?? []) {
580
+ // For count claims, also extract the count of array outputs
581
+ if (claim.type === 'count') {
582
+ const cnt = (0, Matchers_1.extractCountFromOutput)(to.output);
583
+ if (cnt !== null) {
584
+ allValues.push({ stepId: step.stepId, value: cnt, toolName: to.toolName });
585
+ }
586
+ }
587
+ const vals = (0, Matchers_1.extractValuesWithKeys)(to.output);
588
+ for (const v of vals) {
589
+ allValues.push({
590
+ stepId: step.stepId,
591
+ value: v.value,
592
+ toolName: to.toolName,
593
+ fieldName: v.fieldName,
594
+ });
595
+ }
596
+ }
597
+ }
598
+ // Add system prompt values as secondary source
599
+ for (const entry of systemPromptValues) {
600
+ allValues.push(entry);
601
+ }
602
+ // Add conversation history values as tertiary source (cross-turn data)
603
+ for (const entry of conversationValues) {
604
+ allValues.push(entry);
605
+ }
606
+ // -------------------------------------------------------------------------
607
+ // L2 fast-path: structured claims carry sourceFieldValue from extraction
608
+ // -------------------------------------------------------------------------
609
+ if (claim.sourceFieldValue !== undefined) {
610
+ let l2Result = null;
611
+ let ruleUsed = '';
612
+ if (claim.type === 'boolean') {
613
+ l2Result = (0, L2_1.matchBoolean)(claim.value, claim.sourceFieldValue);
614
+ ruleUsed = 'boolean_match';
615
+ }
616
+ else if (claim.type === 'enum') {
617
+ l2Result = (0, L2_1.matchEnum)(claim.value, claim.sourceFieldValue);
618
+ ruleUsed = 'enum_match';
619
+ }
620
+ else if (claim.type === 'list_items' && claim.expectedItems && claim.mentionedItems) {
621
+ l2Result = (0, L2_1.matchListItems)(claim.expectedItems, claim.mentionedItems);
622
+ ruleUsed = 'list_items_match';
623
+ }
624
+ else if (claim.type === 'key_value') {
625
+ l2Result = (0, L2_1.matchKeyValue)(claim.value, claim.sourceFieldValue);
626
+ ruleUsed = 'key_value_match';
627
+ }
628
+ else if (claim.type === 'aggregation' && claim.computedValue !== undefined) {
629
+ l2Result = (0, L2_1.matchAggregation)(claim.value, claim.computedValue, claim.aggregationOp ?? 'sum');
630
+ ruleUsed = 'aggregation_match';
631
+ }
632
+ else if (claim.type === 'range') {
633
+ l2Result = (0, L2_1.matchRange)(claim.value, claim.sourceFieldValue);
634
+ ruleUsed = 'range_match';
635
+ }
636
+ if (l2Result) {
637
+ let verdictType = l2Result.matched ? 'GROUNDED' : 'UNGROUNDED';
638
+ let explanation = l2Result.explanation;
639
+ // Sibling-scope: weak_scope claims that fail grounding get downgraded
640
+ // to APPROXIMATE_MATCH with LOW severity — prevents false positives from
641
+ // sibling groups that only have a single incidental mention.
642
+ if (!l2Result.matched && claim.siblingScope === 'weak_scope') {
643
+ verdictType = 'APPROXIMATE_MATCH';
644
+ explanation += ` [weak_scope sibling — penalty suppressed; family: ${claim.siblingFamilyPattern ?? 'unknown'}]`;
645
+ }
646
+ const verdict = {
647
+ claim,
648
+ verdict: verdictType,
649
+ sourceValue: claim.sourceFieldValue,
650
+ sourceStepId: claim.source.stepId,
651
+ ruleUsed,
652
+ explanation,
653
+ };
654
+ verdict.severity = computeSeverity(verdict);
655
+ // Force LOW severity for weak_scope downgrades
656
+ if (claim.siblingScope === 'weak_scope' && verdictType === 'APPROXIMATE_MATCH') {
657
+ verdict.severity = 'LOW';
658
+ }
659
+ return verdict;
660
+ }
661
+ }
662
+ if (allValues.length === 0) {
663
+ // Check if tool outputs are empty ([], null, {}) — this means the claim
664
+ // has no grounding source and is fabricated, not just unverifiable.
665
+ const allOutputsEmpty = toolOutputSteps.every((step) => (step.toolOutputs ?? []).every((to) => {
666
+ const o = to.output;
667
+ if (o === null || o === undefined)
668
+ return true;
669
+ if (Array.isArray(o) && o.length === 0)
670
+ return true;
671
+ if (typeof o === 'string' && o.trim() === '')
672
+ return true;
673
+ if (typeof o === 'object' && !Array.isArray(o) && Object.keys(o).length === 0)
674
+ return true;
675
+ return false;
676
+ }));
677
+ if (allOutputsEmpty) {
678
+ // Before declaring UNGROUNDED, check if a date/name claim can be grounded
679
+ // via user-question echo or tool call parameter context.
680
+ const ctxVerdict = tryDateContextGrounding(claim, trace)
681
+ ?? tryNameContextGrounding(claim, trace);
682
+ if (ctxVerdict)
683
+ return ctxVerdict;
684
+ const v = {
685
+ claim,
686
+ verdict: 'UNGROUNDED',
687
+ explanation: 'All tool outputs are empty — claim has no grounding source.',
688
+ ruleUsed: 'empty_output',
689
+ };
690
+ v.severity = computeSeverity(v);
691
+ return v;
692
+ }
693
+ return buildUnverifiable(claim, 'Tool outputs contain no extractable values.');
694
+ }
695
+ // -------------------------------------------------------------------------
696
+ // Entity-aware grounding: when claim has an entity, prefer values from
697
+ // the tool output record belonging to that entity. This prevents
698
+ // cross-entity false groundings (e.g. "Ana: 11 lates" matching Gordana's 11).
699
+ // -------------------------------------------------------------------------
700
+ let valuesToSearch = allValues;
701
+ if (claim.entity) {
702
+ const entityScoped = extractEntityScopedValues(trace, claim.entity, claim.type);
703
+ if (entityScoped.length > 0) {
704
+ valuesToSearch = entityScoped;
705
+ }
706
+ }
707
+ // Try to find the best match among extracted values
708
+ let bestVerdict = null;
709
+ let bestScore = -Infinity;
710
+ for (const entry of valuesToSearch) {
711
+ const { stepId, value: sourceValue } = entry;
712
+ let verdict = 'UNGROUNDED';
713
+ let deviation;
714
+ let similarity;
715
+ let explanation = '';
716
+ let ruleUsed = '';
717
+ if (claim.type === 'number' && typeof sourceValue === 'number') {
718
+ // Try unit-aware matching first
719
+ const sourceUnit = (0, Matchers_1.inferUnitFromFieldName)(entry.fieldName);
720
+ const res = (0, Matchers_1.matchNumericWithUnits)(claim.value, claim.unit, sourceValue, sourceUnit, tolerances);
721
+ deviation = res.deviation;
722
+ explanation = res.explanation;
723
+ ruleUsed = 'numeric_match';
724
+ if (res.matched) {
725
+ verdict = deviation !== undefined && deviation === 0 ? 'GROUNDED' : 'APPROXIMATE_MATCH';
726
+ }
727
+ }
728
+ else if (claim.type === 'count' && typeof sourceValue === 'number') {
729
+ const res = (0, Matchers_1.matchCount)(claim.value, sourceValue, tolerances);
730
+ deviation = res.deviation;
731
+ explanation = res.explanation;
732
+ ruleUsed = 'count_match';
733
+ if (res.matched)
734
+ verdict = 'GROUNDED';
735
+ }
736
+ else if (claim.type === 'date' && typeof sourceValue === 'string') {
737
+ // Don't compare date claims against name-like fields (e.g. "Filip Tasić")
738
+ const dateLeaf = leafFieldName(entry.fieldName);
739
+ if (dateLeaf && NAME_FIELD_RE.test(dateLeaf))
740
+ continue;
741
+ const res = (0, Matchers_1.matchDate)(claim.value, sourceValue, tolerances);
742
+ explanation = res.explanation;
743
+ ruleUsed = 'date_match';
744
+ if (res.matched)
745
+ verdict = 'GROUNDED';
746
+ }
747
+ else if (claim.type === 'name' && typeof sourceValue === 'string') {
748
+ // Don't compare name claims against date-like fields (e.g. "2026-03-22")
749
+ const nameLeaf = leafFieldName(entry.fieldName);
750
+ if (nameLeaf && DATE_FIELD_RE.test(nameLeaf))
751
+ continue;
752
+ const res = (0, Matchers_1.matchName)(claim.value, sourceValue, tolerances);
753
+ similarity = res.similarity;
754
+ explanation = res.explanation;
755
+ ruleUsed = 'name_fuzzy_match';
756
+ if (res.matched)
757
+ verdict = 'GROUNDED';
758
+ }
759
+ else {
760
+ // Type mismatch between claim and source value — skip
761
+ continue;
762
+ }
763
+ // Score: GROUNDED=3, APPROXIMATE_MATCH=2, UNGROUNDED with low deviation=1
764
+ let score = 0;
765
+ if (verdict === 'GROUNDED')
766
+ score = 3;
767
+ else if (verdict === 'APPROXIMATE_MATCH')
768
+ score = 2;
769
+ else if (verdict === 'UNGROUNDED' && deviation !== undefined)
770
+ score = 1 - deviation;
771
+ else if (verdict === 'UNGROUNDED' && similarity !== undefined)
772
+ score = similarity;
773
+ if (score > bestScore) {
774
+ bestScore = score;
775
+ bestVerdict = {
776
+ claim,
777
+ verdict,
778
+ sourceValue,
779
+ sourceStepId: stepId,
780
+ deviation,
781
+ ruleUsed,
782
+ explanation,
783
+ };
784
+ }
785
+ }
786
+ if (bestVerdict === null) {
787
+ // -----------------------------------------------------------------------
788
+ // Text-level date search fallback: when no value of compatible type was
789
+ // found (e.g. tool output is a prose string), search the full tool output
790
+ // text for the ISO date string or common format variants.
791
+ // -----------------------------------------------------------------------
792
+ if (claim.type === 'date' && typeof claim.value === 'string') {
793
+ const textMatch = dateTextFallback(claim.value, toolOutputSteps);
794
+ if (textMatch) {
795
+ const v = {
796
+ claim,
797
+ verdict: 'GROUNDED',
798
+ ruleUsed: 'date_text_search',
799
+ explanation: textMatch,
800
+ };
801
+ v.severity = computeSeverity(v);
802
+ return v;
803
+ }
804
+ }
805
+ return buildUnverifiable(claim, `No tool output value of compatible type (${claim.type}) found.`);
806
+ }
807
+ // -------------------------------------------------------------------------
808
+ // Text-level date search fallback: if date claim is UNGROUNDED after the
809
+ // per-value matching loop, search full tool output text for the date.
810
+ // -------------------------------------------------------------------------
811
+ if (bestVerdict.verdict === 'UNGROUNDED' &&
812
+ claim.type === 'date' &&
813
+ typeof claim.value === 'string') {
814
+ const textMatch = dateTextFallback(claim.value, toolOutputSteps);
815
+ if (textMatch) {
816
+ bestVerdict.verdict = 'GROUNDED';
817
+ bestVerdict.ruleUsed = 'date_text_search';
818
+ bestVerdict.explanation = textMatch;
819
+ }
820
+ }
821
+ // Transposed digits detection: if a numeric claim is UNGROUNDED and its
822
+ // digits are a transposition of the source, flag as APPROXIMATE_MATCH
823
+ // with a specific explanation. This catches common LLM copy errors (23→32).
824
+ if (bestVerdict.verdict === 'UNGROUNDED' &&
825
+ (claim.type === 'number' || claim.type === 'count') &&
826
+ typeof bestVerdict.sourceValue === 'number' &&
827
+ (0, Matchers_1.isTransposedDigits)(claim.value, bestVerdict.sourceValue)) {
828
+ bestVerdict.verdict = 'APPROXIMATE_MATCH';
829
+ bestVerdict.ruleUsed = 'transposed_digits';
830
+ bestVerdict.explanation =
831
+ `${claim.value} appears to be a digit transposition of ${bestVerdict.sourceValue} (swapped adjacent digits).`;
832
+ }
833
+ // ---------------------------------------------------------------------------
834
+ // User-input echo detection: if a date or name claim is UNGROUNDED but
835
+ // appears in a user_input step, the AI is echoing the user's context —
836
+ // not a factual claim from tool data. Treat as GROUNDED.
837
+ // ---------------------------------------------------------------------------
838
+ if (bestVerdict.verdict === 'UNGROUNDED' &&
839
+ claim.type === 'date' &&
840
+ typeof claim.value === 'string') {
841
+ const ctxVerdict = tryDateContextGrounding(claim, trace);
842
+ if (ctxVerdict) {
843
+ bestVerdict.verdict = ctxVerdict.verdict;
844
+ bestVerdict.ruleUsed = ctxVerdict.ruleUsed;
845
+ bestVerdict.explanation = ctxVerdict.explanation;
846
+ }
847
+ }
848
+ if (bestVerdict.verdict === 'UNGROUNDED' && claim.type === 'name') {
849
+ const nameCtx = tryNameContextGrounding(claim, trace);
850
+ if (nameCtx) {
851
+ bestVerdict.verdict = nameCtx.verdict;
852
+ bestVerdict.ruleUsed = nameCtx.ruleUsed;
853
+ bestVerdict.explanation = nameCtx.explanation;
854
+ }
855
+ }
856
+ // ---------------------------------------------------------------------------
857
+ // Source=0 noise guard: when the best numeric match has sourceValue=0 and
858
+ // the claimed value is very large, the 0 is almost certainly from an
859
+ // unrelated field (e.g. total_sessions=0 matched against work hours).
860
+ // Downgrade to UNVERIFIABLE to avoid misleading "AI=6738 vs Source=0" verdicts.
861
+ // Only applies for large claim values where 0 is clearly noise, not for
862
+ // small counts where 0 could be a real mismatch (e.g. tool says 0, AI says 3).
863
+ // ---------------------------------------------------------------------------
864
+ if (bestVerdict.verdict === 'UNGROUNDED' &&
865
+ (claim.type === 'number' || claim.type === 'count') &&
866
+ bestVerdict.sourceValue === 0 &&
867
+ typeof claim.value === 'number' &&
868
+ Math.abs(claim.value) >= 100) {
869
+ bestVerdict.verdict = 'UNVERIFIABLE';
870
+ bestVerdict.explanation =
871
+ `No plausible source value found for ${claim.type} claim "${claim.value}". ` +
872
+ `Closest match was 0 from an unrelated field.`;
873
+ bestVerdict.ruleUsed = 'source_zero_noise';
874
+ }
875
+ bestVerdict.severity = computeSeverity(bestVerdict);
876
+ return bestVerdict;
877
+ }
878
+ // ---------------------------------------------------------------------------
879
+ // GroundingEngine
880
+ // ---------------------------------------------------------------------------
881
+ // ---------------------------------------------------------------------------
882
+ // Root Cause Analysis
883
+ // ---------------------------------------------------------------------------
884
// Numeric rank per failure severity, used to sort primary failures
// most-severe first in buildRootCauseSummary. Unknown severities fall
// back to 0 at the use site.
const SEVERITY_ORDER = { critical: 4, high: 3, medium: 2, low: 1 };
885
+ /**
886
+ * Build a root-cause summary from orchestrated detected failures.
887
+ *
888
+ * Returns `{ rootCause, summary }` where:
889
+ * - `rootCause` is a one-sentence explanation derived from the highest-severity
890
+ * primary failure and its diagnosis.
891
+ * - `summary` is a multi-sentence narrative covering all primary failures,
892
+ * their causal chain (secondary suppressions), and suggested fixes.
893
+ *
894
+ * Returns undefined fields when no failures are present.
895
+ */
896
+ // ---------------------------------------------------------------------------
897
+ // Resolution assessment
898
+ // ---------------------------------------------------------------------------
899
/**
 * Determines whether the user's question was actually resolved.
 * This is a meta-signal derived from detected failures — not a new rule.
 *
 * @param {Array<{role: string, type: string, severity: string}>} detectedFailures
 *   Failures produced by the rule engine; only `role === 'primary'` ones count.
 * @param {number} claimsTotal    Total claims extracted from the response.
 * @param {number} claimsVerified Claims judged GROUNDED / APPROXIMATE_MATCH.
 * @param {number} claimsSkipped  Claims judged UNVERIFIABLE / MULTI_STEP.
 * @returns {{resolved: boolean, resolutionNote?: string}}
 */
function assessResolution(detectedFailures, claimsTotal, claimsVerified, claimsSkipped) {
    if (detectedFailures.length === 0) {
        if (claimsTotal === 0) {
            return { resolved: true, resolutionNote: 'No verifiable claims in response.' };
        }
        return { resolved: true };
    }
    const primaryFailures = detectedFailures.filter((f) => f.role === 'primary');
    // Failure types that definitively mean "question not resolved"
    const UNRESOLVED_TYPES = {
        'orchestration.answer_refusal': 'AI explicitly refused to answer.',
        'orchestration.ai_unavailable': 'AI service was unavailable.',
        'grounding.question_not_answered': 'The user\'s question was not addressed.',
        'grounding.empty_fabrication': 'Response was fabricated without tool data.',
        'grounding.no_tool_call': 'AI did not call any tools to gather data.',
    };
    for (const f of primaryFailures) {
        // Object.hasOwn (not `in`): `in` also matches inherited keys such as
        // "toString", which would mark the trace unresolved with a function
        // as the resolution note.
        if (Object.hasOwn(UNRESOLVED_TYPES, f.type)) {
            return {
                resolved: false,
                resolutionNote: UNRESOLVED_TYPES[f.type],
            };
        }
    }
    // Partial resolution: high-severity completeness failures
    const PARTIAL_TYPES = new Set([
        'grounding.incomplete_response',
        'orchestration.tool_budget_exhaustion',
        'orchestration.tool_selection_error',
    ]);
    const hasPartialFailure = primaryFailures.some((f) => PARTIAL_TYPES.has(f.type) && (f.severity === 'critical' || f.severity === 'high'));
    if (hasPartialFailure) {
        const verifiable = claimsTotal - claimsSkipped;
        // No verifiable claims → ratio is vacuously 1 (don't flag unresolved).
        const verifiedRatio = verifiable > 0 ? claimsVerified / verifiable : 1;
        if (verifiedRatio < 0.3) {
            return {
                resolved: false,
                resolutionNote: `Only ${Math.round(verifiedRatio * 100)}% of claims verified due to incomplete data.`,
            };
        }
    }
    return { resolved: true };
}
946
/**
 * Build a root-cause summary from orchestrated detected failures.
 *
 * @param detectedFailures failures with role 'primary' / 'secondary'
 * @param hypotheses low-confidence findings kept for human review
 * @returns `{ rootCause?, summary?, repairSequence? }` — empty object when
 *   there are no failures and no hypotheses at all.
 */
function buildRootCauseSummary(detectedFailures, hypotheses) {
    if (detectedFailures.length === 0 && hypotheses.length === 0)
        return {};
    // Order primaries most-severe first; unknown severities rank as 0.
    const severityRank = (f) => SEVERITY_ORDER[f.severity] ?? 0;
    const primaries = detectedFailures
        .filter((f) => f.role === 'primary')
        .sort((a, b) => severityRank(b) - severityRank(a));
    const secondaries = detectedFailures.filter((f) => f.role === 'secondary');
    if (primaries.length === 0) {
        // Only hypotheses, no confirmed failures
        return {
            summary: hypotheses.length > 0
                ? `${hypotheses.length} low-confidence hypothesis(es) detected but no confirmed failures.`
                : undefined,
        };
    }
    // --- rootCause: one-sentence from highest-severity primary ---
    const top = primaries[0];
    const rootCause = `[${top.type}] ${top.diagnosis ? top.diagnosis : top.description}`;
    // --- repair sequence: topological order from suppression graph ---
    const activeTypes = detectedFailures
        .filter((f) => f.role === 'primary' || f.role === 'secondary')
        .map((f) => f.type);
    const repairSequence = (0, Registry_1.getRepairOrder)(activeTypes);
    // --- summary: multi-line narrative ---
    const lines = [`${primaries.length} primary failure(s) detected:`];
    lines.push(...primaries.map((f) => {
        const diag = f.diagnosis ? ` — ${f.diagnosis}` : '';
        const fix = f.suggestedFix ? ` Fix: ${f.suggestedFix}` : '';
        return ` • [${f.severity.toUpperCase()}] ${f.type}: ${f.description}${diag}${fix}`;
    }));
    // Secondary (suppressed) failures
    if (secondaries.length > 0) {
        lines.push(`${secondaries.length} secondary failure(s) (caused by primary):`);
        lines.push(...secondaries.map((f) => ` • ${f.type} (suppressed by ${f.suppressedBy})`));
    }
    // Hypotheses
    if (hypotheses.length > 0) {
        lines.push(`${hypotheses.length} hypothesis(es) for human review.`);
    }
    // Repair sequence
    if (repairSequence.length > 1) {
        lines.push('Recommended repair order:');
        lines.push(...repairSequence.map((step, i) => ` ${i + 1}. ${step.type} — ${step.reason}`));
    }
    return { rootCause, summary: lines.join('\n'), repairSequence };
}
1000
/**
 * Main entry point for grounding validation.
 */
// ---------------------------------------------------------------------------
// Report confidence — meta-assessment of the analysis itself
// ---------------------------------------------------------------------------
1006
+ /**
1007
+ * Compute how much we trust the grounding report.
1008
+ *
1009
+ * This is NOT the grounding score — it answers a different question:
1010
+ * "Given the trace quality and claim coverage, should the user trust this report?"
1011
+ *
1012
+ * Logic:
1013
+ * 1. Hard blockers → instant "low" (no tool_outputs, no final_response, 0 claims, completeness < 35%)
1014
+ * 2. Weighted composite score from 3 factors:
1015
+ * - Trace completeness (55%) — from TraceReadiness
1016
+ * - Grounded ratio (30%) — verified / verifiable claims
1017
+ * - cannotVerify penalty (15%) — -0.05 per disabled check
1018
+ * 3. Map score to tier: >= 0.80 → high, 0.50–0.79 → medium, < 0.50 → low
1019
+ */
1020
function computeReportConfidence(input) {
    const { traceQuality, claimsTotal, claimsVerified, claimsSkipped } = input;
    const reasons = [];
    // Emit an instant-low report for a hard blocker.
    const hardLow = (reason, score, note) => {
        reasons.push(reason);
        return { level: 'low', score, note, reasons };
    };
    const checklistHas = (element) => traceQuality.checklist.some((c) => c.element === element && c.present);
    // --- Hard blockers → instant low ---
    if (!checklistHas('final_response')) {
        return hardLow('missing_final_response', 0, 'No final response found in trace — nothing to analyse.');
    }
    if (!checklistHas('tool_outputs')) {
        return hardLow('missing_tool_outputs', 0.1, 'No tool outputs in trace — claims cannot be verified against source data.');
    }
    if (claimsTotal === 0) {
        return hardLow('no_claims_extracted', 0.15, 'No verifiable claims found in the response — analysis has nothing to check.');
    }
    if (traceQuality.completeness < 35) {
        return hardLow('trace_too_incomplete', 0.2, `Trace completeness is only ${traceQuality.completeness}% — too little data for reliable analysis.`);
    }
    // --- Factor A: Trace completeness (55% weight) ---
    const completenessScore = traceQuality.completeness / 100;
    if (traceQuality.quality === 'HIGH') {
        reasons.push('complete_trace');
    }
    else if (traceQuality.quality === 'MEDIUM') {
        reasons.push('partial_trace');
    }
    // --- Factor B: Grounded ratio (30% weight) ---
    const verifiable = claimsTotal - claimsSkipped;
    const groundedRatio = verifiable > 0 ? claimsVerified / verifiable : 0;
    reasons.push(groundedRatio >= 0.8
        ? 'high_grounded_ratio'
        : groundedRatio >= 0.5
            ? 'moderate_grounded_ratio'
            : 'low_grounded_ratio');
    reasons.push(verifiable >= 3 ? 'sufficient_claims' : 'few_verifiable_claims');
    // --- Factor C: cannotVerify penalty (15% weight) ---
    // Each disabled check costs 0.05, capped at 1.0 total penalty.
    const cannotVerifyCount = traceQuality.cannotVerify.length;
    const cannotVerifyFactor = 1.0 - Math.min(1.0, cannotVerifyCount * 0.05);
    if (cannotVerifyCount > 0) {
        reasons.push(`disabled_checks_${cannotVerifyCount}`);
    }
    // --- Composite score, clamped to [0, 1] ---
    const score = Math.max(0, Math.min(1, completenessScore * 0.55 +
        groundedRatio * 0.30 +
        cannotVerifyFactor * 0.15));
    // --- Map to tier ---
    const level = score >= 0.80 ? 'high' : score >= 0.50 ? 'medium' : 'low';
    // --- Build note ---
    let note;
    if (level === 'high') {
        note = 'Analysis is reliable — complete trace with sufficient verified claims.';
    }
    else if (level === 'medium') {
        const caveats = [];
        if (traceQuality.quality !== 'HIGH')
            caveats.push('trace is partially complete');
        if (verifiable < 3)
            caveats.push('few verifiable claims');
        if (groundedRatio < 0.8)
            caveats.push('not all claims could be verified');
        if (cannotVerifyCount > 0)
            caveats.push(`${cannotVerifyCount} check(s) disabled`);
        note = `Analysis may be incomplete: ${caveats.join(', ')}.`;
    }
    else {
        note = 'Analysis has limited reliability — results should be treated with caution.';
    }
    return { level, score: Math.round(score * 100) / 100, note, reasons };
}
1122
+ class GroundingEngine {
1123
+ constructor(options = {}) {
1124
+ if (options.configFile) {
1125
+ this.tolerances = (0, Config_1.loadTolerancesFromFile)(options.configFile);
1126
+ }
1127
+ else {
1128
+ this.tolerances = {
1129
+ ...types_1.DEFAULT_TOLERANCES,
1130
+ ...(options.tolerances ?? {}),
1131
+ };
1132
+ }
1133
+ }
1134
/**
 * Evaluate a trace and return a {@link GroundingReport}.
 * @param trace The trace to evaluate
 * @param mode 'guardrail' (default) or 'analytics'. In guardrail mode, only S/A tier failures are returned in detectedFailures; others go to hypotheses.
 */
1142
+ evaluate(trace, mode = 'guardrail') {
1143
+ // Normalise external trace formats (OpenAI, Anthropic, custom) to TG
1144
+ // canonical roles before any processing. This is idempotent.
1145
+ Trace_1.TraceUtils.normalizeTrace(trace);
1146
+ // Multi-turn: scope evaluation to the active (last) turn only.
1147
+ // The original trace is kept for metadata; the scoped trace is used for
1148
+ // claims extraction, grounding, and rule detection.
1149
+ const isMultiTurn = Trace_1.TraceUtils.isMultiTurn(trace);
1150
+ const turns = Trace_1.TraceUtils.getConversationTurns(trace);
1151
+ const turnCount = turns.length;
1152
+ const evalTrace = isMultiTurn ? Trace_1.TraceUtils.buildActiveTurnTrace(trace) : trace;
1153
+ // 1. Find the final response step
1154
+ const finalStep = Trace_1.TraceUtils.getFinalResponse(evalTrace);
1155
+ const responseText = finalStep?.content ?? '';
1156
+ // 1b. Early exit for error responses (HTTP 529, API failures, etc.)
1157
+ // When the AI service returned an error instead of a real response,
1158
+ // skip claim extraction and rule detection entirely — only report
1159
+ // ai_unavailable. This prevents false positives from extracting
1160
+ // HTTP status codes as numeric claims, instruction_violation on
1161
+ // error messages, or data_ignored when AI never responded.
1162
+ if (finalStep && isErrorResponse(finalStep)) {
1163
+ const errorFailure = detectAIUnavailableFromStep(finalStep);
1164
+ return {
1165
+ traceId: trace.traceId,
1166
+ generatedAt: new Date().toISOString(),
1167
+ claimsTotal: 0,
1168
+ claimsVerified: 0,
1169
+ claimsSkipped: 0,
1170
+ claimsFailed: 0,
1171
+ groundingScore: 1.0,
1172
+ verdicts: [],
1173
+ detectedFailures: errorFailure ? [errorFailure] : [],
1174
+ hypotheses: [],
1175
+ failureCounts: { high: errorFailure ? 1 : 0, medium: 0, low: 0 },
1176
+ tolerancesUsed: this.tolerances,
1177
+ traceQuality: (0, TraceReadiness_1.assessTraceQuality)(trace),
1178
+ rootCause: errorFailure?.type ?? undefined,
1179
+ summary: errorFailure?.description ?? 'AI service error — no response to evaluate.',
1180
+ repairSequence: [],
1181
+ resolved: false,
1182
+ resolutionNote: 'AI service did not produce a response.',
1183
+ reportConfidence: {
1184
+ level: 'low',
1185
+ score: 0,
1186
+ note: 'AI service returned an error — no response to analyse.',
1187
+ reasons: ['ai_service_error'],
1188
+ },
1189
+ ...(isMultiTurn ? { turnCount, activeTurnIndex: turnCount - 1 } : {}),
1190
+ };
1191
+ }
1192
+ // 2. Extract claims (L1: regex-based)
1193
+ // Pass trace startedAt as reference date so relative-date resolution
1194
+ // ("sutra", "yesterday") uses the correct temporal context.
1195
+ const refDate = evalTrace.startedAt
1196
+ ? new Date(evalTrace.startedAt)
1197
+ : undefined;
1198
+ const l1Claims = finalStep
1199
+ ? (0, Claims_1.extractClaims)(responseText, {
1200
+ sourceStepId: finalStep.stepId,
1201
+ sourceRole: 'final_response',
1202
+ referenceDate: refDate,
1203
+ })
1204
+ : [];
1205
+ // 2b. Extract L2 structured context claims
1206
+ const l2Claims = finalStep
1207
+ ? [
1208
+ ...(0, L2_1.extractStructuredClaims)(evalTrace, responseText, finalStep.stepId),
1209
+ ...(0, L2_1.extractListItemsClaims)(evalTrace, responseText, finalStep.stepId),
1210
+ ...(0, L2_1.extractKeyValueClaims)(evalTrace, responseText, finalStep.stepId),
1211
+ ...(0, L2_1.extractAggregationClaims)(evalTrace, responseText, finalStep.stepId),
1212
+ ...(0, L2_1.extractRangeClaims)(evalTrace, responseText, finalStep.stepId),
1213
+ ]
1214
+ : [];
1215
+ // Cross-layer deduplication: when L2 produces a claim covering the same
1216
+ // numeric value as an L1 claim, the L2 claim is more precise (has
1217
+ // sourceFieldValue, aggregationOp, etc.) and should take precedence.
1218
+ // This prevents L1 "number" claims from scoring UNGROUNDED for values
1219
+ // that L2 correctly identifies as SUM/AVG/aggregation.
1220
+ const l2CoveredValues = new Set();
1221
+ for (const c of l2Claims) {
1222
+ if (typeof c.value === 'number')
1223
+ l2CoveredValues.add(c.value);
1224
+ if (c.type === 'aggregation' && c.computedValue !== undefined) {
1225
+ l2CoveredValues.add(c.computedValue);
1226
+ }
1227
+ }
1228
+ const dedupedL1 = l1Claims.filter((c) => {
1229
+ if ((c.type === 'number' || c.type === 'count') && typeof c.value === 'number') {
1230
+ return !l2CoveredValues.has(c.value);
1231
+ }
1232
+ return true;
1233
+ });
1234
+ const claims = [...dedupedL1, ...l2Claims];
1235
+ // 2d. Extract single-word name claims from tool context
1236
+ if (finalStep) {
1237
+ const contextNames = extractToolContextNameClaims(evalTrace, responseText, finalStep.stepId, claims);
1238
+ claims.push(...contextNames);
1239
+ }
1240
+ // 2c. Enrich claims with period from tool call parameters
1241
+ const period = Trace_1.TraceUtils.inferPeriod(evalTrace);
1242
+ if (period) {
1243
+ for (const claim of claims) {
1244
+ if (!claim.period)
1245
+ claim.period = period;
1246
+ }
1247
+ }
1248
+ // 3. Ground each claim
1249
+ // For multi-turn traces, if a claim is UNGROUNDED or UNVERIFIABLE against
1250
+ // the active turn, try the full trace as fallback (the response may
1251
+ // reference data from earlier turns in the conversation).
1252
+ const verdicts = claims.map((claim) => {
1253
+ const v = groundClaim(claim, evalTrace, this.tolerances);
1254
+ if (isMultiTurn &&
1255
+ (v.verdict === 'UNGROUNDED' || v.verdict === 'UNVERIFIABLE') &&
1256
+ evalTrace !== trace) {
1257
+ const fullV = groundClaim(claim, trace, this.tolerances);
1258
+ if (fullV.verdict === 'GROUNDED' || fullV.verdict === 'APPROXIMATE_MATCH') {
1259
+ return fullV;
1260
+ }
1261
+ }
1262
+ return v;
1263
+ });
1264
+ // 4. Detect multi-step claims: UNVERIFIABLE where multiple tool calls exist
1265
+ // and the claim may depend on combined outputs — upgrade to MULTI_STEP
1266
+ const toolCallCount = Trace_1.TraceUtils.getAllToolCalls(evalTrace).length;
1267
+ const processedVerdicts = verdicts.map((v) => {
1268
+ if (v.verdict === 'UNVERIFIABLE' && toolCallCount > 1) {
1269
+ return {
1270
+ ...v,
1271
+ verdict: 'MULTI_STEP',
1272
+ explanation: v.explanation +
1273
+ ' Multiple tool calls present — verification may require cross-step analysis.',
1274
+ ruleUsed: 'multi_step',
1275
+ };
1276
+ }
1277
+ return v;
1278
+ });
1279
+ // 5. Run grounding failure rules
1280
+ // Pass the full trace as well so cross-turn rules (e.g.
1281
+ // stale_cross_turn_reuse) can see all conversation turns.
1282
+ const allFindings = (0, Rules_1.runAllRules)(evalTrace, claims, processedVerdicts, trace);
1283
+ let detectedFailures;
1284
+ let hypotheses;
1285
+ if (mode === 'guardrail') {
1286
+ // Only S/A tier (block/warn) go to detectedFailures, others to hypotheses
1287
+ detectedFailures = allFindings.filter((f) => {
1288
+ const reg = Registry_1.FAILURE_REGISTRY.find(r => r.type === f.type);
1289
+ return reg && (reg.tier === 'S' || reg.tier === 'A') && f.confidence !== 'low';
1290
+ });
1291
+ hypotheses = allFindings.filter((f) => {
1292
+ const reg = Registry_1.FAILURE_REGISTRY.find(r => r.type === f.type);
1293
+ return !reg || (reg.tier !== 'S' && reg.tier !== 'A') || f.confidence === 'low';
1294
+ });
1295
+ }
1296
+ else {
1297
+ detectedFailures = allFindings.filter((f) => f.confidence !== 'low');
1298
+ hypotheses = allFindings.filter((f) => f.confidence === 'low');
1299
+ }
1300
+ // 6. Compute aggregate stats
1301
+ const claimsTotal = claims.length;
1302
+ const claimsVerified = processedVerdicts.filter((v) => v.verdict === 'GROUNDED' || v.verdict === 'APPROXIMATE_MATCH').length;
1303
+ const claimsSkipped = processedVerdicts.filter((v) => v.verdict === 'UNVERIFIABLE' || v.verdict === 'MULTI_STEP').length;
1304
+ const claimsFailed = processedVerdicts.filter((v) => v.verdict === 'UNGROUNDED').length;
1305
+ // ═══════════════════════════════════════════════════════════════════
1306
+ // PHASE 1 — Claim-based score
1307
+ // Ratio of verified claim weight to total claim weight, using
1308
+ // severity tiers (HIGH = 3, MEDIUM = 2, LOW = 1). Defaults to
1309
+ // 1.0 when there are no verifiable claims. Three zero-score
1310
+ // guards then correct vacuously-true or entirely-ungrounded cases.
1311
+ // ═══════════════════════════════════════════════════════════════════
1312
+ // In guardrail mode, aggregation and list_items claims (red zone — high FP)
1313
+ // are excluded from score calculation. They still appear in verdicts.
1314
+ const GUARDRAIL_SCORE_EXCLUDED_TYPES = new Set(['aggregation', 'list_items']);
1315
+ const SEVERITY_WEIGHT = { HIGH: 3, MEDIUM: 2, LOW: 1 };
1316
+ const verifiableVerdicts = processedVerdicts.filter((v) => v.verdict !== 'UNVERIFIABLE' && v.verdict !== 'MULTI_STEP'
1317
+ && (mode !== 'guardrail' || !GUARDRAIL_SCORE_EXCLUDED_TYPES.has(v.claim.type)));
1318
+ // All non-UNVERIFIABLE/MULTI_STEP verdicts (for severity assignment)
1319
+ const allVerifiableVerdicts = processedVerdicts.filter((v) => v.verdict !== 'UNVERIFIABLE' && v.verdict !== 'MULTI_STEP');
1320
+ let totalWeight = 0;
1321
+ let verifiedWeight = 0;
1322
+ for (const v of allVerifiableVerdicts) {
1323
+ const sev = computeSeverity(v);
1324
+ v.severity = sev;
1325
+ }
1326
+ for (const v of verifiableVerdicts) {
1327
+ const w = SEVERITY_WEIGHT[v.severity];
1328
+ totalWeight += w;
1329
+ if (v.verdict === 'GROUNDED' || v.verdict === 'APPROXIMATE_MATCH') {
1330
+ verifiedWeight += w;
1331
+ }
1332
+ }
1333
+ let groundingScore = totalWeight > 0 ? verifiedWeight / totalWeight : 1.0;
1334
+ // Zero-score guards — force 0 when the response has no grounding
1335
+ // substance. Each branch captures a distinct failure mode:
1336
+ // (a) 0 claims + tool data exists → response ignored data entirely
1337
+ // (b) incomplete_response fired + 0 claims → vacuously true 1.0
1338
+ // (c) no_tool_call + all claims unverified + single-turn → ungrounded
1339
+ const hasIncompleteResponse = detectedFailures.some((f) => f.type === 'grounding.incomplete_response');
1340
+ const hasNoToolCall = detectedFailures.some((f) => f.type === 'grounding.no_tool_call');
1341
+ const isFollowUp = turnCount > 1;
1342
+ if (claimsTotal === 0) {
1343
+ const toolOutputSteps = Trace_1.TraceUtils.getToolOutputSteps(evalTrace);
1344
+ const hasToolData = toolOutputSteps.some((step) => (step.toolOutputs ?? []).some((to) => {
1345
+ if (isEmptyResultOutput(to.output))
1346
+ return false;
1347
+ const vals = (0, Matchers_1.extractValuesFromOutput)(to.output);
1348
+ return vals.length > 0;
1349
+ }));
1350
+ if (hasToolData || hasIncompleteResponse) {
1351
+ groundingScore = 0;
1352
+ }
1353
+ }
1354
+ else if (hasNoToolCall && claimsVerified === 0 && !isFollowUp) {
1355
+ groundingScore = 0;
1356
+ }
1357
+ // ═══════════════════════════════════════════════════════════════════
1358
+ // PHASE 2 — Structural adjustments
1359
+ // Penalties from structural failures (arithmetic errors, fabrication,
1360
+ // tool budget exhaustion, etc.) that are NOT reflected in claim
1361
+ // verdicts, plus a hard cap when all tool calls errored.
1362
+ // ═══════════════════════════════════════════════════════════════════
1363
+ const FAILURE_PENALTY = {
1364
+ critical: 0.15,
1365
+ high: 0.10,
1366
+ medium: 0.05,
1367
+ low: 0.02,
1368
+ };
1369
+ const primaryFailures = detectedFailures.filter((f) => f.role === 'primary');
1370
+ let totalPenalty = 0;
1371
+ for (const f of primaryFailures) {
1372
+ totalPenalty += FAILURE_PENALTY[f.severity] ?? 0.05;
1373
+ }
1374
+ if (totalPenalty > 0) {
1375
+ groundingScore = Math.max(0, groundingScore - totalPenalty);
1376
+ }
1377
+ // When EVERY tool call in the active turn returned an error/not-found,
1378
+ // the AI had zero valid data — cap the score at 0.30.
1379
+ const activeToolOutputs = Trace_1.TraceUtils.getActiveToolOutputs(evalTrace);
1380
+ if (activeToolOutputs.length >= 2) {
1381
+ const allErrors = activeToolOutputs.every((to) => {
1382
+ const outputStr = typeof to.output === 'string'
1383
+ ? to.output
1384
+ : JSON.stringify(to.output ?? '');
1385
+ return /\berror\b/i.test(outputStr) || /\bnot\s+found\b/i.test(outputStr)
1386
+ || /\blimit poziva\b/i.test(outputStr);
1387
+ });
1388
+ if (allErrors) {
1389
+ groundingScore = Math.min(groundingScore, 0.30);
1390
+ }
1391
+ }
1392
+ const { rootCause, summary: rcaSummary, repairSequence } = buildRootCauseSummary(detectedFailures, hypotheses);
1393
+ // --- Resolution assessment ---
1394
+ const { resolved, resolutionNote } = assessResolution(detectedFailures, claimsTotal, claimsVerified, claimsSkipped);
1395
+ // --- Report confidence (meta-assessment of the analysis itself) ---
1396
+ const traceQuality = (0, TraceReadiness_1.assessTraceQuality)(trace);
1397
+ const reportConfidence = computeReportConfidence({
1398
+ traceQuality,
1399
+ claimsTotal,
1400
+ claimsVerified,
1401
+ claimsSkipped,
1402
+ groundingScore,
1403
+ });
1404
+ return {
1405
+ traceId: trace.traceId,
1406
+ generatedAt: new Date().toISOString(),
1407
+ claimsTotal,
1408
+ claimsVerified,
1409
+ claimsSkipped,
1410
+ claimsFailed,
1411
+ groundingScore,
1412
+ verdicts: processedVerdicts,
1413
+ detectedFailures,
1414
+ hypotheses,
1415
+ failureCounts: {
1416
+ high: allFindings.filter((f) => f.confidence === 'high').length,
1417
+ medium: allFindings.filter((f) => f.confidence === 'medium').length,
1418
+ low: hypotheses.length,
1419
+ },
1420
+ tolerancesUsed: this.tolerances,
1421
+ traceQuality,
1422
+ rootCause,
1423
+ summary: rcaSummary,
1424
+ repairSequence,
1425
+ resolved,
1426
+ resolutionNote,
1427
+ reportConfidence,
1428
+ ...(isMultiTurn ? { turnCount, activeTurnIndex: turnCount - 1 } : {}),
1429
+ };
1430
+ }
1431
+ }
1432
// CommonJS export of the engine class (compiled dist output; the
// corresponding .d.ts lives in dist-npm/Grounding/index.d.ts).
exports.GroundingEngine = GroundingEngine;
//# sourceMappingURL=index.js.map