truthguard-ai 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truthguard-ai might be problematic. Click here for more details.

Files changed (53)
  1. package/dist-npm/Claims/index.d.ts +73 -0
  2. package/dist-npm/Claims/index.d.ts.map +1 -0
  3. package/dist-npm/Claims/index.js +1669 -0
  4. package/dist-npm/Claims/index.js.map +1 -0
  5. package/dist-npm/Config/index.d.ts +41 -0
  6. package/dist-npm/Config/index.d.ts.map +1 -0
  7. package/dist-npm/Config/index.js +129 -0
  8. package/dist-npm/Config/index.js.map +1 -0
  9. package/dist-npm/Grounding/index.d.ts +40 -0
  10. package/dist-npm/Grounding/index.d.ts.map +1 -0
  11. package/dist-npm/Grounding/index.js +1433 -0
  12. package/dist-npm/Grounding/index.js.map +1 -0
  13. package/dist-npm/L2/index.d.ts +93 -0
  14. package/dist-npm/L2/index.d.ts.map +1 -0
  15. package/dist-npm/L2/index.js +1773 -0
  16. package/dist-npm/L2/index.js.map +1 -0
  17. package/dist-npm/Matchers/index.d.ts +101 -0
  18. package/dist-npm/Matchers/index.d.ts.map +1 -0
  19. package/dist-npm/Matchers/index.js +690 -0
  20. package/dist-npm/Matchers/index.js.map +1 -0
  21. package/dist-npm/Mode/index.d.ts +87 -0
  22. package/dist-npm/Mode/index.d.ts.map +1 -0
  23. package/dist-npm/Mode/index.js +117 -0
  24. package/dist-npm/Mode/index.js.map +1 -0
  25. package/dist-npm/Policy/index.d.ts +89 -0
  26. package/dist-npm/Policy/index.d.ts.map +1 -0
  27. package/dist-npm/Policy/index.js +143 -0
  28. package/dist-npm/Policy/index.js.map +1 -0
  29. package/dist-npm/Registry/index.d.ts +93 -0
  30. package/dist-npm/Registry/index.d.ts.map +1 -0
  31. package/dist-npm/Registry/index.js +818 -0
  32. package/dist-npm/Registry/index.js.map +1 -0
  33. package/dist-npm/Rules/index.d.ts +587 -0
  34. package/dist-npm/Rules/index.d.ts.map +1 -0
  35. package/dist-npm/Rules/index.js +6236 -0
  36. package/dist-npm/Rules/index.js.map +1 -0
  37. package/dist-npm/Rules/intents.d.ts +22 -0
  38. package/dist-npm/Rules/intents.d.ts.map +1 -0
  39. package/dist-npm/Rules/intents.js +242 -0
  40. package/dist-npm/Rules/intents.js.map +1 -0
  41. package/dist-npm/TraceReadiness/index.d.ts +42 -0
  42. package/dist-npm/TraceReadiness/index.d.ts.map +1 -0
  43. package/dist-npm/TraceReadiness/index.js +169 -0
  44. package/dist-npm/TraceReadiness/index.js.map +1 -0
  45. package/dist-npm/i18n/index.d.ts +44 -0
  46. package/dist-npm/i18n/index.d.ts.map +1 -0
  47. package/dist-npm/i18n/index.js +124 -0
  48. package/dist-npm/i18n/index.js.map +1 -0
  49. package/package.json +5 -17
  50. package/dist/cli/index.d.ts +0 -15
  51. package/dist/cli/index.d.ts.map +0 -1
  52. package/dist/cli/index.js +0 -807
  53. package/dist/cli/index.js.map +0 -1
@@ -0,0 +1,1773 @@
1
+ "use strict";
2
+ /**
3
+ * L2 — Structured Context Matching
4
+ *
5
+ * Extracts and matches claims that L1 (regex-based) cannot handle:
6
+ * - boolean fields (true/false → natural language equivalents)
7
+ * - enum/status fields (approved → "odobren", active → "aktivan")
8
+ * - list_items — verifies that response mentions all items from tool output arrays
9
+ * - key_value — matches identifiers (email, phone, ID) from tool output against response
10
+ * - aggregation — verifies SUM/AVG/COUNT/MIN/MAX computed from tool output arrays
11
+ *
12
+ * Fully deterministic — no LLM calls. Domain-specific enum translation
13
+ * tables enable cross-language matching (EN → SR, etc.).
14
+ */
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.extractStructuredClaims = extractStructuredClaims;
17
+ exports.matchBoolean = matchBoolean;
18
+ exports.matchEnum = matchEnum;
19
+ exports.addEnumTranslations = addEnumTranslations;
20
+ exports.getEnumTranslations = getEnumTranslations;
21
+ exports.extractListItemsClaims = extractListItemsClaims;
22
+ exports.matchListItems = matchListItems;
23
+ exports.extractKeyValueClaims = extractKeyValueClaims;
24
+ exports.matchKeyValue = matchKeyValue;
25
+ exports.extractAggregationClaims = extractAggregationClaims;
26
+ exports.matchAggregation = matchAggregation;
27
+ exports.extractRangeClaims = extractRangeClaims;
28
+ exports.matchRange = matchRange;
29
+ const crypto_1 = require("crypto");
30
+ const Trace_1 = require("../Trace");
31
+ const Matchers_1 = require("../Matchers");
32
+ // ---------------------------------------------------------------------------
33
+ // Boolean synonym tables
34
+ // ---------------------------------------------------------------------------
35
+ /** Maps the keys 'true' / 'false' to lowercase natural-language words (English and Serbian) that express that boolean value; extractStructuredClaims searches the lowercased response for them. */
36
+ const BOOLEAN_SYNONYMS = {
37
+ true: [
38
+ // English
39
+ 'true', 'yes', 'active', 'enabled', 'on', 'present', 'available',
40
+ 'approved', 'confirmed', 'valid', 'open', 'done', 'completed',
41
+ // Serbian (Latin script only, some forms listed both with and without diacritics)
42
+ 'da', 'aktivan', 'aktivna', 'aktivno', 'aktivni',
43
+ 'prisutan', 'prisutna', 'prisutno', 'prisutni',
44
+ 'dostupan', 'dostupna', 'dostupno',
45
+ 'odobren', 'odobrena', 'odobreno',
46
+ 'otvoren', 'otvorena', 'otvoreno',
47
+ 'zavrseno', 'završeno', 'završen', 'završena',
48
+ ],
49
+ false: [
50
+ // English (note: 'pending' and 'not done' are counted as false here)
51
+ 'false', 'no', 'inactive', 'disabled', 'off', 'absent', 'unavailable',
52
+ 'rejected', 'denied', 'invalid', 'closed', 'pending', 'not done',
53
+ // Serbian (Latin script)
54
+ 'ne', 'neaktivan', 'neaktivna', 'neaktivno',
55
+ 'odsutan', 'odsutna', 'odsutno', 'odsutni',
56
+ 'nedostupan', 'nedostupna', 'nedostupno',
57
+ 'odbijen', 'odbijena', 'odbijeno',
58
+ 'zatvoren', 'zatvorena', 'zatvoreno',
59
+ ],
60
+ };
61
+ // ---------------------------------------------------------------------------
62
+ // Enum translation table
63
+ // ---------------------------------------------------------------------------
64
/**
 * Maps common enum values to their natural-language equivalents.
 * Each entry: canonical_value → [synonyms in multiple languages].
 *
 * All synonyms are stored lowercase; the canonical value itself is always
 * part of its own synonym list. Some synonyms intentionally appear under
 * more than one canonical value (e.g. 'odsutan' under both on_leave and
 * absent), so a reverse lookup built from this table resolves such words
 * to whichever canonical value is listed last.
 *
 * Organized by domain. Users can extend via addEnumTranslations().
 */
const ENUM_TRANSLATIONS = {
    // Status values
    approved: ['approved', 'odobren', 'odobrena', 'odobreno', 'odobreni',
        'aprobado', 'aprobada', 'approuvé', 'approuvée', 'aprovado', 'aprovada',
        'одобрен', 'одобрена', 'одобрено', 'स्वीकृत', 'موافق', 'অনুমোদিত', '已批准', '承認済み'],
    rejected: ['rejected', 'odbijen', 'odbijena', 'odbijeno', 'odbijeni',
        'rechazado', 'rechazada', 'rejeté', 'rejetée', 'rejeitado', 'rejeitada',
        'отклонён', 'отклонена', 'अस्वीकृत', 'مرفوض', 'প্রত্যাখ্যাত', '已拒绝', '却下'],
    pending: ['pending', 'na cekanju', 'na čekanju', 'čeka', 'ceka', 'u toku',
        'pendiente', 'en attente', 'pendente',
        'ожидание', 'на рассмотрении', 'लंबित', 'معلق', 'মুলতুবি', '待定', '待处理', '保留中'],
    // The Hindi entry below must start with Devanagari "स" (U+0938); a
    // look-alike Cyrillic "с" (U+0441) would never match real Hindi text.
    active: ['active', 'aktivan', 'aktivna', 'aktivno', 'aktivni',
        'activo', 'activa', 'actif', 'active', 'ativo', 'ativa',
        'активный', 'активна', 'सक्रिय', 'نشط', 'সক্রিয়', '活跃', 'アクティブ'],
    inactive: ['inactive', 'neaktivan', 'neaktivna', 'neaktivno', 'neaktivni',
        'inactivo', 'inactiva', 'inactif', 'inactive', 'inativo', 'inativa',
        'неактивный', 'неактивна', 'निष्क्रिय', 'غير نشط', 'নিষ্ক্রিয়', '不活跃', '非アクティブ'],
    completed: ['completed', 'završen', 'završena', 'završeno', 'zavrseno', 'gotov', 'gotovo',
        'completado', 'completada', 'terminé', 'terminée', 'concluído', 'concluída',
        'завершён', 'завершена', 'पूर्ण', 'مكتمل', 'সম্পূর্ণ', '已完成', '完了'],
    cancelled: ['cancelled', 'canceled', 'otkazan', 'otkazana', 'otkazano', 'otkazani',
        'cancelado', 'cancelada', 'annulé', 'annulée', 'cancelado', 'cancelada',
        'отменён', 'отменена', 'रद्द', 'ملغى', 'বাতিল', '已取消', 'キャンセル'],
    open: ['open', 'otvoren', 'otvorena', 'otvoreno', 'otvoreni',
        'abierto', 'abierta', 'ouvert', 'ouverte', 'aberto', 'aberta',
        'открыт', 'открыта', 'खुला', 'مفتوح', 'খোলা', '开放', 'オープン'],
    closed: ['closed', 'zatvoren', 'zatvorena', 'zatvoreno', 'zatvoreni',
        'cerrado', 'cerrada', 'fermé', 'fermée', 'fechado', 'fechada',
        'закрыт', 'закрыта', 'बंद', 'مغلق', 'বন্ধ', '已关闭', 'クローズ'],
    // Leave / HR
    on_leave: ['on leave', 'on_leave', 'na odmoru', 'na godišnjem', 'na godisnjem', 'odsutan', 'odsutna',
        'de permiso', 'en congé', 'de licença',
        'в отпуске', 'छुट्टी पर', 'في إجازة', 'ছুটিতে', '休假中', '休暇中'],
    sick_leave: ['sick leave', 'sick_leave', 'bolovanje', 'na bolovanju',
        'baja por enfermedad', 'congé maladie', 'licença médica',
        'на больничном', 'बीमार छुट्टी', 'إجازة مرضية', 'অসুস্থতার ছুটি', '病假', '病気休暇'],
    present: ['present', 'prisutan', 'prisutna', 'prisutno', 'prisutni',
        'presente', 'présent', 'présente',
        'присутствует', 'उपस्थित', 'حاضر', 'উপস্থিত', '在场', '出席'],
    absent: ['absent', 'odsutan', 'odsutna', 'odsutno', 'odsutni',
        'ausente', 'absent', 'absente',
        'отсутствует', 'अनुपस्थित', 'غائب', 'অনুপস্থিত', '缺席', '欠席'],
    late: ['late', 'kasni', 'kasnio', 'kasnila', 'zakašnjenje', 'zakasnjenje',
        'tarde', 'retardado', 'en retard', 'atrasado', 'atrasada',
        'опоздание', 'опоздал', 'देर से', 'متأخر', 'দেরি', '迟到', '遅刻'],
    // Priority
    high: ['high', 'visok', 'visoka', 'visoko', 'visoki', 'hitno', 'urgent',
        'alto', 'alta', 'élevé', 'élevée', 'haut', 'haute',
        'высокий', 'высокая', 'उच्च', 'عالي', 'উচ্চ', '高', '高い'],
    medium: ['medium', 'srednji', 'srednja', 'srednje',
        'medio', 'media', 'moyen', 'moyenne', 'médio', 'média',
        'средний', 'средняя', 'मध्यम', 'متوسط', 'মাঝারি', '中', '中くらい'],
    low: ['low', 'nizak', 'niska', 'nisko', 'niski',
        'bajo', 'baja', 'bas', 'basse', 'baixo', 'baixa',
        'низкий', 'низкая', 'कम', 'منخفض', 'নিম্ন', '低', '低い'],
    critical: ['critical', 'kritičan', 'kritična', 'kritično', 'kritican',
        'crítico', 'crítica', 'critique', 'crítico', 'crítica',
        'критический', 'критическая', 'गंभीर', 'حرج', 'জটিল', '紧急', '重大'],
    // Boolean-like strings
    yes: ['yes', 'da',
        'sí', 'oui', 'sim', 'да', 'हाँ', 'نعم', 'হ্যাঁ', '是', 'はい'],
    no: ['no', 'ne', 'nije',
        'no', 'non', 'não', 'нет', 'नहीं', 'لا', 'না', '否', 'いいえ'],
    // Payment
    paid: ['paid', 'plaćen', 'plaćena', 'plaćeno', 'placen', 'placeno',
        'pagado', 'pagada', 'payé', 'payée', 'pago', 'paga',
        'оплачено', 'भुगतान किया', 'مدفوع', 'পরিশোধিত', '已支付', '支払済み'],
    unpaid: ['unpaid', 'neplaćen', 'neplaćena', 'neplaćeno', 'neplacen',
        'impago', 'impagado', 'non payé', 'não pago',
        'не оплачено', 'अवैतनिक', 'غير مدفوع', 'অপরিশোধিত', '未支付', '未払い'],
    overdue: ['overdue', 'zakasnelo', 'kasni', 'dospelo', 'dospela',
        'vencido', 'vencida', 'en retard', 'atrasado', 'atrasada',
        'просрочено', 'अतिदेय', 'متأخر', 'বকেয়া', '逾期', '期限超過'],
    // General
    enabled: ['enabled', 'uključen', 'uključena', 'ukljucen', 'ukljucena',
        'habilitado', 'habilitada', 'activé', 'activée', 'ativado', 'ativada',
        'включён', 'включена', 'सक्षम', 'مفعل', 'সক্রিয়', '已启用', '有効'],
    disabled: ['disabled', 'isključen', 'isključena', 'iskljucen', 'iskljucena',
        'deshabilitado', 'deshabilitada', 'désactivé', 'désactivée', 'desativado', 'desativada',
        'отключён', 'отключена', 'अक्षम', 'معطل', 'নিষ্ক্রিয়', '已禁用', '無効'],
    available: ['available', 'dostupan', 'dostupna', 'dostupno',
        'disponible', 'disponible', 'disponível',
        'доступен', 'доступна', 'उपलब्ध', 'متاح', 'উপলব্ধ', '可用', '利用可能'],
    unavailable: ['unavailable', 'nedostupan', 'nedostupna', 'nedostupno',
        'no disponible', 'indisponible', 'indisponível',
        'недоступен', 'недоступна', 'अनुपलब्ध', 'غير متاح', 'অনুপলব্ধ', '不可用', '利用不可'],
    success: ['success', 'uspešno', 'uspesno', 'uspeh',
        'éxito', 'succès', 'sucesso',
        'успех', 'успешно', 'सफलता', 'نجاح', 'সাফল্য', '成功', '成功'],
    failed: ['failed', 'neuspešno', 'neuspesno', 'neuspeo', 'neuspela', 'greška', 'greska',
        'fallido', 'fallida', 'échoué', 'falhou',
        'неудача', 'провал', 'विफल', 'فشل', 'ব্যর্থ', '失败', '失敗'],
    error: ['error', 'greška', 'greska',
        'error', 'erreur', 'erro',
        'ошибка', 'त्रुटि', 'خطأ', 'ত্রুটি', '错误', 'エラー'],
};
166
+ /**
167
+ * Semantic opposites: maps a canonical enum value (each key is also a key of ENUM_TRANSLATIONS)
168
+ * to the canonical values that can meaningfully contradict it. Without this, findContradiction
169
+ * (defined outside this excerpt) scans ALL enum values, causing false positives (e.g. "da" matching
170
+ * for "active" status fields in Serbian text). The relation is not symmetric: e.g. approved lists cancelled, but cancelled does not list approved.
171
+ */
172
+ const ENUM_OPPOSITES = {
173
+ approved: ['rejected', 'pending', 'cancelled'],
174
+ rejected: ['approved', 'pending'],
175
+ pending: ['approved', 'rejected', 'completed', 'cancelled'],
176
+ active: ['inactive'],
177
+ inactive: ['active'],
178
+ completed: ['pending', 'cancelled', 'open'],
179
+ cancelled: ['pending', 'completed'],
180
+ open: ['closed', 'completed'],
181
+ closed: ['open'],
182
+ present: ['absent', 'on_leave', 'sick_leave', 'late'],
183
+ absent: ['present'],
184
+ on_leave: ['present'],
185
+ sick_leave: ['present'],
186
+ late: ['present'],
187
+ yes: ['no'],
188
+ no: ['yes'],
189
+ paid: ['unpaid', 'overdue'],
190
+ unpaid: ['paid'],
191
+ overdue: ['paid'],
192
+ enabled: ['disabled'],
193
+ disabled: ['enabled'],
194
+ available: ['unavailable'],
195
+ unavailable: ['available'],
196
+ success: ['failed', 'error'],
197
+ failed: ['success'],
198
+ error: ['success'],
199
+ high: ['medium', 'low'],
200
+ medium: ['high', 'low', 'critical'],
201
+ low: ['high', 'medium', 'critical'],
202
+ critical: ['low', 'medium'],
203
+ };
204
+ /**
205
+ * Set of lowercase words that are common function words in supported languages and should
206
+ * NOT be matched as standalone enum/boolean synonyms in running text.
207
+ * They are only valid when the response is essentially just that word (the code that consults this set is outside this excerpt).
208
+ * NOTE(review): 'an', 'に' and 'ご' do not occur in the synonym tables visible here — confirm they are still needed.
209
+ */
209
+ const AMBIGUOUS_SHORT_WORDS = new Set([
210
+ // Latin short words that collide with function words
211
+ 'da', 'ne', 'on', 'an', 'no', 'non', 'sim',
212
+ // Cyrillic — "да" is a conjunction in Russian, not just "yes"
213
+ 'да',
214
+ // CJK single characters — extremely common in running text
215
+ '是', '否', '高', '中', '低',
216
+ // Japanese kana that double as particles/prefixes
217
+ 'に', 'ご',
218
+ // Arabic — "لا" is ubiquitous negation particle
219
+ 'لا',
220
+ // Bengali — "না" is common negation
221
+ 'না',
222
+ ]);
223
/**
 * Build the reverse index of ENUM_TRANSLATIONS: lowercased synonym → canonical value.
 *
 * Entries are inserted in table order, so when one synonym is listed under
 * several canonical values the last canonical value in the table wins.
 *
 * @returns {Map<string, string>} lookup from lowercased synonym to canonical value
 */
function buildReverseLookup() {
    const lookup = new Map();
    Object.keys(ENUM_TRANSLATIONS).forEach((canonicalValue) => {
        ENUM_TRANSLATIONS[canonicalValue].forEach((synonym) => {
            lookup.set(synonym.toLowerCase(), canonicalValue);
        });
    });
    return lookup;
}
/** Module-level reverse index, built once at load time and extended by addEnumTranslations(). */
const ENUM_REVERSE = buildReverseLookup();
234
+ // ---------------------------------------------------------------------------
235
+ // L2 Claim Extraction
236
+ // ---------------------------------------------------------------------------
237
+ /** Anchored, case-insensitive name patterns (prefixes such as is_/has_, or exact names) marking a field as likely boolean; walkOutput tests them against the last dot-separated path segment. */
238
+ const BOOLEAN_FIELD_PATTERNS = [
239
+ /^is_/i, /^has_/i, /^can_/i, /^should_/i, /^was_/i, /^did_/i,
240
+ /^enabled$/i, /^disabled$/i, /^active$/i, /^visible$/i, /^archived$/i,
241
+ /^deleted$/i, /^verified$/i, /^confirmed$/i, /^available$/i,
242
+ ];
243
+ /** Unanchored, case-insensitive substrings marking a field as likely enum/status; being substrings they also hit longer names (e.g. "mode" inside "model", "state" inside "statement"). */
244
+ const ENUM_FIELD_PATTERNS = [
245
+ /status/i, /state/i, /type/i, /priority/i, /level/i,
246
+ /category/i, /role/i, /phase/i, /stage/i, /mode/i,
247
+ /result/i, /outcome/i, /decision/i, /reason/i,
248
+ ];
249
+ /** Maximum recursion depth for the JSON walkers: walkOutput and walkForArrays stop descending once their depth argument exceeds this value (guards against very deep or circular data). */
250
+ const MAX_WALK_DEPTH = 50;
251
/**
 * Detect if a tool output looks like a DB schema description
 * (contains "table"/"columns" keys typically returned by describe_database).
 * These should NOT be treated as data for claim extraction.
 *
 * An output counts as a schema when it is a plain (non-array) object that
 *   - names a table: `table` is a string, or `tables` is a non-null object/array, AND
 *   - describes columns: `columns` is a non-null object/array.
 *
 * @param {unknown} output raw tool output
 * @returns {boolean} true when the output has the schema shape described above
 */
function isSchemaOutput(output) {
    if (output === null || typeof output !== 'object' || Array.isArray(output))
        return false;
    // `typeof null === 'object'`, so null must be excluded explicitly; otherwise a
    // data row such as { table: "orders", columns: null } is mistaken for a schema.
    const isContainer = (candidate) => candidate !== null && typeof candidate === 'object';
    const namesTable = typeof output['table'] === 'string' || isContainer(output['tables']);
    const describesColumns = isContainer(output['columns']);
    return namesTable && describesColumns;
}
264
/**
 * Collect the boolean/enum candidate fields from every tool output in the trace.
 *
 * Tool outputs that isSchemaOutput() recognises as schema descriptions are
 * skipped; all others are walked by walkOutput(), which appends
 * `{ fieldName, value, stepId }` records to the result.
 *
 * @param trace trace handed to TraceUtils.getToolOutputSteps
 * @returns {Array<{fieldName: string, value: boolean|string, stepId: *}>}
 */
function extractStructuredFields(trace) {
    const collected = [];
    for (const toolStep of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        const outputs = toolStep.toolOutputs ?? [];
        for (const toolOutput of outputs) {
            if (!isSchemaOutput(toolOutput.output)) {
                walkOutput(toolOutput.output, toolStep.stepId, '', collected);
            }
        }
    }
    return collected;
}
279
/**
 * Recursively walk a tool-output value and append structured-field records to `out`.
 *
 * - booleans are always recorded (even at the root, where `path` is empty);
 * - strings are recorded only when they have a path and either the last
 *   dot-separated path segment matches a boolean/enum field-name pattern or
 *   the lowercased value is a known synonym in ENUM_REVERSE;
 * - arrays and objects are descended into, extending the path with `[i]` / `.key`;
 * - every other type (numbers are handled by L1) is ignored.
 *
 * @param value   current JSON value
 * @param stepId  id of the step that produced the output
 * @param {string} path dotted path of `value` inside the output ('' at the root)
 * @param {Array} out  accumulator receiving `{ fieldName, value, stepId }`
 * @param {number} [depth=0] current recursion depth; walking stops beyond MAX_WALK_DEPTH
 */
function walkOutput(value, stepId, path, out, depth = 0) {
    if (value === null || value === undefined || depth > MAX_WALK_DEPTH)
        return;
    switch (typeof value) {
        case 'boolean':
            out.push({ fieldName: path, value, stepId });
            return;
        case 'string': {
            if (!path)
                return;
            const segments = path.split('.');
            const leafKey = segments[segments.length - 1];
            const nameSuggestsField = BOOLEAN_FIELD_PATTERNS.some((pattern) => pattern.test(leafKey)) ||
                ENUM_FIELD_PATTERNS.some((pattern) => pattern.test(leafKey));
            if (nameSuggestsField || ENUM_REVERSE.has(value.toLowerCase())) {
                out.push({ fieldName: path, value, stepId });
            }
            return;
        }
        case 'object': {
            if (Array.isArray(value)) {
                value.forEach((element, index) => {
                    const elementPath = path ? `${path}[${index}]` : `[${index}]`;
                    walkOutput(element, stepId, elementPath, out, depth + 1);
                });
                return;
            }
            for (const [key, child] of Object.entries(value)) {
                const childPath = path ? `${path}.${key}` : key;
                walkOutput(child, stepId, childPath, out, depth + 1);
            }
            return;
        }
        default:
            // Numbers are handled by L1; other primitive types carry no claim.
            return;
    }
}
308
/**
 * Extract L2 structured context claims by comparing tool output
 * boolean/enum fields against the response text.
 *
 * Boolean fields: a claim is emitted when the response contains a synonym
 * of the field's value; otherwise, when it contains a synonym of the
 * opposite value, a claim carrying that opposite value is emitted.
 *
 * String (enum) fields: the value is resolved to its canonical form and the
 * response is searched for any of its synonyms. A hit only becomes a claim
 * when at least one of these holds: the field's key appears within 40
 * characters of the hit, the key (3+ chars) appears anywhere in the
 * response, the synonym is a multi-word phrase, or the response is at most
 * 120 characters long. When no synonym is found, findContradiction() is
 * asked whether the response states a conflicting value instead.
 *
 * @param trace        trace whose tool outputs are inspected
 * @param {string} responseText  final response text
 * @param {string} [sourceStepId] step id recorded on each claim's source
 *                                (defaults to 'final_response')
 * @returns {Array} claims after deduplicateClaims(); empty when trace or
 *                  response text is missing
 */
function extractStructuredClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace)
        return [];
    const fields = extractStructuredFields(trace);
    const claims = [];
    const responseLower = responseText.toLowerCase();
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    const addClaim = (type, value, rawText, field) => {
        claims.push({
            claimId: crypto_1.randomUUID(),
            type,
            value,
            rawText,
            source,
            fieldName: field.fieldName,
            sourceFieldValue: field.value,
        });
    };
    for (const field of fields) {
        if (typeof field.value === 'boolean') {
            // Boolean field — look for true/false synonyms in response
            const boolKey = field.value ? 'true' : 'false';
            const oppositeKey = field.value ? 'false' : 'true';
            const matchedSynonym = findSynonymInText(responseLower, BOOLEAN_SYNONYMS[boolKey]);
            const matchedOpposite = findSynonymInText(responseLower, BOOLEAN_SYNONYMS[oppositeKey]);
            if (matchedSynonym) {
                addClaim('boolean', boolKey, matchedSynonym, field);
            }
            else if (matchedOpposite) {
                // Response says the opposite of tool output — still a claim (UNGROUNDED)
                addClaim('boolean', oppositeKey, matchedOpposite, field);
            }
            continue;
        }
        if (typeof field.value !== 'string')
            continue;
        // Enum/status field — look for value or its translations
        const valueLower = field.value.toLowerCase();
        const canonical = ENUM_REVERSE.get(valueLower) ?? valueLower;
        // Own-property check: a tool value such as "constructor" or "toString"
        // must not resolve to an inherited Object.prototype member.
        const synonyms = Object.prototype.hasOwnProperty.call(ENUM_TRANSLATIONS, canonical)
            ? ENUM_TRANSLATIONS[canonical]
            : [valueLower];
        const matchedSynonym = findSynonymInText(responseLower, synonyms);
        if (!matchedSynonym) {
            // Check if response mentions a DIFFERENT value for this field type
            // e.g., tool says "approved" but response says "odbijen" (rejected)
            const contradictionMatch = findContradiction(responseLower, canonical);
            if (contradictionMatch) {
                // value = what the response actually says
                addClaim('enum', contradictionMatch.foundCanonical, contradictionMatch.foundSynonym, field);
            }
            continue;
        }
        // Require structural proximity: the matched synonym should appear near
        // a field-name keyword (e.g., "status: late") or be a standalone assertion,
        // not just a verb in flowing prose (e.g., "kasni na posao" = "is late to work").
        const synPos = responseLower.indexOf(matchedSynonym.toLowerCase());
        const cleanKey = (field.fieldName.split('.').pop() ?? '').replace(/\[\d+\]$/, '').toLowerCase();
        let hasFieldProximity = false;
        if (synPos >= 0 && cleanKey.length >= 2) {
            // Check if the field name keyword appears within 40 chars of the synonym
            const nearbyStart = Math.max(0, synPos - 40);
            const nearbyEnd = Math.min(responseLower.length, synPos + matchedSynonym.length + 40);
            hasFieldProximity = responseLower.substring(nearbyStart, nearbyEnd).includes(cleanKey);
        }
        // Also accept: if the field name keyword appears ANYWHERE in the response
        // OR if the synonym is a multi-word phrase (more specific, less ambiguous)
        // OR if the response is short (focused answer, not flowing prose)
        const fieldMentioned = cleanKey.length >= 3 && responseLower.includes(cleanKey);
        const isMultiWord = matchedSynonym.includes(' ');
        const isShortResponse = responseLower.length <= 120;
        if (hasFieldProximity || fieldMentioned || isMultiWord || isShortResponse) {
            // value = original tool output value
            addClaim('enum', field.value, matchedSynonym, field);
        }
    }
    return deduplicateClaims(claims);
}
414
/**
 * Match a boolean claim against the tool output value.
 *
 * Both sides are normalised the same way: real booleans are taken as-is and
 * the strings "true"/"yes" and "false"/"no" (case-insensitive, surrounding
 * whitespace ignored) are accepted as boolean-like. Anything else is not a
 * boolean and can never match.
 *
 * @param {string|boolean} claimedValue value the response asserts ('true' or 'false')
 * @param {unknown} sourceValue  value found in the tool output
 * @returns {{matched: boolean, explanation: string}}
 */
function matchBoolean(claimedValue, sourceValue) {
    const toBoolean = (candidate) => {
        if (typeof candidate === 'boolean')
            return candidate;
        if (typeof candidate !== 'string')
            return null;
        const normalized = candidate.trim().toLowerCase();
        if (normalized === 'true' || normalized === 'yes')
            return true;
        if (normalized === 'false' || normalized === 'no')
            return false;
        return null;
    };
    const sourceBool = toBoolean(sourceValue);
    if (sourceBool === null) {
        return typeof sourceValue === 'string'
            ? { matched: false, explanation: `Source value "${sourceValue}" is not a boolean.` }
            : { matched: false, explanation: `Source value is not a boolean (type: ${typeof sourceValue}).` };
    }
    // An unrecognised claim used to be treated as `false`, which silently
    // "matched" any false source value; report it as a non-match instead.
    const claimedBool = toBoolean(claimedValue);
    if (claimedBool === null) {
        return { matched: false, explanation: `Claimed value "${String(claimedValue)}" is not a boolean.` };
    }
    if (claimedBool === sourceBool) {
        return { matched: true, explanation: `Boolean claim "${claimedValue}" matches source value ${sourceBool}.` };
    }
    return {
        matched: false,
        explanation: `Boolean contradiction: response indicates "${claimedValue}" but source is ${sourceBool}.`,
    };
}
446
/**
 * Match an enum claim against the tool output value.
 * Uses the translation table for cross-language matching.
 *
 * Comparison is case-insensitive. Two values match when they are equal or
 * when both resolve to the same canonical value through ENUM_REVERSE
 * (values unknown to the table resolve to their own lowercase form).
 *
 * @param {string} claimedValue value the response asserts
 * @param {unknown} sourceValue  value found in the tool output
 * @returns {{matched: boolean, explanation: string}} non-string inputs on
 *          either side yield `matched: false` rather than throwing
 */
function matchEnum(claimedValue, sourceValue) {
    if (typeof sourceValue !== 'string') {
        return { matched: false, explanation: `Source value is not a string (type: ${typeof sourceValue}).` };
    }
    // Mirror the source-side guard: a missing or non-string claim would
    // otherwise throw on .toLowerCase().
    if (typeof claimedValue !== 'string') {
        return { matched: false, explanation: `Claimed value is not a string (type: ${typeof claimedValue}).` };
    }
    const claimLower = claimedValue.toLowerCase();
    const sourceLower = sourceValue.toLowerCase();
    // Direct match
    if (claimLower === sourceLower) {
        return { matched: true, explanation: `Enum claim "${claimedValue}" exactly matches source "${sourceValue}".` };
    }
    // Translation table match: both resolve to the same canonical value
    const claimCanonical = ENUM_REVERSE.get(claimLower) ?? claimLower;
    const sourceCanonical = ENUM_REVERSE.get(sourceLower) ?? sourceLower;
    if (claimCanonical === sourceCanonical) {
        return {
            matched: true,
            explanation: `Enum claim "${claimedValue}" matches source "${sourceValue}" via translation (both → "${claimCanonical}").`,
        };
    }
    return {
        matched: false,
        explanation: `Enum mismatch: response says "${claimedValue}" (→ ${claimCanonical}) but source is "${sourceValue}" (→ ${sourceCanonical}).`,
    };
}
472
+ // ---------------------------------------------------------------------------
473
+ // Public API for extending translation tables
474
+ // ---------------------------------------------------------------------------
475
/**
 * Add custom enum translations for domain-specific values.
 *
 * New synonyms are appended to any existing list for the canonical value
 * (synonyms already present are not added twice) and registered in the
 * reverse lookup under their lowercase form. Entries whose synonym list is
 * not an array, and synonyms that are not non-empty strings, are ignored.
 *
 * @param translations Map of canonical_value → synonym array
 */
function addEnumTranslations(translations) {
    for (const [canonical, synonyms] of Object.entries(translations ?? {})) {
        if (!Array.isArray(synonyms))
            continue;
        // Own-property check so that canonical names like "constructor" do not
        // pick up inherited Object.prototype members as their existing list.
        const merged = Object.prototype.hasOwnProperty.call(ENUM_TRANSLATIONS, canonical)
            ? [...ENUM_TRANSLATIONS[canonical]]
            : [];
        for (const syn of synonyms) {
            if (typeof syn !== 'string' || syn.length === 0)
                continue;
            if (!merged.includes(syn))
                merged.push(syn);
            ENUM_REVERSE.set(syn.toLowerCase(), canonical);
        }
        // defineProperty (rather than assignment) keeps a "__proto__" key from
        // replacing the table's prototype.
        Object.defineProperty(ENUM_TRANSLATIONS, canonical, {
            value: merged,
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }
}
490
/**
 * Get a copy of the current enum translation table (for testing/inspection).
 *
 * Both the table and every synonym array are copied, so callers can modify
 * the result freely without affecting the live table.
 *
 * @returns {Record<string, string[]>} canonical value → synonym array
 */
function getEnumTranslations() {
    return Object.fromEntries(Object.entries(ENUM_TRANSLATIONS).map(([canonical, synonyms]) => [canonical, [...synonyms]]));
}
496
+ // =========================================================================
497
+ // L2 — LIST ITEMS (Array Set Verification)
498
+ // =========================================================================
499
+ /** Field names that typically hold a displayable item label, in priority order: pickLabel returns the first of these that holds a non-empty string. */
500
+ const LABEL_FIELD_PRIORITY = [
501
+ 'name', 'full_name', 'fullName', 'display_name', 'displayName',
502
+ 'title', 'label', 'description', 'email', 'username', 'id',
503
+ ];
504
/**
 * Tell whether a key has the exact shape of an ISO calendar date (YYYY-MM-DD).
 * Only the digit/dash layout is checked, not whether the date exists.
 *
 * @param {string} key candidate key
 * @returns {boolean}
 */
function isDateLikeKey(key) {
    const isoDayShape = /^\d{4}-\d{2}-\d{2}$/;
    return isoDayShape.test(key);
}
508
/**
 * Parse a period string (from TraceUtils.inferPeriod) into concrete start/end dates.
 *
 * Accepted forms (surrounding whitespace is ignored):
 *   - range:       "2026-03-01/2026-03-31"
 *   - single date: "2026-03-02" (start and end are the same day)
 *
 * A range given in reverse order is normalised so that start <= end;
 * otherwise every date would fall outside the bounds.
 *
 * @param {string|null|undefined} period period string
 * @returns {{start: string, end: string}|null} bounds as YYYY-MM-DD strings,
 *          or null if the period is missing, not a string, or cannot be parsed
 */
function parsePeriodBounds(period) {
    if (typeof period !== 'string')
        return null;
    const isoDay = /^\d{4}-\d{2}-\d{2}$/;
    const text = period.trim();
    if (text === '')
        return null;
    // Range format: "2026-03-01/2026-03-31"
    if (text.includes('/')) {
        const [first, second] = text.split('/').map((part) => part.trim());
        if (isoDay.test(first) && isoDay.test(second)) {
            // ISO dates order correctly as plain strings.
            return first <= second
                ? { start: first, end: second }
                : { start: second, end: first };
        }
    }
    // Single date: "2026-03-02"
    if (isoDay.test(text)) {
        return { start: text, end: text };
    }
    return null;
}
528
/**
 * Collect the display labels of every array of objects found in the tool outputs.
 *
 * Each non-schema tool output is walked with walkForArrays(), which records
 * one group per array of objects (restricted to `period` inside date-keyed
 * containers when the period parses). The collected groups are then handed
 * to splitDateFieldGroups(), which breaks flat arrays whose items carry a
 * date field into per-date sub-groups, e.g.
 *   [{date:"2026-03-02",name:"Vladimir"},{date:"2026-03-14",name:"Dejan"}]
 *   → path.date=2026-03-02 and path.date=2026-03-14
 *
 * @param trace   trace whose tool outputs are inspected
 * @param {string} [period] period string understood by parsePeriodBounds
 * @returns {Array<{arrayPath: string, labels: string[], stepId: *}>}
 */
function extractArrayGroups(trace, period) {
    const collected = [];
    const toolSteps = Trace_1.TraceUtils.getToolOutputSteps(trace);
    const bounds = parsePeriodBounds(period);
    for (const toolStep of toolSteps) {
        const outputs = toolStep.toolOutputs ?? [];
        for (const toolOutput of outputs) {
            if (!isSchemaOutput(toolOutput.output)) {
                walkForArrays(toolOutput.output, toolStep.stepId, '', collected, 0, bounds);
            }
        }
    }
    return splitDateFieldGroups(collected, trace);
}
550
/** Item field names, in lookup order, that may hold the date an array is split by. */
const DATE_FIELD_NAMES = ['date', 'datum', 'day', 'dan', 'event_date', 'created_at', 'timestamp'];
/** A date value must start with YYYY-MM-DD; a time part may follow. */
const DATE_VALUE_RE = /^\d{4}-\d{2}-\d{2}/;
/**
 * For flat arrays whose items have a date field, split the single ArrayGroup
 * into per-date sub-groups. This allows the sibling-scope machinery to treat
 * each date slice independently.
 *
 * Example: array at path "anomalies" with items spanning 3 dates →
 *   anomalies.date=2026-03-02, anomalies.date=2026-03-14, anomalies.date=2026-03-15
 *
 * A group is returned unchanged when its raw array cannot be found, has
 * fewer than two items, its first item has no recognised date field, or
 * all dated items fall on a single date.
 *
 * NOTE(review): when a group IS split, items without a usable date value
 * (or without a label) appear in none of the sub-groups — confirm that
 * dropping them is intended.
 *
 * @param {Array<{arrayPath: string, labels: string[], stepId: *}>} groups
 * @param trace trace used to locate each group's raw array
 * @returns {Array<{arrayPath: string, labels: string[], stepId: *}>}
 */
function splitDateFieldGroups(groups, trace) {
    const toolSteps = Trace_1.TraceUtils.getToolOutputSteps(trace);
    const isRecord = (candidate) => candidate !== null && typeof candidate === 'object';
    const result = [];
    for (const group of groups) {
        // Find the raw array from tool output to inspect its items' date fields
        const rawArray = findRawArray(group, toolSteps);
        const sample = rawArray && rawArray.length >= 2 ? rawArray[0] : null;
        // The date field is decided from the first item only
        const dateField = isRecord(sample)
            ? DATE_FIELD_NAMES.find((name) => typeof sample[name] === 'string' && DATE_VALUE_RE.test(sample[name]))
            : undefined;
        if (!dateField) {
            result.push(group);
            continue;
        }
        // Group items by date value
        const byDate = new Map();
        for (const item of rawArray) {
            // Arrays may contain null/primitive entries; they carry no label.
            if (!isRecord(item))
                continue;
            const dateVal = item[dateField];
            // Later items may hold e.g. a numeric timestamp in the same field.
            if (typeof dateVal !== 'string' || dateVal === '')
                continue;
            // Derive the label from the item itself: group.labels only holds
            // the labels of items that had one, so its indices do not line up
            // with raw array positions once any item was skipped.
            const label = pickLabel(item);
            if (!label)
                continue;
            const dateKey = dateVal.substring(0, 10); // YYYY-MM-DD
            const bucket = byDate.get(dateKey);
            if (bucket) {
                bucket.push(label);
            }
            else {
                byDate.set(dateKey, [label]);
            }
        }
        // Only split if there are multiple date values (otherwise no benefit)
        if (byDate.size < 2) {
            result.push(group);
            continue;
        }
        for (const [dateKey, labels] of byDate) {
            result.push({
                arrayPath: `${group.arrayPath}.${dateField}=${dateKey}`,
                labels,
                stepId: group.stepId,
            });
        }
    }
    return result;
}
609
/**
 * Locate the raw array in the tool outputs that an ArrayGroup was built from.
 *
 * Only steps whose stepId equals the group's are considered; within them the
 * first tool output in which resolveArrayPath() finds an array at the group's
 * arrayPath wins.
 *
 * @param {{arrayPath: string, stepId: *}} group
 * @param steps tool-output steps to search
 * @returns {Array|null} the raw array, or null when none is found
 */
function findRawArray(group, steps) {
    for (const candidateStep of steps) {
        if (candidateStep.stepId === group.stepId) {
            const outputs = candidateStep.toolOutputs ?? [];
            for (const toolOutput of outputs) {
                const rawArray = resolveArrayPath(toolOutput.output, group.arrayPath);
                if (rawArray) {
                    return rawArray;
                }
            }
        }
    }
    return null;
}
624
/**
 * Resolve an ArrayGroup path (as produced by walkForArrays) against a tool
 * output and return the array found there.
 *
 * Supported path forms:
 *   - '' or 'root' when the output itself is an array;
 *   - dot-separated keys:              "by_date.2026-03-02.items";
 *   - keys with array indices:         "departments[0].employees";
 *   - bare indices under a root array: "[0].items".
 * A segment is first tried as a literal own key, so keys that themselves
 * contain brackets keep working. Keys containing "." cannot be resolved.
 *
 * @param {unknown} root tool output
 * @param {string} path  group path
 * @returns {Array|null} the array at `path`, or null if the path does not
 *          lead to an array
 */
function resolveArrayPath(root, path) {
    if (!path) {
        return Array.isArray(root) ? root : null;
    }
    // 'root' is the sentinel for a top-level array, but it can also be a real
    // key of a top-level object — only treat it as the sentinel for arrays.
    if (path === 'root' && Array.isArray(root)) {
        return root;
    }
    const isContainer = (candidate) => candidate !== null && typeof candidate === 'object';
    let current = root;
    for (const segment of path.split('.')) {
        if (!isContainer(current))
            return null;
        if (Object.prototype.hasOwnProperty.call(current, segment)) {
            current = current[segment];
            continue;
        }
        // "key[2][0]" → key "key" then indices 2, 0; "[3]" → index 3 only
        const parts = /^([^[\]]*)((?:\[\d+\])+)$/.exec(segment);
        if (!parts)
            return null;
        if (parts[1] !== '') {
            current = current[parts[1]];
        }
        for (const indexMatch of parts[2].matchAll(/\[(\d+)\]/g)) {
            if (!Array.isArray(current))
                return null;
            current = current[Number(indexMatch[1])];
        }
    }
    return Array.isArray(current) ? current : null;
}
639
/**
 * Recursively walk a tool-output value and record one group per array of objects.
 *
 * An array qualifies when it is non-empty and its FIRST element is a
 * non-null object. For such an array the labels of its object items (via
 * pickLabel, items without a label are left out) are pushed as
 * `{ arrayPath, labels, stepId }` — `arrayPath` is 'root' for a top-level
 * array — and every item is then walked for nested arrays. Arrays that do
 * not qualify are not descended into.
 *
 * For plain objects every property is walked. When at least half of an
 * object's keys look like dates (date-keyed container) and period bounds are
 * given, date keys outside the bounds are skipped.
 *
 * @param value   current JSON value
 * @param stepId  id of the step that produced the output
 * @param {string} path dotted path of `value` ('' at the root)
 * @param {Array} out  accumulator receiving the groups
 * @param {number} [depth=0] recursion depth; walking stops beyond MAX_WALK_DEPTH
 * @param {{start: string, end: string}|null} [periodBounds] inclusive date bounds
 */
function walkForArrays(value, stepId, path, out, depth = 0, periodBounds) {
    if (value === null || value === undefined || depth > MAX_WALK_DEPTH)
        return;
    const isRecord = (candidate) => typeof candidate === 'object' && candidate !== null;
    if (Array.isArray(value)) {
        if (value.length === 0 || !isRecord(value[0]))
            return;
        // Array of objects — extract labels
        const labels = value
            .filter(isRecord)
            .map((entry) => pickLabel(entry))
            .filter(Boolean);
        if (labels.length > 0) {
            out.push({ arrayPath: path || 'root', labels, stepId });
        }
        // Also recurse into each item for nested arrays
        value.forEach((entry, index) => {
            const entryPath = path ? `${path}[${index}]` : `[${index}]`;
            walkForArrays(entry, stepId, entryPath, out, depth + 1, periodBounds);
        });
        return;
    }
    if (typeof value !== 'object')
        return;
    const entries = Object.entries(value);
    // Detect date-keyed containers (e.g. by_date: { "2026-03-02": {...}, "2026-03-15": {...} })
    let dateKeyCount = 0;
    for (const [key] of entries) {
        if (isDateLikeKey(key))
            dateKeyCount += 1;
    }
    const isDateKeyedContainer = dateKeyCount > 0 && dateKeyCount >= entries.length * 0.5;
    // When a period is known, only descend into date keys within that period.
    const restrictToPeriod = isDateKeyedContainer && Boolean(periodBounds);
    for (const [key, child] of entries) {
        const outsidePeriod = restrictToPeriod &&
            isDateLikeKey(key) &&
            (key < periodBounds.start || key > periodBounds.end);
        if (outsidePeriod)
            continue;
        const childPath = path ? `${path}.${key}` : key;
        walkForArrays(child, stepId, childPath, out, depth + 1, periodBounds);
    }
}
677
/**
 * Choose a display label for an object.
 *
 * The fields listed in LABEL_FIELD_PRIORITY are tried in order and the first
 * non-empty string wins. If none qualifies, the first string property that is
 * non-empty and shorter than 100 characters is used.
 *
 * @param {Object} obj - Item taken from a tool-output array.
 * @returns {string|null} The label, or null when no suitable string exists.
 */
function pickLabel(obj) {
    for (const field of LABEL_FIELD_PRIORITY) {
        const candidate = obj[field];
        if (typeof candidate === 'string' && candidate !== '') {
            return candidate;
        }
    }
    const fallback = Object.values(obj).find((val) => typeof val === 'string' && val.length > 0 && val.length < 100);
    return fallback ?? null;
}
690
/**
 * Extract list_items claims: for each array in tool output, record which of
 * its item labels the response mentions.
 *
 * Sibling-scope relevance filtering: when tool output contains a keyed
 * container (e.g. by_date, by_employee, by_location — any `parent.<dynamic_key>.leaf`
 * pattern), the user typically asks about a subset. Groups are bundled into
 * "sibling families" and each group gets an activation strength:
 *   strong  — ≥2 labels mentioned in the response
 *   weak    — exactly 1 label mentioned
 *   dormant — 0 labels mentioned
 *
 * Resulting scope per family member:
 *   - all members dormant          → every member is `in_scope`
 *   - strong member                → `in_scope`
 *   - weak member, a strong exists → `weak_scope`
 *   - weak member, no strong       → `in_scope` (it is the strongest signal)
 *   - dormant member, any active   → `suppressed` (no claim is generated)
 *
 * A claim is only produced for groups with at least two labels of which at
 * least one is mentioned in the response.
 *
 * @param {Object} trace - Trace whose tool outputs are inspected.
 * @param {string} responseText - Final response text.
 * @param {string} [sourceStepId] - Step id recorded on the claim source; defaults to 'final_response'.
 * @returns {Array<Object>} The list_items claims (possibly empty).
 */
function extractListItemsClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace)
        return [];
    const period = Trace_1.TraceUtils.inferPeriod(trace);
    const groups = extractArrayGroups(trace, period);
    const responseLower = responseText.toLowerCase();
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    // Each group's labels are needed twice (family activation and claim
    // building); scan the response once per group and reuse the result.
    const mentionCache = new Map();
    const mentionedLabelsOf = (group) => {
        let cached = mentionCache.get(group);
        if (!cached) {
            cached = group.labels.filter((label) => isLabelMentioned(responseLower, label));
            mentionCache.set(group, cached);
        }
        return cached;
    };
    // -----------------------------------------------------------------------
    // Sibling-scope relevance filtering
    // -----------------------------------------------------------------------
    // Map: arrayPath → { scope, familyPattern }
    const scopeMap = new Map();
    for (const family of groupSiblingFamilies(groups)) {
        if (family.groups.length < 2)
            continue; // Not a true family
        const activations = family.groups.map((group) => {
            const mentionCount = mentionedLabelsOf(group).length;
            const strength = mentionCount >= 2 ? 'strong' :
                mentionCount === 1 ? 'weak' :
                    'dormant';
            return { group, strength };
        });
        const hasStrong = activations.some((a) => a.strength === 'strong');
        const allDormant = activations.every((a) => a.strength === 'dormant');
        for (const { group, strength } of activations) {
            let scope;
            if (allDormant || strength === 'strong') {
                // All dormant → keep everything (possible data_ignored).
                scope = 'in_scope';
            }
            else if (strength === 'weak') {
                scope = hasStrong ? 'weak_scope' : 'in_scope';
            }
            else {
                // Dormant while a sibling is active.
                scope = 'suppressed';
            }
            scopeMap.set(group.arrayPath, { scope, familyPattern: family.pattern });
        }
    }
    const claims = [];
    for (const group of groups) {
        if (group.labels.length < 2)
            continue; // Single items don't need set verification
        const scopeInfo = scopeMap.get(group.arrayPath);
        // Hard-skip suppressed siblings (no claims generated)
        if (scopeInfo?.scope === 'suppressed')
            continue;
        const mentioned = mentionedLabelsOf(group);
        // Only create a claim if the response mentions at least one item (shows it's discussing the list)
        if (mentioned.length === 0)
            continue;
        const claim = {
            claimId: (0, crypto_1.randomUUID)(),
            type: 'list_items',
            value: mentioned.join(', '),
            rawText: responseText.substring(0, 200),
            source,
            fieldName: group.arrayPath,
            sourceFieldValue: group.labels,
            expectedItems: group.labels,
            mentionedItems: [...mentioned],
        };
        // Attach sibling-scope metadata when applicable
        if (scopeInfo) {
            claim.siblingScope = scopeInfo.scope;
            claim.siblingFamilyPattern = scopeInfo.familyPattern;
        }
        claims.push(claim);
    }
    return claims;
}
804
/**
 * Bundle ArrayGroups into sibling families: groups whose paths become equal
 * once a single segment is replaced by `*`, e.g.
 *   by_date.2026-03-02.people  → by_date.*.people
 *   by_employee.123.sessions   → by_employee.*.sessions
 *   weeks.2026-W10.items       → weeks.*.items
 *
 * Segments containing an array index (`[n]`) are never wildcarded, and for
 * paths of three or more segments neither the first nor the last segment is.
 * Patterns shared by at least two distinct paths are processed from largest to
 * smallest; a group joins the first family that claims it, and families left
 * with fewer than two unclaimed members are dropped.
 *
 * @param {Array<{ arrayPath: string }>} groups - Groups to cluster.
 * @returns {Array<{ pattern: string, segmentIdx: number, groups: Array }>} Families found.
 */
function groupSiblingFamilies(groups) {
    // pattern → { segmentIdx, paths }
    const byPattern = new Map();
    for (const { arrayPath } of groups) {
        const segments = arrayPath.split('.');
        const lastIdx = segments.length - 1;
        const guardEdges = segments.length >= 3;
        segments.forEach((segment, idx) => {
            if (/\[\d+\]/.test(segment)) {
                return; // array index segment such as "[0]" or "items[2]"
            }
            if (guardEdges && (idx === 0 || idx === lastIdx)) {
                return; // only interior segments may act as the dynamic key
            }
            const pattern = [...segments.slice(0, idx), '*', ...segments.slice(idx + 1)].join('.');
            if (!byPattern.has(pattern)) {
                byPattern.set(pattern, { segmentIdx: idx, paths: new Set() });
            }
            byPattern.get(pattern).paths.add(arrayPath);
        });
    }
    // Real families have 2+ distinct member paths; larger ones are served first.
    const ranked = [];
    for (const [pattern, info] of byPattern) {
        if (info.paths.size >= 2) {
            ranked.push({ pattern, segmentIdx: info.segmentIdx, paths: info.paths });
        }
    }
    ranked.sort((left, right) => right.paths.size - left.paths.size);
    const groupByPath = new Map();
    for (const group of groups) {
        groupByPath.set(group.arrayPath, group);
    }
    const claimedPaths = new Set();
    const families = [];
    for (const { pattern, segmentIdx, paths } of ranked) {
        const members = [...paths]
            .filter((candidatePath) => !claimedPaths.has(candidatePath))
            .map((candidatePath) => groupByPath.get(candidatePath))
            .filter((group) => group);
        if (members.length >= 2) {
            families.push({ pattern, segmentIdx, groups: members });
            members.forEach((member) => claimedPaths.add(member.arrayPath));
        }
    }
    return families;
}
866
/**
 * Check whether a label appears in the (already lower-cased) response.
 *
 * A label counts as mentioned when
 *   1. the whole label occurs as a substring, or
 *   2. the label has several whitespace-separated parts and its last part
 *      (e.g. "Jovic" from "Ana Jovic", at least 3 characters long) occurs as a
 *      standalone word.
 *
 * Word boundaries are evaluated with Unicode letter/digit classes. The regex
 * `\b` assertion only knows ASCII word characters, so a surname ending in a
 * letter such as "ć" could never be matched on its own with it.
 *
 * @param {string} responseLower - Response text, lower-cased by the caller.
 * @param {string} label - Label taken from the tool output.
 * @returns {boolean} True when the label (or its last part) is mentioned.
 */
function isLabelMentioned(responseLower, label) {
    const labelLower = label.toLowerCase();
    // Direct presence
    if (responseLower.includes(labelLower))
        return true;
    const parts = labelLower.split(/\s+/);
    if (parts.length < 2)
        return false;
    // Last part is usually the surname and therefore the more unique token.
    const lastPart = parts[parts.length - 1];
    if (lastPart.length < 3)
        return false;
    const isWordChar = (ch) => ch !== undefined && /[\p{L}\p{N}_]/u.test(ch);
    let at = responseLower.indexOf(lastPart);
    while (at !== -1) {
        const before = responseLower[at - 1];
        const after = responseLower[at + lastPart.length];
        if (!isWordChar(before) && !isWordChar(after))
            return true;
        at = responseLower.indexOf(lastPart, at + 1);
    }
    return false;
}
884
/**
 * Match a list_items claim: verify how completely the response covers the
 * expected items.
 *
 * Items are compared case-insensitively, ignoring diacritics and surrounding
 * whitespace. The claim matches when every expected item is mentioned, or when
 * coverage is at least 90% with no more than two items missing.
 *
 * @param {string[]} expectedItems - Labels present in the tool output.
 * @param {string[]} mentionedItems - Labels found in the response.
 * @returns {{ matched: boolean, explanation: string }} Verdict with a readable explanation.
 */
function matchListItems(expectedItems, mentionedItems) {
    if (expectedItems.length === 0) {
        return { matched: true, explanation: 'Empty list — nothing to verify.' };
    }
    // Use diacritics-insensitive comparison for name lists
    const norm = (s) => s.normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase().trim();
    // Normalise the mentioned items once instead of once per expected item.
    const mentionedKeys = new Set(mentionedItems.map(norm));
    const missing = expectedItems.filter((e) => !mentionedKeys.has(norm(e)));
    if (missing.length === 0) {
        return {
            matched: true,
            explanation: `All ${expectedItems.length} items mentioned in response: ${expectedItems.join(', ')}.`,
        };
    }
    // High-coverage partial match, e.g. 16/17 names listed with one spelled
    // slightly differently, is still treated as grounded.
    const matched = expectedItems.length - missing.length;
    const coverage = matched / expectedItems.length;
    if (coverage >= 0.9 && missing.length <= 2) {
        return {
            matched: true,
            explanation: `Near-complete list: ${matched}/${expectedItems.length} items mentioned (${(coverage * 100).toFixed(0)}%). Minor omission: ${missing.join(', ')}.`,
        };
    }
    return {
        matched: false,
        explanation: `Incomplete list: ${matched}/${expectedItems.length} items mentioned. Missing: ${missing.join(', ')}. Coverage: ${(coverage * 100).toFixed(0)}%.`,
    };
}
915
+ // =========================================================================
916
+ // L2 — KEY-VALUE Pair Matching
917
+ // =========================================================================
918
/**
 * Case-insensitive patterns for field names that hold identifiers or contact
 * details. A field is treated as a key-value field when any pattern matches
 * its name.
 */
const KEY_VALUE_FIELD_PATTERNS = [
    // contact channels
    'email', 'e_mail', 'phone', 'tel', 'mobile', 'fax',
    // identifiers, codes and account numbers
    'id$', '^id_', 'code', 'iban', 'account', 'number$',
    // web and postal locations
    'url', 'link', 'website', 'address',
    // personal / company registry numbers
    'jmbg', 'pib', 'matični', 'maticni', 'mbr',
    // licences and registrations
    'license', 'registration',
].map((patternSource) => new RegExp(patternSource, 'i'));
926
/**
 * Collect identifier-like string fields (contact info, ids, codes) from every
 * tool output of the trace. Outputs recognised by `isSchemaOutput` are skipped.
 *
 * @param {Object} trace - Trace to scan.
 * @returns {Array<{ fieldName: string, key: string, value: string, stepId: string }>} Collected fields.
 */
function extractKeyValueFields(trace) {
    const collected = [];
    for (const step of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        const outputs = step.toolOutputs ?? [];
        for (const toolOutput of outputs) {
            if (!isSchemaOutput(toolOutput.output)) {
                walkForKeyValues(toolOutput.output, step.stepId, '', collected);
            }
        }
    }
    return collected;
}
942
/**
 * Recursively walk a tool-output value and record string leaves whose key
 * matches one of KEY_VALUE_FIELD_PATTERNS.
 *
 * The key is the last dot-separated path segment with a trailing `[n]` index
 * removed. Only strings of length 1–199 are recorded; strings at the root
 * (empty path) are ignored.
 *
 * @param {unknown} value - Current node.
 * @param {string} stepId - Step the output belongs to.
 * @param {string} path - Path of `value` from the output root.
 * @param {Array} out - Accumulator receiving `{ fieldName, key, value, stepId }`.
 * @param {number} [depth=0] - Recursion depth; walking stops beyond MAX_WALK_DEPTH.
 */
function walkForKeyValues(value, stepId, path, out, depth = 0) {
    if (value === null || value === undefined || depth > MAX_WALK_DEPTH) {
        return;
    }
    if (typeof value === 'string') {
        if (!path) {
            return;
        }
        const tail = path.slice(path.lastIndexOf('.') + 1);
        const lastKey = tail.replace(/\[\d+\]$/, '');
        const hasUsableLength = value.length > 0 && value.length < 200;
        if (hasUsableLength && KEY_VALUE_FIELD_PATTERNS.some((p) => p.test(lastKey))) {
            out.push({ fieldName: path, key: lastKey, value, stepId });
        }
        return;
    }
    if (Array.isArray(value)) {
        for (let index = 0; index < value.length; index++) {
            const itemPath = path ? `${path}[${index}]` : `[${index}]`;
            walkForKeyValues(value[index], stepId, itemPath, out, depth + 1);
        }
        return;
    }
    if (typeof value === 'object') {
        Object.entries(value).forEach(([key, child]) => {
            walkForKeyValues(child, stepId, path ? `${path}.${key}` : key, out, depth + 1);
        });
    }
}
963
/**
 * Build key_value claims for identifier-like fields of the tool output.
 *
 * For each field: if its value occurs in the response (case-insensitively) a
 * claim carrying the source value is emitted. Otherwise the response is
 * searched for a near-miss and, if one is found, a claim carrying what the
 * response actually says is emitted. The result is passed through
 * `deduplicateClaims`.
 *
 * @param {Object} trace - Trace whose tool outputs are inspected.
 * @param {string} responseText - Final response text.
 * @param {string} [sourceStepId] - Step id recorded on the claim source; defaults to 'final_response'.
 * @returns {Array<Object>} key_value claims.
 */
function extractKeyValueClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace) {
        return [];
    }
    const fields = extractKeyValueFields(trace);
    const responseLower = responseText.toLowerCase();
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    const buildClaim = (field, statedValue) => ({
        claimId: (0, crypto_1.randomUUID)(),
        type: 'key_value',
        value: statedValue,
        rawText: statedValue,
        source,
        fieldName: field.fieldName,
        sourceFieldValue: field.value,
    });
    const claims = [];
    for (const field of fields) {
        const valueLower = field.value.toLowerCase();
        if (responseLower.includes(valueLower)) {
            // The response quotes the value verbatim.
            claims.push(buildClaim(field, field.value));
            continue;
        }
        // Otherwise look for something similar, e.g. a slightly altered id or email.
        const nearMiss = findNearMissInResponse(responseLower, valueLower, field.key);
        if (nearMiss) {
            claims.push(buildClaim(field, nearMiss));
        }
    }
    return deduplicateClaims(claims);
}
1011
/**
 * Search the response for a value that resembles, but differs from, a source
 * key-value field (e.g. source "ana@company.com", response "ana@company.rs").
 *
 * The checks run in order and each only applies when the field key matches:
 *   1. email keys  — first email-shaped token different from the source value
 *   2. phone keys  — first phone-shaped token (≥6 digits) with different digits
 *   3. id/code keys — first token within ±2 characters of the source length
 *      sharing a prefix of at least min(3, half the source length)
 *
 * @param {string} responseLower - Lower-cased response text.
 * @param {string} valueLower - Lower-cased source value.
 * @param {string} fieldKey - Name of the source field.
 * @returns {string|null} The near-miss token, or null when none is found.
 */
function findNearMissInResponse(responseLower, valueLower, fieldKey) {
    if (/email|e_mail/i.test(fieldKey)) {
        const emails = responseLower.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) ?? [];
        const otherEmail = emails.find((candidate) => candidate !== valueLower && candidate.length > 3);
        if (otherEmail !== undefined) {
            return otherEmail;
        }
    }
    if (/phone|tel|mobile|fax/i.test(fieldKey)) {
        const phones = responseLower.match(/[+]?\d[\d\s\-()]{6,}/g) ?? [];
        const sourceDigits = valueLower.replace(/\D/g, '');
        const otherPhone = phones.find((candidate) => {
            const digits = candidate.replace(/\D/g, '');
            return digits.length >= 6 && digits !== sourceDigits;
        });
        if (otherPhone !== undefined) {
            return otherPhone.trim();
        }
    }
    if (/id$|^id_|code|iban|account|number$/i.test(fieldKey)) {
        const tokens = responseLower.match(/[A-Za-z0-9\-_.]{3,}/g) ?? [];
        const requiredPrefix = Math.min(3, valueLower.length * 0.5);
        // Similar length plus a shared prefix suggests a mutated identifier.
        const mutated = tokens.find((token) => token !== valueLower
            && Math.abs(token.length - valueLower.length) <= 2
            && commonPrefixLength(token, valueLower) >= requiredPrefix);
        if (mutated !== undefined) {
            return mutated;
        }
    }
    return null;
}
1058
/**
 * Count how many leading characters two strings have in common.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Length of the shared prefix.
 */
function commonPrefixLength(a, b) {
    const limit = Math.min(a.length, b.length);
    for (let idx = 0; idx < limit; idx++) {
        if (a[idx] !== b[idx]) {
            return idx;
        }
    }
    return limit;
}
1064
/**
 * Match a key_value claim against the tool output value.
 *
 * Values match when they are equal ignoring case. In addition, two values
 * that contain no letters (phone-style values such as "+381 11 123-4567") match
 * when their digit sequences are equal and at least 6 digits long. The
 * digits-only shortcut is deliberately not applied to values containing
 * letters: identifiers or emails that differ only in their alphabetic part
 * (e.g. "ord-2024001" vs "inv-2024001") must not be reported as equal.
 *
 * @param {string} claimedValue - Value stated in the response.
 * @param {unknown} sourceValue - Value found in the tool output.
 * @returns {{ matched: boolean, explanation: string }} Verdict with a readable explanation.
 */
function matchKeyValue(claimedValue, sourceValue) {
    if (typeof sourceValue !== 'string') {
        return { matched: false, explanation: `Source value is not a string (type: ${typeof sourceValue}).` };
    }
    // Tolerate non-string claims (e.g. a numeric id) instead of throwing.
    const claimed = typeof claimedValue === 'string' ? claimedValue : String(claimedValue);
    // Exact match (case-insensitive for most IDs)
    if (claimed.toLowerCase() === sourceValue.toLowerCase()) {
        return { matched: true, explanation: `Key-value "${claimed}" exactly matches source "${sourceValue}".` };
    }
    // For phone numbers, normalize and compare digits only
    const hasLetters = (text) => /\p{L}/u.test(text);
    if (!hasLetters(claimed) && !hasLetters(sourceValue)) {
        const claimedDigits = claimed.replace(/\D/g, '');
        const sourceDigits = sourceValue.replace(/\D/g, '');
        if (claimedDigits.length >= 6 && sourceDigits.length >= 6 && claimedDigits === sourceDigits) {
            return { matched: true, explanation: `Phone digits match: "${claimed}" ≡ "${sourceValue}" (same digits).` };
        }
    }
    return {
        matched: false,
        explanation: `Key-value mismatch: response says "${claimed}" but source is "${sourceValue}".`,
    };
}
1087
+ // =========================================================================
1088
+ // L2 — AGGREGATION Verification
1089
+ // =========================================================================
1090
/**
 * Case-insensitive patterns for field names whose numeric values are
 * candidates for aggregation (sum, average, ...).
 */
const NUMERIC_AGGREGATABLE_PATTERNS = [
    // money and generic totals
    'hours?', 'amount', 'total', 'price', 'cost', 'salary',
    'balance', 'quantity', 'count', 'score', 'rating',
    // measurements
    'duration', 'distance', 'weight', 'age', 'payment',
    // finance
    'budget', 'revenue', 'profit', 'loss', 'expense',
    // attendance
    'minutes?', 'tardiness', 'lateness', 'overtime', 'absence',
].map((patternSource) => new RegExp(patternSource, 'i'));
1098
/**
 * Words in the response that suggest an aggregation was performed, grouped by
 * aggregation operation. Each pattern is tested against the lower-cased response.
 *
 * Patterns that begin or end in a Cyrillic word use Unicode-aware lookarounds
 * (with the `u` flag) instead of `\b`: `\b` only recognises ASCII word
 * characters, so it never reports a boundary between a Cyrillic letter and
 * whitespace or punctuation.
 */
const AGGREGATION_KEYWORDS = {
    sum: [
        /\btotal\b/i, /\bukupno\b/i, /\bsum\b/i, /\bzbir\b/i, /\bsuma\b/i,
        /\bsve zajedno\b/i, /\ball together\b/i,
    ],
    avg: [
        /\baverage\b/i, /\bprosek\b/i, /\bprosečn/i, /\bprosecn/i, /\bmean\b/i,
        /\bavg\b/i, /\bpo (osobi|zaposlenom|članu)/i,
    ],
    count: [
        /\bcount\b/i, /\bbroj\b/i, /\bima\s+\d+/i, /\bthere\s+(are|is)\s+\d+/i,
        /\bukupno\s+\d+/i,
    ],
    count_distinct: [
        /\brazličit/i, /\brazlicit/i, /\bdistinct\b/i, /\bunique\b/i,
        /\bjedinstven/i, /\bposebnih\b/i,
    ],
    min: [
        /\bminimum\b/i, /\bmin\b/i, /\bnajmanj/i, /\blowest\b/i, /\bnajniž/i, /\bnajniz/i,
    ],
    max: [
        /\bmaximum\b/i, /\bmax\b/i, /\bnajveć/i, /\bnajvec/i, /\bhighest\b/i, /\bnajviš/i, /\bnajvis/i,
    ],
    pct_of_total: [
        // "<number>% of/od/от/…": the trailing boundary must also hold after Cyrillic words.
        /\b(\d+(?:[.,]\d+)?)\s*%\s*(?:of|od|от|de|des|di|из)(?![\p{L}\p{N}_])/iu,
        /\budeo\b/i, /\bučešće\b/i, /\bucešce\b/i, /\bučesce\b/i,
        /\bshare\b/i, /\bproportion\b/i, /\bpercentage\b/i,
        /\bodnos\b/i, /\brazmera\b/i,
        /(?<![\p{L}\p{N}_])доля(?![\p{L}\p{N}_])/iu, /(?<![\p{L}\p{N}_])процент(?![\p{L}\p{N}_])/iu,
        /\bporcentaje\b/i, /\bporcentagem\b/i,
    ],
};
1131
/**
 * Gather, from every tool output of the trace, the numeric columns found in
 * arrays of objects.
 *
 * @param {Object} trace - Trace to scan.
 * @returns {Array<{ fieldName: string, values: number[], stepId: string }>} Numeric columns.
 */
function extractNumericArrays(trace) {
    const collected = [];
    for (const step of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        const outputs = step.toolOutputs ?? [];
        for (const toolOutput of outputs) {
            findNumericArrays(toolOutput.output, step.stepId, collected);
        }
    }
    return collected;
}
1144
/**
 * Recursively find arrays of objects and collect their aggregatable numeric columns.
 *
 * For an array whose first element is a non-null object, every property whose
 * name matches NUMERIC_AGGREGATABLE_PATTERNS and whose value is a number (not
 * NaN) or a plain numeric string is gathered per field name. Fields with at
 * least two values are appended to `out`. The items' property values are then
 * searched for nested arrays. Non-array objects are searched property by property.
 *
 * @param {unknown} value - Current node.
 * @param {string} stepId - Step the output belongs to.
 * @param {Array} out - Accumulator receiving `{ fieldName, values, stepId }`.
 * @param {number} [depth=0] - Recursion depth; searching stops beyond MAX_WALK_DEPTH.
 */
function findNumericArrays(value, stepId, out, depth = 0) {
    if (depth > MAX_WALK_DEPTH) {
        return;
    }
    const isObjectLike = (candidate) => typeof candidate === 'object' && candidate !== null;
    if (!Array.isArray(value)) {
        // Plain objects may hold arrays further down.
        if (isObjectLike(value)) {
            Object.values(value).forEach((child) => findNumericArrays(child, stepId, out, depth + 1));
        }
        return;
    }
    // Only non-empty arrays that start with an object are of interest.
    if (value.length === 0 || !isObjectLike(value[0])) {
        return;
    }
    // Numbers pass through; numeric strings (e.g. "58" from DB queries) are parsed.
    const toNumber = (cell) => {
        if (typeof cell === 'number') {
            return Number.isNaN(cell) ? undefined : cell;
        }
        if (typeof cell === 'string') {
            const trimmed = cell.trim();
            return /^-?\d+(\.\d+)?$/.test(trimmed) ? parseFloat(trimmed) : undefined;
        }
        return undefined;
    };
    const rows = value.filter(isObjectLike);
    const columns = new Map();
    for (const row of rows) {
        for (const [key, cell] of Object.entries(row)) {
            const numeric = toNumber(cell);
            if (numeric === undefined) {
                continue;
            }
            if (!NUMERIC_AGGREGATABLE_PATTERNS.some((p) => p.test(key))) {
                continue;
            }
            const bucket = columns.get(key);
            if (bucket) {
                bucket.push(numeric);
            }
            else {
                columns.set(key, [numeric]);
            }
        }
    }
    columns.forEach((values, fieldName) => {
        if (values.length >= 2) {
            out.push({ fieldName, values, stepId });
        }
    });
    // Items can carry nested arrays of their own.
    for (const row of rows) {
        for (const child of Object.values(row)) {
            findNumericArrays(child, stepId, out, depth + 1);
        }
    }
}
1197
/**
 * Compute an aggregation over a list of numbers.
 *
 * Supported operations: 'sum', 'avg', 'count', 'count_distinct', 'min', 'max'
 * and 'pct_of_total' (the first value as a percentage of the sum; 0 when the
 * sum is 0). Any other operation yields undefined.
 *
 * min/max fold over the array instead of spreading it into `Math.min` /
 * `Math.max`, because spreading a very large array into a call exceeds the
 * engine's argument limit and throws a RangeError. For an empty array they
 * still return Infinity / -Infinity, as the spread form does.
 *
 * @param {number[]} values - Numbers to aggregate.
 * @param {string} op - Aggregation operation.
 * @returns {number|undefined} The aggregated value.
 */
function computeAggregation(values, op) {
    const sumAll = () => values.reduce((a, b) => a + b, 0);
    switch (op) {
        case 'sum': return sumAll();
        case 'avg': return sumAll() / values.length;
        case 'count': return values.length;
        case 'count_distinct': return new Set(values).size;
        case 'min': return values.reduce((lowest, v) => Math.min(lowest, v), Infinity);
        case 'max': return values.reduce((highest, v) => Math.max(highest, v), -Infinity);
        case 'pct_of_total': {
            // Percentage of the first value relative to the total.
            const total = sumAll();
            return total === 0 ? 0 : (values[0] / total) * 100;
        }
        default: return undefined;
    }
}
1213
/**
 * Pull the numbers out of a response text as candidate aggregation results.
 *
 * Recognised forms are dot-separated thousands ("1.234.567" → 1234567),
 * decimals written with a comma or a dot ("4,5" → 4.5) and plain integers.
 * Tokens that `isListOrdinal` identifies as numbered-list markers (e.g. "14."
 * at the start of a line) are left out.
 *
 * @param {string} text - Response text.
 * @returns {number[]} Numbers in order of appearance.
 */
function extractNumbersFromResponse(text) {
    const numberToken = /(?<!\w)(\d{1,3}(?:\.\d{3})+|\d+(?:[.,]\d+)?)(?!\w)/g;
    // Dot followed by exactly three digits, repeatable: a thousands separator.
    const dotThousands = /^\d{1,3}(?:\.\d{3})+$/;
    const parsed = [];
    for (const hit of text.matchAll(numberToken)) {
        const raw = hit[1];
        const begin = hit.index;
        if (isListOrdinal(text, begin, begin + raw.length)) {
            continue;
        }
        const normalized = dotThousands.test(raw)
            ? raw.replace(/\./g, '') // "4.496" → "4496"
            : raw.replace(',', '.'); // "4,5" → "4.5"
        const num = parseFloat(normalized);
        if (!Number.isNaN(num)) {
            parsed.push(num);
        }
    }
    return parsed;
}
1238
/**
 * Tell whether the number at [start, end) is a list ordinal marker, e.g. "14."
 * opening a line or "14." inside an inline numbered list.
 *
 * The number must be followed by "." and then whitespace or the end of the
 * text. It is an ordinal when
 *   1. it opens its line, optionally after indentation, one bullet/quote
 *      marker (`*`, `-`, `>`) and markdown asterisks; or
 *   2. it directly follows ", " / "; " and another "N. " marker occurs within
 *      the preceding 80 characters (inline list such as "1. Ivan, 2. Milan").
 *
 * Case 1 inspects the whole prefix of the current line. A fixed-size look-back
 * window would mistake a mid-line number preceded by several spaces for a
 * line start.
 *
 * @param {string} text - Full text.
 * @param {number} start - Index of the first digit.
 * @param {number} end - Index just past the last digit.
 * @returns {boolean} True when the number is a list ordinal.
 */
function isListOrdinal(text, start, end) {
    // Must be followed by '.' then whitespace or end-of-string
    const after = text.substring(end, end + 2);
    if (!/^\.\s/.test(after) && !/^\.$/.test(after))
        return false;
    // Case 1: at the start of a line (or start of text)
    if (start === 0)
        return true;
    const lineStart = text.lastIndexOf('\n', start - 1) + 1;
    const linePrefix = text.substring(lineStart, start);
    if (/^\s*(?:[*\->]\s*)?\**$/.test(linePrefix))
        return true;
    // Case 2: inline list — preceded by comma/semicolon + optional whitespace
    const before = text.substring(Math.max(0, start - 5), start);
    if (/[,;]\s*$/.test(before)) {
        // Verify this is part of a numbered sequence: look for another "N." nearby
        const context = text.substring(Math.max(0, start - 80), start);
        if (/\b\d{1,3}\.\s/.test(context))
            return true;
    }
    return false;
}
1264
/**
 * Detect aggregation results quoted in the response and turn them into claims.
 *
 * Keyword pass: for every numeric column and every aggregation operation whose
 * keywords occur in the response, each response number yields a claim when it
 * is
 *   - close to the computed aggregation (relative difference ≤ 10% for avg,
 *     ≤ 1% otherwise, or absolute difference < 0.5), or
 *   - a plausible but wrong aggregation (per `isPlausibleAggregation`) that is
 *     not itself one of the raw values / scalar counts of the tool outputs.
 *
 * Implicit pass: runs only when the keyword pass produced nothing. Sum and avg
 * are tried for every column with a non-zero result, skipping response numbers
 * that occur as raw values.
 *
 * Finally, per response number only the claim with the smallest relative
 * deviation from its computed value is kept (the earliest wins ties), and the
 * result is passed through `deduplicateClaims`.
 *
 * @param {Object} trace - Trace whose tool outputs are inspected.
 * @param {string} responseText - Final response text.
 * @param {string} [sourceStepId] - Step id recorded on the claim source; defaults to 'final_response'.
 * @returns {Array<Object>} aggregation claims.
 */
function extractAggregationClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace)
        return [];
    const numArrays = extractNumericArrays(trace);
    if (numArrays.length === 0)
        return [];
    const claims = [];
    const responseLower = responseText.toLowerCase();
    const responseNumbers = extractNumbersFromResponse(responseText);
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    // All individual values across ALL arrays: numbers that appear as raw data
    // points must not be mistaken for wrong aggregation attempts.
    const allIndividualValues = new Set();
    for (const arr of numArrays) {
        for (const v of arr.values)
            allIndividualValues.add(v);
    }
    // Scalar aggregate fields (total, count, etc.) quoted directly from tool
    // outputs are treated the same way.
    for (const step of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        for (const to of step.toolOutputs ?? []) {
            const cnt = (0, Matchers_1.extractCountFromOutput)(to.output);
            if (cnt !== null)
                allIndividualValues.add(cnt);
        }
    }
    // Single place where aggregation claims are shaped.
    const pushClaim = (respNum, arr, aggregationOp, computedValue) => {
        claims.push({
            claimId: (0, crypto_1.randomUUID)(),
            type: 'aggregation',
            value: respNum,
            rawText: String(respNum),
            source,
            fieldName: arr.fieldName,
            sourceFieldValue: arr.values,
            aggregationOp,
            computedValue,
        });
    };
    const isCloseTo = (respNum, correctValue, op) => {
        const tolerance = op === 'avg' ? 0.1 : 0.01;
        const diff = Math.abs(respNum - correctValue);
        const relDiff = correctValue !== 0 ? diff / Math.abs(correctValue) : diff;
        return relDiff <= tolerance || diff < 0.5;
    };
    // Keyword detection depends only on the response, so do it once up front.
    const mentionedOps = Object.entries(AGGREGATION_KEYWORDS)
        .filter(([, patterns]) => patterns.some((p) => p.test(responseLower)))
        .map(([op]) => op);
    for (const arr of numArrays) {
        for (const aggOp of mentionedOps) {
            const correctValue = computeAggregation(arr.values, aggOp);
            for (const respNum of responseNumbers) {
                const isCorrect = isCloseTo(respNum, correctValue, aggOp);
                // A wrong aggregation is still claimed so that it gets flagged,
                // unless the number is simply a raw data point of some array.
                const isWrongAttempt = !isCorrect
                    && isPlausibleAggregation(respNum, arr.values, aggOp)
                    && !allIndividualValues.has(respNum);
                if (isCorrect || isWrongAttempt) {
                    pushClaim(respNum, arr, aggOp, correctValue);
                }
            }
        }
    }
    // -----------------------------------------------------------------------
    // Implicit aggregation: no keyword present, but a response number matches
    // (or plausibly misses) a computed sum or avg and is not an individual
    // data point. count/min/max are too likely to collide and are not tried.
    // -----------------------------------------------------------------------
    if (claims.length === 0) {
        for (const arr of numArrays) {
            if (arr.values.length < 2)
                continue;
            for (const op of ['sum', 'avg']) {
                const correctValue = computeAggregation(arr.values, op);
                if (correctValue === 0)
                    continue;
                for (const respNum of responseNumbers) {
                    // Skip if this number appears as an individual data point
                    if (allIndividualValues.has(respNum))
                        continue;
                    if (isCloseTo(respNum, correctValue, op) || isPlausibleAggregation(respNum, arr.values, op)) {
                        pushClaim(respNum, arr, op, correctValue);
                    }
                }
            }
        }
    }
    // Value-level dedup: when the same response number produced claims against
    // several arrays, keep only the claim that deviates least from its
    // computed value.
    const deviationOf = (claim) => (claim.computedValue !== undefined
        ? Math.abs(claim.value - claim.computedValue) / (Math.abs(claim.computedValue) || 1)
        : Infinity);
    const bestByValue = new Map();
    for (const c of claims) {
        const existing = bestByValue.get(c.value);
        if (!existing || deviationOf(c) < deviationOf(existing)) {
            bestByValue.set(c.value, c);
        }
    }
    return deduplicateClaims([...bestByValue.values()]);
}
1433
/**
 * Check if a number is a plausible (but wrong) aggregation of the values.
 * E.g., if sum is 113 but response says 120, it's likely a math error.
 *
 * @param {number} respNum - Number found in the response.
 * @param {*} values - Source values, forwarded unchanged to computeAggregation.
 * @param {string} op - Aggregation operator; 'sum' gets an additional, wider band.
 * @returns {boolean} true when respNum is close enough to the computed
 *   aggregation to look like a miscalculation of it.
 */
function isPlausibleAggregation(respNum, values, op) {
    const correct = computeAggregation(values, op);
    // A zero reference value gives no usable relative distance.
    if (correct === 0)
        return false;
    const relDiff = Math.abs(respNum - correct) / Math.abs(correct);
    // Within 30% of the correct value (but outside the 1% band) — likely a math error, not a random number
    if (relDiff <= 0.3 && relDiff > 0.01)
        return true;
    // For sums: response number lies between half and one-and-a-half times the correct sum
    if (op === 'sum') {
        // Order the bounds explicitly: for a negative sum, correct * 0.5 is the
        // larger of the two, which would otherwise make the band empty.
        const halfBound = correct * 0.5;
        const wideBound = correct * 1.5;
        const minPlausible = Math.min(halfBound, wideBound);
        const maxPlausible = Math.max(halfBound, wideBound);
        if (respNum >= minPlausible && respNum <= maxPlausible && respNum !== correct)
            return true;
    }
    return false;
}
1454
/**
 * Match an aggregation claim against the computed correct value.
 *
 * A claim matches when it is within the relative tolerance (10% for 'avg',
 * 1% for every other operator) or within 0.5 in absolute terms.
 *
 * @param {number} claimedValue - Number stated in the response.
 * @param {number} computedValue - Aggregation computed from the source values.
 * @param {string} op - Aggregation operator name, used for tolerance and wording.
 * @returns {{matched: boolean, explanation: string}}
 */
function matchAggregation(claimedValue, computedValue, op) {
    const tolerance = op === 'avg' ? 0.1 : 0.01;
    const diff = Math.abs(claimedValue - computedValue);
    // With a zero reference there is no relative scale; fall back to the
    // absolute difference for the tolerance comparison.
    const hasRelativeScale = computedValue !== 0;
    const relDiff = hasRelativeScale ? diff / Math.abs(computedValue) : diff;
    if (relDiff <= tolerance || diff < 0.5) {
        return {
            matched: true,
            explanation: `Aggregation (${op}) claim ${claimedValue} matches computed value ${computedValue}.`,
        };
    }
    // Only report a percentage when it is actually relative to something;
    // against a zero reference the number would just be the absolute diff x 100.
    const percentNote = hasRelativeScale ? ` (${(relDiff * 100).toFixed(1)}%)` : '';
    return {
        matched: false,
        explanation: `Aggregation error: response says ${claimedValue} but ${op}(${computedValue}) is the correct value. Off by ${diff.toFixed(2)}${percentNote}.`,
    };
}
1472
+ // =========================================================================
1473
+ // L2 — RANGE Matching (min/max, salary bands, thresholds)
1474
+ // =========================================================================
1475
/**
 * Key-name patterns for paired min/max fields.
 * extractRangeFields tests these against the lower-cased last segment of a
 * field path: a hit in the first list marks the field as a lower bound, a hit
 * in the second list marks it as an upper bound.
 */
const RANGE_MIN_PATTERNS = [
    /^min/i,
    /min$/i,
    /_min_/i,
    /minimum/i,
    /lower/i,
    /from/i,
    /start/i,
];
const RANGE_MAX_PATTERNS = [
    /^max/i,
    /max$/i,
    /_max_/i,
    /maximum/i,
    /upper/i,
    /to$/i,
    /end$/i,
    /limit/i,
];
1478
/**
 * Remove the min/max style prefixes and suffixes from a (lower-cased) key so
 * that both ends of a range reduce to the same root, e.g. "salary_min" and
 * "salary_max" both become "salary". Keys that consist only of such an affix
 * ("min", "max", "from", "to", ...) reduce to the empty string.
 */
function stripRangeAffixes(key) {
    return key
        .replace(/^min_?|_?min$|minimum|lower|^from_?|_?from$|^start_?|_?start$/gi, '')
        .replace(/^max_?|_?max$|maximum|upper|^to_?|_?to$|^end_?|_?end$|^limit_?|_?limit$/gi, '')
        .replace(/^_|_$/g, '');
}
/**
 * Walk tool outputs and find paired min/max numeric fields.
 * Detects: salary_min/salary_max, min_hours/max_hours, price_from/price_to, etc.
 *
 * Pairing rules:
 *  - one field must look like a lower bound and the other like an upper bound
 *    (RANGE_MIN_PATTERNS / RANGE_MAX_PATTERNS on the last path segment);
 *  - both must reduce to the same root name; bare keys such as `min`/`max`
 *    (empty root) pair only when they sit under the same parent path;
 *  - the lower value must not exceed the upper value;
 *  - every field is used in at most one pair (first valid counterpart wins).
 *
 * @param {*} trace - Trace handed to Trace_1.TraceUtils.getToolOutputSteps.
 * @returns {Array<{rootPath: string, min: number, max: number, stepId: *}>}
 */
function extractRangeFields(trace) {
    const steps = Trace_1.TraceUtils.getToolOutputSteps(trace);
    const numericFields = new Map();
    for (const step of steps) {
        for (const to of step.toolOutputs ?? []) {
            collectNumericFields(to.output, step.stepId, '', numericFields);
        }
    }
    // Pair min/max fields by root name
    const ranges = [];
    const processed = new Set();
    for (const [path, entries] of numericFields) {
        if (processed.has(path))
            continue;
        const lastKey = (path.split('.').pop() ?? path).toLowerCase();
        const isMin = RANGE_MIN_PATTERNS.some((p) => p.test(lastKey));
        const isMax = RANGE_MAX_PATTERNS.some((p) => p.test(lastKey));
        if (!isMin && !isMax)
            continue;
        // The root may legitimately be empty (bare "min"/"max" keys); it must
        // NOT fall back to the key itself, or such pairs could never match.
        const rootName = stripRangeAffixes(lastKey);
        const parentPath = path.replace(/\.?[^.]+$/, '');
        // Find the counterpart
        for (const [otherPath, otherEntries] of numericFields) {
            if (otherPath === path || processed.has(otherPath))
                continue;
            const otherLastKey = (otherPath.split('.').pop() ?? otherPath).toLowerCase();
            const otherIsMin = RANGE_MIN_PATTERNS.some((p) => p.test(otherLastKey));
            const otherIsMax = RANGE_MAX_PATTERNS.some((p) => p.test(otherLastKey));
            if ((isMin && !otherIsMax) || (isMax && !otherIsMin))
                continue;
            // Check if they share a root name
            if (rootName !== stripRangeAffixes(otherLastKey))
                continue;
            // Bare keys carry no name of their own, so the parent object is
            // the only evidence that the two values describe the same range.
            if (rootName === '' && otherPath.replace(/\.?[^.]+$/, '') !== parentPath)
                continue;
            const minVal = isMin ? entries[0].value : otherEntries[0].value;
            const maxVal = isMax ? entries[0].value : otherEntries[0].value;
            if (minVal <= maxVal) {
                ranges.push({
                    rootPath: rootName || parentPath || 'range',
                    min: minVal,
                    max: maxVal,
                    stepId: entries[0].stepId,
                });
                processed.add(path);
                processed.add(otherPath);
                // This field is consumed; do not pair it with further counterparts.
                break;
            }
        }
    }
    return ranges;
}
1537
/**
 * Recursively gather every non-NaN number found under `value` into `out`,
 * keyed by its path ("a.b", "list[0].c", ...).
 *
 * @param {*} value - Current node of the structure being walked.
 * @param {*} stepId - Stored verbatim on every collected entry.
 * @param {string} path - Path of `value`; numbers with an empty path are ignored.
 * @param {Map<string, Array<{fieldName: string, value: number, stepId: *}>>} out
 *   Accumulator; entries for an already-seen path are appended.
 * @param {number} [depth=0] - Current nesting level; the walk stops once it
 *   exceeds MAX_WALK_DEPTH.
 */
function collectNumericFields(value, stepId, path, out, depth = 0) {
    if (depth > MAX_WALK_DEPTH || value === null || value === undefined) {
        return;
    }
    const isCollectableNumber = typeof value === 'number' && path && !Number.isNaN(value);
    if (isCollectableNumber) {
        let bucket = out.get(path);
        if (bucket === undefined || bucket === null) {
            bucket = [];
        }
        bucket.push({ fieldName: path, value, stepId });
        out.set(path, bucket);
        return;
    }
    if (Array.isArray(value)) {
        // Holes in sparse arrays surface as undefined and are dropped by the guard above.
        for (const [index, element] of value.entries()) {
            const elementPath = path ? `${path}[${index}]` : `[${index}]`;
            collectNumericFields(element, stepId, elementPath, out, depth + 1);
        }
        return;
    }
    if (typeof value === 'object') {
        for (const key of Object.keys(value)) {
            const childPath = path ? `${path}.${key}` : key;
            collectNumericFields(value[key], stepId, childPath, out, depth + 1);
        }
    }
}
1556
/**
 * Words in response that suggest a range is being described.
 * One regex per group: "between", "up to", "at least", and "range", each with
 * its non-English alternatives (these look like Serbo-Croatian terms —
 * confirm with the maintainers before extending the list).
 *
 * Alternatives that could never decide a match were removed: `od\s+\d` is
 * unreachable because plain `od` is tried first under the same `\b…\b`
 * anchors, and `minimum` was listed twice.
 */
const RANGE_KEYWORDS = [
    /\b(between|od|izme[đd]u)\b/i,
    /\b(up to|do|maksimalno|najvi[šs]e)\b/i,
    /\b(at least|najmanje|minimum)\b/i,
    /\b(range|raspon|opseg)\b/i,
];
1563
/**
 * Extract range claims: detect when response describes a min/max range from tool output.
 *
 * For every range returned by extractRangeFields, a claim is emitted for each
 * response number that either
 *  - equals / is within 1% of one of the endpoints, or
 *  - lies outside the range but within a 30% margin beyond an endpoint
 *    (a likely misquoted bound that a matcher can then flag).
 * Numbers strictly inside the range, or far away from it, produce no claim.
 *
 * @param {*} trace - Trace handed to extractRangeFields.
 * @param {string} responseText - Final response text to scan.
 * @param {string} [sourceStepId] - Step id recorded on the claims; defaults to 'final_response'.
 * @returns {Array<object>} Claims of type 'range', passed through deduplicateClaims.
 */
function extractRangeClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace)
        return [];
    const ranges = extractRangeFields(trace);
    if (ranges.length === 0)
        return [];
    const responseLower = responseText.toLowerCase();
    const responseNumbers = extractNumbersFromResponse(responseText);
    // Without range-like wording, a single number is not enough to suspect a range.
    const hasRangeLanguage = RANGE_KEYWORDS.some((p) => p.test(responseLower));
    if (!hasRangeLanguage && responseNumbers.length < 2)
        return [];
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    const claims = [];
    for (const range of ranges) {
        // 30% margin beyond each endpoint. The factor depends on the sign so the
        // band always extends OUTWARD: multiplying a negative minimum by 0.7
        // would move the lower limit inside the range instead.
        const nearLow = range.min >= 0 ? range.min * 0.7 : range.min * 1.3;
        const nearHigh = range.max >= 0 ? range.max * 1.3 : range.max * 0.7;
        for (const num of responseNumbers) {
            const isNearMin = Math.abs(num - range.min) / (Math.abs(range.min) || 1) <= 0.01 || num === range.min;
            const isNearMax = Math.abs(num - range.max) / (Math.abs(range.max) || 1) <= 0.01 || num === range.max;
            const isInRange = num >= range.min && num <= range.max;
            // Close to the range but outside its bounds — flaggable mismatch.
            const isNearMiss = !isInRange && num >= nearLow && num <= nearHigh;
            if (!isNearMin && !isNearMax && !isNearMiss)
                continue;
            claims.push({
                claimId: (0, crypto_1.randomUUID)(),
                type: 'range',
                value: num,
                rawText: String(num),
                source,
                fieldName: range.rootPath,
                sourceFieldValue: { min: range.min, max: range.max },
            });
        }
    }
    return deduplicateClaims(claims);
}
1622
/**
 * Match a range claim: verify claimed number against source min/max.
 *
 * Checks run in order: exact endpoint, endpoint within 1%, anywhere inside the
 * bounds. The first that holds yields a match; otherwise a mismatch is reported.
 *
 * @param {number} claimedValue - Number stated in the response.
 * @param {*} sourceValue - Expected to be an object with numeric `min` and `max`.
 * @returns {{matched: boolean, explanation: string}}
 */
function matchRange(claimedValue, sourceValue) {
    const verdict = (matched, explanation) => ({ matched, explanation });
    if (sourceValue === null || typeof sourceValue !== 'object') {
        return verdict(false, 'Source value is not a range object.');
    }
    const { min: low, max: high } = sourceValue;
    if (typeof low !== 'number' || typeof high !== 'number') {
        return verdict(false, 'Source range missing min or max.');
    }
    const bounds = `[${low}, ${high}]`;
    // Exact hit on either endpoint
    if (claimedValue === low || claimedValue === high) {
        return verdict(true, `Range claim ${claimedValue} matches endpoint ${bounds}.`);
    }
    // Within 1% of an endpoint (a zero endpoint is compared in absolute terms)
    const relativeGap = (endpoint) => Math.abs(claimedValue - endpoint) / (Math.abs(endpoint) || 1);
    if (relativeGap(low) <= 0.01 || relativeGap(high) <= 0.01) {
        return verdict(true, `Range claim ${claimedValue} approximately matches endpoint ${bounds}.`);
    }
    // Not an endpoint, but still a value the range allows
    if (claimedValue >= low && claimedValue <= high) {
        return verdict(true, `Range claim ${claimedValue} is within bounds ${bounds}.`);
    }
    return verdict(false, `Range mismatch: response says ${claimedValue} but source range is ${bounds}.`);
}
1661
+ // ---------------------------------------------------------------------------
1662
+ // Helpers
1663
+ // ---------------------------------------------------------------------------
1664
/**
 * Return the first synonym from `synonyms` that occurs in `textLower`, or null.
 *
 * Matching strategy per synonym:
 *  - contains a space ("on leave", "na čekanju"): plain substring search;
 *  - listed in AMBIGUOUS_SHORT_WORDS ("da", "ne"): only counts when the text
 *    IS the word, starts with the word plus `,` `.` `!` ` -` ` –`, or starts
 *    with the word plus a space and is shorter than 80 characters — i.e. it
 *    reads as a yes/no answer rather than a conjunction;
 *  - any other single word: `\b…\b` regex when the word is printable ASCII,
 *    substring search otherwise (JS `\b` only understands [a-zA-Z0-9_]).
 *
 * @param {string} textLower - Lower-cased text to search.
 * @param {Iterable<string>} synonyms - Candidates, checked in order.
 * @returns {string|null} The matching synonym exactly as listed, else null.
 */
function findSynonymInText(textLower, synonyms) {
    for (const candidate of synonyms) {
        const isPhrase = candidate.includes(' ');
        if (isPhrase) {
            if (textLower.includes(candidate.toLowerCase())) {
                return candidate;
            }
            continue;
        }
        const needle = candidate.toLowerCase();
        if (AMBIGUOUS_SHORT_WORDS.has(needle)) {
            const reply = textLower.trim();
            const isWholeReply = reply === needle;
            const opensWithMark = [',', '.', '!', ' -', ' –'].some((tail) => reply.startsWith(needle + tail));
            const opensShortReply = reply.startsWith(needle + ' ') && reply.length < 80;
            if (isWholeReply || opensWithMark || opensShortReply) {
                return candidate;
            }
            continue;
        }
        const isPrintableAscii = /^[\x20-\x7e]+$/.test(needle);
        const isPresent = isPrintableAscii
            ? new RegExp(`\\b${escapeRegex(candidate)}\\b`, 'i').test(textLower)
            : textLower.includes(needle);
        if (isPresent) {
            return candidate;
        }
    }
    return null;
}
1707
/**
 * Find if response contains a DIFFERENT enum value from the same domain.
 *
 * Only values listed in ENUM_OPPOSITES for `sourceCanonical` are considered,
 * and each is searched through its ENUM_TRANSLATIONS synonyms. Both tables are
 * read with own-property checks: a plain `table[key]` lookup would resolve
 * inherited members such as "constructor" or "toString", which are truthy but
 * not iterable and would crash the loops below.
 *
 * @param {string} responseLower - Lower-cased response text.
 * @param {string} sourceCanonical - Canonical enum value taken from the source.
 * @returns {{foundCanonical: string, foundSynonym: string}|null} The first
 *   opposing value found in the response, or null when there is none.
 */
function findContradiction(responseLower, sourceCanonical) {
    const ownEntry = (table, key) => (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);
    // Only check semantically opposed values, not ALL enum translations
    const opposites = ownEntry(ENUM_OPPOSITES, sourceCanonical);
    if (!opposites)
        return null;
    for (const oppCanonical of opposites) {
        const synonyms = ownEntry(ENUM_TRANSLATIONS, oppCanonical);
        if (!synonyms)
            continue;
        for (const syn of synonyms) {
            const synLow = syn.toLowerCase();
            // Skip ambiguous short words in contradiction detection
            if (AMBIGUOUS_SHORT_WORDS.has(synLow))
                continue;
            let found;
            if (syn.includes(' ')) {
                // Multi-word synonym: substring search
                found = responseLower.includes(synLow);
            }
            else if (/^[\x20-\x7e]+$/.test(synLow)) {
                // Printable ASCII: whole-word match
                found = new RegExp(`\\b${escapeRegex(syn)}\\b`, 'i').test(responseLower);
            }
            else {
                // Non-ASCII: JS \b is unreliable there, fall back to substring search
                found = responseLower.includes(synLow);
            }
            if (found) {
                return { foundCanonical: oppCanonical, foundSynonym: syn };
            }
        }
    }
    return null;
}
1744
/**
 * Backslash-escape every regex metacharacter in `str` so the result can be
 * embedded in a RegExp source and match the original text literally.
 */
function escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
1747
// Note: deduplicateClaims (defined after deriveMetric) removes duplicate claims (same fieldName + same value).
1748
/**
 * Derive a metric name from a JSON field path.
 * "employees[0].late_count" → "late_count", "overtime_hours" → "overtime_hours",
 * "grid[0][1]" → "grid"
 *
 * Takes the last dot-separated segment and strips every trailing array index
 * from it (not just the final one, so nested-array paths do not leak an index
 * into the metric name).
 *
 * @param {string} [fieldName] - Field path.
 * @returns {string|undefined} Metric name, or undefined when the path is empty
 *   or consists only of array indices.
 */
function deriveMetric(fieldName) {
    if (!fieldName)
        return undefined;
    const last = fieldName.split('.').pop() ?? fieldName;
    const cleaned = last.replace(/(?:\[\d+\])+$/, '');
    return cleaned || undefined;
}
1759
/**
 * Remove duplicate claims, keeping the first claim for every
 * fieldName + value combination and preserving input order.
 *
 * Side effect: each kept claim that has a fieldName but no metric gets its
 * `metric` filled in via deriveMetric. Dropped duplicates are left untouched.
 *
 * @param {Array<object>} claims - Claims to filter.
 * @returns {Array<object>} New array holding the surviving claim objects.
 */
function deduplicateClaims(claims) {
    const seenKeys = new Set();
    return claims.reduce((kept, claim) => {
        const dedupKey = `${claim.fieldName}::${claim.value}`;
        if (!seenKeys.has(dedupKey)) {
            seenKeys.add(dedupKey);
            // Populate metric from fieldName if not already set
            if (!claim.metric && claim.fieldName) {
                claim.metric = deriveMetric(claim.fieldName);
            }
            kept.push(claim);
        }
        return kept;
    }, []);
}
1773
+ //# sourceMappingURL=index.js.map