truthguard-ai 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truthguard-ai might be problematic. Click here for more details.
- package/dist-npm/Claims/index.d.ts +73 -0
- package/dist-npm/Claims/index.d.ts.map +1 -0
- package/dist-npm/Claims/index.js +1669 -0
- package/dist-npm/Claims/index.js.map +1 -0
- package/dist-npm/Config/index.d.ts +41 -0
- package/dist-npm/Config/index.d.ts.map +1 -0
- package/dist-npm/Config/index.js +129 -0
- package/dist-npm/Config/index.js.map +1 -0
- package/dist-npm/Grounding/index.d.ts +40 -0
- package/dist-npm/Grounding/index.d.ts.map +1 -0
- package/dist-npm/Grounding/index.js +1433 -0
- package/dist-npm/Grounding/index.js.map +1 -0
- package/dist-npm/L2/index.d.ts +93 -0
- package/dist-npm/L2/index.d.ts.map +1 -0
- package/dist-npm/L2/index.js +1773 -0
- package/dist-npm/L2/index.js.map +1 -0
- package/dist-npm/Matchers/index.d.ts +101 -0
- package/dist-npm/Matchers/index.d.ts.map +1 -0
- package/dist-npm/Matchers/index.js +690 -0
- package/dist-npm/Matchers/index.js.map +1 -0
- package/dist-npm/Mode/index.d.ts +87 -0
- package/dist-npm/Mode/index.d.ts.map +1 -0
- package/dist-npm/Mode/index.js +117 -0
- package/dist-npm/Mode/index.js.map +1 -0
- package/dist-npm/Policy/index.d.ts +89 -0
- package/dist-npm/Policy/index.d.ts.map +1 -0
- package/dist-npm/Policy/index.js +143 -0
- package/dist-npm/Policy/index.js.map +1 -0
- package/dist-npm/Registry/index.d.ts +93 -0
- package/dist-npm/Registry/index.d.ts.map +1 -0
- package/dist-npm/Registry/index.js +818 -0
- package/dist-npm/Registry/index.js.map +1 -0
- package/dist-npm/Rules/index.d.ts +587 -0
- package/dist-npm/Rules/index.d.ts.map +1 -0
- package/dist-npm/Rules/index.js +6236 -0
- package/dist-npm/Rules/index.js.map +1 -0
- package/dist-npm/Rules/intents.d.ts +22 -0
- package/dist-npm/Rules/intents.d.ts.map +1 -0
- package/dist-npm/Rules/intents.js +242 -0
- package/dist-npm/Rules/intents.js.map +1 -0
- package/dist-npm/TraceReadiness/index.d.ts +42 -0
- package/dist-npm/TraceReadiness/index.d.ts.map +1 -0
- package/dist-npm/TraceReadiness/index.js +169 -0
- package/dist-npm/TraceReadiness/index.js.map +1 -0
- package/dist-npm/i18n/index.d.ts +44 -0
- package/dist-npm/i18n/index.d.ts.map +1 -0
- package/dist-npm/i18n/index.js +124 -0
- package/dist-npm/i18n/index.js.map +1 -0
- package/package.json +5 -17
- package/dist/cli/index.d.ts +0 -15
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/cli/index.js +0 -807
- package/dist/cli/index.js.map +0 -1
|
@@ -0,0 +1,1773 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* L2 — Structured Context Matching
|
|
4
|
+
*
|
|
5
|
+
* Extracts and matches claims that L1 (regex-based) cannot handle:
|
|
6
|
+
* - boolean fields (true/false → natural language equivalents)
|
|
7
|
+
* - enum/status fields (approved → "odobren", active → "aktivan")
|
|
8
|
+
* - list_items — verifies that response mentions all items from tool output arrays
|
|
9
|
+
* - key_value — matches identifiers (email, phone, ID) from tool output against response
|
|
10
|
+
* - aggregation — verifies SUM/AVG/COUNT/MIN/MAX computed from tool output arrays
|
|
11
|
+
*
|
|
12
|
+
* Fully deterministic — no LLM calls. Domain-specific enum translation
|
|
13
|
+
* tables enable cross-language matching (EN → SR, etc.).
|
|
14
|
+
*/
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.extractStructuredClaims = extractStructuredClaims;
|
|
17
|
+
exports.matchBoolean = matchBoolean;
|
|
18
|
+
exports.matchEnum = matchEnum;
|
|
19
|
+
exports.addEnumTranslations = addEnumTranslations;
|
|
20
|
+
exports.getEnumTranslations = getEnumTranslations;
|
|
21
|
+
exports.extractListItemsClaims = extractListItemsClaims;
|
|
22
|
+
exports.matchListItems = matchListItems;
|
|
23
|
+
exports.extractKeyValueClaims = extractKeyValueClaims;
|
|
24
|
+
exports.matchKeyValue = matchKeyValue;
|
|
25
|
+
exports.extractAggregationClaims = extractAggregationClaims;
|
|
26
|
+
exports.matchAggregation = matchAggregation;
|
|
27
|
+
exports.extractRangeClaims = extractRangeClaims;
|
|
28
|
+
exports.matchRange = matchRange;
|
|
29
|
+
const crypto_1 = require("crypto");
|
|
30
|
+
const Trace_1 = require("../Trace");
|
|
31
|
+
const Matchers_1 = require("../Matchers");
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
// Boolean synonym tables
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
/**
 * Natural-language spellings of boolean values, keyed by the string form of
 * the boolean ('true' / 'false').
 *
 * Each list holds the English words first, followed by Serbian words written
 * in Latin script (a few of them both with and without diacritics).
 */
const BOOLEAN_SYNONYMS = (() => {
    const englishTrue = [
        'true', 'yes', 'active', 'enabled', 'on', 'present', 'available',
        'approved', 'confirmed', 'valid', 'open', 'done', 'completed',
    ];
    const serbianTrue = [
        'da',
        'aktivan', 'aktivna', 'aktivno', 'aktivni',
        'prisutan', 'prisutna', 'prisutno', 'prisutni',
        'dostupan', 'dostupna', 'dostupno',
        'odobren', 'odobrena', 'odobreno',
        'otvoren', 'otvorena', 'otvoreno',
        'zavrseno', 'završeno', 'završen', 'završena',
    ];
    const englishFalse = [
        'false', 'no', 'inactive', 'disabled', 'off', 'absent', 'unavailable',
        'rejected', 'denied', 'invalid', 'closed', 'pending', 'not done',
    ];
    const serbianFalse = [
        'ne',
        'neaktivan', 'neaktivna', 'neaktivno',
        'odsutan', 'odsutna', 'odsutno', 'odsutni',
        'nedostupan', 'nedostupna', 'nedostupno',
        'odbijen', 'odbijena', 'odbijeno',
        'zatvoren', 'zatvorena', 'zatvoreno',
    ];
    return {
        true: [...englishTrue, ...serbianTrue],
        false: [...englishFalse, ...serbianFalse],
    };
})();
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Enum translation table
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
/**
 * Maps canonical enum values to the natural-language spellings that count as
 * "the same value" in a response.
 *
 * Each entry: canonical_value → [synonyms in multiple languages]. The lists
 * are grouped by domain (status, HR, priority, yes/no, payment, general) and
 * can be extended at runtime through addEnumTranslations().
 *
 * Notes for maintainers:
 * - Every list is free of repeated strings; when a word is spelled the same
 *   in two languages it is listed once.
 * - Some words intentionally or accidentally appear under more than one
 *   canonical value (e.g. 'kasni' under both `late` and `overdue`, 'odsutan'
 *   under `on_leave` and `absent`, 'greška' under `failed` and `error`).
 *   buildReverseLookup() walks entries in declaration order, so for such a
 *   word the canonical value declared LAST wins in the reverse map.
 */
const ENUM_TRANSLATIONS = {
    // Status values
    approved: ['approved', 'odobren', 'odobrena', 'odobreno', 'odobreni',
        'aprobado', 'aprobada', 'approuvé', 'approuvée', 'aprovado', 'aprovada',
        'одобрен', 'одобрена', 'одобрено', 'स्वीकृत', 'موافق', 'অনুমোদিত', '已批准', '承認済み'],
    rejected: ['rejected', 'odbijen', 'odbijena', 'odbijeno', 'odbijeni',
        'rechazado', 'rechazada', 'rejeté', 'rejetée', 'rejeitado', 'rejeitada',
        'отклонён', 'отклонена', 'अस्वीकृत', 'مرفوض', 'প্রত্যাখ্যাত', '已拒绝', '却下'],
    pending: ['pending', 'na cekanju', 'na čekanju', 'čeka', 'ceka', 'u toku',
        'pendiente', 'en attente', 'pendente',
        'ожидание', 'на рассмотрении', 'लंबित', 'معلق', 'মুলতুবি', '待定', '待处理', '保留中'],
    // The Hindi entry must start with Devanagari "स" (U+0938); a look-alike
    // Cyrillic "с" would never match real Hindi text.
    active: ['active', 'aktivan', 'aktivna', 'aktivno', 'aktivni',
        'activo', 'activa', 'actif', 'ativo', 'ativa',
        'активный', 'активна', 'सक्रिय', 'نشط', 'সক্রিয়', '活跃', 'アクティブ'],
    inactive: ['inactive', 'neaktivan', 'neaktivna', 'neaktivno', 'neaktivni',
        'inactivo', 'inactiva', 'inactif', 'inativo', 'inativa',
        'неактивный', 'неактивна', 'निष्क्रिय', 'غير نشط', 'নিষ্ক্রিয়', '不活跃', '非アクティブ'],
    completed: ['completed', 'završen', 'završena', 'završeno', 'zavrseno', 'gotov', 'gotovo',
        'completado', 'completada', 'terminé', 'terminée', 'concluído', 'concluída',
        'завершён', 'завершена', 'पूर्ण', 'مكتمل', 'সম্পূর্ণ', '已完成', '完了'],
    cancelled: ['cancelled', 'canceled', 'otkazan', 'otkazana', 'otkazano', 'otkazani',
        'cancelado', 'cancelada', 'annulé', 'annulée',
        'отменён', 'отменена', 'रद्द', 'ملغى', 'বাতিল', '已取消', 'キャンセル'],
    open: ['open', 'otvoren', 'otvorena', 'otvoreno', 'otvoreni',
        'abierto', 'abierta', 'ouvert', 'ouverte', 'aberto', 'aberta',
        'открыт', 'открыта', 'खुला', 'مفتوح', 'খোলা', '开放', 'オープン'],
    closed: ['closed', 'zatvoren', 'zatvorena', 'zatvoreno', 'zatvoreni',
        'cerrado', 'cerrada', 'fermé', 'fermée', 'fechado', 'fechada',
        'закрыт', 'закрыта', 'बंद', 'مغلق', 'বন্ধ', '已关闭', 'クローズ'],
    // Leave / HR
    on_leave: ['on leave', 'on_leave', 'na odmoru', 'na godišnjem', 'na godisnjem', 'odsutan', 'odsutna',
        'de permiso', 'en congé', 'de licença',
        'в отпуске', 'छुट्टी पर', 'في إجازة', 'ছুটিতে', '休假中', '休暇中'],
    sick_leave: ['sick leave', 'sick_leave', 'bolovanje', 'na bolovanju',
        'baja por enfermedad', 'congé maladie', 'licença médica',
        'на больничном', 'बीमार छुट्टी', 'إجازة مرضية', 'অসুস্থতার ছুটি', '病假', '病気休暇'],
    present: ['present', 'prisutan', 'prisutna', 'prisutno', 'prisutni',
        'presente', 'présent', 'présente',
        'присутствует', 'उपस्थित', 'حاضر', 'উপস্থিত', '在场', '出席'],
    absent: ['absent', 'odsutan', 'odsutna', 'odsutno', 'odsutni',
        'ausente', 'absente',
        'отсутствует', 'अनुपस्थित', 'غائب', 'অনুপস্থিত', '缺席', '欠席'],
    late: ['late', 'kasni', 'kasnio', 'kasnila', 'zakašnjenje', 'zakasnjenje',
        'tarde', 'retardado', 'en retard', 'atrasado', 'atrasada',
        'опоздание', 'опоздал', 'देर से', 'متأخر', 'দেরি', '迟到', '遅刻'],
    // Priority
    high: ['high', 'visok', 'visoka', 'visoko', 'visoki', 'hitno', 'urgent',
        'alto', 'alta', 'élevé', 'élevée', 'haut', 'haute',
        'высокий', 'высокая', 'उच्च', 'عالي', 'উচ্চ', '高', '高い'],
    medium: ['medium', 'srednji', 'srednja', 'srednje',
        'medio', 'media', 'moyen', 'moyenne', 'médio', 'média',
        'средний', 'средняя', 'मध्यम', 'متوسط', 'মাঝারি', '中', '中くらい'],
    low: ['low', 'nizak', 'niska', 'nisko', 'niski',
        'bajo', 'baja', 'bas', 'basse', 'baixo', 'baixa',
        'низкий', 'низкая', 'कम', 'منخفض', 'নিম্ন', '低', '低い'],
    critical: ['critical', 'kritičan', 'kritična', 'kritično', 'kritican',
        'crítico', 'crítica', 'critique',
        'критический', 'критическая', 'गंभीर', 'حرج', 'জটিল', '紧急', '重大'],
    // Boolean-like strings
    yes: ['yes', 'da',
        'sí', 'oui', 'sim', 'да', 'हाँ', 'نعم', 'হ্যাঁ', '是', 'はい'],
    no: ['no', 'ne', 'nije',
        'non', 'não', 'нет', 'नहीं', 'لا', 'না', '否', 'いいえ'],
    // Payment
    paid: ['paid', 'plaćen', 'plaćena', 'plaćeno', 'placen', 'placeno',
        'pagado', 'pagada', 'payé', 'payée', 'pago', 'paga',
        'оплачено', 'भुगतान किया', 'مدفوع', 'পরিশোধিত', '已支付', '支払済み'],
    unpaid: ['unpaid', 'neplaćen', 'neplaćena', 'neplaćeno', 'neplacen',
        'impago', 'impagado', 'non payé', 'não pago',
        'не оплачено', 'अवैतनिक', 'غير مدفوع', 'অপরিশোধিত', '未支付', '未払い'],
    overdue: ['overdue', 'zakasnelo', 'kasni', 'dospelo', 'dospela',
        'vencido', 'vencida', 'en retard', 'atrasado', 'atrasada',
        'просрочено', 'अतिदेय', 'متأخر', 'বকেয়া', '逾期', '期限超過'],
    // General
    enabled: ['enabled', 'uključen', 'uključena', 'ukljucen', 'ukljucena',
        'habilitado', 'habilitada', 'activé', 'activée', 'ativado', 'ativada',
        'включён', 'включена', 'सक्षम', 'مفعل', 'সক্রিয়', '已启用', '有効'],
    disabled: ['disabled', 'isključen', 'isključena', 'iskljucen', 'iskljucena',
        'deshabilitado', 'deshabilitada', 'désactivé', 'désactivée', 'desativado', 'desativada',
        'отключён', 'отключена', 'अक्षम', 'معطل', 'নিষ্ক্রিয়', '已禁用', '無効'],
    available: ['available', 'dostupan', 'dostupna', 'dostupno',
        'disponible', 'disponível',
        'доступен', 'доступна', 'उपलब्ध', 'متاح', 'উপলব্ধ', '可用', '利用可能'],
    unavailable: ['unavailable', 'nedostupan', 'nedostupna', 'nedostupno',
        'no disponible', 'indisponible', 'indisponível',
        'недоступен', 'недоступна', 'अनुपलब्ध', 'غير متاح', 'অনুপলব্ধ', '不可用', '利用不可'],
    success: ['success', 'uspešno', 'uspesno', 'uspeh',
        'éxito', 'succès', 'sucesso',
        'успех', 'успешно', 'सफलता', 'نجاح', 'সাফল্য', '成功'],
    failed: ['failed', 'neuspešno', 'neuspesno', 'neuspeo', 'neuspela', 'greška', 'greska',
        'fallido', 'fallida', 'échoué', 'falhou',
        'неудача', 'провал', 'विफल', 'فشل', 'ব্যর্থ', '失败', '失敗'],
    error: ['error', 'greška', 'greska',
        'erreur', 'erro',
        'ошибка', 'त्रुटि', 'خطأ', 'ত্রুটি', '错误', 'エラー'],
};
|
|
166
|
+
/**
 * Semantic opposites: for each canonical enum value, the canonical values
 * that can meaningfully contradict it.
 *
 * The table exists to narrow contradiction detection. Per the original
 * authors' note, scanning ALL enum values instead produces false positives
 * (e.g. Serbian "da" being picked up while checking an "active" status).
 * The consumer (findContradiction) is defined elsewhere in this file.
 *
 * The relation is deliberately not symmetric: e.g. `approved` lists
 * `cancelled`, but `cancelled` does not list `approved`.
 */
const ENUM_OPPOSITES = {
    // --- workflow status ---
    approved: ['rejected', 'pending', 'cancelled'],
    rejected: ['approved', 'pending'],
    pending: ['approved', 'rejected', 'completed', 'cancelled'],
    active: ['inactive'],
    inactive: ['active'],
    completed: ['pending', 'cancelled', 'open'],
    cancelled: ['pending', 'completed'],
    open: ['closed', 'completed'],
    closed: ['open'],

    // --- attendance / HR ---
    present: ['absent', 'on_leave', 'sick_leave', 'late'],
    absent: ['present'],
    on_leave: ['present'],
    sick_leave: ['present'],
    late: ['present'],

    // --- yes / no ---
    yes: ['no'],
    no: ['yes'],

    // --- payment ---
    paid: ['unpaid', 'overdue'],
    unpaid: ['paid'],
    overdue: ['paid'],

    // --- general toggles and outcomes ---
    enabled: ['disabled'],
    disabled: ['enabled'],
    available: ['unavailable'],
    unavailable: ['available'],
    success: ['failed', 'error'],
    failed: ['success'],
    error: ['success'],

    // --- priority ---
    high: ['medium', 'low'],
    medium: ['high', 'low', 'critical'],
    low: ['high', 'medium', 'critical'],
    critical: ['low', 'medium'],
};
|
|
204
|
+
/**
 * Short words that double as everyday function words (conjunctions,
 * particles, negations) in the supported languages.
 *
 * Intent, as stated by the original authors: these should NOT be matched as
 * standalone enum/boolean synonyms inside running text; they only count when
 * the response is essentially just that word. The code that enforces this
 * lives elsewhere in the file.
 */
const AMBIGUOUS_SHORT_WORDS = (() => {
    const words = new Set();
    const register = (...entries) => entries.forEach((entry) => words.add(entry));
    // Latin short words that collide with function words
    register('da', 'ne', 'on', 'an', 'no', 'non', 'sim');
    // Cyrillic — "да" is a conjunction in Russian, not just "yes"
    register('да');
    // CJK single characters — extremely common in running text
    register('是', '否', '高', '中', '低');
    // Japanese kana that double as particles/prefixes
    register('に', 'ご');
    // Arabic — "لا" is a ubiquitous negation particle
    register('لا');
    // Bengali — "না" is a common negation
    register('না');
    return words;
})();
|
|
223
|
+
/**
 * Build the reverse index of ENUM_TRANSLATIONS: lower-cased synonym →
 * canonical value.
 *
 * Entries are visited in declaration order, so when the same synonym is
 * listed under several canonical values the one declared last wins.
 *
 * @returns {Map<string, string>} synonym (lower case) → canonical value
 */
function buildReverseLookup() {
    const pairs = Object.entries(ENUM_TRANSLATIONS).flatMap(([canonical, synonyms]) => synonyms.map((synonym) => [synonym.toLowerCase(), canonical]));
    // The Map constructor inserts pairs in order, overwriting earlier values
    // for a repeated key — the same outcome as calling set() in a loop.
    return new Map(pairs);
}
|
|
233
|
+
const ENUM_REVERSE = buildReverseLookup();
|
|
234
|
+
// ---------------------------------------------------------------------------
|
|
235
|
+
// L2 Claim Extraction
|
|
236
|
+
// ---------------------------------------------------------------------------
|
|
237
|
+
/**
 * Field-name patterns that suggest a boolean field: first the common
 * `<prefix>_` forms, then exact flag names. All are case-insensitive.
 */
const BOOLEAN_FIELD_PATTERNS = [
    ...['is', 'has', 'can', 'should', 'was', 'did']
        .map((prefix) => new RegExp(`^${prefix}_`, 'i')),
    ...['enabled', 'disabled', 'active', 'visible', 'archived', 'deleted', 'verified', 'confirmed', 'available']
        .map((flagName) => new RegExp(`^${flagName}$`, 'i')),
];
/**
 * Field-name patterns that suggest an enum/status field. These are
 * unanchored, case-insensitive substring matches (e.g. "orderStatus" matches).
 */
const ENUM_FIELD_PATTERNS = [
    'status', 'state', 'type', 'priority', 'level',
    'category', 'role', 'phase', 'stage', 'mode',
    'result', 'outcome', 'decision', 'reason',
].map((keyword) => new RegExp(keyword, 'i'));
/** Maximum recursion depth for JSON walkers (prevents stack overflow on deep/circular data). */
const MAX_WALK_DEPTH = 50;
|
|
251
|
+
/**
 * Detect whether a tool output looks like a DB schema description rather
 * than data, i.e. a plain object shaped like
 * `{ table: "...", columns: [...] }` or `{ tables: {...}, columns: {...} }`
 * (the shape typically returned by describe_database).
 *
 * Such outputs should NOT be treated as data for claim extraction.
 *
 * `null` is explicitly excluded wherever an object is expected: since
 * `typeof null === 'object'`, a data row such as
 * `{ table: 'orders', columns: null }` must not be mistaken for a schema.
 *
 * @param {unknown} output A single tool output value.
 * @returns {boolean} true when `output` is a non-array object with a
 *   table/tables key and an object-or-array `columns` key.
 */
function isSchemaOutput(output) {
    const isNonNullObject = (candidate) => candidate !== null && typeof candidate === 'object';
    if (!isNonNullObject(output) || Array.isArray(output)) {
        return false;
    }
    const namesTable = typeof output['table'] === 'string' || isNonNullObject(output['tables']);
    // Arrays are objects too, so this accepts both column lists and column maps.
    const describesColumns = isNonNullObject(output['columns']);
    return namesTable && describesColumns;
}
|
|
264
|
+
/**
 * Collect the boolean/enum-like fields found in every tool output of a trace.
 *
 * Tool outputs that look like schema descriptions (see isSchemaOutput) are
 * skipped; every other output is walked recursively by walkOutput, which
 * appends `{ fieldName, value, stepId }` records to the result.
 *
 * @param trace The trace whose tool-output steps are inspected.
 * @returns {Array} The collected field records, in traversal order.
 */
function extractStructuredFields(trace) {
    const collected = [];
    for (const { stepId, toolOutputs } of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        for (const { output } of toolOutputs ?? []) {
            if (!isSchemaOutput(output)) {
                walkOutput(output, stepId, '', collected);
            }
        }
    }
    return collected;
}
|
|
279
|
+
/**
 * Recursively walk a tool-output value and append every boolean/enum-like
 * leaf to `out` as `{ fieldName, value, stepId }`.
 *
 * - Booleans are always recorded.
 * - Strings are recorded only when they have a non-empty path AND either the
 *   last path segment matches a boolean/enum field-name pattern or the value
 *   itself is a known enum synonym.
 * - Numbers are ignored here (handled by L1).
 * - Arrays and objects are descended into, extending the path with `[i]` or
 *   `.key` respectively.
 *
 * @param value   Current value being inspected.
 * @param stepId  Id of the trace step the value came from.
 * @param path    Dotted/bracketed path of `value` from the output root ('' at the root).
 * @param out     Accumulator array; mutated in place.
 * @param depth   Current recursion depth; values deeper than MAX_WALK_DEPTH are ignored.
 */
function walkOutput(value, stepId, path, out, depth = 0) {
    if (value === null || value === undefined || depth > MAX_WALK_DEPTH) {
        return;
    }
    switch (typeof value) {
        case 'boolean':
            out.push({ fieldName: path, value, stepId });
            return;
        case 'string': {
            if (!path) {
                return;
            }
            // Only the final path segment is tested against the name patterns.
            const leafKey = path.split('.').pop() ?? path;
            const matchesLeaf = (pattern) => pattern.test(leafKey);
            const nameLooksBoolean = BOOLEAN_FIELD_PATTERNS.some(matchesLeaf);
            const nameLooksEnum = ENUM_FIELD_PATTERNS.some(matchesLeaf);
            if (nameLooksBoolean || nameLooksEnum || ENUM_REVERSE.has(value.toLowerCase())) {
                out.push({ fieldName: path, value, stepId });
            }
            return;
        }
        case 'object': {
            const nextDepth = depth + 1;
            if (Array.isArray(value)) {
                value.forEach((element, index) => {
                    const elementPath = path ? `${path}[${index}]` : `[${index}]`;
                    walkOutput(element, stepId, elementPath, out, nextDepth);
                });
                return;
            }
            for (const [key, child] of Object.entries(value)) {
                const childPath = path ? `${path}.${key}` : key;
                walkOutput(child, stepId, childPath, out, nextDepth);
            }
            return;
        }
        default:
            // Numbers are handled by L1; other primitive types carry no claim.
            return;
    }
}
|
|
308
|
+
/**
 * Extract L2 structured-context claims by comparing the boolean/enum fields
 * found in the trace's tool outputs against the response text.
 *
 * For each field:
 * - boolean: if the response contains a synonym of the field's value, a claim
 *   with that value is emitted; otherwise, if it contains a synonym of the
 *   opposite value, a claim with the opposite value is emitted.
 * - string (enum): if the response contains the value or one of its
 *   translations AND the mention looks deliberate (see the heuristic below),
 *   a claim with the original value is emitted. If no synonym is found at
 *   all, findContradiction is consulted and, when it reports a different
 *   value, a claim carrying that value is emitted.
 *
 * @param trace         Trace providing the tool outputs.
 * @param responseText  The response text to inspect.
 * @param sourceStepId  Optional step id recorded on each claim's source
 *                      (defaults to 'final_response').
 * @returns The claims after deduplicateClaims; [] when `trace` or
 *          `responseText` is missing/empty.
 */
function extractStructuredClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace) {
        return [];
    }
    const fields = extractStructuredFields(trace);
    const responseLower = responseText.toLowerCase();
    // One shared source descriptor is attached to every claim.
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    const claims = [];
    const addClaim = (type, value, rawText, field) => {
        claims.push({
            claimId: (0, crypto_1.randomUUID)(),
            type,
            value,
            rawText,
            source,
            fieldName: field.fieldName,
            sourceFieldValue: field.value,
        });
    };
    // Heuristic that separates a deliberate enum mention ("status: late")
    // from the same word used in flowing prose ("kasni na posao").
    const looksLikeDeliberateMention = (synonym, fieldName) => {
        const cleanKey = (fieldName.split('.').pop() ?? '').replace(/\[\d+\]$/, '').toLowerCase();
        const synonymAt = responseLower.indexOf(synonym.toLowerCase());
        let fieldNearby = false;
        if (synonymAt >= 0 && cleanKey.length >= 2) {
            // Field-name keyword within 40 characters on either side of the synonym.
            const windowStart = Math.max(0, synonymAt - 40);
            const windowEnd = Math.min(responseLower.length, synonymAt + synonym.length + 40);
            fieldNearby = responseLower.substring(windowStart, windowEnd).includes(cleanKey);
        }
        // Also accepted: field name mentioned anywhere, a multi-word synonym
        // (more specific), or a short, focused response.
        return fieldNearby
            || (cleanKey.length >= 3 && responseLower.includes(cleanKey))
            || synonym.includes(' ')
            || responseLower.length <= 120;
    };
    for (const field of fields) {
        if (typeof field.value === 'boolean') {
            const sourceKey = field.value ? 'true' : 'false';
            const negatedKey = field.value ? 'false' : 'true';
            const agreeing = findSynonymInText(responseLower, BOOLEAN_SYNONYMS[sourceKey]);
            const opposing = findSynonymInText(responseLower, BOOLEAN_SYNONYMS[negatedKey]);
            if (agreeing) {
                addClaim('boolean', sourceKey, agreeing, field);
            }
            else if (opposing) {
                // Response says the opposite of the tool output — still a claim (UNGROUNDED).
                addClaim('boolean', negatedKey, opposing, field);
            }
            continue;
        }
        if (typeof field.value !== 'string') {
            continue;
        }
        // Enum/status field — look for the value or one of its translations.
        const valueLower = field.value.toLowerCase();
        const canonical = ENUM_REVERSE.get(valueLower) ?? valueLower;
        const synonyms = ENUM_TRANSLATIONS[canonical] ?? [valueLower];
        const matchedSynonym = findSynonymInText(responseLower, synonyms);
        if (!matchedSynonym) {
            // The response may state a DIFFERENT value for this field,
            // e.g. tool says "approved" but response says "odbijen" (rejected).
            const contradiction = findContradiction(responseLower, canonical);
            if (contradiction) {
                addClaim('enum', contradiction.foundCanonical, contradiction.foundSynonym, field);
            }
            continue;
        }
        if (looksLikeDeliberateMention(matchedSynonym, field.fieldName)) {
            addClaim('enum', field.value, matchedSynonym, field);
        }
    }
    return deduplicateClaims(claims);
}
|
|
414
|
+
/**
 * Match a boolean claim against the tool output value.
 *
 * The source value may be a real boolean or a string spelling of one
 * ("true"/"false", "yes"/"no"; case and surrounding whitespace are ignored).
 * The claimed value is read case-insensitively as well: it counts as `true`
 * only when it spells "true", and as `false` otherwise.
 *
 * @param {string} claimedValue 'true' or 'false' as asserted by the response.
 * @param {unknown} sourceValue Value found in the tool output.
 * @returns {{ matched: boolean, explanation: string }} Whether the claim
 *   agrees with the source, plus a human-readable reason.
 */
function matchBoolean(claimedValue, // 'true' or 'false'
sourceValue) {
    // Normalise into a local instead of reassigning the parameter.
    let sourceBool;
    if (typeof sourceValue === 'boolean') {
        sourceBool = sourceValue;
    }
    else if (typeof sourceValue === 'string') {
        // String booleans: "true"/"false", "yes"/"no"
        const normalized = sourceValue.trim().toLowerCase();
        if (normalized === 'true' || normalized === 'yes') {
            sourceBool = true;
        }
        else if (normalized === 'false' || normalized === 'no') {
            sourceBool = false;
        }
        else {
            return { matched: false, explanation: `Source value "${sourceValue}" is not a boolean.` };
        }
    }
    else {
        return { matched: false, explanation: `Source value is not a boolean (type: ${typeof sourceValue}).` };
    }
    // "TRUE" / " true " must not be misread as a false claim.
    const claimedBool = String(claimedValue).trim().toLowerCase() === 'true';
    if (claimedBool === sourceBool) {
        return { matched: true, explanation: `Boolean claim "${claimedValue}" matches source value ${sourceBool}.` };
    }
    return {
        matched: false,
        explanation: `Boolean contradiction: response indicates "${claimedValue}" but source is ${sourceBool}.`,
    };
}
|
|
446
|
+
/**
 * Match an enum claim against the tool output value.
 *
 * Two values match when they are equal ignoring case, or when the reverse
 * translation table resolves both to the same canonical value (values not in
 * the table resolve to their own lower-cased form).
 *
 * @param {string} claimedValue Value asserted by the response.
 * @param {unknown} sourceValue Value found in the tool output; anything other
 *   than a string never matches.
 * @returns {{ matched: boolean, explanation: string }}
 */
function matchEnum(claimedValue, sourceValue) {
    if (typeof sourceValue !== 'string') {
        return { matched: false, explanation: `Source value is not a string (type: ${typeof sourceValue}).` };
    }
    const claimedLower = claimedValue.toLowerCase();
    const sourceLower = sourceValue.toLowerCase();
    // Direct (case-insensitive) match needs no table lookup.
    if (claimedLower === sourceLower) {
        return { matched: true, explanation: `Enum claim "${claimedValue}" exactly matches source "${sourceValue}".` };
    }
    const toCanonical = (lowered) => ENUM_REVERSE.get(lowered) ?? lowered;
    const claimCanonical = toCanonical(claimedLower);
    const sourceCanonical = toCanonical(sourceLower);
    const matched = claimCanonical === sourceCanonical;
    const explanation = matched
        ? `Enum claim "${claimedValue}" matches source "${sourceValue}" via translation (both → "${claimCanonical}").`
        : `Enum mismatch: response says "${claimedValue}" (→ ${claimCanonical}) but source is "${sourceValue}" (→ ${sourceCanonical}).`;
    return { matched, explanation };
}
|
|
472
|
+
// ---------------------------------------------------------------------------
|
|
473
|
+
// Public API for extending translation tables
|
|
474
|
+
// ---------------------------------------------------------------------------
|
|
475
|
+
/**
 * Add custom enum translations for domain-specific values.
 *
 * New synonyms are appended to any existing list for the canonical value
 * (strings already present are not added again, so repeated calls are
 * idempotent) and registered, lower-cased, in the reverse lookup. A synonym
 * that was previously registered under another canonical value is re-pointed
 * to the new one.
 *
 * The whole input is validated before anything is changed, so a malformed
 * entry never leaves the tables half-updated.
 *
 * @param translations Map of canonical_value → synonym array
 * @throws {TypeError} If `translations` is null/undefined, or if any synonym
 *   list is not an iterable of strings (a bare string is rejected rather than
 *   being split into characters).
 */
function addEnumTranslations(translations) {
    // Pass 1: validate and snapshot every synonym list.
    const additions = Object.entries(translations).map(([canonical, synonyms]) => {
        const isIterable = synonyms !== null && synonyms !== undefined
            && typeof synonyms !== 'string'
            && typeof synonyms[Symbol.iterator] === 'function';
        const list = isIterable ? [...synonyms] : null;
        if (list === null || list.some((synonym) => typeof synonym !== 'string')) {
            throw new TypeError(`addEnumTranslations: synonyms for "${canonical}" must be an array of strings.`);
        }
        return [canonical, list];
    });
    // Pass 2: apply.
    for (const [canonical, list] of additions) {
        // Own-property check so names like "toString" don't pick up Object.prototype members.
        const existing = Object.prototype.hasOwnProperty.call(ENUM_TRANSLATIONS, canonical)
            ? ENUM_TRANSLATIONS[canonical]
            : [];
        // A Set keeps first-seen order while dropping repeated strings.
        ENUM_TRANSLATIONS[canonical] = [...new Set([...existing, ...list])];
        for (const synonym of list) {
            ENUM_REVERSE.set(synonym.toLowerCase(), canonical);
        }
    }
}
|
|
490
|
+
/**
 * Get a copy of the current enum translation table (for testing/inspection).
 *
 * Both the table object and each synonym array are copied, so callers can
 * freely modify the result without affecting the live table. To extend the
 * live table use addEnumTranslations(), which also maintains the reverse
 * lookup.
 *
 * @returns {Record<string, string[]>} canonical value → copy of its synonym list
 */
function getEnumTranslations() {
    return Object.fromEntries(Object.entries(ENUM_TRANSLATIONS).map(([canonical, synonyms]) => [canonical, [...synonyms]]));
}
|
|
496
|
+
// =========================================================================
|
|
497
|
+
// L2 — LIST ITEMS (Array Set Verification)
|
|
498
|
+
// =========================================================================
|
|
499
|
+
/**
 * Field names that typically hold a displayable item label.
 * NOTE(review): the name suggests earlier entries are preferred over later
 * ones; the consumer is not visible here — confirm before reordering.
 */
const LABEL_FIELD_PRIORITY = [
    // personal / entity names
    'name',
    'full_name',
    'fullName',
    'display_name',
    'displayName',
    // descriptive text
    'title',
    'label',
    'description',
    // identifiers
    'email',
    'username',
    'id',
];
|
|
504
|
+
/**
 * Check whether a key has the exact shape of an ISO calendar date
 * (YYYY-MM-DD). Only the digit/hyphen layout is checked, not whether the
 * month and day are valid.
 *
 * @param {string} key Candidate key.
 * @returns {boolean} true when the whole key is four digits, hyphen, two digits, hyphen, two digits.
 */
function isDateLikeKey(key) {
    const isoDayShape = /^\d{4}-\d{2}-\d{2}$/;
    return isoDayShape.test(key);
}
|
|
508
|
+
/**
 * Parse a period string (from TraceUtils.inferPeriod) into concrete
 * start/end dates.
 *
 * Recognised forms:
 * - a range "YYYY-MM-DD/YYYY-MM-DD" → `{ start, end }` (only the first two
 *   slash-separated segments are looked at);
 * - a single date "YYYY-MM-DD" → `{ start, end }` with both set to that date.
 *
 * @param {string} period Period string; may be empty/undefined.
 * @returns {{ start: string, end: string } | null} The bounds, or null when
 *   the period is missing or in any other format.
 */
function parsePeriodBounds(period) {
    if (!period) {
        return null;
    }
    const isIsoDay = (text) => /^\d{4}-\d{2}-\d{2}$/.test(text);
    // Range format: "2026-03-01/2026-03-31"
    if (period.includes('/')) {
        const [start, end] = period.split('/');
        if (isIsoDay(start) && isIsoDay(end)) {
            return { start, end };
        }
    }
    // Single date: "2026-03-02" — anything else is unparseable.
    return isIsoDay(period) ? { start: period, end: period } : null;
}
|
|
528
|
+
/**
 * Walk all tool outputs of a trace to find arrays of objects and extract
 * their display labels as array groups.
 *
 * Schema-like outputs (see isSchemaOutput) are skipped. The groups gathered
 * by walkForArrays are then handed to splitDateFieldGroups, which splits flat
 * arrays whose items carry a date field into per-date sub-groups, e.g.
 *   [{date:"2026-03-02",name:"Vladimir"},{date:"2026-03-14",name:"Dejan"}]
 *   → path.date=2026-03-02 and path.date=2026-03-14
 * so that sibling-scope filtering can treat each date slice independently.
 *
 * @param trace   Trace providing the tool outputs.
 * @param period  Optional period string; parsed with parsePeriodBounds and
 *                passed through to walkForArrays.
 * @returns The array groups returned by splitDateFieldGroups.
 */
function extractArrayGroups(trace, period) {
    const collected = [];
    const toolSteps = Trace_1.TraceUtils.getToolOutputSteps(trace);
    const bounds = parsePeriodBounds(period);
    for (const { stepId, toolOutputs } of toolSteps) {
        for (const { output } of toolOutputs ?? []) {
            if (!isSchemaOutput(output)) {
                walkForArrays(output, stepId, '', collected, 0, bounds);
            }
        }
    }
    // Post-process: split flat arrays by date field when items contain one.
    return splitDateFieldGroups(collected, trace);
}
|
|
550
|
+
/** Item field names that are checked, in this order, for a date value (English and Serbian). */
const DATE_FIELD_NAMES = [
    'date',
    'datum',
    'day',
    'dan',
    'event_date',
    'created_at',
    'timestamp',
];
/** Matches strings that START with YYYY-MM-DD; no end anchor, so full timestamps also match. */
const DATE_VALUE_RE = /^\d{4}-\d{2}-\d{2}/;
|
|
552
|
+
/**
|
|
553
|
+
* For flat arrays whose items have a date field, split the single ArrayGroup
|
|
554
|
+
* into per-date sub-groups. This allows the sibling-scope machinery to treat
|
|
555
|
+
* each date slice independently.
|
|
556
|
+
*
|
|
557
|
+
* Example: array at path "anomalies" with items spanning 3 dates →
|
|
558
|
+
* anomalies.date=2026-03-02, anomalies.date=2026-03-14, anomalies.date=2026-03-15
|
|
559
|
+
*/
|
|
560
|
+
function splitDateFieldGroups(groups, trace) {
|
|
561
|
+
const toolSteps = Trace_1.TraceUtils.getToolOutputSteps(trace);
|
|
562
|
+
const result = [];
|
|
563
|
+
for (const group of groups) {
|
|
564
|
+
// Find the raw array from tool output to inspect its items' date fields
|
|
565
|
+
const rawArray = findRawArray(group, toolSteps);
|
|
566
|
+
if (!rawArray || rawArray.length < 2) {
|
|
567
|
+
result.push(group);
|
|
568
|
+
continue;
|
|
569
|
+
}
|
|
570
|
+
// Check if items have a date field
|
|
571
|
+
const dateField = DATE_FIELD_NAMES.find((f) => {
|
|
572
|
+
const sample = rawArray[0];
|
|
573
|
+
return typeof sample[f] === 'string' && DATE_VALUE_RE.test(sample[f]);
|
|
574
|
+
});
|
|
575
|
+
if (!dateField) {
|
|
576
|
+
result.push(group);
|
|
577
|
+
continue;
|
|
578
|
+
}
|
|
579
|
+
// Group items by date value
|
|
580
|
+
const byDate = new Map();
|
|
581
|
+
for (let i = 0; i < rawArray.length; i++) {
|
|
582
|
+
const item = rawArray[i];
|
|
583
|
+
const dateVal = item[dateField];
|
|
584
|
+
if (!dateVal)
|
|
585
|
+
continue;
|
|
586
|
+
const dateKey = dateVal.substring(0, 10); // YYYY-MM-DD
|
|
587
|
+
const label = group.labels[i];
|
|
588
|
+
if (!label)
|
|
589
|
+
continue;
|
|
590
|
+
if (!byDate.has(dateKey))
|
|
591
|
+
byDate.set(dateKey, []);
|
|
592
|
+
byDate.get(dateKey).push(label);
|
|
593
|
+
}
|
|
594
|
+
// Only split if there are multiple date values (otherwise no benefit)
|
|
595
|
+
if (byDate.size < 2) {
|
|
596
|
+
result.push(group);
|
|
597
|
+
continue;
|
|
598
|
+
}
|
|
599
|
+
for (const [dateKey, labels] of byDate) {
|
|
600
|
+
result.push({
|
|
601
|
+
arrayPath: `${group.arrayPath}.${dateField}=${dateKey}`,
|
|
602
|
+
labels,
|
|
603
|
+
stepId: group.stepId,
|
|
604
|
+
});
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
return result;
|
|
608
|
+
}
|
|
609
|
+
/**
 * Look up the raw tool-output array behind an ArrayGroup.
 *
 * Only steps whose stepId equals the group's stepId are searched; the first
 * tool output in which the group's arrayPath resolves to an array wins.
 *
 * @param group ArrayGroup ({ arrayPath, stepId, ... }).
 * @param steps Tool-output steps to search.
 * @returns The matching array, or null when none is found.
 */
function findRawArray(group, steps) {
    for (const step of steps) {
        if (step.stepId === group.stepId) {
            const outputs = step.toolOutputs ?? [];
            for (const toolOutput of outputs) {
                const candidate = resolveArrayPath(toolOutput.output, group.arrayPath);
                if (candidate) {
                    return candidate;
                }
            }
        }
    }
    return null;
}
|
|
624
|
+
/**
 * Resolve a dotted path, as built by walkForArrays, to an array inside `root`.
 *
 * Supported path forms:
 *   ''  / 'root'          → `root` itself
 *   'a.b.c'               → nested object keys
 *   'items[2].people'     → key followed by one or more array indices
 *   '[0].rows'            → index into a root-level array
 *
 * A segment is first tried as a literal property name, so an object key that
 * really is spelled "x[0]" still resolves. Keys that themselves contain a '.'
 * cannot be told apart from nesting and will not resolve.
 *
 * @param root Tool output value to walk.
 * @param path Path string.
 * @returns The array found at `path`, or null if the path is missing or is not an array.
 */
function resolveArrayPath(root, path) {
    if (!path || path === 'root') {
        return Array.isArray(root) ? root : null;
    }
    let current = root;
    for (const seg of path.split('.')) {
        // typeof also rejects undefined; null needs its own check
        if (current === null || typeof current !== 'object') {
            return null;
        }
        if (seg in current) {
            current = current[seg];
            continue;
        }
        // Index segments ("key[0]", "[3]", "grid[1][2]") are not literal keys:
        // split them into the optional key and the chain of indices.
        const indexed = /^([^[\]]*)((?:\[\d+\])+)$/.exec(seg);
        if (!indexed) {
            return null;
        }
        const [, key, indexChain] = indexed;
        if (key !== '') {
            current = current[key];
        }
        for (const digits of indexChain.match(/\d+/g)) {
            if (current === null || typeof current !== 'object') {
                return null;
            }
            current = current[Number(digits)];
        }
    }
    return Array.isArray(current) ? current : null;
}
|
|
639
|
+
/**
 * Recursively collect label groups from a tool-output value.
 *
 * - A non-empty array whose first element is a non-null object contributes one
 *   group ({ arrayPath, labels, stepId }) if at least one item yields a label;
 *   every item is then walked for nested arrays under the path `path[i]`.
 * - A plain object is walked key by key under the path `path.key`. When at
 *   least half of its keys are date-like and `periodBounds` is given, date-like
 *   keys outside [start, end] (plain string comparison) are not descended into.
 * - Other values (primitives, empty arrays, arrays whose first element is not
 *   an object) contribute nothing.
 *
 * @param value        Value to inspect.
 * @param stepId       Step id recorded on each emitted group.
 * @param path         Path of `value` relative to the tool output ('' at the top).
 * @param out          Array that receives the groups.
 * @param depth        Current recursion depth; the walk stops beyond MAX_WALK_DEPTH.
 * @param periodBounds Optional { start, end } used to filter date-keyed containers.
 */
function walkForArrays(value, stepId, path, out, depth = 0, periodBounds) {
    if (value === null || value === undefined) {
        return;
    }
    if (depth > MAX_WALK_DEPTH) {
        return;
    }
    const isRecord = (candidate) => typeof candidate === 'object' && candidate !== null;
    if (Array.isArray(value)) {
        if (value.length === 0 || !isRecord(value[0])) {
            return;
        }
        // Array of objects — one label per item that has one
        const labels = [];
        for (const entry of value) {
            const picked = isRecord(entry) ? pickLabel(entry) : null;
            if (picked) {
                labels.push(picked);
            }
        }
        if (labels.length > 0) {
            out.push({ arrayPath: path || 'root', labels, stepId });
        }
        // Nested arrays live inside the individual items
        value.forEach((entry, idx) => {
            const itemPath = path ? `${path}[${idx}]` : `[${idx}]`;
            walkForArrays(entry, stepId, itemPath, out, depth + 1, periodBounds);
        });
        return;
    }
    if (typeof value !== 'object') {
        return;
    }
    const entries = Object.entries(value);
    // Date-keyed container, e.g. by_date: { "2026-03-02": {...}, "2026-03-15": {...} }
    const dateKeyCount = entries.reduce((count, [key]) => (isDateLikeKey(key) ? count + 1 : count), 0);
    const isDateKeyedContainer = dateKeyCount > 0 && dateKeyCount >= entries.length * 0.5;
    for (const [key, child] of entries) {
        const filterable = isDateKeyedContainer && isDateLikeKey(key) && periodBounds;
        // Skip date keys outside the requested period
        if (filterable && (key < periodBounds.start || key > periodBounds.end)) {
            continue;
        }
        walkForArrays(child, stepId, path ? `${path}.${key}` : key, out, depth + 1, periodBounds);
    }
}
|
|
677
|
+
/**
 * Choose a display label for an object.
 *
 * The fields listed in LABEL_FIELD_PRIORITY are tried in order and the first one
 * holding a non-empty string wins. Failing that, the first own value that is a
 * string of 1–99 characters is used.
 *
 * @param obj Object to label.
 * @returns The label, or null when no suitable string exists.
 */
function pickLabel(obj) {
    const isNonEmptyString = (candidate) => typeof candidate === 'string' && candidate.length > 0;
    for (const fieldName of LABEL_FIELD_PRIORITY) {
        const preferred = obj[fieldName];
        if (isNonEmptyString(preferred)) {
            return preferred;
        }
    }
    // Fallback: first reasonably short string value
    const fallback = Object.values(obj).find((candidate) => isNonEmptyString(candidate) && candidate.length < 100);
    return fallback === undefined ? null : fallback;
}
|
|
690
|
+
/**
 * Extract list_items claims: for each array group found in the tool outputs,
 * record which of its labels the response mentions.
 *
 * A claim is produced only for groups with at least two labels of which at
 * least one is mentioned in the response.
 *
 * Sibling-scope relevance filtering: when tool output contains a keyed
 * container (e.g. by_date, by_employee, by_location — any `parent.<dynamic_key>.leaf`
 * pattern), the user typically asks about a subset. Groups that
 * groupSiblingFamilies puts into one family are classified by activation:
 *   strong  — ≥2 labels mentioned in the response
 *   weak    — exactly 1 label mentioned
 *   dormant — 0 labels mentioned
 *
 * Resulting scope per group:
 *   - all siblings dormant        → every sibling is `in_scope` (possible data_ignored)
 *   - strong                      → `in_scope`
 *   - weak, some sibling strong   → `weak_scope`
 *   - weak, no sibling strong     → `in_scope` (it is the strongest signal)
 *   - dormant, some sibling active → `suppressed` (no claim is generated)
 *
 * @param trace        Trace providing the tool outputs.
 * @param responseText Final response text.
 * @param sourceStepId Optional step id for the claim source (defaults to 'final_response').
 * @returns The list_items claims (empty when trace or response text is missing).
 */
function extractListItemsClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace) {
        return [];
    }
    const period = Trace_1.TraceUtils.inferPeriod(trace);
    const groups = extractArrayGroups(trace, period);
    const claims = [];
    const responseLower = responseText.toLowerCase();
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    // -----------------------------------------------------------------------
    // Sibling-scope relevance filtering
    // -----------------------------------------------------------------------
    // Map: arrayPath → { scope, familyPattern }
    const scopeMap = new Map();
    for (const family of groupSiblingFamilies(groups)) {
        if (family.groups.length < 2) {
            continue; // Not a true family
        }
        const activations = family.groups.map((group) => {
            const mentionCount = group.labels.filter((label) => isLabelMentioned(responseLower, label)).length;
            const strength = mentionCount >= 2 ? 'strong' : mentionCount === 1 ? 'weak' : 'dormant';
            return { group, strength };
        });
        const hasStrong = activations.some((a) => a.strength === 'strong');
        const allDormant = activations.every((a) => a.strength === 'dormant');
        for (const { group, strength } of activations) {
            let scope;
            if (allDormant || strength === 'strong') {
                scope = 'in_scope';
            }
            else if (strength === 'weak') {
                // Weak next to a strong sibling is only weakly in scope
                scope = hasStrong ? 'weak_scope' : 'in_scope';
            }
            else {
                // Dormant while another sibling is active
                scope = 'suppressed';
            }
            scopeMap.set(group.arrayPath, { scope, familyPattern: family.pattern });
        }
    }
    for (const group of groups) {
        if (group.labels.length < 2) {
            continue; // Single items don't need set verification
        }
        const scopeInfo = scopeMap.get(group.arrayPath);
        // Hard-skip suppressed siblings (no claims generated)
        if (scopeInfo?.scope === 'suppressed') {
            continue;
        }
        const mentioned = group.labels.filter((label) => isLabelMentioned(responseLower, label));
        // Only create a claim if the response mentions at least one item (shows it's discussing the list)
        if (mentioned.length === 0) {
            continue;
        }
        const claim = {
            claimId: (0, crypto_1.randomUUID)(),
            type: 'list_items',
            value: mentioned.join(', '),
            rawText: responseText.substring(0, 200),
            source,
            fieldName: group.arrayPath,
            sourceFieldValue: group.labels,
            expectedItems: group.labels,
            mentionedItems: mentioned,
        };
        // Attach sibling-scope metadata when applicable
        if (scopeInfo) {
            claim.siblingScope = scopeInfo.scope;
            claim.siblingFamilyPattern = scopeInfo.familyPattern;
        }
        claims.push(claim);
    }
    return claims;
}
|
|
804
|
+
/**
 * Detect sibling families: ArrayGroups whose paths are identical except for
 * one segment (the dynamic key), e.g.
 *   by_date.2026-03-02.people → by_date.*.people
 *   by_employee.123.sessions  → by_employee.*.sessions
 *   by_location.NS-01.people  → by_location.*.people
 *   weeks.2026-W10.items      → weeks.*.items
 *
 * Each path is wildcarded one segment at a time. Segments containing an array
 * index ("[0]", "items[2]") are never wildcarded, and for paths of three or
 * more segments neither the first nor the last segment is: wildcarding the
 * first would merge different root containers, wildcarding the last would
 * merge different leaf arrays. Patterns shared by at least two distinct paths
 * are candidate families; they are processed largest first and each path joins
 * at most one family.
 *
 * @param groups ArrayGroups ({ arrayPath, ... }).
 * @returns Families as { pattern, segmentIdx, groups }, each with ≥2 member groups.
 */
function groupSiblingFamilies(groups) {
    const hasArrayIndex = /\[\d+\]/;
    // pattern → { segmentIdx, paths }
    const candidateMap = new Map();
    groups.forEach((group) => {
        const segments = group.arrayPath.split('.');
        const lastIdx = segments.length - 1;
        const interiorOnly = segments.length >= 3;
        segments.forEach((segment, idx) => {
            if (hasArrayIndex.test(segment)) {
                return;
            }
            if (interiorOnly && (idx === 0 || idx === lastIdx)) {
                return;
            }
            const pattern = [...segments.slice(0, idx), '*', ...segments.slice(idx + 1)].join('.');
            if (!candidateMap.has(pattern)) {
                candidateMap.set(pattern, { segmentIdx: idx, paths: new Set() });
            }
            candidateMap.get(pattern).paths.add(group.arrayPath);
        });
    });
    // Real families have 2+ distinct member paths; bigger families get first pick
    const ranked = [...candidateMap]
        .filter(([, entry]) => entry.paths.size >= 2)
        .sort((left, right) => right[1].paths.size - left[1].paths.size);
    const groupByPath = new Map();
    for (const group of groups) {
        groupByPath.set(group.arrayPath, group);
    }
    const claimedPaths = new Set();
    const families = [];
    for (const [pattern, { segmentIdx, paths }] of ranked) {
        const members = [...paths]
            .filter((candidatePath) => !claimedPaths.has(candidatePath))
            .map((candidatePath) => groupByPath.get(candidatePath))
            .filter((member) => Boolean(member));
        if (members.length >= 2) {
            families.push({ pattern, segmentIdx, groups: members });
            members.forEach((member) => claimedPaths.add(member.arrayPath));
        }
    }
    return families;
}
|
|
866
|
+
/**
 * Check whether a label is mentioned in the (lower-cased) response.
 *
 * A label counts as mentioned when either
 *   - its lower-cased form occurs anywhere in the response, or
 *   - it has several whitespace-separated parts and the last part (≥3
 *     characters, e.g. "Jovic" from "Ana Jovic") occurs as a whole word.
 * The first name on its own is deliberately not accepted.
 *
 * "Whole word" means not adjacent to a letter, digit or underscore in any
 * script. An ASCII `\b` would treat "ć" as a non-word character, so "jović"
 * would never match while "jovićević" would.
 *
 * @param responseLower Response text, already lower-cased.
 * @param label         Label to look for.
 * @returns true when the label is mentioned.
 */
function isLabelMentioned(responseLower, label) {
    const labelLower = label.toLowerCase();
    // Direct presence
    if (responseLower.includes(labelLower)) {
        return true;
    }
    const parts = labelLower.split(/\s+/);
    if (parts.length < 2) {
        return false;
    }
    const lastPart = parts[parts.length - 1];
    if (lastPart.length < 3) {
        return false;
    }
    // Escape only regex syntax characters so the pattern stays valid under the `u` flag
    const escaped = lastPart.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const wholeWord = new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, 'iu');
    return wholeWord.test(responseLower);
}
|
|
884
|
+
/**
 * Match a list_items claim: verify how completely the expected items were mentioned.
 *
 * Items are compared case- and diacritics-insensitively (NFD decomposition
 * with combining marks removed, trimmed).
 *
 * Outcomes:
 *   - empty expected list                         → matched
 *   - nothing missing                             → matched
 *   - coverage ≥ 90% and at most 2 items missing  → matched (near-complete, e.g.
 *     16/17 names listed with one minor spelling difference)
 *   - otherwise                                   → not matched
 *
 * @param expectedItems  Items present in the source data.
 * @param mentionedItems Items found in the response (a missing value is treated as empty).
 * @returns {{matched: boolean, explanation: string}}
 */
function matchListItems(expectedItems, mentionedItems) {
    if (expectedItems.length === 0) {
        return { matched: true, explanation: 'Empty list — nothing to verify.' };
    }
    // Use diacritics-insensitive comparison for name lists
    const norm = (s) => s.normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase().trim();
    // Normalise the mentioned items once; a Set makes each lookup O(1)
    const mentionedNormalized = new Set((mentionedItems ?? []).map(norm));
    const missing = expectedItems.filter((e) => !mentionedNormalized.has(norm(e)));
    if (missing.length === 0) {
        return {
            matched: true,
            explanation: `All ${expectedItems.length} items mentioned in response: ${expectedItems.join(', ')}.`,
        };
    }
    const matched = expectedItems.length - missing.length;
    const coverage = matched / expectedItems.length;
    const coveragePct = (coverage * 100).toFixed(0);
    // High-coverage partial match is still treated as grounded
    if (coverage >= 0.9 && missing.length <= 2) {
        return {
            matched: true,
            explanation: `Near-complete list: ${matched}/${expectedItems.length} items mentioned (${coveragePct}%). Minor omission: ${missing.join(', ')}.`,
        };
    }
    return {
        matched: false,
        explanation: `Incomplete list: ${matched}/${expectedItems.length} items mentioned. Missing: ${missing.join(', ')}. Coverage: ${coveragePct}%.`,
    };
}
|
|
915
|
+
// =========================================================================
|
|
916
|
+
// L2 — KEY-VALUE Pair Matching
|
|
917
|
+
// =========================================================================
|
|
918
|
+
/**
 * Field-name patterns marking a value as an identifier, contact detail or code.
 * A field qualifies when any one pattern matches its name (case-insensitive).
 */
const KEY_VALUE_FIELD_PATTERNS = [
    // Contact details
    /email/i,
    /e_mail/i,
    /phone/i,
    /tel/i,
    /mobile/i,
    /fax/i,
    // Generic identifiers, codes and account numbers
    /id$/i,
    /^id_/i,
    /code/i,
    /iban/i,
    /account/i,
    /number$/i,
    // Web and postal locations
    /url/i,
    /link/i,
    /website/i,
    /address/i,
    // Registry-style identifier field names
    /jmbg/i,
    /pib/i,
    /matični/i,
    /maticni/i,
    /mbr/i,
    // Licences and registrations
    /license/i,
    /registration/i,
];
|
|
926
|
+
/**
 * Gather key-value fields (identifiers, contact info, codes) from all tool
 * outputs of the trace. Outputs recognised by isSchemaOutput are skipped; the
 * rest are scanned with walkForKeyValues.
 *
 * @param trace Trace whose tool-output steps are scanned.
 * @returns Collected fields as pushed by walkForKeyValues.
 */
function extractKeyValueFields(trace) {
    const collected = [];
    for (const step of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        const outputs = step.toolOutputs ?? [];
        for (const { output } of outputs) {
            if (!isSchemaOutput(output)) {
                walkForKeyValues(output, step.stepId, '', collected);
            }
        }
    }
    return collected;
}
|
|
942
|
+
/**
 * Recursively collect string values whose field name looks like an
 * identifier, contact detail or code.
 *
 * A string is recorded when it sits at a non-empty path, the last path segment
 * (with a trailing "[n]" index removed) matches one of KEY_VALUE_FIELD_PATTERNS,
 * and its length is between 1 and 199. Arrays and objects are descended into,
 * extending the path with `[i]` or `.key`.
 *
 * @param value  Value to inspect.
 * @param stepId Step id recorded on each emitted field.
 * @param path   Path of `value` relative to the tool output ('' at the top).
 * @param out    Array receiving { fieldName, key, value, stepId } entries.
 * @param depth  Current recursion depth; the walk stops beyond MAX_WALK_DEPTH.
 */
function walkForKeyValues(value, stepId, path, out, depth = 0) {
    if (value === null || value === undefined) {
        return;
    }
    if (depth > MAX_WALK_DEPTH) {
        return;
    }
    if (typeof value === 'string') {
        // A bare string at the top level has no field name to classify
        if (!path) {
            return;
        }
        const tail = path.split('.').pop() ?? path;
        const lastKey = tail.replace(/\[\d+\]$/, '');
        const nameMatches = KEY_VALUE_FIELD_PATTERNS.some((pattern) => pattern.test(lastKey));
        if (nameMatches && value.length > 0 && value.length < 200) {
            out.push({ fieldName: path, key: lastKey, value, stepId });
        }
        return;
    }
    if (Array.isArray(value)) {
        value.forEach((element, idx) => {
            const elementPath = path ? `${path}[${idx}]` : `[${idx}]`;
            walkForKeyValues(element, stepId, elementPath, out, depth + 1);
        });
        return;
    }
    if (typeof value === 'object') {
        Object.entries(value).forEach(([key, child]) => {
            const childPath = path ? `${path}.${key}` : key;
            walkForKeyValues(child, stepId, childPath, out, depth + 1);
        });
    }
}
|
|
963
|
+
/**
 * Extract key_value claims: for every identifier-like field in the tool
 * outputs, check whether the response repeats it.
 *
 * - If the response contains the value (case-insensitive), a claim carrying the
 *   source value is emitted.
 * - Otherwise findNearMissInResponse is consulted; if it returns something, a
 *   claim carrying what the response actually says is emitted, so a slightly
 *   altered ID or email can be compared against the source.
 * The claims are passed through deduplicateClaims before being returned.
 *
 * @param trace        Trace providing the tool outputs.
 * @param responseText Final response text.
 * @param sourceStepId Optional step id for the claim source (defaults to 'final_response').
 * @returns The key_value claims (empty when trace or response text is missing).
 */
function extractKeyValueClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace) {
        return [];
    }
    const fields = extractKeyValueFields(trace);
    const responseLower = responseText.toLowerCase();
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    // `reported` is the value attributed to the response
    const buildClaim = (field, reported) => ({
        claimId: (0, crypto_1.randomUUID)(),
        type: 'key_value',
        value: reported,
        rawText: reported,
        source,
        fieldName: field.fieldName,
        sourceFieldValue: field.value,
    });
    const claims = [];
    fields.forEach((field) => {
        const valueLower = field.value.toLowerCase();
        if (responseLower.includes(valueLower)) {
            // Exact match — the response repeats the source value
            claims.push(buildClaim(field, field.value));
            return;
        }
        // Near miss — the response contains something similar to the value
        const nearMiss = findNearMissInResponse(responseLower, valueLower, field.key);
        if (nearMiss) {
            claims.push(buildClaim(field, nearMiss));
        }
    });
    return deduplicateClaims(claims);
}
|
|
1011
|
+
/**
 * Look for a value in the response that could be an altered version of a
 * key-value field, e.g. source "ana@company.com" but response "ana@company.rs".
 *
 * The field key decides which searches run (several may apply, in this order):
 *   - email keys: the first email-shaped token that differs from the value;
 *   - phone/tel/mobile/fax keys: the first phone-like run with ≥6 digits whose
 *     digits differ from the value's digits (returned trimmed);
 *   - id/code/iban/account/number keys: the first token of similar length (±2)
 *     that differs from the value but shares a prefix of at least
 *     min(3, half the value's length) characters.
 * Note that the email and phone searches do not test similarity to the source value.
 *
 * @param responseLower Lower-cased response text.
 * @param valueLower    Lower-cased source value.
 * @param fieldKey      Field name used to choose the search.
 * @returns The near-miss text found in the response, or null.
 */
function findNearMissInResponse(responseLower, valueLower, fieldKey) {
    // For emails: look for @domain patterns
    if (/email|e_mail/i.test(fieldKey)) {
        const emails = responseLower.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) ?? [];
        const otherEmail = emails.find((candidate) => candidate !== valueLower && candidate.length > 3);
        if (otherEmail !== undefined) {
            return otherEmail;
        }
    }
    // For phone numbers: look for phone-like patterns
    if (/phone|tel|mobile|fax/i.test(fieldKey)) {
        const phones = responseLower.match(/[+]?\d[\d\s\-()]{6,}/g);
        if (phones) {
            const digitsOf = (text) => text.replace(/\D/g, '');
            const sourceDigits = digitsOf(valueLower);
            const otherPhone = phones.find((candidate) => {
                const candidateDigits = digitsOf(candidate);
                return candidateDigits.length >= 6 && candidateDigits !== sourceDigits;
            });
            if (otherPhone !== undefined) {
                return otherPhone.trim();
            }
        }
    }
    // For IDs/codes: look for alphanumeric tokens of similar length
    if (/id$|^id_|code|iban|account|number$/i.test(fieldKey)) {
        const tokens = responseLower.match(/[A-Za-z0-9\-_.]{3,}/g) ?? [];
        const shortest = valueLower.length - 2;
        const longest = valueLower.length + 2;
        for (const token of tokens) {
            if (token === valueLower || token.length < shortest || token.length > longest) {
                continue;
            }
            // Same prefix but different — likely a mutated ID
            if (commonPrefixLength(token, valueLower) >= Math.min(3, valueLower.length * 0.5)) {
                return token;
            }
        }
    }
    return null;
}
|
|
1058
|
+
/**
 * Count how many leading positions of `a` and `b` hold identical characters.
 *
 * @param a First string.
 * @param b Second string.
 * @returns Length of the shared prefix (0 when the first characters differ).
 */
function commonPrefixLength(a, b) {
    const limit = Math.min(a.length, b.length);
    for (let pos = 0; pos < limit; pos++) {
        if (a[pos] !== b[pos]) {
            return pos;
        }
    }
    return limit;
}
|
|
1064
|
+
/**
 * Match a key_value claim against the tool output value.
 *
 * Matching rules, in order:
 *   1. A non-string source or claimed value never matches.
 *   2. Case-insensitive equality matches.
 *   3. Values that differ only in formatting match: both must contain the same
 *      digits (at least 6) and the same letters, so only spacing and
 *      punctuation may differ (e.g. "+381 64 123-4567" vs "+381641234567").
 *      Letters are compared so that identifiers sharing a numeric part but
 *      differing in their alphabetic part ("ORD-2026-0001" vs "INV-2026-0001")
 *      are reported as a mismatch.
 *
 * @param claimedValue Value attributed to the response.
 * @param sourceValue  Value found in the tool output.
 * @returns {{matched: boolean, explanation: string}}
 */
function matchKeyValue(claimedValue, sourceValue) {
    if (typeof sourceValue !== 'string') {
        return { matched: false, explanation: `Source value is not a string (type: ${typeof sourceValue}).` };
    }
    if (typeof claimedValue !== 'string') {
        return { matched: false, explanation: `Claimed value is not a string (type: ${typeof claimedValue}).` };
    }
    // Exact match (case-insensitive for most IDs)
    if (claimedValue.toLowerCase() === sourceValue.toLowerCase()) {
        return { matched: true, explanation: `Key-value "${claimedValue}" exactly matches source "${sourceValue}".` };
    }
    // For phone numbers, normalize and compare digits only — provided no
    // letters were changed along the way
    const digitsOf = (text) => text.replace(/\D/g, '');
    const lettersOf = (text) => text.replace(/[^\p{L}]/gu, '').toLowerCase();
    const claimedDigits = digitsOf(claimedValue);
    const sourceDigits = digitsOf(sourceValue);
    const sameDigits = claimedDigits.length >= 6 && sourceDigits.length >= 6 && claimedDigits === sourceDigits;
    if (sameDigits && lettersOf(claimedValue) === lettersOf(sourceValue)) {
        return { matched: true, explanation: `Phone digits match: "${claimedValue}" ≡ "${sourceValue}" (same digits).` };
    }
    return {
        matched: false,
        explanation: `Key-value mismatch: response says "${claimedValue}" but source is "${sourceValue}".`,
    };
}
|
|
1087
|
+
// =========================================================================
|
|
1088
|
+
// L2 — AGGREGATION Verification
|
|
1089
|
+
// =========================================================================
|
|
1090
|
+
/**
 * Field-name patterns for numeric values that make sense to aggregate.
 * A field qualifies when any one pattern matches its name (case-insensitive).
 */
const NUMERIC_AGGREGATABLE_PATTERNS = [
    // Time, money and general quantities
    /hours?/i,
    /amount/i,
    /total/i,
    /price/i,
    /cost/i,
    /salary/i,
    /balance/i,
    /quantity/i,
    /count/i,
    /score/i,
    /rating/i,
    // Physical and personal measures
    /duration/i,
    /distance/i,
    /weight/i,
    /age/i,
    /payment/i,
    // Finance
    /budget/i,
    /revenue/i,
    /profit/i,
    /loss/i,
    /expense/i,
    // Attendance
    /minutes?/i,
    /tardiness/i,
    /lateness/i,
    /overtime/i,
    /absence/i,
];
|
|
1098
|
+
/**
 * Words in a response that suggest an aggregation was performed, keyed by
 * aggregation operation. An operation is hinted at when any of its patterns
 * matches the response text.
 *
 * Keywords that start or end with a non-ASCII letter (the Cyrillic ones) are
 * delimited with Unicode-aware lookarounds instead of `\b`: without the `u`
 * flag `\b` only knows ASCII word characters, so a pattern such as
 * /\bдоля\b/ can never match a word surrounded by spaces or punctuation.
 */
const AGGREGATION_KEYWORDS = {
    sum: [
        /\btotal\b/i, /\bukupno\b/i, /\bsum\b/i, /\bzbir\b/i, /\bsuma\b/i,
        /\bsve zajedno\b/i, /\ball together\b/i,
    ],
    avg: [
        /\baverage\b/i, /\bprosek\b/i, /\bprosečn/i, /\bprosecn/i, /\bmean\b/i,
        /\bavg\b/i, /\bpo (osobi|zaposlenom|članu)/i,
    ],
    count: [
        /\bcount\b/i, /\bbroj\b/i, /\bima\s+\d+/i, /\bthere\s+(are|is)\s+\d+/i,
        /\bukupno\s+\d+/i,
    ],
    count_distinct: [
        /\brazličit/i, /\brazlicit/i, /\bdistinct\b/i, /\bunique\b/i,
        /\bjedinstven/i, /\bposebnih\b/i,
    ],
    min: [
        /\bminimum\b/i, /\bmin\b/i, /\bnajmanj/i, /\blowest\b/i, /\bnajniž/i, /\bnajniz/i,
    ],
    max: [
        /\bmaximum\b/i, /\bmax\b/i, /\bnajveć/i, /\bnajvec/i, /\bhighest\b/i, /\bnajviš/i, /\bnajvis/i,
    ],
    pct_of_total: [
        // "<number> % of ..." — the preposition may be Cyrillic, so its end is
        // checked with a Unicode-aware lookahead; capture group 1 is the number
        /\b(\d+(?:[.,]\d+)?)\s*%\s*(?:of|od|от|de|des|di|из)(?![\p{L}\p{N}_])/iu,
        /\budeo\b/i, /\bučešće\b/i, /\bucešce\b/i, /\bučesce\b/i,
        /\bshare\b/i, /\bproportion\b/i, /\bpercentage\b/i,
        /\bodnos\b/i, /\brazmera\b/i,
        /(?<![\p{L}\p{N}_])доля(?![\p{L}\p{N}_])/iu, /(?<![\p{L}\p{N}_])процент(?![\p{L}\p{N}_])/iu,
        /\bporcentaje\b/i, /\bporcentagem\b/i,
    ],
};
|
|
1131
|
+
/**
 * Scan every tool output of the trace for arrays of objects with aggregatable
 * numeric fields (the detection itself is done by findNumericArrays).
 *
 * @param trace Trace whose tool-output steps are scanned.
 * @returns Entries as pushed by findNumericArrays.
 */
function extractNumericArrays(trace) {
    const found = [];
    // NOTE(review): unlike extractArrayGroups / extractKeyValueFields, outputs
    // flagged by isSchemaOutput are NOT skipped here — confirm this is intended.
    Trace_1.TraceUtils.getToolOutputSteps(trace).forEach((step) => {
        (step.toolOutputs ?? []).forEach((toolOutput) => {
            findNumericArrays(toolOutput.output, step.stepId, found);
        });
    });
    return found;
}
|
|
1144
|
+
/**
 * Recursively find arrays of objects and collect, per field, the numeric
 * values of fields whose name matches NUMERIC_AGGREGATABLE_PATTERNS.
 *
 * - Non-array objects are descended into through their values.
 * - For a non-empty array whose first element is a non-null object, numbers
 *   (excluding NaN) and plain decimal strings such as "58" or "-3.5" are
 *   gathered per field name; every field with at least two values is pushed to
 *   `out` as { fieldName, values, stepId }. The items are then searched for
 *   nested arrays.
 * - Empty arrays, and arrays whose first element is not an object, are ignored.
 *
 * @param value  Value to inspect.
 * @param stepId Step id recorded on each emitted entry.
 * @param out    Array receiving the entries.
 * @param depth  Current recursion depth; the walk stops beyond MAX_WALK_DEPTH.
 */
function findNumericArrays(value, stepId, out, depth = 0) {
    if (depth > MAX_WALK_DEPTH) {
        return;
    }
    const isRecord = (candidate) => typeof candidate === 'object' && candidate !== null;
    const descendInto = (container) => {
        Object.values(container).forEach((child) => findNumericArrays(child, stepId, out, depth + 1));
    };
    if (!Array.isArray(value)) {
        // Plain objects may hold arrays further down
        if (isRecord(value)) {
            descendInto(value);
        }
        return;
    }
    if (value.length === 0 || !isRecord(value[0])) {
        return;
    }
    // Numbers are taken as-is; numeric strings (e.g. "58" from DB queries) are parsed
    const toNumber = (raw) => {
        if (typeof raw === 'number') {
            return Number.isNaN(raw) ? undefined : raw;
        }
        if (typeof raw === 'string') {
            const trimmed = raw.trim();
            return /^-?\d+(\.\d+)?$/.test(trimmed) ? parseFloat(trimmed) : undefined;
        }
        return undefined;
    };
    const valuesByField = new Map();
    for (const row of value) {
        if (!isRecord(row)) {
            continue;
        }
        for (const [fieldName, raw] of Object.entries(row)) {
            const numeric = toNumber(raw);
            if (numeric === undefined) {
                continue;
            }
            if (!NUMERIC_AGGREGATABLE_PATTERNS.some((pattern) => pattern.test(fieldName))) {
                continue;
            }
            const bucket = valuesByField.get(fieldName);
            if (bucket) {
                bucket.push(numeric);
            }
            else {
                valuesByField.set(fieldName, [numeric]);
            }
        }
    }
    valuesByField.forEach((values, fieldName) => {
        if (values.length >= 2) {
            out.push({ fieldName, values, stepId });
        }
    });
    // Items may themselves contain arrays
    value.filter(isRecord).forEach(descendInto);
}
|
|
1197
|
+
/**
 * Compute one aggregation over a list of numbers.
 *
 * Supported operations: 'sum', 'avg', 'count', 'count_distinct', 'min', 'max'
 * and 'pct_of_total' (the first value as a percentage of the sum of all
 * values; 0 when that sum is 0).
 *
 * For an empty list 'avg' is NaN, 'min' is Infinity and 'max' is -Infinity.
 *
 * @param values Numbers to aggregate.
 * @param op     Operation name.
 * @returns The aggregated number, or undefined for an unknown operation.
 */
function computeAggregation(values, op) {
    const sumAll = () => values.reduce((a, b) => a + b, 0);
    switch (op) {
        case 'sum': return sumAll();
        case 'avg': return sumAll() / values.length;
        case 'count': return values.length;
        case 'count_distinct': return new Set(values).size;
        // min/max are folded pairwise: spreading a large array into
        // Math.min/Math.max can exceed the engine's argument limit and throw.
        case 'min': return values.reduce((lowest, v) => Math.min(lowest, v), Infinity);
        case 'max': return values.reduce((highest, v) => Math.max(highest, v), -Infinity);
        case 'pct_of_total': {
            // Percentage of the first value relative to the total.
            const total = sumAll();
            return total === 0 ? 0 : (values[0] / total) * 100;
        }
        default: return undefined;
    }
}
|
|
1213
|
+
/**
 * Extract numbers from response text that might be aggregation results.
 * List ordinals (e.g. "14." at the start of a line), as detected by
 * isListOrdinal, are left out so numbered-list markers are not treated as
 * aggregation claims.
 *
 * Recognised notations (a number must not touch a letter, digit or underscore):
 *   "1.234.567" / "4.496"   European dot-thousands        → 1234567 / 4496
 *   "1.234,56"              dot-thousands + comma decimal → 1234.56
 *   "1,234.56"              comma-thousands + dot decimal → 1234.56
 *   "1,234,567"             comma-thousands, ≥2 groups    → 1234567
 *   "4,5" / "3.14" / "12"   simple decimal or integer     → 4.5 / 3.14 / 12
 * The ambiguous "1,234" keeps its reading as a comma decimal (1.234).
 *
 * @param text Response text.
 * @returns The numbers in order of appearance.
 */
function extractNumbersFromResponse(text) {
    // Alternatives run from most to least specific, so a full "1.234,56" is
    // taken as one number instead of being cut into "1.234" and "56".
    const pattern = /(?<!\w)(\d{1,3}(?:\.\d{3})+(?:,\d+)?|\d{1,3}(?:,\d{3})+\.\d+|\d{1,3}(?:,\d{3}){2,}|\d+(?:[.,]\d+)?)(?!\w)/g;
    // European style: dots group thousands, an optional comma starts the decimals
    const dotThousands = /^\d{1,3}(?:\.\d{3})+(?:,\d+)?$/;
    // Unambiguous comma-thousands: followed by ".decimals" or made of 2+ groups
    const commaThousands = /^\d{1,3}(?:,\d{3})+\.\d+$|^\d{1,3}(?:,\d{3}){2,}$/;
    const numbers = [];
    let match;
    while ((match = pattern.exec(text)) !== null) {
        const raw = match[1];
        // Skip list ordinals: "1." / "14." at the start of a line
        if (isListOrdinal(text, match.index, match.index + raw.length)) {
            continue;
        }
        let normalized;
        if (dotThousands.test(raw)) {
            normalized = raw.replace(/\./g, '').replace(',', '.'); // "1.234,5" → "1234.5"
        }
        else if (commaThousands.test(raw)) {
            normalized = raw.replace(/,/g, ''); // "1,234.5" → "1234.5"
        }
        else {
            normalized = raw.replace(',', '.'); // "4,5" → "4.5"
        }
        const num = parseFloat(normalized);
        if (!Number.isNaN(num)) {
            numbers.push(num);
        }
    }
    return numbers;
}
|
|
1238
|
+
/**
 * Decide whether the number occupying [start, end) in `text` is a list
 * ordinal marker such as "14." rather than a value.
 *
 * The number must be followed by "." and then whitespace or the end of the
 * text. It is an ordinal when it
 *   - starts the text, or
 *   - is preceded (within the 5 characters before it) only by line-start
 *     material: optional whitespace, an optional "*", "-" or ">" bullet and
 *     optional "*" emphasis markers, or
 *   - follows a comma/semicolon and another "N. " marker appears in the 80
 *     characters before it (inline list like "1. Ivan, 2. Milan, 14. Goran").
 *
 * @param text  Full response text.
 * @param start Index of the number's first character.
 * @param end   Index just past the number's last character.
 * @returns true when the number is a list ordinal.
 */
function isListOrdinal(text, start, end) {
    // Must be followed by '.' then whitespace or end-of-string
    const trailing = text.substring(end, end + 2);
    const dotThenBreak = /^\.\s/.test(trailing) || /^\.$/.test(trailing);
    if (!dotThenBreak) {
        return false;
    }
    if (start === 0) {
        return true;
    }
    const leading = text.substring(Math.max(0, start - 5), start);
    // Case 1: at the start of a line, possibly behind a bullet or emphasis marker
    const opensLine = /(?:^|\n)\s*(?:[*\->]\s*)?\**$/.test(leading);
    if (opensLine) {
        return true;
    }
    // Case 2: inline list — needs a separator right before the number...
    const followsSeparator = /[,;]\s*$/.test(leading);
    if (!followsSeparator) {
        return false;
    }
    // ...and another "N." marker nearby to confirm a numbered sequence
    const nearbyText = text.substring(Math.max(0, start - 80), start);
    return /\b\d{1,3}\.\s/.test(nearbyText);
}
|
|
1264
|
+
/**
 * Detect which aggregation operation the response describes and verify it.
 *
 * Two passes are made over the numeric arrays found in the trace:
 *   1. Keyword pass: for every operation whose AGGREGATION_KEYWORDS pattern
 *      matches the response, each response number that is close to the
 *      computed value (or is a plausible-but-wrong attempt at it) becomes a
 *      claim.
 *   2. Implicit pass (only when pass 1 produced nothing): same check for
 *      'sum' and 'avg' without requiring a keyword.
 *
 * Numbers that appear verbatim as individual data points (or as a scalar
 * count extracted from a tool output) are never reported as *wrong*
 * aggregations. Finally, when one response number produced several claims,
 * only the claim closest to its computed value is kept.
 *
 * @param {object} trace - Trace passed to extractNumericArrays / TraceUtils.
 * @param {string} responseText - Final response text.
 * @param {string} [sourceStepId] - Step id recorded on the claim source;
 *   defaults to 'final_response'.
 * @returns {object[]} Aggregation claims after de-duplication.
 */
function extractAggregationClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace)
        return [];
    const numArrays = extractNumericArrays(trace);
    if (numArrays.length === 0)
        return [];
    const responseLower = responseText.toLowerCase();
    const responseNumbers = extractNumbersFromResponse(responseText);
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    // Build a global set of all individual values across ALL arrays.
    // Used to exclude data points that appear as raw values from being
    // misidentified as wrong aggregation attempts.
    const allIndividualValues = new Set();
    for (const arr of numArrays) {
        for (const v of arr.values)
            allIndividualValues.add(v);
    }
    // Also include scalar aggregate fields (total, count, etc.) from tool outputs.
    // Without this, a direct quote like "total: 35" could be flagged as a wrong
    // aggregation when a different array happens to sum to a nearby value.
    for (const step of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        for (const to of step.toolOutputs ?? []) {
            const cnt = (0, Matchers_1.extractCountFromOutput)(to.output);
            if (cnt !== null)
                allIndividualValues.add(cnt);
        }
    }
    const claims = [];
    // Single place where an aggregation claim is shaped.
    const pushClaim = (respNum, arr, op, computedValue) => {
        claims.push({
            claimId: (0, crypto_1.randomUUID)(),
            type: 'aggregation',
            value: respNum,
            rawText: String(respNum),
            source,
            fieldName: arr.fieldName,
            sourceFieldValue: arr.values,
            aggregationOp: op,
            computedValue,
        });
    };
    // Same acceptance rule as matchAggregation: relative tolerance (looser
    // for averages, which are usually rounded) or an absolute gap < 0.5.
    const isCloseEnough = (respNum, correctValue, op) => {
        const tolerance = op === 'avg' ? 0.1 : 0.01;
        const diff = Math.abs(respNum - correctValue);
        const relDiff = correctValue !== 0 ? diff / Math.abs(correctValue) : diff;
        return relDiff <= tolerance || diff < 0.5;
    };
    // Keyword detection depends only on the response text, so resolve it
    // once instead of once per array.
    const matchedOps = Object.entries(AGGREGATION_KEYWORDS)
        .filter(([, patterns]) => patterns.some((p) => p.test(responseLower)))
        .map(([op]) => op);
    for (const arr of numArrays) {
        for (const aggOp of matchedOps) {
            const correctValue = computeAggregation(arr.values, aggOp);
            for (const respNum of responseNumbers) {
                if (isCloseEnough(respNum, correctValue, aggOp)) {
                    // Response has the correct (or close) aggregation
                    pushClaim(respNum, arr, aggOp, correctValue);
                }
                else if (isPlausibleAggregation(respNum, arr.values, aggOp)
                    // Skip numbers that exist as direct field values in ANY array —
                    // these are individual data points, not wrong aggregation attempts.
                    // E.g., break_count=70 for one employee ≠ avg of all break_counts.
                    && !allIndividualValues.has(respNum)) {
                    // Response has a wrong aggregation — still create claim so it gets flagged
                    pushClaim(respNum, arr, aggOp, correctValue);
                }
            }
        }
    }
    // -----------------------------------------------------------------------
    // Implicit aggregation: no keyword-based claim was produced, but a
    // response number matches (or nearly matches) a computed sum or avg and
    // does NOT match any individual data point. This catches cases like
    // "160" appearing in the response when the tool data sums to 160 but no
    // "total"/"ukupno" keyword is used.
    // -----------------------------------------------------------------------
    if (claims.length === 0) {
        // Only try sum and avg for implicit detection (count/min/max too likely to collide)
        const implicitOps = ['sum', 'avg'];
        for (const arr of numArrays) {
            if (arr.values.length < 2)
                continue;
            for (const op of implicitOps) {
                const correctValue = computeAggregation(arr.values, op);
                if (correctValue === 0)
                    continue;
                for (const respNum of responseNumbers) {
                    // Skip if this number appears as an individual data point
                    if (allIndividualValues.has(respNum))
                        continue;
                    // Correct and plausible-but-wrong values yield the same claim
                    // shape; the later match step decides which one it is.
                    if (isCloseEnough(respNum, correctValue, op)
                        || isPlausibleAggregation(respNum, arr.values, op)) {
                        pushClaim(respNum, arr, op, correctValue);
                    }
                }
            }
        }
    }
    // Value-level dedup: when the same response number creates claims against
    // multiple arrays, keep only the claim with the smallest deviation from
    // its computed value. This prevents cross-array false positives (e.g.,
    // "39" correctly matching count array sum but also wrongly matching
    // as a plausible aggregation of a tardiness array).
    const deviation = (claim) => (claim.computedValue !== undefined
        ? Math.abs(claim.value - claim.computedValue) / (Math.abs(claim.computedValue) || 1)
        : Infinity);
    const bestByValue = new Map();
    for (const c of claims) {
        const existing = bestByValue.get(c.value);
        // Strict '<' keeps the earliest claim on ties.
        if (existing === undefined || deviation(c) < deviation(existing)) {
            bestByValue.set(c.value, c);
        }
    }
    return deduplicateClaims([...bestByValue.values()]);
}
|
|
1433
|
+
/**
 * Check if a number is a plausible (but wrong) aggregation of the values.
 * E.g., if sum is 113 but response says 120, it's likely a math error.
 *
 * @param {number} respNum - Number found in the response.
 * @param {number[]} values - Source values the aggregation is computed over.
 * @param {string} op - Aggregation operation passed to computeAggregation.
 * @returns {boolean} True when respNum differs from the correct value but is
 *   close enough to look like an attempt at it.
 */
function isPlausibleAggregation(respNum, values, op) {
    const correct = computeAggregation(values, op);
    // No meaningful relative distance from a zero baseline.
    if (correct === 0)
        return false;
    const relDiff = Math.abs(respNum - correct) / Math.abs(correct);
    // Within 30% of the correct value — likely a math error, not random number
    if (relDiff > 0.01 && relDiff <= 0.3)
        return true;
    // For sums: accept anything between half and one-and-a-half times the
    // correct sum. min/max keep the window well-ordered when the sum is
    // negative (correct * 1.5 is then the LOWER bound).
    if (op === 'sum') {
        const lower = Math.min(correct * 0.5, correct * 1.5);
        const upper = Math.max(correct * 0.5, correct * 1.5);
        return respNum !== correct && respNum >= lower && respNum <= upper;
    }
    return false;
}
|
|
1454
|
+
/**
 * Match an aggregation claim against the computed correct value.
 *
 * A claim matches when it is within the relative tolerance (10% for 'avg',
 * 1% otherwise) or within an absolute distance of 0.5 of the computed value.
 *
 * @param {number} claimedValue - Number stated in the response.
 * @param {number} computedValue - Aggregation computed from source data.
 * @param {string} op - Aggregation operation name (used for tolerance and text).
 * @returns {{matched: boolean, explanation: string}}
 */
function matchAggregation(claimedValue, computedValue, op) {
    const tolerance = op === 'avg' ? 0.1 : 0.01;
    const diff = Math.abs(claimedValue - computedValue);
    // A relative difference is undefined against a zero baseline; fall back
    // to the absolute difference for the tolerance check in that case.
    const hasBaseline = computedValue !== 0;
    const relDiff = hasBaseline ? diff / Math.abs(computedValue) : diff;
    if (relDiff <= tolerance || diff < 0.5) {
        return {
            matched: true,
            explanation: `Aggregation (${op}) claim ${claimedValue} matches computed value ${computedValue}.`,
        };
    }
    // Only report a percentage when it really is one: with a zero baseline
    // relDiff holds an absolute difference and "500.0%" would be misleading.
    const offBy = hasBaseline
        ? `${diff.toFixed(2)} (${(relDiff * 100).toFixed(1)}%)`
        : diff.toFixed(2);
    return {
        matched: false,
        explanation: `Aggregation error: response says ${claimedValue} but ${op}(${computedValue}) is the correct value. Off by ${offBy}.`,
    };
}
|
|
1472
|
+
// =========================================================================
|
|
1473
|
+
// L2 — RANGE Matching (min/max, salary bands, thresholds)
|
|
1474
|
+
// =========================================================================
|
|
1475
|
+
/**
 * Field-name patterns used to recognise paired range fields.
 * A key matching a MIN pattern is treated as a lower bound; a key matching
 * a MAX pattern is treated as an upper bound.
 */
const RANGE_MIN_PATTERNS = [
    /^min/i,
    /min$/i,
    /_min_/i,
    /minimum/i,
    /lower/i,
    /from/i,
    /start/i,
];
const RANGE_MAX_PATTERNS = [
    /^max/i,
    /max$/i,
    /_max_/i,
    /maximum/i,
    /upper/i,
    /to$/i,
    /end$/i,
    /limit/i,
];
|
|
1478
|
+
/**
 * Walk tool outputs and find paired min/max numeric fields.
 * Detects: salary_min/salary_max, min_hours/max_hours, price_from/price_to,
 * and bare pairs such as min/max or from/to that live in the same object.
 *
 * Pairing rules:
 *   - one field must match a MIN pattern and the other a MAX pattern;
 *   - both must reduce to the same root name once the min/max marker is
 *     stripped (bare keys reduce to '' and must then share a parent path);
 *   - min must not exceed max;
 *   - each field takes part in at most one range, preferring a counterpart
 *     from the same parent object so records in a list are not cross-paired.
 *
 * @param {object} trace - Trace whose tool outputs are scanned.
 * @returns {{rootPath: string, min: number, max: number, stepId: *}[]}
 */
function extractRangeFields(trace) {
    const numericFields = new Map();
    for (const step of Trace_1.TraceUtils.getToolOutputSteps(trace)) {
        for (const to of step.toolOutputs ?? []) {
            collectNumericFields(to.output, step.stepId, '', numericFields);
        }
    }
    // Classify every path once (role + root name) instead of re-deriving it
    // for each candidate pair.
    const describeField = (path) => {
        const lastKey = (path.split('.').pop() ?? path).toLowerCase();
        return {
            isMin: RANGE_MIN_PATTERNS.some((p) => p.test(lastKey)),
            isMax: RANGE_MAX_PATTERNS.some((p) => p.test(lastKey)),
            // '' means the key was nothing but a min/max marker (e.g. "min").
            rootName: lastKey
                .replace(/^min_?|_?min$|minimum|lower|^from_?|_?from$|^start_?|_?start$/gi, '')
                .replace(/^max_?|_?max$|maximum|upper|^to_?|_?to$|^end_?|_?end$|^limit_?|_?limit$/gi, '')
                .replace(/^_|_$/g, ''),
            parentPath: path.replace(/\.?[^.]+$/, ''),
        };
    };
    const fieldInfo = new Map();
    for (const path of numericFields.keys()) {
        fieldInfo.set(path, describeField(path));
    }
    // Pair min/max fields by root name
    const ranges = [];
    const processed = new Set();
    for (const [path, entries] of numericFields) {
        if (processed.has(path))
            continue;
        const field = fieldInfo.get(path);
        if (!field.isMin && !field.isMax)
            continue;
        // Find the counterpart: first valid candidate wins unless a later
        // one shares the same parent object.
        let chosen = null;
        for (const [otherPath, otherEntries] of numericFields) {
            if (otherPath === path || processed.has(otherPath))
                continue;
            const other = fieldInfo.get(otherPath);
            if ((field.isMin && !other.isMax) || (field.isMax && !other.isMin))
                continue;
            // Check if they share a root name
            if (field.rootName !== other.rootName)
                continue;
            const sameParent = field.parentPath === other.parentPath;
            // Bare keys carry no name to tie them together, so only pair
            // them when they sit side by side in the same object.
            if (field.rootName === '' && !sameParent)
                continue;
            const minVal = field.isMin ? entries[0].value : otherEntries[0].value;
            const maxVal = field.isMax ? entries[0].value : otherEntries[0].value;
            if (minVal > maxVal)
                continue;
            if (chosen === null || sameParent) {
                chosen = { otherPath, minVal, maxVal };
            }
            if (sameParent)
                break;
        }
        if (chosen !== null) {
            ranges.push({
                rootPath: field.rootName || field.parentPath || 'range',
                min: chosen.minVal,
                max: chosen.maxVal,
                stepId: entries[0].stepId,
            });
            processed.add(path);
            processed.add(chosen.otherPath);
        }
    }
    return ranges;
}
|
|
1537
|
+
/**
 * Recursively record every numeric leaf of `value` into `out`, keyed by its
 * dotted/indexed path (e.g. "items[0].salary_min").
 *
 * @param {*} value - Current node of the structure being walked.
 * @param {*} stepId - Step id stored alongside each recorded number.
 * @param {string} path - Path of `value`; '' for the root. Numbers with an
 *   empty path are not recorded.
 * @param {Map<string, object[]>} out - Accumulator: path → entries.
 * @param {number} [depth=0] - Current recursion depth; the walk stops once
 *   it exceeds MAX_WALK_DEPTH.
 */
function collectNumericFields(value, stepId, path, out, depth = 0) {
    if (depth > MAX_WALK_DEPTH || value === null || value === undefined)
        return;
    const isRecordableNumber = typeof value === 'number' && path && !Number.isNaN(value);
    if (isRecordableNumber) {
        const bucket = out.get(path) ?? [];
        bucket.push({ fieldName: path, value, stepId });
        out.set(path, bucket);
        return;
    }
    if (Array.isArray(value)) {
        value.forEach((element, index) => {
            const childPath = path ? `${path}[${index}]` : `[${index}]`;
            collectNumericFields(element, stepId, childPath, out, depth + 1);
        });
        return;
    }
    if (typeof value === 'object') {
        for (const [key, child] of Object.entries(value)) {
            const childPath = path ? `${path}.${key}` : key;
            collectNumericFields(child, stepId, childPath, out, depth + 1);
        }
    }
}
|
|
1556
|
+
/**
 * Words in response that suggest a range is being described
 * (English plus Serbian/Croatian/Bosnian equivalents).
 *
 * Alternatives that could never change the outcome were dropped:
 * "od\s+\d" (always pre-empted by the plain "od" alternative) and a
 * duplicated "minimum".
 */
const RANGE_KEYWORDS = [
    /\b(between|od|izme[đd]u)\b/i,
    /\b(up to|do|maksimalno|najvi[šs]e)\b/i,
    /\b(at least|najmanje|minimum)\b/i,
    /\b(range|raspon|opseg)\b/i,
];
|
|
1563
|
+
/**
 * Extract range claims: detect when response describes a min/max range from tool output.
 *
 * For every range found by extractRangeFields, a response number becomes a
 * claim when it either
 *   - equals / is within 1% of one of the range endpoints, or
 *   - lies outside the range but within a 30% margin of it (a likely
 *     mis-quoted bound that should be flagged by the matcher).
 *
 * Nothing is extracted when the response has neither range wording
 * (RANGE_KEYWORDS) nor at least two numbers.
 *
 * @param {object} trace - Trace whose tool outputs supply the ranges.
 * @param {string} responseText - Final response text.
 * @param {string} [sourceStepId] - Step id recorded on the claim source;
 *   defaults to 'final_response'.
 * @returns {object[]} Range claims after de-duplication.
 */
function extractRangeClaims(trace, responseText, sourceStepId) {
    if (!responseText || !trace)
        return [];
    const ranges = extractRangeFields(trace);
    if (ranges.length === 0)
        return [];
    const responseLower = responseText.toLowerCase();
    const responseNumbers = extractNumbersFromResponse(responseText);
    // Check if response contains range-like language
    const hasRangeLanguage = RANGE_KEYWORDS.some((p) => p.test(responseLower));
    if (!hasRangeLanguage && responseNumbers.length < 2)
        return [];
    const source = {
        stepId: sourceStepId ?? 'final_response',
        role: 'final_response',
        rawText: responseText,
    };
    // Equal to the endpoint or within 1% of it.
    const isNearEndpoint = (num, endpoint) => num === endpoint
        || Math.abs(num - endpoint) / (Math.abs(endpoint) || 1) <= 0.01;
    const claims = [];
    for (const range of ranges) {
        // 30% margin around the range. The factor depends on the sign so the
        // margin always widens the interval: for a negative bound,
        // multiplying by 0.7 would move it toward zero, i.e. INSIDE the range.
        const lowerMargin = range.min >= 0 ? range.min * 0.7 : range.min * 1.3;
        const upperMargin = range.max >= 0 ? range.max * 1.3 : range.max * 0.7;
        for (const num of responseNumbers) {
            const matchesEndpoint = isNearEndpoint(num, range.min) || isNearEndpoint(num, range.max);
            // Close to the range but outside its bounds — flaggable mismatch.
            // Numbers strictly inside the range are not claims about its endpoints.
            const isOutside = num < range.min || num > range.max;
            const isJustOutside = isOutside && num >= lowerMargin && num <= upperMargin;
            if (!matchesEndpoint && !isJustOutside)
                continue;
            claims.push({
                claimId: (0, crypto_1.randomUUID)(),
                type: 'range',
                value: num,
                rawText: String(num),
                source,
                fieldName: range.rootPath,
                sourceFieldValue: { min: range.min, max: range.max },
            });
        }
    }
    return deduplicateClaims(claims);
}
|
|
1622
|
+
/**
 * Match a range claim: verify claimed number against source min/max.
 *
 * Outcomes, checked in order:
 *   1. source is not an object, or lacks numeric min/max → not matched
 *   2. claim equals an endpoint                          → matched
 *   3. claim is within 1% of an endpoint                 → matched
 *   4. claim lies inside [min, max]                      → matched
 *   5. anything else                                     → not matched
 *
 * @param {number} claimedValue - Number stated in the response.
 * @param {*} sourceValue - Expected to be `{ min: number, max: number }`.
 * @returns {{matched: boolean, explanation: string}}
 */
function matchRange(claimedValue, sourceValue) {
    const verdict = (matched, explanation) => ({ matched, explanation });
    if (sourceValue === null || typeof sourceValue !== 'object') {
        return verdict(false, 'Source value is not a range object.');
    }
    const { min, max } = sourceValue;
    if (typeof min !== 'number' || typeof max !== 'number') {
        return verdict(false, 'Source range missing min or max.');
    }
    const bounds = `[${min}, ${max}]`;
    // Exact match to either endpoint
    if (claimedValue === min || claimedValue === max) {
        return verdict(true, `Range claim ${claimedValue} matches endpoint ${bounds}.`);
    }
    // Close to an endpoint (within 1%); a zero endpoint falls back to an
    // absolute distance by dividing by 1.
    const relativeGap = (endpoint) => Math.abs(claimedValue - endpoint) / (Math.abs(endpoint) || 1);
    if (relativeGap(min) <= 0.01 || relativeGap(max) <= 0.01) {
        return verdict(true, `Range claim ${claimedValue} approximately matches endpoint ${bounds}.`);
    }
    // Within the range (not exactly an endpoint, but valid)
    if (claimedValue >= min && claimedValue <= max) {
        return verdict(true, `Range claim ${claimedValue} is within bounds ${bounds}.`);
    }
    return verdict(false, `Range mismatch: response says ${claimedValue} but source range is ${bounds}.`);
}
|
|
1661
|
+
// ---------------------------------------------------------------------------
|
|
1662
|
+
// Helpers
|
|
1663
|
+
// ---------------------------------------------------------------------------
|
|
1664
|
+
/**
 * Return the first synonym from `synonyms` that occurs in `textLower`, or
 * null when none does.
 *
 * Matching strategy per synonym:
 *   - contains a space         → plain substring match
 *   - in AMBIGUOUS_SHORT_WORDS → only when it leads the response as an answer
 *   - printable ASCII          → case-insensitive whole-word regex
 *   - anything else            → plain substring match
 *
 * @param {string} textLower - Lower-cased response text.
 * @param {Iterable<string>} synonyms - Candidate synonyms, in priority order.
 * @returns {string|null} The matching synonym exactly as listed, or null.
 */
function findSynonymInText(textLower, synonyms) {
    for (const syn of synonyms) {
        const synLow = syn.toLowerCase();
        // Multi-word synonyms: "on leave", "na čekanju"
        if (syn.includes(' ')) {
            if (textLower.includes(synLow))
                return syn;
            continue;
        }
        if (AMBIGUOUS_SHORT_WORDS.has(synLow)) {
            // Ambiguous short words like "da", "ne" — match when:
            //   1. the entire response is just the word, OR
            //   2. the response starts with the word + punctuation/dash, OR
            //   3. the response starts with the word + space and is short
            //      (<80 chars), i.e. an affirmative/negative answer rather
            //      than a conjunction inside a longer sentence.
            const trimmed = textLower.trim();
            const answerSuffixes = [',', '.', '!', ' -', ' –'];
            const leadsAsAnswer = trimmed === synLow
                || answerSuffixes.some((suffix) => trimmed.startsWith(synLow + suffix))
                || (trimmed.startsWith(synLow + ' ') && trimmed.length < 80);
            if (leadsAsAnswer)
                return syn;
            continue;
        }
        // Single word — use word boundary for ASCII, includes() for non-ASCII
        // (JS \b only works for [a-zA-Z0-9_], not Cyrillic/CJK/Arabic/etc.)
        const isPrintableAscii = /^[\x20-\x7e]+$/.test(synLow);
        const found = isPrintableAscii
            ? new RegExp(`\\b${escapeRegex(syn)}\\b`, 'i').test(textLower)
            : textLower.includes(synLow);
        if (found)
            return syn;
    }
    return null;
}
|
|
1707
|
+
/**
 * Find if response contains a DIFFERENT enum value from the same domain.
 *
 * Only values listed as opposites of `sourceCanonical` in ENUM_OPPOSITES are
 * considered, and synonyms in AMBIGUOUS_SHORT_WORDS are never used as
 * evidence of a contradiction.
 *
 * @param {string} responseLower - Lower-cased response text.
 * @param {string} sourceCanonical - Canonical enum value from the source.
 * @returns {{foundCanonical: string, foundSynonym: string}|null} The first
 *   opposing value (and the synonym that revealed it), or null.
 */
function findContradiction(responseLower, sourceCanonical) {
    // Only check semantically opposed values, not ALL enum translations
    const opposites = ENUM_OPPOSITES[sourceCanonical];
    if (!opposites)
        return null;
    // Multi-word and non-ASCII synonyms use substring search; single ASCII
    // words use a whole-word regex (JS \b is ASCII-only).
    const occursInResponse = (syn) => {
        const synLow = syn.toLowerCase();
        const needsSubstringSearch = syn.includes(' ') || !/^[\x20-\x7e]+$/.test(synLow);
        if (needsSubstringSearch)
            return responseLower.includes(synLow);
        return new RegExp(`\\b${escapeRegex(syn)}\\b`, 'i').test(responseLower);
    };
    for (const oppCanonical of opposites) {
        const synonyms = ENUM_TRANSLATIONS[oppCanonical];
        if (!synonyms)
            continue;
        for (const syn of synonyms) {
            // Skip ambiguous short words in contradiction detection
            if (AMBIGUOUS_SHORT_WORDS.has(syn.toLowerCase()))
                continue;
            if (occursInResponse(syn)) {
                return { foundCanonical: oppCanonical, foundSynonym: syn };
            }
        }
    }
    return null;
}
|
|
1744
|
+
/**
 * Escape every regular-expression metacharacter in `str` so the result can
 * be embedded in a RegExp source and match the input literally.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with . * + ? ^ $ { } ( ) | [ ] \ backslash-escaped.
 */
function escapeRegex(str) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    return str.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
1747
|
+
/**
 * Derive a metric name from a JSON field path.
 * "employees[0].late_count" → "late_count", "overtime_hours" → "overtime_hours",
 * "grid[0][1]" → "grid"
 *
 * @param {string} [fieldName] - Dotted/indexed field path.
 * @returns {string|undefined} Last path segment without trailing array
 *   indices, or undefined when nothing usable remains.
 */
function deriveMetric(fieldName) {
    if (!fieldName)
        return undefined;
    const last = fieldName.split('.').pop() ?? fieldName;
    // Strip ALL trailing index groups so nested arrays ("grid[0][1]") do not
    // leave an index inside the metric name.
    const cleaned = last.replace(/(?:\[\d+\])+$/, '');
    return cleaned || undefined;
}
/**
 * Remove duplicate claims (same fieldName + same value), keeping the first
 * occurrence of each.
 *
 * Side effect: every kept claim that has a fieldName but no metric gets its
 * `metric` populated from the fieldName (the claim object is updated in place).
 *
 * @param {object[]} claims - Claims to filter.
 * @returns {object[]} New array containing the kept claims in original order.
 */
function deduplicateClaims(claims) {
    const seen = new Set();
    const unique = [];
    for (const c of claims) {
        const key = `${c.fieldName}::${c.value}`;
        if (seen.has(key))
            continue;
        seen.add(key);
        // Populate metric from fieldName if not already set
        if (!c.metric && c.fieldName) {
            c.metric = deriveMetric(c.fieldName);
        }
        unique.push(c);
    }
    return unique;
}
|
|
1773
|
+
//# sourceMappingURL=index.js.map
|