truthguard-ai 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truthguard-ai might be problematic. Click here for more details.
- package/dist-npm/Claims/index.d.ts +73 -0
- package/dist-npm/Claims/index.d.ts.map +1 -0
- package/dist-npm/Claims/index.js +1669 -0
- package/dist-npm/Claims/index.js.map +1 -0
- package/dist-npm/Config/index.d.ts +41 -0
- package/dist-npm/Config/index.d.ts.map +1 -0
- package/dist-npm/Config/index.js +129 -0
- package/dist-npm/Config/index.js.map +1 -0
- package/dist-npm/Grounding/index.d.ts +40 -0
- package/dist-npm/Grounding/index.d.ts.map +1 -0
- package/dist-npm/Grounding/index.js +1433 -0
- package/dist-npm/Grounding/index.js.map +1 -0
- package/dist-npm/L2/index.d.ts +93 -0
- package/dist-npm/L2/index.d.ts.map +1 -0
- package/dist-npm/L2/index.js +1773 -0
- package/dist-npm/L2/index.js.map +1 -0
- package/dist-npm/Matchers/index.d.ts +101 -0
- package/dist-npm/Matchers/index.d.ts.map +1 -0
- package/dist-npm/Matchers/index.js +690 -0
- package/dist-npm/Matchers/index.js.map +1 -0
- package/dist-npm/Mode/index.d.ts +87 -0
- package/dist-npm/Mode/index.d.ts.map +1 -0
- package/dist-npm/Mode/index.js +117 -0
- package/dist-npm/Mode/index.js.map +1 -0
- package/dist-npm/Policy/index.d.ts +89 -0
- package/dist-npm/Policy/index.d.ts.map +1 -0
- package/dist-npm/Policy/index.js +143 -0
- package/dist-npm/Policy/index.js.map +1 -0
- package/dist-npm/Registry/index.d.ts +93 -0
- package/dist-npm/Registry/index.d.ts.map +1 -0
- package/dist-npm/Registry/index.js +818 -0
- package/dist-npm/Registry/index.js.map +1 -0
- package/dist-npm/Rules/index.d.ts +587 -0
- package/dist-npm/Rules/index.d.ts.map +1 -0
- package/dist-npm/Rules/index.js +6236 -0
- package/dist-npm/Rules/index.js.map +1 -0
- package/dist-npm/Rules/intents.d.ts +22 -0
- package/dist-npm/Rules/intents.d.ts.map +1 -0
- package/dist-npm/Rules/intents.js +242 -0
- package/dist-npm/Rules/intents.js.map +1 -0
- package/dist-npm/TraceReadiness/index.d.ts +42 -0
- package/dist-npm/TraceReadiness/index.d.ts.map +1 -0
- package/dist-npm/TraceReadiness/index.js +169 -0
- package/dist-npm/TraceReadiness/index.js.map +1 -0
- package/dist-npm/i18n/index.d.ts +44 -0
- package/dist-npm/i18n/index.d.ts.map +1 -0
- package/dist-npm/i18n/index.js +124 -0
- package/dist-npm/i18n/index.js.map +1 -0
- package/package.json +5 -17
- package/dist/cli/index.d.ts +0 -15
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/cli/index.js +0 -807
- package/dist/cli/index.js.map +0 -1
|
@@ -0,0 +1,690 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Matchers
|
|
4
|
+
*
|
|
5
|
+
* Matching strategies for comparing extracted claim values against
|
|
6
|
+
* values found in tool outputs.
|
|
7
|
+
*
|
|
8
|
+
* V1 supports:
|
|
9
|
+
* - NumericMatcher (relative tolerance)
|
|
10
|
+
* - CountMatcher (exact match)
|
|
11
|
+
* - DateMatcher (exact ISO-8601 match)
|
|
12
|
+
* - NameMatcher (fuzzy / Jaro-Winkler-like similarity)
|
|
13
|
+
*/
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.matchNumeric = matchNumeric;
|
|
16
|
+
exports.normaliseUnit = normaliseUnit;
|
|
17
|
+
exports.tryUnitConversion = tryUnitConversion;
|
|
18
|
+
exports.matchNumericWithUnits = matchNumericWithUnits;
|
|
19
|
+
exports.isTransposedDigits = isTransposedDigits;
|
|
20
|
+
exports.matchCount = matchCount;
|
|
21
|
+
exports.matchDate = matchDate;
|
|
22
|
+
exports.jaroSimilarity = jaroSimilarity;
|
|
23
|
+
exports.jaroWinklerSimilarity = jaroWinklerSimilarity;
|
|
24
|
+
exports.matchName = matchName;
|
|
25
|
+
exports.extractValuesFromOutput = extractValuesFromOutput;
|
|
26
|
+
exports.extractValuesWithKeys = extractValuesWithKeys;
|
|
27
|
+
exports.inferUnitFromFieldName = inferUnitFromFieldName;
|
|
28
|
+
exports.extractCountFromOutput = extractCountFromOutput;
|
|
29
|
+
const Claims_1 = require("../Claims");
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Numeric matcher
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
/**
 * Decide whether a claimed number agrees with a source number, judged by the
 * relative deviation |claimed - source| / |source|.
 *
 * A source of 0 cannot anchor a relative deviation, so it is handled apart:
 * 0 vs 0 matches with deviation 0, and any non-zero claim is a mismatch with
 * deviation 1.
 *
 * @param {number} claimed - Value stated in the claim.
 * @param {number} source - Reference value taken from the tool output.
 * @param {{ numericRelativeTolerance: number }} tolerances - Only read when source is non-zero.
 * @returns {{ matched: boolean, deviation: number, explanation: string }}
 */
function matchNumeric(claimed, source, tolerances) {
    if (source === 0) {
        return claimed === 0
            ? { matched: true, deviation: 0, explanation: 'Both values are 0.' }
            : { matched: false, deviation: 1, explanation: `Source is 0 but claim is ${claimed}.` };
    }
    const allowed = tolerances.numericRelativeTolerance;
    const deviation = Math.abs(claimed - source) / Math.abs(source);
    const withinTolerance = deviation <= allowed;
    // The tolerance is printed without decimals, the deviation with two.
    const allowedPct = (allowed * 100).toFixed(0);
    const deviationPct = (deviation * 100).toFixed(2);
    let explanation;
    if (withinTolerance) {
        explanation = `${claimed} is within ${allowedPct}% of ${source} (deviation: ${deviationPct}%).`;
    }
    else {
        explanation = `${claimed} deviates ${deviationPct}% from ${source} (tolerance: ${allowedPct}%).`;
    }
    return { matched: withinTolerance, deviation, explanation };
}
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Unit conversion table
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
/**
 * Multiplicative factors for converting a value between two canonical units.
 * Keys take the form "fromUnit→toUnit"; multiplying a value expressed in
 * `fromUnit` by the stored factor yields the value in `toUnit`.
 * Month and year lengths are fixed approximations (30 and 365 days).
 */
const UNIT_CONVERSIONS = {};
for (const [fromUnit, toUnit, factor] of [
    // time
    ['minutes', 'hours', 1 / 60],
    ['hours', 'minutes', 60],
    ['seconds', 'minutes', 1 / 60],
    ['minutes', 'seconds', 60],
    ['seconds', 'hours', 1 / 3600],
    ['hours', 'seconds', 3600],
    ['days', 'hours', 24],
    ['hours', 'days', 1 / 24],
    ['days', 'minutes', 1440],
    ['minutes', 'days', 1 / 1440],
    ['weeks', 'days', 7],
    ['days', 'weeks', 1 / 7],
    ['months', 'days', 30],
    ['days', 'months', 1 / 30],
    ['years', 'days', 365],
    ['days', 'years', 1 / 365],
    ['years', 'months', 12],
    ['months', 'years', 1 / 12],
    // distance
    ['km', 'meters', 1000],
    ['meters', 'km', 1 / 1000],
    ['km', 'miles', 0.621371],
    ['miles', 'km', 1.60934],
    // weight
    ['kg', 'lbs', 2.20462],
    ['lbs', 'kg', 0.453592],
    // data size (binary multiples)
    ['bytes', 'kb', 1 / 1024],
    ['kb', 'bytes', 1024],
    ['kb', 'mb', 1 / 1024],
    ['mb', 'kb', 1024],
    ['mb', 'gb', 1 / 1024],
    ['gb', 'mb', 1024],
]) {
    UNIT_CONVERSIONS[`${fromUnit}→${toUnit}`] = factor;
}
|
|
98
|
+
/**
 * Canonical unit aliases — maps alternative forms to UnitType canonical values.
 * Covers 11 languages: EN, ES, FR, PT, SR, RU, HI, AR, BN, ZH, JA.
 *
 * The table is a null-prototype dictionary, so a lookup with an arbitrary
 * string (for example "constructor") can only ever yield an alias defined
 * here and never a member inherited from Object.prototype.
 */
const UNIT_ALIASES = Object.assign(Object.create(null), {
    // ── English ──
    minute: 'minutes', mins: 'minutes', min: 'minutes',
    hour: 'hours', hrs: 'hours', hr: 'hours', h: 'hours',
    second: 'seconds', secs: 'seconds', sec: 'seconds', s: 'seconds',
    day: 'days', d: 'days',
    week: 'weeks', wk: 'weeks', wks: 'weeks',
    month: 'months', mo: 'months', mos: 'months',
    year: 'years', yr: 'years', yrs: 'years',
    kilometer: 'km', kilometers: 'km', kilometre: 'km', kilometres: 'km',
    meter: 'meters', metre: 'meters', metres: 'meters',
    mile: 'miles', mi: 'miles',
    kilogram: 'kg', kilograms: 'kg',
    pound: 'lbs', pounds: 'lbs', lb: 'lbs',
    byte: 'bytes',
    kilobyte: 'kb', kilobytes: 'kb',
    megabyte: 'mb', megabytes: 'mb',
    gigabyte: 'gb', gigabytes: 'gb',
    percent: 'percent', percentage: 'percent',
    degree: 'degrees',
    // ── Spanish (es) ──
    segundo: 'seconds', segundos: 'seconds',
    minuto: 'minutes', minutos: 'minutes',
    hora: 'hours', horas: 'hours',
    'día': 'days', dia: 'days', 'días': 'days', dias: 'days',
    semana: 'weeks', semanas: 'weeks',
    mes: 'months', meses: 'months',
    'año': 'years', 'años': 'years',
    porcentaje: 'percent',
    grado: 'degrees', grados: 'degrees',
    // ── French (fr) ──
    seconde: 'seconds', secondes: 'seconds',
    heure: 'hours', heures: 'hours',
    jour: 'days', jours: 'days',
    semaine: 'weeks', semaines: 'weeks',
    mois: 'months',
    an: 'years', ans: 'years', 'année': 'years', 'années': 'years',
    pourcent: 'percent', pourcentage: 'percent',
    'degré': 'degrees', 'degrés': 'degrees',
    // ── Portuguese (pt) ──
    'mês': 'months',
    ano: 'years', anos: 'years',
    porcentagem: 'percent', porcento: 'percent',
    grau: 'degrees', graus: 'degrees',
    // ── Serbian (sr) ──
    sekunda: 'seconds', sekundi: 'seconds', sekunde: 'seconds',
    minut: 'minutes', minuta: 'minutes', minuti: 'minutes',
    sat: 'hours', sati: 'hours',
    dan: 'days', dana: 'days', dani: 'days',
    nedelja: 'weeks', nedelje: 'weeks', sedmica: 'weeks', sedmice: 'weeks',
    mesec: 'months', meseci: 'months', meseca: 'months',
    godina: 'years', godine: 'years',
    procenat: 'percent', procenata: 'percent', posto: 'percent',
    stepen: 'degrees', stepeni: 'degrees', stepena: 'degrees',
    // ── Russian (ru) ──
    'секунда': 'seconds', 'секунды': 'seconds', 'секунд': 'seconds',
    'минута': 'minutes', 'минуты': 'minutes', 'минут': 'minutes',
    'час': 'hours', 'часа': 'hours', 'часов': 'hours',
    'день': 'days', 'дня': 'days', 'дней': 'days',
    'неделя': 'weeks', 'недели': 'weeks', 'недель': 'weeks',
    'месяц': 'months', 'месяца': 'months', 'месяцев': 'months',
    'год': 'years', 'года': 'years', 'лет': 'years',
    'процент': 'percent', 'процентов': 'percent', 'процента': 'percent',
    'градус': 'degrees', 'градуса': 'degrees', 'градусов': 'degrees',
    // ── Hindi (hi) ──
    'सेकंड': 'seconds',
    'मिनट': 'minutes',
    'घंटा': 'hours', 'घंटे': 'hours', 'घंटों': 'hours',
    'दिन': 'days', 'दिनों': 'days',
    'हफ्ता': 'weeks', 'हफ्ते': 'weeks', 'हफ्तों': 'weeks', 'सप्ताह': 'weeks',
    'महीना': 'months', 'महीने': 'months', 'महीनों': 'months',
    'साल': 'years', 'वर्ष': 'years',
    'प्रतिशत': 'percent', 'डिग्री': 'degrees',
    // ── Arabic (ar) ──
    'ثانية': 'seconds', 'ثوان': 'seconds', 'ثواني': 'seconds',
    'دقيقة': 'minutes', 'دقائق': 'minutes',
    'ساعة': 'hours', 'ساعات': 'hours',
    'يوم': 'days', 'أيام': 'days',
    'أسبوع': 'weeks', 'أسابيع': 'weeks',
    'شهر': 'months', 'أشهر': 'months', 'شهور': 'months',
    'سنة': 'years', 'سنوات': 'years',
    'بالمئة': 'percent', 'نسبة': 'percent',
    'درجة': 'degrees', 'درجات': 'degrees',
    // ── Bengali (bn) ──
    'সেকেন্ড': 'seconds', 'মিনিট': 'minutes', 'ঘণ্টা': 'hours',
    'দিন': 'days', 'সপ্তাহ': 'weeks', 'মাস': 'months', 'বছর': 'years',
    'শতাংশ': 'percent', 'ডিগ্রি': 'degrees',
    // ── Mandarin Chinese (zh) ──
    '秒': 'seconds', '分钟': 'minutes', '小时': 'hours',
    '天': 'days', '日': 'days',
    '周': 'weeks', '星期': 'weeks',
    '个月': 'months', '月': 'months', '年': 'years',
    '百分比': 'percent', '度': 'degrees',
    // ── Japanese (ja) ──
    '時間': 'hours', '分': 'minutes',
    '週間': 'weeks', '週': 'weeks',
    'ヶ月': 'months', 'か月': 'months',
    'パーセント': 'percent',
    // ── Currency symbols / codes ──
    '$': 'usd', 'usd': 'usd', 'dollar': 'usd', 'dollars': 'usd',
    '€': 'eur', 'eur': 'eur', 'euro': 'eur', 'euros': 'eur', 'evro': 'eur', 'evra': 'eur',
    '£': 'gbp', 'gbp': 'gbp', 'funta': 'gbp', 'funti': 'gbp',
    'rsd': 'rsd', 'din': 'rsd', 'dinara': 'rsd', 'dinar': 'rsd', 'dinari': 'rsd',
    'chf': 'chf', 'franc': 'chf', 'francs': 'chf', 'franak': 'chf', 'franaka': 'chf',
    'jpy': 'jpy', 'yen': 'jpy', '円': 'jpy', '¥': 'jpy',
    'cny': 'cny', 'yuan': 'cny', '元': 'cny',
    'rub': 'rub', 'ruble': 'rub', 'rubles': 'rub', 'рубль': 'rub', 'рублей': 'rub', 'рубля': 'rub',
    'inr': 'inr', 'rupee': 'inr', 'rupees': 'inr', 'रुपये': 'inr', 'रुपया': 'inr',
    'brl': 'brl', 'real': 'brl', 'reais': 'brl',
});
|
|
212
|
+
/**
 * Normalise a unit string to its canonical form.
 *
 * The input is lower-cased and trimmed, then looked up in UNIT_ALIASES.
 * A string with no alias is returned in its lower-cased, trimmed form.
 *
 * @param {string} raw - Unit as written in a claim or tool output.
 * @returns {string} Canonical unit, or the cleaned input when no alias exists.
 */
function normaliseUnit(raw) {
    const lower = raw.toLowerCase().trim();
    // Only own entries count as aliases. A bare `UNIT_ALIASES[lower]` lookup
    // on a plain object would resolve inherited members, so the input
    // "constructor" would come back as a function instead of a string.
    if (!Object.prototype.hasOwnProperty.call(UNIT_ALIASES, lower)) {
        return lower;
    }
    return UNIT_ALIASES[lower] ?? lower;
}
|
|
217
|
+
/**
 * Express `sourceValue`, given in `sourceUnit`, in terms of `claimUnit`.
 *
 * Both unit names are normalised first. Identical units return the value
 * unchanged; otherwise a single direct factor is looked up in
 * UNIT_CONVERSIONS under the key "from→to".
 *
 * @param {number} sourceValue - Value to convert.
 * @param {string} sourceUnit - Unit the value is currently expressed in.
 * @param {string} claimUnit - Unit to convert into.
 * @returns {number|null} Converted value, or null when no direct factor is listed.
 */
function tryUnitConversion(sourceValue, sourceUnit, claimUnit) {
    const [fromUnit, toUnit] = [sourceUnit, claimUnit].map((unit) => normaliseUnit(unit));
    if (fromUnit === toUnit) {
        return sourceValue;
    }
    const multiplier = UNIT_CONVERSIONS[`${fromUnit}→${toUnit}`];
    return multiplier === undefined ? null : sourceValue * multiplier;
}
|
|
233
|
+
/**
 * Scale a fraction to a percentage value.
 *
 * Binary floating point makes a raw `x * 100` noisy (0.07 * 100 evaluates to
 * 7.000000000000001). Rounding to 15 significant digits removes that noise
 * so it neither breaks a tight-tolerance comparison nor shows up in messages.
 */
function fractionToPercent(fraction) {
    return Number((fraction * 100).toPrecision(15));
}
/**
 * Compare two numbers with optional unit handling.
 *
 * Steps, in order:
 *   1. Direct comparison of the raw numbers; a match is returned as-is.
 *   2. Percentage normalisation when exactly one side is labelled "percent"
 *      and the other has no unit: a fraction strictly between 0 and 1 is
 *      scaled by 100 and compared with a value in [1, 100].
 *   3. Unit conversion when both units are given: the source is converted
 *      into the claim's unit and compared again. The result is returned
 *      whether or not it matches.
 *   4. Otherwise the failed direct result is returned.
 *
 * @param {number} claimed - Value stated in the claim.
 * @param {string|undefined} claimUnit - Unit attached to the claim, if any.
 * @param {number} source - Value from the tool output.
 * @param {string|undefined} sourceUnit - Unit attached to the source value, if any.
 * @param {object} tolerances - Passed through to matchNumeric.
 * @returns {object} A matchNumeric-style result, possibly with a more specific explanation.
 */
function matchNumericWithUnits(claimed, claimUnit, source, sourceUnit, tolerances) {
    // Direct match first
    const direct = matchNumeric(claimed, source, tolerances);
    if (direct.matched) {
        return direct;
    }
    const normClaim = claimUnit ? normaliseUnit(claimUnit) : undefined;
    const normSource = sourceUnit ? normaliseUnit(sourceUnit) : undefined;
    const isFraction = (x) => x > 0 && x < 1;
    const isPercentValue = (x) => x >= 1 && x <= 100;
    const deviationPct = (result) => ((result.deviation ?? 0) * 100).toFixed(2);
    if (normClaim === 'percent' && !normSource) {
        // Claim says "45%" and source is a decimal fraction like 0.45
        if (isFraction(source) && isPercentValue(claimed)) {
            const sourcePct = fractionToPercent(source);
            const scaled = matchNumeric(claimed, sourcePct, tolerances);
            if (scaled.matched) {
                return {
                    ...scaled,
                    explanation: `${claimed}% matches source ${source} (interpreted as ${sourcePct}%, deviation: ${deviationPct(scaled)}%).`,
                };
            }
        }
        // Claim says "0.45%" (fraction-as-percent) and source is 45
        if (isFraction(claimed) && isPercentValue(source)) {
            const claimedPct = fractionToPercent(claimed);
            const scaled = matchNumeric(claimedPct, source, tolerances);
            if (scaled.matched) {
                return {
                    ...scaled,
                    explanation: `${claimed}% (=${claimedPct}%) matches source ${source} (deviation: ${deviationPct(scaled)}%).`,
                };
            }
        }
    }
    if (!normClaim && normSource === 'percent') {
        // Source is "45%" and claim is 0.45 fraction
        if (isFraction(claimed) && isPercentValue(source)) {
            const claimedPct = fractionToPercent(claimed);
            const scaled = matchNumeric(claimedPct, source, tolerances);
            if (scaled.matched) {
                return {
                    ...scaled,
                    explanation: `Claimed ${claimed} matches source ${source}% (normalised to ${claimedPct}%, deviation: ${deviationPct(scaled)}%).`,
                };
            }
        }
    }
    // If both units are known, try conversion
    if (claimUnit && sourceUnit) {
        const converted = tryUnitConversion(source, sourceUnit, claimUnit);
        if (converted !== null) {
            const result = matchNumeric(claimed, converted, tolerances);
            return {
                ...result,
                explanation: result.matched
                    ? `${claimed} ${claimUnit} matches ${source} ${sourceUnit} after unit conversion (=${converted.toFixed(2)} ${claimUnit}, deviation: ${deviationPct(result)}%).`
                    : `${claimed} ${claimUnit} does not match ${source} ${sourceUnit} even after conversion (=${converted.toFixed(2)} ${claimUnit}, deviation: ${deviationPct(result)}%).`,
            };
        }
    }
    return direct;
}
|
|
300
|
+
// ---------------------------------------------------------------------------
|
|
301
|
+
// Transposed digits detection
|
|
302
|
+
// ---------------------------------------------------------------------------
|
|
303
|
+
/**
 * Detect a single adjacent-digit transposition between two numbers,
 * e.g. 23 vs 32, 1234 vs 1243, 156 vs 165.
 *
 * Only distinct, non-negative integers whose decimal strings have the same
 * length (at least two characters) are considered.
 *
 * @param {number} claimed - Number stated in the claim.
 * @param {number} source - Number found in the source.
 * @returns {boolean} True when swapping one pair of neighbouring digits of
 *   `claimed` yields `source`.
 */
function isTransposedDigits(claimed, source) {
    const comparable = claimed !== source
        && Number.isInteger(claimed)
        && Number.isInteger(source)
        && claimed >= 0
        && source >= 0;
    if (!comparable) {
        return false;
    }
    const claimedDigits = String(claimed);
    const sourceDigits = String(source);
    if (claimedDigits.length !== sourceDigits.length || claimedDigits.length < 2) {
        return false;
    }
    // One adjacent swap changes exactly two neighbouring positions, and the
    // characters at those positions are exchanged.
    const differing = [];
    for (let pos = 0; pos < claimedDigits.length; pos += 1) {
        if (claimedDigits[pos] !== sourceDigits[pos]) {
            differing.push(pos);
        }
    }
    if (differing.length !== 2) {
        return false;
    }
    const [left, right] = differing;
    return right === left + 1
        && claimedDigits[left] === sourceDigits[right]
        && claimedDigits[right] === sourceDigits[left];
}
|
|
333
|
+
// ---------------------------------------------------------------------------
|
|
334
|
+
// Count matcher
|
|
335
|
+
// ---------------------------------------------------------------------------
|
|
336
|
+
/**
 * Compare two count values. A match always requires the counts to be equal;
 * `tolerances.countExactMatch` only selects the wording of the explanation.
 *
 * The deviation is a relative fraction on the same scale as the numeric
 * matcher:
 *   both counts 0  → 0
 *   otherwise      → |claimed - source| / max(|source|, 1)
 *
 * @param {number} claimed - Count stated in the claim.
 * @param {number} source - Count taken from the tool output.
 * @param {{ countExactMatch: boolean }} tolerances - Matching settings.
 * @returns {{ matched: boolean, deviation: number, explanation: string }}
 */
function matchCount(claimed, source, tolerances) {
    const bothZero = source === 0 && claimed === 0;
    const deviation = bothZero
        ? 0
        : Math.abs(claimed - source) / Math.max(Math.abs(source), 1);
    const matched = claimed === source;
    let explanation;
    if (tolerances.countExactMatch) {
        explanation = matched
            ? `Count ${claimed} exactly matches ${source}.`
            : `Count ${claimed} does not match ${source} (exact match required).`;
    }
    else if (matched) {
        explanation = `Count ${claimed} matches ${source}.`;
    }
    else {
        explanation = `Count ${claimed} differs from ${source} by ${Math.abs(claimed - source)}.`;
    }
    return { matched, deviation, explanation };
}
|
|
367
|
+
// ---------------------------------------------------------------------------
|
|
368
|
+
// Date matcher
|
|
369
|
+
// ---------------------------------------------------------------------------
|
|
370
|
+
/**
 * Compare a claimed ISO-8601 date (YYYY-MM-DD) with a source date string.
 *
 * Matching is attempted in three stages:
 *   1. The first 10 characters of both trimmed strings are compared. A
 *      claimed year of "0000" means "no year given" and compares month-day only.
 *   2. If the source is longer than a bare date, the claimed ISO date, or its
 *      European rendering (DD.MM.YYYY, padded or unpadded), is searched for
 *      inside the source text.
 *   3. If the trimmed source has 1 to 30 characters, it is passed to
 *      tryParseDate and the result is compared as in stage 1.
 *
 * @param {string} claimed - Claimed date, expected as YYYY-MM-DD.
 * @param {string} source - Date string or prose from the tool output.
 * @param {object} _tolerances - Unused; kept for a uniform matcher signature.
 * @returns {{ matched: boolean, explanation: string }}
 */
function matchDate(claimed, source, _tolerances) {
    const ISO_DATE = /^\d{4}-\d{2}-\d{2}$/;
    const normClaimed = claimed.trim().substring(0, 10);
    const normSource = source.trim().substring(0, 10);
    // Partial date support: a claimed year of 0000 (no year specified,
    // e.g. European short "15.3." → "0000-03-15") matches on month-day only.
    const sameDate = (candidate) => (normClaimed.startsWith('0000-') && ISO_DATE.test(candidate)
        ? normClaimed.substring(5) === candidate.substring(5)
        : normClaimed === candidate);
    let matched = sameDate(normSource);
    // Fallback 1: look for the claimed date embedded in longer source text.
    if (!matched && source.length > 10 && ISO_DATE.test(normClaimed)) {
        if (source.includes(normClaimed)) {
            matched = true;
        }
        else {
            // European DD.MM.YYYY. The digit guards on both sides stop
            // "1.3.2026" from being found inside "21.3.2026", which is a
            // different day. The parts are digits only (checked by ISO_DATE),
            // so they are safe to embed in a pattern.
            const [y, m, d] = normClaimed.split('-');
            const padded = `${d}\\.${m}\\.${y}`;
            const unpadded = `${Number.parseInt(d, 10)}\\.${Number.parseInt(m, 10)}\\.${y}`;
            const european = new RegExp(`(?<!\\d)(?:${padded}|${unpadded})(?!\\d)`);
            if (european.test(source)) {
                matched = true;
            }
        }
    }
    // Fallback 2: parse the source through tryParseDate to normalise
    // different date formats (e.g. "01.03.2026" → "2026-03-01").
    if (!matched && source.trim().length > 0 && source.trim().length <= 30) {
        const parsed = (0, Claims_1.tryParseDate)(source.trim());
        if (parsed) {
            matched = sameDate(parsed.substring(0, 10));
        }
    }
    return {
        matched,
        explanation: matched
            ? `Date "${claimed}" matches "${source}".`
            : `Date "${claimed}" does not match "${source}".`,
    };
}
|
|
421
|
+
// ---------------------------------------------------------------------------
|
|
422
|
+
// Name / fuzzy matcher (Jaro-Winkler inspired)
|
|
423
|
+
// ---------------------------------------------------------------------------
|
|
424
|
+
/**
 * Jaro similarity of two strings, in [0, 1].
 *
 * Identical strings score 1; if either string is empty, or no characters
 * match, the score is 0. Characters match when they are equal and lie within
 * a window of floor(max(len1, len2) / 2) - 1 positions of each other. Matched
 * characters that appear in a different order count as half-transpositions.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} Similarity score.
 */
function jaroSimilarity(s1, s2) {
    if (s1 === s2) {
        return 1.0;
    }
    const len1 = s1.length;
    const len2 = s2.length;
    if (len1 === 0 || len2 === 0) {
        return 0.0;
    }
    const reach = Math.floor(Math.max(len1, len2) / 2) - 1;
    const used1 = new Array(len1).fill(false);
    const used2 = new Array(len2).fill(false);
    let common = 0;
    for (let i = 0; i < len1; i += 1) {
        const upper = Math.min(i + reach + 1, len2);
        for (let j = Math.max(0, i - reach); j < upper; j += 1) {
            if (!used2[j] && s1[i] === s2[j]) {
                used1[i] = true;
                used2[j] = true;
                common += 1;
                break;
            }
        }
    }
    if (common === 0) {
        return 0.0;
    }
    // Read the matched characters of each string in their own order; every
    // position where the two sequences disagree is a half-transposition.
    const sequence1 = [];
    for (let i = 0; i < len1; i += 1) {
        if (used1[i]) {
            sequence1.push(s1[i]);
        }
    }
    const sequence2 = [];
    for (let j = 0; j < len2; j += 1) {
        if (used2[j]) {
            sequence2.push(s2[j]);
        }
    }
    let outOfOrder = 0;
    for (let n = 0; n < sequence1.length; n += 1) {
        if (sequence1[n] !== sequence2[n]) {
            outOfOrder += 1;
        }
    }
    return ((common / len1 + common / len2 + (common - outOfOrder / 2) / common) / 3);
}
|
|
466
|
+
/**
 * Jaro-Winkler similarity: the Jaro score, raised for strings that share a
 * common prefix. At most the first four characters count towards the boost.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @param {number} [prefixScale=0.1] - Weight applied per shared prefix character.
 * @returns {number} Boosted similarity score.
 */
function jaroWinklerSimilarity(s1, s2, prefixScale = 0.1) {
    const base = jaroSimilarity(s1, s2);
    const prefixCap = Math.min(4, Math.min(s1.length, s2.length));
    let sharedPrefix = 0;
    for (; sharedPrefix < prefixCap; sharedPrefix += 1) {
        if (s1[sharedPrefix] !== s2[sharedPrefix]) {
            break;
        }
    }
    return base + sharedPrefix * prefixScale * (1 - base);
}
|
|
477
|
+
/**
 * Strip diacritics / combining marks so that "Jović" → "Jovic",
 * "č" → "c", etc. Uses Unicode NFD normalization.
 *
 * @param {string} s - Text to simplify.
 * @returns {string} Text without combining marks, with đ/Đ mapped to d/D.
 */
function stripDiacritics(s) {
    // NFD decomposes most diacritics (ć, č, š, ž) into base + combining mark,
    // but đ (U+0111) and Đ (U+0110) are atomic codepoints — handle them explicitly.
    return s.replace(/đ/g, 'd').replace(/Đ/g, 'D').normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}
/**
 * Greeting and filler words that may accompany a name in claimed text
 * (e.g. "Zdravo Slavica"). They are not counted as name tokens when deciding
 * how many token matches are required.
 */
const GREETING_TOKENS = new Set([
    'zdravo', 'cao', 'hej', 'bok', 'hello', 'hi', 'hey',
    'good', 'morning', 'afternoon', 'evening', 'dear',
    'pozdrav', 'dobar', 'dobro', 'jutro', 'dan', 'vece',
]);
/**
 * Fuzzy-match two name strings using Jaro-Winkler similarity.
 * Case-insensitive and diacritic-insensitive.
 *
 * - Names that are identical after normalisation match with similarity 1.
 * - Unless both names have at least two significant tokens (≥3 chars),
 *   whole-string similarity ≥ `nameSimilarityThreshold` is a match.
 * - Otherwise, and as a fallback, matching is per token: each claimed token
 *   is paired with its best still-unused source token, using a token
 *   threshold of max(nameSimilarityThreshold, 0.92). Two pairs are required
 *   when the claim has at least two non-greeting tokens and the source has at
 *   least two tokens; one pair otherwise.
 *
 * @param {string} claimed - Name as stated in the claim.
 * @param {string} source - Name found in the tool output.
 * @param {{ nameSimilarityThreshold: number }} tolerances - Matching settings.
 * @returns {{ matched: boolean, similarity: number, explanation: string }}
 */
function matchName(claimed, source, tolerances) {
    const a = stripDiacritics(claimed.trim().toLowerCase());
    const b = stripDiacritics(source.trim().toLowerCase());
    if (a === b) {
        return {
            matched: true,
            similarity: 1.0,
            explanation: `"${claimed}" exactly matches "${source}".`,
        };
    }
    const sim = jaroWinklerSimilarity(a, b);
    const threshold = tolerances.nameSimilarityThreshold;
    // For multi-word names, full-string Jaro-Winkler is unreliable because
    // Serbian surnames share suffixes (-ović, -ić) which inflate similarity:
    //   "Boban Stojković" vs "Boban Đokić" = 0.91 (different person!)
    //   "Miloš Tasić" vs "Slađana Tasić" = ~1.0 (same surname, different person!)
    // So multi-token names ALWAYS go through per-token matching.
    const significantTokens = (text) => text.split(/\s+/).filter((t) => t.length >= 3);
    const claimedTokens = significantTokens(a);
    const sourceTokens = significantTokens(b);
    const isMultiToken = claimedTokens.length >= 2 && sourceTokens.length >= 2;
    // For single-token names, use full-string similarity
    if (!isMultiToken && sim >= threshold) {
        return {
            matched: true,
            similarity: sim,
            explanation: `"${claimed}" is similar to "${source}" (similarity: ${sim.toFixed(3)} ≥ ${threshold}).`,
        };
    }
    // Per-token matching uses a stricter threshold because Jaro-Winkler
    // inflates scores on short strings (e.g. "markovic" ≈ "rajkovic" = 0.87
    // is a false match).
    const tokenThreshold = Math.max(threshold, 0.92);
    // A first + last name must match two DIFFERENT source tokens, because
    // different people share surnames. A greeting that merely accompanies a
    // name ("Zdravo Slavica") does not raise the requirement to two.
    const namelikeClaimedTokens = claimedTokens.filter((t) => !GREETING_TOKENS.has(t));
    const requiredMatches = namelikeClaimedTokens.length >= 2 && sourceTokens.length >= 2
        ? 2
        : 1;
    let matchedTokens = 0;
    let bestSim = 0;
    let bestClaimedToken = '';
    let bestSourceToken = '';
    const usedSourceIndices = new Set();
    for (const ct of claimedTokens) {
        let bestIdx = -1;
        let bestTokenSim = 0;
        for (let si = 0; si < sourceTokens.length; si++) {
            if (usedSourceIndices.has(si)) {
                continue;
            }
            const tokenSim = jaroWinklerSimilarity(ct, sourceTokens[si]);
            if (tokenSim >= tokenThreshold && tokenSim > bestTokenSim) {
                bestTokenSim = tokenSim;
                bestIdx = si;
            }
        }
        if (bestIdx >= 0) {
            matchedTokens++;
            usedSourceIndices.add(bestIdx);
            if (bestTokenSim > bestSim) {
                bestSim = bestTokenSim;
                bestClaimedToken = ct;
                bestSourceToken = sourceTokens[bestIdx];
            }
        }
    }
    if (matchedTokens >= requiredMatches) {
        return {
            matched: true,
            similarity: bestSim,
            // Report the threshold the tokens were actually compared against.
            explanation: `Token "${bestClaimedToken}" matches "${bestSourceToken}" in "${source}" (similarity: ${bestSim.toFixed(3)} ≥ ${tokenThreshold}).`,
        };
    }
    return {
        matched: false,
        similarity: isMultiToken ? (bestSim || sim) : sim,
        explanation: isMultiToken
            ? `"${claimed}" failed per-token matching against "${source}" (${matchedTokens}/${requiredMatches} tokens matched, best token similarity: ${(bestSim || 0).toFixed(3)}).`
            : `"${claimed}" is not sufficiently similar to "${source}" (similarity: ${sim.toFixed(3)} < ${threshold}).`,
    };
}
|
|
585
|
+
// ---------------------------------------------------------------------------
|
|
586
|
+
// Value extractor — pull scalar numbers/dates/strings from tool output
|
|
587
|
+
// ---------------------------------------------------------------------------
|
|
588
|
+
/**
 * Collect every leaf value found in a JSON-like tool output, discarding the
 * field names reported by extractValuesWithKeys.
 *
 * @param {*} output - Tool output to scan.
 * @returns {Array} Values in the order extractValuesWithKeys reports them.
 */
function extractValuesFromOutput(output) {
    const values = [];
    for (const { value } of extractValuesWithKeys(output)) {
        values.push(value);
    }
    return values;
}
|
|
595
|
+
/**
 * Walk a JSON-like tool output depth-first and list every number and string
 * leaf together with the path of the field it came from.
 *
 * Paths use "parent.child" for object members and "parent[i]" for array
 * items. A top-level scalar has an undefined field name. Strings produce
 * extra entries:
 *   - a plain decimal string ("44", "-2.5") is also listed as a number;
 *   - an "H:MM" duration with MM < 60 is also listed as its hour count and,
 *     when MM > 0, as total minutes under "<path>_total_minutes".
 * null, undefined and all other value types are ignored.
 *
 * @param {*} output - Tool output to scan.
 * @returns {Array<{ value: (number|string), fieldName: (string|undefined) }>}
 */
function extractValuesWithKeys(output) {
    const DECIMAL_TEXT = /^-?\d+(\.\d+)?$/;
    const HOURS_MINUTES = /^(\d+):(\d{2})$/;
    const found = [];
    const record = (value, fieldName) => {
        found.push({ value, fieldName });
    };
    const visitText = (text, path) => {
        record(text, path);
        const trimmed = text.trim();
        // Numeric strings (e.g. "44" from a DB) should also match numeric claims.
        if (DECIMAL_TEXT.test(trimmed)) {
            record(parseFloat(trimmed), path);
        }
        // Durations such as "6738:25" = 6738 hours 25 minutes.
        const duration = trimmed.match(HOURS_MINUTES);
        if (!duration) {
            return;
        }
        const minutes = parseInt(duration[2], 10);
        if (minutes >= 60) {
            return;
        }
        const hours = parseInt(duration[1], 10);
        record(hours, path);
        if (minutes > 0) {
            record(hours * 60 + minutes, path ? `${path}_total_minutes` : '_total_minutes');
        }
    };
    const visit = (node, path) => {
        if (node === null || node === undefined) {
            return;
        }
        const kind = typeof node;
        if (kind === 'number') {
            record(node, path);
        }
        else if (kind === 'string') {
            visitText(node, path);
        }
        else if (Array.isArray(node)) {
            for (const [index, item] of node.entries()) {
                visit(item, path ? `${path}[${index}]` : `[${index}]`);
            }
        }
        else if (kind === 'object') {
            for (const [name, child] of Object.entries(node)) {
                visit(child, path ? `${path}.${name}` : name);
            }
        }
    };
    visit(output);
    return found;
}
|
|
637
|
+
/**
 * Known unit-bearing field name patterns, as [pattern, unit] pairs.
 *
 * Patterns are tried in order against one lower-cased name token and the
 * first hit wins. The base-unit patterns match on a suffix, so names carrying
 * a metric prefix are listed first: otherwise "milliseconds" would be read as
 * seconds, "kilometers" as meters and "megabytes" as bytes. A prefixed name
 * with no canonical unit is mapped to `undefined` (no unit inferred).
 */
const FIELD_UNIT_PATTERNS = [
    // ── Prefixed units: must precede their base-unit suffix patterns ──
    [/(?:milli|micro|nano)seconds?$/i, undefined],
    [/kilomet(?:er|re)s?$/i, 'km'],
    [/(?:centi|milli)met(?:er|re)s?$/i, undefined],
    [/kilobytes?$/i, 'kb'],
    [/megabytes?$/i, 'mb'],
    [/gigabytes?$/i, 'gb'],
    [/(?:tera|peta)bytes?$/i, undefined],
    // ── Base units ──
    [/minutes?$/i, 'minutes'],
    [/hours?$/i, 'hours'],
    [/seconds?$/i, 'seconds'],
    [/days?$/i, 'days'],
    [/weeks?$/i, 'weeks'],
    [/months?$/i, 'months'],
    [/years?$/i, 'years'],
    [/\bkm$/i, 'km'],
    [/meters?$/i, 'meters'],
    [/miles?$/i, 'miles'],
    [/\bkg$/i, 'kg'],
    [/\blbs?$/i, 'lbs'],
    [/bytes?$/i, 'bytes'],
];
|
|
653
|
+
/**
 * Infer a unit from a JSON field name.
 * E.g. "total_minutes" → "minutes", "hours_worked" → "hours",
 * "stats.lap_minutes[0]" → "minutes".
 *
 * Only the last dot-separated segment of the path is inspected. It is split
 * into tokens on underscores, whitespace and camelCase boundaries, and the
 * first token that hits an entry of FIELD_UNIT_PATTERNS decides the result.
 *
 * @param {string|undefined} fieldName - Field path, e.g. "a.b.total_minutes".
 * @returns {string|undefined} Canonical unit, or undefined when none is recognised.
 */
function inferUnitFromFieldName(fieldName) {
    if (!fieldName) {
        return undefined;
    }
    // Take the last segment of the key path
    const lastPart = fieldName.split('.').pop() ?? fieldName;
    // Drop trailing array indices ("lap_minutes[0]" → "lap_minutes"); the
    // unit patterns are anchored at the end of a token and would otherwise
    // never see the unit word of an array element's path.
    const withoutIndex = lastPart.replace(/(?:\[\d+\])+$/, '');
    // Split on underscores/camelCase
    const parts = withoutIndex.replace(/([a-z])([A-Z])/g, '$1_$2').toLowerCase().split(/[_\s]+/);
    for (const part of parts) {
        for (const [pattern, unit] of FIELD_UNIT_PATTERNS) {
            if (pattern.test(part)) {
                return unit;
            }
        }
    }
    return undefined;
}
|
|
672
|
+
/**
 * Determine how many items a tool output reports.
 *
 * - An array yields its length.
 * - Any other object yields the first numeric property among
 *   "count", "total", "length", "size", "num", "number" (checked in that order).
 * - Everything else yields null.
 *
 * @param {*} output - Tool output to inspect.
 * @returns {number|null} Item count, or null when none can be determined.
 */
function extractCountFromOutput(output) {
    if (Array.isArray(output)) {
        return output.length;
    }
    if (output === null || typeof output !== 'object') {
        return null;
    }
    const candidateKeys = ['count', 'total', 'length', 'size', 'num', 'number'];
    const numericKey = candidateKeys.find((key) => typeof output[key] === 'number');
    return numericKey === undefined ? null : output[numericKey];
}
|
|
690
|
+
//# sourceMappingURL=index.js.map
|