mrz-genius 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/package.json +47 -0
- package/src/detector/mrzDetector.js +214 -0
- package/src/index.d.ts +141 -0
- package/src/index.js +150 -0
- package/src/ocr/llmExtractor.js +146 -0
- package/src/ocr/mrzOCR.js +489 -0
- package/src/parser/checkDigit.js +84 -0
- package/src/parser/fieldPositions.js +122 -0
- package/src/parser/mrzParser.js +487 -0
- package/src/parser/ocrCorrector.js +172 -0
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MRZ Parser
|
|
3
|
+
* Parses MRZ strings for TD1, TD2, TD3, MRVA, MRVB document types
|
|
4
|
+
* Inspired by MRZParser Swift library
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
'use strict';
|
|
8
|
+
|
|
9
|
+
const { MRZ_FORMATS, getFieldPositions } = require('./fieldPositions');
|
|
10
|
+
const { calculateCheckDigit, isCheckDigitValid, isCompositeValid } = require('./checkDigit');
|
|
11
|
+
const { correctOCR, findMatchingStrings, repairFieldWithCheckDigit, repairIvorianDocumentNumber } = require('./ocrCorrector');
|
|
12
|
+
|
|
13
|
+
// ──────────────────────────────────────────────────────────
|
|
14
|
+
// Format detection
|
|
15
|
+
// ──────────────────────────────────────────────────────────
|
|
16
|
+
|
|
17
|
+
/**
 * Identify the MRZ layout from the number of lines and their common width.
 *
 * Recognized shapes are 3x30 (TD1), 2x36 (TD2 or MRVB) and 2x44 (TD3 or MRVA).
 * For the two-line shapes, a leading 'V' on the first line selects the visa
 * variant.
 *
 * @param {string[]} lines - MRZ lines
 * @returns {string|null} Format name, or null when the shape is not recognized
 */
function detectFormat(lines) {
  if (!lines || lines.length === 0) {
    return null;
  }

  // Every line has to be exactly as wide as the first one.
  const width = lines[0].length;
  if (lines.some((row) => row.length !== width)) {
    return null;
  }

  const startsWithV = lines[0][0] === 'V';

  switch (`${lines.length}x${width}`) {
    case '3x30':
      return 'TD1';
    case '2x36':
      return startsWithV ? 'MRVB' : 'TD2';
    case '2x44':
      return startsWithV ? 'MRVA' : 'TD3';
    default:
      return null;
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Split a single concatenated MRZ string into its individual lines.
 *
 * All whitespace is removed and the text is upper-cased before measuring.
 * The layout is chosen from the resulting length:
 *   - 89..92 characters -> 3 lines of 30 (TD1, nominal 90)
 *   - 70..74 characters -> 2 lines of 36 (TD2/MRVB, nominal 72)
 *   - 86..88 characters -> 2 lines of 44 (TD3/MRVA, nominal 88)
 *
 * When the length is off-nominal (OCR added or dropped characters) the last
 * line absorbs the difference: every line is truncated to the line width and
 * right-padded with '<'.
 *
 * @param {string} mrzString - Single MRZ string (possibly concatenated)
 * @returns {string[]|null} Array of lines, or null if the input is not a
 *   string or its length matches no known layout
 */
function autoSplitMRZ(mrzString) {
  // Non-string input cannot be an MRZ; report "unknown" instead of throwing.
  if (typeof mrzString !== 'string') return null;

  const cleaned = mrzString.replace(/\s+/g, '').toUpperCase();
  const len = cleaned.length;

  let lineWidth;
  let lineCount;
  if (len >= 89 && len <= 92) {
    // 89 is equally close to TD1 (90) and TD3 (88); it is treated as TD1.
    lineWidth = 30;
    lineCount = 3;
  } else if (len >= 70 && len <= 74) {
    lineWidth = 36;
    lineCount = 2;
  } else if (len >= 86 && len <= 88) {
    lineWidth = 44;
    lineCount = 2;
  } else {
    return null;
  }

  const lines = [];
  for (let index = 0; index < lineCount; index++) {
    const from = index * lineWidth;
    const isLast = index === lineCount - 1;
    // The last line takes everything that is left, then gets clamped to width.
    const chunk = isLast ? cleaned.substring(from) : cleaned.substring(from, from + lineWidth);
    lines.push(chunk.substring(0, lineWidth).padEnd(lineWidth, '<'));
  }
  return lines;
}
|
|
104
|
+
|
|
105
|
+
// ──────────────────────────────────────────────────────────
|
|
106
|
+
// Field extraction
|
|
107
|
+
// ──────────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
/**
 * Slice the raw text of one field out of the MRZ lines.
 *
 * @param {string[]} lines - MRZ lines
 * @param {Object} position - Field position; `line` is the line index and
 *   `start`/`end` are passed to `String.prototype.substring`
 * @returns {string} Raw field value, or '' when the addressed line is missing or empty
 */
function extractRawField(lines, position) {
  const { line: row, start, end } = position;
  const text = lines[row];
  return text ? text.substring(start, end) : '';
}
|
|
120
|
+
|
|
121
|
+
/**
 * Read the check digit stored immediately after a field, i.e. the character
 * at index `position.end` of the field's line.
 *
 * A filler '<' in the check digit position counts as 0.
 *
 * @param {string[]} lines - MRZ lines
 * @param {Object} position - Field position (`line`, `end`, `hasCheckDigit`)
 * @returns {number|null} Check digit, or null when the field has no check
 *   digit, the character is missing, or it is not a decimal digit
 */
function extractCheckDigit(lines, position) {
  if (!position.hasCheckDigit) {
    return null;
  }

  const row = lines[position.line];
  const symbol = row ? row[position.end] : undefined;

  if (symbol === undefined) {
    return null;
  }
  if (symbol === '<') {
    return 0;
  }

  const value = parseInt(symbol, 10);
  return Number.isNaN(value) ? null : value;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Turn a raw MRZ field into display text: leading and trailing '<' fillers
 * are dropped and every remaining '<' becomes a space.
 *
 * @param {string} value - Raw value
 * @returns {string|null} Clean value, or null when the input is null/undefined
 *   or nothing is left after stripping fillers
 */
function cleanFieldValue(value) {
  if (value == null) {
    return null;
  }

  const stripped = value.replace(/^<+|<+$/g, '');
  if (stripped.length === 0) {
    return null;
  }

  return stripped.replace(/</g, ' ');
}
|
|
152
|
+
|
|
153
|
+
// ──────────────────────────────────────────────────────────
|
|
154
|
+
// Name parsing
|
|
155
|
+
// ──────────────────────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
/**
 * Split an MRZ name field into surname and given names.
 *
 * Outer '<' fillers are removed, then the text is split on '<<'. The first
 * segment is the surname and the second segment (if any) holds the given
 * names; later segments are ignored. Inside a segment '<' becomes a space.
 *
 * @param {string} rawName - Raw name field value
 * @returns {{ surname: string|null, givenNames: string|null }} Each part is
 *   null when it is absent or empty
 */
function parseName(rawName) {
  if (!rawName) {
    return { surname: null, givenNames: null };
  }

  // Convert fillers to spaces; an empty result is reported as null.
  const humanize = (segment) => segment.replace(/</g, ' ').trim() || null;

  const [primary, secondary] = rawName.replace(/^<+|<+$/g, '').split('<<');

  return {
    surname: humanize(primary),
    givenNames: secondary === undefined ? null : humanize(secondary),
  };
}
|
|
173
|
+
|
|
174
|
+
// ──────────────────────────────────────────────────────────
|
|
175
|
+
// Date parsing
|
|
176
|
+
// ──────────────────────────────────────────────────────────
|
|
177
|
+
|
|
178
|
+
/**
 * Parse a 6-digit MRZ date (YYMMDD) into a Date at 00:00 UTC.
 *
 * The century is chosen relative to the current year:
 *   - 'birth': a two-digit year greater than the current one is taken as
 *     19xx, otherwise 20xx.
 *   - any other type (expiry): a two-digit year at least 50 years beyond the
 *     current two-digit year is taken as 19xx, otherwise 20xx.
 *
 * @param {string} raw - Raw 6-character field; must consist of six decimal digits
 * @param {'birth'|'expiry'} type - Date type for century determination
 * @returns {Date|null} Parsed date, or null when the field is not six digits
 *   or does not denote a real calendar day
 */
function parseDate(raw, type) {
  // parseInt would accept partly numeric pairs such as '1A' or '9<', so the
  // whole field is validated up front. This also rejects '<<<<<<'.
  if (typeof raw !== 'string' || !/^\d{6}$/.test(raw)) return null;

  const yy = Number(raw.substring(0, 2));
  const mm = Number(raw.substring(2, 4));
  const dd = Number(raw.substring(4, 6));

  if (mm < 1 || mm > 12 || dd < 1 || dd > 31) return null;

  const currentYear = new Date().getFullYear();
  const currentYY = currentYear % 100;

  let century;
  if (type === 'birth') {
    century = yy > currentYY ? 1900 : 2000;
  } else {
    // Expiry: if >= currentYY + 50, assume previous century
    century = yy >= (currentYY + 50) ? 1900 : 2000;
  }

  const year = century + yy;
  const parsed = new Date(Date.UTC(year, mm - 1, dd));

  // Date.UTC silently rolls impossible days over (Feb 31 -> Mar 3); a real
  // calendar day must survive the round trip unchanged.
  if (parsed.getUTCMonth() !== mm - 1 || parsed.getUTCDate() !== dd) return null;

  return parsed;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Format a date as a `YYYY-MM-DD` string using its UTC components.
 *
 * @param {Date} date
 * @returns {string|null} Formatted date, or null when `date` is missing or
 *   is an Invalid Date
 */
function formatDate(date) {
  if (!date) return null;
  // An Invalid Date would otherwise be rendered as 'NaN-NaN-NaN'.
  if (Number.isNaN(date.getTime())) return null;

  const y = String(date.getUTCFullYear()).padStart(4, '0');
  const m = String(date.getUTCMonth() + 1).padStart(2, '0');
  const d = String(date.getUTCDate()).padStart(2, '0');
  return `${y}-${m}-${d}`;
}
|
|
222
|
+
|
|
223
|
+
// ──────────────────────────────────────────────────────────
|
|
224
|
+
// Sex parsing
|
|
225
|
+
// ──────────────────────────────────────────────────────────
|
|
226
|
+
|
|
227
|
+
/**
 * Map the MRZ sex field to a descriptive label.
 *
 * '<' fillers are removed before the lookup.
 *
 * @param {string} rawSex
 * @returns {string} 'MALE', 'FEMALE' or 'NON_BINARY' for 'M', 'F' and 'X';
 *   'UNSPECIFIED' for anything else, including an empty or missing field
 */
function parseSex(rawSex) {
  const labels = new Map([
    ['M', 'MALE'],
    ['F', 'FEMALE'],
    ['X', 'NON_BINARY'],
  ]);

  const code = (rawSex || '').replace(/</g, '').trim();
  return labels.has(code) ? labels.get(code) : 'UNSPECIFIED';
}
|
|
241
|
+
|
|
242
|
+
// ──────────────────────────────────────────────────────────
|
|
243
|
+
// Document type parsing
|
|
244
|
+
// ──────────────────────────────────────────────────────────
|
|
245
|
+
|
|
246
|
+
/**
 * Classify the document from the first character of its document-type field.
 *
 * '<' fillers are removed first. Only the first remaining character decides
 * the category, so two-character codes such as 'ID' or 'PM' are classified
 * by their leading letter.
 *
 * @param {string} rawType - Raw document-type field
 * @returns {string} 'PASSPORT' for 'P', 'VISA' for 'V', 'ID_CARD' for 'I',
 *   'A' or 'C'; otherwise the cleaned code itself, or 'UNKNOWN' when the
 *   field is empty
 */
function parseDocumentType(rawType) {
  const code = (rawType || '').replace(/</g, '').trim();

  switch (code.charAt(0)) {
    case 'P':
      return 'PASSPORT';
    case 'V':
      return 'VISA';
    case 'I':
    case 'A':
    case 'C':
      return 'ID_CARD';
    default:
      // Unrecognized codes are passed through so callers can inspect them.
      return code || 'UNKNOWN';
  }
}
|
|
262
|
+
|
|
263
|
+
// ──────────────────────────────────────────────────────────
|
|
264
|
+
// Main parser
|
|
265
|
+
// ──────────────────────────────────────────────────────────
|
|
266
|
+
|
|
267
|
+
/**
 * Normalize raw parser input into upper-cased, whitespace-free, non-empty lines.
 *
 * @param {string|string[]} input - Multi-line string or array of lines
 * @returns {string[]|null} Normalized lines, or null when the input is
 *   neither a string nor an array made up only of strings
 */
function normalizeInputLines(input) {
  let rows;
  if (typeof input === 'string') {
    rows = input.trim().split(/[\n\r]+/);
  } else if (Array.isArray(input) && input.every((row) => typeof row === 'string')) {
    rows = input;
  } else {
    return null;
  }

  return rows
    .map((row) => row.trim().toUpperCase())
    .filter((row) => row.length > 0)
    .map((row) => row.replace(/\s+/g, ''));
}

/**
 * Determine the MRZ format of the given lines, repairing the line layout
 * where needed. Strategies are tried from least to most invasive:
 *   1. the lines exactly as given;
 *   2. shorter lines right-padded with '<' to the longest line;
 *   3. all lines joined and re-cut with autoSplitMRZ (single-line input, or
 *      lines that were clumped together but whose total length is nominal).
 *
 * Padding is tried before re-cutting so that a correctly split document with
 * a truncated line (e.g. 30/30/28) is not re-cut as a different format just
 * because its total length happens to equal another format's nominal size.
 *
 * @param {string[]} lines - Normalized MRZ lines
 * @returns {{ lines: string[], format: string }|null} Final lines and format,
 *   or null when no strategy yields a known format
 */
function resolveLinesAndFormat(lines) {
  let format = detectFormat(lines);
  if (format) return { lines, format };

  if (lines.length >= 2) {
    const maxLen = Math.max(...lines.map((l) => l.length));
    const padded = lines.map((l) => l.padEnd(maxLen, '<'));
    format = detectFormat(padded);
    if (format) return { lines: padded, format };
  }

  const totalLength = lines.reduce((sum, l) => sum + l.length, 0);
  const isNominalTotal = [72, 88, 90].includes(totalLength);
  const isLongSingleLine = lines.length === 1 && lines[0].length > 44;
  if (isNominalTotal || isLongSingleLine) {
    const autoSplit = autoSplitMRZ(lines.join(''));
    if (autoSplit) {
      format = detectFormat(autoSplit);
      if (format) return { lines: autoSplit, format };
    }
  }

  return null;
}

/**
 * Parse MRZ string or lines
 *
 * The result carries the decoded fields (document type, issuing country,
 * names, document number, nationality, birth/expiry dates, sex, optional
 * data), the MRZ key, the normalized MRZ text and per-field check digit
 * details. `valid` is true only when every individual check digit and the
 * final composite check digit pass.
 *
 * @param {string|string[]} input - MRZ string (multi-line) or array of lines
 * @param {Object} [options] - Parser options
 * @param {boolean} [options.ocrCorrection=false] - Enable OCR error correction
 * @returns {MRZResult|null} Parsed result or null if format unrecognized
 */
function parse(input, options = {}) {
  // A default parameter only covers `undefined`; tolerate an explicit null too.
  const { ocrCorrection = false } = options || {};

  const normalized = normalizeInputLines(input);
  if (!normalized) return null;

  const resolved = resolveLinesAndFormat(normalized);
  if (!resolved) return null;
  const { lines, format } = resolved;

  // Get field positions
  const positions = getFieldPositions(format);
  if (!positions) return null;

  // ─── Extract fields ───
  const rawDocType = extractRawField(lines, positions.documentType);
  const rawIssuingCountry = extractRawField(lines, positions.issuingCountry);
  const rawName = extractRawField(lines, positions.name);
  const rawDocNumber = extractRawField(lines, positions.documentNumber);
  const rawNationality = extractRawField(lines, positions.nationality);
  const rawBirthDate = extractRawField(lines, positions.birthDate);
  const rawSex = extractRawField(lines, positions.sex);
  const rawExpiryDate = extractRawField(lines, positions.expiryDate);

  // Optional data
  const rawOptionalData1 = positions.optionalData1 ? extractRawField(lines, positions.optionalData1) : null;
  const rawOptionalData2 = positions.optionalData2 ? extractRawField(lines, positions.optionalData2) : null;

  // ─── OCR correction on individual fields ───
  const correctedIssuingCountry = ocrCorrection ? correctOCR(rawIssuingCountry, 'letters') : rawIssuingCountry;
  const correctedNationality = ocrCorrection ? correctOCR(rawNationality, 'letters') : rawNationality;

  let correctedDocNumber = ocrCorrection ? repairIvorianDocumentNumber(rawDocNumber, correctedIssuingCountry) : rawDocNumber;
  let correctedBirthDate = ocrCorrection ? correctOCR(rawBirthDate, 'digits') : rawBirthDate;
  let correctedExpiryDate = ocrCorrection ? correctOCR(rawExpiryDate, 'digits') : rawExpiryDate;
  const correctedSex = ocrCorrection ? correctOCR(rawSex, 'sex') : rawSex;

  // ─── Check digits ───
  // A field whose check digit cannot be read (null) is not counted as invalid.
  let docNumberCheckDigit = extractCheckDigit(lines, positions.documentNumber);
  let birthDateCheckDigit = extractCheckDigit(lines, positions.birthDate);
  let expiryDateCheckDigit = extractCheckDigit(lines, positions.expiryDate);

  let docNumberValid = docNumberCheckDigit !== null ? isCheckDigitValid(correctedDocNumber, docNumberCheckDigit) : true;
  let birthDateValid = birthDateCheckDigit !== null ? isCheckDigitValid(correctedBirthDate, birthDateCheckDigit) : true;
  let expiryDateValid = expiryDateCheckDigit !== null ? isCheckDigitValid(correctedExpiryDate, expiryDateCheckDigit) : true;

  if (ocrCorrection) {
    // Try to repair each failing field against its own check digit.
    if (!docNumberValid && docNumberCheckDigit !== null) {
      const rep = repairFieldWithCheckDigit(correctedDocNumber, docNumberCheckDigit);
      correctedDocNumber = rep.value;
      docNumberCheckDigit = rep.checkDigit;
      docNumberValid = rep.valid;
    }
    if (!birthDateValid && birthDateCheckDigit !== null) {
      const rep = repairFieldWithCheckDigit(correctedBirthDate, birthDateCheckDigit);
      correctedBirthDate = rep.value;
      birthDateCheckDigit = rep.checkDigit;
      birthDateValid = rep.valid;
    }
    if (!expiryDateValid && expiryDateCheckDigit !== null) {
      const rep = repairFieldWithCheckDigit(correctedExpiryDate, expiryDateCheckDigit);
      correctedExpiryDate = rep.value;
      expiryDateCheckDigit = rep.checkDigit;
      expiryDateValid = rep.valid;
    }

    // specific hack for Côte d'Ivoire where check digit equals 0 (filler `<`) incorrectly marked invalid due to standard check
    if (correctedIssuingCountry === 'CIV' && docNumberCheckDigit === 0 && !docNumberValid) {
      docNumberValid = true;
    }
  }

  // Optional data 1 check digit (only for formats whose position table flags one)
  let optionalData1CheckDigit = null;
  let optionalData1Valid = true;
  if (positions.optionalData1 && positions.optionalData1.hasCheckDigit) {
    optionalData1CheckDigit = extractCheckDigit(lines, positions.optionalData1);
    if (optionalData1CheckDigit !== null) {
      optionalData1Valid = isCheckDigitValid(rawOptionalData1, optionalData1CheckDigit);
    }
  }

  // Final composite check digit
  let finalCheckDigitValid = true;
  if (positions.finalCheckDigit) {
    const rawFinal = extractRawField(lines, positions.finalCheckDigit);
    const finalCheckDigitValue = rawFinal === '<' ? 0 : parseInt(rawFinal, 10);

    // An unreadable composite digit is skipped, like unreadable field digits.
    if (!Number.isNaN(finalCheckDigitValue)) {
      // Build composite fields based on format
      let compositeFields;
      if (format === 'TD1') {
        compositeFields = [
          { rawValue: correctedDocNumber, checkDigit: docNumberCheckDigit },
          { rawValue: rawOptionalData1 || '', checkDigit: null },
          { rawValue: correctedBirthDate, checkDigit: birthDateCheckDigit },
          { rawValue: correctedExpiryDate, checkDigit: expiryDateCheckDigit },
          { rawValue: rawOptionalData2 || '', checkDigit: null },
        ];
      } else {
        // TD2, TD3
        compositeFields = [
          { rawValue: correctedDocNumber, checkDigit: docNumberCheckDigit },
          { rawValue: correctedBirthDate, checkDigit: birthDateCheckDigit },
          { rawValue: correctedExpiryDate, checkDigit: expiryDateCheckDigit },
        ];
        // ICAO 9303 includes the optional data in the composite for both TD2
        // and TD3. For a field without its own check digit the digit is null,
        // the same convention the TD1 branch uses.
        if ((format === 'TD2' || format === 'TD3') && rawOptionalData1) {
          compositeFields.push({
            rawValue: rawOptionalData1,
            checkDigit: optionalData1CheckDigit,
          });
        }
      }

      finalCheckDigitValid = isCompositeValid(compositeFields, finalCheckDigitValue);
    }
  }

  // ─── Parse values ───
  const documentType = parseDocumentType(rawDocType);
  const name = parseName(rawName);
  const birthDate = parseDate(correctedBirthDate, 'birth');
  const expiryDate = parseDate(correctedExpiryDate, 'expiry');
  const sex = parseSex(correctedSex);

  const allChecksValid = docNumberValid && birthDateValid && expiryDateValid && optionalData1Valid && finalCheckDigitValid;

  return {
    valid: allChecksValid,
    format,
    documentType,
    issuingCountry: cleanFieldValue(correctedIssuingCountry),
    surname: name.surname,
    givenNames: name.givenNames,
    documentNumber: cleanFieldValue(correctedDocNumber),
    nationality: cleanFieldValue(correctedNationality),
    birthDate,
    birthDateFormatted: formatDate(birthDate),
    sex,
    expiryDate,
    expiryDateFormatted: formatDate(expiryDate),
    optionalData1: cleanFieldValue(rawOptionalData1),
    optionalData2: cleanFieldValue(rawOptionalData2),
    mrzKey: buildMrzKey(correctedDocNumber, docNumberCheckDigit, correctedBirthDate, birthDateCheckDigit, correctedExpiryDate, expiryDateCheckDigit),
    rawMRZ: lines.join('\n'),
    details: {
      fields: {
        documentNumber: { raw: rawDocNumber, checkDigit: docNumberCheckDigit, valid: docNumberValid },
        birthDate: { raw: rawBirthDate, checkDigit: birthDateCheckDigit, valid: birthDateValid },
        expiryDate: { raw: rawExpiryDate, checkDigit: expiryDateCheckDigit, valid: expiryDateValid },
        optionalData1: { raw: rawOptionalData1, checkDigit: optionalData1CheckDigit, valid: optionalData1Valid },
      },
      finalCheckDigitValid,
    },
  };
}
|
|
464
|
+
|
|
465
|
+
/**
 * Build MRZ key (used for BAC in e-Passports)
 *
 * The key is the document number, birth date and expiry date concatenated in
 * that order, each followed by its check digit. A check digit that is null
 * or undefined is left out.
 *
 * @param {string} docNum - Document number field
 * @param {number|null} docCheckDigit
 * @param {string} birthDate - Birth date field (YYMMDD)
 * @param {number|null} birthCheckDigit
 * @param {string} expiryDate - Expiry date field (YYMMDD)
 * @param {number|null} expiryCheckDigit
 * @returns {string} Concatenated key
 */
function buildMrzKey(docNum, docCheckDigit, birthDate, birthCheckDigit, expiryDate, expiryCheckDigit) {
  const fields = [
    [docNum, docCheckDigit],
    [birthDate, birthCheckDigit],
    [expiryDate, expiryCheckDigit],
  ];

  const pieces = [];
  for (const [value, digit] of fields) {
    pieces.push(value);
    if (digit != null) {
      pieces.push(String(digit));
    }
  }

  // Fold left to right, starting from the document number itself.
  return pieces.reduce((key, piece) => key + piece);
}
|
|
477
|
+
|
|
478
|
+
// Public API of the MRZ parser module: the main `parse` entry point plus the
// format-detection, splitting and field-parsing helpers defined in this file.
module.exports = {
  parse,
  detectFormat,
  autoSplitMRZ,
  parseName,
  parseDate,
  formatDate,
  parseSex,
  parseDocumentType,
};
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR Error Corrector
|
|
3
|
+
* Handles common OCR misreadings in MRZ strings
|
|
4
|
+
* Inspired by MRZParser's OCRCorrector.swift
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
'use strict';
|
|
8
|
+
|
|
9
|
+
/**
 * OCR look-alike substitution tables, keyed by the content type a field is
 * expected to hold. Each table maps a misread character to the character
 * that should replace it.
 */
const CORRECTIONS = {
  // Letters that stand in for digits in a numeric field.
  digits: {
    O: '0',
    Q: '0',
    U: '0',
    D: '0',
    I: '1',
    L: '1',
    Z: '2',
    S: '5',
    G: '6',
    B: '8',
  },
  // Digits that stand in for letters in an alphabetic field.
  letters: {
    '0': 'O',
    '1': 'I',
    '2': 'Z',
    '5': 'S',
    '6': 'G',
    '8': 'B',
  },
  // Substitutions applied to the sex field.
  sex: {
    P: 'F',
  },
};
|
|
33
|
+
|
|
34
|
+
/**
 * Replace commonly misread characters according to the kind of content the
 * field is expected to contain.
 *
 * Each character is looked up in the CORRECTIONS table for `contentType`;
 * characters without an entry are kept. The string is returned untouched for
 * 'mixed' content and for content types that have no table.
 *
 * @param {string} str - String to correct
 * @param {'digits'|'letters'|'sex'|'mixed'} contentType - Expected content type
 * @returns {string} Corrected string
 */
function correctOCR(str, contentType) {
  const table = contentType === 'mixed' ? undefined : CORRECTIONS[contentType];
  if (!table) {
    return str;
  }

  return [...str].map((symbol) => table[symbol] || symbol).join('');
}
|
|
52
|
+
|
|
53
|
+
/**
 * List the characters a given character could stand for: the character
 * itself, its entry in the `digits` correction table and its entry in the
 * `letters` correction table, without duplicates and in that order.
 *
 * @param {string} char - Character to transform
 * @returns {string[]} Array of possible characters (the original comes first)
 */
function getTransformedChars(char) {
  const candidates = [
    char,
    CORRECTIONS.digits[char] || char,
    CORRECTIONS.letters[char] || char,
  ];

  const unique = [];
  for (const candidate of candidates) {
    if (!unique.includes(candidate)) {
      unique.push(candidate);
    }
  }
  return unique;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Search for an OCR-corrected version of several strings that satisfies a
 * validation function.
 *
 * For every input string the candidates are the string itself followed by
 * each variant that differs in exactly ONE character (using the look-alike
 * substitutions from getTransformedChars), ordered by character position.
 * Combinations of one candidate per string are tried depth-first, so the
 * unmodified combination is tested first. Variants with several substituted
 * characters in the same string are not explored.
 *
 * A string without characters has a single candidate (itself) and no longer
 * aborts the search.
 *
 * @param {string[]} strings - Array of strings to correct
 * @param {function(string[]): boolean} isCorrectCombination - Validation
 *   function; expected to be deterministic
 * @returns {string[]|null} Corrected strings or null if no valid combination found
 */
function findMatchingStrings(strings, isCorrectCombination) {
  // Original text first, then every single-character substitution.
  const variantsOf = (text) => {
    const chars = [...text];
    const variants = [chars.join('')];
    chars.forEach((original, index) => {
      for (const replacement of getTransformedChars(original)) {
        if (replacement === original) continue;
        const copy = chars.slice();
        copy[index] = replacement;
        variants.push(copy.join(''));
      }
    });
    return variants;
  };

  const candidateLists = strings.map(variantsOf);
  const chosen = new Array(candidateLists.length);

  function search(depth) {
    if (depth === candidateLists.length) {
      const combination = chosen.slice();
      return isCorrectCombination(combination) ? combination : null;
    }

    for (const candidate of candidateLists[depth]) {
      chosen[depth] = candidate;
      const match = search(depth + 1);
      if (match) return match;
    }
    return null;
  }

  return search(0);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Normalize the document number of a document issued by Côte d'Ivoire ('CIV').
 *
 * Numbers of at least 9 characters that begin with 'C' are rewritten as 'CI'
 * followed by the remainder (from the third character on) with letter-to-digit
 * OCR corrections applied. The second character is therefore always forced
 * to 'I'. Anything else is returned unchanged.
 *
 * @param {string} docNum - Raw document number field
 * @param {string} issuingCountry - Issuing country code
 * @returns {string} Repaired document number, or `docNum` as given
 */
function repairIvorianDocumentNumber(docNum, issuingCountry) {
  const isIvorian = issuingCountry === 'CIV';
  if (!isIvorian || !docNum || docNum.length < 9) {
    return docNum;
  }

  // Expected shape: 'CI' followed by digits. A misread second character
  // (e.g. 'C1') is handled by the same rewrite.
  if (docNum.startsWith('CI') || docNum[0] === 'C') {
    const digitsPart = correctOCR(docNum.substring(2), 'digits');
    return `CI${digitsPart}`;
  }

  return docNum;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Brute force repair a field and its check digit.
 * Assumes either the field has ONE mistyped char OR the check digit is mistyped.
 *
 * Steps, in order:
 *   1. accept the pair as-is when the computed check digit already matches;
 *   2. substitute one character of the value at a time with its OCR
 *      look-alikes and accept the first variant whose check digit matches;
 *   3. substitute the check digit with its OCR look-alikes and accept one
 *      that equals the check digit computed from the unmodified value.
 *
 * @param {string} rawValue - Field value
 * @param {number|string|null} rawCheckDigit - Check digit read from the MRZ
 * @returns {{ value: string, checkDigit: (number|string|null), valid: boolean }}
 *   The (possibly repaired) pair; `valid` is false when nothing matched or
 *   the input was empty / had no check digit
 */
function repairFieldWithCheckDigit(rawValue, rawCheckDigit) {
  const unrepaired = { value: rawValue, checkDigit: rawCheckDigit, valid: false };
  if (!rawValue || rawCheckDigit === null) {
    return unrepaired;
  }

  const { calculateCheckDigit } = require('./checkDigit'); // dynamic require to avoid circular dep if any
  const expected = parseInt(rawCheckDigit, 10);
  const baseCalc = calculateCheckDigit(rawValue);

  // Step 1: nothing to repair.
  if (baseCalc === expected) {
    return { value: rawValue, checkDigit: rawCheckDigit, valid: true };
  }

  // Step 2: one substituted character in the value.
  const chars = [...rawValue];
  for (const [index, original] of chars.entries()) {
    for (const alt of getTransformedChars(original)) {
      if (alt === original) continue;
      const candidate = [...chars.slice(0, index), alt, ...chars.slice(index + 1)].join('');
      if (calculateCheckDigit(candidate) === expected) {
        return { value: candidate, checkDigit: rawCheckDigit, valid: true };
      }
    }
  }

  // Step 3: the check digit itself was misread.
  const digitText = String(rawCheckDigit);
  for (const alt of getTransformedChars(digitText)) {
    if (alt === digitText) continue;
    const altDigit = parseInt(alt, 10);
    if (altDigit === baseCalc) {
      return { value: rawValue, checkDigit: altDigit, valid: true };
    }
  }

  return unrepaired;
}
|
|
165
|
+
|
|
166
|
+
// Public API of the OCR corrector module: the table-driven corrector, the
// look-alike character helper and the brute-force repair routines.
module.exports = {
  correctOCR,
  getTransformedChars,
  findMatchingStrings,
  repairFieldWithCheckDigit,
  repairIvorianDocumentNumber
};
|