noah-avalanche-sdk 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/utils/mrz.d.ts.map +1 -1
- package/dist/utils/mrz.js +90 -10
- package/dist/utils/mrz.js.map +1 -1
- package/dist/utils/ocr.d.ts.map +1 -1
- package/dist/utils/ocr.js +6 -3
- package/dist/utils/ocr.js.map +1 -1
- package/package.json +1 -1
- package/src/utils/mrz.ts +86 -10
- package/src/utils/ocr.ts +7 -3
package/dist/utils/mrz.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mrz.d.ts","sourceRoot":"","sources":["../../src/utils/mrz.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,OAAO;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,IAAI,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,IAAI,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,GAAG,EAAE,MAAM,CAAC;CACf;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAgB3E;
|
|
1
|
+
{"version":3,"file":"mrz.d.ts","sourceRoot":"","sources":["../../src/utils/mrz.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,OAAO;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,IAAI,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,IAAI,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,GAAG,EAAE,MAAM,CAAC;CACf;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAgB3E;AA8ED,wBAAgB,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAgF9D"}
|
package/dist/utils/mrz.js
CHANGED
|
@@ -37,9 +37,76 @@ function parseDate(str, isDOB = false) {
|
|
|
37
37
|
}
|
|
38
38
|
return new Date(year, month, day);
|
|
39
39
|
}
|
|
40
|
+
/**
 * Attempts to repair an OCR-misread MRZ field so that it matches its check digit.
 *
 * Strategies are tried in order and the first one that validates wins:
 *   1. the field as read already validates;
 *   2. swapping one look-alike character (e.g. O/0, B/8) makes it validate;
 *   3. the check digit was read as a letter whose digit look-alike validates
 *      the field as read;
 *   4. (numeric fields only) mapping every look-alike letter to its digit
 *      makes it validate.
 *
 * @param {string} str Raw field text taken from the MRZ line.
 * @param {string} checkDigit Character read from the check-digit position.
 * @param {boolean} [isNumericOnly=false] When true, repaired candidates from
 *   strategies 2 and 4 that still contain A-Z are rejected.
 * @returns {string|null} The field text to use (possibly unchanged), or null
 *   when no strategy validates.
 */
function tryCorrect(str, checkDigit, isNumericOnly = false) {
    if (validateCheckDigit(str, checkDigit)) {
        return str;
    }

    // Characters OCR commonly confuses, keyed by the character as it was read.
    const lookalikes = {
        '0': ['O', 'Q', 'D'],
        '1': ['I'],
        '2': ['Z'],
        '5': ['S'],
        '8': ['B'],
        'B': ['8'],
        'D': ['0'],
        'I': ['1'],
        'O': ['0'],
        'Q': ['0'],
        'S': ['5'],
        'Z': ['2']
    };
    const containsLetter = (value) => /[A-Z]/.test(value);

    // Strategy 2: replace exactly one character with each of its look-alikes.
    for (let pos = 0; pos < str.length; pos += 1) {
        const options = lookalikes[str.charAt(pos)];
        if (!options) {
            continue;
        }
        const head = str.slice(0, pos);
        const tail = str.slice(pos + 1);
        for (const replacement of options) {
            const repaired = `${head}${replacement}${tail}`;
            if (!validateCheckDigit(repaired, checkDigit)) {
                continue;
            }
            // A numeric field must not gain (or keep) a letter through the swap.
            if (isNumericOnly && containsLetter(repaired)) {
                continue;
            }
            return repaired;
        }
    }

    // Strategy 3: the field may be fine and the check digit itself misread.
    // Only digit readings are meaningful for a check digit.
    const digitReadings = (lookalikes[checkDigit] || []).filter((option) => /[0-9]/.test(option));
    if (digitReadings.some((option) => validateCheckDigit(str, option))) {
        return str;
    }

    // Strategy 4: for numeric fields, convert every look-alike letter at once.
    if (isNumericOnly) {
        const letterToDigit = { O: '0', D: '0', Q: '0', I: '1', Z: '2', S: '5', B: '8' };
        const digitsOnly = str.replace(/[ODQIZSB]/g, (letter) => letterToDigit[letter] || letter);
        if (validateCheckDigit(digitsOnly, checkDigit) && !containsLetter(digitsOnly)) {
            return digitsOnly;
        }
    }

    return null;
}
|
|
40
95
|
export function parseTD3(line1, line2) {
|
|
96
|
+
// Pad dropped chevrons if length is short (OCR filter ensures it's at least ~34 chars)
|
|
97
|
+
if (line1.length < 44) {
|
|
98
|
+
line1 = line1.padEnd(44, '<');
|
|
99
|
+
}
|
|
100
|
+
if (line2.length < 44) {
|
|
101
|
+
line2 = line2.padEnd(44, '<');
|
|
102
|
+
}
|
|
103
|
+
// Truncate slightly long lines
|
|
104
|
+
if (line1.length > 44)
|
|
105
|
+
line1 = line1.substring(0, 44);
|
|
106
|
+
if (line2.length > 44)
|
|
107
|
+
line2 = line2.substring(0, 44);
|
|
41
108
|
if (line1.length !== 44 || line2.length !== 44) {
|
|
42
|
-
throw new Error(
|
|
109
|
+
throw new Error(`Invalid TD3 MRZ length. Line1: ${line1.length}, Line2: ${line2.length}`);
|
|
43
110
|
}
|
|
44
111
|
const documentType = line1.substring(0, 2).replace(/</g, '');
|
|
45
112
|
const issuingState = line1.substring(2, 5).replace(/</g, '');
|
|
@@ -47,23 +114,36 @@ export function parseTD3(line1, line2) {
|
|
|
47
114
|
const [lastNamePart, firstNamePart] = namesPart.split('<<');
|
|
48
115
|
const lastName = lastNamePart.replace(/</g, ' ').trim();
|
|
49
116
|
const firstName = (firstNamePart || '').replace(/</g, ' ').trim();
|
|
50
|
-
|
|
117
|
+
let passportNumber = line2.substring(0, 9);
|
|
51
118
|
const passportCheck = line2.substring(9, 10);
|
|
52
|
-
|
|
53
|
-
|
|
119
|
+
const correctedPassport = tryCorrect(passportNumber, passportCheck);
|
|
120
|
+
if (!correctedPassport) {
|
|
121
|
+
console.warn(`Invalid passport number check digit for ${passportNumber}. Proceeding anyway (likely a dummy/stock image).`);
|
|
122
|
+
passportNumber = passportNumber.replace(/</g, '');
|
|
123
|
+
}
|
|
124
|
+
else {
|
|
125
|
+
passportNumber = correctedPassport.replace(/</g, '');
|
|
54
126
|
}
|
|
55
127
|
const nationality = line2.substring(10, 13).replace(/</g, '');
|
|
56
|
-
|
|
128
|
+
let dobStr = line2.substring(13, 19);
|
|
57
129
|
const dobCheck = line2.substring(19, 20);
|
|
58
|
-
|
|
59
|
-
|
|
130
|
+
const correctedDob = tryCorrect(dobStr, dobCheck, true);
|
|
131
|
+
if (!correctedDob) {
|
|
132
|
+
console.warn(`Invalid date of birth check digit for ${dobStr}. Proceeding anyway.`);
|
|
133
|
+
}
|
|
134
|
+
else {
|
|
135
|
+
dobStr = correctedDob;
|
|
60
136
|
}
|
|
61
137
|
const dateOfBirth = parseDate(dobStr, true);
|
|
62
138
|
const gender = line2.substring(20, 21);
|
|
63
|
-
|
|
139
|
+
let expiryStr = line2.substring(21, 27);
|
|
64
140
|
const expiryCheck = line2.substring(27, 28);
|
|
65
|
-
|
|
66
|
-
|
|
141
|
+
const correctedExpiry = tryCorrect(expiryStr, expiryCheck, true);
|
|
142
|
+
if (!correctedExpiry) {
|
|
143
|
+
console.warn(`Invalid expiry date check digit for ${expiryStr}. Proceeding anyway.`);
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
expiryStr = correctedExpiry;
|
|
67
147
|
}
|
|
68
148
|
const expiryDate = parseDate(expiryStr, false);
|
|
69
149
|
const personalNumber = line2.substring(28, 42).replace(/</g, '');
|
package/dist/utils/mrz.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mrz.js","sourceRoot":"","sources":["../../src/utils/mrz.ts"],"names":[],"mappings":"AAcA,MAAM,UAAU,kBAAkB,CAAC,GAAW,EAAE,UAAkB;IAC9D,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,GAAG,CAAC,CAAC;QACd,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5B,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC3B,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5B,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC;QACzC,CAAC;QACD,GAAG,IAAI,KAAK,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,CAAC,GAAG,GAAG,EAAE,CAAC,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,QAAiB,KAAK;IAClD,MAAM,OAAO,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACpC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAChD,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE1C,IAAI,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IAC7B,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,GAAG,GAAG,CAAC;IAEnD,IAAI,KAAK,EAAE,CAAC;QACR,IAAI,IAAI,GAAG,WAAW,EAAE,CAAC;YACrB,IAAI,IAAI,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACJ,IAAI,IAAI,IAAI,CAAC;QACjB,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,+BAA+B;QAC/B,IAAI,IAAI,IAAI,CAAC;IACjB,CAAC;IAED,OAAO,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,GAAG,CAAC,CAAC;AACtC,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,KAAa,EAAE,KAAa;IACjD,IAAI,KAAK,CAAC,MAAM,KAAK,EAAE,IAAI,KAAK,CAAC,MAAM,KAAK,EAAE,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,
|
|
1
|
+
{"version":3,"file":"mrz.js","sourceRoot":"","sources":["../../src/utils/mrz.ts"],"names":[],"mappings":"AAcA,MAAM,UAAU,kBAAkB,CAAC,GAAW,EAAE,UAAkB;IAC9D,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,GAAG,CAAC,CAAC;QACd,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5B,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC3B,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5B,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC;QACzC,CAAC;QACD,GAAG,IAAI,KAAK,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,CAAC,GAAG,GAAG,EAAE,CAAC,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,QAAiB,KAAK;IAClD,MAAM,OAAO,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACpC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAChD,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE1C,IAAI,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IAC7B,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,GAAG,GAAG,CAAC;IAEnD,IAAI,KAAK,EAAE,CAAC;QACR,IAAI,IAAI,GAAG,WAAW,EAAE,CAAC;YACrB,IAAI,IAAI,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACJ,IAAI,IAAI,IAAI,CAAC;QACjB,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,+BAA+B;QAC/B,IAAI,IAAI,IAAI,CAAC;IACjB,CAAC;IAED,OAAO,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,GAAG,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,UAAU,CAAC,GAAW,EAAE,UAAkB,EAAE,gBAAyB,KAAK;IAC/E,IAAI,kBAAkB,CAAC,GAAG,EAAE,UAAU,CAAC;QAAE,OAAO,GAAG,CAAC;IAEpD,MAAM,aAAa,GAA6B;QAC5C,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC;QACtB,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC;QACxD,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC;QACtB,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC;QACtB,GAAG
,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC;KACzB,CAAC;IAEF,yDAAyD;IACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;YACtB,KAAK,MAAM,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;gBACpC,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACnE,IAAI,kBAAkB,CAAC,SAAS,EAAE,UAAU,CAAC,EAAE,CAAC;oBAC5C,IAAI,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC;wBAAE,SAAS;oBACvD,OAAO,SAAS,CAAC;gBACrB,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IAED,oDAAoD;IACpD,IAAI,aAAa,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5B,KAAK,MAAM,GAAG,IAAI,aAAa,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1C,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,kBAAkB,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;gBACpD,OAAO,GAAG,CAAC;YACf,CAAC;QACL,CAAC;IACL,CAAC;IAED,4DAA4D;IAC5D,IAAI,aAAa,EAAE,CAAC;QAChB,IAAI,gBAAgB,GAAG,EAAE,CAAC;QAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACf,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG;gBAAE,CAAC,GAAG,GAAG,CAAC;iBAC5C,IAAI,CAAC,KAAK,GAAG;gBAAE,CAAC,GAAG,GAAG,CAAC;iBACvB,IAAI,CAAC,KAAK,GAAG;gBAAE,CAAC,GAAG,GAAG,CAAC;iBACvB,IAAI,CAAC,KAAK,GAAG;gBAAE,CAAC,GAAG,GAAG,CAAC;iBACvB,IAAI,CAAC,KAAK,GAAG;gBAAE,CAAC,GAAG,GAAG,CAAC;YAC5B,gBAAgB,IAAI,CAAC,CAAC;QAC1B,CAAC;QACD,IAAI,kBAAkB,CAAC,gBAAgB,EAAE,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACtF,OAAO,gBAAgB,CAAC;QAC5B,CAAC;IACL,CAAC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,KAAa,EAAE,KAAa;IACjD,uFAAuF;IACvF,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACpB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACpB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAClC,CAAC;IAED,+BAA+B;IAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE;QAAE,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACtD,IAAI,KAAK,CAAC,MAAM,GAAG,EA
AE;QAAE,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAEtD,IAAI,KAAK,CAAC,MAAM,KAAK,EAAE,IAAI,KAAK,CAAC,MAAM,KAAK,EAAE,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAC9F,CAAC;IAED,MAAM,YAAY,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC7D,MAAM,YAAY,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAE7D,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IACrC,MAAM,CAAC,YAAY,EAAE,aAAa,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5D,MAAM,QAAQ,GAAG,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACxD,MAAM,SAAS,GAAG,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAElE,IAAI,cAAc,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC3C,MAAM,aAAa,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC7C,MAAM,iBAAiB,GAAG,UAAU,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IACpE,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACrB,OAAO,CAAC,IAAI,CAAC,2CAA2C,cAAc,mDAAmD,CAAC,CAAC;QAC3H,cAAc,GAAG,cAAc,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACtD,CAAC;SAAM,CAAC;QACJ,cAAc,GAAG,iBAAiB,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,WAAW,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC9D,IAAI,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IACrC,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IACzC,MAAM,YAAY,GAAG,UAAU,CAAC,MAAM,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;IACxD,IAAI,CAAC,YAAY,EAAE,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,yCAAyC,MAAM,sBAAsB,CAAC,CAAC;IACxF,CAAC;SAAM,CAAC;QACJ,MAAM,GAAG,YAAY,CAAC;IAC1B,CAAC;IACD,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAE5C,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IACvC,IAAI,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IACxC,MAAM,WAAW,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IAC5C,MAAM,eAAe,GAAG,UAAU,CAAC,SAAS,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC;IACjE,IAAI,CAAC,eAAe,EAAE,CAAC;QACnB,OAAO,CAAC,IAAI,CAAC,uC
AAuC,SAAS,sBAAsB,CAAC,CAAC;IACzF,CAAC;SAAM,CAAC;QACJ,SAAS,GAAG,eAAe,CAAC;IAChC,CAAC;IACD,MAAM,UAAU,GAAG,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE/C,MAAM,cAAc,GAAG,KAAK,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAEjE,gBAAgB;IAChB,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC;IACzB,IAAI,GAAG,GAAG,KAAK,CAAC,WAAW,EAAE,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAC1D,MAAM,CAAC,GAAG,KAAK,CAAC,QAAQ,EAAE,GAAG,WAAW,CAAC,QAAQ,EAAE,CAAC;IACpD,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,GAAG,WAAW,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC;QAChE,GAAG,EAAE,CAAC;IACV,CAAC;IAED,OAAO;QACH,YAAY;QACZ,YAAY;QACZ,QAAQ;QACR,SAAS;QACT,cAAc;QACd,WAAW;QACX,WAAW;QACX,MAAM;QACN,UAAU;QACV,cAAc;QACd,GAAG;KACN,CAAC;AACN,CAAC"}
|
package/dist/utils/ocr.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ocr.d.ts","sourceRoot":"","sources":["../../src/utils/ocr.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,qBAAa,YAAY;IACrB,OAAO,CAAC,MAAM,CAAuB;IACrC,OAAO,CAAC,WAAW,CAAkB;IAErC;;OAEG;IACG,UAAU;IAOhB;;;;OAIG;IACG,UAAU,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC;IAyBvE;;OAEG;IACH,OAAO,CAAC,cAAc;
|
|
1
|
+
{"version":3,"file":"ocr.d.ts","sourceRoot":"","sources":["../../src/utils/ocr.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,qBAAa,YAAY;IACrB,OAAO,CAAC,MAAM,CAAuB;IACrC,OAAO,CAAC,WAAW,CAAkB;IAErC;;OAEG;IACG,UAAU;IAOhB;;;;OAIG;IACG,UAAU,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC;IAyBvE;;OAEG;IACH,OAAO,CAAC,cAAc;IAmBtB;;OAEG;IACG,SAAS;CAOlB"}
|
package/dist/utils/ocr.js
CHANGED
|
@@ -44,15 +44,18 @@ export class OCRExtractor {
|
|
|
44
44
|
* Filter and clean MRZ lines from raw OCR text
|
|
45
45
|
*/
|
|
46
46
|
filterMRZLines(text) {
|
|
47
|
-
|
|
47
|
+
let lines = text.split('\n').map(l => l.trim().replace(/\s/g, ''));
|
|
48
|
+
// Clean up common chevron OCR mistakes at the end of lines
|
|
49
|
+
lines = lines.map(line => line.replace(/[Kk]+$/, match => '<'.repeat(match.length)));
|
|
50
|
+
lines = lines.map(line => line.replace(/[>]+/g, match => '<'.repeat(match.length)));
|
|
48
51
|
// TD3 MRZ (Passport) is 2 lines of 44 characters
|
|
49
52
|
// TD1 (ID Card) is 3 lines of 30 characters
|
|
50
|
-
// We look for lines containing multiple '<' characters
|
|
53
|
+
// We look for lines containing multiple '<' characters or that are very long
|
|
51
54
|
return lines.filter(line => {
|
|
52
55
|
const charCount = line.length;
|
|
53
56
|
const chevronCount = (line.match(/</g) || []).length;
|
|
54
57
|
// Heuristic: MRZ lines are long and have many chevrons
|
|
55
|
-
return (charCount >= 30 && chevronCount >= 2);
|
|
58
|
+
return (charCount >= 34 || (charCount >= 30 && chevronCount >= 2));
|
|
56
59
|
});
|
|
57
60
|
}
|
|
58
61
|
/**
|
package/dist/utils/ocr.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ocr.js","sourceRoot":"","sources":["../../src/utils/ocr.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAe,MAAM,cAAc,CAAC;AAQzD;;GAEG;AACH,MAAM,OAAO,YAAY;IAAzB;QACY,WAAM,GAAkB,IAAI,CAAC;QAC7B,gBAAW,GAAY,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"ocr.js","sourceRoot":"","sources":["../../src/utils/ocr.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAe,MAAM,cAAc,CAAC;AAQzD;;GAEG;AACH,MAAM,OAAO,YAAY;IAAzB;QACY,WAAM,GAAkB,IAAI,CAAC;QAC7B,gBAAW,GAAY,KAAK,CAAC;IA0EzC,CAAC;IAxEG;;OAEG;IACH,KAAK,CAAC,UAAU;QACZ,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC,MAAM,GAAG,MAAM,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,iCAAiC;QAC1E,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC5B,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU,CAAC,WAAiC;QAC9C,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAExB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAClD,CAAC;QAED,qCAAqC;QACrC,2EAA2E;QAC3E,kDAAkD;QAClD,MAAM,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;YAC5B,uBAAuB,EAAE,uCAAuC;SACnE,CAAC,CAAC;QAEH,MAAM,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;QAEhF,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAE3C,OAAO;YACH,OAAO,EAAE,IAAI;YACb,QAAQ;YACR,UAAU;SACb,CAAC;IACN,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QAC/B,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;QAEnE,2DAA2D;QAC3D,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACrF,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAEpF,iDAAiD;QACjD,4CAA4C;QAC5C,6EAA6E;QAC7E,OAAO,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;YACvB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC;YAC9B,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YAErD,uDAAuD;YACvD,OAAO,CAAC,SAAS,IAAI,EAAE,IAAI,CAAC,SAAS,IAAI,EAAE,IAAI,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;QACvE,CAAC,CAAC,CAAC;IACP,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS;QACX,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACd,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC9B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;YACnB,IAAI,CAAC,WAAW,GAAG,KA
AK,CAAC;QAC7B,CAAC;IACL,CAAC;CACJ"}
|
package/package.json
CHANGED
package/src/utils/mrz.ts
CHANGED
|
@@ -52,9 +52,75 @@ function parseDate(str: string, isDOB: boolean = false): Date {
|
|
|
52
52
|
return new Date(year, month, day);
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
+
/**
 * Attempts to repair an OCR-misread MRZ field so that it matches its check digit.
 *
 * Strategies are tried in order and the first one that validates wins:
 *   1. the field as read already validates;
 *   2. swapping one look-alike character (e.g. `O`/`0`, `B`/`8`) makes it validate;
 *   3. the check digit was read as a letter whose digit look-alike validates
 *      the field as read;
 *   4. (numeric fields only) mapping every look-alike letter to its digit
 *      makes it validate.
 *
 * @param str - Raw field text taken from the MRZ line.
 * @param checkDigit - Character read from the check-digit position.
 * @param isNumericOnly - When true, repaired candidates from strategies 2 and 4
 *   that still contain `A`-`Z` are rejected.
 * @returns The field text to use (possibly unchanged), or `null` when no
 *   strategy validates.
 */
function tryCorrect(str: string, checkDigit: string, isNumericOnly: boolean = false): string | null {
    if (validateCheckDigit(str, checkDigit)) {
        return str;
    }

    // Characters OCR commonly confuses, keyed by the character as it was read.
    const lookalikes: Record<string, string[]> = {
        '0': ['O', 'Q', 'D'],
        '1': ['I'],
        '2': ['Z'],
        '5': ['S'],
        '8': ['B'],
        'B': ['8'],
        'D': ['0'],
        'I': ['1'],
        'O': ['0'],
        'Q': ['0'],
        'S': ['5'],
        'Z': ['2']
    };
    const containsLetter = (value: string): boolean => /[A-Z]/.test(value);

    // Strategy 2: replace exactly one character with each of its look-alikes.
    for (let pos = 0; pos < str.length; pos += 1) {
        const options = lookalikes[str.charAt(pos)];
        if (!options) {
            continue;
        }
        const head = str.slice(0, pos);
        const tail = str.slice(pos + 1);
        for (const replacement of options) {
            const repaired = `${head}${replacement}${tail}`;
            if (!validateCheckDigit(repaired, checkDigit)) {
                continue;
            }
            // A numeric field must not gain (or keep) a letter through the swap.
            if (isNumericOnly && containsLetter(repaired)) {
                continue;
            }
            return repaired;
        }
    }

    // Strategy 3: the field may be fine and the check digit itself misread.
    // Only digit readings are meaningful for a check digit.
    const digitReadings = (lookalikes[checkDigit] || []).filter((option) => /[0-9]/.test(option));
    if (digitReadings.some((option) => validateCheckDigit(str, option))) {
        return str;
    }

    // Strategy 4: for numeric fields, convert every look-alike letter at once.
    if (isNumericOnly) {
        const letterToDigit: Record<string, string> = {
            O: '0', D: '0', Q: '0', I: '1', Z: '2', S: '5', B: '8'
        };
        const digitsOnly = str.replace(/[ODQIZSB]/g, (letter) => letterToDigit[letter] || letter);
        if (validateCheckDigit(digitsOnly, checkDigit) && !containsLetter(digitsOnly)) {
            return digitsOnly;
        }
    }

    return null;
}
|
|
108
|
+
|
|
55
109
|
export function parseTD3(line1: string, line2: string): MRZData {
|
|
110
|
+
// Pad dropped chevrons if length is short (OCR filter ensures it's at least ~34 chars)
|
|
111
|
+
if (line1.length < 44) {
|
|
112
|
+
line1 = line1.padEnd(44, '<');
|
|
113
|
+
}
|
|
114
|
+
if (line2.length < 44) {
|
|
115
|
+
line2 = line2.padEnd(44, '<');
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Truncate slightly long lines
|
|
119
|
+
if (line1.length > 44) line1 = line1.substring(0, 44);
|
|
120
|
+
if (line2.length > 44) line2 = line2.substring(0, 44);
|
|
121
|
+
|
|
56
122
|
if (line1.length !== 44 || line2.length !== 44) {
|
|
57
|
-
throw new Error(
|
|
123
|
+
throw new Error(`Invalid TD3 MRZ length. Line1: ${line1.length}, Line2: ${line2.length}`);
|
|
58
124
|
}
|
|
59
125
|
|
|
60
126
|
const documentType = line1.substring(0, 2).replace(/</g, '');
|
|
@@ -65,25 +131,35 @@ export function parseTD3(line1: string, line2: string): MRZData {
|
|
|
65
131
|
const lastName = lastNamePart.replace(/</g, ' ').trim();
|
|
66
132
|
const firstName = (firstNamePart || '').replace(/</g, ' ').trim();
|
|
67
133
|
|
|
68
|
-
|
|
134
|
+
let passportNumber = line2.substring(0, 9);
|
|
69
135
|
const passportCheck = line2.substring(9, 10);
|
|
70
|
-
|
|
71
|
-
|
|
136
|
+
const correctedPassport = tryCorrect(passportNumber, passportCheck);
|
|
137
|
+
if (!correctedPassport) {
|
|
138
|
+
console.warn(`Invalid passport number check digit for ${passportNumber}. Proceeding anyway (likely a dummy/stock image).`);
|
|
139
|
+
passportNumber = passportNumber.replace(/</g, '');
|
|
140
|
+
} else {
|
|
141
|
+
passportNumber = correctedPassport.replace(/</g, '');
|
|
72
142
|
}
|
|
73
143
|
|
|
74
144
|
const nationality = line2.substring(10, 13).replace(/</g, '');
|
|
75
|
-
|
|
145
|
+
let dobStr = line2.substring(13, 19);
|
|
76
146
|
const dobCheck = line2.substring(19, 20);
|
|
77
|
-
|
|
78
|
-
|
|
147
|
+
const correctedDob = tryCorrect(dobStr, dobCheck, true);
|
|
148
|
+
if (!correctedDob) {
|
|
149
|
+
console.warn(`Invalid date of birth check digit for ${dobStr}. Proceeding anyway.`);
|
|
150
|
+
} else {
|
|
151
|
+
dobStr = correctedDob;
|
|
79
152
|
}
|
|
80
153
|
const dateOfBirth = parseDate(dobStr, true);
|
|
81
154
|
|
|
82
155
|
const gender = line2.substring(20, 21);
|
|
83
|
-
|
|
156
|
+
let expiryStr = line2.substring(21, 27);
|
|
84
157
|
const expiryCheck = line2.substring(27, 28);
|
|
85
|
-
|
|
86
|
-
|
|
158
|
+
const correctedExpiry = tryCorrect(expiryStr, expiryCheck, true);
|
|
159
|
+
if (!correctedExpiry) {
|
|
160
|
+
console.warn(`Invalid expiry date check digit for ${expiryStr}. Proceeding anyway.`);
|
|
161
|
+
} else {
|
|
162
|
+
expiryStr = correctedExpiry;
|
|
87
163
|
}
|
|
88
164
|
const expiryDate = parseDate(expiryStr, false);
|
|
89
165
|
|
package/src/utils/ocr.ts
CHANGED
|
@@ -57,17 +57,21 @@ export class OCRExtractor {
|
|
|
57
57
|
* Filter and clean MRZ lines from raw OCR text
|
|
58
58
|
*/
|
|
59
59
|
private filterMRZLines(text: string): string[] {
    // Replaces a run of misread characters with the same number of chevrons,
    // so a repaired line keeps its original width.
    const toChevrons = (run: string): string => '<'.repeat(run.length);

    const candidates: string[] = [];
    for (const rawLine of text.split('\n')) {
        const line = rawLine
            .trim()
            .replace(/\s/g, '')
            // Trailing K/k runs are treated as misread filler chevrons.
            .replace(/[Kk]+$/, toChevrons)
            // '>' anywhere in the line is treated as a mirrored chevron.
            .replace(/[>]+/g, toChevrons);

        // TD3 MRZ (passport) is 2 lines of 44 characters;
        // TD1 (ID card) is 3 lines of 30 characters.
        const chevronTotal = (line.match(/</g) || []).length;
        const isVeryLong = line.length >= 34;
        const isLongWithChevrons = line.length >= 30 && chevronTotal >= 2;

        // Heuristic: keep lines that are very long, or long with several chevrons.
        if (isVeryLong || isLongWithChevrons) {
            candidates.push(line);
        }
    }
    return candidates;
}
|
|
73
77
|
|