id-scanner-lib 1.6.6 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/id-scanner-lib.esm.js +915 -838
- package/dist/id-scanner-lib.esm.js.map +1 -1
- package/dist/id-scanner-lib.js +915 -838
- package/dist/id-scanner-lib.js.map +1 -1
- package/package.json +1 -1
- package/src/core/camera-manager.ts +43 -76
- package/src/core/camera-stream-manager.ts +318 -0
- package/src/core/logger.ts +158 -81
- package/src/modules/face/face-comparator.ts +150 -0
- package/src/modules/face/face-detector-options.ts +104 -0
- package/src/modules/face/face-detector.ts +121 -376
- package/src/modules/face/face-detector.ts.bak +991 -0
- package/src/modules/face/face-model-loader.ts +222 -0
- package/src/modules/face/face-result-converter.ts +225 -0
- package/src/modules/face/face-tracker.ts +207 -0
- package/src/modules/face/liveness-detector.ts +2 -2
- package/src/modules/id-card/id-card-text-parser.ts +151 -0
- package/src/modules/id-card/ocr-processor.ts +20 -257
- package/src/modules/id-card/ocr-worker.ts +2 -183
- package/src/utils/canvas-pool.ts +273 -0
- package/src/utils/edge-detector.ts +232 -0
- package/src/utils/image-processing.ts +110 -446
- package/src/utils/index.ts +1 -0
- package/src/core/plugin-manager.ts +0 -429
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { IDCardType, IDCardInfo } from './types';
|
|
2
|
+
|
|
3
|
+
/**
 * Normalizes a date string to the `YYYY-MM-DD` form.
 *
 * Recognizes a four-digit year followed by a one- or two-digit month and day.
 * The parts may be separated by `-`, `.`, `/`, whitespace or the CJK markers
 * 年 / 月, and may be followed by 日 (e.g. `2022年1月5日`, `2022.01.05`,
 * `2022/1/5`). Separators are optional, so the compact `YYYYMMDD` form is
 * handled by the same pattern. Month and day are left-padded to two digits.
 *
 * @param dateStr Raw date text, e.g. a fragment captured from OCR output.
 * @returns The normalized date, or `dateStr` unchanged when no date is found.
 */
function formatDateString(dateStr: string): string {
  const dateMatch = dateStr.match(
    /(\d{4})[-\.\/\u5e74\s]*(\d{1,2})[-\.\/\u6708\s]*(\d{1,2})[日]*/
  );
  if (!dateMatch) {
    // Nothing that looks like a date (e.g. "长期"): hand the input back as-is.
    return dateStr;
  }

  const year = dateMatch[1];
  const month = dateMatch[2].padStart(2, "0");
  const day = dateMatch[3].padStart(2, "0");
  return `${year}-${month}-${day}`;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Checks whether a string is a well-formed 18-character citizen ID number.
 *
 * A number is accepted only when all of the following hold:
 *  1. it consists of 17 digits followed by a digit or an uppercase `X`;
 *  2. characters 7-14 (`YYYYMMDD`) form a real calendar date, including
 *     month lengths and leap years;
 *  3. the last character equals the MOD 11-2 check character computed from
 *     the first 17 digits.
 *
 * @param idNumber Candidate ID number.
 * @returns `true` when the number passes every check, otherwise `false`.
 */
function validateIDNumber(idNumber: string): boolean {
  if (!idNumber || !/^\d{17}[\dX]$/.test(idNumber)) return false;

  // Birth date is embedded at positions 6..13 (0-based) as YYYYMMDD.
  const year = Number(idNumber.slice(6, 10));
  const month = Number(idNumber.slice(10, 12));
  const day = Number(idNumber.slice(12, 14));
  if (month < 1 || month > 12 || day < 1) return false;

  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysPerMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  if (day > (daysPerMonth[month - 1] ?? 0)) return false;

  // MOD 11-2 check character: weighted digit sum modulo 11 indexes the table.
  const weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2];
  const checkChars = "10X98765432";
  const weightedSum = weights.reduce(
    (sum, weight, index) => sum + weight * Number(idNumber.charAt(index)),
    0
  );
  return checkChars.charAt(weightedSum % 11) === idNumber.charAt(17);
}
|
|
36
|
+
|
|
37
|
+
/**
 * IDCardTextParser - single place for turning ID-card OCR text into
 * structured fields. Holds the parsing logic that used to be duplicated in
 * ocr-processor.ts and ocr-worker.ts.
 */
export class IDCardTextParser {
  /**
   * Parses raw OCR text of an ID card.
   *
   * Label-based patterns are matched against a whitespace-collapsed copy of
   * the text; the name fallback works on the individual lines of the raw
   * text. Fields that cannot be found are simply left unset.
   *
   * @param text Raw text produced by OCR (may span several lines).
   * @returns The fields that could be extracted; an empty object when
   *          nothing was recognized or `text` is empty / not a string.
   */
  static parse(text: string): IDCardInfo {
    const info: IDCardInfo = {};
    if (typeof text !== "string" || !text.trim()) return info;

    // Lines MUST be taken from the raw text: `processedText` below has every
    // newline collapsed into a space, so splitting it would always yield a
    // single element and the per-line name fallback could never work.
    const lines = text
      .split(/\r?\n/)
      .map((line) => line.replace(/\s+/g, ""))
      .filter((line) => line.length > 0);
    const processedText = text.replace(/\s+/g, " ").trim();

    // 1. ID number: prefer the labelled match, fall back to any 18-char run.
    //    A lowercase check character is accepted and normalized to "X".
    const idNumberRegex = /(\d{17}[\dXx])/;
    const idNumberWithPrefixRegex = /公民身份号码[\s\:]*(\d{17}[\dXx])/;
    const basicMatch = processedText.match(idNumberRegex);
    const prefixMatch = processedText.match(idNumberWithPrefixRegex);
    if (prefixMatch && prefixMatch[1]) {
      info.idNumber = prefixMatch[1].toUpperCase();
    } else if (basicMatch && basicMatch[1]) {
      info.idNumber = basicMatch[1].toUpperCase();
    }

    // 2. Name: labelled match first, otherwise the first short all-Chinese
    //    line that is not one of the card's own labels or titles.
    const nameWithLabelRegex = /姓名[\s\:]*([一-龥]{2,4})/;
    const nameMatch = processedText.match(nameWithLabelRegex);
    if (nameMatch && nameMatch[1]) {
      info.name = nameMatch[1].trim();
    } else {
      const labelWords = /姓名|性别|民族|出生|住址|公民|居民|身份|共和国|签发|机关|有效|期限|长期/;
      for (const line of lines) {
        if (
          line.length >= 2 &&
          line.length <= 5 &&
          /^[一-龥]+$/.test(line) &&
          !labelWords.test(line)
        ) {
          info.name = line;
          break;
        }
      }
    }

    // 3. Gender and ethnicity: combined pattern first, then each separately.
    const genderAndNationalityRegex = /性别[\s\:]*([男女])[\s ]*民族[\s\:]*([一-龥]+族)/;
    const genderOnlyRegex = /性别[\s\:]*([男女])/;
    const nationalityOnlyRegex = /民族[\s\:]*([一-龥]+族)/;
    const genderNationalityMatch = processedText.match(genderAndNationalityRegex);
    if (genderNationalityMatch) {
      info.gender = genderNationalityMatch[1];
      info.ethnicity = genderNationalityMatch[2];
    } else {
      const genderOnlyMatch = processedText.match(genderOnlyRegex);
      const nationalityOnlyMatch = processedText.match(nationalityOnlyRegex);
      if (genderOnlyMatch) info.gender = genderOnlyMatch[1];
      if (nationalityOnlyMatch) info.ethnicity = nationalityOnlyMatch[1];
    }

    // 4. Card side, decided by which labels are present.
    if (processedText.includes("出生") || processedText.includes("公民身份号码")) {
      info.type = IDCardType.FRONT;
    } else if (processedText.includes("签发机关") || processedText.includes("有效期")) {
      info.type = IDCardType.BACK;
    }

    // 5. Birth date: labelled patterns first; if none matches, derive it
    //    from positions 7-14 of the ID number.
    const birthDateRegex1 = /出生[\s\:]*(\d{4})年(\d{1,2})月(\d{1,2})[日号]/;
    const birthDateRegex2 = /出生[\s\:]*(\d{4})[-\/\.](\d{1,2})[-\/\.](\d{1,2})/;
    const birthDateRegex3 = /出生日期[\s\:]*(\d{4})[-\/\.\u5e74](\d{1,2})[-\/\.\u6708](\d{1,2})[日号]?/;
    const birthDateMatch =
      processedText.match(birthDateRegex1) ||
      processedText.match(birthDateRegex2) ||
      processedText.match(birthDateRegex3);
    if (birthDateMatch) {
      const year = birthDateMatch[1];
      const month = birthDateMatch[2].padStart(2, "0");
      const day = birthDateMatch[3].padStart(2, "0");
      info.birthDate = `${year}-${month}-${day}`;
    } else if (info.idNumber && info.idNumber.length === 18) {
      const year = info.idNumber.substring(6, 10);
      const month = info.idNumber.substring(10, 12);
      const day = info.idNumber.substring(12, 14);
      info.birthDate = `${year}-${month}-${day}`;
    }

    // 6. Address: text after the label up to the next known label, or a
    //    looser character-class match when no following label exists.
    const addressRegex1 = /住址[\s\:]*([\s\S]*?)(?=公民身份|出生|性别|签发)/;
    const addressRegex2 = /住址[\s\:]*([一-龥a-zA-Z0-9\s\.\-]+)/;
    const addressMatch = processedText.match(addressRegex1) || processedText.match(addressRegex2);
    if (addressMatch && addressMatch[1]) {
      let address = addressMatch[1].replace(/\s+/g, "");
      if (address.length > 70) address = address.substring(0, 70);
      // An address without a single Chinese character is treated as noise.
      if (!/[一-龥]/.test(address)) address = "";
      info.address = address;
    }

    // 7. Issuing authority.
    const authorityRegex1 = /签发机关[\s\:]*([\s\S]*?)(?=有效|公民|出生|\d{8}|$)/;
    const authorityRegex2 = /签发机关[\s\:]*([一-龥\s]+)/;
    const authorityMatch = processedText.match(authorityRegex1) || processedText.match(authorityRegex2);
    if (authorityMatch && authorityMatch[1]) {
      info.issueAuthority = authorityMatch[1].replace(/\s+/g, "");
    }

    // 8. Validity period: separated dates first, then the compact 8-digit
    //    form. An end part without digits is reported as "长期有效".
    const validPeriodRegex1 = /有效期限[\s\:]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日\s]*)[-\s]*(至|-)[-\s]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日]*|[永久长期]*)/;
    const validPeriodRegex2 = /有效期限[\s\:]*(\d{8})[-\s]*(至|-)[-\s]*(\d{8}|[永久长期]*)/;
    const validPeriodMatch = processedText.match(validPeriodRegex1) || processedText.match(validPeriodRegex2);
    if (validPeriodMatch && validPeriodMatch[1] && validPeriodMatch[3]) {
      const startDate = formatDateString(validPeriodMatch[1]);
      const endDate = /\d/.test(validPeriodMatch[3])
        ? formatDateString(validPeriodMatch[3])
        : "长期有效";
      info.validFrom = startDate;
      info.validTo = endDate;
      info.validPeriod = `${startDate}-${endDate}`;
    } else if (validPeriodMatch) {
      // End part was empty: keep whatever followed the label verbatim.
      info.validPeriod = validPeriodMatch[0].replace("有效期限", "").trim();
    }

    return info;
  }
}
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
import { EventEmitter } from '../../core/event-emitter';
|
|
8
8
|
import { Logger } from '../../core/logger';
|
|
9
9
|
import { IDCardType, IDCardInfo } from './types';
|
|
10
|
+
import { IDCardTextParser } from './id-card-text-parser';
|
|
10
11
|
import {
|
|
11
12
|
createWorker,
|
|
12
13
|
Worker as TesseractWorker,
|
|
@@ -18,11 +19,11 @@ import { LRUCache, calculateImageFingerprint } from "../../utils/performance"
|
|
|
18
19
|
import {
|
|
19
20
|
isWorkerSupported,
|
|
20
21
|
createWorker as createCustomWorker,
|
|
21
|
-
} from "../../utils/worker"
|
|
22
|
+
} from "../../utils/worker"
|
|
22
23
|
import { processOCRInWorker, OCRProcessInput } from "./ocr-worker"
|
|
23
24
|
import { Disposable } from "../../utils/resource-manager"
|
|
24
25
|
|
|
25
|
-
//
|
|
26
|
+
// 自定义日志函数类型,兼容字符串和LoggerMessage
|
|
26
27
|
type LoggerFunction = ((message: string | LoggerMessage) => void) | undefined;
|
|
27
28
|
|
|
28
29
|
/**
|
|
@@ -96,7 +97,7 @@ export class OCRProcessor implements Disposable {
|
|
|
96
97
|
/**
|
|
97
98
|
* 初始化OCR引擎
|
|
98
99
|
*
|
|
99
|
-
* 加载Tesseract OCR
|
|
100
|
+
* 加载Tesseract OCR引擎和中文简体语言包,并设置适合身份证识别的参数
|
|
100
101
|
*
|
|
101
102
|
* @returns {Promise<void>} 初始化完成的Promise
|
|
102
103
|
*/
|
|
@@ -122,11 +123,11 @@ export class OCRProcessor implements Disposable {
|
|
|
122
123
|
await this.worker.initialize("chi_sim")
|
|
123
124
|
await this.worker.setParameters({
|
|
124
125
|
tessedit_char_whitelist:
|
|
125
|
-
"0123456789X年月日壹贰叁肆伍陆柒捌玖拾民族汉满回维吾尔藏苗彝壮朝鲜侗瑶白土家哈尼哈萨克傣黎傈僳佤高山拉祜水东乡纳西景颇柯尔克孜达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌普米塔吉克怒乌孜别克俄罗斯鄂温克德昂保安裕固京塔塔尔独龙鄂伦春赫哲门巴珞巴基诺男女住址出生公民身份号码签发机关有效期省市区县乡镇街道号楼单元室ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", //
|
|
126
|
+
"0123456789X年月日壹贰叁肆伍陆柒捌玖拾民族汉满回维吾尔藏苗彝壮朝鲜侗瑶白土家哈尼哈萨克傣黎傈僳佤高山拉祜水东乡纳西景颇柯尔克孜达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌普米塔吉克怒乌孜别克俄罗斯鄂温克德昂保安裕固京塔塔尔独龙鄂伦春赫哲门巴珞巴基诺男女住址出生公民身份号码签发机关有效期省市区县乡镇街道号楼单元室ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", // 优化字符白名单,增加常见地址字符,移除部分不常用汉字
|
|
126
127
|
})
|
|
127
|
-
//
|
|
128
|
+
// 增加一些针对性的参数,提高识别率
|
|
128
129
|
await this.worker.setParameters({
|
|
129
|
-
tessedit_pageseg_mode: 7, // PSM_SINGLE_LINE
|
|
130
|
+
tessedit_pageseg_mode: 7, // PSM_SINGLE_LINE,使用数字而不是字符串
|
|
130
131
|
preserve_interword_spaces: "1", // 保留单词间的空格
|
|
131
132
|
})
|
|
132
133
|
|
|
@@ -145,7 +146,7 @@ export class OCRProcessor implements Disposable {
|
|
|
145
146
|
await this.initialize()
|
|
146
147
|
}
|
|
147
148
|
|
|
148
|
-
//
|
|
149
|
+
// 计算图像指纹,用于缓存查找
|
|
149
150
|
if (this.options.enableCache) {
|
|
150
151
|
const fingerprint = calculateImageFingerprint(imageData)
|
|
151
152
|
|
|
@@ -171,7 +172,7 @@ export class OCRProcessor implements Disposable {
|
|
|
171
172
|
this.options.brightness !== undefined ? this.options.brightness : 10, // 调整默认亮度
|
|
172
173
|
contrast:
|
|
173
174
|
this.options.contrast !== undefined ? this.options.contrast : 20, // 调整默认对比度
|
|
174
|
-
sharpen: true, //
|
|
175
|
+
sharpen: true, // 默认启用锐化,通常对OCR有益
|
|
175
176
|
})
|
|
176
177
|
|
|
177
178
|
// 转换为base64供Tesseract处理
|
|
@@ -199,13 +200,13 @@ export class OCRProcessor implements Disposable {
|
|
|
199
200
|
// 使用Worker线程处理
|
|
200
201
|
const result = await this.ocrWorker.postMessage({
|
|
201
202
|
imageBase64: base64Image,
|
|
202
|
-
//
|
|
203
|
+
// 不传递函数对象,避免DataCloneError
|
|
203
204
|
tessWorkerOptions: {},
|
|
204
205
|
})
|
|
205
206
|
|
|
206
207
|
idCardInfo = result.idCardInfo
|
|
207
208
|
this.options.logger?.(
|
|
208
|
-
`OCR
|
|
209
|
+
`OCR处理完成,用时: ${result.processingTime.toFixed(2)}ms`
|
|
209
210
|
)
|
|
210
211
|
} else {
|
|
211
212
|
// 使用主线程处理
|
|
@@ -224,11 +225,11 @@ export class OCRProcessor implements Disposable {
|
|
|
224
225
|
}
|
|
225
226
|
|
|
226
227
|
// 解析身份证信息
|
|
227
|
-
idCardInfo =
|
|
228
|
+
idCardInfo = IDCardTextParser.parse(data.text)
|
|
228
229
|
|
|
229
230
|
const processingTime = performance.now() - startTime
|
|
230
231
|
this.options.logger?.(
|
|
231
|
-
`OCR
|
|
232
|
+
`OCR处理完成,用时: ${processingTime.toFixed(2)}ms`
|
|
232
233
|
)
|
|
233
234
|
}
|
|
234
235
|
|
|
@@ -241,15 +242,15 @@ export class OCRProcessor implements Disposable {
|
|
|
241
242
|
return idCardInfo
|
|
242
243
|
} catch (error) {
|
|
243
244
|
// 改进错误处理
|
|
244
|
-
const errorMessage = error instanceof Error
|
|
245
|
-
? error.message
|
|
246
|
-
: typeof error === 'object'
|
|
247
|
-
? JSON.stringify(error)
|
|
245
|
+
const errorMessage = error instanceof Error
|
|
246
|
+
? error.message
|
|
247
|
+
: typeof error === 'object'
|
|
248
|
+
? JSON.stringify(error)
|
|
248
249
|
: String(error);
|
|
249
|
-
|
|
250
|
+
|
|
250
251
|
this.options.logger?.(`OCR识别错误: ${errorMessage}`);
|
|
251
|
-
|
|
252
|
-
// 返回 null
|
|
252
|
+
|
|
253
|
+
// 返回 null,让调用方知道识别失败
|
|
253
254
|
return null;
|
|
254
255
|
}
|
|
255
256
|
}
|
|
@@ -263,244 +264,6 @@ export class OCRProcessor implements Disposable {
|
|
|
263
264
|
* @param {string} text - OCR识别到的文本
|
|
264
265
|
* @returns {IDCardInfo} 提取到的身份证信息对象
|
|
265
266
|
*/
|
|
266
|
-
/**
|
|
267
|
-
* 格式化日期字符串为标准格式 (YYYY-MM-DD)
|
|
268
|
-
* @param dateStr 原始日期字符串
|
|
269
|
-
* @returns 格式化后的日期字符串
|
|
270
|
-
*/
|
|
271
|
-
private formatDateString(dateStr: string): string {
|
|
272
|
-
// 先尝试提取年月日
|
|
273
|
-
const dateMatch = dateStr.match(
|
|
274
|
-
/(\d{4})[-\.\u5e74\s]*(\d{1,2})[-\.\u6708\s]*(\d{1,2})[日]*/
|
|
275
|
-
)
|
|
276
|
-
if (dateMatch) {
|
|
277
|
-
const year = dateMatch[1]
|
|
278
|
-
const month = dateMatch[2].padStart(2, "0")
|
|
279
|
-
const day = dateMatch[3].padStart(2, "0")
|
|
280
|
-
return `${year}-${month}-${day}`
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
// 如果是纯数字格式如 20220101
|
|
284
|
-
if (/^\d{8}$/.test(dateStr)) {
|
|
285
|
-
const year = dateStr.substring(0, 4)
|
|
286
|
-
const month = dateStr.substring(4, 6)
|
|
287
|
-
const day = dateStr.substring(6, 8)
|
|
288
|
-
return `${year}-${month}-${day}`
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
// 如果无法格式化,返回原始字符串
|
|
292
|
-
return dateStr
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
/**
|
|
296
|
-
* 验证身份证号是否符合规则
|
|
297
|
-
* @param idNumber 身份证号
|
|
298
|
-
* @returns 是否有效
|
|
299
|
-
*/
|
|
300
|
-
private validateIDNumber(idNumber: string): boolean {
|
|
301
|
-
// 基本验证,校验位有效性和长度
|
|
302
|
-
if (!idNumber || idNumber.length !== 18) {
|
|
303
|
-
return false
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
// 检查格式,前17位必须为数字,最后一位可以是数字或'X'
|
|
307
|
-
const pattern = /^\d{17}[\dX]$/
|
|
308
|
-
if (!pattern.test(idNumber)) {
|
|
309
|
-
return false
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
// 检查日期部分
|
|
313
|
-
const year = parseInt(idNumber.substr(6, 4))
|
|
314
|
-
const month = parseInt(idNumber.substr(10, 2))
|
|
315
|
-
const day = parseInt(idNumber.substr(12, 2))
|
|
316
|
-
|
|
317
|
-
if (month < 1 || month > 12 || day < 1 || day > 31) {
|
|
318
|
-
return false
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
// 更详细的检查可以添加校验位的验证等逻辑...
|
|
322
|
-
|
|
323
|
-
return true
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
private parseIDCardText(text: string): IDCardInfo {
|
|
327
|
-
const info: IDCardInfo = {}
|
|
328
|
-
|
|
329
|
-
// 预处理文本,清除多余空白
|
|
330
|
-
const processedText = text.replace(/\s+/g, " ").trim()
|
|
331
|
-
|
|
332
|
-
// 拆分为行,并过滤空行
|
|
333
|
-
const lines = processedText.split("\n").filter((line) => line.trim())
|
|
334
|
-
|
|
335
|
-
// 解析身份证号码 - 多种模式匹配
|
|
336
|
-
// 1. 普通18位身份证号模式
|
|
337
|
-
const idNumberRegex = /(\d{17}[\dX])/
|
|
338
|
-
// 2. 带前缀的模式
|
|
339
|
-
const idNumberWithPrefixRegex = /公民身份号码[\s\:]*(\d{17}[\dX])/
|
|
340
|
-
|
|
341
|
-
// 尝试所有模式
|
|
342
|
-
let idNumber = null
|
|
343
|
-
const basicMatch = processedText.match(idNumberRegex)
|
|
344
|
-
const prefixMatch = processedText.match(idNumberWithPrefixRegex)
|
|
345
|
-
|
|
346
|
-
if (prefixMatch && prefixMatch[1]) {
|
|
347
|
-
idNumber = prefixMatch[1] // 首选带前缀的匹配,因为最可靠
|
|
348
|
-
} else if (basicMatch && basicMatch[1]) {
|
|
349
|
-
idNumber = basicMatch[1] // 其次是常规匹配
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
if (idNumber) {
|
|
353
|
-
info.idNumber = idNumber
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
// 解析姓名 - 使用多种策略
|
|
357
|
-
// 1. 直接匹配姓名标签近的内容
|
|
358
|
-
const nameWithLabelRegex = /姓名[\s\:]*([一-龥]{2,4})/
|
|
359
|
-
const nameMatch = processedText.match(nameWithLabelRegex)
|
|
360
|
-
|
|
361
|
-
// 2. 分析行文本寻找姓名
|
|
362
|
-
if (nameMatch && nameMatch[1]) {
|
|
363
|
-
info.name = nameMatch[1].trim()
|
|
364
|
-
} else {
|
|
365
|
-
// 备用方案:查找短行且内容全是汉字
|
|
366
|
-
for (const line of lines) {
|
|
367
|
-
if (
|
|
368
|
-
line.length >= 2 &&
|
|
369
|
-
line.length <= 5 &&
|
|
370
|
-
/^[一-龥]+$/.test(line) &&
|
|
371
|
-
!/性别|民族|住址|公民|签发|有效/.test(line)
|
|
372
|
-
) {
|
|
373
|
-
info.name = line.trim()
|
|
374
|
-
break
|
|
375
|
-
}
|
|
376
|
-
}
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
// 解析性别和民族 - 多种模式匹配
|
|
380
|
-
// 1. 标准格式匹配
|
|
381
|
-
const genderAndNationalityRegex =
|
|
382
|
-
/性别[\s\:]*([男女])[\s ]*民族[\s\:]*([一-龥]+族)/
|
|
383
|
-
const genderNationalityMatch = processedText.match(
|
|
384
|
-
genderAndNationalityRegex
|
|
385
|
-
)
|
|
386
|
-
|
|
387
|
-
// 2. 只匹配性别
|
|
388
|
-
const genderOnlyRegex = /性别[\s\:]*([男女])/
|
|
389
|
-
const genderOnlyMatch = processedText.match(genderOnlyRegex)
|
|
390
|
-
|
|
391
|
-
// 3. 只匹配民族
|
|
392
|
-
const nationalityOnlyRegex = /民族[\s\:]*([一-龥]+族)/
|
|
393
|
-
const nationalityOnlyMatch = processedText.match(nationalityOnlyRegex)
|
|
394
|
-
|
|
395
|
-
if (genderNationalityMatch) {
|
|
396
|
-
info.gender = genderNationalityMatch[1]
|
|
397
|
-
info.nationality = genderNationalityMatch[2]
|
|
398
|
-
} else {
|
|
399
|
-
// 分开获取
|
|
400
|
-
if (genderOnlyMatch) info.gender = genderOnlyMatch[1]
|
|
401
|
-
if (nationalityOnlyMatch) info.nationality = nationalityOnlyMatch[1]
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
// 解析出生日期 - 支持多种格式
|
|
405
|
-
// 1. 标准格式:YYYY年MM月DD日
|
|
406
|
-
const birthDateRegex1 = /出生[\s\:]*(\d{4})年(\d{1,2})月(\d{1,2})[日号]/
|
|
407
|
-
// 2. 美式日期格式:YYYY-MM-DD或YYYY/MM/DD
|
|
408
|
-
const birthDateRegex2 = /出生[\s\:]*(\d{4})[-\/\.](\d{1,2})[-\/\.](\d{1,2})/
|
|
409
|
-
// 3. 带前缀的格式
|
|
410
|
-
const birthDateRegex3 =
|
|
411
|
-
/出生日期[\s\:]*(\d{4})[-\/\.\u5e74](\d{1,2})[-\/\.\u6708](\d{1,2})[日号]?/
|
|
412
|
-
|
|
413
|
-
let birthDateMatch =
|
|
414
|
-
processedText.match(birthDateRegex1) ||
|
|
415
|
-
processedText.match(birthDateRegex2) ||
|
|
416
|
-
processedText.match(birthDateRegex3)
|
|
417
|
-
|
|
418
|
-
// 4. 从身份证号码中提取出生日期(如果上述方法失败)
|
|
419
|
-
if (!birthDateMatch && info.idNumber && info.idNumber.length === 18) {
|
|
420
|
-
const year = info.idNumber.substring(6, 10)
|
|
421
|
-
const month = info.idNumber.substring(10, 12)
|
|
422
|
-
const day = info.idNumber.substring(12, 14)
|
|
423
|
-
info.birthDate = `${year}-${month}-${day}`
|
|
424
|
-
} else if (birthDateMatch) {
|
|
425
|
-
// 确保月份和日期是两位数
|
|
426
|
-
const year = birthDateMatch[1]
|
|
427
|
-
const month = birthDateMatch[2].padStart(2, "0")
|
|
428
|
-
const day = birthDateMatch[3].padStart(2, "0")
|
|
429
|
-
info.birthDate = `${year}-${month}-${day}`
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
// 解析地址 - 改进的正则匹配
|
|
433
|
-
// 1. 常规模式
|
|
434
|
-
const addressRegex1 = /住址[\s\:]*([\s\S]*?)(?=公民身份|出生|性别|签发)/
|
|
435
|
-
// 2. 更宽松的模式
|
|
436
|
-
const addressRegex2 = /住址[\s\:]*([一-龥a-zA-Z0-9\s\.\-]+)/
|
|
437
|
-
|
|
438
|
-
const addressMatch =
|
|
439
|
-
processedText.match(addressRegex1) || processedText.match(addressRegex2)
|
|
440
|
-
|
|
441
|
-
if (addressMatch && addressMatch[1]) {
|
|
442
|
-
// 清理地址中的常见错误和多余空格
|
|
443
|
-
info.address = addressMatch[1]
|
|
444
|
-
.replace(/\s+/g, "")
|
|
445
|
-
.replace(/\n/g, "")
|
|
446
|
-
.trim()
|
|
447
|
-
|
|
448
|
-
// 限制地址长度并判断地址合理性
|
|
449
|
-
if (info.address.length > 70) {
|
|
450
|
-
info.address = info.address.substring(0, 70)
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
// 确保地址是合理的(不仅仅包含符号或数字)
|
|
454
|
-
if (!/[一-龥]/.test(info.address)) {
|
|
455
|
-
info.address = "" // 如果没有中文字符,可能不是有效地址
|
|
456
|
-
}
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
// 解析签发机关
|
|
460
|
-
const authorityRegex1 =
|
|
461
|
-
/签发机关[\s\:]*([\s\S]*?)(?=有效|公民|出生|\d{8}|$)/
|
|
462
|
-
const authorityRegex2 = /签发机关[\s\:]*([一-龥\s]+)/
|
|
463
|
-
|
|
464
|
-
const authorityMatch =
|
|
465
|
-
processedText.match(authorityRegex1) ||
|
|
466
|
-
processedText.match(authorityRegex2)
|
|
467
|
-
|
|
468
|
-
if (authorityMatch && authorityMatch[1]) {
|
|
469
|
-
info.issuingAuthority = authorityMatch[1]
|
|
470
|
-
.replace(/\s+/g, "")
|
|
471
|
-
.replace(/\n/g, "")
|
|
472
|
-
.trim()
|
|
473
|
-
}
|
|
474
|
-
|
|
475
|
-
// 解析有效期限 - 支持多种格式
|
|
476
|
-
// 1. 常规格式:YYYY.MM.DD-YYYY.MM.DD
|
|
477
|
-
const validPeriodRegex1 =
|
|
478
|
-
/有效期限[\s\:]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日\s]*)[-\s]*(至|-)[-\s]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日]*|[永久长期]*)/
|
|
479
|
-
// 2. 简化格式:YYYYMMDD-YYYYMMDD
|
|
480
|
-
const validPeriodRegex2 =
|
|
481
|
-
/有效期限[\s\:]*(\d{8})[-\s]*(至|-)[-\s]*(\d{8}|[永久长期]*)/
|
|
482
|
-
|
|
483
|
-
const validPeriodMatch =
|
|
484
|
-
processedText.match(validPeriodRegex1) ||
|
|
485
|
-
processedText.match(validPeriodRegex2)
|
|
486
|
-
|
|
487
|
-
if (validPeriodMatch) {
|
|
488
|
-
// 格式化为统一的有效期限形式
|
|
489
|
-
if (validPeriodMatch[1] && validPeriodMatch[3]) {
|
|
490
|
-
const startDate = this.formatDateString(validPeriodMatch[1])
|
|
491
|
-
const endDate = /\d/.test(validPeriodMatch[3])
|
|
492
|
-
? this.formatDateString(validPeriodMatch[3])
|
|
493
|
-
: "长期有效"
|
|
494
|
-
|
|
495
|
-
info.validPeriod = `${startDate}-${endDate}`
|
|
496
|
-
} else {
|
|
497
|
-
info.validPeriod = validPeriodMatch[0].replace("有效期限", "").trim()
|
|
498
|
-
}
|
|
499
|
-
}
|
|
500
|
-
|
|
501
|
-
return info
|
|
502
|
-
}
|
|
503
|
-
|
|
504
267
|
/**
|
|
505
268
|
* 清除结果缓存
|
|
506
269
|
*/
|