id-scanner-lib 1.3.3 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -460
- package/dist/id-scanner-lib.esm.js +4641 -0
- package/dist/id-scanner-lib.esm.js.map +1 -0
- package/dist/id-scanner-lib.js +14755 -0
- package/dist/id-scanner-lib.js.map +1 -0
- package/dist/types/core/base-module.d.ts +44 -0
- package/dist/types/core/camera-manager.d.ts +258 -0
- package/dist/types/core/config.d.ts +88 -0
- package/dist/types/core/errors.d.ts +111 -0
- package/dist/types/core/event-emitter.d.ts +55 -0
- package/dist/types/core/logger.d.ts +277 -0
- package/dist/types/core/module-manager.d.ts +78 -0
- package/dist/types/core/plugin-manager.d.ts +158 -0
- package/dist/types/core/resource-manager.d.ts +246 -0
- package/dist/types/core/result.d.ts +83 -0
- package/dist/types/core/scanner-factory.d.ts +93 -0
- package/dist/types/index.bundle.d.ts +1303 -0
- package/dist/types/index.d.ts +86 -0
- package/dist/types/interfaces/external-types.d.ts +174 -0
- package/dist/types/interfaces/face-detection.d.ts +293 -0
- package/dist/types/interfaces/scanner-module.d.ts +280 -0
- package/dist/types/modules/face/face-detector.d.ts +170 -0
- package/dist/types/modules/face/index.d.ts +56 -0
- package/dist/types/modules/face/liveness-detector.d.ts +177 -0
- package/dist/types/modules/face/types.d.ts +136 -0
- package/dist/types/modules/id-card/anti-fake-detector.d.ts +170 -0
- package/dist/types/modules/id-card/id-card-detector.d.ts +131 -0
- package/dist/types/modules/id-card/index.d.ts +89 -0
- package/dist/types/modules/id-card/ocr-processor.d.ts +110 -0
- package/dist/types/modules/id-card/ocr-worker.d.ts +31 -0
- package/dist/types/modules/id-card/types.d.ts +181 -0
- package/dist/types/modules/qrcode/index.d.ts +51 -0
- package/dist/types/modules/qrcode/qr-code-scanner.d.ts +64 -0
- package/dist/types/modules/qrcode/types.d.ts +67 -0
- package/dist/types/utils/camera.d.ts +81 -0
- package/dist/types/utils/image-processing.d.ts +176 -0
- package/dist/types/utils/index.d.ts +175 -0
- package/dist/types/utils/performance.d.ts +81 -0
- package/dist/types/utils/resource-manager.d.ts +53 -0
- package/dist/types/utils/types.d.ts +166 -0
- package/dist/types/utils/worker.d.ts +52 -0
- package/dist/types/version.d.ts +7 -0
- package/package.json +76 -75
- package/src/core/base-module.ts +78 -0
- package/src/core/camera-manager.ts +798 -0
- package/src/core/config.ts +268 -0
- package/src/core/errors.ts +174 -0
- package/src/core/event-emitter.ts +110 -0
- package/src/core/logger.ts +549 -0
- package/src/core/module-manager.ts +165 -0
- package/src/core/plugin-manager.ts +429 -0
- package/src/core/resource-manager.ts +762 -0
- package/src/core/result.ts +163 -0
- package/src/core/scanner-factory.ts +237 -0
- package/src/index.ts +113 -936
- package/src/interfaces/external-types.ts +200 -0
- package/src/interfaces/face-detection.ts +309 -0
- package/src/interfaces/scanner-module.ts +384 -0
- package/src/modules/face/face-detector.ts +931 -0
- package/src/modules/face/index.ts +208 -0
- package/src/modules/face/liveness-detector.ts +908 -0
- package/src/modules/face/types.ts +133 -0
- package/src/{id-recognition → modules/id-card}/anti-fake-detector.ts +273 -239
- package/src/modules/id-card/id-card-detector.ts +474 -0
- package/src/modules/id-card/index.ts +425 -0
- package/src/{id-recognition → modules/id-card}/ocr-processor.ts +149 -92
- package/src/modules/id-card/ocr-worker.ts +259 -0
- package/src/modules/id-card/types.ts +178 -0
- package/src/modules/qrcode/index.ts +175 -0
- package/src/modules/qrcode/qr-code-scanner.ts +230 -0
- package/src/modules/qrcode/types.ts +65 -0
- package/src/types/tesseract.d.ts +265 -22
- package/src/utils/image-processing.ts +68 -49
- package/src/utils/index.ts +426 -0
- package/src/utils/performance.ts +168 -131
- package/src/utils/resource-manager.ts +65 -146
- package/src/utils/types.ts +90 -2
- package/src/utils/worker.ts +123 -84
- package/src/version.ts +11 -0
- package/tools/scaffold.js +543 -0
- package/dist/id-scanner-core.esm.js +0 -11349
- package/dist/id-scanner-core.js +0 -11361
- package/dist/id-scanner-core.min.js +0 -1
- package/dist/id-scanner-ocr.esm.js +0 -2319
- package/dist/id-scanner-ocr.js +0 -2328
- package/dist/id-scanner-ocr.min.js +0 -1
- package/dist/id-scanner-qr.esm.js +0 -1296
- package/dist/id-scanner-qr.js +0 -1305
- package/dist/id-scanner-qr.min.js +0 -1
- package/dist/id-scanner.js +0 -4561
- package/dist/id-scanner.min.js +0 -1
- package/src/core.ts +0 -138
- package/src/demo/demo.ts +0 -204
- package/src/id-recognition/data-extractor.ts +0 -262
- package/src/id-recognition/id-detector.ts +0 -510
- package/src/id-recognition/ocr-worker.ts +0 -156
- package/src/index-umd.ts +0 -477
- package/src/ocr-module.ts +0 -187
- package/src/qr-module.ts +0 -179
- package/src/scanner/barcode-scanner.ts +0 -251
- package/src/scanner/qr-scanner.ts +0 -167
|
@@ -1,20 +1,29 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @file OCR
|
|
3
|
-
* @description
|
|
4
|
-
* @module
|
|
5
|
-
* @version 1.3.2
|
|
2
|
+
* @file OCR处理器
|
|
3
|
+
* @description 提供身份证OCR识别功能
|
|
4
|
+
* @module modules/id-card/ocr-processor
|
|
6
5
|
*/
|
|
7
6
|
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
7
|
+
import { EventEmitter } from '../../core/event-emitter';
|
|
8
|
+
import { Logger } from '../../core/logger';
|
|
9
|
+
import { IDCardType, IDCardInfo } from './types';
|
|
10
|
+
import {
|
|
11
|
+
createWorker,
|
|
12
|
+
Worker as TesseractWorker,
|
|
13
|
+
LoggerMessage,
|
|
14
|
+
WorkerOptions,
|
|
15
|
+
} from "tesseract.js" // 导入 Worker 和 LoggerMessage 类型
|
|
16
|
+
import { ImageProcessor } from "../../utils/image-processing"
|
|
17
|
+
import { LRUCache, calculateImageFingerprint } from "../../utils/performance"
|
|
12
18
|
import {
|
|
13
19
|
isWorkerSupported,
|
|
14
20
|
createWorker as createCustomWorker,
|
|
15
|
-
} from "
|
|
21
|
+
} from "../../utils/worker"
|
|
16
22
|
import { processOCRInWorker, OCRProcessInput } from "./ocr-worker"
|
|
17
|
-
import { Disposable } from "
|
|
23
|
+
import { Disposable } from "../../utils/resource-manager"
|
|
24
|
+
|
|
25
|
+
// 自定义日志函数类型,兼容字符串和LoggerMessage
|
|
26
|
+
type LoggerFunction = ((message: string | LoggerMessage) => void) | undefined;
|
|
18
27
|
|
|
19
28
|
/**
|
|
20
29
|
* OCR处理器选项接口
|
|
@@ -29,7 +38,7 @@ export interface OCRProcessorOptions {
|
|
|
29
38
|
onProgress?: (progress: number) => void
|
|
30
39
|
enableCache?: boolean // 添加启用缓存选项
|
|
31
40
|
cacheSize?: number // 添加缓存大小选项
|
|
32
|
-
logger?:
|
|
41
|
+
logger?: LoggerFunction // 修改为兼容字符串的日志函数类型
|
|
33
42
|
}
|
|
34
43
|
|
|
35
44
|
/**
|
|
@@ -54,7 +63,7 @@ export interface OCRProcessorOptions {
|
|
|
54
63
|
* ```
|
|
55
64
|
*/
|
|
56
65
|
export class OCRProcessor implements Disposable {
|
|
57
|
-
private worker:
|
|
66
|
+
private worker: TesseractWorker | null = null // 使用导入的 TesseractWorker 类型
|
|
58
67
|
private ocrWorker: ReturnType<
|
|
59
68
|
typeof createCustomWorker<
|
|
60
69
|
OCRProcessInput,
|
|
@@ -99,21 +108,26 @@ export class OCRProcessor implements Disposable {
|
|
|
99
108
|
this.ocrWorker = createCustomWorker<
|
|
100
109
|
OCRProcessInput,
|
|
101
110
|
{ idCardInfo: IDCardInfo; processingTime: number }
|
|
102
|
-
>(processOCRInWorker)
|
|
111
|
+
>(processOCRInWorker as any) // 使用类型断言解决类型不兼容问题
|
|
103
112
|
this.initialized = true
|
|
104
113
|
this.options.logger?.("OCR Worker 初始化完成")
|
|
105
114
|
} else {
|
|
106
115
|
// 使用主线程处理OCR
|
|
107
116
|
this.worker = createWorker({
|
|
108
117
|
logger: this.options.logger,
|
|
109
|
-
}
|
|
118
|
+
})
|
|
110
119
|
|
|
111
120
|
await this.worker.load()
|
|
112
121
|
await this.worker.loadLanguage("chi_sim")
|
|
113
122
|
await this.worker.initialize("chi_sim")
|
|
114
123
|
await this.worker.setParameters({
|
|
115
124
|
tessedit_char_whitelist:
|
|
116
|
-
"0123456789X
|
|
125
|
+
"0123456789X年月日壹贰叁肆伍陆柒捌玖拾民族汉满回维吾尔藏苗彝壮朝鲜侗瑶白土家哈尼哈萨克傣黎傈僳佤高山拉祜水东乡纳西景颇柯尔克孜达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌普米塔吉克怒乌孜别克俄罗斯鄂温克德昂保安裕固京塔塔尔独龙鄂伦春赫哲门巴珞巴基诺男女住址出生公民身份号码签发机关有效期省市区县乡镇街道号楼单元室ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", // 优化字符白名单,增加常见地址字符,移除部分不常用汉字
|
|
126
|
+
})
|
|
127
|
+
// 增加一些针对性的参数,提高识别率
|
|
128
|
+
await this.worker.setParameters({
|
|
129
|
+
tessedit_pageseg_mode: 7, // PSM_SINGLE_LINE,使用数字而不是字符串
|
|
130
|
+
preserve_interword_spaces: "1", // 保留单词间的空格
|
|
117
131
|
})
|
|
118
132
|
|
|
119
133
|
this.initialized = true
|
|
@@ -153,9 +167,11 @@ export class OCRProcessor implements Disposable {
|
|
|
153
167
|
|
|
154
168
|
// 提高图像质量以获得更好的OCR结果
|
|
155
169
|
const enhancedImage = ImageProcessor.batchProcess(downsampledImage, {
|
|
156
|
-
brightness:
|
|
157
|
-
|
|
158
|
-
|
|
170
|
+
brightness:
|
|
171
|
+
this.options.brightness !== undefined ? this.options.brightness : 10, // 调整默认亮度
|
|
172
|
+
contrast:
|
|
173
|
+
this.options.contrast !== undefined ? this.options.contrast : 20, // 调整默认对比度
|
|
174
|
+
sharpen: true, // 默认启用锐化,通常对OCR有益
|
|
159
175
|
})
|
|
160
176
|
|
|
161
177
|
// 转换为base64供Tesseract处理
|
|
@@ -183,9 +199,8 @@ export class OCRProcessor implements Disposable {
|
|
|
183
199
|
// 使用Worker线程处理
|
|
184
200
|
const result = await this.ocrWorker.postMessage({
|
|
185
201
|
imageBase64: base64Image,
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
},
|
|
202
|
+
// 不传递函数对象,避免DataCloneError
|
|
203
|
+
tessWorkerOptions: {},
|
|
189
204
|
})
|
|
190
205
|
|
|
191
206
|
idCardInfo = result.idCardInfo
|
|
@@ -199,7 +214,14 @@ export class OCRProcessor implements Disposable {
|
|
|
199
214
|
// 转换ImageData为Canvas
|
|
200
215
|
const canvas = ImageProcessor.imageDataToCanvas(enhancedImage)
|
|
201
216
|
|
|
202
|
-
|
|
217
|
+
// 确保worker已初始化
|
|
218
|
+
if (!this.worker) {
|
|
219
|
+
throw new Error("OCR引擎未初始化");
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
const { data } = (await this.worker.recognize(canvas)) as {
|
|
223
|
+
data: { text: string }
|
|
224
|
+
}
|
|
203
225
|
|
|
204
226
|
// 解析身份证信息
|
|
205
227
|
idCardInfo = this.parseIDCardText(data.text)
|
|
@@ -218,7 +240,16 @@ export class OCRProcessor implements Disposable {
|
|
|
218
240
|
|
|
219
241
|
return idCardInfo
|
|
220
242
|
} catch (error) {
|
|
221
|
-
|
|
243
|
+
// 改进错误处理
|
|
244
|
+
const errorMessage = error instanceof Error
|
|
245
|
+
? error.message
|
|
246
|
+
: typeof error === 'object'
|
|
247
|
+
? JSON.stringify(error)
|
|
248
|
+
: String(error);
|
|
249
|
+
|
|
250
|
+
this.options.logger?.(`OCR识别错误: ${errorMessage}`);
|
|
251
|
+
|
|
252
|
+
// 返回空对象,避免完全失败
|
|
222
253
|
return {} as IDCardInfo
|
|
223
254
|
}
|
|
224
255
|
}
|
|
@@ -239,24 +270,26 @@ export class OCRProcessor implements Disposable {
|
|
|
239
270
|
*/
|
|
240
271
|
private formatDateString(dateStr: string): string {
|
|
241
272
|
// 先尝试提取年月日
|
|
242
|
-
const dateMatch = dateStr.match(
|
|
273
|
+
const dateMatch = dateStr.match(
|
|
274
|
+
/(\d{4})[-\.\u5e74\s]*(\d{1,2})[-\.\u6708\s]*(\d{1,2})[日]*/
|
|
275
|
+
)
|
|
243
276
|
if (dateMatch) {
|
|
244
|
-
const year = dateMatch[1]
|
|
245
|
-
const month = dateMatch[2].padStart(2,
|
|
246
|
-
const day = dateMatch[3].padStart(2,
|
|
247
|
-
return `${year}-${month}-${day}
|
|
277
|
+
const year = dateMatch[1]
|
|
278
|
+
const month = dateMatch[2].padStart(2, "0")
|
|
279
|
+
const day = dateMatch[3].padStart(2, "0")
|
|
280
|
+
return `${year}-${month}-${day}`
|
|
248
281
|
}
|
|
249
|
-
|
|
282
|
+
|
|
250
283
|
// 如果是纯数字格式如 20220101
|
|
251
284
|
if (/^\d{8}$/.test(dateStr)) {
|
|
252
|
-
const year = dateStr.substring(0, 4)
|
|
253
|
-
const month = dateStr.substring(4, 6)
|
|
254
|
-
const day = dateStr.substring(6, 8)
|
|
255
|
-
return `${year}-${month}-${day}
|
|
285
|
+
const year = dateStr.substring(0, 4)
|
|
286
|
+
const month = dateStr.substring(4, 6)
|
|
287
|
+
const day = dateStr.substring(6, 8)
|
|
288
|
+
return `${year}-${month}-${day}`
|
|
256
289
|
}
|
|
257
|
-
|
|
290
|
+
|
|
258
291
|
// 如果无法格式化,返回原始字符串
|
|
259
|
-
return dateStr
|
|
292
|
+
return dateStr
|
|
260
293
|
}
|
|
261
294
|
|
|
262
295
|
/**
|
|
@@ -267,32 +300,32 @@ export class OCRProcessor implements Disposable {
|
|
|
267
300
|
private validateIDNumber(idNumber: string): boolean {
|
|
268
301
|
// 基本验证,校验位有效性和长度
|
|
269
302
|
if (!idNumber || idNumber.length !== 18) {
|
|
270
|
-
return false
|
|
303
|
+
return false
|
|
271
304
|
}
|
|
272
|
-
|
|
305
|
+
|
|
273
306
|
// 检查格式,前17位必须为数字,最后一位可以是数字或'X'
|
|
274
|
-
const pattern = /^\d{17}[\dX]
|
|
307
|
+
const pattern = /^\d{17}[\dX]$/
|
|
275
308
|
if (!pattern.test(idNumber)) {
|
|
276
|
-
return false
|
|
309
|
+
return false
|
|
277
310
|
}
|
|
278
|
-
|
|
311
|
+
|
|
279
312
|
// 检查日期部分
|
|
280
|
-
const year = parseInt(idNumber.substr(6, 4))
|
|
281
|
-
const month = parseInt(idNumber.substr(10, 2))
|
|
282
|
-
const day = parseInt(idNumber.substr(12, 2))
|
|
283
|
-
|
|
313
|
+
const year = parseInt(idNumber.substr(6, 4))
|
|
314
|
+
const month = parseInt(idNumber.substr(10, 2))
|
|
315
|
+
const day = parseInt(idNumber.substr(12, 2))
|
|
316
|
+
|
|
284
317
|
if (month < 1 || month > 12 || day < 1 || day > 31) {
|
|
285
|
-
return false
|
|
318
|
+
return false
|
|
286
319
|
}
|
|
287
|
-
|
|
320
|
+
|
|
288
321
|
// 更详细的检查可以添加校验位的验证等逻辑...
|
|
289
|
-
|
|
290
|
-
return true
|
|
322
|
+
|
|
323
|
+
return true
|
|
291
324
|
}
|
|
292
|
-
|
|
325
|
+
|
|
293
326
|
private parseIDCardText(text: string): IDCardInfo {
|
|
294
327
|
const info: IDCardInfo = {}
|
|
295
|
-
|
|
328
|
+
|
|
296
329
|
// 预处理文本,清除多余空白
|
|
297
330
|
const processedText = text.replace(/\s+/g, " ").trim()
|
|
298
331
|
|
|
@@ -304,18 +337,18 @@ export class OCRProcessor implements Disposable {
|
|
|
304
337
|
const idNumberRegex = /(\d{17}[\dX])/
|
|
305
338
|
// 2. 带前缀的模式
|
|
306
339
|
const idNumberWithPrefixRegex = /公民身份号码[\s\:]*(\d{17}[\dX])/
|
|
307
|
-
|
|
340
|
+
|
|
308
341
|
// 尝试所有模式
|
|
309
342
|
let idNumber = null
|
|
310
343
|
const basicMatch = processedText.match(idNumberRegex)
|
|
311
344
|
const prefixMatch = processedText.match(idNumberWithPrefixRegex)
|
|
312
|
-
|
|
345
|
+
|
|
313
346
|
if (prefixMatch && prefixMatch[1]) {
|
|
314
|
-
idNumber = prefixMatch[1]
|
|
347
|
+
idNumber = prefixMatch[1] // 首选带前缀的匹配,因为最可靠
|
|
315
348
|
} else if (basicMatch && basicMatch[1]) {
|
|
316
|
-
idNumber = basicMatch[1]
|
|
349
|
+
idNumber = basicMatch[1] // 其次是常规匹配
|
|
317
350
|
}
|
|
318
|
-
|
|
351
|
+
|
|
319
352
|
if (idNumber) {
|
|
320
353
|
info.idNumber = idNumber
|
|
321
354
|
}
|
|
@@ -324,14 +357,19 @@ export class OCRProcessor implements Disposable {
|
|
|
324
357
|
// 1. 直接匹配姓名标签近的内容
|
|
325
358
|
const nameWithLabelRegex = /姓名[\s\:]*([一-龥]{2,4})/
|
|
326
359
|
const nameMatch = processedText.match(nameWithLabelRegex)
|
|
327
|
-
|
|
360
|
+
|
|
328
361
|
// 2. 分析行文本寻找姓名
|
|
329
362
|
if (nameMatch && nameMatch[1]) {
|
|
330
363
|
info.name = nameMatch[1].trim()
|
|
331
364
|
} else {
|
|
332
365
|
// 备用方案:查找短行且内容全是汉字
|
|
333
366
|
for (const line of lines) {
|
|
334
|
-
if (
|
|
367
|
+
if (
|
|
368
|
+
line.length >= 2 &&
|
|
369
|
+
line.length <= 5 &&
|
|
370
|
+
/^[一-龥]+$/.test(line) &&
|
|
371
|
+
!/性别|民族|住址|公民|签发|有效/.test(line)
|
|
372
|
+
) {
|
|
335
373
|
info.name = line.trim()
|
|
336
374
|
break
|
|
337
375
|
}
|
|
@@ -340,17 +378,20 @@ export class OCRProcessor implements Disposable {
|
|
|
340
378
|
|
|
341
379
|
// 解析性别和民族 - 多种模式匹配
|
|
342
380
|
// 1. 标准格式匹配
|
|
343
|
-
const genderAndNationalityRegex =
|
|
344
|
-
|
|
345
|
-
|
|
381
|
+
const genderAndNationalityRegex =
|
|
382
|
+
/性别[\s\:]*([男女])[\s ]*民族[\s\:]*([一-龥]+族)/
|
|
383
|
+
const genderNationalityMatch = processedText.match(
|
|
384
|
+
genderAndNationalityRegex
|
|
385
|
+
)
|
|
386
|
+
|
|
346
387
|
// 2. 只匹配性别
|
|
347
388
|
const genderOnlyRegex = /性别[\s\:]*([男女])/
|
|
348
389
|
const genderOnlyMatch = processedText.match(genderOnlyRegex)
|
|
349
|
-
|
|
390
|
+
|
|
350
391
|
// 3. 只匹配民族
|
|
351
392
|
const nationalityOnlyRegex = /民族[\s\:]*([一-龥]+族)/
|
|
352
393
|
const nationalityOnlyMatch = processedText.match(nationalityOnlyRegex)
|
|
353
|
-
|
|
394
|
+
|
|
354
395
|
if (genderNationalityMatch) {
|
|
355
396
|
info.gender = genderNationalityMatch[1]
|
|
356
397
|
info.nationality = genderNationalityMatch[2]
|
|
@@ -366,12 +407,14 @@ export class OCRProcessor implements Disposable {
|
|
|
366
407
|
// 2. 美式日期格式:YYYY-MM-DD或YYYY/MM/DD
|
|
367
408
|
const birthDateRegex2 = /出生[\s\:]*(\d{4})[-\/\.](\d{1,2})[-\/\.](\d{1,2})/
|
|
368
409
|
// 3. 带前缀的格式
|
|
369
|
-
const birthDateRegex3 =
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
410
|
+
const birthDateRegex3 =
|
|
411
|
+
/出生日期[\s\:]*(\d{4})[-\/\.\u5e74](\d{1,2})[-\/\.\u6708](\d{1,2})[日号]?/
|
|
412
|
+
|
|
413
|
+
let birthDateMatch =
|
|
414
|
+
processedText.match(birthDateRegex1) ||
|
|
415
|
+
processedText.match(birthDateRegex2) ||
|
|
416
|
+
processedText.match(birthDateRegex3)
|
|
417
|
+
|
|
375
418
|
// 4. 从身份证号码中提取出生日期(如果上述方法失败)
|
|
376
419
|
if (!birthDateMatch && info.idNumber && info.idNumber.length === 18) {
|
|
377
420
|
const year = info.idNumber.substring(6, 10)
|
|
@@ -381,8 +424,8 @@ export class OCRProcessor implements Disposable {
|
|
|
381
424
|
} else if (birthDateMatch) {
|
|
382
425
|
// 确保月份和日期是两位数
|
|
383
426
|
const year = birthDateMatch[1]
|
|
384
|
-
const month = birthDateMatch[2].padStart(2,
|
|
385
|
-
const day = birthDateMatch[3].padStart(2,
|
|
427
|
+
const month = birthDateMatch[2].padStart(2, "0")
|
|
428
|
+
const day = birthDateMatch[3].padStart(2, "0")
|
|
386
429
|
info.birthDate = `${year}-${month}-${day}`
|
|
387
430
|
}
|
|
388
431
|
|
|
@@ -391,53 +434,67 @@ export class OCRProcessor implements Disposable {
|
|
|
391
434
|
const addressRegex1 = /住址[\s\:]*([\s\S]*?)(?=公民身份|出生|性别|签发)/
|
|
392
435
|
// 2. 更宽松的模式
|
|
393
436
|
const addressRegex2 = /住址[\s\:]*([一-龥a-zA-Z0-9\s\.\-]+)/
|
|
394
|
-
|
|
395
|
-
const addressMatch =
|
|
396
|
-
|
|
437
|
+
|
|
438
|
+
const addressMatch =
|
|
439
|
+
processedText.match(addressRegex1) || processedText.match(addressRegex2)
|
|
440
|
+
|
|
397
441
|
if (addressMatch && addressMatch[1]) {
|
|
398
442
|
// 清理地址中的常见错误和多余空格
|
|
399
|
-
info.address = addressMatch[1]
|
|
400
|
-
|
|
443
|
+
info.address = addressMatch[1]
|
|
444
|
+
.replace(/\s+/g, "")
|
|
445
|
+
.replace(/\n/g, "")
|
|
446
|
+
.trim()
|
|
447
|
+
|
|
401
448
|
// 限制地址长度并判断地址合理性
|
|
402
449
|
if (info.address.length > 70) {
|
|
403
450
|
info.address = info.address.substring(0, 70)
|
|
404
451
|
}
|
|
405
|
-
|
|
452
|
+
|
|
406
453
|
// 确保地址是合理的(不仅仅包含符号或数字)
|
|
407
454
|
if (!/[一-龥]/.test(info.address)) {
|
|
408
|
-
info.address = ""
|
|
455
|
+
info.address = "" // 如果没有中文字符,可能不是有效地址
|
|
409
456
|
}
|
|
410
457
|
}
|
|
411
458
|
|
|
412
459
|
// 解析签发机关
|
|
413
|
-
const authorityRegex1 =
|
|
460
|
+
const authorityRegex1 =
|
|
461
|
+
/签发机关[\s\:]*([\s\S]*?)(?=有效|公民|出生|\d{8}|$)/
|
|
414
462
|
const authorityRegex2 = /签发机关[\s\:]*([一-龥\s]+)/
|
|
415
|
-
|
|
416
|
-
const authorityMatch =
|
|
417
|
-
|
|
463
|
+
|
|
464
|
+
const authorityMatch =
|
|
465
|
+
processedText.match(authorityRegex1) ||
|
|
466
|
+
processedText.match(authorityRegex2)
|
|
467
|
+
|
|
418
468
|
if (authorityMatch && authorityMatch[1]) {
|
|
419
|
-
info.issuingAuthority = authorityMatch[1]
|
|
469
|
+
info.issuingAuthority = authorityMatch[1]
|
|
470
|
+
.replace(/\s+/g, "")
|
|
471
|
+
.replace(/\n/g, "")
|
|
472
|
+
.trim()
|
|
420
473
|
}
|
|
421
474
|
|
|
422
475
|
// 解析有效期限 - 支持多种格式
|
|
423
476
|
// 1. 常规格式:YYYY.MM.DD-YYYY.MM.DD
|
|
424
|
-
const validPeriodRegex1 =
|
|
477
|
+
const validPeriodRegex1 =
|
|
478
|
+
/有效期限[\s\:]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日\s]*)[-\s]*(至|-)[-\s]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日]*|[永久长期]*)/
|
|
425
479
|
// 2. 简化格式:YYYYMMDD-YYYYMMDD
|
|
426
|
-
const validPeriodRegex2 =
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
480
|
+
const validPeriodRegex2 =
|
|
481
|
+
/有效期限[\s\:]*(\d{8})[-\s]*(至|-)[-\s]*(\d{8}|[永久长期]*)/
|
|
482
|
+
|
|
483
|
+
const validPeriodMatch =
|
|
484
|
+
processedText.match(validPeriodRegex1) ||
|
|
485
|
+
processedText.match(validPeriodRegex2)
|
|
486
|
+
|
|
430
487
|
if (validPeriodMatch) {
|
|
431
488
|
// 格式化为统一的有效期限形式
|
|
432
489
|
if (validPeriodMatch[1] && validPeriodMatch[3]) {
|
|
433
490
|
const startDate = this.formatDateString(validPeriodMatch[1])
|
|
434
|
-
const endDate = /\d/.test(validPeriodMatch[3])
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
491
|
+
const endDate = /\d/.test(validPeriodMatch[3])
|
|
492
|
+
? this.formatDateString(validPeriodMatch[3])
|
|
493
|
+
: "长期有效"
|
|
494
|
+
|
|
438
495
|
info.validPeriod = `${startDate}-${endDate}`
|
|
439
496
|
} else {
|
|
440
|
-
info.validPeriod = validPeriodMatch[0].replace(
|
|
497
|
+
info.validPeriod = validPeriodMatch[0].replace("有效期限", "").trim()
|
|
441
498
|
}
|
|
442
499
|
}
|
|
443
500
|
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file OCR Worker
|
|
3
|
+
* @description OCR处理的Worker线程实现
|
|
4
|
+
* @module modules/id-card/ocr-worker
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { IDCardInfo, IDCardType } from './types';
|
|
8
|
+
import { LoggerMessage } from 'tesseract.js';
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Input payload for one OCR processing request.
|
|
12
|
+
*/
|
|
13
|
+
export interface OCRProcessInput {
|
|
14
|
+
/** Image data as a Base64 string */
|
|
15
|
+
imageBase64: string;
|
|
16
|
+
/** Options for the Tesseract worker (optional) */
|
|
17
|
+
tessWorkerOptions?: {
|
|
18
|
+
/** Language */
|
|
19
|
+
language?: string;
|
|
20
|
+
/** Log callback. NOTE(review): functions are not structured-cloneable, so presumably this cannot be set when the payload is posted to a Web Worker — confirm against the caller. */
|
|
21
|
+
logger?: (message: LoggerMessage) => void;
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
 * Runs Tesseract OCR on a Base64 image and parses the recognised text into
 * ID-card fields.
 *
 * A Tesseract worker is created for each call and is always terminated before
 * the function returns, including when loading, recognition or parsing throws.
 * Failures are logged and reported as an empty result rather than rethrown,
 * so callers always receive a value.
 *
 * @param input Image data plus optional Tesseract worker options. When
 *   `tessWorkerOptions.language` is a non-blank string it selects the OCR
 *   language; otherwise `chi_sim` is used.
 * @returns The parsed ID-card fields (empty on failure) and the elapsed time
 *   in milliseconds, measured with `performance.now()`.
 */
export async function processOCRInWorker(
  input: OCRProcessInput
): Promise<{ idCardInfo: IDCardInfo; processingTime: number }> {
  const startTime = performance.now();

  // Kept as locals (not module constants) so the function body stays as
  // self-contained as the original.
  //
  // NOTE(review): this whitelist does not contain 姓, 名, 性, 限, 至, 长, '-' or
  // '.', yet parseIDCardText matches on 姓名 / 性别 / 有效期限 and on dotted or
  // dashed dates. If the engine enforces the whitelist those patterns cannot
  // match — confirm and extend the list if so.
  const charWhitelist =
    '0123456789X年月日壹贰叁肆伍陆柒捌玖拾民族汉满回维吾尔藏苗彝壮朝鲜侗瑶白土家哈尼哈萨克傣黎傈僳佤高山拉祜水东乡纳西景颇柯尔克孜达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌普米塔吉克怒乌孜别克俄罗斯鄂温克德昂保安裕固京塔塔尔独龙鄂伦春赫哲门巴珞巴基诺男女住址出生公民身份号码签发机关有效期省市区县乡镇街道号楼单元室ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';

  // Honour the caller's language; a missing or blank value falls back to
  // Simplified Chinese, which is what the function always used before.
  const requestedLanguage = input.tessWorkerOptions?.language?.trim();
  const language = requestedLanguage || 'chi_sim';

  // Only `terminate` is needed outside the try block, so a structural type is
  // enough and avoids depending on the library's Worker type here.
  let worker: { terminate(): Promise<unknown> } | undefined;
  let idCardInfo: IDCardInfo = {};

  try {
    const { createWorker } = await import('tesseract.js');

    const tessWorker = createWorker({
      logger: input.tessWorkerOptions?.logger
    });
    worker = tessWorker;

    await tessWorker.load();
    await tessWorker.loadLanguage(language);
    await tessWorker.initialize(language);

    await tessWorker.setParameters({
      tessedit_char_whitelist: charWhitelist,
      // 7 = PSM_SINGLE_LINE.
      // NOTE(review): single-line segmentation looks at odds with a multi-line
      // card image — confirm this is intended.
      tessedit_pageseg_mode: 7,
      preserve_interword_spaces: '1'
    });

    const { data } = await tessWorker.recognize(input.imageBase64);
    idCardInfo = parseIDCardText(data.text);
  } catch (error) {
    // Deliberate best-effort: report the failure and return an empty result.
    console.error('OCR处理错误:', error);
    idCardInfo = {};
  } finally {
    // Release the Tesseract worker on every path; previously it leaked
    // whenever any step above threw.
    if (worker) {
      try {
        await worker.terminate();
      } catch (terminateError) {
        console.error('OCR Worker 释放失败:', terminateError);
      }
    }
  }

  return { idCardInfo, processingTime: performance.now() - startTime };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Extracts structured ID-card fields from raw OCR text.
 *
 * Each field is matched independently with one or more regular expressions;
 * a field that cannot be recognised is simply left unset. Most patterns run
 * against a single-line copy of the text in which every whitespace run is
 * collapsed to one space. The fallback name search is the exception: it needs
 * the original line structure and therefore works on the raw text.
 *
 * NOTE(review): this function writes `ethnicity`, `issueAuthority`,
 * `validFrom` and `validTo`, while the OCRProcessor parser writes
 * `nationality` and `issuingAuthority` — confirm which names `IDCardInfo`
 * expects.
 *
 * @param text Raw text returned by the OCR engine.
 * @returns The fields that could be recognised.
 */
function parseIDCardText(text: string): IDCardInfo {
  const info: IDCardInfo = {};

  // Single-line view: all whitespace runs (line breaks included) become ' '.
  const processedText = text.replace(/\s+/g, ' ').trim();

  // ---- ID number -----------------------------------------------------------
  // Prefer the labelled form; fall back to any 18-character candidate.
  // A lowercase check digit 'x' is accepted and normalised to 'X'.
  const idNumberRegex = /(\d{17}[\dXx])/;
  const idNumberWithPrefixRegex = /公民身份号码[\s\:]*(\d{17}[\dXx])/;

  const idNumberMatch =
    processedText.match(idNumberWithPrefixRegex) ||
    processedText.match(idNumberRegex);

  if (idNumberMatch && idNumberMatch[1]) {
    info.idNumber = idNumberMatch[1].toUpperCase();
  }

  // ---- Name ----------------------------------------------------------------
  const nameWithLabelRegex = /姓名[\s\:]*([一-龥]{2,4})/;
  const nameMatch = processedText.match(nameWithLabelRegex);

  if (nameMatch && nameMatch[1]) {
    info.name = nameMatch[1].trim();
  } else {
    // Fallback: the first short line made only of Chinese characters.
    // This must split the RAW text: `processedText` no longer contains any
    // line breaks, so splitting it always produced a single "line".
    const candidateLines = text
      .split(/\r\n|\r|\n/)
      .map(line => line.trim())
      .filter(line => line.length > 0);

    // Lines that are (part of) a printed label or card heading, not a name.
    const labelPattern = /性别|民族|住址|公民|签发|有效|出生|姓名|身份|共和国/;

    const fallbackName = candidateLines.find(
      line =>
        line.length >= 2 &&
        line.length <= 5 &&
        /^[一-龥]+$/.test(line) &&
        !labelPattern.test(line)
    );

    if (fallbackName) {
      info.name = fallbackName;
    }
  }

  // ---- Gender and ethnicity -------------------------------------------------
  const genderAndNationalityRegex = /性别[\s\:]*([男女])[\s ]*民族[\s\:]*([一-龥]+族)/;
  const genderOnlyRegex = /性别[\s\:]*([男女])/;
  const nationalityOnlyRegex = /民族[\s\:]*([一-龥]+族)/;

  const genderNationalityMatch = processedText.match(genderAndNationalityRegex);

  if (genderNationalityMatch) {
    info.gender = genderNationalityMatch[1];
    info.ethnicity = genderNationalityMatch[2];
  } else {
    // The combined pattern failed; try each field on its own.
    const genderOnlyMatch = processedText.match(genderOnlyRegex);
    const nationalityOnlyMatch = processedText.match(nationalityOnlyRegex);
    if (genderOnlyMatch) info.gender = genderOnlyMatch[1];
    if (nationalityOnlyMatch) info.ethnicity = nationalityOnlyMatch[1];
  }

  // ---- Card side -------------------------------------------------------------
  // Front-side keywords take precedence over back-side keywords.
  if (processedText.includes('出生') || processedText.includes('公民身份号码')) {
    info.type = IDCardType.FRONT;
  } else if (processedText.includes('签发机关') || processedText.includes('有效期')) {
    info.type = IDCardType.BACK;
  }

  // ---- Birth date ------------------------------------------------------------
  // 1. 出生 YYYY年M月D日   2. 出生 YYYY-MM-DD / YYYY/MM/DD / YYYY.MM.DD
  // 3. 出生日期 with either separator style
  const birthDateRegex1 = /出生[\s\:]*(\d{4})年(\d{1,2})月(\d{1,2})[日号]/;
  const birthDateRegex2 = /出生[\s\:]*(\d{4})[-\/\.](\d{1,2})[-\/\.](\d{1,2})/;
  const birthDateRegex3 = /出生日期[\s\:]*(\d{4})[-\/\.\u5e74](\d{1,2})[-\/\.\u6708](\d{1,2})[日号]?/;

  const birthDateMatch =
    processedText.match(birthDateRegex1) ||
    processedText.match(birthDateRegex2) ||
    processedText.match(birthDateRegex3);

  if (birthDateMatch) {
    const year = birthDateMatch[1];
    const month = birthDateMatch[2].padStart(2, '0');
    const day = birthDateMatch[3].padStart(2, '0');
    info.birthDate = `${year}-${month}-${day}`;
  } else if (info.idNumber && info.idNumber.length === 18) {
    // No printed date recognised: characters 7-14 of the ID number are used
    // as YYYYMMDD.
    const year = info.idNumber.substring(6, 10);
    const month = info.idNumber.substring(10, 12);
    const day = info.idNumber.substring(12, 14);
    info.birthDate = `${year}-${month}-${day}`;
  }

  // ---- Address ---------------------------------------------------------------
  // 1. Everything after 住址 up to the next known label.  2. Looser pattern.
  const addressRegex1 = /住址[\s\:]*([\s\S]*?)(?=公民身份|出生|性别|签发)/;
  const addressRegex2 = /住址[\s\:]*([一-龥a-zA-Z0-9\s\.\-]+)/;

  const addressMatch =
    processedText.match(addressRegex1) || processedText.match(addressRegex2);

  if (addressMatch && addressMatch[1]) {
    // Removing all whitespace also removes line breaks and outer padding.
    let address = addressMatch[1].replace(/\s+/g, '');

    if (address.length > 70) {
      address = address.substring(0, 70);
    }

    // An address without any Chinese character is treated as a misread.
    info.address = /[一-龥]/.test(address) ? address : '';
  }

  // ---- Issuing authority -----------------------------------------------------
  const authorityRegex1 = /签发机关[\s\:]*([\s\S]*?)(?=有效|公民|出生|\d{8}|$)/;
  const authorityRegex2 = /签发机关[\s\:]*([一-龥\s]+)/;

  const authorityMatch =
    processedText.match(authorityRegex1) ||
    processedText.match(authorityRegex2);

  if (authorityMatch && authorityMatch[1]) {
    info.issueAuthority = authorityMatch[1].replace(/\s+/g, '');
  }

  // ---- Validity period -------------------------------------------------------
  // 1. Separated dates (YYYY.MM.DD-YYYY.MM.DD and variants)
  // 2. Compact dates (YYYYMMDD-YYYYMMDD)
  // In both, the end may instead be a long-term marker.
  const validPeriodRegex1 = /有效期限[\s\:]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日\s]*)[-\s]*(至|-)[-\s]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日]*|[永久长期]*)/;
  const validPeriodRegex2 = /有效期限[\s\:]*(\d{8})[-\s]*(至|-)[-\s]*(\d{8}|[永久长期]*)/;

  const validPeriodMatch =
    processedText.match(validPeriodRegex1) ||
    processedText.match(validPeriodRegex2);

  if (validPeriodMatch) {
    if (validPeriodMatch[1] && validPeriodMatch[3]) {
      const startDate = formatDateString(validPeriodMatch[1]);
      // An end value without digits is a long-term marker.
      const endDate = /\d/.test(validPeriodMatch[3])
        ? formatDateString(validPeriodMatch[3])
        : '长期有效';

      info.validFrom = startDate;
      info.validTo = endDate;
      info.validPeriod = `${startDate}-${endDate}`;
    } else {
      // The end part was empty: keep the raw matched text without its label.
      info.validPeriod = validPeriodMatch[0].replace('有效期限', '').trim();
    }
  }

  return info;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Normalises a date string to `YYYY-MM-DD`.
 *
 * Two input shapes are recognised:
 * - compact `YYYYMMDD` (exactly eight digits after trimming), and
 * - a four-digit year followed by month and day, optionally separated by
 *   `-`, `.`, whitespace or the characters 年 / 月, with an optional
 *   trailing 日.
 *
 * The month must be 1-12 and the day 1-31. Anything else is returned
 * unchanged, so unparseable input is never turned into an invalid date such
 * as `1990-00-01`.
 *
 * @param dateStr Raw date string.
 * @returns The normalised date, or `dateStr` itself when it cannot be
 *   formatted.
 */
function formatDateString(dateStr: string): string {
  const trimmed = dateStr.trim();

  // Check the compact form first. The separated pattern below also matches
  // any eight-digit string because all of its separators are optional, which
  // previously left the compact branch unreachable.
  const compactMatch = /^(\d{4})(\d{2})(\d{2})$/.exec(trimmed);
  const dateMatch =
    compactMatch ||
    /(\d{4})[-\.\u5e74\s]*(\d{1,2})[-\.\u6708\s]*(\d{1,2})[日]*/.exec(trimmed);

  if (!dateMatch) {
    return dateStr;
  }

  const year = dateMatch[1];
  const month = dateMatch[2];
  const day = dateMatch[3];

  // Reject out-of-range components instead of formatting them.
  const monthValue = Number(month);
  const dayValue = Number(day);
  if (monthValue < 1 || monthValue > 12 || dayValue < 1 || dayValue > 31) {
    return dateStr;
  }

  return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
}
|