id-scanner-lib 1.3.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/README.md +55 -460
  2. package/dist/id-scanner-lib.esm.js +4641 -0
  3. package/dist/id-scanner-lib.esm.js.map +1 -0
  4. package/dist/id-scanner-lib.js +14755 -0
  5. package/dist/id-scanner-lib.js.map +1 -0
  6. package/dist/types/core/base-module.d.ts +44 -0
  7. package/dist/types/core/camera-manager.d.ts +258 -0
  8. package/dist/types/core/config.d.ts +88 -0
  9. package/dist/types/core/errors.d.ts +111 -0
  10. package/dist/types/core/event-emitter.d.ts +55 -0
  11. package/dist/types/core/logger.d.ts +277 -0
  12. package/dist/types/core/module-manager.d.ts +78 -0
  13. package/dist/types/core/plugin-manager.d.ts +158 -0
  14. package/dist/types/core/resource-manager.d.ts +246 -0
  15. package/dist/types/core/result.d.ts +83 -0
  16. package/dist/types/core/scanner-factory.d.ts +93 -0
  17. package/dist/types/index.bundle.d.ts +1303 -0
  18. package/dist/types/index.d.ts +86 -0
  19. package/dist/types/interfaces/external-types.d.ts +174 -0
  20. package/dist/types/interfaces/face-detection.d.ts +293 -0
  21. package/dist/types/interfaces/scanner-module.d.ts +280 -0
  22. package/dist/types/modules/face/face-detector.d.ts +170 -0
  23. package/dist/types/modules/face/index.d.ts +56 -0
  24. package/dist/types/modules/face/liveness-detector.d.ts +177 -0
  25. package/dist/types/modules/face/types.d.ts +136 -0
  26. package/dist/types/modules/id-card/anti-fake-detector.d.ts +170 -0
  27. package/dist/types/modules/id-card/id-card-detector.d.ts +131 -0
  28. package/dist/types/modules/id-card/index.d.ts +89 -0
  29. package/dist/types/modules/id-card/ocr-processor.d.ts +110 -0
  30. package/dist/types/modules/id-card/ocr-worker.d.ts +31 -0
  31. package/dist/types/modules/id-card/types.d.ts +181 -0
  32. package/dist/types/modules/qrcode/index.d.ts +51 -0
  33. package/dist/types/modules/qrcode/qr-code-scanner.d.ts +64 -0
  34. package/dist/types/modules/qrcode/types.d.ts +67 -0
  35. package/dist/types/utils/camera.d.ts +81 -0
  36. package/dist/types/utils/image-processing.d.ts +176 -0
  37. package/dist/types/utils/index.d.ts +175 -0
  38. package/dist/types/utils/performance.d.ts +81 -0
  39. package/dist/types/utils/resource-manager.d.ts +53 -0
  40. package/dist/types/utils/types.d.ts +166 -0
  41. package/dist/types/utils/worker.d.ts +52 -0
  42. package/dist/types/version.d.ts +7 -0
  43. package/package.json +76 -77
  44. package/src/core/base-module.ts +78 -0
  45. package/src/core/camera-manager.ts +798 -0
  46. package/src/core/config.ts +268 -0
  47. package/src/core/errors.ts +174 -0
  48. package/src/core/event-emitter.ts +110 -0
  49. package/src/core/logger.ts +549 -0
  50. package/src/core/module-manager.ts +165 -0
  51. package/src/core/plugin-manager.ts +429 -0
  52. package/src/core/resource-manager.ts +762 -0
  53. package/src/core/result.ts +163 -0
  54. package/src/core/scanner-factory.ts +237 -0
  55. package/src/index.ts +113 -936
  56. package/src/interfaces/external-types.ts +200 -0
  57. package/src/interfaces/face-detection.ts +309 -0
  58. package/src/interfaces/scanner-module.ts +384 -0
  59. package/src/modules/face/face-detector.ts +931 -0
  60. package/src/modules/face/index.ts +208 -0
  61. package/src/modules/face/liveness-detector.ts +908 -0
  62. package/src/modules/face/types.ts +133 -0
  63. package/src/modules/id-card/anti-fake-detector.ts +732 -0
  64. package/src/modules/id-card/id-card-detector.ts +474 -0
  65. package/src/modules/id-card/index.ts +425 -0
  66. package/src/modules/id-card/ocr-processor.ts +538 -0
  67. package/src/modules/id-card/ocr-worker.ts +259 -0
  68. package/src/modules/id-card/types.ts +178 -0
  69. package/src/modules/qrcode/index.ts +175 -0
  70. package/src/modules/qrcode/qr-code-scanner.ts +230 -0
  71. package/src/modules/qrcode/types.ts +65 -0
  72. package/src/types/browser-image-compression.d.ts +19 -0
  73. package/src/types/tesseract.d.ts +280 -0
  74. package/src/utils/image-processing.ts +432 -49
  75. package/src/utils/index.ts +426 -0
  76. package/src/utils/performance.ts +168 -131
  77. package/src/utils/resource-manager.ts +65 -146
  78. package/src/utils/types.ts +90 -2
  79. package/src/utils/worker.ts +123 -84
  80. package/src/version.ts +11 -0
  81. package/tools/scaffold.js +543 -0
  82. package/dist/id-scanner-core.esm.js +0 -11076
  83. package/dist/id-scanner-core.esm.js.map +0 -1
  84. package/dist/id-scanner-core.js +0 -11088
  85. package/dist/id-scanner-core.js.map +0 -1
  86. package/dist/id-scanner-core.min.js +0 -1
  87. package/dist/id-scanner-core.min.js.map +0 -1
  88. package/dist/id-scanner-ocr.esm.js +0 -1802
  89. package/dist/id-scanner-ocr.esm.js.map +0 -1
  90. package/dist/id-scanner-ocr.js +0 -1811
  91. package/dist/id-scanner-ocr.js.map +0 -1
  92. package/dist/id-scanner-ocr.min.js +0 -1
  93. package/dist/id-scanner-ocr.min.js.map +0 -1
  94. package/dist/id-scanner-qr.esm.js +0 -1023
  95. package/dist/id-scanner-qr.esm.js.map +0 -1
  96. package/dist/id-scanner-qr.js +0 -1032
  97. package/dist/id-scanner-qr.js.map +0 -1
  98. package/dist/id-scanner-qr.min.js +0 -1
  99. package/dist/id-scanner-qr.min.js.map +0 -1
  100. package/dist/id-scanner.js +0 -3740
  101. package/dist/id-scanner.js.map +0 -1
  102. package/dist/id-scanner.min.js +0 -1
  103. package/dist/id-scanner.min.js.map +0 -1
  104. package/src/core.ts +0 -138
  105. package/src/demo/demo.ts +0 -204
  106. package/src/id-recognition/anti-fake-detector.ts +0 -317
  107. package/src/id-recognition/data-extractor.ts +0 -262
  108. package/src/id-recognition/id-detector.ts +0 -363
  109. package/src/id-recognition/ocr-processor.ts +0 -334
  110. package/src/id-recognition/ocr-worker.ts +0 -156
  111. package/src/index-umd.ts +0 -477
  112. package/src/ocr-module.ts +0 -187
  113. package/src/qr-module.ts +0 -179
  114. package/src/scanner/barcode-scanner.ts +0 -251
  115. package/src/scanner/qr-scanner.ts +0 -167
@@ -0,0 +1,538 @@
1
+ /**
2
+ * @file OCR处理器
3
+ * @description 提供身份证OCR识别功能
4
+ * @module modules/id-card/ocr-processor
5
+ */
6
+
7
+ import { EventEmitter } from '../../core/event-emitter';
8
+ import { Logger } from '../../core/logger';
9
+ import { IDCardType, IDCardInfo } from './types';
10
+ import {
11
+ createWorker,
12
+ Worker as TesseractWorker,
13
+ LoggerMessage,
14
+ WorkerOptions,
15
+ } from "tesseract.js" // 导入 Worker 和 LoggerMessage 类型
16
+ import { ImageProcessor } from "../../utils/image-processing"
17
+ import { LRUCache, calculateImageFingerprint } from "../../utils/performance"
18
+ import {
19
+ isWorkerSupported,
20
+ createWorker as createCustomWorker,
21
+ } from "../../utils/worker"
22
+ import { processOCRInWorker, OCRProcessInput } from "./ocr-worker"
23
+ import { Disposable } from "../../utils/resource-manager"
24
+
/**
 * Log sink used by the OCR processor.
 *
 * It is called with plain strings emitted by this module and is also handed to
 * Tesseract as its `logger`, so it may receive `LoggerMessage` objects as well.
 * `undefined` means "do not log".
 */
type LoggerFunction = undefined | ((message: LoggerMessage | string) => void);

/**
 * Options accepted by `OCRProcessor`.
 */
export interface OCRProcessorOptions {
  // --- Backend selection -------------------------------------------------

  /**
   * When truthy, recognition is delegated to the custom worker built from
   * `processOCRInWorker`; otherwise a Tesseract worker is driven directly.
   * Defaults to the result of `isWorkerSupported()`.
   */
  useWorker?: boolean
  /**
   * OCR language.
   * NOTE(review): not read by the visible `OCRProcessor` code, which always
   * loads "chi_sim" on the main-thread path — confirm intended use.
   */
  language?: string
  /**
   * Recognition timeout.
   * NOTE(review): not read by the visible `OCRProcessor` code; unit unknown.
   */
  timeout?: number

  // --- Image preparation -------------------------------------------------

  /**
   * Bound passed as both the width and the height limit to
   * `ImageProcessor.resizeImage` before recognition. Defaults to 1000.
   */
  maxImageDimension?: number
  /** Brightness adjustment passed to `ImageProcessor.batchProcess`; 10 when omitted. */
  brightness?: number
  /** Contrast adjustment passed to `ImageProcessor.batchProcess`; 20 when omitted. */
  contrast?: number

  // --- Result caching ----------------------------------------------------

  /** Cache results keyed by image fingerprint. Defaults to true. */
  enableCache?: boolean
  /** Capacity handed to the `LRUCache` constructor. Defaults to 50. */
  cacheSize?: number

  // --- Reporting ---------------------------------------------------------

  /**
   * Progress callback.
   * NOTE(review): never invoked by the visible `OCRProcessor` code.
   */
  onProgress?: (progress: number) => void
  /** Log sink; accepts strings and Tesseract messages. Defaults to `console.log`. */
  logger?: LoggerFunction
}
43
+
44
+ /**
45
+ * OCR处理器类
46
+ *
47
+ * 使用Tesseract.js实现对身份证图像的OCR文字识别和信息提取功能
48
+ *
49
+ * @example
50
+ * ```typescript
51
+ * // 创建OCR处理器
52
+ * const ocrProcessor = new OCRProcessor();
53
+ *
54
+ * // 初始化OCR引擎
55
+ * await ocrProcessor.initialize();
56
+ *
57
+ * // 处理身份证图像
58
+ * const idInfo = await ocrProcessor.processIDCard(idCardImageData);
59
+ * console.log('识别到的身份证信息:', idInfo);
60
+ *
61
+ * // 使用结束后释放资源
62
+ * await ocrProcessor.terminate();
63
+ * ```
64
+ */
65
+ export class OCRProcessor implements Disposable {
66
+ private worker: TesseractWorker | null = null // 使用导入的 TesseractWorker 类型
67
+ private ocrWorker: ReturnType<
68
+ typeof createCustomWorker<
69
+ OCRProcessInput,
70
+ { idCardInfo: IDCardInfo; processingTime: number }
71
+ >
72
+ > | null = null
73
+ private initialized: boolean = false
74
+ private resultCache: LRUCache<string, IDCardInfo>
75
+ private options: OCRProcessorOptions
76
+
/**
 * Builds a processor from the built-in defaults overlaid with the caller's options.
 *
 * @param options Overrides; every key present here replaces the matching default.
 */
constructor(options: OCRProcessorOptions = {}) {
  const defaults: OCRProcessorOptions = {
    useWorker: isWorkerSupported(),
    enableCache: true,
    cacheSize: 50,
    maxImageDimension: 1000,
    logger: console.log,
  }
  this.options = { ...defaults, ...options }

  // The cache object is allocated regardless of `enableCache`.
  const capacity = this.options.cacheSize
  this.resultCache = new LRUCache<string, IDCardInfo>(capacity)
}
95
+
/**
 * Prepares the OCR backend selected by `options.useWorker`. Calling it again
 * after a successful initialization is a no-op.
 *
 * - Worker mode: creates the custom worker around `processOCRInWorker`.
 * - Main-thread mode: creates a Tesseract worker, loads the Simplified Chinese
 *   ("chi_sim") language data and applies the ID-card recognition parameters.
 *
 * If any main-thread setup step fails, the partially initialized Tesseract
 * worker is terminated and detached before the error is rethrown, so a later
 * retry does not leak it.
 *
 * @returns {Promise<void>} Resolves once the backend is ready.
 * @throws Whatever the Tesseract setup calls reject with.
 */
async initialize(): Promise<void> {
  if (this.initialized) return

  if (this.options.useWorker) {
    // Run OCR on the custom worker thread.
    this.ocrWorker = createCustomWorker<
      OCRProcessInput,
      { idCardInfo: IDCardInfo; processingTime: number }
    >(processOCRInWorker as any) // cast works around a signature mismatch
    this.initialized = true
    this.options.logger?.("OCR Worker 初始化完成")
    return
  }

  // Run OCR on the main thread.
  const engine: TesseractWorker = createWorker({
    logger: this.options.logger,
  })
  this.worker = engine

  try {
    await engine.load()
    await engine.loadLanguage("chi_sim")
    await engine.initialize("chi_sim")
    await engine.setParameters({
      // Restrict recognition to digits, Latin letters and the Chinese
      // characters listed here (labels, ethnic groups, common address words).
      tessedit_char_whitelist:
        "0123456789X年月日壹贰叁肆伍陆柒捌玖拾民族汉满回维吾尔藏苗彝壮朝鲜侗瑶白土家哈尼哈萨克傣黎傈僳佤高山拉祜水东乡纳西景颇柯尔克孜达斡尔仫佬羌布朗撒拉毛南仡佬锡伯阿昌普米塔吉克怒乌孜别克俄罗斯鄂温克德昂保安裕固京塔塔尔独龙鄂伦春赫哲门巴珞巴基诺男女住址出生公民身份号码签发机关有效期省市区县乡镇街道号楼单元室ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
    })
    await engine.setParameters({
      // 7 = PSM_SINGLE_LINE (numeric form on purpose).
      // NOTE(review): single-line mode on a multi-line card image looks
      // questionable — confirm against what callers actually pass in.
      tessedit_pageseg_mode: 7,
      preserve_interword_spaces: "1", // keep spaces between words
    })
  } catch (error) {
    // Do not leave a half-initialized engine behind: detach and release it.
    if (this.worker === engine) {
      this.worker = null
    }
    try {
      await engine.terminate()
    } catch {
      // The setup failure below is the error worth reporting.
    }
    throw error
  }

  this.initialized = true
  this.options.logger?.("OCR引擎初始化完成")
}
137
+
/**
 * Runs OCR on an ID-card image and returns the fields that could be extracted.
 *
 * Steps: lazily initialize the backend, look the image up in the
 * fingerprint-keyed cache, resize and enhance it, recognize it (custom worker
 * or main-thread Tesseract worker), then cache and return the result.
 *
 * Recognition failures are logged and reported as an empty object instead of
 * a rejection; such results are not cached.
 *
 * @param imageData Pixel data of the ID-card image.
 * @returns The extracted information, or an empty object if recognition failed.
 * @throws Error if initialization fails, or if the worker path cannot obtain
 *         a 2D canvas context to encode the image.
 */
async processIDCard(imageData: ImageData): Promise<IDCardInfo> {
  if (!this.initialized) {
    await this.initialize()
  }

  // Fingerprint the untouched input once; the same key serves lookup and store.
  const cacheKey = this.options.enableCache
    ? calculateImageFingerprint(imageData)
    : null

  if (cacheKey !== null) {
    const cachedResult = this.resultCache.get(cacheKey)
    if (cachedResult) {
      this.options.logger?.("使用缓存的OCR结果")
      return cachedResult
    }
  }

  // Downscale (keeping the aspect ratio) for speed and accuracy.
  const maxDimension = this.options.maxImageDimension || 1000
  const downsampledImage = ImageProcessor.resizeImage(
    imageData,
    maxDimension,
    maxDimension,
    true
  )

  // Brightness/contrast tweak plus sharpening, which generally helps OCR.
  const enhancedImage = ImageProcessor.batchProcess(downsampledImage, {
    brightness: this.options.brightness ?? 10,
    contrast: this.options.contrast ?? 20,
    sharpen: true,
  })

  const customWorker = this.options.useWorker ? this.ocrWorker : null

  // Only the worker path needs a serialized image, so the JPEG encoding is
  // skipped on the main-thread path. It stays outside the try block so that a
  // missing 2D context still rejects instead of yielding an empty result.
  let base64Image = ""
  if (customWorker) {
    const encodeCanvas = document.createElement("canvas")
    encodeCanvas.width = enhancedImage.width
    encodeCanvas.height = enhancedImage.height
    const ctx = encodeCanvas.getContext("2d")

    if (!ctx) {
      throw new Error("无法创建canvas上下文")
    }

    ctx.putImageData(enhancedImage, 0, 0)
    base64Image = encodeCanvas.toDataURL("image/jpeg", 0.7)
  }

  try {
    let idCardInfo: IDCardInfo

    if (customWorker) {
      const result = await customWorker.postMessage({
        imageBase64: base64Image,
        // No function objects here: they cannot be cloned across threads.
        tessWorkerOptions: {},
      })

      idCardInfo = result.idCardInfo
      this.options.logger?.(
        `OCR处理完成,用时: ${result.processingTime.toFixed(2)}ms`
      )
    } else {
      const startTime = performance.now()
      const canvas = ImageProcessor.imageDataToCanvas(enhancedImage)

      if (!this.worker) {
        throw new Error("OCR引擎未初始化");
      }

      const { data } = (await this.worker.recognize(canvas)) as {
        data: { text: string }
      }

      idCardInfo = this.parseIDCardText(data.text)

      const processingTime = performance.now() - startTime
      this.options.logger?.(
        `OCR处理完成,用时: ${processingTime.toFixed(2)}ms`
      )
    }

    if (cacheKey !== null) {
      this.resultCache.set(cacheKey, idCardInfo)
    }

    return idCardInfo
  } catch (error) {
    let errorMessage: string
    if (error instanceof Error) {
      errorMessage = error.message
    } else if (typeof error === 'object') {
      try {
        errorMessage = JSON.stringify(error)
      } catch {
        // e.g. circular structures: fall back to the default string form.
        errorMessage = String(error)
      }
    } else {
      errorMessage = String(error)
    }

    this.options.logger?.(`OCR识别错误: ${errorMessage}`);

    // Best effort: hand back an empty result rather than failing the caller.
    return {} as IDCardInfo
  }
}
256
+
/**
 * Normalizes a date fragment to `YYYY-MM-DD`.
 *
 * Accepts a compact `YYYYMMDD` string as well as year/month/day separated by
 * `-`, `.`, whitespace or the 年 / 月 / 日 markers. One-digit months and days
 * are zero-padded. Input that matches neither form is returned unchanged.
 *
 * @param dateStr Raw date text.
 * @returns The normalized date, or `dateStr` itself when it cannot be parsed.
 */
private formatDateString(dateStr: string): string {
  // Compact form, e.g. 20220101.
  if (/^\d{8}$/.test(dateStr)) {
    return [
      dateStr.slice(0, 4),
      dateStr.slice(4, 6),
      dateStr.slice(6, 8),
    ].join("-")
  }

  const parts =
    /(\d{4})[-\.\u5e74\s]*(\d{1,2})[-\.\u6708\s]*(\d{1,2})[日]*/.exec(dateStr)
  if (!parts) {
    return dateStr
  }

  const [, year, rawMonth, rawDay] = parts
  return `${year}-${rawMonth.padStart(2, "0")}-${rawDay.padStart(2, "0")}`
}
294
+
/**
 * Checks whether a string is a well-formed 18-character resident ID number.
 *
 * Three things are verified:
 * 1. shape — 17 digits followed by a digit or an uppercase `X`;
 * 2. characters 7-14 (`YYYYMMDD`) form a date that exists on the calendar;
 * 3. the last character equals the check character computed from the first
 *    17 digits (weighted sum modulo 11, per GB 11643-1999).
 *
 * @param idNumber Candidate ID number.
 * @returns `true` only if all three checks pass.
 */
private validateIDNumber(idNumber: string): boolean {
  if (!idNumber || !/^\d{17}[\dX]$/.test(idNumber)) {
    return false
  }

  const year = Number(idNumber.slice(6, 10))
  const month = Number(idNumber.slice(10, 12))
  const day = Number(idNumber.slice(12, 14))

  // Date.UTC rolls impossible dates over (Feb 30 -> Mar 1), so a date is real
  // only if it survives the round trip unchanged.
  const birth = new Date(Date.UTC(year, month - 1, day))
  if (
    birth.getUTCFullYear() !== year ||
    birth.getUTCMonth() !== month - 1 ||
    birth.getUTCDate() !== day
  ) {
    return false
  }

  // Check character: weighted digit sum modulo 11 indexes into the table.
  const weights = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
  const checkChars = "10X98765432"
  let weightedSum = 0
  for (let i = 0; i < weights.length; i++) {
    weightedSum += Number(idNumber[i]) * weights[i]
  }

  return checkChars[weightedSum % 11] === idNumber[17]
}
325
+
/**
 * Extracts structured ID-card fields from raw OCR text.
 *
 * Label-based patterns run against a copy of the text in which every run of
 * whitespace (including line breaks) is collapsed to one space. The fallback
 * name search instead walks the original lines, so it must be fed the text
 * before that collapsing.
 *
 * Fields that cannot be found are left unset. `address` is set to an empty
 * string when the captured text contains no Chinese character.
 *
 * @private
 * @param {string} text - Text produced by the OCR engine.
 * @returns {IDCardInfo} The fields that were recognized.
 */
private parseIDCardText(text: string): IDCardInfo {
  const info: IDCardInfo = {}

  // Single-line view for the label-based patterns.
  const processedText = text.replace(/\s+/g, " ").trim()

  // Per-line view, taken from the raw text; the collapsed text has no line
  // breaks left to split on.
  const lines = text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0)

  // --- ID number: the labelled match is the most reliable, then any 18-char run.
  const prefixMatch = processedText.match(/公民身份号码[\s\:]*(\d{17}[\dX])/)
  const basicMatch = processedText.match(/(\d{17}[\dX])/)
  const idNumber = prefixMatch?.[1] ?? basicMatch?.[1]
  if (idNumber) {
    info.idNumber = idNumber
  }

  // --- Name: labelled value first, otherwise the first short all-Chinese line.
  const nameMatch = processedText.match(/姓名[\s\:]*([一-龥]{2,4})/)
  if (nameMatch && nameMatch[1]) {
    info.name = nameMatch[1].trim()
  } else {
    // Lines that are (part of) a field label must not be taken for a name.
    const labelPattern = /姓名|出生|性别|民族|住址|公民|签发|有效/
    const candidate = lines.find(
      (line) =>
        line.length >= 2 &&
        line.length <= 5 &&
        /^[一-龥]+$/.test(line) &&
        !labelPattern.test(line)
    )
    if (candidate !== undefined) {
      info.name = candidate
    }
  }

  // --- Gender and ethnicity: combined pattern first, then each on its own.
  const genderNationalityMatch = processedText.match(
    /性别[\s\:]*([男女])[\s ]*民族[\s\:]*([一-龥]+族)/
  )
  if (genderNationalityMatch) {
    info.gender = genderNationalityMatch[1]
    info.nationality = genderNationalityMatch[2]
  } else {
    const genderOnlyMatch = processedText.match(/性别[\s\:]*([男女])/)
    const nationalityOnlyMatch = processedText.match(/民族[\s\:]*([一-龥]+族)/)
    if (genderOnlyMatch) info.gender = genderOnlyMatch[1]
    if (nationalityOnlyMatch) info.nationality = nationalityOnlyMatch[1]
  }

  // --- Birth date.
  // 1. YYYY年MM月DD日
  const birthDateRegex1 = /出生[\s\:]*(\d{4})年(\d{1,2})月(\d{1,2})[日号]/
  // 2. YYYY-MM-DD, YYYY/MM/DD or YYYY.MM.DD
  const birthDateRegex2 = /出生[\s\:]*(\d{4})[-\/\.](\d{1,2})[-\/\.](\d{1,2})/
  // 3. Same shapes behind the longer "出生日期" label
  const birthDateRegex3 =
    /出生日期[\s\:]*(\d{4})[-\/\.\u5e74](\d{1,2})[-\/\.\u6708](\d{1,2})[日号]?/

  const birthDateMatch =
    processedText.match(birthDateRegex1) ||
    processedText.match(birthDateRegex2) ||
    processedText.match(birthDateRegex3)

  if (birthDateMatch) {
    // Pad month and day to two digits.
    const year = birthDateMatch[1]
    const month = birthDateMatch[2].padStart(2, "0")
    const day = birthDateMatch[3].padStart(2, "0")
    info.birthDate = `${year}-${month}-${day}`
  } else if (info.idNumber && info.idNumber.length === 18) {
    // 4. Fall back to the date embedded in the ID number (characters 7-14).
    const year = info.idNumber.substring(6, 10)
    const month = info.idNumber.substring(10, 12)
    const day = info.idNumber.substring(12, 14)
    info.birthDate = `${year}-${month}-${day}`
  }

  // --- Address: text up to the next known label, else a looser character run.
  const addressRegex1 = /住址[\s\:]*([\s\S]*?)(?=公民身份|出生|性别|签发)/
  const addressRegex2 = /住址[\s\:]*([一-龥a-zA-Z0-9\s\.\-]+)/
  const addressMatch =
    processedText.match(addressRegex1) || processedText.match(addressRegex2)

  if (addressMatch && addressMatch[1]) {
    // \s already covers line breaks and edge whitespace, so one pass suffices.
    let address = addressMatch[1].replace(/\s+/g, "")

    // Cap the length at 70 characters.
    if (address.length > 70) {
      address = address.substring(0, 70)
    }

    // Text without any Chinese character is unlikely to be an address.
    if (!/[一-龥]/.test(address)) {
      address = ""
    }

    info.address = address
  }

  // --- Issuing authority.
  const authorityRegex1 =
    /签发机关[\s\:]*([\s\S]*?)(?=有效|公民|出生|\d{8}|$)/
  const authorityRegex2 = /签发机关[\s\:]*([一-龥\s]+)/
  const authorityMatch =
    processedText.match(authorityRegex1) ||
    processedText.match(authorityRegex2)

  if (authorityMatch && authorityMatch[1]) {
    info.issuingAuthority = authorityMatch[1].replace(/\s+/g, "")
  }

  // --- Validity period.
  // 1. YYYY.MM.DD-YYYY.MM.DD and variants
  const validPeriodRegex1 =
    /有效期限[\s\:]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日\s]*)[-\s]*(至|-)[-\s]*(\d{4}[-\.\u5e74\s]\d{1,2}[-\.\u6708\s]\d{1,2}[日]*|[永久长期]*)/
  // 2. YYYYMMDD-YYYYMMDD
  const validPeriodRegex2 =
    /有效期限[\s\:]*(\d{8})[-\s]*(至|-)[-\s]*(\d{8}|[永久长期]*)/

  const validPeriodMatch =
    processedText.match(validPeriodRegex1) ||
    processedText.match(validPeriodRegex2)

  if (validPeriodMatch) {
    if (validPeriodMatch[1] && validPeriodMatch[3]) {
      // Normalize both ends; a non-numeric end means "long-term".
      const startDate = this.formatDateString(validPeriodMatch[1])
      const endDate = /\d/.test(validPeriodMatch[3])
        ? this.formatDateString(validPeriodMatch[3])
        : "长期有效"

      info.validPeriod = `${startDate}-${endDate}`
    } else {
      // No usable end part: keep the raw text minus the label.
      info.validPeriod = validPeriodMatch[0].replace("有效期限", "").trim()
    }
  }

  return info
}
503
+
/**
 * Drops every cached OCR result, then reports the fact through the
 * configured logger (if any).
 */
clearCache(): void {
  const { resultCache, options } = this
  resultCache.clear()
  options.logger?.("OCR结果缓存已清除")
}
511
+
/**
 * Shuts down whichever OCR backends exist and marks the processor as
 * uninitialized, so the next `processIDCard()` call initializes it again.
 *
 * The instance state is reset before the Tesseract worker is awaited, and the
 * custom worker is released in a `finally` block. If the Tesseract shutdown
 * rejects, the rejection is still propagated, but no worker is left attached
 * and the processor is not left claiming to be initialized.
 *
 * @returns {Promise<void>} Resolves once both backends have been released.
 * @throws Whatever the Tesseract worker's `terminate()` rejects with.
 */
async terminate(): Promise<void> {
  const engine = this.worker
  const customWorker = this.ocrWorker

  // Detach first so a failure below cannot leave stale references behind.
  this.worker = null
  this.ocrWorker = null
  this.initialized = false

  try {
    if (engine) {
      await engine.terminate()
    }
  } finally {
    if (customWorker) {
      customWorker.terminate()
    }
  }

  this.options.logger?.("OCR引擎已终止")
}
531
+
/**
 * `Disposable` entry point: delegates to `terminate()` and returns the very
 * promise it produces.
 */
dispose(): Promise<void> {
  const shutdown = this.terminate()
  return shutdown
}
538
+ }