id-scanner-lib 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,146 @@
1
+ /**
2
+ * @file OCR Worker处理模块
3
+ * @description 用于在Web Worker中执行OCR处理
4
+ * @module OCRWorker
5
+ */
6
+
7
+ import type { IDCardInfo } from '../utils/types';
8
+
9
/**
 * Input payload for OCR processing.
 */
export interface OCRProcessInput {
  /** Image to recognize; handed as-is to the Tesseract worker's `recognize` call. */
  imageBase64: string;
  /** Optional options object forwarded to Tesseract's `createWorker`. */
  tessWorkerOptions?: any;
}
16
+
17
/**
 * Result of OCR processing.
 */
export interface OCRProcessOutput {
  /** Structured ID-card fields extracted from the recognized text. */
  idCardInfo: IDCardInfo;
  /** Elapsed time of the whole OCR run, as a `performance.now()` difference (milliseconds). */
  processingTime: number;
}
24
+
25
/**
 * Runs OCR on an ID-card image and parses the recognized text into structured fields.
 *
 * Meant to be executed inside a Worker created with `createWorker` (see the module
 * header). Tesseract.js is loaded through a dynamic import, a Tesseract worker is set
 * up for simplified Chinese (`chi_sim`), and the worker is always terminated before
 * this function settles — on success and on failure alike.
 *
 * @param input Image to recognize plus optional Tesseract worker options. When no
 *   options are supplied, a logger that prints progress messages to the console is used.
 * @returns The parsed ID-card fields and the elapsed time in milliseconds, measured
 *   with `performance.now()` from entry until the worker has been terminated.
 * @throws Whatever the Tesseract worker throws while loading, initializing,
 *   recognizing or terminating.
 */
export async function processOCRInWorker(input: OCRProcessInput): Promise<OCRProcessOutput> {
  const startTime = performance.now();

  // Dynamic import so the OCR engine is only fetched when it is actually needed.
  const { createWorker } = await import('tesseract.js');

  const worker = createWorker(input.tessWorkerOptions || {
    logger: (m: any) => console.log(m)
  });

  // Characters Tesseract is allowed to emit.
  // NOTE(review): free-text values (personal names, addresses, issuing authority) made
  // of characters outside this set cannot be recognized — confirm the whitelist is
  // really intended to be this narrow.
  const charWhitelist = [
    // Digits, the ID-number check character and the date-range separator.
    '0123456789X-',
    // Date units and Chinese numerals.
    '年月日一二三四五六七八九十零壹贰叁肆伍陆柒捌玖拾',
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    'abcdefghijklmnopqrstuvwxyz',
    // Ethnic group names. Fixes the misspelled 纳西族 / 土族 entries and adds the
    // previously missing 蒙古族, 布依族 and 畲族.
    '汉族蒙古族回族藏族维吾尔族苗族彝族壮族布依族朝鲜族满族侗族瑶族白族土家族哈尼族哈萨克族傣族黎族傈僳族佤族畲族高山族拉祜族水族东乡族纳西族景颇族柯尔克孜族土族达斡尔族仫佬族羌族布朗族撒拉族毛南族仡佬族锡伯族阿昌族普米族塔吉克族怒族乌孜别克族俄罗斯族鄂温克族德昂族保安族裕固族京族塔塔尔族独龙族鄂伦春族赫哲族门巴族珞巴族基诺族',
    // Field labels and separators that parseIDCardText searches for
    // (姓名, 有效期限 and 至 were missing before).
    '姓名性别民族男女出生住址公民身份号码签发机关有效期限至'
  ].join('');

  let idCardInfo: IDCardInfo;
  try {
    await worker.load();
    await worker.loadLanguage('chi_sim');
    await worker.initialize('chi_sim');
    await worker.setParameters({
      tessedit_char_whitelist: charWhitelist
    });

    const { data } = await worker.recognize(input.imageBase64);
    idCardInfo = parseIDCardText(data.text);
  } finally {
    // Single release point: runs exactly once whether recognition succeeded or threw.
    await worker.terminate();
  }

  return {
    idCardInfo,
    processingTime: performance.now() - startTime
  };
}
77
+
78
/**
 * Extracts structured ID-card fields from raw OCR text.
 *
 * Every field is optional: a field is only set when its pattern is found in `text`.
 *
 * - `idNumber`: first run of 17 digits followed by a digit or `X`; a lowercase `x`
 *   is accepted and normalized to upper case.
 * - `name`: the line carrying the `姓名` label (label removed). Only when no such line
 *   exists, the first short (2–9 characters), digit-free line that does not contain
 *   another field label is used as a best-effort guess.
 * - `gender` / `nationality`: taken from the first line where `男` or `女` is followed
 *   by `族`; the `民族` label itself is not part of the nationality value.
 * - `birthDate`: first `YYYY年M月D日` date, returned as `YYYY-M-D` (digits as recognized).
 * - `address`: text between `住址` and `公民身份号码`, line breaks removed.
 * - `issuingAuthority`: text between `签发机关` and `有效期`, line breaks removed.
 * - `validPeriod`: start, separator (`-` or `至`) and end of the `有效期限` range.
 *
 * @private
 * @param text Text recognized by OCR.
 * @returns Object holding whichever fields could be extracted.
 */
function parseIDCardText(text: string): IDCardInfo {
  const info: IDCardInfo = {};

  const lines = text
    .split('\n')
    .map(line => line.trim())
    .filter(line => line.length > 0);

  // ID number — usually the most reliably recognized part of the card.
  const idNumberMatch = text.match(/\d{17}[\dXx]/);
  if (idNumberMatch) {
    info.idNumber = idNumberMatch[0].toUpperCase();
  }

  // Name: an explicit label always wins over the heuristic, regardless of line order.
  const labelledNameLine = lines.find(line => line.includes('姓名'));
  if (labelledNameLine !== undefined) {
    info.name = labelledNameLine.replace('姓名', '').trim();
  } else {
    // Lines that carry another field's label are never a name.
    const otherFieldLabels = /性别|民族|出生|住址|公民身份号码|签发机关|有效期/;
    const guessedNameLine = lines.find(
      line => line.length > 1 && line.length < 10 && !/\d/.test(line) && !otherFieldLabels.test(line)
    );
    if (guessedNameLine !== undefined) {
      info.name = guessedNameLine;
    }
  }

  // Gender and nationality share one line on the card.
  const genderLineMatch = text.match(/(男|女)([^\n]*族[^\n]*)/);
  if (genderLineMatch) {
    info.gender = genderLineMatch[1];
    // Drop the 民族 label, then keep only the first token so trailing text on the
    // same line is not swallowed into the value.
    const [nationality = ''] = genderLineMatch[2].replace(/民\s*族/, '').trim().split(/\s+/);
    if (nationality) {
      info.nationality = nationality;
    }
  }

  // Birth date.
  const birthDateMatch = text.match(/(\d{4})年(\d{1,2})月(\d{1,2})日/);
  if (birthDateMatch) {
    info.birthDate = `${birthDateMatch[1]}-${birthDateMatch[2]}-${birthDateMatch[3]}`;
  }

  // Address: may span several OCR lines, so line breaks are removed.
  const addressMatch = text.match(/住址([\s\S]*?)公民身份号码/);
  if (addressMatch) {
    info.address = addressMatch[1].replace(/[\r\n]+/g, '').trim();
  }

  // Issuing authority.
  const authorityMatch = text.match(/签发机关([\s\S]*?)有效期/);
  if (authorityMatch) {
    info.issuingAuthority = authorityMatch[1].replace(/[\r\n]+/g, '').trim();
  }

  // Validity period: keep the end of the range too (it used to be cut off right
  // after the separator). The end part stays optional so a truncated range still
  // yields "start + separator" as before.
  const validPeriodMatch = text.match(/有效期限\s*([\s\S]*?)\s*(-|至)\s*(\S+)?/);
  if (validPeriodMatch) {
    info.validPeriod = `${validPeriodMatch[1]}${validPeriodMatch[2]}${validPeriodMatch[3] || ''}`;
  }

  return info;
}
@@ -154,4 +154,208 @@ export class ImageProcessor {
154
154
 
155
155
  return imageData;
156
156
  }
157
+
158
/**
 * Lowers the image resolution to speed up later processing.
 *
 * Images whose width and height are both within `maxDimension` are returned
 * unchanged (same object). Larger images are scaled so that the longer side equals
 * `maxDimension`, keeping the aspect ratio.
 *
 * @param imageData Source image data.
 * @param maxDimension Upper bound for width and height; defaults to 1000.
 * @returns The original image data when no scaling is needed, otherwise the result
 *   of `resize`.
 */
static downsampleForProcessing(imageData: ImageData, maxDimension: number = 1000): ImageData {
  const { width, height } = imageData;

  // Already small enough: nothing to do.
  if (width <= maxDimension && height <= maxDimension) {
    return imageData;
  }

  // One common factor for both sides preserves the aspect ratio.
  const scale = maxDimension / Math.max(width, height);

  // For very elongated images the short side can round down to 0, which is not a
  // valid image size — keep it at one pixel at least.
  const newWidth = Math.max(1, Math.round(width * scale));
  const newHeight = Math.max(1, Math.round(height * scale));

  return this.resize(imageData, newWidth, newHeight);
}
183
+
184
/**
 * Encodes image data as a JPEG data URL string, a form that is convenient to hand
 * over to a Worker thread.
 *
 * @param imageData Source image data.
 * @returns The value produced by the canvas `toDataURL('image/jpeg', 0.7)` call.
 */
static imageDataToBase64(imageData: ImageData): string {
  // A reduced JPEG quality keeps the encoded payload small.
  const jpegQuality = 0.7;
  return this.imageDataToCanvas(imageData).toDataURL('image/jpeg', jpegQuality);
}
194
+
195
/**
 * Decodes an image source string (such as a base64 data URL) back into image data.
 *
 * The string is loaded through an `Image` element, painted onto a canvas of the same
 * size and read back as pixel data.
 *
 * @param base64 Value assigned to the image element's `src`.
 * @returns Promise resolving to the decoded pixels. It rejects when the image fails
 *   to load or when no 2D canvas context can be obtained.
 */
static async base64ToImageData(base64: string): Promise<ImageData> {
  return new Promise<ImageData>((resolve, reject) => {
    const image = new Image();

    // Paints the loaded image onto a scratch canvas and reads its pixels.
    const rasterize = (): void => {
      const surface = document.createElement('canvas');
      surface.width = image.width;
      surface.height = image.height;

      const context = surface.getContext('2d');
      if (context) {
        context.drawImage(image, 0, 0);
        resolve(context.getImageData(0, 0, surface.width, surface.height));
      } else {
        reject(new Error('无法创建Canvas上下文'));
      }
    };

    image.onload = rasterize;
    image.onerror = () => {
      reject(new Error('图像加载失败'));
    };

    // Handlers are attached before the source is set so no event can be missed.
    image.src = base64;
  });
}
227
+
228
/**
 * Processes an image in parallel by splitting it into strips and running
 * `processingFunction` on each strip in its own Web Worker.
 *
 * The image is cut along its longer side (vertical strips for wide images,
 * horizontal strips otherwise); the last strip absorbs the remainder. When `Worker`
 * is unavailable or the image has fewer than 100000 pixels, `processingFunction` is
 * simply called on the whole image in the current thread.
 *
 * `processingFunction` is shipped to the workers as source text (`toString()`), so it
 * must be self-contained: values captured from an enclosing scope are not available
 * inside the worker.
 * NOTE(review): functions whose `toString()` is not a valid expression (for example
 * method-shorthand definitions) would presumably fail to parse in the worker — confirm
 * callers only pass arrow functions or function expressions.
 *
 * @param imageData Source image data.
 * @param processingFunction Maps the image data of one strip to processed image data.
 * @param chunks Desired number of strips (default 4). Clamped to the range from 1 to
 *   the length of the longer side.
 * @returns Image data of the original size with every processed strip drawn at its
 *   original offset.
 * @throws Error when a 2D canvas context cannot be created, or when a worker reports
 *   an error while processing its strip.
 */
static async processImageInParallel(
  imageData: ImageData,
  processingFunction: (imgData: ImageData) => ImageData,
  chunks: number = 4
): Promise<ImageData> {
  const { width, height } = imageData;

  // No Worker support or a small image: the split is not worth its overhead.
  if (typeof Worker === 'undefined' || width * height < 100000) {
    return processingFunction(imageData);
  }

  const resultCanvas = document.createElement('canvas');
  resultCanvas.width = width;
  resultCanvas.height = height;
  const resultCtx = resultCanvas.getContext('2d');

  if (!resultCtx) {
    throw new Error('无法创建Canvas上下文');
  }

  // Split along the longer side.
  const isWide = width > height;
  const longSide = isWide ? width : height;

  // Guard against 0, negative, fractional or NaN counts, and against more strips
  // than there are pixel rows/columns (which would give zero-sized strips).
  const chunkCount = Math.min(longSide, Math.max(1, Math.floor(chunks) || 1));
  const chunkSize = Math.floor(longSide / chunkCount);

  // Loop-invariant pieces: one source canvas and one worker script for all strips.
  const sourceCanvas = this.imageDataToCanvas(imageData);
  const workerCode = `
    self.onmessage = function(e) {
      const imageData = e.data.imageData;
      const processingFunction = ${processingFunction.toString()};
      const result = processingFunction(imageData);
      self.postMessage({ result, index: e.data.index }, [result.data.buffer]);
    }
  `;
  const workerUrl = URL.createObjectURL(
    new Blob([workerCode], { type: 'application/javascript' })
  );
  const workers: Worker[] = [];

  try {
    const tasks: Array<Promise<{ result: ImageData; index: number }>> = [];

    for (let i = 0; i < chunkCount; i++) {
      const offset = i * chunkSize;
      // The last strip takes whatever the integer division left over.
      const extent = i === chunkCount - 1 ? longSide - offset : chunkSize;

      const sx = isWide ? offset : 0;
      const sy = isWide ? 0 : offset;
      const sw = isWide ? extent : width;
      const sh = isWide ? height : extent;

      const chunkCanvas = document.createElement('canvas');
      chunkCanvas.width = sw;
      chunkCanvas.height = sh;
      const chunkCtx = chunkCanvas.getContext('2d');

      // Skipping the strip would leave a hole in the result, so fail loudly instead.
      if (!chunkCtx) {
        throw new Error('无法创建Canvas上下文');
      }

      chunkCtx.drawImage(sourceCanvas, sx, sy, sw, sh, 0, 0, sw, sh);
      const chunkImageData = chunkCtx.getImageData(0, 0, sw, sh);

      const worker = new Worker(workerUrl);
      workers.push(worker);

      tasks.push(
        new Promise<{ result: ImageData; index: number }>((resolve, reject) => {
          worker.onmessage = (e: MessageEvent) => {
            worker.terminate();
            resolve(e.data);
          };
          // Without this handler a failing worker would leave the promise pending forever.
          worker.onerror = (e: ErrorEvent) => {
            worker.terminate();
            reject(new Error(e.message || 'Worker图像处理失败'));
          };

          // The pixel buffer is transferred rather than copied.
          worker.postMessage(
            { imageData: chunkImageData, index: i },
            [chunkImageData.data.buffer]
          );
        })
      );
    }

    const results = await Promise.all(tasks);

    // Place every strip by the index it was created with.
    for (const { result, index } of results) {
      const processedCanvas = this.imageDataToCanvas(result);
      const offset = index * chunkSize;
      resultCtx.drawImage(processedCanvas, isWide ? offset : 0, isWide ? 0 : offset);
    }

    return resultCtx.getImageData(0, 0, width, height);
  } finally {
    // Also reached when a strip failed: stop remaining workers and release the script URL.
    for (const worker of workers) {
      worker.terminate();
    }
    URL.revokeObjectURL(workerUrl);
  }
}
157
361
  }
@@ -0,0 +1,208 @@
1
+ /**
2
+ * @file 性能优化工具类
3
+ * @description 提供节流、防抖、缓存等性能优化功能
4
+ * @module PerformanceUtils
5
+ */
6
+
7
/**
 * Throttle: limits `fn` to at most one execution per `delay` milliseconds.
 *
 * The first call runs immediately. Calls arriving inside the waiting window are
 * collapsed into a single trailing call that fires when the window ends and receives
 * the arguments and `this` of the most recent suppressed call.
 *
 * Uses the global `setTimeout`, so it also works where `window` does not exist
 * (for example inside a Web Worker).
 *
 * @param fn Function to throttle.
 * @param delay Minimum time between two executions, in milliseconds.
 * @returns Throttled wrapper; the return value of `fn` is discarded.
 */
export function throttle<T extends (...args: any[]) => any>(
  fn: T,
  delay: number
): (...args: Parameters<T>) => void {
  let lastCall = 0;
  let timeoutId: ReturnType<typeof setTimeout> | null = null;
  // Arguments / receiver of the latest call that has not been executed yet.
  let pendingArgs: Parameters<T> | null = null;
  let pendingThis: unknown;

  return function (this: unknown, ...args: Parameters<T>): void {
    const now = Date.now();
    const remaining = delay - (now - lastCall);

    if (remaining <= 0) {
      // Window has elapsed: run right away and drop any scheduled trailing call.
      if (timeoutId !== null) {
        clearTimeout(timeoutId);
        timeoutId = null;
      }
      pendingArgs = null;
      pendingThis = undefined;
      lastCall = now;
      fn.apply(this, args);
      return;
    }

    // Inside the window: remember the newest call for the trailing execution.
    pendingArgs = args;
    pendingThis = this;

    if (timeoutId === null) {
      timeoutId = setTimeout(() => {
        const trailingArgs = pendingArgs;
        const trailingThis = pendingThis;
        lastCall = Date.now();
        timeoutId = null;
        pendingArgs = null;
        pendingThis = undefined;
        if (trailingArgs !== null) {
          fn.apply(trailingThis, trailingArgs);
        }
      }, remaining);
    }
  };
}
41
+
42
/**
 * Debounce: runs `fn` only once `delay` milliseconds have passed since the last call.
 *
 * Every call cancels the previously scheduled execution and schedules a new one with
 * its own arguments and `this`.
 *
 * Uses the global `setTimeout`, so it also works where `window` does not exist
 * (for example inside a Web Worker).
 *
 * @param fn Function to debounce.
 * @param delay Quiet period in milliseconds.
 * @returns Debounced wrapper; the return value of `fn` is discarded.
 */
export function debounce<T extends (...args: any[]) => any>(
  fn: T,
  delay: number
): (...args: Parameters<T>) => void {
  let timeoutId: ReturnType<typeof setTimeout> | null = null;

  return function (this: unknown, ...args: Parameters<T>): void {
    if (timeoutId !== null) {
      clearTimeout(timeoutId);
    }

    timeoutId = setTimeout(() => {
      // Reset before invoking: if fn calls the debounced wrapper again, the handle
      // of that new timer must not be overwritten afterwards.
      timeoutId = null;
      fn.apply(this, args);
    }, delay);
  };
}
66
+
67
/**
 * LRU cache: a bounded key/value store that evicts the least recently used entry.
 *
 * Recency is tracked through the insertion order of the backing `Map`: reading or
 * writing a key re-inserts it, so the first key of the map is always the least
 * recently used one.
 */
export class LRUCache<K, V> {
  private readonly cache = new Map<K, V>();

  /**
   * Creates the cache.
   * @param maxSize Maximum number of entries (default 100). With a capacity that is
   *   not a positive number the cache stores nothing.
   */
  constructor(private maxSize: number = 100) {}

  /**
   * Reads an entry and marks it as most recently used.
   * @param key Cache key.
   * @returns The cached value, or `undefined` when the key is absent.
   */
  get(key: K): V | undefined {
    // `has` is checked separately so that a stored `undefined` value still counts
    // as a hit and gets its recency refreshed.
    if (!this.cache.has(key)) {
      return undefined;
    }

    const value = this.cache.get(key) as V;

    // Delete and re-insert to move the entry to the newest position.
    this.cache.delete(key);
    this.cache.set(key, value);

    return value;
  }

  /**
   * Stores an entry as most recently used, evicting the least recently used entry
   * when the cache is full.
   * @param key Cache key.
   * @param value Value to cache.
   */
  set(key: K, value: V): void {
    // Without any capacity there is nothing that could be kept.
    if (!(this.maxSize > 0)) {
      return;
    }

    // An existing key is removed first so the re-insert lands at the newest position
    // and does not count against the capacity.
    this.cache.delete(key);

    while (this.cache.size >= this.maxSize) {
      const oldest = this.cache.keys().next();
      if (oldest.done) {
        break;
      }
      this.cache.delete(oldest.value);
    }

    this.cache.set(key, value);
  }

  /**
   * Removes an entry.
   * @param key Cache key.
   * @returns `true` when an entry was removed.
   */
  delete(key: K): boolean {
    return this.cache.delete(key);
  }

  /**
   * Removes all entries.
   */
  clear(): void {
    this.cache.clear();
  }

  /**
   * Number of entries currently stored.
   */
  get size(): number {
    return this.cache.size;
  }

  /**
   * Checks whether a key is present; does not change its recency.
   * @param key Cache key.
   */
  has(key: K): boolean {
    return this.cache.has(key);
  }
}
151
+
152
/**
 * Computes an image fingerprint that can be used to detect identical or similar images.
 *
 * The image is scaled down to `size` x `size` pixels, each pixel is converted to a
 * gray level, and every gray level is compared with the mean of all gray levels.
 *
 * @param imageData Source image data.
 * @param size Edge length of the reduced image (default 8, i.e. 8x8).
 * @returns String with one character per reduced pixel — `'1'` when the pixel's gray
 *   level is at or above the mean, `'0'` otherwise. An empty string is returned when
 *   a 2D canvas context cannot be obtained.
 */
export function calculateImageFingerprint(imageData: ImageData, size: number = 8): string {
  // Step 1: scale the image down to size x size.
  const thumbnail = document.createElement('canvas');
  thumbnail.width = size;
  thumbnail.height = size;
  const thumbnailCtx = thumbnail.getContext('2d');
  if (!thumbnailCtx) {
    return '';
  }

  // The raw pixels have to sit on a canvas before they can be drawn scaled.
  const source = document.createElement('canvas');
  source.width = imageData.width;
  source.height = imageData.height;
  const sourceCtx = source.getContext('2d');
  if (!sourceCtx) {
    return '';
  }
  sourceCtx.putImageData(imageData, 0, 0);

  thumbnailCtx.drawImage(source, 0, 0, imageData.width, imageData.height, 0, 0, size, size);

  // Step 2: one gray level per RGBA pixel (alpha is ignored).
  const pixels = thumbnailCtx.getImageData(0, 0, size, size).data;
  const luminance: number[] = [];
  for (let offset = 0; offset < pixels.length; offset += 4) {
    // Gray = 0.299r + 0.587g + 0.114b
    luminance.push(
      Math.round(0.299 * pixels[offset] + 0.587 * pixels[offset + 1] + 0.114 * pixels[offset + 2])
    );
  }

  // Step 3: mean gray level.
  let total = 0;
  for (const level of luminance) {
    total += level;
  }
  const mean = total / luminance.length;

  // Step 4: one bit per pixel, relative to the mean.
  return luminance.map(level => (level >= mean ? '1' : '0')).join('');
}