secure-redact 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -58
- package/dist/lib.d.ts +1 -1
- package/dist/secure-redact.es.js +8623 -7028
- package/dist/secure-redact.es.js.map +1 -1
- package/dist/secure-redact.umd.js +1578 -28
- package/dist/secure-redact.umd.js.map +1 -1
- package/package.json +1 -1
- package/dist/assets/advanced.worker-DSVrF0gl.js +0 -890
- package/dist/assets/nlp.worker-u7Lr_A3c.js +0 -521
- package/dist/assets/ocr.worker-D5s6dY7M.js +0 -139
|
@@ -1,521 +0,0 @@
|
|
|
1
|
-
// Lower-case given names consulted by the name detector.
const f = /* @__PURE__ */ new Set([
  "aarav", "aditi", "aditya", "akash", "amit", "amita", "ananya", "anil",
  "anita", "anjali", "ankita", "arjun", "arun", "aruna", "ashok", "bhavna",
  "chandra", "deepak", "deepika", "dev", "devika", "dhruv", "dinesh", "divya",
  "ganesh", "gaurav", "geeta", "hari", "harish", "indira", "isha", "jagdish",
  "kamala", "karan", "kavita", "kishore", "krishna", "kumar", "lakshmi", "mahesh",
  "manish", "meera", "mohan", "mohit", "nandini", "naresh", "neha", "nikhil",
  "nisha", "pankaj", "pooja", "prakash", "priya", "rahul", "rajesh", "rajiv",
  "raman", "ramesh", "rani", "ravi", "rekha", "rohit", "sachin", "sandeep",
  "sanjay", "sapna", "saroj", "seema", "shanti", "sharma", "shivani", "shobha",
  "shreya", "sita", "sneha", "sunil", "sunita", "suresh", "swati", "tanvi",
  "usha", "varun", "vijay", "vikram", "vinod", "vishal", "vivek", "yash",
  "yogesh",
  "john", "james", "robert", "michael", "william", "david", "richard", "joseph",
  "thomas", "charles",
  "mary", "patricia", "jennifer", "linda", "elizabeth", "barbara", "susan", "jessica",
  "sarah", "karen",
  "mohammed", "ahmed", "ali", "hassan", "hussein", "omar", "fatima", "aisha",
  "zainab", "khadija"
]);

// Lower-case surnames consulted by the name detector.
const m = /* @__PURE__ */ new Set([
  "sharma", "verma", "gupta", "singh", "kumar", "patel", "joshi", "mishra",
  "agarwal", "mehta", "reddy", "rao", "nair", "menon", "pillai", "iyer",
  "iyengar", "mukherjee", "chatterjee", "banerjee", "das", "bose", "sen", "ghosh",
  "roy", "dutta", "sinha", "jain", "shah", "desai", "kulkarni", "patil",
  "deshpande", "kaur", "gill", "bajwa", "chopra", "kapoor", "malhotra", "khanna",
  "saxena", "pandey", "tiwari", "dubey", "trivedi", "dwivedi", "shukla", "chauhan",
  "yadav", "thakur",
  "smith", "johnson", "williams", "brown", "jones", "davis", "miller", "wilson",
  "moore", "taylor"
]);

// Lower-case names of Indian states and union territories (entries may span several words).
const j = /* @__PURE__ */ new Set([
  "andhra pradesh", "arunachal pradesh", "assam", "bihar", "chhattisgarh", "goa",
  "gujarat", "haryana", "himachal pradesh", "jharkhand", "karnataka", "kerala",
  "madhya pradesh", "maharashtra", "manipur", "meghalaya", "mizoram", "nagaland",
  "odisha", "punjab", "rajasthan", "sikkim", "tamil nadu", "telangana",
  "tripura", "uttar pradesh", "uttarakhand", "west bengal",
  "delhi", "chandigarh", "puducherry", "jammu and kashmir", "ladakh"
]);

// Medical vocabulary consulted by the medical-term detector.
// Casing is significant: abbreviations such as "HIV" and "MRI" are stored in upper case.
const u = /* @__PURE__ */ new Set([
  "diabetes", "hypertension", "asthma", "cancer", "HIV", "AIDS", "tuberculosis", "TB",
  "hepatitis", "malaria", "dengue", "cholesterol", "thyroid", "arthritis", "epilepsy", "pneumonia",
  "bronchitis", "anemia", "leukemia", "lymphoma", "insulin", "metformin",
  "blood pressure", "heart disease", "kidney disease", "liver disease", "lung disease",
  "chemotherapy", "radiation", "surgery", "biopsy", "diagnosis", "prognosis", "prescription",
  "medication", "dosage", "allergic", "allergy", "positive", "negative", "report", "pathology",
  "radiology", "MRI", "CT scan", "X-ray", "ultrasound", "ECG", "EKG",
  "patient", "hospital", "clinic", "doctor", "physician", "surgeon"
]);

// Lower-case address keywords; a hit near a PIN code raises the address confidence.
const I = /* @__PURE__ */ new Set([
  "road", "rd", "street", "st", "avenue", "ave", "lane", "ln",
  "nagar", "colony", "sector", "block", "plot", "flat", "floor", "building",
  "bldg", "apartment", "apt", "house", "no", "near", "opposite", "opp",
  "behind", "beside", "next to", "main", "cross", "layout", "extension", "extn",
  "phase", "village", "town", "city", "district", "taluk", "tehsil", "post",
  "pin", "pincode", "zip"
]);
|
|
317
|
-
/**
 * Builds a short pseudo-random identifier for a detected entity.
 *
 * The identifier is only as unique as `Math.random()` allows; it is not
 * suitable for anything security-sensitive.
 *
 * @returns {string} "nlp_" followed by exactly nine base-36 characters.
 */
function p() {
  // Math.random().toString(36) may produce fewer than nine fractional digits
  // (0.5 becomes "0.i"), so pad the suffix to keep ids at a fixed width.
  const suffix = Math.random().toString(36).substring(2, 11).padEnd(9, "0");
  return "nlp_" + suffix;
}
|
|
320
|
-
/**
 * Detects personal names in a sequence of OCR words using the given-name set
 * `f` and the surname set `m`.
 *
 * A word whose letters (lower-cased) appear in either set starts a candidate.
 * The next word is appended when it is also a known name, and a further word
 * is appended when it is a known surname, so a candidate spans one to three
 * words. Confidence rises with each appended word and gets a small bonus when
 * the first word starts with an ASCII capital; it is capped at 0.95.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number}}>} a
 *   OCR words in reading order.
 * @returns {Array<object>} One "NAME" entity per detected candidate.
 */
function M(a) {
  const n = [];
  const lettersOf = (word) => word.text.replace(/[^a-zA-Z]/g, "").toLowerCase();
  // Grow `box` to the smallest rectangle that also covers `other`. Extending
  // only the width produced a negative width when the name wrapped onto the
  // next line and ignored taller neighbouring words.
  const extend = (box, other) => {
    const left = Math.min(box.x, other.x);
    const top = Math.min(box.y, other.y);
    const right = Math.max(box.x + box.w, other.x + other.w);
    const bottom = Math.max(box.y + box.h, other.y + other.h);
    box.x = left;
    box.y = top;
    box.w = right - left;
    box.h = bottom - top;
  };
  for (let e = 0; e < a.length; e++) {
    const o = lettersOf(a[e]);
    if (o.length < 2) continue;
    if (!f.has(o) && !m.has(o)) continue;
    // Keep the first word explicitly rather than re-deriving its index from
    // the number of spaces in the combined value.
    const first = a[e];
    let l = first.text;
    const s = { ...first.bbox };
    let r = 0.65;
    if (e + 1 < a.length) {
      const h = lettersOf(a[e + 1]);
      if (f.has(h) || m.has(h)) {
        l += " " + a[e + 1].text;
        extend(s, a[e + 1].bbox);
        r = 0.82;
        e++;
      }
    }
    if (e + 1 < a.length) {
      const h = lettersOf(a[e + 1]);
      if (m.has(h)) {
        l += " " + a[e + 1].text;
        extend(s, a[e + 1].bbox);
        r = 0.88;
        e++;
      }
    }
    if (/[A-Z]/.test(first.text[0] ?? "")) r += 0.05;
    n.push({
      id: p(),
      type: "NAME",
      value: l,
      confidence: Math.min(r, 0.95),
      bbox: s,
      masked: true,
      layer: 2
    });
  }
  return n;
}
|
|
349
|
-
/**
 * Detects address-like entities in a sequence of OCR words.
 *
 * Two passes are made:
 *  1. Every standalone six-digit number between 110001 and 855117 is treated
 *     as a PIN code. The entity covers the PIN word and up to eight words
 *     before it, and is reported when that window contains an address keyword
 *     from `I` (confidence 0.78) or holds at least three words (0.55).
 *  2. Every run of words that spells a state/territory name from `j`
 *     (case-insensitive, punctuation ignored) is reported at confidence 0.72.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   OCR words in reading order.
 * @returns {Array<object>} "ADDRESS" entities; PIN-based ones first.
 */
function A(a) {
  const n = [];
  // Smallest rectangle covering every word in `words`. Using only the first
  // and last word gave a negative width when the window wrapped onto another line.
  const unionBox = (words) => {
    const left = Math.min(...words.map((c) => c.bbox.x));
    const top = Math.min(...words.map((c) => c.bbox.y));
    const right = Math.max(...words.map((c) => c.bbox.x + c.bbox.w));
    const bottom = Math.max(...words.map((c) => c.bbox.y + c.bbox.h));
    return {
      x: left,
      y: top,
      w: right - left,
      h: bottom - top,
      pageIndex: words[0].bbox.pageIndex
    };
  };
  // The PIN regex only involves digits, so the text is deliberately NOT
  // lower-cased: k() locates words by their original text, and searching a
  // lower-cased copy made it lose track after any capitalised word.
  const e = a.map((i) => i.text).join(" ");
  const o = /\b\d{6}\b/g;
  let t;
  while ((t = o.exec(e)) !== null) {
    const pin = Number.parseInt(t[0], 10);
    if (pin < 110001 || pin > 855117) continue;
    const l = k(a, t.index, e);
    if (l < 0) continue;
    const r = a.slice(Math.max(0, l - 8), l + 1);
    const h = r.some(
      (x) => I.has(x.text.toLowerCase().replace(/[^a-z]/g, ""))
    );
    if (!h && r.length < 3) continue;
    n.push({
      id: p(),
      type: "ADDRESS",
      value: r.map((c) => c.text).join(" "),
      confidence: h ? 0.78 : 0.55,
      bbox: unionBox(r),
      masked: true,
      layer: 2
    });
  }
  for (const i of j) {
    const l = i.split(" ");
    for (let s = 0; s <= a.length - l.length; s++) {
      const span = a.slice(s, s + l.length);
      const spelled = span
        .map((h) => h.text.toLowerCase().replace(/[^a-z ]/g, ""))
        .join(" ");
      if (spelled !== i) continue;
      n.push({
        id: p(),
        type: "ADDRESS",
        value: span.map((d) => d.text).join(" "),
        confidence: 0.72,
        bbox: unionBox(span),
        masked: true,
        layer: 2
      });
    }
  }
  return n;
}
|
|
405
|
-
/**
 * Detects medical vocabulary from the set `u` in a sequence of OCR words.
 *
 * Matching is case-insensitive and ignores punctuation. The dictionary stores
 * some entries in mixed case ("HIV", "CT scan", "X-ray"), so both the
 * dictionary and the words are normalised before comparison; comparing a
 * lower-cased word against the raw set could never match those entries.
 *
 * For each word a single-word match yields an entity at confidence 0.75.
 * Independently, the word plus its successor is checked against the two-word
 * phrases; a hit yields an entity at confidence 0.8 and the successor is skipped.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   OCR words in reading order.
 * @returns {Array<object>} "MEDICAL" entities in document order.
 */
function S(a) {
  const n = [];
  const normalize = (text) => text.toLowerCase().replace(/[^a-z]/g, "");
  // Split the dictionary into single words and space-separated phrases, each
  // normalised the same way as the OCR words.
  const singles = new Set();
  const phrases = new Set();
  for (const term of u) {
    if (term.includes(" ")) {
      phrases.add(term.split(" ").map(normalize).join(" "));
    } else {
      singles.add(normalize(term));
    }
  }
  for (let e = 0; e < a.length; e++) {
    const current = a[e];
    const o = normalize(current.text);
    if (singles.has(o)) {
      n.push({
        id: p(),
        type: "MEDICAL",
        value: current.text,
        confidence: 0.75,
        bbox: { ...current.bbox },
        masked: true,
        layer: 2
      });
    }
    if (e + 1 >= a.length) continue;
    const next = a[e + 1];
    if (!phrases.has(o + " " + normalize(next.text))) continue;
    n.push({
      id: p(),
      type: "MEDICAL",
      value: current.text + " " + next.text,
      confidence: 0.8,
      bbox: {
        x: current.bbox.x,
        y: current.bbox.y,
        w: next.bbox.x + next.bbox.w - current.bbox.x,
        h: Math.max(current.bbox.h, next.bbox.h),
        pageIndex: current.bbox.pageIndex
      },
      masked: true,
      layer: 2
    });
    // The successor is part of the phrase; do not evaluate it again on its own.
    e++;
  }
  return n;
}
|
|
438
|
-
/**
 * Detects e-mail addresses in a sequence of OCR words.
 *
 * Each word is searched for an address rather than required to be one in its
 * entirety, so tokens carrying adjacent punctuation ("a@b.com," or
 * "<a@b.com>") are still detected. The reported value is the address itself;
 * the bounding box is that of the whole word.
 *
 * @param {Array<{text: string, bbox: object}>} a OCR words.
 * @returns {Array<object>} One "EMAIL" entity per word containing an address.
 */
function E(a) {
  const n = [];
  const e = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/;
  for (const o of a) {
    const found = e.exec(o.text);
    if (found === null) continue;
    n.push({
      id: p(),
      type: "EMAIL",
      value: found[0],
      confidence: 0.95,
      bbox: { ...o.bbox },
      masked: true,
      layer: 2
    });
  }
  return n;
}
|
|
452
|
-
/**
 * Detects dates of birth written as D/M/YYYY (separators "/", "-" or ".") in a
 * sequence of OCR words.
 *
 * A date qualifies when day is 1-31, month is 1-12 and the year is plausible
 * for a birth date. The 30 characters before the date are searched for a
 * birth-date label ("dob", "date of birth", "born", ...):
 *  - with a label the entity gets confidence 0.9 and any year from 1920 up to
 *    the current year is accepted, so labelled DOBs of minors are redacted;
 *  - without a label confidence is 0.6 and the year must be 1920-2010, which
 *    keeps recent unlabelled dates from being reported.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   OCR words in reading order.
 * @returns {Array<object>} "DOB" entities in document order.
 */
function L(a) {
  const n = [];
  const e = a.map((i) => i.text).join(" ");
  const o = /\b(\d{1,2})[\/\-.](\d{1,2})[\/\-.](\d{4})\b/g;
  const labelPattern = /\b(dob|date of birth|birth date|born|birthday|d\.o\.b)\b/;
  const currentYear = new Date().getFullYear();
  let t;
  while ((t = o.exec(e)) !== null) {
    const day = Number.parseInt(t[1], 10);
    const month = Number.parseInt(t[2], 10);
    const year = Number.parseInt(t[3], 10);
    if (day < 1 || day > 31 || month < 1 || month > 12) continue;
    const r = Math.max(0, t.index - 30);
    const h = e.substring(r, t.index).toLowerCase();
    const x = labelPattern.test(h);
    const latestYear = x ? currentYear : 2010;
    if (year < 1920 || year > latestYear) continue;
    const d = k(a, t.index, e);
    if (d < 0) continue;
    // Collect the words the match runs through, advancing a character cursor
    // by each word's length plus the joining space.
    const c = [];
    let g = t.index;
    for (let b = d; b < a.length && g < t.index + t[0].length; b++) {
      c.push(a[b]);
      g += a[b].text.length + 1;
    }
    if (c.length === 0) continue;
    const firstWord = c[0];
    const lastWord = c[c.length - 1];
    n.push({
      id: p(),
      type: "DOB",
      value: t[0],
      confidence: x ? 0.9 : 0.6,
      bbox: {
        x: firstWord.bbox.x,
        y: firstWord.bbox.y,
        w: lastWord.bbox.x + lastWord.bbox.w - firstWord.bbox.x,
        h: Math.max(...c.map((v) => v.bbox.h)),
        pageIndex: firstWord.bbox.pageIndex
      },
      masked: true,
      layer: 2
    });
  }
  return n;
}
|
|
487
|
-
/**
 * Maps a character offset in the space-joined word text back to the index of
 * the word that contains it.
 *
 * Word positions are derived from the word lengths alone (each word is
 * followed by one joining space), not by searching `e`. Searching with
 * `indexOf` failed whenever `e` had been case-folded: a capitalised word was
 * not found, the search cursor was reset, and later duplicate tokens resolved
 * to the wrong word or to none.
 *
 * @param {Array<{text: string}>} a Words whose texts were joined with a single space.
 * @param {number} n Character offset into the joined text.
 * @param {string} e The joined text; kept for call compatibility and not read.
 *   NOTE(review): assumes `e` has the same length as the join of the original
 *   word texts - confirm for any caller that transforms it.
 * @returns {number} Index of the word covering offset `n`, or -1 when `n`
 *   falls on a separator or outside the text.
 */
function k(a, n, e) {
  let start = 0;
  for (let t = 0; t < a.length; t++) {
    const end = start + a[t].text.length;
    if (start <= n && n < end) {
      return t;
    }
    // Skip the single space that separates this word from the next.
    start = end + 1;
  }
  return -1;
}
|
|
497
|
-
/**
 * Worker entry point for text analysis.
 *
 * Handles messages whose `type` is "NLP_ANALYZE": runs the name, address,
 * medical, e-mail and date-of-birth detectors over `words`, stamps every
 * entity with the page index (0 when none is supplied) and posts an
 * "NLP_RESULT" message. Any exception is reported as "NLP_ERROR". Messages of
 * other types are ignored.
 */
self.onmessage = (a) => {
  const { type: kind, words, pageIndex } = a.data;
  if (kind !== "NLP_ANALYZE") {
    return;
  }
  try {
    const detectors = [M, A, S, E, L];
    const entities = [];
    for (const detect of detectors) {
      entities.push(...detect(words));
    }
    const page = pageIndex ?? 0;
    entities.forEach((entity) => {
      entity.bbox.pageIndex = page;
    });
    self.postMessage({ type: "NLP_RESULT", entities });
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : "NLP analysis failed";
    self.postMessage({ type: "NLP_ERROR", error: reason });
  }
};
|
|
521
|
-
//# sourceMappingURL=nlp.worker-u7Lr_A3c.js.map
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
/**
 * Converts pixel data to grayscale in place.
 *
 * The buffer is read in groups of four values; the first three are replaced
 * by their weighted sum (0.299, 0.587, 0.114), rounded to an integer. The
 * fourth value of each group is left untouched.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, four values per pixel.
 */
function d(t) {
  let offset = 0;
  while (offset < t.length) {
    const red = t[offset];
    const green = t[offset + 1];
    const blue = t[offset + 2];
    const luma = Math.round(0.299 * red + 0.587 * green + 0.114 * blue);
    t[offset] = luma;
    t[offset + 1] = luma;
    t[offset + 2] = luma;
    offset += 4;
  }
}
|
|
12
|
-
/**
 * Stretches contrast in place so the darkest value maps to 0 and the
 * brightest to 255.
 *
 * Only the first value of each four-value group is inspected; the rescaled
 * result is written to the first three values of the group.
 * NOTE(review): this presumes the buffer was already converted to grayscale -
 * confirm against the caller.
 * A buffer whose inspected values are all equal is left unchanged.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, four values per pixel.
 */
function y(t) {
  let darkest = 255;
  let brightest = 0;
  for (let offset = 0; offset < t.length; offset += 4) {
    const level = t[offset];
    if (level < darkest) {
      darkest = level;
    }
    if (level > brightest) {
      brightest = level;
    }
  }
  const span = brightest - darkest;
  if (span === 0) {
    return;
  }
  for (let offset = 0; offset < t.length; offset += 4) {
    const stretched = Math.round((t[offset] - darkest) / span * 255);
    t[offset] = stretched;
    t[offset + 1] = stretched;
    t[offset + 2] = stretched;
  }
}
|
|
25
|
-
/**
 * Picks a binarisation threshold by maximising between-class variance over a
 * 256-bin histogram (Otsu's method).
 *
 * The histogram is built from the first value of each four-value group. For
 * each candidate level the values up to and including that level form one
 * class and the remaining values the other; the first level with the largest
 * between-class variance is returned.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, four values per pixel.
 * @returns {number} The selected level, or 0 when no split into two non-empty
 *   classes exists.
 */
function R(t) {
  const histogram = new Array(256).fill(0);
  const pixelCount = t.length / 4;
  for (let offset = 0; offset < t.length; offset += 4) {
    histogram[t[offset]] += 1;
  }
  let weightedTotal = 0;
  for (let level = 0; level < 256; level++) {
    weightedTotal += level * histogram[level];
  }
  let lowerSum = 0;
  let lowerCount = 0;
  let bestVariance = 0;
  let bestLevel = 0;
  for (let level = 0; level < 256; level++) {
    lowerCount += histogram[level];
    if (lowerCount === 0) {
      continue;
    }
    const upperCount = pixelCount - lowerCount;
    if (upperCount === 0) {
      break;
    }
    lowerSum += level * histogram[level];
    const lowerMean = lowerSum / lowerCount;
    const upperMean = (weightedTotal - lowerSum) / upperCount;
    const variance = lowerCount * upperCount * (lowerMean - upperMean) * (lowerMean - upperMean);
    if (variance > bestVariance) {
      bestVariance = variance;
      bestLevel = level;
    }
  }
  return bestLevel;
}
|
|
43
|
-
/**
 * Binarises pixel data in place: values strictly above the threshold become
 * 255, all others become 0.
 *
 * The comparison is strict because the threshold produced by the Otsu routine
 * in this file is the highest level that still belongs to the dark class.
 * With `>=`, pixels exactly at the threshold were turned white, and an image
 * containing only two levels (threshold 0) became entirely white.
 *
 * Only the first value of each four-value group is tested; the result is
 * written to the first three values and the fourth is left untouched.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, four values per pixel.
 * @param {number} e Threshold level; values at or below it map to 0.
 */
function w(t, e) {
  for (let r = 0; r < t.length; r += 4) {
    const n = t[r] > e ? 255 : 0;
    t[r] = n;
    t[r + 1] = n;
    t[r + 2] = n;
  }
}
|
|
49
|
-
/**
 * Chooses a page-segmentation mode from how much of the image is non-white.
 *
 * A pixel counts as inked when the mean of its first three values is below
 * 240. The share of inked pixels relative to `e * r` selects the mode:
 * below 0.2 -> 11, above 0.5 -> 6, above 0.3 -> 4, otherwise 3.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, four values per pixel.
 * @param {number} e Image width in pixels.
 * @param {number} r Image height in pixels.
 * @returns {number} One of 3, 4, 6 or 11.
 */
function O(t, e, r) {
  const totalPixels = e * r;
  let inked = 0;
  for (let offset = 0; offset < t.length; offset += 4) {
    const mean = (t[offset] + t[offset + 1] + t[offset + 2]) / 3;
    if (mean < 240) {
      inked += 1;
    }
  }
  const density = inked / totalPixels;
  if (density < 0.2) {
    return 11;
  }
  if (density > 0.5) {
    return 6;
  }
  if (density > 0.3) {
    return 4;
  }
  return 3;
}
|
|
57
|
-
/**
 * Prepares an image for OCR: grayscale, contrast stretch and binarisation,
 * plus selection of a page-segmentation mode.
 *
 * The image is decoded into an ImageBitmap, drawn onto an OffscreenCanvas,
 * processed in place and re-encoded as PNG. If a 2D context is unavailable or
 * any step throws, the original blob is returned with mode 3 and a warning is
 * logged.
 *
 * The bitmap is released in `finally`, so it is closed on the failure path as
 * well; previously an exception after decoding left it open.
 *
 * @param {Blob} t Source image.
 * @returns {Promise<{blob: Blob, psm: number}>} The image to recognise and the
 *   page-segmentation mode to use.
 */
async function k(t) {
  let bitmap = null;
  try {
    bitmap = await createImageBitmap(t);
    const canvas = new OffscreenCanvas(bitmap.width, bitmap.height);
    const context = canvas.getContext("2d", { willReadFrequently: true });
    if (!context) {
      return { blob: t, psm: 3 };
    }
    context.drawImage(bitmap, 0, 0);
    const imageData = context.getImageData(0, 0, bitmap.width, bitmap.height);
    const pixels = imageData.data;
    d(pixels);
    y(pixels);
    // The segmentation mode is chosen from the stretched grayscale image,
    // before binarisation changes the pixel values.
    const psm = O(pixels, bitmap.width, bitmap.height);
    const threshold = R(pixels);
    w(pixels, threshold);
    context.putImageData(imageData, 0, 0);
    const blob = await canvas.convertToBlob({ type: "image/png" });
    return { blob, psm };
  } catch (e) {
    console.warn("[OCR Worker] Preprocessing failed, using original image:", e);
    return { blob: t, psm: 3 };
  } finally {
    bitmap?.close();
  }
}
|
|
73
|
-
// Cached Tesseract worker, set once initialisation has completed.
let h = null;
// In-flight initialisation shared by concurrent callers; cleared on failure.
let hInit = null;
/**
 * Returns the shared Tesseract worker, creating it on first use.
 *
 * The worker is created for language "eng" and reports progress through
 * "OCR_PROGRESS" messages. Callers that arrive while creation is still in
 * progress await the same promise; previously each such caller started its
 * own worker and all but the last were abandoned. If creation fails, the
 * pending promise is discarded so a later call can retry.
 *
 * @returns {Promise<object>} The initialised Tesseract worker.
 */
async function C() {
  if (h) {
    return h;
  }
  if (!hInit) {
    hInit = (async () => {
      const tesseract = await import("./index-C62fEJ4q.js").then(function(e) {
        return e.i;
      });
      h = await tesseract.createWorker("eng", void 0, {
        logger: (e) => {
          self.postMessage({
            type: "OCR_PROGRESS",
            progress: e.progress,
            message: e.status
          });
        }
      });
      return h;
    })().catch((error) => {
      hInit = null;
      throw error;
    });
  }
  return hInit;
}
|
|
87
|
-
/**
 * Worker entry point for OCR.
 *
 * Handles messages whose `type` is "OCR_START": wraps the supplied buffer in a
 * Blob (labelled "image/png" when the declared file type is
 * "application/pdf"), preprocesses it, runs recognition with the detected
 * page-segmentation mode and posts an "OCR_RESULT" message with the words,
 * their bounding boxes and the full text. Words come from the
 * block/paragraph/line hierarchy when blocks are present, otherwise from the
 * flat word list. Failures are reported as "OCR_ERROR". Messages of other
 * types are ignored.
 */
self.onmessage = async (t) => {
  const { type: kind, fileBuffer, fileType, pageIndex } = t.data;
  if (kind !== "OCR_START") {
    return;
  }
  try {
    const engine = await C();
    const mimeType = fileType === "application/pdf" ? "image/png" : fileType;
    const source = new Blob([fileBuffer], { type: mimeType });
    self.postMessage({
      type: "OCR_PROGRESS",
      progress: 0.1,
      message: "Preprocessing image..."
    });
    const { blob: prepared, psm } = await k(source);
    console.log(`[OCR Worker] Detected optimal PSM: ${psm}`);
    await engine.setParameters({
      tessedit_pageseg_mode: psm.toString()
    });
    const result = await engine.recognize(prepared, {}, { text: true, blocks: true });
    const collected = [];
    // Records a recognised word; entries lacking text or a bounding box are skipped.
    const collect = (word) => {
      if (!word || !word.text || !word.bbox) {
        return;
      }
      collected.push({
        text: word.text,
        confidence: (word.confidence ?? 0) / 100,
        bbox: {
          x: word.bbox.x0,
          y: word.bbox.y0,
          w: word.bbox.x1 - word.bbox.x0,
          h: word.bbox.y1 - word.bbox.y0,
          pageIndex: pageIndex ?? 0
        }
      });
    };
    if (result.data.blocks && result.data.blocks.length > 0) {
      for (const block of result.data.blocks) {
        for (const paragraph of block.paragraphs ?? []) {
          for (const line of paragraph.lines ?? []) {
            for (const word of line.words ?? []) {
              collect(word);
            }
          }
        }
      }
    } else if (result.data.words && result.data.words.length > 0) {
      for (const word of result.data.words) {
        collect(word);
      }
    }
    console.log(`[OCR Worker] Extracted ${collected.length} words from Tesseract`);
    let fullText = result.data.text ?? "";
    if (!fullText.trim() && collected.length > 0) {
      fullText = collected.map((word) => word.text).join(" ");
      console.log("[OCR Worker] Reconstructed fullText from words");
    }
    console.log(`[OCR Worker] fullText length: ${fullText.length}`);
    self.postMessage({
      type: "OCR_RESULT",
      words: collected,
      fullText,
      pageIndex: pageIndex ?? 0
    });
  } catch (failure) {
    self.postMessage({
      type: "OCR_ERROR",
      error: failure instanceof Error ? failure.message : "OCR processing failed"
    });
  }
};
|
|
139
|
-
//# sourceMappingURL=ocr.worker-D5s6dY7M.js.map
|