secure-redact 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,521 @@
1
/**
 * Lower-case first-name tokens consulted by the NAME detector.
 * A few entries ("kumar", "sharma") also appear in the surname set.
 */
const f = new Set([
  "aarav", "aditi", "aditya", "akash", "amit", "amita", "ananya", "anil",
  "anita", "anjali", "ankita", "arjun", "arun", "aruna", "ashok", "bhavna",
  "chandra", "deepak", "deepika", "dev", "devika", "dhruv", "dinesh", "divya",
  "ganesh", "gaurav", "geeta", "hari", "harish", "indira", "isha", "jagdish",
  "kamala", "karan", "kavita", "kishore", "krishna", "kumar", "lakshmi", "mahesh",
  "manish", "meera", "mohan", "mohit", "nandini", "naresh", "neha", "nikhil",
  "nisha", "pankaj", "pooja", "prakash", "priya", "rahul", "rajesh", "rajiv",
  "raman", "ramesh", "rani", "ravi", "rekha", "rohit", "sachin", "sandeep",
  "sanjay", "sapna", "saroj", "seema", "shanti", "sharma", "shivani", "shobha",
  "shreya", "sita", "sneha", "sunil", "sunita", "suresh", "swati", "tanvi",
  "usha", "varun", "vijay", "vikram", "vinod", "vishal", "vivek", "yash",
  "yogesh",
  "john", "james", "robert", "michael", "william", "david", "richard", "joseph",
  "thomas", "charles",
  "mary", "patricia", "jennifer", "linda", "elizabeth", "barbara", "susan", "jessica",
  "sarah", "karen",
  "mohammed", "ahmed", "ali", "hassan", "hussein", "omar", "fatima", "aisha",
  "zainab", "khadija",
]);

/** Lower-case surname tokens consulted by the NAME detector. */
const m = new Set([
  "sharma", "verma", "gupta", "singh", "kumar", "patel", "joshi", "mishra",
  "agarwal", "mehta", "reddy", "rao", "nair", "menon", "pillai", "iyer",
  "iyengar", "mukherjee", "chatterjee", "banerjee", "das", "bose", "sen", "ghosh",
  "roy", "dutta", "sinha", "jain", "shah", "desai", "kulkarni", "patil",
  "deshpande", "kaur", "gill", "bajwa", "chopra", "kapoor", "malhotra", "khanna",
  "saxena", "pandey", "tiwari", "dubey", "trivedi", "dwivedi", "shukla", "chauhan",
  "yadav", "thakur",
  "smith", "johnson", "williams", "brown", "jones", "davis", "miller", "wilson",
  "moore", "taylor",
]);

/**
 * Lower-case region names (possibly several words each) that the ADDRESS
 * detector matches against consecutive words.
 */
const j = new Set([
  "andhra pradesh", "arunachal pradesh", "assam", "bihar", "chhattisgarh", "goa",
  "gujarat", "haryana", "himachal pradesh", "jharkhand", "karnataka", "kerala",
  "madhya pradesh", "maharashtra", "manipur", "meghalaya", "mizoram", "nagaland",
  "odisha", "punjab", "rajasthan", "sikkim", "tamil nadu", "telangana",
  "tripura", "uttar pradesh", "uttarakhand", "west bengal",
  "delhi", "chandigarh", "puducherry", "jammu and kashmir", "ladakh",
]);

/**
 * Medical vocabulary consulted by the MEDICAL detector. Entries are stored
 * exactly as written here: most are lower-case, acronyms keep their capitals.
 */
const u = new Set([
  "diabetes", "hypertension", "asthma", "cancer", "HIV", "AIDS", "tuberculosis", "TB",
  "hepatitis", "malaria", "dengue", "cholesterol", "thyroid", "arthritis", "epilepsy",
  "pneumonia", "bronchitis", "anemia", "leukemia", "lymphoma", "insulin", "metformin",
  "blood pressure", "heart disease", "kidney disease", "liver disease", "lung disease",
  "chemotherapy", "radiation", "surgery", "biopsy", "diagnosis", "prognosis",
  "prescription", "medication", "dosage", "allergic", "allergy", "positive", "negative",
  "report", "pathology", "radiology", "MRI", "CT scan", "X-ray", "ultrasound", "ECG",
  "EKG", "patient", "hospital", "clinic", "doctor", "physician", "surgeon",
]);

/** Lower-case address keywords that raise the ADDRESS detector's confidence. */
const I = new Set([
  "road", "rd", "street", "st", "avenue", "ave", "lane", "ln",
  "nagar", "colony", "sector", "block", "plot", "flat", "floor", "building",
  "bldg", "apartment", "apt", "house", "no", "near", "opposite", "opp",
  "behind", "beside", "next to", "main", "cross", "layout", "extension", "extn",
  "phase", "village", "town", "city", "district", "taluk", "tehsil", "post",
  "pin", "pincode", "zip",
]);
317
/**
 * Creates an opaque identifier for a detected entity.
 *
 * The previous implementation sliced `Math.random().toString(36)`, whose
 * length varies, so the suffix could be shorter than nine characters (or even
 * empty). The suffix is now always exactly nine base-36 characters and is
 * drawn from the Web Crypto generator when the runtime exposes one.
 *
 * @returns {string} "nlp_" followed by nine characters from [0-9a-z].
 */
function p() {
  const suffixLength = 9;
  const cryptoApi = globalThis.crypto;
  let suffix = "";
  if (cryptoApi && typeof cryptoApi.getRandomValues === "function") {
    const bytes = cryptoApi.getRandomValues(new Uint8Array(suffixLength));
    for (const byte of bytes) {
      // The slight modulo bias is irrelevant here: ids only need to be unique.
      suffix += (byte % 36).toString(36);
    }
  } else {
    for (let index = 0; index < suffixLength; index++) {
      suffix += Math.floor(Math.random() * 36).toString(36);
    }
  }
  return "nlp_" + suffix;
}
320
/**
 * Detects personal names by looking words up in the first-name set `f` and
 * the surname set `m`.
 *
 * A matching word starts a candidate. The next word is appended when it is in
 * either set, and one further word is appended when it is in the surname set.
 * Confidence starts at 0.65, becomes 0.82 / 0.88 as words are appended, gains
 * 0.05 when the first word starts with an ASCII capital, and is capped at 0.95.
 *
 * Fixes over the previous version:
 *  - the bounding box is now the true union of all merged words (previously
 *    only the width was stretched, so a taller or lower neighbouring word was
 *    left partly uncovered, and the width could go negative across lines);
 *  - the first word is tracked by index instead of being recomputed from the
 *    number of spaces in the collected value.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} NAME entities (id, type, value, confidence, bbox, masked, layer).
 */
function M(a) {
  const entities = [];
  const normalize = (word) => word.text.replace(/[^a-zA-Z]/g, "").toLowerCase();
  // Grows `box` in place so that it also encloses `other`.
  const extend = (box, other) => {
    const right = Math.max(box.x + box.w, other.x + other.w);
    const bottom = Math.max(box.y + box.h, other.y + other.h);
    box.x = Math.min(box.x, other.x);
    box.y = Math.min(box.y, other.y);
    box.w = right - box.x;
    box.h = bottom - box.y;
  };

  for (let e = 0; e < a.length; e++) {
    const token = normalize(a[e]);
    if (token.length < 2) continue;
    if (!f.has(token) && !m.has(token)) continue;

    const startIndex = e;
    const box = { ...a[e].bbox };
    let value = a[e].text;
    let confidence = 0.65;

    // Second word: any known first name or surname.
    if (e + 1 < a.length) {
      const next = normalize(a[e + 1]);
      if (f.has(next) || m.has(next)) {
        value += " " + a[e + 1].text;
        extend(box, a[e + 1].bbox);
        confidence = 0.82;
        e++;
      }
    }

    // Trailing word: surnames only.
    if (e + 1 < a.length) {
      const next = normalize(a[e + 1]);
      if (m.has(next)) {
        value += " " + a[e + 1].text;
        extend(box, a[e + 1].bbox);
        confidence = 0.88;
        e++;
      }
    }

    if (/[A-Z]/.test(a[startIndex].text[0] ?? "")) {
      confidence += 0.05;
    }

    entities.push({
      id: p(),
      type: "NAME",
      value,
      confidence: Math.min(confidence, 0.95),
      bbox: box,
      masked: true,
      layer: 2,
    });
  }
  return entities;
}
349
/**
 * Detects address fragments in two passes:
 *
 *  1. Six-digit numbers between 110001 and 855117 are treated as postal
 *     codes. The code word plus up to eight preceding words form the
 *     candidate; it is reported when it contains a keyword from `I`
 *     (confidence 0.78) or spans at least three words (confidence 0.55).
 *  2. Any run of words that equals an entry of `j` after lower-casing and
 *     stripping non-letters is reported with confidence 0.72.
 *
 * Fixes over the previous version:
 *  - the joined text is no longer lower-cased before being handed to `k`;
 *    `k` searches for each word's original text, so lower-casing made every
 *    capitalised word unfindable and could lose the postal-code word;
 *  - bounding boxes are the union of all covered words, so an address that
 *    wraps onto another line no longer yields a negative or too-narrow width.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} ADDRESS entities.
 */
function A(a) {
  const entities = [];

  // Smallest rectangle containing every word in `words`.
  const enclose = (words) => {
    const left = Math.min(...words.map((word) => word.bbox.x));
    const top = Math.min(...words.map((word) => word.bbox.y));
    const right = Math.max(...words.map((word) => word.bbox.x + word.bbox.w));
    const bottom = Math.max(...words.map((word) => word.bbox.y + word.bbox.h));
    return {
      x: left,
      y: top,
      w: right - left,
      h: bottom - top,
      pageIndex: words[0].bbox.pageIndex,
    };
  };

  // Pass 1: postal codes with their leading context.
  const joined = a.map((word) => word.text).join(" ");
  for (const match of joined.matchAll(/\b\d{6}\b/g)) {
    const code = Number.parseInt(match[0], 10);
    if (code < 110001 || code > 855117) continue;

    const codeIndex = k(a, match.index, joined);
    if (codeIndex < 0) continue;

    const windowWords = a.slice(Math.max(0, codeIndex - 8), codeIndex + 1);
    const hasKeyword = windowWords.some(
      (word) => I.has(word.text.toLowerCase().replace(/[^a-z]/g, ""))
    );
    if (!hasKeyword && windowWords.length < 3) continue;

    entities.push({
      id: p(),
      type: "ADDRESS",
      value: windowWords.map((word) => word.text).join(" "),
      confidence: hasKeyword ? 0.78 : 0.55,
      bbox: enclose(windowWords),
      masked: true,
      layer: 2,
    });
  }

  // Pass 2: region names, which may span several words.
  const normalized = a.map((word) => word.text.toLowerCase().replace(/[^a-z ]/g, ""));
  for (const region of j) {
    const span = region.split(" ").length;
    for (let start = 0; start <= a.length - span; start++) {
      if (normalized.slice(start, start + span).join(" ") !== region) continue;
      const matched = a.slice(start, start + span);
      entities.push({
        id: p(),
        type: "ADDRESS",
        value: matched.map((word) => word.text).join(" "),
        confidence: 0.72,
        bbox: enclose(matched),
        masked: true,
        layer: 2,
      });
    }
  }
  return entities;
}
405
/**
 * Detects medical vocabulary from the set `u`, as single words (confidence
 * 0.75) and as two-word phrases (confidence 0.8). When a phrase matches, its
 * second word is skipped on the next iteration.
 *
 * Fixes over the previous version:
 *  - phrase lookups are case-insensitive on both sides; previously the text
 *    was lower-cased but compared with the mixed-case entry "CT scan", which
 *    therefore could never match;
 *  - leading/trailing punctuation is stripped before the exact-case lookup, so
 *    acronyms such as "HIV," or "(MRI)" are found. Acronyms stay
 *    case-sensitive, so the ordinary word "aids" is still not flagged;
 *  - the phrase box now encloses both words vertically as well as horizontally.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} MEDICAL entities.
 */
function S(a) {
  const entities = [];
  const trimPunctuation = (text) => text.replace(/^[^A-Za-z0-9]+|[^A-Za-z0-9]+$/g, "");

  // Lower-cased copies of the multi-word terms, for case-insensitive lookup.
  const phrases = new Set();
  for (const term of u) {
    if (term.includes(" ")) phrases.add(term.toLowerCase());
  }

  for (let e = 0; e < a.length; e++) {
    const raw = a[e].text;
    const lettersOnly = raw.toLowerCase().replace(/[^a-z]/g, "");

    if (u.has(lettersOnly) || u.has(raw) || u.has(trimPunctuation(raw))) {
      entities.push({
        id: p(),
        type: "MEDICAL",
        value: raw,
        confidence: 0.75,
        bbox: { ...a[e].bbox },
        masked: true,
        layer: 2,
      });
    }

    if (e + 1 >= a.length) continue;

    const nextRaw = a[e + 1].text;
    const phrase = raw + " " + nextRaw;
    const cleaned = (trimPunctuation(raw) + " " + trimPunctuation(nextRaw)).toLowerCase();
    if (!phrases.has(phrase.toLowerCase()) && !phrases.has(cleaned)) continue;

    const first = a[e].bbox;
    const second = a[e + 1].bbox;
    const left = Math.min(first.x, second.x);
    const top = Math.min(first.y, second.y);
    entities.push({
      id: p(),
      type: "MEDICAL",
      value: phrase,
      confidence: 0.8,
      bbox: {
        x: left,
        y: top,
        w: Math.max(first.x + first.w, second.x + second.w) - left,
        h: Math.max(first.y + first.h, second.y + second.h) - top,
        pageIndex: first.pageIndex,
      },
      masked: true,
      layer: 2,
    });
    e++; // the second word is already covered by the phrase
  }
  return entities;
}
438
/**
 * Detects e-mail addresses inside individual words.
 *
 * The previous pattern was anchored to the whole word, so an address wrapped
 * in punctuation ("<a@b.com>", "a@b.com,", "mailto:a@b.com") was not
 * detected. The pattern is now searched for inside the word; the reported
 * value is the address itself, and the box still covers the whole word.
 *
 * @param {Array<{text: string, bbox: object}>} a Words to scan.
 * @returns {Array<object>} EMAIL entities with confidence 0.95.
 */
function E(a) {
  const entities = [];
  // Deliberately not global: exec() on this regex keeps no state between words.
  const emailPattern = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/;
  for (const word of a) {
    const match = emailPattern.exec(word.text);
    if (!match) continue;
    entities.push({
      id: p(),
      type: "EMAIL",
      value: match[0],
      confidence: 0.95,
      bbox: { ...word.bbox },
      masked: true,
      layer: 2,
    });
  }
  return entities;
}
452
/**
 * Detects dates that look like a date of birth.
 *
 * A candidate is `D/M/YYYY` (separators `/`, `-` or `.`) with day 1-31,
 * month 1-12 and year 1920-2010. Confidence is 0.9 when one of the birth
 * keywords appears in the 30 characters before the date, otherwise 0.6.
 * The box is built from the word(s) that `k` maps the match onto.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} DOB entities.
 */
function L(a) {
  const found = [];
  const fullText = a.map((word) => word.text).join(" ");
  const datePattern = /\b(\d{1,2})[\/\-.](\d{1,2})[\/\-.](\d{4})\b/g;
  const birthHint = /\b(dob|date of birth|birth date|born|birthday|d\.o\.b)\b/;

  for (const hit of fullText.matchAll(datePattern)) {
    const day = Number.parseInt(hit[1], 10);
    const month = Number.parseInt(hit[2], 10);
    const year = Number.parseInt(hit[3], 10);
    const dayOk = day >= 1 && day <= 31;
    const monthOk = month >= 1 && month <= 12;
    const yearOk = year >= 1920 && year <= 2010;
    if (!(dayOk && monthOk && yearOk)) continue;

    const contextStart = Math.max(0, hit.index - 30);
    const context = fullText.substring(contextStart, hit.index).toLowerCase();
    const hinted = birthHint.test(context);

    const firstWordIndex = k(a, hit.index, fullText);
    if (firstWordIndex < 0) continue;

    // Collect words until the running offset passes the end of the match.
    const matchEnd = hit.index + hit[0].length;
    const covered = [];
    let offset = hit.index;
    let cursor = firstWordIndex;
    while (cursor < a.length && offset < matchEnd) {
      covered.push(a[cursor]);
      offset += a[cursor].text.length + 1;
      cursor++;
    }
    if (covered.length === 0) continue;

    const head = covered[0];
    const tail = covered[covered.length - 1];
    found.push({
      id: p(),
      type: "DOB",
      value: hit[0],
      confidence: hinted ? 0.9 : 0.6,
      bbox: {
        x: head.bbox.x,
        y: head.bbox.y,
        w: tail.bbox.x + tail.bbox.w - head.bbox.x,
        h: Math.max(...covered.map((word) => word.bbox.h)),
        pageIndex: head.bbox.pageIndex,
      },
      masked: true,
      layer: 2,
    });
  }
  return found;
}
487
/**
 * Maps a character offset in a joined text back to the index of the word
 * that contains it.
 *
 * Words are located one after another with a forward-moving cursor.
 *
 * Fixes over the previous version:
 *  - a word that cannot be found is skipped without moving the cursor.
 *    Previously the `-1` returned by `indexOf` was used as a position, which
 *    reset the cursor to near the start of the text, so a later word could be
 *    matched against an earlier occurrence and the real one reported as -1;
 *  - the search is case-insensitive, so callers may pass a text whose case
 *    differs from the words (e.g. a lower-cased copy).
 *
 * @param {Array<{text: string}>} a Words in the order they appear in `e`.
 * @param {number} n Character offset into `e`.
 * @param {string} e Text in which the words appear.
 * @returns {number} Index of the word covering offset `n`, or -1 if none does.
 */
function k(a, n, e) {
  const haystack = e.toLowerCase();
  let cursor = 0;
  for (let t = 0; t < a.length; t++) {
    const needle = String(a[t].text ?? "").toLowerCase();
    const start = haystack.indexOf(needle, cursor);
    if (start === -1) continue; // not present after the cursor: keep the cursor
    const end = start + needle.length;
    if (start <= n && n < end) return t;
    cursor = end;
  }
  return -1;
}
497
/**
 * Worker entry point. For an "NLP_ANALYZE" message it runs every detector
 * over `words`, stamps each entity's box with the message's `pageIndex`
 * (0 when absent) and posts an "NLP_RESULT"; any thrown error is reported as
 * "NLP_ERROR". Other message types are ignored.
 */
self.onmessage = (a) => {
  const { type: messageType, words, pageIndex } = a.data;
  if (messageType !== "NLP_ANALYZE") {
    return;
  }
  try {
    const entities = [];
    // Order matters only for the order of the results: names, addresses,
    // medical terms, e-mails, dates of birth.
    for (const detect of [M, A, S, E, L]) {
      entities.push(...detect(words));
    }
    entities.forEach((entity) => {
      entity.bbox.pageIndex = pageIndex ?? 0;
    });
    self.postMessage({ type: "NLP_RESULT", entities });
  } catch (failure) {
    const error = failure instanceof Error ? failure.message : "NLP analysis failed";
    self.postMessage({ type: "NLP_ERROR", error });
  }
};
521
+ //# sourceMappingURL=nlp.worker-u7Lr_A3c.js.map
@@ -0,0 +1,139 @@
1
/**
 * Converts pixel data to greyscale in place.
 *
 * The buffer is read in groups of four values. The first three are replaced
 * by their weighted sum (0.299, 0.587, 0.114), rounded; the fourth is left
 * untouched.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, modified in place.
 */
function d(t) {
  let offset = 0;
  while (offset < t.length) {
    const red = t[offset];
    const green = t[offset + 1];
    const blue = t[offset + 2];
    const grey = Math.round(0.299 * red + 0.587 * green + 0.114 * blue);
    t[offset] = grey;
    t[offset + 1] = grey;
    t[offset + 2] = grey;
    offset += 4;
  }
}
12
/**
 * Stretches contrast in place so the darkest value maps to 0 and the
 * brightest to 255.
 *
 * Only the first value of every group of four is inspected; the stretched
 * result is written to the first three values of the group. Nothing is
 * changed when darkest and brightest differ by exactly zero.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, modified in place.
 */
function y(t) {
  let darkest = 255;
  let brightest = 0;
  for (let offset = 0; offset < t.length; offset += 4) {
    const level = t[offset];
    if (level < darkest) darkest = level;
    if (level > brightest) brightest = level;
  }

  const span = brightest - darkest;
  if (span === 0) {
    return;
  }

  for (let offset = 0; offset < t.length; offset += 4) {
    const stretched = Math.round(((t[offset] - darkest) / span) * 255);
    t[offset] = stretched;
    t[offset + 1] = stretched;
    t[offset + 2] = stretched;
  }
}
25
/**
 * Computes a binarisation threshold with Otsu's method (maximum
 * between-class variance) from the first value of every group of four.
 *
 * The returned value is the FIRST level of the bright class, i.e. it is meant
 * to be used as `value >= threshold ? bright : dark`, which is how the
 * thresholding step in this file consumes it.
 *
 * Fix over the previous version: it returned the LAST level of the dark
 * class. Combined with a `>=` comparison this put the darkest cluster on the
 * bright side; for a two-tone image (levels 0 and 255) it returned 0 and the
 * whole image was thresholded to white.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer (greyscale expected).
 * @returns {number} Threshold in 0..255; 0 when no split exists (empty or
 *   single-level input), which matches the previous result for those inputs.
 */
function R(t) {
  const histogram = new Array(256).fill(0);
  const pixelCount = t.length / 4;
  for (let offset = 0; offset < t.length; offset += 4) {
    histogram[t[offset]]++;
  }

  let weightedTotal = 0;
  for (let level = 0; level < 256; level++) {
    weightedTotal += level * histogram[level];
  }

  let darkSum = 0;
  let darkCount = 0;
  let bestVariance = 0;
  let bestLevel = -1; // last level of the dark class for the best split
  for (let level = 0; level < 256; level++) {
    darkCount += histogram[level];
    if (darkCount === 0) continue;
    const brightCount = pixelCount - darkCount;
    if (brightCount === 0) break;

    darkSum += level * histogram[level];
    const darkMean = darkSum / darkCount;
    const brightMean = (weightedTotal - darkSum) / brightCount;
    const variance = darkCount * brightCount * (darkMean - brightMean) * (darkMean - brightMean);
    if (variance > bestVariance) {
      bestVariance = variance;
      bestLevel = level;
    }
  }

  // Shift by one so that `>= threshold` selects exactly the bright class.
  return bestLevel < 0 ? 0 : bestLevel + 1;
}
43
/**
 * Binarises pixel data in place: for every group of four values, the first
 * three become 255 when the first value is at least `e`, otherwise 0. The
 * fourth value is left untouched.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer, modified in place.
 * @param {number} e Threshold (inclusive for the bright side).
 */
function w(t, e) {
  let offset = 0;
  while (offset < t.length) {
    let level = 0;
    if (t[offset] >= e) {
      level = 255;
    }
    t[offset] = level;
    t[offset + 1] = level;
    t[offset + 2] = level;
    offset += 4;
  }
}
49
/**
 * Picks a page-segmentation mode from how much of the image is "ink".
 *
 * A pixel counts as ink when the mean of its first three values is below
 * 240. The ink ratio is the ink count divided by `e * r`:
 *   ratio < 0.2 -> 11, ratio > 0.5 -> 6, ratio > 0.3 -> 4, otherwise 3.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel buffer.
 * @param {number} e Image width.
 * @param {number} r Image height.
 * @returns {number} One of 11, 6, 4 or 3.
 */
function O(t, e, r) {
  const area = e * r;
  let inkPixels = 0;
  for (let offset = 0; offset < t.length; offset += 4) {
    const mean = (t[offset] + t[offset + 1] + t[offset + 2]) / 3;
    if (mean < 240) {
      inkPixels++;
    }
  }

  const ratio = inkPixels / area;
  if (ratio < 0.2) return 11;
  if (ratio > 0.5) return 6;
  if (ratio > 0.3) return 4;
  return 3;
}
57
/**
 * Prepares an image for OCR: greyscale, contrast stretch, page-segmentation
 * estimate, Otsu threshold and binarisation, re-encoded as PNG.
 *
 * If no 2D context is available or any step throws, the original blob is
 * returned with page-segmentation mode 3.
 *
 * Fix over the previous version: the decoded `ImageBitmap` was only closed on
 * the two success paths, so it leaked whenever drawing, reading back or
 * encoding threw. It is now released in `finally`.
 *
 * @param {Blob} t Source image.
 * @returns {Promise<{blob: Blob, psm: number}>} Image to recognise and the
 *   page-segmentation mode to use with it.
 */
async function k(t) {
  let bitmap = null;
  try {
    bitmap = await createImageBitmap(t);
    const canvas = new OffscreenCanvas(bitmap.width, bitmap.height);
    const context = canvas.getContext("2d", { willReadFrequently: true });
    if (!context) {
      return { blob: t, psm: 3 };
    }

    context.drawImage(bitmap, 0, 0);
    const imageData = context.getImageData(0, 0, bitmap.width, bitmap.height);
    const pixels = imageData.data;

    d(pixels);
    y(pixels);
    // The segmentation mode is estimated before binarisation changes the pixels.
    const psm = O(pixels, bitmap.width, bitmap.height);
    const threshold = R(pixels);
    w(pixels, threshold);
    context.putImageData(imageData, 0, 0);

    const blob = await canvas.convertToBlob({ type: "image/png" });
    return { blob, psm };
  } catch (error) {
    console.warn("[OCR Worker] Preprocessing failed, using original image:", error);
    return { blob: t, psm: 3 };
  } finally {
    // Runs on every path, including the early return and the catch above.
    bitmap?.close();
  }
}
73
/** The OCR worker once it has been created; null until then. */
let h = null;
/** In-flight creation of the OCR worker, shared by concurrent callers. */
let pendingOcrWorker = null;

/**
 * Returns the shared OCR worker, creating it on first use.
 *
 * Fix over the previous version: `h` was only assigned after the awaited
 * creation finished, so every call made in the meantime started another
 * `createWorker`, and all but the last created worker were dropped without
 * being terminated. Concurrent callers now share one in-flight promise. A
 * failed creation clears that promise so a later call can retry.
 *
 * Progress reported by the worker's logger is forwarded as "OCR_PROGRESS"
 * messages.
 *
 * @returns {Promise<object>} The worker returned by `createWorker("eng", ...)`.
 */
async function C() {
  if (h) {
    return h;
  }
  if (!pendingOcrWorker) {
    pendingOcrWorker = (async () => {
      const ocrModule = await import("./index-C62fEJ4q.js").then(function(e) {
        return e.i;
      });
      h = await ocrModule.createWorker("eng", void 0, {
        logger: (e) => {
          self.postMessage({
            type: "OCR_PROGRESS",
            progress: e.progress,
            message: e.status
          });
        }
      });
      return h;
    })().finally(() => {
      pendingOcrWorker = null;
    });
  }
  return pendingOcrWorker;
}
87
/**
 * Worker entry point. For an "OCR_START" message it preprocesses the image,
 * runs recognition and posts an "OCR_RESULT" with the recognised words
 * (text, confidence scaled to 0..1, box) and the full text. Progress is
 * posted as "OCR_PROGRESS"; any thrown error is posted as "OCR_ERROR".
 * Other message types are ignored.
 */
self.onmessage = async (t) => {
  const { type: messageType, fileBuffer, fileType, pageIndex } = t.data;
  if (messageType !== "OCR_START") {
    return;
  }
  try {
    const ocrWorker = await C();

    // A buffer labelled as PDF is wrapped as a PNG blob; anything else keeps its type.
    const blobType = fileType === "application/pdf" ? "image/png" : fileType;
    const sourceBlob = new Blob([fileBuffer], { type: blobType });
    self.postMessage({
      type: "OCR_PROGRESS",
      progress: 0.1,
      message: "Preprocessing image..."
    });

    const prepared = await k(sourceBlob);
    const psm = prepared.psm;
    console.log(`[OCR Worker] Detected optimal PSM: ${psm}`);
    await ocrWorker.setParameters({
      tessedit_pageseg_mode: psm.toString()
    });

    const result = await ocrWorker.recognize(prepared.blob, {}, { text: true, blocks: true });
    const words = [];
    const collect = (item) => {
      if (item && item.text && item.bbox) {
        words.push({
          text: item.text,
          confidence: (item.confidence ?? 0) / 100,
          bbox: {
            x: item.bbox.x0,
            y: item.bbox.y0,
            w: item.bbox.x1 - item.bbox.x0,
            h: item.bbox.y1 - item.bbox.y0,
            pageIndex: pageIndex ?? 0
          }
        });
      }
    };

    const { blocks, words: flatWords } = result.data;
    if (blocks && blocks.length > 0) {
      // Walk blocks -> paragraphs -> lines -> words.
      for (const block of blocks) {
        for (const paragraph of block.paragraphs ?? []) {
          for (const line of paragraph.lines ?? []) {
            for (const word of line.words ?? []) {
              collect(word);
            }
          }
        }
      }
    } else if (flatWords && flatWords.length > 0) {
      for (const word of flatWords) {
        collect(word);
      }
    }
    console.log(`[OCR Worker] Extracted ${words.length} words from Tesseract`);

    let fullText = result.data.text ?? "";
    if (!fullText.trim() && words.length > 0) {
      fullText = words.map((word) => word.text).join(" ");
      console.log("[OCR Worker] Reconstructed fullText from words");
    }
    console.log(`[OCR Worker] fullText length: ${fullText.length}`);

    self.postMessage({
      type: "OCR_RESULT",
      words,
      fullText,
      pageIndex: pageIndex ?? 0
    });
  } catch (failure) {
    self.postMessage({
      type: "OCR_ERROR",
      error: failure instanceof Error ? failure.message : "OCR processing failed"
    });
  }
};
139
+ //# sourceMappingURL=ocr.worker-D5s6dY7M.js.map