secure-redact 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,521 +0,0 @@
1
// Dictionaries used by the NLP detectors. Lookups elsewhere in this file are
// done with `Set.prototype.has`, and `j` is iterated in insertion order, so
// entry order and exact spelling/casing are preserved.

// Lowercase given names checked by the name detector.
const f = /* @__PURE__ */ new Set([
  "aarav", "aditi", "aditya", "akash", "amit", "amita", "ananya", "anil",
  "anita", "anjali", "ankita", "arjun", "arun", "aruna", "ashok", "bhavna",
  "chandra", "deepak", "deepika", "dev", "devika", "dhruv", "dinesh", "divya",
  "ganesh", "gaurav", "geeta", "hari", "harish", "indira", "isha", "jagdish",
  "kamala", "karan", "kavita", "kishore", "krishna", "kumar", "lakshmi", "mahesh",
  "manish", "meera", "mohan", "mohit", "nandini", "naresh", "neha", "nikhil",
  "nisha", "pankaj", "pooja", "prakash", "priya", "rahul", "rajesh", "rajiv",
  "raman", "ramesh", "rani", "ravi", "rekha", "rohit", "sachin", "sandeep",
  "sanjay", "sapna", "saroj", "seema", "shanti", "sharma", "shivani", "shobha",
  "shreya", "sita", "sneha", "sunil", "sunita", "suresh", "swati", "tanvi",
  "usha", "varun", "vijay", "vikram", "vinod", "vishal", "vivek", "yash",
  "yogesh",
  "john", "james", "robert", "michael", "william", "david", "richard", "joseph",
  "thomas", "charles", "mary", "patricia", "jennifer", "linda", "elizabeth", "barbara",
  "susan", "jessica", "sarah", "karen",
  "mohammed", "ahmed", "ali", "hassan", "hussein", "omar", "fatima", "aisha",
  "zainab", "khadija",
]);

// Lowercase surnames checked by the name detector.
const m = /* @__PURE__ */ new Set([
  "sharma", "verma", "gupta", "singh", "kumar", "patel", "joshi", "mishra",
  "agarwal", "mehta", "reddy", "rao", "nair", "menon", "pillai", "iyer",
  "iyengar", "mukherjee", "chatterjee", "banerjee", "das", "bose", "sen", "ghosh",
  "roy", "dutta", "sinha", "jain", "shah", "desai", "kulkarni", "patil",
  "deshpande", "kaur", "gill", "bajwa", "chopra", "kapoor", "malhotra", "khanna",
  "saxena", "pandey", "tiwari", "dubey", "trivedi", "dwivedi", "shukla", "chauhan",
  "yadav", "thakur",
  "smith", "johnson", "williams", "brown", "jones", "davis", "miller", "wilson",
  "moore", "taylor",
]);

// Lowercase region names matched word-by-word by the address detector.
const j = /* @__PURE__ */ new Set([
  "andhra pradesh", "arunachal pradesh", "assam", "bihar", "chhattisgarh", "goa",
  "gujarat", "haryana", "himachal pradesh", "jharkhand", "karnataka", "kerala",
  "madhya pradesh", "maharashtra", "manipur", "meghalaya", "mizoram", "nagaland",
  "odisha", "punjab", "rajasthan", "sikkim", "tamil nadu", "telangana",
  "tripura", "uttar pradesh", "uttarakhand", "west bengal",
  "delhi", "chandigarh", "puducherry", "jammu and kashmir", "ladakh",
]);

// Medical vocabulary checked by the medical-term detector. Several entries are
// intentionally mixed-case (e.g. "HIV", "CT scan") and are stored verbatim.
const u = /* @__PURE__ */ new Set([
  "diabetes", "hypertension", "asthma", "cancer", "HIV", "AIDS", "tuberculosis", "TB",
  "hepatitis", "malaria", "dengue", "cholesterol", "thyroid", "arthritis", "epilepsy", "pneumonia",
  "bronchitis", "anemia", "leukemia", "lymphoma", "insulin", "metformin",
  "blood pressure", "heart disease", "kidney disease", "liver disease", "lung disease",
  "chemotherapy", "radiation", "surgery", "biopsy", "diagnosis", "prognosis",
  "prescription", "medication", "dosage", "allergic", "allergy", "positive", "negative",
  "report", "pathology", "radiology", "MRI", "CT scan", "X-ray", "ultrasound", "ECG", "EKG",
  "patient", "hospital", "clinic", "doctor", "physician", "surgeon",
]);

// Lowercase address keywords that raise the confidence of a postal-code match.
const I = /* @__PURE__ */ new Set([
  "road", "rd", "street", "st", "avenue", "ave", "lane", "ln",
  "nagar", "colony", "sector", "block", "plot", "flat", "floor", "building",
  "bldg", "apartment", "apt", "house", "no", "near", "opposite", "opp",
  "behind", "beside", "next to", "main", "cross", "layout", "extension", "extn",
  "phase", "village", "town", "city", "district", "taluk", "tehsil", "post",
  "pin", "pincode", "zip",
]);
317
/**
 * Builds an identifier for a detected entity.
 *
 * @returns {string} "nlp_" followed by up to nine base-36 characters taken
 *   from `Math.random()`; not cryptographically strong and not guaranteed
 *   unique.
 */
function p() {
  const randomDigits = Math.random().toString(36);
  // Skip the leading "0." of the base-36 fraction.
  const suffix = randomDigits.slice(2, 11);
  return `nlp_${suffix}`;
}
320
/**
 * Finds person names in an ordered list of OCR words.
 *
 * A word starts a candidate when its letters-only, lowercased form (at least
 * two letters) is in the given-name set `f` or the surname set `m`. The
 * candidate is extended by the next word if that is in either set, and then
 * by one further word if that is in the surname set, so at most three words
 * are merged. Merged words are consumed and not re-examined.
 *
 * Confidence is 0.65 for one word, 0.82 after the first extension and 0.88
 * after the surname extension; 0.05 is added when the first word starts with
 * an ASCII capital, and the result is capped at 0.95.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} One "NAME" entity per candidate. `bbox` is the
 *   union of the merged word boxes and keeps any extra fields (such as
 *   `pageIndex`) from the first word's box.
 */
function M(a) {
  const toToken = (word) => word.text.replace(/[^a-zA-Z]/g, "").toLowerCase();
  const isNameToken = (token) => f.has(token) || m.has(token);
  const entities = [];

  for (let e = 0; e < a.length; e++) {
    const token = toToken(a[e]);
    if (token.length < 2 || !isNameToken(token)) continue;

    // Remember where the name starts instead of re-deriving it from the
    // joined value, which breaks if a word's text itself contains a space.
    const start = e;
    let confidence = 0.65;
    if (e + 1 < a.length && isNameToken(toToken(a[e + 1]))) {
      confidence = 0.82;
      e++;
    }
    if (e + 1 < a.length && m.has(toToken(a[e + 1]))) {
      confidence = 0.88;
      e++;
    }

    const parts = a.slice(start, e + 1);
    if (/[A-Z]/.test(parts[0].text[0])) confidence += 0.05;

    // Union of all merged boxes: stays valid (non-negative width, full
    // height) even when the name wraps onto the next line.
    const left = Math.min(...parts.map((word) => word.bbox.x));
    const top = Math.min(...parts.map((word) => word.bbox.y));
    const right = Math.max(...parts.map((word) => word.bbox.x + word.bbox.w));
    const bottom = Math.max(...parts.map((word) => word.bbox.y + word.bbox.h));

    entities.push({
      id: p(),
      type: "NAME",
      value: parts.map((word) => word.text).join(" "),
      confidence: Math.min(confidence, 0.95),
      bbox: { ...parts[0].bbox, x: left, y: top, w: right - left, h: bottom - top },
      masked: true,
      layer: 2,
    });
  }
  return entities;
}
349
/**
 * Finds address fragments in an ordered list of OCR words.
 *
 * Two passes are made:
 *  1. Every standalone six-digit number between 110001 and 855117 (presumably
 *     the accepted postal-code range — confirm) is located, and the up to
 *     eight words before it plus the word containing it form a candidate. The
 *     candidate is kept when it contains a keyword from `I` (confidence 0.78)
 *     or has at least three words (confidence 0.55).
 *  2. Every run of words whose lowercased, letters-only forms spell an entry
 *     of `j` is reported with confidence 0.72.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} "ADDRESS" entities; each `bbox` is the union of the
 *   boxes of the words it covers.
 */
function A(a) {
  const entities = [];

  // Smallest rectangle containing every word; unlike "first x / last right
  // edge" it cannot go negative when the words span more than one line.
  const unionBox = (words) => {
    const left = Math.min(...words.map((word) => word.bbox.x));
    const top = Math.min(...words.map((word) => word.bbox.y));
    const right = Math.max(...words.map((word) => word.bbox.x + word.bbox.w));
    const bottom = Math.max(...words.map((word) => word.bbox.y + word.bbox.h));
    return {
      x: left,
      y: top,
      w: right - left,
      h: bottom - top,
      pageIndex: words[0].bbox.pageIndex,
    };
  };

  // Keep the original casing: `k` searches this string for each word's own
  // text, so a lowercased copy makes every capitalised word unfindable.
  const joined = a.map((word) => word.text).join(" ");
  const pinPattern = /\b\d{6}\b/g;
  let match;
  while ((match = pinPattern.exec(joined)) !== null) {
    const pin = Number.parseInt(match[0], 10);
    if (pin < 110001 || pin > 855117) continue;

    const pinWordIndex = k(a, match.index, joined);
    if (pinWordIndex < 0) continue;

    const windowStart = Math.max(0, pinWordIndex - 8);
    const windowWords = a.slice(windowStart, pinWordIndex + 1);
    const hasKeyword = windowWords.some((word) =>
      I.has(word.text.toLowerCase().replace(/[^a-z]/g, ""))
    );
    if (!hasKeyword && windowWords.length < 3) continue;

    entities.push({
      id: p(),
      type: "ADDRESS",
      value: windowWords.map((word) => word.text).join(" "),
      confidence: hasKeyword ? 0.78 : 0.55,
      bbox: unionBox(windowWords),
      masked: true,
      layer: 2,
    });
  }

  for (const region of j) {
    const span = region.split(" ").length;
    for (let s = 0; s <= a.length - span; s++) {
      const run = a.slice(s, s + span);
      const spelled = run
        .map((word) => word.text.toLowerCase().replace(/[^a-z ]/g, ""))
        .join(" ");
      if (spelled !== region) continue;
      entities.push({
        id: p(),
        type: "ADDRESS",
        value: run.map((word) => word.text).join(" "),
        confidence: 0.72,
        bbox: unionBox(run),
        masked: true,
        layer: 2,
      });
    }
  }
  return entities;
}
405
/**
 * Finds medical vocabulary from `u` in an ordered list of OCR words.
 *
 * Each word is reported (confidence 0.75) when either its lowercased,
 * letters-only form or its verbatim text is in `u`. Independently, each
 * adjacent pair "word next" is reported (confidence 0.8) when the phrase is in
 * `u` either lowercased or verbatim; the second word of a matched pair is then
 * skipped.
 *
 * The verbatim phrase lookup matters because `u` stores some entries in mixed
 * case (e.g. "CT scan"), which a lowercased-only lookup can never hit.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} "MEDICAL" entities in the order they were found.
 */
function S(a) {
  const entities = [];
  for (let e = 0; e < a.length; e++) {
    const word = a[e];
    const lettersOnly = word.text.toLowerCase().replace(/[^a-z]/g, "");
    if (u.has(lettersOnly) || u.has(word.text)) {
      entities.push({
        id: p(),
        type: "MEDICAL",
        value: word.text,
        confidence: 0.75,
        bbox: { ...word.bbox },
        masked: true,
        layer: 2,
      });
    }

    if (e + 1 >= a.length) continue;
    const next = a[e + 1];
    const phrase = `${word.text} ${next.text}`;
    // Mirror the single-word lookup: try the normalised form and the verbatim
    // form so mixed-case dictionary entries can match.
    if (!u.has(phrase.toLowerCase()) && !u.has(phrase)) continue;

    // Union of both boxes so the region stays valid if the phrase wraps.
    const left = Math.min(word.bbox.x, next.bbox.x);
    const top = Math.min(word.bbox.y, next.bbox.y);
    const right = Math.max(word.bbox.x + word.bbox.w, next.bbox.x + next.bbox.w);
    const bottom = Math.max(word.bbox.y + word.bbox.h, next.bbox.y + next.bbox.h);
    entities.push({
      id: p(),
      type: "MEDICAL",
      value: phrase,
      confidence: 0.8,
      bbox: {
        x: left,
        y: top,
        w: right - left,
        h: bottom - top,
        pageIndex: word.bbox.pageIndex,
      },
      masked: true,
      layer: 2,
    });
    e++; // the second word is part of the phrase; do not re-scan it
  }
  return entities;
}
438
/**
 * Reports every word whose entire text is shaped like an e-mail address.
 *
 * @param {Array<{text: string, bbox: object}>} a Words to inspect.
 * @returns {Array<object>} One "EMAIL" entity (confidence 0.95) per matching
 *   word, each carrying a shallow copy of that word's bounding box.
 */
function E(a) {
  // Anchored at both ends: surrounding punctuation makes a word not match.
  const emailShape = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
  return a
    .filter((word) => emailShape.test(word.text))
    .map((word) => ({
      id: p(),
      type: "EMAIL",
      value: word.text,
      confidence: 0.95,
      bbox: { ...word.bbox },
      masked: true,
      layer: 2,
    }));
}
452
/**
 * Finds dates that look like a date of birth in an ordered list of OCR words.
 *
 * The words are joined with single spaces and scanned for
 * `D/M/YYYY`-shaped tokens (separators `/`, `-` or `.`). A token is kept when
 * the first number is 1-31, the second 1-12 and the year 1920-2010.
 * Confidence is 0.9 when one of the cue words (dob, date of birth, birth date,
 * born, birthday, d.o.b) appears in the 30 characters before the token,
 * otherwise 0.6.
 *
 * @param {Array<{text: string, bbox: {x: number, y: number, w: number, h: number, pageIndex?: number}}>} a
 *   Words in reading order.
 * @returns {Array<object>} "DOB" entities whose `value` is the matched date
 *   text and whose `bbox` spans the word(s) containing it.
 */
function L(a) {
  const found = [];
  const joined = a.map((word) => word.text).join(" ");
  const datePattern = /\b(\d{1,2})[\/\-.](\d{1,2})[\/\-.](\d{4})\b/g;

  for (let hit = datePattern.exec(joined); hit !== null; hit = datePattern.exec(joined)) {
    const day = parseInt(hit[1]);
    const month = parseInt(hit[2]);
    const year = parseInt(hit[3]);
    const dayOk = day >= 1 && day <= 31;
    const monthOk = month >= 1 && month <= 12;
    const yearOk = year >= 1920 && year <= 2010;
    if (!(dayOk && monthOk && yearOk)) continue;

    // Look for a birth-date cue in the text just before the date.
    const contextStart = Math.max(0, hit.index - 30);
    const context = joined.substring(contextStart, hit.index).toLowerCase();
    const hasCue = /\b(dob|date of birth|birth date|born|birthday|d\.o\.b)\b/.test(context);

    const firstWord = k(a, hit.index, joined);
    if (firstWord < 0) continue;

    // Collect words from the one containing the match start until the
    // running character offset passes the end of the match.
    const covered = [];
    const matchEnd = hit.index + hit[0].length;
    let cursor = hit.index;
    let wordIndex = firstWord;
    while (wordIndex < a.length && cursor < matchEnd) {
      covered.push(a[wordIndex]);
      cursor += a[wordIndex].text.length + 1;
      wordIndex += 1;
    }
    if (covered.length === 0) continue;

    const head = covered[0];
    const tail = covered[covered.length - 1];
    found.push({
      id: p(),
      type: "DOB",
      value: hit[0],
      confidence: hasCue ? 0.9 : 0.6,
      bbox: {
        x: head.bbox.x,
        y: head.bbox.y,
        w: tail.bbox.x + tail.bbox.w - head.bbox.x,
        h: Math.max(...covered.map((word) => word.bbox.h)),
        pageIndex: head.bbox.pageIndex,
      },
      masked: true,
      layer: 2,
    });
  }
  return found;
}
487
/**
 * Maps a character offset in the space-joined word text back to the index of
 * the word that contains it.
 *
 * Word positions are derived arithmetically from the word lengths (each word
 * is followed by exactly one separator), so the result does not depend on the
 * casing of the joined string the caller built.
 *
 * @param {Array<{text: string}>} a Words, in the order they were joined.
 * @param {number} n Character offset into the joined text.
 * @param {string} e The joined text. Kept for interface compatibility; the
 *   offsets are computed from `a` alone.
 * @returns {number} Index of the word whose characters include offset `n`, or
 *   -1 when `n` falls on a separator or outside the text.
 */
function k(a, n, e) {
  let start = 0;
  for (let t = 0; t < a.length; t++) {
    const end = start + a[t].text.length;
    if (start <= n && n < end) return t;
    start = end + 1; // skip the single separating space
  }
  return -1;
}
497
/**
 * Worker entry point for NLP analysis.
 *
 * Reacts only to messages of type "NLP_ANALYZE": runs the name, address,
 * medical, e-mail and date-of-birth detectors over `words`, stamps every
 * entity's `bbox.pageIndex` with the message's `pageIndex` (0 when absent),
 * and replies with "NLP_RESULT". Any exception is reported as "NLP_ERROR".
 */
self.onmessage = (a) => {
  const { type: kind, words, pageIndex: page } = a.data;
  if (kind !== "NLP_ANALYZE") return;

  try {
    // Detector order determines the order of entities in the reply.
    const detectors = [M, A, S, E, L];
    const entities = detectors.flatMap((detect) => detect(words));
    const targetPage = page ?? 0;
    entities.forEach((entity) => {
      entity.bbox.pageIndex = targetPage;
    });
    self.postMessage({
      type: "NLP_RESULT",
      entities,
    });
  } catch (failure) {
    const error = failure instanceof Error ? failure.message : "NLP analysis failed";
    self.postMessage({
      type: "NLP_ERROR",
      error,
    });
  }
};
521
- //# sourceMappingURL=nlp.worker-u7Lr_A3c.js.map
@@ -1,139 +0,0 @@
1
/**
 * Converts pixel data to grayscale in place.
 *
 * The array is read in groups of four values; the first three of each group
 * are replaced by their weighted sum (0.299, 0.587, 0.114) rounded to the
 * nearest integer, and the fourth is left untouched.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel data, four values per pixel
 *   (R, G, B and one untouched channel).
 * @returns {void}
 */
function d(t) {
  let base = 0;
  while (base < t.length) {
    const red = t[base];
    const green = t[base + 1];
    const blue = t[base + 2];
    const luma = Math.round(0.299 * red + 0.587 * green + 0.114 * blue);
    t[base] = luma;
    t[base + 1] = luma;
    t[base + 2] = luma;
    base += 4;
  }
}
12
/**
 * Stretches contrast in place so the darkest level becomes 0 and the
 * brightest becomes 255.
 *
 * Only the first value of each four-value pixel is read (the data is expected
 * to be grayscale already); the rescaled level is written to the first three
 * values. Data whose levels are all equal is left unchanged.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel data, four values per pixel.
 * @returns {void}
 */
function y(t) {
  let darkest = 255;
  let brightest = 0;
  for (let px = 0; px < t.length; px += 4) {
    const level = t[px];
    if (level < darkest) darkest = level;
    if (level > brightest) brightest = level;
  }

  const span = brightest - darkest;
  if (span === 0) return; // flat image: nothing to stretch, avoid dividing by 0

  for (let px = 0; px < t.length; px += 4) {
    const stretched = Math.round((t[px] - darkest) / span * 255);
    t[px] = stretched;
    t[px + 1] = stretched;
    t[px + 2] = stretched;
  }
}
25
/**
 * Picks a global threshold by maximising between-class variance over the
 * 256-bin histogram of the first channel.
 *
 * For each candidate level the "low" class is every pixel at or below that
 * level and the "high" class is everything above it; the returned level is
 * the first one that maximises
 * `lowCount * highCount * (lowMean - highMean)^2`.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel data, four values per pixel;
 *   only the first value of each pixel is read.
 * @returns {number} The chosen level, or 0 when no split has two non-empty
 *   classes (for example empty or single-level data).
 */
function R(t) {
  const histogram = new Array(256).fill(0);
  const pixelCount = t.length / 4;
  for (let px = 0; px < t.length; px += 4) {
    histogram[t[px]]++;
  }

  let weightedTotal = 0;
  for (let level = 0; level < 256; level++) {
    weightedTotal += level * histogram[level];
  }

  let lowSum = 0;
  let lowCount = 0;
  let bestVariance = 0;
  let bestLevel = 0;
  for (let level = 0; level < 256; level++) {
    lowCount += histogram[level];
    if (lowCount === 0) continue; // no pixels this dark yet
    const highCount = pixelCount - lowCount;
    if (highCount === 0) break; // every pixel is already in the low class
    lowSum += level * histogram[level];
    const lowMean = lowSum / lowCount;
    const highMean = (weightedTotal - lowSum) / highCount;
    const variance = lowCount * highCount * (lowMean - highMean) * (lowMean - highMean);
    if (variance > bestVariance) {
      bestVariance = variance;
      bestLevel = level;
    }
  }
  return bestLevel;
}
43
/**
 * Binarises pixel data in place against a threshold.
 *
 * The threshold is treated as the highest level that still belongs to the
 * dark class, which is what the histogram-based threshold picker in this file
 * returns (its "low" class includes the chosen level). Levels strictly above
 * the threshold become 255 and all others become 0. Using `>=` here would
 * turn the darkest class white; for a clean two-level image, where the picked
 * threshold is the dark level itself, that blanks the whole image.
 *
 * @param {Uint8ClampedArray|number[]} t Pixel data, four values per pixel;
 *   the first value is read and the first three are overwritten.
 * @param {number} e Threshold: the highest level mapped to black.
 * @returns {void}
 */
function w(t, e) {
  for (let r = 0; r < t.length; r += 4) {
    const n = t[r] > e ? 255 : 0;
    t[r] = n;
    t[r + 1] = n;
    t[r + 2] = n;
  }
}
49
/**
 * Chooses a page-segmentation mode number from how much of the image is
 * non-white.
 *
 * A pixel counts as non-white when the mean of its first three channels is
 * below 240. With `density = nonWhite / (e * r)` the result is 11 when
 * density < 0.2, 6 when density > 0.5, 4 when density > 0.3, and 3 otherwise
 * (including when the density is not a number, e.g. a zero-sized image).
 *
 * @param {Uint8ClampedArray|number[]} t Pixel data, four values per pixel.
 * @param {number} e Image width in pixels.
 * @param {number} r Image height in pixels.
 * @returns {number} One of 11, 6, 4 or 3.
 */
function O(t, e, r) {
  const totalPixels = e * r;
  let nonWhite = 0;
  for (let px = 0; px < t.length; px += 4) {
    const mean = (t[px] + t[px + 1] + t[px + 2]) / 3;
    if (mean < 240) nonWhite++;
  }

  const density = nonWhite / totalPixels;
  if (density < 0.2) return 11;
  if (density > 0.5) return 6;
  if (density > 0.3) return 4;
  return 3;
}
57
/**
 * Prepares an image for OCR and suggests a page-segmentation mode.
 *
 * The image is decoded, drawn to an offscreen canvas, converted to grayscale
 * (`d`), contrast-stretched (`y`), measured for a segmentation mode (`O`),
 * binarised with the threshold from `R` (`w`) and re-encoded as PNG.
 *
 * This function never rejects: if no 2D context is available, or any step
 * throws, the original blob is returned with mode 3 (failures are logged with
 * `console.warn`). The decoded bitmap is released on every path, including
 * failures after decoding.
 *
 * @param {Blob} t Source image.
 * @returns {Promise<{blob: Blob, psm: number}>} The processed (or original)
 *   image and the suggested mode.
 */
async function k(t) {
  let bitmap = null;
  try {
    bitmap = await createImageBitmap(t);
    const { width, height } = bitmap;
    const canvas = new OffscreenCanvas(width, height);
    const context = canvas.getContext("2d", { willReadFrequently: true });
    if (!context) {
      return { blob: t, psm: 3 };
    }

    context.drawImage(bitmap, 0, 0);
    const imageData = context.getImageData(0, 0, width, height);
    const pixels = imageData.data;
    d(pixels);
    y(pixels);
    // Measure density before binarising, while gray levels are still present.
    const psm = O(pixels, width, height);
    const threshold = R(pixels);
    w(pixels, threshold);
    context.putImageData(imageData, 0, 0);

    const blob = await canvas.convertToBlob({ type: "image/png" });
    return { blob, psm };
  } catch (e) {
    console.warn("[OCR Worker] Preprocessing failed, using original image:", e);
    return { blob: t, psm: 3 };
  } finally {
    // Release the decoded bitmap even when a later step threw.
    bitmap?.close();
  }
}
73
// In-flight or completed initialisation of the shared Tesseract worker.
// Holds the promise (not the resolved worker) so that overlapping callers
// share one initialisation instead of each creating their own worker.
let h = null;

/**
 * Returns the shared Tesseract worker, creating it on first use.
 *
 * The worker is created for language "eng" with a logger that forwards
 * progress to the host as "OCR_PROGRESS" messages. Concurrent calls made
 * while creation is still pending all receive the same worker. If creation
 * fails the cache is cleared so a later call can retry.
 *
 * @returns {Promise<object>} The Tesseract worker.
 */
async function C() {
  if (!h) {
    h = (async () => {
      const { i: tesseract } = await import("./index-C62fEJ4q.js");
      return tesseract.createWorker("eng", void 0, {
        logger: (e) => {
          self.postMessage({
            type: "OCR_PROGRESS",
            progress: e.progress,
            message: e.status,
          });
        },
      });
    })().catch((error) => {
      h = null; // allow a retry after a failed initialisation
      throw error;
    });
  }
  return h;
}
87
/**
 * Worker entry point for OCR.
 *
 * Reacts only to messages of type "OCR_START": wraps `fileBuffer` in a Blob,
 * preprocesses it, runs Tesseract with the suggested page-segmentation mode,
 * flattens the recognised words into `{text, confidence, bbox}` records and
 * replies with "OCR_RESULT". Any exception is reported as "OCR_ERROR".
 */
self.onmessage = async (t) => {
  const { type: kind, fileBuffer: buffer, fileType: mime, pageIndex: page } = t.data;
  if (kind !== "OCR_START") return;

  try {
    const engine = await C();

    // NOTE(review): PDF input is labelled as PNG here — presumably the caller
    // rasterises PDF pages before posting them; confirm against the sender.
    const blobType = mime === "application/pdf" ? "image/png" : mime;
    const source = new Blob([buffer], { type: blobType });
    self.postMessage({
      type: "OCR_PROGRESS",
      progress: 0.1,
      message: "Preprocessing image...",
    });

    const { blob: prepared, psm } = await k(source);
    console.log(`[OCR Worker] Detected optimal PSM: ${psm}`);
    await engine.setParameters({
      tessedit_pageseg_mode: psm.toString(),
    });

    const result = await engine.recognize(prepared, {}, { text: true, blocks: true });
    const words = [];
    // Keeps only words that have both text and a bounding box; confidence is
    // rescaled from 0-100 to 0-1.
    const collect = (c) => {
      if (!c || !c.text || !c.bbox) return;
      words.push({
        text: c.text,
        confidence: (c.confidence ?? 0) / 100,
        bbox: {
          x: c.bbox.x0,
          y: c.bbox.y0,
          w: c.bbox.x1 - c.bbox.x0,
          h: c.bbox.y1 - c.bbox.y0,
          pageIndex: page ?? 0,
        },
      });
    };

    const data = result.data;
    if (data.blocks && data.blocks.length > 0) {
      // Preferred source: the block → paragraph → line → word hierarchy.
      for (const block of data.blocks) {
        for (const paragraph of block.paragraphs ?? []) {
          for (const line of paragraph.lines ?? []) {
            for (const word of line.words ?? []) {
              collect(word);
            }
          }
        }
      }
    } else if (data.words && data.words.length > 0) {
      for (const word of data.words) {
        collect(word);
      }
    }
    console.log(`[OCR Worker] Extracted ${words.length} words from Tesseract`);

    let fullText = data.text ?? "";
    if (!fullText.trim() && words.length > 0) {
      fullText = words.map((c) => c.text).join(" ");
      console.log("[OCR Worker] Reconstructed fullText from words");
    }
    console.log(`[OCR Worker] fullText length: ${fullText.length}`);

    self.postMessage({
      type: "OCR_RESULT",
      words,
      fullText,
      pageIndex: page ?? 0,
    });
  } catch (failure) {
    self.postMessage({
      type: "OCR_ERROR",
      error: failure instanceof Error ? failure.message : "OCR processing failed",
    });
  }
};
139
- //# sourceMappingURL=ocr.worker-D5s6dY7M.js.map