secure-redact 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,890 @@
1
/**
 * Groups words into text lines by vertical position.
 *
 * Words are read as `{ text, bbox: { x, y, w, h } }`. The grouping tolerance
 * is half of the median word height (a median of 0/undefined falls back to 20).
 * Words are ordered top-to-bottom, and left-to-right when their `y` values are
 * within the tolerance; a new line starts whenever a word's `y` differs from
 * the previous word's `y` by the tolerance or more.
 *
 * @param {Array<object>} e - Words to group.
 * @returns {Array<object>} One record per line, as built by `v`.
 */
function S(e) {
  if (e.length === 0) {
    return [];
  }

  const sortedHeights = e.map((word) => word.bbox.h).sort((a, b) => a - b);
  const medianHeight = sortedHeights[Math.floor(sortedHeights.length / 2)] || 20;
  const tolerance = medianHeight * 0.5;

  const ordered = [...e].sort((first, second) => {
    const dy = first.bbox.y - second.bbox.y;
    if (Math.abs(dy) < tolerance) {
      return first.bbox.x - second.bbox.x;
    }
    return dy;
  });

  const lines = [];
  let current = [];
  ordered.forEach((word, index) => {
    // Compare against the immediately preceding word, not the line's first word.
    const sameLine =
      index === 0 || Math.abs(word.bbox.y - ordered[index - 1].bbox.y) < tolerance;
    if (!sameLine) {
      lines.push(v(current));
      current = [];
    }
    current.push(word);
  });
  lines.push(v(current));
  return lines;
}
14
/**
 * Builds a line record from the words that make up one text line.
 *
 * @param {Array<object>} e - Words shaped `{ text, bbox: { x, y, w, h } }`.
 * @returns {{words: Array<object>, y: number, x: number, width: number, height: number, text: string}}
 *   `y` is the mean of the word `y` values, `x` the leftmost edge, `width`
 *   the span from the leftmost to the rightmost edge, `height` the tallest
 *   word, and `text` the word texts joined by single spaces.
 */
function v(e) {
  let ySum = 0;
  let left = Infinity;
  let right = -Infinity;
  let tallest = -Infinity;

  for (const { bbox } of e) {
    ySum += bbox.y;
    left = Math.min(left, bbox.x);
    right = Math.max(right, bbox.x + bbox.w);
    tallest = Math.max(tallest, bbox.h);
  }

  return {
    words: e,
    y: ySum / e.length,
    x: left,
    width: right - left,
    height: tallest,
    text: e.map((word) => word.text).join(" ")
  };
}
25
/**
 * Groups text lines into blocks by looking at the vertical gap between
 * consecutive lines.
 *
 * Lines are read as `{ x, y, width, height, text }` and processed top to
 * bottom. A new block starts when the gap to the previous line exceeds
 * 2.5 times the mean gap of the page.
 *
 * Fix: when lines overlap vertically the mean gap is negative, which used to
 * produce a negative threshold and split every line into its own block. A
 * mean gap that is not strictly positive (negative, zero, or undefined for a
 * single line) now falls back to the same default of 20 that zero already used.
 *
 * @param {Array<object>} e - Line records.
 * @param {number} n - Page index stored on each block's bounding box.
 * @returns {Array<object>} One record per block, as built by `k`.
 */
function w(e, n) {
  if (e.length === 0) {
    return [];
  }

  const ordered = [...e].sort((a, b) => a.y - b.y);
  const gapBefore = (index) =>
    ordered[index].y - (ordered[index - 1].y + ordered[index - 1].height);

  let gapSum = 0;
  for (let index = 1; index < ordered.length; index++) {
    gapSum += gapBefore(index);
  }
  // 0 / 0 is NaN for a single line; NaN > 0 is false, so the default applies.
  const meanGap = gapSum / (ordered.length - 1);
  const threshold = (meanGap > 0 ? meanGap : 20) * 2.5;

  const blocks = [];
  let current = [ordered[0]];
  for (let index = 1; index < ordered.length; index++) {
    if (gapBefore(index) > threshold) {
      blocks.push(k(current, n));
      current = [];
    }
    current.push(ordered[index]);
  }
  blocks.push(k(current, n));
  return blocks;
}
38
/**
 * Builds a block record from a group of line records.
 *
 * @param {Array<object>} e - Lines shaped `{ x, y, width, height, text }`.
 * @param {number} n - Page index copied into the bounding box.
 * @returns {{lines: Array<object>, bbox: object, text: string}} The lines,
 *   their enclosing box `{ x, y, w, h, pageIndex }`, and the line texts
 *   joined by newlines.
 */
function k(e, n) {
  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;

  for (const line of e) {
    left = Math.min(left, line.x);
    top = Math.min(top, line.y);
    right = Math.max(right, line.x + line.width);
    bottom = Math.max(bottom, line.y + line.height);
  }

  const bbox = {
    x: left,
    y: top,
    w: right - left,
    h: bottom - top,
    pageIndex: n
  };
  const text = e.map((line) => line.text).join("\n");
  return { lines: e, bbox, text };
}
53
/**
 * Label patterns used to recognise the "key" side of a key/value pair.
 *
 * Each entry has a case-insensitive `pattern` that must match the whole
 * trimmed label (an optional trailing colon is allowed), the entity `type`
 * assigned to the value that follows, and the `confidence` reported for it.
 * Entries are tried in order and the first match wins.
 */
const W = [
  // Names
  {
    pattern: /^(name|patient name|customer name|account holder|holder name|full name|cardholder|person name):?$/i,
    type: "NAME",
    confidence: 0.99
  },
  {
    pattern: /^(father'?s? name|father name|spouse name):?$/i,
    type: "NAME",
    confidence: 0.95
  },
  // Addresses
  {
    pattern: /^(address|residence|location|billing address|shipping address|permanent address|correspondence address):?$/i,
    type: "ADDRESS",
    confidence: 0.98
  },
  // Phone
  {
    pattern: /^(phone|mobile|tel|telephone|contact|ph\.?|mob\.?|contact no\.?|phone no\.?|mobile no\.?):?$/i,
    type: "PHONE",
    confidence: 0.97
  },
  // Email
  {
    pattern: /^(email|e-mail|email id|email address):?$/i,
    type: "EMAIL",
    confidence: 0.98
  },
  // Aadhaar
  {
    pattern: /^(aadhaar|aadhar|uid|aadhaar no\.?|aadhar no\.?|aadhaar number|enrollment no\.?):?$/i,
    type: "AADHAAR",
    confidence: 0.99
  },
  // PAN
  {
    pattern: /^(pan|pan no\.?|pan number|permanent account number):?$/i,
    type: "PAN",
    confidence: 0.99
  },
  // Bank account
  {
    pattern: /^(account no\.?|account number|a\/c no\.?|acc no\.?|acct no\.?):?$/i,
    type: "ACCOUNT_NUMBER",
    confidence: 0.99
  },
  {
    pattern: /^(ifsc|ifsc code|branch code|micr|micr code):?$/i,
    type: "IFSC",
    confidence: 0.95
  },
  // Date of birth
  {
    pattern: /^(dob|date of birth|birth date|d\.o\.b\.?):?$/i,
    type: "DOB",
    confidence: 0.97
  },
  // Medical
  {
    pattern: /^(diagnosis|condition|disease|medication|prescription|blood group|blood type):?$/i,
    type: "MEDICAL",
    confidence: 0.9
  },
  // Invoice / tax
  {
    pattern: /^(invoice no\.?|bill no\.?|invoice number|bill number):?$/i,
    type: "INVOICE_NO",
    confidence: 0.85
  },
  {
    pattern: /^(gst no\.?|gstin|gst number|tax id):?$/i,
    type: "GST",
    confidence: 0.85
  }
];
78
/**
 * Finds labelled key/value pairs ("Name: John Smith") inside text lines.
 *
 * For every position in a line the label matcher `_` is consulted; when a
 * label is recognised, `I` collects the value words that follow it and `P`
 * computes the box enclosing the key and the value.
 *
 * Fix: only one word at a time used to be tested, so label patterns that
 * contain spaces ("date of birth", "account no.", "email address") could
 * never match when every word is a single token (the matching in this file
 * that splits text on whitespace and compares token-by-token suggests they
 * are). Up to three consecutive words are now joined with single spaces and
 * tested, longest candidate first; single-word labels still match as before.
 *
 * @param {Array<{words: Array<object>}>} e - Line records.
 * @param {number} n - Page index forwarded to `P`.
 * @returns {Array<object>} Pairs shaped
 *   `{ key, value, keyText, valueText, confidence, type, bbox }`, where `key`
 *   and `value` are arrays of words.
 */
function $(e, n) {
  // Longest label in the pattern table is three words ("permanent account number").
  const MAX_KEY_WORDS = 3;
  const pairs = [];

  for (const line of e) {
    const words = line.words;
    for (let r = 0; r < words.length; r++) {
      let keyWords = null;
      let keyMatch = null;
      const longest = Math.min(MAX_KEY_WORDS, words.length - r);
      for (let span = longest; span >= 1 && keyMatch === null; span--) {
        const candidate = words.slice(r, r + span);
        const match = _(candidate.map((word) => word.text).join(" "));
        if (match) {
          keyWords = candidate;
          keyMatch = match;
        }
      }
      if (keyMatch === null) {
        continue;
      }

      const lastKeyIndex = r + keyWords.length - 1;
      const valueWords = I(words, lastKeyIndex);
      if (valueWords.length === 0) {
        continue;
      }

      pairs.push({
        key: keyWords,
        value: valueWords,
        keyText: keyWords.map((word) => word.text).join(" "),
        valueText: valueWords.map((word) => word.text).join(" "),
        confidence: keyMatch.confidence,
        type: keyMatch.type,
        bbox: P([...keyWords, ...valueWords], n)
      });
      // Resume after the consumed value words (the loop adds the final +1).
      r = lastKeyIndex + valueWords.length;
    }
  }
  return pairs;
}
101
/**
 * Looks up a label in the pattern table `W`.
 *
 * @param {string} e - Candidate label text; surrounding whitespace is ignored.
 * @returns {{type: string, confidence: number} | null} The type and
 *   confidence of the first matching pattern, or `null` when none matches.
 */
function _(e) {
  const label = e.trim();
  const rule = W.find(({ pattern }) => pattern.test(label));
  if (rule === undefined) {
    return null;
  }
  return { type: rule.type, confidence: rule.confidence };
}
108
/**
 * Collects the value words that follow a label on the same line.
 *
 * Scanning starts right after index `n` and stops at the first of:
 * another recognised label, punctuation-only text after at least one value
 * word, a horizontal gap wider than twice the previous value word's width,
 * or 15 collected words. Punctuation-only words before the first value word
 * (for example a separate ":") are skipped.
 *
 * @param {Array<object>} e - Words of one line, shaped `{ text, bbox: { x, w } }`.
 * @param {number} n - Index of the (last) label word.
 * @returns {Array<object>} The value words, possibly empty.
 */
function I(e, n) {
  const MAX_VALUE_WORDS = 15;
  const punctuationOnly = /^[.,:;!?]+$/;
  const collected = [];

  let index = n + 1;
  while (index < e.length) {
    const word = e[index];
    index += 1;

    if (_(word.text)) {
      break;
    }
    if (punctuationOnly.test(word.text.trim())) {
      if (collected.length > 0) {
        break;
      }
      continue;
    }

    const previous = collected[collected.length - 1];
    if (previous !== undefined) {
      const gap = word.bbox.x - (previous.bbox.x + previous.bbox.w);
      if (gap > previous.bbox.w * 2) {
        break;
      }
    }

    collected.push(word);
    if (collected.length >= MAX_VALUE_WORDS) {
      break;
    }
  }
  return collected;
}
125
/**
 * Computes the box that encloses a set of words.
 *
 * @param {Array<object>} e - Words shaped `{ bbox: { x, y, w, h } }`.
 * @param {number} n - Page index copied into the result.
 * @returns {{x: number, y: number, w: number, h: number, pageIndex: number}}
 *   The enclosing box; an all-zero box when `e` is empty.
 */
function P(e, n) {
  if (e.length === 0) {
    return { x: 0, y: 0, w: 0, h: 0, pageIndex: n };
  }

  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;
  for (const { bbox } of e) {
    left = Math.min(left, bbox.x);
    top = Math.min(top, bbox.y);
    right = Math.max(right, bbox.x + bbox.w);
    bottom = Math.max(bottom, bbox.y + bbox.h);
  }

  return { x: left, y: top, w: right - left, h: bottom - top, pageIndex: n };
}
137
/**
 * Converts key/value pairs into detection entities.
 *
 * Each entity gets a fresh id (`kv_` plus the first eight characters of a
 * random UUID), carries the pair's type, value text, confidence and box, is
 * flagged as masked, and is assigned to layer 3 (the spatial layer, which
 * ranks above the NLP layer).
 *
 * @param {Array<object>} e - Pairs shaped `{ type, valueText, confidence, bbox }`.
 * @returns {Array<object>} One entity per pair, in the same order.
 */
function O(e) {
  const SPATIAL_LAYER = 3;
  const entities = [];
  for (const pair of e) {
    const shortId = crypto.randomUUID().substring(0, 8);
    entities.push({
      id: "kv_" + shortId,
      type: pair.type,
      value: pair.valueText,
      confidence: pair.confidence,
      bbox: pair.bbox,
      masked: true,
      layer: SPATIAL_LAYER
    });
  }
  return entities;
}
149
/**
 * Buckets words into columns by their left edge.
 *
 * Words are visited line by line. A word joins the first existing column
 * whose anchor `x` is less than 15 units away from the word's `x`; otherwise
 * the word starts a new column anchored at its own `x`. Columns are returned
 * ordered by anchor, left to right.
 *
 * @param {Array<{words: Array<object>}>} e - Line records.
 * @returns {Array<{columnIndex: number, words: Array<object>}>}
 */
function L(e) {
  const TOLERANCE = 15;
  const columnsByAnchor = new Map();

  for (const line of e) {
    for (const word of line.words) {
      const left = word.bbox.x;
      const anchor = [...columnsByAnchor.keys()].find(
        (candidate) => Math.abs(left - candidate) < TOLERANCE
      );
      if (anchor === undefined) {
        columnsByAnchor.set(left, [word]);
      } else {
        columnsByAnchor.get(anchor).push(word);
      }
    }
  }

  const ordered = Array.from(columnsByAnchor.entries()).sort(
    (first, second) => first[0] - second[0]
  );
  return ordered.map(([, words], columnIndex) => ({ columnIndex, words }));
}
167
// Keyword vocabularies used to guess the document type. Every entry is a
// lower-case word or phrase that is searched for in the lower-cased text.

/** Invoice vocabulary. */
const B = [
  "invoice", "bill", "bill no", "invoice no", "inv no", "invoice number",
  "bill to", "ship to", "customer", "vendor", "supplier", "po number",
  "purchase order", "payment terms", "due date", "subtotal", "tax", "gst",
  "igst", "cgst", "sgst", "total amount", "amount due", "line item", "qty",
  "quantity", "price", "item description", "hsn", "sac code"
];

/** Bank statement vocabulary. */
const j = [
  "bank statement", "account statement", "statement of account",
  "account number", "account no", "ifsc", "ifsc code", "branch",
  "transaction", "transaction date", "credit", "debit", "balance",
  "opening balance", "closing balance", "withdrawal", "deposit", "cheque",
  "check", "rtgs", "neft", "imps", "upi", "iban", "swift", "account holder",
  "from date", "to date", "statement period"
];

/** Medical report vocabulary. */
const K = [
  "patient", "patient name", "doctor", "physician", "hospital", "clinic",
  "medical report", "lab report", "pathology", "radiology", "diagnosis",
  "prescription", "medication", "blood test", "urine test", "mri", "ct scan",
  "x-ray", "ultrasound", "ecg", "ekg", "test results", "normal range",
  "abnormal", "positive", "negative", "hemoglobin", "glucose", "cholesterol",
  "blood pressure", "heart rate", "pulse", "temperature", "weight", "bmi"
];

/** Tax return vocabulary. */
const z = [
  "income tax", "tax return", "itr", "assessment year", "financial year",
  "pan", "permanent account number", "tax deducted", "tds", "form 16",
  "form 26as", "gross income", "taxable income", "deductions", "exemptions",
  "tax payable", "tax refund", "acknowledgement", "ack no", "return filed",
  "tan", "employer", "salary", "wages", "capital gains"
];

/** Aadhaar card vocabulary. */
const H = [
  "aadhaar", "aadhar", "uid", "unique identification", "uidai",
  "government of india", "bharatiya prachnya patr", "enrollment no", "vid",
  "virtual id", "date of birth", "dob", "gender", "male", "female",
  "address", "yob", "year of birth"
];

/** PAN card vocabulary. */
const F = [
  "pan", "permanent account number", "income tax department", "father name",
  "fathers name", "date of birth", "dob", "signature", "photograph"
];

/** Health report vocabulary. */
const Y = [
  "health report", "medical certificate", "fitness certificate",
  "blood group", "allergies", "past medical history", "current medications",
  "vital signs", "examination", "clinical findings"
];
331
/**
 * Scores a text against a keyword vocabulary.
 *
 * The text is lower-cased and every keyword that occurs in it contributes
 * its number of space-separated words to the score, so longer phrases weigh
 * more than single words.
 *
 * Fix: keywords used to be matched as raw substrings, so short entries
 * scored on unrelated text ("pan" in "company", "uid" in "liquid", "male" in
 * "female"). A keyword now only counts when it is not directly preceded or
 * followed by a letter or digit.
 *
 * @param {string} e - Text to inspect.
 * @param {Array<string>} n - Lower-case keywords or phrases.
 * @returns {{score: number, matched: Array<string>}} Total score and the
 *   keywords that were found, in vocabulary order.
 */
function g(e, n) {
  const haystack = e.toLowerCase();
  let score = 0;
  const matched = [];

  for (const keyword of n) {
    // Escape regex metacharacters so keywords are always matched literally.
    const literal = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const wholeWord = new RegExp("(?<![a-z0-9])" + literal + "(?![a-z0-9])");
    if (wholeWord.test(haystack)) {
      score += keyword.split(" ").length;
      matched.push(keyword);
    }
  }
  return { score, matched };
}
342
/**
 * Guesses the document type from its text.
 *
 * Each vocabulary is scored with `g`; the highest score wins (ties keep the
 * order invoice, bank statement, medical report, tax return, Aadhaar card,
 * PAN card, health report). Confidence is the winner's share of the summed
 * scores, capped at 0.95.
 *
 * @param {string} e - Full document text.
 * @returns {{primaryType: string, confidence: number, secondaryTypes: Array<{type: string, confidence: number}>, detectedKeywords: Array<string>}}
 *   `primaryType` is "generic" with confidence 0 when nothing matched;
 *   `secondaryTypes` lists every other type with a non-zero score.
 */
function U(e) {
  const vocabularies = [
    ["invoice", B],
    ["bank_statement", j],
    ["medical_report", K],
    ["tax_return", z],
    ["id_card_aadhaar", H],
    ["id_card_pan", F],
    ["health_report", Y]
  ];

  const ranked = vocabularies.map(([type, keywords]) => {
    const { score, matched } = g(e, keywords);
    return { type, score, matched };
  });
  ranked.sort((first, second) => second.score - first.score);

  const [best, ...others] = ranked;
  if (best.score === 0) {
    return {
      primaryType: "generic",
      confidence: 0,
      secondaryTypes: [],
      detectedKeywords: []
    };
  }

  const total = ranked.reduce((sum, candidate) => sum + candidate.score, 0);
  const shareOf = (score) => (total > 0 ? score / total : 0);

  return {
    primaryType: best.type,
    // Cap at 95%
    confidence: Math.min(shareOf(best.score), 0.95),
    secondaryTypes: others
      .filter((candidate) => candidate.score > 0)
      .map((candidate) => ({ type: candidate.type, confidence: shareOf(candidate.score) })),
    detectedKeywords: best.matched
  };
}
373
/**
 * Returns the detection ruleset for a document type.
 *
 * Invoices, bank statements and tax returns enable every structural option;
 * ID cards do the same without table detection; medical/health reports and
 * any unrecognised type disable all of them and use a neutral boost of 1.
 * A new object is returned on every call.
 *
 * @param {string} e - Document type.
 * @returns {{prioritizeSpatialContext: boolean, enableTableDetection: boolean, aggressiveRegex: boolean, skipParagraphNER: boolean, confidenceBoost: number}}
 */
function G(e) {
  const structured = (enableTableDetection, confidenceBoost) => ({
    prioritizeSpatialContext: true,
    enableTableDetection,
    aggressiveRegex: true,
    skipParagraphNER: true,
    confidenceBoost
  });

  if (e === "invoice") {
    return structured(true, 1.2);
  }
  if (e === "bank_statement") {
    return structured(true, 1.3);
  }
  if (e === "tax_return") {
    return structured(true, 1.15);
  }
  if (e === "id_card_aadhaar" || e === "id_card_pan") {
    return structured(false, 1.25);
  }

  // Medical/health reports and every unknown type share the same settings.
  return {
    prioritizeSpatialContext: false,
    enableTableDetection: false,
    aggressiveRegex: false,
    skipParagraphNER: false,
    confidenceBoost: 1
  };
}
427
/**
 * Normalises an entity's confidence according to its detection layer.
 *
 * - layer 0: always 1
 * - layer 1: at least 0.95
 * - layer 2: clamped to [0.6, 0.9]
 * - layer 3: clamped to [0.95, 0.99]
 * - layer 4: clamped to [0.5, 0.95]
 * - any other layer: at least 0.5
 *
 * Elsewhere in this file layer 1 is produced by the regex detector, layer 2
 * by the name heuristic and layer 3 by key/value pairing; the producers of
 * layers 0 and 4 are not visible here.
 *
 * @param {object} e - Entity with `layer` and `confidence`.
 * @returns {object} A shallow copy of `e` with the adjusted `confidence`.
 */
function b(e) {
  const raw = e.confidence;
  const clamp = (floor, ceiling) => Math.min(Math.max(raw, floor), ceiling);

  let adjusted;
  if (e.layer === 0) {
    adjusted = 1;
  } else if (e.layer === 1) {
    adjusted = Math.max(raw, 0.95);
  } else if (e.layer === 2) {
    adjusted = clamp(0.6, 0.9);
  } else if (e.layer === 3) {
    adjusted = clamp(0.95, 0.99);
  } else if (e.layer === 4) {
    adjusted = clamp(0.5, 0.95);
  } else {
    adjusted = Math.max(raw, 0.5);
  }

  return { ...e, confidence: adjusted };
}
453
/**
 * Intersection-over-union of two boxes.
 *
 * @param {{x: number, y: number, w: number, h: number, pageIndex: number}} e
 * @param {{x: number, y: number, w: number, h: number, pageIndex: number}} n
 * @returns {number} Overlap ratio in [0, 1]; 0 when the boxes are on
 *   different pages or their combined area is not positive.
 */
function V(e, n) {
  if (e.pageIndex !== n.pageIndex) {
    return 0;
  }

  // Length of the overlap of two 1-D intervals, never negative.
  const overlap = (startA, lengthA, startB, lengthB) =>
    Math.max(0, Math.min(startA + lengthA, startB + lengthB) - Math.max(startA, startB));

  const intersection = overlap(e.x, e.w, n.x, n.w) * overlap(e.y, e.h, n.y, n.h);
  const union = e.w * e.h + n.w * n.h - intersection;
  if (union > 0) {
    return intersection / union;
  }
  return 0;
}
458
/**
 * Removes entities whose boxes overlap an already accepted entity.
 *
 * Entities are ranked by layer (higher number first), then by confidence
 * (higher first, when the difference exceeds 0.01), then by value length
 * (longer first). Walking that ranking, an entity is dropped when its box
 * overlaps any kept entity with an IoU (see `V`) greater than `n`.
 *
 * @param {Array<object>} e - Entities with `layer`, `confidence`, `value`, `bbox`.
 * @param {number} [n=0.5] - IoU above which two entities count as duplicates.
 * @returns {{deduplicated: Array<object>, removedCount: number}}
 */
function X(e, n = 0.5) {
  const byPriority = (first, second) => {
    if (first.layer !== second.layer) {
      return second.layer - first.layer;
    }
    if (Math.abs(first.confidence - second.confidence) > 0.01) {
      return second.confidence - first.confidence;
    }
    return second.value.length - first.value.length;
  };

  const kept = [];
  let removedCount = 0;
  for (const candidate of [...e].sort(byPriority)) {
    const duplicate = kept.some((winner) => V(candidate.bbox, winner.bbox) > n);
    if (duplicate) {
      removedCount += 1;
    } else {
      kept.push(candidate);
    }
  }

  return { deduplicated: kept, removedCount };
}
474
/**
 * Fusion engine: merges entities from the different detectors into one list.
 *
 * The configuration object is stored as given; this class reads
 * `deduplicationOverlapThreshold`, `confidenceThreshold` and `documentType`
 * from it.
 */
class Z {
  config;

  constructor(n) {
    this.config = n;
  }

  /**
   * Main fusion method: combines detections from all sources.
   *
   * Every entity is normalised with `b`, the four lists are concatenated in
   * argument order, overlapping entities are removed with `X`, and entities
   * below `config.confidenceThreshold` are discarded. Progress is logged to
   * the console.
   *
   * @param {Array<object>} n - Regex detections.
   * @param {Array<object>} t - Spatial (key/value) detections.
   * @param {Array<object>} a - NLP detections.
   * @param {Array<object>} r - ML detections.
   * @returns {{entities: Array<object>, stats: object}}
   */
  fuse(n, t, a, r) {
    const regexEntities = n.map(b);
    const spatialEntities = t.map(b);
    const nlpEntities = a.map(b);
    const mlEntities = r.map(b);
    const combined = [...regexEntities, ...spatialEntities, ...nlpEntities, ...mlEntities];

    console.log(`[FusionEngine] Combined ${combined.length} entities from all sources`);
    console.log(`[FusionEngine] - Regex: ${regexEntities.length}`);
    console.log(`[FusionEngine] - Spatial: ${spatialEntities.length}`);
    console.log(`[FusionEngine] - NLP: ${nlpEntities.length}`);
    console.log(`[FusionEngine] - ML/Gemini: ${mlEntities.length}`);

    const dedupe = X(combined, this.config.deduplicationOverlapThreshold);
    console.log(`[FusionEngine] Deduplicated: removed ${dedupe.removedCount}, kept ${dedupe.deduplicated.length}`);

    const confident = dedupe.deduplicated.filter(
      (entity) => entity.confidence >= this.config.confidenceThreshold
    );
    console.log(`[FusionEngine] After confidence filter (>=${this.config.confidenceThreshold}): ${confident.length}`);

    return {
      entities: confident,
      stats: this.generateStats(confident, dedupe.removedCount)
    };
  }

  /**
   * Generates statistics about the fused entities.
   *
   * @param {Array<object>} n - Entities that survived fusion.
   * @param {number} t - Number of entities removed as duplicates.
   * @returns {{total: number, byLayer: object, byConfidenceRange: {low: number, medium: number, high: number}, deduplicated: number}}
   *   `low` counts confidence below 0.7, `medium` below 0.9, `high` the rest.
   */
  generateStats(n, t) {
    const byLayer = {};
    const byConfidenceRange = { low: 0, medium: 0, high: 0 };

    for (const entity of n) {
      byLayer[entity.layer] = (byLayer[entity.layer] || 0) + 1;
      if (entity.confidence < 0.7) {
        byConfidenceRange.low += 1;
      } else if (entity.confidence < 0.9) {
        byConfidenceRange.medium += 1;
      } else {
        byConfidenceRange.high += 1;
      }
    }

    return {
      total: n.length,
      byLayer,
      byConfidenceRange,
      deduplicated: t
    };
  }

  /**
   * Applies document-specific confidence boost.
   *
   * @param {Array<object>} n - Entities to boost.
   * @returns {Array<object>} The same array when the boost is 1; otherwise
   *   copies whose confidence is multiplied by the boost and capped at 1.
   */
  applyDocumentTypeBoost(n) {
    const boost = this.getConfidenceBoost();
    if (boost === 1) {
      return n;
    }
    return n.map((entity) => ({
      ...entity,
      confidence: Math.min(entity.confidence * boost, 1)
    }));
  }

  /**
   * Confidence multiplier for the configured document type.
   *
   * @returns {number} 1.2 for invoices, 1.3 for bank statements, 1.25 for
   *   ID cards, 1.15 for tax returns, and 1 for everything else.
   */
  getConfidenceBoost() {
    const boosts = new Map([
      ["invoice", 1.2],
      ["bank_statement", 1.3],
      ["id_card_aadhaar", 1.25],
      ["id_card_pan", 1.25],
      ["tax_return", 1.15]
    ]);
    const boost = boosts.get(this.config.documentType);
    return boost === undefined ? 1 : boost;
  }
}
547
/** Twelve digits in three groups of four, optionally separated by a space or hyphen. */
const E = /\b(\d{4}[\s\-]?\d{4}[\s\-]?\d{4})\b/g;

/**
 * Finds Aadhaar-like numbers in a text.
 *
 * A candidate is kept when it has exactly twelve digits and does not start
 * with 0 or 1.
 *
 * @param {string} e - Text to scan.
 * @returns {Array<{type: string, value: string, startIndex: number, endIndex: number, raw: string}>}
 *   `value` holds the digits only, `raw` the text as written.
 */
function q(e) {
  const found = [];
  // The pattern is global and shared, so restart it for every call.
  E.lastIndex = 0;
  for (let match = E.exec(e); match !== null; match = E.exec(e)) {
    const raw = match[1];
    const digits = raw.replace(/\D/g, "");
    if (digits.length !== 12 || /^0/.test(digits) || /^1/.test(digits)) {
      continue;
    }
    found.push({
      type: "AADHAAR",
      value: digits,
      startIndex: match.index,
      endIndex: match.index + match[0].length,
      raw
    });
  }
  return found;
}
563
/** Five capital letters, four digits, one capital letter. */
const A = /\b([A-Z]{5}[0-9]{4}[A-Z])\b/g;

/**
 * Finds PAN-like identifiers in a text.
 *
 * @param {string} e - Text to scan.
 * @returns {Array<{type: string, value: string, startIndex: number, endIndex: number, raw: string}>}
 *   `value` and `raw` both hold the matched identifier.
 */
function J(e) {
  const found = [];
  // The pattern is global and shared, so restart it for every call.
  A.lastIndex = 0;
  let match = A.exec(e);
  while (match !== null) {
    const identifier = match[1];
    found.push({
      type: "PAN",
      value: identifier,
      startIndex: match.index,
      endIndex: match.index + match[0].length,
      raw: identifier
    });
    match = A.exec(e);
  }
  return found;
}
577
/** Three groups of four digits plus one to seven more, optionally separated by a space or hyphen. */
const D = /\b(\d{4}[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{1,7})\b/g;

/**
 * Finds payment-card-like numbers in a text.
 *
 * A candidate is kept when it contains 13 to 19 digits.
 *
 * @param {string} e - Text to scan.
 * @returns {Array<{type: string, value: string, startIndex: number, endIndex: number, raw: string}>}
 *   `value` holds the digits only, `raw` the text as written.
 */
function Q(e) {
  const found = [];
  // The pattern is global and shared, so restart it for every call.
  D.lastIndex = 0;
  for (let match = D.exec(e); match !== null; match = D.exec(e)) {
    const raw = match[1];
    const digits = raw.replace(/\D/g, "");
    const plausibleLength = digits.length >= 13 && digits.length <= 19;
    if (plausibleLength) {
      found.push({
        type: "CREDIT_CARD",
        value: digits,
        startIndex: match.index,
        endIndex: match.index + match[0].length,
        raw
      });
    }
  }
  return found;
}
593
/**
 * Ten-digit mobile number starting with 6-9, optionally split 5+5 by a space
 * or hyphen and optionally prefixed with "+91", "91" or "0".
 *
 * The leading `(?<!\d)` ensures the match does not begin in the middle of a
 * longer run of digits.
 */
const M = /(?<!\d)(?:\+?91[\s\-]?|0)?([6-9]\d{4}[\s\-]?\d{5})\b/g;

/**
 * Finds phone numbers in a text.
 *
 * Fix: the pattern had no leading boundary, so the last ten digits of any
 * longer digit run (for example a 16-digit card number) were reported as a
 * phone number. Matches may no longer start right after a digit. Because of
 * that, the country code written without a plus sign ("919876543210") is now
 * accepted as a prefix so such numbers are still detected.
 *
 * @param {string} e - Text to scan.
 * @returns {Array<{type: string, value: string, startIndex: number, endIndex: number, raw: string}>}
 *   `value` holds the digits of the whole match (prefix included), `raw` the
 *   text as written.
 */
function ee(e) {
  const found = [];
  // The pattern is global and shared, so restart it for every call.
  M.lastIndex = 0;
  for (let match = M.exec(e); match !== null; match = M.exec(e)) {
    const raw = match[0];
    const digits = raw.replace(/\D/g, "");
    if (digits.length >= 10 && digits.length <= 12) {
      found.push({
        type: "PHONE",
        value: digits,
        startIndex: match.index,
        endIndex: match.index + raw.length,
        raw
      });
    }
  }
  return found;
}
609
/**
 * Runs every pattern detector over a text.
 *
 * @param {string} e - Text to scan.
 * @returns {Array<object>} All matches, grouped by detector in the order
 *   Aadhaar (`q`), PAN (`J`), card (`Q`), phone (`ee`).
 */
function te(e) {
  const detectors = [q, J, Q, ee];
  return detectors.flatMap((detect) => detect(e));
}
617
/** Multiplication table used by the checksum in `ae` (indexed `[running value][permuted digit]`). */
const ne = [
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
  [1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
  [2, 3, 4, 0, 1, 7, 8, 9, 5, 6],
  [3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
  [4, 0, 1, 2, 3, 9, 5, 6, 7, 8],
  [5, 9, 8, 7, 6, 0, 4, 3, 2, 1],
  [6, 5, 9, 8, 7, 1, 0, 4, 3, 2],
  [7, 6, 5, 9, 8, 2, 1, 0, 4, 3],
  [8, 7, 6, 5, 9, 3, 2, 1, 0, 4],
  [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
];

/** Permutation table used by the checksum in `ae` (indexed `[position mod 8][digit]`). */
const oe = [
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
  [1, 5, 7, 6, 2, 8, 3, 0, 9, 4],
  [5, 8, 0, 3, 7, 9, 6, 1, 4, 2],
  [8, 9, 1, 6, 0, 4, 3, 5, 2, 7],
  [9, 4, 5, 3, 1, 2, 6, 8, 7, 0],
  [4, 2, 8, 6, 5, 7, 3, 9, 0, 1],
  [2, 7, 9, 3, 8, 0, 6, 4, 1, 5],
  [7, 0, 4, 6, 9, 1, 3, 2, 5, 8]
];

/**
 * Validates the checksum of a 12-digit Aadhaar number.
 *
 * Non-digit characters are ignored. Digits are processed from the last to
 * the first; each digit is permuted by its position and folded into a
 * running value through the multiplication table. The tables correspond to
 * those of the Verhoeff check-digit scheme.
 *
 * @param {string} e - Candidate number, with or without separators.
 * @returns {boolean} `true` when exactly twelve digits are present and the
 *   running value ends at 0.
 */
function ae(e) {
  const digits = e.replace(/\D/g, "");
  if (digits.length !== 12) {
    return false;
  }

  let running = 0;
  [...digits].reverse().forEach((character, position) => {
    const permuted = oe[position % 8][Number(character)];
    running = ne[running][permuted];
  });
  return running === 0;
}
649
/**
 * Validates a payment-card number with the Luhn checksum.
 *
 * Non-digit characters are ignored. Counting from the last digit, every
 * second digit is doubled (subtracting 9 when the result exceeds 9) and all
 * digits are summed.
 *
 * @param {string} e - Candidate number, with or without separators.
 * @returns {boolean} `true` when 13 to 19 digits are present and the sum is
 *   a multiple of 10.
 */
function ce(e) {
  const digits = e.replace(/\D/g, "");
  if (digits.length < 13 || digits.length > 19) {
    return false;
  }

  const total = [...digits].reverse().reduce((sum, character, position) => {
    const digit = Number(character);
    if (position % 2 === 0) {
      return sum + digit;
    }
    const doubled = digit * 2;
    return sum + (doubled > 9 ? doubled - 9 : doubled);
  }, 0);
  return total % 10 === 0;
}
659
/**
 * Validates the format of a PAN identifier.
 *
 * @param {string} e - Candidate identifier.
 * @returns {boolean} `true` when `e` is five capital letters, four digits
 *   and one capital letter, and its fourth character is one of
 *   A, B, C, F, G, H, L, J, P or T.
 */
function re(e) {
  const wellFormed = /^[A-Z]{5}[0-9]{4}[A-Z]$/.test(e);
  return wellFormed && "ABCFGHLJPT".includes(e[3]);
}
664
/**
 * Regex detection layer: turns pattern matches into layer-1 entities.
 *
 * Every match from `te` is scored: Aadhaar, card and PAN matches get
 * confidence 1 when their validator (`ae`, `ce`, `re`) accepts them and
 * 0.6, 0.5 and 0.7 respectively otherwise; phone matches get 0.9; any other
 * type keeps 0.85. A match would be skipped only if it failed validation
 * with a confidence below 0.5, which none of the current values produce.
 * The box of each entity comes from `se`.
 *
 * @param {string} e - Full text.
 * @param {Array<object>} n - Words of the page, forwarded to `se`.
 * @param {number} [t=0] - Page index.
 * @returns {Array<object>} Entities shaped
 *   `{ id, type, value, confidence, bbox, masked, layer }`, where `value` is
 *   the text as written and `id` is `l1_` plus eight random characters.
 */
function ie(e, n, t = 0) {
  // Returns [passedValidation, confidence] for one match.
  const rate = (match) => {
    if (match.type === "AADHAAR") {
      const valid = ae(match.value);
      return [valid, valid ? 1 : 0.6];
    }
    if (match.type === "CREDIT_CARD") {
      const valid = ce(match.value);
      return [valid, valid ? 1 : 0.5];
    }
    if (match.type === "PAN") {
      const valid = re(match.value);
      return [valid, valid ? 1 : 0.7];
    }
    if (match.type === "PHONE") {
      return [true, 0.9];
    }
    return [true, 0.85];
  };

  const entities = [];
  for (const match of te(e)) {
    const [valid, confidence] = rate(match);
    if (!valid && confidence < 0.5) {
      continue;
    }
    const bbox = se(match.startIndex, match.endIndex, e, n, t);
    entities.push({
      id: "l1_" + crypto.randomUUID().substring(0, 8),
      type: match.type,
      value: match.raw,
      confidence,
      bbox,
      masked: true,
      layer: 1
    });
  }
  return entities;
}
696
/**
 * Computes the box that encloses a set of words.
 *
 * Unlike `P`, there is no special case for an empty list: with no words the
 * result contains infinite values.
 *
 * @param {Array<object>} e - Words shaped `{ bbox: { x, y, w, h } }`.
 * @param {number} n - Page index copied into the result.
 * @returns {{x: number, y: number, w: number, h: number, pageIndex: number}}
 */
function x(e, n) {
  const edges = e.reduce(
    (acc, { bbox }) => ({
      left: Math.min(acc.left, bbox.x),
      top: Math.min(acc.top, bbox.y),
      right: Math.max(acc.right, bbox.x + bbox.w),
      bottom: Math.max(acc.bottom, bbox.y + bbox.h)
    }),
    { left: Infinity, top: Infinity, right: -Infinity, bottom: -Infinity }
  );
  return {
    x: edges.left,
    y: edges.top,
    w: edges.right - edges.left,
    h: edges.bottom - edges.top,
    pageIndex: n
  };
}
700
/**
 * Maps a character range of the full text to a box on the page.
 *
 * Three strategies are tried in order:
 *  1. Exact: the whitespace-separated tokens of the range, stripped of
 *     everything but letters and digits, equal a run of consecutive words.
 *  2. Partial: starting at a word equal to the first token, later tokens are
 *     matched in order against the following words; accepted when at least
 *     half of the tokens (rounded up) were found.
 *  3. Positional: words are located in the text one after another and those
 *     overlapping the range are used.
 * If nothing is found a placeholder box `{ x: 0, y: 0, w: 100, h: 20 }` is
 * returned.
 *
 * Fix: strategy 1 used to return the first matching run, so when the same
 * value occurs more than once every occurrence received the box of the first
 * one and later occurrences were never covered. When several runs match, the
 * one whose located text position overlaps the requested range is now
 * preferred; the first run is still used when no position can be
 * established.
 *
 * @param {number} e - Start offset of the match in `t`.
 * @param {number} n - End offset (exclusive) of the match in `t`.
 * @param {string} t - Full text.
 * @param {Array<object>} a - Words shaped `{ text, bbox }`.
 * @param {number} r - Page index copied into the box.
 * @returns {{x: number, y: number, w: number, h: number, pageIndex: number}}
 */
function se(e, n, t, a, r) {
  const normalize = (value) => value.replace(/[^a-zA-Z0-9]/g, "");
  const tokens = t
    .substring(e, n)
    .trim()
    .split(/\s+/)
    .map(normalize)
    .filter((token) => token.length > 0);

  // Character span [start, end) of every word, found by searching the text
  // sequentially; `null` for words that cannot be found.
  const locateWords = () => {
    const spans = [];
    let cursor = 0;
    for (const word of a) {
      const start = t.indexOf(word.text, cursor);
      if (start === -1) {
        spans.push(null);
        continue;
      }
      const end = start + word.text.length;
      spans.push({ start, end });
      cursor = end;
    }
    return spans;
  };

  if (tokens.length > 0 && a.length > 0) {
    // Strategy 1: exact run of consecutive words.
    const runs = [];
    for (let first = 0; first + tokens.length <= a.length; first++) {
      const matches = tokens.every(
        (token, offset) => normalize(a[first + offset].text) === token
      );
      if (matches) {
        runs.push(first);
      }
    }
    if (runs.length > 0) {
      let chosen = runs[0];
      if (runs.length > 1) {
        const spans = locateWords();
        const overlapping = runs.find((first) =>
          tokens.some((token, offset) => {
            const span = spans[first + offset];
            return span !== null && span.end > e && span.start < n;
          })
        );
        if (overlapping !== undefined) {
          chosen = overlapping;
        }
      }
      return x(a.slice(chosen, chosen + tokens.length), r);
    }

    // Strategy 2: in-order partial match anchored at the first token.
    const required = Math.ceil(tokens.length / 2);
    for (let first = 0; first < a.length; first++) {
      if (normalize(a[first].text) !== tokens[0]) {
        continue;
      }
      const picked = [a[first]];
      let nextToken = 1;
      for (let later = first + 1; later < a.length && nextToken < tokens.length; later++) {
        if (normalize(a[later].text) === tokens[nextToken]) {
          picked.push(a[later]);
          nextToken += 1;
        }
      }
      if (picked.length >= required) {
        return x(picked, r);
      }
    }
  }

  // Strategy 3: words whose located text position overlaps the range.
  const overlappingWords = [];
  let cursor = 0;
  for (const word of a) {
    const start = t.indexOf(word.text, cursor);
    if (start === -1) {
      continue;
    }
    const end = start + word.text.length;
    if (end > e && start < n) {
      overlappingWords.push(word);
    }
    cursor = end;
    if (start > n) {
      break;
    }
  }
  if (overlappingWords.length > 0) {
    return x(overlappingWords, r);
  }
  return { x: 0, y: 0, w: 100, h: 20, pageIndex: r };
}
740
/** Lower-case given names recognised by the name heuristic. */
const R = /* @__PURE__ */ new Set([
  "aarav", "aditi", "aditya", "akash", "amit", "amita", "ananya", "anil",
  "anita", "anjali", "ankita", "arjun", "arun", "aruna", "ashok", "bhavna",
  "chandra", "deepak", "deepika", "rahul", "rajesh", "priya", "neha",
  "vikram", "sneha", "pooja", "rohit", "john", "james", "mary", "patricia",
  "jennifer", "michael", "william", "david", "sarah", "karen"
]);

/** Lower-case family names recognised by the name heuristic. */
const T = /* @__PURE__ */ new Set([
  "sharma", "verma", "gupta", "singh", "kumar", "patel", "joshi", "mishra",
  "agarwal", "mehta", "reddy", "rao", "nair", "menon", "iyer", "mukherjee",
  "chatterjee", "das", "roy", "shah", "smith", "johnson", "williams",
  "brown", "jones", "davis", "miller", "wilson", "moore"
]);
809
/**
 * Name heuristic: flags words found in the given-name (`R`) or family-name
 * (`T`) sets.
 *
 * Words are compared after removing everything but letters and lower-casing.
 * A single known name yields a NAME entity with confidence 0.65; when the
 * following word is also a known name the two are merged into one entity
 * with confidence 0.78 and the second word is not examined again.
 *
 * Fix: for a merged name only the width of the first word's box was
 * stretched to the second word's right edge. When the second word sits on
 * another line (or is taller) that produced a negative or too small box. The
 * merged box is now the true union of both boxes; for two equally tall words
 * on one line the result is unchanged.
 *
 * @param {Array<object>} e - Words shaped `{ text, bbox: { x, y, w, h } }`.
 * @returns {Array<object>} Entities shaped
 *   `{ id, type, value, confidence, bbox, masked, layer }` with layer 2 and
 *   an id of `nlp_` plus eight random characters. Input boxes are not modified.
 */
function le(e) {
  const lettersOf = (word) => word.text.replace(/[^a-zA-Z]/g, "").toLowerCase();
  const isKnownName = (token) => R.has(token) || T.has(token);

  const entities = [];
  for (let t = 0; t < e.length; t++) {
    const token = lettersOf(e[t]);
    if (token.length < 2 || !isKnownName(token)) {
      continue;
    }

    let value = e[t].text;
    const bbox = { ...e[t].bbox };
    let confidence = 0.65;

    const next = e[t + 1];
    if (next !== undefined && isKnownName(lettersOf(next))) {
      value += " " + next.text;
      const left = Math.min(bbox.x, next.bbox.x);
      const top = Math.min(bbox.y, next.bbox.y);
      const right = Math.max(bbox.x + bbox.w, next.bbox.x + next.bbox.w);
      const bottom = Math.max(bbox.y + bbox.h, next.bbox.y + next.bbox.h);
      bbox.x = left;
      bbox.y = top;
      bbox.w = right - left;
      bbox.h = bottom - top;
      confidence = 0.78;
      t++;
    }

    entities.push({
      id: "nlp_" + crypto.randomUUID().substring(0, 8),
      type: "NAME",
      value,
      confidence,
      bbox,
      masked: true,
      // NLP heuristic layer
      layer: 2
    });
  }
  return entities;
}
833
/**
 * Worker entry point.
 *
 * Handles messages whose `type` is "ADVANCED_DETECT"; any other message is
 * ignored. The message carries `fullText`, `words`, `pageIndex` and
 * `confidenceThreshold`. Steps:
 *  1. classify the document (`U`) and pick its ruleset (`G`);
 *  2. when the ruleset prioritises spatial context, group words into lines
 *     (`S`) and blocks (`w`), extract key/value pairs (`$`) as entities
 *     (`O`), and optionally detect columns (`L`) — block and column counts
 *     are only logged;
 *  3. run the regex layer (`ie`);
 *  4. run the name heuristic (`le`) unless the ruleset skips it;
 *  5. fuse everything (`Z`), apply the document-type boost and post a
 *     "DETECTION_RESULT" message.
 * Any exception is logged and reported as a "DETECTION_ERROR" message.
 */
self.onmessage = async (e) => {
  const {
    type: messageType,
    fullText,
    words,
    pageIndex,
    confidenceThreshold
  } = e.data;
  if (messageType !== "ADVANCED_DETECT") {
    return;
  }

  try {
    console.log("[Advanced Detection Worker] Starting multi-stage detection...");
    console.log(`[Advanced Detection Worker] Input: ${words.length} words, ${fullText.length} chars`);

    const classification = U(fullText);
    const ruleset = G(classification.primaryType);
    console.log(`[Advanced Detection Worker] Document Type: ${classification.primaryType} (${Math.round(classification.confidence * 100)}%)`);
    console.log("[Advanced Detection Worker] Ruleset:", ruleset);

    let spatialEntities = [];
    if (ruleset.prioritizeSpatialContext) {
      console.log("[Advanced Detection Worker] Running spatial analysis...");
      const lines = S(words);
      console.log(`[Advanced Detection Worker] Grouped into ${lines.length} lines`);
      const blocks = w(lines, pageIndex);
      console.log(`[Advanced Detection Worker] Grouped into ${blocks.length} blocks`);
      const pairs = $(lines, pageIndex);
      console.log(`[Advanced Detection Worker] Detected ${pairs.length} key-value pairs`);
      spatialEntities = O(pairs);
      if (ruleset.enableTableDetection) {
        const columns = L(lines);
        console.log(`[Advanced Detection Worker] Detected ${columns.length} table columns`);
      }
    }

    console.log("[Advanced Detection Worker] Running regex detection...");
    const regexEntities = ie(fullText, words, pageIndex);
    console.log(`[Advanced Detection Worker] Regex detected: ${regexEntities.length}`);

    let nlpEntities = [];
    if (!ruleset.skipParagraphNER) {
      console.log("[Advanced Detection Worker] Running NLP heuristics...");
      nlpEntities = le(words);
      console.log(`[Advanced Detection Worker] NLP detected: ${nlpEntities.length}`);
    }

    console.log("[Advanced Detection Worker] Fusing detections...");
    const engine = new Z({
      documentType: classification.primaryType,
      confidenceThreshold,
      preferSpatialOverNLP: ruleset.prioritizeSpatialContext,
      deduplicationOverlapThreshold: 0.5
    });
    // ML/Gemini entities come from main pipeline
    const fused = engine.fuse(regexEntities, spatialEntities, nlpEntities, []);
    console.log(`[Advanced Detection Worker] Fusion complete: ${fused.entities.length} entities`);
    console.log("[Advanced Detection Worker] Stats:", fused.stats);

    const result = {
      type: "DETECTION_RESULT",
      entities: engine.applyDocumentTypeBoost(fused.entities),
      documentType: classification.primaryType,
      stats: {
        ...fused.stats,
        documentConfidence: Math.round(classification.confidence * 100),
        detectedKeywords: classification.detectedKeywords.slice(0, 5)
      }
    };
    self.postMessage(result);

    // Empty the intermediate lists once the result has been posted.
    regexEntities.length = 0;
    spatialEntities.length = 0;
    nlpEntities.length = 0;
  } catch (failure) {
    console.error("[Advanced Detection Worker] Error:", failure);
    self.postMessage({
      type: "DETECTION_ERROR",
      error: failure instanceof Error ? failure.message : "Detection failed"
    });
  }
};

// Tell the host page that the worker has loaded and can accept messages.
self.postMessage({ type: "WORKER_READY" });
890
+ //# sourceMappingURL=advanced.worker-DSVrF0gl.js.map