rehydra 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +615 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +114 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +228 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +180 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +384 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +111 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +325 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +253 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +46 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +130 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +118 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +332 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +12 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +12 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +239 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/semantic-data-loader.d.ts +165 -0
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
- package/dist/pipeline/semantic-data-loader.js +655 -0
- package/dist/pipeline/semantic-data-loader.js.map +1 -0
- package/dist/pipeline/semantic-enricher.d.ts +112 -0
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
- package/dist/pipeline/semantic-enricher.js +318 -0
- package/dist/pipeline/semantic-enricher.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +114 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +374 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/title-extractor.d.ts +79 -0
- package/dist/pipeline/title-extractor.d.ts.map +1 -0
- package/dist/pipeline/title-extractor.js +801 -0
- package/dist/pipeline/title-extractor.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +197 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +80 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +10 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +10 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/dist/utils/path.d.ts +34 -0
- package/dist/utils/path.d.ts.map +1 -0
- package/dist/utils/path.js +96 -0
- package/dist/utils/path.js.map +1 -0
- package/dist/utils/storage-browser.d.ts +51 -0
- package/dist/utils/storage-browser.d.ts.map +1 -0
- package/dist/utils/storage-browser.js +381 -0
- package/dist/utils/storage-browser.js.map +1 -0
- package/dist/utils/storage-node.d.ts +43 -0
- package/dist/utils/storage-node.d.ts.map +1 -0
- package/dist/utils/storage-node.js +93 -0
- package/dist/utils/storage-node.js.map +1 -0
- package/dist/utils/storage.d.ts +70 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +69 -0
- package/dist/utils/storage.js.map +1 -0
- package/package.json +66 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"title-extractor.js","sourceRoot":"","sources":["../../src/pipeline/title-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAEL,OAAO,EAEP,eAAe,GAChB,MAAM,mBAAmB,CAAC;AAc3B;;;;;;;;GAQG;AAEH,iBAAiB;AACjB,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,KAAK;IACL,MAAM;IACN,QAAQ;IACR,IAAI;IACJ,KAAK;IACL,MAAM;IACN,IAAI;IACJ,KAAK,EAAE,iBAAiB;IACxB,wBAAwB;IACxB,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,MAAM;IACN,OAAO;IACP,WAAW;IACX,KAAK;IACL,MAAM;IACN,UAAU;IACV,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,IAAI;IACJ,KAAK;IACL,SAAS;IACT,WAAW;IACX,MAAM;IACN,OAAO;IACP,SAAS;IACT,KAAK;IACL,MAAM;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,SAAS;IACT,IAAI;IACJ,KAAK;IACL,YAAY;IACZ,KAAK;IACL,MAAM;IACN,UAAU;IACV,KAAK;IACL,MAAM;IACN,OAAO;IACP,KAAK;IACL,MAAM;IACN,UAAU;IACV,KAAK;IACL,MAAM;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,SAAS;IACT,MAAM;IACN,OAAO;IACP,WAAW;IACX,qBAAqB;IACrB,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,WAAW;IACX,eAAe;IACf,QAAQ;IACR,UAAU;IACV,iBAAiB;IACjB,qBAAqB;IACrB,QAAQ;IACR,KAAK;IACL,MAAM;IACN,SAAS;IACT,MAAM;IACN,OAAO;IACP,UAAU;IACV,OAAO;IACP,SAAS;CACV,CAAC;AAEF,gBAAgB;AAChB,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,MAAM;IACN,MAAM;IACN,UAAU;IACV,wBAAwB;IACxB,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,MAAM;IACN,OAAO;IACP,WAAW;IACX,KAAK;IACL,MAAM;IACN,UAAU;IACV,MAAM;IACN,OAAO;IACP,QAAQ;IACR,WAAW;IACX,YAAY;IACZ,iBAAiB;IACjB,KAAK;IACL,MAAM;IACN,WAAW;IACX,mDAAmD;IACnD,SAAS;IACT,UAAU;IACV,SAAS;IACT,UAAU;IACV,UAAU;IACV,WAAW;IACX,cAAc;IACd,eAAe;IACf,SAAS;IACT,UAAU;IACV,UAAU;IACV,WAAW;IACX,oBAAoB;IACpB,KAAK;IACL,MAAM;IACN,SAAS;IACT,QAAQ;IACR,OAAO;IACP,WAAW;IACX,YAAY;IACZ,SAAS;IACT,QAAQ;IACR,SAAS;CACV,CAAC;AAEF,gBAAgB;AAChB,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,GAAG;IACH,IAAI;IACJ,UAAU;IACV,KAAK;IACL,MAAM;IACN,QAAQ;IACR,MAAM;IACN,OAAO;IACP,cAAc;IACd,eAAe;IACf,IAAI;IACJ,KAAK;IACL,SAAS;IACT,IAAI;IACJ,KAAK;IACL,YAAY;IACZ,MAAM;IACN,OAAO;IACP,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,QAAQ,EAAE,uBAAuB;IACjC,KAAK;IACL,MAAM;IACN,aAAa;IACb,WAAW;IACX,KAAK;IACL,MAAM;I
ACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,YAAY;IACZ,MAAM;IACN,OAAO;IACP,WAAW;IACX,IAAI;IACJ,KAAK;IACL,YAAY;IACZ,YAAY;IACZ,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;CACP,CAAC;AAEF,iBAAiB;AACjB,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,IAAI;IACJ,KAAK;IACL,OAAO;IACP,KAAK;IACL,MAAM;IACN,QAAQ;IACR,MAAM;IACN,OAAO;IACP,UAAU;IACV,cAAc;IACd,KAAK;IACL,MAAM;IACN,IAAI;IACJ,MAAM;IACN,eAAe;IACf,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,KAAK;IACL,MAAM;IACN,SAAS;IACT,MAAM;IACN,OAAO;IACP,UAAU;IACV,OAAO;IACP,QAAQ;IACR,WAAW;IACX,KAAK;IACL,MAAM;IACN,YAAY;IACZ,YAAY;IACZ,KAAK;IACL,MAAM;IACN,WAAW;IACX,WAAW;IACX,KAAK;IACL,MAAM;IACN,YAAY;IACZ,YAAY;IACZ,MAAM;IACN,OAAO;IACP,SAAS;IACT,SAAS;IACT,WAAW;IACX,MAAM;IACN,OAAO;IACP,SAAS;IACT,MAAM;IACN,OAAO;IACP,SAAS;IACT,KAAK;IACL,MAAM;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,UAAU;IACV,YAAY;IACZ,OAAO;IACP,MAAM;IACN,KAAK;CACN,CAAC;AAEF,iBAAiB;AACjB,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,KAAK;IACL,MAAM;IACN,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,WAAW;IACX,eAAe;IACf,MAAM;IACN,OAAO;IACP,SAAS;IACT,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,MAAM;IACN,OAAO;IACP,YAAY;IACZ,WAAW;IACX,UAAU;IACV,eAAe;IACf,KAAK;IACL,MAAM;IACN,WAAW;IACX,KAAK;IACL,MAAM;IACN,UAAU;IACV,MAAM;IACN,OAAO;IACP,YAAY;IACZ,KAAK;IACL,MAAM;IACN,YAAY;IACZ,MAAM;IACN,OAAO;IACP,UAAU;IACV,WAAW;IACX,OAAO;IACP,UAAU;IACV,UAAU;IACV,UAAU;IACV,UAAU;IACV,aAAa;IACb,MAAM;IACN,UAAU;IACV,YAAY;IACZ,KAAK;IACL,OAAO;IACP,KAAK;IACL,MAAM;IACN,MAAM;IACN,OAAO;IACP,YAAY;CACb,CAAC;AAEF,oBAAoB;AACpB,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,KAAK;IACL,MAAM;IACN,SAAS;IACT,MAAM;IACN,OAAO;IACP,WAAW;IACX,eAAe;IACf,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,KAAK;IACL,MAAM;IACN,SAAS;IACT,MAAM;IACN,OAAO;IACP,WAAW;IACX,OAAO;IACP,QAAQ;IACR,YAAY;IACZ,KAAK;IACL,MAAM;IACN,YAAY;IACZ,YAAY;IACZ,KAAK;IACL,MAAM;IACN,WAAW;IACX,WAAW;IACX,gCAAgC;IAChC,KAAK;IACL,MAAM;IACN,WAAW;IACX,KAAK;IACL,MAAM;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,SAAS;IACT,YAAY;IACZ,OAAO;IACP,MA
AM;IACN,MAAM;IACN,OAAO;CACR,CAAC;AAEF,eAAe;AACf,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,KAAK;IACL,MAAM;IACN,SAAS;IACT,QAAQ;IACR,UAAU;IACV,MAAM;IACN,OAAO;IACP,SAAS;IACT,IAAI;IACJ,KAAK;IACL,YAAY;IACZ,UAAU;IACV,wBAAwB;IACxB,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,MAAM;IACN,OAAO;IACP,WAAW;IACX,IAAI;IACJ,KAAK;IACL,WAAW;IACX,IAAI;IACJ,KAAK;IACL,SAAS,EAAE,cAAc;IACzB,KAAK;IACL,MAAM;IACN,aAAa;IACb,KAAK;IACL,MAAM;IACN,WAAW;IACX,KAAK;IACL,MAAM;IACN,UAAU;IACV,KAAK;IACL,MAAM;IACN,SAAS;IACT,MAAM;IACN,OAAO;IACP,UAAU;IACV,YAAY;IACZ,IAAI;IACJ,KAAK;IACL,SAAS;IACT,SAAS;IACT,OAAO;CACR,CAAC;AAEF,iBAAiB;AACjB,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,OAAO;IACP,QAAQ;IACR,YAAY;IACZ,MAAM;IACN,OAAO;IACP,eAAe;IACf,IAAI;IACJ,KAAK;IACL,SAAS;IACT,MAAM;IACN,OAAO;IACP,WAAW;IACX,qEAAqE;CACtE,CAAC;AAEF,mDAAmD;AACnD,MAAM,SAAS,GAAG;IAChB,mCAAmC;IACnC,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,oCAAoC;IACpC,WAAW;IACX,WAAW;IACX,QAAQ;IACR,YAAY;IACZ,YAAY;IACZ,SAAS;IACT,WAAW;IACX,QAAQ;IACR,+BAA+B;IAC/B,SAAS;IACT,UAAU;IACV,SAAS;IACT,UAAU;IACV,SAAS;IACT,UAAU;IACV,gCAAgC;IAChC,IAAI;IACJ,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,UAAU;IACV,WAAW;IACX,YAAY;IACZ,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,QAAQ;IACR,QAAQ;IACR,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,SAAS;IACT,qBAAqB;IACrB,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;CACT,CAAC;AAEF,8CAA8C;AAC9C,MAAM,SAAS,GAAG;IAChB,mBAAmB;IACnB,IAAI,EAAE,kBAAkB;IACxB,IAAI,EAAE,cAAc;IACpB,IAAI,EAAE,iBAAiB;IACvB,IAAI,EAAE,gBAAgB;IACtB,wBAAwB;IACxB,IAAI,EAAE,uBAAuB;IAC7B,IAAI,EAAE,uBAAuB;IAC7B,IAAI,EAAE,6BAA6B;IACnC,IAAI;IACJ,IAAI,EAAE,mBAAmB;IACzB,KAAK;IACL,KAAK,EAAE,0BAA0B;IACjC,IAAI;IACJ,IAAI,EAAE,iBAAiB;IACvB,WAAW;IACX,IAAI;IACJ,IAAI,EAAE,qBAAqB;IAC3B,IAAI,EAAE,sBAAsB;IAC5B,IAAI,EAAE,qBAAqB;IAC3B,qBAAqB;IACrB,IAAI;IACJ,IAAI,EAAE,0BAA0B;IAChC,IAAI,EAAE,gCAAgC;CACvC,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,GAAa;IAC3B,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;IACZ,GAAG,SAAS;CACb,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,C
AAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;AAEtC;;;GAGG;AACH,MAAM,cAAc,GAClB,UAAU,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACzB,OAAO,EAAE,IAAI,MAAM,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,YAAY,EAAE,GAAG,CAAC;IAC5D,KAAK;CACN,CAAC,CAAC,CAAC;AAEN;;GAEG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,GAAG,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AACpD,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,cAAc,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACrC,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACnB,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;YAElE,4DAA4D;YAC5D,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChC,OAAO;oBACL,KAAK,EAAE,WAAW,CAAC,OAAO,EAAE,EAAE,gCAAgC;oBAC9D,gBAAgB;oBAChB,WAAW,EAAE,WAAW,CAAC,MAAM;iBAChC,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO;QACL,KAAK,EAAE,SAAS;QAChB,gBAAgB,EAAE,OAAO;QACzB,WAAW,EAAE,CAAC;KACf,CAAC;AACJ,CAAC;AAUD;;;;;;;;GAQG;AACH,MAAM,UAAU,sBAAsB,CACpC,KAAkB,EAClB,YAAoB;IAEpB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,+BAA+B;QAC/B,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,oCAAoC;QACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC;QAC3B,MAAM,UAAU,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAE1C,0CAA0C;QAC1C,IAAI,UAAU,CAAC,KAAK,KAAK,SAAS,IAAI,UAAU,CAAC,WAAW,KAAK,CAAC,EAAE,CAAC;YACnE,OAAO,IAAI,CAAC;QACd,CAAC;QAED,kEAAkE;QAClE,IAAI,UAAU,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,gCAAgC;QAChC,gEAAgE;QAChE,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC;QAErD,kDAAkD;QAClD,IAAI,QAAQ,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,6DAA6D;QAC7D,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACnE,IACE,YAAY,CAAC,WAAW,EAAE,KAAK,UAAU,CAAC,gBAAgB,CAAC,WAAW,EAAE,EACxE,CAAC;YACD,iFAAiF;YACjF,MAAM,WAAW,GAAG,YAAY,CAAC,OAAO,CACtC,UAAU,CAAC,gBAAgB,EAC3B,IAAI,CAAC,KAAK,CACX,CAAC;YA
CF,IAAI,WAAW,KAAK,CAAC,CAAC,IAAI,WAAW,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;gBAClD,OAAO,IAAI,CAAC,CAAC,qDAAqD;YACpE,CAAC;YACD,OAAO;gBACL,GAAG,IAAI;gBACP,KAAK,EAAE,WAAW;gBAClB,IAAI,EAAE,UAAU,CAAC,gBAAgB;gBACjC,QAAQ,EAAE;oBACR,GAAG,IAAI,CAAC,QAAQ;oBAChB,KAAK,EAAE,UAAU,CAAC,KAAK;iBACO;aACjC,CAAC;QACJ,CAAC;QAED,4EAA4E;QAC5E,OAAO;YACL,GAAG,IAAI;YACP,KAAK,EAAE,QAAQ;YACf,IAAI,EAAE,UAAU,CAAC,gBAAgB;YACjC,QAAQ,EAAE;gBACR,GAAG,IAAI,CAAC,QAAQ;gBAChB,KAAK,EAAE,UAAU,CAAC,KAAK;aACO;SACjC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAClC,QAA6E;IAE7E,MAAM,QAAQ,GAA6B;QACzC,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;QACb,EAAE,EAAE,SAAS;KACd,CAAC;IACF,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAC1B,OAAO,CAAC,GAAG,UAAU,CAAC,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAClC,OAAO,MAAM,CAAC,KAAK,KAAK,SAAS,CAAC;AACpC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,6CAA6C;IAC7C,MAAM,kBAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAEpE,KAAK,MAAM,EAAE,OAAO,EAAE,IAAI,cAAc,EAAE,CAAC;QACzC,yDAAyD;QACzD,MAAM,KAAK,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAChD,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACnB,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;YACnE,qDAAqD;YACrD,IAAI,SAAS,KAAK,EAAE,EAAE,CAAC;gBACrB,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,gDAAgD;IAChD,MAAM,cAAc,GAAG,kBAAkB,CAAC,WAAW,EAAE,CAAC;IACxD,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,cAAc,KAAK,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YAC3C,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAkB,EAClB,YAAoB,EACpB,SAAiB,CAAC;IAElB,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACtB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,yBAAyB;IACzB,
MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC5D,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC1B,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,+BAA+B;QAC/B,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,qCAAqC;QACrC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/B,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,oDAAoD;QACpD,IAAI,MAAM,GAAG,KAAK,CAAC;QACnB,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACvB,IAAI,IAAI,KAAK,SAAS;gBAAE,SAAS;YAEjC,8BAA8B;YAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC;YAErC,oCAAoC;YACpC,IAAI,GAAG,GAAG,MAAM,EAAE,CAAC;gBACjB,MAAM;YACR,CAAC;YAED,kEAAkE;YAClE,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;YAC5D,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBACnC,MAAM;YACR,CAAC;YAED,qCAAqC;YACrC,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;gBACjC,MAAM,UAAU,GAAG,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC/D,MAAM,UAAU,GAAc;oBAC5B,IAAI,EAAE,OAAO,CAAC,MAAM;oBACpB,KAAK,EAAE,OAAO,CAAC,KAAK;oBACpB,GAAG,EAAE,IAAI,CAAC,GAAG;oBACb,4BAA4B;oBAC5B,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC;oBACzD,6CAA6C;oBAC7C,MAAM,EACJ,OAAO,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM;wBAC5B,CAAC,CAAC,OAAO,CAAC,MAAM;wBAChB,CAAC,CAAC,eAAe,CAAC,MAAM;oBAC5B,IAAI,EAAE,UAAU;oBAChB,6DAA6D;oBAC7D,QAAQ,EAAE;wBACR,GAAG,OAAO,CAAC,QAAQ;wBACnB,GAAG,IAAI,CAAC,QAAQ;qBACjB;iBACF,CAAC;gBAEF,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBACxB,MAAM,GAAG,IAAI,CAAC;gBACd,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,uBAAuB;gBAClC,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,CAAC;Y
ACZ,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,CAAC,EAAE,CAAC;QACN,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Output Validator
|
|
3
|
+
* Validates anonymized output and performs leak scan
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, DetectedEntity, AnonymizationPolicy } from '../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Validation result
|
|
8
|
+
*/
|
|
9
|
+
export interface ValidationResult {
|
|
10
|
+
/** Whether validation passed */
|
|
11
|
+
valid: boolean;
|
|
12
|
+
/** List of validation errors */
|
|
13
|
+
errors: ValidationError[];
|
|
14
|
+
/** Whether leak scan passed (if performed) */
|
|
15
|
+
leakScanPassed?: boolean;
|
|
16
|
+
/** Potential leaks found by leak scan */
|
|
17
|
+
potentialLeaks?: LeakScanMatch[];
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Validation error
|
|
21
|
+
*/
|
|
22
|
+
export interface ValidationError {
|
|
23
|
+
/** Error code */
|
|
24
|
+
code: ValidationErrorCode;
|
|
25
|
+
/** Human-readable message */
|
|
26
|
+
message: string;
|
|
27
|
+
/** Additional details */
|
|
28
|
+
details?: Record<string, unknown>;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Validation error codes
|
|
32
|
+
*/
|
|
33
|
+
export declare enum ValidationErrorCode {
|
|
34
|
+
OVERLAPPING_ENTITIES = "OVERLAPPING_ENTITIES",
|
|
35
|
+
DUPLICATE_IDS = "DUPLICATE_IDS",
|
|
36
|
+
MALFORMED_TAG = "MALFORMED_TAG",
|
|
37
|
+
ID_MISMATCH = "ID_MISMATCH",
|
|
38
|
+
MISSING_IN_MAP = "MISSING_IN_MAP",
|
|
39
|
+
POTENTIAL_PII_LEAK = "POTENTIAL_PII_LEAK"
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Leak scan match
|
|
43
|
+
*/
|
|
44
|
+
export interface LeakScanMatch {
|
|
45
|
+
/** Type of potential leak */
|
|
46
|
+
type: PIIType;
|
|
47
|
+
/** Matched text */
|
|
48
|
+
text: string;
|
|
49
|
+
/** Position in anonymized text */
|
|
50
|
+
position: number;
|
|
51
|
+
/** Pattern that matched */
|
|
52
|
+
pattern: string;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Validates anonymization output
|
|
56
|
+
*/
|
|
57
|
+
export declare function validateOutput(anonymizedText: string, entities: DetectedEntity[], piiMapKeys: string[], policy: AnonymizationPolicy): ValidationResult;
|
|
58
|
+
/**
|
|
59
|
+
* Validates that no overlaps exist (fast check)
|
|
60
|
+
*/
|
|
61
|
+
export declare function hasNoOverlaps(entities: Array<{
|
|
62
|
+
start: number;
|
|
63
|
+
end: number;
|
|
64
|
+
}>): boolean;
|
|
65
|
+
//# sourceMappingURL=validator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/pipeline/validator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAIjF;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gCAAgC;IAChC,KAAK,EAAE,OAAO,CAAC;IACf,gCAAgC;IAChC,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,8CAA8C;IAC9C,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,yCAAyC;IACzC,cAAc,CAAC,EAAE,aAAa,EAAE,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,iBAAiB;IACjB,IAAI,EAAE,mBAAmB,CAAC;IAC1B,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED;;GAEG;AACH,oBAAY,mBAAmB;IAC7B,oBAAoB,yBAAyB;IAC7C,aAAa,kBAAkB;IAC/B,aAAa,kBAAkB;IAC/B,WAAW,gBAAgB;IAC3B,cAAc,mBAAmB;IACjC,kBAAkB,uBAAuB;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,6BAA6B;IAC7B,IAAI,EAAE,OAAO,CAAC;IACd,mBAAmB;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,2BAA2B;IAC3B,OAAO,EAAE,MAAM,CAAC;CACjB;AAkCD;;GAEG;AACH,wBAAgB,cAAc,CAC5B,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,cAAc,EAAE,EAC1B,UAAU,EAAE,MAAM,EAAE,EACpB,MAAM,EAAE,mBAAmB,GAC1B,gBAAgB,CA+ClB;AAkMD;;GAEG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG,OAAO,CAYtF"}
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Output Validator
|
|
3
|
+
* Validates anonymized output and performs leak scan
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType } from '../types/index.js';
|
|
6
|
+
import { spansOverlap } from '../utils/offsets.js';
|
|
7
|
+
import { extractTags, isValidTag } from './tagger.js';
|
|
8
|
+
/**
 * Error codes carried by every validation error this module reports.
 * Each member's value is the same string as its own name.
 */
export var ValidationErrorCode;
(function (codes) {
    // Insertion order is kept identical to the declaration order.
    const memberNames = [
        'OVERLAPPING_ENTITIES',
        'DUPLICATE_IDS',
        'MALFORMED_TAG',
        'ID_MISMATCH',
        'MISSING_IN_MAP',
        'POTENTIAL_PII_LEAK',
    ];
    for (const memberName of memberNames) {
        codes[memberName] = memberName;
    }
})(ValidationErrorCode || (ValidationErrorCode = {}));
|
|
20
|
+
/**
 * Simplified patterns used by the leak scan to spot structured PII that is
 * still present in anonymized text. Every entry has the shape
 * `{ type, pattern, name }`, where `name` is the label reported for a match.
 */
const LEAK_SCAN_PATTERNS = [
    // Local part, "@", domain, then a dot and at least two letters.
    [PIIType.EMAIL, /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g, 'Email'],
    // "+" or "00" prefix followed by 8-15 digits, or a leading 0 followed by 7-12 digits.
    [PIIType.PHONE, /(?:\+|00)[1-9][0-9]{7,14}|0[1-9][0-9]{6,11}/g, 'Phone'],
    // Two letters, two digits, then 11-30 alphanumerics (case-insensitive).
    [PIIType.IBAN, /[A-Z]{2}[0-9]{2}[A-Z0-9]{11,30}/gi, 'IBAN'],
    // Four groups of four digits, optionally separated by whitespace or "-".
    [PIIType.CREDIT_CARD, /[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}/g, 'Credit Card'],
    // Four dot-separated groups of 1-3 digits; octet ranges are not checked.
    [PIIType.IP_ADDRESS, /(?:\d{1,3}\.){3}\d{1,3}/g, 'IP Address'],
].map(([type, pattern, name]) => ({ type, pattern, name }));
|
|
51
|
+
/**
 * Validates the result of an anonymization run.
 *
 * Runs the structural checks (overlaps, duplicate IDs, tag shape, entity/tag
 * ID match, PII-map completeness) and, when `policy.enableLeakScan` is truthy,
 * a leak scan over the anonymized text.
 *
 * @param {string} anonymizedText - Text in which PII was replaced by tags.
 * @param {Array<object>} entities - Detected entities that were anonymized.
 * @param {string[]} piiMapKeys - Keys present in the PII map.
 * @param {object} policy - Policy; `enableLeakScan` is read here and the
 *   whole object is handed to the leak scan.
 * @returns {{valid: boolean, errors: object[], leakScanPassed: (boolean|undefined), potentialLeaks: (object[]|undefined)}}
 *   `valid` is true only when no error was collected; the two leak fields stay
 *   `undefined` when the leak scan is disabled.
 */
export function validateOutput(anonymizedText, entities, piiMapKeys, policy) {
    // Structural checks always run, in this fixed order.
    const errors = [
        ...checkOverlappingEntities(entities),
        ...checkUniqueIds(entities),
        ...checkTags(anonymizedText),
        ...checkTagEntityMatch(anonymizedText, entities),
        ...checkPIIMapCompleteness(entities, piiMapKeys),
    ];
    let leakScanPassed;
    let potentialLeaks;
    if (policy.enableLeakScan) {
        ({ matches: potentialLeaks } = performLeakScan(anonymizedText, policy));
        leakScanPassed = potentialLeaks.length === 0;
        if (!leakScanPassed) {
            // All leaks are summarized in a single error entry.
            errors.push({
                code: ValidationErrorCode.POTENTIAL_PII_LEAK,
                message: `Leak scan found ${potentialLeaks.length} potential PII leak(s)`,
                details: { leaks: potentialLeaks },
            });
        }
    }
    const valid = errors.length === 0;
    return { valid, errors, leakScanPassed, potentialLeaks };
}
|
|
93
|
+
/**
 * Reports every pair of entities whose spans overlap.
 *
 * Each unordered pair is compared exactly once via `spansOverlap`, so one
 * error is produced per overlapping pair.
 *
 * @param {Array<object>} entities - Entities with `id`, `type`, `start`, `end`.
 * @returns {object[]} One OVERLAPPING_ENTITIES error per overlapping pair.
 */
function checkOverlappingEntities(entities) {
    const errors = [];
    for (const [index, first] of entities.entries()) {
        // Only look at entities after `first` so a pair is never reported twice.
        for (const second of entities.slice(index + 1)) {
            if (!spansOverlap(first, second)) {
                continue;
            }
            const describe = ({ id, type, start, end }) => ({ id, type, start, end });
            errors.push({
                code: ValidationErrorCode.OVERLAPPING_ENTITIES,
                message: `Entities ${first.id} (${first.type}) and ${second.id} (${second.type}) overlap`,
                details: {
                    entity1: describe(first),
                    entity2: describe(second),
                },
            });
        }
    }
    return errors;
}
|
|
116
|
+
/**
 * Reports IDs that are shared by entities with different original text.
 *
 * Every entity is compared against the FIRST entity seen with the same `id`.
 * Sharing an ID is accepted when both have the same `original` text (ID reuse
 * for repeated PII); otherwise a DUPLICATE_IDS error is recorded.
 *
 * @param {Array<object>} entities - Entities with `id`, `type`, `original`.
 * @returns {object[]} One error per conflicting later occurrence.
 */
function checkUniqueIds(entities) {
    const firstById = new Map();
    const errors = [];
    for (const entity of entities) {
        const { id } = entity;
        const first = firstById.get(id);
        if (first === undefined) {
            // First time this ID shows up: remember it as the reference entity.
            firstById.set(id, entity);
            continue;
        }
        if (first.original === entity.original) {
            continue;
        }
        errors.push({
            code: ValidationErrorCode.DUPLICATE_IDS,
            message: `Duplicate ID ${id} used for different text values`,
            details: {
                id,
                first: { type: first.type, text: first.original },
                second: { type: entity.type, text: entity.original },
            },
        });
    }
    return errors;
}
|
|
145
|
+
/**
 * Scans the anonymized text for anything that looks like a PII tag and
 * reports the ones considered malformed.
 *
 * A candidate that does not end in "/>" is checked with "/>" appended and is
 * reported when `isValidTag` rejects that string.
 * NOTE(review): a candidate that already ends in "/>" is never reported,
 * whatever `isValidTag` returns for it - confirm this is intended.
 *
 * @param {string} anonymizedText - Text to scan.
 * @returns {object[]} One MALFORMED_TAG error per rejected candidate.
 */
function checkTags(anonymizedText) {
    const errors = [];
    for (const found of anonymizedText.matchAll(/<PII[^>]*>/g)) {
        const [tag] = found;
        const selfClosing = tag.endsWith('/>');
        const candidate = selfClosing ? tag : tag + '/>';
        if (isValidTag(candidate) || selfClosing) {
            continue;
        }
        errors.push({
            code: ValidationErrorCode.MALFORMED_TAG,
            message: `Malformed PII tag at position ${found.index}`,
            details: { tag, position: found.index },
        });
    }
    return errors;
}
|
|
166
|
+
/**
 * Verifies that every entity ID is referenced by at least one tag in the text.
 *
 * Only the entity-to-tag direction is checked: tags whose ID belongs to no
 * entity are not reported, and occurrence counts are not compared.
 *
 * @param {string} anonymizedText - Text from which tags are extracted.
 * @param {Array<object>} entities - Entities carrying an `id`.
 * @returns {object[]} One ID_MISMATCH error per distinct entity ID that has no tag.
 */
function checkTagEntityMatch(anonymizedText, entities) {
    const idsInText = new Set();
    for (const tag of extractTags(anonymizedText)) {
        idsInText.add(tag.id);
    }
    // De-duplicate entity IDs so a reused ID is reported at most once.
    const distinctEntityIds = [...new Set(entities.map((entity) => entity.id))];
    return distinctEntityIds
        .filter((id) => !idsInText.has(id))
        .map((id) => ({
            code: ValidationErrorCode.ID_MISMATCH,
            message: `Entity ID ${id} not found in anonymized text`,
            details: { missingId: id },
        }));
}
|
|
187
|
+
/**
 * Verifies that the PII map holds an entry for every entity.
 *
 * The key expected for an entity is its type and ID joined by an underscore
 * (`<type>_<id>`).
 *
 * @param {Array<object>} entities - Entities carrying `type` and `id`.
 * @param {string[]} piiMapKeys - Keys currently present in the PII map.
 * @returns {object[]} One MISSING_IN_MAP error per entity without an entry.
 */
function checkPIIMapCompleteness(entities, piiMapKeys) {
    const knownKeys = new Set(piiMapKeys);
    const errors = [];
    for (const { type, id } of entities) {
        const expectedKey = `${type}_${id}`;
        if (knownKeys.has(expectedKey)) {
            continue;
        }
        errors.push({
            code: ValidationErrorCode.MISSING_IN_MAP,
            message: `Entity ${id} (${type}) missing from PII map`,
            details: { entityId: id, entityType: type, expectedKey },
        });
    }
    return errors;
}
|
|
205
|
+
/**
 * Scans anonymized text for structured PII that survived anonymization.
 *
 * Self-closing PII tags are blanked out before matching so their contents
 * cannot trigger a pattern. Only patterns whose type is contained in
 * `policy.enabledTypes` are applied.
 *
 * @param {string} anonymizedText - Text to scan.
 * @param {object} policy - Policy; `enabledTypes.has(type)` selects the patterns.
 * @returns {{matches: Array<{type: *, text: string, position: number, pattern: string}>}}
 *   Matches in pattern order; `position` is an offset into `anonymizedText`.
 */
function performLeakScan(anonymizedText, policy) {
    const matches = [];
    // Blank each tag with exactly as many spaces as it is long. A fixed-width
    // replacement would shift every later offset, so reported positions (and
    // the inside-tag check below) would no longer refer to `anonymizedText`.
    const textWithoutTags = anonymizedText.replace(/<PII[^>]*\/>/g, (tag) => ' '.repeat(tag.length));
    for (const { type, pattern, name } of LEAK_SCAN_PATTERNS) {
        // Skip if type not enabled in policy
        if (!policy.enabledTypes.has(type)) {
            continue;
        }
        // Use a fresh regex per scan so no `lastIndex` state is shared, while
        // keeping the pattern's own flags (e.g. `i`) and guaranteeing `g`.
        const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
        const scanner = new RegExp(pattern.source, flags);
        for (const found of textWithoutTags.matchAll(scanner)) {
            const position = found.index;
            // Double-check against the original text, e.g. for tags that were
            // not blanked because they are not self-closing.
            if (isPositionInsideTag(anonymizedText, position)) {
                continue;
            }
            matches.push({
                type,
                text: found[0],
                position,
                pattern: name,
            });
        }
    }
    return { matches };
}
|
|
235
|
+
/**
 * Checks if a position is inside a PII tag.
 *
 * A PII tag is recognised the same way the leak scan masks it: it starts with
 * `<PII` and is closed by `/>`. Other angle-bracket pairs (for example an address
 * written as `Name <user@example.com>`, or stray `<` / `>` characters) are NOT
 * treated as tags, so content between them is still reported.
 *
 * @param text - The anonymized text.
 * @param position - Offset into `text` to test.
 * @returns true only when `position` lies strictly between the `<` and the `>` of
 *   a PII tag.
 */
function isPositionInsideTag(text, position) {
    // Find the nearest < at or before position
    const before = text.lastIndexOf('<', position);
    if (before === -1) {
        return false;
    }
    // The bracket must open a PII tag, not arbitrary markup or a comparison sign
    if (!text.startsWith('<PII', before)) {
        return false;
    }
    // Find the nearest > after the <
    const after = text.indexOf('>', before);
    if (after === -1) {
        return false;
    }
    // PII tags are self-closing; anything else is not a tag we emitted
    if (text[after - 1] !== '/') {
        return false;
    }
    // Position is inside the tag if it's strictly between < and >
    return position > before && position < after;
}
|
|
250
|
+
/**
 * Fast overlap check for a list of spans.
 *
 * Works on a copy ordered by `start` and compares each span with its successor;
 * the input array is left untouched.
 *
 * @param entities - Items exposing numeric `start` and `end`.
 * @returns false as soon as one span's `end` is greater than the next span's
 *   `start`, true otherwise (including for zero or one item).
 */
export function hasNoOverlaps(entities) {
    if (entities.length <= 1) {
        return true;
    }
    const ordered = [...entities];
    ordered.sort((left, right) => left.start - right.start);
    return ordered.every((span, idx) => idx === 0 || !(ordered[idx - 1].end > span.start));
}
|
|
264
|
+
//# sourceMappingURL=validator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/pipeline/validator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAuC,MAAM,mBAAmB,CAAC;AACjF,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA4BtD;;GAEG;AACH,MAAM,CAAN,IAAY,mBAOX;AAPD,WAAY,mBAAmB;IAC7B,oEAA6C,CAAA;IAC7C,sDAA+B,CAAA;IAC/B,sDAA+B,CAAA;IAC/B,kDAA2B,CAAA;IAC3B,wDAAiC,CAAA;IACjC,gEAAyC,CAAA;AAC3C,CAAC,EAPW,mBAAmB,KAAnB,mBAAmB,QAO9B;AAgBD;;;GAGG;AACH,MAAM,kBAAkB,GAA4D;IAClF;QACE,IAAI,EAAE,OAAO,CAAC,KAAK;QACnB,OAAO,EAAE,iDAAiD;QAC1D,IAAI,EAAE,OAAO;KACd;IACD;QACE,IAAI,EAAE,OAAO,CAAC,KAAK;QACnB,OAAO,EAAE,8CAA8C;QACvD,IAAI,EAAE,OAAO;KACd;IACD;QACE,IAAI,EAAE,OAAO,CAAC,IAAI;QAClB,OAAO,EAAE,mCAAmC;QAC5C,IAAI,EAAE,MAAM;KACb;IACD;QACE,IAAI,EAAE,OAAO,CAAC,WAAW;QACzB,OAAO,EAAE,qDAAqD;QAC9D,IAAI,EAAE,aAAa;KACpB;IACD;QACE,IAAI,EAAE,OAAO,CAAC,UAAU;QACxB,OAAO,EAAE,0BAA0B;QACnC,IAAI,EAAE,YAAY;KACnB;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,cAAc,CAC5B,cAAsB,EACtB,QAA0B,EAC1B,UAAoB,EACpB,MAA2B;IAE3B,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,mCAAmC;IACnC,MAAM,aAAa,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IACzD,MAAM,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;IAE9B,6CAA6C;IAC7C,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,CAAC;IAEzB,wCAAwC;IACxC,MAAM,SAAS,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;IAC5C,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;IAE1B,0CAA0C;IAC1C,MAAM,WAAW,GAAG,mBAAmB,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;IAClE,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;IAE5B,gDAAgD;IAChD,MAAM,SAAS,GAAG,uBAAuB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAChE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;IAE1B,+BAA+B;IAC/B,IAAI,cAAmC,CAAC;IACxC,IAAI,cAA2C,CAAC;IAEhD,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;QAC1B,MAAM,UAAU,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QAC3D,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC;QACpC,cAAc,GAAG,cAAc,CAAC,MAAM,KAAK,CAAC,CAAC;QAE7C,IAAI,CAAC,cAAc,EAAE,CAAC;YACpB,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,mBAAmB,CAAC,kBAAkB;gBAC5C,OAAO,EAAE,mBAAmB,cAAc,CAAC,MAAM,wBAAwB;gBACzE,OAAO,EAAE,EAAE,KAA
K,EAAE,cAAc,EAAE;aACnC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;QAC1B,MAAM;QACN,cAAc;QACd,cAAc;KACf,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAAC,QAA0B;IAC1D,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;YACvB,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;YAEvB,IAAI,YAAY,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACvB,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,mBAAmB,CAAC,oBAAoB;oBAC9C,OAAO,EAAE,YAAY,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,WAAW;oBACvE,OAAO,EAAE;wBACP,OAAO,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE;wBAC/D,OAAO,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE;qBAChE;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,QAA0B;IAChD,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0B,CAAC;IAElD,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACxC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,oEAAoE;YACpE,6EAA6E;YAC7E,IAAI,QAAQ,CAAC,QAAQ,KAAK,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC1C,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,mBAAmB,CAAC,aAAa;oBACvC,OAAO,EAAE,gBAAgB,MAAM,CAAC,EAAE,iCAAiC;oBACnE,OAAO,EAAE;wBACP,EAAE,EAAE,MAAM,CAAC,EAAE;wBACb,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,QAAQ,EAAE;wBACvD,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE;qBACrD;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,cAAsB;IACvC,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,0CAA0C;IA
C1C,MAAM,cAAc,GAAG,aAAa,CAAC;IACrC,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9D,4CAA4C;QAC5C,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QAErE,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACrD,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,mBAAmB,CAAC,aAAa;gBACvC,OAAO,EAAE,iCAAiC,KAAK,CAAC,KAAK,EAAE;gBACvD,OAAO,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,KAAK,EAAE;aAClD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAC1B,cAAsB,EACtB,QAA0B;IAE1B,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,WAAW,CAAC,cAAc,CAAC,CAAC;IAEzC,wBAAwB;IACxB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACrD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAE9C,uBAAuB;IACvB,KAAK,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;QAC3B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACpB,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,mBAAmB,CAAC,WAAW;gBACrC,OAAO,EAAE,aAAa,EAAE,+BAA+B;gBACvD,OAAO,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE;aAC3B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAC9B,QAA0B,EAC1B,UAAoB;IAEpB,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;IAEnC,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,MAAM,WAAW,GAAG,GAAG,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;QAClD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,mBAAmB,CAAC,cAAc;gBACxC,OAAO,EAAE,UAAU,MAAM,CAAC,EAAE,KAAK,MAAM,CAAC,IAAI,wBAAwB;gBACpE,OAAO,EAAE,EAAE,QAAQ,EAAE,MAAM,CAAC,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,IAAI,EAAE,WAAW,EAAE;aACvE,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CACtB,cAAsB,EACtB,MAA2B;IAE3B,MAAM,OAAO,GAAoB,EAAE,CAAC;IA
EpC,gCAAgC;IAChC,MAAM,eAAe,GAAG,cAAc,CAAC,OAAO,CAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAEhF,KAAK,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,kBAAkB,EAAE,CAAC;QACzD,qCAAqC;QACrC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACnC,SAAS;QACX,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACtD,IAAI,KAA6B,CAAC;QAElC,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC9D,uCAAuC;YACvC,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC;YAC7B,MAAM,OAAO,GAAG,mBAAmB,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;YAE9D,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI;oBACJ,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;oBACd,QAAQ;oBACR,OAAO,EAAE,IAAI;iBACd,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAY,EAAE,QAAgB;IACzD,qCAAqC;IACrC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC/C,IAAI,MAAM,KAAK,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IAEhC,iCAAiC;IACjC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACxC,IAAI,KAAK,KAAK,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IAE/B,iDAAiD;IACjD,OAAO,QAAQ,GAAG,MAAM,IAAI,QAAQ,GAAG,KAAK,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,QAA+C;IAC3E,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAE/D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,IAAI,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,KAAK,EAAE,CAAC;YAC1C,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Recognizer Interface
|
|
3
|
+
* Defines the contract for all PII recognizers (regex-based)
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, SpanMatch } from '../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Base interface for all PII recognizers
|
|
8
|
+
*/
|
|
9
|
+
export interface Recognizer {
|
|
10
|
+
/** The PII type this recognizer detects; stamped on every match it produces */
readonly type: PIIType;
|
|
12
|
+
/** Human-readable name for logging/debugging */
readonly name: string;
|
|
14
|
+
/** Default confidence score for matches (0.0 to 1.0) */
readonly defaultConfidence: number;
|
|
16
|
+
/**
|
|
17
|
+
* Finds all matches of this PII type in the given text
|
|
18
|
+
* @param text - The text to search
|
|
19
|
+
* @returns Array of span matches
|
|
20
|
+
*/
find(text: string): SpanMatch[];
|
|
22
|
+
/**
|
|
23
|
+
* Optional validation of a match (e.g., checksum validation).
* When it is defined, the regex-based implementations shipped in this package
* call it on each raw match inside find() and drop the match if it returns false.
|
|
24
|
+
* @param match - The matched text
|
|
25
|
+
* @returns true if the match is valid
|
|
26
|
+
*/
validate?(match: string): boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Optional normalization of a match for storage.
* NOTE(review): the bundled regex implementations do not call this from find();
* presumably the caller applies it when storing a match - confirm.
|
|
30
|
+
* @param match - The matched text
|
|
31
|
+
* @returns Normalized version of the match
|
|
32
|
+
*/
normalize?(match: string): string;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Base class for regex-based recognizers
|
|
37
|
+
*/
|
|
38
|
+
export declare abstract class RegexRecognizer implements Recognizer {
|
|
39
|
+
abstract readonly type: PIIType;
|
|
40
|
+
abstract readonly name: string;
|
|
41
|
+
/** Confidence assigned to every match from find(); the compiled class initializes it to 0.95 */
readonly defaultConfidence: number;
|
|
42
|
+
/** Regex pattern(s) for matching; find() scans non-global patterns with the `g` flag added */
|
|
43
|
+
protected abstract readonly patterns: RegExp[];
|
|
44
|
+
/**
|
|
45
|
+
* Finds all matches using the configured patterns.
* Matches rejected by validate() (when a subclass defines it) are skipped, and the
* result is passed through deduplicateMatches().
|
|
46
|
+
*/
|
|
47
|
+
find(text: string): SpanMatch[];
|
|
48
|
+
/**
|
|
49
|
+
* Removes duplicate matches (same span matched by multiple patterns).
* Two matches are duplicates when their start and end are equal; the first one is kept.
|
|
50
|
+
*/
|
|
51
|
+
protected deduplicateMatches(matches: SpanMatch[]): SpanMatch[];
|
|
52
|
+
/**
|
|
53
|
+
* Optional validation hook. The base class provides no implementation; while it
* is undefined, find() accepts every match.
|
|
54
|
+
* Define it in subclasses for checksum validation etc.
|
|
55
|
+
*/
|
|
56
|
+
validate?(match: string): boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Optional normalization hook. The base class provides no implementation and
* find() does not call it.
|
|
59
|
+
* Define it in subclasses for specific normalization
|
|
60
|
+
*/
|
|
61
|
+
normalize?(match: string): string;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Configuration for a regex recognizer created from patterns
|
|
65
|
+
*/
|
|
66
|
+
export interface RegexRecognizerConfig {
|
|
67
|
+
/** PII type reported by the recognizer and stamped on every match */
type: PIIType;
|
|
68
|
+
/** Name exposed on the created recognizer */
name: string;
|
|
69
|
+
/** Patterns to scan with, in order; a pattern without the `g` flag is scanned with `g` added */
patterns: RegExp[];
|
|
70
|
+
/** Confidence for produced matches; createRegexRecognizer falls back to 0.95 when omitted */
defaultConfidence?: number;
|
|
71
|
+
/** Optional filter: a match is dropped when this returns false for its text */
validate?: (match: string) => boolean;
|
|
72
|
+
/** Optional normalizer; copied onto the created recognizer as-is */
normalize?: (match: string) => string;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Creates a simple regex recognizer from configuration.
* The result is a plain object whose find() scans every configured pattern, skips
* spans already reported and matches rejected by config.validate, and uses
* config.defaultConfidence (0.95 when omitted) as the match confidence.
* @param config - Recognizer type, name, patterns and optional hooks
* @returns A Recognizer backed by the given patterns
|
|
76
|
+
*/
|
|
77
|
+
export declare function createRegexRecognizer(config: RegexRecognizerConfig): Recognizer;
|
|
78
|
+
//# sourceMappingURL=base.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/recognizers/base.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,SAAS,EAAmB,MAAM,mBAAmB,CAAC;AAExE;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,2CAA2C;IAC3C,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IAEvB,gDAAgD;IAChD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB,wDAAwD;IACxD,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CAAC;IAEhC;;;;OAIG;IACH,QAAQ,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC;IAElC;;;;OAIG;IACH,SAAS,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC;CACnC;AAED;;GAEG;AACH,8BAAsB,eAAgB,YAAW,UAAU;IACzD,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAQ;IAE1C,6CAA6C;IAC7C,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;IAE/C;;OAEG;IACH,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IAiC/B;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,OAAO,EAAE,SAAS,EAAE,GAAG,SAAS,EAAE;IAe/D;;;OAGG;IACH,QAAQ,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO;IAEjC;;;OAGG;IACH,SAAS,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC;IACtC,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;CACvC;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,qBAAqB,GAAG,UAAU,CA8C/E"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Recognizer Interface
|
|
3
|
+
* Defines the contract for all PII recognizers (regex-based)
|
|
4
|
+
*/
|
|
5
|
+
import { DetectionSource } from '../types/index.js';
|
|
6
|
+
/**
 * Base class for regex-based recognizers.
 *
 * Subclasses supply `type`, `name` and `patterns`, and may define
 * `validate(matchText)` to reject individual matches (e.g. failed checksums).
 */
export class RegexRecognizer {
    /** Confidence assigned to every match produced by find(). */
    defaultConfidence = 0.95;
    /**
     * Finds all matches using the configured patterns.
     *
     * @param {string} text - The text to search.
     * @returns {Array<object>} One entry per distinct `start:end` span, each with
     *   `type`, `start`, `end`, `confidence`, `source` and `text`.
     */
    find(text) {
        const matches = [];
        for (const pattern of this.patterns) {
            // matchAll() requires the global flag and starts scanning at the regex's
            // current lastIndex. Always scan with a fresh copy (lastIndex 0) so that
            // state left on a shared /g pattern by an earlier exec()/test() cannot
            // cause the beginning of the text to be skipped.
            const flags = pattern.global ? pattern.flags : `${pattern.flags}g`;
            const globalPattern = new RegExp(pattern.source, flags);
            for (const match of text.matchAll(globalPattern)) {
                if (match.index === undefined) {
                    continue;
                }
                const matchText = match[0];
                // Skip if validation fails
                if (this.validate !== undefined && !this.validate(matchText)) {
                    continue;
                }
                matches.push({
                    type: this.type,
                    start: match.index,
                    end: match.index + matchText.length,
                    confidence: this.defaultConfidence,
                    source: DetectionSource.REGEX,
                    text: matchText,
                });
            }
        }
        return this.deduplicateMatches(matches);
    }
    /**
     * Removes duplicate matches (same span matched by multiple patterns).
     * The first match seen for a given `start:end` span is the one kept.
     *
     * @param {Array<object>} matches - Matches in discovery order.
     * @returns {Array<object>} Matches with unique spans, order preserved.
     */
    deduplicateMatches(matches) {
        const seen = new Set();
        const unique = [];
        for (const match of matches) {
            const key = `${match.start}:${match.end}`;
            if (!seen.has(key)) {
                seen.add(key);
                unique.push(match);
            }
        }
        return unique;
    }
}
|
|
57
|
+
/**
 * Creates a simple regex recognizer from configuration.
 *
 * @param {object} config - `type`, `name` and `patterns` are required;
 *   `defaultConfidence` (falls back to 0.95), `validate` and `normalize` are optional.
 * @returns {object} A recognizer exposing `type`, `name`, `defaultConfidence`,
 *   `find(text)` and the `validate` / `normalize` hooks copied from the config.
 */
export function createRegexRecognizer(config) {
    return {
        type: config.type,
        name: config.name,
        defaultConfidence: config.defaultConfidence ?? 0.95,
        /**
         * Scans `text` with every configured pattern and returns one match per
         * distinct `start:end` span that passes `config.validate` (when provided).
         */
        find(text) {
            const matches = [];
            const seen = new Set();
            for (const pattern of config.patterns) {
                // matchAll() requires the global flag and starts at the regex's current
                // lastIndex. Scan with a fresh copy (lastIndex 0) so stale state on a
                // shared /g pattern cannot make the start of the text go unscanned.
                const flags = pattern.global ? pattern.flags : `${pattern.flags}g`;
                const globalPattern = new RegExp(pattern.source, flags);
                for (const match of text.matchAll(globalPattern)) {
                    if (match.index === undefined) {
                        continue;
                    }
                    const matchText = match[0];
                    const end = match.index + matchText.length;
                    const key = `${match.index}:${end}`;
                    if (seen.has(key)) {
                        continue;
                    }
                    // Skip if validation fails (the span is not marked as seen, so a
                    // later pattern is validated again rather than silently skipped)
                    if (config.validate !== undefined && !config.validate(matchText)) {
                        continue;
                    }
                    seen.add(key);
                    matches.push({
                        type: config.type,
                        start: match.index,
                        end,
                        confidence: config.defaultConfidence ?? 0.95,
                        source: DetectionSource.REGEX,
                        text: matchText,
                    });
                }
            }
            return matches;
        },
        validate: config.validate,
        normalize: config.normalize,
    };
}
|
|
100
|
+
//# sourceMappingURL=base.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.js","sourceRoot":"","sources":["../../src/recognizers/base.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAsB,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAqCxE;;GAEG;AACH,MAAM,OAAgB,eAAe;IAG1B,iBAAiB,GAAW,IAAI,CAAC;IAK1C;;OAEG;IACH,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAEhC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,8CAA8C;YAC9C,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM;gBAClC,CAAC,CAAC,OAAO;gBACT,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;YAEpD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;oBAAE,SAAS;gBAExC,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAE3B,2BAA2B;gBAC3B,IAAI,IAAI,CAAC,QAAQ,KAAK,SAAS,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;oBAC7D,SAAS;gBACX,CAAC;gBAED,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,SAAS,CAAC,MAAM;oBACnC,UAAU,EAAE,IAAI,CAAC,iBAAiB;oBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,SAAS;iBAChB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC;IAED;;OAEG;IACO,kBAAkB,CAAC,OAAoB;QAC/C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,MAAM,MAAM,GAAgB,EAAE,CAAC;QAE/B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,EAAE,CAAC;YAC1C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CAaF;AAcD;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAA6B;IACjE,OAAO;QACL,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,IAAI;QAEnD,IAAI,CAAC,IAAY;YACf,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;YAE/B,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACtC,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM;oBAClC,CAAC,CAAC,OAAO;oBACT,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;gBAEpD,KAAK,MAAM,KAAK,IAAI
,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;oBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;wBAAE,SAAS;oBAExC,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;oBAC3B,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;oBAE/D,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;wBAAE,SAAS;oBAE5B,2BAA2B;oBAC3B,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;wBACjE,SAAS;oBACX,CAAC;oBAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,OAAO,CAAC,IAAI,CAAC;wBACX,IAAI,EAAE,MAAM,CAAC,IAAI;wBACjB,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,SAAS,CAAC,MAAM;wBACnC,UAAU,EAAE,MAAM,CAAC,iBAAiB,IAAI,IAAI;wBAC5C,MAAM,EAAE,eAAe,CAAC,KAAK;wBAC7B,IAAI,EAAE,SAAS;qBAChB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bic-swift.d.ts","sourceRoot":"","sources":["../../src/recognizers/bic-swift.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AA2C5C;;GAEG;AACH,eAAO,MAAM,kBAAkB,EAAE,UAsEhC,CAAC"}
|