@elanlanguages/bridge-anonymization 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -1
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -1
- package/dist/crypto/pii-map-crypto.js +8 -8
- package/dist/crypto/pii-map-crypto.js.map +1 -1
- package/dist/index.d.ts +25 -20
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +103 -52
- package/dist/index.js.map +1 -1
- package/dist/ner/model-manager.d.ts.map +1 -1
- package/dist/ner/model-manager.js +10 -8
- package/dist/ner/model-manager.js.map +1 -1
- package/dist/ner/ner-model.d.ts.map +1 -1
- package/dist/ner/ner-model.js +10 -10
- package/dist/ner/ner-model.js.map +1 -1
- package/dist/ner/onnx-runtime.d.ts +3 -3
- package/dist/ner/onnx-runtime.d.ts.map +1 -1
- package/dist/ner/onnx-runtime.js +1 -1
- package/dist/ner/onnx-runtime.js.map +1 -1
- package/dist/ner/tokenizer.d.ts +26 -53
- package/dist/ner/tokenizer.d.ts.map +1 -1
- package/dist/ner/tokenizer.js +174 -196
- package/dist/ner/tokenizer.js.map +1 -1
- package/dist/pipeline/index.d.ts +7 -4
- package/dist/pipeline/index.d.ts.map +1 -1
- package/dist/pipeline/index.js +7 -4
- package/dist/pipeline/index.js.map +1 -1
- package/dist/pipeline/resolver.d.ts.map +1 -1
- package/dist/pipeline/resolver.js +3 -2
- package/dist/pipeline/resolver.js.map +1 -1
- package/dist/pipeline/semantic-data-loader.d.ts +157 -0
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
- package/dist/pipeline/semantic-data-loader.js +662 -0
- package/dist/pipeline/semantic-data-loader.js.map +1 -0
- package/dist/pipeline/semantic-enricher.d.ts +102 -0
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
- package/dist/pipeline/semantic-enricher.js +268 -0
- package/dist/pipeline/semantic-enricher.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +52 -12
- package/dist/pipeline/tagger.d.ts.map +1 -1
- package/dist/pipeline/tagger.js +226 -21
- package/dist/pipeline/tagger.js.map +1 -1
- package/dist/pipeline/title-extractor.d.ts +79 -0
- package/dist/pipeline/title-extractor.d.ts.map +1 -0
- package/dist/pipeline/title-extractor.js +801 -0
- package/dist/pipeline/title-extractor.js.map +1 -0
- package/dist/types/index.d.ts +66 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +14 -3
- package/dist/types/index.js.map +1 -1
- package/dist/utils/index.d.ts +3 -3
- package/dist/utils/index.js +3 -3
- package/package.json +7 -5
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-data-loader.js","sourceRoot":"","sources":["../../src/pipeline/semantic-data-loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,UAAU,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AAEzB;;GAEG;AACH,MAAM,eAAe,GAA2B;IAC9C,CAAC,EAAE,MAAM;IACT,IAAI,EAAE,MAAM;IACZ,IAAI,EAAE,MAAM,EAAE,cAAc;IAC5B,CAAC,EAAE,QAAQ;IACX,IAAI,EAAE,QAAQ;IACd,IAAI,EAAE,QAAQ,EAAE,gBAAgB;IAChC,GAAG,EAAE,SAAS,EAAE,SAAS;CAC1B,CAAC;AAEF;;;GAGG;AACH,MAAM,eAAe,GAA2B;IAC9C,EAAE,EAAE,CAAC,EAAE,gBAAgB;IACvB,EAAE,EAAE,CAAC,EAAE,UAAU;IACjB,EAAE,EAAE,CAAC,EAAE,MAAM;IACb,EAAE,EAAE,CAAC,EAAE,QAAQ;IACf,EAAE,EAAE,CAAC,EAAE,QAAQ;IACf,EAAE,EAAE,CAAC,EAAE,WAAW;IAClB,EAAE,EAAE,CAAC,EAAE,QAAQ;IACf,EAAE,EAAE,CAAC,EAAE,SAAS;IAChB,EAAE,EAAE,CAAC,EAAE,UAAU;IACjB,EAAE,EAAE,CAAC,EAAE,aAAa;IACpB,EAAE,EAAE,EAAE,EAAE,cAAc;IACtB,EAAE,EAAE,EAAE,EAAE,6CAA6C;IACrD,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,YAAY;IACpB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,iBAAiB;IACzB,EAAE,EAAE,EAAE,EAAE,WAAW;IACnB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,WAAW;IACnB,EAAE,EAAE,EAAE,EAAE,oBAAoB;IAC5B,EAAE,EAAE,EAAE,EAAE,WAAW;IACnB,EAAE,EAAE,EAAE,EAAE,oBAAoB;IAC5B,EAAE,EAAE,EAAE,EAAE,YAAY;IACpB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,aAAa;IACrB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,wBAAwB;IAChC,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,gBAAgB;IACxB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,kBAAkB;IA
C1B,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,UAAU;CACnB,CAAC;AAEF,iEAAiE;AACjE,KAAK,eAAe,CAAC;AAkCrB,oCAAoC;AACpC,IAAI,YAAY,GAAwB,IAAI,CAAC;AAE7C,gFAAgF;AAChF,6BAA6B;AAC7B,gFAAgF;AAEhF;;;GAGG;AACH,MAAM,UAAU,uBAAuB;IACrC,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC;IAE7B,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;QACzB,KAAK,QAAQ;YACX,OAAO,IAAI,CAAC,IAAI,CACd,OAAO,EACP,SAAS,EACT,QAAQ,EACR,sBAAsB,EACtB,eAAe,CAChB,CAAC;QACJ,KAAK,OAAO;YACV,OAAO,IAAI,CAAC,IAAI,CACd,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,EACrE,sBAAsB,EACtB,eAAe,CAChB,CAAC;QACJ;YACE,oDAAoD;YACpD,OAAO,IAAI,CAAC,IAAI,CACd,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,EAC7D,sBAAsB,EACtB,eAAe,CAChB,CAAC;IACN,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO,uBAAuB,EAAE,CAAC;AACnC,CAAC;AAsBD;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAA2B;IACzD;QACE,QAAQ,EAAE,cAAc;QACxB,GAAG,EAAE,uGAAuG;QAC5G,QAAQ,EAAE,IAAI;QACd,WAAW,EAAE,mCAAmC;QAChD,IAAI,EAAE,SAAS;KAChB;IACD;QACE,QAAQ,EAAE,iBAAiB;QAC3B,GAAG,EAAE,2DAA2D;QAChE,QAAQ,EAAE,IAAI;QACd,WAAW,EAAE,0CAA0C;QACvD,IAAI,EAAE,oBAAoB;KAC3B;IACD;QACE,QAAQ,EAAE,iBAAiB;QAC3B,GAAG,EAAE,2DAA2D;QAChE,QAAQ,EAAE,IAAI;QACd,WAAW,EAAE,yBAAyB;QACtC,IAAI,EAAE,QAAQ;KACf;IACD;QACE,QAAQ,EAAE,sBAAsB;QAChC,GAAG,EAAE,gEAAgE;QACrE,QAAQ,EAAE,KAAK;QACf,WAAW,EAAE,8CAA8C;QAC3D,IAAI,EAAE,SAAS;KAChB;CACF,CAAC;AAgBF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB;IAC5C,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,IAAI,CAAC;QACH,2BAA2B;QAC3B,KAAK,MAAM,IAAI,IAAI,mBAAmB,EAAE,CAAC;YACvC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB;IACrC,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAC1C,MAAM,aAAa,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAEpE,OAAO,aAAa,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;QACl
C,IAAI,CAAC;YACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY,CACzB,GAAW,EACX,QAAgB,EAChB,UAA6C;IAE7C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE;YACP,YAAY,EAAE,4BAA4B;SAC3C;KACF,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC;QAC5C,CAAC;QACD,MAAM,IAAI,KAAK,CACb,sBAAsB,GAAG,KAAK,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CACvE,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC1D,MAAM,KAAK,GACT,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE7E,0BAA0B;IAC1B,MAAM,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpE,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEzC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;IAC1C,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACnD,CAAC;IAED,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,iDAAiD;IACjD,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;QAEnC,IAAI,MAAM,CAAC,IAAI;YAAE,MAAM;QAEvB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAmB,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnB,eAAe,IAAI,KAAK,CAAC,MAAM,CAAC;QAEhC,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC;gBACT,IAAI,EAAE,QAAQ;gBACd,eAAe;gBACf,UAAU,EAAE,KAAK;gBACjB,OAAO,EACL,KAAK,KAAK,IAAI,IAAI,KAAK,GAAG,CAAC;oBACzB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;oBAC7C,CAAC,CAAC,IAAI;aACX,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,UAAU,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,UAAU,CAAC,OAAe,EAAE,OAAe;IACxD,iDAAiD;IACjD,sEAAsE;IACtE,oEAAoE;IAEpE,MAAM,EAAE,IAAI,EAA
E,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;IAC/C,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,CAAC;QACH,sDAAsD;QACtD,MAAM,SAAS,CAAC,aAAa,OAAO,SAAS,OAAO,GAAG,CAAC,CAAC;IAC3D,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;QAClE,IAAI,CAAC;YACH,MAAM,SAAS,CAAC,YAAY,OAAO,SAAS,OAAO,GAAG,CAAC,CAAC;QAC1D,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,iFAAiF,CAClF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,UAA6C,EAC7C,QAAmC;IAEnC,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,mBAAmB;IACnB,MAAM,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAErD,QAAQ,EAAE,CAAC,yCAAyC,CAAC,CAAC;IACtD,QAAQ,EAAE,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,mBAAmB,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAExC,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,WAAW,KAAK,CAAC,CAAC;QAEjD,IAAI,CAAC;YACH,IAAI,KAAK,EAAE,CAAC;gBACV,2BAA2B;gBAC3B,MAAM,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;gBAClC,MAAM,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;gBAClD,QAAQ,EAAE,CAAC,cAAc,IAAI,CAAC,QAAQ,KAAK,CAAC,CAAC;gBAC7C,MAAM,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YACrC,CAAC;iBAAM,CAAC;gBACN,MAAM,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CACb,oCAAoC,IAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,EAAE,CAClE,CAAC;YACJ,CAAC;YACD,mCAAmC;YACnC,QAAQ,EAAE,CAAC,0BAA0B,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,QAAQ,EAAE,CAAC,kCAAkC,CAAC,CAAC;IAE/C,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,UAII,EAAE;IAEN,MAAM,EAAE,YAAY,GAAG,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAE9D,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,8BAA8B;IAC9B,MAAM,YAAY,GAAG,MAAM,wBAAwB,EAAE,CAAC;IAEtD,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,IAAI,CAAC,YAAY,EAAE,CAAC;YAC
lB,MAAM,IAAI,KAAK,CACb,8BAA8B,OAAO,OAAO;gBAC1C,mCAAmC;gBACnC,6EAA6E;gBAC7E,gCAAgC;gBAChC,sDAAsD,CACzD,CAAC;QACJ,CAAC;QAED,MAAM,oBAAoB,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IACnD,CAAC;SAAM,CAAC;QACN,QAAQ,EAAE,CAAC,+BAA+B,OAAO,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB;IAC1C,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAC1C,MAAM,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/D,4BAA4B;IAC5B,iBAAiB,EAAE,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB;IAKjC,OAAO;QACL,KAAK,EAAE,mBAAmB;QAC1B,QAAQ,EAAE,uBAAuB,EAAE;QACnC,SAAS,EAAE,OAAO;KACnB,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,yBAAyB;AACzB,gFAAgF;AAEhF;;GAEG;AACH,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAqB,CAAC;IAE3C,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC,gCAAgC;IACrF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,gCAAgC;QAChC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;YAAE,SAAS;QAEzD,iEAAiE;QACjE,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,IAAI,CAAC,EAAE,CAAC,KAAK,GAAG;YAAE,SAAS;QAEnD,6BAA6B;QAC7B,sDAAsD;QACtD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAC1D,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,KAAK,CAAC;QACnC,IACE,UAAU,KAAK,SAAS;YACxB,UAAU,KAAK,EAAE;YACjB,IAAI,KAAK,SAAS;YAClB,IAAI,KAAK,EAAE;YAEX,SAAS;QAEX,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;QAC3C,IAAI,MAAM,KAAK,SAAS;YAAE,SAAS;QAEnC,MAAM,cAAc,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAE1C,mEAAmE;QACnE,+EAA+E;QAC/E,4BAA4B;QAC5B,MAAM,eAAe,GAA2B,EAAE,CAAC;QAEnD,+DAA+D;QAC/D,wDAAwD;QAExD,kBAAkB;QAClB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;YAC/B,KAAK,CAAC,GAAG,CAAC,cAAc,EAAE;gBACxB,MAAM;gBACN,eAAe,EACb,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,SAAS;aACxE,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,mFAAmF;YACnF,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,cAAc,CAAE,CAAC;YAC5C,IAAI,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;gBAC/B,mDAAmD
;gBACnD,IAAI,CAAC,QAAQ,CAAC,eAAe,EAAE,CAAC;oBAC9B,QAAQ,CAAC,eAAe,GAAG,EAAE,CAAC;gBAChC,CAAC;gBACD,0EAA0E;YAC5E,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,SAAS,WAAW,CAAC,QAAgB;IACnC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAqB,CAAC;IAE5C,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,gEAAgE;IAChE,MAAM,OAAO,GAAG,CAAC,IAAY,EAAE,KAAgB,EAAQ,EAAE;QACvD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACxC,IAAI,CAAC,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,QAAQ,CAAC,UAAU,EAAE,CAAC;YACxD,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAChC,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;YAAE,SAAS;QAEjC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE;YAAE,SAAS;QAEhC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,cAAc,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAClD,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAElD,IACE,IAAI,KAAK,SAAS;YAClB,IAAI,KAAK,EAAE;YACX,WAAW,KAAK,SAAS;YACzB,WAAW,KAAK,EAAE;YAElB,SAAS;QAEX,MAAM,SAAS,GAAc,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC;QAElE,2CAA2C;QAC3C,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAEzB,yDAAyD;QACzD,IACE,SAAS,KAAK,SAAS;YACvB,SAAS,KAAK,EAAE;YAChB,SAAS,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC,WAAW,EAAE,EAC9C,CAAC;YACD,OAAO,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAChC,CAAC;QAED,iDAAiD;QACjD,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;YACrC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;YAC/B,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5D,OAAO,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,QAAgB;IACtC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE5C,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC
,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,gCAAgC;QAChC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;YAAE,SAAS;QAEzD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAE/B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtB,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE;YACxE,SAAS;QAEX,mCAAmC;QACnC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,CAAC;QAExC,wBAAwB;QACxB,uDAAuD;QACvD,MAAM,UAAU,GAAG,oBAAoB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACpD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,IAAY;IACtD,MAAM,UAAU,GAAa,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAElD,oCAAoC;IACpC,MAAM,UAAU,GAA6B;QAC3C,eAAe,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,0BAA0B,CAAC;QACrE,gBAAgB,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,CAAC;QAC/D,OAAO,EAAE,CAAC,aAAa,CAAC;QACxB,MAAM,EAAE,CAAC,YAAY,CAAC;QACtB,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,CAAC;QACtC,KAAK,EAAE,CAAC,QAAQ,EAAE,SAAS,CAAC;QAC5B,WAAW,EAAE,CAAC,SAAS,EAAE,iBAAiB,EAAE,aAAa,CAAC;QAC1D,WAAW,EAAE,CAAC,SAAS,EAAE,QAAQ,EAAE,UAAU,CAAC;QAC9C,OAAO,EAAE,CAAC,YAAY,EAAE,aAAa,CAAC;QACtC,OAAO,EAAE,CAAC,SAAS,EAAE,UAAU,CAAC;QAChC,MAAM,EAAE,CAAC,UAAU,EAAE,oBAAoB,CAAC;QAC1C,KAAK,EAAE,CAAC,4BAA4B,EAAE,KAAK,CAAC;QAC5C,KAAK,EAAE,CAAC,QAAQ,CAAC;QACjB,aAAa,EAAE,CAAC,OAAO,EAAE,mBAAmB,CAAC;QAC7C,sBAAsB,EAAE,CAAC,KAAK,EAAE,UAAU,CAAC;QAC3C,gBAAgB,EAAE,CAAC,SAAS,EAAE,YAAY,CAAC;KAC5C,CAAC;IAEF,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACrB,UAAU,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,QAAgB;IACpC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAuB,CAAC;IAE/C,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,
OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;gBAAE,SAAS;YAEjC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,SAAS;YAE/B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB;YACzC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAE3B,IACE,IAAI,KAAK,SAAS;gBAClB,IAAI,KAAK,EAAE;gBACX,IAAI,KAAK,SAAS;gBAClB,IAAI,KAAK,EAAE;gBAEX,SAAS;YAEX,MAAM,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACtC,IAAI,WAAW,KAAK,SAAS,IAAI,WAAW,KAAK,EAAE;gBAAE,SAAS;YAE9D,MAAM,WAAW,GAAgB,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAEhE,kBAAkB;YAClB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,WAAW,CAAC,CAAC;YAE7C,8BAA8B;YAC9B,IACE,SAAS,KAAK,SAAS;gBACvB,SAAS,KAAK,EAAE;gBAChB,SAAS,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC,WAAW,EAAE,EAC9C,CAAC;gBACD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,EAAE,WAAW,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,0BAA0B;IAC5B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAChF,0BAA0B;AAC1B,gFAAgF;AAEhF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,UAII,EAAE;IAEN,4BAA4B;IAC5B,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAElC,gBAAgB;IAChB,gBAAgB,EAAE,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB;IAC9B,IAAI,YAAY,KAAK,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,oCAAoC,OAAO,IAAI;YAC7C,gHAAgH,CACnH,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC;IAChE,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;IAClE,MAAM,SAAS,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;IACxE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC,CAAC;IAEzE,YAAY,GAAG;QACb,KAAK;QACL,MAAM;QACN,SAAS;QACT,OAAO;QACP,MAAM,EAAE,IAAI;KACb,CAAC;IAEF,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe;IAC7B,IA
AI,YAAY,KAAK,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,OAAO,gBAAgB,EAAE,CAAC;IAC5B,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAC1B,IAAY,EACZ,MAAe;IAEf,MAAM,IAAI,GAAG,eAAe,EAAE,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAEjD,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IAE1C,qCAAqC;IACrC,IACE,MAAM,KAAK,SAAS;QACpB,MAAM,KAAK,EAAE;QACb,KAAK,CAAC,eAAe,KAAK,SAAS;QACnC,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,KAAK,SAAS,EAC3C,CAAC;QACD,OAAO,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,qBAAqB,GAAG,MAAM,CAAC;AAErC;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,QAAgB;IAEhB,MAAM,IAAI,GAAG,eAAe,EAAE,CAAC;IAC/B,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAEjD,iEAAiE;IACjE,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACnD,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;IAC1C,CAAC;IAED,+EAA+E;IAC/E,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACzC,IAAI,IAAI,IAAI,IAAI,CAAC,UAAU,IAAI,qBAAqB,EAAE,CAAC;QACrD,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,gBAAgB;IAChB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC5C,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC;IACzD,CAAC;IAED,0CAA0C;IAC1C,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAO1B,IAAI,YAAY,KAAK,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC1E,CAAC;IAED,OAAO;QACL,KAAK,EAAE,YAAY,CAAC,KAAK,CAAC,IAAI;QAC9B,MAAM,EAAE,YAAY,CAAC,MAAM,CAAC,IAAI;QAChC,SAAS,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI;QACtC,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,IAAI;QAClC,MAAM,EAAE,IAAI;KACb
,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Enricher
|
|
3
|
+
* Enriches PII spans with semantic attributes (gender, location scope)
|
|
4
|
+
* for MT-friendly tags that preserve grammatical context.
|
|
5
|
+
*
|
|
6
|
+
* This module uses data from the GeoNames and gender-guesser projects.
|
|
7
|
+
* Data is automatically downloaded when using:
|
|
8
|
+
* createAnonymizer({ semantic: { enabled: true, autoDownload: true } })
|
|
9
|
+
*/
|
|
10
|
+
import { SpanMatch, PersonGender, LocationScope } from "../types/index.js";
|
|
11
|
+
import { isSemanticDataAvailable, getDataDirectory } from "./semantic-data-loader.js";
|
|
12
|
+
export { isSemanticDataAvailable, getDataDirectory };
|
|
13
|
+
/**
|
|
14
|
+
* Configuration for semantic enrichment
|
|
15
|
+
*/
|
|
16
|
+
export interface EnricherConfig {
|
|
17
|
+
/** Locale hint for name gender disambiguation (e.g., 'de', 'it', 'fr'); forwarded by enrichSemantics() to inferGender(). */
|
|
18
|
+
locale?: string;
|
|
19
|
+
/** Minimum confidence to apply semantic attributes (default: 0.0). NOTE(review): the compiled enrichSemantics() reads only `locale` from this config; confirm where this option is honored. */
|
|
20
|
+
minConfidence?: number;
|
|
21
|
+
/** Whether to mark low-confidence results as 'unknown'. NOTE(review): not read by the compiled enrichSemantics(); confirm where this option is honored. */
|
|
22
|
+
strictMode?: boolean;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Result of gender inference with confidence
|
|
26
|
+
*/
|
|
27
|
+
export interface GenderResult {
|
|
28
|
+
/** Gender found for the name; inferGender() reports "unknown" when no first name can be extracted, the data is unavailable, or the lookup finds nothing. */
gender: PersonGender;
|
|
29
|
+
/** inferGender() returns 1.0 for a lookup hit and 0 for every "unknown" result. */
confidence: number;
|
|
30
|
+
/** Origin of the result: inferGender() sets "database" on a lookup hit and "unknown" otherwise. NOTE(review): no code path visible here produces "inference". */
source: "database" | "inference" | "unknown";
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Result of location classification with confidence
|
|
34
|
+
*/
|
|
35
|
+
export interface LocationResult {
|
|
36
|
+
/** Geographic scope taken from the lookup entry's `type`; classifyLocation() reports "unknown" when nothing matches or the data is unavailable. */
scope: LocationScope;
|
|
37
|
+
/** classifyLocation() returns 1.0 for a match on the normalized name, 0.9 for a match on a generated variation, and 0 for "unknown". */
confidence: number;
|
|
38
|
+
/** Country code copied from the matching lookup entry; not set on "unknown" results. */
countryCode?: string;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Enriches PII spans with semantic attributes based on lookup tables
|
|
42
|
+
*
|
|
43
|
+
* @param spans - Array of detected PII spans
|
|
44
|
+
* @param config - Optional configuration for enrichment
|
|
45
|
+
* @returns Array of spans with semantic attributes added
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```typescript
|
|
49
|
+
* const enrichedSpans = enrichSemantics(spans, { locale: 'de' });
|
|
50
|
+
* // "Mary" -> { gender: 'female' }
|
|
51
|
+
* // "Berlin" -> { scope: 'city' }
|
|
52
|
+
* ```
|
|
53
|
+
*/
|
|
54
|
+
export declare function enrichSemantics(spans: SpanMatch[], config?: EnricherConfig): SpanMatch[];
|
|
55
|
+
/**
|
|
56
|
+
* Infers gender from a person's name using the lookup database
|
|
57
|
+
*
|
|
58
|
+
* @param name - Full name or first name
|
|
59
|
+
* @param locale - Optional locale for disambiguation (e.g., 'de', 'it')
|
|
60
|
+
* @returns Gender result with confidence
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```typescript
|
|
64
|
+
* inferGender('Mary Smith'); // { gender: 'female', confidence: 1.0 }
|
|
65
|
+
* inferGender('Andrea', 'it'); // { gender: 'male', confidence: 1.0 }
|
|
66
|
+
* inferGender('Andrea', 'en'); // { gender: 'female', confidence: 1.0 }
|
|
67
|
+
* ```
|
|
68
|
+
*/
|
|
69
|
+
export declare function inferGender(name: string, locale?: string): GenderResult;
|
|
70
|
+
/**
|
|
71
|
+
* Classifies a location by its geographic scope
|
|
72
|
+
*
|
|
73
|
+
* @param location - Location name
|
|
74
|
+
* @returns Classification result with confidence
|
|
75
|
+
*
|
|
76
|
+
* @example
|
|
77
|
+
* ```typescript
|
|
78
|
+
* classifyLocation('Berlin'); // { scope: 'city', confidence: 1.0 }
|
|
79
|
+
* classifyLocation('Germany'); // { scope: 'country', confidence: 1.0 }
|
|
80
|
+
* classifyLocation('Bavaria'); // { scope: 'region', confidence: 1.0 }
|
|
81
|
+
* ```
|
|
82
|
+
*/
|
|
83
|
+
export declare function classifyLocation(location: string): LocationResult;
|
|
84
|
+
/**
|
|
85
|
+
* Gets statistics about the lookup databases
|
|
86
|
+
*/
|
|
87
|
+
export declare function getDatabaseStats(): {
|
|
88
|
+
names: number;
|
|
89
|
+
cities: number;
|
|
90
|
+
countries: number;
|
|
91
|
+
regions: number;
|
|
92
|
+
loaded: boolean;
|
|
93
|
+
};
|
|
94
|
+
/**
|
|
95
|
+
* Checks if a name exists in the database
|
|
96
|
+
*/
|
|
97
|
+
export declare function hasName(name: string): boolean;
|
|
98
|
+
/**
|
|
99
|
+
* Checks if a location exists in the database
|
|
100
|
+
*/
|
|
101
|
+
export declare function hasLocation(location: string): boolean;
|
|
102
|
+
//# sourceMappingURL=semantic-enricher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-enricher.d.ts","sourceRoot":"","sources":["../../src/pipeline/semantic-enricher.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EACL,SAAS,EAET,YAAY,EACZ,aAAa,EACd,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,uBAAuB,EAKvB,gBAAgB,EACjB,MAAM,2BAA2B,CAAC;AAGnC,OAAO,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAErD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,0EAA0E;IAC1E,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qEAAqE;IACrE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,0DAA0D;IAC1D,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,YAAY,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,UAAU,GAAG,WAAW,GAAG,SAAS,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,aAAa,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAuBD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,SAAS,EAAE,EAClB,MAAM,CAAC,EAAE,cAAc,GACtB,SAAS,EAAE,CAoBb;AAgCD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,YAAY,CA2BvE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,CAmCjE;AA+ED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;CACjB,CAEA;AAED;;GAEG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAa7C;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAWrD"}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Enricher
|
|
3
|
+
* Enriches PII spans with semantic attributes (gender, location scope)
|
|
4
|
+
* for MT-friendly tags that preserve grammatical context.
|
|
5
|
+
*
|
|
6
|
+
* This module uses data from the GeoNames and gender-guesser projects.
|
|
7
|
+
* Data is automatically downloaded when using:
|
|
8
|
+
* createAnonymizer({ semantic: { enabled: true, autoDownload: true } })
|
|
9
|
+
*/
|
|
10
|
+
import { PIIType, } from "../types/index.js";
|
|
11
|
+
import { isSemanticDataAvailable, loadSemanticData, lookupGender, lookupLocationType, getDataStats, getDataDirectory, } from "./semantic-data-loader.js";
|
|
12
|
+
// Re-export data availability check and other exports from data loader
|
|
13
|
+
export { isSemanticDataAvailable, getDataDirectory };
|
|
14
|
+
// Track if data has been initialized
|
|
15
|
+
let dataInitialized = false;
|
|
16
|
+
/**
 * Loads the semantic lookup data the first time it is needed and records
 * that fact in the module-level `dataInitialized` flag. Loading is
 * synchronous and assumes the data files have already been downloaded.
 *
 * @throws Error when isSemanticDataAvailable() reports the data is missing
 */
function ensureDataLoaded() {
    if (!dataInitialized) {
        const dataPresent = isSemanticDataAvailable();
        if (!dataPresent) {
            throw new Error(`Semantic enrichment data not available. ` +
                `Use ensureSemanticData() or createAnonymizer({ semantic: { enabled: true } }) to download.`);
        }
        loadSemanticData();
        dataInitialized = true;
    }
}
|
|
30
|
+
/**
 * Adds semantic attributes to PII spans using the lookup tables.
 *
 * PERSON spans are passed to enrichPerson() (with `config.locale`), LOCATION
 * spans to enrichLocation(); every other span is returned as-is. When the
 * data has not been loaded yet and is not available, the input array itself
 * is returned without any enrichment.
 *
 * @param spans - Array of detected PII spans
 * @param config - Optional configuration; only `locale` is read here
 * @returns A new array of spans, or `spans` itself when data is unavailable
 *
 * @example
 * ```typescript
 * const enrichedSpans = enrichSemantics(spans, { locale: 'de' });
 * ```
 */
export function enrichSemantics(spans, config) {
    const needsLoad = !dataInitialized;
    // Missing data is not an error here: enrichment is skipped silently.
    if (needsLoad && !isSemanticDataAvailable()) {
        return spans;
    }
    if (needsLoad) {
        ensureDataLoaded();
    }
    return spans.map((span) => {
        const kind = span.type;
        if (kind === PIIType.PERSON) {
            return enrichPerson(span, config?.locale);
        }
        if (kind === PIIType.LOCATION) {
            return enrichLocation(span);
        }
        return span;
    });
}
|
|
64
|
+
/**
 * Returns a copy of a PERSON span whose `semantic` object carries the gender
 * inferred from the span text; existing `semantic` entries are preserved.
 */
function enrichPerson(span, locale) {
    const { gender } = inferGender(span.text, locale);
    const semantic = { ...span.semantic, gender };
    return { ...span, semantic };
}
|
|
77
|
+
/**
 * Returns a copy of a LOCATION span whose `semantic` object carries the scope
 * reported by classifyLocation(); existing `semantic` entries are preserved.
 */
function enrichLocation(span) {
    const { scope } = classifyLocation(span.text);
    const semantic = { ...span.semantic, scope };
    return { ...span, semantic };
}
|
|
90
|
+
/**
 * Infers gender from a person's name via the lookup database.
 *
 * The first name is extracted from `name` and passed, together with the
 * optional locale, to lookupGender(). A hit is reported with confidence 1.0
 * and source "database". An "unknown" result with confidence 0 is returned
 * when no first name can be extracted, when the data is neither loaded nor
 * available, or when the lookup yields nothing.
 *
 * @param name - Full name or first name
 * @param locale - Optional locale for disambiguation (e.g., 'de', 'it')
 * @returns Gender result with confidence and source
 *
 * @example
 * ```typescript
 * inferGender('Mary Smith');
 * inferGender('Andrea', 'it');
 * ```
 */
export function inferGender(name, locale) {
    // Every miss produces the same shape; build a fresh object each time.
    const unknownResult = () => ({ gender: "unknown", confidence: 0, source: "unknown" });
    const firstName = extractFirstName(name);
    if (firstName === null || firstName === "") {
        return unknownResult();
    }
    if (!dataInitialized) {
        if (!isSemanticDataAvailable()) {
            return unknownResult();
        }
        ensureDataLoaded();
    }
    const gender = lookupGender(firstName, locale);
    const found = gender !== undefined && gender !== "";
    return found
        ? { gender, confidence: 1.0, source: "database" }
        : unknownResult();
}
|
|
127
|
+
/**
 * Classifies a location by its geographic scope.
 *
 * The normalized name is looked up first (confidence 1.0 on a hit). If that
 * misses, each entry from generateLocationVariations() is tried in order and
 * the first hit is returned with confidence 0.9. With no hit, or when the
 * data is neither loaded nor available, the scope is "unknown" with
 * confidence 0.
 *
 * @param location - Location name
 * @returns Classification result with confidence and, on a hit, country code
 *
 * @example
 * ```typescript
 * classifyLocation('Berlin');
 * classifyLocation('Germany');
 * ```
 */
export function classifyLocation(location) {
    if (!dataInitialized) {
        if (!isSemanticDataAvailable()) {
            return { scope: "unknown", confidence: 0 };
        }
        ensureDataLoaded();
    }
    const exact = lookupLocationType(normalizeLocationName(location));
    if (exact) {
        return {
            scope: exact.type,
            confidence: 1.0,
            countryCode: exact.countryCode,
        };
    }
    // Fallback: a match on a variation is reported with slightly lower confidence.
    for (const candidate of generateLocationVariations(location)) {
        const hit = lookupLocationType(candidate);
        if (hit) {
            return {
                scope: hit.type,
                confidence: 0.9,
                countryCode: hit.countryCode,
            };
        }
    }
    return { scope: "unknown", confidence: 0 };
}
|
|
171
|
+
/**
 * Extracts the first name from a full name.
 *
 * Leading honorifics (Dr., Mr., Mrs., Ms., Prof., Rev., Sir, Dame, Lord, Lady)
 * are removed, including stacked ones such as "Prof. Dr.", and the first
 * whitespace-separated word of the remainder is returned.
 *
 * @param fullName - Full name or first name
 * @returns The first name, or null when the input is not a string or is blank
 */
function extractFirstName(fullName) {
    // Non-string input (null, undefined, numbers, ...) carries no usable name.
    if (typeof fullName !== "string") {
        return null;
    }
    const trimmed = fullName.trim();
    if (!trimmed) {
        return null;
    }
    // Strip every leading title, not just the first one. A title only counts
    // when it is followed by whitespace, so a lone "Dr." is left untouched.
    const withoutPrefix = trimmed.replace(/^(?:(?:dr\.?|mr\.?|mrs\.?|ms\.?|prof\.?|rev\.?|sir|dame|lord|lady)\s+)+/i, "");
    // The first remaining word is taken as the first name.
    const [firstWord] = withoutPrefix.split(/\s+/);
    return firstWord ?? null;
}
|
|
184
|
+
/**
 * Produces the canonical lookup key for a location name: lower-cased,
 * trimmed, without one trailing generic place word, and with every run of
 * whitespace collapsed to a single space.
 *
 * @param location - Location name
 * @returns The normalized name
 */
function normalizeLocationName(location) {
    const lowered = location.toLowerCase().trim();
    // Drop one trailing generic place word ("new york city" -> "new york").
    // NOTE(review): this also shortens names where that word is part of the
    // proper name ("kansas city" -> "kansas") — confirm the lookup keys expect it.
    const withoutSuffix = lowered.replace(/\s+(city|town|village|state|province|region|county)$/i, "");
    // Collapse whitespace runs (including tabs) to a single space.
    return withoutSuffix.replace(/\s+/g, " ");
}
|
|
196
|
+
/**
 * Generates alternative spellings of a location name for fuzzy matching.
 *
 * Starting from the normalized name, candidates are produced in this order:
 *  1. the name without a leading "the ";
 *  2. the name without a leading article from the pattern list below;
 *  3. the name with common Western European diacritics and ligatures
 *     transliterated to ASCII;
 *  4. the name with any remaining combining accents removed, which covers
 *     letters such as "ń", "š" or "ő" and input in decomposed Unicode form.
 *
 * Every candidate differs from the normalized name; the list may be empty.
 *
 * @param location - Location name
 * @returns Candidate lookup keys, in the order they should be tried
 */
function generateLocationVariations(location) {
    const normalized = normalizeLocationName(location);
    const variations = [];
    // Try without "the"
    if (normalized.startsWith("the ")) {
        variations.push(normalized.slice(4));
    }
    // Try without common articles in other languages
    const articlePatterns = [
        /^(la|le|les|el|los|las|il|lo|gli|i|die|der|das|de|het)\s+/i,
    ];
    for (const pattern of articlePatterns) {
        const withoutArticle = normalized.replace(pattern, "");
        if (withoutArticle !== normalized) {
            variations.push(withoutArticle);
        }
    }
    // Try ASCII transliteration for common diacritics
    const asciiVersion = normalized
        .replace(/[àáâãäå]/g, "a")
        .replace(/[èéêë]/g, "e")
        .replace(/[ìíîï]/g, "i")
        .replace(/[òóôõö]/g, "o")
        .replace(/[ùúûü]/g, "u")
        .replace(/[ñ]/g, "n")
        .replace(/[ç]/g, "c")
        .replace(/[ß]/g, "ss")
        .replace(/[æ]/g, "ae")
        .replace(/[ø]/g, "o")
        .replace(/[œ]/g, "oe");
    if (asciiVersion !== normalized) {
        variations.push(asciiVersion);
    }
    // Generic fallback for accents the table above does not list: decompose,
    // drop marks from the Combining Diacritical Marks block only (so vowel
    // signs of other scripts are left alone), then recompose so scripts that
    // merely decompose (e.g. Hangul) end up unchanged. "ł" has no canonical
    // decomposition and is mapped by hand.
    const strippedVersion = asciiVersion
        .replace(/ł/g, "l")
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "")
        .normalize("NFC");
    // Appended last so that every previously generated candidate is still tried first.
    if (strippedVersion !== normalized && strippedVersion !== asciiVersion) {
        variations.push(strippedVersion);
    }
    return variations;
}
|
|
234
|
+
/**
 * Reports statistics about the lookup databases.
 *
 * @returns The value returned by `getDataStats()`, passed through unchanged
 */
export function getDatabaseStats() {
    const stats = getDataStats();
    return stats;
}
|
|
240
|
+
/**
 * Reports whether the first name contained in `name` has an entry in the
 * name database.
 *
 * @param name - Full name or first name
 * @returns false when the semantic data is unavailable, when no first name
 *   can be extracted, or when the lookup finds nothing; true otherwise
 */
export function hasName(name) {
    // Load the semantic data on first use; without it no name can be known.
    if (!dataInitialized) {
        if (!isSemanticDataAvailable()) {
            return false;
        }
        ensureDataLoaded();
    }
    const givenName = extractFirstName(name);
    const isUsable = givenName !== null && givenName !== "";
    return isUsable && lookupGender(givenName) !== undefined;
}
|
|
255
|
+
/**
 * Reports whether the normalized form of `location` has an entry in the
 * location database. Spelling variations are not tried here.
 *
 * @param location - Location name
 * @returns false when the semantic data is unavailable or the lookup finds
 *   nothing; true otherwise
 */
export function hasLocation(location) {
    // Load the semantic data on first use; without it no location can be known.
    if (!dataInitialized) {
        if (!isSemanticDataAvailable()) {
            return false;
        }
        ensureDataLoaded();
    }
    const lookupKey = normalizeLocationName(location);
    const entry = lookupLocationType(lookupKey);
    return entry !== undefined;
}
|
|
268
|
+
//# sourceMappingURL=semantic-enricher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-enricher.js","sourceRoot":"","sources":["../../src/pipeline/semantic-enricher.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAEL,OAAO,GAGR,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,uBAAuB,EACvB,gBAAgB,EAChB,YAAY,EACZ,kBAAkB,EAClB,YAAY,EACZ,gBAAgB,GACjB,MAAM,2BAA2B,CAAC;AAEnC,uEAAuE;AACvE,OAAO,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAgCrD,qCAAqC;AACrC,IAAI,eAAe,GAAG,KAAK,CAAC;AAE5B;;;GAGG;AACH,SAAS,gBAAgB;IACvB,IAAI,eAAe;QAAE,OAAO;IAE5B,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,0CAA0C;YACxC,4FAA4F,CAC/F,CAAC;IACJ,CAAC;IAED,gBAAgB,EAAE,CAAC;IACnB,eAAe,GAAG,IAAI,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAkB,EAClB,MAAuB;IAEvB,wBAAwB;IACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;YAC/B,iDAAiD;YACjD,OAAO,KAAK,CAAC;QACf,CAAC;QACD,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;YAClB,KAAK,OAAO,CAAC,MAAM;gBACjB,OAAO,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAC5C,KAAK,OAAO,CAAC,QAAQ;gBACnB,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;YAC9B;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,IAAe,EAAE,MAAe;IACpD,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAE9C,OAAO;QACL,GAAG,IAAI;QACP,QAAQ,EAAE;YACR,GAAG,IAAI,CAAC,QAAQ;YAChB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAe;IACrC,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3C,OAAO;QACL,GAAG,IAAI;QACP,QAAQ,EAAE;YACR,GAAG,IAAI,CAAC,QAAQ;YAChB,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY,EAAE,MAAe;IACvD,sDAAsD;IACtD,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,EAAE,EAAE,CAAC;QAC3C,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,6BAA6B;IAC7B,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAA
E,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;QAC1C,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,OAAO;QACL,MAAM,EAAE,MAAsB;QAC9B,UAAU,EAAE,GAAG;QACf,MAAM,EAAE,UAAU;KACnB,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,gBAAgB,CAAC,QAAgB;IAC/C,6BAA6B;IAC7B,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,kBAAkB,CAAC,UAAU,CAAC,CAAC;IAE9C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,iBAAiB;QACjB,MAAM,UAAU,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;QACxD,KAAK,MAAM,OAAO,IAAI,UAAU,EAAE,CAAC;YACjC,MAAM,aAAa,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAClD,IAAI,aAAa,EAAE,CAAC;gBAClB,OAAO;oBACL,KAAK,EAAE,aAAa,CAAC,IAAqB;oBAC1C,UAAU,EAAE,GAAG;oBACf,WAAW,EAAE,aAAa,CAAC,WAAW;iBACvC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IAED,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,IAAqB;QACnC,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,QAAgB;IACxC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,gDAAgD;IAChD,MAAM,aAAa,GAAG,OAAO,CAAC,OAAO,CACnC,mEAAmE,EACnE,EAAE,CACH,CAAC;IAEF,2BAA2B;IAC3B,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACzC,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,QAAgB;IAC7C,OAAO,CACL,QAAQ;SACL,WAAW,EAAE;SACb,IAAI,EAAE;QACP,yBAAyB;SACxB,OAAO,CAAC,uDAAuD,EAAE,EAAE,CAAC;QACrE,uBAAuB;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CACxB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CAAC,QAAgB;IAClD,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,oBAAoB;IACpB,IAAI,UAAU,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAClC,UAAU,CAAC,
IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,iDAAiD;IACjD,MAAM,eAAe,GAAG;QACtB,4DAA4D;KAC7D,CAAC;IACF,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACvD,IAAI,cAAc,KAAK,UAAU,EAAE,CAAC;YAClC,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,MAAM,YAAY,GAAG,UAAU;SAC5B,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;SACzB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC;SACrB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC;SACrB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAEzB,IAAI,YAAY,KAAK,UAAU,EAAE,CAAC;QAChC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAO9B,OAAO,YAAY,EAAE,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,OAAO,CAAC,IAAY;IAClC,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,EAAE;QAAE,OAAO,KAAK,CAAC;IAEzD,OAAO,YAAY,CAAC,SAAS,CAAC,KAAK,SAAS,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,kBAAkB,CAAC,UAAU,CAAC,KAAK,SAAS,CAAC;AACtD,CAAC"}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Replacement Tagger
|
|
3
3
|
* Replaces PII spans with placeholder tags and builds the PII map
|
|
4
4
|
*/
|
|
5
|
-
import { PIIType, SpanMatch, DetectedEntity, AnonymizationPolicy } from
|
|
5
|
+
import { PIIType, SpanMatch, DetectedEntity, AnonymizationPolicy, SemanticAttributes } from "../types/index.js";
|
|
6
6
|
/**
|
|
7
7
|
* PII Map entry (before encryption)
|
|
8
8
|
*/
|
|
@@ -31,17 +31,30 @@ export interface TaggingResult {
|
|
|
31
31
|
}
|
|
32
32
|
/**
|
|
33
33
|
* Generates a PII placeholder tag
|
|
34
|
-
* Format: <PII type="TYPE" id="N"/>
|
|
34
|
+
* Format: <PII type="TYPE" id="N"/> or <PII type="TYPE" gender="X" id="N"/> etc.
|
|
35
|
+
*
|
|
36
|
+
* Semantic attributes (gender, scope) are included when provided and not 'unknown'
|
|
35
37
|
*/
|
|
36
|
-
export declare function generateTag(type: PIIType, id: number): string;
|
|
38
|
+
export declare function generateTag(type: PIIType, id: number, semantic?: SemanticAttributes): string;
|
|
37
39
|
/**
|
|
38
|
-
*
|
|
39
|
-
* Returns null if not a valid tag
|
|
40
|
+
* Result of parsing a PII tag
|
|
40
41
|
*/
|
|
41
|
-
export
|
|
42
|
+
export interface ParsedTag {
|
|
42
43
|
type: PIIType;
|
|
43
44
|
id: number;
|
|
44
|
-
|
|
45
|
+
semantic?: SemanticAttributes;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Parses a PII tag to extract type, id, and semantic attributes
|
|
49
|
+
* Returns null if not a valid tag
|
|
50
|
+
*
|
|
51
|
+
* Supports formats:
|
|
52
|
+
* - <PII type="TYPE" id="N"/>
|
|
53
|
+
* - <PII type="TYPE" gender="X" id="N"/>
|
|
54
|
+
* - <PII type="TYPE" scope="X" id="N"/>
|
|
55
|
+
* - <PII type="TYPE" gender="X" scope="Y" id="N"/>
|
|
56
|
+
*/
|
|
57
|
+
export declare function parseTag(tag: string): ParsedTag | null;
|
|
45
58
|
/**
|
|
46
59
|
* Creates a key for the PII map
|
|
47
60
|
*/
|
|
@@ -55,20 +68,47 @@ export declare function tagEntities(text: string, matches: SpanMatch[], policy:
|
|
|
55
68
|
*/
|
|
56
69
|
export declare function isValidTag(tag: string): boolean;
|
|
57
70
|
/**
|
|
58
|
-
*
|
|
71
|
+
* Tag extraction result with the matched text for accurate replacement
|
|
59
72
|
*/
|
|
60
|
-
export
|
|
73
|
+
export interface ExtractedTag {
|
|
61
74
|
type: PIIType;
|
|
62
75
|
id: number;
|
|
63
76
|
position: number;
|
|
64
|
-
|
|
77
|
+
/** The actual matched text (needed for replacement when tag is mangled) */
|
|
78
|
+
matchedText: string;
|
|
79
|
+
/** Semantic attributes extracted from the tag */
|
|
80
|
+
semantic?: SemanticAttributes;
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Extracts all PII tags from anonymized text using fuzzy matching
|
|
84
|
+
* Handles mangled tags that may occur after translation
|
|
85
|
+
*
|
|
86
|
+
* Translation can mangle tags by:
|
|
87
|
+
* - Changing quote types (" → " or „ or « etc.)
|
|
88
|
+
* - Adding/removing whitespace
|
|
89
|
+
* - Changing case (type → Type, PII → pii)
|
|
90
|
+
* - Reordering attributes (id before type)
|
|
91
|
+
* - Modifying self-closing syntax (/> → / > or >)
|
|
92
|
+
*/
|
|
93
|
+
export declare function extractTags(anonymizedText: string): ExtractedTag[];
|
|
94
|
+
/**
|
|
95
|
+
* Extracts tags using strict matching (original behavior)
|
|
96
|
+
* Useful when you know tags haven't been mangled
|
|
97
|
+
* Supports optional gender and scope attributes
|
|
98
|
+
*/
|
|
99
|
+
export declare function extractTagsStrict(anonymizedText: string): ExtractedTag[];
|
|
65
100
|
/**
|
|
66
101
|
* Counts entities by type
|
|
67
102
|
*/
|
|
68
103
|
export declare function countEntitiesByType(entities: DetectedEntity[]): Record<PIIType, number>;
|
|
69
104
|
/**
|
|
70
105
|
* Rehydrates anonymized text using the PII map
|
|
71
|
-
*
|
|
106
|
+
* Uses fuzzy matching to handle tags that may have been mangled by translation
|
|
107
|
+
*
|
|
108
|
+
* @param anonymizedText - Text containing PII tags (possibly mangled)
|
|
109
|
+
* @param piiMap - Map of PII keys to original values
|
|
110
|
+
* @param strict - If true, use strict matching (original behavior). Default: false
|
|
111
|
+
* @returns Text with PII tags replaced by original values
|
|
72
112
|
*/
|
|
73
|
-
export declare function rehydrate(anonymizedText: string, piiMap: RawPIIMap): string;
|
|
113
|
+
export declare function rehydrate(anonymizedText: string, piiMap: RawPIIMap, strict?: boolean): string;
|
|
74
114
|
//# sourceMappingURL=tagger.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tagger.d.ts","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EACP,SAAS,EACT,cAAc,
|
|
1
|
+
{"version":3,"file":"tagger.d.ts","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EACP,SAAS,EACT,cAAc,EACd,mBAAmB,EACnB,kBAAkB,EACnB,MAAM,mBAAmB,CAAC;AAG3B;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,eAAe;IACf,IAAI,EAAE,OAAO,CAAC;IACd,gBAAgB;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4CAA4C;IAC5C,cAAc,EAAE,MAAM,CAAC;IACvB,kDAAkD;IAClD,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,wCAAwC;IACxC,MAAM,EAAE,SAAS,CAAC;CACnB;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,OAAO,EACb,EAAE,EAAE,MAAM,EACV,QAAQ,CAAC,EAAE,kBAAkB,GAC5B,MAAM,CAcR;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,CA+CtD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,SAAS,EAAE,EACpB,MAAM,EAAE,mBAAmB,GAC1B,aAAa,CA8Ef;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE/C;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,WAAW,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AA4DD;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CAAC,cAAc,EAAE,MAAM,GAAG,YAAY,EAAE,CAyFlE;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,cAAc,EAAE,MAAM,GAAG,YAAY,EAAE,CAqDxE;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,cAAc,EAAE,GACzB,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAczB;AAED;;;;;;;;GAQG;AACH,wBAAgB,SAAS,CACvB,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,SAAS,EACjB,MAAM,GAAE,OAAe,GACtB,MAAM,CAyBR"}
|