@elanlanguages/bridge-anonymization 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +73 -1
  2. package/dist/crypto/pii-map-crypto.d.ts.map +1 -1
  3. package/dist/crypto/pii-map-crypto.js +8 -8
  4. package/dist/crypto/pii-map-crypto.js.map +1 -1
  5. package/dist/index.d.ts +25 -20
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +103 -52
  8. package/dist/index.js.map +1 -1
  9. package/dist/ner/model-manager.d.ts.map +1 -1
  10. package/dist/ner/model-manager.js +10 -8
  11. package/dist/ner/model-manager.js.map +1 -1
  12. package/dist/ner/ner-model.d.ts.map +1 -1
  13. package/dist/ner/ner-model.js +9 -9
  14. package/dist/ner/ner-model.js.map +1 -1
  15. package/dist/ner/onnx-runtime.d.ts +3 -3
  16. package/dist/ner/onnx-runtime.d.ts.map +1 -1
  17. package/dist/ner/onnx-runtime.js +1 -1
  18. package/dist/ner/onnx-runtime.js.map +1 -1
  19. package/dist/ner/tokenizer.js +3 -3
  20. package/dist/ner/tokenizer.js.map +1 -1
  21. package/dist/pipeline/index.d.ts +7 -4
  22. package/dist/pipeline/index.d.ts.map +1 -1
  23. package/dist/pipeline/index.js +7 -4
  24. package/dist/pipeline/index.js.map +1 -1
  25. package/dist/pipeline/resolver.d.ts.map +1 -1
  26. package/dist/pipeline/resolver.js +3 -2
  27. package/dist/pipeline/resolver.js.map +1 -1
  28. package/dist/pipeline/semantic-data-loader.d.ts +157 -0
  29. package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
  30. package/dist/pipeline/semantic-data-loader.js +662 -0
  31. package/dist/pipeline/semantic-data-loader.js.map +1 -0
  32. package/dist/pipeline/semantic-enricher.d.ts +102 -0
  33. package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
  34. package/dist/pipeline/semantic-enricher.js +268 -0
  35. package/dist/pipeline/semantic-enricher.js.map +1 -0
  36. package/dist/pipeline/tagger.d.ts +52 -12
  37. package/dist/pipeline/tagger.d.ts.map +1 -1
  38. package/dist/pipeline/tagger.js +226 -21
  39. package/dist/pipeline/tagger.js.map +1 -1
  40. package/dist/pipeline/title-extractor.d.ts +79 -0
  41. package/dist/pipeline/title-extractor.d.ts.map +1 -0
  42. package/dist/pipeline/title-extractor.js +801 -0
  43. package/dist/pipeline/title-extractor.js.map +1 -0
  44. package/dist/types/index.d.ts +66 -3
  45. package/dist/types/index.d.ts.map +1 -1
  46. package/dist/types/index.js +14 -3
  47. package/dist/types/index.js.map +1 -1
  48. package/dist/utils/index.d.ts +3 -3
  49. package/dist/utils/index.js +3 -3
  50. package/package.json +7 -5
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-data-loader.js","sourceRoot":"","sources":["../../src/pipeline/semantic-data-loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,UAAU,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AAEzB;;GAEG;AACH,MAAM,eAAe,GAA2B;IAC9C,CAAC,EAAE,MAAM;IACT,IAAI,EAAE,MAAM;IACZ,IAAI,EAAE,MAAM,EAAE,cAAc;IAC5B,CAAC,EAAE,QAAQ;IACX,IAAI,EAAE,QAAQ;IACd,IAAI,EAAE,QAAQ,EAAE,gBAAgB;IAChC,GAAG,EAAE,SAAS,EAAE,SAAS;CAC1B,CAAC;AAEF;;;GAGG;AACH,MAAM,eAAe,GAA2B;IAC9C,EAAE,EAAE,CAAC,EAAE,gBAAgB;IACvB,EAAE,EAAE,CAAC,EAAE,UAAU;IACjB,EAAE,EAAE,CAAC,EAAE,MAAM;IACb,EAAE,EAAE,CAAC,EAAE,QAAQ;IACf,EAAE,EAAE,CAAC,EAAE,QAAQ;IACf,EAAE,EAAE,CAAC,EAAE,WAAW;IAClB,EAAE,EAAE,CAAC,EAAE,QAAQ;IACf,EAAE,EAAE,CAAC,EAAE,SAAS;IAChB,EAAE,EAAE,CAAC,EAAE,UAAU;IACjB,EAAE,EAAE,CAAC,EAAE,aAAa;IACpB,EAAE,EAAE,EAAE,EAAE,cAAc;IACtB,EAAE,EAAE,EAAE,EAAE,6CAA6C;IACrD,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,YAAY;IACpB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,iBAAiB;IACzB,EAAE,EAAE,EAAE,EAAE,WAAW;IACnB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,WAAW;IACnB,EAAE,EAAE,EAAE,EAAE,oBAAoB;IAC5B,EAAE,EAAE,EAAE,EAAE,WAAW;IACnB,EAAE,EAAE,EAAE,EAAE,oBAAoB;IAC5B,EAAE,EAAE,EAAE,EAAE,YAAY;IACpB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,aAAa;IACrB,EAAE,EAAE,EAAE,EAAE,UAAU;IAClB,EAAE,EAAE,EAAE,EAAE,wBAAwB;IAChC,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,gBAAgB;IACxB,EAAE,EAAE,EAAE,EAAE,SAAS;IACjB,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,kBAAkB;
IAC1B,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,QAAQ;IAChB,EAAE,EAAE,EAAE,EAAE,UAAU;CACnB,CAAC;AAEF,iEAAiE;AACjE,KAAK,eAAe,CAAC;AAkCrB,oCAAoC;AACpC,IAAI,YAAY,GAAwB,IAAI,CAAC;AAE7C,gFAAgF;AAChF,6BAA6B;AAC7B,gFAAgF;AAEhF;;;GAGG;AACH,MAAM,UAAU,uBAAuB;IACrC,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC;IAE7B,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;QACzB,KAAK,QAAQ;YACX,OAAO,IAAI,CAAC,IAAI,CACd,OAAO,EACP,SAAS,EACT,QAAQ,EACR,sBAAsB,EACtB,eAAe,CAChB,CAAC;QACJ,KAAK,OAAO;YACV,OAAO,IAAI,CAAC,IAAI,CACd,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,EACrE,sBAAsB,EACtB,eAAe,CAChB,CAAC;QACJ;YACE,oDAAoD;YACpD,OAAO,IAAI,CAAC,IAAI,CACd,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,EAC7D,sBAAsB,EACtB,eAAe,CAChB,CAAC;IACN,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO,uBAAuB,EAAE,CAAC;AACnC,CAAC;AAsBD;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAA2B;IACzD;QACE,QAAQ,EAAE,cAAc;QACxB,GAAG,EAAE,uGAAuG;QAC5G,QAAQ,EAAE,IAAI;QACd,WAAW,EAAE,mCAAmC;QAChD,IAAI,EAAE,SAAS;KAChB;IACD;QACE,QAAQ,EAAE,iBAAiB;QAC3B,GAAG,EAAE,2DAA2D;QAChE,QAAQ,EAAE,IAAI;QACd,WAAW,EAAE,0CAA0C;QACvD,IAAI,EAAE,oBAAoB;KAC3B;IACD;QACE,QAAQ,EAAE,iBAAiB;QAC3B,GAAG,EAAE,2DAA2D;QAChE,QAAQ,EAAE,IAAI;QACd,WAAW,EAAE,yBAAyB;QACtC,IAAI,EAAE,QAAQ;KACf;IACD;QACE,QAAQ,EAAE,sBAAsB;QAChC,GAAG,EAAE,gEAAgE;QACrE,QAAQ,EAAE,KAAK;QACf,WAAW,EAAE,8CAA8C;QAC3D,IAAI,EAAE,SAAS;KAChB;CACF,CAAC;AAgBF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB;IAC5C,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,IAAI,CAAC;QACH,2BAA2B;QAC3B,KAAK,MAAM,IAAI,IAAI,mBAAmB,EAAE,CAAC;YACvC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB;IACrC,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAC1C,MAAM,aAAa,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAEpE,OAAO,aAAa,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;QA
ClC,IAAI,CAAC;YACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY,CACzB,GAAW,EACX,QAAgB,EAChB,UAA6C;IAE7C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE;YACP,YAAY,EAAE,4BAA4B;SAC3C;KACF,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC;QAC5C,CAAC;QACD,MAAM,IAAI,KAAK,CACb,sBAAsB,GAAG,KAAK,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CACvE,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC1D,MAAM,KAAK,GACT,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE7E,0BAA0B;IAC1B,MAAM,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpE,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEzC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;IAC1C,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACnD,CAAC;IAED,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,iDAAiD;IACjD,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;QAEnC,IAAI,MAAM,CAAC,IAAI;YAAE,MAAM;QAEvB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAmB,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnB,eAAe,IAAI,KAAK,CAAC,MAAM,CAAC;QAEhC,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC;gBACT,IAAI,EAAE,QAAQ;gBACd,eAAe;gBACf,UAAU,EAAE,KAAK;gBACjB,OAAO,EACL,KAAK,KAAK,IAAI,IAAI,KAAK,GAAG,CAAC;oBACzB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;oBAC7C,CAAC,CAAC,IAAI;aACX,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,UAAU,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,UAAU,CAAC,OAAe,EAAE,OAAe;IACxD,iDAAiD;IACjD,sEAAsE;IACtE,oEAAoE;IAEpE,MAAM,EAAE,IAAI,E
AAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;IAC/C,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,CAAC;QACH,sDAAsD;QACtD,MAAM,SAAS,CAAC,aAAa,OAAO,SAAS,OAAO,GAAG,CAAC,CAAC;IAC3D,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;QAClE,IAAI,CAAC;YACH,MAAM,SAAS,CAAC,YAAY,OAAO,SAAS,OAAO,GAAG,CAAC,CAAC;QAC1D,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,iFAAiF,CAClF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,UAA6C,EAC7C,QAAmC;IAEnC,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,mBAAmB;IACnB,MAAM,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAErD,QAAQ,EAAE,CAAC,yCAAyC,CAAC,CAAC;IACtD,QAAQ,EAAE,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,mBAAmB,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAExC,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,WAAW,KAAK,CAAC,CAAC;QAEjD,IAAI,CAAC;YACH,IAAI,KAAK,EAAE,CAAC;gBACV,2BAA2B;gBAC3B,MAAM,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;gBAClC,MAAM,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;gBAClD,QAAQ,EAAE,CAAC,cAAc,IAAI,CAAC,QAAQ,KAAK,CAAC,CAAC;gBAC7C,MAAM,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YACrC,CAAC;iBAAM,CAAC;gBACN,MAAM,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CACb,oCAAoC,IAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,EAAE,CAClE,CAAC;YACJ,CAAC;YACD,mCAAmC;YACnC,QAAQ,EAAE,CAAC,0BAA0B,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,QAAQ,EAAE,CAAC,kCAAkC,CAAC,CAAC;IAE/C,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,UAII,EAAE;IAEN,MAAM,EAAE,YAAY,GAAG,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAE9D,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,8BAA8B;IAC9B,MAAM,YAAY,GAAG,MAAM,wBAAwB,EAAE,CAAC;IAEtD,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,IAAI,CAAC,YAAY,EAAE,CAAC;Y
AClB,MAAM,IAAI,KAAK,CACb,8BAA8B,OAAO,OAAO;gBAC1C,mCAAmC;gBACnC,6EAA6E;gBAC7E,gCAAgC;gBAChC,sDAAsD,CACzD,CAAC;QACJ,CAAC;QAED,MAAM,oBAAoB,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IACnD,CAAC;SAAM,CAAC;QACN,QAAQ,EAAE,CAAC,+BAA+B,OAAO,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB;IAC1C,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAC1C,MAAM,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/D,4BAA4B;IAC5B,iBAAiB,EAAE,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB;IAKjC,OAAO;QACL,KAAK,EAAE,mBAAmB;QAC1B,QAAQ,EAAE,uBAAuB,EAAE;QACnC,SAAS,EAAE,OAAO;KACnB,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,yBAAyB;AACzB,gFAAgF;AAEhF;;GAEG;AACH,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAqB,CAAC;IAE3C,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC,gCAAgC;IACrF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,gCAAgC;QAChC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;YAAE,SAAS;QAEzD,iEAAiE;QACjE,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,IAAI,CAAC,EAAE,CAAC,KAAK,GAAG;YAAE,SAAS;QAEnD,6BAA6B;QAC7B,sDAAsD;QACtD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAC1D,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,KAAK,CAAC;QACnC,IACE,UAAU,KAAK,SAAS;YACxB,UAAU,KAAK,EAAE;YACjB,IAAI,KAAK,SAAS;YAClB,IAAI,KAAK,EAAE;YAEX,SAAS;QAEX,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;QAC3C,IAAI,MAAM,KAAK,SAAS;YAAE,SAAS;QAEnC,MAAM,cAAc,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAE1C,mEAAmE;QACnE,+EAA+E;QAC/E,4BAA4B;QAC5B,MAAM,eAAe,GAA2B,EAAE,CAAC;QAEnD,+DAA+D;QAC/D,wDAAwD;QAExD,kBAAkB;QAClB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;YAC/B,KAAK,CAAC,GAAG,CAAC,cAAc,EAAE;gBACxB,MAAM;gBACN,eAAe,EACb,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,SAAS;aACxE,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,mFAAmF;YACnF,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,cAAc,CAAE,CAAC;YAC5C,IAAI,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;gBAC/B,mDAA
mD;gBACnD,IAAI,CAAC,QAAQ,CAAC,eAAe,EAAE,CAAC;oBAC9B,QAAQ,CAAC,eAAe,GAAG,EAAE,CAAC;gBAChC,CAAC;gBACD,0EAA0E;YAC5E,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,SAAS,WAAW,CAAC,QAAgB;IACnC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAqB,CAAC;IAE5C,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,gEAAgE;IAChE,MAAM,OAAO,GAAG,CAAC,IAAY,EAAE,KAAgB,EAAQ,EAAE;QACvD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACxC,IAAI,CAAC,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,QAAQ,CAAC,UAAU,EAAE,CAAC;YACxD,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAChC,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;YAAE,SAAS;QAEjC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE;YAAE,SAAS;QAEhC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,cAAc,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAClD,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAElD,IACE,IAAI,KAAK,SAAS;YAClB,IAAI,KAAK,EAAE;YACX,WAAW,KAAK,SAAS;YACzB,WAAW,KAAK,EAAE;YAElB,SAAS;QAEX,MAAM,SAAS,GAAc,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC;QAElE,2CAA2C;QAC3C,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAEzB,yDAAyD;QACzD,IACE,SAAS,KAAK,SAAS;YACvB,SAAS,KAAK,EAAE;YAChB,SAAS,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC,WAAW,EAAE,EAC9C,CAAC;YACD,OAAO,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAChC,CAAC;QAED,iDAAiD;QACjD,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;YACrC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;YAC/B,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5D,OAAO,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,QAAgB;IACtC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE5C,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CA
AC,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,gCAAgC;QAChC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;YAAE,SAAS;QAEzD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAE/B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtB,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE;YACxE,SAAS;QAEX,mCAAmC;QACnC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,CAAC;QAExC,wBAAwB;QACxB,uDAAuD;QACvD,MAAM,UAAU,GAAG,oBAAoB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACpD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,IAAY;IACtD,MAAM,UAAU,GAAa,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAElD,oCAAoC;IACpC,MAAM,UAAU,GAA6B;QAC3C,eAAe,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,0BAA0B,CAAC;QACrE,gBAAgB,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,CAAC;QAC/D,OAAO,EAAE,CAAC,aAAa,CAAC;QACxB,MAAM,EAAE,CAAC,YAAY,CAAC;QACtB,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,CAAC;QACtC,KAAK,EAAE,CAAC,QAAQ,EAAE,SAAS,CAAC;QAC5B,WAAW,EAAE,CAAC,SAAS,EAAE,iBAAiB,EAAE,aAAa,CAAC;QAC1D,WAAW,EAAE,CAAC,SAAS,EAAE,QAAQ,EAAE,UAAU,CAAC;QAC9C,OAAO,EAAE,CAAC,YAAY,EAAE,aAAa,CAAC;QACtC,OAAO,EAAE,CAAC,SAAS,EAAE,UAAU,CAAC;QAChC,MAAM,EAAE,CAAC,UAAU,EAAE,oBAAoB,CAAC;QAC1C,KAAK,EAAE,CAAC,4BAA4B,EAAE,KAAK,CAAC;QAC5C,KAAK,EAAE,CAAC,QAAQ,CAAC;QACjB,aAAa,EAAE,CAAC,OAAO,EAAE,mBAAmB,CAAC;QAC7C,sBAAsB,EAAE,CAAC,KAAK,EAAE,UAAU,CAAC;QAC3C,gBAAgB,EAAE,CAAC,SAAS,EAAE,YAAY,CAAC;KAC5C,CAAC;IAEF,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACrB,UAAU,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,QAAgB;IACpC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAuB,CAAC;IAE/C,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,KAAK,GAA
G,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;gBAAE,SAAS;YAEjC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,SAAS;YAE/B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB;YACzC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAE3B,IACE,IAAI,KAAK,SAAS;gBAClB,IAAI,KAAK,EAAE;gBACX,IAAI,KAAK,SAAS;gBAClB,IAAI,KAAK,EAAE;gBAEX,SAAS;YAEX,MAAM,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACtC,IAAI,WAAW,KAAK,SAAS,IAAI,WAAW,KAAK,EAAE;gBAAE,SAAS;YAE9D,MAAM,WAAW,GAAgB,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAEhE,kBAAkB;YAClB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,WAAW,CAAC,CAAC;YAE7C,8BAA8B;YAC9B,IACE,SAAS,KAAK,SAAS;gBACvB,SAAS,KAAK,EAAE;gBAChB,SAAS,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC,WAAW,EAAE,EAC9C,CAAC;gBACD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,EAAE,WAAW,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,0BAA0B;IAC5B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAChF,0BAA0B;AAC1B,gFAAgF;AAEhF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,UAII,EAAE;IAEN,4BAA4B;IAC5B,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAElC,gBAAgB;IAChB,gBAAgB,EAAE,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB;IAC9B,IAAI,YAAY,KAAK,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,OAAO,GAAG,uBAAuB,EAAE,CAAC;IAE1C,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,oCAAoC,OAAO,IAAI;YAC7C,gHAAgH,CACnH,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC;IAChE,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;IAClE,MAAM,SAAS,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;IACxE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC,CAAC;IAEzE,YAAY,GAAG;QACb,KAAK;QACL,MAAM;QACN,SAAS;QACT,OAAO;QACP,MAAM,EAAE,IAAI;KACb,CAAC;IAEF,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe;IAC7B,
IAAI,YAAY,KAAK,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,OAAO,gBAAgB,EAAE,CAAC;IAC5B,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAC1B,IAAY,EACZ,MAAe;IAEf,MAAM,IAAI,GAAG,eAAe,EAAE,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAEjD,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IAE1C,qCAAqC;IACrC,IACE,MAAM,KAAK,SAAS;QACpB,MAAM,KAAK,EAAE;QACb,KAAK,CAAC,eAAe,KAAK,SAAS;QACnC,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,KAAK,SAAS,EAC3C,CAAC;QACD,OAAO,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,qBAAqB,GAAG,MAAM,CAAC;AAErC;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,QAAgB;IAEhB,MAAM,IAAI,GAAG,eAAe,EAAE,CAAC;IAC/B,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAEjD,iEAAiE;IACjE,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACnD,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;IAC1C,CAAC;IAED,+EAA+E;IAC/E,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACzC,IAAI,IAAI,IAAI,IAAI,CAAC,UAAU,IAAI,qBAAqB,EAAE,CAAC;QACrD,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,gBAAgB;IAChB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC5C,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC;IACzD,CAAC;IAED,0CAA0C;IAC1C,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAO1B,IAAI,YAAY,KAAK,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC1E,CAAC;IAED,OAAO;QACL,KAAK,EAAE,YAAY,CAAC,KAAK,CAAC,IAAI;QAC9B,MAAM,EAAE,YAAY,CAAC,MAAM,CAAC,IAAI;QAChC,SAAS,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI;QACtC,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,IAAI;QAClC,MAAM,EAAE,IAAI;KA
Cb,CAAC;AACJ,CAAC"}
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Semantic Enricher
3
+ * Enriches PII spans with semantic attributes (gender, location scope)
4
+ * for MT-friendly tags that preserve grammatical context.
5
+ *
6
+ * This module uses data from the GeoNames and gender-guesser projects.
7
+ * Data is automatically downloaded when using:
8
+ * createAnonymizer({ semantic: { enabled: true, autoDownload: true } })
9
+ */
10
+ import { SpanMatch, PersonGender, LocationScope } from "../types/index.js";
11
+ import { isSemanticDataAvailable, getDataDirectory } from "./semantic-data-loader.js";
12
+ export { isSemanticDataAvailable, getDataDirectory };
13
+ /**
14
+ * Configuration for semantic enrichment
+ *
+ * Passed as the optional second argument of `enrichSemantics`; every field
+ * is optional.
+ *
+ * NOTE(review): the compiled `enrichSemantics` shown in this diff only reads
+ * `locale` from this object - confirm where `minConfidence` and `strictMode`
+ * take effect before relying on them.
15
+ */
16
+ export interface EnricherConfig {
17
+ /** Locale hint for name gender disambiguation (e.g., 'de', 'it', 'fr') */
18
+ locale?: string;
19
+ /** Minimum confidence to apply semantic attributes (default: 0.0) */
20
+ minConfidence?: number;
21
+ /** Whether to mark low-confidence results as 'unknown' */
22
+ strictMode?: boolean;
23
+ }
24
+ /**
25
+ * Result of gender inference with confidence
+ *
+ * As produced by the compiled `inferGender` in this package:
+ * - a name found in the lookup data yields that gender with
+ *   `confidence: 1.0` and `source: "database"`;
+ * - every other outcome (blank name, data files unavailable, no entry)
+ *   yields `gender: "unknown"`, `confidence: 0`, `source: "unknown"`.
+ *
+ * NOTE(review): no code path visible in this diff returns
+ * `source: "inference"` - confirm whether that value is produced elsewhere.
26
+ */
27
+ export interface GenderResult {
28
+ gender: PersonGender;
29
+ confidence: number;
30
+ source: "database" | "inference" | "unknown";
31
+ }
32
+ /**
33
+ * Result of location classification with confidence
+ *
+ * As produced by the compiled `classifyLocation` in this package:
+ * - `confidence` is 1.0 for a direct lookup hit, 0.9 for a hit found through
+ *   a generated name variation, and 0 when `scope` is "unknown";
+ * - `countryCode` is copied from the matched lookup entry and is absent on
+ *   "unknown" results.
34
+ */
35
+ export interface LocationResult {
36
+ scope: LocationScope;
37
+ confidence: number;
38
+ countryCode?: string;
39
+ }
40
+ /**
41
+ * Enriches PII spans with semantic attributes based on lookup tables
+ *
+ * PERSON spans receive `semantic.gender` and LOCATION spans receive
+ * `semantic.scope`; spans of any other type are passed through unchanged.
+ * When the semantic data files are not available the input array itself is
+ * returned without any enrichment (no error is raised).
42
+ *
43
+ * @param spans - Array of detected PII spans
44
+ * @param config - Optional configuration for enrichment
45
+ * @returns Array of spans with semantic attributes added
46
+ *
47
+ * @example
48
+ * ```typescript
49
+ * const enrichedSpans = enrichSemantics(spans, { locale: 'de' });
50
+ * // "Mary" -> { gender: 'female' }
51
+ * // "Berlin" -> { scope: 'city' }
52
+ * ```
53
+ */
54
+ export declare function enrichSemantics(spans: SpanMatch[], config?: EnricherConfig): SpanMatch[];
55
+ /**
56
+ * Infers gender from a person's name using the lookup database
+ *
+ * Only the first name is looked up: a leading honorific (Dr., Mr., Mrs.,
+ * Ms., Prof., Rev., Sir, Dame, Lord, Lady) is dropped and the first
+ * remaining word is used. A blank name, unavailable data files, or a name
+ * with no database entry all yield
+ * `{ gender: 'unknown', confidence: 0, source: 'unknown' }`.
57
+ *
58
+ * @param name - Full name or first name
59
+ * @param locale - Optional locale for disambiguation (e.g., 'de', 'it')
60
+ * @returns Gender result with confidence
61
+ *
62
+ * @example
63
+ * ```typescript
64
+ * inferGender('Mary Smith'); // { gender: 'female', confidence: 1.0 }
65
+ * inferGender('Andrea', 'it'); // { gender: 'male', confidence: 1.0 }
66
+ * inferGender('Andrea', 'en'); // { gender: 'female', confidence: 1.0 }
67
+ * ```
68
+ */
69
+ export declare function inferGender(name: string, locale?: string): GenderResult;
70
+ /**
71
+ * Classifies a location by its geographic scope
+ *
+ * The name is lower-cased, trimmed and stripped of a trailing generic word
+ * (city, town, village, state, province, region, county) before lookup.
+ * A direct lookup hit is reported with confidence 1.0; a hit found through
+ * a generated name variation (leading article removed, diacritics
+ * transliterated) is reported with confidence 0.9. If nothing matches, or
+ * the data files are unavailable, the result is
+ * `{ scope: 'unknown', confidence: 0 }`.
72
+ *
73
+ * @param location - Location name
74
+ * @returns Classification result with confidence
75
+ *
76
+ * @example
77
+ * ```typescript
78
+ * classifyLocation('Berlin'); // { scope: 'city', confidence: 1.0 }
79
+ * classifyLocation('Germany'); // { scope: 'country', confidence: 1.0 }
80
+ * classifyLocation('Bavaria'); // { scope: 'region', confidence: 1.0 }
81
+ * ```
82
+ */
83
+ export declare function classifyLocation(location: string): LocationResult;
84
+ /**
85
+ * Gets statistics about the lookup databases
+ *
+ * The compiled implementation returns the result of the data loader's
+ * `getDataStats()` unchanged.
+ *
+ * @returns Object with numeric `names`, `cities`, `countries` and `regions`
+ * fields plus a `loaded` flag (presumably entry counts per lookup table and
+ * whether the data is in memory - confirm against `getDataStats`).
86
+ */
87
+ export declare function getDatabaseStats(): {
88
+ names: number;
89
+ cities: number;
90
+ countries: number;
91
+ regions: number;
92
+ loaded: boolean;
93
+ };
94
+ /**
95
+ * Checks if a name exists in the database
+ *
+ * @param name - Name to look for
+ * @returns Whether the name was found
96
+ */
97
+ export declare function hasName(name: string): boolean;
98
+ /**
99
+ * Checks if a location exists in the database
+ *
+ * @param location - Location name to look for
+ * @returns Whether the location was found
100
+ */
101
+ export declare function hasLocation(location: string): boolean;
102
+ //# sourceMappingURL=semantic-enricher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-enricher.d.ts","sourceRoot":"","sources":["../../src/pipeline/semantic-enricher.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EACL,SAAS,EAET,YAAY,EACZ,aAAa,EACd,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,uBAAuB,EAKvB,gBAAgB,EACjB,MAAM,2BAA2B,CAAC;AAGnC,OAAO,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAErD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,0EAA0E;IAC1E,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qEAAqE;IACrE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,0DAA0D;IAC1D,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,YAAY,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,UAAU,GAAG,WAAW,GAAG,SAAS,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,aAAa,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAuBD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,SAAS,EAAE,EAClB,MAAM,CAAC,EAAE,cAAc,GACtB,SAAS,EAAE,CAoBb;AAgCD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,YAAY,CA2BvE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,CAmCjE;AA+ED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;CACjB,CAEA;AAED;;GAEG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAa7C;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAWrD"}
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Semantic Enricher
3
+ * Enriches PII spans with semantic attributes (gender, location scope)
4
+ * for MT-friendly tags that preserve grammatical context.
5
+ *
6
+ * This module uses data from the GeoNames and gender-guesser projects.
7
+ * Data is automatically downloaded when using:
8
+ * createAnonymizer({ semantic: { enabled: true, autoDownload: true } })
9
+ */
10
+ import { PIIType, } from "../types/index.js";
11
+ import { isSemanticDataAvailable, loadSemanticData, lookupGender, lookupLocationType, getDataStats, getDataDirectory, } from "./semantic-data-loader.js";
12
+ // Re-export data availability check and other exports from data loader
13
+ export { isSemanticDataAvailable, getDataDirectory };
14
+ // Track if data has been initialized
15
+ let dataInitialized = false;
16
/**
 * Loads the semantic lookup data on first use.
 *
 * Once the module-level `dataInitialized` flag is set, further calls return
 * immediately. The load is performed synchronously, so the data files are
 * expected to be on disk already.
 *
 * @throws {Error} If the semantic data files are not available.
 */
function ensureDataLoaded() {
  if (!dataInitialized) {
    if (!isSemanticDataAvailable()) {
      const reason = `Semantic enrichment data not available. `;
      const remedy = `Use ensureSemanticData() or createAnonymizer({ semantic: { enabled: true } }) to download.`;
      throw new Error(reason + remedy);
    }
    loadSemanticData();
    dataInitialized = true;
  }
}
30
/**
 * Adds semantic attributes to detected PII spans using the lookup tables.
 *
 * PERSON spans are routed to `enrichPerson` (with the configured locale) and
 * LOCATION spans to `enrichLocation`; every other span is returned as-is.
 * If the lookup data has not been loaded and is not available on disk, the
 * input array is returned untouched instead of throwing.
 *
 * @param spans - Array of detected PII spans
 * @param config - Optional configuration; only `locale` is read here
 * @returns New array of (possibly enriched) spans, or `spans` itself when
 *   the semantic data is unavailable
 *
 * @example
 * ```typescript
 * const enrichedSpans = enrichSemantics(spans, { locale: 'de' });
 * // "Mary" -> { gender: 'female' }
 * // "Berlin" -> { scope: 'city' }
 * ```
 */
export function enrichSemantics(spans, config) {
  // Lazy initialisation: skip enrichment quietly when the data is missing.
  if (!dataInitialized) {
    const canLoad = isSemanticDataAvailable();
    if (!canLoad) {
      return spans;
    }
    ensureDataLoaded();
  }

  const enrichOne = (span) => {
    const { type } = span;
    if (type === PIIType.PERSON) {
      return enrichPerson(span, config?.locale);
    }
    if (type === PIIType.LOCATION) {
      return enrichLocation(span);
    }
    return span;
  };

  return spans.map((span) => enrichOne(span));
}
64
/**
 * Returns a copy of a PERSON span whose `semantic` object carries the gender
 * inferred from the span text. Existing `semantic` properties are preserved;
 * the input span is not modified.
 *
 * @param span - PERSON span to enrich
 * @param locale - Optional locale hint forwarded to `inferGender`
 * @returns New span object with `semantic.gender` set
 */
function enrichPerson(span, locale) {
  const { gender } = inferGender(span.text, locale);
  const semantic = { ...span.semantic, gender };
  return { ...span, semantic };
}
77
/**
 * Returns a copy of a LOCATION span whose `semantic` object carries the
 * geographic scope determined by `classifyLocation`. Existing `semantic`
 * properties are preserved; the input span is not modified.
 *
 * @param span - LOCATION span to enrich
 * @returns New span object with `semantic.scope` set
 */
function enrichLocation(span) {
  const { scope } = classifyLocation(span.text);
  const semantic = { ...span.semantic, scope };
  return { ...span, semantic };
}
90
/**
 * Looks up the gender associated with a person's first name.
 *
 * The first name is extracted from `name` and passed, together with the
 * optional locale, to the gender lookup. A hit is reported with
 * `confidence: 1.0` and `source: "database"`. A blank name, unavailable
 * data, or a missing/empty lookup result produce the "unknown" result.
 *
 * @param name - Full name or first name
 * @param locale - Optional locale for disambiguation (e.g., 'de', 'it')
 * @returns Gender result with confidence
 *
 * @example
 * ```typescript
 * inferGender('Mary Smith'); // { gender: 'female', confidence: 1.0 }
 * inferGender('Andrea', 'it'); // { gender: 'male', confidence: 1.0 }
 * inferGender('Andrea', 'en'); // { gender: 'female', confidence: 1.0 }
 * ```
 */
export function inferGender(name, locale) {
  // A fresh object per call so callers never share a mutable result.
  const unknownResult = () => ({ gender: "unknown", confidence: 0, source: "unknown" });

  const firstName = extractFirstName(name);
  const hasFirstName = firstName !== null && firstName !== "";
  if (!hasFirstName) {
    return unknownResult();
  }

  if (!dataInitialized) {
    if (!isSemanticDataAvailable()) {
      return unknownResult();
    }
    ensureDataLoaded();
  }

  const gender = lookupGender(firstName, locale);
  const found = gender !== undefined && gender !== "";
  return found
    ? { gender, confidence: 1.0, source: "database" }
    : unknownResult();
}
127
/**
 * Classifies a location by its geographic scope.
 *
 * Lookup order:
 *  1. the exact name (lower-cased, trimmed, whitespace collapsed), so names
 *     whose generic suffix is part of the name ("Mexico City", "Kansas
 *     City") are not reduced to a different place before being looked up;
 *  2. the normalized name with a trailing generic suffix removed;
 *  3. each generated variation of the name.
 * Hits from steps 1-2 are reported with confidence 1.0, hits from step 3
 * with 0.9. The lookup result is expected to expose `type` and
 * `countryCode`.
 *
 * @param location - Location name
 * @returns Classification result with confidence; `{ scope: "unknown",
 *   confidence: 0 }` when nothing matches or the data is unavailable
 *
 * @example
 * ```typescript
 * classifyLocation('Berlin'); // { scope: 'city', confidence: 1.0 }
 * classifyLocation('Germany'); // { scope: 'country', confidence: 1.0 }
 * classifyLocation('Bavaria'); // { scope: 'region', confidence: 1.0 }
 * ```
 */
export function classifyLocation(location) {
  // Check if data is available
  if (!dataInitialized && !isSemanticDataAvailable()) {
    return { scope: "unknown", confidence: 0 };
  }
  if (!dataInitialized) {
    ensureDataLoaded();
  }

  const normalized = normalizeLocationName(location);
  // Same key shape as `normalized`, but with any generic suffix kept.
  const exact = location.toLowerCase().trim().replace(/\s+/g, " ");
  const directCandidates = exact === normalized ? [normalized] : [exact, normalized];

  for (const candidate of directCandidates) {
    const result = lookupLocationType(candidate);
    if (result) {
      return {
        scope: result.type,
        confidence: 1.0,
        countryCode: result.countryCode,
      };
    }
  }

  // Fall back to looser spellings; these are slightly less trustworthy.
  for (const variant of generateLocationVariations(location)) {
    const variantResult = lookupLocationType(variant);
    if (variantResult) {
      return {
        scope: variantResult.type,
        confidence: 0.9,
        countryCode: variantResult.countryCode,
      };
    }
  }

  return { scope: "unknown", confidence: 0 };
}
171
/**
 * Extracts the first name from a full name.
 *
 * Leading honorifics (Dr., Mr., Mrs., Ms., Prof., Rev., Sir, Dame, Lord,
 * Lady) are removed before the first whitespace-delimited word is taken.
 * Stacked honorifics such as "Prof. Dr. Hans Müller" are removed as a group,
 * so the result is "Hans" rather than "Dr.". An honorific is only stripped
 * when more text follows it, so a bare "Sir" or a trailing surname such as
 * "Mr Lord" is left intact.
 *
 * @param fullName - Full name or first name
 * @returns The first name, or `null` when `fullName` is blank
 */
function extractFirstName(fullName) {
  const trimmed = fullName.trim();
  if (!trimmed) {
    return null;
  }

  // One or more "<honorific><whitespace>" groups anchored at the start.
  const leadingHonorifics =
    /^(?:(?:dr\.?|mr\.?|mrs\.?|ms\.?|prof\.?|rev\.?|sir|dame|lord|lady)\s+)+/i;
  const withoutPrefix = trimmed.replace(leadingHonorifics, "");

  const [first] = withoutPrefix.split(/\s+/);
  return first ?? null;
}
184
/**
 * Produces the lookup key for a location name.
 *
 * Steps, in order: lower-case, trim, drop a single trailing generic suffix
 * (city, town, village, state, province, region, county) when it is preceded
 * by whitespace, then collapse every whitespace run into one space.
 *
 * @param location - Raw location name
 * @returns Normalized key
 */
function normalizeLocationName(location) {
    const lowered = location.toLowerCase().trim();
    // Remove common suffixes
    const withoutSuffix = lowered.replace(/\s+(city|town|village|state|province|region|county)$/i, "");
    // Normalize whitespace
    return withoutSuffix.replace(/\s+/g, " ");
}
196
/**
 * Generates variations of a location name for fuzzy matching.
 *
 * Starting from normalizeLocationName(location), candidates are appended in
 * this order, each only when it differs from the normalized name:
 *   1. the name without a leading "the "
 *   2. the name without a leading article from other languages
 *   3. the NFC-composed form (only differs for decomposed Unicode input)
 *   4. an ASCII-leaning transliteration with diacritics removed
 *
 * The transliteration decomposes the text (NFD) and drops combining marks
 * instead of matching a fixed list of precomposed letters, so both
 * precomposed and decomposed input are handled, as are diacritics beyond the
 * Western European set. Letters that have no decomposition (ß, æ, ø, œ) are
 * mapped explicitly.
 *
 * @param location - Raw location name
 * @returns Candidate lookup keys (may be empty); the normalized name itself
 *   is never included
 */
function generateLocationVariations(location) {
    const normalized = normalizeLocationName(location);
    const variations = [];
    // Try without "the"
    if (normalized.startsWith("the ")) {
        variations.push(normalized.slice(4));
    }
    // Try without common articles in other languages
    const withoutArticle = normalized.replace(/^(la|le|les|el|los|las|il|lo|gli|i|die|der|das|de|het)\s+/i, "");
    if (withoutArticle !== normalized) {
        variations.push(withoutArticle);
    }
    // Decomposed input (base letter + combining mark) gets a precomposed
    // candidate. NOTE(review): assumes database keys use precomposed
    // characters - confirm against the data files.
    const composed = normalized.normalize("NFC");
    if (composed !== normalized) {
        variations.push(composed);
    }
    // Strip diacritics: decompose, drop combining marks (U+0300-U+036F), then
    // recompose so scripts that merely decompose (e.g. Hangul) are unchanged.
    const asciiVersion = normalized
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "")
        .normalize("NFC")
        .replace(/ß/g, "ss")
        .replace(/æ/g, "ae")
        .replace(/ø/g, "o")
        .replace(/œ/g, "oe");
    if (asciiVersion !== normalized && asciiVersion !== composed) {
        variations.push(asciiVersion);
    }
    return variations;
}
234
/**
 * Reports statistics about the lookup databases.
 *
 * @returns Whatever getDataStats() reports, passed through unchanged
 */
export function getDatabaseStats() {
    const stats = getDataStats();
    return stats;
}
240
/**
 * Tells whether the first name contained in `name` is known to the database.
 *
 * @param name - Full or first name
 * @returns true when a gender entry exists for the extracted first name;
 *   false when semantic data is unavailable, no first name can be extracted,
 *   or the lookup yields undefined
 */
export function hasName(name) {
    // Load the lookup data on first use; without data nothing can be known.
    if (!dataInitialized) {
        if (!isSemanticDataAvailable()) {
            return false;
        }
        ensureDataLoaded();
    }
    const givenName = extractFirstName(name);
    const isUsable = givenName !== null && givenName !== "";
    return isUsable && lookupGender(givenName) !== undefined;
}
255
/**
 * Tells whether a location is present in the database.
 *
 * Only the normalized name is looked up; alternative spellings are not tried.
 *
 * @param location - Location name
 * @returns true when the normalized name has an entry; false otherwise or
 *   when semantic data is unavailable
 */
export function hasLocation(location) {
    // Load the lookup data on first use; without data nothing can be known.
    if (!dataInitialized) {
        if (!isSemanticDataAvailable()) {
            return false;
        }
        ensureDataLoaded();
    }
    const lookupKey = normalizeLocationName(location);
    const entry = lookupLocationType(lookupKey);
    return entry !== undefined;
}
268
+ //# sourceMappingURL=semantic-enricher.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-enricher.js","sourceRoot":"","sources":["../../src/pipeline/semantic-enricher.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAEL,OAAO,GAGR,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,uBAAuB,EACvB,gBAAgB,EAChB,YAAY,EACZ,kBAAkB,EAClB,YAAY,EACZ,gBAAgB,GACjB,MAAM,2BAA2B,CAAC;AAEnC,uEAAuE;AACvE,OAAO,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAgCrD,qCAAqC;AACrC,IAAI,eAAe,GAAG,KAAK,CAAC;AAE5B;;;GAGG;AACH,SAAS,gBAAgB;IACvB,IAAI,eAAe;QAAE,OAAO;IAE5B,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,0CAA0C;YACxC,4FAA4F,CAC/F,CAAC;IACJ,CAAC;IAED,gBAAgB,EAAE,CAAC;IACnB,eAAe,GAAG,IAAI,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAkB,EAClB,MAAuB;IAEvB,wBAAwB;IACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;YAC/B,iDAAiD;YACjD,OAAO,KAAK,CAAC;QACf,CAAC;QACD,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;YAClB,KAAK,OAAO,CAAC,MAAM;gBACjB,OAAO,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAC5C,KAAK,OAAO,CAAC,QAAQ;gBACnB,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;YAC9B;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,IAAe,EAAE,MAAe;IACpD,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAE9C,OAAO;QACL,GAAG,IAAI;QACP,QAAQ,EAAE;YACR,GAAG,IAAI,CAAC,QAAQ;YAChB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAe;IACrC,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3C,OAAO;QACL,GAAG,IAAI;QACP,QAAQ,EAAE;YACR,GAAG,IAAI,CAAC,QAAQ;YAChB,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY,EAAE,MAAe;IACvD,sDAAsD;IACtD,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,EAAE,EAAE,CAAC;QAC3C,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,6BAA6B;IAC7B,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,E
AAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;QAC1C,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,OAAO;QACL,MAAM,EAAE,MAAsB;QAC9B,UAAU,EAAE,GAAG;QACf,MAAM,EAAE,UAAU;KACnB,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,gBAAgB,CAAC,QAAgB;IAC/C,6BAA6B;IAC7B,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,kBAAkB,CAAC,UAAU,CAAC,CAAC;IAE9C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,iBAAiB;QACjB,MAAM,UAAU,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;QACxD,KAAK,MAAM,OAAO,IAAI,UAAU,EAAE,CAAC;YACjC,MAAM,aAAa,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAClD,IAAI,aAAa,EAAE,CAAC;gBAClB,OAAO;oBACL,KAAK,EAAE,aAAa,CAAC,IAAqB;oBAC1C,UAAU,EAAE,GAAG;oBACf,WAAW,EAAE,aAAa,CAAC,WAAW;iBACvC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IAED,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,IAAqB;QACnC,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,QAAgB;IACxC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,gDAAgD;IAChD,MAAM,aAAa,GAAG,OAAO,CAAC,OAAO,CACnC,mEAAmE,EACnE,EAAE,CACH,CAAC;IAEF,2BAA2B;IAC3B,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACzC,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,QAAgB;IAC7C,OAAO,CACL,QAAQ;SACL,WAAW,EAAE;SACb,IAAI,EAAE;QACP,yBAAyB;SACxB,OAAO,CAAC,uDAAuD,EAAE,EAAE,CAAC;QACrE,uBAAuB;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CACxB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CAAC,QAAgB;IAClD,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,oBAAoB;IACpB,IAAI,UAAU,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAClC,UAAU,CAA
C,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,iDAAiD;IACjD,MAAM,eAAe,GAAG;QACtB,4DAA4D;KAC7D,CAAC;IACF,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACvD,IAAI,cAAc,KAAK,UAAU,EAAE,CAAC;YAClC,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,MAAM,YAAY,GAAG,UAAU;SAC5B,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;SACzB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC;SACrB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC;SACrB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAEzB,IAAI,YAAY,KAAK,UAAU,EAAE,CAAC;QAChC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAO9B,OAAO,YAAY,EAAE,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,OAAO,CAAC,IAAY;IAClC,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,EAAE;QAAE,OAAO,KAAK,CAAC;IAEzD,OAAO,YAAY,CAAC,SAAS,CAAC,KAAK,SAAS,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,IAAI,CAAC,eAAe,IAAI,CAAC,uBAAuB,EAAE,EAAE,CAAC;QACnD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,kBAAkB,CAAC,UAAU,CAAC,KAAK,SAAS,CAAC;AACtD,CAAC"}
@@ -2,7 +2,7 @@
2
2
  * Replacement Tagger
3
3
  * Replaces PII spans with placeholder tags and builds the PII map
4
4
  */
5
- import { PIIType, SpanMatch, DetectedEntity, AnonymizationPolicy } from '../types/index.js';
5
+ import { PIIType, SpanMatch, DetectedEntity, AnonymizationPolicy, SemanticAttributes } from "../types/index.js";
6
6
  /**
7
7
  * PII Map entry (before encryption)
8
8
  */
@@ -31,17 +31,30 @@ export interface TaggingResult {
31
31
  }
32
32
  /**
33
33
  * Generates a PII placeholder tag
34
- * Format: <PII type="TYPE" id="N"/>
34
+ * Format: <PII type="TYPE" id="N"/> or <PII type="TYPE" gender="X" id="N"/> etc.
35
+ *
36
+ * Semantic attributes (gender, scope) are included when provided and not 'unknown'
35
37
  */
36
- export declare function generateTag(type: PIIType, id: number): string;
38
+ export declare function generateTag(type: PIIType, id: number, semantic?: SemanticAttributes): string;
37
39
  /**
38
- * Parses a PII tag to extract type and id
39
- * Returns null if not a valid tag
40
+ * Result of parsing a PII tag
40
41
  */
41
- export declare function parseTag(tag: string): {
42
+ export interface ParsedTag {
42
43
  type: PIIType;
43
44
  id: number;
44
- } | null;
45
+ semantic?: SemanticAttributes;
46
+ }
47
+ /**
48
+ * Parses a PII tag to extract type, id, and semantic attributes
49
+ * Returns null if not a valid tag
50
+ *
51
+ * Supports formats:
52
+ * - <PII type="TYPE" id="N"/>
53
+ * - <PII type="TYPE" gender="X" id="N"/>
54
+ * - <PII type="TYPE" scope="X" id="N"/>
55
+ * - <PII type="TYPE" gender="X" scope="Y" id="N"/>
56
+ */
57
+ export declare function parseTag(tag: string): ParsedTag | null;
45
58
  /**
46
59
  * Creates a key for the PII map
47
60
  */
@@ -55,20 +68,47 @@ export declare function tagEntities(text: string, matches: SpanMatch[], policy:
55
68
  */
56
69
  export declare function isValidTag(tag: string): boolean;
57
70
  /**
58
- * Extracts all PII tags from anonymized text
71
+ * Tag extraction result with the matched text for accurate replacement
59
72
  */
60
- export declare function extractTags(anonymizedText: string): Array<{
73
+ export interface ExtractedTag {
61
74
  type: PIIType;
62
75
  id: number;
63
76
  position: number;
64
- }>;
77
+ /** The actual matched text (needed for replacement when tag is mangled) */
78
+ matchedText: string;
79
+ /** Semantic attributes extracted from the tag */
80
+ semantic?: SemanticAttributes;
81
+ }
82
+ /**
83
+ * Extracts all PII tags from anonymized text using fuzzy matching
84
+ * Handles mangled tags that may occur after translation
85
+ *
86
+ * Translation can mangle tags by:
87
+ * - Changing quote types (" → " or „ or « etc.)
88
+ * - Adding/removing whitespace
89
+ * - Changing case (type → Type, PII → pii)
90
+ * - Reordering attributes (id before type)
91
+ * - Modifying self-closing syntax (/> → / > or >)
92
+ */
93
+ export declare function extractTags(anonymizedText: string): ExtractedTag[];
94
+ /**
95
+ * Extracts tags using strict matching (original behavior)
96
+ * Useful when you know tags haven't been mangled
97
+ * Supports optional gender and scope attributes
98
+ */
99
+ export declare function extractTagsStrict(anonymizedText: string): ExtractedTag[];
65
100
  /**
66
101
  * Counts entities by type
67
102
  */
68
103
  export declare function countEntitiesByType(entities: DetectedEntity[]): Record<PIIType, number>;
69
104
  /**
70
105
  * Rehydrates anonymized text using the PII map
71
- * (For testing/debugging only - not part of the anonymization pipeline)
106
+ * Uses fuzzy matching to handle tags that may have been mangled by translation
107
+ *
108
+ * @param anonymizedText - Text containing PII tags (possibly mangled)
109
+ * @param piiMap - Map of PII keys to original values
110
+ * @param strict - If true, use strict matching (original behavior). Default: false
111
+ * @returns Text with PII tags replaced by original values
72
112
  */
73
- export declare function rehydrate(anonymizedText: string, piiMap: RawPIIMap): string;
113
+ export declare function rehydrate(anonymizedText: string, piiMap: RawPIIMap, strict?: boolean): string;
74
114
  //# sourceMappingURL=tagger.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"tagger.d.ts","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EACP,SAAS,EACT,cAAc,EAEd,mBAAmB,EACpB,MAAM,mBAAmB,CAAC;AAG3B;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,eAAe;IACf,IAAI,EAAE,OAAO,CAAC;IACd,gBAAgB;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4CAA4C;IAC5C,cAAc,EAAE,MAAM,CAAC;IACvB,kDAAkD;IAClD,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,wCAAwC;IACxC,MAAM,EAAE,SAAS,CAAC;CACnB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAE7D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,EAAE,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB1E;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,SAAS,EAAE,EACpB,MAAM,EAAE,mBAAmB,GAC1B,aAAa,CAyEf;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE/C;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,cAAc,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,CAoB1G;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAcvF;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,GAAG,MAAM,CAkB3E"}
1
+ {"version":3,"file":"tagger.d.ts","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EACP,SAAS,EACT,cAAc,EACd,mBAAmB,EACnB,kBAAkB,EACnB,MAAM,mBAAmB,CAAC;AAG3B;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,eAAe;IACf,IAAI,EAAE,OAAO,CAAC;IACd,gBAAgB;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4CAA4C;IAC5C,cAAc,EAAE,MAAM,CAAC;IACvB,kDAAkD;IAClD,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,wCAAwC;IACxC,MAAM,EAAE,SAAS,CAAC;CACnB;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,OAAO,EACb,EAAE,EAAE,MAAM,EACV,QAAQ,CAAC,EAAE,kBAAkB,GAC5B,MAAM,CAcR;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,CA+CtD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,SAAS,EAAE,EACpB,MAAM,EAAE,mBAAmB,GAC1B,aAAa,CA8Ef;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE/C;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,WAAW,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AA4DD;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CAAC,cAAc,EAAE,MAAM,GAAG,YAAY,EAAE,CAyFlE;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,cAAc,EAAE,MAAM,GAAG,YAAY,EAAE,CAqDxE;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,cAAc,EAAE,GACzB,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAczB;AAED;;;;;;;;GAQG;AACH,wBAAgB,SAAS,CACvB,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,SAAS,EACjB,MAAM,GAAE,OAAe,GACtB,MAAM,CAyBR"}