@mailwoman/neural 2.1.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/anchor-inference.d.ts +57 -0
- package/out/anchor-inference.d.ts.map +1 -0
- package/out/anchor-inference.js +94 -0
- package/out/anchor-inference.js.map +1 -0
- package/out/browser.d.ts +18 -0
- package/out/browser.d.ts.map +1 -0
- package/out/browser.js +19 -0
- package/out/browser.js.map +1 -0
- package/out/classifier.d.ts +145 -11
- package/out/classifier.d.ts.map +1 -1
- package/out/classifier.js +185 -20
- package/out/classifier.js.map +1 -1
- package/out/fst-prior.d.ts +71 -0
- package/out/fst-prior.d.ts.map +1 -0
- package/out/fst-prior.js +173 -0
- package/out/fst-prior.js.map +1 -0
- package/out/index.d.ts +7 -0
- package/out/index.d.ts.map +1 -1
- package/out/index.js +5 -0
- package/out/index.js.map +1 -1
- package/out/labels.d.ts +30 -6
- package/out/labels.d.ts.map +1 -1
- package/out/labels.js +43 -6
- package/out/labels.js.map +1 -1
- package/out/onnx-runner.d.ts +8 -1
- package/out/onnx-runner.d.ts.map +1 -1
- package/out/onnx-runner.js +31 -1
- package/out/onnx-runner.js.map +1 -1
- package/out/postcode-anchor.d.ts +117 -0
- package/out/postcode-anchor.d.ts.map +1 -0
- package/out/postcode-anchor.js +269 -0
- package/out/postcode-anchor.js.map +1 -0
- package/out/postcode-binary-resolver.d.ts +60 -0
- package/out/postcode-binary-resolver.d.ts.map +1 -0
- package/out/postcode-binary-resolver.js +208 -0
- package/out/postcode-binary-resolver.js.map +1 -0
- package/out/postcode-repair.d.ts +65 -0
- package/out/postcode-repair.d.ts.map +1 -0
- package/out/postcode-repair.js +171 -0
- package/out/postcode-repair.js.map +1 -0
- package/out/proposal-classifier.d.ts +5 -1
- package/out/proposal-classifier.d.ts.map +1 -1
- package/out/proposal-classifier.js +5 -3
- package/out/proposal-classifier.js.map +1 -1
- package/out/query-shape-prior.d.ts +74 -0
- package/out/query-shape-prior.d.ts.map +1 -0
- package/out/query-shape-prior.js +223 -0
- package/out/query-shape-prior.js.map +1 -0
- package/out/street-morphology-prior.d.ts +56 -0
- package/out/street-morphology-prior.d.ts.map +1 -0
- package/out/street-morphology-prior.js +159 -0
- package/out/street-morphology-prior.js.map +1 -0
- package/out/tokenizer.d.ts +6 -1
- package/out/tokenizer.d.ts.map +1 -1
- package/out/tokenizer.js +8 -3
- package/out/tokenizer.js.map +1 -1
- package/out/unit-repair.d.ts +46 -0
- package/out/unit-repair.d.ts.map +1 -0
- package/out/unit-repair.js +147 -0
- package/out/unit-repair.js.map +1 -0
- package/out/viterbi.d.ts +76 -0
- package/out/viterbi.d.ts.map +1 -0
- package/out/viterbi.js +163 -0
- package/out/viterbi.js.map +1 -0
- package/out/vitest.config.d.ts.map +1 -1
- package/out/vitest.config.js +3 -0
- package/out/vitest.config.js.map +1 -1
- package/out/weights.d.ts +42 -0
- package/out/weights.d.ts.map +1 -1
- package/out/weights.js +92 -4
- package/out/weights.js.map +1 -1
- package/package.json +10 -3
package/out/classifier.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,
|
|
1
|
+
{"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAIX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAA;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,mCAAmC,EAAkC,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,mBAAmB,EAAqB,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAoD3G,MAAM,OAAO,uBAAuB;IAON;IANZ,MAAM,CAAmB;IACzB,UAAU,CAAsB;IAChC,WAAW,CAAY;IACvB,gBAAgB,CAAU;IAC1B,cAAc,CAAU;IAEzC,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;QAC7C,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,IAAI,SAAS,CAAA;QACzC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtD,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACP,IAAI,CAAC,WAAW,GAAG,UAAU,CAAA;QAC9B,CAAC;QACD,IAAI,CAAC,gBAAgB,GAAG,GAAG,CAAC,gBAAgB,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC9E,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAC3B,OAAqE,EAAE;QAEvE,yFAAyF;QACzF,2FAA2F;QAC3F,uFAAuF;QACvF,0FAA0F;QAC1F,2BAA2B;QAC3B,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,cAAc,EAAE,uBAAuB,EAAE,kBAAkB,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC3G,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC;YACpD,MAAM,CAAC,yBAAyB,CAAC,cAAc,CAAC;SAChD,CAAC,CAAA;QACF,MAAM,QAAQ,GAAoB,cAAc,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAA;QAC9D,MAAM,GAAG,GAAG,kBAAkB,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAA;QAC3D,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC;YACvD,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;
SACrC,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC;YAClC,SAAS;YACT,MAAM;YACN,MAAM;YACN,WAAW,EAAE,GAAG,EAAE,WAAW;YAC7B,gBAAgB,EAAE,GAAG,EAAE,gBAAgB;YACvC,cAAc,EAAE,GAAG,EAAE,cAAc;YACnC,GAAG,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzF,CAAC,CAAA;IACH,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,IAAgB;QACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QAEtD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,gGAAgG;QAChG,+EAA+E;QAC/E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,oBAAoB;YAC3C,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,oBAAoB,CAAC;YAClE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,CAAA;QAE3D,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAA;QAEhC,IAAI,SAAS,GAAG,IAAI,EAAE,UAAU;YAC/B,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;gBAC1C,SAAS,EAAE,IAAI;aACf,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;YACf,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACrD,SAAS,EAAE,IAAI,CAAC,YAAY,IAAI,GAAG;aACnC,CAAC,CACF,CAAA;QACF,CAAC;QAED,IAAI,IAAI,EAAE,mBAAmB,EAAE,CAAC;YAC/B,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,mCAAmC,CAClC,IAAI,CAAC,mBAAmB,EACxB,MAAM,EACN,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,uBAAuB,IAAI,EAAE,CAClC,CACD,CAAA;QACF,CAAC;QAED,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,IAAI,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAChD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,
CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,IAAI,IAAI,EAAE,cAAc,EAAE,CAAC;YAC1B,MAAM,GAAG,oBAAoB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QACnD,CAAC;QACD,IAAI,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QAC/C,CAAC;QAED,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACtC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,IAAgB;QACnD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAClE,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,gGAAgG;QAChG,+EAA+E;QAC/E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,oBAAoB;YAC3C,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,oBAAoB,CAAC;YAClE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,CAAA;QAE3D,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAA;QAEhC,IAAI,SAAS,GAAG,IAAI,EAAE,UAAU;YAC/B,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;gBAC1C,SAAS,EAAE,IAAI;aACf,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;YACf,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACrD,SAAS,EAAE,IAAI,CAAC,YAAY,IAAI,GAAG;aACnC,CAAC,CACF,CAAA;QACF,CAAC;QAED,IAAI,IAAI,EAAE,mBAAmB,EAAE,CAAC;YAC/B,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,mCAAmC,CAClC,IAAI,CAAC,mBAAmB,EACxB,MAAM,EACN,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,uBAAuB,IAAI,EAAE,CAClC,CACD,CAAA;QACF,CAAC;QAED,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aAC
nC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO;YACN,IAAI,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC;YACpC,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;SAC3D,CAAA;IACF,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,IAAgB;QAC7C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IAClD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,IAAgB;QAC/C,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAA8D;QAC1F,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAA;IAC5D,CAAC;IAED;;;;;;;;;;OAUG;IACK,mBAAmB,CAAC,MAA2B;QACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAM;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC,MAAM,CAAA;QAC/B,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,IAAI,KAAK,CACd,wCAAwC,KAAK,2CAA2C;gBACvF,wBAAwB,IAAI,CAAC,MAAM,CAAC,MAAM,iDAAiD;gBAC3F,oFAAoF,CACrF,CAAA;QACF,CAAC;IACF,CAAC;CACD;AA4DD,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IAC
vB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC;AAED,uGAAuG;AACvG,SAAS,WAAW,CAAC,CAAa,EAAE,CAAa;IAChD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;QAC1D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Soft-prior emission biases derived from the FST gazetteer. When the FST finds that a token
|
|
7
|
+
* sequence matches a known place name (e.g., "New York" → locality + region), this module
|
|
8
|
+
* produces additive biases that nudge the Viterbi decoder toward the matching BIO labels.
|
|
9
|
+
*
|
|
10
|
+
* Composes with the QueryShape prior via addEmissionMatrix — same integration point, same additive
|
|
11
|
+
* semantics.
|
|
12
|
+
*
|
|
13
|
+
* SentencePiece ↔ FST bridge: SentencePiece pieces are grouped into whitespace words (by the ▁
|
|
14
|
+
* sentinel), normalized through the same pipeline as FST edges (NFKC, lowercase, strip
|
|
15
|
+
* punctuation/symbols), and walked through the FST as contiguous subpaths.
|
|
16
|
+
*
|
|
17
|
+
* Uses structural typing for the FST input so this module has zero dependencies on
|
|
18
|
+
* `@mailwoman/resolver-wof-sqlite` — consumers pass an FstMatcher instance, but this file only
|
|
19
|
+
* consumes the shape.
|
|
20
|
+
*/
|
|
21
|
+
import type { TokenLike } from "./query-shape-prior.js";
|
|
22
|
+
/** Cursor into the FST, as returned by {@linkcode FstMatcherLike.walk} and {@linkcode FstMatcherLike.walkFrom}. */
export interface FstMatchLike {
|
|
23
|
+
/** State identifier; the FST prior passes it to {@linkcode FstMatcherLike.accepting} to fetch place entries. */
stateId: number;
|
|
24
|
+
/** When true, the FST prior reads this state's place entries and applies their biases. */
accepted: boolean;
|
|
25
|
+
/** Not read by the FST prior. NOTE(review): presumably the number of tokens consumed so far — confirm. */
depth: number;
|
|
26
|
+
}
|
|
27
|
+
/** One gazetteer place attached to an accepting FST state. */
export interface FstPlaceEntryLike {
|
|
28
|
+
/** Place identifier. The FST prior uses it to de-duplicate: each id contributes a bias at most once per call. */
wofID: number;
|
|
29
|
+
/** Placetype name. Only `country`, `region`, `locality` and `postalcode` are mapped to BIO labels by the FST prior; other values are ignored. */
placetype: string;
|
|
30
|
+
/** Weight multiplied by `biasScale * maxBias` to obtain the bias. NOTE(review): presumably in [0, 1] — confirm. */
importance: number;
|
|
31
|
+
}
|
|
32
|
+
/** Structural shape of the FST matcher consumed by the prior; only these three methods are called. */
export interface FstMatcherLike {
|
|
33
|
+
/** Start a walk with the given tokens (the prior always passes a single token). `null` means no path exists. */
walk(tokens: string[]): FstMatchLike | null;
|
|
34
|
+
/** Extend a previous match by one token. `null` means the path cannot be extended; the prior then stops growing that span. */
walkFrom(prev: FstMatchLike, token: string): FstMatchLike | null;
|
|
35
|
+
/** Place entries for a state; the prior calls this only for matches whose `accepted` flag is true. */
accepting(stateId: number): FstPlaceEntryLike[];
|
|
36
|
+
}
|
|
37
|
+
/** A run of SentencePiece pieces treated as one FST token, as produced by `groupPiecesIntoWords`. */
export interface WordGroup {
|
|
38
|
+
/** Normalized word text (NFKC, lowercased, punctuation/symbols removed); `""` for a piece containing no letter or digit. */
fstToken: string;
|
|
39
|
+
/** Indices into the input `pieces` array of the pieces that make up this word. */
pieceIndices: number[];
|
|
40
|
+
}
|
|
41
|
+
/** Tuning options for `buildFstEmissionPriors`; every field is optional. */
export interface FstPriorOpts {
|
|
42
|
+
/** Multiplier applied to every positive bias. Default 1.0. */
biasScale?: number;
|
|
43
|
+
/**
|
|
44
|
+
* Maximum bias magnitude (logits). Prevents large-population places from overriding the model.
|
|
45
|
+
* Default 3.0. Applied as `importance * biasScale * maxBias`, so it bounds the bias only while
* `importance * biasScale` stays at or below 1.
|
|
46
|
+
*/
|
|
47
|
+
maxBias?: number;
|
|
48
|
+
/**
* Penalty (logits) written as `-suppressionScale` onto the street, house-number and venue labels
* of pieces covered by a place match. Default 1.5; a value of 0 or less disables suppression.
*/
suppressionScale?: number;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Build a `[seqLen][numLabels]` bias matrix from FST gazetteer matches.
|
|
52
|
+
*
|
|
53
|
+
* Walks all contiguous subpaths of the reconstructed whitespace-token sequence through the FST. For
|
|
54
|
+
* each accepting state, biases the corresponding BIO labels on the matched pieces.
|
|
55
|
+
*/
|
|
56
|
+
export declare function buildFstEmissionPriors(fst: FstMatcherLike, pieces: ReadonlyArray<TokenLike & {
|
|
57
|
+
piece: string;
|
|
58
|
+
}>, labels: ReadonlyArray<string>, opts?: FstPriorOpts): number[][];
|
|
59
|
+
/**
|
|
60
|
+
* Group SentencePiece pieces into whitespace-delimited words. Each word's literal text is
|
|
61
|
+
* reconstructed by concatenating pieces (minus leading ▁), then normalized through the same
|
|
62
|
+
* pipeline the FST builder uses.
|
|
63
|
+
*
|
|
64
|
+
* Exported (alongside the {@linkcode WordGroup} type) so the
|
|
65
|
+
* street-morphology prior can reuse the same piece-grouping/normalization pipeline without
|
|
66
|
+
* duplication. Internal helper signature; not part of the public neural API.
|
|
67
|
+
*/
|
|
68
|
+
export declare function groupPiecesIntoWords(pieces: ReadonlyArray<{
|
|
69
|
+
piece: string;
|
|
70
|
+
}>): WordGroup[];
|
|
71
|
+
//# sourceMappingURL=fst-prior.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-prior.d.ts","sourceRoot":"","sources":["../fst-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAQvD,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,MAAM,CAAA;IACf,QAAQ,EAAE,OAAO,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,iBAAiB;IACjC,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,GAAG,IAAI,CAAA;IAC3C,QAAQ,CAAC,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAAA;IAChE,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAAA;CAC/C;AAiBD,MAAM,WAAW,SAAS;IACzB,QAAQ,EAAE,MAAM,CAAA;IAChB,YAAY,EAAE,MAAM,EAAE,CAAA;CACtB;AAID,MAAM,WAAW,YAAY;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,gBAAgB,CAAC,EAAE,MAAM,CAAA;CACzB;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACrC,GAAG,EAAE,cAAc,EACnB,MAAM,EAAE,aAAa,CAAC,SAAS,GAAG;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,EAC7B,IAAI,GAAE,YAAiB,GACrB,MAAM,EAAE,EAAE,CA+DZ;AAED;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,aAAa,CAAC;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG,SAAS,EAAE,CAiC1F"}
|
package/out/fst-prior.js
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Soft-prior emission biases derived from the FST gazetteer. When the FST finds that a token
|
|
7
|
+
* sequence matches a known place name (e.g., "New York" → locality + region), this module
|
|
8
|
+
* produces additive biases that nudge the Viterbi decoder toward the matching BIO labels.
|
|
9
|
+
*
|
|
10
|
+
* Composes with the QueryShape prior via addEmissionMatrix — same integration point, same additive
|
|
11
|
+
* semantics.
|
|
12
|
+
*
|
|
13
|
+
* SentencePiece ↔ FST bridge: SentencePiece pieces are grouped into whitespace words (by the ▁
|
|
14
|
+
* sentinel), normalized through the same pipeline as FST edges (NFKC, lowercase, strip
|
|
15
|
+
* punctuation/symbols), and walked through the FST as contiguous subpaths.
|
|
16
|
+
*
|
|
17
|
+
* Uses structural typing for the FST input so this module has zero dependencies on
|
|
18
|
+
* `@mailwoman/resolver-wof-sqlite` — consumers pass an FstMatcher instance, but this file only
|
|
19
|
+
* consumes the shape.
|
|
20
|
+
*/
|
|
21
|
+
/** SentencePiece marker that prefixes the first piece of every whitespace-delimited word. */
const SPACE_SENTINEL = "▁";

/**
 * Gazetteer placetype → BIO label stem (the part after `B-` / `I-`).
 * Placetypes missing from this table never produce a bias.
 */
const PLACETYPE_TO_BIO = new Map(
    Object.entries({
        country: "country",
        region: "region",
        locality: "locality",
        postalcode: "postcode",
    }),
);

/** Labels pushed down on every piece that is covered by a gazetteer place match. */
const SUPPRESS_WHEN_PLACE = [
    "B-street",
    "I-street",
    "B-house_number",
    "I-house_number",
    "B-venue",
];
|
|
32
|
+
/**
 * Build a `[pieces.length][labels.length]` additive bias matrix from FST gazetteer matches.
 *
 * The pieces are grouped into words, then every contiguous run of words is walked through the
 * FST, starting at each word in turn. Whenever the walk reaches an accepting state, the place
 * entries of that state are turned into biases on the pieces of the matched words.
 *
 * @param fst - Matcher exposing `walk`, `walkFrom` and `accepting`.
 * @param pieces - Tokenizer pieces; only the `piece` string of each element is read here.
 * @param labels - Label names; a label's index is its column in the returned matrix.
 * @param opts - Optional tuning: `biasScale` (default 1.0), `maxBias` (default 3.0),
 *   `suppressionScale` (default 1.5).
 * @returns One row per piece, one column per label; every cell is 0 unless a match touched it.
 */
export function buildFstEmissionPriors(fst, pieces, labels, opts = {}) {
    const biasScale = opts.biasScale ?? 1.0;
    const maxBias = opts.maxBias ?? 3.0;
    const suppressionScale = opts.suppressionScale ?? 1.5;

    // Zero-initialised matrix: rows follow `pieces`, columns follow `labels`.
    const matrix = Array.from({ length: pieces.length }, () => new Array(labels.length).fill(0));

    const labelToCol = new Map();
    for (const [col, label] of labels.entries()) {
        labelToCol.set(label, col);
    }

    // Shared across all spans, so a given place id contributes at most once per call.
    const seenWOFIDs = new Set();
    const biasFromState = (stateId, groups) =>
        applyBias(matrix, labelToCol, fst.accepting(stateId), groups, biasScale, maxBias, suppressionScale, seenWOFIDs);

    const wordGroups = groupPiecesIntoWords(pieces);
    for (const [start, head] of wordGroups.entries()) {
        // Punctuation-only words never start a match.
        if (head.fstToken === "") {
            continue;
        }
        let state = fst.walk([head.fstToken]);
        if (!state) {
            continue;
        }
        if (state.accepted) {
            biasFromState(state.stateId, [head]);
        }
        // Grow the span one word at a time until the FST has no continuation.
        for (let end = start + 1; end < wordGroups.length; end++) {
            const token = wordGroups[end].fstToken;
            // Punctuation-only words are stepped over without consuming an FST edge.
            if (token === "") {
                continue;
            }
            const advanced = fst.walkFrom(state, token);
            if (!advanced) {
                break;
            }
            if (advanced.accepted) {
                const matched = wordGroups.slice(start, end + 1).filter((g) => g.fstToken !== "");
                biasFromState(advanced.stateId, matched);
            }
            state = advanced;
        }
    }
    return matrix;
}
|
|
81
|
+
/**
 * Group SentencePiece pieces into words and normalize each word for FST lookup.
 *
 * A new word starts at a piece that begins with the ▁ sentinel, at the very first piece, and at
 * the first letter/digit-bearing piece after a punctuation-only piece. Other pieces are appended
 * to the word currently being built. A piece that contains no letter or digit becomes its own
 * group with `fstToken === ""`, so callers can skip it while still knowing which piece index it
 * occupies. Every input piece therefore ends up in exactly one group.
 *
 * Exported so the street-morphology prior can reuse the same grouping/normalization pipeline.
 * Internal helper signature; not part of the public neural API.
 *
 * @param pieces - Tokenizer pieces; only the `piece` string of each element is read.
 * @returns Word groups in input order, each with its normalized `fstToken` and the indices
 *   (into `pieces`) of the pieces it covers.
 */
export function groupPiecesIntoWords(pieces) {
    const hasLetterOrDigit = /[\p{L}\p{N}]/u;
    const groups = [];
    let current = null;

    // Close the word being built, if any.
    const flush = () => {
        if (current) {
            groups.push(current);
            current = null;
        }
    };

    for (let i = 0; i < pieces.length; i++) {
        const { piece } = pieces[i];

        // Punctuation-only piece: ends the current word and stands alone as an empty token.
        if (!hasLetterOrDigit.test(piece)) {
            flush();
            groups.push({ fstToken: "", pieceIndices: [i] });
            continue;
        }

        const startsWord = piece.startsWith(SPACE_SENTINEL);
        if (startsWord || current === null) {
            // `current === null` covers both the first piece and a continuation piece that
            // follows a punctuation-only piece (e.g. "▁St", ".", "Louis"). Previously the latter
            // was silently dropped and belonged to no group; it now starts a word of its own.
            flush();
            current = {
                fstToken: startsWord ? piece.slice(SPACE_SENTINEL.length) : piece,
                pieceIndices: [i],
            };
        } else {
            current.pieceIndices.push(i);
            current.fstToken += piece;
        }
    }
    flush();

    // Normalize once per word, after all of its pieces have been concatenated.
    for (const group of groups) {
        if (group.fstToken !== "") {
            group.fstToken = normalizeFstToken(group.fstToken);
        }
    }
    return groups;
}
|
|
123
|
+
/**
 * Normalize a word for FST lookup: NFKC-normalize, lowercase, then delete every Unicode
 * punctuation (`\p{P}`) and symbol (`\p{S}`) character.
 *
 * @param s - Raw word text.
 * @returns The normalized text; an empty string when nothing survives the stripping.
 */
function normalizeFstToken(s) {
    return s.normalize("NFKC").toLowerCase().replace(/[\p{P}\p{S}]/gu, "");
}
|
|
130
|
+
/**
 * Fold the place entries of one accepting FST state into the bias matrix (mutated in place).
 *
 * For each BIO label stem the strongest entry wins. The first covered piece receives the `B-`
 * label bias and the remaining pieces the `I-` label bias (falling back to `B-` when the label
 * set has no `I-` variant). Cells only ever move up for place labels (`Math.max`) and down for
 * suppressed labels (`Math.min`), so repeated calls never weaken an earlier, stronger match.
 *
 * @param matrix - `[piece][label]` bias matrix to update.
 * @param labelToCol - Label name → column index.
 * @param entries - Place entries of the accepting state.
 * @param groups - Word groups covered by the match; their piece indices select the rows.
 * @param biasScale - Multiplier on each entry's importance.
 * @param maxBias - Second multiplier on each entry's importance.
 *   NOTE(review): this is a multiplier, not a clamp; the bias stays within `maxBias` only while
 *   `importance * biasScale` is at most 1 — confirm the expected range of `importance`.
 * @param suppressionScale - When greater than 0, suppressed labels are lowered to at most
 *   `-suppressionScale` on every covered piece.
 * @param seenWOFIDs - Ids already consumed; updated here so each place is applied only once.
 */
function applyBias(matrix, labelToCol, entries, groups, biasScale, maxBias, suppressionScale, seenWOFIDs) {
    // Strongest positive bias per BIO label stem among the not-yet-seen entries.
    const strongestByTag = new Map();
    for (const entry of entries) {
        if (seenWOFIDs.has(entry.wofID)) {
            continue;
        }
        // Recorded before the placetype check, so unmapped placetypes are marked as seen too.
        seenWOFIDs.add(entry.wofID);
        const tag = PLACETYPE_TO_BIO.get(entry.placetype);
        if (!tag) {
            continue;
        }
        const candidate = entry.importance * biasScale * maxBias;
        if (candidate > (strongestByTag.get(tag) ?? 0)) {
            strongestByTag.set(tag, candidate);
        }
    }
    // Nothing new and positive to apply: leave the matrix untouched, including suppression.
    if (strongestByTag.size === 0) {
        return;
    }

    const rows = groups.flatMap((group) => group.pieceIndices);

    for (const [tag, bias] of strongestByTag) {
        const beginCol = labelToCol.get(`B-${tag}`);
        if (beginCol === undefined) {
            continue;
        }
        const insideCol = labelToCol.get(`I-${tag}`) ?? beginCol;
        rows.forEach((row, position) => {
            const col = position === 0 ? beginCol : insideCol;
            matrix[row][col] = Math.max(matrix[row][col], bias);
        });
    }

    if (suppressionScale > 0) {
        const suppressedCols = SUPPRESS_WHEN_PLACE
            .map((label) => labelToCol.get(label))
            .filter((col) => col !== undefined);
        for (const row of rows) {
            for (const col of suppressedCols) {
                matrix[row][col] = Math.min(matrix[row][col], -suppressionScale);
            }
        }
    }
}
|
|
173
|
+
//# sourceMappingURL=fst-prior.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-prior.js","sourceRoot":"","sources":["../fst-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAIH,MAAM,cAAc,GAAG,GAAG,CAAA;AAwB1B,8EAA8E;AAC9E,gCAAgC;AAChC,8EAA8E;AAE9E,MAAM,gBAAgB,GAAgC,IAAI,GAAG,CAAC;IAC7D,CAAC,SAAS,EAAE,SAAS,CAAC;IACtB,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACpB,CAAC,UAAU,EAAE,UAAU,CAAC;IACxB,CAAC,YAAY,EAAE,UAAU,CAAC;CAC1B,CAAC,CAAA;AAWF,MAAM,mBAAmB,GAAsB,CAAC,UAAU,EAAE,UAAU,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,SAAS,CAAC,CAAA;AAYtH;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CACrC,GAAmB,EACnB,MAAoD,EACpD,MAA6B,EAC7B,OAAqB,EAAE;IAEvB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAA;IACvC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAA;IACpC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,GAAG,CAAA;IACnC,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,GAAG,CAAA;IACrD,MAAM,MAAM,GAAe,EAAE,CAAA;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAErE,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,CAAC,CAAC,CAAA;IAErE,MAAM,UAAU,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAA;IAC/C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAE1C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,UAAU,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QACxD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAE,CAAA;QAChC,IAAI,KAAK,CAAC,QAAQ,KAAK,EAAE;YAAE,SAAQ;QAEnC,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAA;QACxC,IAAI,CAAC,KAAK;YAAE,SAAQ;QAEpB,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACpB,SAAS,CACR,MAAM,EACN,UAAU,EACV,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,EAC5B,CAAC,KAAK,CAAC,EACP,SAAS,EACT,OAAO,EACP,gBAAgB,EAChB,UAAU,CACV,CAAA;QACF,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,CAAA;QACnB,KAAK,IAAI,GAAG,GAAG,KAAK,GAAG,CAAC,EAAE,GAAG,GAAG,UAAU,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAC1D,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAE,CAAA;YAClC,IAAI,SAAS,CAAC,QAAQ,KAAK,EAAE;gBA
AE,SAAQ;YAEvC,MAAM,IAAI,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC,QAAQ,CAAC,CAAA;YACtD,IAAI,CAAC,IAAI;gBAAE,MAAK;YAEhB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACnB,MAAM,aAAa,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,EAAE,CAAC,CAAA;gBACvF,SAAS,CACR,MAAM,EACN,UAAU,EACV,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,EAC3B,aAAa,EACb,SAAS,EACT,OAAO,EACP,gBAAgB,EAChB,UAAU,CACV,CAAA;YACF,CAAC;YAED,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,oBAAoB,CAAC,MAAwC;IAC5E,MAAM,MAAM,GAAgB,EAAE,CAAA;IAC9B,IAAI,OAAO,GAAqB,IAAI,CAAA;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;QACpB,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAA;QAE9C,IAAI,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YAChE,IAAI,OAAO;gBAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACjC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;gBAChD,OAAO,GAAG,IAAI,CAAA;gBACd,SAAQ;YACT,CAAC;YACD,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;YACnG,OAAO,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QACnD,CAAC;aAAM,CAAC;YACP,IAAI,OAAO,EAAE,CAAC;gBACb,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBAC5B,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,CAAA;YAC5B,CAAC;QACF,CAAC;IACF,CAAC;IACD,IAAI,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAEjC,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,QAAQ,KAAK,EAAE,EAAE,CAAC;YACvB,CAAC,CAAC,QAAQ,GAAG,iBAAiB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;QAC3C,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,CAAS;IACnC,MAAM,OAAO,GAAG,CAAC;SACf,SAAS,CAAC,MAAM,CAAC;SACjB,WAAW,EAAE;SACb,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAA;IAC/B,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,C
AAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAA;AACzC,CAAC;AAED,SAAS,SAAS,CACjB,MAAkB,EAClB,UAA+B,EAC/B,OAAyC,EACzC,MAAmB,EACnB,SAAiB,EACjB,OAAe,EACf,gBAAwB,EACxB,UAAuB;IAEvB,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAA;IAE1C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC;YAAE,SAAQ;QACzC,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;QAC3B,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACpD,IAAI,CAAC,MAAM;YAAE,SAAQ;QACrB,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,GAAG,SAAS,GAAG,OAAO,CAAA;QACtD,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QAC1C,IAAI,OAAO,GAAG,QAAQ;YAAE,QAAQ,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACtD,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC;QAAE,OAAM;IAE/B,MAAM,eAAe,GAAa,EAAE,CAAA;IACpC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,YAAY;YAAE,eAAe,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC9D,CAAC;IAED,KAAK,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,QAAQ,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,MAAM,EAAE,CAAC,CAAA;QAC1C,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,MAAM,EAAE,CAAC,CAAA;QAC1C,IAAI,IAAI,KAAK,SAAS;YAAE,SAAQ;QAEhC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,MAAM,EAAE,GAAG,eAAe,CAAC,CAAC,CAAE,CAAA;YAC9B,MAAM,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,CAAA;YAC3C,MAAM,CAAC,EAAE,CAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,GAAG,CAAE,EAAE,IAAI,CAAC,CAAA;QACrD,CAAC;IACF,CAAC;IAED,IAAI,gBAAgB,GAAG,CAAC,EAAE,CAAC;QAC1B,KAAK,MAAM,EAAE,IAAI,eAAe,EAAE,CAAC;YAClC,KAAK,MAAM,KAAK,IAAI,mBAAmB,EAAE,CAAC;gBACzC,MAAM,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;gBACjC,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;oBACvB,MAAM,CAAC,EAAE,CAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,GAAG,CAAE,EAAE,CAAC,gBAAgB,CAAC,CAAA;gBAClE,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;AACF,CAAC"}
|
package/out/index.d.ts
CHANGED
|
@@ -3,10 +3,17 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*/
|
|
6
|
+
export * from "./anchor-inference.js";
|
|
6
7
|
export * from "./classifier.js";
|
|
7
8
|
export * from "./labels.js";
|
|
8
9
|
export * from "./onnx-runner.js";
|
|
10
|
+
export * from "./postcode-anchor.js";
|
|
11
|
+
export * from "./postcode-binary-resolver.js";
|
|
9
12
|
export * from "./proposal-classifier.js";
|
|
13
|
+
export { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
|
|
14
|
+
export type { BuildPriorsOpts, KnownFormatHitLike, QueryShapeLike, TokenLike } from "./query-shape-prior.js";
|
|
10
15
|
export * from "./tokenizer.js";
|
|
16
|
+
export { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, perTokenArgmax, softmax, viterbi, } from "./viterbi.js";
|
|
17
|
+
export type { ViterbiInput, ViterbiResult } from "./viterbi.js";
|
|
11
18
|
export * from "./weights.js";
|
|
12
19
|
//# sourceMappingURL=index.d.ts.map
|
package/out/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,cAAc,gBAAgB,CAAA;AAC9B,cAAc,cAAc,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,uBAAuB,CAAA;AACrC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,sBAAsB,CAAA;AACpC,cAAc,+BAA+B,CAAA;AAC7C,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAC/E,YAAY,EAAE,eAAe,EAAE,kBAAkB,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAC5G,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAC/D,cAAc,cAAc,CAAA"}
|
package/out/index.js
CHANGED
|
@@ -3,10 +3,15 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*/
|
|
6
|
+
export * from "./anchor-inference.js";
|
|
6
7
|
export * from "./classifier.js";
|
|
7
8
|
export * from "./labels.js";
|
|
8
9
|
export * from "./onnx-runner.js";
|
|
10
|
+
export * from "./postcode-anchor.js";
|
|
11
|
+
export * from "./postcode-binary-resolver.js";
|
|
9
12
|
export * from "./proposal-classifier.js";
|
|
13
|
+
export { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
|
|
10
14
|
export * from "./tokenizer.js";
|
|
15
|
+
export { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, perTokenArgmax, softmax, viterbi, } from "./viterbi.js";
|
|
11
16
|
export * from "./weights.js";
|
|
12
17
|
//# sourceMappingURL=index.js.map
|
package/out/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,cAAc,gBAAgB,CAAA;AAC9B,cAAc,cAAc,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,uBAAuB,CAAA;AACrC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,sBAAsB,CAAA;AACpC,cAAc,+BAA+B,CAAA;AAC7C,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAE/E,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AAErB,cAAc,cAAc,CAAA"}
|
package/out/labels.d.ts
CHANGED
|
@@ -3,18 +3,42 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*
|
|
6
|
-
* Mirror of `packages/corpus-python/src/mailwoman_train/labels.py
|
|
6
|
+
* Mirror of `packages/corpus-python/src/mailwoman_train/labels.py`.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
* silently corrupts
|
|
8
|
+
* Index ↔ label parity is load-bearing: the model emits logits in one canonical order on both sides
|
|
9
|
+
* and any drift here silently corrupts BIO decoding. STAGE2 strictly extends STAGE1 — the first
|
|
10
|
+
* 15 indices are identical, so reading a v0.2.0 (Stage 1) model with the Stage 2 label vocabulary
|
|
11
|
+
* stays correct; the extra entries are unused.
|
|
10
12
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
13
|
+
* Runtime loading: as of v0.4.0 the trained label vocabulary is carried in `model-card.json`'s
|
|
14
|
+
* `labels` field and read by `loadFromWeights` (see `weights.readLabelsFromModelCard`). These
|
|
15
|
+
* constants remain the compile-time fallback for legacy bundles whose cards predate the field —
|
|
16
|
+
* safe because such bundles are by construction Stage 1 or Stage 2, and Stage 2 prefix-extends
|
|
17
|
+
* Stage 1. A future Stage 3 ship will not be safe under the fallback; the loader treats a missing
|
|
18
|
+
* `labels` field as "you are loading a pre-v0.4.0 bundle" rather than "unknown stage".
|
|
14
19
|
*/
|
|
15
20
|
import type { BioLabel } from "@mailwoman/core/decoder";
|
|
16
21
|
/** Coarse component tags trained in Phase 2 Stage 1 (v0.1.0 / v0.2.0). */
|
|
17
22
|
export declare const STAGE1_COARSE_TAGS: readonly ["country", "region", "locality", "dependent_locality", "postcode", "subregion", "cedex"];
|
|
18
23
|
/** BIO label vocabulary for Stage 1 — O + (B-/I- per coarse tag). 1 + 14 = 15 labels. */
|
|
19
24
|
export declare const STAGE1_BIO_LABELS: readonly BioLabel[];
|
|
25
|
+
/**
|
|
26
|
+
* Fine-grained tags added in Phase 2 Stage 2 (v0.3.0). venue covers organization/POI/landmark
|
|
27
|
+
* names; street + house_number break out the street-address components that Stage 1 collapsed to
|
|
28
|
+
* `O`.
|
|
29
|
+
*/
|
|
30
|
+
export declare const STAGE2_FINE_TAGS: readonly ["venue", "street", "house_number"];
|
|
31
|
+
/** Stage 2 ships the full coarse + fine set in the order STAGE2_BIO_LABELS is interleaved. */
|
|
32
|
+
export declare const STAGE2_TAGS: readonly ["country", "region", "locality", "dependent_locality", "postcode", "subregion", "cedex", "venue", "street", "house_number"];
|
|
33
|
+
/**
|
|
34
|
+
* BIO label vocabulary for Stage 2 (v0.3.0) — O + (B-/I- per Stage 2 tag). 1 + 20 = 21 labels.
|
|
35
|
+
*
|
|
36
|
+
* Index parity vs Stage 1: STAGE2_BIO_LABELS[i] === STAGE1_BIO_LABELS[i] for i ∈ [0, 15). Anyone
|
|
37
|
+
* loading a Stage 1 model with this vocabulary still decodes correctly; the tail (15..20) just
|
|
38
|
+
* never gets argmax'd because Stage 1 only emits 15 logits.
|
|
39
|
+
*/
|
|
40
|
+
export declare const STAGE2_BIO_LABELS: readonly BioLabel[];
|
|
41
|
+
export declare const STAGE3_FINE_TAGS: readonly ["street_prefix", "street_suffix", "unit", "po_box", "intersection_a", "intersection_b"];
|
|
42
|
+
export declare const STAGE3_TAGS: readonly ["country", "region", "locality", "dependent_locality", "postcode", "subregion", "cedex", "venue", "street", "house_number", "street_prefix", "street_suffix", "unit", "po_box", "intersection_a", "intersection_b"];
|
|
43
|
+
export declare const STAGE3_BIO_LABELS: readonly BioLabel[];
|
|
20
44
|
//# sourceMappingURL=labels.d.ts.map
|
package/out/labels.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAA;AAEvD,0EAA0E;AAC1E,eAAO,MAAM,kBAAkB,oGAQrB,CAAA;AAEV,yFAAyF;AACzF,eAAO,MAAM,iBAAiB,EAAE,SAAS,QAAQ,EAG/C,CAAA;AAEF;;;;GAIG;AACH,eAAO,MAAM,gBAAgB,8CAA+C,CAAA;AAE5E,8FAA8F;AAC9F,eAAO,MAAM,WAAW,uIAAwD,CAAA;AAEhF;;;;;;GAMG;AACH,eAAO,MAAM,iBAAiB,EAAE,SAAS,QAAQ,EAG/C,CAAA;AAEF,eAAO,MAAM,gBAAgB,mGAOnB,CAAA;AAEV,eAAO,MAAM,WAAW,+NAAiD,CAAA;AAEzE,eAAO,MAAM,iBAAiB,EAAE,SAAS,QAAQ,EAG/C,CAAA"}
|
package/out/labels.js
CHANGED
|
@@ -3,14 +3,19 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*
|
|
6
|
-
* Mirror of `packages/corpus-python/src/mailwoman_train/labels.py
|
|
6
|
+
* Mirror of `packages/corpus-python/src/mailwoman_train/labels.py`.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
* silently corrupts
|
|
8
|
+
* Index ↔ label parity is load-bearing: the model emits logits in one canonical order on both sides
|
|
9
|
+
* and any drift here silently corrupts BIO decoding. STAGE2 strictly extends STAGE1 — the first
|
|
10
|
+
* 15 indices are identical, so reading a v0.2.0 (Stage 1) model with the Stage 2 label vocabulary
|
|
11
|
+
* stays correct; the extra entries are unused.
|
|
10
12
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
13
|
+
* Runtime loading: as of v0.4.0 the trained label vocabulary is carried in `model-card.json`'s
|
|
14
|
+
* `labels` field and read by `loadFromWeights` (see `weights.readLabelsFromModelCard`). These
|
|
15
|
+
* constants remain the compile-time fallback for legacy bundles whose cards predate the field —
|
|
16
|
+
* safe because such bundles are by construction Stage 1 or Stage 2, and Stage 2 prefix-extends
|
|
17
|
+
* Stage 1. A future Stage 3 ship will not be safe under the fallback; the loader treats a missing
|
|
18
|
+
* `labels` field as "you are loading a pre-v0.4.0 bundle" rather than "unknown stage".
|
|
14
19
|
*/
|
|
15
20
|
/** Coarse component tags trained in Phase 2 Stage 1 (v0.1.0 / v0.2.0). */
|
|
16
21
|
export const STAGE1_COARSE_TAGS = [
|
|
@@ -27,4 +32,36 @@ export const STAGE1_BIO_LABELS = Object.freeze([
|
|
|
27
32
|
"O",
|
|
28
33
|
...STAGE1_COARSE_TAGS.flatMap((tag) => [`B-${tag}`, `I-${tag}`]),
|
|
29
34
|
]);
|
|
35
|
+
/**
|
|
36
|
+
* Fine-grained tags added in Phase 2 Stage 2 (v0.3.0). venue covers organization/POI/landmark
|
|
37
|
+
* names; street + house_number break out the street-address components that Stage 1 collapsed to
|
|
38
|
+
* `O`.
|
|
39
|
+
*/
|
|
40
|
+
export const STAGE2_FINE_TAGS = ["venue", "street", "house_number"];
|
|
41
|
+
/** Stage 2 ships the full coarse + fine set in the order STAGE2_BIO_LABELS is interleaved. */
|
|
42
|
+
export const STAGE2_TAGS = [...STAGE1_COARSE_TAGS, ...STAGE2_FINE_TAGS];
|
|
43
|
+
/**
|
|
44
|
+
* BIO label vocabulary for Stage 2 (v0.3.0) — O + (B-/I- per Stage 2 tag). 1 + 20 = 21 labels.
|
|
45
|
+
*
|
|
46
|
+
* Index parity vs Stage 1: STAGE2_BIO_LABELS[i] === STAGE1_BIO_LABELS[i] for i ∈ [0, 15). Anyone
|
|
47
|
+
* loading a Stage 1 model with this vocabulary still decodes correctly; the tail (15..20) just
|
|
48
|
+
* never gets argmax'd because Stage 1 only emits 15 logits.
|
|
49
|
+
*/
|
|
50
|
+
export const STAGE2_BIO_LABELS = Object.freeze([
|
|
51
|
+
"O",
|
|
52
|
+
...STAGE2_TAGS.flatMap((tag) => [`B-${tag}`, `I-${tag}`]),
|
|
53
|
+
]);
|
|
54
|
+
export const STAGE3_FINE_TAGS = [
|
|
55
|
+
"street_prefix",
|
|
56
|
+
"street_suffix",
|
|
57
|
+
"unit",
|
|
58
|
+
"po_box",
|
|
59
|
+
"intersection_a",
|
|
60
|
+
"intersection_b",
|
|
61
|
+
];
|
|
62
|
+
export const STAGE3_TAGS = [...STAGE2_TAGS, ...STAGE3_FINE_TAGS];
|
|
63
|
+
export const STAGE3_BIO_LABELS = Object.freeze([
|
|
64
|
+
"O",
|
|
65
|
+
...STAGE3_TAGS.flatMap((tag) => [`B-${tag}`, `I-${tag}`]),
|
|
66
|
+
]);
|
|
30
67
|
//# sourceMappingURL=labels.js.map
|
package/out/labels.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"labels.js","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"labels.js","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,0EAA0E;AAC1E,MAAM,CAAC,MAAM,kBAAkB,GAAG;IACjC,SAAS;IACT,QAAQ;IACR,UAAU;IACV,oBAAoB;IACpB,UAAU;IACV,WAAW;IACX,OAAO;CACE,CAAA;AAEV,yFAAyF;AACzF,MAAM,CAAC,MAAM,iBAAiB,GAAwB,MAAM,CAAC,MAAM,CAAC;IACnE,GAAe;IACf,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,EAAc,EAAE,KAAK,GAAG,EAAc,CAAC,CAAC;CACxF,CAAC,CAAA;AAEF;;;;GAIG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,OAAO,EAAE,QAAQ,EAAE,cAAc,CAAU,CAAA;AAE5E,8FAA8F;AAC9F,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,GAAG,kBAAkB,EAAE,GAAG,gBAAgB,CAAU,CAAA;AAEhF;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAwB,MAAM,CAAC,MAAM,CAAC;IACnE,GAAe;IACf,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,EAAc,EAAE,KAAK,GAAG,EAAc,CAAC,CAAC;CACjF,CAAC,CAAA;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC/B,eAAe;IACf,eAAe;IACf,MAAM;IACN,QAAQ;IACR,gBAAgB;IAChB,gBAAgB;CACP,CAAA;AAEV,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,gBAAgB,CAAU,CAAA;AAEzE,MAAM,CAAC,MAAM,iBAAiB,GAAwB,MAAM,CAAC,MAAM,CAAC;IACnE,GAAe;IACf,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,EAAc,EAAE,KAAK,GAAG,EAAc,CAAC,CAAC;CACjF,CAAC,CAAA"}
|
package/out/onnx-runner.d.ts
CHANGED
|
@@ -50,7 +50,14 @@ export declare class OnnxRunner {
|
|
|
50
50
|
* back to the actual input length.
|
|
51
51
|
*
|
|
52
52
|
* @param tokenIds The id sequence produced by the tokenizer (no special tokens added).
|
|
53
|
+
* @param anchor Optional postcode-anchor channel (#239/#240). When supplied (only for anchor
|
|
54
|
+
* models — exported with the `anchor_features`/`anchor_confidence` inputs), per-piece features
|
|
55
|
+
* `(seqLen × dim)` + confidence `(seqLen,)` are fed, zero-padded to `fixedSeqLen`. Omit for
|
|
56
|
+
* plain models, whose ONNX has no anchor inputs.
|
|
53
57
|
*/
|
|
54
|
-
infer(tokenIds: number[]
|
|
58
|
+
infer(tokenIds: number[], anchor?: {
|
|
59
|
+
features: ReadonlyArray<ReadonlyArray<number>>;
|
|
60
|
+
confidence: ReadonlyArray<number>;
|
|
61
|
+
}): Promise<InferResult>;
|
|
55
62
|
}
|
|
56
63
|
//# sourceMappingURL=onnx-runner.d.ts.map
|
package/out/onnx-runner.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-runner.d.ts","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;
|
|
1
|
+
{"version":3,"file":"onnx-runner.d.ts","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAOH,MAAM,WAAW,cAAc;IAC9B,wEAAwE;IACxE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,8FAA8F;AAC9F,eAAO,MAAM,qBAAqB,MAAM,CAAA;AAExC,MAAM,WAAW,WAAW;IAC3B,2EAA2E;IAC3E,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;CACjB;AAED,qBAAa,UAAU;IAMrB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,UAAU;IAN5B,OAAO,CAAC,OAAO,CAAoC;IACnD,OAAO,CAAC,WAAW,CAA6C;IAChE,SAAgB,WAAW,EAAE,MAAM,CAAA;IAEnC,OAAO;IAQP,oEAAoE;WACvD,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;IAMtF,6CAA6C;WAChC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;YAMhF,aAAa;IAgB3B;;;;;;;;;;;OAWG;IACG,KAAK,CACV,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,GAC5F,OAAO,CAAC,WAAW,CAAC;CAqDvB"}
|
package/out/onnx-runner.js
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import { promises as fs } from "node:fs";
|
|
16
16
|
import ort from "onnxruntime-node";
|
|
17
|
+
import { ANCHOR_FEATURE_DIM } from "./anchor-inference.js";
|
|
17
18
|
/** Default sequence length for v0.1.0 / v0.2.0 (BertConfig max_position_embeddings = 128). */
|
|
18
19
|
export const DEFAULT_FIXED_SEQ_LEN = 128;
|
|
19
20
|
export class OnnxRunner {
|
|
@@ -64,8 +65,12 @@ export class OnnxRunner {
|
|
|
64
65
|
* back to the actual input length.
|
|
65
66
|
*
|
|
66
67
|
* @param tokenIds The id sequence produced by the tokenizer (no special tokens added).
|
|
68
|
+
* @param anchor Optional postcode-anchor channel (#239/#240). When supplied (only for anchor
|
|
69
|
+
* models — exported with the `anchor_features`/`anchor_confidence` inputs), per-piece features
|
|
70
|
+
* `(seqLen × dim)` + confidence `(seqLen,)` are fed, zero-padded to `fixedSeqLen`. Omit for
|
|
71
|
+
* plain models, whose ONNX has no anchor inputs.
|
|
67
72
|
*/
|
|
68
|
-
async infer(tokenIds) {
|
|
73
|
+
async infer(tokenIds, anchor) {
|
|
69
74
|
const session = await this.ensureSession();
|
|
70
75
|
const seqLen = Math.min(tokenIds.length, this.fixedSeqLen);
|
|
71
76
|
const padded = new BigInt64Array(this.fixedSeqLen);
|
|
@@ -78,6 +83,31 @@ export class OnnxRunner {
|
|
|
78
83
|
input_ids: new ort.Tensor("int64", padded, [1, this.fixedSeqLen]),
|
|
79
84
|
attention_mask: new ort.Tensor("int64", mask, [1, this.fixedSeqLen]),
|
|
80
85
|
};
|
|
86
|
+
if (anchor) {
|
|
87
|
+
const dim = anchor.features[0]?.length ?? 0;
|
|
88
|
+
const af = new Float32Array(this.fixedSeqLen * dim);
|
|
89
|
+
const ac = new Float32Array(this.fixedSeqLen);
|
|
90
|
+
for (let i = 0; i < seqLen; i++) {
|
|
91
|
+
ac[i] = anchor.confidence[i] ?? 0;
|
|
92
|
+
const row = anchor.features[i];
|
|
93
|
+
if (row)
|
|
94
|
+
for (let d = 0; d < dim; d++)
|
|
95
|
+
af[i * dim + d] = row[d] ?? 0;
|
|
96
|
+
}
|
|
97
|
+
feeds.anchor_features = new ort.Tensor("float32", af, [1, this.fixedSeqLen, dim]);
|
|
98
|
+
feeds.anchor_confidence = new ort.Tensor("float32", ac, [1, this.fixedSeqLen]);
|
|
99
|
+
}
|
|
100
|
+
else if (session.inputNames.includes("anchor_features")) {
|
|
101
|
+
// Anchor-trained model (its ONNX declares the anchor inputs as mandatory) but no anchor data
|
|
102
|
+
// was supplied: feed zeros. That's the `confidence = 0` identity — the model's anchor-off
|
|
103
|
+
// behavior. Without it the session throws on the missing required inputs.
|
|
104
|
+
feeds.anchor_features = new ort.Tensor("float32", new Float32Array(this.fixedSeqLen * ANCHOR_FEATURE_DIM), [
|
|
105
|
+
1,
|
|
106
|
+
this.fixedSeqLen,
|
|
107
|
+
ANCHOR_FEATURE_DIM,
|
|
108
|
+
]);
|
|
109
|
+
feeds.anchor_confidence = new ort.Tensor("float32", new Float32Array(this.fixedSeqLen), [1, this.fixedSeqLen]);
|
|
110
|
+
}
|
|
81
111
|
const output = await session.run(feeds);
|
|
82
112
|
const logitsTensor = output.logits;
|
|
83
113
|
if (!logitsTensor)
|
package/out/onnx-runner.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-runner.js","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,GAAG,MAAM,kBAAkB,CAAA;
|
|
1
|
+
{"version":3,"file":"onnx-runner.js","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,GAAG,MAAM,kBAAkB,CAAA;AAElC,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAc1D,8FAA8F;AAC9F,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAA;AASxC,MAAM,OAAO,UAAU;IAMJ;IACA;IANV,OAAO,GAAgC,IAAI,CAAA;IAC3C,WAAW,GAAyC,IAAI,CAAA;IAChD,WAAW,CAAQ;IAEnC,YACkB,SAAiB,EACjB,UAA6B,EAC9C,IAAoB;QAFH,cAAS,GAAT,SAAS,CAAQ;QACjB,eAAU,GAAV,UAAU,CAAmB;QAG9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAA;IAC7D,CAAC;IAED,oEAAoE;IACpE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAiB,EAAE,OAAuB,EAAE;QAC/D,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QACpD,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAED,6CAA6C;IAC7C,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,UAAsB,EAAE,OAAuB,EAAE;QACvE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,UAAU,EAAE,IAAI,CAAC,CAAA;QAC1D,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAEO,KAAK,CAAC,aAAa;QAC1B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAA;QACrC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,IAAI,CAAC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;gBAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;gBAClF,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE;oBACxD,kBAAkB,EAAE,CAAC,KAAK,CAAC;oBAC3B,sBAAsB,EAAE,KAAK;iBAC7B,CAAC,CAAA;gBACF,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;gBACtB,OAAO,OAAO,CAAA;YACf,CAAC,CAAC,EAAE,CAAA;QACL,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,KAAK,CACV,QAAkB,EAClB,MAA8F;QAE9F,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1D,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAClD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,
MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;YAChC,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;QACb,CAAC;QAED,MAAM,KAAK,GAA+B;YACzC,SAAS,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACjE,cAAc,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;SACpE,CAAA;QAED,IAAI,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;YAC3C,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,EAAE,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;gBACjC,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;gBAC9B,IAAI,GAAG;oBAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;wBAAE,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;YACrE,CAAC;YACD,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAA;YACjF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/E,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC3D,6FAA6F;YAC7F,0FAA0F;YAC1F,0EAA0E;YAC1E,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,kBAAkB,CAAC,EAAE;gBAC1G,CAAC;gBACD,IAAI,CAAC,WAAW;gBAChB,kBAAkB;aAClB,CAAC,CAAA;YACF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/G,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACvC,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAA;QAClC,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAA;QACjF,MAAM,IAAI,GAAG,YAAY,CAAC,IAAoB,CAAA;QAC9C,MAAM,CAAC,EAAE,AAAD,EAAG,SAAS,CAAC,GAAG,YAAY,CAAC,IAAyC,CAAA;QAE9E,MAAM,MAAM,GAAe,EAAE,CAAA;QAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE
,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,GAAG,GAAa,IAAI,KAAK,CAAC,SAAS,CAAC,CAAA;YAC1C,MAAM,IAAI,GAAG,CAAC,GAAG,SAAS,CAAA;YAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;gBAAE,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,GAAG,CAAC,CAAE,CAAA;YAC5D,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACjB,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC7B,CAAC;CACD"}
|