@mailwoman/corpus 3.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/out/src/adapters/ban/adapter.d.ts.map +1 -1
  2. package/out/src/adapters/ban/adapter.js +6 -2
  3. package/out/src/adapters/ban/adapter.js.map +1 -1
  4. package/out/src/adapters/ban/street-decompose.d.ts +28 -0
  5. package/out/src/adapters/ban/street-decompose.d.ts.map +1 -0
  6. package/out/src/adapters/ban/street-decompose.js +78 -0
  7. package/out/src/adapters/ban/street-decompose.js.map +1 -0
  8. package/out/src/adapters/synth-po-box/adapter.d.ts +48 -0
  9. package/out/src/adapters/synth-po-box/adapter.d.ts.map +1 -0
  10. package/out/src/adapters/synth-po-box/adapter.js +101 -0
  11. package/out/src/adapters/synth-po-box/adapter.js.map +1 -0
  12. package/out/src/adapters/tiger/adapter.d.ts.map +1 -1
  13. package/out/src/adapters/tiger/adapter.js +9 -3
  14. package/out/src/adapters/tiger/adapter.js.map +1 -1
  15. package/out/src/adapters/tiger/street-decompose.d.ts +30 -0
  16. package/out/src/adapters/tiger/street-decompose.d.ts.map +1 -0
  17. package/out/src/adapters/tiger/street-decompose.js +99 -0
  18. package/out/src/adapters/tiger/street-decompose.js.map +1 -0
  19. package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -1
  20. package/out/src/adapters/usgov-nad/adapter.js +31 -10
  21. package/out/src/adapters/usgov-nad/adapter.js.map +1 -1
  22. package/out/src/adapters/wof-admin-jp/adapter.d.ts +58 -0
  23. package/out/src/adapters/wof-admin-jp/adapter.d.ts.map +1 -0
  24. package/out/src/adapters/wof-admin-jp/adapter.js +129 -0
  25. package/out/src/adapters/wof-admin-jp/adapter.js.map +1 -0
  26. package/out/src/index.d.ts +6 -0
  27. package/out/src/index.d.ts.map +1 -1
  28. package/out/src/index.js +6 -0
  29. package/out/src/index.js.map +1 -1
  30. package/out/src/synthesize-german.d.ts +75 -0
  31. package/out/src/synthesize-german.d.ts.map +1 -0
  32. package/out/src/synthesize-german.js +116 -0
  33. package/out/src/synthesize-german.js.map +1 -0
  34. package/out/src/synthesize-house-venue.d.ts +57 -0
  35. package/out/src/synthesize-house-venue.d.ts.map +1 -0
  36. package/out/src/synthesize-house-venue.js +147 -0
  37. package/out/src/synthesize-house-venue.js.map +1 -0
  38. package/out/src/synthesize-intersection.d.ts +48 -0
  39. package/out/src/synthesize-intersection.d.ts.map +1 -0
  40. package/out/src/synthesize-intersection.js +138 -0
  41. package/out/src/synthesize-intersection.js.map +1 -0
  42. package/out/src/synthesize-no-street.d.ts +70 -0
  43. package/out/src/synthesize-no-street.d.ts.map +1 -0
  44. package/out/src/synthesize-no-street.js +279 -0
  45. package/out/src/synthesize-no-street.js.map +1 -0
  46. package/out/src/synthesize-po-box.d.ts +75 -0
  47. package/out/src/synthesize-po-box.d.ts.map +1 -0
  48. package/out/src/synthesize-po-box.js +186 -0
  49. package/out/src/synthesize-po-box.js.map +1 -0
  50. package/out/src/synthesize-street.d.ts +53 -0
  51. package/out/src/synthesize-street.d.ts.map +1 -0
  52. package/out/src/synthesize-street.js +212 -0
  53. package/out/src/synthesize-street.js.map +1 -0
  54. package/out/src/synthesize.js +1 -1
  55. package/out/src/synthesize.js.map +1 -1
  56. package/package.json +3 -2
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAA;AAC1C,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAE/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,oBAAoB,GAAG,WAAW,CAAA;AAC/C,MAAM,CAAC,MAAM,yBAAyB,GAAG,eAAe,CAAA;AAgDxD,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC;IAC7B,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,+BAA+B;IAC/B,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACJ,CAAC,CAAA;AAEF,SAAS,QAAQ,CAAC,GAAG,MAAwC;IAC5D,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QAC3C,IAAI,OAAO;YAAE,OAAO,OAAO,CAAA;IAC5B,CAAC;IACD,OAAO,SAAS,CAAA;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,CAAY;IACvC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IACnD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAA;IACrB,MAAM,GAAG,GAAG,CAAC,CAAC,UAAU,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,IAAI,CAAC,GAAG;QAAE,OAAO,SAAS,CAAA;IAC1B,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAClD,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAClD,OAAO,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAA;AACrE,CAAC;AAED,SAAS,aAAa,CAAC,CAAY;I
AClC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IACnD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAA;IACrB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,SAAS,CAAC;SAClH,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;SACvC,MAAM,CAAC,OAAO,CAAC,CAAA;IACjB,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;AAClD,CAAC;AAED,SAAS,eAAe,CAAC,CAAY;IACpC,OAAO,QAAQ,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,UAAU,CAAC,CAAA;AACrE,CAAC;AAED,SAAS,eAAe,CAAC,CAAY;IACpC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAChD,IAAI,CAAC,GAAG;QAAE,OAAO,SAAS,CAAA;IAC1B,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAChD,OAAO,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,CAAA;AACvC,CAAC;AAED,SAAS,UAAU,CAAC,KAOnB;IACA,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACrF,MAAM,IAAI,GAAG,GAAG,KAAK,CAAC,QAAQ,KAAK,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAA;IACnE,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,IAAI,SAAS,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAC/E,CAAC;AAED,MAAM,UAAU,qBAAqB;IACpC,OAAO;QACN,EAAE,EAAE,oBAAoB;QACxB,cAAc,EAAE,yBAAyB;QACzC,WAAW,EACV,6GAA6G;QAE9G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,qDAAqD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACrF,CAAC;YAED,qFAAqF;YACrF,mFAAmF;YACnF,2DAA2D;YAC3D,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAElE,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,KAAK,EAAE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBA
CnC,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;oBAAE,MAAK;gBAC/B,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;gBAClF,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;gBAClE,IAAI,CAAC;oBACJ,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;wBAC7B,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;4BAAE,MAAM,KAAK,CAAA;wBACrC,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,MAAM,KAAK,CAAA;wBAClE,IAAI,CAAC,IAAI;4BAAE,SAAQ;wBAEnB,IAAI,MAAiB,CAAA;wBACrB,IAAI,CAAC;4BACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAc,CAAA;wBACvC,CAAC;wBAAC,MAAM,CAAC;4BACR,SAAQ,CAAC,iCAAiC;wBAC3C,CAAC;wBAED,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;wBAClE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC;4BAAE,SAAQ;wBAEvC,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;wBACxC,IAAI,CAAC,QAAQ;4BAAE,SAAQ;wBAEvB,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;wBACxC,IAAI,CAAC,QAAQ;4BAAE,SAAQ;wBAEvB,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAA;wBACpC,MAAM,WAAW,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAA;wBAC9C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,CAAA;wBAEzC,MAAM,UAAU,GAA+B;4BAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC3B,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BACrD,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC7B,QAAQ;4BACR,MAAM,EAAE,KAAK;4BACb,QAAQ;yBACR,CAAA;wBAED,MAAM,GAAG,GAAG,UAAU,CAAC,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAA;wBACzF,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;4BAAE,SAAQ;wBAE9C,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI;4BAC3B,CAAC,CAAC,GAAG,oBAAoB,IAAI,MAAM,CAAC,IAAI,EAAE;4BAC1C,CAAC,CAAC,GAAG,oBAAoB,IAAI,MAAM,CAAC,QAAQ,IAAI,GAAG,KAAK,IAAI,OAAO,EAAE,EAAE,CAAA;wBAExE,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAA
M,EAAE,oBAAoB;4BAC5B,SAAS,EAAE,QAAQ;4BACnB,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,yBAAyB;yBAClC,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;wBAAS,CAAC;oBACV,EAAE,CAAC,KAAK,EAAE,CAAA;oBACV,MAAM,CAAC,OAAO,EAAE,CAAA;gBACjB,CAAC;YACF,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA"}
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAA;AAC1C,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAE/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,oBAAoB,GAAG,WAAW,CAAA;AAC/C,MAAM,CAAC,MAAM,yBAAyB,GAAG,eAAe,CAAA;AAgDxD,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC;IAC7B,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,+BAA+B;IAC/B,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACJ,CAAC,CAAA;AAEF,SAAS,QAAQ,CAAC,GAAG,MAAwC;IAC5D,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QAC3C,IAAI,OAAO;YAAE,OAAO,OAAO,CAAA;IAC5B,CAAC;IACD,OAAO,SAAS,CAAA;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,CAAY;IACvC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IACnD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAA;IACrB,MAAM,GAAG,GAAG,CAAC,CAAC,UAAU,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,IAAI,CAAC,GAAG;QAAE,OAAO,SAAS,CAAA;IAC1B,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAClD,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAClD,OAAO,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAA;AACrE,CAAC;AASD,SAAS,kBAAkB,CAAC,CAAY
;IACvC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAChD,IAAI,IAAI,EAAE,CAAC;QACV,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QACpD,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QACpD,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QACpD,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QACpD,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QACpD,MAAM,MAAM,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,SAAS,CAAA;QAC9E,MAAM,MAAM,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,SAAS,CAAA;QACtE,MAAM,IAAI,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAC9C,CAAC;IACD,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IACnD,IAAI,IAAI;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACvC,OAAO,SAAS,CAAA;AACjB,CAAC;AAED,SAAS,eAAe,CAAC,CAAY;IACpC,OAAO,QAAQ,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,UAAU,CAAC,CAAA;AACrE,CAAC;AAED,SAAS,eAAe,CAAC,CAAY;IACpC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAChD,IAAI,CAAC,GAAG;QAAE,OAAO,SAAS,CAAA;IAC1B,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;IAChD,OAAO,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,CAAA;AACvC,CAAC;AAED,SAAS,UAAU,CAAC,KAQnB;IACA,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACjG,MAAM,IAAI,GAAG,GAAG,KAAK,CAAC,QAAQ,KAAK,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAA;IACnE,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,IAAI,SAAS,EAAE,IAAI,
CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAC/E,CAAC;AAED,MAAM,UAAU,qBAAqB;IACpC,OAAO;QACN,EAAE,EAAE,oBAAoB;QACxB,cAAc,EAAE,yBAAyB;QACzC,WAAW,EACV,6GAA6G;QAE9G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,qDAAqD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACrF,CAAC;YAED,qFAAqF;YACrF,mFAAmF;YACnF,2DAA2D;YAC3D,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAElE,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,KAAK,EAAE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBACnC,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;oBAAE,MAAK;gBAC/B,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;gBAClF,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;gBAClE,IAAI,CAAC;oBACJ,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;wBAC7B,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;4BAAE,MAAM,KAAK,CAAA;wBACrC,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,MAAM,KAAK,CAAA;wBAClE,IAAI,CAAC,IAAI;4BAAE,SAAQ;wBAEnB,IAAI,MAAiB,CAAA;wBACrB,IAAI,CAAC;4BACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAc,CAAA;wBACvC,CAAC;wBAAC,MAAM,CAAC;4BACR,SAAQ,CAAC,iCAAiC;wBAC3C,CAAC;wBAED,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;wBAClE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC;4BAAE,SAAQ;wBAEvC,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;wBACxC,IAAI,CAAC,QAAQ;4BAAE,SAAQ;wBAEvB,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;wBACxC,IAAI,CAAC,QAAQ;4BAAE,SAAQ;wBAEvB,MAAM,UAAU,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAA;wBAC7C,MAAM,WAAW,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAA;wBAC9C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,CAAA;wBACzC,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,CAAA;wBAE9E,MAAM,UAAU,GAA+B;4BAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BA
C3B,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BACrD,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BACnE,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC5D,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BACnE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;4BACzB,QAAQ;4BACR,MAAM,EAAE,KAAK;4BACb,QAAQ;yBACR,CAAA;wBAED,MAAM,GAAG,GAAG,UAAU,CAAC;4BACtB,KAAK;4BACL,WAAW;4BACX,MAAM,EAAE,UAAU,EAAE,IAAI;4BACxB,IAAI;4BACJ,QAAQ;4BACR,MAAM,EAAE,KAAK;4BACb,QAAQ;yBACR,CAAC,CAAA;wBACF,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;4BAAE,SAAQ;wBAE9C,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI;4BAC3B,CAAC,CAAC,GAAG,oBAAoB,IAAI,MAAM,CAAC,IAAI,EAAE;4BAC1C,CAAC,CAAC,GAAG,oBAAoB,IAAI,MAAM,CAAC,QAAQ,IAAI,GAAG,KAAK,IAAI,OAAO,EAAE,EAAE,CAAA;wBAExE,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,oBAAoB;4BAC5B,SAAS,EAAE,QAAQ;4BACnB,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,yBAAyB;yBAClC,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;wBAAS,CAAC;oBACV,EAAE,CAAC,KAAK,EAAE,CAAA;oBACV,MAAM,CAAC,OAAO,EAAE,CAAA;gBACjB,CAAC;YACF,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA"}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `wof-admin-jp`: Japanese admin-hierarchy adapter.
7
+ *
8
+ * PROTOTYPE — not yet wired into the Stage 3 training corpus. Demonstrates the JP address shape and
9
+ * synthesizes BIO-labeled training rows from the global WOF SQLite.
10
+ *
11
+ * JP addresses differ from US/EU in three ways:
12
+ *
13
+ * 1. **Reversed ordering** — region → county → locality → block → house_number "東京都世田谷区南町1-2-3" not
14
+ * "1-2-3 Minamicho, Setagaya, Tokyo"
15
+ * 2. **No street names** — most JP addresses use a block/sub-block grid system. The "neighbourhood"
16
+ * placetype (丁目) is the closest analog to a street but is actually a grid cell.
17
+ * 3. **Postcode-first convention** — addresses are often prefixed with `〒NNN-NNNN`.
18
+ *
19
+ * Schema mapping to ComponentTags (subset of Stage 3 + JP-specific Phase 6 tags):
20
+ *
21
+ * JP concept → WOF placetype → ComponentTag (Phase 6):
22
+ * - 都道府県 (prefecture) → region → region (or prefecture)
23
+ * - 市区町村 (city/ward) → county/locality → locality (or municipality)
24
+ * - 丁目 (chome) → neighbourhood → block (Phase 6 tag); 番地 (banchi) → (synth) → sub_block
25
+ * - 号 (gō) → (synth) → house_number; 〒 (postcode prefix) → — → postcode
26
+ *
27
+ * This adapter currently emits only the admin chain (region → locality → block). House numbers and
28
+ * sub-blocks require a different data source (JP postcode database or real-world address dumps
29
+ * from MLIT/JapanPost).
30
+ */
31
+ import type { CanonicalRow, CorpusAdapter } from "../../types.js";
32
+ export declare const WOF_ADMIN_JP_ADAPTER_ID = "wof-admin-jp"; // adapter id; also used as the `source` value and `source_id` prefix of emitted rows
33
+ interface PlaceRow { // one place row, shaped like the columns read from the WOF `spr` table during the parent-chain walk
34
+ id: number; // place id; also the key used for Japanese-name lookups
35
+ name: string; // default name; used when no Japanese name is known for `id`
36
+ placetype: string; // compared against "region", "locality", "county" and "neighbourhood"
37
+ parent_id: number; // id of the parent place; the chain walk stops once this is not positive
38
+ country: string; // NOTE(review): selected from `spr` but not read by the synthesizer — confirm it is needed
39
+ }
40
+ /**
41
+ * Synthesize a JP address from a hierarchy chain (`jpnNames` maps place id → preferred Japanese name).
42
+ * Returns `null` when the chain has no `region` row or no `locality`/`county` row.
43
+ * Format of `raw`: `<region><locality><neighbourhood>?`, concatenated with no separator.
44
+ * No `〒<postcode>` prefix is emitted by the current implementation.
45
+ * No house numbers yet — needs MLIT data.
46
+ */
47
+ export declare function synthesizeJpAddress(chain: PlaceRow[], jpnNames: Map<number, string>): {
48
+ raw: string;
49
+ components: CanonicalRow["components"];
50
+ } | null;
51
+ /**
52
+ * Build the JP adapter. Reads from the unified global WOF SQLite, walks admin chains starting from
53
+ * neighbourhoods, and yields canonical rows. `rows()` throws if a country other than "JP" is requested.
54
+ */
55
+ export declare function createWofAdminJpAdapter(): CorpusAdapter;
56
+ export declare const wofAdminJpAdapter: CorpusAdapter; // default instance, created by calling createWofAdminJpAdapter() at module load
57
+ export {};
58
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/wof-admin-jp/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAGH,OAAO,KAAK,EAAkB,YAAY,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,uBAAuB,iBAAiB,CAAA;AAErD,UAAU,QAAQ;IACjB,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,OAAO,EAAE,MAAM,CAAA;CACf;AA2BD;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAClC,KAAK,EAAE,QAAQ,EAAE,EACjB,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAC3B;IACF,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,CAAA;CACtC,GAAG,IAAI,CAqBP;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,IAAI,aAAa,CAiDvD;AAED,eAAO,MAAM,iBAAiB,eAA4B,CAAA"}
@@ -0,0 +1,129 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `wof-admin-jp`: Japanese admin-hierarchy adapter.
7
+ *
8
+ * PROTOTYPE — not yet wired into the Stage 3 training corpus. Demonstrates the JP address shape and
9
+ * synthesizes BIO-labeled training rows from the global WOF SQLite.
10
+ *
11
+ * JP addresses differ from US/EU in three ways:
12
+ *
13
+ * 1. **Reversed ordering** — region → county → locality → block → house_number "東京都世田谷区南町1-2-3" not
14
+ * "1-2-3 Minamicho, Setagaya, Tokyo"
15
+ * 2. **No street names** — most JP addresses use a block/sub-block grid system. The "neighbourhood"
16
+ * placetype (丁目) is the closest analog to a street but is actually a grid cell.
17
+ * 3. **Postcode-first convention** — addresses are often prefixed with `〒NNN-NNNN`.
18
+ *
19
+ * Schema mapping to ComponentTags (subset of Stage 3 + JP-specific Phase 6 tags):
20
+ *
21
+ * JP concept → WOF placetype → ComponentTag (Phase 6):
22
+ * - 都道府県 (prefecture) → region → region (or prefecture)
23
+ * - 市区町村 (city/ward) → county/locality → locality (or municipality)
24
+ * - 丁目 (chome) → neighbourhood → block (Phase 6 tag); 番地 (banchi) → (synth) → sub_block
25
+ * - 号 (gō) → (synth) → house_number; 〒 (postcode prefix) → — → postcode
26
+ *
27
+ * This adapter currently emits only the admin chain (region → locality → block). House numbers and
28
+ * sub-blocks require a different data source (JP postcode database or real-world address dumps
29
+ * from MLIT/JapanPost).
30
+ */
31
+ import { DatabaseSync } from "node:sqlite";
32
+ export const WOF_ADMIN_JP_ADAPTER_ID = "wof-admin-jp"; // adapter id; also used as the `source` value and `source_id` prefix of emitted rows
33
+ /** Walk the parent chain upward from `startId`, returning at most 6 `spr` rows (seed row first). NOTE(review): `jpnNames` is accepted but never read here. */
34
+ function chainOf(db, startId, jpnNames) {
35
+ const stmt = db.prepare(`SELECT id, name, placetype, parent_id, country FROM spr WHERE id = ?`);
36
+ const out = [];
37
+ let id = startId;
38
+ for (let i = 0; i < 6 && id > 0; i++) { // hard cap of 6 hops; also stops once the next id is not positive
39
+ const row = stmt.get(id);
40
+ if (!row)
41
+ break; // id not found in `spr`: return whatever was collected so far
42
+ out.push(row);
43
+ id = row.parent_id; // continue with the parent on the next iteration
44
+ }
45
+ return out;
46
+ }
47
+ /** Pick the display name for a place: the `jpnNames` entry for `row.id` when present, otherwise `row.name` (presumably the English/default name — confirm). */
48
+ function pickName(row, jpnNames) {
49
+ return jpnNames.get(row.id) ?? row.name; // `??` falls back only when the map lookup yields null/undefined
50
+ }
51
+ /**
52
+ * Synthesize a JP address from a hierarchy chain (`jpnNames` maps place id → preferred Japanese name).
53
+ * Returns `null` when the chain has no `region` row or no `locality`/`county` row.
54
+ * Format of `raw`: `<region><locality><neighbourhood>?`, concatenated with no separator.
55
+ * No `〒<postcode>` prefix is emitted by the current implementation.
56
+ * No house numbers yet — needs MLIT data.
57
+ */
58
+ export function synthesizeJpAddress(chain, jpnNames) {
59
+ const region = chain.find((r) => r.placetype === "region");
60
+ const locality = chain.find((r) => r.placetype === "locality" || r.placetype === "county"); // first match in chain order wins, so a `county` row may serve as the locality
61
+ if (!region || !locality)
62
+ return null; // both a region and a locality/county are required
63
+ const neighbourhood = chain.find((r) => r.placetype === "neighbourhood");
64
+ const regionName = pickName(region, jpnNames);
65
+ const localityName = pickName(locality, jpnNames);
66
+ const neighbourhoodName = neighbourhood ? pickName(neighbourhood, jpnNames) : null;
67
+ const components = {
68
+ region: regionName,
69
+ locality: localityName,
70
+ country: "JP",
71
+ };
72
+ if (neighbourhoodName)
73
+ components.dependent_locality = neighbourhoodName; // the neighbourhood is tagged as `dependent_locality`
74
+ const raw = [regionName, localityName, neighbourhoodName].filter(Boolean).join(""); // largest to smallest unit, no separator; falsy names are dropped
75
+ return { raw, components };
76
+ }
77
+ /**
78
+ * Build the JP adapter. Reads from the unified global WOF SQLite, walks admin chains starting from
79
+ * neighbourhoods, and yields canonical rows. `rows()` throws if a country other than "JP" is requested.
80
+ */
81
+ export function createWofAdminJpAdapter() {
82
+ return {
83
+ id: WOF_ADMIN_JP_ADAPTER_ID,
84
+ defaultLicense: "CC-BY-4.0",
85
+ description: "Japanese admin hierarchy from WOF (synthesized addresses without house numbers).",
86
+ async *rows(opts) {
87
+ if (opts.country && opts.country !== "JP") {
88
+ throw new Error(`wof-admin-jp adapter: only JP supported, got country=${opts.country}`);
89
+ }
90
+ const db = new DatabaseSync(opts.inputPath, { readOnly: true }); // opened read-only
91
+ try {
92
+ const jpnNamesStmt = db.prepare(`SELECT id, name FROM names WHERE language = 'jpn'`);
93
+ const jpnNames = new Map();
94
+ for (const row of jpnNamesStmt.all()) { // every `jpn` name row is read into memory up front
95
+ if (!jpnNames.has(row.id))
96
+ jpnNames.set(row.id, row.name); // keep only the first name returned for each id
97
+ }
98
+ const seeds = db.prepare(`SELECT id FROM spr WHERE country='JP' AND placetype='neighbourhood'`).all();
99
+ let emitted = 0;
100
+ for (const seed of seeds) {
101
+ if (opts.signal?.aborted)
102
+ break; // caller requested cancellation
103
+ if (opts.limit !== undefined && emitted >= opts.limit)
104
+ break; // row limit reached; only yielded rows count toward it
105
+ const chain = chainOf(db, seed.id, jpnNames);
106
+ const synth = synthesizeJpAddress(chain, jpnNames);
107
+ if (!synth)
108
+ continue; // chain lacked a region or a locality/county
109
+ yield {
110
+ raw: synth.raw,
111
+ components: synth.components,
112
+ country: "JP",
113
+ locale: "ja-JP",
114
+ source: WOF_ADMIN_JP_ADAPTER_ID,
115
+ source_id: `${WOF_ADMIN_JP_ADAPTER_ID}-${seed.id}`,
116
+ corpus_version: "", // NOTE(review): left empty here — presumably filled in downstream; confirm
117
+ license: "CC-BY-4.0",
118
+ };
119
+ emitted++;
120
+ }
121
+ }
122
+ finally {
123
+ db.close(); // release the SQLite handle on every exit path from the try block
124
+ }
125
+ },
126
+ };
127
+ }
128
+ export const wofAdminJpAdapter = createWofAdminJpAdapter(); // default instance, created once at module load
129
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/wof-admin-jp/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAG1C,MAAM,CAAC,MAAM,uBAAuB,GAAG,cAAc,CAAA;AAgBrD,wCAAwC;AACxC,SAAS,OAAO,CAAC,EAAgB,EAAE,OAAe,EAAE,QAA6B;IAChF,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,sEAAsE,CAAC,CAAA;IAC/F,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,IAAI,EAAE,GAAG,OAAO,CAAA;IAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAyB,CAAA;QAChD,IAAI,CAAC,GAAG;YAAE,MAAK;QACf,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACb,EAAE,GAAG,GAAG,CAAC,SAAS,CAAA;IACnB,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED,6FAA6F;AAC7F,SAAS,QAAQ,CAAC,GAAa,EAAE,QAA6B;IAC7D,OAAO,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAA;AACxC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAClC,KAAiB,EACjB,QAA6B;IAK7B,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAA;IAC1D,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,UAAU,IAAI,CAAC,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAA;IAC1F,IAAI,CAAC,MAAM,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAErC,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,eAAe,CAAC,CAAA;IAExE,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAA;IAC7C,MAAM,YAAY,GAAG,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;IACjD,MAAM,iBAAiB,GAAG,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IAElF,MAAM,UAAU,GAA+B;QAC9C,MAAM,EAAE,UAAU;QAClB,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE,IAAI;KACb,CAAA;IACD,IAAI,iBAAiB;QAAE,UAAU,CAAC,kBAAkB,GAAG,iBAAiB,CAAA;IAExE,MAAM,GAAG,GAAG,CAAC,UAAU,EAAE,YAAY,EAAE,iBAAiB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAElF,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,CAAA;AAC3B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB;IACtC,OAAO;QACN,EAAE,EAAE,uBAAuB;QAC3B,cAAc,EAAE,WAAW;QAC3B,WAAW,EAAE,kFAAkF;QAE/F,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB
;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,wDAAwD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACxF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/D,IAAI,CAAC;gBACJ,MAAM,YAAY,GAAG,EAAE,CAAC,OAAO,CAAC,mDAAmD,CAAC,CAAA;gBACpF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAA;gBAC1C,KAAK,MAAM,GAAG,IAAI,YAAY,CAAC,GAAG,EAAoC,EAAE,CAAC;oBACxE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;wBAAE,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,IAAI,CAAC,CAAA;gBAC1D,CAAC;gBAED,MAAM,KAAK,GAAG,EAAE,CAAC,OAAO,CAAC,qEAAqE,CAAC,CAAC,GAAG,EAEhG,CAAA;gBAEH,IAAI,OAAO,GAAG,CAAC,CAAA;gBACf,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBAC1B,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,KAAK,GAAG,OAAO,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAA;oBAC5C,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;oBAClD,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM;wBACL,GAAG,EAAE,KAAK,CAAC,GAAG;wBACd,UAAU,EAAE,KAAK,CAAC,UAAU;wBAC5B,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,uBAAuB;wBAC/B,SAAS,EAAE,GAAG,uBAAuB,IAAI,IAAI,CAAC,EAAE,EAAE;wBAClD,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,WAAW;qBACpB,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,uBAAuB,EAAE,CAAA"}
@@ -12,6 +12,12 @@ export * from "./golden.js";
12
12
  export * from "./parquet.js";
13
13
  export * from "./runner.js";
14
14
  export * from "./split.js";
15
+ export * from "./synthesize-german.js";
16
+ export * from "./synthesize-house-venue.js";
17
+ export * from "./synthesize-intersection.js";
18
+ export * from "./synthesize-no-street.js";
19
+ export * from "./synthesize-po-box.js";
20
+ export * from "./synthesize-street.js";
15
21
  export * from "./synthesize.js";
16
22
  export * from "./tokenize.js";
17
23
  export * from "./types.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,cAAc,CAAA;AAC5B,cAAc,qBAAqB,CAAA;AACnC,cAAc,YAAY,CAAA;AAC1B,cAAc,YAAY,CAAA;AAC1B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,cAAc,CAAA;AAC5B,cAAc,aAAa,CAAA;AAC3B,cAAc,YAAY,CAAA;AAC1B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,eAAe,CAAA;AAC7B,cAAc,YAAY,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,cAAc,CAAA;AAC5B,cAAc,qBAAqB,CAAA;AACnC,cAAc,YAAY,CAAA;AAC1B,cAAc,YAAY,CAAA;AAC1B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,cAAc,CAAA;AAC5B,cAAc,aAAa,CAAA;AAC3B,cAAc,YAAY,CAAA;AAC1B,cAAc,wBAAwB,CAAA;AACtC,cAAc,6BAA6B,CAAA;AAC3C,cAAc,8BAA8B,CAAA;AAC5C,cAAc,2BAA2B,CAAA;AACzC,cAAc,wBAAwB,CAAA;AACtC,cAAc,wBAAwB,CAAA;AACtC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,eAAe,CAAA;AAC7B,cAAc,YAAY,CAAA"}
package/out/src/index.js CHANGED
@@ -12,6 +12,12 @@ export * from "./golden.js";
12
12
  export * from "./parquet.js";
13
13
  export * from "./runner.js";
14
14
  export * from "./split.js";
15
+ export * from "./synthesize-german.js";
16
+ export * from "./synthesize-house-venue.js";
17
+ export * from "./synthesize-intersection.js";
18
+ export * from "./synthesize-no-street.js";
19
+ export * from "./synthesize-po-box.js";
20
+ export * from "./synthesize-street.js";
15
21
  export * from "./synthesize.js";
16
22
  export * from "./tokenize.js";
17
23
  export * from "./types.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,cAAc,CAAA;AAC5B,cAAc,qBAAqB,CAAA;AACnC,cAAc,YAAY,CAAA;AAC1B,cAAc,YAAY,CAAA;AAC1B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,cAAc,CAAA;AAC5B,cAAc,aAAa,CAAA;AAC3B,cAAc,YAAY,CAAA;AAC1B,cAAc,iBAAiB,CAAA;AAC/B,cAAc,eAAe,CAAA;AAC7B,cAAc,YAAY,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,cAAc,CAAA;AAC5B,cAAc,qBAAqB,CAAA;AACnC,cAAc,YAAY,CAAA;AAC1B,cAAc,YAAY,CAAA;AAC1B,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,cAAc,CAAA;AAC5B,cAAc,aAAa,CAAA;AAC3B,cAAc,YAAY,CAAA;AAC1B,cAAc,wBAAwB,CAAA;AACtC,cAAc,6BAA6B,CAAA;AAC3C,cAAc,8BAA8B,CAAA;AAC5C,cAAc,2BAA2B,CAAA;AACzC,cAAc,wBAAwB,CAAA;AACtC,cAAc,wBAAwB,CAAA;AACtC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,eAAe,CAAA;AAC7B,cAAc,YAAY,CAAA"}
@@ -0,0 +1,75 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * German address synthesizer — multi-locale coverage (night-shift 2026-06-02, DE-1).
7
+ *
8
+ * The neural model is out-of-distribution on German: it truncates `Straußstraße`→`Strau` (exits at
9
+ * the ß-piece boundary), absorbs the house number into the street (`Hauptstraße 5` → one span),
10
+ * and mis-tags the native-order house number as a postcode (`Prenzlauer Allee 36, 10405 Berlin` →
11
+ * postcode `36`). The cause is ORDER: the model was trained US+FR (house-number-FIRST,
12
+ * postcode-AFTER-city), and never saw the German convention (house-number-AFTER-street,
13
+ * postcode-BEFORE-city). DE-0 confirmed the tokenizer round-trips German orthography cleanly, so
14
+ * this is a coverage gap, not a tokenizer ceiling.
15
+ *
16
+ * This generator produces the missing signal as a small targeted supplement shard
17
+ * (synthesis-as-supplement discipline: weight < 0.25, one-and-done). It does NOT synthesize
18
+ * German street names (German morphology is hard to fake) — it takes REAL German component tuples
19
+ * (from OpenAddresses Berlin/Saxony) and renders them in idiomatic German order via the OpenCage
20
+ * `DE` template (`formatAddress(..., "DE")` → `"Straußstraße 27, 12623 Berlin"`). The corpus
21
+ * aligner turns the row into BIO labels; every emitted component surface form occurs verbatim in
22
+ * `raw` so alignment lands.
23
+ */
24
+ import type { CanonicalRow } from "./types.js";
25
/**
 * A real address tuple (e.g. one OpenAddresses row): street + locality required, rest optional.
 *
 * The values are used as the component surface forms of the synthesized row, so each kept value
 * has to reappear verbatim in the rendered string or the whole row is rejected.
 */
export interface LocaleBaseTuple {
    /** House number; when present it is kept in roughly 80% of rendered rows. */
    house_number?: string;
    /** Street name. An empty value makes the synthesizer return `null`. */
    street: string;
    /** Locality name. An empty value makes the synthesizer return `null`. */
    locality: string;
    /** Admin region; only carried into the row when the rendering order is `"international"`. */
    region?: string;
    /** Postcode; when present it is kept in roughly 85% of rendered rows (NL values are re-spaced). */
    postcode?: string;
}
/** @deprecated Alias — use LocaleBaseTuple. */
export type GermanBaseTuple = LocaleBaseTuple;
35
/** One synthesized row as returned by `synthesizeLocaleRow` / `synthesizeGermanRow`. */
export interface SynthesizedLocaleRow {
    /** The rendered address string. */
    raw: string;
    /** The components that were passed to the formatter; each value occurs verbatim in `raw`. */
    components: CanonicalRow["components"];
    /** Locale tag for the requested country (e.g. `de-DE`), or the lower-cased country code when no tag is mapped. */
    locale: string;
}
/** @deprecated Alias — use SynthesizedLocaleRow. */
export type SynthesizedGermanRow = SynthesizedLocaleRow;
42
/** Options accepted by `synthesizeLocaleRow` / `synthesizeGermanRow`. */
export interface LocaleSynthesisOpts {
    /**
     * Random source for the keep/drop decisions on house number and postcode; its result is compared
     * against fixed thresholds. Defaults to `Math.random`. Pass a seeded function for reproducible rows.
     */
    random?: () => number;
    /**
     * Rendering order for the SAME components. `"native"` (default) uses the country's own template
     * (DE → house-AFTER-street, postcode-BEFORE-city). `"international"` renders house-FIRST,
     * postcode-AFTER-city — the US/GB layout that international feeds, US-centric systems, and our own
     * OpenAddresses de-sample impose on non-US addresses. Training both teaches the model that a
     * German address can arrive either way, so the eval's US-order rendering stops reading as a
     * collapse. See `docs/articles/evals/2026-06-06-anchor-pilot.md` (the order-artifact correction).
     */
    order?: "native" | "international";
}
/** @deprecated Alias — use LocaleSynthesisOpts. */
export type GermanSynthesisOpts = LocaleSynthesisOpts;
56
/**
 * Render one real tuple into an idiomatic, locale-ordered `{raw, components}` row via the OpenCage
 * `country` template (DE → house-after-street + postcode-before-city; ES/IT the same; GB
 * house-first; NL carries the `1012 LM` postcode), with light variation (drop house number /
 * postcode some of the time). Returns `null` when the tuple is too thin or a component wouldn't
 * align cleanly.
 *
 * Region handling is order-dependent: NATIVE order omits it (the native template absorbs the admin
 * region into the postcode/city line, so it rarely renders verbatim and would break BIO alignment),
 * while INTERNATIONAL order includes it in the tail ("City, Region Postcode" — the US/feed layout the
 * eval uses; v0.9.3 / #327).
 *
 * Pass `opts.order: "international"` to render the same components house-first / postcode-after-city
 * instead (see {@link LocaleSynthesisOpts.order}) — the layout international feeds impose on foreign
 * addresses, and the one a native-order-trained model treats as a "collapse."
 *
 * @param base Real address tuple to render; `street` and `locality` must be non-empty.
 * @param country Country code selecting the native template and the `locale` tag of the result.
 * @param opts Random source and rendering order; see {@link LocaleSynthesisOpts}.
 * @returns The rendered row, or `null` when the row is rejected.
 */
export declare function synthesizeLocaleRow(base: LocaleBaseTuple, country: string, opts?: LocaleSynthesisOpts): SynthesizedLocaleRow | null;
73
/**
 * German wrapper over {@link synthesizeLocaleRow}. Kept for the build-german-shard caller + tests.
 *
 * @param base Real address tuple to render.
 * @param opts Random source and rendering order, forwarded unchanged.
 * @returns The rendered row with the country fixed to `"DE"`, or `null` when the row is rejected.
 */
export declare function synthesizeGermanRow(base: LocaleBaseTuple, opts?: LocaleSynthesisOpts): SynthesizedLocaleRow | null;
75
+ //# sourceMappingURL=synthesize-german.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"synthesize-german.d.ts","sourceRoot":"","sources":["../../src/synthesize-german.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C,oGAAoG;AACpG,MAAM,WAAW,eAAe;IAC/B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,MAAM,CAAA;CACjB;AACD,+CAA+C;AAC/C,MAAM,MAAM,eAAe,GAAG,eAAe,CAAA;AAE7C,MAAM,WAAW,oBAAoB;IACpC,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,CAAA;IACtC,MAAM,EAAE,MAAM,CAAA;CACd;AACD,oDAAoD;AACpD,MAAM,MAAM,oBAAoB,GAAG,oBAAoB,CAAA;AAEvD,MAAM,WAAW,mBAAmB;IACnC,MAAM,CAAC,EAAE,MAAM,MAAM,CAAA;IACrB;;;;;;;OAOG;IACH,KAAK,CAAC,EAAE,QAAQ,GAAG,eAAe,CAAA;CAClC;AACD,mDAAmD;AACnD,MAAM,MAAM,mBAAmB,GAAG,mBAAmB,CAAA;AAuCrD;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,mBAAmB,CAClC,IAAI,EAAE,eAAe,EACrB,OAAO,EAAE,MAAM,EACf,IAAI,GAAE,mBAAwB,GAC5B,oBAAoB,GAAG,IAAI,CA+B7B;AAED,uGAAuG;AACvG,wBAAgB,mBAAmB,CAClC,IAAI,EAAE,eAAe,EACrB,IAAI,GAAE,mBAAwB,GAC5B,oBAAoB,GAAG,IAAI,CAE7B"}
@@ -0,0 +1,116 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * German address synthesizer — multi-locale coverage (night-shift 2026-06-02, DE-1).
7
+ *
8
+ * The neural model is out-of-distribution on German: it truncates `Straußstraße`→`Strau` (exits at
9
+ * the ß-piece boundary), absorbs the house number into the street (`Hauptstraße 5` → one span),
10
+ * and mis-tags the native-order house number as a postcode (`Prenzlauer Allee 36, 10405 Berlin` →
11
+ * postcode `36`). The cause is ORDER: the model was trained US+FR (house-number-FIRST,
12
+ * postcode-AFTER-city), and never saw the German convention (house-number-AFTER-street,
13
+ * postcode-BEFORE-city). DE-0 confirmed the tokenizer round-trips German orthography cleanly, so
14
+ * this is a coverage gap, not a tokenizer ceiling.
15
+ *
16
+ * This generator produces the missing signal as a small targeted supplement shard
17
+ * (synthesis-as-supplement discipline: weight < 0.25, one-and-done). It does NOT synthesize
18
+ * German street names (German morphology is hard to fake) — it takes REAL German component tuples
19
+ * (from OpenAddresses Berlin/Saxony) and renders them in idiomatic German order via the OpenCage
20
+ * `DE` template (`formatAddress(..., "DE")` → `"Straußstraße 27, 12623 Berlin"`). The corpus
21
+ * aligner turns the row into BIO labels; every emitted component surface form occurs verbatim in
22
+ * `raw` so alignment lands.
23
+ */
24
+ import { formatAddress } from "./format.js";
25
/**
 * ISO-3166 alpha-2 → BCP-47 tag for the emitted rows (primary language per country).
 *
 * Read by `synthesizeLocaleRow` to fill the `locale` field of each returned row; a country code
 * with no entry here falls back to the lower-cased country code instead.
 */
const LOCALE_TAG = {
    DE: "de-DE",
    ES: "es-ES",
    IT: "it-IT",
    NL: "nl-NL",
    GB: "en-GB",
    FR: "fr-FR",
    US: "en-US",
};
35
/**
 * Canonicalize a postcode to the form the country's template renders, so the stored component aligns
 * verbatim against `raw`. NL is the case that needs it: OA stores `1011AB` but the OpenCage NL template
 * emits the conventional spaced `1011 AB` (4 digits + space + 2 letters), which otherwise fails verbatim
 * alignment and drops the row. Other countries pass through unchanged.
 *
 * Whitespace around an NL postcode (e.g. `" 1011ab "`) is tolerated and does not survive into the
 * canonical form; without that, a stray leading/trailing blank in the source data would skip
 * canonicalization entirely. A value that does not look like an NL postcode is returned untouched.
 *
 * @param postcode Postcode as stored in the source tuple.
 * @param country Country code; only the exact value `"NL"` triggers rewriting.
 * @returns `"1234 AB"` for a recognizable NL postcode, otherwise `postcode` unchanged.
 */
function normalizePostcode(postcode, country) {
    if (country !== "NL")
        return postcode;
    // 4 digits, optional inner whitespace, 2 letters — surrounding whitespace is allowed and discarded.
    const match = /^\s*(\d{4})\s*([A-Za-z]{2})\s*$/.exec(postcode);
    if (!match)
        return postcode;
    return `${match[1]} ${match[2].toUpperCase()}`;
}
49
/**
 * Decide whether `value` can be located cleanly inside the rendered string `raw`.
 *
 * The value must occur verbatim. For an all-digit value, the FIRST occurrence must additionally
 * not be glued to another digit on either side — that rules out matches that are really a slice
 * of a longer number (e.g. house "2" found inside postcode "12623"). Later occurrences are not
 * examined, and non-numeric values get no boundary check.
 *
 * @param raw Rendered address string.
 * @param value Component surface form to look for.
 * @returns `true` when the value is present and, if numeric, its first occurrence stands alone.
 */
function tokenPresent(raw, value) {
    const start = raw.indexOf(value);
    if (start === -1)
        return false;
    // Only pure digit runs can collide with a neighbouring number.
    if (!/^\d+$/.test(value))
        return true;
    const touchesDigit = (ch) => ch !== undefined && ch >= "0" && ch <= "9";
    const leftNeighbour = raw[start - 1];
    const rightNeighbour = raw[start + value.length];
    return !(touchesDigit(leftNeighbour) || touchesDigit(rightNeighbour));
}
62
/**
 * Turn one real address tuple into a `{raw, components, locale}` row rendered through the
 * formatter's template for `country` (or the US template when international order is requested).
 *
 * Variation: a present house number is kept when the random draw is below 0.8, a present postcode
 * when the next draw is below 0.85. A draw is only consumed for a field that is actually present,
 * and the template choice consumes none, so the draw sequence seen by seeded callers is fixed.
 *
 * Region: carried into the row only for `opts.order === "international"` (the "City, Region
 * Postcode" tail; v0.9.3 / #327). Native order leaves it out because the native template absorbs it
 * into the city line, which would break verbatim alignment.
 *
 * @param base Real address tuple; `street` and `locality` must be non-empty.
 * @param country Country code selecting the native template, postcode canonicalization and locale tag.
 * @param opts Optional `random` source (default `Math.random`) and `order` (default `"native"`).
 * @returns The row, or `null` when street/locality is missing, the formatter yields nothing, or any
 *   kept component does not surface cleanly in the rendered string.
 */
export function synthesizeLocaleRow(base, country, opts = {}) {
    const draw = opts.random ?? Math.random;
    const international = (opts.order ?? "native") === "international";
    if (!(base.street && base.locality))
        return null;
    // Insertion order (street, locality, house_number, postcode, region) is kept as-is.
    const components = { street: base.street, locality: base.locality };
    // First draw: street-only forms are idiomatic too, so ~20% of rows lose the house number.
    if (base.house_number && draw() < 0.8) {
        components.house_number = base.house_number;
    }
    // Second draw: postcode, stored in the form the country's template renders (NL gets spaced).
    if (base.postcode && draw() < 0.85) {
        components.postcode = normalizePostcode(base.postcode, country);
    }
    // No draw here: the region rides along whenever international order is requested.
    if (international && base.region) {
        components.region = base.region;
    }
    // International order borrows the US template (house-first, postcode-after-city, region slot).
    const templateCountry = international ? "US" : country;
    const raw = formatAddress(components, templateCountry, { separator: ", " });
    if (!raw)
        return null;
    // Reject the row unless every kept component surfaces verbatim and free of digit collisions.
    const everyComponentAligns = Object.values(components).every((surface) => Boolean(surface) && tokenPresent(raw, surface));
    if (!everyComponentAligns)
        return null;
    const locale = LOCALE_TAG[country] ?? country.toLowerCase();
    return { raw, components, locale };
}
112
/**
 * Convenience entry point for German rows: delegates to {@link synthesizeLocaleRow} with the
 * country fixed to `"DE"`. Retained for the build-german-shard caller and its tests.
 *
 * @param base Real address tuple to render.
 * @param opts Random source and rendering order, forwarded unchanged.
 * @returns Whatever {@link synthesizeLocaleRow} returns for country `"DE"`.
 */
export function synthesizeGermanRow(base, opts = {}) {
    const germany = "DE";
    return synthesizeLocaleRow(base, germany, opts);
}
116
+ //# sourceMappingURL=synthesize-german.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"synthesize-german.js","sourceRoot":"","sources":["../../src/synthesize-german.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAqC3C,yFAAyF;AACzF,MAAM,UAAU,GAA2B;IAC1C,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;IACX,EAAE,EAAE,OAAO;CACX,CAAA;AAED;;;;;GAKG;AACH,SAAS,iBAAiB,CAAC,QAAgB,EAAE,OAAe;IAC3D,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,2BAA2B,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACpD,IAAI,CAAC;YAAE,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,EAAE,CAAA;IAC/C,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED,qGAAqG;AACrG,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa;IAC/C,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IACtC,2FAA2F;IAC3F,MAAM,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;IAC5B,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IACzB,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAA;IACnC,MAAM,OAAO,GAAG,CAAC,CAAqB,EAAE,EAAE,CAAC,CAAC,KAAK,SAAS,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,CAAA;IAClF,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC;QAAE,OAAO,KAAK,CAAA;IAC5E,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,mBAAmB,CAClC,IAAqB,EACrB,OAAe,EACf,OAA4B,EAAE;IAE9B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAA;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,QAAQ,CAAA;IACpC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAE/C,MAAM,UAAU,GAA+B,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAA;IAC/F,+EAA+E;IAC/E,IAAI,IAAI,CAAC,YAAY,IAAI,MAAM,EAAE,GAAG,GAAG;QAAE,UAAU,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,CAAA;IACpF,wFAAwF;IACxF,IAAI,IAAI,CAAC,QAAQ,IAAI,MAAM,EAAE,GAAG,IAAI;QAAE,UAAU,CAAC,QAAQ,GAAG,iBAAiB,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;IACrG,iGAAiG;IACjG,oGAAoG;IACpG,kGAAkG;IAClG,mGAAmG;IACnG,6FAA6F;IAC7F,IAAI,KAAK,KAAK,eAAe,IAAI,IAAI,CAAC,MAAM;QAAE,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,MAA
M,CAAA;IAE7E,mGAAmG;IACnG,+FAA+F;IAC/F,oGAAoG;IACpG,MAAM,aAAa,GAAG,KAAK,KAAK,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAA;IAChE,MAAM,GAAG,GAAG,aAAa,CAAC,UAAU,EAAE,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACzE,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IAErB,8FAA8F;IAC9F,4DAA4D;IAC5D,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/C,IAAI,CAAC,KAAK,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,KAAK,CAAC;YAAE,OAAO,IAAI,CAAA;IACrD,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,MAAM,EAAE,UAAU,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,WAAW,EAAE,EAAE,CAAA;AACjF,CAAC;AAED,uGAAuG;AACvG,MAAM,UAAU,mBAAmB,CAClC,IAAqB,EACrB,OAA4B,EAAE;IAE9B,OAAO,mBAAmB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;AAC7C,CAAC"}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * House-number + venue + street co-occurrence synthesizer. The v0.6.3 corrective shard.
7
+ *
8
+ * The v0.6.2 step-20K diagnostic showed that adding synth-no-street counter-distribution regressed
9
+ * house_number recall by ~4-5pp. DeepSeek's turn-8 root-cause:
10
+ *
11
+ * 1. Direct: `5th Avenue Theatre`-style adversarial venues teach the model that tokens like "5th"
12
+ * belong to venues, not house_numbers. (Fixed in `synthesize-no-street.ts` by removing
13
+ * digit+ordinal venue patterns.)
14
+ * 2. Distributional dilution: synth-no-street adds 122K rows where house_number is absent. The model's
15
+ * training distribution shifts toward "house_number is rare," and it under-emits the tag at
16
+ * inference.
17
+ *
18
+ * This synthesizer fixes #2 directly. Each emitted row has ALL of: house_number, street, venue,
19
+ * locality, region, postcode — a counter-example to "house_number is rare." Used as a companion
20
+ * shard to synth-no-street; the v0.6.3 config weights synth-no-street at 0.5 and
21
+ * synth-house-venue at 1.0 to recover the lost house_number signal.
22
+ *
23
+ * Real-world shape: business cards, mailing labels, store directories — `"123 Main St, Sunrise
24
+ * Bakery, Springfield, IL 62701"` is a perfectly ordinary address form.
25
+ *
26
+ * Venue pool: PLAIN_VENUES from `synthesize-no-street.ts` (re-exported here). Adversarial venues
27
+ * are deliberately NOT used here — the point is to teach co-occurrence, not to re-introduce
28
+ * decompose-mode pressure.
29
+ */
30
+ import type { CanonicalRow } from "./types.js";
31
/**
 * Input tuple for `synthesizeHouseVenueRow`: locality, region, postcode and country are required,
 * street and house number are optional.
 *
 * NOTE(review): what happens when `street` / `houseNumber` is absent (value synthesized vs. row
 * rejected) is decided by the implementation, which this declaration file does not show — confirm there.
 */
export interface HouseVenueBaseTuple {
    locality: string;
    region: string;
    postcode: string;
    country: string;
    street?: string;
    houseNumber?: string;
}
39
/**
 * Identifier of the layout a house+venue row was rendered with.
 * NOTE(review): presumably names where the venue sits relative to the street part — confirm against
 * the implementation.
 */
export type HouseVenueTemplate = "venue-after-street" | "venue-before-street";
40
/** Options accepted by `synthesizeHouseVenueRow`. */
export interface HouseVenueSynthesisOpts {
    /** Random source. NOTE(review): default and exact use are not visible in this declaration — confirm. */
    random?: () => number;
    /** NOTE(review): presumably pins the layout instead of letting the synthesizer choose — confirm. */
    forceTemplate?: HouseVenueTemplate;
}
44
/** One row produced by `synthesizeHouseVenueRow`. */
export interface SynthesizedHouseVenueRow {
    /** The rendered address string. */
    raw: string;
    /** The labelled components of the row. */
    components: CanonicalRow["components"];
    /** Locale tag of the row. NOTE(review): derivation is not visible in this declaration — confirm. */
    locale: string;
    /** The layout recorded for this row. */
    template: HouseVenueTemplate;
}
50
/**
 * Synthesize one house-number + venue + street co-occurrence row from a base tuple. Per the module
 * header, each emitted row carries house_number, street, venue, locality, region and postcode.
 *
 * @param base Base tuple supplying the real address parts.
 * @param opts Optional random source and forced template.
 * @returns The synthesized row, or `null`.
 *   NOTE(review): the conditions that lead to `null` are not visible in this declaration — confirm.
 */
export declare function synthesizeHouseVenueRow(base: HouseVenueBaseTuple, opts?: HouseVenueSynthesisOpts): SynthesizedHouseVenueRow | null;
51
/**
 * Contract: every synthesized row carries BOTH house_number AND venue (the co-occurrence signal
 * that synth-no-street's distributional shift cost the model). Used by tests + downstream
 * consumers.
 *
 * @param components Component map of a row to check.
 * @returns NOTE(review): presumably `true` exactly when both `house_number` and `venue` are present
 *   — the implementation is not visible in this declaration; confirm.
 */
export declare function hasHouseNumberAndVenue(components: CanonicalRow["components"]): boolean;
57
+ //# sourceMappingURL=synthesize-house-venue.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"synthesize-house-venue.d.ts","sourceRoot":"","sources":["../../src/synthesize-house-venue.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C,MAAM,WAAW,mBAAmB;IACnC,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,MAAM,kBAAkB,GAC3B,oBAAoB,GACpB,qBAAqB,CAAA;AAExB,MAAM,WAAW,uBAAuB;IACvC,MAAM,CAAC,EAAE,MAAM,MAAM,CAAA;IACrB,aAAa,CAAC,EAAE,kBAAkB,CAAA;CAClC;AAED,MAAM,WAAW,wBAAwB;IACxC,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,CAAA;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,kBAAkB,CAAA;CAC5B;AA8FD,wBAAgB,uBAAuB,CACtC,IAAI,EAAE,mBAAmB,EACzB,IAAI,GAAE,uBAA4B,GAChC,wBAAwB,GAAG,IAAI,CA6BjC;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,GAAG,OAAO,CAEtF"}