@mailwoman/corpus 3.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/out/src/adapters/ban/adapter.d.ts.map +1 -1
  2. package/out/src/adapters/ban/adapter.js +6 -2
  3. package/out/src/adapters/ban/adapter.js.map +1 -1
  4. package/out/src/adapters/ban/street-decompose.d.ts +28 -0
  5. package/out/src/adapters/ban/street-decompose.d.ts.map +1 -0
  6. package/out/src/adapters/ban/street-decompose.js +78 -0
  7. package/out/src/adapters/ban/street-decompose.js.map +1 -0
  8. package/out/src/adapters/synth-po-box/adapter.d.ts +48 -0
  9. package/out/src/adapters/synth-po-box/adapter.d.ts.map +1 -0
  10. package/out/src/adapters/synth-po-box/adapter.js +101 -0
  11. package/out/src/adapters/synth-po-box/adapter.js.map +1 -0
  12. package/out/src/adapters/tiger/adapter.d.ts.map +1 -1
  13. package/out/src/adapters/tiger/adapter.js +9 -3
  14. package/out/src/adapters/tiger/adapter.js.map +1 -1
  15. package/out/src/adapters/tiger/street-decompose.d.ts +30 -0
  16. package/out/src/adapters/tiger/street-decompose.d.ts.map +1 -0
  17. package/out/src/adapters/tiger/street-decompose.js +99 -0
  18. package/out/src/adapters/tiger/street-decompose.js.map +1 -0
  19. package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -1
  20. package/out/src/adapters/usgov-nad/adapter.js +31 -10
  21. package/out/src/adapters/usgov-nad/adapter.js.map +1 -1
  22. package/out/src/adapters/wof-admin-jp/adapter.d.ts +58 -0
  23. package/out/src/adapters/wof-admin-jp/adapter.d.ts.map +1 -0
  24. package/out/src/adapters/wof-admin-jp/adapter.js +129 -0
  25. package/out/src/adapters/wof-admin-jp/adapter.js.map +1 -0
  26. package/out/src/index.d.ts +6 -0
  27. package/out/src/index.d.ts.map +1 -1
  28. package/out/src/index.js +6 -0
  29. package/out/src/index.js.map +1 -1
  30. package/out/src/synthesize-german.d.ts +75 -0
  31. package/out/src/synthesize-german.d.ts.map +1 -0
  32. package/out/src/synthesize-german.js +116 -0
  33. package/out/src/synthesize-german.js.map +1 -0
  34. package/out/src/synthesize-house-venue.d.ts +57 -0
  35. package/out/src/synthesize-house-venue.d.ts.map +1 -0
  36. package/out/src/synthesize-house-venue.js +147 -0
  37. package/out/src/synthesize-house-venue.js.map +1 -0
  38. package/out/src/synthesize-intersection.d.ts +48 -0
  39. package/out/src/synthesize-intersection.d.ts.map +1 -0
  40. package/out/src/synthesize-intersection.js +138 -0
  41. package/out/src/synthesize-intersection.js.map +1 -0
  42. package/out/src/synthesize-no-street.d.ts +70 -0
  43. package/out/src/synthesize-no-street.d.ts.map +1 -0
  44. package/out/src/synthesize-no-street.js +279 -0
  45. package/out/src/synthesize-no-street.js.map +1 -0
  46. package/out/src/synthesize-po-box.d.ts +75 -0
  47. package/out/src/synthesize-po-box.d.ts.map +1 -0
  48. package/out/src/synthesize-po-box.js +186 -0
  49. package/out/src/synthesize-po-box.js.map +1 -0
  50. package/out/src/synthesize-street.d.ts +53 -0
  51. package/out/src/synthesize-street.d.ts.map +1 -0
  52. package/out/src/synthesize-street.js +212 -0
  53. package/out/src/synthesize-street.js.map +1 -0
  54. package/out/src/synthesize.js +1 -1
  55. package/out/src/synthesize.js.map +1 -1
  56. package/package.json +3 -2
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,cAAc,QAAQ,CAAA;AA4CnC,wBAAgB,gBAAgB,IAAI,aAAa,CAqEhD;AAED,eAAO,MAAM,UAAU,eAAqB,CAAA"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAGjF,eAAO,MAAM,cAAc,QAAQ,CAAA;AA4CnC,wBAAgB,gBAAgB,IAAI,aAAa,CAwEhD;AAED,eAAO,MAAM,UAAU,eAAqB,CAAA"}
@@ -29,6 +29,7 @@ import { parse as csvParse } from "csv-parse";
29
29
  import { createReadStream } from "node:fs";
30
30
  import { stableSourceId } from "../../adapter.js";
31
31
  import { reconcileComponents } from "../../format.js";
32
+ import { decomposeFrStreet } from "./street-decompose.js";
32
33
  export const BAN_ADAPTER_ID = "ban";
33
34
  /**
34
35
  * Compose `house_number` from `numero` + `rep`. BAN uses `rep` for repetition indices ("bis",
@@ -92,11 +93,14 @@ export function createBanAdapter() {
92
93
  continue;
93
94
  if (!house && !postcode)
94
95
  continue;
96
+ const decomposed = decomposeFrStreet(street);
95
97
  const components = {};
96
98
  if (house)
97
99
  components.house_number = house;
98
- if (street)
99
- components.street = street;
100
+ if (decomposed.prefix)
101
+ components.street_prefix = decomposed.prefix;
102
+ if (decomposed.street)
103
+ components.street = decomposed.street;
100
104
  if (postcode)
101
105
  components.postcode = postcode;
102
106
  if (locality)
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,cAAc,GAAG,KAAK,CAAA;AAenC;;;GAGG;AACH,SAAS,kBAAkB,CAAC,MAAc,EAAE,GAAW;IACtD,MAAM,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;IACvB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IACpB,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,CAAA;IACjB,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;AAC3B,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,UAAU,CAAC,KAAa,EAAE,MAAc,EAAE,QAAgB,EAAE,QAAgB;IACpF,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,IAAI,UAAU;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;IACtC,MAAM,QAAQ,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACtE,IAAI,QAAQ;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;AACpD,CAAC;AAED,MAAM,UAAU,gBAAgB;IAC/B,OAAO;QACN,EAAE,EAAE,cAAc;QAClB,cAAc,EAAE,UAAU;QAC1B,WAAW,EAAE,+EAA+E;QAE5F,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,+CAA+C,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC/E,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,SAAS,EAAE,GAAG;gBACd,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAA+B,EAAE,CAAC;oBAC5D,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,KA
AK,GAAG,kBAAkB,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAA;oBACvE,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAClD,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAElD,IAAI,CAAC,MAAM,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAClC,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAEjC,MAAM,UAAU,GAA+B,EAAE,CAAA;oBACjD,IAAI,KAAK;wBAAE,UAAU,CAAC,YAAY,GAAG,KAAK,CAAA;oBAC1C,IAAI,MAAM;wBAAE,UAAU,CAAC,MAAM,GAAG,MAAM,CAAA;oBACtC,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAC5C,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAE5C,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAA;oBACzD,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;wBAAE,SAAQ;oBAE/C,MAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,EAAE,IAAI,EAAE;wBACjC,CAAC,CAAC,GAAG,cAAc,IAAI,MAAM,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE;wBACzC,CAAC,CAAC,cAAc,CAAC,cAAc,EAAE,OAAO,CAAC,CAAA;oBAE1C,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,cAAc;wBACtB,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,UAAU;qBACnB,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAA"}
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAEzD,MAAM,CAAC,MAAM,cAAc,GAAG,KAAK,CAAA;AAenC;;;GAGG;AACH,SAAS,kBAAkB,CAAC,MAAc,EAAE,GAAW;IACtD,MAAM,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;IACvB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IACpB,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,CAAA;IACjB,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;AAC3B,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,UAAU,CAAC,KAAa,EAAE,MAAc,EAAE,QAAgB,EAAE,QAAgB;IACpF,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,IAAI,UAAU;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;IACtC,MAAM,QAAQ,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACtE,IAAI,QAAQ;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;AACpD,CAAC;AAED,MAAM,UAAU,gBAAgB;IAC/B,OAAO;QACN,EAAE,EAAE,cAAc;QAClB,cAAc,EAAE,UAAU;QAC1B,WAAW,EAAE,+EAA+E;QAE5F,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,+CAA+C,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC/E,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,SAAS,EAAE,GAAG;gBACd,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAA+B,EAAE,CAAC;oBAC5D,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,
IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,KAAK,GAAG,kBAAkB,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAA;oBACvE,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAClD,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAElD,IAAI,CAAC,MAAM,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAClC,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAEjC,MAAM,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAA;oBAE5C,MAAM,UAAU,GAA+B,EAAE,CAAA;oBACjD,IAAI,KAAK;wBAAE,UAAU,CAAC,YAAY,GAAG,KAAK,CAAA;oBAC1C,IAAI,UAAU,CAAC,MAAM;wBAAE,UAAU,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;oBACnE,IAAI,UAAU,CAAC,MAAM;wBAAE,UAAU,CAAC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAA;oBAC5D,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAC5C,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAE5C,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAA;oBACzD,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;wBAAE,SAAQ;oBAE/C,MAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,EAAE,IAAI,EAAE;wBACjC,CAAC,CAAC,GAAG,cAAc,IAAI,MAAM,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE;wBACzC,CAAC,CAAC,cAAc,CAAC,cAAc,EAAE,OAAO,CAAC,CAAA;oBAE1C,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,cAAc;wBACtB,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,UAAU;qBACnB,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAA"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Decompose a French street name into Stage 3 components. French convention puts the street type as
7
+ * a leading word: "Rue de Rivoli", "Avenue des Champs-Élysées", "Bd Voltaire".
8
+ *
9
+ * The street type becomes street_prefix in our schema. The remaining tokens form the street name.
10
+ *
11
+ * Examples: "Rue de Rivoli" → { prefix: "Rue", street: "de Rivoli" } "Avenue des Champs-Élysées" →
12
+ * { prefix: "Avenue", street: "des Champs-Élysées" } "Boulevard Voltaire" → { prefix:
13
+ * "Boulevard", street: "Voltaire" }
14
+ *
15
+ * Sources street types from `core/data/libpostal/dictionaries/fr/street_types.txt`.
16
+ */
17
+ export interface DecomposedFrStreet {
18
+ prefix: string | null;
19
+ street: string;
20
+ }
21
+ /**
22
+ * Decompose a French street name into prefix (leading type word) and street name.
23
+ *
24
+ * If the first 1-2 tokens match a known street type (allowing for multi-word like "ancien chemin"),
25
+ * they become the prefix. Returns `{ prefix: null, street: original }` if no match.
26
+ */
27
+ export declare function decomposeFrStreet(fullname: string): DecomposedFrStreet;
28
+ //# sourceMappingURL=street-decompose.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-decompose.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAoCH,MAAM,WAAW,kBAAkB;IAClC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACd;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,kBAAkB,CAwBtE"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Decompose a French street name into Stage 3 components. French convention puts the street type as
7
+ * a leading word: "Rue de Rivoli", "Avenue des Champs-Élysées", "Bd Voltaire".
8
+ *
9
+ * The street type becomes street_prefix in our schema. The remaining tokens form the street name.
10
+ *
11
+ * Examples: "Rue de Rivoli" → { prefix: "Rue", street: "de Rivoli" } "Avenue des Champs-Élysées" →
12
+ * { prefix: "Avenue", street: "des Champs-Élysées" } "Boulevard Voltaire" → { prefix:
13
+ * "Boulevard", street: "Voltaire" }
14
+ *
15
+ * Sources street types from `core/data/libpostal/dictionaries/fr/street_types.txt`.
16
+ */
17
+ import { readFileSync } from "node:fs";
18
+ import { dirname, resolve } from "node:path";
19
+ import { fileURLToPath } from "node:url";
20
const moduleDir = dirname(fileURLToPath(import.meta.url));

/**
 * Load a libpostal French dictionary file into a set of lowercase surface forms.
 *
 * The file is looked up in three places, in order: two locations relative to this module
 * and one relative to the current working directory. The first file that can be read wins.
 *
 * File format as parsed here: one entry per line, blank lines and lines starting with `#`
 * are ignored, and each remaining line is split on `|` into alternative forms. Every form
 * is trimmed and lowercased before being added.
 *
 * @param {string} filename - Dictionary file name, e.g. `"street_types.txt"`.
 * @returns {Set<string>} All lowercase forms found in the dictionary.
 * @throws {Error} If no candidate path can be read. The error's `cause` is an
 *   `AggregateError` holding one underlying read error per attempted path.
 */
function loadDictionary(filename) {
	const candidates = [
		resolve(moduleDir, "../../../../core/data/libpostal/dictionaries/fr", filename),
		resolve(moduleDir, "../../../../../core/data/libpostal/dictionaries/fr", filename),
		resolve(process.cwd(), "core/data/libpostal/dictionaries/fr", filename),
	];
	// Keep every read failure so the final error can explain *why* each path was rejected
	// (missing file vs. permissions, etc.) instead of discarding that information.
	const failures = [];
	for (const candidate of candidates) {
		let text;
		try {
			text = readFileSync(candidate, "utf8");
		}
		catch (err) {
			failures.push(err);
			continue;
		}
		const forms = new Set();
		for (const line of text.split("\n")) {
			// trim() also removes a trailing "\r" from CRLF files.
			const entry = line.trim();
			if (!entry || entry.startsWith("#"))
				continue;
			for (const form of entry.split("|")) {
				const normalized = form.trim().toLowerCase();
				if (normalized)
					forms.add(normalized);
			}
		}
		return forms;
	}
	throw new Error(`Could not load FR libpostal dictionary: ${filename}`, {
		cause: new AggregateError(failures, `Tried: ${candidates.join(", ")}`),
	});
}
49
const STREET_TYPES_FR = loadDictionary("street_types.txt");

/**
 * Split a French street name into a leading street-type prefix and the remaining name.
 *
 * Matching is done against the lowercase forms in `STREET_TYPES_FR`; a single trailing
 * `.`, `,` or `;` on a token is ignored for matching only. The returned `prefix` keeps the
 * caller's original casing and punctuation.
 *
 * - Empty / whitespace-only input yields `{ prefix: null, street: "" }`.
 * - A single-token name is never split.
 * - With three or more tokens, a two-word type (e.g. "ancien chemin") is tried first.
 * - Otherwise the first token alone is tried.
 * - With no match the trimmed input is returned as `street` with `prefix: null`.
 *
 * @param {string} fullname - Street name as found in the source data.
 * @returns {{ prefix: string | null, street: string }}
 */
export function decomposeFrStreet(fullname) {
	const cleaned = fullname.trim();
	if (cleaned === "") {
		return { prefix: null, street: "" };
	}

	const words = cleaned.split(/\s+/);
	const unmatched = { prefix: null, street: cleaned };
	if (words.length < 2) {
		return unmatched;
	}

	const normalize = (word) => word.toLowerCase().replace(/[.,;]$/, "");
	// Build the result for a prefix made of the first `count` words.
	const splitAfter = (count) => ({
		prefix: words.slice(0, count).join(" "),
		street: words.slice(count).join(" "),
	});

	// Two-word types are only considered when at least one word would remain as the name.
	if (words.length >= 3) {
		const pair = normalize(words[0]) + " " + normalize(words[1]);
		if (STREET_TYPES_FR.has(pair)) {
			return splitAfter(2);
		}
	}

	if (STREET_TYPES_FR.has(normalize(words[0]))) {
		return splitAfter(1);
	}

	return unmatched;
}
78
+ //# sourceMappingURL=street-decompose.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-decompose.js","sourceRoot":"","sources":["../../../../src/adapters/ban/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAExC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,SAAS,cAAc,CAAC,QAAgB;IACvC,MAAM,UAAU,GAAG;QAClB,OAAO,CAAC,SAAS,EAAE,iDAAiD,EAAE,QAAQ,CAAC;QAC/E,OAAO,CAAC,SAAS,EAAE,oDAAoD,EAAE,QAAQ,CAAC;QAClF,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,qCAAqC,EAAE,QAAQ,CAAC;KACvE,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;YAC7B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAQ;gBACjD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;oBACnC,IAAI,CAAC;wBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;YACD,OAAO,GAAG,CAAA;QACX,CAAC;QAAC,MAAM,CAAC;YACR,WAAW;QACZ,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,2CAA2C,QAAQ,EAAE,CAAC,CAAA;AACvE,CAAC;AAED,MAAM,eAAe,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AAO1D;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAgB;IACjD,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;IAEjD,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACnC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;IAE/D,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;IAEjE,iDAAiD;IACjD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;QACzD,IAAI,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAClC,OAAO,EAAE,MA
AM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAA;QACnF,CAAC;IACF,CAAC;IAED,yBAAyB;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;IAC9B,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAE,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAA;IACjE,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AACzC,CAAC"}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
7
+ *
8
+ * Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
9
+ * from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
10
+ * `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
11
+ *
12
+ * Why an adapter and not an augmenter:
13
+ *
14
+ * - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
15
+ * Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
16
+ * The clean shape is: read just (locality, region, postcode, country) and produce a fresh
17
+ * PO-box-shaped row.
18
+ * - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
19
+ * ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
20
+ * carries a `street` field.
21
+ */
22
+ import { type PoBoxBaseTuple } from "../../synthesize-po-box.js";
23
+ import type { CorpusAdapter } from "../../types.js";
24
+ export declare const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
25
+ export declare const SYNTH_PO_BOX_LICENSE = "Synthetic \u2014 derived from CC-BY / public-domain input tuples";
26
+ export interface PoBoxInputRow extends PoBoxBaseTuple {
27
+ street?: string;
28
+ houseNumber?: string;
29
+ }
30
+ export interface SynthPoBoxAdapterOptions {
31
+ /**
32
+ * How many PO box variants to emit per input tuple. Each variant picks a different leader (and
33
+ * possibly a different number / noise level). Default 1.
34
+ */
35
+ variantsPerInput?: number;
36
+ /**
37
+ * Probability (0..1) of emitting a PMB-with-street variant when both the input has a street and
38
+ * the locale supports PMB. Default 0.15.
39
+ */
40
+ pmbRatio?: number;
41
+ /**
42
+ * Deterministic seed for reproducible synthesis. Default Date.now().
43
+ */
44
+ seed?: number;
45
+ }
46
+ export declare function createSynthPoBoxAdapter(opts?: SynthPoBoxAdapterOptions): CorpusAdapter;
47
+ export declare const synthPoBoxAdapter: CorpusAdapter;
48
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAKH,OAAO,EAAsB,KAAK,cAAc,EAAE,MAAM,4BAA4B,CAAA;AACpF,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,uBAAuB,iBAAiB,CAAA;AACrD,eAAO,MAAM,oBAAoB,qEAAgE,CAAA;AAEjG,MAAM,WAAW,aAAc,SAAQ,cAAc;IACpD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,wBAAwB;IACxC;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAUD,wBAAgB,uBAAuB,CAAC,IAAI,GAAE,wBAA6B,GAAG,aAAa,CAuE1F;AAED,eAAO,MAAM,iBAAiB,eAA4B,CAAA"}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
7
+ *
8
+ * Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
9
+ * from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
10
+ * `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
11
+ *
12
+ * Why an adapter and not an augmenter:
13
+ *
14
+ * - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
15
+ * Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
16
+ * The clean shape is: read just (locality, region, postcode, country) and produce a fresh
17
+ * PO-box-shaped row.
18
+ * - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
19
+ * ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
20
+ * carries a `street` field.
21
+ */
22
+ import { createReadStream } from "node:fs";
23
+ import { createInterface } from "node:readline";
24
+ import { stableSourceId } from "../../adapter.js";
25
+ import { synthesizePoBoxRow } from "../../synthesize-po-box.js";
26
+ export const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
27
+ export const SYNTH_PO_BOX_LICENSE = "Synthetic — derived from CC-BY / public-domain input tuples";
28
/**
 * Create a seeded pseudo-random number generator.
 *
 * This is a 32-bit linear congruential generator (multiplier 1664525, increment
 * 1013904223, modulus 2^32). Each call to the returned function advances the state and
 * returns `state / 2^32`, i.e. a value in `[0, 1)`.
 *
 * The seed is coerced to an unsigned 32-bit integer first. For integer seeds already in
 * `[0, 2^32)` the sequence is unchanged from plain floating-point arithmetic; the coercion
 * matters for other inputs:
 *   - negative seeds could otherwise produce negative outputs (JS `%` keeps the sign),
 *   - `NaN` would otherwise produce `NaN` forever,
 *   - very large seeds such as `Date.now()` would overflow 2^53 on the first multiply.
 *
 * @param {number} seed - Any number; only its low 32 bits are used.
 * @returns {() => number} Generator returning the next value in `[0, 1)`.
 */
function makeRandom(seed) {
	let state = seed >>> 0;
	return () => {
		// Math.imul keeps the multiply exact in 32 bits; `>>> 0` is the mod-2^32 step.
		state = (Math.imul(state, 1664525) + 1013904223) >>> 0;
		return state / 4294967296;
	};
}
35
/**
 * Build the `synth-po-box` corpus adapter.
 *
 * The adapter's `rows(options)` async generator reads `options.inputPath` as UTF-8 JSONL
 * (one base tuple per line) and yields synthetic PO-box rows produced by
 * `synthesizePoBoxRow`.
 *
 * Per input line:
 *   - blank lines, lines that are not valid JSON, and values that are not objects are skipped;
 *   - tuples missing any of `locality`, `region`, `postcode`, `country` are skipped;
 *   - when `options.country` is set, tuples for other countries are skipped;
 *   - up to `variantsPerInput` rows are emitted (a variant is dropped when the synthesizer
 *     returns a falsy value).
 *
 * Iteration stops when `options.signal` is aborted or `options.limit` rows have been
 * emitted. The input stream is always released, including when the consumer stops early.
 *
 * @param {object} [opts]
 * @param {number} [opts.variantsPerInput=1] - Variants attempted per input tuple.
 * @param {number} [opts.pmbRatio=0.15] - Forwarded to `synthesizePoBoxRow`.
 * @param {number} [opts.seed] - PRNG seed; defaults to `Date.now()` on each `rows()` call.
 * @returns Adapter object with `id`, `defaultLicense`, `description` and `rows`.
 */
export function createSynthPoBoxAdapter(opts = {}) {
	const variantsPerInput = opts.variantsPerInput ?? 1;
	const pmbRatio = opts.pmbRatio ?? 0.15;
	return {
		id: SYNTH_PO_BOX_ADAPTER_ID,
		defaultLicense: SYNTH_PO_BOX_LICENSE,
		description: "Synthetic PO box / PMB / Apartado / Boîte Postale rows. Consumes JSONL of (locality, region, postcode, country) tuples and emits locale-appropriate PO box variants.",
		async *rows(options) {
			const random = makeRandom(opts.seed ?? Date.now());
			const stream = createReadStream(options.inputPath, { encoding: "utf8" });
			const rl = createInterface({ input: stream, crlfDelay: Infinity });
			const limitReached = (count) => options.limit !== undefined && count >= options.limit;
			let emitted = 0;
			try {
				for await (const line of rl) {
					if (options.signal?.aborted)
						break;
					if (limitReached(emitted))
						break;
					const trimmed = line.trim();
					if (!trimmed)
						continue;
					let input;
					try {
						input = JSON.parse(trimmed);
					}
					catch {
						// Malformed JSONL line: best-effort, move on to the next one.
						continue;
					}
					// JSON.parse can legitimately return null or a primitive (e.g. the line
					// `null`); property access on null would throw, so reject those first.
					if (input === null || typeof input !== "object")
						continue;
					if (!input.locality || !input.region || !input.postcode || !input.country)
						continue;
					if (options.country && options.country !== input.country)
						continue;
					for (let v = 0; v < variantsPerInput; v++) {
						const synth = synthesizePoBoxRow(input, { random, pmbRatio });
						if (!synth)
							continue;
						// Fold the variant index into the `locality` value so each variant of
						// the same tuple gets a distinct digest. (Original note: stableSourceId
						// only accepts ComponentTag keys, hence no dedicated key for `v`.)
						const sourceId = stableSourceId(SYNTH_PO_BOX_ADAPTER_ID, {
							locality: `${input.locality}#${v}`,
							region: input.region,
							postcode: input.postcode,
							country: input.country,
						});
						yield {
							raw: synth.raw,
							components: synth.components,
							country: input.country,
							locale: synth.locale,
							source: SYNTH_PO_BOX_ADAPTER_ID,
							source_id: sourceId,
							corpus_version: "",
							license: SYNTH_PO_BOX_LICENSE,
						};
						emitted++;
						if (limitReached(emitted))
							return;
					}
				}
			}
			finally {
				// Closing the readline interface does not close its input stream, so destroy
				// the file stream explicitly to release the descriptor on early exit.
				rl.close();
				stream.destroy();
			}
		},
	};
}
100
+ export const synthPoBoxAdapter = createSynthPoBoxAdapter();
101
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,kBAAkB,EAAuB,MAAM,4BAA4B,CAAA;AAGpF,MAAM,CAAC,MAAM,uBAAuB,GAAG,cAAc,CAAA;AACrD,MAAM,CAAC,MAAM,oBAAoB,GAAG,6DAA6D,CAAA;AAwBjG,SAAS,UAAU,CAAC,IAAY;IAC/B,IAAI,CAAC,GAAG,IAAI,CAAA;IACZ,OAAO,GAAG,EAAE;QACX,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,GAAG,UAAU,CAAC,GAAG,UAAU,CAAA;QAC3C,OAAO,CAAC,GAAG,UAAU,CAAA;IACtB,CAAC,CAAA;AACF,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,OAAiC,EAAE;IAC1E,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAA;IACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAA;IAEtC,OAAO;QACN,EAAE,EAAE,uBAAuB;QAC3B,cAAc,EAAE,oBAAoB;QACpC,WAAW,EACV,sKAAsK;QAEvK,KAAK,CAAC,CAAC,IAAI,CAAC,OAAuB;YAClC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;YAElD,MAAM,MAAM,GAAG,gBAAgB,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACxE,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;YAElE,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,OAAO,GAAG,CAAC,CAAA;YAEf,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;gBAC7B,IAAI,OAAO,CAAC,MAAM,EAAE,OAAO;oBAAE,MAAK;gBAClC,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;oBAAE,MAAK;gBAElE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO;oBAAE,SAAQ;gBAEtB,IAAI,KAAoB,CAAA;gBACxB,IAAI,CAAC;oBACJ,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkB,CAAA;gBAC7C,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;oBAC3E,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK,CAAC,OAAO;oBAAE,SAAQ;gBAElE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC3C,MAAM,KAAK,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,QAA
Q,EAAE,CAAC,CAAA;oBAC7D,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,6EAA6E;oBAC7E,iDAAiD;oBACjD,MAAM,QAAQ,GAAG,cAAc,CAAC,uBAAuB,EAAE;wBACxD,QAAQ,EAAE,GAAG,KAAK,CAAC,QAAQ,IAAI,CAAC,EAAE;wBAClC,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,OAAO,EAAE,KAAK,CAAC,OAAO;qBACtB,CAAC,CAAA;oBAEF,MAAM;wBACL,GAAG,EAAE,KAAK,CAAC,GAAG;wBACd,UAAU,EAAE,KAAK,CAAC,UAAU;wBAC5B,OAAO,EAAE,KAAK,CAAC,OAAO;wBACtB,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,MAAM,EAAE,uBAAuB;wBAC/B,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,oBAAoB;qBAC7B,CAAA;oBACD,OAAO,EAAE,CAAA;oBAET,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;wBAAE,MAAK;gBACnE,CAAC;YACF,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,uBAAuB,EAAE,CAAA"}
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,gBAAgB,UAAU,CAAA;AACvC,eAAO,MAAM,qBAAqB,kBAAkB,CAAA;AAsFpD,yFAAyF;AACzF,wBAAgB,kBAAkB,IAAI,aAAa,CAoElD;AAED,eAAO,MAAM,YAAY,eAAuB,CAAA"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAGjF,eAAO,MAAM,gBAAgB,UAAU,CAAA;AACvC,eAAO,MAAM,qBAAqB,kBAAkB,CAAA;AA0FpD,yFAAyF;AACzF,wBAAgB,kBAAkB,IAAI,aAAa,CAoElD;AAED,eAAO,MAAM,YAAY,eAAuB,CAAA"}
@@ -39,6 +39,7 @@
39
39
  import { DatabaseSync } from "node:sqlite";
40
40
  import { lookupFipsState } from "../../codex/us-fips-state.js";
41
41
  import { formatAddress, reconcileComponents } from "../../format.js";
42
+ import { decomposeStreet } from "./street-decompose.js";
42
43
  export const TIGER_ADAPTER_ID = "tiger";
43
44
  export const TIGER_DEFAULT_LICENSE = "Public Domain";
44
45
  /**
@@ -54,18 +55,23 @@ const US_COUNTRY_DISPLAY = "United States of America";
54
55
  * - `zipl !== zipr` → two rows (one per side's ZIP).
55
56
  */
56
57
  function* streetVariants(row) {
57
- const street = row.fullname.trim();
58
- if (!street)
58
+ const fullname = row.fullname.trim();
59
+ if (!fullname)
59
60
  return;
60
61
  const state = lookupFipsState(row.statefp);
61
62
  if (!state)
62
63
  return;
63
64
  const zipl = row.zipl?.trim() ?? "";
64
65
  const zipr = row.zipr?.trim() ?? "";
66
+ const decomposed = decomposeStreet(fullname);
65
67
  const baseComponents = {
66
- street,
67
68
  region: state.abbreviation,
69
+ street: decomposed.street,
68
70
  };
71
+ if (decomposed.prefix)
72
+ baseComponents.street_prefix = decomposed.prefix;
73
+ if (decomposed.suffix)
74
+ baseComponents.street_suffix = decomposed.suffix;
69
75
  if (!zipl && !zipr) {
70
76
  yield { components: baseComponents, variantKey: "no-zip" };
71
77
  return;
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGpE,MAAM,CAAC,MAAM,gBAAgB,GAAG,OAAO,CAAA;AACvC,MAAM,CAAC,MAAM,qBAAqB,GAAG,eAAe,CAAA;AAEpD;;;GAGG;AACH,MAAM,kBAAkB,GAAG,0BAA0B,CAAA;AAiBrD;;;;;;GAMG;AACH,QAAQ,CAAC,CAAC,cAAc,CAAC,GAAmB;IAI3C,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;IAClC,IAAI,CAAC,MAAM;QAAE,OAAM;IACnB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAEnC,MAAM,cAAc,GAA+B;QAClD,MAAM;QACN,MAAM,EAAE,KAAK,CAAC,YAAY;KAC1B,CAAA;IAED,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAA;QAC1D,OAAM;IACP,CAAC;IACD,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACnC,MAAM;YACL,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE;YACjD,UAAU,EAAE,OAAO,IAAI,EAAE;SACzB,CAAA;QACD,OAAM;IACP,CAAC;IACD,IAAI,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;IACjG,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;AACnH,CAAC;AAED,sEAAsE;AACtE,QAAQ,CAAC,CAAC,aAAa,CAAC,GAAkB;IAIzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAM;IACjB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;QAC9B,UAAU,EAAE,eAAe;KAC3B,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE;QAC1D,UAAU,EAAE,aAAa;KACzB,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,EAAE,kBAAkB,EAAE;QACvF,UAAU,EAAE,q
BAAqB;KACjC,CAAA;AACF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,kBAAkB;IACjC,OAAO;QACN,EAAE,EAAE,gBAAgB;QACpB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EACV,4GAA4G;QAE7G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,iDAAiD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACjF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/D,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,mEAAmE,CAAC,CAAA;gBAClG,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,CAAC,CAAA;gBAEnF,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,EAAsC,EAAE,CAAC;oBAC5E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,EAAE;4BACzE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;gBAED,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAqC,EAAE,CAAC;oBAC1E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,S
AAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,CAAC,UAAU,EAAE;4BACtE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,kBAAkB,EAAE,CAAA"}
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AAEvD,MAAM,CAAC,MAAM,gBAAgB,GAAG,OAAO,CAAA;AACvC,MAAM,CAAC,MAAM,qBAAqB,GAAG,eAAe,CAAA;AAEpD;;;GAGG;AACH,MAAM,kBAAkB,GAAG,0BAA0B,CAAA;AAiBrD;;;;;;GAMG;AACH,QAAQ,CAAC,CAAC,cAAc,CAAC,GAAmB;IAI3C,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;IACpC,IAAI,CAAC,QAAQ;QAAE,OAAM;IACrB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAEnC,MAAM,UAAU,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAA;IAE5C,MAAM,cAAc,GAA+B;QAClD,MAAM,EAAE,KAAK,CAAC,YAAY;QAC1B,MAAM,EAAE,UAAU,CAAC,MAAM;KACzB,CAAA;IACD,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IACvE,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IAEvE,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAA;QAC1D,OAAM;IACP,CAAC;IACD,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACnC,MAAM;YACL,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE;YACjD,UAAU,EAAE,OAAO,IAAI,EAAE;SACzB,CAAA;QACD,OAAM;IACP,CAAC;IACD,IAAI,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;IACjG,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;AACnH,CAAC;AAED,sEAAsE;AACtE,QAAQ,CAAC,CAAC,aAAa,CAAC,GAAkB;IAIzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAM;IACjB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;QAC9B,UAAU,
EAAE,eAAe;KAC3B,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE;QAC1D,UAAU,EAAE,aAAa;KACzB,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,EAAE,kBAAkB,EAAE;QACvF,UAAU,EAAE,qBAAqB;KACjC,CAAA;AACF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,kBAAkB;IACjC,OAAO;QACN,EAAE,EAAE,gBAAgB;QACpB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EACV,4GAA4G;QAE7G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,iDAAiD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACjF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/D,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,mEAAmE,CAAC,CAAA;gBAClG,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,CAAC,CAAA;gBAEnF,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,EAAsC,EAAE,CAAC;oBAC5E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,EAAE;4BACzE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;gBAED,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAqC,EAAE,CAAC;oBAC1E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,M
AAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,CAAC,UAAU,EAAE;4BACtE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,kBAAkB,EAAE,CAAA"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
7
+ *
8
+ * Sources directionals and street types from the curated libpostal/en dictionaries
9
+ * (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
10
+ * dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
11
+ * corpus labels and runtime classifications agree on the vocabulary.
12
+ *
13
+ * Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" }; "Pennsylvania Avenue NW" →
14
+ * { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" }; "Salmon St" → { prefix: null,
15
+ * street: "Salmon", suffix: "St" }; "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne",
16
+ * suffix: "Blvd" }
17
+ */
18
+ export interface DecomposedStreet { // Shape returned by decomposeStreet().
19
+ prefix: string | null; // Leading directional token exactly as written in the input, or null when none was split off.
20
+ street: string; // The street name proper; the whole trimmed input when no prefix/suffix was recognised.
21
+ suffix: string | null; // Trailing street type and/or post-directional as written (e.g. "Avenue NW"), or null.
22
+ }
23
+ /**
24
+ * Decompose a US street name into prefix/name/suffix components.
25
+ *
26
+ * Conservative — only emits prefix/suffix when there's a clear directional or street-type keyword.
27
+ * Returns the original as `street` if nothing matches.
28
+ */
29
+ export declare function decomposeStreet(fullname: string): DecomposedStreet;
30
+ //# sourceMappingURL=street-decompose.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-decompose.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAuCH,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;CACrB;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,gBAAgB,CA0ClE"}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
7
+ *
8
+ * Sources directionals and street types from the curated libpostal/en dictionaries
9
+ * (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
10
+ * dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
11
+ * corpus labels and runtime classifications agree on the vocabulary.
12
+ *
13
+ * Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" }; "Pennsylvania Avenue NW" →
14
+ * { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" }; "Salmon St" → { prefix: null,
15
+ * street: "Salmon", suffix: "St" }; "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne",
16
+ * suffix: "Blvd" }
17
+ */
18
+ import { readFileSync } from "node:fs";
19
+ import { dirname, resolve } from "node:path";
20
+ import { fileURLToPath } from "node:url";
21
// Directory of this compiled module; anchors the module-relative dictionary lookups below.
const moduleDir = dirname(fileURLToPath(import.meta.url));
/**
 * Load one libpostal dictionary file into a set of lowercased lookup forms.
 *
 * The file is looked for in three places, in order: two locations relative to this module and one
 * relative to the current working directory. The first candidate that can be read wins; a
 * candidate that cannot be read (for any reason) is skipped so the next one gets a chance.
 *
 * File format: one entry per line, `canonical|abbr|abbr|...`. Blank lines and lines starting with
 * `#` are ignored. Every form on a line is trimmed, lowercased and indexed.
 *
 * @param {string} filename - Dictionary file name, e.g. `"directionals.txt"`.
 * @returns {Set<string>} All forms found in the file, lowercased.
 * @throws {Error} When no candidate could be read. The error's `cause` is an `AggregateError`
 *   holding the individual read failures, so the attempted paths and reasons are not lost.
 */
function loadDictionary(filename) {
    // Resolve via the @mailwoman/core data directory.
    const candidates = [
        resolve(moduleDir, "../../../../core/data/libpostal/dictionaries/en", filename),
        resolve(moduleDir, "../../../../../core/data/libpostal/dictionaries/en", filename),
        resolve(process.cwd(), "core/data/libpostal/dictionaries/en", filename),
    ];
    const failures = [];
    for (const path of candidates) {
        let text;
        try {
            text = readFileSync(path, "utf8");
        }
        catch (error) {
            // Best-effort lookup: remember why this candidate failed, then try the next one.
            failures.push(error);
            continue;
        }
        const forms = new Set();
        for (const line of text.split("\n")) {
            // trim() also strips the "\r" left behind by CRLF line endings.
            const trimmed = line.trim();
            if (!trimmed || trimmed.startsWith("#"))
                continue;
            // libpostal format: canonical|abbr|abbr|... — index all forms
            for (const form of trimmed.split("|")) {
                const f = form.trim().toLowerCase();
                if (f)
                    forms.add(f);
            }
        }
        return forms;
    }
    throw new Error(`Could not load libpostal dictionary: ${filename}`, {
        cause: new AggregateError(failures, `No readable copy of ${filename} among ${candidates.length} candidate paths`),
    });
}
52
+ const DIRECTIONALS = loadDictionary("directionals.txt");
53
+ const STREET_TYPES = loadDictionary("street_types.txt");
54
/**
 * Decompose a US street name into prefix/name/suffix components.
 *
 * Conservative — only emits prefix/suffix when there's a clear directional or street-type keyword,
 * and never at the cost of the name itself: at least one token is always left for `street`.
 * The trailing street type is resolved before the leading directional, so a directional word that
 * is the street's name is kept as the name ("North Ave" → street "North", suffix "Ave";
 * "E St NW" → street "E", suffix "St NW") instead of being consumed as a prefix.
 *
 * Tokens are matched case-insensitively with one trailing "." ignored; the returned parts keep
 * the input's original spelling. Returns the original as `street` if nothing matches.
 *
 * @param {string} fullname - Street name without house number.
 * @param {{ directionals?: Set<string>, streetTypes?: Set<string> }} [dictionaries] - Optional
 *   lowercase lookup sets; default to the module-level libpostal dictionaries.
 * @returns {{ prefix: string | null, street: string, suffix: string | null }}
 */
export function decomposeStreet(fullname, { directionals = DIRECTIONALS, streetTypes = STREET_TYPES } = {}) {
    const trimmed = fullname.trim();
    if (!trimmed)
        return { prefix: null, street: "", suffix: null };
    const tokens = trimmed.split(/\s+/);
    if (tokens.length === 1)
        return { prefix: null, street: trimmed, suffix: null };
    const norm = (s) => s.toLowerCase().replace(/\.$/, "");
    const isDirectional = (token) => directionals.has(norm(token));
    const isStreetType = (token) => streetTypes.has(norm(token));
    let prefix = null;
    let suffix = null;
    let startIdx = 0;
    let endIdx = tokens.length;
    const last = tokens[endIdx - 1];
    const secondLast = tokens[endIdx - 2];
    // 1. Type-bearing suffix: "<type> <directional>" (e.g. "Ave NW") or a bare "<type>".
    const endsWithTypeAndDirectional = isDirectional(last) && isStreetType(secondLast);
    if (endsWithTypeAndDirectional) {
        // Only split when a name token remains; an input like "Ave NW" stays whole.
        if (endIdx > 2) {
            suffix = tokens.slice(endIdx - 2, endIdx).join(" ");
            endIdx -= 2;
        }
    }
    else if (isStreetType(last)) {
        // tokens.length >= 2 here, so at least one token is left for the name.
        suffix = last;
        endIdx -= 1;
    }
    // 2. Leading directional prefix, unless it is the only token left to serve as the name.
    if (endIdx >= 2 && isDirectional(tokens[0])) {
        prefix = tokens[0];
        startIdx = 1;
    }
    // 3. Post-directional without type (e.g. "Main W"), again keeping one token for the name.
    if (suffix === null && !endsWithTypeAndDirectional && isDirectional(last) && endIdx - startIdx >= 2) {
        suffix = last;
        endIdx -= 1;
    }
    // Every step above leaves startIdx < endIdx, so the name is never empty.
    const street = tokens.slice(startIdx, endIdx).join(" ");
    return { prefix, street, suffix };
}
99
+ //# sourceMappingURL=street-decompose.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-decompose.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAExC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,SAAS,cAAc,CAAC,QAAgB;IACvC,kDAAkD;IAClD,MAAM,UAAU,GAAG;QAClB,OAAO,CAAC,SAAS,EAAE,iDAAiD,EAAE,QAAQ,CAAC;QAC/E,OAAO,CAAC,SAAS,EAAE,oDAAoD,EAAE,QAAQ,CAAC;QAClF,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,qCAAqC,EAAE,QAAQ,CAAC;KACvE,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;YAC7B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAQ;gBACjD,8DAA8D;gBAC9D,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;oBACnC,IAAI,CAAC;wBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;YACD,OAAO,GAAG,CAAA;QACX,CAAC;QAAC,MAAM,CAAC;YACR,qBAAqB;QACtB,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,QAAQ,EAAE,CAAC,CAAA;AACpE,CAAC;AAED,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AACvD,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AAQvD;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC/C,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/D,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACnC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/E,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IAE9D,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,QAAQ,GAAG,CAAC,CAAA;IAChB,IAAI,MAAM,GAAG,MAA
M,CAAC,MAAM,CAAA;IAE1B,6BAA6B;IAC7B,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC9D,MAAM,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;QACnB,QAAQ,GAAG,CAAC,CAAA;IACb,CAAC;IAED,mFAAmF;IACnF,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAA;IACtC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAE/D,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5D,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACnD,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,gCAAgC;QAChC,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IAC9D,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACvD,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAA;AAClC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;AA+JxD,wBAAgB,qBAAqB,IAAI,aAAa,CAuFrD;AAED,eAAO,MAAM,eAAe,eAA0B,CAAA"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;AAgLxD,wBAAgB,qBAAqB,IAAI,aAAa,CAmGrD;AAED,eAAO,MAAM,eAAe,eAA0B,CAAA"}
@@ -114,14 +114,23 @@ function composeHouseNumber(r) {
114
114
  const suf = (r.AddNum_Suf ?? "").toString().trim();
115
115
  return [pre, num, suf].filter(Boolean).join(" ").trim() || undefined;
116
116
  }
117
/**
 * Split a NAD record's street fields into prefix / name / suffix plus the recomposed full string.
 *
 * When `St_Name` is present the parts are built from the discrete columns:
 * prefix = St_PreDir + St_PreTyp + St_PreSep, suffix = St_PosTyp + St_PosDir (empty pieces are
 * skipped; an empty result is `undefined`). `St_PreMod` / `St_PosMod` are not read.
 * Without `St_Name`, a non-empty `StNam_Full` is returned as both `full` and `street`.
 *
 * @param {object} r - NAD record; fields may be missing, null, or non-string.
 * @returns {{ prefix?: string, street: string, suffix?: string, full: string } | undefined}
 *   `undefined` when neither `St_Name` nor `StNam_Full` carries text.
 */
function decomposeNadStreet(r) {
    const text = (value) => (value ?? "").toString().trim();
    const joined = (pieces) => pieces.filter(Boolean).join(" ") || undefined;
    const street = text(r.St_Name);
    if (!street) {
        // No discrete name column: fall back to the pre-composed full street string, if any.
        const whole = text(r.StNam_Full);
        return whole ? { full: whole, street: whole } : undefined;
    }
    const prefix = joined([text(r.St_PreDir), text(r.St_PreTyp), text(r.St_PreSep)]);
    const suffix = joined([text(r.St_PosTyp), text(r.St_PosDir)]);
    const full = [prefix, street, suffix].filter(Boolean).join(" ");
    return { prefix, street, suffix, full };
}
126
135
  function composeLocality(r) {
127
136
  return nonEmpty(r.Post_City, r.Inc_Muni, r.Census_Plc, r.Uninc_Comm);
@@ -134,7 +143,7 @@ function composePostcode(r) {
134
143
  return plus4 ? `${zip}-${plus4}` : zip;
135
144
  }
136
145
/**
 * Build the single-line raw address string for a record.
 *
 * Layout: `[venue, ]["houseNumber street unit", ]locality, region postcode`. Venue and the street
 * line are omitted when empty; the locality/region/postcode tail is always emitted.
 *
 * @param {object} parts - `{ venue, houseNumber, street, unit, locality, region, postcode }`.
 * @returns {string} Comma-separated address line.
 */
function composeRaw(parts) {
    const { venue, houseNumber, street, unit, locality, region, postcode } = parts;
    const segments = [];
    if (venue) {
        segments.push(venue);
    }
    const streetLine = [houseNumber, street, unit].filter(Boolean).join(" ").trim();
    if (streetLine) {
        segments.push(streetLine);
    }
    segments.push(`${locality}, ${region} ${postcode}`);
    return segments.join(", ");
}
@@ -182,18 +191,30 @@ export function createUsgovNadAdapter() {
182
191
  const postcode = composePostcode(record);
183
192
  if (!postcode)
184
193
  continue;
185
- const street = composeStreet(record);
194
+ const decomposed = decomposeNadStreet(record);
186
195
  const houseNumber = composeHouseNumber(record);
187
196
  const venue = nonEmpty(record.LandmkName);
197
+ const unit = nonEmpty(record.Unit, record.Building, record.Floor, record.Room);
188
198
  const components = {
189
199
  ...(venue ? { venue } : {}),
190
200
  ...(houseNumber ? { house_number: houseNumber } : {}),
191
- ...(street ? { street } : {}),
201
+ ...(decomposed?.prefix ? { street_prefix: decomposed.prefix } : {}),
202
+ ...(decomposed?.street ? { street: decomposed.street } : {}),
203
+ ...(decomposed?.suffix ? { street_suffix: decomposed.suffix } : {}),
204
+ ...(unit ? { unit } : {}),
192
205
  locality,
193
206
  region: state,
194
207
  postcode,
195
208
  };
196
- const raw = composeRaw({ venue, houseNumber, street, locality, region: state, postcode });
209
+ const raw = composeRaw({
210
+ venue,
211
+ houseNumber,
212
+ street: decomposed?.full,
213
+ unit,
214
+ locality,
215
+ region: state,
216
+ postcode,
217
+ });
197
218
  if (!raw)
198
219
  continue;
199
220
  const aligned = reconcileComponents(components, raw);