@mailwoman/classifiers 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/AdjacencyClassifier.d.ts +16 -0
- package/out/AdjacencyClassifier.d.ts.map +1 -0
- package/out/AdjacencyClassifier.js +50 -0
- package/out/AdjacencyClassifier.js.map +1 -0
- package/out/AlphaNumericClassifier.d.ts +10 -0
- package/out/AlphaNumericClassifier.d.ts.map +1 -0
- package/out/AlphaNumericClassifier.js +23 -0
- package/out/AlphaNumericClassifier.js.map +1 -0
- package/out/CentralEuropeanStreetNameClassifier.d.ts +16 -0
- package/out/CentralEuropeanStreetNameClassifier.d.ts.map +1 -0
- package/out/CentralEuropeanStreetNameClassifier.js +36 -0
- package/out/CentralEuropeanStreetNameClassifier.js.map +1 -0
- package/out/ChainClassifier.d.ts +14 -0
- package/out/ChainClassifier.d.ts.map +1 -0
- package/out/ChainClassifier.js +25 -0
- package/out/ChainClassifier.js.map +1 -0
- package/out/CompoundLevelClassifier.d.ts +16 -0
- package/out/CompoundLevelClassifier.d.ts.map +1 -0
- package/out/CompoundLevelClassifier.js +58 -0
- package/out/CompoundLevelClassifier.js.map +1 -0
- package/out/CompoundStreetClassifier.d.ts +12 -0
- package/out/CompoundStreetClassifier.d.ts.map +1 -0
- package/out/CompoundStreetClassifier.js +50 -0
- package/out/CompoundStreetClassifier.js.map +1 -0
- package/out/CompoundUnitDesignatorClassifier.d.ts +16 -0
- package/out/CompoundUnitDesignatorClassifier.d.ts.map +1 -0
- package/out/CompoundUnitDesignatorClassifier.js +57 -0
- package/out/CompoundUnitDesignatorClassifier.js.map +1 -0
- package/out/DirectionalClassifier.d.ts +11 -0
- package/out/DirectionalClassifier.d.ts.map +1 -0
- package/out/DirectionalClassifier.js +31 -0
- package/out/DirectionalClassifier.js.map +1 -0
- package/out/GivenNameClassifier.d.ts +11 -0
- package/out/GivenNameClassifier.d.ts.map +1 -0
- package/out/GivenNameClassifier.js +32 -0
- package/out/GivenNameClassifier.js.map +1 -0
- package/out/HouseNumberClassifier.d.ts +15 -0
- package/out/HouseNumberClassifier.d.ts.map +1 -0
- package/out/HouseNumberClassifier.js +79 -0
- package/out/HouseNumberClassifier.js.map +1 -0
- package/out/IntersectionClassifier.d.ts +11 -0
- package/out/IntersectionClassifier.d.ts.map +1 -0
- package/out/IntersectionClassifier.js +33 -0
- package/out/IntersectionClassifier.js.map +1 -0
- package/out/LevelClassifier.d.ts +11 -0
- package/out/LevelClassifier.d.ts.map +1 -0
- package/out/LevelClassifier.js +28 -0
- package/out/LevelClassifier.js.map +1 -0
- package/out/LevelDesignatorClassifier.d.ts +11 -0
- package/out/LevelDesignatorClassifier.d.ts.map +1 -0
- package/out/LevelDesignatorClassifier.js +21 -0
- package/out/LevelDesignatorClassifier.js.map +1 -0
- package/out/MiddleInitialClassifier.d.ts +10 -0
- package/out/MiddleInitialClassifier.d.ts.map +1 -0
- package/out/MiddleInitialClassifier.js +18 -0
- package/out/MiddleInitialClassifier.js.map +1 -0
- package/out/OrdinalClassifier.d.ts +10 -0
- package/out/OrdinalClassifier.d.ts.map +1 -0
- package/out/OrdinalClassifier.js +23 -0
- package/out/OrdinalClassifier.js.map +1 -0
- package/out/PersonClassifier.d.ts +11 -0
- package/out/PersonClassifier.d.ts.map +1 -0
- package/out/PersonClassifier.js +30 -0
- package/out/PersonClassifier.js.map +1 -0
- package/out/PersonalSuffixClassifier.d.ts +11 -0
- package/out/PersonalSuffixClassifier.d.ts.map +1 -0
- package/out/PersonalSuffixClassifier.js +31 -0
- package/out/PersonalSuffixClassifier.js.map +1 -0
- package/out/PersonalTitleClassifier.d.ts +11 -0
- package/out/PersonalTitleClassifier.d.ts.map +1 -0
- package/out/PersonalTitleClassifier.js +32 -0
- package/out/PersonalTitleClassifier.js.map +1 -0
- package/out/PlaceClassifier.d.ts +11 -0
- package/out/PlaceClassifier.d.ts.map +1 -0
- package/out/PlaceClassifier.js +38 -0
- package/out/PlaceClassifier.js.map +1 -0
- package/out/PostcodeClassifier.d.ts +14 -0
- package/out/PostcodeClassifier.d.ts.map +1 -0
- package/out/PostcodeClassifier.js +62 -0
- package/out/PostcodeClassifier.js.map +1 -0
- package/out/RoadTypeClassifier.d.ts +11 -0
- package/out/RoadTypeClassifier.d.ts.map +1 -0
- package/out/RoadTypeClassifier.js +20 -0
- package/out/RoadTypeClassifier.js.map +1 -0
- package/out/StopWordClassifier.d.ts +14 -0
- package/out/StopWordClassifier.d.ts.map +1 -0
- package/out/StopWordClassifier.js +35 -0
- package/out/StopWordClassifier.js.map +1 -0
- package/out/StreetPrefixClassifier.d.ts +11 -0
- package/out/StreetPrefixClassifier.d.ts.map +1 -0
- package/out/StreetPrefixClassifier.js +40 -0
- package/out/StreetPrefixClassifier.js.map +1 -0
- package/out/StreetProperNameClassifier.d.ts +16 -0
- package/out/StreetProperNameClassifier.d.ts.map +1 -0
- package/out/StreetProperNameClassifier.js +32 -0
- package/out/StreetProperNameClassifier.js.map +1 -0
- package/out/StreetSuffixClassifier.d.ts +11 -0
- package/out/StreetSuffixClassifier.d.ts.map +1 -0
- package/out/StreetSuffixClassifier.js +50 -0
- package/out/StreetSuffixClassifier.js.map +1 -0
- package/out/SurnameClassifier.d.ts +11 -0
- package/out/SurnameClassifier.d.ts.map +1 -0
- package/out/SurnameClassifier.js +29 -0
- package/out/SurnameClassifier.js.map +1 -0
- package/out/TokenPositionClassifier.d.ts +20 -0
- package/out/TokenPositionClassifier.d.ts.map +1 -0
- package/out/TokenPositionClassifier.js +41 -0
- package/out/TokenPositionClassifier.js.map +1 -0
- package/out/ToponymClassifier.d.ts +11 -0
- package/out/ToponymClassifier.d.ts.map +1 -0
- package/out/ToponymClassifier.js +26 -0
- package/out/ToponymClassifier.js.map +1 -0
- package/out/UnitClassifier.d.ts +11 -0
- package/out/UnitClassifier.d.ts.map +1 -0
- package/out/UnitClassifier.js +43 -0
- package/out/UnitClassifier.js.map +1 -0
- package/out/UnitDesignatorClassifier.d.ts +11 -0
- package/out/UnitDesignatorClassifier.d.ts.map +1 -0
- package/out/UnitDesignatorClassifier.js +25 -0
- package/out/UnitDesignatorClassifier.js.map +1 -0
- package/out/WhosOnFirstClassifier.d.ts +16 -0
- package/out/WhosOnFirstClassifier.d.ts.map +1 -0
- package/out/WhosOnFirstClassifier.js +148 -0
- package/out/WhosOnFirstClassifier.js.map +1 -0
- package/out/adapter.d.ts +77 -0
- package/out/adapter.d.ts.map +1 -0
- package/out/adapter.js +125 -0
- package/out/adapter.js.map +1 -0
- package/out/composite/index.d.ts +12 -0
- package/out/composite/index.d.ts.map +1 -0
- package/out/composite/index.js +12 -0
- package/out/composite/index.js.map +1 -0
- package/out/composite/intersection.d.ts +10 -0
- package/out/composite/intersection.d.ts.map +1 -0
- package/out/composite/intersection.js +115 -0
- package/out/composite/intersection.js.map +1 -0
- package/out/composite/person.d.ts +10 -0
- package/out/composite/person.d.ts.map +1 -0
- package/out/composite/person.js +112 -0
- package/out/composite/person.js.map +1 -0
- package/out/composite/street.d.ts +10 -0
- package/out/composite/street.d.ts.map +1 -0
- package/out/composite/street.js +384 -0
- package/out/composite/street.js.map +1 -0
- package/out/composite/street_name.d.ts +10 -0
- package/out/composite/street_name.d.ts.map +1 -0
- package/out/composite/street_name.js +78 -0
- package/out/composite/street_name.js.map +1 -0
- package/out/composite/subdivision.d.ts +10 -0
- package/out/composite/subdivision.d.ts.map +1 -0
- package/out/composite/subdivision.js +29 -0
- package/out/composite/subdivision.js.map +1 -0
- package/out/composite/venue.d.ts +10 -0
- package/out/composite/venue.d.ts.map +1 -0
- package/out/composite/venue.js +213 -0
- package/out/composite/venue.js.map +1 -0
- package/out/index.d.ts +39 -0
- package/out/index.d.ts.map +1 -0
- package/out/index.js +39 -0
- package/out/index.js.map +1 -0
- package/package.json +29 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, Span, TextNormalizer, WOFPlacenameCache, } from "@mailwoman/core";
|
|
7
|
+
import { resourceDictionaryPathBuilder } from "@mailwoman/core/utils";
|
|
8
|
+
// Path builders for the Who's On First dictionaries. Each is later called with a
// placetype name to obtain the directory handed to `WOFPlacenameCache`.
// NOTE(review): the exact path layout is decided by `resourceDictionaryPathBuilder`,
// which is not visible here — confirm before relying on a specific directory shape.
const wofPlacetypeDictionary = resourceDictionaryPathBuilder("whosonfirst");
const wofInternalPlacetypeDictionary = resourceDictionaryPathBuilder("internal", "whosonfirst");

// Single normalizer shared by cache construction and by lookups in `explore()`, so
// that dictionary entries and query phrases are normalized with the same options.
const WOFNormalizer = new TextNormalizer({
    lowercase: true,
    removeHyphen: true,
    removeAccents: true,
    // presumably rejects results shorter than two characters — TODO confirm in TextNormalizer
    minLength: 2,
});
|
|
16
|
+
// Per-placetype lookup configuration, keyed by placetype name.
//
// Ordering matters: entries must run from the most granular placetype to the least
// granular one, because consumers iterate this map in insertion order.
//
// - `files`: dictionary file patterns passed to the placename cache.
// - `classifications`: classification labels added to a span that matches.
const placetypeConfigMap = new Map(
    Object.entries({
        locality: {
            files: ["name:*_x_preferred.txt"],
            classifications: ["area", "locality"],
        },
        region: {
            files: ["abrv:*_x_preferred.txt", "name:*_x_preferred.txt"],
            classifications: ["area", "region"],
        },
        country: {
            files: ["name:*_x_preferred.txt", "wof:country.txt", "wof:country_alpha3.txt"],
            classifications: ["area", "country"],
        },
    }),
);
|
|
40
|
+
// Tokens handed to every placename cache as its `blacklist` option.
const tokenBlacklist = new Set([
    // Compass directions
    "north", "south", "east", "west",
    // Generic words that also occur as place names
    "town", "street", "city", "king",
    // Stop words
    "at", "rue",
    // Cardinal number words
    "one", "two", "three", "four", "five",
    "six", "seven", "eight", "nine", "ten",
    // Miscellaneous common words
    "cafe", "small", "grand",
]);
|
|
69
|
+
// Street-like words. A locality name whose final word appears in this set is removed
// from the locality cache once that cache has loaded.
const localityBlacklist = new Set([
    "avenue", "lane", "terrace", "street",
    "road", "crescent", "furlong", "broadway",
]);
|
|
80
|
+
/**
 * Phrase classifier that labels a span when its normalized text is found in one of
 * the Who's On First placename caches (one cache per entry in `placetypeConfigMap`).
 */
export class WhosOnFirstClassifier extends PhraseClassifier {
    /** Placename caches keyed by placetype; filled in by `ready()`. */
    placetypeToCacheMap = new Map();

    /**
     * Build and load one placename cache per configured placetype, one after another
     * in `placetypeConfigMap` order, then prune street-like names from the locality cache.
     *
     * @returns {Promise<this>} This classifier, once every cache has loaded.
     */
    async ready() {
        for (const [placetype, { files }] of placetypeConfigMap) {
            const cache = new WOFPlacenameCache({
                patterns: files,
                normalizer: WOFNormalizer,
                blacklist: tokenBlacklist,
                dataDirectory: wofPlacetypeDictionary(placetype),
                internalDataDirectory: wofInternalPlacetypeDictionary(placetype),
            });

            await cache.ready();
            this.placetypeToCacheMap.set(placetype, cache);

            // Only the locality cache receives placetype-specific pruning.
            if (placetype !== "locality") {
                continue;
            }

            // Drop locality names that read like streets, i.e. whose last
            // whitespace-separated word is in `localityBlacklist`.
            for (const [token] of cache) {
                const finalWord = token.split(/\s/).pop();

                if (finalWord && localityBlacklist.has(finalWord)) {
                    cache.delete(token);
                }
            }
        }

        return this;
    }

    /**
     * Add area classifications to `span` when its normalized text is a known placename.
     *
     * Nothing is added when:
     * - the span, or its first child, is a stop word;
     * - the span following the phrase is a street suffix or a place;
     * - the span preceding the phrase is an intersection;
     * - normalization yields a falsy value.
     *
     * When the preceding span is a stop word the classifications carry a confidence of
     * 0.5; otherwise `confidence` is passed as `undefined`.
     *
     * @param span Phrase span to inspect; its `classifications` collection is mutated.
     */
    explore(span) {
        if (span.is("stop_word") || span.children.first?.is("stop_word")) {
            return;
        }

        // Look just past the end of the phrase.
        const trailing = (span.children.last || span).nextSibling;

        if (trailing && (trailing.is("street_suffix") || trailing.is("place"))) {
            return;
        }

        // Look just before the start of the phrase.
        const leading = (span.children.first || span).previousSibling;

        if (leading?.is("intersection")) {
            return;
        }

        const confidence = leading?.is("stop_word") ? 0.5 : undefined;
        const normalizedPlacename = WOFNormalizer.normalize(span.normalized);

        if (!normalizedPlacename) {
            return;
        }

        for (const [placetype, { classifications }] of placetypeConfigMap) {
            // A cache is absent when `ready()` has not loaded this placetype.
            const languages = this.placetypeToCacheMap.get(placetype)?.get(normalizedPlacename);

            if (!languages) {
                continue;
            }

            for (const classification of classifications) {
                span.classifications.add({ classification, confidence, languages });
            }
        }
    }
}
|
|
148
|
+
//# sourceMappingURL=WhosOnFirstClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"WhosOnFirstClassifier.js","sourceRoot":"","sources":["../WhosOnFirstClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAEN,gBAAgB,EAChB,IAAI,EACJ,cAAc,EAEd,iBAAiB,GACjB,MAAM,iBAAiB,CAAA;AAExB,OAAO,EAAE,6BAA6B,EAAE,MAAM,uBAAuB,CAAA;AACrE,MAAM,sBAAsB,GAAG,6BAA6B,CAAC,aAAa,CAAC,CAAA;AAC3E,MAAM,8BAA8B,GAAG,6BAA6B,CAAC,UAAU,EAAE,aAAa,CAAC,CAAA;AAE/F,MAAM,aAAa,GAAG,IAAI,cAAc,CAAC;IACxC,SAAS,EAAE,IAAI;IACf,YAAY,EAAE,IAAI;IAClB,aAAa,EAAE,IAAI;IACnB,SAAS,EAAE,CAAC;CACZ,CAAC,CAAA;AAOF,sEAAsE;AACtE,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAmD;IACpF;QACC,UAAU;QACV;YACC,KAAK,EAAE,CAAC,wBAAwB,CAAC;YACjC,eAAe,EAAE,CAAC,MAAM,EAAE,UAAU,CAAC;SACrC;KACD;IACD;QACC,QAAQ;QACR;YACC,KAAK,EAAE,CAAC,wBAAwB,EAAE,wBAAwB,CAAC;YAC3D,eAAe,EAAE,CAAC,MAAM,EAAE,QAAQ,CAAC;SACnC;KACD;IAED;QACC,SAAS;QACT;YACC,KAAK,EAAE,CAAC,wBAAwB,EAAE,iBAAiB,EAAE,wBAAwB,CAAC;YAC9E,eAAe,EAAE,CAAC,MAAM,EAAE,SAAS,CAAC;SACpC;KACD;CACD,CAAC,CAAA;AAEF,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC;IAC9B,sBAAsB;IACtB,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IAEN,0BAA0B;IAC1B,MAAM;IACN,QAAQ;IACR,MAAM;IACN,MAAM;IACN,aAAa;IACb,IAAI;IACJ,KAAK;IAEL,mBAAmB;IACnB,KAAK;IACL,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,KAAK;IACL,OAAO;IACP,OAAO;IACP,MAAM;IACN,KAAK;IAEL,MAAM;IACN,OAAO;IACP,OAAO;CACE,CAAC,CAAA;AAEX,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC;IACjC,MAAM;IACN,QAAQ;IACR,MAAM;IACN,SAAS;IACT,QAAQ;IACR,MAAM;IACN,UAAU;IACV,SAAS;IACT,UAAU;CACV,CAAC,CAAA;AAEF,MAAM,OAAO,qBAAsB,SAAQ,gBAAgB;IACnD,mBAAmB,GAAG,IAAI,GAAG,EAA2C,CAAA;IAE/E,KAAK,CAAC,KAAK;QACV,KAAK,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,IAAI,kBAAkB,EAAE,CAAC;YACtD,MAAM,cAAc,GAAG,IAAI,iBAAiB,CAAC;gBAC5C,QAAQ,EAAE,MAAM,CAAC,KAAK;gBACtB,UAAU,EAAE,aAAa;gBACzB,SAAS,EAAE,cAAc;gBACzB,aAAa,EAAE,sBAAsB,CAAC,SAAS,CAAC;gBAChD,qBAAqB,EAAE,8BAA8B,CAAC,SAAS,CAAC;aAChE,CAAC,CAAA;YAEF,MAAM,cAAc,CAAC,KAAK,EAAE,CAAA;YAE5B,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,EAAE,cAAc,CAAC,CAAA;YAEvD,mCAAmC;YAEnC,IAAI,SAAS,KAAK,UAAU,EAAE,CAAC;gBAC9B,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,UAAU,CAAE,CAAA;gBAChE,iDAAiD;gBAEjD,K
AAK,MAAM,CAAC,KAAK,CAAC,IAAI,cAAc,EAAE,CAAC;oBACtC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;oBAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;oBAExC,IAAI,QAAQ,IAAI,iBAAiB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;wBACjD,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;oBAC7B,CAAC;gBACF,CAAC;YACF,CAAC;QACF,CAAC;QAED,OAAO,IAAI,CAAA;IACZ,CAAC;IAEM,OAAO,CAAC,IAAU;QACxB,IAAI,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC;YAClE,OAAM;QACP,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,IAAI,CAAA;QAC5C,MAAM,EAAE,WAAW,EAAE,GAAG,SAAS,CAAA;QAEjC,IAAI,WAAW,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,eAAe,CAAC,IAAI,WAAW,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;YACjF,OAAM;QACP,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,IAAI,CAAA;QAC9C,MAAM,EAAE,eAAe,EAAE,GAAG,UAAU,CAAA;QAEtC,IAAI,UAA8B,CAAA;QAElC,IAAI,eAAe,EAAE,CAAC;YACrB,IAAI,eAAe,CAAC,EAAE,CAAC,cAAc,CAAC;gBAAE,OAAM;YAE9C,IAAI,eAAe,CAAC,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC;gBACrC,UAAU,GAAG,GAAG,CAAA;YACjB,CAAC;QACF,CAAC;QAED,MAAM,mBAAmB,GAAG,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;QAEpE,IAAI,CAAC,mBAAmB;YAAE,OAAM;QAEhC,KAAK,MAAM,CAAC,SAAS,EAAE,eAAe,CAAC,IAAI,kBAAkB,EAAE,CAAC;YAC/D,MAAM,eAAe,GAAG,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;YAE/D,IAAI,CAAC,eAAe;gBAAE,SAAQ;YAE9B,MAAM,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAA;YAE1D,IAAI,CAAC,SAAS;gBAAE,SAAQ;YAExB,uCAAuC;YACvC,KAAK,MAAM,cAAc,IAAI,eAAe,CAAC,eAAe,EAAE,CAAC;gBAC9D,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;oBACxB,cAAc;oBACd,UAAU;oBACV,SAAS;iBACT,CAAC,CAAA;YACH,CAAC;QACF,CAAC;IACF,CAAC;CACD"}
|
package/out/adapter.d.ts
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Adapter that wraps Mailwoman's legacy mutation-based rule classifiers (`classifyTokens(context):
|
|
7
|
+
* void`) into the proposal-emitting contract declared in `@mailwoman/core/types` (#6).
|
|
8
|
+
*
|
|
9
|
+
* Phase 0 contract scaffolding: the adapter lets a single rule classifier be exposed as a
|
|
10
|
+
* `ProposalClassifier`. The exhaustive one-wrapper-per-classifier sweep + solver rewire is a
|
|
11
|
+
* follow-up; see Phase 0 task 3 success criteria in plan #8.
|
|
12
|
+
*/
|
|
13
|
+
import { type Classification, type Classifier as LegacyClassifier, type ClassifierConstructor as LegacyClassifierConstructor, type ClassifierOptions as LegacyClassifierOptions } from "@mailwoman/core/classification";
|
|
14
|
+
import { Span, TokenContext } from "@mailwoman/core/tokenization";
|
|
15
|
+
import { type ComponentTag, type ProposalClassifier } from "@mailwoman/core/types";
|
|
16
|
+
/**
|
|
17
|
+
* Options describing how to wrap a legacy classifier in the proposal interface.
|
|
18
|
+
*/
|
|
19
|
+
export interface WrapLegacyClassifierOptions {
|
|
20
|
+
/** Stable identifier for this classifier; used as `source_id`. */
|
|
21
|
+
id: string;
|
|
22
|
+
/** Legacy classifier instance or constructor. */
|
|
23
|
+
classifier: LegacyClassifier | LegacyClassifierConstructor;
|
|
24
|
+
/**
|
|
25
|
+
* Constructor options forwarded when `classifier` is a constructor. Ignored when it's already an
|
|
26
|
+
* instance.
|
|
27
|
+
*/
|
|
28
|
+
classifierOptions?: LegacyClassifierOptions;
|
|
29
|
+
/**
|
|
30
|
+
* `ComponentTag`s this classifier may emit. Proposals carrying a tag not in this set are dropped
|
|
31
|
+
* silently (no warning is logged); this also applies to tags returned by `mapTag`.
|
|
32
|
+
*/
|
|
33
|
+
emits: readonly ComponentTag[];
|
|
34
|
+
/**
|
|
35
|
+
* Legacy classification tags this classifier is known to produce. The wrapper traverses the span
|
|
36
|
+
* graph for spans bearing one of these tags. Defaults to "any tag with a `ComponentTag`
|
|
37
|
+
* mapping."
|
|
38
|
+
*/
|
|
39
|
+
legacyTags?: readonly Classification[];
|
|
40
|
+
/**
|
|
41
|
+
* Locales this classifier is active for. `["*"]` (locale-agnostic) by default — matches the
|
|
42
|
+
* pre-refactor behavior.
|
|
43
|
+
*/
|
|
44
|
+
locales?: readonly (string | "*")[];
|
|
45
|
+
/** Default penalty applied to emitted proposals. */
|
|
46
|
+
penalty?: number;
|
|
47
|
+
/**
|
|
48
|
+
* Override the default mapping for a legacy → component pair. Returning `null` drops the
|
|
49
|
+
* proposal.
|
|
50
|
+
*/
|
|
51
|
+
mapTag?: (legacy: Classification) => ComponentTag | null;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Wrap a single legacy rule classifier as a `ProposalClassifier`.
|
|
55
|
+
*
|
|
56
|
+
* Mechanics:
|
|
57
|
+
*
|
|
58
|
+
* 1. The legacy classifier is instantiated (if a constructor) and its `ready()` step is awaited.
|
|
59
|
+
* 2. On `classify(section)`, a fresh local `TokenContext` is built around the section's body text so
|
|
60
|
+
* legacy mutations don't bleed into the caller's graph.
|
|
61
|
+
* 3. After `classifyTokens(localContext)` runs, the wrapper walks all spans (sections, words, phrases)
|
|
62
|
+
* and collects spans bearing any of the wrapper's `legacyTags`.
|
|
63
|
+
* 4. Each such (span, classification) pair becomes a `ClassificationProposal`. The span is re-anchored
|
|
64
|
+
* to the caller's section so character offsets are correct relative to the original input.
|
|
65
|
+
*
|
|
66
|
+
* Note on isolation: building a fresh `TokenContext` is deliberately coarse — it ignores any prior
|
|
67
|
+
* classifications from earlier classifiers in the pipeline. Composite classifiers that depend on
|
|
68
|
+
* upstream tags will need a different bridge; that lives in a higher-level orchestrator, not in
|
|
69
|
+
* this adapter.
|
|
70
|
+
*/
|
|
71
|
+
export declare function wrapLegacyClassifier(options: WrapLegacyClassifierOptions): ProposalClassifier;
|
|
72
|
+
/**
|
|
73
|
+
* Walk every span attached to a `TokenContext` — sections, words, and phrases — in a single flat
|
|
74
|
+
* pass. Order is not guaranteed.
|
|
75
|
+
*/
|
|
76
|
+
export declare function iterateSpans(context: TokenContext): Generator<Span>;
|
|
77
|
+
//# sourceMappingURL=adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EACN,KAAK,cAAc,EACnB,KAAK,UAAU,IAAI,gBAAgB,EACnC,KAAK,qBAAqB,IAAI,2BAA2B,EACzD,KAAK,iBAAiB,IAAI,uBAAuB,EACjD,MAAM,gCAAgC,CAAA;AACvC,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAA;AACjE,OAAO,EAGN,KAAK,YAAY,EAEjB,KAAK,kBAAkB,EAEvB,MAAM,uBAAuB,CAAA;AAE9B;;GAEG;AACH,MAAM,WAAW,2BAA2B;IAC3C,kEAAkE;IAClE,EAAE,EAAE,MAAM,CAAA;IAEV,iDAAiD;IACjD,UAAU,EAAE,gBAAgB,GAAG,2BAA2B,CAAA;IAE1D;;;OAGG;IACH,iBAAiB,CAAC,EAAE,uBAAuB,CAAA;IAE3C;;;OAGG;IACH,KAAK,EAAE,SAAS,YAAY,EAAE,CAAA;IAE9B;;;;OAIG;IACH,UAAU,CAAC,EAAE,SAAS,cAAc,EAAE,CAAA;IAEtC;;;OAGG;IACH,OAAO,CAAC,EAAE,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE,CAAA;IAEnC,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB;;;OAGG;IACH,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,cAAc,KAAK,YAAY,GAAG,IAAI,CAAA;CACxD;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,2BAA2B,GAAG,kBAAkB,CA6E7F;AAED;;;GAGG;AACH,wBAAiB,YAAY,CAAC,OAAO,EAAE,YAAY,GAAG,SAAS,CAAC,IAAI,CAAC,CAgBpE"}
|
package/out/adapter.js
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Adapter that wraps Mailwoman's legacy mutation-based rule classifiers (`classifyTokens(context):
|
|
7
|
+
* void`) into the proposal-emitting contract declared in `@mailwoman/core/types` (#6).
|
|
8
|
+
*
|
|
9
|
+
* Phase 0 contract scaffolding: the adapter lets a single rule classifier be exposed as a
|
|
10
|
+
* `ProposalClassifier`. The exhaustive one-wrapper-per-classifier sweep + solver rewire is a
|
|
11
|
+
* follow-up; see Phase 0 task 3 success criteria in plan #8.
|
|
12
|
+
*/
|
|
13
|
+
import {} from "@mailwoman/core/classification";
|
|
14
|
+
import { Span, TokenContext } from "@mailwoman/core/tokenization";
|
|
15
|
+
import { legacyClassificationToComponentTag, } from "@mailwoman/core/types";
|
|
16
|
+
/**
 * Wrap a single legacy rule classifier as a `ProposalClassifier`.
 *
 * Mechanics:
 *
 * 1. The legacy classifier is instantiated (if a constructor was given). Its optional `ready()`
 *    step is invoked at most once, on the first `ready()` or `classify()` call, and the outcome
 *    (including a failure) is memoized.
 * 2. On `classify(section)`, a fresh local `TokenContext` is built around the section's body text so
 *    legacy mutations don't bleed into the caller's graph.
 * 3. After `classifyTokens(localContext)` runs, the wrapper walks all spans yielded by
 *    `iterateSpans` and inspects every classification on them, skipping tags outside
 *    `legacyTags` when that option is provided.
 * 4. Each remaining (span, classification) pair whose mapped component is listed in `emits`
 *    becomes a proposal. The span is passed through `rebaseSpan` with the section's start offset.
 *
 * Note on isolation: building a fresh `TokenContext` is deliberately coarse — it ignores any prior
 * classifications from earlier classifiers in the pipeline. Composite classifiers that depend on
 * upstream tags will need a different bridge; that lives in a higher-level orchestrator, not in
 * this adapter.
 *
 * @param options Wrapping options: `id`, `classifier` (instance or constructor),
 *   `classifierOptions`, `emits`, and the optional `legacyTags`, `locales` (default `["*"]`),
 *   `penalty` (default `0`) and `mapTag`.
 * @returns An object exposing `id`, `emits`, `locales`, `ready()` and `classify(section)`.
 */
export function wrapLegacyClassifier(options) {
    const {
        id,
        classifier: classifierOrCtor,
        classifierOptions,
        emits,
        legacyTags,
        locales = ["*"],
        penalty = 0,
        mapTag,
    } = options;

    const instance = typeof classifierOrCtor === "function" ? new classifierOrCtor(classifierOptions) : classifierOrCtor;

    // Legacy tag → component tag. The default mapping is wrapped in a closure so the
    // imported helper is only touched when a classification is actually mapped.
    const toComponent = mapTag ?? ((legacy) => legacyClassificationToComponentTag(legacy));

    // Set lookups replace the repeated linear `emits.includes` scans.
    const allowedComponents = new Set(emits);
    const expectedLegacy = legacyTags ? new Set(legacyTags) : null;

    let readyPromise = null;

    /**
     * Resolve once the legacy classifier is ready.
     *
     * The async wrapper lets `ready()` return a plain value or throw synchronously; both
     * cases surface through the returned promise instead of raising at the call site.
     */
    function ensureReady() {
        if (!instance.ready) {
            return Promise.resolve();
        }

        readyPromise ??= (async () => {
            await instance.ready();
        })();

        return readyPromise;
    }

    /** Run the legacy classifier over one section and collect the resulting proposals. */
    async function runOnSection(section) {
        await ensureReady();

        const localContext = new TokenContext(section.body);
        instance.classifyTokens(localContext);

        const sectionOffset = section.start;
        const proposals = [];

        for (const span of iterateSpans(localContext)) {
            for (const [legacy, match] of span.classifications) {
                if (expectedLegacy && !expectedLegacy.has(legacy)) {
                    continue;
                }

                // One check covers both the default mapping and a custom `mapTag`:
                // unmapped tags and tags outside `emits` are dropped.
                const component = toComponent(legacy);

                if (!component || !allowedComponents.has(component)) {
                    continue;
                }

                proposals.push({
                    span: rebaseSpan(span, sectionOffset),
                    component,
                    confidence: match.confidence,
                    source: "rule",
                    source_id: id,
                    penalty,
                    metadata: {
                        legacyClassification: legacy,
                        ...(match.languages ? { languages: Array.from(match.languages) } : {}),
                        ...(match.flags ? { flags: Array.from(match.flags) } : {}),
                    },
                });
            }
        }

        return proposals;
    }

    return {
        id,
        emits,
        locales,
        ready: () => ensureReady(),
        classify: (section, _context) => runOnSection(section),
    };
}
|
|
91
|
+
/**
 * Yield every span reachable from a `TokenContext`, each one at most once.
 *
 * Traversal starts from `context.span` (when set) and every entry of `context.sections`,
 * then follows each visited span's `children` and `phrases`. Spans are de-duplicated by
 * their `id`. Callers should not rely on the order in which spans are produced.
 *
 * @param context Token context whose span graph is walked.
 */
export function* iterateSpans(context) {
    const seenIds = new Set();
    // Used as a stack: the most recently added span is visited next.
    const pending = context.span ? [context.span] : [];
    pending.push(...context.sections);

    while (pending.length > 0) {
        const span = pending.pop();

        if (!seenIds.has(span.id)) {
            seenIds.add(span.id);
            yield span;
            pending.push(...span.children, ...span.phrases);
        }
    }
}
|
|
114
|
+
/**
 * Return a span whose `start` is shifted by `sectionOffset`, so that it is anchored to the
 * caller's original input rather than to the local context.
 *
 * With an offset of `0` the given span is returned as-is (same instance). For any other
 * offset a new span is created via `Span.from` from the source span's body and shifted
 * start only; the source span itself is left untouched.
 *
 * @param span Span taken from the local `TokenContext`.
 * @param sectionOffset Start offset of the section within the original input.
 */
function rebaseSpan(span, sectionOffset) {
    return sectionOffset === 0 ? span : Span.from(span.body, { start: span.start + sectionOffset });
}
|
|
125
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAKN,MAAM,gCAAgC,CAAA;AACvC,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAA;AACjE,OAAO,EAIN,kCAAkC,GAGlC,MAAM,uBAAuB,CAAA;AA+C9B;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,oBAAoB,CAAC,OAAoC;IACxE,MAAM,EACL,EAAE,EACF,UAAU,EAAE,gBAAgB,EAC5B,iBAAiB,EACjB,KAAK,EACL,UAAU,EACV,OAAO,GAAG,CAAC,GAAG,CAAC,EACf,OAAO,GAAG,CAAC,EACX,MAAM,GACN,GAAG,OAAO,CAAA;IAEX,MAAM,QAAQ,GACb,OAAO,gBAAgB,KAAK,UAAU,CAAC,CAAC,CAAC,IAAI,gBAAgB,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAA;IAEpG,MAAM,SAAS,GACd,MAAM;QACN,CAAC,CAAC,MAAM,EAAE,EAAE;YACX,MAAM,MAAM,GAAG,kCAAkC,CAAC,MAAM,CAAC,CAAA;YACzD,OAAO,MAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;QACxD,CAAC,CAAC,CAAA;IAEH,MAAM,cAAc,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,GAAG,CAAiB,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IAE9E,IAAI,YAAY,GAAyB,IAAI,CAAA;IAE7C,SAAS,WAAW;QACnB,IAAI,CAAC,QAAQ,CAAC,KAAK;YAAE,OAAO,OAAO,CAAC,OAAO,EAAE,CAAA;QAC7C,YAAY,KAAK,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAA;QACvD,OAAO,YAAY,CAAA;IACpB,CAAC;IAED,KAAK,UAAU,YAAY,CAAC,OAAgB;QAC3C,MAAM,WAAW,EAAE,CAAA;QAEnB,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QACnD,QAAQ,CAAC,cAAc,CAAC,YAAY,CAAC,CAAA;QAErC,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAA;QACnC,MAAM,SAAS,GAA6B,EAAE,CAAA;QAE9C,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/C,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;gBACpD,IAAI,cAAc,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC;oBAAE,SAAQ;gBAE3D,MAAM,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAA;gBACnC,IAAI,CAAC,SAAS;oBAAE,SAAQ;gBACxB,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;oBAAE,SAAQ;gBAExC,SAAS,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,UAAU,CAAC,IAAI,EAAE,aAAa,CAAC;oBACrC,SAAS;oBACT,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,MAAM,EAAE,MAAM;oBACd,SAAS,EAAE,EAAE;oBACb,OAAO;oBACP,QAAQ,EAAE;wBACT,oBAAoB,EAAE,MAAM;wBAC5B,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,EAA
E,CAAC,CAAC,CAAC,EAAE,CAAC;wBACtE,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;qBAC1D;iBACD,CAAC,CAAA;YACH,CAAC;QACF,CAAC;QAED,OAAO,SAAS,CAAA;IACjB,CAAC;IAED,MAAM,OAAO,GAAuB;QACnC,EAAE;QACF,KAAK;QACL,OAAO;QACP,KAAK,EAAE,GAAG,EAAE,CAAC,WAAW,EAAE;QAC1B,QAAQ,EAAE,CAAC,OAAO,EAAE,QAA2B,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC;KACzE,CAAA;IAED,OAAO,OAAO,CAAA;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,SAAS,CAAC,CAAC,YAAY,CAAC,OAAqB;IAClD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAA;IACjC,MAAM,KAAK,GAAW,EAAE,CAAA;IAExB,IAAI,OAAO,CAAC,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;IAC1C,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,QAAQ;QAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAE3D,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,EAAG,CAAA;QACzB,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAAE,SAAQ;QAClC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QACpB,MAAM,IAAI,CAAA;QAEV,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACpD,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACtD,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,SAAS,UAAU,CAAC,IAAU,EAAE,aAAqB;IACpD,IAAI,aAAa,KAAK,CAAC;QAAE,OAAO,IAAI,CAAA;IAEpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,GAAG,aAAa,EAAE,CAAC,CAAA;IAC3E,OAAO,OAAO,CAAA;AACf,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
export * from "./intersection.js";
|
|
7
|
+
export * from "./person.js";
|
|
8
|
+
export * from "./street.js";
|
|
9
|
+
export * from "./street_name.js";
|
|
10
|
+
export * from "./subdivision.js";
|
|
11
|
+
export * from "./venue.js";
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../composite/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,mBAAmB,CAAA;AACjC,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,YAAY,CAAA"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
export * from "./intersection.js";
|
|
7
|
+
export * from "./person.js";
|
|
8
|
+
export * from "./street.js";
|
|
9
|
+
export * from "./street_name.js";
|
|
10
|
+
export * from "./subdivision.js";
|
|
11
|
+
export * from "./venue.js";
|
|
12
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../composite/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,mBAAmB,CAAA;AACjC,cAAc,aAAa,CAAA;AAC3B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,YAAY,CAAA"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { CompositeClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Type declaration for the intersection classifier.
 *
 * The class extends `CompositeClassifier` (imported from `@mailwoman/core`)
 * and exposes a constructor that takes no arguments.
 */
export declare class CompositeIntersectionClassifier extends CompositeClassifier {
    constructor();
}
|
|
10
|
+
//# sourceMappingURL=intersection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"intersection.d.ts","sourceRoot":"","sources":["../../composite/intersection.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAA+B,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AA0GlF,qBAAa,+BAAgC,SAAQ,mBAAmB;;CAIvE"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { CompositeClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Scheme table handed to `CompositeClassifier` by
 * `CompositeIntersectionClassifier`.
 *
 * Each entry holds a `scheme`: an ordered list of token matchers. A matcher
 * has an `is` list and a `not` list of classification names, and may carry a
 * `confidence` and a `classification`.
 *
 * NOTE(review): presumably `is` lists classifications a token must carry and
 * `not` lists classifications it must lack, with `confidence` and
 * `classification` applied to the matched token. Those semantics live in
 * `@mailwoman/core` and are not visible here; confirm before relying on them.
 */
const configs = [
    {
        // SW 6th & Pine
        scheme: [
            {
                is: ["directional"],
                not: ["intersection", "street_suffix"],
            },
            {
                is: ["alpha", "numeric", "ordinal"],
                not: ["intersection", "street_suffix"],
            },
            {
                is: ["intersection"],
                not: ["street", "street_suffix"],
            },
            {
                is: ["alpha", "numeric", "ordinal"],
                not: ["intersection"],
            },
        ],
    },
    {
        // Foo St and Bar St
        scheme: [
            {
                is: ["alpha", "numeric", "ordinal"],
                not: ["intersection", "street_suffix"],
                confidence: 0.81,
                classification: "street",
            },
            {
                is: ["intersection"],
                not: ["street", "street_suffix"],
            },
            {
                is: ["alpha", "numeric", "ordinal"],
                not: ["intersection"],
                confidence: 0.82,
                classification: "street",
            },
        ],
    },
    {
        // Foo and Bar St
        scheme: [
            {
                is: ["alpha"],
                not: ["intersection", "street", "street_suffix"],
                confidence: 0.53,
                classification: "street",
            },
            {
                is: ["intersection"],
                not: ["street", "street_suffix"],
            },
            {
                is: ["alpha", "numeric", "ordinal"],
                not: ["intersection"],
            },
        ],
    },
    {
        // Foo St and Bar
        scheme: [
            {
                is: ["alpha", "numeric", "ordinal"],
                not: ["intersection"],
            },
            {
                is: ["intersection"],
                not: ["street", "street_suffix"],
            },
            {
                is: ["alpha", "numeric", "ordinal"],
                not: ["intersection", "street"],
                confidence: 0.56,
                classification: "street",
            },
        ],
    },
    {
        // Foo and Bar
        scheme: [
            {
                is: ["alpha"],
                not: ["intersection", "street", "street_suffix"],
                confidence: 0.57,
                classification: "street",
            },
            {
                is: ["intersection"],
                not: ["street", "street_suffix"],
            },
            {
                is: ["alpha"],
                not: ["intersection", "street", "street_suffix"],
                confidence: 0.58,
                classification: "street",
            },
        ],
    },
];
|
|
110
|
+
/**
 * Classifier built on `CompositeClassifier` (imported from `@mailwoman/core`).
 *
 * The constructor takes no arguments; it forwards the fixed label
 * "multistreet" and this module's `configs` table to the base constructor.
 *
 * NOTE(review): what the base class does with the label and the configs is
 * defined in `@mailwoman/core` and is not visible in this file; confirm there.
 */
export class CompositeIntersectionClassifier extends CompositeClassifier {
    constructor() {
        super("multistreet", configs);
    }
}
|
|
115
|
+
//# sourceMappingURL=intersection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"intersection.js","sourceRoot":"","sources":["../../composite/intersection.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAA+B,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAElF,MAAM,OAAO,GAA6B;IACzC;QACC,gBAAgB;QAChB,MAAM,EAAE;YACP;gBACC,EAAE,EAAE,CAAC,aAAa,CAAC;gBACnB,GAAG,EAAE,CAAC,cAAc,EAAE,eAAe,CAAC;aACtC;YACD;gBACC,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,SAAS,CAAC;gBACnC,GAAG,EAAE,CAAC,cAAc,EAAE,eAAe,CAAC;aACtC;YACD;gBACC,EAAE,EAAE,CAAC,cAAc,CAAC;gBACpB,GAAG,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC;aAChC;YACD;gBACC,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,SAAS,CAAC;gBACnC,GAAG,EAAE,CAAC,cAAc,CAAC;aACrB;SACD;KACD;IACD;QACC,oBAAoB;QACpB,MAAM,EAAE;YACP;gBACC,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,SAAS,CAAC;gBACnC,GAAG,EAAE,CAAC,cAAc,EAAE,eAAe,CAAC;gBACtC,UAAU,EAAE,IAAI;gBAChB,cAAc,EAAE,QAAQ;aACxB;YACD;gBACC,EAAE,EAAE,CAAC,cAAc,CAAC;gBACpB,GAAG,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC;aAChC;YACD;gBACC,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,SAAS,CAAC;gBACnC,GAAG,EAAE,CAAC,cAAc,CAAC;gBACrB,UAAU,EAAE,IAAI;gBAChB,cAAc,EAAE,QAAQ;aACxB;SACD;KACD;IACD;QACC,iBAAiB;QACjB,MAAM,EAAE;YACP;gBACC,EAAE,EAAE,CAAC,OAAO,CAAC;gBACb,GAAG,EAAE,CAAC,cAAc,EAAE,QAAQ,EAAE,eAAe,CAAC;gBAChD,UAAU,EAAE,IAAI;gBAChB,cAAc,EAAE,QAAQ;aACxB;YACD;gBACC,EAAE,EAAE,CAAC,cAAc,CAAC;gBACpB,GAAG,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC;aAChC;YACD;gBACC,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,SAAS,CAAC;gBACnC,GAAG,EAAE,CAAC,cAAc,CAAC;aACrB;SACD;KACD;IACD;QACC,iBAAiB;QACjB,MAAM,EAAE;YACP;gBACC,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,SAAS,CAAC;gBACnC,GAAG,EAAE,CAAC,cAAc,CAAC;aACrB;YACD;gBACC,EAAE,EAAE,CAAC,cAAc,CAAC;gBACpB,GAAG,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC;aAChC;YACD;gBACC,EAAE,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,SAAS,CAAC;gBACnC,GAAG,EAAE,CAAC,cAAc,EAAE,QAAQ,CAAC;gBAC/B,UAAU,EAAE,IAAI;gBAChB,cAAc,EAAE,QAAQ;aACxB;SACD;KACD;IACD;QACC,cAAc;QACd,MAAM,EAAE;YACP;gBACC,EAAE,EAAE,CAAC,OAAO,CAAC;gBACb,GAAG,EAAE,CAAC,cAAc,EAAE,QAAQ,EAAE,eAAe,CAAC;gBAChD,UAAU,EAAE,IAAI;gBAChB,cAAc,EAAE,QAAQ;aACxB;YACD;gBACC,EAAE,EAAE,CAAC,cAAc,CAAC;gBACpB,GAAG,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC;aA
ChC;YACD;gBACC,EAAE,EAAE,CAAC,OAAO,CAAC;gBACb,GAAG,EAAE,CAAC,cAAc,EAAE,QAAQ,EAAE,eAAe,CAAC;gBAChD,UAAU,EAAE,IAAI;gBAChB,cAAc,EAAE,QAAQ;aACxB;SACD;KACD;CACD,CAAA;AAED,MAAM,OAAO,+BAAgC,SAAQ,mBAAmB;IACvE;QACC,KAAK,CAAC,aAAa,EAAE,OAAO,CAAC,CAAA;IAC9B,CAAC;CACD"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { CompositeClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Type declaration for the person classifier.
 *
 * The class extends `CompositeClassifier` (imported from `@mailwoman/core`)
 * and exposes a constructor that takes no arguments.
 */
export declare class CompositePersonClassifier extends CompositeClassifier {
    constructor();
}
|
|
10
|
+
//# sourceMappingURL=person.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"person.d.ts","sourceRoot":"","sources":["../../composite/person.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAA+B,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAuGlF,qBAAa,yBAA0B,SAAQ,mBAAmB;;CAIjE"}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { CompositeClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Scheme table handed to `CompositeClassifier` by `CompositePersonClassifier`.
 *
 * Each entry has an entry-level `confidence`, an optional entry-level
 * `classification` (only the first entry sets one), and a `scheme`: an ordered
 * list of token matchers, each with an `is` list and a `not` list of
 * classification names.
 *
 * NOTE(review): presumably `is` lists classifications a token must carry and
 * `not` lists classifications it must lack, and entries without their own
 * `classification` fall back to a default chosen by the base class. Those
 * semantics live in `@mailwoman/core` and are not visible here; confirm.
 */
const configs = [
    {
        // Anne Marie
        classification: "given_name",
        confidence: 0.25,
        scheme: [
            {
                is: ["given_name"],
                not: ["street", "intersection"],
            },
            {
                is: ["given_name"],
                not: ["street", "street_prefix", "stop_word"],
            },
        ],
    },
    {
        // Georges Bizet
        confidence: 0.5,
        scheme: [
            {
                is: ["given_name"],
                not: ["street", "intersection"],
            },
            {
                is: ["surname"],
                not: ["street", "street_prefix", "stop_word"],
            },
        ],
    },
    {
        // Rose de Lima
        confidence: 0.5,
        scheme: [
            {
                is: ["given_name"],
                not: ["street", "intersection"],
            },
            {
                is: ["stop_word"],
                not: ["street", "intersection"],
            },
            {
                is: ["surname"],
                not: ["street", "street_prefix", "stop_word"],
            },
        ],
    },
    {
        // Raul Leite Magalhães (first name, middle name, family name)
        // Donald W. Reynolds
        confidence: 0.5,
        scheme: [
            {
                is: ["given_name"],
                not: ["street", "intersection"],
            },
            {
                is: ["given_name", "surname", "middle_initial"],
                not: ["street", "intersection"],
            },
            {
                is: ["surname"],
                not: ["street", "street_prefix", "stop_word"],
            },
        ],
    },
    {
        // Unknown surname: given name followed by an unrecognised alpha token
        confidence: 0.1,
        scheme: [
            {
                is: ["given_name"],
                not: ["street", "intersection"],
            },
            {
                is: ["alpha"],
                not: ["street", "street_prefix", "stop_word"],
            },
        ],
    },
    {
        // Unknown surname: given name, stop word, then an unrecognised alpha token
        confidence: 0.1,
        scheme: [
            {
                is: ["given_name"],
                not: ["street", "intersection"],
            },
            {
                is: ["stop_word"],
                not: ["street", "intersection"],
            },
            {
                is: ["alpha"],
                not: ["street", "street_prefix", "stop_word"],
            },
        ],
    },
];
|
|
107
|
+
/**
 * Classifier built on `CompositeClassifier` (imported from `@mailwoman/core`).
 *
 * The constructor takes no arguments; it forwards the fixed label "person"
 * and this module's `configs` table to the base constructor.
 *
 * NOTE(review): what the base class does with the label and the configs is
 * defined in `@mailwoman/core` and is not visible in this file; confirm there.
 */
export class CompositePersonClassifier extends CompositeClassifier {
    constructor() {
        super("person", configs);
    }
}
|
|
112
|
+
//# sourceMappingURL=person.js.map
|