@mailwoman/classifiers 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/AdjacencyClassifier.d.ts +16 -0
- package/out/AdjacencyClassifier.d.ts.map +1 -0
- package/out/AdjacencyClassifier.js +50 -0
- package/out/AdjacencyClassifier.js.map +1 -0
- package/out/AlphaNumericClassifier.d.ts +10 -0
- package/out/AlphaNumericClassifier.d.ts.map +1 -0
- package/out/AlphaNumericClassifier.js +23 -0
- package/out/AlphaNumericClassifier.js.map +1 -0
- package/out/CentralEuropeanStreetNameClassifier.d.ts +16 -0
- package/out/CentralEuropeanStreetNameClassifier.d.ts.map +1 -0
- package/out/CentralEuropeanStreetNameClassifier.js +36 -0
- package/out/CentralEuropeanStreetNameClassifier.js.map +1 -0
- package/out/ChainClassifier.d.ts +14 -0
- package/out/ChainClassifier.d.ts.map +1 -0
- package/out/ChainClassifier.js +25 -0
- package/out/ChainClassifier.js.map +1 -0
- package/out/CompoundLevelClassifier.d.ts +16 -0
- package/out/CompoundLevelClassifier.d.ts.map +1 -0
- package/out/CompoundLevelClassifier.js +58 -0
- package/out/CompoundLevelClassifier.js.map +1 -0
- package/out/CompoundStreetClassifier.d.ts +12 -0
- package/out/CompoundStreetClassifier.d.ts.map +1 -0
- package/out/CompoundStreetClassifier.js +50 -0
- package/out/CompoundStreetClassifier.js.map +1 -0
- package/out/CompoundUnitDesignatorClassifier.d.ts +16 -0
- package/out/CompoundUnitDesignatorClassifier.d.ts.map +1 -0
- package/out/CompoundUnitDesignatorClassifier.js +57 -0
- package/out/CompoundUnitDesignatorClassifier.js.map +1 -0
- package/out/DirectionalClassifier.d.ts +11 -0
- package/out/DirectionalClassifier.d.ts.map +1 -0
- package/out/DirectionalClassifier.js +31 -0
- package/out/DirectionalClassifier.js.map +1 -0
- package/out/GivenNameClassifier.d.ts +11 -0
- package/out/GivenNameClassifier.d.ts.map +1 -0
- package/out/GivenNameClassifier.js +32 -0
- package/out/GivenNameClassifier.js.map +1 -0
- package/out/HouseNumberClassifier.d.ts +15 -0
- package/out/HouseNumberClassifier.d.ts.map +1 -0
- package/out/HouseNumberClassifier.js +79 -0
- package/out/HouseNumberClassifier.js.map +1 -0
- package/out/IntersectionClassifier.d.ts +11 -0
- package/out/IntersectionClassifier.d.ts.map +1 -0
- package/out/IntersectionClassifier.js +33 -0
- package/out/IntersectionClassifier.js.map +1 -0
- package/out/LevelClassifier.d.ts +11 -0
- package/out/LevelClassifier.d.ts.map +1 -0
- package/out/LevelClassifier.js +28 -0
- package/out/LevelClassifier.js.map +1 -0
- package/out/LevelDesignatorClassifier.d.ts +11 -0
- package/out/LevelDesignatorClassifier.d.ts.map +1 -0
- package/out/LevelDesignatorClassifier.js +21 -0
- package/out/LevelDesignatorClassifier.js.map +1 -0
- package/out/MiddleInitialClassifier.d.ts +10 -0
- package/out/MiddleInitialClassifier.d.ts.map +1 -0
- package/out/MiddleInitialClassifier.js +18 -0
- package/out/MiddleInitialClassifier.js.map +1 -0
- package/out/OrdinalClassifier.d.ts +10 -0
- package/out/OrdinalClassifier.d.ts.map +1 -0
- package/out/OrdinalClassifier.js +23 -0
- package/out/OrdinalClassifier.js.map +1 -0
- package/out/PersonClassifier.d.ts +11 -0
- package/out/PersonClassifier.d.ts.map +1 -0
- package/out/PersonClassifier.js +30 -0
- package/out/PersonClassifier.js.map +1 -0
- package/out/PersonalSuffixClassifier.d.ts +11 -0
- package/out/PersonalSuffixClassifier.d.ts.map +1 -0
- package/out/PersonalSuffixClassifier.js +31 -0
- package/out/PersonalSuffixClassifier.js.map +1 -0
- package/out/PersonalTitleClassifier.d.ts +11 -0
- package/out/PersonalTitleClassifier.d.ts.map +1 -0
- package/out/PersonalTitleClassifier.js +32 -0
- package/out/PersonalTitleClassifier.js.map +1 -0
- package/out/PlaceClassifier.d.ts +11 -0
- package/out/PlaceClassifier.d.ts.map +1 -0
- package/out/PlaceClassifier.js +38 -0
- package/out/PlaceClassifier.js.map +1 -0
- package/out/PostcodeClassifier.d.ts +14 -0
- package/out/PostcodeClassifier.d.ts.map +1 -0
- package/out/PostcodeClassifier.js +62 -0
- package/out/PostcodeClassifier.js.map +1 -0
- package/out/RoadTypeClassifier.d.ts +11 -0
- package/out/RoadTypeClassifier.d.ts.map +1 -0
- package/out/RoadTypeClassifier.js +20 -0
- package/out/RoadTypeClassifier.js.map +1 -0
- package/out/StopWordClassifier.d.ts +14 -0
- package/out/StopWordClassifier.d.ts.map +1 -0
- package/out/StopWordClassifier.js +35 -0
- package/out/StopWordClassifier.js.map +1 -0
- package/out/StreetPrefixClassifier.d.ts +11 -0
- package/out/StreetPrefixClassifier.d.ts.map +1 -0
- package/out/StreetPrefixClassifier.js +40 -0
- package/out/StreetPrefixClassifier.js.map +1 -0
- package/out/StreetProperNameClassifier.d.ts +16 -0
- package/out/StreetProperNameClassifier.d.ts.map +1 -0
- package/out/StreetProperNameClassifier.js +32 -0
- package/out/StreetProperNameClassifier.js.map +1 -0
- package/out/StreetSuffixClassifier.d.ts +11 -0
- package/out/StreetSuffixClassifier.d.ts.map +1 -0
- package/out/StreetSuffixClassifier.js +50 -0
- package/out/StreetSuffixClassifier.js.map +1 -0
- package/out/SurnameClassifier.d.ts +11 -0
- package/out/SurnameClassifier.d.ts.map +1 -0
- package/out/SurnameClassifier.js +29 -0
- package/out/SurnameClassifier.js.map +1 -0
- package/out/TokenPositionClassifier.d.ts +20 -0
- package/out/TokenPositionClassifier.d.ts.map +1 -0
- package/out/TokenPositionClassifier.js +41 -0
- package/out/TokenPositionClassifier.js.map +1 -0
- package/out/ToponymClassifier.d.ts +11 -0
- package/out/ToponymClassifier.d.ts.map +1 -0
- package/out/ToponymClassifier.js +26 -0
- package/out/ToponymClassifier.js.map +1 -0
- package/out/UnitClassifier.d.ts +11 -0
- package/out/UnitClassifier.d.ts.map +1 -0
- package/out/UnitClassifier.js +43 -0
- package/out/UnitClassifier.js.map +1 -0
- package/out/UnitDesignatorClassifier.d.ts +11 -0
- package/out/UnitDesignatorClassifier.d.ts.map +1 -0
- package/out/UnitDesignatorClassifier.js +25 -0
- package/out/UnitDesignatorClassifier.js.map +1 -0
- package/out/WhosOnFirstClassifier.d.ts +16 -0
- package/out/WhosOnFirstClassifier.d.ts.map +1 -0
- package/out/WhosOnFirstClassifier.js +148 -0
- package/out/WhosOnFirstClassifier.js.map +1 -0
- package/out/adapter.d.ts +77 -0
- package/out/adapter.d.ts.map +1 -0
- package/out/adapter.js +125 -0
- package/out/adapter.js.map +1 -0
- package/out/composite/index.d.ts +12 -0
- package/out/composite/index.d.ts.map +1 -0
- package/out/composite/index.js +12 -0
- package/out/composite/index.js.map +1 -0
- package/out/composite/intersection.d.ts +10 -0
- package/out/composite/intersection.d.ts.map +1 -0
- package/out/composite/intersection.js +115 -0
- package/out/composite/intersection.js.map +1 -0
- package/out/composite/person.d.ts +10 -0
- package/out/composite/person.d.ts.map +1 -0
- package/out/composite/person.js +112 -0
- package/out/composite/person.js.map +1 -0
- package/out/composite/street.d.ts +10 -0
- package/out/composite/street.d.ts.map +1 -0
- package/out/composite/street.js +384 -0
- package/out/composite/street.js.map +1 -0
- package/out/composite/street_name.d.ts +10 -0
- package/out/composite/street_name.d.ts.map +1 -0
- package/out/composite/street_name.js +78 -0
- package/out/composite/street_name.js.map +1 -0
- package/out/composite/subdivision.d.ts +10 -0
- package/out/composite/subdivision.d.ts.map +1 -0
- package/out/composite/subdivision.js +29 -0
- package/out/composite/subdivision.js.map +1 -0
- package/out/composite/venue.d.ts +10 -0
- package/out/composite/venue.d.ts.map +1 -0
- package/out/composite/venue.js +213 -0
- package/out/composite/venue.js.map +1 -0
- package/out/index.d.ts +39 -0
- package/out/index.d.ts.map +1 -0
- package/out/index.js +39 -0
- package/out/index.js.map +1 -0
- package/package.json +29 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
|
|
8
|
+
* Regex builder.
|
|
9
|
+
*/
|
|
10
|
+
const r = (...clauses) => new RegExp(`^${clauses.join("")}$`);
|
|
11
|
+
// Constants for basic pattern components.
|
|
12
|
+
const NUMERIC = "\\d{1,5}";
|
|
13
|
+
const CYRILLIC_LETTER = "\\u0400-\\u04FF";
|
|
14
|
+
const ALPHA = `[a-zA-Z${CYRILLIC_LETTER}]`;
|
|
15
|
+
const OPTIONAL_LETTER = `[a-zA-Z${CYRILLIC_LETTER}]?`;
|
|
16
|
+
const DIRECTIONAL = "[nsewNSEW]";
|
|
17
|
+
// Common pattern combinations.
|
|
18
|
+
// const basicHouseNumber = `${NUMERIC}${OPTIONAL_LETTER}`
|
|
19
|
+
const rangeSeparator = "-";
|
|
20
|
+
const fractionSeparator = "\\/";
|
|
21
|
+
const cyrillicPattern = new RegExp(`[${CYRILLIC_LETTER}]`);
|
|
22
|
+
// note: \u0400-\u04FF represents a-z in the Cyrillic alphabet
|
|
23
|
+
const houseNumberPatterns = [
|
|
24
|
+
// 10
|
|
25
|
+
[r(NUMERIC), ["numeric"]],
|
|
26
|
+
// 10a
|
|
27
|
+
[r(NUMERIC, ALPHA), ["alphanumeric"]],
|
|
28
|
+
// 10-19
|
|
29
|
+
[r(NUMERIC, rangeSeparator, NUMERIC), ["numeric", "separator"]],
|
|
30
|
+
// 10-19a
|
|
31
|
+
[r(NUMERIC, rangeSeparator, NUMERIC, ALPHA), ["numeric", "separator", "alphanumeric"]],
|
|
32
|
+
// 1/135
|
|
33
|
+
[r(NUMERIC, fractionSeparator, NUMERIC), ["numeric", "fractional", "numeric"]],
|
|
34
|
+
// 1b/135
|
|
35
|
+
[r(NUMERIC, ALPHA, fractionSeparator, NUMERIC), ["alphanumeric", "fractional", "numeric"]],
|
|
36
|
+
// Fractional: 1 3/4
|
|
37
|
+
[/^(\d{1,5}) (\d\/\d)?$/, ["numeric", "separator", "fractional"]],
|
|
38
|
+
// 6N23 (i.e. Kane County, IL)
|
|
39
|
+
[r(NUMERIC, DIRECTIONAL, NUMERIC), ["numeric", "directional"]],
|
|
40
|
+
// W350N5337 (i.e. Waukesha County, WI)
|
|
41
|
+
[r(DIRECTIONAL, NUMERIC, DIRECTIONAL, NUMERIC, "?"), ["directional", "numeric"]],
|
|
42
|
+
// N453
|
|
43
|
+
[r(DIRECTIONAL, NUMERIC), ["directional", "numeric"]],
|
|
44
|
+
];
|
|
45
|
+
/**
 * Word classifier that records a `house_number` classification on spans whose normalized
 * text matches at least one entry of `houseNumberPatterns`.
 */
export class HouseNumberClassifier extends WordClassifier {
    /**
     * Examines a single span and, when it looks like a house number, adds a classification
     * carrying a confidence score and the flags of every matching pattern.
     *
     * @param span The span to examine; its `classifications` collection is mutated on a match.
     */
    explore(span) {
        // Only spans flagged as containing numerals are candidates.
        const isNumeral = span.flags.has("numeral");
        if (!isNumeral) {
            return;
        }

        // A number directly after a level or unit designator is not treated as a house number.
        const precedingSpan = span.previousSibling;
        if (precedingSpan && (precedingSpan.is("level_designator") || precedingSpan.is("unit_designator"))) {
            return;
        }

        // Gather the flags of all matching patterns, in table order.
        const text = span.normalized;
        const flags = new Set();
        let matched = false;
        for (const [pattern, patternFlags] of houseNumberPatterns) {
            if (!pattern.test(text)) {
                continue;
            }
            matched = true;
            for (const flag of patternFlags) {
                flags.add(flag);
            }
        }
        if (!matched) {
            return;
        }

        if (cyrillicPattern.test(text)) {
            flags.add("cyrillic");
        }

        // It's possible to have 5 digit housenumbers but they are fairly uncommon, so text
        // starting with five digits scores lowest; four leading digits scores slightly below 1.
        const confidence = /^\d{5}/.test(text) ? 0.2 : /^\d{4}/.test(text) ? 0.9 : 1;

        span.classifications.add({
            classification: "house_number",
            confidence,
            flags,
        });
    }
}
|
|
79
|
+
//# sourceMappingURL=HouseNumberClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HouseNumberClassifier.js","sourceRoot":"","sources":["../HouseNumberClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAEtD;;GAEG;AACH,MAAM,CAAC,GAAG,CAAC,GAAG,OAAiB,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,CAAC,CAAA;AAEvE,0CAA0C;AAC1C,MAAM,OAAO,GAAG,UAAU,CAAA;AAC1B,MAAM,eAAe,GAAG,iBAAiB,CAAA;AACzC,MAAM,KAAK,GAAG,UAAU,eAAe,GAAG,CAAA;AAC1C,MAAM,eAAe,GAAG,UAAU,eAAe,IAAI,CAAA;AACrD,MAAM,WAAW,GAAG,YAAY,CAAA;AAEhC,+BAA+B;AAC/B,0DAA0D;AAC1D,MAAM,cAAc,GAAG,GAAG,CAAA;AAC1B,MAAM,iBAAiB,GAAG,KAAK,CAAA;AAQ/B,MAAM,eAAe,GAAG,IAAI,MAAM,CAAC,IAAI,eAAe,GAAG,CAAC,CAAA;AAE1D,8DAA8D;AAC9D,MAAM,mBAAmB,GAA2C;IACnE,KAAK;IACL,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;IAEzB,MAAM;IACN,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,cAAc,CAAC,CAAC;IAErC,QAAQ;IACR,CAAC,CAAC,CAAC,OAAO,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;IAE/D,SAAS;IACT,CAAC,CAAC,CAAC,OAAO,EAAE,cAAc,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC;IAEtF,QAAQ;IACR,CAAC,CAAC,CAAC,OAAO,EAAE,iBAAiB,EAAE,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC;IAE9E,SAAS;IACT,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,iBAAiB,EAAE,OAAO,CAAC,EAAE,CAAC,cAAc,EAAE,YAAY,EAAE,SAAS,CAAC,CAAC;IAE1F,oBAAoB;IACpB,CAAC,uBAAuB,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,YAAY,CAAC,CAAC;IAEjE,+BAA+B;IAC/B,CAAC,CAAC,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IAE9D,uCAAuC;IACvC,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,CAAC,EAAE,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;IAChF,OAAO;IACP,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,EAAE,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;CACrD,CAAA;AAED,MAAM,OAAO,qBAAsB,SAAQ,cAAc;IACjD,OAAO,CAAC,IAAU;QACxB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAEtC,MAAM,EAAE,eAAe,EAAE,GAAG,IAAI,CAAA;QAEhC,IAAI,eAAe,EAAE,CAAC;YACrB,kEAAkE;YAClE,IAAI,eAAe,CAAC,EAAE,CAAC,kBAAkB,CAAC;gBAAE,OAAM;YAClD,IAAI,eAAe,CAAC,EAAE,CAAC,iBAAiB,CAAC;gBAAE,OAAM;QAClD,CAAC;QAED,MAAM,OA
AO,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAA;QAExF,IAAI,CAAC,OAAO,CAAC,MAAM;YAAE,OAAM;QAE3B,MAAM,KAAK,GAAG,IAAI,GAAG,CAAkB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;QAE1F,IAAI,UAAU,GAAG,CAAC,CAAA;QAElB,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAC3C,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;QACtB,CAAC;QAED,2EAA2E;QAC3E,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACpC,UAAU,GAAG,GAAG,CAAA;QACjB,CAAC;aAAM,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAC3C,UAAU,GAAG,GAAG,CAAA;QACjB,CAAC;QAED,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;YACxB,cAAc,EAAE,cAAc;YAC9B,UAAU;YACV,KAAK;SACL,CAAC,CAAA;IACH,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that tags intersection connectors (for example "&" or "and").
 */
export declare class IntersectionClassifier extends PhraseClassifier {
    /** Connector strings that are recognised as intersection markers. */
    intersectionSymbols: Set<string>;
    /**
     * Inspects `span` and, when applicable, records an "intersection" classification.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
11
|
+
//# sourceMappingURL=IntersectionClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IntersectionClassifier.d.ts","sourceRoot":"","sources":["../IntersectionClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,gBAAgB,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAExD,qBAAa,sBAAuB,SAAQ,gBAAgB;IACpD,mBAAmB,cASxB;IAEK,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAgBhC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that marks connector words and symbols between two other spans
 * as an "intersection".
 */
export class IntersectionClassifier extends PhraseClassifier {
    /** Normalized connector strings that are treated as intersection markers. */
    intersectionSymbols = new Set([
        // ---
        "&",
        "and",
        "und",
        "@",
        "at",
        "con",
        "an der ecke von",
    ]);

    /**
     * Adds the "intersection" classification to `span` and each of its children when the
     * span's normalized text is a known connector that has a neighbour on both sides.
     *
     * @param span The span to inspect; spans flagged "numeral" are ignored.
     */
    explore(span) {
        if (span.flags.has("numeral")) {
            return;
        }

        // Neighbours are looked up from the first child, falling back to the span itself.
        // NOTE(review): for a multi-word phrase the first child's next sibling may be the
        // phrase's own second word rather than what follows the phrase — confirm intended.
        const anchor = span.children.first || span;
        const before = anchor.previousSibling;
        const after = anchor.nextSibling;
        if (!(before && after)) {
            return;
        }

        if (!this.intersectionSymbols.has(span.normalized)) {
            return;
        }

        span.classifications.add("intersection");
        for (const part of span.children) {
            part.classifications.add("intersection");
        }
    }
}
|
|
33
|
+
//# sourceMappingURL=IntersectionClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IntersectionClassifier.js","sourceRoot":"","sources":["../IntersectionClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,gBAAgB,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAExD,MAAM,OAAO,sBAAuB,SAAQ,gBAAgB;IACpD,mBAAmB,GAAG,IAAI,GAAG,CAAS;QAC5C,MAAM;QACN,GAAG;QACH,KAAK;QACL,KAAK;QACL,GAAG;QACH,IAAI;QACJ,KAAK;QACL,iBAAiB;KACjB,CAAC,CAAA;IAEK,OAAO,CAAC,IAAU;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAErC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,IAAI,CAAA;QAC9C,MAAM,EAAE,eAAe,EAAE,WAAW,EAAE,GAAG,UAAU,CAAA;QAEnD,IAAI,CAAC,eAAe,IAAI,CAAC,WAAW;YAAE,OAAM;QAE5C,IAAI,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACnD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,cAAc,CAAC,CAAA;YAExC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACnC,KAAK,CAAC,eAAe,CAAC,GAAG,CAAC,cAAc,CAAC,CAAA;YAC1C,CAAC;QACF,CAAC;IACF,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier that tags a short number following a level designator as a "level".
 */
export declare class LevelClassifier extends WordClassifier {
    /**
     * Inspects `span` and, when applicable, records a "level" classification.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
    /**
     * Convenience entry point: builds a span from `input`, optionally links `prev` as its
     * preceding level designator, runs `explore`, and returns the span.
     *
     * @param input The text or span to classify.
     * @param prev Optional text or span to treat as the preceding level designator.
     * @returns The span built from `input`.
     */
    classify(input: Span | string, prev?: Span | string): Span;
}
|
|
11
|
+
//# sourceMappingURL=LevelClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LevelClassifier.d.ts","sourceRoot":"","sources":["../LevelClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAItD,qBAAa,eAAgB,SAAQ,cAAc;IAC3C,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;IAUhB,QAAQ,CAAC,KAAK,EAAE,IAAI,GAAG,MAAM,EAAE,IAAI,CAAC,EAAE,IAAI,GAAG,MAAM,GAAG,IAAI;CAc1E"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
// A level number: the whole text is one or two digits.
const combinedFloorRegexp = /^\d{1,2}$/;

/**
 * Word classifier that marks a one- or two-digit span as a "level" when the span
 * immediately before it is a level designator.
 */
export class LevelClassifier extends WordClassifier {
    /**
     * Adds the "level" classification when the previous sibling is a level designator
     * (a level word such as "floor" or "fl") and the span body is one or two digits.
     *
     * @param span The span to inspect.
     */
    explore(span) {
        const precedingSpan = span.previousSibling;
        if (!precedingSpan?.is("level_designator")) {
            return;
        }
        if (!combinedFloorRegexp.test(span.body)) {
            return;
        }
        span.classifications.add("level");
    }

    /**
     * Builds a span from `input`, runs `explore` on it, and returns it. When `prev` is given,
     * a span built from it is classified as "level_designator" and registered as a previous
     * sibling of the input span first.
     *
     * @param input Text or span to classify.
     * @param prev Optional text or span to use as the preceding level designator.
     * @returns The span built from `input`.
     */
    classify(input, prev) {
        const target = Span.from(input);
        if (prev) {
            const designator = Span.from(prev);
            designator.classifications.add("level_designator");
            target.previousSiblings.add(designator);
        }
        this.explore(target);
        return target;
    }
}
|
|
28
|
+
//# sourceMappingURL=LevelClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LevelClassifier.js","sourceRoot":"","sources":["../LevelClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAEtD,MAAM,mBAAmB,GAAG,WAAW,CAAA;AAEvC,MAAM,OAAO,eAAgB,SAAQ,cAAc;IAC3C,OAAO,CAAC,IAAU;QACxB,MAAM,EAAE,eAAe,EAAE,GAAG,IAAI,CAAA;QAChC,MAAM,iBAAiB,GAAG,eAAe,EAAE,EAAE,CAAC,kBAAkB,CAAC,CAAA;QAEjE,mEAAmE;QACnE,IAAI,iBAAiB,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9D,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;QAClC,CAAC;IACF,CAAC;IAEe,QAAQ,CAAC,KAAoB,EAAE,IAAoB;QAClE,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAE7B,IAAI,IAAI,EAAE,CAAC;YACV,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAEpC,YAAY,CAAC,eAAe,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAA;YACpD,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;QACxC,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QAElB,OAAO,IAAI,CAAA;IACZ,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier that tags level designator words using a locale index.
 */
export declare class LevelDesignatorClassifier extends WordClassifier {
    /**
     * Loads the index used by `explore`.
     *
     * @returns A promise that resolves to this classifier.
     */
    ready(): Promise<this>;
    /**
     * Inspects `span` and, when applicable, records a "level_designator" classification.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
11
|
+
//# sourceMappingURL=LevelDesignatorClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LevelDesignatorClassifier.d.ts","sourceRoot":"","sources":["../LevelDesignatorClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAA0C,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAE9F,qBAAa,yBAA0B,SAAQ,cAAc;IAC/C,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAO5B,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAOhC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Alpha2LanguageCode, prepareLocaleIndex, Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier that marks spans found in the "level_types_numbered.txt" locale index
 * as a "level_designator".
 */
export class LevelDesignatorClassifier extends WordClassifier {
    /**
     * Builds the locale index from "level_types_numbered.txt", using the configured
     * languages or English when none are configured.
     *
     * @returns This classifier, once the index has been assigned.
     */
    async ready() {
        this.index = await prepareLocaleIndex(
            this.languages ?? [Alpha2LanguageCode.English],
            "level_types_numbered.txt",
        );
        return this;
    }

    /**
     * Adds the "level_designator" classification when the span is not flagged "numeral"
     * and its normalized text is present in the index.
     *
     * @param span The span to inspect.
     */
    explore(span) {
        if (span.flags.has("numeral") || !this.index.has(span.normalized)) {
            return;
        }
        span.classifications.add("level_designator");
    }
}
|
|
21
|
+
//# sourceMappingURL=LevelDesignatorClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LevelDesignatorClassifier.js","sourceRoot":"","sources":["../LevelDesignatorClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAE9F,MAAM,OAAO,yBAA0B,SAAQ,cAAc;IACrD,KAAK,CAAC,KAAK;QACjB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAA;QAChE,IAAI,CAAC,KAAK,GAAG,MAAM,kBAAkB,CAAC,SAAS,EAAE,0BAA0B,CAAC,CAAA;QAE5E,OAAO,IAAI,CAAA;IACZ,CAAC;IAEM,OAAO,CAAC,IAAU;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAErC,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACrC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAA;QAC7C,CAAC;IACF,CAAC;CACD"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that tags single-letter spans as a middle initial.
 */
export declare class MiddleInitialClassifier extends PhraseClassifier {
    /**
     * Inspects `span` and, when applicable, records a "middle_initial" classification.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
10
|
+
//# sourceMappingURL=MiddleInitialClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MiddleInitialClassifier.d.ts","sourceRoot":"","sources":["../MiddleInitialClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAsB,gBAAgB,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAI5E,qBAAa,uBAAwB,SAAQ,gBAAgB;IACrD,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAQhC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Alpha2LanguageCode, PhraseClassifier, Span } from "@mailwoman/core";
|
|
7
|
+
// Exactly one ASCII letter, optionally followed by a period.
const SingleLetterPattern = /^[A-Za-z]\.?$/;

/**
 * Phrase classifier that marks a lone letter (with or without a trailing period)
 * as a "middle_initial".
 */
export class MiddleInitialClassifier extends PhraseClassifier {
    /**
     * Adds a "middle_initial" classification, tagged with the English language code,
     * when the span body is a single letter.
     *
     * @param span The span to inspect.
     */
    explore(span) {
        if (SingleLetterPattern.test(span.body)) {
            const languages = new Set([Alpha2LanguageCode.English]);
            span.classifications.add({
                classification: "middle_initial",
                languages,
            });
        }
    }
}
|
|
18
|
+
//# sourceMappingURL=MiddleInitialClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MiddleInitialClassifier.js","sourceRoot":"","sources":["../MiddleInitialClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAE5E,MAAM,mBAAmB,GAAG,eAAe,CAAA;AAE3C,MAAM,OAAO,uBAAwB,SAAQ,gBAAgB;IACrD,OAAO,CAAC,IAAU;QACxB,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,OAAM;QAEhD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;YACxB,cAAc,EAAE,gBAAgB;YAChC,SAAS,EAAE,IAAI,GAAG,CAAC,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;SAChD,CAAC,CAAA;IACH,CAAC;CACD"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier that tags ordinal numbers such as "1st" or "22nd".
 */
export declare class OrdinalClassifier extends WordClassifier {
    /**
     * Inspects `span` and, when applicable, records an "ordinal" classification.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
10
|
+
//# sourceMappingURL=OrdinalClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"OrdinalClassifier.d.ts","sourceRoot":"","sources":["../OrdinalClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAWtD,qBAAa,iBAAkB,SAAQ,cAAc;IAC7C,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAOhC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
// Alternatives recognised as an ordinal number. In every suffix the final letter is optional
// in the pattern (e.g. `st?` accepts "s" or "st").
const ord = [
    "((1)st?|(2)nd?|(3)rd?|([4-9])th?)", // singles
    "(0*([0-9]*)(1[0-9])th?)", // teens
    "(0*([0-9]*[02-9])((1)st?|(2)nd?|(3)rd?|([04-9])th?))", // the rest
].join("|"); // or

// Case-insensitive and anchored to the whole input. The alternation is wrapped in a
// non-capturing group so that `^` and `$` apply to every alternative: without the group,
// `^` would bind only to the first alternative and `$` only to the last, letting inputs
// such as "1street" or "11thx" match. Capture-group numbering is unaffected.
const regex = new RegExp(`^(?:${ord})$`, "i");
|
|
14
|
+
/**
 * Word classifier that marks spans matching the module's ordinal pattern as an "ordinal".
 */
export class OrdinalClassifier extends WordClassifier {
    /**
     * Adds the "ordinal" classification when the span is flagged "numeral" and its
     * normalized text matches the ordinal pattern.
     *
     * @param span The span to inspect.
     */
    explore(span) {
        const isCandidate = span.flags.has("numeral") && regex.test(span.normalized);
        if (isCandidate) {
            span.classifications.add("ordinal");
        }
    }
}
|
|
23
|
+
//# sourceMappingURL=OrdinalClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"OrdinalClassifier.js","sourceRoot":"","sources":["../OrdinalClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAEtD,IAAI,GAAG,GAAG,EAAE,CAAA;AACZ,GAAG,IAAI,mCAAmC,CAAA,CAAC,UAAU;AACrD,GAAG,IAAI,GAAG,CAAA,CAAC,KAAK;AAChB,GAAG,IAAI,yBAAyB,CAAA,CAAC,QAAQ;AACzC,GAAG,IAAI,GAAG,CAAA,CAAC,KAAK;AAChB,GAAG,IAAI,sDAAsD,CAAA,CAAC,WAAW;AAEzE,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,IAAI,GAAG,GAAG,EAAE,GAAG,CAAC,CAAA;AAEzC,MAAM,OAAO,iBAAkB,SAAQ,cAAc;IAC7C,OAAO,CAAC,IAAU;QACxB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAEtC,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;QACpC,CAAC;IACF,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that tags known person names using a locale index.
 */
export declare class PersonClassifier extends PhraseClassifier {
    /**
     * Loads the index used by `explore`.
     *
     * @returns A promise that resolves to this classifier.
     */
    ready(): Promise<this>;
    /**
     * Inspects `span` and, when applicable, records a "person" classification.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
11
|
+
//# sourceMappingURL=PersonClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PersonClassifier.d.ts","sourceRoot":"","sources":["../PersonClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAsB,gBAAgB,EAAE,IAAI,EAAsB,MAAM,iBAAiB,CAAA;AAEhG,qBAAa,gBAAiB,SAAQ,gBAAgB;IACxC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAgB5B,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAYhC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Alpha2LanguageCode, PhraseClassifier, Span, prepareLocaleIndex } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that marks spans found in the "people.txt" locale index as a "person".
 */
export class PersonClassifier extends PhraseClassifier {
    /**
     * Builds the locale index from "people.txt" with the `lowercase` option enabled.
     * When no languages are configured, "all" and French are used.
     *
     * @returns This classifier, once the index has been assigned.
     */
    async ready() {
        const languages = this.languages ?? [
            // ---
            "all",
            Alpha2LanguageCode.French,
        ];
        const options = { lowercase: true };
        this.index = await prepareLocaleIndex(languages, "people.txt", options);
        return this;
    }

    /**
     * Adds a "person" classification, carrying the languages stored in the index,
     * when the span is not flagged "numeral" and its normalized text has an index entry.
     *
     * @param span The span to inspect.
     */
    explore(span) {
        if (span.flags.has("numeral")) {
            return;
        }
        const languages = this.index.get(span.normalized);
        if (languages) {
            span.classifications.add({ classification: "person", languages });
        }
    }
}
|
|
30
|
+
//# sourceMappingURL=PersonClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PersonClassifier.js","sourceRoot":"","sources":["../PersonClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AAEhG,MAAM,OAAO,gBAAiB,SAAQ,gBAAgB;IAC9C,KAAK,CAAC,KAAK;QACjB,IAAI,CAAC,KAAK,GAAG,MAAM,kBAAkB,CACpC,IAAI,CAAC,SAAS,IAAI;YACjB,MAAM;YACN,KAAK;YACL,kBAAkB,CAAC,MAAM;SACzB,EACD,YAAY,EACZ;YACC,SAAS,EAAE,IAAI;SACf,CACD,CAAA;QAED,OAAO,IAAI,CAAA;IACZ,CAAC;IAEM,OAAO,CAAC,IAAU;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAErC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;QAEjD,IAAI,CAAC,SAAS;YAAE,OAAM;QAEtB,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;YACxB,cAAc,EAAE,QAAQ;YACxB,SAAS;SACT,CAAC,CAAA;IACH,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that tags personal name suffixes using a locale index.
 */
export declare class PersonalSuffixClassifier extends PhraseClassifier {
    /**
     * Loads the index used by `explore`.
     *
     * @returns A promise that resolves to this classifier.
     */
    ready(): Promise<this>;
    /**
     * Inspects `span` and, when applicable, records a "personal_suffix" classification.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
11
|
+
//# sourceMappingURL=PersonalSuffixClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PersonalSuffixClassifier.d.ts","sourceRoot":"","sources":["../PersonalSuffixClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,gBAAgB,EAAsB,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAE5E,qBAAa,wBAAyB,SAAQ,gBAAgB;IAChD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAa5B,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAYhC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, prepareLocaleIndex, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that marks spans found in the "personal_suffixes.txt" locale index
 * as a "personal_suffix".
 */
export class PersonalSuffixClassifier extends PhraseClassifier {
    /**
     * Builds the locale index from "personal_suffixes.txt", passing a replacement rule
     * that removes a trailing period.
     *
     * @returns This classifier, once the index has been assigned.
     */
    async ready() {
        const stripTrailingPeriod = {
            from: /\.$/,
            to: "",
        };
        this.index = await prepareLocaleIndex(this.languages, "personal_suffixes.txt", {
            replace: [stripTrailingPeriod],
        });
        return this;
    }

    /**
     * Adds a "personal_suffix" classification, carrying the languages stored in the index,
     * when the span is not flagged "numeral" and its normalized text (minus any trailing
     * period) has an index entry.
     *
     * @param span The span to inspect.
     */
    explore(span) {
        if (span.flags.has("numeral")) {
            return;
        }
        const lookupKey = span.normalized.replace(/\.$/, "");
        const languages = this.index.get(lookupKey);
        if (languages) {
            span.classifications.add({ classification: "personal_suffix", languages });
        }
    }
}
|
|
31
|
+
//# sourceMappingURL=PersonalSuffixClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PersonalSuffixClassifier.js","sourceRoot":"","sources":["../PersonalSuffixClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAE5E,MAAM,OAAO,wBAAyB,SAAQ,gBAAgB;IACtD,KAAK,CAAC,KAAK;QACjB,IAAI,CAAC,KAAK,GAAG,MAAM,kBAAkB,CAAC,IAAI,CAAC,SAAS,EAAE,uBAAuB,EAAE;YAC9E,OAAO,EAAE;gBACR;oBACC,IAAI,EAAE,KAAK;oBACX,EAAE,EAAE,EAAE;iBACN;aACD;SACD,CAAC,CAAA;QAEF,OAAO,IAAI,CAAA;IACZ,CAAC;IAEM,OAAO,CAAC,IAAU;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAErC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAA;QAEpE,IAAI,CAAC,SAAS;YAAE,OAAM;QAEtB,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;YACxB,cAAc,EAAE,iBAAiB;YACjC,SAAS;SACT,CAAC,CAAA;IACH,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier for personal titles.
 *
 * Type declaration only; the runtime behaviour lives in the accompanying
 * `PersonalTitleClassifier.js`.
 */
export declare class PersonalTitleClassifier extends PhraseClassifier {
    /**
     * Asynchronously prepares the classifier for use.
     *
     * @returns A promise resolving to this same instance.
     */
    ready(): Promise<this>;
    /**
     * Inspects a single span.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
11
|
+
//# sourceMappingURL=PersonalTitleClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PersonalTitleClassifier.d.ts","sourceRoot":"","sources":["../PersonalTitleClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,gBAAgB,EAAsB,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAE5E,qBAAa,uBAAwB,SAAQ,gBAAgB;IAC/C,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAc5B,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAYhC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { PhraseClassifier, prepareLocaleIndex, Span } from "@mailwoman/core";
|
|
7
|
+
/**
 * Phrase classifier that adds a `personal_title` classification to spans
 * whose normalized text is present in the `personal_titles.txt` locale index.
 */
export class PersonalTitleClassifier extends PhraseClassifier {
    /**
     * Loads the lookup index for `this.languages`.
     *
     * Options passed to `prepareLocaleIndex`: `minLength: 2` (presumably a
     * minimum entry length -- confirm against `prepareLocaleIndex`) and a
     * replace rule removing a single trailing period.
     *
     * @returns this instance, once the index has been assigned.
     */
    async ready() {
        const dropTrailingPeriod = { from: /\.$/, to: "" };
        const indexOptions = {
            minLength: 2,
            replace: [dropTrailingPeriod],
        };
        this.index = await prepareLocaleIndex(this.languages, "personal_titles.txt", indexOptions);
        return this;
    }
    /**
     * Classifies one span. Spans flagged `numeral` are ignored.
     *
     * @param span Span to inspect; a classification is added to it on a hit.
     */
    explore(span) {
        if (span.flags.has("numeral")) {
            return;
        }
        // The lookup key is the normalized text without one trailing period.
        const lookupKey = span.normalized.replace(/\.$/, "");
        const languages = this.index.get(lookupKey);
        if (!languages) {
            return;
        }
        span.classifications.add({
            classification: "personal_title",
            languages,
        });
    }
}
|
|
32
|
+
//# sourceMappingURL=PersonalTitleClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PersonalTitleClassifier.js","sourceRoot":"","sources":["../PersonalTitleClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAA;AAE5E,MAAM,OAAO,uBAAwB,SAAQ,gBAAgB;IACrD,KAAK,CAAC,KAAK;QACjB,IAAI,CAAC,KAAK,GAAG,MAAM,kBAAkB,CAAC,IAAI,CAAC,SAAS,EAAE,qBAAqB,EAAE;YAC5E,SAAS,EAAE,CAAC;YACZ,OAAO,EAAE;gBACR;oBACC,IAAI,EAAE,KAAK;oBACX,EAAE,EAAE,EAAE;iBACN;aACD;SACD,CAAC,CAAA;QAEF,OAAO,IAAI,CAAA;IACZ,CAAC;IAEM,OAAO,CAAC,IAAU;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAErC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAA;QAEpE,IAAI,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;gBACxB,cAAc,EAAE,gBAAgB;gBAChC,SAAS;aACT,CAAC,CAAA;QACH,CAAC;IACF,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier for place names.
 *
 * Type declaration only; the runtime behaviour lives in the accompanying
 * `PlaceClassifier.js`.
 */
export declare class PlaceClassifier extends WordClassifier {
    /**
     * Asynchronously prepares the classifier for use.
     *
     * @returns A promise resolving to this same instance.
     */
    ready(): Promise<this>;
    /**
     * Inspects a single span.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
11
|
+
//# sourceMappingURL=PlaceClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PlaceClassifier.d.ts","sourceRoot":"","sources":["../PlaceClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAA0C,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAE9F,qBAAa,eAAgB,SAAQ,cAAc;IACrC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB5B,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAmBhC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Alpha2LanguageCode, prepareLocaleIndex, Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier that adds a `place` classification to spans whose
 * normalized text is present in the `place_names.txt` locale index.
 */
export class PlaceClassifier extends WordClassifier {
    /**
     * Loads the lookup index.
     *
     * When `this.languages` is null or undefined, French, German, English and
     * Polish are used instead. The index is requested with `pluralize: true`
     * (presumably also indexing plural forms -- confirm against
     * `prepareLocaleIndex`).
     *
     * @returns this instance, once the index has been assigned.
     */
    async ready() {
        const configured = this.languages ?? [
            // ---
            Alpha2LanguageCode.French,
            Alpha2LanguageCode.German,
            Alpha2LanguageCode.English,
            Alpha2LanguageCode.Polish,
        ];
        const languages = Array.from(configured);
        const indexOptions = { pluralize: true };
        this.index = await prepareLocaleIndex(languages, "place_names.txt", indexOptions);
        return this;
    }
    /**
     * Classifies one span.
     *
     * Nothing is added when the span is flagged `numeral`, or when the sibling
     * preceding its first child (or preceding the span itself, if
     * `children.first` is falsy) is classified as `intersection`.
     *
     * @param span Span to inspect; a classification is added to it on a hit.
     */
    explore(span) {
        if (span.flags.has("numeral")) {
            return;
        }
        const anchor = span.children.first || span;
        const preceding = anchor.previousSibling;
        const followsIntersection = Boolean(preceding) && preceding.is("intersection");
        if (followsIntersection) {
            return;
        }
        const languages = this.index.get(span.normalized);
        if (!languages) {
            return;
        }
        span.classifications.add({
            classification: "place",
            languages,
        });
    }
}
|
|
38
|
+
//# sourceMappingURL=PlaceClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PlaceClassifier.js","sourceRoot":"","sources":["../PlaceClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAE9F,MAAM,OAAO,eAAgB,SAAQ,cAAc;IAC3C,KAAK,CAAC,KAAK;QACjB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAC3B,IAAI,CAAC,SAAS,IAAI;YACjB,MAAM;YACN,kBAAkB,CAAC,MAAM;YACzB,kBAAkB,CAAC,MAAM;YACzB,kBAAkB,CAAC,OAAO;YAC1B,kBAAkB,CAAC,MAAM;SACzB,CACD,CAAA;QAED,IAAI,CAAC,KAAK,GAAG,MAAM,kBAAkB,CAAC,SAAS,EAAE,iBAAiB,EAAE;YACnE,SAAS,EAAE,IAAI;SACf,CAAC,CAAA;QAEF,OAAO,IAAI,CAAA;IACZ,CAAC;IAEM,OAAO,CAAC,IAAU;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAErC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,IAAI,CAAA;QAC9C,MAAM,IAAI,GAAG,UAAU,CAAC,eAAe,CAAA;QAEvC,IAAI,IAAI,IAAI,IAAI,CAAC,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC;YACrC,OAAM;QACP,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;QAEjD,IAAI,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;gBACxB,cAAc,EAAE,OAAO;gBACvB,SAAS;aACT,CAAC,CAAA;QACH,CAAC;IACF,CAAC;CACD"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier for postcodes.
 *
 * Type declaration only; the runtime behaviour lives in the accompanying
 * `PostcodeClassifier.js`.
 */
export declare class PostcodeClassifier extends WordClassifier {
    /** Regular expressions a span is tested against. */
    postcodePatterns: RegExp[];
    /** Country codes this classifier was constructed with. */
    protected countryCodes: Iterable<string>;
    /**
     * @param specs Optional iterable of country codes.
     */
    constructor(specs?: Iterable<string>);
    /**
     * Asynchronously prepares the classifier for use.
     *
     * @returns A promise resolving to this same instance.
     */
    ready(): Promise<this>;
    /**
     * Inspects a single span.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
14
|
+
//# sourceMappingURL=PostcodeClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PostcodeClassifier.d.ts","sourceRoot":"","sources":["../PostcodeClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAqB,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAyBzE,qBAAa,kBAAmB,SAAQ,cAAc;IAC9C,gBAAgB,EAAE,MAAM,EAAE,CAAK;IACtC,SAAS,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAK;gBAEjC,KAAK,GAAE,QAAQ,CAAC,MAAM,CAA4B;IAMxD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAoBrB,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;CAehC"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
import { corePackagePathBuilder } from "@mailwoman/core/utils";
|
|
8
|
+
import { readFile } from "node:fs/promises";
|
|
9
|
+
/**
 * Path builder rooted at the `data/chromium-i18n/ssl-address` segments of the
 * core package; called with a file name to obtain that file's path.
 */
const dictPath = corePackagePathBuilder(
    "data",
    "chromium-i18n",
    "ssl-address",
);
|
|
10
|
+
/**
 * Country codes used when `PostcodeClassifier` is constructed without an
 * explicit list. Order is significant only in that it fixes the order in
 * which the per-country patterns are built.
 */
const DefaultPostcodeCountries = [
    "US", "GB", "FR", "DE", "ES",
    "PT", "AU", "NZ", "KR", "JP",
    "IN", "RU", "BR", "NL", "PL",
];
|
|
28
|
+
/**
 * Word classifier that adds a `postcode` classification to numeral spans
 * matching one of the per-country postcode patterns loaded by `ready()`.
 */
export class PostcodeClassifier extends WordClassifier {
    /** Anchored, case-insensitive postcode patterns; empty until `ready()` resolves. */
    postcodePatterns = [];
    /** Country codes whose `<CODE>.json` spec files are read by `ready()`. */
    countryCodes = [];
    /**
     * @param specs Iterable of country codes; defaults to `DefaultPostcodeCountries`.
     */
    constructor(specs = DefaultPostcodeCountries) {
        super();
        this.countryCodes = specs;
    }
    /**
     * Reads each country's JSON spec and compiles its `zip` expression into a
     * fully anchored, case-insensitive pattern.
     *
     * Rejects if any spec file cannot be read or parsed.
     *
     * @returns this instance, once `postcodePatterns` has been assigned.
     */
    async ready() {
        // Array.from(iterable, mapFn) accepts any iterable and does not depend
        // on the newer Iterator.from / iterator-helper API being available.
        const compiled = await Promise.all(Array.from(this.countryCodes, async (cc) => {
            const countryDictPath = dictPath(`${cc.toUpperCase()}.json`);
            const spec = JSON.parse(await readFile(countryDictPath, "utf8"));
            const zip = spec?.zip;
            // A spec without a usable `zip` expression would otherwise compile
            // to the meaningless pattern /^(undefined)$/i, so skip it.
            if (typeof zip !== "string" || zip.length === 0) {
                return null;
            }
            return new RegExp("^(" + zip + ")$", "i");
        }));
        this.postcodePatterns = compiled
            // remove skipped specs and countries with 3-digit postcodes
            .filter((pattern) => pattern !== null && !pattern.test("100"));
        return this;
    }
    /**
     * Classifies one span. Only spans flagged `numeral` are considered.
     *
     * @param span Span to inspect; `"postcode"` is added to its classifications
     *   when any loaded pattern matches its normalized text.
     */
    explore(span) {
        if (!span.flags.has("numeral"))
            return;
        // Skip postcodes in the start position unless it's the only token in the section.
        const hasSiblings = span.previousSiblings.size > 0 || span.nextSiblings.size > 0;
        if (span.is("start_token") && hasSiblings) {
            return;
        }
        // One match is enough; later patterns are not consulted.
        if (this.postcodePatterns.some((pattern) => pattern.test(span.normalized))) {
            span.classifications.add("postcode");
        }
    }
}
|
|
62
|
+
//# sourceMappingURL=PostcodeClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PostcodeClassifier.js","sourceRoot":"","sources":["../PostcodeClassifier.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAqB,IAAI,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AACzE,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAA;AAC9D,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAE3C,MAAM,QAAQ,GAAG,sBAAsB,CAAC,MAAM,EAAE,eAAe,EAAE,aAAa,CAAC,CAAA;AAE/E,MAAM,wBAAwB,GAAsB;IACnD,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACJ,CAAA;AAED,MAAM,OAAO,kBAAmB,SAAQ,cAAc;IAC9C,gBAAgB,GAAa,EAAE,CAAA;IAC5B,YAAY,GAAqB,EAAE,CAAA;IAE7C,YAAY,QAA0B,wBAAwB;QAC7D,KAAK,EAAE,CAAA;QAEP,IAAI,CAAC,YAAY,GAAG,KAAK,CAAA;IAC1B,CAAC;IAED,KAAK,CAAC,KAAK;QACV,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,GAAG,CACjC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YACjD,MAAM,eAAe,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;YAE5D,MAAM,IAAI,GAAiB,MAAM,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAEnF,MAAM,OAAO,GAAG,IAAI,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,GAAG,IAAI,EAAE,GAAG,CAAC,CAAA;YAEvD,OAAO,OAAO,CAAA;QACf,CAAC,CAAC,CACF,CAAA;QAED,IAAI,CAAC,gBAAgB,GAAG,QAAQ;YAC/B,0CAA0C;aACzC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAA;QAEnC,OAAO,IAAI,CAAA;IACZ,CAAC;IAEM,OAAO,CAAC,IAAU;QACxB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAM;QAEtC,kFAAkF;QAClF,IAAI,IAAI,CAAC,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,YAAY,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC;YAC9F,OAAM;QACP,CAAC;QAED,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7C,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;gBACnC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;gBACpC,MAAK;YACN,CAAC;QACF,CAAC;IACF,CAAC;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*/
|
|
6
|
+
import { Span, WordClassifier } from "@mailwoman/core";
|
|
7
|
+
/**
 * Word classifier, presumably for road types (inferred from the class name;
 * confirm against `RoadTypeClassifier.js`).
 *
 * Type declaration only.
 */
export declare class RoadTypeClassifier extends WordClassifier {
    /**
     * Asynchronously prepares the classifier for use.
     *
     * @returns A promise resolving to this same instance.
     */
    ready(): Promise<this>;
    /**
     * Inspects a single span.
     *
     * @param span The span to inspect.
     */
    explore(span: Span): void;
}
|
|
11
|
+
//# sourceMappingURL=RoadTypeClassifier.d.ts.map
|