name-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +912 -0
- package/dist/gender/GenderDB-Co_GybwH.d.mts +80 -0
- package/dist/gender/GenderDB-Co_GybwH.d.ts +80 -0
- package/dist/gender/all.d.mts +31 -0
- package/dist/gender/all.d.ts +31 -0
- package/dist/gender/all.js +191 -0
- package/dist/gender/all.mjs +37 -0
- package/dist/gender/chunk-YGP2PQOO.mjs +133 -0
- package/dist/gender/coverage95.d.mts +29 -0
- package/dist/gender/coverage95.d.ts +29 -0
- package/dist/gender/coverage95.js +191 -0
- package/dist/gender/coverage95.mjs +37 -0
- package/dist/gender/coverage99.d.mts +29 -0
- package/dist/gender/coverage99.d.ts +29 -0
- package/dist/gender/coverage99.js +191 -0
- package/dist/gender/coverage99.mjs +37 -0
- package/dist/index.d.mts +796 -0
- package/dist/index.d.ts +796 -0
- package/dist/index.js +3789 -0
- package/dist/index.mjs +3726 -0
- package/package.json +62 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3789 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Short aliases for the Object reflection primitives that the interop helpers
// in this file (__export, __copyProps, __toCommonJS) call.
var {
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames
} = Object;
var { hasOwnProperty: __hasOwnProp } = Object.prototype;
|
|
6
|
+
/**
 * Defines one enumerable accessor property on `target` for every key that
 * `for...in` yields from `all`. The value stored under each key is installed
 * as the getter, so it is only invoked when the property is read.
 */
var __export = (target, all) => {
  for (const name in all) {
    const accessor = { get: all[name], enumerable: true };
    __defProp(target, name, accessor);
  }
};
|
|
10
|
+
/**
 * Mirrors the own properties of `from` onto `to` as live getters and returns `to`.
 *
 * - Nothing is copied unless `from` is a non-null object or a function.
 * - Keys that `to` already owns, and the key equal to `except`, are skipped.
 * - Each new property reads through to `from[key]` on every access and keeps
 *   the enumerability of the source property.
 *
 * `desc` is only used as scratch storage for the current property descriptor.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = from && typeof from === "object" || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};
|
|
18
|
+
/**
 * Builds the object handed to `module.exports`: a fresh object carrying a
 * non-enumerable `__esModule: true` marker, onto which the properties of
 * `mod` are copied via __copyProps.
 */
var __toCommonJS = (mod) => {
  const marked = __defProp({}, "__esModule", { value: true });
  return __copyProps(marked, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
BUILT_IN_PRONOUNS: () => BUILT_IN_PRONOUNS,
|
|
24
|
+
COMMON_FIRST_NAMES: () => COMMON_FIRST_NAMES,
|
|
25
|
+
COMMON_SURNAMES: () => COMMON_SURNAMES,
|
|
26
|
+
MULTI_WORD_PARTICLES: () => MULTI_WORD_PARTICLES,
|
|
27
|
+
PARTICLES: () => PARTICLES,
|
|
28
|
+
SPEC_ALIASES: () => SPEC_ALIASES,
|
|
29
|
+
classifyName: () => classifyName,
|
|
30
|
+
entityToLegacy: () => entityToLegacy,
|
|
31
|
+
extractPronouns: () => extractPronouns,
|
|
32
|
+
fillPronounTemplate: () => fillPronounTemplate,
|
|
33
|
+
fillPronounTemplateSmart: () => fillPronounTemplateSmart,
|
|
34
|
+
formatName: () => formatName,
|
|
35
|
+
formatPronoun: () => formatPronoun,
|
|
36
|
+
getDefaultPronouns: () => getDefaultPronouns,
|
|
37
|
+
getFirstName: () => getFirstName,
|
|
38
|
+
getLastName: () => getLastName,
|
|
39
|
+
getNickname: () => getNickname,
|
|
40
|
+
getPronounSet: () => getPronounSet,
|
|
41
|
+
getPronouns: () => getPronouns,
|
|
42
|
+
getPronounsForEntity: () => getPronounsForEntity,
|
|
43
|
+
getPronounsForPerson: () => getPronounsForPerson,
|
|
44
|
+
hasPronouns: () => hasPronouns,
|
|
45
|
+
isCommonFirstName: () => isCommonFirstName,
|
|
46
|
+
isCommonSurname: () => isCommonSurname,
|
|
47
|
+
isCompound: () => isCompound,
|
|
48
|
+
isFamily: () => isFamily,
|
|
49
|
+
isMultiWordParticle: () => isMultiWordParticle,
|
|
50
|
+
isOrganization: () => isOrganization,
|
|
51
|
+
isParticle: () => isParticle,
|
|
52
|
+
isPerson: () => isPerson,
|
|
53
|
+
isRejected: () => isRejected,
|
|
54
|
+
isUnknown: () => isUnknown,
|
|
55
|
+
parseName: () => parseName,
|
|
56
|
+
parseNameList: () => parseNameList,
|
|
57
|
+
parsePersonName: () => parsePersonName,
|
|
58
|
+
parsePronounSpec: () => parsePronounSpec,
|
|
59
|
+
pronounsToGenderHint: () => pronounsToGenderHint
|
|
60
|
+
});
|
|
61
|
+
module.exports = __toCommonJS(index_exports);
|
|
62
|
+
|
|
63
|
+
// src/data/utils.ts
|
|
64
|
+
/**
 * Tests whether `value` matches any entry of `list`, ignoring letter case,
 * periods, and surrounding whitespace on both sides of the comparison.
 *
 * @param {string[]} list - Candidate entries to compare against.
 * @param {*} value - Text to look up. Anything that is not a non-empty string
 *   yields `false`.
 * @returns {boolean} `true` when a normalized entry equals the normalized value.
 */
function isInList(list, value) {
  // Only strings can match. Without this guard a truthy non-string (e.g. a
  // number) would blow up on `.toLowerCase()` instead of reporting "no match".
  if (typeof value !== "string" || value === "") return false;
  const normalize = (text) => text.toLowerCase().replace(/\./g, "").trim();
  const needle = normalize(value);
  // Non-string entries are skipped for the same reason: they can never match.
  return list.some((item) => typeof item === "string" && normalize(item) === needle);
}
|
|
72
|
+
|
|
73
|
+
// src/data/particles.ts
|
|
74
|
+
// Lowercase particle tokens grouped under the Dutch/German label.
// Spread into PARTICLES below.
var DUTCH_GERMAN = [
  "van", "von", "der", "den", "de", "het", "'t", "t",
  "ten", "ter", "te", "zu", "und", "vom", "am", "im",
  "von und zu" // Complex German nobility particle
];
|
|
94
|
+
// Lowercase particle tokens grouped under the Romance-language label.
// Spread into PARTICLES below.
var ROMANCE = [
  "la", "le", "lo", "li", "il", "el", "al",
  "d'", "de", "de'", "del", "della", "dello", "degli", "dei",
  "do", "du", "des", "dos", "das", "da", "di",
  "e", "y", "i" // Conjunctions (Spanish/Italian)
];
|
|
122
|
+
// Lowercase particle tokens grouped under the Celtic label.
// Spread into PARTICLES below.
var CELTIC = ["mac", "mc", "mhic", "mic", "o'", "ua"];
|
|
130
|
+
// Lowercase particle tokens grouped under the Scandinavian label.
// Spread into PARTICLES below.
var SCANDINAVIAN = ["af", "av", "von"];
|
|
135
|
+
// Every particle group joined into one flat list, in the order Dutch/German,
// Romance, Celtic, Scandinavian. Entries that occur in more than one group
// (e.g. "de", "von") are kept as duplicates.
var PARTICLES = DUTCH_GERMAN.concat(ROMANCE, CELTIC, SCANDINAVIAN);
|
|
141
|
+
// Lowercase particles made of several space-separated words.
// isMultiWordParticle() scans this list in order and returns the first hit.
var MULTI_WORD_PARTICLES = [
  "von und zu",
  "de la", "de los", "de las",
  "van der", "van den", "van de",
  "de le", "da la"
];
|
|
152
|
+
/**
 * Reports whether `str` matches an entry of PARTICLES.
 * Matching rules are whatever isInList applies.
 *
 * @param {string} str - Token to test.
 * @returns {boolean} The result of the PARTICLES lookup.
 */
function isParticle(str) {
  const listed = isInList(PARTICLES, str);
  return listed;
}
|
|
155
|
+
/**
 * Checks whether a sequence of words is, or begins with, a multi-word particle.
 *
 * The words are joined with single spaces and lower-cased. The first entry of
 * MULTI_WORD_PARTICLES that equals the joined phrase, or is followed by a
 * space at its start, wins.
 *
 * @param {string[]} words - Consecutive name tokens.
 * @returns {string|null} The matching particle from the list, or `null`.
 */
function isMultiWordParticle(words) {
  const phrase = words.join(" ").toLowerCase();
  const hit = MULTI_WORD_PARTICLES.find(
    (candidate) => phrase === candidate || phrase.startsWith(candidate + " ")
  );
  return hit === undefined ? null : hit;
}
|
|
167
|
+
|
|
168
|
+
// src/data/surnames.ts
|
|
169
|
+
// Lowercase surnames grouped under the Spanish label. Spread into COMMON_SURNAMES.
//
// Lookups through isInList() do not fold accents, so accented surnames are
// listed next to their unaccented spelling. "munoz", "nunez" and the standard
// accented form "n\xFA\xF1ez" are included for that reason; without them those
// spellings would not be recognized.
var SPANISH = [
  "garc\xEDa",
  "gonzalez", "gonz\xE1lez",
  "rodriguez", "rodr\xEDguez",
  "fernandez", "fern\xE1ndez",
  "lopez", "l\xF3pez",
  "martinez", "mart\xEDnez",
  "sanchez", "s\xE1nchez",
  "perez", "p\xE9rez",
  "gomez", "g\xF3mez",
  "martin", "mart\xEDn",
  "jimenez", "jim\xE9nez",
  "ruiz",
  "hernandez", "hern\xE1ndez",
  "diaz", "d\xEDaz",
  "moreno",
  "alvarez", "\xE1lvarez",
  "mu\xF1oz", "munoz",
  "romero",
  "alonso",
  "gutierrez", "guti\xE9rrez",
  "navarro",
  "torres",
  "dominguez", "dom\xEDnguez",
  "vazquez", "v\xE1zquez",
  "ramos",
  "gil",
  "ramirez", "ram\xEDrez",
  "serrano",
  "blanco",
  "molina",
  "castro",
  "ortiz",
  "rubio",
  "nu\xF1ez", "n\xFA\xF1ez", "nunez",
  "m\xE1rquez", "marquez"
];
|
|
224
|
+
// Lowercase surnames grouped under the Portuguese label. Spread into COMMON_SURNAMES.
var PORTUGUESE = [
  "silva", "santos", "ferreira", "pereira", "oliveira", "costa",
  "rodrigues", "martins", "jesus", "sousa", "souza", "fernandes",
  "goncalves", "gon\xE7alves", "gomes", "lopes", "marques", "alves",
  "almeida", "ribeiro", "pinto", "carvalho", "teixeira", "moreira",
  "correia", "queir\xF3s", "queiros", "e\xE7a"
];
|
|
254
|
+
// Lowercase surnames grouped under the Italian label. Spread into COMMON_SURNAMES.
// Note that "de luca" is a two-word entry.
var ITALIAN = [
  "rossi", "russo", "ferrari", "esposito", "bianchi",
  "romano", "colombo", "ricci", "marino", "greco",
  "bruno", "gallo", "conti", "de luca", "costa",
  "giordano", "mancini", "rizzo", "lombardi", "moretti"
];
|
|
276
|
+
// Lowercase surnames grouped under the French label. Spread into COMMON_SURNAMES.
var FRENCH = [
  "martin", "bernard", "dubois", "thomas", "robert", "richard",
  "petit", "durand", "leroy", "moreau", "simon", "laurent",
  "lefebvre", "michel", "garcia", "david", "bertrand", "roux",
  "vincent", "fournier", "fontaine", "rousseau", "dumas"
];
|
|
301
|
+
// All surname groups joined into one flat list, in the order Spanish,
// Portuguese, Italian, French. Names present in more than one group
// (e.g. "martin", "costa") are kept as duplicates.
var COMMON_SURNAMES = SPANISH.concat(PORTUGUESE, ITALIAN, FRENCH);
|
|
307
|
+
// Lowercase given names consulted by isCommonFirstName().
var COMMON_FIRST_NAMES = [
  "mary", "john", "william", "james", "anne", "sarah",
  "marie", "jean", "george", "paul", "lee", "billy",
  "bob", "thomas", "robert", "michael", "david", "martin",
  "pierre", "maria", "jose", "jos\xE9"
];
|
|
331
|
+
/**
 * Reports whether `str` matches an entry of COMMON_SURNAMES.
 * Matching rules are whatever isInList applies.
 *
 * @param {string} str - Name token to test.
 * @returns {boolean} The result of the COMMON_SURNAMES lookup.
 */
function isCommonSurname(str) {
  const listed = isInList(COMMON_SURNAMES, str);
  return listed;
}
|
|
334
|
+
/**
 * Reports whether `str` matches an entry of COMMON_FIRST_NAMES.
 * Matching rules are whatever isInList applies.
 *
 * @param {string} str - Name token to test.
 * @returns {boolean} The result of the COMMON_FIRST_NAMES lookup.
 */
function isCommonFirstName(str) {
  const listed = isInList(COMMON_FIRST_NAMES, str);
  return listed;
}
|
|
337
|
+
|
|
338
|
+
// src/data/affixes.ts
|
|
339
|
+
/**
 * Reduces an affix spelling to the canonical key used for variant matching.
 *
 * Steps, in order:
 *  1. NFKD-decompose and drop combining marks (U+0300-U+036F), so accented
 *     letters compare equal to their base letters.
 *  2. Map typographic apostrophes (U+2019, U+2018, U+02BC) to "'".
 *  3. Delete every period.
 *  4. Strip leading/trailing commas, semicolons, colons and whitespace.
 *  5. Collapse internal whitespace runs to one space and upper-case.
 *
 * Character folding and period removal run BEFORE trimming and whitespace
 * collapsing. Doing it the other way round leaves artifacts behind: a
 * free-standing period ("Lt . Col") would leave a double space, and
 * punctuation exposed by a removed period ("Dr,.") would survive in the key.
 *
 * @param {string} value - Raw affix text.
 * @returns {string} Upper-case match key; empty when nothing meaningful remains.
 */
function normalizeAffixVariantForMatch(value) {
  return value
    .normalize("NFKD")
    .replace(/[\u0300-\u036f]/g, "")
    .replace(/[\u2019\u2018\u02BC]/g, "'")
    .replace(/\./g, "")
    .replace(/^[,;:\s]+|[,;:\s]+$/g, "")
    .replace(/\s+/g, " ")
    .toUpperCase();
}
|
|
342
|
+
/**
 * Builds a lookup from normalized affix spelling to its affix entry.
 *
 * Only entries whose `ctx` is "both" or equals the requested `ctx` are
 * indexed. For each such entry the `short` form, the `long` form and every
 * item of `variants` are normalized with normalizeAffixVariantForMatch.
 * Spellings that normalize to an empty key are ignored, and when two entries
 * share a key the one that appears first in `entries` keeps it.
 *
 * @param {Array<object>} entries - Affix entries (`ctx`, `short`, `long`, `variants`).
 * @param {string} ctx - Context to index for.
 * @returns {Map<string, object>} Normalized key -> entry.
 */
function buildAffixVariantIndex(entries, ctx) {
  const index = new Map();
  for (const entry of entries) {
    const applies = entry.ctx === "both" || entry.ctx === ctx;
    if (!applies) continue;
    const spellings = [
      ...[entry.short, entry.long].filter(Boolean),
      ...(entry.variants || [])
    ];
    for (const spelling of spellings) {
      const key = normalizeAffixVariantForMatch(spelling);
      // First writer wins: never overwrite a key claimed by an earlier entry.
      if (key && !index.has(key)) index.set(key, entry);
    }
  }
  return index;
}
|
|
358
|
+
var PREFIX_AFFIX_ENTRIES = [
|
|
359
|
+
// ---------------------------------------------------------------------------
|
|
360
|
+
// English-speaking countries (US/UK/CA/AU/NZ/IE) — common honorifics
|
|
361
|
+
// ---------------------------------------------------------------------------
|
|
362
|
+
{ id: "mr", type: "honorific", ctx: "prefix", short: "Mr.", long: "Mister", variants: ["mr", "mr."] },
|
|
363
|
+
{ id: "mrs", type: "honorific", ctx: "prefix", short: "Mrs.", variants: ["mrs", "mrs."] },
|
|
364
|
+
{ id: "ms", type: "honorific", ctx: "prefix", short: "Ms.", variants: ["ms", "ms."] },
|
|
365
|
+
{ id: "miss", type: "honorific", ctx: "prefix", short: "Miss", variants: ["miss"] },
|
|
366
|
+
{ id: "mx", type: "honorific", ctx: "prefix", short: "Mx", variants: ["mx", "mx."] },
|
|
367
|
+
{ id: "madam", type: "honorific", ctx: "prefix", short: "Madam", variants: ["madam"] },
|
|
368
|
+
// ---------------------------------------------------------------------------
|
|
369
|
+
// Plural honorifics (for couples/groups)
|
|
370
|
+
// ---------------------------------------------------------------------------
|
|
371
|
+
{ id: "messrs", type: "honorific", ctx: "prefix", short: "Messrs.", long: "Messieurs", variants: ["messrs", "messrs.", "messieurs"] },
|
|
372
|
+
{ id: "mmes", type: "honorific", ctx: "prefix", short: "Mmes.", long: "Mesdames", variants: ["mmes", "mmes.", "mesdames"] },
|
|
373
|
+
{ id: "drs", type: "honorific", ctx: "prefix", short: "Drs.", long: "Doctors", variants: ["drs", "drs.", "doctors"] },
|
|
374
|
+
{ id: "profs", type: "honorific", ctx: "prefix", short: "Profs.", long: "Professors", variants: ["profs", "profs.", "professors"] },
|
|
375
|
+
{ id: "revs", type: "honorific", ctx: "prefix", short: "Revs.", long: "Reverends", variants: ["revs", "revs.", "reverends"] },
|
|
376
|
+
{ id: "dr", type: "honorific", ctx: "prefix", short: "Dr.", long: "Doctor", variants: ["dr", "dr."] },
|
|
377
|
+
{ id: "prof", type: "honorific", ctx: "prefix", short: "Prof.", long: "Professor", variants: ["prof", "prof.", "professor"] },
|
|
378
|
+
// Legal/professional (prefix usage varies; keep as tolerant input)
|
|
379
|
+
{ id: "atty", type: "professional", ctx: "prefix", short: "Atty.", long: "Attorney", variants: ["atty", "atty.", "attorney"] },
|
|
380
|
+
{ id: "lic", type: "professional", ctx: "prefix", short: "Lic.", long: "Licentiate", variants: ["lic", "lic.", "licentiate"] },
|
|
381
|
+
// Corporate designator; uncommon as a true name prefix, but supported for tolerance.
|
|
382
|
+
{ id: "llc", type: "professional", ctx: "prefix", short: "LLC", variants: ["llc", "l.l.c."] },
|
|
383
|
+
// ---------------------------------------------------------------------------
|
|
384
|
+
// Clergy / religious (common in English + EU contexts)
|
|
385
|
+
// ---------------------------------------------------------------------------
|
|
386
|
+
{ id: "rev", type: "religious", ctx: "prefix", short: "Rev.", long: "Reverend", variants: ["rev", "rev.", "reverend"] },
|
|
387
|
+
{ id: "revd", type: "religious", ctx: "prefix", short: "Revd", long: "Reverend", variants: ["revd", "revd.", "rev d", "rev. d."] },
|
|
388
|
+
{ id: "fr", type: "religious", ctx: "prefix", short: "Fr.", long: "Father", variants: ["fr", "fr.", "father"] },
|
|
389
|
+
{ id: "sr_sister", type: "religious", ctx: "prefix", short: "Sr.", long: "Sister", variants: ["sr", "sr.", "sister"] },
|
|
390
|
+
{ id: "br", type: "religious", ctx: "prefix", short: "Br.", long: "Brother", variants: ["br", "br.", "brother"] },
|
|
391
|
+
{ id: "rabbi", type: "religious", ctx: "prefix", short: "Rabbi", variants: ["rabbi"] },
|
|
392
|
+
{ id: "imam", type: "religious", ctx: "prefix", short: "Imam", variants: ["imam"] },
|
|
393
|
+
{ id: "pastor", type: "religious", ctx: "prefix", short: "Pastor", variants: ["pastor"] },
|
|
394
|
+
{ id: "monsignor", type: "religious", ctx: "prefix", short: "Monsignor", variants: ["monsignor", "msgr", "msgr."] },
|
|
395
|
+
// Higher clergy + Christian honor styles (UK/EU common)
|
|
396
|
+
{ id: "canon", type: "religious", ctx: "prefix", short: "Canon", variants: ["canon"] },
|
|
397
|
+
{ id: "cardinal", type: "religious", ctx: "prefix", short: "Cardinal", variants: ["cardinal"] },
|
|
398
|
+
{ id: "archdeacon", type: "religious", ctx: "prefix", short: "Archdeacon", variants: ["archdeacon"] },
|
|
399
|
+
{ id: "archbishop", type: "religious", ctx: "prefix", short: "Archbishop", variants: ["archbishop"] },
|
|
400
|
+
{ id: "archbishop_emeritus", type: "religious", ctx: "prefix", short: "Archbishop Emeritus", variants: ["archbishop emeritus"] },
|
|
401
|
+
{ id: "bishop", type: "religious", ctx: "prefix", short: "Bishop", variants: ["bishop"] },
|
|
402
|
+
{ id: "bishop_emeritus", type: "religious", ctx: "prefix", short: "Bishop Emeritus", variants: ["bishop emeritus"] },
|
|
403
|
+
{ id: "suffragan_bishop", type: "religious", ctx: "prefix", short: "Suffragan Bishop", variants: ["suffragan bishop"] },
|
|
404
|
+
{ id: "dom", type: "religious", ctx: "prefix", short: "Dom", variants: ["dom"] },
|
|
405
|
+
{ id: "elder", type: "religious", ctx: "prefix", short: "Elder", variants: ["elder"] },
|
|
406
|
+
{ id: "brother_superior", type: "religious", ctx: "prefix", short: "Brother Superior", variants: ["brother superior"] },
|
|
407
|
+
{ id: "provincial", type: "religious", ctx: "prefix", short: "Provincial", variants: ["provincial"] },
|
|
408
|
+
{ id: "chaplain", type: "religious", ctx: "prefix", short: "Chaplain", variants: ["chaplain"] },
|
|
409
|
+
{ id: "chaplain_general", type: "religious", ctx: "prefix", short: "Chaplain General", variants: ["chaplain general"] },
|
|
410
|
+
{ id: "chaplain_in_chief", type: "religious", ctx: "prefix", short: "Chaplain-in-Chief", variants: ["chaplain-in-chief", "chaplain in chief"] },
|
|
411
|
+
{ id: "most_reverend", type: "religious", ctx: "prefix", short: "Most Reverend", variants: ["most reverend"] },
|
|
412
|
+
{ id: "the_most_reverend", type: "religious", ctx: "prefix", short: "The Most Reverend", variants: ["the most reverend"] },
|
|
413
|
+
{ id: "right_reverend", type: "religious", ctx: "prefix", short: "Right Reverend", variants: ["right reverend"] },
|
|
414
|
+
{ id: "very_reverend", type: "religious", ctx: "prefix", short: "Very Reverend", variants: ["very reverend"] },
|
|
415
|
+
{ id: "the_venerable", type: "religious", ctx: "prefix", short: "The Venerable", variants: ["the venerable"] },
|
|
416
|
+
// Combined honorifics (common in fixtures and UK usage)
|
|
417
|
+
{ id: "rev_canon", type: "religious", ctx: "prefix", short: "Rev. Canon", long: "Reverend Canon", variants: ["rev canon", "rev. canon", "the reverend canon"] },
|
|
418
|
+
{ id: "rev_dr", type: "religious", ctx: "prefix", short: "Rev. Dr.", variants: ["rev dr", "rev. dr", "rev. dr."] },
|
|
419
|
+
// ---------------------------------------------------------------------------
|
|
420
|
+
// Military / police ranks (primarily English canonical forms; EU input variants included)
|
|
421
|
+
// ---------------------------------------------------------------------------
|
|
422
|
+
{ id: "pvt", type: "military", ctx: "prefix", short: "Pvt.", long: "Private", variants: ["pvt", "pvt.", "private"] },
|
|
423
|
+
{ id: "cpl", type: "military", ctx: "prefix", short: "Cpl.", long: "Corporal", variants: ["cpl", "cpl.", "corporal"] },
|
|
424
|
+
{ id: "sgt", type: "military", ctx: "prefix", short: "Sgt.", long: "Sergeant", variants: ["sgt", "sgt.", "sergeant"] },
|
|
425
|
+
{ id: "lt", type: "military", ctx: "prefix", short: "Lt.", long: "Lieutenant", variants: ["lt", "lt.", "lieutenant"] },
|
|
426
|
+
{ id: "capt", type: "military", ctx: "prefix", short: "Capt.", long: "Captain", variants: ["capt", "capt.", "cpt", "cpt.", "captain"] },
|
|
427
|
+
{ id: "maj", type: "military", ctx: "prefix", short: "Maj.", long: "Major", variants: ["maj", "maj.", "major"] },
|
|
428
|
+
{ id: "col", type: "military", ctx: "prefix", short: "Col.", long: "Colonel", variants: ["col", "col.", "colonel"] },
|
|
429
|
+
{ id: "gen", type: "military", ctx: "prefix", short: "Gen.", long: "General", variants: ["gen", "gen.", "general"] },
|
|
430
|
+
{ id: "adm", type: "military", ctx: "prefix", short: "Adm.", long: "Admiral", variants: ["adm", "adm.", "admiral"] },
|
|
431
|
+
{ id: "air_chief_marshal", type: "military", ctx: "prefix", short: "Air Chief Marshal", variants: ["air chief marshal"] },
|
|
432
|
+
// Expanded ranks (common UK/US and some Commonwealth usage)
|
|
433
|
+
{ id: "rear_admiral", type: "military", ctx: "prefix", short: "Rear Admiral", variants: ["rear admiral"] },
|
|
434
|
+
{ id: "vice_admiral", type: "military", ctx: "prefix", short: "Vice Admiral", variants: ["vice admiral"] },
|
|
435
|
+
{ id: "air_commodore", type: "military", ctx: "prefix", short: "Air Commodore", variants: ["air commodore"] },
|
|
436
|
+
{ id: "air_marshal", type: "military", ctx: "prefix", short: "Air Marshal", variants: ["air marshal"] },
|
|
437
|
+
{ id: "air_vice_marshal", type: "military", ctx: "prefix", short: "Air Vice Marshal", variants: ["air vice marshal"] },
|
|
438
|
+
{ id: "field_marshal", type: "military", ctx: "prefix", short: "Field Marshal", variants: ["field marshal"] },
|
|
439
|
+
{ id: "marshal_of_the_raf", type: "military", ctx: "prefix", short: "Marshal of the RAF", long: "Marshal of the Royal Air Force", variants: ["marshal of the raf", "marshal of the r.a.f."] },
|
|
440
|
+
{ id: "flight_lieutenant", type: "military", ctx: "prefix", short: "Flight Lieutenant", variants: ["flight lieutenant"] },
|
|
441
|
+
{ id: "squadron_leader", type: "military", ctx: "prefix", short: "Squadron Leader", variants: ["squadron leader"] },
|
|
442
|
+
{ id: "petty_officer", type: "military", ctx: "prefix", short: "Petty Officer", variants: ["petty officer"] },
|
|
443
|
+
{ id: "pipe_major", type: "military", ctx: "prefix", short: "Pipe Major", variants: ["pipe major"] },
|
|
444
|
+
{ id: "lance_corporal", type: "military", ctx: "prefix", short: "Lance Corporal", variants: ["lance corporal"] },
|
|
445
|
+
{ id: "lance_sergeant", type: "military", ctx: "prefix", short: "Lance Sergeant", variants: ["lance sergeant"] },
|
|
446
|
+
{ id: "second_lieutenant", type: "military", ctx: "prefix", short: "Second Lieutenant", variants: ["second lieutenant"] },
|
|
447
|
+
{ id: "senior_aircraftman", type: "military", ctx: "prefix", short: "Senior Aircraftman", variants: ["senior aircraftman"] },
|
|
448
|
+
{ id: "senior_aircraftwoman", type: "military", ctx: "prefix", short: "Senior Aircraftwoman", variants: ["senior aircraftwoman"] },
|
|
449
|
+
{ id: "staff_corporal", type: "military", ctx: "prefix", short: "Staff Corporal", variants: ["staff corporal"] },
|
|
450
|
+
{ id: "staff_sergeant", type: "military", ctx: "prefix", short: "Staff Sergeant", variants: ["staff sergeant"] },
|
|
451
|
+
{ id: "warrant_officer", type: "military", ctx: "prefix", short: "Warrant Officer", variants: ["warrant officer"] },
|
|
452
|
+
{ id: "warrant_officer_class_1", type: "military", ctx: "prefix", short: "Warrant Officer Class 1", variants: ["warrant officer class 1", "warrant officer class i"] },
|
|
453
|
+
{ id: "warrant_officer_class_2", type: "military", ctx: "prefix", short: "Warrant Officer Class 2", variants: ["warrant officer class 2", "warrant officer class ii"] },
|
|
454
|
+
{ id: "brigadier", type: "military", ctx: "prefix", short: "Brigadier", variants: ["brigadier"] },
|
|
455
|
+
{ id: "brig_gen", type: "military", ctx: "prefix", short: "Brig Gen", long: "Brigadier General", variants: ["brig gen", "brig. gen.", "brigadier general"] },
|
|
456
|
+
{ id: "regimental_corporal_major", type: "military", ctx: "prefix", short: "Regimental Corporal Major", variants: ["regimental corporal major"] },
|
|
457
|
+
{ id: "regimental_sergeant_major", type: "military", ctx: "prefix", short: "Regimental Sergeant Major", variants: ["regimental sergeant major"] },
|
|
458
|
+
{ id: "colour_sergeant", type: "military", ctx: "prefix", short: "Colour Sergeant", variants: ["colour sergeant", "color sergeant"] },
|
|
459
|
+
{ id: "commander_rank", type: "military", ctx: "prefix", short: "Commander", variants: ["commander"] },
|
|
460
|
+
{ id: "commodore", type: "military", ctx: "prefix", short: "Commodore", variants: ["commodore"] },
|
|
461
|
+
{ id: "lt_col", type: "military", ctx: "prefix", short: "Lt Col", long: "Lieutenant Colonel", variants: ["lt col", "lt. col.", "lieutenant colonel"] },
|
|
462
|
+
{ id: "lt_commander", type: "military", ctx: "prefix", short: "Lt Commander", variants: ["lt commander", "lt. commander", "lieutenant commander"] },
|
|
463
|
+
{ id: "lt_cpl", type: "military", ctx: "prefix", short: "Lt Cpl", variants: ["lt cpl", "lt. cpl."] },
|
|
464
|
+
{ id: "lt_general", type: "military", ctx: "prefix", short: "Lt General", long: "Lieutenant General", variants: ["lt general", "lt. general", "lieutenant general"] },
|
|
465
|
+
{ id: "major_general", type: "military", ctx: "prefix", short: "Major General", variants: ["major general"] },
|
|
466
|
+
// ---------------------------------------------------------------------------
|
|
467
|
+
// UK/IE/CA/AU/NZ styles, nobility, and honorific styles (treated as "style")
|
|
468
|
+
// ---------------------------------------------------------------------------
|
|
469
|
+
{ id: "sir", type: "honorific", ctx: "prefix", short: "Sir", variants: ["sir"] },
|
|
470
|
+
{ id: "dame", type: "honorific", ctx: "prefix", short: "Dame", variants: ["dame"] },
|
|
471
|
+
{ id: "dame_commander", type: "honorific", ctx: "prefix", short: "Dame Commander", variants: ["dame commander"] },
|
|
472
|
+
{ id: "dame_grand_cross", type: "honorific", ctx: "prefix", short: "Dame Grand Cross", variants: ["dame grand cross"] },
|
|
473
|
+
{ id: "lord", type: "style", ctx: "prefix", short: "Lord", variants: ["lord"] },
|
|
474
|
+
{ id: "lady", type: "style", ctx: "prefix", short: "Lady", variants: ["lady"] },
|
|
475
|
+
{ id: "lord_lieutenant", type: "style", ctx: "prefix", short: "Lord Lieutenant", variants: ["lord lieutenant"] },
|
|
476
|
+
{ id: "lord_mayor", type: "style", ctx: "prefix", short: "Lord Mayor", variants: ["lord mayor"] },
|
|
477
|
+
{ id: "lord_high_admiral", type: "style", ctx: "prefix", short: "Lord High Admiral", variants: ["lord high admiral"] },
|
|
478
|
+
{ id: "lord_high_commissioner", type: "style", ctx: "prefix", short: "Lord High Commissioner", variants: ["lord high commissioner"] },
|
|
479
|
+
{ id: "lord_of_the_manor", type: "style", ctx: "prefix", short: "Lord of the Manor", variants: ["lord of the manor"] },
|
|
480
|
+
{ id: "lord_president_of_the_council", type: "style", ctx: "prefix", short: "Lord President of the Council", variants: ["lord president of the council"] },
|
|
481
|
+
{ id: "duke", type: "style", ctx: "prefix", short: "Duke", variants: ["duke"] },
|
|
482
|
+
{ id: "duchess", type: "style", ctx: "prefix", short: "Duchess", variants: ["duchess"] },
|
|
483
|
+
{ id: "earl", type: "style", ctx: "prefix", short: "Earl", variants: ["earl"] },
|
|
484
|
+
{ id: "baron", type: "style", ctx: "prefix", short: "Baron", variants: ["baron"] },
|
|
485
|
+
{ id: "baroness", type: "style", ctx: "prefix", short: "Baroness", variants: ["baroness"] },
|
|
486
|
+
{ id: "count", type: "style", ctx: "prefix", short: "Count", variants: ["count"] },
|
|
487
|
+
{ id: "countess", type: "style", ctx: "prefix", short: "Countess", variants: ["countess"] },
|
|
488
|
+
{ id: "marquess", type: "style", ctx: "prefix", short: "Marquess", variants: ["marquess"] },
|
|
489
|
+
{ id: "marquis", type: "style", ctx: "prefix", short: "Marquis", variants: ["marquis"] },
|
|
490
|
+
{ id: "viscount", type: "style", ctx: "prefix", short: "Viscount", variants: ["viscount"] },
|
|
491
|
+
{ id: "viscountess", type: "style", ctx: "prefix", short: "Viscountess", variants: ["viscountess"] },
|
|
492
|
+
{ id: "visc", type: "style", ctx: "prefix", short: "Visc", long: "Viscount", variants: ["visc"] },
|
|
493
|
+
// Additional nobility/courtesy styles seen in UK-oriented datasets
|
|
494
|
+
{ id: "archduke", type: "style", ctx: "prefix", short: "Archduke", variants: ["archduke"] },
|
|
495
|
+
{ id: "archchancellor", type: "style", ctx: "prefix", short: "Archchancellor", variants: ["archchancellor"] },
|
|
496
|
+
{ id: "baronet", type: "style", ctx: "prefix", short: "Baronet", variants: ["baronet"] },
|
|
497
|
+
{ id: "baron_of_parliament", type: "style", ctx: "prefix", short: "Baron of Parliament", variants: ["baron of parliament"] },
|
|
498
|
+
{ id: "baronial_lord", type: "style", ctx: "prefix", short: "Baronial Lord", variants: ["baronial lord"] },
|
|
499
|
+
{ id: "count_palatine", type: "style", ctx: "prefix", short: "Count Palatine", variants: ["count palatine"] },
|
|
500
|
+
{ id: "countess_of", type: "style", ctx: "prefix", short: "Countess of", variants: ["countess of"] },
|
|
501
|
+
{ id: "dowager_countess", type: "style", ctx: "prefix", short: "Dowager Countess", variants: ["dowager countess"] },
|
|
502
|
+
{ id: "premier_duke", type: "style", ctx: "prefix", short: "Premier Duke", variants: ["premier duke"] },
|
|
503
|
+
{ id: "marchioness", type: "style", ctx: "prefix", short: "Marchioness", variants: ["marchioness"] },
|
|
504
|
+
{ id: "marcher_lord", type: "style", ctx: "prefix", short: "Marcher Lord", variants: ["marcher lord"] },
|
|
505
|
+
{ id: "hereditary_lord", type: "style", ctx: "prefix", short: "Hereditary Lord", variants: ["hereditary lord"] },
|
|
506
|
+
{ id: "high_steward", type: "style", ctx: "prefix", short: "High Steward", variants: ["high steward"] },
|
|
507
|
+
{ id: "keeper_of_the_privy_seal", type: "style", ctx: "prefix", short: "Keeper of the Privy Seal", variants: ["keeper of the privy seal"] },
|
|
508
|
+
{ id: "constable_of_the_tower", type: "style", ctx: "prefix", short: "Constable of the Tower", variants: ["constable of the tower"] },
|
|
509
|
+
{ id: "freeman_of_the_city", type: "style", ctx: "prefix", short: "Freeman of the City", variants: ["freeman of the city"] },
|
|
510
|
+
{ id: "yeoman_warder", type: "style", ctx: "prefix", short: "Yeoman Warder", variants: ["yeoman warder"] },
|
|
511
|
+
{ id: "the_earl_of", type: "style", ctx: "prefix", short: "The Earl of", variants: ["the earl of", "earl of"] },
|
|
512
|
+
// UK parliament/legal courtesy
|
|
513
|
+
{
|
|
514
|
+
id: "the_honourable",
|
|
515
|
+
type: "style",
|
|
516
|
+
ctx: "prefix",
|
|
517
|
+
short: "The Hon.",
|
|
518
|
+
long: "The Honourable",
|
|
519
|
+
variants: ["the hon", "the hon.", "the honourable", "the honorable"]
|
|
520
|
+
},
|
|
521
|
+
{
|
|
522
|
+
id: "the_right_honourable",
|
|
523
|
+
type: "style",
|
|
524
|
+
ctx: "prefix",
|
|
525
|
+
short: "The Rt Hon",
|
|
526
|
+
long: "The Right Honourable",
|
|
527
|
+
variants: ["the rt hon", "the rt. hon.", "the right honourable", "right honourable", "right honorable"]
|
|
528
|
+
},
|
|
529
|
+
{ id: "his_excellency", type: "style", ctx: "prefix", short: "His Excellency", variants: ["his excellency"] },
|
|
530
|
+
{ id: "her_excellency", type: "style", ctx: "prefix", short: "Her Excellency", variants: ["her excellency"] },
|
|
531
|
+
{ id: "he_abbrev", type: "style", ctx: "prefix", short: "HE", long: "His/Her Excellency", variants: ["he", "h.e.", "h.e"] },
|
|
532
|
+
{ id: "hma", type: "style", ctx: "prefix", short: "HMA", long: "His/Her Majesty\u2019s Ambassador", variants: ["hma"] },
|
|
533
|
+
{ id: "kc_prefix", type: "professional", ctx: "prefix", short: "KC", long: "King\u2019s Counsel", variants: ["kc", "king's counsel", "kings counsel"] },
|
|
534
|
+
// Royalty (canonical apostrophe used in some titles)
|
|
535
|
+
{ id: "her_majesty", type: "style", ctx: "prefix", short: "Her Majesty", variants: ["her majesty"] },
|
|
536
|
+
{ id: "his_majesty", type: "style", ctx: "prefix", short: "His Majesty", variants: ["his majesty"] },
|
|
537
|
+
{ id: "her_grace", type: "style", ctx: "prefix", short: "Her Grace", variants: ["her grace"] },
|
|
538
|
+
{ id: "his_grace", type: "style", ctx: "prefix", short: "His Grace", variants: ["his grace"] },
|
|
539
|
+
{ id: "prince", type: "style", ctx: "prefix", short: "Prince", variants: ["prince"] },
|
|
540
|
+
{ id: "princess", type: "style", ctx: "prefix", short: "Princess", variants: ["princess"] },
|
|
541
|
+
{ id: "prince_consort", type: "style", ctx: "prefix", short: "Prince Consort", variants: ["prince consort"] },
|
|
542
|
+
{ id: "princess_royal", type: "style", ctx: "prefix", short: "Princess Royal", variants: ["princess royal"] },
|
|
543
|
+
{
|
|
544
|
+
id: "her_majestys_counsel",
|
|
545
|
+
type: "style",
|
|
546
|
+
ctx: "prefix",
|
|
547
|
+
short: "Her Majesty\u2019s Counsel",
|
|
548
|
+
variants: ["her majesty's counsel", "her majesty\u2019s counsel", "hma counsel"]
|
|
549
|
+
},
|
|
550
|
+
{
|
|
551
|
+
id: "his_majestys_counsel",
|
|
552
|
+
type: "style",
|
|
553
|
+
ctx: "prefix",
|
|
554
|
+
short: "His Majesty\u2019s Counsel",
|
|
555
|
+
variants: ["his majesty's counsel", "his majesty\u2019s counsel", "hma counsel"]
|
|
556
|
+
},
|
|
557
|
+
// ---------------------------------------------------------------------------
|
|
558
|
+
// Civic / diplomatic / political / academic / institutional (English-speaking)
|
|
559
|
+
// ---------------------------------------------------------------------------
|
|
560
|
+
{ id: "alderman", type: "style", ctx: "prefix", short: "Alderman", variants: ["alderman"] },
|
|
561
|
+
{ id: "ambassador", type: "style", ctx: "prefix", short: "Ambassador", variants: ["ambassador"] },
|
|
562
|
+
{ id: "ambassador_at_large", type: "style", ctx: "prefix", short: "Ambassador-at-Large", variants: ["ambassador-at-large", "ambassador at large"] },
|
|
563
|
+
{ id: "consul", type: "style", ctx: "prefix", short: "Consul", variants: ["consul"] },
|
|
564
|
+
{ id: "consul_general", type: "style", ctx: "prefix", short: "Consul General", variants: ["consul general"] },
|
|
565
|
+
{ id: "envoy_extraordinary", type: "style", ctx: "prefix", short: "Envoy Extraordinary", variants: ["envoy extraordinary"] },
|
|
566
|
+
{ id: "deputy", type: "style", ctx: "prefix", short: "Deputy", variants: ["deputy"] },
|
|
567
|
+
{ id: "deputy_high_commissioner", type: "style", ctx: "prefix", short: "Deputy High Commissioner", variants: ["deputy high commissioner"] },
|
|
568
|
+
{ id: "chancellor", type: "style", ctx: "prefix", short: "Chancellor", variants: ["chancellor"] },
|
|
569
|
+
{ id: "vice_chancellor", type: "style", ctx: "prefix", short: "Vice Chancellor", variants: ["vice chancellor"] },
|
|
570
|
+
{ id: "chancellor_of_the_exchequer", type: "style", ctx: "prefix", short: "Chancellor of the Exchequer", variants: ["chancellor of the exchequer"] },
|
|
571
|
+
{ id: "minister", type: "style", ctx: "prefix", short: "Minister", variants: ["minister"] },
|
|
572
|
+
{ id: "minister_of_state", type: "style", ctx: "prefix", short: "Minister of State", variants: ["minister of state"] },
|
|
573
|
+
{ id: "senator", type: "style", ctx: "prefix", short: "Senator", variants: ["senator"] },
|
|
574
|
+
{ id: "chief", type: "style", ctx: "prefix", short: "Chief", variants: ["chief"] },
|
|
575
|
+
{ id: "chief_constable", type: "style", ctx: "prefix", short: "Chief Constable", variants: ["chief constable"] },
|
|
576
|
+
{ id: "speaker_of_the_house", type: "style", ctx: "prefix", short: "Speaker of the House", variants: ["speaker of the house"] },
|
|
577
|
+
{ id: "sheriff", type: "style", ctx: "prefix", short: "Sheriff", variants: ["sheriff"] },
|
|
578
|
+
{ id: "cllr", type: "style", ctx: "prefix", short: "Cllr", long: "Councillor", variants: ["cllr", "councillor", "councilor"] },
|
|
579
|
+
{ id: "churchwarden", type: "style", ctx: "prefix", short: "Churchwarden", variants: ["churchwarden"] },
|
|
580
|
+
{ id: "headmaster", type: "style", ctx: "prefix", short: "Headmaster", variants: ["headmaster"] },
|
|
581
|
+
{ id: "headmistress", type: "style", ctx: "prefix", short: "Headmistress", variants: ["headmistress"] },
|
|
582
|
+
{ id: "dean", type: "style", ctx: "prefix", short: "Dean", variants: ["dean"] },
|
|
583
|
+
{ id: "dean_emeritus", type: "style", ctx: "prefix", short: "Dean Emeritus", variants: ["dean emeritus"] },
|
|
584
|
+
{ id: "fellow", type: "style", ctx: "prefix", short: "Fellow", variants: ["fellow"] },
|
|
585
|
+
{ id: "provost", type: "style", ctx: "prefix", short: "Provost", variants: ["provost"] },
|
|
586
|
+
{ id: "provost_academic", type: "style", ctx: "prefix", short: "Provost (academic)", variants: ["provost (academic)", "provost academic"] },
|
|
587
|
+
{ id: "warden", type: "style", ctx: "prefix", short: "Warden", variants: ["warden"] },
|
|
588
|
+
{ id: "master", type: "style", ctx: "prefix", short: "Master", variants: ["master"] },
|
|
589
|
+
{ id: "master_of_arts", type: "style", ctx: "prefix", short: "Master of Arts", variants: ["master of arts"] },
|
|
590
|
+
{ id: "master_of_the_rolls", type: "style", ctx: "prefix", short: "Master of the Rolls", variants: ["master of the rolls"] },
|
|
591
|
+
{ id: "rector", type: "style", ctx: "prefix", short: "Rector", variants: ["rector"] },
|
|
592
|
+
{ id: "rector_magnificus", type: "style", ctx: "prefix", short: "Rector Magnificus", variants: ["rector magnificus"] },
|
|
593
|
+
// ---------------------------------------------------------------------------
|
|
594
|
+
// Chivalric / orders (UK)
|
|
595
|
+
// ---------------------------------------------------------------------------
|
|
596
|
+
{ id: "knight_bachelor", type: "style", ctx: "prefix", short: "Knight Bachelor", variants: ["knight bachelor"] },
|
|
597
|
+
{ id: "knight_commander", type: "style", ctx: "prefix", short: "Knight Commander", variants: ["knight commander"] },
|
|
598
|
+
{ id: "knight_grand_cross", type: "style", ctx: "prefix", short: "Knight Grand Cross", variants: ["knight grand cross"] },
|
|
599
|
+
{ id: "knight_marshal", type: "style", ctx: "prefix", short: "Knight Marshal", variants: ["knight marshal"] },
|
|
600
|
+
// ---------------------------------------------------------------------------
|
|
601
|
+
// Judicial (UK/IE/US common)
|
|
602
|
+
// ---------------------------------------------------------------------------
|
|
603
|
+
{ id: "judge", type: "judicial", ctx: "prefix", short: "Judge", variants: ["judge"] },
|
|
604
|
+
{ id: "justice", type: "judicial", ctx: "prefix", short: "Justice", variants: ["justice"] },
|
|
605
|
+
{ id: "chief_justice", type: "judicial", ctx: "prefix", short: "Chief Justice", variants: ["chief justice"] },
|
|
606
|
+
{ id: "lord_chief_justice", type: "judicial", ctx: "prefix", short: "Lord Chief Justice", variants: ["lord chief justice"] },
|
|
607
|
+
{ id: "lord_justice", type: "judicial", ctx: "prefix", short: "Lord Justice", variants: ["lord justice"] },
|
|
608
|
+
{ id: "lord_chancellor", type: "judicial", ctx: "prefix", short: "Lord Chancellor", variants: ["lord chancellor"] },
|
|
609
|
+
{ id: "lord_advocate", type: "judicial", ctx: "prefix", short: "Lord Advocate", variants: ["lord advocate"] },
|
|
610
|
+
{ id: "the_learned_judge", type: "judicial", ctx: "prefix", short: "The Learned Judge", variants: ["the learned judge"] },
|
|
611
|
+
// ---------------------------------------------------------------------------
|
|
612
|
+
// Multi-person combined prefixes (common couple/pair honorifics)
|
|
613
|
+
// ---------------------------------------------------------------------------
|
|
614
|
+
// Common paired honorifics (Mr. & Mrs., etc.)
|
|
615
|
+
{ id: "mr_and_mrs", type: "honorific", ctx: "prefix", short: "Mr. & Mrs.", variants: ["mr & mrs", "mr and mrs", "mr. & mrs.", "mr. and mrs.", "mr.&mrs.", "mr&mrs"] },
|
|
616
|
+
{ id: "mr_and_ms", type: "honorific", ctx: "prefix", short: "Mr. & Ms.", variants: ["mr & ms", "mr and ms", "mr. & ms.", "mr. and ms."] },
|
|
617
|
+
{ id: "mr_and_mr", type: "honorific", ctx: "prefix", short: "Mr. & Mr.", variants: ["mr & mr", "mr and mr", "mr. & mr.", "mr. and mr."] },
|
|
618
|
+
{ id: "mrs_and_mrs", type: "honorific", ctx: "prefix", short: "Mrs. & Mrs.", variants: ["mrs & mrs", "mrs and mrs", "mrs. & mrs.", "mrs. and mrs."] },
|
|
619
|
+
{ id: "ms_and_ms", type: "honorific", ctx: "prefix", short: "Ms. & Ms.", variants: ["ms & ms", "ms and ms", "ms. & ms.", "ms. and ms."] },
|
|
620
|
+
{ id: "dr_and_mrs", type: "honorific", ctx: "prefix", short: "Dr. & Mrs.", variants: ["dr & mrs", "dr and mrs", "dr. & mrs.", "dr. and mrs."] },
|
|
621
|
+
{ id: "dr_and_mr", type: "honorific", ctx: "prefix", short: "Dr. & Mr.", variants: ["dr & mr", "dr and mr", "dr. & mr.", "dr. and mr."] },
|
|
622
|
+
{ id: "dr_and_ms", type: "honorific", ctx: "prefix", short: "Dr. & Ms.", variants: ["dr & ms", "dr and ms", "dr. & ms.", "dr. and ms."] },
|
|
623
|
+
{ id: "dr_and_dr", type: "honorific", ctx: "prefix", short: "Dr. & Dr.", variants: ["dr & dr", "dr and dr", "dr. & dr.", "dr. and dr."] },
|
|
624
|
+
// UK/formal paired prefixes
|
|
625
|
+
{ id: "brig_and_mrs", type: "style", ctx: "prefix", short: "Brig & Mrs", variants: ["brig & mrs", "brig and mrs"] },
|
|
626
|
+
{ id: "commander_and_mrs", type: "style", ctx: "prefix", short: "Commander & Mrs", variants: ["commander & mrs", "commander and mrs"] },
|
|
627
|
+
{ id: "lord_and_lady", type: "style", ctx: "prefix", short: "Lord & Lady", variants: ["lord & lady", "lord and lady"] },
|
|
628
|
+
{ id: "prof_and_dr", type: "style", ctx: "prefix", short: "Prof & Dr", variants: ["prof & dr", "prof and dr"] },
|
|
629
|
+
{ id: "prof_and_mrs", type: "style", ctx: "prefix", short: "Prof & Mrs", variants: ["prof & mrs", "prof and mrs"] },
|
|
630
|
+
{ id: "prof_and_rev", type: "style", ctx: "prefix", short: "Prof & Rev", variants: ["prof & rev", "prof and rev"] },
|
|
631
|
+
{ id: "prof_dame", type: "style", ctx: "prefix", short: "Prof Dame", variants: ["prof dame"] },
|
|
632
|
+
{ id: "prof_dr", type: "style", ctx: "prefix", short: "Prof Dr", variants: ["prof dr"] },
|
|
633
|
+
{ id: "rev_and_mrs", type: "style", ctx: "prefix", short: "Rev & Mrs", variants: ["rev & mrs", "rev and mrs"] },
|
|
634
|
+
{ id: "sir_and_lady", type: "style", ctx: "prefix", short: "Sir & Lady", variants: ["sir & lady", "sir and lady"] },
|
|
635
|
+
{ id: "capt_and_mrs", type: "style", ctx: "prefix", short: "Capt. & Mrs.", variants: ["capt & mrs", "capt and mrs", "capt. & mrs.", "capt. and mrs."] },
|
|
636
|
+
{ id: "col_and_mrs", type: "style", ctx: "prefix", short: "Col. & Mrs.", variants: ["col & mrs", "col and mrs", "col. & mrs.", "col. and mrs."] },
|
|
637
|
+
{ id: "gen_and_mrs", type: "style", ctx: "prefix", short: "Gen. & Mrs.", variants: ["gen & mrs", "gen and mrs", "gen. & mrs.", "gen. and mrs."] },
|
|
638
|
+
{ id: "maj_and_mrs", type: "style", ctx: "prefix", short: "Maj. & Mrs.", variants: ["maj & mrs", "maj and mrs", "maj. & mrs.", "maj. and mrs."] },
|
|
639
|
+
// ---------------------------------------------------------------------------
|
|
640
|
+
// European Union — common civil honorifics (local-language)
|
|
641
|
+
// NOTE: canonical forms are local-language display forms. Matching folds diacritics.
|
|
642
|
+
// ---------------------------------------------------------------------------
|
|
643
|
+
// French (FR/BE/LU)
|
|
644
|
+
{ id: "fr_monsieur", type: "honorific", ctx: "prefix", short: "M.", long: "Monsieur", variants: ["m", "m.", "monsieur"] },
|
|
645
|
+
{ id: "fr_madame", type: "honorific", ctx: "prefix", short: "Mme", long: "Madame", variants: ["mme", "mme.", "madame"] },
|
|
646
|
+
{ id: "fr_mademoiselle", type: "honorific", ctx: "prefix", short: "Mlle", long: "Mademoiselle", variants: ["mlle", "mlle.", "mademoiselle"] },
|
|
647
|
+
// German (DE/AT)
|
|
648
|
+
{ id: "de_herr", type: "honorific", ctx: "prefix", short: "Herr", variants: ["herr"] },
|
|
649
|
+
{ id: "de_frau", type: "honorific", ctx: "prefix", short: "Frau", variants: ["frau"] },
|
|
650
|
+
{ id: "de_dr", type: "honorific", ctx: "prefix", short: "Dr.", long: "Doktor", variants: ["dr", "dr.", "doktor"] },
|
|
651
|
+
{ id: "de_prof", type: "honorific", ctx: "prefix", short: "Prof.", long: "Professor", variants: ["prof", "prof.", "professor"] },
|
|
652
|
+
{ id: "de_ing", type: "professional", ctx: "prefix", short: "Ing.", long: "Ingenieur", variants: ["ing", "ing.", "ingenieur", "ingenieurin"] },
|
|
653
|
+
// Spanish (ES)
|
|
654
|
+
{ id: "es_senor", type: "honorific", ctx: "prefix", short: "Sr.", long: "Se\xF1or", variants: ["sr", "sr.", "senor", "se\xF1or"] },
|
|
655
|
+
{ id: "es_senora", type: "honorific", ctx: "prefix", short: "Sra.", long: "Se\xF1ora", variants: ["sra", "sra.", "senora", "se\xF1ora"] },
|
|
656
|
+
{ id: "es_senorita", type: "honorific", ctx: "prefix", short: "Srta.", long: "Se\xF1orita", variants: ["srta", "srta.", "senorita", "se\xF1orita"] },
|
|
657
|
+
{ id: "es_don", type: "style", ctx: "prefix", short: "Don", variants: ["don"] },
|
|
658
|
+
{ id: "es_dona", type: "style", ctx: "prefix", short: "Do\xF1a", variants: ["dona", "do\xF1a"] },
|
|
659
|
+
// Portuguese (PT)
|
|
660
|
+
{ id: "pt_senhor", type: "honorific", ctx: "prefix", short: "Sr.", long: "Senhor", variants: ["sr", "sr.", "senhor"] },
|
|
661
|
+
{ id: "pt_senhora", type: "honorific", ctx: "prefix", short: "Sra.", long: "Senhora", variants: ["sra", "sra.", "senhora"] },
|
|
662
|
+
{ id: "pt_doutor", type: "honorific", ctx: "prefix", short: "Dr.", long: "Doutor", variants: ["dr", "dr.", "doutor", "doutora"] },
|
|
663
|
+
// Italian (IT)
|
|
664
|
+
{ id: "it_signore", type: "honorific", ctx: "prefix", short: "Sig.", long: "Signore", variants: ["sig", "sig.", "signore"] },
|
|
665
|
+
{ id: "it_signora", type: "honorific", ctx: "prefix", short: "Sig.ra", long: "Signora", variants: ["sig.ra", "sigra", "signora"] },
|
|
666
|
+
{ id: "it_signorina", type: "honorific", ctx: "prefix", short: "Sig.na", long: "Signorina", variants: ["sig.na", "signorina"] },
|
|
667
|
+
{ id: "it_dottore", type: "honorific", ctx: "prefix", short: "Dott.", long: "Dottore", variants: ["dott", "dott.", "dottore"] },
|
|
668
|
+
{ id: "it_dottoressa", type: "honorific", ctx: "prefix", short: "Dott.ssa", long: "Dottoressa", variants: ["dott.ssa", "dottsa", "dottoressa"] },
|
|
669
|
+
{ id: "it_professore", type: "honorific", ctx: "prefix", short: "Prof.", long: "Professore", variants: ["prof", "prof.", "professore"] },
|
|
670
|
+
{ id: "it_professoressa", type: "honorific", ctx: "prefix", short: "Prof.ssa", long: "Professoressa", variants: ["prof.ssa", "profssa", "professoressa"] },
|
|
671
|
+
// Dutch (NL/BE)
|
|
672
|
+
{ id: "nl_de_heer", type: "honorific", ctx: "prefix", short: "Dhr.", long: "De heer", variants: ["dhr", "dhr.", "de heer"] },
|
|
673
|
+
{ id: "nl_mevrouw", type: "honorific", ctx: "prefix", short: "Mevr.", long: "Mevrouw", variants: ["mevr", "mevr.", "mevrouw"] },
|
|
674
|
+
{ id: "nl_juffrouw", type: "honorific", ctx: "prefix", short: "Juf.", long: "Juffrouw", variants: ["juf", "juf.", "juffrouw"] },
|
|
675
|
+
// Swedish (SE)
|
|
676
|
+
{ id: "se_herr", type: "honorific", ctx: "prefix", short: "Herr", variants: ["herr"] },
|
|
677
|
+
{ id: "se_fru", type: "honorific", ctx: "prefix", short: "Fru", variants: ["fru"] },
|
|
678
|
+
{ id: "se_fr\xF6ken", type: "honorific", ctx: "prefix", short: "Fr\xF6ken", variants: ["froken", "fr\xF6ken"] },
|
|
679
|
+
// Danish (DK) / Norwegian (NO)
|
|
680
|
+
{ id: "dk_hr", type: "honorific", ctx: "prefix", short: "Hr.", long: "Herr", variants: ["hr", "hr.", "herr"] },
|
|
681
|
+
{ id: "dk_fru", type: "honorific", ctx: "prefix", short: "Fru", variants: ["fru"] },
|
|
682
|
+
{ id: "no_hr", type: "honorific", ctx: "prefix", short: "Hr.", variants: ["hr", "hr."] },
|
|
683
|
+
{ id: "no_fru", type: "honorific", ctx: "prefix", short: "Fru", variants: ["fru"] },
|
|
684
|
+
// Polish (PL)
|
|
685
|
+
{ id: "pl_pan", type: "honorific", ctx: "prefix", short: "Pan", variants: ["pan"] },
|
|
686
|
+
{ id: "pl_pani", type: "honorific", ctx: "prefix", short: "Pani", variants: ["pani"] },
|
|
687
|
+
// Czech (CZ) / Slovak (SK)
|
|
688
|
+
{ id: "cz_pan", type: "honorific", ctx: "prefix", short: "Pan", variants: ["pan"] },
|
|
689
|
+
{ id: "cz_pani", type: "honorific", ctx: "prefix", short: "Pan\xED", variants: ["pani", "pan\xED"] },
|
|
690
|
+
// Greek (GR) — common abbreviations (ASCII-friendly variants included)
|
|
691
|
+
{ id: "gr_kyr", type: "honorific", ctx: "prefix", short: "\u03BA.", long: "\u039A\u03CD\u03C1\u03B9\u03BF\u03C2", variants: ["k", "k.", "\u03BA", "\u03BA.", "\u03BA\u03C5\u03C1\u03B9\u03BF\u03C2", "\u03BA\u03CD\u03C1\u03B9\u03BF\u03C2"] },
|
|
692
|
+
{ id: "gr_kyria", type: "honorific", ctx: "prefix", short: "\u03BA\u03B1.", long: "\u039A\u03C5\u03C1\u03AF\u03B1", variants: ["ka", "ka.", "\u03BA\u03B1", "\u03BA\u03B1.", "\u03BA\u03C5\u03C1\u03B9\u03B1", "\u03BA\u03C5\u03C1\u03AF\u03B1"] }
|
|
693
|
+
];
|
|
694
|
+
// Table of recognised name suffixes. Every entry carries `ctx: "suffix"`, a
// canonical `short` form, an optional `long` form and the lower-case `variants`
// accepted on input. Canonical punctuation is stored here; variants list both
// the dotted and the dot-free spelling where the short form contains dots.
var SUFFIX_AFFIX_ENTRIES = (() => {
  // Returns a builder for entries of one `type`. The `long` key is only
  // attached when a long form is supplied, so entries without one have no
  // `long` property at all.
  const ofType = (type) => (id, short, variants, long) =>
    long === undefined
      ? { id, type, ctx: "suffix", short, variants }
      : { id, type, ctx: "suffix", short, long, variants };

  const generational = ofType("generational");
  const dynastic = ofType("dynasticNumber");
  const degree = ofType("education");
  const credential = ofType("professional");
  const honour = ofType("postnominalHonor");

  return [
    // Generational markers and dynastic numerals
    generational("jr", "Jr.", ["jr", "jr."]),
    generational("sr", "Sr.", ["sr", "sr."]),
    dynastic("ii", "II", ["ii"]),
    dynastic("iii", "III", ["iii"]),
    dynastic("iv", "IV", ["iv"]),
    dynastic("v", "V", ["v"]),

    // Degrees that record a long form: associate's
    degree("aa", "A.A.", ["aa", "a.a."], "Associate of Arts"),
    degree("as", "A.S.", ["as", "a.s."], "Associate of Science"),
    degree("aas", "A.A.S.", ["aas", "a.a.s."], "Associate of Applied Science"),
    // ... bachelor's
    degree("ba", "B.A.", ["ba", "b.a."], "Bachelor of Arts"),
    degree("bs", "B.S.", ["bs", "b.s."], "Bachelor of Science"),
    degree("bba", "B.B.A.", ["bba", "b.b.a."], "Bachelor of Business Administration"),
    // ... master's
    degree("ma", "M.A.", ["ma", "m.a."], "Master of Arts"),
    degree("ms", "M.S.", ["ms", "m.s."], "Master of Science"),

    // Remaining degree abbreviations (no long form recorded)
    degree("phd", "Ph.D.", ["phd", "ph.d."]),
    degree("dphil", "D.Phil.", ["dphil", "d.phil."]),
    degree("md", "M.D.", ["md", "m.d."]),
    degree("do", "D.O.", ["do", "d.o."]),
    degree("dds", "D.D.S.", ["dds", "d.d.s."]),
    degree("dmd", "D.M.D.", ["dmd", "d.m.d."]),
    degree("dvm", "D.V.M.", ["dvm", "d.v.m."]),
    degree("jd", "J.D.", ["jd", "j.d."]),
    degree("edd", "Ed.D.", ["edd", "ed.d."]),
    degree("pharmd", "Pharm.D.", ["pharmd", "pharm.d."]),
    degree("psyd", "Psy.D.", ["psyd", "psy.d."]),
    degree("dpt", "D.P.T.", ["dpt", "d.p.t."]),
    degree("od", "O.D.", ["od", "o.d."]),
    degree("llb", "LL.B.", ["llb", "ll.b."]),
    degree("llm", "LL.M.", ["llm", "ll.m."]),
    degree("mba", "M.B.A.", ["mba", "m.b.a."]),
    degree("med", "M.Ed.", ["med", "m.ed."]),
    degree("mat", "M.A.T.", ["mat", "m.a.t."]),
    degree("msc", "M.Sc.", ["msc", "m.sc."]),
    degree("bsc", "B.Sc.", ["bsc", "b.sc."]),
    degree("mpa", "M.P.A.", ["mpa", "m.p.a."]),
    degree("msw", "M.S.W.", ["msw", "m.s.w."]),
    degree("meng", "M.Eng.", ["meng", "m.eng."]),
    degree("beng", "B.Eng.", ["beng", "b.eng."]),

    // Nursing/medical credentials
    credential("rn", "RN", ["rn"]),
    credential("np", "NP", ["np"]),
    credential("pa_c", "PA-C", ["pa-c", "pac", "pa c"]),
    // Accounting/finance credentials
    credential("cpa", "CPA", ["cpa"]),
    credential("cfa", "CFA", ["cfa"]),
    // Legal credentials
    credential("esq", "Esq.", ["esq", "esq."]),
    credential("kc", "KC", ["kc"]),
    credential("qc", "QC", ["qc"]),

    // UK/IE honours (postnominals), stored canonically
    honour("obe", "OBE", ["obe"]),
    honour("mbe", "MBE", ["mbe"]),
    honour("cbe", "CBE", ["cbe"]),
    honour("kbe", "KBE", ["kbe"]),
    honour("dbe", "DBE", ["dbe"]),
    honour("cmg", "CMG", ["cmg"]),
    honour("cvo", "CVO", ["cvo"]),
    honour("mvo", "MVO", ["mvo"])
  ];
})();
|
|
763
|
+
|
|
764
|
+
// src/affixes.ts
|
|
765
|
+
// Upper-case Roman numerals II through X. classifyType reports these as
// "dynasticNumber" when they appear in suffix position.
var ROMAN_NUMERALS = /* @__PURE__ */ new Set("II III IV V VI VII VIII IX X".split(" "));
// Variant indexes built from the two entry tables. classifyAffixToken queries
// them with `.get(normalizedKey)` to find the matching entry, if any.
var PREFIX_INDEX = buildAffixVariantIndex(PREFIX_AFFIX_ENTRIES, "prefix");
var SUFFIX_INDEX = buildAffixVariantIndex(SUFFIX_AFFIX_ENTRIES, "suffix");
|
|
768
|
+
// Fallback vocabularies used by classifyType when a token has no entry in the
// prefix/suffix indexes. All keys are upper-case and dot-free, matching the
// `normalizedKey` produced by normalizeAffix.

// Everyday courtesy titles.
var HONORIFIC = /* @__PURE__ */ new Set([
  "MR",
  "MRS",
  "MS",
  "MISS",
  "MX",
  "DR",
  "PROF",
  "SIR",
  "DAME"
]);

// Multi-word styles of address.
var STYLE_PHRASES = /* @__PURE__ */ new Set([
  "THE HON",
  "THE HONOURABLE",
  "THE RIGHT HONOURABLE",
  "RIGHT HONOURABLE",
  "THE RT HON",
  "HIS EXCELLENCY",
  "HER EXCELLENCY"
]);

// Royal styles and peerage ranks.
var NOBILITY_AND_ROYALTY = /* @__PURE__ */ new Set([
  "HER MAJESTY",
  "HIS MAJESTY",
  "HER GRACE",
  "HIS GRACE",
  "PRINCE",
  "PRINCESS",
  "DUKE",
  "DUCHESS",
  "EARL",
  "LORD",
  "LADY",
  "BARON",
  "BARONESS",
  "COUNT",
  "COUNTESS",
  "MARQUESS",
  "MARQUIS",
  "VISCOUNT",
  "VISCOUNTESS",
  // common abbreviation used in "The Rt Hon Visc"
  "VISC"
]);

// Religious titles and their abbreviations.
var RELIGIOUS = /* @__PURE__ */ new Set([
  "REV",
  "REVEREND",
  "FR",
  "FATHER",
  "RABBI",
  "IMAM",
  "PASTOR",
  "SISTER",
  "SR",
  "BR",
  "BROTHER"
]);

// Military rank abbreviations.
var MILITARY = /* @__PURE__ */ new Set([
  "PVT",
  "CPL",
  "SGT",
  "LT",
  "CPT",
  "CAPT",
  "MAJ",
  "COL",
  "GEN",
  "ADM"
]);

// Judicial titles.
var JUDICIAL = /* @__PURE__ */ new Set(["JUDGE", "JUSTICE"]);

// Professional credentials.
var PROFESSIONAL = /* @__PURE__ */ new Set(["ESQ", "CPA", "CFA", "PE", "RN", "DDS"]);

// Academic degrees.
var EDUCATION = /* @__PURE__ */ new Set([
  "PHD",
  "MD",
  "JD",
  "MBA",
  "MS",
  "MA",
  "BS",
  "BA",
  "DVM"
]);

// Post-nominal honours.
var POSTNOMINAL_HONOR = /* @__PURE__ */ new Set(["OBE", "MBE", "CBE", "KBE", "DBE"]);
|
|
807
|
+
// Single-word keys that splitAffixToAtomicParts checks word-by-word when it
// tests whether every word of a multi-word chunk is a known affix. Seeded
// from the fallback vocabularies, then extended below from the entry tables.
var SPLITTABLE_WORDS = /* @__PURE__ */ new Set([
  ...HONORIFIC,
  ...NOBILITY_AND_ROYALTY,
  ...RELIGIOUS,
  ...MILITARY,
  ...JUDICIAL,
  ...PROFESSIONAL,
  ...EDUCATION,
  ...POSTNOMINAL_HONOR,
  "JR",
  "SR",
  ...ROMAN_NUMERALS,
  // allow splitting "The Hon Dr" once style phrase is handled
  "HON"
]);
// Add every single-word short form, long form and variant from both entry
// tables. Labels whose normalized form is empty or contains a space are left
// out, since only individual words belong in this set.
for (const { short, long, variants } of [...PREFIX_AFFIX_ENTRIES, ...SUFFIX_AFFIX_ENTRIES]) {
  const labels = [...[short, long].filter(Boolean), ...(variants || [])];
  for (const label of labels) {
    const word = normalizeAffixVariantForMatch(label);
    if (!word || word.includes(" ")) continue;
    SPLITTABLE_WORDS.add(word);
  }
}
|
|
832
|
+
// Every distinct multi-word prefix phrase found in PREFIX_AFFIX_ENTRIES, as
// `{ words, len }` records ordered longest first so that the first hit in
// matchKnownPrefixPhraseAt is the longest phrase. Phrases of equal length keep
// the order in which they were first seen (Array.prototype.sort is stable).
var MULTIWORD_PREFIX_PHRASES = (() => {
  // Keyed by the space-joined words; a Map keeps first-seen insertion order.
  const uniquePhrases = /* @__PURE__ */ new Map();
  for (const { short, long, variants } of PREFIX_AFFIX_ENTRIES) {
    const labels = [...[short, long].filter(Boolean), ...(variants || [])];
    for (const label of labels) {
      const normalized = normalizeAffixVariantForMatch(label);
      // Only labels that normalize to something containing a space qualify.
      if (!normalized || !normalized.includes(" ")) continue;
      const words = normalized.split(" ").filter(Boolean);
      if (words.length < 2) continue;
      const key = words.join(" ");
      if (!uniquePhrases.has(key)) {
        uniquePhrases.set(key, { words, len: words.length });
      }
    }
  }
  return [...uniquePhrases.values()].sort((a, b) => b.len - a.len);
})();
|
|
854
|
+
/**
 * Trims `value` and reduces every internal run of whitespace to one space.
 *
 * @param {string} value - Text to tidy.
 * @returns {string} The trimmed, single-spaced text.
 */
function collapseSpaces(value) {
  // After trimming there is no edge whitespace, so splitting on whitespace
  // runs never yields empty edge pieces.
  return value.trim().split(/\s+/).join(" ");
}
|
|
857
|
+
/**
 * Removes commas, semicolons, colons and whitespace from both ends of
 * `value`. Characters in the interior are left untouched.
 *
 * @param {string} value - Text to clean.
 * @returns {string} The text without leading/trailing `,` `;` `:` or whitespace.
 */
function stripEdgePunctuation(value) {
  const edgeChar = /[,;:\s]/;
  const text = value.trim();
  let start = 0;
  let end = text.length;
  // Walk inwards from each side until a character that must be kept is found.
  while (start < end && edgeChar.test(text[start])) start += 1;
  while (end > start && edgeChar.test(text[end - 1])) end -= 1;
  return text.slice(start, end);
}
|
|
860
|
+
/**
 * Produces the comparison forms of an affix string.
 *
 * @param {string} value - Raw affix text.
 * @returns {{ normalized: string, normalizedKey: string }} `normalized` is the
 *   upper-cased text with edge punctuation and edge dots removed, typographic
 *   apostrophes replaced by `'`, and combining diacritics dropped.
 *   `normalizedKey` additionally has every dot removed and is single-spaced
 *   and trimmed.
 */
function normalizeAffix(value) {
  const cleaned = collapseSpaces(stripEdgePunctuation(value));
  const withoutEdgeDots = cleaned.replace(/^[.]+/, "").replace(/[.]+$/, "");
  const singleSpaced = withoutEdgeDots.replace(/\s+/g, " ");
  const plainApostrophes = singleSpaced.replace(/[\u2019\u2018\u02BC]/g, "'");
  // NFKD splits accented letters into base letter + combining mark; the marks
  // (U+0300-U+036F) are then removed.
  const unaccented = plainApostrophes.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
  const normalized = unaccented.toUpperCase();
  const normalizedKey = normalized.replace(/\./g, "").replace(/\s+/g, " ").trim();
  return { normalized, normalizedKey };
}
|
|
866
|
+
/**
 * Heuristic: does this affix look like an abbreviation?
 *
 * @param {string} value - The affix as written.
 * @param {string} normalizedKey - Its upper-case, dot-free key.
 * @returns {boolean} `true` when `value` contains a dot, or when the key is a
 *   single word of two to five letters A-Z.
 */
function looksAbbreviated(value, normalizedKey) {
  const hasDot = String(value).includes(".");
  const isShortCapsWord = /^[A-Z]{2,5}$/.test(normalizedKey);
  return hasDot || (!normalizedKey.includes(" ") && isShortCapsWord);
}
|
|
871
|
+
/**
 * Fallback classifier for an affix key, based on the built-in vocabularies
 * and keyword hints. classifyAffixToken uses it when the key has no entry in
 * the prefix/suffix index.
 *
 * @param {string} normalizedKey - Upper-case, dot-free affix key.
 * @param {"prefix"|"suffix"} ctx - Position of the affix relative to the name.
 * @returns {string} One of "dynasticNumber", "generational", "style",
 *   "education", "professional", "postnominalHonor", "military", "judicial",
 *   "religious", "honorific" or "other".
 */
function classifyType(normalizedKey, ctx) {
  if (ctx === "suffix" && ROMAN_NUMERALS.has(normalizedKey)) return "dynasticNumber";

  // A leading "SR" is classified as religious rather than generational. This
  // must be tested before the JR/SR check below: previously that check ran
  // first and claimed "SR" in every position, leaving this branch unreachable.
  if (ctx === "prefix" && normalizedKey === "SR") return "religious";
  if (normalizedKey === "JR" || normalizedKey === "SR") return "generational";

  // Exact-match vocabularies, tried in priority order.
  // NOTE(review): "MS" is in both EDUCATION and HONORIFIC; because EDUCATION
  // is tried first, a bare "MS" is reported as "education" in either
  // position. Confirm whether that is intended for prefixes.
  const exactMatchTables = [
    [NOBILITY_AND_ROYALTY, "style"],
    [EDUCATION, "education"],
    [PROFESSIONAL, "professional"],
    [POSTNOMINAL_HONOR, "postnominalHonor"],
    [MILITARY, "military"],
    [JUDICIAL, "judicial"],
    [RELIGIOUS, "religious"],
    [HONORIFIC, "honorific"],
    [STYLE_PHRASES, "style"]
  ];
  for (const [table, type] of exactMatchTables) {
    if (table.has(normalizedKey)) return type;
  }

  // Keyword hints apply to multi-word keys only.
  if (!normalizedKey.includes(" ")) return "other";

  let keywordHints = [];
  if (ctx === "prefix") {
    keywordHints = [
      ["style", ["EXCELLENCY", "HONOURABLE", "HON"]],
      ["judicial", ["JUDGE", "JUSTICE"]],
      ["religious", ["RABBI", "IMAM", "REVEREND", "SISTER", "BROTHER", "FATHER"]],
      ["military", ["ADMIRAL", "MARSHAL", "GENERAL", "COLONEL", "CAPTAIN", "LIEUTENANT", "SERGEANT"]]
    ];
  } else if (ctx === "suffix") {
    keywordHints = [
      ["education", ["PHD", "MBA", "MD", "JD"]],
      ["professional", ["ESQ", "CPA", "RN", "PE"]]
    ];
  }
  // Substring test: the first hint group with any keyword inside the key wins.
  for (const [type, keywords] of keywordHints) {
    if (keywords.some((keyword) => normalizedKey.includes(keyword))) return type;
  }
  return "other";
}
|
|
900
|
+
/**
 * Describes a single affix token.
 *
 * The token is cleaned, normalized and looked up in the index for its
 * position; when no entry is found, the type comes from classifyType.
 *
 * @param {string} value - The affix text as written.
 * @param {"prefix"|"suffix"} ctx - Position of the affix relative to the name.
 * @returns {object} `type`, cleaned `value`, `normalized` key, plus `entryId`,
 *   `canonicalShort` and `canonicalLong` from the matched entry (undefined
 *   without a match). `isAbbrev` and `requiresCommaBefore` are `true` or
 *   `undefined`, never `false`.
 */
function classifyAffixToken(value, ctx) {
  const cleaned = collapseSpaces(stripEdgePunctuation(value));
  const key = normalizeAffix(cleaned).normalizedKey;

  const index = ctx === "prefix" ? PREFIX_INDEX : SUFFIX_INDEX;
  const entry = index.get(key);
  const type = entry ? entry.type : classifyType(key, ctx);

  const abbreviated = looksAbbreviated(cleaned, key);

  // Only suffixes can require a comma, and only for these types (or "ESQ").
  const commaTypes = ["generational", "professional", "education", "postnominalHonor"];
  const needsComma = ctx === "suffix" && (commaTypes.includes(type) || key === "ESQ");

  return {
    type,
    value: cleaned,
    normalized: key,
    entryId: entry?.id,
    canonicalShort: entry?.short,
    canonicalLong: entry?.long,
    // Flags are reported as `true` or left undefined.
    isAbbrev: abbreviated || undefined,
    requiresCommaBefore: needsComma || undefined
  };
}
|
|
918
|
+
/**
 * Checks whether a phrase from MULTIWORD_PREFIX_PHRASES begins at
 * `words[startIdx]`.
 *
 * @param {string[]} words - Words to match against the phrase table.
 * @param {number} startIdx - Index at which the phrase must begin.
 * @returns {number} Word count of the first matching phrase in table order,
 *   or 0 when nothing matches.
 */
function matchKnownPrefixPhraseAt(words, startIdx) {
  const tail = words.slice(startIdx);
  const hit = MULTIWORD_PREFIX_PHRASES.find(
    (candidate) =>
      candidate.len <= tail.length &&
      tail.slice(0, candidate.len).join(" ") === candidate.words.join(" ")
  );
  return hit ? hit.len : 0;
}
|
|
927
|
+
/**
 * Check whether a style-of-address phrase (e.g. "THE RIGHT HONOURABLE",
 * "HIS EXCELLENCY") begins at `startIdx`.
 *
 * Candidates are tried in the listed order, so longer variants come before
 * the shorter phrases they contain.
 *
 * @param {string[]} words - Normalized (upper-case) words of the affix.
 * @param {number} startIdx - Index at which to try matching.
 * @returns {number} Word count of the first matching phrase, or 0 if none.
 */
function matchStylePhraseAt(words, startIdx) {
  const phrases = [
    ["THE", "RIGHT", "HONOURABLE"],
    ["RIGHT", "HONOURABLE"],
    ["THE", "HONOURABLE"],
    ["THE", "RT", "HON"],
    ["THE", "HON"],
    ["HIS", "EXCELLENCY"],
    ["HER", "EXCELLENCY"],
    ["HIS", "MAJESTY"],
    ["HER", "MAJESTY"],
    ["HIS", "GRACE"],
    ["HER", "GRACE"]
  ];
  const tail = words.slice(startIdx);
  for (const phrase of phrases) {
    const size = phrase.length;
    if (tail.length < size) continue;
    if (tail.slice(0, size).join(" ") === phrase.join(" ")) return size;
  }
  return 0;
}
|
|
949
|
+
/**
 * Break a prefix/suffix display string into its individual affixes.
 *
 * Steps:
 *  1. Split on `;`, `,` and `/`; for suffixes also split on the word "and".
 *  2. Within each chunk, keep single words as they are, and split chunks made
 *     up entirely of SPLITTABLE_WORDS into one part per word.
 *  3. Otherwise scan left to right: known multi-word prefix phrases (prefix
 *     context only) and style phrases stay together, splittable words stand
 *     alone, and any other run of words is grouped up to the next splittable
 *     word (or the end of the chunk).
 *
 * @param {string} value - Affix text as displayed.
 * @param {"prefix"|"suffix"} ctx - Which side of the name the affix is on.
 * @returns {string[]} Cleaned, non-empty parts in source order.
 */
function splitAffixToAtomicParts(value, ctx) {
  const collapsed = collapseSpaces(value);
  if (!collapsed) return [];

  const trimmedNonEmpty = (pieces) => pieces.map((piece) => piece.trim()).filter(Boolean);
  const isSplittable = (key) => SPLITTABLE_WORDS.has(key);

  const chunks = [];
  for (const piece of trimmedNonEmpty(collapsed.split(/[;,/]+/g))) {
    if (ctx === "suffix") {
      chunks.push(...trimmedNonEmpty(piece.split(/\band\b/gi)));
    } else {
      chunks.push(piece);
    }
  }

  const parts = [];
  for (const chunk of chunks) {
    const words = chunk.split(/\s+/).filter(Boolean);
    if (words.length <= 1) {
      parts.push(chunk);
      continue;
    }

    const keys = words.map((word) => normalizeAffix(word).normalizedKey.replace(/\s+/g, " "));
    if (keys.every(isSplittable)) {
      parts.push(...words);
      continue;
    }

    let pos = 0;
    while (pos < words.length) {
      // Multi-word phrases take priority: known prefix phrases first, then styles.
      let phraseLen = 0;
      if (ctx === "prefix") {
        phraseLen = matchKnownPrefixPhraseAt(keys, pos);
      }
      if (!(phraseLen > 0)) {
        phraseLen = matchStylePhraseAt(keys, pos);
      }
      if (phraseLen > 0) {
        parts.push(words.slice(pos, pos + phraseLen).join(" "));
        pos += phraseLen;
        continue;
      }

      if (isSplittable(keys[pos])) {
        parts.push(words[pos]);
        pos += 1;
        continue;
      }

      // Group the run of unsplittable words up to the next splittable word.
      let next = pos + 1;
      while (next < keys.length && !isSplittable(keys[next])) {
        next += 1;
      }
      if (next < keys.length) {
        parts.push(words.slice(pos, next).join(" "));
        pos = next;
        continue;
      }

      parts.push(words.slice(pos).join(" "));
      break;
    }
  }

  return parts.map(stripEdgePunctuation).map(collapseSpaces).filter(Boolean);
}
|
|
1004
|
+
/**
 * Turn an affix display string into a list of classified tokens.
 *
 * @param {string|undefined} displayValue - Prefix or suffix text, if any.
 * @param {"prefix"|"suffix"} ctx - Which side of the name the affix is on.
 * @returns {object[]|undefined} One token per atomic part, or `undefined`
 *   when the input is empty or yields no parts.
 */
function buildAffixTokens(displayValue, ctx) {
  const atoms = displayValue ? splitAffixToAtomicParts(displayValue, ctx) : [];
  if (atoms.length === 0) return void 0;
  const tokens = [];
  for (const atom of atoms) {
    tokens.push(classifyAffixToken(atom, ctx));
  }
  return tokens;
}
|
|
1010
|
+
|
|
1011
|
+
// src/normalize.ts
|
|
1012
|
+
/**
 * Canonicalize raw name input before detection/parsing.
 *
 * - trims and collapses runs of whitespace to single spaces
 * - folds typographic (curly) double/single quotes to their ASCII forms
 * - pads `&` and `+` connectors with exactly one space on each side
 * - drops trailing commas and whitespace
 *
 * The curly quotes are written as `\u` escapes so the character classes stay
 * correct even if the file is re-encoded or passed through a tool that
 * flattens typographic quotes (which turns the replacement into a no-op).
 *
 * @param {string|null|undefined} raw - Text as supplied by the caller.
 * @returns {string} Normalized text; `""` for empty/nullish input.
 */
function normalizeInput(raw) {
  if (!raw) return "";
  let s = raw.trim();
  s = s.replace(/\s+/g, " ");
  s = s.replace(/[\u201C\u201D]/g, '"'); // “ ” -> "
  s = s.replace(/[\u2018\u2019]/g, "'"); // ‘ ’ -> '
  s = s.replace(/\s*&\s*/g, " & ");
  s = s.replace(/\s*\+\s*/g, " + ");
  s = s.replace(/[,\s]+$/g, "");
  return s;
}
|
|
1023
|
+
/**
 * Split text into whitespace-separated tokens.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty tokens in order; `[]` when there are none.
 */
function tokenize(text) {
  return text.match(/\S+/g) ?? [];
}
|
|
1026
|
+
/**
 * Heuristic: does a token look like (part of) a personal name?
 *
 * Accepts:
 *  - initials, with or without dots: "J", "J.", "JR", "J.R."
 *  - a capitalised word, optionally followed by apostrophe- or hyphen-joined
 *    segments: "Smith", "O'Brien", "Smith-Jones"
 *
 * The joined segments are restricted to ASCII letters. (A previous `A-z`
 * range also admitted `[`, `\`, `]`, `^`, `_` and the backtick, which sit
 * between "Z" and "a" in ASCII.)
 *
 * @param {string} token - A single whitespace-free token.
 * @returns {boolean} `true` when the token matches one of the shapes above.
 */
function isNameLikeToken(token) {
  if (/^([A-Z]\.?)+$/.test(token)) return true;
  return /^[A-Z][a-zA-Z]*(?:['-][a-zA-Z]+)*$/.test(token);
}
|
|
1030
|
+
/**
 * Pull the first parenthesised group out of a string.
 *
 * @param {string} text - Text that may contain "(...)".
 * @returns {{main: string, paren: string}|null} `main` is the text with the
 *   group (and its surrounding whitespace) replaced by a single space, then
 *   whitespace-collapsed and trimmed; `paren` is the trimmed content of the
 *   parentheses. `null` when there is no non-empty group.
 */
function extractParenContent(text) {
  const found = text.match(/\s*\(([^)]+)\)\s*/);
  if (!found) return null;
  const [whole, inner] = found;
  const before = text.slice(0, found.index);
  const after = text.slice(found.index + whole.length);
  return {
    main: `${before} ${after}`.replace(/\s+/g, " ").trim(),
    paren: inner.trim()
  };
}
|
|
1040
|
+
/**
 * Report whether every ASCII letter in the text is upper-case.
 *
 * @param {string} text - Text to inspect; non-letters are ignored.
 * @returns {boolean} `false` when the text contains no ASCII letters at all.
 */
function isAllCaps(text) {
  const asciiLetters = text.replace(/[^a-zA-Z]/g, "");
  if (asciiLetters.length === 0) return false;
  return !/[a-z]/.test(asciiLetters);
}
|
|
1044
|
+
/**
 * Report whether the text contains an "@" character.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` if "@" occurs anywhere in the text.
 */
function hasAtSymbol(text) {
  return text.indexOf("@") !== -1;
}
|
|
1047
|
+
/**
 * Split text of the form `display <bracket>` (the bracketed part must end
 * the string, apart from trailing whitespace).
 *
 * @param {string} text - Text to inspect.
 * @returns {{display: string, bracket: string}|null} Trimmed parts, or
 *   `null` when the text does not end with a non-empty `<...>` group.
 */
function extractAngleBrackets(text) {
  const found = text.match(/^(.*?)\s*<([^>]+)>\s*$/);
  if (!found) return null;
  const [, display, bracket] = found;
  return { display: display.trim(), bracket: bracket.trim() };
}
|
|
1054
|
+
/**
 * Report whether the text begins with the word "the" (any case) followed by
 * whitespace.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` for e.g. "The Smiths", `false` for "Theodore".
 */
function startsWithThe(text) {
  const leadingArticle = /^the\s+/i;
  return leadingArticle.exec(text) !== null;
}
|
|
1057
|
+
/**
 * Remove a leading "the" (any case) and the whitespace after it.
 *
 * @param {string} text - Text to process.
 * @returns {string} The text without the leading article, or the text
 *   unchanged when it does not start with "the" + whitespace.
 */
function stripLeadingThe(text) {
  const lead = text.match(/^the\s+/i);
  return lead ? text.slice(lead[0].length) : text;
}
|
|
1060
|
+
/**
 * Heuristic: does the text end in a word that looks like a pluralised
 * surname (ends in "s" or "es")?
 *
 * The match is case-insensitive and ignores trailing whitespace.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when the final word ends in "s"/"es".
 */
function hasPluralSurnameEnding(text) {
  const pluralTail = /\b[A-Z][a-z]+(s|es)\s*$/i;
  return pluralTail.exec(text) !== null;
}
|
|
1063
|
+
|
|
1064
|
+
// src/data/legal-forms.ts
|
|
1065
|
+
// Catalogue of legal-form designators.
//   id       - canonical identifier reported as the organization's legal form
//   patterns - spellings that map to this entry; buildLegalFormIndex() runs
//              each one through normalizeForMatch() (upper-case, dots removed)
//   strong   - flag separating the unambiguous designators from the weaker
//              ones at the bottom of the list
var LEGAL_FORM_ENTRIES = [
  // US corporate forms (strong)
  { id: "Inc", patterns: ["INC", "INCORPORATED"], strong: true },
  { id: "Corp", patterns: ["CORP", "CORPORATION"], strong: true },
  { id: "LLC", patterns: ["LLC", "L L C", "L.L.C."], strong: true },
  { id: "LLP", patterns: ["LLP", "L L P", "L.L.P."], strong: true },
  { id: "LP", patterns: ["LP", "L P", "L.P."], strong: true },
  // UK/Commonwealth forms (strong)
  { id: "Ltd", patterns: ["LTD", "LIMITED"], strong: true },
  { id: "PLC", patterns: ["PLC", "P L C", "P.L.C."], strong: true },
  // European forms (strong)
  { id: "GmbH", patterns: ["GMBH", "G M B H"], strong: true },
  { id: "AG", patterns: ["AG", "A G"], strong: true },
  { id: "SA", patterns: ["SA", "S A", "S.A."], strong: true },
  { id: "SAS", patterns: ["SAS", "S A S"], strong: true },
  { id: "BV", patterns: ["BV", "B V", "B.V."], strong: true },
  { id: "Oy", patterns: ["OY"], strong: true },
  { id: "SRL", patterns: ["SRL", "S R L"], strong: true },
  { id: "SpA", patterns: ["SPA", "S P A"], strong: true },
  // Institutional forms (strong)
  { id: "Trust", patterns: ["TRUST"], strong: true },
  { id: "Foundation", patterns: ["FOUNDATION"], strong: true },
  // Weaker signals (need context)
  { id: "Company", patterns: ["COMPANY"], strong: false },
  { id: "Co", patterns: ["CO"], strong: false }
];
|
|
1091
|
+
/**
 * Build the lookup table from normalized pattern to legal-form entry.
 *
 * When two patterns normalize to the same key, the first one registered
 * wins; later duplicates are ignored.
 *
 * @returns {Map<string, object>} Normalized pattern -> LEGAL_FORM_ENTRIES item.
 */
function buildLegalFormIndex() {
  const index = /* @__PURE__ */ new Map();
  LEGAL_FORM_ENTRIES.forEach((entry) => {
    entry.patterns.forEach((pattern) => {
      const key = normalizeForMatch(pattern);
      if (index.has(key)) return;
      index.set(key, entry);
    });
  });
  return index;
}
|
|
1103
|
+
/**
 * Normalize a legal-form token for index lookup: upper-case, remove every
 * dot, collapse whitespace runs to one space, and trim.
 *
 * @param {string} value - Token or pattern to normalize.
 * @returns {string} Lookup key, e.g. "l.l.c." -> "LLC".
 */
function normalizeForMatch(value) {
  const upper = value.toUpperCase();
  const withoutDots = upper.split(".").join("");
  const singleSpaced = withoutDots.split(/\s+/).join(" ");
  return singleSpaced.trim();
}
|
|
1106
|
+
// Normalized pattern -> legal-form entry, built once when the module loads.
var LEGAL_FORM_INDEX = buildLegalFormIndex();

/**
 * Look up the legal-form entry for a token.
 *
 * @param {string} token - Candidate designator such as "Inc." or "l.l.c.".
 * @returns {object|undefined} The matching entry, or `undefined` if unknown.
 */
function matchLegalForm(token) {
  return LEGAL_FORM_INDEX.get(normalizeForMatch(token));
}
|
|
1111
|
+
// A legal designator at the very end of the text, preceded by start-of-text,
// whitespace or a comma. Group 1 is the designator itself.
var LEGAL_SUFFIX_END_RE = new RegExp(
  "(?:^|[\\s,])(inc\\.?|incorporated|corp\\.?|corporation|llc|l\\.l\\.c\\.|llp|l\\.l\\.p\\.|lp|l\\.p\\.|ltd\\.?|limited|plc|p\\.l\\.c\\.|gmbh|ag|s\\.?a\\.?|sas|bv|b\\.v\\.|oy|srl|spa|trust|foundation)\\.?$",
  "i"
);
// The ", Inc." style: a comma immediately followed by a short designator at
// the end of the text. Group 1 is the designator without the comma.
var COMMA_LEGAL_RE = /,\s*(inc\.?|llc|l\.l\.c\.|corp\.?|ltd\.?|plc|gmbh|s\.a\.)\.?$/i;

/**
 * Split a trailing legal designator off an organization name.
 *
 * The comma form (", Inc.") is tried first; in that case `suffix` is the
 * whole matched tail including the comma. Otherwise any designator at the
 * end of the text is used and `suffix` is the designator alone.
 *
 * `baseName` never keeps the separator: trailing commas/whitespace are
 * removed, so "Acme, Limited" yields "Acme" rather than "Acme,". It is `""`
 * when the text consists of nothing but the designator.
 *
 * @param {string} text - Normalized name text.
 * @returns {{baseName: string, suffix: string, legalForm: string}|null}
 *   `legalForm` is the matched entry id, or "UnknownLegalForm" when the
 *   designator is not in the index; `null` when no designator is found.
 */
function extractLegalSuffix(text) {
  const withoutTrailingSeparator = (value) => value.replace(/[,\s]+$/, "").trim();

  const commaMatch = text.match(COMMA_LEGAL_RE);
  if (commaMatch) {
    return {
      baseName: withoutTrailingSeparator(text.slice(0, commaMatch.index)),
      suffix: commaMatch[0].trim(),
      legalForm: matchLegalForm(commaMatch[1])?.id ?? "UnknownLegalForm"
    };
  }

  const endMatch = text.match(LEGAL_SUFFIX_END_RE);
  if (endMatch) {
    const suffix = endMatch[1];
    return {
      // The pattern is anchored at the end, so everything before the match
      // start is the base name.
      baseName: withoutTrailingSeparator(text.slice(0, endMatch.index)),
      suffix,
      legalForm: matchLegalForm(suffix)?.id ?? "UnknownLegalForm"
    };
  }

  return null;
}
|
|
1142
|
+
|
|
1143
|
+
// src/data/institutions.ts
|
|
1144
|
+
// Phrases that mark a name as an institution.
//   pattern   - case-insensitive regex tested against the whole name
//   legalForm - legal form to report when the phrase matches
//   strong    - flag marking how reliable the phrase is; detectOrganization()
//               raises confidence to 0.75 for strong matches
// matchInstitutionPhrase() returns the first entry whose pattern matches, so
// order is significant.
var INSTITUTION_PHRASES = [
  // Banking/Financial (strong)
  { pattern: /\bbank\s+of\b/i, legalForm: "Bank", strong: true },
  { pattern: /\bfirst\s+national\s+bank\b/i, legalForm: "Bank", strong: true },
  { pattern: /\btrust\s+company\b/i, legalForm: "TrustCompany", strong: true },
  { pattern: /\bcredit\s+union\b/i, legalForm: "CreditUnion", strong: true },
  { pattern: /\bsavings\s+(?:and\s+)?loan\b/i, legalForm: "Bank", strong: true },
  { pattern: /\b(?:national|federal)\s+bank\b/i, legalForm: "Bank", strong: true },
  // Educational (strong)
  { pattern: /\buniversity\s+of\b/i, legalForm: "University", strong: true },
  { pattern: /\buniversity$/i, legalForm: "University", strong: true },
  { pattern: /\bcollege\s+of\b/i, legalForm: "University", strong: true },
  { pattern: /\binstitute\s+of\b/i, legalForm: "University", strong: true },
  // Healthcare (strong, except the generic "clinic")
  { pattern: /\bhospital\b/i, legalForm: "Hospital", strong: true },
  { pattern: /\bmedical\s+center\b/i, legalForm: "Hospital", strong: true },
  { pattern: /\bclinic\b/i, legalForm: "Hospital", strong: false },
  // Religious (strong, except the generic "temple")
  { pattern: /\bchurch\s+of\b/i, legalForm: "Church", strong: true },
  { pattern: /\bchurch$/i, legalForm: "Church", strong: true },
  { pattern: /\bministry\b/i, legalForm: "Church", strong: true },
  { pattern: /\bsynagogue\b/i, legalForm: "Church", strong: true },
  { pattern: /\bmosque\b/i, legalForm: "Church", strong: true },
  { pattern: /\btemple\b/i, legalForm: "Church", strong: false },
  // Government (strong, except the generic "office of")
  { pattern: /\bcity\s+of\b/i, legalForm: "Government", strong: true },
  { pattern: /\bcounty\s+of\b/i, legalForm: "Government", strong: true },
  { pattern: /\bstate\s+of\b/i, legalForm: "Government", strong: true },
  { pattern: /\bdepartment\s+of\b/i, legalForm: "Government", strong: true },
  { pattern: /\bgovernment\s+of\b/i, legalForm: "Government", strong: true },
  { pattern: /\boffice\s+of\b/i, legalForm: "Government", strong: false }
];
|
|
1176
|
+
// Generic business words that hint at an organization but are not conclusive.
// Group 1 is the keyword. The whole-word keywords need a trailing `\b`;
// "co." must NOT be followed by `\b`, because there is no word boundary
// between a dot and a following space or end of text, which would make
// "Smith & Co." impossible to match.
var ORG_WEAK_KEYWORDS_RE = /\b((?:bank|trust|holdings|partners|group|company|associates|enterprises|services|solutions|consulting)\b|co\.)/i;
// "Doing business as" / "also known as" markers. Group 1 is the marker.
var DBA_RE = /\b(d\/b\/a|doing\s+business\s+as|dba|aka|a\/k\/a)\b/i;
// "Care of" / "attention" routing markers. Group 1 is the marker.
var CARE_OF_RE = /\b(c\/o|care\s+of|attn:?|attention:?)\b/i;
|
|
1179
|
+
/**
 * Find the first institution phrase whose pattern matches the text.
 *
 * @param {string} text - Normalized name text.
 * @returns {object|null} The matching INSTITUTION_PHRASES entry, or `null`.
 */
function matchInstitutionPhrase(text) {
  const hit = INSTITUTION_PHRASES.find((phrase) => phrase.pattern.test(text));
  return hit ?? null;
}
|
|
1187
|
+
/**
 * Report whether the text contains a generic business keyword.
 *
 * @param {string} text - Normalized name text.
 * @returns {boolean} `true` when ORG_WEAK_KEYWORDS_RE matches.
 */
function hasWeakOrgKeyword(text) {
  return ORG_WEAK_KEYWORDS_RE.exec(text) !== null;
}
|
|
1190
|
+
/**
 * Report whether the text contains a "doing business as" / "aka" marker.
 *
 * @param {string} text - Normalized name text.
 * @returns {boolean} `true` when DBA_RE matches.
 */
function hasDbaPattern(text) {
  return DBA_RE.exec(text) !== null;
}
|
|
1193
|
+
/**
 * Report whether the text contains a "care of" / "attention" marker.
 *
 * @param {string} text - Normalized name text.
 * @returns {boolean} `true` when CARE_OF_RE matches.
 */
function hasCareOfPattern(text) {
  return CARE_OF_RE.exec(text) !== null;
}
|
|
1196
|
+
/**
 * Split text around the first "doing business as" / "aka" marker.
 *
 * @param {string} text - Normalized name text.
 * @returns {{primary: string, aka: string}|null} The trimmed text before and
 *   after the marker, or `null` when there is no marker or either side is
 *   empty.
 */
function extractDba(text) {
  const marker = text.match(DBA_RE);
  if (!marker) return null;
  const markerEnd = marker.index + marker[0].length;
  const primary = text.slice(0, marker.index).trim();
  const aka = text.slice(markerEnd).trim();
  if (!primary || !aka) return null;
  return { primary, aka };
}
|
|
1207
|
+
|
|
1208
|
+
// src/detectors/organization.ts
|
|
1209
|
+
/**
 * Decide whether a normalized name denotes an organization.
 *
 * Signals, in the order their reason codes are recorded:
 *  - trailing legal designator  -> ORG_LEGAL_SUFFIX (+ ORG_COMMA_LEGAL for
 *    the ", Inc." form); confidence 1
 *  - institution phrase         -> ORG_INSTITUTION_PHRASE; 0.75 when strong
 *  - "dba"/"aka" marker         -> ORG_DBA; 0.75; splits base name and alias
 *  - "c/o"/"attn" marker        -> ORG_CARE_OF (supporting reason only)
 *
 * Only the first three make the name an organization. A care-of marker or a
 * weak keyword on its own produces the negative result.
 *
 * @param {string} normalized - Output of input normalization.
 * @param {string} raw - Original input (not used by this function).
 * @returns {object} `{ isOrg: false, confidence: 0, reasons: [] }`, or
 *   `{ isOrg: true, confidence, reasons, entity }`.
 */
function detectOrganization(normalized, raw) {
  const reasons = [];
  const confidenceFloor = 0.5;
  const levels = [confidenceFloor];
  let baseName = normalized;
  let legalSuffixRaw;
  let legalForm;
  let aka;

  const legal = extractLegalSuffix(normalized);
  if (legal) {
    reasons.push("ORG_LEGAL_SUFFIX");
    if (COMMA_LEGAL_RE.test(normalized)) {
      reasons.push("ORG_COMMA_LEGAL");
    }
    levels.push(1);
    ({ baseName, suffix: legalSuffixRaw, legalForm } = legal);
  }

  const institution = matchInstitutionPhrase(normalized);
  if (institution) {
    reasons.push("ORG_INSTITUTION_PHRASE");
    levels.push(institution.strong ? 0.75 : confidenceFloor);
    // A legal designator, when present, takes precedence over the phrase.
    if (!legalForm) {
      legalForm = institution.legalForm;
    }
  }

  const hasDba = hasDbaPattern(normalized);
  if (hasDba) {
    reasons.push("ORG_DBA");
    levels.push(0.75);
    const dba = extractDba(normalized);
    if (dba) {
      baseName = dba.primary;
      aka = [dba.aka];
    }
  }

  if (hasCareOfPattern(normalized)) {
    reasons.push("ORG_CARE_OF");
  }

  if (reasons.length === 0 && hasWeakOrgKeyword(normalized)) {
    reasons.push("ORG_WEAK_KEYWORD");
  }

  const isOrg = Boolean(legal) || Boolean(institution) || Boolean(hasDba);
  if (!isOrg) {
    return { isOrg: false, confidence: 0, reasons: [] };
  }

  return {
    isOrg: true,
    confidence: Math.max(...levels),
    reasons,
    entity: {
      kind: "organization",
      baseName: baseName || normalized,
      legalForm,
      legalSuffixRaw,
      aka
    }
  };
}
|
|
1277
|
+
/**
 * Assemble the public organization entity from a detection result.
 *
 * @param {object} result - Detection result (`confidence`, `reasons`, and
 *   optionally `entity`).
 * @param {string} raw - Original input text.
 * @param {string} normalized - Normalized input text; also the fallback
 *   `baseName` when the result carries none.
 * @param {string} [locale="en"] - Locale recorded in the metadata.
 * @returns {object} Organization entity with a `meta` block.
 */
function buildOrganizationEntity(result, raw, normalized, locale = "en") {
  const { entity, confidence, reasons } = result;
  const details = entity ?? {};
  return {
    kind: "organization",
    baseName: details.baseName || normalized,
    legalForm: details.legalForm,
    legalSuffixRaw: details.legalSuffixRaw,
    aka: details.aka,
    meta: { raw, normalized, confidence, reasons, locale }
  };
}
|
|
1294
|
+
|
|
1295
|
+
// src/detectors/compound.ts
|
|
1296
|
+
// Connector between the two members of a compound name.
// Group 1: "&", "and", "+", "et", ";" or "|" delimited by whitespace or the
// start/end of the text. Group 2: "/" with whitespace on both sides.
var COMPOUND_CONNECTOR_RE = /(?:^|\s)(&|and|\+|et|;|\|)(?:\s|$)|(?:\s)(\/)\s/i;
// Leading honorific pairs such as "Mr. & Mrs." / "Dr. and Mr.".
// `first`/`second` are the canonical honorifics for the two members.
// detectPairedHonorifics() returns the first entry that matches, so for each
// leading honorific the "mrs" variants are listed before "ms"/"mr".
var PAIRED_HONORIFIC_PATTERNS = [
  { pattern: /^mr\.?\s*[&+]\s*mrs\.?/i, first: "Mr.", second: "Mrs." },
  { pattern: /^mr\.?\s+and\s+mrs\.?/i, first: "Mr.", second: "Mrs." },
  { pattern: /^mr\.?\s*[&+]\s*ms\.?/i, first: "Mr.", second: "Ms." },
  { pattern: /^mr\.?\s+and\s+ms\.?/i, first: "Mr.", second: "Ms." },
  { pattern: /^mr\.?\s*[&+]\s*mr\.?/i, first: "Mr.", second: "Mr." },
  { pattern: /^mr\.?\s+and\s+mr\.?/i, first: "Mr.", second: "Mr." },
  { pattern: /^mrs\.?\s*[&+]\s*mrs\.?/i, first: "Mrs.", second: "Mrs." },
  { pattern: /^mrs\.?\s+and\s+mrs\.?/i, first: "Mrs.", second: "Mrs." },
  { pattern: /^ms\.?\s*[&+]\s*ms\.?/i, first: "Ms.", second: "Ms." },
  { pattern: /^ms\.?\s+and\s+ms\.?/i, first: "Ms.", second: "Ms." },
  { pattern: /^dr\.?\s*[&+]\s*mrs\.?/i, first: "Dr.", second: "Mrs." },
  { pattern: /^dr\.?\s+and\s+mrs\.?/i, first: "Dr.", second: "Mrs." },
  { pattern: /^dr\.?\s*[&+]\s*mr\.?/i, first: "Dr.", second: "Mr." },
  { pattern: /^dr\.?\s+and\s+mr\.?/i, first: "Dr.", second: "Mr." },
  { pattern: /^dr\.?\s*[&+]\s*ms\.?/i, first: "Dr.", second: "Ms." },
  { pattern: /^dr\.?\s+and\s+ms\.?/i, first: "Dr.", second: "Ms." },
  { pattern: /^dr\.?\s*[&+]\s*dr\.?/i, first: "Dr.", second: "Dr." },
  { pattern: /^dr\.?\s+and\s+dr\.?/i, first: "Dr.", second: "Dr." }
];
// Plural honorific (lower-case, with and without trailing dot) -> the
// singular honorific applied to each member of the compound.
var PLURAL_HONORIFICS = {
  "drs": "Dr.",
  "drs.": "Dr.",
  "doctors": "Dr.",
  "messrs": "Mr.",
  "messrs.": "Mr.",
  "messieurs": "Mr.",
  "mmes": "Mrs.",
  "mmes.": "Mrs.",
  "mesdames": "Mrs.",
  "profs": "Prof.",
  "profs.": "Prof.",
  "professors": "Prof.",
  "revs": "Rev.",
  "revs.": "Rev.",
  "reverends": "Rev."
};
// A single leading honorific. It must be followed by a dot, whitespace or the
// end of the text, so e.g. "gen" does not match the start of "General".
// The match includes the trailing dot/whitespace; group 1 is the bare word.
var SINGLE_HONORIFIC_RE = /^(mr|mrs|ms|miss|mx|dr|prof|sir|dame|rev|fr|rabbi|imam|pastor|judge|justice|capt|maj|col|gen|adm|sgt|lt)(?:\.\s*|\s+|$)/i;
|
|
1335
|
+
// Lower-case name suffixes: generational, academic/professional credentials
// and post-nominal honours. Dotted and undotted spellings are both listed.
var SUFFIX_SET = /* @__PURE__ */ new Set([
  "jr",
  "jr.",
  "sr",
  "sr.",
  "ii",
  "iii",
  "iv",
  "v",
  "vi",
  "vii",
  "viii",
  "ix",
  "x",
  "phd",
  "ph.d.",
  "ph.d",
  "md",
  "m.d.",
  "dds",
  "d.d.s.",
  "dmd",
  "d.m.d.",
  "esq",
  "esq.",
  "jd",
  "j.d.",
  "mba",
  "m.b.a.",
  "cpa",
  "cfa",
  "rn",
  "np",
  "pa-c",
  "obe",
  "mbe",
  "cbe",
  "kbe",
  "dbe"
]);

/**
 * Report whether a token is a recognised name suffix.
 *
 * Matching is case-insensitive and tolerant of dots: the token is tried as
 * written, without its final dot, and with all dots removed. Stripping only
 * the final dot is not enough: "M.D." would become "m.d", which is not in
 * the set, so dotted credentials such as "M.D.", "D.D.S." or "J.D." would
 * never match.
 *
 * @param {string} token - A single whitespace-free token.
 * @returns {boolean} `true` when the token is in SUFFIX_SET in any of the
 *   three forms.
 */
function isSuffixToken(token) {
  const lower = token.toLowerCase();
  return (
    SUFFIX_SET.has(lower) ||
    SUFFIX_SET.has(lower.replace(/\.$/, "")) ||
    SUFFIX_SET.has(lower.replace(/\./g, ""))
  );
}
|
|
1378
|
+
/**
 * Map a matched connector to its canonical type.
 *
 * @param {string} connector - Connector text as matched (any case, may have
 *   surrounding whitespace).
 * @returns {"&"|"and"|"+"|"et"|"unknown"} The canonical connector, or
 *   "unknown" for anything else (e.g. ";", "|", "/").
 */
function getConnectorType(connector) {
  const canonical = ["&", "and", "+", "et"];
  const key = connector.toLowerCase().trim();
  return canonical.includes(key) ? key : "unknown";
}
|
|
1386
|
+
/**
 * Find the first paired-honorific pattern ("Mr. & Mrs.", "Dr. and Dr.", ...)
 * that matches the start of the text.
 *
 * @param {string} text - Normalized name text.
 * @returns {object|null} The matching PAIRED_HONORIFIC_PATTERNS entry, or
 *   `null` when none matches.
 */
function detectPairedHonorifics(text) {
  const hit = PAIRED_HONORIFIC_PATTERNS.find((pair) => pair.pattern.test(text));
  return hit ?? null;
}
|
|
1394
|
+
/**
 * Detect a leading plural honorific ("Drs.", "Messrs", "Professors", ...).
 *
 * The lookup checks own properties only. PLURAL_HONORIFICS is a plain
 * object, so a bare `PLURAL_HONORIFICS[token]` on user input would also find
 * inherited members such as `constructor` and treat them as a match.
 *
 * @param {string} text - Normalized name text.
 * @returns {{plural: string, singular: string, remainder: string}|null}
 *   `plural` is the first token as written, `singular` the honorific to
 *   apply to each member, `remainder` the rest of the tokens joined with
 *   single spaces; `null` when the first token is not a plural honorific.
 */
function detectPluralHonorific(text) {
  const tokens = tokenize(text);
  if (tokens.length === 0) return null;
  const [plural] = tokens;
  const key = plural.toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(PLURAL_HONORIFICS, key)) return null;
  return {
    plural,
    singular: PLURAL_HONORIFICS[key],
    remainder: tokens.slice(1).join(" ")
  };
}
|
|
1405
|
+
/**
 * Parse one member of a compound name into its parts.
 *
 * Order of work:
 *  1. strip a leading honorific;
 *  2. if the text after the last comma starts with a suffix token, take all
 *     of it as the suffix;
 *  3. peel further suffix tokens off the end (always leaving at least one
 *     token), prepending them to the suffix with ", ";
 *  4. of the remaining tokens the first is the given name, the last (when
 *     there are two or more) the family name, and anything between the
 *     middle name(s).
 *
 * @param {string} text - Text of a single member.
 * @returns {{honorific?: string, given?: string, middle?: string,
 *   family?: string, suffix?: string, raw: string}} Parsed parts; absent
 *   parts are `undefined`. `raw` is the input unchanged.
 */
function parseMemberTokens(text) {
  let rest = text.trim();

  let honorific;
  const lead = rest.match(SINGLE_HONORIFIC_RE);
  if (lead) {
    const [matched] = lead;
    honorific = matched.trim();
    rest = rest.slice(matched.length).trim();
  }

  // Suffix pieces, kept in reading order.
  const suffixPieces = [];
  const lastComma = rest.lastIndexOf(",");
  if (lastComma > 0) {
    const tail = rest.slice(lastComma + 1).trim();
    const [firstTailToken] = tokenize(tail);
    if (firstTailToken !== undefined && isSuffixToken(firstTailToken)) {
      suffixPieces.push(tail);
      rest = rest.slice(0, lastComma).trim();
    }
  }

  const nameTokens = tokenize(rest);
  while (nameTokens.length > 1 && isSuffixToken(nameTokens[nameTokens.length - 1])) {
    suffixPieces.unshift(nameTokens.pop());
  }

  const [given, ...others] = nameTokens;
  const family = others.pop();
  const middle = others.length > 0 ? others.join(" ") : undefined;
  const suffix = suffixPieces.length > 0 ? suffixPieces.join(", ") : undefined;

  return { honorific, given, middle, family, suffix, raw: text };
}
|
|
1445
|
+
/**
 * Split text at the first compound connector.
 *
 * @param {string} text - Text to split.
 * @returns {{connector: string, left: string, right: string}|null} The
 *   connector as written and the trimmed text on either side, or `null`
 *   when the text contains no connector.
 */
function splitAtCompoundConnector(text) {
  const match = text.match(COMPOUND_CONNECTOR_RE);
  if (!match) return null;
  return {
    connector: match[1] || match[2],
    left: text.slice(0, match.index).trim(),
    right: text.slice(match.index + match[0].length).trim()
  };
}

/**
 * Return the last token of a multi-token name when it looks like a family
 * name (name-like and not a suffix), otherwise `undefined`.
 *
 * @param {string} name - Right-hand member text.
 * @returns {string|undefined} Candidate shared family name.
 */
function trailingSharedFamily(name) {
  const nameTokens = tokenize(name);
  if (nameTokens.length < 2) return void 0;
  const last = nameTokens[nameTokens.length - 1];
  return isNameLikeToken(last) && !isSuffixToken(last) ? last : void 0;
}

/**
 * Detect a compound name: two people joined by a connector.
 *
 * Three shapes are tried in order:
 *  1. paired honorifics  - "Mr. & Mrs. [John] Smith", "Dr. & Mr. A and B C"
 *  2. plural honorific   - "Drs. John and Jane Smith"
 *  3. plain connector    - "John & Mary Smith", "Smith, John & Mary"
 *
 * @param {string} normalized - Normalized input text.
 * @returns {object} `{ isCompound: false, confidence: 0, reasons: [] }`, or
 *   a positive result with `confidence`, `reasons`, `connector`,
 *   `leftPart`, `rightPart`, `sharedFamily` (may be `undefined`) and, per
 *   shape, `pairedHonorifics` or `pluralHonorific`/`singularHonorific`.
 */
function detectCompound(normalized) {
  const reasons = [];

  // --- 1. Paired honorifics -------------------------------------------
  const pairedMatch = detectPairedHonorifics(normalized);
  if (pairedMatch) {
    const { first, second } = pairedMatch;
    const remainder = normalized.replace(pairedMatch.pattern, "").trim();
    const tokens = tokenize(remainder);
    reasons.push("COMPOUND_CONNECTOR");
    reasons.push("COMPOUND_PAIRED_HONORIFIC");

    // "Mr. & Mrs. Smith"
    if (tokens.length === 1 && isNameLikeToken(tokens[0])) {
      reasons.push("COMPOUND_SHARED_FAMILY");
      return {
        isCompound: true,
        confidence: 0.75,
        reasons,
        connector: "&",
        leftPart: first,
        rightPart: second,
        sharedFamily: tokens[0],
        pairedHonorifics: { first, second }
      };
    }

    // "Mr. & Mrs. John Smith": the given name belongs to the first member.
    if (tokens.length === 2 && isNameLikeToken(tokens[0]) && isNameLikeToken(tokens[1])) {
      reasons.push("COMPOUND_SHARED_FAMILY");
      return {
        isCompound: true,
        confidence: 1,
        reasons,
        connector: "&",
        leftPart: `${first} ${tokens[0]}`,
        rightPart: second,
        sharedFamily: tokens[1],
        pairedHonorifics: { first, second }
      };
    }

    // "Dr. & Mr. John and Jim Smith": a second connector splits the names.
    const inner = splitAtCompoundConnector(remainder);
    if (inner && inner.left && inner.right) {
      const sharedFamily2 = trailingSharedFamily(inner.right);
      if (sharedFamily2) {
        reasons.push("COMPOUND_SHARED_FAMILY");
      }
      return {
        isCompound: true,
        confidence: sharedFamily2 ? 1 : 0.75,
        reasons,
        connector: getConnectorType(inner.connector),
        leftPart: `${first} ${inner.left}`.trim(),
        rightPart: `${second} ${inner.right}`.trim(),
        sharedFamily: sharedFamily2,
        pairedHonorifics: { first, second }
      };
    }

    // Anything else after the honorifics is taken as the shared family name.
    if (remainder) {
      reasons.push("COMPOUND_SHARED_FAMILY");
      return {
        isCompound: true,
        confidence: 0.75,
        reasons,
        connector: "&",
        leftPart: first,
        rightPart: second,
        sharedFamily: remainder,
        pairedHonorifics: { first, second }
      };
    }
  }

  // --- 2. Plural honorific --------------------------------------------
  const pluralMatch = detectPluralHonorific(normalized);
  if (pluralMatch) {
    const split = splitAtCompoundConnector(pluralMatch.remainder);
    if (split && split.left && split.right) {
      reasons.push("COMPOUND_CONNECTOR");
      reasons.push("COMPOUND_PLURAL_HONORIFIC");
      const sharedFamily2 = trailingSharedFamily(split.right);
      if (sharedFamily2) {
        reasons.push("COMPOUND_SHARED_FAMILY");
      }
      return {
        isCompound: true,
        confidence: sharedFamily2 ? 1 : 0.75,
        reasons,
        connector: getConnectorType(split.connector),
        leftPart: `${pluralMatch.singular} ${split.left}`.trim(),
        rightPart: `${pluralMatch.singular} ${split.right}`.trim(),
        sharedFamily: sharedFamily2,
        pluralHonorific: pluralMatch.plural,
        singularHonorific: pluralMatch.singular
      };
    }
  }

  // --- 3. Plain connector ---------------------------------------------
  const plain = splitAtCompoundConnector(normalized);
  if (!plain) {
    return { isCompound: false, confidence: 0, reasons: [] };
  }
  const connectorType = getConnectorType(plain.connector);
  const leftPart = plain.left;
  const rightPart = plain.right;
  if (!leftPart || !rightPart) {
    return { isCompound: false, confidence: 0, reasons: [] };
  }

  const rightTokens = tokenize(rightPart);
  const leftHasName = tokenize(leftPart).some(isNameLikeToken);
  const rightHasName = rightTokens.some(isNameLikeToken);
  if (!leftHasName || !rightHasName) {
    return { isCompound: false, confidence: 0, reasons: [] };
  }

  reasons.push("COMPOUND_CONNECTOR");
  // Both sides contain a name-like token at this point.
  let confidence = 0.75;
  let sharedFamily;

  // "John & Mary Smith": the right side ends in a family name (ignoring
  // suffixes) and the left side has no family name of its own.
  if (rightTokens.length >= 2) {
    let familyIdx = rightTokens.length - 1;
    while (familyIdx >= 0 && isSuffixToken(rightTokens[familyIdx])) {
      familyIdx--;
    }
    if (familyIdx >= 1) {
      const potentialShared = rightTokens[familyIdx];
      if (isNameLikeToken(potentialShared)) {
        const leftParsed = parseMemberTokens(leftPart);
        if (!leftParsed.family || leftParsed.given === leftParsed.family) {
          sharedFamily = potentialShared;
          reasons.push("COMPOUND_SHARED_FAMILY");
          confidence = 1;
        }
      }
    }
  }

  // "Dr. John & Smith": a lone name on the right is the shared family name
  // only when the left side really starts with an honorific. The honorific
  // must be followed by a dot, whitespace or end of text, so that names such
  // as "Drew" or "Msizi" are not mistaken for "Dr"/"Ms".
  if (!sharedFamily && rightTokens.length === 1 && isNameLikeToken(rightTokens[0])) {
    if (/^(mr|mrs|ms|dr|rev)(?:\.|\s|$)/i.test(leftPart)) {
      sharedFamily = rightTokens[0];
      reasons.push("COMPOUND_SHARED_FAMILY");
      confidence = 0.75;
    }
  }

  // "Smith, John & Mary": family name given first, before a comma.
  if (!sharedFamily && leftPart.includes(",")) {
    const commaParts = leftPart.split(",").map((p) => p.trim());
    if (commaParts.length >= 2 && isNameLikeToken(commaParts[0])) {
      sharedFamily = commaParts[0];
      reasons.push("COMPOUND_SHARED_FAMILY");
      confidence = 0.75;
    }
  }

  return {
    isCompound: true,
    confidence,
    reasons,
    connector: connectorType,
    leftPart,
    rightPart,
    sharedFamily
  };
}
|
|
1628
|
+
/**
 * Build the entity for one member of a compound name.
 *
 * The member's own honorific wins over the inherited one, and its own
 * family name wins over the shared one; the shared family name is used only
 * when the member has none.
 *
 * @param {string} text - Text of this member.
 * @param {string} raw - Raw text recorded in the metadata.
 * @param {string} [sharedFamily] - Family name shared by the compound.
 * @param {string} [inheritedHonorific] - Honorific inherited from the
 *   compound (paired or plural honorific).
 * @param {string} [locale="en"] - Locale recorded in the metadata.
 * @returns {object} A `person` entity, or an `unknown` entity when the text
 *   is blank.
 */
function parseCompoundMember(text, raw, sharedFamily, inheritedHonorific, locale = "en") {
  const meta = { raw, normalized: text, confidence: 0.5, reasons: [], locale };

  if (text.trim() === "") {
    return { kind: "unknown", text, meta };
  }

  const { honorific: ownHonorific, given, middle, family: ownFamily, suffix } = parseMemberTokens(text);
  const family = !ownFamily && sharedFamily ? sharedFamily : ownFamily;

  return {
    kind: "person",
    honorific: ownHonorific || inheritedHonorific,
    given,
    middle,
    family,
    suffix,
    meta
  };
}
|
|
1663
|
+
/**
 * Turn a compound-detection result into a "compound" entity with one member
 * per non-empty side.
 *
 * @param {object} result - Detection result; reads `leftPart`, `rightPart`,
 *   `sharedFamily`, `pairedHonorifics`, `singularHonorific`, `connector`,
 *   `confidence` and `reasons`.
 * @param {string} raw - Original input, stored in `meta.raw`.
 * @param {string} normalized - Normalized input, stored in `meta.normalized`.
 * @param {string} [locale="en"] - Locale recorded in every `meta`.
 * @returns {object} Entity of kind "compound".
 */
function buildCompoundEntity(result, raw, normalized, locale = "en") {
  const withoutDots = (value) => value.toLowerCase().replace(/\./g, "");

  // Shared by both sides: a side that consists solely of the inherited
  // honorific becomes a bare person; anything else is parsed as a member.
  const toMember = (memberText, memberRaw, inheritedHonorific) => {
    const isJustHonorific = inheritedHonorific && withoutDots(memberText) === withoutDots(inheritedHonorific);
    const hasOwnHonorific = SINGLE_HONORIFIC_RE.test(memberText);
    if (isJustHonorific) {
      return {
        kind: "person",
        honorific: inheritedHonorific,
        family: result.sharedFamily,
        meta: {
          raw: memberRaw,
          normalized: memberText,
          confidence: 0.5,
          reasons: [],
          locale
        }
      };
    }
    return parseCompoundMember(
      memberText || memberRaw,
      memberRaw,
      result.sharedFamily,
      hasOwnHonorific ? void 0 : inheritedHonorific,
      locale
    );
  };

  const members = [];
  if (result.leftPart) {
    const leftHonorific = result.pairedHonorifics?.first || result.singularHonorific;
    members.push(toMember(result.leftPart, result.leftPart, leftHonorific));
  }
  if (result.rightPart) {
    let rightText = result.rightPart;
    if (result.sharedFamily) {
      // Drop a trailing copy of the shared family name; keep the original
      // text if nothing would be left.
      const trailingFamily = new RegExp(`\\s+${escapeRegex(result.sharedFamily)}\\s*$`, "i");
      rightText = rightText.replace(trailingFamily, "").trim() || result.rightPart;
    }
    const rightHonorific = result.pairedHonorifics?.second || result.singularHonorific;
    members.push(toMember(rightText, result.rightPart, rightHonorific));
  }

  return {
    kind: "compound",
    connector: result.connector || "unknown",
    members,
    sharedFamily: result.sharedFamily,
    meta: {
      raw,
      normalized,
      confidence: result.confidence,
      reasons: result.reasons,
      locale
    }
  };
}
|
|
1740
|
+
/**
 * Escape every regular-expression metacharacter in `str` so the result can be
 * embedded in a `RegExp` source and match the text literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with each metacharacter prefixed by a backslash.
 */
function escapeRegex(str) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (ch) => `\\${ch}`);
}
|
|
1743
|
+
|
|
1744
|
+
// src/detectors/family.ts
|
|
1745
|
+
// "family"/"household" as the final word of the text (trailing whitespace allowed).
var FAMILY_WORD_END_RE = /\b(family|household)\s*$/i;
// "family"/"household" as a whole word anywhere in the text.
var FAMILY_WORD_RE = /\b(family|household)\b/i;

/**
 * Report whether more than two name-like tokens precede the first
 * "family"/"household" token in `text`.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True only when the family word is not the first token and
 *   it is preceded by at least three tokens that all pass `isNameLikeToken`.
 */
function hasGivenNameTokens(text) {
  const tokens = tokenize(text);
  const familyWordAt = tokens.findIndex((token) => /^(family|household)$/i.test(token));
  if (familyWordAt <= 0) {
    return false;
  }
  const leading = tokens.slice(0, familyWordAt);
  return leading.length > 2 && leading.every(isNameLikeToken);
}
|
|
1756
|
+
/**
 * Decide whether `normalized` names a family or household.
 *
 * Two shapes are recognised: text ending in "family"/"household", and text
 * that starts with "The" followed by something `hasPluralSurnameEnding`
 * accepts.
 *
 * @param {string} normalized - Normalized input text.
 * @returns {object} `{ isFamily, confidence, reasons }`, plus `entity` when
 *   `isFamily` is true.
 */
function detectFamily(normalized) {
  const reasons = [];
  let article;
  const hasThe = startsWithThe(normalized);
  if (hasThe) {
    reasons.push("FAMILY_STARTS_WITH_THE");
    article = "The";
  }
  const withoutThe = hasThe ? stripLeadingThe(normalized) : normalized;

  const familyWordMatch = withoutThe.match(FAMILY_WORD_END_RE);
  if (familyWordMatch) {
    reasons.push("FAMILY_ENDS_WITH_FAMILY", "FAMILY_HAS_FAMILY_WORD");
    const isHousehold = familyWordMatch[1].toLowerCase() === "household";
    const familyName = withoutThe.slice(0, familyWordMatch.index).trim();
    // Several name-like tokens before the family word lower the confidence.
    const confidence = hasGivenNameTokens(withoutThe) ? 0.75 : 1;
    return {
      isFamily: true,
      confidence,
      reasons,
      entity: {
        kind: isHousehold ? "household" : "family",
        article,
        familyName,
        style: "familyWord",
        familyWord: isHousehold ? "Household" : "Family"
      }
    };
  }

  if (hasThe && hasPluralSurnameEnding(withoutThe)) {
    reasons.push("FAMILY_PLURAL_SURNAME");
    const mentionsFamilyWord = FAMILY_WORD_RE.test(normalized);
    if (!mentionsFamilyWord) {
      reasons.push("AMBIGUOUS_THE_PLURAL");
    }
    return {
      isFamily: true,
      confidence: mentionsFamilyWord ? 0.75 : 0.5,
      reasons,
      entity: {
        kind: "family",
        article,
        familyName: withoutThe.trim(),
        style: "pluralSurname"
      }
    };
  }

  return { isFamily: false, confidence: 0, reasons: [] };
}
|
|
1822
|
+
/**
 * Assemble a family/household entity from a family-detection result.
 *
 * @param {object} result - Detection result; reads `entity`, `confidence` and `reasons`.
 * @param {string} raw - Original input, stored in `meta.raw`.
 * @param {string} normalized - Normalized input; also the fallback `familyName`.
 * @param {string} [locale="en"] - Locale recorded in `meta`.
 * @returns {object} Entity whose missing fields default to kind "family",
 *   style "familyWord" and `familyName` = `normalized`.
 */
function buildFamilyEntity(result, raw, normalized, locale = "en") {
  const detected = result.entity;
  return {
    kind: detected?.kind || "family",
    article: detected?.article,
    familyName: detected?.familyName || normalized,
    style: detected?.style || "familyWord",
    familyWord: detected?.familyWord,
    meta: {
      raw,
      normalized,
      confidence: result.confidence,
      reasons: result.reasons,
      locale
    }
  };
}
|
|
1839
|
+
|
|
1840
|
+
// src/detectors/person.ts
|
|
1841
|
+
// Lower-cased name suffixes (generational, roman numerals, post-nominals)
// accepted by `isKnownSuffix`.
var SUFFIX_ALLOW_LIST = /* @__PURE__ */ new Set([
  "jr",
  "jr.",
  "sr",
  "sr.",
  "ii",
  "iii",
  "iv",
  "v",
  "vi",
  "vii",
  "viii",
  "ix",
  "x",
  "phd",
  "ph.d.",
  "ph.d",
  "md",
  "m.d.",
  "dds",
  "d.d.s.",
  "esq",
  "esq.",
  "jd",
  "j.d.",
  "mba",
  "m.b.a.",
  "cpa"
]);
// A leading honorific followed by ".", whitespace, or the end of the text.
var HONORIFIC_RE = /^(mr|mrs|ms|miss|mx|dr|prof|sir|dame|rev|fr|rabbi|imam|pastor|judge|justice|capt|maj|col|gen|adm|sgt|lt)(?:\.\s*|\s+|$)/i;

/**
 * Report whether `token` is one of the suffixes in `SUFFIX_ALLOW_LIST`.
 *
 * The token is matched case-insensitively, first as written and then with
 * every "." removed. The previous implementation only stripped one trailing
 * dot before the lookup, which turned "M.D." into "m.d" and therefore missed
 * the dotted entries ("m.d.", "d.d.s.", "j.d.", "m.b.a.") entirely.
 *
 * @param {string} token - A single word, e.g. "Jr.", "III", "M.D.".
 * @returns {boolean} True when the token is a known suffix.
 */
function isKnownSuffix(token) {
  const lowered = token.toLowerCase();
  return SUFFIX_ALLOW_LIST.has(lowered) || SUFFIX_ALLOW_LIST.has(lowered.replace(/\./g, ""));
}
|
|
1874
|
+
/**
 * Try to read `normalized` as a comma-separated "Family, Given [Middle][, Suffix…]" name.
 *
 * A quoted segment is lifted out as `nickname` and a parenthesised segment as
 * `fullGiven` before the comma split.
 *
 * @param {string} normalized - Normalized input text.
 * @returns {object|null} A person detection result with `entity.reversed === true`,
 *   or null when the text does not have 2-4 comma-separated parts, the family
 *   or given part does not start with a name-like token, or a part after the
 *   given part does not start with a known suffix.
 */
function tryParseReversed(normalized) {
  let text = normalized;

  let nickname;
  const quoted = text.match(/[""']([^""']+)[""']/);
  if (quoted) {
    nickname = quoted[1].trim();
    text = text.replace(quoted[0], " ").replace(/\s+/g, " ").trim();
  }

  let fullGiven;
  const parenthesised = text.match(/\s*\(([^)]+)\)\s*/);
  if (parenthesised) {
    fullGiven = parenthesised[1].trim();
    text = text.replace(parenthesised[0], " ").trim();
  }

  const segments = text.split(",").map((segment) => segment.trim()).filter(Boolean);
  if (segments.length < 2 || segments.length > 4) {
    return null;
  }
  const [familyPart, givenPart, ...trailing] = segments;
  if (!familyPart || !isNameLikeToken(familyPart.split(/\s+/)[0])) {
    return null;
  }
  const givenTokens = tokenize(givenPart);
  if (givenTokens.length === 0 || !isNameLikeToken(givenTokens[0])) {
    return null;
  }

  // Every part after the given names must be a suffix, otherwise this is
  // not a reversed name.
  const reasons = [];
  let suffix;
  for (const segment of trailing) {
    if (!isKnownSuffix(segment.split(/\s+/)[0])) {
      return null;
    }
    suffix = suffix ? `${suffix}, ${segment}` : segment;
    reasons.push("PERSON_HAS_SUFFIX");
  }
  reasons.push("PERSON_REVERSED_FORMAT");

  const particles = extractParticles(tokenize(familyPart));
  return {
    isPerson: true,
    confidence: suffix ? 1 : 0.75,
    reasons,
    entity: {
      kind: "person",
      given: givenTokens[0],
      fullGiven,
      middle: givenTokens.length > 1 ? givenTokens.slice(1).join(" ") : void 0,
      family: familyPart,
      suffix,
      nickname,
      particles: particles.length > 0 ? particles : void 0,
      reversed: true
    }
  };
}
|
|
1937
|
+
/**
 * Collect the surname particles at the start of a family-name token list.
 *
 * @param {string[]} familyTokens - Tokens of the family name, in order.
 * @returns {string[]} A one-element array holding the multi-word particle when
 *   `isMultiWordParticle` reports one; otherwise the leading run of tokens
 *   for which `isParticle` is truthy (possibly empty).
 */
function extractParticles(familyTokens) {
  const multiWord = isMultiWordParticle(familyTokens);
  if (multiWord) {
    return [multiWord];
  }
  // Only a leading run counts: stop at the first non-particle token.
  const firstNonParticle = familyTokens.findIndex((token) => !isParticle(token));
  return firstNonParticle === -1 ? [...familyTokens] : familyTokens.slice(0, firstNonParticle);
}
|
|
1953
|
+
/**
 * Parse `normalized` as "[Honorific] Given [Middle…] Family[, Suffix]".
 *
 * Pieces are peeled off in a fixed order: leading honorific, parenthesised
 * annotation (`fullGiven`), quoted nickname, suffix after the last comma,
 * then trailing suffix tokens. What remains is split into given / middle /
 * family.
 *
 * @param {string} normalized - Normalized input text.
 * @returns {object} `{ isPerson: false, confidence: 0, reasons: [] }` when no
 *   tokens remain, otherwise a person detection result with
 *   `entity.reversed === false`.
 */
function parseStandardFormat(normalized) {
  const reasons = [];
  let confidence = 0.5;
  let text = normalized;

  let honorific;
  const honorificMatch = text.match(HONORIFIC_RE);
  if (honorificMatch) {
    const [matched] = honorificMatch;
    honorific = matched.trim();
    text = text.slice(matched.length).trim();
    reasons.push("PERSON_HAS_HONORIFIC");
    confidence = 0.75;
  }

  let fullGiven;
  const parenResult = extractParenContent(text);
  if (parenResult) {
    fullGiven = parenResult.paren;
    text = parenResult.main;
    reasons.push("HAS_PAREN_ANNOTATION");
  }

  let nickname;
  const quoted = text.match(/[""']([^""']+)[""']/);
  if (quoted) {
    nickname = quoted[1].trim();
    text = text.replace(quoted[0], " ").replace(/\s+/g, " ").trim();
  }

  // Suffix written after the last comma, e.g. "John Smith, Jr.".
  let suffix;
  const lastComma = text.lastIndexOf(",");
  if (lastComma > 0) {
    const trailing = text.slice(lastComma + 1).trim();
    if (isKnownSuffix(trailing.split(/\s+/)[0])) {
      suffix = trailing;
      text = text.slice(0, lastComma).trim();
      reasons.push("PERSON_HAS_SUFFIX");
      confidence = Math.max(confidence, 0.75);
    }
  }

  // Suffixes written as trailing words; the last remaining token is never
  // consumed so a name always keeps at least one token.
  const tokens = tokenize(text);
  while (tokens.length > 1 && isKnownSuffix(tokens[tokens.length - 1])) {
    const popped = tokens.pop();
    suffix = suffix ? `${popped}, ${suffix}` : popped;
    if (!reasons.includes("PERSON_HAS_SUFFIX")) {
      reasons.push("PERSON_HAS_SUFFIX");
    }
  }

  if (tokens.length === 0) {
    return { isPerson: false, confidence: 0, reasons: [] };
  }
  reasons.push("PERSON_STANDARD_FORMAT");

  const given = tokens[0];
  let middle;
  let family;
  if (tokens.length === 1) {
    reasons.push("AMBIGUOUS_SHORT_NAME");
  } else {
    family = tokens[tokens.length - 1];
    if (tokens.length > 2) {
      middle = tokens.slice(1, -1).join(" ");
    }
    confidence = Math.max(confidence, 0.75);
  }

  const particles = middle ? middle.split(/\s+/).filter((token) => isParticle(token)) : [];
  if (family) {
    for (const particle of extractParticles(family.split(/\s+/))) {
      if (!particles.includes(particle)) {
        particles.push(particle);
      }
    }
  }

  return {
    isPerson: true,
    confidence,
    reasons,
    entity: {
      kind: "person",
      honorific,
      given,
      fullGiven,
      middle,
      family,
      suffix,
      nickname,
      particles: particles.length > 0 ? particles : void 0,
      reversed: false
    }
  };
}
|
|
2056
|
+
/**
 * Detect a person name, preferring the reversed "Family, Given" reading and
 * falling back to the standard "Given Family" reading.
 *
 * @param {string} normalized - Normalized input text.
 * @returns {object} The result of `tryParseReversed` when it is truthy,
 *   otherwise the result of `parseStandardFormat`.
 */
function detectPerson(normalized) {
  return tryParseReversed(normalized) || parseStandardFormat(normalized);
}
|
|
2063
|
+
/**
 * Assemble a "person" entity from a person-detection result.
 *
 * @param {object} result - Detection result; reads `entity`, `confidence` and `reasons`.
 * @param {string} raw - Original input, stored in `meta.raw`.
 * @param {string} normalized - Normalized input, stored in `meta.normalized`.
 * @param {string} [locale="en"] - Locale recorded in `meta`.
 * @returns {object} Person entity; name fields are undefined when
 *   `result.entity` is missing or lacks them.
 */
function buildPersonEntity(result, raw, normalized, locale = "en") {
  const detected = result.entity;
  return {
    kind: "person",
    honorific: detected?.honorific,
    given: detected?.given,
    fullGiven: detected?.fullGiven,
    middle: detected?.middle,
    family: detected?.family,
    suffix: detected?.suffix,
    nickname: detected?.nickname,
    particles: detected?.particles,
    reversed: detected?.reversed,
    meta: {
      raw,
      normalized,
      confidence: result.confidence,
      reasons: result.reasons,
      locale
    }
  };
}
|
|
2085
|
+
|
|
2086
|
+
// src/classifier.ts
|
|
2087
|
+
/**
 * Classify a free-text name as an organization, compound, family/household,
 * person or unknown entity.
 *
 * Detectors run in that order and the first one that matches wins. Every
 * result except the blank-input one is passed through `applyStrict`.
 *
 * @param {*} input - Text to classify; non-strings and blank strings yield an
 *   "unknown" entity with guess "person".
 * @param {object} [options={}] - Reads `locale` (default "en"); the whole
 *   object is forwarded to `applyStrict`.
 * @returns {object} The classified entity.
 */
function classifyName(input, options = {}) {
  const raw = input;
  const locale = options.locale ?? "en";
  const isBlank = !input || typeof input !== "string" || !input.trim();
  if (isBlank) {
    return buildUnknown("", "", locale, [], "person");
  }

  const reasons = [];
  let normalized = normalizeInput(input);

  // "Display <something>": continue with the display part only.
  const bracketed = extractAngleBrackets(normalized);
  if (bracketed) {
    normalized = bracketed.display || normalized;
    if (hasAtSymbol(bracketed.bracket)) {
      reasons.push("HAS_EMAIL_OR_HANDLE");
    }
  }
  if (hasAtSymbol(normalized)) {
    reasons.push("HAS_EMAIL_OR_HANDLE");
    return applyStrict(buildUnknown(raw, normalized, locale, reasons), options);
  }
  if (isAllCaps(normalized)) {
    reasons.push("HAS_ALLCAPS");
  }

  // Each detector only runs if all earlier ones declined.
  const stages = [
    { detect: () => detectOrganization(normalized, raw), matched: (r) => r.isOrg, build: buildOrganizationEntity },
    { detect: () => detectCompound(normalized), matched: (r) => r.isCompound, build: buildCompoundEntity },
    { detect: () => detectFamily(normalized), matched: (r) => r.isFamily, build: buildFamilyEntity },
    { detect: () => detectPerson(normalized), matched: (r) => r.isPerson, build: buildPersonEntity }
  ];
  for (const { detect, matched, build } of stages) {
    const detection = detect();
    if (matched(detection)) {
      return applyStrict(build(detection, raw, normalized, locale), options);
    }
  }

  return applyStrict(buildUnknown(raw, normalized, locale, reasons, guessType(normalized)), options);
}
|
|
2131
|
+
/**
 * Make a best-effort guess at the kind of an unclassified name.
 *
 * @param {string} text - Text to inspect.
 * @returns {"person"|"organization"|undefined} "person" for text shorter than
 *   20 characters made only of capitalised ASCII words; otherwise
 *   "organization" when it contains one of a few business words; otherwise
 *   undefined.
 */
function guessType(text) {
  const isShortTitleCase = text.length < 20 && /^[A-Z][a-z]+(\s+[A-Z][a-z]+)*$/.test(text);
  if (isShortTitleCase) {
    return "person";
  }
  const hasBusinessWord = /\b(corp|company|group|holdings|services|consulting)\b/i.test(text);
  return hasBusinessWord ? "organization" : void 0;
}
|
|
2140
|
+
/**
 * Build an "unknown" entity with a fixed confidence of 0.25.
 *
 * @param {string} raw - Original input, stored in `meta.raw`.
 * @param {string} normalized - Normalized input, stored in `meta.normalized`.
 * @param {string} locale - Locale recorded in `meta`.
 * @param {string[]} reasons - Reason codes recorded in `meta` (stored by reference).
 * @param {string} [guess] - Optional best-effort kind.
 * @returns {object} Entity whose `text` is `normalized`, or `raw` when
 *   `normalized` is falsy.
 */
function buildUnknown(raw, normalized, locale, reasons, guess) {
  return {
    kind: "unknown",
    text: normalized || raw,
    guess,
    meta: {
      raw,
      normalized,
      confidence: 0.25,
      reasons,
      locale
    }
  };
}
|
|
2155
|
+
/**
 * Enforce `options.strictKind === "person"`: any non-person entity is
 * replaced by a "rejected" entity; everything else passes through untouched.
 *
 * @param {object} entity - Classified entity with `kind` and `meta`.
 * @param {object} options - Reads `strictKind`.
 * @returns {object} `entity` itself, or a "rejected" entity carrying the
 *   original kind in `rejectedAs` and a copy of `meta` with confidence 1.
 */
function applyStrict(entity, options) {
  const mustReject = options.strictKind === "person" && entity.kind !== "person";
  if (!mustReject) {
    return entity;
  }
  return {
    kind: "rejected",
    rejectedAs: entity.kind === "rejected" ? "unknown" : entity.kind,
    meta: {
      ...entity.meta,
      confidence: 1,
      // Copied so the rejected entity does not share the original array.
      reasons: [...entity.meta.reasons]
    }
  };
}
|
|
2171
|
+
/** @returns {boolean} True when `entity.kind` is "person". */
function isPerson(entity) {
  const { kind } = entity;
  return kind === "person";
}

/** @returns {boolean} True when `entity.kind` is "organization". */
function isOrganization(entity) {
  const { kind } = entity;
  return kind === "organization";
}

/** @returns {boolean} True when `entity.kind` is "family" or "household". */
function isFamily(entity) {
  const { kind } = entity;
  return kind === "family" || kind === "household";
}

/** @returns {boolean} True when `entity.kind` is "compound". */
function isCompound(entity) {
  const { kind } = entity;
  return kind === "compound";
}

/** @returns {boolean} True when `entity.kind` is "unknown". */
function isUnknown(entity) {
  const { kind } = entity;
  return kind === "unknown";
}

/** @returns {boolean} True when `entity.kind` is "rejected". */
function isRejected(entity) {
  const { kind } = entity;
  return kind === "rejected";
}
|
|
2189
|
+
|
|
2190
|
+
// src/parsers.ts
|
|
2191
|
+
/**
 * Parse a name of any kind; a thin alias for `classifyName`.
 *
 * @param {*} input - Text to classify.
 * @param {object} [options] - Forwarded unchanged to `classifyName`.
 * @returns {object} Whatever `classifyName` returns.
 */
function parseName(input, options) {
  const entity = classifyName(input, options);
  return entity;
}
|
|
2194
|
+
/**
 * Parse a person's full name into its parts.
 *
 * Steps, in order: nickname / parenthetical extraction, suffix extraction,
 * whitespace split, prefix extraction, first/middle/last assignment, then the
 * derived fields (affix tokens, family particle, preferred given name, sort
 * helpers).
 *
 * @param {string} fullName - Non-empty name text.
 * @returns {object} The parsed name record.
 * @throws {Error} When `fullName` is not a non-empty string, or when neither
 *   a first nor a last name is found.
 */
function parsePersonName(fullName) {
  const isUsable = typeof fullName === "string" && fullName.length > 0;
  if (!isUsable) {
    throw new Error("Invalid name: expected non-empty string");
  }

  const result = {};
  const withoutNickname = extractNickname(fullName.trim(), result);
  const withoutSuffixes = extractSuffixes(withoutNickname, result);
  const nameParts = extractPrefixes(withoutSuffixes.split(/\s+/), result);
  assignNameParts(nameParts, result);
  if (!result.first && !result.last) {
    throw new Error("Invalid name: no name parts found after parsing");
  }

  result.prefixTokens = buildAffixTokens(result.prefix, "prefix");
  result.suffixTokens = buildAffixTokens(result.suffix, "suffix");
  deriveFamilyParticle(result);
  derivePreferredGiven(result);
  deriveSortHelpers(result);
  return result;
}
|
|
2215
|
+
/**
 * Pull a quoted nickname and a bracketed full given name out of `text`.
 *
 * A quoted segment (`"…"` or `'…'`) is stored as `result.nickname`; a segment
 * in `(…)` or `[…]` is stored as `result.fullGiven`. Only the first of each is
 * extracted.
 *
 * Quotes only count as nickname delimiters when the opening quote starts a
 * word (preceded by start of text, whitespace, "," or "(") and the closing
 * quote ends one (followed by end of text, whitespace or , . ; ")"). This
 * keeps apostrophes inside names intact: previously "Shaquille O'Neal 'Shaq'"
 * matched from the apostrophe in "O'Neal" and produced the nickname "Neal".
 *
 * @param {string} text - Name text.
 * @param {object} result - Parse record; `nickname` / `fullGiven` are set on it.
 * @returns {string} `text` with the extracted segments removed and whitespace
 *   collapsed; returned unchanged when nothing was extracted.
 */
function extractNickname(text, result) {
  let workingText = text;
  const quoteMatch = workingText.match(/(?<=^|[\s,(])["']([^"']+)["'](?=$|[\s,.;)])/);
  if (quoteMatch) {
    result.nickname = quoteMatch[1].trim();
    workingText = workingText.replace(quoteMatch[0], " ").replace(/\s+/g, " ").trim();
  }
  const parenMatch = workingText.match(/[\(\[]([^)\]]+)[\)\]]/);
  if (parenMatch) {
    result.fullGiven = parenMatch[1].trim();
    workingText = workingText.replace(parenMatch[0], " ").replace(/\s+/g, " ").trim();
  }
  return workingText;
}
|
|
2229
|
+
/**
 * Strip trailing suffixes from `text` and record them on `result.suffix`.
 *
 * Two passes run from the end of the text: first over comma-separated
 * segments, then over whitespace-separated words of what is left. The first
 * segment / word is never consumed.
 *
 * @param {string} text - Name text (nickname already removed by the caller).
 * @param {object} result - Parse record; `suffix` is set to the suffixes joined
 *   with ", " (word suffixes first, then comma suffixes) when any were found.
 * @returns {string} The text without the extracted suffixes. When nothing was
 *   found this is the trimmed comma remainder with its spacing untouched.
 */
function extractSuffixes(text, result) {
  // True when the affix tokenizer classifies at least one token as something
  // other than "other".
  const isSuffixLike = (value) => {
    const tokens = buildAffixTokens(value, "suffix");
    return !!tokens && tokens.length > 0 && tokens.some((token) => token.type !== "other");
  };

  // Heuristic for unlisted post-nominals: short, letters / dots / hyphens
  // only, and either dotted or mostly upper-case.
  const isPostNominalLike = (value) => {
    const v = value.trim().replace(/^[,;:\s]+/, "").replace(/[,;:\s]+$/, "");
    if (!v || v.length > 18 || /\d/.test(v) || /[^\p{L}.\-\s]/u.test(v)) {
      return false;
    }
    const hasDot = /[.]/.test(v);
    if (!hasDot && !/[A-Z]/.test(v)) {
      return false;
    }
    const lettersOnly = v.normalize("NFKD").replace(/[\u0300-\u036f]/g, "").replace(/[.\-\s]/g, "");
    if (!/^[A-Za-z]+$/.test(lettersOnly)) {
      return false;
    }
    if (lettersOnly.length < 2 || lettersOnly.length > 10) {
      return false;
    }
    const upperCount = (lettersOnly.match(/[A-Z]/g) ?? []).length;
    return hasDot || upperCount / lettersOnly.length >= 0.7;
  };

  // Pass 1: comma-separated segments, last to first.
  const commaSegments = text.split(",");
  const commaSuffixes = [];
  while (commaSegments.length > 1) {
    const candidate = commaSegments[commaSegments.length - 1].trim();
    const leadWord = candidate.split(/\s+/)[0];
    const accepted = isSuffixLike(leadWord) || isSuffixLike(candidate) || isPostNominalLike(candidate) || /queen|king|consort/i.test(candidate);
    if (!accepted) {
      break;
    }
    commaSuffixes.unshift(candidate);
    commaSegments.pop();
  }
  const remainder = commaSegments.join(",").trim();

  // Pass 2: trailing words of the remainder, last to first.
  const words = remainder.split(/\s+/);
  const wordSuffixes = [];
  while (words.length > 1) {
    const tail = words[words.length - 1];
    if (!isSuffixLike(tail.replace(/[,]$/, ""))) {
      break;
    }
    wordSuffixes.unshift(tail);
    words.pop();
  }

  const allSuffixes = [...wordSuffixes, ...commaSuffixes];
  if (allSuffixes.length === 0) {
    return remainder;
  }
  result.suffix = allSuffixes.join(", ");
  return words.join(" ").trim();
}
|
|
2281
|
+
/**
 * Remove leading prefixes from the word list and record them on `result.prefix`.
 *
 * At each step the longest run of leading words (up to five, always leaving at
 * least one word) that the affix tokenizer fully recognises is consumed; this
 * repeats until no run matches.
 *
 * @param {string[]} parts - Words of the name; consumed words are spliced out
 *   of this array in place.
 * @param {object} result - Parse record; `prefix` is set to the consumed runs
 *   joined with a space when any were found.
 * @returns {string[]} The same `parts` array, minus the consumed words.
 */
function extractPrefixes(parts, result) {
  // A candidate is a prefix only if none of its tokens is typed "other".
  const isPrefix = (candidate) => {
    const tokens = buildAffixTokens(candidate, "prefix");
    return !!tokens && tokens.length > 0 && tokens.every((token) => token.type !== "other");
  };

  const consumed = [];
  let keepGoing = true;
  while (keepGoing && parts.length > 1) {
    keepGoing = false;
    const longest = Math.min(parts.length - 1, 5);
    for (let size = longest; size >= 1; size--) {
      const candidate = parts.slice(0, size).join(" ");
      if (isPrefix(candidate)) {
        consumed.push(candidate);
        parts.splice(0, size);
        keepGoing = true;
        break;
      }
    }
  }

  if (consumed.length > 0) {
    result.prefix = consumed.join(" ");
  }
  return parts;
}
|
|
2307
|
+
/**
 * Distribute the remaining words over `result.first`, `result.middle` and
 * `result.last`.
 *
 * The surname starts at the last word and grows leftwards over particles and
 * over words that are common surnames but not common first names (the very
 * first word only joins if it is a particle). If the surname reaches the
 * first word, everything becomes `last`.
 *
 * @param {string[]} parts - Words of the name, prefixes and suffixes removed.
 * @param {object} result - Parse record that receives the fields.
 * @returns {void}
 */
function assignNameParts(parts, result) {
  if (parts.length === 0) {
    return;
  }
  if (parts.length === 1) {
    [result.first] = parts;
    return;
  }

  let surnameStart = parts.length - 1;
  let cursor = parts.length - 2;
  while (cursor >= 0) {
    const word = parts[cursor];
    const joinsSurname = isParticle(word) || (isCommonSurname(word) && !isCommonFirstName(word) && cursor > 0);
    if (!joinsSurname) {
      break;
    }
    surnameStart = cursor;
    cursor -= 1;
  }

  if (surnameStart === 0) {
    result.last = parts.join(" ");
    return;
  }
  result.first = parts[0];
  if (surnameStart > 1) {
    result.middle = parts.slice(1, surnameStart).join(" ");
  }
  result.last = parts.slice(surnameStart).join(" ");
}
|
|
2336
|
+
// Lower-case surname particles recognised at the start of a family name.
var FAMILY_PARTICLE_PHRASES = [
  // multi-word (check first)
  "de la",
  "de los",
  "de las",
  "van der",
  "van den",
  "van de",
  // single-word
  "de",
  "del",
  "da",
  "dos",
  "di",
  "van",
  "von",
  "al",
  "el",
  "bin",
  "ibn"
];

/**
 * Split a leading particle off `result.last`.
 *
 * When the family name has at least two words and starts with one of
 * `FAMILY_PARTICLE_PHRASES` (longest phrase preferred, compared
 * case-insensitively, and always leaving at least one word after it), sets
 * `familyParticle` (original casing), `familyParts` (the remaining words) and
 * `familyParticleBehavior` ("localeDefault") on `result`. Otherwise `result`
 * is left untouched.
 *
 * @param {object} result - Parse record; reads `last`.
 * @returns {void}
 */
function deriveFamilyParticle(result) {
  const words = (result.last?.trim() ?? "").split(/\s+/).filter(Boolean);
  if (words.length < 2) {
    return;
  }
  const lowered = words.map((word) => word.toLowerCase());

  // Longest phrases first so "van der" wins over "van".
  const phrasesByLength = [...FAMILY_PARTICLE_PHRASES].sort((a, b) => b.split(" ").length - a.split(" ").length);
  const particleWords = phrasesByLength
    .map((phrase) => phrase.split(" "))
    .find((phraseWords) => phraseWords.length < words.length && phraseWords.every((phraseWord, idx) => lowered[idx] === phraseWord));
  if (!particleWords) {
    return;
  }

  result.familyParticle = words.slice(0, particleWords.length).join(" ");
  result.familyParts = words.slice(particleWords.length);
  result.familyParticleBehavior = "localeDefault";
}
|
|
2378
|
+
/**
 * Derive `result.preferredGiven` from `result.nickname`.
 *
 * Nothing happens when `preferredGiven` is already truthy or when there is no
 * non-blank nickname. Otherwise the nickname, stripped of surrounding quotes
 * and brackets, is assigned; if nothing is left the property is set to
 * undefined.
 *
 * @param {object} result - Parse record; reads `nickname`, writes `preferredGiven`.
 * @returns {void}
 */
function derivePreferredGiven(result) {
  const nick = result.preferredGiven ? void 0 : result.nickname?.trim();
  if (!nick) {
    return;
  }
  const unwrapped = nick.replace(/^[\"'\(\[]+/, "").replace(/[\"'\)\]]+$/, "").trim();
  result.preferredGiven = unwrapped || void 0;
}
|
|
2384
|
+
/**
 * Derive `result.sort = { display, key }` for alphabetical ordering.
 *
 * `display` is "Last, First Middle" (or whichever of last / first+middle is
 * present), followed by any generational or dynastic-number suffix values.
 * `key` is `display` with diacritics removed, lower-cased, every character
 * outside a-z, 0-9 and whitespace turned into a space, and whitespace
 * collapsed. Nothing is set when there is neither a first nor a last name.
 *
 * @param {object} result - Parse record; reads `first`, `middle`, `last` and
 *   `suffixTokens`.
 * @returns {void}
 */
function deriveSortHelpers(result) {
  const last = result.last?.trim();
  const first = result.first?.trim();
  const middle = result.middle?.trim();

  // The middle name only appears together with a first name.
  const givenNames = first ? (middle ? `${first} ${middle}` : first) : "";
  let display = last && givenNames ? `${last}, ${givenNames}` : last || givenNames;

  const identitySuffix = result.suffixTokens
    ?.filter((token) => token.type === "generational" || token.type === "dynasticNumber")
    .map((token) => token.value)
    .filter(Boolean)
    .join(", ");
  if (display && identitySuffix) {
    display = `${display}, ${identitySuffix}`;
  }
  if (!display) {
    return;
  }

  const withoutDiacritics = display.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
  const key = withoutDiacritics.toLowerCase().replace(/[^a-z0-9\s]/g, " ").replace(/\s+/g, " ").trim();
  result.sort = { display, key };
}
|
|
2404
|
+
/**
 * @param {string} fullName - Name text, parsed with `parsePersonName`.
 * @returns {string|undefined} The `first` field of the parsed name.
 */
function getFirstName(fullName) {
  const { first } = parsePersonName(fullName);
  return first;
}

/**
 * @param {string} fullName - Name text, parsed with `parsePersonName`.
 * @returns {string|undefined} The `last` field of the parsed name.
 */
function getLastName(fullName) {
  const { last } = parsePersonName(fullName);
  return last;
}

/**
 * @param {string} fullName - Name text, parsed with `parsePersonName`.
 * @returns {string|undefined} The `nickname` field of the parsed name.
 */
function getNickname(fullName) {
  const { nickname } = parsePersonName(fullName);
  return nickname;
}
|
|
2413
|
+
/**
 * Convert a "person" entity into the legacy parsed-name record.
 *
 * Field mapping: honorific -> prefix, given -> first, family -> last;
 * fullGiven, middle, suffix and nickname keep their names. Falsy fields are
 * omitted. `prefixTokens` and `suffixTokens` are always set from
 * `buildAffixTokens`.
 *
 * @param {object} entity - Any classified entity.
 * @returns {object|null} The legacy record, or null when `entity.kind` is not "person".
 */
function entityToLegacy(entity) {
  if (entity.kind !== "person") {
    return null;
  }
  const fieldMap = [
    ["honorific", "prefix"],
    ["given", "first"],
    ["fullGiven", "fullGiven"],
    ["middle", "middle"],
    ["family", "last"],
    ["suffix", "suffix"],
    ["nickname", "nickname"]
  ];
  const legacy = {};
  for (const [entityField, legacyField] of fieldMap) {
    if (entity[entityField]) {
      legacy[legacyField] = entity[entityField];
    }
  }
  legacy.prefixTokens = buildAffixTokens(legacy.prefix, "prefix");
  legacy.suffixTokens = buildAffixTokens(legacy.suffix, "suffix");
  return legacy;
}
|
|
2430
|
+
|
|
2431
|
+
// src/email-extractor.ts
|
|
2432
|
+
var EMAIL_RE = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/;
|
|
2433
|
+
var ANGLE_BRACKET_RE = /^(.*?)\s*<([^>]+)>\s*$/;
|
|
2434
|
+
var PAREN_EMAIL_RE = /^([^(]+)\s*\(([^)]+)\)\s*$/;
|
|
2435
|
+
var MAILTO_RE = /\[mailto:([^\]]+)\]/i;
|
|
2436
|
+
var SMTP_RE = /<SMTP:([^>]+)>/i;
|
|
2437
|
+
var X500_RE = /\/O=[^/]+\/.*\/CN=([^/\s]+)/i;
|
|
2438
|
+
function normalizeEmail(email) {
|
|
2439
|
+
return email.toLowerCase().replace(/^mailto:/i, "").replace(/^smtp:/i, "").trim();
|
|
2440
|
+
}
|
|
2441
|
+
/**
 * Pull an address and its display text out of a single recipient string.
 *
 * Forms are tried in this order, and the first hit wins:
 *   1. `display <content>` where content is an e-mail, `SMTP:...`, or an X.500 path
 *   2. `[mailto:address]` anywhere in the text
 *   3. `<SMTP:address>` anywhere in the text
 *   4. `address (display)`
 *   5. a bare e-mail anywhere in the text
 *
 * @param {string} text - One recipient entry.
 * @returns {{displayName: string, email: string, addressRaw: string} | null}
 *   The extracted parts, or null when the text is blank or no form matches.
 */
function extractEmail(text) {
  const input = text.trim();
  if (input === "") {
    return null;
  }

  // 1. Angle-bracket form.
  const bracketed = ANGLE_BRACKET_RE.exec(input);
  if (bracketed !== null) {
    const label = bracketed[1].trim();
    const inner = bracketed[2].trim();
    let address;
    if (EMAIL_RE.test(inner)) {
      address = inner;
    } else {
      // Fall back to an explicit SMTP: tag, then to the CN of an X.500 path.
      const tagged = /^SMTP:(.+)$/i.exec(inner) ?? X500_RE.exec(inner);
      if (tagged !== null) {
        address = tagged[1];
      }
    }
    if (address !== undefined) {
      return {
        displayName: unquoteDisplay(label),
        email: normalizeEmail(address),
        addressRaw: inner
      };
    }
  }

  // 2 & 3. Inline wrappers; whatever is left after removing the wrapper is the display text.
  for (const wrapperRe of [MAILTO_RE, SMTP_RE]) {
    const wrapped = wrapperRe.exec(input);
    if (wrapped !== null) {
      return {
        displayName: unquoteDisplay(input.replace(wrapperRe, "").trim()),
        email: normalizeEmail(wrapped[1]),
        addressRaw: wrapped[0]
      };
    }
  }

  // 4. `address (display)` - only when the part before the parenthesis contains an e-mail.
  const parenthesised = PAREN_EMAIL_RE.exec(input);
  if (parenthesised !== null) {
    const outside = parenthesised[1].trim();
    if (EMAIL_RE.test(outside)) {
      return {
        displayName: parenthesised[2].trim(),
        email: normalizeEmail(outside),
        addressRaw: outside
      };
    }
  }

  // 5. Bare address; the display text is not unquoted in this form.
  const bare = EMAIL_RE.exec(input);
  if (bare === null) {
    return null;
  }
  return {
    displayName: input.replace(EMAIL_RE, "").trim(),
    email: normalizeEmail(bare[0]),
    addressRaw: bare[0]
  };
}
|
|
2518
|
+
/**
 * Strip one layer of surrounding double quotes, then one layer of surrounding
 * single quotes, from a display string.
 *
 * @param {string} display - Display text, possibly quoted.
 * @returns {string} The trimmed text without the enclosing quotes.
 */
function unquoteDisplay(display) {
  let text = display.trim();
  // Order matters: double quotes are removed first, then single quotes.
  for (const quote of ['"', "'"]) {
    if (text.startsWith(quote) && text.endsWith(quote)) {
      text = text.slice(1, -1);
    }
  }
  return text.trim();
}
|
|
2528
|
+
/**
 * Report whether the text contains something matching EMAIL_RE anywhere.
 *
 * @param {string} text - Text to scan.
 * @returns {boolean} True when an e-mail-like substring is present.
 */
function hasEmail(text) {
  const found = EMAIL_RE.exec(text);
  return found !== null;
}
|
|
2531
|
+
|
|
2532
|
+
// src/list-parser.ts
|
|
2533
|
+
/**
 * Split a recipient list (as pasted from a To/Cc line) into individual entries.
 *
 * Separators are `;`, newline, and `,` - except commas that
 * `isReversedNameComma` identifies as part of a "Last, First" style name.
 * Separators inside quotes or inside `<...>` are ignored. A leading
 * `To:`/`Cc:`/`Bcc:`/`From:` label is stripped from each entry.
 *
 * @param {string} input - Raw list text.
 * @returns {string[]} Trimmed, non-empty recipient strings in input order.
 */
function splitRecipients(input) {
  const results = [];
  let current = "";
  let inQuotes = false;
  let inAngleBrackets = false;
  let quoteChar = "";
  const wordChar = /[\p{L}\p{N}]/u;

  // Push the accumulated entry (if non-blank) and start a new one.
  const flush = () => {
    const piece = current.trim();
    if (piece) {
      results.push(piece);
    }
    current = "";
  };

  for (let i = 0; i < input.length; i++) {
    const char = input[i];

    if ((char === '"' || char === "'") && !inAngleBrackets) {
      if (!inQuotes) {
        // An apostrophe directly after a letter or digit (O'Brien, Smiths')
        // is part of a word, not an opening quote. Treating it as a quote
        // would leave the quote open and swallow every later separator.
        const isInWordApostrophe = char === "'" && i > 0 && wordChar.test(input[i - 1]);
        if (!isInWordApostrophe) {
          inQuotes = true;
          quoteChar = char;
        }
      } else if (char === quoteChar) {
        inQuotes = false;
        quoteChar = "";
      }
    }

    if (char === "<" && !inQuotes) {
      inAngleBrackets = true;
    } else if (char === ">" && !inQuotes) {
      inAngleBrackets = false;
    }

    if (!inQuotes && !inAngleBrackets) {
      if (char === ";" || char === "\n") {
        flush();
        continue;
      }
      if (char === "," && !isReversedNameComma(current, input.slice(i + 1))) {
        flush();
        continue;
      }
    }

    current += char;
  }
  flush();

  return results
    .map((entry) => entry.replace(/^(To|Cc|Bcc|From):\s*/i, "").trim())
    .filter(Boolean);
}
|
|
2594
|
+
// Whole-token, case-insensitive match for generational suffixes (Jr, Sr, II-VIII),
// Esq, and common post-nominal credentials (PhD, MD, DDS, DO, RN, CPA, MBA, JD, LLD).
var SUFFIX_PATTERN = /^(Jr\.?|Sr\.?|II|III|IV|V|VI|VII|VIII|Esq\.?|Ph\.?D\.?|M\.?D\.?|D\.?D\.?S\.?|D\.?O\.?|R\.?N\.?|CPA|MBA|JD|LLD|DDS|DO|RN)$/i;
|
|
2595
|
+
/**
 * Decide whether a comma belongs to the current entry (e.g. "Last, First",
 * "Name, Jr.", "Acme, Inc.") rather than separating two recipients.
 *
 * @param {string} before - Text of the current entry up to the comma.
 * @param {string} after - Everything in the input after the comma.
 * @returns {boolean} True when the comma should be kept inside the entry.
 */
function isReversedNameComma(before, after) {
  const beforeTrimmed = before.trim();
  const afterTrimmed = after.trim();
  if (!beforeTrimmed) return false;

  const beforeTokens = beforeTrimmed.split(/[\s,]+/).filter(Boolean);

  // Text up to the next separator; split() always yields at least one element.
  const nextChunk = afterTrimmed.split(/[,;\r\n]/)[0];

  // An address right after the comma means a new recipient starts there.
  if (hasEmail(nextChunk)) {
    return false;
  }
  // "Acme, Inc." - the chunk after the comma is a legal form.
  if (beforeTokens.length > 0 && matchLegalForm(nextChunk.trim())) {
    return true;
  }

  const afterTokens = afterTrimmed.split(/[\s,;\r\n]+/).filter(Boolean);
  const firstAfter = afterTokens[0];
  if (!firstAfter) return false;

  // "Smith, Jr." / "Doe, PhD".
  if (SUFFIX_PATTERN.test(firstAfter)) {
    return true;
  }

  // More than three tokens before the comma is not treated as a reversed name.
  if (beforeTokens.length > 3) {
    return false;
  }

  // The entry already ends in a legal form, so the comma ends an organisation name.
  const lastBeforeToken = beforeTokens[beforeTokens.length - 1];
  if (lastBeforeToken && matchLegalForm(lastBeforeToken)) {
    return false;
  }
  if (!isNameLikeToken(firstAfter)) {
    return false;
  }

  // Two multi-word names side by side ("John Smith, Jane Doe") are separate people.
  if (beforeTokens.length >= 2 && afterTokens.length >= 2) {
    const firstBeforeToken = beforeTokens[0];
    if (isCommonFirstName(firstBeforeToken) && isCommonFirstName(firstAfter)) {
      return false;
    }
    const allNameLike =
      isNameLikeToken(firstBeforeToken) &&
      isNameLikeToken(beforeTokens[1]) &&
      isNameLikeToken(firstAfter) &&
      isNameLikeToken(afterTokens[1]);
    if (allNameLike && (isCommonFirstName(firstBeforeToken) || isCommonFirstName(firstAfter))) {
      return false;
    }
  }

  // A name-like token follows a short entry. The original look-ahead at the
  // following comma returned true on every path, so the result is simply true.
  return true;
}
|
|
2648
|
+
/**
 * Parse a recipient list into one record per recipient.
 *
 * Each record carries the raw text and `meta`. Where an address was found it
 * also carries `email` and `addressRaw`; where there is display text (or no
 * address at all) it carries `display`, the result of `classifyName`.
 *
 * @param {string} input - Raw list text; non-string or empty input yields [].
 * @param {object} [options={}] - Passed through to `classifyName`.
 * @returns {object[]} Parsed recipient records in input order.
 */
function parseNameList(input, options = {}) {
  if (typeof input !== "string" || !input) {
    return [];
  }

  return splitRecipients(input).map((recipientRaw) => {
    const found = extractEmail(recipientRaw);

    // No address: classify the whole entry as a name.
    if (!found) {
      const entity = classifyName(recipientRaw, options);
      return {
        raw: recipientRaw,
        display: entity,
        meta: {
          confidence: entity.meta.confidence,
          reasons: entity.meta.reasons,
          warnings: entity.meta.warnings
        }
      };
    }

    const reasons = ["HAS_EMAIL_OR_HANDLE"];

    // Address with no display text: nothing to classify, fixed confidence.
    if (!found.displayName) {
      return {
        raw: recipientRaw,
        email: found.email,
        addressRaw: found.addressRaw,
        meta: {
          confidence: 0.5,
          reasons
        }
      };
    }

    const entity = classifyName(found.displayName, options);
    return {
      raw: recipientRaw,
      display: entity,
      email: found.email,
      addressRaw: found.addressRaw,
      meta: {
        confidence: entity.meta.confidence,
        reasons: [...reasons, ...entity.meta.reasons],
        warnings: entity.meta.warnings
      }
    };
  });
}
|
|
2699
|
+
|
|
2700
|
+
// src/formatters.ts
|
|
2701
|
+
// Fallback values for formatting options that do not depend on the preset
// (read by resolveOptions when the caller leaves an option unset).
var DEFAULTS = {
  // Preset used when the caller names none.
  preset: "display",
  // Output flavour and spacing behaviour.
  output: "text",
  typography: "ui",
  noBreak: "smart",
  // List / couple joining.
  join: "none",
  conjunction: "and",
  oxfordComma: true,
  shareLastName: "whenSame",
  sharePrefix: "auto",
  shareSuffix: "auto"
};
|
|
2713
|
+
// Per-preset defaults for the five name-part options
// (prefix, prefer, middle, suffix, order), keyed by preset name.
var PRESET_DEFAULTS = (() => {
  // Every preset except the two sorted ones uses given-family order.
  const make = (prefix, prefer, middle, suffix, order = "given-family") => ({
    prefix,
    prefer,
    middle,
    suffix,
    order
  });
  return {
    display: make("omit", "auto", "none", "auto"),
    preferredDisplay: make("omit", "nickname", "none", "auto"),
    informal: make("omit", "first", "none", "omit"),
    firstOnly: make("omit", "first", "none", "omit"),
    preferredFirst: make("omit", "nickname", "none", "omit"),
    formalFull: make("include", "first", "full", "include"),
    formalShort: make("include", "first", "none", "omit"),
    expandedFull: make("include", "fullGiven", "none", "include"),
    alphabetical: make("omit", "first", "initial", "auto", "family-given"),
    library: make("omit", "first", "initial", "auto", "family-given"),
    initialed: make("omit", "first", "initial", "omit")
  };
})();
|
|
2726
|
+
/**
 * Trim the value and collapse every internal run of whitespace to one space.
 *
 * @param {string} value - Text to normalise.
 * @returns {string} The single-spaced, trimmed text.
 */
function normalizeCollapseSpaces(value) {
  const pieces = value.trim().split(/\s+/);
  return pieces.join(" ");
}
|
|
2729
|
+
/**
 * Remove leading and trailing whitespace; internal spacing is left untouched.
 *
 * @param {string} value - Text to trim.
 * @returns {string} The trimmed text.
 */
function normalizeTrim(value) {
  const trimmed = value.trim();
  return trimmed;
}
|
|
2732
|
+
/**
 * Return the three spacing tokens used when assembling a formatted name.
 *
 * For `"html"` output the non-breaking variants are HTML character
 * references; for any other output they are the literal characters
 * U+00A0 (no-break space) and U+202F (narrow no-break space).
 *
 * NOTE(review): the entity literals in the HTML branch had been decoded to
 * raw characters in the source this was restored from, making the two
 * branches indistinguishable. `&nbsp;` / `&#8239;` are the restored
 * spellings - confirm against the original TypeScript source.
 *
 * @param {string} output - Output mode, e.g. `"html"` or `"text"`.
 * @returns {{SP: string, NBSP: string, NNBSP: string}} Spacing tokens.
 */
function getSpaceTokens(output) {
  if (output === "html") {
    return { SP: " ", NBSP: "&nbsp;", NNBSP: "&#8239;" };
  }
  return { SP: " ", NBSP: "\xA0", NNBSP: "\u202F" };
}
|
|
2735
|
+
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * the value can be placed in HTML text or a quoted attribute.
 *
 * The replacement strings had been entity-decoded in the source this was
 * restored from (leaving a no-op and an unterminated string); they are
 * spelled out again here. A single pass with a lookup table gives the same
 * result as escaping `&` first and the other characters afterwards.
 *
 * @param {string} value - Untrusted text.
 * @returns {string} The HTML-escaped text.
 */
function escapeForHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  return value.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
2738
|
+
/**
 * HTML-escape a single name part when the output mode is `"html"`.
 * Falsy values (undefined, null, "") are returned unchanged.
 *
 * @param {string|undefined|null} value - Part to sanitise.
 * @param {string} output - Output mode.
 * @returns {string|undefined|null} Escaped text for HTML output, else the input.
 */
function sanitizePart(value, output) {
  const needsEscaping = Boolean(value) && output === "html";
  return needsEscaping ? escapeForHtml(value) : value;
}
|
|
2742
|
+
/**
 * Return a shallow copy of a parsed name whose textual fields are
 * HTML-escaped. Fields that are missing or falsy become `undefined`;
 * all other properties are copied as they are.
 *
 * @param {object} parsed - Parsed name record.
 * @returns {object} Copy with escaped text fields.
 */
function sanitizeParsedName(parsed) {
  const textFields = [
    "prefix",
    "first",
    "fullGiven",
    "middle",
    "last",
    "suffix",
    "nickname",
    "preferredGiven"
  ];
  const safe = { ...parsed };
  for (const field of textFields) {
    const raw = parsed[field];
    safe[field] = raw ? escapeForHtml(raw) : void 0;
  }
  return safe;
}
|
|
2756
|
+
/**
 * Fill in every formatting option the caller left unset.
 *
 * Preset-independent options fall back to `DEFAULTS`; the five name-part
 * options (prefer, middle, prefix, suffix, order) fall back to the chosen
 * preset's entry in `PRESET_DEFAULTS`; the remaining options fall back to
 * fixed literals.
 *
 * @param {object|undefined|null} options - Caller-supplied options.
 * @returns {object} Fully populated options object.
 */
function resolveOptions(options) {
  const preset = options?.preset ?? DEFAULTS.preset;
  const presetDefaults = PRESET_DEFAULTS[preset];
  const resolved = { preset };

  const globalKeys = [
    "output",
    "typography",
    "noBreak",
    "join",
    "conjunction",
    "oxfordComma",
    "shareLastName",
    "sharePrefix",
    "shareSuffix"
  ];
  for (const key of globalKeys) {
    resolved[key] = options?.[key] ?? DEFAULTS[key];
  }

  // The preset entry is only consulted for options the caller did not supply.
  for (const key of ["prefer", "middle", "prefix", "suffix", "order"]) {
    resolved[key] = options?.[key] ?? presetDefaults[key];
  }

  const fixedFallbacks = [
    ["prefixForm", "short"],
    ["suffixForm", "short"],
    ["capitalization", "canonical"],
    ["punctuation", "canonical"],
    ["apostrophes", "canonical"]
  ];
  for (const [key, fallback] of fixedFallbacks) {
    resolved[key] = options?.[key] ?? fallback;
  }

  return resolved;
}
|
|
2782
|
+
/**
 * Split a string into its whitespace-separated words.
 *
 * @param {string} value - Text to split.
 * @returns {string[]} The non-empty words, in order.
 */
function toWords(value) {
  const words = value.match(/\S+/g);
  return words ?? [];
}
|
|
2785
|
+
/**
 * Turn a word into an upper-cased initial followed by a period.
 *
 * The first *code point* is used rather than the first UTF-16 code unit, so
 * a name starting with a character outside the Basic Multilingual Plane
 * produces a valid initial instead of a lone surrogate.
 *
 * @param {string} word - Word to abbreviate.
 * @returns {string|undefined} E.g. `"J."`, or undefined for a blank word.
 */
function toInitial(word) {
  const w = word.trim();
  if (!w) return void 0;
  // String iteration yields whole code points.
  const [lead] = w;
  return `${lead.toUpperCase()}.`;
}
|
|
2790
|
+
/**
 * Choose which given-name variant to display.
 *
 * - `"nickname"`: preferredGiven, then nickname, then first
 * - `"fullGiven"`: fullGiven, then first, then nickname
 * - anything else: first, then nickname
 *
 * Only null/undefined variants are skipped; a variant that trims to ""
 * is still selected.
 *
 * @param {object} parsed - Parsed name record.
 * @param {string} prefer - Preference mode.
 * @returns {string|undefined} The chosen given name.
 */
function resolveGiven(parsed, prefer) {
  const clean = (text) => (text ? normalizeTrim(text) : void 0);
  const first = clean(parsed.first);
  const fullGiven = clean(parsed.fullGiven);
  const nickname = clean(parsed.nickname);
  const preferredGiven = clean(parsed.preferredGiven);

  switch (prefer) {
    case "nickname":
      return preferredGiven ?? nickname ?? first;
    case "fullGiven":
      return fullGiven ?? first ?? nickname;
    default:
      return first ?? nickname;
  }
}
|
|
2800
|
+
/**
 * Produce the prefix (honorific) text to display, if any.
 *
 * Structured prefix tokens take priority; otherwise the plain `prefix`
 * string is used with its whitespace collapsed.
 *
 * @param {object} parsed - Parsed name record.
 * @param {string} prefixMode - `"omit"` suppresses the prefix entirely.
 * @param {object} o - Resolved formatting options.
 * @returns {string|undefined} Prefix text, or undefined when there is none.
 */
function resolvePrefix(parsed, prefixMode, o) {
  if (prefixMode === "omit") return void 0;

  const fromTokens = renderAffixTokens(parsed.prefixTokens, "prefix", o);
  if (fromTokens) return fromTokens;

  if (!parsed.prefix) return void 0;
  const collapsed = normalizeCollapseSpaces(parsed.prefix);
  // An all-whitespace prefix collapses to "" and counts as absent.
  return collapsed || void 0;
}
|
|
2809
|
+
/**
 * Return the trimmed family name, or undefined when it is missing or blank.
 *
 * @param {object} parsed - Parsed name record.
 * @returns {string|undefined} The family name.
 */
function resolveLast(parsed) {
  const { last } = parsed;
  if (last == null) return void 0;
  const trimmed = normalizeTrim(last);
  return trimmed.length === 0 ? void 0 : trimmed;
}
|
|
2814
|
+
/**
 * Produce the suffix text to display, if any.
 *
 * `"omit"` suppresses the suffix. Otherwise structured suffix tokens are
 * rendered when the mode is `"include"` or when tokens are present, falling
 * back to the plain `suffix` string with whitespace collapsed.
 *
 * @param {object} parsed - Parsed name record.
 * @param {string} suffixMode - Suffix mode from the resolved options.
 * @param {object} o - Resolved formatting options.
 * @returns {string|undefined} Suffix text, or undefined when there is none.
 */
function resolveSuffix(parsed, suffixMode, o) {
  // Computed before the mode check, as in the original evaluation order.
  const plain = parsed.suffix ? normalizeCollapseSpaces(parsed.suffix) : void 0;
  if (suffixMode === "omit") return void 0;

  const hasTokens = Boolean(parsed.suffixTokens && parsed.suffixTokens.length > 0);
  if (suffixMode !== "include" && !hasTokens) {
    return plain;
  }
  return renderAffixTokens(parsed.suffixTokens, "suffix", o) ?? plain;
}
|
|
2825
|
+
/**
 * Apply the punctuation mode: `"strip"` removes every period, any other
 * mode returns the value unchanged.
 *
 * @param {string} value - Text to process.
 * @param {string} mode - Punctuation mode.
 * @returns {string} The processed text.
 */
function applyPunctuation(value, mode) {
  return mode === "strip" ? value.split(".").join("") : value;
}
|
|
2829
|
+
/**
 * Apply the apostrophe mode: `"ascii"` rewrites U+2019, U+2018 and U+02BC
 * to a plain `'`; any other mode returns the value unchanged.
 *
 * @param {string} value - Text to process.
 * @param {string} mode - Apostrophe mode.
 * @returns {string} The processed text.
 */
function applyApostrophes(value, mode) {
  return mode === "ascii" ? value.replace(/[\u2019\u2018\u02BC]/g, "'") : value;
}
|
|
2833
|
+
/**
 * Apply the capitalization mode: `"lower"` and `"upper"` change case,
 * any other mode returns the value unchanged.
 *
 * @param {string} value - Text to process.
 * @param {string} mode - Capitalization mode.
 * @returns {string} The processed text.
 */
function applyCapitalization(value, mode) {
  switch (mode) {
    case "lower":
      return value.toLowerCase();
    case "upper":
      return value.toUpperCase();
    default:
      return value;
  }
}
|
|
2838
|
+
/**
 * Render structured prefix or suffix tokens to display text.
 *
 * Unless a "preserve" mode is set, each token is replaced by its canonical
 * short/long form (when the form is not `"asInput"` and a canonical value
 * exists) and then passed through the apostrophe, punctuation and
 * capitalization transforms. Prefix tokens are joined with a space; suffix
 * tokens with a comma plus the "commaSpace" boundary.
 *
 * For HTML output each token's text is escaped here. The callers escape the
 * plain string fields of a parsed name but pass the token arrays through
 * untouched, so without this the token text would reach HTML unescaped.
 *
 * @param {Array<object>|undefined|null} tokens - Affix tokens; each is read for
 *   `value`, `canonicalShort` and `canonicalLong`.
 * @param {string} ctx - `"prefix"` or `"suffix"`.
 * @param {object} o - Resolved formatting options.
 * @returns {string|undefined} Rendered text, or undefined when nothing renders.
 */
function renderAffixTokens(tokens, ctx, o) {
  if (!tokens || tokens.length === 0) return void 0;

  const spaces = getSpaceTokens(o.output);
  const form = ctx === "prefix" ? o.prefixForm : o.suffixForm;
  // Any "preserve" setting means the token is emitted exactly as typed.
  const preserveInput =
    o.capitalization === "preserve" || o.punctuation === "preserve" || o.apostrophes === "preserve";

  const renderOne = (token) => {
    let text = String(token.value ?? "").trim();
    if (!preserveInput) {
      if (form !== "asInput") {
        const canonical = form === "long" ? token.canonicalLong : token.canonicalShort;
        if (canonical) text = canonical;
      }
      text = applyApostrophes(text, o.apostrophes);
      text = applyPunctuation(text, o.punctuation);
      text = applyCapitalization(text, o.capitalization).trim();
    }
    // Escape last so the transforms above never touch entity text.
    return sanitizePart(text, o.output);
  };

  const rendered = tokens.map(renderOne).filter((s) => s.length > 0);
  if (rendered.length === 0) return void 0;

  if (ctx === "suffix") {
    return rendered.join("," + boundarySpace("commaSpace", o, spaces));
  }
  return rendered.join(" ");
}
|
|
2863
|
+
/**
 * Pick the spacing token to place at a given boundary inside a name.
 *
 * - `noBreak: "none"` or `typography: "plain"`: always the ordinary space.
 * - `noBreak: "all"`: always the no-break space.
 * - otherwise: "initialTight" gets the narrow no-break space for `"ui"` and
 *   `"fine"` typography (no-break space for others); the boundaries listed
 *   below get the no-break space; anything else gets the ordinary space.
 *
 * @param {string} boundary - Boundary kind.
 * @param {{noBreak: string, typography: string}} o - Resolved options.
 * @param {{SP: string, NBSP: string, NNBSP: string}} t - Spacing tokens.
 * @returns {string} The spacing token for that boundary.
 */
function boundarySpace(boundary, o, t) {
  const { noBreak, typography } = o;
  if (noBreak === "none" || typography === "plain") {
    return t.SP;
  }
  if (noBreak === "all") {
    return t.NBSP;
  }
  if (boundary === "initialTight") {
    const wantsNarrow = typography === "ui" || typography === "fine";
    return wantsNarrow ? t.NNBSP : t.NBSP;
  }
  const nonBreaking = ["prefixToNext", "givenToLast", "initialToWord", "commaSpace", "commaToGiven"];
  return nonBreaking.includes(boundary) ? t.NBSP : t.SP;
}
|
|
2886
|
+
/**
 * Join initials with the "initialTight" boundary space.
 *
 * @param {string[]} initials - Initials such as `"J."`.
 * @param {object} o - Resolved formatting options.
 * @param {object} t - Spacing tokens.
 * @returns {string} "" for no initials, the single initial, or the joined run.
 */
function joinInitials(initials, o, t) {
  switch (initials.length) {
    case 0:
      return "";
    case 1:
      return initials[0];
    default:
      return initials.join(boundarySpace("initialTight", o, t));
  }
}
|
|
2892
|
+
/**
 * Render the middle name according to the middle mode.
 *
 * @param {object} parsed - Parsed name record.
 * @param {string} middleMode - `"none"` omits, `"full"` keeps the trimmed
 *   text, anything else reduces each word to an initial.
 * @param {object} o - Resolved formatting options.
 * @param {object} t - Spacing tokens.
 * @returns {string|undefined} The middle text, or undefined when omitted or absent.
 */
function renderMiddle(parsed, middleMode, o, t) {
  const middle = parsed.middle ? normalizeTrim(parsed.middle) : "";
  if (!middle || middleMode === "none") {
    return void 0;
  }
  if (middleMode === "full") {
    return middle;
  }
  const initials = toWords(middle).map(toInitial).filter(Boolean);
  return initials.length > 0 ? joinInitials(initials, o, t) : void 0;
}
|
|
2902
|
+
/**
 * Render the given-name portion (given name plus optional middle part).
 *
 * Preset-specific behaviour:
 * - `"initialed"`: given and middle names are all reduced to initials.
 * - `"display"` with middle mode `"none"`: if the given name is itself a
 *   single letter (optionally with a period), middle initials are shown.
 * - `"library"`: the full given name is appended in parentheses.
 * When `prefer` is `"fullGiven"` and the full given name was chosen, the
 * middle name is suppressed.
 *
 * @param {object} parsed - Parsed name record.
 * @param {object} o - Resolved formatting options.
 * @param {object} t - Spacing tokens.
 * @returns {{givenLikeText: string|undefined, finalGivenToken: string|undefined}}
 *   The rendered text and its last visible token.
 */
function renderGivenPlusMiddle(parsed, o, t) {
  const given = resolveGiven(parsed, o.prefer);
  if (!given) {
    return { givenLikeText: void 0, finalGivenToken: void 0 };
  }

  if (o.preset === "initialed") {
    const middleInitials = parsed.middle
      ? toWords(normalizeTrim(parsed.middle)).map(toInitial).filter(Boolean)
      : [];
    const allInitials = [toInitial(given), ...middleInitials].filter(Boolean);
    return {
      givenLikeText: joinInitials(allInitials, o, t),
      finalGivenToken: allInitials[allInitials.length - 1]
    };
  }

  let middleMode = o.middle;
  const usingFullGiven =
    o.prefer === "fullGiven" && parsed.fullGiven && given === normalizeTrim(parsed.fullGiven);
  if (usingFullGiven) {
    middleMode = "none";
  }
  // Both conditions are checked after the override above, so a suppressed
  // middle can still be re-enabled here for a one-letter given name.
  const givenIsSingleLetter = () => /^[A-Za-z]\.?$/.test(given.trim());
  if (o.preset === "display" && middleMode === "none" && givenIsSingleLetter()) {
    middleMode = "initial";
  }

  const middleText = renderMiddle(parsed, middleMode, o, t);
  let givenLikeText = given;
  let finalGivenToken = given;
  if (middleText) {
    givenLikeText = given + boundarySpace("space", o, t) + middleText;
    finalGivenToken = middleText;
  }

  if (o.preset === "library" && parsed.fullGiven) {
    const fullGivenText = normalizeTrim(parsed.fullGiven);
    if (fullGivenText) {
      const parenthesised = `(${fullGivenText})`;
      givenLikeText += `${t.SP}${parenthesised}`;
      finalGivenToken = parenthesised;
    }
  }

  return { givenLikeText, finalGivenToken };
}
|
|
2939
|
+
/**
 * Render one parsed name to text and return both the pieces and the result.
 *
 * Layouts:
 * - `"formalShort"`: prefix + family name (+ suffix); no given name.
 * - `"firstOnly"` / `"preferredFirst"`: optional prefix + given name only.
 * - `order: "family-given"`: "Family, Given" (+ suffix); no prefix.
 * - otherwise: prefix, given, family in that order (+ suffix).
 * For HTML output the parsed name's text fields are escaped first.
 *
 * @param {object} parsed - Parsed name record.
 * @param {object} o - Resolved formatting options.
 * @returns {{prefixText: string|undefined, givenText: string|undefined,
 *   lastText: string|undefined, suffixText: string|undefined, fullText: string}}
 */
function renderSingle(parsed, o) {
  const t = getSpaceTokens(o.output);
  const source = o.output === "html" ? sanitizeParsedName(parsed) : parsed;

  const prefixText = resolvePrefix(source, o.prefix, o);
  const lastText = resolveLast(source);
  const suffixText = resolveSuffix(source, o.suffix, o);
  const givenText = renderGivenPlusMiddle(source, o, t).givenLikeText;

  const gap = (boundary) => boundarySpace(boundary, o, t);
  // Append ", suffix" when there is a suffix to show.
  const withSuffix = (text) => (suffixText ? `${text},${gap("commaSpace")}${suffixText}` : text);

  if (o.preset === "formalShort") {
    const core =
      prefixText && lastText
        ? `${prefixText}${gap("prefixToNext")}${lastText}`
        : prefixText || lastText || "";
    return { prefixText, givenText: void 0, lastText, suffixText, fullText: withSuffix(core) };
  }

  if (o.preset === "firstOnly" || o.preset === "preferredFirst") {
    const rawGiven = resolveGiven(source, o.prefer);
    const onlyGiven = rawGiven ? normalizeTrim(rawGiven) : void 0;
    if (!onlyGiven) {
      return { prefixText, givenText: void 0, lastText, suffixText, fullText: "" };
    }
    if (!prefixText) {
      return { prefixText: void 0, givenText: onlyGiven, lastText, suffixText, fullText: onlyGiven };
    }
    return {
      prefixText,
      givenText: onlyGiven,
      lastText,
      suffixText,
      fullText: prefixText + gap("prefixToNext") + onlyGiven
    };
  }

  if (o.order === "family-given") {
    let core = lastText || "";
    if (givenText) {
      core += lastText ? `,${gap("commaToGiven")}${givenText}` : givenText;
    }
    return {
      prefixText: void 0,
      givenText,
      lastText,
      suffixText,
      fullText: withSuffix(core)
    };
  }

  // Default given-family layout.
  let core;
  if (prefixText && givenText) {
    core = prefixText + gap("prefixToNext") + givenText;
    if (lastText) {
      core += gap("givenToLast") + lastText;
    }
  } else if (givenText && lastText) {
    core = givenText + gap("givenToLast") + lastText;
  } else {
    // Zero or one piece, or prefix + family with no given name.
    core = [prefixText, givenText, lastText].filter(Boolean).join(gap("space"));
  }
  return { prefixText, givenText, lastText, suffixText, fullText: withSuffix(core) };
}
|
|
3036
|
+
/**
 * Build a comparison key: trimmed, single-spaced and lower-cased.
 *
 * @param {string|undefined|null} value - Text to normalise.
 * @returns {string|undefined} The key, or undefined for a falsy value.
 */
function normalizeCompareKey(value) {
  if (!value) return void 0;
  const singleSpaced = value.trim().replace(/\s+/g, " ");
  return singleSpaced.toLowerCase();
}
|
|
3040
|
+
/**
 * Join items into a sentence-style list: "A", "A and B", "A, B, and C".
 *
 * @param {string[]} items - Already-formatted items.
 * @param {{conjunction: string, oxfordComma: boolean}} o - Resolved options.
 * @returns {string} The joined list ("" for no items).
 */
function joinList(items, o) {
  const count = items.length;
  switch (count) {
    case 0:
      return "";
    case 1:
      return items[0];
    case 2:
      return `${items[0]} ${o.conjunction} ${items[1]}`;
    default: {
      const leading = items.slice(0, -1).join(", ");
      const serialComma = o.oxfordComma ? "," : "";
      return `${leading}${serialComma} ${o.conjunction} ${items[count - 1]}`;
    }
  }
}
|
|
3050
|
+
/**
 * Decide whether a part may be shared between two names.
 * `"never"` disables sharing; every other mode shares exactly when the
 * two values are the same.
 *
 * @param {string} mode - Sharing mode.
 * @param {boolean} same - Whether the two values match.
 * @returns {boolean} True when the part should be shared.
 */
function shouldShare(mode, same) {
  return mode === "never" ? false : same;
}
|
|
3055
|
+
/**
 * Join two rendered names as a couple, sharing the family name (and
 * optionally prefix and suffix) where the options allow it.
 *
 * Sharing applies only in given-family order, when both have a given name,
 * the family names match, and either both or neither have a prefix.
 * In every other case the two full texts are joined with the conjunction.
 *
 * @param {object} a - First rendered name (as returned by renderSingle).
 * @param {object} b - Second rendered name.
 * @param {object} o - Resolved formatting options.
 * @returns {string} The combined text.
 */
function joinCouple(a, b, o) {
  const t = getSpaceTokens(o.output);
  const gap = (boundary) => boundarySpace(boundary, o, t);

  // Two parts match when both normalise to the same non-null key.
  const matches = (left, right) => {
    const leftKey = normalizeCompareKey(left);
    const rightKey = normalizeCompareKey(right);
    return leftKey != null && leftKey === rightKey;
  };
  const sameLast = matches(a.lastText, b.lastText);
  const samePrefix = matches(a.prefixText, b.prefixText);
  const sameSuffix = matches(a.suffixText, b.suffixText);

  const separate = `${a.fullText} ${o.conjunction} ${b.fullText}`;
  if (o.order !== "given-family") {
    return separate;
  }

  const shareLast = shouldShare(o.shareLastName, sameLast);
  const sharePrefix = shouldShare(o.sharePrefix, samePrefix);
  const shareSuffix = shouldShare(o.shareSuffix, sameSuffix);

  if (!(shareLast && a.lastText && a.givenText && b.givenText)) {
    return separate;
  }

  let lead;
  if (!a.prefixText && !b.prefixText) {
    lead = "";
  } else if (a.prefixText && b.prefixText) {
    const prefixes =
      sharePrefix && samePrefix
        ? `${a.prefixText}`
        : `${a.prefixText} ${o.conjunction} ${b.prefixText}`;
    lead = prefixes + gap("prefixToNext");
  } else {
    // Only one of the two has a prefix: no sharing.
    return separate;
  }

  let combined = `${lead}${a.givenText} ${o.conjunction} ${b.givenText}${gap("givenToLast")}${a.lastText}`;
  if (shareSuffix && a.suffixText) {
    combined += `,${gap("commaSpace")}${a.suffixText}`;
  }
  return combined;
}
|
|
3100
|
+
/**
 * Type guard: true for a non-null object that has a string `kind` property.
 *
 * @param {unknown} input - Value to test.
 * @returns {boolean} Whether the value looks like a parsed entity.
 */
function isParsedNameEntity(input) {
  if (input === null || typeof input !== "object") {
    return false;
  }
  return "kind" in input && typeof input.kind === "string";
}
|
|
3103
|
+
/**
 * Convert a person entity to the legacy parsed-name shape used by the
 * renderers. Only truthy fields are copied.
 *
 * Mapping: honorific -> prefix, given -> first, family -> last;
 * fullGiven, middle, suffix and nickname keep their names.
 *
 * @param {object} entity - Person entity.
 * @returns {object} Legacy parsed-name record.
 */
function personEntityToLegacy(entity) {
  const fieldMap = [
    ["honorific", "prefix"],
    ["given", "first"],
    ["fullGiven", "fullGiven"],
    ["middle", "middle"],
    ["family", "last"],
    ["suffix", "suffix"],
    ["nickname", "nickname"]
  ];
  const legacy = {};
  for (const [from, to] of fieldMap) {
    if (entity[from]) {
      legacy[to] = entity[from];
    }
  }
  return legacy;
}
|
|
3114
|
+
/**
 * Format an organization entity for the active preset.
 *
 * - `"alphabetical"`: base name, then ", legal suffix" when one exists
 *   (leading commas/whitespace on the suffix are dropped).
 * - `"informal"`, `"firstOnly"`, `"preferredFirst"`, `"formalShort"`,
 *   `"initialed"`: base name only.
 * - any other preset: the trimmed raw input.
 * Values are HTML-escaped when the output mode is `"html"`.
 *
 * @param {object} org - Organization entity (reads `meta.raw`, `baseName`,
 *   `legalSuffixRaw`).
 * @param {object} o - Resolved formatting options.
 * @returns {string} The formatted organization name.
 */
function formatOrganization(org, o) {
  const t = getSpaceTokens(o.output);
  const fullName = sanitizePart(org.meta.raw.trim(), o.output) || "";
  const baseName = sanitizePart(org.baseName, o.output) || "";
  const legalSuffix = sanitizePart(org.legalSuffixRaw, o.output);

  if (o.preset === "alphabetical") {
    if (!legalSuffix) {
      return baseName;
    }
    const bareSuffix = legalSuffix.replace(/^[,\s]+/, "");
    return `${baseName},${boundarySpace("commaSpace", o, t)}${bareSuffix}`;
  }

  const baseOnlyPresets = ["informal", "firstOnly", "preferredFirst", "formalShort", "initialed"];
  return baseOnlyPresets.includes(o.preset) ? baseName : fullName;
}
|
|
3142
|
+
/**
 * Format a family/household entity for the active preset.
 *
 * - `"informal"`, `"firstOnly"`, `"preferredFirst"`: "The <name>" for the
 *   plural-surname style, else the name alone.
 * - `"formalShort"`: "The <name>" for the plural-surname style, else
 *   "<name> <familyWord>" with "Family" as the default word.
 * - `"alphabetical"`: "<name> <familyWord>" when a word exists, else the name.
 * - `"initialed"`: the name alone.
 * - any other preset: optional article, name, optional family word.
 * Values are HTML-escaped when the output mode is `"html"`.
 *
 * @param {object} family - Family entity (reads `familyName`, `article`,
 *   `familyWord`, `style`).
 * @param {object} o - Resolved formatting options.
 * @returns {string} The formatted family name.
 */
function formatFamily(family, o) {
  const t = getSpaceTokens(o.output);
  const familyName = sanitizePart(family.familyName, o.output) || "";
  const article = sanitizePart(family.article, o.output);
  const familyWord = sanitizePart(family.familyWord, o.output);
  const isPlural = family.style === "pluralSurname";

  const gap = (boundary) => boundarySpace(boundary, o, t);
  const withThe = () => `The${gap("prefixToNext")}${familyName}`;
  const withWord = (word) => `${familyName}${gap("givenToLast")}${word}`;

  if (o.preset === "informal" || o.preset === "firstOnly" || o.preset === "preferredFirst") {
    return isPlural ? withThe() : familyName;
  }
  if (o.preset === "formalShort") {
    return isPlural ? withThe() : withWord(familyWord || "Family");
  }
  if (o.preset === "alphabetical") {
    return familyWord ? withWord(familyWord) : familyName;
  }
  if (o.preset === "initialed") {
    return familyName;
  }

  // display / preferredDisplay / formalFull and any other preset.
  const lead = article ? `${article}${gap("prefixToNext")}` : "";
  const tail = familyWord ? `${gap("givenToLast")}${familyWord}` : "";
  return `${lead}${familyName}${tail}`;
}
|
|
3184
|
+
/**
 * Format a compound entity (several members joined by a connector).
 *
 * Person members are rendered with `renderSingle`; when the compound has a
 * shared family name, each member is rendered without its own family name
 * and the shared one is added once (before the members in family-given
 * order, after them otherwise). Non-person members contribute their `text`.
 * If no member renders to anything, the raw input is returned.
 *
 * @param {object} compound - Compound entity (reads `connector`,
 *   `sharedFamily`, `members`, `meta.raw`).
 * @param {object} o - Resolved formatting options.
 * @returns {string} The formatted compound name.
 */
function formatCompound(compound, o) {
  const t = getSpaceTokens(o.output);
  // Only "&", "+" and "et" are kept as-is; anything else becomes "and".
  const connector = ["&", "+", "et"].includes(compound.connector) ? compound.connector : "and";
  const sharedFamily = sanitizePart(compound.sharedFamily, o.output);

  const renderMember = (member) => {
    if (member.kind !== "person") {
      return sanitizePart(member.text, o.output) || "";
    }
    const legacy = personEntityToLegacy(member);
    const toRender = sharedFamily ? { ...legacy, last: void 0 } : legacy;
    return renderSingle(toRender, o).fullText;
  };

  const rendered = compound.members.map(renderMember).filter(Boolean);
  if (rendered.length === 0) {
    return sanitizePart(compound.meta.raw, o.output) || "";
  }

  const joined = rendered.join(` ${connector} `);
  if (!sharedFamily) {
    return joined;
  }
  return o.order === "family-given"
    ? `${sharedFamily},${boundarySpace("commaSpace", o, t)}${joined}`
    : `${joined}${boundarySpace("givenToLast", o, t)}${sharedFamily}`;
}
|
|
3211
|
+
/**
 * Render an entity of kind "unknown": its `text` when set, otherwise its raw
 * input, passed through `sanitizePart`.
 *
 * @param {object} unknown - Entity with optional `text` and `meta.raw`.
 * @param {object} o - Resolved formatting options (`output` is used).
 * @returns {string} Sanitized text, or "" when nothing remains.
 */
function formatUnknown(unknown, o) {
  const source = unknown.text || unknown.meta.raw;
  const cleaned = sanitizePart(source, o.output);
  return cleaned || "";
}
|
|
3214
|
+
/**
 * Render an entity of kind "rejected" by echoing its sanitized raw input.
 *
 * @param {object} rejected - Entity with `meta.raw`.
 * @param {object} o - Resolved formatting options (`output` is used).
 * @returns {string} Sanitized raw text, or "" when nothing remains.
 */
function formatRejected(rejected, o) {
  const cleaned = sanitizePart(rejected.meta.raw, o.output);
  return cleaned || "";
}
|
|
3217
|
+
/**
 * Dispatch an entity to the formatter that matches its `kind`.
 *
 * @param {object} entity - Parsed name entity; `kind` selects the formatter.
 * @param {object} o - Resolved formatting options.
 * @returns {string} Formatted text. For an unrecognised kind this is the
 *   entity's `meta.raw` as-is, or "" when that is missing.
 */
function formatEntity(entity, o) {
  const { kind } = entity;
  if (kind === "person") {
    return renderSingle(personEntityToLegacy(entity), o).fullText;
  }
  if (kind === "organization") {
    return formatOrganization(entity, o);
  }
  if (kind === "family" || kind === "household") {
    return formatFamily(entity, o);
  }
  if (kind === "compound") {
    return formatCompound(entity, o);
  }
  if (kind === "unknown") {
    return formatUnknown(entity, o);
  }
  if (kind === "rejected") {
    return formatRejected(entity, o);
  }
  return entity.meta?.raw || "";
}
|
|
3236
|
+
/**
 * Coerce any accepted input into the legacy parsed-person shape.
 *
 * - strings are parsed with `parsePersonName`;
 * - person entities are converted with `personEntityToLegacy`;
 * - other entities are re-parsed as a person from their `meta.raw`;
 * - anything else is returned untouched (taken to be legacy-shaped already).
 *
 * @param {*} input - String, parsed entity, or legacy parsed object.
 * @returns {*} Legacy parsed-person object.
 */
function ensureParsedLegacy(input) {
  if (typeof input === "string") {
    return parsePersonName(input);
  }
  if (!isParsedNameEntity(input)) {
    return input;
  }
  return input.kind === "person"
    ? personEntityToLegacy(input)
    : parsePersonName(input.meta.raw);
}
|
|
3248
|
+
/**
 * Format a name, or an array of names, as display text.
 *
 * A single input is formatted on its own. An array is rendered as a list,
 * except that exactly two items with a `join` other than "list" are rendered
 * as a couple via `joinCouple`.
 *
 * @param {*} input - String, parsed entity, legacy parsed object, or an array
 *   of those.
 * @param {object} [options] - Formatting options, passed to `resolveOptions`.
 * @returns {string} The formatted text.
 * @throws {Error} When `input` is an array and the resolved `join` is "none".
 */
function formatName(input, options) {
  const o = resolveOptions(options);

  // One item, entity-aware: strings that parse to something other than a
  // person/unknown entity use the entity formatter, the rest the person
  // renderer.
  const formatOne = (item) => {
    if (isParsedNameEntity(item)) {
      return formatEntity(item, o);
    }
    if (typeof item === "string") {
      const entity = parseName(item);
      const isPersonLike = entity.kind === "person" || entity.kind === "unknown";
      if (!isPersonLike) {
        return formatEntity(entity, o);
      }
    }
    return renderSingle(ensureParsedLegacy(item), o).fullText;
  };

  if (!Array.isArray(input)) {
    return formatOne(input);
  }
  if (o.join === "none") {
    throw new Error('formatName: array input requires options.join !== "none"');
  }

  const isCouple = o.join !== "list" && input.length === 2;
  if (!isCouple) {
    return joinList(input.map(formatOne), o);
  }

  // Couple: both items are coerced to persons first, then rendered with
  // joining switched off so joinCouple can combine them.
  const [first, second] = input
    .map(ensureParsedLegacy)
    .map((person) => renderSingle(person, { ...o, join: "none" }));
  return joinCouple(first, second, o);
}
|
|
3288
|
+
|
|
3289
|
+
// src/pronouns/data.ts
|
|
3290
|
+
// Built-in pronoun sets keyed by id. Each row below is
// [id, label, subject, object, possessiveDeterminer, possessivePronoun, reflexive, notes]
// and is expanded into an object with exactly those properties, in that order.
var BUILT_IN_PRONOUNS = Object.fromEntries(
  [
    // Standard pronouns
    ["he", "he/him", "he", "him", "his", "his", "himself", "Masculine pronouns"],
    ["she", "she/her", "she", "her", "her", "hers", "herself", "Feminine pronouns"],
    ["they", "they/them", "they", "them", "their", "theirs", "themselves", "Singular they/them pronouns"],
    ["it", "it/its", "it", "it", "its", "its", "itself", "Neutral/inanimate pronouns"],
    // Neopronouns
    ["ze-hir", "ze/hir", "ze", "hir", "hir", "hirs", "hirself", "Neopronouns ze/hir"],
    ["ze-zir", "ze/zir", "ze", "zir", "zir", "zirs", "zirself", "Neopronouns ze/zir"],
    ["xe-xem", "xe/xem", "xe", "xem", "xyr", "xyrs", "xemself", "Neopronouns xe/xem"],
    ["fae-faer", "fae/faer", "fae", "faer", "faer", "faers", "faerself", "Neopronouns fae/faer"],
    ["ey-em", "ey/em", "ey", "em", "eir", "eirs", "emself", "Neopronouns ey/em (Spivak)"],
    // Special pseudo-sets
    [
      "any", "any pronouns", "they", "them", "their", "theirs", "themselves",
      "User accepts any pronouns; defaults to they/them for text generation",
    ],
    [
      "name-only", "use name only", "", "", "", "", "",
      "User prefers name instead of pronouns; consumer must handle empty strings",
    ],
  ].map(([id, label, subject, object, possessiveDeterminer, possessivePronoun, reflexive, notes]) => [
    id,
    { id, label, subject, object, possessiveDeterminer, possessivePronoun, reflexive, notes },
  ])
);
|
|
3405
|
+
// Maps a normalized pronoun spec (lower-case, single spaces) to the id of a
// built-in pronoun set. Declared per target id; every listed spelling becomes
// one key of the resulting object, in the order written here.
var SPEC_ALIASES = Object.fromEntries(
  [
    // he/him family
    ["he", ["he/him", "he/his", "he/him/his", "he/him/his/his", "he/him/his/his/himself"]],
    // she/her family
    ["she", ["she/her", "she/hers", "she/her/hers", "she/her/her/hers", "she/her/her/hers/herself"]],
    // they/them family
    [
      "they",
      [
        "they/them",
        "they/their",
        "they/theirs",
        "they/them/their",
        "they/them/their/theirs",
        "they/them/their/theirs/themselves",
        "they/them/their/theirs/themself",
      ],
    ],
    // it/its family
    ["it", ["it/its", "it/it/its", "it/it/its/its/itself"]],
    // Neopronouns - common short forms
    ["ze-hir", ["ze/hir", "ze/hir/hirs"]],
    ["ze-zir", ["ze/zir", "ze/zir/zirs"]],
    ["xe-xem", ["xe/xem", "xe/xem/xyr"]],
    ["fae-faer", ["fae/faer", "fae/faer/faers"]],
    ["ey-em", ["ey/em", "ey/em/eir"]],
    // Special sets
    ["any", ["any", "any pronouns", "all pronouns"]],
    ["name-only", ["no pronouns", "name-only", "use name", "use name only", "name only"]],
  ].flatMap(([id, spellings]) => spellings.map((spelling) => [spelling, id]))
);
|
|
3451
|
+
|
|
3452
|
+
// src/pronouns/parser.ts
|
|
3453
|
+
/**
 * Canonicalise a pronoun spec for lookup: lower-case, no leading or trailing
 * whitespace, and every run of whitespace collapsed to a single space.
 *
 * @param {string} spec - Raw spec text such as " She /  Her ".
 * @returns {string} The normalized spec.
 */
function normalizeSpec(spec) {
  const words = spec.split(/\s+/).filter(Boolean);
  return words.join(" ").toLowerCase();
}
|
|
3456
|
+
/**
 * Guess the reflexive form for a subject pronoun.
 *
 * The four standard subjects are matched case-insensitively and map to their
 * fixed forms; any other subject gets "self" appended, original casing kept.
 *
 * @param {string} subject - Subject pronoun, e.g. "she" or "xe".
 * @returns {string} The reflexive pronoun.
 */
function deriveReflexive(subject) {
  switch (subject.toLowerCase()) {
    case "he":
      return "himself";
    case "she":
      return "herself";
    case "it":
      return "itself";
    case "they":
      return "themselves";
    default:
      return `${subject}self`;
  }
}
|
|
3464
|
+
/**
 * Parse a pronoun spec such as "she/her", "they", or "xe/xem/xyr/xyrs/xemself"
 * into a pronoun set.
 *
 * Resolution order:
 *  1. a known alias (whitespace around "/" is ignored, so "she / her / hers"
 *     resolves like "she/her/hers");
 *  2. a built-in set id;
 *  3. for two-token specs, the he / she / they heuristics;
 *  4. otherwise a custom set built from the slash-separated tokens, filling
 *     missing roles from the tokens that were given.
 *
 * Table lookups only consider own properties, so specs that happen to name an
 * inherited object member (e.g. "constructor") are treated as custom specs
 * instead of resolving to a non-pronoun value.
 *
 * @param {string} spec - The pronoun spec text.
 * @returns {object} A fresh pronoun set object (built-in sets are copied).
 * @throws {Error} When the spec contains no tokens at all.
 */
function parsePronounSpec(spec) {
  const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
  const builtIn = (id) => (hasOwn(BUILT_IN_PRONOUNS, id) ? { ...BUILT_IN_PRONOUNS[id] } : void 0);

  const norm = normalizeSpec(spec);
  // Alias keys never have spaces around "/", so strip them before the lookup.
  const aliasKey = norm.replace(/\s*\/\s*/g, "/");
  const viaAlias = hasOwn(SPEC_ALIASES, aliasKey) ? builtIn(SPEC_ALIASES[aliasKey]) : void 0;
  const known = viaAlias || builtIn(norm);
  if (known) {
    return known;
  }

  // Tokens come from the raw spec so a custom set keeps the caller's casing.
  const rawTokens = spec.split("/").map((t) => t.trim()).filter(Boolean);
  if (rawTokens.length < 1) {
    throw new Error(`Invalid pronoun spec: "${spec}". Expected at least one token.`);
  }
  const [subject, second, third, fourth, fifth] = rawTokens;

  let object;
  let possDet;
  let possPron;
  let reflexive;
  if (rawTokens.length === 1) {
    // Reached e.g. for "he/" where a stray slash prevented the direct match.
    const single = builtIn(subject.toLowerCase());
    if (single) {
      return single;
    }
    // A lone unknown token is used like a name: "X", "X's", "Xself".
    object = subject;
    possDet = `${subject}'s`;
    possPron = possDet;
    reflexive = deriveReflexive(subject);
  } else {
    if (rawTokens.length === 2) {
      const subjLower = subject.toLowerCase();
      const secondLower = second.toLowerCase();
      if (subjLower === "he" && (secondLower === "him" || secondLower === "his")) {
        return { ...BUILT_IN_PRONOUNS["he"] };
      }
      if (subjLower === "she" && secondLower.startsWith("her")) {
        return { ...BUILT_IN_PRONOUNS["she"] };
      }
      if (subjLower === "they" && (secondLower === "them" || secondLower.startsWith("their"))) {
        return { ...BUILT_IN_PRONOUNS["they"] };
      }
    }
    // Each missing role falls back to the closest role that was supplied.
    object = second;
    possDet = third ?? second;
    possPron = fourth ?? possDet;
    reflexive = fifth || deriveReflexive(subject);
  }

  return {
    id: norm.replace(/\s+/g, ""),
    label: rawTokens.join("/"),
    subject,
    object,
    possessiveDeterminer: possDet,
    possessivePronoun: possPron,
    reflexive,
    notes: "Custom pronoun set"
  };
}
|
|
3534
|
+
/**
 * Resolve a pronoun set from either a spec string or an existing set object.
 *
 * @param {string|object} input - Spec text (parsed with `parsePronounSpec`) or
 *   a pronoun set object (shallow-copied).
 * @returns {object} A pronoun set the caller may freely mutate.
 * @throws {Error} When `input` is falsy.
 */
function getPronounSet(input) {
  if (!input) {
    throw new Error("getPronounSet: input is required");
  }
  return typeof input === "object" ? { ...input } : parsePronounSpec(input);
}
|
|
3543
|
+
|
|
3544
|
+
// src/pronouns/formatter.ts
|
|
3545
|
+
/**
 * Apply a capitalization mode to a pronoun string.
 *
 * @param {string} s - Text to transform; falsy values are returned unchanged.
 * @param {string} mode - "upper" upper-cases everything, "title" upper-cases
 *   only the first character and leaves the rest alone, and any other value
 *   (including "lower") lower-cases everything.
 * @returns {string} The transformed text.
 */
function applyCapitalization2(s, mode) {
  if (!s) {
    return s;
  }
  if (mode === "upper") {
    return s.toUpperCase();
  }
  if (mode === "title") {
    const head = s.charAt(0).toUpperCase();
    return head + s.slice(1);
  }
  return s.toLowerCase();
}
|
|
3557
|
+
/**
 * Look up one role of a pronoun set and apply capitalization.
 *
 * @param {object} set - Pronoun set to read from.
 * @param {string} role - Property name of the wanted role, e.g. "subject".
 * @param {object} [options] - `capitalization` mode; defaults to "lower".
 * @returns {string} The formatted pronoun, or "" when the role is empty or
 *   missing.
 */
function formatPronoun(set, role, options = {}) {
  const { capitalization: mode = "lower" } = options;
  const raw = set[role];
  return raw ? applyCapitalization2(raw, mode) : "";
}
|
|
3563
|
+
// Template placeholder -> pronoun-set property it stands for. Both possessive
// roles have a short and a long spelling. Written as [name, role] pairs; the
// "{{" / "}}" delimiters are added when the lookup object is built.
var TEMPLATE_PLACEHOLDERS = Object.fromEntries(
  [
    ["subject", "subject"],
    ["object", "object"],
    ["possDet", "possessiveDeterminer"],
    ["possessiveDeterminer", "possessiveDeterminer"],
    ["possPron", "possessivePronoun"],
    ["possessivePronoun", "possessivePronoun"],
    ["reflexive", "reflexive"],
  ].map(([name, role]) => [`{{${name}}}`, role])
);
|
|
3572
|
+
/**
 * Replace every known `{{placeholder}}` in a template with the matching
 * pronoun, using one capitalization mode for all of them.
 *
 * @param {string} template - Text containing placeholders such as
 *   `{{subject}}` or `{{possDet}}`.
 * @param {object} set - Pronoun set supplying the values; empty or missing
 *   roles are replaced with "".
 * @param {object} [options] - `capitalization` mode; defaults to "lower".
 * @returns {string} The filled-in text.
 */
function fillPronounTemplate(template, set, options = {}) {
  const { capitalization: mode = "lower" } = options;
  return Object.entries(TEMPLATE_PLACEHOLDERS).reduce((text, [placeholder, role]) => {
    const replacement = applyCapitalization2(set[role] || "", mode);
    // split/join inserts the replacement literally, whatever characters it holds.
    return text.split(placeholder).join(replacement);
  }, template);
}
|
|
3582
|
+
/**
 * Fill a pronoun template, capitalizing pronouns that start a sentence.
 *
 * A pronoun counts as sentence-initial when its placeholder is at the very
 * start of the template or follows ".", "!" or "?" plus whitespace; those get
 * their first letter upper-cased, all others are lower-cased. Placeholders
 * whose role is empty or missing in the set are removed.
 *
 * @param {string} template - Text containing `{{placeholder}}` markers.
 * @param {object} set - Pronoun set supplying the values.
 * @returns {string} The filled-in text.
 */
function fillPronounTemplateSmart(template, set) {
  const valueOf = (role) => set[role] || "";

  // Pass 1: swap each placeholder for a NUL-delimited marker naming its role
  // (or drop it when the role has no value), so position-dependent
  // capitalization can be decided afterwards.
  let text = template;
  for (const [placeholder, role] of Object.entries(TEMPLATE_PLACEHOLDERS)) {
    const standIn = valueOf(role) ? `\0PRONOUN_${role}\0` : "";
    text = text.split(placeholder).join(standIn);
  }

  // Pass 2: markers in sentence-initial position get title case.
  text = text.replace(/(^|[.!?]\s+)(\x00PRONOUN_\w+\x00)/g, (_, lead, marker) => {
    const found = marker.match(/PRONOUN_(\w+)/);
    return found === null ? lead + marker : lead + applyCapitalization2(valueOf(found[1]), "title");
  });

  // Pass 3: every remaining marker is mid-sentence and gets lower case.
  return text.replace(/\x00PRONOUN_(\w+)\x00/g, (_, role) => applyCapitalization2(valueOf(role), "lower"));
}
|
|
3608
|
+
|
|
3609
|
+
// src/pronouns/integration.ts
|
|
3610
|
+
/**
 * Default pronoun set for a gender value.
 *
 * @param {*} gender - "male" selects he/him and "female" selects she/her;
 *   every other value (including "unknown" and null) selects they/them.
 * @returns {object} A copy of the chosen built-in pronoun set.
 */
function getDefaultPronouns(gender) {
  let id = "they";
  if (gender === "male") {
    id = "he";
  } else if (gender === "female") {
    id = "she";
  }
  return { ...BUILT_IN_PRONOUNS[id] };
}
|
|
3622
|
+
/**
 * Pronoun set for an entity when nothing about it is inspected: always a copy
 * of the built-in they/them set.
 *
 * @param {*} entity - Accepted but not read by this function.
 * @returns {object} A copy of the built-in "they" pronoun set.
 */
function getPronounsForEntity(entity) {
  const fallback = BUILT_IN_PRONOUNS["they"];
  return { ...fallback };
}
|
|
3625
|
+
/**
 * Choose a pronoun set for a person entity.
 *
 * Precedence: explicit pronouns, then a gender guess from `genderDB` based on
 * the person's given name, then `defaultOnUnknown`, then they/them.
 *
 * @param {object} entity - Person entity; only `given` is read.
 * @param {object} [options]
 * @param {object} [options.genderDB] - Object exposing
 *   `guessGender(name, threshold)`.
 * @param {string|object} [options.explicitPronouns] - Spec or set that
 *   overrides everything else (resolved through `getPronounSet`).
 * @param {object} [options.defaultOnUnknown] - Set to copy when the gender is
 *   not resolved to "male" or "female".
 * @param {number} [options.genderThreshold=0.8] - Threshold forwarded to
 *   `guessGender`.
 * @returns {object} A pronoun set object.
 */
function getPronounsForPerson(entity, options = {}) {
  const { genderDB, explicitPronouns, defaultOnUnknown, genderThreshold = 0.8 } = options;
  if (explicitPronouns) {
    return getPronounSet(explicitPronouns);
  }

  const guessed = genderDB && entity.given
    ? genderDB.guessGender(entity.given, genderThreshold)
    : void 0;
  if (guessed === "male") {
    return { ...BUILT_IN_PRONOUNS["he"] };
  }
  if (guessed === "female") {
    return { ...BUILT_IN_PRONOUNS["she"] };
  }
  return { ...(defaultOnUnknown || BUILT_IN_PRONOUNS["they"]) };
}
|
|
3646
|
+
/**
 * Pronoun set for any parsed entity.
 *
 * @param {object} entity - Parsed entity; `kind` decides the path taken.
 * @param {object} [options] - Forwarded to `getPronounsForPerson` for person
 *   entities; not used for any other kind.
 * @returns {object} The person's pronoun set, or a copy of the built-in
 *   they/them set for every non-person kind.
 */
function getPronouns(entity, options = {}) {
  return entity.kind === "person"
    ? getPronounsForPerson(entity, options)
    : { ...BUILT_IN_PRONOUNS["they"] };
}
|
|
3652
|
+
|
|
3653
|
+
// src/pronouns/extractor.ts
|
|
3654
|
+
// Captures the contents of a trailing "(...)" group, ignoring surrounding
// whitespace.
var PRONOUN_SUFFIX_RE = /\s*\(([^)]+)\)\s*$/;
// Coarse shape test: the text starts with letters, a slash, and more letters.
var LOOKS_LIKE_PRONOUNS_RE = /^[a-z]+\/[a-z]+/i;
// First words that mark a parenthesised suffix as a label or annotation rather
// than a pronoun spec. Grouped only for readability.
var NON_PRONOUN_WORDS = /* @__PURE__ */ new Set([
  // contact / address labels
  "billing", "shipping", "home", "work", "office", "mobile", "cell",
  // ranking labels
  "primary", "secondary", "main", "alt", "alternative",
  // usage labels
  "personal", "business", "emergency", "contact", "other", "preferred",
  // property labels
  "cabin", "vacation", "rental",
  // name / status annotations
  "legal", "maiden", "former", "deceased", "retired", "inactive",
  // organisational units
  "accounts", "payable", "receivable", "department", "dept", "div", "division",
]);
|
|
3692
|
+
/**
 * Heuristically decide whether a parenthesised suffix is a pronoun spec.
 *
 * The text must have the shape "word/word...", its first word must not be a
 * known label word (see NON_PRONOUN_WORDS), and it must either be a known
 * alias or start with a word of at most five letters.
 *
 * Whitespace directly around slashes is ignored, so "she / her" is judged the
 * same as "she/her". Whitespace elsewhere still has to pass the shape test,
 * which keeps multi-word labels such as "acme inc / billing" rejected.
 *
 * @param {string} spec - Suffix text without the surrounding parentheses.
 * @returns {boolean} True when the text should be treated as pronouns.
 */
function looksLikePronouns(spec) {
  const trimmed = spec.trim().toLowerCase();
  // Collapse "a / b" to "a/b" before the shape test; the test itself allows no
  // whitespace between the first word, the slash and the second word.
  const compact = trimmed.replace(/\s*\/\s*/g, "/");
  if (!LOOKS_LIKE_PRONOUNS_RE.test(compact)) {
    return false;
  }
  const firstWord = trimmed.split(/[\/\s]/)[0];
  if (NON_PRONOUN_WORDS.has(firstWord)) {
    return false;
  }
  if (Object.prototype.hasOwnProperty.call(SPEC_ALIASES, compact)) {
    return true;
  }
  // Unknown spec: accept it when the leading word is short. The shape test
  // above already guarantees that word is made of letters only.
  return firstWord.length <= 5;
}
|
|
3710
|
+
/**
 * Split a trailing "(pronouns)" suffix off a name.
 *
 * @param {string} nameWithPronouns - e.g. "Alex Doe (they/them)".
 * @returns {{name: string, pronouns?: object, rawPronounSpec?: string}}
 *   When a pronoun suffix is recognised: the trimmed name without it, the
 *   parsed pronoun set and the raw spec text. Otherwise only `name`, holding
 *   the input unchanged ("" for falsy input).
 */
function extractPronouns(nameWithPronouns) {
  if (!nameWithPronouns) {
    return { name: "" };
  }
  const unchanged = () => ({ name: nameWithPronouns });

  const suffix = nameWithPronouns.match(PRONOUN_SUFFIX_RE);
  if (suffix === null) {
    return unchanged();
  }
  const rawPronounSpec = suffix[1].trim();
  if (!looksLikePronouns(rawPronounSpec)) {
    return unchanged();
  }

  let pronouns;
  try {
    pronouns = parsePronounSpec(rawPronounSpec);
  } catch {
    // Best effort: a suffix that cannot be parsed is left as part of the name.
    return unchanged();
  }
  return {
    name: nameWithPronouns.slice(0, suffix.index).trim(),
    pronouns,
    rawPronounSpec
  };
}
|
|
3734
|
+
/**
 * Report whether a name ends with a parenthesised suffix that looks like a
 * pronoun spec.
 *
 * @param {string} name - Name text, e.g. "Alex Doe (they/them)".
 * @returns {boolean} True when a trailing "(...)" group passes
 *   `looksLikePronouns`; false for falsy input or when there is no such group.
 */
function hasPronouns(name) {
  const suffix = name ? name.match(PRONOUN_SUFFIX_RE) : null;
  return suffix ? looksLikePronouns(suffix[1]) : false;
}
|
|
3740
|
+
/**
 * Derive a coarse gender hint from a raw pronoun spec.
 *
 * The spec is lower-cased and stripped of all whitespace. It hints "male"
 * when it is "he", starts with "he/" or contains "/him"; otherwise "female"
 * when it is "she", starts with "she/" or contains "/her". The male rules are
 * evaluated first.
 *
 * @param {string} rawSpec - Pronoun spec text such as "She / Her".
 * @returns {"male"|"female"|"unknown"} The hint.
 */
function pronounsToGenderHint(rawSpec) {
  const compact = rawSpec.trim().toLowerCase().replace(/\s+/g, "");
  const rules = [
    { hint: "male", subject: "he", objectMarker: "/him" },
    { hint: "female", subject: "she", objectMarker: "/her" },
  ];
  for (const { hint, subject, objectMarker } of rules) {
    const matches =
      compact.startsWith(`${subject}/`) ||
      compact === subject ||
      compact.includes(objectMarker);
    if (matches) {
      return hint;
    }
  }
  return "unknown";
}
|
|
3750
|
+
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so the assignment below never executes and
// module.exports is not modified here; the statement only spells out the
// export names in source form.
// NOTE(review): presumably this exact shape is what Node's static CommonJS
// named-export detection looks for - confirm before reformatting or
// reordering it.
0 && (module.exports = {
  BUILT_IN_PRONOUNS,
  COMMON_FIRST_NAMES,
  COMMON_SURNAMES,
  MULTI_WORD_PARTICLES,
  PARTICLES,
  SPEC_ALIASES,
  classifyName,
  entityToLegacy,
  extractPronouns,
  fillPronounTemplate,
  fillPronounTemplateSmart,
  formatName,
  formatPronoun,
  getDefaultPronouns,
  getFirstName,
  getLastName,
  getNickname,
  getPronounSet,
  getPronouns,
  getPronounsForEntity,
  getPronounsForPerson,
  hasPronouns,
  isCommonFirstName,
  isCommonSurname,
  isCompound,
  isFamily,
  isMultiWordParticle,
  isOrganization,
  isParticle,
  isPerson,
  isRejected,
  isUnknown,
  parseName,
  parseNameList,
  parsePersonName,
  parsePronounSpec,
  pronounsToGenderHint
});
|