name-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +912 -0
- package/dist/gender/GenderDB-Co_GybwH.d.mts +80 -0
- package/dist/gender/GenderDB-Co_GybwH.d.ts +80 -0
- package/dist/gender/all.d.mts +31 -0
- package/dist/gender/all.d.ts +31 -0
- package/dist/gender/all.js +191 -0
- package/dist/gender/all.mjs +37 -0
- package/dist/gender/chunk-YGP2PQOO.mjs +133 -0
- package/dist/gender/coverage95.d.mts +29 -0
- package/dist/gender/coverage95.d.ts +29 -0
- package/dist/gender/coverage95.js +191 -0
- package/dist/gender/coverage95.mjs +37 -0
- package/dist/gender/coverage99.d.mts +29 -0
- package/dist/gender/coverage99.d.ts +29 -0
- package/dist/gender/coverage99.js +191 -0
- package/dist/gender/coverage99.mjs +37 -0
- package/dist/index.d.mts +796 -0
- package/dist/index.d.ts +796 -0
- package/dist/index.js +3789 -0
- package/dist/index.mjs +3726 -0
- package/package.json +62 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,796 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Combined list of all particles
|
|
3
|
+
*/
|
|
4
|
+
declare const PARTICLES: readonly string[];
|
|
5
|
+
/**
|
|
6
|
+
* Multi-word particles that should be kept together
|
|
7
|
+
* These are checked before single-word particles
|
|
8
|
+
*/
|
|
9
|
+
declare const MULTI_WORD_PARTICLES: readonly ["von und zu", "de la", "de los", "de las", "van der", "van den", "van de", "de le", "da la"];
|
|
10
|
+
/**
|
|
11
|
+
* Check if a string is a known surname particle
|
|
12
|
+
*/
|
|
13
|
+
declare function isParticle(str: string): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Check if a sequence of words forms a multi-word particle
|
|
16
|
+
*/
|
|
17
|
+
declare function isMultiWordParticle(words: string[]): string | null;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Combined list of common surnames
|
|
21
|
+
*/
|
|
22
|
+
declare const COMMON_SURNAMES: readonly string[];
|
|
23
|
+
/**
|
|
24
|
+
* Common first names to avoid false positives
|
|
25
|
+
* (names that could be either first or last)
|
|
26
|
+
*/
|
|
27
|
+
declare const COMMON_FIRST_NAMES: readonly ["mary", "john", "william", "james", "anne", "sarah", "marie", "jean", "george", "paul", "lee", "billy", "bob", "thomas", "robert", "michael", "david", "martin", "pierre", "maria", "jose", "josé"];
|
|
28
|
+
/**
|
|
29
|
+
* Check if a string is a known common surname
|
|
30
|
+
*/
|
|
31
|
+
declare function isCommonSurname(str: string): boolean;
|
|
32
|
+
/**
|
|
33
|
+
* Check if a string is a common first name
|
|
34
|
+
*/
|
|
35
|
+
declare function isCommonFirstName(str: string): boolean;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* The kind of entity detected in a name field
|
|
39
|
+
*/
|
|
40
|
+
type NameKind = 'person' | 'family' | 'household' | 'compound' | 'organization' | 'unknown' | 'rejected';
|
|
41
|
+
/**
|
|
42
|
+
* Confidence level in classification (0 = no confidence, 1 = certain)
|
|
43
|
+
*/
|
|
44
|
+
type Confidence = 0 | 0.25 | 0.5 | 0.75 | 1;
|
|
45
|
+
/**
|
|
46
|
+
* Machine-readable reason codes explaining classification decisions
|
|
47
|
+
*/
|
|
48
|
+
type ReasonCode = 'ORG_LEGAL_SUFFIX' | 'ORG_COMMA_LEGAL' | 'ORG_INSTITUTION_PHRASE' | 'ORG_DBA' | 'ORG_CARE_OF' | 'ORG_WEAK_KEYWORD' | 'FAMILY_ENDS_WITH_FAMILY' | 'FAMILY_STARTS_WITH_THE' | 'FAMILY_PLURAL_SURNAME' | 'FAMILY_HAS_FAMILY_WORD' | 'COMPOUND_CONNECTOR' | 'COMPOUND_SHARED_FAMILY' | 'COMPOUND_PAIRED_HONORIFIC' | 'COMPOUND_PLURAL_HONORIFIC' | 'PERSON_STANDARD_FORMAT' | 'PERSON_REVERSED_FORMAT' | 'PERSON_HAS_HONORIFIC' | 'PERSON_HAS_SUFFIX' | 'AMBIGUOUS_THE_PLURAL' | 'AMBIGUOUS_SHORT_NAME' | 'HAS_PUNCTUATION_SIGNALS' | 'HAS_PAREN_ANNOTATION' | 'HAS_ALLCAPS' | 'HAS_EMAIL_OR_HANDLE' | 'HAS_ROLE_OR_TITLE';
|
|
49
|
+
/**
|
|
50
|
+
* Metadata about the parsing process
|
|
51
|
+
*/
|
|
52
|
+
interface ParseMeta {
|
|
53
|
+
/** Exact input string */
|
|
54
|
+
raw: string;
|
|
55
|
+
/** Normalized string used for parsing */
|
|
56
|
+
normalized: string;
|
|
57
|
+
/** Overall confidence in classification */
|
|
58
|
+
confidence: Confidence;
|
|
59
|
+
/** Reason codes explaining the classification */
|
|
60
|
+
reasons: ReasonCode[];
|
|
61
|
+
/** Human-readable warnings */
|
|
62
|
+
warnings?: string[];
|
|
63
|
+
/** Locale hint (default: "en") */
|
|
64
|
+
locale?: string;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Base interface for all entity types
|
|
68
|
+
*/
|
|
69
|
+
interface BaseEntity {
|
|
70
|
+
kind: NameKind;
|
|
71
|
+
meta: ParseMeta;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Legal form suffixes for organizations
|
|
75
|
+
*/
|
|
76
|
+
type LegalForm = 'Inc' | 'Incorporated' | 'Corp' | 'Corporation' | 'LLC' | 'LLP' | 'LP' | 'Ltd' | 'Limited' | 'PLC' | 'GmbH' | 'SA' | 'SAS' | 'BV' | 'AG' | 'Oy' | 'SRL' | 'SpA' | 'Trust' | 'TrustCompany' | 'Bank' | 'CreditUnion' | 'Foundation' | 'University' | 'Hospital' | 'Church' | 'Government' | 'Nonprofit' | 'Company' | 'Co' | 'UnknownLegalForm';
|
|
77
|
+
/**
|
|
78
|
+
* Organization entity (company, institution, trust, etc.)
|
|
79
|
+
*/
|
|
80
|
+
interface OrganizationName extends BaseEntity {
|
|
81
|
+
kind: 'organization';
|
|
82
|
+
/** Base name without legal suffix */
|
|
83
|
+
baseName: string;
|
|
84
|
+
/** Detected legal form */
|
|
85
|
+
legalForm?: LegalForm;
|
|
86
|
+
/** Raw legal suffix as written */
|
|
87
|
+
legalSuffixRaw?: string;
|
|
88
|
+
/** Additional qualifiers ("of", "for", etc.) */
|
|
89
|
+
qualifiers?: string[];
|
|
90
|
+
/** Alternate names (d/b/a) */
|
|
91
|
+
aka?: string[];
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Style of family name
|
|
95
|
+
*/
|
|
96
|
+
type FamilyStyle = 'familyWord' | 'pluralSurname';
|
|
97
|
+
/**
|
|
98
|
+
* Family or household entity
|
|
99
|
+
*/
|
|
100
|
+
interface FamilyName extends BaseEntity {
|
|
101
|
+
kind: 'family' | 'household';
|
|
102
|
+
/** Leading article if present */
|
|
103
|
+
article?: 'The';
|
|
104
|
+
/** Core family/surname */
|
|
105
|
+
familyName: string;
|
|
106
|
+
/** How the family was expressed */
|
|
107
|
+
style: FamilyStyle;
|
|
108
|
+
/** The word used (Family, Household) */
|
|
109
|
+
familyWord?: 'Family' | 'Household';
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* Connector used in compound names
|
|
113
|
+
*/
|
|
114
|
+
type CompoundConnector = '&' | 'and' | '+' | 'et' | 'unknown';
|
|
115
|
+
/**
|
|
116
|
+
* Compound entity (multiple people in one field)
|
|
117
|
+
*/
|
|
118
|
+
interface CompoundName extends BaseEntity {
|
|
119
|
+
kind: 'compound';
|
|
120
|
+
/** The connector detected */
|
|
121
|
+
connector: CompoundConnector;
|
|
122
|
+
/** Parsed members (may be PersonName or UnknownName) */
|
|
123
|
+
members: Array<PersonName | UnknownName>;
|
|
124
|
+
/** Shared family name if inferred */
|
|
125
|
+
sharedFamily?: string;
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* Person entity (individual human)
|
|
129
|
+
*/
|
|
130
|
+
interface PersonName extends BaseEntity {
|
|
131
|
+
kind: 'person';
|
|
132
|
+
/** Title/honorific (Dr., Mr., etc.) */
|
|
133
|
+
honorific?: string;
|
|
134
|
+
/** Given/first name */
|
|
135
|
+
given?: string;
|
|
136
|
+
/** Full given name if explicitly provided (e.g. from parenthetical: "Thomas A. (Thomas Alva) Edison") */
|
|
137
|
+
fullGiven?: string;
|
|
138
|
+
/** Middle name(s) */
|
|
139
|
+
middle?: string;
|
|
140
|
+
/** Family/last name */
|
|
141
|
+
family?: string;
|
|
142
|
+
/** Suffix (Jr., PhD, etc.) */
|
|
143
|
+
suffix?: string;
|
|
144
|
+
/** Nickname */
|
|
145
|
+
nickname?: string;
|
|
146
|
+
/** Surname particles (von, de, etc.) */
|
|
147
|
+
particles?: string[];
|
|
148
|
+
/** Whether name was in reversed format */
|
|
149
|
+
reversed?: boolean;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Unknown entity (could not be classified)
|
|
153
|
+
*/
|
|
154
|
+
interface UnknownName extends BaseEntity {
|
|
155
|
+
kind: 'unknown';
|
|
156
|
+
/** Best-effort normalized text */
|
|
157
|
+
text: string;
|
|
158
|
+
/** Best guess at what it might be */
|
|
159
|
+
guess?: Exclude<NameKind, 'unknown' | 'rejected'>;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Rejected entity (strict mode rejection)
|
|
163
|
+
*/
|
|
164
|
+
interface RejectedName extends BaseEntity {
|
|
165
|
+
kind: 'rejected';
|
|
166
|
+
/** What kind it would have been classified as */
|
|
167
|
+
rejectedAs: Exclude<NameKind, 'rejected'>;
|
|
168
|
+
}
|
|
169
|
+
/**
|
|
170
|
+
* Union of all entity types returned by parseName
|
|
171
|
+
*/
|
|
172
|
+
type ParsedNameEntity = PersonName | FamilyName | CompoundName | OrganizationName | UnknownName | RejectedName;
|
|
173
|
+
/**
|
|
174
|
+
* Options for parseName
|
|
175
|
+
*/
|
|
176
|
+
interface ParseOptions {
|
|
177
|
+
/** Locale hint (default: "en") */
|
|
178
|
+
locale?: string;
|
|
179
|
+
/** If set, reject non-person entities */
|
|
180
|
+
strictKind?: 'person';
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Parsed recipient from a list (email To/CC line)
|
|
184
|
+
*/
|
|
185
|
+
interface ParsedRecipient {
|
|
186
|
+
/** Original raw string */
|
|
187
|
+
raw: string;
|
|
188
|
+
/** Parsed display name */
|
|
189
|
+
display?: ParsedNameEntity;
|
|
190
|
+
/** Extracted email address */
|
|
191
|
+
email?: string;
|
|
192
|
+
/** Raw address string before normalization */
|
|
193
|
+
addressRaw?: string;
|
|
194
|
+
/** Metadata about parsing */
|
|
195
|
+
meta: {
|
|
196
|
+
confidence: Confidence;
|
|
197
|
+
reasons: ReasonCode[];
|
|
198
|
+
warnings?: string[];
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Options for parseNameList
|
|
203
|
+
*/
|
|
204
|
+
interface ParseListOptions extends ParseOptions {
|
|
205
|
+
}
|
|
206
|
+
/**
|
|
207
|
+
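* Category of an affix (prefix or suffix) token, as carried by the prefixTokens/suffixTokens of ParsedName, which represents a person's name broken down into its component parts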
|
|
208
|
+
*/
|
|
209
|
+
type NameAffixTokenType = 'honorific' | 'style' | 'religious' | 'military' | 'judicial' | 'professional' | 'education' | 'generational' | 'dynasticNumber' | 'postnominalHonor' | 'other';
|
|
210
|
+
interface NameAffixToken {
|
|
211
|
+
type: NameAffixTokenType;
|
|
212
|
+
value: string;
|
|
213
|
+
normalized?: string;
|
|
214
|
+
entryId?: string;
|
|
215
|
+
canonicalShort?: string;
|
|
216
|
+
canonicalLong?: string;
|
|
217
|
+
isAbbrev?: boolean;
|
|
218
|
+
requiresCommaBefore?: boolean;
|
|
219
|
+
}
|
|
220
|
+
type NameTokenType = 'prefix' | 'given' | 'middle' | 'family' | 'particle' | 'suffix' | 'nickname' | 'literal';
|
|
221
|
+
interface NameToken {
|
|
222
|
+
type: NameTokenType;
|
|
223
|
+
value: string;
|
|
224
|
+
normalized?: string;
|
|
225
|
+
noBreakAfter?: boolean;
|
|
226
|
+
noBreakBefore?: boolean;
|
|
227
|
+
}
|
|
228
|
+
interface ParsedName {
|
|
229
|
+
prefix?: string;
|
|
230
|
+
first?: string;
|
|
231
|
+
fullGiven?: string;
|
|
232
|
+
middle?: string;
|
|
233
|
+
last?: string;
|
|
234
|
+
suffix?: string;
|
|
235
|
+
nickname?: string;
|
|
236
|
+
prefixTokens?: NameAffixToken[];
|
|
237
|
+
suffixTokens?: NameAffixToken[];
|
|
238
|
+
familyParts?: string[];
|
|
239
|
+
familyParticle?: string;
|
|
240
|
+
familyParticleBehavior?: 'attach' | 'separate' | 'localeDefault';
|
|
241
|
+
preferredGiven?: string;
|
|
242
|
+
sort?: {
|
|
243
|
+
key?: string;
|
|
244
|
+
display?: string;
|
|
245
|
+
};
|
|
246
|
+
tokens?: NameToken[];
|
|
247
|
+
}
|
|
248
|
+
type NamePreset = 'display' | 'informal' | 'formalFull' | 'formalShort' | 'expandedFull' | 'alphabetical' | 'library' | 'initialed' | 'firstOnly' | 'preferredFirst' | 'preferredDisplay';
|
|
249
|
+
type NameFormatOptions = {
|
|
250
|
+
preset?: NamePreset;
|
|
251
|
+
locale?: string | string[];
|
|
252
|
+
output?: 'text' | 'html';
|
|
253
|
+
typography?: 'plain' | 'ui' | 'fine';
|
|
254
|
+
noBreak?: 'none' | 'smart' | 'all';
|
|
255
|
+
join?: 'none' | 'list' | 'couple';
|
|
256
|
+
conjunction?: 'and' | '&' | string;
|
|
257
|
+
oxfordComma?: boolean;
|
|
258
|
+
shareLastName?: 'auto' | 'never' | 'whenSame';
|
|
259
|
+
sharePrefix?: 'auto' | 'never' | 'whenSame';
|
|
260
|
+
shareSuffix?: 'auto' | 'never' | 'whenSame';
|
|
261
|
+
prefer?: 'auto' | 'nickname' | 'first' | 'fullGiven';
|
|
262
|
+
middle?: 'full' | 'initial' | 'none';
|
|
263
|
+
prefix?: 'include' | 'omit' | 'auto';
|
|
264
|
+
suffix?: 'include' | 'omit' | 'auto';
|
|
265
|
+
order?: 'given-family' | 'family-given' | 'auto';
|
|
266
|
+
prefixForm?: 'short' | 'long' | 'asInput';
|
|
267
|
+
suffixForm?: 'short' | 'long' | 'asInput';
|
|
268
|
+
capitalization?: 'canonical' | 'preserve' | 'lower' | 'upper';
|
|
269
|
+
punctuation?: 'canonical' | 'strip' | 'preserve';
|
|
270
|
+
apostrophes?: 'canonical' | 'ascii' | 'preserve';
|
|
271
|
+
};
|
|
272
|
+
|
|
273
|
+
/**
|
|
274
|
+
* Parse a name string into a classified entity
|
|
275
|
+
*
|
|
276
|
+
* This is the main entry point for name parsing. It classifies the input as:
|
|
277
|
+
* - person: Individual human name
|
|
278
|
+
* - organization: Company, institution, trust, etc.
|
|
279
|
+
* - family: Family or household (e.g., "The Smith Family")
|
|
280
|
+
* - compound: Multiple people (e.g., "Bob & Mary Smith")
|
|
281
|
+
* - unknown: Could not be classified
|
|
282
|
+
* - rejected: Strict mode rejection
|
|
283
|
+
*
|
|
284
|
+
* @param input - The name string to parse
|
|
285
|
+
* @param options - Parsing options
|
|
286
|
+
* @returns Classified entity with metadata
|
|
287
|
+
*/
|
|
288
|
+
declare function parseName(input: string, options?: ParseOptions): ParsedNameEntity;
|
|
289
|
+
/**
|
|
290
|
+
* Parse a full name string into its component parts using international name parsing rules
|
|
291
|
+
* (Legacy function for internal use by formatName)
|
|
292
|
+
*
|
|
293
|
+
* Supports:
|
|
294
|
+
* - Surname particles (van, von, de, da, etc.)
|
|
295
|
+
* - Compound surnames (García Márquez)
|
|
296
|
+
* - Nicknames in quotes or parentheses
|
|
297
|
+
* - Complex suffixes and titles
|
|
298
|
+
*
|
|
299
|
+
* @param fullName - The full name to parse
|
|
300
|
+
* @returns Object containing parsed name components
|
|
301
|
+
* @internal
|
|
302
|
+
*/
|
|
303
|
+
declare function parsePersonName(fullName: string): ParsedName;
|
|
304
|
+
/**
|
|
305
|
+
* Extract first name from a full name
|
|
306
|
+
* Uses the legacy person parser for compatibility
|
|
307
|
+
*/
|
|
308
|
+
declare function getFirstName(fullName: string): string | undefined;
|
|
309
|
+
/**
|
|
310
|
+
* Extract last name from a full name
|
|
311
|
+
* Uses the legacy person parser for compatibility
|
|
312
|
+
*/
|
|
313
|
+
declare function getLastName(fullName: string): string | undefined;
|
|
314
|
+
/**
|
|
315
|
+
* Extract nickname from a full name
|
|
316
|
+
* Uses the legacy person parser for compatibility
|
|
317
|
+
*/
|
|
318
|
+
declare function getNickname(fullName: string): string | undefined;
|
|
319
|
+
/**
|
|
320
|
+
* Convert a ParsedNameEntity to a ParsedName (legacy format)
|
|
321
|
+
* Useful for formatName compatibility when working with new API
|
|
322
|
+
*/
|
|
323
|
+
declare function entityToLegacy(entity: ParsedNameEntity): ParsedName | null;
|
|
324
|
+
|
|
325
|
+
/**
|
|
326
|
+
* Recipient list parsing for To/CC lines and bulk input
|
|
327
|
+
*/
|
|
328
|
+
|
|
329
|
+
/**
|
|
330
|
+
* Parse a recipient list (To/CC line or bulk input)
|
|
331
|
+
*
|
|
332
|
+
* @param input - The recipient list string
|
|
333
|
+
* @param options - Parsing options
|
|
334
|
+
* @returns Array of parsed recipients
|
|
335
|
+
*/
|
|
336
|
+
declare function parseNameList(input: string, options?: ParseListOptions): ParsedRecipient[];
|
|
337
|
+
|
|
338
|
+
/**
|
|
339
|
+
* Classify a name string into an entity type
|
|
340
|
+
* Priority order:
|
|
341
|
+
* 1. Organization (legal suffixes, institution phrases override everything)
|
|
342
|
+
* 2. Compound (& / and / + connectors with name-like tokens)
|
|
343
|
+
* 3. Family/Household ("Family" word, "The" + plural surname)
|
|
344
|
+
* 4. Person (standard and reversed formats)
|
|
345
|
+
* 5. Unknown (fallback)
|
|
346
|
+
*/
|
|
347
|
+
declare function classifyName(input: string, options?: ParseOptions): ParsedNameEntity;
|
|
348
|
+
/**
|
|
349
|
+
* Check if an entity is a person
|
|
350
|
+
*/
|
|
351
|
+
declare function isPerson(entity: ParsedNameEntity): entity is PersonName;
|
|
352
|
+
/**
|
|
353
|
+
* Check if an entity is an organization
|
|
354
|
+
*/
|
|
355
|
+
declare function isOrganization(entity: ParsedNameEntity): entity is OrganizationName;
|
|
356
|
+
/**
|
|
357
|
+
* Check if an entity is a family
|
|
358
|
+
*/
|
|
359
|
+
declare function isFamily(entity: ParsedNameEntity): entity is FamilyName;
|
|
360
|
+
/**
|
|
361
|
+
* Check if an entity is a compound
|
|
362
|
+
*/
|
|
363
|
+
declare function isCompound(entity: ParsedNameEntity): entity is CompoundName;
|
|
364
|
+
/**
|
|
365
|
+
* Check if an entity is unknown
|
|
366
|
+
*/
|
|
367
|
+
declare function isUnknown(entity: ParsedNameEntity): entity is UnknownName;
|
|
368
|
+
/**
|
|
369
|
+
* Check if an entity was rejected (strict mode)
|
|
370
|
+
*/
|
|
371
|
+
declare function isRejected(entity: ParsedNameEntity): entity is RejectedName;
|
|
372
|
+
|
|
373
|
+
type FormatInput = string | ParsedName | ParsedNameEntity;
|
|
374
|
+
/**
|
|
375
|
+
* Public formatting entry point (single name or array of names).
|
|
376
|
+
*
|
|
377
|
+
* Accepts:
|
|
378
|
+
* - string: Will be parsed as a person name
|
|
379
|
+
* - ParsedName: Legacy parsed name object
|
|
380
|
+
* - ParsedNameEntity: Entity from parseName() (person, organization, family, compound, etc.)
|
|
381
|
+
* - Array of any of the above
|
|
382
|
+
*/
|
|
383
|
+
declare function formatName(input: FormatInput | Array<FormatInput>, options?: NameFormatOptions): string;
|
|
384
|
+
|
|
385
|
+
/**
|
|
386
|
+
* Pronoun role - the grammatical function of the pronoun
|
|
387
|
+
*/
|
|
388
|
+
type PronounRole = 'subject' | 'object' | 'possessiveDeterminer' | 'possessivePronoun' | 'reflexive';
|
|
389
|
+
/**
|
|
390
|
+
* A complete set of pronouns for all grammatical roles.
|
|
391
|
+
*
|
|
392
|
+
* Fields are intentionally explicit for legal document and template generation.
|
|
393
|
+
*/
|
|
394
|
+
interface PronounSet {
|
|
395
|
+
/** Stable identifier (e.g., "he", "she", "they", "ze-hir") */
|
|
396
|
+
id: string;
|
|
397
|
+
/** Short human-readable label (e.g., "he/him", "she/her", "they/them") */
|
|
398
|
+
label: string;
|
|
399
|
+
/** Subjective case: he, she, they, ze */
|
|
400
|
+
subject: string;
|
|
401
|
+
/** Objective case: him, her, them, zir */
|
|
402
|
+
object: string;
|
|
403
|
+
/** Possessive determiner: his, her, their (as in "their book") */
|
|
404
|
+
possessiveDeterminer: string;
|
|
405
|
+
/** Possessive pronoun: his, hers, theirs (as in "the book is theirs") */
|
|
406
|
+
possessivePronoun: string;
|
|
407
|
+
/** Reflexive: himself, herself, themselves */
|
|
408
|
+
reflexive: string;
|
|
409
|
+
/** Optional notes about usage */
|
|
410
|
+
notes?: string;
|
|
411
|
+
}
|
|
412
|
+
/**
|
|
413
|
+
* Capitalization options for formatting pronouns
|
|
414
|
+
*/
|
|
415
|
+
type Capitalization = 'lower' | 'title' | 'upper';
|
|
416
|
+
/**
|
|
417
|
+
* Options for formatting pronouns
|
|
418
|
+
*/
|
|
419
|
+
interface FormatOptions {
|
|
420
|
+
/** How to capitalize the pronoun (default: 'lower') */
|
|
421
|
+
capitalization?: Capitalization;
|
|
422
|
+
}
|
|
423
|
+
/**
|
|
424
|
+
* Result of extracting pronouns from a name string
|
|
425
|
+
*/
|
|
426
|
+
interface PronounExtractionResult {
|
|
427
|
+
/** The name with pronouns removed */
|
|
428
|
+
name: string;
|
|
429
|
+
/** Extracted pronoun set, if found */
|
|
430
|
+
pronouns?: PronounSet;
|
|
431
|
+
/** Raw pronoun spec as written (e.g., "they/them") */
|
|
432
|
+
rawPronounSpec?: string;
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
/**
|
|
436
|
+
* Built-in pronoun sets commonly in use.
|
|
437
|
+
*
|
|
438
|
+
* Includes standard pronouns (he, she, they, it), common neopronouns,
|
|
439
|
+
* and special pseudo-sets for "any pronouns" and "use name only".
|
|
440
|
+
*/
|
|
441
|
+
declare const BUILT_IN_PRONOUNS: Record<string, PronounSet>;
|
|
442
|
+
/**
|
|
443
|
+
* Alias map for common shorthand specs.
|
|
444
|
+
*
|
|
445
|
+
* Maps normalized input strings to built-in pronoun set IDs.
|
|
446
|
+
* Handles variations like "He/Him", "he/his", "she/hers", etc.
|
|
447
|
+
*/
|
|
448
|
+
declare const SPEC_ALIASES: Record<string, string>;
|
|
449
|
+
|
|
450
|
+
/**
|
|
451
|
+
* Parse a slash-separated pronoun spec into a PronounSet.
|
|
452
|
+
*
|
|
453
|
+
* Supports various formats:
|
|
454
|
+
* - 2 tokens: "he/him" → subject, object (others derived)
|
|
455
|
+
* - 3 tokens: "she/her/hers" → subject, object, possessive
|
|
456
|
+
* - 4 tokens: "they/them/their/theirs" → full set minus reflexive
|
|
457
|
+
* - 5 tokens: "ze/zir/zir/zirs/zirself" → fully specified
|
|
458
|
+
*
|
|
459
|
+
* @param spec - The pronoun specification string
|
|
460
|
+
* @returns A complete PronounSet
|
|
461
|
+
* @throws Error if the spec is invalid (empty or no tokens)
|
|
462
|
+
*/
|
|
463
|
+
declare function parsePronounSpec(spec: string): PronounSet;
|
|
464
|
+
/**
|
|
465
|
+
* Get a PronounSet by ID or shorthand specification.
|
|
466
|
+
*
|
|
467
|
+
* This is the main entry point for pronoun lookup. Accepts:
|
|
468
|
+
* - Built-in IDs: "he", "she", "they", "ze-hir", etc.
|
|
469
|
+
* - Common specs: "he/him", "she/her", "they/them", etc.
|
|
470
|
+
* - Custom specs: "ey/em/eir/eirs/emself"
|
|
471
|
+
* - An existing PronounSet (returns a copy)
|
|
472
|
+
*
|
|
473
|
+
* @param input - A pronoun ID, spec string, or existing PronounSet
|
|
474
|
+
* @returns A complete PronounSet
|
|
475
|
+
* @throws Error if input is empty or invalid
|
|
476
|
+
*
|
|
477
|
+
* @example
|
|
478
|
+
* ```typescript
|
|
479
|
+
* const hePronouns = getPronounSet('he');
|
|
480
|
+
* const shePronouns = getPronounSet('she/her');
|
|
481
|
+
* const theyPronouns = getPronounSet('they/them');
|
|
482
|
+
* const customPronouns = getPronounSet('xe/xem/xyr/xyrs/xemself');
|
|
483
|
+
* ```
|
|
484
|
+
*/
|
|
485
|
+
declare function getPronounSet(input: string | PronounSet): PronounSet;
|
|
486
|
+
|
|
487
|
+
/**
|
|
488
|
+
* Get a single pronoun from a set, with optional capitalization.
|
|
489
|
+
*
|
|
490
|
+
* @param set - The pronoun set to extract from
|
|
491
|
+
* @param role - Which grammatical role to get
|
|
492
|
+
* @param options - Formatting options
|
|
493
|
+
* @returns The formatted pronoun string
|
|
494
|
+
*
|
|
495
|
+
* @example
|
|
496
|
+
* ```typescript
|
|
497
|
+
* const pronouns = getPronounSet('she');
|
|
498
|
+
* formatPronoun(pronouns, 'subject', { capitalization: 'title' }); // "She"
|
|
499
|
+
* formatPronoun(pronouns, 'object'); // "her"
|
|
500
|
+
* formatPronoun(pronouns, 'reflexive', { capitalization: 'upper' }); // "HERSELF"
|
|
501
|
+
* ```
|
|
502
|
+
*/
|
|
503
|
+
declare function formatPronoun(set: PronounSet, role: PronounRole, options?: FormatOptions): string;
|
|
504
|
+
/**
|
|
505
|
+
* Fill pronoun placeholders in a template string.
|
|
506
|
+
*
|
|
507
|
+
* Supported placeholders:
|
|
508
|
+
* - `{{subject}}` - subjective pronoun (he, she, they)
|
|
509
|
+
* - `{{object}}` - objective pronoun (him, her, them)
|
|
510
|
+
* - `{{possDet}}` or `{{possessiveDeterminer}}` - possessive determiner (his, her, their)
|
|
511
|
+
* - `{{possPron}}` or `{{possessivePronoun}}` - possessive pronoun (his, hers, theirs)
|
|
512
|
+
* - `{{reflexive}}` - reflexive pronoun (himself, herself, themselves)
|
|
513
|
+
*
|
|
514
|
+
* @param template - The template string with placeholders
|
|
515
|
+
* @param set - The pronoun set to use for replacements
|
|
516
|
+
* @param options - Formatting options (capitalization applies to all replacements)
|
|
517
|
+
* @returns The filled template string
|
|
518
|
+
*
|
|
519
|
+
* @example
|
|
520
|
+
* ```typescript
|
|
521
|
+
* const template = '{{subject}} signed {{possDet}} name and identified {{reflexive}}.';
|
|
522
|
+
* const borrower = getPronounSet('she');
|
|
523
|
+
*
|
|
524
|
+
* fillPronounTemplate(template, borrower);
|
|
525
|
+
* // "she signed her name and identified herself."
|
|
526
|
+
*
|
|
527
|
+
* fillPronounTemplate(template, borrower, { capitalization: 'title' });
|
|
528
|
+
* // "She signed Her name and identified Herself."
|
|
529
|
+
* ```
|
|
530
|
+
*/
|
|
531
|
+
declare function fillPronounTemplate(template: string, set: PronounSet, options?: FormatOptions): string;
|
|
532
|
+
/**
|
|
533
|
+
* Fill pronoun placeholders with smart capitalization.
|
|
534
|
+
*
|
|
535
|
+
* Unlike `fillPronounTemplate`, this version detects sentence-initial positions
|
|
536
|
+
* and applies title case only there, keeping other occurrences lowercase.
|
|
537
|
+
*
|
|
538
|
+
* @param template - The template string with placeholders
|
|
539
|
+
* @param set - The pronoun set to use
|
|
540
|
+
* @returns The filled template with smart capitalization
|
|
541
|
+
*
|
|
542
|
+
* @example
|
|
543
|
+
* ```typescript
|
|
544
|
+
* const template = '{{subject}} read the document. Then {{subject}} signed it.';
|
|
545
|
+
* const pronouns = getPronounSet('she');
|
|
546
|
+
*
|
|
547
|
+
* fillPronounTemplateSmart(template, pronouns);
|
|
548
|
+
* // "She read the document. Then she signed it."
|
|
549
|
+
* ```
|
|
550
|
+
*/
|
|
551
|
+
declare function fillPronounTemplateSmart(template: string, set: PronounSet): string;
|
|
552
|
+
|
|
553
|
+
/**
|
|
554
|
+
* GenderDB - Efficient gender probability lookup using a binary trie structure.
|
|
555
|
+
*
|
|
556
|
+
* The GenderDB class provides O(k) name lookup where k is the length of the name.
|
|
557
|
+
* It uses a compact binary format with implicit child pointers for minimal memory usage.
|
|
558
|
+
*
|
|
559
|
+
* Binary format:
|
|
560
|
+
* - Header: 8 bytes (Magic 'GNDR' + node count)
|
|
561
|
+
* - Nodes: 4 bytes each (packed char, flags, sibling pointer)
|
|
562
|
+
* - Probs: 1 byte each (gender probability 1-255)
|
|
563
|
+
*/
|
|
564
|
+
interface GenderResult {
|
|
565
|
+
/** Probability that the name is male (0.0 = female, 1.0 = male) */
|
|
566
|
+
maleProbability: number;
|
|
567
|
+
/** Raw probability value from database (1-255) */
|
|
568
|
+
rawValue: number;
|
|
569
|
+
/** Whether the name was found in the database */
|
|
570
|
+
found: true;
|
|
571
|
+
}
|
|
572
|
+
interface GenderNotFound {
|
|
573
|
+
found: false;
|
|
574
|
+
}
|
|
575
|
+
type GenderLookupResult = GenderResult | GenderNotFound;
|
|
576
|
+
/**
|
|
577
|
+
* Gender probability database using optimized binary trie storage.
|
|
578
|
+
*/
|
|
579
|
+
declare class GenderDB {
|
|
580
|
+
private nodes;
|
|
581
|
+
private probs;
|
|
582
|
+
private readonly nodeCount;
|
|
583
|
+
/**
|
|
584
|
+
* Create a GenderDB instance from a binary ArrayBuffer.
|
|
585
|
+
* @param buffer - ArrayBuffer containing the binary trie data
|
|
586
|
+
*/
|
|
587
|
+
constructor(buffer: ArrayBuffer);
|
|
588
|
+
/**
|
|
589
|
+
* Get the number of nodes in the trie.
|
|
590
|
+
*/
|
|
591
|
+
get size(): number;
|
|
592
|
+
/**
|
|
593
|
+
* Look up the male probability for a given name.
|
|
594
|
+
*
|
|
595
|
+
* @param name - The first name to look up (case-insensitive)
|
|
596
|
+
* @returns GenderLookupResult with probability if found, or { found: false }
|
|
597
|
+
*/
|
|
598
|
+
lookup(name: string): GenderLookupResult;
|
|
599
|
+
/**
|
|
600
|
+
* Convenience method to get male probability as a number.
|
|
601
|
+
* Returns null if name not found.
|
|
602
|
+
*
|
|
603
|
+
* @param name - The first name to look up
|
|
604
|
+
* @returns Male probability (0.0-1.0) or null if not found
|
|
605
|
+
*/
|
|
606
|
+
getMaleProbability(name: string): number | null;
|
|
607
|
+
/**
|
|
608
|
+
* Convenience method to get female probability as a number.
|
|
609
|
+
* Returns null if name not found.
|
|
610
|
+
*
|
|
611
|
+
* @param name - The first name to look up
|
|
612
|
+
* @returns Female probability (0.0-1.0) or null if not found
|
|
613
|
+
*/
|
|
614
|
+
getFemaleProbability(name: string): number | null;
|
|
615
|
+
/**
|
|
616
|
+
* Make an informed guess about the likely gender based on probability threshold.
|
|
617
|
+
*
|
|
618
|
+
* @param name - The first name to look up
|
|
619
|
+
* @param threshold - Confidence threshold for guessing (default 0.8, meaning 80% confidence required)
|
|
620
|
+
* @returns 'male', 'female', 'unknown', or null if name not found in database
|
|
621
|
+
*/
|
|
622
|
+
guessGender(name: string, threshold?: number): 'male' | 'female' | 'unknown' | null;
|
|
623
|
+
/**
|
|
624
|
+
* Check if a name exists in the database.
|
|
625
|
+
*
|
|
626
|
+
* @param name - The first name to check
|
|
627
|
+
* @returns true if the name exists in the database
|
|
628
|
+
*/
|
|
629
|
+
has(name: string): boolean;
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
/**
|
|
633
|
+
* Get default pronouns based on a gender guess result.
|
|
634
|
+
*
|
|
635
|
+
* Maps the output of `GenderDB.guessGender()` to appropriate pronouns:
|
|
636
|
+
* - 'male' → he/him
|
|
637
|
+
* - 'female' → she/her
|
|
638
|
+
* - 'unknown' or null → they/them (safe default)
|
|
639
|
+
*
|
|
640
|
+
* @param gender - Result from guessGender()
|
|
641
|
+
* @returns The appropriate PronounSet
|
|
642
|
+
*
|
|
643
|
+
* @example
|
|
644
|
+
* ```typescript
|
|
645
|
+
* const db = createGenderDB();
|
|
646
|
+
* const gender = db.guessGender('John'); // 'male'
|
|
647
|
+
* const pronouns = getDefaultPronouns(gender); // he/him
|
|
648
|
+
* ```
|
|
649
|
+
*/
|
|
650
|
+
declare function getDefaultPronouns(gender: 'male' | 'female' | 'unknown' | null): PronounSet;
|
|
651
|
+
/**
|
|
652
|
+
* Get default pronouns for any parsed entity.
|
|
653
|
+
*
|
|
654
|
+
* Entity kind determines pronouns:
|
|
655
|
+
* - `person` → they/them (use getPronounsForPerson for gender-aware lookup)
|
|
656
|
+
* - `organization` → they/them
|
|
657
|
+
* - `family` / `household` → they/them
|
|
658
|
+
* - `compound` → they/them
|
|
659
|
+
* - `unknown` / `rejected` → they/them
|
|
660
|
+
*
|
|
661
|
+
* For person entities, this returns they/them as a safe default.
|
|
662
|
+
* Use `getPronounsForPerson()` with a GenderDB for gender-aware pronouns.
|
|
663
|
+
*
|
|
664
|
+
* @param entity - The parsed name entity
|
|
665
|
+
* @returns The appropriate PronounSet
|
|
666
|
+
*
|
|
667
|
+
* @example
|
|
668
|
+
* ```typescript
|
|
669
|
+
* const org = classifyName('Acme Inc.');
|
|
670
|
+
* getPronounsForEntity(org); // they/them
|
|
671
|
+
*
|
|
672
|
+
* const family = classifyName('The Smiths');
|
|
673
|
+
* getPronounsForEntity(family); // they/them
|
|
674
|
+
* ```
|
|
675
|
+
*/
|
|
676
|
+
declare function getPronounsForEntity(entity: ParsedNameEntity): PronounSet;
|
|
677
|
+
/**
|
|
678
|
+
* Options for getting pronouns for a person entity
|
|
679
|
+
*/
|
|
680
|
+
interface GetPronounsForPersonOptions {
|
|
681
|
+
/** Optional gender database for name-based gender lookup; when omitted, no gender-based lookup is attempted */
|
|
682
|
+
genderDB?: GenderDB;
|
|
683
|
+
/** Explicit pronouns override (user-specified, takes priority over any gender lookup); either a spec string such as 'they/them' or a PronounSet */
|
|
684
|
+
explicitPronouns?: string | PronounSet;
|
|
685
|
+
/** Pronouns to return when the gender is unknown (default: they/them) */
|
|
686
|
+
defaultOnUnknown?: PronounSet;
|
|
687
|
+
/** Threshold for gender guessing (default: 0.8) */
|
|
688
|
+
genderThreshold?: number;
|
|
689
|
+
}
|
|
690
|
+
/**
|
|
691
|
+
* Get pronouns for a person entity with optional gender lookup.
|
|
692
|
+
*
|
|
693
|
+
* Priority order:
|
|
694
|
+
* 1. Explicit pronouns (if provided)
|
|
695
|
+
* 2. Gender-based pronouns (if a genderDB is provided and the entity has a given name)
|
|
696
|
+
* 3. Custom default (`defaultOnUnknown`) if provided, otherwise they/them
|
|
697
|
+
*
|
|
698
|
+
* @param entity - The person entity
|
|
699
|
+
* @param options - Options including optional GenderDB
|
|
700
|
+
* @returns The appropriate PronounSet
|
|
701
|
+
*
|
|
702
|
+
* @example
|
|
703
|
+
* ```typescript
|
|
704
|
+
* const genderDB = createGenderDB();
|
|
705
|
+
* const person = parseName('John Smith') as PersonName;
|
|
706
|
+
*
|
|
707
|
+
* // With gender lookup
|
|
708
|
+
* getPronounsForPerson(person, { genderDB });
|
|
709
|
+
* // Returns he/him (John is typically male)
|
|
710
|
+
*
|
|
711
|
+
* // With explicit override
|
|
712
|
+
* getPronounsForPerson(person, { explicitPronouns: 'they/them' });
|
|
713
|
+
* // Returns they/them regardless of name
|
|
714
|
+
*
|
|
715
|
+
* // Without gender lookup
|
|
716
|
+
* getPronounsForPerson(person);
|
|
717
|
+
* // Returns they/them (safe default)
|
|
718
|
+
* ```
|
|
719
|
+
*/
|
|
720
|
+
declare function getPronounsForPerson(entity: PersonName, options?: GetPronounsForPersonOptions): PronounSet;
|
|
721
|
+
/**
|
|
722
|
+
* Get pronouns for any entity with full integration.
|
|
723
|
+
*
|
|
724
|
+
* This is a convenience function that handles all entity types:
|
|
725
|
+
* - For person entities, uses gender lookup if genderDB is provided
|
|
726
|
+
* - For all other entities, returns they/them
|
|
727
|
+
*
|
|
728
|
+
* @param entity - Any parsed name entity
|
|
729
|
+
* @param options - Options for person entity handling
|
|
730
|
+
* @returns The appropriate PronounSet
|
|
731
|
+
*
|
|
732
|
+
* @example
|
|
733
|
+
* ```typescript
|
|
734
|
+
* const genderDB = createGenderDB();
|
|
735
|
+
*
|
|
736
|
+
* const person = classifyName('Jane Smith');
|
|
737
|
+
* getPronouns(person, { genderDB }); // she/her
|
|
738
|
+
*
|
|
739
|
+
* const org = classifyName('Acme Inc.');
|
|
740
|
+
* getPronouns(org, { genderDB }); // they/them (ignores genderDB)
|
|
741
|
+
* ```
|
|
742
|
+
*/
|
|
743
|
+
declare function getPronouns(entity: ParsedNameEntity, options?: GetPronounsForPersonOptions): PronounSet;
|
|
744
|
+
|
|
745
|
+
/**
|
|
746
|
+
* Extract pronouns from a name string if present.
|
|
747
|
+
*
|
|
748
|
+
* Looks for pronoun specifications in parentheses at the end of names:
|
|
749
|
+
* - "Alex Johnson (they/them)"
|
|
750
|
+
* - "Sam Smith (he/his)"
|
|
751
|
+
* - "Jordan Lee (she/her/hers)"
|
|
752
|
+
*
|
|
753
|
+
* Does NOT extract non-pronoun parentheticals:
|
|
754
|
+
* - "John Smith (billing)" → not extracted
|
|
755
|
+
* - "The Smith Family (cabin)" → not extracted
|
|
756
|
+
*
|
|
757
|
+
* @param nameWithPronouns - The name string potentially containing pronouns
|
|
758
|
+
* @returns Object with the cleaned name and, when a pronoun spec was found, the parsed pronouns and the raw spec
|
|
759
|
+
*
|
|
760
|
+
* @example
|
|
761
|
+
* ```typescript
|
|
762
|
+
* extractPronouns('Alex Johnson (they/them)');
|
|
763
|
+
* // { name: 'Alex Johnson', pronouns: {...}, rawPronounSpec: 'they/them' }
|
|
764
|
+
*
|
|
765
|
+
* extractPronouns('John Smith (billing)');
|
|
766
|
+
* // { name: 'John Smith (billing)' } - not extracted
|
|
767
|
+
*
|
|
768
|
+
* extractPronouns('Jane Doe');
|
|
769
|
+
* // { name: 'Jane Doe' } - no pronouns found
|
|
770
|
+
* ```
|
|
771
|
+
*/
|
|
772
|
+
declare function extractPronouns(nameWithPronouns: string): PronounExtractionResult;
|
|
773
|
+
/**
|
|
774
|
+
* Check if a name string appears to contain pronouns.
|
|
775
|
+
*
|
|
776
|
+
* Useful for conditional logic without extracting.
|
|
777
|
+
*
|
|
778
|
+
* @param name - The name string to check
|
|
779
|
+
* @returns true if the name appears to contain pronouns
|
|
780
|
+
*/
|
|
781
|
+
declare function hasPronouns(name: string): boolean;
|
|
782
|
+
/**
|
|
783
|
+
* Map a raw pronoun spec (extracted or inferred) to a gender hint.
|
|
784
|
+
*
|
|
785
|
+
* Maps pronoun sets to gender hints:
|
|
786
|
+
* - he/him → 'male'
|
|
787
|
+
* - she/her → 'female'
|
|
788
|
+
* - they/them, neopronouns → 'unknown'
|
|
789
|
+
* - name-only, any → 'unknown'
|
|
790
|
+
*
|
|
791
|
+
* @param rawSpec - The raw pronoun spec string
|
|
792
|
+
* @returns 'male' or 'female' when the spec maps to one, otherwise 'unknown'
|
|
793
|
+
*/
|
|
794
|
+
declare function pronounsToGenderHint(rawSpec: string): 'male' | 'female' | 'unknown';
|
|
795
|
+
|
|
796
|
+
export { BUILT_IN_PRONOUNS, type BaseEntity, COMMON_FIRST_NAMES, COMMON_SURNAMES, type Capitalization, type CompoundConnector, type CompoundName, type Confidence, type FamilyName, type FamilyStyle, type FormatOptions, type GetPronounsForPersonOptions, type LegalForm, MULTI_WORD_PARTICLES, type NameAffixToken, type NameAffixTokenType, type NameFormatOptions, type NameKind, type NamePreset, type NameToken, type NameTokenType, type OrganizationName, PARTICLES, type ParseListOptions, type ParseMeta, type ParseOptions, type ParsedName, type ParsedNameEntity, type ParsedRecipient, type PersonName, type PronounExtractionResult, type PronounRole, type PronounSet, type ReasonCode, type RejectedName, SPEC_ALIASES, type UnknownName, classifyName, entityToLegacy, extractPronouns, fillPronounTemplate, fillPronounTemplateSmart, formatName, formatPronoun, getDefaultPronouns, getFirstName, getLastName, getNickname, getPronounSet, getPronouns, getPronounsForEntity, getPronounsForPerson, hasPronouns, isCommonFirstName, isCommonSurname, isCompound, isFamily, isMultiWordParticle, isOrganization, isParticle, isPerson, isRejected, isUnknown, parseName, parseNameList, parsePersonName, parsePronounSpec, pronounsToGenderHint };
|