@fpw/en-wiktionary-la-modules 0.0.10 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/LaEngine.js +6 -6
- package/dist/LaEngine.js.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/modules/conjugation/LaVerb.d.ts +1 -1
- package/dist/modules/conjugation/LaVerb.js +379 -375
- package/dist/modules/conjugation/LaVerb.js.map +1 -1
- package/dist/modules/conjugation/VerbForm.js +2 -2
- package/dist/modules/conjugation/VerbForm.js.map +1 -1
- package/dist/modules/conjugation/VerbType.d.ts +1 -0
- package/dist/modules/conjugation/VerbType.js +3 -2
- package/dist/modules/conjugation/VerbType.js.map +1 -1
- package/dist/modules/declination/LaAdjData.d.ts +1 -1
- package/dist/modules/declination/LaAdjData.js +505 -428
- package/dist/modules/declination/LaAdjData.js.map +1 -1
- package/dist/modules/declination/LaNominal.d.ts +10 -0
- package/dist/modules/declination/LaNominal.js +79 -73
- package/dist/modules/declination/LaNominal.js.map +1 -1
- package/dist/modules/declination/LaNounData.js +467 -417
- package/dist/modules/declination/LaNounData.js.map +1 -1
- package/dist/modules/declination/NominalForm.js +2 -2
- package/dist/modules/declination/NominalForm.js.map +1 -1
- package/dist/modules/headword/HeadwordParser.js +17 -17
- package/dist/modules/headword/HeadwordParser.js.map +1 -1
- package/package.json +9 -8
- package/src/LaEngine.ts +82 -0
- package/src/index.ts +16 -0
- package/src/modules/common.ts +164 -0
- package/src/modules/conjugation/LaVerb.ts +2490 -0
- package/src/modules/conjugation/VerbAffix.ts +18 -0
- package/src/modules/conjugation/VerbForm.ts +223 -0
- package/src/modules/conjugation/VerbType.ts +51 -0
- package/src/modules/declination/LaAdjData.ts +913 -0
- package/src/modules/declination/LaNominal.ts +1992 -0
- package/src/modules/declination/LaNounData.ts +896 -0
- package/src/modules/declination/LaPersonalPronoun.ts +77 -0
- package/src/modules/declination/NominalForm.ts +89 -0
- package/src/modules/headword/HeadWord.ts +132 -0
- package/src/modules/headword/HeadwordParser.ts +514 -0
- package/dist/modules/LaEngine.d.ts +0 -15
- package/dist/modules/LaEngine.js +0 -46
- package/dist/modules/LaEngine.js.map +0 -1
- package/dist/modules/LaWiktionary.d.ts +0 -18
- package/dist/modules/LaWiktionary.js +0 -171
- package/dist/modules/LaWiktionary.js.map +0 -1
|
@@ -0,0 +1,1992 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This is a complete re-implementation of Wiktionary's Module:la-nominal, developed by Benwing2.
|
|
3
|
+
* It was converted from Lua to TypeScript by Folke Will <folko@solhost.org>.
|
|
4
|
+
*
|
|
5
|
+
* Original source: https://en.wiktionary.org/wiki/Module:la-nominal
|
|
6
|
+
* Based on version: https://en.wiktionary.org/w/index.php?title=Module:la-nominal&oldid=62391877
|
|
7
|
+
*
|
|
8
|
+
* Lua idioms, function and variable names kept as in the original in order to easily
|
|
9
|
+
* backport later changes to this implementation.
|
|
10
|
+
*
|
|
11
|
+
* For that reason, it's suggested to add a type-aware wrapper around this class and leave
|
|
12
|
+
* this code unchanged instead of improving the types and use of idioms in this class.
|
|
13
|
+
*
|
|
14
|
+
*/
|
|
15
|
+
import { ArgMap, array_equals, extract_base, FormMap, is_enum_value, read_list, remove_links } from "../common";
|
|
16
|
+
import { m_adj_decl } from "./LaAdjData";
|
|
17
|
+
import { m_noun_decl } from "./LaNounData";
|
|
18
|
+
import { getNominalForm, NominalForm, setNominalForm } from "./NominalForm";
|
|
19
|
+
|
|
20
|
+
/**
 * Optional behaviour switches for {@link LaNominal}.
 *
 * Every flag may be omitted. Only `populateAllTerminations` is consumed in the
 * part of the class that handles adjective overrides; the remaining flags are
 * read elsewhere (NOTE(review): their exact effect is not visible here).
 */
export interface DeclOptions {
    suppressOldGenitive?: boolean;
    suppressNonNeuterIStemAccIs?: boolean;
    suppressRareIrregForms?: boolean;
    /** When set, feminine/neuter adjective forms identical to the masculine ones are kept instead of being cleared. */
    populateAllTerminations?: boolean;
    suppressAdjPtcForms?: boolean;
}
|
|
27
|
+
|
|
28
|
+
/** Grammatical gender; each member's value equals its name. */
export enum Gender {
    M = "M",
    F = "F",
    N = "N",
}
|
|
33
|
+
|
|
34
|
+
/**
 * Number restriction of a declined term.
 * The values are the short codes used in template arguments and slot names.
 */
export enum NumberTantum {
    Singular = "sg",
    Plural = "pl",
    Both = "both",
}
|
|
39
|
+
|
|
40
|
+
/**
 * Result of parsing a run of segments: the parsed pieces plus the properties
 * aggregated over all of them.
 */
interface SegmentRun {
    /** Parsed pieces in source order; plain segments and `((...))` alternants may be mixed. */
    segments: (Segment | Alternant)[];
    /** True when any piece (or a locative override argument) requests a locative. */
    loc: boolean;
    num?: NumberTantum;
    gender?: Gender;
    is_adj?: boolean;
    /** Declension properties collected from every declined segment of the run. */
    propses: DeclProp[];
}
|
|
48
|
+
|
|
49
|
+
/**
 * A single parsed segment of the form `lemma/stem2<decl.subtype...>`, or a
 * trailing piece of literal text (in which case only the prefix fields carry data).
 */
interface Segment {
    type: "Segment";

    decl: string;
    headword_decl: string;
    is_adj: boolean;
    /** Lemma with wiki links removed. */
    lemma: string;
    /** Lemma exactly as written in the specification. */
    orig_lemma: string;
    stem2?: string;
    gender?: Gender;
    /** Subtype markers, both user-specified and auto-detected. */
    types: Set<string>;
    num?: NumberTantum;
    loc: boolean;
    args: string[];
    /** Literal text preceding this segment, as written. */
    orig_prefix?: string;
    /** Literal text preceding this segment with wiki links removed. */
    prefix?: string;
}
|
|
66
|
+
|
|
67
|
+
/**
 * A `((a,b,...))` group: several alternative segment runs occupying the same
 * position, together with the properties aggregated over the alternatives.
 */
interface Alternant {
    type: "Alternant";

    /** One parsed run per comma-separated alternative. */
    alternants: SegmentRun[];
    loc: boolean;
    /** `both` when the alternatives disagree on their number restriction. */
    num?: NumberTantum;
    gender?: Gender;
    is_adj?: boolean;
    propses: DeclProp[];
}
|
|
77
|
+
|
|
78
|
+
/** Declension properties recorded for one declined segment. */
export interface DeclProp {
    /** Declension identifier as given in the segment specification. */
    decl: string;
    /** Declension shown in the headword; `irreg/<decl>` for irregular lemmas. */
    headword_decl: string;
    /** Subtype markers of the segment. */
    types: Set<string>;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Intermediate result of declining a segment run, before user overrides are applied.
 */
interface Declensions {
    forms: FormMap<NominalForm>;
    /** Per slot: one list of notes for each form stored in that slot. */
    notes: Map<string, string[][]>;
    title: string[];
    subtitleses: (string | string[])[];
    orig_titles: string[];
    categories: string[];
    /** Adjectives only: whether vocative forms are to be kept. */
    voc: boolean;
    /** Adjectives only: whether the neuter is absent. */
    noneut: boolean;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Data common to noun and adjective declension results.
 */
export interface DeclensionData {
    templateType: "declension";
    declensionType: "noun" | "adjective";

    title: string;
    num?: NumberTantum;
    propses: DeclProp[];
    forms: FormMap<NominalForm>;
    categories: string[];
    /** Notes keyed by slot name followed by the 1-based index of the form. */
    notes: Map<string, string[]>;
    /** Slots whose value came from a template argument rather than from the generated forms. */
    user_specified: Set<string>;
    pos: string;
    /** Value of the `type` template argument, if given. */
    num_type?: string;

    // only in headwords
    indecl: boolean;
    overriding_lemma: string[];
}
|
|
113
|
+
|
|
114
|
+
/** Declension result for a noun. */
export interface NounData extends DeclensionData {
    declensionType: "noun";

    // only in headwords
    gender?: Gender;
    /** Values of the `m` list argument. */
    m?: string[];
    /** Values of the `f` list argument. */
    f?: string[];
    /** Values of the `g` list argument. */
    overriding_genders?: string[];
}
|
|
123
|
+
|
|
124
|
+
/** Declension result for an adjective. */
export interface AdjectiveData extends DeclensionData {
    declensionType: "adjective";

    voc: boolean;
    /** True when the `noneut` argument is present or the declension itself has no neuter. */
    noneut: boolean;

    // only in headwords
    /** Values of the `comp` list argument. */
    comp: string[];
    /** Values of the `sup` list argument. */
    sup: string[];
    /** Values of the `adv` list argument. */
    adv: string[];
}
|
|
135
|
+
|
|
136
|
+
/**
 * Working data for one segment.
 * NOTE(review): the producers and consumers of this structure are not part of the
 * code shown here — presumably the noun/adjective declension tables; confirm.
 */
export interface SegmentData {
    declOpts: DeclOptions;
    title?: string;
    subtitles: (string | string[])[];
    footnote: string;
    num?: NumberTantum;
    loc?: boolean;
    pos: string;
    forms: FormMap<NominalForm>;
    types: Set<string>;
    categories: string[];
    notes: Map<string, string>;

    // adjectives
    gender?: Gender;
    voc?: boolean;
    noneut?: boolean;
}
|
|
154
|
+
|
|
155
|
+
/**
 * List of rows, each a tuple of:
 *  1. a string or list of strings,
 *  2. a string,
 *  3. a list of strings,
 *  4. optionally a function mapping `(base, stem2)` to a new `[base, stem2]` pair.
 *
 * NOTE(review): presumably (ending(s), declension, subtypes, stem adjuster) as used by
 * the subtype detection code — confirm against the consumers of this type.
 */
type EndingTable = [
    string | string[],
    string,
    string[],
    ((base: string, stem2: string) => [string, string])?
][];
|
|
161
|
+
|
|
162
|
+
export class LaNominal {
|
|
163
|
+
/** Placeholder stored in a slot whose value is empty, "-" or "—". */
public static readonly EmptyForm = "—";
private readonly options: DeclOptions;

// Building blocks of slot names: `<case>_<num>` for nouns, `<case>_<num>_<gender>` for adjectives.
private readonly cases = ["nom", "gen", "dat", "acc", "abl", "voc", "loc"];
private readonly genders = ["m", "f", "n"];
private readonly nums = ["sg", "pl"];
// Slot name prefixes: plain slot first, then its `linked_` variant.
private readonly linked_prefixes = ["", "linked_"];
// Slots that can hold the lemma; each of them has a `linked_` counterpart.
private readonly potential_noun_lemma_slots = ["nom_sg", "nom_pl"];
private readonly potential_adj_lemma_slots = [
    "nom_sg_m", "nom_pl_m",
    "nom_sg_f", "nom_pl_f",
    "nom_sg_n", "nom_pl_n"
];

// Irregular adjective lemma -> declension reported in the headword as `irreg/<decl>`.
private readonly irreg_adj_to_decl: Map<string, string> = new Map([
    ["duo", "irreg+"],
    ["ambō", "irreg+"],
    ["mīlle", "3-1+"],
    ["plūs", "3-1+"],
    ["is", "1&2+"],
    ["īdem", "1&2+"],
    ["ille", "1&2+"],
    ["ipse", "1&2+"],
    ["iste", "1&2+"],
    ["quis", "irreg+"],
    ["quī", "irreg+"],
    ["quisquis", "irreg+"],
]);

// Irregular noun lemma -> declension reported in the headword as `irreg/<decl>`.
private readonly irreg_noun_to_decl: Map<string, string> = new Map([
    ["bōs", "3"],
    ["cherub", "irreg"],
    ["deus", "2"],
    ["Deus", "2"],
    ["domus", "4,2"],
    ["Iēsus", "4"],
    ["Jēsus", "4"],
    ["Iēsūs", "4"],
    ["Jēsūs", "4"],
    ["iūgerum", "2,3"],
    ["jūgerum", "2,3"],
    ["sūs", "3"],
    ["ēthos", "3"],
    ["Athōs", "2"],
    ["lexis", "3"],
    ["vēnum", "4,2"],
    ["vīs", "3"],
]);

// Declension number -> English ordinal.
private readonly declension_to_english = new Map([
    ["1", "first"],
    ["2", "second"],
    ["3", "third"],
    ["4", "fourth"],
    ["5", "fifth"],
]);
|
|
222
|
+
|
|
223
|
+
/**
 * Creates a declension engine.
 *
 * @param options Optional behaviour switches; an empty option object is used when omitted.
 */
public constructor(options?: DeclOptions) {
    this.options = options ? options : {};
}
|
|
226
|
+
|
|
227
|
+
/**
 * Generates all forms and metadata for a noun declension template.
 *
 * @param args Template arguments; `1` holds the segment run specification, `num`,
 *             `title`, `type`, `indecl`, `m`, `f`, `lemma`, `g` and any slot name
 *             (e.g. `gen_sg`) are honoured as well.
 * @param pos Part-of-speech label passed on to the declension routine and the result.
 * @param from_headword Accepted for interface symmetry with the adjective variant;
 *             not used here — the title is always built in non-headword style.
 * @returns The assembled noun data with user overrides already applied.
 */
public do_generate_noun_forms(args: ArgMap, pos: string = "nouns", from_headword = false): NounData {
    const run_spec = args.get("1")?.trim() || "";
    const parsed_run = this.parse_segment_run_allowing_alternants(run_spec);

    // An explicit locative override implies that the term has a locative.
    const has_loc_override = ["loc_sg", "loc_pl"].some(slot => args.has(slot));
    parsed_run.loc = parsed_run.loc || has_loc_override;

    // A valid `num` argument wins over the number detected while parsing; invalid values are ignored.
    const raw_num = args.get("num");
    const num = (raw_num !== undefined && !is_enum_value(NumberTantum, raw_num)) ? undefined : raw_num;
    parsed_run.num = num || parsed_run.num;

    const declensions = this.decline_segment_run(parsed_run, pos, false);

    if (!parsed_run.loc) {
        for (const slot of ["loc_sg", "loc_pl"]) {
            setNominalForm(declensions.forms, slot, undefined);
        }
    }

    const title = this.construct_title(args.get("title"), declensions.title.join(""), false, parsed_run);
    declensions.title = [title];

    const all_data: NounData = {
        templateType: "declension",
        declensionType: "noun",

        title: declensions.title.join(" "),
        num: parsed_run.num,
        gender: parsed_run.gender,
        propses: parsed_run.propses,
        forms: declensions.forms,
        categories: declensions.categories,
        notes: new Map(),
        user_specified: new Set(),
        pos: pos,
        num_type: args.get("type"),

        // only if headword
        indecl: args.has("indecl"),
        m: read_list(args, "m"),
        f: read_list(args, "f"),
        overriding_lemma: read_list(args, "lemma"),
        overriding_genders: read_list(args, "g")
    };

    // Flatten the per-form notes: key is the slot name followed by the 1-based form index.
    for (const slot of this.iter_noun_slots()) {
        const noteses = declensions.notes.get(slot);
        if (!noteses) {
            continue;
        }
        for (let index = 0; index < noteses.length; index++) {
            all_data.notes.set(`${slot}${index + 1}`, noteses[index]);
        }
    }

    this.process_noun_forms_and_overrides(all_data, args);

    return all_data;
}
|
|
282
|
+
|
|
283
|
+
/**
 * Generates all forms and metadata for an adjective declension template.
 *
 * @param args Template arguments; `1` holds the segment run specification. A
 *             specification without `<` or `(` gets `<+>` appended (`<0+>` when
 *             `indecl` is present). `num`, `title`, `type`, `noneut`, `lemma`,
 *             `comp`, `sup`, `adv` and any slot name are honoured as well.
 * @param pos Part-of-speech label passed on to the declension routine and the result.
 * @param from_headword Whether the title should be formatted for use in a headword.
 * @returns The assembled adjective data with user overrides already applied.
 */
public do_generate_adj_forms(args: ArgMap, pos: string = "adjectives", from_headword = false): AdjectiveData {
    // All six slots of one case, ordered sg m/f/n then pl m/f/n.
    const slots_of_case = (cas: string): string[] => {
        const slots: string[] = [];
        for (const num of this.nums) {
            for (const gender of this.genders) {
                slots.push(cas + "_" + num + "_" + gender);
            }
        }
        return slots;
    };
    const loc_slots = slots_of_case("loc");
    const voc_slots = slots_of_case("voc");

    let run_spec = args.get("1")?.trim() || "";
    if (!/[<(]/.test(run_spec)) {
        run_spec += args.has("indecl") ? "<0+>" : "<+>";
    }

    const parsed_run = this.parse_segment_run_allowing_alternants(run_spec);
    // An explicit locative override implies that the term has a locative.
    parsed_run.loc = parsed_run.loc || loc_slots.some(slot => args.has(slot));

    // A valid `num` argument wins over the number detected while parsing; invalid values are ignored.
    const raw_num = args.get("num");
    const num = (raw_num !== undefined && !is_enum_value(NumberTantum, raw_num)) ? undefined : raw_num;
    parsed_run.num = num || parsed_run.num;

    const overriding_voc = voc_slots.some(slot => args.has(slot));
    const declensions = this.decline_segment_run(parsed_run, pos, true);

    if (!parsed_run.loc) {
        for (const slot of loc_slots) {
            setNominalForm(declensions.forms, slot, undefined);
        }
    }

    // Vocatives are kept only when the declension has them or the user overrides one.
    if (!overriding_voc && !declensions.voc) {
        for (const slot of voc_slots) {
            setNominalForm(declensions.forms, slot, undefined);
        }
    }

    const title = this.construct_title(args.get("title"), declensions.title.join(""), from_headword, parsed_run);
    declensions.title = [title];

    const all_data: AdjectiveData = {
        templateType: "declension",
        declensionType: "adjective",

        title: declensions.title.join(""),
        num: parsed_run.num,
        propses: parsed_run.propses,
        forms: declensions.forms,
        categories: declensions.categories,
        notes: new Map(),
        user_specified: new Set(),
        voc: declensions.voc,
        noneut: args.has("noneut") || declensions.noneut,
        pos: pos,
        num_type: args.get("type"),

        // only if headword
        overriding_lemma: read_list(args, "lemma"),
        indecl: args.has("indecl"),
        comp: read_list(args, "comp"),
        sup: read_list(args, "sup"),
        adv: read_list(args, "adv")
    };

    // Flatten the per-form notes: key is the slot name followed by the 1-based form index.
    for (const slot of this.iter_adj_slots()) {
        const noteses = declensions.notes.get(slot);
        if (!noteses) {
            continue;
        }
        for (let index = 0; index < noteses.length; index++) {
            all_data.notes.set(`${slot}${index + 1}`, noteses[index]);
        }
    }

    this.process_adj_forms_and_overrides(all_data, args);

    return all_data;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Builds the title of a declension table.
 *
 * When the user supplied a title, the placeholders `<1>`, `<1&2>`, `<2>` ... `<5>`
 * are expanded (every occurrence, as in the Lua original, which uses a global
 * substitution) and the first letter is lower-cased for headwords (dropping a
 * trailing period) or upper-cased otherwise. Without a user title the generated
 * title is used, extended by ", with locative" and the number restriction.
 *
 * Empty titles are handled without throwing.
 *
 * @param args_title Title given by the user, if any.
 * @param declensions_title Title generated by the declension routine.
 * @param from_headword Whether the title is embedded in a headword line.
 * @param parsed_run Parsed run supplying the `loc` and `num` properties.
 * @returns The final title.
 */
private construct_title(args_title: string | undefined, declensions_title: string, from_headword: boolean, parsed_run: SegmentRun): string {
    const lcfirst = (text: string): string => text.length > 0 ? text[0].toLowerCase() + text.slice(1) : text;
    const ucfirst = (text: string): string => text.length > 0 ? text[0].toUpperCase() + text.slice(1) : text;

    if (args_title) {
        const placeholders: [string, string][] = [
            ["<1>", "first declension"],
            ["<1&2>", "first/second declension"],
            ["<2>", "second declension"],
            ["<3>", "third declension"],
            ["<4>", "fourth declension"],
            ["<5>", "fifth declension"],
        ];

        let title = args_title;
        for (const [placeholder, text] of placeholders) {
            // split/join replaces every occurrence; String.replace with a string pattern only the first.
            title = title.split(placeholder).join(text);
        }

        if (from_headword) {
            return lcfirst(title.replace(/\.$/, ""));
        }
        if (title.startsWith(" ")) {
            title = title.slice(1);
        }
        return ucfirst(title);
    }

    let post_text = "";
    if (parsed_run.loc) {
        post_text += ", with locative";
    }
    if (parsed_run.num == NumberTantum.Singular) {
        post_text += ", singular only";
    } else if (parsed_run.num == NumberTantum.Plural) {
        post_text += ", plural only";
    }

    if (from_headword) {
        return lcfirst(declensions_title) + post_text;
    }
    if (declensions_title.length > 0) {
        return ucfirst(declensions_title) + post_text + ".";
    }
    return declensions_title;
}
|
|
399
|
+
|
|
400
|
+
/**
 * Applies user overrides and number restrictions to every noun slot of `data`.
 *
 * For each slot the value is taken from the argument of the same name, else (for
 * `linked_` slots) from the argument of the plain slot, else from the generated
 * forms. Override values are split on "/". Slots of the excluded number are set
 * to `[""]`, values starting with "", "-" or "—" become the empty-form
 * placeholder, everything else is stored as is.
 *
 * NOTE(review): a slot without any generated form is stored as an empty list,
 * because the lookup falls back to `[]`.
 *
 * @param data Noun data whose `forms` and `user_specified` are updated in place.
 * @param args Template arguments.
 */
private process_noun_forms_and_overrides(data: NounData, args: ArgMap) {
    // Maps e.g. `linked_nom_sg` to `nom_sg`.
    const plain_slot_of_linked = new Map<string, string>();
    for (const lemma_slot of this.potential_noun_lemma_slots) {
        plain_slot_of_linked.set("linked_" + lemma_slot, lemma_slot);
    }

    const empty_markers: (string | undefined)[] = ["", "-", "—"];

    for (const slot of this.iter_noun_slots()) {
        // Which argument, if any, overrides this slot.
        let override_key: string | undefined;
        if (args.has(slot)) {
            override_key = slot;
        } else {
            const plain_slot = plain_slot_of_linked.get(slot);
            if (plain_slot && args.has(plain_slot)) {
                override_key = plain_slot;
            }
        }

        let val: string[];
        if (override_key !== undefined) {
            val = args.get(override_key)?.split("/") || [];
            data.user_specified.add(slot);
        } else {
            val = getNominalForm(data.forms, slot) || [];
        }

        const excluded_by_number =
            (data.num == "pl" && slot.includes("sg")) ||
            (data.num == "sg" && slot.includes("pl"));

        if (excluded_by_number) {
            setNominalForm(data.forms, slot, [""]);
        } else if (empty_markers.includes(val[0])) {
            setNominalForm(data.forms, slot, [LaNominal.EmptyForm]);
        } else {
            setNominalForm(data.forms, slot, val);
        }
    }
}
|
|
433
|
+
|
|
434
|
+
/**
 * Applies user overrides, the `noneut` flag and number restrictions to every
 * adjective slot of `data`, then removes feminine/neuter paradigms that are
 * identical to the masculine one.
 *
 * Neuter slots are recognised by their `_n` suffix. (An unanchored `_n` test would
 * also hit `linked_nom_sg_m` / `linked_nom_pl_m` because of the `_n` in `_nom`,
 * wiping the linked masculine lemma forms of neuter-less adjectives.)
 *
 * @param data Adjective data whose `forms` and `user_specified` are updated in place.
 * @param args Template arguments; non-empty slot arguments override generated forms.
 */
private process_adj_forms_and_overrides(data: AdjectiveData, args: ArgMap) {
    // Maps e.g. `linked_nom_sg_m` to `nom_sg_m`.
    const linked_to_non_linked_adj_slots = new Map<string, string>();
    for (const slot of this.potential_adj_lemma_slots) {
        linked_to_non_linked_adj_slots.set("linked_" + slot, slot);
    }

    for (const slot of this.iter_adj_slots()) {
        if (data.noneut && slot.endsWith("_n")) {
            setNominalForm(data.forms, slot, undefined);
        }

        let val: string[] | undefined;
        const ovr = args.get(slot);
        if (ovr) {
            val = ovr.split("/");
            data.user_specified.add(slot);
        } else {
            // Overriding a plain lemma slot also overrides its linked variant.
            const non_linked_equiv_slot = linked_to_non_linked_adj_slots.get(slot);
            if (non_linked_equiv_slot && args.has(non_linked_equiv_slot)) {
                val = args.get(non_linked_equiv_slot)?.split("/") || [];
                data.user_specified.add(slot);
            } else {
                val = getNominalForm(data.forms, slot);
            }
        }

        if (val) {
            if ((data.num == "pl" && slot.includes("sg")) || (data.num == "sg" && slot.includes("pl"))) {
                setNominalForm(data.forms, slot, undefined);
            } else if (val[0] == "" || val[0] == "-" || val[0] == "—") {
                setNominalForm(data.forms, slot, [LaNominal.EmptyForm]);
            } else {
                setNominalForm(data.forms, slot, val);
            }
        }
    }

    // Drop a feminine/neuter paradigm that merely repeats the masculine one,
    // unless the caller asked for all terminations to be populated.
    for (const gender of ["f", "n"]) {
        const other_is_masc = this.cases.every(cas => this.nums.every(num => {
            const genderForm = getNominalForm(data.forms, cas + "_" + num + "_" + gender);
            const mascForm = getNominalForm(data.forms, cas + "_" + num + "_m");
            return array_equals(genderForm, mascForm);
        }));

        if (other_is_masc && !this.options.populateAllTerminations) {
            for (const cas of this.cases) {
                for (const num of this.nums) {
                    setNominalForm(data.forms, cas + "_" + num + "_" + gender, undefined);
                }
            }
        }
    }
}
|
|
494
|
+
|
|
495
|
+
/**
 * Lists the slot names for adjectives or nouns.
 *
 * @param is_adj Selects the adjective slot list when true, the noun slot list otherwise.
 * @param overridable_only When true, the `linked_` variants are left out.
 */
private iter_slots(is_adj: boolean, overridable_only = false) {
    return is_adj
        ? this.iter_adj_slots(overridable_only)
        : this.iter_noun_slots(overridable_only);
}
|
|
502
|
+
|
|
503
|
+
/**
 * Lists all adjective slot names (`<case>_<num>_<gender>`), ordered by case, then
 * number, then gender. Each masculine nominative slot is directly followed by its
 * `linked_` variant unless `overridable_only` is set.
 *
 * @param overridable_only When true, the `linked_` variants are left out.
 */
private iter_adj_slots(overridable_only = false): string[] {
    const entries: string[] = [];

    for (const cas of this.cases) {
        for (const num of this.nums) {
            for (const gender of this.genders) {
                const has_linked_variant = !overridable_only && cas == "nom" && gender == "m";
                const variant_count = has_linked_variant ? 2 : 1;
                for (let variant = 0; variant < variant_count; variant++) {
                    entries.push(this.linked_prefixes[variant] + cas + "_" + num + "_" + gender);
                }
            }
        }
    }

    return entries;
}
|
|
541
|
+
|
|
542
|
+
/**
 * Lists all noun slot names (`<case>_<num>`), ordered by case, then number. Each
 * nominative slot is directly followed by its `linked_` variant unless
 * `overridable_only` is set.
 *
 * @param overridable_only When true, the `linked_` variants are left out.
 */
private iter_noun_slots(overridable_only = false): string[] {
    const entries: string[] = [];

    for (const cas of this.cases) {
        const variant_count = (!overridable_only && cas == "nom") ? 2 : 1;
        for (const num of this.nums) {
            for (let variant = 0; variant < variant_count; variant++) {
                entries.push(this.linked_prefixes[variant] + cas + "_" + num);
            }
        }
    }

    return entries;
}
|
|
573
|
+
|
|
574
|
+
/**
 * Parses a segment run that may contain `((...))` alternant groups.
 *
 * The specification is split on the alternant groups; text between groups is
 * parsed as an ordinary segment run and its segments are appended individually,
 * each group is parsed as one alternant. `loc`, `num` and `gender` are taken from
 * the first piece that provides them; `is_adj` is true only if every piece that
 * reports a value reports true.
 *
 * @param segment_run Raw specification text.
 * @returns The combined run.
 */
private parse_segment_run_allowing_alternants(segment_run: string): SegmentRun {
    // Even indices: text outside alternants; odd indices: the `((...))` groups.
    const pieces = this.capturing_split(segment_run, /(\(\(.*?\)\))/);

    const parsed_segments: (Segment | Alternant)[] = [];
    const propses: DeclProp[] = [];
    let loc = false;
    let num: NumberTantum | undefined;
    let gender: Gender | undefined;
    let is_adj: boolean | undefined;

    for (let idx = 0; idx < pieces.length; idx++) {
        const piece = pieces[idx];
        if (!piece) {
            continue;
        }

        let parsed: SegmentRun | Alternant;
        if (idx % 2 == 0) {
            const run = this.parse_segment_run(piece);
            parsed_segments.push(...run.segments);
            parsed = run;
        } else {
            const alternant = this.parse_alternant(piece);
            parsed_segments.push(alternant);
            parsed = alternant;
        }

        loc = loc || parsed.loc;
        num = num || parsed.num;
        gender = gender || parsed.gender;
        propses.push(...parsed.propses);

        const this_is_adj = parsed.is_adj;
        if (is_adj === undefined) {
            is_adj = this_is_adj;
        } else if (this_is_adj !== undefined) {
            is_adj = is_adj && this_is_adj;
        }
    }

    return {
        segments: parsed_segments,
        loc: loc,
        num: num,
        gender: gender,
        is_adj: is_adj,
        propses: propses
    };
}
|
|
625
|
+
|
|
626
|
+
/**
 * Parses one alternant group of the form `((run1,run2,...))`.
 *
 * Each comma-separated part is parsed as a segment run. `loc` is true if any
 * alternative has a locative; `num` is that of the alternatives if they all agree
 * and `both` otherwise; `gender` comes from the first alternative that has one.
 *
 * @param alternant The group including its double parentheses.
 * @returns The parsed alternant.
 * @throws Error if the text is not wrapped in `((` `))`, or if alternatives
 *         report conflicting `is_adj` values.
 */
private parse_alternant(alternant: string): Alternant {
    const alternant_spec = alternant.match(/^\(\((.*)\)\)$/);
    if (!alternant_spec) {
        throw Error(`Invalid alternant spec`);
    }

    const parsed_alternants: SegmentRun[] = [];
    const propses: DeclProp[] = [];
    let loc = false;
    let num: NumberTantum | undefined;
    let gender: Gender | undefined;
    let is_adj: boolean | undefined;

    const run_specs = alternant_spec[1].split(",");
    for (let idx = 0; idx < run_specs.length; idx++) {
        const run = this.parse_segment_run(run_specs[idx]);
        parsed_alternants.push(run);

        loc = loc || run.loc;

        if (idx == 0) {
            num = run.num;
        } else if (num != run.num) {
            // Alternatives disagree, so neither restriction applies to the group.
            num = NumberTantum.Both;
        }

        gender = gender || run.gender;

        if (is_adj === undefined) {
            is_adj = run.is_adj;
        } else if (run.is_adj !== undefined && run.is_adj !== is_adj) {
            throw Error(`Saw both noun and adjective alternants; not allowed`);
        }

        propses.push(...run.propses);
    }

    return {
        type: "Alternant",
        alternants: parsed_alternants,
        loc: loc,
        num: num,
        gender: gender,
        is_adj: is_adj,
        propses: propses,
    };
}
|
|
668
|
+
|
|
669
|
+
/**
 * Parses a segment run without alternants, e.g. `prefix lemma<decl> other<decl> tail`.
 *
 * A bracketed link directly followed by `<...>` is kept together as one declined
 * segment even when the link text contains spaces, commas or hyphens; the
 * remaining text is split into declined terms of the form `word<...>`. If the run
 * starts with a hyphen (a suffix), hyphens are allowed inside such words.
 *
 * The Lua source uses `-` as a lazy quantifier in the bracketed-link pattern
 * (`[^%[%]]-`); the JavaScript equivalent is `*?`, not a literal hyphen.
 *
 * @param segment_run Raw specification text.
 * @returns The parsed segments and the properties aggregated over them. `is_adj`
 *          is true only if every declined segment is adjectival, and undefined
 *          when the run contains no declined segment.
 */
private parse_segment_run(segment_run: string): SegmentRun {
    const is_suffix = segment_run.startsWith("-");
    const subsegment_pattern = is_suffix ? /([^<> ,]+<.*?>)/ : /([^<> ,\-]+<.*?>)/;

    // Alternating list: even indices hold literal text, odd indices hold `stem<spec>` segments.
    const segments: string[] = [];

    const bracketed_segments = this.capturing_split(segment_run, /(\[\[[^\[\]]*?\]\]<.*?>)/);
    bracketed_segments.forEach((bracketed_segment, i) => {
        if (i % 2 == 1) {
            // A bracketed link with its spec: already one complete segment.
            segments.push(bracketed_segment);
        } else {
            segments.push(...this.capturing_split(bracketed_segment, subsegment_pattern));
        }
    });

    let loc = false;
    let num: NumberTantum | undefined;
    let gender: Gender | undefined;
    let is_adj: boolean | undefined;
    const propses: DeclProp[] = [];
    const parsed_segments: Segment[] = [];

    for (let i = 1; i < segments.length; i += 2) {
        const parsed_segment = this.parse_segment(segments[i]);
        loc = loc || parsed_segment.loc;
        num = num || parsed_segment.num;
        if (is_adj === undefined) {
            is_adj = parsed_segment.is_adj;
        } else {
            is_adj = is_adj && parsed_segment.is_adj;
        }
        gender = gender || parsed_segment.gender;

        // The literal text in front of the segment travels with it as its prefix.
        parsed_segment.orig_prefix = segments[i - 1];
        parsed_segment.prefix = remove_links(segments[i - 1]);
        parsed_segments.push(parsed_segment);

        propses.push({
            decl: parsed_segment.decl,
            headword_decl: parsed_segment.headword_decl,
            types: parsed_segment.types,
        });
    }

    // Trailing literal text becomes a segment that carries nothing but a prefix.
    const trailing_text = segments[segments.length - 1];
    if (trailing_text) {
        parsed_segments.push({
            type: "Segment",
            args: [],
            decl: "",
            headword_decl: "",
            is_adj: false,
            lemma: "",
            orig_lemma: "",
            loc: false,
            types: new Set(),
            orig_prefix: trailing_text,
            prefix: remove_links(trailing_text)
        });
    }

    return {
        segments: parsed_segments,
        loc: loc,
        num: num,
        gender: gender,
        is_adj: is_adj,
        propses: propses
    };
}
|
|
742
|
+
|
|
743
|
+
/**
 * Splits `str` on `pattern` and keeps the matched separators.
 *
 * The callers pass a pattern consisting of a single capturing group, so the
 * result alternates between unmatched text (even indices) and matched text
 * (odd indices).
 *
 * @param str Text to split.
 * @param pattern Separator pattern wrapped in one capturing group.
 */
private capturing_split(str: string, pattern: RegExp): string[] {
    const pieces = str.split(pattern);
    return pieces;
}
|
|
746
|
+
|
|
747
|
+
/**
 * Parses a single declinable segment written as `stem[/stem2]<decl[.subtype...]>`.
 *
 * The text before `<` is split on "/" into the lemma and an optional second
 * stem; the text inside the angle brackets is split on ".": the first item is
 * the declension, every following item is a subtype. A leading "-" on a
 * subtype is preserved (it cancels that subtype); any other hyphen in the
 * subtype name is turned into an underscore.
 *
 * A declension containing "+" is treated as an adjective declension; anything
 * else is treated as a noun declension. Subtypes detected from the lemma's
 * ending are merged into the explicitly given ones.
 *
 * @param segment the raw segment text, ending in `<...>`
 * @returns the parsed segment
 * @throws Error "No match" when `segment` does not end in `<...>`
 * @throws Error "No lemma" when the part before `<` starts with an empty stem
 * @throws Error when more than two "/"-separated stems are supplied
 */
private parse_segment(segment: string): Segment {
    const match = segment.match(/^(.*)<(.*?)>$/);
    if (!match) {
        throw Error("No match");
    }
    const stem_part = match[1];
    const spec_part = match[2];
    const stems = stem_part.split("/");
    const specs = spec_part.split(".");

    const types = new Set<string>();
    let num: NumberTantum | undefined;
    let loc = false;

    // split() always yields at least one element, so specs[0] is defined
    // (possibly as the empty string).
    let decl: string = specs[0];
    for (const raw_spec of specs.slice(1)) {
        // Keep a leading "-" (negation marker) but normalise every other
        // hyphen in the subtype name to "_".
        const negated = raw_spec.startsWith("-");
        const name = negated ? raw_spec.slice(1) : raw_spec;
        types.add((negated ? "-" : "") + name.replace(/-/g, "_"));
    }

    const orig_lemma = stems[0];
    if (!orig_lemma) {
        throw Error("No lemma");
    }

    const lemma = remove_links(orig_lemma);
    let stem2 = stems[1];

    if (stems.length > 2) {
        throw Error(`Too many stems, at most 2 should be given: ${stem_part}`);
    }

    let is_adj = false;

    let headword_decl;
    let base;
    let detected_subtypes;

    if (decl.match(/\+/)) {
        // "+" marks an adjective declension; strip it before detection.
        decl = decl.replace(/\+/g, "");
        [base, stem2, decl, detected_subtypes] = this.detect_adj_type_and_subtype(lemma, stem2, decl, types);
        is_adj = true;

        const irreg = this.irreg_adj_to_decl.get(lemma);
        if (irreg) {
            headword_decl = `irreg/${irreg}`;
        } else {
            headword_decl = decl + "+";
        }

        for (const subtype of detected_subtypes) {
            // An explicit "-subtype" cancels the detected subtype and is then consumed.
            if (types.has("-" + subtype)) {
                types.delete("-" + subtype);
            } else {
                types.add(subtype);
            }
        }
    } else {
        [base, stem2, detected_subtypes] = this.detect_noun_subtype(lemma, stem2, decl, types);

        const irreg = this.irreg_noun_to_decl.get(lemma);
        if (irreg) {
            headword_decl = `irreg/${irreg}`;
        } else {
            headword_decl = decl;
        }

        for (const subtype of detected_subtypes) {
            if (types.has("-" + subtype)) {
                types.delete("-" + subtype);
            } else if ((subtype == "M" || subtype == "F" || subtype == "N") && (types.has("M") || types.has("F") || types.has("N"))) {
                // don't create conflicting gender specs
            } else if ((subtype == "sg" || subtype == "pl" || subtype == "both") && (types.has("sg") || types.has("pl") || types.has("both"))) {
                // don't create conflicting number restrictions
            } else {
                types.add(subtype);
            }
        }

        // A lemma starting with an upper-case letter is made singular-only
        // unless "pl" or "both" is already present.
        if (!types.has("pl") && !types.has("both") && lemma.match(/^[A-ZĀĒĪŌŪȲĂĔĬŎŬ]/)) {
            types.add("sg");
        }
    }

    // "loc", "pl" and "sg" are lifted out of the subtype set into dedicated
    // fields; the gender markers are read but left in the set.
    if (types.has("loc")) {
        loc = true;
        types.delete("loc");
    }

    let gender: Gender | undefined;
    if (types.has("M")) {
        gender = Gender.M;
    } else if (types.has("F")) {
        gender = Gender.F;
    } else if (types.has("N")) {
        gender = Gender.N;
    }

    if (types.has("pl")) {
        num = NumberTantum.Plural;
        types.delete("pl");
    } else if (types.has("sg")) {
        num = NumberTantum.Singular;
        types.delete("sg");
    }

    const args = [base, stem2];

    return {
        type: "Segment",
        decl: decl,
        headword_decl: headword_decl,
        is_adj: is_adj,
        gender: gender,
        orig_lemma: orig_lemma,
        lemma: lemma,
        stem2: stem2,
        types: types,
        num: num,
        loc: loc,
        args: args,
    };
}
|
|
880
|
+
|
|
881
|
+
/**
 * Works out the base, second stem, declension type and implied subtypes of an
 * adjective lemma by matching its ending against a per-declension table.
 *
 * If `typ` does not start with one of the digits 0-3 and does not start with
 * "irreg", it is treated as a subtype instead: it is added to a copy of
 * `subtypes` and the declension type is auto-detected (first against the
 * 1&2 table, then against the 3rd-declension table).
 *
 * @param lemma the adjective lemma
 * @param stem2 the explicitly supplied second stem, if any
 * @param typ the declension spec with any "+" already removed
 * @param subtypes the explicitly specified subtypes (not modified)
 * @returns `[base, stem2, declension type, detected subtypes]`
 */
private detect_adj_type_and_subtype(lemma: string, stem2: string, typ: string, subtypes: Set<string>): [any, any, any, any] {
    let decl_type = typ;
    let wanted = subtypes;
    if (!/^[0123]/.test(typ) && !/^irreg/.test(typ)) {
        // Not a declension at all: reinterpret it as a subtype on a private copy.
        wanted = new Set(subtypes);
        wanted.add(typ);
        decl_type = "";
    }

    // Post-processors for matched table rows.
    // NOTE(review): "foo" looks like a placeholder base for plural-only rows — confirm it is never displayed.
    const base_as_stem2 = (base: string, _stem2: string): [string, string] => ["foo", base];
    const constant_base = (baseval: string) =>
        (_base: string, _stem2: string): [string, string] => [baseval, ""];
    const append_e = (base: string, _stem2: string): [string, string] => [base + "e", ""];

    // Default second-stem builders.
    const decl12_stem2 = (base: string): string => base;
    const decl3_stem2 = (base: string): string => LaNominal.make_stem2(base);

    const decl12_entries: EndingTable = [
        ["us", "1&2", []],
        ["a", "1&2", []],
        ["um", "1&2", []],
        ["ī", "1&2", ["pl"]],
        ["ae", "1&2", ["pl"]],
        ["a", "1&2", ["pl"]],
        ["os", "1&2", ["greekA", "-greekE"]],
        ["os", "1&2", ["greekE", "-greekA"]],
        ["ē", "1&2", ["greekE", "-greekA"]],
        ["on", "1&2", ["greekA", "-greekE"]],
        ["on", "1&2", ["greekE", "-greekA"]],
        ["^(.*er)$", "1&2", ["er"]],
        ["^(.*ur)$", "1&2", ["er"]],
        ["^(h)ic$", "1&2", ["ic"]],
    ];

    const decl3_entries: EndingTable = [
        ["^(.*er)$", "3-3", []],
        ["is", "3-2", []],
        ["e", "3-2", []],
        ["^(.*[ij])or$", "3-C", []],
        ["^(min)or$", "3-C", []],
        ["^(.*ēs)$", "3-1", ["I"]],
        ["^(.*ēs)$", "3-1", ["par"]],
        ["^(.*[ij])ōrēs$", "3-C", ["pl"]],
        ["^(min)ōrēs$", "3-C", ["pl"]],
        ["ēs", "3-2", ["pl", "I"]],
        ["ēs", "3-1", ["pl", "par"], base_as_stem2],
        ["ia", "3-2", ["pl", "I"]],
        ["a", "3-1", ["pl", "par"], base_as_stem2],
        ["", "3-1", ["I"]],
        ["", "3-1", ["par"]],
    ];

    if (!decl_type) {
        // Auto-detection: a miss on the 1&2 table is not an error (decltype
        // undefined), a miss on the 3rd-declension table is.
        const [base, new_stem2, rettype, new_subtypes] =
            this.get_adj_type_and_subtype_by_ending(lemma, stem2, undefined, wanted, decl12_entries, decl12_stem2);
        if (base) {
            return [base, new_stem2, rettype, new_subtypes];
        }
        return this.get_adj_type_and_subtype_by_ending(lemma, stem2, decl_type, wanted, decl3_entries, decl3_stem2);
    }

    if (decl_type == "0") {
        return [lemma, "", "0", []];
    }

    // Pick the ending table and second-stem builder for the explicit declension.
    let endings: EndingTable;
    let stem2_maker: ((base: string) => string) | undefined;
    switch (decl_type) {
        case "3":
            endings = decl3_entries;
            stem2_maker = decl3_stem2;
            break;
        case "1&2":
            endings = decl12_entries;
            stem2_maker = decl12_stem2;
            break;
        case "1-1":
            endings = [
                ["a", "1-1", []],
                ["ae", "1-1", ["pl"]]
            ];
            stem2_maker = undefined;
            break;
        case "2-2":
            endings = [
                ["us", "2-2", []],
                ["um", "2-2", []],
                ["ī", "2-2", ["pl"]],
                ["a", "2-2", ["pl"]],
                ["os", "2-2", ["greek"]],
                ["on", "2-2", ["greek"]],
                ["oe", "2-2", ["greek", "pl"]],
            ];
            stem2_maker = undefined;
            break;
        case "3-1":
            endings = [
                ["^(.*ēs)$", "3-1", ["I"]],
                ["^(.*ēs)$", "3-1", ["par"]],
                ["ēs", "3-1", ["pl", "I"], base_as_stem2],
                ["ēs", "3-1", ["pl", "par"], base_as_stem2],
                ["ia", "3-1", ["pl", "I"], base_as_stem2],
                ["a", "3-1", ["pl", "par"], base_as_stem2],
                ["", "3-1", ["I"]],
                ["", "3-1", ["par"]],
            ];
            stem2_maker = decl3_stem2;
            break;
        case "3-2":
            endings = [
                ["is", "3-2", []],
                ["e", "3-2", []],
                ["ēs", "3-2", []],
                ["ēs", "3-2", ["pl"]],
                ["ia", "3-2", ["pl"]],
            ];
            stem2_maker = decl3_stem2;
            break;
        case "3-C":
            endings = [
                ["^(.*[ij])or$", "3-C", []],
                ["^(min)or$", "3-C", []],
                ["^(.*[ij])ōrēs$", "3-C", ["pl"]],
                ["^(min)ōrēs$", "3-C", ["pl"]],
            ];
            stem2_maker = decl3_stem2;
            break;
        case "irreg":
            endings = [
                ["^(duo)$", decl_type, ["pl"]],
                ["^(ambō)$", decl_type, ["pl"]],
                ["^(mīll?ia)$", decl_type, ["N", "pl"], constant_base("mīlle")],
                ["^(ea)$", decl_type, [], constant_base("is")],
                ["^(id)$", decl_type, [], constant_base("is")],
                ["^([ei]ī)$", decl_type, ["pl"], constant_base("is")],
                ["^(eae?)$", decl_type, ["pl"], constant_base("is")],
                ["^(eadem)$", decl_type, [], constant_base("īdem")],
                ["^([īi]dem)$", decl_type, [], constant_base("īdem")],
                ["^(īdem)$", decl_type, ["pl"]],
                ["^(eae?dem)$", decl_type, ["pl"], constant_base("īdem")],
                ["^(i[lps][lst])a$", decl_type, [], append_e],
                ["^(i[ls][lt])ud$", decl_type, [], append_e],
                ["^(ipsum)$", decl_type, [], constant_base("ipse")],
                ["^(i[lps][lst])ī$", decl_type, ["pl"], append_e],
                ["^(i[lps][lst])ae?$", decl_type, ["pl"], append_e],
                ["^(quī)$", decl_type, []],
                ["^(quī)$", decl_type, ["pl"]],
                ["^(quae)$", decl_type, [], constant_base("quī")],
                ["^(quae)$", decl_type, ["pl"], constant_base("quī")],
                ["^(quid)$", decl_type, [], constant_base("quis")],
                ["^(quod)$", decl_type, [], constant_base("quī")],
                ["^(qui[cd]quid)$", decl_type, [], constant_base("quisquis")],
                ["^(quīquī)$", decl_type, ["pl"], constant_base("quisquis")],
                ["^(quaequae)$", decl_type, ["pl"], constant_base("quisquis")],
                ["", decl_type, []],
            ];
            stem2_maker = undefined;
            break;
        default:
            // Any other explicit type is passed through unchanged as the result type.
            endings = [
                ["ēs", decl_type, ["pl"], base_as_stem2],
                ["ia", decl_type, ["pl"], base_as_stem2],
                ["", decl_type, []],
            ];
            stem2_maker = decl3_stem2;
            break;
    }

    return this.get_adj_type_and_subtype_by_ending(lemma, stem2, decl_type, wanted, endings, stem2_maker);
}
|
|
1030
|
+
|
|
1031
|
+
/**
 * Scans `endings_and_subtypes` in order and returns the result for the first
 * row that is compatible with `specified_subtypes` and whose ending matches
 * `lemma`.
 *
 * A row is skipped when:
 *  - "pl" was specified but the row does not carry "pl";
 *  - one of the row's subtypes was explicitly cancelled ("-subtype" specified);
 *  - the row carries "-x" and "x" was specified.
 *
 * @param lemma the adjective lemma
 * @param stem2 the explicitly supplied second stem, if any
 * @param decltype declension used in the error message; `undefined` makes a
 *        miss return empty values instead of throwing
 * @param specified_subtypes subtypes given by the caller
 * @param endings_and_subtypes rows of `[ending, result type, subtypes, optional post-processor]`
 * @param process_stem2 builds the second stem from the base when none is set
 * @returns `[base, stem2, result type, subtypes of the row without "-" entries]`
 * @throws Error when nothing matches and `decltype` is not `undefined`
 */
private get_adj_type_and_subtype_by_ending(
    lemma: string,
    stem2: string,
    decltype: string | undefined,
    specified_subtypes: Set<string>,
    endings_and_subtypes: EndingTable,
    process_stem2?: (base: string) => string):
    [string, string, string, string[]]
{
    const plural_requested = specified_subtypes.has("pl");

    for (const [ending, rettype, subtypes, process_retval] of endings_and_subtypes) {
        if (plural_requested && !subtypes.includes("pl")) {
            continue;
        }

        const ruled_out = subtypes.some(subtype => {
            if (specified_subtypes.has("-" + subtype)) {
                return true;
            }
            const must_not_be_present = subtype.match(/^-(.*)$/);
            return !!must_not_be_present && specified_subtypes.has(must_not_be_present[1]);
        });
        if (ruled_out) {
            continue;
        }

        let base: string | undefined;
        if (typeof(ending) == "string") {
            base = extract_base(lemma, ending);
        } else {
            // Paired form: the lemma ending and the required second-stem ending.
            const lemma_ending = ending[0];
            const stem2_ending = ending[1];
            base = extract_base(lemma, lemma_ending);
            if (base && base + stem2_ending != stem2) {
                base = undefined;
            }
        }
        if (base === undefined) {
            continue;
        }

        // "-x" entries are constraints only; they are not reported back.
        const new_subtypes = subtypes.filter(subtype => !subtype.startsWith("-"));

        if (process_retval) {
            [base, stem2] = process_retval(base, stem2);
        }
        if (process_stem2) {
            stem2 = stem2 || process_stem2(base);
        }
        return [base, stem2, rettype, new_subtypes];
    }

    if (decltype === undefined) {
        return ["", "", "", []];
    }
    if (decltype == "") {
        throw Error(`Unrecognized ending for adjective: ${lemma}`);
    }
    throw Error(`Unrecognized ending for declension-${decltype} adjective: ${lemma}`);
}
|
|
1095
|
+
|
|
1096
|
+
/**
 * Determines the base, second stem and implied subtypes of a noun lemma for
 * the given declension `typ`.
 *
 * Declensions "1", "2", "4" and "5" are resolved through an ending table and
 * throw when no ending matches. Declension "3" has dedicated handling for
 * plural-only lemmas, derives a missing second stem with `make_stem2`, and
 * falls back to an empty subtype set when no table row matches. Any other
 * `typ` returns the lemma unchanged with a small fixed subtype set.
 *
 * @param lemma the noun lemma
 * @param stem2 the explicitly supplied second stem, if any
 * @param typ the declension spec
 * @param subtypes the explicitly specified subtypes
 * @returns `[base, stem2, detected subtypes]`
 * @throws Error when the lemma's ending is not valid for the declension
 */
private detect_noun_subtype(lemma: string, stem2: string, typ: string, subtypes: Set<string>): [string, string, Set<string>] {
    switch (typ) {
        case "1":
            return this.get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, [
                ["ām", ["F", "am"]],
                ["ās", ["M", "Greek", "Ma"]],
                ["ēs", ["M", "Greek", "Me"]],
                ["ē", ["F", "Greek"]],
                ["ae", ["F", "pl"]],
                ["a", ["F"]],
            ]);

        case "2": {
            const [base, found_stem2, detected] = this.get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, [
                ["^(.*r)$", ["M", "er"]],
                ["^(.*v)os$", ["M", "vos"]],
                ["^(.*v)om$", ["N", "vom"]],
                ["os", ["M", "Greek"]],
                ["os", ["N", "Greek", "us"]],
                ["on", ["N", "Greek"]],
                ["^([A-ZĀĒĪŌŪȲĂĔĬŎŬ].*)ius$", ["M", "ius", "voci", "sg"]],
                ["ius", ["M", "ius"]],
                ["ium", ["N", "ium"]],
                ["us", ["M"]],
                ["us", ["N", "us"]],
                ["um", ["N"]],
                ["iī", ["M", "ius", "pl"]],
                ["ia", ["N", "ium", "pl"]],
                ["ī", ["M", "pl"]],
                ["ī", ["N", "us", "pl"]],
                ["a", ["N", "pl"]],
            ]);
            // Without an explicit second stem, the extracted base doubles as one.
            return [base, found_stem2 || base, detected];
        }

        case "3": {
            if (subtypes.has("pl")) {
                // NOTE(review): "foo" looks like a placeholder base for plural-only lemmas — confirm.
                if (subtypes.has("Greek")) {
                    const er_match = lemma.match(/^(.*)erēs$/);
                    if (er_match) {
                        return [er_match[1] + "ēr", er_match[1] + "er", new Set(["er"])];
                    }
                    const ont_match = lemma.match(/^(.*)ontēs$/);
                    if (ont_match) {
                        return [ont_match[1] + "ōn", ont_match[1] + "ont", new Set(["on"])];
                    }
                    const es_match = lemma.match(/^(.*)es$/);
                    if (es_match) {
                        return ["foo", stem2 || es_match[1], new Set()];
                    }
                    throw Error(`Unrecognized ending for declension-3 plural Greek noun: ${lemma}`);
                }

                // Ordered: "ia" must be tried before the more general "a".
                const plural_rules: [RegExp, string[]][] = [
                    [/^(.*)ia$/, ["N", "I", "pure"]],
                    [/^(.*)a$/, ["N"]],
                    [/^(.*)ēs$/, []],
                ];
                for (const [ending_re, implied] of plural_rules) {
                    const hit = lemma.match(ending_re);
                    if (hit) {
                        return ["foo", stem2 || hit[1], new Set(implied)];
                    }
                }
                throw Error(`Unrecognized ending for declension-3 plural noun: ${lemma}`);
            }

            stem2 = stem2 || LaNominal.make_stem2(lemma);

            // Runs a table lookup that does not throw on a miss (empty decltype)
            // and yields the detected subtypes only when a row matched.
            const lookup = (table: [(string | string[]), string[]][]): Set<string> | undefined => {
                const [base, , detected] = this.get_noun_subtype_by_ending(lemma, stem2, "", subtypes, table);
                return base ? detected : undefined;
            };

            if (subtypes.has("Greek")) {
                const greek = lookup([
                    [["is", ""], ["I"]],
                    ["ēr", ["er"]],
                    ["ōn", ["on"]],
                ]);
                return [lemma, stem2, greek || new Set<string>()];
            }

            if (!subtypes.has("N")) {
                const non_neuter = lookup([
                    [["^([A-ZĀĒĪŌŪȲĂĔĬŎŬ].*pol)is$", ""], ["F", "polis", "sg", "loc"]],
                    [["tūdō", "tūdin"], ["F"]],
                    [["tās", "tāt"], ["F"]],
                    [["tūs", "tūt"], ["F"]],
                    [["tiō", "tiōn"], ["F"]],
                    [["siō", "siōn"], ["F"]],
                    [["xiō", "xiōn"], ["F"]],
                    [["gō", "gin"], ["F"]],
                    [["or", "ōr"], ["M"]],
                    [["trīx", "trīc"], ["F"]],
                    [["trix", "trīc"], ["F"]],
                    [["is", ""], ["I"]],
                    [["^([a-zāēīōūȳăĕĭŏŭ].*)ēs$", ""], ["I"]],
                ]);
                if (non_neuter) {
                    return [lemma, stem2, non_neuter];
                }
            }

            const neuter = lookup([
                [["us", "or"], ["N"]],
                [["us", "er"], ["N"]],
                [["ma", "mat"], ["N"]],
                [["men", "min"], ["N"]],
                [["^([A-ZĀĒĪŌŪȲĂĔĬŎŬ].*)e$", ""], ["N", "sg"]],
                [["e", ""], ["N", "I", "pure"]],
                [["al", "āl"], ["N", "I", "pure"]],
                [["ar", "ār"], ["N", "I", "pure"]],
            ]);
            return [lemma, stem2, neuter || new Set<string>()];
        }

        case "4": {
            if (subtypes.has("echo") || subtypes.has("argo") || subtypes.has("Callisto")) {
                const match = lemma.match(/^(.*)ō$/);
                if (!match) {
                    throw Error(`Declension-4 noun of subtype .echo, .argo or .Callisto should end in -ō: ${lemma}`);
                }
                const implied = subtypes.has("Callisto") ? ["F", "sg"] : ["F"];
                return [match[1], "", new Set(implied)];
            }
            return this.get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, [
                ["us", ["M"]],
                ["ū", ["N"]],
                ["ūs", ["M", "pl"]],
                ["ua", ["N", "pl"]],
            ]);
        }

        case "5":
            return this.get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, [
                ["iēs", ["F", "i"]],
                ["iēs", ["F", "i", "pl"]],
                ["ēs", ["F"]],
                ["ēs", ["F", "pl"]],
            ]);
    }

    // Everything else: indeclinables, irregulars and unknown types.
    if (typ == "irreg" && lemma == "domus") {
        return [lemma, "", new Set(["loc"])];
    }
    const singular_only_irregulars = ["Deus", "Iēsus", "Jēsus", "Athōs", "vēnum"];
    if (typ == "indecl" || (typ == "irreg" && singular_only_irregulars.includes(lemma))) {
        return [lemma, "", new Set(["sg"])];
    }
    return [lemma, "", new Set()];
}
|
|
1246
|
+
|
|
1247
|
+
/**
 * Derives a second stem from `stem` by swapping its ending according to an
 * ordered table of `[ending, replacement]` pairs.
 *
 * The first pair whose ending `stem` ends with wins, so more specific endings
 * are listed before the shorter endings they contain (e.g. "tūdō" and "gō"
 * before "ō", "ps" and "bs" before "s").
 *
 * Endings are compared as literal text with `endsWith`, so no regular
 * expression is built per call and an ending containing a regex
 * metacharacter could not be misinterpreted.
 *
 * @param stem the stem (lemma form) to transform
 * @returns the derived second stem, or `stem` unchanged when no ending matches
 */
private static make_stem2(stem: string): string {
    const patterns: ReadonlyArray<readonly [string, string]> = [
        ["tūdō", "tūdin"],
        ["is", ""],
        ["ēs", ""],
        ["āns", "ant"],
        ["ēns", "ent"],
        ["ōns", "ont"],
        ["ceps", "cipit"],
        ["us", "or"],
        ["ex", "ic"],
        ["ma", "mat"],
        ["e", ""],
        ["al", "āl"],
        ["ar", "ār"],
        ["men", "min"],
        ["er", "r"],
        ["or", "ōr"],
        ["gō", "gin"],
        ["ō", "ōn"],
        ["ps", "p"],
        ["bs", "b"],
        ["s", "t"],
        ["x", "c"],
    ];

    for (const [ending, replacement] of patterns) {
        if (stem.endsWith(ending)) {
            // Drop the matched ending and append its replacement.
            return stem.slice(0, stem.length - ending.length) + replacement;
        }
    }
    return stem;
}
|
|
1282
|
+
|
|
1283
|
+
/**
 * Scans `endings_and_subtypes` in order and returns the first row that is
 * compatible with `specified_subtypes` and whose ending matches `lemma`.
 *
 * A row is skipped when "pl" was specified but the row lacks "pl", or when
 * any of its subtypes clashes with the specified ones: the subtype was
 * cancelled ("-subtype"), it is a neuter marker while M/F was specified (or
 * the reverse), or it is "sg"/"pl" while the opposite number was specified.
 *
 * An ending given as a pair `[lemma ending, stem2 ending]` additionally
 * requires the extracted base plus the stem2 ending to equal `stem2`.
 *
 * @param lemma the noun lemma
 * @param stem2 the second stem to compare against and to return
 * @param decltype declension used in the error message; an empty string makes
 *        a miss return empty values instead of throwing
 * @param specified_subtypes subtypes given by the caller
 * @param endings_and_subtypes rows of `[ending, subtypes]`
 * @returns `[base, stem2, subtypes of the matching row]`, or
 *          `["", "", empty set]` on a miss with an empty `decltype`
 * @throws Error when nothing matches and `decltype` is non-empty
 */
private get_noun_subtype_by_ending(
    lemma: string,
    stem2: string,
    decltype: string,
    specified_subtypes: Set<string>,
    endings_and_subtypes: [(string | string[]), string[]][]):
    [string, string, Set<string>]
{
    const given = (name: string): boolean => specified_subtypes.has(name);

    // True when a row's subtype contradicts what the caller specified.
    const clashes = (subtype: string): boolean =>
        given("-" + subtype) ||
        (subtype == "N" && (given("M") || given("F"))) ||
        ((subtype == "M" || subtype == "F") && given("N")) ||
        (subtype == "sg" && given("pl")) ||
        (subtype == "pl" && given("sg"));

    for (const [ending, subtypes] of endings_and_subtypes) {
        if (given("pl") && !subtypes.includes("pl")) {
            continue;
        }
        if (subtypes.some(subtype => clashes(subtype))) {
            continue;
        }

        if (Array.isArray(ending)) {
            const lemma_ending = ending[0];
            const stem2_ending = ending[1];
            const base = extract_base(lemma, lemma_ending);
            if (base && (base + stem2_ending) == stem2) {
                return [base, stem2, new Set(subtypes)];
            }
        } else {
            const base = extract_base(lemma, ending);
            if (base) {
                return [base, stem2, new Set(subtypes)];
            }
        }
    }

    if (decltype) {
        throw Error(`Unrecognized ending for declension-${decltype} noun: ${lemma}`);
    }

    return ["", "", new Set()];
}
|
|
1331
|
+
|
|
1332
|
+
private decline_segment_run(parsed_run: SegmentRun, pos: string, is_adj: boolean): Declensions {
|
|
1333
|
+
const declensions: Declensions = {
|
|
1334
|
+
forms: new Map(),
|
|
1335
|
+
notes: new Map(),
|
|
1336
|
+
title: [],
|
|
1337
|
+
subtitleses: [],
|
|
1338
|
+
orig_titles: [],
|
|
1339
|
+
categories: [],
|
|
1340
|
+
voc: true,
|
|
1341
|
+
noneut: false,
|
|
1342
|
+
};
|
|
1343
|
+
|
|
1344
|
+
for (const slot of this.iter_slots(is_adj)) {
|
|
1345
|
+
setNominalForm(declensions.forms, slot, [""]);
|
|
1346
|
+
}
|
|
1347
|
+
|
|
1348
|
+
for (const seg of parsed_run.segments) {
|
|
1349
|
+
if (seg.type == "Segment" && seg.decl) {
|
|
1350
|
+
seg.loc = parsed_run.loc;
|
|
1351
|
+
seg.num = seg.num || parsed_run.num;
|
|
1352
|
+
seg.gender = seg.gender || parsed_run.gender;
|
|
1353
|
+
|
|
1354
|
+
let data: SegmentData;
|
|
1355
|
+
let potential_lemma_slots;
|
|
1356
|
+
|
|
1357
|
+
if (seg.is_adj) {
|
|
1358
|
+
const decline = m_adj_decl.get(seg.decl);
|
|
1359
|
+
if (!decline) {
|
|
1360
|
+
throw Error(`Unrecognized declension '${seg.decl}'`);
|
|
1361
|
+
}
|
|
1362
|
+
|
|
1363
|
+
potential_lemma_slots = this.potential_adj_lemma_slots;
|
|
1364
|
+
data = {
|
|
1365
|
+
declOpts: this.options,
|
|
1366
|
+
subtitles: [],
|
|
1367
|
+
footnote: "",
|
|
1368
|
+
num: seg.num,
|
|
1369
|
+
gender: seg.gender,
|
|
1370
|
+
voc: true,
|
|
1371
|
+
noneut: false,
|
|
1372
|
+
pos: is_adj ? pos : "adjectives",
|
|
1373
|
+
forms: new Map(),
|
|
1374
|
+
types: seg.types,
|
|
1375
|
+
categories: [],
|
|
1376
|
+
notes: new Map(),
|
|
1377
|
+
};
|
|
1378
|
+
decline(data, seg.args);
|
|
1379
|
+
|
|
1380
|
+
if (!data.voc) {
|
|
1381
|
+
declensions.voc = false;
|
|
1382
|
+
}
|
|
1383
|
+
if (data.noneut) {
|
|
1384
|
+
declensions.noneut = true;
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
if (data.types.has("sufn")) {
|
|
1388
|
+
data.subtitles.push(["with", " 'm' optionally → 'n' in compounds"]);
|
|
1389
|
+
} else if (data.types.has("not_sufn")) {
|
|
1390
|
+
data.subtitles.push(["without", " 'm' optionally → 'n' in compounds"]);
|
|
1391
|
+
}
|
|
1392
|
+
|
|
1393
|
+
if (data.title) {
|
|
1394
|
+
declensions.orig_titles.push(data.title);
|
|
1395
|
+
}
|
|
1396
|
+
if (data.subtitles.length > 0) {
|
|
1397
|
+
const subtitles: string[] = [];
|
|
1398
|
+
for (const subtitle of data.subtitles) {
|
|
1399
|
+
if (typeof(subtitle) == "string") {
|
|
1400
|
+
subtitles.push(subtitle);
|
|
1401
|
+
} else {
|
|
1402
|
+
subtitles.push(subtitle.join(""));
|
|
1403
|
+
}
|
|
1404
|
+
}
|
|
1405
|
+
data.title = data.title + " (" + subtitles.join(", ") + ")";
|
|
1406
|
+
}
|
|
1407
|
+
for (const subtitle of data.subtitles) {
|
|
1408
|
+
declensions.subtitleses.push(subtitle);
|
|
1409
|
+
}
|
|
1410
|
+
} else {
|
|
1411
|
+
const decline = m_noun_decl.get(seg.decl);
|
|
1412
|
+
if (!decline) {
|
|
1413
|
+
throw Error(`Unrecognized declension '${seg.decl}'`);
|
|
1414
|
+
}
|
|
1415
|
+
potential_lemma_slots = this.potential_noun_lemma_slots;
|
|
1416
|
+
data = {
|
|
1417
|
+
declOpts: this.options,
|
|
1418
|
+
subtitles: [],
|
|
1419
|
+
footnote: "",
|
|
1420
|
+
num: seg.num,
|
|
1421
|
+
loc: seg.loc,
|
|
1422
|
+
pos: pos,
|
|
1423
|
+
forms: new Map(),
|
|
1424
|
+
types: seg.types,
|
|
1425
|
+
categories: [],
|
|
1426
|
+
notes: new Map(),
|
|
1427
|
+
};
|
|
1428
|
+
|
|
1429
|
+
decline(data, seg.args);
|
|
1430
|
+
|
|
1431
|
+
if (!data.title) {
|
|
1432
|
+
const match = seg.headword_decl.match(/^irreg\/(.*)$/);
|
|
1433
|
+
let apparent_decl;
|
|
1434
|
+
if (match) {
|
|
1435
|
+
apparent_decl = match[1];
|
|
1436
|
+
if (data.subtitles.length == 0) {
|
|
1437
|
+
data.subtitles.push("irregular");
|
|
1438
|
+
}
|
|
1439
|
+
} else {
|
|
1440
|
+
apparent_decl = seg.headword_decl;
|
|
1441
|
+
}
|
|
1442
|
+
const english = this.declension_to_english.get(apparent_decl);
|
|
1443
|
+
if (english) {
|
|
1444
|
+
data.title = `${english}-declension`;
|
|
1445
|
+
} else if (apparent_decl == "irreg") {
|
|
1446
|
+
data.title = "irregular";
|
|
1447
|
+
} else if (apparent_decl == "indecl" || apparent_decl == "0") {
|
|
1448
|
+
data.title = "indeclinable";
|
|
1449
|
+
} else {
|
|
1450
|
+
throw Error(`Internal error! Don't recognize noun declension ${apparent_decl}`);
|
|
1451
|
+
}
|
|
1452
|
+
data.title = data.title + " noun";
|
|
1453
|
+
}
|
|
1454
|
+
if (data.types.has("sufn")) {
|
|
1455
|
+
data.subtitles.push(["with", " 'm' optionally → 'n' in compounds"]);
|
|
1456
|
+
} else if (data.types.has("not_sufn")) {
|
|
1457
|
+
data.subtitles.push(["without", " 'm' optionally → 'n' in compounds"]);
|
|
1458
|
+
}
|
|
1459
|
+
declensions.orig_titles.push(data.title);
|
|
1460
|
+
if (data.subtitles.length > 0) {
|
|
1461
|
+
const subtitles: string[] = [];
|
|
1462
|
+
for (const subtitle of data.subtitles) {
|
|
1463
|
+
if (typeof(subtitle) == "string") {
|
|
1464
|
+
subtitles.push(subtitle);
|
|
1465
|
+
} else {
|
|
1466
|
+
subtitles.push(subtitle.join(""));
|
|
1467
|
+
}
|
|
1468
|
+
}
|
|
1469
|
+
data.title = data.title + ` (${subtitles.join(", ")})`;
|
|
1470
|
+
}
|
|
1471
|
+
|
|
1472
|
+
for (const subtitle of data.subtitles) {
|
|
1473
|
+
declensions.subtitleses.push(subtitle);
|
|
1474
|
+
}
|
|
1475
|
+
}
|
|
1476
|
+
|
|
1477
|
+
for (const slot of potential_lemma_slots) {
|
|
1478
|
+
const forms = getNominalForm(data.forms, slot);
|
|
1479
|
+
if (forms) {
|
|
1480
|
+
const linked_forms = [];
|
|
1481
|
+
for (const form of forms) {
|
|
1482
|
+
if (form == seg.lemma) {
|
|
1483
|
+
linked_forms.push(seg.orig_lemma);
|
|
1484
|
+
} else {
|
|
1485
|
+
linked_forms.push(form);
|
|
1486
|
+
}
|
|
1487
|
+
}
|
|
1488
|
+
setNominalForm(data.forms, `linked_${slot}`, linked_forms);
|
|
1489
|
+
}
|
|
1490
|
+
}
|
|
1491
|
+
|
|
1492
|
+
if (seg.types.has("lig")) {
|
|
1493
|
+
this.apply_ligatures(data.forms, is_adj);
|
|
1494
|
+
}
|
|
1495
|
+
|
|
1496
|
+
if (seg.types.has("sufn")) {
|
|
1497
|
+
this.apply_sufn(data.forms, is_adj);
|
|
1498
|
+
}
|
|
1499
|
+
|
|
1500
|
+
this.propagate_number_restrictions(data.forms, seg.num, is_adj);
|
|
1501
|
+
|
|
1502
|
+
for (const slot of this.iter_slots(is_adj)) {
|
|
1503
|
+
let new_forms: string[] | undefined;
|
|
1504
|
+
if (is_adj) {
|
|
1505
|
+
if (!seg.is_adj) {
|
|
1506
|
+
throw Error(`Can't decline noun '${seg.lemma}' when overall term is an adjective`);
|
|
1507
|
+
}
|
|
1508
|
+
new_forms = getNominalForm(data.forms, slot);
|
|
1509
|
+
if (!new_forms && slot.match(/_[fn]$/)) {
|
|
1510
|
+
new_forms = getNominalForm(data.forms, slot.replace(/_[fn]$/, "_m"));
|
|
1511
|
+
}
|
|
1512
|
+
} else if (seg.is_adj) {
|
|
1513
|
+
if (!seg.gender) {
|
|
1514
|
+
throw Error(`Declining modifying adjective ${seg.lemma} but don't know gender of associated noun`);
|
|
1515
|
+
}
|
|
1516
|
+
new_forms = getNominalForm(data.forms, slot + "_" + seg.gender.toLowerCase()) ||
|
|
1517
|
+
getNominalForm(data.forms, slot + "_m");
|
|
1518
|
+
} else {
|
|
1519
|
+
new_forms = getNominalForm(data.forms, slot);
|
|
1520
|
+
}
|
|
1521
|
+
|
|
1522
|
+
const new_notes: string[][] = [];
|
|
1523
|
+
|
|
1524
|
+
if (new_forms) {
|
|
1525
|
+
for (let j = 0; j < new_forms.length; j++) {
|
|
1526
|
+
const noteses = data.notes.get(`${slot}${j + 1}`);
|
|
1527
|
+
if (noteses) {
|
|
1528
|
+
new_notes[j] = [noteses];
|
|
1529
|
+
}
|
|
1530
|
+
}
|
|
1531
|
+
}
|
|
1532
|
+
|
|
1533
|
+
const oldForms = getNominalForm(declensions.forms, slot);
|
|
1534
|
+
const [forms, notes] = this.append_form(oldForms, declensions.notes.get(slot), new_forms, new_notes, slot.includes("linked") ? seg.orig_prefix : seg.prefix);
|
|
1535
|
+
setNominalForm(declensions.forms, slot, forms);
|
|
1536
|
+
declensions.notes.set(slot, notes);
|
|
1537
|
+
}
|
|
1538
|
+
|
|
1539
|
+
if (!seg.types.has("nocat") && (is_adj || !seg.is_adj)) {
|
|
1540
|
+
for (const cat of data.categories) {
|
|
1541
|
+
this.insert_if_not(declensions.categories, cat);
|
|
1542
|
+
}
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1545
|
+
if (seg.prefix != "" && seg.prefix != "-" && seg.prefix != " ") {
|
|
1546
|
+
declensions.title.push("indeclinable portion");
|
|
1547
|
+
}
|
|
1548
|
+
|
|
1549
|
+
if (data.title) {
|
|
1550
|
+
declensions.title.push(data.title);
|
|
1551
|
+
}
|
|
1552
|
+
} else if (seg.type == "Alternant") {
|
|
1553
|
+
let seg_declensions: Declensions | undefined;
|
|
1554
|
+
const seg_titles: string[] = [];
|
|
1555
|
+
const seg_subtitleses: (string | string[])[][] = [];
|
|
1556
|
+
const seg_stems_seen: string[] = [];
|
|
1557
|
+
const seg_categories: string[] = [];
|
|
1558
|
+
|
|
1559
|
+
let title_the_hard_way = false;
|
|
1560
|
+
let alternant_decl: string = "";
|
|
1561
|
+
let alternant_decl_title;
|
|
1562
|
+
|
|
1563
|
+
for (const this_parsed_run of seg.alternants) {
|
|
1564
|
+
let num_non_constant_segments = 0;
|
|
1565
|
+
for (const segment of (this_parsed_run.segments)) {
|
|
1566
|
+
if (segment.type == "Segment" && segment.decl) {
|
|
1567
|
+
if (!alternant_decl) {
|
|
1568
|
+
alternant_decl = segment.decl;
|
|
1569
|
+
} else if (alternant_decl != segment.decl) {
|
|
1570
|
+
title_the_hard_way = true;
|
|
1571
|
+
num_non_constant_segments = 500;
|
|
1572
|
+
break;
|
|
1573
|
+
}
|
|
1574
|
+
num_non_constant_segments++;
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
if (num_non_constant_segments != 1) {
|
|
1578
|
+
title_the_hard_way = true;
|
|
1579
|
+
}
|
|
1580
|
+
}
|
|
1581
|
+
if (!title_the_hard_way) {
|
|
1582
|
+
const subtypeses: Set<string> = new Set();
|
|
1583
|
+
for (const this_parsed_run of seg.alternants) {
|
|
1584
|
+
for (const segment of this_parsed_run.segments) {
|
|
1585
|
+
if (segment.type == "Segment" && segment.decl) {
|
|
1586
|
+
segment.types.forEach(t => subtypeses.add(t));
|
|
1587
|
+
this.insert_if_not(seg_stems_seen, segment.stem2 || "");
|
|
1588
|
+
}
|
|
1589
|
+
}
|
|
1590
|
+
}
|
|
1591
|
+
for (const this_parsed_run of seg.alternants) {
|
|
1592
|
+
for (const segment of this_parsed_run.segments) {
|
|
1593
|
+
if (segment.type == "Segment" && segment.decl) {
|
|
1594
|
+
const neg_subtypes = this.set_difference(subtypeses, segment.types);
|
|
1595
|
+
for (const neg_subtype of neg_subtypes) {
|
|
1596
|
+
segment.types.add("not_" + neg_subtype);
|
|
1597
|
+
}
|
|
1598
|
+
}
|
|
1599
|
+
}
|
|
1600
|
+
}
|
|
1601
|
+
}
|
|
1602
|
+
|
|
1603
|
+
for (const this_parsed_run of seg.alternants) {
|
|
1604
|
+
this_parsed_run.loc = seg.loc;
|
|
1605
|
+
this_parsed_run.num = this_parsed_run.num || seg.num;
|
|
1606
|
+
this_parsed_run.gender = this_parsed_run.gender || seg.gender;
|
|
1607
|
+
const this_declensions = this.decline_segment_run(this_parsed_run, pos, is_adj);
|
|
1608
|
+
|
|
1609
|
+
if (!this_declensions.voc) {
|
|
1610
|
+
declensions.voc = false;
|
|
1611
|
+
}
|
|
1612
|
+
|
|
1613
|
+
if (this_declensions.noneut) {
|
|
1614
|
+
declensions.noneut = true;
|
|
1615
|
+
}
|
|
1616
|
+
|
|
1617
|
+
if (this_parsed_run.num == "sg" || this_parsed_run.num == "pl") {
|
|
1618
|
+
for (const slot of (this.iter_slots(is_adj))) {
|
|
1619
|
+
if ((this_parsed_run.num == "sg" && slot.includes("pl")) ||
|
|
1620
|
+
(this_parsed_run.num == "pl" && slot.includes("sg"))) {
|
|
1621
|
+
setNominalForm(this_declensions.forms, slot, []);
|
|
1622
|
+
this_declensions.notes.set(slot, []);
|
|
1623
|
+
}
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
|
|
1627
|
+
if (!seg_declensions) {
|
|
1628
|
+
seg_declensions = this_declensions;
|
|
1629
|
+
} else {
|
|
1630
|
+
for (const slot of this.iter_slots(is_adj)) {
|
|
1631
|
+
const curforms = getNominalForm(seg_declensions.forms, slot) || [];
|
|
1632
|
+
const newforms = getNominalForm(this_declensions.forms, slot) || [];
|
|
1633
|
+
const newform_index_to_new_index: number[] = [];
|
|
1634
|
+
newforms.forEach((form, newj) => {
|
|
1635
|
+
let did_break = false;
|
|
1636
|
+
for (let j = 0; j < curforms.length; j++) {
|
|
1637
|
+
if (curforms[j] == form) {
|
|
1638
|
+
newform_index_to_new_index[newj] = j;
|
|
1639
|
+
did_break = true;
|
|
1640
|
+
break;
|
|
1641
|
+
}
|
|
1642
|
+
}
|
|
1643
|
+
if (!did_break) {
|
|
1644
|
+
curforms.push(form);
|
|
1645
|
+
newform_index_to_new_index[newj] = curforms.length - 1;
|
|
1646
|
+
}
|
|
1647
|
+
});
|
|
1648
|
+
|
|
1649
|
+
setNominalForm(seg_declensions.forms, slot, curforms);
|
|
1650
|
+
const curnotes = seg_declensions.notes.get(slot) || [];
|
|
1651
|
+
const newnotes = this_declensions.notes.get(slot);
|
|
1652
|
+
if (newnotes) {
|
|
1653
|
+
newnotes.forEach((notes, index) => {
|
|
1654
|
+
const combined_index = newform_index_to_new_index[index];
|
|
1655
|
+
if (!curnotes[combined_index]) {
|
|
1656
|
+
curnotes[combined_index] = notes;
|
|
1657
|
+
} else {
|
|
1658
|
+
const combined = Array.from(curnotes[combined_index]);
|
|
1659
|
+
for (const note of newnotes) {
|
|
1660
|
+
this.insert_if_not(combined, newnotes);
|
|
1661
|
+
}
|
|
1662
|
+
curnotes[combined_index] = combined;
|
|
1663
|
+
}
|
|
1664
|
+
});
|
|
1665
|
+
}
|
|
1666
|
+
}
|
|
1667
|
+
}
|
|
1668
|
+
for (const cat of this_declensions.categories) {
|
|
1669
|
+
this.insert_if_not(seg_categories, cat);
|
|
1670
|
+
}
|
|
1671
|
+
this.insert_if_not(seg_titles, this_declensions.title.join(""));
|
|
1672
|
+
|
|
1673
|
+
seg_subtitleses.push(this_declensions.subtitleses);
|
|
1674
|
+
|
|
1675
|
+
if (!alternant_decl_title) {
|
|
1676
|
+
alternant_decl_title = this_declensions.orig_titles[0];
|
|
1677
|
+
}
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
if (!seg_declensions) {
|
|
1681
|
+
throw Error("No segment declensions");
|
|
1682
|
+
}
|
|
1683
|
+
|
|
1684
|
+
this.propagate_number_restrictions(seg_declensions?.forms, parsed_run.num, is_adj);
|
|
1685
|
+
|
|
1686
|
+
for (const slot of this.iter_slots(is_adj)) {
|
|
1687
|
+
const declForms = getNominalForm(declensions.forms, slot);
|
|
1688
|
+
const segForms = getNominalForm(seg_declensions.forms, slot);
|
|
1689
|
+
const [newForms, notes] = this.append_form(declForms, declensions.notes.get(slot), segForms, seg_declensions.notes.get(slot), undefined);
|
|
1690
|
+
setNominalForm(declensions.forms, slot, newForms);
|
|
1691
|
+
declensions.notes.set(slot, notes);
|
|
1692
|
+
}
|
|
1693
|
+
|
|
1694
|
+
if (is_adj || !seg.is_adj) {
|
|
1695
|
+
for (const cat of seg_categories) {
|
|
1696
|
+
this.insert_if_not(declensions.categories, cat);
|
|
1697
|
+
}
|
|
1698
|
+
}
|
|
1699
|
+
|
|
1700
|
+
let title_to_insert;
|
|
1701
|
+
if (title_the_hard_way) {
|
|
1702
|
+
title_to_insert = this.join_sentences(seg_titles, " or ");
|
|
1703
|
+
} else {
|
|
1704
|
+
const first = seg_subtitleses[0];
|
|
1705
|
+
if (typeof(first) == "string") {
|
|
1706
|
+
throw Error("Expected multi-title");
|
|
1707
|
+
}
|
|
1708
|
+
const first_subtitles: (string | string[])[] = first;
|
|
1709
|
+
let num_common_subtitles = first_subtitles.length;
|
|
1710
|
+
for (let i = 1; i < seg_subtitleses.length; i++) {
|
|
1711
|
+
const this_subtitles = seg_subtitleses[i];
|
|
1712
|
+
for (let j = 0; j < num_common_subtitles; j++) {
|
|
1713
|
+
if (first_subtitles[j] != this_subtitles[j]) {
|
|
1714
|
+
num_common_subtitles = j;
|
|
1715
|
+
break;
|
|
1716
|
+
}
|
|
1717
|
+
}
|
|
1718
|
+
}
|
|
1719
|
+
|
|
1720
|
+
const common_subtitles: string[] = [];
|
|
1721
|
+
for (let i = 0; i < num_common_subtitles; i++) {
|
|
1722
|
+
const entry = first_subtitles[i];
|
|
1723
|
+
if (typeof(entry) != "string") {
|
|
1724
|
+
common_subtitles.push(entry.join(""));
|
|
1725
|
+
} else {
|
|
1726
|
+
common_subtitles.push(entry);
|
|
1727
|
+
}
|
|
1728
|
+
}
|
|
1729
|
+
|
|
1730
|
+
const common_subtitle_portion = common_subtitles.join(", ");
|
|
1731
|
+
let non_common_subtitle_portion: string | undefined;
|
|
1732
|
+
let common_prefix: string | undefined;
|
|
1733
|
+
let common_suffix: string | undefined;
|
|
1734
|
+
|
|
1735
|
+
for (let i = 0; i < seg_subtitleses.length; i++) {
|
|
1736
|
+
const this_subtitles = seg_subtitleses[i];
|
|
1737
|
+
if (typeof(this_subtitles) == "string") {
|
|
1738
|
+
throw Error("Expected subtitles to be array");
|
|
1739
|
+
}
|
|
1740
|
+
if (this_subtitles.length != num_common_subtitles + 1 || typeof(this_subtitles[num_common_subtitles]) == "string" || this_subtitles[num_common_subtitles].length != 2) {
|
|
1741
|
+
break;
|
|
1742
|
+
}
|
|
1743
|
+
if (i == 0) {
|
|
1744
|
+
common_prefix = this_subtitles[num_common_subtitles][0];
|
|
1745
|
+
common_suffix = this_subtitles[num_common_subtitles][1];
|
|
1746
|
+
} else {
|
|
1747
|
+
const this_prefix = this_subtitles[num_common_subtitles][0];
|
|
1748
|
+
const this_suffix = this_subtitles[num_common_subtitles][1];
|
|
1749
|
+
if (this_prefix != common_prefix) {
|
|
1750
|
+
common_prefix = undefined;
|
|
1751
|
+
}
|
|
1752
|
+
if (this_suffix != common_suffix) {
|
|
1753
|
+
common_suffix = undefined;
|
|
1754
|
+
}
|
|
1755
|
+
if (!common_prefix && !common_suffix) {
|
|
1756
|
+
break;
|
|
1757
|
+
}
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
if (common_prefix || common_suffix) {
|
|
1761
|
+
if (common_prefix && common_suffix) {
|
|
1762
|
+
throw Error("Something is wrong, first non-common subtitle is actually common to all segments");
|
|
1763
|
+
}
|
|
1764
|
+
if (common_prefix) {
|
|
1765
|
+
const non_common_parts = [];
|
|
1766
|
+
for (const subtitles of seg_subtitleses) {
|
|
1767
|
+
non_common_parts.push(subtitles[num_common_subtitles][1]);
|
|
1768
|
+
}
|
|
1769
|
+
non_common_subtitle_portion = common_prefix + non_common_parts.join(" or ");
|
|
1770
|
+
} else {
|
|
1771
|
+
const non_common_parts = [];
|
|
1772
|
+
for (const subtitles of seg_subtitleses) {
|
|
1773
|
+
non_common_parts.push(subtitles[num_common_subtitles][0]);
|
|
1774
|
+
}
|
|
1775
|
+
non_common_subtitle_portion = non_common_parts.join( " or ") + common_suffix;
|
|
1776
|
+
}
|
|
1777
|
+
} else {
|
|
1778
|
+
let saw_non_common_subtitles = false;
|
|
1779
|
+
const non_common_subtitles = [];
|
|
1780
|
+
for (const this_subtitles of seg_subtitleses) {
|
|
1781
|
+
const this_non_common_subtitles = [];
|
|
1782
|
+
for (let j = num_common_subtitles; j < this_subtitles.length; j++) {
|
|
1783
|
+
this_non_common_subtitles.push(this_subtitles[j]);
|
|
1784
|
+
}
|
|
1785
|
+
if (this_non_common_subtitles.length > 0) {
|
|
1786
|
+
non_common_subtitles.push(this_non_common_subtitles.join(", "));
|
|
1787
|
+
saw_non_common_subtitles = true;
|
|
1788
|
+
} else {
|
|
1789
|
+
non_common_subtitles.push("otherwise");
|
|
1790
|
+
}
|
|
1791
|
+
}
|
|
1792
|
+
non_common_subtitle_portion = saw_non_common_subtitles ? non_common_subtitles.join(" or ") : "";
|
|
1793
|
+
}
|
|
1794
|
+
|
|
1795
|
+
const subtitle_portions = [];
|
|
1796
|
+
if (common_subtitle_portion) {
|
|
1797
|
+
subtitle_portions.push(common_subtitle_portion);
|
|
1798
|
+
}
|
|
1799
|
+
if (non_common_subtitle_portion) {
|
|
1800
|
+
subtitle_portions.push(non_common_subtitle_portion);
|
|
1801
|
+
}
|
|
1802
|
+
if (seg_stems_seen.length > 1) {
|
|
1803
|
+
const number_to_english = [
|
|
1804
|
+
"zero", "one", "two", "three", "four", "five"
|
|
1805
|
+
];
|
|
1806
|
+
|
|
1807
|
+
subtitle_portions.push((number_to_english[seg_stems_seen.length] || `${seg_stems_seen.length}`) + " different stems");
|
|
1808
|
+
}
|
|
1809
|
+
const subtitle_portion = subtitle_portions.join("; ");
|
|
1810
|
+
if (subtitle_portion) {
|
|
1811
|
+
title_to_insert = alternant_decl_title + " (" + subtitle_portion + ")";
|
|
1812
|
+
} else {
|
|
1813
|
+
title_to_insert = alternant_decl_title;
|
|
1814
|
+
}
|
|
1815
|
+
}
|
|
1816
|
+
if (title_to_insert) {
|
|
1817
|
+
declensions.title.push(title_to_insert);
|
|
1818
|
+
}
|
|
1819
|
+
} else {
|
|
1820
|
+
for (const slot of this.iter_slots(is_adj)) {
|
|
1821
|
+
const prefix = slot.includes("linked") ? seg.orig_prefix : seg.prefix;
|
|
1822
|
+
const [forms, notes] = this.append_form(getNominalForm(declensions.forms, slot), declensions.notes.get(slot), [prefix || ""], undefined, undefined);
|
|
1823
|
+
setNominalForm(declensions.forms, slot, forms);
|
|
1824
|
+
declensions.notes.set(slot, notes);
|
|
1825
|
+
}
|
|
1826
|
+
declensions.title.push("indeclinable portion");
|
|
1827
|
+
}
|
|
1828
|
+
}
|
|
1829
|
+
|
|
1830
|
+
const titles: string[] = [];
|
|
1831
|
+
declensions.title.forEach((title, i) => {
|
|
1832
|
+
if (i == 0) {
|
|
1833
|
+
titles.push(title[0].toUpperCase() + title.substr(1));
|
|
1834
|
+
} else {
|
|
1835
|
+
titles.push(this.add_indefinite_article(title));
|
|
1836
|
+
}
|
|
1837
|
+
});
|
|
1838
|
+
declensions.title = [titles.join(" with ")];
|
|
1839
|
+
|
|
1840
|
+
return declensions;
|
|
1841
|
+
}
|
|
1842
|
+
|
|
1843
|
+
/**
 * Prefixes `text` with an English indefinite article.
 *
 * The choice is purely orthographic: "an" when the first character is one of
 * a/e/i/o/u (case-insensitive), "a" in every other case, including an empty
 * string.
 *
 * @param text Phrase to put the article in front of.
 * @returns The article, a single space, then `text` unchanged.
 */
private add_indefinite_article(text: string): string {
    const starts_with_vowel = /^[aeiou]/i.test(text);
    if (starts_with_vowel) {
        return `an ${text}`;
    }
    return `a ${text}`;
}
|
|
1850
|
+
|
|
1851
|
+
/**
 * Joins several sentence-like strings into a single string.
 *
 * Every sentence except the last loses one trailing period, and every
 * sentence except the first gets its first character lower-cased, so the
 * result reads as one sentence once `joiner` is placed between the parts.
 *
 * Empty sentences (or sentences that become empty after the period is
 * stripped) are passed through as empty strings instead of throwing.
 *
 * @param sentences Parts to combine, in order.
 * @param joiner Separator inserted between consecutive parts.
 * @returns The combined string; "" when `sentences` is empty.
 */
private join_sentences(sentences: string[], joiner: string): string {
    const last_index = sentences.length - 1;
    const sentences_to_join: string[] = [];
    sentences.forEach((sentence, i) => {
        let part = sentence;
        if (i < last_index) {
            part = part.replace(/\.$/, "");
        }
        if (i > 0) {
            // charAt() yields "" for an empty string, whereas indexing with
            // [0] yields undefined and made the original throw here.
            part = part.charAt(0).toLowerCase() + part.slice(1);
        }
        sentences_to_join.push(part);
    });
    return sentences_to_join.join(joiner);
}
|
|
1864
|
+
|
|
1865
|
+
/**
 * Computes the set difference `a \ b`.
 *
 * @param a Set whose members are candidates for the result.
 * @param b Set of members to exclude.
 * @returns A new set with the members of `a` that `b` does not contain, in
 *          the iteration order of `a`. Neither input is modified.
 */
private set_difference(a: Set<string>, b: Set<string>): Set<string> {
    const only_in_a = new Set<string>();
    a.forEach(member => {
        if (b.has(member)) {
            return;
        }
        only_in_a.add(member);
    });
    return only_in_a;
}
|
|
1874
|
+
|
|
1875
|
+
/**
 * Extends every existing form with `prefix` followed by each new form, and
 * merges the per-form footnote lists accordingly.
 *
 * Two cases:
 * - Exactly one new form: `forms` and `notes` are updated IN PLACE and the
 *   same array objects are returned. Each form gets the suffix appended; if
 *   the new form has notes, they are appended to a copy of the form's
 *   existing notes (or used directly when the form had none).
 * - Any other number of new forms (including zero): fresh arrays are built
 *   holding the cross product, with combination (i, j) at index
 *   `i * new_forms.length + j`. A notes entry is written only for
 *   combinations whose new form has notes; other indices are left unset.
 *
 * @param forms Existing forms; treated as [] when undefined.
 * @param notes Footnotes parallel to `forms`; treated as [] when undefined.
 * @param new_forms Forms to append; treated as [] when undefined.
 * @param new_notes Footnotes parallel to `new_forms`; [] when undefined.
 * @param prefix Text placed between an existing form and a new form; ""
 *               when undefined.
 * @returns A `[forms, notes]` pair as described above.
 */
private append_form(
    forms: string[] | undefined,
    notes: string[][] | undefined,
    new_forms: string[] | undefined,
    new_notes: string[][] | undefined,
    prefix: string | undefined):
    [string[], string[][]]
{
    const base_forms = forms || [];
    const added_forms = new_forms || [];
    const base_notes = notes || [];
    const added_notes = new_notes || [];
    const glue = prefix || "";

    if (added_forms.length != 1) {
        // Cross product: one result per (existing form, new form) pair.
        const product_forms: string[] = [];
        const product_notes: string[][] = [];
        const width = added_forms.length;

        for (let i = 0; i < base_forms.length; i++) {
            for (let j = 0; j < width; j++) {
                product_forms.push(base_forms[i] + glue + added_forms[j]);
                const extra = added_notes[j];
                if (!extra) {
                    continue;
                }
                const inherited = base_notes[i];
                product_notes[i * width + j] = inherited ? [...inherited, ...extra] : extra;
            }
        }
        return [product_forms, product_notes];
    }

    // Single new form: the caller's arrays are modified directly.
    for (let i = 0; i < base_forms.length; i++) {
        base_forms[i] = base_forms[i] + glue + added_forms[0];
        const extra = added_notes[0];
        if (!extra) {
            continue;
        }
        const inherited = base_notes[i];
        base_notes[i] = inherited ? [...inherited, ...extra] : extra;
    }
    return [base_forms, base_notes];
}
|
|
1928
|
+
|
|
1929
|
+
/**
 * Rewrites the digraphs Ae/Oe/ae/oe in every form of every slot as the
 * ligatures Æ/Œ/æ/œ.
 *
 * The form array of each slot is modified in place and then stored back via
 * `setNominalForm`; a slot with no forms is stored as an empty array.
 *
 * @param forms Form map to rewrite.
 * @param is_adj Passed through to `iter_slots` to select the slots to visit.
 */
private apply_ligatures(forms: FormMap<NominalForm>, is_adj: boolean) {
    // Applied in this order to each form.
    const ligatures: [RegExp, string][] = [
        [/Ae/g, "Æ"],
        [/Oe/g, "Œ"],
        [/ae/g, "æ"],
        [/oe/g, "œ"],
    ];
    for (const slot of this.iter_slots(is_adj)) {
        const slot_forms = getNominalForm(forms, slot) || [];
        for (let idx = 0; idx < slot_forms.length; idx++) {
            let text = slot_forms[idx];
            for (const [digraph, glyph] of ligatures) {
                text = text.replace(digraph, glyph);
            }
            slot_forms[idx] = text;
        }
        setNominalForm(forms, slot, slot_forms);
    }
}
|
|
1941
|
+
|
|
1942
|
+
/**
 * Adds an "-n" variant in front of every form that ends in "m".
 *
 * - A slot with exactly one form gets `[n-variant, original]`, unless the
 *   slot name contains "linked", in which case it is left untouched.
 * - A slot with several forms gets the n-variant inserted directly before
 *   each form ending in "m"; forms not ending in "m" are kept as they are.
 *
 * Fix: the multi-form branch used to build the expanded list and then drop
 * it, so such slots were never updated. The list is now stored.
 *
 * NOTE(review): footnotes that are keyed by form position are not remapped
 * here (this method does not receive them) — confirm that is acceptable.
 *
 * @param forms Form map to update.
 * @param is_adj Passed through to `iter_slots` to select the slots to visit.
 */
private apply_sufn(forms: FormMap<NominalForm>, is_adj: boolean) {
    const final_m = /m$/;
    for (const slot of this.iter_slots(is_adj)) {
        const current = getNominalForm(forms, slot) || [];

        if (current.length == 1) {
            const form = current[0];
            // Single-form "linked" slots are deliberately left unchanged.
            if (!slot.includes("linked") && final_m.test(form)) {
                setNominalForm(forms, slot, [form.replace(final_m, "n"), form]);
            }
            continue;
        }

        if (!current.some(form => final_m.test(form))) {
            continue;
        }

        const newval: string[] = [];
        for (const form of current) {
            if (final_m.test(form)) {
                newval.push(form.replace(final_m, "n"));
            }
            newval.push(form);
        }
        // Previously missing: without this the expanded list was discarded.
        setNominalForm(forms, slot, newval);
    }
}
|
|
1970
|
+
|
|
1971
|
+
/**
 * For a nominal restricted to one number, copies each slot of that number
 * into the corresponding slot of the other number.
 *
 * Does nothing unless `num` is "sg" or "pl". For every slot whose name
 * contains `num`, the first occurrence of `num` in the name is replaced by
 * the other number to obtain the target slot.
 *
 * Fix: the target slot now receives a shallow copy of the source forms.
 * Previously both slots held the same array object, so in-place edits made
 * elsewhere in this class (for example when appending a single form) were
 * applied to both slots at once and could be applied twice.
 *
 * @param forms Form map to update.
 * @param num Number restriction; only "sg" and "pl" have an effect.
 * @param is_adj Passed through to `iter_slots` to select the slots to visit.
 */
private propagate_number_restrictions(forms: FormMap<NominalForm>, num: string | undefined, is_adj: boolean) {
    if (num == "sg" || num == "pl") {
        const other_num = (num == "sg") ? "pl" : "sg";
        for (const slot of this.iter_slots(is_adj)) {
            if (slot.includes(num)) {
                const other_num_slot = slot.replace(num, other_num);
                const source_forms = getNominalForm(forms, slot) || [];
                // Copy so the two slots never share one mutable array.
                setNominalForm(forms, other_num_slot, [...source_forms]);
            }
        }
    }
}
|
|
1981
|
+
|
|
1982
|
+
/**
 * Adds `entry` to `data` unless `data` already contains it.
 *
 * @param data Array to modify in place.
 * @param entry Value to add; presence is checked with `Array.prototype.includes`.
 * @param pos Where to insert: 0 (the default) appends at the end; any other
 *            value inserts at array index `pos - 1`.
 */
private insert_if_not(data: any[], entry: any, pos = 0) {
    const already_present = data.includes(entry);
    if (!already_present) {
        const insert_at = pos == 0 ? data.length : pos - 1;
        data.splice(insert_at, 0, entry);
    }
}
|
|
1992
|
+
}
|