henkan 2.2.5 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/LICENSE.md +1 -1
  2. package/dist/index.cjs.js +596 -604
  3. package/dist/index.cjs.js.map +3 -3
  4. package/dist/index.mjs +585 -587
  5. package/dist/index.mjs.map +3 -3
  6. package/dist/types/constants.d.ts +1 -0
  7. package/dist/types/constants.d.ts.map +1 -1
  8. package/dist/types/types.d.ts +13 -9
  9. package/dist/types/types.d.ts.map +1 -1
  10. package/dist/types/utils.d.ts +34 -22
  11. package/dist/types/utils.d.ts.map +1 -1
  12. package/docs/api/README.md +2 -0
  13. package/docs/api/functions/convertJMdict.md +1 -1
  14. package/docs/api/functions/convertJawiktionaryAsync.md +1 -1
  15. package/docs/api/functions/convertJawiktionarySync.md +1 -1
  16. package/docs/api/functions/convertKanjiDic.md +1 -1
  17. package/docs/api/functions/convertKradFile.md +1 -1
  18. package/docs/api/functions/convertRadkFile.md +1 -1
  19. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  20. package/docs/api/functions/convertTanakaCorpusWithFurigana.md +1 -1
  21. package/docs/api/functions/createEntryMaps.md +1 -1
  22. package/docs/api/functions/generateAnkiNote.md +1 -1
  23. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  24. package/docs/api/functions/getKanji.md +4 -4
  25. package/docs/api/functions/getKanjiExtended.md +1 -1
  26. package/docs/api/functions/getValidForms.md +1 -1
  27. package/docs/api/functions/getWord.md +4 -4
  28. package/docs/api/functions/getWordDefinitions.md +4 -4
  29. package/docs/api/functions/getWordDefinitionsWithFurigana.md +1 -1
  30. package/docs/api/functions/hiraganaToKatakana.md +27 -0
  31. package/docs/api/functions/katakanaToHiragana.md +27 -0
  32. package/docs/api/interfaces/DefaultNoteInfo.md +4 -4
  33. package/docs/api/interfaces/Definition.md +6 -2
  34. package/docs/api/interfaces/DictKanjiMisc.md +1 -1
  35. package/docs/api/interfaces/DictWord.md +3 -3
  36. package/docs/api/interfaces/Grammar.md +16 -16
  37. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  38. package/docs/api/interfaces/JaWiktionaryEntry.md +2 -2
  39. package/docs/api/interfaces/Kana.md +11 -11
  40. package/docs/api/interfaces/Kanji.md +24 -24
  41. package/docs/api/interfaces/KanjiComponent.md +3 -3
  42. package/docs/api/interfaces/KanjiForm.md +4 -4
  43. package/docs/api/interfaces/NoteAndTag.md +3 -3
  44. package/docs/api/interfaces/NoteHeaderKeys.md +7 -7
  45. package/docs/api/interfaces/Phrase.md +5 -5
  46. package/docs/api/interfaces/Radical.md +16 -16
  47. package/docs/api/interfaces/Reading.md +5 -5
  48. package/docs/api/interfaces/ResultEntry.md +10 -10
  49. package/docs/api/interfaces/Translation.md +3 -3
  50. package/docs/api/interfaces/UsefulRegExps.md +8 -8
  51. package/docs/api/interfaces/Word.md +15 -15
  52. package/docs/api/interfaces/WordDefinitionPair.md +4 -4
  53. package/docs/api/type-aliases/Dict.md +1 -1
  54. package/docs/api/type-aliases/EntryType.md +1 -1
  55. package/docs/api/type-aliases/JLPT.md +1 -1
  56. package/docs/api/type-aliases/Result.md +1 -1
  57. package/package.json +11 -6
  58. package/src/constants.ts +1225 -0
  59. package/src/index.ts +3 -0
  60. package/src/types.ts +1056 -0
  61. package/src/utils.ts +3018 -0
  62. package/tsconfig.json +60 -0
  63. package/tsconfig.types.json +9 -0
package/src/utils.ts ADDED
@@ -0,0 +1,3018 @@
1
+ import { ReadStream } from "fs";
2
+ import { createInterface, Interface } from "readline";
3
+ import libxml from "libxmljs2";
4
+ import xml from "xml2js";
5
+ import iconv from "iconv-lite";
6
+ import {
7
+ noteHeaderKeys,
8
+ noteMap,
9
+ notSearchedForms,
10
+ posList,
11
+ regexps,
12
+ } from "./constants";
13
+ import {
14
+ DefaultNoteInfo,
15
+ Definition,
16
+ DictKanji,
17
+ DictKanjiForm,
18
+ DictKanjiReading,
19
+ DictKanjiReadingMeaning,
20
+ DictKanjiReadingMeaningGroup,
21
+ DictKanjiWithRadicals,
22
+ DictMeaning,
23
+ DictRadical,
24
+ DictReading,
25
+ DictTranslation,
26
+ DictWord,
27
+ EntryMaps,
28
+ ExamplePart,
29
+ Grammar,
30
+ JaWiktionaryEntry,
31
+ Kana,
32
+ Kanji,
33
+ KanjiComponent,
34
+ KanjiEntryMap,
35
+ KanjiForm,
36
+ KanjiSVGMap,
37
+ KanjiWordsMap,
38
+ NoteAndTag,
39
+ Phrase,
40
+ POS,
41
+ Radical,
42
+ Reading,
43
+ ReadingsKanjiFormsPair,
44
+ Result,
45
+ StringNumber,
46
+ TanakaExample,
47
+ TanakaID,
48
+ Translation,
49
+ Word,
50
+ WordDefinitionPair,
51
+ WordDefinitionsMap,
52
+ WordExamplesMap,
53
+ WordIDEntryMap,
54
+ } from "./types";
55
+
56
+ const Kuroshiro: any = require("kuroshiro");
57
+ const KuromojiAnalyzer: any = require("kuroshiro-analyzer-kuromoji");
58
+
59
/**
 * Upper-cases the first character of a string and leaves the remainder untouched.
 * @param value The string to capitalize
 * @returns A copy of {@link value} whose first character has been upper-cased
 */
export function capitalizeString(value: string): string {
  const head: string = value.charAt(0);
  const tail: string = value.slice(1);

  return `${head.toUpperCase()}${tail}`;
}
67
+
68
/**
 * Type guard for non-empty arrays.
 * @param arg The value to inspect
 * @returns `true` only when {@link arg} is an array holding at least one element
 */
export function isValidArrayWithFirstElement(arg: unknown): arg is any[] {
  if (!Array.isArray(arg)) return false;

  return arg.length !== 0;
}
76
+
77
/**
 * Type guard for non-empty arrays that contain nothing but strings.
 * @param arg The value to inspect
 * @returns `true` only when {@link arg} is an array with at least one element and every element is a string
 */
export function isStringArray(arg: unknown): arg is string[] {
  if (!Array.isArray(arg) || arg.length === 0) return false;

  const hasNonString: boolean = arg.some(
    (element: unknown) => typeof element !== "string",
  );

  return !hasNonString;
}
88
+
89
/**
 * Checks if the argument is a non-empty array of objects.
 *
 * `null` elements are rejected: `typeof null` is `"object"`, but a `null` entry
 * cannot be dereferenced like a real object.
 * @param arg The argument
 * @returns Whether or not {@link arg} is an array with at least one element whose elements are all non-null objects
 */
export function isObjectArray(arg: unknown): arg is any[] {
  return (
    Array.isArray(arg) &&
    arg.length > 0 &&
    arg.every(
      (element: unknown) => typeof element === "object" && element !== null,
    )
  );
}
100
+
101
/**
 * Returns a shuffled copy of an array (`Fisher–Yates shuffle`); the input array is not modified.
 * @param arr The array to be shuffled
 * @returns A new array holding the elements of {@link arr} in random order
 */
export function shuffleArray<T>(arr: readonly T[]): T[] {
  const shuffled: T[] = [...arr];
  let remaining: number = shuffled.length;

  // Walk from the last slot down to index 1, swapping each slot with a random slot at or before it.
  while (remaining > 1) {
    remaining--;

    const pick: number = Math.floor(Math.random() * (remaining + 1));

    [shuffled[remaining], shuffled[pick]] = [
      shuffled[pick]!,
      shuffled[remaining]!,
    ];
  }

  return shuffled;
}
118
+
119
/**
 * Convert hiragana text to katakana text.
 *
 * Only hiragana letters (U+3041–U+3096) and the iteration marks ゝ/ゞ (U+309D/U+309E)
 * are shifted into the katakana block. Everything else is copied unchanged, including
 * the spacing voicing marks ゛/゜ (U+309B/U+309C), which a blanket `+0x60` shift would
 * otherwise turn into ・ and ー.
 * @param input The hiragana text
 * @returns The katakana text
 */
export function hiraganaToKatakana(input: string): string {
  const output: string[] = [];

  // Work on the decomposed form so voiced kana are a base letter followed by a
  // combining mark (U+3099/U+309A); the marks fall outside the shifted ranges
  // and are recomposed by the final NFC pass.
  for (const ch of input.normalize("NFD")) {
    const cp: number = ch.codePointAt(0)!;

    const isLetter: boolean = cp >= 0x3041 && cp <= 0x3096;
    const isIterationMark: boolean = cp === 0x309d || cp === 0x309e;

    output.push(
      isLetter || isIterationMark ? String.fromCodePoint(cp + 0x60) : ch,
    );
  }

  return output.join("").normalize("NFC");
}
143
+
144
/**
 * Convert katakana text to hiragana text.
 *
 * Only katakana letters (U+30A1–U+30F6) and the iteration marks ヽ/ヾ (U+30FD/U+30FE)
 * are shifted into the hiragana block. Everything else is copied unchanged, including
 * the prolonged sound mark ー (U+30FC) and the middle dot ・ (U+30FB), which a blanket
 * `-0x60` shift would otherwise turn into ゜ and ゛.
 * @param input The katakana text
 * @returns The hiragana text
 */
export function katakanaToHiragana(input: string): string {
  const output: string[] = [];

  // Work on the decomposed form so voiced kana are a base letter followed by a
  // combining mark (U+3099/U+309A); the marks fall outside the shifted ranges
  // and are recomposed by the final NFC pass.
  for (const ch of input.normalize("NFD")) {
    const cp: number = ch.codePointAt(0)!;

    const isLetter: boolean = cp >= 0x30a1 && cp <= 0x30f6;
    const isIterationMark: boolean = cp === 0x30fd || cp === 0x30fe;

    output.push(
      isLetter || isIterationMark ? String.fromCodePoint(cp - 0x60) : ch,
    );
  }

  return output.join("").normalize("NFC");
}
168
+
169
/**
 * Selects the readings and kanji forms of a JMdict entry that are worth searching for.
 *
 * The first reading and the first kanji form are always kept. Other readings are dropped
 * when one of their notes is listed in `notSearchedForms`, or (if {@link wordIsCommon} is `true`)
 * when they carry neither kanji form restrictions nor commonness information. Other kanji forms
 * are only filtered when at least one of them would survive the filter.
 * @param readings The word's readings
 * @param kanjiForms The word's kanji forms
 * @param wordIsCommon Whether or not the word is common
 * @returns An object containing the valid readings and (when {@link kanjiForms} was given) the valid kanji forms of the word
 */
export function getValidForms(
  readings: readonly DictReading[],
  kanjiForms?: readonly DictKanjiForm[],
  wordIsCommon?: true,
): ReadingsKanjiFormsPair {
  // Kanji forms that a kept reading is explicitly restricted to.
  const restrictedForms: Set<string> = new Set<string>();

  const hasExcludedNote = (notes: readonly string[] | undefined): boolean =>
    notes !== undefined &&
    notes.some((note: string) => notSearchedForms.has(note));

  const validReadings: DictReading[] = readings.filter(
    (candidate: DictReading, position: number) => {
      if (position === 0) return true;
      if (hasExcludedNote(candidate.notes)) return false;

      const restrictions: readonly string[] | undefined =
        candidate.kanjiFormRestrictions;

      if (restrictions !== undefined) {
        restrictions.forEach((form: string) => restrictedForms.add(form));

        return true;
      }

      return wordIsCommon === undefined || candidate.commonness !== undefined;
    },
  );

  // Must run after the readings were filtered: it depends on the collected restrictions.
  const isSearchable = (candidate: DictKanjiForm): boolean => {
    if (candidate.notes === undefined) return true;

    if (
      !hasExcludedNote(candidate.notes) &&
      (wordIsCommon === undefined || candidate.commonness !== undefined)
    )
      return true;

    return restrictedForms.has(candidate.form);
  };

  const anySecondaryFormIsSearchable: boolean | undefined = kanjiForms?.some(
    (candidate: DictKanjiForm, position: number) =>
      position !== 0 && isSearchable(candidate),
  );

  const validKanjiForms: DictKanjiForm[] | undefined = kanjiForms?.filter(
    (candidate: DictKanjiForm, position: number) =>
      position === 0 ||
      anySecondaryFormIsSearchable !== true ||
      isSearchable(candidate),
  );

  return validKanjiForms === undefined
    ? { readings: validReadings }
    : { readings: validReadings, kanjiForms: validKanjiForms };
}
238
+
239
/**
 * Converts a JMdict `JMdict_e` file into an array of {@link DictWord} objects.
 *
 * The XML string is parsed with `libxml` and the resulting document is handed to `xml2js`;
 * every `entry` element becomes one {@link DictWord}. When {@link examples} is provided, an entry
 * is flagged with `hasPhrases` if its ID, one of its valid kanji forms or (for entries without
 * kanji forms) one of its valid readings appears among the parts of the examples.
 * @param xmlString The raw `JMdict_e` file contents
 * @param examples An array of converted `Tanaka Corpus` examples
 * @returns An array of converted {@link DictWord} objects
 */
export function convertJMdict(
  xmlString: string,
  examples?: readonly TanakaExample[],
): DictWord[] {
  const parsedDocument: libxml.Document = libxml.parseXml(xmlString, {
    dtdvalid: true,
    nonet: false,
    noent: true,
    recover: false,
  });
  const words: DictWord[] = [];

  xml.parseString(parsedDocument, (_err: Error | null, result: any) => {
    // Every base form, reading, inflected form and reference ID used by the examples.
    const tanakaParts: Set<string> | undefined =
      examples !== undefined && examples.length > 0
        ? new Set<string>()
        : undefined;

    if (tanakaParts !== undefined && examples !== undefined)
      for (const example of examples)
        for (const part of example.parts) {
          tanakaParts.add(part.baseForm);

          if (part.reading !== undefined) tanakaParts.add(part.reading);
          if (part.inflectedForm !== undefined)
            tanakaParts.add(part.inflectedForm);
          if (part.referenceID !== undefined) tanakaParts.add(part.referenceID);
        }

    for (const entry of result.JMdict.entry) {
      const word: DictWord = {
        id: entry.ent_seq[0],
        readings: [],
        meanings: [],
      };

      const rawKanjiForms: any = entry.k_ele;
      const rawReadings: any = entry.r_ele;
      const rawMeanings: any = entry.sense;

      if (isObjectArray(rawKanjiForms)) {
        word.kanjiForms = [];

        for (const rawForm of rawKanjiForms) {
          const kanjiForm: DictKanjiForm = { form: rawForm.keb[0] };

          if (isStringArray(rawForm.ke_inf)) kanjiForm.notes = rawForm.ke_inf;

          if (isStringArray(rawForm.ke_pri)) {
            kanjiForm.commonness = rawForm.ke_pri;

            // Any priority tag marks the whole entry as common.
            if (word.isCommon === undefined) word.isCommon = true;
          }

          word.kanjiForms.push(kanjiForm);
        }
      }

      for (const rawReading of rawReadings) {
        const reading: DictReading = { reading: rawReading.reb[0] };

        if (isStringArray(rawReading.re_inf)) reading.notes = rawReading.re_inf;

        if (isStringArray(rawReading.re_restr))
          reading.kanjiFormRestrictions = rawReading.re_restr;

        if (isStringArray(rawReading.re_pri)) {
          reading.commonness = rawReading.re_pri;

          if (word.isCommon === undefined) word.isCommon = true;
        }

        word.readings.push(reading);
      }

      let kanaOnlyMeanings: number = 0;

      for (const rawMeaning of rawMeanings) {
        const meaning: DictMeaning = {
          partOfSpeech: rawMeaning.pos,
          translations: [],
        };

        for (const gloss of rawMeaning.gloss) {
          if (typeof gloss === "string") {
            meaning.translations.push(gloss);
            continue;
          }

          // Glosses with attributes are only kept for the three supported gloss types.
          if (
            typeof gloss === "object" &&
            typeof gloss._ === "string" &&
            typeof gloss.$ === "object" &&
            ["lit", "expl", "tm"].includes(gloss.$.g_type)
          )
            meaning.translations.push({
              translation: gloss._,
              type: gloss.$.g_type,
            });
        }

        if (isStringArray(rawMeaning.xref)) meaning.references = rawMeaning.xref;

        if (isStringArray(rawMeaning.stagk))
          meaning.kanjiFormRestrictions = rawMeaning.stagk;

        if (isStringArray(rawMeaning.stagr))
          meaning.readingRestrictions = rawMeaning.stagr;

        if (isStringArray(rawMeaning.ant)) meaning.antonyms = rawMeaning.ant;
        if (isStringArray(rawMeaning.field)) meaning.fields = rawMeaning.field;
        if (isStringArray(rawMeaning.s_inf)) meaning.info = rawMeaning.s_inf;

        if (isStringArray(rawMeaning.misc)) {
          meaning.misc = rawMeaning.misc;

          if (rawMeaning.misc.includes("word usually written using kana alone"))
            kanaOnlyMeanings++;
        }

        if (isStringArray(rawMeaning.dial)) meaning.dialects = rawMeaning.dial;

        word.meanings.push(meaning);
      }

      // Only flagged when every single meaning carries the "usually kana" tag.
      if (word.meanings.length === kanaOnlyMeanings) word.usuallyInKana = true;

      if (examples !== undefined) {
        let hasExample: boolean =
          tanakaParts !== undefined && tanakaParts.has(word.id);

        if (!hasExample) {
          const validForms: ReadingsKanjiFormsPair = getValidForms(
            word.readings,
            word.kanjiForms,
            word.isCommon,
          );

          if (tanakaParts !== undefined) {
            if (validForms.kanjiForms !== undefined)
              hasExample = validForms.kanjiForms.some((kf: DictKanjiForm) =>
                tanakaParts.has(kf.form),
              );

            // Readings are only consulted for entries that have no kanji forms at all.
            if (!hasExample && word.kanjiForms === undefined)
              hasExample = validForms.readings.some((r: DictReading) =>
                tanakaParts.has(r.reading),
              );
          }
        }

        if (hasExample) word.hasPhrases = true;
      }

      words.push(word);
    }
  });

  return words;
}
428
+
429
/**
 * Converts a KANJIDIC `kanjidic2.xml` file into an array of {@link DictKanji} objects.
 *
 * The XML string is parsed with `libxml` and the resulting document is handed to `xml2js`.
 * Only `character` elements whose literal is a string of length 1 are converted.
 * @param xmlString The raw `kanjidic2.xml` file contents
 * @returns An array of converted {@link DictKanji} objects
 */
export function convertKanjiDic(xmlString: string): DictKanji[] {
  const parsedDocument: libxml.Document = libxml.parseXml(xmlString, {
    dtdvalid: true,
    nonet: false,
    noent: true,
    recover: false,
  });
  const kanjiList: DictKanji[] = [];

  xml.parseString(parsedDocument, (_err: Error | null, result: any) => {
    for (const character of result.kanjidic2.character) {
      const kanjiObj: DictKanji = {
        kanji: character.literal[0],
        readingMeaning: [],
      };

      if (typeof kanjiObj.kanji !== "string" || kanjiObj.kanji.length !== 1)
        continue;

      const misc: any = character.misc[0];

      kanjiObj.misc = { strokeNumber: misc.stroke_count[0] };

      if (isStringArray(misc.grade)) kanjiObj.misc.grade = misc.grade[0];
      if (isStringArray(misc.freq)) kanjiObj.misc.frequency = misc.freq[0];

      if (
        isStringArray(misc.jlpt) &&
        ["5", "4", "3", "2", "1"].includes(misc.jlpt[0])
      )
        kanjiObj.misc.jlpt = `N${Number(String(misc.jlpt[0]))}`;

      const readingMeanings: any[] = isObjectArray(character.reading_meaning)
        ? character.reading_meaning
        : [];

      for (const rm of readingMeanings) {
        const rmObj: DictKanjiReadingMeaning = { groups: [] };

        for (const group of rm.rmgroup) {
          const groupObj: DictKanjiReadingMeaningGroup = {
            readings: [],
            meanings: [],
          };

          if (isObjectArray(group.reading))
            for (const reading of group.reading) {
              if (
                typeof reading._ !== "string" ||
                typeof reading.$ !== "object"
              )
                continue;

              // Only Japanese on'yomi / kun'yomi readings are kept.
              const readingType: any = reading.$.r_type;

              if (readingType !== "ja_on" && readingType !== "ja_kun") continue;

              groupObj.readings!.push({
                reading: reading._,
                type: readingType,
              });
            }

          if (Array.isArray(group.meaning))
            for (const meaning of group.meaning) {
              if (typeof meaning !== "string") continue;

              // The first "(kokuji)" pseudo-meaning becomes a flag instead of a meaning.
              if (kanjiObj.isKokuji === undefined && meaning === "(kokuji)") {
                kanjiObj.isKokuji = true;
                continue;
              }

              groupObj.meanings!.push(meaning);
            }

          const hasReadings: boolean = groupObj.readings!.length > 0;
          const hasMeanings: boolean = groupObj.meanings!.length > 0;

          if (hasReadings || hasMeanings) {
            // Empty lists are removed so the group only carries what it actually has.
            if (!hasReadings) delete groupObj.readings;
            if (!hasMeanings) delete groupObj.meanings;

            rmObj.groups!.push(groupObj);
          }
        }

        if (isStringArray(rm.nanori)) rmObj.nanori = rm.nanori;

        if (rmObj.groups!.length > 0 || rmObj.nanori !== undefined)
          kanjiObj.readingMeaning!.push(rmObj);
      }

      kanjiList.push(kanjiObj);
    }
  });

  return kanjiList;
}
525
+
526
/**
 * Converts a Tanaka Corpus `examples.utf` file into an array of {@link TanakaExample} objects.
 *
 * The file is read as consecutive pairs of lines: an `A: ` line (phrase, tab, translation and ID)
 * followed by a `B: ` line (space-separated word parts). Pairs that do not follow this shape are ignored.
 * @param tanakaString The raw contents of a `examples.utf` file
 * @returns An array of converted {@link TanakaExample} objects
 */
export function convertTanakaCorpus(tanakaString: string): TanakaExample[] {
  const tanakaArray: TanakaExample[] = [];
  const tanakaParsed: string[] = tanakaString.split("\n");

  for (let i: number = 0; i + 1 < tanakaParsed.length; i += 2) {
    let a: string | undefined = tanakaParsed[i];
    let b: string | undefined = tanakaParsed[i + 1];

    if (
      a === undefined ||
      b === undefined ||
      !a.startsWith("A: ") ||
      !b.startsWith("B: ")
    )
      continue;

    a = a.replace("A: ", "");
    b = b.replace("B: ", "");

    const idMatch: string | undefined = regexps.tanakaID
      .exec(a)
      ?.groups!["id"]?.trim();
    const idParts: string[] = String(idMatch).split("_");
    const id: TanakaID = `${Number(idParts[0])}_${Number(idParts[1])}`;

    const aParts: string[] = a.replace(regexps.tanakaID, "").split("\t");

    const bRawParts: string[] = b
      .split(" ")
      .filter((part: string) => part.trim().length !== 0);

    const bParts: ExamplePart[] = [];

    for (const part of bRawParts) {
      const partMatches: RegExpExecArray | null =
        regexps.tanakaPart.exec(part);

      const baseForm: string | undefined = partMatches?.groups?.["base"];

      // A token that cannot be parsed has no base form; skip it instead of
      // failing the whole conversion with a TypeError further down.
      if (baseForm === undefined) continue;

      const examplePart: ExamplePart = { baseForm: baseForm };

      const reading: string | undefined = partMatches?.groups?.["reading"];
      const glossNumber: string | undefined =
        partMatches?.groups?.["glossnum"];
      const inflectedForm: string | undefined =
        partMatches?.groups?.["inflection"];

      if (reading !== undefined) {
        // The "reading" slot holds either a real reading or a reference to an entry ID.
        // A single exec() both detects and extracts the reference.
        const referenceID: RegExpExecArray | null =
          regexps.tanakaReferenceID.exec(reading);

        if (referenceID !== null)
          examplePart.referenceID = `${Number(
            referenceID.groups!["entryid"],
          )}`;
        else examplePart.reading = reading;
      }

      if (glossNumber !== undefined)
        // Radix 10 already ignores leading zeros ("01" -> 1).
        examplePart.glossNumber = Number.parseInt(glossNumber, 10);

      if (inflectedForm !== undefined)
        examplePart.inflectedForm = inflectedForm;

      // A trailing "~" on the base form flags the part as edited and is not part of the word itself.
      if (baseForm.endsWith("~")) {
        examplePart.edited = true;
        examplePart.baseForm = examplePart.baseForm.replace("~", "");
      }

      bParts.push(examplePart);
    }

    const phrase: string = aParts[0]!;
    const translation: string = aParts[1]!;

    tanakaArray.push({
      id: id,
      phrase: phrase.trim(),
      translation: translation.trim(),
      parts: bParts,
    });
  }

  return tanakaArray;
}
616
+
617
/**
 * Converts a Tanaka Corpus `examples.utf` file into an array of {@link TanakaExample} objects
 * and adds furigana (generated with `kuroshiro`) to every phrase that does not contain `・`.
 * @param tanakaString The raw contents of a `examples.utf` file
 * @returns A promise resolving with an array of converted {@link TanakaExample} objects (with furigana)
 */
export async function convertTanakaCorpusWithFurigana(
  tanakaString: string,
): Promise<TanakaExample[]> {
  const examples: TanakaExample[] = convertTanakaCorpus(tanakaString);

  const kuroshiro: any = new Kuroshiro.default();
  await kuroshiro.init(new KuromojiAnalyzer());

  // Phrases are converted one after another.
  for (const example of examples) {
    if (example.phrase.includes("・")) continue;

    const converted: unknown = await kuroshiro.convert(example.phrase, {
      to: "hiragana",
      mode: "furigana",
    });

    example.furigana = String(converted);
  }

  return examples;
}
643
+
644
/**
 * Converts a `radkfile2` file (EUC-JP encoded) into an array of {@link DictRadical} objects.
 *
 * Every line starting with `$ ` opens a radical; the lines up to the next `$ ` line list the
 * kanji that contain it. Each listed kanji is replaced by its `KANJIDIC` entry when one exists,
 * otherwise a bare `{ kanji }` object is used.
 * @param radkBuffer A `radkfile2` buffer
 * @param kanjiDic An array of converted `KANJIDIC` entries
 * @returns An array of converted {@link DictRadical} objects
 */
export function convertRadkFile(
  radkBuffer: Buffer<ArrayBuffer>,
  kanjiDic: readonly DictKanji[],
): DictRadical[] {
  const fileParsed: string[] = iconv
    .decode(radkBuffer, "euc-jp")
    .split("\n")
    .filter((line: string) => !line.startsWith("#"));

  // Index the dictionary once instead of scanning it for every listed kanji.
  // The first entry for a literal wins, mirroring `Array.prototype.find`.
  const kanjiByLiteral: Map<string, DictKanji> = new Map<string, DictKanji>();

  for (const dictKanji of kanjiDic)
    if (!kanjiByLiteral.has(dictKanji.kanji))
      kanjiByLiteral.set(dictKanji.kanji, dictKanji);

  const radicals: DictRadical[] = [];

  let i: number = 0;

  while (i < fileParsed.length) {
    const line: string | undefined = fileParsed[i];

    if (line === undefined || !line.startsWith("$ ")) {
      i++;
      continue;
    }

    const radical: DictRadical = {
      radical: line.charAt(2).trim(),
      strokes: line.substring(4).trim(),
    };

    const kanjiList: DictKanji[] = [];

    // Consume the kanji lines that belong to this radical.
    let j: number = i + 1;

    for (; j < fileParsed.length; j++) {
      const kanjiLine: string | undefined = fileParsed[j];

      if (kanjiLine === undefined || kanjiLine.startsWith("$ ")) break;

      // trim() drops stray whitespace such as a trailing carriage return;
      // iterating the string itself keeps surrogate pairs together.
      for (const kanji of kanjiLine.trim())
        kanjiList.push(kanjiByLiteral.get(kanji) ?? { kanji: kanji });
    }

    if (kanjiList.length > 0) radical.kanji = kanjiList;

    radicals.push(radical);

    // Continue with the next radical header (or stop at the end of the file).
    i = j;
  }

  return radicals;
}
703
+
704
/**
 * Converts a `kradfile2` file (EUC-JP encoded) into an array of {@link DictKanjiWithRadicals} objects.
 *
 * Every data line has the shape `<kanji> : <radical> <radical> ...`. A radical is resolved to its
 * `KANJIDIC` entry; if there is none, a matching katakana from {@link katakanaList} is turned into
 * a kanji-like entry; radicals that match neither are left out. Lines without the ` : ` separator
 * are ignored.
 * @param kradBuffer A `kradfile2` buffer
 * @param kanjiDic An array of converted `KANJIDIC` entries
 * @param katakanaList An array of katakana {@link Kana} objects
 * @returns An array of converted {@link DictKanjiWithRadicals} objects
 */
export function convertKradFile(
  kradBuffer: Buffer<ArrayBuffer>,
  kanjiDic: readonly DictKanji[],
  katakanaList: readonly Kana[],
): DictKanjiWithRadicals[] {
  const fileParsed: string[] = iconv
    .decode(kradBuffer, "euc-jp")
    .split("\n")
    .filter((line: string) => !line.startsWith("#"));

  // Index both lookup lists once; the first entry for a key wins, mirroring `Array.prototype.find`.
  const kanjiByLiteral: Map<string, DictKanji> = new Map<string, DictKanji>();

  for (const dictKanji of kanjiDic)
    if (!kanjiByLiteral.has(dictKanji.kanji))
      kanjiByLiteral.set(dictKanji.kanji, dictKanji);

  const katakanaByChar: Map<string, Kana> = new Map<string, Kana>();

  for (const kana of katakanaList)
    if (!katakanaByChar.has(kana.kana)) katakanaByChar.set(kana.kana, kana);

  const kanjiWithRadicals: DictKanjiWithRadicals[] = [];

  for (const rawLine of fileParsed) {
    // Drop surrounding whitespace (e.g. a trailing carriage return) so the last radical still matches.
    const line: string = rawLine.trim();

    if (line.length === 0) continue;

    const split: string[] = line.split(" : ");

    const kanjiChar: string | undefined = split[0];
    const radicalsRow: string | undefined = split[1];

    // Not a "<kanji> : <radicals>" line.
    if (kanjiChar === undefined || radicalsRow === undefined) continue;

    const kanji: DictKanjiWithRadicals = {
      kanji: kanjiChar,
      radicals: [],
    };

    for (const radical of radicalsRow.split(" ")) {
      const foundRadical: DictKanji | undefined = kanjiByLiteral.get(radical);

      if (foundRadical !== undefined) {
        kanji.radicals.push(foundRadical);
        continue;
      }

      const katakanaChar: Kana | undefined = katakanaByChar.get(radical);

      if (katakanaChar === undefined) continue;

      // Katakana used as a radical: synthesize an entry from the kana itself.
      kanji.radicals.push({
        kanji: katakanaChar.kana,
        readingMeaning: [
          {
            groups: [
              {
                readings: [{ reading: katakanaChar.kana, type: "ja_on" }],
                meanings: [katakanaChar.reading],
              },
            ],
          },
        ],
      });
    }

    if (kanji.kanji.length === 1 && kanji.radicals.length > 0)
      kanjiWithRadicals.push(kanji);
  }

  return kanjiWithRadicals;
}
773
+
774
/**
 * Maps entry properties (IDs and kanji) with other entries.
 *
 * - {@link jmDict} => {@link WordIDEntryMap}, {@link KanjiWordsMap}
 *
 * - {@link kanjiDic} => {@link KanjiEntryMap}, {@link KanjiSVGMap} (only if {@link svgList} is present)
 *
 * - {@link tanakaExamples} (requires {@link jmDict}) => {@link WordExamplesMap}
 *
 * - {@link wordDefinitionPairs} => {@link WordDefinitionsMap}
 *
 * Each word is listed at most once per kanji in the {@link KanjiWordsMap} and each example
 * at most once per word in the {@link WordExamplesMap}.
 *
 * @param jmDict An array of converted `JMdict` entries
 * @param kanjiDic An array of converted `KANJIDIC` entries
 * @param tanakaExamples An array of converted `Tanaka Corpus` examples
 * @param wordDefinitionPairs An array of `ja.wiktionary.org` word-definitions pairs
 * @param svgList An array of SVG file names
 * @returns An object containing the non-empty entry maps, their presence being dependent on the provided arguments.
 */
export function createEntryMaps(
  jmDict?: readonly DictWord[],
  kanjiDic?: readonly DictKanji[],
  tanakaExamples?: readonly TanakaExample[],
  wordDefinitionPairs?: readonly WordDefinitionPair[],
  svgList?: readonly string[],
): EntryMaps {
  const kanjiEntryMap: KanjiEntryMap = new Map<string, DictKanji>();
  const wordIDEntryMap: WordIDEntryMap = new Map<StringNumber, DictWord>();
  const kanjiWordsMap: KanjiWordsMap = new Map<string, DictWord[]>();
  const wordExamplesMap: WordExamplesMap = new Map<
    StringNumber,
    TanakaExample[]
  >();
  const wordDefinitionsMap: WordDefinitionsMap = new Map<
    StringNumber,
    Definition[]
  >();
  const kanjiSVGMap: KanjiSVGMap = new Map<string, string>();

  // Per word: the strings (valid readings, valid kanji forms, ID) an example part may match.
  const wordPartsMap: Map<StringNumber, Set<string>> = new Map<
    StringNumber,
    Set<string>
  >();
  // Per matched string: the examples containing a part with that string.
  const partExamplesMap: Map<string, TanakaExample[]> = new Map<
    string,
    TanakaExample[]
  >();
  // Union of all strings stored in `wordPartsMap`.
  const entryParts: Set<string> = new Set<string>();

  if (kanjiDic !== undefined)
    for (const kanji of kanjiDic) kanjiEntryMap.set(kanji.kanji, kanji);

  if (wordDefinitionPairs !== undefined)
    for (const pair of wordDefinitionPairs)
      wordDefinitionsMap.set(pair.wordID, pair.definitions);

  if (kanjiDic !== undefined && svgList !== undefined) {
    // Index the file list by lower-cased base name once instead of scanning it per kanji.
    // The position of the first file per base name is kept so the earliest match still wins.
    const svgIndexByBaseName: Map<string, number> = new Map<string, number>();

    svgList.forEach((file: string, index: number) => {
      const baseName: string = file.split(".")[0]!.toLowerCase();

      if (!svgIndexByBaseName.has(baseName))
        svgIndexByBaseName.set(baseName, index);
    });

    for (const kanji of kanjiDic) {
      const codePoint: string = kanji.kanji
        .codePointAt(0)!
        .toString(16)
        .toLowerCase();

      // A file may be named after the bare hex code point or with one leading zero.
      const plain: number | undefined = svgIndexByBaseName.get(codePoint);
      const padded: number | undefined = svgIndexByBaseName.get(
        `0${codePoint}`,
      );

      let svgIndex: number | undefined;

      if (plain === undefined) svgIndex = padded;
      else if (padded === undefined) svgIndex = plain;
      else svgIndex = Math.min(plain, padded);

      if (svgIndex !== undefined)
        kanjiSVGMap.set(kanji.kanji, svgList[svgIndex]!);
    }
  }

  if (jmDict !== undefined) {
    for (const word of jmDict) {
      wordIDEntryMap.set(word.id, word);

      if (word.kanjiForms !== undefined) {
        // A kanji may occur several times within or across the forms of one word;
        // the word must still be listed only once for that kanji.
        const seenChars: Set<string> = new Set<string>();

        for (const kf of word.kanjiForms)
          for (const char of kf.form.split("")) {
            if (!regexps.kanji.test(char) || seenChars.has(char)) continue;

            seenChars.add(char);

            const words: DictWord[] | undefined = kanjiWordsMap.get(char);

            if (words === undefined) kanjiWordsMap.set(char, [word]);
            else words.push(word);
          }
      }

      if (tanakaExamples !== undefined) {
        const rkf: ReadingsKanjiFormsPair = getValidForms(
          word.readings,
          word.kanjiForms,
          word.isCommon,
        );

        const localPartParts: Set<string> = new Set<string>();

        for (const reading of rkf.readings) {
          entryParts.add(reading.reading);
          localPartParts.add(reading.reading);
        }

        if (rkf.kanjiForms !== undefined)
          for (const kanjiForm of rkf.kanjiForms) {
            entryParts.add(kanjiForm.form);
            localPartParts.add(kanjiForm.form);
          }

        entryParts.add(word.id);
        localPartParts.add(word.id);

        wordPartsMap.set(word.id, localPartParts);
      }
    }

    if (tanakaExamples !== undefined) {
      for (const ex of tanakaExamples)
        for (const part of ex.parts) {
          const keys: (string | undefined)[] = [
            part.baseForm,
            part.reading,
            part.inflectedForm,
            part.referenceID,
          ];

          for (const key of keys) {
            if (key === undefined || !entryParts.has(key)) continue;

            let exList: TanakaExample[] | undefined = partExamplesMap.get(key);

            if (exList === undefined) {
              exList = [];
              partExamplesMap.set(key, exList);
            }

            exList.push(ex);
          }
        }

      for (const word of jmDict) {
        const seenEx: Set<string> = new Set<string>();
        const validExamples: TanakaExample[] = [];

        for (const p of wordPartsMap.get(word.id)!) {
          const examplesForPart: TanakaExample[] | undefined =
            partExamplesMap.get(p);

          if (examplesForPart === undefined) continue;

          // An example can be listed several times under one part (the same word used
          // twice in a sentence), so the seen-check has to happen per example.
          for (const ex of examplesForPart) {
            if (seenEx.has(ex.id)) continue;

            seenEx.add(ex.id);
            validExamples.push(ex);
          }
        }

        if (validExamples.length > 0)
          wordExamplesMap.set(word.id, validExamples);
      }
    }
  }

  return {
    ...(wordIDEntryMap.size > 0 ? { wordIDEntryMap: wordIDEntryMap } : {}),
    ...(kanjiWordsMap.size > 0 ? { kanjiWordsMap: kanjiWordsMap } : {}),
    ...(kanjiEntryMap.size > 0 ? { kanjiEntryMap: kanjiEntryMap } : {}),
    ...(wordExamplesMap.size > 0 ? { wordExamplesMap: wordExamplesMap } : {}),
    ...(wordDefinitionsMap.size > 0
      ? { wordDefinitionsMap: wordDefinitionsMap }
      : {}),
    ...(kanjiSVGMap.size > 0 ? { kanjiSVGMap: kanjiSVGMap } : {}),
  };
}
973
+
974
/**
 * Normalizes one raw `wiktextract` JSON object into a {@link JaWiktionaryEntry}.
 *
 * A sense is kept when it carries a well-formed `form_of` list (every element has a string `word`) or a string-array `glosses`.
 * The same checks are re-applied while mapping, so a sense admitted through only one of the two never leaks the other, malformed, field:
 * `form_of` is emitted only when well-formed and `glosses` falls back to an empty array.
 * @param entry A parsed line of the JSONL dump
 * @returns The normalized entry
 */
function mapEntry(entry: any): JaWiktionaryEntry {
  // `form_of` counts as usable only if it is an object array whose elements all name a string `word`
  const hasValidFormOf = (sense: any): boolean =>
    isObjectArray(sense.form_of) &&
    sense.form_of.every((form: any) => typeof form.word === "string") === true;

  // A missing or malformed `senses` field yields an entry without senses instead of throwing
  const rawSenses: any[] = Array.isArray(entry.senses) ? entry.senses : [];

  return {
    word: entry.word,
    pos_title: entry.pos_title,
    senses: rawSenses
      .filter(
        (sense: any) =>
          sense !== null &&
          typeof sense === "object" &&
          (hasValidFormOf(sense) || isStringArray(sense.glosses)),
      )
      .map((sense: any) => ({
        ...(hasValidFormOf(sense)
          ? {
              form_of: sense.form_of.map((form: any) => String(form.word)),
            }
          : {}),
        // Consumers iterate and join `glosses` unconditionally, so it must always be an array
        glosses: isStringArray(sense.glosses) ? sense.glosses : [],
      })),
    ...(isObjectArray(entry.forms) &&
    entry.forms.every((form: any) => typeof form.form === "string") === true
      ? { forms: entry.forms.map((form: any) => String(form.form)) }
      : {}),
  };
}
1000
+
1001
/**
 * Converts and filters a `ja.wiktionary.org` JSONL dump (sync)
 *
 * The dump file needs to be converted from a `jawiktionary-latest-pages-articles.xml.bz2` file from {@link https://dumps.wikimedia.org/jawiktionary/latest/} using {@link https://github.com/tatuylonen/wiktextract | wiktextract}.
 *
 * Blank lines are skipped. Lines that parse to something other than a non-null object (e.g. `null`, numbers, strings) are ignored. A line that is not valid JSON still throws a `SyntaxError`.
 * @param buffer A JSONL dump file buffer
 * @returns An array containing only the Japanese entries
 */
export function convertJawiktionarySync(buffer: Buffer): JaWiktionaryEntry[] {
  const entries: JaWiktionaryEntry[] = [];

  for (const rawLine of buffer.toString("utf-8").split("\n")) {
    // `trim` also strips the `\r` left over from CRLF line endings
    const line: string = rawLine.trim();
    if (line.length === 0) continue;

    const obj: any = JSON.parse(line);

    // `typeof null` is "object", so null has to be excluded before reading `lang`
    if (
      obj !== null &&
      typeof obj === "object" &&
      (obj.lang === "日本語" || obj.lang === "古典日本語")
    )
      entries.push(mapEntry(obj));
  }

  return entries;
}
1027
+
1028
/**
 * Converts and filters a `ja.wiktionary.org` JSONL dump (async)
 *
 * The dump file needs to be converted from a `jawiktionary-latest-pages-articles.xml.bz2` file from {@link https://dumps.wikimedia.org/jawiktionary/latest/} using {@link https://github.com/tatuylonen/wiktextract | wiktextract}.
 *
 * Blank lines are skipped, matching {@link convertJawiktionarySync}. Lines that parse to something other than a non-null object are ignored.
 * The line reader and the stream are released even when a line fails to parse.
 * @param stream A JSONL dump file stream
 * @returns An array containing only the Japanese entries
 */
export async function convertJawiktionaryAsync(
  stream: ReadStream,
): Promise<JaWiktionaryEntry[]> {
  const rl: Interface = createInterface({
    input: stream,
    crlfDelay: Infinity,
  });

  const entries: JaWiktionaryEntry[] = [];

  try {
    for await (const rawLine of rl) {
      const line: string = rawLine.trim();
      // `JSON.parse("")` throws, so empty lines must never reach it
      if (line.length === 0) continue;

      const obj: any = JSON.parse(line);

      // `typeof null` is "object", so null has to be excluded before reading `lang`
      if (
        obj !== null &&
        typeof obj === "object" &&
        (obj.lang === "日本語" || obj.lang === "古典日本語")
      )
        entries.push(mapEntry(obj));
    }
  } finally {
    // Runs on success and on a parse error alike, so the file handle is not leaked
    rl.close();
    stream.close();
    stream.destroy();
  }

  return entries;
}
1061
+
1062
/**
 * Collects the definitions of one `ja.wiktionary.org` entry for a single word.
 *
 * Each sense contributes the concatenation of its glosses. A definition already present in `definitions` is not added again.
 * Senses whose glosses are missing or concatenate to an empty string are skipped.
 * @param entry The entry whose senses are read
 * @param definitions The word's definition list (appended to in place)
 * @param definitionMap Usage counter per definition text, incremented once for every word that receives the definition
 */
function parseEntry(
  entry: JaWiktionaryEntry,
  definitions: Definition[],
  definitionMap: Map<string, { count: number }>,
): void {
  for (const sense of entry.senses) {
    const glosses: readonly string[] = Array.isArray(sense.glosses)
      ? sense.glosses
      : [];
    const definition: string = glosses.join("");

    // An empty definition carries no information for the word
    if (definition.length === 0) continue;

    if (definitions.some((def: Definition) => def.definition === definition))
      continue;

    const usage: { count: number } | undefined = definitionMap.get(definition);

    if (usage === undefined) definitionMap.set(definition, { count: 1 });
    else usage.count++;

    definitions.push({ definition: definition });
  }
}
1079
+
1080
/**
 * Pairs Japanese definitions with JMdict word entries
 *
 * Outline:
 * 1. Collect the valid readings and kanji forms of every JMdict word.
 * 2. Classify the `ja.wiktionary.org` entries whose title is one of those forms.
 * 3. Index the classified entries by part of speech.
 * 4. For each word, gather candidate entries through its parts of speech, falling back to the POS-independent indexes when nothing is found.
 * 5. Turn the candidates into definitions and flag definitions shared by several words as possibly inaccurate.
 * @param wiktionaryEntries An array containing `ja.wiktionary.org` Japanese entries (converted using {@link convertJawiktionarySync} or {@link convertJawiktionaryAsync})
 * @param jmDict An array of converted `JMdict` entries
 * @returns An array of {@link WordDefinitionPair} objects
 */
export function getWordDefinitions(
  wiktionaryEntries: readonly JaWiktionaryEntry[],
  jmDict: readonly DictWord[],
): WordDefinitionPair[] {
  type EntryGroups = Map<string, JaWiktionaryEntry[]>;
  type EntryLookup = {
    title?: EntryGroups;
    formTitle?: EntryGroups;
    form?: EntryGroups;
  };
  type WordForms = {
    readings: Set<string>;
    kanjiForms?: Set<string> | undefined;
  };
  type WordCandidates = {
    word: DictWord;
    readings: Set<string>;
    forms: Set<string>;
    entriesWithTitles: JaWiktionaryEntry[];
    entriesWithFormTitles: JaWiktionaryEntry[];
    entriesWithForms: JaWiktionaryEntry[];
    kanjiForms?: Set<string>;
  };

  // Appends an entry to the list stored under its title, creating the list on first use
  const appendEntry = (groups: EntryGroups, entry: JaWiktionaryEntry): void => {
    const bucket: JaWiktionaryEntry[] | undefined = groups.get(entry.word);

    if (bucket !== undefined) bucket.push(entry);
    else groups.set(entry.word, [entry]);
  };

  // Adds `value` to the set stored under `key`, creating the set on first use
  const addToIndex = (
    index: Map<string, Set<string>>,
    key: string,
    value: string,
  ): void => {
    const bucket: Set<string> | undefined = index.get(key);

    if (bucket !== undefined) bucket.add(value);
    else index.set(key, new Set<string>([value]));
  };

  // ---- 1. Valid forms of every JMdict word ----

  const wordFormsMap: Map<StringNumber, WordForms> = new Map<
    StringNumber,
    WordForms
  >();
  const validKanjiForms: Set<string> = new Set<string>();
  const validForms: Set<string> = new Set<string>();

  for (const word of jmDict) {
    const wordReadings: Set<string> = new Set<string>();
    const wordKanjiForms: Set<string> = new Set<string>();

    const rkf: ReadingsKanjiFormsPair = getValidForms(
      word.readings,
      word.kanjiForms,
    );

    for (const r of rkf.readings) {
      wordReadings.add(r.reading);
      validForms.add(r.reading);
    }

    if (rkf.kanjiForms !== undefined)
      for (const kf of rkf.kanjiForms) {
        validKanjiForms.add(kf.form);
        wordKanjiForms.add(kf.form);
        validForms.add(kf.form);
      }

    wordFormsMap.set(word.id, {
      readings: wordReadings,
      ...(wordKanjiForms.size > 0 ? { kanjiForms: wordKanjiForms } : {}),
    });
  }

  // ---- 2. Classification of the wiktionary entries ----

  // Entries whose title is any valid form (kanji form or reading)
  const validTitleEntries: EntryGroups = new Map<string, JaWiktionaryEntry[]>();
  const entriesWithFormTitlesGlobal: EntryGroups = new Map<
    string,
    JaWiktionaryEntry[]
  >();
  // Entries whose title is a valid form and whose `forms` list names another valid form
  const entriesWithFormsGlobal: EntryGroups = new Map<
    string,
    JaWiktionaryEntry[]
  >();

  // Titles (kanji forms only) that reference a valid form through `form_of`, a gloss or `forms`
  const validFormOfEntries: Set<string> = new Set<string>();
  const validGlossesEntries: Set<string> = new Set<string>();
  const validFormsEntries: Set<string> = new Set<string>();

  const isValidForm = (form: string): boolean => validForms.has(form);

  const mentionsValidForm = (text: string): boolean => {
    for (const form of validForms) if (text.includes(form)) return true;

    return false;
  };

  // All results below are keyed by title, so the entries can be visited in input order directly
  for (const entry of wiktionaryEntries) {
    const title: string = entry.word;
    const titleIsKanjiForm: boolean = validKanjiForms.has(title);
    const titleIsForm: boolean = validForms.has(title);

    if (titleIsKanjiForm) {
      for (const sense of entry.senses) {
        if (sense.form_of !== undefined && sense.form_of.some(isValidForm))
          validFormOfEntries.add(title);

        // The gloss scan walks every valid form, so skip it once the title is already flagged
        if (
          !validGlossesEntries.has(title) &&
          sense.glosses.some((gloss: string) => mentionsValidForm(gloss))
        )
          validGlossesEntries.add(title);
      }

      if (entry.forms !== undefined && entry.forms.some(isValidForm))
        validFormsEntries.add(title);
    }

    if (titleIsForm) appendEntry(entriesWithFormTitlesGlobal, entry);
    if (titleIsKanjiForm || titleIsForm) appendEntry(validTitleEntries, entry);

    if (
      titleIsForm &&
      entry.forms !== undefined &&
      entry.forms.some(isValidForm)
    )
      appendEntry(entriesWithFormsGlobal, entry);
  }

  // ---- 3. Part-of-speech index ----

  const posMap: Map<POS, EntryLookup> = new Map<POS, EntryLookup>();

  for (const pos of posList) {
    const lookup: EntryLookup = {};
    posMap.set(pos, lookup);

    for (const group of validTitleEntries.values())
      for (const te of group)
        // Entries titled "和語の漢字表記" are indexed under every part of speech
        if (te.pos_title === pos || te.pos_title === "和語の漢字表記") {
          lookup.title ??= new Map<string, JaWiktionaryEntry[]>();
          appendEntry(lookup.title, te);
        }

    for (const group of entriesWithFormTitlesGlobal.values())
      for (const ft of group)
        if (ft.pos_title === pos) {
          lookup.formTitle ??= new Map<string, JaWiktionaryEntry[]>();
          appendEntry(lookup.formTitle, ft);
        }

    for (const group of entriesWithFormsGlobal.values())
      for (const wf of group)
        if (wf.pos_title === pos) {
          lookup.form ??= new Map<string, JaWiktionaryEntry[]>();
          appendEntry(lookup.form, wf);
        }
  }

  // ---- 4. Candidate entries per word ----

  // POS-independent fallback, used when the POS-scoped search finds nothing
  const globalLookup: EntryLookup = {
    title: validTitleEntries,
    formTitle: entriesWithFormTitlesGlobal,
    form: entriesWithFormsGlobal,
  };

  // True if the kanji-form-titled entry references some valid form in any way
  const isCrossReferenced = (ent: JaWiktionaryEntry): boolean =>
    validFormOfEntries.has(ent.word) ||
    validGlossesEntries.has(ent.word) ||
    validFormsEntries.has(ent.word);

  // True if the entry's `forms` list names one of the word's own kanji forms or readings
  const listsWordForm = (ent: JaWiktionaryEntry, forms: WordForms): boolean =>
    ent.forms !== undefined &&
    ent.forms.some(
      (form: string) =>
        (forms.kanjiForms !== undefined && forms.kanjiForms.has(form)) ||
        forms.readings.has(form),
    );

  // Gathers, from one lookup, every entry reachable through the word's forms
  const collectCandidates = (
    lookup: EntryLookup,
    forms: WordForms,
    titles: JaWiktionaryEntry[],
    formTitles: JaWiktionaryEntry[],
    withForms: JaWiktionaryEntry[],
  ): void => {
    if (forms.kanjiForms !== undefined)
      for (const kf of forms.kanjiForms) {
        const te: JaWiktionaryEntry[] | undefined = lookup.title?.get(kf);
        const fe: JaWiktionaryEntry[] | undefined = lookup.form?.get(kf);

        if (te !== undefined)
          for (const ent of te) if (isCrossReferenced(ent)) titles.push(ent);

        if (fe !== undefined)
          for (const ent of fe)
            if (listsWordForm(ent, forms)) withForms.push(ent);
      }

    for (const r of forms.readings) {
      const te: JaWiktionaryEntry[] | undefined = lookup.title?.get(r);
      const fe: JaWiktionaryEntry[] | undefined = lookup.form?.get(r);
      const fte: JaWiktionaryEntry[] | undefined = lookup.formTitle?.get(r);

      if (te !== undefined)
        for (const ent of te) if (listsWordForm(ent, forms)) titles.push(ent);

      if (fe !== undefined)
        for (const ent of fe)
          if (listsWordForm(ent, forms)) withForms.push(ent);

      if (fte !== undefined) for (const ent of fte) formTitles.push(ent);
    }
  };

  const wordEntriesPairs: WordCandidates[] = [];

  for (const word of jmDict) {
    const poses: Set<POS> = new Set<POS>();

    for (const m of word.meanings)
      for (const note of m.partOfSpeech) {
        const noteEntry:
          | readonly [string, string]
          | readonly [string, string, POS | readonly POS[]]
          | undefined = noteMap.get(note);

        // Only three-element note entries carry a part of speech
        if (noteEntry?.length === 3) {
          const notePos: POS | readonly POS[] = noteEntry[2];

          if (Array.isArray(notePos)) for (const pos of notePos) poses.add(pos);
          else if (typeof notePos === "string") poses.add(notePos);
        }
      }

    const rkf: WordForms = wordFormsMap.get(word.id)!;

    const entriesWithTitles: JaWiktionaryEntry[] = [];
    const entriesWithFormTitles: JaWiktionaryEntry[] = [];
    const entriesWithForms: JaWiktionaryEntry[] = [];

    for (const pos of poses) {
      const lookup: EntryLookup | undefined = posMap.get(pos);

      // NOTE(review): assumes every POS in `noteMap` also appears in `posList`; one that does not is skipped instead of throwing
      if (lookup === undefined) continue;

      collectCandidates(
        lookup,
        rkf,
        entriesWithTitles,
        entriesWithFormTitles,
        entriesWithForms,
      );
    }

    if (
      entriesWithTitles.length === 0 &&
      entriesWithFormTitles.length === 0 &&
      entriesWithForms.length === 0
    )
      collectCandidates(
        globalLookup,
        rkf,
        entriesWithTitles,
        entriesWithFormTitles,
        entriesWithForms,
      );

    if (
      entriesWithTitles.length > 0 &&
      (entriesWithFormTitles.length > 0 || entriesWithForms.length > 0)
    )
      wordEntriesPairs.push({
        word: word,
        readings: rkf.readings,
        ...(rkf.kanjiForms !== undefined ? { kanjiForms: rkf.kanjiForms } : {}),
        forms:
          rkf.kanjiForms !== undefined
            ? rkf.readings.union(rkf.kanjiForms)
            : rkf.readings,
        entriesWithTitles: entriesWithTitles,
        entriesWithFormTitles: entriesWithFormTitles,
        entriesWithForms: entriesWithForms,
      });
  }

  // ---- 5. Definitions ----

  const japaneseDefinitions: WordDefinitionPair[] = [];
  const definitionMap: Map<string, { count: number }> = new Map<
    string,
    { count: number }
  >();

  for (const pair of wordEntriesPairs) {
    const definitions: Definition[] = [];

    const kanjiFormEntries: JaWiktionaryEntry[] = [];
    const matchedEntries: JaWiktionaryEntry[] = [];
    const readingEntries: JaWiktionaryEntry[] = [];

    // word form -> kanji-form titles naming it in a sense's `form_of`
    const titleFormMap: Map<string, Set<string>> = new Map<
      string,
      Set<string>
    >();
    // word form -> kanji-form titles mentioning it inside a gloss
    const refsMap: Map<string, Set<string>> = new Map<string, Set<string>>();
    // word forms listed in the `forms` of a kanji-form titled entry
    const listedForms: Set<string> = new Set<string>();

    for (const ent of pair.entriesWithTitles) {
      const title: string = ent.word;

      if (pair.kanjiForms !== undefined && pair.kanjiForms.has(title)) {
        kanjiFormEntries.push(ent);

        const hasValidFormOf: boolean = validFormOfEntries.has(title);

        for (const sense of ent.senses) {
          if (hasValidFormOf && sense.form_of !== undefined)
            for (const form of sense.form_of)
              if (pair.forms.has(form)) addToIndex(titleFormMap, form, title);

          for (const gloss of sense.glosses)
            for (const f of pair.forms)
              if (gloss.includes(f)) addToIndex(refsMap, f, title);
        }

        if (validFormsEntries.has(title) && ent.forms !== undefined)
          for (const form of ent.forms)
            if (pair.forms.has(form)) listedForms.add(form);
      }

      if (pair.readings.has(title)) {
        if (
          ent.forms !== undefined &&
          ent.forms.some((form: string) => pair.forms.has(form))
        )
          matchedEntries.push(ent);

        if (pair.kanjiForms === undefined) readingEntries.push(ent);
      }
    }

    for (const entry of pair.entriesWithForms) {
      const titles: Set<string> | undefined = titleFormMap.get(entry.word);

      if (
        titles !== undefined &&
        entry.forms !== undefined &&
        entry.forms.some((form: string) => titles.has(form))
      )
        matchedEntries.push(entry);
    }

    for (const entry of pair.entriesWithFormTitles) {
      if (listedForms.has(entry.word)) {
        matchedEntries.push(entry);
        continue;
      }

      const titles: Set<string> | undefined = refsMap.get(entry.word);

      if (titles !== undefined && !titles.isDisjointFrom(pair.forms))
        matchedEntries.push(entry);
    }

    // Order matters: it fixes the order of the resulting definitions
    for (const entry of kanjiFormEntries)
      if (entry.pos_title !== "和語の漢字表記")
        parseEntry(entry, definitions, definitionMap);

    for (const entry of matchedEntries)
      parseEntry(entry, definitions, definitionMap);

    for (const entry of readingEntries)
      parseEntry(entry, definitions, definitionMap);

    if (definitions.length > 0) {
      // The word's forms plus their hiragana/katakana counterparts; only needed for the accuracy pass below
      const wordForms: Set<string> = new Set<string>(pair.forms);

      for (const form of pair.forms) {
        wordForms.add(hiraganaToKatakana(form));
        wordForms.add(katakanaToHiragana(form));
      }

      japaneseDefinitions.push({
        wordID: pair.word.id,
        definitions: definitions,
        wordForms: wordForms,
      });
    }
  }

  // A definition attached to more than one word may belong to another word:
  // grade 1 if its text mentions one of this word's forms, grade 2 otherwise
  for (const pair of japaneseDefinitions) {
    for (const def of pair.definitions) {
      const usage: { count: number } | undefined = definitionMap.get(
        def.definition,
      );

      if (usage !== undefined && usage.count > 1) {
        let mentionsWord: boolean = false;

        for (const f of pair.wordForms!)
          if (def.definition.includes(f)) {
            mentionsWord = true;
            break;
          }

        def.mayNotBeAccurate = mentionsWord ? 1 : 2;
      }
    }

    // `wordForms` is scratch data and is not part of the returned pairs
    delete pair.wordForms;
  }

  return japaneseDefinitions;
}
1668
+
1669
/**
 * Pairs Japanese definitions with JMdict word entries (with furigana)
 *
 * The pairs are produced by {@link getWordDefinitions}. Each definition that does not contain `・` then receives a `furigana` rendering generated by Kuroshiro with the Kuromoji analyzer, one conversion at a time.
 * @param entryList An array containing `ja.wiktionary.org` Japanese entries (converted using {@link convertJawiktionarySync} or {@link convertJawiktionaryAsync})
 * @param jmDict An array of converted `JMdict` entries
 * @returns A promise resolving with an array of {@link WordDefinitionPair} objects (with furigana)
 */
export async function getWordDefinitionsWithFurigana(
  entryList: readonly JaWiktionaryEntry[],
  jmDict: readonly DictWord[],
): Promise<WordDefinitionPair[]> {
  const pairs: WordDefinitionPair[] = getWordDefinitions(entryList, jmDict);

  const kuroshiro: any = new Kuroshiro.default();
  await kuroshiro.init(new KuromojiAnalyzer());

  // Bound once so the converter keeps its receiver when called below
  const toFurigana: any = kuroshiro.convert.bind(kuroshiro);

  for (const pair of pairs)
    for (const def of pair.definitions) {
      // Definitions containing the interpunct are left without furigana
      if (def.definition.includes("・")) continue;

      def.furigana = String(
        await toFurigana(def.definition, {
          to: "hiragana",
          mode: "furigana",
        }),
      );
    }

  return pairs;
}
1706
+
1707
/**
 * Resolves a JMdict note key through `noteMap` and records the outcome.
 *
 * The key is looked up in lower case. An unknown key is recorded verbatim in `notes` and returned without a tag.
 * A known key contributes a `word::`-prefixed tag (added to `tags` only if absent) and its mapped note text.
 * @param key The note key to resolve
 * @param notes The note list (appended to in place)
 * @param tags The tag list (appended to in place, without duplicates)
 * @returns The recorded note and, for known keys, its tag
 */
function lookupWordNote(
  key: string,
  notes: string[],
  tags: string[],
): NoteAndTag {
  const mapped:
    | readonly [string, string]
    | readonly [string, string, POS | readonly POS[]]
    | undefined = noteMap.get(key.toLowerCase());

  if (mapped !== undefined) {
    const noteTag: string = `word::${mapped[0]}`;
    const noteText: string = mapped[1];

    if (!tags.includes(noteTag)) tags.push(noteTag);
    notes.push(noteText);

    return { note: noteText, tag: noteTag };
  }

  notes.push(key);

  return { note: key };
}
1730
+
1731
/**
 * Calls `cb` once per element of `arr`, in order. Does nothing when `arr` is `undefined`.
 */
const wordAddNoteArray: (
  arr: string[] | undefined,
  cb: (v: string) => void,
) => void = (arr: string[] | undefined, cb: (v: string) => void): void => {
  if (arr !== undefined) {
    for (const item of arr) {
      cb(item);
    }
  }
};
1739
+
1740
/**
 * Transforms a converted `KANJIDIC` entry into a more readable format, by providing either the kanji or the {@link DictKanji} object directly.
 *
 * Up to three example words are attached. Words whose first kanji form contains the kanji are preferred and contribute their first reading and first meaning.
 * When no such word exists, any kanji form containing the kanji is used, together with the reading and meaning restricted to that form.
 * Words without a usable kanji form, reading or translation are skipped.
 * @param searchedKanji The kanji character (requires {@link dict}) or a {@link DictKanji} object
 * @param dict An array of converted `KANJIDIC` entries or a {@link KanjiEntryMap} (not needed if {@link searchedKanji} is a {@link DictKanji} object)
 * @param jmDict An array of converted `JMdict` entries or a {@link KanjiWordsMap}
 * @param svgList An array of SVG file names or a {@link KanjiSVGMap}
 * @param noteTypeName The Anki note type name
 * @param deckPath The full Anki deck path
 * @returns The transformed {@link Kanji} object or `undefined` if entry is not found
 */
export function getKanji(
  searchedKanji: string | DictKanji,
  dict?: readonly DictKanji[] | KanjiEntryMap,
  jmDict?: readonly DictWord[] | KanjiWordsMap,
  svgList?: readonly string[] | KanjiSVGMap,
  noteTypeName?: string,
  deckPath?: string,
): Kanji | undefined {
  let dictKanji: DictKanji | undefined = undefined;

  if (typeof searchedKanji === "string" && dict !== undefined)
    dictKanji =
      dict instanceof Map
        ? dict.get(searchedKanji)
        : dict.find((entry: DictKanji) => entry.kanji === searchedKanji);
  else if (typeof searchedKanji === "object") dictKanji = searchedKanji;

  if (dictKanji === undefined) return undefined;

  const tags: string[] = [];

  const kanji: Kanji = {
    kanji: dictKanji.kanji,
    // NOTE(review): `misc` is asserted present here although the fields below treat it as optional — confirm against the KANJIDIC converter
    strokes: dictKanji.misc!.strokeNumber,
    ...(dictKanji.misc?.grade !== undefined
      ? { grade: dictKanji.misc.grade }
      : {}),
    ...(dictKanji.misc?.frequency !== undefined
      ? { frequency: dictKanji.misc.frequency }
      : {}),
    ...(dictKanji.misc?.jlpt !== undefined
      ? { jlpt: dictKanji.misc.jlpt }
      : {}),
    noteID: `kanji_${dictKanji.kanji}`,
    ...(noteTypeName !== undefined ? { noteTypeName: noteTypeName } : {}),
    ...(deckPath !== undefined ? { deckPath: deckPath } : {}),
    tags: tags,
  };

  if (dictKanji.readingMeaning !== undefined) {
    const meanings: string[] = [];
    const nanori: string[] = [];
    const onyomi: string[] = [];
    const kunyomi: string[] = [];

    for (const rm of dictKanji.readingMeaning) {
      if (rm.nanori !== undefined) nanori.push(...rm.nanori);

      // A reading/meaning block may carry only nanori and no groups
      for (const group of rm.groups ?? []) {
        if (group.readings !== undefined)
          for (const reading of group.readings) {
            if (reading.type === "ja_on") onyomi.push(reading.reading);
            else if (reading.type === "ja_kun") kunyomi.push(reading.reading);
          }

        if (group.meanings !== undefined) meanings.push(...group.meanings);
      }
    }

    if (meanings.length > 0) kanji.meanings = meanings;
    if (nanori.length > 0) kanji.nanori = nanori;
    if (onyomi.length > 0) kanji.onyomi = onyomi;
    if (kunyomi.length > 0) kanji.kunyomi = kunyomi;
  }

  if (jmDict !== undefined) {
    const maxExampleWords: number = 3;

    let kanjiWords: readonly DictWord[] | Word[] | undefined =
      jmDict instanceof Map ? jmDict.get(kanji.kanji) : jmDict;

    // Words whose first kanji form contains the kanji take precedence over all others
    const firstKfWords: readonly DictWord[] | undefined = kanjiWords?.filter(
      (word: DictWord) =>
        word.kanjiForms?.[0]?.form.includes(kanji.kanji) === true,
    );
    const preferFirstForm: boolean =
      firstKfWords !== undefined && firstKfWords.length > 0;

    if (firstKfWords !== undefined && firstKfWords.length > 0)
      kanjiWords = firstKfWords;

    if (kanjiWords !== undefined) {
      const validWords: Word[] = [];

      for (const word of kanjiWords) {
        if (validWords.length === maxExampleWords) break;

        // In the fallback pass `kanjiWords` may be the whole dictionary, including kana-only words
        const kanjiForm: string | undefined = (
          preferFirstForm
            ? word.kanjiForms?.[0]
            : word.kanjiForms?.find((kf: DictKanjiForm) =>
                kf.form.includes(kanji.kanji),
              )
        )?.form;
        if (kanjiForm === undefined) continue;

        const reading: string | undefined = (
          preferFirstForm
            ? word.readings[0]
            : word.readings.find(
                (r: DictReading) =>
                  r.kanjiFormRestrictions !== undefined &&
                  r.kanjiFormRestrictions.includes(kanjiForm),
              )
        )?.reading;
        if (reading === undefined) continue;

        const firstTranslation = (
          preferFirstForm
            ? word.meanings[0]
            : word.meanings.find(
                (m: DictMeaning) =>
                  m.kanjiFormRestrictions !== undefined &&
                  m.kanjiFormRestrictions.includes(kanjiForm),
              )
        )?.translations?.[0];
        if (firstTranslation === undefined) continue;

        const translation: string =
          typeof firstTranslation === "string"
            ? firstTranslation
            : firstTranslation.translation;

        validWords.push({
          kanjiForms: [{ kanjiForm: kanjiForm }],
          readings: [{ reading: reading }],
          translations: [
            {
              translation: translation,
            },
          ],
        });
      }

      if (validWords.length > 0) kanji.words = validWords;
    }
  }

  if (svgList !== undefined) {
    const codePoint: string = kanji.kanji
      .codePointAt(0)!
      .toString(16)
      .toLowerCase();

    // File names are matched with and without one leading zero in front of the hex code point
    const svgNames: string[] = [`${codePoint}.svg`, `0${codePoint}.svg`];

    const svg: string | undefined =
      svgList instanceof Map
        ? svgList.get(kanji.kanji)
        : svgList.find((svgFile: string) =>
            svgNames.includes(svgFile.toLowerCase()),
          );

    if (svg !== undefined) kanji.svg = svg;
  }

  if (dictKanji.isKokuji === true) {
    kanji.kokuji = true;
    tags.push("kanji::kokuji");
  }

  tags.push(
    `kanji::strokes::${kanji.strokes}`,
    ...(kanji.frequency !== undefined
      ? [`kanji::frequency::${kanji.frequency}`]
      : []),
    ...(kanji.grade !== undefined ? [`kanji::grade::${kanji.grade}`] : []),
    ...(kanji.jlpt !== undefined
      ? [`kanji::pre-2010_jlpt::${kanji.jlpt.toLowerCase()}`]
      : []),
    `kanji::onyomi::${kanji.onyomi?.length ?? 0}`,
    `kanji::kunyomi::${kanji.kunyomi?.length ?? 0}`,
    `kanji::nanori::${kanji.nanori?.length ?? 0}`,
    `kanji::words::${kanji.words?.length ?? 0}`,
    ...(kanji.svg !== undefined ? ["kanji::has_svg"] : []),
  );

  return kanji;
}
1936
+
1937
/**
 * Builds a {@link Kanji} through {@link getKanji} and then overlays the optional extras carried by `info`.
 *
 * Components are copied whenever `info.components` is defined, the mnemonic only when it is a non-empty string,
 * and the words only when `useWords` is `true` and `info.words` is non-empty. Each overlay also updates the tags.
 * @param info Additional info for the kanji (mnemonic, components, words)
 * @param kanji The kanji character or a {@link DictKanji} object
 * @param dict An array of converted KANJIDIC entries or a {@link KanjiEntryMap} *(not needed if {@link kanji} is a {@link DictKanji} object)*
 * @param useWords Whether or not to use the words provided in the `info` object (if present) instead of other words from `JMdict`
 * @param jmDict An array of converted `JMdict` entries or a {@link KanjiWordsMap}
 * @param svgList An array of SVG file names or a {@link KanjiSVGMap}
 * @param noteTypeName The Anki note type name
 * @param deckPath The full Anki deck path
 * @param sourceURL A link leading to the source of {@link info}; only stored when `info.externalInfo` is `true` and at least one piece of `info` was applied
 * @returns The resulting {@link Kanji} object, or `undefined` when {@link getKanji} returns `undefined`
 */
export function getKanjiExtended(
  info: Kanji,
  kanji: string | DictKanji,
  dict?: readonly DictKanji[] | KanjiEntryMap,
  useWords?: true,
  jmDict?: readonly DictWord[] | KanjiWordsMap,
  svgList?: readonly string[] | KanjiSVGMap,
  noteTypeName?: string,
  deckPath?: string,
  sourceURL?: string,
): Kanji | undefined {
  const result: Kanji | undefined = getKanji(
    kanji,
    dict,
    jmDict,
    svgList,
    noteTypeName,
    deckPath,
  );

  if (result === undefined) return undefined;

  const tags: string[] = result.tags!;
  let infoApplied: boolean = false;

  if (info.components !== undefined) {
    result.components = info.components;
    tags.push(`kanji::components::${info.components.length}`);

    infoApplied = true;
  }

  if (info.mnemonic !== undefined && info.mnemonic.length > 0) {
    result.mnemonic = info.mnemonic;
    tags.push("kanji::has_mnemonic");

    infoApplied = true;
  }

  if (useWords === true && info.words !== undefined && info.words.length > 0) {
    result.words = info.words;

    // Rewrite every existing word-count tag in place so it reflects the replaced word list.
    const wordCountTag: string = `kanji::words::${info.words.length}`;

    for (let position: number = 0; position < tags.length; position++)
      if (tags[position]!.startsWith("kanji::words::"))
        tags[position] = wordCountTag;

    infoApplied = true;
  }

  // The source is only attributed when externally provided info actually ended up in the result.
  if (infoApplied && info.externalInfo === true && sourceURL !== undefined)
    result.source = sourceURL;

  return result;
}
2009
+
2010
/**
 * Transforms a converted `JMdict` entry into a more readable format, by providing either its JMdict entry ID or the {@link DictWord} object directly.
 *
 * The result always carries the readings, translations (one per meaning, with notes) and tags.
 * Kanji, phrases and definitions are only attached when the corresponding data source is passed.
 * @param searchedWord The ID of the `JMdict` entry (requires {@link dict}) or a {@link DictWord} object
 * @param dict An array of converted `JMdict` entries or a {@link WordIDEntryMap} *(not needed if {@link searchedWord} is a {@link DictWord} object)*
 * @param kanjiDic An array of converted `KANJIDIC` entries or a {@link KanjiEntryMap}
 * @param examples An array of converted `Tanaka Corpus` examples or a {@link WordExamplesMap}
 * @param definitions An array of `ja.wiktionary.org` word-definitions pairs or a {@link WordDefinitionsMap}
 * @param noteTypeName The Anki note type name
 * @param deckPath The full Anki deck path
 * @returns The {@link Word} built from the entry or `undefined` if entry is not found
 */
export function getWord(
  searchedWord: StringNumber | DictWord,
  dict?: readonly DictWord[] | WordIDEntryMap,
  kanjiDic?: readonly DictKanji[] | KanjiEntryMap,
  examples?: readonly TanakaExample[] | WordExamplesMap,
  definitions?: readonly WordDefinitionPair[] | WordDefinitionsMap,
  noteTypeName?: string,
  deckPath?: string,
): Word | undefined {
  let dictWord: DictWord | undefined = undefined;

  // An ID can only be resolved through `dict`, which may be a plain array or a Map.
  if (typeof searchedWord === "string" && dict !== undefined) {
    if (Array.isArray(dict))
      dictWord = (dict as readonly DictWord[]).find(
        (entry: DictWord) => entry.id === searchedWord,
      );

    if (dict instanceof Map) dictWord = dict.get(searchedWord);
  }

  if (typeof searchedWord === "object") dictWord = searchedWord;

  if (dictWord !== undefined) {
    const word: Word = {
      id: dictWord.id,
      readings: [],
      translations: [],
      noteID: `word_${dictWord.id}`,
      noteTypeName: noteTypeName,
      deckPath: deckPath,
      tags: [],
    };

    if (dictWord.isCommon === true) {
      word.common = true;
      word.tags!.push("word::common");
    }

    // Kanji forms: notes are resolved through `lookupWordNote`, which also receives the word's tag list.
    if (dictWord.kanjiForms !== undefined)
      word.kanjiForms = dictWord.kanjiForms.map(
        (dictKanjiForm: DictKanjiForm) => ({
          kanjiForm: dictKanjiForm.form,
          ...(dictKanjiForm.notes !== undefined
            ? {
                notes: dictKanjiForm.notes.map((note: string) => {
                  const noteAndTag: NoteAndTag = lookupWordNote(
                    note,
                    [],
                    word.tags!,
                  );

                  return capitalizeString(noteAndTag.note);
                }),
              }
            : {}),
          ...(dictKanjiForm.commonness !== undefined &&
          dictKanjiForm.commonness.length > 0
            ? { common: true }
            : {}),
        }),
      );

    // Readings: kanji form restrictions are turned into "Reading restricted to ..." notes,
    // listed before the looked-up notes.
    word.readings = dictWord.readings.map((dictReading: DictReading) => ({
      reading: dictReading.reading,
      ...(dictReading.kanjiFormRestrictions !== undefined ||
      dictReading.notes !== undefined
        ? {
            notes: [
              ...(dictReading.kanjiFormRestrictions !== undefined
                ? dictReading.kanjiFormRestrictions.map(
                    (restriction: string) =>
                      `Reading restricted to ${restriction}`,
                  )
                : []),
              ...(dictReading.notes !== undefined
                ? dictReading.notes.map((note: string) => {
                    const noteAndTag: NoteAndTag = lookupWordNote(
                      note,
                      [],
                      word.tags!,
                    );

                    return capitalizeString(noteAndTag.note);
                  })
                : []),
            ],
          }
        : {}),
      ...(dictReading.commonness !== undefined &&
      dictReading.commonness.length > 0
        ? { common: true }
        : {}),
    }));

    // Translations: one entry per meaning, its glosses joined with "; ".
    for (const dictMeaning of dictWord.meanings) {
      const translationTypes: string[] = [];
      const translations: string[] = dictMeaning.translations.map(
        (
          translation:
            | string
            | { translation: string; type: "lit" | "expl" | "tm" },
        ) => {
          if (typeof translation === "string") return translation;
          else {
            // Typed glosses contribute a note (index 1) and a tag (index 0) taken from `noteMap`.
            const translationNoteAndTag:
              | readonly [string, string]
              | readonly [string, string, POS | readonly POS[]] = noteMap.get(
              translation.type,
            )!;

            translationTypes.push(translationNoteAndTag[1]);
            word.tags!.push(`word::${translationNoteAndTag[0]}`);

            return translation.translation;
          }
        },
      );

      const notes: string[] = [];

      // The order of the calls below is the order in which the notes appear.
      wordAddNoteArray(
        dictMeaning.kanjiFormRestrictions,
        (restriction: string) =>
          notes.push(`Meaning restricted to ${restriction}`),
      );
      wordAddNoteArray(dictMeaning.readingRestrictions, (restriction: string) =>
        notes.push(`Meaning restricted to ${restriction}`),
      );
      for (const t of translationTypes) notes.push(t);
      wordAddNoteArray(dictMeaning.partOfSpeech, (pos: string) =>
        lookupWordNote(pos, notes, word.tags!),
      );
      wordAddNoteArray(dictMeaning.fields, (field: string) =>
        lookupWordNote(field, notes, word.tags!),
      );
      wordAddNoteArray(dictMeaning.dialects, (dialect: string) =>
        lookupWordNote(dialect, notes, word.tags!),
      );
      wordAddNoteArray(dictMeaning.antonyms, (antonym: string) =>
        notes.push(`Antonym: ${antonym}`),
      );
      wordAddNoteArray(dictMeaning.references, (reference: string) =>
        notes.push(`Related: ${reference}`),
      );
      wordAddNoteArray(dictMeaning.info, (info: string) =>
        lookupWordNote(info, notes, word.tags!),
      );
      wordAddNoteArray(dictMeaning.misc, (misc: string) =>
        lookupWordNote(misc, notes, word.tags!),
      );

      for (let i: number = 0; i < notes.length; i++)
        notes[i] = capitalizeString(notes[i]!);

      word.translations.push({
        translation: translations.join("; "),
        notes: notes,
      });
    }

    if (dictWord.usuallyInKana === true) {
      word.usuallyInKana = true;
      word.tags!.push("word::usually_in_kana_for_all_senses");
    }

    // Kanji: every distinct kanji character used by any kanji form, in order of first appearance.
    if (kanjiDic !== undefined && word.kanjiForms !== undefined) {
      const kanji: Kanji[] = [];
      const seenChars: Set<string> = new Set<string>();

      for (const kanjiForm of word.kanjiForms)
        // Iterate by code point (not by UTF-16 code unit) so that kanji encoded
        // as surrogate pairs are kept whole instead of being split in two halves.
        for (const char of Array.from(kanjiForm.kanjiForm).filter((c: string) =>
          regexps.kanji.test(c),
        )) {
          if (seenChars.has(char)) continue;
          seenChars.add(char);

          // With a Map the entry is resolved here; with an array the lookup is left to `getKanji`.
          const kanjiEntry: DictKanji | undefined =
            kanjiDic instanceof Map ? kanjiDic.get(char) : undefined;

          const kanjiObj: Kanji | undefined = getKanji(
            kanjiEntry ?? char,
            !(kanjiDic instanceof Map) ? kanjiDic : undefined,
          );

          if (kanjiObj !== undefined)
            kanji.push({
              kanji: kanjiObj.kanji,
              ...(kanjiObj.meanings !== undefined &&
              kanjiObj.meanings.length > 0
                ? { meanings: kanjiObj.meanings }
                : {}),
            });
        }

      if (kanji.length > 0) word.kanji = kanji;
    }

    // Phrases: pick up to five example sentences, preferring meaning-specific ones.
    if (dictWord.hasPhrases === true && examples !== undefined) {
      const exampleList: readonly TanakaExample[] =
        examples instanceof Map ? (examples.get(dictWord.id) ?? []) : examples;

      const rkf: ReadingsKanjiFormsPair = getValidForms(
        dictWord.readings,
        dictWord.kanjiForms,
        dictWord.isCommon,
      );

      const readings: Set<string> = new Set<string>(
        rkf.readings.map((r: DictReading) => r.reading),
      );
      const kanjiForms: Set<string> | undefined =
        rkf.kanjiForms !== undefined
          ? new Set<string>(rkf.kanjiForms.map((kf: DictKanjiForm) => kf.form))
          : undefined;

      let kanjiFormExamples: {
        ex: TanakaExample;
        partIndex: number;
        form?: string | undefined;
      }[] = [];
      const readingMatchingKanjiFormExamples: {
        ex: TanakaExample;
        partIndex: number;
      }[] = [];
      const readingExamples: { ex: TanakaExample; partIndex: number }[] = [];
      const readingMatchingKanjiForms: Set<string> = new Set<string>();

      // Classify each example by its first matching part; `partIndex` remembers that part.
      for (const example of exampleList)
        for (let i: number = 0; i < example.parts.length; i++) {
          const part: ExamplePart = example.parts[i]!;

          const readingAsReadingMatch: boolean =
            part.reading !== undefined && readings.has(part.reading);
          const readingAsInflectedFormMatch: boolean =
            part.inflectedForm !== undefined &&
            readings.has(part.inflectedForm);

          const referenceIDMatch: boolean = part.referenceID === dictWord.id;

          if (
            (kanjiForms !== undefined && kanjiForms.has(part.baseForm)) ||
            referenceIDMatch
          ) {
            if (readingAsReadingMatch || readingAsInflectedFormMatch) {
              readingMatchingKanjiFormExamples.push({
                ex: example,
                partIndex: i,
              });

              readingMatchingKanjiForms.add(part.baseForm);
            } else
              kanjiFormExamples.push({
                ex: example,
                partIndex: i,
                form: part.baseForm,
              });

            break;
          }

          const readingAsBaseFormMatch: boolean = readings.has(part.baseForm);

          if (readingAsBaseFormMatch && kanjiForms === undefined) {
            readingExamples.push({ ex: example, partIndex: i });

            break;
          }
        }

      // Once some forms have a reading-confirmed example, examples without a
      // reading match are only kept for those same forms.
      if (readingMatchingKanjiForms.size > 0)
        kanjiFormExamples = kanjiFormExamples.filter(
          (ex: {
            ex: TanakaExample;
            partIndex: number;
            form?: string | undefined;
          }) => ex.form !== undefined && readingMatchingKanjiForms.has(ex.form),
        );

      const includeKanjiFormExamples: boolean = word.kanjiForms !== undefined;

      let wordExamples: { ex: TanakaExample; partIndex: number }[] = [
        ...(includeKanjiFormExamples
          ? [...readingMatchingKanjiFormExamples, ...kanjiFormExamples]
          : readingExamples),
      ];

      // Shortest phrases first.
      wordExamples.sort(
        (
          a: { ex: TanakaExample; partIndex: number },
          b: { ex: TanakaExample; partIndex: number },
        ) => a.ex.phrase.length - b.ex.phrase.length,
      );

      readingMatchingKanjiForms.clear();

      const glossSpecificExamples: {
        ex: TanakaExample;
        partIndex: number;
      }[] = [];
      const seenPhrases: Set<string> = new Set<string>();

      // For each translation (1-based gloss number) take at most one example
      // whose matched part is tagged with that gloss number.
      for (let i: number = 0; i < word.translations.length; i++)
        outer: for (const example of wordExamples) {
          if (seenPhrases.has(example.ex.phrase)) continue;

          for (let j: number = 0; j < example.ex.parts.length; j++) {
            const part: ExamplePart = example.ex.parts[j]!;

            if (j === example.partIndex && part.glossNumber === i + 1) {
              // NOTE(review): this writes onto the TanakaExample object that came from
              // `examples`, so the marker stays on the caller's data after this call.
              // `wordId` is stored with it so that consumers can tell which word it refers to.
              example.ex.glossNumber = {
                wordId: word.id!,
                glossNumber: i + 1,
              };

              glossSpecificExamples.push(example);
              seenPhrases.add(example.ex.phrase);

              break outer;
            }
          }
        }

      if (glossSpecificExamples.length > 0) {
        // Top up with the remaining (non gloss-specific) examples until there are five.
        if (glossSpecificExamples.length < 5) {
          wordExamples = wordExamples.filter(
            (ex: { ex: TanakaExample; partIndex: number }) =>
              !seenPhrases.has(ex.ex.phrase),
          );

          if (wordExamples.length > 0)
            for (const ex of wordExamples) {
              glossSpecificExamples.push(ex);

              if (glossSpecificExamples.length === 5) break;
            }
        }

        wordExamples = glossSpecificExamples;
      }

      if (wordExamples.length > 0) {
        word.phrases = (
          glossSpecificExamples.length === 0
            ? wordExamples.slice(0, 5)
            : wordExamples
        ).map((ex: { ex: TanakaExample; partIndex: number }) => ({
          phrase: ex.ex.furigana ?? ex.ex.phrase,
          translation: ex.ex.translation,
          originalPhrase: ex.ex.phrase,
          ...(ex.ex.glossNumber !== undefined
            ? { glossNumber: ex.ex.glossNumber }
            : {}),
        }));

        word.tags!.push("word::has_phrases");
        if (glossSpecificExamples.length > 0)
          word.tags!.push("word::has_meaning-specific_phrases");
      }
    }

    // Definitions: sorted ascending by `mayNotBeAccurate` (missing counts as 0) on a copy.
    if (definitions !== undefined) {
      const defs: readonly Definition[] | undefined =
        definitions instanceof Map
          ? definitions.get(word.id!)
          : definitions.find(
              (wdp: WordDefinitionPair) => wdp.wordID === word.id!,
            )?.definitions;

      if (defs !== undefined)
        word.definitions = [
          ...defs.toSorted(
            (a: Definition, b: Definition) =>
              (a.mayNotBeAccurate ?? 0) - (b.mayNotBeAccurate ?? 0),
          ),
        ];
    }

    return word;
  } else return undefined;
}
2403
+
2404
/**
 * Type guard for {@link Word} entries.
 * @param entry Any mapped entry
 * @returns `true` when both the own `readings` and the own `translations` data properties of `entry` pass `isObjectArray`
 */
export function isWord(entry: Result): entry is Word {
  // Own data properties only: inherited properties and accessors yield `undefined` here.
  if (!isObjectArray(Object.getOwnPropertyDescriptor(entry, "readings")?.value))
    return false;

  return isObjectArray(
    Object.getOwnPropertyDescriptor(entry, "translations")?.value,
  );
}
2410
+
2411
/**
 * Type guard for {@link Radical} entries.
 * @param entry Any mapped entry
 * @returns `true` when `entry` has an own `radical` data property holding a string
 */
export function isRadical(entry: Result): entry is Radical {
  const descriptor: PropertyDescriptor | undefined =
    Object.getOwnPropertyDescriptor(entry, "radical");

  // Inherited properties have no own descriptor; accessors have no `value`.
  return descriptor !== undefined && typeof descriptor.value === "string";
}
2416
+
2417
/**
 * Type guard for {@link Kanji} entries.
 * @param entry Any mapped entry
 * @returns `true` when `entry` has an own `kanji` data property holding a string and has none of the own properties `translations`, `readings` and `radical`
 */
export function isKanji(entry: Result): entry is Kanji {
  // Other entry shapes may also carry a `kanji` property, so their marker properties rule the entry out.
  const excludedKeys: readonly string[] = ["translations", "readings", "radical"];

  if (excludedKeys.some((key: string) => Object.hasOwn(entry, key)))
    return false;

  const descriptor: PropertyDescriptor | undefined =
    Object.getOwnPropertyDescriptor(entry, "kanji");

  return descriptor !== undefined && typeof descriptor.value === "string";
}
2425
+
2426
/**
 * Type guard for {@link Kana} entries.
 * @param entry Any mapped entry
 * @returns `true` when `entry` has an own `kana` data property holding a string
 */
export function isKana(entry: Result): entry is Kana {
  const descriptor: PropertyDescriptor | undefined =
    Object.getOwnPropertyDescriptor(entry, "kana");

  // Inherited properties have no own descriptor; accessors have no `value`.
  return descriptor !== undefined && typeof descriptor.value === "string";
}
2431
+
2432
/**
 * Type guard for {@link Grammar} entries.
 * @param entry Any mapped entry
 * @returns `true` when `entry` has an own `point` data property holding a string
 */
export function isGrammar(entry: Result): entry is Grammar {
  const descriptor: PropertyDescriptor | undefined =
    Object.getOwnPropertyDescriptor(entry, "point");

  // Inherited properties have no own descriptor; accessors have no `value`.
  return descriptor !== undefined && typeof descriptor.value === "string";
}
2437
+
2438
/**
 * Renders notes as an HTML list: a `<ul class="note-list">` with one `<li class="note">` per note.
 * The notes are inserted as-is (no escaping is applied here).
 * @param notes The notes to render
 * @param phrase When `true`, the list is wrapped in a `<details>` element labelled "Show translation"
 * @returns The HTML string
 */
const createNotes: (notes: string[], phrase?: true) => string = (
  notes: string[],
  phrase?: true,
) => {
  const wrapInDetails: boolean = phrase === true;
  const items: string = notes
    .map((note: string) => `<li class="note">${note}</li>`)
    .join("");

  return `${wrapInDetails ? "<details><summary>Show translation</summary>" : ""}<ul class="note-list">${items}</ul>${wrapInDetails ? "</details>" : ""}`;
};

/**
 * Wraps an already rendered entry in a `<div class="entry">`, followed by its notes (if any).
 * @param entry The HTML of the entry itself, inserted as-is
 * @param notes Optional notes; rendered through `createNotes` only when the array is non-empty
 * @param phrase Forwarded to `createNotes`
 * @param glossSpecific When `true`, the `gloss-specific` class is added to the wrapper
 * @returns The HTML string
 */
const createEntry: (
  entry: string,
  notes?: string[],
  phrase?: true,
  glossSpecific?: true,
) => string = (
  entry: string,
  notes?: string[],
  phrase?: true,
  glossSpecific?: true,
) => {
  const extraClass: string = glossSpecific === true ? " gloss-specific" : "";
  const renderedNotes: string =
    notes === undefined || notes.length === 0 ? "" : createNotes(notes, phrase);

  return `<div class="entry${extraClass}">${entry}${renderedNotes}</div>`;
};
2455
+
2456
+ /**
2457
+ * Generates an array where each field holds an entry’s info wrapped in HTML tags.
2458
+ * @param entry Any type of mapped entry ({@link Word}, {@link Kanji}, {@link Radical}, {@link Kana}, {@link Grammar})
2459
+ * @returns An array of fields, each corresponding to an Anki note type field
2460
+ */
2461
+ export function generateAnkiNote(entry: Result): string[] {
2462
+ const fields: string[] = [];
2463
+
2464
+ if (isWord(entry)) {
2465
+ const firstReading: string = createEntry(
2466
+ `<span class="word word-reading">${entry.readings[0]!.reading}${entry.readings[0]!.audio !== undefined ? `<br>[sound:${entry.readings[0]!.audio}]` : ""}</span>`,
2467
+ entry.readings[0]!.notes,
2468
+ );
2469
+ const otherReadings: string =
2470
+ entry.readings.length > 1
2471
+ ? `<details><summary>Show other readings</summary>${entry.readings
2472
+ .slice(1)
2473
+ .map((readingEntry: Reading) =>
2474
+ createEntry(
2475
+ `<span class="word word-reading">${readingEntry.reading}${readingEntry.audio !== undefined ? `<br>[sound:${readingEntry.audio}]` : ""}</span>`,
2476
+ readingEntry.notes,
2477
+ ),
2478
+ )
2479
+ .join("")}</details>`
2480
+ : "";
2481
+ const readingsField: string = `${firstReading}${otherReadings}`;
2482
+
2483
+ const firstKanjiForm: string | undefined =
2484
+ entry.kanjiForms !== undefined
2485
+ ? createEntry(
2486
+ `<span class="word word-kanjiform"><ruby><rb>${entry.kanjiForms[0]!.kanjiForm}</rb><rt>${entry.readings[0]!.reading}</rt></ruby></span>`,
2487
+ entry.kanjiForms[0]!.notes,
2488
+ )
2489
+ : undefined;
2490
+ const otherKanjiForms: string =
2491
+ entry.kanjiForms !== undefined && entry.kanjiForms.length > 1
2492
+ ? `<details><summary>Show other kanji forms</summary>${entry.kanjiForms
2493
+ .slice(1)
2494
+ .map((kanjiFormEntry: KanjiForm) => {
2495
+ const restrictedReading: Reading | undefined =
2496
+ entry.readings.find(
2497
+ (r: Reading) =>
2498
+ r.notes !== undefined &&
2499
+ r.notes.includes(
2500
+ `Reading restricted to ${kanjiFormEntry.kanjiForm}`,
2501
+ ),
2502
+ );
2503
+
2504
+ return createEntry(
2505
+ `<span class="word word-kanjiform">${restrictedReading !== undefined ? "<ruby><rb>" : ""}${kanjiFormEntry.kanjiForm}${restrictedReading !== undefined ? `</rb><rt>${restrictedReading.reading}</rt></ruby>` : ""}</span>`,
2506
+ kanjiFormEntry.notes,
2507
+ );
2508
+ })
2509
+ .join("")}</details>`
2510
+ : "";
2511
+
2512
+ const kanjiFormsField: string =
2513
+ firstKanjiForm !== undefined
2514
+ ? `${firstKanjiForm}${otherKanjiForms}`
2515
+ : '<span class="word word-kanjiform">(no kanji forms)</span>';
2516
+
2517
+ const firstThreeTranslations: string = entry.translations
2518
+ .slice(0, 3)
2519
+ .map((translationEntry: Translation, index: number) =>
2520
+ createEntry(
2521
+ `<span class="word word-translation">${translationEntry.translation}</span>`,
2522
+ translationEntry.notes,
2523
+ undefined,
2524
+ entry.phrases !== undefined
2525
+ ? entry.phrases.some(
2526
+ (phrase: Phrase, index2: number) =>
2527
+ index === index2 &&
2528
+ phrase.glossNumber !== undefined &&
2529
+ phrase.glossNumber.wordId === entry.id &&
2530
+ phrase.glossNumber.glossNumber === index + 1,
2531
+ )
2532
+ ? true
2533
+ : undefined
2534
+ : undefined,
2535
+ ),
2536
+ )
2537
+ .join("");
2538
+
2539
+ const otherTranslations: string =
2540
+ entry.translations.length > 3
2541
+ ? `<details><summary>Show other translations</summary>${entry.translations
2542
+ .map((translationEntry: Translation, index: number) => {
2543
+ if (index < 3) return "null";
2544
+
2545
+ return createEntry(
2546
+ `<span class="word word-translation">${translationEntry.translation}</span>`,
2547
+ translationEntry.notes,
2548
+ undefined,
2549
+ entry.phrases !== undefined
2550
+ ? entry.phrases.some(
2551
+ (phrase: Phrase, index2: number) =>
2552
+ index === index2 &&
2553
+ phrase.glossNumber !== undefined &&
2554
+ phrase.glossNumber.wordId === entry.id &&
2555
+ phrase.glossNumber.glossNumber === index + 1,
2556
+ )
2557
+ ? true
2558
+ : undefined
2559
+ : undefined,
2560
+ );
2561
+ })
2562
+ .filter((translation: string) => translation !== "null")
2563
+ .join("")}</details>`
2564
+ : "";
2565
+
2566
+ const translationsField: string = `${firstThreeTranslations}${otherTranslations}`;
2567
+
2568
+ const firstFivePhrases: string | undefined = entry.phrases
2569
+ ?.slice(0, 5)
2570
+ .map((phraseEntry: Phrase, index: number) =>
2571
+ createEntry(
2572
+ `<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
2573
+ [phraseEntry.translation],
2574
+ true,
2575
+ entry.translations.some(
2576
+ (_translation: Translation, index2: number) =>
2577
+ index === index2 &&
2578
+ phraseEntry.glossNumber !== undefined &&
2579
+ phraseEntry.glossNumber.wordId === entry.id &&
2580
+ phraseEntry.glossNumber.glossNumber === index2 + 1,
2581
+ )
2582
+ ? true
2583
+ : undefined,
2584
+ ),
2585
+ )
2586
+ .join("");
2587
+
2588
+ const otherPhrases: string =
2589
+ entry.phrases !== undefined && entry.phrases.length > 5
2590
+ ? `<details><summary>Show other phrases</summary>${entry.phrases
2591
+ .map((phraseEntry: Phrase, index: number) => {
2592
+ if (index < 5) return "null";
2593
+
2594
+ return createEntry(
2595
+ `<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
2596
+ [phraseEntry.translation],
2597
+ true,
2598
+ entry.translations.some(
2599
+ (_translation: Translation, index2: number) =>
2600
+ index === index2 &&
2601
+ phraseEntry.glossNumber !== undefined &&
2602
+ phraseEntry.glossNumber.wordId === entry.id &&
2603
+ phraseEntry.glossNumber.glossNumber === index2 + 1,
2604
+ )
2605
+ ? true
2606
+ : undefined,
2607
+ );
2608
+ })
2609
+ .filter((phrase: string) => phrase !== "null")
2610
+ .join("")}</details>`
2611
+ : "";
2612
+
2613
+ const phrasesField: string =
2614
+ firstFivePhrases !== undefined
2615
+ ? `${firstFivePhrases}${otherPhrases}`
2616
+ : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>';
2617
+
2618
+ const firstThreeDefinitions: string | undefined = entry.definitions
2619
+ ?.slice(0, 3)
2620
+ .map((definitionEntry: Definition) =>
2621
+ createEntry(
2622
+ `<span class="word word-definition${definitionEntry.mayNotBeAccurate === 2 ? " mnba2" : definitionEntry.mayNotBeAccurate === 1 ? " mnba1" : ""}"><span class="word word-definition-original">${definitionEntry.definition}</span><span class="word word-definition-furigana">${definitionEntry.furigana ?? definitionEntry.definition}</span></span>`,
2623
+ ),
2624
+ )
2625
+ .join("");
2626
+
2627
+ const otherDefinitions: string =
2628
+ entry.definitions !== undefined && entry.definitions.length > 3
2629
+ ? `<details><summary>Show other definitions</summary>${entry.definitions
2630
+ .map((definitionEntry: Definition, index: number) =>
2631
+ index > 2
2632
+ ? createEntry(
2633
+ `<span class="word word-definition${definitionEntry.mayNotBeAccurate === 2 ? " mnba2" : definitionEntry.mayNotBeAccurate === 1 ? " mnba1" : ""}"><span class="word word-definition-original">${definitionEntry.definition}</span><span class="word word-definition-furigana">${definitionEntry.furigana ?? definitionEntry.definition}</span></span>`,
2634
+ )
2635
+ : "null",
2636
+ )
2637
+ .filter((definition: string) => definition !== "null")
2638
+ .join("")}</details>`
2639
+ : "";
2640
+
2641
+ const definitionsField: string =
2642
+ firstThreeDefinitions !== undefined
2643
+ ? `${firstThreeDefinitions}${otherDefinitions}`
2644
+ : '<span class="word word-definition">(no definitions)</span>';
2645
+
2646
+ fields.push(
2647
+ ...(entry.kanjiForms !== undefined && !entry.usuallyInKana
2648
+ ? [kanjiFormsField, readingsField]
2649
+ : [readingsField, kanjiFormsField]),
2650
+ translationsField,
2651
+ phrasesField,
2652
+ definitionsField,
2653
+ entry.kanji !== undefined
2654
+ ? entry.kanji
2655
+ .map((kanjiEntry: Kanji) =>
2656
+ createEntry(
2657
+ `<span class="word word-kanji">${kanjiEntry.kanji}${kanjiEntry.meanings === undefined ? " (no meanings)" : ""}</span>`,
2658
+ kanjiEntry.meanings,
2659
+ ),
2660
+ )
2661
+ .join("")
2662
+ : '<span class="word word-kanji">(no kanji)</span>',
2663
+ ...(entry.tags !== undefined && entry.tags.length > 0
2664
+ ? [
2665
+ entry.tags
2666
+ .map((tag: string) =>
2667
+ tag.trim().toLowerCase().replaceAll(" ", "::"),
2668
+ )
2669
+ .join(" "),
2670
+ ]
2671
+ : []),
2672
+ );
2673
+ }
2674
+
2675
+ if (isRadical(entry))
2676
+ fields.push(
2677
+ createEntry(
2678
+ `<span class="radical radical-character">${entry.radical}</span>`,
2679
+ ),
2680
+ createEntry(
2681
+ `<span class="radical radical-reading">${entry.reading}</span>`,
2682
+ ),
2683
+ entry.meanings
2684
+ .map((meaningEntry: string) =>
2685
+ createEntry(
2686
+ `<span class="radical radical-meaning">${meaningEntry}</span>`,
2687
+ ),
2688
+ )
2689
+ .join(""),
2690
+ entry.mnemonic !== undefined
2691
+ ? createEntry(
2692
+ `<span class="radical radical-mnemonic">${entry.mnemonic}</span>`,
2693
+ )
2694
+ : '<span class="radical radical-mnemonic">(no mnemonic) (Come up with your own!)</span>',
2695
+ entry.kanji !== undefined
2696
+ ? entry.kanji
2697
+ .map((kanji: Kanji) =>
2698
+ createEntry(
2699
+ `<span class="radical radical-kanji">${kanji.kanji}${kanji.meanings !== undefined && kanji.meanings.length > 0 ? ` - ${kanji.meanings[0]}` : ""}</span>`,
2700
+ ),
2701
+ )
2702
+ .join("")
2703
+ : '<span class="radical radical-kanji">(no "used-in" kanji)</span>',
2704
+ entry.strokes !== undefined
2705
+ ? createEntry(
2706
+ `<span class="radical radical-strokes">${entry.strokes}<br>${entry.svg !== undefined ? `<img class="radical radical-stroke-order" src="${entry.svg}" alt="${entry.radical} stroke order SVG">` : "(no stroke order SVG available)"}</span>`,
2707
+ )
2708
+ : '<span class="radical radical-strokes">(no stroke number)</span>',
2709
+ entry.sources !== undefined
2710
+ ? `<span class="radical radical-source">${entry.sources.map((source: string, index: number) => `<a href="${source}" target="_blank">Source ${index + 1}</a>`).join("<br>")}</span>`
2711
+ : '<span class="radical radical-source">(no sources)</span>',
2712
+ ...(entry.tags !== undefined && entry.tags.length > 0
2713
+ ? [
2714
+ entry.tags
2715
+ .map((tag: string) =>
2716
+ tag.trim().toLowerCase().replaceAll(" ", "::"),
2717
+ )
2718
+ .join(" "),
2719
+ ]
2720
+ : []),
2721
+ );
2722
+
2723
+ if (isKanji(entry))
2724
+ fields.push(
2725
+ createEntry(`<span class="kanji kanji-character">${entry.kanji}</span>`),
2726
+ entry.meanings !== undefined
2727
+ ? entry.meanings
2728
+ .map((meaningEntry: string) =>
2729
+ createEntry(
2730
+ `<span class="kanji kanji-meaning">${meaningEntry}</span>`,
2731
+ ),
2732
+ )
2733
+ .join("")
2734
+ : '<span class="kanji kanji-meaning">(no meanings)</span>',
2735
+ entry.onyomi !== undefined
2736
+ ? entry.onyomi
2737
+ .map((onyomiEntry: string) =>
2738
+ createEntry(
2739
+ `<span class="kanji kanji-onyomi">${onyomiEntry}</span>`,
2740
+ ),
2741
+ )
2742
+ .join("")
2743
+ : `<span class="kanji kanji-onyomi">(no onyomi) ${entry.kokuji === true ? "(kokuji)" : ""}</span>`,
2744
+ entry.kunyomi !== undefined
2745
+ ? entry.kunyomi
2746
+ .map((kunyomiEntry: string) =>
2747
+ createEntry(
2748
+ `<span class="kanji kanji-kunyomi">${kunyomiEntry}</span>`,
2749
+ ),
2750
+ )
2751
+ .join("")
2752
+ : `<span class="kanji kanji-kunyomi">(no kunyomi) ${entry.kokuji === true ? "(kokuji)" : ""}</span>`,
2753
+ entry.nanori !== undefined
2754
+ ? entry.nanori
2755
+ .map((nanoriEntry: string) =>
2756
+ createEntry(
2757
+ `<span class="kanji kanji-nanori">${nanoriEntry}</span>`,
2758
+ ),
2759
+ )
2760
+ .join("")
2761
+ : '<span class="kanji kanji-nanori">(no nanori)</span>',
2762
+ entry.components !== undefined
2763
+ ? entry.components
2764
+ .map((componentEntry: KanjiComponent) =>
2765
+ createEntry(
2766
+ `<span class="kanji kanji-component">${componentEntry.component}${componentEntry.meaning !== undefined ? ` - ${componentEntry.meaning}` : ""}</span>`,
2767
+ ),
2768
+ )
2769
+ .join("")
2770
+ : '<span class="kanji kanji-component">(no components)</span>',
2771
+ entry.mnemonic !== undefined
2772
+ ? createEntry(
2773
+ `<span class="kanji kanji-mnemonic">${entry.mnemonic}</span>`,
2774
+ )
2775
+ : '<span class="kanji kanji-mnemonic">(no mnemonic) (Come up with your own!)</span>',
2776
+ entry.words !== undefined
2777
+ ? entry.words
2778
+ .map((word: Word) =>
2779
+ createEntry(
2780
+ `<span class="kanji kanji-words">${word.kanjiForms!.find((kf: KanjiForm) => kf.kanjiForm.includes(entry.kanji))!.kanjiForm} / ${word.readings[0]!.reading} - ${word.translations[0]!.translation}</span>`,
2781
+ ),
2782
+ )
2783
+ .join("")
2784
+ : '<span class="kanji kanji-words">(no words) (Search on dictionaries!)</span>',
2785
+ entry.strokes !== undefined
2786
+ ? createEntry(
2787
+ `<span class="kanji kanji-strokes">${entry.strokes}<br>${entry.svg !== undefined ? `<img class="kanji kanji-stroke-order" src="${entry.svg}" alt="${entry.kanji} stroke order SVG">` : "(no stroke order SVG available)"}</span>`,
2788
+ )
2789
+ : '<span class="kanji kanji-strokes">(no stroke number)</span>',
2790
+ entry.source !== undefined
2791
+ ? `<span class="kanji kanji-source"><a href="${entry.source}" target="_blank">Source</a></span>`
2792
+ : '<span class="kanji kanji-source">(no components/mnemonic source)</span>',
2793
+ ...(entry.tags !== undefined && entry.tags.length > 0
2794
+ ? [
2795
+ entry.tags
2796
+ .map((tag: string) =>
2797
+ tag.trim().toLowerCase().replaceAll(" ", "::"),
2798
+ )
2799
+ .join(" "),
2800
+ ]
2801
+ : []),
2802
+ );
2803
+
2804
+ if (isKana(entry))
2805
+ fields.push(
2806
+ createEntry(`<span class="kana kana-character">${entry.kana}</span>`),
2807
+ createEntry(
2808
+ `<span class="kana kana-reading">${entry.reading}${entry.audio !== undefined ? `<br>[sound:${entry.audio}]` : ""}</span>`,
2809
+ ),
2810
+ entry.svg !== undefined
2811
+ ? createEntry(
2812
+ `<img class="kana kana-stroke-order" src="${entry.svg}" alt="${entry.kana} stroke order SVG">`,
2813
+ )
2814
+ : "(no stroke order SVG available)",
2815
+ ...(entry.tags !== undefined && entry.tags.length > 0
2816
+ ? [
2817
+ entry.tags
2818
+ .map((tag: string) =>
2819
+ tag.trim().toLowerCase().replaceAll(" ", "::"),
2820
+ )
2821
+ .join(" "),
2822
+ ]
2823
+ : []),
2824
+ );
2825
+
2826
+ if (isGrammar(entry))
2827
+ fields.push(
2828
+ createEntry(`<span class="grammar grammar-point">${entry.point}</span>`),
2829
+ entry.readings !== undefined
2830
+ ? entry.readings
2831
+ .map((readingEntry: Reading) =>
2832
+ createEntry(
2833
+ `<span class="grammar grammar-reading">${readingEntry.reading}</span>`,
2834
+ ),
2835
+ )
2836
+ .join("")
2837
+ : '<span class="grammar grammar-reading">(no additional readings)</span>',
2838
+ createEntry(
2839
+ `<span class="grammar grammar-meaning">${entry.meaning.meaning}${entry.meaning.example !== undefined && entry.meaning.example.length > 0 ? `<br><span class="grammar grammar-meaning-example">${entry.meaning.example}</span>` : ""}</span>`,
2840
+ ),
2841
+ entry.usages !== undefined
2842
+ ? entry.usages
2843
+ .map((usage: string) =>
2844
+ createEntry(
2845
+ `<span class="grammar grammar-usage">${usage}</span>`,
2846
+ ),
2847
+ )
2848
+ .join("")
2849
+ : '<span class="grammar grammar-usage">(no usages)</span>',
2850
+ entry.phrases !== undefined
2851
+ ? entry.phrases
2852
+ .map((phraseEntry: Phrase) =>
2853
+ createEntry(
2854
+ `<span class="grammar grammar-phrase"><span class="grammar grammar-phrase-original">${phraseEntry.originalPhrase}</span><span class="grammar grammar-phrase-furigana">${phraseEntry.phrase}</span></span>`,
2855
+ [phraseEntry.translation],
2856
+ true,
2857
+ ),
2858
+ )
2859
+ .join("")
2860
+ : '<span class="grammar grammar-phrase">(no phrases) (Search on dictionaries!)</span>',
2861
+ entry.source !== undefined
2862
+ ? `<span class="grammar grammar-source"><a href="${entry.source}" target="_blank">Source</a></span>`
2863
+ : '<span class="grammar grammar-source">(no source)</span>',
2864
+ ...(entry.tags !== undefined && entry.tags.length > 0
2865
+ ? [
2866
+ entry.tags
2867
+ .map((tag: string) =>
2868
+ tag.trim().toLowerCase().replaceAll(" ", "::"),
2869
+ )
2870
+ .join(" "),
2871
+ ]
2872
+ : []),
2873
+ );
2874
+
2875
+ return fields.map((field: string) => field.replaceAll("\n", "<br>"));
2876
+ }
2877
+
2878
/**
 * Generates an Anki notes file with each entry’s info organized into fields, either in HTML or plain text.
 *
 * The output is a block of header lines, a blank line, and then one
 * tab-separated row per entry. Entries whose `doNotCreateNote` is set are
 * left out of the rows (they still take part in the validation below).
 *
 * Note that entries are updated in place: a missing `noteID`, `noteTypeName`
 * or `deckPath` is filled in on the entry itself when the corresponding
 * `defaultNoteInfo` value is a string.
 *
 * @param list An array containing any type of transformed entries ({@link Word}, {@link Kanji}, {@link Radical}, {@link Kana}, {@link Grammar})
 * @param defaultNoteInfo An object with options regarding default values of some note information
 * @returns The resulting Anki notes file's content
 * @throws {Error} `"Invalid result list"` when, for `guid`/`noteType`/`deckPath`:
 * the option is `true` but an entry lacks the matching value, or the option is
 * not provided and the entries disagree with the first entry on whether (and
 * with which type) the value is present.
 */
export function generateAnkiNotesFile(
  list: readonly Result[],
  defaultNoteInfo?: DefaultNoteInfo,
): string {
  const headers: string[] = [noteHeaderKeys.separator, noteHeaderKeys.html];
  let ankiNotes: string = "";

  if (list.length > 0) {
    // Work on a shallow copy so the caller's options object is never touched.
    const noteInfo: DefaultNoteInfo = { ...defaultNoteInfo };

    // The first entry is the reference every other entry is compared against
    // when an option is left undefined.
    const firstEntry: Result = list[0]!;
    const firstEntryInfo = {
      guid: typeof firstEntry.noteID,
      noteType: typeof firstEntry.noteTypeName,
      deckPath: typeof firstEntry.deckPath,
    };

    // Always validate. Each clause is guarded by its own `noteInfo.*` value,
    // so nothing can match when all three options are strings. Gating this
    // check on `Object.values(noteInfo)` would miss options that were simply
    // omitted, because absent keys are not enumerated as `undefined`.
    const invalidList: boolean = list.some(
      (res: Result) =>
        (noteInfo.guid === true && res.noteID === undefined) ||
        (noteInfo.noteType === true && res.noteTypeName === undefined) ||
        (noteInfo.deckPath === true && res.deckPath === undefined) ||
        (noteInfo.guid === undefined &&
          typeof res.noteID !== firstEntryInfo.guid) ||
        (noteInfo.noteType === undefined &&
          typeof res.noteTypeName !== firstEntryInfo.noteType) ||
        (noteInfo.deckPath === undefined &&
          typeof res.deckPath !== firstEntryInfo.deckPath),
    );

    if (invalidList) throw new Error("Invalid result list");

    // Each optional header is emitted once, by the first row that carries the
    // corresponding value; `headerCount` tracks how many leading columns
    // (guid / note type / deck) precede the note fields.
    const hasHeader: {
      guid: boolean;
      noteType: boolean;
      deckPath: boolean;
      tags: boolean;
    } = { guid: false, noteType: false, deckPath: false, tags: false };
    let headerCount: number = 0;

    ankiNotes = list
      .filter((result: Result) => result.doNotCreateNote === undefined)
      .map((result: Result) => {
        // With a string `guid` option, derive a missing note ID from the
        // entry's own identifying value.
        if (typeof noteInfo.guid === "string" && result.noteID === undefined) {
          if (isWord(result) && result.id !== undefined)
            result.noteID = result.id;

          if (isKanji(result)) result.noteID = result.kanji;
          if (isRadical(result)) result.noteID = result.radical;
          if (isKana(result)) result.noteID = result.kana;
          if (isGrammar(result) && result.id !== undefined)
            result.noteID = result.id;
        }

        if (
          typeof noteInfo.noteType === "string" &&
          result.noteTypeName === undefined
        )
          result.noteTypeName = noteInfo.noteType;

        if (
          typeof noteInfo.deckPath === "string" &&
          result.deckPath === undefined
        )
          result.deckPath = noteInfo.deckPath;

        if (!hasHeader.guid && result.noteID !== undefined) {
          headers.push(`${noteHeaderKeys.guid}${++headerCount}`);
          hasHeader.guid = true;
        }
        if (!hasHeader.noteType && result.noteTypeName !== undefined) {
          headers.push(`${noteHeaderKeys.notetype}${++headerCount}`);
          hasHeader.noteType = true;
        }
        if (!hasHeader.deckPath && result.deckPath !== undefined) {
          headers.push(`${noteHeaderKeys.deck}${++headerCount}`);
          hasHeader.deckPath = true;
        }

        const note: string[] = generateAnkiNote(result);

        // NOTE(review): the tags column index is taken from the first emitted
        // note's field count. If that note carries no tags field, the index
        // may point at its last content field instead — confirm against
        // `generateAnkiNote`.
        if (!hasHeader.tags) {
          headers.push(`${noteHeaderKeys.tags}${note.length + headerCount}`);
          hasHeader.tags = true;
        }

        return `${result.noteID !== undefined ? `${result.noteID}\t` : ""}${result.noteTypeName !== undefined ? `${result.noteTypeName}\t` : ""}${result.deckPath !== undefined ? `${result.deckPath}\t` : ""}${note.join("\t")}`;
      })
      .join("\n")
      .trim();
  }

  return `${headers.join("\n")}\n\n${ankiNotes}`;
}