henkan 2.0.4 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +789 -736
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +781 -730
- package/dist/index.mjs.map +3 -3
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +93 -19
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +55 -28
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +12 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertJawiktionaryAsync.md +1 -1
- package/docs/api/functions/convertJawiktionarySync.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/convertTanakaCorpusWithFurigana.md +1 -1
- package/docs/api/functions/createEntryMaps.md +59 -0
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +10 -10
- package/docs/api/functions/getKanjiExtended.md +8 -8
- package/docs/api/functions/getValidForms.md +39 -0
- package/docs/api/functions/getWord.md +11 -11
- package/docs/api/functions/getWordDefinitions.md +1 -1
- package/docs/api/functions/getWordDefinitionsWithFurigana.md +1 -1
- package/docs/api/functions/isObjectArray.md +27 -0
- package/docs/api/functions/isStringArray.md +2 -2
- package/docs/api/functions/isValidArrayWithFirstElement.md +2 -2
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/interfaces/DefaultNoteInfo.md +4 -4
- package/docs/api/interfaces/Definition.md +4 -4
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +15 -15
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +9 -9
- package/docs/api/interfaces/EntryMaps.md +83 -0
- package/docs/api/interfaces/ExamplePart.md +8 -8
- package/docs/api/interfaces/GlossSpecificNumber.md +4 -4
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/JaWiktionaryEntry.md +8 -20
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +24 -24
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/NoteHeaderKeys.md +7 -7
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ReadingsKanjiFormsPair.md +31 -0
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -8
- package/docs/api/interfaces/Word.md +15 -15
- package/docs/api/interfaces/WordDefinitionPair.md +4 -4
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictTranslation.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/KanjiEntryMap.md +13 -0
- package/docs/api/type-aliases/KanjiSVGMap.md +13 -0
- package/docs/api/type-aliases/KanjiWordsMap.md +13 -0
- package/docs/api/type-aliases/Result.md +1 -1
- package/docs/api/type-aliases/StringNumber.md +13 -0
- package/docs/api/type-aliases/WordDefinitionsMap.md +13 -0
- package/docs/api/type-aliases/WordExamplesMap.md +13 -0
- package/docs/api/type-aliases/WordIDEntryMap.md +13 -0
- package/package.json +4 -4
- package/docs/api/functions/isValidArray.md +0 -27
package/dist/index.cjs.js
CHANGED
|
@@ -39,19 +39,21 @@ __export(index_exports, {
|
|
|
39
39
|
convertRadkFile: () => convertRadkFile,
|
|
40
40
|
convertTanakaCorpus: () => convertTanakaCorpus,
|
|
41
41
|
convertTanakaCorpusWithFurigana: () => convertTanakaCorpusWithFurigana,
|
|
42
|
+
createEntryMaps: () => createEntryMaps,
|
|
42
43
|
generateAnkiNote: () => generateAnkiNote,
|
|
43
44
|
generateAnkiNotesFile: () => generateAnkiNotesFile,
|
|
44
45
|
getKanji: () => getKanji,
|
|
45
46
|
getKanjiExtended: () => getKanjiExtended,
|
|
47
|
+
getValidForms: () => getValidForms,
|
|
46
48
|
getWord: () => getWord,
|
|
47
49
|
getWordDefinitions: () => getWordDefinitions,
|
|
48
50
|
getWordDefinitionsWithFurigana: () => getWordDefinitionsWithFurigana,
|
|
49
51
|
isGrammar: () => isGrammar,
|
|
50
52
|
isKana: () => isKana,
|
|
51
53
|
isKanji: () => isKanji,
|
|
54
|
+
isObjectArray: () => isObjectArray,
|
|
52
55
|
isRadical: () => isRadical,
|
|
53
56
|
isStringArray: () => isStringArray,
|
|
54
|
-
isValidArray: () => isValidArray,
|
|
55
57
|
isValidArrayWithFirstElement: () => isValidArrayWithFirstElement,
|
|
56
58
|
isWord: () => isWord,
|
|
57
59
|
notSearchedForms: () => notSearchedForms,
|
|
@@ -112,6 +114,9 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
112
114
|
["tsugaru-ben", ["dialect::tsugaru-ben", "Dialect: Tsugaru-ben"]],
|
|
113
115
|
["aichi dialect", ["dialect::aichi", "Dialect: Aichi"]],
|
|
114
116
|
["tochigi dialect", ["dialect::tochigi", "Dialect: Tochigi"]],
|
|
117
|
+
["lit", ["literal_meaning", "Literal meaning"]],
|
|
118
|
+
["expl", ["explanation", "Explanation"]],
|
|
119
|
+
["tm", ["trademark", "Trademark"]],
|
|
115
120
|
["adjective (keiyoushi)", ["adjective::i", "\u3044-adjective", "\u5F62\u5BB9\u8A5E"]],
|
|
116
121
|
["'taru' adjective", ["adjective::taru", "\u305F\u308B-adjective", "\u5F62\u5BB9\u52D5\u8A5E"]],
|
|
117
122
|
[
|
|
@@ -1246,14 +1251,14 @@ var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
|
|
|
1246
1251
|
function capitalizeString(value) {
|
|
1247
1252
|
return value.charAt(0).toUpperCase() + value.slice(1);
|
|
1248
1253
|
}
|
|
1249
|
-
function isValidArray(arg) {
|
|
1250
|
-
return arg !== null && arg !== void 0 && Array.isArray(arg);
|
|
1251
|
-
}
|
|
1252
1254
|
function isValidArrayWithFirstElement(arg) {
|
|
1253
|
-
return
|
|
1255
|
+
return Array.isArray(arg) && arg.length > 0;
|
|
1254
1256
|
}
|
|
1255
1257
|
function isStringArray(arg) {
|
|
1256
|
-
return
|
|
1258
|
+
return isValidArrayWithFirstElement(arg) && arg.every((element) => typeof element === "string");
|
|
1259
|
+
}
|
|
1260
|
+
function isObjectArray(arg) {
|
|
1261
|
+
return isValidArrayWithFirstElement(arg) && arg.every((element) => typeof element === "object");
|
|
1257
1262
|
}
|
|
1258
1263
|
function shuffleArray(arr) {
|
|
1259
1264
|
const a = arr.slice();
|
|
@@ -1265,6 +1270,37 @@ function shuffleArray(arr) {
|
|
|
1265
1270
|
}
|
|
1266
1271
|
return a;
|
|
1267
1272
|
}
|
|
1273
|
+
function getValidForms(readings, kanjiForms, wordIsCommon) {
|
|
1274
|
+
const kanjiFormRestrictions = /* @__PURE__ */ new Set();
|
|
1275
|
+
const validReadings = readings.filter(
|
|
1276
|
+
(reading) => {
|
|
1277
|
+
if (reading.notes === void 0 || !reading.notes.some((note) => notSearchedForms.has(note))) {
|
|
1278
|
+
if (reading.kanjiFormRestrictions) {
|
|
1279
|
+
for (const kfr of reading.kanjiFormRestrictions)
|
|
1280
|
+
kanjiFormRestrictions.add(kfr);
|
|
1281
|
+
return true;
|
|
1282
|
+
}
|
|
1283
|
+
if (wordIsCommon === void 0 || reading.commonness !== void 0)
|
|
1284
|
+
return true;
|
|
1285
|
+
}
|
|
1286
|
+
return false;
|
|
1287
|
+
}
|
|
1288
|
+
);
|
|
1289
|
+
const existValidKf = kanjiForms ? kanjiForms.some(
|
|
1290
|
+
(kf) => (kf.notes === void 0 || !kf.notes.some((note) => notSearchedForms.has(note))) && (wordIsCommon === void 0 || kf.commonness !== void 0) || kanjiFormRestrictions.has(kf.form)
|
|
1291
|
+
) : void 0;
|
|
1292
|
+
const validKanjiForms = kanjiForms ? kanjiForms.filter((kanjiForm) => {
|
|
1293
|
+
if (existValidKf === true)
|
|
1294
|
+
return (kanjiForm.notes === void 0 || !kanjiForm.notes.some(
|
|
1295
|
+
(note) => notSearchedForms.has(note)
|
|
1296
|
+
)) && (wordIsCommon === void 0 || kanjiForm.commonness !== void 0) || kanjiFormRestrictions.has(kanjiForm.form);
|
|
1297
|
+
else return true;
|
|
1298
|
+
}) : void 0;
|
|
1299
|
+
return {
|
|
1300
|
+
readings: validReadings,
|
|
1301
|
+
...validKanjiForms ? { kanjiForms: validKanjiForms } : {}
|
|
1302
|
+
};
|
|
1303
|
+
}
|
|
1268
1304
|
function convertJMdict(xmlString, examples) {
|
|
1269
1305
|
const dictParsed = import_libxmljs2.default.parseXml(xmlString, {
|
|
1270
1306
|
dtdvalid: true,
|
|
@@ -1273,144 +1309,116 @@ function convertJMdict(xmlString, examples) {
|
|
|
1273
1309
|
recover: false
|
|
1274
1310
|
});
|
|
1275
1311
|
const dict = [];
|
|
1276
|
-
import_xml2js.default.parseString(dictParsed, (
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1312
|
+
import_xml2js.default.parseString(dictParsed, (_err, result) => {
|
|
1313
|
+
const tanakaParts = examples && examples.length > 0 ? new Set(
|
|
1314
|
+
examples.flatMap(
|
|
1315
|
+
(example) => example.parts.flatMap((part) => [
|
|
1316
|
+
part.baseForm,
|
|
1317
|
+
...part.reading ? [part.reading] : [],
|
|
1318
|
+
...part.inflectedForm ? [part.inflectedForm] : [],
|
|
1319
|
+
...part.referenceID ? [part.referenceID] : []
|
|
1320
|
+
])
|
|
1321
|
+
)
|
|
1322
|
+
) : void 0;
|
|
1323
|
+
for (const entry of result.JMdict.entry) {
|
|
1324
|
+
const entryObj = {
|
|
1325
|
+
id: entry.ent_seq[0],
|
|
1326
|
+
readings: [],
|
|
1327
|
+
meanings: []
|
|
1328
|
+
};
|
|
1329
|
+
const kanjiForms = entry.k_ele;
|
|
1330
|
+
const readings = entry.r_ele;
|
|
1331
|
+
const meanings = entry.sense;
|
|
1332
|
+
if (isObjectArray(kanjiForms)) {
|
|
1333
|
+
entryObj.kanjiForms = [];
|
|
1334
|
+
for (const kanjiForm of kanjiForms) {
|
|
1335
|
+
const form = {
|
|
1336
|
+
form: kanjiForm.keb[0]
|
|
1293
1337
|
};
|
|
1294
|
-
if (
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
if (meaningObj.misc && meaningObj.misc.includes(
|
|
1366
|
-
"word usually written using kana alone"
|
|
1367
|
-
))
|
|
1368
|
-
usuallyInKanaMeanings++;
|
|
1369
|
-
}
|
|
1370
|
-
if (isStringArray(meaning.dial))
|
|
1371
|
-
meaningObj.dialects = meaning.dial;
|
|
1372
|
-
if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0)
|
|
1373
|
-
entryObj.meanings.push(meaningObj);
|
|
1374
|
-
}
|
|
1375
|
-
if (entryObj.meanings.length === usuallyInKanaMeanings)
|
|
1376
|
-
entryObj.usuallyInKana = true;
|
|
1377
|
-
}
|
|
1378
|
-
if (examples) {
|
|
1379
|
-
const readings2 = new Set(
|
|
1380
|
-
entryObj.readings.filter(
|
|
1381
|
-
(reading) => reading.notes === void 0 || !reading.notes.some(
|
|
1382
|
-
(note) => notSearchedForms.has(note)
|
|
1383
|
-
) || reading.commonness
|
|
1384
|
-
).map((reading) => reading.reading)
|
|
1385
|
-
);
|
|
1386
|
-
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1387
|
-
entryObj.kanjiForms.map(
|
|
1388
|
-
(kanjiForm) => kanjiForm.form
|
|
1389
|
-
)
|
|
1390
|
-
) : void 0;
|
|
1391
|
-
let existsExample = false;
|
|
1392
|
-
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1393
|
-
for (const kf of kanjiForms2)
|
|
1394
|
-
if (tanakaParts.has(kf)) {
|
|
1395
|
-
existsExample = true;
|
|
1396
|
-
break;
|
|
1397
|
-
}
|
|
1398
|
-
}
|
|
1399
|
-
if (!existsExample && readings2.size > 0 && tanakaParts) {
|
|
1400
|
-
for (const r of readings2)
|
|
1401
|
-
if (tanakaParts.has(r)) {
|
|
1402
|
-
existsExample = true;
|
|
1403
|
-
break;
|
|
1404
|
-
}
|
|
1338
|
+
if (isStringArray(kanjiForm.ke_inf)) form.notes = kanjiForm.ke_inf;
|
|
1339
|
+
if (isStringArray(kanjiForm.ke_pri)) {
|
|
1340
|
+
form.commonness = kanjiForm.ke_pri;
|
|
1341
|
+
if (entryObj.isCommon === void 0) entryObj.isCommon = true;
|
|
1342
|
+
}
|
|
1343
|
+
entryObj.kanjiForms.push(form);
|
|
1344
|
+
}
|
|
1345
|
+
}
|
|
1346
|
+
for (const reading of readings) {
|
|
1347
|
+
const readingObj = {
|
|
1348
|
+
reading: reading.reb[0]
|
|
1349
|
+
};
|
|
1350
|
+
if (isStringArray(reading.re_inf)) readingObj.notes = reading.re_inf;
|
|
1351
|
+
if (isStringArray(reading.re_restr))
|
|
1352
|
+
readingObj.kanjiFormRestrictions = reading.re_restr;
|
|
1353
|
+
if (isStringArray(reading.re_pri)) {
|
|
1354
|
+
readingObj.commonness = reading.re_pri;
|
|
1355
|
+
if (entryObj.isCommon === void 0) entryObj.isCommon = true;
|
|
1356
|
+
}
|
|
1357
|
+
entryObj.readings.push(readingObj);
|
|
1358
|
+
}
|
|
1359
|
+
let usuallyInKanaMeanings = 0;
|
|
1360
|
+
for (const meaning of meanings) {
|
|
1361
|
+
const meaningObj = { partOfSpeech: [], translations: [] };
|
|
1362
|
+
meaningObj.partOfSpeech = meaning.pos;
|
|
1363
|
+
meaningObj.translations = [];
|
|
1364
|
+
for (const gloss of meaning.gloss)
|
|
1365
|
+
if (typeof gloss === "string") meaningObj.translations.push(gloss);
|
|
1366
|
+
else if (typeof gloss === "object" && gloss._ && typeof gloss._ === "string" && gloss.$ && typeof gloss.$ === "object" && gloss.$.g_type && (gloss.$.g_type === "lit" || gloss.$.g_type === "expl" || gloss.$.g_type === "tm"))
|
|
1367
|
+
meaningObj.translations.push({
|
|
1368
|
+
translation: gloss._,
|
|
1369
|
+
type: gloss.$.g_type
|
|
1370
|
+
});
|
|
1371
|
+
if (isStringArray(meaning.xref)) meaningObj.references = meaning.xref;
|
|
1372
|
+
if (isStringArray(meaning.stagk))
|
|
1373
|
+
meaningObj.kanjiFormRestrictions = meaning.stagk;
|
|
1374
|
+
if (isStringArray(meaning.stagr))
|
|
1375
|
+
meaningObj.readingRestrictions = meaning.stagr;
|
|
1376
|
+
if (isStringArray(meaning.ant)) meaningObj.antonyms = meaning.ant;
|
|
1377
|
+
if (isStringArray(meaning.field)) meaningObj.fields = meaning.field;
|
|
1378
|
+
if (isStringArray(meaning.s_inf)) meaningObj.info = meaning.s_inf;
|
|
1379
|
+
if (isStringArray(meaning.misc)) {
|
|
1380
|
+
meaningObj.misc = meaning.misc;
|
|
1381
|
+
if (meaningObj.misc && meaningObj.misc.includes("word usually written using kana alone"))
|
|
1382
|
+
usuallyInKanaMeanings++;
|
|
1383
|
+
}
|
|
1384
|
+
if (isStringArray(meaning.dial)) meaningObj.dialects = meaning.dial;
|
|
1385
|
+
entryObj.meanings.push(meaningObj);
|
|
1386
|
+
}
|
|
1387
|
+
if (entryObj.meanings.length === usuallyInKanaMeanings)
|
|
1388
|
+
entryObj.usuallyInKana = true;
|
|
1389
|
+
if (examples) {
|
|
1390
|
+
let existsExample = false;
|
|
1391
|
+
if (tanakaParts && tanakaParts.has(entryObj.id)) existsExample = true;
|
|
1392
|
+
if (!existsExample) {
|
|
1393
|
+
const rkf = getValidForms(
|
|
1394
|
+
entryObj.readings,
|
|
1395
|
+
entryObj.kanjiForms,
|
|
1396
|
+
entryObj.isCommon
|
|
1397
|
+
);
|
|
1398
|
+
const readings2 = new Set(
|
|
1399
|
+
rkf.readings.map((r) => r.reading)
|
|
1400
|
+
);
|
|
1401
|
+
const kanjiForms2 = rkf.kanjiForms ? new Set(
|
|
1402
|
+
rkf.kanjiForms.map((kf) => kf.form)
|
|
1403
|
+
) : void 0;
|
|
1404
|
+
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1405
|
+
for (const kf of kanjiForms2)
|
|
1406
|
+
if (tanakaParts.has(kf)) {
|
|
1407
|
+
existsExample = true;
|
|
1408
|
+
break;
|
|
1405
1409
|
}
|
|
1406
|
-
|
|
1410
|
+
}
|
|
1411
|
+
if (entryObj.kanjiForms === void 0 && readings2.size > 0 && tanakaParts) {
|
|
1412
|
+
for (const r of readings2)
|
|
1413
|
+
if (tanakaParts.has(r)) {
|
|
1407
1414
|
existsExample = true;
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1411
|
-
dict.push(entryObj);
|
|
1415
|
+
break;
|
|
1416
|
+
}
|
|
1412
1417
|
}
|
|
1413
1418
|
}
|
|
1419
|
+
if (existsExample) entryObj.hasPhrases = true;
|
|
1420
|
+
}
|
|
1421
|
+
dict.push(entryObj);
|
|
1414
1422
|
}
|
|
1415
1423
|
});
|
|
1416
1424
|
return dict;
|
|
@@ -1423,75 +1431,64 @@ function convertKanjiDic(xmlString) {
|
|
|
1423
1431
|
recover: false
|
|
1424
1432
|
});
|
|
1425
1433
|
const dict = [];
|
|
1426
|
-
import_xml2js.default.parseString(dictParsed, (
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
const
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
});
|
|
1462
|
-
}
|
|
1463
|
-
if (isValidArray(group.meaning)) {
|
|
1464
|
-
for (const meaning of group.meaning)
|
|
1465
|
-
if (typeof meaning === "string") {
|
|
1466
|
-
if (kanjiObj.isKokuji === void 0 && meaning === "(kokuji)")
|
|
1467
|
-
kanjiObj.isKokuji = true;
|
|
1468
|
-
groupObj.meanings.push(meaning);
|
|
1469
|
-
}
|
|
1470
|
-
}
|
|
1471
|
-
if (groupObj.readings.length > 0 || groupObj.meanings.length > 0) {
|
|
1472
|
-
if (groupObj.readings.length === 0)
|
|
1473
|
-
delete groupObj.readings;
|
|
1474
|
-
if (groupObj.meanings.length === 0)
|
|
1475
|
-
delete groupObj.meanings;
|
|
1476
|
-
rmObj.groups.push(groupObj);
|
|
1434
|
+
import_xml2js.default.parseString(dictParsed, (_err, result) => {
|
|
1435
|
+
for (const entry of result.kanjidic2.character) {
|
|
1436
|
+
const kanjiObj = {
|
|
1437
|
+
kanji: entry.literal[0],
|
|
1438
|
+
readingMeaning: []
|
|
1439
|
+
};
|
|
1440
|
+
if (typeof kanjiObj.kanji === "string" && kanjiObj.kanji.length === 1) {
|
|
1441
|
+
const misc = entry.misc[0];
|
|
1442
|
+
kanjiObj.misc = { strokeNumber: misc.stroke_count[0] };
|
|
1443
|
+
if (isStringArray(misc.grade)) kanjiObj.misc.grade = misc.grade[0];
|
|
1444
|
+
if (isStringArray(misc.freq)) kanjiObj.misc.frequency = misc.freq[0];
|
|
1445
|
+
if (isStringArray(misc.jlpt) && ["5", "4", "3", "2", "1"].includes(misc.jlpt[0]))
|
|
1446
|
+
kanjiObj.misc.jlpt = `N${misc.jlpt[0]}`;
|
|
1447
|
+
if (isObjectArray(entry.reading_meaning))
|
|
1448
|
+
for (const rm of entry.reading_meaning) {
|
|
1449
|
+
const rmObj = { groups: [] };
|
|
1450
|
+
for (const group of rm.rmgroup) {
|
|
1451
|
+
const groupObj = {
|
|
1452
|
+
readings: [],
|
|
1453
|
+
meanings: []
|
|
1454
|
+
};
|
|
1455
|
+
if (isObjectArray(group.reading)) {
|
|
1456
|
+
for (const reading of group.reading)
|
|
1457
|
+
if (reading._ && typeof reading._ === "string" && reading.$ && typeof reading.$ === "object" && reading.$.r_type && (reading.$.r_type === "ja_on" || reading.$.r_type === "ja_kun"))
|
|
1458
|
+
groupObj.readings.push({
|
|
1459
|
+
reading: reading._,
|
|
1460
|
+
type: reading.$.r_type
|
|
1461
|
+
});
|
|
1462
|
+
}
|
|
1463
|
+
if (Array.isArray(group.meaning)) {
|
|
1464
|
+
for (const meaning of group.meaning)
|
|
1465
|
+
if (typeof meaning === "string") {
|
|
1466
|
+
if (kanjiObj.isKokuji === void 0 && meaning === "(kokuji)") {
|
|
1467
|
+
kanjiObj.isKokuji = true;
|
|
1468
|
+
continue;
|
|
1477
1469
|
}
|
|
1470
|
+
groupObj.meanings.push(meaning);
|
|
1478
1471
|
}
|
|
1479
|
-
if (isStringArray(rm.nanori) && rm.nanori.length > 0)
|
|
1480
|
-
rmObj.nanori = rm.nanori;
|
|
1481
|
-
if (rmObj.groups.length > 0 || rmObj.nanori) {
|
|
1482
|
-
if (kanjiObj.readingMeaning === void 0)
|
|
1483
|
-
kanjiObj.readingMeaning = [];
|
|
1484
|
-
kanjiObj.readingMeaning.push(rmObj);
|
|
1485
|
-
}
|
|
1486
1472
|
}
|
|
1487
|
-
|
|
1473
|
+
if (groupObj.readings.length > 0 || groupObj.meanings.length > 0) {
|
|
1474
|
+
if (groupObj.readings.length === 0) delete groupObj.readings;
|
|
1475
|
+
if (groupObj.meanings.length === 0) delete groupObj.meanings;
|
|
1476
|
+
rmObj.groups.push(groupObj);
|
|
1477
|
+
}
|
|
1478
|
+
}
|
|
1479
|
+
if (isStringArray(rm.nanori) && rm.nanori.length > 0)
|
|
1480
|
+
rmObj.nanori = rm.nanori;
|
|
1481
|
+
if (rmObj.groups.length > 0 || rmObj.nanori)
|
|
1482
|
+
kanjiObj.readingMeaning.push(rmObj);
|
|
1488
1483
|
}
|
|
1489
|
-
|
|
1484
|
+
dict.push(kanjiObj);
|
|
1485
|
+
}
|
|
1490
1486
|
}
|
|
1491
1487
|
});
|
|
1492
1488
|
return dict;
|
|
1493
1489
|
}
|
|
1494
1490
|
function convertTanakaCorpus(tanakaString) {
|
|
1491
|
+
var _a;
|
|
1495
1492
|
const tanakaArray = [];
|
|
1496
1493
|
const tanakaParsed = tanakaString.split("\n");
|
|
1497
1494
|
for (let i = 0; i <= tanakaParsed.length; i += 2) {
|
|
@@ -1501,46 +1498,38 @@ function convertTanakaCorpus(tanakaString) {
|
|
|
1501
1498
|
a = a.replace("A: ", "");
|
|
1502
1499
|
b = b.replace("B: ", "");
|
|
1503
1500
|
const idMatch = regexps.tanakaID.exec(a);
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
if (inflectedForm) examplePart.inflectedForm = inflectedForm;
|
|
1526
|
-
if (baseForm.endsWith("~")) {
|
|
1527
|
-
examplePart.edited = true;
|
|
1528
|
-
examplePart.baseForm = examplePart.baseForm.replace("~", "");
|
|
1529
|
-
}
|
|
1530
|
-
bParts.push(examplePart);
|
|
1531
|
-
}
|
|
1532
|
-
}
|
|
1501
|
+
const aParts = a.replace(regexps.tanakaID, "").split(" ");
|
|
1502
|
+
const bRawParts = b.split(" ").filter((part) => part.trim().length !== 0);
|
|
1503
|
+
const bParts = [];
|
|
1504
|
+
for (const part of bRawParts) {
|
|
1505
|
+
const partMatches = regexps.tanakaPart.exec(part);
|
|
1506
|
+
const baseForm = partMatches == null ? void 0 : partMatches.groups["base"];
|
|
1507
|
+
const examplePart = { baseForm };
|
|
1508
|
+
const reading = partMatches == null ? void 0 : partMatches.groups["reading"];
|
|
1509
|
+
const glossNumber = partMatches == null ? void 0 : partMatches.groups["glossnum"];
|
|
1510
|
+
const inflectedForm = partMatches == null ? void 0 : partMatches.groups["inflection"];
|
|
1511
|
+
if (reading)
|
|
1512
|
+
if (regexps.tanakaReferenceID.test(reading)) {
|
|
1513
|
+
const referenceID = regexps.tanakaReferenceID.exec(reading);
|
|
1514
|
+
examplePart.referenceID = referenceID == null ? void 0 : referenceID.groups["entryid"];
|
|
1515
|
+
} else examplePart.reading = reading;
|
|
1516
|
+
if (glossNumber)
|
|
1517
|
+
examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
|
|
1518
|
+
if (inflectedForm) examplePart.inflectedForm = inflectedForm;
|
|
1519
|
+
if (baseForm.endsWith("~")) {
|
|
1520
|
+
examplePart.edited = true;
|
|
1521
|
+
examplePart.baseForm = examplePart.baseForm.replace("~", "");
|
|
1533
1522
|
}
|
|
1534
|
-
|
|
1535
|
-
const translation = aParts[1];
|
|
1536
|
-
if (phrase && translation)
|
|
1537
|
-
tanakaArray.push({
|
|
1538
|
-
id: idMatch.groups["id"].trim(),
|
|
1539
|
-
phrase: phrase.trim(),
|
|
1540
|
-
translation: translation.trim(),
|
|
1541
|
-
parts: bParts
|
|
1542
|
-
});
|
|
1523
|
+
bParts.push(examplePart);
|
|
1543
1524
|
}
|
|
1525
|
+
const phrase = aParts[0];
|
|
1526
|
+
const translation = aParts[1];
|
|
1527
|
+
tanakaArray.push({
|
|
1528
|
+
id: (_a = idMatch == null ? void 0 : idMatch.groups["id"]) == null ? void 0 : _a.trim(),
|
|
1529
|
+
phrase: phrase.trim(),
|
|
1530
|
+
translation: translation.trim(),
|
|
1531
|
+
parts: bParts
|
|
1532
|
+
});
|
|
1544
1533
|
}
|
|
1545
1534
|
}
|
|
1546
1535
|
return tanakaArray;
|
|
@@ -1580,27 +1569,23 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1580
1569
|
};
|
|
1581
1570
|
let j = i + 1;
|
|
1582
1571
|
let kanjiLine = fileParsed[j];
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
kanjiList.push(kanjiObj);
|
|
1594
|
-
}
|
|
1595
|
-
j++;
|
|
1596
|
-
kanjiLine = fileParsed[j];
|
|
1597
|
-
if (!kanjiLine) continue;
|
|
1598
|
-
if (kanjiLine.startsWith("$ ")) i = j - 1;
|
|
1572
|
+
const kanjiList = [];
|
|
1573
|
+
while (kanjiLine && !kanjiLine.startsWith("$ ")) {
|
|
1574
|
+
const kanjis = kanjiLine.split("");
|
|
1575
|
+
for (const kanji of kanjis) {
|
|
1576
|
+
const foundKanji = kanjiDic.find(
|
|
1577
|
+
(dictKanji) => dictKanji.kanji === kanji
|
|
1578
|
+
);
|
|
1579
|
+
let kanjiObj = { kanji };
|
|
1580
|
+
kanjiObj = foundKanji;
|
|
1581
|
+
kanjiList.push(kanjiObj);
|
|
1599
1582
|
}
|
|
1600
|
-
|
|
1601
|
-
if (
|
|
1602
|
-
|
|
1583
|
+
kanjiLine = fileParsed[++j];
|
|
1584
|
+
if (!kanjiLine) continue;
|
|
1585
|
+
if (kanjiLine.startsWith("$ ")) i = j - 1;
|
|
1603
1586
|
}
|
|
1587
|
+
if (kanjiList.length > 0) radical.kanji = kanjiList;
|
|
1588
|
+
radicals.push(radical);
|
|
1604
1589
|
}
|
|
1605
1590
|
}
|
|
1606
1591
|
return radicals;
|
|
@@ -1613,66 +1598,185 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
|
|
|
1613
1598
|
const split = line.split(" : ");
|
|
1614
1599
|
const kanjiChar = split[0];
|
|
1615
1600
|
const radicalsRow = split[1];
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1601
|
+
const kanji = {
|
|
1602
|
+
kanji: kanjiChar,
|
|
1603
|
+
radicals: []
|
|
1604
|
+
};
|
|
1605
|
+
const radicals = radicalsRow.split(" ");
|
|
1606
|
+
for (const radical of radicals) {
|
|
1607
|
+
const foundRadical = kanjiDic.find(
|
|
1608
|
+
(dictKanji) => dictKanji.kanji === radical
|
|
1609
|
+
);
|
|
1610
|
+
let radicalObj = foundRadical != null ? foundRadical : { kanji: radical };
|
|
1611
|
+
if (!foundRadical) {
|
|
1612
|
+
const katakanaChar = katakanaList.find(
|
|
1613
|
+
(kana) => kana.kana === radical
|
|
1614
|
+
);
|
|
1615
|
+
if (!katakanaChar) continue;
|
|
1616
|
+
radicalObj = {
|
|
1617
|
+
kanji: katakanaChar.kana,
|
|
1618
|
+
readingMeaning: [
|
|
1619
|
+
{
|
|
1620
|
+
groups: [
|
|
1636
1621
|
{
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
readings: [
|
|
1640
|
-
{ reading: katakanaChar.kana, type: "ja_on" }
|
|
1641
|
-
],
|
|
1642
|
-
meanings: [katakanaChar.reading]
|
|
1643
|
-
}
|
|
1644
|
-
]
|
|
1622
|
+
readings: [{ reading: katakanaChar.kana, type: "ja_on" }],
|
|
1623
|
+
meanings: [katakanaChar.reading]
|
|
1645
1624
|
}
|
|
1646
1625
|
]
|
|
1647
|
-
}
|
|
1626
|
+
}
|
|
1627
|
+
]
|
|
1628
|
+
};
|
|
1629
|
+
}
|
|
1630
|
+
kanji.radicals.push(radicalObj);
|
|
1631
|
+
}
|
|
1632
|
+
if (kanji.kanji.length === 1 && kanji.radicals.length > 0)
|
|
1633
|
+
kanjiWithRadicals.push(kanji);
|
|
1634
|
+
}
|
|
1635
|
+
}
|
|
1636
|
+
return kanjiWithRadicals;
|
|
1637
|
+
}
|
|
1638
|
+
function createEntryMaps(jmDict, kanjiDic, tanakaExamples, wordDefinitionPairs, svgList) {
|
|
1639
|
+
var _a;
|
|
1640
|
+
const kanjiEntryMap = /* @__PURE__ */ new Map();
|
|
1641
|
+
const wordIDEntryMap = /* @__PURE__ */ new Map();
|
|
1642
|
+
const kanjiWordsMap = /* @__PURE__ */ new Map();
|
|
1643
|
+
const wordExamplesMap = /* @__PURE__ */ new Map();
|
|
1644
|
+
const wordDefinitionsMap = /* @__PURE__ */ new Map();
|
|
1645
|
+
const kanjiSVGMap = /* @__PURE__ */ new Map();
|
|
1646
|
+
const wordPartsMap = /* @__PURE__ */ new Map();
|
|
1647
|
+
const partExamplesMap = /* @__PURE__ */ new Map();
|
|
1648
|
+
const entryParts = /* @__PURE__ */ new Set();
|
|
1649
|
+
if (kanjiDic)
|
|
1650
|
+
for (const kanji of kanjiDic) kanjiEntryMap.set(kanji.kanji, kanji);
|
|
1651
|
+
if (wordDefinitionPairs)
|
|
1652
|
+
for (const pair of wordDefinitionPairs)
|
|
1653
|
+
wordDefinitionsMap.set(pair.wordID, pair.definitions);
|
|
1654
|
+
if (kanjiDic && svgList)
|
|
1655
|
+
for (const kanji of kanjiDic) {
|
|
1656
|
+
const codePoint = kanji.kanji.codePointAt(0).toString(16).toLowerCase();
|
|
1657
|
+
const svg = svgList.find((file) => {
|
|
1658
|
+
const baseName = file.split(".")[0].toLowerCase();
|
|
1659
|
+
return baseName === codePoint || baseName === `0${codePoint}`;
|
|
1660
|
+
});
|
|
1661
|
+
if (svg) kanjiSVGMap.set(kanji.kanji, svg);
|
|
1662
|
+
}
|
|
1663
|
+
if (jmDict) {
|
|
1664
|
+
for (const word of jmDict) {
|
|
1665
|
+
wordIDEntryMap.set(word.id, word);
|
|
1666
|
+
if (word.kanjiForms)
|
|
1667
|
+
for (const kf of word.kanjiForms)
|
|
1668
|
+
for (const char of kf.form.split("").filter((c) => regexps.kanji.test(c))) {
|
|
1669
|
+
if (!kanjiWordsMap.has(char)) kanjiWordsMap.set(char, [word]);
|
|
1670
|
+
else kanjiWordsMap.get(char).push(word);
|
|
1671
|
+
}
|
|
1672
|
+
if (tanakaExamples) {
|
|
1673
|
+
const rkf = getValidForms(
|
|
1674
|
+
word.readings,
|
|
1675
|
+
word.kanjiForms,
|
|
1676
|
+
word.isCommon
|
|
1677
|
+
);
|
|
1678
|
+
const localPartParts = /* @__PURE__ */ new Set();
|
|
1679
|
+
if (rkf.readings.length > 0)
|
|
1680
|
+
for (const reading of rkf.readings) {
|
|
1681
|
+
entryParts.add(reading.reading);
|
|
1682
|
+
localPartParts.add(reading.reading);
|
|
1683
|
+
}
|
|
1684
|
+
if (rkf.kanjiForms && rkf.kanjiForms.length > 0)
|
|
1685
|
+
for (const kanjiForm of rkf.kanjiForms) {
|
|
1686
|
+
entryParts.add(kanjiForm.form);
|
|
1687
|
+
localPartParts.add(kanjiForm.form);
|
|
1688
|
+
}
|
|
1689
|
+
entryParts.add(word.id);
|
|
1690
|
+
localPartParts.add(word.id);
|
|
1691
|
+
wordPartsMap.set(word.id, localPartParts);
|
|
1692
|
+
}
|
|
1693
|
+
}
|
|
1694
|
+
if (tanakaExamples) {
|
|
1695
|
+
for (const ex of tanakaExamples) {
|
|
1696
|
+
for (const part of ex.parts) {
|
|
1697
|
+
if (entryParts.has(part.baseForm)) {
|
|
1698
|
+
let exList = partExamplesMap.get(
|
|
1699
|
+
part.baseForm
|
|
1700
|
+
);
|
|
1701
|
+
if (!exList) {
|
|
1702
|
+
exList = [];
|
|
1703
|
+
partExamplesMap.set(part.baseForm, exList);
|
|
1704
|
+
}
|
|
1705
|
+
exList.push(ex);
|
|
1706
|
+
}
|
|
1707
|
+
if (part.reading && entryParts.has(part.reading)) {
|
|
1708
|
+
let exList = partExamplesMap.get(
|
|
1709
|
+
part.reading
|
|
1710
|
+
);
|
|
1711
|
+
if (!exList) {
|
|
1712
|
+
exList = [];
|
|
1713
|
+
partExamplesMap.set(part.reading, exList);
|
|
1714
|
+
}
|
|
1715
|
+
exList.push(ex);
|
|
1716
|
+
}
|
|
1717
|
+
if (part.inflectedForm && entryParts.has(part.inflectedForm)) {
|
|
1718
|
+
let exList = partExamplesMap.get(
|
|
1719
|
+
part.inflectedForm
|
|
1720
|
+
);
|
|
1721
|
+
if (!exList) {
|
|
1722
|
+
exList = [];
|
|
1723
|
+
partExamplesMap.set(part.inflectedForm, exList);
|
|
1724
|
+
}
|
|
1725
|
+
exList.push(ex);
|
|
1726
|
+
}
|
|
1727
|
+
if (part.referenceID && entryParts.has(part.referenceID)) {
|
|
1728
|
+
let exList = partExamplesMap.get(
|
|
1729
|
+
part.referenceID
|
|
1730
|
+
);
|
|
1731
|
+
if (!exList) {
|
|
1732
|
+
exList = [];
|
|
1733
|
+
partExamplesMap.set(part.referenceID, exList);
|
|
1648
1734
|
}
|
|
1649
|
-
|
|
1735
|
+
exList.push(ex);
|
|
1650
1736
|
}
|
|
1651
1737
|
}
|
|
1652
|
-
|
|
1653
|
-
|
|
1738
|
+
}
|
|
1739
|
+
for (const word of jmDict) {
|
|
1740
|
+
const entryParts2 = wordPartsMap.get(word.id);
|
|
1741
|
+
const seenEx = /* @__PURE__ */ new Set();
|
|
1742
|
+
const validExamples = [];
|
|
1743
|
+
for (const p of entryParts2) {
|
|
1744
|
+
const examplesForPart = (_a = partExamplesMap.get(p)) == null ? void 0 : _a.filter((ex) => !seenEx.has(ex.id));
|
|
1745
|
+
if (!examplesForPart) continue;
|
|
1746
|
+
for (const ex of examplesForPart) {
|
|
1747
|
+
seenEx.add(ex.id);
|
|
1748
|
+
validExamples.push(ex);
|
|
1749
|
+
}
|
|
1750
|
+
}
|
|
1751
|
+
if (validExamples.length > 0)
|
|
1752
|
+
wordExamplesMap.set(word.id, validExamples);
|
|
1654
1753
|
}
|
|
1655
1754
|
}
|
|
1656
1755
|
}
|
|
1657
|
-
return
|
|
1756
|
+
return {
|
|
1757
|
+
...wordIDEntryMap.size > 0 ? { wordIDEntryMap } : {},
|
|
1758
|
+
...kanjiWordsMap.size > 0 ? { kanjiWordsMap } : {},
|
|
1759
|
+
...kanjiEntryMap.size > 0 ? { kanjiEntryMap } : {},
|
|
1760
|
+
...wordExamplesMap.size > 0 ? { wordExamplesMap } : {},
|
|
1761
|
+
...wordDefinitionsMap.size > 0 ? { wordDefinitionsMap } : {},
|
|
1762
|
+
...kanjiSVGMap.size > 0 ? { kanjiSVGMap } : {}
|
|
1763
|
+
};
|
|
1658
1764
|
}
|
|
1659
1765
|
function mapEntry(entry) {
|
|
1660
1766
|
return {
|
|
1661
1767
|
word: entry.word,
|
|
1662
1768
|
pos_title: entry.pos_title,
|
|
1663
1769
|
senses: entry.senses.filter(
|
|
1664
|
-
(sense) =>
|
|
1770
|
+
(sense) => isObjectArray(sense.form_of) && sense.form_of.every(
|
|
1665
1771
|
(form) => form.word && typeof form.word === "string"
|
|
1666
1772
|
) || isStringArray(sense.glosses)
|
|
1667
1773
|
).map((sense) => ({
|
|
1668
1774
|
...sense.form_of ? {
|
|
1669
|
-
form_of: sense.form_of.map((form) =>
|
|
1670
|
-
word: form.word
|
|
1671
|
-
}))
|
|
1775
|
+
form_of: sense.form_of.map((form) => form.word)
|
|
1672
1776
|
} : {},
|
|
1673
1777
|
glosses: sense.glosses
|
|
1674
1778
|
})),
|
|
1675
|
-
...
|
|
1779
|
+
...isObjectArray(entry.forms) && entry.forms.every((form) => typeof form.form === "string") ? { forms: entry.forms.map((form) => form.form) } : {}
|
|
1676
1780
|
};
|
|
1677
1781
|
}
|
|
1678
1782
|
function convertJawiktionarySync(buffer) {
|
|
@@ -1682,7 +1786,7 @@ function convertJawiktionarySync(buffer) {
|
|
|
1682
1786
|
const line = lines[i];
|
|
1683
1787
|
if (!line) continue;
|
|
1684
1788
|
const obj = JSON.parse(line);
|
|
1685
|
-
if (obj && typeof obj === "object" && obj.
|
|
1789
|
+
if (obj && typeof obj === "object" && typeof obj.lang === "string" && (obj.lang === "\u65E5\u672C\u8A9E" || obj.lang === "\u53E4\u5178\u65E5\u672C\u8A9E"))
|
|
1686
1790
|
entries.push(mapEntry(obj));
|
|
1687
1791
|
}
|
|
1688
1792
|
return entries;
|
|
@@ -1697,7 +1801,7 @@ async function convertJawiktionaryAsync(stream) {
|
|
|
1697
1801
|
const entries = [];
|
|
1698
1802
|
for await (const line of rl) {
|
|
1699
1803
|
const obj = JSON.parse(line.trim());
|
|
1700
|
-
if (obj && typeof obj === "object" && obj.
|
|
1804
|
+
if (obj && typeof obj === "object" && typeof obj.lang === "string" && (obj.lang === "\u65E5\u672C\u8A9E" || obj.lang === "\u53E4\u5178\u65E5\u672C\u8A9E"))
|
|
1701
1805
|
entries.push(mapEntry(obj));
|
|
1702
1806
|
}
|
|
1703
1807
|
rl.close();
|
|
@@ -1708,17 +1812,14 @@ async function convertJawiktionaryAsync(stream) {
|
|
|
1708
1812
|
);
|
|
1709
1813
|
}
|
|
1710
1814
|
function parseEntry(entry, definitions, definitionMap) {
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
definitions.push({ definition });
|
|
1720
|
-
}
|
|
1721
|
-
}
|
|
1815
|
+
for (const sense of entry.senses) {
|
|
1816
|
+
const definition = sense.glosses.join("");
|
|
1817
|
+
if (!definitions.some((def) => def.definition === definition)) {
|
|
1818
|
+
if (!definitionMap.has(definition))
|
|
1819
|
+
definitionMap.set(definition, { count: 1 });
|
|
1820
|
+
else definitionMap.get(definition).count++;
|
|
1821
|
+
definitions.push({ definition });
|
|
1822
|
+
}
|
|
1722
1823
|
}
|
|
1723
1824
|
}
|
|
1724
1825
|
function getWordDefinitions(entryList, jmDict) {
|
|
@@ -1726,31 +1827,40 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1726
1827
|
const entries = /* @__PURE__ */ new Map();
|
|
1727
1828
|
for (const entry of entryList) {
|
|
1728
1829
|
const ent = entries.get(entry.word);
|
|
1729
|
-
if (ent) ent.push(
|
|
1730
|
-
else entries.set(entry.word, [
|
|
1830
|
+
if (ent) ent.push(entry);
|
|
1831
|
+
else entries.set(entry.word, [entry]);
|
|
1731
1832
|
}
|
|
1732
1833
|
const japaneseDefinitions = [];
|
|
1733
1834
|
const definitionMap = /* @__PURE__ */ new Map();
|
|
1734
|
-
const
|
|
1835
|
+
const wordFormsMap = /* @__PURE__ */ new Map();
|
|
1735
1836
|
const validReadings = /* @__PURE__ */ new Set();
|
|
1736
1837
|
const validKanjiForms = /* @__PURE__ */ new Set();
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1838
|
+
const validForms = /* @__PURE__ */ new Set();
|
|
1839
|
+
for (const word of jmDict) {
|
|
1840
|
+
const wordReadings = /* @__PURE__ */ new Set();
|
|
1841
|
+
const wordKanjiForms = /* @__PURE__ */ new Set();
|
|
1842
|
+
const rkf = getValidForms(
|
|
1843
|
+
word.readings,
|
|
1844
|
+
word.kanjiForms,
|
|
1845
|
+
word.isCommon
|
|
1846
|
+
);
|
|
1847
|
+
if (rkf.readings.length > 0)
|
|
1848
|
+
for (const r of rkf.readings) {
|
|
1849
|
+
validReadings.add(r.reading);
|
|
1850
|
+
wordReadings.add(r.reading);
|
|
1851
|
+
validForms.add(r.reading);
|
|
1751
1852
|
}
|
|
1752
|
-
|
|
1753
|
-
|
|
1853
|
+
if (rkf.kanjiForms && rkf.kanjiForms.length > 0)
|
|
1854
|
+
for (const kf of rkf.kanjiForms) {
|
|
1855
|
+
validKanjiForms.add(kf.form);
|
|
1856
|
+
wordKanjiForms.add(kf.form);
|
|
1857
|
+
validForms.add(kf.form);
|
|
1858
|
+
}
|
|
1859
|
+
wordFormsMap.set(word.id, {
|
|
1860
|
+
readings: wordReadings,
|
|
1861
|
+
...wordKanjiForms.size > 0 ? { kanjiForms: wordKanjiForms } : {}
|
|
1862
|
+
});
|
|
1863
|
+
}
|
|
1754
1864
|
const validTitleEntries = /* @__PURE__ */ new Map();
|
|
1755
1865
|
const entriesWithFormTitlesGlobal = /* @__PURE__ */ new Map();
|
|
1756
1866
|
const entriesWithFormsGlobal = /* @__PURE__ */ new Map();
|
|
@@ -1762,34 +1872,27 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1762
1872
|
let valid = false;
|
|
1763
1873
|
if (validKanjiForms && validKanjiForms.has(entry.word)) {
|
|
1764
1874
|
valid = true;
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
if (gloss.trim().includes("\u6F22\u5B57\u8868\u8A18") || gloss.trim().includes("\u53C2\u7167")) {
|
|
1776
|
-
for (const r of validReadings)
|
|
1777
|
-
if (gloss.trim().includes(r)) {
|
|
1778
|
-
reading = r;
|
|
1779
|
-
break;
|
|
1780
|
-
}
|
|
1781
|
-
}
|
|
1875
|
+
for (const sense of entry.senses) {
|
|
1876
|
+
if (sense.form_of && sense.form_of.some((form) => validReadings.has(form)))
|
|
1877
|
+
validFormOfEntries.add(entry.word);
|
|
1878
|
+
for (const gloss of sense.glosses) {
|
|
1879
|
+
let hasForm = false;
|
|
1880
|
+
if (gloss.includes("\u8868\u8A18") || gloss.includes("\u53C2\u7167")) {
|
|
1881
|
+
for (const r of validForms)
|
|
1882
|
+
if (gloss.includes(r)) {
|
|
1883
|
+
hasForm = true;
|
|
1884
|
+
break;
|
|
1782
1885
|
}
|
|
1783
|
-
if (reading) validGlossesEntries.add(entry.word);
|
|
1784
|
-
}
|
|
1785
1886
|
}
|
|
1887
|
+
if (hasForm) validGlossesEntries.add(entry.word);
|
|
1786
1888
|
}
|
|
1787
|
-
|
|
1889
|
+
}
|
|
1890
|
+
if (entry.forms) {
|
|
1788
1891
|
for (const form of entry.forms)
|
|
1789
|
-
if (
|
|
1790
|
-
validFormsEntries.add(entry.word);
|
|
1892
|
+
if (validReadings.has(form)) validFormsEntries.add(entry.word);
|
|
1791
1893
|
}
|
|
1792
|
-
}
|
|
1894
|
+
}
|
|
1895
|
+
if (validReadings.has(entry.word)) {
|
|
1793
1896
|
valid = true;
|
|
1794
1897
|
const ftEntry = entriesWithFormTitlesGlobal.get(entry.word);
|
|
1795
1898
|
if (ftEntry) ftEntry.push(entry);
|
|
@@ -1800,8 +1903,8 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1800
1903
|
if (tEntry) tEntry.push(entry);
|
|
1801
1904
|
else validTitleEntries.set(entry.word, [entry]);
|
|
1802
1905
|
}
|
|
1803
|
-
if (
|
|
1804
|
-
(form) => validKanjiForms.has(form
|
|
1906
|
+
if (entry.forms && (validKanjiForms.has(entry.word) || validReadings.has(entry.word)) && entry.forms.some(
|
|
1907
|
+
(form) => validKanjiForms.has(form) || validReadings.has(form)
|
|
1805
1908
|
)) {
|
|
1806
1909
|
const wfEntry = entriesWithFormsGlobal.get(entry.word);
|
|
1807
1910
|
if (wfEntry) wfEntry.push(entry);
|
|
@@ -1856,7 +1959,7 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1856
1959
|
"\u982D\u5B57\u8A9E",
|
|
1857
1960
|
"\u63A5\u5C3E\u8A9E"
|
|
1858
1961
|
]) {
|
|
1859
|
-
|
|
1962
|
+
posMap.set(pos, {});
|
|
1860
1963
|
for (const te of vte)
|
|
1861
1964
|
if (te.pos_title === pos || te.pos_title === "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18") {
|
|
1862
1965
|
const posEntries = posMap.get(pos);
|
|
@@ -1893,86 +1996,73 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1893
1996
|
fge.length = 0;
|
|
1894
1997
|
wfe.length = 0;
|
|
1895
1998
|
const wordEntriesPairs = [];
|
|
1896
|
-
for (const word of
|
|
1999
|
+
for (const word of jmDict) {
|
|
1897
2000
|
const poses = /* @__PURE__ */ new Set();
|
|
1898
|
-
for (const m of word.meanings)
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
poses.add(notePos);
|
|
1910
|
-
}
|
|
2001
|
+
for (const m of word.meanings)
|
|
2002
|
+
for (const note of m.partOfSpeech) {
|
|
2003
|
+
const noteEntry = noteMap.get(note);
|
|
2004
|
+
if (noteEntry && noteEntry.length === 3) {
|
|
2005
|
+
const notePos = noteEntry[2];
|
|
2006
|
+
if (Array.isArray(notePos))
|
|
2007
|
+
for (const pos of notePos) {
|
|
2008
|
+
if (!poses.has(pos)) poses.add(pos);
|
|
2009
|
+
}
|
|
2010
|
+
else if (typeof notePos === "string" && !poses.has(notePos))
|
|
2011
|
+
poses.add(notePos);
|
|
1911
2012
|
}
|
|
1912
|
-
|
|
1913
|
-
const
|
|
1914
|
-
word.readings.filter(
|
|
1915
|
-
(r) => r.notes === void 0 || !r.notes.some((note) => notSearchedForms.has(note)) || r.commonness !== void 0
|
|
1916
|
-
).map((r) => r.reading)
|
|
1917
|
-
);
|
|
1918
|
-
const validWordKanjiForms = word.kanjiForms ? new Set(
|
|
1919
|
-
word.kanjiForms.filter(
|
|
1920
|
-
(kf) => kf.notes === void 0 || !kf.notes.some((note) => notSearchedForms.has(note)) || kf.commonness !== void 0
|
|
1921
|
-
).map((kf) => kf.form)
|
|
1922
|
-
) : void 0;
|
|
2013
|
+
}
|
|
2014
|
+
const rkf = wordFormsMap.get(word.id);
|
|
1923
2015
|
const entriesWithTitles = [];
|
|
1924
2016
|
const entriesWithFormTitles = [];
|
|
1925
2017
|
const entriesWithForms = [];
|
|
1926
2018
|
if (poses.size > 0)
|
|
1927
2019
|
for (const pos of poses) {
|
|
1928
2020
|
const posEntries = posMap.get(pos);
|
|
1929
|
-
if (
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
const fe = (_b = posEntries.form) == null ? void 0 : _b.get(kf);
|
|
1934
|
-
if (te)
|
|
1935
|
-
entriesWithTitles.push(
|
|
1936
|
-
...te.filter(
|
|
1937
|
-
(ent) => validFormOfEntries.has(ent.word) || validGlossesEntries.has(ent.word) || validFormsEntries.has(ent.word)
|
|
1938
|
-
)
|
|
1939
|
-
);
|
|
1940
|
-
if (fe)
|
|
1941
|
-
entriesWithForms.push(
|
|
1942
|
-
...fe.filter(
|
|
1943
|
-
(ent) => ent.forms && ent.forms.some(
|
|
1944
|
-
(form) => validWordKanjiForms.has(form.form) || validWordReadings.has(form.form)
|
|
1945
|
-
)
|
|
1946
|
-
)
|
|
1947
|
-
);
|
|
1948
|
-
}
|
|
1949
|
-
for (const r of validWordReadings) {
|
|
1950
|
-
const te = (_c = posEntries.title) == null ? void 0 : _c.get(r);
|
|
1951
|
-
const fe = (_d = posEntries.form) == null ? void 0 : _d.get(r);
|
|
1952
|
-
const fte = (_e = posEntries.formTitle) == null ? void 0 : _e.get(r);
|
|
2021
|
+
if (rkf.kanjiForms)
|
|
2022
|
+
for (const kf of rkf.kanjiForms) {
|
|
2023
|
+
const te = (_a = posEntries.title) == null ? void 0 : _a.get(kf);
|
|
2024
|
+
const fe = (_b = posEntries.form) == null ? void 0 : _b.get(kf);
|
|
1953
2025
|
if (te)
|
|
1954
2026
|
entriesWithTitles.push(
|
|
1955
2027
|
...te.filter(
|
|
1956
|
-
(ent) => ent.
|
|
1957
|
-
(form) => validWordKanjiForms.has(form.form)
|
|
1958
|
-
) || validWordKanjiForms === void 0
|
|
2028
|
+
(ent) => validFormOfEntries.has(ent.word) || validGlossesEntries.has(ent.word) || validFormsEntries.has(ent.word)
|
|
1959
2029
|
)
|
|
1960
2030
|
);
|
|
1961
2031
|
if (fe)
|
|
1962
2032
|
entriesWithForms.push(
|
|
1963
2033
|
...fe.filter(
|
|
1964
2034
|
(ent) => ent.forms && ent.forms.some(
|
|
1965
|
-
(form) =>
|
|
2035
|
+
(form) => rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
1966
2036
|
)
|
|
1967
2037
|
)
|
|
1968
2038
|
);
|
|
1969
|
-
if (fte) entriesWithFormTitles.push(...fte);
|
|
1970
2039
|
}
|
|
2040
|
+
for (const r of rkf.readings) {
|
|
2041
|
+
const te = (_c = posEntries.title) == null ? void 0 : _c.get(r);
|
|
2042
|
+
const fe = (_d = posEntries.form) == null ? void 0 : _d.get(r);
|
|
2043
|
+
const fte = (_e = posEntries.formTitle) == null ? void 0 : _e.get(r);
|
|
2044
|
+
if (te)
|
|
2045
|
+
entriesWithTitles.push(
|
|
2046
|
+
...te.filter(
|
|
2047
|
+
(ent) => ent.forms && rkf.kanjiForms && ent.forms.some(
|
|
2048
|
+
(form) => rkf.kanjiForms.has(form)
|
|
2049
|
+
) || rkf.kanjiForms === void 0
|
|
2050
|
+
)
|
|
2051
|
+
);
|
|
2052
|
+
if (fe)
|
|
2053
|
+
entriesWithForms.push(
|
|
2054
|
+
...fe.filter(
|
|
2055
|
+
(ent) => ent.forms && ent.forms.some(
|
|
2056
|
+
(form) => rkf.kanjiForms && rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
2057
|
+
)
|
|
2058
|
+
)
|
|
2059
|
+
);
|
|
2060
|
+
if (fte) entriesWithFormTitles.push(...fte);
|
|
1971
2061
|
}
|
|
1972
2062
|
}
|
|
1973
2063
|
if (entriesWithTitles.length === 0 && entriesWithFormTitles.length === 0 && entriesWithForms.length === 0) {
|
|
1974
|
-
if (
|
|
1975
|
-
for (const kf of
|
|
2064
|
+
if (rkf.kanjiForms)
|
|
2065
|
+
for (const kf of rkf.kanjiForms) {
|
|
1976
2066
|
const te = validTitleEntries.get(kf);
|
|
1977
2067
|
const fe = entriesWithFormsGlobal.get(kf);
|
|
1978
2068
|
if (te)
|
|
@@ -1985,28 +2075,28 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1985
2075
|
entriesWithForms.push(
|
|
1986
2076
|
...fe.filter(
|
|
1987
2077
|
(ent) => ent.forms && ent.forms.some(
|
|
1988
|
-
(form) =>
|
|
2078
|
+
(form) => rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
1989
2079
|
)
|
|
1990
2080
|
)
|
|
1991
2081
|
);
|
|
1992
2082
|
}
|
|
1993
|
-
for (const r of
|
|
2083
|
+
for (const r of rkf.readings) {
|
|
1994
2084
|
const te = validTitleEntries.get(r);
|
|
1995
2085
|
const fe = entriesWithFormsGlobal.get(r);
|
|
1996
2086
|
const fte = entriesWithFormTitlesGlobal.get(r);
|
|
1997
2087
|
if (te)
|
|
1998
2088
|
entriesWithTitles.push(
|
|
1999
2089
|
...te.filter(
|
|
2000
|
-
(ent) => ent.forms &&
|
|
2001
|
-
(form) =>
|
|
2002
|
-
) ||
|
|
2090
|
+
(ent) => ent.forms && rkf.kanjiForms && ent.forms.some(
|
|
2091
|
+
(form) => rkf.kanjiForms.has(form)
|
|
2092
|
+
) || rkf.kanjiForms === void 0
|
|
2003
2093
|
)
|
|
2004
2094
|
);
|
|
2005
2095
|
if (fe)
|
|
2006
2096
|
entriesWithForms.push(
|
|
2007
2097
|
...fe.filter(
|
|
2008
2098
|
(ent) => ent.forms && ent.forms.some(
|
|
2009
|
-
(form) =>
|
|
2099
|
+
(form) => rkf.kanjiForms && rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
2010
2100
|
)
|
|
2011
2101
|
)
|
|
2012
2102
|
);
|
|
@@ -2016,8 +2106,9 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
2016
2106
|
if (entriesWithTitles.length > 0 && (entriesWithFormTitles.length > 0 || entriesWithForms.length > 0))
|
|
2017
2107
|
wordEntriesPairs.push({
|
|
2018
2108
|
word,
|
|
2019
|
-
readings:
|
|
2020
|
-
...
|
|
2109
|
+
readings: rkf.readings,
|
|
2110
|
+
...rkf.kanjiForms ? { kanjiForms: rkf.kanjiForms } : {},
|
|
2111
|
+
forms: rkf.kanjiForms ? rkf.readings.union(rkf.kanjiForms) : rkf.readings,
|
|
2021
2112
|
entriesWithTitles,
|
|
2022
2113
|
entriesWithFormTitles,
|
|
2023
2114
|
entriesWithForms
|
|
@@ -2026,7 +2117,7 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
2026
2117
|
for (const pair of wordEntriesPairs) {
|
|
2027
2118
|
const definitions = [];
|
|
2028
2119
|
const kanjiFormEntries = [];
|
|
2029
|
-
const
|
|
2120
|
+
const entriesWithForms = [];
|
|
2030
2121
|
const readingEntries = [];
|
|
2031
2122
|
const titleFormMap = /* @__PURE__ */ new Map();
|
|
2032
2123
|
const refsMap = /* @__PURE__ */ new Map();
|
|
@@ -2034,80 +2125,64 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
2034
2125
|
for (const ent of pair.entriesWithTitles) {
|
|
2035
2126
|
const validFormOf = validFormOfEntries.has(ent.word);
|
|
2036
2127
|
const validGlosses = validGlossesEntries.has(ent.word);
|
|
2037
|
-
const
|
|
2038
|
-
|
|
2128
|
+
const validForms2 = validFormsEntries.has(ent.word);
|
|
2129
|
+
const hasKanjiForms = ent.forms !== void 0 && pair.kanjiForms !== void 0 && ent.forms.some((form) => pair.kanjiForms.has(form));
|
|
2130
|
+
if (pair.kanjiForms && pair.kanjiForms.has(ent.word) && (validFormOf || validGlosses || validForms2)) {
|
|
2039
2131
|
kanjiFormEntries.push(ent);
|
|
2040
|
-
if ((validFormOf || validGlosses) &&
|
|
2132
|
+
if ((validFormOf || validGlosses) && ent.senses)
|
|
2041
2133
|
for (const sense of ent.senses) {
|
|
2042
|
-
if (validFormOf &&
|
|
2134
|
+
if (validFormOf && sense.form_of) {
|
|
2043
2135
|
for (const form of sense.form_of)
|
|
2044
|
-
if (
|
|
2045
|
-
const elem = titleFormMap.get(
|
|
2046
|
-
form.word
|
|
2047
|
-
);
|
|
2136
|
+
if (pair.readings.has(form)) {
|
|
2137
|
+
const elem = titleFormMap.get(form);
|
|
2048
2138
|
if (!elem)
|
|
2049
|
-
titleFormMap.set(form
|
|
2139
|
+
titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
|
|
2050
2140
|
else elem.add(ent.word);
|
|
2051
2141
|
}
|
|
2052
|
-
}
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
if (gloss
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
}
|
|
2063
|
-
}
|
|
2064
|
-
if (reading) {
|
|
2065
|
-
const elem = refsMap.get(reading);
|
|
2066
|
-
if (!elem) refsMap.set(reading, /* @__PURE__ */ new Set([ent.word]));
|
|
2067
|
-
else elem.add(ent.word);
|
|
2142
|
+
}
|
|
2143
|
+
if (validGlosses) {
|
|
2144
|
+
for (const gloss of sense.glosses)
|
|
2145
|
+
if (gloss.includes("\u8868\u8A18") || gloss.includes("\u53C2\u7167")) {
|
|
2146
|
+
for (const f of pair.forms)
|
|
2147
|
+
if (gloss.includes(f)) {
|
|
2148
|
+
const elem = refsMap.get(f);
|
|
2149
|
+
if (!elem) refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
|
|
2150
|
+
else elem.add(ent.word);
|
|
2151
|
+
}
|
|
2068
2152
|
}
|
|
2069
|
-
}
|
|
2070
2153
|
}
|
|
2071
2154
|
}
|
|
2072
|
-
if (
|
|
2155
|
+
if (validForms2 && ent.forms) {
|
|
2073
2156
|
for (const form of ent.forms)
|
|
2074
|
-
if (
|
|
2075
|
-
readingForms.add(form.form);
|
|
2157
|
+
if (pair.readings.has(form)) readingForms.add(form);
|
|
2076
2158
|
}
|
|
2077
|
-
}
|
|
2078
|
-
|
|
2079
|
-
|
|
2159
|
+
}
|
|
2160
|
+
if (pair.readings.has(ent.word) && hasKanjiForms)
|
|
2161
|
+
entriesWithForms.push(ent);
|
|
2162
|
+
if (pair.kanjiForms === void 0 && pair.readings.has(ent.word))
|
|
2080
2163
|
readingEntries.push(ent);
|
|
2081
2164
|
}
|
|
2082
2165
|
for (const entry of pair.entriesWithForms) {
|
|
2083
2166
|
const elem = titleFormMap.get(entry.word);
|
|
2084
|
-
if (elem && entry.forms.some((form) => elem.has(form
|
|
2085
|
-
|
|
2167
|
+
if (elem && entry.forms && entry.forms.some((form) => elem.has(form)))
|
|
2168
|
+
entriesWithForms.push(entry);
|
|
2086
2169
|
}
|
|
2087
2170
|
for (const entry of pair.entriesWithFormTitles) {
|
|
2088
2171
|
if (readingForms.has(entry.word)) {
|
|
2089
|
-
|
|
2172
|
+
entriesWithForms.push(entry);
|
|
2090
2173
|
continue;
|
|
2091
2174
|
}
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
if (ft && !ft.isDisjointFrom(pair.kanjiForms))
|
|
2095
|
-
readingWithFormsEntries.push(entry);
|
|
2096
|
-
}
|
|
2175
|
+
const ft = refsMap.get(entry.word);
|
|
2176
|
+
if (ft && !ft.isDisjointFrom(pair.forms)) entriesWithForms.push(entry);
|
|
2097
2177
|
}
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
parsedReadingWithFormsEntries = true;
|
|
2103
|
-
for (const ref of readingWithFormsEntries)
|
|
2104
|
-
parseEntry(ref, definitions, definitionMap);
|
|
2105
|
-
} else parseEntry(entry, definitions, definitionMap);
|
|
2106
|
-
if (!parsedReadingWithFormsEntries && readingWithFormsEntries.length > 0) {
|
|
2107
|
-
parsedReadingWithFormsEntries = true;
|
|
2108
|
-
for (const ref of readingWithFormsEntries)
|
|
2109
|
-
parseEntry(ref, definitions, definitionMap);
|
|
2178
|
+
if (kanjiFormEntries.length > 0) {
|
|
2179
|
+
for (const entry of kanjiFormEntries)
|
|
2180
|
+
if (entry.pos_title !== "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18")
|
|
2181
|
+
parseEntry(entry, definitions, definitionMap);
|
|
2110
2182
|
}
|
|
2183
|
+
if (entriesWithForms.length > 0)
|
|
2184
|
+
for (const ref of entriesWithForms)
|
|
2185
|
+
parseEntry(ref, definitions, definitionMap);
|
|
2111
2186
|
if (readingEntries.length > 0)
|
|
2112
2187
|
for (const readingEntry of readingEntries)
|
|
2113
2188
|
parseEntry(readingEntry, definitions, definitionMap);
|
|
@@ -2158,12 +2233,12 @@ async function getWordDefinitionsWithFurigana(entryList, jmDict) {
|
|
|
2158
2233
|
function lookupWordNote(key, notes, tags) {
|
|
2159
2234
|
const info = noteMap.get(key.toLowerCase());
|
|
2160
2235
|
if (!info) {
|
|
2161
|
-
|
|
2236
|
+
notes.push(key);
|
|
2162
2237
|
return { note: key };
|
|
2163
2238
|
}
|
|
2164
2239
|
const tag = `word::${info[0]}`;
|
|
2165
|
-
if (
|
|
2166
|
-
|
|
2240
|
+
if (!tags.includes(tag)) tags.push(tag);
|
|
2241
|
+
notes.push(info[1]);
|
|
2167
2242
|
return { note: info[1], tag };
|
|
2168
2243
|
}
|
|
2169
2244
|
var wordAddNoteArray = (arr, cb) => {
|
|
@@ -2171,11 +2246,16 @@ var wordAddNoteArray = (arr, cb) => {
|
|
|
2171
2246
|
for (const v of arr) cb(v);
|
|
2172
2247
|
};
|
|
2173
2248
|
function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deckPath) {
|
|
2174
|
-
var _a;
|
|
2249
|
+
var _a, _b;
|
|
2175
2250
|
let dictWord = void 0;
|
|
2176
|
-
if (typeof word === "string" && dict)
|
|
2177
|
-
|
|
2178
|
-
|
|
2251
|
+
if (typeof word === "string" && dict) {
|
|
2252
|
+
if (Array.isArray(dict))
|
|
2253
|
+
dictWord = dict.find(
|
|
2254
|
+
(entry) => entry.id === word
|
|
2255
|
+
);
|
|
2256
|
+
if (dict instanceof Map) dictWord = dict.get(word);
|
|
2257
|
+
}
|
|
2258
|
+
if (typeof word === "object") dictWord = word;
|
|
2179
2259
|
if (dictWord) {
|
|
2180
2260
|
const word2 = {
|
|
2181
2261
|
id: dictWord.id,
|
|
@@ -2198,7 +2278,7 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2198
2278
|
notes: dictKanjiForm.notes.map((note) => {
|
|
2199
2279
|
const noteAndTag = lookupWordNote(
|
|
2200
2280
|
note,
|
|
2201
|
-
|
|
2281
|
+
[],
|
|
2202
2282
|
word2.tags
|
|
2203
2283
|
);
|
|
2204
2284
|
return capitalizeString(noteAndTag.note);
|
|
@@ -2217,7 +2297,7 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2217
2297
|
...dictReading.notes ? dictReading.notes.map((note) => {
|
|
2218
2298
|
const noteAndTag = lookupWordNote(
|
|
2219
2299
|
note,
|
|
2220
|
-
|
|
2300
|
+
[],
|
|
2221
2301
|
word2.tags
|
|
2222
2302
|
);
|
|
2223
2303
|
return capitalizeString(noteAndTag.note);
|
|
@@ -2227,85 +2307,81 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2227
2307
|
...dictReading.commonness && dictReading.commonness.length > 0 ? { common: true } : {}
|
|
2228
2308
|
}));
|
|
2229
2309
|
word2.translations = [];
|
|
2230
|
-
for (const dictMeaning of dictWord.meanings)
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
(translation)
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
word2.tags.push("word::explanation");
|
|
2243
|
-
} else if (translation.type === "tm") {
|
|
2244
|
-
translationTypes.push("Trademark");
|
|
2245
|
-
word2.tags.push("word::trademark");
|
|
2246
|
-
}
|
|
2247
|
-
return translation.translation;
|
|
2248
|
-
}
|
|
2310
|
+
for (const dictMeaning of dictWord.meanings) {
|
|
2311
|
+
const translationTypes = [];
|
|
2312
|
+
const translations = dictMeaning.translations.map(
|
|
2313
|
+
(translation) => {
|
|
2314
|
+
if (typeof translation === "string") return translation;
|
|
2315
|
+
else {
|
|
2316
|
+
const translationNoteAndTag = noteMap.get(
|
|
2317
|
+
translation.type
|
|
2318
|
+
);
|
|
2319
|
+
translationTypes.push(translationNoteAndTag[1]);
|
|
2320
|
+
word2.tags.push(`word::${translationNoteAndTag[0]}`);
|
|
2321
|
+
return translation.translation;
|
|
2249
2322
|
}
|
|
2250
|
-
|
|
2251
|
-
|
|
2252
|
-
|
|
2253
|
-
|
|
2254
|
-
|
|
2255
|
-
)
|
|
2256
|
-
|
|
2257
|
-
|
|
2258
|
-
|
|
2259
|
-
)
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2264
|
-
)
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
)
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
)
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
)
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
)
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2284
|
-
)
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
)
|
|
2289
|
-
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
}
|
|
2323
|
+
}
|
|
2324
|
+
);
|
|
2325
|
+
const notes = [];
|
|
2326
|
+
wordAddNoteArray(
|
|
2327
|
+
dictMeaning.kanjiFormRestrictions,
|
|
2328
|
+
(restriction) => notes.push(`Meaning restricted to ${restriction}`)
|
|
2329
|
+
);
|
|
2330
|
+
wordAddNoteArray(
|
|
2331
|
+
dictMeaning.readingRestrictions,
|
|
2332
|
+
(restriction) => notes.push(`Meaning restricted to ${restriction}`)
|
|
2333
|
+
);
|
|
2334
|
+
for (const t of translationTypes) notes.push(t);
|
|
2335
|
+
wordAddNoteArray(
|
|
2336
|
+
dictMeaning.partOfSpeech,
|
|
2337
|
+
(pos) => lookupWordNote(pos, notes, word2.tags)
|
|
2338
|
+
);
|
|
2339
|
+
wordAddNoteArray(
|
|
2340
|
+
dictMeaning.fields,
|
|
2341
|
+
(field) => lookupWordNote(field, notes, word2.tags)
|
|
2342
|
+
);
|
|
2343
|
+
wordAddNoteArray(
|
|
2344
|
+
dictMeaning.dialects,
|
|
2345
|
+
(dialect) => lookupWordNote(dialect, notes, word2.tags)
|
|
2346
|
+
);
|
|
2347
|
+
wordAddNoteArray(
|
|
2348
|
+
dictMeaning.antonyms,
|
|
2349
|
+
(antonym) => notes.push(`Antonym: ${antonym}`)
|
|
2350
|
+
);
|
|
2351
|
+
wordAddNoteArray(
|
|
2352
|
+
dictMeaning.references,
|
|
2353
|
+
(reference) => notes.push(`Related: ${reference}`)
|
|
2354
|
+
);
|
|
2355
|
+
wordAddNoteArray(
|
|
2356
|
+
dictMeaning.info,
|
|
2357
|
+
(info) => lookupWordNote(info, notes, word2.tags)
|
|
2358
|
+
);
|
|
2359
|
+
wordAddNoteArray(
|
|
2360
|
+
dictMeaning.misc,
|
|
2361
|
+
(misc) => lookupWordNote(misc, notes, word2.tags)
|
|
2362
|
+
);
|
|
2363
|
+
for (let i = 0; i < notes.length; i++)
|
|
2364
|
+
notes[i] = capitalizeString(notes[i]);
|
|
2365
|
+
word2.translations.push({
|
|
2366
|
+
translation: translations.join("; "),
|
|
2367
|
+
notes
|
|
2368
|
+
});
|
|
2369
|
+
}
|
|
2296
2370
|
if (dictWord.usuallyInKana === true) {
|
|
2297
2371
|
word2.usuallyInKana = true;
|
|
2298
2372
|
word2.tags.push("word::usually_in_kana_for_all_senses");
|
|
2299
2373
|
}
|
|
2300
2374
|
if (kanjiDic && word2.kanjiForms) {
|
|
2301
2375
|
const kanji = [];
|
|
2376
|
+
const seenChars = /* @__PURE__ */ new Set();
|
|
2302
2377
|
for (const kanjiForm of word2.kanjiForms)
|
|
2303
|
-
for (const char of kanjiForm.kanjiForm) {
|
|
2304
|
-
if (
|
|
2305
|
-
|
|
2378
|
+
for (const char of kanjiForm.kanjiForm.split("").filter((c) => regexps.kanji.test(c))) {
|
|
2379
|
+
if (seenChars.has(char)) continue;
|
|
2380
|
+
seenChars.add(char);
|
|
2381
|
+
const kanjiEntry = kanjiDic instanceof Map ? kanjiDic.get(char) : void 0;
|
|
2306
2382
|
const kanjiObj = getKanji(
|
|
2307
|
-
char,
|
|
2308
|
-
Array.isArray(kanjiDic) ? kanjiDic :
|
|
2383
|
+
kanjiEntry != null ? kanjiEntry : char,
|
|
2384
|
+
Array.isArray(kanjiDic) ? kanjiDic : void 0
|
|
2309
2385
|
);
|
|
2310
2386
|
if (kanjiObj)
|
|
2311
2387
|
kanji.push({
|
|
@@ -2315,130 +2391,119 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2315
2391
|
}
|
|
2316
2392
|
if (kanji.length > 0) word2.kanji = kanji;
|
|
2317
2393
|
}
|
|
2318
|
-
if (dictWord.hasPhrases
|
|
2319
|
-
const exampleList = Array.isArray(
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
|
|
2325
|
-
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2394
|
+
if (dictWord.hasPhrases !== void 0 && examples) {
|
|
2395
|
+
const exampleList = Array.isArray(examples) ? examples : (_a = examples.get(dictWord.id)) != null ? _a : [];
|
|
2396
|
+
const rkf = getValidForms(
|
|
2397
|
+
dictWord.readings,
|
|
2398
|
+
dictWord.kanjiForms,
|
|
2399
|
+
dictWord.isCommon
|
|
2400
|
+
);
|
|
2401
|
+
const readings = new Set(
|
|
2402
|
+
rkf.readings.map((r) => r.reading)
|
|
2403
|
+
);
|
|
2404
|
+
const kanjiForms = rkf.kanjiForms ? new Set(rkf.kanjiForms.map((kf) => kf.form)) : void 0;
|
|
2405
|
+
let kanjiFormExamples = [];
|
|
2406
|
+
const readingMatchingKanjiFormExamples = [];
|
|
2407
|
+
const readingExamples = [];
|
|
2408
|
+
const readingMatchingKanjiForms = /* @__PURE__ */ new Set();
|
|
2409
|
+
for (const example of exampleList)
|
|
2410
|
+
for (let i = 0; i < example.parts.length; i++) {
|
|
2411
|
+
const part = example.parts[i];
|
|
2412
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
2413
|
+
const readingAsInflectedFormMatch = part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
2414
|
+
const referenceIDMatch = part.referenceID === dictWord.id;
|
|
2415
|
+
if (kanjiForms && kanjiForms.has(part.baseForm) || referenceIDMatch) {
|
|
2416
|
+
if (readingAsReadingMatch || readingAsInflectedFormMatch) {
|
|
2417
|
+
readingMatchingKanjiFormExamples.push({
|
|
2418
|
+
ex: example,
|
|
2419
|
+
partIndex: i
|
|
2420
|
+
});
|
|
2421
|
+
readingMatchingKanjiForms.add(part.baseForm);
|
|
2422
|
+
} else
|
|
2423
|
+
kanjiFormExamples.push({
|
|
2424
|
+
ex: example,
|
|
2425
|
+
partIndex: i,
|
|
2426
|
+
form: part.baseForm
|
|
2427
|
+
});
|
|
2428
|
+
break;
|
|
2429
|
+
}
|
|
2430
|
+
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
2431
|
+
if ((readingAsBaseFormMatch || referenceIDMatch) && kanjiForms === void 0) {
|
|
2432
|
+
readingExamples.push({ ex: example, partIndex: i });
|
|
2433
|
+
break;
|
|
2434
|
+
}
|
|
2435
|
+
}
|
|
2436
|
+
if (readingMatchingKanjiForms.size > 0)
|
|
2437
|
+
kanjiFormExamples = kanjiFormExamples.filter(
|
|
2438
|
+
(ex) => ex.form && readingMatchingKanjiForms.has(ex.form)
|
|
2329
2439
|
);
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2339
|
-
|
|
2340
|
-
|
|
2341
|
-
|
|
2342
|
-
|
|
2343
|
-
|
|
2344
|
-
|
|
2345
|
-
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
const part = example.parts[i];
|
|
2351
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
2352
|
-
const readingAsInflectedFormMatch = part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
2353
|
-
const referenceIDMatch = part.referenceID !== void 0 && word2.id !== void 0 && part.referenceID === word2.id;
|
|
2354
|
-
if (kanjiForms && kanjiForms.has(part.baseForm) || referenceIDMatch) {
|
|
2355
|
-
if (readingAsReadingMatch || readingAsInflectedFormMatch) {
|
|
2356
|
-
readingMatchingKanjiFormExamples.push({
|
|
2357
|
-
ex: example,
|
|
2358
|
-
partIndex: i
|
|
2359
|
-
});
|
|
2360
|
-
readingMatchingKanjiForms.add(part.baseForm);
|
|
2361
|
-
} else
|
|
2362
|
-
kanjiFormExamples.push({
|
|
2363
|
-
ex: example,
|
|
2364
|
-
partIndex: i,
|
|
2365
|
-
form: part.baseForm
|
|
2366
|
-
});
|
|
2367
|
-
break;
|
|
2368
|
-
}
|
|
2369
|
-
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
2370
|
-
if ((readingAsBaseFormMatch || referenceIDMatch) && kanjiForms === void 0) {
|
|
2371
|
-
readingExamples.push({ ex: example, partIndex: i });
|
|
2372
|
-
break;
|
|
2440
|
+
const includeKanjiFormExamples = word2.kanjiForms !== void 0;
|
|
2441
|
+
let wordExamples = [
|
|
2442
|
+
...includeKanjiFormExamples ? [...readingMatchingKanjiFormExamples, ...kanjiFormExamples] : readingExamples
|
|
2443
|
+
];
|
|
2444
|
+
readingMatchingKanjiForms.clear();
|
|
2445
|
+
const glossSpecificExamples = [];
|
|
2446
|
+
const seenPhrases = /* @__PURE__ */ new Set();
|
|
2447
|
+
for (let i = 0; i < word2.translations.length; i++) {
|
|
2448
|
+
outer: for (const example of wordExamples) {
|
|
2449
|
+
if (seenPhrases.has(example.ex.phrase)) continue;
|
|
2450
|
+
for (let j = 0; j < example.ex.parts.length; j++) {
|
|
2451
|
+
const part = example.ex.parts[j];
|
|
2452
|
+
if (j === example.partIndex && part.glossNumber === i + 1) {
|
|
2453
|
+
example.ex.glossNumber = {
|
|
2454
|
+
wordId: word2.id,
|
|
2455
|
+
glossNumber: i + 1
|
|
2456
|
+
};
|
|
2457
|
+
glossSpecificExamples.push(example);
|
|
2458
|
+
seenPhrases.add(example.ex.phrase);
|
|
2459
|
+
break outer;
|
|
2373
2460
|
}
|
|
2374
2461
|
}
|
|
2375
|
-
|
|
2376
|
-
|
|
2377
|
-
|
|
2462
|
+
}
|
|
2463
|
+
}
|
|
2464
|
+
if (glossSpecificExamples.length === word2.translations.length)
|
|
2465
|
+
wordExamples = glossSpecificExamples;
|
|
2466
|
+
else if (glossSpecificExamples.length > 0) {
|
|
2467
|
+
const exes = glossSpecificExamples;
|
|
2468
|
+
if (exes.length < 5) {
|
|
2469
|
+
wordExamples = wordExamples.filter(
|
|
2470
|
+
(ex) => !seenPhrases.has(ex.ex.phrase)
|
|
2378
2471
|
);
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
];
|
|
2384
|
-
readingMatchingKanjiForms.clear();
|
|
2385
|
-
const glossSpecificExamples = [];
|
|
2386
|
-
const seenPhrases = /* @__PURE__ */ new Set();
|
|
2387
|
-
for (let i = 0; i < word2.translations.length; i++) {
|
|
2388
|
-
outer: for (const example of wordExamples) {
|
|
2389
|
-
if (seenPhrases.has(example.ex.phrase)) continue;
|
|
2390
|
-
for (let j = 0; j < example.ex.parts.length; j++) {
|
|
2391
|
-
const part = example.ex.parts[j];
|
|
2392
|
-
if (j === example.partIndex && part.glossNumber === i + 1) {
|
|
2393
|
-
example.ex.glossNumber = {
|
|
2394
|
-
wordId: word2.id,
|
|
2395
|
-
glossNumber: i + 1
|
|
2396
|
-
};
|
|
2397
|
-
glossSpecificExamples.push(example);
|
|
2398
|
-
seenPhrases.add(example.ex.phrase);
|
|
2399
|
-
break outer;
|
|
2400
|
-
}
|
|
2472
|
+
if (wordExamples.length > 0)
|
|
2473
|
+
for (const ex of wordExamples) {
|
|
2474
|
+
exes.push(ex);
|
|
2475
|
+
if (exes.length === 5) break;
|
|
2401
2476
|
}
|
|
2402
|
-
}
|
|
2403
|
-
if (glossSpecificExamples.length === 5) break;
|
|
2404
|
-
}
|
|
2405
|
-
if (glossSpecificExamples.length === 5)
|
|
2406
|
-
wordExamples = [...glossSpecificExamples];
|
|
2407
|
-
else if (glossSpecificExamples.length > 0)
|
|
2408
|
-
wordExamples = [
|
|
2409
|
-
...glossSpecificExamples,
|
|
2410
|
-
...wordExamples.filter(
|
|
2411
|
-
(ex) => !seenPhrases.has(ex.ex.phrase)
|
|
2412
|
-
).slice(0, 5 - glossSpecificExamples.length)
|
|
2413
|
-
];
|
|
2414
|
-
if (wordExamples.length > 0) {
|
|
2415
|
-
word2.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => {
|
|
2416
|
-
var _a2;
|
|
2417
|
-
return {
|
|
2418
|
-
phrase: (_a2 = ex.ex.furigana) != null ? _a2 : ex.ex.phrase,
|
|
2419
|
-
translation: ex.ex.translation,
|
|
2420
|
-
originalPhrase: ex.ex.phrase,
|
|
2421
|
-
...ex.ex.glossNumber ? { glossNumber: ex.ex.glossNumber } : {}
|
|
2422
|
-
};
|
|
2423
|
-
});
|
|
2424
|
-
word2.tags.push("word::has_phrases");
|
|
2425
|
-
if (glossSpecificExamples.length > 0)
|
|
2426
|
-
word2.tags.push("word::has_meaning-specific_phrases");
|
|
2427
2477
|
}
|
|
2478
|
+
wordExamples = exes;
|
|
2479
|
+
}
|
|
2480
|
+
if (wordExamples.length > 0) {
|
|
2481
|
+
word2.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => {
|
|
2482
|
+
var _a2;
|
|
2483
|
+
return {
|
|
2484
|
+
phrase: (_a2 = ex.ex.furigana) != null ? _a2 : ex.ex.phrase,
|
|
2485
|
+
translation: ex.ex.translation,
|
|
2486
|
+
originalPhrase: ex.ex.phrase,
|
|
2487
|
+
...ex.ex.glossNumber ? { glossNumber: ex.ex.glossNumber } : {}
|
|
2488
|
+
};
|
|
2489
|
+
});
|
|
2490
|
+
word2.tags.push("word::has_phrases");
|
|
2491
|
+
if (glossSpecificExamples.length > 0)
|
|
2492
|
+
word2.tags.push("word::has_meaning-specific_phrases");
|
|
2428
2493
|
}
|
|
2429
2494
|
}
|
|
2430
2495
|
if (definitions) {
|
|
2431
|
-
const defs = Array.isArray(definitions) ? (
|
|
2496
|
+
const defs = Array.isArray(definitions) ? (_b = definitions.find((wdp) => wdp.wordID === word2.id)) == null ? void 0 : _b.definitions : definitions.get(word2.id);
|
|
2432
2497
|
if (defs) word2.definitions = [...defs];
|
|
2433
2498
|
}
|
|
2434
2499
|
return word2;
|
|
2435
2500
|
} else return void 0;
|
|
2436
2501
|
}
|
|
2437
2502
|
function getKanji(kanji, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
2438
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k
|
|
2503
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
|
|
2439
2504
|
let dictKanji = void 0;
|
|
2440
2505
|
if (typeof kanji === "string" && dict)
|
|
2441
|
-
dictKanji = dict.find((entry) => entry.kanji === kanji);
|
|
2506
|
+
dictKanji = Array.isArray(dict) ? dict.find((entry) => entry.kanji === kanji) : dict.get(kanji);
|
|
2442
2507
|
else if (typeof kanji === "object") dictKanji = kanji;
|
|
2443
2508
|
if (dictKanji) {
|
|
2444
2509
|
const kanji2 = {
|
|
@@ -2459,23 +2524,20 @@ function getKanji(kanji, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
|
2459
2524
|
const kunyomi = [];
|
|
2460
2525
|
for (const rm of dictKanji.readingMeaning) {
|
|
2461
2526
|
if (rm.nanori && rm.nanori.length > 0) nanori.push(...rm.nanori);
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
(reading) => reading.type === "ja_kun"
|
|
2473
|
-
).map((reading) => reading.reading)
|
|
2474
|
-
);
|
|
2475
|
-
}
|
|
2476
|
-
if (group.meanings && group.meanings.length > 0)
|
|
2477
|
-
meanings.push(...group.meanings);
|
|
2527
|
+
for (const group of rm.groups) {
|
|
2528
|
+
if (group.readings) {
|
|
2529
|
+
onyomi.push(
|
|
2530
|
+
...group.readings.filter((reading) => reading.type === "ja_on").map((reading) => reading.reading)
|
|
2531
|
+
);
|
|
2532
|
+
kunyomi.push(
|
|
2533
|
+
...group.readings.filter(
|
|
2534
|
+
(reading) => reading.type === "ja_kun"
|
|
2535
|
+
).map((reading) => reading.reading)
|
|
2536
|
+
);
|
|
2478
2537
|
}
|
|
2538
|
+
if (group.meanings && group.meanings.length > 0)
|
|
2539
|
+
meanings.push(...group.meanings);
|
|
2540
|
+
}
|
|
2479
2541
|
}
|
|
2480
2542
|
if (meanings.length > 0) kanji2.meanings = meanings;
|
|
2481
2543
|
if (nanori.length > 0) kanji2.nanori = nanori;
|
|
@@ -2490,71 +2552,62 @@ function getKanji(kanji, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
|
2490
2552
|
(word) => word.kanjiForms && word.kanjiForms[0].form.includes(kanji2.kanji)
|
|
2491
2553
|
);
|
|
2492
2554
|
if (firstKfWords && firstKfWords.length > 0) kanjiWords = firstKfWords;
|
|
2493
|
-
else if (kanjiWords) kanjiWords = kanjiWords;
|
|
2494
2555
|
if (kanjiWords) {
|
|
2495
2556
|
const validWords = [];
|
|
2496
2557
|
for (const word of kanjiWords) {
|
|
2497
2558
|
const kanjiForm = (_a = firstKfWords && firstKfWords.length > 0 ? word.kanjiForms[0] : word.kanjiForms.find(
|
|
2498
2559
|
(kf) => kf.form.includes(kanji2.kanji)
|
|
2499
2560
|
)) == null ? void 0 : _a.form;
|
|
2500
|
-
if (
|
|
2501
|
-
|
|
2502
|
-
|
|
2503
|
-
|
|
2504
|
-
|
|
2505
|
-
|
|
2506
|
-
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
{
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2561
|
+
if (kanjiForm) {
|
|
2562
|
+
const reading = (_b = firstKfWords && firstKfWords.length > 0 ? word.readings[0] : word.readings.find(
|
|
2563
|
+
(reading2) => reading2.kanjiFormRestrictions && reading2.kanjiFormRestrictions.includes(kanjiForm)
|
|
2564
|
+
)) == null ? void 0 : _b.reading;
|
|
2565
|
+
if (!reading) continue;
|
|
2566
|
+
const translation = (_c = firstKfWords && firstKfWords.length > 0 ? word.meanings[0] : word.meanings.find(
|
|
2567
|
+
(m) => m.translations && m.kanjiFormRestrictions && m.kanjiFormRestrictions.includes(kanjiForm)
|
|
2568
|
+
)) == null ? void 0 : _c.translations.map(
|
|
2569
|
+
(t) => typeof t === "string" ? t : t.translation
|
|
2570
|
+
)[0];
|
|
2571
|
+
if (!translation) continue;
|
|
2572
|
+
validWords.push({
|
|
2573
|
+
kanjiForms: [{ kanjiForm }],
|
|
2574
|
+
readings: [{ reading }],
|
|
2575
|
+
translations: [
|
|
2576
|
+
{
|
|
2577
|
+
translation
|
|
2578
|
+
}
|
|
2579
|
+
]
|
|
2580
|
+
});
|
|
2581
|
+
}
|
|
2518
2582
|
if (validWords.length === 3) break;
|
|
2519
2583
|
}
|
|
2520
2584
|
if (validWords.length > 0) kanji2.words = validWords;
|
|
2521
2585
|
}
|
|
2522
2586
|
}
|
|
2523
2587
|
if (svgList) {
|
|
2524
|
-
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
|
|
2531
|
-
const svg = svgList.find(
|
|
2532
|
-
(svgFile) => fileNames.includes(svgFile.toLowerCase())
|
|
2533
|
-
);
|
|
2534
|
-
if (svg) kanji2.svg = svg;
|
|
2535
|
-
}
|
|
2588
|
+
const codePoint = kanji2.kanji.codePointAt(0).toString(16).toLowerCase();
|
|
2589
|
+
const svg = Array.isArray(svgList) ? svgList.find(
|
|
2590
|
+
(svgFile) => [`${codePoint}.svg`, `0${codePoint}.svg`].includes(
|
|
2591
|
+
svgFile.toLowerCase()
|
|
2592
|
+
)
|
|
2593
|
+
) : svgList.get(kanji2.kanji);
|
|
2594
|
+
if (svg) kanji2.svg = svg;
|
|
2536
2595
|
}
|
|
2537
2596
|
if (kanji2.tags && dictKanji.isKokuji === true) {
|
|
2538
2597
|
kanji2.kokuji = true;
|
|
2539
2598
|
kanji2.tags.push("kanji::kokuji");
|
|
2540
|
-
if (kanji2.meanings)
|
|
2541
|
-
kanji2.meanings.splice(
|
|
2542
|
-
kanji2.meanings.findIndex((meaning) => meaning === "(kokuji)"),
|
|
2543
|
-
1
|
|
2544
|
-
);
|
|
2545
2599
|
}
|
|
2546
|
-
|
|
2547
|
-
kanji2.
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2555
|
-
|
|
2556
|
-
|
|
2557
|
-
);
|
|
2600
|
+
kanji2.tags.push(
|
|
2601
|
+
`kanji::strokes::${kanji2.strokes}`,
|
|
2602
|
+
...kanji2.frequency ? [`kanji::frequency::${kanji2.frequency}`] : [],
|
|
2603
|
+
...kanji2.grade ? [`kanji::grade::${kanji2.grade}`] : [],
|
|
2604
|
+
...kanji2.jlpt ? [`kanji::pre-2010_jlpt::${kanji2.jlpt.toLowerCase()}`] : [],
|
|
2605
|
+
`kanji::onyomi::${(_e = (_d = kanji2.onyomi) == null ? void 0 : _d.length) != null ? _e : 0}`,
|
|
2606
|
+
`kanji::kunyomi::${(_g = (_f = kanji2.kunyomi) == null ? void 0 : _f.length) != null ? _g : 0}`,
|
|
2607
|
+
`kanji::nanori::${(_i = (_h = kanji2.nanori) == null ? void 0 : _h.length) != null ? _i : 0}`,
|
|
2608
|
+
`kanji::words::${(_k = (_j = kanji2.words) == null ? void 0 : _j.length) != null ? _k : 0}`,
|
|
2609
|
+
...kanji2.svg ? ["kanji::has_svg"] : []
|
|
2610
|
+
);
|
|
2558
2611
|
return kanji2;
|
|
2559
2612
|
} else return void 0;
|
|
2560
2613
|
}
|
|
@@ -2581,22 +2634,20 @@ function getKanjiExtended(info, kanji, dict, useWords, jmDict, svgList, noteType
|
|
|
2581
2634
|
kanjiObj.words = info.words;
|
|
2582
2635
|
usedInfo = true;
|
|
2583
2636
|
}
|
|
2584
|
-
if (kanjiObj.
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2598
|
-
});
|
|
2599
|
-
}
|
|
2637
|
+
if (kanjiObj.components)
|
|
2638
|
+
kanjiObj.tags.push(`kanji::components::${kanjiObj.components.length}`);
|
|
2639
|
+
if (kanjiObj.mnemonic && kanjiObj.mnemonic.length > 0)
|
|
2640
|
+
kanjiObj.tags.push("kanji::has_mnemonic");
|
|
2641
|
+
if (useWords === true && kanjiObj.words && info.words)
|
|
2642
|
+
kanjiObj.tags.forEach((tag, index) => {
|
|
2643
|
+
if (tag.startsWith("kanji::words::") && kanjiObj.words) {
|
|
2644
|
+
kanjiObj.tags.splice(
|
|
2645
|
+
index,
|
|
2646
|
+
1,
|
|
2647
|
+
`kanji::words::${kanjiObj.words.length}`
|
|
2648
|
+
);
|
|
2649
|
+
}
|
|
2650
|
+
});
|
|
2600
2651
|
if (sourceURL && info.externalInfo === true && usedInfo)
|
|
2601
2652
|
kanjiObj.source = sourceURL;
|
|
2602
2653
|
return kanjiObj;
|
|
@@ -2890,19 +2941,21 @@ ${ankiNotes}`;
|
|
|
2890
2941
|
convertRadkFile,
|
|
2891
2942
|
convertTanakaCorpus,
|
|
2892
2943
|
convertTanakaCorpusWithFurigana,
|
|
2944
|
+
createEntryMaps,
|
|
2893
2945
|
generateAnkiNote,
|
|
2894
2946
|
generateAnkiNotesFile,
|
|
2895
2947
|
getKanji,
|
|
2896
2948
|
getKanjiExtended,
|
|
2949
|
+
getValidForms,
|
|
2897
2950
|
getWord,
|
|
2898
2951
|
getWordDefinitions,
|
|
2899
2952
|
getWordDefinitionsWithFurigana,
|
|
2900
2953
|
isGrammar,
|
|
2901
2954
|
isKana,
|
|
2902
2955
|
isKanji,
|
|
2956
|
+
isObjectArray,
|
|
2903
2957
|
isRadical,
|
|
2904
2958
|
isStringArray,
|
|
2905
|
-
isValidArray,
|
|
2906
2959
|
isValidArrayWithFirstElement,
|
|
2907
2960
|
isWord,
|
|
2908
2961
|
notSearchedForms,
|