henkan 2.0.4 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +789 -736
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +781 -730
- package/dist/index.mjs.map +3 -3
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +93 -19
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +55 -28
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +12 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertJawiktionaryAsync.md +1 -1
- package/docs/api/functions/convertJawiktionarySync.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/convertTanakaCorpusWithFurigana.md +1 -1
- package/docs/api/functions/createEntryMaps.md +59 -0
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +10 -10
- package/docs/api/functions/getKanjiExtended.md +8 -8
- package/docs/api/functions/getValidForms.md +39 -0
- package/docs/api/functions/getWord.md +11 -11
- package/docs/api/functions/getWordDefinitions.md +1 -1
- package/docs/api/functions/getWordDefinitionsWithFurigana.md +1 -1
- package/docs/api/functions/isObjectArray.md +27 -0
- package/docs/api/functions/isStringArray.md +2 -2
- package/docs/api/functions/isValidArrayWithFirstElement.md +2 -2
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/interfaces/DefaultNoteInfo.md +4 -4
- package/docs/api/interfaces/Definition.md +4 -4
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +15 -15
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +9 -9
- package/docs/api/interfaces/EntryMaps.md +83 -0
- package/docs/api/interfaces/ExamplePart.md +8 -8
- package/docs/api/interfaces/GlossSpecificNumber.md +4 -4
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/JaWiktionaryEntry.md +8 -20
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +24 -24
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/NoteHeaderKeys.md +7 -7
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ReadingsKanjiFormsPair.md +31 -0
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -8
- package/docs/api/interfaces/Word.md +15 -15
- package/docs/api/interfaces/WordDefinitionPair.md +4 -4
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictTranslation.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/KanjiEntryMap.md +13 -0
- package/docs/api/type-aliases/KanjiSVGMap.md +13 -0
- package/docs/api/type-aliases/KanjiWordsMap.md +13 -0
- package/docs/api/type-aliases/Result.md +1 -1
- package/docs/api/type-aliases/StringNumber.md +13 -0
- package/docs/api/type-aliases/WordDefinitionsMap.md +13 -0
- package/docs/api/type-aliases/WordExamplesMap.md +13 -0
- package/docs/api/type-aliases/WordIDEntryMap.md +13 -0
- package/package.json +4 -4
- package/docs/api/functions/isValidArray.md +0 -27
package/dist/index.mjs
CHANGED
|
@@ -55,6 +55,9 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
55
55
|
["tsugaru-ben", ["dialect::tsugaru-ben", "Dialect: Tsugaru-ben"]],
|
|
56
56
|
["aichi dialect", ["dialect::aichi", "Dialect: Aichi"]],
|
|
57
57
|
["tochigi dialect", ["dialect::tochigi", "Dialect: Tochigi"]],
|
|
58
|
+
["lit", ["literal_meaning", "Literal meaning"]],
|
|
59
|
+
["expl", ["explanation", "Explanation"]],
|
|
60
|
+
["tm", ["trademark", "Trademark"]],
|
|
58
61
|
["adjective (keiyoushi)", ["adjective::i", "\u3044-adjective", "\u5F62\u5BB9\u8A5E"]],
|
|
59
62
|
["'taru' adjective", ["adjective::taru", "\u305F\u308B-adjective", "\u5F62\u5BB9\u52D5\u8A5E"]],
|
|
60
63
|
[
|
|
@@ -1189,14 +1192,14 @@ var KuromojiAnalyzer = __require("kuroshiro-analyzer-kuromoji");
|
|
|
1189
1192
|
/**
 * Upper-cases the first character of a string and leaves the remainder untouched.
 *
 * @param {string} value - Text to capitalize.
 * @returns {string} `value` with its first character upper-cased; an empty
 *   string is returned unchanged.
 */
function capitalizeString(value) {
  const head = value.charAt(0);
  const tail = value.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
|
|
1192
|
-
function isValidArray(arg) {
|
|
1193
|
-
return arg !== null && arg !== void 0 && Array.isArray(arg);
|
|
1194
|
-
}
|
|
1195
1195
|
/**
 * Tells whether the argument is an array holding at least one element.
 *
 * @param {unknown} arg - Value to inspect.
 * @returns {boolean} `true` only for a non-empty array.
 */
function isValidArrayWithFirstElement(arg) {
  if (!Array.isArray(arg)) return false;
  return arg.length > 0;
}
|
|
1198
1198
|
/**
 * Tells whether the argument is a non-empty array made up solely of strings.
 *
 * @param {unknown} arg - Value to inspect.
 * @returns {boolean} `true` when `arg` is a non-empty array and no element is
 *   a non-string.
 */
function isStringArray(arg) {
  if (!isValidArrayWithFirstElement(arg)) return false;
  const hasNonString = arg.some((element) => typeof element !== "string");
  return !hasNonString;
}
|
|
1201
|
+
/**
 * Tells whether the argument is a non-empty array whose elements are all
 * non-null objects.
 *
 * `typeof null` is `"object"`, so `null` is rejected explicitly; otherwise
 * callers that go on to read properties of each element would throw.
 *
 * @param {unknown} arg - Value to inspect.
 * @returns {boolean} `true` when `arg` is a non-empty array and every element
 *   is a non-null object (nested arrays count as objects).
 */
function isObjectArray(arg) {
  return (
    isValidArrayWithFirstElement(arg) &&
    arg.every((element) => element !== null && typeof element === "object")
  );
}
|
|
1201
1204
|
function shuffleArray(arr) {
|
|
1202
1205
|
const a = arr.slice();
|
|
@@ -1208,6 +1211,37 @@ function shuffleArray(arr) {
|
|
|
1208
1211
|
}
|
|
1209
1212
|
return a;
|
|
1210
1213
|
}
|
|
1214
|
+
/**
 * Selects the readings and kanji forms of a word that should be considered
 * when looking the word up.
 *
 * A form is dropped when any of its notes is listed in `notSearchedForms`.
 * When `wordIsCommon` is defined, a form must also carry `commonness` data.
 * A reading that has `kanjiFormRestrictions` is kept regardless of commonness,
 * and the kanji forms it names are accepted even if they would otherwise fail.
 * If no kanji form is accepted at all, every kanji form is returned.
 *
 * @param {Array<object>} readings - Reading objects (`reading`, optional
 *   `notes`, `commonness`, `kanjiFormRestrictions`).
 * @param {Array<object>|undefined} kanjiForms - Kanji form objects (`form`,
 *   optional `notes`, `commonness`), if the word has any.
 * @param {true|undefined} wordIsCommon - Defined when the word is common.
 * @returns {{readings: Array<object>, kanjiForms?: Array<object>}} The kept
 *   forms; `kanjiForms` is present only when the `kanjiForms` argument is truthy.
 */
function getValidForms(readings, kanjiForms, wordIsCommon) {
  const isSearchable = (form) => {
    if (form.notes === void 0) return true;
    return !form.notes.some((note) => notSearchedForms.has(note));
  };
  const meetsCommonness = (form) =>
    wordIsCommon === void 0 || form.commonness !== void 0;

  // Filled while filtering the readings, then consulted for the kanji forms.
  const restrictedKanjiForms = /* @__PURE__ */ new Set();

  const validReadings = readings.filter((reading) => {
    if (!isSearchable(reading)) return false;
    if (reading.kanjiFormRestrictions) {
      for (const restricted of reading.kanjiFormRestrictions)
        restrictedKanjiForms.add(restricted);
      return true;
    }
    return meetsCommonness(reading);
  });

  if (!kanjiForms) return { readings: validReadings };

  const isAccepted = (kanjiForm) =>
    (isSearchable(kanjiForm) && meetsCommonness(kanjiForm)) ||
    restrictedKanjiForms.has(kanjiForm.form);

  // With no accepted kanji form at all, fall back to keeping every one.
  const anyAccepted = kanjiForms.some((kanjiForm) => isAccepted(kanjiForm));
  const validKanjiForms = kanjiForms.filter(
    (kanjiForm) => !anyAccepted || isAccepted(kanjiForm)
  );

  return { readings: validReadings, kanjiForms: validKanjiForms };
}
|
|
1211
1245
|
function convertJMdict(xmlString, examples) {
|
|
1212
1246
|
const dictParsed = libxml.parseXml(xmlString, {
|
|
1213
1247
|
dtdvalid: true,
|
|
@@ -1216,144 +1250,116 @@ function convertJMdict(xmlString, examples) {
|
|
|
1216
1250
|
recover: false
|
|
1217
1251
|
});
|
|
1218
1252
|
const dict = [];
|
|
1219
|
-
xml.parseString(dictParsed, (
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1253
|
+
xml.parseString(dictParsed, (_err, result) => {
|
|
1254
|
+
const tanakaParts = examples && examples.length > 0 ? new Set(
|
|
1255
|
+
examples.flatMap(
|
|
1256
|
+
(example) => example.parts.flatMap((part) => [
|
|
1257
|
+
part.baseForm,
|
|
1258
|
+
...part.reading ? [part.reading] : [],
|
|
1259
|
+
...part.inflectedForm ? [part.inflectedForm] : [],
|
|
1260
|
+
...part.referenceID ? [part.referenceID] : []
|
|
1261
|
+
])
|
|
1262
|
+
)
|
|
1263
|
+
) : void 0;
|
|
1264
|
+
for (const entry of result.JMdict.entry) {
|
|
1265
|
+
const entryObj = {
|
|
1266
|
+
id: entry.ent_seq[0],
|
|
1267
|
+
readings: [],
|
|
1268
|
+
meanings: []
|
|
1269
|
+
};
|
|
1270
|
+
const kanjiForms = entry.k_ele;
|
|
1271
|
+
const readings = entry.r_ele;
|
|
1272
|
+
const meanings = entry.sense;
|
|
1273
|
+
if (isObjectArray(kanjiForms)) {
|
|
1274
|
+
entryObj.kanjiForms = [];
|
|
1275
|
+
for (const kanjiForm of kanjiForms) {
|
|
1276
|
+
const form = {
|
|
1277
|
+
form: kanjiForm.keb[0]
|
|
1236
1278
|
};
|
|
1237
|
-
if (
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
if (meaningObj.misc && meaningObj.misc.includes(
|
|
1309
|
-
"word usually written using kana alone"
|
|
1310
|
-
))
|
|
1311
|
-
usuallyInKanaMeanings++;
|
|
1312
|
-
}
|
|
1313
|
-
if (isStringArray(meaning.dial))
|
|
1314
|
-
meaningObj.dialects = meaning.dial;
|
|
1315
|
-
if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0)
|
|
1316
|
-
entryObj.meanings.push(meaningObj);
|
|
1317
|
-
}
|
|
1318
|
-
if (entryObj.meanings.length === usuallyInKanaMeanings)
|
|
1319
|
-
entryObj.usuallyInKana = true;
|
|
1320
|
-
}
|
|
1321
|
-
if (examples) {
|
|
1322
|
-
const readings2 = new Set(
|
|
1323
|
-
entryObj.readings.filter(
|
|
1324
|
-
(reading) => reading.notes === void 0 || !reading.notes.some(
|
|
1325
|
-
(note) => notSearchedForms.has(note)
|
|
1326
|
-
) || reading.commonness
|
|
1327
|
-
).map((reading) => reading.reading)
|
|
1328
|
-
);
|
|
1329
|
-
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1330
|
-
entryObj.kanjiForms.map(
|
|
1331
|
-
(kanjiForm) => kanjiForm.form
|
|
1332
|
-
)
|
|
1333
|
-
) : void 0;
|
|
1334
|
-
let existsExample = false;
|
|
1335
|
-
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1336
|
-
for (const kf of kanjiForms2)
|
|
1337
|
-
if (tanakaParts.has(kf)) {
|
|
1338
|
-
existsExample = true;
|
|
1339
|
-
break;
|
|
1340
|
-
}
|
|
1341
|
-
}
|
|
1342
|
-
if (!existsExample && readings2.size > 0 && tanakaParts) {
|
|
1343
|
-
for (const r of readings2)
|
|
1344
|
-
if (tanakaParts.has(r)) {
|
|
1345
|
-
existsExample = true;
|
|
1346
|
-
break;
|
|
1347
|
-
}
|
|
1279
|
+
if (isStringArray(kanjiForm.ke_inf)) form.notes = kanjiForm.ke_inf;
|
|
1280
|
+
if (isStringArray(kanjiForm.ke_pri)) {
|
|
1281
|
+
form.commonness = kanjiForm.ke_pri;
|
|
1282
|
+
if (entryObj.isCommon === void 0) entryObj.isCommon = true;
|
|
1283
|
+
}
|
|
1284
|
+
entryObj.kanjiForms.push(form);
|
|
1285
|
+
}
|
|
1286
|
+
}
|
|
1287
|
+
for (const reading of readings) {
|
|
1288
|
+
const readingObj = {
|
|
1289
|
+
reading: reading.reb[0]
|
|
1290
|
+
};
|
|
1291
|
+
if (isStringArray(reading.re_inf)) readingObj.notes = reading.re_inf;
|
|
1292
|
+
if (isStringArray(reading.re_restr))
|
|
1293
|
+
readingObj.kanjiFormRestrictions = reading.re_restr;
|
|
1294
|
+
if (isStringArray(reading.re_pri)) {
|
|
1295
|
+
readingObj.commonness = reading.re_pri;
|
|
1296
|
+
if (entryObj.isCommon === void 0) entryObj.isCommon = true;
|
|
1297
|
+
}
|
|
1298
|
+
entryObj.readings.push(readingObj);
|
|
1299
|
+
}
|
|
1300
|
+
let usuallyInKanaMeanings = 0;
|
|
1301
|
+
for (const meaning of meanings) {
|
|
1302
|
+
const meaningObj = { partOfSpeech: [], translations: [] };
|
|
1303
|
+
meaningObj.partOfSpeech = meaning.pos;
|
|
1304
|
+
meaningObj.translations = [];
|
|
1305
|
+
for (const gloss of meaning.gloss)
|
|
1306
|
+
if (typeof gloss === "string") meaningObj.translations.push(gloss);
|
|
1307
|
+
else if (typeof gloss === "object" && gloss._ && typeof gloss._ === "string" && gloss.$ && typeof gloss.$ === "object" && gloss.$.g_type && (gloss.$.g_type === "lit" || gloss.$.g_type === "expl" || gloss.$.g_type === "tm"))
|
|
1308
|
+
meaningObj.translations.push({
|
|
1309
|
+
translation: gloss._,
|
|
1310
|
+
type: gloss.$.g_type
|
|
1311
|
+
});
|
|
1312
|
+
if (isStringArray(meaning.xref)) meaningObj.references = meaning.xref;
|
|
1313
|
+
if (isStringArray(meaning.stagk))
|
|
1314
|
+
meaningObj.kanjiFormRestrictions = meaning.stagk;
|
|
1315
|
+
if (isStringArray(meaning.stagr))
|
|
1316
|
+
meaningObj.readingRestrictions = meaning.stagr;
|
|
1317
|
+
if (isStringArray(meaning.ant)) meaningObj.antonyms = meaning.ant;
|
|
1318
|
+
if (isStringArray(meaning.field)) meaningObj.fields = meaning.field;
|
|
1319
|
+
if (isStringArray(meaning.s_inf)) meaningObj.info = meaning.s_inf;
|
|
1320
|
+
if (isStringArray(meaning.misc)) {
|
|
1321
|
+
meaningObj.misc = meaning.misc;
|
|
1322
|
+
if (meaningObj.misc && meaningObj.misc.includes("word usually written using kana alone"))
|
|
1323
|
+
usuallyInKanaMeanings++;
|
|
1324
|
+
}
|
|
1325
|
+
if (isStringArray(meaning.dial)) meaningObj.dialects = meaning.dial;
|
|
1326
|
+
entryObj.meanings.push(meaningObj);
|
|
1327
|
+
}
|
|
1328
|
+
if (entryObj.meanings.length === usuallyInKanaMeanings)
|
|
1329
|
+
entryObj.usuallyInKana = true;
|
|
1330
|
+
if (examples) {
|
|
1331
|
+
let existsExample = false;
|
|
1332
|
+
if (tanakaParts && tanakaParts.has(entryObj.id)) existsExample = true;
|
|
1333
|
+
if (!existsExample) {
|
|
1334
|
+
const rkf = getValidForms(
|
|
1335
|
+
entryObj.readings,
|
|
1336
|
+
entryObj.kanjiForms,
|
|
1337
|
+
entryObj.isCommon
|
|
1338
|
+
);
|
|
1339
|
+
const readings2 = new Set(
|
|
1340
|
+
rkf.readings.map((r) => r.reading)
|
|
1341
|
+
);
|
|
1342
|
+
const kanjiForms2 = rkf.kanjiForms ? new Set(
|
|
1343
|
+
rkf.kanjiForms.map((kf) => kf.form)
|
|
1344
|
+
) : void 0;
|
|
1345
|
+
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1346
|
+
for (const kf of kanjiForms2)
|
|
1347
|
+
if (tanakaParts.has(kf)) {
|
|
1348
|
+
existsExample = true;
|
|
1349
|
+
break;
|
|
1348
1350
|
}
|
|
1349
|
-
|
|
1351
|
+
}
|
|
1352
|
+
if (entryObj.kanjiForms === void 0 && readings2.size > 0 && tanakaParts) {
|
|
1353
|
+
for (const r of readings2)
|
|
1354
|
+
if (tanakaParts.has(r)) {
|
|
1350
1355
|
existsExample = true;
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1354
|
-
dict.push(entryObj);
|
|
1356
|
+
break;
|
|
1357
|
+
}
|
|
1355
1358
|
}
|
|
1356
1359
|
}
|
|
1360
|
+
if (existsExample) entryObj.hasPhrases = true;
|
|
1361
|
+
}
|
|
1362
|
+
dict.push(entryObj);
|
|
1357
1363
|
}
|
|
1358
1364
|
});
|
|
1359
1365
|
return dict;
|
|
@@ -1366,75 +1372,64 @@ function convertKanjiDic(xmlString) {
|
|
|
1366
1372
|
recover: false
|
|
1367
1373
|
});
|
|
1368
1374
|
const dict = [];
|
|
1369
|
-
xml.parseString(dictParsed, (
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
const
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
});
|
|
1405
|
-
}
|
|
1406
|
-
if (isValidArray(group.meaning)) {
|
|
1407
|
-
for (const meaning of group.meaning)
|
|
1408
|
-
if (typeof meaning === "string") {
|
|
1409
|
-
if (kanjiObj.isKokuji === void 0 && meaning === "(kokuji)")
|
|
1410
|
-
kanjiObj.isKokuji = true;
|
|
1411
|
-
groupObj.meanings.push(meaning);
|
|
1412
|
-
}
|
|
1413
|
-
}
|
|
1414
|
-
if (groupObj.readings.length > 0 || groupObj.meanings.length > 0) {
|
|
1415
|
-
if (groupObj.readings.length === 0)
|
|
1416
|
-
delete groupObj.readings;
|
|
1417
|
-
if (groupObj.meanings.length === 0)
|
|
1418
|
-
delete groupObj.meanings;
|
|
1419
|
-
rmObj.groups.push(groupObj);
|
|
1375
|
+
xml.parseString(dictParsed, (_err, result) => {
|
|
1376
|
+
for (const entry of result.kanjidic2.character) {
|
|
1377
|
+
const kanjiObj = {
|
|
1378
|
+
kanji: entry.literal[0],
|
|
1379
|
+
readingMeaning: []
|
|
1380
|
+
};
|
|
1381
|
+
if (typeof kanjiObj.kanji === "string" && kanjiObj.kanji.length === 1) {
|
|
1382
|
+
const misc = entry.misc[0];
|
|
1383
|
+
kanjiObj.misc = { strokeNumber: misc.stroke_count[0] };
|
|
1384
|
+
if (isStringArray(misc.grade)) kanjiObj.misc.grade = misc.grade[0];
|
|
1385
|
+
if (isStringArray(misc.freq)) kanjiObj.misc.frequency = misc.freq[0];
|
|
1386
|
+
if (isStringArray(misc.jlpt) && ["5", "4", "3", "2", "1"].includes(misc.jlpt[0]))
|
|
1387
|
+
kanjiObj.misc.jlpt = `N${misc.jlpt[0]}`;
|
|
1388
|
+
if (isObjectArray(entry.reading_meaning))
|
|
1389
|
+
for (const rm of entry.reading_meaning) {
|
|
1390
|
+
const rmObj = { groups: [] };
|
|
1391
|
+
for (const group of rm.rmgroup) {
|
|
1392
|
+
const groupObj = {
|
|
1393
|
+
readings: [],
|
|
1394
|
+
meanings: []
|
|
1395
|
+
};
|
|
1396
|
+
if (isObjectArray(group.reading)) {
|
|
1397
|
+
for (const reading of group.reading)
|
|
1398
|
+
if (reading._ && typeof reading._ === "string" && reading.$ && typeof reading.$ === "object" && reading.$.r_type && (reading.$.r_type === "ja_on" || reading.$.r_type === "ja_kun"))
|
|
1399
|
+
groupObj.readings.push({
|
|
1400
|
+
reading: reading._,
|
|
1401
|
+
type: reading.$.r_type
|
|
1402
|
+
});
|
|
1403
|
+
}
|
|
1404
|
+
if (Array.isArray(group.meaning)) {
|
|
1405
|
+
for (const meaning of group.meaning)
|
|
1406
|
+
if (typeof meaning === "string") {
|
|
1407
|
+
if (kanjiObj.isKokuji === void 0 && meaning === "(kokuji)") {
|
|
1408
|
+
kanjiObj.isKokuji = true;
|
|
1409
|
+
continue;
|
|
1420
1410
|
}
|
|
1411
|
+
groupObj.meanings.push(meaning);
|
|
1421
1412
|
}
|
|
1422
|
-
if (isStringArray(rm.nanori) && rm.nanori.length > 0)
|
|
1423
|
-
rmObj.nanori = rm.nanori;
|
|
1424
|
-
if (rmObj.groups.length > 0 || rmObj.nanori) {
|
|
1425
|
-
if (kanjiObj.readingMeaning === void 0)
|
|
1426
|
-
kanjiObj.readingMeaning = [];
|
|
1427
|
-
kanjiObj.readingMeaning.push(rmObj);
|
|
1428
|
-
}
|
|
1429
1413
|
}
|
|
1430
|
-
|
|
1414
|
+
if (groupObj.readings.length > 0 || groupObj.meanings.length > 0) {
|
|
1415
|
+
if (groupObj.readings.length === 0) delete groupObj.readings;
|
|
1416
|
+
if (groupObj.meanings.length === 0) delete groupObj.meanings;
|
|
1417
|
+
rmObj.groups.push(groupObj);
|
|
1418
|
+
}
|
|
1419
|
+
}
|
|
1420
|
+
if (isStringArray(rm.nanori) && rm.nanori.length > 0)
|
|
1421
|
+
rmObj.nanori = rm.nanori;
|
|
1422
|
+
if (rmObj.groups.length > 0 || rmObj.nanori)
|
|
1423
|
+
kanjiObj.readingMeaning.push(rmObj);
|
|
1431
1424
|
}
|
|
1432
|
-
|
|
1425
|
+
dict.push(kanjiObj);
|
|
1426
|
+
}
|
|
1433
1427
|
}
|
|
1434
1428
|
});
|
|
1435
1429
|
return dict;
|
|
1436
1430
|
}
|
|
1437
1431
|
function convertTanakaCorpus(tanakaString) {
|
|
1432
|
+
var _a;
|
|
1438
1433
|
const tanakaArray = [];
|
|
1439
1434
|
const tanakaParsed = tanakaString.split("\n");
|
|
1440
1435
|
for (let i = 0; i <= tanakaParsed.length; i += 2) {
|
|
@@ -1444,46 +1439,38 @@ function convertTanakaCorpus(tanakaString) {
|
|
|
1444
1439
|
a = a.replace("A: ", "");
|
|
1445
1440
|
b = b.replace("B: ", "");
|
|
1446
1441
|
const idMatch = regexps.tanakaID.exec(a);
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
if (inflectedForm) examplePart.inflectedForm = inflectedForm;
|
|
1469
|
-
if (baseForm.endsWith("~")) {
|
|
1470
|
-
examplePart.edited = true;
|
|
1471
|
-
examplePart.baseForm = examplePart.baseForm.replace("~", "");
|
|
1472
|
-
}
|
|
1473
|
-
bParts.push(examplePart);
|
|
1474
|
-
}
|
|
1475
|
-
}
|
|
1442
|
+
const aParts = a.replace(regexps.tanakaID, "").split(" ");
|
|
1443
|
+
const bRawParts = b.split(" ").filter((part) => part.trim().length !== 0);
|
|
1444
|
+
const bParts = [];
|
|
1445
|
+
for (const part of bRawParts) {
|
|
1446
|
+
const partMatches = regexps.tanakaPart.exec(part);
|
|
1447
|
+
const baseForm = partMatches == null ? void 0 : partMatches.groups["base"];
|
|
1448
|
+
const examplePart = { baseForm };
|
|
1449
|
+
const reading = partMatches == null ? void 0 : partMatches.groups["reading"];
|
|
1450
|
+
const glossNumber = partMatches == null ? void 0 : partMatches.groups["glossnum"];
|
|
1451
|
+
const inflectedForm = partMatches == null ? void 0 : partMatches.groups["inflection"];
|
|
1452
|
+
if (reading)
|
|
1453
|
+
if (regexps.tanakaReferenceID.test(reading)) {
|
|
1454
|
+
const referenceID = regexps.tanakaReferenceID.exec(reading);
|
|
1455
|
+
examplePart.referenceID = referenceID == null ? void 0 : referenceID.groups["entryid"];
|
|
1456
|
+
} else examplePart.reading = reading;
|
|
1457
|
+
if (glossNumber)
|
|
1458
|
+
examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
|
|
1459
|
+
if (inflectedForm) examplePart.inflectedForm = inflectedForm;
|
|
1460
|
+
if (baseForm.endsWith("~")) {
|
|
1461
|
+
examplePart.edited = true;
|
|
1462
|
+
examplePart.baseForm = examplePart.baseForm.replace("~", "");
|
|
1476
1463
|
}
|
|
1477
|
-
|
|
1478
|
-
const translation = aParts[1];
|
|
1479
|
-
if (phrase && translation)
|
|
1480
|
-
tanakaArray.push({
|
|
1481
|
-
id: idMatch.groups["id"].trim(),
|
|
1482
|
-
phrase: phrase.trim(),
|
|
1483
|
-
translation: translation.trim(),
|
|
1484
|
-
parts: bParts
|
|
1485
|
-
});
|
|
1464
|
+
bParts.push(examplePart);
|
|
1486
1465
|
}
|
|
1466
|
+
const phrase = aParts[0];
|
|
1467
|
+
const translation = aParts[1];
|
|
1468
|
+
tanakaArray.push({
|
|
1469
|
+
id: (_a = idMatch == null ? void 0 : idMatch.groups["id"]) == null ? void 0 : _a.trim(),
|
|
1470
|
+
phrase: phrase.trim(),
|
|
1471
|
+
translation: translation.trim(),
|
|
1472
|
+
parts: bParts
|
|
1473
|
+
});
|
|
1487
1474
|
}
|
|
1488
1475
|
}
|
|
1489
1476
|
return tanakaArray;
|
|
@@ -1523,27 +1510,23 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1523
1510
|
};
|
|
1524
1511
|
let j = i + 1;
|
|
1525
1512
|
let kanjiLine = fileParsed[j];
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
kanjiList.push(kanjiObj);
|
|
1537
|
-
}
|
|
1538
|
-
j++;
|
|
1539
|
-
kanjiLine = fileParsed[j];
|
|
1540
|
-
if (!kanjiLine) continue;
|
|
1541
|
-
if (kanjiLine.startsWith("$ ")) i = j - 1;
|
|
1513
|
+
const kanjiList = [];
|
|
1514
|
+
while (kanjiLine && !kanjiLine.startsWith("$ ")) {
|
|
1515
|
+
const kanjis = kanjiLine.split("");
|
|
1516
|
+
for (const kanji of kanjis) {
|
|
1517
|
+
const foundKanji = kanjiDic.find(
|
|
1518
|
+
(dictKanji) => dictKanji.kanji === kanji
|
|
1519
|
+
);
|
|
1520
|
+
let kanjiObj = { kanji };
|
|
1521
|
+
kanjiObj = foundKanji;
|
|
1522
|
+
kanjiList.push(kanjiObj);
|
|
1542
1523
|
}
|
|
1543
|
-
|
|
1544
|
-
if (
|
|
1545
|
-
|
|
1524
|
+
kanjiLine = fileParsed[++j];
|
|
1525
|
+
if (!kanjiLine) continue;
|
|
1526
|
+
if (kanjiLine.startsWith("$ ")) i = j - 1;
|
|
1546
1527
|
}
|
|
1528
|
+
if (kanjiList.length > 0) radical.kanji = kanjiList;
|
|
1529
|
+
radicals.push(radical);
|
|
1547
1530
|
}
|
|
1548
1531
|
}
|
|
1549
1532
|
return radicals;
|
|
@@ -1556,66 +1539,185 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
|
|
|
1556
1539
|
const split = line.split(" : ");
|
|
1557
1540
|
const kanjiChar = split[0];
|
|
1558
1541
|
const radicalsRow = split[1];
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1542
|
+
const kanji = {
|
|
1543
|
+
kanji: kanjiChar,
|
|
1544
|
+
radicals: []
|
|
1545
|
+
};
|
|
1546
|
+
const radicals = radicalsRow.split(" ");
|
|
1547
|
+
for (const radical of radicals) {
|
|
1548
|
+
const foundRadical = kanjiDic.find(
|
|
1549
|
+
(dictKanji) => dictKanji.kanji === radical
|
|
1550
|
+
);
|
|
1551
|
+
let radicalObj = foundRadical ?? { kanji: radical };
|
|
1552
|
+
if (!foundRadical) {
|
|
1553
|
+
const katakanaChar = katakanaList.find(
|
|
1554
|
+
(kana) => kana.kana === radical
|
|
1555
|
+
);
|
|
1556
|
+
if (!katakanaChar) continue;
|
|
1557
|
+
radicalObj = {
|
|
1558
|
+
kanji: katakanaChar.kana,
|
|
1559
|
+
readingMeaning: [
|
|
1560
|
+
{
|
|
1561
|
+
groups: [
|
|
1579
1562
|
{
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
readings: [
|
|
1583
|
-
{ reading: katakanaChar.kana, type: "ja_on" }
|
|
1584
|
-
],
|
|
1585
|
-
meanings: [katakanaChar.reading]
|
|
1586
|
-
}
|
|
1587
|
-
]
|
|
1563
|
+
readings: [{ reading: katakanaChar.kana, type: "ja_on" }],
|
|
1564
|
+
meanings: [katakanaChar.reading]
|
|
1588
1565
|
}
|
|
1589
1566
|
]
|
|
1590
|
-
}
|
|
1567
|
+
}
|
|
1568
|
+
]
|
|
1569
|
+
};
|
|
1570
|
+
}
|
|
1571
|
+
kanji.radicals.push(radicalObj);
|
|
1572
|
+
}
|
|
1573
|
+
if (kanji.kanji.length === 1 && kanji.radicals.length > 0)
|
|
1574
|
+
kanjiWithRadicals.push(kanji);
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
return kanjiWithRadicals;
|
|
1578
|
+
}
|
|
1579
|
+
/**
 * Builds lookup maps over the converted dictionaries so later steps can
 * resolve entries by key instead of scanning arrays.
 *
 * Every argument is optional; a map is only included in the result when it
 * ended up non-empty.
 *
 * @param {Array<object>} [jmDict] - Word entries (`id`, `readings`, optional
 *   `kanjiForms`, `isCommon`).
 * @param {Array<object>} [kanjiDic] - Kanji entries (`kanji`).
 * @param {Array<object>} [tanakaExamples] - Example sentences (`id`, `parts`).
 * @param {Array<object>} [wordDefinitionPairs] - Pairs of `wordID` and `definitions`.
 * @param {Array<string>} [svgList] - SVG file names whose base name (the text
 *   before the first `.`) is the kanji's hexadecimal code point, optionally
 *   prefixed with `0`.
 * @returns {object} Any of `wordIDEntryMap`, `kanjiWordsMap`, `kanjiEntryMap`,
 *   `wordExamplesMap`, `wordDefinitionsMap`, `kanjiSVGMap`.
 */
function createEntryMaps(jmDict, kanjiDic, tanakaExamples, wordDefinitionPairs, svgList) {
  const kanjiEntryMap = /* @__PURE__ */ new Map();
  const wordIDEntryMap = /* @__PURE__ */ new Map();
  const kanjiWordsMap = /* @__PURE__ */ new Map();
  const wordExamplesMap = /* @__PURE__ */ new Map();
  const wordDefinitionsMap = /* @__PURE__ */ new Map();
  const kanjiSVGMap = /* @__PURE__ */ new Map();
  const wordPartsMap = /* @__PURE__ */ new Map();
  const partExamplesMap = /* @__PURE__ */ new Map();
  const entryParts = /* @__PURE__ */ new Set();

  if (kanjiDic)
    for (const kanji of kanjiDic) kanjiEntryMap.set(kanji.kanji, kanji);

  if (wordDefinitionPairs)
    for (const pair of wordDefinitionPairs)
      wordDefinitionsMap.set(pair.wordID, pair.definitions);

  if (kanjiDic && svgList) {
    // Index the file list once instead of scanning it for every kanji.
    // Only the first file per base name is recorded, together with its
    // position, so the earliest matching file still wins.
    const firstIndexByBaseName = /* @__PURE__ */ new Map();
    for (let index = 0; index < svgList.length; index++) {
      const baseName = svgList[index].split(".")[0].toLowerCase();
      if (!firstIndexByBaseName.has(baseName))
        firstIndexByBaseName.set(baseName, index);
    }
    for (const kanji of kanjiDic) {
      const codePoint = kanji.kanji.codePointAt(0).toString(16).toLowerCase();
      const plainIndex = firstIndexByBaseName.get(codePoint);
      const paddedIndex = firstIndexByBaseName.get(`0${codePoint}`);
      let svgIndex;
      if (plainIndex === undefined) svgIndex = paddedIndex;
      else if (paddedIndex === undefined) svgIndex = plainIndex;
      else svgIndex = Math.min(plainIndex, paddedIndex);
      const svg = svgIndex === undefined ? undefined : svgList[svgIndex];
      if (svg) kanjiSVGMap.set(kanji.kanji, svg);
    }
  }

  if (jmDict) {
    for (const word of jmDict) {
      wordIDEntryMap.set(word.id, word);

      // NOTE(review): a word is appended once per matching character
      // occurrence, so it can appear several times under the same kanji
      // (repeated kanji, or several kanji forms sharing one) - confirm whether
      // consumers expect that.
      if (word.kanjiForms)
        for (const kf of word.kanjiForms)
          for (const char of kf.form.split("").filter((c) => regexps.kanji.test(c))) {
            const words = kanjiWordsMap.get(char);
            if (words) words.push(word);
            else kanjiWordsMap.set(char, [word]);
          }

      if (tanakaExamples) {
        const rkf = getValidForms(word.readings, word.kanjiForms, word.isCommon);
        // Keys under which an example may reference this word, in lookup
        // order: readings, kanji forms, then the entry ID.
        const wordParts = /* @__PURE__ */ new Set();
        for (const reading of rkf.readings) wordParts.add(reading.reading);
        if (rkf.kanjiForms)
          for (const kanjiForm of rkf.kanjiForms) wordParts.add(kanjiForm.form);
        wordParts.add(word.id);
        for (const part of wordParts) entryParts.add(part);
        wordPartsMap.set(word.id, wordParts);
      }
    }

    if (tanakaExamples) {
      // Records `example` under `key`, but only for keys some word uses.
      const indexExample = (key, example) => {
        if (!entryParts.has(key)) return;
        const examples = partExamplesMap.get(key);
        if (examples) examples.push(example);
        else partExamplesMap.set(key, [example]);
      };

      for (const ex of tanakaExamples)
        for (const part of ex.parts) {
          indexExample(part.baseForm, ex);
          if (part.reading) indexExample(part.reading, ex);
          if (part.inflectedForm) indexExample(part.inflectedForm, ex);
          if (part.referenceID) indexExample(part.referenceID, ex);
        }

      for (const word of jmDict) {
        const wordParts = wordPartsMap.get(word.id);
        const seenEx = /* @__PURE__ */ new Set();
        const validExamples = [];
        for (const p of wordParts) {
          const examplesForPart = partExamplesMap.get(p);
          if (!examplesForPart) continue;
          for (const ex of examplesForPart) {
            // Checked per example (not once up front): the same example is
            // listed repeatedly under one key when it uses that part twice.
            if (seenEx.has(ex.id)) continue;
            seenEx.add(ex.id);
            validExamples.push(ex);
          }
        }
        if (validExamples.length > 0)
          wordExamplesMap.set(word.id, validExamples);
      }
    }
  }

  return {
    ...(wordIDEntryMap.size > 0 ? { wordIDEntryMap } : {}),
    ...(kanjiWordsMap.size > 0 ? { kanjiWordsMap } : {}),
    ...(kanjiEntryMap.size > 0 ? { kanjiEntryMap } : {}),
    ...(wordExamplesMap.size > 0 ? { wordExamplesMap } : {}),
    ...(wordDefinitionsMap.size > 0 ? { wordDefinitionsMap } : {}),
    ...(kanjiSVGMap.size > 0 ? { kanjiSVGMap } : {})
  };
}
|
|
1602
1706
|
function mapEntry(entry) {
|
|
1603
1707
|
return {
|
|
1604
1708
|
word: entry.word,
|
|
1605
1709
|
pos_title: entry.pos_title,
|
|
1606
1710
|
senses: entry.senses.filter(
|
|
1607
|
-
(sense) =>
|
|
1711
|
+
(sense) => isObjectArray(sense.form_of) && sense.form_of.every(
|
|
1608
1712
|
(form) => form.word && typeof form.word === "string"
|
|
1609
1713
|
) || isStringArray(sense.glosses)
|
|
1610
1714
|
).map((sense) => ({
|
|
1611
1715
|
...sense.form_of ? {
|
|
1612
|
-
form_of: sense.form_of.map((form) =>
|
|
1613
|
-
word: form.word
|
|
1614
|
-
}))
|
|
1716
|
+
form_of: sense.form_of.map((form) => form.word)
|
|
1615
1717
|
} : {},
|
|
1616
1718
|
glosses: sense.glosses
|
|
1617
1719
|
})),
|
|
1618
|
-
...
|
|
1720
|
+
...isObjectArray(entry.forms) && entry.forms.every((form) => typeof form.form === "string") ? { forms: entry.forms.map((form) => form.form) } : {}
|
|
1619
1721
|
};
|
|
1620
1722
|
}
|
|
1621
1723
|
function convertJawiktionarySync(buffer) {
|
|
@@ -1625,7 +1727,7 @@ function convertJawiktionarySync(buffer) {
|
|
|
1625
1727
|
const line = lines[i];
|
|
1626
1728
|
if (!line) continue;
|
|
1627
1729
|
const obj = JSON.parse(line);
|
|
1628
|
-
if (obj && typeof obj === "object" && obj.
|
|
1730
|
+
if (obj && typeof obj === "object" && typeof obj.lang === "string" && (obj.lang === "\u65E5\u672C\u8A9E" || obj.lang === "\u53E4\u5178\u65E5\u672C\u8A9E"))
|
|
1629
1731
|
entries.push(mapEntry(obj));
|
|
1630
1732
|
}
|
|
1631
1733
|
return entries;
|
|
@@ -1640,7 +1742,7 @@ async function convertJawiktionaryAsync(stream) {
|
|
|
1640
1742
|
const entries = [];
|
|
1641
1743
|
for await (const line of rl) {
|
|
1642
1744
|
const obj = JSON.parse(line.trim());
|
|
1643
|
-
if (obj && typeof obj === "object" && obj.
|
|
1745
|
+
if (obj && typeof obj === "object" && typeof obj.lang === "string" && (obj.lang === "\u65E5\u672C\u8A9E" || obj.lang === "\u53E4\u5178\u65E5\u672C\u8A9E"))
|
|
1644
1746
|
entries.push(mapEntry(obj));
|
|
1645
1747
|
}
|
|
1646
1748
|
rl.close();
|
|
@@ -1651,17 +1753,14 @@ async function convertJawiktionaryAsync(stream) {
|
|
|
1651
1753
|
);
|
|
1652
1754
|
}
|
|
1653
1755
|
function parseEntry(entry, definitions, definitionMap) {
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
definitions.push({ definition });
|
|
1663
|
-
}
|
|
1664
|
-
}
|
|
1756
|
+
for (const sense of entry.senses) {
|
|
1757
|
+
const definition = sense.glosses.join("");
|
|
1758
|
+
if (!definitions.some((def) => def.definition === definition)) {
|
|
1759
|
+
if (!definitionMap.has(definition))
|
|
1760
|
+
definitionMap.set(definition, { count: 1 });
|
|
1761
|
+
else definitionMap.get(definition).count++;
|
|
1762
|
+
definitions.push({ definition });
|
|
1763
|
+
}
|
|
1665
1764
|
}
|
|
1666
1765
|
}
|
|
1667
1766
|
function getWordDefinitions(entryList, jmDict) {
|
|
@@ -1669,31 +1768,40 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1669
1768
|
const entries = /* @__PURE__ */ new Map();
|
|
1670
1769
|
for (const entry of entryList) {
|
|
1671
1770
|
const ent = entries.get(entry.word);
|
|
1672
|
-
if (ent) ent.push(
|
|
1673
|
-
else entries.set(entry.word, [
|
|
1771
|
+
if (ent) ent.push(entry);
|
|
1772
|
+
else entries.set(entry.word, [entry]);
|
|
1674
1773
|
}
|
|
1675
1774
|
const japaneseDefinitions = [];
|
|
1676
1775
|
const definitionMap = /* @__PURE__ */ new Map();
|
|
1677
|
-
const
|
|
1776
|
+
const wordFormsMap = /* @__PURE__ */ new Map();
|
|
1678
1777
|
const validReadings = /* @__PURE__ */ new Set();
|
|
1679
1778
|
const validKanjiForms = /* @__PURE__ */ new Set();
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1779
|
+
const validForms = /* @__PURE__ */ new Set();
|
|
1780
|
+
for (const word of jmDict) {
|
|
1781
|
+
const wordReadings = /* @__PURE__ */ new Set();
|
|
1782
|
+
const wordKanjiForms = /* @__PURE__ */ new Set();
|
|
1783
|
+
const rkf = getValidForms(
|
|
1784
|
+
word.readings,
|
|
1785
|
+
word.kanjiForms,
|
|
1786
|
+
word.isCommon
|
|
1787
|
+
);
|
|
1788
|
+
if (rkf.readings.length > 0)
|
|
1789
|
+
for (const r of rkf.readings) {
|
|
1790
|
+
validReadings.add(r.reading);
|
|
1791
|
+
wordReadings.add(r.reading);
|
|
1792
|
+
validForms.add(r.reading);
|
|
1694
1793
|
}
|
|
1695
|
-
|
|
1696
|
-
|
|
1794
|
+
if (rkf.kanjiForms && rkf.kanjiForms.length > 0)
|
|
1795
|
+
for (const kf of rkf.kanjiForms) {
|
|
1796
|
+
validKanjiForms.add(kf.form);
|
|
1797
|
+
wordKanjiForms.add(kf.form);
|
|
1798
|
+
validForms.add(kf.form);
|
|
1799
|
+
}
|
|
1800
|
+
wordFormsMap.set(word.id, {
|
|
1801
|
+
readings: wordReadings,
|
|
1802
|
+
...wordKanjiForms.size > 0 ? { kanjiForms: wordKanjiForms } : {}
|
|
1803
|
+
});
|
|
1804
|
+
}
|
|
1697
1805
|
const validTitleEntries = /* @__PURE__ */ new Map();
|
|
1698
1806
|
const entriesWithFormTitlesGlobal = /* @__PURE__ */ new Map();
|
|
1699
1807
|
const entriesWithFormsGlobal = /* @__PURE__ */ new Map();
|
|
@@ -1705,34 +1813,27 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1705
1813
|
let valid = false;
|
|
1706
1814
|
if (validKanjiForms && validKanjiForms.has(entry.word)) {
|
|
1707
1815
|
valid = true;
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
if (gloss.trim().includes("\u6F22\u5B57\u8868\u8A18") || gloss.trim().includes("\u53C2\u7167")) {
|
|
1719
|
-
for (const r of validReadings)
|
|
1720
|
-
if (gloss.trim().includes(r)) {
|
|
1721
|
-
reading = r;
|
|
1722
|
-
break;
|
|
1723
|
-
}
|
|
1724
|
-
}
|
|
1816
|
+
for (const sense of entry.senses) {
|
|
1817
|
+
if (sense.form_of && sense.form_of.some((form) => validReadings.has(form)))
|
|
1818
|
+
validFormOfEntries.add(entry.word);
|
|
1819
|
+
for (const gloss of sense.glosses) {
|
|
1820
|
+
let hasForm = false;
|
|
1821
|
+
if (gloss.includes("\u8868\u8A18") || gloss.includes("\u53C2\u7167")) {
|
|
1822
|
+
for (const r of validForms)
|
|
1823
|
+
if (gloss.includes(r)) {
|
|
1824
|
+
hasForm = true;
|
|
1825
|
+
break;
|
|
1725
1826
|
}
|
|
1726
|
-
if (reading) validGlossesEntries.add(entry.word);
|
|
1727
|
-
}
|
|
1728
1827
|
}
|
|
1828
|
+
if (hasForm) validGlossesEntries.add(entry.word);
|
|
1729
1829
|
}
|
|
1730
|
-
|
|
1830
|
+
}
|
|
1831
|
+
if (entry.forms) {
|
|
1731
1832
|
for (const form of entry.forms)
|
|
1732
|
-
if (
|
|
1733
|
-
validFormsEntries.add(entry.word);
|
|
1833
|
+
if (validReadings.has(form)) validFormsEntries.add(entry.word);
|
|
1734
1834
|
}
|
|
1735
|
-
}
|
|
1835
|
+
}
|
|
1836
|
+
if (validReadings.has(entry.word)) {
|
|
1736
1837
|
valid = true;
|
|
1737
1838
|
const ftEntry = entriesWithFormTitlesGlobal.get(entry.word);
|
|
1738
1839
|
if (ftEntry) ftEntry.push(entry);
|
|
@@ -1743,8 +1844,8 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1743
1844
|
if (tEntry) tEntry.push(entry);
|
|
1744
1845
|
else validTitleEntries.set(entry.word, [entry]);
|
|
1745
1846
|
}
|
|
1746
|
-
if (
|
|
1747
|
-
(form) => validKanjiForms.has(form
|
|
1847
|
+
if (entry.forms && (validKanjiForms.has(entry.word) || validReadings.has(entry.word)) && entry.forms.some(
|
|
1848
|
+
(form) => validKanjiForms.has(form) || validReadings.has(form)
|
|
1748
1849
|
)) {
|
|
1749
1850
|
const wfEntry = entriesWithFormsGlobal.get(entry.word);
|
|
1750
1851
|
if (wfEntry) wfEntry.push(entry);
|
|
@@ -1799,7 +1900,7 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1799
1900
|
"\u982D\u5B57\u8A9E",
|
|
1800
1901
|
"\u63A5\u5C3E\u8A9E"
|
|
1801
1902
|
]) {
|
|
1802
|
-
|
|
1903
|
+
posMap.set(pos, {});
|
|
1803
1904
|
for (const te of vte)
|
|
1804
1905
|
if (te.pos_title === pos || te.pos_title === "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18") {
|
|
1805
1906
|
const posEntries = posMap.get(pos);
|
|
@@ -1836,86 +1937,73 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1836
1937
|
fge.length = 0;
|
|
1837
1938
|
wfe.length = 0;
|
|
1838
1939
|
const wordEntriesPairs = [];
|
|
1839
|
-
for (const word of
|
|
1940
|
+
for (const word of jmDict) {
|
|
1840
1941
|
const poses = /* @__PURE__ */ new Set();
|
|
1841
|
-
for (const m of word.meanings)
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
poses.add(notePos);
|
|
1853
|
-
}
|
|
1942
|
+
for (const m of word.meanings)
|
|
1943
|
+
for (const note of m.partOfSpeech) {
|
|
1944
|
+
const noteEntry = noteMap.get(note);
|
|
1945
|
+
if (noteEntry && noteEntry.length === 3) {
|
|
1946
|
+
const notePos = noteEntry[2];
|
|
1947
|
+
if (Array.isArray(notePos))
|
|
1948
|
+
for (const pos of notePos) {
|
|
1949
|
+
if (!poses.has(pos)) poses.add(pos);
|
|
1950
|
+
}
|
|
1951
|
+
else if (typeof notePos === "string" && !poses.has(notePos))
|
|
1952
|
+
poses.add(notePos);
|
|
1854
1953
|
}
|
|
1855
|
-
|
|
1856
|
-
const
|
|
1857
|
-
word.readings.filter(
|
|
1858
|
-
(r) => r.notes === void 0 || !r.notes.some((note) => notSearchedForms.has(note)) || r.commonness !== void 0
|
|
1859
|
-
).map((r) => r.reading)
|
|
1860
|
-
);
|
|
1861
|
-
const validWordKanjiForms = word.kanjiForms ? new Set(
|
|
1862
|
-
word.kanjiForms.filter(
|
|
1863
|
-
(kf) => kf.notes === void 0 || !kf.notes.some((note) => notSearchedForms.has(note)) || kf.commonness !== void 0
|
|
1864
|
-
).map((kf) => kf.form)
|
|
1865
|
-
) : void 0;
|
|
1954
|
+
}
|
|
1955
|
+
const rkf = wordFormsMap.get(word.id);
|
|
1866
1956
|
const entriesWithTitles = [];
|
|
1867
1957
|
const entriesWithFormTitles = [];
|
|
1868
1958
|
const entriesWithForms = [];
|
|
1869
1959
|
if (poses.size > 0)
|
|
1870
1960
|
for (const pos of poses) {
|
|
1871
1961
|
const posEntries = posMap.get(pos);
|
|
1872
|
-
if (
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
const fe = (_b = posEntries.form) == null ? void 0 : _b.get(kf);
|
|
1877
|
-
if (te)
|
|
1878
|
-
entriesWithTitles.push(
|
|
1879
|
-
...te.filter(
|
|
1880
|
-
(ent) => validFormOfEntries.has(ent.word) || validGlossesEntries.has(ent.word) || validFormsEntries.has(ent.word)
|
|
1881
|
-
)
|
|
1882
|
-
);
|
|
1883
|
-
if (fe)
|
|
1884
|
-
entriesWithForms.push(
|
|
1885
|
-
...fe.filter(
|
|
1886
|
-
(ent) => ent.forms && ent.forms.some(
|
|
1887
|
-
(form) => validWordKanjiForms.has(form.form) || validWordReadings.has(form.form)
|
|
1888
|
-
)
|
|
1889
|
-
)
|
|
1890
|
-
);
|
|
1891
|
-
}
|
|
1892
|
-
for (const r of validWordReadings) {
|
|
1893
|
-
const te = (_c = posEntries.title) == null ? void 0 : _c.get(r);
|
|
1894
|
-
const fe = (_d = posEntries.form) == null ? void 0 : _d.get(r);
|
|
1895
|
-
const fte = (_e = posEntries.formTitle) == null ? void 0 : _e.get(r);
|
|
1962
|
+
if (rkf.kanjiForms)
|
|
1963
|
+
for (const kf of rkf.kanjiForms) {
|
|
1964
|
+
const te = (_a = posEntries.title) == null ? void 0 : _a.get(kf);
|
|
1965
|
+
const fe = (_b = posEntries.form) == null ? void 0 : _b.get(kf);
|
|
1896
1966
|
if (te)
|
|
1897
1967
|
entriesWithTitles.push(
|
|
1898
1968
|
...te.filter(
|
|
1899
|
-
(ent) => ent.
|
|
1900
|
-
(form) => validWordKanjiForms.has(form.form)
|
|
1901
|
-
) || validWordKanjiForms === void 0
|
|
1969
|
+
(ent) => validFormOfEntries.has(ent.word) || validGlossesEntries.has(ent.word) || validFormsEntries.has(ent.word)
|
|
1902
1970
|
)
|
|
1903
1971
|
);
|
|
1904
1972
|
if (fe)
|
|
1905
1973
|
entriesWithForms.push(
|
|
1906
1974
|
...fe.filter(
|
|
1907
1975
|
(ent) => ent.forms && ent.forms.some(
|
|
1908
|
-
(form) =>
|
|
1976
|
+
(form) => rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
1909
1977
|
)
|
|
1910
1978
|
)
|
|
1911
1979
|
);
|
|
1912
|
-
if (fte) entriesWithFormTitles.push(...fte);
|
|
1913
1980
|
}
|
|
1981
|
+
for (const r of rkf.readings) {
|
|
1982
|
+
const te = (_c = posEntries.title) == null ? void 0 : _c.get(r);
|
|
1983
|
+
const fe = (_d = posEntries.form) == null ? void 0 : _d.get(r);
|
|
1984
|
+
const fte = (_e = posEntries.formTitle) == null ? void 0 : _e.get(r);
|
|
1985
|
+
if (te)
|
|
1986
|
+
entriesWithTitles.push(
|
|
1987
|
+
...te.filter(
|
|
1988
|
+
(ent) => ent.forms && rkf.kanjiForms && ent.forms.some(
|
|
1989
|
+
(form) => rkf.kanjiForms.has(form)
|
|
1990
|
+
) || rkf.kanjiForms === void 0
|
|
1991
|
+
)
|
|
1992
|
+
);
|
|
1993
|
+
if (fe)
|
|
1994
|
+
entriesWithForms.push(
|
|
1995
|
+
...fe.filter(
|
|
1996
|
+
(ent) => ent.forms && ent.forms.some(
|
|
1997
|
+
(form) => rkf.kanjiForms && rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
1998
|
+
)
|
|
1999
|
+
)
|
|
2000
|
+
);
|
|
2001
|
+
if (fte) entriesWithFormTitles.push(...fte);
|
|
1914
2002
|
}
|
|
1915
2003
|
}
|
|
1916
2004
|
if (entriesWithTitles.length === 0 && entriesWithFormTitles.length === 0 && entriesWithForms.length === 0) {
|
|
1917
|
-
if (
|
|
1918
|
-
for (const kf of
|
|
2005
|
+
if (rkf.kanjiForms)
|
|
2006
|
+
for (const kf of rkf.kanjiForms) {
|
|
1919
2007
|
const te = validTitleEntries.get(kf);
|
|
1920
2008
|
const fe = entriesWithFormsGlobal.get(kf);
|
|
1921
2009
|
if (te)
|
|
@@ -1928,28 +2016,28 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1928
2016
|
entriesWithForms.push(
|
|
1929
2017
|
...fe.filter(
|
|
1930
2018
|
(ent) => ent.forms && ent.forms.some(
|
|
1931
|
-
(form) =>
|
|
2019
|
+
(form) => rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
1932
2020
|
)
|
|
1933
2021
|
)
|
|
1934
2022
|
);
|
|
1935
2023
|
}
|
|
1936
|
-
for (const r of
|
|
2024
|
+
for (const r of rkf.readings) {
|
|
1937
2025
|
const te = validTitleEntries.get(r);
|
|
1938
2026
|
const fe = entriesWithFormsGlobal.get(r);
|
|
1939
2027
|
const fte = entriesWithFormTitlesGlobal.get(r);
|
|
1940
2028
|
if (te)
|
|
1941
2029
|
entriesWithTitles.push(
|
|
1942
2030
|
...te.filter(
|
|
1943
|
-
(ent) => ent.forms &&
|
|
1944
|
-
(form) =>
|
|
1945
|
-
) ||
|
|
2031
|
+
(ent) => ent.forms && rkf.kanjiForms && ent.forms.some(
|
|
2032
|
+
(form) => rkf.kanjiForms.has(form)
|
|
2033
|
+
) || rkf.kanjiForms === void 0
|
|
1946
2034
|
)
|
|
1947
2035
|
);
|
|
1948
2036
|
if (fe)
|
|
1949
2037
|
entriesWithForms.push(
|
|
1950
2038
|
...fe.filter(
|
|
1951
2039
|
(ent) => ent.forms && ent.forms.some(
|
|
1952
|
-
(form) =>
|
|
2040
|
+
(form) => rkf.kanjiForms && rkf.kanjiForms.has(form) || rkf.readings.has(form)
|
|
1953
2041
|
)
|
|
1954
2042
|
)
|
|
1955
2043
|
);
|
|
@@ -1959,8 +2047,9 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1959
2047
|
if (entriesWithTitles.length > 0 && (entriesWithFormTitles.length > 0 || entriesWithForms.length > 0))
|
|
1960
2048
|
wordEntriesPairs.push({
|
|
1961
2049
|
word,
|
|
1962
|
-
readings:
|
|
1963
|
-
...
|
|
2050
|
+
readings: rkf.readings,
|
|
2051
|
+
...rkf.kanjiForms ? { kanjiForms: rkf.kanjiForms } : {},
|
|
2052
|
+
forms: rkf.kanjiForms ? rkf.readings.union(rkf.kanjiForms) : rkf.readings,
|
|
1964
2053
|
entriesWithTitles,
|
|
1965
2054
|
entriesWithFormTitles,
|
|
1966
2055
|
entriesWithForms
|
|
@@ -1969,7 +2058,7 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1969
2058
|
for (const pair of wordEntriesPairs) {
|
|
1970
2059
|
const definitions = [];
|
|
1971
2060
|
const kanjiFormEntries = [];
|
|
1972
|
-
const
|
|
2061
|
+
const entriesWithForms = [];
|
|
1973
2062
|
const readingEntries = [];
|
|
1974
2063
|
const titleFormMap = /* @__PURE__ */ new Map();
|
|
1975
2064
|
const refsMap = /* @__PURE__ */ new Map();
|
|
@@ -1977,80 +2066,64 @@ function getWordDefinitions(entryList, jmDict) {
|
|
|
1977
2066
|
for (const ent of pair.entriesWithTitles) {
|
|
1978
2067
|
const validFormOf = validFormOfEntries.has(ent.word);
|
|
1979
2068
|
const validGlosses = validGlossesEntries.has(ent.word);
|
|
1980
|
-
const
|
|
1981
|
-
|
|
2069
|
+
const validForms2 = validFormsEntries.has(ent.word);
|
|
2070
|
+
const hasKanjiForms = ent.forms !== void 0 && pair.kanjiForms !== void 0 && ent.forms.some((form) => pair.kanjiForms.has(form));
|
|
2071
|
+
if (pair.kanjiForms && pair.kanjiForms.has(ent.word) && (validFormOf || validGlosses || validForms2)) {
|
|
1982
2072
|
kanjiFormEntries.push(ent);
|
|
1983
|
-
if ((validFormOf || validGlosses) &&
|
|
2073
|
+
if ((validFormOf || validGlosses) && ent.senses)
|
|
1984
2074
|
for (const sense of ent.senses) {
|
|
1985
|
-
if (validFormOf &&
|
|
2075
|
+
if (validFormOf && sense.form_of) {
|
|
1986
2076
|
for (const form of sense.form_of)
|
|
1987
|
-
if (
|
|
1988
|
-
const elem = titleFormMap.get(
|
|
1989
|
-
form.word
|
|
1990
|
-
);
|
|
2077
|
+
if (pair.readings.has(form)) {
|
|
2078
|
+
const elem = titleFormMap.get(form);
|
|
1991
2079
|
if (!elem)
|
|
1992
|
-
titleFormMap.set(form
|
|
2080
|
+
titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
|
|
1993
2081
|
else elem.add(ent.word);
|
|
1994
2082
|
}
|
|
1995
|
-
}
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
if (gloss
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
}
|
|
2006
|
-
}
|
|
2007
|
-
if (reading) {
|
|
2008
|
-
const elem = refsMap.get(reading);
|
|
2009
|
-
if (!elem) refsMap.set(reading, /* @__PURE__ */ new Set([ent.word]));
|
|
2010
|
-
else elem.add(ent.word);
|
|
2083
|
+
}
|
|
2084
|
+
if (validGlosses) {
|
|
2085
|
+
for (const gloss of sense.glosses)
|
|
2086
|
+
if (gloss.includes("\u8868\u8A18") || gloss.includes("\u53C2\u7167")) {
|
|
2087
|
+
for (const f of pair.forms)
|
|
2088
|
+
if (gloss.includes(f)) {
|
|
2089
|
+
const elem = refsMap.get(f);
|
|
2090
|
+
if (!elem) refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
|
|
2091
|
+
else elem.add(ent.word);
|
|
2092
|
+
}
|
|
2011
2093
|
}
|
|
2012
|
-
}
|
|
2013
2094
|
}
|
|
2014
2095
|
}
|
|
2015
|
-
if (
|
|
2096
|
+
if (validForms2 && ent.forms) {
|
|
2016
2097
|
for (const form of ent.forms)
|
|
2017
|
-
if (
|
|
2018
|
-
readingForms.add(form.form);
|
|
2098
|
+
if (pair.readings.has(form)) readingForms.add(form);
|
|
2019
2099
|
}
|
|
2020
|
-
}
|
|
2021
|
-
|
|
2022
|
-
|
|
2100
|
+
}
|
|
2101
|
+
if (pair.readings.has(ent.word) && hasKanjiForms)
|
|
2102
|
+
entriesWithForms.push(ent);
|
|
2103
|
+
if (pair.kanjiForms === void 0 && pair.readings.has(ent.word))
|
|
2023
2104
|
readingEntries.push(ent);
|
|
2024
2105
|
}
|
|
2025
2106
|
for (const entry of pair.entriesWithForms) {
|
|
2026
2107
|
const elem = titleFormMap.get(entry.word);
|
|
2027
|
-
if (elem && entry.forms.some((form) => elem.has(form
|
|
2028
|
-
|
|
2108
|
+
if (elem && entry.forms && entry.forms.some((form) => elem.has(form)))
|
|
2109
|
+
entriesWithForms.push(entry);
|
|
2029
2110
|
}
|
|
2030
2111
|
for (const entry of pair.entriesWithFormTitles) {
|
|
2031
2112
|
if (readingForms.has(entry.word)) {
|
|
2032
|
-
|
|
2113
|
+
entriesWithForms.push(entry);
|
|
2033
2114
|
continue;
|
|
2034
2115
|
}
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
if (ft && !ft.isDisjointFrom(pair.kanjiForms))
|
|
2038
|
-
readingWithFormsEntries.push(entry);
|
|
2039
|
-
}
|
|
2116
|
+
const ft = refsMap.get(entry.word);
|
|
2117
|
+
if (ft && !ft.isDisjointFrom(pair.forms)) entriesWithForms.push(entry);
|
|
2040
2118
|
}
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
parsedReadingWithFormsEntries = true;
|
|
2046
|
-
for (const ref of readingWithFormsEntries)
|
|
2047
|
-
parseEntry(ref, definitions, definitionMap);
|
|
2048
|
-
} else parseEntry(entry, definitions, definitionMap);
|
|
2049
|
-
if (!parsedReadingWithFormsEntries && readingWithFormsEntries.length > 0) {
|
|
2050
|
-
parsedReadingWithFormsEntries = true;
|
|
2051
|
-
for (const ref of readingWithFormsEntries)
|
|
2052
|
-
parseEntry(ref, definitions, definitionMap);
|
|
2119
|
+
if (kanjiFormEntries.length > 0) {
|
|
2120
|
+
for (const entry of kanjiFormEntries)
|
|
2121
|
+
if (entry.pos_title !== "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18")
|
|
2122
|
+
parseEntry(entry, definitions, definitionMap);
|
|
2053
2123
|
}
|
|
2124
|
+
if (entriesWithForms.length > 0)
|
|
2125
|
+
for (const ref of entriesWithForms)
|
|
2126
|
+
parseEntry(ref, definitions, definitionMap);
|
|
2054
2127
|
if (readingEntries.length > 0)
|
|
2055
2128
|
for (const readingEntry of readingEntries)
|
|
2056
2129
|
parseEntry(readingEntry, definitions, definitionMap);
|
|
@@ -2101,12 +2174,12 @@ async function getWordDefinitionsWithFurigana(entryList, jmDict) {
|
|
|
2101
2174
|
function lookupWordNote(key, notes, tags) {
|
|
2102
2175
|
const info = noteMap.get(key.toLowerCase());
|
|
2103
2176
|
if (!info) {
|
|
2104
|
-
|
|
2177
|
+
notes.push(key);
|
|
2105
2178
|
return { note: key };
|
|
2106
2179
|
}
|
|
2107
2180
|
const tag = `word::${info[0]}`;
|
|
2108
|
-
if (
|
|
2109
|
-
|
|
2181
|
+
if (!tags.includes(tag)) tags.push(tag);
|
|
2182
|
+
notes.push(info[1]);
|
|
2110
2183
|
return { note: info[1], tag };
|
|
2111
2184
|
}
|
|
2112
2185
|
var wordAddNoteArray = (arr, cb) => {
|
|
@@ -2116,9 +2189,14 @@ var wordAddNoteArray = (arr, cb) => {
|
|
|
2116
2189
|
function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deckPath) {
|
|
2117
2190
|
var _a;
|
|
2118
2191
|
let dictWord = void 0;
|
|
2119
|
-
if (typeof word === "string" && dict)
|
|
2120
|
-
|
|
2121
|
-
|
|
2192
|
+
if (typeof word === "string" && dict) {
|
|
2193
|
+
if (Array.isArray(dict))
|
|
2194
|
+
dictWord = dict.find(
|
|
2195
|
+
(entry) => entry.id === word
|
|
2196
|
+
);
|
|
2197
|
+
if (dict instanceof Map) dictWord = dict.get(word);
|
|
2198
|
+
}
|
|
2199
|
+
if (typeof word === "object") dictWord = word;
|
|
2122
2200
|
if (dictWord) {
|
|
2123
2201
|
const word2 = {
|
|
2124
2202
|
id: dictWord.id,
|
|
@@ -2141,7 +2219,7 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2141
2219
|
notes: dictKanjiForm.notes.map((note) => {
|
|
2142
2220
|
const noteAndTag = lookupWordNote(
|
|
2143
2221
|
note,
|
|
2144
|
-
|
|
2222
|
+
[],
|
|
2145
2223
|
word2.tags
|
|
2146
2224
|
);
|
|
2147
2225
|
return capitalizeString(noteAndTag.note);
|
|
@@ -2160,7 +2238,7 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2160
2238
|
...dictReading.notes ? dictReading.notes.map((note) => {
|
|
2161
2239
|
const noteAndTag = lookupWordNote(
|
|
2162
2240
|
note,
|
|
2163
|
-
|
|
2241
|
+
[],
|
|
2164
2242
|
word2.tags
|
|
2165
2243
|
);
|
|
2166
2244
|
return capitalizeString(noteAndTag.note);
|
|
@@ -2170,85 +2248,81 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2170
2248
|
...dictReading.commonness && dictReading.commonness.length > 0 ? { common: true } : {}
|
|
2171
2249
|
}));
|
|
2172
2250
|
word2.translations = [];
|
|
2173
|
-
for (const dictMeaning of dictWord.meanings)
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
(translation)
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
word2.tags.push("word::explanation");
|
|
2186
|
-
} else if (translation.type === "tm") {
|
|
2187
|
-
translationTypes.push("Trademark");
|
|
2188
|
-
word2.tags.push("word::trademark");
|
|
2189
|
-
}
|
|
2190
|
-
return translation.translation;
|
|
2191
|
-
}
|
|
2251
|
+
for (const dictMeaning of dictWord.meanings) {
|
|
2252
|
+
const translationTypes = [];
|
|
2253
|
+
const translations = dictMeaning.translations.map(
|
|
2254
|
+
(translation) => {
|
|
2255
|
+
if (typeof translation === "string") return translation;
|
|
2256
|
+
else {
|
|
2257
|
+
const translationNoteAndTag = noteMap.get(
|
|
2258
|
+
translation.type
|
|
2259
|
+
);
|
|
2260
|
+
translationTypes.push(translationNoteAndTag[1]);
|
|
2261
|
+
word2.tags.push(`word::${translationNoteAndTag[0]}`);
|
|
2262
|
+
return translation.translation;
|
|
2192
2263
|
}
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2198
|
-
)
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
)
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
)
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
)
|
|
2212
|
-
|
|
2213
|
-
|
|
2214
|
-
|
|
2215
|
-
)
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
)
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2223
|
-
)
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
)
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
)
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
}
|
|
2264
|
+
}
|
|
2265
|
+
);
|
|
2266
|
+
const notes = [];
|
|
2267
|
+
wordAddNoteArray(
|
|
2268
|
+
dictMeaning.kanjiFormRestrictions,
|
|
2269
|
+
(restriction) => notes.push(`Meaning restricted to ${restriction}`)
|
|
2270
|
+
);
|
|
2271
|
+
wordAddNoteArray(
|
|
2272
|
+
dictMeaning.readingRestrictions,
|
|
2273
|
+
(restriction) => notes.push(`Meaning restricted to ${restriction}`)
|
|
2274
|
+
);
|
|
2275
|
+
for (const t of translationTypes) notes.push(t);
|
|
2276
|
+
wordAddNoteArray(
|
|
2277
|
+
dictMeaning.partOfSpeech,
|
|
2278
|
+
(pos) => lookupWordNote(pos, notes, word2.tags)
|
|
2279
|
+
);
|
|
2280
|
+
wordAddNoteArray(
|
|
2281
|
+
dictMeaning.fields,
|
|
2282
|
+
(field) => lookupWordNote(field, notes, word2.tags)
|
|
2283
|
+
);
|
|
2284
|
+
wordAddNoteArray(
|
|
2285
|
+
dictMeaning.dialects,
|
|
2286
|
+
(dialect) => lookupWordNote(dialect, notes, word2.tags)
|
|
2287
|
+
);
|
|
2288
|
+
wordAddNoteArray(
|
|
2289
|
+
dictMeaning.antonyms,
|
|
2290
|
+
(antonym) => notes.push(`Antonym: ${antonym}`)
|
|
2291
|
+
);
|
|
2292
|
+
wordAddNoteArray(
|
|
2293
|
+
dictMeaning.references,
|
|
2294
|
+
(reference) => notes.push(`Related: ${reference}`)
|
|
2295
|
+
);
|
|
2296
|
+
wordAddNoteArray(
|
|
2297
|
+
dictMeaning.info,
|
|
2298
|
+
(info) => lookupWordNote(info, notes, word2.tags)
|
|
2299
|
+
);
|
|
2300
|
+
wordAddNoteArray(
|
|
2301
|
+
dictMeaning.misc,
|
|
2302
|
+
(misc) => lookupWordNote(misc, notes, word2.tags)
|
|
2303
|
+
);
|
|
2304
|
+
for (let i = 0; i < notes.length; i++)
|
|
2305
|
+
notes[i] = capitalizeString(notes[i]);
|
|
2306
|
+
word2.translations.push({
|
|
2307
|
+
translation: translations.join("; "),
|
|
2308
|
+
notes
|
|
2309
|
+
});
|
|
2310
|
+
}
|
|
2239
2311
|
if (dictWord.usuallyInKana === true) {
|
|
2240
2312
|
word2.usuallyInKana = true;
|
|
2241
2313
|
word2.tags.push("word::usually_in_kana_for_all_senses");
|
|
2242
2314
|
}
|
|
2243
2315
|
if (kanjiDic && word2.kanjiForms) {
|
|
2244
2316
|
const kanji = [];
|
|
2317
|
+
const seenChars = /* @__PURE__ */ new Set();
|
|
2245
2318
|
for (const kanjiForm of word2.kanjiForms)
|
|
2246
|
-
for (const char of kanjiForm.kanjiForm) {
|
|
2247
|
-
if (
|
|
2248
|
-
|
|
2319
|
+
for (const char of kanjiForm.kanjiForm.split("").filter((c) => regexps.kanji.test(c))) {
|
|
2320
|
+
if (seenChars.has(char)) continue;
|
|
2321
|
+
seenChars.add(char);
|
|
2322
|
+
const kanjiEntry = kanjiDic instanceof Map ? kanjiDic.get(char) : void 0;
|
|
2249
2323
|
const kanjiObj = getKanji(
|
|
2250
|
-
char,
|
|
2251
|
-
Array.isArray(kanjiDic) ? kanjiDic :
|
|
2324
|
+
kanjiEntry ?? char,
|
|
2325
|
+
Array.isArray(kanjiDic) ? kanjiDic : void 0
|
|
2252
2326
|
);
|
|
2253
2327
|
if (kanjiObj)
|
|
2254
2328
|
kanji.push({
|
|
@@ -2258,113 +2332,102 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2258
2332
|
}
|
|
2259
2333
|
if (kanji.length > 0) word2.kanji = kanji;
|
|
2260
2334
|
}
|
|
2261
|
-
if (dictWord.hasPhrases
|
|
2262
|
-
const exampleList = Array.isArray(
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2335
|
+
if (dictWord.hasPhrases !== void 0 && examples) {
|
|
2336
|
+
const exampleList = Array.isArray(examples) ? examples : examples.get(dictWord.id) ?? [];
|
|
2337
|
+
const rkf = getValidForms(
|
|
2338
|
+
dictWord.readings,
|
|
2339
|
+
dictWord.kanjiForms,
|
|
2340
|
+
dictWord.isCommon
|
|
2341
|
+
);
|
|
2342
|
+
const readings = new Set(
|
|
2343
|
+
rkf.readings.map((r) => r.reading)
|
|
2344
|
+
);
|
|
2345
|
+
const kanjiForms = rkf.kanjiForms ? new Set(rkf.kanjiForms.map((kf) => kf.form)) : void 0;
|
|
2346
|
+
let kanjiFormExamples = [];
|
|
2347
|
+
const readingMatchingKanjiFormExamples = [];
|
|
2348
|
+
const readingExamples = [];
|
|
2349
|
+
const readingMatchingKanjiForms = /* @__PURE__ */ new Set();
|
|
2350
|
+
for (const example of exampleList)
|
|
2351
|
+
for (let i = 0; i < example.parts.length; i++) {
|
|
2352
|
+
const part = example.parts[i];
|
|
2353
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
2354
|
+
const readingAsInflectedFormMatch = part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
2355
|
+
const referenceIDMatch = part.referenceID === dictWord.id;
|
|
2356
|
+
if (kanjiForms && kanjiForms.has(part.baseForm) || referenceIDMatch) {
|
|
2357
|
+
if (readingAsReadingMatch || readingAsInflectedFormMatch) {
|
|
2358
|
+
readingMatchingKanjiFormExamples.push({
|
|
2359
|
+
ex: example,
|
|
2360
|
+
partIndex: i
|
|
2361
|
+
});
|
|
2362
|
+
readingMatchingKanjiForms.add(part.baseForm);
|
|
2363
|
+
} else
|
|
2364
|
+
kanjiFormExamples.push({
|
|
2365
|
+
ex: example,
|
|
2366
|
+
partIndex: i,
|
|
2367
|
+
form: part.baseForm
|
|
2368
|
+
});
|
|
2369
|
+
break;
|
|
2370
|
+
}
|
|
2371
|
+
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
2372
|
+
if ((readingAsBaseFormMatch || referenceIDMatch) && kanjiForms === void 0) {
|
|
2373
|
+
readingExamples.push({ ex: example, partIndex: i });
|
|
2374
|
+
break;
|
|
2375
|
+
}
|
|
2376
|
+
}
|
|
2377
|
+
if (readingMatchingKanjiForms.size > 0)
|
|
2378
|
+
kanjiFormExamples = kanjiFormExamples.filter(
|
|
2379
|
+
(ex) => ex.form && readingMatchingKanjiForms.has(ex.form)
|
|
2272
2380
|
);
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
const part = example.parts[i];
|
|
2294
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
2295
|
-
const readingAsInflectedFormMatch = part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
2296
|
-
const referenceIDMatch = part.referenceID !== void 0 && word2.id !== void 0 && part.referenceID === word2.id;
|
|
2297
|
-
if (kanjiForms && kanjiForms.has(part.baseForm) || referenceIDMatch) {
|
|
2298
|
-
if (readingAsReadingMatch || readingAsInflectedFormMatch) {
|
|
2299
|
-
readingMatchingKanjiFormExamples.push({
|
|
2300
|
-
ex: example,
|
|
2301
|
-
partIndex: i
|
|
2302
|
-
});
|
|
2303
|
-
readingMatchingKanjiForms.add(part.baseForm);
|
|
2304
|
-
} else
|
|
2305
|
-
kanjiFormExamples.push({
|
|
2306
|
-
ex: example,
|
|
2307
|
-
partIndex: i,
|
|
2308
|
-
form: part.baseForm
|
|
2309
|
-
});
|
|
2310
|
-
break;
|
|
2311
|
-
}
|
|
2312
|
-
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
2313
|
-
if ((readingAsBaseFormMatch || referenceIDMatch) && kanjiForms === void 0) {
|
|
2314
|
-
readingExamples.push({ ex: example, partIndex: i });
|
|
2315
|
-
break;
|
|
2381
|
+
const includeKanjiFormExamples = word2.kanjiForms !== void 0;
|
|
2382
|
+
let wordExamples = [
|
|
2383
|
+
...includeKanjiFormExamples ? [...readingMatchingKanjiFormExamples, ...kanjiFormExamples] : readingExamples
|
|
2384
|
+
];
|
|
2385
|
+
readingMatchingKanjiForms.clear();
|
|
2386
|
+
const glossSpecificExamples = [];
|
|
2387
|
+
const seenPhrases = /* @__PURE__ */ new Set();
|
|
2388
|
+
for (let i = 0; i < word2.translations.length; i++) {
|
|
2389
|
+
outer: for (const example of wordExamples) {
|
|
2390
|
+
if (seenPhrases.has(example.ex.phrase)) continue;
|
|
2391
|
+
for (let j = 0; j < example.ex.parts.length; j++) {
|
|
2392
|
+
const part = example.ex.parts[j];
|
|
2393
|
+
if (j === example.partIndex && part.glossNumber === i + 1) {
|
|
2394
|
+
example.ex.glossNumber = {
|
|
2395
|
+
wordId: word2.id,
|
|
2396
|
+
glossNumber: i + 1
|
|
2397
|
+
};
|
|
2398
|
+
glossSpecificExamples.push(example);
|
|
2399
|
+
seenPhrases.add(example.ex.phrase);
|
|
2400
|
+
break outer;
|
|
2316
2401
|
}
|
|
2317
2402
|
}
|
|
2318
|
-
|
|
2319
|
-
|
|
2320
|
-
|
|
2403
|
+
}
|
|
2404
|
+
}
|
|
2405
|
+
if (glossSpecificExamples.length === word2.translations.length)
|
|
2406
|
+
wordExamples = glossSpecificExamples;
|
|
2407
|
+
else if (glossSpecificExamples.length > 0) {
|
|
2408
|
+
const exes = glossSpecificExamples;
|
|
2409
|
+
if (exes.length < 5) {
|
|
2410
|
+
wordExamples = wordExamples.filter(
|
|
2411
|
+
(ex) => !seenPhrases.has(ex.ex.phrase)
|
|
2321
2412
|
);
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
|
|
2325
|
-
|
|
2326
|
-
];
|
|
2327
|
-
readingMatchingKanjiForms.clear();
|
|
2328
|
-
const glossSpecificExamples = [];
|
|
2329
|
-
const seenPhrases = /* @__PURE__ */ new Set();
|
|
2330
|
-
for (let i = 0; i < word2.translations.length; i++) {
|
|
2331
|
-
outer: for (const example of wordExamples) {
|
|
2332
|
-
if (seenPhrases.has(example.ex.phrase)) continue;
|
|
2333
|
-
for (let j = 0; j < example.ex.parts.length; j++) {
|
|
2334
|
-
const part = example.ex.parts[j];
|
|
2335
|
-
if (j === example.partIndex && part.glossNumber === i + 1) {
|
|
2336
|
-
example.ex.glossNumber = {
|
|
2337
|
-
wordId: word2.id,
|
|
2338
|
-
glossNumber: i + 1
|
|
2339
|
-
};
|
|
2340
|
-
glossSpecificExamples.push(example);
|
|
2341
|
-
seenPhrases.add(example.ex.phrase);
|
|
2342
|
-
break outer;
|
|
2343
|
-
}
|
|
2413
|
+
if (wordExamples.length > 0)
|
|
2414
|
+
for (const ex of wordExamples) {
|
|
2415
|
+
exes.push(ex);
|
|
2416
|
+
if (exes.length === 5) break;
|
|
2344
2417
|
}
|
|
2345
|
-
}
|
|
2346
|
-
if (glossSpecificExamples.length === 5) break;
|
|
2347
|
-
}
|
|
2348
|
-
if (glossSpecificExamples.length === 5)
|
|
2349
|
-
wordExamples = [...glossSpecificExamples];
|
|
2350
|
-
else if (glossSpecificExamples.length > 0)
|
|
2351
|
-
wordExamples = [
|
|
2352
|
-
...glossSpecificExamples,
|
|
2353
|
-
...wordExamples.filter(
|
|
2354
|
-
(ex) => !seenPhrases.has(ex.ex.phrase)
|
|
2355
|
-
).slice(0, 5 - glossSpecificExamples.length)
|
|
2356
|
-
];
|
|
2357
|
-
if (wordExamples.length > 0) {
|
|
2358
|
-
word2.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
|
|
2359
|
-
phrase: ex.ex.furigana ?? ex.ex.phrase,
|
|
2360
|
-
translation: ex.ex.translation,
|
|
2361
|
-
originalPhrase: ex.ex.phrase,
|
|
2362
|
-
...ex.ex.glossNumber ? { glossNumber: ex.ex.glossNumber } : {}
|
|
2363
|
-
}));
|
|
2364
|
-
word2.tags.push("word::has_phrases");
|
|
2365
|
-
if (glossSpecificExamples.length > 0)
|
|
2366
|
-
word2.tags.push("word::has_meaning-specific_phrases");
|
|
2367
2418
|
}
|
|
2419
|
+
wordExamples = exes;
|
|
2420
|
+
}
|
|
2421
|
+
if (wordExamples.length > 0) {
|
|
2422
|
+
word2.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
|
|
2423
|
+
phrase: ex.ex.furigana ?? ex.ex.phrase,
|
|
2424
|
+
translation: ex.ex.translation,
|
|
2425
|
+
originalPhrase: ex.ex.phrase,
|
|
2426
|
+
...ex.ex.glossNumber ? { glossNumber: ex.ex.glossNumber } : {}
|
|
2427
|
+
}));
|
|
2428
|
+
word2.tags.push("word::has_phrases");
|
|
2429
|
+
if (glossSpecificExamples.length > 0)
|
|
2430
|
+
word2.tags.push("word::has_meaning-specific_phrases");
|
|
2368
2431
|
}
|
|
2369
2432
|
}
|
|
2370
2433
|
if (definitions) {
|
|
@@ -2375,10 +2438,10 @@ function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deck
|
|
|
2375
2438
|
} else return void 0;
|
|
2376
2439
|
}
|
|
2377
2440
|
function getKanji(kanji, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
2378
|
-
var _a, _b, _c, _d, _e, _f, _g
|
|
2441
|
+
var _a, _b, _c, _d, _e, _f, _g;
|
|
2379
2442
|
let dictKanji = void 0;
|
|
2380
2443
|
if (typeof kanji === "string" && dict)
|
|
2381
|
-
dictKanji = dict.find((entry) => entry.kanji === kanji);
|
|
2444
|
+
dictKanji = Array.isArray(dict) ? dict.find((entry) => entry.kanji === kanji) : dict.get(kanji);
|
|
2382
2445
|
else if (typeof kanji === "object") dictKanji = kanji;
|
|
2383
2446
|
if (dictKanji) {
|
|
2384
2447
|
const kanji2 = {
|
|
@@ -2399,23 +2462,20 @@ function getKanji(kanji, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
|
2399
2462
|
const kunyomi = [];
|
|
2400
2463
|
for (const rm of dictKanji.readingMeaning) {
|
|
2401
2464
|
if (rm.nanori && rm.nanori.length > 0) nanori.push(...rm.nanori);
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
(reading) => reading.type === "ja_kun"
|
|
2413
|
-
).map((reading) => reading.reading)
|
|
2414
|
-
);
|
|
2415
|
-
}
|
|
2416
|
-
if (group.meanings && group.meanings.length > 0)
|
|
2417
|
-
meanings.push(...group.meanings);
|
|
2465
|
+
for (const group of rm.groups) {
|
|
2466
|
+
if (group.readings) {
|
|
2467
|
+
onyomi.push(
|
|
2468
|
+
...group.readings.filter((reading) => reading.type === "ja_on").map((reading) => reading.reading)
|
|
2469
|
+
);
|
|
2470
|
+
kunyomi.push(
|
|
2471
|
+
...group.readings.filter(
|
|
2472
|
+
(reading) => reading.type === "ja_kun"
|
|
2473
|
+
).map((reading) => reading.reading)
|
|
2474
|
+
);
|
|
2418
2475
|
}
|
|
2476
|
+
if (group.meanings && group.meanings.length > 0)
|
|
2477
|
+
meanings.push(...group.meanings);
|
|
2478
|
+
}
|
|
2419
2479
|
}
|
|
2420
2480
|
if (meanings.length > 0) kanji2.meanings = meanings;
|
|
2421
2481
|
if (nanori.length > 0) kanji2.nanori = nanori;
|
|
@@ -2430,71 +2490,62 @@ function getKanji(kanji, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
|
2430
2490
|
(word) => word.kanjiForms && word.kanjiForms[0].form.includes(kanji2.kanji)
|
|
2431
2491
|
);
|
|
2432
2492
|
if (firstKfWords && firstKfWords.length > 0) kanjiWords = firstKfWords;
|
|
2433
|
-
else if (kanjiWords) kanjiWords = kanjiWords;
|
|
2434
2493
|
if (kanjiWords) {
|
|
2435
2494
|
const validWords = [];
|
|
2436
2495
|
for (const word of kanjiWords) {
|
|
2437
2496
|
const kanjiForm = (_a = firstKfWords && firstKfWords.length > 0 ? word.kanjiForms[0] : word.kanjiForms.find(
|
|
2438
2497
|
(kf) => kf.form.includes(kanji2.kanji)
|
|
2439
2498
|
)) == null ? void 0 : _a.form;
|
|
2440
|
-
if (
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
|
|
2453
|
-
{
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2499
|
+
if (kanjiForm) {
|
|
2500
|
+
const reading = (_b = firstKfWords && firstKfWords.length > 0 ? word.readings[0] : word.readings.find(
|
|
2501
|
+
(reading2) => reading2.kanjiFormRestrictions && reading2.kanjiFormRestrictions.includes(kanjiForm)
|
|
2502
|
+
)) == null ? void 0 : _b.reading;
|
|
2503
|
+
if (!reading) continue;
|
|
2504
|
+
const translation = (_c = firstKfWords && firstKfWords.length > 0 ? word.meanings[0] : word.meanings.find(
|
|
2505
|
+
(m) => m.translations && m.kanjiFormRestrictions && m.kanjiFormRestrictions.includes(kanjiForm)
|
|
2506
|
+
)) == null ? void 0 : _c.translations.map(
|
|
2507
|
+
(t) => typeof t === "string" ? t : t.translation
|
|
2508
|
+
)[0];
|
|
2509
|
+
if (!translation) continue;
|
|
2510
|
+
validWords.push({
|
|
2511
|
+
kanjiForms: [{ kanjiForm }],
|
|
2512
|
+
readings: [{ reading }],
|
|
2513
|
+
translations: [
|
|
2514
|
+
{
|
|
2515
|
+
translation
|
|
2516
|
+
}
|
|
2517
|
+
]
|
|
2518
|
+
});
|
|
2519
|
+
}
|
|
2458
2520
|
if (validWords.length === 3) break;
|
|
2459
2521
|
}
|
|
2460
2522
|
if (validWords.length > 0) kanji2.words = validWords;
|
|
2461
2523
|
}
|
|
2462
2524
|
}
|
|
2463
2525
|
if (svgList) {
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
const svg = svgList.find(
|
|
2472
|
-
(svgFile) => fileNames.includes(svgFile.toLowerCase())
|
|
2473
|
-
);
|
|
2474
|
-
if (svg) kanji2.svg = svg;
|
|
2475
|
-
}
|
|
2526
|
+
const codePoint = kanji2.kanji.codePointAt(0).toString(16).toLowerCase();
|
|
2527
|
+
const svg = Array.isArray(svgList) ? svgList.find(
|
|
2528
|
+
(svgFile) => [`${codePoint}.svg`, `0${codePoint}.svg`].includes(
|
|
2529
|
+
svgFile.toLowerCase()
|
|
2530
|
+
)
|
|
2531
|
+
) : svgList.get(kanji2.kanji);
|
|
2532
|
+
if (svg) kanji2.svg = svg;
|
|
2476
2533
|
}
|
|
2477
2534
|
if (kanji2.tags && dictKanji.isKokuji === true) {
|
|
2478
2535
|
kanji2.kokuji = true;
|
|
2479
2536
|
kanji2.tags.push("kanji::kokuji");
|
|
2480
|
-
if (kanji2.meanings)
|
|
2481
|
-
kanji2.meanings.splice(
|
|
2482
|
-
kanji2.meanings.findIndex((meaning) => meaning === "(kokuji)"),
|
|
2483
|
-
1
|
|
2484
|
-
);
|
|
2485
2537
|
}
|
|
2486
|
-
|
|
2487
|
-
kanji2.
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
|
|
2495
|
-
|
|
2496
|
-
|
|
2497
|
-
);
|
|
2538
|
+
kanji2.tags.push(
|
|
2539
|
+
`kanji::strokes::${kanji2.strokes}`,
|
|
2540
|
+
...kanji2.frequency ? [`kanji::frequency::${kanji2.frequency}`] : [],
|
|
2541
|
+
...kanji2.grade ? [`kanji::grade::${kanji2.grade}`] : [],
|
|
2542
|
+
...kanji2.jlpt ? [`kanji::pre-2010_jlpt::${kanji2.jlpt.toLowerCase()}`] : [],
|
|
2543
|
+
`kanji::onyomi::${((_d = kanji2.onyomi) == null ? void 0 : _d.length) ?? 0}`,
|
|
2544
|
+
`kanji::kunyomi::${((_e = kanji2.kunyomi) == null ? void 0 : _e.length) ?? 0}`,
|
|
2545
|
+
`kanji::nanori::${((_f = kanji2.nanori) == null ? void 0 : _f.length) ?? 0}`,
|
|
2546
|
+
`kanji::words::${((_g = kanji2.words) == null ? void 0 : _g.length) ?? 0}`,
|
|
2547
|
+
...kanji2.svg ? ["kanji::has_svg"] : []
|
|
2548
|
+
);
|
|
2498
2549
|
return kanji2;
|
|
2499
2550
|
} else return void 0;
|
|
2500
2551
|
}
|
|
@@ -2521,22 +2572,20 @@ function getKanjiExtended(info, kanji, dict, useWords, jmDict, svgList, noteType
|
|
|
2521
2572
|
kanjiObj.words = info.words;
|
|
2522
2573
|
usedInfo = true;
|
|
2523
2574
|
}
|
|
2524
|
-
if (kanjiObj.
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
|
|
2538
|
-
});
|
|
2539
|
-
}
|
|
2575
|
+
if (kanjiObj.components)
|
|
2576
|
+
kanjiObj.tags.push(`kanji::components::${kanjiObj.components.length}`);
|
|
2577
|
+
if (kanjiObj.mnemonic && kanjiObj.mnemonic.length > 0)
|
|
2578
|
+
kanjiObj.tags.push("kanji::has_mnemonic");
|
|
2579
|
+
if (useWords === true && kanjiObj.words && info.words)
|
|
2580
|
+
kanjiObj.tags.forEach((tag, index) => {
|
|
2581
|
+
if (tag.startsWith("kanji::words::") && kanjiObj.words) {
|
|
2582
|
+
kanjiObj.tags.splice(
|
|
2583
|
+
index,
|
|
2584
|
+
1,
|
|
2585
|
+
`kanji::words::${kanjiObj.words.length}`
|
|
2586
|
+
);
|
|
2587
|
+
}
|
|
2588
|
+
});
|
|
2540
2589
|
if (sourceURL && info.externalInfo === true && usedInfo)
|
|
2541
2590
|
kanjiObj.source = sourceURL;
|
|
2542
2591
|
return kanjiObj;
|
|
@@ -2826,19 +2875,21 @@ export {
|
|
|
2826
2875
|
convertRadkFile,
|
|
2827
2876
|
convertTanakaCorpus,
|
|
2828
2877
|
convertTanakaCorpusWithFurigana,
|
|
2878
|
+
createEntryMaps,
|
|
2829
2879
|
generateAnkiNote,
|
|
2830
2880
|
generateAnkiNotesFile,
|
|
2831
2881
|
getKanji,
|
|
2832
2882
|
getKanjiExtended,
|
|
2883
|
+
getValidForms,
|
|
2833
2884
|
getWord,
|
|
2834
2885
|
getWordDefinitions,
|
|
2835
2886
|
getWordDefinitionsWithFurigana,
|
|
2836
2887
|
isGrammar,
|
|
2837
2888
|
isKana,
|
|
2838
2889
|
isKanji,
|
|
2890
|
+
isObjectArray,
|
|
2839
2891
|
isRadical,
|
|
2840
2892
|
isStringArray,
|
|
2841
|
-
isValidArray,
|
|
2842
2893
|
isValidArrayWithFirstElement,
|
|
2843
2894
|
isWord,
|
|
2844
2895
|
notSearchedForms,
|