henkan 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs.js +1047 -1115
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +1042 -1113
- package/dist/index.mjs.map +3 -3
- package/dist/types/constants.d.ts +3 -2
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +74 -15
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +49 -40
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +6 -2
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +2 -2
- package/docs/api/functions/convertJawiktionaryAsync.md +29 -0
- package/docs/api/functions/convertJawiktionarySync.md +29 -0
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +3 -3
- package/docs/api/functions/convertRadkFile.md +2 -2
- package/docs/api/functions/convertTanakaCorpus.md +3 -9
- package/docs/api/functions/convertTanakaCorpusWithFurigana.md +27 -0
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +9 -3
- package/docs/api/functions/getKanji.md +13 -19
- package/docs/api/functions/getKanjiExtended.md +18 -18
- package/docs/api/functions/getWord.md +17 -23
- package/docs/api/functions/getWordDefinitions.md +6 -12
- package/docs/api/functions/getWordDefinitionsWithFurigana.md +33 -0
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/shuffleArray.md +2 -2
- package/docs/api/interfaces/DefaultNoteInfo.md +51 -0
- package/docs/api/interfaces/DictKanjiMisc.md +2 -2
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +2 -2
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +4 -4
- package/docs/api/interfaces/DictKanjiWithRadicals.md +2 -2
- package/docs/api/interfaces/DictWord.md +1 -1
- package/docs/api/interfaces/Grammar.md +2 -2
- package/docs/api/interfaces/Kana.md +2 -2
- package/docs/api/interfaces/Kanji.md +12 -12
- package/docs/api/interfaces/NoteAndTag.md +2 -2
- package/docs/api/interfaces/NoteHeaderKeys.md +75 -0
- package/docs/api/interfaces/Radical.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +2 -2
- package/docs/api/interfaces/TanakaExample.md +1 -1
- package/docs/api/interfaces/UsefulRegExps.md +8 -8
- package/docs/api/interfaces/Word.md +2 -2
- package/package.json +11 -12
- package/docs/api/functions/convertJawiktionary.md +0 -29
- package/docs/api/functions/synthesizeSpeech.md +0 -39
package/dist/index.mjs
CHANGED
|
@@ -15,6 +15,14 @@ var regexps = {
|
|
|
15
15
|
tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
|
|
16
16
|
tanakaReferenceID: /#(?<entryid>[\d]+)/
|
|
17
17
|
};
|
|
18
|
+
var noteHeaderKeys = {
|
|
19
|
+
separator: "#separator:tab",
|
|
20
|
+
html: "#html:true",
|
|
21
|
+
guid: "#guid column:",
|
|
22
|
+
notetype: "#notetype column:",
|
|
23
|
+
deck: "#deck column:",
|
|
24
|
+
tags: "#tags column:"
|
|
25
|
+
};
|
|
18
26
|
var notSearchedForms = /* @__PURE__ */ new Set([
|
|
19
27
|
"search-only kana form",
|
|
20
28
|
"Search-only kana form",
|
|
@@ -1171,9 +1179,6 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
1171
1179
|
import libxml from "libxmljs2";
|
|
1172
1180
|
import xml from "xml2js";
|
|
1173
1181
|
import iconv from "iconv-lite";
|
|
1174
|
-
import {
|
|
1175
|
-
SynthesizeSpeechCommand
|
|
1176
|
-
} from "@aws-sdk/client-polly";
|
|
1177
1182
|
import { createInterface } from "readline";
|
|
1178
1183
|
var Kuroshiro = __require("kuroshiro");
|
|
1179
1184
|
var KuromojiAnalyzer = __require("kuroshiro-analyzer-kuromoji");
|
|
@@ -1190,29 +1195,25 @@ function isStringArray(arg) {
|
|
|
1190
1195
|
return arg !== null && arg !== void 0 && Array.isArray(arg) && arg.every((element) => typeof element === "string");
|
|
1191
1196
|
}
|
|
1192
1197
|
function shuffleArray(arr) {
|
|
1193
|
-
if (arr.length < 2) return arr;
|
|
1194
1198
|
const a = arr.slice();
|
|
1195
1199
|
for (let i = a.length - 1; i > 0; i--) {
|
|
1196
1200
|
const j = Math.floor(Math.random() * (i + 1));
|
|
1197
1201
|
const tmp = a[i];
|
|
1198
|
-
|
|
1199
|
-
if (!tmp || !tmp2) throw new Error("Invalid array");
|
|
1200
|
-
a[i] = tmp2;
|
|
1202
|
+
a[i] = a[j];
|
|
1201
1203
|
a[j] = tmp;
|
|
1202
1204
|
}
|
|
1203
1205
|
return a;
|
|
1204
1206
|
}
|
|
1205
1207
|
function convertJMdict(xmlString, examples) {
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
if (err) throw err;
|
|
1208
|
+
const dictParsed = libxml.parseXml(xmlString, {
|
|
1209
|
+
dtdvalid: true,
|
|
1210
|
+
nonet: false,
|
|
1211
|
+
noent: true,
|
|
1212
|
+
recover: false
|
|
1213
|
+
});
|
|
1214
|
+
const dict = [];
|
|
1215
|
+
xml.parseString(dictParsed, (err, result) => {
|
|
1216
|
+
if (err === null) {
|
|
1216
1217
|
const tanakaParts = examples && examples.length > 0 ? new Set(
|
|
1217
1218
|
examples.map(
|
|
1218
1219
|
(example) => example.parts.map((part) => [
|
|
@@ -1225,239 +1226,229 @@ function convertJMdict(xmlString, examples) {
|
|
|
1225
1226
|
if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
|
|
1226
1227
|
for (const entry of result.JMdict.entry) {
|
|
1227
1228
|
const entryObj = {
|
|
1228
|
-
|
|
1229
|
+
id: entry.ent_seq[0],
|
|
1229
1230
|
readings: [],
|
|
1230
1231
|
meanings: []
|
|
1231
1232
|
};
|
|
1232
|
-
if (entryObj.id
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1233
|
+
if (typeof entryObj.id === "string") {
|
|
1234
|
+
const kanjiForms = entry.k_ele;
|
|
1235
|
+
const readings = entry.r_ele;
|
|
1236
|
+
const meanings = entry.sense;
|
|
1237
|
+
if (isValidArray(kanjiForms)) {
|
|
1238
|
+
entryObj.kanjiForms = [];
|
|
1239
|
+
for (const kanjiForm of kanjiForms) {
|
|
1240
|
+
const form = {
|
|
1241
|
+
form: kanjiForm.keb[0]
|
|
1242
|
+
};
|
|
1243
|
+
if (typeof form.form === "string") {
|
|
1244
|
+
if (isStringArray(kanjiForm.ke_inf))
|
|
1245
|
+
form.notes = kanjiForm.ke_inf;
|
|
1246
|
+
if (isStringArray(kanjiForm.ke_pri)) {
|
|
1247
|
+
form.commonness = kanjiForm.ke_pri;
|
|
1248
|
+
if (entryObj.isCommon === void 0)
|
|
1249
|
+
entryObj.isCommon = true;
|
|
1250
|
+
}
|
|
1251
|
+
entryObj.kanjiForms.push(form);
|
|
1252
|
+
}
|
|
1251
1253
|
}
|
|
1252
|
-
if (form.form.length > 0) entryObj.kanjiForms.push(form);
|
|
1253
1254
|
}
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1255
|
+
if (isValidArray(readings))
|
|
1256
|
+
for (const reading of readings) {
|
|
1257
|
+
const readingObj = {
|
|
1258
|
+
reading: reading.reb[0]
|
|
1259
|
+
};
|
|
1260
|
+
if (typeof readingObj.reading === "string") {
|
|
1261
|
+
if (isStringArray(reading.re_inf))
|
|
1262
|
+
readingObj.notes = reading.re_inf;
|
|
1263
|
+
if (isStringArray(reading.re_restr))
|
|
1264
|
+
readingObj.kanjiFormRestrictions = reading.re_restr;
|
|
1265
|
+
if (isStringArray(reading.re_pri)) {
|
|
1266
|
+
readingObj.commonness = reading.re_pri;
|
|
1267
|
+
if (entryObj.isCommon === void 0)
|
|
1268
|
+
entryObj.isCommon = true;
|
|
1269
|
+
}
|
|
1270
|
+
entryObj.readings.push(readingObj);
|
|
1271
|
+
}
|
|
1269
1272
|
}
|
|
1270
|
-
|
|
1271
|
-
|
|
1273
|
+
if (isValidArray(meanings)) {
|
|
1274
|
+
let usuallyInKanaMeanings = 0;
|
|
1275
|
+
for (const meaning of meanings) {
|
|
1276
|
+
const meaningObj = {};
|
|
1277
|
+
if (isStringArray(meaning.pos))
|
|
1278
|
+
meaningObj.partOfSpeech = meaning.pos;
|
|
1279
|
+
if (isValidArray(meaning.gloss)) {
|
|
1280
|
+
meaningObj.translations = [];
|
|
1281
|
+
for (const gloss of meaning.gloss)
|
|
1282
|
+
if (typeof gloss === "string")
|
|
1283
|
+
meaningObj.translations.push(gloss);
|
|
1284
|
+
else if (typeof gloss === "object" && gloss._ && typeof gloss._ === "string" && gloss.$ && typeof gloss.$ === "object" && gloss.$.g_type && (gloss.$.g_type === "lit" || gloss.$.g_type === "expl" || gloss.$.g_type === "tm"))
|
|
1285
|
+
meaningObj.translations.push({
|
|
1286
|
+
translation: gloss._,
|
|
1287
|
+
type: gloss.$.g_type
|
|
1288
|
+
});
|
|
1289
|
+
}
|
|
1290
|
+
if (isStringArray(meaning.xref))
|
|
1291
|
+
meaningObj.references = meaning.xref;
|
|
1292
|
+
if (isStringArray(meaning.stagk))
|
|
1293
|
+
meaningObj.kanjiFormRestrictions = meaning.stagk;
|
|
1294
|
+
if (isStringArray(meaning.stagr))
|
|
1295
|
+
meaningObj.readingRestrictions = meaning.stagr;
|
|
1296
|
+
if (isStringArray(meaning.ant))
|
|
1297
|
+
meaningObj.antonyms = meaning.ant;
|
|
1298
|
+
if (isStringArray(meaning.field))
|
|
1299
|
+
meaningObj.fields = meaning.field;
|
|
1300
|
+
if (isStringArray(meaning.s_inf))
|
|
1301
|
+
meaningObj.info = meaning.s_inf;
|
|
1302
|
+
if (isStringArray(meaning.misc)) {
|
|
1303
|
+
meaningObj.misc = meaning.misc;
|
|
1304
|
+
if (meaningObj.misc && meaningObj.misc.includes(
|
|
1305
|
+
"word usually written using kana alone"
|
|
1306
|
+
))
|
|
1307
|
+
usuallyInKanaMeanings++;
|
|
1308
|
+
}
|
|
1309
|
+
if (isStringArray(meaning.dial))
|
|
1310
|
+
meaningObj.dialects = meaning.dial;
|
|
1311
|
+
if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0)
|
|
1312
|
+
entryObj.meanings.push(meaningObj);
|
|
1313
|
+
}
|
|
1314
|
+
if (entryObj.meanings.length === usuallyInKanaMeanings)
|
|
1315
|
+
entryObj.usuallyInKana = true;
|
|
1272
1316
|
}
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1317
|
+
if (examples) {
|
|
1318
|
+
const readings2 = new Set(
|
|
1319
|
+
entryObj.readings.filter(
|
|
1320
|
+
(reading) => reading.notes === void 0 || !reading.notes.some(
|
|
1321
|
+
(note) => notSearchedForms.has(note)
|
|
1322
|
+
) || reading.commonness
|
|
1323
|
+
).map((reading) => reading.reading)
|
|
1324
|
+
);
|
|
1325
|
+
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1326
|
+
entryObj.kanjiForms.map(
|
|
1327
|
+
(kanjiForm) => kanjiForm.form
|
|
1328
|
+
)
|
|
1329
|
+
) : void 0;
|
|
1330
|
+
let existsExample = false;
|
|
1331
|
+
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1332
|
+
for (const kf of kanjiForms2)
|
|
1333
|
+
if (tanakaParts.has(kf)) {
|
|
1334
|
+
existsExample = true;
|
|
1335
|
+
break;
|
|
1336
|
+
}
|
|
1289
1337
|
}
|
|
1290
|
-
if (
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
if (isStringArray(meaning.ant)) meaningObj.antonyms = meaning.ant;
|
|
1297
|
-
if (isStringArray(meaning.field))
|
|
1298
|
-
meaningObj.fields = meaning.field;
|
|
1299
|
-
if (isStringArray(meaning.s_inf)) meaningObj.info = meaning.s_inf;
|
|
1300
|
-
if (isStringArray(meaning.misc)) {
|
|
1301
|
-
meaningObj.misc = meaning.misc;
|
|
1302
|
-
if (meaningObj.misc && meaningObj.misc.includes(
|
|
1303
|
-
"word usually written using kana alone"
|
|
1304
|
-
))
|
|
1305
|
-
usuallyInKanaMeanings++;
|
|
1338
|
+
if (!existsExample && readings2.size > 0 && tanakaParts) {
|
|
1339
|
+
for (const r of readings2)
|
|
1340
|
+
if (tanakaParts.has(r)) {
|
|
1341
|
+
existsExample = true;
|
|
1342
|
+
break;
|
|
1343
|
+
}
|
|
1306
1344
|
}
|
|
1307
|
-
if (
|
|
1308
|
-
|
|
1309
|
-
if (
|
|
1310
|
-
entryObj.meanings.push(meaningObj);
|
|
1311
|
-
}
|
|
1312
|
-
if (entryObj.meanings.length === usuallyInKanaMeanings)
|
|
1313
|
-
entryObj.usuallyInKana = true;
|
|
1314
|
-
}
|
|
1315
|
-
if (examples) {
|
|
1316
|
-
const readings2 = new Set(
|
|
1317
|
-
entryObj.readings.filter(
|
|
1318
|
-
(reading) => reading.notes === void 0 || !reading.notes.some(
|
|
1319
|
-
(note) => notSearchedForms.has(note)
|
|
1320
|
-
) || reading.commonness
|
|
1321
|
-
).map((reading) => reading.reading)
|
|
1322
|
-
);
|
|
1323
|
-
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1324
|
-
entryObj.kanjiForms.map(
|
|
1325
|
-
(kanjiForm) => kanjiForm.form
|
|
1326
|
-
)
|
|
1327
|
-
) : void 0;
|
|
1328
|
-
let existsExample = false;
|
|
1329
|
-
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1330
|
-
for (const kf of kanjiForms2)
|
|
1331
|
-
if (tanakaParts.has(kf)) {
|
|
1332
|
-
existsExample = true;
|
|
1333
|
-
break;
|
|
1334
|
-
}
|
|
1345
|
+
if (tanakaParts && tanakaParts.has(entryObj.id))
|
|
1346
|
+
existsExample = true;
|
|
1347
|
+
if (existsExample) entryObj.hasPhrases = true;
|
|
1335
1348
|
}
|
|
1336
|
-
if (
|
|
1337
|
-
|
|
1338
|
-
if (tanakaParts.has(r)) {
|
|
1339
|
-
existsExample = true;
|
|
1340
|
-
break;
|
|
1341
|
-
}
|
|
1342
|
-
}
|
|
1343
|
-
if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
|
|
1344
|
-
existsExample = true;
|
|
1345
|
-
if (existsExample) entryObj.hasPhrases = true;
|
|
1349
|
+
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1350
|
+
dict.push(entryObj);
|
|
1346
1351
|
}
|
|
1347
|
-
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1348
|
-
dict.push(entryObj);
|
|
1349
1352
|
}
|
|
1350
|
-
}
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
throw err;
|
|
1354
|
-
}
|
|
1353
|
+
}
|
|
1354
|
+
});
|
|
1355
|
+
return dict;
|
|
1355
1356
|
}
|
|
1356
1357
|
function convertKanjiDic(xmlString) {
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
if (err) throw err;
|
|
1358
|
+
const dictParsed = libxml.parseXml(xmlString, {
|
|
1359
|
+
dtdvalid: true,
|
|
1360
|
+
nonet: false,
|
|
1361
|
+
noent: true,
|
|
1362
|
+
recover: false
|
|
1363
|
+
});
|
|
1364
|
+
const dict = [];
|
|
1365
|
+
xml.parseString(dictParsed, (err, result) => {
|
|
1366
|
+
if (err === null) {
|
|
1367
1367
|
if (result.kanjidic2 && typeof result.kanjidic2 === "object" && isValidArray(result.kanjidic2.character))
|
|
1368
1368
|
for (const entry of result.kanjidic2.character) {
|
|
1369
1369
|
const kanjiObj = {
|
|
1370
|
-
|
|
1371
|
-
misc: {
|
|
1372
|
-
strokeNumber: ""
|
|
1373
|
-
},
|
|
1374
|
-
readingMeaning: []
|
|
1370
|
+
kanji: entry.literal[0]
|
|
1375
1371
|
};
|
|
1376
|
-
if (kanjiObj.kanji.length ===
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
if (
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1372
|
+
if (typeof kanjiObj.kanji === "string" && kanjiObj.kanji.length === 1) {
|
|
1373
|
+
if (isValidArrayWithFirstElement(entry.misc) && typeof entry.misc[0] === "object") {
|
|
1374
|
+
const misc = entry.misc[0];
|
|
1375
|
+
kanjiObj.misc = {};
|
|
1376
|
+
if (isValidArrayWithFirstElement(misc.stroke_count) && typeof misc.stroke_count[0] === "string")
|
|
1377
|
+
kanjiObj.misc.strokeNumber = misc.stroke_count[0];
|
|
1378
|
+
if (isValidArrayWithFirstElement(misc.grade) && typeof misc.grade[0] === "string")
|
|
1379
|
+
kanjiObj.misc.grade = misc.grade[0];
|
|
1380
|
+
if (isValidArrayWithFirstElement(misc.freq) && typeof misc.freq[0] === "string")
|
|
1381
|
+
kanjiObj.misc.frequency = misc.freq[0];
|
|
1382
|
+
if (isValidArrayWithFirstElement(misc.jlpt) && typeof misc.jlpt[0] === "string" && ["5", "4", "3", "2", "1"].includes(misc.jlpt[0]))
|
|
1383
|
+
kanjiObj.misc.jlpt = `N${misc.jlpt[0]}`;
|
|
1384
|
+
}
|
|
1385
|
+
if (isValidArray(entry.reading_meaning))
|
|
1386
|
+
for (const rm of entry.reading_meaning) {
|
|
1387
|
+
const rmObj = { groups: [] };
|
|
1388
|
+
if (isValidArray(rm.rmgroup))
|
|
1389
|
+
for (const group of rm.rmgroup) {
|
|
1390
|
+
const groupObj = {
|
|
1391
|
+
readings: [],
|
|
1392
|
+
meanings: []
|
|
1393
|
+
};
|
|
1394
|
+
if (isValidArray(group.reading)) {
|
|
1395
|
+
for (const reading of group.reading)
|
|
1396
|
+
if (reading._ && typeof reading._ === "string" && reading.$ && typeof reading.$ === "object" && reading.$.r_type && (reading.$.r_type === "ja_on" || reading.$.r_type === "ja_kun"))
|
|
1397
|
+
groupObj.readings.push({
|
|
1398
|
+
reading: reading._,
|
|
1399
|
+
type: reading.$.r_type
|
|
1400
|
+
});
|
|
1401
|
+
}
|
|
1402
|
+
if (isValidArray(group.meaning)) {
|
|
1403
|
+
for (const meaning of group.meaning)
|
|
1404
|
+
if (typeof meaning === "string") {
|
|
1405
|
+
if (kanjiObj.isKokuji === void 0 && meaning === "(kokuji)")
|
|
1406
|
+
kanjiObj.isKokuji = true;
|
|
1407
|
+
groupObj.meanings.push(meaning);
|
|
1408
|
+
}
|
|
1409
|
+
}
|
|
1410
|
+
if (groupObj.readings.length > 0 || groupObj.meanings.length > 0) {
|
|
1411
|
+
if (groupObj.readings.length === 0)
|
|
1412
|
+
delete groupObj.readings;
|
|
1413
|
+
if (groupObj.meanings.length === 0)
|
|
1414
|
+
delete groupObj.meanings;
|
|
1415
|
+
rmObj.groups.push(groupObj);
|
|
1416
|
+
}
|
|
1416
1417
|
}
|
|
1417
|
-
|
|
1418
|
-
|
|
1418
|
+
if (isStringArray(rm.nanori) && rm.nanori.length > 0)
|
|
1419
|
+
rmObj.nanori = rm.nanori;
|
|
1420
|
+
if (rmObj.groups.length > 0 || rmObj.nanori) {
|
|
1421
|
+
if (kanjiObj.readingMeaning === void 0)
|
|
1422
|
+
kanjiObj.readingMeaning = [];
|
|
1423
|
+
kanjiObj.readingMeaning.push(rmObj);
|
|
1419
1424
|
}
|
|
1420
|
-
|
|
1421
|
-
if (rmObj.groups.length > 0 && kanjiObj.readingMeaning)
|
|
1422
|
-
kanjiObj.readingMeaning.push(rmObj);
|
|
1423
|
-
}
|
|
1424
|
-
if (kanjiObj.readingMeaning && kanjiObj.readingMeaning.length === 0)
|
|
1425
|
-
delete kanjiObj.readingMeaning;
|
|
1426
|
-
if (kanjiObj.kanji.length === 1 && kanjiObj.misc && kanjiObj.misc.strokeNumber.length > 0)
|
|
1425
|
+
}
|
|
1427
1426
|
dict.push(kanjiObj);
|
|
1427
|
+
}
|
|
1428
1428
|
}
|
|
1429
|
-
}
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
throw err;
|
|
1433
|
-
}
|
|
1429
|
+
}
|
|
1430
|
+
});
|
|
1431
|
+
return dict;
|
|
1434
1432
|
}
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
const bParts = b.split(" ").filter((part) => part.trim().length !== 0).map((part) => {
|
|
1455
|
-
const partMatches = regexps.tanakaPart.exec(part);
|
|
1456
|
-
if (!partMatches || !partMatches.groups || partMatches.length === 0)
|
|
1457
|
-
throw new Error(`Invalid B part: ${part}`);
|
|
1458
|
-
const baseForm = partMatches.groups["base"];
|
|
1459
|
-
if (!baseForm)
|
|
1460
|
-
throw new Error(`Invalid base form of B part: ${part}`);
|
|
1433
|
+
function convertTanakaCorpus(tanakaString) {
|
|
1434
|
+
const tanakaArray = [];
|
|
1435
|
+
const tanakaParsed = tanakaString.split("\n");
|
|
1436
|
+
for (let i = 0; i <= tanakaParsed.length; i += 2) {
|
|
1437
|
+
let a = tanakaParsed[i];
|
|
1438
|
+
let b = tanakaParsed[i + 1];
|
|
1439
|
+
if (a && b && a.startsWith("A: ") && b.startsWith("B: ")) {
|
|
1440
|
+
a = a.replace("A: ", "");
|
|
1441
|
+
b = b.replace("B: ", "");
|
|
1442
|
+
const idMatch = regexps.tanakaID.exec(a);
|
|
1443
|
+
if (idMatch && idMatch.groups && idMatch.groups["id"]) {
|
|
1444
|
+
const aParts = a.replace(regexps.tanakaID, "").split(" ");
|
|
1445
|
+
const bRawParts = b.split(" ").filter((part) => part.trim().length !== 0);
|
|
1446
|
+
const bParts = [];
|
|
1447
|
+
for (const part of bRawParts) {
|
|
1448
|
+
const partMatches = regexps.tanakaPart.exec(part);
|
|
1449
|
+
if (partMatches && partMatches.groups && partMatches.length > 0) {
|
|
1450
|
+
const baseForm = partMatches.groups["base"];
|
|
1451
|
+
if (baseForm) {
|
|
1461
1452
|
const examplePart = { baseForm };
|
|
1462
1453
|
const reading = partMatches.groups["reading"];
|
|
1463
1454
|
const glossNumber = partMatches.groups["glossnum"];
|
|
@@ -1465,9 +1456,8 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
|
|
|
1465
1456
|
if (reading)
|
|
1466
1457
|
if (regexps.tanakaReferenceID.test(reading)) {
|
|
1467
1458
|
const referenceID = regexps.tanakaReferenceID.exec(reading);
|
|
1468
|
-
if (
|
|
1469
|
-
|
|
1470
|
-
examplePart.referenceID = referenceID.groups["entryid"];
|
|
1459
|
+
if (referenceID && referenceID.groups && referenceID.groups["entryid"])
|
|
1460
|
+
examplePart.referenceID = referenceID.groups["entryid"];
|
|
1471
1461
|
} else examplePart.reading = reading;
|
|
1472
1462
|
if (glossNumber)
|
|
1473
1463
|
examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
|
|
@@ -1476,50 +1466,60 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
|
|
|
1476
1466
|
examplePart.edited = true;
|
|
1477
1467
|
examplePart.baseForm = examplePart.baseForm.replace("~", "");
|
|
1478
1468
|
}
|
|
1479
|
-
|
|
1480
|
-
});
|
|
1481
|
-
const phrase = aParts[0];
|
|
1482
|
-
const translation = aParts[1];
|
|
1483
|
-
if (phrase && translation) {
|
|
1484
|
-
let furigana = void 0;
|
|
1485
|
-
if (convert !== null && !phrase.includes("\u30FB"))
|
|
1486
|
-
furigana = await convert(phrase, {
|
|
1487
|
-
to: "hiragana",
|
|
1488
|
-
mode: "furigana"
|
|
1489
|
-
});
|
|
1490
|
-
tanakaArray.push({
|
|
1491
|
-
id: idMatch.groups["id"].trim(),
|
|
1492
|
-
phrase: phrase.trim(),
|
|
1493
|
-
translation: translation.trim(),
|
|
1494
|
-
parts: bParts,
|
|
1495
|
-
...furigana ? { furigana } : {}
|
|
1496
|
-
});
|
|
1469
|
+
bParts.push(examplePart);
|
|
1497
1470
|
}
|
|
1498
1471
|
}
|
|
1499
1472
|
}
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1473
|
+
const phrase = aParts[0];
|
|
1474
|
+
const translation = aParts[1];
|
|
1475
|
+
if (phrase && translation)
|
|
1476
|
+
tanakaArray.push({
|
|
1477
|
+
id: idMatch.groups["id"].trim(),
|
|
1478
|
+
phrase: phrase.trim(),
|
|
1479
|
+
translation: translation.trim(),
|
|
1480
|
+
parts: bParts
|
|
1481
|
+
});
|
|
1504
1482
|
}
|
|
1505
1483
|
}
|
|
1484
|
+
}
|
|
1485
|
+
return tanakaArray;
|
|
1486
|
+
}
|
|
1487
|
+
async function convertTanakaCorpusWithFurigana(tanakaString) {
|
|
1488
|
+
return new Promise(
|
|
1489
|
+
async (resolve) => {
|
|
1490
|
+
let tanakaArray = convertTanakaCorpus(tanakaString);
|
|
1491
|
+
const kuroshiro = new Kuroshiro.default();
|
|
1492
|
+
await kuroshiro.init(new KuromojiAnalyzer());
|
|
1493
|
+
const convert = kuroshiro.convert.bind(kuroshiro);
|
|
1494
|
+
tanakaArray = await Promise.all(
|
|
1495
|
+
tanakaArray.map(async (ex) => {
|
|
1496
|
+
let furigana = void 0;
|
|
1497
|
+
if (convert !== null && !ex.phrase.includes("\u30FB"))
|
|
1498
|
+
furigana = await convert(ex.phrase, {
|
|
1499
|
+
to: "hiragana",
|
|
1500
|
+
mode: "furigana"
|
|
1501
|
+
});
|
|
1502
|
+
if (furigana) ex.furigana = furigana;
|
|
1503
|
+
return ex;
|
|
1504
|
+
})
|
|
1505
|
+
);
|
|
1506
|
+
resolve(tanakaArray);
|
|
1507
|
+
}
|
|
1506
1508
|
);
|
|
1507
1509
|
}
|
|
1508
1510
|
function convertRadkFile(radkBuffer, kanjiDic) {
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
let kanjiLine = fileParsed[j];
|
|
1522
|
-
if (!kanjiLine) continue;
|
|
1511
|
+
const fileParsed = iconv.decode(radkBuffer, "euc-jp").split("\n").filter((line) => !line.startsWith("#"));
|
|
1512
|
+
const radicals = [];
|
|
1513
|
+
for (let i = 0; i < fileParsed.length; i++) {
|
|
1514
|
+
const line = fileParsed[i];
|
|
1515
|
+
if (line && line.startsWith("$ ")) {
|
|
1516
|
+
const radical = {
|
|
1517
|
+
radical: line.charAt(2).trim(),
|
|
1518
|
+
strokes: line.substring(4).trim()
|
|
1519
|
+
};
|
|
1520
|
+
let j = i + 1;
|
|
1521
|
+
let kanjiLine = fileParsed[j];
|
|
1522
|
+
if (kanjiLine) {
|
|
1523
1523
|
const kanjiList = [];
|
|
1524
1524
|
while (kanjiLine && !kanjiLine.startsWith("$ ")) {
|
|
1525
1525
|
const kanjis = kanjiLine.split("");
|
|
@@ -1527,8 +1527,9 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1527
1527
|
const foundKanji = kanjiDic.find(
|
|
1528
1528
|
(dictKanji) => dictKanji.kanji === kanji
|
|
1529
1529
|
);
|
|
1530
|
-
|
|
1531
|
-
|
|
1530
|
+
let kanjiObj = { kanji };
|
|
1531
|
+
if (foundKanji) kanjiObj = foundKanji;
|
|
1532
|
+
kanjiList.push(kanjiObj);
|
|
1532
1533
|
}
|
|
1533
1534
|
j++;
|
|
1534
1535
|
kanjiLine = fileParsed[j];
|
|
@@ -1540,133 +1541,116 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1540
1541
|
radicals.push(radical);
|
|
1541
1542
|
}
|
|
1542
1543
|
}
|
|
1543
|
-
fileParsed.length = 0;
|
|
1544
|
-
return radicals;
|
|
1545
|
-
} catch (err) {
|
|
1546
|
-
throw err;
|
|
1547
1544
|
}
|
|
1545
|
+
return radicals;
|
|
1548
1546
|
}
|
|
1549
1547
|
function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
if (line.length === 0) continue;
|
|
1548
|
+
const fileParsed = iconv.decode(kradBuffer, "euc-jp").split("\n").filter((line) => !line.startsWith("#"));
|
|
1549
|
+
const kanjiWithRadicals = [];
|
|
1550
|
+
for (const line of fileParsed) {
|
|
1551
|
+
if (line.length > 0) {
|
|
1555
1552
|
const split = line.split(" : ");
|
|
1556
1553
|
const kanjiChar = split[0];
|
|
1557
1554
|
const radicalsRow = split[1];
|
|
1558
|
-
if (
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
);
|
|
1569
|
-
if (!foundRadical) {
|
|
1570
|
-
const katakanaChar = katakanaList.find(
|
|
1571
|
-
(kana) => kana.kana === radical
|
|
1555
|
+
if (kanjiChar && radicalsRow) {
|
|
1556
|
+
const kanji = {
|
|
1557
|
+
kanji: kanjiChar,
|
|
1558
|
+
radicals: []
|
|
1559
|
+
};
|
|
1560
|
+
if (kanji.kanji.length === 1) {
|
|
1561
|
+
const radicals = radicalsRow.split(" ");
|
|
1562
|
+
for (const radical of radicals) {
|
|
1563
|
+
const foundRadical = kanjiDic.find(
|
|
1564
|
+
(dictKanji) => dictKanji.kanji === radical
|
|
1572
1565
|
);
|
|
1573
|
-
|
|
1574
|
-
foundRadical
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1566
|
+
let radicalObj = foundRadical ?? { kanji: radical };
|
|
1567
|
+
if (!foundRadical) {
|
|
1568
|
+
const katakanaChar = katakanaList.find(
|
|
1569
|
+
(kana) => kana.kana === radical
|
|
1570
|
+
);
|
|
1571
|
+
if (!katakanaChar) continue;
|
|
1572
|
+
radicalObj = {
|
|
1573
|
+
kanji: katakanaChar.kana,
|
|
1574
|
+
readingMeaning: [
|
|
1575
|
+
{
|
|
1576
|
+
groups: [
|
|
1577
|
+
{
|
|
1578
|
+
readings: [
|
|
1579
|
+
{ reading: katakanaChar.kana, type: "ja_on" }
|
|
1580
|
+
],
|
|
1581
|
+
meanings: [katakanaChar.reading]
|
|
1582
|
+
}
|
|
1583
|
+
]
|
|
1584
|
+
}
|
|
1585
|
+
]
|
|
1586
|
+
};
|
|
1587
|
+
}
|
|
1588
|
+
kanji.radicals.push(radicalObj);
|
|
1587
1589
|
}
|
|
1588
|
-
kanji.radicals.push(foundRadical ? foundRadical : radical);
|
|
1589
1590
|
}
|
|
1591
|
+
if (kanji.kanji.length === 1 && kanji.radicals.length > 0)
|
|
1592
|
+
kanjiWithRadicals.push(kanji);
|
|
1590
1593
|
}
|
|
1591
|
-
if (kanji.kanji.length === 1 && kanji.radicals.length > 0)
|
|
1592
|
-
kanjiWithRadicals.push(kanji);
|
|
1593
1594
|
}
|
|
1594
|
-
fileParsed.length = 0;
|
|
1595
|
-
return kanjiWithRadicals;
|
|
1596
|
-
} catch (err) {
|
|
1597
|
-
throw err;
|
|
1598
1595
|
}
|
|
1599
|
-
|
|
1600
|
-
async function convertJawiktionary(stream) {
|
|
1601
|
-
const rl = createInterface({
|
|
1602
|
-
input: stream,
|
|
1603
|
-
crlfDelay: Infinity
|
|
1604
|
-
});
|
|
1605
|
-
let lineNumber = 0;
|
|
1606
|
-
return await new Promise(
|
|
1607
|
-
async (resolve, reject) => {
|
|
1608
|
-
try {
|
|
1609
|
-
const entries = [];
|
|
1610
|
-
for await (const line of rl) {
|
|
1611
|
-
lineNumber++;
|
|
1612
|
-
let obj = void 0;
|
|
1613
|
-
try {
|
|
1614
|
-
obj = JSON.parse(line.trim());
|
|
1615
|
-
} catch (err) {
|
|
1616
|
-
rl.close();
|
|
1617
|
-
throw new Error(
|
|
1618
|
-
`Invalid JSONL at line ${lineNumber}: ${err.message}`
|
|
1619
|
-
);
|
|
1620
|
-
}
|
|
1621
|
-
if (obj !== void 0 && obj !== null && typeof obj === "object" && obj.lang_code === "ja" && obj.lang === "\u65E5\u672C\u8A9E")
|
|
1622
|
-
entries.push(obj);
|
|
1623
|
-
}
|
|
1624
|
-
rl.close();
|
|
1625
|
-
stream.close();
|
|
1626
|
-
stream.destroy();
|
|
1627
|
-
resolve(entries);
|
|
1628
|
-
} catch (err) {
|
|
1629
|
-
reject(err);
|
|
1630
|
-
}
|
|
1631
|
-
}
|
|
1632
|
-
);
|
|
1596
|
+
return kanjiWithRadicals;
|
|
1633
1597
|
}
|
|
1634
1598
|
function mapEntry(entry) {
|
|
1635
|
-
if (entry.word === void 0 || typeof entry.word !== "string")
|
|
1636
|
-
throw new Error("Invalid ja.wiktionary entry");
|
|
1637
1599
|
return {
|
|
1638
1600
|
word: entry.word,
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
(
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
}))
|
|
1653
|
-
} : {},
|
|
1601
|
+
pos_title: entry.pos_title,
|
|
1602
|
+
senses: entry.senses.filter(
|
|
1603
|
+
(sense) => isValidArray(sense.form_of) && sense.form_of.every(
|
|
1604
|
+
(form) => form.word && typeof form.word === "string"
|
|
1605
|
+
) || isStringArray(sense.glosses)
|
|
1606
|
+
).map((sense) => ({
|
|
1607
|
+
...sense.form_of ? {
|
|
1608
|
+
form_of: sense.form_of.map((form) => ({
|
|
1609
|
+
word: form.word
|
|
1610
|
+
}))
|
|
1611
|
+
} : {},
|
|
1612
|
+
glosses: sense.glosses
|
|
1613
|
+
})),
|
|
1654
1614
|
...isValidArray(entry.forms) && entry.forms.every((form) => typeof form.form === "string") ? { forms: entry.forms.map((form) => ({ form: form.form })) } : {}
|
|
1655
1615
|
};
|
|
1656
1616
|
}
|
|
1617
|
+
function convertJawiktionarySync(buffer) {
|
|
1618
|
+
const lines = buffer.toString("utf-8").split("\n");
|
|
1619
|
+
const entries = [];
|
|
1620
|
+
for (let i = 0; i < lines.length; i++) {
|
|
1621
|
+
const line = lines[i];
|
|
1622
|
+
if (!line) continue;
|
|
1623
|
+
const obj = JSON.parse(line);
|
|
1624
|
+
if (obj && typeof obj === "object" && obj.lang_code === "ja" && obj.lang === "\u65E5\u672C\u8A9E")
|
|
1625
|
+
entries.push(mapEntry(obj));
|
|
1626
|
+
}
|
|
1627
|
+
return entries;
|
|
1628
|
+
}
|
|
1629
|
+
async function convertJawiktionaryAsync(stream) {
|
|
1630
|
+
return new Promise(
|
|
1631
|
+
async (resolve) => {
|
|
1632
|
+
const rl = createInterface({
|
|
1633
|
+
input: stream,
|
|
1634
|
+
crlfDelay: Infinity
|
|
1635
|
+
});
|
|
1636
|
+
const entries = [];
|
|
1637
|
+
for await (const line of rl) {
|
|
1638
|
+
const obj = JSON.parse(line.trim());
|
|
1639
|
+
if (obj && typeof obj === "object" && obj.lang_code === "ja" && obj.lang === "\u65E5\u672C\u8A9E")
|
|
1640
|
+
entries.push(mapEntry(obj));
|
|
1641
|
+
}
|
|
1642
|
+
rl.close();
|
|
1643
|
+
stream.close();
|
|
1644
|
+
stream.destroy();
|
|
1645
|
+
resolve(entries);
|
|
1646
|
+
}
|
|
1647
|
+
);
|
|
1648
|
+
}
|
|
1657
1649
|
function parseEntry(entry, definitions, definitionMap) {
|
|
1658
1650
|
if (isValidArray(entry.senses)) {
|
|
1659
1651
|
for (const sense of entry.senses)
|
|
1660
1652
|
if (isStringArray(sense.glosses)) {
|
|
1661
|
-
|
|
1662
|
-
for (let i = 0; i < sense.glosses.length; i += 2) {
|
|
1663
|
-
if (i !== 0) {
|
|
1664
|
-
let prev = sense.glosses[i - 1];
|
|
1665
|
-
let cur = sense.glosses[i];
|
|
1666
|
-
if (prev && cur)
|
|
1667
|
-
definition += `${prev}${!prev.endsWith("\u3002") ? "\u3002" : ""}${cur}`;
|
|
1668
|
-
} else definition += sense.glosses[i];
|
|
1669
|
-
}
|
|
1653
|
+
const definition = sense.glosses.join("");
|
|
1670
1654
|
if (!definitions.some((def) => def.definition === definition)) {
|
|
1671
1655
|
if (!definitionMap.has(definition))
|
|
1672
1656
|
definitionMap.set(definition, { count: 1 });
|
|
@@ -1676,262 +1660,216 @@ function parseEntry(entry, definitions, definitionMap) {
|
|
|
1676
1660
|
}
|
|
1677
1661
|
}
|
|
1678
1662
|
}
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1663
|
+
function getWordDefinitions(entryList, jmDict) {
|
|
1664
|
+
var _a, _b, _c, _d, _e;
|
|
1665
|
+
const entries = /* @__PURE__ */ new Map();
|
|
1666
|
+
for (const entry of entryList) {
|
|
1667
|
+
const ent = entries.get(entry.word);
|
|
1668
|
+
if (ent) ent.push(mapEntry(entry));
|
|
1669
|
+
else entries.set(entry.word, [mapEntry(entry)]);
|
|
1670
|
+
}
|
|
1671
|
+
const japaneseDefinitions = [];
|
|
1672
|
+
const definitionMap = /* @__PURE__ */ new Map();
|
|
1673
|
+
const validWords = [];
|
|
1674
|
+
const validReadings = /* @__PURE__ */ new Set();
|
|
1675
|
+
const validKanjiForms = /* @__PURE__ */ new Set();
|
|
1676
|
+
if (jmDict.length > 0)
|
|
1677
|
+
for (const word of jmDict) {
|
|
1678
|
+
let valid = false;
|
|
1679
|
+
for (const r of word.readings)
|
|
1680
|
+
if ((r.notes === void 0 || !r.notes.some((note) => notSearchedForms.has(note)) || r.commonness !== void 0) && !validReadings.has(r.reading)) {
|
|
1681
|
+
validReadings.add(r.reading);
|
|
1682
|
+
if (!valid) valid = true;
|
|
1683
|
+
}
|
|
1684
|
+
if (word.kanjiForms) {
|
|
1685
|
+
for (const kf of word.kanjiForms)
|
|
1686
|
+
if ((kf.notes === void 0 || !kf.notes.some((note) => notSearchedForms.has(note)) || kf.commonness !== void 0) && !validKanjiForms.has(kf.form)) {
|
|
1687
|
+
validKanjiForms.add(kf.form);
|
|
1699
1688
|
if (!valid) valid = true;
|
|
1700
1689
|
}
|
|
1701
|
-
if (word.kanjiForms) {
|
|
1702
|
-
for (const kf of word.kanjiForms)
|
|
1703
|
-
if ((kf.notes === void 0 || !kf.notes.some((note) => notSearchedForms.has(note)) || kf.commonness !== void 0) && !validKanjiForms.has(kf.form)) {
|
|
1704
|
-
validKanjiForms.add(kf.form);
|
|
1705
|
-
if (!valid) valid = true;
|
|
1706
|
-
}
|
|
1707
|
-
}
|
|
1708
|
-
if (valid) validWords.push(word);
|
|
1709
1690
|
}
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1691
|
+
if (valid) validWords.push(word);
|
|
1692
|
+
}
|
|
1693
|
+
const validTitleEntries = /* @__PURE__ */ new Map();
|
|
1694
|
+
const entriesWithFormTitlesGlobal = /* @__PURE__ */ new Map();
|
|
1695
|
+
const entriesWithFormsGlobal = /* @__PURE__ */ new Map();
|
|
1696
|
+
const validFormOfEntries = /* @__PURE__ */ new Set();
|
|
1697
|
+
const validGlossesEntries = /* @__PURE__ */ new Set();
|
|
1698
|
+
const validFormsEntries = /* @__PURE__ */ new Set();
|
|
1699
|
+
const ents = Array.from(entries.values()).flat();
|
|
1700
|
+
for (const entry of ents) {
|
|
1701
|
+
let valid = false;
|
|
1702
|
+
if (validKanjiForms && validKanjiForms.has(entry.word)) {
|
|
1703
|
+
valid = true;
|
|
1704
|
+
if (isValidArray(entry.senses))
|
|
1705
|
+
for (const sense of entry.senses) {
|
|
1706
|
+
if (isValidArray(sense.form_of) && sense.form_of.some(
|
|
1707
|
+
(form) => form.word && typeof form.word === "string" && validReadings.has(form.word)
|
|
1708
|
+
))
|
|
1709
|
+
validFormOfEntries.add(entry.word);
|
|
1710
|
+
else if (isStringArray(sense.glosses)) {
|
|
1711
|
+
for (const gloss of sense.glosses) {
|
|
1712
|
+
let reading = void 0;
|
|
1713
|
+
if (gloss !== void 0) {
|
|
1714
|
+
if (gloss.trim().includes("\u306E\u6F22\u5B57\u8868\u8A18\u3002") || gloss.trim().includes("\u3092\u53C2\u7167\u3002")) {
|
|
1715
|
+
for (const r of validReadings)
|
|
1716
|
+
if (gloss.trim().includes(r)) {
|
|
1717
|
+
reading = r;
|
|
1718
|
+
break;
|
|
1737
1719
|
}
|
|
1738
|
-
}
|
|
1739
|
-
if (reading) validGlossesEntries.add(entry.word);
|
|
1740
1720
|
}
|
|
1741
1721
|
}
|
|
1722
|
+
if (reading) validGlossesEntries.add(entry.word);
|
|
1742
1723
|
}
|
|
1743
|
-
if (isValidArray(entry.forms)) {
|
|
1744
|
-
for (const form of entry.forms)
|
|
1745
|
-
if (form.form && typeof form.form === "string" && validReadings.has(form.form))
|
|
1746
|
-
validFormsEntries.add(entry.word);
|
|
1747
1724
|
}
|
|
1748
|
-
} else if (validReadings.has(entry.word)) {
|
|
1749
|
-
valid = true;
|
|
1750
|
-
const ftEntry = entriesWithFormTitlesGlobal.get(entry.word);
|
|
1751
|
-
if (ftEntry) ftEntry.push(entry);
|
|
1752
|
-
else entriesWithFormTitlesGlobal.set(entry.word, [entry]);
|
|
1753
|
-
}
|
|
1754
|
-
if (valid) {
|
|
1755
|
-
const tEntry = validTitleEntries.get(entry.word);
|
|
1756
|
-
if (tEntry) tEntry.push(entry);
|
|
1757
|
-
else validTitleEntries.set(entry.word, [entry]);
|
|
1758
|
-
}
|
|
1759
|
-
if (isValidArray(entry.forms) && (validKanjiForms.has(entry.word) || validReadings.has(entry.word)) && entry.forms.some(
|
|
1760
|
-
(form) => validKanjiForms.has(form.form) || validReadings.has(form.form)
|
|
1761
|
-
)) {
|
|
1762
|
-
const wfEntry = entriesWithFormsGlobal.get(entry.word);
|
|
1763
|
-
if (wfEntry) wfEntry.push(entry);
|
|
1764
|
-
else entriesWithFormsGlobal.set(entry.word, [entry]);
|
|
1765
1725
|
}
|
|
1726
|
+
if (isValidArray(entry.forms)) {
|
|
1727
|
+
for (const form of entry.forms)
|
|
1728
|
+
if (form.form && typeof form.form === "string" && validReadings.has(form.form))
|
|
1729
|
+
validFormsEntries.add(entry.word);
|
|
1766
1730
|
}
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
const
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1731
|
+
} else if (validReadings.has(entry.word)) {
|
|
1732
|
+
valid = true;
|
|
1733
|
+
const ftEntry = entriesWithFormTitlesGlobal.get(entry.word);
|
|
1734
|
+
if (ftEntry) ftEntry.push(entry);
|
|
1735
|
+
else entriesWithFormTitlesGlobal.set(entry.word, [entry]);
|
|
1736
|
+
}
|
|
1737
|
+
if (valid) {
|
|
1738
|
+
const tEntry = validTitleEntries.get(entry.word);
|
|
1739
|
+
if (tEntry) tEntry.push(entry);
|
|
1740
|
+
else validTitleEntries.set(entry.word, [entry]);
|
|
1741
|
+
}
|
|
1742
|
+
if (isValidArray(entry.forms) && (validKanjiForms.has(entry.word) || validReadings.has(entry.word)) && entry.forms.some(
|
|
1743
|
+
(form) => validKanjiForms.has(form.form) || validReadings.has(form.form)
|
|
1744
|
+
)) {
|
|
1745
|
+
const wfEntry = entriesWithFormsGlobal.get(entry.word);
|
|
1746
|
+
if (wfEntry) wfEntry.push(entry);
|
|
1747
|
+
else entriesWithFormsGlobal.set(entry.word, [entry]);
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
ents.length = 0;
|
|
1751
|
+
const posMap = /* @__PURE__ */ new Map();
|
|
1752
|
+
const vte = Array.from(
|
|
1753
|
+
validTitleEntries.values()
|
|
1754
|
+
).flat();
|
|
1755
|
+
const fge = Array.from(
|
|
1756
|
+
entriesWithFormTitlesGlobal.values()
|
|
1757
|
+
).flat();
|
|
1758
|
+
const wfe = Array.from(
|
|
1759
|
+
entriesWithFormsGlobal.values()
|
|
1760
|
+
).flat();
|
|
1761
|
+
for (const pos of [
|
|
1762
|
+
"\u540D\u8A5E",
|
|
1763
|
+
"\u52D5\u8A5E",
|
|
1764
|
+
"\u6210\u53E5",
|
|
1765
|
+
"\u526F\u8A5E",
|
|
1766
|
+
"\u5F62\u5BB9\u52D5\u8A5E",
|
|
1767
|
+
"\u52A9\u8A5E",
|
|
1768
|
+
"\u611F\u52D5\u8A5E",
|
|
1769
|
+
"\u4EE3\u540D\u8A5E",
|
|
1770
|
+
"\u63A5\u5C3E\u8F9E",
|
|
1771
|
+
"\u63A5\u982D\u8A9E",
|
|
1772
|
+
"\u9020\u8A9E\u6210\u5206",
|
|
1773
|
+
"\u7565\u8A9E",
|
|
1774
|
+
"\u56FA\u6709\u540D\u8A5E",
|
|
1775
|
+
"\u4EBA\u79F0\u4EE3\u540D\u8A5E",
|
|
1776
|
+
"\u63A5\u982D\u8F9E",
|
|
1777
|
+
"\u63A5\u7D9A\u52A9\u8A5E",
|
|
1778
|
+
"\u9593\u6295\u8A5E",
|
|
1779
|
+
"\u52A9\u52D5\u8A5E",
|
|
1780
|
+
"\u5F62\u5BB9\u8A5E",
|
|
1781
|
+
"\u7E2E\u7D04\u5F62",
|
|
1782
|
+
"\u63A5\u8F9E",
|
|
1783
|
+
"\u63A5\u7D9A\u8A5E",
|
|
1784
|
+
"\u9023\u4F53\u8A5E",
|
|
1785
|
+
"\u4EBA\u540D",
|
|
1786
|
+
"\u8A18\u53F7",
|
|
1787
|
+
"\u6570\u8A5E",
|
|
1788
|
+
"\u6163\u7528\u53E5",
|
|
1789
|
+
"\u3053\u3068\u308F\u3056",
|
|
1790
|
+
"\u52A9\u6570\u8A5E",
|
|
1791
|
+
"\u82F1\u6570\u5B57\u6DF7\u5408\u8868\u8A18",
|
|
1792
|
+
"\u52D5\u8A5E\u53E5",
|
|
1793
|
+
"\u6210\u8A9E",
|
|
1794
|
+
"\u610F\u7FA9",
|
|
1795
|
+
"\u982D\u5B57\u8A9E",
|
|
1796
|
+
"\u63A5\u5C3E\u8A9E"
|
|
1797
|
+
]) {
|
|
1798
|
+
if (!posMap.has(pos)) posMap.set(pos, {});
|
|
1799
|
+
for (const te of vte)
|
|
1800
|
+
if (te.pos_title === pos || te.pos_title === "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18") {
|
|
1801
|
+
const posEntries = posMap.get(pos);
|
|
1802
|
+
if (posEntries.title === void 0)
|
|
1803
|
+
posEntries.title = /* @__PURE__ */ new Map();
|
|
1804
|
+
const entryList2 = posEntries.title.get(
|
|
1805
|
+
te.word
|
|
1806
|
+
);
|
|
1807
|
+
if (entryList2) entryList2.push(te);
|
|
1808
|
+
else posEntries.title.set(te.word, [te]);
|
|
1845
1809
|
}
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
const
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
else if (typeof notePos === "string" && !poses.has(notePos))
|
|
1863
|
-
poses.add(notePos);
|
|
1864
|
-
}
|
|
1865
|
-
}
|
|
1866
|
-
}
|
|
1867
|
-
const validWordReadings = new Set(
|
|
1868
|
-
word.readings.filter(
|
|
1869
|
-
(r) => r.notes === void 0 || !r.notes.some((note) => notSearchedForms.has(note)) || r.commonness !== void 0
|
|
1870
|
-
).map((r) => r.reading)
|
|
1810
|
+
for (const ft of fge)
|
|
1811
|
+
if (ft.pos_title === pos) {
|
|
1812
|
+
const posEntries = posMap.get(pos);
|
|
1813
|
+
if (posEntries.formTitle === void 0)
|
|
1814
|
+
posEntries.formTitle = /* @__PURE__ */ new Map();
|
|
1815
|
+
const entryList2 = posEntries.formTitle.get(ft.word);
|
|
1816
|
+
if (entryList2) entryList2.push(ft);
|
|
1817
|
+
else posEntries.formTitle.set(ft.word, [ft]);
|
|
1818
|
+
}
|
|
1819
|
+
for (const wf of wfe)
|
|
1820
|
+
if (wf.pos_title === pos) {
|
|
1821
|
+
const posEntries = posMap.get(pos);
|
|
1822
|
+
if (posEntries.form === void 0)
|
|
1823
|
+
posEntries.form = /* @__PURE__ */ new Map();
|
|
1824
|
+
const entryList2 = posEntries.form.get(
|
|
1825
|
+
wf.word
|
|
1871
1826
|
);
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
)
|
|
1895
|
-
);
|
|
1896
|
-
if (fe)
|
|
1897
|
-
entriesWithForms.push(
|
|
1898
|
-
...fe.filter(
|
|
1899
|
-
(ent) => ent.forms && ent.forms.some(
|
|
1900
|
-
(form) => validWordKanjiForms.has(form.form) || validWordReadings.has(form.form)
|
|
1901
|
-
)
|
|
1902
|
-
)
|
|
1903
|
-
);
|
|
1904
|
-
}
|
|
1905
|
-
if (posEntries.title || posEntries.formTitle || posEntries.form)
|
|
1906
|
-
for (const r of validWordReadings) {
|
|
1907
|
-
const te = (_c = posEntries.title) == null ? void 0 : _c.get(r);
|
|
1908
|
-
const fe = (_d = posEntries.form) == null ? void 0 : _d.get(r);
|
|
1909
|
-
const fte = (_e = posEntries.formTitle) == null ? void 0 : _e.get(r);
|
|
1910
|
-
if (te)
|
|
1911
|
-
entriesWithTitles.push(
|
|
1912
|
-
...te.filter(
|
|
1913
|
-
(ent) => ent.forms && validWordKanjiForms && ent.forms.some(
|
|
1914
|
-
(form) => validWordKanjiForms.has(form.form)
|
|
1915
|
-
) || validWordKanjiForms === void 0
|
|
1916
|
-
)
|
|
1917
|
-
);
|
|
1918
|
-
if (fe)
|
|
1919
|
-
entriesWithForms.push(
|
|
1920
|
-
...fe.filter(
|
|
1921
|
-
(ent) => ent.forms && ent.forms.some(
|
|
1922
|
-
(form) => validWordKanjiForms && validWordKanjiForms.has(form.form) || validWordReadings.has(form.form)
|
|
1923
|
-
)
|
|
1924
|
-
)
|
|
1925
|
-
);
|
|
1926
|
-
if (fte) entriesWithFormTitles.push(...fte);
|
|
1927
|
-
}
|
|
1928
|
-
}
|
|
1827
|
+
if (entryList2) entryList2.push(wf);
|
|
1828
|
+
else posEntries.form.set(wf.word, [wf]);
|
|
1829
|
+
}
|
|
1830
|
+
}
|
|
1831
|
+
vte.length = 0;
|
|
1832
|
+
fge.length = 0;
|
|
1833
|
+
wfe.length = 0;
|
|
1834
|
+
const wordEntriesPairs = [];
|
|
1835
|
+
for (const word of validWords) {
|
|
1836
|
+
const poses = /* @__PURE__ */ new Set();
|
|
1837
|
+
for (const m of word.meanings) {
|
|
1838
|
+
if (m.partOfSpeech)
|
|
1839
|
+
for (const note of m.partOfSpeech) {
|
|
1840
|
+
const noteEntry = noteMap.get(note);
|
|
1841
|
+
if (noteEntry && noteEntry.length === 3) {
|
|
1842
|
+
const notePos = noteEntry[2];
|
|
1843
|
+
if (Array.isArray(notePos))
|
|
1844
|
+
for (const pos of notePos) {
|
|
1845
|
+
if (!poses.has(pos)) poses.add(pos);
|
|
1846
|
+
}
|
|
1847
|
+
else if (typeof notePos === "string" && !poses.has(notePos))
|
|
1848
|
+
poses.add(notePos);
|
|
1929
1849
|
}
|
|
1930
|
-
|
|
1850
|
+
}
|
|
1851
|
+
}
|
|
1852
|
+
const validWordReadings = new Set(
|
|
1853
|
+
word.readings.filter(
|
|
1854
|
+
(r) => r.notes === void 0 || !r.notes.some((note) => notSearchedForms.has(note)) || r.commonness !== void 0
|
|
1855
|
+
).map((r) => r.reading)
|
|
1856
|
+
);
|
|
1857
|
+
const validWordKanjiForms = word.kanjiForms ? new Set(
|
|
1858
|
+
word.kanjiForms.filter(
|
|
1859
|
+
(kf) => kf.notes === void 0 || !kf.notes.some((note) => notSearchedForms.has(note)) || kf.commonness !== void 0
|
|
1860
|
+
).map((kf) => kf.form)
|
|
1861
|
+
) : void 0;
|
|
1862
|
+
const entriesWithTitles = [];
|
|
1863
|
+
const entriesWithFormTitles = [];
|
|
1864
|
+
const entriesWithForms = [];
|
|
1865
|
+
if (poses.size > 0)
|
|
1866
|
+
for (const pos of poses) {
|
|
1867
|
+
const posEntries = posMap.get(pos);
|
|
1868
|
+
if (posEntries) {
|
|
1931
1869
|
if (validWordKanjiForms)
|
|
1932
1870
|
for (const kf of validWordKanjiForms) {
|
|
1933
|
-
const te =
|
|
1934
|
-
const fe =
|
|
1871
|
+
const te = (_a = posEntries.title) == null ? void 0 : _a.get(kf);
|
|
1872
|
+
const fe = (_b = posEntries.form) == null ? void 0 : _b.get(kf);
|
|
1935
1873
|
if (te)
|
|
1936
1874
|
entriesWithTitles.push(
|
|
1937
1875
|
...te.filter(
|
|
@@ -1948,9 +1886,9 @@ async function getWordDefinitions(entryList, jmDict, generateFurigana) {
|
|
|
1948
1886
|
);
|
|
1949
1887
|
}
|
|
1950
1888
|
for (const r of validWordReadings) {
|
|
1951
|
-
const te =
|
|
1952
|
-
const fe =
|
|
1953
|
-
const fte =
|
|
1889
|
+
const te = (_c = posEntries.title) == null ? void 0 : _c.get(r);
|
|
1890
|
+
const fe = (_d = posEntries.form) == null ? void 0 : _d.get(r);
|
|
1891
|
+
const fte = (_e = posEntries.formTitle) == null ? void 0 : _e.get(r);
|
|
1954
1892
|
if (te)
|
|
1955
1893
|
entriesWithTitles.push(
|
|
1956
1894
|
...te.filter(
|
|
@@ -1970,148 +1908,197 @@ async function getWordDefinitions(entryList, jmDict, generateFurigana) {
|
|
|
1970
1908
|
if (fte) entriesWithFormTitles.push(...fte);
|
|
1971
1909
|
}
|
|
1972
1910
|
}
|
|
1973
|
-
if (entriesWithTitles.length > 0 && (entriesWithFormTitles.length > 0 || entriesWithForms.length > 0))
|
|
1974
|
-
wordEntriesPairs.push({
|
|
1975
|
-
word,
|
|
1976
|
-
readings: validWordReadings,
|
|
1977
|
-
...validWordKanjiForms ? { kanjiForms: validWordKanjiForms } : {},
|
|
1978
|
-
entriesWithTitles,
|
|
1979
|
-
entriesWithFormTitles,
|
|
1980
|
-
entriesWithForms
|
|
1981
|
-
});
|
|
1982
1911
|
}
|
|
1983
|
-
|
|
1984
|
-
|
|
1985
|
-
const
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
|
|
1912
|
+
if (entriesWithTitles.length === 0 && entriesWithFormTitles.length === 0 && entriesWithForms.length === 0) {
|
|
1913
|
+
if (validWordKanjiForms)
|
|
1914
|
+
for (const kf of validWordKanjiForms) {
|
|
1915
|
+
const te = validTitleEntries.get(kf);
|
|
1916
|
+
const fe = entriesWithFormsGlobal.get(kf);
|
|
1917
|
+
if (te)
|
|
1918
|
+
entriesWithTitles.push(
|
|
1919
|
+
...te.filter(
|
|
1920
|
+
(ent) => validFormOfEntries.has(ent.word) || validGlossesEntries.has(ent.word) || validFormsEntries.has(ent.word)
|
|
1921
|
+
)
|
|
1922
|
+
);
|
|
1923
|
+
if (fe)
|
|
1924
|
+
entriesWithForms.push(
|
|
1925
|
+
...fe.filter(
|
|
1926
|
+
(ent) => ent.forms && ent.forms.some(
|
|
1927
|
+
(form) => validWordKanjiForms.has(form.form) || validWordReadings.has(form.form)
|
|
1928
|
+
)
|
|
1929
|
+
)
|
|
1930
|
+
);
|
|
1931
|
+
}
|
|
1932
|
+
for (const r of validWordReadings) {
|
|
1933
|
+
const te = validTitleEntries.get(r);
|
|
1934
|
+
const fe = entriesWithFormsGlobal.get(r);
|
|
1935
|
+
const fte = entriesWithFormTitlesGlobal.get(r);
|
|
1936
|
+
if (te)
|
|
1937
|
+
entriesWithTitles.push(
|
|
1938
|
+
...te.filter(
|
|
1939
|
+
(ent) => ent.forms && validWordKanjiForms && ent.forms.some(
|
|
1940
|
+
(form) => validWordKanjiForms.has(form.form)
|
|
1941
|
+
) || validWordKanjiForms === void 0
|
|
1942
|
+
)
|
|
1943
|
+
);
|
|
1944
|
+
if (fe)
|
|
1945
|
+
entriesWithForms.push(
|
|
1946
|
+
...fe.filter(
|
|
1947
|
+
(ent) => ent.forms && ent.forms.some(
|
|
1948
|
+
(form) => validWordKanjiForms && validWordKanjiForms.has(form.form) || validWordReadings.has(form.form)
|
|
1949
|
+
)
|
|
1950
|
+
)
|
|
1951
|
+
);
|
|
1952
|
+
if (fte) entriesWithFormTitles.push(...fte);
|
|
1953
|
+
}
|
|
1954
|
+
}
|
|
1955
|
+
if (entriesWithTitles.length > 0 && (entriesWithFormTitles.length > 0 || entriesWithForms.length > 0))
|
|
1956
|
+
wordEntriesPairs.push({
|
|
1957
|
+
word,
|
|
1958
|
+
readings: validWordReadings,
|
|
1959
|
+
...validWordKanjiForms ? { kanjiForms: validWordKanjiForms } : {},
|
|
1960
|
+
entriesWithTitles,
|
|
1961
|
+
entriesWithFormTitles,
|
|
1962
|
+
entriesWithForms
|
|
1963
|
+
});
|
|
1964
|
+
}
|
|
1965
|
+
for (const pair of wordEntriesPairs) {
|
|
1966
|
+
const definitions = [];
|
|
1967
|
+
const kanjiFormEntries = [];
|
|
1968
|
+
const readingWithFormsEntries = [];
|
|
1969
|
+
const readingEntries = [];
|
|
1970
|
+
const titleFormMap = /* @__PURE__ */ new Map();
|
|
1971
|
+
const refsMap = /* @__PURE__ */ new Map();
|
|
1972
|
+
const readingForms = /* @__PURE__ */ new Set();
|
|
1973
|
+
for (const ent of pair.entriesWithTitles) {
|
|
1974
|
+
const validFormOf = validFormOfEntries.has(ent.word);
|
|
1975
|
+
const validGlosses = validGlossesEntries.has(ent.word);
|
|
1976
|
+
const validForms = validFormsEntries.has(ent.word);
|
|
1977
|
+
if (pair.kanjiForms && pair.kanjiForms.has(ent.word) && (validFormOf || validGlosses || validForms)) {
|
|
1978
|
+
kanjiFormEntries.push(ent);
|
|
1979
|
+
if ((validFormOf || validGlosses) && isValidArray(ent.senses))
|
|
1980
|
+
for (const sense of ent.senses) {
|
|
1981
|
+
if (validFormOf && isValidArray(sense.form_of)) {
|
|
1982
|
+
for (const form of sense.form_of)
|
|
1983
|
+
if (form.word && typeof form.word === "string" && pair.readings.has(form.word)) {
|
|
1984
|
+
const elem = titleFormMap.get(
|
|
1985
|
+
form.word
|
|
1986
|
+
);
|
|
1987
|
+
if (!elem)
|
|
1988
|
+
titleFormMap.set(form.word, /* @__PURE__ */ new Set([ent.word]));
|
|
1989
|
+
else elem.add(ent.word);
|
|
1990
|
+
}
|
|
1991
|
+
} else if (validGlosses && isStringArray(sense.glosses)) {
|
|
1992
|
+
for (const gloss of sense.glosses) {
|
|
1993
|
+
let reading = void 0;
|
|
1994
|
+
if (gloss !== void 0) {
|
|
1995
|
+
if (gloss.trim().includes("\u306E\u6F22\u5B57\u8868\u8A18\u3002") || gloss.trim().includes("\u3092\u53C2\u7167\u3002")) {
|
|
1996
|
+
for (const r of pair.readings)
|
|
1997
|
+
if (gloss.trim().includes(r)) {
|
|
1998
|
+
reading = r;
|
|
1999
|
+
break;
|
|
2022
2000
|
}
|
|
2023
|
-
}
|
|
2024
|
-
if (reading) {
|
|
2025
|
-
const elem = refsMap.get(reading);
|
|
2026
|
-
if (!elem)
|
|
2027
|
-
refsMap.set(reading, /* @__PURE__ */ new Set([ent.word]));
|
|
2028
|
-
else elem.add(ent.word);
|
|
2029
|
-
}
|
|
2030
2001
|
}
|
|
2031
2002
|
}
|
|
2003
|
+
if (reading) {
|
|
2004
|
+
const elem = refsMap.get(reading);
|
|
2005
|
+
if (!elem) refsMap.set(reading, /* @__PURE__ */ new Set([ent.word]));
|
|
2006
|
+
else elem.add(ent.word);
|
|
2007
|
+
}
|
|
2032
2008
|
}
|
|
2033
|
-
if (validForms && isValidArray(ent.forms)) {
|
|
2034
|
-
for (const form of ent.forms)
|
|
2035
|
-
if (form.form && typeof form.form === "string" && pair.readings.has(form.form))
|
|
2036
|
-
readingForms.add(form.form);
|
|
2037
2009
|
}
|
|
2038
|
-
} else if (pair.readings.has(ent.word) && isValidArray(ent.forms) && pair.kanjiForms && ent.forms.some((form) => pair.kanjiForms.has(form.form)))
|
|
2039
|
-
readingWithFormsEntries.push(ent);
|
|
2040
|
-
else if (pair.kanjiForms === void 0 && pair.readings.has(ent.word))
|
|
2041
|
-
readingEntries.push(ent);
|
|
2042
|
-
}
|
|
2043
|
-
for (const entry of pair.entriesWithForms) {
|
|
2044
|
-
const elem = titleFormMap.get(entry.word);
|
|
2045
|
-
if (elem && entry.forms.some((form) => elem.has(form.form)))
|
|
2046
|
-
readingWithFormsEntries.push(entry);
|
|
2047
|
-
}
|
|
2048
|
-
for (const entry of pair.entriesWithFormTitles) {
|
|
2049
|
-
if (readingForms.has(entry.word)) {
|
|
2050
|
-
readingWithFormsEntries.push(entry);
|
|
2051
|
-
continue;
|
|
2052
|
-
}
|
|
2053
|
-
if (pair.kanjiForms) {
|
|
2054
|
-
const ft = refsMap.get(entry.word);
|
|
2055
|
-
if (ft && ft.intersection(pair.kanjiForms).size > 0)
|
|
2056
|
-
readingWithFormsEntries.push(entry);
|
|
2057
2010
|
}
|
|
2011
|
+
if (validForms && isValidArray(ent.forms)) {
|
|
2012
|
+
for (const form of ent.forms)
|
|
2013
|
+
if (form.form && typeof form.form === "string" && pair.readings.has(form.form))
|
|
2014
|
+
readingForms.add(form.form);
|
|
2058
2015
|
}
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
for (const readingEntry of readingEntries)
|
|
2074
|
-
parseEntry(readingEntry, definitions, definitionMap);
|
|
2075
|
-
if (definitions.length > 0)
|
|
2076
|
-
japaneseDefinitions.push({
|
|
2077
|
-
wordID: pair.word.id,
|
|
2078
|
-
definitions
|
|
2079
|
-
});
|
|
2016
|
+
} else if (pair.readings.has(ent.word) && isValidArray(ent.forms) && pair.kanjiForms && ent.forms.some((form) => pair.kanjiForms.has(form.form)))
|
|
2017
|
+
readingWithFormsEntries.push(ent);
|
|
2018
|
+
else if (pair.kanjiForms === void 0 && pair.readings.has(ent.word))
|
|
2019
|
+
readingEntries.push(ent);
|
|
2020
|
+
}
|
|
2021
|
+
for (const entry of pair.entriesWithForms) {
|
|
2022
|
+
const elem = titleFormMap.get(entry.word);
|
|
2023
|
+
if (elem && entry.forms.some((form) => elem.has(form.form)))
|
|
2024
|
+
readingWithFormsEntries.push(entry);
|
|
2025
|
+
}
|
|
2026
|
+
for (const entry of pair.entriesWithFormTitles) {
|
|
2027
|
+
if (readingForms.has(entry.word)) {
|
|
2028
|
+
readingWithFormsEntries.push(entry);
|
|
2029
|
+
continue;
|
|
2080
2030
|
}
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
const pair = japaneseDefinitions[i];
|
|
2086
|
-
for (let j = 0; j < pair.definitions.length; j++) {
|
|
2087
|
-
const defCount = definitionMap.get(
|
|
2088
|
-
pair.definitions[j].definition
|
|
2089
|
-
);
|
|
2090
|
-
if (defCount && defCount.count > 1)
|
|
2091
|
-
pair.definitions[j].mayNotBeAccurate = true;
|
|
2092
|
-
if (convert !== null && !pair.definitions[j].definition.includes("\u30FB"))
|
|
2093
|
-
pair.definitions[j].furigana = await convert(
|
|
2094
|
-
pair.definitions[j].definition,
|
|
2095
|
-
{
|
|
2096
|
-
to: "hiragana",
|
|
2097
|
-
mode: "furigana"
|
|
2098
|
-
}
|
|
2099
|
-
);
|
|
2100
|
-
}
|
|
2101
|
-
japaneseDefinitions[i] = pair;
|
|
2031
|
+
if (pair.kanjiForms) {
|
|
2032
|
+
const ft = refsMap.get(entry.word);
|
|
2033
|
+
if (ft && !ft.isDisjointFrom(pair.kanjiForms))
|
|
2034
|
+
readingWithFormsEntries.push(entry);
|
|
2102
2035
|
}
|
|
2103
|
-
resolve(japaneseDefinitions);
|
|
2104
|
-
} catch (err) {
|
|
2105
|
-
reject(err);
|
|
2106
2036
|
}
|
|
2037
|
+
let parsedReadingWithFormsEntries = false;
|
|
2038
|
+
for (const entry of kanjiFormEntries)
|
|
2039
|
+
if (entry.pos_title === "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18" && readingWithFormsEntries.length > 0) {
|
|
2040
|
+
if (!parsedReadingWithFormsEntries)
|
|
2041
|
+
parsedReadingWithFormsEntries = true;
|
|
2042
|
+
for (const ref of readingWithFormsEntries)
|
|
2043
|
+
parseEntry(ref, definitions, definitionMap);
|
|
2044
|
+
} else parseEntry(entry, definitions, definitionMap);
|
|
2045
|
+
if (!parsedReadingWithFormsEntries && readingWithFormsEntries.length > 0) {
|
|
2046
|
+
parsedReadingWithFormsEntries = true;
|
|
2047
|
+
for (const ref of readingWithFormsEntries)
|
|
2048
|
+
parseEntry(ref, definitions, definitionMap);
|
|
2049
|
+
}
|
|
2050
|
+
if (readingEntries.length > 0)
|
|
2051
|
+
for (const readingEntry of readingEntries)
|
|
2052
|
+
parseEntry(readingEntry, definitions, definitionMap);
|
|
2053
|
+
if (definitions.length > 0)
|
|
2054
|
+
japaneseDefinitions.push({
|
|
2055
|
+
wordID: pair.word.id,
|
|
2056
|
+
definitions
|
|
2057
|
+
});
|
|
2058
|
+
}
|
|
2059
|
+
for (let i = 0; i < japaneseDefinitions.length; i++) {
|
|
2060
|
+
const pair = japaneseDefinitions[i];
|
|
2061
|
+
for (let j = 0; j < pair.definitions.length; j++) {
|
|
2062
|
+
const defCount = definitionMap.get(
|
|
2063
|
+
pair.definitions[j].definition
|
|
2064
|
+
);
|
|
2065
|
+
if (defCount && defCount.count > 1)
|
|
2066
|
+
pair.definitions[j].mayNotBeAccurate = true;
|
|
2067
|
+
}
|
|
2068
|
+
japaneseDefinitions[i] = pair;
|
|
2069
|
+
}
|
|
2070
|
+
return japaneseDefinitions;
|
|
2071
|
+
}
|
|
2072
|
+
/**
 * Builds the Japanese definitions via `getWordDefinitions` and then attaches a
 * `furigana` string to each definition.
 *
 * Furigana is produced by Kuroshiro (with the Kuromoji analyzer) using
 * `{ to: "hiragana", mode: "furigana" }`. Definitions whose text contains "・"
 * (U+30FB) are left without a `furigana` property.
 *
 * @param entryList Entries forwarded unchanged to `getWordDefinitions`
 * @param jmDict Dictionary words forwarded unchanged to `getWordDefinitions`
 * @returns Promise resolving to the array returned by `getWordDefinitions`,
 *          with `furigana` added in place on the converted definitions
 * @throws (as a rejection) Whatever `getWordDefinitions`, `kuroshiro.init` or
 *         `kuroshiro.convert` throws
 */
async function getWordDefinitionsWithFurigana(entryList, jmDict) {
  // A plain async function is used instead of `new Promise(async ...)`: a
  // failure inside an async executor would leave the returned promise pending
  // forever rather than rejecting it.
  const japaneseDefinitions = getWordDefinitions(entryList, jmDict);
  const kuroshiro = new Kuroshiro.default();
  await kuroshiro.init(new KuromojiAnalyzer());
  for (const pair of japaneseDefinitions) {
    for (const def of pair.definitions) {
      if (def.definition.includes("\u30FB")) continue;
      // Conversions are awaited one at a time, as in the previous implementation.
      def.furigana = await kuroshiro.convert(def.definition, {
        to: "hiragana",
        mode: "furigana"
      });
    }
  }
  return japaneseDefinitions;
}
|
|
2109
|
-
function lookupWordNote(key, notes, tags
|
|
2097
|
+
function lookupWordNote(key, notes, tags) {
|
|
2110
2098
|
const info = noteMap.get(key.toLowerCase());
|
|
2111
2099
|
if (!info) {
|
|
2112
|
-
if (
|
|
2113
|
-
|
|
2114
|
-
return { note: fallback ?? key };
|
|
2100
|
+
if (notes) notes.push(key);
|
|
2101
|
+
return { note: key };
|
|
2115
2102
|
}
|
|
2116
2103
|
const tag = `word::${info[0]}`;
|
|
2117
2104
|
if (tags && !tags.includes(tag)) tags.push(tag);
|
|
@@ -2122,68 +2109,65 @@ var wordAddNoteArray = (arr, cb) => {
|
|
|
2122
2109
|
if (!arr) return;
|
|
2123
2110
|
for (const v of arr) cb(v);
|
|
2124
2111
|
};
|
|
2125
|
-
function getWord(
|
|
2112
|
+
function getWord(word, dict, kanjiDic, examples, definitions, noteTypeName, deckPath) {
|
|
2126
2113
|
var _a;
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
note,
|
|
2152
|
-
void 0,
|
|
2153
|
-
word.tags,
|
|
2154
|
-
false,
|
|
2155
|
-
note
|
|
2156
|
-
);
|
|
2157
|
-
return capitalizeString(noteAndTag.note ?? note);
|
|
2158
|
-
})
|
|
2159
|
-
} : {},
|
|
2160
|
-
...dictKanjiForm.commonness && dictKanjiForm.commonness.length > 0 ? { common: true } : {}
|
|
2161
|
-
})
|
|
2162
|
-
);
|
|
2163
|
-
word.readings = dictWord.readings.map((dictReading) => ({
|
|
2164
|
-
reading: dictReading.reading,
|
|
2165
|
-
...dictReading.kanjiFormRestrictions || dictReading.notes ? {
|
|
2166
|
-
notes: [
|
|
2167
|
-
...dictReading.kanjiFormRestrictions ? dictReading.kanjiFormRestrictions.map(
|
|
2168
|
-
(restriction) => `Reading restricted to ${restriction}`
|
|
2169
|
-
) : [],
|
|
2170
|
-
...dictReading.notes ? dictReading.notes.map((note) => {
|
|
2114
|
+
let dictWord = void 0;
|
|
2115
|
+
if (typeof word === "string" && dict)
|
|
2116
|
+
dictWord = dict.find((entry) => entry.id === word);
|
|
2117
|
+
else if (typeof word === "object") dictWord = word;
|
|
2118
|
+
if (dictWord) {
|
|
2119
|
+
const word2 = {
|
|
2120
|
+
id: dictWord.id,
|
|
2121
|
+
readings: [],
|
|
2122
|
+
translations: [],
|
|
2123
|
+
noteID: `word_${dictWord.id}`,
|
|
2124
|
+
noteTypeName,
|
|
2125
|
+
deckPath,
|
|
2126
|
+
tags: []
|
|
2127
|
+
};
|
|
2128
|
+
if (dictWord.isCommon === true) {
|
|
2129
|
+
word2.common = true;
|
|
2130
|
+
word2.tags.push("word::common");
|
|
2131
|
+
}
|
|
2132
|
+
if (dictWord.kanjiForms)
|
|
2133
|
+
word2.kanjiForms = dictWord.kanjiForms.map(
|
|
2134
|
+
(dictKanjiForm) => ({
|
|
2135
|
+
kanjiForm: dictKanjiForm.form,
|
|
2136
|
+
...dictKanjiForm.notes ? {
|
|
2137
|
+
notes: dictKanjiForm.notes.map((note) => {
|
|
2171
2138
|
const noteAndTag = lookupWordNote(
|
|
2172
2139
|
note,
|
|
2173
2140
|
void 0,
|
|
2174
|
-
|
|
2175
|
-
false,
|
|
2176
|
-
note
|
|
2141
|
+
word2.tags
|
|
2177
2142
|
);
|
|
2178
|
-
return capitalizeString(noteAndTag.note
|
|
2179
|
-
})
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2143
|
+
return capitalizeString(noteAndTag.note);
|
|
2144
|
+
})
|
|
2145
|
+
} : {},
|
|
2146
|
+
...dictKanjiForm.commonness && dictKanjiForm.commonness.length > 0 ? { common: true } : {}
|
|
2147
|
+
})
|
|
2148
|
+
);
|
|
2149
|
+
word2.readings = dictWord.readings.map((dictReading) => ({
|
|
2150
|
+
reading: dictReading.reading,
|
|
2151
|
+
...dictReading.kanjiFormRestrictions || dictReading.notes ? {
|
|
2152
|
+
notes: [
|
|
2153
|
+
...dictReading.kanjiFormRestrictions ? dictReading.kanjiFormRestrictions.map(
|
|
2154
|
+
(restriction) => `Reading restricted to ${restriction}`
|
|
2155
|
+
) : [],
|
|
2156
|
+
...dictReading.notes ? dictReading.notes.map((note) => {
|
|
2157
|
+
const noteAndTag = lookupWordNote(
|
|
2158
|
+
note,
|
|
2159
|
+
void 0,
|
|
2160
|
+
word2.tags
|
|
2161
|
+
);
|
|
2162
|
+
return capitalizeString(noteAndTag.note);
|
|
2163
|
+
}) : []
|
|
2164
|
+
]
|
|
2165
|
+
} : {},
|
|
2166
|
+
...dictReading.commonness && dictReading.commonness.length > 0 ? { common: true } : {}
|
|
2167
|
+
}));
|
|
2168
|
+
word2.translations = [];
|
|
2169
|
+
for (const dictMeaning of dictWord.meanings)
|
|
2170
|
+
if (dictMeaning.translations) {
|
|
2187
2171
|
const translationTypes = [];
|
|
2188
2172
|
const translations = dictMeaning.translations.map(
|
|
2189
2173
|
(translation) => {
|
|
@@ -2191,13 +2175,13 @@ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeNa
|
|
|
2191
2175
|
else {
|
|
2192
2176
|
if (translation.type === "lit") {
|
|
2193
2177
|
translationTypes.push("Literal meaning");
|
|
2194
|
-
|
|
2178
|
+
word2.tags.push("word::literal_meaning");
|
|
2195
2179
|
} else if (translation.type === "expl") {
|
|
2196
2180
|
translationTypes.push("Explanation");
|
|
2197
|
-
|
|
2181
|
+
word2.tags.push("word::explanation");
|
|
2198
2182
|
} else if (translation.type === "tm") {
|
|
2199
2183
|
translationTypes.push("Trademark");
|
|
2200
|
-
|
|
2184
|
+
word2.tags.push("word::trademark");
|
|
2201
2185
|
}
|
|
2202
2186
|
return translation.translation;
|
|
2203
2187
|
}
|
|
@@ -2215,15 +2199,15 @@ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeNa
|
|
|
2215
2199
|
for (const t of translationTypes) notes.push(t);
|
|
2216
2200
|
wordAddNoteArray(
|
|
2217
2201
|
dictMeaning.partOfSpeech,
|
|
2218
|
-
(pos) => lookupWordNote(pos, notes,
|
|
2202
|
+
(pos) => lookupWordNote(pos, notes, word2.tags)
|
|
2219
2203
|
);
|
|
2220
2204
|
wordAddNoteArray(
|
|
2221
2205
|
dictMeaning.fields,
|
|
2222
|
-
(field) => lookupWordNote(field, notes,
|
|
2206
|
+
(field) => lookupWordNote(field, notes, word2.tags)
|
|
2223
2207
|
);
|
|
2224
2208
|
wordAddNoteArray(
|
|
2225
2209
|
dictMeaning.dialects,
|
|
2226
|
-
(dialect) => lookupWordNote(dialect, notes,
|
|
2210
|
+
(dialect) => lookupWordNote(dialect, notes, word2.tags)
|
|
2227
2211
|
);
|
|
2228
2212
|
wordAddNoteArray(
|
|
2229
2213
|
dictMeaning.antonyms,
|
|
@@ -2235,64 +2219,64 @@ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeNa
|
|
|
2235
2219
|
);
|
|
2236
2220
|
wordAddNoteArray(
|
|
2237
2221
|
dictMeaning.info,
|
|
2238
|
-
(info) => lookupWordNote(info, notes,
|
|
2222
|
+
(info) => lookupWordNote(info, notes, word2.tags)
|
|
2239
2223
|
);
|
|
2240
2224
|
wordAddNoteArray(
|
|
2241
2225
|
dictMeaning.misc,
|
|
2242
|
-
(misc) => lookupWordNote(misc, notes,
|
|
2226
|
+
(misc) => lookupWordNote(misc, notes, word2.tags)
|
|
2243
2227
|
);
|
|
2244
2228
|
for (let i = 0; i < notes.length; i++)
|
|
2245
2229
|
notes[i] = capitalizeString(notes[i]);
|
|
2246
|
-
|
|
2230
|
+
word2.translations.push({
|
|
2247
2231
|
translation: translations.join("; "),
|
|
2248
2232
|
notes
|
|
2249
|
-
};
|
|
2250
|
-
});
|
|
2251
|
-
if (dictWord.usuallyInKana === true) {
|
|
2252
|
-
word.usuallyInKana = true;
|
|
2253
|
-
word.tags.push("word::usually_in_kana_for_all_senses");
|
|
2233
|
+
});
|
|
2254
2234
|
}
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
|
|
2258
|
-
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
word.tags.push("word::no_kanji");
|
|
2235
|
+
if (dictWord.usuallyInKana === true) {
|
|
2236
|
+
word2.usuallyInKana = true;
|
|
2237
|
+
word2.tags.push("word::usually_in_kana_for_all_senses");
|
|
2238
|
+
}
|
|
2239
|
+
if (kanjiDic && word2.kanjiForms) {
|
|
2240
|
+
const kanji = [];
|
|
2241
|
+
for (const kanjiForm of word2.kanjiForms)
|
|
2242
|
+
for (const char of kanjiForm.kanjiForm) {
|
|
2243
|
+
if (!regexps.kanji.test(char) || kanji.some((kanji2) => kanji2.kanji === char))
|
|
2244
|
+
continue;
|
|
2245
|
+
const kanjiObj = getKanji(
|
|
2246
|
+
char,
|
|
2247
|
+
Array.isArray(kanjiDic) ? kanjiDic : kanjiDic.get(char)
|
|
2248
|
+
);
|
|
2249
|
+
if (kanjiObj)
|
|
2250
|
+
kanji.push({
|
|
2251
|
+
kanji: kanjiObj.kanji,
|
|
2252
|
+
...kanjiObj.meanings && kanjiObj.meanings.length > 0 ? { meanings: kanjiObj.meanings } : {}
|
|
2253
|
+
});
|
|
2275
2254
|
}
|
|
2276
|
-
|
|
2277
|
-
|
|
2255
|
+
if (kanji.length > 0) word2.kanji = kanji;
|
|
2256
|
+
}
|
|
2257
|
+
if (dictWord.hasPhrases === true && examples) {
|
|
2258
|
+
const exampleList = Array.isArray(
|
|
2259
|
+
examples
|
|
2260
|
+
) ? examples : examples.get(word2.id);
|
|
2261
|
+
if (exampleList) {
|
|
2278
2262
|
const readings = new Set(
|
|
2279
|
-
|
|
2263
|
+
word2.readings.filter(
|
|
2280
2264
|
(reading) => (reading.notes === void 0 || !reading.notes.some(
|
|
2281
2265
|
(note) => notSearchedForms.has(note)
|
|
2282
|
-
)) && (
|
|
2266
|
+
)) && (word2.common === void 0 || reading.common === true)
|
|
2283
2267
|
).map((reading) => reading.reading)
|
|
2284
2268
|
);
|
|
2285
|
-
const existValidKf =
|
|
2269
|
+
const existValidKf = word2.kanjiForms && word2.kanjiForms.length > 0 ? word2.kanjiForms.some(
|
|
2286
2270
|
(kf) => (kf.notes === void 0 || !kf.notes.some(
|
|
2287
2271
|
(note) => notSearchedForms.has(note)
|
|
2288
|
-
)) && (
|
|
2272
|
+
)) && (word2.common === void 0 || kf.common === true)
|
|
2289
2273
|
) : void 0;
|
|
2290
|
-
const kanjiForms =
|
|
2291
|
-
|
|
2274
|
+
const kanjiForms = word2.kanjiForms && word2.kanjiForms.length > 0 ? new Set(
|
|
2275
|
+
word2.kanjiForms.filter((kanjiForm) => {
|
|
2292
2276
|
if (existValidKf === true)
|
|
2293
2277
|
return (kanjiForm.notes === void 0 || !kanjiForm.notes.some(
|
|
2294
2278
|
(note) => notSearchedForms.has(note)
|
|
2295
|
-
)) && (
|
|
2279
|
+
)) && (word2.common === void 0 || kanjiForm.common === true);
|
|
2296
2280
|
else return true;
|
|
2297
2281
|
}).map((kanjiForm) => kanjiForm.kanjiForm)
|
|
2298
2282
|
) : void 0;
|
|
@@ -2300,12 +2284,12 @@ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeNa
|
|
|
2300
2284
|
const readingMatchingKanjiFormExamples = [];
|
|
2301
2285
|
const readingExamples = [];
|
|
2302
2286
|
const readingMatchingKanjiForms = /* @__PURE__ */ new Set();
|
|
2303
|
-
for (const example of
|
|
2287
|
+
for (const example of exampleList)
|
|
2304
2288
|
for (let i = 0; i < example.parts.length; i++) {
|
|
2305
2289
|
const part = example.parts[i];
|
|
2306
2290
|
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
2307
2291
|
const readingAsInflectedFormMatch = part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
2308
|
-
const referenceIDMatch = part.referenceID !== void 0 &&
|
|
2292
|
+
const referenceIDMatch = part.referenceID !== void 0 && word2.id !== void 0 && part.referenceID === word2.id;
|
|
2309
2293
|
if (kanjiForms && kanjiForms.has(part.baseForm) || referenceIDMatch) {
|
|
2310
2294
|
if (readingAsReadingMatch || readingAsInflectedFormMatch) {
|
|
2311
2295
|
readingMatchingKanjiFormExamples.push({
|
|
@@ -2331,7 +2315,7 @@ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeNa
|
|
|
2331
2315
|
kanjiFormExamples = kanjiFormExamples.filter(
|
|
2332
2316
|
(ex) => ex.form && readingMatchingKanjiForms.has(ex.form)
|
|
2333
2317
|
);
|
|
2334
|
-
const includeKanjiFormExamples =
|
|
2318
|
+
const includeKanjiFormExamples = word2.kanjiForms !== void 0;
|
|
2335
2319
|
let wordExamples = [
|
|
2336
2320
|
...includeKanjiFormExamples ? [...readingMatchingKanjiFormExamples, ...kanjiFormExamples] : [],
|
|
2337
2321
|
...!includeKanjiFormExamples ? readingExamples : []
|
|
@@ -2339,14 +2323,14 @@ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeNa
|
|
|
2339
2323
|
readingMatchingKanjiForms.clear();
|
|
2340
2324
|
const glossSpecificExamples = [];
|
|
2341
2325
|
const seenPhrases = /* @__PURE__ */ new Set();
|
|
2342
|
-
for (let i = 0; i <
|
|
2326
|
+
for (let i = 0; i < word2.translations.length; i++) {
|
|
2343
2327
|
outer: for (const example of wordExamples) {
|
|
2344
2328
|
if (seenPhrases.has(example.ex.phrase)) continue;
|
|
2345
2329
|
for (let j = 0; j < example.ex.parts.length; j++) {
|
|
2346
2330
|
const part = example.ex.parts[j];
|
|
2347
2331
|
if (j === example.partIndex && part.glossNumber === i + 1) {
|
|
2348
2332
|
example.ex.glossNumber = {
|
|
2349
|
-
wordId:
|
|
2333
|
+
wordId: word2.id,
|
|
2350
2334
|
glossNumber: i + 1
|
|
2351
2335
|
};
|
|
2352
2336
|
glossSpecificExamples.push(example);
|
|
@@ -2367,284 +2351,194 @@ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeNa
|
|
|
2367
2351
|
).slice(0, 5 - glossSpecificExamples.length)
|
|
2368
2352
|
];
|
|
2369
2353
|
if (wordExamples.length > 0) {
|
|
2370
|
-
|
|
2354
|
+
word2.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
|
|
2371
2355
|
phrase: ex.ex.furigana ?? ex.ex.phrase,
|
|
2372
2356
|
translation: ex.ex.translation,
|
|
2373
2357
|
originalPhrase: ex.ex.phrase,
|
|
2374
2358
|
...ex.ex.glossNumber ? { glossNumber: ex.ex.glossNumber } : {}
|
|
2375
2359
|
}));
|
|
2376
|
-
|
|
2360
|
+
word2.tags.push("word::has_phrases");
|
|
2377
2361
|
if (glossSpecificExamples.length > 0)
|
|
2378
|
-
|
|
2362
|
+
word2.tags.push("word::has_meaning-specific_phrases");
|
|
2379
2363
|
}
|
|
2380
2364
|
}
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
} else throw new Error(`Word${id ? ` ${id}` : ""} not found`);
|
|
2389
|
-
} catch (err) {
|
|
2390
|
-
throw err;
|
|
2391
|
-
}
|
|
2365
|
+
}
|
|
2366
|
+
if (definitions) {
|
|
2367
|
+
const defs = Array.isArray(definitions) ? (_a = definitions.find((wdp) => wdp.wordID === word2.id)) == null ? void 0 : _a.definitions : definitions.get(word2.id);
|
|
2368
|
+
if (defs) word2.definitions = [...defs];
|
|
2369
|
+
}
|
|
2370
|
+
return word2;
|
|
2371
|
+
} else return void 0;
|
|
2392
2372
|
}
|
|
2393
|
-
function getKanji(
|
|
2394
|
-
var _a, _b, _c, _d;
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
}
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2415
|
-
|
|
2416
|
-
|
|
2373
|
+
function getKanji(kanji, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
2374
|
+
var _a, _b, _c, _d, _e;
|
|
2375
|
+
let dictKanji = void 0;
|
|
2376
|
+
if (typeof kanji === "string" && dict)
|
|
2377
|
+
dictKanji = dict.find((entry) => entry.kanji === kanji);
|
|
2378
|
+
else if (typeof kanji === "object") dictKanji = kanji;
|
|
2379
|
+
if (dictKanji) {
|
|
2380
|
+
const kanji2 = {
|
|
2381
|
+
kanji: dictKanji.kanji,
|
|
2382
|
+
strokes: dictKanji.misc.strokeNumber,
|
|
2383
|
+
...dictKanji.misc && dictKanji.misc.grade ? { grade: dictKanji.misc.grade } : {},
|
|
2384
|
+
...dictKanji.misc && dictKanji.misc.frequency ? { frequency: dictKanji.misc.frequency } : {},
|
|
2385
|
+
...dictKanji.misc && dictKanji.misc.jlpt ? { jlpt: dictKanji.misc.jlpt } : {},
|
|
2386
|
+
noteID: `kanji_${dictKanji.kanji}`,
|
|
2387
|
+
...noteTypeName ? { noteTypeName } : {},
|
|
2388
|
+
...deckPath ? { deckPath } : {},
|
|
2389
|
+
tags: []
|
|
2390
|
+
};
|
|
2391
|
+
if (dictKanji.readingMeaning && dictKanji.readingMeaning.length > 0) {
|
|
2392
|
+
const meanings = [];
|
|
2393
|
+
const nanori = [];
|
|
2394
|
+
const onyomi = [];
|
|
2395
|
+
const kunyomi = [];
|
|
2396
|
+
for (const rm of dictKanji.readingMeaning) {
|
|
2397
|
+
if (rm.nanori && rm.nanori.length > 0) nanori.push(...rm.nanori);
|
|
2398
|
+
if (rm.groups)
|
|
2417
2399
|
for (const group of rm.groups) {
|
|
2418
|
-
|
|
2419
|
-
|
|
2420
|
-
|
|
2421
|
-
|
|
2422
|
-
|
|
2423
|
-
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2400
|
+
if (group.readings) {
|
|
2401
|
+
onyomi.push(
|
|
2402
|
+
...group.readings.filter(
|
|
2403
|
+
(reading) => reading.type === "ja_on"
|
|
2404
|
+
).map((reading) => reading.reading)
|
|
2405
|
+
);
|
|
2406
|
+
kunyomi.push(
|
|
2407
|
+
...group.readings.filter(
|
|
2408
|
+
(reading) => reading.type === "ja_kun"
|
|
2409
|
+
).map((reading) => reading.reading)
|
|
2410
|
+
);
|
|
2411
|
+
}
|
|
2412
|
+
if (group.meanings && group.meanings.length > 0)
|
|
2413
|
+
meanings.push(...group.meanings);
|
|
2427
2414
|
}
|
|
2428
|
-
}
|
|
2429
|
-
if (kanji.meanings && kanji.meanings.length === 0)
|
|
2430
|
-
delete kanji.meanings;
|
|
2431
|
-
if (kanji.nanori && kanji.nanori.length === 0) delete kanji.nanori;
|
|
2432
|
-
if (kanji.onyomi && kanji.onyomi.length === 0) delete kanji.onyomi;
|
|
2433
|
-
if (kanji.kunyomi && kanji.kunyomi.length === 0) delete kanji.kunyomi;
|
|
2434
2415
|
}
|
|
2435
|
-
if (
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
2439
|
-
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
|
|
2453
|
-
const kanjiForm = wordObj.kanjiForms[0];
|
|
2454
|
-
let reading = wordObj.readings.find(
|
|
2455
|
-
(reading2) => reading2.notes && reading2.notes.some(
|
|
2456
|
-
(note) => note.toLowerCase().startsWith("reading restricted to ") && note.endsWith(kanjiForm.kanjiForm)
|
|
2457
|
-
)
|
|
2458
|
-
);
|
|
2459
|
-
let translation = wordObj.translations.find(
|
|
2460
|
-
(translation2) => translation2.notes && translation2.notes.some(
|
|
2461
|
-
(note) => note.toLowerCase().startsWith("meaning restricted to ") && (note.endsWith(kanjiForm.kanjiForm) || reading && note.endsWith(reading.reading))
|
|
2462
|
-
)
|
|
2463
|
-
);
|
|
2464
|
-
if (!reading) reading = wordObj.readings[0];
|
|
2465
|
-
if (!translation) translation = wordObj.translations[0];
|
|
2466
|
-
return {
|
|
2467
|
-
kanjiForms: [kanjiForm],
|
|
2468
|
-
readings: [reading],
|
|
2469
|
-
translations: [translation]
|
|
2470
|
-
};
|
|
2471
|
-
});
|
|
2472
|
-
if (kanjiWords.length !== 3) {
|
|
2473
|
-
const wordNumber = 3 - kanjiWords.length;
|
|
2474
|
-
kanjiWords = jmDict.filter(
|
|
2475
|
-
(word) => word.kanjiForms && word.kanjiForms.some(
|
|
2476
|
-
(kanjiForm) => kanjiForm.form.includes(kanji.kanji)
|
|
2477
|
-
)
|
|
2478
|
-
).map((word) => {
|
|
2479
|
-
const wordObj = getWord(
|
|
2480
|
-
void 0,
|
|
2481
|
-
void 0,
|
|
2482
|
-
void 0,
|
|
2483
|
-
void 0,
|
|
2484
|
-
void 0,
|
|
2485
|
-
word,
|
|
2486
|
-
void 0
|
|
2487
|
-
);
|
|
2488
|
-
if (!wordObj.translations)
|
|
2489
|
-
throw new Error(`Invalid word: ${word.id}`);
|
|
2490
|
-
const kanjiForm = wordObj.kanjiForms.find(
|
|
2491
|
-
(kanjiForm2) => kanjiForm2.kanjiForm.includes(kanji.kanji)
|
|
2492
|
-
);
|
|
2493
|
-
if (!kanjiForm) throw new Error("Invalid kanji form");
|
|
2494
|
-
let reading = wordObj.readings.find(
|
|
2495
|
-
(reading2) => reading2.notes && reading2.notes.some(
|
|
2496
|
-
(note) => note.toLowerCase().startsWith("reading restricted to ") && note.endsWith(kanjiForm.kanjiForm)
|
|
2497
|
-
)
|
|
2498
|
-
);
|
|
2499
|
-
let translation = wordObj.translations.find(
|
|
2500
|
-
(translation2) => translation2.notes && translation2.notes.some(
|
|
2501
|
-
(note) => note.toLowerCase().startsWith("meaning restricted to ") && (note.endsWith(kanjiForm.kanjiForm) || reading && note.endsWith(reading.reading))
|
|
2502
|
-
)
|
|
2503
|
-
);
|
|
2504
|
-
if (!reading) reading = wordObj.readings[0];
|
|
2505
|
-
if (!translation) translation = wordObj.translations[0];
|
|
2506
|
-
return {
|
|
2507
|
-
kanjiForms: [kanjiForm],
|
|
2508
|
-
readings: [reading],
|
|
2509
|
-
translations: [translation]
|
|
2510
|
-
};
|
|
2416
|
+
if (meanings.length > 0) kanji2.meanings = meanings;
|
|
2417
|
+
if (nanori.length > 0) kanji2.nanori = nanori;
|
|
2418
|
+
if (onyomi.length > 0) kanji2.onyomi = onyomi;
|
|
2419
|
+
if (kunyomi.length > 0) kanji2.kunyomi = kunyomi;
|
|
2420
|
+
}
|
|
2421
|
+
if (jmDict) {
|
|
2422
|
+
const kanjiWords = (_a = Array.isArray(jmDict) ? jmDict : jmDict.get(kanji2.kanji)) == null ? void 0 : _a.filter(
|
|
2423
|
+
(word) => word.kanjiForms && word.kanjiForms[0].form.includes(kanji2.kanji)
|
|
2424
|
+
).slice(0, 3);
|
|
2425
|
+
if (kanjiWords) {
|
|
2426
|
+
const validWords = [];
|
|
2427
|
+
for (const word of kanjiWords) {
|
|
2428
|
+
const translation = word.meanings[0].translations[0];
|
|
2429
|
+
const translationText = typeof translation === "object" ? translation.translation : translation;
|
|
2430
|
+
validWords.push({
|
|
2431
|
+
kanjiForms: [{ kanjiForm: word.kanjiForms[0].form }],
|
|
2432
|
+
readings: [{ reading: word.readings[0].reading }],
|
|
2433
|
+
translations: [{ translation: translationText }]
|
|
2511
2434
|
});
|
|
2512
|
-
if (kanjiWords.length > wordNumber)
|
|
2513
|
-
kanjiWords = kanjiWords.slice(0, wordNumber - 1);
|
|
2514
|
-
if (kanjiWords.length > 0)
|
|
2515
|
-
if (kanji.words) kanji.words.push(...kanjiWords);
|
|
2516
|
-
else kanji.words = kanjiWords;
|
|
2517
|
-
}
|
|
2518
|
-
}
|
|
2519
|
-
if (svgList) {
|
|
2520
|
-
let codePoint = kanji.kanji.codePointAt(0);
|
|
2521
|
-
if (codePoint !== void 0) {
|
|
2522
|
-
codePoint = codePoint.toString(16);
|
|
2523
|
-
const fileNames = [
|
|
2524
|
-
`0${codePoint}.svg`,
|
|
2525
|
-
`${codePoint}.svg`
|
|
2526
|
-
];
|
|
2527
|
-
const svg = svgList.find(
|
|
2528
|
-
(svgFile) => fileNames.includes(svgFile.toLowerCase())
|
|
2529
|
-
);
|
|
2530
|
-
if (svg) kanji.svg = svg;
|
|
2531
2435
|
}
|
|
2436
|
+
if (validWords.length > 0) kanji2.words = validWords;
|
|
2532
2437
|
}
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2541
|
-
|
|
2542
|
-
|
|
2543
|
-
)
|
|
2438
|
+
}
|
|
2439
|
+
if (svgList) {
|
|
2440
|
+
let codePoint = kanji2.kanji.codePointAt(0);
|
|
2441
|
+
if (codePoint !== void 0) {
|
|
2442
|
+
codePoint = codePoint.toString(16);
|
|
2443
|
+
const fileNames = [
|
|
2444
|
+
`0${codePoint}.svg`,
|
|
2445
|
+
`${codePoint}.svg`
|
|
2446
|
+
];
|
|
2447
|
+
const svg = svgList.find(
|
|
2448
|
+
(svgFile) => fileNames.includes(svgFile.toLowerCase())
|
|
2449
|
+
);
|
|
2450
|
+
if (svg) kanji2.svg = svg;
|
|
2544
2451
|
}
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2452
|
+
}
|
|
2453
|
+
if (kanji2.tags && dictKanji.isKokuji === true) {
|
|
2454
|
+
kanji2.kokuji = true;
|
|
2455
|
+
kanji2.tags.push("kanji::kokuji");
|
|
2456
|
+
if (kanji2.meanings)
|
|
2457
|
+
kanji2.meanings.splice(
|
|
2458
|
+
kanji2.meanings.findIndex((meaning) => meaning === "(kokuji)"),
|
|
2459
|
+
1
|
|
2460
|
+
);
|
|
2461
|
+
}
|
|
2462
|
+
if (kanji2.tags)
|
|
2463
|
+
kanji2.tags.push(
|
|
2464
|
+
`kanji::strokes::${kanji2.strokes}`,
|
|
2465
|
+
...kanji2.frequency ? [`kanji::frequency::${kanji2.frequency}`] : [],
|
|
2466
|
+
...kanji2.grade ? [`kanji::grade::${kanji2.grade}`] : [],
|
|
2467
|
+
...kanji2.jlpt ? [`kanji::pre-2010_jlpt::${kanji2.jlpt.toLowerCase()}`] : [],
|
|
2468
|
+
`kanji::onyomi::${((_b = kanji2.onyomi) == null ? void 0 : _b.length) ?? 0}`,
|
|
2469
|
+
`kanji::kunyomi::${((_c = kanji2.kunyomi) == null ? void 0 : _c.length) ?? 0}`,
|
|
2470
|
+
`kanji::nanori::${((_d = kanji2.nanori) == null ? void 0 : _d.length) ?? 0}`,
|
|
2471
|
+
`kanji::words::${((_e = kanji2.words) == null ? void 0 : _e.length) ?? 0}`,
|
|
2472
|
+
...kanji2.svg ? ["kanji::has_svg"] : []
|
|
2555
2473
|
);
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
throw new Error(`Kanji not found${kanjiChar ? `: ${kanjiChar}` : ""}`);
|
|
2559
|
-
} catch (err) {
|
|
2560
|
-
throw err;
|
|
2561
|
-
}
|
|
2474
|
+
return kanji2;
|
|
2475
|
+
} else return void 0;
|
|
2562
2476
|
}
|
|
2563
|
-
function getKanjiExtended(info,
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
)
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
|
|
2578
|
-
|
|
2579
|
-
|
|
2580
|
-
|
|
2581
|
-
|
|
2582
|
-
|
|
2583
|
-
|
|
2584
|
-
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
|
|
2477
|
+
function getKanjiExtended(info, kanji, dict, useWords, jmDict, svgList, noteTypeName, deckPath, sourceURL) {
|
|
2478
|
+
const kanjiObj = getKanji(
|
|
2479
|
+
kanji,
|
|
2480
|
+
dict,
|
|
2481
|
+
jmDict,
|
|
2482
|
+
svgList,
|
|
2483
|
+
noteTypeName,
|
|
2484
|
+
deckPath
|
|
2485
|
+
);
|
|
2486
|
+
if (kanjiObj) {
|
|
2487
|
+
let usedInfo = false;
|
|
2488
|
+
if (info.components) {
|
|
2489
|
+
kanjiObj.components = info.components;
|
|
2490
|
+
usedInfo = true;
|
|
2491
|
+
}
|
|
2492
|
+
if (info.mnemonic && info.mnemonic.length > 0) {
|
|
2493
|
+
kanjiObj.mnemonic = info.mnemonic;
|
|
2494
|
+
usedInfo = true;
|
|
2495
|
+
}
|
|
2496
|
+
if (useWords === true && info.words && info.words.length > 0) {
|
|
2497
|
+
kanjiObj.words = info.words;
|
|
2498
|
+
usedInfo = true;
|
|
2499
|
+
}
|
|
2500
|
+
if (kanjiObj.tags) {
|
|
2501
|
+
if (kanjiObj.components)
|
|
2502
|
+
kanjiObj.tags.push(`kanji::components::${kanjiObj.components.length}`);
|
|
2503
|
+
if (kanjiObj.mnemonic && kanjiObj.mnemonic.length > 0)
|
|
2504
|
+
kanjiObj.tags.push("kanji::has_mnemonic");
|
|
2505
|
+
if (useWords === true && kanjiObj.words && info.words)
|
|
2506
|
+
kanjiObj.tags.forEach((tag, index) => {
|
|
2507
|
+
if (tag.startsWith("kanji::words::") && kanjiObj.words && kanjiObj.tags) {
|
|
2508
|
+
kanjiObj.tags.splice(
|
|
2590
2509
|
index,
|
|
2591
2510
|
1,
|
|
2592
|
-
`kanji::words::${
|
|
2511
|
+
`kanji::words::${kanjiObj.words.length}`
|
|
2593
2512
|
);
|
|
2594
|
-
|
|
2595
|
-
} else return false;
|
|
2596
|
-
}))
|
|
2597
|
-
kanji.tags.push(`kanji::words::${((_b = kanji.words) == null ? void 0 : _b.length) ?? 0}`);
|
|
2598
|
-
}
|
|
2599
|
-
}
|
|
2600
|
-
if (info.fromJpdb === true && (kanji.mnemonic || kanji.components || kanji.words && useWords === true))
|
|
2601
|
-
kanji.source = `https://jpdb.io/kanji/${kanji.kanji}`;
|
|
2602
|
-
return kanji;
|
|
2603
|
-
} catch (err) {
|
|
2604
|
-
throw err;
|
|
2605
|
-
}
|
|
2606
|
-
}
|
|
2607
|
-
async function synthesizeSpeech(client, input, options) {
|
|
2608
|
-
return await new Promise(
|
|
2609
|
-
async (resolve, reject) => {
|
|
2610
|
-
try {
|
|
2611
|
-
const command = new SynthesizeSpeechCommand({
|
|
2612
|
-
Text: input,
|
|
2613
|
-
...options
|
|
2513
|
+
}
|
|
2614
2514
|
});
|
|
2615
|
-
const response = await client.send(command);
|
|
2616
|
-
const stream = response.AudioStream ? Buffer.from(await response.AudioStream.transformToByteArray()) : null;
|
|
2617
|
-
resolve(stream);
|
|
2618
|
-
} catch (err) {
|
|
2619
|
-
reject(err);
|
|
2620
|
-
}
|
|
2621
2515
|
}
|
|
2622
|
-
|
|
2516
|
+
if (sourceURL && info.externalInfo === true && usedInfo)
|
|
2517
|
+
kanjiObj.source = sourceURL;
|
|
2518
|
+
return kanjiObj;
|
|
2519
|
+
} else return void 0;
|
|
2623
2520
|
}
|
|
2624
2521
|
function isWord(entry) {
|
|
2625
2522
|
return entry.translations !== void 0 && entry.readings !== void 0;
|
|
2626
2523
|
}
|
|
2627
2524
|
function isRadical(entry) {
|
|
2628
|
-
return entry.radical !== void 0
|
|
2525
|
+
return entry.radical !== void 0;
|
|
2629
2526
|
}
|
|
2630
2527
|
function isKanji(entry) {
|
|
2631
2528
|
return entry.translations === void 0 && entry.readings === void 0 && entry.radical === void 0 && entry.kanji !== void 0;
|
|
2632
2529
|
}
|
|
2633
2530
|
function isKana(entry) {
|
|
2634
|
-
return entry.kana !== void 0
|
|
2531
|
+
return entry.kana !== void 0;
|
|
2635
2532
|
}
|
|
2636
2533
|
function isGrammar(entry) {
|
|
2637
|
-
return entry.point !== void 0
|
|
2534
|
+
return entry.point !== void 0;
|
|
2638
2535
|
}
|
|
2639
2536
|
var createNotes = (notes, phrase) => `${phrase === true ? "<details><summary>Show translation</summary>" : ""}<ul class="note-list">${notes.map((note) => `<li class="note">${note}</li>`).join("")}</ul>${phrase === true ? "</details>" : ""}`;
|
|
2640
2537
|
var createEntry = (entry, notes, phrase, glossSpecific) => `<div class="entry${glossSpecific ? " gloss-specific" : ""}">${entry}${notes && notes.length > 0 ? createNotes(notes, phrase) : ""}</div>`;
|
|
2641
2538
|
var noKanjiForms = '<span class="word word-kanjiform">(no kanji forms)</span>';
|
|
2642
2539
|
function generateAnkiNote(entry) {
|
|
2643
|
-
if (!entry.noteID) throw new Error("Invalid note ID");
|
|
2644
2540
|
const fields = [];
|
|
2645
2541
|
if (isWord(entry)) {
|
|
2646
|
-
if (!entry.translations || entry.readings.length === 0)
|
|
2647
|
-
throw new Error(`Invalid word: ${entry.noteID}`);
|
|
2648
2542
|
const firstReading = createEntry(
|
|
2649
2543
|
`<span class="word word-reading">${entry.readings[0].reading}${entry.readings[0].audio !== void 0 ? `<br>[sound:${entry.readings[0].audio}]` : ""}</span>`,
|
|
2650
2544
|
entry.readings[0].notes
|
|
@@ -2739,7 +2633,7 @@ function generateAnkiNote(entry) {
|
|
|
2739
2633
|
entry.strokes ? createEntry(
|
|
2740
2634
|
`<span class="radical radical-strokes">${entry.strokes}<br>${entry.svg ? `<img class="radical radical-stroke-order" src="${entry.svg}" alt="${entry.radical} stroke order SVG">` : "(no stroke order SVG available)"}</span>`
|
|
2741
2635
|
) : '<span class="radical radical-strokes">(no stroke number)</span>',
|
|
2742
|
-
entry.sources ? `<span class="radical radical-source">${entry.sources.map((source, index) => `<a href="${source}" target="_blank">Source ${index + 1}</a>`).join("<br>")}</span>` : '<span class="
|
|
2636
|
+
entry.sources ? `<span class="radical radical-source">${entry.sources.map((source, index) => `<a href="${source}" target="_blank">Source ${index + 1}</a>`).join("<br>")}</span>` : '<span class="radical radical-source">(no sources)</span>',
|
|
2743
2637
|
...entry.tags && entry.tags.length > 0 ? [
|
|
2744
2638
|
entry.tags.map(
|
|
2745
2639
|
(tag) => tag.trim().toLowerCase().replaceAll(" ", "::")
|
|
@@ -2777,11 +2671,9 @@ function generateAnkiNote(entry) {
|
|
|
2777
2671
|
entry.mnemonic ? createEntry(
|
|
2778
2672
|
`<span class="kanji kanji-mnemonic">${entry.mnemonic}</span>`
|
|
2779
2673
|
) : '<span class="kanji kanji-mnemonic">(no mnemonic) (Come up with your own!)</span>',
|
|
2780
|
-
entry.words ? entry.words.
|
|
2781
|
-
(word) => word.translations && word.translations.length > 0
|
|
2782
|
-
).map(
|
|
2674
|
+
entry.words ? entry.words.map(
|
|
2783
2675
|
(word) => createEntry(
|
|
2784
|
-
`<span class="kanji kanji-words">${word.kanjiForms
|
|
2676
|
+
`<span class="kanji kanji-words">${word.kanjiForms[0].kanjiForm} / ${word.readings[0].reading} - ${word.translations[0].translation}</span>`
|
|
2785
2677
|
)
|
|
2786
2678
|
).join("") : '<span class="kanji kanji-words">(no words) (Search on dictionaries!)</span>',
|
|
2787
2679
|
entry.strokes ? createEntry(
|
|
@@ -2839,47 +2731,84 @@ function generateAnkiNote(entry) {
|
|
|
2839
2731
|
).join(" ")
|
|
2840
2732
|
] : []
|
|
2841
2733
|
);
|
|
2842
|
-
|
|
2843
|
-
return fields.map((field) => field.replaceAll("\n", "<br>"));
|
|
2844
|
-
else throw new Error("Invalid entry");
|
|
2734
|
+
return fields.map((field) => field.replaceAll("\n", "<br>"));
|
|
2845
2735
|
}
|
|
2846
|
-
function generateAnkiNotesFile(list) {
|
|
2736
|
+
function generateAnkiNotesFile(list, defaultNoteInfo) {
|
|
2737
|
+
const headers = [noteHeaderKeys.separator, noteHeaderKeys.html];
|
|
2738
|
+
let ankiNotes = "";
|
|
2847
2739
|
if (list.length > 0) {
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2740
|
+
if (defaultNoteInfo === void 0) defaultNoteInfo = {};
|
|
2741
|
+
const infoValues = Object.values(defaultNoteInfo);
|
|
2742
|
+
let invalidList = false;
|
|
2743
|
+
const firstEntry = list[0];
|
|
2744
|
+
const firstEntryInfo = {
|
|
2745
|
+
guid: typeof firstEntry.noteID,
|
|
2746
|
+
noteType: typeof firstEntry.noteTypeName,
|
|
2747
|
+
deckPath: typeof firstEntry.deckPath
|
|
2748
|
+
};
|
|
2749
|
+
if (infoValues.length === 0 || infoValues.some((value) => value === true || value === void 0)) {
|
|
2750
|
+
for (const res of list)
|
|
2751
|
+
if (defaultNoteInfo.guid === true && res.noteID === void 0 || defaultNoteInfo.noteType === true && res.noteTypeName === void 0 || defaultNoteInfo.deckPath === true && res.deckPath === void 0 || defaultNoteInfo.guid === void 0 && typeof res.noteID !== firstEntryInfo.guid || defaultNoteInfo.noteType === void 0 && typeof res.noteTypeName !== firstEntryInfo.noteType || defaultNoteInfo.deckPath === void 0 && typeof res.deckPath !== firstEntryInfo.deckPath) {
|
|
2752
|
+
invalidList = true;
|
|
2753
|
+
break;
|
|
2754
|
+
}
|
|
2755
|
+
}
|
|
2756
|
+
if (invalidList) throw new Error("Invalid result list");
|
|
2757
|
+
const hasHeader = { guid: false, noteType: false, deckPath: false, tags: false };
|
|
2758
|
+
let headerCount = 0;
|
|
2759
|
+
ankiNotes = list.filter((result) => result.doNotCreateNote === void 0).map((result) => {
|
|
2760
|
+
if (typeof defaultNoteInfo.guid === "string" && result.noteID === void 0) {
|
|
2761
|
+
if (isWord(result) && result.id) result.noteID = result.id;
|
|
2762
|
+
if (isKanji(result)) result.noteID = result.kanji;
|
|
2763
|
+
if (isRadical(result)) result.noteID = result.radical;
|
|
2764
|
+
if (isKana(result)) result.noteID = result.kana;
|
|
2765
|
+
if (isGrammar(result) && result.id) result.noteID = result.id;
|
|
2766
|
+
}
|
|
2767
|
+
if (typeof defaultNoteInfo.noteType === "string" && result.noteTypeName === void 0)
|
|
2768
|
+
result.noteTypeName = defaultNoteInfo.noteType;
|
|
2769
|
+
if (typeof defaultNoteInfo.deckPath === "string" && result.deckPath === void 0)
|
|
2770
|
+
result.deckPath = defaultNoteInfo.deckPath;
|
|
2771
|
+
if (!hasHeader.guid && result.noteID) {
|
|
2772
|
+
headers.push(`${noteHeaderKeys.guid}:${++headerCount}`);
|
|
2773
|
+
hasHeader.guid = true;
|
|
2774
|
+
}
|
|
2775
|
+
if (!hasHeader.noteType && result.noteTypeName) {
|
|
2776
|
+
headers.push(`${noteHeaderKeys.notetype}:${++headerCount}`);
|
|
2777
|
+
hasHeader.noteType = true;
|
|
2778
|
+
}
|
|
2779
|
+
if (!hasHeader.deckPath && result.deckPath) {
|
|
2780
|
+
headers.push(`${noteHeaderKeys.deck}:${++headerCount}`);
|
|
2781
|
+
hasHeader.deckPath = true;
|
|
2782
|
+
}
|
|
2858
2783
|
const note = generateAnkiNote(result);
|
|
2859
|
-
if (
|
|
2860
|
-
headers.push(
|
|
2861
|
-
|
|
2862
|
-
|
|
2784
|
+
if (!hasHeader.tags) {
|
|
2785
|
+
headers.push(`${noteHeaderKeys.tags}${note.length + headerCount}`);
|
|
2786
|
+
hasHeader.tags = true;
|
|
2787
|
+
}
|
|
2788
|
+
return `${result.noteID ? `${result.noteID} ` : ""}${result.noteTypeName ? `${result.noteTypeName} ` : ""}${result.deckPath ? `${result.deckPath} ` : ""}${note.join(" ")}`;
|
|
2863
2789
|
}).join("\n").trim();
|
|
2864
|
-
|
|
2865
|
-
|
|
2790
|
+
}
|
|
2791
|
+
return `${headers.join("\n")}
|
|
2792
|
+
|
|
2866
2793
|
${ankiNotes}`;
|
|
2867
|
-
} else throw new Error("No entries available for Anki notes creation");
|
|
2868
2794
|
}
|
|
2869
2795
|
export {
|
|
2870
2796
|
capitalizeString,
|
|
2871
2797
|
convertJMdict,
|
|
2872
|
-
|
|
2798
|
+
convertJawiktionaryAsync,
|
|
2799
|
+
convertJawiktionarySync,
|
|
2873
2800
|
convertKanjiDic,
|
|
2874
2801
|
convertKradFile,
|
|
2875
2802
|
convertRadkFile,
|
|
2876
2803
|
convertTanakaCorpus,
|
|
2804
|
+
convertTanakaCorpusWithFurigana,
|
|
2877
2805
|
generateAnkiNote,
|
|
2878
2806
|
generateAnkiNotesFile,
|
|
2879
2807
|
getKanji,
|
|
2880
2808
|
getKanjiExtended,
|
|
2881
2809
|
getWord,
|
|
2882
2810
|
getWordDefinitions,
|
|
2811
|
+
getWordDefinitionsWithFurigana,
|
|
2883
2812
|
isGrammar,
|
|
2884
2813
|
isKana,
|
|
2885
2814
|
isKanji,
|
|
@@ -2889,9 +2818,9 @@ export {
|
|
|
2889
2818
|
isValidArrayWithFirstElement,
|
|
2890
2819
|
isWord,
|
|
2891
2820
|
notSearchedForms,
|
|
2821
|
+
noteHeaderKeys,
|
|
2892
2822
|
noteMap,
|
|
2893
2823
|
regexps,
|
|
2894
|
-
shuffleArray
|
|
2895
|
-
synthesizeSpeech
|
|
2824
|
+
shuffleArray
|
|
2896
2825
|
};
|
|
2897
2826
|
//# sourceMappingURL=index.mjs.map
|