synset 0.9.6 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +68 -12
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +63 -7
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +65 -12
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +60 -7
- package/dist/index.js.map +1 -1
- package/dist/schema.sql +10 -0
- package/package.json +1 -1
package/dist/cli.cjs
CHANGED
|
@@ -24,6 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
24
24
|
));
|
|
25
25
|
|
|
26
26
|
// src/export-sqlite.ts
|
|
27
|
+
var import_node_fs = require("fs");
|
|
27
28
|
var import_libsql = __toESM(require("libsql"), 1);
|
|
28
29
|
|
|
29
30
|
// src/helpers.ts
|
|
@@ -375,9 +376,11 @@ CREATE TABLE IF NOT EXISTS synsets (
|
|
|
375
376
|
CREATE TABLE IF NOT EXISTS word_synsets (
|
|
376
377
|
word_id INTEGER NOT NULL,
|
|
377
378
|
synset_id TEXT NOT NULL,
|
|
379
|
+
sense_order INTEGER NOT NULL DEFAULT 0,
|
|
378
380
|
PRIMARY KEY (word_id, synset_id)
|
|
379
381
|
);
|
|
380
382
|
CREATE INDEX IF NOT EXISTS idx_ws_word ON word_synsets(word_id);
|
|
383
|
+
CREATE INDEX IF NOT EXISTS idx_ws_order ON word_synsets(word_id, sense_order);
|
|
381
384
|
|
|
382
385
|
CREATE TABLE IF NOT EXISTS synset_relations (
|
|
383
386
|
source_id TEXT NOT NULL,
|
|
@@ -397,9 +400,26 @@ CREATE TABLE IF NOT EXISTS sense_relations (
|
|
|
397
400
|
PRIMARY KEY (source_word_id, source_synset_id, target_word_id, target_synset_id, rel_type)
|
|
398
401
|
);
|
|
399
402
|
CREATE INDEX IF NOT EXISTS idx_sense_rel_source ON sense_relations(source_word_id, source_synset_id);
|
|
403
|
+
|
|
404
|
+
CREATE TABLE IF NOT EXISTS synset_examples (
|
|
405
|
+
synset_id TEXT NOT NULL,
|
|
406
|
+
example TEXT NOT NULL,
|
|
407
|
+
example_order INTEGER NOT NULL DEFAULT 0,
|
|
408
|
+
PRIMARY KEY (synset_id, example_order)
|
|
409
|
+
);
|
|
410
|
+
CREATE INDEX IF NOT EXISTS idx_examples_synset ON synset_examples(synset_id);
|
|
400
411
|
`;
|
|
401
412
|
function exportToSQLite(lexicon, outputPath, options = {}) {
|
|
402
|
-
const { onProgress } = options;
|
|
413
|
+
const { onProgress, overwrite } = options;
|
|
414
|
+
if ((0, import_node_fs.existsSync)(outputPath)) {
|
|
415
|
+
if (overwrite) {
|
|
416
|
+
(0, import_node_fs.unlinkSync)(outputPath);
|
|
417
|
+
} else {
|
|
418
|
+
throw new Error(
|
|
419
|
+
`File already exists: ${outputPath}. Use --overwrite to replace it.`
|
|
420
|
+
);
|
|
421
|
+
}
|
|
422
|
+
}
|
|
403
423
|
const db = new import_libsql.default(outputPath);
|
|
404
424
|
db.exec("PRAGMA journal_mode = OFF");
|
|
405
425
|
db.exec("PRAGMA synchronous = OFF");
|
|
@@ -466,7 +486,7 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
|
|
|
466
486
|
}
|
|
467
487
|
db.exec("COMMIT");
|
|
468
488
|
const insertWordSynset = db.prepare(
|
|
469
|
-
"INSERT OR IGNORE INTO word_synsets (word_id, synset_id) VALUES (?, ?)"
|
|
489
|
+
"INSERT OR IGNORE INTO word_synsets (word_id, synset_id, sense_order) VALUES (?, ?, ?)"
|
|
470
490
|
);
|
|
471
491
|
let wsCount = 0;
|
|
472
492
|
const totalWordSynsets = Array.from(wordToEntries.values()).reduce(
|
|
@@ -477,9 +497,11 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
|
|
|
477
497
|
for (const [word, entries] of wordToEntries) {
|
|
478
498
|
const wId = wordIds.get(word);
|
|
479
499
|
if (!wId) continue;
|
|
500
|
+
let senseOrder = 0;
|
|
480
501
|
for (const entry of entries) {
|
|
481
502
|
for (const sense of entry.senses) {
|
|
482
|
-
insertWordSynset.run(wId, sense.synset);
|
|
503
|
+
insertWordSynset.run(wId, sense.synset, senseOrder);
|
|
504
|
+
senseOrder++;
|
|
483
505
|
wsCount++;
|
|
484
506
|
if (onProgress && wsCount % 1e4 === 0) {
|
|
485
507
|
onProgress({
|
|
@@ -492,6 +514,37 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
|
|
|
492
514
|
}
|
|
493
515
|
}
|
|
494
516
|
db.exec("COMMIT");
|
|
517
|
+
const insertExample = db.prepare(
|
|
518
|
+
"INSERT OR IGNORE INTO synset_examples (synset_id, example, example_order) VALUES (?, ?, ?)"
|
|
519
|
+
);
|
|
520
|
+
let totalExamples = 0;
|
|
521
|
+
for (const synsetId of usedSynsetIds) {
|
|
522
|
+
const synset = synsetMap.get(synsetId);
|
|
523
|
+
if (synset?.examples) {
|
|
524
|
+
totalExamples += synset.examples.length;
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
db.exec("BEGIN TRANSACTION");
|
|
528
|
+
let exCount = 0;
|
|
529
|
+
for (const synsetId of usedSynsetIds) {
|
|
530
|
+
const synset = synsetMap.get(synsetId);
|
|
531
|
+
if (!synset?.examples) continue;
|
|
532
|
+
for (let i = 0; i < synset.examples.length; i++) {
|
|
533
|
+
const example = decodeXmlEntities(synset.examples[i].inner);
|
|
534
|
+
if (example) {
|
|
535
|
+
insertExample.run(synsetId, example, i);
|
|
536
|
+
exCount++;
|
|
537
|
+
if (onProgress && exCount % 1e4 === 0) {
|
|
538
|
+
onProgress({
|
|
539
|
+
phase: "synset_examples",
|
|
540
|
+
current: exCount,
|
|
541
|
+
total: totalExamples
|
|
542
|
+
});
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
db.exec("COMMIT");
|
|
495
548
|
const insertSynsetRelation = db.prepare(
|
|
496
549
|
"INSERT OR IGNORE INTO synset_relations (source_id, target_id, rel_type) VALUES (?, ?, ?)"
|
|
497
550
|
);
|
|
@@ -701,7 +754,7 @@ var SynsetRelationRelType2 = {
|
|
|
701
754
|
};
|
|
702
755
|
|
|
703
756
|
// src/loader.ts
|
|
704
|
-
var
|
|
757
|
+
var import_node_fs2 = require("fs");
|
|
705
758
|
var import_node_path = __toESM(require("path"), 1);
|
|
706
759
|
var import_node_stream = require("stream");
|
|
707
760
|
|
|
@@ -960,8 +1013,8 @@ function getDefaultCacheDir() {
|
|
|
960
1013
|
return import_node_path.default.join(homeDir, ".cache", "synset");
|
|
961
1014
|
}
|
|
962
1015
|
function fileExists(filePath) {
|
|
963
|
-
if ((0,
|
|
964
|
-
const stat = (0,
|
|
1016
|
+
if ((0, import_node_fs2.existsSync)(filePath)) {
|
|
1017
|
+
const stat = (0, import_node_fs2.statSync)(filePath);
|
|
965
1018
|
return stat.isFile();
|
|
966
1019
|
}
|
|
967
1020
|
return false;
|
|
@@ -979,8 +1032,8 @@ function extractVersionFromFilename(filename) {
|
|
|
979
1032
|
return match ? parseInt(match[1], 10) : null;
|
|
980
1033
|
}
|
|
981
1034
|
function findCachedVersion(cacheDir) {
|
|
982
|
-
if (!(0,
|
|
983
|
-
const files = (0,
|
|
1035
|
+
if (!(0, import_node_fs2.existsSync)(cacheDir)) return null;
|
|
1036
|
+
const files = (0, import_node_fs2.readdirSync)(cacheDir);
|
|
984
1037
|
const wordnetFiles = files.map((f) => ({ file: f, year: extractVersionFromFilename(f) })).filter((x) => x.year !== null).sort((a, b) => b.year - a.year);
|
|
985
1038
|
return wordnetFiles.length > 0 ? wordnetFiles[0].year.toString() : null;
|
|
986
1039
|
}
|
|
@@ -1041,14 +1094,14 @@ async function downloadWordNet(version, destPath) {
|
|
|
1041
1094
|
);
|
|
1042
1095
|
const arrayBuffer = await new Response(decompressed).arrayBuffer();
|
|
1043
1096
|
const dir = import_node_path.default.dirname(destPath);
|
|
1044
|
-
if (!(0,
|
|
1045
|
-
(0,
|
|
1097
|
+
if (!(0, import_node_fs2.existsSync)(dir)) {
|
|
1098
|
+
(0, import_node_fs2.mkdirSync)(dir, { recursive: true });
|
|
1046
1099
|
}
|
|
1047
|
-
(0,
|
|
1100
|
+
(0, import_node_fs2.writeFileSync)(destPath, Buffer.from(arrayBuffer));
|
|
1048
1101
|
}
|
|
1049
1102
|
function createParser(filePath) {
|
|
1050
1103
|
const resolvedPath = import_node_path.default.resolve(filePath);
|
|
1051
|
-
const nodeStream = (0,
|
|
1104
|
+
const nodeStream = (0, import_node_fs2.createReadStream)(resolvedPath);
|
|
1052
1105
|
const webStream = import_node_stream.Readable.toWeb(nodeStream);
|
|
1053
1106
|
return parse(webStream, {
|
|
1054
1107
|
ignoreDeclaration: false,
|
|
@@ -1223,6 +1276,7 @@ Commands:
|
|
|
1223
1276
|
|
|
1224
1277
|
Options:
|
|
1225
1278
|
--file <path> Use a local WordNet XML file instead of cache
|
|
1279
|
+
--overwrite Overwrite existing file (for export-sqlite)
|
|
1226
1280
|
--help, -h Show this help message
|
|
1227
1281
|
|
|
1228
1282
|
Examples:
|
|
@@ -1258,10 +1312,12 @@ async function main() {
|
|
|
1258
1312
|
console.error("Error: Missing output path for export-sqlite");
|
|
1259
1313
|
process.exit(1);
|
|
1260
1314
|
}
|
|
1315
|
+
const overwrite = args.includes("--overwrite");
|
|
1261
1316
|
console.log("Loading WordNet data...");
|
|
1262
1317
|
const lexicon2 = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
|
|
1263
1318
|
console.log(`Exporting to ${outputPath}...`);
|
|
1264
1319
|
exportToSQLite(lexicon2, outputPath, {
|
|
1320
|
+
overwrite,
|
|
1265
1321
|
onProgress: ({ phase, current, total }) => {
|
|
1266
1322
|
process.stdout.write(`\r${phase}: ${current}/${total}`);
|
|
1267
1323
|
}
|