synset 0.9.6 → 0.9.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -24,6 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/export-sqlite.ts
27
+ var import_node_fs = require("fs");
27
28
  var import_libsql = __toESM(require("libsql"), 1);
28
29
 
29
30
  // src/helpers.ts
@@ -375,9 +376,11 @@ CREATE TABLE IF NOT EXISTS synsets (
375
376
  CREATE TABLE IF NOT EXISTS word_synsets (
376
377
  word_id INTEGER NOT NULL,
377
378
  synset_id TEXT NOT NULL,
379
+ sense_order INTEGER NOT NULL DEFAULT 0,
378
380
  PRIMARY KEY (word_id, synset_id)
379
381
  );
380
382
  CREATE INDEX IF NOT EXISTS idx_ws_word ON word_synsets(word_id);
383
+ CREATE INDEX IF NOT EXISTS idx_ws_order ON word_synsets(word_id, sense_order);
381
384
 
382
385
  CREATE TABLE IF NOT EXISTS synset_relations (
383
386
  source_id TEXT NOT NULL,
@@ -397,9 +400,26 @@ CREATE TABLE IF NOT EXISTS sense_relations (
397
400
  PRIMARY KEY (source_word_id, source_synset_id, target_word_id, target_synset_id, rel_type)
398
401
  );
399
402
  CREATE INDEX IF NOT EXISTS idx_sense_rel_source ON sense_relations(source_word_id, source_synset_id);
403
+
404
+ CREATE TABLE IF NOT EXISTS synset_examples (
405
+ synset_id TEXT NOT NULL,
406
+ example TEXT NOT NULL,
407
+ example_order INTEGER NOT NULL DEFAULT 0,
408
+ PRIMARY KEY (synset_id, example_order)
409
+ );
410
+ CREATE INDEX IF NOT EXISTS idx_examples_synset ON synset_examples(synset_id);
400
411
  `;
401
412
  function exportToSQLite(lexicon, outputPath, options = {}) {
402
- const { onProgress } = options;
413
+ const { onProgress, overwrite } = options;
414
+ if ((0, import_node_fs.existsSync)(outputPath)) {
415
+ if (overwrite) {
416
+ (0, import_node_fs.unlinkSync)(outputPath);
417
+ } else {
418
+ throw new Error(
419
+ `File already exists: ${outputPath}. Use --overwrite to replace it.`
420
+ );
421
+ }
422
+ }
403
423
  const db = new import_libsql.default(outputPath);
404
424
  db.exec("PRAGMA journal_mode = OFF");
405
425
  db.exec("PRAGMA synchronous = OFF");
@@ -466,7 +486,7 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
466
486
  }
467
487
  db.exec("COMMIT");
468
488
  const insertWordSynset = db.prepare(
469
- "INSERT OR IGNORE INTO word_synsets (word_id, synset_id) VALUES (?, ?)"
489
+ "INSERT OR IGNORE INTO word_synsets (word_id, synset_id, sense_order) VALUES (?, ?, ?)"
470
490
  );
471
491
  let wsCount = 0;
472
492
  const totalWordSynsets = Array.from(wordToEntries.values()).reduce(
@@ -477,9 +497,11 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
477
497
  for (const [word, entries] of wordToEntries) {
478
498
  const wId = wordIds.get(word);
479
499
  if (!wId) continue;
500
+ let senseOrder = 0;
480
501
  for (const entry of entries) {
481
502
  for (const sense of entry.senses) {
482
- insertWordSynset.run(wId, sense.synset);
503
+ insertWordSynset.run(wId, sense.synset, senseOrder);
504
+ senseOrder++;
483
505
  wsCount++;
484
506
  if (onProgress && wsCount % 1e4 === 0) {
485
507
  onProgress({
@@ -492,6 +514,37 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
492
514
  }
493
515
  }
494
516
  db.exec("COMMIT");
517
+ const insertExample = db.prepare(
518
+ "INSERT OR IGNORE INTO synset_examples (synset_id, example, example_order) VALUES (?, ?, ?)"
519
+ );
520
+ let totalExamples = 0;
521
+ for (const synsetId of usedSynsetIds) {
522
+ const synset = synsetMap.get(synsetId);
523
+ if (synset?.examples) {
524
+ totalExamples += synset.examples.length;
525
+ }
526
+ }
527
+ db.exec("BEGIN TRANSACTION");
528
+ let exCount = 0;
529
+ for (const synsetId of usedSynsetIds) {
530
+ const synset = synsetMap.get(synsetId);
531
+ if (!synset?.examples) continue;
532
+ for (let i = 0; i < synset.examples.length; i++) {
533
+ const example = decodeXmlEntities(synset.examples[i].inner);
534
+ if (example) {
535
+ insertExample.run(synsetId, example, i);
536
+ exCount++;
537
+ if (onProgress && exCount % 1e4 === 0) {
538
+ onProgress({
539
+ phase: "synset_examples",
540
+ current: exCount,
541
+ total: totalExamples
542
+ });
543
+ }
544
+ }
545
+ }
546
+ }
547
+ db.exec("COMMIT");
495
548
  const insertSynsetRelation = db.prepare(
496
549
  "INSERT OR IGNORE INTO synset_relations (source_id, target_id, rel_type) VALUES (?, ?, ?)"
497
550
  );
@@ -701,7 +754,7 @@ var SynsetRelationRelType2 = {
701
754
  };
702
755
 
703
756
  // src/loader.ts
704
- var import_node_fs = require("fs");
757
+ var import_node_fs2 = require("fs");
705
758
  var import_node_path = __toESM(require("path"), 1);
706
759
  var import_node_stream = require("stream");
707
760
 
@@ -960,8 +1013,8 @@ function getDefaultCacheDir() {
960
1013
  return import_node_path.default.join(homeDir, ".cache", "synset");
961
1014
  }
962
1015
  function fileExists(filePath) {
963
- if ((0, import_node_fs.existsSync)(filePath)) {
964
- const stat = (0, import_node_fs.statSync)(filePath);
1016
+ if ((0, import_node_fs2.existsSync)(filePath)) {
1017
+ const stat = (0, import_node_fs2.statSync)(filePath);
965
1018
  return stat.isFile();
966
1019
  }
967
1020
  return false;
@@ -979,8 +1032,8 @@ function extractVersionFromFilename(filename) {
979
1032
  return match ? parseInt(match[1], 10) : null;
980
1033
  }
981
1034
  function findCachedVersion(cacheDir) {
982
- if (!(0, import_node_fs.existsSync)(cacheDir)) return null;
983
- const files = (0, import_node_fs.readdirSync)(cacheDir);
1035
+ if (!(0, import_node_fs2.existsSync)(cacheDir)) return null;
1036
+ const files = (0, import_node_fs2.readdirSync)(cacheDir);
984
1037
  const wordnetFiles = files.map((f) => ({ file: f, year: extractVersionFromFilename(f) })).filter((x) => x.year !== null).sort((a, b) => b.year - a.year);
985
1038
  return wordnetFiles.length > 0 ? wordnetFiles[0].year.toString() : null;
986
1039
  }
@@ -1041,14 +1094,14 @@ async function downloadWordNet(version, destPath) {
1041
1094
  );
1042
1095
  const arrayBuffer = await new Response(decompressed).arrayBuffer();
1043
1096
  const dir = import_node_path.default.dirname(destPath);
1044
- if (!(0, import_node_fs.existsSync)(dir)) {
1045
- (0, import_node_fs.mkdirSync)(dir, { recursive: true });
1097
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1098
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1046
1099
  }
1047
- (0, import_node_fs.writeFileSync)(destPath, Buffer.from(arrayBuffer));
1100
+ (0, import_node_fs2.writeFileSync)(destPath, Buffer.from(arrayBuffer));
1048
1101
  }
1049
1102
  function createParser(filePath) {
1050
1103
  const resolvedPath = import_node_path.default.resolve(filePath);
1051
- const nodeStream = (0, import_node_fs.createReadStream)(resolvedPath);
1104
+ const nodeStream = (0, import_node_fs2.createReadStream)(resolvedPath);
1052
1105
  const webStream = import_node_stream.Readable.toWeb(nodeStream);
1053
1106
  return parse(webStream, {
1054
1107
  ignoreDeclaration: false,
@@ -1223,6 +1276,7 @@ Commands:
1223
1276
 
1224
1277
  Options:
1225
1278
  --file <path> Use a local WordNet XML file instead of cache
1279
+ --overwrite Overwrite existing file (for export-sqlite)
1226
1280
  --help, -h Show this help message
1227
1281
 
1228
1282
  Examples:
@@ -1258,10 +1312,12 @@ async function main() {
1258
1312
  console.error("Error: Missing output path for export-sqlite");
1259
1313
  process.exit(1);
1260
1314
  }
1315
+ const overwrite = args.includes("--overwrite");
1261
1316
  console.log("Loading WordNet data...");
1262
1317
  const lexicon2 = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
1263
1318
  console.log(`Exporting to ${outputPath}...`);
1264
1319
  exportToSQLite(lexicon2, outputPath, {
1320
+ overwrite,
1265
1321
  onProgress: ({ phase, current, total }) => {
1266
1322
  process.stdout.write(`\r${phase}: ${current}/${total}`);
1267
1323
  }