synset 0.9.5 → 0.9.7

This diff shows the changes between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,7 +88,7 @@ import { exportToSQLite } from 'synset'
88
88
  // Export to SQLite
89
89
  exportToSQLite(lexicon, 'dictionary.db', {
90
90
  onProgress: ({ phase, current, total }) => {
91
- // phases: words, synsets, word_synsets, synset_relations
91
+ // phases: words, synsets, word_synsets, synset_relations, sense_relations
92
92
  console.log(`${phase}: ${current}/${total}`)
93
93
  }
94
94
  })
@@ -103,9 +103,11 @@ Tables:
103
103
  - `synsets` - definitions with part of speech
104
104
  - `word_synsets` - word → synset mappings
105
105
  - `synset_relations` - hypernym, hyponym, meronym, etc. links between synsets
106
+ - `sense_relations` - antonym, derivation, pertainym, etc. links between word senses
106
107
 
107
- Example query for word hypernyms:
108
+ Example queries:
108
109
  ```sql
110
+ -- Hypernyms via synset relations (dog → canine, domestic animal)
109
111
  SELECT w2.word_display, s2.definition
110
112
  FROM words w
111
113
  JOIN word_synsets ws ON w.id = ws.word_id
@@ -114,7 +116,14 @@ JOIN synsets s2 ON sr.target_id = s2.id
114
116
  JOIN word_synsets ws2 ON s2.id = ws2.synset_id
115
117
  JOIN words w2 ON ws2.word_id = w2.id
116
118
  WHERE w.word = 'dog' AND sr.rel_type = 'hypernym';
117
- -- Returns: canine, domestic animal, ...
119
+
120
+ -- Antonyms via sense relations (happy → unhappy)
121
+ SELECT w2.word_display, s2.definition
122
+ FROM words w
123
+ JOIN sense_relations sr ON w.id = sr.source_word_id
124
+ JOIN words w2 ON sr.target_word_id = w2.id
125
+ JOIN synsets s2 ON sr.target_synset_id = s2.id
126
+ WHERE w.word = 'happy' AND sr.rel_type = 'antonym';
118
127
  ```
119
128
 
120
129
  ## Runtime
package/dist/cli.cjs CHANGED
@@ -24,8 +24,12 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/export-sqlite.ts
27
+ var import_node_fs = require("fs");
27
28
  var import_libsql = __toESM(require("libsql"), 1);
28
29
 
30
+ // src/helpers.ts
31
+ var import_entities = require("entities");
32
+
29
33
  // src/types.ts
30
34
  var import_zod = require("zod");
31
35
  var LexiconId = import_zod.z.string();
@@ -327,10 +331,7 @@ function LexiconNode(node) {
327
331
  };
328
332
  return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
329
333
  }
330
- var decodeXmlEntities = (s) => {
331
- if (s === void 0) return void 0;
332
- return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
333
- };
334
+ var decodeXmlEntities = (s) => s === void 0 ? void 0 : (0, import_entities.decodeXML)(s);
334
335
  var attr = (node, attrName) => {
335
336
  const value = decodeXmlEntities(node.attributes[attrName]);
336
337
  if (value === void 0) {
@@ -387,9 +388,28 @@ CREATE TABLE IF NOT EXISTS synset_relations (
387
388
  );
388
389
  CREATE INDEX IF NOT EXISTS idx_sr_source ON synset_relations(source_id);
389
390
  CREATE INDEX IF NOT EXISTS idx_sr_target ON synset_relations(target_id);
391
+
392
+ CREATE TABLE IF NOT EXISTS sense_relations (
393
+ source_word_id INTEGER NOT NULL,
394
+ source_synset_id TEXT NOT NULL,
395
+ target_word_id INTEGER NOT NULL,
396
+ target_synset_id TEXT NOT NULL,
397
+ rel_type TEXT NOT NULL,
398
+ PRIMARY KEY (source_word_id, source_synset_id, target_word_id, target_synset_id, rel_type)
399
+ );
400
+ CREATE INDEX IF NOT EXISTS idx_sense_rel_source ON sense_relations(source_word_id, source_synset_id);
390
401
  `;
391
402
  function exportToSQLite(lexicon, outputPath, options = {}) {
392
- const { onProgress } = options;
403
+ const { onProgress, overwrite } = options;
404
+ if ((0, import_node_fs.existsSync)(outputPath)) {
405
+ if (overwrite) {
406
+ (0, import_node_fs.unlinkSync)(outputPath);
407
+ } else {
408
+ throw new Error(
409
+ `File already exists: ${outputPath}. Use --overwrite to replace it.`
410
+ );
411
+ }
412
+ }
393
413
  const db = new import_libsql.default(outputPath);
394
414
  db.exec("PRAGMA journal_mode = OFF");
395
415
  db.exec("PRAGMA synchronous = OFF");
@@ -516,6 +536,66 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
516
536
  }
517
537
  }
518
538
  db.exec("COMMIT");
539
+ const senseToWordSynset = /* @__PURE__ */ new Map();
540
+ for (const [word, entries] of wordToEntries) {
541
+ const wId = wordIds.get(word);
542
+ if (!wId) continue;
543
+ for (const entry of entries) {
544
+ for (const sense of entry.senses) {
545
+ senseToWordSynset.set(sense.id, {
546
+ wordId: wId,
547
+ synsetId: sense.synset
548
+ });
549
+ }
550
+ }
551
+ }
552
+ const insertSenseRelation = db.prepare(
553
+ "INSERT OR IGNORE INTO sense_relations (source_word_id, source_synset_id, target_word_id, target_synset_id, rel_type) VALUES (?, ?, ?, ?, ?)"
554
+ );
555
+ let totalSenseRelations = 0;
556
+ for (const entries of wordToEntries.values()) {
557
+ for (const entry of entries) {
558
+ for (const sense of entry.senses) {
559
+ for (const rel of sense.senseRelations) {
560
+ if (senseToWordSynset.has(rel.target)) {
561
+ totalSenseRelations++;
562
+ }
563
+ }
564
+ }
565
+ }
566
+ }
567
+ db.exec("BEGIN TRANSACTION");
568
+ let senseRelCount = 0;
569
+ for (const [word, entries] of wordToEntries) {
570
+ const sourceWordId = wordIds.get(word);
571
+ if (!sourceWordId) continue;
572
+ for (const entry of entries) {
573
+ for (const sense of entry.senses) {
574
+ const sourceSynsetId = sense.synset;
575
+ for (const rel of sense.senseRelations) {
576
+ const target = senseToWordSynset.get(rel.target);
577
+ if (target) {
578
+ insertSenseRelation.run(
579
+ sourceWordId,
580
+ sourceSynsetId,
581
+ target.wordId,
582
+ target.synsetId,
583
+ rel.relType
584
+ );
585
+ senseRelCount++;
586
+ if (onProgress && senseRelCount % 1e4 === 0) {
587
+ onProgress({
588
+ phase: "sense_relations",
589
+ current: senseRelCount,
590
+ total: totalSenseRelations
591
+ });
592
+ }
593
+ }
594
+ }
595
+ }
596
+ }
597
+ }
598
+ db.exec("COMMIT");
519
599
  db.close();
520
600
  }
521
601
 
@@ -631,7 +711,7 @@ var SynsetRelationRelType2 = {
631
711
  };
632
712
 
633
713
  // src/loader.ts
634
- var import_node_fs = require("fs");
714
+ var import_node_fs2 = require("fs");
635
715
  var import_node_path = __toESM(require("path"), 1);
636
716
  var import_node_stream = require("stream");
637
717
 
@@ -890,8 +970,8 @@ function getDefaultCacheDir() {
890
970
  return import_node_path.default.join(homeDir, ".cache", "synset");
891
971
  }
892
972
  function fileExists(filePath) {
893
- if ((0, import_node_fs.existsSync)(filePath)) {
894
- const stat = (0, import_node_fs.statSync)(filePath);
973
+ if ((0, import_node_fs2.existsSync)(filePath)) {
974
+ const stat = (0, import_node_fs2.statSync)(filePath);
895
975
  return stat.isFile();
896
976
  }
897
977
  return false;
@@ -909,8 +989,8 @@ function extractVersionFromFilename(filename) {
909
989
  return match ? parseInt(match[1], 10) : null;
910
990
  }
911
991
  function findCachedVersion(cacheDir) {
912
- if (!(0, import_node_fs.existsSync)(cacheDir)) return null;
913
- const files = (0, import_node_fs.readdirSync)(cacheDir);
992
+ if (!(0, import_node_fs2.existsSync)(cacheDir)) return null;
993
+ const files = (0, import_node_fs2.readdirSync)(cacheDir);
914
994
  const wordnetFiles = files.map((f) => ({ file: f, year: extractVersionFromFilename(f) })).filter((x) => x.year !== null).sort((a, b) => b.year - a.year);
915
995
  return wordnetFiles.length > 0 ? wordnetFiles[0].year.toString() : null;
916
996
  }
@@ -971,14 +1051,14 @@ async function downloadWordNet(version, destPath) {
971
1051
  );
972
1052
  const arrayBuffer = await new Response(decompressed).arrayBuffer();
973
1053
  const dir = import_node_path.default.dirname(destPath);
974
- if (!(0, import_node_fs.existsSync)(dir)) {
975
- (0, import_node_fs.mkdirSync)(dir, { recursive: true });
1054
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1055
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
976
1056
  }
977
- (0, import_node_fs.writeFileSync)(destPath, Buffer.from(arrayBuffer));
1057
+ (0, import_node_fs2.writeFileSync)(destPath, Buffer.from(arrayBuffer));
978
1058
  }
979
1059
  function createParser(filePath) {
980
1060
  const resolvedPath = import_node_path.default.resolve(filePath);
981
- const nodeStream = (0, import_node_fs.createReadStream)(resolvedPath);
1061
+ const nodeStream = (0, import_node_fs2.createReadStream)(resolvedPath);
982
1062
  const webStream = import_node_stream.Readable.toWeb(nodeStream);
983
1063
  return parse(webStream, {
984
1064
  ignoreDeclaration: false,
@@ -1153,6 +1233,7 @@ Commands:
1153
1233
 
1154
1234
  Options:
1155
1235
  --file <path> Use a local WordNet XML file instead of cache
1236
+ --overwrite Overwrite existing file (for export-sqlite)
1156
1237
  --help, -h Show this help message
1157
1238
 
1158
1239
  Examples:
@@ -1188,10 +1269,12 @@ async function main() {
1188
1269
  console.error("Error: Missing output path for export-sqlite");
1189
1270
  process.exit(1);
1190
1271
  }
1272
+ const overwrite = args.includes("--overwrite");
1191
1273
  console.log("Loading WordNet data...");
1192
1274
  const lexicon2 = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
1193
1275
  console.log(`Exporting to ${outputPath}...`);
1194
1276
  exportToSQLite(lexicon2, outputPath, {
1277
+ overwrite,
1195
1278
  onProgress: ({ phase, current, total }) => {
1196
1279
  process.stdout.write(`\r${phase}: ${current}/${total}`);
1197
1280
  }