synset 0.9.4 → 0.9.6

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,15 +88,44 @@ import { exportToSQLite } from 'synset'
88
88
  // Export to SQLite
89
89
  exportToSQLite(lexicon, 'dictionary.db', {
90
90
  onProgress: ({ phase, current, total }) => {
91
+ // phases: words, synsets, word_synsets, synset_relations, sense_relations
91
92
  console.log(`${phase}: ${current}/${total}`)
92
93
  }
93
94
  })
94
95
  ```
95
96
 
96
- Schema (`words`, `synsets`, `word_synsets` tables) is available as:
97
+ Schema is available as:
97
98
  - `import { SCHEMA } from 'synset'` - SQL string constant
98
99
  - `synset/schema.sql` - standalone file via package exports
99
100
 
101
+ Tables:
102
+ - `words` - unique words with display form
103
+ - `synsets` - definitions with part of speech
104
+ - `word_synsets` - word → synset mappings
105
+ - `synset_relations` - hypernym, hyponym, meronym, etc. links between synsets
106
+ - `sense_relations` - antonym, derivation, pertainym, etc. links between word senses
107
+
108
+ Example queries:
109
+ ```sql
110
+ -- Hypernyms via synset relations (dog → canine, domestic animal)
111
+ SELECT w2.word_display, s2.definition
112
+ FROM words w
113
+ JOIN word_synsets ws ON w.id = ws.word_id
114
+ JOIN synset_relations sr ON ws.synset_id = sr.source_id
115
+ JOIN synsets s2 ON sr.target_id = s2.id
116
+ JOIN word_synsets ws2 ON s2.id = ws2.synset_id
117
+ JOIN words w2 ON ws2.word_id = w2.id
118
+ WHERE w.word = 'dog' AND sr.rel_type = 'hypernym';
119
+
120
+ -- Antonyms via sense relations (happy → unhappy)
121
+ SELECT w2.word_display, s2.definition
122
+ FROM words w
123
+ JOIN sense_relations sr ON w.id = sr.source_word_id
124
+ JOIN words w2 ON sr.target_word_id = w2.id
125
+ JOIN synsets s2 ON sr.target_synset_id = s2.id
126
+ WHERE w.word = 'happy' AND sr.rel_type = 'antonym';
127
+ ```
128
+
100
129
  ## Runtime
101
130
 
102
131
  - **Bun**: Full support
package/dist/cli.cjs CHANGED
@@ -26,6 +26,9 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
26
26
  // src/export-sqlite.ts
27
27
  var import_libsql = __toESM(require("libsql"), 1);
28
28
 
29
+ // src/helpers.ts
30
+ var import_entities = require("entities");
31
+
29
32
  // src/types.ts
30
33
  var import_zod = require("zod");
31
34
  var LexiconId = import_zod.z.string();
@@ -327,10 +330,7 @@ function LexiconNode(node) {
327
330
  };
328
331
  return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
329
332
  }
330
- var decodeXmlEntities = (s) => {
331
- if (s === void 0) return void 0;
332
- return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
333
- };
333
+ var decodeXmlEntities = (s) => s === void 0 ? void 0 : (0, import_entities.decodeXML)(s);
334
334
  var attr = (node, attrName) => {
335
335
  const value = decodeXmlEntities(node.attributes[attrName]);
336
336
  if (value === void 0) {
@@ -378,6 +378,25 @@ CREATE TABLE IF NOT EXISTS word_synsets (
378
378
  PRIMARY KEY (word_id, synset_id)
379
379
  );
380
380
  CREATE INDEX IF NOT EXISTS idx_ws_word ON word_synsets(word_id);
381
+
382
+ CREATE TABLE IF NOT EXISTS synset_relations (
383
+ source_id TEXT NOT NULL,
384
+ target_id TEXT NOT NULL,
385
+ rel_type TEXT NOT NULL,
386
+ PRIMARY KEY (source_id, target_id, rel_type)
387
+ );
388
+ CREATE INDEX IF NOT EXISTS idx_sr_source ON synset_relations(source_id);
389
+ CREATE INDEX IF NOT EXISTS idx_sr_target ON synset_relations(target_id);
390
+
391
+ CREATE TABLE IF NOT EXISTS sense_relations (
392
+ source_word_id INTEGER NOT NULL,
393
+ source_synset_id TEXT NOT NULL,
394
+ target_word_id INTEGER NOT NULL,
395
+ target_synset_id TEXT NOT NULL,
396
+ rel_type TEXT NOT NULL,
397
+ PRIMARY KEY (source_word_id, source_synset_id, target_word_id, target_synset_id, rel_type)
398
+ );
399
+ CREATE INDEX IF NOT EXISTS idx_sense_rel_source ON sense_relations(source_word_id, source_synset_id);
381
400
  `;
382
401
  function exportToSQLite(lexicon, outputPath, options = {}) {
383
402
  const { onProgress } = options;
@@ -446,33 +465,127 @@ function exportToSQLite(lexicon, outputPath, options = {}) {
446
465
  }
447
466
  }
448
467
  db.exec("COMMIT");
449
- const insertRelation = db.prepare(
468
+ const insertWordSynset = db.prepare(
450
469
  "INSERT OR IGNORE INTO word_synsets (word_id, synset_id) VALUES (?, ?)"
451
470
  );
452
- let relationCount = 0;
453
- const totalRelations = Array.from(wordToEntries.values()).reduce(
471
+ let wsCount = 0;
472
+ const totalWordSynsets = Array.from(wordToEntries.values()).reduce(
454
473
  (sum, entries) => sum + entries.reduce((s, e) => s + e.senses.length, 0),
455
474
  0
456
475
  );
457
476
  db.exec("BEGIN TRANSACTION");
458
477
  for (const [word, entries] of wordToEntries) {
459
- const wordId2 = wordIds.get(word);
460
- if (!wordId2) continue;
478
+ const wId = wordIds.get(word);
479
+ if (!wId) continue;
461
480
  for (const entry of entries) {
462
481
  for (const sense of entry.senses) {
463
- insertRelation.run(wordId2, sense.synset);
464
- relationCount++;
465
- if (onProgress && relationCount % 1e4 === 0) {
482
+ insertWordSynset.run(wId, sense.synset);
483
+ wsCount++;
484
+ if (onProgress && wsCount % 1e4 === 0) {
466
485
  onProgress({
467
- phase: "relations",
468
- current: relationCount,
469
- total: totalRelations
486
+ phase: "word_synsets",
487
+ current: wsCount,
488
+ total: totalWordSynsets
470
489
  });
471
490
  }
472
491
  }
473
492
  }
474
493
  }
475
494
  db.exec("COMMIT");
495
+ const insertSynsetRelation = db.prepare(
496
+ "INSERT OR IGNORE INTO synset_relations (source_id, target_id, rel_type) VALUES (?, ?, ?)"
497
+ );
498
+ let totalSynsetRelations = 0;
499
+ for (const synsetId of usedSynsetIds) {
500
+ const synset = synsetMap.get(synsetId);
501
+ if (synset) {
502
+ for (const rel of synset.synsetRelations) {
503
+ if (usedSynsetIds.has(rel.target)) {
504
+ totalSynsetRelations++;
505
+ }
506
+ }
507
+ }
508
+ }
509
+ db.exec("BEGIN TRANSACTION");
510
+ let srCount = 0;
511
+ for (const synsetId of usedSynsetIds) {
512
+ const synset = synsetMap.get(synsetId);
513
+ if (!synset) continue;
514
+ for (const rel of synset.synsetRelations) {
515
+ if (usedSynsetIds.has(rel.target)) {
516
+ insertSynsetRelation.run(synsetId, rel.target, rel.relType);
517
+ srCount++;
518
+ if (onProgress && srCount % 1e4 === 0) {
519
+ onProgress({
520
+ phase: "synset_relations",
521
+ current: srCount,
522
+ total: totalSynsetRelations
523
+ });
524
+ }
525
+ }
526
+ }
527
+ }
528
+ db.exec("COMMIT");
529
+ const senseToWordSynset = /* @__PURE__ */ new Map();
530
+ for (const [word, entries] of wordToEntries) {
531
+ const wId = wordIds.get(word);
532
+ if (!wId) continue;
533
+ for (const entry of entries) {
534
+ for (const sense of entry.senses) {
535
+ senseToWordSynset.set(sense.id, {
536
+ wordId: wId,
537
+ synsetId: sense.synset
538
+ });
539
+ }
540
+ }
541
+ }
542
+ const insertSenseRelation = db.prepare(
543
+ "INSERT OR IGNORE INTO sense_relations (source_word_id, source_synset_id, target_word_id, target_synset_id, rel_type) VALUES (?, ?, ?, ?, ?)"
544
+ );
545
+ let totalSenseRelations = 0;
546
+ for (const entries of wordToEntries.values()) {
547
+ for (const entry of entries) {
548
+ for (const sense of entry.senses) {
549
+ for (const rel of sense.senseRelations) {
550
+ if (senseToWordSynset.has(rel.target)) {
551
+ totalSenseRelations++;
552
+ }
553
+ }
554
+ }
555
+ }
556
+ }
557
+ db.exec("BEGIN TRANSACTION");
558
+ let senseRelCount = 0;
559
+ for (const [word, entries] of wordToEntries) {
560
+ const sourceWordId = wordIds.get(word);
561
+ if (!sourceWordId) continue;
562
+ for (const entry of entries) {
563
+ for (const sense of entry.senses) {
564
+ const sourceSynsetId = sense.synset;
565
+ for (const rel of sense.senseRelations) {
566
+ const target = senseToWordSynset.get(rel.target);
567
+ if (target) {
568
+ insertSenseRelation.run(
569
+ sourceWordId,
570
+ sourceSynsetId,
571
+ target.wordId,
572
+ target.synsetId,
573
+ rel.relType
574
+ );
575
+ senseRelCount++;
576
+ if (onProgress && senseRelCount % 1e4 === 0) {
577
+ onProgress({
578
+ phase: "sense_relations",
579
+ current: senseRelCount,
580
+ total: totalSenseRelations
581
+ });
582
+ }
583
+ }
584
+ }
585
+ }
586
+ }
587
+ }
588
+ db.exec("COMMIT");
476
589
  db.close();
477
590
  }
478
591