synset 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -23,251 +23,8 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
23
23
  mod
24
24
  ));
25
25
 
26
- // src/loader.ts
27
- var import_node_fs = require("fs");
28
- var import_node_path = __toESM(require("path"), 1);
29
-
30
- // node_modules/@dbushell/xml-streamify/src/node.ts
31
- var Node = class {
32
- #type;
33
- #children;
34
- #parent;
35
- #attr;
36
- #raw;
37
- constructor(type, parent, raw) {
38
- this.#type = type;
39
- this.#parent = parent;
40
- this.#raw = raw;
41
- this.#children = [];
42
- }
43
- get type() {
44
- return this.#type;
45
- }
46
- get raw() {
47
- return this.#raw ?? "";
48
- }
49
- get parent() {
50
- return this.#parent;
51
- }
52
- get children() {
53
- return this.#children;
54
- }
55
- get attributes() {
56
- if (this.#attr) {
57
- return this.#attr;
58
- }
59
- this.#attr = {};
60
- if (this.raw) {
61
- const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
62
- let match;
63
- while ((match = regex.exec(this.raw)) !== null) {
64
- this.#attr[match[1]] = match[3];
65
- }
66
- }
67
- return this.#attr;
68
- }
69
- get innerText() {
70
- if (this.children.length) {
71
- let text = "";
72
- for (const child of this.children) {
73
- text += child.innerText;
74
- }
75
- return text;
76
- }
77
- return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
78
- }
79
- addChild(child) {
80
- this.#children.push(child);
81
- }
82
- /**
83
- * Returns true if node and parents match the key hierarchy
84
- * @param keys - XML tag names
85
- */
86
- is(...keys) {
87
- if (!keys.length) return false;
88
- let parent;
89
- for (const key of keys.toReversed()) {
90
- parent = parent ? parent.parent : this;
91
- if (parent?.type !== key) {
92
- return false;
93
- }
94
- }
95
- return true;
96
- }
97
- /**
98
- * Return the first child matching the key
99
- * @param key - XML tag name
100
- */
101
- first(key) {
102
- return this.children.find((n) => n.type === key);
103
- }
104
- /**
105
- * Return all children matching the key hierarchy
106
- * @param keys - XML tag names
107
- */
108
- all(...keys) {
109
- let nodes = this.children;
110
- let found = [];
111
- for (const [i, k] of Object.entries(keys)) {
112
- if (Number.parseInt(i) === keys.length - 1) {
113
- found = nodes.filter((n) => n.type === k);
114
- break;
115
- }
116
- nodes = nodes?.find((n) => n.type === k)?.children;
117
- if (!nodes) return [];
118
- }
119
- return found;
120
- }
121
- };
122
-
123
- // node_modules/@dbushell/xml-streamify/src/stream.ts
124
- var ENTITIES = {
125
- cdata: {
126
- end: "]]>",
127
- start: /^<!\[CDATA\[/
128
- },
129
- comment: {
130
- end: "-->",
131
- start: /^<!--/
132
- },
133
- declaration: {
134
- end: "?>",
135
- start: /^<\?/
136
- },
137
- doctype: {
138
- end: ">",
139
- start: /^<!DOCTYPE/i
140
- },
141
- element: {
142
- end: ">",
143
- start: /^<[\w:.-/]/
144
- }
145
- };
146
- var transformer = {
147
- buf: "",
148
- state: "skip" /* SKIP */,
149
- previous: ["skip" /* SKIP */, -1],
150
- flush(controller) {
151
- if (this.buf.length > 0) {
152
- controller.enqueue(["text" /* TEXT */, this.buf]);
153
- }
154
- },
155
- transform(chunk, controller) {
156
- this.buf += chunk;
157
- while (this.buf.length) {
158
- if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
159
- break;
160
- }
161
- this.previous = [this.state, this.buf.length];
162
- if (this.state === "skip" /* SKIP */) {
163
- const index = this.buf.indexOf("<");
164
- if (index < 0) break;
165
- controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
166
- this.buf = this.buf.substring(index);
167
- this.state = "search" /* SEARCH */;
168
- }
169
- if (this.state === "search" /* SEARCH */) {
170
- if (this.buf.length < 3) break;
171
- for (const [state, entity] of Object.entries(ENTITIES)) {
172
- if (this.buf.match(entity.start)) {
173
- this.state = state;
174
- break;
175
- }
176
- }
177
- continue;
178
- }
179
- if (Object.hasOwn(ENTITIES, this.state)) {
180
- const { end } = ENTITIES[this.state];
181
- const index = this.buf.indexOf(end);
182
- if (index < 0) break;
183
- controller.enqueue([
184
- this.state,
185
- this.buf.substring(0, index + end.length)
186
- ]);
187
- this.buf = this.buf.substring(index + end.length);
188
- this.state = "skip" /* SKIP */;
189
- continue;
190
- }
191
- throw new Error();
192
- }
193
- }
194
- };
195
- var XMLStream = class extends TransformStream {
196
- constructor() {
197
- super({ ...transformer });
198
- }
199
- };
200
-
201
- // node_modules/@dbushell/xml-streamify/src/parse.ts
202
- var ignoreTypes = {
203
- ["comment" /* COMMENT */]: "ignoreComments",
204
- ["declaration" /* DECLARATION */]: "ignoreDeclaration",
205
- ["doctype" /* DOCTYPE */]: "ignoreDoctype"
206
- };
207
- async function* parse(input, options) {
208
- const document = new Node("@document");
209
- try {
210
- const init = { ...options?.fetchOptions };
211
- if (options?.signal) {
212
- init.signal = options.signal;
213
- }
214
- let source;
215
- if (typeof input === "string" || input instanceof URL) {
216
- input = new URL(input);
217
- const response = await fetch(input, init);
218
- if (!response.ok || !response.body) {
219
- throw new Error(`Bad response`);
220
- }
221
- source = response.body;
222
- } else {
223
- source = input;
224
- }
225
- const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
226
- signal: options?.signal
227
- });
228
- let node = document;
229
- for await (const [type, value] of stream) {
230
- if (options?.signal?.aborted) {
231
- break;
232
- }
233
- if (type === "text" /* TEXT */) {
234
- if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
235
- continue;
236
- }
237
- }
238
- if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
239
- const newNode = new Node(type, node, value);
240
- node.addChild(newNode);
241
- yield newNode;
242
- continue;
243
- }
244
- if (type === "element" /* ELEMENT */) {
245
- const name = value.match(/<\/?([\w:.-]+)/)[1];
246
- if (value.endsWith("/>")) {
247
- const newNode2 = new Node(name, node, value);
248
- node.addChild(newNode2);
249
- yield newNode2;
250
- continue;
251
- }
252
- if (value.startsWith("</")) {
253
- yield node;
254
- node = node.parent;
255
- continue;
256
- }
257
- const newNode = new Node(name, node, value);
258
- node.addChild(newNode);
259
- node = newNode;
260
- continue;
261
- }
262
- node.addChild(new Node(type, node, value));
263
- }
264
- } catch (err) {
265
- if (options?.silent === false) {
266
- throw err;
267
- }
268
- }
269
- return document;
270
- }
26
+ // src/export-sqlite.ts
27
+ var import_bun_sqlite = require("bun:sqlite");
271
28
 
272
29
  // src/types.ts
273
30
  var import_zod = require("zod");
@@ -434,7 +191,9 @@ var Lexicon = import_zod.z.object({
434
191
  synsets: import_zod.z.array(Synset).min(0),
435
192
  syntacticBehaviors: import_zod.z.array(SyntacticBehavior).min(0)
436
193
  });
437
- var partsOfSpeechList = PartsOfSpeech.options.map((v) => v.value);
194
+ var partsOfSpeechList = PartsOfSpeech.options.map(
195
+ (v) => v.value
196
+ );
438
197
 
439
198
  // src/helpers.ts
440
199
  function PronunciationNode(node) {
@@ -462,7 +221,7 @@ function SenseRelationNode(node) {
462
221
  dcType: optAttr(node, "dc:type")
463
222
  };
464
223
  return SenseRelation.parse(
465
- extendWithRestAttr(node, obj, (s) => s == "dc:type" ? "dcType" : s)
224
+ extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
466
225
  );
467
226
  }
468
227
  function SenseNode(node) {
@@ -478,7 +237,7 @@ function SenseNode(node) {
478
237
  extendWithRestAttr(
479
238
  node,
480
239
  obj,
481
- (s) => s == "subcat" ? "subCat" : s == "adjposition" ? "adjPosition" : s
240
+ (s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
482
241
  )
483
242
  );
484
243
  }
@@ -509,7 +268,7 @@ function ExampleNode(node) {
509
268
  dcSource: optAttr(node, "dc:source")
510
269
  };
511
270
  return Example.parse(
512
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
271
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
513
272
  );
514
273
  }
515
274
  function ILIDefinitionNode(node) {
@@ -546,7 +305,7 @@ function SynsetNode(node) {
546
305
  synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
547
306
  };
548
307
  return Synset.parse(
549
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
308
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
550
309
  );
551
310
  }
552
311
  function LexiconNode(node) {
@@ -575,7 +334,9 @@ var decodeXmlEntities = (s) => {
575
334
  var attr = (node, attrName) => {
576
335
  const value = decodeXmlEntities(node.attributes[attrName]);
577
336
  if (value === void 0) {
578
- throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
337
+ throw new Error(
338
+ `Missing required attribute "${attrName}" on node "${node.type}"`
339
+ );
579
340
  }
580
341
  return value;
581
342
  };
@@ -593,20 +354,497 @@ var extendWithRestAttr = (node, obj, proxy) => {
593
354
  return Object.assign(obj, restAttrs(node, obj, proxy));
594
355
  };
595
356
  var children = (node, type, fn) => {
596
- return node.children.filter((v) => v.type == type).map((v) => fn(v));
357
+ return node.children.filter((v) => v.type === type).map((v) => fn(v));
597
358
  };
598
359
 
599
- // src/loader.ts
600
- var BASE_VERSION = "2024";
601
- function getFilename(version) {
602
- return `english-wordnet-${version}.xml`;
603
- }
604
- function getDownloadUrl(version) {
605
- return `https://en-word.net/static/${getFilename(version)}.gz`;
606
- }
607
- function getDefaultCacheDir() {
608
- const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
609
- return import_node_path.default.join(homeDir, ".cache", "synset");
360
+ // src/export-sqlite.ts
361
+ var SCHEMA = `
362
+ CREATE TABLE IF NOT EXISTS words (
363
+ id INTEGER PRIMARY KEY,
364
+ word TEXT NOT NULL,
365
+ word_display TEXT NOT NULL
366
+ );
367
+ CREATE INDEX IF NOT EXISTS idx_words_word ON words(word);
368
+
369
+ CREATE TABLE IF NOT EXISTS synsets (
370
+ id TEXT PRIMARY KEY,
371
+ pos TEXT NOT NULL,
372
+ definition TEXT NOT NULL
373
+ );
374
+
375
+ CREATE TABLE IF NOT EXISTS word_synsets (
376
+ word_id INTEGER NOT NULL,
377
+ synset_id TEXT NOT NULL,
378
+ PRIMARY KEY (word_id, synset_id)
379
+ );
380
+ CREATE INDEX IF NOT EXISTS idx_ws_word ON word_synsets(word_id);
381
+ `;
382
+ function exportToSQLite(lexicon, outputPath, options = {}) {
383
+ const { onProgress } = options;
384
+ const db = new import_bun_sqlite.Database(outputPath, { create: true });
385
+ db.exec("PRAGMA journal_mode = OFF");
386
+ db.exec("PRAGMA synchronous = OFF");
387
+ db.exec(SCHEMA);
388
+ const wordToEntries = /* @__PURE__ */ new Map();
389
+ for (const entry of lexicon.lexicalEntries) {
390
+ const word = entry.lemmas[0]?.writtenForm;
391
+ if (word) {
392
+ const lower = word.toLowerCase();
393
+ const existing = wordToEntries.get(lower) || [];
394
+ existing.push(entry);
395
+ wordToEntries.set(lower, existing);
396
+ }
397
+ }
398
+ const synsetMap = /* @__PURE__ */ new Map();
399
+ for (const synset of lexicon.synsets) {
400
+ synsetMap.set(synset.id, synset);
401
+ }
402
+ const insertWord = db.prepare(
403
+ "INSERT INTO words (word, word_display) VALUES (?, ?)"
404
+ );
405
+ const wordIds = /* @__PURE__ */ new Map();
406
+ const words = Array.from(wordToEntries.keys()).sort();
407
+ const totalWords = words.length;
408
+ db.exec("BEGIN TRANSACTION");
409
+ let wordId = 0;
410
+ for (let i = 0; i < words.length; i++) {
411
+ const word = words[i];
412
+ const entries = wordToEntries.get(word);
413
+ if (!entries) continue;
414
+ const display = entries[0].lemmas[0]?.writtenForm || word;
415
+ insertWord.run(word, display);
416
+ wordId++;
417
+ wordIds.set(word, wordId);
418
+ if (onProgress && i % 1e4 === 0) {
419
+ onProgress({ phase: "words", current: i, total: totalWords });
420
+ }
421
+ }
422
+ db.exec("COMMIT");
423
+ const usedSynsetIds = /* @__PURE__ */ new Set();
424
+ for (const entries of wordToEntries.values()) {
425
+ for (const entry of entries) {
426
+ for (const sense of entry.senses) {
427
+ usedSynsetIds.add(sense.synset);
428
+ }
429
+ }
430
+ }
431
+ const insertSynset = db.prepare(
432
+ "INSERT OR IGNORE INTO synsets (id, pos, definition) VALUES (?, ?, ?)"
433
+ );
434
+ const synsetList = Array.from(usedSynsetIds);
435
+ const totalSynsets = synsetList.length;
436
+ db.exec("BEGIN TRANSACTION");
437
+ for (let i = 0; i < synsetList.length; i++) {
438
+ const synsetId = synsetList[i];
439
+ const synset = synsetMap.get(synsetId);
440
+ if (synset) {
441
+ const def = decodeXmlEntities(synset.definitions[0]?.inner) || "";
442
+ insertSynset.run(synsetId, synset.partOfSpeech, def);
443
+ }
444
+ if (onProgress && i % 1e4 === 0) {
445
+ onProgress({ phase: "synsets", current: i, total: totalSynsets });
446
+ }
447
+ }
448
+ db.exec("COMMIT");
449
+ const insertRelation = db.prepare(
450
+ "INSERT OR IGNORE INTO word_synsets (word_id, synset_id) VALUES (?, ?)"
451
+ );
452
+ let relationCount = 0;
453
+ const totalRelations = Array.from(wordToEntries.values()).reduce(
454
+ (sum, entries) => sum + entries.reduce((s, e) => s + e.senses.length, 0),
455
+ 0
456
+ );
457
+ db.exec("BEGIN TRANSACTION");
458
+ for (const [word, entries] of wordToEntries) {
459
+ const wordId2 = wordIds.get(word);
460
+ if (!wordId2) continue;
461
+ for (const entry of entries) {
462
+ for (const sense of entry.senses) {
463
+ insertRelation.run(wordId2, sense.synset);
464
+ relationCount++;
465
+ if (onProgress && relationCount % 1e4 === 0) {
466
+ onProgress({
467
+ phase: "relations",
468
+ current: relationCount,
469
+ total: totalRelations
470
+ });
471
+ }
472
+ }
473
+ }
474
+ }
475
+ db.exec("COMMIT");
476
+ db.close();
477
+ }
478
+
479
+ // src/literals.ts
480
+ var PartsOfSpeech2 = {
481
+ n: "Noun",
482
+ v: "Verb",
483
+ a: "Adjective",
484
+ r: "Adverb",
485
+ s: "Adjective Satellite",
486
+ t: "?",
487
+ c: "Conjunction",
488
+ p: "Adposition (Preposition, postposition, etc.)",
489
+ x: "Other (inc. particle, classifier, bound morphemes, determiners)",
490
+ u: "Unknown"
491
+ };
492
+ var SynsetRelationRelType2 = {
493
+ agent: "Agent",
494
+ also: "See also",
495
+ anto_converse: "Converse antonym",
496
+ anto_gradable: "Gradable antonym",
497
+ anto_simple: "Simple antonym",
498
+ antonym: "Antonym",
499
+ attribute: "Attribute",
500
+ augmentative: "Augmentative",
501
+ be_in_state: "Be in state",
502
+ cause: "Cause",
503
+ causes: "Causes",
504
+ classified_by: "Classified by",
505
+ classifies: "Classifies",
506
+ co_agent_instrument: "Co-agent instrument",
507
+ co_agent_patient: "Co-agent patient",
508
+ co_agent_result: "Co-agent result",
509
+ co_instrument_agent: "Co-instrument agent",
510
+ co_instrument_patient: "Co-instrument patient",
511
+ co_instrument_result: "Co-instrument result",
512
+ co_patient_agent: "Co-patient agent",
513
+ co_patient_instrument: "Co-patient instrument",
514
+ co_result_agent: "Co-result agent",
515
+ co_result_instrument: "Co-result instrument",
516
+ co_role: "Co-role",
517
+ diminutive: "Diminutive",
518
+ direction: "Direction",
519
+ domain_member_region: "Domain member region",
520
+ domain_member_topic: "Domain member topic",
521
+ domain_region: "Domain region",
522
+ domain_topic: "Domain topic",
523
+ entail: "Entail",
524
+ entails: "Entails",
525
+ eq_synonym: "Equivalent synonym",
526
+ exemplifies: "Exemplifies",
527
+ feminine: "Feminine",
528
+ has_augmentative: "Has augmentative",
529
+ has_diminutive: "Has diminutive",
530
+ has_domain_region: "Has domain region",
531
+ has_domain_topic: "Has domain topic",
532
+ has_feminine: "Has feminine",
533
+ has_masculine: "Has masculine",
534
+ has_young: "Has young",
535
+ holo_location: "Holonym location",
536
+ holo_member: "Member holonym",
537
+ holo_part: "Part holonym",
538
+ holo_portion: "Portion holonym",
539
+ holo_substance: "Substance holonym",
540
+ holonym: "Holonym",
541
+ hypernym: "Hypernym",
542
+ hyponym: "Hyponym",
543
+ in_manner: "In manner",
544
+ instance_hypernym: "Instance hypernym",
545
+ instance_hyponym: "Instance hyponym",
546
+ instrument: "Instrument",
547
+ involved: "Involved",
548
+ involved_agent: "Involved agent",
549
+ involved_direction: "Involved direction",
550
+ involved_instrument: "Involved instrument",
551
+ involved_location: "Involved location",
552
+ involved_patient: "Involved patient",
553
+ involved_result: "Involved result",
554
+ involved_source_direction: "Involved source direction",
555
+ involved_target_direction: "Involved target direction",
556
+ ir_synonym: "IR synonym",
557
+ is_caused_by: "Is caused by",
558
+ is_entailed_by: "Is entailed by",
559
+ is_exemplified_by: "Is exemplified by",
560
+ is_subevent_of: "Is subevent of",
561
+ location: "Location",
562
+ manner_of: "Manner of",
563
+ masculine: "Masculine",
564
+ member_holonym: "Member holonym",
565
+ member_meronym: "Member meronym",
566
+ mero_location: "Meronym location",
567
+ mero_member: "Member meronym",
568
+ mero_part: "Part meronym",
569
+ mero_portion: "Portion meronym",
570
+ mero_substance: "Substance meronym",
571
+ meronym: "Meronym",
572
+ other: "Other",
573
+ part_holonym: "Part holonym",
574
+ part_meronym: "Part meronym",
575
+ patient: "Patient",
576
+ restricted_by: "Restricted by",
577
+ restricts: "Restricts",
578
+ result: "Result",
579
+ role: "Role",
580
+ similar: "Similar",
581
+ source_direction: "Source direction",
582
+ state_of: "State of",
583
+ subevent: "Subevent",
584
+ substance_holonym: "Substance holonym",
585
+ substance_meronym: "Substance meronym",
586
+ target_direction: "Target direction",
587
+ young: "Young"
588
+ };
589
+
590
+ // src/loader.ts
591
+ var import_node_fs = require("fs");
592
+ var import_node_path = __toESM(require("path"), 1);
593
+ var import_node_stream = require("stream");
594
+
595
+ // node_modules/@dbushell/xml-streamify/src/node.ts
596
+ var Node = class {
597
+ #type;
598
+ #children;
599
+ #parent;
600
+ #attr;
601
+ #raw;
602
+ constructor(type, parent, raw) {
603
+ this.#type = type;
604
+ this.#parent = parent;
605
+ this.#raw = raw;
606
+ this.#children = [];
607
+ }
608
+ get type() {
609
+ return this.#type;
610
+ }
611
+ get raw() {
612
+ return this.#raw ?? "";
613
+ }
614
+ get parent() {
615
+ return this.#parent;
616
+ }
617
+ get children() {
618
+ return this.#children;
619
+ }
620
+ get attributes() {
621
+ if (this.#attr) {
622
+ return this.#attr;
623
+ }
624
+ this.#attr = {};
625
+ if (this.raw) {
626
+ const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
627
+ let match;
628
+ while ((match = regex.exec(this.raw)) !== null) {
629
+ this.#attr[match[1]] = match[3];
630
+ }
631
+ }
632
+ return this.#attr;
633
+ }
634
+ get innerText() {
635
+ if (this.children.length) {
636
+ let text = "";
637
+ for (const child of this.children) {
638
+ text += child.innerText;
639
+ }
640
+ return text;
641
+ }
642
+ return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
643
+ }
644
+ addChild(child) {
645
+ this.#children.push(child);
646
+ }
647
+ /**
648
+ * Returns true if node and parents match the key hierarchy
649
+ * @param keys - XML tag names
650
+ */
651
+ is(...keys) {
652
+ if (!keys.length) return false;
653
+ let parent;
654
+ for (const key of keys.toReversed()) {
655
+ parent = parent ? parent.parent : this;
656
+ if (parent?.type !== key) {
657
+ return false;
658
+ }
659
+ }
660
+ return true;
661
+ }
662
+ /**
663
+ * Return the first child matching the key
664
+ * @param key - XML tag name
665
+ */
666
+ first(key) {
667
+ return this.children.find((n) => n.type === key);
668
+ }
669
+ /**
670
+ * Return all children matching the key hierarchy
671
+ * @param keys - XML tag names
672
+ */
673
+ all(...keys) {
674
+ let nodes = this.children;
675
+ let found = [];
676
+ for (const [i, k] of Object.entries(keys)) {
677
+ if (Number.parseInt(i) === keys.length - 1) {
678
+ found = nodes.filter((n) => n.type === k);
679
+ break;
680
+ }
681
+ nodes = nodes?.find((n) => n.type === k)?.children;
682
+ if (!nodes) return [];
683
+ }
684
+ return found;
685
+ }
686
+ };
687
+
688
+ // node_modules/@dbushell/xml-streamify/src/stream.ts
689
+ var ENTITIES = {
690
+ cdata: {
691
+ end: "]]>",
692
+ start: /^<!\[CDATA\[/
693
+ },
694
+ comment: {
695
+ end: "-->",
696
+ start: /^<!--/
697
+ },
698
+ declaration: {
699
+ end: "?>",
700
+ start: /^<\?/
701
+ },
702
+ doctype: {
703
+ end: ">",
704
+ start: /^<!DOCTYPE/i
705
+ },
706
+ element: {
707
+ end: ">",
708
+ start: /^<[\w:.-/]/
709
+ }
710
+ };
711
+ var transformer = {
712
+ buf: "",
713
+ state: "skip" /* SKIP */,
714
+ previous: ["skip" /* SKIP */, -1],
715
+ flush(controller) {
716
+ if (this.buf.length > 0) {
717
+ controller.enqueue(["text" /* TEXT */, this.buf]);
718
+ }
719
+ },
720
+ transform(chunk, controller) {
721
+ this.buf += chunk;
722
+ while (this.buf.length) {
723
+ if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
724
+ break;
725
+ }
726
+ this.previous = [this.state, this.buf.length];
727
+ if (this.state === "skip" /* SKIP */) {
728
+ const index = this.buf.indexOf("<");
729
+ if (index < 0) break;
730
+ controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
731
+ this.buf = this.buf.substring(index);
732
+ this.state = "search" /* SEARCH */;
733
+ }
734
+ if (this.state === "search" /* SEARCH */) {
735
+ if (this.buf.length < 3) break;
736
+ for (const [state, entity] of Object.entries(ENTITIES)) {
737
+ if (this.buf.match(entity.start)) {
738
+ this.state = state;
739
+ break;
740
+ }
741
+ }
742
+ continue;
743
+ }
744
+ if (Object.hasOwn(ENTITIES, this.state)) {
745
+ const { end } = ENTITIES[this.state];
746
+ const index = this.buf.indexOf(end);
747
+ if (index < 0) break;
748
+ controller.enqueue([
749
+ this.state,
750
+ this.buf.substring(0, index + end.length)
751
+ ]);
752
+ this.buf = this.buf.substring(index + end.length);
753
+ this.state = "skip" /* SKIP */;
754
+ continue;
755
+ }
756
+ throw new Error();
757
+ }
758
+ }
759
+ };
760
+ var XMLStream = class extends TransformStream {
761
+ constructor() {
762
+ super({ ...transformer });
763
+ }
764
+ };
765
+
766
+ // node_modules/@dbushell/xml-streamify/src/parse.ts
767
+ var ignoreTypes = {
768
+ ["comment" /* COMMENT */]: "ignoreComments",
769
+ ["declaration" /* DECLARATION */]: "ignoreDeclaration",
770
+ ["doctype" /* DOCTYPE */]: "ignoreDoctype"
771
+ };
772
+ async function* parse(input, options) {
773
+ const document = new Node("@document");
774
+ try {
775
+ const init = { ...options?.fetchOptions };
776
+ if (options?.signal) {
777
+ init.signal = options.signal;
778
+ }
779
+ let source;
780
+ if (typeof input === "string" || input instanceof URL) {
781
+ input = new URL(input);
782
+ const response = await fetch(input, init);
783
+ if (!response.ok || !response.body) {
784
+ throw new Error(`Bad response`);
785
+ }
786
+ source = response.body;
787
+ } else {
788
+ source = input;
789
+ }
790
+ const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
791
+ signal: options?.signal
792
+ });
793
+ let node = document;
794
+ for await (const [type, value] of stream) {
795
+ if (options?.signal?.aborted) {
796
+ break;
797
+ }
798
+ if (type === "text" /* TEXT */) {
799
+ if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
800
+ continue;
801
+ }
802
+ }
803
+ if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
804
+ const newNode = new Node(type, node, value);
805
+ node.addChild(newNode);
806
+ yield newNode;
807
+ continue;
808
+ }
809
+ if (type === "element" /* ELEMENT */) {
810
+ const name = value.match(/<\/?([\w:.-]+)/)[1];
811
+ if (value.endsWith("/>")) {
812
+ const newNode2 = new Node(name, node, value);
813
+ node.addChild(newNode2);
814
+ yield newNode2;
815
+ continue;
816
+ }
817
+ if (value.startsWith("</")) {
818
+ yield node;
819
+ node = node.parent;
820
+ continue;
821
+ }
822
+ const newNode = new Node(name, node, value);
823
+ node.addChild(newNode);
824
+ node = newNode;
825
+ continue;
826
+ }
827
+ node.addChild(new Node(type, node, value));
828
+ }
829
+ } catch (err) {
830
+ if (options?.silent === false) {
831
+ throw err;
832
+ }
833
+ }
834
+ return document;
835
+ }
836
+
837
+ // src/loader.ts
838
+ var BASE_VERSION = "2024";
839
+ function getFilename(version) {
840
+ return `english-wordnet-${version}.xml`;
841
+ }
842
+ function getDownloadUrl(version) {
843
+ return `https://en-word.net/static/${getFilename(version)}.gz`;
844
+ }
845
+ function getDefaultCacheDir() {
846
+ const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
847
+ return import_node_path.default.join(homeDir, ".cache", "synset");
610
848
  }
611
849
  function fileExists(filePath) {
612
850
  if ((0, import_node_fs.existsSync)(filePath)) {
@@ -661,7 +899,6 @@ async function findLatestVersion(onProgress, cacheDir) {
661
899
  for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
662
900
  const version = year.toString();
663
901
  if (await urlExists(getDownloadUrl(version))) {
664
- continue;
665
902
  } else {
666
903
  return (year - 1).toString();
667
904
  }
@@ -682,9 +919,13 @@ async function downloadWordNet(version, destPath) {
682
919
  const url = getDownloadUrl(version);
683
920
  const response = await fetch(url);
684
921
  if (!response.ok || !response.body) {
685
- throw new Error(`Failed to download WordNet ${version}: ${response.statusText}`);
922
+ throw new Error(
923
+ `Failed to download WordNet ${version}: ${response.statusText}`
924
+ );
686
925
  }
687
- const decompressed = response.body.pipeThrough(new DecompressionStream("gzip"));
926
+ const decompressed = response.body.pipeThrough(
927
+ new DecompressionStream("gzip")
928
+ );
688
929
  const arrayBuffer = await new Response(decompressed).arrayBuffer();
689
930
  const dir = import_node_path.default.dirname(destPath);
690
931
  if (!(0, import_node_fs.existsSync)(dir)) {
@@ -694,8 +935,9 @@ async function downloadWordNet(version, destPath) {
694
935
  }
695
936
  function createParser(filePath) {
696
937
  const resolvedPath = import_node_path.default.resolve(filePath);
697
- const fileUrl = resolvedPath.startsWith("/") ? `file://${resolvedPath}` : `file:///${resolvedPath.replace(/\\/g, "/")}`;
698
- return parse(fileUrl, {
938
+ const nodeStream = (0, import_node_fs.createReadStream)(resolvedPath);
939
+ const webStream = import_node_stream.Readable.toWeb(nodeStream);
940
+ return parse(webStream, {
699
941
  ignoreDeclaration: false,
700
942
  silent: false
701
943
  });
@@ -848,117 +1090,6 @@ function getSynsetWords(index, synset) {
848
1090
  return synset.members.map((id) => index.entries.get(id)).filter((e) => e !== void 0).map((e) => e.lemmas[0]?.writtenForm).filter((w) => w !== void 0);
849
1091
  }
850
1092
 
851
- // src/literals.ts
852
- var PartsOfSpeech2 = {
853
- n: "Noun",
854
- v: "Verb",
855
- a: "Adjective",
856
- r: "Adverb",
857
- s: "Adjective Satellite",
858
- t: "?",
859
- c: "Conjunction",
860
- p: "Adposition (Preposition, postposition, etc.)",
861
- x: "Other (inc. particle, classifier, bound morphemes, determiners)",
862
- u: "Unknown"
863
- };
864
- var SynsetRelationRelType2 = {
865
- agent: "Agent",
866
- also: "See also",
867
- anto_converse: "Converse antonym",
868
- anto_gradable: "Gradable antonym",
869
- anto_simple: "Simple antonym",
870
- antonym: "Antonym",
871
- attribute: "Attribute",
872
- augmentative: "Augmentative",
873
- be_in_state: "Be in state",
874
- cause: "Cause",
875
- causes: "Causes",
876
- classified_by: "Classified by",
877
- classifies: "Classifies",
878
- co_agent_instrument: "Co-agent instrument",
879
- co_agent_patient: "Co-agent patient",
880
- co_agent_result: "Co-agent result",
881
- co_instrument_agent: "Co-instrument agent",
882
- co_instrument_patient: "Co-instrument patient",
883
- co_instrument_result: "Co-instrument result",
884
- co_patient_agent: "Co-patient agent",
885
- co_patient_instrument: "Co-patient instrument",
886
- co_result_agent: "Co-result agent",
887
- co_result_instrument: "Co-result instrument",
888
- co_role: "Co-role",
889
- diminutive: "Diminutive",
890
- direction: "Direction",
891
- domain_member_region: "Domain member region",
892
- domain_member_topic: "Domain member topic",
893
- domain_region: "Domain region",
894
- domain_topic: "Domain topic",
895
- entail: "Entail",
896
- entails: "Entails",
897
- eq_synonym: "Equivalent synonym",
898
- exemplifies: "Exemplifies",
899
- feminine: "Feminine",
900
- has_augmentative: "Has augmentative",
901
- has_diminutive: "Has diminutive",
902
- has_domain_region: "Has domain region",
903
- has_domain_topic: "Has domain topic",
904
- has_feminine: "Has feminine",
905
- has_masculine: "Has masculine",
906
- has_young: "Has young",
907
- holo_location: "Holonym location",
908
- holo_member: "Member holonym",
909
- holo_part: "Part holonym",
910
- holo_portion: "Portion holonym",
911
- holo_substance: "Substance holonym",
912
- holonym: "Holonym",
913
- hypernym: "Hypernym",
914
- hyponym: "Hyponym",
915
- in_manner: "In manner",
916
- instance_hypernym: "Instance hypernym",
917
- instance_hyponym: "Instance hyponym",
918
- instrument: "Instrument",
919
- involved: "Involved",
920
- involved_agent: "Involved agent",
921
- involved_direction: "Involved direction",
922
- involved_instrument: "Involved instrument",
923
- involved_location: "Involved location",
924
- involved_patient: "Involved patient",
925
- involved_result: "Involved result",
926
- involved_source_direction: "Involved source direction",
927
- involved_target_direction: "Involved target direction",
928
- ir_synonym: "IR synonym",
929
- is_caused_by: "Is caused by",
930
- is_entailed_by: "Is entailed by",
931
- is_exemplified_by: "Is exemplified by",
932
- is_subevent_of: "Is subevent of",
933
- location: "Location",
934
- manner_of: "Manner of",
935
- masculine: "Masculine",
936
- member_holonym: "Member holonym",
937
- member_meronym: "Member meronym",
938
- mero_location: "Meronym location",
939
- mero_member: "Member meronym",
940
- mero_part: "Part meronym",
941
- mero_portion: "Portion meronym",
942
- mero_substance: "Substance meronym",
943
- meronym: "Meronym",
944
- other: "Other",
945
- part_holonym: "Part holonym",
946
- part_meronym: "Part meronym",
947
- patient: "Patient",
948
- restricted_by: "Restricted by",
949
- restricts: "Restricts",
950
- result: "Result",
951
- role: "Role",
952
- similar: "Similar",
953
- source_direction: "Source direction",
954
- state_of: "State of",
955
- subevent: "Subevent",
956
- substance_holonym: "Substance holonym",
957
- substance_meronym: "Substance meronym",
958
- target_direction: "Target direction",
959
- young: "Young"
960
- };
961
-
962
1093
  // src/cli.ts
963
1094
  var decode = (s) => decodeXmlEntities(s) ?? "";
964
1095
  var HELP = `
@@ -975,6 +1106,7 @@ Commands:
975
1106
  related <word> Show all relations for a word
976
1107
  info <synset-id> Show details for a synset ID
977
1108
  fetch Download WordNet data to cache
1109
+ export-sqlite <out> Export dictionary to SQLite database
978
1110
 
979
1111
  Options:
980
1112
  --file <path> Use a local WordNet XML file instead of cache
@@ -985,6 +1117,7 @@ Examples:
985
1117
  synset synonyms happy
986
1118
  synset related computer --file ./wordnet.xml
987
1119
  synset fetch
1120
+ synset export-sqlite dictionary.db
988
1121
  `;
989
1122
  async function main() {
990
1123
  const args = process.argv.slice(2);
@@ -1006,6 +1139,24 @@ async function main() {
1006
1139
  console.log(`WordNet ${version} cached at: ${cachedPath}`);
1007
1140
  return;
1008
1141
  }
1142
+ if (command === "export-sqlite") {
1143
+ const outputPath = cleanArgs[1];
1144
+ if (!outputPath) {
1145
+ console.error("Error: Missing output path for export-sqlite");
1146
+ process.exit(1);
1147
+ }
1148
+ console.log("Loading WordNet data...");
1149
+ const lexicon2 = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
1150
+ console.log(`Exporting to ${outputPath}...`);
1151
+ exportToSQLite(lexicon2, outputPath, {
1152
+ onProgress: ({ phase, current, total }) => {
1153
+ process.stdout.write(`\r${phase}: ${current}/${total}`);
1154
+ }
1155
+ });
1156
+ console.log(`
1157
+ Exported to ${outputPath}`);
1158
+ return;
1159
+ }
1009
1160
  if (!word && command !== "fetch") {
1010
1161
  console.error(`Error: Missing word argument for command '${command}'`);
1011
1162
  process.exit(1);
@@ -1089,7 +1240,7 @@ async function main() {
1089
1240
  for (const [relType, words] of relsByType) {
1090
1241
  const label = SynsetRelationRelType2[relType] || relType;
1091
1242
  console.log(` ${label}:`);
1092
- words.forEach((w) => console.log(` - ${w}`));
1243
+ for (const w of words) console.log(` - ${w}`);
1093
1244
  }
1094
1245
  }
1095
1246
  break;
@@ -1108,11 +1259,12 @@ async function main() {
1108
1259
  console.log(`ILI: ${synset.ili}`);
1109
1260
  console.log(`
1110
1261
  Definitions:`);
1111
- synset.definitions.forEach((d) => console.log(` - ${decode(d.inner)}`));
1262
+ for (const d of synset.definitions) console.log(` - ${decode(d.inner)}`);
1112
1263
  if (synset.examples.length > 0) {
1113
1264
  console.log(`
1114
1265
  Examples:`);
1115
- synset.examples.forEach((e) => console.log(` - "${decode(e.inner)}"`));
1266
+ for (const e of synset.examples)
1267
+ console.log(` - "${decode(e.inner)}"`);
1116
1268
  }
1117
1269
  if (synset.synsetRelations.length > 0) {
1118
1270
  console.log(`