synset 0.9.0 → 0.9.2

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
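
For context, the main addition in 0.9.2 is an export-sqlite command backed by bun:sqlite (src/export-sqlite.ts in the diff below), run as `synset export-sqlite dictionary.db`. The following is a rough sketch of reading the exported database; it assumes the table layout from the SCHEMA string shown in the diff, and the file name and lookup word are illustrative only, not part of the package.

// Hypothetical sketch: querying a database produced by `synset export-sqlite dictionary.db`.
// Table and column names come from the SCHEMA string in the diff; "happy" is just an example word
// (the exporter stores words lowercased in the `word` column).
import { Database } from "bun:sqlite";

const db = new Database("dictionary.db", { readonly: true });
const rows = db
  .query(
    `SELECT s.id, s.pos, s.definition
       FROM words w
       JOIN word_synsets ws ON ws.word_id = w.id
       JOIN synsets s ON s.id = ws.synset_id
      WHERE w.word = ?`
  )
  .all("happy");
for (const row of rows) console.log(row);
db.close();
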
package/dist/cli.js CHANGED
@@ -1,250 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- // src/loader.ts
4
- import { existsSync, statSync, writeFileSync, mkdirSync, readdirSync } from "fs";
5
- import path from "path";
6
-
7
- // node_modules/@dbushell/xml-streamify/src/node.ts
8
- var Node = class {
9
- #type;
10
- #children;
11
- #parent;
12
- #attr;
13
- #raw;
14
- constructor(type, parent, raw) {
15
- this.#type = type;
16
- this.#parent = parent;
17
- this.#raw = raw;
18
- this.#children = [];
19
- }
20
- get type() {
21
- return this.#type;
22
- }
23
- get raw() {
24
- return this.#raw ?? "";
25
- }
26
- get parent() {
27
- return this.#parent;
28
- }
29
- get children() {
30
- return this.#children;
31
- }
32
- get attributes() {
33
- if (this.#attr) {
34
- return this.#attr;
35
- }
36
- this.#attr = {};
37
- if (this.raw) {
38
- const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
39
- let match;
40
- while ((match = regex.exec(this.raw)) !== null) {
41
- this.#attr[match[1]] = match[3];
42
- }
43
- }
44
- return this.#attr;
45
- }
46
- get innerText() {
47
- if (this.children.length) {
48
- let text = "";
49
- for (const child of this.children) {
50
- text += child.innerText;
51
- }
52
- return text;
53
- }
54
- return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
55
- }
56
- addChild(child) {
57
- this.#children.push(child);
58
- }
59
- /**
60
- * Returns true if node and parents match the key hierarchy
61
- * @param keys - XML tag names
62
- */
63
- is(...keys) {
64
- if (!keys.length) return false;
65
- let parent;
66
- for (const key of keys.toReversed()) {
67
- parent = parent ? parent.parent : this;
68
- if (parent?.type !== key) {
69
- return false;
70
- }
71
- }
72
- return true;
73
- }
74
- /**
75
- * Return the first child matching the key
76
- * @param key - XML tag name
77
- */
78
- first(key) {
79
- return this.children.find((n) => n.type === key);
80
- }
81
- /**
82
- * Return all children matching the key hierarchy
83
- * @param keys - XML tag names
84
- */
85
- all(...keys) {
86
- let nodes = this.children;
87
- let found = [];
88
- for (const [i, k] of Object.entries(keys)) {
89
- if (Number.parseInt(i) === keys.length - 1) {
90
- found = nodes.filter((n) => n.type === k);
91
- break;
92
- }
93
- nodes = nodes?.find((n) => n.type === k)?.children;
94
- if (!nodes) return [];
95
- }
96
- return found;
97
- }
98
- };
99
-
100
- // node_modules/@dbushell/xml-streamify/src/stream.ts
101
- var ENTITIES = {
102
- cdata: {
103
- end: "]]>",
104
- start: /^<!\[CDATA\[/
105
- },
106
- comment: {
107
- end: "-->",
108
- start: /^<!--/
109
- },
110
- declaration: {
111
- end: "?>",
112
- start: /^<\?/
113
- },
114
- doctype: {
115
- end: ">",
116
- start: /^<!DOCTYPE/i
117
- },
118
- element: {
119
- end: ">",
120
- start: /^<[\w:.-/]/
121
- }
122
- };
123
- var transformer = {
124
- buf: "",
125
- state: "skip" /* SKIP */,
126
- previous: ["skip" /* SKIP */, -1],
127
- flush(controller) {
128
- if (this.buf.length > 0) {
129
- controller.enqueue(["text" /* TEXT */, this.buf]);
130
- }
131
- },
132
- transform(chunk, controller) {
133
- this.buf += chunk;
134
- while (this.buf.length) {
135
- if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
136
- break;
137
- }
138
- this.previous = [this.state, this.buf.length];
139
- if (this.state === "skip" /* SKIP */) {
140
- const index = this.buf.indexOf("<");
141
- if (index < 0) break;
142
- controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
143
- this.buf = this.buf.substring(index);
144
- this.state = "search" /* SEARCH */;
145
- }
146
- if (this.state === "search" /* SEARCH */) {
147
- if (this.buf.length < 3) break;
148
- for (const [state, entity] of Object.entries(ENTITIES)) {
149
- if (this.buf.match(entity.start)) {
150
- this.state = state;
151
- break;
152
- }
153
- }
154
- continue;
155
- }
156
- if (Object.hasOwn(ENTITIES, this.state)) {
157
- const { end } = ENTITIES[this.state];
158
- const index = this.buf.indexOf(end);
159
- if (index < 0) break;
160
- controller.enqueue([
161
- this.state,
162
- this.buf.substring(0, index + end.length)
163
- ]);
164
- this.buf = this.buf.substring(index + end.length);
165
- this.state = "skip" /* SKIP */;
166
- continue;
167
- }
168
- throw new Error();
169
- }
170
- }
171
- };
172
- var XMLStream = class extends TransformStream {
173
- constructor() {
174
- super({ ...transformer });
175
- }
176
- };
177
-
178
- // node_modules/@dbushell/xml-streamify/src/parse.ts
179
- var ignoreTypes = {
180
- ["comment" /* COMMENT */]: "ignoreComments",
181
- ["declaration" /* DECLARATION */]: "ignoreDeclaration",
182
- ["doctype" /* DOCTYPE */]: "ignoreDoctype"
183
- };
184
- async function* parse(input, options) {
185
- const document = new Node("@document");
186
- try {
187
- const init = { ...options?.fetchOptions };
188
- if (options?.signal) {
189
- init.signal = options.signal;
190
- }
191
- let source;
192
- if (typeof input === "string" || input instanceof URL) {
193
- input = new URL(input);
194
- const response = await fetch(input, init);
195
- if (!response.ok || !response.body) {
196
- throw new Error(`Bad response`);
197
- }
198
- source = response.body;
199
- } else {
200
- source = input;
201
- }
202
- const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
203
- signal: options?.signal
204
- });
205
- let node = document;
206
- for await (const [type, value] of stream) {
207
- if (options?.signal?.aborted) {
208
- break;
209
- }
210
- if (type === "text" /* TEXT */) {
211
- if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
212
- continue;
213
- }
214
- }
215
- if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
216
- const newNode = new Node(type, node, value);
217
- node.addChild(newNode);
218
- yield newNode;
219
- continue;
220
- }
221
- if (type === "element" /* ELEMENT */) {
222
- const name = value.match(/<\/?([\w:.-]+)/)[1];
223
- if (value.endsWith("/>")) {
224
- const newNode2 = new Node(name, node, value);
225
- node.addChild(newNode2);
226
- yield newNode2;
227
- continue;
228
- }
229
- if (value.startsWith("</")) {
230
- yield node;
231
- node = node.parent;
232
- continue;
233
- }
234
- const newNode = new Node(name, node, value);
235
- node.addChild(newNode);
236
- node = newNode;
237
- continue;
238
- }
239
- node.addChild(new Node(type, node, value));
240
- }
241
- } catch (err) {
242
- if (options?.silent === false) {
243
- throw err;
244
- }
245
- }
246
- return document;
247
- }
3
+ // src/export-sqlite.ts
4
+ import { Database } from "bun:sqlite";
248
5
 
249
6
  // src/types.ts
250
7
  import { z } from "zod";
@@ -411,7 +168,9 @@ var Lexicon = z.object({
411
168
  synsets: z.array(Synset).min(0),
412
169
  syntacticBehaviors: z.array(SyntacticBehavior).min(0)
413
170
  });
414
- var partsOfSpeechList = PartsOfSpeech.options.map((v) => v.value);
171
+ var partsOfSpeechList = PartsOfSpeech.options.map(
172
+ (v) => v.value
173
+ );
415
174
 
416
175
  // src/helpers.ts
417
176
  function PronunciationNode(node) {
@@ -439,7 +198,7 @@ function SenseRelationNode(node) {
439
198
  dcType: optAttr(node, "dc:type")
440
199
  };
441
200
  return SenseRelation.parse(
442
- extendWithRestAttr(node, obj, (s) => s == "dc:type" ? "dcType" : s)
201
+ extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
443
202
  );
444
203
  }
445
204
  function SenseNode(node) {
@@ -455,7 +214,7 @@ function SenseNode(node) {
455
214
  extendWithRestAttr(
456
215
  node,
457
216
  obj,
458
- (s) => s == "subcat" ? "subCat" : s == "adjposition" ? "adjPosition" : s
217
+ (s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
459
218
  )
460
219
  );
461
220
  }
@@ -486,7 +245,7 @@ function ExampleNode(node) {
486
245
  dcSource: optAttr(node, "dc:source")
487
246
  };
488
247
  return Example.parse(
489
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
248
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
490
249
  );
491
250
  }
492
251
  function ILIDefinitionNode(node) {
@@ -523,7 +282,7 @@ function SynsetNode(node) {
523
282
  synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
524
283
  };
525
284
  return Synset.parse(
526
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
285
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
527
286
  );
528
287
  }
529
288
  function LexiconNode(node) {
@@ -552,7 +311,9 @@ var decodeXmlEntities = (s) => {
552
311
  var attr = (node, attrName) => {
553
312
  const value = decodeXmlEntities(node.attributes[attrName]);
554
313
  if (value === void 0) {
555
- throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
314
+ throw new Error(
315
+ `Missing required attribute "${attrName}" on node "${node.type}"`
316
+ );
556
317
  }
557
318
  return value;
558
319
  };
@@ -570,21 +331,505 @@ var extendWithRestAttr = (node, obj, proxy) => {
570
331
  return Object.assign(obj, restAttrs(node, obj, proxy));
571
332
  };
572
333
  var children = (node, type, fn) => {
573
- return node.children.filter((v) => v.type == type).map((v) => fn(v));
334
+ return node.children.filter((v) => v.type === type).map((v) => fn(v));
574
335
  };
575
336
 
576
- // src/loader.ts
577
- var BASE_VERSION = "2024";
578
- function getFilename(version) {
579
- return `english-wordnet-${version}.xml`;
580
- }
581
- function getDownloadUrl(version) {
582
- return `https://en-word.net/static/${getFilename(version)}.gz`;
583
- }
584
- function getDefaultCacheDir() {
585
- const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
586
- return path.join(homeDir, ".cache", "synset");
587
- }
337
+ // src/export-sqlite.ts
338
+ var SCHEMA = `
339
+ CREATE TABLE IF NOT EXISTS words (
340
+ id INTEGER PRIMARY KEY,
341
+ word TEXT NOT NULL,
342
+ word_display TEXT NOT NULL
343
+ );
344
+ CREATE INDEX IF NOT EXISTS idx_words_word ON words(word);
345
+
346
+ CREATE TABLE IF NOT EXISTS synsets (
347
+ id TEXT PRIMARY KEY,
348
+ pos TEXT NOT NULL,
349
+ definition TEXT NOT NULL
350
+ );
351
+
352
+ CREATE TABLE IF NOT EXISTS word_synsets (
353
+ word_id INTEGER NOT NULL,
354
+ synset_id TEXT NOT NULL,
355
+ PRIMARY KEY (word_id, synset_id)
356
+ );
357
+ CREATE INDEX IF NOT EXISTS idx_ws_word ON word_synsets(word_id);
358
+ `;
359
+ function exportToSQLite(lexicon, outputPath, options = {}) {
360
+ const { onProgress } = options;
361
+ const db = new Database(outputPath, { create: true });
362
+ db.exec("PRAGMA journal_mode = OFF");
363
+ db.exec("PRAGMA synchronous = OFF");
364
+ db.exec(SCHEMA);
365
+ const wordToEntries = /* @__PURE__ */ new Map();
366
+ for (const entry of lexicon.lexicalEntries) {
367
+ const word = entry.lemmas[0]?.writtenForm;
368
+ if (word) {
369
+ const lower = word.toLowerCase();
370
+ const existing = wordToEntries.get(lower) || [];
371
+ existing.push(entry);
372
+ wordToEntries.set(lower, existing);
373
+ }
374
+ }
375
+ const synsetMap = /* @__PURE__ */ new Map();
376
+ for (const synset of lexicon.synsets) {
377
+ synsetMap.set(synset.id, synset);
378
+ }
379
+ const insertWord = db.prepare(
380
+ "INSERT INTO words (word, word_display) VALUES (?, ?)"
381
+ );
382
+ const wordIds = /* @__PURE__ */ new Map();
383
+ const words = Array.from(wordToEntries.keys()).sort();
384
+ const totalWords = words.length;
385
+ db.exec("BEGIN TRANSACTION");
386
+ let wordId = 0;
387
+ for (let i = 0; i < words.length; i++) {
388
+ const word = words[i];
389
+ const entries = wordToEntries.get(word);
390
+ if (!entries) continue;
391
+ const display = entries[0].lemmas[0]?.writtenForm || word;
392
+ insertWord.run(word, display);
393
+ wordId++;
394
+ wordIds.set(word, wordId);
395
+ if (onProgress && i % 1e4 === 0) {
396
+ onProgress({ phase: "words", current: i, total: totalWords });
397
+ }
398
+ }
399
+ db.exec("COMMIT");
400
+ const usedSynsetIds = /* @__PURE__ */ new Set();
401
+ for (const entries of wordToEntries.values()) {
402
+ for (const entry of entries) {
403
+ for (const sense of entry.senses) {
404
+ usedSynsetIds.add(sense.synset);
405
+ }
406
+ }
407
+ }
408
+ const insertSynset = db.prepare(
409
+ "INSERT OR IGNORE INTO synsets (id, pos, definition) VALUES (?, ?, ?)"
410
+ );
411
+ const synsetList = Array.from(usedSynsetIds);
412
+ const totalSynsets = synsetList.length;
413
+ db.exec("BEGIN TRANSACTION");
414
+ for (let i = 0; i < synsetList.length; i++) {
415
+ const synsetId = synsetList[i];
416
+ const synset = synsetMap.get(synsetId);
417
+ if (synset) {
418
+ const def = decodeXmlEntities(synset.definitions[0]?.inner) || "";
419
+ insertSynset.run(synsetId, synset.partOfSpeech, def);
420
+ }
421
+ if (onProgress && i % 1e4 === 0) {
422
+ onProgress({ phase: "synsets", current: i, total: totalSynsets });
423
+ }
424
+ }
425
+ db.exec("COMMIT");
426
+ const insertRelation = db.prepare(
427
+ "INSERT OR IGNORE INTO word_synsets (word_id, synset_id) VALUES (?, ?)"
428
+ );
429
+ let relationCount = 0;
430
+ const totalRelations = Array.from(wordToEntries.values()).reduce(
431
+ (sum, entries) => sum + entries.reduce((s, e) => s + e.senses.length, 0),
432
+ 0
433
+ );
434
+ db.exec("BEGIN TRANSACTION");
435
+ for (const [word, entries] of wordToEntries) {
436
+ const wordId2 = wordIds.get(word);
437
+ if (!wordId2) continue;
438
+ for (const entry of entries) {
439
+ for (const sense of entry.senses) {
440
+ insertRelation.run(wordId2, sense.synset);
441
+ relationCount++;
442
+ if (onProgress && relationCount % 1e4 === 0) {
443
+ onProgress({
444
+ phase: "relations",
445
+ current: relationCount,
446
+ total: totalRelations
447
+ });
448
+ }
449
+ }
450
+ }
451
+ }
452
+ db.exec("COMMIT");
453
+ db.close();
454
+ }
455
+
456
+ // src/literals.ts
457
+ var PartsOfSpeech2 = {
458
+ n: "Noun",
459
+ v: "Verb",
460
+ a: "Adjective",
461
+ r: "Adverb",
462
+ s: "Adjective Satellite",
463
+ t: "?",
464
+ c: "Conjunction",
465
+ p: "Adposition (Preposition, postposition, etc.)",
466
+ x: "Other (inc. particle, classifier, bound morphemes, determiners)",
467
+ u: "Unknown"
468
+ };
469
+ var SynsetRelationRelType2 = {
470
+ agent: "Agent",
471
+ also: "See also",
472
+ anto_converse: "Converse antonym",
473
+ anto_gradable: "Gradable antonym",
474
+ anto_simple: "Simple antonym",
475
+ antonym: "Antonym",
476
+ attribute: "Attribute",
477
+ augmentative: "Augmentative",
478
+ be_in_state: "Be in state",
479
+ cause: "Cause",
480
+ causes: "Causes",
481
+ classified_by: "Classified by",
482
+ classifies: "Classifies",
483
+ co_agent_instrument: "Co-agent instrument",
484
+ co_agent_patient: "Co-agent patient",
485
+ co_agent_result: "Co-agent result",
486
+ co_instrument_agent: "Co-instrument agent",
487
+ co_instrument_patient: "Co-instrument patient",
488
+ co_instrument_result: "Co-instrument result",
489
+ co_patient_agent: "Co-patient agent",
490
+ co_patient_instrument: "Co-patient instrument",
491
+ co_result_agent: "Co-result agent",
492
+ co_result_instrument: "Co-result instrument",
493
+ co_role: "Co-role",
494
+ diminutive: "Diminutive",
495
+ direction: "Direction",
496
+ domain_member_region: "Domain member region",
497
+ domain_member_topic: "Domain member topic",
498
+ domain_region: "Domain region",
499
+ domain_topic: "Domain topic",
500
+ entail: "Entail",
501
+ entails: "Entails",
502
+ eq_synonym: "Equivalent synonym",
503
+ exemplifies: "Exemplifies",
504
+ feminine: "Feminine",
505
+ has_augmentative: "Has augmentative",
506
+ has_diminutive: "Has diminutive",
507
+ has_domain_region: "Has domain region",
508
+ has_domain_topic: "Has domain topic",
509
+ has_feminine: "Has feminine",
510
+ has_masculine: "Has masculine",
511
+ has_young: "Has young",
512
+ holo_location: "Holonym location",
513
+ holo_member: "Member holonym",
514
+ holo_part: "Part holonym",
515
+ holo_portion: "Portion holonym",
516
+ holo_substance: "Substance holonym",
517
+ holonym: "Holonym",
518
+ hypernym: "Hypernym",
519
+ hyponym: "Hyponym",
520
+ in_manner: "In manner",
521
+ instance_hypernym: "Instance hypernym",
522
+ instance_hyponym: "Instance hyponym",
523
+ instrument: "Instrument",
524
+ involved: "Involved",
525
+ involved_agent: "Involved agent",
526
+ involved_direction: "Involved direction",
527
+ involved_instrument: "Involved instrument",
528
+ involved_location: "Involved location",
529
+ involved_patient: "Involved patient",
530
+ involved_result: "Involved result",
531
+ involved_source_direction: "Involved source direction",
532
+ involved_target_direction: "Involved target direction",
533
+ ir_synonym: "IR synonym",
534
+ is_caused_by: "Is caused by",
535
+ is_entailed_by: "Is entailed by",
536
+ is_exemplified_by: "Is exemplified by",
537
+ is_subevent_of: "Is subevent of",
538
+ location: "Location",
539
+ manner_of: "Manner of",
540
+ masculine: "Masculine",
541
+ member_holonym: "Member holonym",
542
+ member_meronym: "Member meronym",
543
+ mero_location: "Meronym location",
544
+ mero_member: "Member meronym",
545
+ mero_part: "Part meronym",
546
+ mero_portion: "Portion meronym",
547
+ mero_substance: "Substance meronym",
548
+ meronym: "Meronym",
549
+ other: "Other",
550
+ part_holonym: "Part holonym",
551
+ part_meronym: "Part meronym",
552
+ patient: "Patient",
553
+ restricted_by: "Restricted by",
554
+ restricts: "Restricts",
555
+ result: "Result",
556
+ role: "Role",
557
+ similar: "Similar",
558
+ source_direction: "Source direction",
559
+ state_of: "State of",
560
+ subevent: "Subevent",
561
+ substance_holonym: "Substance holonym",
562
+ substance_meronym: "Substance meronym",
563
+ target_direction: "Target direction",
564
+ young: "Young"
565
+ };
566
+
567
+ // src/loader.ts
568
+ import {
569
+ createReadStream,
570
+ existsSync,
571
+ mkdirSync,
572
+ readdirSync,
573
+ statSync,
574
+ writeFileSync
575
+ } from "fs";
576
+ import path from "path";
577
+ import { Readable } from "stream";
578
+
579
+ // node_modules/@dbushell/xml-streamify/src/node.ts
580
+ var Node = class {
581
+ #type;
582
+ #children;
583
+ #parent;
584
+ #attr;
585
+ #raw;
586
+ constructor(type, parent, raw) {
587
+ this.#type = type;
588
+ this.#parent = parent;
589
+ this.#raw = raw;
590
+ this.#children = [];
591
+ }
592
+ get type() {
593
+ return this.#type;
594
+ }
595
+ get raw() {
596
+ return this.#raw ?? "";
597
+ }
598
+ get parent() {
599
+ return this.#parent;
600
+ }
601
+ get children() {
602
+ return this.#children;
603
+ }
604
+ get attributes() {
605
+ if (this.#attr) {
606
+ return this.#attr;
607
+ }
608
+ this.#attr = {};
609
+ if (this.raw) {
610
+ const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
611
+ let match;
612
+ while ((match = regex.exec(this.raw)) !== null) {
613
+ this.#attr[match[1]] = match[3];
614
+ }
615
+ }
616
+ return this.#attr;
617
+ }
618
+ get innerText() {
619
+ if (this.children.length) {
620
+ let text = "";
621
+ for (const child of this.children) {
622
+ text += child.innerText;
623
+ }
624
+ return text;
625
+ }
626
+ return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
627
+ }
628
+ addChild(child) {
629
+ this.#children.push(child);
630
+ }
631
+ /**
632
+ * Returns true if node and parents match the key hierarchy
633
+ * @param keys - XML tag names
634
+ */
635
+ is(...keys) {
636
+ if (!keys.length) return false;
637
+ let parent;
638
+ for (const key of keys.toReversed()) {
639
+ parent = parent ? parent.parent : this;
640
+ if (parent?.type !== key) {
641
+ return false;
642
+ }
643
+ }
644
+ return true;
645
+ }
646
+ /**
647
+ * Return the first child matching the key
648
+ * @param key - XML tag name
649
+ */
650
+ first(key) {
651
+ return this.children.find((n) => n.type === key);
652
+ }
653
+ /**
654
+ * Return all children matching the key hierarchy
655
+ * @param keys - XML tag names
656
+ */
657
+ all(...keys) {
658
+ let nodes = this.children;
659
+ let found = [];
660
+ for (const [i, k] of Object.entries(keys)) {
661
+ if (Number.parseInt(i) === keys.length - 1) {
662
+ found = nodes.filter((n) => n.type === k);
663
+ break;
664
+ }
665
+ nodes = nodes?.find((n) => n.type === k)?.children;
666
+ if (!nodes) return [];
667
+ }
668
+ return found;
669
+ }
670
+ };
671
+
672
+ // node_modules/@dbushell/xml-streamify/src/stream.ts
673
+ var ENTITIES = {
674
+ cdata: {
675
+ end: "]]>",
676
+ start: /^<!\[CDATA\[/
677
+ },
678
+ comment: {
679
+ end: "-->",
680
+ start: /^<!--/
681
+ },
682
+ declaration: {
683
+ end: "?>",
684
+ start: /^<\?/
685
+ },
686
+ doctype: {
687
+ end: ">",
688
+ start: /^<!DOCTYPE/i
689
+ },
690
+ element: {
691
+ end: ">",
692
+ start: /^<[\w:.-/]/
693
+ }
694
+ };
695
+ var transformer = {
696
+ buf: "",
697
+ state: "skip" /* SKIP */,
698
+ previous: ["skip" /* SKIP */, -1],
699
+ flush(controller) {
700
+ if (this.buf.length > 0) {
701
+ controller.enqueue(["text" /* TEXT */, this.buf]);
702
+ }
703
+ },
704
+ transform(chunk, controller) {
705
+ this.buf += chunk;
706
+ while (this.buf.length) {
707
+ if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
708
+ break;
709
+ }
710
+ this.previous = [this.state, this.buf.length];
711
+ if (this.state === "skip" /* SKIP */) {
712
+ const index = this.buf.indexOf("<");
713
+ if (index < 0) break;
714
+ controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
715
+ this.buf = this.buf.substring(index);
716
+ this.state = "search" /* SEARCH */;
717
+ }
718
+ if (this.state === "search" /* SEARCH */) {
719
+ if (this.buf.length < 3) break;
720
+ for (const [state, entity] of Object.entries(ENTITIES)) {
721
+ if (this.buf.match(entity.start)) {
722
+ this.state = state;
723
+ break;
724
+ }
725
+ }
726
+ continue;
727
+ }
728
+ if (Object.hasOwn(ENTITIES, this.state)) {
729
+ const { end } = ENTITIES[this.state];
730
+ const index = this.buf.indexOf(end);
731
+ if (index < 0) break;
732
+ controller.enqueue([
733
+ this.state,
734
+ this.buf.substring(0, index + end.length)
735
+ ]);
736
+ this.buf = this.buf.substring(index + end.length);
737
+ this.state = "skip" /* SKIP */;
738
+ continue;
739
+ }
740
+ throw new Error();
741
+ }
742
+ }
743
+ };
744
+ var XMLStream = class extends TransformStream {
745
+ constructor() {
746
+ super({ ...transformer });
747
+ }
748
+ };
749
+
750
+ // node_modules/@dbushell/xml-streamify/src/parse.ts
751
+ var ignoreTypes = {
752
+ ["comment" /* COMMENT */]: "ignoreComments",
753
+ ["declaration" /* DECLARATION */]: "ignoreDeclaration",
754
+ ["doctype" /* DOCTYPE */]: "ignoreDoctype"
755
+ };
756
+ async function* parse(input, options) {
757
+ const document = new Node("@document");
758
+ try {
759
+ const init = { ...options?.fetchOptions };
760
+ if (options?.signal) {
761
+ init.signal = options.signal;
762
+ }
763
+ let source;
764
+ if (typeof input === "string" || input instanceof URL) {
765
+ input = new URL(input);
766
+ const response = await fetch(input, init);
767
+ if (!response.ok || !response.body) {
768
+ throw new Error(`Bad response`);
769
+ }
770
+ source = response.body;
771
+ } else {
772
+ source = input;
773
+ }
774
+ const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
775
+ signal: options?.signal
776
+ });
777
+ let node = document;
778
+ for await (const [type, value] of stream) {
779
+ if (options?.signal?.aborted) {
780
+ break;
781
+ }
782
+ if (type === "text" /* TEXT */) {
783
+ if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
784
+ continue;
785
+ }
786
+ }
787
+ if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
788
+ const newNode = new Node(type, node, value);
789
+ node.addChild(newNode);
790
+ yield newNode;
791
+ continue;
792
+ }
793
+ if (type === "element" /* ELEMENT */) {
794
+ const name = value.match(/<\/?([\w:.-]+)/)[1];
795
+ if (value.endsWith("/>")) {
796
+ const newNode2 = new Node(name, node, value);
797
+ node.addChild(newNode2);
798
+ yield newNode2;
799
+ continue;
800
+ }
801
+ if (value.startsWith("</")) {
802
+ yield node;
803
+ node = node.parent;
804
+ continue;
805
+ }
806
+ const newNode = new Node(name, node, value);
807
+ node.addChild(newNode);
808
+ node = newNode;
809
+ continue;
810
+ }
811
+ node.addChild(new Node(type, node, value));
812
+ }
813
+ } catch (err) {
814
+ if (options?.silent === false) {
815
+ throw err;
816
+ }
817
+ }
818
+ return document;
819
+ }
820
+
821
+ // src/loader.ts
822
+ var BASE_VERSION = "2024";
823
+ function getFilename(version) {
824
+ return `english-wordnet-${version}.xml`;
825
+ }
826
+ function getDownloadUrl(version) {
827
+ return `https://en-word.net/static/${getFilename(version)}.gz`;
828
+ }
829
+ function getDefaultCacheDir() {
830
+ const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
831
+ return path.join(homeDir, ".cache", "synset");
832
+ }
588
833
  function fileExists(filePath) {
589
834
  if (existsSync(filePath)) {
590
835
  const stat = statSync(filePath);
@@ -638,7 +883,6 @@ async function findLatestVersion(onProgress, cacheDir) {
638
883
  for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
639
884
  const version = year.toString();
640
885
  if (await urlExists(getDownloadUrl(version))) {
641
- continue;
642
886
  } else {
643
887
  return (year - 1).toString();
644
888
  }
@@ -659,9 +903,13 @@ async function downloadWordNet(version, destPath) {
659
903
  const url = getDownloadUrl(version);
660
904
  const response = await fetch(url);
661
905
  if (!response.ok || !response.body) {
662
- throw new Error(`Failed to download WordNet ${version}: ${response.statusText}`);
906
+ throw new Error(
907
+ `Failed to download WordNet ${version}: ${response.statusText}`
908
+ );
663
909
  }
664
- const decompressed = response.body.pipeThrough(new DecompressionStream("gzip"));
910
+ const decompressed = response.body.pipeThrough(
911
+ new DecompressionStream("gzip")
912
+ );
665
913
  const arrayBuffer = await new Response(decompressed).arrayBuffer();
666
914
  const dir = path.dirname(destPath);
667
915
  if (!existsSync(dir)) {
@@ -671,8 +919,9 @@ async function downloadWordNet(version, destPath) {
671
919
  }
672
920
  function createParser(filePath) {
673
921
  const resolvedPath = path.resolve(filePath);
674
- const fileUrl = resolvedPath.startsWith("/") ? `file://${resolvedPath}` : `file:///${resolvedPath.replace(/\\/g, "/")}`;
675
- return parse(fileUrl, {
922
+ const nodeStream = createReadStream(resolvedPath);
923
+ const webStream = Readable.toWeb(nodeStream);
924
+ return parse(webStream, {
676
925
  ignoreDeclaration: false,
677
926
  silent: false
678
927
  });
@@ -825,117 +1074,6 @@ function getSynsetWords(index, synset) {
825
1074
  return synset.members.map((id) => index.entries.get(id)).filter((e) => e !== void 0).map((e) => e.lemmas[0]?.writtenForm).filter((w) => w !== void 0);
826
1075
  }
827
1076
 
828
- // src/literals.ts
829
- var PartsOfSpeech2 = {
830
- n: "Noun",
831
- v: "Verb",
832
- a: "Adjective",
833
- r: "Adverb",
834
- s: "Adjective Satellite",
835
- t: "?",
836
- c: "Conjunction",
837
- p: "Adposition (Preposition, postposition, etc.)",
838
- x: "Other (inc. particle, classifier, bound morphemes, determiners)",
839
- u: "Unknown"
840
- };
841
- var SynsetRelationRelType2 = {
842
- agent: "Agent",
843
- also: "See also",
844
- anto_converse: "Converse antonym",
845
- anto_gradable: "Gradable antonym",
846
- anto_simple: "Simple antonym",
847
- antonym: "Antonym",
848
- attribute: "Attribute",
849
- augmentative: "Augmentative",
850
- be_in_state: "Be in state",
851
- cause: "Cause",
852
- causes: "Causes",
853
- classified_by: "Classified by",
854
- classifies: "Classifies",
855
- co_agent_instrument: "Co-agent instrument",
856
- co_agent_patient: "Co-agent patient",
857
- co_agent_result: "Co-agent result",
858
- co_instrument_agent: "Co-instrument agent",
859
- co_instrument_patient: "Co-instrument patient",
860
- co_instrument_result: "Co-instrument result",
861
- co_patient_agent: "Co-patient agent",
862
- co_patient_instrument: "Co-patient instrument",
863
- co_result_agent: "Co-result agent",
864
- co_result_instrument: "Co-result instrument",
865
- co_role: "Co-role",
866
- diminutive: "Diminutive",
867
- direction: "Direction",
868
- domain_member_region: "Domain member region",
869
- domain_member_topic: "Domain member topic",
870
- domain_region: "Domain region",
871
- domain_topic: "Domain topic",
872
- entail: "Entail",
873
- entails: "Entails",
874
- eq_synonym: "Equivalent synonym",
875
- exemplifies: "Exemplifies",
876
- feminine: "Feminine",
877
- has_augmentative: "Has augmentative",
878
- has_diminutive: "Has diminutive",
879
- has_domain_region: "Has domain region",
880
- has_domain_topic: "Has domain topic",
881
- has_feminine: "Has feminine",
882
- has_masculine: "Has masculine",
883
- has_young: "Has young",
884
- holo_location: "Holonym location",
885
- holo_member: "Member holonym",
886
- holo_part: "Part holonym",
887
- holo_portion: "Portion holonym",
888
- holo_substance: "Substance holonym",
889
- holonym: "Holonym",
890
- hypernym: "Hypernym",
891
- hyponym: "Hyponym",
892
- in_manner: "In manner",
893
- instance_hypernym: "Instance hypernym",
894
- instance_hyponym: "Instance hyponym",
895
- instrument: "Instrument",
896
- involved: "Involved",
897
- involved_agent: "Involved agent",
898
- involved_direction: "Involved direction",
899
- involved_instrument: "Involved instrument",
900
- involved_location: "Involved location",
901
- involved_patient: "Involved patient",
902
- involved_result: "Involved result",
903
- involved_source_direction: "Involved source direction",
904
- involved_target_direction: "Involved target direction",
905
- ir_synonym: "IR synonym",
906
- is_caused_by: "Is caused by",
907
- is_entailed_by: "Is entailed by",
908
- is_exemplified_by: "Is exemplified by",
909
- is_subevent_of: "Is subevent of",
910
- location: "Location",
911
- manner_of: "Manner of",
912
- masculine: "Masculine",
913
- member_holonym: "Member holonym",
914
- member_meronym: "Member meronym",
915
- mero_location: "Meronym location",
916
- mero_member: "Member meronym",
917
- mero_part: "Part meronym",
918
- mero_portion: "Portion meronym",
919
- mero_substance: "Substance meronym",
920
- meronym: "Meronym",
921
- other: "Other",
922
- part_holonym: "Part holonym",
923
- part_meronym: "Part meronym",
924
- patient: "Patient",
925
- restricted_by: "Restricted by",
926
- restricts: "Restricts",
927
- result: "Result",
928
- role: "Role",
929
- similar: "Similar",
930
- source_direction: "Source direction",
931
- state_of: "State of",
932
- subevent: "Subevent",
933
- substance_holonym: "Substance holonym",
934
- substance_meronym: "Substance meronym",
935
- target_direction: "Target direction",
936
- young: "Young"
937
- };
938
-
939
1077
  // src/cli.ts
940
1078
  var decode = (s) => decodeXmlEntities(s) ?? "";
941
1079
  var HELP = `
@@ -952,6 +1090,7 @@ Commands:
952
1090
  related <word> Show all relations for a word
953
1091
  info <synset-id> Show details for a synset ID
954
1092
  fetch Download WordNet data to cache
1093
+ export-sqlite <out> Export dictionary to SQLite database
955
1094
 
956
1095
  Options:
957
1096
  --file <path> Use a local WordNet XML file instead of cache
@@ -962,6 +1101,7 @@ Examples:
962
1101
  synset synonyms happy
963
1102
  synset related computer --file ./wordnet.xml
964
1103
  synset fetch
1104
+ synset export-sqlite dictionary.db
965
1105
  `;
966
1106
  async function main() {
967
1107
  const args = process.argv.slice(2);
@@ -983,6 +1123,24 @@ async function main() {
983
1123
  console.log(`WordNet ${version} cached at: ${cachedPath}`);
984
1124
  return;
985
1125
  }
1126
+ if (command === "export-sqlite") {
1127
+ const outputPath = cleanArgs[1];
1128
+ if (!outputPath) {
1129
+ console.error("Error: Missing output path for export-sqlite");
1130
+ process.exit(1);
1131
+ }
1132
+ console.log("Loading WordNet data...");
1133
+ const lexicon2 = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
1134
+ console.log(`Exporting to ${outputPath}...`);
1135
+ exportToSQLite(lexicon2, outputPath, {
1136
+ onProgress: ({ phase, current, total }) => {
1137
+ process.stdout.write(`\r${phase}: ${current}/${total}`);
1138
+ }
1139
+ });
1140
+ console.log(`
1141
+ Exported to ${outputPath}`);
1142
+ return;
1143
+ }
986
1144
  if (!word && command !== "fetch") {
987
1145
  console.error(`Error: Missing word argument for command '${command}'`);
988
1146
  process.exit(1);
@@ -1066,7 +1224,7 @@ async function main() {
1066
1224
  for (const [relType, words] of relsByType) {
1067
1225
  const label = SynsetRelationRelType2[relType] || relType;
1068
1226
  console.log(` ${label}:`);
1069
- words.forEach((w) => console.log(` - ${w}`));
1227
+ for (const w of words) console.log(` - ${w}`);
1070
1228
  }
1071
1229
  }
1072
1230
  break;
@@ -1085,11 +1243,12 @@ async function main() {
1085
1243
  console.log(`ILI: ${synset.ili}`);
1086
1244
  console.log(`
1087
1245
  Definitions:`);
1088
- synset.definitions.forEach((d) => console.log(` - ${decode(d.inner)}`));
1246
+ for (const d of synset.definitions) console.log(` - ${decode(d.inner)}`);
1089
1247
  if (synset.examples.length > 0) {
1090
1248
  console.log(`
1091
1249
  Examples:`);
1092
- synset.examples.forEach((e) => console.log(` - "${decode(e.inner)}"`));
1250
+ for (const e of synset.examples)
1251
+ console.log(` - "${decode(e.inner)}"`);
1093
1252
  }
1094
1253
  if (synset.synsetRelations.length > 0) {
1095
1254
  console.log(`