synset 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs ADDED
@@ -0,0 +1,1139 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
// esbuild CJS/ESM interop helpers: cached intrinsics plus the two shims
// used below to re-export CommonJS modules with ESM-style "default".
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Copy every own property of `from` onto `to` as a live getter, skipping
// keys already present on `to` and the optional `except` key.
var __copyProps = (to, from, except, desc) => {
  if (from && (typeof from === "object" || typeof from === "function")) {
    for (const key of __getOwnPropNames(from)) {
      if (__hasOwnProp.call(to, key) || key === except) continue;
      desc = __getOwnPropDesc(from, key);
      __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
    }
  }
  return to;
};
// Wrap a CommonJS export object so it can be consumed as an ES module.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const dest = isNodeMode || !mod || !mod.__esModule
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  return __copyProps(dest, mod);
};
25
+
26
+ // src/loader.ts
27
+ var import_node_fs = require("fs");
28
+ var import_node_path = __toESM(require("path"), 1);
29
+
30
+ // node_modules/@dbushell/xml-streamify/src/node.ts
31
// A single parsed XML node: tag type, parent link, raw tag text and children.
var Node = class {
  #type;
  #children;
  #parent;
  #attr;
  #raw;
  /**
   * @param type - XML tag name (or a synthetic type such as "@document")
   * @param parent - enclosing node, if any
   * @param raw - raw XML text this node was parsed from
   */
  constructor(type, parent, raw) {
    this.#type = type;
    this.#parent = parent;
    this.#raw = raw;
    this.#children = [];
  }
  get type() {
    return this.#type;
  }
  get raw() {
    return this.#raw ?? "";
  }
  get parent() {
    return this.#parent;
  }
  get children() {
    return this.#children;
  }
  /** Attribute map parsed lazily (and cached) from the raw opening tag. */
  get attributes() {
    if (this.#attr === void 0) {
      this.#attr = {};
      if (this.raw) {
        const attrPattern = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
        for (const found of this.raw.matchAll(attrPattern)) {
          this.#attr[found[1]] = found[3];
        }
      }
    }
    return this.#attr;
  }
  /** Concatenated descendant text; unwraps one CDATA section on leaf nodes. */
  get innerText() {
    if (this.children.length > 0) {
      return this.children.map((child) => child.innerText).join("");
    }
    const cdata = this.raw.match(/<!\[CDATA\[(.*?)]]>/s);
    return cdata ? cdata[1] : this.raw;
  }
  addChild(child) {
    this.#children.push(child);
  }
  /**
   * Returns true if node and parents match the key hierarchy
   * @param keys - XML tag names
   */
  is(...keys) {
    if (keys.length === 0) return false;
    let current = this;
    // Walk upward, comparing against the keys from last to first.
    for (let i = keys.length - 1; i >= 0; i--) {
      if (current?.type !== keys[i]) return false;
      current = current.parent;
    }
    return true;
  }
  /**
   * Return the first child matching the key
   * @param key - XML tag name
   */
  first(key) {
    return this.children.find((child) => child.type === key);
  }
  /**
   * Return all children matching the key hierarchy
   * @param keys - XML tag names
   */
  all(...keys) {
    if (keys.length === 0) return [];
    let nodes = this.children;
    // Descend through the first match of each intermediate key.
    for (let i = 0; i < keys.length - 1; i++) {
      nodes = nodes.find((child) => child.type === keys[i])?.children;
      if (!nodes) return [];
    }
    return nodes.filter((child) => child.type === keys[keys.length - 1]);
  }
};
122
+
123
+ // node_modules/@dbushell/xml-streamify/src/stream.ts
124
// Recognized XML entity kinds. Order matters: the SEARCH state tries them
// top-to-bottom, so the more specific prefixes (CDATA, comment, declaration,
// DOCTYPE) must come before the generic element match.
var ENTITIES = {
  cdata: {
    end: "]]>",
    start: /^<!\[CDATA\[/
  },
  comment: {
    end: "-->",
    start: /^<!--/
  },
  declaration: {
    end: "?>",
    start: /^<\?/
  },
  doctype: {
    end: ">",
    start: /^<!DOCTYPE/i
  },
  element: {
    end: ">",
    start: /^<[\w:.-/]/
  }
};
// TransformStream handlers implementing a small tokenizer state machine:
// SKIP (emit text up to the next "<") -> SEARCH (classify the entity) ->
// one of the ENTITIES states (emit the full entity once its end marker
// arrives). Emits [type, rawText] tuples.
var transformer = {
  buf: "",
  state: "skip" /* SKIP */,
  previous: ["skip" /* SKIP */, -1],
  // On end of input, flush any trailing unparsed text as a text token.
  flush(controller) {
    if (this.buf.length > 0) {
      controller.enqueue(["text" /* TEXT */, this.buf]);
    }
  },
  transform(chunk, controller) {
    this.buf += chunk;
    while (this.buf.length) {
      // No progress since the last iteration (same state, same buffer
      // length) means we need more input; wait for the next chunk.
      if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
        break;
      }
      this.previous = [this.state, this.buf.length];
      if (this.state === "skip" /* SKIP */) {
        const index = this.buf.indexOf("<");
        if (index < 0) break;
        controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
        this.buf = this.buf.substring(index);
        this.state = "search" /* SEARCH */;
      }
      if (this.state === "search" /* SEARCH */) {
        // Need a few characters to disambiguate the entity prefix.
        if (this.buf.length < 3) break;
        for (const [state, entity] of Object.entries(ENTITIES)) {
          if (this.buf.match(entity.start)) {
            this.state = state;
            break;
          }
        }
        continue;
      }
      if (Object.hasOwn(ENTITIES, this.state)) {
        const { end } = ENTITIES[this.state];
        const index = this.buf.indexOf(end);
        // End marker not buffered yet; wait for more input.
        if (index < 0) break;
        controller.enqueue([
          this.state,
          this.buf.substring(0, index + end.length)
        ]);
        this.buf = this.buf.substring(index + end.length);
        this.state = "skip" /* SKIP */;
        continue;
      }
      // Unreachable unless the state machine is corrupted; include the
      // offending state so failures are diagnosable (was: new Error()).
      throw new Error(`XML stream parser reached unknown state: ${this.state}`);
    }
  }
};
195
// TransformStream that tokenizes XML text into [type, raw] tuples.
// Each instance gets its own copy of the shared transformer's state.
var XMLStream = class extends TransformStream {
  constructor() {
    const handlers = { ...transformer };
    super(handlers);
  }
};
200
+
201
+ // node_modules/@dbushell/xml-streamify/src/parse.ts
202
// Maps a token type to the parse() option that suppresses it by default.
// (The computed-key syntax of the original is unnecessary: the enum values
// are the plain strings "comment", "declaration" and "doctype".)
var ignoreTypes = {
  comment: "ignoreComments",
  declaration: "ignoreDeclaration",
  doctype: "ignoreDoctype"
};
207
/**
 * Stream-parse XML into a Node tree, yielding nodes as they complete.
 *
 * @param input - URL/string (fetched over the network) or a ReadableStream
 *   of bytes. NOTE(review): string/URL input is always passed to fetch();
 *   file:// URLs only work if the runtime's fetch supports them — confirm.
 * @param options - ignoreWhitespace / ignoreComments / ignoreDeclaration /
 *   ignoreDoctype / silent / signal / fetchOptions.
 * @returns the synthetic "@document" root node once the stream ends.
 *
 * Errors are swallowed by design unless options.silent === false.
 */
async function* parse(input, options) {
  const document = new Node("@document");
  try {
    const init = { ...options?.fetchOptions };
    if (options?.signal) {
      init.signal = options.signal;
    }
    let source;
    if (typeof input === "string" || input instanceof URL) {
      input = new URL(input);
      const response = await fetch(input, init);
      if (!response.ok || !response.body) {
        throw new Error(`Bad response`);
      }
      source = response.body;
    } else {
      source = input;
    }
    // bytes -> text -> [type, raw] tokens
    const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
      signal: options?.signal
    });
    // `node` tracks the currently-open element while building the tree.
    let node = document;
    for await (const [type, value] of stream) {
      if (options?.signal?.aborted) {
        break;
      }
      if (type === "text" /* TEXT */) {
        // Whitespace-only text is dropped unless explicitly requested.
        if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
          continue;
        }
      }
      // Comments/declaration/doctype are kept only when the corresponding
      // ignore option is explicitly set to false.
      if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
        const newNode = new Node(type, node, value);
        node.addChild(newNode);
        yield newNode;
        continue;
      }
      if (type === "element" /* ELEMENT */) {
        const name = value.match(/<\/?([\w:.-]+)/)[1];
        // Self-closing tag: complete immediately.
        if (value.endsWith("/>")) {
          const newNode2 = new Node(name, node, value);
          node.addChild(newNode2);
          yield newNode2;
          continue;
        }
        // Closing tag: the open element is complete; pop back to parent.
        if (value.startsWith("</")) {
          yield node;
          node = node.parent;
          continue;
        }
        // Opening tag: descend into the new element.
        const newNode = new Node(name, node, value);
        node.addChild(newNode);
        node = newNode;
        continue;
      }
      // Text / CDATA content attaches to the current element.
      node.addChild(new Node(type, node, value));
    }
  } catch (err) {
    // Best-effort by default: only rethrow when silent === false.
    if (options?.silent === false) {
      throw err;
    }
  }
  return document;
}
271
+
272
+ // src/types.ts
273
+ var import_zod = require("zod");
274
// String aliases for the WordNet-LMF identifier kinds. They are all plain
// strings at runtime; the distinct names document intent at use sites.
var LexiconId = import_zod.z.string();
var LexicalEntryId = import_zod.z.string();
var SynsetId = import_zod.z.string();
var SenseId = import_zod.z.string();
var SyntacticBehaviorId = import_zod.z.string();
279
// One-letter part-of-speech codes (WordNet-LMF). Built as a union of
// literals (not z.enum) because partsOfSpeechList reads `.options[].value`.
var PartsOfSpeech = import_zod.z.union(
  ["a", "c", "n", "p", "r", "s", "u", "v", "x"].map((code) => import_zod.z.literal(code))
);
290
// Valid relType values for a SenseRelation element.
var SenseRelationRelType = import_zod.z.union(
  [
    "also",
    "antonym",
    "derivation",
    "domain_member_region",
    "domain_member_topic",
    "domain_region",
    "domain_topic",
    "exemplifies",
    "is_exemplified_by",
    // TODO: Then "dc:type" attribute should define what relation
    "other",
    "participle",
    "pertainym",
    "similar"
  ].map((relType) => import_zod.z.literal(relType))
);
306
// Valid relType values for a SynsetRelation element.
var SynsetRelationRelType = import_zod.z.union(
  [
    "also",
    "attribute",
    "cause",
    "causes",
    "domain_member_region",
    "domain_member_topic",
    "domain_region",
    "domain_topic",
    "entail",
    "entails",
    "exemplifies",
    "has_domain_region",
    "has_domain_topic",
    "holo_member",
    "holo_part",
    "holo_substance",
    "hypernym",
    "hyponym",
    "instance_hypernym",
    "instance_hyponym",
    "is_caused_by",
    "is_entailed_by",
    "is_exemplified_by",
    "member_holonym",
    "member_meronym",
    "mero_member",
    "mero_part",
    "mero_substance",
    "part_holonym",
    "part_meronym",
    "similar",
    "substance_holonym",
    "substance_meronym"
  ].map((relType) => import_zod.z.literal(relType))
);
341
// Adjective position markers: attributive, immediately-postnominal, predicative.
var AdjPosition = import_zod.z.union(
  ["a", "ip", "p"].map((position) => import_zod.z.literal(position))
);
346
// Zod object schemas mirroring the WordNet-LMF XML structure.
// A pronunciation of a lemma, optionally tagged with a regional variety.
var Pronunciation = import_zod.z.object({
  variety: import_zod.z.string().optional(),
  // TODO: "GB", "US", ...
  inner: import_zod.z.string()
  // Actual value (the element's text content)
});
// The canonical written form of a lexical entry.
var Lemma = import_zod.z.object({
  writtenForm: import_zod.z.string(),
  // Actual value
  partOfSpeech: PartsOfSpeech,
  pronunciations: import_zod.z.array(Pronunciation).min(0)
});
// A typed link from one sense to another.
var SenseRelation = import_zod.z.object({
  relType: SenseRelationRelType,
  dcType: import_zod.z.string().optional(),
  // TODO: This is only when relType is "other"
  target: SenseId
});
// One sense of a lexical entry, pointing at the synset it belongs to.
var Sense = import_zod.z.object({
  id: SenseId,
  synset: SynsetId,
  subCat: SyntacticBehaviorId.optional(),
  adjPosition: AdjPosition.optional(),
  senseRelations: import_zod.z.array(SenseRelation).min(0)
});
// An inflected/variant written form of an entry.
var Form = import_zod.z.object({
  writtenForm: import_zod.z.string()
  // This is where huge variety lives
});
// A word entry: exactly one lemma, at least one sense, optional forms.
var LexicalEntry = import_zod.z.object({
  id: LexicalEntryId,
  lemmas: import_zod.z.array(Lemma).length(1),
  senses: import_zod.z.array(Sense).min(1),
  forms: import_zod.z.array(Form).min(0)
});
// A gloss for a synset.
var Definition = import_zod.z.object({
  inner: import_zod.z.string()
  // Actual value
});
// A usage example, optionally attributed via dc:source.
var Example = import_zod.z.object({
  inner: import_zod.z.string(),
  // Actual value
  dcSource: import_zod.z.string().optional()
});
// Inter-Lingual Index definition text.
var ILIDefinition = import_zod.z.object({
  inner: import_zod.z.string()
  // Actual value
});
// A typed link from one synset to another.
var SynsetRelation = import_zod.z.object({
  relType: SynsetRelationRelType,
  target: SynsetId
});
// A set of synonymous senses sharing definitions and relations.
var Synset = import_zod.z.object({
  id: SynsetId,
  ili: import_zod.z.string(),
  members: import_zod.z.array(LexicalEntryId).min(1),
  // space-separated list of refs that we unwrap to array
  partOfSpeech: PartsOfSpeech,
  lexfile: import_zod.z.string(),
  dcSource: import_zod.z.string().optional(),
  definitions: import_zod.z.array(Definition).min(1),
  examples: import_zod.z.array(Example).min(0),
  iliDefinitions: import_zod.z.array(ILIDefinition).min(0),
  synsetRelations: import_zod.z.array(SynsetRelation).min(0)
});
// A verb subcategorization frame referenced by Sense.subCat.
var SyntacticBehavior = import_zod.z.object({
  id: SyntacticBehaviorId,
  subcategorizationFrame: import_zod.z.string()
  // Sentence structure. This is where (not very huge) variety lives
});
// The top-level lexicon: metadata plus all entries, synsets and frames.
var Lexicon = import_zod.z.object({
  id: LexiconId,
  // "oewn"
  label: import_zod.z.string(),
  // "Open English WordNet"
  language: import_zod.z.string(),
  // "en"
  email: import_zod.z.string(),
  // "english-wordnet@googlegroups.com"
  license: import_zod.z.string(),
  // "https://creativecommons.org/licenses/by/4.0/"
  version: import_zod.z.string(),
  // "2023"
  url: import_zod.z.string(),
  // "https://github.com/globalwordnet/english-wordnet"
  citation: import_zod.z.string().optional(),
  // "John P. McCrae, Alexandre Rademaker, Francis Bond, Ewa Rudnicka and Christiane Fellbaum (2019) English WordNet 2019 – An Open-Source WordNet for English, *Proceedings of the 10th Global WordNet Conference* – GWC 2019"
  lexicalEntries: import_zod.z.array(LexicalEntry).min(0),
  synsets: import_zod.z.array(Synset).min(0),
  syntacticBehaviors: import_zod.z.array(SyntacticBehavior).min(0)
});
437
+ var partsOfSpeechList = PartsOfSpeech.options.map((v) => v.value);
438
+
439
+ // src/helpers.ts
440
// Map a <Pronunciation> XML node to a validated Pronunciation object.
// Unknown attributes are carried through verbatim by extendWithRestAttr.
function PronunciationNode(node) {
  const data = {
    variety: optAttr(node, "variety"),
    inner: node.innerText
  };
  return Pronunciation.parse(extendWithRestAttr(node, data, (s) => s));
}
447
// Map a <Lemma> XML node to a validated Lemma object.
function LemmaNode(node) {
  const data = {
    writtenForm: attr(node, "writtenForm"),
    partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
    pronunciations: children(node, "Pronunciation", PronunciationNode)
  };
  return Lemma.parse(extendWithRestAttr(node, data, (s) => s));
}
458
// Map a <SenseRelation> node; the "dc:type" attribute is exposed as "dcType".
function SenseRelationNode(node) {
  const data = {
    relType: SenseRelationRelType.parse(attr(node, "relType")),
    target: attr(node, "target"),
    dcType: optAttr(node, "dc:type")
  };
  const rename = (s) => s === "dc:type" ? "dcType" : s;
  return SenseRelation.parse(extendWithRestAttr(node, data, rename));
}
468
// Map a <Sense> node. XML attribute names "subcat"/"adjposition" are
// exposed under camelCase keys.
function SenseNode(node) {
  const adjposition = optAttr(node, "adjposition");
  const data = {
    id: attr(node, "id"),
    synset: SynsetId.parse(attr(node, "synset")),
    senseRelations: children(node, "SenseRelation", SenseRelationNode),
    subCat: optAttr(node, "subcat"),
    // Only validate when present (and non-empty).
    adjPosition: adjposition ? AdjPosition.parse(adjposition) : void 0
  };
  const rename = (s) => {
    if (s === "subcat") return "subCat";
    if (s === "adjposition") return "adjPosition";
    return s;
  };
  return Sense.parse(extendWithRestAttr(node, data, rename));
}
485
// Map a <Form> XML node to a validated Form object.
function FormNode(node) {
  const data = {
    writtenForm: attr(node, "writtenForm")
  };
  return Form.parse(extendWithRestAttr(node, data, (s) => s));
}
491
// Map a <LexicalEntry> node together with its Lemma/Sense/Form children.
function LexicalEntryNode(node) {
  const data = {
    id: attr(node, "id"),
    lemmas: children(node, "Lemma", LemmaNode),
    senses: children(node, "Sense", SenseNode),
    forms: children(node, "Form", FormNode)
  };
  return LexicalEntry.parse(extendWithRestAttr(node, data, (s) => s));
}
500
// Map a <Definition> node; the gloss is the element's text content.
function DefinitionNode(node) {
  const data = {
    inner: node.innerText
  };
  return Definition.parse(extendWithRestAttr(node, data, (s) => s));
}
506
// Map an <Example> node; the "dc:source" attribute is exposed as "dcSource".
function ExampleNode(node) {
  const data = {
    inner: node.innerText,
    dcSource: optAttr(node, "dc:source")
  };
  const rename = (s) => s === "dc:source" ? "dcSource" : s;
  return Example.parse(extendWithRestAttr(node, data, rename));
}
515
// Map an <ILIDefinition> node to a validated ILIDefinition object.
function ILIDefinitionNode(node) {
  const data = {
    inner: node.innerText
  };
  return ILIDefinition.parse(extendWithRestAttr(node, data, (s) => s));
}
521
// Map a <SynsetRelation> node to a validated SynsetRelation object.
function SynsetRelationNode(node) {
  const data = {
    relType: SynsetRelationRelType.parse(attr(node, "relType")),
    target: attr(node, "target")
  };
  return SynsetRelation.parse(extendWithRestAttr(node, data, (s) => s));
}
528
// Map a <SyntacticBehaviour> node to a validated SyntacticBehavior object.
function SyntacticBehaviorNode(node) {
  const data = {
    id: attr(node, "id"),
    subcategorizationFrame: attr(node, "subcategorizationFrame")
  };
  return SyntacticBehavior.parse(extendWithRestAttr(node, data, (s) => s));
}
535
// Map a <Synset> node, splitting the space-separated "members" attribute
// into an array and collecting definition/example/relation children.
function SynsetNode(node) {
  const data = {
    id: attr(node, "id"),
    ili: attr(node, "ili"),
    lexfile: attr(node, "lexfile"),
    members: attr(node, "members").split(" "),
    dcSource: optAttr(node, "dc:source"),
    partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
    definitions: children(node, "Definition", DefinitionNode),
    examples: children(node, "Example", ExampleNode),
    iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
    synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
  };
  const rename = (s) => s === "dc:source" ? "dcSource" : s;
  return Synset.parse(extendWithRestAttr(node, data, rename));
}
552
// Map the top-level <Lexicon> node, recursively converting all entries,
// synsets and syntactic behaviours. Note the British spelling
// "SyntacticBehaviour" used by the XML format.
function LexiconNode(node) {
  const data = {
    id: attr(node, "id"),
    label: attr(node, "label"),
    language: attr(node, "language"),
    email: attr(node, "email"),
    license: attr(node, "license"),
    version: attr(node, "version"),
    citation: optAttr(node, "citation"),
    url: attr(node, "url"),
    lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
    synsets: children(node, "Synset", SynsetNode),
    syntacticBehaviors: children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
  };
  return Lexicon.parse(extendWithRestAttr(node, data, (s) => s));
}
571
// Decode the five predefined XML entities in an attribute/text value.
// BUG FIX: "&amp;" must be decoded LAST. Decoding it first double-decodes
// escaped sequences, e.g. "&amp;lt;" became "&lt;" and then "<"; the
// correct result is the literal text "&lt;".
var decodeXmlEntities = (s) => {
  if (s === void 0) return void 0;
  return s.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"').replace(/&amp;/g, "&");
};
575
// Read a required attribute, entity-decoded; throws when it is absent.
var attr = (node, attrName) => {
  const value = decodeXmlEntities(node.attributes[attrName]);
  if (value !== void 0) return value;
  throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
};
582
// Read an optional attribute, entity-decoded; undefined when absent.
var optAttr = (node, attrName) => decodeXmlEntities(node.attributes[attrName]);
585
// Collect every attribute whose (proxy-renamed) key is not already a
// property of obj; values are entity-decoded.
var restAttrs = (node, obj, proxy) => {
  const rest = {};
  for (const [key, raw] of Object.entries(node.attributes)) {
    if (proxy(key) in obj) continue;
    rest[key] = decodeXmlEntities(raw) ?? raw;
  }
  return rest;
};
592
// Mutate obj in place, folding in any attributes it does not yet cover.
var extendWithRestAttr = (node, obj, proxy) => Object.assign(obj, restAttrs(node, obj, proxy));
595
// Map each child of the given tag type through fn, preserving order.
var children = (node, type, fn) => {
  const out = [];
  for (const child of node.children) {
    if (child.type === type) out.push(fn(child));
  }
  return out;
};
598
+
599
+ // src/loader.ts
600
// Oldest Open English WordNet release year probed when nothing is cached.
var BASE_VERSION = "2024";
// Canonical data file name for a release year, e.g. "english-wordnet-2024.xml".
function getFilename(version) {
  return `english-wordnet-${version}.xml`;
}
// Gzipped download location on en-word.net for a release year.
function getDownloadUrl(version) {
  const filename = getFilename(version);
  return `https://en-word.net/static/${filename}.gz`;
}
607
// Default cache directory: ~/.cache/synset (falls back to ./.cache/synset
// when neither HOME nor USERPROFILE is set).
function getDefaultCacheDir() {
  const home = process.env.HOME || process.env.USERPROFILE || ".";
  return import_node_path.default.join(home, ".cache", "synset");
}
611
// True only when the path exists AND is a regular file (not a directory).
function fileExists(filePath) {
  if (!(0, import_node_fs.existsSync)(filePath)) return false;
  return (0, import_node_fs.statSync)(filePath).isFile();
}
618
// Probe a URL with a HEAD request; network failures count as "not there".
async function urlExists(url) {
  try {
    const res = await fetch(url, { method: "HEAD" });
    return res.ok;
  } catch {
    return false;
  }
}
626
// Pull the 4-digit release year out of an "english-wordnet-YYYY.xml" name,
// or null when the name does not match.
function extractVersionFromFilename(filename) {
  const found = /english-wordnet-(\d{4})\.xml/.exec(filename);
  return found ? Number.parseInt(found[1], 10) : null;
}
630
// Newest WordNet release year present in the cache directory (as a string),
// or null when the directory is missing or holds no matching files.
function findCachedVersion(cacheDir) {
  if (!(0, import_node_fs.existsSync)(cacheDir)) return null;
  let newest = null;
  for (const file of (0, import_node_fs.readdirSync)(cacheDir)) {
    const year = extractVersionFromFilename(file);
    if (year !== null && (newest === null || year > newest)) newest = year;
  }
  return newest === null ? null : newest.toString();
}
636
/**
 * Determine the newest downloadable WordNet release year.
 *
 * Strategy (order matters; each step minimizes HEAD requests):
 * 1. If a cached copy from lastReleasableYear (currentYear - 1) or later
 *    exists, use it without touching the network.
 * 2. If an older cached copy exists, probe only the years after it and
 *    fall back to the cached version when nothing newer is published.
 * 3. With no cache: if BASE_VERSION is published, walk forward until the
 *    first gap and return the year just before it.
 * 4. Otherwise scan forward from BASE_VERSION + 1 for any published year.
 *
 * @param onProgress - optional progress logger (string -> void)
 * @param cacheDir - cache directory override
 * @throws when no release can be found at all
 */
async function findLatestVersion(onProgress, cacheDir) {
  const log = onProgress || (() => {
  });
  const currentYear = (/* @__PURE__ */ new Date()).getFullYear();
  // Releases are yearly; the current year may not be published yet.
  const lastReleasableYear = currentYear - 1;
  const baseYear = parseInt(BASE_VERSION, 10);
  const dir = cacheDir || getDefaultCacheDir();
  const cachedVersion = findCachedVersion(dir);
  if (cachedVersion) {
    const cachedYear = parseInt(cachedVersion, 10);
    if (cachedYear >= lastReleasableYear) {
      return cachedVersion;
    }
    log(`Checking for newer version...`);
    for (let year = cachedYear + 1; year <= lastReleasableYear; year++) {
      const version = year.toString();
      if (await urlExists(getDownloadUrl(version))) {
        log(`Found ${version}`);
        return version;
      }
    }
    // Nothing newer published: the cached copy is still the latest.
    return cachedVersion;
  }
  log(`Checking available versions...`);
  if (await urlExists(getDownloadUrl(BASE_VERSION))) {
    // Releases are assumed contiguous: stop at the first missing year.
    for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
      const version = year.toString();
      if (await urlExists(getDownloadUrl(version))) {
        continue;
      } else {
        return (year - 1).toString();
      }
    }
    return lastReleasableYear.toString();
  }
  // BASE_VERSION itself is gone; look for any later published year.
  for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
    const version = year.toString();
    if (await urlExists(getDownloadUrl(version))) {
      return version;
    }
  }
  throw new Error(
    `No WordNet version found between ${BASE_VERSION} and ${lastReleasableYear}`
  );
}
681
// Download and gunzip a WordNet release, writing the XML to destPath.
// The whole payload is decompressed in memory before being written out.
async function downloadWordNet(version, destPath) {
  const response = await fetch(getDownloadUrl(version));
  if (!response.ok || !response.body) {
    throw new Error(`Failed to download WordNet ${version}: ${response.statusText}`);
  }
  const xmlBytes = await new Response(
    response.body.pipeThrough(new DecompressionStream("gzip"))
  ).arrayBuffer();
  const parentDir = import_node_path.default.dirname(destPath);
  if (!(0, import_node_fs.existsSync)(parentDir)) {
    (0, import_node_fs.mkdirSync)(parentDir, { recursive: true });
  }
  (0, import_node_fs.writeFileSync)(destPath, Buffer.from(xmlBytes));
}
695
// Build a streaming XML parser over a local file, addressed via file:// URL.
// NOTE(review): this relies on fetch() accepting file:// URLs — confirm the
// target runtime supports that.
function createParser(filePath) {
  const resolvedPath = import_node_path.default.resolve(filePath);
  let fileUrl;
  if (resolvedPath.startsWith("/")) {
    fileUrl = `file://${resolvedPath}`;
  } else {
    // Windows drive paths need forward slashes and a third slash.
    fileUrl = `file:///${resolvedPath.replace(/\\/g, "/")}`;
  }
  return parse(fileUrl, {
    ignoreDeclaration: false,
    silent: false
  });
}
703
// Consume the parser until the first completed Lexicon element, converting
// it into a validated Lexicon object; undefined if none is found.
async function parseLexicon(parser) {
  for await (const node of parser) {
    if (node.type !== "Lexicon") continue;
    return LexiconNode(node);
  }
  return void 0;
}
711
// Parse a local WordNet XML file into a Lexicon.
// Throws when the file is missing or contains no Lexicon element.
async function loadWordNet(filePath) {
  if (!fileExists(filePath)) {
    throw new Error(`WordNet file not found: ${filePath}`);
  }
  const lexicon = await parseLexicon(createParser(filePath));
  if (!lexicon) {
    throw new Error("Failed to parse WordNet: no Lexicon node found");
  }
  return lexicon;
}
722
/**
 * Resolve, download-if-needed and parse a WordNet release.
 *
 * Previously this duplicated the cache-resolution logic of
 * ensureWordNetCached verbatim; it now delegates to it (same version
 * resolution, same download/log behavior) and then loads the file.
 *
 * @param options - { version?, cacheDir?, forceDownload?, onProgress? }
 * @returns { lexicon, version, filePath }
 */
async function fetchWordNet(options = {}) {
  const { filePath, version } = await ensureWordNetCached(options);
  const lexicon = await loadWordNet(filePath);
  return { lexicon, version, filePath };
}
740
/**
 * Make sure a WordNet XML file is present in the cache, downloading it
 * when missing or when forceDownload is set.
 *
 * @param options - { version?, cacheDir?, forceDownload?, onProgress? }
 * @returns { filePath, version } of the cached file
 */
async function ensureWordNetCached(options = {}) {
  const cacheDir = options.cacheDir || getDefaultCacheDir();
  const log = options.onProgress || (() => {
  });
  const version = options.version || await findLatestVersion(log, cacheDir);
  const cachedPath = import_node_path.default.join(cacheDir, getFilename(version));
  const mustDownload = options.forceDownload || !fileExists(cachedPath);
  if (mustDownload) {
    const url = getDownloadUrl(version);
    log(`Downloading WordNet ${version} from ${url}`);
    await downloadWordNet(version, cachedPath);
    log(`Saved to ${cachedPath}`);
  } else {
    log(`Using cached ${cachedPath}`);
  }
  return { filePath: cachedPath, version };
}
757
// Convenience constants pinned to the base release year.
var WORDNET_FILENAME = getFilename(BASE_VERSION);
var WORDNET_URL = getDownloadUrl(BASE_VERSION);
759
+
760
+ // src/query.ts
761
/**
 * Build in-memory lookup tables over a parsed Lexicon.
 *
 * The word key is always the lowercased written form of an entry's first
 * lemma. Entries whose first lemma is missing or empty are stored by id
 * only and do not contribute to the word-keyed maps.
 *
 * @returns { synsets, senses, entries, byWord, sensesByWord, synsetsByWord, lexicon }
 */
function buildIndex(lexicon) {
  const synsets = new Map(lexicon.synsets.map((synset) => [synset.id, synset]));
  const senses = new Map();
  const entries = new Map();
  const byWord = new Map();
  const sensesByWord = new Map();
  const synsetsByWord = new Map();
  // Append value to the array stored under key, creating it on first use.
  const pushTo = (map, key, value) => {
    const list = map.get(key);
    if (list) {
      list.push(value);
    } else {
      map.set(key, [value]);
    }
  };
  for (const entry of lexicon.lexicalEntries) {
    entries.set(entry.id, entry);
    const word = entry.lemmas[0]?.writtenForm.toLowerCase();
    if (!word) continue;
    pushTo(byWord, word, entry);
    for (const sense of entry.senses) {
      senses.set(sense.id, sense);
      pushTo(sensesByWord, word, sense);
      const synset = synsets.get(sense.synset);
      if (synset) {
        // Deduplicate: several senses of one word may share a synset.
        const list = synsetsByWord.get(word) || [];
        if (!list.includes(synset)) {
          list.push(synset);
          synsetsByWord.set(word, list);
        }
      }
    }
  }
  return {
    synsets,
    senses,
    entries,
    byWord,
    sensesByWord,
    synsetsByWord,
    lexicon
  };
}
804
// All synsets containing the given word (case-insensitive); [] when unknown.
function findSynsets(index, word) {
  const matches = index.synsetsByWord.get(word.toLowerCase());
  return matches || [];
}
807
// Every definition of the word across all of its synsets, each paired with
// its synset and part of speech.
function getDefinitions(index, word) {
  const results = [];
  for (const synset of findSynsets(index, word)) {
    for (const definition of synset.definitions) {
      results.push({
        text: definition.inner,
        synset,
        partOfSpeech: synset.partOfSpeech
      });
    }
  }
  return results;
}
817
// Resolve a synset's relations of the given type to actual synsets,
// silently skipping targets that are not in the index.
function getRelated(index, synset, relType) {
  const related = [];
  for (const relation of synset.synsetRelations) {
    if (relation.relType !== relType) continue;
    const target = index.synsets.get(relation.target);
    if (target !== void 0) related.push(target);
  }
  return related;
}
820
// All hypernym ("is-a" parent) synsets of the word's synsets.
function getHypernyms(index, word) {
  return findSynsets(index, word).flatMap((synset) => getRelated(index, synset, "hypernym"));
}
824
// All hyponym ("is-a" child) synsets of the word's synsets.
function getHyponyms(index, word) {
  return findSynsets(index, word).flatMap((synset) => getRelated(index, synset, "hyponym"));
}
828
// Other lemmas sharing a synset with the word, deduplicated and excluding
// the query word itself (case-insensitive comparison).
function getSynonyms(index, word) {
  const lowerWord = word.toLowerCase();
  const seen = new Set();
  const results = [];
  for (const synset of findSynsets(index, word)) {
    for (const memberId of synset.members) {
      const entry = index.entries.get(memberId);
      if (!entry) continue;
      const lemma = entry.lemmas[0]?.writtenForm;
      if (!lemma || lemma.toLowerCase() === lowerWord || seen.has(lemma)) continue;
      seen.add(lemma);
      results.push({ word: lemma, entry, synset });
    }
  }
  return results;
}
847
// Written forms of all member entries of a synset, skipping members that
// are missing from the index or have no lemma.
function getSynsetWords(index, synset) {
  const words = [];
  for (const memberId of synset.members) {
    const lemma = index.entries.get(memberId)?.lemmas[0]?.writtenForm;
    if (lemma !== void 0) words.push(lemma);
  }
  return words;
}
850
+
851
+ // src/literals.ts
852
// Human-readable labels for the one-letter part-of-speech codes.
var PartsOfSpeech2 = {
  n: "Noun",
  v: "Verb",
  a: "Adjective",
  r: "Adverb",
  s: "Adjective Satellite",
  // NOTE(review): "t" has no documented meaning here — confirm upstream.
  t: "?",
  c: "Conjunction",
  p: "Adposition (Preposition, postposition, etc.)",
  x: "Other (inc. particle, classifier, bound morphemes, determiners)",
  u: "Unknown"
};
864
// Human-readable labels for synset relation types. This table is broader
// than the SynsetRelationRelType schema: it also covers relation types
// defined by the wider Global WordNet vocabulary.
var SynsetRelationRelType2 = {
  agent: "Agent",
  also: "See also",
  anto_converse: "Converse antonym",
  anto_gradable: "Gradable antonym",
  anto_simple: "Simple antonym",
  antonym: "Antonym",
  attribute: "Attribute",
  augmentative: "Augmentative",
  be_in_state: "Be in state",
  cause: "Cause",
  causes: "Causes",
  classified_by: "Classified by",
  classifies: "Classifies",
  co_agent_instrument: "Co-agent instrument",
  co_agent_patient: "Co-agent patient",
  co_agent_result: "Co-agent result",
  co_instrument_agent: "Co-instrument agent",
  co_instrument_patient: "Co-instrument patient",
  co_instrument_result: "Co-instrument result",
  co_patient_agent: "Co-patient agent",
  co_patient_instrument: "Co-patient instrument",
  co_result_agent: "Co-result agent",
  co_result_instrument: "Co-result instrument",
  co_role: "Co-role",
  diminutive: "Diminutive",
  direction: "Direction",
  domain_member_region: "Domain member region",
  domain_member_topic: "Domain member topic",
  domain_region: "Domain region",
  domain_topic: "Domain topic",
  entail: "Entail",
  entails: "Entails",
  eq_synonym: "Equivalent synonym",
  exemplifies: "Exemplifies",
  feminine: "Feminine",
  has_augmentative: "Has augmentative",
  has_diminutive: "Has diminutive",
  has_domain_region: "Has domain region",
  has_domain_topic: "Has domain topic",
  has_feminine: "Has feminine",
  has_masculine: "Has masculine",
  has_young: "Has young",
  holo_location: "Holonym location",
  holo_member: "Member holonym",
  holo_part: "Part holonym",
  holo_portion: "Portion holonym",
  holo_substance: "Substance holonym",
  holonym: "Holonym",
  hypernym: "Hypernym",
  hyponym: "Hyponym",
  in_manner: "In manner",
  instance_hypernym: "Instance hypernym",
  instance_hyponym: "Instance hyponym",
  instrument: "Instrument",
  involved: "Involved",
  involved_agent: "Involved agent",
  involved_direction: "Involved direction",
  involved_instrument: "Involved instrument",
  involved_location: "Involved location",
  involved_patient: "Involved patient",
  involved_result: "Involved result",
  involved_source_direction: "Involved source direction",
  involved_target_direction: "Involved target direction",
  ir_synonym: "IR synonym",
  is_caused_by: "Is caused by",
  is_entailed_by: "Is entailed by",
  is_exemplified_by: "Is exemplified by",
  is_subevent_of: "Is subevent of",
  location: "Location",
  manner_of: "Manner of",
  masculine: "Masculine",
  member_holonym: "Member holonym",
  member_meronym: "Member meronym",
  mero_location: "Meronym location",
  mero_member: "Member meronym",
  mero_part: "Part meronym",
  mero_portion: "Portion meronym",
  mero_substance: "Substance meronym",
  meronym: "Meronym",
  other: "Other",
  part_holonym: "Part holonym",
  part_meronym: "Part meronym",
  patient: "Patient",
  restricted_by: "Restricted by",
  restricts: "Restricts",
  result: "Result",
  role: "Role",
  similar: "Similar",
  source_direction: "Source direction",
  state_of: "State of",
  subevent: "Subevent",
  substance_holonym: "Substance holonym",
  substance_meronym: "Substance meronym",
  target_direction: "Target direction",
  young: "Young"
};
961
+
962
// src/cli.ts
// Decode XML entities in a string, mapping a null/undefined result from the
// decoder to the empty string so callers can interpolate it safely.
var decode = (text) => {
  const decoded = decodeXmlEntities(text);
  return decoded ?? "";
};
964
// Usage text shown for --help / -h and after an unknown command.
// Fix: the `--force` flag (honored by the `fetch` command) was undocumented.
var HELP = `
synset - WordNet dictionary explorer

Usage:
  synset <command> [options]

Commands:
  define <word>      Show definitions for a word
  synonyms <word>    List synonyms for a word
  hypernyms <word>   Show hypernyms (more general terms)
  hyponyms <word>    Show hyponyms (more specific terms)
  related <word>     Show all relations for a word
  info <synset-id>   Show details for a synset ID
  fetch              Download WordNet data to cache

Options:
  --file <path>      Use a local WordNet XML file instead of cache
  --force            With fetch: re-download even if already cached
  --help, -h         Show this help message

Examples:
  synset define dog
  synset synonyms happy
  synset related computer --file ./wordnet.xml
  synset fetch
`;
989
/**
 * CLI entry point: parses process.argv, resolves the WordNet lexicon
 * (from a local --file path or the cached/downloaded copy) and dispatches
 * to one of: define, synonyms, hypernyms, hyponyms, related, info, fetch.
 * Exits 0 after --help, 1 on usage errors or unknown commands.
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    console.log(HELP);
    process.exit(0);
  }
  const command = args[0];
  const fileIndex = args.indexOf("--file");
  const filePath = fileIndex !== -1 ? args[fileIndex + 1] : void 0;
  // Fix: a trailing `--file` with no path (or one followed by another option)
  // was silently ignored and the CLI fell back to downloading the full
  // WordNet data set. Fail fast with a usage error instead.
  if (fileIndex !== -1 && (filePath === void 0 || filePath.startsWith("--"))) {
    console.error("Error: --file requires a path argument");
    process.exit(1);
  }
  // Strip "--file <path>" so positional arguments keep their expected slots.
  const cleanArgs = fileIndex === -1 ? args : args.filter((_, i) => i !== fileIndex && i !== fileIndex + 1);
  const word = cleanArgs[1];
  if (command === "fetch") {
    console.log("Downloading WordNet data...");
    const { filePath: cachedPath, version } = await ensureWordNetCached({
      forceDownload: args.includes("--force"),
      onProgress: console.log
    });
    console.log(`WordNet ${version} cached at: ${cachedPath}`);
    return;
  }
  // Every remaining command needs a word (or synset id); `fetch` has already
  // returned above, so the previous `command !== "fetch"` guard was dead code.
  if (!word) {
    console.error(`Error: Missing word argument for command '${command}'`);
    process.exit(1);
  }
  const lexicon = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
  const index = buildIndex(lexicon);
  switch (command) {
    case "define": {
      const definitions = getDefinitions(index, word);
      if (definitions.length === 0) {
        console.log(`No definitions found for "${word}"`);
      } else {
        console.log(`Definitions for "${word}":`);
        definitions.forEach((def, i) => {
          // Fall back to the raw part-of-speech code when it has no label.
          const pos = PartsOfSpeech2[def.partOfSpeech] || def.partOfSpeech;
          console.log(`  ${i + 1}. [${pos}] ${decode(def.text)}`);
        });
      }
      break;
    }
    case "synonyms": {
      const synonyms = getSynonyms(index, word);
      if (synonyms.length === 0) {
        console.log(`No synonyms found for "${word}"`);
      } else {
        console.log(`Synonyms for "${word}":`);
        console.log(`  ${synonyms.map((s) => s.word).join(", ")}`);
      }
      break;
    }
    case "hypernyms": {
      const hypernyms = getHypernyms(index, word);
      if (hypernyms.length === 0) {
        console.log(`No hypernyms found for "${word}"`);
      } else {
        console.log(`Hypernyms for "${word}" (more general):`);
        hypernyms.forEach((s) => {
          const words = getSynsetWords(index, s);
          const def = decode(s.definitions[0]?.inner);
          console.log(`  - ${words.join(", ")}: ${def}`);
        });
      }
      break;
    }
    case "hyponyms": {
      const hyponyms = getHyponyms(index, word);
      if (hyponyms.length === 0) {
        console.log(`No hyponyms found for "${word}"`);
      } else {
        console.log(`Hyponyms for "${word}" (more specific):`);
        hyponyms.forEach((s) => {
          const words = getSynsetWords(index, s);
          const def = decode(s.definitions[0]?.inner);
          console.log(`  - ${words.join(", ")}: ${def}`);
        });
      }
      break;
    }
    case "related": {
      const synsets = findSynsets(index, word);
      if (synsets.length === 0) {
        console.log(`No synsets found for "${word}"`);
        break;
      }
      console.log(`Relations for "${word}":`);
      for (const synset of synsets) {
        const pos = PartsOfSpeech2[synset.partOfSpeech] || synset.partOfSpeech;
        const def = decode(synset.definitions[0]?.inner);
        console.log(`\n[${pos}] ${def}`);
        // Group related synsets by relation type for readable output.
        const relsByType = new Map();
        for (const rel of synset.synsetRelations) {
          const relatedSynset = index.synsets.get(rel.target);
          if (relatedSynset) {
            const words = getSynsetWords(index, relatedSynset);
            const existing = relsByType.get(rel.relType) || [];
            existing.push(words.join(", "));
            relsByType.set(rel.relType, existing);
          }
        }
        for (const [relType, words] of relsByType) {
          const label = SynsetRelationRelType2[relType] || relType;
          console.log(`  ${label}:`);
          words.forEach((w) => console.log(`    - ${w}`));
        }
      }
      break;
    }
    case "info": {
      // Here `word` is interpreted as a synset id, not a headword.
      const synset = index.synsets.get(word);
      if (!synset) {
        console.log(`Synset not found: ${word}`);
        break;
      }
      const pos = PartsOfSpeech2[synset.partOfSpeech] || synset.partOfSpeech;
      const words = getSynsetWords(index, synset);
      console.log(`Synset: ${synset.id}`);
      console.log(`Words: ${words.join(", ")}`);
      console.log(`Part of Speech: ${pos}`);
      console.log(`ILI: ${synset.ili}`);
      console.log(`\nDefinitions:`);
      synset.definitions.forEach((d) => console.log(`  - ${decode(d.inner)}`));
      if (synset.examples.length > 0) {
        console.log(`\nExamples:`);
        synset.examples.forEach((e) => console.log(`  - "${decode(e.inner)}"`));
      }
      if (synset.synsetRelations.length > 0) {
        console.log(`\nRelations:`);
        for (const rel of synset.synsetRelations) {
          const label = SynsetRelationRelType2[rel.relType] || rel.relType;
          const relatedSynset = index.synsets.get(rel.target);
          // When the target synset is missing, show its raw id instead.
          const relatedWords = relatedSynset ? getSynsetWords(index, relatedSynset).join(", ") : rel.target;
          console.log(`  ${label}: ${relatedWords}`);
        }
      }
      break;
    }
    default:
      console.error(`Unknown command: ${command}`);
      console.log(HELP);
      process.exit(1);
  }
}
1135
// Top-level launcher: report any unhandled failure and exit non-zero.
main().catch((err) => {
  // Fix: a non-Error throw (string, undefined, ...) previously printed
  // "Error: undefined"; print the thrown value itself in that case.
  console.error("Error:", err instanceof Error ? err.message : err);
  process.exit(1);
});
1139
+ //# sourceMappingURL=cli.cjs.map