synset 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +190 -0
- package/dist/cli.cjs +1139 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +1116 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +1139 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +319 -0
- package/dist/index.d.ts +319 -0
- package/dist/index.js +1048 -0
- package/dist/index.js.map +1 -0
- package/package.json +63 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,1116 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/loader.ts
|
|
4
|
+
import { existsSync, statSync, writeFileSync, mkdirSync, readdirSync } from "fs";
import path from "path";
import { pathToFileURL } from "url";
|
|
6
|
+
|
|
7
|
+
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
8
|
+
/**
 * A single node of the parsed XML tree.
 *
 * Each node stores its type (the tag name, or a token kind such as "text"),
 * its parent node, the raw source text it was created from, and the child
 * nodes appended to it so far.
 */
var Node = class {
  #type;
  #children;
  #parent;
  #attr;
  #raw;
  /**
   * @param type - tag name or token kind
   * @param parent - parent node (omitted for the root)
   * @param raw - raw source text of the token (omitted for the root)
   */
  constructor(type, parent, raw) {
    this.#type = type;
    this.#parent = parent;
    this.#raw = raw;
    this.#children = [];
  }
  get type() {
    return this.#type;
  }
  /** Raw source text, or an empty string when none was supplied. */
  get raw() {
    return this.#raw ?? "";
  }
  get parent() {
    return this.#parent;
  }
  get children() {
    return this.#children;
  }
  /**
   * Attributes parsed from the raw text as `name="value"` / `name='value'`
   * pairs. The result is computed on first access and cached.
   */
  get attributes() {
    if (this.#attr) {
      return this.#attr;
    }
    const parsed = {};
    const pattern = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
    for (const match of this.raw.matchAll(pattern)) {
      parsed[match[1]] = match[3];
    }
    this.#attr = parsed;
    return parsed;
  }
  /**
   * Concatenated text of all descendants. For a leaf, the CDATA payload when
   * the raw text contains a CDATA section, otherwise the raw text itself.
   */
  get innerText() {
    if (this.children.length) {
      let text = "";
      for (const child of this.children) {
        text += child.innerText;
      }
      return text;
    }
    return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
  }
  addChild(child) {
    this.#children.push(child);
  }
  /**
   * Returns true if node and parents match the key hierarchy
   * @param keys - XML tag names
   */
  is(...keys) {
    if (!keys.length) return false;
    // Walk from this node up through its ancestors while reading the keys
    // from last to first. An index loop avoids Array.prototype.toReversed,
    // which is unavailable before Node 20.
    let current = this;
    for (let i = keys.length - 1; i >= 0; i -= 1) {
      if (current?.type !== keys[i]) {
        return false;
      }
      current = current?.parent;
    }
    return true;
  }
  /**
   * Return the first child matching the key
   * @param key - XML tag name
   */
  first(key) {
    return this.children.find((n) => n.type === key);
  }
  /**
   * Return all children matching the key hierarchy
   * @param keys - XML tag names
   */
  all(...keys) {
    if (keys.length === 0) return [];
    let nodes = this.children;
    // Every key except the last selects the first matching child to descend into.
    for (const key of keys.slice(0, -1)) {
      const next = nodes.find((n) => n.type === key);
      if (!next) return [];
      nodes = next.children;
    }
    const last = keys[keys.length - 1];
    return nodes.filter((n) => n.type === last);
  }
};
|
|
99
|
+
|
|
100
|
+
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
101
|
+
/**
 * Token kinds recognised by the tokenizer. `start` is tested against the
 * front of the buffer; `end` is the literal that terminates the token.
 * Key order matters: kinds are tried in declaration order.
 */
var ENTITIES = {
  cdata: { start: /^<!\[CDATA\[/, end: "]]>" },
  comment: { start: /^<!--/, end: "-->" },
  declaration: { start: /^<\?/, end: "?>" },
  doctype: { start: /^<!DOCTYPE/i, end: ">" },
  element: { start: /^<[\w:.-/]/, end: ">" }
};
/**
 * Transformer that turns text chunks into `[kind, rawText]` tuples.
 *
 * States: "skip" (looking for the next "<"), "search" (deciding which kind
 * the buffered markup is) and one state per ENTITIES key (waiting for that
 * kind's terminator). `previous` remembers the state and buffer length of the
 * last loop pass so the loop stops when a pass made no progress.
 */
var transformer = {
  buf: "",
  state: "skip" /* SKIP */,
  previous: ["skip" /* SKIP */, -1],
  /** Emits whatever is still buffered as a final text token. */
  flush(controller) {
    if (this.buf.length === 0) return;
    controller.enqueue(["text" /* TEXT */, this.buf]);
  },
  /** Appends the chunk to the buffer and emits every complete token. */
  transform(chunk, controller) {
    this.buf += chunk;
    while (this.buf.length > 0) {
      const [lastState, lastLength] = this.previous;
      if (lastState === this.state && lastLength === this.buf.length) {
        break;
      }
      this.previous = [this.state, this.buf.length];

      if (this.state === "skip" /* SKIP */) {
        const open = this.buf.indexOf("<");
        if (open < 0) break;
        // Everything before "<" is text (possibly empty).
        controller.enqueue(["text" /* TEXT */, this.buf.slice(0, open)]);
        this.buf = this.buf.slice(open);
        this.state = "search" /* SEARCH */;
      }

      if (this.state === "search" /* SEARCH */) {
        if (this.buf.length < 3) break;
        const kind = Object.keys(ENTITIES).find((name) => ENTITIES[name].start.test(this.buf));
        if (kind !== void 0) {
          this.state = kind;
        }
        continue;
      }

      if (!Object.hasOwn(ENTITIES, this.state)) {
        throw new Error();
      }

      const terminator = ENTITIES[this.state].end;
      const at = this.buf.indexOf(terminator);
      if (at < 0) break;
      const cut = at + terminator.length;
      controller.enqueue([this.state, this.buf.slice(0, cut)]);
      this.buf = this.buf.slice(cut);
      this.state = "skip" /* SKIP */;
    }
  }
};
/** TransformStream whose transformer is a fresh shallow copy of `transformer`. */
var XMLStream = class extends TransformStream {
  constructor() {
    super({ ...transformer });
  }
};
|
|
177
|
+
|
|
178
|
+
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
179
|
+
/** Maps a token kind to the option that controls whether it is yielded. */
var ignoreTypes = {
  comment: "ignoreComments",
  declaration: "ignoreDeclaration",
  doctype: "ignoreDoctype"
};
/**
 * Streams an XML source and yields nodes as they are completed.
 *
 * @param input - a URL (string or URL object) to fetch, or a readable byte stream
 * @param options - optional `fetchOptions`, `signal`, `ignoreWhitespace`,
 *   `ignoreComments`, `ignoreDeclaration`, `ignoreDoctype` and `silent`
 * @yields self-closing elements immediately, other elements when their closing
 *   tag is read, and comment/declaration/doctype nodes whose ignore option is
 *   explicitly `false`
 * @returns the "@document" root node
 * Errors are swallowed unless `options.silent === false`.
 */
async function* parse(input, options) {
  const document = new Node("@document");
  try {
    const init = { ...options?.fetchOptions };
    if (options?.signal) {
      init.signal = options.signal;
    }
    let source = input;
    if (typeof input === "string" || input instanceof URL) {
      const response = await fetch(new URL(input), init);
      if (!response.ok || !response.body) {
        throw new Error(`Bad response`);
      }
      source = response.body;
    }
    const tokens = source
      .pipeThrough(new TextDecoderStream())
      .pipeThrough(new XMLStream(), { signal: options?.signal });
    let current = document;
    for await (const [type, value] of tokens) {
      if (options?.signal?.aborted) {
        break;
      }
      if (type === "text" /* TEXT */ && options?.ignoreWhitespace !== false && value.trim().length === 0) {
        continue;
      }
      if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
        const kept = new Node(type, current, value);
        current.addChild(kept);
        yield kept;
        continue;
      }
      if (type !== "element" /* ELEMENT */) {
        // Non-element tokens are attached to the tree without being yielded.
        current.addChild(new Node(type, current, value));
        continue;
      }
      const name = value.match(/<\/?([\w:.-]+)/)[1];
      if (value.endsWith("/>")) {
        const leaf = new Node(name, current, value);
        current.addChild(leaf);
        yield leaf;
        continue;
      }
      if (value.startsWith("</")) {
        // Closing tag: the current element is complete, step back to its parent.
        yield current;
        current = current.parent;
        continue;
      }
      const opened = new Node(name, current, value);
      current.addChild(opened);
      current = opened;
    }
  } catch (err) {
    if (options?.silent === false) {
      throw err;
    }
  }
  return document;
}
|
|
248
|
+
|
|
249
|
+
// src/types.ts
|
|
250
|
+
import { z } from "zod";
|
|
251
|
+
// Identifier schemas: each id is validated as a plain string.
var LexiconId = z.string();
var LexicalEntryId = z.string();
var SynsetId = z.string();
var SenseId = z.string();
var SyntacticBehaviorId = z.string();

// Accepted part-of-speech tags.
var PartsOfSpeech = z.union(
  ["a", "c", "n", "p", "r", "s", "u", "v", "x"].map((tag) => z.literal(tag))
);

// Accepted `relType` values on a sense relation.
var SenseRelationRelType = z.union(
  [
    "also",
    "antonym",
    "derivation",
    "domain_member_region",
    "domain_member_topic",
    "domain_region",
    "domain_topic",
    "exemplifies",
    "is_exemplified_by",
    "other",
    // TODO: Then "dc:type" attribute should define what relation
    "participle",
    "pertainym",
    "similar"
  ].map((rel) => z.literal(rel))
);

// Accepted `relType` values on a synset relation.
var SynsetRelationRelType = z.union(
  [
    "also",
    "attribute",
    "cause",
    "causes",
    "domain_member_region",
    "domain_member_topic",
    "domain_region",
    "domain_topic",
    "entail",
    "entails",
    "exemplifies",
    "has_domain_region",
    "has_domain_topic",
    "holo_member",
    "holo_part",
    "holo_substance",
    "hypernym",
    "hyponym",
    "instance_hypernym",
    "instance_hyponym",
    "is_caused_by",
    "is_entailed_by",
    "is_exemplified_by",
    "member_holonym",
    "member_meronym",
    "mero_member",
    "mero_part",
    "mero_substance",
    "part_holonym",
    "part_meronym",
    "similar",
    "substance_holonym",
    "substance_meronym"
  ].map((rel) => z.literal(rel))
);

// Accepted adjective position markers.
var AdjPosition = z.union(["a", "ip", "p"].map((pos) => z.literal(pos)));

var Pronunciation = z.object({
  // TODO: "GB", "US", ...
  variety: z.string().optional(),
  // Actual value
  inner: z.string()
});

var Lemma = z.object({
  // Actual value
  writtenForm: z.string(),
  partOfSpeech: PartsOfSpeech,
  pronunciations: z.array(Pronunciation).min(0)
});

var SenseRelation = z.object({
  relType: SenseRelationRelType,
  // TODO: This is only when relType is "other"
  dcType: z.string().optional(),
  target: SenseId
});

var Sense = z.object({
  id: SenseId,
  synset: SynsetId,
  subCat: SyntacticBehaviorId.optional(),
  adjPosition: AdjPosition.optional(),
  senseRelations: z.array(SenseRelation).min(0)
});

var Form = z.object({
  // This is where huge variety lives
  writtenForm: z.string()
});

// A lexical entry carries exactly one lemma and at least one sense.
var LexicalEntry = z.object({
  id: LexicalEntryId,
  lemmas: z.array(Lemma).length(1),
  senses: z.array(Sense).min(1),
  forms: z.array(Form).min(0)
});

var Definition = z.object({
  // Actual value
  inner: z.string()
});

var Example = z.object({
  // Actual value
  inner: z.string(),
  dcSource: z.string().optional()
});

var ILIDefinition = z.object({
  // Actual value
  inner: z.string()
});

var SynsetRelation = z.object({
  relType: SynsetRelationRelType,
  target: SynsetId
});

var Synset = z.object({
  id: SynsetId,
  ili: z.string(),
  // space-separated list of refs that we unwrap to array
  members: z.array(LexicalEntryId).min(1),
  partOfSpeech: PartsOfSpeech,
  lexfile: z.string(),
  dcSource: z.string().optional(),
  definitions: z.array(Definition).min(1),
  examples: z.array(Example).min(0),
  iliDefinitions: z.array(ILIDefinition).min(0),
  synsetRelations: z.array(SynsetRelation).min(0)
});

var SyntacticBehavior = z.object({
  id: SyntacticBehaviorId,
  // Sentence structure. This is where (not very huge) variety lives
  subcategorizationFrame: z.string()
});

var Lexicon = z.object({
  // "oewn"
  id: LexiconId,
  // "Open English WordNet"
  label: z.string(),
  // "en"
  language: z.string(),
  // "english-wordnet@googlegroups.com"
  email: z.string(),
  // "https://creativecommons.org/licenses/by/4.0/"
  license: z.string(),
  // "2023"
  version: z.string(),
  // "https://github.com/globalwordnet/english-wordnet">
  url: z.string(),
  // "John P. McCrae, Alexandre Rademaker, Francis Bond, Ewa Rudnicka and Christiane Fellbaum (2019) English WordNet 2019 – An Open-Source WordNet for English, *Proceedings of the 10th Global WordNet Conference* – GWC 2019"
  citation: z.string().optional(),
  lexicalEntries: z.array(LexicalEntry).min(0),
  synsets: z.array(Synset).min(0),
  syntacticBehaviors: z.array(SyntacticBehavior).min(0)
});

// Plain list of the part-of-speech tags accepted by PartsOfSpeech.
var partsOfSpeechList = PartsOfSpeech.options.map(({ value }) => value);
|
|
415
|
+
|
|
416
|
+
// src/helpers.ts
|
|
417
|
+
/**
 * Builds a Pronunciation record from an XML node: the optional "variety"
 * attribute plus the node's inner text, validated by the Pronunciation schema.
 */
function PronunciationNode(node) {
  const fields = {
    variety: optAttr(node, "variety"),
    inner: node.innerText
  };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return Pronunciation.parse(merged);
}
|
|
424
|
+
/**
 * Builds a Lemma record from an XML node: required "writtenForm" and
 * "partOfSpeech" attributes plus every "Pronunciation" child.
 */
function LemmaNode(node) {
  const fields = {
    writtenForm: attr(node, "writtenForm"),
    partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
    pronunciations: children(node, "Pronunciation", PronunciationNode)
  };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return Lemma.parse(merged);
}
|
|
435
|
+
/**
 * Builds a SenseRelation record from an XML node. The "dc:type" attribute is
 * exposed under the key `dcType`.
 */
function SenseRelationNode(node) {
  const fields = {
    relType: SenseRelationRelType.parse(attr(node, "relType")),
    target: attr(node, "target"),
    dcType: optAttr(node, "dc:type")
  };
  const toFieldName = (name) => {
    if (name === "dc:type") return "dcType";
    return name;
  };
  return SenseRelation.parse(extendWithRestAttr(node, fields, toFieldName));
}
|
|
445
|
+
/**
 * Builds a Sense record from an XML node. The "subcat" and "adjposition"
 * attributes are exposed as `subCat` and `adjPosition`; `adjPosition` is only
 * validated when the attribute has a non-empty value.
 */
function SenseNode(node) {
  const rawAdjPosition = optAttr(node, "adjposition");
  const fields = {
    id: attr(node, "id"),
    synset: SynsetId.parse(attr(node, "synset")),
    senseRelations: children(node, "SenseRelation", SenseRelationNode),
    subCat: optAttr(node, "subcat"),
    adjPosition: rawAdjPosition ? AdjPosition.parse(rawAdjPosition) : void 0
  };
  const toFieldName = (name) => {
    if (name === "subcat") return "subCat";
    if (name === "adjposition") return "adjPosition";
    return name;
  };
  return Sense.parse(extendWithRestAttr(node, fields, toFieldName));
}
|
|
462
|
+
/** Builds a Form record from the node's required "writtenForm" attribute. */
function FormNode(node) {
  const fields = { writtenForm: attr(node, "writtenForm") };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return Form.parse(merged);
}
|
|
468
|
+
/**
 * Builds a LexicalEntry record from an XML node: its "id" attribute and its
 * "Lemma", "Sense" and "Form" children.
 */
function LexicalEntryNode(node) {
  const fields = {
    id: attr(node, "id"),
    lemmas: children(node, "Lemma", LemmaNode),
    senses: children(node, "Sense", SenseNode),
    forms: children(node, "Form", FormNode)
  };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return LexicalEntry.parse(merged);
}
|
|
477
|
+
/** Builds a Definition record from the node's inner text. */
function DefinitionNode(node) {
  const fields = { inner: node.innerText };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return Definition.parse(merged);
}
|
|
483
|
+
/**
 * Builds an Example record from the node's inner text; the optional
 * "dc:source" attribute is exposed as `dcSource`.
 */
function ExampleNode(node) {
  const fields = {
    inner: node.innerText,
    dcSource: optAttr(node, "dc:source")
  };
  const toFieldName = (name) => {
    if (name === "dc:source") return "dcSource";
    return name;
  };
  return Example.parse(extendWithRestAttr(node, fields, toFieldName));
}
|
|
492
|
+
/** Builds an ILIDefinition record from the node's inner text. */
function ILIDefinitionNode(node) {
  const fields = { inner: node.innerText };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return ILIDefinition.parse(merged);
}
|
|
498
|
+
/** Builds a SynsetRelation record from the "relType" and "target" attributes. */
function SynsetRelationNode(node) {
  const fields = {
    relType: SynsetRelationRelType.parse(attr(node, "relType")),
    target: attr(node, "target")
  };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return SynsetRelation.parse(merged);
}
|
|
505
|
+
/**
 * Builds a SyntacticBehavior record from the "id" and
 * "subcategorizationFrame" attributes.
 */
function SyntacticBehaviorNode(node) {
  const fields = {
    id: attr(node, "id"),
    subcategorizationFrame: attr(node, "subcategorizationFrame")
  };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return SyntacticBehavior.parse(merged);
}
|
|
512
|
+
/**
 * Builds a Synset record from an XML node. The space-separated "members"
 * attribute is split into an array, and "dc:source" is exposed as `dcSource`.
 */
function SynsetNode(node) {
  const fields = {
    id: attr(node, "id"),
    ili: attr(node, "ili"),
    lexfile: attr(node, "lexfile"),
    members: attr(node, "members").split(" "),
    dcSource: optAttr(node, "dc:source"),
    partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
    definitions: children(node, "Definition", DefinitionNode),
    examples: children(node, "Example", ExampleNode),
    iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
    synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
  };
  const toFieldName = (name) => {
    if (name === "dc:source") return "dcSource";
    return name;
  };
  return Synset.parse(extendWithRestAttr(node, fields, toFieldName));
}
|
|
529
|
+
/**
 * Builds a Lexicon record from the top-level XML node: its metadata
 * attributes plus its "LexicalEntry", "Synset" and "SyntacticBehaviour"
 * children.
 */
function LexiconNode(node) {
  const fields = {
    id: attr(node, "id"),
    label: attr(node, "label"),
    language: attr(node, "language"),
    email: attr(node, "email"),
    license: attr(node, "license"),
    version: attr(node, "version"),
    citation: optAttr(node, "citation"),
    url: attr(node, "url"),
    lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
    synsets: children(node, "Synset", SynsetNode),
    // The XML element uses the British spelling.
    syntacticBehaviors: children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
  };
  const keepName = (name) => name;
  const merged = extendWithRestAttr(node, fields, keepName);
  return Lexicon.parse(merged);
}
|
|
548
|
+
/**
 * Decodes the five predefined XML entity references (amp, lt, gt, apos, quot)
 * in a string.
 *
 * Decoding is done in a single pass so that text produced by one replacement
 * is never decoded a second time (an escaped ampersand followed by "lt;"
 * stays a literal entity reference instead of turning into "<").
 *
 * @param s - text to decode, or undefined
 * @returns the decoded text, or undefined when `s` is undefined
 */
var decodeXmlEntities = (s) => {
  if (s === void 0) return void 0;
  const named = { amp: "&", lt: "<", gt: ">", apos: "'", quot: '"' };
  return s.replace(/&(amp|lt|gt|apos|quot);/g, (_match, name) => named[name]);
};
|
|
552
|
+
/**
 * Reads a required attribute, passing the value through decodeXmlEntities.
 * @throws Error when the attribute is absent
 */
var attr = (node, attrName) => {
  const decoded = decodeXmlEntities(node.attributes[attrName]);
  if (decoded !== void 0) {
    return decoded;
  }
  throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
};
|
|
559
|
+
/**
 * Reads an optional attribute, passing the value through decodeXmlEntities;
 * the attribute lookup yields undefined when it is absent.
 */
var optAttr = (node, attrName) => {
  const rawValue = node.attributes[attrName];
  return decodeXmlEntities(rawValue);
};
|
|
562
|
+
/**
 * Collects the node attributes that are not already represented in `obj`.
 *
 * `proxy` maps an attribute name to the key it would have in `obj`; an
 * attribute is skipped when that key is present in `obj`. Collected values
 * are passed through decodeXmlEntities and stored under the original
 * attribute name.
 */
var restAttrs = (node, obj, proxy) => {
  const extras = {};
  for (const name of Object.keys(node.attributes)) {
    if (proxy(name) in obj) {
      continue;
    }
    const rawValue = node.attributes[name];
    extras[name] = decodeXmlEntities(rawValue) ?? rawValue;
  }
  return extras;
};
|
|
569
|
+
/**
 * Copies onto `obj` every node attribute it does not already cover (as
 * decided by restAttrs) and returns the same, mutated `obj`.
 */
var extendWithRestAttr = (node, obj, proxy) => {
  const extras = restAttrs(node, obj, proxy);
  return Object.assign(obj, extras);
};
|
|
572
|
+
/**
 * Converts every direct child of `node` whose type equals `type`.
 *
 * @param node - parent node exposing a `children` array
 * @param type - child type (tag name) to select, compared with strict equality
 * @param fn - converter called with each matching child as its only argument
 * @returns the converted children, in document order
 */
var children = (node, type, fn) => {
  const converted = [];
  for (const child of node.children) {
    if (child.type === type) {
      converted.push(fn(child));
    }
  }
  return converted;
};
|
|
575
|
+
|
|
576
|
+
// src/loader.ts
|
|
577
|
+
// Release year used as the starting point when probing for available versions.
var BASE_VERSION = "2024";
/**
 * Returns the XML file name used for a given WordNet version.
 * @param version - release year, e.g. "2024"
 */
function getFilename(version) {
  return "english-wordnet-".concat(version, ".xml");
}
|
|
581
|
+
/**
 * Returns the download URL of the gzip-compressed XML file for a version.
 * @param version - release year, e.g. "2024"
 */
function getDownloadUrl(version) {
  const archiveName = `${getFilename(version)}.gz`;
  return `https://en-word.net/static/${archiveName}`;
}
|
|
584
|
+
/**
 * Returns the default cache directory: ".cache/synset" under $HOME, else
 * under %USERPROFILE%, else under the current directory.
 */
function getDefaultCacheDir() {
  const { HOME, USERPROFILE } = process.env;
  const baseDir = HOME || USERPROFILE || ".";
  return path.join(baseDir, ".cache", "synset");
}
|
|
588
|
+
/**
 * Reports whether `filePath` refers to an existing regular file.
 *
 * A single guarded stat call replaces the separate exists-then-stat sequence,
 * so a file removed between the two calls can no longer make this function
 * throw. Any stat failure is reported as `false`.
 *
 * @param filePath - path to check
 * @returns true only when the path exists and is a regular file
 */
function fileExists(filePath) {
  try {
    return statSync(filePath).isFile();
  } catch {
    return false;
  }
}
|
|
595
|
+
/**
 * Sends a HEAD request and reports whether the response status is OK.
 * A failed request is reported as `false`.
 */
async function urlExists(url) {
  let response;
  try {
    response = await fetch(url, { method: "HEAD" });
  } catch {
    return false;
  }
  return response.ok;
}
|
|
603
|
+
/**
 * Extracts the four-digit year from a name containing
 * "english-wordnet-YYYY.xml".
 * @returns the year as a number, or null when the pattern is not found
 */
function extractVersionFromFilename(filename) {
  const found = filename.match(/english-wordnet-(\d{4})\.xml/);
  if (!found) {
    return null;
  }
  const [, year] = found;
  return parseInt(year, 10);
}
|
|
607
|
+
/**
 * Scans `cacheDir` for WordNet files and returns the highest year found.
 * @returns the year as a string, or null when the directory is missing or
 *   contains no matching file
 */
function findCachedVersion(cacheDir) {
  if (!existsSync(cacheDir)) return null;
  let newest = null;
  for (const file of readdirSync(cacheDir)) {
    const year = extractVersionFromFilename(file);
    if (year === null) continue;
    if (newest === null || year > newest) {
      newest = year;
    }
  }
  return newest === null ? null : newest.toString();
}
|
|
613
|
+
/**
 * Decides which WordNet version to use.
 *
 * With a cached version: returns it when it is at least last year's;
 * otherwise probes each later year up to last year and returns the first one
 * whose download URL exists, falling back to the cached version.
 *
 * Without a cache: when the BASE_VERSION URL exists, probes following years
 * in order and returns the year before the first missing one (or last year
 * when none is missing); when it does not exist, returns the first following
 * year whose URL exists.
 *
 * @param onProgress - optional callback receiving progress messages
 * @param cacheDir - optional cache directory (defaults to getDefaultCacheDir())
 * @throws Error when no version can be found
 */
async function findLatestVersion(onProgress, cacheDir) {
  const report = onProgress || (() => {
  });
  const lastReleasableYear = (/* @__PURE__ */ new Date()).getFullYear() - 1;
  const baseYear = parseInt(BASE_VERSION, 10);
  const cachedVersion = findCachedVersion(cacheDir || getDefaultCacheDir());
  const isPublished = (year) => urlExists(getDownloadUrl(year.toString()));

  if (cachedVersion) {
    const cachedYear = parseInt(cachedVersion, 10);
    if (cachedYear >= lastReleasableYear) {
      return cachedVersion;
    }
    report(`Checking for newer version...`);
    for (let year = cachedYear + 1; year <= lastReleasableYear; year += 1) {
      if (await isPublished(year)) {
        const version = year.toString();
        report(`Found ${version}`);
        return version;
      }
    }
    return cachedVersion;
  }

  report(`Checking available versions...`);
  const baseAvailable = await urlExists(getDownloadUrl(BASE_VERSION));
  if (baseAvailable) {
    for (let year = baseYear + 1; year <= lastReleasableYear; year += 1) {
      if (!(await isPublished(year))) {
        return (year - 1).toString();
      }
    }
    return lastReleasableYear.toString();
  }
  for (let year = baseYear + 1; year <= lastReleasableYear; year += 1) {
    if (await isPublished(year)) {
      return year.toString();
    }
  }
  throw new Error(
    `No WordNet version found between ${BASE_VERSION} and ${lastReleasableYear}`
  );
}
|
|
658
|
+
/**
 * Downloads the gzip archive for `version`, decompresses it in memory and
 * writes the result to `destPath`, creating the parent directory if needed.
 * @throws Error when the response is not OK or has no body
 */
async function downloadWordNet(version, destPath) {
  const response = await fetch(getDownloadUrl(version));
  if (!(response.ok && response.body)) {
    throw new Error(`Failed to download WordNet ${version}: ${response.statusText}`);
  }
  const gunzipped = response.body.pipeThrough(new DecompressionStream("gzip"));
  const contents = await new Response(gunzipped).arrayBuffer();
  const parentDir = path.dirname(destPath);
  if (!existsSync(parentDir)) {
    mkdirSync(parentDir, { recursive: true });
  }
  writeFileSync(destPath, Buffer.from(contents));
}
|
|
672
|
+
/**
 * Creates the streaming XML parser for a local file.
 *
 * The path is resolved to an absolute path and converted with
 * `pathToFileURL`, which percent-encodes characters such as "#", "?", "%"
 * and spaces and handles Windows drive letters. Building the `file://` string
 * by hand left those characters unescaped, so part of the path could be read
 * as a URL fragment or query.
 *
 * @param filePath - path to the WordNet XML file
 * @returns the async generator returned by `parse`, configured to yield the
 *   XML declaration and to rethrow errors
 */
function createParser(filePath) {
  const fileUrl = pathToFileURL(path.resolve(filePath));
  return parse(fileUrl, {
    ignoreDeclaration: false,
    silent: false
  });
}
|
|
680
|
+
/**
 * Consumes the parser until a node of type "Lexicon" is yielded and returns
 * it converted by LexiconNode.
 * @returns the converted lexicon, or undefined when no such node is yielded
 */
async function parseLexicon(parser) {
  for await (const candidate of parser) {
    if (candidate.type !== "Lexicon") {
      continue;
    }
    return LexiconNode(candidate);
  }
  return void 0;
}
|
|
688
|
+
/**
 * Parses a WordNet XML file into a lexicon.
 * @throws Error when the file does not exist or no Lexicon node is found
 */
async function loadWordNet(filePath) {
  if (!fileExists(filePath)) {
    throw new Error(`WordNet file not found: ${filePath}`);
  }
  const lexicon = await parseLexicon(createParser(filePath));
  if (lexicon) {
    return lexicon;
  }
  throw new Error("Failed to parse WordNet: no Lexicon node found");
}
|
|
699
|
+
/**
 * Makes sure a WordNet file is available locally, then parses it.
 *
 * Version resolution, caching and downloading are delegated to
 * ensureWordNetCached instead of being duplicated here, so the two entry
 * points cannot drift apart.
 *
 * @param options - optional `cacheDir`, `onProgress`, `version` and
 *   `forceDownload`, passed through to ensureWordNetCached
 * @returns `{ lexicon, version, filePath }`
 */
async function fetchWordNet(options = {}) {
  const { filePath, version } = await ensureWordNetCached(options);
  const lexicon = await loadWordNet(filePath);
  return { lexicon, version, filePath };
}
|
|
717
|
+
/**
 * Ensures the WordNet XML file for the requested (or latest) version exists
 * in the cache directory, downloading it when it is missing or when
 * `forceDownload` is set.
 *
 * @param options - optional `cacheDir`, `onProgress`, `version`, `forceDownload`
 * @returns `{ filePath, version }`
 */
async function ensureWordNetCached(options = {}) {
  const cacheDir = options.cacheDir || getDefaultCacheDir();
  const report = options.onProgress || (() => {
  });
  const version = options.version || await findLatestVersion(report, cacheDir);
  const filePath = path.join(cacheDir, getFilename(version));
  const mustDownload = !fileExists(filePath) || options.forceDownload;
  if (!mustDownload) {
    report(`Using cached ${filePath}`);
    return { filePath, version };
  }
  report(`Downloading WordNet ${version} from ${getDownloadUrl(version)}`);
  await downloadWordNet(version, filePath);
  report(`Saved to ${filePath}`);
  return { filePath, version };
}
|
|
734
|
+
// File name and download URL for BASE_VERSION, computed once at module load.
var WORDNET_FILENAME = getFilename(BASE_VERSION);
var WORDNET_URL = getDownloadUrl(BASE_VERSION);
|
|
736
|
+
|
|
737
|
+
// src/query.ts
|
|
738
|
+
/**
 * Builds lookup maps over a lexicon.
 *
 * - `synsets`, `entries`, `senses`: by id
 * - `byWord`, `sensesByWord`, `synsetsByWord`: by the lower-cased written
 *   form of an entry's first lemma
 *
 * Entries without a first lemma are registered in `entries` only; their
 * senses are not indexed. `synsetsByWord` lists each synset once per word.
 *
 * @param lexicon - object with `synsets` and `lexicalEntries` arrays
 * @returns the maps plus the original `lexicon`
 */
function buildIndex(lexicon) {
  const synsets = /* @__PURE__ */ new Map();
  const senses = /* @__PURE__ */ new Map();
  const entries = /* @__PURE__ */ new Map();
  const byWord = /* @__PURE__ */ new Map();
  const sensesByWord = /* @__PURE__ */ new Map();
  const synsetsByWord = /* @__PURE__ */ new Map();

  // Appends `item` to the list stored under `key`, creating the list on first use.
  const append = (map, key, item) => {
    const bucket = map.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      map.set(key, [item]);
    }
  };

  lexicon.synsets.forEach((synset) => {
    synsets.set(synset.id, synset);
  });

  for (const entry of lexicon.lexicalEntries) {
    entries.set(entry.id, entry);
    const word = entry.lemmas[0]?.writtenForm.toLowerCase();
    if (!word) continue;
    append(byWord, word, entry);
    for (const sense of entry.senses) {
      senses.set(sense.id, sense);
      append(sensesByWord, word, sense);
      const linked = synsets.get(sense.synset);
      if (!linked) continue;
      const known = synsetsByWord.get(word);
      if (known && known.includes(linked)) continue;
      append(synsetsByWord, word, linked);
    }
  }

  return { synsets, senses, entries, byWord, sensesByWord, synsetsByWord, lexicon };
}
|
|
781
|
+
/**
 * Looks up the synsets indexed under a word (case-insensitive).
 * @returns the indexed array, or a new empty array when the word is unknown
 */
function findSynsets(index, word) {
  const key = word.toLowerCase();
  const matches = index.synsetsByWord.get(key);
  return matches || [];
}
|
|
784
|
+
/**
 * Lists every definition of every synset found for a word.
 * @returns objects of the form `{ text, synset, partOfSpeech }`
 */
function getDefinitions(index, word) {
  const results = [];
  for (const synset of findSynsets(index, word)) {
    for (const definition of synset.definitions) {
      results.push({
        text: definition.inner,
        synset,
        partOfSpeech: synset.partOfSpeech
      });
    }
  }
  return results;
}
|
|
794
|
+
/**
 * Resolves the targets of a synset's relations of one type.
 * Relations whose target id is not in `index.synsets` are dropped.
 */
function getRelated(index, synset, relType) {
  const related = [];
  for (const relation of synset.synsetRelations) {
    if (relation.relType !== relType) continue;
    const target = index.synsets.get(relation.target);
    if (target !== void 0) {
      related.push(target);
    }
  }
  return related;
}
|
|
797
|
+
/** Collects the "hypernym" relation targets of every synset found for a word. */
function getHypernyms(index, word) {
  const hypernyms = [];
  for (const synset of findSynsets(index, word)) {
    hypernyms.push(...getRelated(index, synset, "hypernym"));
  }
  return hypernyms;
}
|
|
801
|
+
/** Collects the "hyponym" relation targets of every synset found for a word. */
function getHyponyms(index, word) {
  const hyponyms = [];
  for (const synset of findSynsets(index, word)) {
    hyponyms.push(...getRelated(index, synset, "hyponym"));
  }
  return hyponyms;
}
|
|
805
|
+
/**
 * Lists the other words that share a synset with `word`.
 *
 * For every member entry of every synset found for the word, takes the
 * written form of its first lemma; skips the queried word itself (compared
 * case-insensitively) and written forms already reported (compared exactly).
 *
 * @returns objects of the form `{ word, entry, synset }`, in discovery order
 */
function getSynonyms(index, word) {
  const target = word.toLowerCase();
  const reported = /* @__PURE__ */ new Set();
  const synonyms = [];
  for (const synset of findSynsets(index, word)) {
    for (const memberId of synset.members) {
      const entry = index.entries.get(memberId);
      if (!entry) continue;
      const written = entry.lemmas[0]?.writtenForm;
      if (!written) continue;
      if (written.toLowerCase() === target) continue;
      if (reported.has(written)) continue;
      reported.add(written);
      synonyms.push({ word: written, entry, synset });
    }
  }
  return synonyms;
}
|
|
824
|
+
/**
 * List the written form of the first lemma of each member of a synset.
 *
 * @param index - Index object exposing an `entries` Map keyed by member id.
 * @param synset - Synset whose `members` ids are resolved.
 * @returns Written forms in member order; members without an entry, or
 *   whose first lemma has no written form, are skipped.
 */
function getSynsetWords(index, synset) {
  const words = [];
  for (const memberId of synset.members) {
    const entry = index.entries.get(memberId);
    if (entry === void 0) {
      continue;
    }
    const writtenForm = entry.lemmas[0]?.writtenForm;
    if (writtenForm !== void 0) {
      words.push(writtenForm);
    }
  }
  return words;
}
|
|
827
|
+
|
|
828
|
+
// src/literals.ts
|
|
829
|
+
// Display labels keyed by single-letter part-of-speech code.
// Callers in this file fall back to the raw code when a key is missing.
var PartsOfSpeech2 = {
  "n": "Noun",
  "v": "Verb",
  "a": "Adjective",
  "r": "Adverb",
  "s": "Adjective Satellite",
  "t": "?",
  "c": "Conjunction",
  "p": "Adposition (Preposition, postposition, etc.)",
  "x": "Other (inc. particle, classifier, bound morphemes, determiners)",
  "u": "Unknown"
};
|
|
841
|
+
// Display labels keyed by synset relation type.
// Callers in this file fall back to the raw relation type when a key is missing.
var SynsetRelationRelType2 = {
  "agent": "Agent",
  "also": "See also",
  "anto_converse": "Converse antonym",
  "anto_gradable": "Gradable antonym",
  "anto_simple": "Simple antonym",
  "antonym": "Antonym",
  "attribute": "Attribute",
  "augmentative": "Augmentative",
  "be_in_state": "Be in state",
  "cause": "Cause",
  "causes": "Causes",
  "classified_by": "Classified by",
  "classifies": "Classifies",
  "co_agent_instrument": "Co-agent instrument",
  "co_agent_patient": "Co-agent patient",
  "co_agent_result": "Co-agent result",
  "co_instrument_agent": "Co-instrument agent",
  "co_instrument_patient": "Co-instrument patient",
  "co_instrument_result": "Co-instrument result",
  "co_patient_agent": "Co-patient agent",
  "co_patient_instrument": "Co-patient instrument",
  "co_result_agent": "Co-result agent",
  "co_result_instrument": "Co-result instrument",
  "co_role": "Co-role",
  "diminutive": "Diminutive",
  "direction": "Direction",
  "domain_member_region": "Domain member region",
  "domain_member_topic": "Domain member topic",
  "domain_region": "Domain region",
  "domain_topic": "Domain topic",
  "entail": "Entail",
  "entails": "Entails",
  "eq_synonym": "Equivalent synonym",
  "exemplifies": "Exemplifies",
  "feminine": "Feminine",
  "has_augmentative": "Has augmentative",
  "has_diminutive": "Has diminutive",
  "has_domain_region": "Has domain region",
  "has_domain_topic": "Has domain topic",
  "has_feminine": "Has feminine",
  "has_masculine": "Has masculine",
  "has_young": "Has young",
  "holo_location": "Holonym location",
  "holo_member": "Member holonym",
  "holo_part": "Part holonym",
  "holo_portion": "Portion holonym",
  "holo_substance": "Substance holonym",
  "holonym": "Holonym",
  "hypernym": "Hypernym",
  "hyponym": "Hyponym",
  "in_manner": "In manner",
  "instance_hypernym": "Instance hypernym",
  "instance_hyponym": "Instance hyponym",
  "instrument": "Instrument",
  "involved": "Involved",
  "involved_agent": "Involved agent",
  "involved_direction": "Involved direction",
  "involved_instrument": "Involved instrument",
  "involved_location": "Involved location",
  "involved_patient": "Involved patient",
  "involved_result": "Involved result",
  "involved_source_direction": "Involved source direction",
  "involved_target_direction": "Involved target direction",
  "ir_synonym": "IR synonym",
  "is_caused_by": "Is caused by",
  "is_entailed_by": "Is entailed by",
  "is_exemplified_by": "Is exemplified by",
  "is_subevent_of": "Is subevent of",
  "location": "Location",
  "manner_of": "Manner of",
  "masculine": "Masculine",
  "member_holonym": "Member holonym",
  "member_meronym": "Member meronym",
  "mero_location": "Meronym location",
  "mero_member": "Member meronym",
  "mero_part": "Part meronym",
  "mero_portion": "Portion meronym",
  "mero_substance": "Substance meronym",
  "meronym": "Meronym",
  "other": "Other",
  "part_holonym": "Part holonym",
  "part_meronym": "Part meronym",
  "patient": "Patient",
  "restricted_by": "Restricted by",
  "restricts": "Restricts",
  "result": "Result",
  "role": "Role",
  "similar": "Similar",
  "source_direction": "Source direction",
  "state_of": "State of",
  "subevent": "Subevent",
  "substance_holonym": "Substance holonym",
  "substance_meronym": "Substance meronym",
  "target_direction": "Target direction",
  "young": "Young"
};
|
|
938
|
+
|
|
939
|
+
// src/cli.ts
|
|
940
|
+
// Decode XML entities in `s`, substituting "" when the decoder yields null/undefined.
var decode = (s) => {
  const decoded = decodeXmlEntities(s);
  return decoded ?? "";
};
|
|
941
|
+
var HELP = `
|
|
942
|
+
synset - WordNet dictionary explorer
|
|
943
|
+
|
|
944
|
+
Usage:
|
|
945
|
+
synset <command> [options]
|
|
946
|
+
|
|
947
|
+
Commands:
|
|
948
|
+
define <word> Show definitions for a word
|
|
949
|
+
synonyms <word> List synonyms for a word
|
|
950
|
+
hypernyms <word> Show hypernyms (more general terms)
|
|
951
|
+
hyponyms <word> Show hyponyms (more specific terms)
|
|
952
|
+
related <word> Show all relations for a word
|
|
953
|
+
info <synset-id> Show details for a synset ID
|
|
954
|
+
fetch Download WordNet data to cache
|
|
955
|
+
|
|
956
|
+
Options:
|
|
957
|
+
--file <path> Use a local WordNet XML file instead of cache
|
|
958
|
+
--help, -h Show this help message
|
|
959
|
+
|
|
960
|
+
Examples:
|
|
961
|
+
synset define dog
|
|
962
|
+
synset synonyms happy
|
|
963
|
+
synset related computer --file ./wordnet.xml
|
|
964
|
+
synset fetch
|
|
965
|
+
`;
|
|
966
|
+
/**
 * CLI entry point: parses `process.argv`, loads WordNet and runs one command.
 *
 * Changes relative to the previous implementation:
 * - The command is read from the argument list with `--file <path>` removed,
 *   so the option may appear before the command.
 * - The command name, the `--file` value and the word argument are validated
 *   before any WordNet data is loaded or downloaded.
 *
 * Exits the process with code 0 after printing help and with code 1 on a
 * usage error. Errors from loading or fetching propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    console.log(HELP);
    process.exit(0);
  }

  // Separate `--file <path>` from the positional arguments.
  const fileIndex = args.indexOf("--file");
  const filePath = fileIndex !== -1 ? args[fileIndex + 1] : void 0;
  const cleanArgs = fileIndex === -1 ? args : args.filter((_, i) => i !== fileIndex && i !== fileIndex + 1);
  const [command, word] = cleanArgs;

  if (fileIndex !== -1 && filePath === void 0) {
    // Previously this silently fell back to the cached data.
    console.error("Error: Missing path for option '--file'");
    process.exit(1);
  }
  if (command === void 0) {
    console.error("Error: Missing command");
    console.log(HELP);
    process.exit(1);
  }

  if (command === "fetch") {
    console.log("Downloading WordNet data...");
    const { filePath: cachedPath, version } = await ensureWordNetCached({
      forceDownload: args.includes("--force"),
      onProgress: console.log
    });
    console.log(`WordNet ${version} cached at: ${cachedPath}`);
    return;
  }

  // Reject unknown commands up front instead of after loading the lexicon.
  const wordCommands = ["define", "synonyms", "hypernyms", "hyponyms", "related", "info"];
  if (!wordCommands.includes(command)) {
    console.error(`Unknown command: ${command}`);
    console.log(HELP);
    process.exit(1);
  }
  if (!word) {
    console.error(`Error: Missing word argument for command '${command}'`);
    process.exit(1);
  }

  const lexicon = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
  const index = buildIndex(lexicon);

  // Shared printer for the hypernym/hyponym listings: words, then first definition.
  const printSynsetList = (synsets) => {
    for (const s of synsets) {
      const words = getSynsetWords(index, s);
      const def = decode(s.definitions[0]?.inner);
      console.log(` - ${words.join(", ")}: ${def}`);
    }
  };

  switch (command) {
    case "define": {
      const definitions = getDefinitions(index, word);
      if (definitions.length === 0) {
        console.log(`No definitions found for "${word}"`);
      } else {
        console.log(`Definitions for "${word}":`);
        definitions.forEach((def, i) => {
          const pos = PartsOfSpeech2[def.partOfSpeech] || def.partOfSpeech;
          console.log(` ${i + 1}. [${pos}] ${decode(def.text)}`);
        });
      }
      break;
    }
    case "synonyms": {
      const synonyms = getSynonyms(index, word);
      if (synonyms.length === 0) {
        console.log(`No synonyms found for "${word}"`);
      } else {
        console.log(`Synonyms for "${word}":`);
        console.log(` ${synonyms.map((s) => s.word).join(", ")}`);
      }
      break;
    }
    case "hypernyms": {
      const hypernyms = getHypernyms(index, word);
      if (hypernyms.length === 0) {
        console.log(`No hypernyms found for "${word}"`);
      } else {
        console.log(`Hypernyms for "${word}" (more general):`);
        printSynsetList(hypernyms);
      }
      break;
    }
    case "hyponyms": {
      const hyponyms = getHyponyms(index, word);
      if (hyponyms.length === 0) {
        console.log(`No hyponyms found for "${word}"`);
      } else {
        console.log(`Hyponyms for "${word}" (more specific):`);
        printSynsetList(hyponyms);
      }
      break;
    }
    case "related": {
      const synsets = findSynsets(index, word);
      if (synsets.length === 0) {
        console.log(`No synsets found for "${word}"`);
        break;
      }
      console.log(`Relations for "${word}":`);
      for (const synset of synsets) {
        const pos = PartsOfSpeech2[synset.partOfSpeech] || synset.partOfSpeech;
        const def = decode(synset.definitions[0]?.inner);
        console.log(`\n[${pos}] ${def}`);
        // Group the word lists of resolvable targets by relation type.
        const relsByType = /* @__PURE__ */ new Map();
        for (const rel of synset.synsetRelations) {
          const relatedSynset = index.synsets.get(rel.target);
          if (relatedSynset) {
            const words = getSynsetWords(index, relatedSynset);
            const existing = relsByType.get(rel.relType) || [];
            existing.push(words.join(", "));
            relsByType.set(rel.relType, existing);
          }
        }
        for (const [relType, words] of relsByType) {
          const label = SynsetRelationRelType2[relType] || relType;
          console.log(` ${label}:`);
          words.forEach((w) => console.log(` - ${w}`));
        }
      }
      break;
    }
    case "info": {
      // For this command the positional argument is a synset id, not a word.
      const synset = index.synsets.get(word);
      if (!synset) {
        console.log(`Synset not found: ${word}`);
        break;
      }
      const pos = PartsOfSpeech2[synset.partOfSpeech] || synset.partOfSpeech;
      const words = getSynsetWords(index, synset);
      console.log(`Synset: ${synset.id}`);
      console.log(`Words: ${words.join(", ")}`);
      console.log(`Part of Speech: ${pos}`);
      console.log(`ILI: ${synset.ili}`);
      console.log(`\nDefinitions:`);
      synset.definitions.forEach((d) => console.log(` - ${decode(d.inner)}`));
      if (synset.examples.length > 0) {
        console.log(`\nExamples:`);
        synset.examples.forEach((e) => console.log(` - "${decode(e.inner)}"`));
      }
      if (synset.synsetRelations.length > 0) {
        console.log(`\nRelations:`);
        for (const rel of synset.synsetRelations) {
          const label = SynsetRelationRelType2[rel.relType] || rel.relType;
          const relatedSynset = index.synsets.get(rel.target);
          // Fall back to the raw target id when the target is not indexed.
          const relatedWords = relatedSynset ? getSynsetWords(index, relatedSynset).join(", ") : rel.target;
          console.log(` ${label}: ${relatedWords}`);
        }
      }
      break;
    }
  }
}
|
|
1112
|
+
// Run the CLI; report any rejection on stderr and exit with a failure code.
main().catch((err) => {
  // A rejection value need not be an Error (it may be null, undefined or a
  // string), so avoid dereferencing `.message` unconditionally.
  const message = err?.message ?? String(err);
  console.error("Error:", message);
  process.exit(1);
});
|
|
1116
|
+
//# sourceMappingURL=cli.js.map
|