synset 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -44
- package/dist/cli.cjs +392 -382
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +400 -383
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +175 -166
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +7 -7
- package/dist/index.d.ts +7 -7
- package/dist/index.js +183 -167
- package/dist/index.js.map +1 -1
- package/package.json +10 -5
package/dist/cli.cjs
CHANGED
|
@@ -23,252 +23,6 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
23
23
|
mod
|
|
24
24
|
));
|
|
25
25
|
|
|
26
|
-
// src/loader.ts
|
|
27
|
-
var import_node_fs = require("fs");
|
|
28
|
-
var import_node_path = __toESM(require("path"), 1);
|
|
29
|
-
|
|
30
|
-
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
31
|
-
var Node = class {
|
|
32
|
-
#type;
|
|
33
|
-
#children;
|
|
34
|
-
#parent;
|
|
35
|
-
#attr;
|
|
36
|
-
#raw;
|
|
37
|
-
constructor(type, parent, raw) {
|
|
38
|
-
this.#type = type;
|
|
39
|
-
this.#parent = parent;
|
|
40
|
-
this.#raw = raw;
|
|
41
|
-
this.#children = [];
|
|
42
|
-
}
|
|
43
|
-
get type() {
|
|
44
|
-
return this.#type;
|
|
45
|
-
}
|
|
46
|
-
get raw() {
|
|
47
|
-
return this.#raw ?? "";
|
|
48
|
-
}
|
|
49
|
-
get parent() {
|
|
50
|
-
return this.#parent;
|
|
51
|
-
}
|
|
52
|
-
get children() {
|
|
53
|
-
return this.#children;
|
|
54
|
-
}
|
|
55
|
-
get attributes() {
|
|
56
|
-
if (this.#attr) {
|
|
57
|
-
return this.#attr;
|
|
58
|
-
}
|
|
59
|
-
this.#attr = {};
|
|
60
|
-
if (this.raw) {
|
|
61
|
-
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
62
|
-
let match;
|
|
63
|
-
while ((match = regex.exec(this.raw)) !== null) {
|
|
64
|
-
this.#attr[match[1]] = match[3];
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
return this.#attr;
|
|
68
|
-
}
|
|
69
|
-
get innerText() {
|
|
70
|
-
if (this.children.length) {
|
|
71
|
-
let text = "";
|
|
72
|
-
for (const child of this.children) {
|
|
73
|
-
text += child.innerText;
|
|
74
|
-
}
|
|
75
|
-
return text;
|
|
76
|
-
}
|
|
77
|
-
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
78
|
-
}
|
|
79
|
-
addChild(child) {
|
|
80
|
-
this.#children.push(child);
|
|
81
|
-
}
|
|
82
|
-
/**
|
|
83
|
-
* Returns true if node and parents match the key hierarchy
|
|
84
|
-
* @param keys - XML tag names
|
|
85
|
-
*/
|
|
86
|
-
is(...keys) {
|
|
87
|
-
if (!keys.length) return false;
|
|
88
|
-
let parent;
|
|
89
|
-
for (const key of keys.toReversed()) {
|
|
90
|
-
parent = parent ? parent.parent : this;
|
|
91
|
-
if (parent?.type !== key) {
|
|
92
|
-
return false;
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
return true;
|
|
96
|
-
}
|
|
97
|
-
/**
|
|
98
|
-
* Return the first child matching the key
|
|
99
|
-
* @param key - XML tag name
|
|
100
|
-
*/
|
|
101
|
-
first(key) {
|
|
102
|
-
return this.children.find((n) => n.type === key);
|
|
103
|
-
}
|
|
104
|
-
/**
|
|
105
|
-
* Return all children matching the key hierarchy
|
|
106
|
-
* @param keys - XML tag names
|
|
107
|
-
*/
|
|
108
|
-
all(...keys) {
|
|
109
|
-
let nodes = this.children;
|
|
110
|
-
let found = [];
|
|
111
|
-
for (const [i, k] of Object.entries(keys)) {
|
|
112
|
-
if (Number.parseInt(i) === keys.length - 1) {
|
|
113
|
-
found = nodes.filter((n) => n.type === k);
|
|
114
|
-
break;
|
|
115
|
-
}
|
|
116
|
-
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
117
|
-
if (!nodes) return [];
|
|
118
|
-
}
|
|
119
|
-
return found;
|
|
120
|
-
}
|
|
121
|
-
};
|
|
122
|
-
|
|
123
|
-
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
124
|
-
var ENTITIES = {
|
|
125
|
-
cdata: {
|
|
126
|
-
end: "]]>",
|
|
127
|
-
start: /^<!\[CDATA\[/
|
|
128
|
-
},
|
|
129
|
-
comment: {
|
|
130
|
-
end: "-->",
|
|
131
|
-
start: /^<!--/
|
|
132
|
-
},
|
|
133
|
-
declaration: {
|
|
134
|
-
end: "?>",
|
|
135
|
-
start: /^<\?/
|
|
136
|
-
},
|
|
137
|
-
doctype: {
|
|
138
|
-
end: ">",
|
|
139
|
-
start: /^<!DOCTYPE/i
|
|
140
|
-
},
|
|
141
|
-
element: {
|
|
142
|
-
end: ">",
|
|
143
|
-
start: /^<[\w:.-/]/
|
|
144
|
-
}
|
|
145
|
-
};
|
|
146
|
-
var transformer = {
|
|
147
|
-
buf: "",
|
|
148
|
-
state: "skip" /* SKIP */,
|
|
149
|
-
previous: ["skip" /* SKIP */, -1],
|
|
150
|
-
flush(controller) {
|
|
151
|
-
if (this.buf.length > 0) {
|
|
152
|
-
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
153
|
-
}
|
|
154
|
-
},
|
|
155
|
-
transform(chunk, controller) {
|
|
156
|
-
this.buf += chunk;
|
|
157
|
-
while (this.buf.length) {
|
|
158
|
-
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
159
|
-
break;
|
|
160
|
-
}
|
|
161
|
-
this.previous = [this.state, this.buf.length];
|
|
162
|
-
if (this.state === "skip" /* SKIP */) {
|
|
163
|
-
const index = this.buf.indexOf("<");
|
|
164
|
-
if (index < 0) break;
|
|
165
|
-
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
166
|
-
this.buf = this.buf.substring(index);
|
|
167
|
-
this.state = "search" /* SEARCH */;
|
|
168
|
-
}
|
|
169
|
-
if (this.state === "search" /* SEARCH */) {
|
|
170
|
-
if (this.buf.length < 3) break;
|
|
171
|
-
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
172
|
-
if (this.buf.match(entity.start)) {
|
|
173
|
-
this.state = state;
|
|
174
|
-
break;
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
continue;
|
|
178
|
-
}
|
|
179
|
-
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
180
|
-
const { end } = ENTITIES[this.state];
|
|
181
|
-
const index = this.buf.indexOf(end);
|
|
182
|
-
if (index < 0) break;
|
|
183
|
-
controller.enqueue([
|
|
184
|
-
this.state,
|
|
185
|
-
this.buf.substring(0, index + end.length)
|
|
186
|
-
]);
|
|
187
|
-
this.buf = this.buf.substring(index + end.length);
|
|
188
|
-
this.state = "skip" /* SKIP */;
|
|
189
|
-
continue;
|
|
190
|
-
}
|
|
191
|
-
throw new Error();
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
|
-
var XMLStream = class extends TransformStream {
|
|
196
|
-
constructor() {
|
|
197
|
-
super({ ...transformer });
|
|
198
|
-
}
|
|
199
|
-
};
|
|
200
|
-
|
|
201
|
-
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
202
|
-
var ignoreTypes = {
|
|
203
|
-
["comment" /* COMMENT */]: "ignoreComments",
|
|
204
|
-
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
205
|
-
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
206
|
-
};
|
|
207
|
-
async function* parse(input, options) {
|
|
208
|
-
const document = new Node("@document");
|
|
209
|
-
try {
|
|
210
|
-
const init = { ...options?.fetchOptions };
|
|
211
|
-
if (options?.signal) {
|
|
212
|
-
init.signal = options.signal;
|
|
213
|
-
}
|
|
214
|
-
let source;
|
|
215
|
-
if (typeof input === "string" || input instanceof URL) {
|
|
216
|
-
input = new URL(input);
|
|
217
|
-
const response = await fetch(input, init);
|
|
218
|
-
if (!response.ok || !response.body) {
|
|
219
|
-
throw new Error(`Bad response`);
|
|
220
|
-
}
|
|
221
|
-
source = response.body;
|
|
222
|
-
} else {
|
|
223
|
-
source = input;
|
|
224
|
-
}
|
|
225
|
-
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
226
|
-
signal: options?.signal
|
|
227
|
-
});
|
|
228
|
-
let node = document;
|
|
229
|
-
for await (const [type, value] of stream) {
|
|
230
|
-
if (options?.signal?.aborted) {
|
|
231
|
-
break;
|
|
232
|
-
}
|
|
233
|
-
if (type === "text" /* TEXT */) {
|
|
234
|
-
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
235
|
-
continue;
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
239
|
-
const newNode = new Node(type, node, value);
|
|
240
|
-
node.addChild(newNode);
|
|
241
|
-
yield newNode;
|
|
242
|
-
continue;
|
|
243
|
-
}
|
|
244
|
-
if (type === "element" /* ELEMENT */) {
|
|
245
|
-
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
246
|
-
if (value.endsWith("/>")) {
|
|
247
|
-
const newNode2 = new Node(name, node, value);
|
|
248
|
-
node.addChild(newNode2);
|
|
249
|
-
yield newNode2;
|
|
250
|
-
continue;
|
|
251
|
-
}
|
|
252
|
-
if (value.startsWith("</")) {
|
|
253
|
-
yield node;
|
|
254
|
-
node = node.parent;
|
|
255
|
-
continue;
|
|
256
|
-
}
|
|
257
|
-
const newNode = new Node(name, node, value);
|
|
258
|
-
node.addChild(newNode);
|
|
259
|
-
node = newNode;
|
|
260
|
-
continue;
|
|
261
|
-
}
|
|
262
|
-
node.addChild(new Node(type, node, value));
|
|
263
|
-
}
|
|
264
|
-
} catch (err) {
|
|
265
|
-
if (options?.silent === false) {
|
|
266
|
-
throw err;
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
return document;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
26
|
// src/types.ts
|
|
273
27
|
var import_zod = require("zod");
|
|
274
28
|
var LexiconId = import_zod.z.string();
|
|
@@ -434,7 +188,9 @@ var Lexicon = import_zod.z.object({
|
|
|
434
188
|
synsets: import_zod.z.array(Synset).min(0),
|
|
435
189
|
syntacticBehaviors: import_zod.z.array(SyntacticBehavior).min(0)
|
|
436
190
|
});
|
|
437
|
-
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
191
|
+
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
192
|
+
(v) => v.value
|
|
193
|
+
);
|
|
438
194
|
|
|
439
195
|
// src/helpers.ts
|
|
440
196
|
function PronunciationNode(node) {
|
|
@@ -462,7 +218,7 @@ function SenseRelationNode(node) {
|
|
|
462
218
|
dcType: optAttr(node, "dc:type")
|
|
463
219
|
};
|
|
464
220
|
return SenseRelation.parse(
|
|
465
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
221
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
|
|
466
222
|
);
|
|
467
223
|
}
|
|
468
224
|
function SenseNode(node) {
|
|
@@ -478,7 +234,7 @@ function SenseNode(node) {
|
|
|
478
234
|
extendWithRestAttr(
|
|
479
235
|
node,
|
|
480
236
|
obj,
|
|
481
|
-
(s) => s
|
|
237
|
+
(s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
|
|
482
238
|
)
|
|
483
239
|
);
|
|
484
240
|
}
|
|
@@ -509,7 +265,7 @@ function ExampleNode(node) {
|
|
|
509
265
|
dcSource: optAttr(node, "dc:source")
|
|
510
266
|
};
|
|
511
267
|
return Example.parse(
|
|
512
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
268
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
513
269
|
);
|
|
514
270
|
}
|
|
515
271
|
function ILIDefinitionNode(node) {
|
|
@@ -546,7 +302,7 @@ function SynsetNode(node) {
|
|
|
546
302
|
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
547
303
|
};
|
|
548
304
|
return Synset.parse(
|
|
549
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
305
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
550
306
|
);
|
|
551
307
|
}
|
|
552
308
|
function LexiconNode(node) {
|
|
@@ -575,7 +331,9 @@ var decodeXmlEntities = (s) => {
|
|
|
575
331
|
var attr = (node, attrName) => {
|
|
576
332
|
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
577
333
|
if (value === void 0) {
|
|
578
|
-
throw new Error(
|
|
334
|
+
throw new Error(
|
|
335
|
+
`Missing required attribute "${attrName}" on node "${node.type}"`
|
|
336
|
+
);
|
|
579
337
|
}
|
|
580
338
|
return value;
|
|
581
339
|
};
|
|
@@ -593,19 +351,377 @@ var extendWithRestAttr = (node, obj, proxy) => {
|
|
|
593
351
|
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
594
352
|
};
|
|
595
353
|
var children = (node, type, fn) => {
|
|
596
|
-
return node.children.filter((v) => v.type
|
|
354
|
+
return node.children.filter((v) => v.type === type).map((v) => fn(v));
|
|
597
355
|
};
|
|
598
356
|
|
|
599
|
-
// src/
|
|
600
|
-
var
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
357
|
+
// src/literals.ts
|
|
358
|
+
var PartsOfSpeech2 = {
|
|
359
|
+
n: "Noun",
|
|
360
|
+
v: "Verb",
|
|
361
|
+
a: "Adjective",
|
|
362
|
+
r: "Adverb",
|
|
363
|
+
s: "Adjective Satellite",
|
|
364
|
+
t: "?",
|
|
365
|
+
c: "Conjunction",
|
|
366
|
+
p: "Adposition (Preposition, postposition, etc.)",
|
|
367
|
+
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
368
|
+
u: "Unknown"
|
|
369
|
+
};
|
|
370
|
+
var SynsetRelationRelType2 = {
|
|
371
|
+
agent: "Agent",
|
|
372
|
+
also: "See also",
|
|
373
|
+
anto_converse: "Converse antonym",
|
|
374
|
+
anto_gradable: "Gradable antonym",
|
|
375
|
+
anto_simple: "Simple antonym",
|
|
376
|
+
antonym: "Antonym",
|
|
377
|
+
attribute: "Attribute",
|
|
378
|
+
augmentative: "Augmentative",
|
|
379
|
+
be_in_state: "Be in state",
|
|
380
|
+
cause: "Cause",
|
|
381
|
+
causes: "Causes",
|
|
382
|
+
classified_by: "Classified by",
|
|
383
|
+
classifies: "Classifies",
|
|
384
|
+
co_agent_instrument: "Co-agent instrument",
|
|
385
|
+
co_agent_patient: "Co-agent patient",
|
|
386
|
+
co_agent_result: "Co-agent result",
|
|
387
|
+
co_instrument_agent: "Co-instrument agent",
|
|
388
|
+
co_instrument_patient: "Co-instrument patient",
|
|
389
|
+
co_instrument_result: "Co-instrument result",
|
|
390
|
+
co_patient_agent: "Co-patient agent",
|
|
391
|
+
co_patient_instrument: "Co-patient instrument",
|
|
392
|
+
co_result_agent: "Co-result agent",
|
|
393
|
+
co_result_instrument: "Co-result instrument",
|
|
394
|
+
co_role: "Co-role",
|
|
395
|
+
diminutive: "Diminutive",
|
|
396
|
+
direction: "Direction",
|
|
397
|
+
domain_member_region: "Domain member region",
|
|
398
|
+
domain_member_topic: "Domain member topic",
|
|
399
|
+
domain_region: "Domain region",
|
|
400
|
+
domain_topic: "Domain topic",
|
|
401
|
+
entail: "Entail",
|
|
402
|
+
entails: "Entails",
|
|
403
|
+
eq_synonym: "Equivalent synonym",
|
|
404
|
+
exemplifies: "Exemplifies",
|
|
405
|
+
feminine: "Feminine",
|
|
406
|
+
has_augmentative: "Has augmentative",
|
|
407
|
+
has_diminutive: "Has diminutive",
|
|
408
|
+
has_domain_region: "Has domain region",
|
|
409
|
+
has_domain_topic: "Has domain topic",
|
|
410
|
+
has_feminine: "Has feminine",
|
|
411
|
+
has_masculine: "Has masculine",
|
|
412
|
+
has_young: "Has young",
|
|
413
|
+
holo_location: "Holonym location",
|
|
414
|
+
holo_member: "Member holonym",
|
|
415
|
+
holo_part: "Part holonym",
|
|
416
|
+
holo_portion: "Portion holonym",
|
|
417
|
+
holo_substance: "Substance holonym",
|
|
418
|
+
holonym: "Holonym",
|
|
419
|
+
hypernym: "Hypernym",
|
|
420
|
+
hyponym: "Hyponym",
|
|
421
|
+
in_manner: "In manner",
|
|
422
|
+
instance_hypernym: "Instance hypernym",
|
|
423
|
+
instance_hyponym: "Instance hyponym",
|
|
424
|
+
instrument: "Instrument",
|
|
425
|
+
involved: "Involved",
|
|
426
|
+
involved_agent: "Involved agent",
|
|
427
|
+
involved_direction: "Involved direction",
|
|
428
|
+
involved_instrument: "Involved instrument",
|
|
429
|
+
involved_location: "Involved location",
|
|
430
|
+
involved_patient: "Involved patient",
|
|
431
|
+
involved_result: "Involved result",
|
|
432
|
+
involved_source_direction: "Involved source direction",
|
|
433
|
+
involved_target_direction: "Involved target direction",
|
|
434
|
+
ir_synonym: "IR synonym",
|
|
435
|
+
is_caused_by: "Is caused by",
|
|
436
|
+
is_entailed_by: "Is entailed by",
|
|
437
|
+
is_exemplified_by: "Is exemplified by",
|
|
438
|
+
is_subevent_of: "Is subevent of",
|
|
439
|
+
location: "Location",
|
|
440
|
+
manner_of: "Manner of",
|
|
441
|
+
masculine: "Masculine",
|
|
442
|
+
member_holonym: "Member holonym",
|
|
443
|
+
member_meronym: "Member meronym",
|
|
444
|
+
mero_location: "Meronym location",
|
|
445
|
+
mero_member: "Member meronym",
|
|
446
|
+
mero_part: "Part meronym",
|
|
447
|
+
mero_portion: "Portion meronym",
|
|
448
|
+
mero_substance: "Substance meronym",
|
|
449
|
+
meronym: "Meronym",
|
|
450
|
+
other: "Other",
|
|
451
|
+
part_holonym: "Part holonym",
|
|
452
|
+
part_meronym: "Part meronym",
|
|
453
|
+
patient: "Patient",
|
|
454
|
+
restricted_by: "Restricted by",
|
|
455
|
+
restricts: "Restricts",
|
|
456
|
+
result: "Result",
|
|
457
|
+
role: "Role",
|
|
458
|
+
similar: "Similar",
|
|
459
|
+
source_direction: "Source direction",
|
|
460
|
+
state_of: "State of",
|
|
461
|
+
subevent: "Subevent",
|
|
462
|
+
substance_holonym: "Substance holonym",
|
|
463
|
+
substance_meronym: "Substance meronym",
|
|
464
|
+
target_direction: "Target direction",
|
|
465
|
+
young: "Young"
|
|
466
|
+
};
|
|
467
|
+
|
|
468
|
+
// src/loader.ts
|
|
469
|
+
var import_node_fs = require("fs");
|
|
470
|
+
var import_node_path = __toESM(require("path"), 1);
|
|
471
|
+
var import_node_stream = require("stream");
|
|
472
|
+
|
|
473
|
+
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
474
|
+
var Node = class {
|
|
475
|
+
#type;
|
|
476
|
+
#children;
|
|
477
|
+
#parent;
|
|
478
|
+
#attr;
|
|
479
|
+
#raw;
|
|
480
|
+
constructor(type, parent, raw) {
|
|
481
|
+
this.#type = type;
|
|
482
|
+
this.#parent = parent;
|
|
483
|
+
this.#raw = raw;
|
|
484
|
+
this.#children = [];
|
|
485
|
+
}
|
|
486
|
+
get type() {
|
|
487
|
+
return this.#type;
|
|
488
|
+
}
|
|
489
|
+
get raw() {
|
|
490
|
+
return this.#raw ?? "";
|
|
491
|
+
}
|
|
492
|
+
get parent() {
|
|
493
|
+
return this.#parent;
|
|
494
|
+
}
|
|
495
|
+
get children() {
|
|
496
|
+
return this.#children;
|
|
497
|
+
}
|
|
498
|
+
get attributes() {
|
|
499
|
+
if (this.#attr) {
|
|
500
|
+
return this.#attr;
|
|
501
|
+
}
|
|
502
|
+
this.#attr = {};
|
|
503
|
+
if (this.raw) {
|
|
504
|
+
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
505
|
+
let match;
|
|
506
|
+
while ((match = regex.exec(this.raw)) !== null) {
|
|
507
|
+
this.#attr[match[1]] = match[3];
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
return this.#attr;
|
|
511
|
+
}
|
|
512
|
+
get innerText() {
|
|
513
|
+
if (this.children.length) {
|
|
514
|
+
let text = "";
|
|
515
|
+
for (const child of this.children) {
|
|
516
|
+
text += child.innerText;
|
|
517
|
+
}
|
|
518
|
+
return text;
|
|
519
|
+
}
|
|
520
|
+
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
521
|
+
}
|
|
522
|
+
addChild(child) {
|
|
523
|
+
this.#children.push(child);
|
|
524
|
+
}
|
|
525
|
+
/**
|
|
526
|
+
* Returns true if node and parents match the key hierarchy
|
|
527
|
+
* @param keys - XML tag names
|
|
528
|
+
*/
|
|
529
|
+
is(...keys) {
|
|
530
|
+
if (!keys.length) return false;
|
|
531
|
+
let parent;
|
|
532
|
+
for (const key of keys.toReversed()) {
|
|
533
|
+
parent = parent ? parent.parent : this;
|
|
534
|
+
if (parent?.type !== key) {
|
|
535
|
+
return false;
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
return true;
|
|
539
|
+
}
|
|
540
|
+
/**
|
|
541
|
+
* Return the first child matching the key
|
|
542
|
+
* @param key - XML tag name
|
|
543
|
+
*/
|
|
544
|
+
first(key) {
|
|
545
|
+
return this.children.find((n) => n.type === key);
|
|
546
|
+
}
|
|
547
|
+
/**
|
|
548
|
+
* Return all children matching the key hierarchy
|
|
549
|
+
* @param keys - XML tag names
|
|
550
|
+
*/
|
|
551
|
+
all(...keys) {
|
|
552
|
+
let nodes = this.children;
|
|
553
|
+
let found = [];
|
|
554
|
+
for (const [i, k] of Object.entries(keys)) {
|
|
555
|
+
if (Number.parseInt(i) === keys.length - 1) {
|
|
556
|
+
found = nodes.filter((n) => n.type === k);
|
|
557
|
+
break;
|
|
558
|
+
}
|
|
559
|
+
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
560
|
+
if (!nodes) return [];
|
|
561
|
+
}
|
|
562
|
+
return found;
|
|
563
|
+
}
|
|
564
|
+
};
|
|
565
|
+
|
|
566
|
+
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
567
|
+
var ENTITIES = {
|
|
568
|
+
cdata: {
|
|
569
|
+
end: "]]>",
|
|
570
|
+
start: /^<!\[CDATA\[/
|
|
571
|
+
},
|
|
572
|
+
comment: {
|
|
573
|
+
end: "-->",
|
|
574
|
+
start: /^<!--/
|
|
575
|
+
},
|
|
576
|
+
declaration: {
|
|
577
|
+
end: "?>",
|
|
578
|
+
start: /^<\?/
|
|
579
|
+
},
|
|
580
|
+
doctype: {
|
|
581
|
+
end: ">",
|
|
582
|
+
start: /^<!DOCTYPE/i
|
|
583
|
+
},
|
|
584
|
+
element: {
|
|
585
|
+
end: ">",
|
|
586
|
+
start: /^<[\w:.-/]/
|
|
587
|
+
}
|
|
588
|
+
};
|
|
589
|
+
var transformer = {
|
|
590
|
+
buf: "",
|
|
591
|
+
state: "skip" /* SKIP */,
|
|
592
|
+
previous: ["skip" /* SKIP */, -1],
|
|
593
|
+
flush(controller) {
|
|
594
|
+
if (this.buf.length > 0) {
|
|
595
|
+
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
596
|
+
}
|
|
597
|
+
},
|
|
598
|
+
transform(chunk, controller) {
|
|
599
|
+
this.buf += chunk;
|
|
600
|
+
while (this.buf.length) {
|
|
601
|
+
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
602
|
+
break;
|
|
603
|
+
}
|
|
604
|
+
this.previous = [this.state, this.buf.length];
|
|
605
|
+
if (this.state === "skip" /* SKIP */) {
|
|
606
|
+
const index = this.buf.indexOf("<");
|
|
607
|
+
if (index < 0) break;
|
|
608
|
+
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
609
|
+
this.buf = this.buf.substring(index);
|
|
610
|
+
this.state = "search" /* SEARCH */;
|
|
611
|
+
}
|
|
612
|
+
if (this.state === "search" /* SEARCH */) {
|
|
613
|
+
if (this.buf.length < 3) break;
|
|
614
|
+
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
615
|
+
if (this.buf.match(entity.start)) {
|
|
616
|
+
this.state = state;
|
|
617
|
+
break;
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
continue;
|
|
621
|
+
}
|
|
622
|
+
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
623
|
+
const { end } = ENTITIES[this.state];
|
|
624
|
+
const index = this.buf.indexOf(end);
|
|
625
|
+
if (index < 0) break;
|
|
626
|
+
controller.enqueue([
|
|
627
|
+
this.state,
|
|
628
|
+
this.buf.substring(0, index + end.length)
|
|
629
|
+
]);
|
|
630
|
+
this.buf = this.buf.substring(index + end.length);
|
|
631
|
+
this.state = "skip" /* SKIP */;
|
|
632
|
+
continue;
|
|
633
|
+
}
|
|
634
|
+
throw new Error();
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
};
|
|
638
|
+
var XMLStream = class extends TransformStream {
|
|
639
|
+
constructor() {
|
|
640
|
+
super({ ...transformer });
|
|
641
|
+
}
|
|
642
|
+
};
|
|
643
|
+
|
|
644
|
+
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
645
|
+
var ignoreTypes = {
|
|
646
|
+
["comment" /* COMMENT */]: "ignoreComments",
|
|
647
|
+
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
648
|
+
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
649
|
+
};
|
|
650
|
+
async function* parse(input, options) {
|
|
651
|
+
const document = new Node("@document");
|
|
652
|
+
try {
|
|
653
|
+
const init = { ...options?.fetchOptions };
|
|
654
|
+
if (options?.signal) {
|
|
655
|
+
init.signal = options.signal;
|
|
656
|
+
}
|
|
657
|
+
let source;
|
|
658
|
+
if (typeof input === "string" || input instanceof URL) {
|
|
659
|
+
input = new URL(input);
|
|
660
|
+
const response = await fetch(input, init);
|
|
661
|
+
if (!response.ok || !response.body) {
|
|
662
|
+
throw new Error(`Bad response`);
|
|
663
|
+
}
|
|
664
|
+
source = response.body;
|
|
665
|
+
} else {
|
|
666
|
+
source = input;
|
|
667
|
+
}
|
|
668
|
+
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
669
|
+
signal: options?.signal
|
|
670
|
+
});
|
|
671
|
+
let node = document;
|
|
672
|
+
for await (const [type, value] of stream) {
|
|
673
|
+
if (options?.signal?.aborted) {
|
|
674
|
+
break;
|
|
675
|
+
}
|
|
676
|
+
if (type === "text" /* TEXT */) {
|
|
677
|
+
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
678
|
+
continue;
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
682
|
+
const newNode = new Node(type, node, value);
|
|
683
|
+
node.addChild(newNode);
|
|
684
|
+
yield newNode;
|
|
685
|
+
continue;
|
|
686
|
+
}
|
|
687
|
+
if (type === "element" /* ELEMENT */) {
|
|
688
|
+
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
689
|
+
if (value.endsWith("/>")) {
|
|
690
|
+
const newNode2 = new Node(name, node, value);
|
|
691
|
+
node.addChild(newNode2);
|
|
692
|
+
yield newNode2;
|
|
693
|
+
continue;
|
|
694
|
+
}
|
|
695
|
+
if (value.startsWith("</")) {
|
|
696
|
+
yield node;
|
|
697
|
+
node = node.parent;
|
|
698
|
+
continue;
|
|
699
|
+
}
|
|
700
|
+
const newNode = new Node(name, node, value);
|
|
701
|
+
node.addChild(newNode);
|
|
702
|
+
node = newNode;
|
|
703
|
+
continue;
|
|
704
|
+
}
|
|
705
|
+
node.addChild(new Node(type, node, value));
|
|
706
|
+
}
|
|
707
|
+
} catch (err) {
|
|
708
|
+
if (options?.silent === false) {
|
|
709
|
+
throw err;
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
return document;
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
// src/loader.ts
|
|
716
|
+
var BASE_VERSION = "2024";
|
|
717
|
+
function getFilename(version) {
|
|
718
|
+
return `english-wordnet-${version}.xml`;
|
|
719
|
+
}
|
|
720
|
+
function getDownloadUrl(version) {
|
|
721
|
+
return `https://en-word.net/static/${getFilename(version)}.gz`;
|
|
722
|
+
}
|
|
723
|
+
function getDefaultCacheDir() {
|
|
724
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
|
|
609
725
|
return import_node_path.default.join(homeDir, ".cache", "synset");
|
|
610
726
|
}
|
|
611
727
|
function fileExists(filePath) {
|
|
@@ -661,7 +777,6 @@ async function findLatestVersion(onProgress, cacheDir) {
|
|
|
661
777
|
for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
|
|
662
778
|
const version = year.toString();
|
|
663
779
|
if (await urlExists(getDownloadUrl(version))) {
|
|
664
|
-
continue;
|
|
665
780
|
} else {
|
|
666
781
|
return (year - 1).toString();
|
|
667
782
|
}
|
|
@@ -682,9 +797,13 @@ async function downloadWordNet(version, destPath) {
|
|
|
682
797
|
const url = getDownloadUrl(version);
|
|
683
798
|
const response = await fetch(url);
|
|
684
799
|
if (!response.ok || !response.body) {
|
|
685
|
-
throw new Error(
|
|
800
|
+
throw new Error(
|
|
801
|
+
`Failed to download WordNet ${version}: ${response.statusText}`
|
|
802
|
+
);
|
|
686
803
|
}
|
|
687
|
-
const decompressed = response.body.pipeThrough(
|
|
804
|
+
const decompressed = response.body.pipeThrough(
|
|
805
|
+
new DecompressionStream("gzip")
|
|
806
|
+
);
|
|
688
807
|
const arrayBuffer = await new Response(decompressed).arrayBuffer();
|
|
689
808
|
const dir = import_node_path.default.dirname(destPath);
|
|
690
809
|
if (!(0, import_node_fs.existsSync)(dir)) {
|
|
@@ -694,8 +813,9 @@ async function downloadWordNet(version, destPath) {
|
|
|
694
813
|
}
|
|
695
814
|
function createParser(filePath) {
|
|
696
815
|
const resolvedPath = import_node_path.default.resolve(filePath);
|
|
697
|
-
const
|
|
698
|
-
|
|
816
|
+
const nodeStream = (0, import_node_fs.createReadStream)(resolvedPath);
|
|
817
|
+
const webStream = import_node_stream.Readable.toWeb(nodeStream);
|
|
818
|
+
return parse(webStream, {
|
|
699
819
|
ignoreDeclaration: false,
|
|
700
820
|
silent: false
|
|
701
821
|
});
|
|
@@ -848,117 +968,6 @@ function getSynsetWords(index, synset) {
|
|
|
848
968
|
return synset.members.map((id) => index.entries.get(id)).filter((e) => e !== void 0).map((e) => e.lemmas[0]?.writtenForm).filter((w) => w !== void 0);
|
|
849
969
|
}
|
|
850
970
|
|
|
851
|
-
// src/literals.ts
|
|
852
|
-
var PartsOfSpeech2 = {
|
|
853
|
-
n: "Noun",
|
|
854
|
-
v: "Verb",
|
|
855
|
-
a: "Adjective",
|
|
856
|
-
r: "Adverb",
|
|
857
|
-
s: "Adjective Satellite",
|
|
858
|
-
t: "?",
|
|
859
|
-
c: "Conjunction",
|
|
860
|
-
p: "Adposition (Preposition, postposition, etc.)",
|
|
861
|
-
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
862
|
-
u: "Unknown"
|
|
863
|
-
};
|
|
864
|
-
var SynsetRelationRelType2 = {
|
|
865
|
-
agent: "Agent",
|
|
866
|
-
also: "See also",
|
|
867
|
-
anto_converse: "Converse antonym",
|
|
868
|
-
anto_gradable: "Gradable antonym",
|
|
869
|
-
anto_simple: "Simple antonym",
|
|
870
|
-
antonym: "Antonym",
|
|
871
|
-
attribute: "Attribute",
|
|
872
|
-
augmentative: "Augmentative",
|
|
873
|
-
be_in_state: "Be in state",
|
|
874
|
-
cause: "Cause",
|
|
875
|
-
causes: "Causes",
|
|
876
|
-
classified_by: "Classified by",
|
|
877
|
-
classifies: "Classifies",
|
|
878
|
-
co_agent_instrument: "Co-agent instrument",
|
|
879
|
-
co_agent_patient: "Co-agent patient",
|
|
880
|
-
co_agent_result: "Co-agent result",
|
|
881
|
-
co_instrument_agent: "Co-instrument agent",
|
|
882
|
-
co_instrument_patient: "Co-instrument patient",
|
|
883
|
-
co_instrument_result: "Co-instrument result",
|
|
884
|
-
co_patient_agent: "Co-patient agent",
|
|
885
|
-
co_patient_instrument: "Co-patient instrument",
|
|
886
|
-
co_result_agent: "Co-result agent",
|
|
887
|
-
co_result_instrument: "Co-result instrument",
|
|
888
|
-
co_role: "Co-role",
|
|
889
|
-
diminutive: "Diminutive",
|
|
890
|
-
direction: "Direction",
|
|
891
|
-
domain_member_region: "Domain member region",
|
|
892
|
-
domain_member_topic: "Domain member topic",
|
|
893
|
-
domain_region: "Domain region",
|
|
894
|
-
domain_topic: "Domain topic",
|
|
895
|
-
entail: "Entail",
|
|
896
|
-
entails: "Entails",
|
|
897
|
-
eq_synonym: "Equivalent synonym",
|
|
898
|
-
exemplifies: "Exemplifies",
|
|
899
|
-
feminine: "Feminine",
|
|
900
|
-
has_augmentative: "Has augmentative",
|
|
901
|
-
has_diminutive: "Has diminutive",
|
|
902
|
-
has_domain_region: "Has domain region",
|
|
903
|
-
has_domain_topic: "Has domain topic",
|
|
904
|
-
has_feminine: "Has feminine",
|
|
905
|
-
has_masculine: "Has masculine",
|
|
906
|
-
has_young: "Has young",
|
|
907
|
-
holo_location: "Holonym location",
|
|
908
|
-
holo_member: "Member holonym",
|
|
909
|
-
holo_part: "Part holonym",
|
|
910
|
-
holo_portion: "Portion holonym",
|
|
911
|
-
holo_substance: "Substance holonym",
|
|
912
|
-
holonym: "Holonym",
|
|
913
|
-
hypernym: "Hypernym",
|
|
914
|
-
hyponym: "Hyponym",
|
|
915
|
-
in_manner: "In manner",
|
|
916
|
-
instance_hypernym: "Instance hypernym",
|
|
917
|
-
instance_hyponym: "Instance hyponym",
|
|
918
|
-
instrument: "Instrument",
|
|
919
|
-
involved: "Involved",
|
|
920
|
-
involved_agent: "Involved agent",
|
|
921
|
-
involved_direction: "Involved direction",
|
|
922
|
-
involved_instrument: "Involved instrument",
|
|
923
|
-
involved_location: "Involved location",
|
|
924
|
-
involved_patient: "Involved patient",
|
|
925
|
-
involved_result: "Involved result",
|
|
926
|
-
involved_source_direction: "Involved source direction",
|
|
927
|
-
involved_target_direction: "Involved target direction",
|
|
928
|
-
ir_synonym: "IR synonym",
|
|
929
|
-
is_caused_by: "Is caused by",
|
|
930
|
-
is_entailed_by: "Is entailed by",
|
|
931
|
-
is_exemplified_by: "Is exemplified by",
|
|
932
|
-
is_subevent_of: "Is subevent of",
|
|
933
|
-
location: "Location",
|
|
934
|
-
manner_of: "Manner of",
|
|
935
|
-
masculine: "Masculine",
|
|
936
|
-
member_holonym: "Member holonym",
|
|
937
|
-
member_meronym: "Member meronym",
|
|
938
|
-
mero_location: "Meronym location",
|
|
939
|
-
mero_member: "Member meronym",
|
|
940
|
-
mero_part: "Part meronym",
|
|
941
|
-
mero_portion: "Portion meronym",
|
|
942
|
-
mero_substance: "Substance meronym",
|
|
943
|
-
meronym: "Meronym",
|
|
944
|
-
other: "Other",
|
|
945
|
-
part_holonym: "Part holonym",
|
|
946
|
-
part_meronym: "Part meronym",
|
|
947
|
-
patient: "Patient",
|
|
948
|
-
restricted_by: "Restricted by",
|
|
949
|
-
restricts: "Restricts",
|
|
950
|
-
result: "Result",
|
|
951
|
-
role: "Role",
|
|
952
|
-
similar: "Similar",
|
|
953
|
-
source_direction: "Source direction",
|
|
954
|
-
state_of: "State of",
|
|
955
|
-
subevent: "Subevent",
|
|
956
|
-
substance_holonym: "Substance holonym",
|
|
957
|
-
substance_meronym: "Substance meronym",
|
|
958
|
-
target_direction: "Target direction",
|
|
959
|
-
young: "Young"
|
|
960
|
-
};
|
|
961
|
-
|
|
962
971
|
// src/cli.ts
|
|
963
972
|
var decode = (s) => decodeXmlEntities(s) ?? "";
|
|
964
973
|
var HELP = `
|
|
@@ -1089,7 +1098,7 @@ async function main() {
|
|
|
1089
1098
|
for (const [relType, words] of relsByType) {
|
|
1090
1099
|
const label = SynsetRelationRelType2[relType] || relType;
|
|
1091
1100
|
console.log(` ${label}:`);
|
|
1092
|
-
|
|
1101
|
+
for (const w of words) console.log(` - ${w}`);
|
|
1093
1102
|
}
|
|
1094
1103
|
}
|
|
1095
1104
|
break;
|
|
@@ -1108,11 +1117,12 @@ async function main() {
|
|
|
1108
1117
|
console.log(`ILI: ${synset.ili}`);
|
|
1109
1118
|
console.log(`
|
|
1110
1119
|
Definitions:`);
|
|
1111
|
-
synset.definitions
|
|
1120
|
+
for (const d of synset.definitions) console.log(` - ${decode(d.inner)}`);
|
|
1112
1121
|
if (synset.examples.length > 0) {
|
|
1113
1122
|
console.log(`
|
|
1114
1123
|
Examples:`);
|
|
1115
|
-
|
|
1124
|
+
for (const e of synset.examples)
|
|
1125
|
+
console.log(` - "${decode(e.inner)}"`);
|
|
1116
1126
|
}
|
|
1117
1127
|
if (synset.synsetRelations.length > 0) {
|
|
1118
1128
|
console.log(`
|