synset 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -44
- package/dist/cli.cjs +392 -382
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +400 -383
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +175 -166
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +7 -7
- package/dist/index.d.ts +7 -7
- package/dist/index.js +183 -167
- package/dist/index.js.map +1 -1
- package/package.json +10 -5
package/dist/cli.js
CHANGED
|
@@ -1,251 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
-
// src/loader.ts
|
|
4
|
-
import { existsSync, statSync, writeFileSync, mkdirSync, readdirSync } from "fs";
|
|
5
|
-
import path from "path";
|
|
6
|
-
|
|
7
|
-
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
8
|
-
var Node = class {
|
|
9
|
-
#type;
|
|
10
|
-
#children;
|
|
11
|
-
#parent;
|
|
12
|
-
#attr;
|
|
13
|
-
#raw;
|
|
14
|
-
constructor(type, parent, raw) {
|
|
15
|
-
this.#type = type;
|
|
16
|
-
this.#parent = parent;
|
|
17
|
-
this.#raw = raw;
|
|
18
|
-
this.#children = [];
|
|
19
|
-
}
|
|
20
|
-
get type() {
|
|
21
|
-
return this.#type;
|
|
22
|
-
}
|
|
23
|
-
get raw() {
|
|
24
|
-
return this.#raw ?? "";
|
|
25
|
-
}
|
|
26
|
-
get parent() {
|
|
27
|
-
return this.#parent;
|
|
28
|
-
}
|
|
29
|
-
get children() {
|
|
30
|
-
return this.#children;
|
|
31
|
-
}
|
|
32
|
-
get attributes() {
|
|
33
|
-
if (this.#attr) {
|
|
34
|
-
return this.#attr;
|
|
35
|
-
}
|
|
36
|
-
this.#attr = {};
|
|
37
|
-
if (this.raw) {
|
|
38
|
-
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
39
|
-
let match;
|
|
40
|
-
while ((match = regex.exec(this.raw)) !== null) {
|
|
41
|
-
this.#attr[match[1]] = match[3];
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
return this.#attr;
|
|
45
|
-
}
|
|
46
|
-
get innerText() {
|
|
47
|
-
if (this.children.length) {
|
|
48
|
-
let text = "";
|
|
49
|
-
for (const child of this.children) {
|
|
50
|
-
text += child.innerText;
|
|
51
|
-
}
|
|
52
|
-
return text;
|
|
53
|
-
}
|
|
54
|
-
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
55
|
-
}
|
|
56
|
-
addChild(child) {
|
|
57
|
-
this.#children.push(child);
|
|
58
|
-
}
|
|
59
|
-
/**
|
|
60
|
-
* Returns true if node and parents match the key hierarchy
|
|
61
|
-
* @param keys - XML tag names
|
|
62
|
-
*/
|
|
63
|
-
is(...keys) {
|
|
64
|
-
if (!keys.length) return false;
|
|
65
|
-
let parent;
|
|
66
|
-
for (const key of keys.toReversed()) {
|
|
67
|
-
parent = parent ? parent.parent : this;
|
|
68
|
-
if (parent?.type !== key) {
|
|
69
|
-
return false;
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
return true;
|
|
73
|
-
}
|
|
74
|
-
/**
|
|
75
|
-
* Return the first child matching the key
|
|
76
|
-
* @param key - XML tag name
|
|
77
|
-
*/
|
|
78
|
-
first(key) {
|
|
79
|
-
return this.children.find((n) => n.type === key);
|
|
80
|
-
}
|
|
81
|
-
/**
|
|
82
|
-
* Return all children matching the key hierarchy
|
|
83
|
-
* @param keys - XML tag names
|
|
84
|
-
*/
|
|
85
|
-
all(...keys) {
|
|
86
|
-
let nodes = this.children;
|
|
87
|
-
let found = [];
|
|
88
|
-
for (const [i, k] of Object.entries(keys)) {
|
|
89
|
-
if (Number.parseInt(i) === keys.length - 1) {
|
|
90
|
-
found = nodes.filter((n) => n.type === k);
|
|
91
|
-
break;
|
|
92
|
-
}
|
|
93
|
-
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
94
|
-
if (!nodes) return [];
|
|
95
|
-
}
|
|
96
|
-
return found;
|
|
97
|
-
}
|
|
98
|
-
};
|
|
99
|
-
|
|
100
|
-
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
101
|
-
var ENTITIES = {
|
|
102
|
-
cdata: {
|
|
103
|
-
end: "]]>",
|
|
104
|
-
start: /^<!\[CDATA\[/
|
|
105
|
-
},
|
|
106
|
-
comment: {
|
|
107
|
-
end: "-->",
|
|
108
|
-
start: /^<!--/
|
|
109
|
-
},
|
|
110
|
-
declaration: {
|
|
111
|
-
end: "?>",
|
|
112
|
-
start: /^<\?/
|
|
113
|
-
},
|
|
114
|
-
doctype: {
|
|
115
|
-
end: ">",
|
|
116
|
-
start: /^<!DOCTYPE/i
|
|
117
|
-
},
|
|
118
|
-
element: {
|
|
119
|
-
end: ">",
|
|
120
|
-
start: /^<[\w:.-/]/
|
|
121
|
-
}
|
|
122
|
-
};
|
|
123
|
-
var transformer = {
|
|
124
|
-
buf: "",
|
|
125
|
-
state: "skip" /* SKIP */,
|
|
126
|
-
previous: ["skip" /* SKIP */, -1],
|
|
127
|
-
flush(controller) {
|
|
128
|
-
if (this.buf.length > 0) {
|
|
129
|
-
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
130
|
-
}
|
|
131
|
-
},
|
|
132
|
-
transform(chunk, controller) {
|
|
133
|
-
this.buf += chunk;
|
|
134
|
-
while (this.buf.length) {
|
|
135
|
-
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
136
|
-
break;
|
|
137
|
-
}
|
|
138
|
-
this.previous = [this.state, this.buf.length];
|
|
139
|
-
if (this.state === "skip" /* SKIP */) {
|
|
140
|
-
const index = this.buf.indexOf("<");
|
|
141
|
-
if (index < 0) break;
|
|
142
|
-
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
143
|
-
this.buf = this.buf.substring(index);
|
|
144
|
-
this.state = "search" /* SEARCH */;
|
|
145
|
-
}
|
|
146
|
-
if (this.state === "search" /* SEARCH */) {
|
|
147
|
-
if (this.buf.length < 3) break;
|
|
148
|
-
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
149
|
-
if (this.buf.match(entity.start)) {
|
|
150
|
-
this.state = state;
|
|
151
|
-
break;
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
continue;
|
|
155
|
-
}
|
|
156
|
-
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
157
|
-
const { end } = ENTITIES[this.state];
|
|
158
|
-
const index = this.buf.indexOf(end);
|
|
159
|
-
if (index < 0) break;
|
|
160
|
-
controller.enqueue([
|
|
161
|
-
this.state,
|
|
162
|
-
this.buf.substring(0, index + end.length)
|
|
163
|
-
]);
|
|
164
|
-
this.buf = this.buf.substring(index + end.length);
|
|
165
|
-
this.state = "skip" /* SKIP */;
|
|
166
|
-
continue;
|
|
167
|
-
}
|
|
168
|
-
throw new Error();
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
};
|
|
172
|
-
var XMLStream = class extends TransformStream {
|
|
173
|
-
constructor() {
|
|
174
|
-
super({ ...transformer });
|
|
175
|
-
}
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
179
|
-
var ignoreTypes = {
|
|
180
|
-
["comment" /* COMMENT */]: "ignoreComments",
|
|
181
|
-
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
182
|
-
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
183
|
-
};
|
|
184
|
-
async function* parse(input, options) {
|
|
185
|
-
const document = new Node("@document");
|
|
186
|
-
try {
|
|
187
|
-
const init = { ...options?.fetchOptions };
|
|
188
|
-
if (options?.signal) {
|
|
189
|
-
init.signal = options.signal;
|
|
190
|
-
}
|
|
191
|
-
let source;
|
|
192
|
-
if (typeof input === "string" || input instanceof URL) {
|
|
193
|
-
input = new URL(input);
|
|
194
|
-
const response = await fetch(input, init);
|
|
195
|
-
if (!response.ok || !response.body) {
|
|
196
|
-
throw new Error(`Bad response`);
|
|
197
|
-
}
|
|
198
|
-
source = response.body;
|
|
199
|
-
} else {
|
|
200
|
-
source = input;
|
|
201
|
-
}
|
|
202
|
-
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
203
|
-
signal: options?.signal
|
|
204
|
-
});
|
|
205
|
-
let node = document;
|
|
206
|
-
for await (const [type, value] of stream) {
|
|
207
|
-
if (options?.signal?.aborted) {
|
|
208
|
-
break;
|
|
209
|
-
}
|
|
210
|
-
if (type === "text" /* TEXT */) {
|
|
211
|
-
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
212
|
-
continue;
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
216
|
-
const newNode = new Node(type, node, value);
|
|
217
|
-
node.addChild(newNode);
|
|
218
|
-
yield newNode;
|
|
219
|
-
continue;
|
|
220
|
-
}
|
|
221
|
-
if (type === "element" /* ELEMENT */) {
|
|
222
|
-
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
223
|
-
if (value.endsWith("/>")) {
|
|
224
|
-
const newNode2 = new Node(name, node, value);
|
|
225
|
-
node.addChild(newNode2);
|
|
226
|
-
yield newNode2;
|
|
227
|
-
continue;
|
|
228
|
-
}
|
|
229
|
-
if (value.startsWith("</")) {
|
|
230
|
-
yield node;
|
|
231
|
-
node = node.parent;
|
|
232
|
-
continue;
|
|
233
|
-
}
|
|
234
|
-
const newNode = new Node(name, node, value);
|
|
235
|
-
node.addChild(newNode);
|
|
236
|
-
node = newNode;
|
|
237
|
-
continue;
|
|
238
|
-
}
|
|
239
|
-
node.addChild(new Node(type, node, value));
|
|
240
|
-
}
|
|
241
|
-
} catch (err) {
|
|
242
|
-
if (options?.silent === false) {
|
|
243
|
-
throw err;
|
|
244
|
-
}
|
|
245
|
-
}
|
|
246
|
-
return document;
|
|
247
|
-
}
|
|
248
|
-
|
|
249
3
|
// src/types.ts
|
|
250
4
|
import { z } from "zod";
|
|
251
5
|
var LexiconId = z.string();
|
|
@@ -411,7 +165,9 @@ var Lexicon = z.object({
|
|
|
411
165
|
synsets: z.array(Synset).min(0),
|
|
412
166
|
syntacticBehaviors: z.array(SyntacticBehavior).min(0)
|
|
413
167
|
});
|
|
414
|
-
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
168
|
+
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
169
|
+
(v) => v.value
|
|
170
|
+
);
|
|
415
171
|
|
|
416
172
|
// src/helpers.ts
|
|
417
173
|
function PronunciationNode(node) {
|
|
@@ -439,7 +195,7 @@ function SenseRelationNode(node) {
|
|
|
439
195
|
dcType: optAttr(node, "dc:type")
|
|
440
196
|
};
|
|
441
197
|
return SenseRelation.parse(
|
|
442
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
198
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
|
|
443
199
|
);
|
|
444
200
|
}
|
|
445
201
|
function SenseNode(node) {
|
|
@@ -455,7 +211,7 @@ function SenseNode(node) {
|
|
|
455
211
|
extendWithRestAttr(
|
|
456
212
|
node,
|
|
457
213
|
obj,
|
|
458
|
-
(s) => s
|
|
214
|
+
(s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
|
|
459
215
|
)
|
|
460
216
|
);
|
|
461
217
|
}
|
|
@@ -486,7 +242,7 @@ function ExampleNode(node) {
|
|
|
486
242
|
dcSource: optAttr(node, "dc:source")
|
|
487
243
|
};
|
|
488
244
|
return Example.parse(
|
|
489
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
245
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
490
246
|
);
|
|
491
247
|
}
|
|
492
248
|
function ILIDefinitionNode(node) {
|
|
@@ -523,7 +279,7 @@ function SynsetNode(node) {
|
|
|
523
279
|
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
524
280
|
};
|
|
525
281
|
return Synset.parse(
|
|
526
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
282
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
527
283
|
);
|
|
528
284
|
}
|
|
529
285
|
function LexiconNode(node) {
|
|
@@ -552,7 +308,9 @@ var decodeXmlEntities = (s) => {
|
|
|
552
308
|
var attr = (node, attrName) => {
|
|
553
309
|
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
554
310
|
if (value === void 0) {
|
|
555
|
-
throw new Error(
|
|
311
|
+
throw new Error(
|
|
312
|
+
`Missing required attribute "${attrName}" on node "${node.type}"`
|
|
313
|
+
);
|
|
556
314
|
}
|
|
557
315
|
return value;
|
|
558
316
|
};
|
|
@@ -570,20 +328,385 @@ var extendWithRestAttr = (node, obj, proxy) => {
|
|
|
570
328
|
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
571
329
|
};
|
|
572
330
|
var children = (node, type, fn) => {
|
|
573
|
-
return node.children.filter((v) => v.type
|
|
331
|
+
return node.children.filter((v) => v.type === type).map((v) => fn(v));
|
|
574
332
|
};
|
|
575
333
|
|
|
576
|
-
// src/
|
|
577
|
-
var
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
334
|
+
// src/literals.ts
|
|
335
|
+
var PartsOfSpeech2 = {
|
|
336
|
+
n: "Noun",
|
|
337
|
+
v: "Verb",
|
|
338
|
+
a: "Adjective",
|
|
339
|
+
r: "Adverb",
|
|
340
|
+
s: "Adjective Satellite",
|
|
341
|
+
t: "?",
|
|
342
|
+
c: "Conjunction",
|
|
343
|
+
p: "Adposition (Preposition, postposition, etc.)",
|
|
344
|
+
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
345
|
+
u: "Unknown"
|
|
346
|
+
};
|
|
347
|
+
var SynsetRelationRelType2 = {
|
|
348
|
+
agent: "Agent",
|
|
349
|
+
also: "See also",
|
|
350
|
+
anto_converse: "Converse antonym",
|
|
351
|
+
anto_gradable: "Gradable antonym",
|
|
352
|
+
anto_simple: "Simple antonym",
|
|
353
|
+
antonym: "Antonym",
|
|
354
|
+
attribute: "Attribute",
|
|
355
|
+
augmentative: "Augmentative",
|
|
356
|
+
be_in_state: "Be in state",
|
|
357
|
+
cause: "Cause",
|
|
358
|
+
causes: "Causes",
|
|
359
|
+
classified_by: "Classified by",
|
|
360
|
+
classifies: "Classifies",
|
|
361
|
+
co_agent_instrument: "Co-agent instrument",
|
|
362
|
+
co_agent_patient: "Co-agent patient",
|
|
363
|
+
co_agent_result: "Co-agent result",
|
|
364
|
+
co_instrument_agent: "Co-instrument agent",
|
|
365
|
+
co_instrument_patient: "Co-instrument patient",
|
|
366
|
+
co_instrument_result: "Co-instrument result",
|
|
367
|
+
co_patient_agent: "Co-patient agent",
|
|
368
|
+
co_patient_instrument: "Co-patient instrument",
|
|
369
|
+
co_result_agent: "Co-result agent",
|
|
370
|
+
co_result_instrument: "Co-result instrument",
|
|
371
|
+
co_role: "Co-role",
|
|
372
|
+
diminutive: "Diminutive",
|
|
373
|
+
direction: "Direction",
|
|
374
|
+
domain_member_region: "Domain member region",
|
|
375
|
+
domain_member_topic: "Domain member topic",
|
|
376
|
+
domain_region: "Domain region",
|
|
377
|
+
domain_topic: "Domain topic",
|
|
378
|
+
entail: "Entail",
|
|
379
|
+
entails: "Entails",
|
|
380
|
+
eq_synonym: "Equivalent synonym",
|
|
381
|
+
exemplifies: "Exemplifies",
|
|
382
|
+
feminine: "Feminine",
|
|
383
|
+
has_augmentative: "Has augmentative",
|
|
384
|
+
has_diminutive: "Has diminutive",
|
|
385
|
+
has_domain_region: "Has domain region",
|
|
386
|
+
has_domain_topic: "Has domain topic",
|
|
387
|
+
has_feminine: "Has feminine",
|
|
388
|
+
has_masculine: "Has masculine",
|
|
389
|
+
has_young: "Has young",
|
|
390
|
+
holo_location: "Holonym location",
|
|
391
|
+
holo_member: "Member holonym",
|
|
392
|
+
holo_part: "Part holonym",
|
|
393
|
+
holo_portion: "Portion holonym",
|
|
394
|
+
holo_substance: "Substance holonym",
|
|
395
|
+
holonym: "Holonym",
|
|
396
|
+
hypernym: "Hypernym",
|
|
397
|
+
hyponym: "Hyponym",
|
|
398
|
+
in_manner: "In manner",
|
|
399
|
+
instance_hypernym: "Instance hypernym",
|
|
400
|
+
instance_hyponym: "Instance hyponym",
|
|
401
|
+
instrument: "Instrument",
|
|
402
|
+
involved: "Involved",
|
|
403
|
+
involved_agent: "Involved agent",
|
|
404
|
+
involved_direction: "Involved direction",
|
|
405
|
+
involved_instrument: "Involved instrument",
|
|
406
|
+
involved_location: "Involved location",
|
|
407
|
+
involved_patient: "Involved patient",
|
|
408
|
+
involved_result: "Involved result",
|
|
409
|
+
involved_source_direction: "Involved source direction",
|
|
410
|
+
involved_target_direction: "Involved target direction",
|
|
411
|
+
ir_synonym: "IR synonym",
|
|
412
|
+
is_caused_by: "Is caused by",
|
|
413
|
+
is_entailed_by: "Is entailed by",
|
|
414
|
+
is_exemplified_by: "Is exemplified by",
|
|
415
|
+
is_subevent_of: "Is subevent of",
|
|
416
|
+
location: "Location",
|
|
417
|
+
manner_of: "Manner of",
|
|
418
|
+
masculine: "Masculine",
|
|
419
|
+
member_holonym: "Member holonym",
|
|
420
|
+
member_meronym: "Member meronym",
|
|
421
|
+
mero_location: "Meronym location",
|
|
422
|
+
mero_member: "Member meronym",
|
|
423
|
+
mero_part: "Part meronym",
|
|
424
|
+
mero_portion: "Portion meronym",
|
|
425
|
+
mero_substance: "Substance meronym",
|
|
426
|
+
meronym: "Meronym",
|
|
427
|
+
other: "Other",
|
|
428
|
+
part_holonym: "Part holonym",
|
|
429
|
+
part_meronym: "Part meronym",
|
|
430
|
+
patient: "Patient",
|
|
431
|
+
restricted_by: "Restricted by",
|
|
432
|
+
restricts: "Restricts",
|
|
433
|
+
result: "Result",
|
|
434
|
+
role: "Role",
|
|
435
|
+
similar: "Similar",
|
|
436
|
+
source_direction: "Source direction",
|
|
437
|
+
state_of: "State of",
|
|
438
|
+
subevent: "Subevent",
|
|
439
|
+
substance_holonym: "Substance holonym",
|
|
440
|
+
substance_meronym: "Substance meronym",
|
|
441
|
+
target_direction: "Target direction",
|
|
442
|
+
young: "Young"
|
|
443
|
+
};
|
|
444
|
+
|
|
445
|
+
// src/loader.ts
|
|
446
|
+
import {
|
|
447
|
+
createReadStream,
|
|
448
|
+
existsSync,
|
|
449
|
+
mkdirSync,
|
|
450
|
+
readdirSync,
|
|
451
|
+
statSync,
|
|
452
|
+
writeFileSync
|
|
453
|
+
} from "fs";
|
|
454
|
+
import path from "path";
|
|
455
|
+
import { Readable } from "stream";
|
|
456
|
+
|
|
457
|
+
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
458
|
+
var Node = class {
|
|
459
|
+
#type;
|
|
460
|
+
#children;
|
|
461
|
+
#parent;
|
|
462
|
+
#attr;
|
|
463
|
+
#raw;
|
|
464
|
+
constructor(type, parent, raw) {
|
|
465
|
+
this.#type = type;
|
|
466
|
+
this.#parent = parent;
|
|
467
|
+
this.#raw = raw;
|
|
468
|
+
this.#children = [];
|
|
469
|
+
}
|
|
470
|
+
get type() {
|
|
471
|
+
return this.#type;
|
|
472
|
+
}
|
|
473
|
+
get raw() {
|
|
474
|
+
return this.#raw ?? "";
|
|
475
|
+
}
|
|
476
|
+
get parent() {
|
|
477
|
+
return this.#parent;
|
|
478
|
+
}
|
|
479
|
+
get children() {
|
|
480
|
+
return this.#children;
|
|
481
|
+
}
|
|
482
|
+
get attributes() {
|
|
483
|
+
if (this.#attr) {
|
|
484
|
+
return this.#attr;
|
|
485
|
+
}
|
|
486
|
+
this.#attr = {};
|
|
487
|
+
if (this.raw) {
|
|
488
|
+
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
489
|
+
let match;
|
|
490
|
+
while ((match = regex.exec(this.raw)) !== null) {
|
|
491
|
+
this.#attr[match[1]] = match[3];
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
return this.#attr;
|
|
495
|
+
}
|
|
496
|
+
get innerText() {
|
|
497
|
+
if (this.children.length) {
|
|
498
|
+
let text = "";
|
|
499
|
+
for (const child of this.children) {
|
|
500
|
+
text += child.innerText;
|
|
501
|
+
}
|
|
502
|
+
return text;
|
|
503
|
+
}
|
|
504
|
+
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
505
|
+
}
|
|
506
|
+
addChild(child) {
|
|
507
|
+
this.#children.push(child);
|
|
508
|
+
}
|
|
509
|
+
/**
|
|
510
|
+
* Returns true if node and parents match the key hierarchy
|
|
511
|
+
* @param keys - XML tag names
|
|
512
|
+
*/
|
|
513
|
+
is(...keys) {
|
|
514
|
+
if (!keys.length) return false;
|
|
515
|
+
let parent;
|
|
516
|
+
for (const key of keys.toReversed()) {
|
|
517
|
+
parent = parent ? parent.parent : this;
|
|
518
|
+
if (parent?.type !== key) {
|
|
519
|
+
return false;
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
return true;
|
|
523
|
+
}
|
|
524
|
+
/**
|
|
525
|
+
* Return the first child matching the key
|
|
526
|
+
* @param key - XML tag name
|
|
527
|
+
*/
|
|
528
|
+
first(key) {
|
|
529
|
+
return this.children.find((n) => n.type === key);
|
|
530
|
+
}
|
|
531
|
+
/**
|
|
532
|
+
* Return all children matching the key hierarchy
|
|
533
|
+
* @param keys - XML tag names
|
|
534
|
+
*/
|
|
535
|
+
all(...keys) {
|
|
536
|
+
let nodes = this.children;
|
|
537
|
+
let found = [];
|
|
538
|
+
for (const [i, k] of Object.entries(keys)) {
|
|
539
|
+
if (Number.parseInt(i) === keys.length - 1) {
|
|
540
|
+
found = nodes.filter((n) => n.type === k);
|
|
541
|
+
break;
|
|
542
|
+
}
|
|
543
|
+
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
544
|
+
if (!nodes) return [];
|
|
545
|
+
}
|
|
546
|
+
return found;
|
|
547
|
+
}
|
|
548
|
+
};
|
|
549
|
+
|
|
550
|
+
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
551
|
+
var ENTITIES = {
|
|
552
|
+
cdata: {
|
|
553
|
+
end: "]]>",
|
|
554
|
+
start: /^<!\[CDATA\[/
|
|
555
|
+
},
|
|
556
|
+
comment: {
|
|
557
|
+
end: "-->",
|
|
558
|
+
start: /^<!--/
|
|
559
|
+
},
|
|
560
|
+
declaration: {
|
|
561
|
+
end: "?>",
|
|
562
|
+
start: /^<\?/
|
|
563
|
+
},
|
|
564
|
+
doctype: {
|
|
565
|
+
end: ">",
|
|
566
|
+
start: /^<!DOCTYPE/i
|
|
567
|
+
},
|
|
568
|
+
element: {
|
|
569
|
+
end: ">",
|
|
570
|
+
start: /^<[\w:.-/]/
|
|
571
|
+
}
|
|
572
|
+
};
|
|
573
|
+
var transformer = {
|
|
574
|
+
buf: "",
|
|
575
|
+
state: "skip" /* SKIP */,
|
|
576
|
+
previous: ["skip" /* SKIP */, -1],
|
|
577
|
+
flush(controller) {
|
|
578
|
+
if (this.buf.length > 0) {
|
|
579
|
+
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
580
|
+
}
|
|
581
|
+
},
|
|
582
|
+
transform(chunk, controller) {
|
|
583
|
+
this.buf += chunk;
|
|
584
|
+
while (this.buf.length) {
|
|
585
|
+
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
586
|
+
break;
|
|
587
|
+
}
|
|
588
|
+
this.previous = [this.state, this.buf.length];
|
|
589
|
+
if (this.state === "skip" /* SKIP */) {
|
|
590
|
+
const index = this.buf.indexOf("<");
|
|
591
|
+
if (index < 0) break;
|
|
592
|
+
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
593
|
+
this.buf = this.buf.substring(index);
|
|
594
|
+
this.state = "search" /* SEARCH */;
|
|
595
|
+
}
|
|
596
|
+
if (this.state === "search" /* SEARCH */) {
|
|
597
|
+
if (this.buf.length < 3) break;
|
|
598
|
+
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
599
|
+
if (this.buf.match(entity.start)) {
|
|
600
|
+
this.state = state;
|
|
601
|
+
break;
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
continue;
|
|
605
|
+
}
|
|
606
|
+
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
607
|
+
const { end } = ENTITIES[this.state];
|
|
608
|
+
const index = this.buf.indexOf(end);
|
|
609
|
+
if (index < 0) break;
|
|
610
|
+
controller.enqueue([
|
|
611
|
+
this.state,
|
|
612
|
+
this.buf.substring(0, index + end.length)
|
|
613
|
+
]);
|
|
614
|
+
this.buf = this.buf.substring(index + end.length);
|
|
615
|
+
this.state = "skip" /* SKIP */;
|
|
616
|
+
continue;
|
|
617
|
+
}
|
|
618
|
+
throw new Error();
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
};
|
|
622
|
+
var XMLStream = class extends TransformStream {
|
|
623
|
+
constructor() {
|
|
624
|
+
super({ ...transformer });
|
|
625
|
+
}
|
|
626
|
+
};
|
|
627
|
+
|
|
628
|
+
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
629
|
+
var ignoreTypes = {
|
|
630
|
+
["comment" /* COMMENT */]: "ignoreComments",
|
|
631
|
+
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
632
|
+
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
633
|
+
};
|
|
634
|
+
async function* parse(input, options) {
|
|
635
|
+
const document = new Node("@document");
|
|
636
|
+
try {
|
|
637
|
+
const init = { ...options?.fetchOptions };
|
|
638
|
+
if (options?.signal) {
|
|
639
|
+
init.signal = options.signal;
|
|
640
|
+
}
|
|
641
|
+
let source;
|
|
642
|
+
if (typeof input === "string" || input instanceof URL) {
|
|
643
|
+
input = new URL(input);
|
|
644
|
+
const response = await fetch(input, init);
|
|
645
|
+
if (!response.ok || !response.body) {
|
|
646
|
+
throw new Error(`Bad response`);
|
|
647
|
+
}
|
|
648
|
+
source = response.body;
|
|
649
|
+
} else {
|
|
650
|
+
source = input;
|
|
651
|
+
}
|
|
652
|
+
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
653
|
+
signal: options?.signal
|
|
654
|
+
});
|
|
655
|
+
let node = document;
|
|
656
|
+
for await (const [type, value] of stream) {
|
|
657
|
+
if (options?.signal?.aborted) {
|
|
658
|
+
break;
|
|
659
|
+
}
|
|
660
|
+
if (type === "text" /* TEXT */) {
|
|
661
|
+
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
662
|
+
continue;
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
666
|
+
const newNode = new Node(type, node, value);
|
|
667
|
+
node.addChild(newNode);
|
|
668
|
+
yield newNode;
|
|
669
|
+
continue;
|
|
670
|
+
}
|
|
671
|
+
if (type === "element" /* ELEMENT */) {
|
|
672
|
+
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
673
|
+
if (value.endsWith("/>")) {
|
|
674
|
+
const newNode2 = new Node(name, node, value);
|
|
675
|
+
node.addChild(newNode2);
|
|
676
|
+
yield newNode2;
|
|
677
|
+
continue;
|
|
678
|
+
}
|
|
679
|
+
if (value.startsWith("</")) {
|
|
680
|
+
yield node;
|
|
681
|
+
node = node.parent;
|
|
682
|
+
continue;
|
|
683
|
+
}
|
|
684
|
+
const newNode = new Node(name, node, value);
|
|
685
|
+
node.addChild(newNode);
|
|
686
|
+
node = newNode;
|
|
687
|
+
continue;
|
|
688
|
+
}
|
|
689
|
+
node.addChild(new Node(type, node, value));
|
|
690
|
+
}
|
|
691
|
+
} catch (err) {
|
|
692
|
+
if (options?.silent === false) {
|
|
693
|
+
throw err;
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
return document;
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
// src/loader.ts
|
|
700
|
+
var BASE_VERSION = "2024";
|
|
701
|
+
function getFilename(version) {
|
|
702
|
+
return `english-wordnet-${version}.xml`;
|
|
703
|
+
}
|
|
704
|
+
function getDownloadUrl(version) {
|
|
705
|
+
return `https://en-word.net/static/${getFilename(version)}.gz`;
|
|
706
|
+
}
|
|
707
|
+
function getDefaultCacheDir() {
|
|
708
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
|
|
709
|
+
return path.join(homeDir, ".cache", "synset");
|
|
587
710
|
}
|
|
588
711
|
function fileExists(filePath) {
|
|
589
712
|
if (existsSync(filePath)) {
|
|
@@ -638,7 +761,6 @@ async function findLatestVersion(onProgress, cacheDir) {
|
|
|
638
761
|
for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
|
|
639
762
|
const version = year.toString();
|
|
640
763
|
if (await urlExists(getDownloadUrl(version))) {
|
|
641
|
-
continue;
|
|
642
764
|
} else {
|
|
643
765
|
return (year - 1).toString();
|
|
644
766
|
}
|
|
@@ -659,9 +781,13 @@ async function downloadWordNet(version, destPath) {
|
|
|
659
781
|
const url = getDownloadUrl(version);
|
|
660
782
|
const response = await fetch(url);
|
|
661
783
|
if (!response.ok || !response.body) {
|
|
662
|
-
throw new Error(
|
|
784
|
+
throw new Error(
|
|
785
|
+
`Failed to download WordNet ${version}: ${response.statusText}`
|
|
786
|
+
);
|
|
663
787
|
}
|
|
664
|
-
const decompressed = response.body.pipeThrough(
|
|
788
|
+
const decompressed = response.body.pipeThrough(
|
|
789
|
+
new DecompressionStream("gzip")
|
|
790
|
+
);
|
|
665
791
|
const arrayBuffer = await new Response(decompressed).arrayBuffer();
|
|
666
792
|
const dir = path.dirname(destPath);
|
|
667
793
|
if (!existsSync(dir)) {
|
|
@@ -671,8 +797,9 @@ async function downloadWordNet(version, destPath) {
|
|
|
671
797
|
}
|
|
672
798
|
function createParser(filePath) {
|
|
673
799
|
const resolvedPath = path.resolve(filePath);
|
|
674
|
-
const
|
|
675
|
-
|
|
800
|
+
const nodeStream = createReadStream(resolvedPath);
|
|
801
|
+
const webStream = Readable.toWeb(nodeStream);
|
|
802
|
+
return parse(webStream, {
|
|
676
803
|
ignoreDeclaration: false,
|
|
677
804
|
silent: false
|
|
678
805
|
});
|
|
@@ -825,117 +952,6 @@ function getSynsetWords(index, synset) {
|
|
|
825
952
|
return synset.members.map((id) => index.entries.get(id)).filter((e) => e !== void 0).map((e) => e.lemmas[0]?.writtenForm).filter((w) => w !== void 0);
|
|
826
953
|
}
|
|
827
954
|
|
|
828
|
-
// src/literals.ts
|
|
829
|
-
var PartsOfSpeech2 = {
|
|
830
|
-
n: "Noun",
|
|
831
|
-
v: "Verb",
|
|
832
|
-
a: "Adjective",
|
|
833
|
-
r: "Adverb",
|
|
834
|
-
s: "Adjective Satellite",
|
|
835
|
-
t: "?",
|
|
836
|
-
c: "Conjunction",
|
|
837
|
-
p: "Adposition (Preposition, postposition, etc.)",
|
|
838
|
-
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
839
|
-
u: "Unknown"
|
|
840
|
-
};
|
|
841
|
-
var SynsetRelationRelType2 = {
|
|
842
|
-
agent: "Agent",
|
|
843
|
-
also: "See also",
|
|
844
|
-
anto_converse: "Converse antonym",
|
|
845
|
-
anto_gradable: "Gradable antonym",
|
|
846
|
-
anto_simple: "Simple antonym",
|
|
847
|
-
antonym: "Antonym",
|
|
848
|
-
attribute: "Attribute",
|
|
849
|
-
augmentative: "Augmentative",
|
|
850
|
-
be_in_state: "Be in state",
|
|
851
|
-
cause: "Cause",
|
|
852
|
-
causes: "Causes",
|
|
853
|
-
classified_by: "Classified by",
|
|
854
|
-
classifies: "Classifies",
|
|
855
|
-
co_agent_instrument: "Co-agent instrument",
|
|
856
|
-
co_agent_patient: "Co-agent patient",
|
|
857
|
-
co_agent_result: "Co-agent result",
|
|
858
|
-
co_instrument_agent: "Co-instrument agent",
|
|
859
|
-
co_instrument_patient: "Co-instrument patient",
|
|
860
|
-
co_instrument_result: "Co-instrument result",
|
|
861
|
-
co_patient_agent: "Co-patient agent",
|
|
862
|
-
co_patient_instrument: "Co-patient instrument",
|
|
863
|
-
co_result_agent: "Co-result agent",
|
|
864
|
-
co_result_instrument: "Co-result instrument",
|
|
865
|
-
co_role: "Co-role",
|
|
866
|
-
diminutive: "Diminutive",
|
|
867
|
-
direction: "Direction",
|
|
868
|
-
domain_member_region: "Domain member region",
|
|
869
|
-
domain_member_topic: "Domain member topic",
|
|
870
|
-
domain_region: "Domain region",
|
|
871
|
-
domain_topic: "Domain topic",
|
|
872
|
-
entail: "Entail",
|
|
873
|
-
entails: "Entails",
|
|
874
|
-
eq_synonym: "Equivalent synonym",
|
|
875
|
-
exemplifies: "Exemplifies",
|
|
876
|
-
feminine: "Feminine",
|
|
877
|
-
has_augmentative: "Has augmentative",
|
|
878
|
-
has_diminutive: "Has diminutive",
|
|
879
|
-
has_domain_region: "Has domain region",
|
|
880
|
-
has_domain_topic: "Has domain topic",
|
|
881
|
-
has_feminine: "Has feminine",
|
|
882
|
-
has_masculine: "Has masculine",
|
|
883
|
-
has_young: "Has young",
|
|
884
|
-
holo_location: "Holonym location",
|
|
885
|
-
holo_member: "Member holonym",
|
|
886
|
-
holo_part: "Part holonym",
|
|
887
|
-
holo_portion: "Portion holonym",
|
|
888
|
-
holo_substance: "Substance holonym",
|
|
889
|
-
holonym: "Holonym",
|
|
890
|
-
hypernym: "Hypernym",
|
|
891
|
-
hyponym: "Hyponym",
|
|
892
|
-
in_manner: "In manner",
|
|
893
|
-
instance_hypernym: "Instance hypernym",
|
|
894
|
-
instance_hyponym: "Instance hyponym",
|
|
895
|
-
instrument: "Instrument",
|
|
896
|
-
involved: "Involved",
|
|
897
|
-
involved_agent: "Involved agent",
|
|
898
|
-
involved_direction: "Involved direction",
|
|
899
|
-
involved_instrument: "Involved instrument",
|
|
900
|
-
involved_location: "Involved location",
|
|
901
|
-
involved_patient: "Involved patient",
|
|
902
|
-
involved_result: "Involved result",
|
|
903
|
-
involved_source_direction: "Involved source direction",
|
|
904
|
-
involved_target_direction: "Involved target direction",
|
|
905
|
-
ir_synonym: "IR synonym",
|
|
906
|
-
is_caused_by: "Is caused by",
|
|
907
|
-
is_entailed_by: "Is entailed by",
|
|
908
|
-
is_exemplified_by: "Is exemplified by",
|
|
909
|
-
is_subevent_of: "Is subevent of",
|
|
910
|
-
location: "Location",
|
|
911
|
-
manner_of: "Manner of",
|
|
912
|
-
masculine: "Masculine",
|
|
913
|
-
member_holonym: "Member holonym",
|
|
914
|
-
member_meronym: "Member meronym",
|
|
915
|
-
mero_location: "Meronym location",
|
|
916
|
-
mero_member: "Member meronym",
|
|
917
|
-
mero_part: "Part meronym",
|
|
918
|
-
mero_portion: "Portion meronym",
|
|
919
|
-
mero_substance: "Substance meronym",
|
|
920
|
-
meronym: "Meronym",
|
|
921
|
-
other: "Other",
|
|
922
|
-
part_holonym: "Part holonym",
|
|
923
|
-
part_meronym: "Part meronym",
|
|
924
|
-
patient: "Patient",
|
|
925
|
-
restricted_by: "Restricted by",
|
|
926
|
-
restricts: "Restricts",
|
|
927
|
-
result: "Result",
|
|
928
|
-
role: "Role",
|
|
929
|
-
similar: "Similar",
|
|
930
|
-
source_direction: "Source direction",
|
|
931
|
-
state_of: "State of",
|
|
932
|
-
subevent: "Subevent",
|
|
933
|
-
substance_holonym: "Substance holonym",
|
|
934
|
-
substance_meronym: "Substance meronym",
|
|
935
|
-
target_direction: "Target direction",
|
|
936
|
-
young: "Young"
|
|
937
|
-
};
|
|
938
|
-
|
|
939
955
|
// src/cli.ts
|
|
940
956
|
var decode = (s) => decodeXmlEntities(s) ?? "";
|
|
941
957
|
var HELP = `
|
|
@@ -1066,7 +1082,7 @@ async function main() {
|
|
|
1066
1082
|
for (const [relType, words] of relsByType) {
|
|
1067
1083
|
const label = SynsetRelationRelType2[relType] || relType;
|
|
1068
1084
|
console.log(` ${label}:`);
|
|
1069
|
-
|
|
1085
|
+
for (const w of words) console.log(` - ${w}`);
|
|
1070
1086
|
}
|
|
1071
1087
|
}
|
|
1072
1088
|
break;
|
|
@@ -1085,11 +1101,12 @@ async function main() {
|
|
|
1085
1101
|
console.log(`ILI: ${synset.ili}`);
|
|
1086
1102
|
console.log(`
|
|
1087
1103
|
Definitions:`);
|
|
1088
|
-
synset.definitions
|
|
1104
|
+
for (const d of synset.definitions) console.log(` - ${decode(d.inner)}`);
|
|
1089
1105
|
if (synset.examples.length > 0) {
|
|
1090
1106
|
console.log(`
|
|
1091
1107
|
Examples:`);
|
|
1092
|
-
|
|
1108
|
+
for (const e of synset.examples)
|
|
1109
|
+
console.log(` - "${decode(e.inner)}"`);
|
|
1093
1110
|
}
|
|
1094
1111
|
if (synset.synsetRelations.length > 0) {
|
|
1095
1112
|
console.log(`
|