synset 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -39
- package/dist/cli.cjs +534 -382
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +542 -383
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +301 -166
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +26 -7
- package/dist/index.d.ts +26 -7
- package/dist/index.js +307 -167
- package/dist/index.js.map +1 -1
- package/dist/schema.sql +22 -0
- package/package.json +10 -5
package/dist/cli.js
CHANGED
|
@@ -1,250 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
-
// src/
|
|
4
|
-
import {
|
|
5
|
-
import path from "path";
|
|
6
|
-
|
|
7
|
-
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
8
|
-
var Node = class {
|
|
9
|
-
#type;
|
|
10
|
-
#children;
|
|
11
|
-
#parent;
|
|
12
|
-
#attr;
|
|
13
|
-
#raw;
|
|
14
|
-
constructor(type, parent, raw) {
|
|
15
|
-
this.#type = type;
|
|
16
|
-
this.#parent = parent;
|
|
17
|
-
this.#raw = raw;
|
|
18
|
-
this.#children = [];
|
|
19
|
-
}
|
|
20
|
-
get type() {
|
|
21
|
-
return this.#type;
|
|
22
|
-
}
|
|
23
|
-
get raw() {
|
|
24
|
-
return this.#raw ?? "";
|
|
25
|
-
}
|
|
26
|
-
get parent() {
|
|
27
|
-
return this.#parent;
|
|
28
|
-
}
|
|
29
|
-
get children() {
|
|
30
|
-
return this.#children;
|
|
31
|
-
}
|
|
32
|
-
get attributes() {
|
|
33
|
-
if (this.#attr) {
|
|
34
|
-
return this.#attr;
|
|
35
|
-
}
|
|
36
|
-
this.#attr = {};
|
|
37
|
-
if (this.raw) {
|
|
38
|
-
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
39
|
-
let match;
|
|
40
|
-
while ((match = regex.exec(this.raw)) !== null) {
|
|
41
|
-
this.#attr[match[1]] = match[3];
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
return this.#attr;
|
|
45
|
-
}
|
|
46
|
-
get innerText() {
|
|
47
|
-
if (this.children.length) {
|
|
48
|
-
let text = "";
|
|
49
|
-
for (const child of this.children) {
|
|
50
|
-
text += child.innerText;
|
|
51
|
-
}
|
|
52
|
-
return text;
|
|
53
|
-
}
|
|
54
|
-
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
55
|
-
}
|
|
56
|
-
addChild(child) {
|
|
57
|
-
this.#children.push(child);
|
|
58
|
-
}
|
|
59
|
-
/**
|
|
60
|
-
* Returns true if node and parents match the key hierarchy
|
|
61
|
-
* @param keys - XML tag names
|
|
62
|
-
*/
|
|
63
|
-
is(...keys) {
|
|
64
|
-
if (!keys.length) return false;
|
|
65
|
-
let parent;
|
|
66
|
-
for (const key of keys.toReversed()) {
|
|
67
|
-
parent = parent ? parent.parent : this;
|
|
68
|
-
if (parent?.type !== key) {
|
|
69
|
-
return false;
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
return true;
|
|
73
|
-
}
|
|
74
|
-
/**
|
|
75
|
-
* Return the first child matching the key
|
|
76
|
-
* @param key - XML tag name
|
|
77
|
-
*/
|
|
78
|
-
first(key) {
|
|
79
|
-
return this.children.find((n) => n.type === key);
|
|
80
|
-
}
|
|
81
|
-
/**
|
|
82
|
-
* Return all children matching the key hierarchy
|
|
83
|
-
* @param keys - XML tag names
|
|
84
|
-
*/
|
|
85
|
-
all(...keys) {
|
|
86
|
-
let nodes = this.children;
|
|
87
|
-
let found = [];
|
|
88
|
-
for (const [i, k] of Object.entries(keys)) {
|
|
89
|
-
if (Number.parseInt(i) === keys.length - 1) {
|
|
90
|
-
found = nodes.filter((n) => n.type === k);
|
|
91
|
-
break;
|
|
92
|
-
}
|
|
93
|
-
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
94
|
-
if (!nodes) return [];
|
|
95
|
-
}
|
|
96
|
-
return found;
|
|
97
|
-
}
|
|
98
|
-
};
|
|
99
|
-
|
|
100
|
-
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
101
|
-
var ENTITIES = {
|
|
102
|
-
cdata: {
|
|
103
|
-
end: "]]>",
|
|
104
|
-
start: /^<!\[CDATA\[/
|
|
105
|
-
},
|
|
106
|
-
comment: {
|
|
107
|
-
end: "-->",
|
|
108
|
-
start: /^<!--/
|
|
109
|
-
},
|
|
110
|
-
declaration: {
|
|
111
|
-
end: "?>",
|
|
112
|
-
start: /^<\?/
|
|
113
|
-
},
|
|
114
|
-
doctype: {
|
|
115
|
-
end: ">",
|
|
116
|
-
start: /^<!DOCTYPE/i
|
|
117
|
-
},
|
|
118
|
-
element: {
|
|
119
|
-
end: ">",
|
|
120
|
-
start: /^<[\w:.-/]/
|
|
121
|
-
}
|
|
122
|
-
};
|
|
123
|
-
var transformer = {
|
|
124
|
-
buf: "",
|
|
125
|
-
state: "skip" /* SKIP */,
|
|
126
|
-
previous: ["skip" /* SKIP */, -1],
|
|
127
|
-
flush(controller) {
|
|
128
|
-
if (this.buf.length > 0) {
|
|
129
|
-
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
130
|
-
}
|
|
131
|
-
},
|
|
132
|
-
transform(chunk, controller) {
|
|
133
|
-
this.buf += chunk;
|
|
134
|
-
while (this.buf.length) {
|
|
135
|
-
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
136
|
-
break;
|
|
137
|
-
}
|
|
138
|
-
this.previous = [this.state, this.buf.length];
|
|
139
|
-
if (this.state === "skip" /* SKIP */) {
|
|
140
|
-
const index = this.buf.indexOf("<");
|
|
141
|
-
if (index < 0) break;
|
|
142
|
-
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
143
|
-
this.buf = this.buf.substring(index);
|
|
144
|
-
this.state = "search" /* SEARCH */;
|
|
145
|
-
}
|
|
146
|
-
if (this.state === "search" /* SEARCH */) {
|
|
147
|
-
if (this.buf.length < 3) break;
|
|
148
|
-
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
149
|
-
if (this.buf.match(entity.start)) {
|
|
150
|
-
this.state = state;
|
|
151
|
-
break;
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
continue;
|
|
155
|
-
}
|
|
156
|
-
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
157
|
-
const { end } = ENTITIES[this.state];
|
|
158
|
-
const index = this.buf.indexOf(end);
|
|
159
|
-
if (index < 0) break;
|
|
160
|
-
controller.enqueue([
|
|
161
|
-
this.state,
|
|
162
|
-
this.buf.substring(0, index + end.length)
|
|
163
|
-
]);
|
|
164
|
-
this.buf = this.buf.substring(index + end.length);
|
|
165
|
-
this.state = "skip" /* SKIP */;
|
|
166
|
-
continue;
|
|
167
|
-
}
|
|
168
|
-
throw new Error();
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
};
|
|
172
|
-
var XMLStream = class extends TransformStream {
|
|
173
|
-
constructor() {
|
|
174
|
-
super({ ...transformer });
|
|
175
|
-
}
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
179
|
-
var ignoreTypes = {
|
|
180
|
-
["comment" /* COMMENT */]: "ignoreComments",
|
|
181
|
-
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
182
|
-
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
183
|
-
};
|
|
184
|
-
async function* parse(input, options) {
|
|
185
|
-
const document = new Node("@document");
|
|
186
|
-
try {
|
|
187
|
-
const init = { ...options?.fetchOptions };
|
|
188
|
-
if (options?.signal) {
|
|
189
|
-
init.signal = options.signal;
|
|
190
|
-
}
|
|
191
|
-
let source;
|
|
192
|
-
if (typeof input === "string" || input instanceof URL) {
|
|
193
|
-
input = new URL(input);
|
|
194
|
-
const response = await fetch(input, init);
|
|
195
|
-
if (!response.ok || !response.body) {
|
|
196
|
-
throw new Error(`Bad response`);
|
|
197
|
-
}
|
|
198
|
-
source = response.body;
|
|
199
|
-
} else {
|
|
200
|
-
source = input;
|
|
201
|
-
}
|
|
202
|
-
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
203
|
-
signal: options?.signal
|
|
204
|
-
});
|
|
205
|
-
let node = document;
|
|
206
|
-
for await (const [type, value] of stream) {
|
|
207
|
-
if (options?.signal?.aborted) {
|
|
208
|
-
break;
|
|
209
|
-
}
|
|
210
|
-
if (type === "text" /* TEXT */) {
|
|
211
|
-
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
212
|
-
continue;
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
216
|
-
const newNode = new Node(type, node, value);
|
|
217
|
-
node.addChild(newNode);
|
|
218
|
-
yield newNode;
|
|
219
|
-
continue;
|
|
220
|
-
}
|
|
221
|
-
if (type === "element" /* ELEMENT */) {
|
|
222
|
-
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
223
|
-
if (value.endsWith("/>")) {
|
|
224
|
-
const newNode2 = new Node(name, node, value);
|
|
225
|
-
node.addChild(newNode2);
|
|
226
|
-
yield newNode2;
|
|
227
|
-
continue;
|
|
228
|
-
}
|
|
229
|
-
if (value.startsWith("</")) {
|
|
230
|
-
yield node;
|
|
231
|
-
node = node.parent;
|
|
232
|
-
continue;
|
|
233
|
-
}
|
|
234
|
-
const newNode = new Node(name, node, value);
|
|
235
|
-
node.addChild(newNode);
|
|
236
|
-
node = newNode;
|
|
237
|
-
continue;
|
|
238
|
-
}
|
|
239
|
-
node.addChild(new Node(type, node, value));
|
|
240
|
-
}
|
|
241
|
-
} catch (err) {
|
|
242
|
-
if (options?.silent === false) {
|
|
243
|
-
throw err;
|
|
244
|
-
}
|
|
245
|
-
}
|
|
246
|
-
return document;
|
|
247
|
-
}
|
|
3
|
+
// src/export-sqlite.ts
|
|
4
|
+
import { Database } from "bun:sqlite";
|
|
248
5
|
|
|
249
6
|
// src/types.ts
|
|
250
7
|
import { z } from "zod";
|
|
@@ -411,7 +168,9 @@ var Lexicon = z.object({
|
|
|
411
168
|
synsets: z.array(Synset).min(0),
|
|
412
169
|
syntacticBehaviors: z.array(SyntacticBehavior).min(0)
|
|
413
170
|
});
|
|
414
|
-
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
171
|
+
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
172
|
+
(v) => v.value
|
|
173
|
+
);
|
|
415
174
|
|
|
416
175
|
// src/helpers.ts
|
|
417
176
|
function PronunciationNode(node) {
|
|
@@ -439,7 +198,7 @@ function SenseRelationNode(node) {
|
|
|
439
198
|
dcType: optAttr(node, "dc:type")
|
|
440
199
|
};
|
|
441
200
|
return SenseRelation.parse(
|
|
442
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
201
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
|
|
443
202
|
);
|
|
444
203
|
}
|
|
445
204
|
function SenseNode(node) {
|
|
@@ -455,7 +214,7 @@ function SenseNode(node) {
|
|
|
455
214
|
extendWithRestAttr(
|
|
456
215
|
node,
|
|
457
216
|
obj,
|
|
458
|
-
(s) => s
|
|
217
|
+
(s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
|
|
459
218
|
)
|
|
460
219
|
);
|
|
461
220
|
}
|
|
@@ -486,7 +245,7 @@ function ExampleNode(node) {
|
|
|
486
245
|
dcSource: optAttr(node, "dc:source")
|
|
487
246
|
};
|
|
488
247
|
return Example.parse(
|
|
489
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
248
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
490
249
|
);
|
|
491
250
|
}
|
|
492
251
|
function ILIDefinitionNode(node) {
|
|
@@ -523,7 +282,7 @@ function SynsetNode(node) {
|
|
|
523
282
|
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
524
283
|
};
|
|
525
284
|
return Synset.parse(
|
|
526
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
285
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
527
286
|
);
|
|
528
287
|
}
|
|
529
288
|
function LexiconNode(node) {
|
|
@@ -552,7 +311,9 @@ var decodeXmlEntities = (s) => {
|
|
|
552
311
|
var attr = (node, attrName) => {
|
|
553
312
|
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
554
313
|
if (value === void 0) {
|
|
555
|
-
throw new Error(
|
|
314
|
+
throw new Error(
|
|
315
|
+
`Missing required attribute "${attrName}" on node "${node.type}"`
|
|
316
|
+
);
|
|
556
317
|
}
|
|
557
318
|
return value;
|
|
558
319
|
};
|
|
@@ -570,21 +331,505 @@ var extendWithRestAttr = (node, obj, proxy) => {
|
|
|
570
331
|
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
571
332
|
};
|
|
572
333
|
var children = (node, type, fn) => {
|
|
573
|
-
return node.children.filter((v) => v.type
|
|
334
|
+
return node.children.filter((v) => v.type === type).map((v) => fn(v));
|
|
574
335
|
};
|
|
575
336
|
|
|
576
|
-
// src/
|
|
577
|
-
var
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
337
|
+
// src/export-sqlite.ts
|
|
338
|
+
var SCHEMA = `
|
|
339
|
+
CREATE TABLE IF NOT EXISTS words (
|
|
340
|
+
id INTEGER PRIMARY KEY,
|
|
341
|
+
word TEXT NOT NULL,
|
|
342
|
+
word_display TEXT NOT NULL
|
|
343
|
+
);
|
|
344
|
+
CREATE INDEX IF NOT EXISTS idx_words_word ON words(word);
|
|
345
|
+
|
|
346
|
+
CREATE TABLE IF NOT EXISTS synsets (
|
|
347
|
+
id TEXT PRIMARY KEY,
|
|
348
|
+
pos TEXT NOT NULL,
|
|
349
|
+
definition TEXT NOT NULL
|
|
350
|
+
);
|
|
351
|
+
|
|
352
|
+
CREATE TABLE IF NOT EXISTS word_synsets (
|
|
353
|
+
word_id INTEGER NOT NULL,
|
|
354
|
+
synset_id TEXT NOT NULL,
|
|
355
|
+
PRIMARY KEY (word_id, synset_id)
|
|
356
|
+
);
|
|
357
|
+
CREATE INDEX IF NOT EXISTS idx_ws_word ON word_synsets(word_id);
|
|
358
|
+
`;
|
|
359
|
+
function exportToSQLite(lexicon, outputPath, options = {}) {
|
|
360
|
+
const { onProgress } = options;
|
|
361
|
+
const db = new Database(outputPath, { create: true });
|
|
362
|
+
db.exec("PRAGMA journal_mode = OFF");
|
|
363
|
+
db.exec("PRAGMA synchronous = OFF");
|
|
364
|
+
db.exec(SCHEMA);
|
|
365
|
+
const wordToEntries = /* @__PURE__ */ new Map();
|
|
366
|
+
for (const entry of lexicon.lexicalEntries) {
|
|
367
|
+
const word = entry.lemmas[0]?.writtenForm;
|
|
368
|
+
if (word) {
|
|
369
|
+
const lower = word.toLowerCase();
|
|
370
|
+
const existing = wordToEntries.get(lower) || [];
|
|
371
|
+
existing.push(entry);
|
|
372
|
+
wordToEntries.set(lower, existing);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
const synsetMap = /* @__PURE__ */ new Map();
|
|
376
|
+
for (const synset of lexicon.synsets) {
|
|
377
|
+
synsetMap.set(synset.id, synset);
|
|
378
|
+
}
|
|
379
|
+
const insertWord = db.prepare(
|
|
380
|
+
"INSERT INTO words (word, word_display) VALUES (?, ?)"
|
|
381
|
+
);
|
|
382
|
+
const wordIds = /* @__PURE__ */ new Map();
|
|
383
|
+
const words = Array.from(wordToEntries.keys()).sort();
|
|
384
|
+
const totalWords = words.length;
|
|
385
|
+
db.exec("BEGIN TRANSACTION");
|
|
386
|
+
let wordId = 0;
|
|
387
|
+
for (let i = 0; i < words.length; i++) {
|
|
388
|
+
const word = words[i];
|
|
389
|
+
const entries = wordToEntries.get(word);
|
|
390
|
+
if (!entries) continue;
|
|
391
|
+
const display = entries[0].lemmas[0]?.writtenForm || word;
|
|
392
|
+
insertWord.run(word, display);
|
|
393
|
+
wordId++;
|
|
394
|
+
wordIds.set(word, wordId);
|
|
395
|
+
if (onProgress && i % 1e4 === 0) {
|
|
396
|
+
onProgress({ phase: "words", current: i, total: totalWords });
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
db.exec("COMMIT");
|
|
400
|
+
const usedSynsetIds = /* @__PURE__ */ new Set();
|
|
401
|
+
for (const entries of wordToEntries.values()) {
|
|
402
|
+
for (const entry of entries) {
|
|
403
|
+
for (const sense of entry.senses) {
|
|
404
|
+
usedSynsetIds.add(sense.synset);
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
const insertSynset = db.prepare(
|
|
409
|
+
"INSERT OR IGNORE INTO synsets (id, pos, definition) VALUES (?, ?, ?)"
|
|
410
|
+
);
|
|
411
|
+
const synsetList = Array.from(usedSynsetIds);
|
|
412
|
+
const totalSynsets = synsetList.length;
|
|
413
|
+
db.exec("BEGIN TRANSACTION");
|
|
414
|
+
for (let i = 0; i < synsetList.length; i++) {
|
|
415
|
+
const synsetId = synsetList[i];
|
|
416
|
+
const synset = synsetMap.get(synsetId);
|
|
417
|
+
if (synset) {
|
|
418
|
+
const def = decodeXmlEntities(synset.definitions[0]?.inner) || "";
|
|
419
|
+
insertSynset.run(synsetId, synset.partOfSpeech, def);
|
|
420
|
+
}
|
|
421
|
+
if (onProgress && i % 1e4 === 0) {
|
|
422
|
+
onProgress({ phase: "synsets", current: i, total: totalSynsets });
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
db.exec("COMMIT");
|
|
426
|
+
const insertRelation = db.prepare(
|
|
427
|
+
"INSERT OR IGNORE INTO word_synsets (word_id, synset_id) VALUES (?, ?)"
|
|
428
|
+
);
|
|
429
|
+
let relationCount = 0;
|
|
430
|
+
const totalRelations = Array.from(wordToEntries.values()).reduce(
|
|
431
|
+
(sum, entries) => sum + entries.reduce((s, e) => s + e.senses.length, 0),
|
|
432
|
+
0
|
|
433
|
+
);
|
|
434
|
+
db.exec("BEGIN TRANSACTION");
|
|
435
|
+
for (const [word, entries] of wordToEntries) {
|
|
436
|
+
const wordId2 = wordIds.get(word);
|
|
437
|
+
if (!wordId2) continue;
|
|
438
|
+
for (const entry of entries) {
|
|
439
|
+
for (const sense of entry.senses) {
|
|
440
|
+
insertRelation.run(wordId2, sense.synset);
|
|
441
|
+
relationCount++;
|
|
442
|
+
if (onProgress && relationCount % 1e4 === 0) {
|
|
443
|
+
onProgress({
|
|
444
|
+
phase: "relations",
|
|
445
|
+
current: relationCount,
|
|
446
|
+
total: totalRelations
|
|
447
|
+
});
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
db.exec("COMMIT");
|
|
453
|
+
db.close();
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
// src/literals.ts
|
|
457
|
+
var PartsOfSpeech2 = {
|
|
458
|
+
n: "Noun",
|
|
459
|
+
v: "Verb",
|
|
460
|
+
a: "Adjective",
|
|
461
|
+
r: "Adverb",
|
|
462
|
+
s: "Adjective Satellite",
|
|
463
|
+
t: "?",
|
|
464
|
+
c: "Conjunction",
|
|
465
|
+
p: "Adposition (Preposition, postposition, etc.)",
|
|
466
|
+
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
467
|
+
u: "Unknown"
|
|
468
|
+
};
|
|
469
|
+
var SynsetRelationRelType2 = {
|
|
470
|
+
agent: "Agent",
|
|
471
|
+
also: "See also",
|
|
472
|
+
anto_converse: "Converse antonym",
|
|
473
|
+
anto_gradable: "Gradable antonym",
|
|
474
|
+
anto_simple: "Simple antonym",
|
|
475
|
+
antonym: "Antonym",
|
|
476
|
+
attribute: "Attribute",
|
|
477
|
+
augmentative: "Augmentative",
|
|
478
|
+
be_in_state: "Be in state",
|
|
479
|
+
cause: "Cause",
|
|
480
|
+
causes: "Causes",
|
|
481
|
+
classified_by: "Classified by",
|
|
482
|
+
classifies: "Classifies",
|
|
483
|
+
co_agent_instrument: "Co-agent instrument",
|
|
484
|
+
co_agent_patient: "Co-agent patient",
|
|
485
|
+
co_agent_result: "Co-agent result",
|
|
486
|
+
co_instrument_agent: "Co-instrument agent",
|
|
487
|
+
co_instrument_patient: "Co-instrument patient",
|
|
488
|
+
co_instrument_result: "Co-instrument result",
|
|
489
|
+
co_patient_agent: "Co-patient agent",
|
|
490
|
+
co_patient_instrument: "Co-patient instrument",
|
|
491
|
+
co_result_agent: "Co-result agent",
|
|
492
|
+
co_result_instrument: "Co-result instrument",
|
|
493
|
+
co_role: "Co-role",
|
|
494
|
+
diminutive: "Diminutive",
|
|
495
|
+
direction: "Direction",
|
|
496
|
+
domain_member_region: "Domain member region",
|
|
497
|
+
domain_member_topic: "Domain member topic",
|
|
498
|
+
domain_region: "Domain region",
|
|
499
|
+
domain_topic: "Domain topic",
|
|
500
|
+
entail: "Entail",
|
|
501
|
+
entails: "Entails",
|
|
502
|
+
eq_synonym: "Equivalent synonym",
|
|
503
|
+
exemplifies: "Exemplifies",
|
|
504
|
+
feminine: "Feminine",
|
|
505
|
+
has_augmentative: "Has augmentative",
|
|
506
|
+
has_diminutive: "Has diminutive",
|
|
507
|
+
has_domain_region: "Has domain region",
|
|
508
|
+
has_domain_topic: "Has domain topic",
|
|
509
|
+
has_feminine: "Has feminine",
|
|
510
|
+
has_masculine: "Has masculine",
|
|
511
|
+
has_young: "Has young",
|
|
512
|
+
holo_location: "Holonym location",
|
|
513
|
+
holo_member: "Member holonym",
|
|
514
|
+
holo_part: "Part holonym",
|
|
515
|
+
holo_portion: "Portion holonym",
|
|
516
|
+
holo_substance: "Substance holonym",
|
|
517
|
+
holonym: "Holonym",
|
|
518
|
+
hypernym: "Hypernym",
|
|
519
|
+
hyponym: "Hyponym",
|
|
520
|
+
in_manner: "In manner",
|
|
521
|
+
instance_hypernym: "Instance hypernym",
|
|
522
|
+
instance_hyponym: "Instance hyponym",
|
|
523
|
+
instrument: "Instrument",
|
|
524
|
+
involved: "Involved",
|
|
525
|
+
involved_agent: "Involved agent",
|
|
526
|
+
involved_direction: "Involved direction",
|
|
527
|
+
involved_instrument: "Involved instrument",
|
|
528
|
+
involved_location: "Involved location",
|
|
529
|
+
involved_patient: "Involved patient",
|
|
530
|
+
involved_result: "Involved result",
|
|
531
|
+
involved_source_direction: "Involved source direction",
|
|
532
|
+
involved_target_direction: "Involved target direction",
|
|
533
|
+
ir_synonym: "IR synonym",
|
|
534
|
+
is_caused_by: "Is caused by",
|
|
535
|
+
is_entailed_by: "Is entailed by",
|
|
536
|
+
is_exemplified_by: "Is exemplified by",
|
|
537
|
+
is_subevent_of: "Is subevent of",
|
|
538
|
+
location: "Location",
|
|
539
|
+
manner_of: "Manner of",
|
|
540
|
+
masculine: "Masculine",
|
|
541
|
+
member_holonym: "Member holonym",
|
|
542
|
+
member_meronym: "Member meronym",
|
|
543
|
+
mero_location: "Meronym location",
|
|
544
|
+
mero_member: "Member meronym",
|
|
545
|
+
mero_part: "Part meronym",
|
|
546
|
+
mero_portion: "Portion meronym",
|
|
547
|
+
mero_substance: "Substance meronym",
|
|
548
|
+
meronym: "Meronym",
|
|
549
|
+
other: "Other",
|
|
550
|
+
part_holonym: "Part holonym",
|
|
551
|
+
part_meronym: "Part meronym",
|
|
552
|
+
patient: "Patient",
|
|
553
|
+
restricted_by: "Restricted by",
|
|
554
|
+
restricts: "Restricts",
|
|
555
|
+
result: "Result",
|
|
556
|
+
role: "Role",
|
|
557
|
+
similar: "Similar",
|
|
558
|
+
source_direction: "Source direction",
|
|
559
|
+
state_of: "State of",
|
|
560
|
+
subevent: "Subevent",
|
|
561
|
+
substance_holonym: "Substance holonym",
|
|
562
|
+
substance_meronym: "Substance meronym",
|
|
563
|
+
target_direction: "Target direction",
|
|
564
|
+
young: "Young"
|
|
565
|
+
};
|
|
566
|
+
|
|
567
|
+
// src/loader.ts
|
|
568
|
+
import {
|
|
569
|
+
createReadStream,
|
|
570
|
+
existsSync,
|
|
571
|
+
mkdirSync,
|
|
572
|
+
readdirSync,
|
|
573
|
+
statSync,
|
|
574
|
+
writeFileSync
|
|
575
|
+
} from "fs";
|
|
576
|
+
import path from "path";
|
|
577
|
+
import { Readable } from "stream";
|
|
578
|
+
|
|
579
|
+
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
580
|
+
var Node = class {
|
|
581
|
+
#type;
|
|
582
|
+
#children;
|
|
583
|
+
#parent;
|
|
584
|
+
#attr;
|
|
585
|
+
#raw;
|
|
586
|
+
constructor(type, parent, raw) {
|
|
587
|
+
this.#type = type;
|
|
588
|
+
this.#parent = parent;
|
|
589
|
+
this.#raw = raw;
|
|
590
|
+
this.#children = [];
|
|
591
|
+
}
|
|
592
|
+
get type() {
|
|
593
|
+
return this.#type;
|
|
594
|
+
}
|
|
595
|
+
get raw() {
|
|
596
|
+
return this.#raw ?? "";
|
|
597
|
+
}
|
|
598
|
+
get parent() {
|
|
599
|
+
return this.#parent;
|
|
600
|
+
}
|
|
601
|
+
get children() {
|
|
602
|
+
return this.#children;
|
|
603
|
+
}
|
|
604
|
+
get attributes() {
|
|
605
|
+
if (this.#attr) {
|
|
606
|
+
return this.#attr;
|
|
607
|
+
}
|
|
608
|
+
this.#attr = {};
|
|
609
|
+
if (this.raw) {
|
|
610
|
+
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
611
|
+
let match;
|
|
612
|
+
while ((match = regex.exec(this.raw)) !== null) {
|
|
613
|
+
this.#attr[match[1]] = match[3];
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
return this.#attr;
|
|
617
|
+
}
|
|
618
|
+
get innerText() {
|
|
619
|
+
if (this.children.length) {
|
|
620
|
+
let text = "";
|
|
621
|
+
for (const child of this.children) {
|
|
622
|
+
text += child.innerText;
|
|
623
|
+
}
|
|
624
|
+
return text;
|
|
625
|
+
}
|
|
626
|
+
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
627
|
+
}
|
|
628
|
+
addChild(child) {
|
|
629
|
+
this.#children.push(child);
|
|
630
|
+
}
|
|
631
|
+
/**
|
|
632
|
+
* Returns true if node and parents match the key hierarchy
|
|
633
|
+
* @param keys - XML tag names
|
|
634
|
+
*/
|
|
635
|
+
is(...keys) {
|
|
636
|
+
if (!keys.length) return false;
|
|
637
|
+
let parent;
|
|
638
|
+
for (const key of keys.toReversed()) {
|
|
639
|
+
parent = parent ? parent.parent : this;
|
|
640
|
+
if (parent?.type !== key) {
|
|
641
|
+
return false;
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
return true;
|
|
645
|
+
}
|
|
646
|
+
/**
|
|
647
|
+
* Return the first child matching the key
|
|
648
|
+
* @param key - XML tag name
|
|
649
|
+
*/
|
|
650
|
+
first(key) {
|
|
651
|
+
return this.children.find((n) => n.type === key);
|
|
652
|
+
}
|
|
653
|
+
/**
|
|
654
|
+
* Return all children matching the key hierarchy
|
|
655
|
+
* @param keys - XML tag names
|
|
656
|
+
*/
|
|
657
|
+
all(...keys) {
|
|
658
|
+
let nodes = this.children;
|
|
659
|
+
let found = [];
|
|
660
|
+
for (const [i, k] of Object.entries(keys)) {
|
|
661
|
+
if (Number.parseInt(i) === keys.length - 1) {
|
|
662
|
+
found = nodes.filter((n) => n.type === k);
|
|
663
|
+
break;
|
|
664
|
+
}
|
|
665
|
+
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
666
|
+
if (!nodes) return [];
|
|
667
|
+
}
|
|
668
|
+
return found;
|
|
669
|
+
}
|
|
670
|
+
};
|
|
671
|
+
|
|
672
|
+
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
673
|
+
var ENTITIES = {
|
|
674
|
+
cdata: {
|
|
675
|
+
end: "]]>",
|
|
676
|
+
start: /^<!\[CDATA\[/
|
|
677
|
+
},
|
|
678
|
+
comment: {
|
|
679
|
+
end: "-->",
|
|
680
|
+
start: /^<!--/
|
|
681
|
+
},
|
|
682
|
+
declaration: {
|
|
683
|
+
end: "?>",
|
|
684
|
+
start: /^<\?/
|
|
685
|
+
},
|
|
686
|
+
doctype: {
|
|
687
|
+
end: ">",
|
|
688
|
+
start: /^<!DOCTYPE/i
|
|
689
|
+
},
|
|
690
|
+
element: {
|
|
691
|
+
end: ">",
|
|
692
|
+
start: /^<[\w:.-/]/
|
|
693
|
+
}
|
|
694
|
+
};
|
|
695
|
+
var transformer = {
|
|
696
|
+
buf: "",
|
|
697
|
+
state: "skip" /* SKIP */,
|
|
698
|
+
previous: ["skip" /* SKIP */, -1],
|
|
699
|
+
flush(controller) {
|
|
700
|
+
if (this.buf.length > 0) {
|
|
701
|
+
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
702
|
+
}
|
|
703
|
+
},
|
|
704
|
+
transform(chunk, controller) {
|
|
705
|
+
this.buf += chunk;
|
|
706
|
+
while (this.buf.length) {
|
|
707
|
+
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
708
|
+
break;
|
|
709
|
+
}
|
|
710
|
+
this.previous = [this.state, this.buf.length];
|
|
711
|
+
if (this.state === "skip" /* SKIP */) {
|
|
712
|
+
const index = this.buf.indexOf("<");
|
|
713
|
+
if (index < 0) break;
|
|
714
|
+
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
715
|
+
this.buf = this.buf.substring(index);
|
|
716
|
+
this.state = "search" /* SEARCH */;
|
|
717
|
+
}
|
|
718
|
+
if (this.state === "search" /* SEARCH */) {
|
|
719
|
+
if (this.buf.length < 3) break;
|
|
720
|
+
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
721
|
+
if (this.buf.match(entity.start)) {
|
|
722
|
+
this.state = state;
|
|
723
|
+
break;
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
continue;
|
|
727
|
+
}
|
|
728
|
+
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
729
|
+
const { end } = ENTITIES[this.state];
|
|
730
|
+
const index = this.buf.indexOf(end);
|
|
731
|
+
if (index < 0) break;
|
|
732
|
+
controller.enqueue([
|
|
733
|
+
this.state,
|
|
734
|
+
this.buf.substring(0, index + end.length)
|
|
735
|
+
]);
|
|
736
|
+
this.buf = this.buf.substring(index + end.length);
|
|
737
|
+
this.state = "skip" /* SKIP */;
|
|
738
|
+
continue;
|
|
739
|
+
}
|
|
740
|
+
throw new Error();
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
};
|
|
744
|
+
var XMLStream = class extends TransformStream {
|
|
745
|
+
constructor() {
|
|
746
|
+
super({ ...transformer });
|
|
747
|
+
}
|
|
748
|
+
};
|
|
749
|
+
|
|
750
|
+
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
751
|
+
var ignoreTypes = {
|
|
752
|
+
["comment" /* COMMENT */]: "ignoreComments",
|
|
753
|
+
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
754
|
+
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
755
|
+
};
|
|
756
|
+
async function* parse(input, options) {
|
|
757
|
+
const document = new Node("@document");
|
|
758
|
+
try {
|
|
759
|
+
const init = { ...options?.fetchOptions };
|
|
760
|
+
if (options?.signal) {
|
|
761
|
+
init.signal = options.signal;
|
|
762
|
+
}
|
|
763
|
+
let source;
|
|
764
|
+
if (typeof input === "string" || input instanceof URL) {
|
|
765
|
+
input = new URL(input);
|
|
766
|
+
const response = await fetch(input, init);
|
|
767
|
+
if (!response.ok || !response.body) {
|
|
768
|
+
throw new Error(`Bad response`);
|
|
769
|
+
}
|
|
770
|
+
source = response.body;
|
|
771
|
+
} else {
|
|
772
|
+
source = input;
|
|
773
|
+
}
|
|
774
|
+
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
775
|
+
signal: options?.signal
|
|
776
|
+
});
|
|
777
|
+
let node = document;
|
|
778
|
+
for await (const [type, value] of stream) {
|
|
779
|
+
if (options?.signal?.aborted) {
|
|
780
|
+
break;
|
|
781
|
+
}
|
|
782
|
+
if (type === "text" /* TEXT */) {
|
|
783
|
+
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
784
|
+
continue;
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
788
|
+
const newNode = new Node(type, node, value);
|
|
789
|
+
node.addChild(newNode);
|
|
790
|
+
yield newNode;
|
|
791
|
+
continue;
|
|
792
|
+
}
|
|
793
|
+
if (type === "element" /* ELEMENT */) {
|
|
794
|
+
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
795
|
+
if (value.endsWith("/>")) {
|
|
796
|
+
const newNode2 = new Node(name, node, value);
|
|
797
|
+
node.addChild(newNode2);
|
|
798
|
+
yield newNode2;
|
|
799
|
+
continue;
|
|
800
|
+
}
|
|
801
|
+
if (value.startsWith("</")) {
|
|
802
|
+
yield node;
|
|
803
|
+
node = node.parent;
|
|
804
|
+
continue;
|
|
805
|
+
}
|
|
806
|
+
const newNode = new Node(name, node, value);
|
|
807
|
+
node.addChild(newNode);
|
|
808
|
+
node = newNode;
|
|
809
|
+
continue;
|
|
810
|
+
}
|
|
811
|
+
node.addChild(new Node(type, node, value));
|
|
812
|
+
}
|
|
813
|
+
} catch (err) {
|
|
814
|
+
if (options?.silent === false) {
|
|
815
|
+
throw err;
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
return document;
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
// src/loader.ts
|
|
822
|
+
var BASE_VERSION = "2024";
|
|
823
|
+
function getFilename(version) {
|
|
824
|
+
return `english-wordnet-${version}.xml`;
|
|
825
|
+
}
|
|
826
|
+
function getDownloadUrl(version) {
|
|
827
|
+
return `https://en-word.net/static/${getFilename(version)}.gz`;
|
|
828
|
+
}
|
|
829
|
+
function getDefaultCacheDir() {
|
|
830
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
|
|
831
|
+
return path.join(homeDir, ".cache", "synset");
|
|
832
|
+
}
|
|
588
833
|
function fileExists(filePath) {
|
|
589
834
|
if (existsSync(filePath)) {
|
|
590
835
|
const stat = statSync(filePath);
|
|
@@ -638,7 +883,6 @@ async function findLatestVersion(onProgress, cacheDir) {
|
|
|
638
883
|
for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
|
|
639
884
|
const version = year.toString();
|
|
640
885
|
if (await urlExists(getDownloadUrl(version))) {
|
|
641
|
-
continue;
|
|
642
886
|
} else {
|
|
643
887
|
return (year - 1).toString();
|
|
644
888
|
}
|
|
@@ -659,9 +903,13 @@ async function downloadWordNet(version, destPath) {
|
|
|
659
903
|
const url = getDownloadUrl(version);
|
|
660
904
|
const response = await fetch(url);
|
|
661
905
|
if (!response.ok || !response.body) {
|
|
662
|
-
throw new Error(
|
|
906
|
+
throw new Error(
|
|
907
|
+
`Failed to download WordNet ${version}: ${response.statusText}`
|
|
908
|
+
);
|
|
663
909
|
}
|
|
664
|
-
const decompressed = response.body.pipeThrough(
|
|
910
|
+
const decompressed = response.body.pipeThrough(
|
|
911
|
+
new DecompressionStream("gzip")
|
|
912
|
+
);
|
|
665
913
|
const arrayBuffer = await new Response(decompressed).arrayBuffer();
|
|
666
914
|
const dir = path.dirname(destPath);
|
|
667
915
|
if (!existsSync(dir)) {
|
|
@@ -671,8 +919,9 @@ async function downloadWordNet(version, destPath) {
|
|
|
671
919
|
}
|
|
672
920
|
function createParser(filePath) {
|
|
673
921
|
const resolvedPath = path.resolve(filePath);
|
|
674
|
-
const
|
|
675
|
-
|
|
922
|
+
const nodeStream = createReadStream(resolvedPath);
|
|
923
|
+
const webStream = Readable.toWeb(nodeStream);
|
|
924
|
+
return parse(webStream, {
|
|
676
925
|
ignoreDeclaration: false,
|
|
677
926
|
silent: false
|
|
678
927
|
});
|
|
@@ -825,117 +1074,6 @@ function getSynsetWords(index, synset) {
|
|
|
825
1074
|
return synset.members.map((id) => index.entries.get(id)).filter((e) => e !== void 0).map((e) => e.lemmas[0]?.writtenForm).filter((w) => w !== void 0);
|
|
826
1075
|
}
|
|
827
1076
|
|
|
828
|
-
// src/literals.ts
|
|
829
|
-
var PartsOfSpeech2 = {
|
|
830
|
-
n: "Noun",
|
|
831
|
-
v: "Verb",
|
|
832
|
-
a: "Adjective",
|
|
833
|
-
r: "Adverb",
|
|
834
|
-
s: "Adjective Satellite",
|
|
835
|
-
t: "?",
|
|
836
|
-
c: "Conjunction",
|
|
837
|
-
p: "Adposition (Preposition, postposition, etc.)",
|
|
838
|
-
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
839
|
-
u: "Unknown"
|
|
840
|
-
};
|
|
841
|
-
var SynsetRelationRelType2 = {
|
|
842
|
-
agent: "Agent",
|
|
843
|
-
also: "See also",
|
|
844
|
-
anto_converse: "Converse antonym",
|
|
845
|
-
anto_gradable: "Gradable antonym",
|
|
846
|
-
anto_simple: "Simple antonym",
|
|
847
|
-
antonym: "Antonym",
|
|
848
|
-
attribute: "Attribute",
|
|
849
|
-
augmentative: "Augmentative",
|
|
850
|
-
be_in_state: "Be in state",
|
|
851
|
-
cause: "Cause",
|
|
852
|
-
causes: "Causes",
|
|
853
|
-
classified_by: "Classified by",
|
|
854
|
-
classifies: "Classifies",
|
|
855
|
-
co_agent_instrument: "Co-agent instrument",
|
|
856
|
-
co_agent_patient: "Co-agent patient",
|
|
857
|
-
co_agent_result: "Co-agent result",
|
|
858
|
-
co_instrument_agent: "Co-instrument agent",
|
|
859
|
-
co_instrument_patient: "Co-instrument patient",
|
|
860
|
-
co_instrument_result: "Co-instrument result",
|
|
861
|
-
co_patient_agent: "Co-patient agent",
|
|
862
|
-
co_patient_instrument: "Co-patient instrument",
|
|
863
|
-
co_result_agent: "Co-result agent",
|
|
864
|
-
co_result_instrument: "Co-result instrument",
|
|
865
|
-
co_role: "Co-role",
|
|
866
|
-
diminutive: "Diminutive",
|
|
867
|
-
direction: "Direction",
|
|
868
|
-
domain_member_region: "Domain member region",
|
|
869
|
-
domain_member_topic: "Domain member topic",
|
|
870
|
-
domain_region: "Domain region",
|
|
871
|
-
domain_topic: "Domain topic",
|
|
872
|
-
entail: "Entail",
|
|
873
|
-
entails: "Entails",
|
|
874
|
-
eq_synonym: "Equivalent synonym",
|
|
875
|
-
exemplifies: "Exemplifies",
|
|
876
|
-
feminine: "Feminine",
|
|
877
|
-
has_augmentative: "Has augmentative",
|
|
878
|
-
has_diminutive: "Has diminutive",
|
|
879
|
-
has_domain_region: "Has domain region",
|
|
880
|
-
has_domain_topic: "Has domain topic",
|
|
881
|
-
has_feminine: "Has feminine",
|
|
882
|
-
has_masculine: "Has masculine",
|
|
883
|
-
has_young: "Has young",
|
|
884
|
-
holo_location: "Holonym location",
|
|
885
|
-
holo_member: "Member holonym",
|
|
886
|
-
holo_part: "Part holonym",
|
|
887
|
-
holo_portion: "Portion holonym",
|
|
888
|
-
holo_substance: "Substance holonym",
|
|
889
|
-
holonym: "Holonym",
|
|
890
|
-
hypernym: "Hypernym",
|
|
891
|
-
hyponym: "Hyponym",
|
|
892
|
-
in_manner: "In manner",
|
|
893
|
-
instance_hypernym: "Instance hypernym",
|
|
894
|
-
instance_hyponym: "Instance hyponym",
|
|
895
|
-
instrument: "Instrument",
|
|
896
|
-
involved: "Involved",
|
|
897
|
-
involved_agent: "Involved agent",
|
|
898
|
-
involved_direction: "Involved direction",
|
|
899
|
-
involved_instrument: "Involved instrument",
|
|
900
|
-
involved_location: "Involved location",
|
|
901
|
-
involved_patient: "Involved patient",
|
|
902
|
-
involved_result: "Involved result",
|
|
903
|
-
involved_source_direction: "Involved source direction",
|
|
904
|
-
involved_target_direction: "Involved target direction",
|
|
905
|
-
ir_synonym: "IR synonym",
|
|
906
|
-
is_caused_by: "Is caused by",
|
|
907
|
-
is_entailed_by: "Is entailed by",
|
|
908
|
-
is_exemplified_by: "Is exemplified by",
|
|
909
|
-
is_subevent_of: "Is subevent of",
|
|
910
|
-
location: "Location",
|
|
911
|
-
manner_of: "Manner of",
|
|
912
|
-
masculine: "Masculine",
|
|
913
|
-
member_holonym: "Member holonym",
|
|
914
|
-
member_meronym: "Member meronym",
|
|
915
|
-
mero_location: "Meronym location",
|
|
916
|
-
mero_member: "Member meronym",
|
|
917
|
-
mero_part: "Part meronym",
|
|
918
|
-
mero_portion: "Portion meronym",
|
|
919
|
-
mero_substance: "Substance meronym",
|
|
920
|
-
meronym: "Meronym",
|
|
921
|
-
other: "Other",
|
|
922
|
-
part_holonym: "Part holonym",
|
|
923
|
-
part_meronym: "Part meronym",
|
|
924
|
-
patient: "Patient",
|
|
925
|
-
restricted_by: "Restricted by",
|
|
926
|
-
restricts: "Restricts",
|
|
927
|
-
result: "Result",
|
|
928
|
-
role: "Role",
|
|
929
|
-
similar: "Similar",
|
|
930
|
-
source_direction: "Source direction",
|
|
931
|
-
state_of: "State of",
|
|
932
|
-
subevent: "Subevent",
|
|
933
|
-
substance_holonym: "Substance holonym",
|
|
934
|
-
substance_meronym: "Substance meronym",
|
|
935
|
-
target_direction: "Target direction",
|
|
936
|
-
young: "Young"
|
|
937
|
-
};
|
|
938
|
-
|
|
939
1077
|
// src/cli.ts
|
|
940
1078
|
var decode = (s) => decodeXmlEntities(s) ?? "";
|
|
941
1079
|
var HELP = `
|
|
@@ -952,6 +1090,7 @@ Commands:
|
|
|
952
1090
|
related <word> Show all relations for a word
|
|
953
1091
|
info <synset-id> Show details for a synset ID
|
|
954
1092
|
fetch Download WordNet data to cache
|
|
1093
|
+
export-sqlite <out> Export dictionary to SQLite database
|
|
955
1094
|
|
|
956
1095
|
Options:
|
|
957
1096
|
--file <path> Use a local WordNet XML file instead of cache
|
|
@@ -962,6 +1101,7 @@ Examples:
|
|
|
962
1101
|
synset synonyms happy
|
|
963
1102
|
synset related computer --file ./wordnet.xml
|
|
964
1103
|
synset fetch
|
|
1104
|
+
synset export-sqlite dictionary.db
|
|
965
1105
|
`;
|
|
966
1106
|
async function main() {
|
|
967
1107
|
const args = process.argv.slice(2);
|
|
@@ -983,6 +1123,24 @@ async function main() {
|
|
|
983
1123
|
console.log(`WordNet ${version} cached at: ${cachedPath}`);
|
|
984
1124
|
return;
|
|
985
1125
|
}
|
|
1126
|
+
if (command === "export-sqlite") {
|
|
1127
|
+
const outputPath = cleanArgs[1];
|
|
1128
|
+
if (!outputPath) {
|
|
1129
|
+
console.error("Error: Missing output path for export-sqlite");
|
|
1130
|
+
process.exit(1);
|
|
1131
|
+
}
|
|
1132
|
+
console.log("Loading WordNet data...");
|
|
1133
|
+
const lexicon2 = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
|
|
1134
|
+
console.log(`Exporting to ${outputPath}...`);
|
|
1135
|
+
exportToSQLite(lexicon2, outputPath, {
|
|
1136
|
+
onProgress: ({ phase, current, total }) => {
|
|
1137
|
+
process.stdout.write(`\r${phase}: ${current}/${total}`);
|
|
1138
|
+
}
|
|
1139
|
+
});
|
|
1140
|
+
console.log(`
|
|
1141
|
+
Exported to ${outputPath}`);
|
|
1142
|
+
return;
|
|
1143
|
+
}
|
|
986
1144
|
if (!word && command !== "fetch") {
|
|
987
1145
|
console.error(`Error: Missing word argument for command '${command}'`);
|
|
988
1146
|
process.exit(1);
|
|
@@ -1066,7 +1224,7 @@ async function main() {
|
|
|
1066
1224
|
for (const [relType, words] of relsByType) {
|
|
1067
1225
|
const label = SynsetRelationRelType2[relType] || relType;
|
|
1068
1226
|
console.log(` ${label}:`);
|
|
1069
|
-
|
|
1227
|
+
for (const w of words) console.log(` - ${w}`);
|
|
1070
1228
|
}
|
|
1071
1229
|
}
|
|
1072
1230
|
break;
|
|
@@ -1085,11 +1243,12 @@ async function main() {
|
|
|
1085
1243
|
console.log(`ILI: ${synset.ili}`);
|
|
1086
1244
|
console.log(`
|
|
1087
1245
|
Definitions:`);
|
|
1088
|
-
synset.definitions
|
|
1246
|
+
for (const d of synset.definitions) console.log(` - ${decode(d.inner)}`);
|
|
1089
1247
|
if (synset.examples.length > 0) {
|
|
1090
1248
|
console.log(`
|
|
1091
1249
|
Examples:`);
|
|
1092
|
-
|
|
1250
|
+
for (const e of synset.examples)
|
|
1251
|
+
console.log(` - "${decode(e.inner)}"`);
|
|
1093
1252
|
}
|
|
1094
1253
|
if (synset.synsetRelations.length > 0) {
|
|
1095
1254
|
console.log(`
|