synset 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -39
- package/dist/cli.cjs +534 -382
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +542 -383
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +301 -166
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +26 -7
- package/dist/index.d.ts +26 -7
- package/dist/index.js +307 -167
- package/dist/index.js.map +1 -1
- package/dist/schema.sql +22 -0
- package/package.json +10 -5
package/dist/cli.cjs
CHANGED
|
@@ -23,251 +23,8 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
23
23
|
mod
|
|
24
24
|
));
|
|
25
25
|
|
|
26
|
-
// src/
|
|
27
|
-
var
|
|
28
|
-
var import_node_path = __toESM(require("path"), 1);
|
|
29
|
-
|
|
30
|
-
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
31
|
-
var Node = class {
|
|
32
|
-
#type;
|
|
33
|
-
#children;
|
|
34
|
-
#parent;
|
|
35
|
-
#attr;
|
|
36
|
-
#raw;
|
|
37
|
-
constructor(type, parent, raw) {
|
|
38
|
-
this.#type = type;
|
|
39
|
-
this.#parent = parent;
|
|
40
|
-
this.#raw = raw;
|
|
41
|
-
this.#children = [];
|
|
42
|
-
}
|
|
43
|
-
get type() {
|
|
44
|
-
return this.#type;
|
|
45
|
-
}
|
|
46
|
-
get raw() {
|
|
47
|
-
return this.#raw ?? "";
|
|
48
|
-
}
|
|
49
|
-
get parent() {
|
|
50
|
-
return this.#parent;
|
|
51
|
-
}
|
|
52
|
-
get children() {
|
|
53
|
-
return this.#children;
|
|
54
|
-
}
|
|
55
|
-
get attributes() {
|
|
56
|
-
if (this.#attr) {
|
|
57
|
-
return this.#attr;
|
|
58
|
-
}
|
|
59
|
-
this.#attr = {};
|
|
60
|
-
if (this.raw) {
|
|
61
|
-
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
62
|
-
let match;
|
|
63
|
-
while ((match = regex.exec(this.raw)) !== null) {
|
|
64
|
-
this.#attr[match[1]] = match[3];
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
return this.#attr;
|
|
68
|
-
}
|
|
69
|
-
get innerText() {
|
|
70
|
-
if (this.children.length) {
|
|
71
|
-
let text = "";
|
|
72
|
-
for (const child of this.children) {
|
|
73
|
-
text += child.innerText;
|
|
74
|
-
}
|
|
75
|
-
return text;
|
|
76
|
-
}
|
|
77
|
-
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
78
|
-
}
|
|
79
|
-
addChild(child) {
|
|
80
|
-
this.#children.push(child);
|
|
81
|
-
}
|
|
82
|
-
/**
|
|
83
|
-
* Returns true if node and parents match the key hierarchy
|
|
84
|
-
* @param keys - XML tag names
|
|
85
|
-
*/
|
|
86
|
-
is(...keys) {
|
|
87
|
-
if (!keys.length) return false;
|
|
88
|
-
let parent;
|
|
89
|
-
for (const key of keys.toReversed()) {
|
|
90
|
-
parent = parent ? parent.parent : this;
|
|
91
|
-
if (parent?.type !== key) {
|
|
92
|
-
return false;
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
return true;
|
|
96
|
-
}
|
|
97
|
-
/**
|
|
98
|
-
* Return the first child matching the key
|
|
99
|
-
* @param key - XML tag name
|
|
100
|
-
*/
|
|
101
|
-
first(key) {
|
|
102
|
-
return this.children.find((n) => n.type === key);
|
|
103
|
-
}
|
|
104
|
-
/**
|
|
105
|
-
* Return all children matching the key hierarchy
|
|
106
|
-
* @param keys - XML tag names
|
|
107
|
-
*/
|
|
108
|
-
all(...keys) {
|
|
109
|
-
let nodes = this.children;
|
|
110
|
-
let found = [];
|
|
111
|
-
for (const [i, k] of Object.entries(keys)) {
|
|
112
|
-
if (Number.parseInt(i) === keys.length - 1) {
|
|
113
|
-
found = nodes.filter((n) => n.type === k);
|
|
114
|
-
break;
|
|
115
|
-
}
|
|
116
|
-
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
117
|
-
if (!nodes) return [];
|
|
118
|
-
}
|
|
119
|
-
return found;
|
|
120
|
-
}
|
|
121
|
-
};
|
|
122
|
-
|
|
123
|
-
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
124
|
-
var ENTITIES = {
|
|
125
|
-
cdata: {
|
|
126
|
-
end: "]]>",
|
|
127
|
-
start: /^<!\[CDATA\[/
|
|
128
|
-
},
|
|
129
|
-
comment: {
|
|
130
|
-
end: "-->",
|
|
131
|
-
start: /^<!--/
|
|
132
|
-
},
|
|
133
|
-
declaration: {
|
|
134
|
-
end: "?>",
|
|
135
|
-
start: /^<\?/
|
|
136
|
-
},
|
|
137
|
-
doctype: {
|
|
138
|
-
end: ">",
|
|
139
|
-
start: /^<!DOCTYPE/i
|
|
140
|
-
},
|
|
141
|
-
element: {
|
|
142
|
-
end: ">",
|
|
143
|
-
start: /^<[\w:.-/]/
|
|
144
|
-
}
|
|
145
|
-
};
|
|
146
|
-
var transformer = {
|
|
147
|
-
buf: "",
|
|
148
|
-
state: "skip" /* SKIP */,
|
|
149
|
-
previous: ["skip" /* SKIP */, -1],
|
|
150
|
-
flush(controller) {
|
|
151
|
-
if (this.buf.length > 0) {
|
|
152
|
-
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
153
|
-
}
|
|
154
|
-
},
|
|
155
|
-
transform(chunk, controller) {
|
|
156
|
-
this.buf += chunk;
|
|
157
|
-
while (this.buf.length) {
|
|
158
|
-
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
159
|
-
break;
|
|
160
|
-
}
|
|
161
|
-
this.previous = [this.state, this.buf.length];
|
|
162
|
-
if (this.state === "skip" /* SKIP */) {
|
|
163
|
-
const index = this.buf.indexOf("<");
|
|
164
|
-
if (index < 0) break;
|
|
165
|
-
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
166
|
-
this.buf = this.buf.substring(index);
|
|
167
|
-
this.state = "search" /* SEARCH */;
|
|
168
|
-
}
|
|
169
|
-
if (this.state === "search" /* SEARCH */) {
|
|
170
|
-
if (this.buf.length < 3) break;
|
|
171
|
-
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
172
|
-
if (this.buf.match(entity.start)) {
|
|
173
|
-
this.state = state;
|
|
174
|
-
break;
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
continue;
|
|
178
|
-
}
|
|
179
|
-
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
180
|
-
const { end } = ENTITIES[this.state];
|
|
181
|
-
const index = this.buf.indexOf(end);
|
|
182
|
-
if (index < 0) break;
|
|
183
|
-
controller.enqueue([
|
|
184
|
-
this.state,
|
|
185
|
-
this.buf.substring(0, index + end.length)
|
|
186
|
-
]);
|
|
187
|
-
this.buf = this.buf.substring(index + end.length);
|
|
188
|
-
this.state = "skip" /* SKIP */;
|
|
189
|
-
continue;
|
|
190
|
-
}
|
|
191
|
-
throw new Error();
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
|
-
var XMLStream = class extends TransformStream {
|
|
196
|
-
constructor() {
|
|
197
|
-
super({ ...transformer });
|
|
198
|
-
}
|
|
199
|
-
};
|
|
200
|
-
|
|
201
|
-
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
202
|
-
var ignoreTypes = {
|
|
203
|
-
["comment" /* COMMENT */]: "ignoreComments",
|
|
204
|
-
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
205
|
-
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
206
|
-
};
|
|
207
|
-
async function* parse(input, options) {
|
|
208
|
-
const document = new Node("@document");
|
|
209
|
-
try {
|
|
210
|
-
const init = { ...options?.fetchOptions };
|
|
211
|
-
if (options?.signal) {
|
|
212
|
-
init.signal = options.signal;
|
|
213
|
-
}
|
|
214
|
-
let source;
|
|
215
|
-
if (typeof input === "string" || input instanceof URL) {
|
|
216
|
-
input = new URL(input);
|
|
217
|
-
const response = await fetch(input, init);
|
|
218
|
-
if (!response.ok || !response.body) {
|
|
219
|
-
throw new Error(`Bad response`);
|
|
220
|
-
}
|
|
221
|
-
source = response.body;
|
|
222
|
-
} else {
|
|
223
|
-
source = input;
|
|
224
|
-
}
|
|
225
|
-
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
226
|
-
signal: options?.signal
|
|
227
|
-
});
|
|
228
|
-
let node = document;
|
|
229
|
-
for await (const [type, value] of stream) {
|
|
230
|
-
if (options?.signal?.aborted) {
|
|
231
|
-
break;
|
|
232
|
-
}
|
|
233
|
-
if (type === "text" /* TEXT */) {
|
|
234
|
-
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
235
|
-
continue;
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
239
|
-
const newNode = new Node(type, node, value);
|
|
240
|
-
node.addChild(newNode);
|
|
241
|
-
yield newNode;
|
|
242
|
-
continue;
|
|
243
|
-
}
|
|
244
|
-
if (type === "element" /* ELEMENT */) {
|
|
245
|
-
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
246
|
-
if (value.endsWith("/>")) {
|
|
247
|
-
const newNode2 = new Node(name, node, value);
|
|
248
|
-
node.addChild(newNode2);
|
|
249
|
-
yield newNode2;
|
|
250
|
-
continue;
|
|
251
|
-
}
|
|
252
|
-
if (value.startsWith("</")) {
|
|
253
|
-
yield node;
|
|
254
|
-
node = node.parent;
|
|
255
|
-
continue;
|
|
256
|
-
}
|
|
257
|
-
const newNode = new Node(name, node, value);
|
|
258
|
-
node.addChild(newNode);
|
|
259
|
-
node = newNode;
|
|
260
|
-
continue;
|
|
261
|
-
}
|
|
262
|
-
node.addChild(new Node(type, node, value));
|
|
263
|
-
}
|
|
264
|
-
} catch (err) {
|
|
265
|
-
if (options?.silent === false) {
|
|
266
|
-
throw err;
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
return document;
|
|
270
|
-
}
|
|
26
|
+
// src/export-sqlite.ts
|
|
27
|
+
var import_bun_sqlite = require("bun:sqlite");
|
|
271
28
|
|
|
272
29
|
// src/types.ts
|
|
273
30
|
var import_zod = require("zod");
|
|
@@ -434,7 +191,9 @@ var Lexicon = import_zod.z.object({
|
|
|
434
191
|
synsets: import_zod.z.array(Synset).min(0),
|
|
435
192
|
syntacticBehaviors: import_zod.z.array(SyntacticBehavior).min(0)
|
|
436
193
|
});
|
|
437
|
-
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
194
|
+
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
195
|
+
(v) => v.value
|
|
196
|
+
);
|
|
438
197
|
|
|
439
198
|
// src/helpers.ts
|
|
440
199
|
function PronunciationNode(node) {
|
|
@@ -462,7 +221,7 @@ function SenseRelationNode(node) {
|
|
|
462
221
|
dcType: optAttr(node, "dc:type")
|
|
463
222
|
};
|
|
464
223
|
return SenseRelation.parse(
|
|
465
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
224
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
|
|
466
225
|
);
|
|
467
226
|
}
|
|
468
227
|
function SenseNode(node) {
|
|
@@ -478,7 +237,7 @@ function SenseNode(node) {
|
|
|
478
237
|
extendWithRestAttr(
|
|
479
238
|
node,
|
|
480
239
|
obj,
|
|
481
|
-
(s) => s
|
|
240
|
+
(s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
|
|
482
241
|
)
|
|
483
242
|
);
|
|
484
243
|
}
|
|
@@ -509,7 +268,7 @@ function ExampleNode(node) {
|
|
|
509
268
|
dcSource: optAttr(node, "dc:source")
|
|
510
269
|
};
|
|
511
270
|
return Example.parse(
|
|
512
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
271
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
513
272
|
);
|
|
514
273
|
}
|
|
515
274
|
function ILIDefinitionNode(node) {
|
|
@@ -546,7 +305,7 @@ function SynsetNode(node) {
|
|
|
546
305
|
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
547
306
|
};
|
|
548
307
|
return Synset.parse(
|
|
549
|
-
extendWithRestAttr(node, obj, (s) => s
|
|
308
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
550
309
|
);
|
|
551
310
|
}
|
|
552
311
|
function LexiconNode(node) {
|
|
@@ -575,7 +334,9 @@ var decodeXmlEntities = (s) => {
|
|
|
575
334
|
var attr = (node, attrName) => {
|
|
576
335
|
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
577
336
|
if (value === void 0) {
|
|
578
|
-
throw new Error(
|
|
337
|
+
throw new Error(
|
|
338
|
+
`Missing required attribute "${attrName}" on node "${node.type}"`
|
|
339
|
+
);
|
|
579
340
|
}
|
|
580
341
|
return value;
|
|
581
342
|
};
|
|
@@ -593,20 +354,497 @@ var extendWithRestAttr = (node, obj, proxy) => {
|
|
|
593
354
|
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
594
355
|
};
|
|
595
356
|
var children = (node, type, fn) => {
|
|
596
|
-
return node.children.filter((v) => v.type
|
|
357
|
+
return node.children.filter((v) => v.type === type).map((v) => fn(v));
|
|
597
358
|
};
|
|
598
359
|
|
|
599
|
-
// src/
|
|
600
|
-
var
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
360
|
+
// src/export-sqlite.ts
|
|
361
|
+
var SCHEMA = `
|
|
362
|
+
CREATE TABLE IF NOT EXISTS words (
|
|
363
|
+
id INTEGER PRIMARY KEY,
|
|
364
|
+
word TEXT NOT NULL,
|
|
365
|
+
word_display TEXT NOT NULL
|
|
366
|
+
);
|
|
367
|
+
CREATE INDEX IF NOT EXISTS idx_words_word ON words(word);
|
|
368
|
+
|
|
369
|
+
CREATE TABLE IF NOT EXISTS synsets (
|
|
370
|
+
id TEXT PRIMARY KEY,
|
|
371
|
+
pos TEXT NOT NULL,
|
|
372
|
+
definition TEXT NOT NULL
|
|
373
|
+
);
|
|
374
|
+
|
|
375
|
+
CREATE TABLE IF NOT EXISTS word_synsets (
|
|
376
|
+
word_id INTEGER NOT NULL,
|
|
377
|
+
synset_id TEXT NOT NULL,
|
|
378
|
+
PRIMARY KEY (word_id, synset_id)
|
|
379
|
+
);
|
|
380
|
+
CREATE INDEX IF NOT EXISTS idx_ws_word ON word_synsets(word_id);
|
|
381
|
+
`;
|
|
382
|
+
function exportToSQLite(lexicon, outputPath, options = {}) {
|
|
383
|
+
const { onProgress } = options;
|
|
384
|
+
const db = new import_bun_sqlite.Database(outputPath, { create: true });
|
|
385
|
+
db.exec("PRAGMA journal_mode = OFF");
|
|
386
|
+
db.exec("PRAGMA synchronous = OFF");
|
|
387
|
+
db.exec(SCHEMA);
|
|
388
|
+
const wordToEntries = /* @__PURE__ */ new Map();
|
|
389
|
+
for (const entry of lexicon.lexicalEntries) {
|
|
390
|
+
const word = entry.lemmas[0]?.writtenForm;
|
|
391
|
+
if (word) {
|
|
392
|
+
const lower = word.toLowerCase();
|
|
393
|
+
const existing = wordToEntries.get(lower) || [];
|
|
394
|
+
existing.push(entry);
|
|
395
|
+
wordToEntries.set(lower, existing);
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
const synsetMap = /* @__PURE__ */ new Map();
|
|
399
|
+
for (const synset of lexicon.synsets) {
|
|
400
|
+
synsetMap.set(synset.id, synset);
|
|
401
|
+
}
|
|
402
|
+
const insertWord = db.prepare(
|
|
403
|
+
"INSERT INTO words (word, word_display) VALUES (?, ?)"
|
|
404
|
+
);
|
|
405
|
+
const wordIds = /* @__PURE__ */ new Map();
|
|
406
|
+
const words = Array.from(wordToEntries.keys()).sort();
|
|
407
|
+
const totalWords = words.length;
|
|
408
|
+
db.exec("BEGIN TRANSACTION");
|
|
409
|
+
let wordId = 0;
|
|
410
|
+
for (let i = 0; i < words.length; i++) {
|
|
411
|
+
const word = words[i];
|
|
412
|
+
const entries = wordToEntries.get(word);
|
|
413
|
+
if (!entries) continue;
|
|
414
|
+
const display = entries[0].lemmas[0]?.writtenForm || word;
|
|
415
|
+
insertWord.run(word, display);
|
|
416
|
+
wordId++;
|
|
417
|
+
wordIds.set(word, wordId);
|
|
418
|
+
if (onProgress && i % 1e4 === 0) {
|
|
419
|
+
onProgress({ phase: "words", current: i, total: totalWords });
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
db.exec("COMMIT");
|
|
423
|
+
const usedSynsetIds = /* @__PURE__ */ new Set();
|
|
424
|
+
for (const entries of wordToEntries.values()) {
|
|
425
|
+
for (const entry of entries) {
|
|
426
|
+
for (const sense of entry.senses) {
|
|
427
|
+
usedSynsetIds.add(sense.synset);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
const insertSynset = db.prepare(
|
|
432
|
+
"INSERT OR IGNORE INTO synsets (id, pos, definition) VALUES (?, ?, ?)"
|
|
433
|
+
);
|
|
434
|
+
const synsetList = Array.from(usedSynsetIds);
|
|
435
|
+
const totalSynsets = synsetList.length;
|
|
436
|
+
db.exec("BEGIN TRANSACTION");
|
|
437
|
+
for (let i = 0; i < synsetList.length; i++) {
|
|
438
|
+
const synsetId = synsetList[i];
|
|
439
|
+
const synset = synsetMap.get(synsetId);
|
|
440
|
+
if (synset) {
|
|
441
|
+
const def = decodeXmlEntities(synset.definitions[0]?.inner) || "";
|
|
442
|
+
insertSynset.run(synsetId, synset.partOfSpeech, def);
|
|
443
|
+
}
|
|
444
|
+
if (onProgress && i % 1e4 === 0) {
|
|
445
|
+
onProgress({ phase: "synsets", current: i, total: totalSynsets });
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
db.exec("COMMIT");
|
|
449
|
+
const insertRelation = db.prepare(
|
|
450
|
+
"INSERT OR IGNORE INTO word_synsets (word_id, synset_id) VALUES (?, ?)"
|
|
451
|
+
);
|
|
452
|
+
let relationCount = 0;
|
|
453
|
+
const totalRelations = Array.from(wordToEntries.values()).reduce(
|
|
454
|
+
(sum, entries) => sum + entries.reduce((s, e) => s + e.senses.length, 0),
|
|
455
|
+
0
|
|
456
|
+
);
|
|
457
|
+
db.exec("BEGIN TRANSACTION");
|
|
458
|
+
for (const [word, entries] of wordToEntries) {
|
|
459
|
+
const wordId2 = wordIds.get(word);
|
|
460
|
+
if (!wordId2) continue;
|
|
461
|
+
for (const entry of entries) {
|
|
462
|
+
for (const sense of entry.senses) {
|
|
463
|
+
insertRelation.run(wordId2, sense.synset);
|
|
464
|
+
relationCount++;
|
|
465
|
+
if (onProgress && relationCount % 1e4 === 0) {
|
|
466
|
+
onProgress({
|
|
467
|
+
phase: "relations",
|
|
468
|
+
current: relationCount,
|
|
469
|
+
total: totalRelations
|
|
470
|
+
});
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
db.exec("COMMIT");
|
|
476
|
+
db.close();
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
// src/literals.ts
|
|
480
|
+
var PartsOfSpeech2 = {
|
|
481
|
+
n: "Noun",
|
|
482
|
+
v: "Verb",
|
|
483
|
+
a: "Adjective",
|
|
484
|
+
r: "Adverb",
|
|
485
|
+
s: "Adjective Satellite",
|
|
486
|
+
t: "?",
|
|
487
|
+
c: "Conjunction",
|
|
488
|
+
p: "Adposition (Preposition, postposition, etc.)",
|
|
489
|
+
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
490
|
+
u: "Unknown"
|
|
491
|
+
};
|
|
492
|
+
var SynsetRelationRelType2 = {
|
|
493
|
+
agent: "Agent",
|
|
494
|
+
also: "See also",
|
|
495
|
+
anto_converse: "Converse antonym",
|
|
496
|
+
anto_gradable: "Gradable antonym",
|
|
497
|
+
anto_simple: "Simple antonym",
|
|
498
|
+
antonym: "Antonym",
|
|
499
|
+
attribute: "Attribute",
|
|
500
|
+
augmentative: "Augmentative",
|
|
501
|
+
be_in_state: "Be in state",
|
|
502
|
+
cause: "Cause",
|
|
503
|
+
causes: "Causes",
|
|
504
|
+
classified_by: "Classified by",
|
|
505
|
+
classifies: "Classifies",
|
|
506
|
+
co_agent_instrument: "Co-agent instrument",
|
|
507
|
+
co_agent_patient: "Co-agent patient",
|
|
508
|
+
co_agent_result: "Co-agent result",
|
|
509
|
+
co_instrument_agent: "Co-instrument agent",
|
|
510
|
+
co_instrument_patient: "Co-instrument patient",
|
|
511
|
+
co_instrument_result: "Co-instrument result",
|
|
512
|
+
co_patient_agent: "Co-patient agent",
|
|
513
|
+
co_patient_instrument: "Co-patient instrument",
|
|
514
|
+
co_result_agent: "Co-result agent",
|
|
515
|
+
co_result_instrument: "Co-result instrument",
|
|
516
|
+
co_role: "Co-role",
|
|
517
|
+
diminutive: "Diminutive",
|
|
518
|
+
direction: "Direction",
|
|
519
|
+
domain_member_region: "Domain member region",
|
|
520
|
+
domain_member_topic: "Domain member topic",
|
|
521
|
+
domain_region: "Domain region",
|
|
522
|
+
domain_topic: "Domain topic",
|
|
523
|
+
entail: "Entail",
|
|
524
|
+
entails: "Entails",
|
|
525
|
+
eq_synonym: "Equivalent synonym",
|
|
526
|
+
exemplifies: "Exemplifies",
|
|
527
|
+
feminine: "Feminine",
|
|
528
|
+
has_augmentative: "Has augmentative",
|
|
529
|
+
has_diminutive: "Has diminutive",
|
|
530
|
+
has_domain_region: "Has domain region",
|
|
531
|
+
has_domain_topic: "Has domain topic",
|
|
532
|
+
has_feminine: "Has feminine",
|
|
533
|
+
has_masculine: "Has masculine",
|
|
534
|
+
has_young: "Has young",
|
|
535
|
+
holo_location: "Holonym location",
|
|
536
|
+
holo_member: "Member holonym",
|
|
537
|
+
holo_part: "Part holonym",
|
|
538
|
+
holo_portion: "Portion holonym",
|
|
539
|
+
holo_substance: "Substance holonym",
|
|
540
|
+
holonym: "Holonym",
|
|
541
|
+
hypernym: "Hypernym",
|
|
542
|
+
hyponym: "Hyponym",
|
|
543
|
+
in_manner: "In manner",
|
|
544
|
+
instance_hypernym: "Instance hypernym",
|
|
545
|
+
instance_hyponym: "Instance hyponym",
|
|
546
|
+
instrument: "Instrument",
|
|
547
|
+
involved: "Involved",
|
|
548
|
+
involved_agent: "Involved agent",
|
|
549
|
+
involved_direction: "Involved direction",
|
|
550
|
+
involved_instrument: "Involved instrument",
|
|
551
|
+
involved_location: "Involved location",
|
|
552
|
+
involved_patient: "Involved patient",
|
|
553
|
+
involved_result: "Involved result",
|
|
554
|
+
involved_source_direction: "Involved source direction",
|
|
555
|
+
involved_target_direction: "Involved target direction",
|
|
556
|
+
ir_synonym: "IR synonym",
|
|
557
|
+
is_caused_by: "Is caused by",
|
|
558
|
+
is_entailed_by: "Is entailed by",
|
|
559
|
+
is_exemplified_by: "Is exemplified by",
|
|
560
|
+
is_subevent_of: "Is subevent of",
|
|
561
|
+
location: "Location",
|
|
562
|
+
manner_of: "Manner of",
|
|
563
|
+
masculine: "Masculine",
|
|
564
|
+
member_holonym: "Member holonym",
|
|
565
|
+
member_meronym: "Member meronym",
|
|
566
|
+
mero_location: "Meronym location",
|
|
567
|
+
mero_member: "Member meronym",
|
|
568
|
+
mero_part: "Part meronym",
|
|
569
|
+
mero_portion: "Portion meronym",
|
|
570
|
+
mero_substance: "Substance meronym",
|
|
571
|
+
meronym: "Meronym",
|
|
572
|
+
other: "Other",
|
|
573
|
+
part_holonym: "Part holonym",
|
|
574
|
+
part_meronym: "Part meronym",
|
|
575
|
+
patient: "Patient",
|
|
576
|
+
restricted_by: "Restricted by",
|
|
577
|
+
restricts: "Restricts",
|
|
578
|
+
result: "Result",
|
|
579
|
+
role: "Role",
|
|
580
|
+
similar: "Similar",
|
|
581
|
+
source_direction: "Source direction",
|
|
582
|
+
state_of: "State of",
|
|
583
|
+
subevent: "Subevent",
|
|
584
|
+
substance_holonym: "Substance holonym",
|
|
585
|
+
substance_meronym: "Substance meronym",
|
|
586
|
+
target_direction: "Target direction",
|
|
587
|
+
young: "Young"
|
|
588
|
+
};
|
|
589
|
+
|
|
590
|
+
// src/loader.ts
|
|
591
|
+
var import_node_fs = require("fs");
|
|
592
|
+
var import_node_path = __toESM(require("path"), 1);
|
|
593
|
+
var import_node_stream = require("stream");
|
|
594
|
+
|
|
595
|
+
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
596
|
+
var Node = class {
|
|
597
|
+
#type;
|
|
598
|
+
#children;
|
|
599
|
+
#parent;
|
|
600
|
+
#attr;
|
|
601
|
+
#raw;
|
|
602
|
+
constructor(type, parent, raw) {
|
|
603
|
+
this.#type = type;
|
|
604
|
+
this.#parent = parent;
|
|
605
|
+
this.#raw = raw;
|
|
606
|
+
this.#children = [];
|
|
607
|
+
}
|
|
608
|
+
get type() {
|
|
609
|
+
return this.#type;
|
|
610
|
+
}
|
|
611
|
+
get raw() {
|
|
612
|
+
return this.#raw ?? "";
|
|
613
|
+
}
|
|
614
|
+
get parent() {
|
|
615
|
+
return this.#parent;
|
|
616
|
+
}
|
|
617
|
+
get children() {
|
|
618
|
+
return this.#children;
|
|
619
|
+
}
|
|
620
|
+
get attributes() {
|
|
621
|
+
if (this.#attr) {
|
|
622
|
+
return this.#attr;
|
|
623
|
+
}
|
|
624
|
+
this.#attr = {};
|
|
625
|
+
if (this.raw) {
|
|
626
|
+
const regex = /([\w:.-]+)\s*=\s*(["'])(.*?)\2/g;
|
|
627
|
+
let match;
|
|
628
|
+
while ((match = regex.exec(this.raw)) !== null) {
|
|
629
|
+
this.#attr[match[1]] = match[3];
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
return this.#attr;
|
|
633
|
+
}
|
|
634
|
+
get innerText() {
|
|
635
|
+
if (this.children.length) {
|
|
636
|
+
let text = "";
|
|
637
|
+
for (const child of this.children) {
|
|
638
|
+
text += child.innerText;
|
|
639
|
+
}
|
|
640
|
+
return text;
|
|
641
|
+
}
|
|
642
|
+
return (this.raw.match(/<!\[CDATA\[(.*?)]]>/s) ?? [, this.raw])[1];
|
|
643
|
+
}
|
|
644
|
+
addChild(child) {
|
|
645
|
+
this.#children.push(child);
|
|
646
|
+
}
|
|
647
|
+
/**
|
|
648
|
+
* Returns true if node and parents match the key hierarchy
|
|
649
|
+
* @param keys - XML tag names
|
|
650
|
+
*/
|
|
651
|
+
is(...keys) {
|
|
652
|
+
if (!keys.length) return false;
|
|
653
|
+
let parent;
|
|
654
|
+
for (const key of keys.toReversed()) {
|
|
655
|
+
parent = parent ? parent.parent : this;
|
|
656
|
+
if (parent?.type !== key) {
|
|
657
|
+
return false;
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
return true;
|
|
661
|
+
}
|
|
662
|
+
/**
|
|
663
|
+
* Return the first child matching the key
|
|
664
|
+
* @param key - XML tag name
|
|
665
|
+
*/
|
|
666
|
+
first(key) {
|
|
667
|
+
return this.children.find((n) => n.type === key);
|
|
668
|
+
}
|
|
669
|
+
/**
|
|
670
|
+
* Return all children matching the key hierarchy
|
|
671
|
+
* @param keys - XML tag names
|
|
672
|
+
*/
|
|
673
|
+
all(...keys) {
|
|
674
|
+
let nodes = this.children;
|
|
675
|
+
let found = [];
|
|
676
|
+
for (const [i, k] of Object.entries(keys)) {
|
|
677
|
+
if (Number.parseInt(i) === keys.length - 1) {
|
|
678
|
+
found = nodes.filter((n) => n.type === k);
|
|
679
|
+
break;
|
|
680
|
+
}
|
|
681
|
+
nodes = nodes?.find((n) => n.type === k)?.children;
|
|
682
|
+
if (!nodes) return [];
|
|
683
|
+
}
|
|
684
|
+
return found;
|
|
685
|
+
}
|
|
686
|
+
};
|
|
687
|
+
|
|
688
|
+
// node_modules/@dbushell/xml-streamify/src/stream.ts
|
|
689
|
+
var ENTITIES = {
|
|
690
|
+
cdata: {
|
|
691
|
+
end: "]]>",
|
|
692
|
+
start: /^<!\[CDATA\[/
|
|
693
|
+
},
|
|
694
|
+
comment: {
|
|
695
|
+
end: "-->",
|
|
696
|
+
start: /^<!--/
|
|
697
|
+
},
|
|
698
|
+
declaration: {
|
|
699
|
+
end: "?>",
|
|
700
|
+
start: /^<\?/
|
|
701
|
+
},
|
|
702
|
+
doctype: {
|
|
703
|
+
end: ">",
|
|
704
|
+
start: /^<!DOCTYPE/i
|
|
705
|
+
},
|
|
706
|
+
element: {
|
|
707
|
+
end: ">",
|
|
708
|
+
start: /^<[\w:.-/]/
|
|
709
|
+
}
|
|
710
|
+
};
|
|
711
|
+
var transformer = {
|
|
712
|
+
buf: "",
|
|
713
|
+
state: "skip" /* SKIP */,
|
|
714
|
+
previous: ["skip" /* SKIP */, -1],
|
|
715
|
+
flush(controller) {
|
|
716
|
+
if (this.buf.length > 0) {
|
|
717
|
+
controller.enqueue(["text" /* TEXT */, this.buf]);
|
|
718
|
+
}
|
|
719
|
+
},
|
|
720
|
+
transform(chunk, controller) {
|
|
721
|
+
this.buf += chunk;
|
|
722
|
+
while (this.buf.length) {
|
|
723
|
+
if (this.state === this.previous[0] && this.buf.length === this.previous[1]) {
|
|
724
|
+
break;
|
|
725
|
+
}
|
|
726
|
+
this.previous = [this.state, this.buf.length];
|
|
727
|
+
if (this.state === "skip" /* SKIP */) {
|
|
728
|
+
const index = this.buf.indexOf("<");
|
|
729
|
+
if (index < 0) break;
|
|
730
|
+
controller.enqueue(["text" /* TEXT */, this.buf.substring(0, index)]);
|
|
731
|
+
this.buf = this.buf.substring(index);
|
|
732
|
+
this.state = "search" /* SEARCH */;
|
|
733
|
+
}
|
|
734
|
+
if (this.state === "search" /* SEARCH */) {
|
|
735
|
+
if (this.buf.length < 3) break;
|
|
736
|
+
for (const [state, entity] of Object.entries(ENTITIES)) {
|
|
737
|
+
if (this.buf.match(entity.start)) {
|
|
738
|
+
this.state = state;
|
|
739
|
+
break;
|
|
740
|
+
}
|
|
741
|
+
}
|
|
742
|
+
continue;
|
|
743
|
+
}
|
|
744
|
+
if (Object.hasOwn(ENTITIES, this.state)) {
|
|
745
|
+
const { end } = ENTITIES[this.state];
|
|
746
|
+
const index = this.buf.indexOf(end);
|
|
747
|
+
if (index < 0) break;
|
|
748
|
+
controller.enqueue([
|
|
749
|
+
this.state,
|
|
750
|
+
this.buf.substring(0, index + end.length)
|
|
751
|
+
]);
|
|
752
|
+
this.buf = this.buf.substring(index + end.length);
|
|
753
|
+
this.state = "skip" /* SKIP */;
|
|
754
|
+
continue;
|
|
755
|
+
}
|
|
756
|
+
throw new Error();
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
};
|
|
760
|
+
var XMLStream = class extends TransformStream {
|
|
761
|
+
constructor() {
|
|
762
|
+
super({ ...transformer });
|
|
763
|
+
}
|
|
764
|
+
};
|
|
765
|
+
|
|
766
|
+
// node_modules/@dbushell/xml-streamify/src/parse.ts
|
|
767
|
+
var ignoreTypes = {
|
|
768
|
+
["comment" /* COMMENT */]: "ignoreComments",
|
|
769
|
+
["declaration" /* DECLARATION */]: "ignoreDeclaration",
|
|
770
|
+
["doctype" /* DOCTYPE */]: "ignoreDoctype"
|
|
771
|
+
};
|
|
772
|
+
async function* parse(input, options) {
|
|
773
|
+
const document = new Node("@document");
|
|
774
|
+
try {
|
|
775
|
+
const init = { ...options?.fetchOptions };
|
|
776
|
+
if (options?.signal) {
|
|
777
|
+
init.signal = options.signal;
|
|
778
|
+
}
|
|
779
|
+
let source;
|
|
780
|
+
if (typeof input === "string" || input instanceof URL) {
|
|
781
|
+
input = new URL(input);
|
|
782
|
+
const response = await fetch(input, init);
|
|
783
|
+
if (!response.ok || !response.body) {
|
|
784
|
+
throw new Error(`Bad response`);
|
|
785
|
+
}
|
|
786
|
+
source = response.body;
|
|
787
|
+
} else {
|
|
788
|
+
source = input;
|
|
789
|
+
}
|
|
790
|
+
const stream = source.pipeThrough(new TextDecoderStream()).pipeThrough(new XMLStream(), {
|
|
791
|
+
signal: options?.signal
|
|
792
|
+
});
|
|
793
|
+
let node = document;
|
|
794
|
+
for await (const [type, value] of stream) {
|
|
795
|
+
if (options?.signal?.aborted) {
|
|
796
|
+
break;
|
|
797
|
+
}
|
|
798
|
+
if (type === "text" /* TEXT */) {
|
|
799
|
+
if (options?.ignoreWhitespace !== false && value.trim().length === 0) {
|
|
800
|
+
continue;
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
if (type in ignoreTypes && options?.[ignoreTypes[type]] === false) {
|
|
804
|
+
const newNode = new Node(type, node, value);
|
|
805
|
+
node.addChild(newNode);
|
|
806
|
+
yield newNode;
|
|
807
|
+
continue;
|
|
808
|
+
}
|
|
809
|
+
if (type === "element" /* ELEMENT */) {
|
|
810
|
+
const name = value.match(/<\/?([\w:.-]+)/)[1];
|
|
811
|
+
if (value.endsWith("/>")) {
|
|
812
|
+
const newNode2 = new Node(name, node, value);
|
|
813
|
+
node.addChild(newNode2);
|
|
814
|
+
yield newNode2;
|
|
815
|
+
continue;
|
|
816
|
+
}
|
|
817
|
+
if (value.startsWith("</")) {
|
|
818
|
+
yield node;
|
|
819
|
+
node = node.parent;
|
|
820
|
+
continue;
|
|
821
|
+
}
|
|
822
|
+
const newNode = new Node(name, node, value);
|
|
823
|
+
node.addChild(newNode);
|
|
824
|
+
node = newNode;
|
|
825
|
+
continue;
|
|
826
|
+
}
|
|
827
|
+
node.addChild(new Node(type, node, value));
|
|
828
|
+
}
|
|
829
|
+
} catch (err) {
|
|
830
|
+
if (options?.silent === false) {
|
|
831
|
+
throw err;
|
|
832
|
+
}
|
|
833
|
+
}
|
|
834
|
+
return document;
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
// src/loader.ts
|
|
838
|
+
var BASE_VERSION = "2024";
|
|
839
|
+
function getFilename(version) {
|
|
840
|
+
return `english-wordnet-${version}.xml`;
|
|
841
|
+
}
|
|
842
|
+
function getDownloadUrl(version) {
|
|
843
|
+
return `https://en-word.net/static/${getFilename(version)}.gz`;
|
|
844
|
+
}
|
|
845
|
+
function getDefaultCacheDir() {
|
|
846
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
|
|
847
|
+
return import_node_path.default.join(homeDir, ".cache", "synset");
|
|
610
848
|
}
|
|
611
849
|
function fileExists(filePath) {
|
|
612
850
|
if ((0, import_node_fs.existsSync)(filePath)) {
|
|
@@ -661,7 +899,6 @@ async function findLatestVersion(onProgress, cacheDir) {
|
|
|
661
899
|
for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
|
|
662
900
|
const version = year.toString();
|
|
663
901
|
if (await urlExists(getDownloadUrl(version))) {
|
|
664
|
-
continue;
|
|
665
902
|
} else {
|
|
666
903
|
return (year - 1).toString();
|
|
667
904
|
}
|
|
@@ -682,9 +919,13 @@ async function downloadWordNet(version, destPath) {
|
|
|
682
919
|
const url = getDownloadUrl(version);
|
|
683
920
|
const response = await fetch(url);
|
|
684
921
|
if (!response.ok || !response.body) {
|
|
685
|
-
throw new Error(
|
|
922
|
+
throw new Error(
|
|
923
|
+
`Failed to download WordNet ${version}: ${response.statusText}`
|
|
924
|
+
);
|
|
686
925
|
}
|
|
687
|
-
const decompressed = response.body.pipeThrough(
|
|
926
|
+
const decompressed = response.body.pipeThrough(
|
|
927
|
+
new DecompressionStream("gzip")
|
|
928
|
+
);
|
|
688
929
|
const arrayBuffer = await new Response(decompressed).arrayBuffer();
|
|
689
930
|
const dir = import_node_path.default.dirname(destPath);
|
|
690
931
|
if (!(0, import_node_fs.existsSync)(dir)) {
|
|
@@ -694,8 +935,9 @@ async function downloadWordNet(version, destPath) {
|
|
|
694
935
|
}
|
|
695
936
|
function createParser(filePath) {
|
|
696
937
|
const resolvedPath = import_node_path.default.resolve(filePath);
|
|
697
|
-
const
|
|
698
|
-
|
|
938
|
+
const nodeStream = (0, import_node_fs.createReadStream)(resolvedPath);
|
|
939
|
+
const webStream = import_node_stream.Readable.toWeb(nodeStream);
|
|
940
|
+
return parse(webStream, {
|
|
699
941
|
ignoreDeclaration: false,
|
|
700
942
|
silent: false
|
|
701
943
|
});
|
|
@@ -848,117 +1090,6 @@ function getSynsetWords(index, synset) {
|
|
|
848
1090
|
return synset.members.map((id) => index.entries.get(id)).filter((e) => e !== void 0).map((e) => e.lemmas[0]?.writtenForm).filter((w) => w !== void 0);
|
|
849
1091
|
}
|
|
850
1092
|
|
|
851
|
-
// src/literals.ts
|
|
852
|
-
var PartsOfSpeech2 = {
|
|
853
|
-
n: "Noun",
|
|
854
|
-
v: "Verb",
|
|
855
|
-
a: "Adjective",
|
|
856
|
-
r: "Adverb",
|
|
857
|
-
s: "Adjective Satellite",
|
|
858
|
-
t: "?",
|
|
859
|
-
c: "Conjunction",
|
|
860
|
-
p: "Adposition (Preposition, postposition, etc.)",
|
|
861
|
-
x: "Other (inc. particle, classifier, bound morphemes, determiners)",
|
|
862
|
-
u: "Unknown"
|
|
863
|
-
};
|
|
864
|
-
var SynsetRelationRelType2 = {
|
|
865
|
-
agent: "Agent",
|
|
866
|
-
also: "See also",
|
|
867
|
-
anto_converse: "Converse antonym",
|
|
868
|
-
anto_gradable: "Gradable antonym",
|
|
869
|
-
anto_simple: "Simple antonym",
|
|
870
|
-
antonym: "Antonym",
|
|
871
|
-
attribute: "Attribute",
|
|
872
|
-
augmentative: "Augmentative",
|
|
873
|
-
be_in_state: "Be in state",
|
|
874
|
-
cause: "Cause",
|
|
875
|
-
causes: "Causes",
|
|
876
|
-
classified_by: "Classified by",
|
|
877
|
-
classifies: "Classifies",
|
|
878
|
-
co_agent_instrument: "Co-agent instrument",
|
|
879
|
-
co_agent_patient: "Co-agent patient",
|
|
880
|
-
co_agent_result: "Co-agent result",
|
|
881
|
-
co_instrument_agent: "Co-instrument agent",
|
|
882
|
-
co_instrument_patient: "Co-instrument patient",
|
|
883
|
-
co_instrument_result: "Co-instrument result",
|
|
884
|
-
co_patient_agent: "Co-patient agent",
|
|
885
|
-
co_patient_instrument: "Co-patient instrument",
|
|
886
|
-
co_result_agent: "Co-result agent",
|
|
887
|
-
co_result_instrument: "Co-result instrument",
|
|
888
|
-
co_role: "Co-role",
|
|
889
|
-
diminutive: "Diminutive",
|
|
890
|
-
direction: "Direction",
|
|
891
|
-
domain_member_region: "Domain member region",
|
|
892
|
-
domain_member_topic: "Domain member topic",
|
|
893
|
-
domain_region: "Domain region",
|
|
894
|
-
domain_topic: "Domain topic",
|
|
895
|
-
entail: "Entail",
|
|
896
|
-
entails: "Entails",
|
|
897
|
-
eq_synonym: "Equivalent synonym",
|
|
898
|
-
exemplifies: "Exemplifies",
|
|
899
|
-
feminine: "Feminine",
|
|
900
|
-
has_augmentative: "Has augmentative",
|
|
901
|
-
has_diminutive: "Has diminutive",
|
|
902
|
-
has_domain_region: "Has domain region",
|
|
903
|
-
has_domain_topic: "Has domain topic",
|
|
904
|
-
has_feminine: "Has feminine",
|
|
905
|
-
has_masculine: "Has masculine",
|
|
906
|
-
has_young: "Has young",
|
|
907
|
-
holo_location: "Holonym location",
|
|
908
|
-
holo_member: "Member holonym",
|
|
909
|
-
holo_part: "Part holonym",
|
|
910
|
-
holo_portion: "Portion holonym",
|
|
911
|
-
holo_substance: "Substance holonym",
|
|
912
|
-
holonym: "Holonym",
|
|
913
|
-
hypernym: "Hypernym",
|
|
914
|
-
hyponym: "Hyponym",
|
|
915
|
-
in_manner: "In manner",
|
|
916
|
-
instance_hypernym: "Instance hypernym",
|
|
917
|
-
instance_hyponym: "Instance hyponym",
|
|
918
|
-
instrument: "Instrument",
|
|
919
|
-
involved: "Involved",
|
|
920
|
-
involved_agent: "Involved agent",
|
|
921
|
-
involved_direction: "Involved direction",
|
|
922
|
-
involved_instrument: "Involved instrument",
|
|
923
|
-
involved_location: "Involved location",
|
|
924
|
-
involved_patient: "Involved patient",
|
|
925
|
-
involved_result: "Involved result",
|
|
926
|
-
involved_source_direction: "Involved source direction",
|
|
927
|
-
involved_target_direction: "Involved target direction",
|
|
928
|
-
ir_synonym: "IR synonym",
|
|
929
|
-
is_caused_by: "Is caused by",
|
|
930
|
-
is_entailed_by: "Is entailed by",
|
|
931
|
-
is_exemplified_by: "Is exemplified by",
|
|
932
|
-
is_subevent_of: "Is subevent of",
|
|
933
|
-
location: "Location",
|
|
934
|
-
manner_of: "Manner of",
|
|
935
|
-
masculine: "Masculine",
|
|
936
|
-
member_holonym: "Member holonym",
|
|
937
|
-
member_meronym: "Member meronym",
|
|
938
|
-
mero_location: "Meronym location",
|
|
939
|
-
mero_member: "Member meronym",
|
|
940
|
-
mero_part: "Part meronym",
|
|
941
|
-
mero_portion: "Portion meronym",
|
|
942
|
-
mero_substance: "Substance meronym",
|
|
943
|
-
meronym: "Meronym",
|
|
944
|
-
other: "Other",
|
|
945
|
-
part_holonym: "Part holonym",
|
|
946
|
-
part_meronym: "Part meronym",
|
|
947
|
-
patient: "Patient",
|
|
948
|
-
restricted_by: "Restricted by",
|
|
949
|
-
restricts: "Restricts",
|
|
950
|
-
result: "Result",
|
|
951
|
-
role: "Role",
|
|
952
|
-
similar: "Similar",
|
|
953
|
-
source_direction: "Source direction",
|
|
954
|
-
state_of: "State of",
|
|
955
|
-
subevent: "Subevent",
|
|
956
|
-
substance_holonym: "Substance holonym",
|
|
957
|
-
substance_meronym: "Substance meronym",
|
|
958
|
-
target_direction: "Target direction",
|
|
959
|
-
young: "Young"
|
|
960
|
-
};
|
|
961
|
-
|
|
962
1093
|
// src/cli.ts
|
|
963
1094
|
var decode = (s) => decodeXmlEntities(s) ?? "";
|
|
964
1095
|
var HELP = `
|
|
@@ -975,6 +1106,7 @@ Commands:
|
|
|
975
1106
|
related <word> Show all relations for a word
|
|
976
1107
|
info <synset-id> Show details for a synset ID
|
|
977
1108
|
fetch Download WordNet data to cache
|
|
1109
|
+
export-sqlite <out> Export dictionary to SQLite database
|
|
978
1110
|
|
|
979
1111
|
Options:
|
|
980
1112
|
--file <path> Use a local WordNet XML file instead of cache
|
|
@@ -985,6 +1117,7 @@ Examples:
|
|
|
985
1117
|
synset synonyms happy
|
|
986
1118
|
synset related computer --file ./wordnet.xml
|
|
987
1119
|
synset fetch
|
|
1120
|
+
synset export-sqlite dictionary.db
|
|
988
1121
|
`;
|
|
989
1122
|
async function main() {
|
|
990
1123
|
const args = process.argv.slice(2);
|
|
@@ -1006,6 +1139,24 @@ async function main() {
|
|
|
1006
1139
|
console.log(`WordNet ${version} cached at: ${cachedPath}`);
|
|
1007
1140
|
return;
|
|
1008
1141
|
}
|
|
1142
|
+
if (command === "export-sqlite") {
|
|
1143
|
+
const outputPath = cleanArgs[1];
|
|
1144
|
+
if (!outputPath) {
|
|
1145
|
+
console.error("Error: Missing output path for export-sqlite");
|
|
1146
|
+
process.exit(1);
|
|
1147
|
+
}
|
|
1148
|
+
console.log("Loading WordNet data...");
|
|
1149
|
+
const lexicon2 = filePath ? await loadWordNet(filePath) : (await fetchWordNet({ onProgress: console.log })).lexicon;
|
|
1150
|
+
console.log(`Exporting to ${outputPath}...`);
|
|
1151
|
+
exportToSQLite(lexicon2, outputPath, {
|
|
1152
|
+
onProgress: ({ phase, current, total }) => {
|
|
1153
|
+
process.stdout.write(`\r${phase}: ${current}/${total}`);
|
|
1154
|
+
}
|
|
1155
|
+
});
|
|
1156
|
+
console.log(`
|
|
1157
|
+
Exported to ${outputPath}`);
|
|
1158
|
+
return;
|
|
1159
|
+
}
|
|
1009
1160
|
if (!word && command !== "fetch") {
|
|
1010
1161
|
console.error(`Error: Missing word argument for command '${command}'`);
|
|
1011
1162
|
process.exit(1);
|
|
@@ -1089,7 +1240,7 @@ async function main() {
|
|
|
1089
1240
|
for (const [relType, words] of relsByType) {
|
|
1090
1241
|
const label = SynsetRelationRelType2[relType] || relType;
|
|
1091
1242
|
console.log(` ${label}:`);
|
|
1092
|
-
|
|
1243
|
+
for (const w of words) console.log(` - ${w}`);
|
|
1093
1244
|
}
|
|
1094
1245
|
}
|
|
1095
1246
|
break;
|
|
@@ -1108,11 +1259,12 @@ async function main() {
|
|
|
1108
1259
|
console.log(`ILI: ${synset.ili}`);
|
|
1109
1260
|
console.log(`
|
|
1110
1261
|
Definitions:`);
|
|
1111
|
-
synset.definitions
|
|
1262
|
+
for (const d of synset.definitions) console.log(` - ${decode(d.inner)}`);
|
|
1112
1263
|
if (synset.examples.length > 0) {
|
|
1113
1264
|
console.log(`
|
|
1114
1265
|
Examples:`);
|
|
1115
|
-
|
|
1266
|
+
for (const e of synset.examples)
|
|
1267
|
+
console.log(` - "${decode(e.inner)}"`);
|
|
1116
1268
|
}
|
|
1117
1269
|
if (synset.synsetRelations.length > 0) {
|
|
1118
1270
|
console.log(`
|