synset 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -44
- package/dist/cli.cjs +392 -382
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +400 -383
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +175 -166
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +7 -7
- package/dist/index.d.ts +7 -7
- package/dist/index.js +183 -167
- package/dist/index.js.map +1 -1
- package/package.json +10 -5
package/dist/index.js
CHANGED
|
@@ -163,7 +163,171 @@ var Lexicon = z.object({
|
|
|
163
163
|
synsets: z.array(Synset).min(0),
|
|
164
164
|
syntacticBehaviors: z.array(SyntacticBehavior).min(0)
|
|
165
165
|
});
|
|
166
|
-
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
166
|
+
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
167
|
+
(v) => v.value
|
|
168
|
+
);
|
|
169
|
+
|
|
170
|
+
// src/helpers.ts
|
|
171
|
+
function PronunciationNode(node) {
|
|
172
|
+
const obj = {
|
|
173
|
+
variety: optAttr(node, "variety"),
|
|
174
|
+
inner: node.innerText
|
|
175
|
+
};
|
|
176
|
+
return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
177
|
+
}
|
|
178
|
+
function LemmaNode(node) {
|
|
179
|
+
const obj = {
|
|
180
|
+
writtenForm: attr(node, "writtenForm"),
|
|
181
|
+
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
182
|
+
pronunciations: (
|
|
183
|
+
//
|
|
184
|
+
children(node, "Pronunciation", (v) => PronunciationNode(v))
|
|
185
|
+
)
|
|
186
|
+
};
|
|
187
|
+
return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
188
|
+
}
|
|
189
|
+
function SenseRelationNode(node) {
|
|
190
|
+
const obj = {
|
|
191
|
+
relType: SenseRelationRelType.parse(attr(node, "relType")),
|
|
192
|
+
target: attr(node, "target"),
|
|
193
|
+
dcType: optAttr(node, "dc:type")
|
|
194
|
+
};
|
|
195
|
+
return SenseRelation.parse(
|
|
196
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
|
|
197
|
+
);
|
|
198
|
+
}
|
|
199
|
+
function SenseNode(node) {
|
|
200
|
+
const adjPos = optAttr(node, "adjposition");
|
|
201
|
+
const obj = {
|
|
202
|
+
id: attr(node, "id"),
|
|
203
|
+
synset: SynsetId.parse(attr(node, "synset")),
|
|
204
|
+
senseRelations: children(node, "SenseRelation", SenseRelationNode),
|
|
205
|
+
subCat: optAttr(node, "subcat"),
|
|
206
|
+
adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
|
|
207
|
+
};
|
|
208
|
+
return Sense.parse(
|
|
209
|
+
extendWithRestAttr(
|
|
210
|
+
node,
|
|
211
|
+
obj,
|
|
212
|
+
(s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
|
|
213
|
+
)
|
|
214
|
+
);
|
|
215
|
+
}
|
|
216
|
+
function FormNode(node) {
|
|
217
|
+
const obj = {
|
|
218
|
+
writtenForm: attr(node, "writtenForm")
|
|
219
|
+
};
|
|
220
|
+
return Form.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
221
|
+
}
|
|
222
|
+
function LexicalEntryNode(node) {
|
|
223
|
+
const obj = {
|
|
224
|
+
id: attr(node, "id"),
|
|
225
|
+
lemmas: children(node, "Lemma", LemmaNode),
|
|
226
|
+
senses: children(node, "Sense", SenseNode),
|
|
227
|
+
forms: children(node, "Form", FormNode)
|
|
228
|
+
};
|
|
229
|
+
return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
230
|
+
}
|
|
231
|
+
function DefinitionNode(node) {
|
|
232
|
+
const obj = {
|
|
233
|
+
inner: node.innerText
|
|
234
|
+
};
|
|
235
|
+
return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
236
|
+
}
|
|
237
|
+
function ExampleNode(node) {
|
|
238
|
+
const obj = {
|
|
239
|
+
inner: node.innerText,
|
|
240
|
+
dcSource: optAttr(node, "dc:source")
|
|
241
|
+
};
|
|
242
|
+
return Example.parse(
|
|
243
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
244
|
+
);
|
|
245
|
+
}
|
|
246
|
+
function ILIDefinitionNode(node) {
|
|
247
|
+
const obj = {
|
|
248
|
+
inner: node.innerText
|
|
249
|
+
};
|
|
250
|
+
return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
251
|
+
}
|
|
252
|
+
function SynsetRelationNode(node) {
|
|
253
|
+
const obj = {
|
|
254
|
+
relType: SynsetRelationRelType.parse(attr(node, "relType")),
|
|
255
|
+
target: attr(node, "target")
|
|
256
|
+
};
|
|
257
|
+
return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
258
|
+
}
|
|
259
|
+
function SyntacticBehaviorNode(node) {
|
|
260
|
+
const obj = {
|
|
261
|
+
id: attr(node, "id"),
|
|
262
|
+
subcategorizationFrame: attr(node, "subcategorizationFrame")
|
|
263
|
+
};
|
|
264
|
+
return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
265
|
+
}
|
|
266
|
+
function SynsetNode(node) {
|
|
267
|
+
const obj = {
|
|
268
|
+
id: attr(node, "id"),
|
|
269
|
+
ili: attr(node, "ili"),
|
|
270
|
+
lexfile: attr(node, "lexfile"),
|
|
271
|
+
members: attr(node, "members").split(" "),
|
|
272
|
+
dcSource: optAttr(node, "dc:source"),
|
|
273
|
+
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
274
|
+
definitions: children(node, "Definition", (v) => DefinitionNode(v)),
|
|
275
|
+
examples: children(node, "Example", (v) => ExampleNode(v)),
|
|
276
|
+
iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
|
|
277
|
+
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
278
|
+
};
|
|
279
|
+
return Synset.parse(
|
|
280
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
281
|
+
);
|
|
282
|
+
}
|
|
283
|
+
function LexiconNode(node) {
|
|
284
|
+
const obj = {
|
|
285
|
+
id: attr(node, "id"),
|
|
286
|
+
label: attr(node, "label"),
|
|
287
|
+
language: attr(node, "language"),
|
|
288
|
+
email: attr(node, "email"),
|
|
289
|
+
license: attr(node, "license"),
|
|
290
|
+
version: attr(node, "version"),
|
|
291
|
+
citation: optAttr(node, "citation"),
|
|
292
|
+
url: attr(node, "url"),
|
|
293
|
+
lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
|
|
294
|
+
synsets: children(node, "Synset", SynsetNode),
|
|
295
|
+
syntacticBehaviors: (
|
|
296
|
+
//
|
|
297
|
+
children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
|
|
298
|
+
)
|
|
299
|
+
};
|
|
300
|
+
return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
301
|
+
}
|
|
302
|
+
var decodeXmlEntities = (s) => {
|
|
303
|
+
if (s === void 0) return void 0;
|
|
304
|
+
return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
|
|
305
|
+
};
|
|
306
|
+
var attr = (node, attrName) => {
|
|
307
|
+
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
308
|
+
if (value === void 0) {
|
|
309
|
+
throw new Error(
|
|
310
|
+
`Missing required attribute "${attrName}" on node "${node.type}"`
|
|
311
|
+
);
|
|
312
|
+
}
|
|
313
|
+
return value;
|
|
314
|
+
};
|
|
315
|
+
var optAttr = (node, attrName) => {
|
|
316
|
+
return decodeXmlEntities(node.attributes[attrName]);
|
|
317
|
+
};
|
|
318
|
+
var restAttrs = (node, obj, proxy) => {
|
|
319
|
+
const result = {};
|
|
320
|
+
Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
|
|
321
|
+
result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
|
|
322
|
+
});
|
|
323
|
+
return result;
|
|
324
|
+
};
|
|
325
|
+
var extendWithRestAttr = (node, obj, proxy) => {
|
|
326
|
+
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
327
|
+
};
|
|
328
|
+
var children = (node, type, fn) => {
|
|
329
|
+
return node.children.filter((v) => v.type === type).map((v) => fn(v));
|
|
330
|
+
};
|
|
167
331
|
|
|
168
332
|
// src/literals.ts
|
|
169
333
|
var PartsOfSpeech2 = {
|
|
@@ -316,8 +480,16 @@ var AdjPosition2 = {
|
|
|
316
480
|
};
|
|
317
481
|
|
|
318
482
|
// src/loader.ts
|
|
319
|
-
import {
|
|
483
|
+
import {
|
|
484
|
+
createReadStream,
|
|
485
|
+
existsSync,
|
|
486
|
+
mkdirSync,
|
|
487
|
+
readdirSync,
|
|
488
|
+
statSync,
|
|
489
|
+
writeFileSync
|
|
490
|
+
} from "fs";
|
|
320
491
|
import path from "path";
|
|
492
|
+
import { Readable } from "stream";
|
|
321
493
|
|
|
322
494
|
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
323
495
|
var Node = class {
|
|
@@ -561,166 +733,6 @@ async function* parse(input, options) {
|
|
|
561
733
|
return document;
|
|
562
734
|
}
|
|
563
735
|
|
|
564
|
-
// src/helpers.ts
|
|
565
|
-
function PronunciationNode(node) {
|
|
566
|
-
const obj = {
|
|
567
|
-
variety: optAttr(node, "variety"),
|
|
568
|
-
inner: node.innerText
|
|
569
|
-
};
|
|
570
|
-
return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
571
|
-
}
|
|
572
|
-
function LemmaNode(node) {
|
|
573
|
-
const obj = {
|
|
574
|
-
writtenForm: attr(node, "writtenForm"),
|
|
575
|
-
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
576
|
-
pronunciations: (
|
|
577
|
-
//
|
|
578
|
-
children(node, "Pronunciation", (v) => PronunciationNode(v))
|
|
579
|
-
)
|
|
580
|
-
};
|
|
581
|
-
return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
582
|
-
}
|
|
583
|
-
function SenseRelationNode(node) {
|
|
584
|
-
const obj = {
|
|
585
|
-
relType: SenseRelationRelType.parse(attr(node, "relType")),
|
|
586
|
-
target: attr(node, "target"),
|
|
587
|
-
dcType: optAttr(node, "dc:type")
|
|
588
|
-
};
|
|
589
|
-
return SenseRelation.parse(
|
|
590
|
-
extendWithRestAttr(node, obj, (s) => s == "dc:type" ? "dcType" : s)
|
|
591
|
-
);
|
|
592
|
-
}
|
|
593
|
-
function SenseNode(node) {
|
|
594
|
-
const adjPos = optAttr(node, "adjposition");
|
|
595
|
-
const obj = {
|
|
596
|
-
id: attr(node, "id"),
|
|
597
|
-
synset: SynsetId.parse(attr(node, "synset")),
|
|
598
|
-
senseRelations: children(node, "SenseRelation", SenseRelationNode),
|
|
599
|
-
subCat: optAttr(node, "subcat"),
|
|
600
|
-
adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
|
|
601
|
-
};
|
|
602
|
-
return Sense.parse(
|
|
603
|
-
extendWithRestAttr(
|
|
604
|
-
node,
|
|
605
|
-
obj,
|
|
606
|
-
(s) => s == "subcat" ? "subCat" : s == "adjposition" ? "adjPosition" : s
|
|
607
|
-
)
|
|
608
|
-
);
|
|
609
|
-
}
|
|
610
|
-
function FormNode(node) {
|
|
611
|
-
const obj = {
|
|
612
|
-
writtenForm: attr(node, "writtenForm")
|
|
613
|
-
};
|
|
614
|
-
return Form.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
615
|
-
}
|
|
616
|
-
function LexicalEntryNode(node) {
|
|
617
|
-
const obj = {
|
|
618
|
-
id: attr(node, "id"),
|
|
619
|
-
lemmas: children(node, "Lemma", LemmaNode),
|
|
620
|
-
senses: children(node, "Sense", SenseNode),
|
|
621
|
-
forms: children(node, "Form", FormNode)
|
|
622
|
-
};
|
|
623
|
-
return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
624
|
-
}
|
|
625
|
-
function DefinitionNode(node) {
|
|
626
|
-
const obj = {
|
|
627
|
-
inner: node.innerText
|
|
628
|
-
};
|
|
629
|
-
return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
630
|
-
}
|
|
631
|
-
function ExampleNode(node) {
|
|
632
|
-
const obj = {
|
|
633
|
-
inner: node.innerText,
|
|
634
|
-
dcSource: optAttr(node, "dc:source")
|
|
635
|
-
};
|
|
636
|
-
return Example.parse(
|
|
637
|
-
extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
|
|
638
|
-
);
|
|
639
|
-
}
|
|
640
|
-
function ILIDefinitionNode(node) {
|
|
641
|
-
const obj = {
|
|
642
|
-
inner: node.innerText
|
|
643
|
-
};
|
|
644
|
-
return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
645
|
-
}
|
|
646
|
-
function SynsetRelationNode(node) {
|
|
647
|
-
const obj = {
|
|
648
|
-
relType: SynsetRelationRelType.parse(attr(node, "relType")),
|
|
649
|
-
target: attr(node, "target")
|
|
650
|
-
};
|
|
651
|
-
return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
652
|
-
}
|
|
653
|
-
function SyntacticBehaviorNode(node) {
|
|
654
|
-
const obj = {
|
|
655
|
-
id: attr(node, "id"),
|
|
656
|
-
subcategorizationFrame: attr(node, "subcategorizationFrame")
|
|
657
|
-
};
|
|
658
|
-
return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
659
|
-
}
|
|
660
|
-
function SynsetNode(node) {
|
|
661
|
-
const obj = {
|
|
662
|
-
id: attr(node, "id"),
|
|
663
|
-
ili: attr(node, "ili"),
|
|
664
|
-
lexfile: attr(node, "lexfile"),
|
|
665
|
-
members: attr(node, "members").split(" "),
|
|
666
|
-
dcSource: optAttr(node, "dc:source"),
|
|
667
|
-
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
668
|
-
definitions: children(node, "Definition", (v) => DefinitionNode(v)),
|
|
669
|
-
examples: children(node, "Example", (v) => ExampleNode(v)),
|
|
670
|
-
iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
|
|
671
|
-
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
672
|
-
};
|
|
673
|
-
return Synset.parse(
|
|
674
|
-
extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
|
|
675
|
-
);
|
|
676
|
-
}
|
|
677
|
-
function LexiconNode(node) {
|
|
678
|
-
const obj = {
|
|
679
|
-
id: attr(node, "id"),
|
|
680
|
-
label: attr(node, "label"),
|
|
681
|
-
language: attr(node, "language"),
|
|
682
|
-
email: attr(node, "email"),
|
|
683
|
-
license: attr(node, "license"),
|
|
684
|
-
version: attr(node, "version"),
|
|
685
|
-
citation: optAttr(node, "citation"),
|
|
686
|
-
url: attr(node, "url"),
|
|
687
|
-
lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
|
|
688
|
-
synsets: children(node, "Synset", SynsetNode),
|
|
689
|
-
syntacticBehaviors: (
|
|
690
|
-
//
|
|
691
|
-
children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
|
|
692
|
-
)
|
|
693
|
-
};
|
|
694
|
-
return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
695
|
-
}
|
|
696
|
-
var decodeXmlEntities = (s) => {
|
|
697
|
-
if (s === void 0) return void 0;
|
|
698
|
-
return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
|
|
699
|
-
};
|
|
700
|
-
var attr = (node, attrName) => {
|
|
701
|
-
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
702
|
-
if (value === void 0) {
|
|
703
|
-
throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
|
|
704
|
-
}
|
|
705
|
-
return value;
|
|
706
|
-
};
|
|
707
|
-
var optAttr = (node, attrName) => {
|
|
708
|
-
return decodeXmlEntities(node.attributes[attrName]);
|
|
709
|
-
};
|
|
710
|
-
var restAttrs = (node, obj, proxy) => {
|
|
711
|
-
const result = {};
|
|
712
|
-
Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
|
|
713
|
-
result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
|
|
714
|
-
});
|
|
715
|
-
return result;
|
|
716
|
-
};
|
|
717
|
-
var extendWithRestAttr = (node, obj, proxy) => {
|
|
718
|
-
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
719
|
-
};
|
|
720
|
-
var children = (node, type, fn) => {
|
|
721
|
-
return node.children.filter((v) => v.type == type).map((v) => fn(v));
|
|
722
|
-
};
|
|
723
|
-
|
|
724
736
|
// src/loader.ts
|
|
725
737
|
var BASE_VERSION = "2024";
|
|
726
738
|
function getFilename(version) {
|
|
@@ -786,7 +798,6 @@ async function findLatestVersion(onProgress, cacheDir) {
|
|
|
786
798
|
for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
|
|
787
799
|
const version = year.toString();
|
|
788
800
|
if (await urlExists(getDownloadUrl(version))) {
|
|
789
|
-
continue;
|
|
790
801
|
} else {
|
|
791
802
|
return (year - 1).toString();
|
|
792
803
|
}
|
|
@@ -807,9 +818,13 @@ async function downloadWordNet(version, destPath) {
|
|
|
807
818
|
const url = getDownloadUrl(version);
|
|
808
819
|
const response = await fetch(url);
|
|
809
820
|
if (!response.ok || !response.body) {
|
|
810
|
-
throw new Error(
|
|
821
|
+
throw new Error(
|
|
822
|
+
`Failed to download WordNet ${version}: ${response.statusText}`
|
|
823
|
+
);
|
|
811
824
|
}
|
|
812
|
-
const decompressed = response.body.pipeThrough(
|
|
825
|
+
const decompressed = response.body.pipeThrough(
|
|
826
|
+
new DecompressionStream("gzip")
|
|
827
|
+
);
|
|
813
828
|
const arrayBuffer = await new Response(decompressed).arrayBuffer();
|
|
814
829
|
const dir = path.dirname(destPath);
|
|
815
830
|
if (!existsSync(dir)) {
|
|
@@ -819,8 +834,9 @@ async function downloadWordNet(version, destPath) {
|
|
|
819
834
|
}
|
|
820
835
|
function createParser(filePath) {
|
|
821
836
|
const resolvedPath = path.resolve(filePath);
|
|
822
|
-
const
|
|
823
|
-
|
|
837
|
+
const nodeStream = createReadStream(resolvedPath);
|
|
838
|
+
const webStream = Readable.toWeb(nodeStream);
|
|
839
|
+
return parse(webStream, {
|
|
824
840
|
ignoreDeclaration: false,
|
|
825
841
|
silent: false
|
|
826
842
|
});
|