synset 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -44
- package/dist/cli.cjs +392 -382
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +400 -383
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +175 -166
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +7 -7
- package/dist/index.d.ts +7 -7
- package/dist/index.js +183 -167
- package/dist/index.js.map +1 -1
- package/package.json +10 -5
package/dist/index.cjs
CHANGED
|
@@ -253,7 +253,171 @@ var Lexicon = import_zod.z.object({
|
|
|
253
253
|
synsets: import_zod.z.array(Synset).min(0),
|
|
254
254
|
syntacticBehaviors: import_zod.z.array(SyntacticBehavior).min(0)
|
|
255
255
|
});
|
|
256
|
-
var partsOfSpeechList = PartsOfSpeech.options.map((v) => v.value);
|
|
256
|
+
var partsOfSpeechList = PartsOfSpeech.options.map(
|
|
257
|
+
(v) => v.value
|
|
258
|
+
);
|
|
259
|
+
|
|
260
|
+
// src/helpers.ts
|
|
261
|
+
function PronunciationNode(node) {
|
|
262
|
+
const obj = {
|
|
263
|
+
variety: optAttr(node, "variety"),
|
|
264
|
+
inner: node.innerText
|
|
265
|
+
};
|
|
266
|
+
return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
267
|
+
}
|
|
268
|
+
function LemmaNode(node) {
|
|
269
|
+
const obj = {
|
|
270
|
+
writtenForm: attr(node, "writtenForm"),
|
|
271
|
+
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
272
|
+
pronunciations: (
|
|
273
|
+
//
|
|
274
|
+
children(node, "Pronunciation", (v) => PronunciationNode(v))
|
|
275
|
+
)
|
|
276
|
+
};
|
|
277
|
+
return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
278
|
+
}
|
|
279
|
+
function SenseRelationNode(node) {
|
|
280
|
+
const obj = {
|
|
281
|
+
relType: SenseRelationRelType.parse(attr(node, "relType")),
|
|
282
|
+
target: attr(node, "target"),
|
|
283
|
+
dcType: optAttr(node, "dc:type")
|
|
284
|
+
};
|
|
285
|
+
return SenseRelation.parse(
|
|
286
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
|
|
287
|
+
);
|
|
288
|
+
}
|
|
289
|
+
function SenseNode(node) {
|
|
290
|
+
const adjPos = optAttr(node, "adjposition");
|
|
291
|
+
const obj = {
|
|
292
|
+
id: attr(node, "id"),
|
|
293
|
+
synset: SynsetId.parse(attr(node, "synset")),
|
|
294
|
+
senseRelations: children(node, "SenseRelation", SenseRelationNode),
|
|
295
|
+
subCat: optAttr(node, "subcat"),
|
|
296
|
+
adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
|
|
297
|
+
};
|
|
298
|
+
return Sense.parse(
|
|
299
|
+
extendWithRestAttr(
|
|
300
|
+
node,
|
|
301
|
+
obj,
|
|
302
|
+
(s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
|
|
303
|
+
)
|
|
304
|
+
);
|
|
305
|
+
}
|
|
306
|
+
function FormNode(node) {
|
|
307
|
+
const obj = {
|
|
308
|
+
writtenForm: attr(node, "writtenForm")
|
|
309
|
+
};
|
|
310
|
+
return Form.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
311
|
+
}
|
|
312
|
+
function LexicalEntryNode(node) {
|
|
313
|
+
const obj = {
|
|
314
|
+
id: attr(node, "id"),
|
|
315
|
+
lemmas: children(node, "Lemma", LemmaNode),
|
|
316
|
+
senses: children(node, "Sense", SenseNode),
|
|
317
|
+
forms: children(node, "Form", FormNode)
|
|
318
|
+
};
|
|
319
|
+
return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
320
|
+
}
|
|
321
|
+
function DefinitionNode(node) {
|
|
322
|
+
const obj = {
|
|
323
|
+
inner: node.innerText
|
|
324
|
+
};
|
|
325
|
+
return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
326
|
+
}
|
|
327
|
+
function ExampleNode(node) {
|
|
328
|
+
const obj = {
|
|
329
|
+
inner: node.innerText,
|
|
330
|
+
dcSource: optAttr(node, "dc:source")
|
|
331
|
+
};
|
|
332
|
+
return Example.parse(
|
|
333
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
334
|
+
);
|
|
335
|
+
}
|
|
336
|
+
function ILIDefinitionNode(node) {
|
|
337
|
+
const obj = {
|
|
338
|
+
inner: node.innerText
|
|
339
|
+
};
|
|
340
|
+
return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
341
|
+
}
|
|
342
|
+
function SynsetRelationNode(node) {
|
|
343
|
+
const obj = {
|
|
344
|
+
relType: SynsetRelationRelType.parse(attr(node, "relType")),
|
|
345
|
+
target: attr(node, "target")
|
|
346
|
+
};
|
|
347
|
+
return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
348
|
+
}
|
|
349
|
+
function SyntacticBehaviorNode(node) {
|
|
350
|
+
const obj = {
|
|
351
|
+
id: attr(node, "id"),
|
|
352
|
+
subcategorizationFrame: attr(node, "subcategorizationFrame")
|
|
353
|
+
};
|
|
354
|
+
return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
355
|
+
}
|
|
356
|
+
function SynsetNode(node) {
|
|
357
|
+
const obj = {
|
|
358
|
+
id: attr(node, "id"),
|
|
359
|
+
ili: attr(node, "ili"),
|
|
360
|
+
lexfile: attr(node, "lexfile"),
|
|
361
|
+
members: attr(node, "members").split(" "),
|
|
362
|
+
dcSource: optAttr(node, "dc:source"),
|
|
363
|
+
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
364
|
+
definitions: children(node, "Definition", (v) => DefinitionNode(v)),
|
|
365
|
+
examples: children(node, "Example", (v) => ExampleNode(v)),
|
|
366
|
+
iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
|
|
367
|
+
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
368
|
+
};
|
|
369
|
+
return Synset.parse(
|
|
370
|
+
extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
|
|
371
|
+
);
|
|
372
|
+
}
|
|
373
|
+
function LexiconNode(node) {
|
|
374
|
+
const obj = {
|
|
375
|
+
id: attr(node, "id"),
|
|
376
|
+
label: attr(node, "label"),
|
|
377
|
+
language: attr(node, "language"),
|
|
378
|
+
email: attr(node, "email"),
|
|
379
|
+
license: attr(node, "license"),
|
|
380
|
+
version: attr(node, "version"),
|
|
381
|
+
citation: optAttr(node, "citation"),
|
|
382
|
+
url: attr(node, "url"),
|
|
383
|
+
lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
|
|
384
|
+
synsets: children(node, "Synset", SynsetNode),
|
|
385
|
+
syntacticBehaviors: (
|
|
386
|
+
//
|
|
387
|
+
children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
|
|
388
|
+
)
|
|
389
|
+
};
|
|
390
|
+
return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
391
|
+
}
|
|
392
|
+
var decodeXmlEntities = (s) => {
|
|
393
|
+
if (s === void 0) return void 0;
|
|
394
|
+
return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
|
|
395
|
+
};
|
|
396
|
+
var attr = (node, attrName) => {
|
|
397
|
+
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
398
|
+
if (value === void 0) {
|
|
399
|
+
throw new Error(
|
|
400
|
+
`Missing required attribute "${attrName}" on node "${node.type}"`
|
|
401
|
+
);
|
|
402
|
+
}
|
|
403
|
+
return value;
|
|
404
|
+
};
|
|
405
|
+
var optAttr = (node, attrName) => {
|
|
406
|
+
return decodeXmlEntities(node.attributes[attrName]);
|
|
407
|
+
};
|
|
408
|
+
var restAttrs = (node, obj, proxy) => {
|
|
409
|
+
const result = {};
|
|
410
|
+
Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
|
|
411
|
+
result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
|
|
412
|
+
});
|
|
413
|
+
return result;
|
|
414
|
+
};
|
|
415
|
+
var extendWithRestAttr = (node, obj, proxy) => {
|
|
416
|
+
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
417
|
+
};
|
|
418
|
+
var children = (node, type, fn) => {
|
|
419
|
+
return node.children.filter((v) => v.type === type).map((v) => fn(v));
|
|
420
|
+
};
|
|
257
421
|
|
|
258
422
|
// src/literals.ts
|
|
259
423
|
var PartsOfSpeech2 = {
|
|
@@ -408,6 +572,7 @@ var AdjPosition2 = {
|
|
|
408
572
|
// src/loader.ts
|
|
409
573
|
var import_node_fs = require("fs");
|
|
410
574
|
var import_node_path = __toESM(require("path"), 1);
|
|
575
|
+
var import_node_stream = require("stream");
|
|
411
576
|
|
|
412
577
|
// node_modules/@dbushell/xml-streamify/src/node.ts
|
|
413
578
|
var Node = class {
|
|
@@ -651,166 +816,6 @@ async function* parse(input, options) {
|
|
|
651
816
|
return document;
|
|
652
817
|
}
|
|
653
818
|
|
|
654
|
-
// src/helpers.ts
|
|
655
|
-
function PronunciationNode(node) {
|
|
656
|
-
const obj = {
|
|
657
|
-
variety: optAttr(node, "variety"),
|
|
658
|
-
inner: node.innerText
|
|
659
|
-
};
|
|
660
|
-
return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
661
|
-
}
|
|
662
|
-
function LemmaNode(node) {
|
|
663
|
-
const obj = {
|
|
664
|
-
writtenForm: attr(node, "writtenForm"),
|
|
665
|
-
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
666
|
-
pronunciations: (
|
|
667
|
-
//
|
|
668
|
-
children(node, "Pronunciation", (v) => PronunciationNode(v))
|
|
669
|
-
)
|
|
670
|
-
};
|
|
671
|
-
return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
672
|
-
}
|
|
673
|
-
function SenseRelationNode(node) {
|
|
674
|
-
const obj = {
|
|
675
|
-
relType: SenseRelationRelType.parse(attr(node, "relType")),
|
|
676
|
-
target: attr(node, "target"),
|
|
677
|
-
dcType: optAttr(node, "dc:type")
|
|
678
|
-
};
|
|
679
|
-
return SenseRelation.parse(
|
|
680
|
-
extendWithRestAttr(node, obj, (s) => s == "dc:type" ? "dcType" : s)
|
|
681
|
-
);
|
|
682
|
-
}
|
|
683
|
-
function SenseNode(node) {
|
|
684
|
-
const adjPos = optAttr(node, "adjposition");
|
|
685
|
-
const obj = {
|
|
686
|
-
id: attr(node, "id"),
|
|
687
|
-
synset: SynsetId.parse(attr(node, "synset")),
|
|
688
|
-
senseRelations: children(node, "SenseRelation", SenseRelationNode),
|
|
689
|
-
subCat: optAttr(node, "subcat"),
|
|
690
|
-
adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
|
|
691
|
-
};
|
|
692
|
-
return Sense.parse(
|
|
693
|
-
extendWithRestAttr(
|
|
694
|
-
node,
|
|
695
|
-
obj,
|
|
696
|
-
(s) => s == "subcat" ? "subCat" : s == "adjposition" ? "adjPosition" : s
|
|
697
|
-
)
|
|
698
|
-
);
|
|
699
|
-
}
|
|
700
|
-
function FormNode(node) {
|
|
701
|
-
const obj = {
|
|
702
|
-
writtenForm: attr(node, "writtenForm")
|
|
703
|
-
};
|
|
704
|
-
return Form.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
705
|
-
}
|
|
706
|
-
function LexicalEntryNode(node) {
|
|
707
|
-
const obj = {
|
|
708
|
-
id: attr(node, "id"),
|
|
709
|
-
lemmas: children(node, "Lemma", LemmaNode),
|
|
710
|
-
senses: children(node, "Sense", SenseNode),
|
|
711
|
-
forms: children(node, "Form", FormNode)
|
|
712
|
-
};
|
|
713
|
-
return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
714
|
-
}
|
|
715
|
-
function DefinitionNode(node) {
|
|
716
|
-
const obj = {
|
|
717
|
-
inner: node.innerText
|
|
718
|
-
};
|
|
719
|
-
return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
720
|
-
}
|
|
721
|
-
function ExampleNode(node) {
|
|
722
|
-
const obj = {
|
|
723
|
-
inner: node.innerText,
|
|
724
|
-
dcSource: optAttr(node, "dc:source")
|
|
725
|
-
};
|
|
726
|
-
return Example.parse(
|
|
727
|
-
extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
|
|
728
|
-
);
|
|
729
|
-
}
|
|
730
|
-
function ILIDefinitionNode(node) {
|
|
731
|
-
const obj = {
|
|
732
|
-
inner: node.innerText
|
|
733
|
-
};
|
|
734
|
-
return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
735
|
-
}
|
|
736
|
-
function SynsetRelationNode(node) {
|
|
737
|
-
const obj = {
|
|
738
|
-
relType: SynsetRelationRelType.parse(attr(node, "relType")),
|
|
739
|
-
target: attr(node, "target")
|
|
740
|
-
};
|
|
741
|
-
return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
742
|
-
}
|
|
743
|
-
function SyntacticBehaviorNode(node) {
|
|
744
|
-
const obj = {
|
|
745
|
-
id: attr(node, "id"),
|
|
746
|
-
subcategorizationFrame: attr(node, "subcategorizationFrame")
|
|
747
|
-
};
|
|
748
|
-
return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
749
|
-
}
|
|
750
|
-
function SynsetNode(node) {
|
|
751
|
-
const obj = {
|
|
752
|
-
id: attr(node, "id"),
|
|
753
|
-
ili: attr(node, "ili"),
|
|
754
|
-
lexfile: attr(node, "lexfile"),
|
|
755
|
-
members: attr(node, "members").split(" "),
|
|
756
|
-
dcSource: optAttr(node, "dc:source"),
|
|
757
|
-
partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
|
|
758
|
-
definitions: children(node, "Definition", (v) => DefinitionNode(v)),
|
|
759
|
-
examples: children(node, "Example", (v) => ExampleNode(v)),
|
|
760
|
-
iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
|
|
761
|
-
synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
|
|
762
|
-
};
|
|
763
|
-
return Synset.parse(
|
|
764
|
-
extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
|
|
765
|
-
);
|
|
766
|
-
}
|
|
767
|
-
function LexiconNode(node) {
|
|
768
|
-
const obj = {
|
|
769
|
-
id: attr(node, "id"),
|
|
770
|
-
label: attr(node, "label"),
|
|
771
|
-
language: attr(node, "language"),
|
|
772
|
-
email: attr(node, "email"),
|
|
773
|
-
license: attr(node, "license"),
|
|
774
|
-
version: attr(node, "version"),
|
|
775
|
-
citation: optAttr(node, "citation"),
|
|
776
|
-
url: attr(node, "url"),
|
|
777
|
-
lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
|
|
778
|
-
synsets: children(node, "Synset", SynsetNode),
|
|
779
|
-
syntacticBehaviors: (
|
|
780
|
-
//
|
|
781
|
-
children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
|
|
782
|
-
)
|
|
783
|
-
};
|
|
784
|
-
return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
|
|
785
|
-
}
|
|
786
|
-
var decodeXmlEntities = (s) => {
|
|
787
|
-
if (s === void 0) return void 0;
|
|
788
|
-
return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
|
|
789
|
-
};
|
|
790
|
-
var attr = (node, attrName) => {
|
|
791
|
-
const value = decodeXmlEntities(node.attributes[attrName]);
|
|
792
|
-
if (value === void 0) {
|
|
793
|
-
throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
|
|
794
|
-
}
|
|
795
|
-
return value;
|
|
796
|
-
};
|
|
797
|
-
var optAttr = (node, attrName) => {
|
|
798
|
-
return decodeXmlEntities(node.attributes[attrName]);
|
|
799
|
-
};
|
|
800
|
-
var restAttrs = (node, obj, proxy) => {
|
|
801
|
-
const result = {};
|
|
802
|
-
Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
|
|
803
|
-
result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
|
|
804
|
-
});
|
|
805
|
-
return result;
|
|
806
|
-
};
|
|
807
|
-
var extendWithRestAttr = (node, obj, proxy) => {
|
|
808
|
-
return Object.assign(obj, restAttrs(node, obj, proxy));
|
|
809
|
-
};
|
|
810
|
-
var children = (node, type, fn) => {
|
|
811
|
-
return node.children.filter((v) => v.type == type).map((v) => fn(v));
|
|
812
|
-
};
|
|
813
|
-
|
|
814
819
|
// src/loader.ts
|
|
815
820
|
var BASE_VERSION = "2024";
|
|
816
821
|
function getFilename(version) {
|
|
@@ -876,7 +881,6 @@ async function findLatestVersion(onProgress, cacheDir) {
|
|
|
876
881
|
for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
|
|
877
882
|
const version = year.toString();
|
|
878
883
|
if (await urlExists(getDownloadUrl(version))) {
|
|
879
|
-
continue;
|
|
880
884
|
} else {
|
|
881
885
|
return (year - 1).toString();
|
|
882
886
|
}
|
|
@@ -897,9 +901,13 @@ async function downloadWordNet(version, destPath) {
|
|
|
897
901
|
const url = getDownloadUrl(version);
|
|
898
902
|
const response = await fetch(url);
|
|
899
903
|
if (!response.ok || !response.body) {
|
|
900
|
-
throw new Error(`Failed to download WordNet ${version}: ${response.statusText}`);
|
|
904
|
+
throw new Error(
|
|
905
|
+
`Failed to download WordNet ${version}: ${response.statusText}`
|
|
906
|
+
);
|
|
901
907
|
}
|
|
902
|
-
const decompressed = response.body.pipeThrough(new DecompressionStream("gzip"));
|
|
908
|
+
const decompressed = response.body.pipeThrough(
|
|
909
|
+
new DecompressionStream("gzip")
|
|
910
|
+
);
|
|
903
911
|
const arrayBuffer = await new Response(decompressed).arrayBuffer();
|
|
904
912
|
const dir = import_node_path.default.dirname(destPath);
|
|
905
913
|
if (!(0, import_node_fs.existsSync)(dir)) {
|
|
@@ -909,8 +917,9 @@ async function downloadWordNet(version, destPath) {
|
|
|
909
917
|
}
|
|
910
918
|
function createParser(filePath) {
|
|
911
919
|
const resolvedPath = import_node_path.default.resolve(filePath);
|
|
912
|
-
const
|
|
913
|
-
|
|
920
|
+
const nodeStream = (0, import_node_fs.createReadStream)(resolvedPath);
|
|
921
|
+
const webStream = import_node_stream.Readable.toWeb(nodeStream);
|
|
922
|
+
return parse(webStream, {
|
|
914
923
|
ignoreDeclaration: false,
|
|
915
924
|
silent: false
|
|
916
925
|
});
|