synset 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -253,7 +253,171 @@ var Lexicon = import_zod.z.object({
253
253
  synsets: import_zod.z.array(Synset).min(0),
254
254
  syntacticBehaviors: import_zod.z.array(SyntacticBehavior).min(0)
255
255
  });
256
- var partsOfSpeechList = PartsOfSpeech.options.map((v) => v.value);
256
+ var partsOfSpeechList = PartsOfSpeech.options.map(
257
+ (v) => v.value
258
+ );
259
+
260
+ // src/helpers.ts
261
+ function PronunciationNode(node) {
262
+ const obj = {
263
+ variety: optAttr(node, "variety"),
264
+ inner: node.innerText
265
+ };
266
+ return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
267
+ }
268
+ function LemmaNode(node) {
269
+ const obj = {
270
+ writtenForm: attr(node, "writtenForm"),
271
+ partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
272
+ pronunciations: (
273
+ //
274
+ children(node, "Pronunciation", (v) => PronunciationNode(v))
275
+ )
276
+ };
277
+ return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
278
+ }
279
+ function SenseRelationNode(node) {
280
+ const obj = {
281
+ relType: SenseRelationRelType.parse(attr(node, "relType")),
282
+ target: attr(node, "target"),
283
+ dcType: optAttr(node, "dc:type")
284
+ };
285
+ return SenseRelation.parse(
286
+ extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
287
+ );
288
+ }
289
+ function SenseNode(node) {
290
+ const adjPos = optAttr(node, "adjposition");
291
+ const obj = {
292
+ id: attr(node, "id"),
293
+ synset: SynsetId.parse(attr(node, "synset")),
294
+ senseRelations: children(node, "SenseRelation", SenseRelationNode),
295
+ subCat: optAttr(node, "subcat"),
296
+ adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
297
+ };
298
+ return Sense.parse(
299
+ extendWithRestAttr(
300
+ node,
301
+ obj,
302
+ (s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
303
+ )
304
+ );
305
+ }
306
+ function FormNode(node) {
307
+ const obj = {
308
+ writtenForm: attr(node, "writtenForm")
309
+ };
310
+ return Form.parse(extendWithRestAttr(node, obj, (s) => s));
311
+ }
312
+ function LexicalEntryNode(node) {
313
+ const obj = {
314
+ id: attr(node, "id"),
315
+ lemmas: children(node, "Lemma", LemmaNode),
316
+ senses: children(node, "Sense", SenseNode),
317
+ forms: children(node, "Form", FormNode)
318
+ };
319
+ return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
320
+ }
321
+ function DefinitionNode(node) {
322
+ const obj = {
323
+ inner: node.innerText
324
+ };
325
+ return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
326
+ }
327
+ function ExampleNode(node) {
328
+ const obj = {
329
+ inner: node.innerText,
330
+ dcSource: optAttr(node, "dc:source")
331
+ };
332
+ return Example.parse(
333
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
334
+ );
335
+ }
336
+ function ILIDefinitionNode(node) {
337
+ const obj = {
338
+ inner: node.innerText
339
+ };
340
+ return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
341
+ }
342
+ function SynsetRelationNode(node) {
343
+ const obj = {
344
+ relType: SynsetRelationRelType.parse(attr(node, "relType")),
345
+ target: attr(node, "target")
346
+ };
347
+ return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
348
+ }
349
+ function SyntacticBehaviorNode(node) {
350
+ const obj = {
351
+ id: attr(node, "id"),
352
+ subcategorizationFrame: attr(node, "subcategorizationFrame")
353
+ };
354
+ return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
355
+ }
356
+ function SynsetNode(node) {
357
+ const obj = {
358
+ id: attr(node, "id"),
359
+ ili: attr(node, "ili"),
360
+ lexfile: attr(node, "lexfile"),
361
+ members: attr(node, "members").split(" "),
362
+ dcSource: optAttr(node, "dc:source"),
363
+ partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
364
+ definitions: children(node, "Definition", (v) => DefinitionNode(v)),
365
+ examples: children(node, "Example", (v) => ExampleNode(v)),
366
+ iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
367
+ synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
368
+ };
369
+ return Synset.parse(
370
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
371
+ );
372
+ }
373
+ function LexiconNode(node) {
374
+ const obj = {
375
+ id: attr(node, "id"),
376
+ label: attr(node, "label"),
377
+ language: attr(node, "language"),
378
+ email: attr(node, "email"),
379
+ license: attr(node, "license"),
380
+ version: attr(node, "version"),
381
+ citation: optAttr(node, "citation"),
382
+ url: attr(node, "url"),
383
+ lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
384
+ synsets: children(node, "Synset", SynsetNode),
385
+ syntacticBehaviors: (
386
+ //
387
+ children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
388
+ )
389
+ };
390
+ return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
391
+ }
392
+ var decodeXmlEntities = (s) => {
393
+ if (s === void 0) return void 0;
394
+ return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
395
+ };
396
+ var attr = (node, attrName) => {
397
+ const value = decodeXmlEntities(node.attributes[attrName]);
398
+ if (value === void 0) {
399
+ throw new Error(
400
+ `Missing required attribute "${attrName}" on node "${node.type}"`
401
+ );
402
+ }
403
+ return value;
404
+ };
405
+ var optAttr = (node, attrName) => {
406
+ return decodeXmlEntities(node.attributes[attrName]);
407
+ };
408
+ var restAttrs = (node, obj, proxy) => {
409
+ const result = {};
410
+ Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
411
+ result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
412
+ });
413
+ return result;
414
+ };
415
+ var extendWithRestAttr = (node, obj, proxy) => {
416
+ return Object.assign(obj, restAttrs(node, obj, proxy));
417
+ };
418
+ var children = (node, type, fn) => {
419
+ return node.children.filter((v) => v.type === type).map((v) => fn(v));
420
+ };
257
421
 
258
422
  // src/literals.ts
259
423
  var PartsOfSpeech2 = {
@@ -408,6 +572,7 @@ var AdjPosition2 = {
408
572
  // src/loader.ts
409
573
  var import_node_fs = require("fs");
410
574
  var import_node_path = __toESM(require("path"), 1);
575
+ var import_node_stream = require("stream");
411
576
 
412
577
  // node_modules/@dbushell/xml-streamify/src/node.ts
413
578
  var Node = class {
@@ -651,166 +816,6 @@ async function* parse(input, options) {
651
816
  return document;
652
817
  }
653
818
 
654
- // src/helpers.ts
655
- function PronunciationNode(node) {
656
- const obj = {
657
- variety: optAttr(node, "variety"),
658
- inner: node.innerText
659
- };
660
- return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
661
- }
662
- function LemmaNode(node) {
663
- const obj = {
664
- writtenForm: attr(node, "writtenForm"),
665
- partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
666
- pronunciations: (
667
- //
668
- children(node, "Pronunciation", (v) => PronunciationNode(v))
669
- )
670
- };
671
- return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
672
- }
673
- function SenseRelationNode(node) {
674
- const obj = {
675
- relType: SenseRelationRelType.parse(attr(node, "relType")),
676
- target: attr(node, "target"),
677
- dcType: optAttr(node, "dc:type")
678
- };
679
- return SenseRelation.parse(
680
- extendWithRestAttr(node, obj, (s) => s == "dc:type" ? "dcType" : s)
681
- );
682
- }
683
- function SenseNode(node) {
684
- const adjPos = optAttr(node, "adjposition");
685
- const obj = {
686
- id: attr(node, "id"),
687
- synset: SynsetId.parse(attr(node, "synset")),
688
- senseRelations: children(node, "SenseRelation", SenseRelationNode),
689
- subCat: optAttr(node, "subcat"),
690
- adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
691
- };
692
- return Sense.parse(
693
- extendWithRestAttr(
694
- node,
695
- obj,
696
- (s) => s == "subcat" ? "subCat" : s == "adjposition" ? "adjPosition" : s
697
- )
698
- );
699
- }
700
- function FormNode(node) {
701
- const obj = {
702
- writtenForm: attr(node, "writtenForm")
703
- };
704
- return Form.parse(extendWithRestAttr(node, obj, (s) => s));
705
- }
706
- function LexicalEntryNode(node) {
707
- const obj = {
708
- id: attr(node, "id"),
709
- lemmas: children(node, "Lemma", LemmaNode),
710
- senses: children(node, "Sense", SenseNode),
711
- forms: children(node, "Form", FormNode)
712
- };
713
- return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
714
- }
715
- function DefinitionNode(node) {
716
- const obj = {
717
- inner: node.innerText
718
- };
719
- return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
720
- }
721
- function ExampleNode(node) {
722
- const obj = {
723
- inner: node.innerText,
724
- dcSource: optAttr(node, "dc:source")
725
- };
726
- return Example.parse(
727
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
728
- );
729
- }
730
- function ILIDefinitionNode(node) {
731
- const obj = {
732
- inner: node.innerText
733
- };
734
- return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
735
- }
736
- function SynsetRelationNode(node) {
737
- const obj = {
738
- relType: SynsetRelationRelType.parse(attr(node, "relType")),
739
- target: attr(node, "target")
740
- };
741
- return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
742
- }
743
- function SyntacticBehaviorNode(node) {
744
- const obj = {
745
- id: attr(node, "id"),
746
- subcategorizationFrame: attr(node, "subcategorizationFrame")
747
- };
748
- return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
749
- }
750
- function SynsetNode(node) {
751
- const obj = {
752
- id: attr(node, "id"),
753
- ili: attr(node, "ili"),
754
- lexfile: attr(node, "lexfile"),
755
- members: attr(node, "members").split(" "),
756
- dcSource: optAttr(node, "dc:source"),
757
- partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
758
- definitions: children(node, "Definition", (v) => DefinitionNode(v)),
759
- examples: children(node, "Example", (v) => ExampleNode(v)),
760
- iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
761
- synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
762
- };
763
- return Synset.parse(
764
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
765
- );
766
- }
767
- function LexiconNode(node) {
768
- const obj = {
769
- id: attr(node, "id"),
770
- label: attr(node, "label"),
771
- language: attr(node, "language"),
772
- email: attr(node, "email"),
773
- license: attr(node, "license"),
774
- version: attr(node, "version"),
775
- citation: optAttr(node, "citation"),
776
- url: attr(node, "url"),
777
- lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
778
- synsets: children(node, "Synset", SynsetNode),
779
- syntacticBehaviors: (
780
- //
781
- children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
782
- )
783
- };
784
- return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
785
- }
786
- var decodeXmlEntities = (s) => {
787
- if (s === void 0) return void 0;
788
- return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
789
- };
790
- var attr = (node, attrName) => {
791
- const value = decodeXmlEntities(node.attributes[attrName]);
792
- if (value === void 0) {
793
- throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
794
- }
795
- return value;
796
- };
797
- var optAttr = (node, attrName) => {
798
- return decodeXmlEntities(node.attributes[attrName]);
799
- };
800
- var restAttrs = (node, obj, proxy) => {
801
- const result = {};
802
- Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
803
- result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
804
- });
805
- return result;
806
- };
807
- var extendWithRestAttr = (node, obj, proxy) => {
808
- return Object.assign(obj, restAttrs(node, obj, proxy));
809
- };
810
- var children = (node, type, fn) => {
811
- return node.children.filter((v) => v.type == type).map((v) => fn(v));
812
- };
813
-
814
819
  // src/loader.ts
815
820
  var BASE_VERSION = "2024";
816
821
  function getFilename(version) {
@@ -876,7 +881,6 @@ async function findLatestVersion(onProgress, cacheDir) {
876
881
  for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
877
882
  const version = year.toString();
878
883
  if (await urlExists(getDownloadUrl(version))) {
879
- continue;
880
884
  } else {
881
885
  return (year - 1).toString();
882
886
  }
@@ -897,9 +901,13 @@ async function downloadWordNet(version, destPath) {
897
901
  const url = getDownloadUrl(version);
898
902
  const response = await fetch(url);
899
903
  if (!response.ok || !response.body) {
900
- throw new Error(`Failed to download WordNet ${version}: ${response.statusText}`);
904
+ throw new Error(
905
+ `Failed to download WordNet ${version}: ${response.statusText}`
906
+ );
901
907
  }
902
- const decompressed = response.body.pipeThrough(new DecompressionStream("gzip"));
908
+ const decompressed = response.body.pipeThrough(
909
+ new DecompressionStream("gzip")
910
+ );
903
911
  const arrayBuffer = await new Response(decompressed).arrayBuffer();
904
912
  const dir = import_node_path.default.dirname(destPath);
905
913
  if (!(0, import_node_fs.existsSync)(dir)) {
@@ -909,8 +917,9 @@ async function downloadWordNet(version, destPath) {
909
917
  }
910
918
  function createParser(filePath) {
911
919
  const resolvedPath = import_node_path.default.resolve(filePath);
912
- const fileUrl = resolvedPath.startsWith("/") ? `file://${resolvedPath}` : `file:///${resolvedPath.replace(/\\/g, "/")}`;
913
- return parse(fileUrl, {
920
+ const nodeStream = (0, import_node_fs.createReadStream)(resolvedPath);
921
+ const webStream = import_node_stream.Readable.toWeb(nodeStream);
922
+ return parse(webStream, {
914
923
  ignoreDeclaration: false,
915
924
  silent: false
916
925
  });