synset 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -163,7 +163,171 @@ var Lexicon = z.object({
163
163
  synsets: z.array(Synset).min(0),
164
164
  syntacticBehaviors: z.array(SyntacticBehavior).min(0)
165
165
  });
166
- var partsOfSpeechList = PartsOfSpeech.options.map((v) => v.value);
166
+ var partsOfSpeechList = PartsOfSpeech.options.map(
167
+ (v) => v.value
168
+ );
169
+
170
+ // src/helpers.ts
171
+ function PronunciationNode(node) {
172
+ const obj = {
173
+ variety: optAttr(node, "variety"),
174
+ inner: node.innerText
175
+ };
176
+ return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
177
+ }
178
+ function LemmaNode(node) {
179
+ const obj = {
180
+ writtenForm: attr(node, "writtenForm"),
181
+ partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
182
+ pronunciations: (
183
+ //
184
+ children(node, "Pronunciation", (v) => PronunciationNode(v))
185
+ )
186
+ };
187
+ return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
188
+ }
189
+ function SenseRelationNode(node) {
190
+ const obj = {
191
+ relType: SenseRelationRelType.parse(attr(node, "relType")),
192
+ target: attr(node, "target"),
193
+ dcType: optAttr(node, "dc:type")
194
+ };
195
+ return SenseRelation.parse(
196
+ extendWithRestAttr(node, obj, (s) => s === "dc:type" ? "dcType" : s)
197
+ );
198
+ }
199
+ function SenseNode(node) {
200
+ const adjPos = optAttr(node, "adjposition");
201
+ const obj = {
202
+ id: attr(node, "id"),
203
+ synset: SynsetId.parse(attr(node, "synset")),
204
+ senseRelations: children(node, "SenseRelation", SenseRelationNode),
205
+ subCat: optAttr(node, "subcat"),
206
+ adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
207
+ };
208
+ return Sense.parse(
209
+ extendWithRestAttr(
210
+ node,
211
+ obj,
212
+ (s) => s === "subcat" ? "subCat" : s === "adjposition" ? "adjPosition" : s
213
+ )
214
+ );
215
+ }
216
+ function FormNode(node) {
217
+ const obj = {
218
+ writtenForm: attr(node, "writtenForm")
219
+ };
220
+ return Form.parse(extendWithRestAttr(node, obj, (s) => s));
221
+ }
222
+ function LexicalEntryNode(node) {
223
+ const obj = {
224
+ id: attr(node, "id"),
225
+ lemmas: children(node, "Lemma", LemmaNode),
226
+ senses: children(node, "Sense", SenseNode),
227
+ forms: children(node, "Form", FormNode)
228
+ };
229
+ return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
230
+ }
231
+ function DefinitionNode(node) {
232
+ const obj = {
233
+ inner: node.innerText
234
+ };
235
+ return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
236
+ }
237
+ function ExampleNode(node) {
238
+ const obj = {
239
+ inner: node.innerText,
240
+ dcSource: optAttr(node, "dc:source")
241
+ };
242
+ return Example.parse(
243
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
244
+ );
245
+ }
246
+ function ILIDefinitionNode(node) {
247
+ const obj = {
248
+ inner: node.innerText
249
+ };
250
+ return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
251
+ }
252
+ function SynsetRelationNode(node) {
253
+ const obj = {
254
+ relType: SynsetRelationRelType.parse(attr(node, "relType")),
255
+ target: attr(node, "target")
256
+ };
257
+ return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
258
+ }
259
+ function SyntacticBehaviorNode(node) {
260
+ const obj = {
261
+ id: attr(node, "id"),
262
+ subcategorizationFrame: attr(node, "subcategorizationFrame")
263
+ };
264
+ return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
265
+ }
266
+ function SynsetNode(node) {
267
+ const obj = {
268
+ id: attr(node, "id"),
269
+ ili: attr(node, "ili"),
270
+ lexfile: attr(node, "lexfile"),
271
+ members: attr(node, "members").split(" "),
272
+ dcSource: optAttr(node, "dc:source"),
273
+ partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
274
+ definitions: children(node, "Definition", (v) => DefinitionNode(v)),
275
+ examples: children(node, "Example", (v) => ExampleNode(v)),
276
+ iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
277
+ synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
278
+ };
279
+ return Synset.parse(
280
+ extendWithRestAttr(node, obj, (s) => s === "dc:source" ? "dcSource" : s)
281
+ );
282
+ }
283
+ function LexiconNode(node) {
284
+ const obj = {
285
+ id: attr(node, "id"),
286
+ label: attr(node, "label"),
287
+ language: attr(node, "language"),
288
+ email: attr(node, "email"),
289
+ license: attr(node, "license"),
290
+ version: attr(node, "version"),
291
+ citation: optAttr(node, "citation"),
292
+ url: attr(node, "url"),
293
+ lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
294
+ synsets: children(node, "Synset", SynsetNode),
295
+ syntacticBehaviors: (
296
+ //
297
+ children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
298
+ )
299
+ };
300
+ return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
301
+ }
302
+ var decodeXmlEntities = (s) => {
303
+ if (s === void 0) return void 0;
304
+ return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
305
+ };
306
+ var attr = (node, attrName) => {
307
+ const value = decodeXmlEntities(node.attributes[attrName]);
308
+ if (value === void 0) {
309
+ throw new Error(
310
+ `Missing required attribute "${attrName}" on node "${node.type}"`
311
+ );
312
+ }
313
+ return value;
314
+ };
315
+ var optAttr = (node, attrName) => {
316
+ return decodeXmlEntities(node.attributes[attrName]);
317
+ };
318
+ var restAttrs = (node, obj, proxy) => {
319
+ const result = {};
320
+ Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
321
+ result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
322
+ });
323
+ return result;
324
+ };
325
+ var extendWithRestAttr = (node, obj, proxy) => {
326
+ return Object.assign(obj, restAttrs(node, obj, proxy));
327
+ };
328
+ var children = (node, type, fn) => {
329
+ return node.children.filter((v) => v.type === type).map((v) => fn(v));
330
+ };
167
331
 
168
332
  // src/literals.ts
169
333
  var PartsOfSpeech2 = {
@@ -316,8 +480,16 @@ var AdjPosition2 = {
316
480
  };
317
481
 
318
482
  // src/loader.ts
319
- import { existsSync, statSync, writeFileSync, mkdirSync, readdirSync } from "fs";
483
+ import {
484
+ createReadStream,
485
+ existsSync,
486
+ mkdirSync,
487
+ readdirSync,
488
+ statSync,
489
+ writeFileSync
490
+ } from "fs";
320
491
  import path from "path";
492
+ import { Readable } from "stream";
321
493
 
322
494
  // node_modules/@dbushell/xml-streamify/src/node.ts
323
495
  var Node = class {
@@ -561,166 +733,6 @@ async function* parse(input, options) {
561
733
  return document;
562
734
  }
563
735
 
564
- // src/helpers.ts
565
- function PronunciationNode(node) {
566
- const obj = {
567
- variety: optAttr(node, "variety"),
568
- inner: node.innerText
569
- };
570
- return Pronunciation.parse(extendWithRestAttr(node, obj, (s) => s));
571
- }
572
- function LemmaNode(node) {
573
- const obj = {
574
- writtenForm: attr(node, "writtenForm"),
575
- partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
576
- pronunciations: (
577
- //
578
- children(node, "Pronunciation", (v) => PronunciationNode(v))
579
- )
580
- };
581
- return Lemma.parse(extendWithRestAttr(node, obj, (s) => s));
582
- }
583
- function SenseRelationNode(node) {
584
- const obj = {
585
- relType: SenseRelationRelType.parse(attr(node, "relType")),
586
- target: attr(node, "target"),
587
- dcType: optAttr(node, "dc:type")
588
- };
589
- return SenseRelation.parse(
590
- extendWithRestAttr(node, obj, (s) => s == "dc:type" ? "dcType" : s)
591
- );
592
- }
593
- function SenseNode(node) {
594
- const adjPos = optAttr(node, "adjposition");
595
- const obj = {
596
- id: attr(node, "id"),
597
- synset: SynsetId.parse(attr(node, "synset")),
598
- senseRelations: children(node, "SenseRelation", SenseRelationNode),
599
- subCat: optAttr(node, "subcat"),
600
- adjPosition: adjPos ? AdjPosition.parse(adjPos) : void 0
601
- };
602
- return Sense.parse(
603
- extendWithRestAttr(
604
- node,
605
- obj,
606
- (s) => s == "subcat" ? "subCat" : s == "adjposition" ? "adjPosition" : s
607
- )
608
- );
609
- }
610
- function FormNode(node) {
611
- const obj = {
612
- writtenForm: attr(node, "writtenForm")
613
- };
614
- return Form.parse(extendWithRestAttr(node, obj, (s) => s));
615
- }
616
- function LexicalEntryNode(node) {
617
- const obj = {
618
- id: attr(node, "id"),
619
- lemmas: children(node, "Lemma", LemmaNode),
620
- senses: children(node, "Sense", SenseNode),
621
- forms: children(node, "Form", FormNode)
622
- };
623
- return LexicalEntry.parse(extendWithRestAttr(node, obj, (s) => s));
624
- }
625
- function DefinitionNode(node) {
626
- const obj = {
627
- inner: node.innerText
628
- };
629
- return Definition.parse(extendWithRestAttr(node, obj, (s) => s));
630
- }
631
- function ExampleNode(node) {
632
- const obj = {
633
- inner: node.innerText,
634
- dcSource: optAttr(node, "dc:source")
635
- };
636
- return Example.parse(
637
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
638
- );
639
- }
640
- function ILIDefinitionNode(node) {
641
- const obj = {
642
- inner: node.innerText
643
- };
644
- return ILIDefinition.parse(extendWithRestAttr(node, obj, (s) => s));
645
- }
646
- function SynsetRelationNode(node) {
647
- const obj = {
648
- relType: SynsetRelationRelType.parse(attr(node, "relType")),
649
- target: attr(node, "target")
650
- };
651
- return SynsetRelation.parse(extendWithRestAttr(node, obj, (s) => s));
652
- }
653
- function SyntacticBehaviorNode(node) {
654
- const obj = {
655
- id: attr(node, "id"),
656
- subcategorizationFrame: attr(node, "subcategorizationFrame")
657
- };
658
- return SyntacticBehavior.parse(extendWithRestAttr(node, obj, (s) => s));
659
- }
660
- function SynsetNode(node) {
661
- const obj = {
662
- id: attr(node, "id"),
663
- ili: attr(node, "ili"),
664
- lexfile: attr(node, "lexfile"),
665
- members: attr(node, "members").split(" "),
666
- dcSource: optAttr(node, "dc:source"),
667
- partOfSpeech: PartsOfSpeech.parse(attr(node, "partOfSpeech")),
668
- definitions: children(node, "Definition", (v) => DefinitionNode(v)),
669
- examples: children(node, "Example", (v) => ExampleNode(v)),
670
- iliDefinitions: children(node, "ILIDefinition", ILIDefinitionNode),
671
- synsetRelations: children(node, "SynsetRelation", SynsetRelationNode)
672
- };
673
- return Synset.parse(
674
- extendWithRestAttr(node, obj, (s) => s == "dc:source" ? "dcSource" : s)
675
- );
676
- }
677
- function LexiconNode(node) {
678
- const obj = {
679
- id: attr(node, "id"),
680
- label: attr(node, "label"),
681
- language: attr(node, "language"),
682
- email: attr(node, "email"),
683
- license: attr(node, "license"),
684
- version: attr(node, "version"),
685
- citation: optAttr(node, "citation"),
686
- url: attr(node, "url"),
687
- lexicalEntries: children(node, "LexicalEntry", LexicalEntryNode),
688
- synsets: children(node, "Synset", SynsetNode),
689
- syntacticBehaviors: (
690
- //
691
- children(node, "SyntacticBehaviour", SyntacticBehaviorNode)
692
- )
693
- };
694
- return Lexicon.parse(extendWithRestAttr(node, obj, (s) => s));
695
- }
696
- var decodeXmlEntities = (s) => {
697
- if (s === void 0) return void 0;
698
- return s.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&apos;/g, "'").replace(/&quot;/g, '"');
699
- };
700
- var attr = (node, attrName) => {
701
- const value = decodeXmlEntities(node.attributes[attrName]);
702
- if (value === void 0) {
703
- throw new Error(`Missing required attribute "${attrName}" on node "${node.type}"`);
704
- }
705
- return value;
706
- };
707
- var optAttr = (node, attrName) => {
708
- return decodeXmlEntities(node.attributes[attrName]);
709
- };
710
- var restAttrs = (node, obj, proxy) => {
711
- const result = {};
712
- Object.keys(node.attributes).filter((a) => !(proxy(a) in obj)).forEach((k) => {
713
- result[k] = decodeXmlEntities(node.attributes[k]) ?? node.attributes[k];
714
- });
715
- return result;
716
- };
717
- var extendWithRestAttr = (node, obj, proxy) => {
718
- return Object.assign(obj, restAttrs(node, obj, proxy));
719
- };
720
- var children = (node, type, fn) => {
721
- return node.children.filter((v) => v.type == type).map((v) => fn(v));
722
- };
723
-
724
736
  // src/loader.ts
725
737
  var BASE_VERSION = "2024";
726
738
  function getFilename(version) {
@@ -786,7 +798,6 @@ async function findLatestVersion(onProgress, cacheDir) {
786
798
  for (let year = baseYear + 1; year <= lastReleasableYear; year++) {
787
799
  const version = year.toString();
788
800
  if (await urlExists(getDownloadUrl(version))) {
789
- continue;
790
801
  } else {
791
802
  return (year - 1).toString();
792
803
  }
@@ -807,9 +818,13 @@ async function downloadWordNet(version, destPath) {
807
818
  const url = getDownloadUrl(version);
808
819
  const response = await fetch(url);
809
820
  if (!response.ok || !response.body) {
810
- throw new Error(`Failed to download WordNet ${version}: ${response.statusText}`);
821
+ throw new Error(
822
+ `Failed to download WordNet ${version}: ${response.statusText}`
823
+ );
811
824
  }
812
- const decompressed = response.body.pipeThrough(new DecompressionStream("gzip"));
825
+ const decompressed = response.body.pipeThrough(
826
+ new DecompressionStream("gzip")
827
+ );
813
828
  const arrayBuffer = await new Response(decompressed).arrayBuffer();
814
829
  const dir = path.dirname(destPath);
815
830
  if (!existsSync(dir)) {
@@ -819,8 +834,9 @@ async function downloadWordNet(version, destPath) {
819
834
  }
820
835
  function createParser(filePath) {
821
836
  const resolvedPath = path.resolve(filePath);
822
- const fileUrl = resolvedPath.startsWith("/") ? `file://${resolvedPath}` : `file:///${resolvedPath.replace(/\\/g, "/")}`;
823
- return parse(fileUrl, {
837
+ const nodeStream = createReadStream(resolvedPath);
838
+ const webStream = Readable.toWeb(nodeStream);
839
+ return parse(webStream, {
824
840
  ignoreDeclaration: false,
825
841
  silent: false
826
842
  });