lemma-is 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
package/dist/index.d.mts CHANGED
@@ -250,8 +250,6 @@ interface DisambiguatorOptions {
250
250
  leftWeight?: number;
251
251
  /** Weight for right context (next word) */
252
252
  rightWeight?: number;
253
- /** Enable phrase-based disambiguation */
254
- usePhraseRules?: boolean;
255
253
  /** Enable preference rules (e.g., "á" context rules) */
256
254
  usePreferenceRules?: boolean;
257
255
  /** Enable grammar rules (case government) */
@@ -281,6 +279,10 @@ interface DisambiguatedToken {
281
279
  interface MorphLemmatizerLike extends LemmatizerLike {
282
280
  lemmatizeWithMorph?(word: string): LemmaWithMorph[];
283
281
  }
282
+ interface DisambiguationContextHint {
283
+ prevLemmas?: string[];
284
+ nextLemmas?: string[];
285
+ }
284
286
  /**
285
287
  * Disambiguate lemmas using a multi-phase pipeline.
286
288
  */
@@ -289,7 +291,6 @@ declare class Disambiguator {
289
291
  bigrams: BigramProvider | null;
290
292
  leftWeight: number;
291
293
  rightWeight: number;
292
- usePhraseRules: boolean;
293
294
  usePreferenceRules: boolean;
294
295
  useGrammarRules: boolean;
295
296
  constructor(lemmatizer: LemmatizerLike, bigrams?: BigramProvider | null, options?: DisambiguatorOptions);
@@ -300,7 +301,7 @@ declare class Disambiguator {
300
301
  * @param prevWord - Previous word (left context), or null
301
302
  * @param nextWord - Next word (right context), or null
302
303
  */
303
- disambiguate(word: string, prevWord: string | null, nextWord: string | null): DisambiguatedToken;
304
+ disambiguate(word: string, prevWord: string | null, nextWord: string | null, hint?: DisambiguationContextHint): DisambiguatedToken;
304
305
  /**
305
306
  * Disambiguate an array of tokens.
306
307
  *
@@ -363,6 +364,12 @@ declare function getRulesForWord(word: string): DisambiguationRule[];
363
364
  declare function hasDisambiguationRules(word: string): boolean;
364
365
  //#endregion
365
366
  //#region src/mini-grammar.d.ts
367
+ /**
368
+ * Interface for lemmatizer used in grammar rules.
369
+ */
370
+ interface GrammarLemmatizerLike {
371
+ lemmatizeWithPOS?(word: string): LemmaWithPOS[];
372
+ }
366
373
  /**
367
374
  * Preposition case government rules.
368
375
  *
@@ -422,19 +429,41 @@ declare function applyPrepositionRule(candidates: LemmaWithMorph[], nextWordMorp
422
429
  * @returns GrammarRuleMatch if a rule applies, null otherwise
423
430
  */
424
431
  declare function applyPronounVerbRule(candidates: LemmaWithMorph[], prevWord: string | null): GrammarRuleMatch | null;
432
+ /**
433
+ * Apply noun-after-preposition rule to disambiguate.
434
+ *
435
+ * If the previous word is a preposition and the current word has a
436
+ * noun candidate with a case governed by that preposition, prefer
437
+ * the noun reading.
438
+ *
439
+ * This rule only applies when:
440
+ * - The previous word is UNAMBIGUOUSLY a preposition (no pronoun reading), OR
441
+ * - The current word has no verb candidate
442
+ *
443
+ * Example: "til fundar" → "fundar" is noun "fundur" (genitive), not verb "funda"
444
+ * Counter-example: "við fórum" → "við" is pronoun, "fórum" is verb "fara"
445
+ *
446
+ * @param candidates - All possible readings of the current word
447
+ * @param prevWord - The previous word (raw form)
448
+ * @param lemmatizer - Lemmatizer for looking up the previous word
449
+ * @returns GrammarRuleMatch if a rule applies, null otherwise
450
+ */
451
+ declare function applyNounAfterPrepositionRule(candidates: LemmaWithMorph[], prevWord: string | null, lemmatizer: GrammarLemmatizerLike | null): GrammarRuleMatch | null;
425
452
  /**
426
453
  * Apply all mini-grammar rules in sequence.
427
454
  *
428
455
  * Rules are applied in order of specificity:
429
456
  * 1. Preposition + case government (most reliable)
430
- * 2. Pronoun + verb pattern
457
+ * 2. Noun after preposition (governed case)
458
+ * 3. Pronoun + verb pattern
431
459
  *
432
460
  * @param candidates - All possible readings of the current word
433
461
  * @param prevWord - Previous word (raw form)
434
462
  * @param nextWordMorph - Morphological analyses of the next word
463
+ * @param lemmatizer - Optional lemmatizer for looking up previous word POS
435
464
  * @returns GrammarRuleMatch if any rule applies, null otherwise
436
465
  */
437
- declare function applyGrammarRules(candidates: LemmaWithMorph[], prevWord: string | null, nextWordMorph: LemmaWithMorph[]): GrammarRuleMatch | null;
466
+ declare function applyGrammarRules(candidates: LemmaWithMorph[], prevWord: string | null, nextWordMorph: LemmaWithMorph[], lemmatizer?: GrammarLemmatizerLike | null): GrammarRuleMatch | null;
438
467
  /**
439
468
  * Check if a word is a known preposition.
440
469
  */
@@ -583,6 +612,8 @@ interface ProcessedToken {
583
612
  confidence?: number;
584
613
  /** Compound split result if applicable */
585
614
  compoundSplit?: CompoundSplit;
615
+ /** Lemmas derived from compound parts (if any) */
616
+ compoundLemmas?: string[];
586
617
  }
587
618
  /**
588
619
  * Options for text processing.
@@ -673,5 +704,5 @@ declare function runBenchmark(text: string, lemmatizer: LemmatizerLike, strategy
673
704
  compoundSplitter?: CompoundSplitter;
674
705
  }): ProcessingMetrics;
675
706
  //#endregion
676
- export { type BigramProvider, type BinaryLemmatizeOptions, BinaryLemmatizer, type BinaryLemmatizerOptions, CASE_NAMES, CONTEXTUAL_STOPWORDS, type CompoundSplit, type CompoundSplitMode, CompoundSplitter, type CompoundSplitterOptions, DISAMBIGUATION_RULES, type DisambiguatedToken, type DisambiguationRule, Disambiguator, type DisambiguatorOptions, GENDER_NAMES, type GrammarRuleMatch, type GrammaticalCase, type GrammaticalGender, type GrammaticalNumber, type LemmaWithMorph, type LemmaWithPOS, type LemmatizerLike, type MorphFeatures, NOMINATIVE_PRONOUNS, NUMBER_NAMES, PREPOSITION_CASES, PROTECTED_LEMMAS, type ProcessOptions, type ProcessedToken, type ProcessingMetrics, type ProcessingStrategy, STATIC_PHRASES, STOPWORDS_IS, type StaticPhrase, WORD_CLASS_NAMES, WORD_CLASS_NAMES_IS, type WordClass, applyGrammarRules, applyPrepositionRule, applyPronounVerbRule, canGovernCase, createKnownLemmaSet, extractDisambiguatedLemmas, extractIndexableLemmas, getGovernedCases, getPhraseInfo, getRulesForWord, hasDisambiguationRules, isContextualStopword, isKnownPhrase, isKnownPreposition, isStopword, matchPhrase, processText, removeStopwords, runBenchmark };
707
+ export { type BigramProvider, type BinaryLemmatizeOptions, BinaryLemmatizer, type BinaryLemmatizerOptions, CASE_NAMES, CONTEXTUAL_STOPWORDS, type CompoundSplit, type CompoundSplitMode, CompoundSplitter, type CompoundSplitterOptions, DISAMBIGUATION_RULES, type DisambiguatedToken, type DisambiguationRule, Disambiguator, type DisambiguatorOptions, GENDER_NAMES, type GrammarLemmatizerLike, type GrammarRuleMatch, type GrammaticalCase, type GrammaticalGender, type GrammaticalNumber, type LemmaWithMorph, type LemmaWithPOS, type LemmatizerLike, type MorphFeatures, NOMINATIVE_PRONOUNS, NUMBER_NAMES, PREPOSITION_CASES, PROTECTED_LEMMAS, type ProcessOptions, type ProcessedToken, type ProcessingMetrics, type ProcessingStrategy, STATIC_PHRASES, STOPWORDS_IS, type StaticPhrase, WORD_CLASS_NAMES, WORD_CLASS_NAMES_IS, type WordClass, applyGrammarRules, applyNounAfterPrepositionRule, applyPrepositionRule, applyPronounVerbRule, canGovernCase, createKnownLemmaSet, extractDisambiguatedLemmas, extractIndexableLemmas, getGovernedCases, getPhraseInfo, getRulesForWord, hasDisambiguationRules, isContextualStopword, isKnownPhrase, isKnownPreposition, isStopword, matchPhrase, processText, removeStopwords, runBenchmark };
677
708
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/stopwords.ts","../src/types.ts","../src/binary-lemmatizer.ts","../src/disambiguate.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"mappings":";;AAUA;;;;;AA4EA;;cA5Ea,YAAA,EAAY,GAAA;;;AA0FzB;iBAdgB,UAAA,CAAW,IAAA;;;;AAuD3B;;;;;AAiBA;;cA1Da,oBAAA,EAAsB,GAAA,SAAY,GAAA;;;;;;;;;;;iBAyC/B,oBAAA,CAAqB,KAAA,UAAe,GAAA;;;;iBAiBpC,eAAA,kBAAA,CAAkC,KAAA,EAAO,CAAA,KAAM,CAAA;;;;AApJ/D;;;;;AA4EA;;;;KC3EY,SAAA;ADyFZ;;;AAAA,cC1Ea,gBAAA,EAAkB,MAAA,CAAO,SAAA;;ADmHtC;;cCnGa,mBAAA,EAAqB,MAAA,CAAO,SAAA;;;ADoHzC;KCpGY,eAAA;;;;KAKA,iBAAA;;;;KAKA,iBAAA;;;;cAKC,UAAA,EAAY,MAAA,CAAO,eAAA;;;;cAUnB,YAAA,EAAc,MAAA,CAAO,iBAAA;AAzDlC;;;AAAA,cAkEa,YAAA,EAAc,MAAA,CAAO,iBAAA;;AAlDlC;;UA0DiB,aAAA;EACf,IAAA,GAAO,eAAA;EACP,MAAA,GAAS,iBAAA;EACT,MAAA,GAAS,iBAAA;AAAA;;;;UAMM,YAAA;EACf,KAAA;EACA,GAAA,EAAK,SAAA;AAAA;;AA3CP;;UAiDiB,cAAA,SAAuB,YAAA;EACtC,KAAA,GAAQ,aAAA;AAAA;AA7CV;;;;AAAA,UAoDiB,cAAA;EACf,SAAA,CAAU,IAAA;EACV,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;AAnCnC;;UA0CiB,cAAA;EACf,IAAA,CAAK,KAAA,UAAe,KAAA;AAAA;;;UCjEL,uBAAA;EACf,KAAA,UAAe,KAAA;AAAA;AAAA,UAGA,sBAAA;EACf,SAAA,GAAY,SAAA;AAAA;AAAA,cAGD,gBAAA,YAA4B,cAAA,EAAgB,cAAA;EAAA,QAC/C,MAAA;EAAA,QACA,UAAA;EAAA,QACA,YAAA;EAAA,QACA,YAAA;EAAA,QACA,WAAA;EAAA,QACA,WAAA;EAAA,QACA,YAAA;EAAA,QACA,OAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,WAAA;EAAA,QAEA,UAAA;EAAA,QACA,SAAA;EAAA,QACA,UAAA;EAAA,QACA,WAAA;EAAA,QACA,OAAA;EAAA,QAEA,OAAA;EAAA,QAED,WAAA,CAAA;ED3EsC;AAgB/C;;EAhB+C,OCiKhC,IAAA,CACX,GAAA,UACA,OAAA,GAAS,uBAAA,GACR,OAAA,CAAQ,gBAAA;EDpJqB;;AAgBlC;EAhBkC,OCmKzB,cAAA,CAAe,MAAA,EAAQ,WAAA,GAAc,gBAAA;;;;UAOpC,SAAA;EDrJmB;;;EAAA,QC4JnB,QAAA;EDvJE;;;EAAA,QC8JF,OAAA;ED9JmB;AAK7B;;;EAL6B,QCsKnB,QAAA;EDjKqC;AAU/C;;;;ECiLE,SAAA,CAAU,IAAA,UAAc,OAAA,GAAS,sBAAA;EDxKtB;;;;;EAAA,QCkNH,WAAA;ED1MoB;;;;ECwO5B,gBAAA,CAAiB,IAAA,WAAe,YAAA;EDrON;;;;ECqQ1B,kBAAA,CAAmB,IAAA,WAAe,cAAA;EDtQzB;;;EC4ST,gBAAA,CAAA;ED3S0B;AAM5B;;EC4SE,UAAA,CAAA;ED1Sc;;;EAAA,QCiTN,UAAA;EDjTM;;AAMhB;;ECmVE,UAAA,CAAW,KAAA,UAAe,KAAA;EDnVwB;;;;EC4VlD,IAAA,CAAK,KAAA,UAAe,KAAA;ED3VC;AAOvB;;EC2VE,OAAA,CAAQ,IAAA;EDzVqC;;;EAAA,ICgWzC,eAAA,CAAA;EDhWc;;;EAAA,ICuWd,aAAA,CAAA;EDhWW;;;EAAA,ICuWX,gBAAA,CAAA;EDtWJ;;;EAAA,IC6WI,UAAA,CAAA;ED7W6B;;;;ECqXjC,YAAA,CAAA;AAAA;;;UCxee,oBAAA;EHyHD;EGvHd,UAAA;;EAEA,WAAA;EHqH8D;EGnH9D,cAAA;EHoI6B;EGlI7B,kBAAA;EHkI8D;EGhI9D,eAAA;AAAA;AAAA,UAGe,kBAAA;EH6H8C;EG3H7D,KAAA;EH2H8D;EGzH9D,KAAA;;EAEA,GAAA,GAAM,SAAA;EF5BI;EE8BV,UAAA;;EAEA,iBAAA,GAAoB,YAAA;EFhCD;EEkCnB,SAAA;EFRD;EEUC,UAAA;EFrB6B;EEuB7B,UAAA;AAAA;;;;UAMQ,mBAAA,SAA4B,cAAA;EACpC,kBAAA,EAAoB,IAAA,WAAe,cAAA;AAAA;;;;cAgQxB,aAAA;EACX,UAAA,EAAY,mBAAA;EACZ,OAAA,EAAS,cAAA;EACT,UAAA;EACA,WAAA;EACA,cAAA;EACA,kBAAA;EACA,eAAA;cAGE,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,SACT,OAAA,GAAS,oBAAA;EFhQgB;AAK7B;;;;;AAUA;EEmQE,YAAA,CACE,IAAA,UACA,QAAA,iBACA,QAAA,kBACC,kBAAA;;;;AF9PL;;;EE8TE,eAAA,CAAgB,MAAA,aAAmB,kBAAA;EF9Tc;AAQnD;;;;;EE0UE,aAAA,CAAc,MAAA,aAAmB,GAAA;AAAA;;;;iBAenB,0BAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,EAAS,cAAA,EACT,OAAA;EACE,QAAA,IAAY,IAAA;EACZ,eAAA;AAAA,IAED,GAAA;;;;;;;AHjWH;;UIrFiB,kBAAA;EJqFkB;EInFjC,IAAA;EJ4Hc;EI1Hd,MAAA,EAAQ,SAAA;;EAER,IAAA,EAAM,SAAA;EJwHwD;EItH9D,OAAA;EJuI6B;EIrI7B,WAAA;AAAA;;;;;;;;;;cAYW,oBAAA,EAAsB,kBAAA;;;;iBA6InB,eAAA,CAAgB,IAAA,WAAe,kBAAA;AHxJ/C;;;AAAA,iBGgKgB,sBAAA,CAAuB,IAAA;;;AJtFvC;;;;;AAyCA;;;;AAzCA,cK1Ea,iBAAA,EAAmB,GAAA,SAAY,GAAA,CAAI,eAAA;ALoIhD;;;;;AAAA,cKrFa,mBAAA,EAAmB,GAAA;;;;UAgBf,gBAAA;;EAEf,KAAA;;EAEA,GAAA,EAAK,SAAA;EJlFc;EIoFnB,IAAA;EJpFmB;EIsFnB,UAAA;AAAA;;;;;AJvDF;;;iBIiEgB,aAAA,CACd,SAAA,UACA,YAAA,EAAc,eAAA;;AJnDhB;;;;;AAKA;;;;iBI+DgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,aAAA,EAAe,cAAA,KACd,gBAAA;AJ7DH;;;;;AAKA;;;;;AALA,iBI6FgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,kBACC,gBAAA;;;;;AJxEH;;;;;AAQA;;;iBIsGgB,iBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,aAAA,EAAe,cAAA,KACd,gBAAA;;;;iBAea,kBAAA,CAAmB,KAAA;;;;iBAOnB,gBAAA,CAAiB,SAAA,WAAoB,GAAA,CAAI,eAAA;;;;;ALvFzD;;cMzHa,gBAAA,EAAgB,GAAA;AAAA,UAoFZ,aAAA;ENqC+C;EMnC9D,IAAA;ENoD6B;EMlD7B,KAAA;ENkD8D;EMhD9D,UAAA;ENgDuD;EM9CvD,UAAA;EN8C6D;EM5C7D,UAAA;AAAA;;;;ALvGF;;;;KKiHY,iBAAA;AAAA,UAEK,uBAAA;ELzFhB;;;;EK8FC,aAAA;EL9ED;EKgFC,iBAAA;EL3FgC;;AAgBlC;;EKgFE,IAAA,GAAO,iBAAA;AAAA;AAAA,cA8EI,gBAAA;EAAA,QACH,UAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,WAAA;EAAA,QACA,IAAA;cAGN,UAAA,EAAY,cAAA,EACZ,WAAA,EAAa,GAAA,UACb,OAAA,GAAS,uBAAA;EL9JgB;;;EAAA,QK0KnB,OAAA;ELrKG;;;;;AAUb;;;EK6KE,KAAA,CAAM,IAAA,WAAe,aAAA;EL7K4B;AASnD;;EATmD,QK0RzC,aAAA;EAAA,QAwBA,QAAA;ELzSyC;AAQnD;;;EKqWE,YAAA,CAAa,IAAA;AAAA;;;;;iBAUC,mBAAA,CAAoB,MAAA,aAAmB,GAAA;;;;ANzcvD;;;;;AA4EA;;;;;AAcA;UOvFiB,YAAA;;EAEf,KAAA;EPqFgD;EOnFhD,UAAA;EP4HkC;EO1HlC,GAAA;AAAA;;AP2IF;;;cOpIa,cAAA,EAAgB,GAAA,SAAY,YAAA;;;;;iBA6GzB,WAAA,CACd,KAAA,YACA,UAAA;EACG,MAAA,EAAQ,YAAA;EAAc,SAAA;AAAA;;AN/H3B;;iBM+IgB,aAAA,CAAc,IAAA;;;ANhI9B;iBMuIgB,aAAA,CAAc,IAAA,WAAe,YAAA;;;;;;UC5H5B,cAAA;ER4Ff;EQ1FA,QAAA;ER6DiC;EQ3DjC,IAAA;ERoGc;EQlGd,MAAA;;EAEA,QAAA;ERgG8D;EQ9F9D,aAAA;ER+G6B;EQ7G7B,UAAA;ER6G8D;EQ3G9D,aAAA,GAAgB,aAAA;AAAA;;;;UAMD,cAAA;;EAEf,OAAA,GAAU,cAAA;;EAEV,gBAAA,GAAmB,gBAAA;EPlDA;EOoDnB,eAAA;EPpDmB;;AAerB;;;;EO4CE,sBAAA;EP5BW;EO8BX,cAAA;;;;APdF;;;EOqBE,kBAAA;EPrByB;AAK3B;;;;;EOuBE,kBAAA;AAAA;;;;APbF;;;;;iBOwBgB,WAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,cAAA;;;;;APTH;;;;iBOwIgB,sBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,GAAA;APpIH;;;AAAA,KOqMY,kBAAA;;;;UAKK,iBAAA;EPzMf;EO2MA,SAAA;EP1MA;EO4MA,eAAA;EP3MA;EO6MA,QAAA;EP7M0B;EO+M1B,cAAA;EPzMe;EO2Mf,aAAA;;EAEA,aAAA;EP5MA;EO8MA,cAAA;EP7MK;EO+ML,eAAA;EP/Mc;EOiNd,YAAA;EP3M8B;EO6M9B,MAAA;AAAA;;;;iBAMc,YAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,QAAA,EAAU,kBAAA,EACV,SAAA;EACE,OAAA,GAAU,cAAA;EACV,gBAAA,GAAmB,gBAAA;AAAA,IAEpB,iBAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/stopwords.ts","../src/types.ts","../src/binary-lemmatizer.ts","../src/disambiguate.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"mappings":";;AAUA;;;;;AA4EA;;cA5Ea,YAAA,EAAY,GAAA;;;AA0FzB;iBAdgB,UAAA,CAAW,IAAA;;;;AAuD3B;;;;;AAiBA;;cA1Da,oBAAA,EAAsB,GAAA,SAAY,GAAA;;;;;;;;;;;iBAyC/B,oBAAA,CAAqB,KAAA,UAAe,GAAA;;;;iBAiBpC,eAAA,kBAAA,CAAkC,KAAA,EAAO,CAAA,KAAM,CAAA;;;;AApJ/D;;;;;AA4EA;;;;KC3EY,SAAA;ADyFZ;;;AAAA,cC1Ea,gBAAA,EAAkB,MAAA,CAAO,SAAA;;ADmHtC;;cCnGa,mBAAA,EAAqB,MAAA,CAAO,SAAA;;;ADoHzC;KCpGY,eAAA;;;;KAKA,iBAAA;;;;KAKA,iBAAA;;;;cAKC,UAAA,EAAY,MAAA,CAAO,eAAA;;;;cAUnB,YAAA,EAAc,MAAA,CAAO,iBAAA;AAzDlC;;;AAAA,cAkEa,YAAA,EAAc,MAAA,CAAO,iBAAA;;AAlDlC;;UA0DiB,aAAA;EACf,IAAA,GAAO,eAAA;EACP,MAAA,GAAS,iBAAA;EACT,MAAA,GAAS,iBAAA;AAAA;;;;UAMM,YAAA;EACf,KAAA;EACA,GAAA,EAAK,SAAA;AAAA;;AA3CP;;UAiDiB,cAAA,SAAuB,YAAA;EACtC,KAAA,GAAQ,aAAA;AAAA;AA7CV;;;;AAAA,UAoDiB,cAAA;EACf,SAAA,CAAU,IAAA;EACV,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;AAnCnC;;UA0CiB,cAAA;EACf,IAAA,CAAK,KAAA,UAAe,KAAA;AAAA;;;UCjEL,uBAAA;EACf,KAAA,UAAe,KAAA;AAAA;AAAA,UAGA,sBAAA;EACf,SAAA,GAAY,SAAA;AAAA;AAAA,cAGD,gBAAA,YAA4B,cAAA,EAAgB,cAAA;EAAA,QAC/C,MAAA;EAAA,QACA,UAAA;EAAA,QACA,YAAA;EAAA,QACA,YAAA;EAAA,QACA,WAAA;EAAA,QACA,WAAA;EAAA,QACA,YAAA;EAAA,QACA,OAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,WAAA;EAAA,QAEA,UAAA;EAAA,QACA,SAAA;EAAA,QACA,UAAA;EAAA,QACA,WAAA;EAAA,QACA,OAAA;EAAA,QAEA,OAAA;EAAA,QAED,WAAA,CAAA;ED3EsC;AAgB/C;;EAhB+C,OCiKhC,IAAA,CACX,GAAA,UACA,OAAA,GAAS,uBAAA,GACR,OAAA,CAAQ,gBAAA;EDpJqB;;AAgBlC;EAhBkC,OCmKzB,cAAA,CAAe,MAAA,EAAQ,WAAA,GAAc,gBAAA;;;;UAOpC,SAAA;EDrJmB;;;EAAA,QC4JnB,QAAA;EDvJE;;;EAAA,QC8JF,OAAA;ED9JmB;AAK7B;;;EAL6B,QCsKnB,QAAA;EDjKqC;AAU/C;;;;ECiLE,SAAA,CAAU,IAAA,UAAc,OAAA,GAAS,sBAAA;EDxKtB;;;;;EAAA,QCkNH,WAAA;ED1MoB;;;;ECwO5B,gBAAA,CAAiB,IAAA,WAAe,YAAA;EDrON;;;;ECqQ1B,kBAAA,CAAmB,IAAA,WAAe,cAAA;EDtQzB;;;EC4ST,gBAAA,CAAA;ED3S0B;AAM5B;;EC4SE,UAAA,CAAA;ED1Sc;;;EAAA,QCiTN,UAAA;EDjTM;;AAMhB;;ECmVE,UAAA,CAAW,KAAA,UAAe,KAAA;EDnVwB;;;;EC4VlD,IAAA,CAAK,KAAA,UAAe,KAAA;ED3VC;AAOvB;;EC2VE,OAAA,CAAQ,IAAA;EDzVqC;;;EAAA,ICgWzC,eAAA,CAAA;EDhWc;;;EAAA,ICuWd,aAAA,CAAA;EDhWW;;;EAAA,ICuWX,gBAAA,CAAA;EDtWJ;;;EAAA,IC6WI,UAAA,CAAA;ED7W6B;;;;ECqXjC,YAAA,CAAA;AAAA;;;UCxee,oBAAA;EHyHD;EGvHd,UAAA;;EAEA,WAAA;EHqH8D;EGnH9D,kBAAA;EHoI6B;EGlI7B,eAAA;AAAA;AAAA,UAGe,kBAAA;EH+HwC;EG7HvD,KAAA;EH6H6D;EG3H7D,KAAA;EH2H8D;EGzH9D,GAAA,GAAM,SAAA;;EAEN,UAAA;EF5BU;EE8BV,iBAAA,GAAoB,YAAA;;EAEpB,SAAA;EFhCmB;EEkCnB,UAAA;EFRD;EEUC,UAAA;AAAA;;AFLF;;UEWU,mBAAA,SAA4B,cAAA;EACpC,kBAAA,EAAoB,IAAA,WAAe,cAAA;AAAA;AAAA,UAuBpB,yBAAA;EACf,UAAA;EACA,UAAA;AAAA;;AFhBF;;cE6Pa,aAAA;EACX,UAAA,EAAY,mBAAA;EACZ,OAAA,EAAS,cAAA;EACT,UAAA;EACA,WAAA;EACA,kBAAA;EACA,eAAA;cAGE,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,SACT,OAAA,GAAS,oBAAA;EF9PA;;;;;AAUb;;EEqQE,YAAA,CACE,IAAA,UACA,QAAA,iBACA,QAAA,iBACA,IAAA,GAAM,yBAAA,GACL,kBAAA;EF1QsB;;AAS3B;;;;EEmUE,eAAA,CAAgB,MAAA,aAAmB,kBAAA;EF3TpB;;;;;;EE+Uf,aAAA,CAAc,MAAA,aAAmB,GAAA;AAAA;;;;iBAenB,0BAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,EAAS,cAAA,EACT,OAAA;EACE,QAAA,IAAY,IAAA;EACZ,eAAA;AAAA,IAED,GAAA;;;;;;;AHtWH;;UIrFiB,kBAAA;EJqFkB;EInFjC,IAAA;EJ4Hc;EI1Hd,MAAA,EAAQ,SAAA;;EAER,IAAA,EAAM,SAAA;EJwHwD;EItH9D,OAAA;EJuI6B;EIrI7B,WAAA;AAAA;;;;;;;;;;cAYW,oBAAA,EAAsB,kBAAA;;;;iBA6InB,eAAA,CAAgB,IAAA,WAAe,kBAAA;AHxJ/C;;;AAAA,iBGgKgB,sBAAA,CAAuB,IAAA;;;AJtFvC;;;AAAA,UK/EiB,qBAAA;EACf,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;;;ALwInC;;;;;cK5Ha,iBAAA,EAAmB,GAAA,SAAY,GAAA,CAAI,eAAA;;;;;;cA+CnC,mBAAA,EAAmB,GAAA;;AJtEhC;;UIsFiB,gBAAA;EJtFI;EIwFnB,KAAA;EJzEW;EI2EX,GAAA,EAAK,SAAA;;EAEL,IAAA;EJ7E6C;EI+E7C,UAAA;AAAA;;;;AJ/CF;;;;iBIyDgB,aAAA,CACd,SAAA,UACA,YAAA,EAAc,eAAA;AJtDhB;;;;;AAKA;;;;;AALA,iBIuEgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,aAAA,EAAe,cAAA,KACd,gBAAA;;;;;AJtDH;;;;;AASA;iBI6EgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,kBACC,gBAAA;;;;AJxEH;;;;;;;;;;;;;;;;iBIqHgB,6BAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,UAAA,EAAY,qBAAA,UACX,gBAAA;;;;;;;;;AJxGH;;;;;;iBI8JgB,iBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,aAAA,EAAe,cAAA,IACf,UAAA,GAAY,qBAAA,UACX,gBAAA;;;AJ3JH;iBI8KgB,kBAAA,CAAmB,KAAA;;;;iBAOnB,gBAAA,CAAiB,SAAA,WAAoB,GAAA,CAAI,eAAA;;;;;ALrKzD;;cMzHa,gBAAA,EAAgB,GAAA;AAAA,UAoFZ,aAAA;ENqC+C;EMnC9D,IAAA;ENoD6B;EMlD7B,KAAA;ENkD8D;EMhD9D,UAAA;ENgDuD;EM9CvD,UAAA;EN8C6D;EM5C7D,UAAA;AAAA;;;;ALvGF;;;;KKiHY,iBAAA;AAAA,UAEK,uBAAA;ELzFhB;;;;EK8FC,aAAA;EL9ED;EKgFC,iBAAA;EL3FgC;;AAgBlC;;EKgFE,IAAA,GAAO,iBAAA;AAAA;AAAA,cA8EI,gBAAA;EAAA,QACH,UAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,WAAA;EAAA,QACA,IAAA;cAGN,UAAA,EAAY,cAAA,EACZ,WAAA,EAAa,GAAA,UACb,OAAA,GAAS,uBAAA;EL9JgB;;;EAAA,QK0KnB,OAAA;ELrKG;;;;;AAUb;;;EK6KE,KAAA,CAAM,IAAA,WAAe,aAAA;EL7K4B;AASnD;;EATmD,QK0RzC,aAAA;EAAA,QAwBA,QAAA;ELzSyC;AAQnD;;;EKqWE,YAAA,CAAa,IAAA;AAAA;;;;;iBAUC,mBAAA,CAAoB,MAAA,aAAmB,GAAA;;;;ANzcvD;;;;;AA4EA;;;;;AAcA;UOvFiB,YAAA;;EAEf,KAAA;EPqFgD;EOnFhD,UAAA;EP4HkC;EO1HlC,GAAA;AAAA;;AP2IF;;;cOpIa,cAAA,EAAgB,GAAA,SAAY,YAAA;;;;;iBA6GzB,WAAA,CACd,KAAA,YACA,UAAA;EACG,MAAA,EAAQ,YAAA;EAAc,SAAA;AAAA;;AN/H3B;;iBM+IgB,aAAA,CAAc,IAAA;;;ANhI9B;iBMuIgB,aAAA,CAAc,IAAA,WAAe,YAAA;;;;;;UC5H5B,cAAA;ER4Ff;EQ1FA,QAAA;ER6DiC;EQ3DjC,IAAA;ERoGc;EQlGd,MAAA;;EAEA,QAAA;ERgG8D;EQ9F9D,aAAA;ER+G6B;EQ7G7B,UAAA;ER6G8D;EQ3G9D,aAAA,GAAgB,aAAA;ER2GuC;EQzGvD,cAAA;AAAA;;;;UAMe,cAAA;;EAEf,OAAA,GAAU,cAAA;EPlDS;EOoDnB,gBAAA,GAAmB,gBAAA;EPpDA;EOsDnB,eAAA;EPvCW;;;;;AAgBb;EO8BE,sBAAA;;EAEA,cAAA;EPhCgD;AAgBlD;;;;;EOuBE,kBAAA;EPlB2B;;;;AAK7B;;EOoBE,kBAAA;AAAA;;APfF;;;;;AAUA;;iBOgBgB,WAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,cAAA;;;APXH;;;;;AAQA;iBOiJgB,sBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,GAAA;;;;KAiES,kBAAA;;;;UAKK,iBAAA;EPzNf;EO2NA,SAAA;EP1NA;EO4NA,eAAA;EP5N0B;EO8N1B,QAAA;EPxNe;EO0Nf,cAAA;;EAEA,aAAA;EP3NA;EO6NA,aAAA;EP5NK;EO8NL,cAAA;EP9Nc;EOgOd,eAAA;EP1N8B;EO4N9B,YAAA;EP5NkD;EO8NlD,MAAA;AAAA;;;;iBAMc,YAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,QAAA,EAAU,kBAAA,EACV,SAAA;EACE,OAAA,GAAU,cAAA;EACV,gBAAA,GAAmB,gBAAA;AAAA,IAEpB,iBAAA"}
package/dist/index.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import{tokenize as e}from"tokenize-is";const t=new Set(`á.að.aðra.aðrar.aðrir.af.alla.allan.allar.allir.allnokkra.allnokkrar.allnokkrir.allnokkru.allnokkrum.allnokkuð.allnokkur.allnokkurn.allnokkurra.allnokkurrar.allnokkurri.allnokkurs.allnokkurt.allra.allrar.allri.alls.allt.alltað.allur.án.andspænis.annað.annaðhvort.annan.annar.annarra.annarrar.annarri.annars.árla.ásamt.auk.austan.austanundir.austur.báða.báðar.báðir.báðum.bæði.bak.beggja.eða.eður.ef.eftir.ég.ein.eina.einar.einhver.einhverja.einhverjar.einhverjir.einhverju.einhverjum.einhvern.einhverra.einhverrar.einhverri.einhvers.einir.einn.einna.einnar.einni.eins.einskis.einu.einum.eitt.eitthvað.eitthvert.ekkert.ella.ellegar.en.enda.enga.engan.engar.engin.enginn.engir.engra.engrar.engri.engu.engum.er.fáein.fáeina.fáeinar.fáeinir.fáeinna.fáeinum.fjær.fjarri.flestalla.flestallan.flestallar.flestallir.flestallra.flestallrar.flestallri.flestalls.flestallt.flestallur.flestöll.flestöllu.flestöllum.frá.fram.fyrir.fyrst.gagnstætt.gagnvart.gegn.gegnt.gegnum.hana.handa.handan.hann.hans.heldur.hennar.henni.hið.hin.hina.hinar.hinir.hinn.hinna.hinnar.hinni.hins.hinu.hinum.hitt.hjá.honum.hún.hvað.hvaða.hvenær.hver.hverja.hverjar.hverjir.hverju.hverjum.hvern.hverra.hverrar.hverri.hvers.hvert.hvílík.hvílíka.hvílíkan.hvílíkar.hvílíkir.hvílíkra.hvílíkrar.hvílíkri.hvílíks.hvílíkt.hvílíku.hvílíkum.hvílíkur.hvor.hvora.hvorar.hvorir.hvorki.hvorn.hvorra.hvorrar.hvorri.hvors.hvort.hvoru.hvorug.hvoruga.hvorugan.hvorugar.hvorugir.hvorugra.hvorugrar.hvorugri.hvorugs.hvorugt.hvorugu.hvorugum.hvorugur.hvorum.í.inn.innan.innanundir.jafnframt.jafnhliða.kring.kringum.með.meðal.meðan.meður.mér.mestalla.mestallan.mestallar.mestallir.mestallra.mestallrar.mestallri.mestalls.mestallt.mestallur.mestöll.mestöllu.mestöllum.miðli.mig.milli.millum.mín.mína.mínar.mínir.minn.minna.minnar.minni.míns.mínu.mínum.mitt.mót.móti.nær.nærri.næst.næstum.nálægt.né.neðan.nein.neina.neinar.neinir.neinn.neinna.neinnar.neinni.neins.neinu.neinum.neitt.nema.niður.nokkra.nokkrar.nokkrir.nokkru.nokkrum.nokkuð.nokkur.nokkurn.nokkurra.nokkurrar.nokkurri.nokkurs.nokkurt.norðan.nú.öðru.öðrum.of.ofan.ofar.og.óháð.okkar.okkur.öll.öllu.öllum.önnur.órafjarri.oss.sá.sakir.sama.saman.samar.samfara.samhliða.sami.samir.samkvæmt.samra.samrar.samri.sams.samskipa.samt.samtímis.samur.sem.sér.sérhvað.sérhver.sérhverja.sérhverjar.sérhverjir.sérhverju.sérhverjum.sérhvern.sérhverra.sérhverrar.sérhverri.sérhvers.sérhvert.síðan.síðla.sig.sín.sína.sínar.sínhver.sínhverja.sínhverjar.sínhverjir.sínhverju.sínhverjum.sínhvern.sínhverra.sínhverrar.sínhverri.sínhvers.sínhvert.sínhvor.sínhvora.sínhvorar.sínhvorir.sínhvorn.sínhvorra.sínhvorrar.sínhvorri.sínhvors.sínhvort.sínhvoru.sínhvorum.sínir.sinn.sinna.sinnar.sinnhver.sinnhverja.sinnhverjar.sinnhverjir.sinnhverju.sinnhverjum.sinnhvern.sinnhverra.sinnhverrar.sinnhverri.sinnhvers.sinnhvert.sinnhvor.sinnhvora.sinnhvorar.sinnhvorir.sinnhvorn.sinnhvorra.sinnhvorrar.sinnhvorri.sinnhvors.sinnhvort.sinnhvoru.sinnhvorum.sinni.síns.sínu.sínum.sitt.sitthvað.sitthver.sitthverja.sitthverjar.sitthverjir.sitthverju.sitthverjum.sitthvern.sitthverra.sitthverrar.sitthverri.sitthvers.sitthvert.sitthvor.sitthvora.sitthvorar.sitthvorir.sitthvorn.sitthvorra.sitthvorrar.sitthvorri.sitthvors.sitthvort.sitthvoru.sitthvorum.sjálf.sjálfa.sjálfan.sjálfar.sjálfir.sjálfra.sjálfrar.sjálfri.sjálfs.sjálft.sjálfu.sjálfum.sjálfur.slík.slíka.slíkan.slíkar.slíkir.slíkra.slíkrar.slíkri.slíks.slíkt.slíku.slíkum.slíkur.snemma.sökum.söm.sömu.sömum.sú.sum.suma.suman.sumar.sumir.sumra.sumrar.sumri.sums.sumt.sumu.sumum.sumur.sunnan.svo.til.tráss.um.umfram.umhverfis.undan.undir.uns.upp.úr.út.utan.útundan.vegna.vér.vestan.vestur.vettugi.við.viður.vor.vora.vorar.vorir.vorn.vorra.vorrar.vorri.vors.vort.voru.vorum.yðar.yður.yfir.ykkar.ykkur.ýmis.ýmiss.ýmissa.ýmissar.ýmissi.ýmist.ýmsa.ýmsan.ýmsar.ýmsir.ýmsu.ýmsum.þá.það.þær.þann.þar.þau.þegar.þeim.þeir.þeirra.þeirrar.þeirri.þennan.þér.þess.þessa.þessar.þessara.þessarar.þessari.þessi.þessir.þessu.þessum.þetta.þið.þig.þín.þína.þínar.þínir.þinn.þinna.þinnar.þinni.þíns.þínu.þínum.þitt.þó.þónokkra.þónokkrar.þónokkrir.þónokkru.þónokkrum.þónokkuð.þónokkur.þónokkurn.þónokkurra.þónokkurrar.þónokkurri.þónokkurs.þónokkurt.þótt.þú.því.þvílík.þvílíka.þvílíkan.þvílíkar.þvílíkir.þvílíkra.þvílíkrar.þvílíkri.þvílíks.þvílíkt.þvílíku.þvílíkum.þvílíkur`.split(`.`));function n(e){return t.has(e.toLowerCase())}const r=new Map([[`á`,new Set([`fs`,`ao`])],[`við`,new Set([`fs`,`fn`])],[`af`,new Set([`fs`,`ao`])],[`til`,new Set([`fs`])],[`um`,new Set([`fs`])],[`frá`,new Set([`fs`])],[`yfir`,new Set([`fs`,`ao`])],[`undir`,new Set([`fs`,`ao`])],[`fyrir`,new Set([`fs`,`ao`])],[`eftir`,new Set([`fs`,`ao`])],[`gegn`,new Set([`fs`])],[`hjá`,new Set([`fs`])],[`úr`,new Set([`fs`])],[`í`,new Set([`fs`])]]);function i(e,n){let i=e.toLowerCase(),a=r.get(i);return a&&n?a.has(n):t.has(i)}function a(e){return e.filter(e=>!n(e))}const o=1279610177,s=[`no`,`so`,`lo`,`ao`,`fs`,`fn`,`st`,`to`,`gr`,`uh`],c=[void 0,`nf`,`þf`,`þgf`,`ef`],l=[void 0,`kk`,`kvk`,`hk`],u=[`et`,`ft`];var d=class e{buffer;stringPool;lemmaOffsets;lemmaLengths;wordOffsets;wordLengths;entryOffsets;entries;bigramW1Offsets;bigramW1Lengths;bigramW2Offsets;bigramW2Lengths;bigramFreqs;lemmaCount;wordCount;entryCount;bigramCount;version;decoder=new TextDecoder(`utf-8`);constructor(e){this.buffer=e;let t=new DataView(e),n=t.getUint32(0,!0);if(n!==o)throw Error(`Invalid binary format: expected magic 0x${o.toString(16)}, got 0x${n.toString(16)}`);if(this.version=t.getUint32(4,!0),this.version!==1&&this.version!==2)throw Error(`Unsupported version: ${this.version}`);let r=t.getUint32(8,!0);this.lemmaCount=t.getUint32(12,!0),this.wordCount=t.getUint32(16,!0),this.entryCount=t.getUint32(20,!0),this.bigramCount=t.getUint32(24,!0);let i=32;this.stringPool=new Uint8Array(e,i,r),i+=r,this.lemmaOffsets=new Uint32Array(e,i,this.lemmaCount),i+=this.lemmaCount*4,this.lemmaLengths=new Uint8Array(e,i,this.lemmaCount),i+=this.lemmaCount,i=i+3&-4,this.wordOffsets=new Uint32Array(e,i,this.wordCount),i+=this.wordCount*4,this.wordLengths=new Uint8Array(e,i,this.wordCount),i+=this.wordCount,i=i+3&-4,this.entryOffsets=new Uint32Array(e,i,this.wordCount+1),i+=(this.wordCount+1)*4,this.entries=new Uint32Array(e,i,this.entryCount),i+=this.entryCount*4,this.bigramW1Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW1Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramW2Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW2Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramFreqs=new Uint32Array(e,i,this.bigramCount)}static async load(t,n={}){let r=await(n.fetch??fetch)(t);if(!r.ok)throw Error(`Failed to load binary data: ${r.status}`);return new e(await r.arrayBuffer())}static loadFromBuffer(t){return new e(t)}getString(e,t){return this.decoder.decode(this.stringPool.subarray(e,e+t))}getLemma(e){return this.getString(this.lemmaOffsets[e],this.lemmaLengths[e])}getWord(e){return this.getString(this.wordOffsets[e],this.wordLengths[e])}findWord(e){let t=0,n=this.wordCount-1;for(;t<=n;){let r=t+n>>>1,i=this.getWord(r);if(i===e)return r;i<e?t=r+1:n=r-1}return-1}lemmatize(e,t={}){let n=e.toLowerCase(),r=this.findWord(n);if(r===-1)return[n];let i=this.entryOffsets[r],a=this.entryOffsets[r+1],{wordClass:o}=t,c=new Set,l=[];for(let e=i;e<a;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=s[n];if(o&&r!==o)continue;let i=this.getLemma(t);c.has(i)||(c.add(i),l.push(i))}return l.length===0?[n]:l}unpackEntry(e){return this.version===1?{lemmaIdx:e>>>4,posCode:e&15,caseCode:0,genderCode:0,numberCode:0}:{lemmaIdx:e>>>10,posCode:e&15,caseCode:e>>>4&7,genderCode:e>>>7&3,numberCode:e>>>9&1}}lemmatizeWithPOS(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=new Set,o=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=this.getLemma(t),i=s[n]??``,c=`${r}:${i}`;a.has(c)||(a.add(c),o.push({lemma:r,pos:i}))}return o}lemmatizeWithMorph(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n,caseCode:r,genderCode:i,numberCode:o}=this.unpackEntry(this.entries[e]),d={},f=c[r],p=l[i],m=u[o];f&&(d.case=f),p&&(d.gender=p),m&&(d.number=m),a.push({lemma:this.getLemma(t),pos:s[n]??``,morph:Object.keys(d).length>0?d:void 0})}return a}hasMorphFeatures(){return this.version>=2}getVersion(){return this.version}findBigram(e,t){let n=0,r=this.bigramCount-1;for(;n<=r;){let i=n+r>>>1,a=this.getString(this.bigramW1Offsets[i],this.bigramW1Lengths[i]);if(a<e)n=i+1;else if(a>e)r=i-1;else{let e=this.getString(this.bigramW2Offsets[i],this.bigramW2Lengths[i]);if(e===t)return i;e<t?n=i+1:r=i-1}}return-1}bigramFreq(e,t){let n=this.findBigram(e.toLowerCase(),t.toLowerCase());return n===-1?0:this.bigramFreqs[n]}freq(e,t){return this.bigramFreq(e,t)}isKnown(e){return this.findWord(e.toLowerCase())!==-1}get lemmaCountValue(){return this.lemmaCount}get wordFormCount(){return this.wordCount}get bigramCountValue(){return this.bigramCount}get bufferSize(){return this.buffer.byteLength}getAllLemmas(){let e=[];for(let t=0;t<this.lemmaCount;t++)e.push(this.getLemma(t));return e}};const f=[{word:`á`,prefer:`so`,over:`fs`,context:`after_pronoun`,description:`á after pronoun = verb 'eiga' (I own, you own)`},{word:`á`,prefer:`fs`,over:`so`,context:`before_noun`,description:`á before noun = preposition (on, at)`},{word:`við`,prefer:`fn`,over:`fs`,context:`sentence_start`,description:`við at sentence start = pronoun 'we'`},{word:`við`,prefer:`fs`,over:`fn`,context:`before_noun`,description:`við before noun = preposition 'by/at'`},{word:`af`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`af before noun = preposition 'of/from'`},{word:`til`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`til before noun = preposition 'to'`},{word:`um`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`um before noun = preposition 'about/around'`},{word:`yfir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`yfir before noun = preposition 'over'`},{word:`undir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`undir before noun = preposition 'under'`},{word:`fyrir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`fyrir before noun = preposition 'for/before'`},{word:`eftir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`eftir before noun = preposition 'after'`},{word:`frá`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`frá before noun = preposition 'from'`},{word:`með`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`með before noun = preposition 'with'`},{word:`í`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`í before noun = preposition 'in'`},{word:`úr`,prefer:`fs`,over:`no`,context:`before_noun`,description:`úr before noun = preposition 'out of'`}];function p(e){let t=e.toLowerCase();return f.filter(e=>e.word===t)}function m(e){return f.some(t=>t.word===e.toLowerCase())}const h=new Map([[`á`,new Set([`þf`,`þgf`])],[`í`,new Set([`þf`,`þgf`])],[`við`,new Set([`þf`,`þgf`])],[`með`,new Set([`þf`,`þgf`])],[`undir`,new Set([`þf`,`þgf`])],[`yfir`,new Set([`þf`,`þgf`])],[`fyrir`,new Set([`þf`,`þgf`])],[`um`,new Set([`þf`])],[`gegnum`,new Set([`þf`])],[`kringum`,new Set([`þf`])],[`umhverfis`,new Set([`þf`])],[`af`,new Set([`þgf`])],[`frá`,new Set([`þgf`])],[`hjá`,new Set([`þgf`])],[`úr`,new Set([`þgf`])],[`að`,new Set([`þgf`])],[`móti`,new Set([`þgf`])],[`nálægt`,new Set([`þgf`])],[`gegn`,new Set([`þgf`])],[`gagnvart`,new Set([`þgf`])],[`handa`,new Set([`þgf`])],[`meðal`,new Set([`ef`])],[`til`,new Set([`ef`])],[`án`,new Set([`ef`])],[`vegna`,new Set([`ef`])],[`sakir`,new Set([`ef`])],[`utan`,new Set([`ef`])],[`innan`,new Set([`ef`])],[`meðfram`,new Set([`þgf`])],[`milli`,new Set([`ef`])],[`auk`,new Set([`ef`])],[`í stað`,new Set([`ef`])]]),g=new Set([`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`]);function _(e,t){return t?h.get(e)?.has(t)??!1:!1}function v(e,t){let n=e.filter(e=>e.pos===`fs`);if(n.length===0)return null;for(let e of n)for(let n of t)if(n.morph?.case&&_(e.lemma,n.morph.case))return{lemma:e.lemma,pos:`fs`,rule:`prep+${n.morph.case}`,confidence:.9};return null}function y(e,t){if(!t)return null;let n=t.toLowerCase();if(!g.has(n))return null;let r=e.filter(e=>e.pos===`so`);return r.length===0||!e.some(e=>e.pos!==`so`)?null:{lemma:(r.find(e=>e.lemma===`eiga`)??r[0]).lemma,pos:`so`,rule:`pronoun+verb`,confidence:.85}}function b(e,t,n){return v(e,n)||y(e,t)||null}function x(e){return h.has(e)}function S(e){return h.get(e)}const C={name:`unambiguous`,run(e){return e.length===1?{lemma:e[0].lemma,pos:e[0].pos,confidence:1}:null}},w={name:`preference_rules`,run(e,t,n){if(!n.usePreferenceRules)return null;for(let n of f){let r=T(n,e,t);if(r)return{lemma:r.lemma,pos:r.pos,confidence:.85}}return null}};function T(e,t,n){let r=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.prefer),i=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.over);if(!r||!i)return null;if(e.context===`before_noun`){let e=n.nextWord;if(e&&/^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(e))return r}else if(e.context===`before_verb`){let e=n.nextWord?.toLowerCase();if(e&&![`þessi`,`þetta`,`sá`,`sú`,`það`,`hinn`,`hin`,`hið`].includes(e))return r}else if(e.context===`after_pronoun`){let e=n.prevWord?.toLowerCase();if(e&&[`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`].includes(e))return r}return null}const E=[C,w,{name:`grammar_rules`,run(e,t,n){if(!n.useGrammarRules)return null;let r=e.map(e=>({...e,morph:void 0}));if(n.lemmatizer.lemmatizeWithMorph){let e=t.allTokens[t.index];if(e){let t=n.lemmatizer.lemmatizeWithMorph(e);r.length=0,r.push(...t)}}let i=b(r,t.prevWord,t.nextWordMorph??[]);return i?{lemma:i.lemma,pos:i.pos,confidence:i.confidence}:null}},{name:`word_bigrams`,run(e,t,n){if(!n.bigrams||e.length===0)return null;let r=[];for(let i of e){let e=0;if(t.prevWord){let r=t.prevLemmas||n.lemmatizer.lemmatize(t.prevWord);for(let t of r){let r=n.bigrams.freq(t,i.lemma);r>0&&(e+=Math.log(r+1)*n.leftWeight)}}if(t.nextWord){let r=t.nextLemmas||n.lemmatizer.lemmatize(t.nextWord);for(let t of r){let r=n.bigrams.freq(i.lemma,t);r>0&&(e+=Math.log(r+1)*n.rightWeight)}}r.push({candidate:i,score:e})}if(r.sort((e,t)=>t.score-e.score),r.length>0&&r[0].score>0){let e=r[0].score,t=r.reduce((e,t)=>e+Math.exp(t.score),0),n=t>0?Math.exp(e)/t:.5;return{lemma:r[0].candidate.lemma,pos:r[0].candidate.pos,confidence:n}}return null}},{name:`fallback`,run(e){return e.length>0?{lemma:e[0].lemma,pos:e[0].pos,confidence:1/e.length}:null}}];var D=class{lemmatizer;bigrams;leftWeight;rightWeight;usePhraseRules;usePreferenceRules;useGrammarRules;constructor(e,t=null,n={}){this.lemmatizer=e,this.bigrams=t,this.leftWeight=n.leftWeight??1,this.rightWeight=n.rightWeight??1,this.usePhraseRules=n.usePhraseRules??!0,this.usePreferenceRules=n.usePreferenceRules??!0,this.useGrammarRules=n.useGrammarRules??!0}disambiguate(e,t,n){let r;r=this.lemmatizer.lemmatizeWithPOS?this.lemmatizer.lemmatizeWithPOS(e):this.lemmatizer.lemmatize(e).map(e=>({lemma:e,pos:`no`}));let i=r.map(e=>e.lemma),a=e,o;n&&this.lemmatizer.lemmatizeWithMorph&&(o=this.lemmatizer.lemmatizeWithMorph(n));let s={prevWord:t,nextWord:n,nextWordMorph:o,allTokens:[e],index:0};for(let e of E){let t=e.run(r,s,this);if(t)return{token:a,lemma:t.lemma,pos:t.pos,candidates:i,candidatesWithPOS:r,ambiguous:i.length>1,confidence:t.confidence,resolvedBy:e.name}}return{token:a,lemma:e.toLowerCase(),candidates:i,candidatesWithPOS:r,ambiguous:!1,confidence:0,resolvedBy:`none`}}disambiguateAll(e){let t=[];for(let n=0;n<e.length;n++){let r=e[n],i=n>0?e[n-1]:null,a=n<e.length-1?e[n+1]:null;t.push(this.disambiguate(r,i,a))}return t}extractLemmas(e){let t=new Set,n=this.disambiguateAll(e);for(let e of n)t.add(e.lemma);return t}};function O(e,n,r,i={}){let{tokenize:a,removeStopwords:o}=i,s=a?a(e):e.split(/\s+/).filter(e=>e.length>0).map(e=>e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``)).filter(e=>e.length>0),c=new D(n,r).extractLemmas(s);if(o)for(let e of c)t.has(e)&&c.delete(e);return c}const k={no:`noun`,so:`verb`,lo:`adjective`,ao:`adverb`,fs:`preposition`,fn:`pronoun`,st:`conjunction`,to:`numeral`,gr:`article`,uh:`interjection`},A={no:`nafnorð`,so:`sagnorð`,lo:`lýsingarorð`,ao:`atviksorð`,fs:`forsetning`,fn:`fornafn`,st:`samtenging`,to:`töluorð`,gr:`greinir`,uh:`upphrópun`},j={nf:`nominative`,þf:`accusative`,þgf:`dative`,ef:`genitive`},M={kk:`masculine`,kvk:`feminine`,hk:`neuter`},N={et:`singular`,ft:`plural`},P=new Set(`ísland.england.írland.skotland.finnland.grænland.holland.þýskaland.frakkland.pólland.tékkland.svissland.rússland.eistland.lettland.litháen.danmörk.noregur.svíþjóð.bandaríkin.spánn.portúgal.ítalía.grikkland.þingvellir.akureyri.ísafjörður.reykjavík.keflavík.hafnarfjörður.kópavogur.seltjarnarnes.garðabær.mosfellsbær.vestmannaeyjar.húsavík.sauðárkrókur.siglufjörður.ólafsfjörður.dalvík.egilsstaðir.neskaupstaður.seyðisfjörður.eskifjörður.reyðarfjörður.fáskrúðsfjörður.stöðvarfjörður.djúpivogur.höfn.vík.selfoss.hveragerði.þorlákshöfn.grindavík.sandgerði.borgarnes.stykkishólmur.grundarfjörður.ólafsvík.búðardalur.patreksfjörður.flateyri.suðureyri.bolungarvík.hólmavík.hvammstangi.blönduós.skagaströnd.varmahlíð.hlíðarendi.bergþórshvol.íslandsbanki.landsbankinn.arionbanki.alþingi`.split(`.`)),F=new Set(`maður.kona.stjóri.ráðherra.forseti.formaður.fulltrúi.starfsmaður.hús.staður.vegur.borg.bær.dalur.fjörður.félag.banki.sjóður.stofnun.ráð.rannsókn.greiðsla.mál.kerfi.verk.þjónusta.rekstur.viðskipti.verð.kostnaður`.split(`.`)),I=new Set([`vera`,`hafa`,`gera`,`fara`,`koma`,`segja`,`vilja`,`mega`,`þurfa`,`verða`,`geta`,`sjá`,`taka`,`eiga`,`láta`,`halda`,`leyfa`,`búa`]),L=[`s`,`u`,`a`];var R=class{lemmatizer;minPartLength;tryLinkingLetters;knownLemmas;mode;constructor(e,t,n={}){this.lemmatizer=e,this.knownLemmas=t,this.minPartLength=n.minPartLength??3,this.tryLinkingLetters=n.tryLinkingLetters??!0,this.mode=n.mode??`balanced`}noSplit(e,t){return{word:e,parts:t,indexTerms:t,confidence:0,isCompound:!1}}split(e){let t=e.toLowerCase(),n=this.lemmatizer.lemmatize(e),r=n[0]?.toLowerCase();if(r&&P.has(r)||P.has(t))return this.noSplit(e,n);let i=n.length>0&&n[0].toLowerCase()!==t,a=n.length===1;if(this.mode===`conservative`)return e.includes(`-`)?this.splitAtHyphen(e,n):this.noSplit(e,n);if(this.mode===`balanced`&&i&&a&&t.length<12||t.length<this.minPartLength*2)return this.noSplit(e,n);let o=[];for(let e=this.minPartLength;e<=t.length-this.minPartLength;e++){let n=t.slice(0,e),r=t.slice(e),i=this.trySplit(n,r);if(i&&o.push(i),this.tryLinkingLetters){for(let e of L)if(n.endsWith(e)&&n.length>this.minPartLength){let e=n.slice(0,-1),t=this.trySplit(e,r);t&&o.push({...t,score:t.score*.95})}}}if(o.length===0)return this.noSplit(e,n);o.sort((e,t)=>t.score-e.score);let s=o[0];if(this.mode===`balanced`&&i&&s.score<.6)return this.noSplit(e,n);let c=[...new Set([...s.leftParts,...s.rightParts])];return{word:e,parts:c,indexTerms:[...new Set([...c,t])],confidence:Math.min(s.score,1),isCompound:!0}}splitAtHyphen(e,t){let n=e.split(`-`).filter(e=>e.length>0);if(n.length<2)return this.noSplit(e,t);let r=[];for(let e of n){let t=this.lemmatizer.lemmatize(e);r.push(...t)}let i=[...new Set(r)];return{word:e,parts:i,indexTerms:[...new Set([...i,e.toLowerCase()])],confidence:.9,isCompound:!0}}trySplit(e,t){let n=this.lemmatizer.lemmatize(e),r=this.lemmatizer.lemmatize(t),i=[...new Set(n.filter(e=>this.knownLemmas.has(e)))],a=[...new Set(r.filter(e=>this.knownLemmas.has(e)))];if(i.length===0||a.length===0)return null;let o=0,s=1-Math.abs(e.length-t.length)/(e.length+t.length);o+=s*.2;let c=(e.length+t.length)/2,l=Math.min(c/6,1);o+=l*.2,a.some(e=>F.has(e))&&(o+=.3);let u=i.some(e=>I.has(e)),d=a.some(e=>I.has(e));return u&&d?o-=.3:!u&&!d&&(o+=.2),(e.length<4||t.length<4)&&(o-=.15),{leftParts:i,rightParts:a,score:Math.max(0,o)}}getAllLemmas(e){return this.split(e).indexTerms}};function z(e){return new Set(e.map(e=>e.toLowerCase()))}const B=new Map([[`til dæmis`,{lemma:`til dæmi`,isStopword:!0,pos:`ao`}],[`með öðrum orðum`,{lemma:`með annar orð`,isStopword:!0,pos:`ao`}],[`í raun`,{lemma:`í raun`,isStopword:!0,pos:`ao`}],[`í raun og veru`,{lemma:`í raun og vera`,isStopword:!0,pos:`ao`}],[`af og til`,{lemma:`af og til`,isStopword:!0,pos:`ao`}],[`aftur á móti`,{lemma:`aftur á mót`,isStopword:!0,pos:`ao`}],[`alla vega`,{lemma:`allur vegur`,isStopword:!0,pos:`ao`}],[`alls ekki`,{lemma:`alls ekki`,isStopword:!0,pos:`ao`}],[`alls staðar`,{lemma:`allur staður`,isStopword:!0,pos:`ao`}],[`allt í allt`,{lemma:`allur í allur`,isStopword:!0,pos:`ao`}],[`annars vegar`,{lemma:`annar vegur`,isStopword:!0,pos:`ao`}],[`auk þess`,{lemma:`auk það`,isStopword:!0,pos:`ao`}],[`að auki`,{lemma:`að auki`,isStopword:!0,pos:`ao`}],[`að vísu`,{lemma:`að vís`,isStopword:!0,pos:`ao`}],[`að sjálfsögðu`,{lemma:`að sjálfsagður`,isStopword:!0,pos:`ao`}],[`að minnsta kosti`,{lemma:`að lítill kostur`,isStopword:!0,pos:`ao`}],[`að öllu leyti`,{lemma:`að allur leyti`,isStopword:!0,pos:`ao`}],[`að nokkru leyti`,{lemma:`að nokkur leyti`,isStopword:!0,pos:`ao`}],[`ef til vill`,{lemma:`ef til vilja`,isStopword:!0,pos:`ao`}],[`einhvers staðar`,{lemma:`einhver staður`,isStopword:!0,pos:`ao`}],[`einhvern veginn`,{lemma:`einhver vegur`,isStopword:!0,pos:`ao`}],[`ekki síst`,{lemma:`ekki síður`,isStopword:!0,pos:`ao`}],[`engu að síður`,{lemma:`enginn að síður`,isStopword:!0,pos:`ao`}],[`fyrst og fremst`,{lemma:`snemma og fremri`,isStopword:!0,pos:`ao`}],[`hins vegar`,{lemma:`hinn vegur`,isStopword:!0,pos:`ao`}],[`hér og þar`,{lemma:`hér og þar`,isStopword:!0,pos:`ao`}],[`hér um bil`,{lemma:`hér um bil`,isStopword:!0,pos:`ao`}],[`hér á landi`,{lemma:`hér á land`,isStopword:!0,pos:`ao`}],[`hvað mest`,{lemma:`hvað mjög`,isStopword:!0,pos:`ao`}],[`hverju sinni`,{lemma:`hver sinn`,isStopword:!0,pos:`ao`}],[`hvorki né`,{lemma:`hvorki né`,isStopword:!0,pos:`ao`}],[`í burtu`,{lemma:`í burtu`,isStopword:!0,pos:`ao`}],[`í gær`,{lemma:`í gær`,isStopword:!0,pos:`ao`}],[`í senn`,{lemma:`í senn`,isStopword:!0,pos:`ao`}],[`í sífellu`,{lemma:`í sífella`,isStopword:!0,pos:`ao`}],[`lengi vel`,{lemma:`lengi vel`,isStopword:!0,pos:`ao`}],[`meira að segja`,{lemma:`mikill að segja`,isStopword:!0,pos:`ao`}],[`meira og minna`,{lemma:`mikill og lítill`,isStopword:!0,pos:`ao`}],[`meðal annars`,{lemma:`meðal annar`,isStopword:!0,pos:`ao`}],[`nokkurn veginn`,{lemma:`nokkur vegur`,isStopword:!0,pos:`ao`}],[`og svo framvegis`,{lemma:`og svo framvegis`,isStopword:!0,pos:`ao`}],[`satt að segja`,{lemma:`sannur að segja`,isStopword:!0,pos:`ao`}],[`sem betur fer`,{lemma:`sem vel fara`,isStopword:!0,pos:`ao`}],[`smám saman`,{lemma:`smátt saman`,isStopword:!0,pos:`ao`}],[`svo sem`,{lemma:`svo sem`,isStopword:!0,pos:`ao`}],[`sér í lagi`,{lemma:`sér í lag`,isStopword:!0,pos:`ao`}],[`til og frá`,{lemma:`til og frá`,isStopword:!0,pos:`ao`}],[`til baka`,{lemma:`til baka`,isStopword:!0,pos:`ao`}],[`vítt og breitt`,{lemma:`vítt og breitt`,isStopword:!0,pos:`ao`}],[`á ný`,{lemma:`á ný`,isStopword:!0,pos:`ao`}],[`á meðan`,{lemma:`á meðan`,isStopword:!0,pos:`ao`}],[`á sama tíma`,{lemma:`á samur tími`,isStopword:!0,pos:`ao`}],[`á hinn bóginn`,{lemma:`á hinn bógur`,isStopword:!0,pos:`ao`}],[`þar af leiðandi`,{lemma:`þar af leiða`,isStopword:!0,pos:`ao`}],[`þar að auki`,{lemma:`þar að auki`,isStopword:!0,pos:`ao`}],[`það er að segja`,{lemma:`það vera að segja`,isStopword:!0,pos:`ao`}],[`þess vegna`,{lemma:`það vegna`,isStopword:!0,pos:`ao`}],[`því miður`,{lemma:`það lítt`,isStopword:!0,pos:`ao`}],[`þrátt fyrir`,{lemma:`þrátt fyrir`,isStopword:!0,pos:`ao`}],[`á dögunum`,{lemma:`á dagur`,isStopword:!0,pos:`ao`}],[`á sínum tíma`,{lemma:`á sinn tími`,isStopword:!0,pos:`ao`}],[`á endanum`,{lemma:`á endi`,isStopword:!0,pos:`ao`}],[`einu sinni`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`eitt sinn`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`í fyrsta sinn`,{lemma:`í fyrstur sinn`,isStopword:!1,pos:`ao`}],[`í kvöld`,{lemma:`í kvöld`,isStopword:!1,pos:`ao`}],[`í morgun`,{lemma:`í morgunn`,isStopword:!1,pos:`ao`}],[`á morgun`,{lemma:`á morgunn`,isStopword:!1,pos:`ao`}],[`fyrir hönd`,{lemma:`fyrir hönd`,isStopword:!1,pos:`fs`}],[`með tilliti til`,{lemma:`með tillit til`,isStopword:!1,pos:`fs`}],[`í ljósi`,{lemma:`í ljós`,isStopword:!1,pos:`fs`}],[`í stað`,{lemma:`í staður`,isStopword:!1,pos:`fs`}],[`fyrir aftan`,{lemma:`fyrir aftan`,isStopword:!1,pos:`fs`}],[`fyrir austan`,{lemma:`fyrir austan`,isStopword:!1,pos:`fs`}],[`fyrir framan`,{lemma:`fyrir framan`,isStopword:!1,pos:`fs`}],[`fyrir handan`,{lemma:`fyrir handan`,isStopword:!1,pos:`fs`}],[`fyrir innan`,{lemma:`fyrir innan`,isStopword:!1,pos:`fs`}],[`fyrir neðan`,{lemma:`fyrir neðan`,isStopword:!1,pos:`fs`}],[`fyrir norðan`,{lemma:`fyrir norðan`,isStopword:!1,pos:`fs`}],[`fyrir ofan`,{lemma:`fyrir ofan`,isStopword:!1,pos:`fs`}],[`fyrir sunnan`,{lemma:`fyrir sunnan`,isStopword:!1,pos:`fs`}],[`fyrir utan`,{lemma:`fyrir utan`,isStopword:!1,pos:`fs`}],[`fyrir vestan`,{lemma:`fyrir vestan`,isStopword:!1,pos:`fs`}],[`í gegnum`,{lemma:`í gegnum`,isStopword:!1,pos:`fs`}],[`í kringum`,{lemma:`í kringum`,isStopword:!1,pos:`fs`}],[`innan við`,{lemma:`innan við`,isStopword:!1,pos:`fs`}],[`upp úr`,{lemma:`upp úr`,isStopword:!1,pos:`fs`}],[`þvert á`,{lemma:`þvert á`,isStopword:!1,pos:`fs`}],[`þar eð`,{lemma:`þar eð`,isStopword:!0,pos:`st`}],[`sameinuðu þjóðirnar`,{lemma:`Sameinuðu þjóðirnar`,isStopword:!1,pos:`entity`}],[`evrópusambandið`,{lemma:`Evrópusambandið`,isStopword:!1,pos:`entity`}],[`nato`,{lemma:`NATO`,isStopword:!1,pos:`entity`}],[`nató`,{lemma:`NATO`,isStopword:!1,pos:`entity`}]]);function V(e,t){for(let n=Math.min(4,e.length-t);n>=2;n--){let r=e.slice(t,t+n).join(` `).toLowerCase(),i=B.get(r);if(i)return{phrase:i,wordCount:n}}return null}function H(e){return B.has(e.toLowerCase())}function U(e){return B.get(e.toLowerCase())}const W=new Set([`word`]),G=new Set([`person`,`company`,`entity`]),K=new Set([`punctuation`,`s_begin`,`s_end`,`s_split`,`unknown`]);function q(t,n,r={}){let{bigrams:i,compoundSplitter:a,includeNumbers:o=!1,alwaysTryCompounds:s=!0}=r,c=e(t),l=[],u=[];for(let e=0;e<c.length;e++){let t=c[e];if(!K.has(t.kind)){if(G.has(t.kind)){l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!0});continue}if(t.kind===`number`||t.kind===`ordinal`){o&&l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1});continue}if(W.has(t.kind)){let e=t.text??``,r=n.lemmatize(e),i={original:e,kind:t.kind,lemmas:r,isEntity:!1},o=r.length===1&&r[0]===e.toLowerCase();if(a&&(s||o)){let t=a.split(e);if(t.isCompound){i.compoundSplit=t;let e=t.parts.flatMap(e=>n.lemmatize(e));i.lemmas=[...new Set([...r,...e])]}}l.push(i),u.push({index:l.length-1,token:t});continue}l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1})}}if(i&&u.length>0){let e=new D(n,i);for(let t=0;t<u.length;t++){let{index:n,token:r}=u[t],i=t>0?u[t-1].token:null,a=t<u.length-1?u[t+1].token:null,o=e.disambiguate(r.text??``,i?.text??null,a?.text??null);l[n].disambiguated=o.lemma,l[n].confidence=o.confidence}}else for(let{index:e}of u){let t=l[e];t.lemmas.length>0&&(t.disambiguated=t.lemmas[0],t.confidence=t.lemmas.length===1?1:.5)}return l}function J(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1}=r,c=q(e,n,r),l=new Set,u=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of c)if(!e.isEntity){if(o)for(let t of e.lemmas)u(t)||l.add(t);else e.disambiguated&&(u(e.disambiguated)||l.add(e.disambiguated));if(e.compoundSplit?.isCompound)for(let t of e.compoundSplit.parts){let e=n.lemmatize(t);for(let t of e)u(t)||l.add(t)}}return l}function Y(e,t,n,r={}){let i=performance.now(),a,o;switch(n){case`naive`:{let n=e.split(/\s+/).filter(e=>e.length>0),r=[];for(let e of n){let n=e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``);if(n){let e=t.lemmatize(n);r.push({original:n,kind:`word`,lemmas:e,isEntity:!1,disambiguated:e[0],confidence:e.length===1?1:.5})}}a=r,o=new Set(r.map(e=>e.disambiguated).filter(Boolean));break}case`tokenized`:a=q(e,t),o=new Set(a.filter(e=>e.kind===`word`&&e.lemmas.length>0).map(e=>e.lemmas[0]));break;case`disambiguated`:a=q(e,t,{bigrams:r.bigrams}),o=J(e,t,{bigrams:r.bigrams});break;case`full`:a=q(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter}),o=J(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter});break}let s=performance.now()-i,c=a.filter(e=>e.kind===`word`),l=c.length,u=c.filter(e=>e.lemmas.length>0&&!(e.lemmas.length===1&&e.lemmas[0]===e.original.toLowerCase())).length,d=c.filter(e=>e.lemmas.length>1).length,f=c.filter(e=>e.confidence!==void 0).map(e=>e.confidence),p=f.length>0?f.reduce((e,t)=>e+t,0)/f.length:0,m=c.filter(e=>e.compoundSplit?.isCompound).length,h=a.filter(e=>e.isEntity).length;return{wordCount:l,lemmatizedCount:u,coverage:l>0?u/l:0,ambiguousCount:d,ambiguityRate:l>0?d/l:0,avgConfidence:p,compoundsFound:m,entitiesSkipped:h,uniqueLemmas:o.size,timeMs:s}}export{d as BinaryLemmatizer,j as CASE_NAMES,r as CONTEXTUAL_STOPWORDS,R as CompoundSplitter,f as DISAMBIGUATION_RULES,D as Disambiguator,M as GENDER_NAMES,g as NOMINATIVE_PRONOUNS,N as NUMBER_NAMES,h as PREPOSITION_CASES,P as PROTECTED_LEMMAS,B as STATIC_PHRASES,t as STOPWORDS_IS,k as WORD_CLASS_NAMES,A as WORD_CLASS_NAMES_IS,b as applyGrammarRules,v as applyPrepositionRule,y as applyPronounVerbRule,_ as canGovernCase,z as createKnownLemmaSet,O as extractDisambiguatedLemmas,J as extractIndexableLemmas,S as getGovernedCases,U as getPhraseInfo,p as getRulesForWord,m as hasDisambiguationRules,i as isContextualStopword,H as isKnownPhrase,x as isKnownPreposition,n as isStopword,V as matchPhrase,q as processText,a as removeStopwords,Y as runBenchmark};
1
+ import{tokenize as e}from"tokenize-is";const t=new Set(`á.að.aðra.aðrar.aðrir.af.alla.allan.allar.allir.allnokkra.allnokkrar.allnokkrir.allnokkru.allnokkrum.allnokkuð.allnokkur.allnokkurn.allnokkurra.allnokkurrar.allnokkurri.allnokkurs.allnokkurt.allra.allrar.allri.alls.allt.alltað.allur.án.andspænis.annað.annaðhvort.annan.annar.annarra.annarrar.annarri.annars.árla.ásamt.auk.austan.austanundir.austur.báða.báðar.báðir.báðum.bæði.bak.beggja.eða.eður.ef.eftir.ég.ein.eina.einar.einhver.einhverja.einhverjar.einhverjir.einhverju.einhverjum.einhvern.einhverra.einhverrar.einhverri.einhvers.einir.einn.einna.einnar.einni.eins.einskis.einu.einum.eitt.eitthvað.eitthvert.ekkert.ella.ellegar.en.enda.enga.engan.engar.engin.enginn.engir.engra.engrar.engri.engu.engum.er.fáein.fáeina.fáeinar.fáeinir.fáeinna.fáeinum.fjær.fjarri.flestalla.flestallan.flestallar.flestallir.flestallra.flestallrar.flestallri.flestalls.flestallt.flestallur.flestöll.flestöllu.flestöllum.frá.fram.fyrir.fyrst.gagnstætt.gagnvart.gegn.gegnt.gegnum.hana.handa.handan.hann.hans.heldur.hennar.henni.hið.hin.hina.hinar.hinir.hinn.hinna.hinnar.hinni.hins.hinu.hinum.hitt.hjá.honum.hún.hvað.hvaða.hvenær.hver.hverja.hverjar.hverjir.hverju.hverjum.hvern.hverra.hverrar.hverri.hvers.hvert.hvílík.hvílíka.hvílíkan.hvílíkar.hvílíkir.hvílíkra.hvílíkrar.hvílíkri.hvílíks.hvílíkt.hvílíku.hvílíkum.hvílíkur.hvor.hvora.hvorar.hvorir.hvorki.hvorn.hvorra.hvorrar.hvorri.hvors.hvort.hvoru.hvorug.hvoruga.hvorugan.hvorugar.hvorugir.hvorugra.hvorugrar.hvorugri.hvorugs.hvorugt.hvorugu.hvorugum.hvorugur.hvorum.í.inn.innan.innanundir.jafnframt.jafnhliða.kring.kringum.með.meðal.meðan.meður.mér.mestalla.mestallan.mestallar.mestallir.mestallra.mestallrar.mestallri.mestalls.mestallt.mestallur.mestöll.mestöllu.mestöllum.miðli.mig.milli.millum.mín.mína.mínar.mínir.minn.minna.minnar.minni.míns.mínu.mínum.mitt.mót.móti.nær.nærri.næst.næstum.nálægt.né.neðan.nein.neina.neinar.neinir.neinn.neinna.neinnar.neinni.neins.neinu.neinum.neitt.nema.niður.nokkra.nokkrar.nokkrir.nokkru.nokkrum.nokkuð.nokkur.nokkurn.nokkurra.nokkurrar.nokkurri.nokkurs.nokkurt.norðan.nú.öðru.öðrum.of.ofan.ofar.og.óháð.okkar.okkur.öll.öllu.öllum.önnur.órafjarri.oss.sá.sakir.sama.saman.samar.samfara.samhliða.sami.samir.samkvæmt.samra.samrar.samri.sams.samskipa.samt.samtímis.samur.sem.sér.sérhvað.sérhver.sérhverja.sérhverjar.sérhverjir.sérhverju.sérhverjum.sérhvern.sérhverra.sérhverrar.sérhverri.sérhvers.sérhvert.síðan.síðla.sig.sín.sína.sínar.sínhver.sínhverja.sínhverjar.sínhverjir.sínhverju.sínhverjum.sínhvern.sínhverra.sínhverrar.sínhverri.sínhvers.sínhvert.sínhvor.sínhvora.sínhvorar.sínhvorir.sínhvorn.sínhvorra.sínhvorrar.sínhvorri.sínhvors.sínhvort.sínhvoru.sínhvorum.sínir.sinn.sinna.sinnar.sinnhver.sinnhverja.sinnhverjar.sinnhverjir.sinnhverju.sinnhverjum.sinnhvern.sinnhverra.sinnhverrar.sinnhverri.sinnhvers.sinnhvert.sinnhvor.sinnhvora.sinnhvorar.sinnhvorir.sinnhvorn.sinnhvorra.sinnhvorrar.sinnhvorri.sinnhvors.sinnhvort.sinnhvoru.sinnhvorum.sinni.síns.sínu.sínum.sitt.sitthvað.sitthver.sitthverja.sitthverjar.sitthverjir.sitthverju.sitthverjum.sitthvern.sitthverra.sitthverrar.sitthverri.sitthvers.sitthvert.sitthvor.sitthvora.sitthvorar.sitthvorir.sitthvorn.sitthvorra.sitthvorrar.sitthvorri.sitthvors.sitthvort.sitthvoru.sitthvorum.sjálf.sjálfa.sjálfan.sjálfar.sjálfir.sjálfra.sjálfrar.sjálfri.sjálfs.sjálft.sjálfu.sjálfum.sjálfur.slík.slíka.slíkan.slíkar.slíkir.slíkra.slíkrar.slíkri.slíks.slíkt.slíku.slíkum.slíkur.snemma.sökum.söm.sömu.sömum.sú.sum.suma.suman.sumar.sumir.sumra.sumrar.sumri.sums.sumt.sumu.sumum.sumur.sunnan.svo.til.tráss.um.umfram.umhverfis.undan.undir.uns.upp.úr.út.utan.útundan.vegna.vér.vestan.vestur.vettugi.við.viður.vor.vora.vorar.vorir.vorn.vorra.vorrar.vorri.vors.vort.voru.vorum.yðar.yður.yfir.ykkar.ykkur.ýmis.ýmiss.ýmissa.ýmissar.ýmissi.ýmist.ýmsa.ýmsan.ýmsar.ýmsir.ýmsu.ýmsum.þá.það.þær.þann.þar.þau.þegar.þeim.þeir.þeirra.þeirrar.þeirri.þennan.þér.þess.þessa.þessar.þessara.þessarar.þessari.þessi.þessir.þessu.þessum.þetta.þið.þig.þín.þína.þínar.þínir.þinn.þinna.þinnar.þinni.þíns.þínu.þínum.þitt.þó.þónokkra.þónokkrar.þónokkrir.þónokkru.þónokkrum.þónokkuð.þónokkur.þónokkurn.þónokkurra.þónokkurrar.þónokkurri.þónokkurs.þónokkurt.þótt.þú.því.þvílík.þvílíka.þvílíkan.þvílíkar.þvílíkir.þvílíkra.þvílíkrar.þvílíkri.þvílíks.þvílíkt.þvílíku.þvílíkum.þvílíkur`.split(`.`));function n(e){return t.has(e.toLowerCase())}const r=new Map([[`á`,new Set([`fs`,`ao`])],[`við`,new Set([`fs`,`fn`])],[`af`,new Set([`fs`,`ao`])],[`til`,new Set([`fs`])],[`um`,new Set([`fs`])],[`frá`,new Set([`fs`])],[`yfir`,new Set([`fs`,`ao`])],[`undir`,new Set([`fs`,`ao`])],[`fyrir`,new Set([`fs`,`ao`])],[`eftir`,new Set([`fs`,`ao`])],[`gegn`,new Set([`fs`])],[`hjá`,new Set([`fs`])],[`úr`,new Set([`fs`])],[`í`,new Set([`fs`])]]);function i(e,n){let i=e.toLowerCase(),a=r.get(i);return a&&n?a.has(n):t.has(i)}function a(e){return e.filter(e=>!n(e))}const o=1279610177,s=[`no`,`so`,`lo`,`ao`,`fs`,`fn`,`st`,`to`,`gr`,`uh`],c=[void 0,`nf`,`þf`,`þgf`,`ef`],l=[void 0,`kk`,`kvk`,`hk`],u=[`et`,`ft`];var d=class e{buffer;stringPool;lemmaOffsets;lemmaLengths;wordOffsets;wordLengths;entryOffsets;entries;bigramW1Offsets;bigramW1Lengths;bigramW2Offsets;bigramW2Lengths;bigramFreqs;lemmaCount;wordCount;entryCount;bigramCount;version;decoder=new TextDecoder(`utf-8`);constructor(e){this.buffer=e;let t=new DataView(e),n=t.getUint32(0,!0);if(n!==o)throw Error(`Invalid binary format: expected magic 0x${o.toString(16)}, got 0x${n.toString(16)}`);if(this.version=t.getUint32(4,!0),this.version!==1&&this.version!==2)throw Error(`Unsupported version: ${this.version}`);let r=t.getUint32(8,!0);this.lemmaCount=t.getUint32(12,!0),this.wordCount=t.getUint32(16,!0),this.entryCount=t.getUint32(20,!0),this.bigramCount=t.getUint32(24,!0);let i=32;this.stringPool=new Uint8Array(e,i,r),i+=r,this.lemmaOffsets=new Uint32Array(e,i,this.lemmaCount),i+=this.lemmaCount*4,this.lemmaLengths=new Uint8Array(e,i,this.lemmaCount),i+=this.lemmaCount,i=i+3&-4,this.wordOffsets=new Uint32Array(e,i,this.wordCount),i+=this.wordCount*4,this.wordLengths=new Uint8Array(e,i,this.wordCount),i+=this.wordCount,i=i+3&-4,this.entryOffsets=new Uint32Array(e,i,this.wordCount+1),i+=(this.wordCount+1)*4,this.entries=new Uint32Array(e,i,this.entryCount),i+=this.entryCount*4,this.bigramW1Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW1Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramW2Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW2Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramFreqs=new Uint32Array(e,i,this.bigramCount)}static async load(t,n={}){let r=await(n.fetch??fetch)(t);if(!r.ok)throw Error(`Failed to load binary data: ${r.status}`);return new e(await r.arrayBuffer())}static loadFromBuffer(t){return new e(t)}getString(e,t){return this.decoder.decode(this.stringPool.subarray(e,e+t))}getLemma(e){return this.getString(this.lemmaOffsets[e],this.lemmaLengths[e])}getWord(e){return this.getString(this.wordOffsets[e],this.wordLengths[e])}findWord(e){let t=0,n=this.wordCount-1;for(;t<=n;){let r=t+n>>>1,i=this.getWord(r);if(i===e)return r;i<e?t=r+1:n=r-1}return-1}lemmatize(e,t={}){let n=e.toLowerCase(),r=this.findWord(n);if(r===-1)return[n];let i=this.entryOffsets[r],a=this.entryOffsets[r+1],{wordClass:o}=t,c=new Set,l=[];for(let e=i;e<a;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=s[n];if(o&&r!==o)continue;let i=this.getLemma(t);c.has(i)||(c.add(i),l.push(i))}return l.length===0?[n]:l}unpackEntry(e){return this.version===1?{lemmaIdx:e>>>4,posCode:e&15,caseCode:0,genderCode:0,numberCode:0}:{lemmaIdx:e>>>10,posCode:e&15,caseCode:e>>>4&7,genderCode:e>>>7&3,numberCode:e>>>9&1}}lemmatizeWithPOS(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=new Set,o=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=this.getLemma(t),i=s[n]??``,c=`${r}:${i}`;a.has(c)||(a.add(c),o.push({lemma:r,pos:i}))}return o}lemmatizeWithMorph(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n,caseCode:r,genderCode:i,numberCode:o}=this.unpackEntry(this.entries[e]),d={},f=c[r],p=l[i],m=u[o];f&&(d.case=f),p&&(d.gender=p),m&&(d.number=m),a.push({lemma:this.getLemma(t),pos:s[n]??``,morph:Object.keys(d).length>0?d:void 0})}return a}hasMorphFeatures(){return this.version>=2}getVersion(){return this.version}findBigram(e,t){let n=0,r=this.bigramCount-1;for(;n<=r;){let i=n+r>>>1,a=this.getString(this.bigramW1Offsets[i],this.bigramW1Lengths[i]);if(a<e)n=i+1;else if(a>e)r=i-1;else{let e=this.getString(this.bigramW2Offsets[i],this.bigramW2Lengths[i]);if(e===t)return i;e<t?n=i+1:r=i-1}}return-1}bigramFreq(e,t){let n=this.findBigram(e.toLowerCase(),t.toLowerCase());return n===-1?0:this.bigramFreqs[n]}freq(e,t){return this.bigramFreq(e,t)}isKnown(e){return this.findWord(e.toLowerCase())!==-1}get lemmaCountValue(){return this.lemmaCount}get wordFormCount(){return this.wordCount}get bigramCountValue(){return this.bigramCount}get bufferSize(){return this.buffer.byteLength}getAllLemmas(){let e=[];for(let t=0;t<this.lemmaCount;t++)e.push(this.getLemma(t));return e}};const f=[{word:`á`,prefer:`so`,over:`fs`,context:`after_pronoun`,description:`á after pronoun = verb 'eiga' (I own, you own)`},{word:`á`,prefer:`fs`,over:`so`,context:`before_noun`,description:`á before noun = preposition (on, at)`},{word:`við`,prefer:`fn`,over:`fs`,context:`sentence_start`,description:`við at sentence start = pronoun 'we'`},{word:`við`,prefer:`fs`,over:`fn`,context:`before_noun`,description:`við before noun = preposition 'by/at'`},{word:`af`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`af before noun = preposition 'of/from'`},{word:`til`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`til before noun = preposition 'to'`},{word:`um`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`um before noun = preposition 'about/around'`},{word:`yfir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`yfir before noun = preposition 'over'`},{word:`undir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`undir before noun = preposition 'under'`},{word:`fyrir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`fyrir before noun = preposition 'for/before'`},{word:`eftir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`eftir before noun = preposition 'after'`},{word:`frá`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`frá before noun = preposition 'from'`},{word:`með`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`með before noun = preposition 'with'`},{word:`í`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`í before noun = preposition 'in'`},{word:`úr`,prefer:`fs`,over:`no`,context:`before_noun`,description:`úr before noun = preposition 'out of'`}];function p(e){let t=e.toLowerCase();return f.filter(e=>e.word===t)}function m(e){return f.some(t=>t.word===e.toLowerCase())}const h=new Map([[`á`,new Set([`þf`,`þgf`])],[`í`,new Set([`þf`,`þgf`])],[`við`,new Set([`þf`,`þgf`])],[`með`,new Set([`þf`,`þgf`])],[`undir`,new Set([`þf`,`þgf`])],[`yfir`,new Set([`þf`,`þgf`])],[`fyrir`,new Set([`þf`,`þgf`])],[`um`,new Set([`þf`])],[`gegnum`,new Set([`þf`])],[`kringum`,new Set([`þf`])],[`umhverfis`,new Set([`þf`])],[`af`,new Set([`þgf`])],[`frá`,new Set([`þgf`])],[`hjá`,new Set([`þgf`])],[`úr`,new Set([`þgf`])],[`að`,new Set([`þgf`])],[`móti`,new Set([`þgf`])],[`nálægt`,new Set([`þgf`])],[`gegn`,new Set([`þgf`])],[`gagnvart`,new Set([`þgf`])],[`handa`,new Set([`þgf`])],[`meðal`,new Set([`ef`])],[`til`,new Set([`ef`])],[`án`,new Set([`ef`])],[`vegna`,new Set([`ef`])],[`sakir`,new Set([`ef`])],[`utan`,new Set([`ef`])],[`innan`,new Set([`ef`])],[`meðfram`,new Set([`þgf`])],[`milli`,new Set([`ef`])],[`auk`,new Set([`ef`])],[`í stað`,new Set([`ef`])]]),g=new Set([`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`]);function _(e,t){return t?h.get(e)?.has(t)??!1:!1}function v(e,t){let n=e.filter(e=>e.pos===`fs`);if(n.length===0)return null;for(let e of n)for(let n of t)if(n.morph?.case&&_(e.lemma,n.morph.case))return{lemma:e.lemma,pos:`fs`,rule:`prep+${n.morph.case}`,confidence:.9};return null}function y(e,t){if(!t)return null;let n=t.toLowerCase();if(!g.has(n))return null;let r=e.filter(e=>e.pos===`so`);return r.length===0||!e.some(e=>e.pos!==`so`)?null:{lemma:(r.find(e=>e.lemma===`eiga`)??r[0]).lemma,pos:`so`,rule:`pronoun+verb`,confidence:.85}}function b(e,t,n){if(!t||!n?.lemmatizeWithPOS)return null;let r=n.lemmatizeWithPOS(t),i=r.find(e=>e.pos===`fs`);if(!i)return null;let a=r.some(e=>e.pos===`fn`),o=e.some(e=>e.pos===`so`);if(a&&o)return null;let s=h.get(i.lemma);if(!s)return null;let c=e.filter(e=>e.pos===`no`);for(let e of c)if(e.morph?.case&&s.has(e.morph.case))return{lemma:e.lemma,pos:`no`,rule:`noun_after_prep+${e.morph.case}`,confidence:.9};return null}function x(e,t,n,r=null){return v(e,n)||b(e,t,r)||y(e,t)||null}function S(e){return h.has(e)}function C(e){return h.get(e)}const w={name:`unambiguous`,run(e){return e.length===1?{lemma:e[0].lemma,pos:e[0].pos,confidence:1}:null}},T={name:`preference_rules`,run(e,t,n){if(!n.usePreferenceRules)return null;for(let n of f){let r=E(n,e,t);if(r)return{lemma:r.lemma,pos:r.pos,confidence:.85}}return null}};function E(e,t,n){let r=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.prefer),i=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.over);if(!r||!i)return null;if(e.context===`before_noun`){let e=n.nextWord;if(e&&/^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(e))return r}else if(e.context===`before_verb`){let e=n.nextWord?.toLowerCase();if(e&&![`þessi`,`þetta`,`sá`,`sú`,`það`,`hinn`,`hin`,`hið`].includes(e))return r}else if(e.context===`after_pronoun`){let e=n.prevWord?.toLowerCase();if(e&&[`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`].includes(e))return r}return null}const D=[w,T,{name:`grammar_rules`,run(e,t,n){if(!n.useGrammarRules)return null;let r=e.map(e=>({...e,morph:void 0}));if(n.lemmatizer.lemmatizeWithMorph){let e=t.allTokens[t.index];if(e){let t=n.lemmatizer.lemmatizeWithMorph(e);r.length=0,r.push(...t)}}let i=x(r,t.prevWord,t.nextWordMorph??[],n.lemmatizer);return i?{lemma:i.lemma,pos:i.pos,confidence:i.confidence}:null}},{name:`word_bigrams`,run(e,t,n){if(!n.bigrams||e.length===0)return null;let r=[];for(let i of e){let e=0;if(t.prevWord){let r=t.prevLemmas||n.lemmatizer.lemmatize(t.prevWord);for(let t of r){let r=n.bigrams.freq(t,i.lemma);r>0&&(e+=Math.log(r+1)*n.leftWeight)}}if(t.nextWord){let r=t.nextLemmas||n.lemmatizer.lemmatize(t.nextWord);for(let t of r){let r=n.bigrams.freq(i.lemma,t);r>0&&(e+=Math.log(r+1)*n.rightWeight)}}r.push({candidate:i,score:e})}if(r.sort((e,t)=>t.score-e.score),r.length>0&&r[0].score>0){let e=r[0].score,t=r.reduce((e,t)=>e+Math.exp(t.score),0),n=t>0?Math.exp(e)/t:.5;return{lemma:r[0].candidate.lemma,pos:r[0].candidate.pos,confidence:n}}return null}},{name:`fallback`,run(e){return e.length>0?{lemma:e[0].lemma,pos:e[0].pos,confidence:1/e.length}:null}}];var O=class{lemmatizer;bigrams;leftWeight;rightWeight;usePreferenceRules;useGrammarRules;constructor(e,t=null,n={}){this.lemmatizer=e,this.bigrams=t,this.leftWeight=n.leftWeight??1,this.rightWeight=n.rightWeight??1,this.usePreferenceRules=n.usePreferenceRules??!0,this.useGrammarRules=n.useGrammarRules??!0}disambiguate(e,t,n,r={}){let i;i=this.lemmatizer.lemmatizeWithPOS?this.lemmatizer.lemmatizeWithPOS(e):this.lemmatizer.lemmatize(e).map(e=>({lemma:e,pos:`no`}));let a=i.map(e=>e.lemma),o=e,s;n&&this.lemmatizer.lemmatizeWithMorph&&(s=this.lemmatizer.lemmatizeWithMorph(n));let c={prevWord:t,nextWord:n,prevLemmas:r.prevLemmas,nextLemmas:r.nextLemmas,nextWordMorph:s,allTokens:[e],index:0};for(let e of D){let t=e.run(i,c,this);if(t)return{token:o,lemma:t.lemma,pos:t.pos,candidates:a,candidatesWithPOS:i,ambiguous:a.length>1,confidence:t.confidence,resolvedBy:e.name}}return{token:o,lemma:e.toLowerCase(),candidates:a,candidatesWithPOS:i,ambiguous:!1,confidence:0,resolvedBy:`none`}}disambiguateAll(e){let t=[];for(let n=0;n<e.length;n++){let r=e[n],i=n>0?e[n-1]:null,a=n<e.length-1?e[n+1]:null;t.push(this.disambiguate(r,i,a))}return t}extractLemmas(e){let t=new Set,n=this.disambiguateAll(e);for(let e of n)t.add(e.lemma);return t}};function k(e,n,r,i={}){let{tokenize:a,removeStopwords:o}=i,s=a?a(e):e.split(/\s+/).filter(e=>e.length>0).map(e=>e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``)).filter(e=>e.length>0),c=new O(n,r).extractLemmas(s);if(o)for(let e of c)t.has(e)&&c.delete(e);return c}const A={no:`noun`,so:`verb`,lo:`adjective`,ao:`adverb`,fs:`preposition`,fn:`pronoun`,st:`conjunction`,to:`numeral`,gr:`article`,uh:`interjection`},j={no:`nafnorð`,so:`sagnorð`,lo:`lýsingarorð`,ao:`atviksorð`,fs:`forsetning`,fn:`fornafn`,st:`samtenging`,to:`töluorð`,gr:`greinir`,uh:`upphrópun`},M={nf:`nominative`,þf:`accusative`,þgf:`dative`,ef:`genitive`},N={kk:`masculine`,kvk:`feminine`,hk:`neuter`},P={et:`singular`,ft:`plural`},F=new Set(`ísland.england.írland.skotland.finnland.grænland.holland.þýskaland.frakkland.pólland.tékkland.svissland.rússland.eistland.lettland.litháen.danmörk.noregur.svíþjóð.bandaríkin.spánn.portúgal.ítalía.grikkland.þingvellir.akureyri.ísafjörður.reykjavík.keflavík.hafnarfjörður.kópavogur.seltjarnarnes.garðabær.mosfellsbær.vestmannaeyjar.húsavík.sauðárkrókur.siglufjörður.ólafsfjörður.dalvík.egilsstaðir.neskaupstaður.seyðisfjörður.eskifjörður.reyðarfjörður.fáskrúðsfjörður.stöðvarfjörður.djúpivogur.höfn.vík.selfoss.hveragerði.þorlákshöfn.grindavík.sandgerði.borgarnes.stykkishólmur.grundarfjörður.ólafsvík.búðardalur.patreksfjörður.flateyri.suðureyri.bolungarvík.hólmavík.hvammstangi.blönduós.skagaströnd.varmahlíð.hlíðarendi.bergþórshvol.íslandsbanki.landsbankinn.arionbanki.alþingi`.split(`.`)),I=new Set(`maður.kona.stjóri.ráðherra.forseti.formaður.fulltrúi.starfsmaður.hús.staður.vegur.borg.bær.dalur.fjörður.félag.banki.sjóður.stofnun.ráð.rannsókn.greiðsla.mál.kerfi.verk.þjónusta.rekstur.viðskipti.verð.kostnaður`.split(`.`)),L=new Set([`vera`,`hafa`,`gera`,`fara`,`koma`,`segja`,`vilja`,`mega`,`þurfa`,`verða`,`geta`,`sjá`,`taka`,`eiga`,`láta`,`halda`,`leyfa`,`búa`]),R=[`s`,`u`,`a`];var z=class{lemmatizer;minPartLength;tryLinkingLetters;knownLemmas;mode;constructor(e,t,n={}){this.lemmatizer=e,this.knownLemmas=t,this.minPartLength=n.minPartLength??3,this.tryLinkingLetters=n.tryLinkingLetters??!0,this.mode=n.mode??`balanced`}noSplit(e,t){return{word:e,parts:t,indexTerms:t,confidence:0,isCompound:!1}}split(e){let t=e.toLowerCase(),n=this.lemmatizer.lemmatize(e),r=n[0]?.toLowerCase();if(r&&F.has(r)||F.has(t))return this.noSplit(e,n);let i=n.length>0&&n[0].toLowerCase()!==t,a=n.length===1;if(this.mode===`conservative`)return e.includes(`-`)?this.splitAtHyphen(e,n):this.noSplit(e,n);if(this.mode===`balanced`&&i&&a&&t.length<12||t.length<this.minPartLength*2)return this.noSplit(e,n);let o=[];for(let e=this.minPartLength;e<=t.length-this.minPartLength;e++){let n=t.slice(0,e),r=t.slice(e),i=this.trySplit(n,r);if(i&&o.push(i),this.tryLinkingLetters){for(let e of R)if(n.endsWith(e)&&n.length>this.minPartLength){let e=n.slice(0,-1),t=this.trySplit(e,r);t&&o.push({...t,score:t.score*.95})}}}if(o.length===0)return this.noSplit(e,n);o.sort((e,t)=>t.score-e.score);let s=o[0];if(this.mode===`balanced`&&i&&s.score<.6)return this.noSplit(e,n);let c=[...new Set([...s.leftParts,...s.rightParts])];return{word:e,parts:c,indexTerms:[...new Set([...c,t])],confidence:Math.min(s.score,1),isCompound:!0}}splitAtHyphen(e,t){let n=e.split(`-`).filter(e=>e.length>0);if(n.length<2)return this.noSplit(e,t);let r=[];for(let e of n){let t=this.lemmatizer.lemmatize(e);r.push(...t)}let i=[...new Set(r)];return{word:e,parts:i,indexTerms:[...new Set([...i,e.toLowerCase()])],confidence:.9,isCompound:!0}}trySplit(e,t){let n=this.lemmatizer.lemmatize(e),r=this.lemmatizer.lemmatize(t),i=[...new Set(n.filter(e=>this.knownLemmas.has(e)))],a=[...new Set(r.filter(e=>this.knownLemmas.has(e)))];if(i.length===0||a.length===0)return null;let o=0,s=1-Math.abs(e.length-t.length)/(e.length+t.length);o+=s*.2;let c=(e.length+t.length)/2,l=Math.min(c/6,1);o+=l*.2,a.some(e=>I.has(e))&&(o+=.3);let u=i.some(e=>L.has(e)),d=a.some(e=>L.has(e));return u&&d?o-=.3:!u&&!d&&(o+=.2),(e.length<4||t.length<4)&&(o-=.15),{leftParts:i,rightParts:a,score:Math.max(0,o)}}getAllLemmas(e){return this.split(e).indexTerms}};function B(e){return new Set(e.map(e=>e.toLowerCase()))}const V=new Map([[`til dæmis`,{lemma:`til dæmi`,isStopword:!0,pos:`ao`}],[`með öðrum orðum`,{lemma:`með annar orð`,isStopword:!0,pos:`ao`}],[`í raun`,{lemma:`í raun`,isStopword:!0,pos:`ao`}],[`í raun og veru`,{lemma:`í raun og vera`,isStopword:!0,pos:`ao`}],[`af og til`,{lemma:`af og til`,isStopword:!0,pos:`ao`}],[`aftur á móti`,{lemma:`aftur á mót`,isStopword:!0,pos:`ao`}],[`alla vega`,{lemma:`allur vegur`,isStopword:!0,pos:`ao`}],[`alls ekki`,{lemma:`alls ekki`,isStopword:!0,pos:`ao`}],[`alls staðar`,{lemma:`allur staður`,isStopword:!0,pos:`ao`}],[`allt í allt`,{lemma:`allur í allur`,isStopword:!0,pos:`ao`}],[`annars vegar`,{lemma:`annar vegur`,isStopword:!0,pos:`ao`}],[`auk þess`,{lemma:`auk það`,isStopword:!0,pos:`ao`}],[`að auki`,{lemma:`að auki`,isStopword:!0,pos:`ao`}],[`að vísu`,{lemma:`að vís`,isStopword:!0,pos:`ao`}],[`að sjálfsögðu`,{lemma:`að sjálfsagður`,isStopword:!0,pos:`ao`}],[`að minnsta kosti`,{lemma:`að lítill kostur`,isStopword:!0,pos:`ao`}],[`að öllu leyti`,{lemma:`að allur leyti`,isStopword:!0,pos:`ao`}],[`að nokkru leyti`,{lemma:`að nokkur leyti`,isStopword:!0,pos:`ao`}],[`ef til vill`,{lemma:`ef til vilja`,isStopword:!0,pos:`ao`}],[`einhvers staðar`,{lemma:`einhver staður`,isStopword:!0,pos:`ao`}],[`einhvern veginn`,{lemma:`einhver vegur`,isStopword:!0,pos:`ao`}],[`ekki síst`,{lemma:`ekki síður`,isStopword:!0,pos:`ao`}],[`engu að síður`,{lemma:`enginn að síður`,isStopword:!0,pos:`ao`}],[`fyrst og fremst`,{lemma:`snemma og fremri`,isStopword:!0,pos:`ao`}],[`hins vegar`,{lemma:`hinn vegur`,isStopword:!0,pos:`ao`}],[`hér og þar`,{lemma:`hér og þar`,isStopword:!0,pos:`ao`}],[`hér um bil`,{lemma:`hér um bil`,isStopword:!0,pos:`ao`}],[`hér á landi`,{lemma:`hér á land`,isStopword:!0,pos:`ao`}],[`hvað mest`,{lemma:`hvað mjög`,isStopword:!0,pos:`ao`}],[`hverju sinni`,{lemma:`hver sinn`,isStopword:!0,pos:`ao`}],[`hvorki né`,{lemma:`hvorki né`,isStopword:!0,pos:`ao`}],[`í burtu`,{lemma:`í burtu`,isStopword:!0,pos:`ao`}],[`í gær`,{lemma:`í gær`,isStopword:!0,pos:`ao`}],[`í senn`,{lemma:`í senn`,isStopword:!0,pos:`ao`}],[`í sífellu`,{lemma:`í sífella`,isStopword:!0,pos:`ao`}],[`lengi vel`,{lemma:`lengi vel`,isStopword:!0,pos:`ao`}],[`meira að segja`,{lemma:`mikill að segja`,isStopword:!0,pos:`ao`}],[`meira og minna`,{lemma:`mikill og lítill`,isStopword:!0,pos:`ao`}],[`meðal annars`,{lemma:`meðal annar`,isStopword:!0,pos:`ao`}],[`nokkurn veginn`,{lemma:`nokkur vegur`,isStopword:!0,pos:`ao`}],[`og svo framvegis`,{lemma:`og svo framvegis`,isStopword:!0,pos:`ao`}],[`satt að segja`,{lemma:`sannur að segja`,isStopword:!0,pos:`ao`}],[`sem betur fer`,{lemma:`sem vel fara`,isStopword:!0,pos:`ao`}],[`smám saman`,{lemma:`smátt saman`,isStopword:!0,pos:`ao`}],[`svo sem`,{lemma:`svo sem`,isStopword:!0,pos:`ao`}],[`sér í lagi`,{lemma:`sér í lag`,isStopword:!0,pos:`ao`}],[`til og frá`,{lemma:`til og frá`,isStopword:!0,pos:`ao`}],[`til baka`,{lemma:`til baka`,isStopword:!0,pos:`ao`}],[`vítt og breitt`,{lemma:`vítt og breitt`,isStopword:!0,pos:`ao`}],[`á ný`,{lemma:`á ný`,isStopword:!0,pos:`ao`}],[`á meðan`,{lemma:`á meðan`,isStopword:!0,pos:`ao`}],[`á sama tíma`,{lemma:`á samur tími`,isStopword:!0,pos:`ao`}],[`á hinn bóginn`,{lemma:`á hinn bógur`,isStopword:!0,pos:`ao`}],[`þar af leiðandi`,{lemma:`þar af leiða`,isStopword:!0,pos:`ao`}],[`þar að auki`,{lemma:`þar að auki`,isStopword:!0,pos:`ao`}],[`það er að segja`,{lemma:`það vera að segja`,isStopword:!0,pos:`ao`}],[`þess vegna`,{lemma:`það vegna`,isStopword:!0,pos:`ao`}],[`því miður`,{lemma:`það lítt`,isStopword:!0,pos:`ao`}],[`þrátt fyrir`,{lemma:`þrátt fyrir`,isStopword:!0,pos:`ao`}],[`á dögunum`,{lemma:`á dagur`,isStopword:!0,pos:`ao`}],[`á sínum tíma`,{lemma:`á sinn tími`,isStopword:!0,pos:`ao`}],[`á endanum`,{lemma:`á endi`,isStopword:!0,pos:`ao`}],[`einu sinni`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`eitt sinn`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`í fyrsta sinn`,{lemma:`í fyrstur sinn`,isStopword:!1,pos:`ao`}],[`í kvöld`,{lemma:`í kvöld`,isStopword:!1,pos:`ao`}],[`í morgun`,{lemma:`í morgunn`,isStopword:!1,pos:`ao`}],[`á morgun`,{lemma:`á morgunn`,isStopword:!1,pos:`ao`}],[`fyrir hönd`,{lemma:`fyrir hönd`,isStopword:!1,pos:`fs`}],[`með tilliti til`,{lemma:`með tillit til`,isStopword:!1,pos:`fs`}],[`í ljósi`,{lemma:`í ljós`,isStopword:!1,pos:`fs`}],[`í stað`,{lemma:`í staður`,isStopword:!1,pos:`fs`}],[`fyrir aftan`,{lemma:`fyrir aftan`,isStopword:!1,pos:`fs`}],[`fyrir austan`,{lemma:`fyrir austan`,isStopword:!1,pos:`fs`}],[`fyrir framan`,{lemma:`fyrir framan`,isStopword:!1,pos:`fs`}],[`fyrir handan`,{lemma:`fyrir handan`,isStopword:!1,pos:`fs`}],[`fyrir innan`,{lemma:`fyrir innan`,isStopword:!1,pos:`fs`}],[`fyrir neðan`,{lemma:`fyrir neðan`,isStopword:!1,pos:`fs`}],[`fyrir norðan`,{lemma:`fyrir norðan`,isStopword:!1,pos:`fs`}],[`fyrir ofan`,{lemma:`fyrir ofan`,isStopword:!1,pos:`fs`}],[`fyrir sunnan`,{lemma:`fyrir sunnan`,isStopword:!1,pos:`fs`}],[`fyrir utan`,{lemma:`fyrir utan`,isStopword:!1,pos:`fs`}],[`fyrir vestan`,{lemma:`fyrir vestan`,isStopword:!1,pos:`fs`}],[`í gegnum`,{lemma:`í gegnum`,isStopword:!1,pos:`fs`}],[`í kringum`,{lemma:`í kringum`,isStopword:!1,pos:`fs`}],[`innan við`,{lemma:`innan við`,isStopword:!1,pos:`fs`}],[`upp úr`,{lemma:`upp úr`,isStopword:!1,pos:`fs`}],[`þvert á`,{lemma:`þvert á`,isStopword:!1,pos:`fs`}],[`þar eð`,{lemma:`þar eð`,isStopword:!0,pos:`st`}],[`sameinuðu þjóðirnar`,{lemma:`Sameinuðu þjóðirnar`,isStopword:!1,pos:`entity`}],[`evrópusambandið`,{lemma:`Evrópusambandið`,isStopword:!1,pos:`entity`}],[`nato`,{lemma:`NATO`,isStopword:!1,pos:`entity`}],[`nató`,{lemma:`NATO`,isStopword:!1,pos:`entity`}]]);function H(e,t){for(let n=Math.min(4,e.length-t);n>=2;n--){let r=e.slice(t,t+n).join(` `).toLowerCase(),i=V.get(r);if(i)return{phrase:i,wordCount:n}}return null}function U(e){return V.has(e.toLowerCase())}function W(e){return V.get(e.toLowerCase())}const G=new Set([`word`]),K=new Set([`person`,`company`,`entity`]),q=new Set([`punctuation`,`s_begin`,`s_end`,`s_split`,`unknown`]);function J(t,n,r={}){let{bigrams:i,compoundSplitter:a,includeNumbers:o=!1,alwaysTryCompounds:s=!0}=r,c=e(t),l=[],u=[],d=new Map,f=e=>{let t=e.toLowerCase(),r=d.get(t);if(r)return r;let i=n.lemmatize(e);return d.set(t,i),i};for(let e=0;e<c.length;e++){let t=c[e];if(!q.has(t.kind)){if(K.has(t.kind)){l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!0});continue}if(t.kind===`number`||t.kind===`ordinal`){o&&l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1});continue}if(G.has(t.kind)){let e=t.text??``,n=f(e),r={original:e,kind:t.kind,lemmas:n,isEntity:!1},i=n.length===1&&n[0]===e.toLowerCase();if(a&&(s||i)){let t=a.split(e);if(t.isCompound){r.compoundSplit=t;let e=t.parts.flatMap(e=>f(e));r.compoundLemmas=e,r.lemmas=[...new Set([...n,...e])]}}l.push(r),u.push({index:l.length-1,token:t});continue}l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1})}}if(i&&u.length>0){let e=new O(n,i);for(let t=0;t<u.length;t++){let{index:n,token:r}=u[t],i=t>0?u[t-1].token:null,a=t<u.length-1?u[t+1].token:null,o=e.disambiguate(r.text??``,i?.text??null,a?.text??null,{prevLemmas:i?.text?f(i.text):void 0,nextLemmas:a?.text?f(a.text):void 0});l[n].disambiguated=o.lemma,l[n].confidence=o.confidence}}else for(let{index:e}of u){let t=l[e];t.lemmas.length>0&&(t.disambiguated=t.lemmas[0],t.confidence=t.lemmas.length===1?1:.5)}return l}function Y(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1}=r,c=J(e,n,r),l=new Set,u=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of c)if(!e.isEntity){if(o)for(let t of e.lemmas)u(t)||l.add(t);else e.disambiguated&&(u(e.disambiguated)||l.add(e.disambiguated));if(e.compoundSplit?.isCompound){let t=e.compoundLemmas?e.compoundLemmas:e.compoundSplit.parts.flatMap(e=>n.lemmatize(e));for(let e of t)u(e)||l.add(e)}}return l}function X(e,t,n,r={}){let i=performance.now(),a,o;switch(n){case`naive`:{let n=e.split(/\s+/).filter(e=>e.length>0),r=[];for(let e of n){let n=e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``);if(n){let e=t.lemmatize(n);r.push({original:n,kind:`word`,lemmas:e,isEntity:!1,disambiguated:e[0],confidence:e.length===1?1:.5})}}a=r,o=new Set(r.map(e=>e.disambiguated).filter(Boolean));break}case`tokenized`:a=J(e,t),o=new Set(a.filter(e=>e.kind===`word`&&e.lemmas.length>0).map(e=>e.lemmas[0]));break;case`disambiguated`:a=J(e,t,{bigrams:r.bigrams}),o=Y(e,t,{bigrams:r.bigrams});break;case`full`:a=J(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter}),o=Y(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter});break}let s=performance.now()-i,c=a.filter(e=>e.kind===`word`),l=c.length,u=c.filter(e=>e.lemmas.length>0&&!(e.lemmas.length===1&&e.lemmas[0]===e.original.toLowerCase())).length,d=c.filter(e=>e.lemmas.length>1).length,f=c.filter(e=>e.confidence!==void 0).map(e=>e.confidence),p=f.length>0?f.reduce((e,t)=>e+t,0)/f.length:0,m=c.filter(e=>e.compoundSplit?.isCompound).length,h=a.filter(e=>e.isEntity).length;return{wordCount:l,lemmatizedCount:u,coverage:l>0?u/l:0,ambiguousCount:d,ambiguityRate:l>0?d/l:0,avgConfidence:p,compoundsFound:m,entitiesSkipped:h,uniqueLemmas:o.size,timeMs:s}}export{d as BinaryLemmatizer,M as CASE_NAMES,r as CONTEXTUAL_STOPWORDS,z as CompoundSplitter,f as DISAMBIGUATION_RULES,O as Disambiguator,N as GENDER_NAMES,g as NOMINATIVE_PRONOUNS,P as NUMBER_NAMES,h as PREPOSITION_CASES,F as PROTECTED_LEMMAS,V as STATIC_PHRASES,t as STOPWORDS_IS,A as WORD_CLASS_NAMES,j as WORD_CLASS_NAMES_IS,x as applyGrammarRules,b as applyNounAfterPrepositionRule,v as applyPrepositionRule,y as applyPronounVerbRule,_ as canGovernCase,B as createKnownLemmaSet,k as extractDisambiguatedLemmas,Y as extractIndexableLemmas,C as getGovernedCases,W as getPhraseInfo,p as getRulesForWord,m as hasDisambiguationRules,i as isContextualStopword,U as isKnownPhrase,S as isKnownPreposition,n as isStopword,H as matchPhrase,J as processText,a as removeStopwords,X as runBenchmark};
2
2
  //# sourceMappingURL=index.mjs.map