lemma-is 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -28,6 +28,19 @@ hestur, hest, hesti, hests, hestar, hesta, hestum, hestanna...
28
28
 
29
29
  If a user searches "hestur" but your document contains "hestinum", they won't find it—unless you normalize both to the lemma at index time.
30
30
 
31
+ ## Background
32
+
33
+ Icelandic is underserved in the search ecosystem:
34
+
35
+ - **PostgreSQL** has no Icelandic stemmer ([Snowball](https://snowballstem.org/) doesn't support it)
36
+ - **Elasticsearch** has no Icelandic analyzer in its [36 built-in languages](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html)
37
+ - **Algolia** lists Icelandic but only provides basic plurals—no morphological analysis
38
+ - **Existing Icelandic NLP tools** ([Greynir](https://github.com/mideind/GreynirPackage), [Nefnir](https://github.com/jonfd/nefnir)) are Python-only
39
+
40
+ For comparison, Finnish has [Voikko](https://voikko.puimula.org/) with PostgreSQL and Elasticsearch plugins. Icelandic has had nothing equivalent—until now.
41
+
42
+ lemma-is is the first npm package providing Icelandic lemmatization for search. It embeds the [BÍN](https://bin.arnastofnun.is/) morphological database and runs anywhere JavaScript runs.
43
+
31
44
  ## Why lemma-is?
32
45
 
33
46
  GreynirEngine remains the gold standard for **sentence parsing** and grammatical analysis in Icelandic. But full parsing is not forgiving: if a sentence doesn't parse, you don't get disambiguated lemmas. That makes it a poor fit for messy, real‑world search indexing where recall matters.
package/dist/index.d.mts CHANGED
@@ -1,3 +1,5 @@
1
+ import { Token } from "tokenize-is";
2
+
1
3
  //#region src/stopwords.d.ts
2
4
  /**
3
5
  * Icelandic stopwords for search indexing.
@@ -668,6 +670,12 @@ interface ProcessOptions {
668
670
  * Default: true
669
671
  */
670
672
  alwaysTryCompounds?: boolean;
673
+ /**
674
+ * Strip Icelandic suffixes from unknown words to find base forms.
675
+ * Useful for foreign names: "Simons" → "simon", "Obamas" → "obama".
676
+ * Default: true
677
+ */
678
+ stripUnknownSuffixes?: boolean;
671
679
  }
672
680
  /**
673
681
  * Process text through the full pipeline.
@@ -764,5 +772,14 @@ declare function runBenchmark(text: string, lemmatizer: LemmatizerLike, strategy
764
772
  compoundSplitter?: CompoundSplitter;
765
773
  }): ProcessingMetrics;
766
774
  //#endregion
767
- export { type BigramProvider, type BinaryLemmatizeOptions, BinaryLemmatizer, type BinaryLemmatizerOptions, CASE_NAMES, CONTEXTUAL_STOPWORDS, type CompoundSplit, type CompoundSplitMode, CompoundSplitter, type CompoundSplitterOptions, DISAMBIGUATION_RULES, type DisambiguatedToken, type DisambiguationRule, Disambiguator, type DisambiguatorOptions, GENDER_NAMES, type GrammarLemmatizerLike, type GrammarRuleMatch, type GrammaticalCase, type GrammaticalGender, type GrammaticalNumber, type KnownLemmaFilterOptions, type KnownLemmaLookup, type LemmaWithMorph, type LemmaWithPOS, type LemmatizerLike, type MorphFeatures, NOMINATIVE_PRONOUNS, NUMBER_NAMES, PREPOSITION_CASES, PROTECTED_LEMMAS, type ProcessOptions, type ProcessedToken, type ProcessingMetrics, type ProcessingStrategy, STATIC_PHRASES, STOPWORDS_IS, type SearchQueryOptions, type SearchQueryResult, type StaticPhrase, WORD_CLASS_NAMES, WORD_CLASS_NAMES_IS, type WordClass, applyGrammarRules, applyNounAfterPrepositionRule, applyPrepositionRule, applyPronounVerbRule, buildSearchQuery, canGovernCase, createKnownLemmaFilter, createKnownLemmaSet, extractDisambiguatedLemmas, extractIndexableLemmas, getGovernedCases, getPhraseInfo, getRulesForWord, hasDisambiguationRules, isContextualStopword, isKnownPhrase, isKnownPreposition, isStopword, matchPhrase, processText, removeStopwords, runBenchmark };
775
+ //#region src/normalizers.d.ts
776
+ /**
777
+ * Normalize a token to indexable string values.
778
+ *
779
+ * @param token - Token from tokenize-is
780
+ * @returns Array of normalized strings for indexing (may be empty)
781
+ */
782
+ declare function normalizeToken(token: Token): string[];
783
+ //#endregion
784
+ export { type BigramProvider, type BinaryLemmatizeOptions, BinaryLemmatizer, type BinaryLemmatizerOptions, CASE_NAMES, CONTEXTUAL_STOPWORDS, type CompoundSplit, type CompoundSplitMode, CompoundSplitter, type CompoundSplitterOptions, DISAMBIGUATION_RULES, type DisambiguatedToken, type DisambiguationRule, Disambiguator, type DisambiguatorOptions, GENDER_NAMES, type GrammarLemmatizerLike, type GrammarRuleMatch, type GrammaticalCase, type GrammaticalGender, type GrammaticalNumber, type KnownLemmaFilterOptions, type KnownLemmaLookup, type LemmaWithMorph, type LemmaWithPOS, type LemmatizerLike, type MorphFeatures, NOMINATIVE_PRONOUNS, NUMBER_NAMES, PREPOSITION_CASES, PROTECTED_LEMMAS, type ProcessOptions, type ProcessedToken, type ProcessingMetrics, type ProcessingStrategy, STATIC_PHRASES, STOPWORDS_IS, type SearchQueryOptions, type SearchQueryResult, type StaticPhrase, WORD_CLASS_NAMES, WORD_CLASS_NAMES_IS, type WordClass, applyGrammarRules, applyNounAfterPrepositionRule, applyPrepositionRule, applyPronounVerbRule, buildSearchQuery, canGovernCase, createKnownLemmaFilter, createKnownLemmaSet, extractDisambiguatedLemmas, extractIndexableLemmas, getGovernedCases, getPhraseInfo, getRulesForWord, hasDisambiguationRules, isContextualStopword, isKnownPhrase, isKnownPreposition, isStopword, matchPhrase, normalizeToken, processText, removeStopwords, runBenchmark };
768
785
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/stopwords.ts","../src/types.ts","../src/binary-lemmatizer.ts","../src/disambiguate.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/bloom.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"mappings":";;AAUA;;;;;AA4EA;;cA5Ea,YAAA,EAAY,GAAA;;;AA0FzB;iBAdgB,UAAA,CAAW,IAAA;;;;AAuD3B;;;;;AAiBA;;cA1Da,oBAAA,EAAsB,GAAA,SAAY,GAAA;;;;;;;;;;;iBAyC/B,oBAAA,CAAqB,KAAA,UAAe,GAAA;;;;iBAiBpC,eAAA,kBAAA,CAAkC,KAAA,EAAO,CAAA,KAAM,CAAA;;;;AApJ/D;;;;;AA4EA;;;;KC3EY,SAAA;ADyFZ;;;AAAA,cC1Ea,gBAAA,EAAkB,MAAA,CAAO,SAAA;;ADmHtC;;cCnGa,mBAAA,EAAqB,MAAA,CAAO,SAAA;;;ADoHzC;KCpGY,eAAA;;;;KAKA,iBAAA;;;;KAKA,iBAAA;;;;cAKC,UAAA,EAAY,MAAA,CAAO,eAAA;;;;cAUnB,YAAA,EAAc,MAAA,CAAO,iBAAA;AAzDlC;;;AAAA,cAkEa,YAAA,EAAc,MAAA,CAAO,iBAAA;;AAlDlC;;UA0DiB,aAAA;EACf,IAAA,GAAO,eAAA;EACP,MAAA,GAAS,iBAAA;EACT,MAAA,GAAS,iBAAA;AAAA;;;;UAMM,YAAA;EACf,KAAA;EACA,GAAA,EAAK,SAAA;AAAA;;AA3CP;;UAiDiB,cAAA,SAAuB,YAAA;EACtC,KAAA,GAAQ,aAAA;AAAA;AA7CV;;;;AAAA,UAoDiB,cAAA;EACf,SAAA,CAAU,IAAA;EACV,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;AAnCnC;;UA0CiB,cAAA;EACf,IAAA,CAAK,KAAA,UAAe,KAAA;AAAA;;;UCjEL,uBAAA;EACf,KAAA,UAAe,KAAA;AAAA;AAAA,UAGA,sBAAA;EACf,SAAA,GAAY,SAAA;AAAA;AAAA,cAGD,gBAAA,YAA4B,cAAA,EAAgB,cAAA;EAAA,QAC/C,MAAA;EAAA,QACA,UAAA;EAAA,QACA,YAAA;EAAA,QACA,YAAA;EAAA,QACA,WAAA;EAAA,QACA,WAAA;EAAA,QACA,YAAA;EAAA,QACA,OAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,WAAA;EAAA,QAEA,UAAA;EAAA,QACA,SAAA;EAAA,QACA,UAAA;EAAA,QACA,WAAA;EAAA,QACA,OAAA;EAAA,QAEA,OAAA;EAAA,QAED,WAAA,CAAA;ED3EsC;AAgB/C;;EAhB+C,OCiKhC,IAAA,CACX,GAAA,UACA,OAAA,GAAS,uBAAA,GACR,OAAA,CAAQ,gBAAA;EDpJqB;;AAgBlC;EAhBkC,OCmKzB,cAAA,CAAe,MAAA,EAAQ,WAAA,GAAc,gBAAA;;;;UAOpC,SAAA;EDrJmB;;;EAAA,QC4JnB,QAAA;EDvJE;;;EAAA,QC8JF,OAAA;ED9JmB;AAK7B;;;EAL6B,QCsKnB,QAAA;EDjKqC;AAU/C;;;;ECiLE,SAAA,CAAU,IAAA,UAAc,OAAA,GAAS,sBAAA;EDxKtB;;;;;EAAA,QCkNH,WAAA;ED1MoB;;;;ECwO5B,gBAAA,CAAiB,IAAA,WAAe,YAAA;EDrON;;;;ECqQ1B,kBAAA,CAAmB,IAAA,WAAe,cAAA;EDtQzB;;;EC4ST,gBAAA,CAAA;ED3S0B;AAM5B;;EC4SE,UAAA,CAAA;ED1Sc;;;EAAA,QCiTN,UAAA;EDjTM;;AAMhB;;ECmVE,UAAA,CAAW,KAAA,UAAe,KAAA;EDnVwB;;;;EC4VlD,IAAA,CAAK,KAAA,UAAe,KAAA;ED3VC;AAOvB;;EC2VE,OAAA,CAAQ,IAAA;EDzVqC;;;EAAA,ICgWzC,eAAA,CAAA;EDhWc;;;EAAA,ICuWd,aAAA,CAAA;EDhWW;;;EAAA,ICuWX,gBAAA,CAAA;EDtWJ;;;EAAA,IC6WI,UAAA,CAAA;ED7W6B;;;;ECqXjC,YAAA,CAAA;AAAA;;;UCxee,oBAAA;EHyHD;EGvHd,UAAA;;EAEA,WAAA;EHqH8D;EGnH9D,kBAAA;EHoI6B;EGlI7B,eAAA;AAAA;AAAA,UAGe,kBAAA;EH+HwC;EG7HvD,KAAA;EH6H6D;EG3H7D,KAAA;EH2H8D;EGzH9D,GAAA,GAAM,SAAA;;EAEN,UAAA;EF5BU;EE8BV,iBAAA,GAAoB,YAAA;;EAEpB,SAAA;EFhCmB;EEkCnB,UAAA;EFRD;EEUC,UAAA;AAAA;;AFLF;;UEWU,mBAAA,SAA4B,cAAA;EACpC,kBAAA,EAAoB,IAAA,WAAe,cAAA;AAAA;AAAA,UAuBpB,yBAAA;EACf,UAAA;EACA,UAAA;AAAA;;AFhBF;;cE6Pa,aAAA;EACX,UAAA,EAAY,mBAAA;EACZ,OAAA,EAAS,cAAA;EACT,UAAA;EACA,WAAA;EACA,kBAAA;EACA,eAAA;EAAA,QACQ,UAAA;cAGN,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,SACT,OAAA,GAAS,oBAAA;EAAA,QAWH,QAAA;;;;AFhQV;;;;EEiRE,YAAA,CACE,IAAA,UACA,QAAA,iBACA,QAAA,iBACA,IAAA,GAAM,yBAAA,GACL,kBAAA;EF7QQ;;;;;AAQb;EEuUE,eAAA,CAAgB,MAAA,aAAmB,kBAAA;;;;;;;EAoBnC,aAAA,CAAc,MAAA,aAAmB,GAAA;AAAA;;;;iBAenB,0BAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,EAAS,cAAA,EACT,OAAA;EACE,QAAA,IAAY,IAAA;EACZ,eAAA;AAAA,IAED,GAAA;;;;;;;AHlXH;;UIrFiB,kBAAA;EJqFkB;EInFjC,IAAA;EJ4Hc;EI1Hd,MAAA,EAAQ,SAAA;;EAER,IAAA,EAAM,SAAA;EJwHwD;EItH9D,OAAA;EJuI6B;EIrI7B,WAAA;AAAA;;;;;;;;;;cAYW,oBAAA,EAAsB,kBAAA;;;;iBA6InB,eAAA,CAAgB,IAAA,WAAe,kBAAA;AHxJ/C;;;AAAA,iBGgKgB,sBAAA,CAAuB,IAAA;;;AJtFvC;;;AAAA,UK/EiB,qBAAA;EACf,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;;;ALwInC;;;;;cK5Ha,iBAAA,EAAmB,GAAA,SAAY,GAAA,CAAI,eAAA;;;;;;cA+CnC,mBAAA,EAAmB,GAAA;;AJtEhC;;UIsFiB,gBAAA;EJtFI;EIwFnB,KAAA;EJzEW;EI2EX,GAAA,EAAK,SAAA;;EAEL,IAAA;EJ7E6C;EI+E7C,UAAA;AAAA;;;;AJ/CF;;;;iBIyDgB,aAAA,CACd,SAAA,UACA,YAAA,EAAc,eAAA;AJtDhB;;;;;AAKA;;;;;AALA,iBIuEgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,aAAA,EAAe,cAAA,KACd,gBAAA;;;;;AJtDH;;;;;AASA;iBI6EgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,kBACC,gBAAA;;;;AJxEH;;;;;;;;;;;;;;;;iBIqHgB,6BAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,UAAA,EAAY,qBAAA,UACX,gBAAA;;;;;;;;;AJxGH;;;;;;iBI8JgB,iBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,aAAA,EAAe,cAAA,IACf,UAAA,GAAY,qBAAA,UACX,gBAAA;;;AJ3JH;iBI8KgB,kBAAA,CAAmB,KAAA;;;;iBAOnB,gBAAA,CAAiB,SAAA,WAAoB,GAAA,CAAI,eAAA;;;;ALxSzD;;UMNiB,kBAAA;EACf,iBAAA;EACA,gBAAA;AAAA;;;;ANuIF;;;cOxHa,gBAAA,EAAgB,GAAA;AAAA,UAoFZ,aAAA;EPqDD;EOnDd,IAAA;EPmD6B;EOjD7B,KAAA;EPiD8B;EO/C9B,UAAA;EP+CgD;EO7ChD,UAAA;EP6C8D;EO3C9D,UAAA;AAAA;;;ANxGF;;;;;KMkHY,iBAAA;AAAA,UAEK,uBAAA;;;;ANrFjB;EM0FE,aAAA;;EAEA,iBAAA;EN5FgD;AAgBlD;;;EMiFE,IAAA,GAAO,iBAAA;AAAA;AAAA,cA8EI,gBAAA;EAAA,QACH,UAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,WAAA;EAAA,QACA,IAAA;cAGN,UAAA,EAAY,cAAA,EACZ,WAAA,EAAa,gBAAA,EACb,OAAA,GAAS,uBAAA;;;;UAYH,OAAA;ENjKT;;;;AAKD;;;;EM8KE,KAAA,CAAM,IAAA,WAAe,aAAA;ENrKV;;;EAAA,QMkRH,aAAA;EAAA,QAwBA,QAAA;ENlSO;;;;EMsWf,YAAA,CAAa,IAAA;AAAA;;;;;iBAUC,mBAAA,CAAoB,MAAA,aAAmB,GAAA;AAAA,UAItC,gBAAA;EACf,GAAA,CAAI,KAAA;AAAA;AAAA,UAGW,uBAAA,SAAgC,kBAAA;AN/WjD;;;;AAAA,iBMqXgB,sBAAA,CACd,MAAA,YACA,OAAA,GAAS,uBAAA,GACR,gBAAA;;;;AP3dH;;;;;AA4EA;;;;;AAcA;UQvFiB,YAAA;;EAEf,KAAA;ERqFgD;EQnFhD,UAAA;ER4HkC;EQ1HlC,GAAA;AAAA;;AR2IF;;;cQpIa,cAAA,EAAgB,GAAA,SAAY,YAAA;;;;;iBA6GzB,WAAA,CACd,KAAA,YACA,UAAA;EACG,MAAA,EAAQ,YAAA;EAAc,SAAA;AAAA;;AP/H3B;;iBO+IgB,aAAA,CAAc,IAAA;;;APhI9B;iBOuIgB,aAAA,CAAc,IAAA,WAAe,YAAA;;;;;;UCzG5B,cAAA;ETyEf;ESvEA,QAAA;ET0CiC;ESxCjC,IAAA;ETiFc;ES/Ed,MAAA;;EAEA,QAAA;ET6E8D;ES3E9D,aAAA;ET4F6B;ES1F7B,UAAA;ET0F8D;ESxF9D,aAAA,GAAgB,aAAA;ETwFuC;EStFvD,cAAA;AAAA;;;;UAMe,cAAA;;EAEf,OAAA,GAAU,cAAA;ERrES;EQuEnB,gBAAA,GAAmB,gBAAA;ERvEA;EQyEnB,eAAA;ER1DW;;;;;AAgBb;EQiDE,sBAAA;;EAEA,cAAA;ERnDgD;AAgBlD;;;;;EQ0CE,kBAAA;ERrC2B;;;;AAK7B;;EQuCE,kBAAA;AAAA;;ARlCF;;;;;AAUA;;iBQmCgB,WAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,cAAA;;;AR9BH;;;;;AAQA;iBQwNgB,sBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,GAAA;;;;UAiEc,kBAAA,SAA2B,cAAA;ER1RhB;EQ4R1B,WAAA;ER9RA;EQgSA,UAAA;ER/RA;EQiSA,UAAA;ERhSA;;;;AAMF;EQgSE,eAAA;;EAEA,iBAAA;AAAA;;;;UAMe,iBAAA;ERhSA;EQkSf,MAAA;;EAEA,KAAA;AAAA;;;;;AR5RF;;;;;;;iBQ0SgB,gBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,kBAAA,GACR,iBAAA;;;;KAwES,kBAAA;;;;UAKK,iBAAA;ERjXV;EQmXL,SAAA;ERnXiC;EQqXjC,eAAA;;EAEA,QAAA;;EAEA,cAAA;EP1bsC;EO4btC,aAAA;EP3bA;EO6bA,aAAA;EP1be;EO4bf,cAAA;;EAEA,eAAA;EP7bqB;EO+brB,YAAA;EP5b4B;EO8b5B,MAAA;AAAA;;;;iBAMc,YAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,QAAA,EAAU,kBAAA,EACV,SAAA;EACE,OAAA,GAAU,cAAA;EACV,gBAAA,GAAmB,gBAAA;AAAA,IAEpB,iBAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/stopwords.ts","../src/types.ts","../src/binary-lemmatizer.ts","../src/disambiguate.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/bloom.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts","../src/normalizers.ts"],"mappings":";;;;;;AAUA;;;;;cAAa,YAAA,EAAY,GAAA;;;;iBA4ET,UAAA,CAAW,IAAA;AAc3B;;;;;AAyCA;;;;;AAzCA,cAAa,oBAAA,EAAsB,GAAA,SAAY,GAAA;;;;;;;;;;;iBAyC/B,oBAAA,CAAqB,KAAA,UAAe,GAAA;;AClIpD;;iBDmJgB,eAAA,kBAAA,CAAkC,KAAA,EAAO,CAAA,KAAM,CAAA;;;;;;AApJ/D;;;;;AA4EA;;AA5EA,KCCY,SAAA;;;ADyFZ;cC1Ea,gBAAA,EAAkB,MAAA,CAAO,SAAA;;;;cAgBzB,mBAAA,EAAqB,MAAA,CAAO,SAAA;;;;KAgB7B,eAAA;ADoGZ;;;AAAA,KC/FY,iBAAA;;;;KAKA,iBAAA;;;;cAKC,UAAA,EAAY,MAAA,CAAO,eAAA;;AA9DhC;;cAwEa,YAAA,EAAc,MAAA,CAAO,iBAAA;;;AAzDlC;cAkEa,YAAA,EAAc,MAAA,CAAO,iBAAA;;;;UAQjB,aAAA;EACf,IAAA,GAAO,eAAA;EACP,MAAA,GAAS,iBAAA;EACT,MAAA,GAAS,iBAAA;AAAA;AA7CX;;;AAAA,UAmDiB,YAAA;EACf,KAAA;EACA,GAAA,EAAK,SAAA;AAAA;;;;UAMU,cAAA,SAAuB,YAAA;EACtC,KAAA,GAAQ,aAAA;AAAA;;;AA7CV;;UAoDiB,cAAA;EACf,SAAA,CAAU,IAAA;EACV,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;;;UAOlB,cAAA;EACf,IAAA,CAAK,KAAA,UAAe,KAAA;AAAA;;;UCjEL,uBAAA;EACf,KAAA,UAAe,KAAA;AAAA;AAAA,UAGA,sBAAA;EACf,SAAA,GAAY,SAAA;AAAA;AAAA,cAGD,gBAAA,YAA4B,cAAA,EAAgB,cAAA;EAAA,QAC/C,MAAA;EAAA,QACA,UAAA;EAAA,QACA,YAAA;EAAA,QACA,YAAA;EAAA,QACA,WAAA;EAAA,QACA,WAAA;EAAA,QACA,YAAA;EAAA,QACA,OAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,WAAA;EAAA,QAEA,UAAA;EAAA,QACA,SAAA;EAAA,QACA,UAAA;EAAA,QACA,WAAA;EAAA,QACA,OAAA;EAAA,QAEA,OAAA;EAAA,QAED,WAAA,CAAA;;;;SAsFM,IAAA,CACX,GAAA,UACA,OAAA,GAAS,uBAAA,GACR,OAAA,CAAQ,gBAAA;EDzIZ;;;EAAA,OCwJQ,cAAA,CAAe,MAAA,EAAQ,WAAA,GAAc,gBAAA;EDnJlC;;;EAAA,QC0JF,SAAA;ED1JiB;AAK3B;;EAL2B,QCiKjB,QAAA;ED5JmB;;AAK7B;EAL6B,QCmKnB,OAAA;;;;ADzJV;UCiKU,QAAA;;;;ADvJV;;ECiLE,SAAA,CAAU,IAAA,UAAc,OAAA,GAAS,sBAAA;EDjLR;;AAS3B;;;EAT2B,QC2NjB,WAAA;EDlNyC;AAQnD;;;ECwOE,gBAAA,CAAiB,IAAA,WAAe,YAAA;EDtOvB;;;;ECsQT,kBAAA,CAAmB,IAAA,WAAe,cAAA;EDvQ3B;;;EC6SP,gBAAA,CAAA;ED3SS;;;ECkTT,UAAA,CAAA;ED5S2B;;;EAAA,QCmTnB,UAAA;EDjTR;;;;ECyVA,UAAA,CAAW,KAAA,UAAe,KAAA;EDnVI;;;;EC4V9B,IAAA,CAAK,KAAA,UAAe,KAAA;ED3VZ;;;ECkWR,OAAA,CAAQ,IAAA;ED3VqB;;;EAAA,ICkWzB,eAAA,CAAA;EDjWM;;;EAAA,ICwWN,aAAA,CAAA;EDvWyC;;AAO/C;EAP+C,IC8WzC,gBAAA,CAAA;;;;MAOA,UAAA,CAAA;ED7WgB;;;;ECqXpB,YAAA,CAAA;AAAA;;;UCxee,oBAAA;EHgFkB;EG9EjC,UAAA;EHuHc;EGrHd,WAAA;;EAEA,kBAAA;EHmH8D;EGjH9D,eAAA;AAAA;AAAA,UAGe,kBAAA;EH+H+C;EG7H9D,KAAA;EH6HuD;EG3HvD,KAAA;EH2H6D;EGzH7D,GAAA,GAAM,SAAA;EHyHwD;EGvH9D,UAAA;;EAEA,iBAAA,GAAoB,YAAA;EF9BV;EEgCV,SAAA;;EAEA,UAAA;EFlCmB;EEoCnB,UAAA;AAAA;;;;UAMQ,mBAAA,SAA4B,cAAA;EACpC,kBAAA,EAAoB,IAAA,WAAe,cAAA;AAAA;AAAA,UAuBpB,yBAAA;EACf,UAAA;EACA,UAAA;AAAA;;;;cA6OW,aAAA;EACX,UAAA,EAAY,mBAAA;EACZ,OAAA,EAAS,cAAA;EACT,UAAA;EACA,WAAA;EACA,kBAAA;EACA,eAAA;EAAA,QACQ,UAAA;cAGN,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,SACT,OAAA,GAAS,oBAAA;EAAA,QAWH,QAAA;EF1QG;;;;;AAUb;;EEiRE,YAAA,CACE,IAAA,UACA,QAAA,iBACA,QAAA,iBACA,IAAA,GAAM,yBAAA,GACL,kBAAA;EFtRsB;;AAS3B;;;;EE+UE,eAAA,CAAgB,MAAA,aAAmB,kBAAA;EFvUpB;;;;;;EE2Vf,aAAA,CAAc,MAAA,aAAmB,GAAA;AAAA;;;;iBAenB,0BAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,EAAS,cAAA,EACT,OAAA;EACE,QAAA,IAAY,IAAA;EACZ,eAAA;AAAA,IAED,GAAA;;;;AHhYH;;;;;UIvEiB,kBAAA;EJkHf;EIhHA,IAAA;EJmFiC;EIjFjC,MAAA,EAAQ,SAAA;EJ0HM;EIxHd,IAAA,EAAM,SAAA;;EAEN,OAAA;EJsH8D;EIpH9D,WAAA;AAAA;;;;;;;;;;cAYW,oBAAA,EAAsB,kBAAA;;AH1BnC;;iBGuKgB,eAAA,CAAgB,IAAA,WAAe,kBAAA;;;AHxJ/C;iBGgKgB,sBAAA,CAAuB,IAAA;;;;;AJtFvC;UK/EiB,qBAAA;EACf,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;ALuHnC;;;;;AAiBA;;;cK5Ha,iBAAA,EAAmB,GAAA,SAAY,GAAA,CAAI,eAAA;;;;;;cA+CnC,mBAAA,EAAmB,GAAA;;;;UAgBf,gBAAA;EJtFI;EIwFnB,KAAA;EJxFmB;EI0FnB,GAAA,EAAK,SAAA;EJ3EM;EI6EX,IAAA;;EAEA,UAAA;AAAA;AJ/DF;;;;;AAgBA;;AAhBA,iBIyEgB,aAAA,CACd,SAAA,UACA,YAAA,EAAc,eAAA;;;AJtDhB;;;;;AAKA;;;iBIkEgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,aAAA,EAAe,cAAA,KACd,gBAAA;;AJhEH;;;;;AAUA;;;;iBIsFgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,kBACC,gBAAA;AJhFH;;;;;AAQA;;;;;;;;;;;;;;AARA,iBI6HgB,6BAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,UAAA,EAAY,qBAAA,UACX,gBAAA;;AJhHH;;;;;;;;;AAQA;;;;iBI8JgB,iBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,aAAA,EAAe,cAAA,IACf,UAAA,GAAY,qBAAA,UACX,gBAAA;;;;iBAmBa,kBAAA,CAAmB,KAAA;AJ9KnC;;;AAAA,iBIqLgB,gBAAA,CAAiB,SAAA,WAAoB,GAAA,CAAI,eAAA;;;;;;UC9SxC,kBAAA;EACf,iBAAA;EACA,gBAAA;AAAA;;;;;;ANuIF;cOxHa,gBAAA,EAAgB,GAAA;AAAA,UAoFZ,aAAA;EPoCoB;EOlCnC,IAAA;EPmDc;EOjDd,KAAA;EPiD6B;EO/C7B,UAAA;EP+C8B;EO7C9B,UAAA;EP6CgD;EO3ChD,UAAA;AAAA;;;;;ANxGF;;;KMkHY,iBAAA;AAAA,UAEK,uBAAA;ENrGJ;;;;EM0GX,aAAA;EN1FW;EM4FX,iBAAA;;;;AN5EF;EMiFE,IAAA,GAAO,iBAAA;AAAA;AAAA,cA8EI,gBAAA;EAAA,QACH,UAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,WAAA;EAAA,QACA,IAAA;cAGN,UAAA,EAAY,cAAA,EACZ,WAAA,EAAa,gBAAA,EACb,OAAA,GAAS,uBAAA;EN/JD;;;EAAA,QM2KF,OAAA;EN3KmB;AAK7B;;;;;AAUA;;EM8KE,KAAA,CAAM,IAAA,WAAe,aAAA;EN9KI;;AAS3B;EAT2B,QM2RjB,aAAA;EAAA,QAwBA,QAAA;EN1SiB;;AAQ3B;;EMsWE,YAAA,CAAa,IAAA;AAAA;;;;;iBAUC,mBAAA,CAAoB,MAAA,aAAmB,GAAA;AAAA,UAItC,gBAAA;EACf,GAAA,CAAI,KAAA;AAAA;AAAA,UAGW,uBAAA,SAAgC,kBAAA;;;AN/WjD;;iBMqXgB,sBAAA,CACd,MAAA,YACA,OAAA,GAAS,uBAAA,GACR,gBAAA;;;;;;AP3dH;;;;;AA4EA;;;;UQzEiB,YAAA;ERuFJ;EQrFX,KAAA;;EAEA,UAAA;ERmFgD;EQjFhD,GAAA;AAAA;;;;AR2IF;cQpIa,cAAA,EAAgB,GAAA,SAAY,YAAA;;;;;iBA6GzB,WAAA,CACd,KAAA,YACA,UAAA;EACG,MAAA,EAAQ,YAAA;EAAc,SAAA;AAAA;;;;iBAgBX,aAAA,CAAc,IAAA;;;;iBAOd,aAAA,CAAc,IAAA,WAAe,YAAA;;;AR3E7C;;;AAAA,USlBiB,cAAA;ETkBsB;EShBrC,QAAA;ET2DA;ESzDA,IAAA;ET4BiC;ES1BjC,MAAA;ETmEc;ESjEd,QAAA;;EAEA,aAAA;ET+D8D;ES7D9D,UAAA;ET8E6B;ES5E7B,aAAA,GAAgB,aAAA;ET4E8C;ES1E9D,cAAA;AAAA;;;;UAMe,cAAA;;EAEf,OAAA,GAAU,cAAA;;EAEV,gBAAA,GAAmB,gBAAA;ERnFA;EQqFnB,eAAA;ERrFmB;;AAerB;;;;EQ6EE,sBAAA;ER7DW;EQ+DX,cAAA;;;;AR/CF;;;EQsDE,kBAAA;ERtDyB;AAK3B;;;;;EQwDE,kBAAA;ERnD2B;;;;AAK7B;EQoDE,oBAAA;AAAA;;;AR1CF;;;;;AASA;iBQ4CgB,WAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,cAAA;;;;ARxCH;;;;;iBQ+QgB,sBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,GAAA;;;;UAiEc,kBAAA,SAA2B,cAAA;ERlV1C;EQoVA,WAAA;ERnVA;EQqVA,UAAA;ERrV0B;EQuV1B,UAAA;ERjVe;;;;;EQuVf,eAAA;ERrVK;EQuVL,iBAAA;AAAA;ARjVF;;;AAAA,UQuViB,iBAAA;ERvVuB;EQyVtC,MAAA;ERxVQ;EQ0VR,KAAA;AAAA;ARnVF;;;;;;;;;;;AAAA,iBQiWgB,gBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,kBAAA,GACR,iBAAA;;;;KAwES,kBAAA;;;;UAKK,iBAAA;;EAEf,SAAA;;EAEA,eAAA;EP7esC;EO+etC,QAAA;EP9eA;EOgfA,cAAA;EP7ee;EO+ef,aAAA;;EAEA,aAAA;EPhfqB;EOkfrB,cAAA;EP/e4B;EOif5B,eAAA;EPlYW;EOoYX,YAAA;EPnYG;EOqYH,MAAA;AAAA;;;;iBAMc,YAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,QAAA,EAAU,kBAAA,EACV,SAAA;EACE,OAAA,GAAU,cAAA;EACV,gBAAA,GAAmB,gBAAA;AAAA,IAEpB,iBAAA;;;;AT3fH;;;;;iBUvEgB,cAAA,CAAe,KAAA,EAAO,KAAA"}
package/dist/index.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import{tokenize as e}from"tokenize-is";const t=new Set(`á.að.aðra.aðrar.aðrir.af.alla.allan.allar.allir.allnokkra.allnokkrar.allnokkrir.allnokkru.allnokkrum.allnokkuð.allnokkur.allnokkurn.allnokkurra.allnokkurrar.allnokkurri.allnokkurs.allnokkurt.allra.allrar.allri.alls.allt.alltað.allur.án.andspænis.annað.annaðhvort.annan.annar.annarra.annarrar.annarri.annars.árla.ásamt.auk.austan.austanundir.austur.báða.báðar.báðir.báðum.bæði.bak.beggja.eða.eður.ef.eftir.ég.ein.eina.einar.einhver.einhverja.einhverjar.einhverjir.einhverju.einhverjum.einhvern.einhverra.einhverrar.einhverri.einhvers.einir.einn.einna.einnar.einni.eins.einskis.einu.einum.eitt.eitthvað.eitthvert.ekkert.ella.ellegar.en.enda.enga.engan.engar.engin.enginn.engir.engra.engrar.engri.engu.engum.er.fáein.fáeina.fáeinar.fáeinir.fáeinna.fáeinum.fjær.fjarri.flestalla.flestallan.flestallar.flestallir.flestallra.flestallrar.flestallri.flestalls.flestallt.flestallur.flestöll.flestöllu.flestöllum.frá.fram.fyrir.fyrst.gagnstætt.gagnvart.gegn.gegnt.gegnum.hana.handa.handan.hann.hans.heldur.hennar.henni.hið.hin.hina.hinar.hinir.hinn.hinna.hinnar.hinni.hins.hinu.hinum.hitt.hjá.honum.hún.hvað.hvaða.hvenær.hver.hverja.hverjar.hverjir.hverju.hverjum.hvern.hverra.hverrar.hverri.hvers.hvert.hvílík.hvílíka.hvílíkan.hvílíkar.hvílíkir.hvílíkra.hvílíkrar.hvílíkri.hvílíks.hvílíkt.hvílíku.hvílíkum.hvílíkur.hvor.hvora.hvorar.hvorir.hvorki.hvorn.hvorra.hvorrar.hvorri.hvors.hvort.hvoru.hvorug.hvoruga.hvorugan.hvorugar.hvorugir.hvorugra.hvorugrar.hvorugri.hvorugs.hvorugt.hvorugu.hvorugum.hvorugur.hvorum.í.inn.innan.innanundir.jafnframt.jafnhliða.kring.kringum.með.meðal.meðan.meður.mér.mestalla.mestallan.mestallar.mestallir.mestallra.mestallrar.mestallri.mestalls.mestallt.mestallur.mestöll.mestöllu.mestöllum.miðli.mig.milli.millum.mín.mína.mínar.mínir.minn.minna.minnar.minni.míns.mínu.mínum.mitt.mót.móti.nær.nærri.næst.næstum.nálægt.né.neðan.nein.neina.neinar.neinir.neinn.neinna.neinnar.neinni.neins.neinu.neinum.neitt.nema.niður.nokkra.nokkrar.nokkrir.nokkru.nokkrum.nokkuð.nokkur.nokkurn.nokkurra.nokkurrar.nokkurri.nokkurs.nokkurt.norðan.nú.öðru.öðrum.of.ofan.ofar.og.óháð.okkar.okkur.öll.öllu.öllum.önnur.órafjarri.oss.sá.sakir.sama.saman.samar.samfara.samhliða.sami.samir.samkvæmt.samra.samrar.samri.sams.samskipa.samt.samtímis.samur.sem.sér.sérhvað.sérhver.sérhverja.sérhverjar.sérhverjir.sérhverju.sérhverjum.sérhvern.sérhverra.sérhverrar.sérhverri.sérhvers.sérhvert.síðan.síðla.sig.sín.sína.sínar.sínhver.sínhverja.sínhverjar.sínhverjir.sínhverju.sínhverjum.sínhvern.sínhverra.sínhverrar.sínhverri.sínhvers.sínhvert.sínhvor.sínhvora.sínhvorar.sínhvorir.sínhvorn.sínhvorra.sínhvorrar.sínhvorri.sínhvors.sínhvort.sínhvoru.sínhvorum.sínir.sinn.sinna.sinnar.sinnhver.sinnhverja.sinnhverjar.sinnhverjir.sinnhverju.sinnhverjum.sinnhvern.sinnhverra.sinnhverrar.sinnhverri.sinnhvers.sinnhvert.sinnhvor.sinnhvora.sinnhvorar.sinnhvorir.sinnhvorn.sinnhvorra.sinnhvorrar.sinnhvorri.sinnhvors.sinnhvort.sinnhvoru.sinnhvorum.sinni.síns.sínu.sínum.sitt.sitthvað.sitthver.sitthverja.sitthverjar.sitthverjir.sitthverju.sitthverjum.sitthvern.sitthverra.sitthverrar.sitthverri.sitthvers.sitthvert.sitthvor.sitthvora.sitthvorar.sitthvorir.sitthvorn.sitthvorra.sitthvorrar.sitthvorri.sitthvors.sitthvort.sitthvoru.sitthvorum.sjálf.sjálfa.sjálfan.sjálfar.sjálfir.sjálfra.sjálfrar.sjálfri.sjálfs.sjálft.sjálfu.sjálfum.sjálfur.slík.slíka.slíkan.slíkar.slíkir.slíkra.slíkrar.slíkri.slíks.slíkt.slíku.slíkum.slíkur.snemma.sökum.söm.sömu.sömum.sú.sum.suma.suman.sumar.sumir.sumra.sumrar.sumri.sums.sumt.sumu.sumum.sumur.sunnan.svo.til.tráss.um.umfram.umhverfis.undan.undir.uns.upp.úr.út.utan.útundan.vegna.vér.vestan.vestur.vettugi.við.viður.vor.vora.vorar.vorir.vorn.vorra.vorrar.vorri.vors.vort.voru.vorum.yðar.yður.yfir.ykkar.ykkur.ýmis.ýmiss.ýmissa.ýmissar.ýmissi.ýmist.ýmsa.ýmsan.ýmsar.ýmsir.ýmsu.ýmsum.þá.það.þær.þann.þar.þau.þegar.þeim.þeir.þeirra.þeirrar.þeirri.þennan.þér.þess.þessa.þessar.þessara.þessarar.þessari.þessi.þessir.þessu.þessum.þetta.þið.þig.þín.þína.þínar.þínir.þinn.þinna.þinnar.þinni.þíns.þínu.þínum.þitt.þó.þónokkra.þónokkrar.þónokkrir.þónokkru.þónokkrum.þónokkuð.þónokkur.þónokkurn.þónokkurra.þónokkurrar.þónokkurri.þónokkurs.þónokkurt.þótt.þú.því.þvílík.þvílíka.þvílíkan.þvílíkar.þvílíkir.þvílíkra.þvílíkrar.þvílíkri.þvílíks.þvílíkt.þvílíku.þvílíkum.þvílíkur`.split(`.`));function n(e){return t.has(e.toLowerCase())}const r=new Map([[`á`,new Set([`fs`,`ao`])],[`við`,new Set([`fs`,`fn`])],[`af`,new Set([`fs`,`ao`])],[`til`,new Set([`fs`])],[`um`,new Set([`fs`])],[`frá`,new Set([`fs`])],[`yfir`,new Set([`fs`,`ao`])],[`undir`,new Set([`fs`,`ao`])],[`fyrir`,new Set([`fs`,`ao`])],[`eftir`,new Set([`fs`,`ao`])],[`gegn`,new Set([`fs`])],[`hjá`,new Set([`fs`])],[`úr`,new Set([`fs`])],[`í`,new Set([`fs`])]]);function i(e,n){let i=e.toLowerCase(),a=r.get(i);return a&&n?a.has(n):t.has(i)}function a(e){return e.filter(e=>!n(e))}const o=1279610177,s=[`no`,`so`,`lo`,`ao`,`fs`,`fn`,`st`,`to`,`gr`,`uh`],c=[void 0,`nf`,`þf`,`þgf`,`ef`],l=[void 0,`kk`,`kvk`,`hk`],u=[`et`,`ft`];var d=class e{buffer;stringPool;lemmaOffsets;lemmaLengths;wordOffsets;wordLengths;entryOffsets;entries;bigramW1Offsets;bigramW1Lengths;bigramW2Offsets;bigramW2Lengths;bigramFreqs;lemmaCount;wordCount;entryCount;bigramCount;version;decoder=new TextDecoder(`utf-8`);constructor(e){this.buffer=e;let t=new DataView(e),n=t.getUint32(0,!0);if(n!==o)throw Error(`Invalid binary format: expected magic 0x${o.toString(16)}, got 0x${n.toString(16)}`);if(this.version=t.getUint32(4,!0),this.version!==1&&this.version!==2)throw Error(`Unsupported version: ${this.version}`);let r=t.getUint32(8,!0);this.lemmaCount=t.getUint32(12,!0),this.wordCount=t.getUint32(16,!0),this.entryCount=t.getUint32(20,!0),this.bigramCount=t.getUint32(24,!0);let i=32;this.stringPool=new Uint8Array(e,i,r),i+=r,this.lemmaOffsets=new Uint32Array(e,i,this.lemmaCount),i+=this.lemmaCount*4,this.lemmaLengths=new Uint8Array(e,i,this.lemmaCount),i+=this.lemmaCount,i=i+3&-4,this.wordOffsets=new Uint32Array(e,i,this.wordCount),i+=this.wordCount*4,this.wordLengths=new Uint8Array(e,i,this.wordCount),i+=this.wordCount,i=i+3&-4,this.entryOffsets=new Uint32Array(e,i,this.wordCount+1),i+=(this.wordCount+1)*4,this.entries=new Uint32Array(e,i,this.entryCount),i+=this.entryCount*4,this.bigramW1Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW1Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramW2Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW2Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramFreqs=new Uint32Array(e,i,this.bigramCount)}static async load(t,n={}){let r=await(n.fetch??fetch)(t);if(!r.ok)throw Error(`Failed to load binary data: ${r.status}`);return new e(await r.arrayBuffer())}static loadFromBuffer(t){return new e(t)}getString(e,t){return this.decoder.decode(this.stringPool.subarray(e,e+t))}getLemma(e){return this.getString(this.lemmaOffsets[e],this.lemmaLengths[e])}getWord(e){return this.getString(this.wordOffsets[e],this.wordLengths[e])}findWord(e){let t=0,n=this.wordCount-1;for(;t<=n;){let r=t+n>>>1,i=this.getWord(r);if(i===e)return r;i<e?t=r+1:n=r-1}return-1}lemmatize(e,t={}){let n=e.toLowerCase(),r=this.findWord(n);if(r===-1)return[n];let i=this.entryOffsets[r],a=this.entryOffsets[r+1],{wordClass:o}=t,c=new Set,l=[];for(let e=i;e<a;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=s[n];if(o&&r!==o)continue;let i=this.getLemma(t);c.has(i)||(c.add(i),l.push(i))}return l.length===0?[n]:l}unpackEntry(e){return this.version===1?{lemmaIdx:e>>>4,posCode:e&15,caseCode:0,genderCode:0,numberCode:0}:{lemmaIdx:e>>>10,posCode:e&15,caseCode:e>>>4&7,genderCode:e>>>7&3,numberCode:e>>>9&1}}lemmatizeWithPOS(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=new Set,o=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=this.getLemma(t),i=s[n]??``,c=`${r}:${i}`;a.has(c)||(a.add(c),o.push({lemma:r,pos:i}))}return o}lemmatizeWithMorph(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n,caseCode:r,genderCode:i,numberCode:o}=this.unpackEntry(this.entries[e]),d={},f=c[r],p=l[i],m=u[o];f&&(d.case=f),p&&(d.gender=p),m&&(d.number=m),a.push({lemma:this.getLemma(t),pos:s[n]??``,morph:Object.keys(d).length>0?d:void 0})}return a}hasMorphFeatures(){return this.version>=2}getVersion(){return this.version}findBigram(e,t){let n=0,r=this.bigramCount-1;for(;n<=r;){let i=n+r>>>1,a=this.getString(this.bigramW1Offsets[i],this.bigramW1Lengths[i]);if(a<e)n=i+1;else if(a>e)r=i-1;else{let e=this.getString(this.bigramW2Offsets[i],this.bigramW2Lengths[i]);if(e===t)return i;e<t?n=i+1:r=i-1}}return-1}bigramFreq(e,t){let n=this.findBigram(e.toLowerCase(),t.toLowerCase());return n===-1?0:this.bigramFreqs[n]}freq(e,t){return this.bigramFreq(e,t)}isKnown(e){return this.findWord(e.toLowerCase())!==-1}get lemmaCountValue(){return this.lemmaCount}get wordFormCount(){return this.wordCount}get bigramCountValue(){return this.bigramCount}get bufferSize(){return this.buffer.byteLength}getAllLemmas(){let e=[];for(let t=0;t<this.lemmaCount;t++)e.push(this.getLemma(t));return e}};const f=[{word:`á`,prefer:`so`,over:`fs`,context:`after_pronoun`,description:`á after pronoun = verb 'eiga' (I own, you own)`},{word:`á`,prefer:`fs`,over:`so`,context:`before_noun`,description:`á before noun = preposition (on, at)`},{word:`við`,prefer:`fn`,over:`fs`,context:`sentence_start`,description:`við at sentence start = pronoun 'we'`},{word:`við`,prefer:`fs`,over:`fn`,context:`before_noun`,description:`við before noun = preposition 'by/at'`},{word:`af`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`af before noun = preposition 'of/from'`},{word:`til`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`til before noun = preposition 'to'`},{word:`um`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`um before noun = preposition 'about/around'`},{word:`yfir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`yfir before noun = preposition 'over'`},{word:`undir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`undir before noun = preposition 'under'`},{word:`fyrir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`fyrir before noun = preposition 'for/before'`},{word:`eftir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`eftir before noun = preposition 'after'`},{word:`frá`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`frá before noun = preposition 'from'`},{word:`með`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`með before noun = preposition 'with'`},{word:`í`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`í before noun = preposition 'in'`},{word:`úr`,prefer:`fs`,over:`no`,context:`before_noun`,description:`úr before noun = preposition 'out of'`}];function p(e){let t=e.toLowerCase();return f.filter(e=>e.word===t)}function m(e){return f.some(t=>t.word===e.toLowerCase())}const h=new Map([[`á`,new Set([`þf`,`þgf`])],[`í`,new Set([`þf`,`þgf`])],[`við`,new Set([`þf`,`þgf`])],[`með`,new Set([`þf`,`þgf`])],[`undir`,new Set([`þf`,`þgf`])],[`yfir`,new Set([`þf`,`þgf`])],[`fyrir`,new Set([`þf`,`þgf`])],[`um`,new Set([`þf`])],[`gegnum`,new Set([`þf`])],[`kringum`,new Set([`þf`])],[`umhverfis`,new Set([`þf`])],[`af`,new Set([`þgf`])],[`frá`,new Set([`þgf`])],[`hjá`,new Set([`þgf`])],[`úr`,new Set([`þgf`])],[`að`,new Set([`þgf`])],[`móti`,new Set([`þgf`])],[`nálægt`,new Set([`þgf`])],[`gegn`,new Set([`þgf`])],[`gagnvart`,new Set([`þgf`])],[`handa`,new Set([`þgf`])],[`meðal`,new Set([`ef`])],[`til`,new Set([`ef`])],[`án`,new Set([`ef`])],[`vegna`,new Set([`ef`])],[`sakir`,new Set([`ef`])],[`utan`,new Set([`ef`])],[`innan`,new Set([`ef`])],[`meðfram`,new Set([`þgf`])],[`milli`,new Set([`ef`])],[`auk`,new Set([`ef`])],[`í stað`,new Set([`ef`])]]),g=new Set([`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`]);function _(e,t){return t?h.get(e)?.has(t)??!1:!1}function v(e,t){let n=e.filter(e=>e.pos===`fs`);if(n.length===0)return null;for(let e of n)for(let n of t)if(n.morph?.case&&_(e.lemma,n.morph.case))return{lemma:e.lemma,pos:`fs`,rule:`prep+${n.morph.case}`,confidence:.9};return null}function y(e,t){if(!t)return null;let n=t.toLowerCase();if(!g.has(n))return null;let r=e.filter(e=>e.pos===`so`);return r.length===0||!e.some(e=>e.pos!==`so`)?null:{lemma:(r.find(e=>e.lemma===`eiga`)??r[0]).lemma,pos:`so`,rule:`pronoun+verb`,confidence:.85}}function b(e,t,n){if(!t||!n?.lemmatizeWithPOS)return null;let r=n.lemmatizeWithPOS(t),i=r.find(e=>e.pos===`fs`);if(!i)return null;let a=r.some(e=>e.pos===`fn`),o=e.some(e=>e.pos===`so`);if(a&&o)return null;let s=h.get(i.lemma);if(!s)return null;let c=e.filter(e=>e.pos===`no`);for(let e of c)if(e.morph?.case&&s.has(e.morph.case))return{lemma:e.lemma,pos:`no`,rule:`noun_after_prep+${e.morph.case}`,confidence:.9};return null}function x(e,t,n,r=null){return v(e,n)||b(e,t,r)||y(e,t)||null}function S(e){return h.has(e)}function C(e){return h.get(e)}const w={name:`unambiguous`,run(e){return e.length===1?{lemma:e[0].lemma,pos:e[0].pos,confidence:1}:null}},T={name:`preference_rules`,run(e,t,n){if(!n.usePreferenceRules)return null;for(let n of f){let r=E(n,e,t);if(r)return{lemma:r.lemma,pos:r.pos,confidence:.85}}return null}};function E(e,t,n){let r=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.prefer),i=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.over);if(!r||!i)return null;if(e.context===`before_noun`){let e=n.nextWord;if(e&&/^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(e))return r}else if(e.context===`before_verb`){let e=n.nextWord?.toLowerCase();if(e&&![`þessi`,`þetta`,`sá`,`sú`,`það`,`hinn`,`hin`,`hið`].includes(e))return r}else if(e.context===`after_pronoun`){let e=n.prevWord?.toLowerCase();if(e&&[`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`].includes(e))return r}return null}const D=[w,T,{name:`grammar_rules`,run(e,t,n){if(!n.useGrammarRules)return null;let r=e.map(e=>({...e,morph:void 0})),i=t.allTokens[t.index];if(i){let e=n.getMorph(i);e&&(r.length=0,r.push(...e))}let a=x(r,t.prevWord,t.nextWordMorph??[],n.lemmatizer);return a?{lemma:a.lemma,pos:a.pos,confidence:a.confidence}:null}},{name:`word_bigrams`,run(e,t,n){if(!n.bigrams||e.length===0)return null;let r=[];for(let i of e){let e=0;if(t.prevWord){let r=t.prevLemmas||n.lemmatizer.lemmatize(t.prevWord);for(let t of r){let r=n.bigrams.freq(t,i.lemma);r>0&&(e+=Math.log(r+1)*n.leftWeight)}}if(t.nextWord){let r=t.nextLemmas||n.lemmatizer.lemmatize(t.nextWord);for(let t of r){let r=n.bigrams.freq(i.lemma,t);r>0&&(e+=Math.log(r+1)*n.rightWeight)}}r.push({candidate:i,score:e})}if(r.sort((e,t)=>t.score-e.score),r.length>0&&r[0].score>0){let e=r[0].score,t=r.reduce((e,t)=>e+Math.exp(t.score),0),n=t>0?Math.exp(e)/t:.5;return{lemma:r[0].candidate.lemma,pos:r[0].candidate.pos,confidence:n}}return null}},{name:`fallback`,run(e){return e.length>0?{lemma:e[0].lemma,pos:e[0].pos,confidence:1/e.length}:null}}];var O=class{lemmatizer;bigrams;leftWeight;rightWeight;usePreferenceRules;useGrammarRules;morphCache;constructor(e,t=null,n={}){this.lemmatizer=e,this.bigrams=t,this.leftWeight=n.leftWeight??1,this.rightWeight=n.rightWeight??1,this.usePreferenceRules=n.usePreferenceRules??!0,this.useGrammarRules=n.useGrammarRules??!0,this.morphCache=this.lemmatizer.lemmatizeWithMorph?new Map:null}getMorph(e){if(!this.lemmatizer.lemmatizeWithMorph||!this.morphCache)return;let t=e.toLowerCase(),n=this.morphCache.get(t);if(n)return n;let r=this.lemmatizer.lemmatizeWithMorph(e);return this.morphCache.set(t,r),r}disambiguate(e,t,n,r={}){let i;i=this.lemmatizer.lemmatizeWithPOS?this.lemmatizer.lemmatizeWithPOS(e):this.lemmatizer.lemmatize(e).map(e=>({lemma:e,pos:`no`}));let a=i.map(e=>e.lemma),o=e,s;n&&(s=this.getMorph(n));let c={prevWord:t,nextWord:n,prevLemmas:r.prevLemmas,nextLemmas:r.nextLemmas,nextWordMorph:s,allTokens:[e],index:0};for(let e of D){let t=e.run(i,c,this);if(t)return{token:o,lemma:t.lemma,pos:t.pos,candidates:a,candidatesWithPOS:i,ambiguous:a.length>1,confidence:t.confidence,resolvedBy:e.name}}return{token:o,lemma:e.toLowerCase(),candidates:a,candidatesWithPOS:i,ambiguous:!1,confidence:0,resolvedBy:`none`}}disambiguateAll(e){let t=[];for(let n=0;n<e.length;n++){let r=e[n],i=n>0?e[n-1]:null,a=n<e.length-1?e[n+1]:null;t.push(this.disambiguate(r,i,a))}return t}extractLemmas(e){let t=new Set,n=this.disambiguateAll(e);for(let e of n)t.add(e.lemma);return t}};function k(e,n,r,i={}){let{tokenize:a,removeStopwords:o}=i,s=a?a(e):e.split(/\s+/).filter(e=>e.length>0).map(e=>e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``)).filter(e=>e.length>0),c=new O(n,r).extractLemmas(s);if(o)for(let e of c)t.has(e)&&c.delete(e);return c}const A={no:`noun`,so:`verb`,lo:`adjective`,ao:`adverb`,fs:`preposition`,fn:`pronoun`,st:`conjunction`,to:`numeral`,gr:`article`,uh:`interjection`},j={no:`nafnorð`,so:`sagnorð`,lo:`lýsingarorð`,ao:`atviksorð`,fs:`forsetning`,fn:`fornafn`,st:`samtenging`,to:`töluorð`,gr:`greinir`,uh:`upphrópun`},M={nf:`nominative`,þf:`accusative`,þgf:`dative`,ef:`genitive`},N={kk:`masculine`,kvk:`feminine`,hk:`neuter`},P={et:`singular`,ft:`plural`};var F=class e{bits;sizeBits;hashCount;constructor(e,t,n){this.bits=e,this.sizeBits=t,this.hashCount=n}static fromValues(t,n={}){let r=Math.max(t.length,1),i=n.falsePositiveRate??.01,a=Math.max(1,Math.ceil(-r*Math.log(i)/(Math.LN2*Math.LN2))),o=Math.max(1,Math.round(a/r*Math.LN2)),s=n.maxHashFunctions?Math.min(o,n.maxHashFunctions):o,c=Math.ceil(a/8),l=new e(new Uint8Array(c),a,s);for(let e of t)l.add(e);return l}add(e){let[t,n]=this.hashes(e);for(let e=0;e<this.hashCount;e++){let r=(t+e*n)%this.sizeBits;this.setBit(r)}}has(e){let[t,n]=this.hashes(e);for(let e=0;e<this.hashCount;e++){let r=(t+e*n)%this.sizeBits;if(!this.getBit(r))return!1}return!0}setBit(e){let t=e>>>3,n=e&7;this.bits[t]|=1<<n}getBit(e){let t=e>>>3,n=e&7;return(this.bits[t]&1<<n)!=0}hashes(e){let t=e.toLowerCase(),n=2166136261,r=2166136261;for(let e=0;e<t.length;e++){let i=t.charCodeAt(e);n^=i,n=Math.imul(n,16777619)>>>0,r^=i,r=Math.imul(r,2166136261)>>>0}return r^=r>>>13,r=Math.imul(r,2246822507)>>>0,r^=r>>>16,[n>>>0,r>>>0||668265261]}};const I=new Set(`ísland.england.írland.skotland.finnland.grænland.holland.þýskaland.frakkland.pólland.tékkland.svissland.rússland.eistland.lettland.litháen.danmörk.noregur.svíþjóð.bandaríkin.spánn.portúgal.ítalía.grikkland.þingvellir.akureyri.ísafjörður.reykjavík.keflavík.hafnarfjörður.kópavogur.seltjarnarnes.garðabær.mosfellsbær.vestmannaeyjar.húsavík.sauðárkrókur.siglufjörður.ólafsfjörður.dalvík.egilsstaðir.neskaupstaður.seyðisfjörður.eskifjörður.reyðarfjörður.fáskrúðsfjörður.stöðvarfjörður.djúpivogur.höfn.vík.selfoss.hveragerði.þorlákshöfn.grindavík.sandgerði.borgarnes.stykkishólmur.grundarfjörður.ólafsvík.búðardalur.patreksfjörður.flateyri.suðureyri.bolungarvík.hólmavík.hvammstangi.blönduós.skagaströnd.varmahlíð.hlíðarendi.bergþórshvol.íslandsbanki.landsbankinn.arionbanki.alþingi`.split(`.`)),L=new Set(`maður.kona.stjóri.ráðherra.forseti.formaður.fulltrúi.starfsmaður.hús.staður.vegur.borg.bær.dalur.fjörður.félag.banki.sjóður.stofnun.ráð.rannsókn.greiðsla.mál.kerfi.verk.þjónusta.rekstur.viðskipti.verð.kostnaður`.split(`.`)),R=new Set([`vera`,`hafa`,`gera`,`fara`,`koma`,`segja`,`vilja`,`mega`,`þurfa`,`verða`,`geta`,`sjá`,`taka`,`eiga`,`láta`,`halda`,`leyfa`,`búa`]),z=[`s`,`u`,`a`];var B=class{lemmatizer;minPartLength;tryLinkingLetters;knownLemmas;mode;constructor(e,t,n={}){this.lemmatizer=e,this.knownLemmas=t,this.minPartLength=n.minPartLength??3,this.tryLinkingLetters=n.tryLinkingLetters??!0,this.mode=n.mode??`balanced`}noSplit(e,t){return{word:e,parts:t,indexTerms:t,confidence:0,isCompound:!1}}split(e){let t=e.toLowerCase(),n=this.lemmatizer.lemmatize(e),r=n[0]?.toLowerCase();if(r&&I.has(r)||I.has(t))return this.noSplit(e,n);let i=n.length>0&&n[0].toLowerCase()!==t,a=n.length===1;if(this.mode===`conservative`)return e.includes(`-`)?this.splitAtHyphen(e,n):this.noSplit(e,n);if(this.mode===`balanced`&&i&&a&&t.length<12||t.length<this.minPartLength*2)return this.noSplit(e,n);let o=[];for(let e=this.minPartLength;e<=t.length-this.minPartLength;e++){let n=t.slice(0,e),r=t.slice(e),i=this.trySplit(n,r);if(i&&o.push(i),this.tryLinkingLetters){for(let e of z)if(n.endsWith(e)&&n.length>this.minPartLength){let e=n.slice(0,-1),t=this.trySplit(e,r);t&&o.push({...t,score:t.score*.95})}}}if(o.length===0)return this.noSplit(e,n);o.sort((e,t)=>t.score-e.score);let s=o[0];if(this.mode===`balanced`&&i&&s.score<.6)return this.noSplit(e,n);let c=[...new Set([...s.leftParts,...s.rightParts])];return{word:e,parts:c,indexTerms:[...new Set([...c,t])],confidence:Math.min(s.score,1),isCompound:!0}}splitAtHyphen(e,t){let n=e.split(`-`).filter(e=>e.length>0);if(n.length<2)return this.noSplit(e,t);let r=[];for(let e of n){let t=this.lemmatizer.lemmatize(e);r.push(...t)}let i=[...new Set(r)];return{word:e,parts:i,indexTerms:[...new Set([...i,e.toLowerCase()])],confidence:.9,isCompound:!0}}trySplit(e,t){let n=this.lemmatizer.lemmatize(e),r=this.lemmatizer.lemmatize(t),i=[...new Set(n.filter(e=>this.knownLemmas.has(e)))],a=[...new Set(r.filter(e=>this.knownLemmas.has(e)))];if(i.length===0||a.length===0)return null;let o=0,s=1-Math.abs(e.length-t.length)/(e.length+t.length);o+=s*.2;let c=(e.length+t.length)/2,l=Math.min(c/6,1);o+=l*.2,a.some(e=>L.has(e))&&(o+=.3);let u=i.some(e=>R.has(e)),d=a.some(e=>R.has(e));return u&&d?o-=.3:!u&&!d&&(o+=.2),(e.length<4||t.length<4)&&(o-=.15),{leftParts:i,rightParts:a,score:Math.max(0,o)}}getAllLemmas(e){return this.split(e).indexTerms}};function V(e){return new Set(e.map(e=>e.toLowerCase()))}function H(e,t={}){let n=e.map(e=>e.toLowerCase());return F.fromValues(n,t)}const U=new Map([[`til dæmis`,{lemma:`til dæmi`,isStopword:!0,pos:`ao`}],[`með öðrum orðum`,{lemma:`með annar orð`,isStopword:!0,pos:`ao`}],[`í raun`,{lemma:`í raun`,isStopword:!0,pos:`ao`}],[`í raun og veru`,{lemma:`í raun og vera`,isStopword:!0,pos:`ao`}],[`af og til`,{lemma:`af og til`,isStopword:!0,pos:`ao`}],[`aftur á móti`,{lemma:`aftur á mót`,isStopword:!0,pos:`ao`}],[`alla vega`,{lemma:`allur vegur`,isStopword:!0,pos:`ao`}],[`alls ekki`,{lemma:`alls ekki`,isStopword:!0,pos:`ao`}],[`alls staðar`,{lemma:`allur staður`,isStopword:!0,pos:`ao`}],[`allt í allt`,{lemma:`allur í allur`,isStopword:!0,pos:`ao`}],[`annars vegar`,{lemma:`annar vegur`,isStopword:!0,pos:`ao`}],[`auk þess`,{lemma:`auk það`,isStopword:!0,pos:`ao`}],[`að auki`,{lemma:`að auki`,isStopword:!0,pos:`ao`}],[`að vísu`,{lemma:`að vís`,isStopword:!0,pos:`ao`}],[`að sjálfsögðu`,{lemma:`að sjálfsagður`,isStopword:!0,pos:`ao`}],[`að minnsta kosti`,{lemma:`að lítill kostur`,isStopword:!0,pos:`ao`}],[`að öllu leyti`,{lemma:`að allur leyti`,isStopword:!0,pos:`ao`}],[`að nokkru leyti`,{lemma:`að nokkur leyti`,isStopword:!0,pos:`ao`}],[`ef til vill`,{lemma:`ef til vilja`,isStopword:!0,pos:`ao`}],[`einhvers staðar`,{lemma:`einhver staður`,isStopword:!0,pos:`ao`}],[`einhvern veginn`,{lemma:`einhver vegur`,isStopword:!0,pos:`ao`}],[`ekki síst`,{lemma:`ekki síður`,isStopword:!0,pos:`ao`}],[`engu að síður`,{lemma:`enginn að síður`,isStopword:!0,pos:`ao`}],[`fyrst og fremst`,{lemma:`snemma og fremri`,isStopword:!0,pos:`ao`}],[`hins vegar`,{lemma:`hinn vegur`,isStopword:!0,pos:`ao`}],[`hér og þar`,{lemma:`hér og þar`,isStopword:!0,pos:`ao`}],[`hér um bil`,{lemma:`hér um bil`,isStopword:!0,pos:`ao`}],[`hér á landi`,{lemma:`hér á land`,isStopword:!0,pos:`ao`}],[`hvað mest`,{lemma:`hvað mjög`,isStopword:!0,pos:`ao`}],[`hverju sinni`,{lemma:`hver sinn`,isStopword:!0,pos:`ao`}],[`hvorki né`,{lemma:`hvorki né`,isStopword:!0,pos:`ao`}],[`í burtu`,{lemma:`í burtu`,isStopword:!0,pos:`ao`}],[`í gær`,{lemma:`í gær`,isStopword:!0,pos:`ao`}],[`í senn`,{lemma:`í senn`,isStopword:!0,pos:`ao`}],[`í sífellu`,{lemma:`í sífella`,isStopword:!0,pos:`ao`}],[`lengi vel`,{lemma:`lengi vel`,isStopword:!0,pos:`ao`}],[`meira að segja`,{lemma:`mikill að segja`,isStopword:!0,pos:`ao`}],[`meira og minna`,{lemma:`mikill og lítill`,isStopword:!0,pos:`ao`}],[`meðal annars`,{lemma:`meðal annar`,isStopword:!0,pos:`ao`}],[`nokkurn veginn`,{lemma:`nokkur vegur`,isStopword:!0,pos:`ao`}],[`og svo framvegis`,{lemma:`og svo framvegis`,isStopword:!0,pos:`ao`}],[`satt að segja`,{lemma:`sannur að segja`,isStopword:!0,pos:`ao`}],[`sem betur fer`,{lemma:`sem vel fara`,isStopword:!0,pos:`ao`}],[`smám saman`,{lemma:`smátt saman`,isStopword:!0,pos:`ao`}],[`svo sem`,{lemma:`svo sem`,isStopword:!0,pos:`ao`}],[`sér í lagi`,{lemma:`sér í lag`,isStopword:!0,pos:`ao`}],[`til og frá`,{lemma:`til og frá`,isStopword:!0,pos:`ao`}],[`til baka`,{lemma:`til baka`,isStopword:!0,pos:`ao`}],[`vítt og breitt`,{lemma:`vítt og breitt`,isStopword:!0,pos:`ao`}],[`á ný`,{lemma:`á ný`,isStopword:!0,pos:`ao`}],[`á meðan`,{lemma:`á meðan`,isStopword:!0,pos:`ao`}],[`á sama tíma`,{lemma:`á samur tími`,isStopword:!0,pos:`ao`}],[`á hinn bóginn`,{lemma:`á hinn bógur`,isStopword:!0,pos:`ao`}],[`þar af leiðandi`,{lemma:`þar af leiða`,isStopword:!0,pos:`ao`}],[`þar að auki`,{lemma:`þar að auki`,isStopword:!0,pos:`ao`}],[`það er að segja`,{lemma:`það vera að segja`,isStopword:!0,pos:`ao`}],[`þess vegna`,{lemma:`það vegna`,isStopword:!0,pos:`ao`}],[`því miður`,{lemma:`það lítt`,isStopword:!0,pos:`ao`}],[`þrátt fyrir`,{lemma:`þrátt fyrir`,isStopword:!0,pos:`ao`}],[`á dögunum`,{lemma:`á dagur`,isStopword:!0,pos:`ao`}],[`á sínum tíma`,{lemma:`á sinn tími`,isStopword:!0,pos:`ao`}],[`á endanum`,{lemma:`á endi`,isStopword:!0,pos:`ao`}],[`einu sinni`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`eitt sinn`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`í fyrsta sinn`,{lemma:`í fyrstur sinn`,isStopword:!1,pos:`ao`}],[`í kvöld`,{lemma:`í kvöld`,isStopword:!1,pos:`ao`}],[`í morgun`,{lemma:`í morgunn`,isStopword:!1,pos:`ao`}],[`á morgun`,{lemma:`á morgunn`,isStopword:!1,pos:`ao`}],[`fyrir hönd`,{lemma:`fyrir hönd`,isStopword:!1,pos:`fs`}],[`með tilliti til`,{lemma:`með tillit til`,isStopword:!1,pos:`fs`}],[`í ljósi`,{lemma:`í ljós`,isStopword:!1,pos:`fs`}],[`í stað`,{lemma:`í staður`,isStopword:!1,pos:`fs`}],[`fyrir aftan`,{lemma:`fyrir aftan`,isStopword:!1,pos:`fs`}],[`fyrir austan`,{lemma:`fyrir austan`,isStopword:!1,pos:`fs`}],[`fyrir framan`,{lemma:`fyrir framan`,isStopword:!1,pos:`fs`}],[`fyrir handan`,{lemma:`fyrir handan`,isStopword:!1,pos:`fs`}],[`fyrir innan`,{lemma:`fyrir innan`,isStopword:!1,pos:`fs`}],[`fyrir neðan`,{lemma:`fyrir neðan`,isStopword:!1,pos:`fs`}],[`fyrir norðan`,{lemma:`fyrir norðan`,isStopword:!1,pos:`fs`}],[`fyrir ofan`,{lemma:`fyrir ofan`,isStopword:!1,pos:`fs`}],[`fyrir sunnan`,{lemma:`fyrir sunnan`,isStopword:!1,pos:`fs`}],[`fyrir utan`,{lemma:`fyrir utan`,isStopword:!1,pos:`fs`}],[`fyrir vestan`,{lemma:`fyrir vestan`,isStopword:!1,pos:`fs`}],[`í gegnum`,{lemma:`í gegnum`,isStopword:!1,pos:`fs`}],[`í kringum`,{lemma:`í kringum`,isStopword:!1,pos:`fs`}],[`innan við`,{lemma:`innan við`,isStopword:!1,pos:`fs`}],[`upp úr`,{lemma:`upp úr`,isStopword:!1,pos:`fs`}],[`þvert á`,{lemma:`þvert á`,isStopword:!1,pos:`fs`}],[`þar eð`,{lemma:`þar eð`,isStopword:!0,pos:`st`}],[`sameinuðu þjóðirnar`,{lemma:`Sameinuðu þjóðirnar`,isStopword:!1,pos:`entity`}],[`evrópusambandið`,{lemma:`Evrópusambandið`,isStopword:!1,pos:`entity`}],[`nato`,{lemma:`NATO`,isStopword:!1,pos:`entity`}],[`nató`,{lemma:`NATO`,isStopword:!1,pos:`entity`}]]);function W(e,t){for(let n=Math.min(4,e.length-t);n>=2;n--){let r=e.slice(t,t+n).join(` `).toLowerCase(),i=U.get(r);if(i)return{phrase:i,wordCount:n}}return null}function G(e){return U.has(e.toLowerCase())}function K(e){return U.get(e.toLowerCase())}const q=new Set([`word`]),J=new Set([`person`,`company`,`entity`]),Y=new Set([`punctuation`,`s_begin`,`s_end`,`s_split`,`unknown`]),X=[`arinnar`,`anna`,`unum`,`um`,`ir`,`ar`,`ur`,`a`,`i`,`ið`,`inn`,`in`];function Z(t,n,r={}){let{bigrams:i,compoundSplitter:a,includeNumbers:o=!1,alwaysTryCompounds:s=!0}=r,c=e(t),l=[],u=[],d=new Map,f=`bigramCountValue`in n?n.bigramCountValue===0:!1,p=(e,t)=>t.length===1&&t[0]===e.toLowerCase(),m=e=>{let t=e,r=null;for(let e=0;e<2;e++){let e=t.toLowerCase();r=null;for(let i of X){if(!e.endsWith(i))continue;let a=t.slice(0,t.length-i.length);if(a.length<3)continue;let o=n.lemmatize(a);if(!p(a,o))return o;r||=a}if(!r||r.length<6)break;t=r}return null},h=e=>{let t=e.toLowerCase(),r=d.get(t);if(r)return r;let i=n.lemmatize(e);if(f&&p(e,i)&&e.length>=6){let n=m(e);if(n)return d.set(t,n),n}return d.set(t,i),i};for(let e=0;e<c.length;e++){let t=c[e];if(!Y.has(t.kind)){if(J.has(t.kind)){l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!0});continue}if(t.kind===`number`||t.kind===`ordinal`){o&&l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1});continue}if(q.has(t.kind)){let e=t.text??``,n=h(e),r={original:e,kind:t.kind,lemmas:n,isEntity:!1},i=n.length===1&&n[0]===e.toLowerCase();if(a&&(s||i)){let t=a.split(e);if(t.isCompound){r.compoundSplit=t;let e=t.parts.flatMap(e=>h(e));r.compoundLemmas=e,r.lemmas=[...new Set([...n,...e])]}}l.push(r),u.push({index:l.length-1,token:t});continue}l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1})}}if(i&&u.length>0){let e=new O(n,i);for(let t=0;t<u.length;t++){let{index:n,token:r}=u[t],i=t>0?u[t-1].token:null,a=t<u.length-1?u[t+1].token:null,o=e.disambiguate(r.text??``,i?.text??null,a?.text??null,{prevLemmas:i?.text?h(i.text):void 0,nextLemmas:a?.text?h(a.text):void 0});l[n].disambiguated=o.lemma,l[n].confidence=o.confidence}}else for(let{index:e}of u){let t=l[e];t.lemmas.length>0&&(t.disambiguated=t.lemmas[0],t.confidence=t.lemmas.length===1?1:.5)}return l}function Q(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1}=r,c=Z(e,n,r),l=new Set,u=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of c)if(!e.isEntity){if(o)for(let t of e.lemmas)u(t)||l.add(t);else e.disambiguated&&(u(e.disambiguated)||l.add(e.disambiguated));if(e.compoundSplit?.isCompound){let t=e.compoundLemmas?e.compoundLemmas:e.compoundSplit.parts.flatMap(e=>n.lemmatize(e));for(let e of t)u(e)||l.add(e)}}return l}function $(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1,andOperator:c=` & `,orOperator:l=` | `,wrapGroups:u=!0,includeOriginal:d=!1,lowercaseOriginal:f=!0}=r,p=Z(e,n,r),m=[],h=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of p){if(e.isEntity)continue;let t=[];if(o?t=e.lemmas:e.disambiguated&&(t=[e.disambiguated]),d){let n=e.original??``;if(n.length>0){let e=f?n.toLowerCase():n;t=[...t,e]}}let n=[...new Set(t.filter(e=>e&&!h(e)))];n.length>0&&m.push(n)}return{groups:m,query:m.map(e=>{let t=e.join(l);return u&&e.length>1?`(${t})`:t}).filter(e=>e.length>0).join(c)}}function ee(e,t,n,r={}){let i=performance.now(),a,o;switch(n){case`naive`:{let n=e.split(/\s+/).filter(e=>e.length>0),r=[];for(let e of n){let n=e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``);if(n){let e=t.lemmatize(n);r.push({original:n,kind:`word`,lemmas:e,isEntity:!1,disambiguated:e[0],confidence:e.length===1?1:.5})}}a=r,o=new Set(r.map(e=>e.disambiguated).filter(Boolean));break}case`tokenized`:a=Z(e,t),o=new Set(a.filter(e=>e.kind===`word`&&e.lemmas.length>0).map(e=>e.lemmas[0]));break;case`disambiguated`:a=Z(e,t,{bigrams:r.bigrams}),o=Q(e,t,{bigrams:r.bigrams});break;case`full`:a=Z(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter}),o=Q(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter});break}let s=performance.now()-i,c=a.filter(e=>e.kind===`word`),l=c.length,u=c.filter(e=>e.lemmas.length>0&&!(e.lemmas.length===1&&e.lemmas[0]===e.original.toLowerCase())).length,d=c.filter(e=>e.lemmas.length>1).length,f=c.filter(e=>e.confidence!==void 0).map(e=>e.confidence),p=f.length>0?f.reduce((e,t)=>e+t,0)/f.length:0,m=c.filter(e=>e.compoundSplit?.isCompound).length,h=a.filter(e=>e.isEntity).length;return{wordCount:l,lemmatizedCount:u,coverage:l>0?u/l:0,ambiguousCount:d,ambiguityRate:l>0?d/l:0,avgConfidence:p,compoundsFound:m,entitiesSkipped:h,uniqueLemmas:o.size,timeMs:s}}export{d as BinaryLemmatizer,M as CASE_NAMES,r as CONTEXTUAL_STOPWORDS,B as CompoundSplitter,f as DISAMBIGUATION_RULES,O as Disambiguator,N as GENDER_NAMES,g as NOMINATIVE_PRONOUNS,P as NUMBER_NAMES,h as PREPOSITION_CASES,I as PROTECTED_LEMMAS,U as STATIC_PHRASES,t as STOPWORDS_IS,A as WORD_CLASS_NAMES,j as WORD_CLASS_NAMES_IS,x as applyGrammarRules,b as applyNounAfterPrepositionRule,v as applyPrepositionRule,y as applyPronounVerbRule,$ as buildSearchQuery,_ as canGovernCase,H as createKnownLemmaFilter,V as createKnownLemmaSet,k as extractDisambiguatedLemmas,Q as extractIndexableLemmas,C as getGovernedCases,K as getPhraseInfo,p as getRulesForWord,m as hasDisambiguationRules,i as isContextualStopword,G as isKnownPhrase,S as isKnownPreposition,n as isStopword,W as matchPhrase,Z as processText,a as removeStopwords,ee as runBenchmark};
1
+ import{tokenize as e}from"tokenize-is";const t=new Set(`á.að.aðra.aðrar.aðrir.af.alla.allan.allar.allir.allnokkra.allnokkrar.allnokkrir.allnokkru.allnokkrum.allnokkuð.allnokkur.allnokkurn.allnokkurra.allnokkurrar.allnokkurri.allnokkurs.allnokkurt.allra.allrar.allri.alls.allt.alltað.allur.án.andspænis.annað.annaðhvort.annan.annar.annarra.annarrar.annarri.annars.árla.ásamt.auk.austan.austanundir.austur.báða.báðar.báðir.báðum.bæði.bak.beggja.eða.eður.ef.eftir.ég.ein.eina.einar.einhver.einhverja.einhverjar.einhverjir.einhverju.einhverjum.einhvern.einhverra.einhverrar.einhverri.einhvers.einir.einn.einna.einnar.einni.eins.einskis.einu.einum.eitt.eitthvað.eitthvert.ekkert.ella.ellegar.en.enda.enga.engan.engar.engin.enginn.engir.engra.engrar.engri.engu.engum.er.fáein.fáeina.fáeinar.fáeinir.fáeinna.fáeinum.fjær.fjarri.flestalla.flestallan.flestallar.flestallir.flestallra.flestallrar.flestallri.flestalls.flestallt.flestallur.flestöll.flestöllu.flestöllum.frá.fram.fyrir.fyrst.gagnstætt.gagnvart.gegn.gegnt.gegnum.hana.handa.handan.hann.hans.heldur.hennar.henni.hið.hin.hina.hinar.hinir.hinn.hinna.hinnar.hinni.hins.hinu.hinum.hitt.hjá.honum.hún.hvað.hvaða.hvenær.hver.hverja.hverjar.hverjir.hverju.hverjum.hvern.hverra.hverrar.hverri.hvers.hvert.hvílík.hvílíka.hvílíkan.hvílíkar.hvílíkir.hvílíkra.hvílíkrar.hvílíkri.hvílíks.hvílíkt.hvílíku.hvílíkum.hvílíkur.hvor.hvora.hvorar.hvorir.hvorki.hvorn.hvorra.hvorrar.hvorri.hvors.hvort.hvoru.hvorug.hvoruga.hvorugan.hvorugar.hvorugir.hvorugra.hvorugrar.hvorugri.hvorugs.hvorugt.hvorugu.hvorugum.hvorugur.hvorum.í.inn.innan.innanundir.jafnframt.jafnhliða.kring.kringum.með.meðal.meðan.meður.mér.mestalla.mestallan.mestallar.mestallir.mestallra.mestallrar.mestallri.mestalls.mestallt.mestallur.mestöll.mestöllu.mestöllum.miðli.mig.milli.millum.mín.mína.mínar.mínir.minn.minna.minnar.minni.míns.mínu.mínum.mitt.mót.móti.nær.nærri.næst.næstum.nálægt.né.neðan.nein.neina.neinar.neinir.neinn.neinna.neinnar.neinni.neins.neinu.neinum.neitt.nema.niður.nokkra.nokkrar.nokkrir.nokkru.nokkrum.nokkuð.nokkur.nokkurn.nokkurra.nokkurrar.nokkurri.nokkurs.nokkurt.norðan.nú.öðru.öðrum.of.ofan.ofar.og.óháð.okkar.okkur.öll.öllu.öllum.önnur.órafjarri.oss.sá.sakir.sama.saman.samar.samfara.samhliða.sami.samir.samkvæmt.samra.samrar.samri.sams.samskipa.samt.samtímis.samur.sem.sér.sérhvað.sérhver.sérhverja.sérhverjar.sérhverjir.sérhverju.sérhverjum.sérhvern.sérhverra.sérhverrar.sérhverri.sérhvers.sérhvert.síðan.síðla.sig.sín.sína.sínar.sínhver.sínhverja.sínhverjar.sínhverjir.sínhverju.sínhverjum.sínhvern.sínhverra.sínhverrar.sínhverri.sínhvers.sínhvert.sínhvor.sínhvora.sínhvorar.sínhvorir.sínhvorn.sínhvorra.sínhvorrar.sínhvorri.sínhvors.sínhvort.sínhvoru.sínhvorum.sínir.sinn.sinna.sinnar.sinnhver.sinnhverja.sinnhverjar.sinnhverjir.sinnhverju.sinnhverjum.sinnhvern.sinnhverra.sinnhverrar.sinnhverri.sinnhvers.sinnhvert.sinnhvor.sinnhvora.sinnhvorar.sinnhvorir.sinnhvorn.sinnhvorra.sinnhvorrar.sinnhvorri.sinnhvors.sinnhvort.sinnhvoru.sinnhvorum.sinni.síns.sínu.sínum.sitt.sitthvað.sitthver.sitthverja.sitthverjar.sitthverjir.sitthverju.sitthverjum.sitthvern.sitthverra.sitthverrar.sitthverri.sitthvers.sitthvert.sitthvor.sitthvora.sitthvorar.sitthvorir.sitthvorn.sitthvorra.sitthvorrar.sitthvorri.sitthvors.sitthvort.sitthvoru.sitthvorum.sjálf.sjálfa.sjálfan.sjálfar.sjálfir.sjálfra.sjálfrar.sjálfri.sjálfs.sjálft.sjálfu.sjálfum.sjálfur.slík.slíka.slíkan.slíkar.slíkir.slíkra.slíkrar.slíkri.slíks.slíkt.slíku.slíkum.slíkur.snemma.sökum.söm.sömu.sömum.sú.sum.suma.suman.sumar.sumir.sumra.sumrar.sumri.sums.sumt.sumu.sumum.sumur.sunnan.svo.til.tráss.um.umfram.umhverfis.undan.undir.uns.upp.úr.út.utan.útundan.vegna.vér.vestan.vestur.vettugi.við.viður.vor.vora.vorar.vorir.vorn.vorra.vorrar.vorri.vors.vort.voru.vorum.yðar.yður.yfir.ykkar.ykkur.ýmis.ýmiss.ýmissa.ýmissar.ýmissi.ýmist.ýmsa.ýmsan.ýmsar.ýmsir.ýmsu.ýmsum.þá.það.þær.þann.þar.þau.þegar.þeim.þeir.þeirra.þeirrar.þeirri.þennan.þér.þess.þessa.þessar.þessara.þessarar.þessari.þessi.þessir.þessu.þessum.þetta.þið.þig.þín.þína.þínar.þínir.þinn.þinna.þinnar.þinni.þíns.þínu.þínum.þitt.þó.þónokkra.þónokkrar.þónokkrir.þónokkru.þónokkrum.þónokkuð.þónokkur.þónokkurn.þónokkurra.þónokkurrar.þónokkurri.þónokkurs.þónokkurt.þótt.þú.því.þvílík.þvílíka.þvílíkan.þvílíkar.þvílíkir.þvílíkra.þvílíkrar.þvílíkri.þvílíks.þvílíkt.þvílíku.þvílíkum.þvílíkur`.split(`.`));function n(e){return t.has(e.toLowerCase())}const r=new Map([[`á`,new Set([`fs`,`ao`])],[`við`,new Set([`fs`,`fn`])],[`af`,new Set([`fs`,`ao`])],[`til`,new Set([`fs`])],[`um`,new Set([`fs`])],[`frá`,new Set([`fs`])],[`yfir`,new Set([`fs`,`ao`])],[`undir`,new Set([`fs`,`ao`])],[`fyrir`,new Set([`fs`,`ao`])],[`eftir`,new Set([`fs`,`ao`])],[`gegn`,new Set([`fs`])],[`hjá`,new Set([`fs`])],[`úr`,new Set([`fs`])],[`í`,new Set([`fs`])]]);function i(e,n){let i=e.toLowerCase(),a=r.get(i);return a&&n?a.has(n):t.has(i)}function a(e){return e.filter(e=>!n(e))}const o=1279610177,s=[`no`,`so`,`lo`,`ao`,`fs`,`fn`,`st`,`to`,`gr`,`uh`],c=[void 0,`nf`,`þf`,`þgf`,`ef`],l=[void 0,`kk`,`kvk`,`hk`],u=[`et`,`ft`];var d=class e{buffer;stringPool;lemmaOffsets;lemmaLengths;wordOffsets;wordLengths;entryOffsets;entries;bigramW1Offsets;bigramW1Lengths;bigramW2Offsets;bigramW2Lengths;bigramFreqs;lemmaCount;wordCount;entryCount;bigramCount;version;decoder=new TextDecoder(`utf-8`);constructor(e){this.buffer=e;let t=new DataView(e),n=t.getUint32(0,!0);if(n!==o)throw Error(`Invalid binary format: expected magic 0x${o.toString(16)}, got 0x${n.toString(16)}`);if(this.version=t.getUint32(4,!0),this.version!==1&&this.version!==2)throw Error(`Unsupported version: ${this.version}`);let r=t.getUint32(8,!0);this.lemmaCount=t.getUint32(12,!0),this.wordCount=t.getUint32(16,!0),this.entryCount=t.getUint32(20,!0),this.bigramCount=t.getUint32(24,!0);let i=32;this.stringPool=new Uint8Array(e,i,r),i+=r,this.lemmaOffsets=new Uint32Array(e,i,this.lemmaCount),i+=this.lemmaCount*4,this.lemmaLengths=new Uint8Array(e,i,this.lemmaCount),i+=this.lemmaCount,i=i+3&-4,this.wordOffsets=new Uint32Array(e,i,this.wordCount),i+=this.wordCount*4,this.wordLengths=new Uint8Array(e,i,this.wordCount),i+=this.wordCount,i=i+3&-4,this.entryOffsets=new Uint32Array(e,i,this.wordCount+1),i+=(this.wordCount+1)*4,this.entries=new Uint32Array(e,i,this.entryCount),i+=this.entryCount*4,this.bigramW1Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW1Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramW2Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW2Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramFreqs=new Uint32Array(e,i,this.bigramCount)}static async load(t,n={}){let r=await(n.fetch??fetch)(t);if(!r.ok)throw Error(`Failed to load binary data: ${r.status}`);return new e(await r.arrayBuffer())}static loadFromBuffer(t){return new e(t)}getString(e,t){return this.decoder.decode(this.stringPool.subarray(e,e+t))}getLemma(e){return this.getString(this.lemmaOffsets[e],this.lemmaLengths[e])}getWord(e){return this.getString(this.wordOffsets[e],this.wordLengths[e])}findWord(e){let t=0,n=this.wordCount-1;for(;t<=n;){let r=t+n>>>1,i=this.getWord(r);if(i===e)return r;i<e?t=r+1:n=r-1}return-1}lemmatize(e,t={}){let n=e.toLowerCase(),r=this.findWord(n);if(r===-1)return[n];let i=this.entryOffsets[r],a=this.entryOffsets[r+1],{wordClass:o}=t,c=new Set,l=[];for(let e=i;e<a;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=s[n];if(o&&r!==o)continue;let i=this.getLemma(t);c.has(i)||(c.add(i),l.push(i))}return l.length===0?[n]:l}unpackEntry(e){return this.version===1?{lemmaIdx:e>>>4,posCode:e&15,caseCode:0,genderCode:0,numberCode:0}:{lemmaIdx:e>>>10,posCode:e&15,caseCode:e>>>4&7,genderCode:e>>>7&3,numberCode:e>>>9&1}}lemmatizeWithPOS(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=new Set,o=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=this.getLemma(t),i=s[n]??``,c=`${r}:${i}`;a.has(c)||(a.add(c),o.push({lemma:r,pos:i}))}return o}lemmatizeWithMorph(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n,caseCode:r,genderCode:i,numberCode:o}=this.unpackEntry(this.entries[e]),d={},f=c[r],p=l[i],m=u[o];f&&(d.case=f),p&&(d.gender=p),m&&(d.number=m),a.push({lemma:this.getLemma(t),pos:s[n]??``,morph:Object.keys(d).length>0?d:void 0})}return a}hasMorphFeatures(){return this.version>=2}getVersion(){return this.version}findBigram(e,t){let n=0,r=this.bigramCount-1;for(;n<=r;){let i=n+r>>>1,a=this.getString(this.bigramW1Offsets[i],this.bigramW1Lengths[i]);if(a<e)n=i+1;else if(a>e)r=i-1;else{let e=this.getString(this.bigramW2Offsets[i],this.bigramW2Lengths[i]);if(e===t)return i;e<t?n=i+1:r=i-1}}return-1}bigramFreq(e,t){let n=this.findBigram(e.toLowerCase(),t.toLowerCase());return n===-1?0:this.bigramFreqs[n]}freq(e,t){return this.bigramFreq(e,t)}isKnown(e){return this.findWord(e.toLowerCase())!==-1}get lemmaCountValue(){return this.lemmaCount}get wordFormCount(){return this.wordCount}get bigramCountValue(){return this.bigramCount}get bufferSize(){return this.buffer.byteLength}getAllLemmas(){let e=[];for(let t=0;t<this.lemmaCount;t++)e.push(this.getLemma(t));return e}};const f=[{word:`á`,prefer:`so`,over:`fs`,context:`after_pronoun`,description:`á after pronoun = verb 'eiga' (I own, you own)`},{word:`á`,prefer:`fs`,over:`so`,context:`before_noun`,description:`á before noun = preposition (on, at)`},{word:`við`,prefer:`fn`,over:`fs`,context:`sentence_start`,description:`við at sentence start = pronoun 'we'`},{word:`við`,prefer:`fs`,over:`fn`,context:`before_noun`,description:`við before noun = preposition 'by/at'`},{word:`af`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`af before noun = preposition 'of/from'`},{word:`til`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`til before noun = preposition 'to'`},{word:`um`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`um before noun = preposition 'about/around'`},{word:`yfir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`yfir before noun = preposition 'over'`},{word:`undir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`undir before noun = preposition 'under'`},{word:`fyrir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`fyrir before noun = preposition 'for/before'`},{word:`eftir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`eftir before noun = preposition 'after'`},{word:`frá`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`frá before noun = preposition 'from'`},{word:`með`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`með before noun = preposition 'with'`},{word:`í`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`í before noun = preposition 'in'`},{word:`úr`,prefer:`fs`,over:`no`,context:`before_noun`,description:`úr before noun = preposition 'out of'`}];function p(e){let t=e.toLowerCase();return f.filter(e=>e.word===t)}function m(e){return f.some(t=>t.word===e.toLowerCase())}const h=new Map([[`á`,new Set([`þf`,`þgf`])],[`í`,new Set([`þf`,`þgf`])],[`við`,new Set([`þf`,`þgf`])],[`með`,new Set([`þf`,`þgf`])],[`undir`,new Set([`þf`,`þgf`])],[`yfir`,new Set([`þf`,`þgf`])],[`fyrir`,new Set([`þf`,`þgf`])],[`um`,new Set([`þf`])],[`gegnum`,new Set([`þf`])],[`kringum`,new Set([`þf`])],[`umhverfis`,new Set([`þf`])],[`af`,new Set([`þgf`])],[`frá`,new Set([`þgf`])],[`hjá`,new Set([`þgf`])],[`úr`,new Set([`þgf`])],[`að`,new Set([`þgf`])],[`móti`,new Set([`þgf`])],[`nálægt`,new Set([`þgf`])],[`gegn`,new Set([`þgf`])],[`gagnvart`,new Set([`þgf`])],[`handa`,new Set([`þgf`])],[`meðal`,new Set([`ef`])],[`til`,new Set([`ef`])],[`án`,new Set([`ef`])],[`vegna`,new Set([`ef`])],[`sakir`,new Set([`ef`])],[`utan`,new Set([`ef`])],[`innan`,new Set([`ef`])],[`meðfram`,new Set([`þgf`])],[`milli`,new Set([`ef`])],[`auk`,new Set([`ef`])],[`í stað`,new Set([`ef`])]]),g=new Set([`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`]);function _(e,t){return t?h.get(e)?.has(t)??!1:!1}function v(e,t){let n=e.filter(e=>e.pos===`fs`);if(n.length===0)return null;for(let e of n)for(let n of t)if(n.morph?.case&&_(e.lemma,n.morph.case))return{lemma:e.lemma,pos:`fs`,rule:`prep+${n.morph.case}`,confidence:.9};return null}function y(e,t){if(!t)return null;let n=t.toLowerCase();if(!g.has(n))return null;let r=e.filter(e=>e.pos===`so`);return r.length===0||!e.some(e=>e.pos!==`so`)?null:{lemma:(r.find(e=>e.lemma===`eiga`)??r[0]).lemma,pos:`so`,rule:`pronoun+verb`,confidence:.85}}function b(e,t,n){if(!t||!n?.lemmatizeWithPOS)return null;let r=n.lemmatizeWithPOS(t),i=r.find(e=>e.pos===`fs`);if(!i)return null;let a=r.some(e=>e.pos===`fn`),o=e.some(e=>e.pos===`so`);if(a&&o)return null;let s=h.get(i.lemma);if(!s)return null;let c=e.filter(e=>e.pos===`no`);for(let e of c)if(e.morph?.case&&s.has(e.morph.case))return{lemma:e.lemma,pos:`no`,rule:`noun_after_prep+${e.morph.case}`,confidence:.9};return null}function x(e,t,n,r=null){return v(e,n)||b(e,t,r)||y(e,t)||null}function S(e){return h.has(e)}function C(e){return h.get(e)}const w={name:`unambiguous`,run(e){return e.length===1?{lemma:e[0].lemma,pos:e[0].pos,confidence:1}:null}},T={name:`preference_rules`,run(e,t,n){if(!n.usePreferenceRules)return null;for(let n of f){let r=E(n,e,t);if(r)return{lemma:r.lemma,pos:r.pos,confidence:.85}}return null}};function E(e,t,n){let r=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.prefer),i=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.over);if(!r||!i)return null;if(e.context===`before_noun`){let e=n.nextWord;if(e&&/^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(e))return r}else if(e.context===`before_verb`){let e=n.nextWord?.toLowerCase();if(e&&![`þessi`,`þetta`,`sá`,`sú`,`það`,`hinn`,`hin`,`hið`].includes(e))return r}else if(e.context===`after_pronoun`){let e=n.prevWord?.toLowerCase();if(e&&[`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`].includes(e))return r}return null}const D=[w,T,{name:`grammar_rules`,run(e,t,n){if(!n.useGrammarRules)return null;let r=e.map(e=>({...e,morph:void 0})),i=t.allTokens[t.index];if(i){let e=n.getMorph(i);e&&(r.length=0,r.push(...e))}let a=x(r,t.prevWord,t.nextWordMorph??[],n.lemmatizer);return a?{lemma:a.lemma,pos:a.pos,confidence:a.confidence}:null}},{name:`word_bigrams`,run(e,t,n){if(!n.bigrams||e.length===0)return null;let r=[];for(let i of e){let e=0;if(t.prevWord){let r=t.prevLemmas||n.lemmatizer.lemmatize(t.prevWord);for(let t of r){let r=n.bigrams.freq(t,i.lemma);r>0&&(e+=Math.log(r+1)*n.leftWeight)}}if(t.nextWord){let r=t.nextLemmas||n.lemmatizer.lemmatize(t.nextWord);for(let t of r){let r=n.bigrams.freq(i.lemma,t);r>0&&(e+=Math.log(r+1)*n.rightWeight)}}r.push({candidate:i,score:e})}if(r.sort((e,t)=>t.score-e.score),r.length>0&&r[0].score>0){let e=r[0].score,t=r.reduce((e,t)=>e+Math.exp(t.score),0),n=t>0?Math.exp(e)/t:.5;return{lemma:r[0].candidate.lemma,pos:r[0].candidate.pos,confidence:n}}return null}},{name:`fallback`,run(e){return e.length>0?{lemma:e[0].lemma,pos:e[0].pos,confidence:1/e.length}:null}}];var O=class{lemmatizer;bigrams;leftWeight;rightWeight;usePreferenceRules;useGrammarRules;morphCache;constructor(e,t=null,n={}){this.lemmatizer=e,this.bigrams=t,this.leftWeight=n.leftWeight??1,this.rightWeight=n.rightWeight??1,this.usePreferenceRules=n.usePreferenceRules??!0,this.useGrammarRules=n.useGrammarRules??!0,this.morphCache=this.lemmatizer.lemmatizeWithMorph?new Map:null}getMorph(e){if(!this.lemmatizer.lemmatizeWithMorph||!this.morphCache)return;let t=e.toLowerCase(),n=this.morphCache.get(t);if(n)return n;let r=this.lemmatizer.lemmatizeWithMorph(e);return this.morphCache.set(t,r),r}disambiguate(e,t,n,r={}){let i;i=this.lemmatizer.lemmatizeWithPOS?this.lemmatizer.lemmatizeWithPOS(e):this.lemmatizer.lemmatize(e).map(e=>({lemma:e,pos:`no`}));let a=i.map(e=>e.lemma),o=e,s;n&&(s=this.getMorph(n));let c={prevWord:t,nextWord:n,prevLemmas:r.prevLemmas,nextLemmas:r.nextLemmas,nextWordMorph:s,allTokens:[e],index:0};for(let e of D){let t=e.run(i,c,this);if(t)return{token:o,lemma:t.lemma,pos:t.pos,candidates:a,candidatesWithPOS:i,ambiguous:a.length>1,confidence:t.confidence,resolvedBy:e.name}}return{token:o,lemma:e.toLowerCase(),candidates:a,candidatesWithPOS:i,ambiguous:!1,confidence:0,resolvedBy:`none`}}disambiguateAll(e){let t=[];for(let n=0;n<e.length;n++){let r=e[n],i=n>0?e[n-1]:null,a=n<e.length-1?e[n+1]:null;t.push(this.disambiguate(r,i,a))}return t}extractLemmas(e){let t=new Set,n=this.disambiguateAll(e);for(let e of n)t.add(e.lemma);return t}};function k(e,n,r,i={}){let{tokenize:a,removeStopwords:o}=i,s=a?a(e):e.split(/\s+/).filter(e=>e.length>0).map(e=>e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``)).filter(e=>e.length>0),c=new O(n,r).extractLemmas(s);if(o)for(let e of c)t.has(e)&&c.delete(e);return c}const A={no:`noun`,so:`verb`,lo:`adjective`,ao:`adverb`,fs:`preposition`,fn:`pronoun`,st:`conjunction`,to:`numeral`,gr:`article`,uh:`interjection`},j={no:`nafnorð`,so:`sagnorð`,lo:`lýsingarorð`,ao:`atviksorð`,fs:`forsetning`,fn:`fornafn`,st:`samtenging`,to:`töluorð`,gr:`greinir`,uh:`upphrópun`},M={nf:`nominative`,þf:`accusative`,þgf:`dative`,ef:`genitive`},N={kk:`masculine`,kvk:`feminine`,hk:`neuter`},P={et:`singular`,ft:`plural`};var F=class e{bits;sizeBits;hashCount;constructor(e,t,n){this.bits=e,this.sizeBits=t,this.hashCount=n}static fromValues(t,n={}){let r=Math.max(t.length,1),i=n.falsePositiveRate??.01,a=Math.max(1,Math.ceil(-r*Math.log(i)/(Math.LN2*Math.LN2))),o=Math.max(1,Math.round(a/r*Math.LN2)),s=n.maxHashFunctions?Math.min(o,n.maxHashFunctions):o,c=Math.ceil(a/8),l=new e(new Uint8Array(c),a,s);for(let e of t)l.add(e);return l}add(e){let[t,n]=this.hashes(e);for(let e=0;e<this.hashCount;e++){let r=(t+e*n)%this.sizeBits;this.setBit(r)}}has(e){let[t,n]=this.hashes(e);for(let e=0;e<this.hashCount;e++){let r=(t+e*n)%this.sizeBits;if(!this.getBit(r))return!1}return!0}setBit(e){let t=e>>>3,n=e&7;this.bits[t]|=1<<n}getBit(e){let t=e>>>3,n=e&7;return(this.bits[t]&1<<n)!=0}hashes(e){let t=e.toLowerCase(),n=2166136261,r=2166136261;for(let e=0;e<t.length;e++){let i=t.charCodeAt(e);n^=i,n=Math.imul(n,16777619)>>>0,r^=i,r=Math.imul(r,2166136261)>>>0}return r^=r>>>13,r=Math.imul(r,2246822507)>>>0,r^=r>>>16,[n>>>0,r>>>0||668265261]}};const I=new Set(`ísland.england.írland.skotland.finnland.grænland.holland.þýskaland.frakkland.pólland.tékkland.svissland.rússland.eistland.lettland.litháen.danmörk.noregur.svíþjóð.bandaríkin.spánn.portúgal.ítalía.grikkland.þingvellir.akureyri.ísafjörður.reykjavík.keflavík.hafnarfjörður.kópavogur.seltjarnarnes.garðabær.mosfellsbær.vestmannaeyjar.húsavík.sauðárkrókur.siglufjörður.ólafsfjörður.dalvík.egilsstaðir.neskaupstaður.seyðisfjörður.eskifjörður.reyðarfjörður.fáskrúðsfjörður.stöðvarfjörður.djúpivogur.höfn.vík.selfoss.hveragerði.þorlákshöfn.grindavík.sandgerði.borgarnes.stykkishólmur.grundarfjörður.ólafsvík.búðardalur.patreksfjörður.flateyri.suðureyri.bolungarvík.hólmavík.hvammstangi.blönduós.skagaströnd.varmahlíð.hlíðarendi.bergþórshvol.íslandsbanki.landsbankinn.arionbanki.alþingi`.split(`.`)),L=new Set(`maður.kona.stjóri.ráðherra.forseti.formaður.fulltrúi.starfsmaður.hús.staður.vegur.borg.bær.dalur.fjörður.félag.banki.sjóður.stofnun.ráð.rannsókn.greiðsla.mál.kerfi.verk.þjónusta.rekstur.viðskipti.verð.kostnaður`.split(`.`)),R=new Set([`vera`,`hafa`,`gera`,`fara`,`koma`,`segja`,`vilja`,`mega`,`þurfa`,`verða`,`geta`,`sjá`,`taka`,`eiga`,`láta`,`halda`,`leyfa`,`búa`]),z=[`s`,`u`,`a`];var B=class{lemmatizer;minPartLength;tryLinkingLetters;knownLemmas;mode;constructor(e,t,n={}){this.lemmatizer=e,this.knownLemmas=t,this.minPartLength=n.minPartLength??3,this.tryLinkingLetters=n.tryLinkingLetters??!0,this.mode=n.mode??`balanced`}noSplit(e,t){return{word:e,parts:t,indexTerms:t,confidence:0,isCompound:!1}}split(e){let t=e.toLowerCase(),n=this.lemmatizer.lemmatize(e),r=n[0]?.toLowerCase();if(r&&I.has(r)||I.has(t))return this.noSplit(e,n);let i=n.length>0&&n[0].toLowerCase()!==t,a=n.length===1;if(this.mode===`conservative`)return e.includes(`-`)?this.splitAtHyphen(e,n):this.noSplit(e,n);if(this.mode===`balanced`&&i&&a&&t.length<12||t.length<this.minPartLength*2)return this.noSplit(e,n);let o=[];for(let e=this.minPartLength;e<=t.length-this.minPartLength;e++){let n=t.slice(0,e),r=t.slice(e),i=this.trySplit(n,r);if(i&&o.push(i),this.tryLinkingLetters){for(let e of z)if(n.endsWith(e)&&n.length>this.minPartLength){let e=n.slice(0,-1),t=this.trySplit(e,r);t&&o.push({...t,score:t.score*.95})}}}if(o.length===0)return this.noSplit(e,n);o.sort((e,t)=>t.score-e.score);let s=o[0];if(this.mode===`balanced`&&i&&s.score<.6)return this.noSplit(e,n);let c=[...new Set([...s.leftParts,...s.rightParts])];return{word:e,parts:c,indexTerms:[...new Set([...c,t])],confidence:Math.min(s.score,1),isCompound:!0}}splitAtHyphen(e,t){let n=e.split(`-`).filter(e=>e.length>0);if(n.length<2)return this.noSplit(e,t);let r=[];for(let e of n){let t=this.lemmatizer.lemmatize(e);r.push(...t)}let i=[...new Set(r)];return{word:e,parts:i,indexTerms:[...new Set([...i,e.toLowerCase()])],confidence:.9,isCompound:!0}}trySplit(e,t){let n=this.lemmatizer.lemmatize(e),r=this.lemmatizer.lemmatize(t),i=[...new Set(n.filter(e=>this.knownLemmas.has(e)))],a=[...new Set(r.filter(e=>this.knownLemmas.has(e)))];if(i.length===0||a.length===0)return null;let o=0,s=1-Math.abs(e.length-t.length)/(e.length+t.length);o+=s*.2;let c=(e.length+t.length)/2,l=Math.min(c/6,1);o+=l*.2,a.some(e=>L.has(e))&&(o+=.3);let u=i.some(e=>R.has(e)),d=a.some(e=>R.has(e));return u&&d?o-=.3:!u&&!d&&(o+=.2),(e.length<4||t.length<4)&&(o-=.15),{leftParts:i,rightParts:a,score:Math.max(0,o)}}getAllLemmas(e){return this.split(e).indexTerms}};function V(e){return new Set(e.map(e=>e.toLowerCase()))}function H(e,t={}){let n=e.map(e=>e.toLowerCase());return F.fromValues(n,t)}const U=new Map([[`til dæmis`,{lemma:`til dæmi`,isStopword:!0,pos:`ao`}],[`með öðrum orðum`,{lemma:`með annar orð`,isStopword:!0,pos:`ao`}],[`í raun`,{lemma:`í raun`,isStopword:!0,pos:`ao`}],[`í raun og veru`,{lemma:`í raun og vera`,isStopword:!0,pos:`ao`}],[`af og til`,{lemma:`af og til`,isStopword:!0,pos:`ao`}],[`aftur á móti`,{lemma:`aftur á mót`,isStopword:!0,pos:`ao`}],[`alla vega`,{lemma:`allur vegur`,isStopword:!0,pos:`ao`}],[`alls ekki`,{lemma:`alls ekki`,isStopword:!0,pos:`ao`}],[`alls staðar`,{lemma:`allur staður`,isStopword:!0,pos:`ao`}],[`allt í allt`,{lemma:`allur í allur`,isStopword:!0,pos:`ao`}],[`annars vegar`,{lemma:`annar vegur`,isStopword:!0,pos:`ao`}],[`auk þess`,{lemma:`auk það`,isStopword:!0,pos:`ao`}],[`að auki`,{lemma:`að auki`,isStopword:!0,pos:`ao`}],[`að vísu`,{lemma:`að vís`,isStopword:!0,pos:`ao`}],[`að sjálfsögðu`,{lemma:`að sjálfsagður`,isStopword:!0,pos:`ao`}],[`að minnsta kosti`,{lemma:`að lítill kostur`,isStopword:!0,pos:`ao`}],[`að öllu leyti`,{lemma:`að allur leyti`,isStopword:!0,pos:`ao`}],[`að nokkru leyti`,{lemma:`að nokkur leyti`,isStopword:!0,pos:`ao`}],[`ef til vill`,{lemma:`ef til vilja`,isStopword:!0,pos:`ao`}],[`einhvers staðar`,{lemma:`einhver staður`,isStopword:!0,pos:`ao`}],[`einhvern veginn`,{lemma:`einhver vegur`,isStopword:!0,pos:`ao`}],[`ekki síst`,{lemma:`ekki síður`,isStopword:!0,pos:`ao`}],[`engu að síður`,{lemma:`enginn að síður`,isStopword:!0,pos:`ao`}],[`fyrst og fremst`,{lemma:`snemma og fremri`,isStopword:!0,pos:`ao`}],[`hins vegar`,{lemma:`hinn vegur`,isStopword:!0,pos:`ao`}],[`hér og þar`,{lemma:`hér og þar`,isStopword:!0,pos:`ao`}],[`hér um bil`,{lemma:`hér um bil`,isStopword:!0,pos:`ao`}],[`hér á landi`,{lemma:`hér á land`,isStopword:!0,pos:`ao`}],[`hvað mest`,{lemma:`hvað mjög`,isStopword:!0,pos:`ao`}],[`hverju sinni`,{lemma:`hver sinn`,isStopword:!0,pos:`ao`}],[`hvorki né`,{lemma:`hvorki né`,isStopword:!0,pos:`ao`}],[`í burtu`,{lemma:`í burtu`,isStopword:!0,pos:`ao`}],[`í gær`,{lemma:`í gær`,isStopword:!0,pos:`ao`}],[`í senn`,{lemma:`í senn`,isStopword:!0,pos:`ao`}],[`í sífellu`,{lemma:`í sífella`,isStopword:!0,pos:`ao`}],[`lengi vel`,{lemma:`lengi vel`,isStopword:!0,pos:`ao`}],[`meira að segja`,{lemma:`mikill að segja`,isStopword:!0,pos:`ao`}],[`meira og minna`,{lemma:`mikill og lítill`,isStopword:!0,pos:`ao`}],[`meðal annars`,{lemma:`meðal annar`,isStopword:!0,pos:`ao`}],[`nokkurn veginn`,{lemma:`nokkur vegur`,isStopword:!0,pos:`ao`}],[`og svo framvegis`,{lemma:`og svo framvegis`,isStopword:!0,pos:`ao`}],[`satt að segja`,{lemma:`sannur að segja`,isStopword:!0,pos:`ao`}],[`sem betur fer`,{lemma:`sem vel fara`,isStopword:!0,pos:`ao`}],[`smám saman`,{lemma:`smátt saman`,isStopword:!0,pos:`ao`}],[`svo sem`,{lemma:`svo sem`,isStopword:!0,pos:`ao`}],[`sér í lagi`,{lemma:`sér í lag`,isStopword:!0,pos:`ao`}],[`til og frá`,{lemma:`til og frá`,isStopword:!0,pos:`ao`}],[`til baka`,{lemma:`til baka`,isStopword:!0,pos:`ao`}],[`vítt og breitt`,{lemma:`vítt og breitt`,isStopword:!0,pos:`ao`}],[`á ný`,{lemma:`á ný`,isStopword:!0,pos:`ao`}],[`á meðan`,{lemma:`á meðan`,isStopword:!0,pos:`ao`}],[`á sama tíma`,{lemma:`á samur tími`,isStopword:!0,pos:`ao`}],[`á hinn bóginn`,{lemma:`á hinn bógur`,isStopword:!0,pos:`ao`}],[`þar af leiðandi`,{lemma:`þar af leiða`,isStopword:!0,pos:`ao`}],[`þar að auki`,{lemma:`þar að auki`,isStopword:!0,pos:`ao`}],[`það er að segja`,{lemma:`það vera að segja`,isStopword:!0,pos:`ao`}],[`þess vegna`,{lemma:`það vegna`,isStopword:!0,pos:`ao`}],[`því miður`,{lemma:`það lítt`,isStopword:!0,pos:`ao`}],[`þrátt fyrir`,{lemma:`þrátt fyrir`,isStopword:!0,pos:`ao`}],[`á dögunum`,{lemma:`á dagur`,isStopword:!0,pos:`ao`}],[`á sínum tíma`,{lemma:`á sinn tími`,isStopword:!0,pos:`ao`}],[`á endanum`,{lemma:`á endi`,isStopword:!0,pos:`ao`}],[`einu sinni`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`eitt sinn`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`í fyrsta sinn`,{lemma:`í fyrstur sinn`,isStopword:!1,pos:`ao`}],[`í kvöld`,{lemma:`í kvöld`,isStopword:!1,pos:`ao`}],[`í morgun`,{lemma:`í morgunn`,isStopword:!1,pos:`ao`}],[`á morgun`,{lemma:`á morgunn`,isStopword:!1,pos:`ao`}],[`fyrir hönd`,{lemma:`fyrir hönd`,isStopword:!1,pos:`fs`}],[`með tilliti til`,{lemma:`með tillit til`,isStopword:!1,pos:`fs`}],[`í ljósi`,{lemma:`í ljós`,isStopword:!1,pos:`fs`}],[`í stað`,{lemma:`í staður`,isStopword:!1,pos:`fs`}],[`fyrir aftan`,{lemma:`fyrir aftan`,isStopword:!1,pos:`fs`}],[`fyrir austan`,{lemma:`fyrir austan`,isStopword:!1,pos:`fs`}],[`fyrir framan`,{lemma:`fyrir framan`,isStopword:!1,pos:`fs`}],[`fyrir handan`,{lemma:`fyrir handan`,isStopword:!1,pos:`fs`}],[`fyrir innan`,{lemma:`fyrir innan`,isStopword:!1,pos:`fs`}],[`fyrir neðan`,{lemma:`fyrir neðan`,isStopword:!1,pos:`fs`}],[`fyrir norðan`,{lemma:`fyrir norðan`,isStopword:!1,pos:`fs`}],[`fyrir ofan`,{lemma:`fyrir ofan`,isStopword:!1,pos:`fs`}],[`fyrir sunnan`,{lemma:`fyrir sunnan`,isStopword:!1,pos:`fs`}],[`fyrir utan`,{lemma:`fyrir utan`,isStopword:!1,pos:`fs`}],[`fyrir vestan`,{lemma:`fyrir vestan`,isStopword:!1,pos:`fs`}],[`í gegnum`,{lemma:`í gegnum`,isStopword:!1,pos:`fs`}],[`í kringum`,{lemma:`í kringum`,isStopword:!1,pos:`fs`}],[`innan við`,{lemma:`innan við`,isStopword:!1,pos:`fs`}],[`upp úr`,{lemma:`upp úr`,isStopword:!1,pos:`fs`}],[`þvert á`,{lemma:`þvert á`,isStopword:!1,pos:`fs`}],[`þar eð`,{lemma:`þar eð`,isStopword:!0,pos:`st`}],[`sameinuðu þjóðirnar`,{lemma:`Sameinuðu þjóðirnar`,isStopword:!1,pos:`entity`}],[`evrópusambandið`,{lemma:`Evrópusambandið`,isStopword:!1,pos:`entity`}],[`nato`,{lemma:`NATO`,isStopword:!1,pos:`entity`}],[`nató`,{lemma:`NATO`,isStopword:!1,pos:`entity`}]]);function W(e,t){for(let n=Math.min(4,e.length-t);n>=2;n--){let r=e.slice(t,t+n).join(` `).toLowerCase(),i=U.get(r);if(i)return{phrase:i,wordCount:n}}return null}function G(e){return U.has(e.toLowerCase())}function K(e){return U.get(e.toLowerCase())}function q(e){switch(e.kind){case`telno`:return[e.cc?`+${e.cc}${e.number}`:e.number];case`email`:return[e.text.toLowerCase()];case`url`:return[e.text];case`domain`:return[e.text.toLowerCase()];case`date`:case`dateabs`:case`daterel`:{let t=e.year||0,n=String(e.month).padStart(2,`0`),r=String(e.day).padStart(2,`0`);return t>0?[`${t}-${n}-${r}`]:[`${n}-${r}`]}case`time`:{let t=String(e.hour).padStart(2,`0`),n=String(e.minute).padStart(2,`0`);return e.second>0?[`${t}:${n}:${String(e.second).padStart(2,`0`)}`]:[`${t}:${n}`]}case`timestamp`:case`timestampabs`:case`timestamprel`:return[`${`${e.year}-${String(e.month).padStart(2,`0`)}-${String(e.day).padStart(2,`0`)}`}T${`${String(e.hour).padStart(2,`0`)}:${String(e.minute).padStart(2,`0`)}:${String(e.second).padStart(2,`0`)}`}`];case`ssn`:return[`${e.value.slice(0,6)}-${e.value.slice(6)}`];case`amount`:return[`${e.value} ${e.currency}`];case`measurement`:return[`${e.value} ${e.unit}`];case`percent`:return[`${e.value}%`];case`hashtag`:return[`#${e.text.slice(1).toLowerCase()}`];case`username`:return[`@${e.username.toLowerCase()}`];case`year`:return[String(e.value)];case`number`:case`ordinal`:return[String(e.value)];default:return[]}}const J=new Set([`word`]),Y=new Set([`person`,`company`,`entity`]),X=new Set([`punctuation`,`s_begin`,`s_end`,`s_split`,`unknown`]),Z=[`arinnar`,`inum`,`anna`,`unum`,`sins`,`inn`,`ins`,`ið`,`in`,`um`,`ir`,`ar`,`ur`,`s`,`a`,`i`];function Q(t,n,r={}){let{bigrams:i,compoundSplitter:a,includeNumbers:o=!1,alwaysTryCompounds:s=!0,stripUnknownSuffixes:c=!0}=r,l=e(t),u=[],d=[],f=new Map,p=(e,t)=>t.length===1&&t[0]===e.toLowerCase(),m=e=>{let t=e,r=[];for(let e=0;e<2;e++){let e=t.toLowerCase(),i=null;for(let a of Z){if(!e.endsWith(a))continue;let o=t.slice(0,t.length-a.length);if(o.length<2)continue;let s=n.lemmatize(o);if(!p(o,s))return[...new Set([...r,...s])];i||(i=o,r.push(o.toLowerCase()))}if(!i||i.length<4)break;t=i}return r.length>0?[...new Set(r)]:null},h=e=>{let t=e.toLowerCase(),r=f.get(t);if(r)return r;let i=n.lemmatize(e);if(c&&p(e,i)&&!n.isKnown(e)&&e.length>=4){let n=m(e);if(n){let e=[...new Set([...i,...n])];return f.set(t,e),e}}return f.set(t,i),i};for(let e=0;e<l.length;e++){let t=l[e];if(X.has(t.kind))continue;if(Y.has(t.kind)){u.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!0});continue}if(J.has(t.kind)){let e=t.text??``,n=h(e),r={original:e,kind:t.kind,lemmas:n,isEntity:!1},i=n.length===1&&n[0]===e.toLowerCase();if(i&&e.includes(`-`)){let t=e.split(`-`),i=[];for(let e of t)e.length>0&&i.push(...h(e));i.length>0&&(r.lemmas=[...new Set([...n,...i])])}if(a&&(s||i)){let t=a.split(e);if(t.isCompound){r.compoundSplit=t;let e=t.parts.flatMap(e=>h(e));r.compoundLemmas=e,r.lemmas=[...new Set([...n,...e])]}}u.push(r),d.push({index:u.length-1,token:t});continue}let n=q(t);if(n.length>0){if((t.kind===`number`||t.kind===`ordinal`)&&!o)continue;u.push({original:t.text??``,kind:t.kind,lemmas:n,isEntity:!1});continue}u.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1})}if(i&&d.length>0){let e=new O(n,i);for(let t=0;t<d.length;t++){let{index:n,token:r}=d[t],i=t>0?d[t-1].token:null,a=t<d.length-1?d[t+1].token:null,o=e.disambiguate(r.text??``,i?.text??null,a?.text??null,{prevLemmas:i?.text?h(i.text):void 0,nextLemmas:a?.text?h(a.text):void 0});u[n].disambiguated=o.lemma,u[n].confidence=o.confidence}}else for(let{index:e}of d){let t=u[e];t.lemmas.length>0&&(t.disambiguated=t.lemmas[0],t.confidence=t.lemmas.length===1?1:.5)}return u}function $(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1}=r,c=Q(e,n,r),l=new Set,u=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of c)if(!e.isEntity){if(o)for(let t of e.lemmas)u(t)||l.add(t);else e.disambiguated&&(u(e.disambiguated)||l.add(e.disambiguated));if(e.compoundSplit?.isCompound){let t=e.compoundLemmas?e.compoundLemmas:e.compoundSplit.parts.flatMap(e=>n.lemmatize(e));for(let e of t)u(e)||l.add(e)}}return l}function ee(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1,andOperator:c=` & `,orOperator:l=` | `,wrapGroups:u=!0,includeOriginal:d=!1,lowercaseOriginal:f=!0}=r,p=Q(e,n,r),m=[],h=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of p){if(e.isEntity)continue;let t=[];if(o?t=e.lemmas:e.disambiguated&&(t=[e.disambiguated]),d){let n=e.original??``;if(n.length>0){let e=f?n.toLowerCase():n;t=[...t,e]}}let n=[...new Set(t.filter(e=>e&&!h(e)))];n.length>0&&m.push(n)}return{groups:m,query:m.map(e=>{let t=e.join(l);return u&&e.length>1?`(${t})`:t}).filter(e=>e.length>0).join(c)}}function te(e,t,n,r={}){let i=performance.now(),a,o;switch(n){case`naive`:{let n=e.split(/\s+/).filter(e=>e.length>0),r=[];for(let e of n){let n=e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``);if(n){let e=t.lemmatize(n);r.push({original:n,kind:`word`,lemmas:e,isEntity:!1,disambiguated:e[0],confidence:e.length===1?1:.5})}}a=r,o=new Set(r.map(e=>e.disambiguated).filter(Boolean));break}case`tokenized`:a=Q(e,t),o=new Set(a.filter(e=>e.kind===`word`&&e.lemmas.length>0).map(e=>e.lemmas[0]));break;case`disambiguated`:a=Q(e,t,{bigrams:r.bigrams}),o=$(e,t,{bigrams:r.bigrams});break;case`full`:a=Q(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter}),o=$(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter});break}let s=performance.now()-i,c=a.filter(e=>e.kind===`word`),l=c.length,u=c.filter(e=>e.lemmas.length>0&&!(e.lemmas.length===1&&e.lemmas[0]===e.original.toLowerCase())).length,d=c.filter(e=>e.lemmas.length>1).length,f=c.filter(e=>e.confidence!==void 0).map(e=>e.confidence),p=f.length>0?f.reduce((e,t)=>e+t,0)/f.length:0,m=c.filter(e=>e.compoundSplit?.isCompound).length,h=a.filter(e=>e.isEntity).length;return{wordCount:l,lemmatizedCount:u,coverage:l>0?u/l:0,ambiguousCount:d,ambiguityRate:l>0?d/l:0,avgConfidence:p,compoundsFound:m,entitiesSkipped:h,uniqueLemmas:o.size,timeMs:s}}export{d as BinaryLemmatizer,M as CASE_NAMES,r as CONTEXTUAL_STOPWORDS,B as CompoundSplitter,f as DISAMBIGUATION_RULES,O as Disambiguator,N as GENDER_NAMES,g as NOMINATIVE_PRONOUNS,P as NUMBER_NAMES,h as PREPOSITION_CASES,I as PROTECTED_LEMMAS,U as STATIC_PHRASES,t as STOPWORDS_IS,A as WORD_CLASS_NAMES,j as WORD_CLASS_NAMES_IS,x as applyGrammarRules,b as applyNounAfterPrepositionRule,v as applyPrepositionRule,y as applyPronounVerbRule,ee as buildSearchQuery,_ as canGovernCase,H as createKnownLemmaFilter,V as createKnownLemmaSet,k as extractDisambiguatedLemmas,$ as extractIndexableLemmas,C as getGovernedCases,K as getPhraseInfo,p as getRulesForWord,m as hasDisambiguationRules,i as isContextualStopword,G as isKnownPhrase,S as isKnownPreposition,n as isStopword,W as matchPhrase,q as normalizeToken,Q as processText,a as removeStopwords,te as runBenchmark};
2
2
  //# sourceMappingURL=index.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.mjs","names":[],"sources":["../src/stopwords.ts","../src/binary-lemmatizer.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/disambiguate.ts","../src/types.ts","../src/bloom.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"sourcesContent":["/**\n * Icelandic stopwords for search indexing.\n *\n * Source: https://github.com/atlijas/icelandic-stop-words\n * Data from DIM (Database of Icelandic Morphology) by Árni Magnússon Institute.\n *\n * Includes all inflected forms of pronouns, prepositions, conjunctions, etc.\n */\n\n// prettier-ignore\nexport const STOPWORDS_IS = new Set([\n \"á\",\"að\",\"aðra\",\"aðrar\",\"aðrir\",\"af\",\"alla\",\"allan\",\"allar\",\"allir\",\n \"allnokkra\",\"allnokkrar\",\"allnokkrir\",\"allnokkru\",\"allnokkrum\",\"allnokkuð\",\n \"allnokkur\",\"allnokkurn\",\"allnokkurra\",\"allnokkurrar\",\"allnokkurri\",\"allnokkurs\",\n \"allnokkurt\",\"allra\",\"allrar\",\"allri\",\"alls\",\"allt\",\"alltað\",\"allur\",\"án\",\n \"andspænis\",\"annað\",\"annaðhvort\",\"annan\",\"annar\",\"annarra\",\"annarrar\",\"annarri\",\n \"annars\",\"árla\",\"ásamt\",\"auk\",\"austan\",\"austanundir\",\"austur\",\"báða\",\"báðar\",\n \"báðir\",\"báðum\",\"bæði\",\"bak\",\"beggja\",\"eða\",\"eður\",\"ef\",\"eftir\",\"ég\",\"ein\",\n \"eina\",\"einar\",\"einhver\",\"einhverja\",\"einhverjar\",\"einhverjir\",\"einhverju\",\n \"einhverjum\",\"einhvern\",\"einhverra\",\"einhverrar\",\"einhverri\",\"einhvers\",\"einir\",\n \"einn\",\"einna\",\"einnar\",\"einni\",\"eins\",\"einskis\",\"einu\",\"einum\",\"eitt\",\"eitthvað\",\n \"eitthvert\",\"ekkert\",\"ella\",\"ellegar\",\"en\",\"enda\",\"enga\",\"engan\",\"engar\",\"engin\",\n \"enginn\",\"engir\",\"engra\",\"engrar\",\"engri\",\"engu\",\"engum\",\"er\",\"fáein\",\"fáeina\",\n \"fáeinar\",\"fáeinir\",\"fáeinna\",\"fáeinum\",\"fjær\",\"fjarri\",\"flestalla\",\"flestallan\",\n \"flestallar\",\"flestallir\",\"flestallra\",\"flestallrar\",\"flestallri\",\"flestalls\",\n \"flestallt\",\"flestallur\",\"flestöll\",\"flestöllu\",\"flestöllum\",\"frá\",\"fram\",\"fyrir\",\n \"fyrst\",\"gagnstætt\",\"gagnvart\",\"gegn\",\"gegnt\",\"gegnum\",\"hana\",\"handa\",\"handan\",\n \"hann\",\"hans\",\"heldur\",\"hennar\",\"henni\",\"hið\",\"hin\",\"hina\",\"hinar\",\"hinir\",\"hinn\",\n \"hinna\",\"hinnar\",\"hinni\",\"hins\",\"hinu\",\"hinum\",\"hitt\",\"hjá\",\"honum\",\"hún\",\"hvað\",\n \"hvaða\",\"hvenær\",\"hver\",\"hverja\",\"hverjar\",\"hverjir\",\"hverju\",\"hverjum\",\"hvern\",\n \"hverra\",\"hverrar\",\"hverri\",\"hvers\",\"hvert\",\"hvílík\",\"hvílíka\",\"hvílíkan\",\n \"hvílíkar\",\"hvílíkir\",\"hvílíkra\",\"hvílíkrar\",\"hvílíkri\",\"hvílíks\",\"hvílíkt\",\n \"hvílíku\",\"hvílíkum\",\"hvílíkur\",\"hvor\",\"hvora\",\"hvorar\",\"hvorir\",\"hvorki\",\"hvorn\",\n \"hvorra\",\"hvorrar\",\"hvorri\",\"hvors\",\"hvort\",\"hvoru\",\"hvorug\",\"hvoruga\",\"hvorugan\",\n \"hvorugar\",\"hvorugir\",\"hvorugra\",\"hvorugrar\",\"hvorugri\",\"hvorugs\",\"hvorugt\",\n \"hvorugu\",\"hvorugum\",\"hvorugur\",\"hvorum\",\"í\",\"inn\",\"innan\",\"innanundir\",\"jafnframt\",\n \"jafnhliða\",\"kring\",\"kringum\",\"með\",\"meðal\",\"meðan\",\"meður\",\"mér\",\"mestalla\",\n \"mestallan\",\"mestallar\",\"mestallir\",\"mestallra\",\"mestallrar\",\"mestallri\",\"mestalls\",\n \"mestallt\",\"mestallur\",\"mestöll\",\"mestöllu\",\"mestöllum\",\"miðli\",\"mig\",\"milli\",\n \"millum\",\"mín\",\"mína\",\"mínar\",\"mínir\",\"minn\",\"minna\",\"minnar\",\"minni\",\"míns\",\n \"mínu\",\"mínum\",\"mitt\",\"mót\",\"móti\",\"nær\",\"nærri\",\"næst\",\"næstum\",\"nálægt\",\"né\",\n \"neðan\",\"nein\",\"neina\",\"neinar\",\"neinir\",\"neinn\",\"neinna\",\"neinnar\",\"neinni\",\n \"neins\",\"neinu\",\"neinum\",\"neitt\",\"nema\",\"niður\",\"nokkra\",\"nokkrar\",\"nokkrir\",\n \"nokkru\",\"nokkrum\",\"nokkuð\",\"nokkur\",\"nokkurn\",\"nokkurra\",\"nokkurrar\",\"nokkurri\",\n \"nokkurs\",\"nokkurt\",\"norðan\",\"nú\",\"öðru\",\"öðrum\",\"of\",\"ofan\",\"ofar\",\"og\",\"óháð\",\n \"okkar\",\"okkur\",\"öll\",\"öllu\",\"öllum\",\"önnur\",\"órafjarri\",\"oss\",\"sá\",\"sakir\",\n \"sama\",\"saman\",\"samar\",\"samfara\",\"samhliða\",\"sami\",\"samir\",\"samkvæmt\",\"samra\",\n \"samrar\",\"samri\",\"sams\",\"samskipa\",\"samt\",\"samtímis\",\"samur\",\"sem\",\"sér\",\"sérhvað\",\n \"sérhver\",\"sérhverja\",\"sérhverjar\",\"sérhverjir\",\"sérhverju\",\"sérhverjum\",\"sérhvern\",\n \"sérhverra\",\"sérhverrar\",\"sérhverri\",\"sérhvers\",\"sérhvert\",\"síðan\",\"síðla\",\"sig\",\n \"sín\",\"sína\",\"sínar\",\"sínhver\",\"sínhverja\",\"sínhverjar\",\"sínhverjir\",\"sínhverju\",\n \"sínhverjum\",\"sínhvern\",\"sínhverra\",\"sínhverrar\",\"sínhverri\",\"sínhvers\",\"sínhvert\",\n \"sínhvor\",\"sínhvora\",\"sínhvorar\",\"sínhvorir\",\"sínhvorn\",\"sínhvorra\",\"sínhvorrar\",\n \"sínhvorri\",\"sínhvors\",\"sínhvort\",\"sínhvoru\",\"sínhvorum\",\"sínir\",\"sinn\",\"sinna\",\n \"sinnar\",\"sinnhver\",\"sinnhverja\",\"sinnhverjar\",\"sinnhverjir\",\"sinnhverju\",\n \"sinnhverjum\",\"sinnhvern\",\"sinnhverra\",\"sinnhverrar\",\"sinnhverri\",\"sinnhvers\",\n \"sinnhvert\",\"sinnhvor\",\"sinnhvora\",\"sinnhvorar\",\"sinnhvorir\",\"sinnhvorn\",\n \"sinnhvorra\",\"sinnhvorrar\",\"sinnhvorri\",\"sinnhvors\",\"sinnhvort\",\"sinnhvoru\",\n \"sinnhvorum\",\"sinni\",\"síns\",\"sínu\",\"sínum\",\"sitt\",\"sitthvað\",\"sitthver\",\n \"sitthverja\",\"sitthverjar\",\"sitthverjir\",\"sitthverju\",\"sitthverjum\",\"sitthvern\",\n \"sitthverra\",\"sitthverrar\",\"sitthverri\",\"sitthvers\",\"sitthvert\",\"sitthvor\",\n \"sitthvora\",\"sitthvorar\",\"sitthvorir\",\"sitthvorn\",\"sitthvorra\",\"sitthvorrar\",\n \"sitthvorri\",\"sitthvors\",\"sitthvort\",\"sitthvoru\",\"sitthvorum\",\"sjálf\",\"sjálfa\",\n \"sjálfan\",\"sjálfar\",\"sjálfir\",\"sjálfra\",\"sjálfrar\",\"sjálfri\",\"sjálfs\",\"sjálft\",\n \"sjálfu\",\"sjálfum\",\"sjálfur\",\"slík\",\"slíka\",\"slíkan\",\"slíkar\",\"slíkir\",\"slíkra\",\n \"slíkrar\",\"slíkri\",\"slíks\",\"slíkt\",\"slíku\",\"slíkum\",\"slíkur\",\"snemma\",\"sökum\",\n \"söm\",\"sömu\",\"sömum\",\"sú\",\"sum\",\"suma\",\"suman\",\"sumar\",\"sumir\",\"sumra\",\"sumrar\",\n \"sumri\",\"sums\",\"sumt\",\"sumu\",\"sumum\",\"sumur\",\"sunnan\",\"svo\",\"til\",\"tráss\",\"um\",\n \"umfram\",\"umhverfis\",\"undan\",\"undir\",\"uns\",\"upp\",\"úr\",\"út\",\"utan\",\"útundan\",\n \"vegna\",\"vér\",\"vestan\",\"vestur\",\"vettugi\",\"við\",\"viður\",\"vor\",\"vora\",\"vorar\",\n \"vorir\",\"vorn\",\"vorra\",\"vorrar\",\"vorri\",\"vors\",\"vort\",\"voru\",\"vorum\",\"yðar\",\n \"yður\",\"yfir\",\"ykkar\",\"ykkur\",\"ýmis\",\"ýmiss\",\"ýmissa\",\"ýmissar\",\"ýmissi\",\"ýmist\",\n \"ýmsa\",\"ýmsan\",\"ýmsar\",\"ýmsir\",\"ýmsu\",\"ýmsum\",\"þá\",\"það\",\"þær\",\"þann\",\"þar\",\n \"þau\",\"þegar\",\"þeim\",\"þeir\",\"þeirra\",\"þeirrar\",\"þeirri\",\"þennan\",\"þér\",\"þess\",\n \"þessa\",\"þessar\",\"þessara\",\"þessarar\",\"þessari\",\"þessi\",\"þessir\",\"þessu\",\n \"þessum\",\"þetta\",\"þið\",\"þig\",\"þín\",\"þína\",\"þínar\",\"þínir\",\"þinn\",\"þinna\",\n \"þinnar\",\"þinni\",\"þíns\",\"þínu\",\"þínum\",\"þitt\",\"þó\",\"þónokkra\",\"þónokkrar\",\n \"þónokkrir\",\"þónokkru\",\"þónokkrum\",\"þónokkuð\",\"þónokkur\",\"þónokkurn\",\"þónokkurra\",\n \"þónokkurrar\",\"þónokkurri\",\"þónokkurs\",\"þónokkurt\",\"þótt\",\"þú\",\"því\",\"þvílík\",\n \"þvílíka\",\"þvílíkan\",\"þvílíkar\",\"þvílíkir\",\"þvílíkra\",\"þvílíkrar\",\"þvílíkri\",\n \"þvílíks\",\"þvílíkt\",\"þvílíku\",\"þvílíkum\",\"þvílíkur\",\n]);\n\n/**\n * Check if a word is a stopword.\n */\nexport function isStopword(word: string): boolean {\n return STOPWORDS_IS.has(word.toLowerCase());\n}\n\n/**\n * Contextual stopword rules for ambiguous words.\n *\n * Some words are stopwords in certain grammatical contexts but not others:\n * - \"á\" as preposition (fs) or adverb (ao) = stopword\n * - \"á\" as verb \"eiga\" (so) = NOT a stopword (\"Ég á bíl\")\n * - \"á\" as noun \"river\" (no) = NOT a stopword (\"við ána\")\n *\n * Map: lemma -> Set of POS codes where it IS a stopword\n */\nexport const CONTEXTUAL_STOPWORDS: Map<string, Set<string>> = new Map([\n // \"á\" - prep/adverb = stop, verb/noun = keep\n [\"á\", new Set([\"fs\", \"ao\"])],\n // \"við\" - prep = stop, pronoun \"we\" = stop, noun \"viður\" = keep\n [\"við\", new Set([\"fs\", \"fn\"])],\n // \"af\" - prep/adverb = stop\n [\"af\", new Set([\"fs\", \"ao\"])],\n // \"til\" - prep = stop\n [\"til\", new Set([\"fs\"])],\n // \"um\" - prep = stop\n [\"um\", new Set([\"fs\"])],\n // \"frá\" - prep = stop\n [\"frá\", new Set([\"fs\"])],\n // \"yfir\" - prep/adverb = stop\n [\"yfir\", new Set([\"fs\", \"ao\"])],\n // \"undir\" - prep/adverb = stop\n [\"undir\", new Set([\"fs\", \"ao\"])],\n // \"fyrir\" - prep/adverb = stop\n [\"fyrir\", new Set([\"fs\", \"ao\"])],\n // \"eftir\" - prep/adverb = stop\n [\"eftir\", new Set([\"fs\", \"ao\"])],\n // \"gegn\" - prep = stop\n [\"gegn\", new Set([\"fs\"])],\n // \"hjá\" - prep = stop\n [\"hjá\", new Set([\"fs\"])],\n // \"úr\" - prep = stop, noun \"úr\" (watch) = keep\n [\"úr\", new Set([\"fs\"])],\n // \"í\" - prep = stop\n [\"í\", new Set([\"fs\"])],\n]);\n\n/**\n * Check if a lemma is a stopword in a specific grammatical context.\n *\n * For ambiguous words, uses POS to determine stopword status.\n * For unambiguous words, falls back to standard stopword check.\n *\n * @param lemma - The lemmatized word\n * @param pos - Part of speech code (fs, ao, so, no, etc.)\n * @returns true if the word should be treated as a stopword\n */\nexport function isContextualStopword(lemma: string, pos?: string): boolean {\n const normalized = lemma.toLowerCase();\n\n // Check if this lemma has context-dependent rules\n const contextRule = CONTEXTUAL_STOPWORDS.get(normalized);\n if (contextRule && pos) {\n // Use the rule: stopword only if POS is in the stopword set\n return contextRule.has(pos);\n }\n\n // Fall back to standard stopword check\n return STOPWORDS_IS.has(normalized);\n}\n\n/**\n * Filter stopwords from an array of words/lemmas.\n */\nexport function removeStopwords<T extends string>(words: T[]): T[] {\n return words.filter((w) => !isStopword(w));\n}\n","/**\n * Binary format lemmatizer for efficient memory usage.\n *\n * Uses ArrayBuffer with TypedArray views and binary search for O(log n) lookups.\n * Target memory: ~70MB vs ~1.2GB for JS Map-based approach.\n *\n * Binary file format:\n * - Header (32 bytes): magic, version, counts\n * - String pool: all strings concatenated UTF-8\n * - Lemma index: offsets + lengths\n * - Word index: offsets + lengths (sorted alphabetically)\n * - Entry offsets: start/end of entries for each word\n * - Entries: packed lemmaIdx:20 + posCode:4\n * - Bigrams: word1/word2 offsets + lengths + frequencies (sorted)\n */\n\nimport type {\n WordClass,\n LemmaWithPOS,\n LemmaWithMorph,\n LemmatizerLike,\n BigramProvider,\n GrammaticalCase,\n GrammaticalGender,\n GrammaticalNumber,\n MorphFeatures,\n} from \"./types.js\";\n\nconst MAGIC = 0x4c454d41; // \"LEMA\"\n\n// POS code to string mapping (must match build-binary.py)\nconst CODE_TO_POS: WordClass[] = [\n \"no\",\n \"so\",\n \"lo\",\n \"ao\",\n \"fs\",\n \"fn\",\n \"st\",\n \"to\",\n \"gr\",\n \"uh\",\n];\n\n// Case code to string mapping (must match build-binary.py)\n// 0=none, 1=nf, 2=þf, 3=þgf, 4=ef\nconst CODE_TO_CASE: (GrammaticalCase | undefined)[] = [\n undefined, // 0 = none\n \"nf\", // 1 = nominative\n \"þf\", // 2 = accusative\n \"þgf\", // 3 = dative\n \"ef\", // 4 = genitive\n];\n\n// Gender code to string mapping (must match build-binary.py)\n// 0=none, 1=kk, 2=kvk, 3=hk\nconst CODE_TO_GENDER: (GrammaticalGender | undefined)[] = [\n undefined, // 0 = none\n \"kk\", // 1 = masculine\n \"kvk\", // 2 = feminine\n \"hk\", // 3 = neuter\n];\n\n// Number code to string mapping (must match build-binary.py)\n// 0=et/none, 1=ft\nconst CODE_TO_NUMBER: (GrammaticalNumber | undefined)[] = [\n \"et\", // 0 = singular (or none)\n \"ft\", // 1 = plural\n];\n\nexport interface BinaryLemmatizerOptions {\n fetch?: typeof fetch;\n}\n\nexport interface BinaryLemmatizeOptions {\n wordClass?: WordClass;\n}\n\nexport class BinaryLemmatizer implements LemmatizerLike, BigramProvider {\n private buffer: ArrayBuffer;\n private stringPool: Uint8Array;\n private lemmaOffsets: Uint32Array;\n private lemmaLengths: Uint8Array;\n private wordOffsets: Uint32Array;\n private wordLengths: Uint8Array;\n private entryOffsets: Uint32Array;\n private entries: Uint32Array;\n private bigramW1Offsets: Uint32Array;\n private bigramW1Lengths: Uint8Array;\n private bigramW2Offsets: Uint32Array;\n private bigramW2Lengths: Uint8Array;\n private bigramFreqs: Uint32Array;\n\n private lemmaCount: number;\n private wordCount: number;\n private entryCount: number;\n private bigramCount: number;\n private version: number;\n\n private decoder = new TextDecoder(\"utf-8\");\n\n private constructor(buffer: ArrayBuffer) {\n this.buffer = buffer;\n const view = new DataView(buffer);\n\n // Read header\n const magic = view.getUint32(0, true);\n if (magic !== MAGIC) {\n throw new Error(\n `Invalid binary format: expected magic 0x${MAGIC.toString(16)}, got 0x${magic.toString(16)}`\n );\n }\n\n this.version = view.getUint32(4, true);\n if (this.version !== 1 && this.version !== 2) {\n throw new Error(`Unsupported version: ${this.version}`);\n }\n\n const stringPoolSize = view.getUint32(8, true);\n this.lemmaCount = view.getUint32(12, true);\n this.wordCount = view.getUint32(16, true);\n this.entryCount = view.getUint32(20, true);\n this.bigramCount = view.getUint32(24, true);\n // reserved at 28\n\n // Calculate section offsets\n let offset = 32;\n\n // String pool\n this.stringPool = new Uint8Array(buffer, offset, stringPoolSize);\n offset += stringPoolSize;\n\n // Lemma offsets (u32 × lemmaCount)\n this.lemmaOffsets = new Uint32Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount * 4;\n\n // Lemma lengths (u8 × lemmaCount)\n this.lemmaLengths = new Uint8Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Word offsets (u32 × wordCount)\n this.wordOffsets = new Uint32Array(buffer, offset, this.wordCount);\n offset += this.wordCount * 4;\n\n // Word lengths (u8 × wordCount)\n this.wordLengths = new Uint8Array(buffer, offset, this.wordCount);\n offset += this.wordCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Entry offsets (u32 × (wordCount + 1))\n this.entryOffsets = new Uint32Array(buffer, offset, this.wordCount + 1);\n offset += (this.wordCount + 1) * 4;\n\n // Entries (u32 × entryCount)\n this.entries = new Uint32Array(buffer, offset, this.entryCount);\n offset += this.entryCount * 4;\n\n // Bigram word1 offsets\n this.bigramW1Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word1 lengths\n this.bigramW1Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram word2 offsets\n this.bigramW2Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word2 lengths\n this.bigramW2Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram frequencies\n this.bigramFreqs = new Uint32Array(buffer, offset, this.bigramCount);\n }\n\n /**\n * Load binary lemmatizer from URL.\n */\n static async load(\n url: string,\n options: BinaryLemmatizerOptions = {}\n ): Promise<BinaryLemmatizer> {\n const fetchFn = options.fetch ?? fetch;\n const response = await fetchFn(url);\n\n if (!response.ok) {\n throw new Error(`Failed to load binary data: ${response.status}`);\n }\n\n const buffer = await response.arrayBuffer();\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Load from ArrayBuffer (for Node.js or pre-loaded data).\n */\n static loadFromBuffer(buffer: ArrayBuffer): BinaryLemmatizer {\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Get string from string pool.\n */\n private getString(offset: number, length: number): string {\n return this.decoder.decode(this.stringPool.subarray(offset, offset + length));\n }\n\n /**\n * Get lemma by index.\n */\n private getLemma(index: number): string {\n return this.getString(this.lemmaOffsets[index], this.lemmaLengths[index]);\n }\n\n /**\n * Get word by index.\n */\n private getWord(index: number): string {\n return this.getString(this.wordOffsets[index], this.wordLengths[index]);\n }\n\n /**\n * Binary search for word in sorted word array.\n * Returns index or -1 if not found.\n */\n private findWord(word: string): number {\n let left = 0;\n let right = this.wordCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midWord = this.getWord(mid);\n\n if (midWord === word) {\n return mid;\n }\n if (midWord < word) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n\n return -1;\n }\n\n /**\n * Look up possible lemmas for a word form.\n * Results are sorted by corpus frequency (most common first).\n * Duplicates are removed (same lemma with different morph features).\n */\n lemmatize(word: string, options: BinaryLemmatizeOptions = {}): string[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [normalized];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n\n const { wordClass } = options;\n const seen = new Set<string>();\n const result: string[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const pos = CODE_TO_POS[posCode];\n\n if (wordClass && pos !== wordClass) {\n continue;\n }\n\n const lemma = this.getLemma(lemmaIdx);\n if (!seen.has(lemma)) {\n seen.add(lemma);\n result.push(lemma);\n }\n }\n\n if (result.length === 0) {\n return [normalized];\n }\n\n return result;\n }\n\n /**\n * Unpack entry based on binary format version.\n * Version 1: bits 0-3=pos, bits 4-23=lemmaIdx\n * Version 2: bits 0-3=pos, bits 4-6=case, bits 7-8=gender, bit 9=number, bits 10-29=lemmaIdx\n */\n private unpackEntry(entry: number): {\n lemmaIdx: number;\n posCode: number;\n caseCode: number;\n genderCode: number;\n numberCode: number;\n } {\n if (this.version === 1) {\n return {\n lemmaIdx: entry >>> 4,\n posCode: entry & 0xf,\n caseCode: 0,\n genderCode: 0,\n numberCode: 0,\n };\n }\n // Version 2\n return {\n lemmaIdx: entry >>> 10,\n posCode: entry & 0xf,\n caseCode: (entry >>> 4) & 0x7,\n genderCode: (entry >>> 7) & 0x3,\n numberCode: (entry >>> 9) & 0x1,\n };\n }\n\n /**\n * Look up lemmas with their word class (POS) tags.\n * Duplicates are removed (same lemma+pos with different morph features).\n */\n lemmatizeWithPOS(word: string): LemmaWithPOS[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const seen = new Set<string>();\n const result: LemmaWithPOS[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const lemma = this.getLemma(lemmaIdx);\n const pos = CODE_TO_POS[posCode] ?? (\"\" as WordClass);\n const key = `${lemma}:${pos}`;\n\n if (!seen.has(key)) {\n seen.add(key);\n result.push({ lemma, pos });\n }\n }\n\n return result;\n }\n\n /**\n * Look up lemmas with word class and morphological features.\n * Only available with version 2 binary format.\n */\n lemmatizeWithMorph(word: string): LemmaWithMorph[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const result: LemmaWithMorph[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode, caseCode, genderCode, numberCode } =\n this.unpackEntry(this.entries[i]);\n\n const morph: MorphFeatures = {};\n const caseVal = CODE_TO_CASE[caseCode];\n const genderVal = CODE_TO_GENDER[genderCode];\n const numberVal = CODE_TO_NUMBER[numberCode];\n\n if (caseVal) morph.case = caseVal;\n if (genderVal) morph.gender = genderVal;\n if (numberVal) morph.number = numberVal;\n\n result.push({\n lemma: this.getLemma(lemmaIdx),\n pos: CODE_TO_POS[posCode] ?? (\"\" as WordClass),\n morph: Object.keys(morph).length > 0 ? morph : undefined,\n });\n }\n\n return result;\n }\n\n /**\n * Check if morphological features are available (version 2+).\n */\n hasMorphFeatures(): boolean {\n return this.version >= 2;\n }\n\n /**\n * Get the binary format version.\n */\n getVersion(): number {\n return this.version;\n }\n\n /**\n * Binary search for bigram. Returns index or -1.\n */\n private findBigram(word1: string, word2: string): number {\n let left = 0;\n let right = this.bigramCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midW1 = this.getString(\n this.bigramW1Offsets[mid],\n this.bigramW1Lengths[mid]\n );\n\n if (midW1 < word1) {\n left = mid + 1;\n } else if (midW1 > word1) {\n right = mid - 1;\n } else {\n // word1 matches, compare word2\n const midW2 = this.getString(\n this.bigramW2Offsets[mid],\n this.bigramW2Lengths[mid]\n );\n\n if (midW2 === word2) {\n return mid;\n }\n if (midW2 < word2) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n }\n\n return -1;\n }\n\n /**\n * Get bigram frequency.\n * @returns Frequency count, or 0 if not found\n */\n bigramFreq(word1: string, word2: string): number {\n const idx = this.findBigram(word1.toLowerCase(), word2.toLowerCase());\n return idx === -1 ? 0 : this.bigramFreqs[idx];\n }\n\n /**\n * Alias for bigramFreq to satisfy BigramProvider interface.\n * @returns Frequency count, or 0 if not found\n */\n freq(word1: string, word2: string): number {\n return this.bigramFreq(word1, word2);\n }\n\n /**\n * Check if a word is known to the lemmatizer.\n */\n isKnown(word: string): boolean {\n return this.findWord(word.toLowerCase()) !== -1;\n }\n\n /**\n * Get the total number of lemmas in the database.\n */\n get lemmaCountValue(): number {\n return this.lemmaCount;\n }\n\n /**\n * Get the total number of word forms.\n */\n get wordFormCount(): number {\n return this.wordCount;\n }\n\n /**\n * Get the total number of bigrams.\n */\n get bigramCountValue(): number {\n return this.bigramCount;\n }\n\n /**\n * Get raw buffer size (approximate memory usage).\n */\n get bufferSize(): number {\n return this.buffer.byteLength;\n }\n\n /**\n * Get all unique lemmas from the binary data.\n * Useful for compound splitting.\n */\n getAllLemmas(): string[] {\n const lemmas: string[] = [];\n for (let i = 0; i < this.lemmaCount; i++) {\n lemmas.push(this.getLemma(i));\n }\n return lemmas;\n }\n}\n","/**\n * Disambiguation rules for Icelandic.\n *\n * Based on GreynirEngine's Prefs.conf and linguistic patterns.\n * These rules help resolve ambiguous words by considering context.\n */\n\nimport type { WordClass } from \"./types.js\";\n\n/**\n * A disambiguation preference rule.\n *\n * When the word matches and the context condition is met,\n * prefer `prefer` POS over `over` POS.\n */\nexport interface DisambiguationRule {\n /** The ambiguous word (lowercase) */\n word: string;\n /** Preferred part of speech in this context */\n prefer: WordClass;\n /** Dispreferred part of speech */\n over: WordClass;\n /** Context condition for when to apply this rule */\n context: \"before_noun\" | \"before_verb\" | \"after_pronoun\" | \"sentence_start\" | \"any\";\n /** Optional description */\n description?: string;\n}\n\n/**\n * Disambiguation rules extracted from Greynir's patterns.\n *\n * Format: { word, prefer, over, context }\n *\n * Common patterns:\n * - \"á\" as preposition (fs) when before noun, as verb \"eiga\" (so) after pronoun\n * - \"við\" as preposition (fs) when before noun, as pronoun (fn) at sentence start\n */\nexport const DISAMBIGUATION_RULES: DisambiguationRule[] = [\n // \"á\" - one of the most ambiguous words\n // Preposition: \"á borðinu\", \"á Íslandi\"\n // Verb (eiga): \"Ég á bíl\", \"Hún á hest\"\n // Noun (river): \"við ána\"\n {\n word: \"á\",\n prefer: \"so\", // verb \"eiga\"\n over: \"fs\", // preposition\n context: \"after_pronoun\",\n description: \"á after pronoun = verb 'eiga' (I own, you own)\",\n },\n {\n word: \"á\",\n prefer: \"fs\", // preposition\n over: \"so\", // verb\n context: \"before_noun\",\n description: \"á before noun = preposition (on, at)\",\n },\n\n // \"við\" - preposition vs pronoun\n // Preposition: \"við gluggann\", \"við borðið\"\n // Pronoun: \"Við erum hér\" (we are here)\n {\n word: \"við\",\n prefer: \"fn\", // pronoun \"we\"\n over: \"fs\", // preposition\n context: \"sentence_start\",\n description: \"við at sentence start = pronoun 'we'\",\n },\n {\n word: \"við\",\n prefer: \"fs\", // preposition\n over: \"fn\", // pronoun\n context: \"before_noun\",\n description: \"við before noun = preposition 'by/at'\",\n },\n\n // \"af\" - preposition vs adverb\n {\n word: \"af\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"af before noun = preposition 'of/from'\",\n },\n\n // \"til\" - preposition\n {\n word: \"til\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"til before noun = preposition 'to'\",\n },\n\n // \"um\" - preposition vs adverb\n {\n word: \"um\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"um before noun = preposition 'about/around'\",\n },\n\n // \"yfir\" - preposition vs adverb\n {\n word: \"yfir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"yfir before noun = preposition 'over'\",\n },\n\n // \"undir\" - preposition vs adverb\n {\n word: \"undir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"undir before noun = preposition 'under'\",\n },\n\n // \"fyrir\" - preposition vs adverb\n {\n word: \"fyrir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"fyrir before noun = preposition 'for/before'\",\n },\n\n // \"eftir\" - preposition vs adverb\n {\n word: \"eftir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"eftir before noun = preposition 'after'\",\n },\n\n // \"frá\" - preposition\n {\n word: \"frá\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"frá before noun = preposition 'from'\",\n },\n\n // \"með\" - preposition vs adverb\n {\n word: \"með\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"með before noun = preposition 'with'\",\n },\n\n // \"í\" - preposition\n {\n word: \"í\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"í before noun = preposition 'in'\",\n },\n\n // \"úr\" - preposition vs noun (watch)\n {\n word: \"úr\",\n prefer: \"fs\",\n over: \"no\",\n context: \"before_noun\",\n description: \"úr before noun = preposition 'out of'\",\n },\n];\n\n/**\n * Look up rules that apply to a specific word.\n */\nexport function getRulesForWord(word: string): DisambiguationRule[] {\n const normalized = word.toLowerCase();\n return DISAMBIGUATION_RULES.filter((r) => r.word === normalized);\n}\n\n/**\n * Check if a word has disambiguation rules.\n */\nexport function hasDisambiguationRules(word: string): boolean {\n return DISAMBIGUATION_RULES.some((r) => r.word === word.toLowerCase());\n}\n","/**\n * Mini-grammar disambiguation rules for Icelandic.\n *\n * Uses case government (forsetningar stjórna falli) to disambiguate\n * prepositions from other parts of speech. For example:\n * - \"á\" + dative noun = preposition \"on/at\"\n * - \"á\" after pronoun = verb \"eiga\" (to own)\n *\n * Based on Greynir's Prepositions.conf but simplified for fast lookup.\n */\n\nimport type {\n GrammaticalCase,\n LemmaWithMorph,\n LemmaWithPOS,\n WordClass,\n} from \"./types.js\";\n\n/**\n * Interface for lemmatizer used in grammar rules.\n */\nexport interface GrammarLemmatizerLike {\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Preposition case government rules.\n *\n * Maps preposition lemma to the grammatical cases it governs.\n * When a preposition is followed by a noun in one of these cases,\n * we can be confident it's being used as a preposition.\n *\n * Source: Greynir's Prepositions.conf\n */\nexport const PREPOSITION_CASES: Map<string, Set<GrammaticalCase>> = new Map<string, Set<GrammaticalCase>>([\n // Both accusative and dative\n [\"á\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // on/at (þf=direction, þgf=location)\n [\"í\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // in (þf=into, þgf=inside)\n [\"við\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // at/by (þf=against, þgf=near)\n [\"með\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // with (þf=bring, þgf=accompany)\n [\"undir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // under (þf=motion, þgf=position)\n [\"yfir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // over (þf=motion, þgf=position)\n [\"fyrir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // for/before (þf=in exchange, þgf=in front)\n\n // Accusative only\n [\"um\", new Set<GrammaticalCase>([\"þf\"])], // about/around\n [\"gegnum\", new Set<GrammaticalCase>([\"þf\"])], // through\n [\"kringum\", new Set<GrammaticalCase>([\"þf\"])], // around\n [\"umhverfis\", new Set<GrammaticalCase>([\"þf\"])], // around/surrounding\n\n // Dative only\n [\"af\", new Set<GrammaticalCase>([\"þgf\"])], // of/from\n [\"frá\", new Set<GrammaticalCase>([\"þgf\"])], // from\n [\"hjá\", new Set<GrammaticalCase>([\"þgf\"])], // at/with (someone's place)\n [\"úr\", new Set<GrammaticalCase>([\"þgf\"])], // out of\n [\"að\", new Set<GrammaticalCase>([\"þgf\"])], // to/at\n [\"móti\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"nálægt\", new Set<GrammaticalCase>([\"þgf\"])], // near\n [\"gegn\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"gagnvart\", new Set<GrammaticalCase>([\"þgf\"])], // towards/regarding\n [\"handa\", new Set<GrammaticalCase>([\"þgf\"])], // for (someone)\n [\"meðal\", new Set<GrammaticalCase>([\"ef\"])], // among (actually genitive)\n\n // Genitive only\n [\"til\", new Set<GrammaticalCase>([\"ef\"])], // to\n [\"án\", new Set<GrammaticalCase>([\"ef\"])], // without\n [\"vegna\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"sakir\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"utan\", new Set<GrammaticalCase>([\"ef\"])], // outside\n [\"innan\", new Set<GrammaticalCase>([\"ef\"])], // inside\n [\"meðfram\", new Set<GrammaticalCase>([\"þgf\"])], // along\n [\"milli\", new Set<GrammaticalCase>([\"ef\"])], // between\n [\"auk\", new Set<GrammaticalCase>([\"ef\"])], // in addition to\n [\"í stað\", new Set<GrammaticalCase>([\"ef\"])], // instead of\n]);\n\n/**\n * Nominative-case pronouns that can precede verbs.\n * When one of these is followed by a potentially ambiguous word,\n * prefer the verb reading.\n */\nexport const NOMINATIVE_PRONOUNS = new Set([\n \"ég\",\n \"þú\",\n \"hann\",\n \"hún\",\n \"það\",\n \"við\",\n \"þið\",\n \"þeir\",\n \"þær\",\n \"þau\",\n]);\n\n/**\n * Result of applying a mini-grammar rule.\n */\nexport interface GrammarRuleMatch {\n /** The preferred lemma */\n lemma: string;\n /** The preferred POS */\n pos: WordClass;\n /** Rule that matched */\n rule: string;\n /** Confidence score (0-1) */\n confidence: number;\n}\n\n/**\n * Check if a preposition candidate can govern the case of the following word.\n *\n * @param prepLemma - The potential preposition lemma\n * @param nextWordMorph - Morphological features of the next word\n * @returns True if the preposition can govern this case\n */\nexport function canGovernCase(\n prepLemma: string,\n nextWordCase: GrammaticalCase | undefined\n): boolean {\n if (!nextWordCase) return false;\n const cases = PREPOSITION_CASES.get(prepLemma);\n return cases?.has(nextWordCase) ?? false;\n}\n\n/**\n * Apply preposition+case rule to disambiguate.\n *\n * If the current word can be a preposition and the next word has\n * a case governed by that preposition, prefer the preposition reading.\n *\n * @param candidates - All possible readings of the current word\n * @param nextWordMorph - Morphological analyses of the next word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPrepositionRule(\n candidates: LemmaWithMorph[],\n nextWordMorph: LemmaWithMorph[]\n): GrammarRuleMatch | null {\n // Find preposition candidates\n const prepCandidates = candidates.filter((c) => c.pos === \"fs\");\n if (prepCandidates.length === 0) return null;\n\n // Check if any next word form has a case governed by any prep candidate\n for (const prep of prepCandidates) {\n for (const nextForm of nextWordMorph) {\n if (nextForm.morph?.case && canGovernCase(prep.lemma, nextForm.morph.case)) {\n return {\n lemma: prep.lemma,\n pos: \"fs\",\n rule: `prep+${nextForm.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n }\n\n return null;\n}\n\n/**\n * Apply pronoun+verb rule to disambiguate.\n *\n * If the previous word is a nominative pronoun and the current word\n * can be a verb, prefer the verb reading.\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPronounVerbRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null\n): GrammarRuleMatch | null {\n if (!prevWord) return null;\n\n const prevLower = prevWord.toLowerCase();\n if (!NOMINATIVE_PRONOUNS.has(prevLower)) return null;\n\n // Find verb candidates\n const verbCandidates = candidates.filter((c) => c.pos === \"so\");\n if (verbCandidates.length === 0) return null;\n\n // Prefer verb over preposition/noun when after pronoun\n const hasNonVerb = candidates.some((c) => c.pos !== \"so\");\n if (!hasNonVerb) return null;\n\n // Return the verb candidate (prefer eiga for \"á\")\n const eigaCandidate = verbCandidates.find((c) => c.lemma === \"eiga\");\n const verbCandidate = eigaCandidate ?? verbCandidates[0];\n\n return {\n lemma: verbCandidate.lemma,\n pos: \"so\",\n rule: \"pronoun+verb\",\n confidence: 0.85,\n };\n}\n\n/**\n * Apply noun-after-preposition rule to disambiguate.\n *\n * If the previous word is a preposition and the current word has a\n * noun candidate with a case governed by that preposition, prefer\n * the noun reading.\n *\n * This rule only applies when:\n * - The previous word is UNAMBIGUOUSLY a preposition (no pronoun reading), OR\n * - The current word has no verb candidate\n *\n * Example: \"til fundar\" → \"fundar\" is noun \"fundur\" (genitive), not verb \"funda\"\n * Counter-example: \"við fórum\" → \"við\" is pronoun, \"fórum\" is verb \"fara\"\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @param lemmatizer - Lemmatizer for looking up the previous word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyNounAfterPrepositionRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n lemmatizer: GrammarLemmatizerLike | null\n): GrammarRuleMatch | null {\n if (!prevWord || !lemmatizer?.lemmatizeWithPOS) return null;\n\n // Check if previous word is a preposition\n const prevLemmas = lemmatizer.lemmatizeWithPOS(prevWord);\n const prepCandidate = prevLemmas.find((l) => l.pos === \"fs\");\n if (!prepCandidate) return null;\n\n // Check if the previous word could also be a pronoun\n const hasPronounReading = prevLemmas.some((l) => l.pos === \"fn\");\n\n // Check if current word has a verb candidate\n const hasVerbCandidate = candidates.some((c) => c.pos === \"so\");\n\n // If prevWord is ambiguously pronoun/preposition AND current word can be a verb,\n // don't apply this rule (let pronoun+verb rule or bigrams handle it)\n if (hasPronounReading && hasVerbCandidate) {\n return null;\n }\n\n // Get cases this preposition governs\n const governedCases = PREPOSITION_CASES.get(prepCandidate.lemma);\n if (!governedCases) return null;\n\n // Find noun candidate with matching case\n const nounCandidates = candidates.filter((c) => c.pos === \"no\");\n for (const noun of nounCandidates) {\n if (noun.morph?.case && governedCases.has(noun.morph.case)) {\n return {\n lemma: noun.lemma,\n pos: \"no\",\n rule: `noun_after_prep+${noun.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n\n return null;\n}\n\n/**\n * Apply all mini-grammar rules in sequence.\n *\n * Rules are applied in order of specificity:\n * 1. Preposition + case government (most reliable)\n * 2. Noun after preposition (governed case)\n * 3. Pronoun + verb pattern\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - Previous word (raw form)\n * @param nextWordMorph - Morphological analyses of the next word\n * @param lemmatizer - Optional lemmatizer for looking up previous word POS\n * @returns GrammarRuleMatch if any rule applies, null otherwise\n */\nexport function applyGrammarRules(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n nextWordMorph: LemmaWithMorph[],\n lemmatizer: GrammarLemmatizerLike | null = null\n): GrammarRuleMatch | null {\n // Rule 1: Preposition + governed case\n const prepRule = applyPrepositionRule(candidates, nextWordMorph);\n if (prepRule) return prepRule;\n\n // Rule 2: Noun after preposition with governed case\n const nounAfterPrepRule = applyNounAfterPrepositionRule(candidates, prevWord, lemmatizer);\n if (nounAfterPrepRule) return nounAfterPrepRule;\n\n // Rule 3: Pronoun + verb\n const verbRule = applyPronounVerbRule(candidates, prevWord);\n if (verbRule) return verbRule;\n\n return null;\n}\n\n/**\n * Check if a word is a known preposition.\n */\nexport function isKnownPreposition(lemma: string): boolean {\n return PREPOSITION_CASES.has(lemma);\n}\n\n/**\n * Get the cases governed by a preposition.\n */\nexport function getGovernedCases(prepLemma: string): Set<GrammaticalCase> | undefined {\n return PREPOSITION_CASES.get(prepLemma);\n}\n","/**\n * Disambiguation algorithm using a multi-phase pipeline.\n *\n * When a word has multiple possible lemmas, use surrounding context\n * and linguistic rules to select the most likely one.\n *\n * Pipeline phases:\n * 1. Unambiguous - words with only one lemma candidate\n * 2. Phrase rules - multi-word expressions and fixed phrases\n * 3. Disambiguation rules - contextual preferences (e.g., \"á\" after pronoun = verb)\n * 4. Grammar rules - case government (preposition + case noun)\n * 5. Word bigrams - statistical scoring using bigram frequencies\n * 6. Fallback - use first lemma if no other evidence\n */\n\nimport { STOPWORDS_IS } from \"./stopwords.js\";\nimport type { LemmatizerLike, LemmaWithPOS, LemmaWithMorph, BigramProvider, WordClass } from \"./types.js\";\nimport { DISAMBIGUATION_RULES, type DisambiguationRule } from \"./disambiguation-rules.js\";\nimport { applyGrammarRules } from \"./mini-grammar.js\";\n\nexport interface DisambiguatorOptions {\n /** Weight for left context (previous word) */\n leftWeight?: number;\n /** Weight for right context (next word) */\n rightWeight?: number;\n /** Enable preference rules (e.g., \"á\" context rules) */\n usePreferenceRules?: boolean;\n /** Enable grammar rules (case government) */\n useGrammarRules?: boolean;\n}\n\nexport interface DisambiguatedToken {\n /** Original token */\n token: string;\n /** Chosen lemma */\n lemma: string;\n /** Part of speech (if available) */\n pos?: WordClass;\n /** All candidate lemmas */\n candidates: string[];\n /** Candidates with POS (if available) */\n candidatesWithPOS?: LemmaWithPOS[];\n /** Was disambiguation needed? */\n ambiguous: boolean;\n /** Confidence score (0-1) */\n confidence: number;\n /** Which phase resolved this token */\n resolvedBy?: string;\n}\n\n/**\n * Extended lemmatizer interface that supports morphological lookup.\n */\ninterface MorphLemmatizerLike extends LemmatizerLike {\n lemmatizeWithMorph?(word: string): LemmaWithMorph[];\n}\n\n/**\n * Context for disambiguation, including surrounding tokens.\n */\ninterface DisambiguationContext {\n /** Previous word (if any) */\n prevWord: string | null;\n /** Next word (if any) */\n nextWord: string | null;\n /** Previous token's lemmas (if available) */\n prevLemmas?: string[];\n /** Next token's lemmas (if available) */\n nextLemmas?: string[];\n /** Next word's morphological analyses (if available) */\n nextWordMorph?: LemmaWithMorph[];\n /** All tokens in the sequence */\n allTokens: string[];\n /** Current index in the sequence */\n index: number;\n}\n\nexport interface DisambiguationContextHint {\n prevLemmas?: string[];\n nextLemmas?: string[];\n}\n\n/**\n * A disambiguation phase that processes candidates.\n */\ninterface DisambiguationPhase {\n name: string;\n run(\n candidates: LemmaWithPOS[],\n context: DisambiguationContext,\n disambiguator: Disambiguator\n ): { lemma: string; pos?: WordClass; confidence: number } | null;\n}\n\n/**\n * Phase 1: Handle unambiguous cases (single candidate).\n */\nconst unambiguousPhase: DisambiguationPhase = {\n name: \"unambiguous\",\n run(candidates) {\n if (candidates.length === 1) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1.0,\n };\n }\n return null;\n },\n};\n\n/**\n * Phase 2: Apply disambiguation rules based on context.\n */\nconst preferenceRulesPhase: DisambiguationPhase = {\n name: \"preference_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.usePreferenceRules) return null;\n\n for (const rule of DISAMBIGUATION_RULES) {\n const match = applyRule(rule, candidates, context);\n if (match) {\n return {\n lemma: match.lemma,\n pos: match.pos,\n confidence: 0.85,\n };\n }\n }\n return null;\n },\n};\n\n/**\n * Apply a single disambiguation rule.\n */\nfunction applyRule(\n rule: DisambiguationRule,\n candidates: LemmaWithPOS[],\n context: DisambiguationContext\n): LemmaWithPOS | null {\n // Find candidates matching the word and preferred POS\n const preferredCandidate = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.prefer\n );\n const dispreferred = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.over\n );\n\n if (!preferredCandidate || !dispreferred) {\n return null;\n }\n\n // Check context condition\n if (rule.context === \"before_noun\") {\n // Next word should be a noun (starts with uppercase or known noun)\n const next = context.nextWord;\n if (next && /^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"before_verb\") {\n // Next word suggests a verb context (harder to detect without POS)\n // Simple heuristic: if next word is lowercase and not a common noun determiner\n const next = context.nextWord?.toLowerCase();\n if (next && ![\"þessi\", \"þetta\", \"sá\", \"sú\", \"það\", \"hinn\", \"hin\", \"hið\"].includes(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"after_pronoun\") {\n // Previous word is a pronoun\n const prev = context.prevWord?.toLowerCase();\n const pronouns = [\"ég\", \"þú\", \"hann\", \"hún\", \"það\", \"við\", \"þið\", \"þeir\", \"þær\", \"þau\"];\n if (prev && pronouns.includes(prev)) {\n return preferredCandidate;\n }\n }\n\n return null;\n}\n\n/**\n * Phase 3: Apply grammar rules (case government).\n *\n * Uses morphological features to apply preposition+case and pronoun+verb rules.\n */\nconst grammarRulesPhase: DisambiguationPhase = {\n name: \"grammar_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.useGrammarRules) return null;\n\n // Convert LemmaWithPOS to LemmaWithMorph if needed\n const candidatesWithMorph: LemmaWithMorph[] = candidates.map((c) => ({\n ...c,\n morph: undefined,\n }));\n\n // Get morphological info for candidates if available\n const currentWord = context.allTokens[context.index];\n if (currentWord) {\n const morphCandidates = disambiguator.getMorph(currentWord);\n if (morphCandidates) {\n // Replace with morph-enriched candidates\n candidatesWithMorph.length = 0;\n candidatesWithMorph.push(...morphCandidates);\n }\n }\n\n // Apply grammar rules\n const result = applyGrammarRules(\n candidatesWithMorph,\n context.prevWord,\n context.nextWordMorph ?? [],\n disambiguator.lemmatizer\n );\n\n if (result) {\n return {\n lemma: result.lemma,\n pos: result.pos,\n confidence: result.confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 4: Score using bigram frequencies.\n */\nconst bigramPhase: DisambiguationPhase = {\n name: \"word_bigrams\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.bigrams) return null;\n if (candidates.length === 0) return null;\n\n const scores: { candidate: LemmaWithPOS; score: number }[] = [];\n\n for (const candidate of candidates) {\n let score = 0;\n\n // Left context: bigram(prevWord, lemma)\n if (context.prevWord) {\n const prevLemmas = context.prevLemmas || disambiguator.lemmatizer.lemmatize(context.prevWord);\n for (const prevLemma of prevLemmas) {\n const freq = disambiguator.bigrams.freq(prevLemma, candidate.lemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.leftWeight;\n }\n }\n }\n\n // Right context: bigram(lemma, nextWord)\n if (context.nextWord) {\n const nextLemmas = context.nextLemmas || disambiguator.lemmatizer.lemmatize(context.nextWord);\n for (const nextLemma of nextLemmas) {\n const freq = disambiguator.bigrams.freq(candidate.lemma, nextLemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.rightWeight;\n }\n }\n }\n\n scores.push({ candidate, score });\n }\n\n // Sort by score\n scores.sort((a, b) => b.score - a.score);\n\n // Check if we have scores and if top score is positive\n if (scores.length > 0 && scores[0].score > 0) {\n const topScore = scores[0].score;\n const totalScore = scores.reduce((sum, s) => sum + Math.exp(s.score), 0);\n const confidence = totalScore > 0 ? Math.exp(topScore) / totalScore : 0.5;\n\n return {\n lemma: scores[0].candidate.lemma,\n pos: scores[0].candidate.pos,\n confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 5: Fallback to first candidate.\n */\nconst fallbackPhase: DisambiguationPhase = {\n name: \"fallback\",\n run(candidates) {\n if (candidates.length > 0) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1 / candidates.length,\n };\n }\n return null;\n },\n};\n\n/**\n * All disambiguation phases in order.\n */\nconst PHASES: DisambiguationPhase[] = [\n unambiguousPhase,\n preferenceRulesPhase,\n grammarRulesPhase,\n bigramPhase,\n fallbackPhase,\n];\n\n/**\n * Disambiguate lemmas using a multi-phase pipeline.\n */\nexport class Disambiguator {\n lemmatizer: MorphLemmatizerLike;\n bigrams: BigramProvider | null;\n leftWeight: number;\n rightWeight: number;\n usePreferenceRules: boolean;\n useGrammarRules: boolean;\n private morphCache: Map<string, LemmaWithMorph[]> | null;\n\n constructor(\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider | null = null,\n options: DisambiguatorOptions = {}\n ) {\n this.lemmatizer = lemmatizer as MorphLemmatizerLike;\n this.bigrams = bigrams;\n this.leftWeight = options.leftWeight ?? 1.0;\n this.rightWeight = options.rightWeight ?? 1.0;\n this.usePreferenceRules = options.usePreferenceRules ?? true;\n this.useGrammarRules = options.useGrammarRules ?? true;\n this.morphCache = this.lemmatizer.lemmatizeWithMorph ? new Map() : null;\n }\n\n private getMorph(word: string): LemmaWithMorph[] | undefined {\n if (!this.lemmatizer.lemmatizeWithMorph || !this.morphCache) return undefined;\n const key = word.toLowerCase();\n const cached = this.morphCache.get(key);\n if (cached) return cached;\n const morph = this.lemmatizer.lemmatizeWithMorph(word);\n this.morphCache.set(key, morph);\n return morph;\n }\n\n /**\n * Disambiguate a single word given context.\n *\n * @param word - The word to lemmatize\n * @param prevWord - Previous word (left context), or null\n * @param nextWord - Next word (right context), or null\n */\n disambiguate(\n word: string,\n prevWord: string | null,\n nextWord: string | null,\n hint: DisambiguationContextHint = {}\n ): DisambiguatedToken {\n // Get candidates with POS if available\n let candidatesWithPOS: LemmaWithPOS[];\n if (this.lemmatizer.lemmatizeWithPOS) {\n candidatesWithPOS = this.lemmatizer.lemmatizeWithPOS(word);\n } else {\n // Fall back to plain lemmatization\n const lemmas = this.lemmatizer.lemmatize(word);\n candidatesWithPOS = lemmas.map((l) => ({ lemma: l, pos: \"no\" as WordClass }));\n }\n\n const candidates = candidatesWithPOS.map((c) => c.lemma);\n const token = word;\n\n // Get morphological info for next word if available\n let nextWordMorph: LemmaWithMorph[] | undefined;\n if (nextWord) {\n nextWordMorph = this.getMorph(nextWord);\n }\n\n // Build context\n const context: DisambiguationContext = {\n prevWord,\n nextWord,\n prevLemmas: hint.prevLemmas,\n nextLemmas: hint.nextLemmas,\n nextWordMorph,\n allTokens: [word],\n index: 0,\n };\n\n // Run through phases\n for (const phase of PHASES) {\n const result = phase.run(candidatesWithPOS, context, this);\n if (result) {\n return {\n token,\n lemma: result.lemma,\n pos: result.pos,\n candidates,\n candidatesWithPOS,\n ambiguous: candidates.length > 1,\n confidence: result.confidence,\n resolvedBy: phase.name,\n };\n }\n }\n\n // Should never reach here due to fallback phase\n return {\n token,\n lemma: word.toLowerCase(),\n candidates,\n candidatesWithPOS,\n ambiguous: false,\n confidence: 0,\n resolvedBy: \"none\",\n };\n }\n\n /**\n * Disambiguate an array of tokens.\n *\n * @param tokens - Array of word tokens\n * @returns Array of disambiguated tokens\n */\n disambiguateAll(tokens: string[]): DisambiguatedToken[] {\n const results: DisambiguatedToken[] = [];\n\n for (let i = 0; i < tokens.length; i++) {\n const word = tokens[i];\n const prevWord = i > 0 ? tokens[i - 1] : null;\n const nextWord = i < tokens.length - 1 ? tokens[i + 1] : null;\n\n results.push(this.disambiguate(word, prevWord, nextWord));\n }\n\n return results;\n }\n\n /**\n * Extract unique lemmas from text with disambiguation.\n *\n * @param tokens - Array of word tokens\n * @returns Set of unique lemmas (best guess for each ambiguous word)\n */\n extractLemmas(tokens: string[]): Set<string> {\n const lemmas = new Set<string>();\n const disambiguated = this.disambiguateAll(tokens);\n\n for (const result of disambiguated) {\n lemmas.add(result.lemma);\n }\n\n return lemmas;\n }\n}\n\n/**\n * Shortcut for simple lemma extraction with disambiguation.\n */\nexport function extractDisambiguatedLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider,\n options: {\n tokenize?: (text: string) => string[];\n removeStopwords?: boolean;\n } = {}\n): Set<string> {\n const { tokenize, removeStopwords } = options;\n\n // Tokenize\n const tokens = tokenize\n ? tokenize(text)\n : text\n .split(/\\s+/)\n .filter((t) => t.length > 0)\n .map((t) => t.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\"))\n .filter((t) => t.length > 0);\n\n // Disambiguate\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n const lemmas = disambiguator.extractLemmas(tokens);\n\n // Filter stopwords if requested\n if (removeStopwords) {\n for (const lemma of lemmas) {\n if (STOPWORDS_IS.has(lemma)) {\n lemmas.delete(lemma);\n }\n }\n }\n\n return lemmas;\n}\n","/**\n * Shared type definitions to avoid circular imports.\n */\n\n/**\n * Word class (part-of-speech) codes from BÍN.\n *\n * These are simplified from BÍN's detailed categories:\n * - kk/kvk/hk (gendered nouns) → 'no'\n * - pfn (personal pronoun) → 'fn'\n */\nexport type WordClass =\n | \"no\" // nafnorð (noun)\n | \"so\" // sagnorð (verb)\n | \"lo\" // lýsingarorð (adjective)\n | \"ao\" // atviksorð (adverb)\n | \"fs\" // forsetning (preposition)\n | \"fn\" // fornafn (pronoun)\n | \"st\" // samtenging (conjunction)\n | \"to\" // töluorð (numeral)\n | \"gr\" // greinir (article)\n | \"uh\"; // upphrópun (interjection)\n\n/**\n * Human-readable names for word classes.\n */\nexport const WORD_CLASS_NAMES: Record<WordClass, string> = {\n no: \"noun\",\n so: \"verb\",\n lo: \"adjective\",\n ao: \"adverb\",\n fs: \"preposition\",\n fn: \"pronoun\",\n st: \"conjunction\",\n to: \"numeral\",\n gr: \"article\",\n uh: \"interjection\",\n};\n\n/**\n * Icelandic names for word classes.\n */\nexport const WORD_CLASS_NAMES_IS: Record<WordClass, string> = {\n no: \"nafnorð\",\n so: \"sagnorð\",\n lo: \"lýsingarorð\",\n ao: \"atviksorð\",\n fs: \"forsetning\",\n fn: \"fornafn\",\n st: \"samtenging\",\n to: \"töluorð\",\n gr: \"greinir\",\n uh: \"upphrópun\",\n};\n\n/**\n * Grammatical case (fall) in Icelandic.\n */\nexport type GrammaticalCase = \"nf\" | \"þf\" | \"þgf\" | \"ef\";\n\n/**\n * Grammatical gender (kyn) in Icelandic.\n */\nexport type GrammaticalGender = \"kk\" | \"kvk\" | \"hk\";\n\n/**\n * Grammatical number (tala) in Icelandic.\n */\nexport type GrammaticalNumber = \"et\" | \"ft\";\n\n/**\n * Human-readable names for cases.\n */\nexport const CASE_NAMES: Record<GrammaticalCase, string> = {\n nf: \"nominative\",\n þf: \"accusative\",\n þgf: \"dative\",\n ef: \"genitive\",\n};\n\n/**\n * Human-readable names for genders.\n */\nexport const GENDER_NAMES: Record<GrammaticalGender, string> = {\n kk: \"masculine\",\n kvk: \"feminine\",\n hk: \"neuter\",\n};\n\n/**\n * Human-readable names for numbers.\n */\nexport const NUMBER_NAMES: Record<GrammaticalNumber, string> = {\n et: \"singular\",\n ft: \"plural\",\n};\n\n/**\n * Morphological features extracted from BÍN.\n */\nexport interface MorphFeatures {\n case?: GrammaticalCase;\n gender?: GrammaticalGender;\n number?: GrammaticalNumber;\n}\n\n/**\n * A lemma with its word class.\n */\nexport interface LemmaWithPOS {\n lemma: string;\n pos: WordClass;\n}\n\n/**\n * A lemma with word class and morphological features.\n */\nexport interface LemmaWithMorph extends LemmaWithPOS {\n morph?: MorphFeatures;\n}\n\n/**\n * Interface for lemmatizer-like objects.\n * Used to avoid circular dependency between modules.\n */\nexport interface LemmatizerLike {\n lemmatize(word: string): string[];\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Interface for bigram frequency lookup.\n * Used for disambiguation scoring.\n */\nexport interface BigramProvider {\n freq(word1: string, word2: string): number;\n}\n","/**\n * Minimal Bloom filter for compact set membership checks.\n */\n\nexport interface BloomFilterOptions {\n falsePositiveRate?: number;\n maxHashFunctions?: number;\n}\n\nexport class BloomFilter {\n private bits: Uint8Array;\n private sizeBits: number;\n private hashCount: number;\n\n private constructor(bits: Uint8Array, sizeBits: number, hashCount: number) {\n this.bits = bits;\n this.sizeBits = sizeBits;\n this.hashCount = hashCount;\n }\n\n static fromValues(values: string[], options: BloomFilterOptions = {}): BloomFilter {\n const n = Math.max(values.length, 1);\n const p = options.falsePositiveRate ?? 0.01;\n\n const m = Math.max(1, Math.ceil((-n * Math.log(p)) / (Math.LN2 * Math.LN2)));\n const k = Math.max(1, Math.round((m / n) * Math.LN2));\n const hashCount = options.maxHashFunctions\n ? Math.min(k, options.maxHashFunctions)\n : k;\n\n const bytes = Math.ceil(m / 8);\n const bits = new Uint8Array(bytes);\n const filter = new BloomFilter(bits, m, hashCount);\n\n for (const value of values) {\n filter.add(value);\n }\n\n return filter;\n }\n\n add(value: string): void {\n const [h1, h2] = this.hashes(value);\n for (let i = 0; i < this.hashCount; i++) {\n const combined = (h1 + i * h2) % this.sizeBits;\n this.setBit(combined);\n }\n }\n\n has(value: string): boolean {\n const [h1, h2] = this.hashes(value);\n for (let i = 0; i < this.hashCount; i++) {\n const combined = (h1 + i * h2) % this.sizeBits;\n if (!this.getBit(combined)) return false;\n }\n return true;\n }\n\n private setBit(index: number): void {\n const byteIndex = index >>> 3;\n const bit = index & 7;\n this.bits[byteIndex] |= 1 << bit;\n }\n\n private getBit(index: number): boolean {\n const byteIndex = index >>> 3;\n const bit = index & 7;\n return (this.bits[byteIndex] & (1 << bit)) !== 0;\n }\n\n private hashes(value: string): [number, number] {\n const str = value.toLowerCase();\n let hash1 = 2166136261 >>> 0;\n let hash2 = 2166136261 >>> 0;\n\n for (let i = 0; i < str.length; i++) {\n const code = str.charCodeAt(i);\n hash1 ^= code;\n hash1 = Math.imul(hash1, 16777619) >>> 0;\n\n hash2 ^= code;\n hash2 = Math.imul(hash2, 2166136261) >>> 0;\n }\n\n hash2 ^= hash2 >>> 13;\n hash2 = Math.imul(hash2, 0x85ebca6b) >>> 0;\n hash2 ^= hash2 >>> 16;\n\n return [hash1 >>> 0, hash2 >>> 0 || 0x27d4eb2d];\n }\n}\n","/**\n * Compound word splitting for Icelandic.\n *\n * Icelandic compounds are written as single words:\n * - \"bílstjóri\" = \"bíl\" (car) + \"stjóri\" (driver)\n * - \"sjúkrahús\" = \"sjúkra\" (sick-GEN) + \"hús\" (house)\n *\n * Strategy:\n * 1. Try splitting at each position\n * 2. Check if both parts are known words\n * 3. Handle common compound linking letters (s, u, a)\n * 4. Score by part lengths (prefer balanced splits)\n */\n\nimport type { LemmatizerLike } from \"./types.js\";\nimport { BloomFilter, type BloomFilterOptions } from \"./bloom.js\";\n\n/**\n * Protected lemmas that should NEVER be split as compounds.\n * Mostly place names that happen to end in common word parts.\n */\nexport const PROTECTED_LEMMAS = new Set([\n // Countries ending in -land\n \"ísland\",\n \"england\",\n \"írland\",\n \"skotland\",\n \"finnland\",\n \"grænland\",\n \"holland\",\n \"þýskaland\",\n \"frakkland\",\n \"pólland\",\n \"tékkland\",\n \"svissland\",\n \"rússland\",\n \"eistland\",\n \"lettland\",\n \"litháen\",\n // Other countries/regions\n \"danmörk\",\n \"noregur\",\n \"svíþjóð\",\n \"bandaríkin\",\n \"spánn\",\n \"portúgal\",\n \"ítalía\",\n \"grikkland\",\n // Icelandic place names (from BÍN)\n \"þingvellir\",\n \"akureyri\",\n \"ísafjörður\",\n \"reykjavík\",\n \"keflavík\",\n \"hafnarfjörður\",\n \"kópavogur\",\n \"seltjarnarnes\",\n \"garðabær\",\n \"mosfellsbær\",\n \"vestmannaeyjar\",\n \"húsavík\",\n \"sauðárkrókur\",\n \"siglufjörður\",\n \"ólafsfjörður\",\n \"dalvík\",\n \"egilsstaðir\",\n \"neskaupstaður\",\n \"seyðisfjörður\",\n \"eskifjörður\",\n \"reyðarfjörður\",\n \"fáskrúðsfjörður\",\n \"stöðvarfjörður\",\n \"djúpivogur\",\n \"höfn\",\n \"vík\",\n \"selfoss\",\n \"hveragerði\",\n \"þorlákshöfn\",\n \"grindavík\",\n \"sandgerði\",\n \"borgarnes\",\n \"stykkishólmur\",\n \"grundarfjörður\",\n \"ólafsvík\",\n \"búðardalur\",\n \"patreksfjörður\",\n \"flateyri\",\n \"suðureyri\",\n \"bolungarvík\",\n \"hólmavík\",\n \"hvammstangi\",\n \"blönduós\",\n \"skagaströnd\",\n \"varmahlíð\",\n // Literary/historical places\n \"hlíðarendi\",\n \"bergþórshvol\",\n // Company names\n \"íslandsbanki\",\n \"landsbankinn\",\n \"arionbanki\",\n // Institutions\n \"alþingi\",\n]);\n\nexport interface CompoundSplit {\n /** Original word */\n word: string;\n /** Constituent parts (lemmatized) - all variants for indexing */\n parts: string[];\n /** All index terms: parts + original word */\n indexTerms: string[];\n /** Split confidence (0-1) */\n confidence: number;\n /** Is this a compound? */\n isCompound: boolean;\n}\n\n/**\n * Splitting mode for compound words.\n *\n * - \"aggressive\": Try to split all words, even known BÍN entries\n * - \"balanced\": Split unknown words; split known words only if high confidence\n * - \"conservative\": Only split at hyphens or very high confidence cases\n */\nexport type CompoundSplitMode = \"aggressive\" | \"balanced\" | \"conservative\";\n\nexport interface CompoundSplitterOptions {\n /**\n * Minimum part length.\n * Default: 3. Set to 2 for more aggressive splitting (e.g., \"ís\" in \"ísland\").\n */\n minPartLength?: number;\n /** Try removing linking letters (s, u, a) */\n tryLinkingLetters?: boolean;\n /**\n * Splitting mode.\n * Default: \"balanced\"\n */\n mode?: CompoundSplitMode;\n}\n\n/**\n * Common compound tail words in Icelandic.\n * These are often the second part of compounds and boost split confidence.\n */\nconst COMMON_COMPOUND_TAILS = new Set([\n // People/roles\n \"maður\",\n \"kona\",\n \"stjóri\",\n \"ráðherra\",\n \"forseti\",\n \"formaður\",\n \"fulltrúi\",\n \"starfsmaður\",\n // Places\n \"hús\",\n \"staður\",\n \"vegur\",\n \"borg\",\n \"bær\",\n \"dalur\",\n \"fjörður\",\n // Organizations\n \"félag\",\n \"banki\",\n \"sjóður\",\n \"stofnun\",\n \"ráð\",\n // Things/concepts\n \"rannsókn\",\n \"greiðsla\",\n \"mál\",\n \"kerfi\",\n \"verk\",\n \"þjónusta\",\n \"rekstur\",\n \"viðskipti\",\n \"verð\",\n \"kostnaður\",\n]);\n\n/**\n * Very common standalone words that should rarely be compound parts.\n * Penalize splits where BOTH parts are common standalone words.\n */\nconst COMMON_STANDALONE = new Set([\n \"vera\",\n \"hafa\",\n \"gera\",\n \"fara\",\n \"koma\",\n \"segja\",\n \"vilja\",\n \"mega\",\n \"þurfa\",\n \"verða\",\n \"geta\",\n \"sjá\",\n \"taka\",\n \"eiga\",\n \"láta\",\n \"halda\",\n \"leyfa\",\n \"búa\",\n]);\n\n/**\n * Common compound linking patterns in Icelandic.\n * These letters often join compound parts:\n * - \"s\" (genitive): húss + eigandi -> \"húseigandi\"\n * - \"u\" (genitive/linking): vatnu + fall -> \"vatnufall\" (rare)\n * - \"a\" (genitive): daga + blað -> \"dagablað\"\n */\nconst LINKING_PATTERNS = [\"s\", \"u\", \"a\"];\n\nexport class CompoundSplitter {\n private lemmatizer: LemmatizerLike;\n private minPartLength: number;\n private tryLinkingLetters: boolean;\n private knownLemmas: KnownLemmaLookup;\n private mode: CompoundSplitMode;\n\n constructor(\n lemmatizer: LemmatizerLike,\n knownLemmas: KnownLemmaLookup,\n options: CompoundSplitterOptions = {}\n ) {\n this.lemmatizer = lemmatizer;\n this.knownLemmas = knownLemmas;\n this.minPartLength = options.minPartLength ?? 3;\n this.tryLinkingLetters = options.tryLinkingLetters ?? true;\n this.mode = options.mode ?? \"balanced\";\n }\n\n /**\n * Helper to create a no-split result.\n */\n private noSplit(word: string, lemmas: string[]): CompoundSplit {\n return {\n word,\n parts: lemmas,\n indexTerms: lemmas,\n confidence: 0,\n isCompound: false,\n };\n }\n\n /**\n * Try to split a word into compound parts.\n *\n * Uses a lookup-first strategy:\n * 1. Check protected lemmas - never split\n * 2. Check if word is known in BÍN and unambiguous - don't split\n * 3. Apply mode-based splitting rules\n */\n split(word: string): CompoundSplit {\n const normalized = word.toLowerCase();\n\n // Step 1: Check protected lemmas - never split these\n const directLemmas = this.lemmatizer.lemmatize(word);\n const primaryLemma = directLemmas[0]?.toLowerCase();\n if (primaryLemma && PROTECTED_LEMMAS.has(primaryLemma)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Also check if the word itself is protected (for inflected forms)\n if (PROTECTED_LEMMAS.has(normalized)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 2: Check if known in BÍN and unambiguous\n // A word is \"known\" if lemmatization returned something other than the word itself\n const isKnownWord =\n directLemmas.length > 0 && directLemmas[0].toLowerCase() !== normalized;\n const isUnambiguous = directLemmas.length === 1;\n\n // For conservative mode, only split at hyphens\n if (this.mode === \"conservative\") {\n if (word.includes(\"-\")) {\n return this.splitAtHyphen(word, directLemmas);\n }\n return this.noSplit(word, directLemmas);\n }\n\n // For balanced mode, don't split unambiguous known words\n if (this.mode === \"balanced\" && isKnownWord && isUnambiguous) {\n // Exception: still try if the word is very long (likely a compound)\n if (normalized.length < 12) {\n return this.noSplit(word, directLemmas);\n }\n }\n\n // Too short to be a compound\n if (normalized.length < this.minPartLength * 2) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 3: Try algorithmic splitting\n const candidates: {\n leftParts: string[];\n rightParts: string[];\n score: number;\n }[] = [];\n\n for (\n let i = this.minPartLength;\n i <= normalized.length - this.minPartLength;\n i++\n ) {\n const leftPart = normalized.slice(0, i);\n const rightPart = normalized.slice(i);\n\n // Try direct split\n const directResult = this.trySplit(leftPart, rightPart);\n if (directResult) {\n candidates.push(directResult);\n }\n\n // Try with linking letters removed from split point\n if (this.tryLinkingLetters) {\n for (const linker of LINKING_PATTERNS) {\n // Remove linking letter from end of left part\n if (leftPart.endsWith(linker) && leftPart.length > this.minPartLength) {\n const trimmedLeft = leftPart.slice(0, -1);\n const result = this.trySplit(trimmedLeft, rightPart);\n if (result) {\n // Slightly lower score for linked compounds\n candidates.push({ ...result, score: result.score * 0.95 });\n }\n }\n }\n }\n }\n\n if (candidates.length === 0) {\n return this.noSplit(word, directLemmas);\n }\n\n // Pick best candidate by score\n candidates.sort((a, b) => b.score - a.score);\n const best = candidates[0];\n\n // In balanced mode, require higher confidence for known words\n if (this.mode === \"balanced\" && isKnownWord && best.score < 0.6) {\n return this.noSplit(word, directLemmas);\n }\n\n // Collect all unique parts from best split\n const parts = [...new Set([...best.leftParts, ...best.rightParts])];\n // Index terms include parts + original word for search\n const indexTerms = [...new Set([...parts, normalized])];\n\n return {\n word,\n parts,\n indexTerms,\n confidence: Math.min(best.score, 1),\n isCompound: true,\n };\n }\n\n /**\n * Split a hyphenated word.\n */\n private splitAtHyphen(word: string, directLemmas: string[]): CompoundSplit {\n const parts = word.split(\"-\").filter((p) => p.length > 0);\n if (parts.length < 2) {\n return this.noSplit(word, directLemmas);\n }\n\n const allParts: string[] = [];\n for (const part of parts) {\n const lemmas = this.lemmatizer.lemmatize(part);\n allParts.push(...lemmas);\n }\n\n const uniqueParts = [...new Set(allParts)];\n const indexTerms = [...new Set([...uniqueParts, word.toLowerCase()])];\n\n return {\n word,\n parts: uniqueParts,\n indexTerms,\n confidence: 0.9,\n isCompound: true,\n };\n }\n\n private trySplit(\n leftPart: string,\n rightPart: string\n ): { leftParts: string[]; rightParts: string[]; score: number } | null {\n // Get lemmas for both parts\n const leftLemmas = this.lemmatizer.lemmatize(leftPart);\n const rightLemmas = this.lemmatizer.lemmatize(rightPart);\n\n // Filter to known lemmas only, deduplicated\n const leftKnown = [...new Set(leftLemmas.filter((l) => this.knownLemmas.has(l)))];\n const rightKnown = [...new Set(rightLemmas.filter((l) => this.knownLemmas.has(l)))];\n\n if (leftKnown.length === 0 || rightKnown.length === 0) {\n return null;\n }\n\n // Calculate score with multiple factors\n let score = 0;\n\n // Factor 1: Length balance (20% weight)\n // Prefer balanced splits, but not too strictly\n const lengthBalance =\n 1 - Math.abs(leftPart.length - rightPart.length) / (leftPart.length + rightPart.length);\n score += lengthBalance * 0.2;\n\n // Factor 2: Part length bonus (20% weight)\n // Prefer longer parts (more likely to be real words)\n const avgLength = (leftPart.length + rightPart.length) / 2;\n const lengthBonus = Math.min(avgLength / 6, 1);\n score += lengthBonus * 0.2;\n\n // Factor 3: Common compound tail bonus (30% weight)\n // Strongly prefer splits where right part is a known compound tail\n const hasCompoundTail = rightKnown.some((lemma) => COMMON_COMPOUND_TAILS.has(lemma));\n if (hasCompoundTail) {\n score += 0.3;\n }\n\n // Factor 4: Penalty for both parts being common standalone words (30% weight)\n // E.g., \"ísland\" -> \"ís\" + \"land\" should be penalized\n const leftIsCommon = leftKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n const rightIsCommon = rightKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n if (leftIsCommon && rightIsCommon) {\n // Strong penalty if both parts are very common standalone\n score -= 0.3;\n } else if (!leftIsCommon && !rightIsCommon) {\n // Bonus if neither is a common standalone (more likely a real compound)\n score += 0.2;\n }\n\n // Factor 5: Minimum part length requirement\n // Very short parts (2-3 chars) get a penalty\n if (leftPart.length < 4 || rightPart.length < 4) {\n score -= 0.15;\n }\n\n // Return all known lemmas from both parts\n return {\n leftParts: leftKnown,\n rightParts: rightKnown,\n score: Math.max(0, score), // Ensure non-negative\n };\n }\n\n /**\n * Get all lemmas for a word, including compound parts.\n * Useful for search indexing.\n */\n getAllLemmas(word: string): string[] {\n const split = this.split(word);\n return split.indexTerms;\n }\n}\n\n/**\n * Create a set of known lemmas from the lemmatizer.\n * This is used to check if compound parts are valid words.\n */\nexport function createKnownLemmaSet(lemmas: string[]): Set<string> {\n return new Set(lemmas.map((l) => l.toLowerCase()));\n}\n\nexport interface KnownLemmaLookup {\n has(lemma: string): boolean;\n}\n\nexport interface KnownLemmaFilterOptions extends BloomFilterOptions {}\n\n/**\n * Create a compact lookup for known lemmas using a Bloom filter.\n * False positives are possible (more splits), false negatives are not.\n */\nexport function createKnownLemmaFilter(\n lemmas: string[],\n options: KnownLemmaFilterOptions = {}\n): KnownLemmaLookup {\n const normalized = lemmas.map((l) => l.toLowerCase());\n return BloomFilter.fromValues(normalized, options);\n}\n","/**\n * Static multi-word phrases for Icelandic.\n *\n * Source: Extracted from GreynirEngine's Phrases.conf (MIT License)\n * https://github.com/mideind/GreynirEngine\n *\n * These phrases should be recognized as units rather than individual words,\n * enabling better stopword detection and lemmatization.\n */\n\n/**\n * A static phrase definition.\n */\nexport interface StaticPhrase {\n /** The canonical/lemma form of the phrase */\n lemma: string;\n /** Whether this phrase functions as a stopword (e.g., \"til dæmis\") */\n isStopword: boolean;\n /** Part of speech category */\n pos?: \"ao\" | \"fs\" | \"st\" | \"entity\";\n}\n\n/**\n * Common Icelandic multi-word phrases.\n * Keys are lowercase, normalized forms.\n */\nexport const STATIC_PHRASES: Map<string, StaticPhrase> = new Map([\n // Adverbial phrases (ao frasi) - often function as stopwords\n [\"til dæmis\", { lemma: \"til dæmi\", isStopword: true, pos: \"ao\" }],\n [\"með öðrum orðum\", { lemma: \"með annar orð\", isStopword: true, pos: \"ao\" }],\n [\"í raun\", { lemma: \"í raun\", isStopword: true, pos: \"ao\" }],\n [\"í raun og veru\", { lemma: \"í raun og vera\", isStopword: true, pos: \"ao\" }],\n [\"af og til\", { lemma: \"af og til\", isStopword: true, pos: \"ao\" }],\n [\"aftur á móti\", { lemma: \"aftur á mót\", isStopword: true, pos: \"ao\" }],\n [\"alla vega\", { lemma: \"allur vegur\", isStopword: true, pos: \"ao\" }],\n [\"alls ekki\", { lemma: \"alls ekki\", isStopword: true, pos: \"ao\" }],\n [\"alls staðar\", { lemma: \"allur staður\", isStopword: true, pos: \"ao\" }],\n [\"allt í allt\", { lemma: \"allur í allur\", isStopword: true, pos: \"ao\" }],\n [\"annars vegar\", { lemma: \"annar vegur\", isStopword: true, pos: \"ao\" }],\n [\"auk þess\", { lemma: \"auk það\", isStopword: true, pos: \"ao\" }],\n [\"að auki\", { lemma: \"að auki\", isStopword: true, pos: \"ao\" }],\n [\"að vísu\", { lemma: \"að vís\", isStopword: true, pos: \"ao\" }],\n [\"að sjálfsögðu\", { lemma: \"að sjálfsagður\", isStopword: true, pos: \"ao\" }],\n [\"að minnsta kosti\", { lemma: \"að lítill kostur\", isStopword: true, pos: \"ao\" }],\n [\"að öllu leyti\", { lemma: \"að allur leyti\", isStopword: true, pos: \"ao\" }],\n [\"að nokkru leyti\", { lemma: \"að nokkur leyti\", isStopword: true, pos: \"ao\" }],\n [\"ef til vill\", { lemma: \"ef til vilja\", isStopword: true, pos: \"ao\" }],\n [\"einhvers staðar\", { lemma: \"einhver staður\", isStopword: true, pos: \"ao\" }],\n [\"einhvern veginn\", { lemma: \"einhver vegur\", isStopword: true, pos: \"ao\" }],\n [\"ekki síst\", { lemma: \"ekki síður\", isStopword: true, pos: \"ao\" }],\n [\"engu að síður\", { lemma: \"enginn að síður\", isStopword: true, pos: \"ao\" }],\n [\"fyrst og fremst\", { lemma: \"snemma og fremri\", isStopword: true, pos: \"ao\" }],\n [\"hins vegar\", { lemma: \"hinn vegur\", isStopword: true, pos: \"ao\" }],\n [\"hér og þar\", { lemma: \"hér og þar\", isStopword: true, pos: \"ao\" }],\n [\"hér um bil\", { lemma: \"hér um bil\", isStopword: true, pos: \"ao\" }],\n [\"hér á landi\", { lemma: \"hér á land\", isStopword: true, pos: \"ao\" }],\n [\"hvað mest\", { lemma: \"hvað mjög\", isStopword: true, pos: \"ao\" }],\n [\"hverju sinni\", { lemma: \"hver sinn\", isStopword: true, pos: \"ao\" }],\n [\"hvorki né\", { lemma: \"hvorki né\", isStopword: true, pos: \"ao\" }],\n [\"í burtu\", { lemma: \"í burtu\", isStopword: true, pos: \"ao\" }],\n [\"í gær\", { lemma: \"í gær\", isStopword: true, pos: \"ao\" }],\n [\"í senn\", { lemma: \"í senn\", isStopword: true, pos: \"ao\" }],\n [\"í sífellu\", { lemma: \"í sífella\", isStopword: true, pos: \"ao\" }],\n [\"lengi vel\", { lemma: \"lengi vel\", isStopword: true, pos: \"ao\" }],\n [\"meira að segja\", { lemma: \"mikill að segja\", isStopword: true, pos: \"ao\" }],\n [\"meira og minna\", { lemma: \"mikill og lítill\", isStopword: true, pos: \"ao\" }],\n [\"meðal annars\", { lemma: \"meðal annar\", isStopword: true, pos: \"ao\" }],\n [\"nokkurn veginn\", { lemma: \"nokkur vegur\", isStopword: true, pos: \"ao\" }],\n [\"og svo framvegis\", { lemma: \"og svo framvegis\", isStopword: true, pos: \"ao\" }],\n [\"satt að segja\", { lemma: \"sannur að segja\", isStopword: true, pos: \"ao\" }],\n [\"sem betur fer\", { lemma: \"sem vel fara\", isStopword: true, pos: \"ao\" }],\n [\"smám saman\", { lemma: \"smátt saman\", isStopword: true, pos: \"ao\" }],\n [\"svo sem\", { lemma: \"svo sem\", isStopword: true, pos: \"ao\" }],\n [\"sér í lagi\", { lemma: \"sér í lag\", isStopword: true, pos: \"ao\" }],\n [\"til og frá\", { lemma: \"til og frá\", isStopword: true, pos: \"ao\" }],\n [\"til baka\", { lemma: \"til baka\", isStopword: true, pos: \"ao\" }],\n [\"vítt og breitt\", { lemma: \"vítt og breitt\", isStopword: true, pos: \"ao\" }],\n [\"á ný\", { lemma: \"á ný\", isStopword: true, pos: \"ao\" }],\n [\"á meðan\", { lemma: \"á meðan\", isStopword: true, pos: \"ao\" }],\n [\"á sama tíma\", { lemma: \"á samur tími\", isStopword: true, pos: \"ao\" }],\n [\"á hinn bóginn\", { lemma: \"á hinn bógur\", isStopword: true, pos: \"ao\" }],\n [\"þar af leiðandi\", { lemma: \"þar af leiða\", isStopword: true, pos: \"ao\" }],\n [\"þar að auki\", { lemma: \"þar að auki\", isStopword: true, pos: \"ao\" }],\n [\"það er að segja\", { lemma: \"það vera að segja\", isStopword: true, pos: \"ao\" }],\n [\"þess vegna\", { lemma: \"það vegna\", isStopword: true, pos: \"ao\" }],\n [\"því miður\", { lemma: \"það lítt\", isStopword: true, pos: \"ao\" }],\n [\"þrátt fyrir\", { lemma: \"þrátt fyrir\", isStopword: true, pos: \"ao\" }],\n\n // Time expressions\n [\"á dögunum\", { lemma: \"á dagur\", isStopword: true, pos: \"ao\" }],\n [\"á sínum tíma\", { lemma: \"á sinn tími\", isStopword: true, pos: \"ao\" }],\n [\"á endanum\", { lemma: \"á endi\", isStopword: true, pos: \"ao\" }],\n [\"einu sinni\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"eitt sinn\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"í fyrsta sinn\", { lemma: \"í fyrstur sinn\", isStopword: false, pos: \"ao\" }],\n [\"í kvöld\", { lemma: \"í kvöld\", isStopword: false, pos: \"ao\" }],\n [\"í morgun\", { lemma: \"í morgunn\", isStopword: false, pos: \"ao\" }],\n [\"á morgun\", { lemma: \"á morgunn\", isStopword: false, pos: \"ao\" }],\n\n // Prepositional phrases (fs frasi)\n [\"fyrir hönd\", { lemma: \"fyrir hönd\", isStopword: false, pos: \"fs\" }],\n [\"með tilliti til\", { lemma: \"með tillit til\", isStopword: false, pos: \"fs\" }],\n [\"í ljósi\", { lemma: \"í ljós\", isStopword: false, pos: \"fs\" }],\n [\"í stað\", { lemma: \"í staður\", isStopword: false, pos: \"fs\" }],\n [\"fyrir aftan\", { lemma: \"fyrir aftan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir austan\", { lemma: \"fyrir austan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir framan\", { lemma: \"fyrir framan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir handan\", { lemma: \"fyrir handan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir innan\", { lemma: \"fyrir innan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir neðan\", { lemma: \"fyrir neðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir norðan\", { lemma: \"fyrir norðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir ofan\", { lemma: \"fyrir ofan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir sunnan\", { lemma: \"fyrir sunnan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir utan\", { lemma: \"fyrir utan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir vestan\", { lemma: \"fyrir vestan\", isStopword: false, pos: \"fs\" }],\n [\"í gegnum\", { lemma: \"í gegnum\", isStopword: false, pos: \"fs\" }],\n [\"í kringum\", { lemma: \"í kringum\", isStopword: false, pos: \"fs\" }],\n [\"innan við\", { lemma: \"innan við\", isStopword: false, pos: \"fs\" }],\n [\"upp úr\", { lemma: \"upp úr\", isStopword: false, pos: \"fs\" }],\n [\"þvert á\", { lemma: \"þvert á\", isStopword: false, pos: \"fs\" }],\n\n // Conjunction-like phrases (st frasi)\n [\"þar eð\", { lemma: \"þar eð\", isStopword: true, pos: \"st\" }],\n\n // Named entities - organizations/institutions (NOT stopwords)\n [\"sameinuðu þjóðirnar\", { lemma: \"Sameinuðu þjóðirnar\", isStopword: false, pos: \"entity\" }],\n [\"evrópusambandið\", { lemma: \"Evrópusambandið\", isStopword: false, pos: \"entity\" }],\n [\"nato\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n [\"nató\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n]);\n\n/**\n * Check if a phrase starting at the given position exists.\n * Returns the phrase info and length if found, null otherwise.\n */\nexport function matchPhrase(\n words: string[],\n startIndex: number\n): { phrase: StaticPhrase; wordCount: number } | null {\n // Try longest matches first (up to 4 words)\n for (let len = Math.min(4, words.length - startIndex); len >= 2; len--) {\n const phraseWords = words.slice(startIndex, startIndex + len);\n const phraseKey = phraseWords.join(\" \").toLowerCase();\n const phrase = STATIC_PHRASES.get(phraseKey);\n if (phrase) {\n return { phrase, wordCount: len };\n }\n }\n return null;\n}\n\n/**\n * Check if a normalized string is a known phrase.\n */\nexport function isKnownPhrase(text: string): boolean {\n return STATIC_PHRASES.has(text.toLowerCase());\n}\n\n/**\n * Get phrase info for a normalized string.\n */\nexport function getPhraseInfo(text: string): StaticPhrase | undefined {\n return STATIC_PHRASES.get(text.toLowerCase());\n}\n","/**\n * Unified text processing pipeline integrating tokenize-is with lemmatization.\n *\n * Provides proper tokenization that handles Icelandic-specific patterns\n * (abbreviations, dates, times, etc.) before lemmatization.\n */\n\nimport { tokenize, type Token } from \"tokenize-is\";\nimport { Disambiguator, type DisambiguatedToken } from \"./disambiguate.js\";\nimport { CompoundSplitter, type CompoundSplit } from \"./compounds.js\";\nimport { STOPWORDS_IS, isContextualStopword } from \"./stopwords.js\";\nimport type { LemmatizerLike, BigramProvider } from \"./types.js\";\n\n/**\n * Token kinds that should be lemmatized.\n */\nconst LEMMATIZABLE_KINDS = new Set([\"word\"]);\n\n/**\n * Token kinds that represent named entities (skip lemmatization).\n */\nconst ENTITY_KINDS = new Set([\"person\", \"company\", \"entity\"]);\n\n/**\n * Token kinds to skip entirely (not useful for indexing).\n */\nconst SKIP_KINDS = new Set([\n \"punctuation\",\n \"s_begin\",\n \"s_end\",\n \"s_split\",\n \"unknown\",\n]);\n\nconst UNKNOWN_SUFFIXES = [\n \"arinnar\",\n \"anna\",\n \"unum\",\n \"um\",\n \"ir\",\n \"ar\",\n \"ur\",\n \"a\",\n \"i\",\n \"ið\",\n \"inn\",\n \"in\",\n];\n\nconst MIN_UNKNOWN_WORD_LENGTH = 6;\nconst MIN_STRIPPED_LENGTH = 3;\nconst MAX_SUFFIX_STRIPS = 2;\n\n/**\n * A processed token with lemmatization results.\n */\nexport interface ProcessedToken {\n /** Original token text */\n original: string;\n /** Token kind from tokenize-is */\n kind: string;\n /** Candidate lemmas (for word tokens) */\n lemmas: string[];\n /** Is this a named entity? */\n isEntity: boolean;\n /** Best lemma guess after disambiguation */\n disambiguated?: string;\n /** Disambiguation confidence (0-1) */\n confidence?: number;\n /** Compound split result if applicable */\n compoundSplit?: CompoundSplit;\n /** Lemmas derived from compound parts (if any) */\n compoundLemmas?: string[];\n}\n\n/**\n * Options for text processing.\n */\nexport interface ProcessOptions {\n /** Bigram provider for disambiguation */\n bigrams?: BigramProvider;\n /** Compound splitter for compound word detection */\n compoundSplitter?: CompoundSplitter;\n /** Remove stopwords from results */\n removeStopwords?: boolean;\n /**\n * Use contextual stopword detection (requires POS info).\n * When true, words like \"á\" are only filtered as stopwords when used\n * as prepositions, not when used as verbs (\"eiga\") or nouns (river).\n * Default: false (use simple stopword list)\n */\n useContextualStopwords?: boolean;\n /** Include numbers in results */\n includeNumbers?: boolean;\n /**\n * Index all candidate lemmas, not just the disambiguated one.\n * Better recall for search (finds more matches), worse precision.\n * Set to false if you only want the most likely lemma.\n * Default: true\n */\n indexAllCandidates?: boolean;\n /**\n * Try compound splitting even for known words.\n * Useful when BÍN contains the compound but you still want parts indexed.\n * Set to false to only split unknown words.\n * Default: true\n */\n alwaysTryCompounds?: boolean;\n}\n\n/**\n * Process text through the full pipeline.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Array of processed tokens\n */\nexport function processText(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): ProcessedToken[] {\n const {\n bigrams,\n compoundSplitter,\n includeNumbers = false,\n alwaysTryCompounds = true,\n } = options;\n\n // Step 1: Tokenize\n const tokens = tokenize(text);\n\n // Step 2: Process each token\n const results: ProcessedToken[] = [];\n const wordTokens: { index: number; token: Token }[] = [];\n const lemmaCache = new Map<string, string[]>();\n const allowSuffixFallback =\n \"bigramCountValue\" in lemmatizer\n ? (lemmatizer as { bigramCountValue?: number }).bigramCountValue === 0\n : false;\n\n const isUnknownLemma = (raw: string, lemmas: string[]): boolean =>\n lemmas.length === 1 && lemmas[0] === raw.toLowerCase();\n\n const trySuffixFallback = (raw: string): string[] | null => {\n let current = raw;\n let strippedCandidate: string | null = null;\n\n for (let attempt = 0; attempt < MAX_SUFFIX_STRIPS; attempt++) {\n const lower = current.toLowerCase();\n strippedCandidate = null;\n\n for (const suffix of UNKNOWN_SUFFIXES) {\n if (!lower.endsWith(suffix)) continue;\n\n const next = current.slice(0, current.length - suffix.length);\n if (next.length < MIN_STRIPPED_LENGTH) continue;\n\n const nextLemmas = lemmatizer.lemmatize(next);\n if (!isUnknownLemma(next, nextLemmas)) {\n return nextLemmas;\n }\n\n if (!strippedCandidate) {\n strippedCandidate = next;\n }\n }\n\n if (!strippedCandidate || strippedCandidate.length < MIN_UNKNOWN_WORD_LENGTH) {\n break;\n }\n\n current = strippedCandidate;\n }\n\n return null;\n };\n\n const getLemmas = (raw: string): string[] => {\n const key = raw.toLowerCase();\n const cached = lemmaCache.get(key);\n if (cached) return cached;\n const lemmas = lemmatizer.lemmatize(raw);\n if (\n allowSuffixFallback &&\n isUnknownLemma(raw, lemmas) &&\n raw.length >= MIN_UNKNOWN_WORD_LENGTH\n ) {\n const fallbackLemmas = trySuffixFallback(raw);\n if (fallbackLemmas) {\n lemmaCache.set(key, fallbackLemmas);\n return fallbackLemmas;\n }\n }\n lemmaCache.set(key, lemmas);\n return lemmas;\n };\n\n for (let i = 0; i < tokens.length; i++) {\n const token = tokens[i];\n\n // Skip unwanted tokens\n if (SKIP_KINDS.has(token.kind)) {\n continue;\n }\n\n // Handle named entities\n if (ENTITY_KINDS.has(token.kind)) {\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: true,\n });\n continue;\n }\n\n // Handle numbers if requested\n if (token.kind === \"number\" || token.kind === \"ordinal\") {\n if (includeNumbers) {\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: false,\n });\n }\n continue;\n }\n\n // Handle word tokens\n if (LEMMATIZABLE_KINDS.has(token.kind)) {\n const tokenText = token.text ?? \"\";\n const lemmas = getLemmas(tokenText);\n\n const processed: ProcessedToken = {\n original: tokenText,\n kind: token.kind,\n lemmas,\n isEntity: false,\n };\n\n // Try compound splitting\n // - Always if alwaysTryCompounds is set (for better search recall)\n // - Otherwise only if lemmatization returns unknown word\n const isUnknownWord = lemmas.length === 1 && lemmas[0] === tokenText.toLowerCase();\n if (compoundSplitter && (alwaysTryCompounds || isUnknownWord)) {\n const split = compoundSplitter.split(tokenText);\n if (split.isCompound) {\n processed.compoundSplit = split;\n // Add component lemmas from parts (in addition to direct lemmas)\n const partLemmas = split.parts.flatMap((c) => getLemmas(c));\n processed.compoundLemmas = partLemmas;\n processed.lemmas = [...new Set([...lemmas, ...partLemmas])];\n }\n }\n\n results.push(processed);\n wordTokens.push({ index: results.length - 1, token });\n continue;\n }\n\n // Pass through other tokens (time, date, url, etc.)\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: false,\n });\n }\n\n // Step 3: Disambiguate if we have bigram data\n if (bigrams && wordTokens.length > 0) {\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n\n for (let i = 0; i < wordTokens.length; i++) {\n const { index, token } = wordTokens[i];\n const prevToken = i > 0 ? wordTokens[i - 1].token : null;\n const nextToken = i < wordTokens.length - 1 ? wordTokens[i + 1].token : null;\n\n const result = disambiguator.disambiguate(\n token.text ?? \"\",\n prevToken?.text ?? null,\n nextToken?.text ?? null,\n {\n prevLemmas: prevToken?.text ? getLemmas(prevToken.text) : undefined,\n nextLemmas: nextToken?.text ? getLemmas(nextToken.text) : undefined,\n }\n );\n\n results[index].disambiguated = result.lemma;\n results[index].confidence = result.confidence;\n }\n } else {\n // No disambiguation - use first lemma\n for (const { index } of wordTokens) {\n const processed = results[index];\n if (processed.lemmas.length > 0) {\n processed.disambiguated = processed.lemmas[0];\n processed.confidence = processed.lemmas.length === 1 ? 1.0 : 0.5;\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract unique indexable lemmas from text.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Set of unique lemmas suitable for search indexing\n */\nexport function extractIndexableLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): Set<string> {\n const {\n removeStopwords = false,\n indexAllCandidates = true,\n useContextualStopwords = false,\n } = options;\n\n const processed = processText(text, lemmatizer, options);\n const lemmas = new Set<string>();\n\n /**\n * Check if a lemma should be filtered as a stopword.\n * Uses contextual rules when enabled and POS is available.\n */\n const shouldFilter = (lemma: string, pos?: string): boolean => {\n if (!removeStopwords) return false;\n if (useContextualStopwords) {\n return isContextualStopword(lemma, pos);\n }\n return STOPWORDS_IS.has(lemma);\n };\n\n for (const token of processed) {\n // Skip entities\n if (token.isEntity) {\n continue;\n }\n\n if (indexAllCandidates) {\n // Index ALL candidate lemmas for better search recall\n for (const lemma of token.lemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n } else {\n // Use disambiguated lemma if available (better precision)\n if (token.disambiguated) {\n // Note: We don't have POS info easily available in disambiguated result\n // This would need enhancement to pass through POS from disambiguation\n if (!shouldFilter(token.disambiguated)) {\n lemmas.add(token.disambiguated);\n }\n }\n }\n\n // Also add compound parts if split\n if (token.compoundSplit?.isCompound) {\n const partLemmas = token.compoundLemmas\n ? token.compoundLemmas\n : token.compoundSplit.parts.flatMap((p) => lemmatizer.lemmatize(p));\n for (const lemma of partLemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n }\n }\n\n return lemmas;\n}\n\n/**\n * Options for building a backend-agnostic boolean search query.\n */\nexport interface SearchQueryOptions extends ProcessOptions {\n /** Operator between token groups (AND). Default: \" & \" */\n andOperator?: string;\n /** Operator between candidate lemmas within a group (OR). Default: \" | \" */\n orOperator?: string;\n /** Wrap groups with multiple terms in parentheses. Default: true */\n wrapGroups?: boolean;\n /**\n * Include the original token (lowercased) in each group for recall.\n * Useful for unknown words or when you want a fallback.\n * Default: false\n */\n includeOriginal?: boolean;\n /** Lowercase original tokens when includeOriginal is true. Default: true */\n lowercaseOriginal?: boolean;\n}\n\n/**\n * Result for a backend-agnostic boolean search query.\n */\nexport interface SearchQueryResult {\n /** Lemma groups per token (OR within group, AND between groups) */\n groups: string[][];\n /** Boolean query string using provided operators */\n query: string;\n}\n\n/**\n * Build a backend-agnostic boolean query string from user input.\n *\n * Use the same lemmatization pipeline as indexing, then:\n * - OR within a token's candidate lemmas\n * - AND across tokens\n *\n * @param text - User search input\n * @param lemmatizer - Lemmatizer instance\n * @param options - Query + processing options\n */\nexport function buildSearchQuery(\n text: string,\n lemmatizer: LemmatizerLike,\n options: SearchQueryOptions = {}\n): SearchQueryResult {\n const {\n removeStopwords = false,\n indexAllCandidates = true,\n useContextualStopwords = false,\n andOperator = \" & \",\n orOperator = \" | \",\n wrapGroups = true,\n includeOriginal = false,\n lowercaseOriginal = true,\n } = options;\n\n const processed = processText(text, lemmatizer, options);\n const groups: string[][] = [];\n\n /**\n * Check if a lemma should be filtered as a stopword.\n * Uses contextual rules when enabled and POS is available.\n */\n const shouldFilter = (lemma: string, pos?: string): boolean => {\n if (!removeStopwords) return false;\n if (useContextualStopwords) {\n return isContextualStopword(lemma, pos);\n }\n return STOPWORDS_IS.has(lemma);\n };\n\n for (const token of processed) {\n // Mirror indexing behavior: skip entities\n if (token.isEntity) continue;\n\n let candidates: string[] = [];\n if (indexAllCandidates) {\n candidates = token.lemmas;\n } else if (token.disambiguated) {\n candidates = [token.disambiguated];\n }\n\n if (includeOriginal) {\n const raw = token.original ?? \"\";\n if (raw.length > 0) {\n const original = lowercaseOriginal ? raw.toLowerCase() : raw;\n candidates = [...candidates, original];\n }\n }\n\n const unique = [\n ...new Set(candidates.filter((lemma) => lemma && !shouldFilter(lemma))),\n ];\n\n if (unique.length > 0) {\n groups.push(unique);\n }\n }\n\n const query = groups\n .map((group) => {\n const joined = group.join(orOperator);\n if (wrapGroups && group.length > 1) {\n return `(${joined})`;\n }\n return joined;\n })\n .filter((part) => part.length > 0)\n .join(andOperator);\n\n return { groups, query };\n}\n\n/**\n * Strategy for benchmark comparisons.\n */\nexport type ProcessingStrategy = \"naive\" | \"tokenized\" | \"disambiguated\" | \"full\";\n\n/**\n * Metrics from processing a text.\n */\nexport interface ProcessingMetrics {\n /** Total word count */\n wordCount: number;\n /** Words successfully lemmatized (not returned as-is) */\n lemmatizedCount: number;\n /** Coverage: lemmatized / total */\n coverage: number;\n /** Words with multiple candidate lemmas */\n ambiguousCount: number;\n /** Ambiguity rate: ambiguous / total */\n ambiguityRate: number;\n /** Average disambiguation confidence */\n avgConfidence: number;\n /** Compounds detected and split */\n compoundsFound: number;\n /** Named entities skipped */\n entitiesSkipped: number;\n /** Unique lemmas extracted */\n uniqueLemmas: number;\n /** Processing time in milliseconds */\n timeMs: number;\n}\n\n/**\n * Run benchmark with a specific strategy and collect metrics.\n */\nexport function runBenchmark(\n text: string,\n lemmatizer: LemmatizerLike,\n strategy: ProcessingStrategy,\n resources: {\n bigrams?: BigramProvider;\n compoundSplitter?: CompoundSplitter;\n } = {}\n): ProcessingMetrics {\n const start = performance.now();\n\n let processed: ProcessedToken[];\n let lemmas: Set<string>;\n\n switch (strategy) {\n case \"naive\": {\n // Simple whitespace split + lemmatize\n const tokens = text.split(/\\s+/).filter((t) => t.length > 0);\n const naiveProcessed: ProcessedToken[] = [];\n\n for (const token of tokens) {\n const cleaned = token.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\");\n if (cleaned) {\n const tokenLemmas = lemmatizer.lemmatize(cleaned);\n naiveProcessed.push({\n original: cleaned,\n kind: \"word\",\n lemmas: tokenLemmas,\n isEntity: false,\n disambiguated: tokenLemmas[0],\n confidence: tokenLemmas.length === 1 ? 1.0 : 0.5,\n });\n }\n }\n processed = naiveProcessed;\n lemmas = new Set(naiveProcessed.map((p) => p.disambiguated!).filter(Boolean));\n break;\n }\n\n case \"tokenized\": {\n // tokenize-is + lemmatize word tokens\n processed = processText(text, lemmatizer);\n lemmas = new Set(\n processed\n .filter((p) => p.kind === \"word\" && p.lemmas.length > 0)\n .map((p) => p.lemmas[0])\n );\n break;\n }\n\n case \"disambiguated\": {\n // tokenized + bigram disambiguation\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n break;\n }\n\n case \"full\": {\n // disambiguated + compounds\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n break;\n }\n }\n\n const timeMs = performance.now() - start;\n\n // Calculate metrics\n const wordTokens = processed.filter((p) => p.kind === \"word\");\n const wordCount = wordTokens.length;\n\n const lemmatizedCount = wordTokens.filter((p) => {\n // Considered lemmatized if not returned as-is\n return (\n p.lemmas.length > 0 &&\n !(p.lemmas.length === 1 && p.lemmas[0] === p.original.toLowerCase())\n );\n }).length;\n\n const ambiguousCount = wordTokens.filter((p) => p.lemmas.length > 1).length;\n\n const confidences = wordTokens\n .filter((p) => p.confidence !== undefined)\n .map((p) => p.confidence!);\n const avgConfidence =\n confidences.length > 0\n ? confidences.reduce((a, b) => a + b, 0) / confidences.length\n : 0;\n\n const compoundsFound = wordTokens.filter((p) => p.compoundSplit?.isCompound).length;\n const entitiesSkipped = processed.filter((p) => p.isEntity).length;\n\n return {\n wordCount,\n lemmatizedCount,\n coverage: wordCount > 0 ? lemmatizedCount / wordCount : 0,\n ambiguousCount,\n ambiguityRate: wordCount > 0 ? ambiguousCount / wordCount : 0,\n avgConfidence,\n compoundsFound,\n entitiesSkipped,\n uniqueLemmas: lemmas.size,\n timeMs,\n };\n}\n"],"mappings":"uCAUA,MAAa,EAAe,IAAI,IAAI,8rIAuEnC,CAAC,CAKF,SAAgB,EAAW,EAAuB,CAChD,OAAO,EAAa,IAAI,EAAK,aAAa,CAAC,CAa7C,MAAa,EAAiD,IAAI,IAAI,CAEpE,CAAC,IAAK,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE5B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE9B,CAAC,KAAM,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE7B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE/B,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEzB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,IAAK,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CACvB,CAAC,CAYF,SAAgB,EAAqB,EAAe,EAAuB,CACzE,IAAM,EAAa,EAAM,aAAa,CAGhC,EAAc,EAAqB,IAAI,EAAW,CAOxD,OANI,GAAe,EAEV,EAAY,IAAI,EAAI,CAItB,EAAa,IAAI,EAAW,CAMrC,SAAgB,EAAkC,EAAiB,CACjE,OAAO,EAAM,OAAQ,GAAM,CAAC,EAAW,EAAE,CAAC,CCnI5C,MAAM,EAAQ,WAGR,EAA2B,CAC/B,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACD,CAIK,EAAgD,CACpD,IAAA,GACA,KACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,IAAA,GACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,KACA,KACD,CAUD,IAAa,EAAb,MAAa,CAA2D,CACtE,OACA,WACA,aACA,aACA,YACA,YACA,aACA,QACA,gBACA,gBACA,gBACA,gBACA,YAEA,WACA,UACA,WACA,YACA,QAEA,QAAkB,IAAI,YAAY,QAAQ,CAE1C,YAAoB,EAAqB,CACvC,KAAK,OAAS,EACd,IAAM,EAAO,IAAI,SAAS,EAAO,CAG3B,EAAQ,EAAK,UAAU,EAAG,GAAK,CACrC,GAAI,IAAU,EACZ,MAAU,MACR,2CAA2C,EAAM,SAAS,GAAG,CAAC,UAAU,EAAM,SAAS,GAAG,GAC3F,CAIH,GADA,KAAK,QAAU,EAAK,UAAU,EAAG,GAAK,CAClC,KAAK,UAAY,GAAK,KAAK,UAAY,EACzC,MAAU,MAAM,wBAAwB,KAAK,UAAU,CAGzD,IAAM,EAAiB,EAAK,UAAU,EAAG,GAAK,CAC9C,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,UAAY,EAAK,UAAU,GAAI,GAAK,CACzC,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,YAAc,EAAK,UAAU,GAAI,GAAK,CAI3C,IAAI,EAAS,GAGb,KAAK,WAAa,IAAI,WAAW,EAAQ,EAAQ,EAAe,CAChE,GAAU,EAGV,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CACpE,GAAU,KAAK,WAAa,EAG5B,KAAK,aAAe,IAAI,WAAW,EAAQ,EAAQ,KAAK,WAAW,CACnE,GAAU,KAAK,WAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAU,CAClE,GAAU,KAAK,UAAY,EAG3B,KAAK,YAAc,IAAI,WAAW,EAAQ,EAAQ,KAAK,UAAU,CACjE,GAAU,KAAK,UAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAY,EAAE,CACvE,IAAW,KAAK,UAAY,GAAK,EAGjC,KAAK,QAAU,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CAC/D,GAAU,KAAK,WAAa,EAG5B,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CAMtE,aAAa,KACX,EACA,EAAmC,EAAE,CACV,CAE3B,IAAM,EAAW,MADD,EAAQ,OAAS,OACF,EAAI,CAEnC,GAAI,CAAC,EAAS,GACZ,MAAU,MAAM,+BAA+B,EAAS,SAAS,CAInE,OAAO,IAAI,EADI,MAAM,EAAS,aAAa,CACR,CAMrC,OAAO,eAAe,EAAuC,CAC3D,OAAO,IAAI,EAAiB,EAAO,CAMrC,UAAkB,EAAgB,EAAwB,CACxD,OAAO,KAAK,QAAQ,OAAO,KAAK,WAAW,SAAS,EAAQ,EAAS,EAAO,CAAC,CAM/E,SAAiB,EAAuB,CACtC,OAAO,KAAK,UAAU,KAAK,aAAa,GAAQ,KAAK,aAAa,GAAO,CAM3E,QAAgB,EAAuB,CACrC,OAAO,KAAK,UAAU,KAAK,YAAY,GAAQ,KAAK,YAAY,GAAO,CAOzE,SAAiB,EAAsB,CACrC,IAAI,EAAO,EACP,EAAQ,KAAK,UAAY,EAE7B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAU,KAAK,QAAQ,EAAI,CAEjC,GAAI,IAAY,EACd,OAAO,EAEL,EAAU,EACZ,EAAO,EAAM,EAEb,EAAQ,EAAM,EAIlB,MAAO,GAQT,UAAU,EAAc,EAAkC,EAAE,CAAY,CACtE,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,CAAC,EAAW,CAGrB,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAE9B,CAAE,aAAc,EAChB,EAAO,IAAI,IACX,EAAmB,EAAE,CAE3B,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAM,EAAY,GAExB,GAAI,GAAa,IAAQ,EACvB,SAGF,IAAM,EAAQ,KAAK,SAAS,EAAS,CAChC,EAAK,IAAI,EAAM,GAClB,EAAK,IAAI,EAAM,CACf,EAAO,KAAK,EAAM,EAQtB,OAJI,EAAO,SAAW,EACb,CAAC,EAAW,CAGd,EAQT,YAAoB,EAMlB,CAWA,OAVI,KAAK,UAAY,EACZ,CACL,SAAU,IAAU,EACpB,QAAS,EAAQ,GACjB,SAAU,EACV,WAAY,EACZ,WAAY,EACb,CAGI,CACL,SAAU,IAAU,GACpB,QAAS,EAAQ,GACjB,SAAW,IAAU,EAAK,EAC1B,WAAa,IAAU,EAAK,EAC5B,WAAa,IAAU,EAAK,EAC7B,CAOH,iBAAiB,EAA8B,CAC7C,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAAO,IAAI,IACX,EAAyB,EAAE,CAEjC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAQ,KAAK,SAAS,EAAS,CAC/B,EAAM,EAAY,IAAa,GAC/B,EAAM,GAAG,EAAM,GAAG,IAEnB,EAAK,IAAI,EAAI,GAChB,EAAK,IAAI,EAAI,CACb,EAAO,KAAK,CAAE,QAAO,MAAK,CAAC,EAI/B,OAAO,EAOT,mBAAmB,EAAgC,CACjD,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAA2B,EAAE,CAEnC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,UAAS,WAAU,aAAY,cAC/C,KAAK,YAAY,KAAK,QAAQ,GAAG,CAE7B,EAAuB,EAAE,CACzB,EAAU,EAAa,GACvB,EAAY,EAAe,GAC3B,EAAY,EAAe,GAE7B,IAAS,EAAM,KAAO,GACtB,IAAW,EAAM,OAAS,GAC1B,IAAW,EAAM,OAAS,GAE9B,EAAO,KAAK,CACV,MAAO,KAAK,SAAS,EAAS,CAC9B,IAAK,EAAY,IAAa,GAC9B,MAAO,OAAO,KAAK,EAAM,CAAC,OAAS,EAAI,EAAQ,IAAA,GAChD,CAAC,CAGJ,OAAO,EAMT,kBAA4B,CAC1B,OAAO,KAAK,SAAW,EAMzB,YAAqB,CACnB,OAAO,KAAK,QAMd,WAAmB,EAAe,EAAuB,CACvD,IAAI,EAAO,EACP,EAAQ,KAAK,YAAc,EAE/B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,EAAQ,EACV,EAAO,EAAM,UACJ,EAAQ,EACjB,EAAQ,EAAM,MACT,CAEL,IAAM,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,IAAU,EACZ,OAAO,EAEL,EAAQ,EACV,EAAO,EAAM,EAEb,EAAQ,EAAM,GAKpB,MAAO,GAOT,WAAW,EAAe,EAAuB,CAC/C,IAAM,EAAM,KAAK,WAAW,EAAM,aAAa,CAAE,EAAM,aAAa,CAAC,CACrE,OAAO,IAAQ,GAAK,EAAI,KAAK,YAAY,GAO3C,KAAK,EAAe,EAAuB,CACzC,OAAO,KAAK,WAAW,EAAO,EAAM,CAMtC,QAAQ,EAAuB,CAC7B,OAAO,KAAK,SAAS,EAAK,aAAa,CAAC,GAAK,GAM/C,IAAI,iBAA0B,CAC5B,OAAO,KAAK,WAMd,IAAI,eAAwB,CAC1B,OAAO,KAAK,UAMd,IAAI,kBAA2B,CAC7B,OAAO,KAAK,YAMd,IAAI,YAAqB,CACvB,OAAO,KAAK,OAAO,WAOrB,cAAyB,CACvB,IAAM,EAAmB,EAAE,CAC3B,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,WAAY,IACnC,EAAO,KAAK,KAAK,SAAS,EAAE,CAAC,CAE/B,OAAO,IC5dX,MAAa,EAA6C,CAKxD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,gBACT,YAAa,iDACd,CACD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAKD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,iBACT,YAAa,uCACd,CACD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,yCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,qCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,8CACd,CAGD,CACE,KAAM,OACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,+CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,mCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CACF,CAKD,SAAgB,EAAgB,EAAoC,CAClE,IAAM,EAAa,EAAK,aAAa,CACrC,OAAO,EAAqB,OAAQ,GAAM,EAAE,OAAS,EAAW,CAMlE,SAAgB,EAAuB,EAAuB,CAC5D,OAAO,EAAqB,KAAM,GAAM,EAAE,OAAS,EAAK,aAAa,CAAC,CCzJxE,MAAa,EAAuD,IAAI,IAAkC,CAExG,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAClD,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CACjD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAGlD,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACxC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC5C,CAAC,UAAW,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,YAAa,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG/C,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,SAAU,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC7C,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,WAAY,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC/C,CAAC,QAAS,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC5C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACxC,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC1C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,UAAW,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC9C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,CAOW,EAAsB,IAAI,IAAI,CACzC,KACA,KACA,OACA,MACA,MACA,MACA,MACA,OACA,MACA,MACD,CAAC,CAuBF,SAAgB,EACd,EACA,EACS,CAGT,OAFK,EACS,EAAkB,IAAI,EAAU,EAChC,IAAI,EAAa,EAAI,GAFT,GAe5B,SAAgB,EACd,EACA,EACyB,CAEzB,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,GAAI,EAAe,SAAW,EAAG,OAAO,KAGxC,IAAK,IAAM,KAAQ,EACjB,IAAK,IAAM,KAAY,EACrB,GAAI,EAAS,OAAO,MAAQ,EAAc,EAAK,MAAO,EAAS,MAAM,KAAK,CACxE,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,QAAQ,EAAS,MAAM,OAC7B,WAAY,GACb,CAKP,OAAO,KAaT,SAAgB,EACd,EACA,EACyB,CACzB,GAAI,CAAC,EAAU,OAAO,KAEtB,IAAM,EAAY,EAAS,aAAa,CACxC,GAAI,CAAC,EAAoB,IAAI,EAAU,CAAE,OAAO,KAGhD,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAW/D,OAVI,EAAe,SAAW,GAI1B,CADe,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CACjC,KAMjB,CACL,OAJoB,EAAe,KAAM,GAAM,EAAE,QAAU,OAAO,EAC7B,EAAe,IAG/B,MACrB,IAAK,KACL,KAAM,eACN,WAAY,IACb,CAsBH,SAAgB,EACd,EACA,EACA,EACyB,CACzB,GAAI,CAAC,GAAY,CAAC,GAAY,iBAAkB,OAAO,KAGvD,IAAM,EAAa,EAAW,iBAAiB,EAAS,CAClD,EAAgB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAC5D,GAAI,CAAC,EAAe,OAAO,KAG3B,IAAM,EAAoB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAG1D,EAAmB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAI/D,GAAI,GAAqB,EACvB,OAAO,KAIT,IAAM,EAAgB,EAAkB,IAAI,EAAc,MAAM,CAChE,GAAI,CAAC,EAAe,OAAO,KAG3B,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,IAAK,IAAM,KAAQ,EACjB,GAAI,EAAK,OAAO,MAAQ,EAAc,IAAI,EAAK,MAAM,KAAK,CACxD,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,mBAAmB,EAAK,MAAM,OACpC,WAAY,GACb,CAIL,OAAO,KAiBT,SAAgB,EACd,EACA,EACA,EACA,EAA2C,KAClB,CAazB,OAXiB,EAAqB,EAAY,EAAc,EAItC,EAA8B,EAAY,EAAU,EAAW,EAIxE,EAAqB,EAAY,EAAS,EAGpD,KAMT,SAAgB,EAAmB,EAAwB,CACzD,OAAO,EAAkB,IAAI,EAAM,CAMrC,SAAgB,EAAiB,EAAqD,CACpF,OAAO,EAAkB,IAAI,EAAU,CClNzC,MAAM,EAAwC,CAC5C,KAAM,cACN,IAAI,EAAY,CAQd,OAPI,EAAW,SAAW,EACjB,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EACb,CAEI,MAEV,CAKK,EAA4C,CAChD,KAAM,mBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,mBAAoB,OAAO,KAE9C,IAAK,IAAM,KAAQ,EAAsB,CACvC,IAAM,EAAQ,EAAU,EAAM,EAAY,EAAQ,CAClD,GAAI,EACF,MAAO,CACL,MAAO,EAAM,MACb,IAAK,EAAM,IACX,WAAY,IACb,CAGL,OAAO,MAEV,CAKD,SAAS,EACP,EACA,EACA,EACqB,CAErB,IAAM,EAAqB,EAAW,KACnC,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,OAC5E,CACK,EAAe,EAAW,KAC7B,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,KAC5E,CAED,GAAI,CAAC,GAAsB,CAAC,EAC1B,OAAO,KAIT,GAAI,EAAK,UAAY,cAAe,CAElC,IAAM,EAAO,EAAQ,SACrB,GAAI,GAAQ,kBAAkB,KAAK,EAAK,CACtC,OAAO,UAEA,EAAK,UAAY,cAAe,CAGzC,IAAM,EAAO,EAAQ,UAAU,aAAa,CAC5C,GAAI,GAAQ,CAAC,CAAC,QAAS,QAAS,KAAM,KAAM,MAAO,OAAQ,MAAO,MAAM,CAAC,SAAS,EAAK,CACrF,OAAO,UAEA,EAAK,UAAY,gBAAiB,CAE3C,IAAM,EAAO,EAAQ,UAAU,aAAa,CAE5C,GAAI,GADa,CAAC,KAAM,KAAM,OAAQ,MAAO,MAAO,MAAO,MAAO,OAAQ,MAAO,MAAM,CAClE,SAAS,EAAK,CACjC,OAAO,EAIX,OAAO,KAiIT,MAAM,EAAgC,CACpC,EACA,EA3H6C,CAC7C,KAAM,gBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,gBAAiB,OAAO,KAG3C,IAAM,EAAwC,EAAW,IAAK,IAAO,CACnE,GAAG,EACH,MAAO,IAAA,GACR,EAAE,CAGG,EAAc,EAAQ,UAAU,EAAQ,OAC9C,GAAI,EAAa,CACf,IAAM,EAAkB,EAAc,SAAS,EAAY,CACvD,IAEF,EAAoB,OAAS,EAC7B,EAAoB,KAAK,GAAG,EAAgB,EAKhD,IAAM,EAAS,EACb,EACA,EAAQ,SACR,EAAQ,eAAiB,EAAE,CAC3B,EAAc,WACf,CAUD,OARI,EACK,CACL,MAAO,EAAO,MACd,IAAK,EAAO,IACZ,WAAY,EAAO,WACpB,CAGI,MAEV,CAKwC,CACvC,KAAM,eACN,IAAI,EAAY,EAAS,EAAe,CAEtC,GADI,CAAC,EAAc,SACf,EAAW,SAAW,EAAG,OAAO,KAEpC,IAAM,EAAuD,EAAE,CAE/D,IAAK,IAAM,KAAa,EAAY,CAClC,IAAI,EAAQ,EAGZ,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAW,EAAU,MAAM,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,aAMlD,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAU,MAAO,EAAU,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,cAKlD,EAAO,KAAK,CAAE,YAAW,QAAO,CAAC,CAOnC,GAHA,EAAO,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAGpC,EAAO,OAAS,GAAK,EAAO,GAAG,MAAQ,EAAG,CAC5C,IAAM,EAAW,EAAO,GAAG,MACrB,EAAa,EAAO,QAAQ,EAAK,IAAM,EAAM,KAAK,IAAI,EAAE,MAAM,CAAE,EAAE,CAClE,EAAa,EAAa,EAAI,KAAK,IAAI,EAAS,CAAG,EAAa,GAEtE,MAAO,CACL,MAAO,EAAO,GAAG,UAAU,MAC3B,IAAK,EAAO,GAAG,UAAU,IACzB,aACD,CAGH,OAAO,MAEV,CAK0C,CACzC,KAAM,WACN,IAAI,EAAY,CAQd,OAPI,EAAW,OAAS,EACf,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EAAI,EAAW,OAC5B,CAEI,MAEV,CAWA,CAKD,IAAa,EAAb,KAA2B,CACzB,WACA,QACA,WACA,YACA,mBACA,gBACA,WAEA,YACE,EACA,EAAiC,KACjC,EAAgC,EAAE,CAClC,CACA,KAAK,WAAa,EAClB,KAAK,QAAU,EACf,KAAK,WAAa,EAAQ,YAAc,EACxC,KAAK,YAAc,EAAQ,aAAe,EAC1C,KAAK,mBAAqB,EAAQ,oBAAsB,GACxD,KAAK,gBAAkB,EAAQ,iBAAmB,GAClD,KAAK,WAAa,KAAK,WAAW,mBAAqB,IAAI,IAAQ,KAGrE,SAAiB,EAA4C,CAC3D,GAAI,CAAC,KAAK,WAAW,oBAAsB,CAAC,KAAK,WAAY,OAC7D,IAAM,EAAM,EAAK,aAAa,CACxB,EAAS,KAAK,WAAW,IAAI,EAAI,CACvC,GAAI,EAAQ,OAAO,EACnB,IAAM,EAAQ,KAAK,WAAW,mBAAmB,EAAK,CAEtD,OADA,KAAK,WAAW,IAAI,EAAK,EAAM,CACxB,EAUT,aACE,EACA,EACA,EACA,EAAkC,EAAE,CAChB,CAEpB,IAAI,EACJ,AAKE,EALE,KAAK,WAAW,iBACE,KAAK,WAAW,iBAAiB,EAAK,CAG3C,KAAK,WAAW,UAAU,EAAK,CACnB,IAAK,IAAO,CAAE,MAAO,EAAG,IAAK,KAAmB,EAAE,CAG/E,IAAM,EAAa,EAAkB,IAAK,GAAM,EAAE,MAAM,CAClD,EAAQ,EAGV,EACA,IACF,EAAgB,KAAK,SAAS,EAAS,EAIzC,IAAM,EAAiC,CACrC,WACA,WACA,WAAY,EAAK,WACjB,WAAY,EAAK,WACjB,gBACA,UAAW,CAAC,EAAK,CACjB,MAAO,EACR,CAGD,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAS,EAAM,IAAI,EAAmB,EAAS,KAAK,CAC1D,GAAI,EACF,MAAO,CACL,QACA,MAAO,EAAO,MACd,IAAK,EAAO,IACZ,aACA,oBACA,UAAW,EAAW,OAAS,EAC/B,WAAY,EAAO,WACnB,WAAY,EAAM,KACnB,CAKL,MAAO,CACL,QACA,MAAO,EAAK,aAAa,CACzB,aACA,oBACA,UAAW,GACX,WAAY,EACZ,WAAY,OACb,CASH,gBAAgB,EAAwC,CACtD,IAAM,EAAgC,EAAE,CAExC,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAO,EAAO,GACd,EAAW,EAAI,EAAI,EAAO,EAAI,GAAK,KACnC,EAAW,EAAI,EAAO,OAAS,EAAI,EAAO,EAAI,GAAK,KAEzD,EAAQ,KAAK,KAAK,aAAa,EAAM,EAAU,EAAS,CAAC,CAG3D,OAAO,EAST,cAAc,EAA+B,CAC3C,IAAM,EAAS,IAAI,IACb,EAAgB,KAAK,gBAAgB,EAAO,CAElD,IAAK,IAAM,KAAU,EACnB,EAAO,IAAI,EAAO,MAAM,CAG1B,OAAO,IAOX,SAAgB,EACd,EACA,EACA,EACA,EAGI,EAAE,CACO,CACb,GAAM,CAAE,WAAU,mBAAoB,EAGhC,EAAS,EACX,EAAS,EAAK,CACd,EACG,MAAM,MAAM,CACZ,OAAQ,GAAM,EAAE,OAAS,EAAE,CAC3B,IAAK,GAAM,EAAE,QAAQ,oCAAqC,GAAG,CAAC,CAC9D,OAAQ,GAAM,EAAE,OAAS,EAAE,CAI5B,EADgB,IAAI,EAAc,EAAY,EAAQ,CAC/B,cAAc,EAAO,CAGlD,GAAI,MACG,IAAM,KAAS,EACd,EAAa,IAAI,EAAM,EACzB,EAAO,OAAO,EAAM,CAK1B,OAAO,ECrdT,MAAa,EAA8C,CACzD,GAAI,OACJ,GAAI,OACJ,GAAI,YACJ,GAAI,SACJ,GAAI,cACJ,GAAI,UACJ,GAAI,cACJ,GAAI,UACJ,GAAI,UACJ,GAAI,eACL,CAKY,EAAiD,CAC5D,GAAI,UACJ,GAAI,UACJ,GAAI,cACJ,GAAI,YACJ,GAAI,aACJ,GAAI,UACJ,GAAI,aACJ,GAAI,UACJ,GAAI,UACJ,GAAI,YACL,CAoBY,EAA8C,CACzD,GAAI,aACJ,GAAI,aACJ,IAAK,SACL,GAAI,WACL,CAKY,EAAkD,CAC7D,GAAI,YACJ,IAAK,WACL,GAAI,SACL,CAKY,EAAkD,CAC7D,GAAI,WACJ,GAAI,SACL,CCtFD,IAAa,EAAb,MAAa,CAAY,CACvB,KACA,SACA,UAEA,YAAoB,EAAkB,EAAkB,EAAmB,CACzE,KAAK,KAAO,EACZ,KAAK,SAAW,EAChB,KAAK,UAAY,EAGnB,OAAO,WAAW,EAAkB,EAA8B,EAAE,CAAe,CACjF,IAAM,EAAI,KAAK,IAAI,EAAO,OAAQ,EAAE,CAC9B,EAAI,EAAQ,mBAAqB,IAEjC,EAAI,KAAK,IAAI,EAAG,KAAK,KAAM,CAAC,EAAI,KAAK,IAAI,EAAE,EAAK,KAAK,IAAM,KAAK,KAAK,CAAC,CACtE,EAAI,KAAK,IAAI,EAAG,KAAK,MAAO,EAAI,EAAK,KAAK,IAAI,CAAC,CAC/C,EAAY,EAAQ,iBACtB,KAAK,IAAI,EAAG,EAAQ,iBAAiB,CACrC,EAEE,EAAQ,KAAK,KAAK,EAAI,EAAE,CAExB,EAAS,IAAI,EADN,IAAI,WAAW,EAAM,CACG,EAAG,EAAU,CAElD,IAAK,IAAM,KAAS,EAClB,EAAO,IAAI,EAAM,CAGnB,OAAO,EAGT,IAAI,EAAqB,CACvB,GAAM,CAAC,EAAI,GAAM,KAAK,OAAO,EAAM,CACnC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAW,IAAK,CACvC,IAAM,GAAY,EAAK,EAAI,GAAM,KAAK,SACtC,KAAK,OAAO,EAAS,EAIzB,IAAI,EAAwB,CAC1B,GAAM,CAAC,EAAI,GAAM,KAAK,OAAO,EAAM,CACnC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAW,IAAK,CACvC,IAAM,GAAY,EAAK,EAAI,GAAM,KAAK,SACtC,GAAI,CAAC,KAAK,OAAO,EAAS,CAAE,MAAO,GAErC,MAAO,GAGT,OAAe,EAAqB,CAClC,IAAM,EAAY,IAAU,EACtB,EAAM,EAAQ,EACpB,KAAK,KAAK,IAAc,GAAK,EAG/B,OAAe,EAAwB,CACrC,IAAM,EAAY,IAAU,EACtB,EAAM,EAAQ,EACpB,OAAQ,KAAK,KAAK,GAAc,GAAK,IAAU,EAGjD,OAAe,EAAiC,CAC9C,IAAM,EAAM,EAAM,aAAa,CAC3B,EAAQ,WACR,EAAQ,WAEZ,IAAK,IAAI,EAAI,EAAG,EAAI,EAAI,OAAQ,IAAK,CACnC,IAAM,EAAO,EAAI,WAAW,EAAE,CAC9B,GAAS,EACT,EAAQ,KAAK,KAAK,EAAO,SAAS,GAAK,EAEvC,GAAS,EACT,EAAQ,KAAK,KAAK,EAAO,WAAW,GAAK,EAO3C,MAJA,IAAS,IAAU,GACnB,EAAQ,KAAK,KAAK,EAAO,WAAW,GAAK,EACzC,GAAS,IAAU,GAEZ,CAAC,IAAU,EAAG,IAAU,GAAK,UAAW,GCnEnD,MAAa,EAAmB,IAAI,IAAI,sxBAkFvC,CAAC,CA2CI,EAAwB,IAAI,IAAI,+NAmCrC,CAAC,CAMI,EAAoB,IAAI,IAAI,CAChC,OACA,OACA,OACA,OACA,OACA,QACA,QACA,OACA,QACA,QACA,OACA,MACA,OACA,OACA,OACA,QACA,QACA,MACD,CAAC,CASI,EAAmB,CAAC,IAAK,IAAK,IAAI,CAExC,IAAa,EAAb,KAA8B,CAC5B,WACA,cACA,kBACA,YACA,KAEA,YACE,EACA,EACA,EAAmC,EAAE,CACrC,CACA,KAAK,WAAa,EAClB,KAAK,YAAc,EACnB,KAAK,cAAgB,EAAQ,eAAiB,EAC9C,KAAK,kBAAoB,EAAQ,mBAAqB,GACtD,KAAK,KAAO,EAAQ,MAAQ,WAM9B,QAAgB,EAAc,EAAiC,CAC7D,MAAO,CACL,OACA,MAAO,EACP,WAAY,EACZ,WAAY,EACZ,WAAY,GACb,CAWH,MAAM,EAA6B,CACjC,IAAM,EAAa,EAAK,aAAa,CAG/B,EAAe,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAe,EAAa,IAAI,aAAa,CAMnD,GALI,GAAgB,EAAiB,IAAI,EAAa,EAKlD,EAAiB,IAAI,EAAW,CAClC,OAAO,KAAK,QAAQ,EAAM,EAAa,CAKzC,IAAM,EACJ,EAAa,OAAS,GAAK,EAAa,GAAG,aAAa,GAAK,EACzD,EAAgB,EAAa,SAAW,EAG9C,GAAI,KAAK,OAAS,eAIhB,OAHI,EAAK,SAAS,IAAI,CACb,KAAK,cAAc,EAAM,EAAa,CAExC,KAAK,QAAQ,EAAM,EAAa,CAYzC,GARI,KAAK,OAAS,YAAc,GAAe,GAEzC,EAAW,OAAS,IAMtB,EAAW,OAAS,KAAK,cAAgB,EAC3C,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAIA,EAAE,CAER,IACE,IAAI,EAAI,KAAK,cACb,GAAK,EAAW,OAAS,KAAK,cAC9B,IACA,CACA,IAAM,EAAW,EAAW,MAAM,EAAG,EAAE,CACjC,EAAY,EAAW,MAAM,EAAE,CAG/B,EAAe,KAAK,SAAS,EAAU,EAAU,CAMvD,GALI,GACF,EAAW,KAAK,EAAa,CAI3B,KAAK,uBACF,IAAM,KAAU,EAEnB,GAAI,EAAS,SAAS,EAAO,EAAI,EAAS,OAAS,KAAK,cAAe,CACrE,IAAM,EAAc,EAAS,MAAM,EAAG,GAAG,CACnC,EAAS,KAAK,SAAS,EAAa,EAAU,CAChD,GAEF,EAAW,KAAK,CAAE,GAAG,EAAQ,MAAO,EAAO,MAAQ,IAAM,CAAC,GAOpE,GAAI,EAAW,SAAW,EACxB,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,EAAW,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAC5C,IAAM,EAAO,EAAW,GAGxB,GAAI,KAAK,OAAS,YAAc,GAAe,EAAK,MAAQ,GAC1D,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAAQ,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAK,UAAW,GAAG,EAAK,WAAW,CAAC,CAAC,CAInE,MAAO,CACL,OACA,QACA,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAO,EAAW,CAAC,CAAC,CAMrD,WAAY,KAAK,IAAI,EAAK,MAAO,EAAE,CACnC,WAAY,GACb,CAMH,cAAsB,EAAc,EAAuC,CACzE,IAAM,EAAQ,EAAK,MAAM,IAAI,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACzD,GAAI,EAAM,OAAS,EACjB,OAAO,KAAK,QAAQ,EAAM,EAAa,CAGzC,IAAM,EAAqB,EAAE,CAC7B,IAAK,IAAM,KAAQ,EAAO,CACxB,IAAM,EAAS,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAS,KAAK,GAAG,EAAO,CAG1B,IAAM,EAAc,CAAC,GAAG,IAAI,IAAI,EAAS,CAAC,CAG1C,MAAO,CACL,OACA,MAAO,EACP,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAa,EAAK,aAAa,CAAC,CAAC,CAAC,CAMnE,WAAY,GACZ,WAAY,GACb,CAGH,SACE,EACA,EACqE,CAErE,IAAM,EAAa,KAAK,WAAW,UAAU,EAAS,CAChD,EAAc,KAAK,WAAW,UAAU,EAAU,CAGlD,EAAY,CAAC,GAAG,IAAI,IAAI,EAAW,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAC3E,EAAa,CAAC,GAAG,IAAI,IAAI,EAAY,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAEnF,GAAI,EAAU,SAAW,GAAK,EAAW,SAAW,EAClD,OAAO,KAIT,IAAI,EAAQ,EAIN,EACJ,EAAI,KAAK,IAAI,EAAS,OAAS,EAAU,OAAO,EAAI,EAAS,OAAS,EAAU,QAClF,GAAS,EAAgB,GAIzB,IAAM,GAAa,EAAS,OAAS,EAAU,QAAU,EACnD,EAAc,KAAK,IAAI,EAAY,EAAG,EAAE,CAC9C,GAAS,EAAc,GAIC,EAAW,KAAM,GAAU,EAAsB,IAAI,EAAM,CAAC,GAElF,GAAS,IAKX,IAAM,EAAe,EAAU,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CACtE,EAAgB,EAAW,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CAgB9E,OAfI,GAAgB,EAElB,GAAS,GACA,CAAC,GAAgB,CAAC,IAE3B,GAAS,KAKP,EAAS,OAAS,GAAK,EAAU,OAAS,KAC5C,GAAS,KAIJ,CACL,UAAW,EACX,WAAY,EACZ,MAAO,KAAK,IAAI,EAAG,EAAM,CAC1B,CAOH,aAAa,EAAwB,CAEnC,OADc,KAAK,MAAM,EAAK,CACjB,aAQjB,SAAgB,EAAoB,EAA+B,CACjE,OAAO,IAAI,IAAI,EAAO,IAAK,GAAM,EAAE,aAAa,CAAC,CAAC,CAapD,SAAgB,EACd,EACA,EAAmC,EAAE,CACnB,CAClB,IAAM,EAAa,EAAO,IAAK,GAAM,EAAE,aAAa,CAAC,CACrD,OAAO,EAAY,WAAW,EAAY,EAAQ,CC7cpD,MAAa,EAA4C,IAAI,IAAI,CAE/D,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CACxE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,WAAY,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7D,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,kBAAmB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/E,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,cAAe,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,QAAS,CAAE,MAAO,QAAS,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,iBAAkB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,iBAAkB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,iBAAkB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAM,IAAK,KAAM,CAAC,CACxD,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,kBAAmB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACtE,CAAC,kBAAmB,CAAE,MAAO,oBAAqB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CAGtE,CAAC,YAAa,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC5E,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAClE,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAGlE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9E,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9D,CAAC,SAAU,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CACjE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC7D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAG/D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAG5D,CAAC,sBAAuB,CAAE,MAAO,sBAAuB,WAAY,GAAO,IAAK,SAAU,CAAC,CAC3F,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAO,IAAK,SAAU,CAAC,CACnF,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC7D,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC9D,CAAC,CAMF,SAAgB,EACd,EACA,EACoD,CAEpD,IAAK,IAAI,EAAM,KAAK,IAAI,EAAG,EAAM,OAAS,EAAW,CAAE,GAAO,EAAG,IAAO,CAEtE,IAAM,EADc,EAAM,MAAM,EAAY,EAAa,EAAI,CAC/B,KAAK,IAAI,CAAC,aAAa,CAC/C,EAAS,EAAe,IAAI,EAAU,CAC5C,GAAI,EACF,MAAO,CAAE,SAAQ,UAAW,EAAK,CAGrC,OAAO,KAMT,SAAgB,EAAc,EAAuB,CACnD,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CAM/C,SAAgB,EAAc,EAAwC,CACpE,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CClJ/C,MAAM,EAAqB,IAAI,IAAI,CAAC,OAAO,CAAC,CAKtC,EAAe,IAAI,IAAI,CAAC,SAAU,UAAW,SAAS,CAAC,CAKvD,EAAa,IAAI,IAAI,CACzB,cACA,UACA,QACA,UACA,UACD,CAAC,CAEI,EAAmB,CACvB,UACA,OACA,OACA,KACA,KACA,KACA,KACA,IACA,IACA,KACA,MACA,KACD,CAuED,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACV,CAClB,GAAM,CACJ,UACA,mBACA,iBAAiB,GACjB,qBAAqB,IACnB,EAGE,EAAS,EAAS,EAAK,CAGvB,EAA4B,EAAE,CAC9B,EAAgD,EAAE,CAClD,EAAa,IAAI,IACjB,EACJ,qBAAsB,EACjB,EAA6C,mBAAqB,EACnE,GAEA,GAAkB,EAAa,IACnC,EAAO,SAAW,GAAK,EAAO,KAAO,EAAI,aAAa,CAElD,EAAqB,GAAiC,CAC1D,IAAI,EAAU,EACV,EAAmC,KAEvC,IAAK,IAAI,EAAU,EAAG,EAAU,EAAmB,IAAW,CAC5D,IAAM,EAAQ,EAAQ,aAAa,CACnC,EAAoB,KAEpB,IAAK,IAAM,KAAU,EAAkB,CACrC,GAAI,CAAC,EAAM,SAAS,EAAO,CAAE,SAE7B,IAAM,EAAO,EAAQ,MAAM,EAAG,EAAQ,OAAS,EAAO,OAAO,CAC7D,GAAI,EAAK,OAAS,EAAqB,SAEvC,IAAM,EAAa,EAAW,UAAU,EAAK,CAC7C,GAAI,CAAC,EAAe,EAAM,EAAW,CACnC,OAAO,EAGT,AACE,IAAoB,EAIxB,GAAI,CAAC,GAAqB,EAAkB,OAAS,EACnD,MAGF,EAAU,EAGZ,OAAO,MAGH,EAAa,GAA0B,CAC3C,IAAM,EAAM,EAAI,aAAa,CACvB,EAAS,EAAW,IAAI,EAAI,CAClC,GAAI,EAAQ,OAAO,EACnB,IAAM,EAAS,EAAW,UAAU,EAAI,CACxC,GACE,GACA,EAAe,EAAK,EAAO,EAC3B,EAAI,QAAU,EACd,CACA,IAAM,EAAiB,EAAkB,EAAI,CAC7C,GAAI,EAEF,OADA,EAAW,IAAI,EAAK,EAAe,CAC5B,EAIX,OADA,EAAW,IAAI,EAAK,EAAO,CACpB,GAGT,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAQ,EAAO,GAGjB,MAAW,IAAI,EAAM,KAAK,CAK9B,IAAI,EAAa,IAAI,EAAM,KAAK,CAAE,CAChC,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CACF,SAIF,GAAI,EAAM,OAAS,UAAY,EAAM,OAAS,UAAW,CACnD,GACF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CAEJ,SAIF,GAAI,EAAmB,IAAI,EAAM,KAAK,CAAE,CACtC,IAAM,EAAY,EAAM,MAAQ,GAC1B,EAAS,EAAU,EAAU,CAE7B,EAA4B,CAChC,SAAU,EACV,KAAM,EAAM,KACZ,SACA,SAAU,GACX,CAKK,EAAgB,EAAO,SAAW,GAAK,EAAO,KAAO,EAAU,aAAa,CAClF,GAAI,IAAqB,GAAsB,GAAgB,CAC7D,IAAM,EAAQ,EAAiB,MAAM,EAAU,CAC/C,GAAI,EAAM,WAAY,CACpB,EAAU,cAAgB,EAE1B,IAAM,EAAa,EAAM,MAAM,QAAS,GAAM,EAAU,EAAE,CAAC,CAC3D,EAAU,eAAiB,EAC3B,EAAU,OAAS,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAQ,GAAG,EAAW,CAAC,CAAC,EAI/D,EAAQ,KAAK,EAAU,CACvB,EAAW,KAAK,CAAE,MAAO,EAAQ,OAAS,EAAG,QAAO,CAAC,CACrD,SAIF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,EAIJ,GAAI,GAAW,EAAW,OAAS,EAAG,CACpC,IAAM,EAAgB,IAAI,EAAc,EAAY,EAAQ,CAE5D,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,OAAQ,IAAK,CAC1C,GAAM,CAAE,QAAO,SAAU,EAAW,GAC9B,EAAY,EAAI,EAAI,EAAW,EAAI,GAAG,MAAQ,KAC9C,EAAY,EAAI,EAAW,OAAS,EAAI,EAAW,EAAI,GAAG,MAAQ,KAElE,EAAS,EAAc,aAC3B,EAAM,MAAQ,GACd,GAAW,MAAQ,KACnB,GAAW,MAAQ,KACnB,CACE,WAAY,GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC1D,WAAY,GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC3D,CACF,CAED,EAAQ,GAAO,cAAgB,EAAO,MACtC,EAAQ,GAAO,WAAa,EAAO,iBAIrC,IAAK,GAAM,CAAE,WAAW,EAAY,CAClC,IAAM,EAAY,EAAQ,GACtB,EAAU,OAAO,OAAS,IAC5B,EAAU,cAAgB,EAAU,OAAO,GAC3C,EAAU,WAAa,EAAU,OAAO,SAAW,EAAI,EAAM,IAKnE,OAAO,EAWT,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACf,CACb,GAAM,CACJ,kBAAkB,GAClB,qBAAqB,GACrB,yBAAyB,IACvB,EAEE,EAAY,EAAY,EAAM,EAAY,EAAQ,CAClD,EAAS,IAAI,IAMb,GAAgB,EAAe,IAC9B,EACD,EACK,EAAqB,EAAO,EAAI,CAElC,EAAa,IAAI,EAAM,CAJD,GAO/B,IAAK,IAAM,KAAS,EAEd,MAAM,SAIV,IAAI,MAEG,IAAM,KAAS,EAAM,OACnB,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,MAKjB,EAAM,gBAGH,EAAa,EAAM,cAAc,EACpC,EAAO,IAAI,EAAM,cAAc,EAMrC,GAAI,EAAM,eAAe,WAAY,CACnC,IAAM,EAAa,EAAM,eACrB,EAAM,eACN,EAAM,cAAc,MAAM,QAAS,GAAM,EAAW,UAAU,EAAE,CAAC,CACrE,IAAK,IAAM,KAAS,EACX,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,EAM3B,OAAO,EA4CT,SAAgB,EACd,EACA,EACA,EAA8B,EAAE,CACb,CACnB,GAAM,CACJ,kBAAkB,GAClB,qBAAqB,GACrB,yBAAyB,GACzB,cAAc,MACd,aAAa,MACb,aAAa,GACb,kBAAkB,GAClB,oBAAoB,IAClB,EAEE,EAAY,EAAY,EAAM,EAAY,EAAQ,CAClD,EAAqB,EAAE,CAMvB,GAAgB,EAAe,IAC9B,EACD,EACK,EAAqB,EAAO,EAAI,CAElC,EAAa,IAAI,EAAM,CAJD,GAO/B,IAAK,IAAM,KAAS,EAAW,CAE7B,GAAI,EAAM,SAAU,SAEpB,IAAI,EAAuB,EAAE,CAO7B,GANI,EACF,EAAa,EAAM,OACV,EAAM,gBACf,EAAa,CAAC,EAAM,cAAc,EAGhC,EAAiB,CACnB,IAAM,EAAM,EAAM,UAAY,GAC9B,GAAI,EAAI,OAAS,EAAG,CAClB,IAAM,EAAW,EAAoB,EAAI,aAAa,CAAG,EACzD,EAAa,CAAC,GAAG,EAAY,EAAS,EAI1C,IAAM,EAAS,CACb,GAAG,IAAI,IAAI,EAAW,OAAQ,GAAU,GAAS,CAAC,EAAa,EAAM,CAAC,CAAC,CACxE,CAEG,EAAO,OAAS,GAClB,EAAO,KAAK,EAAO,CAevB,MAAO,CAAE,SAAQ,MAXH,EACX,IAAK,GAAU,CACd,IAAM,EAAS,EAAM,KAAK,EAAW,CAIrC,OAHI,GAAc,EAAM,OAAS,EACxB,IAAI,EAAO,GAEb,GACP,CACD,OAAQ,GAAS,EAAK,OAAS,EAAE,CACjC,KAAK,EAAY,CAEI,CAqC1B,SAAgB,GACd,EACA,EACA,EACA,EAGI,EAAE,CACa,CACnB,IAAM,EAAQ,YAAY,KAAK,CAE3B,EACA,EAEJ,OAAQ,EAAR,CACE,IAAK,QAAS,CAEZ,IAAM,EAAS,EAAK,MAAM,MAAM,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACtD,EAAmC,EAAE,CAE3C,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAU,EAAM,QAAQ,oCAAqC,GAAG,CACtE,GAAI,EAAS,CACX,IAAM,EAAc,EAAW,UAAU,EAAQ,CACjD,EAAe,KAAK,CAClB,SAAU,EACV,KAAM,OACN,OAAQ,EACR,SAAU,GACV,cAAe,EAAY,GAC3B,WAAY,EAAY,SAAW,EAAI,EAAM,GAC9C,CAAC,EAGN,EAAY,EACZ,EAAS,IAAI,IAAI,EAAe,IAAK,GAAM,EAAE,cAAe,CAAC,OAAO,QAAQ,CAAC,CAC7E,MAGF,IAAK,YAEH,EAAY,EAAY,EAAM,EAAW,CACzC,EAAS,IAAI,IACX,EACG,OAAQ,GAAM,EAAE,OAAS,QAAU,EAAE,OAAO,OAAS,EAAE,CACvD,IAAK,GAAM,EAAE,OAAO,GAAG,CAC3B,CACD,MAGF,IAAK,gBAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACpB,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACpB,CAAC,CACF,MAGF,IAAK,OAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,MAIJ,IAAM,EAAS,YAAY,KAAK,CAAG,EAG7B,EAAa,EAAU,OAAQ,GAAM,EAAE,OAAS,OAAO,CACvD,EAAY,EAAW,OAEvB,EAAkB,EAAW,OAAQ,GAGvC,EAAE,OAAO,OAAS,GAClB,EAAE,EAAE,OAAO,SAAW,GAAK,EAAE,OAAO,KAAO,EAAE,SAAS,aAAa,EAErE,CAAC,OAEG,EAAiB,EAAW,OAAQ,GAAM,EAAE,OAAO,OAAS,EAAE,CAAC,OAE/D,EAAc,EACjB,OAAQ,GAAM,EAAE,aAAe,IAAA,GAAU,CACzC,IAAK,GAAM,EAAE,WAAY,CACtB,EACJ,EAAY,OAAS,EACjB,EAAY,QAAQ,EAAG,IAAM,EAAI,EAAG,EAAE,CAAG,EAAY,OACrD,EAEA,EAAiB,EAAW,OAAQ,GAAM,EAAE,eAAe,WAAW,CAAC,OACvE,EAAkB,EAAU,OAAQ,GAAM,EAAE,SAAS,CAAC,OAE5D,MAAO,CACL,YACA,kBACA,SAAU,EAAY,EAAI,EAAkB,EAAY,EACxD,iBACA,cAAe,EAAY,EAAI,EAAiB,EAAY,EAC5D,gBACA,iBACA,kBACA,aAAc,EAAO,KACrB,SACD"}
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../src/stopwords.ts","../src/binary-lemmatizer.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/disambiguate.ts","../src/types.ts","../src/bloom.ts","../src/compounds.ts","../src/phrases.ts","../src/normalizers.ts","../src/pipeline.ts"],"sourcesContent":["/**\n * Icelandic stopwords for search indexing.\n *\n * Source: https://github.com/atlijas/icelandic-stop-words\n * Data from DIM (Database of Icelandic Morphology) by Árni Magnússon Institute.\n *\n * Includes all inflected forms of pronouns, prepositions, conjunctions, etc.\n */\n\n// prettier-ignore\nexport const STOPWORDS_IS = new Set([\n \"á\",\"að\",\"aðra\",\"aðrar\",\"aðrir\",\"af\",\"alla\",\"allan\",\"allar\",\"allir\",\n \"allnokkra\",\"allnokkrar\",\"allnokkrir\",\"allnokkru\",\"allnokkrum\",\"allnokkuð\",\n \"allnokkur\",\"allnokkurn\",\"allnokkurra\",\"allnokkurrar\",\"allnokkurri\",\"allnokkurs\",\n \"allnokkurt\",\"allra\",\"allrar\",\"allri\",\"alls\",\"allt\",\"alltað\",\"allur\",\"án\",\n \"andspænis\",\"annað\",\"annaðhvort\",\"annan\",\"annar\",\"annarra\",\"annarrar\",\"annarri\",\n \"annars\",\"árla\",\"ásamt\",\"auk\",\"austan\",\"austanundir\",\"austur\",\"báða\",\"báðar\",\n \"báðir\",\"báðum\",\"bæði\",\"bak\",\"beggja\",\"eða\",\"eður\",\"ef\",\"eftir\",\"ég\",\"ein\",\n \"eina\",\"einar\",\"einhver\",\"einhverja\",\"einhverjar\",\"einhverjir\",\"einhverju\",\n \"einhverjum\",\"einhvern\",\"einhverra\",\"einhverrar\",\"einhverri\",\"einhvers\",\"einir\",\n \"einn\",\"einna\",\"einnar\",\"einni\",\"eins\",\"einskis\",\"einu\",\"einum\",\"eitt\",\"eitthvað\",\n \"eitthvert\",\"ekkert\",\"ella\",\"ellegar\",\"en\",\"enda\",\"enga\",\"engan\",\"engar\",\"engin\",\n \"enginn\",\"engir\",\"engra\",\"engrar\",\"engri\",\"engu\",\"engum\",\"er\",\"fáein\",\"fáeina\",\n \"fáeinar\",\"fáeinir\",\"fáeinna\",\"fáeinum\",\"fjær\",\"fjarri\",\"flestalla\",\"flestallan\",\n \"flestallar\",\"flestallir\",\"flestallra\",\"flestallrar\",\"flestallri\",\"flestalls\",\n \"flestallt\",\"flestallur\",\"flestöll\",\"flestöllu\",\"flestöllum\",\"frá\",\"fram\",\"fyrir\",\n \"fyrst\",\"gagnstætt\",\"gagnvart\",\"gegn\",\"gegnt\",\"gegnum\",\"hana\",\"handa\",\"handan\",\n \"hann\",\"hans\",\"heldur\",\"hennar\",\"henni\",\"hið\",\"hin\",\"hina\",\"hinar\",\"hinir\",\"hinn\",\n \"hinna\",\"hinnar\",\"hinni\",\"hins\",\"hinu\",\"hinum\",\"hitt\",\"hjá\",\"honum\",\"hún\",\"hvað\",\n \"hvaða\",\"hvenær\",\"hver\",\"hverja\",\"hverjar\",\"hverjir\",\"hverju\",\"hverjum\",\"hvern\",\n \"hverra\",\"hverrar\",\"hverri\",\"hvers\",\"hvert\",\"hvílík\",\"hvílíka\",\"hvílíkan\",\n \"hvílíkar\",\"hvílíkir\",\"hvílíkra\",\"hvílíkrar\",\"hvílíkri\",\"hvílíks\",\"hvílíkt\",\n \"hvílíku\",\"hvílíkum\",\"hvílíkur\",\"hvor\",\"hvora\",\"hvorar\",\"hvorir\",\"hvorki\",\"hvorn\",\n \"hvorra\",\"hvorrar\",\"hvorri\",\"hvors\",\"hvort\",\"hvoru\",\"hvorug\",\"hvoruga\",\"hvorugan\",\n \"hvorugar\",\"hvorugir\",\"hvorugra\",\"hvorugrar\",\"hvorugri\",\"hvorugs\",\"hvorugt\",\n \"hvorugu\",\"hvorugum\",\"hvorugur\",\"hvorum\",\"í\",\"inn\",\"innan\",\"innanundir\",\"jafnframt\",\n \"jafnhliða\",\"kring\",\"kringum\",\"með\",\"meðal\",\"meðan\",\"meður\",\"mér\",\"mestalla\",\n \"mestallan\",\"mestallar\",\"mestallir\",\"mestallra\",\"mestallrar\",\"mestallri\",\"mestalls\",\n \"mestallt\",\"mestallur\",\"mestöll\",\"mestöllu\",\"mestöllum\",\"miðli\",\"mig\",\"milli\",\n \"millum\",\"mín\",\"mína\",\"mínar\",\"mínir\",\"minn\",\"minna\",\"minnar\",\"minni\",\"míns\",\n \"mínu\",\"mínum\",\"mitt\",\"mót\",\"móti\",\"nær\",\"nærri\",\"næst\",\"næstum\",\"nálægt\",\"né\",\n \"neðan\",\"nein\",\"neina\",\"neinar\",\"neinir\",\"neinn\",\"neinna\",\"neinnar\",\"neinni\",\n \"neins\",\"neinu\",\"neinum\",\"neitt\",\"nema\",\"niður\",\"nokkra\",\"nokkrar\",\"nokkrir\",\n \"nokkru\",\"nokkrum\",\"nokkuð\",\"nokkur\",\"nokkurn\",\"nokkurra\",\"nokkurrar\",\"nokkurri\",\n \"nokkurs\",\"nokkurt\",\"norðan\",\"nú\",\"öðru\",\"öðrum\",\"of\",\"ofan\",\"ofar\",\"og\",\"óháð\",\n \"okkar\",\"okkur\",\"öll\",\"öllu\",\"öllum\",\"önnur\",\"órafjarri\",\"oss\",\"sá\",\"sakir\",\n \"sama\",\"saman\",\"samar\",\"samfara\",\"samhliða\",\"sami\",\"samir\",\"samkvæmt\",\"samra\",\n \"samrar\",\"samri\",\"sams\",\"samskipa\",\"samt\",\"samtímis\",\"samur\",\"sem\",\"sér\",\"sérhvað\",\n \"sérhver\",\"sérhverja\",\"sérhverjar\",\"sérhverjir\",\"sérhverju\",\"sérhverjum\",\"sérhvern\",\n \"sérhverra\",\"sérhverrar\",\"sérhverri\",\"sérhvers\",\"sérhvert\",\"síðan\",\"síðla\",\"sig\",\n \"sín\",\"sína\",\"sínar\",\"sínhver\",\"sínhverja\",\"sínhverjar\",\"sínhverjir\",\"sínhverju\",\n \"sínhverjum\",\"sínhvern\",\"sínhverra\",\"sínhverrar\",\"sínhverri\",\"sínhvers\",\"sínhvert\",\n \"sínhvor\",\"sínhvora\",\"sínhvorar\",\"sínhvorir\",\"sínhvorn\",\"sínhvorra\",\"sínhvorrar\",\n \"sínhvorri\",\"sínhvors\",\"sínhvort\",\"sínhvoru\",\"sínhvorum\",\"sínir\",\"sinn\",\"sinna\",\n \"sinnar\",\"sinnhver\",\"sinnhverja\",\"sinnhverjar\",\"sinnhverjir\",\"sinnhverju\",\n \"sinnhverjum\",\"sinnhvern\",\"sinnhverra\",\"sinnhverrar\",\"sinnhverri\",\"sinnhvers\",\n \"sinnhvert\",\"sinnhvor\",\"sinnhvora\",\"sinnhvorar\",\"sinnhvorir\",\"sinnhvorn\",\n \"sinnhvorra\",\"sinnhvorrar\",\"sinnhvorri\",\"sinnhvors\",\"sinnhvort\",\"sinnhvoru\",\n \"sinnhvorum\",\"sinni\",\"síns\",\"sínu\",\"sínum\",\"sitt\",\"sitthvað\",\"sitthver\",\n \"sitthverja\",\"sitthverjar\",\"sitthverjir\",\"sitthverju\",\"sitthverjum\",\"sitthvern\",\n \"sitthverra\",\"sitthverrar\",\"sitthverri\",\"sitthvers\",\"sitthvert\",\"sitthvor\",\n \"sitthvora\",\"sitthvorar\",\"sitthvorir\",\"sitthvorn\",\"sitthvorra\",\"sitthvorrar\",\n \"sitthvorri\",\"sitthvors\",\"sitthvort\",\"sitthvoru\",\"sitthvorum\",\"sjálf\",\"sjálfa\",\n \"sjálfan\",\"sjálfar\",\"sjálfir\",\"sjálfra\",\"sjálfrar\",\"sjálfri\",\"sjálfs\",\"sjálft\",\n \"sjálfu\",\"sjálfum\",\"sjálfur\",\"slík\",\"slíka\",\"slíkan\",\"slíkar\",\"slíkir\",\"slíkra\",\n \"slíkrar\",\"slíkri\",\"slíks\",\"slíkt\",\"slíku\",\"slíkum\",\"slíkur\",\"snemma\",\"sökum\",\n \"söm\",\"sömu\",\"sömum\",\"sú\",\"sum\",\"suma\",\"suman\",\"sumar\",\"sumir\",\"sumra\",\"sumrar\",\n \"sumri\",\"sums\",\"sumt\",\"sumu\",\"sumum\",\"sumur\",\"sunnan\",\"svo\",\"til\",\"tráss\",\"um\",\n \"umfram\",\"umhverfis\",\"undan\",\"undir\",\"uns\",\"upp\",\"úr\",\"út\",\"utan\",\"útundan\",\n \"vegna\",\"vér\",\"vestan\",\"vestur\",\"vettugi\",\"við\",\"viður\",\"vor\",\"vora\",\"vorar\",\n \"vorir\",\"vorn\",\"vorra\",\"vorrar\",\"vorri\",\"vors\",\"vort\",\"voru\",\"vorum\",\"yðar\",\n \"yður\",\"yfir\",\"ykkar\",\"ykkur\",\"ýmis\",\"ýmiss\",\"ýmissa\",\"ýmissar\",\"ýmissi\",\"ýmist\",\n \"ýmsa\",\"ýmsan\",\"ýmsar\",\"ýmsir\",\"ýmsu\",\"ýmsum\",\"þá\",\"það\",\"þær\",\"þann\",\"þar\",\n \"þau\",\"þegar\",\"þeim\",\"þeir\",\"þeirra\",\"þeirrar\",\"þeirri\",\"þennan\",\"þér\",\"þess\",\n \"þessa\",\"þessar\",\"þessara\",\"þessarar\",\"þessari\",\"þessi\",\"þessir\",\"þessu\",\n \"þessum\",\"þetta\",\"þið\",\"þig\",\"þín\",\"þína\",\"þínar\",\"þínir\",\"þinn\",\"þinna\",\n \"þinnar\",\"þinni\",\"þíns\",\"þínu\",\"þínum\",\"þitt\",\"þó\",\"þónokkra\",\"þónokkrar\",\n \"þónokkrir\",\"þónokkru\",\"þónokkrum\",\"þónokkuð\",\"þónokkur\",\"þónokkurn\",\"þónokkurra\",\n \"þónokkurrar\",\"þónokkurri\",\"þónokkurs\",\"þónokkurt\",\"þótt\",\"þú\",\"því\",\"þvílík\",\n \"þvílíka\",\"þvílíkan\",\"þvílíkar\",\"þvílíkir\",\"þvílíkra\",\"þvílíkrar\",\"þvílíkri\",\n \"þvílíks\",\"þvílíkt\",\"þvílíku\",\"þvílíkum\",\"þvílíkur\",\n]);\n\n/**\n * Check if a word is a stopword.\n */\nexport function isStopword(word: string): boolean {\n return STOPWORDS_IS.has(word.toLowerCase());\n}\n\n/**\n * Contextual stopword rules for ambiguous words.\n *\n * Some words are stopwords in certain grammatical contexts but not others:\n * - \"á\" as preposition (fs) or adverb (ao) = stopword\n * - \"á\" as verb \"eiga\" (so) = NOT a stopword (\"Ég á bíl\")\n * - \"á\" as noun \"river\" (no) = NOT a stopword (\"við ána\")\n *\n * Map: lemma -> Set of POS codes where it IS a stopword\n */\nexport const CONTEXTUAL_STOPWORDS: Map<string, Set<string>> = new Map([\n // \"á\" - prep/adverb = stop, verb/noun = keep\n [\"á\", new Set([\"fs\", \"ao\"])],\n // \"við\" - prep = stop, pronoun \"we\" = stop, noun \"viður\" = keep\n [\"við\", new Set([\"fs\", \"fn\"])],\n // \"af\" - prep/adverb = stop\n [\"af\", new Set([\"fs\", \"ao\"])],\n // \"til\" - prep = stop\n [\"til\", new Set([\"fs\"])],\n // \"um\" - prep = stop\n [\"um\", new Set([\"fs\"])],\n // \"frá\" - prep = stop\n [\"frá\", new Set([\"fs\"])],\n // \"yfir\" - prep/adverb = stop\n [\"yfir\", new Set([\"fs\", \"ao\"])],\n // \"undir\" - prep/adverb = stop\n [\"undir\", new Set([\"fs\", \"ao\"])],\n // \"fyrir\" - prep/adverb = stop\n [\"fyrir\", new Set([\"fs\", \"ao\"])],\n // \"eftir\" - prep/adverb = stop\n [\"eftir\", new Set([\"fs\", \"ao\"])],\n // \"gegn\" - prep = stop\n [\"gegn\", new Set([\"fs\"])],\n // \"hjá\" - prep = stop\n [\"hjá\", new Set([\"fs\"])],\n // \"úr\" - prep = stop, noun \"úr\" (watch) = keep\n [\"úr\", new Set([\"fs\"])],\n // \"í\" - prep = stop\n [\"í\", new Set([\"fs\"])],\n]);\n\n/**\n * Check if a lemma is a stopword in a specific grammatical context.\n *\n * For ambiguous words, uses POS to determine stopword status.\n * For unambiguous words, falls back to standard stopword check.\n *\n * @param lemma - The lemmatized word\n * @param pos - Part of speech code (fs, ao, so, no, etc.)\n * @returns true if the word should be treated as a stopword\n */\nexport function isContextualStopword(lemma: string, pos?: string): boolean {\n const normalized = lemma.toLowerCase();\n\n // Check if this lemma has context-dependent rules\n const contextRule = CONTEXTUAL_STOPWORDS.get(normalized);\n if (contextRule && pos) {\n // Use the rule: stopword only if POS is in the stopword set\n return contextRule.has(pos);\n }\n\n // Fall back to standard stopword check\n return STOPWORDS_IS.has(normalized);\n}\n\n/**\n * Filter stopwords from an array of words/lemmas.\n */\nexport function removeStopwords<T extends string>(words: T[]): T[] {\n return words.filter((w) => !isStopword(w));\n}\n","/**\n * Binary format lemmatizer for efficient memory usage.\n *\n * Uses ArrayBuffer with TypedArray views and binary search for O(log n) lookups.\n * Target memory: ~70MB vs ~1.2GB for JS Map-based approach.\n *\n * Binary file format:\n * - Header (32 bytes): magic, version, counts\n * - String pool: all strings concatenated UTF-8\n * - Lemma index: offsets + lengths\n * - Word index: offsets + lengths (sorted alphabetically)\n * - Entry offsets: start/end of entries for each word\n * - Entries: packed lemmaIdx:20 + posCode:4\n * - Bigrams: word1/word2 offsets + lengths + frequencies (sorted)\n */\n\nimport type {\n WordClass,\n LemmaWithPOS,\n LemmaWithMorph,\n LemmatizerLike,\n BigramProvider,\n GrammaticalCase,\n GrammaticalGender,\n GrammaticalNumber,\n MorphFeatures,\n} from \"./types.js\";\n\nconst MAGIC = 0x4c454d41; // \"LEMA\"\n\n// POS code to string mapping (must match build-binary.py)\nconst CODE_TO_POS: WordClass[] = [\n \"no\",\n \"so\",\n \"lo\",\n \"ao\",\n \"fs\",\n \"fn\",\n \"st\",\n \"to\",\n \"gr\",\n \"uh\",\n];\n\n// Case code to string mapping (must match build-binary.py)\n// 0=none, 1=nf, 2=þf, 3=þgf, 4=ef\nconst CODE_TO_CASE: (GrammaticalCase | undefined)[] = [\n undefined, // 0 = none\n \"nf\", // 1 = nominative\n \"þf\", // 2 = accusative\n \"þgf\", // 3 = dative\n \"ef\", // 4 = genitive\n];\n\n// Gender code to string mapping (must match build-binary.py)\n// 0=none, 1=kk, 2=kvk, 3=hk\nconst CODE_TO_GENDER: (GrammaticalGender | undefined)[] = [\n undefined, // 0 = none\n \"kk\", // 1 = masculine\n \"kvk\", // 2 = feminine\n \"hk\", // 3 = neuter\n];\n\n// Number code to string mapping (must match build-binary.py)\n// 0=et/none, 1=ft\nconst CODE_TO_NUMBER: (GrammaticalNumber | undefined)[] = [\n \"et\", // 0 = singular (or none)\n \"ft\", // 1 = plural\n];\n\nexport interface BinaryLemmatizerOptions {\n fetch?: typeof fetch;\n}\n\nexport interface BinaryLemmatizeOptions {\n wordClass?: WordClass;\n}\n\nexport class BinaryLemmatizer implements LemmatizerLike, BigramProvider {\n private buffer: ArrayBuffer;\n private stringPool: Uint8Array;\n private lemmaOffsets: Uint32Array;\n private lemmaLengths: Uint8Array;\n private wordOffsets: Uint32Array;\n private wordLengths: Uint8Array;\n private entryOffsets: Uint32Array;\n private entries: Uint32Array;\n private bigramW1Offsets: Uint32Array;\n private bigramW1Lengths: Uint8Array;\n private bigramW2Offsets: Uint32Array;\n private bigramW2Lengths: Uint8Array;\n private bigramFreqs: Uint32Array;\n\n private lemmaCount: number;\n private wordCount: number;\n private entryCount: number;\n private bigramCount: number;\n private version: number;\n\n private decoder = new TextDecoder(\"utf-8\");\n\n private constructor(buffer: ArrayBuffer) {\n this.buffer = buffer;\n const view = new DataView(buffer);\n\n // Read header\n const magic = view.getUint32(0, true);\n if (magic !== MAGIC) {\n throw new Error(\n `Invalid binary format: expected magic 0x${MAGIC.toString(16)}, got 0x${magic.toString(16)}`\n );\n }\n\n this.version = view.getUint32(4, true);\n if (this.version !== 1 && this.version !== 2) {\n throw new Error(`Unsupported version: ${this.version}`);\n }\n\n const stringPoolSize = view.getUint32(8, true);\n this.lemmaCount = view.getUint32(12, true);\n this.wordCount = view.getUint32(16, true);\n this.entryCount = view.getUint32(20, true);\n this.bigramCount = view.getUint32(24, true);\n // reserved at 28\n\n // Calculate section offsets\n let offset = 32;\n\n // String pool\n this.stringPool = new Uint8Array(buffer, offset, stringPoolSize);\n offset += stringPoolSize;\n\n // Lemma offsets (u32 × lemmaCount)\n this.lemmaOffsets = new Uint32Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount * 4;\n\n // Lemma lengths (u8 × lemmaCount)\n this.lemmaLengths = new Uint8Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Word offsets (u32 × wordCount)\n this.wordOffsets = new Uint32Array(buffer, offset, this.wordCount);\n offset += this.wordCount * 4;\n\n // Word lengths (u8 × wordCount)\n this.wordLengths = new Uint8Array(buffer, offset, this.wordCount);\n offset += this.wordCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Entry offsets (u32 × (wordCount + 1))\n this.entryOffsets = new Uint32Array(buffer, offset, this.wordCount + 1);\n offset += (this.wordCount + 1) * 4;\n\n // Entries (u32 × entryCount)\n this.entries = new Uint32Array(buffer, offset, this.entryCount);\n offset += this.entryCount * 4;\n\n // Bigram word1 offsets\n this.bigramW1Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word1 lengths\n this.bigramW1Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram word2 offsets\n this.bigramW2Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word2 lengths\n this.bigramW2Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram frequencies\n this.bigramFreqs = new Uint32Array(buffer, offset, this.bigramCount);\n }\n\n /**\n * Load binary lemmatizer from URL.\n */\n static async load(\n url: string,\n options: BinaryLemmatizerOptions = {}\n ): Promise<BinaryLemmatizer> {\n const fetchFn = options.fetch ?? fetch;\n const response = await fetchFn(url);\n\n if (!response.ok) {\n throw new Error(`Failed to load binary data: ${response.status}`);\n }\n\n const buffer = await response.arrayBuffer();\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Load from ArrayBuffer (for Node.js or pre-loaded data).\n */\n static loadFromBuffer(buffer: ArrayBuffer): BinaryLemmatizer {\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Get string from string pool.\n */\n private getString(offset: number, length: number): string {\n return this.decoder.decode(this.stringPool.subarray(offset, offset + length));\n }\n\n /**\n * Get lemma by index.\n */\n private getLemma(index: number): string {\n return this.getString(this.lemmaOffsets[index], this.lemmaLengths[index]);\n }\n\n /**\n * Get word by index.\n */\n private getWord(index: number): string {\n return this.getString(this.wordOffsets[index], this.wordLengths[index]);\n }\n\n /**\n * Binary search for word in sorted word array.\n * Returns index or -1 if not found.\n */\n private findWord(word: string): number {\n let left = 0;\n let right = this.wordCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midWord = this.getWord(mid);\n\n if (midWord === word) {\n return mid;\n }\n if (midWord < word) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n\n return -1;\n }\n\n /**\n * Look up possible lemmas for a word form.\n * Results are sorted by corpus frequency (most common first).\n * Duplicates are removed (same lemma with different morph features).\n */\n lemmatize(word: string, options: BinaryLemmatizeOptions = {}): string[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [normalized];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n\n const { wordClass } = options;\n const seen = new Set<string>();\n const result: string[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const pos = CODE_TO_POS[posCode];\n\n if (wordClass && pos !== wordClass) {\n continue;\n }\n\n const lemma = this.getLemma(lemmaIdx);\n if (!seen.has(lemma)) {\n seen.add(lemma);\n result.push(lemma);\n }\n }\n\n if (result.length === 0) {\n return [normalized];\n }\n\n return result;\n }\n\n /**\n * Unpack entry based on binary format version.\n * Version 1: bits 0-3=pos, bits 4-23=lemmaIdx\n * Version 2: bits 0-3=pos, bits 4-6=case, bits 7-8=gender, bit 9=number, bits 10-29=lemmaIdx\n */\n private unpackEntry(entry: number): {\n lemmaIdx: number;\n posCode: number;\n caseCode: number;\n genderCode: number;\n numberCode: number;\n } {\n if (this.version === 1) {\n return {\n lemmaIdx: entry >>> 4,\n posCode: entry & 0xf,\n caseCode: 0,\n genderCode: 0,\n numberCode: 0,\n };\n }\n // Version 2\n return {\n lemmaIdx: entry >>> 10,\n posCode: entry & 0xf,\n caseCode: (entry >>> 4) & 0x7,\n genderCode: (entry >>> 7) & 0x3,\n numberCode: (entry >>> 9) & 0x1,\n };\n }\n\n /**\n * Look up lemmas with their word class (POS) tags.\n * Duplicates are removed (same lemma+pos with different morph features).\n */\n lemmatizeWithPOS(word: string): LemmaWithPOS[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const seen = new Set<string>();\n const result: LemmaWithPOS[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const lemma = this.getLemma(lemmaIdx);\n const pos = CODE_TO_POS[posCode] ?? (\"\" as WordClass);\n const key = `${lemma}:${pos}`;\n\n if (!seen.has(key)) {\n seen.add(key);\n result.push({ lemma, pos });\n }\n }\n\n return result;\n }\n\n /**\n * Look up lemmas with word class and morphological features.\n * Only available with version 2 binary format.\n */\n lemmatizeWithMorph(word: string): LemmaWithMorph[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const result: LemmaWithMorph[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode, caseCode, genderCode, numberCode } =\n this.unpackEntry(this.entries[i]);\n\n const morph: MorphFeatures = {};\n const caseVal = CODE_TO_CASE[caseCode];\n const genderVal = CODE_TO_GENDER[genderCode];\n const numberVal = CODE_TO_NUMBER[numberCode];\n\n if (caseVal) morph.case = caseVal;\n if (genderVal) morph.gender = genderVal;\n if (numberVal) morph.number = numberVal;\n\n result.push({\n lemma: this.getLemma(lemmaIdx),\n pos: CODE_TO_POS[posCode] ?? (\"\" as WordClass),\n morph: Object.keys(morph).length > 0 ? morph : undefined,\n });\n }\n\n return result;\n }\n\n /**\n * Check if morphological features are available (version 2+).\n */\n hasMorphFeatures(): boolean {\n return this.version >= 2;\n }\n\n /**\n * Get the binary format version.\n */\n getVersion(): number {\n return this.version;\n }\n\n /**\n * Binary search for bigram. Returns index or -1.\n */\n private findBigram(word1: string, word2: string): number {\n let left = 0;\n let right = this.bigramCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midW1 = this.getString(\n this.bigramW1Offsets[mid],\n this.bigramW1Lengths[mid]\n );\n\n if (midW1 < word1) {\n left = mid + 1;\n } else if (midW1 > word1) {\n right = mid - 1;\n } else {\n // word1 matches, compare word2\n const midW2 = this.getString(\n this.bigramW2Offsets[mid],\n this.bigramW2Lengths[mid]\n );\n\n if (midW2 === word2) {\n return mid;\n }\n if (midW2 < word2) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n }\n\n return -1;\n }\n\n /**\n * Get bigram frequency.\n * @returns Frequency count, or 0 if not found\n */\n bigramFreq(word1: string, word2: string): number {\n const idx = this.findBigram(word1.toLowerCase(), word2.toLowerCase());\n return idx === -1 ? 0 : this.bigramFreqs[idx];\n }\n\n /**\n * Alias for bigramFreq to satisfy BigramProvider interface.\n * @returns Frequency count, or 0 if not found\n */\n freq(word1: string, word2: string): number {\n return this.bigramFreq(word1, word2);\n }\n\n /**\n * Check if a word is known to the lemmatizer.\n */\n isKnown(word: string): boolean {\n return this.findWord(word.toLowerCase()) !== -1;\n }\n\n /**\n * Get the total number of lemmas in the database.\n */\n get lemmaCountValue(): number {\n return this.lemmaCount;\n }\n\n /**\n * Get the total number of word forms.\n */\n get wordFormCount(): number {\n return this.wordCount;\n }\n\n /**\n * Get the total number of bigrams.\n */\n get bigramCountValue(): number {\n return this.bigramCount;\n }\n\n /**\n * Get raw buffer size (approximate memory usage).\n */\n get bufferSize(): number {\n return this.buffer.byteLength;\n }\n\n /**\n * Get all unique lemmas from the binary data.\n * Useful for compound splitting.\n */\n getAllLemmas(): string[] {\n const lemmas: string[] = [];\n for (let i = 0; i < this.lemmaCount; i++) {\n lemmas.push(this.getLemma(i));\n }\n return lemmas;\n }\n}\n","/**\n * Disambiguation rules for Icelandic.\n *\n * Based on GreynirEngine's Prefs.conf and linguistic patterns.\n * These rules help resolve ambiguous words by considering context.\n */\n\nimport type { WordClass } from \"./types.js\";\n\n/**\n * A disambiguation preference rule.\n *\n * When the word matches and the context condition is met,\n * prefer `prefer` POS over `over` POS.\n */\nexport interface DisambiguationRule {\n /** The ambiguous word (lowercase) */\n word: string;\n /** Preferred part of speech in this context */\n prefer: WordClass;\n /** Dispreferred part of speech */\n over: WordClass;\n /** Context condition for when to apply this rule */\n context: \"before_noun\" | \"before_verb\" | \"after_pronoun\" | \"sentence_start\" | \"any\";\n /** Optional description */\n description?: string;\n}\n\n/**\n * Disambiguation rules extracted from Greynir's patterns.\n *\n * Format: { word, prefer, over, context }\n *\n * Common patterns:\n * - \"á\" as preposition (fs) when before noun, as verb \"eiga\" (so) after pronoun\n * - \"við\" as preposition (fs) when before noun, as pronoun (fn) at sentence start\n */\nexport const DISAMBIGUATION_RULES: DisambiguationRule[] = [\n // \"á\" - one of the most ambiguous words\n // Preposition: \"á borðinu\", \"á Íslandi\"\n // Verb (eiga): \"Ég á bíl\", \"Hún á hest\"\n // Noun (river): \"við ána\"\n {\n word: \"á\",\n prefer: \"so\", // verb \"eiga\"\n over: \"fs\", // preposition\n context: \"after_pronoun\",\n description: \"á after pronoun = verb 'eiga' (I own, you own)\",\n },\n {\n word: \"á\",\n prefer: \"fs\", // preposition\n over: \"so\", // verb\n context: \"before_noun\",\n description: \"á before noun = preposition (on, at)\",\n },\n\n // \"við\" - preposition vs pronoun\n // Preposition: \"við gluggann\", \"við borðið\"\n // Pronoun: \"Við erum hér\" (we are here)\n {\n word: \"við\",\n prefer: \"fn\", // pronoun \"we\"\n over: \"fs\", // preposition\n context: \"sentence_start\",\n description: \"við at sentence start = pronoun 'we'\",\n },\n {\n word: \"við\",\n prefer: \"fs\", // preposition\n over: \"fn\", // pronoun\n context: \"before_noun\",\n description: \"við before noun = preposition 'by/at'\",\n },\n\n // \"af\" - preposition vs adverb\n {\n word: \"af\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"af before noun = preposition 'of/from'\",\n },\n\n // \"til\" - preposition\n {\n word: \"til\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"til before noun = preposition 'to'\",\n },\n\n // \"um\" - preposition vs adverb\n {\n word: \"um\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"um before noun = preposition 'about/around'\",\n },\n\n // \"yfir\" - preposition vs adverb\n {\n word: \"yfir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"yfir before noun = preposition 'over'\",\n },\n\n // \"undir\" - preposition vs adverb\n {\n word: \"undir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"undir before noun = preposition 'under'\",\n },\n\n // \"fyrir\" - preposition vs adverb\n {\n word: \"fyrir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"fyrir before noun = preposition 'for/before'\",\n },\n\n // \"eftir\" - preposition vs adverb\n {\n word: \"eftir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"eftir before noun = preposition 'after'\",\n },\n\n // \"frá\" - preposition\n {\n word: \"frá\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"frá before noun = preposition 'from'\",\n },\n\n // \"með\" - preposition vs adverb\n {\n word: \"með\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"með before noun = preposition 'with'\",\n },\n\n // \"í\" - preposition\n {\n word: \"í\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"í before noun = preposition 'in'\",\n },\n\n // \"úr\" - preposition vs noun (watch)\n {\n word: \"úr\",\n prefer: \"fs\",\n over: \"no\",\n context: \"before_noun\",\n description: \"úr before noun = preposition 'out of'\",\n },\n];\n\n/**\n * Look up rules that apply to a specific word.\n */\nexport function getRulesForWord(word: string): DisambiguationRule[] {\n const normalized = word.toLowerCase();\n return DISAMBIGUATION_RULES.filter((r) => r.word === normalized);\n}\n\n/**\n * Check if a word has disambiguation rules.\n */\nexport function hasDisambiguationRules(word: string): boolean {\n return DISAMBIGUATION_RULES.some((r) => r.word === word.toLowerCase());\n}\n","/**\n * Mini-grammar disambiguation rules for Icelandic.\n *\n * Uses case government (forsetningar stjórna falli) to disambiguate\n * prepositions from other parts of speech. For example:\n * - \"á\" + dative noun = preposition \"on/at\"\n * - \"á\" after pronoun = verb \"eiga\" (to own)\n *\n * Based on Greynir's Prepositions.conf but simplified for fast lookup.\n */\n\nimport type {\n GrammaticalCase,\n LemmaWithMorph,\n LemmaWithPOS,\n WordClass,\n} from \"./types.js\";\n\n/**\n * Interface for lemmatizer used in grammar rules.\n */\nexport interface GrammarLemmatizerLike {\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Preposition case government rules.\n *\n * Maps preposition lemma to the grammatical cases it governs.\n * When a preposition is followed by a noun in one of these cases,\n * we can be confident it's being used as a preposition.\n *\n * Source: Greynir's Prepositions.conf\n */\nexport const PREPOSITION_CASES: Map<string, Set<GrammaticalCase>> = new Map<string, Set<GrammaticalCase>>([\n // Both accusative and dative\n [\"á\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // on/at (þf=direction, þgf=location)\n [\"í\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // in (þf=into, þgf=inside)\n [\"við\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // at/by (þf=against, þgf=near)\n [\"með\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // with (þf=bring, þgf=accompany)\n [\"undir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // under (þf=motion, þgf=position)\n [\"yfir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // over (þf=motion, þgf=position)\n [\"fyrir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // for/before (þf=in exchange, þgf=in front)\n\n // Accusative only\n [\"um\", new Set<GrammaticalCase>([\"þf\"])], // about/around\n [\"gegnum\", new Set<GrammaticalCase>([\"þf\"])], // through\n [\"kringum\", new Set<GrammaticalCase>([\"þf\"])], // around\n [\"umhverfis\", new Set<GrammaticalCase>([\"þf\"])], // around/surrounding\n\n // Dative only\n [\"af\", new Set<GrammaticalCase>([\"þgf\"])], // of/from\n [\"frá\", new Set<GrammaticalCase>([\"þgf\"])], // from\n [\"hjá\", new Set<GrammaticalCase>([\"þgf\"])], // at/with (someone's place)\n [\"úr\", new Set<GrammaticalCase>([\"þgf\"])], // out of\n [\"að\", new Set<GrammaticalCase>([\"þgf\"])], // to/at\n [\"móti\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"nálægt\", new Set<GrammaticalCase>([\"þgf\"])], // near\n [\"gegn\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"gagnvart\", new Set<GrammaticalCase>([\"þgf\"])], // towards/regarding\n [\"handa\", new Set<GrammaticalCase>([\"þgf\"])], // for (someone)\n [\"meðal\", new Set<GrammaticalCase>([\"ef\"])], // among (actually genitive)\n\n // Genitive only\n [\"til\", new Set<GrammaticalCase>([\"ef\"])], // to\n [\"án\", new Set<GrammaticalCase>([\"ef\"])], // without\n [\"vegna\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"sakir\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"utan\", new Set<GrammaticalCase>([\"ef\"])], // outside\n [\"innan\", new Set<GrammaticalCase>([\"ef\"])], // inside\n [\"meðfram\", new Set<GrammaticalCase>([\"þgf\"])], // along\n [\"milli\", new Set<GrammaticalCase>([\"ef\"])], // between\n [\"auk\", new Set<GrammaticalCase>([\"ef\"])], // in addition to\n [\"í stað\", new Set<GrammaticalCase>([\"ef\"])], // instead of\n]);\n\n/**\n * Nominative-case pronouns that can precede verbs.\n * When one of these is followed by a potentially ambiguous word,\n * prefer the verb reading.\n */\nexport const NOMINATIVE_PRONOUNS = new Set([\n \"ég\",\n \"þú\",\n \"hann\",\n \"hún\",\n \"það\",\n \"við\",\n \"þið\",\n \"þeir\",\n \"þær\",\n \"þau\",\n]);\n\n/**\n * Result of applying a mini-grammar rule.\n */\nexport interface GrammarRuleMatch {\n /** The preferred lemma */\n lemma: string;\n /** The preferred POS */\n pos: WordClass;\n /** Rule that matched */\n rule: string;\n /** Confidence score (0-1) */\n confidence: number;\n}\n\n/**\n * Check if a preposition candidate can govern the case of the following word.\n *\n * @param prepLemma - The potential preposition lemma\n * @param nextWordMorph - Morphological features of the next word\n * @returns True if the preposition can govern this case\n */\nexport function canGovernCase(\n prepLemma: string,\n nextWordCase: GrammaticalCase | undefined\n): boolean {\n if (!nextWordCase) return false;\n const cases = PREPOSITION_CASES.get(prepLemma);\n return cases?.has(nextWordCase) ?? false;\n}\n\n/**\n * Apply preposition+case rule to disambiguate.\n *\n * If the current word can be a preposition and the next word has\n * a case governed by that preposition, prefer the preposition reading.\n *\n * @param candidates - All possible readings of the current word\n * @param nextWordMorph - Morphological analyses of the next word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPrepositionRule(\n candidates: LemmaWithMorph[],\n nextWordMorph: LemmaWithMorph[]\n): GrammarRuleMatch | null {\n // Find preposition candidates\n const prepCandidates = candidates.filter((c) => c.pos === \"fs\");\n if (prepCandidates.length === 0) return null;\n\n // Check if any next word form has a case governed by any prep candidate\n for (const prep of prepCandidates) {\n for (const nextForm of nextWordMorph) {\n if (nextForm.morph?.case && canGovernCase(prep.lemma, nextForm.morph.case)) {\n return {\n lemma: prep.lemma,\n pos: \"fs\",\n rule: `prep+${nextForm.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n }\n\n return null;\n}\n\n/**\n * Apply pronoun+verb rule to disambiguate.\n *\n * If the previous word is a nominative pronoun and the current word\n * can be a verb, prefer the verb reading.\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPronounVerbRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null\n): GrammarRuleMatch | null {\n if (!prevWord) return null;\n\n const prevLower = prevWord.toLowerCase();\n if (!NOMINATIVE_PRONOUNS.has(prevLower)) return null;\n\n // Find verb candidates\n const verbCandidates = candidates.filter((c) => c.pos === \"so\");\n if (verbCandidates.length === 0) return null;\n\n // Prefer verb over preposition/noun when after pronoun\n const hasNonVerb = candidates.some((c) => c.pos !== \"so\");\n if (!hasNonVerb) return null;\n\n // Return the verb candidate (prefer eiga for \"á\")\n const eigaCandidate = verbCandidates.find((c) => c.lemma === \"eiga\");\n const verbCandidate = eigaCandidate ?? verbCandidates[0];\n\n return {\n lemma: verbCandidate.lemma,\n pos: \"so\",\n rule: \"pronoun+verb\",\n confidence: 0.85,\n };\n}\n\n/**\n * Apply noun-after-preposition rule to disambiguate.\n *\n * If the previous word is a preposition and the current word has a\n * noun candidate with a case governed by that preposition, prefer\n * the noun reading.\n *\n * This rule only applies when:\n * - The previous word is UNAMBIGUOUSLY a preposition (no pronoun reading), OR\n * - The current word has no verb candidate\n *\n * Example: \"til fundar\" → \"fundar\" is noun \"fundur\" (genitive), not verb \"funda\"\n * Counter-example: \"við fórum\" → \"við\" is pronoun, \"fórum\" is verb \"fara\"\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @param lemmatizer - Lemmatizer for looking up the previous word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyNounAfterPrepositionRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n lemmatizer: GrammarLemmatizerLike | null\n): GrammarRuleMatch | null {\n if (!prevWord || !lemmatizer?.lemmatizeWithPOS) return null;\n\n // Check if previous word is a preposition\n const prevLemmas = lemmatizer.lemmatizeWithPOS(prevWord);\n const prepCandidate = prevLemmas.find((l) => l.pos === \"fs\");\n if (!prepCandidate) return null;\n\n // Check if the previous word could also be a pronoun\n const hasPronounReading = prevLemmas.some((l) => l.pos === \"fn\");\n\n // Check if current word has a verb candidate\n const hasVerbCandidate = candidates.some((c) => c.pos === \"so\");\n\n // If prevWord is ambiguously pronoun/preposition AND current word can be a verb,\n // don't apply this rule (let pronoun+verb rule or bigrams handle it)\n if (hasPronounReading && hasVerbCandidate) {\n return null;\n }\n\n // Get cases this preposition governs\n const governedCases = PREPOSITION_CASES.get(prepCandidate.lemma);\n if (!governedCases) return null;\n\n // Find noun candidate with matching case\n const nounCandidates = candidates.filter((c) => c.pos === \"no\");\n for (const noun of nounCandidates) {\n if (noun.morph?.case && governedCases.has(noun.morph.case)) {\n return {\n lemma: noun.lemma,\n pos: \"no\",\n rule: `noun_after_prep+${noun.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n\n return null;\n}\n\n/**\n * Apply all mini-grammar rules in sequence.\n *\n * Rules are applied in order of specificity:\n * 1. Preposition + case government (most reliable)\n * 2. Noun after preposition (governed case)\n * 3. Pronoun + verb pattern\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - Previous word (raw form)\n * @param nextWordMorph - Morphological analyses of the next word\n * @param lemmatizer - Optional lemmatizer for looking up previous word POS\n * @returns GrammarRuleMatch if any rule applies, null otherwise\n */\nexport function applyGrammarRules(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n nextWordMorph: LemmaWithMorph[],\n lemmatizer: GrammarLemmatizerLike | null = null\n): GrammarRuleMatch | null {\n // Rule 1: Preposition + governed case\n const prepRule = applyPrepositionRule(candidates, nextWordMorph);\n if (prepRule) return prepRule;\n\n // Rule 2: Noun after preposition with governed case\n const nounAfterPrepRule = applyNounAfterPrepositionRule(candidates, prevWord, lemmatizer);\n if (nounAfterPrepRule) return nounAfterPrepRule;\n\n // Rule 3: Pronoun + verb\n const verbRule = applyPronounVerbRule(candidates, prevWord);\n if (verbRule) return verbRule;\n\n return null;\n}\n\n/**\n * Check if a word is a known preposition.\n */\nexport function isKnownPreposition(lemma: string): boolean {\n return PREPOSITION_CASES.has(lemma);\n}\n\n/**\n * Get the cases governed by a preposition.\n */\nexport function getGovernedCases(prepLemma: string): Set<GrammaticalCase> | undefined {\n return PREPOSITION_CASES.get(prepLemma);\n}\n","/**\n * Disambiguation algorithm using a multi-phase pipeline.\n *\n * When a word has multiple possible lemmas, use surrounding context\n * and linguistic rules to select the most likely one.\n *\n * Pipeline phases:\n * 1. Unambiguous - words with only one lemma candidate\n * 2. Phrase rules - multi-word expressions and fixed phrases\n * 3. Disambiguation rules - contextual preferences (e.g., \"á\" after pronoun = verb)\n * 4. Grammar rules - case government (preposition + case noun)\n * 5. Word bigrams - statistical scoring using bigram frequencies\n * 6. Fallback - use first lemma if no other evidence\n */\n\nimport { STOPWORDS_IS } from \"./stopwords.js\";\nimport type { LemmatizerLike, LemmaWithPOS, LemmaWithMorph, BigramProvider, WordClass } from \"./types.js\";\nimport { DISAMBIGUATION_RULES, type DisambiguationRule } from \"./disambiguation-rules.js\";\nimport { applyGrammarRules } from \"./mini-grammar.js\";\n\nexport interface DisambiguatorOptions {\n /** Weight for left context (previous word) */\n leftWeight?: number;\n /** Weight for right context (next word) */\n rightWeight?: number;\n /** Enable preference rules (e.g., \"á\" context rules) */\n usePreferenceRules?: boolean;\n /** Enable grammar rules (case government) */\n useGrammarRules?: boolean;\n}\n\nexport interface DisambiguatedToken {\n /** Original token */\n token: string;\n /** Chosen lemma */\n lemma: string;\n /** Part of speech (if available) */\n pos?: WordClass;\n /** All candidate lemmas */\n candidates: string[];\n /** Candidates with POS (if available) */\n candidatesWithPOS?: LemmaWithPOS[];\n /** Was disambiguation needed? */\n ambiguous: boolean;\n /** Confidence score (0-1) */\n confidence: number;\n /** Which phase resolved this token */\n resolvedBy?: string;\n}\n\n/**\n * Extended lemmatizer interface that supports morphological lookup.\n */\ninterface MorphLemmatizerLike extends LemmatizerLike {\n lemmatizeWithMorph?(word: string): LemmaWithMorph[];\n}\n\n/**\n * Context for disambiguation, including surrounding tokens.\n */\ninterface DisambiguationContext {\n /** Previous word (if any) */\n prevWord: string | null;\n /** Next word (if any) */\n nextWord: string | null;\n /** Previous token's lemmas (if available) */\n prevLemmas?: string[];\n /** Next token's lemmas (if available) */\n nextLemmas?: string[];\n /** Next word's morphological analyses (if available) */\n nextWordMorph?: LemmaWithMorph[];\n /** All tokens in the sequence */\n allTokens: string[];\n /** Current index in the sequence */\n index: number;\n}\n\nexport interface DisambiguationContextHint {\n prevLemmas?: string[];\n nextLemmas?: string[];\n}\n\n/**\n * A disambiguation phase that processes candidates.\n */\ninterface DisambiguationPhase {\n name: string;\n run(\n candidates: LemmaWithPOS[],\n context: DisambiguationContext,\n disambiguator: Disambiguator\n ): { lemma: string; pos?: WordClass; confidence: number } | null;\n}\n\n/**\n * Phase 1: Handle unambiguous cases (single candidate).\n */\nconst unambiguousPhase: DisambiguationPhase = {\n name: \"unambiguous\",\n run(candidates) {\n if (candidates.length === 1) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1.0,\n };\n }\n return null;\n },\n};\n\n/**\n * Phase 2: Apply disambiguation rules based on context.\n */\nconst preferenceRulesPhase: DisambiguationPhase = {\n name: \"preference_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.usePreferenceRules) return null;\n\n for (const rule of DISAMBIGUATION_RULES) {\n const match = applyRule(rule, candidates, context);\n if (match) {\n return {\n lemma: match.lemma,\n pos: match.pos,\n confidence: 0.85,\n };\n }\n }\n return null;\n },\n};\n\n/**\n * Apply a single disambiguation rule.\n */\nfunction applyRule(\n rule: DisambiguationRule,\n candidates: LemmaWithPOS[],\n context: DisambiguationContext\n): LemmaWithPOS | null {\n // Find candidates matching the word and preferred POS\n const preferredCandidate = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.prefer\n );\n const dispreferred = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.over\n );\n\n if (!preferredCandidate || !dispreferred) {\n return null;\n }\n\n // Check context condition\n if (rule.context === \"before_noun\") {\n // Next word should be a noun (starts with uppercase or known noun)\n const next = context.nextWord;\n if (next && /^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"before_verb\") {\n // Next word suggests a verb context (harder to detect without POS)\n // Simple heuristic: if next word is lowercase and not a common noun determiner\n const next = context.nextWord?.toLowerCase();\n if (next && ![\"þessi\", \"þetta\", \"sá\", \"sú\", \"það\", \"hinn\", \"hin\", \"hið\"].includes(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"after_pronoun\") {\n // Previous word is a pronoun\n const prev = context.prevWord?.toLowerCase();\n const pronouns = [\"ég\", \"þú\", \"hann\", \"hún\", \"það\", \"við\", \"þið\", \"þeir\", \"þær\", \"þau\"];\n if (prev && pronouns.includes(prev)) {\n return preferredCandidate;\n }\n }\n\n return null;\n}\n\n/**\n * Phase 3: Apply grammar rules (case government).\n *\n * Uses morphological features to apply preposition+case and pronoun+verb rules.\n */\nconst grammarRulesPhase: DisambiguationPhase = {\n name: \"grammar_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.useGrammarRules) return null;\n\n // Convert LemmaWithPOS to LemmaWithMorph if needed\n const candidatesWithMorph: LemmaWithMorph[] = candidates.map((c) => ({\n ...c,\n morph: undefined,\n }));\n\n // Get morphological info for candidates if available\n const currentWord = context.allTokens[context.index];\n if (currentWord) {\n const morphCandidates = disambiguator.getMorph(currentWord);\n if (morphCandidates) {\n // Replace with morph-enriched candidates\n candidatesWithMorph.length = 0;\n candidatesWithMorph.push(...morphCandidates);\n }\n }\n\n // Apply grammar rules\n const result = applyGrammarRules(\n candidatesWithMorph,\n context.prevWord,\n context.nextWordMorph ?? [],\n disambiguator.lemmatizer\n );\n\n if (result) {\n return {\n lemma: result.lemma,\n pos: result.pos,\n confidence: result.confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 4: Score using bigram frequencies.\n */\nconst bigramPhase: DisambiguationPhase = {\n name: \"word_bigrams\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.bigrams) return null;\n if (candidates.length === 0) return null;\n\n const scores: { candidate: LemmaWithPOS; score: number }[] = [];\n\n for (const candidate of candidates) {\n let score = 0;\n\n // Left context: bigram(prevWord, lemma)\n if (context.prevWord) {\n const prevLemmas = context.prevLemmas || disambiguator.lemmatizer.lemmatize(context.prevWord);\n for (const prevLemma of prevLemmas) {\n const freq = disambiguator.bigrams.freq(prevLemma, candidate.lemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.leftWeight;\n }\n }\n }\n\n // Right context: bigram(lemma, nextWord)\n if (context.nextWord) {\n const nextLemmas = context.nextLemmas || disambiguator.lemmatizer.lemmatize(context.nextWord);\n for (const nextLemma of nextLemmas) {\n const freq = disambiguator.bigrams.freq(candidate.lemma, nextLemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.rightWeight;\n }\n }\n }\n\n scores.push({ candidate, score });\n }\n\n // Sort by score\n scores.sort((a, b) => b.score - a.score);\n\n // Check if we have scores and if top score is positive\n if (scores.length > 0 && scores[0].score > 0) {\n const topScore = scores[0].score;\n const totalScore = scores.reduce((sum, s) => sum + Math.exp(s.score), 0);\n const confidence = totalScore > 0 ? Math.exp(topScore) / totalScore : 0.5;\n\n return {\n lemma: scores[0].candidate.lemma,\n pos: scores[0].candidate.pos,\n confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 5: Fallback to first candidate.\n */\nconst fallbackPhase: DisambiguationPhase = {\n name: \"fallback\",\n run(candidates) {\n if (candidates.length > 0) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1 / candidates.length,\n };\n }\n return null;\n },\n};\n\n/**\n * All disambiguation phases in order.\n */\nconst PHASES: DisambiguationPhase[] = [\n unambiguousPhase,\n preferenceRulesPhase,\n grammarRulesPhase,\n bigramPhase,\n fallbackPhase,\n];\n\n/**\n * Disambiguate lemmas using a multi-phase pipeline.\n */\nexport class Disambiguator {\n lemmatizer: MorphLemmatizerLike;\n bigrams: BigramProvider | null;\n leftWeight: number;\n rightWeight: number;\n usePreferenceRules: boolean;\n useGrammarRules: boolean;\n private morphCache: Map<string, LemmaWithMorph[]> | null;\n\n constructor(\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider | null = null,\n options: DisambiguatorOptions = {}\n ) {\n this.lemmatizer = lemmatizer as MorphLemmatizerLike;\n this.bigrams = bigrams;\n this.leftWeight = options.leftWeight ?? 1.0;\n this.rightWeight = options.rightWeight ?? 1.0;\n this.usePreferenceRules = options.usePreferenceRules ?? true;\n this.useGrammarRules = options.useGrammarRules ?? true;\n this.morphCache = this.lemmatizer.lemmatizeWithMorph ? new Map() : null;\n }\n\n private getMorph(word: string): LemmaWithMorph[] | undefined {\n if (!this.lemmatizer.lemmatizeWithMorph || !this.morphCache) return undefined;\n const key = word.toLowerCase();\n const cached = this.morphCache.get(key);\n if (cached) return cached;\n const morph = this.lemmatizer.lemmatizeWithMorph(word);\n this.morphCache.set(key, morph);\n return morph;\n }\n\n /**\n * Disambiguate a single word given context.\n *\n * @param word - The word to lemmatize\n * @param prevWord - Previous word (left context), or null\n * @param nextWord - Next word (right context), or null\n */\n disambiguate(\n word: string,\n prevWord: string | null,\n nextWord: string | null,\n hint: DisambiguationContextHint = {}\n ): DisambiguatedToken {\n // Get candidates with POS if available\n let candidatesWithPOS: LemmaWithPOS[];\n if (this.lemmatizer.lemmatizeWithPOS) {\n candidatesWithPOS = this.lemmatizer.lemmatizeWithPOS(word);\n } else {\n // Fall back to plain lemmatization\n const lemmas = this.lemmatizer.lemmatize(word);\n candidatesWithPOS = lemmas.map((l) => ({ lemma: l, pos: \"no\" as WordClass }));\n }\n\n const candidates = candidatesWithPOS.map((c) => c.lemma);\n const token = word;\n\n // Get morphological info for next word if available\n let nextWordMorph: LemmaWithMorph[] | undefined;\n if (nextWord) {\n nextWordMorph = this.getMorph(nextWord);\n }\n\n // Build context\n const context: DisambiguationContext = {\n prevWord,\n nextWord,\n prevLemmas: hint.prevLemmas,\n nextLemmas: hint.nextLemmas,\n nextWordMorph,\n allTokens: [word],\n index: 0,\n };\n\n // Run through phases\n for (const phase of PHASES) {\n const result = phase.run(candidatesWithPOS, context, this);\n if (result) {\n return {\n token,\n lemma: result.lemma,\n pos: result.pos,\n candidates,\n candidatesWithPOS,\n ambiguous: candidates.length > 1,\n confidence: result.confidence,\n resolvedBy: phase.name,\n };\n }\n }\n\n // Should never reach here due to fallback phase\n return {\n token,\n lemma: word.toLowerCase(),\n candidates,\n candidatesWithPOS,\n ambiguous: false,\n confidence: 0,\n resolvedBy: \"none\",\n };\n }\n\n /**\n * Disambiguate an array of tokens.\n *\n * @param tokens - Array of word tokens\n * @returns Array of disambiguated tokens\n */\n disambiguateAll(tokens: string[]): DisambiguatedToken[] {\n const results: DisambiguatedToken[] = [];\n\n for (let i = 0; i < tokens.length; i++) {\n const word = tokens[i];\n const prevWord = i > 0 ? tokens[i - 1] : null;\n const nextWord = i < tokens.length - 1 ? tokens[i + 1] : null;\n\n results.push(this.disambiguate(word, prevWord, nextWord));\n }\n\n return results;\n }\n\n /**\n * Extract unique lemmas from text with disambiguation.\n *\n * @param tokens - Array of word tokens\n * @returns Set of unique lemmas (best guess for each ambiguous word)\n */\n extractLemmas(tokens: string[]): Set<string> {\n const lemmas = new Set<string>();\n const disambiguated = this.disambiguateAll(tokens);\n\n for (const result of disambiguated) {\n lemmas.add(result.lemma);\n }\n\n return lemmas;\n }\n}\n\n/**\n * Shortcut for simple lemma extraction with disambiguation.\n */\nexport function extractDisambiguatedLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider,\n options: {\n tokenize?: (text: string) => string[];\n removeStopwords?: boolean;\n } = {}\n): Set<string> {\n const { tokenize, removeStopwords } = options;\n\n // Tokenize\n const tokens = tokenize\n ? tokenize(text)\n : text\n .split(/\\s+/)\n .filter((t) => t.length > 0)\n .map((t) => t.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\"))\n .filter((t) => t.length > 0);\n\n // Disambiguate\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n const lemmas = disambiguator.extractLemmas(tokens);\n\n // Filter stopwords if requested\n if (removeStopwords) {\n for (const lemma of lemmas) {\n if (STOPWORDS_IS.has(lemma)) {\n lemmas.delete(lemma);\n }\n }\n }\n\n return lemmas;\n}\n","/**\n * Shared type definitions to avoid circular imports.\n */\n\n/**\n * Word class (part-of-speech) codes from BÍN.\n *\n * These are simplified from BÍN's detailed categories:\n * - kk/kvk/hk (gendered nouns) → 'no'\n * - pfn (personal pronoun) → 'fn'\n */\nexport type WordClass =\n | \"no\" // nafnorð (noun)\n | \"so\" // sagnorð (verb)\n | \"lo\" // lýsingarorð (adjective)\n | \"ao\" // atviksorð (adverb)\n | \"fs\" // forsetning (preposition)\n | \"fn\" // fornafn (pronoun)\n | \"st\" // samtenging (conjunction)\n | \"to\" // töluorð (numeral)\n | \"gr\" // greinir (article)\n | \"uh\"; // upphrópun (interjection)\n\n/**\n * Human-readable names for word classes.\n */\nexport const WORD_CLASS_NAMES: Record<WordClass, string> = {\n no: \"noun\",\n so: \"verb\",\n lo: \"adjective\",\n ao: \"adverb\",\n fs: \"preposition\",\n fn: \"pronoun\",\n st: \"conjunction\",\n to: \"numeral\",\n gr: \"article\",\n uh: \"interjection\",\n};\n\n/**\n * Icelandic names for word classes.\n */\nexport const WORD_CLASS_NAMES_IS: Record<WordClass, string> = {\n no: \"nafnorð\",\n so: \"sagnorð\",\n lo: \"lýsingarorð\",\n ao: \"atviksorð\",\n fs: \"forsetning\",\n fn: \"fornafn\",\n st: \"samtenging\",\n to: \"töluorð\",\n gr: \"greinir\",\n uh: \"upphrópun\",\n};\n\n/**\n * Grammatical case (fall) in Icelandic.\n */\nexport type GrammaticalCase = \"nf\" | \"þf\" | \"þgf\" | \"ef\";\n\n/**\n * Grammatical gender (kyn) in Icelandic.\n */\nexport type GrammaticalGender = \"kk\" | \"kvk\" | \"hk\";\n\n/**\n * Grammatical number (tala) in Icelandic.\n */\nexport type GrammaticalNumber = \"et\" | \"ft\";\n\n/**\n * Human-readable names for cases.\n */\nexport const CASE_NAMES: Record<GrammaticalCase, string> = {\n nf: \"nominative\",\n þf: \"accusative\",\n þgf: \"dative\",\n ef: \"genitive\",\n};\n\n/**\n * Human-readable names for genders.\n */\nexport const GENDER_NAMES: Record<GrammaticalGender, string> = {\n kk: \"masculine\",\n kvk: \"feminine\",\n hk: \"neuter\",\n};\n\n/**\n * Human-readable names for numbers.\n */\nexport const NUMBER_NAMES: Record<GrammaticalNumber, string> = {\n et: \"singular\",\n ft: \"plural\",\n};\n\n/**\n * Morphological features extracted from BÍN.\n */\nexport interface MorphFeatures {\n case?: GrammaticalCase;\n gender?: GrammaticalGender;\n number?: GrammaticalNumber;\n}\n\n/**\n * A lemma with its word class.\n */\nexport interface LemmaWithPOS {\n lemma: string;\n pos: WordClass;\n}\n\n/**\n * A lemma with word class and morphological features.\n */\nexport interface LemmaWithMorph extends LemmaWithPOS {\n morph?: MorphFeatures;\n}\n\n/**\n * Interface for lemmatizer-like objects.\n * Used to avoid circular dependency between modules.\n */\nexport interface LemmatizerLike {\n lemmatize(word: string): string[];\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Interface for bigram frequency lookup.\n * Used for disambiguation scoring.\n */\nexport interface BigramProvider {\n freq(word1: string, word2: string): number;\n}\n","/**\n * Minimal Bloom filter for compact set membership checks.\n */\n\nexport interface BloomFilterOptions {\n falsePositiveRate?: number;\n maxHashFunctions?: number;\n}\n\nexport class BloomFilter {\n private bits: Uint8Array;\n private sizeBits: number;\n private hashCount: number;\n\n private constructor(bits: Uint8Array, sizeBits: number, hashCount: number) {\n this.bits = bits;\n this.sizeBits = sizeBits;\n this.hashCount = hashCount;\n }\n\n static fromValues(values: string[], options: BloomFilterOptions = {}): BloomFilter {\n const n = Math.max(values.length, 1);\n const p = options.falsePositiveRate ?? 0.01;\n\n const m = Math.max(1, Math.ceil((-n * Math.log(p)) / (Math.LN2 * Math.LN2)));\n const k = Math.max(1, Math.round((m / n) * Math.LN2));\n const hashCount = options.maxHashFunctions\n ? Math.min(k, options.maxHashFunctions)\n : k;\n\n const bytes = Math.ceil(m / 8);\n const bits = new Uint8Array(bytes);\n const filter = new BloomFilter(bits, m, hashCount);\n\n for (const value of values) {\n filter.add(value);\n }\n\n return filter;\n }\n\n add(value: string): void {\n const [h1, h2] = this.hashes(value);\n for (let i = 0; i < this.hashCount; i++) {\n const combined = (h1 + i * h2) % this.sizeBits;\n this.setBit(combined);\n }\n }\n\n has(value: string): boolean {\n const [h1, h2] = this.hashes(value);\n for (let i = 0; i < this.hashCount; i++) {\n const combined = (h1 + i * h2) % this.sizeBits;\n if (!this.getBit(combined)) return false;\n }\n return true;\n }\n\n private setBit(index: number): void {\n const byteIndex = index >>> 3;\n const bit = index & 7;\n this.bits[byteIndex] |= 1 << bit;\n }\n\n private getBit(index: number): boolean {\n const byteIndex = index >>> 3;\n const bit = index & 7;\n return (this.bits[byteIndex] & (1 << bit)) !== 0;\n }\n\n private hashes(value: string): [number, number] {\n const str = value.toLowerCase();\n let hash1 = 2166136261 >>> 0;\n let hash2 = 2166136261 >>> 0;\n\n for (let i = 0; i < str.length; i++) {\n const code = str.charCodeAt(i);\n hash1 ^= code;\n hash1 = Math.imul(hash1, 16777619) >>> 0;\n\n hash2 ^= code;\n hash2 = Math.imul(hash2, 2166136261) >>> 0;\n }\n\n hash2 ^= hash2 >>> 13;\n hash2 = Math.imul(hash2, 0x85ebca6b) >>> 0;\n hash2 ^= hash2 >>> 16;\n\n return [hash1 >>> 0, hash2 >>> 0 || 0x27d4eb2d];\n }\n}\n","/**\n * Compound word splitting for Icelandic.\n *\n * Icelandic compounds are written as single words:\n * - \"bílstjóri\" = \"bíl\" (car) + \"stjóri\" (driver)\n * - \"sjúkrahús\" = \"sjúkra\" (sick-GEN) + \"hús\" (house)\n *\n * Strategy:\n * 1. Try splitting at each position\n * 2. Check if both parts are known words\n * 3. Handle common compound linking letters (s, u, a)\n * 4. Score by part lengths (prefer balanced splits)\n */\n\nimport type { LemmatizerLike } from \"./types.js\";\nimport { BloomFilter, type BloomFilterOptions } from \"./bloom.js\";\n\n/**\n * Protected lemmas that should NEVER be split as compounds.\n * Mostly place names that happen to end in common word parts.\n */\nexport const PROTECTED_LEMMAS = new Set([\n // Countries ending in -land\n \"ísland\",\n \"england\",\n \"írland\",\n \"skotland\",\n \"finnland\",\n \"grænland\",\n \"holland\",\n \"þýskaland\",\n \"frakkland\",\n \"pólland\",\n \"tékkland\",\n \"svissland\",\n \"rússland\",\n \"eistland\",\n \"lettland\",\n \"litháen\",\n // Other countries/regions\n \"danmörk\",\n \"noregur\",\n \"svíþjóð\",\n \"bandaríkin\",\n \"spánn\",\n \"portúgal\",\n \"ítalía\",\n \"grikkland\",\n // Icelandic place names (from BÍN)\n \"þingvellir\",\n \"akureyri\",\n \"ísafjörður\",\n \"reykjavík\",\n \"keflavík\",\n \"hafnarfjörður\",\n \"kópavogur\",\n \"seltjarnarnes\",\n \"garðabær\",\n \"mosfellsbær\",\n \"vestmannaeyjar\",\n \"húsavík\",\n \"sauðárkrókur\",\n \"siglufjörður\",\n \"ólafsfjörður\",\n \"dalvík\",\n \"egilsstaðir\",\n \"neskaupstaður\",\n \"seyðisfjörður\",\n \"eskifjörður\",\n \"reyðarfjörður\",\n \"fáskrúðsfjörður\",\n \"stöðvarfjörður\",\n \"djúpivogur\",\n \"höfn\",\n \"vík\",\n \"selfoss\",\n \"hveragerði\",\n \"þorlákshöfn\",\n \"grindavík\",\n \"sandgerði\",\n \"borgarnes\",\n \"stykkishólmur\",\n \"grundarfjörður\",\n \"ólafsvík\",\n \"búðardalur\",\n \"patreksfjörður\",\n \"flateyri\",\n \"suðureyri\",\n \"bolungarvík\",\n \"hólmavík\",\n \"hvammstangi\",\n \"blönduós\",\n \"skagaströnd\",\n \"varmahlíð\",\n // Literary/historical places\n \"hlíðarendi\",\n \"bergþórshvol\",\n // Company names\n \"íslandsbanki\",\n \"landsbankinn\",\n \"arionbanki\",\n // Institutions\n \"alþingi\",\n]);\n\nexport interface CompoundSplit {\n /** Original word */\n word: string;\n /** Constituent parts (lemmatized) - all variants for indexing */\n parts: string[];\n /** All index terms: parts + original word */\n indexTerms: string[];\n /** Split confidence (0-1) */\n confidence: number;\n /** Is this a compound? */\n isCompound: boolean;\n}\n\n/**\n * Splitting mode for compound words.\n *\n * - \"aggressive\": Try to split all words, even known BÍN entries\n * - \"balanced\": Split unknown words; split known words only if high confidence\n * - \"conservative\": Only split at hyphens or very high confidence cases\n */\nexport type CompoundSplitMode = \"aggressive\" | \"balanced\" | \"conservative\";\n\nexport interface CompoundSplitterOptions {\n /**\n * Minimum part length.\n * Default: 3. Set to 2 for more aggressive splitting (e.g., \"ís\" in \"ísland\").\n */\n minPartLength?: number;\n /** Try removing linking letters (s, u, a) */\n tryLinkingLetters?: boolean;\n /**\n * Splitting mode.\n * Default: \"balanced\"\n */\n mode?: CompoundSplitMode;\n}\n\n/**\n * Common compound tail words in Icelandic.\n * These are often the second part of compounds and boost split confidence.\n */\nconst COMMON_COMPOUND_TAILS = new Set([\n // People/roles\n \"maður\",\n \"kona\",\n \"stjóri\",\n \"ráðherra\",\n \"forseti\",\n \"formaður\",\n \"fulltrúi\",\n \"starfsmaður\",\n // Places\n \"hús\",\n \"staður\",\n \"vegur\",\n \"borg\",\n \"bær\",\n \"dalur\",\n \"fjörður\",\n // Organizations\n \"félag\",\n \"banki\",\n \"sjóður\",\n \"stofnun\",\n \"ráð\",\n // Things/concepts\n \"rannsókn\",\n \"greiðsla\",\n \"mál\",\n \"kerfi\",\n \"verk\",\n \"þjónusta\",\n \"rekstur\",\n \"viðskipti\",\n \"verð\",\n \"kostnaður\",\n]);\n\n/**\n * Very common standalone words that should rarely be compound parts.\n * Penalize splits where BOTH parts are common standalone words.\n */\nconst COMMON_STANDALONE = new Set([\n \"vera\",\n \"hafa\",\n \"gera\",\n \"fara\",\n \"koma\",\n \"segja\",\n \"vilja\",\n \"mega\",\n \"þurfa\",\n \"verða\",\n \"geta\",\n \"sjá\",\n \"taka\",\n \"eiga\",\n \"láta\",\n \"halda\",\n \"leyfa\",\n \"búa\",\n]);\n\n/**\n * Common compound linking patterns in Icelandic.\n * These letters often join compound parts:\n * - \"s\" (genitive): húss + eigandi -> \"húseigandi\"\n * - \"u\" (genitive/linking): vatnu + fall -> \"vatnufall\" (rare)\n * - \"a\" (genitive): daga + blað -> \"dagablað\"\n */\nconst LINKING_PATTERNS = [\"s\", \"u\", \"a\"];\n\nexport class CompoundSplitter {\n private lemmatizer: LemmatizerLike;\n private minPartLength: number;\n private tryLinkingLetters: boolean;\n private knownLemmas: KnownLemmaLookup;\n private mode: CompoundSplitMode;\n\n constructor(\n lemmatizer: LemmatizerLike,\n knownLemmas: KnownLemmaLookup,\n options: CompoundSplitterOptions = {}\n ) {\n this.lemmatizer = lemmatizer;\n this.knownLemmas = knownLemmas;\n this.minPartLength = options.minPartLength ?? 3;\n this.tryLinkingLetters = options.tryLinkingLetters ?? true;\n this.mode = options.mode ?? \"balanced\";\n }\n\n /**\n * Helper to create a no-split result.\n */\n private noSplit(word: string, lemmas: string[]): CompoundSplit {\n return {\n word,\n parts: lemmas,\n indexTerms: lemmas,\n confidence: 0,\n isCompound: false,\n };\n }\n\n /**\n * Try to split a word into compound parts.\n *\n * Uses a lookup-first strategy:\n * 1. Check protected lemmas - never split\n * 2. Check if word is known in BÍN and unambiguous - don't split\n * 3. Apply mode-based splitting rules\n */\n split(word: string): CompoundSplit {\n const normalized = word.toLowerCase();\n\n // Step 1: Check protected lemmas - never split these\n const directLemmas = this.lemmatizer.lemmatize(word);\n const primaryLemma = directLemmas[0]?.toLowerCase();\n if (primaryLemma && PROTECTED_LEMMAS.has(primaryLemma)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Also check if the word itself is protected (for inflected forms)\n if (PROTECTED_LEMMAS.has(normalized)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 2: Check if known in BÍN and unambiguous\n // A word is \"known\" if lemmatization returned something other than the word itself\n const isKnownWord =\n directLemmas.length > 0 && directLemmas[0].toLowerCase() !== normalized;\n const isUnambiguous = directLemmas.length === 1;\n\n // For conservative mode, only split at hyphens\n if (this.mode === \"conservative\") {\n if (word.includes(\"-\")) {\n return this.splitAtHyphen(word, directLemmas);\n }\n return this.noSplit(word, directLemmas);\n }\n\n // For balanced mode, don't split unambiguous known words\n if (this.mode === \"balanced\" && isKnownWord && isUnambiguous) {\n // Exception: still try if the word is very long (likely a compound)\n if (normalized.length < 12) {\n return this.noSplit(word, directLemmas);\n }\n }\n\n // Too short to be a compound\n if (normalized.length < this.minPartLength * 2) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 3: Try algorithmic splitting\n const candidates: {\n leftParts: string[];\n rightParts: string[];\n score: number;\n }[] = [];\n\n for (\n let i = this.minPartLength;\n i <= normalized.length - this.minPartLength;\n i++\n ) {\n const leftPart = normalized.slice(0, i);\n const rightPart = normalized.slice(i);\n\n // Try direct split\n const directResult = this.trySplit(leftPart, rightPart);\n if (directResult) {\n candidates.push(directResult);\n }\n\n // Try with linking letters removed from split point\n if (this.tryLinkingLetters) {\n for (const linker of LINKING_PATTERNS) {\n // Remove linking letter from end of left part\n if (leftPart.endsWith(linker) && leftPart.length > this.minPartLength) {\n const trimmedLeft = leftPart.slice(0, -1);\n const result = this.trySplit(trimmedLeft, rightPart);\n if (result) {\n // Slightly lower score for linked compounds\n candidates.push({ ...result, score: result.score * 0.95 });\n }\n }\n }\n }\n }\n\n if (candidates.length === 0) {\n return this.noSplit(word, directLemmas);\n }\n\n // Pick best candidate by score\n candidates.sort((a, b) => b.score - a.score);\n const best = candidates[0];\n\n // In balanced mode, require higher confidence for known words\n if (this.mode === \"balanced\" && isKnownWord && best.score < 0.6) {\n return this.noSplit(word, directLemmas);\n }\n\n // Collect all unique parts from best split\n const parts = [...new Set([...best.leftParts, ...best.rightParts])];\n // Index terms include parts + original word for search\n const indexTerms = [...new Set([...parts, normalized])];\n\n return {\n word,\n parts,\n indexTerms,\n confidence: Math.min(best.score, 1),\n isCompound: true,\n };\n }\n\n /**\n * Split a hyphenated word.\n */\n private splitAtHyphen(word: string, directLemmas: string[]): CompoundSplit {\n const parts = word.split(\"-\").filter((p) => p.length > 0);\n if (parts.length < 2) {\n return this.noSplit(word, directLemmas);\n }\n\n const allParts: string[] = [];\n for (const part of parts) {\n const lemmas = this.lemmatizer.lemmatize(part);\n allParts.push(...lemmas);\n }\n\n const uniqueParts = [...new Set(allParts)];\n const indexTerms = [...new Set([...uniqueParts, word.toLowerCase()])];\n\n return {\n word,\n parts: uniqueParts,\n indexTerms,\n confidence: 0.9,\n isCompound: true,\n };\n }\n\n private trySplit(\n leftPart: string,\n rightPart: string\n ): { leftParts: string[]; rightParts: string[]; score: number } | null {\n // Get lemmas for both parts\n const leftLemmas = this.lemmatizer.lemmatize(leftPart);\n const rightLemmas = this.lemmatizer.lemmatize(rightPart);\n\n // Filter to known lemmas only, deduplicated\n const leftKnown = [...new Set(leftLemmas.filter((l) => this.knownLemmas.has(l)))];\n const rightKnown = [...new Set(rightLemmas.filter((l) => this.knownLemmas.has(l)))];\n\n if (leftKnown.length === 0 || rightKnown.length === 0) {\n return null;\n }\n\n // Calculate score with multiple factors\n let score = 0;\n\n // Factor 1: Length balance (20% weight)\n // Prefer balanced splits, but not too strictly\n const lengthBalance =\n 1 - Math.abs(leftPart.length - rightPart.length) / (leftPart.length + rightPart.length);\n score += lengthBalance * 0.2;\n\n // Factor 2: Part length bonus (20% weight)\n // Prefer longer parts (more likely to be real words)\n const avgLength = (leftPart.length + rightPart.length) / 2;\n const lengthBonus = Math.min(avgLength / 6, 1);\n score += lengthBonus * 0.2;\n\n // Factor 3: Common compound tail bonus (30% weight)\n // Strongly prefer splits where right part is a known compound tail\n const hasCompoundTail = rightKnown.some((lemma) => COMMON_COMPOUND_TAILS.has(lemma));\n if (hasCompoundTail) {\n score += 0.3;\n }\n\n // Factor 4: Penalty for both parts being common standalone words (30% weight)\n // E.g., \"ísland\" -> \"ís\" + \"land\" should be penalized\n const leftIsCommon = leftKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n const rightIsCommon = rightKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n if (leftIsCommon && rightIsCommon) {\n // Strong penalty if both parts are very common standalone\n score -= 0.3;\n } else if (!leftIsCommon && !rightIsCommon) {\n // Bonus if neither is a common standalone (more likely a real compound)\n score += 0.2;\n }\n\n // Factor 5: Minimum part length requirement\n // Very short parts (2-3 chars) get a penalty\n if (leftPart.length < 4 || rightPart.length < 4) {\n score -= 0.15;\n }\n\n // Return all known lemmas from both parts\n return {\n leftParts: leftKnown,\n rightParts: rightKnown,\n score: Math.max(0, score), // Ensure non-negative\n };\n }\n\n /**\n * Get all lemmas for a word, including compound parts.\n * Useful for search indexing.\n */\n getAllLemmas(word: string): string[] {\n const split = this.split(word);\n return split.indexTerms;\n }\n}\n\n/**\n * Create a set of known lemmas from the lemmatizer.\n * This is used to check if compound parts are valid words.\n */\nexport function createKnownLemmaSet(lemmas: string[]): Set<string> {\n return new Set(lemmas.map((l) => l.toLowerCase()));\n}\n\nexport interface KnownLemmaLookup {\n has(lemma: string): boolean;\n}\n\nexport interface KnownLemmaFilterOptions extends BloomFilterOptions {}\n\n/**\n * Create a compact lookup for known lemmas using a Bloom filter.\n * False positives are possible (more splits), false negatives are not.\n */\nexport function createKnownLemmaFilter(\n lemmas: string[],\n options: KnownLemmaFilterOptions = {}\n): KnownLemmaLookup {\n const normalized = lemmas.map((l) => l.toLowerCase());\n return BloomFilter.fromValues(normalized, options);\n}\n","/**\n * Static multi-word phrases for Icelandic.\n *\n * Source: Extracted from GreynirEngine's Phrases.conf (MIT License)\n * https://github.com/mideind/GreynirEngine\n *\n * These phrases should be recognized as units rather than individual words,\n * enabling better stopword detection and lemmatization.\n */\n\n/**\n * A static phrase definition.\n */\nexport interface StaticPhrase {\n /** The canonical/lemma form of the phrase */\n lemma: string;\n /** Whether this phrase functions as a stopword (e.g., \"til dæmis\") */\n isStopword: boolean;\n /** Part of speech category */\n pos?: \"ao\" | \"fs\" | \"st\" | \"entity\";\n}\n\n/**\n * Common Icelandic multi-word phrases.\n * Keys are lowercase, normalized forms.\n */\nexport const STATIC_PHRASES: Map<string, StaticPhrase> = new Map([\n // Adverbial phrases (ao frasi) - often function as stopwords\n [\"til dæmis\", { lemma: \"til dæmi\", isStopword: true, pos: \"ao\" }],\n [\"með öðrum orðum\", { lemma: \"með annar orð\", isStopword: true, pos: \"ao\" }],\n [\"í raun\", { lemma: \"í raun\", isStopword: true, pos: \"ao\" }],\n [\"í raun og veru\", { lemma: \"í raun og vera\", isStopword: true, pos: \"ao\" }],\n [\"af og til\", { lemma: \"af og til\", isStopword: true, pos: \"ao\" }],\n [\"aftur á móti\", { lemma: \"aftur á mót\", isStopword: true, pos: \"ao\" }],\n [\"alla vega\", { lemma: \"allur vegur\", isStopword: true, pos: \"ao\" }],\n [\"alls ekki\", { lemma: \"alls ekki\", isStopword: true, pos: \"ao\" }],\n [\"alls staðar\", { lemma: \"allur staður\", isStopword: true, pos: \"ao\" }],\n [\"allt í allt\", { lemma: \"allur í allur\", isStopword: true, pos: \"ao\" }],\n [\"annars vegar\", { lemma: \"annar vegur\", isStopword: true, pos: \"ao\" }],\n [\"auk þess\", { lemma: \"auk það\", isStopword: true, pos: \"ao\" }],\n [\"að auki\", { lemma: \"að auki\", isStopword: true, pos: \"ao\" }],\n [\"að vísu\", { lemma: \"að vís\", isStopword: true, pos: \"ao\" }],\n [\"að sjálfsögðu\", { lemma: \"að sjálfsagður\", isStopword: true, pos: \"ao\" }],\n [\"að minnsta kosti\", { lemma: \"að lítill kostur\", isStopword: true, pos: \"ao\" }],\n [\"að öllu leyti\", { lemma: \"að allur leyti\", isStopword: true, pos: \"ao\" }],\n [\"að nokkru leyti\", { lemma: \"að nokkur leyti\", isStopword: true, pos: \"ao\" }],\n [\"ef til vill\", { lemma: \"ef til vilja\", isStopword: true, pos: \"ao\" }],\n [\"einhvers staðar\", { lemma: \"einhver staður\", isStopword: true, pos: \"ao\" }],\n [\"einhvern veginn\", { lemma: \"einhver vegur\", isStopword: true, pos: \"ao\" }],\n [\"ekki síst\", { lemma: \"ekki síður\", isStopword: true, pos: \"ao\" }],\n [\"engu að síður\", { lemma: \"enginn að síður\", isStopword: true, pos: \"ao\" }],\n [\"fyrst og fremst\", { lemma: \"snemma og fremri\", isStopword: true, pos: \"ao\" }],\n [\"hins vegar\", { lemma: \"hinn vegur\", isStopword: true, pos: \"ao\" }],\n [\"hér og þar\", { lemma: \"hér og þar\", isStopword: true, pos: \"ao\" }],\n [\"hér um bil\", { lemma: \"hér um bil\", isStopword: true, pos: \"ao\" }],\n [\"hér á landi\", { lemma: \"hér á land\", isStopword: true, pos: \"ao\" }],\n [\"hvað mest\", { lemma: \"hvað mjög\", isStopword: true, pos: \"ao\" }],\n [\"hverju sinni\", { lemma: \"hver sinn\", isStopword: true, pos: \"ao\" }],\n [\"hvorki né\", { lemma: \"hvorki né\", isStopword: true, pos: \"ao\" }],\n [\"í burtu\", { lemma: \"í burtu\", isStopword: true, pos: \"ao\" }],\n [\"í gær\", { lemma: \"í gær\", isStopword: true, pos: \"ao\" }],\n [\"í senn\", { lemma: \"í senn\", isStopword: true, pos: \"ao\" }],\n [\"í sífellu\", { lemma: \"í sífella\", isStopword: true, pos: \"ao\" }],\n [\"lengi vel\", { lemma: \"lengi vel\", isStopword: true, pos: \"ao\" }],\n [\"meira að segja\", { lemma: \"mikill að segja\", isStopword: true, pos: \"ao\" }],\n [\"meira og minna\", { lemma: \"mikill og lítill\", isStopword: true, pos: \"ao\" }],\n [\"meðal annars\", { lemma: \"meðal annar\", isStopword: true, pos: \"ao\" }],\n [\"nokkurn veginn\", { lemma: \"nokkur vegur\", isStopword: true, pos: \"ao\" }],\n [\"og svo framvegis\", { lemma: \"og svo framvegis\", isStopword: true, pos: \"ao\" }],\n [\"satt að segja\", { lemma: \"sannur að segja\", isStopword: true, pos: \"ao\" }],\n [\"sem betur fer\", { lemma: \"sem vel fara\", isStopword: true, pos: \"ao\" }],\n [\"smám saman\", { lemma: \"smátt saman\", isStopword: true, pos: \"ao\" }],\n [\"svo sem\", { lemma: \"svo sem\", isStopword: true, pos: \"ao\" }],\n [\"sér í lagi\", { lemma: \"sér í lag\", isStopword: true, pos: \"ao\" }],\n [\"til og frá\", { lemma: \"til og frá\", isStopword: true, pos: \"ao\" }],\n [\"til baka\", { lemma: \"til baka\", isStopword: true, pos: \"ao\" }],\n [\"vítt og breitt\", { lemma: \"vítt og breitt\", isStopword: true, pos: \"ao\" }],\n [\"á ný\", { lemma: \"á ný\", isStopword: true, pos: \"ao\" }],\n [\"á meðan\", { lemma: \"á meðan\", isStopword: true, pos: \"ao\" }],\n [\"á sama tíma\", { lemma: \"á samur tími\", isStopword: true, pos: \"ao\" }],\n [\"á hinn bóginn\", { lemma: \"á hinn bógur\", isStopword: true, pos: \"ao\" }],\n [\"þar af leiðandi\", { lemma: \"þar af leiða\", isStopword: true, pos: \"ao\" }],\n [\"þar að auki\", { lemma: \"þar að auki\", isStopword: true, pos: \"ao\" }],\n [\"það er að segja\", { lemma: \"það vera að segja\", isStopword: true, pos: \"ao\" }],\n [\"þess vegna\", { lemma: \"það vegna\", isStopword: true, pos: \"ao\" }],\n [\"því miður\", { lemma: \"það lítt\", isStopword: true, pos: \"ao\" }],\n [\"þrátt fyrir\", { lemma: \"þrátt fyrir\", isStopword: true, pos: \"ao\" }],\n\n // Time expressions\n [\"á dögunum\", { lemma: \"á dagur\", isStopword: true, pos: \"ao\" }],\n [\"á sínum tíma\", { lemma: \"á sinn tími\", isStopword: true, pos: \"ao\" }],\n [\"á endanum\", { lemma: \"á endi\", isStopword: true, pos: \"ao\" }],\n [\"einu sinni\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"eitt sinn\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"í fyrsta sinn\", { lemma: \"í fyrstur sinn\", isStopword: false, pos: \"ao\" }],\n [\"í kvöld\", { lemma: \"í kvöld\", isStopword: false, pos: \"ao\" }],\n [\"í morgun\", { lemma: \"í morgunn\", isStopword: false, pos: \"ao\" }],\n [\"á morgun\", { lemma: \"á morgunn\", isStopword: false, pos: \"ao\" }],\n\n // Prepositional phrases (fs frasi)\n [\"fyrir hönd\", { lemma: \"fyrir hönd\", isStopword: false, pos: \"fs\" }],\n [\"með tilliti til\", { lemma: \"með tillit til\", isStopword: false, pos: \"fs\" }],\n [\"í ljósi\", { lemma: \"í ljós\", isStopword: false, pos: \"fs\" }],\n [\"í stað\", { lemma: \"í staður\", isStopword: false, pos: \"fs\" }],\n [\"fyrir aftan\", { lemma: \"fyrir aftan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir austan\", { lemma: \"fyrir austan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir framan\", { lemma: \"fyrir framan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir handan\", { lemma: \"fyrir handan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir innan\", { lemma: \"fyrir innan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir neðan\", { lemma: \"fyrir neðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir norðan\", { lemma: \"fyrir norðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir ofan\", { lemma: \"fyrir ofan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir sunnan\", { lemma: \"fyrir sunnan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir utan\", { lemma: \"fyrir utan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir vestan\", { lemma: \"fyrir vestan\", isStopword: false, pos: \"fs\" }],\n [\"í gegnum\", { lemma: \"í gegnum\", isStopword: false, pos: \"fs\" }],\n [\"í kringum\", { lemma: \"í kringum\", isStopword: false, pos: \"fs\" }],\n [\"innan við\", { lemma: \"innan við\", isStopword: false, pos: \"fs\" }],\n [\"upp úr\", { lemma: \"upp úr\", isStopword: false, pos: \"fs\" }],\n [\"þvert á\", { lemma: \"þvert á\", isStopword: false, pos: \"fs\" }],\n\n // Conjunction-like phrases (st frasi)\n [\"þar eð\", { lemma: \"þar eð\", isStopword: true, pos: \"st\" }],\n\n // Named entities - organizations/institutions (NOT stopwords)\n [\"sameinuðu þjóðirnar\", { lemma: \"Sameinuðu þjóðirnar\", isStopword: false, pos: \"entity\" }],\n [\"evrópusambandið\", { lemma: \"Evrópusambandið\", isStopword: false, pos: \"entity\" }],\n [\"nato\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n [\"nató\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n]);\n\n/**\n * Check if a phrase starting at the given position exists.\n * Returns the phrase info and length if found, null otherwise.\n */\nexport function matchPhrase(\n words: string[],\n startIndex: number\n): { phrase: StaticPhrase; wordCount: number } | null {\n // Try longest matches first (up to 4 words)\n for (let len = Math.min(4, words.length - startIndex); len >= 2; len--) {\n const phraseWords = words.slice(startIndex, startIndex + len);\n const phraseKey = phraseWords.join(\" \").toLowerCase();\n const phrase = STATIC_PHRASES.get(phraseKey);\n if (phrase) {\n return { phrase, wordCount: len };\n }\n }\n return null;\n}\n\n/**\n * Check if a normalized string is a known phrase.\n */\nexport function isKnownPhrase(text: string): boolean {\n return STATIC_PHRASES.has(text.toLowerCase());\n}\n\n/**\n * Get phrase info for a normalized string.\n */\nexport function getPhraseInfo(text: string): StaticPhrase | undefined {\n return STATIC_PHRASES.get(text.toLowerCase());\n}\n","/**\n * Token normalization functions for indexing non-word token types.\n *\n * Normalized forms preserve type indicators (like # for hashtags) so that\n * searches require the same syntax - searching \"iceland\" won't match \"#iceland\".\n */\n\nimport type { Token } from \"tokenize-is\";\n\n/**\n * Normalize a token to indexable string values.\n *\n * @param token - Token from tokenize-is\n * @returns Array of normalized strings for indexing (may be empty)\n */\nexport function normalizeToken(token: Token): string[] {\n switch (token.kind) {\n case \"telno\":\n // Phone: preserve + prefix if country code present\n return [token.cc ? `+${token.cc}${token.number}` : token.number];\n\n case \"email\":\n // Email: lowercase (already type-distinct due to @ in middle)\n return [token.text.toLowerCase()];\n\n case \"url\":\n // URL: as-is (protocol makes it distinct)\n return [token.text];\n\n case \"domain\":\n // Domain: lowercase (TLD makes it recognizable)\n return [token.text.toLowerCase()];\n\n case \"date\":\n case \"dateabs\":\n case \"daterel\": {\n // Date: ISO format YYYY-MM-DD or MM-DD if no year\n const y = token.year || 0;\n const m = String(token.month).padStart(2, \"0\");\n const d = String(token.day).padStart(2, \"0\");\n return y > 0 ? [`${y}-${m}-${d}`] : [`${m}-${d}`];\n }\n\n case \"time\": {\n // Time: HH:MM or HH:MM:SS if seconds present\n const h = String(token.hour).padStart(2, \"0\");\n const m = String(token.minute).padStart(2, \"0\");\n if (token.second > 0) {\n return [`${h}:${m}:${String(token.second).padStart(2, \"0\")}`];\n }\n return [`${h}:${m}`];\n }\n\n case \"timestamp\":\n case \"timestampabs\":\n case \"timestamprel\": {\n // Timestamp: ISO format YYYY-MM-DDTHH:MM:SS\n const date = `${token.year}-${String(token.month).padStart(2, \"0\")}-${String(token.day).padStart(2, \"0\")}`;\n const time = `${String(token.hour).padStart(2, \"0\")}:${String(token.minute).padStart(2, \"0\")}:${String(token.second).padStart(2, \"0\")}`;\n return [`${date}T${time}`];\n }\n\n case \"ssn\":\n // SSN (kennitala): format with dash (DDMMYY-NNNN)\n return [`${token.value.slice(0, 6)}-${token.value.slice(6)}`];\n\n case \"amount\":\n // Amount: combined \"value currency\" for precise matching\n return [`${token.value} ${token.currency}`];\n\n case \"measurement\":\n // Measurement: combined \"value unit\" for precise matching\n return [`${token.value} ${token.unit}`];\n\n case \"percent\":\n // Percent: keep % suffix to distinguish from plain numbers\n return [`${token.value}%`];\n\n case \"hashtag\":\n // Hashtag: keep # prefix, lowercase value\n return [`#${token.text.slice(1).toLowerCase()}`];\n\n case \"username\":\n // Username: keep @ prefix, lowercase value\n return [`@${token.username.toLowerCase()}`];\n\n case \"year\":\n // Year: as string (4-digit format is recognizable)\n return [String(token.value)];\n\n case \"number\":\n case \"ordinal\":\n // Number/ordinal: as string (caller decides if to include)\n return [String(token.value)];\n\n default:\n return [];\n }\n}\n","/**\n * Unified text processing pipeline integrating tokenize-is with lemmatization.\n *\n * Provides proper tokenization that handles Icelandic-specific patterns\n * (abbreviations, dates, times, etc.) before lemmatization.\n */\n\nimport { tokenize, type Token } from \"tokenize-is\";\nimport { Disambiguator, type DisambiguatedToken } from \"./disambiguate.js\";\nimport { CompoundSplitter, type CompoundSplit } from \"./compounds.js\";\nimport { STOPWORDS_IS, isContextualStopword } from \"./stopwords.js\";\nimport { normalizeToken } from \"./normalizers.js\";\nimport type { LemmatizerLike, BigramProvider } from \"./types.js\";\n\n/**\n * Token kinds that should be lemmatized.\n */\nconst LEMMATIZABLE_KINDS = new Set([\"word\"]);\n\n/**\n * Token kinds that represent named entities (skip lemmatization).\n */\nconst ENTITY_KINDS = new Set([\"person\", \"company\", \"entity\"]);\n\n/**\n * Token kinds to skip entirely (not useful for indexing).\n */\nconst SKIP_KINDS = new Set([\n \"punctuation\",\n \"s_begin\",\n \"s_end\",\n \"s_split\",\n \"unknown\",\n]);\n\n/**\n * Icelandic case suffixes to strip from unknown words (longest first).\n * Ordered by length to prefer longer matches.\n */\nconst UNKNOWN_SUFFIXES = [\n // Definite + case (longest)\n \"arinnar\", // fem gen def\n \"inum\", // masc/neut dat def\n \"anna\", // gen pl def\n \"unum\", // dat pl def\n \"sins\", // masc gen def (rarely used on foreign names)\n // Definite\n \"inn\", // masc nom def\n \"ins\", // masc/neut gen def\n \"ið\", // neut nom/acc def\n \"in\", // fem nom def\n // Case endings (common on foreign names)\n \"um\", // dat pl / dat sg (rare)\n \"ir\", // masc nom pl\n \"ar\", // fem nom pl / masc gen sg\n \"ur\", // masc nom sg\n \"s\", // genitive (very common on foreign names: Simons, Obamas)\n \"a\", // fem acc/dat sg, weak masc acc sg\n \"i\", // dat sg (weak), masc nom pl (some classes)\n];\n\nconst MIN_UNKNOWN_WORD_LENGTH = 4;\nconst MIN_STRIPPED_LENGTH = 2;\nconst MAX_SUFFIX_STRIPS = 2;\n\n/**\n * A processed token with lemmatization results.\n */\nexport interface ProcessedToken {\n /** Original token text */\n original: string;\n /** Token kind from tokenize-is */\n kind: string;\n /** Candidate lemmas (for word tokens) */\n lemmas: string[];\n /** Is this a named entity? */\n isEntity: boolean;\n /** Best lemma guess after disambiguation */\n disambiguated?: string;\n /** Disambiguation confidence (0-1) */\n confidence?: number;\n /** Compound split result if applicable */\n compoundSplit?: CompoundSplit;\n /** Lemmas derived from compound parts (if any) */\n compoundLemmas?: string[];\n}\n\n/**\n * Options for text processing.\n */\nexport interface ProcessOptions {\n /** Bigram provider for disambiguation */\n bigrams?: BigramProvider;\n /** Compound splitter for compound word detection */\n compoundSplitter?: CompoundSplitter;\n /** Remove stopwords from results */\n removeStopwords?: boolean;\n /**\n * Use contextual stopword detection (requires POS info).\n * When true, words like \"á\" are only filtered as stopwords when used\n * as prepositions, not when used as verbs (\"eiga\") or nouns (river).\n * Default: false (use simple stopword list)\n */\n useContextualStopwords?: boolean;\n /** Include numbers in results */\n includeNumbers?: boolean;\n /**\n * Index all candidate lemmas, not just the disambiguated one.\n * Better recall for search (finds more matches), worse precision.\n * Set to false if you only want the most likely lemma.\n * Default: true\n */\n indexAllCandidates?: boolean;\n /**\n * Try compound splitting even for known words.\n * Useful when BÍN contains the compound but you still want parts indexed.\n * Set to false to only split unknown words.\n * Default: true\n */\n alwaysTryCompounds?: boolean;\n /**\n * Strip Icelandic suffixes from unknown words to find base forms.\n * Useful for foreign names: \"Simons\" → \"simon\", \"Obamas\" → \"obama\".\n * Default: true\n */\n stripUnknownSuffixes?: boolean;\n}\n\n/**\n * Process text through the full pipeline.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Array of processed tokens\n */\nexport function processText(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): ProcessedToken[] {\n const {\n bigrams,\n compoundSplitter,\n includeNumbers = false,\n alwaysTryCompounds = true,\n stripUnknownSuffixes = true,\n } = options;\n\n // Step 1: Tokenize\n const tokens = tokenize(text);\n\n // Step 2: Process each token\n const results: ProcessedToken[] = [];\n const wordTokens: { index: number; token: Token }[] = [];\n const lemmaCache = new Map<string, string[]>();\n\n const isUnknownLemma = (raw: string, lemmas: string[]): boolean =>\n lemmas.length === 1 && lemmas[0] === raw.toLowerCase();\n\n /**\n * Try stripping Icelandic suffixes from unknown words.\n * Returns known lemmas if found, otherwise returns the stripped form\n * as a candidate (useful for foreign names like \"Simons\" → \"simon\").\n */\n const trySuffixFallback = (raw: string): string[] | null => {\n let current = raw;\n const candidates: string[] = [];\n\n for (let attempt = 0; attempt < MAX_SUFFIX_STRIPS; attempt++) {\n const lower = current.toLowerCase();\n let bestStripped: string | null = null;\n\n for (const suffix of UNKNOWN_SUFFIXES) {\n if (!lower.endsWith(suffix)) continue;\n\n const stripped = current.slice(0, current.length - suffix.length);\n if (stripped.length < MIN_STRIPPED_LENGTH) continue;\n\n const strippedLemmas = lemmatizer.lemmatize(stripped);\n\n // If stripped form is known, return those lemmas\n if (!isUnknownLemma(stripped, strippedLemmas)) {\n // Include any candidates we've collected plus the known lemmas\n return [...new Set([...candidates, ...strippedLemmas])];\n }\n\n // Track the first valid stripped form for this iteration\n if (!bestStripped) {\n bestStripped = stripped;\n // Add the stripped form as a candidate (for foreign names)\n candidates.push(stripped.toLowerCase());\n }\n }\n\n // If we found a stripped form but it's still unknown, try stripping again\n if (!bestStripped || bestStripped.length < MIN_UNKNOWN_WORD_LENGTH) {\n break;\n }\n\n current = bestStripped;\n }\n\n // Return collected candidates if any (stripped unknown forms)\n return candidates.length > 0 ? [...new Set(candidates)] : null;\n };\n\n const getLemmas = (raw: string): string[] => {\n const key = raw.toLowerCase();\n const cached = lemmaCache.get(key);\n if (cached) return cached;\n\n const lemmas = lemmatizer.lemmatize(raw);\n\n // For unknown words, try suffix stripping to find base forms.\n // Must check BOTH isUnknownLemma (returns self) AND !isKnown (not in dictionary)\n // to avoid stripping suffixes from words like \"fyrir\" that are their own lemma.\n if (\n stripUnknownSuffixes &&\n isUnknownLemma(raw, lemmas) &&\n !lemmatizer.isKnown(raw) &&\n raw.length >= MIN_UNKNOWN_WORD_LENGTH\n ) {\n const fallbackLemmas = trySuffixFallback(raw);\n if (fallbackLemmas) {\n // Include both original lowercased and stripped forms\n const combined = [...new Set([...lemmas, ...fallbackLemmas])];\n lemmaCache.set(key, combined);\n return combined;\n }\n }\n\n lemmaCache.set(key, lemmas);\n return lemmas;\n };\n\n for (let i = 0; i < tokens.length; i++) {\n const token = tokens[i];\n\n // Skip unwanted tokens\n if (SKIP_KINDS.has(token.kind)) {\n continue;\n }\n\n // Handle named entities\n if (ENTITY_KINDS.has(token.kind)) {\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: true,\n });\n continue;\n }\n\n // Handle word tokens\n if (LEMMATIZABLE_KINDS.has(token.kind)) {\n const tokenText = token.text ?? \"\";\n const lemmas = getLemmas(tokenText);\n\n const processed: ProcessedToken = {\n original: tokenText,\n kind: token.kind,\n lemmas,\n isEntity: false,\n };\n\n // Try compound splitting\n // - Always if alwaysTryCompounds is set (for better search recall)\n // - Otherwise only if lemmatization returns unknown word\n const isUnknownWord = lemmas.length === 1 && lemmas[0] === tokenText.toLowerCase();\n\n // Split unknown hyphenated words (e.g., \"COVID-sýking\" → \"covid\" + \"sýking\")\n if (isUnknownWord && tokenText.includes(\"-\")) {\n const hyphenParts = tokenText.split(\"-\");\n const partLemmas: string[] = [];\n for (const part of hyphenParts) {\n if (part.length > 0) {\n partLemmas.push(...getLemmas(part));\n }\n }\n if (partLemmas.length > 0) {\n processed.lemmas = [...new Set([...lemmas, ...partLemmas])];\n }\n }\n\n if (compoundSplitter && (alwaysTryCompounds || isUnknownWord)) {\n const split = compoundSplitter.split(tokenText);\n if (split.isCompound) {\n processed.compoundSplit = split;\n // Add component lemmas from parts (in addition to direct lemmas)\n const partLemmas = split.parts.flatMap((c) => getLemmas(c));\n processed.compoundLemmas = partLemmas;\n processed.lemmas = [...new Set([...lemmas, ...partLemmas])];\n }\n }\n\n results.push(processed);\n wordTokens.push({ index: results.length - 1, token });\n continue;\n }\n\n // Handle non-word tokens with normalization (numbers, dates, URLs, etc.)\n const normalized = normalizeToken(token);\n if (normalized.length > 0) {\n // Numbers/ordinals only included if includeNumbers is set\n if ((token.kind === \"number\" || token.kind === \"ordinal\") && !includeNumbers) {\n continue;\n }\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: normalized,\n isEntity: false,\n });\n continue;\n }\n\n // Pass through other tokens with no normalization\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: false,\n });\n }\n\n // Step 3: Disambiguate if we have bigram data\n if (bigrams && wordTokens.length > 0) {\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n\n for (let i = 0; i < wordTokens.length; i++) {\n const { index, token } = wordTokens[i];\n const prevToken = i > 0 ? wordTokens[i - 1].token : null;\n const nextToken = i < wordTokens.length - 1 ? wordTokens[i + 1].token : null;\n\n const result = disambiguator.disambiguate(\n token.text ?? \"\",\n prevToken?.text ?? null,\n nextToken?.text ?? null,\n {\n prevLemmas: prevToken?.text ? getLemmas(prevToken.text) : undefined,\n nextLemmas: nextToken?.text ? getLemmas(nextToken.text) : undefined,\n }\n );\n\n results[index].disambiguated = result.lemma;\n results[index].confidence = result.confidence;\n }\n } else {\n // No disambiguation - use first lemma\n for (const { index } of wordTokens) {\n const processed = results[index];\n if (processed.lemmas.length > 0) {\n processed.disambiguated = processed.lemmas[0];\n processed.confidence = processed.lemmas.length === 1 ? 1.0 : 0.5;\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract unique indexable lemmas from text.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Set of unique lemmas suitable for search indexing\n */\nexport function extractIndexableLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): Set<string> {\n const {\n removeStopwords = false,\n indexAllCandidates = true,\n useContextualStopwords = false,\n } = options;\n\n const processed = processText(text, lemmatizer, options);\n const lemmas = new Set<string>();\n\n /**\n * Check if a lemma should be filtered as a stopword.\n * Uses contextual rules when enabled and POS is available.\n */\n const shouldFilter = (lemma: string, pos?: string): boolean => {\n if (!removeStopwords) return false;\n if (useContextualStopwords) {\n return isContextualStopword(lemma, pos);\n }\n return STOPWORDS_IS.has(lemma);\n };\n\n for (const token of processed) {\n // Skip entities\n if (token.isEntity) {\n continue;\n }\n\n if (indexAllCandidates) {\n // Index ALL candidate lemmas for better search recall\n for (const lemma of token.lemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n } else {\n // Use disambiguated lemma if available (better precision)\n if (token.disambiguated) {\n // Note: We don't have POS info easily available in disambiguated result\n // This would need enhancement to pass through POS from disambiguation\n if (!shouldFilter(token.disambiguated)) {\n lemmas.add(token.disambiguated);\n }\n }\n }\n\n // Also add compound parts if split\n if (token.compoundSplit?.isCompound) {\n const partLemmas = token.compoundLemmas\n ? token.compoundLemmas\n : token.compoundSplit.parts.flatMap((p) => lemmatizer.lemmatize(p));\n for (const lemma of partLemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n }\n }\n\n return lemmas;\n}\n\n/**\n * Options for building a backend-agnostic boolean search query.\n */\nexport interface SearchQueryOptions extends ProcessOptions {\n /** Operator between token groups (AND). Default: \" & \" */\n andOperator?: string;\n /** Operator between candidate lemmas within a group (OR). Default: \" | \" */\n orOperator?: string;\n /** Wrap groups with multiple terms in parentheses. Default: true */\n wrapGroups?: boolean;\n /**\n * Include the original token (lowercased) in each group for recall.\n * Useful for unknown words or when you want a fallback.\n * Default: false\n */\n includeOriginal?: boolean;\n /** Lowercase original tokens when includeOriginal is true. Default: true */\n lowercaseOriginal?: boolean;\n}\n\n/**\n * Result for a backend-agnostic boolean search query.\n */\nexport interface SearchQueryResult {\n /** Lemma groups per token (OR within group, AND between groups) */\n groups: string[][];\n /** Boolean query string using provided operators */\n query: string;\n}\n\n/**\n * Build a backend-agnostic boolean query string from user input.\n *\n * Use the same lemmatization pipeline as indexing, then:\n * - OR within a token's candidate lemmas\n * - AND across tokens\n *\n * @param text - User search input\n * @param lemmatizer - Lemmatizer instance\n * @param options - Query + processing options\n */\nexport function buildSearchQuery(\n text: string,\n lemmatizer: LemmatizerLike,\n options: SearchQueryOptions = {}\n): SearchQueryResult {\n const {\n removeStopwords = false,\n indexAllCandidates = true,\n useContextualStopwords = false,\n andOperator = \" & \",\n orOperator = \" | \",\n wrapGroups = true,\n includeOriginal = false,\n lowercaseOriginal = true,\n } = options;\n\n const processed = processText(text, lemmatizer, options);\n const groups: string[][] = [];\n\n /**\n * Check if a lemma should be filtered as a stopword.\n * Uses contextual rules when enabled and POS is available.\n */\n const shouldFilter = (lemma: string, pos?: string): boolean => {\n if (!removeStopwords) return false;\n if (useContextualStopwords) {\n return isContextualStopword(lemma, pos);\n }\n return STOPWORDS_IS.has(lemma);\n };\n\n for (const token of processed) {\n // Mirror indexing behavior: skip entities\n if (token.isEntity) continue;\n\n let candidates: string[] = [];\n if (indexAllCandidates) {\n candidates = token.lemmas;\n } else if (token.disambiguated) {\n candidates = [token.disambiguated];\n }\n\n if (includeOriginal) {\n const raw = token.original ?? \"\";\n if (raw.length > 0) {\n const original = lowercaseOriginal ? raw.toLowerCase() : raw;\n candidates = [...candidates, original];\n }\n }\n\n const unique = [\n ...new Set(candidates.filter((lemma) => lemma && !shouldFilter(lemma))),\n ];\n\n if (unique.length > 0) {\n groups.push(unique);\n }\n }\n\n const query = groups\n .map((group) => {\n const joined = group.join(orOperator);\n if (wrapGroups && group.length > 1) {\n return `(${joined})`;\n }\n return joined;\n })\n .filter((part) => part.length > 0)\n .join(andOperator);\n\n return { groups, query };\n}\n\n/**\n * Strategy for benchmark comparisons.\n */\nexport type ProcessingStrategy = \"naive\" | \"tokenized\" | \"disambiguated\" | \"full\";\n\n/**\n * Metrics from processing a text.\n */\nexport interface ProcessingMetrics {\n /** Total word count */\n wordCount: number;\n /** Words successfully lemmatized (not returned as-is) */\n lemmatizedCount: number;\n /** Coverage: lemmatized / total */\n coverage: number;\n /** Words with multiple candidate lemmas */\n ambiguousCount: number;\n /** Ambiguity rate: ambiguous / total */\n ambiguityRate: number;\n /** Average disambiguation confidence */\n avgConfidence: number;\n /** Compounds detected and split */\n compoundsFound: number;\n /** Named entities skipped */\n entitiesSkipped: number;\n /** Unique lemmas extracted */\n uniqueLemmas: number;\n /** Processing time in milliseconds */\n timeMs: number;\n}\n\n/**\n * Run benchmark with a specific strategy and collect metrics.\n */\nexport function runBenchmark(\n text: string,\n lemmatizer: LemmatizerLike,\n strategy: ProcessingStrategy,\n resources: {\n bigrams?: BigramProvider;\n compoundSplitter?: CompoundSplitter;\n } = {}\n): ProcessingMetrics {\n const start = performance.now();\n\n let processed: ProcessedToken[];\n let lemmas: Set<string>;\n\n switch (strategy) {\n case \"naive\": {\n // Simple whitespace split + lemmatize\n const tokens = text.split(/\\s+/).filter((t) => t.length > 0);\n const naiveProcessed: ProcessedToken[] = [];\n\n for (const token of tokens) {\n const cleaned = token.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\");\n if (cleaned) {\n const tokenLemmas = lemmatizer.lemmatize(cleaned);\n naiveProcessed.push({\n original: cleaned,\n kind: \"word\",\n lemmas: tokenLemmas,\n isEntity: false,\n disambiguated: tokenLemmas[0],\n confidence: tokenLemmas.length === 1 ? 1.0 : 0.5,\n });\n }\n }\n processed = naiveProcessed;\n lemmas = new Set(naiveProcessed.map((p) => p.disambiguated!).filter(Boolean));\n break;\n }\n\n case \"tokenized\": {\n // tokenize-is + lemmatize word tokens\n processed = processText(text, lemmatizer);\n lemmas = new Set(\n processed\n .filter((p) => p.kind === \"word\" && p.lemmas.length > 0)\n .map((p) => p.lemmas[0])\n );\n break;\n }\n\n case \"disambiguated\": {\n // tokenized + bigram disambiguation\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n break;\n }\n\n case \"full\": {\n // disambiguated + compounds\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n break;\n }\n }\n\n const timeMs = performance.now() - start;\n\n // Calculate metrics\n const wordTokens = processed.filter((p) => p.kind === \"word\");\n const wordCount = wordTokens.length;\n\n const lemmatizedCount = wordTokens.filter((p) => {\n // Considered lemmatized if not returned as-is\n return (\n p.lemmas.length > 0 &&\n !(p.lemmas.length === 1 && p.lemmas[0] === p.original.toLowerCase())\n );\n }).length;\n\n const ambiguousCount = wordTokens.filter((p) => p.lemmas.length > 1).length;\n\n const confidences = wordTokens\n .filter((p) => p.confidence !== undefined)\n .map((p) => p.confidence!);\n const avgConfidence =\n confidences.length > 0\n ? confidences.reduce((a, b) => a + b, 0) / confidences.length\n : 0;\n\n const compoundsFound = wordTokens.filter((p) => p.compoundSplit?.isCompound).length;\n const entitiesSkipped = processed.filter((p) => p.isEntity).length;\n\n return {\n wordCount,\n lemmatizedCount,\n coverage: wordCount > 0 ? lemmatizedCount / wordCount : 0,\n ambiguousCount,\n ambiguityRate: wordCount > 0 ? ambiguousCount / wordCount : 0,\n avgConfidence,\n compoundsFound,\n entitiesSkipped,\n uniqueLemmas: lemmas.size,\n timeMs,\n };\n}\n"],"mappings":"uCAUA,MAAa,EAAe,IAAI,IAAI,8rIAuEnC,CAAC,CAKF,SAAgB,EAAW,EAAuB,CAChD,OAAO,EAAa,IAAI,EAAK,aAAa,CAAC,CAa7C,MAAa,EAAiD,IAAI,IAAI,CAEpE,CAAC,IAAK,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE5B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE9B,CAAC,KAAM,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE7B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE/B,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEzB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,IAAK,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CACvB,CAAC,CAYF,SAAgB,EAAqB,EAAe,EAAuB,CACzE,IAAM,EAAa,EAAM,aAAa,CAGhC,EAAc,EAAqB,IAAI,EAAW,CAOxD,OANI,GAAe,EAEV,EAAY,IAAI,EAAI,CAItB,EAAa,IAAI,EAAW,CAMrC,SAAgB,EAAkC,EAAiB,CACjE,OAAO,EAAM,OAAQ,GAAM,CAAC,EAAW,EAAE,CAAC,CCnI5C,MAAM,EAAQ,WAGR,EAA2B,CAC/B,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACD,CAIK,EAAgD,CACpD,IAAA,GACA,KACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,IAAA,GACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,KACA,KACD,CAUD,IAAa,EAAb,MAAa,CAA2D,CACtE,OACA,WACA,aACA,aACA,YACA,YACA,aACA,QACA,gBACA,gBACA,gBACA,gBACA,YAEA,WACA,UACA,WACA,YACA,QAEA,QAAkB,IAAI,YAAY,QAAQ,CAE1C,YAAoB,EAAqB,CACvC,KAAK,OAAS,EACd,IAAM,EAAO,IAAI,SAAS,EAAO,CAG3B,EAAQ,EAAK,UAAU,EAAG,GAAK,CACrC,GAAI,IAAU,EACZ,MAAU,MACR,2CAA2C,EAAM,SAAS,GAAG,CAAC,UAAU,EAAM,SAAS,GAAG,GAC3F,CAIH,GADA,KAAK,QAAU,EAAK,UAAU,EAAG,GAAK,CAClC,KAAK,UAAY,GAAK,KAAK,UAAY,EACzC,MAAU,MAAM,wBAAwB,KAAK,UAAU,CAGzD,IAAM,EAAiB,EAAK,UAAU,EAAG,GAAK,CAC9C,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,UAAY,EAAK,UAAU,GAAI,GAAK,CACzC,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,YAAc,EAAK,UAAU,GAAI,GAAK,CAI3C,IAAI,EAAS,GAGb,KAAK,WAAa,IAAI,WAAW,EAAQ,EAAQ,EAAe,CAChE,GAAU,EAGV,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CACpE,GAAU,KAAK,WAAa,EAG5B,KAAK,aAAe,IAAI,WAAW,EAAQ,EAAQ,KAAK,WAAW,CACnE,GAAU,KAAK,WAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAU,CAClE,GAAU,KAAK,UAAY,EAG3B,KAAK,YAAc,IAAI,WAAW,EAAQ,EAAQ,KAAK,UAAU,CACjE,GAAU,KAAK,UAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAY,EAAE,CACvE,IAAW,KAAK,UAAY,GAAK,EAGjC,KAAK,QAAU,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CAC/D,GAAU,KAAK,WAAa,EAG5B,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CAMtE,aAAa,KACX,EACA,EAAmC,EAAE,CACV,CAE3B,IAAM,EAAW,MADD,EAAQ,OAAS,OACF,EAAI,CAEnC,GAAI,CAAC,EAAS,GACZ,MAAU,MAAM,+BAA+B,EAAS,SAAS,CAInE,OAAO,IAAI,EADI,MAAM,EAAS,aAAa,CACR,CAMrC,OAAO,eAAe,EAAuC,CAC3D,OAAO,IAAI,EAAiB,EAAO,CAMrC,UAAkB,EAAgB,EAAwB,CACxD,OAAO,KAAK,QAAQ,OAAO,KAAK,WAAW,SAAS,EAAQ,EAAS,EAAO,CAAC,CAM/E,SAAiB,EAAuB,CACtC,OAAO,KAAK,UAAU,KAAK,aAAa,GAAQ,KAAK,aAAa,GAAO,CAM3E,QAAgB,EAAuB,CACrC,OAAO,KAAK,UAAU,KAAK,YAAY,GAAQ,KAAK,YAAY,GAAO,CAOzE,SAAiB,EAAsB,CACrC,IAAI,EAAO,EACP,EAAQ,KAAK,UAAY,EAE7B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAU,KAAK,QAAQ,EAAI,CAEjC,GAAI,IAAY,EACd,OAAO,EAEL,EAAU,EACZ,EAAO,EAAM,EAEb,EAAQ,EAAM,EAIlB,MAAO,GAQT,UAAU,EAAc,EAAkC,EAAE,CAAY,CACtE,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,CAAC,EAAW,CAGrB,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAE9B,CAAE,aAAc,EAChB,EAAO,IAAI,IACX,EAAmB,EAAE,CAE3B,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAM,EAAY,GAExB,GAAI,GAAa,IAAQ,EACvB,SAGF,IAAM,EAAQ,KAAK,SAAS,EAAS,CAChC,EAAK,IAAI,EAAM,GAClB,EAAK,IAAI,EAAM,CACf,EAAO,KAAK,EAAM,EAQtB,OAJI,EAAO,SAAW,EACb,CAAC,EAAW,CAGd,EAQT,YAAoB,EAMlB,CAWA,OAVI,KAAK,UAAY,EACZ,CACL,SAAU,IAAU,EACpB,QAAS,EAAQ,GACjB,SAAU,EACV,WAAY,EACZ,WAAY,EACb,CAGI,CACL,SAAU,IAAU,GACpB,QAAS,EAAQ,GACjB,SAAW,IAAU,EAAK,EAC1B,WAAa,IAAU,EAAK,EAC5B,WAAa,IAAU,EAAK,EAC7B,CAOH,iBAAiB,EAA8B,CAC7C,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAAO,IAAI,IACX,EAAyB,EAAE,CAEjC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAQ,KAAK,SAAS,EAAS,CAC/B,EAAM,EAAY,IAAa,GAC/B,EAAM,GAAG,EAAM,GAAG,IAEnB,EAAK,IAAI,EAAI,GAChB,EAAK,IAAI,EAAI,CACb,EAAO,KAAK,CAAE,QAAO,MAAK,CAAC,EAI/B,OAAO,EAOT,mBAAmB,EAAgC,CACjD,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAA2B,EAAE,CAEnC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,UAAS,WAAU,aAAY,cAC/C,KAAK,YAAY,KAAK,QAAQ,GAAG,CAE7B,EAAuB,EAAE,CACzB,EAAU,EAAa,GACvB,EAAY,EAAe,GAC3B,EAAY,EAAe,GAE7B,IAAS,EAAM,KAAO,GACtB,IAAW,EAAM,OAAS,GAC1B,IAAW,EAAM,OAAS,GAE9B,EAAO,KAAK,CACV,MAAO,KAAK,SAAS,EAAS,CAC9B,IAAK,EAAY,IAAa,GAC9B,MAAO,OAAO,KAAK,EAAM,CAAC,OAAS,EAAI,EAAQ,IAAA,GAChD,CAAC,CAGJ,OAAO,EAMT,kBAA4B,CAC1B,OAAO,KAAK,SAAW,EAMzB,YAAqB,CACnB,OAAO,KAAK,QAMd,WAAmB,EAAe,EAAuB,CACvD,IAAI,EAAO,EACP,EAAQ,KAAK,YAAc,EAE/B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,EAAQ,EACV,EAAO,EAAM,UACJ,EAAQ,EACjB,EAAQ,EAAM,MACT,CAEL,IAAM,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,IAAU,EACZ,OAAO,EAEL,EAAQ,EACV,EAAO,EAAM,EAEb,EAAQ,EAAM,GAKpB,MAAO,GAOT,WAAW,EAAe,EAAuB,CAC/C,IAAM,EAAM,KAAK,WAAW,EAAM,aAAa,CAAE,EAAM,aAAa,CAAC,CACrE,OAAO,IAAQ,GAAK,EAAI,KAAK,YAAY,GAO3C,KAAK,EAAe,EAAuB,CACzC,OAAO,KAAK,WAAW,EAAO,EAAM,CAMtC,QAAQ,EAAuB,CAC7B,OAAO,KAAK,SAAS,EAAK,aAAa,CAAC,GAAK,GAM/C,IAAI,iBAA0B,CAC5B,OAAO,KAAK,WAMd,IAAI,eAAwB,CAC1B,OAAO,KAAK,UAMd,IAAI,kBAA2B,CAC7B,OAAO,KAAK,YAMd,IAAI,YAAqB,CACvB,OAAO,KAAK,OAAO,WAOrB,cAAyB,CACvB,IAAM,EAAmB,EAAE,CAC3B,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,WAAY,IACnC,EAAO,KAAK,KAAK,SAAS,EAAE,CAAC,CAE/B,OAAO,IC5dX,MAAa,EAA6C,CAKxD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,gBACT,YAAa,iDACd,CACD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAKD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,iBACT,YAAa,uCACd,CACD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,yCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,qCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,8CACd,CAGD,CACE,KAAM,OACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,+CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,mCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CACF,CAKD,SAAgB,EAAgB,EAAoC,CAClE,IAAM,EAAa,EAAK,aAAa,CACrC,OAAO,EAAqB,OAAQ,GAAM,EAAE,OAAS,EAAW,CAMlE,SAAgB,EAAuB,EAAuB,CAC5D,OAAO,EAAqB,KAAM,GAAM,EAAE,OAAS,EAAK,aAAa,CAAC,CCzJxE,MAAa,EAAuD,IAAI,IAAkC,CAExG,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAClD,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CACjD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAGlD,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACxC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC5C,CAAC,UAAW,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,YAAa,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG/C,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,SAAU,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC7C,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,WAAY,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC/C,CAAC,QAAS,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC5C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACxC,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC1C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,UAAW,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC9C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,CAOW,EAAsB,IAAI,IAAI,CACzC,KACA,KACA,OACA,MACA,MACA,MACA,MACA,OACA,MACA,MACD,CAAC,CAuBF,SAAgB,EACd,EACA,EACS,CAGT,OAFK,EACS,EAAkB,IAAI,EAAU,EAChC,IAAI,EAAa,EAAI,GAFT,GAe5B,SAAgB,EACd,EACA,EACyB,CAEzB,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,GAAI,EAAe,SAAW,EAAG,OAAO,KAGxC,IAAK,IAAM,KAAQ,EACjB,IAAK,IAAM,KAAY,EACrB,GAAI,EAAS,OAAO,MAAQ,EAAc,EAAK,MAAO,EAAS,MAAM,KAAK,CACxE,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,QAAQ,EAAS,MAAM,OAC7B,WAAY,GACb,CAKP,OAAO,KAaT,SAAgB,EACd,EACA,EACyB,CACzB,GAAI,CAAC,EAAU,OAAO,KAEtB,IAAM,EAAY,EAAS,aAAa,CACxC,GAAI,CAAC,EAAoB,IAAI,EAAU,CAAE,OAAO,KAGhD,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAW/D,OAVI,EAAe,SAAW,GAI1B,CADe,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CACjC,KAMjB,CACL,OAJoB,EAAe,KAAM,GAAM,EAAE,QAAU,OAAO,EAC7B,EAAe,IAG/B,MACrB,IAAK,KACL,KAAM,eACN,WAAY,IACb,CAsBH,SAAgB,EACd,EACA,EACA,EACyB,CACzB,GAAI,CAAC,GAAY,CAAC,GAAY,iBAAkB,OAAO,KAGvD,IAAM,EAAa,EAAW,iBAAiB,EAAS,CAClD,EAAgB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAC5D,GAAI,CAAC,EAAe,OAAO,KAG3B,IAAM,EAAoB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAG1D,EAAmB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAI/D,GAAI,GAAqB,EACvB,OAAO,KAIT,IAAM,EAAgB,EAAkB,IAAI,EAAc,MAAM,CAChE,GAAI,CAAC,EAAe,OAAO,KAG3B,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,IAAK,IAAM,KAAQ,EACjB,GAAI,EAAK,OAAO,MAAQ,EAAc,IAAI,EAAK,MAAM,KAAK,CACxD,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,mBAAmB,EAAK,MAAM,OACpC,WAAY,GACb,CAIL,OAAO,KAiBT,SAAgB,EACd,EACA,EACA,EACA,EAA2C,KAClB,CAazB,OAXiB,EAAqB,EAAY,EAAc,EAItC,EAA8B,EAAY,EAAU,EAAW,EAIxE,EAAqB,EAAY,EAAS,EAGpD,KAMT,SAAgB,EAAmB,EAAwB,CACzD,OAAO,EAAkB,IAAI,EAAM,CAMrC,SAAgB,EAAiB,EAAqD,CACpF,OAAO,EAAkB,IAAI,EAAU,CClNzC,MAAM,EAAwC,CAC5C,KAAM,cACN,IAAI,EAAY,CAQd,OAPI,EAAW,SAAW,EACjB,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EACb,CAEI,MAEV,CAKK,EAA4C,CAChD,KAAM,mBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,mBAAoB,OAAO,KAE9C,IAAK,IAAM,KAAQ,EAAsB,CACvC,IAAM,EAAQ,EAAU,EAAM,EAAY,EAAQ,CAClD,GAAI,EACF,MAAO,CACL,MAAO,EAAM,MACb,IAAK,EAAM,IACX,WAAY,IACb,CAGL,OAAO,MAEV,CAKD,SAAS,EACP,EACA,EACA,EACqB,CAErB,IAAM,EAAqB,EAAW,KACnC,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,OAC5E,CACK,EAAe,EAAW,KAC7B,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,KAC5E,CAED,GAAI,CAAC,GAAsB,CAAC,EAC1B,OAAO,KAIT,GAAI,EAAK,UAAY,cAAe,CAElC,IAAM,EAAO,EAAQ,SACrB,GAAI,GAAQ,kBAAkB,KAAK,EAAK,CACtC,OAAO,UAEA,EAAK,UAAY,cAAe,CAGzC,IAAM,EAAO,EAAQ,UAAU,aAAa,CAC5C,GAAI,GAAQ,CAAC,CAAC,QAAS,QAAS,KAAM,KAAM,MAAO,OAAQ,MAAO,MAAM,CAAC,SAAS,EAAK,CACrF,OAAO,UAEA,EAAK,UAAY,gBAAiB,CAE3C,IAAM,EAAO,EAAQ,UAAU,aAAa,CAE5C,GAAI,GADa,CAAC,KAAM,KAAM,OAAQ,MAAO,MAAO,MAAO,MAAO,OAAQ,MAAO,MAAM,CAClE,SAAS,EAAK,CACjC,OAAO,EAIX,OAAO,KAiIT,MAAM,EAAgC,CACpC,EACA,EA3H6C,CAC7C,KAAM,gBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,gBAAiB,OAAO,KAG3C,IAAM,EAAwC,EAAW,IAAK,IAAO,CACnE,GAAG,EACH,MAAO,IAAA,GACR,EAAE,CAGG,EAAc,EAAQ,UAAU,EAAQ,OAC9C,GAAI,EAAa,CACf,IAAM,EAAkB,EAAc,SAAS,EAAY,CACvD,IAEF,EAAoB,OAAS,EAC7B,EAAoB,KAAK,GAAG,EAAgB,EAKhD,IAAM,EAAS,EACb,EACA,EAAQ,SACR,EAAQ,eAAiB,EAAE,CAC3B,EAAc,WACf,CAUD,OARI,EACK,CACL,MAAO,EAAO,MACd,IAAK,EAAO,IACZ,WAAY,EAAO,WACpB,CAGI,MAEV,CAKwC,CACvC,KAAM,eACN,IAAI,EAAY,EAAS,EAAe,CAEtC,GADI,CAAC,EAAc,SACf,EAAW,SAAW,EAAG,OAAO,KAEpC,IAAM,EAAuD,EAAE,CAE/D,IAAK,IAAM,KAAa,EAAY,CAClC,IAAI,EAAQ,EAGZ,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAW,EAAU,MAAM,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,aAMlD,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAU,MAAO,EAAU,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,cAKlD,EAAO,KAAK,CAAE,YAAW,QAAO,CAAC,CAOnC,GAHA,EAAO,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAGpC,EAAO,OAAS,GAAK,EAAO,GAAG,MAAQ,EAAG,CAC5C,IAAM,EAAW,EAAO,GAAG,MACrB,EAAa,EAAO,QAAQ,EAAK,IAAM,EAAM,KAAK,IAAI,EAAE,MAAM,CAAE,EAAE,CAClE,EAAa,EAAa,EAAI,KAAK,IAAI,EAAS,CAAG,EAAa,GAEtE,MAAO,CACL,MAAO,EAAO,GAAG,UAAU,MAC3B,IAAK,EAAO,GAAG,UAAU,IACzB,aACD,CAGH,OAAO,MAEV,CAK0C,CACzC,KAAM,WACN,IAAI,EAAY,CAQd,OAPI,EAAW,OAAS,EACf,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EAAI,EAAW,OAC5B,CAEI,MAEV,CAWA,CAKD,IAAa,EAAb,KAA2B,CACzB,WACA,QACA,WACA,YACA,mBACA,gBACA,WAEA,YACE,EACA,EAAiC,KACjC,EAAgC,EAAE,CAClC,CACA,KAAK,WAAa,EAClB,KAAK,QAAU,EACf,KAAK,WAAa,EAAQ,YAAc,EACxC,KAAK,YAAc,EAAQ,aAAe,EAC1C,KAAK,mBAAqB,EAAQ,oBAAsB,GACxD,KAAK,gBAAkB,EAAQ,iBAAmB,GAClD,KAAK,WAAa,KAAK,WAAW,mBAAqB,IAAI,IAAQ,KAGrE,SAAiB,EAA4C,CAC3D,GAAI,CAAC,KAAK,WAAW,oBAAsB,CAAC,KAAK,WAAY,OAC7D,IAAM,EAAM,EAAK,aAAa,CACxB,EAAS,KAAK,WAAW,IAAI,EAAI,CACvC,GAAI,EAAQ,OAAO,EACnB,IAAM,EAAQ,KAAK,WAAW,mBAAmB,EAAK,CAEtD,OADA,KAAK,WAAW,IAAI,EAAK,EAAM,CACxB,EAUT,aACE,EACA,EACA,EACA,EAAkC,EAAE,CAChB,CAEpB,IAAI,EACJ,AAKE,EALE,KAAK,WAAW,iBACE,KAAK,WAAW,iBAAiB,EAAK,CAG3C,KAAK,WAAW,UAAU,EAAK,CACnB,IAAK,IAAO,CAAE,MAAO,EAAG,IAAK,KAAmB,EAAE,CAG/E,IAAM,EAAa,EAAkB,IAAK,GAAM,EAAE,MAAM,CAClD,EAAQ,EAGV,EACA,IACF,EAAgB,KAAK,SAAS,EAAS,EAIzC,IAAM,EAAiC,CACrC,WACA,WACA,WAAY,EAAK,WACjB,WAAY,EAAK,WACjB,gBACA,UAAW,CAAC,EAAK,CACjB,MAAO,EACR,CAGD,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAS,EAAM,IAAI,EAAmB,EAAS,KAAK,CAC1D,GAAI,EACF,MAAO,CACL,QACA,MAAO,EAAO,MACd,IAAK,EAAO,IACZ,aACA,oBACA,UAAW,EAAW,OAAS,EAC/B,WAAY,EAAO,WACnB,WAAY,EAAM,KACnB,CAKL,MAAO,CACL,QACA,MAAO,EAAK,aAAa,CACzB,aACA,oBACA,UAAW,GACX,WAAY,EACZ,WAAY,OACb,CASH,gBAAgB,EAAwC,CACtD,IAAM,EAAgC,EAAE,CAExC,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAO,EAAO,GACd,EAAW,EAAI,EAAI,EAAO,EAAI,GAAK,KACnC,EAAW,EAAI,EAAO,OAAS,EAAI,EAAO,EAAI,GAAK,KAEzD,EAAQ,KAAK,KAAK,aAAa,EAAM,EAAU,EAAS,CAAC,CAG3D,OAAO,EAST,cAAc,EAA+B,CAC3C,IAAM,EAAS,IAAI,IACb,EAAgB,KAAK,gBAAgB,EAAO,CAElD,IAAK,IAAM,KAAU,EACnB,EAAO,IAAI,EAAO,MAAM,CAG1B,OAAO,IAOX,SAAgB,EACd,EACA,EACA,EACA,EAGI,EAAE,CACO,CACb,GAAM,CAAE,WAAU,mBAAoB,EAGhC,EAAS,EACX,EAAS,EAAK,CACd,EACG,MAAM,MAAM,CACZ,OAAQ,GAAM,EAAE,OAAS,EAAE,CAC3B,IAAK,GAAM,EAAE,QAAQ,oCAAqC,GAAG,CAAC,CAC9D,OAAQ,GAAM,EAAE,OAAS,EAAE,CAI5B,EADgB,IAAI,EAAc,EAAY,EAAQ,CAC/B,cAAc,EAAO,CAGlD,GAAI,MACG,IAAM,KAAS,EACd,EAAa,IAAI,EAAM,EACzB,EAAO,OAAO,EAAM,CAK1B,OAAO,ECrdT,MAAa,EAA8C,CACzD,GAAI,OACJ,GAAI,OACJ,GAAI,YACJ,GAAI,SACJ,GAAI,cACJ,GAAI,UACJ,GAAI,cACJ,GAAI,UACJ,GAAI,UACJ,GAAI,eACL,CAKY,EAAiD,CAC5D,GAAI,UACJ,GAAI,UACJ,GAAI,cACJ,GAAI,YACJ,GAAI,aACJ,GAAI,UACJ,GAAI,aACJ,GAAI,UACJ,GAAI,UACJ,GAAI,YACL,CAoBY,EAA8C,CACzD,GAAI,aACJ,GAAI,aACJ,IAAK,SACL,GAAI,WACL,CAKY,EAAkD,CAC7D,GAAI,YACJ,IAAK,WACL,GAAI,SACL,CAKY,EAAkD,CAC7D,GAAI,WACJ,GAAI,SACL,CCtFD,IAAa,EAAb,MAAa,CAAY,CACvB,KACA,SACA,UAEA,YAAoB,EAAkB,EAAkB,EAAmB,CACzE,KAAK,KAAO,EACZ,KAAK,SAAW,EAChB,KAAK,UAAY,EAGnB,OAAO,WAAW,EAAkB,EAA8B,EAAE,CAAe,CACjF,IAAM,EAAI,KAAK,IAAI,EAAO,OAAQ,EAAE,CAC9B,EAAI,EAAQ,mBAAqB,IAEjC,EAAI,KAAK,IAAI,EAAG,KAAK,KAAM,CAAC,EAAI,KAAK,IAAI,EAAE,EAAK,KAAK,IAAM,KAAK,KAAK,CAAC,CACtE,EAAI,KAAK,IAAI,EAAG,KAAK,MAAO,EAAI,EAAK,KAAK,IAAI,CAAC,CAC/C,EAAY,EAAQ,iBACtB,KAAK,IAAI,EAAG,EAAQ,iBAAiB,CACrC,EAEE,EAAQ,KAAK,KAAK,EAAI,EAAE,CAExB,EAAS,IAAI,EADN,IAAI,WAAW,EAAM,CACG,EAAG,EAAU,CAElD,IAAK,IAAM,KAAS,EAClB,EAAO,IAAI,EAAM,CAGnB,OAAO,EAGT,IAAI,EAAqB,CACvB,GAAM,CAAC,EAAI,GAAM,KAAK,OAAO,EAAM,CACnC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAW,IAAK,CACvC,IAAM,GAAY,EAAK,EAAI,GAAM,KAAK,SACtC,KAAK,OAAO,EAAS,EAIzB,IAAI,EAAwB,CAC1B,GAAM,CAAC,EAAI,GAAM,KAAK,OAAO,EAAM,CACnC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAW,IAAK,CACvC,IAAM,GAAY,EAAK,EAAI,GAAM,KAAK,SACtC,GAAI,CAAC,KAAK,OAAO,EAAS,CAAE,MAAO,GAErC,MAAO,GAGT,OAAe,EAAqB,CAClC,IAAM,EAAY,IAAU,EACtB,EAAM,EAAQ,EACpB,KAAK,KAAK,IAAc,GAAK,EAG/B,OAAe,EAAwB,CACrC,IAAM,EAAY,IAAU,EACtB,EAAM,EAAQ,EACpB,OAAQ,KAAK,KAAK,GAAc,GAAK,IAAU,EAGjD,OAAe,EAAiC,CAC9C,IAAM,EAAM,EAAM,aAAa,CAC3B,EAAQ,WACR,EAAQ,WAEZ,IAAK,IAAI,EAAI,EAAG,EAAI,EAAI,OAAQ,IAAK,CACnC,IAAM,EAAO,EAAI,WAAW,EAAE,CAC9B,GAAS,EACT,EAAQ,KAAK,KAAK,EAAO,SAAS,GAAK,EAEvC,GAAS,EACT,EAAQ,KAAK,KAAK,EAAO,WAAW,GAAK,EAO3C,MAJA,IAAS,IAAU,GACnB,EAAQ,KAAK,KAAK,EAAO,WAAW,GAAK,EACzC,GAAS,IAAU,GAEZ,CAAC,IAAU,EAAG,IAAU,GAAK,UAAW,GCnEnD,MAAa,EAAmB,IAAI,IAAI,sxBAkFvC,CAAC,CA2CI,EAAwB,IAAI,IAAI,+NAmCrC,CAAC,CAMI,EAAoB,IAAI,IAAI,CAChC,OACA,OACA,OACA,OACA,OACA,QACA,QACA,OACA,QACA,QACA,OACA,MACA,OACA,OACA,OACA,QACA,QACA,MACD,CAAC,CASI,EAAmB,CAAC,IAAK,IAAK,IAAI,CAExC,IAAa,EAAb,KAA8B,CAC5B,WACA,cACA,kBACA,YACA,KAEA,YACE,EACA,EACA,EAAmC,EAAE,CACrC,CACA,KAAK,WAAa,EAClB,KAAK,YAAc,EACnB,KAAK,cAAgB,EAAQ,eAAiB,EAC9C,KAAK,kBAAoB,EAAQ,mBAAqB,GACtD,KAAK,KAAO,EAAQ,MAAQ,WAM9B,QAAgB,EAAc,EAAiC,CAC7D,MAAO,CACL,OACA,MAAO,EACP,WAAY,EACZ,WAAY,EACZ,WAAY,GACb,CAWH,MAAM,EAA6B,CACjC,IAAM,EAAa,EAAK,aAAa,CAG/B,EAAe,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAe,EAAa,IAAI,aAAa,CAMnD,GALI,GAAgB,EAAiB,IAAI,EAAa,EAKlD,EAAiB,IAAI,EAAW,CAClC,OAAO,KAAK,QAAQ,EAAM,EAAa,CAKzC,IAAM,EACJ,EAAa,OAAS,GAAK,EAAa,GAAG,aAAa,GAAK,EACzD,EAAgB,EAAa,SAAW,EAG9C,GAAI,KAAK,OAAS,eAIhB,OAHI,EAAK,SAAS,IAAI,CACb,KAAK,cAAc,EAAM,EAAa,CAExC,KAAK,QAAQ,EAAM,EAAa,CAYzC,GARI,KAAK,OAAS,YAAc,GAAe,GAEzC,EAAW,OAAS,IAMtB,EAAW,OAAS,KAAK,cAAgB,EAC3C,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAIA,EAAE,CAER,IACE,IAAI,EAAI,KAAK,cACb,GAAK,EAAW,OAAS,KAAK,cAC9B,IACA,CACA,IAAM,EAAW,EAAW,MAAM,EAAG,EAAE,CACjC,EAAY,EAAW,MAAM,EAAE,CAG/B,EAAe,KAAK,SAAS,EAAU,EAAU,CAMvD,GALI,GACF,EAAW,KAAK,EAAa,CAI3B,KAAK,uBACF,IAAM,KAAU,EAEnB,GAAI,EAAS,SAAS,EAAO,EAAI,EAAS,OAAS,KAAK,cAAe,CACrE,IAAM,EAAc,EAAS,MAAM,EAAG,GAAG,CACnC,EAAS,KAAK,SAAS,EAAa,EAAU,CAChD,GAEF,EAAW,KAAK,CAAE,GAAG,EAAQ,MAAO,EAAO,MAAQ,IAAM,CAAC,GAOpE,GAAI,EAAW,SAAW,EACxB,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,EAAW,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAC5C,IAAM,EAAO,EAAW,GAGxB,GAAI,KAAK,OAAS,YAAc,GAAe,EAAK,MAAQ,GAC1D,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAAQ,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAK,UAAW,GAAG,EAAK,WAAW,CAAC,CAAC,CAInE,MAAO,CACL,OACA,QACA,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAO,EAAW,CAAC,CAAC,CAMrD,WAAY,KAAK,IAAI,EAAK,MAAO,EAAE,CACnC,WAAY,GACb,CAMH,cAAsB,EAAc,EAAuC,CACzE,IAAM,EAAQ,EAAK,MAAM,IAAI,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACzD,GAAI,EAAM,OAAS,EACjB,OAAO,KAAK,QAAQ,EAAM,EAAa,CAGzC,IAAM,EAAqB,EAAE,CAC7B,IAAK,IAAM,KAAQ,EAAO,CACxB,IAAM,EAAS,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAS,KAAK,GAAG,EAAO,CAG1B,IAAM,EAAc,CAAC,GAAG,IAAI,IAAI,EAAS,CAAC,CAG1C,MAAO,CACL,OACA,MAAO,EACP,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAa,EAAK,aAAa,CAAC,CAAC,CAAC,CAMnE,WAAY,GACZ,WAAY,GACb,CAGH,SACE,EACA,EACqE,CAErE,IAAM,EAAa,KAAK,WAAW,UAAU,EAAS,CAChD,EAAc,KAAK,WAAW,UAAU,EAAU,CAGlD,EAAY,CAAC,GAAG,IAAI,IAAI,EAAW,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAC3E,EAAa,CAAC,GAAG,IAAI,IAAI,EAAY,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAEnF,GAAI,EAAU,SAAW,GAAK,EAAW,SAAW,EAClD,OAAO,KAIT,IAAI,EAAQ,EAIN,EACJ,EAAI,KAAK,IAAI,EAAS,OAAS,EAAU,OAAO,EAAI,EAAS,OAAS,EAAU,QAClF,GAAS,EAAgB,GAIzB,IAAM,GAAa,EAAS,OAAS,EAAU,QAAU,EACnD,EAAc,KAAK,IAAI,EAAY,EAAG,EAAE,CAC9C,GAAS,EAAc,GAIC,EAAW,KAAM,GAAU,EAAsB,IAAI,EAAM,CAAC,GAElF,GAAS,IAKX,IAAM,EAAe,EAAU,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CACtE,EAAgB,EAAW,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CAgB9E,OAfI,GAAgB,EAElB,GAAS,GACA,CAAC,GAAgB,CAAC,IAE3B,GAAS,KAKP,EAAS,OAAS,GAAK,EAAU,OAAS,KAC5C,GAAS,KAIJ,CACL,UAAW,EACX,WAAY,EACZ,MAAO,KAAK,IAAI,EAAG,EAAM,CAC1B,CAOH,aAAa,EAAwB,CAEnC,OADc,KAAK,MAAM,EAAK,CACjB,aAQjB,SAAgB,EAAoB,EAA+B,CACjE,OAAO,IAAI,IAAI,EAAO,IAAK,GAAM,EAAE,aAAa,CAAC,CAAC,CAapD,SAAgB,EACd,EACA,EAAmC,EAAE,CACnB,CAClB,IAAM,EAAa,EAAO,IAAK,GAAM,EAAE,aAAa,CAAC,CACrD,OAAO,EAAY,WAAW,EAAY,EAAQ,CC7cpD,MAAa,EAA4C,IAAI,IAAI,CAE/D,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CACxE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,WAAY,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7D,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,kBAAmB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/E,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,cAAe,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,QAAS,CAAE,MAAO,QAAS,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,iBAAkB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,iBAAkB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,iBAAkB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAM,IAAK,KAAM,CAAC,CACxD,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,kBAAmB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACtE,CAAC,kBAAmB,CAAE,MAAO,oBAAqB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CAGtE,CAAC,YAAa,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC5E,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAClE,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAGlE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9E,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9D,CAAC,SAAU,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CACjE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC7D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAG/D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAG5D,CAAC,sBAAuB,CAAE,MAAO,sBAAuB,WAAY,GAAO,IAAK,SAAU,CAAC,CAC3F,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAO,IAAK,SAAU,CAAC,CACnF,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC7D,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC9D,CAAC,CAMF,SAAgB,EACd,EACA,EACoD,CAEpD,IAAK,IAAI,EAAM,KAAK,IAAI,EAAG,EAAM,OAAS,EAAW,CAAE,GAAO,EAAG,IAAO,CAEtE,IAAM,EADc,EAAM,MAAM,EAAY,EAAa,EAAI,CAC/B,KAAK,IAAI,CAAC,aAAa,CAC/C,EAAS,EAAe,IAAI,EAAU,CAC5C,GAAI,EACF,MAAO,CAAE,SAAQ,UAAW,EAAK,CAGrC,OAAO,KAMT,SAAgB,EAAc,EAAuB,CACnD,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CAM/C,SAAgB,EAAc,EAAwC,CACpE,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CCnJ/C,SAAgB,EAAe,EAAwB,CACrD,OAAQ,EAAM,KAAd,CACE,IAAK,QAEH,MAAO,CAAC,EAAM,GAAK,IAAI,EAAM,KAAK,EAAM,SAAW,EAAM,OAAO,CAElE,IAAK,QAEH,MAAO,CAAC,EAAM,KAAK,aAAa,CAAC,CAEnC,IAAK,MAEH,MAAO,CAAC,EAAM,KAAK,CAErB,IAAK,SAEH,MAAO,CAAC,EAAM,KAAK,aAAa,CAAC,CAEnC,IAAK,OACL,IAAK,UACL,IAAK,UAAW,CAEd,IAAM,EAAI,EAAM,MAAQ,EAClB,EAAI,OAAO,EAAM,MAAM,CAAC,SAAS,EAAG,IAAI,CACxC,EAAI,OAAO,EAAM,IAAI,CAAC,SAAS,EAAG,IAAI,CAC5C,OAAO,EAAI,EAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAGnD,IAAK,OAAQ,CAEX,IAAM,EAAI,OAAO,EAAM,KAAK,CAAC,SAAS,EAAG,IAAI,CACvC,EAAI,OAAO,EAAM,OAAO,CAAC,SAAS,EAAG,IAAI,CAI/C,OAHI,EAAM,OAAS,EACV,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,OAAO,EAAM,OAAO,CAAC,SAAS,EAAG,IAAI,GAAG,CAExD,CAAC,GAAG,EAAE,GAAG,IAAI,CAGtB,IAAK,YACL,IAAK,eACL,IAAK,eAIH,MAAO,CAAC,GAFK,GAAG,EAAM,KAAK,GAAG,OAAO,EAAM,MAAM,CAAC,SAAS,EAAG,IAAI,CAAC,GAAG,OAAO,EAAM,IAAI,CAAC,SAAS,EAAG,IAAI,GAExF,GADH,GAAG,OAAO,EAAM,KAAK,CAAC,SAAS,EAAG,IAAI,CAAC,GAAG,OAAO,EAAM,OAAO,CAAC,SAAS,EAAG,IAAI,CAAC,GAAG,OAAO,EAAM,OAAO,CAAC,SAAS,EAAG,IAAI,KAC3G,CAG5B,IAAK,MAEH,MAAO,CAAC,GAAG,EAAM,MAAM,MAAM,EAAG,EAAE,CAAC,GAAG,EAAM,MAAM,MAAM,EAAE,GAAG,CAE/D,IAAK,SAEH,MAAO,CAAC,GAAG,EAAM,MAAM,GAAG,EAAM,WAAW,CAE7C,IAAK,cAEH,MAAO,CAAC,GAAG,EAAM,MAAM,GAAG,EAAM,OAAO,CAEzC,IAAK,UAEH,MAAO,CAAC,GAAG,EAAM,MAAM,GAAG,CAE5B,IAAK,UAEH,MAAO,CAAC,IAAI,EAAM,KAAK,MAAM,EAAE,CAAC,aAAa,GAAG,CAElD,IAAK,WAEH,MAAO,CAAC,IAAI,EAAM,SAAS,aAAa,GAAG,CAE7C,IAAK,OAEH,MAAO,CAAC,OAAO,EAAM,MAAM,CAAC,CAE9B,IAAK,SACL,IAAK,UAEH,MAAO,CAAC,OAAO,EAAM,MAAM,CAAC,CAE9B,QACE,MAAO,EAAE,EC/Ef,MAAM,EAAqB,IAAI,IAAI,CAAC,OAAO,CAAC,CAKtC,EAAe,IAAI,IAAI,CAAC,SAAU,UAAW,SAAS,CAAC,CAKvD,EAAa,IAAI,IAAI,CACzB,cACA,UACA,QACA,UACA,UACD,CAAC,CAMI,EAAmB,CAEvB,UACA,OACA,OACA,OACA,OAEA,MACA,MACA,KACA,KAEA,KACA,KACA,KACA,KACA,IACA,IACA,IACD,CA6ED,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACV,CAClB,GAAM,CACJ,UACA,mBACA,iBAAiB,GACjB,qBAAqB,GACrB,uBAAuB,IACrB,EAGE,EAAS,EAAS,EAAK,CAGvB,EAA4B,EAAE,CAC9B,EAAgD,EAAE,CAClD,EAAa,IAAI,IAEjB,GAAkB,EAAa,IACnC,EAAO,SAAW,GAAK,EAAO,KAAO,EAAI,aAAa,CAOlD,EAAqB,GAAiC,CAC1D,IAAI,EAAU,EACR,EAAuB,EAAE,CAE/B,IAAK,IAAI,EAAU,EAAG,EAAU,EAAmB,IAAW,CAC5D,IAAM,EAAQ,EAAQ,aAAa,CAC/B,EAA8B,KAElC,IAAK,IAAM,KAAU,EAAkB,CACrC,GAAI,CAAC,EAAM,SAAS,EAAO,CAAE,SAE7B,IAAM,EAAW,EAAQ,MAAM,EAAG,EAAQ,OAAS,EAAO,OAAO,CACjE,GAAI,EAAS,OAAS,EAAqB,SAE3C,IAAM,EAAiB,EAAW,UAAU,EAAS,CAGrD,GAAI,CAAC,EAAe,EAAU,EAAe,CAE3C,MAAO,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAY,GAAG,EAAe,CAAC,CAAC,CAIpD,IACH,EAAe,EAEf,EAAW,KAAK,EAAS,aAAa,CAAC,EAK3C,GAAI,CAAC,GAAgB,EAAa,OAAS,EACzC,MAGF,EAAU,EAIZ,OAAO,EAAW,OAAS,EAAI,CAAC,GAAG,IAAI,IAAI,EAAW,CAAC,CAAG,MAGtD,EAAa,GAA0B,CAC3C,IAAM,EAAM,EAAI,aAAa,CACvB,EAAS,EAAW,IAAI,EAAI,CAClC,GAAI,EAAQ,OAAO,EAEnB,IAAM,EAAS,EAAW,UAAU,EAAI,CAKxC,GACE,GACA,EAAe,EAAK,EAAO,EAC3B,CAAC,EAAW,QAAQ,EAAI,EACxB,EAAI,QAAU,EACd,CACA,IAAM,EAAiB,EAAkB,EAAI,CAC7C,GAAI,EAAgB,CAElB,IAAM,EAAW,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAQ,GAAG,EAAe,CAAC,CAAC,CAE7D,OADA,EAAW,IAAI,EAAK,EAAS,CACtB,GAKX,OADA,EAAW,IAAI,EAAK,EAAO,CACpB,GAGT,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAQ,EAAO,GAGrB,GAAI,EAAW,IAAI,EAAM,KAAK,CAC5B,SAIF,GAAI,EAAa,IAAI,EAAM,KAAK,CAAE,CAChC,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CACF,SAIF,GAAI,EAAmB,IAAI,EAAM,KAAK,CAAE,CACtC,IAAM,EAAY,EAAM,MAAQ,GAC1B,EAAS,EAAU,EAAU,CAE7B,EAA4B,CAChC,SAAU,EACV,KAAM,EAAM,KACZ,SACA,SAAU,GACX,CAKK,EAAgB,EAAO,SAAW,GAAK,EAAO,KAAO,EAAU,aAAa,CAGlF,GAAI,GAAiB,EAAU,SAAS,IAAI,CAAE,CAC5C,IAAM,EAAc,EAAU,MAAM,IAAI,CAClC,EAAuB,EAAE,CAC/B,IAAK,IAAM,KAAQ,EACb,EAAK,OAAS,GAChB,EAAW,KAAK,GAAG,EAAU,EAAK,CAAC,CAGnC,EAAW,OAAS,IACtB,EAAU,OAAS,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAQ,GAAG,EAAW,CAAC,CAAC,EAI/D,GAAI,IAAqB,GAAsB,GAAgB,CAC7D,IAAM,EAAQ,EAAiB,MAAM,EAAU,CAC/C,GAAI,EAAM,WAAY,CACpB,EAAU,cAAgB,EAE1B,IAAM,EAAa,EAAM,MAAM,QAAS,GAAM,EAAU,EAAE,CAAC,CAC3D,EAAU,eAAiB,EAC3B,EAAU,OAAS,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAQ,GAAG,EAAW,CAAC,CAAC,EAI/D,EAAQ,KAAK,EAAU,CACvB,EAAW,KAAK,CAAE,MAAO,EAAQ,OAAS,EAAG,QAAO,CAAC,CACrD,SAIF,IAAM,EAAa,EAAe,EAAM,CACxC,GAAI,EAAW,OAAS,EAAG,CAEzB,IAAK,EAAM,OAAS,UAAY,EAAM,OAAS,YAAc,CAAC,EAC5D,SAEF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EACR,SAAU,GACX,CAAC,CACF,SAIF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CAIJ,GAAI,GAAW,EAAW,OAAS,EAAG,CACpC,IAAM,EAAgB,IAAI,EAAc,EAAY,EAAQ,CAE5D,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,OAAQ,IAAK,CAC1C,GAAM,CAAE,QAAO,SAAU,EAAW,GAC9B,EAAY,EAAI,EAAI,EAAW,EAAI,GAAG,MAAQ,KAC9C,EAAY,EAAI,EAAW,OAAS,EAAI,EAAW,EAAI,GAAG,MAAQ,KAElE,EAAS,EAAc,aAC3B,EAAM,MAAQ,GACd,GAAW,MAAQ,KACnB,GAAW,MAAQ,KACnB,CACE,WAAY,GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC1D,WAAY,GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC3D,CACF,CAED,EAAQ,GAAO,cAAgB,EAAO,MACtC,EAAQ,GAAO,WAAa,EAAO,iBAIrC,IAAK,GAAM,CAAE,WAAW,EAAY,CAClC,IAAM,EAAY,EAAQ,GACtB,EAAU,OAAO,OAAS,IAC5B,EAAU,cAAgB,EAAU,OAAO,GAC3C,EAAU,WAAa,EAAU,OAAO,SAAW,EAAI,EAAM,IAKnE,OAAO,EAWT,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACf,CACb,GAAM,CACJ,kBAAkB,GAClB,qBAAqB,GACrB,yBAAyB,IACvB,EAEE,EAAY,EAAY,EAAM,EAAY,EAAQ,CAClD,EAAS,IAAI,IAMb,GAAgB,EAAe,IAC9B,EACD,EACK,EAAqB,EAAO,EAAI,CAElC,EAAa,IAAI,EAAM,CAJD,GAO/B,IAAK,IAAM,KAAS,EAEd,MAAM,SAIV,IAAI,MAEG,IAAM,KAAS,EAAM,OACnB,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,MAKjB,EAAM,gBAGH,EAAa,EAAM,cAAc,EACpC,EAAO,IAAI,EAAM,cAAc,EAMrC,GAAI,EAAM,eAAe,WAAY,CACnC,IAAM,EAAa,EAAM,eACrB,EAAM,eACN,EAAM,cAAc,MAAM,QAAS,GAAM,EAAW,UAAU,EAAE,CAAC,CACrE,IAAK,IAAM,KAAS,EACX,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,EAM3B,OAAO,EA4CT,SAAgB,GACd,EACA,EACA,EAA8B,EAAE,CACb,CACnB,GAAM,CACJ,kBAAkB,GAClB,qBAAqB,GACrB,yBAAyB,GACzB,cAAc,MACd,aAAa,MACb,aAAa,GACb,kBAAkB,GAClB,oBAAoB,IAClB,EAEE,EAAY,EAAY,EAAM,EAAY,EAAQ,CAClD,EAAqB,EAAE,CAMvB,GAAgB,EAAe,IAC9B,EACD,EACK,EAAqB,EAAO,EAAI,CAElC,EAAa,IAAI,EAAM,CAJD,GAO/B,IAAK,IAAM,KAAS,EAAW,CAE7B,GAAI,EAAM,SAAU,SAEpB,IAAI,EAAuB,EAAE,CAO7B,GANI,EACF,EAAa,EAAM,OACV,EAAM,gBACf,EAAa,CAAC,EAAM,cAAc,EAGhC,EAAiB,CACnB,IAAM,EAAM,EAAM,UAAY,GAC9B,GAAI,EAAI,OAAS,EAAG,CAClB,IAAM,EAAW,EAAoB,EAAI,aAAa,CAAG,EACzD,EAAa,CAAC,GAAG,EAAY,EAAS,EAI1C,IAAM,EAAS,CACb,GAAG,IAAI,IAAI,EAAW,OAAQ,GAAU,GAAS,CAAC,EAAa,EAAM,CAAC,CAAC,CACxE,CAEG,EAAO,OAAS,GAClB,EAAO,KAAK,EAAO,CAevB,MAAO,CAAE,SAAQ,MAXH,EACX,IAAK,GAAU,CACd,IAAM,EAAS,EAAM,KAAK,EAAW,CAIrC,OAHI,GAAc,EAAM,OAAS,EACxB,IAAI,EAAO,GAEb,GACP,CACD,OAAQ,GAAS,EAAK,OAAS,EAAE,CACjC,KAAK,EAAY,CAEI,CAqC1B,SAAgB,GACd,EACA,EACA,EACA,EAGI,EAAE,CACa,CACnB,IAAM,EAAQ,YAAY,KAAK,CAE3B,EACA,EAEJ,OAAQ,EAAR,CACE,IAAK,QAAS,CAEZ,IAAM,EAAS,EAAK,MAAM,MAAM,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACtD,EAAmC,EAAE,CAE3C,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAU,EAAM,QAAQ,oCAAqC,GAAG,CACtE,GAAI,EAAS,CACX,IAAM,EAAc,EAAW,UAAU,EAAQ,CACjD,EAAe,KAAK,CAClB,SAAU,EACV,KAAM,OACN,OAAQ,EACR,SAAU,GACV,cAAe,EAAY,GAC3B,WAAY,EAAY,SAAW,EAAI,EAAM,GAC9C,CAAC,EAGN,EAAY,EACZ,EAAS,IAAI,IAAI,EAAe,IAAK,GAAM,EAAE,cAAe,CAAC,OAAO,QAAQ,CAAC,CAC7E,MAGF,IAAK,YAEH,EAAY,EAAY,EAAM,EAAW,CACzC,EAAS,IAAI,IACX,EACG,OAAQ,GAAM,EAAE,OAAS,QAAU,EAAE,OAAO,OAAS,EAAE,CACvD,IAAK,GAAM,EAAE,OAAO,GAAG,CAC3B,CACD,MAGF,IAAK,gBAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACpB,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACpB,CAAC,CACF,MAGF,IAAK,OAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,MAIJ,IAAM,EAAS,YAAY,KAAK,CAAG,EAG7B,EAAa,EAAU,OAAQ,GAAM,EAAE,OAAS,OAAO,CACvD,EAAY,EAAW,OAEvB,EAAkB,EAAW,OAAQ,GAGvC,EAAE,OAAO,OAAS,GAClB,EAAE,EAAE,OAAO,SAAW,GAAK,EAAE,OAAO,KAAO,EAAE,SAAS,aAAa,EAErE,CAAC,OAEG,EAAiB,EAAW,OAAQ,GAAM,EAAE,OAAO,OAAS,EAAE,CAAC,OAE/D,EAAc,EACjB,OAAQ,GAAM,EAAE,aAAe,IAAA,GAAU,CACzC,IAAK,GAAM,EAAE,WAAY,CACtB,EACJ,EAAY,OAAS,EACjB,EAAY,QAAQ,EAAG,IAAM,EAAI,EAAG,EAAE,CAAG,EAAY,OACrD,EAEA,EAAiB,EAAW,OAAQ,GAAM,EAAE,eAAe,WAAW,CAAC,OACvE,EAAkB,EAAU,OAAQ,GAAM,EAAE,SAAS,CAAC,OAE5D,MAAO,CACL,YACA,kBACA,SAAU,EAAY,EAAI,EAAkB,EAAY,EACxD,iBACA,cAAe,EAAY,EAAI,EAAiB,EAAY,EAC5D,gBACA,iBACA,kBACA,aAAc,EAAO,KACrB,SACD"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lemma-is",
3
- "version": "0.7.0",
3
+ "version": "0.9.0",
4
4
  "description": "Icelandic word form to lemma lookup for browser and Node.js",
5
5
  "keywords": [
6
6
  "icelandic",