lemma-is 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -2
- package/dist/index.d.mts +62 -2
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1 -1
- package/dist/index.mjs.map +1 -1
- package/package.json +14 -13
package/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
Fast Icelandic lemmatization for JavaScript. Built for search indexing.
|
|
4
4
|
|
|
5
5
|
```typescript
|
|
6
|
-
import { BinaryLemmatizer, extractIndexableLemmas } from "lemma-is";
|
|
6
|
+
import { BinaryLemmatizer, extractIndexableLemmas, buildSearchQuery } from "lemma-is";
|
|
7
7
|
|
|
8
8
|
lemmatizer.lemmatize("börnin"); // → ["barn"]
|
|
9
9
|
lemmatizer.lemmatize("keypti"); // → ["kaupa"]
|
|
@@ -12,6 +12,10 @@ lemmatizer.lemmatize("hestinum"); // → ["hestur"]
|
|
|
12
12
|
// Full pipeline for search
|
|
13
13
|
extractIndexableLemmas("Börnin keypti hestinn", lemmatizer);
|
|
14
14
|
// → ["barn", "kaupa", "hestur"]
|
|
15
|
+
|
|
16
|
+
// Query normalization (backend-agnostic)
|
|
17
|
+
buildSearchQuery("bílaleigur", lemmatizer);
|
|
18
|
+
// → { groups: [["bílaleiga"]], query: "bílaleiga" }
|
|
15
19
|
```
|
|
16
20
|
|
|
17
21
|
## The Problem
|
|
@@ -165,7 +169,37 @@ const lemmas = extractIndexableLemmas(text, lemmatizer, {
|
|
|
165
169
|
|
|
166
170
|
A search for "sjóður" or "arður" now finds this document.
|
|
167
171
|
|
|
168
|
-
##
|
|
172
|
+
## Query Normalization (Backend-Agnostic)
|
|
173
|
+
|
|
174
|
+
Use the same lemmatization pipeline for **search queries** as for documents.
|
|
175
|
+
The helper returns grouped terms plus a boolean query string:
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
import { buildSearchQuery } from "lemma-is";
|
|
179
|
+
|
|
180
|
+
const { groups, query } = buildSearchQuery("bílaleigur", lemmatizer, {
|
|
181
|
+
removeStopwords: true,
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
// groups: [["bílaleiga"]]
|
|
185
|
+
// query: "bílaleiga"
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
You can swap operators to match your backend:
|
|
189
|
+
|
|
190
|
+
```typescript
|
|
191
|
+
// SQLite FTS5 prefers AND/OR
|
|
192
|
+
const sqlite = buildSearchQuery("við fórum í bíó", lemmatizer, {
|
|
193
|
+
removeStopwords: true,
|
|
194
|
+
andOperator: " AND ",
|
|
195
|
+
orOperator: " OR ",
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
// Elasticsearch can use `groups` to build a bool query
|
|
199
|
+
// (OR within a group, AND across groups)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## PostgreSQL Full-Text Search (Example)
|
|
169
203
|
|
|
170
204
|
PostgreSQL has no built-in Icelandic stemmer. Use lemma-is to pre-process:
|
|
171
205
|
|
|
@@ -183,6 +217,14 @@ Use the `simple` configuration—it lowercases but doesn't stem, since our lemma
|
|
|
183
217
|
|
|
184
218
|
**Important:** Don't use PostgreSQL's `unaccent` extension for Icelandic. Characters like á, ö, þ, ð are distinct letters, not accented variants.
|
|
185
219
|
|
|
220
|
+
For queries:
|
|
221
|
+
|
|
222
|
+
```typescript
|
|
223
|
+
const { query } = buildSearchQuery(userQuery, lemmatizer, { removeStopwords: true });
|
|
224
|
+
const sql = `SELECT * FROM documents WHERE search_vector @@ to_tsquery('simple', $1)`;
|
|
225
|
+
await db.query(sql, [query]);
|
|
226
|
+
```
|
|
227
|
+
|
|
186
228
|
## Limitations
|
|
187
229
|
|
|
188
230
|
This is an early effort with known limitations.
|
package/dist/index.d.mts
CHANGED
|
@@ -293,7 +293,9 @@ declare class Disambiguator {
|
|
|
293
293
|
rightWeight: number;
|
|
294
294
|
usePreferenceRules: boolean;
|
|
295
295
|
useGrammarRules: boolean;
|
|
296
|
+
private morphCache;
|
|
296
297
|
constructor(lemmatizer: LemmatizerLike, bigrams?: BigramProvider | null, options?: DisambiguatorOptions);
|
|
298
|
+
private getMorph;
|
|
297
299
|
/**
|
|
298
300
|
* Disambiguate a single word given context.
|
|
299
301
|
*
|
|
@@ -473,6 +475,15 @@ declare function isKnownPreposition(lemma: string): boolean;
|
|
|
473
475
|
*/
|
|
474
476
|
declare function getGovernedCases(prepLemma: string): Set<GrammaticalCase> | undefined;
|
|
475
477
|
//#endregion
|
|
478
|
+
//#region src/bloom.d.ts
|
|
479
|
+
/**
|
|
480
|
+
* Minimal Bloom filter for compact set membership checks.
|
|
481
|
+
*/
|
|
482
|
+
interface BloomFilterOptions {
|
|
483
|
+
falsePositiveRate?: number;
|
|
484
|
+
maxHashFunctions?: number;
|
|
485
|
+
}
|
|
486
|
+
//#endregion
|
|
476
487
|
//#region src/compounds.d.ts
|
|
477
488
|
/**
|
|
478
489
|
* Protected lemmas that should NEVER be split as compounds.
|
|
@@ -519,7 +530,7 @@ declare class CompoundSplitter {
|
|
|
519
530
|
private tryLinkingLetters;
|
|
520
531
|
private knownLemmas;
|
|
521
532
|
private mode;
|
|
522
|
-
constructor(lemmatizer: LemmatizerLike, knownLemmas:
|
|
533
|
+
constructor(lemmatizer: LemmatizerLike, knownLemmas: KnownLemmaLookup, options?: CompoundSplitterOptions);
|
|
523
534
|
/**
|
|
524
535
|
* Helper to create a no-split result.
|
|
525
536
|
*/
|
|
@@ -549,6 +560,15 @@ declare class CompoundSplitter {
|
|
|
549
560
|
* This is used to check if compound parts are valid words.
|
|
550
561
|
*/
|
|
551
562
|
declare function createKnownLemmaSet(lemmas: string[]): Set<string>;
|
|
563
|
+
interface KnownLemmaLookup {
|
|
564
|
+
has(lemma: string): boolean;
|
|
565
|
+
}
|
|
566
|
+
interface KnownLemmaFilterOptions extends BloomFilterOptions {}
|
|
567
|
+
/**
|
|
568
|
+
* Create a compact lookup for known lemmas using a Bloom filter.
|
|
569
|
+
* False positives are possible (more splits), false negatives are not.
|
|
570
|
+
*/
|
|
571
|
+
declare function createKnownLemmaFilter(lemmas: string[], options?: KnownLemmaFilterOptions): KnownLemmaLookup;
|
|
552
572
|
//#endregion
|
|
553
573
|
//#region src/phrases.d.ts
|
|
554
574
|
/**
|
|
@@ -667,6 +687,46 @@ declare function processText(text: string, lemmatizer: LemmatizerLike, options?:
|
|
|
667
687
|
* @returns Set of unique lemmas suitable for search indexing
|
|
668
688
|
*/
|
|
669
689
|
declare function extractIndexableLemmas(text: string, lemmatizer: LemmatizerLike, options?: ProcessOptions): Set<string>;
|
|
690
|
+
/**
|
|
691
|
+
* Options for building a backend-agnostic boolean search query.
|
|
692
|
+
*/
|
|
693
|
+
interface SearchQueryOptions extends ProcessOptions {
|
|
694
|
+
/** Operator between token groups (AND). Default: " & " */
|
|
695
|
+
andOperator?: string;
|
|
696
|
+
/** Operator between candidate lemmas within a group (OR). Default: " | " */
|
|
697
|
+
orOperator?: string;
|
|
698
|
+
/** Wrap groups with multiple terms in parentheses. Default: true */
|
|
699
|
+
wrapGroups?: boolean;
|
|
700
|
+
/**
|
|
701
|
+
* Include the original token (lowercased) in each group for recall.
|
|
702
|
+
* Useful for unknown words or when you want a fallback.
|
|
703
|
+
* Default: false
|
|
704
|
+
*/
|
|
705
|
+
includeOriginal?: boolean;
|
|
706
|
+
/** Lowercase original tokens when includeOriginal is true. Default: true */
|
|
707
|
+
lowercaseOriginal?: boolean;
|
|
708
|
+
}
|
|
709
|
+
/**
|
|
710
|
+
* Result for a backend-agnostic boolean search query.
|
|
711
|
+
*/
|
|
712
|
+
interface SearchQueryResult {
|
|
713
|
+
/** Lemma groups per token (OR within group, AND between groups) */
|
|
714
|
+
groups: string[][];
|
|
715
|
+
/** Boolean query string using provided operators */
|
|
716
|
+
query: string;
|
|
717
|
+
}
|
|
718
|
+
/**
|
|
719
|
+
* Build a backend-agnostic boolean query string from user input.
|
|
720
|
+
*
|
|
721
|
+
* Use the same lemmatization pipeline as indexing, then:
|
|
722
|
+
* - OR within a token's candidate lemmas
|
|
723
|
+
* - AND across tokens
|
|
724
|
+
*
|
|
725
|
+
* @param text - User search input
|
|
726
|
+
* @param lemmatizer - Lemmatizer instance
|
|
727
|
+
* @param options - Query + processing options
|
|
728
|
+
*/
|
|
729
|
+
declare function buildSearchQuery(text: string, lemmatizer: LemmatizerLike, options?: SearchQueryOptions): SearchQueryResult;
|
|
670
730
|
/**
|
|
671
731
|
* Strategy for benchmark comparisons.
|
|
672
732
|
*/
|
|
@@ -704,5 +764,5 @@ declare function runBenchmark(text: string, lemmatizer: LemmatizerLike, strategy
|
|
|
704
764
|
compoundSplitter?: CompoundSplitter;
|
|
705
765
|
}): ProcessingMetrics;
|
|
706
766
|
//#endregion
|
|
707
|
-
export { type BigramProvider, type BinaryLemmatizeOptions, BinaryLemmatizer, type BinaryLemmatizerOptions, CASE_NAMES, CONTEXTUAL_STOPWORDS, type CompoundSplit, type CompoundSplitMode, CompoundSplitter, type CompoundSplitterOptions, DISAMBIGUATION_RULES, type DisambiguatedToken, type DisambiguationRule, Disambiguator, type DisambiguatorOptions, GENDER_NAMES, type GrammarLemmatizerLike, type GrammarRuleMatch, type GrammaticalCase, type GrammaticalGender, type GrammaticalNumber, type LemmaWithMorph, type LemmaWithPOS, type LemmatizerLike, type MorphFeatures, NOMINATIVE_PRONOUNS, NUMBER_NAMES, PREPOSITION_CASES, PROTECTED_LEMMAS, type ProcessOptions, type ProcessedToken, type ProcessingMetrics, type ProcessingStrategy, STATIC_PHRASES, STOPWORDS_IS, type StaticPhrase, WORD_CLASS_NAMES, WORD_CLASS_NAMES_IS, type WordClass, applyGrammarRules, applyNounAfterPrepositionRule, applyPrepositionRule, applyPronounVerbRule, canGovernCase, createKnownLemmaSet, extractDisambiguatedLemmas, extractIndexableLemmas, getGovernedCases, getPhraseInfo, getRulesForWord, hasDisambiguationRules, isContextualStopword, isKnownPhrase, isKnownPreposition, isStopword, matchPhrase, processText, removeStopwords, runBenchmark };
|
|
767
|
+
export { type BigramProvider, type BinaryLemmatizeOptions, BinaryLemmatizer, type BinaryLemmatizerOptions, CASE_NAMES, CONTEXTUAL_STOPWORDS, type CompoundSplit, type CompoundSplitMode, CompoundSplitter, type CompoundSplitterOptions, DISAMBIGUATION_RULES, type DisambiguatedToken, type DisambiguationRule, Disambiguator, type DisambiguatorOptions, GENDER_NAMES, type GrammarLemmatizerLike, type GrammarRuleMatch, type GrammaticalCase, type GrammaticalGender, type GrammaticalNumber, type KnownLemmaFilterOptions, type KnownLemmaLookup, type LemmaWithMorph, type LemmaWithPOS, type LemmatizerLike, type MorphFeatures, NOMINATIVE_PRONOUNS, NUMBER_NAMES, PREPOSITION_CASES, PROTECTED_LEMMAS, type ProcessOptions, type ProcessedToken, type ProcessingMetrics, type ProcessingStrategy, STATIC_PHRASES, STOPWORDS_IS, type SearchQueryOptions, type SearchQueryResult, type StaticPhrase, WORD_CLASS_NAMES, WORD_CLASS_NAMES_IS, type WordClass, applyGrammarRules, applyNounAfterPrepositionRule, applyPrepositionRule, applyPronounVerbRule, buildSearchQuery, canGovernCase, createKnownLemmaFilter, createKnownLemmaSet, extractDisambiguatedLemmas, extractIndexableLemmas, getGovernedCases, getPhraseInfo, getRulesForWord, hasDisambiguationRules, isContextualStopword, isKnownPhrase, isKnownPreposition, isStopword, matchPhrase, processText, removeStopwords, runBenchmark };
|
|
708
768
|
//# sourceMappingURL=index.d.mts.map
|
package/dist/index.d.mts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/stopwords.ts","../src/types.ts","../src/binary-lemmatizer.ts","../src/disambiguate.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"mappings":";;AAUA;;;;;AA4EA;;cA5Ea,YAAA,EAAY,GAAA;;;AA0FzB;iBAdgB,UAAA,CAAW,IAAA;;;;AAuD3B;;;;;AAiBA;;cA1Da,oBAAA,EAAsB,GAAA,SAAY,GAAA;;;;;;;;;;;iBAyC/B,oBAAA,CAAqB,KAAA,UAAe,GAAA;;;;iBAiBpC,eAAA,kBAAA,CAAkC,KAAA,EAAO,CAAA,KAAM,CAAA;;;;AApJ/D;;;;;AA4EA;;;;KC3EY,SAAA;ADyFZ;;;AAAA,cC1Ea,gBAAA,EAAkB,MAAA,CAAO,SAAA;;ADmHtC;;cCnGa,mBAAA,EAAqB,MAAA,CAAO,SAAA;;;ADoHzC;KCpGY,eAAA;;;;KAKA,iBAAA;;;;KAKA,iBAAA;;;;cAKC,UAAA,EAAY,MAAA,CAAO,eAAA;;;;cAUnB,YAAA,EAAc,MAAA,CAAO,iBAAA;AAzDlC;;;AAAA,cAkEa,YAAA,EAAc,MAAA,CAAO,iBAAA;;AAlDlC;;UA0DiB,aAAA;EACf,IAAA,GAAO,eAAA;EACP,MAAA,GAAS,iBAAA;EACT,MAAA,GAAS,iBAAA;AAAA;;;;UAMM,YAAA;EACf,KAAA;EACA,GAAA,EAAK,SAAA;AAAA;;AA3CP;;UAiDiB,cAAA,SAAuB,YAAA;EACtC,KAAA,GAAQ,aAAA;AAAA;AA7CV;;;;AAAA,UAoDiB,cAAA;EACf,SAAA,CAAU,IAAA;EACV,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;AAnCnC;;UA0CiB,cAAA;EACf,IAAA,CAAK,KAAA,UAAe,KAAA;AAAA;;;UCjEL,uBAAA;EACf,KAAA,UAAe,KAAA;AAAA;AAAA,UAGA,sBAAA;EACf,SAAA,GAAY,SAAA;AAAA;AAAA,cAGD,gBAAA,YAA4B,cAAA,EAAgB,cAAA;EAAA,QAC/C,MAAA;EAAA,QACA,UAAA;EAAA,QACA,YAAA;EAAA,QACA,YAAA;EAAA,QACA,WAAA;EAAA,QACA,WAAA;EAAA,QACA,YAAA;EAAA,QACA,OAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,WAAA;EAAA,QAEA,UAAA;EAAA,QACA,SAAA;EAAA,QACA,UAAA;EAAA,QACA,WAAA;EAAA,QACA,OAAA;EAAA,QAEA,OAAA;EAAA,QAED,WAAA,CAAA;ED3EsC;AAgB/C;;EAhB+C,OCiKhC,IAAA,CACX,GAAA,UACA,OAAA,GAAS,uBAAA,GACR,OAAA,CAAQ,gBAAA;EDpJqB;;AAgBlC;EAhBkC,OCmKzB,cAAA,CAAe,MAAA,EAAQ,WAAA,GAAc,gBAAA;;;;UAOpC,SAAA;EDrJmB;;;EAAA,QC4JnB,QAAA;EDvJE;;;EAAA,QC8JF,OAAA;ED9JmB;AAK7B;;;EAL6B,QCsKnB,QAAA;EDjKqC;AAU/C;;;;ECiLE,SAAA,CAAU,IAAA,UAAc,OAAA,GAAS,sBAAA;EDxKtB;;;;;EAAA,QCkNH,WAAA;ED1MoB;;;;ECwO5B,gBAAA,CAAiB,IAAA,WAAe,YAAA;EDrON;;;;ECqQ1B,kBAAA,CAAmB,IAAA,WAAe,cAAA;EDtQzB;;;EC4ST,gBAAA,CAAA;ED3S0B
;AAM5B;;EC4SE,UAAA,CAAA;ED1Sc;;;EAAA,QCiTN,UAAA;EDjTM;;AAMhB;;ECmVE,UAAA,CAAW,KAAA,UAAe,KAAA;EDnVwB;;;;EC4VlD,IAAA,CAAK,KAAA,UAAe,KAAA;ED3VC;AAOvB;;EC2VE,OAAA,CAAQ,IAAA;EDzVqC;;;EAAA,ICgWzC,eAAA,CAAA;EDhWc;;;EAAA,ICuWd,aAAA,CAAA;EDhWW;;;EAAA,ICuWX,gBAAA,CAAA;EDtWJ;;;EAAA,IC6WI,UAAA,CAAA;ED7W6B;;;;ECqXjC,YAAA,CAAA;AAAA;;;UCxee,oBAAA;EHyHD;EGvHd,UAAA;;EAEA,WAAA;EHqH8D;EGnH9D,kBAAA;EHoI6B;EGlI7B,eAAA;AAAA;AAAA,UAGe,kBAAA;EH+HwC;EG7HvD,KAAA;EH6H6D;EG3H7D,KAAA;EH2H8D;EGzH9D,GAAA,GAAM,SAAA;;EAEN,UAAA;EF5BU;EE8BV,iBAAA,GAAoB,YAAA;;EAEpB,SAAA;EFhCmB;EEkCnB,UAAA;EFRD;EEUC,UAAA;AAAA;;AFLF;;UEWU,mBAAA,SAA4B,cAAA;EACpC,kBAAA,EAAoB,IAAA,WAAe,cAAA;AAAA;AAAA,UAuBpB,yBAAA;EACf,UAAA;EACA,UAAA;AAAA;;AFhBF;;cE6Pa,aAAA;EACX,UAAA,EAAY,mBAAA;EACZ,OAAA,EAAS,cAAA;EACT,UAAA;EACA,WAAA;EACA,kBAAA;EACA,eAAA;
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/stopwords.ts","../src/types.ts","../src/binary-lemmatizer.ts","../src/disambiguate.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/bloom.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"mappings":";;AAUA;;;;;AA4EA;;cA5Ea,YAAA,EAAY,GAAA;;;AA0FzB;iBAdgB,UAAA,CAAW,IAAA;;;;AAuD3B;;;;;AAiBA;;cA1Da,oBAAA,EAAsB,GAAA,SAAY,GAAA;;;;;;;;;;;iBAyC/B,oBAAA,CAAqB,KAAA,UAAe,GAAA;;;;iBAiBpC,eAAA,kBAAA,CAAkC,KAAA,EAAO,CAAA,KAAM,CAAA;;;;AApJ/D;;;;;AA4EA;;;;KC3EY,SAAA;ADyFZ;;;AAAA,cC1Ea,gBAAA,EAAkB,MAAA,CAAO,SAAA;;ADmHtC;;cCnGa,mBAAA,EAAqB,MAAA,CAAO,SAAA;;;ADoHzC;KCpGY,eAAA;;;;KAKA,iBAAA;;;;KAKA,iBAAA;;;;cAKC,UAAA,EAAY,MAAA,CAAO,eAAA;;;;cAUnB,YAAA,EAAc,MAAA,CAAO,iBAAA;AAzDlC;;;AAAA,cAkEa,YAAA,EAAc,MAAA,CAAO,iBAAA;;AAlDlC;;UA0DiB,aAAA;EACf,IAAA,GAAO,eAAA;EACP,MAAA,GAAS,iBAAA;EACT,MAAA,GAAS,iBAAA;AAAA;;;;UAMM,YAAA;EACf,KAAA;EACA,GAAA,EAAK,SAAA;AAAA;;AA3CP;;UAiDiB,cAAA,SAAuB,YAAA;EACtC,KAAA,GAAQ,aAAA;AAAA;AA7CV;;;;AAAA,UAoDiB,cAAA;EACf,SAAA,CAAU,IAAA;EACV,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;AAnCnC;;UA0CiB,cAAA;EACf,IAAA,CAAK,KAAA,UAAe,KAAA;AAAA;;;UCjEL,uBAAA;EACf,KAAA,UAAe,KAAA;AAAA;AAAA,UAGA,sBAAA;EACf,SAAA,GAAY,SAAA;AAAA;AAAA,cAGD,gBAAA,YAA4B,cAAA,EAAgB,cAAA;EAAA,QAC/C,MAAA;EAAA,QACA,UAAA;EAAA,QACA,YAAA;EAAA,QACA,YAAA;EAAA,QACA,WAAA;EAAA,QACA,WAAA;EAAA,QACA,YAAA;EAAA,QACA,OAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,eAAA;EAAA,QACA,WAAA;EAAA,QAEA,UAAA;EAAA,QACA,SAAA;EAAA,QACA,UAAA;EAAA,QACA,WAAA;EAAA,QACA,OAAA;EAAA,QAEA,OAAA;EAAA,QAED,WAAA,CAAA;ED3EsC;AAgB/C;;EAhB+C,OCiKhC,IAAA,CACX,GAAA,UACA,OAAA,GAAS,uBAAA,GACR,OAAA,CAAQ,gBAAA;EDpJqB;;AAgBlC;EAhBkC,OCmKzB,cAAA,CAAe,MAAA,EAAQ,WAAA,GAAc,gBAAA;;;;UAOpC,SAAA;EDrJmB;;;EAAA,QC4JnB,QAAA;EDvJE;;;EAAA,QC8JF,OAAA;ED9JmB;AAK7B;;;EAL6B,QCsKnB,QAAA;EDjKqC;AAU/C;;;;ECiLE,SAAA,CAAU,IAAA,UAAc,OAAA,GAAS,sBAAA;EDxKtB;;;;;EAAA,QCkNH,WAAA;ED1MoB;;;;ECwO5B,gBAAA,CAAiB,IAAA,WAAe,YAAA;EDrON;;;;ECqQ1B,kBAAA,CAAmB,IAAA,WAAe,cAAA;EDtQzB;;;EC4ST
,gBAAA,CAAA;ED3S0B;AAM5B;;EC4SE,UAAA,CAAA;ED1Sc;;;EAAA,QCiTN,UAAA;EDjTM;;AAMhB;;ECmVE,UAAA,CAAW,KAAA,UAAe,KAAA;EDnVwB;;;;EC4VlD,IAAA,CAAK,KAAA,UAAe,KAAA;ED3VC;AAOvB;;EC2VE,OAAA,CAAQ,IAAA;EDzVqC;;;EAAA,ICgWzC,eAAA,CAAA;EDhWc;;;EAAA,ICuWd,aAAA,CAAA;EDhWW;;;EAAA,ICuWX,gBAAA,CAAA;EDtWJ;;;EAAA,IC6WI,UAAA,CAAA;ED7W6B;;;;ECqXjC,YAAA,CAAA;AAAA;;;UCxee,oBAAA;EHyHD;EGvHd,UAAA;;EAEA,WAAA;EHqH8D;EGnH9D,kBAAA;EHoI6B;EGlI7B,eAAA;AAAA;AAAA,UAGe,kBAAA;EH+HwC;EG7HvD,KAAA;EH6H6D;EG3H7D,KAAA;EH2H8D;EGzH9D,GAAA,GAAM,SAAA;;EAEN,UAAA;EF5BU;EE8BV,iBAAA,GAAoB,YAAA;;EAEpB,SAAA;EFhCmB;EEkCnB,UAAA;EFRD;EEUC,UAAA;AAAA;;AFLF;;UEWU,mBAAA,SAA4B,cAAA;EACpC,kBAAA,EAAoB,IAAA,WAAe,cAAA;AAAA;AAAA,UAuBpB,yBAAA;EACf,UAAA;EACA,UAAA;AAAA;;AFhBF;;cE6Pa,aAAA;EACX,UAAA,EAAY,mBAAA;EACZ,OAAA,EAAS,cAAA;EACT,UAAA;EACA,WAAA;EACA,kBAAA;EACA,eAAA;EAAA,QACQ,UAAA;cAGN,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,SACT,OAAA,GAAS,oBAAA;EAAA,QAWH,QAAA;;;;AFhQV;;;;EEiRE,YAAA,CACE,IAAA,UACA,QAAA,iBACA,QAAA,iBACA,IAAA,GAAM,yBAAA,GACL,kBAAA;EF7QQ;;;;;AAQb;EEuUE,eAAA,CAAgB,MAAA,aAAmB,kBAAA;;;;;;;EAoBnC,aAAA,CAAc,MAAA,aAAmB,GAAA;AAAA;;;;iBAenB,0BAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,EAAS,cAAA,EACT,OAAA;EACE,QAAA,IAAY,IAAA;EACZ,eAAA;AAAA,IAED,GAAA;;;;;;;AHlXH;;UIrFiB,kBAAA;EJqFkB;EInFjC,IAAA;EJ4Hc;EI1Hd,MAAA,EAAQ,SAAA;;EAER,IAAA,EAAM,SAAA;EJwHwD;EItH9D,OAAA;EJuI6B;EIrI7B,WAAA;AAAA;;;;;;;;;;cAYW,oBAAA,EAAsB,kBAAA;;;;iBA6InB,eAAA,CAAgB,IAAA,WAAe,kBAAA;AHxJ/C;;;AAAA,iBGgKgB,sBAAA,CAAuB,IAAA;;;AJtFvC;;;AAAA,UK/EiB,qBAAA;EACf,gBAAA,EAAkB,IAAA,WAAe,YAAA;AAAA;;;;;ALwInC;;;;;cK5Ha,iBAAA,EAAmB,GAAA,SAAY,GAAA,CAAI,eAAA;;;;;;cA+CnC,mBAAA,EAAmB,GAAA;;AJtEhC;;UIsFiB,gBAAA;EJtFI;EIwFnB,KAAA;EJzEW;EI2EX,GAAA,EAAK,SAAA;;EAEL,IAAA;EJ7E6C;EI+E7C,UAAA;AAAA;;;;AJ/CF;;;;iBIyDgB,aAAA,CACd,SAAA,UACA,YAAA,EAAc,eAAA;AJtDhB;;;;;AAKA;;;;;AALA,iBIuEgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,aAAA,EAAe,cAAA,KACd,gBAAA;;;;;AJtDH;;;;;AASA;iBI6EgB,oBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,kBACC,gBAAA;;;;AJxEH;;;;;;;;;;;;;;;;iBIqHgB,6BAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,UAAA,
EAAY,qBAAA,UACX,gBAAA;;;;;;;;;AJxGH;;;;;;iBI8JgB,iBAAA,CACd,UAAA,EAAY,cAAA,IACZ,QAAA,iBACA,aAAA,EAAe,cAAA,IACf,UAAA,GAAY,qBAAA,UACX,gBAAA;;;AJ3JH;iBI8KgB,kBAAA,CAAmB,KAAA;;;;iBAOnB,gBAAA,CAAiB,SAAA,WAAoB,GAAA,CAAI,eAAA;;;;ALxSzD;;UMNiB,kBAAA;EACf,iBAAA;EACA,gBAAA;AAAA;;;;ANuIF;;;cOxHa,gBAAA,EAAgB,GAAA;AAAA,UAoFZ,aAAA;EPqDD;EOnDd,IAAA;EPmD6B;EOjD7B,KAAA;EPiD8B;EO/C9B,UAAA;EP+CgD;EO7ChD,UAAA;EP6C8D;EO3C9D,UAAA;AAAA;;;ANxGF;;;;;KMkHY,iBAAA;AAAA,UAEK,uBAAA;;;;ANrFjB;EM0FE,aAAA;;EAEA,iBAAA;EN5FgD;AAgBlD;;;EMiFE,IAAA,GAAO,iBAAA;AAAA;AAAA,cA8EI,gBAAA;EAAA,QACH,UAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,WAAA;EAAA,QACA,IAAA;cAGN,UAAA,EAAY,cAAA,EACZ,WAAA,EAAa,gBAAA,EACb,OAAA,GAAS,uBAAA;;;;UAYH,OAAA;ENjKT;;;;AAKD;;;;EM8KE,KAAA,CAAM,IAAA,WAAe,aAAA;ENrKV;;;EAAA,QMkRH,aAAA;EAAA,QAwBA,QAAA;ENlSO;;;;EMsWf,YAAA,CAAa,IAAA;AAAA;;;;;iBAUC,mBAAA,CAAoB,MAAA,aAAmB,GAAA;AAAA,UAItC,gBAAA;EACf,GAAA,CAAI,KAAA;AAAA;AAAA,UAGW,uBAAA,SAAgC,kBAAA;AN/WjD;;;;AAAA,iBMqXgB,sBAAA,CACd,MAAA,YACA,OAAA,GAAS,uBAAA,GACR,gBAAA;;;;AP3dH;;;;;AA4EA;;;;;AAcA;UQvFiB,YAAA;;EAEf,KAAA;ERqFgD;EQnFhD,UAAA;ER4HkC;EQ1HlC,GAAA;AAAA;;AR2IF;;;cQpIa,cAAA,EAAgB,GAAA,SAAY,YAAA;;;;;iBA6GzB,WAAA,CACd,KAAA,YACA,UAAA;EACG,MAAA,EAAQ,YAAA;EAAc,SAAA;AAAA;;AP/H3B;;iBO+IgB,aAAA,CAAc,IAAA;;;APhI9B;iBOuIgB,aAAA,CAAc,IAAA,WAAe,YAAA;;;;;;UCzG5B,cAAA;ETyEf;ESvEA,QAAA;ET0CiC;ESxCjC,IAAA;ETiFc;ES/Ed,MAAA;;EAEA,QAAA;ET6E8D;ES3E9D,aAAA;ET4F6B;ES1F7B,UAAA;ET0F8D;ESxF9D,aAAA,GAAgB,aAAA;ETwFuC;EStFvD,cAAA;AAAA;;;;UAMe,cAAA;;EAEf,OAAA,GAAU,cAAA;ERrES;EQuEnB,gBAAA,GAAmB,gBAAA;ERvEA;EQyEnB,eAAA;ER1DW;;;;;AAgBb;EQiDE,sBAAA;;EAEA,cAAA;ERnDgD;AAgBlD;;;;;EQ0CE,kBAAA;ERrC2B;;;;AAK7B;;EQuCE,kBAAA;AAAA;;ARlCF;;;;;AAUA;;iBQmCgB,WAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,cAAA;;;AR9BH;;;;;AAQA;iBQwNgB,sBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,cAAA,GACR,GAAA;;;;UAiEc,kBAAA,SAA2B,cAAA;ER1RhB;EQ4R1B,WAAA;ER9RA;EQgSA,UAAA;ER/RA;EQiSA,UAAA;ERhSA;;;;AAMF;EQgSE,eAAA;;EAEA,iBAAA;AAAA;;;;UAMe,iBAAA;ERhSA;EQkSf,MAAA;;EAEA,KAAA;AAAA;;;;;AR5R
F;;;;;;;iBQ0SgB,gBAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,OAAA,GAAS,kBAAA,GACR,iBAAA;;;;KAwES,kBAAA;;;;UAKK,iBAAA;ERjXV;EQmXL,SAAA;ERnXiC;EQqXjC,eAAA;;EAEA,QAAA;;EAEA,cAAA;EP1bsC;EO4btC,aAAA;EP3bA;EO6bA,aAAA;EP1be;EO4bf,cAAA;;EAEA,eAAA;EP7bqB;EO+brB,YAAA;EP5b4B;EO8b5B,MAAA;AAAA;;;;iBAMc,YAAA,CACd,IAAA,UACA,UAAA,EAAY,cAAA,EACZ,QAAA,EAAU,kBAAA,EACV,SAAA;EACE,OAAA,GAAU,cAAA;EACV,gBAAA,GAAmB,gBAAA;AAAA,IAEpB,iBAAA"}
|
package/dist/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import{tokenize as e}from"tokenize-is";const t=new Set(`á.að.aðra.aðrar.aðrir.af.alla.allan.allar.allir.allnokkra.allnokkrar.allnokkrir.allnokkru.allnokkrum.allnokkuð.allnokkur.allnokkurn.allnokkurra.allnokkurrar.allnokkurri.allnokkurs.allnokkurt.allra.allrar.allri.alls.allt.alltað.allur.án.andspænis.annað.annaðhvort.annan.annar.annarra.annarrar.annarri.annars.árla.ásamt.auk.austan.austanundir.austur.báða.báðar.báðir.báðum.bæði.bak.beggja.eða.eður.ef.eftir.ég.ein.eina.einar.einhver.einhverja.einhverjar.einhverjir.einhverju.einhverjum.einhvern.einhverra.einhverrar.einhverri.einhvers.einir.einn.einna.einnar.einni.eins.einskis.einu.einum.eitt.eitthvað.eitthvert.ekkert.ella.ellegar.en.enda.enga.engan.engar.engin.enginn.engir.engra.engrar.engri.engu.engum.er.fáein.fáeina.fáeinar.fáeinir.fáeinna.fáeinum.fjær.fjarri.flestalla.flestallan.flestallar.flestallir.flestallra.flestallrar.flestallri.flestalls.flestallt.flestallur.flestöll.flestöllu.flestöllum.frá.fram.fyrir.fyrst.gagnstætt.gagnvart.gegn.gegnt.gegnum.hana.handa.handan.hann.hans.heldur.hennar.henni.hið.hin.hina.hinar.hinir.hinn.hinna.hinnar.hinni.hins.hinu.hinum.hitt.hjá.honum.hún.hvað.hvaða.hvenær.hver.hverja.hverjar.hverjir.hverju.hverjum.hvern.hverra.hverrar.hverri.hvers.hvert.hvílík.hvílíka.hvílíkan.hvílíkar.hvílíkir.hvílíkra.hvílíkrar.hvílíkri.hvílíks.hvílíkt.hvílíku.hvílíkum.hvílíkur.hvor.hvora.hvorar.hvorir.hvorki.hvorn.hvorra.hvorrar.hvorri.hvors.hvort.hvoru.hvorug.hvoruga.hvorugan.hvorugar.hvorugir.hvorugra.hvorugrar.hvorugri.hvorugs.hvorugt.hvorugu.hvorugum.hvorugur.hvorum.í.inn.innan.innanundir.jafnframt.jafnhliða.kring.kringum.með.meðal.meðan.meður.mér.mestalla.mestallan.mestallar.mestallir.mestallra.mestallrar.mestallri.mestalls.mestallt.mestallur.mestöll.mestöllu.mestöllum.miðli.mig.milli.millum.mín.mína.mínar.mínir.minn.minna.minnar.minni.míns.mínu.mínum.mitt.mót.móti.nær.nærri.næst.næstum.nálægt.né.neðan.nein.neina.neinar.neinir.neinn.neinna.neinnar.neinni.neins.neinu.neinum.neitt.nema.niður.nokkra.n
okkrar.nokkrir.nokkru.nokkrum.nokkuð.nokkur.nokkurn.nokkurra.nokkurrar.nokkurri.nokkurs.nokkurt.norðan.nú.öðru.öðrum.of.ofan.ofar.og.óháð.okkar.okkur.öll.öllu.öllum.önnur.órafjarri.oss.sá.sakir.sama.saman.samar.samfara.samhliða.sami.samir.samkvæmt.samra.samrar.samri.sams.samskipa.samt.samtímis.samur.sem.sér.sérhvað.sérhver.sérhverja.sérhverjar.sérhverjir.sérhverju.sérhverjum.sérhvern.sérhverra.sérhverrar.sérhverri.sérhvers.sérhvert.síðan.síðla.sig.sín.sína.sínar.sínhver.sínhverja.sínhverjar.sínhverjir.sínhverju.sínhverjum.sínhvern.sínhverra.sínhverrar.sínhverri.sínhvers.sínhvert.sínhvor.sínhvora.sínhvorar.sínhvorir.sínhvorn.sínhvorra.sínhvorrar.sínhvorri.sínhvors.sínhvort.sínhvoru.sínhvorum.sínir.sinn.sinna.sinnar.sinnhver.sinnhverja.sinnhverjar.sinnhverjir.sinnhverju.sinnhverjum.sinnhvern.sinnhverra.sinnhverrar.sinnhverri.sinnhvers.sinnhvert.sinnhvor.sinnhvora.sinnhvorar.sinnhvorir.sinnhvorn.sinnhvorra.sinnhvorrar.sinnhvorri.sinnhvors.sinnhvort.sinnhvoru.sinnhvorum.sinni.síns.sínu.sínum.sitt.sitthvað.sitthver.sitthverja.sitthverjar.sitthverjir.sitthverju.sitthverjum.sitthvern.sitthverra.sitthverrar.sitthverri.sitthvers.sitthvert.sitthvor.sitthvora.sitthvorar.sitthvorir.sitthvorn.sitthvorra.sitthvorrar.sitthvorri.sitthvors.sitthvort.sitthvoru.sitthvorum.sjálf.sjálfa.sjálfan.sjálfar.sjálfir.sjálfra.sjálfrar.sjálfri.sjálfs.sjálft.sjálfu.sjálfum.sjálfur.slík.slíka.slíkan.slíkar.slíkir.slíkra.slíkrar.slíkri.slíks.slíkt.slíku.slíkum.slíkur.snemma.sökum.söm.sömu.sömum.sú.sum.suma.suman.sumar.sumir.sumra.sumrar.sumri.sums.sumt.sumu.sumum.sumur.sunnan.svo.til.tráss.um.umfram.umhverfis.undan.undir.uns.upp.úr.út.utan.útundan.vegna.vér.vestan.vestur.vettugi.við.viður.vor.vora.vorar.vorir.vorn.vorra.vorrar.vorri.vors.vort.voru.vorum.yðar.yður.yfir.ykkar.ykkur.ýmis.ýmiss.ýmissa.ýmissar.ýmissi.ýmist.ýmsa.ýmsan.ýmsar.ýmsir.ýmsu.ýmsum.þá.það.þær.þann.þar.þau.þegar.þeim.þeir.þeirra.þeirrar.þeirri.þennan.þér.þess.þessa.þessar.þessara.þessarar.þessari.þessi.þessir.þessu.þessum.þetta.þ
ið.þig.þín.þína.þínar.þínir.þinn.þinna.þinnar.þinni.þíns.þínu.þínum.þitt.þó.þónokkra.þónokkrar.þónokkrir.þónokkru.þónokkrum.þónokkuð.þónokkur.þónokkurn.þónokkurra.þónokkurrar.þónokkurri.þónokkurs.þónokkurt.þótt.þú.því.þvílík.þvílíka.þvílíkan.þvílíkar.þvílíkir.þvílíkra.þvílíkrar.þvílíkri.þvílíks.þvílíkt.þvílíku.þvílíkum.þvílíkur`.split(`.`));function n(e){return t.has(e.toLowerCase())}const r=new Map([[`á`,new Set([`fs`,`ao`])],[`við`,new Set([`fs`,`fn`])],[`af`,new Set([`fs`,`ao`])],[`til`,new Set([`fs`])],[`um`,new Set([`fs`])],[`frá`,new Set([`fs`])],[`yfir`,new Set([`fs`,`ao`])],[`undir`,new Set([`fs`,`ao`])],[`fyrir`,new Set([`fs`,`ao`])],[`eftir`,new Set([`fs`,`ao`])],[`gegn`,new Set([`fs`])],[`hjá`,new Set([`fs`])],[`úr`,new Set([`fs`])],[`í`,new Set([`fs`])]]);function i(e,n){let i=e.toLowerCase(),a=r.get(i);return a&&n?a.has(n):t.has(i)}function a(e){return e.filter(e=>!n(e))}const o=1279610177,s=[`no`,`so`,`lo`,`ao`,`fs`,`fn`,`st`,`to`,`gr`,`uh`],c=[void 0,`nf`,`þf`,`þgf`,`ef`],l=[void 0,`kk`,`kvk`,`hk`],u=[`et`,`ft`];var d=class e{buffer;stringPool;lemmaOffsets;lemmaLengths;wordOffsets;wordLengths;entryOffsets;entries;bigramW1Offsets;bigramW1Lengths;bigramW2Offsets;bigramW2Lengths;bigramFreqs;lemmaCount;wordCount;entryCount;bigramCount;version;decoder=new TextDecoder(`utf-8`);constructor(e){this.buffer=e;let t=new DataView(e),n=t.getUint32(0,!0);if(n!==o)throw Error(`Invalid binary format: expected magic 0x${o.toString(16)}, got 0x${n.toString(16)}`);if(this.version=t.getUint32(4,!0),this.version!==1&&this.version!==2)throw Error(`Unsupported version: ${this.version}`);let r=t.getUint32(8,!0);this.lemmaCount=t.getUint32(12,!0),this.wordCount=t.getUint32(16,!0),this.entryCount=t.getUint32(20,!0),this.bigramCount=t.getUint32(24,!0);let i=32;this.stringPool=new Uint8Array(e,i,r),i+=r,this.lemmaOffsets=new Uint32Array(e,i,this.lemmaCount),i+=this.lemmaCount*4,this.lemmaLengths=new 
Uint8Array(e,i,this.lemmaCount),i+=this.lemmaCount,i=i+3&-4,this.wordOffsets=new Uint32Array(e,i,this.wordCount),i+=this.wordCount*4,this.wordLengths=new Uint8Array(e,i,this.wordCount),i+=this.wordCount,i=i+3&-4,this.entryOffsets=new Uint32Array(e,i,this.wordCount+1),i+=(this.wordCount+1)*4,this.entries=new Uint32Array(e,i,this.entryCount),i+=this.entryCount*4,this.bigramW1Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW1Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramW2Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW2Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramFreqs=new Uint32Array(e,i,this.bigramCount)}static async load(t,n={}){let r=await(n.fetch??fetch)(t);if(!r.ok)throw Error(`Failed to load binary data: ${r.status}`);return new e(await r.arrayBuffer())}static loadFromBuffer(t){return new e(t)}getString(e,t){return this.decoder.decode(this.stringPool.subarray(e,e+t))}getLemma(e){return this.getString(this.lemmaOffsets[e],this.lemmaLengths[e])}getWord(e){return this.getString(this.wordOffsets[e],this.wordLengths[e])}findWord(e){let t=0,n=this.wordCount-1;for(;t<=n;){let r=t+n>>>1,i=this.getWord(r);if(i===e)return r;i<e?t=r+1:n=r-1}return-1}lemmatize(e,t={}){let n=e.toLowerCase(),r=this.findWord(n);if(r===-1)return[n];let i=this.entryOffsets[r],a=this.entryOffsets[r+1],{wordClass:o}=t,c=new Set,l=[];for(let e=i;e<a;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=s[n];if(o&&r!==o)continue;let i=this.getLemma(t);c.has(i)||(c.add(i),l.push(i))}return l.length===0?[n]:l}unpackEntry(e){return this.version===1?{lemmaIdx:e>>>4,posCode:e&15,caseCode:0,genderCode:0,numberCode:0}:{lemmaIdx:e>>>10,posCode:e&15,caseCode:e>>>4&7,genderCode:e>>>7&3,numberCode:e>>>9&1}}lemmatizeWithPOS(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=new 
Set,o=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=this.getLemma(t),i=s[n]??``,c=`${r}:${i}`;a.has(c)||(a.add(c),o.push({lemma:r,pos:i}))}return o}lemmatizeWithMorph(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n,caseCode:r,genderCode:i,numberCode:o}=this.unpackEntry(this.entries[e]),d={},f=c[r],p=l[i],m=u[o];f&&(d.case=f),p&&(d.gender=p),m&&(d.number=m),a.push({lemma:this.getLemma(t),pos:s[n]??``,morph:Object.keys(d).length>0?d:void 0})}return a}hasMorphFeatures(){return this.version>=2}getVersion(){return this.version}findBigram(e,t){let n=0,r=this.bigramCount-1;for(;n<=r;){let i=n+r>>>1,a=this.getString(this.bigramW1Offsets[i],this.bigramW1Lengths[i]);if(a<e)n=i+1;else if(a>e)r=i-1;else{let e=this.getString(this.bigramW2Offsets[i],this.bigramW2Lengths[i]);if(e===t)return i;e<t?n=i+1:r=i-1}}return-1}bigramFreq(e,t){let n=this.findBigram(e.toLowerCase(),t.toLowerCase());return n===-1?0:this.bigramFreqs[n]}freq(e,t){return this.bigramFreq(e,t)}isKnown(e){return this.findWord(e.toLowerCase())!==-1}get lemmaCountValue(){return this.lemmaCount}get wordFormCount(){return this.wordCount}get bigramCountValue(){return this.bigramCount}get bufferSize(){return this.buffer.byteLength}getAllLemmas(){let e=[];for(let t=0;t<this.lemmaCount;t++)e.push(this.getLemma(t));return e}};const f=[{word:`á`,prefer:`so`,over:`fs`,context:`after_pronoun`,description:`á after pronoun = verb 'eiga' (I own, you own)`},{word:`á`,prefer:`fs`,over:`so`,context:`before_noun`,description:`á before noun = preposition (on, at)`},{word:`við`,prefer:`fn`,over:`fs`,context:`sentence_start`,description:`við at sentence start = pronoun 'we'`},{word:`við`,prefer:`fs`,over:`fn`,context:`before_noun`,description:`við before noun = preposition 'by/at'`},{word:`af`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`af before noun = preposition 
'of/from'`},{word:`til`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`til before noun = preposition 'to'`},{word:`um`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`um before noun = preposition 'about/around'`},{word:`yfir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`yfir before noun = preposition 'over'`},{word:`undir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`undir before noun = preposition 'under'`},{word:`fyrir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`fyrir before noun = preposition 'for/before'`},{word:`eftir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`eftir before noun = preposition 'after'`},{word:`frá`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`frá before noun = preposition 'from'`},{word:`með`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`með before noun = preposition 'with'`},{word:`í`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`í before noun = preposition 'in'`},{word:`úr`,prefer:`fs`,over:`no`,context:`before_noun`,description:`úr before noun = preposition 'out of'`}];function p(e){let t=e.toLowerCase();return f.filter(e=>e.word===t)}function m(e){return f.some(t=>t.word===e.toLowerCase())}const h=new Map([[`á`,new Set([`þf`,`þgf`])],[`í`,new Set([`þf`,`þgf`])],[`við`,new Set([`þf`,`þgf`])],[`með`,new Set([`þf`,`þgf`])],[`undir`,new Set([`þf`,`þgf`])],[`yfir`,new Set([`þf`,`þgf`])],[`fyrir`,new Set([`þf`,`þgf`])],[`um`,new Set([`þf`])],[`gegnum`,new Set([`þf`])],[`kringum`,new Set([`þf`])],[`umhverfis`,new Set([`þf`])],[`af`,new Set([`þgf`])],[`frá`,new Set([`þgf`])],[`hjá`,new Set([`þgf`])],[`úr`,new Set([`þgf`])],[`að`,new Set([`þgf`])],[`móti`,new Set([`þgf`])],[`nálægt`,new Set([`þgf`])],[`gegn`,new Set([`þgf`])],[`gagnvart`,new Set([`þgf`])],[`handa`,new Set([`þgf`])],[`meðal`,new Set([`ef`])],[`til`,new Set([`ef`])],[`án`,new Set([`ef`])],[`vegna`,new Set([`ef`])],[`sakir`,new Set([`ef`])],[`utan`,new Set([`ef`])],[`innan`,new 
Set([`ef`])],[`meðfram`,new Set([`þgf`])],[`milli`,new Set([`ef`])],[`auk`,new Set([`ef`])],[`í stað`,new Set([`ef`])]]),g=new Set([`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`]);function _(e,t){return t?h.get(e)?.has(t)??!1:!1}function v(e,t){let n=e.filter(e=>e.pos===`fs`);if(n.length===0)return null;for(let e of n)for(let n of t)if(n.morph?.case&&_(e.lemma,n.morph.case))return{lemma:e.lemma,pos:`fs`,rule:`prep+${n.morph.case}`,confidence:.9};return null}function y(e,t){if(!t)return null;let n=t.toLowerCase();if(!g.has(n))return null;let r=e.filter(e=>e.pos===`so`);return r.length===0||!e.some(e=>e.pos!==`so`)?null:{lemma:(r.find(e=>e.lemma===`eiga`)??r[0]).lemma,pos:`so`,rule:`pronoun+verb`,confidence:.85}}function b(e,t,n){if(!t||!n?.lemmatizeWithPOS)return null;let r=n.lemmatizeWithPOS(t),i=r.find(e=>e.pos===`fs`);if(!i)return null;let a=r.some(e=>e.pos===`fn`),o=e.some(e=>e.pos===`so`);if(a&&o)return null;let s=h.get(i.lemma);if(!s)return null;let c=e.filter(e=>e.pos===`no`);for(let e of c)if(e.morph?.case&&s.has(e.morph.case))return{lemma:e.lemma,pos:`no`,rule:`noun_after_prep+${e.morph.case}`,confidence:.9};return null}function x(e,t,n,r=null){return v(e,n)||b(e,t,r)||y(e,t)||null}function S(e){return h.has(e)}function C(e){return h.get(e)}const w={name:`unambiguous`,run(e){return e.length===1?{lemma:e[0].lemma,pos:e[0].pos,confidence:1}:null}},T={name:`preference_rules`,run(e,t,n){if(!n.usePreferenceRules)return null;for(let n of f){let r=E(n,e,t);if(r)return{lemma:r.lemma,pos:r.pos,confidence:.85}}return null}};function E(e,t,n){let r=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.prefer),i=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.over);if(!r||!i)return null;if(e.context===`before_noun`){let e=n.nextWord;if(e&&/^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(e))return r}else if(e.context===`before_verb`){let e=n.nextWord?.toLowerCase();if(e&&![`þessi`,`þetta`,`sá`,`sú`,`það`,`hinn`,`hin`,`hið`].includes(e))return r}else 
if(e.context===`after_pronoun`){let e=n.prevWord?.toLowerCase();if(e&&[`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`].includes(e))return r}return null}const D=[w,T,{name:`grammar_rules`,run(e,t,n){if(!n.useGrammarRules)return null;let r=e.map(e=>({...e,morph:void 0}));if(n.lemmatizer.lemmatizeWithMorph){let e=t.allTokens[t.index];if(e){let t=n.lemmatizer.lemmatizeWithMorph(e);r.length=0,r.push(...t)}}let i=x(r,t.prevWord,t.nextWordMorph??[],n.lemmatizer);return i?{lemma:i.lemma,pos:i.pos,confidence:i.confidence}:null}},{name:`word_bigrams`,run(e,t,n){if(!n.bigrams||e.length===0)return null;let r=[];for(let i of e){let e=0;if(t.prevWord){let r=t.prevLemmas||n.lemmatizer.lemmatize(t.prevWord);for(let t of r){let r=n.bigrams.freq(t,i.lemma);r>0&&(e+=Math.log(r+1)*n.leftWeight)}}if(t.nextWord){let r=t.nextLemmas||n.lemmatizer.lemmatize(t.nextWord);for(let t of r){let r=n.bigrams.freq(i.lemma,t);r>0&&(e+=Math.log(r+1)*n.rightWeight)}}r.push({candidate:i,score:e})}if(r.sort((e,t)=>t.score-e.score),r.length>0&&r[0].score>0){let e=r[0].score,t=r.reduce((e,t)=>e+Math.exp(t.score),0),n=t>0?Math.exp(e)/t:.5;return{lemma:r[0].candidate.lemma,pos:r[0].candidate.pos,confidence:n}}return null}},{name:`fallback`,run(e){return e.length>0?{lemma:e[0].lemma,pos:e[0].pos,confidence:1/e.length}:null}}];var O=class{lemmatizer;bigrams;leftWeight;rightWeight;usePreferenceRules;useGrammarRules;constructor(e,t=null,n={}){this.lemmatizer=e,this.bigrams=t,this.leftWeight=n.leftWeight??1,this.rightWeight=n.rightWeight??1,this.usePreferenceRules=n.usePreferenceRules??!0,this.useGrammarRules=n.useGrammarRules??!0}disambiguate(e,t,n,r={}){let i;i=this.lemmatizer.lemmatizeWithPOS?this.lemmatizer.lemmatizeWithPOS(e):this.lemmatizer.lemmatize(e).map(e=>({lemma:e,pos:`no`}));let a=i.map(e=>e.lemma),o=e,s;n&&this.lemmatizer.lemmatizeWithMorph&&(s=this.lemmatizer.lemmatizeWithMorph(n));let 
c={prevWord:t,nextWord:n,prevLemmas:r.prevLemmas,nextLemmas:r.nextLemmas,nextWordMorph:s,allTokens:[e],index:0};for(let e of D){let t=e.run(i,c,this);if(t)return{token:o,lemma:t.lemma,pos:t.pos,candidates:a,candidatesWithPOS:i,ambiguous:a.length>1,confidence:t.confidence,resolvedBy:e.name}}return{token:o,lemma:e.toLowerCase(),candidates:a,candidatesWithPOS:i,ambiguous:!1,confidence:0,resolvedBy:`none`}}disambiguateAll(e){let t=[];for(let n=0;n<e.length;n++){let r=e[n],i=n>0?e[n-1]:null,a=n<e.length-1?e[n+1]:null;t.push(this.disambiguate(r,i,a))}return t}extractLemmas(e){let t=new Set,n=this.disambiguateAll(e);for(let e of n)t.add(e.lemma);return t}};function k(e,n,r,i={}){let{tokenize:a,removeStopwords:o}=i,s=a?a(e):e.split(/\s+/).filter(e=>e.length>0).map(e=>e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``)).filter(e=>e.length>0),c=new O(n,r).extractLemmas(s);if(o)for(let e of c)t.has(e)&&c.delete(e);return c}const A={no:`noun`,so:`verb`,lo:`adjective`,ao:`adverb`,fs:`preposition`,fn:`pronoun`,st:`conjunction`,to:`numeral`,gr:`article`,uh:`interjection`},j={no:`nafnorð`,so:`sagnorð`,lo:`lýsingarorð`,ao:`atviksorð`,fs:`forsetning`,fn:`fornafn`,st:`samtenging`,to:`töluorð`,gr:`greinir`,uh:`upphrópun`},M={nf:`nominative`,þf:`accusative`,þgf:`dative`,ef:`genitive`},N={kk:`masculine`,kvk:`feminine`,hk:`neuter`},P={et:`singular`,ft:`plural`},F=new 
Set(`ísland.england.írland.skotland.finnland.grænland.holland.þýskaland.frakkland.pólland.tékkland.svissland.rússland.eistland.lettland.litháen.danmörk.noregur.svíþjóð.bandaríkin.spánn.portúgal.ítalía.grikkland.þingvellir.akureyri.ísafjörður.reykjavík.keflavík.hafnarfjörður.kópavogur.seltjarnarnes.garðabær.mosfellsbær.vestmannaeyjar.húsavík.sauðárkrókur.siglufjörður.ólafsfjörður.dalvík.egilsstaðir.neskaupstaður.seyðisfjörður.eskifjörður.reyðarfjörður.fáskrúðsfjörður.stöðvarfjörður.djúpivogur.höfn.vík.selfoss.hveragerði.þorlákshöfn.grindavík.sandgerði.borgarnes.stykkishólmur.grundarfjörður.ólafsvík.búðardalur.patreksfjörður.flateyri.suðureyri.bolungarvík.hólmavík.hvammstangi.blönduós.skagaströnd.varmahlíð.hlíðarendi.bergþórshvol.íslandsbanki.landsbankinn.arionbanki.alþingi`.split(`.`)),I=new Set(`maður.kona.stjóri.ráðherra.forseti.formaður.fulltrúi.starfsmaður.hús.staður.vegur.borg.bær.dalur.fjörður.félag.banki.sjóður.stofnun.ráð.rannsókn.greiðsla.mál.kerfi.verk.þjónusta.rekstur.viðskipti.verð.kostnaður`.split(`.`)),L=new Set([`vera`,`hafa`,`gera`,`fara`,`koma`,`segja`,`vilja`,`mega`,`þurfa`,`verða`,`geta`,`sjá`,`taka`,`eiga`,`láta`,`halda`,`leyfa`,`búa`]),R=[`s`,`u`,`a`];var z=class{lemmatizer;minPartLength;tryLinkingLetters;knownLemmas;mode;constructor(e,t,n={}){this.lemmatizer=e,this.knownLemmas=t,this.minPartLength=n.minPartLength??3,this.tryLinkingLetters=n.tryLinkingLetters??!0,this.mode=n.mode??`balanced`}noSplit(e,t){return{word:e,parts:t,indexTerms:t,confidence:0,isCompound:!1}}split(e){let t=e.toLowerCase(),n=this.lemmatizer.lemmatize(e),r=n[0]?.toLowerCase();if(r&&F.has(r)||F.has(t))return this.noSplit(e,n);let i=n.length>0&&n[0].toLowerCase()!==t,a=n.length===1;if(this.mode===`conservative`)return e.includes(`-`)?this.splitAtHyphen(e,n):this.noSplit(e,n);if(this.mode===`balanced`&&i&&a&&t.length<12||t.length<this.minPartLength*2)return this.noSplit(e,n);let o=[];for(let e=this.minPartLength;e<=t.length-this.minPartLength;e++){let 
n=t.slice(0,e),r=t.slice(e),i=this.trySplit(n,r);if(i&&o.push(i),this.tryLinkingLetters){for(let e of R)if(n.endsWith(e)&&n.length>this.minPartLength){let e=n.slice(0,-1),t=this.trySplit(e,r);t&&o.push({...t,score:t.score*.95})}}}if(o.length===0)return this.noSplit(e,n);o.sort((e,t)=>t.score-e.score);let s=o[0];if(this.mode===`balanced`&&i&&s.score<.6)return this.noSplit(e,n);let c=[...new Set([...s.leftParts,...s.rightParts])];return{word:e,parts:c,indexTerms:[...new Set([...c,t])],confidence:Math.min(s.score,1),isCompound:!0}}splitAtHyphen(e,t){let n=e.split(`-`).filter(e=>e.length>0);if(n.length<2)return this.noSplit(e,t);let r=[];for(let e of n){let t=this.lemmatizer.lemmatize(e);r.push(...t)}let i=[...new Set(r)];return{word:e,parts:i,indexTerms:[...new Set([...i,e.toLowerCase()])],confidence:.9,isCompound:!0}}trySplit(e,t){let n=this.lemmatizer.lemmatize(e),r=this.lemmatizer.lemmatize(t),i=[...new Set(n.filter(e=>this.knownLemmas.has(e)))],a=[...new Set(r.filter(e=>this.knownLemmas.has(e)))];if(i.length===0||a.length===0)return null;let o=0,s=1-Math.abs(e.length-t.length)/(e.length+t.length);o+=s*.2;let c=(e.length+t.length)/2,l=Math.min(c/6,1);o+=l*.2,a.some(e=>I.has(e))&&(o+=.3);let u=i.some(e=>L.has(e)),d=a.some(e=>L.has(e));return u&&d?o-=.3:!u&&!d&&(o+=.2),(e.length<4||t.length<4)&&(o-=.15),{leftParts:i,rightParts:a,score:Math.max(0,o)}}getAllLemmas(e){return this.split(e).indexTerms}};function B(e){return new Set(e.map(e=>e.toLowerCase()))}const V=new Map([[`til dæmis`,{lemma:`til dæmi`,isStopword:!0,pos:`ao`}],[`með öðrum orðum`,{lemma:`með annar orð`,isStopword:!0,pos:`ao`}],[`í raun`,{lemma:`í raun`,isStopword:!0,pos:`ao`}],[`í raun og veru`,{lemma:`í raun og vera`,isStopword:!0,pos:`ao`}],[`af og til`,{lemma:`af og til`,isStopword:!0,pos:`ao`}],[`aftur á móti`,{lemma:`aftur á mót`,isStopword:!0,pos:`ao`}],[`alla vega`,{lemma:`allur vegur`,isStopword:!0,pos:`ao`}],[`alls ekki`,{lemma:`alls ekki`,isStopword:!0,pos:`ao`}],[`alls staðar`,{lemma:`allur 
staður`,isStopword:!0,pos:`ao`}],[`allt í allt`,{lemma:`allur í allur`,isStopword:!0,pos:`ao`}],[`annars vegar`,{lemma:`annar vegur`,isStopword:!0,pos:`ao`}],[`auk þess`,{lemma:`auk það`,isStopword:!0,pos:`ao`}],[`að auki`,{lemma:`að auki`,isStopword:!0,pos:`ao`}],[`að vísu`,{lemma:`að vís`,isStopword:!0,pos:`ao`}],[`að sjálfsögðu`,{lemma:`að sjálfsagður`,isStopword:!0,pos:`ao`}],[`að minnsta kosti`,{lemma:`að lítill kostur`,isStopword:!0,pos:`ao`}],[`að öllu leyti`,{lemma:`að allur leyti`,isStopword:!0,pos:`ao`}],[`að nokkru leyti`,{lemma:`að nokkur leyti`,isStopword:!0,pos:`ao`}],[`ef til vill`,{lemma:`ef til vilja`,isStopword:!0,pos:`ao`}],[`einhvers staðar`,{lemma:`einhver staður`,isStopword:!0,pos:`ao`}],[`einhvern veginn`,{lemma:`einhver vegur`,isStopword:!0,pos:`ao`}],[`ekki síst`,{lemma:`ekki síður`,isStopword:!0,pos:`ao`}],[`engu að síður`,{lemma:`enginn að síður`,isStopword:!0,pos:`ao`}],[`fyrst og fremst`,{lemma:`snemma og fremri`,isStopword:!0,pos:`ao`}],[`hins vegar`,{lemma:`hinn vegur`,isStopword:!0,pos:`ao`}],[`hér og þar`,{lemma:`hér og þar`,isStopword:!0,pos:`ao`}],[`hér um bil`,{lemma:`hér um bil`,isStopword:!0,pos:`ao`}],[`hér á landi`,{lemma:`hér á land`,isStopword:!0,pos:`ao`}],[`hvað mest`,{lemma:`hvað mjög`,isStopword:!0,pos:`ao`}],[`hverju sinni`,{lemma:`hver sinn`,isStopword:!0,pos:`ao`}],[`hvorki né`,{lemma:`hvorki né`,isStopword:!0,pos:`ao`}],[`í burtu`,{lemma:`í burtu`,isStopword:!0,pos:`ao`}],[`í gær`,{lemma:`í gær`,isStopword:!0,pos:`ao`}],[`í senn`,{lemma:`í senn`,isStopword:!0,pos:`ao`}],[`í sífellu`,{lemma:`í sífella`,isStopword:!0,pos:`ao`}],[`lengi vel`,{lemma:`lengi vel`,isStopword:!0,pos:`ao`}],[`meira að segja`,{lemma:`mikill að segja`,isStopword:!0,pos:`ao`}],[`meira og minna`,{lemma:`mikill og lítill`,isStopword:!0,pos:`ao`}],[`meðal annars`,{lemma:`meðal annar`,isStopword:!0,pos:`ao`}],[`nokkurn veginn`,{lemma:`nokkur vegur`,isStopword:!0,pos:`ao`}],[`og svo framvegis`,{lemma:`og svo 
framvegis`,isStopword:!0,pos:`ao`}],[`satt að segja`,{lemma:`sannur að segja`,isStopword:!0,pos:`ao`}],[`sem betur fer`,{lemma:`sem vel fara`,isStopword:!0,pos:`ao`}],[`smám saman`,{lemma:`smátt saman`,isStopword:!0,pos:`ao`}],[`svo sem`,{lemma:`svo sem`,isStopword:!0,pos:`ao`}],[`sér í lagi`,{lemma:`sér í lag`,isStopword:!0,pos:`ao`}],[`til og frá`,{lemma:`til og frá`,isStopword:!0,pos:`ao`}],[`til baka`,{lemma:`til baka`,isStopword:!0,pos:`ao`}],[`vítt og breitt`,{lemma:`vítt og breitt`,isStopword:!0,pos:`ao`}],[`á ný`,{lemma:`á ný`,isStopword:!0,pos:`ao`}],[`á meðan`,{lemma:`á meðan`,isStopword:!0,pos:`ao`}],[`á sama tíma`,{lemma:`á samur tími`,isStopword:!0,pos:`ao`}],[`á hinn bóginn`,{lemma:`á hinn bógur`,isStopword:!0,pos:`ao`}],[`þar af leiðandi`,{lemma:`þar af leiða`,isStopword:!0,pos:`ao`}],[`þar að auki`,{lemma:`þar að auki`,isStopword:!0,pos:`ao`}],[`það er að segja`,{lemma:`það vera að segja`,isStopword:!0,pos:`ao`}],[`þess vegna`,{lemma:`það vegna`,isStopword:!0,pos:`ao`}],[`því miður`,{lemma:`það lítt`,isStopword:!0,pos:`ao`}],[`þrátt fyrir`,{lemma:`þrátt fyrir`,isStopword:!0,pos:`ao`}],[`á dögunum`,{lemma:`á dagur`,isStopword:!0,pos:`ao`}],[`á sínum tíma`,{lemma:`á sinn tími`,isStopword:!0,pos:`ao`}],[`á endanum`,{lemma:`á endi`,isStopword:!0,pos:`ao`}],[`einu sinni`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`eitt sinn`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`í fyrsta sinn`,{lemma:`í fyrstur sinn`,isStopword:!1,pos:`ao`}],[`í kvöld`,{lemma:`í kvöld`,isStopword:!1,pos:`ao`}],[`í morgun`,{lemma:`í morgunn`,isStopword:!1,pos:`ao`}],[`á morgun`,{lemma:`á morgunn`,isStopword:!1,pos:`ao`}],[`fyrir hönd`,{lemma:`fyrir hönd`,isStopword:!1,pos:`fs`}],[`með tilliti til`,{lemma:`með tillit til`,isStopword:!1,pos:`fs`}],[`í ljósi`,{lemma:`í ljós`,isStopword:!1,pos:`fs`}],[`í stað`,{lemma:`í staður`,isStopword:!1,pos:`fs`}],[`fyrir aftan`,{lemma:`fyrir aftan`,isStopword:!1,pos:`fs`}],[`fyrir austan`,{lemma:`fyrir austan`,isStopword:!1,pos:`fs`}],[`fyrir 
framan`,{lemma:`fyrir framan`,isStopword:!1,pos:`fs`}],[`fyrir handan`,{lemma:`fyrir handan`,isStopword:!1,pos:`fs`}],[`fyrir innan`,{lemma:`fyrir innan`,isStopword:!1,pos:`fs`}],[`fyrir neðan`,{lemma:`fyrir neðan`,isStopword:!1,pos:`fs`}],[`fyrir norðan`,{lemma:`fyrir norðan`,isStopword:!1,pos:`fs`}],[`fyrir ofan`,{lemma:`fyrir ofan`,isStopword:!1,pos:`fs`}],[`fyrir sunnan`,{lemma:`fyrir sunnan`,isStopword:!1,pos:`fs`}],[`fyrir utan`,{lemma:`fyrir utan`,isStopword:!1,pos:`fs`}],[`fyrir vestan`,{lemma:`fyrir vestan`,isStopword:!1,pos:`fs`}],[`í gegnum`,{lemma:`í gegnum`,isStopword:!1,pos:`fs`}],[`í kringum`,{lemma:`í kringum`,isStopword:!1,pos:`fs`}],[`innan við`,{lemma:`innan við`,isStopword:!1,pos:`fs`}],[`upp úr`,{lemma:`upp úr`,isStopword:!1,pos:`fs`}],[`þvert á`,{lemma:`þvert á`,isStopword:!1,pos:`fs`}],[`þar eð`,{lemma:`þar eð`,isStopword:!0,pos:`st`}],[`sameinuðu þjóðirnar`,{lemma:`Sameinuðu þjóðirnar`,isStopword:!1,pos:`entity`}],[`evrópusambandið`,{lemma:`Evrópusambandið`,isStopword:!1,pos:`entity`}],[`nato`,{lemma:`NATO`,isStopword:!1,pos:`entity`}],[`nató`,{lemma:`NATO`,isStopword:!1,pos:`entity`}]]);function H(e,t){for(let n=Math.min(4,e.length-t);n>=2;n--){let r=e.slice(t,t+n).join(` `).toLowerCase(),i=V.get(r);if(i)return{phrase:i,wordCount:n}}return null}function U(e){return V.has(e.toLowerCase())}function W(e){return V.get(e.toLowerCase())}const G=new Set([`word`]),K=new Set([`person`,`company`,`entity`]),q=new Set([`punctuation`,`s_begin`,`s_end`,`s_split`,`unknown`]);function J(t,n,r={}){let{bigrams:i,compoundSplitter:a,includeNumbers:o=!1,alwaysTryCompounds:s=!0}=r,c=e(t),l=[],u=[],d=new Map,f=e=>{let t=e.toLowerCase(),r=d.get(t);if(r)return r;let i=n.lemmatize(e);return d.set(t,i),i};for(let e=0;e<c.length;e++){let 
t=c[e];if(!q.has(t.kind)){if(K.has(t.kind)){l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!0});continue}if(t.kind===`number`||t.kind===`ordinal`){o&&l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1});continue}if(G.has(t.kind)){let e=t.text??``,n=f(e),r={original:e,kind:t.kind,lemmas:n,isEntity:!1},i=n.length===1&&n[0]===e.toLowerCase();if(a&&(s||i)){let t=a.split(e);if(t.isCompound){r.compoundSplit=t;let e=t.parts.flatMap(e=>f(e));r.compoundLemmas=e,r.lemmas=[...new Set([...n,...e])]}}l.push(r),u.push({index:l.length-1,token:t});continue}l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1})}}if(i&&u.length>0){let e=new O(n,i);for(let t=0;t<u.length;t++){let{index:n,token:r}=u[t],i=t>0?u[t-1].token:null,a=t<u.length-1?u[t+1].token:null,o=e.disambiguate(r.text??``,i?.text??null,a?.text??null,{prevLemmas:i?.text?f(i.text):void 0,nextLemmas:a?.text?f(a.text):void 0});l[n].disambiguated=o.lemma,l[n].confidence=o.confidence}}else for(let{index:e}of u){let t=l[e];t.lemmas.length>0&&(t.disambiguated=t.lemmas[0],t.confidence=t.lemmas.length===1?1:.5)}return l}function Y(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1}=r,c=J(e,n,r),l=new Set,u=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of c)if(!e.isEntity){if(o)for(let t of e.lemmas)u(t)||l.add(t);else e.disambiguated&&(u(e.disambiguated)||l.add(e.disambiguated));if(e.compoundSplit?.isCompound){let t=e.compoundLemmas?e.compoundLemmas:e.compoundSplit.parts.flatMap(e=>n.lemmatize(e));for(let e of t)u(e)||l.add(e)}}return l}function X(e,t,n,r={}){let i=performance.now(),a,o;switch(n){case`naive`:{let n=e.split(/\s+/).filter(e=>e.length>0),r=[];for(let e of n){let n=e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``);if(n){let e=t.lemmatize(n);r.push({original:n,kind:`word`,lemmas:e,isEntity:!1,disambiguated:e[0],confidence:e.length===1?1:.5})}}a=r,o=new Set(r.map(e=>e.disambiguated).filter(Boolean));break}case`tokenized`:a=J(e,t),o=new 
Set(a.filter(e=>e.kind===`word`&&e.lemmas.length>0).map(e=>e.lemmas[0]));break;case`disambiguated`:a=J(e,t,{bigrams:r.bigrams}),o=Y(e,t,{bigrams:r.bigrams});break;case`full`:a=J(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter}),o=Y(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter});break}let s=performance.now()-i,c=a.filter(e=>e.kind===`word`),l=c.length,u=c.filter(e=>e.lemmas.length>0&&!(e.lemmas.length===1&&e.lemmas[0]===e.original.toLowerCase())).length,d=c.filter(e=>e.lemmas.length>1).length,f=c.filter(e=>e.confidence!==void 0).map(e=>e.confidence),p=f.length>0?f.reduce((e,t)=>e+t,0)/f.length:0,m=c.filter(e=>e.compoundSplit?.isCompound).length,h=a.filter(e=>e.isEntity).length;return{wordCount:l,lemmatizedCount:u,coverage:l>0?u/l:0,ambiguousCount:d,ambiguityRate:l>0?d/l:0,avgConfidence:p,compoundsFound:m,entitiesSkipped:h,uniqueLemmas:o.size,timeMs:s}}export{d as BinaryLemmatizer,M as CASE_NAMES,r as CONTEXTUAL_STOPWORDS,z as CompoundSplitter,f as DISAMBIGUATION_RULES,O as Disambiguator,N as GENDER_NAMES,g as NOMINATIVE_PRONOUNS,P as NUMBER_NAMES,h as PREPOSITION_CASES,F as PROTECTED_LEMMAS,V as STATIC_PHRASES,t as STOPWORDS_IS,A as WORD_CLASS_NAMES,j as WORD_CLASS_NAMES_IS,x as applyGrammarRules,b as applyNounAfterPrepositionRule,v as applyPrepositionRule,y as applyPronounVerbRule,_ as canGovernCase,B as createKnownLemmaSet,k as extractDisambiguatedLemmas,Y as extractIndexableLemmas,C as getGovernedCases,W as getPhraseInfo,p as getRulesForWord,m as hasDisambiguationRules,i as isContextualStopword,U as isKnownPhrase,S as isKnownPreposition,n as isStopword,H as matchPhrase,J as processText,a as removeStopwords,X as runBenchmark};
|
|
1
|
+
import{tokenize as e}from"tokenize-is";const t=new Set(`á.að.aðra.aðrar.aðrir.af.alla.allan.allar.allir.allnokkra.allnokkrar.allnokkrir.allnokkru.allnokkrum.allnokkuð.allnokkur.allnokkurn.allnokkurra.allnokkurrar.allnokkurri.allnokkurs.allnokkurt.allra.allrar.allri.alls.allt.alltað.allur.án.andspænis.annað.annaðhvort.annan.annar.annarra.annarrar.annarri.annars.árla.ásamt.auk.austan.austanundir.austur.báða.báðar.báðir.báðum.bæði.bak.beggja.eða.eður.ef.eftir.ég.ein.eina.einar.einhver.einhverja.einhverjar.einhverjir.einhverju.einhverjum.einhvern.einhverra.einhverrar.einhverri.einhvers.einir.einn.einna.einnar.einni.eins.einskis.einu.einum.eitt.eitthvað.eitthvert.ekkert.ella.ellegar.en.enda.enga.engan.engar.engin.enginn.engir.engra.engrar.engri.engu.engum.er.fáein.fáeina.fáeinar.fáeinir.fáeinna.fáeinum.fjær.fjarri.flestalla.flestallan.flestallar.flestallir.flestallra.flestallrar.flestallri.flestalls.flestallt.flestallur.flestöll.flestöllu.flestöllum.frá.fram.fyrir.fyrst.gagnstætt.gagnvart.gegn.gegnt.gegnum.hana.handa.handan.hann.hans.heldur.hennar.henni.hið.hin.hina.hinar.hinir.hinn.hinna.hinnar.hinni.hins.hinu.hinum.hitt.hjá.honum.hún.hvað.hvaða.hvenær.hver.hverja.hverjar.hverjir.hverju.hverjum.hvern.hverra.hverrar.hverri.hvers.hvert.hvílík.hvílíka.hvílíkan.hvílíkar.hvílíkir.hvílíkra.hvílíkrar.hvílíkri.hvílíks.hvílíkt.hvílíku.hvílíkum.hvílíkur.hvor.hvora.hvorar.hvorir.hvorki.hvorn.hvorra.hvorrar.hvorri.hvors.hvort.hvoru.hvorug.hvoruga.hvorugan.hvorugar.hvorugir.hvorugra.hvorugrar.hvorugri.hvorugs.hvorugt.hvorugu.hvorugum.hvorugur.hvorum.í.inn.innan.innanundir.jafnframt.jafnhliða.kring.kringum.með.meðal.meðan.meður.mér.mestalla.mestallan.mestallar.mestallir.mestallra.mestallrar.mestallri.mestalls.mestallt.mestallur.mestöll.mestöllu.mestöllum.miðli.mig.milli.millum.mín.mína.mínar.mínir.minn.minna.minnar.minni.míns.mínu.mínum.mitt.mót.móti.nær.nærri.næst.næstum.nálægt.né.neðan.nein.neina.neinar.neinir.neinn.neinna.neinnar.neinni.neins.neinu.neinum.neitt.nema.niður.nokkra.n
okkrar.nokkrir.nokkru.nokkrum.nokkuð.nokkur.nokkurn.nokkurra.nokkurrar.nokkurri.nokkurs.nokkurt.norðan.nú.öðru.öðrum.of.ofan.ofar.og.óháð.okkar.okkur.öll.öllu.öllum.önnur.órafjarri.oss.sá.sakir.sama.saman.samar.samfara.samhliða.sami.samir.samkvæmt.samra.samrar.samri.sams.samskipa.samt.samtímis.samur.sem.sér.sérhvað.sérhver.sérhverja.sérhverjar.sérhverjir.sérhverju.sérhverjum.sérhvern.sérhverra.sérhverrar.sérhverri.sérhvers.sérhvert.síðan.síðla.sig.sín.sína.sínar.sínhver.sínhverja.sínhverjar.sínhverjir.sínhverju.sínhverjum.sínhvern.sínhverra.sínhverrar.sínhverri.sínhvers.sínhvert.sínhvor.sínhvora.sínhvorar.sínhvorir.sínhvorn.sínhvorra.sínhvorrar.sínhvorri.sínhvors.sínhvort.sínhvoru.sínhvorum.sínir.sinn.sinna.sinnar.sinnhver.sinnhverja.sinnhverjar.sinnhverjir.sinnhverju.sinnhverjum.sinnhvern.sinnhverra.sinnhverrar.sinnhverri.sinnhvers.sinnhvert.sinnhvor.sinnhvora.sinnhvorar.sinnhvorir.sinnhvorn.sinnhvorra.sinnhvorrar.sinnhvorri.sinnhvors.sinnhvort.sinnhvoru.sinnhvorum.sinni.síns.sínu.sínum.sitt.sitthvað.sitthver.sitthverja.sitthverjar.sitthverjir.sitthverju.sitthverjum.sitthvern.sitthverra.sitthverrar.sitthverri.sitthvers.sitthvert.sitthvor.sitthvora.sitthvorar.sitthvorir.sitthvorn.sitthvorra.sitthvorrar.sitthvorri.sitthvors.sitthvort.sitthvoru.sitthvorum.sjálf.sjálfa.sjálfan.sjálfar.sjálfir.sjálfra.sjálfrar.sjálfri.sjálfs.sjálft.sjálfu.sjálfum.sjálfur.slík.slíka.slíkan.slíkar.slíkir.slíkra.slíkrar.slíkri.slíks.slíkt.slíku.slíkum.slíkur.snemma.sökum.söm.sömu.sömum.sú.sum.suma.suman.sumar.sumir.sumra.sumrar.sumri.sums.sumt.sumu.sumum.sumur.sunnan.svo.til.tráss.um.umfram.umhverfis.undan.undir.uns.upp.úr.út.utan.útundan.vegna.vér.vestan.vestur.vettugi.við.viður.vor.vora.vorar.vorir.vorn.vorra.vorrar.vorri.vors.vort.voru.vorum.yðar.yður.yfir.ykkar.ykkur.ýmis.ýmiss.ýmissa.ýmissar.ýmissi.ýmist.ýmsa.ýmsan.ýmsar.ýmsir.ýmsu.ýmsum.þá.það.þær.þann.þar.þau.þegar.þeim.þeir.þeirra.þeirrar.þeirri.þennan.þér.þess.þessa.þessar.þessara.þessarar.þessari.þessi.þessir.þessu.þessum.þetta.þ
ið.þig.þín.þína.þínar.þínir.þinn.þinna.þinnar.þinni.þíns.þínu.þínum.þitt.þó.þónokkra.þónokkrar.þónokkrir.þónokkru.þónokkrum.þónokkuð.þónokkur.þónokkurn.þónokkurra.þónokkurrar.þónokkurri.þónokkurs.þónokkurt.þótt.þú.því.þvílík.þvílíka.þvílíkan.þvílíkar.þvílíkir.þvílíkra.þvílíkrar.þvílíkri.þvílíks.þvílíkt.þvílíku.þvílíkum.þvílíkur`.split(`.`));function n(e){return t.has(e.toLowerCase())}const r=new Map([[`á`,new Set([`fs`,`ao`])],[`við`,new Set([`fs`,`fn`])],[`af`,new Set([`fs`,`ao`])],[`til`,new Set([`fs`])],[`um`,new Set([`fs`])],[`frá`,new Set([`fs`])],[`yfir`,new Set([`fs`,`ao`])],[`undir`,new Set([`fs`,`ao`])],[`fyrir`,new Set([`fs`,`ao`])],[`eftir`,new Set([`fs`,`ao`])],[`gegn`,new Set([`fs`])],[`hjá`,new Set([`fs`])],[`úr`,new Set([`fs`])],[`í`,new Set([`fs`])]]);function i(e,n){let i=e.toLowerCase(),a=r.get(i);return a&&n?a.has(n):t.has(i)}function a(e){return e.filter(e=>!n(e))}const o=1279610177,s=[`no`,`so`,`lo`,`ao`,`fs`,`fn`,`st`,`to`,`gr`,`uh`],c=[void 0,`nf`,`þf`,`þgf`,`ef`],l=[void 0,`kk`,`kvk`,`hk`],u=[`et`,`ft`];var d=class e{buffer;stringPool;lemmaOffsets;lemmaLengths;wordOffsets;wordLengths;entryOffsets;entries;bigramW1Offsets;bigramW1Lengths;bigramW2Offsets;bigramW2Lengths;bigramFreqs;lemmaCount;wordCount;entryCount;bigramCount;version;decoder=new TextDecoder(`utf-8`);constructor(e){this.buffer=e;let t=new DataView(e),n=t.getUint32(0,!0);if(n!==o)throw Error(`Invalid binary format: expected magic 0x${o.toString(16)}, got 0x${n.toString(16)}`);if(this.version=t.getUint32(4,!0),this.version!==1&&this.version!==2)throw Error(`Unsupported version: ${this.version}`);let r=t.getUint32(8,!0);this.lemmaCount=t.getUint32(12,!0),this.wordCount=t.getUint32(16,!0),this.entryCount=t.getUint32(20,!0),this.bigramCount=t.getUint32(24,!0);let i=32;this.stringPool=new Uint8Array(e,i,r),i+=r,this.lemmaOffsets=new Uint32Array(e,i,this.lemmaCount),i+=this.lemmaCount*4,this.lemmaLengths=new 
Uint8Array(e,i,this.lemmaCount),i+=this.lemmaCount,i=i+3&-4,this.wordOffsets=new Uint32Array(e,i,this.wordCount),i+=this.wordCount*4,this.wordLengths=new Uint8Array(e,i,this.wordCount),i+=this.wordCount,i=i+3&-4,this.entryOffsets=new Uint32Array(e,i,this.wordCount+1),i+=(this.wordCount+1)*4,this.entries=new Uint32Array(e,i,this.entryCount),i+=this.entryCount*4,this.bigramW1Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW1Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramW2Offsets=new Uint32Array(e,i,this.bigramCount),i+=this.bigramCount*4,this.bigramW2Lengths=new Uint8Array(e,i,this.bigramCount),i+=this.bigramCount,i=i+3&-4,this.bigramFreqs=new Uint32Array(e,i,this.bigramCount)}static async load(t,n={}){let r=await(n.fetch??fetch)(t);if(!r.ok)throw Error(`Failed to load binary data: ${r.status}`);return new e(await r.arrayBuffer())}static loadFromBuffer(t){return new e(t)}getString(e,t){return this.decoder.decode(this.stringPool.subarray(e,e+t))}getLemma(e){return this.getString(this.lemmaOffsets[e],this.lemmaLengths[e])}getWord(e){return this.getString(this.wordOffsets[e],this.wordLengths[e])}findWord(e){let t=0,n=this.wordCount-1;for(;t<=n;){let r=t+n>>>1,i=this.getWord(r);if(i===e)return r;i<e?t=r+1:n=r-1}return-1}lemmatize(e,t={}){let n=e.toLowerCase(),r=this.findWord(n);if(r===-1)return[n];let i=this.entryOffsets[r],a=this.entryOffsets[r+1],{wordClass:o}=t,c=new Set,l=[];for(let e=i;e<a;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=s[n];if(o&&r!==o)continue;let i=this.getLemma(t);c.has(i)||(c.add(i),l.push(i))}return l.length===0?[n]:l}unpackEntry(e){return this.version===1?{lemmaIdx:e>>>4,posCode:e&15,caseCode:0,genderCode:0,numberCode:0}:{lemmaIdx:e>>>10,posCode:e&15,caseCode:e>>>4&7,genderCode:e>>>7&3,numberCode:e>>>9&1}}lemmatizeWithPOS(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=new 
Set,o=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n}=this.unpackEntry(this.entries[e]),r=this.getLemma(t),i=s[n]??``,c=`${r}:${i}`;a.has(c)||(a.add(c),o.push({lemma:r,pos:i}))}return o}lemmatizeWithMorph(e){let t=e.toLowerCase(),n=this.findWord(t);if(n===-1)return[];let r=this.entryOffsets[n],i=this.entryOffsets[n+1],a=[];for(let e=r;e<i;e++){let{lemmaIdx:t,posCode:n,caseCode:r,genderCode:i,numberCode:o}=this.unpackEntry(this.entries[e]),d={},f=c[r],p=l[i],m=u[o];f&&(d.case=f),p&&(d.gender=p),m&&(d.number=m),a.push({lemma:this.getLemma(t),pos:s[n]??``,morph:Object.keys(d).length>0?d:void 0})}return a}hasMorphFeatures(){return this.version>=2}getVersion(){return this.version}findBigram(e,t){let n=0,r=this.bigramCount-1;for(;n<=r;){let i=n+r>>>1,a=this.getString(this.bigramW1Offsets[i],this.bigramW1Lengths[i]);if(a<e)n=i+1;else if(a>e)r=i-1;else{let e=this.getString(this.bigramW2Offsets[i],this.bigramW2Lengths[i]);if(e===t)return i;e<t?n=i+1:r=i-1}}return-1}bigramFreq(e,t){let n=this.findBigram(e.toLowerCase(),t.toLowerCase());return n===-1?0:this.bigramFreqs[n]}freq(e,t){return this.bigramFreq(e,t)}isKnown(e){return this.findWord(e.toLowerCase())!==-1}get lemmaCountValue(){return this.lemmaCount}get wordFormCount(){return this.wordCount}get bigramCountValue(){return this.bigramCount}get bufferSize(){return this.buffer.byteLength}getAllLemmas(){let e=[];for(let t=0;t<this.lemmaCount;t++)e.push(this.getLemma(t));return e}};const f=[{word:`á`,prefer:`so`,over:`fs`,context:`after_pronoun`,description:`á after pronoun = verb 'eiga' (I own, you own)`},{word:`á`,prefer:`fs`,over:`so`,context:`before_noun`,description:`á before noun = preposition (on, at)`},{word:`við`,prefer:`fn`,over:`fs`,context:`sentence_start`,description:`við at sentence start = pronoun 'we'`},{word:`við`,prefer:`fs`,over:`fn`,context:`before_noun`,description:`við before noun = preposition 'by/at'`},{word:`af`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`af before noun = preposition 
'of/from'`},{word:`til`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`til before noun = preposition 'to'`},{word:`um`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`um before noun = preposition 'about/around'`},{word:`yfir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`yfir before noun = preposition 'over'`},{word:`undir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`undir before noun = preposition 'under'`},{word:`fyrir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`fyrir before noun = preposition 'for/before'`},{word:`eftir`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`eftir before noun = preposition 'after'`},{word:`frá`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`frá before noun = preposition 'from'`},{word:`með`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`með before noun = preposition 'with'`},{word:`í`,prefer:`fs`,over:`ao`,context:`before_noun`,description:`í before noun = preposition 'in'`},{word:`úr`,prefer:`fs`,over:`no`,context:`before_noun`,description:`úr before noun = preposition 'out of'`}];function p(e){let t=e.toLowerCase();return f.filter(e=>e.word===t)}function m(e){return f.some(t=>t.word===e.toLowerCase())}const h=new Map([[`á`,new Set([`þf`,`þgf`])],[`í`,new Set([`þf`,`þgf`])],[`við`,new Set([`þf`,`þgf`])],[`með`,new Set([`þf`,`þgf`])],[`undir`,new Set([`þf`,`þgf`])],[`yfir`,new Set([`þf`,`þgf`])],[`fyrir`,new Set([`þf`,`þgf`])],[`um`,new Set([`þf`])],[`gegnum`,new Set([`þf`])],[`kringum`,new Set([`þf`])],[`umhverfis`,new Set([`þf`])],[`af`,new Set([`þgf`])],[`frá`,new Set([`þgf`])],[`hjá`,new Set([`þgf`])],[`úr`,new Set([`þgf`])],[`að`,new Set([`þgf`])],[`móti`,new Set([`þgf`])],[`nálægt`,new Set([`þgf`])],[`gegn`,new Set([`þgf`])],[`gagnvart`,new Set([`þgf`])],[`handa`,new Set([`þgf`])],[`meðal`,new Set([`ef`])],[`til`,new Set([`ef`])],[`án`,new Set([`ef`])],[`vegna`,new Set([`ef`])],[`sakir`,new Set([`ef`])],[`utan`,new Set([`ef`])],[`innan`,new 
Set([`ef`])],[`meðfram`,new Set([`þgf`])],[`milli`,new Set([`ef`])],[`auk`,new Set([`ef`])],[`í stað`,new Set([`ef`])]]),g=new Set([`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`]);function _(e,t){return t?h.get(e)?.has(t)??!1:!1}function v(e,t){let n=e.filter(e=>e.pos===`fs`);if(n.length===0)return null;for(let e of n)for(let n of t)if(n.morph?.case&&_(e.lemma,n.morph.case))return{lemma:e.lemma,pos:`fs`,rule:`prep+${n.morph.case}`,confidence:.9};return null}function y(e,t){if(!t)return null;let n=t.toLowerCase();if(!g.has(n))return null;let r=e.filter(e=>e.pos===`so`);return r.length===0||!e.some(e=>e.pos!==`so`)?null:{lemma:(r.find(e=>e.lemma===`eiga`)??r[0]).lemma,pos:`so`,rule:`pronoun+verb`,confidence:.85}}function b(e,t,n){if(!t||!n?.lemmatizeWithPOS)return null;let r=n.lemmatizeWithPOS(t),i=r.find(e=>e.pos===`fs`);if(!i)return null;let a=r.some(e=>e.pos===`fn`),o=e.some(e=>e.pos===`so`);if(a&&o)return null;let s=h.get(i.lemma);if(!s)return null;let c=e.filter(e=>e.pos===`no`);for(let e of c)if(e.morph?.case&&s.has(e.morph.case))return{lemma:e.lemma,pos:`no`,rule:`noun_after_prep+${e.morph.case}`,confidence:.9};return null}function x(e,t,n,r=null){return v(e,n)||b(e,t,r)||y(e,t)||null}function S(e){return h.has(e)}function C(e){return h.get(e)}const w={name:`unambiguous`,run(e){return e.length===1?{lemma:e[0].lemma,pos:e[0].pos,confidence:1}:null}},T={name:`preference_rules`,run(e,t,n){if(!n.usePreferenceRules)return null;for(let n of f){let r=E(n,e,t);if(r)return{lemma:r.lemma,pos:r.pos,confidence:.85}}return null}};function E(e,t,n){let r=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.prefer),i=t.find(t=>t.lemma.toLowerCase()===e.word.toLowerCase()&&t.pos===e.over);if(!r||!i)return null;if(e.context===`before_noun`){let e=n.nextWord;if(e&&/^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(e))return r}else if(e.context===`before_verb`){let e=n.nextWord?.toLowerCase();if(e&&![`þessi`,`þetta`,`sá`,`sú`,`það`,`hinn`,`hin`,`hið`].includes(e))return r}else 
if(e.context===`after_pronoun`){let e=n.prevWord?.toLowerCase();if(e&&[`ég`,`þú`,`hann`,`hún`,`það`,`við`,`þið`,`þeir`,`þær`,`þau`].includes(e))return r}return null}const D=[w,T,{name:`grammar_rules`,run(e,t,n){if(!n.useGrammarRules)return null;let r=e.map(e=>({...e,morph:void 0})),i=t.allTokens[t.index];if(i){let e=n.getMorph(i);e&&(r.length=0,r.push(...e))}let a=x(r,t.prevWord,t.nextWordMorph??[],n.lemmatizer);return a?{lemma:a.lemma,pos:a.pos,confidence:a.confidence}:null}},{name:`word_bigrams`,run(e,t,n){if(!n.bigrams||e.length===0)return null;let r=[];for(let i of e){let e=0;if(t.prevWord){let r=t.prevLemmas||n.lemmatizer.lemmatize(t.prevWord);for(let t of r){let r=n.bigrams.freq(t,i.lemma);r>0&&(e+=Math.log(r+1)*n.leftWeight)}}if(t.nextWord){let r=t.nextLemmas||n.lemmatizer.lemmatize(t.nextWord);for(let t of r){let r=n.bigrams.freq(i.lemma,t);r>0&&(e+=Math.log(r+1)*n.rightWeight)}}r.push({candidate:i,score:e})}if(r.sort((e,t)=>t.score-e.score),r.length>0&&r[0].score>0){let e=r[0].score,t=r.reduce((e,t)=>e+Math.exp(t.score),0),n=t>0?Math.exp(e)/t:.5;return{lemma:r[0].candidate.lemma,pos:r[0].candidate.pos,confidence:n}}return null}},{name:`fallback`,run(e){return e.length>0?{lemma:e[0].lemma,pos:e[0].pos,confidence:1/e.length}:null}}];var O=class{lemmatizer;bigrams;leftWeight;rightWeight;usePreferenceRules;useGrammarRules;morphCache;constructor(e,t=null,n={}){this.lemmatizer=e,this.bigrams=t,this.leftWeight=n.leftWeight??1,this.rightWeight=n.rightWeight??1,this.usePreferenceRules=n.usePreferenceRules??!0,this.useGrammarRules=n.useGrammarRules??!0,this.morphCache=this.lemmatizer.lemmatizeWithMorph?new Map:null}getMorph(e){if(!this.lemmatizer.lemmatizeWithMorph||!this.morphCache)return;let t=e.toLowerCase(),n=this.morphCache.get(t);if(n)return n;let r=this.lemmatizer.lemmatizeWithMorph(e);return this.morphCache.set(t,r),r}disambiguate(e,t,n,r={}){let 
i;i=this.lemmatizer.lemmatizeWithPOS?this.lemmatizer.lemmatizeWithPOS(e):this.lemmatizer.lemmatize(e).map(e=>({lemma:e,pos:`no`}));let a=i.map(e=>e.lemma),o=e,s;n&&(s=this.getMorph(n));let c={prevWord:t,nextWord:n,prevLemmas:r.prevLemmas,nextLemmas:r.nextLemmas,nextWordMorph:s,allTokens:[e],index:0};for(let e of D){let t=e.run(i,c,this);if(t)return{token:o,lemma:t.lemma,pos:t.pos,candidates:a,candidatesWithPOS:i,ambiguous:a.length>1,confidence:t.confidence,resolvedBy:e.name}}return{token:o,lemma:e.toLowerCase(),candidates:a,candidatesWithPOS:i,ambiguous:!1,confidence:0,resolvedBy:`none`}}disambiguateAll(e){let t=[];for(let n=0;n<e.length;n++){let r=e[n],i=n>0?e[n-1]:null,a=n<e.length-1?e[n+1]:null;t.push(this.disambiguate(r,i,a))}return t}extractLemmas(e){let t=new Set,n=this.disambiguateAll(e);for(let e of n)t.add(e.lemma);return t}};function k(e,n,r,i={}){let{tokenize:a,removeStopwords:o}=i,s=a?a(e):e.split(/\s+/).filter(e=>e.length>0).map(e=>e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``)).filter(e=>e.length>0),c=new O(n,r).extractLemmas(s);if(o)for(let e of c)t.has(e)&&c.delete(e);return c}const A={no:`noun`,so:`verb`,lo:`adjective`,ao:`adverb`,fs:`preposition`,fn:`pronoun`,st:`conjunction`,to:`numeral`,gr:`article`,uh:`interjection`},j={no:`nafnorð`,so:`sagnorð`,lo:`lýsingarorð`,ao:`atviksorð`,fs:`forsetning`,fn:`fornafn`,st:`samtenging`,to:`töluorð`,gr:`greinir`,uh:`upphrópun`},M={nf:`nominative`,þf:`accusative`,þgf:`dative`,ef:`genitive`},N={kk:`masculine`,kvk:`feminine`,hk:`neuter`},P={et:`singular`,ft:`plural`};var F=class e{bits;sizeBits;hashCount;constructor(e,t,n){this.bits=e,this.sizeBits=t,this.hashCount=n}static fromValues(t,n={}){let r=Math.max(t.length,1),i=n.falsePositiveRate??.01,a=Math.max(1,Math.ceil(-r*Math.log(i)/(Math.LN2*Math.LN2))),o=Math.max(1,Math.round(a/r*Math.LN2)),s=n.maxHashFunctions?Math.min(o,n.maxHashFunctions):o,c=Math.ceil(a/8),l=new e(new Uint8Array(c),a,s);for(let e of t)l.add(e);return 
l}add(e){let[t,n]=this.hashes(e);for(let e=0;e<this.hashCount;e++){let r=(t+e*n)%this.sizeBits;this.setBit(r)}}has(e){let[t,n]=this.hashes(e);for(let e=0;e<this.hashCount;e++){let r=(t+e*n)%this.sizeBits;if(!this.getBit(r))return!1}return!0}setBit(e){let t=e>>>3,n=e&7;this.bits[t]|=1<<n}getBit(e){let t=e>>>3,n=e&7;return(this.bits[t]&1<<n)!=0}hashes(e){let t=e.toLowerCase(),n=2166136261,r=2166136261;for(let e=0;e<t.length;e++){let i=t.charCodeAt(e);n^=i,n=Math.imul(n,16777619)>>>0,r^=i,r=Math.imul(r,2166136261)>>>0}return r^=r>>>13,r=Math.imul(r,2246822507)>>>0,r^=r>>>16,[n>>>0,r>>>0||668265261]}};const I=new Set(`ísland.england.írland.skotland.finnland.grænland.holland.þýskaland.frakkland.pólland.tékkland.svissland.rússland.eistland.lettland.litháen.danmörk.noregur.svíþjóð.bandaríkin.spánn.portúgal.ítalía.grikkland.þingvellir.akureyri.ísafjörður.reykjavík.keflavík.hafnarfjörður.kópavogur.seltjarnarnes.garðabær.mosfellsbær.vestmannaeyjar.húsavík.sauðárkrókur.siglufjörður.ólafsfjörður.dalvík.egilsstaðir.neskaupstaður.seyðisfjörður.eskifjörður.reyðarfjörður.fáskrúðsfjörður.stöðvarfjörður.djúpivogur.höfn.vík.selfoss.hveragerði.þorlákshöfn.grindavík.sandgerði.borgarnes.stykkishólmur.grundarfjörður.ólafsvík.búðardalur.patreksfjörður.flateyri.suðureyri.bolungarvík.hólmavík.hvammstangi.blönduós.skagaströnd.varmahlíð.hlíðarendi.bergþórshvol.íslandsbanki.landsbankinn.arionbanki.alþingi`.split(`.`)),L=new Set(`maður.kona.stjóri.ráðherra.forseti.formaður.fulltrúi.starfsmaður.hús.staður.vegur.borg.bær.dalur.fjörður.félag.banki.sjóður.stofnun.ráð.rannsókn.greiðsla.mál.kerfi.verk.þjónusta.rekstur.viðskipti.verð.kostnaður`.split(`.`)),R=new Set([`vera`,`hafa`,`gera`,`fara`,`koma`,`segja`,`vilja`,`mega`,`þurfa`,`verða`,`geta`,`sjá`,`taka`,`eiga`,`láta`,`halda`,`leyfa`,`búa`]),z=[`s`,`u`,`a`];var 
B=class{lemmatizer;minPartLength;tryLinkingLetters;knownLemmas;mode;constructor(e,t,n={}){this.lemmatizer=e,this.knownLemmas=t,this.minPartLength=n.minPartLength??3,this.tryLinkingLetters=n.tryLinkingLetters??!0,this.mode=n.mode??`balanced`}noSplit(e,t){return{word:e,parts:t,indexTerms:t,confidence:0,isCompound:!1}}split(e){let t=e.toLowerCase(),n=this.lemmatizer.lemmatize(e),r=n[0]?.toLowerCase();if(r&&I.has(r)||I.has(t))return this.noSplit(e,n);let i=n.length>0&&n[0].toLowerCase()!==t,a=n.length===1;if(this.mode===`conservative`)return e.includes(`-`)?this.splitAtHyphen(e,n):this.noSplit(e,n);if(this.mode===`balanced`&&i&&a&&t.length<12||t.length<this.minPartLength*2)return this.noSplit(e,n);let o=[];for(let e=this.minPartLength;e<=t.length-this.minPartLength;e++){let n=t.slice(0,e),r=t.slice(e),i=this.trySplit(n,r);if(i&&o.push(i),this.tryLinkingLetters){for(let e of z)if(n.endsWith(e)&&n.length>this.minPartLength){let e=n.slice(0,-1),t=this.trySplit(e,r);t&&o.push({...t,score:t.score*.95})}}}if(o.length===0)return this.noSplit(e,n);o.sort((e,t)=>t.score-e.score);let s=o[0];if(this.mode===`balanced`&&i&&s.score<.6)return this.noSplit(e,n);let c=[...new Set([...s.leftParts,...s.rightParts])];return{word:e,parts:c,indexTerms:[...new Set([...c,t])],confidence:Math.min(s.score,1),isCompound:!0}}splitAtHyphen(e,t){let n=e.split(`-`).filter(e=>e.length>0);if(n.length<2)return this.noSplit(e,t);let r=[];for(let e of n){let t=this.lemmatizer.lemmatize(e);r.push(...t)}let i=[...new Set(r)];return{word:e,parts:i,indexTerms:[...new Set([...i,e.toLowerCase()])],confidence:.9,isCompound:!0}}trySplit(e,t){let n=this.lemmatizer.lemmatize(e),r=this.lemmatizer.lemmatize(t),i=[...new Set(n.filter(e=>this.knownLemmas.has(e)))],a=[...new Set(r.filter(e=>this.knownLemmas.has(e)))];if(i.length===0||a.length===0)return null;let o=0,s=1-Math.abs(e.length-t.length)/(e.length+t.length);o+=s*.2;let c=(e.length+t.length)/2,l=Math.min(c/6,1);o+=l*.2,a.some(e=>L.has(e))&&(o+=.3);let 
u=i.some(e=>R.has(e)),d=a.some(e=>R.has(e));return u&&d?o-=.3:!u&&!d&&(o+=.2),(e.length<4||t.length<4)&&(o-=.15),{leftParts:i,rightParts:a,score:Math.max(0,o)}}getAllLemmas(e){return this.split(e).indexTerms}};function V(e){return new Set(e.map(e=>e.toLowerCase()))}function H(e,t={}){let n=e.map(e=>e.toLowerCase());return F.fromValues(n,t)}const U=new Map([[`til dæmis`,{lemma:`til dæmi`,isStopword:!0,pos:`ao`}],[`með öðrum orðum`,{lemma:`með annar orð`,isStopword:!0,pos:`ao`}],[`í raun`,{lemma:`í raun`,isStopword:!0,pos:`ao`}],[`í raun og veru`,{lemma:`í raun og vera`,isStopword:!0,pos:`ao`}],[`af og til`,{lemma:`af og til`,isStopword:!0,pos:`ao`}],[`aftur á móti`,{lemma:`aftur á mót`,isStopword:!0,pos:`ao`}],[`alla vega`,{lemma:`allur vegur`,isStopword:!0,pos:`ao`}],[`alls ekki`,{lemma:`alls ekki`,isStopword:!0,pos:`ao`}],[`alls staðar`,{lemma:`allur staður`,isStopword:!0,pos:`ao`}],[`allt í allt`,{lemma:`allur í allur`,isStopword:!0,pos:`ao`}],[`annars vegar`,{lemma:`annar vegur`,isStopword:!0,pos:`ao`}],[`auk þess`,{lemma:`auk það`,isStopword:!0,pos:`ao`}],[`að auki`,{lemma:`að auki`,isStopword:!0,pos:`ao`}],[`að vísu`,{lemma:`að vís`,isStopword:!0,pos:`ao`}],[`að sjálfsögðu`,{lemma:`að sjálfsagður`,isStopword:!0,pos:`ao`}],[`að minnsta kosti`,{lemma:`að lítill kostur`,isStopword:!0,pos:`ao`}],[`að öllu leyti`,{lemma:`að allur leyti`,isStopword:!0,pos:`ao`}],[`að nokkru leyti`,{lemma:`að nokkur leyti`,isStopword:!0,pos:`ao`}],[`ef til vill`,{lemma:`ef til vilja`,isStopword:!0,pos:`ao`}],[`einhvers staðar`,{lemma:`einhver staður`,isStopword:!0,pos:`ao`}],[`einhvern veginn`,{lemma:`einhver vegur`,isStopword:!0,pos:`ao`}],[`ekki síst`,{lemma:`ekki síður`,isStopword:!0,pos:`ao`}],[`engu að síður`,{lemma:`enginn að síður`,isStopword:!0,pos:`ao`}],[`fyrst og fremst`,{lemma:`snemma og fremri`,isStopword:!0,pos:`ao`}],[`hins vegar`,{lemma:`hinn vegur`,isStopword:!0,pos:`ao`}],[`hér og þar`,{lemma:`hér og þar`,isStopword:!0,pos:`ao`}],[`hér um bil`,{lemma:`hér um 
bil`,isStopword:!0,pos:`ao`}],[`hér á landi`,{lemma:`hér á land`,isStopword:!0,pos:`ao`}],[`hvað mest`,{lemma:`hvað mjög`,isStopword:!0,pos:`ao`}],[`hverju sinni`,{lemma:`hver sinn`,isStopword:!0,pos:`ao`}],[`hvorki né`,{lemma:`hvorki né`,isStopword:!0,pos:`ao`}],[`í burtu`,{lemma:`í burtu`,isStopword:!0,pos:`ao`}],[`í gær`,{lemma:`í gær`,isStopword:!0,pos:`ao`}],[`í senn`,{lemma:`í senn`,isStopword:!0,pos:`ao`}],[`í sífellu`,{lemma:`í sífella`,isStopword:!0,pos:`ao`}],[`lengi vel`,{lemma:`lengi vel`,isStopword:!0,pos:`ao`}],[`meira að segja`,{lemma:`mikill að segja`,isStopword:!0,pos:`ao`}],[`meira og minna`,{lemma:`mikill og lítill`,isStopword:!0,pos:`ao`}],[`meðal annars`,{lemma:`meðal annar`,isStopword:!0,pos:`ao`}],[`nokkurn veginn`,{lemma:`nokkur vegur`,isStopword:!0,pos:`ao`}],[`og svo framvegis`,{lemma:`og svo framvegis`,isStopword:!0,pos:`ao`}],[`satt að segja`,{lemma:`sannur að segja`,isStopword:!0,pos:`ao`}],[`sem betur fer`,{lemma:`sem vel fara`,isStopword:!0,pos:`ao`}],[`smám saman`,{lemma:`smátt saman`,isStopword:!0,pos:`ao`}],[`svo sem`,{lemma:`svo sem`,isStopword:!0,pos:`ao`}],[`sér í lagi`,{lemma:`sér í lag`,isStopword:!0,pos:`ao`}],[`til og frá`,{lemma:`til og frá`,isStopword:!0,pos:`ao`}],[`til baka`,{lemma:`til baka`,isStopword:!0,pos:`ao`}],[`vítt og breitt`,{lemma:`vítt og breitt`,isStopword:!0,pos:`ao`}],[`á ný`,{lemma:`á ný`,isStopword:!0,pos:`ao`}],[`á meðan`,{lemma:`á meðan`,isStopword:!0,pos:`ao`}],[`á sama tíma`,{lemma:`á samur tími`,isStopword:!0,pos:`ao`}],[`á hinn bóginn`,{lemma:`á hinn bógur`,isStopword:!0,pos:`ao`}],[`þar af leiðandi`,{lemma:`þar af leiða`,isStopword:!0,pos:`ao`}],[`þar að auki`,{lemma:`þar að auki`,isStopword:!0,pos:`ao`}],[`það er að segja`,{lemma:`það vera að segja`,isStopword:!0,pos:`ao`}],[`þess vegna`,{lemma:`það vegna`,isStopword:!0,pos:`ao`}],[`því miður`,{lemma:`það lítt`,isStopword:!0,pos:`ao`}],[`þrátt fyrir`,{lemma:`þrátt fyrir`,isStopword:!0,pos:`ao`}],[`á dögunum`,{lemma:`á 
dagur`,isStopword:!0,pos:`ao`}],[`á sínum tíma`,{lemma:`á sinn tími`,isStopword:!0,pos:`ao`}],[`á endanum`,{lemma:`á endi`,isStopword:!0,pos:`ao`}],[`einu sinni`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`eitt sinn`,{lemma:`einn sinn`,isStopword:!1,pos:`ao`}],[`í fyrsta sinn`,{lemma:`í fyrstur sinn`,isStopword:!1,pos:`ao`}],[`í kvöld`,{lemma:`í kvöld`,isStopword:!1,pos:`ao`}],[`í morgun`,{lemma:`í morgunn`,isStopword:!1,pos:`ao`}],[`á morgun`,{lemma:`á morgunn`,isStopword:!1,pos:`ao`}],[`fyrir hönd`,{lemma:`fyrir hönd`,isStopword:!1,pos:`fs`}],[`með tilliti til`,{lemma:`með tillit til`,isStopword:!1,pos:`fs`}],[`í ljósi`,{lemma:`í ljós`,isStopword:!1,pos:`fs`}],[`í stað`,{lemma:`í staður`,isStopword:!1,pos:`fs`}],[`fyrir aftan`,{lemma:`fyrir aftan`,isStopword:!1,pos:`fs`}],[`fyrir austan`,{lemma:`fyrir austan`,isStopword:!1,pos:`fs`}],[`fyrir framan`,{lemma:`fyrir framan`,isStopword:!1,pos:`fs`}],[`fyrir handan`,{lemma:`fyrir handan`,isStopword:!1,pos:`fs`}],[`fyrir innan`,{lemma:`fyrir innan`,isStopword:!1,pos:`fs`}],[`fyrir neðan`,{lemma:`fyrir neðan`,isStopword:!1,pos:`fs`}],[`fyrir norðan`,{lemma:`fyrir norðan`,isStopword:!1,pos:`fs`}],[`fyrir ofan`,{lemma:`fyrir ofan`,isStopword:!1,pos:`fs`}],[`fyrir sunnan`,{lemma:`fyrir sunnan`,isStopword:!1,pos:`fs`}],[`fyrir utan`,{lemma:`fyrir utan`,isStopword:!1,pos:`fs`}],[`fyrir vestan`,{lemma:`fyrir vestan`,isStopword:!1,pos:`fs`}],[`í gegnum`,{lemma:`í gegnum`,isStopword:!1,pos:`fs`}],[`í kringum`,{lemma:`í kringum`,isStopword:!1,pos:`fs`}],[`innan við`,{lemma:`innan við`,isStopword:!1,pos:`fs`}],[`upp úr`,{lemma:`upp úr`,isStopword:!1,pos:`fs`}],[`þvert á`,{lemma:`þvert á`,isStopword:!1,pos:`fs`}],[`þar eð`,{lemma:`þar eð`,isStopword:!0,pos:`st`}],[`sameinuðu þjóðirnar`,{lemma:`Sameinuðu þjóðirnar`,isStopword:!1,pos:`entity`}],[`evrópusambandið`,{lemma:`Evrópusambandið`,isStopword:!1,pos:`entity`}],[`nato`,{lemma:`NATO`,isStopword:!1,pos:`entity`}],[`nató`,{lemma:`NATO`,isStopword:!1,pos:`entity`}]]);function 
W(e,t){for(let n=Math.min(4,e.length-t);n>=2;n--){let r=e.slice(t,t+n).join(` `).toLowerCase(),i=U.get(r);if(i)return{phrase:i,wordCount:n}}return null}function G(e){return U.has(e.toLowerCase())}function K(e){return U.get(e.toLowerCase())}const q=new Set([`word`]),J=new Set([`person`,`company`,`entity`]),Y=new Set([`punctuation`,`s_begin`,`s_end`,`s_split`,`unknown`]),X=[`arinnar`,`anna`,`unum`,`um`,`ir`,`ar`,`ur`,`a`,`i`,`ið`,`inn`,`in`];function Z(t,n,r={}){let{bigrams:i,compoundSplitter:a,includeNumbers:o=!1,alwaysTryCompounds:s=!0}=r,c=e(t),l=[],u=[],d=new Map,f=`bigramCountValue`in n?n.bigramCountValue===0:!1,p=(e,t)=>t.length===1&&t[0]===e.toLowerCase(),m=e=>{let t=e,r=null;for(let e=0;e<2;e++){let e=t.toLowerCase();r=null;for(let i of X){if(!e.endsWith(i))continue;let a=t.slice(0,t.length-i.length);if(a.length<3)continue;let o=n.lemmatize(a);if(!p(a,o))return o;r||=a}if(!r||r.length<6)break;t=r}return null},h=e=>{let t=e.toLowerCase(),r=d.get(t);if(r)return r;let i=n.lemmatize(e);if(f&&p(e,i)&&e.length>=6){let n=m(e);if(n)return d.set(t,n),n}return d.set(t,i),i};for(let e=0;e<c.length;e++){let t=c[e];if(!Y.has(t.kind)){if(J.has(t.kind)){l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!0});continue}if(t.kind===`number`||t.kind===`ordinal`){o&&l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1});continue}if(q.has(t.kind)){let e=t.text??``,n=h(e),r={original:e,kind:t.kind,lemmas:n,isEntity:!1},i=n.length===1&&n[0]===e.toLowerCase();if(a&&(s||i)){let t=a.split(e);if(t.isCompound){r.compoundSplit=t;let e=t.parts.flatMap(e=>h(e));r.compoundLemmas=e,r.lemmas=[...new Set([...n,...e])]}}l.push(r),u.push({index:l.length-1,token:t});continue}l.push({original:t.text??``,kind:t.kind,lemmas:[],isEntity:!1})}}if(i&&u.length>0){let e=new O(n,i);for(let t=0;t<u.length;t++){let{index:n,token:r}=u[t],i=t>0?u[t-1].token:null,a=t<u.length-1?u[t+1].token:null,o=e.disambiguate(r.text??``,i?.text??null,a?.text??null,{prevLemmas:i?.text?h(i.text):void 
0,nextLemmas:a?.text?h(a.text):void 0});l[n].disambiguated=o.lemma,l[n].confidence=o.confidence}}else for(let{index:e}of u){let t=l[e];t.lemmas.length>0&&(t.disambiguated=t.lemmas[0],t.confidence=t.lemmas.length===1?1:.5)}return l}function Q(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1}=r,c=Z(e,n,r),l=new Set,u=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of c)if(!e.isEntity){if(o)for(let t of e.lemmas)u(t)||l.add(t);else e.disambiguated&&(u(e.disambiguated)||l.add(e.disambiguated));if(e.compoundSplit?.isCompound){let t=e.compoundLemmas?e.compoundLemmas:e.compoundSplit.parts.flatMap(e=>n.lemmatize(e));for(let e of t)u(e)||l.add(e)}}return l}function $(e,n,r={}){let{removeStopwords:a=!1,indexAllCandidates:o=!0,useContextualStopwords:s=!1,andOperator:c=` & `,orOperator:l=` | `,wrapGroups:u=!0,includeOriginal:d=!1,lowercaseOriginal:f=!0}=r,p=Z(e,n,r),m=[],h=(e,n)=>a?s?i(e,n):t.has(e):!1;for(let e of p){if(e.isEntity)continue;let t=[];if(o?t=e.lemmas:e.disambiguated&&(t=[e.disambiguated]),d){let n=e.original??``;if(n.length>0){let e=f?n.toLowerCase():n;t=[...t,e]}}let n=[...new Set(t.filter(e=>e&&!h(e)))];n.length>0&&m.push(n)}return{groups:m,query:m.map(e=>{let t=e.join(l);return u&&e.length>1?`(${t})`:t}).filter(e=>e.length>0).join(c)}}function ee(e,t,n,r={}){let i=performance.now(),a,o;switch(n){case`naive`:{let n=e.split(/\s+/).filter(e=>e.length>0),r=[];for(let e of n){let n=e.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu,``);if(n){let e=t.lemmatize(n);r.push({original:n,kind:`word`,lemmas:e,isEntity:!1,disambiguated:e[0],confidence:e.length===1?1:.5})}}a=r,o=new Set(r.map(e=>e.disambiguated).filter(Boolean));break}case`tokenized`:a=Z(e,t),o=new 
Set(a.filter(e=>e.kind===`word`&&e.lemmas.length>0).map(e=>e.lemmas[0]));break;case`disambiguated`:a=Z(e,t,{bigrams:r.bigrams}),o=Q(e,t,{bigrams:r.bigrams});break;case`full`:a=Z(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter}),o=Q(e,t,{bigrams:r.bigrams,compoundSplitter:r.compoundSplitter});break}let s=performance.now()-i,c=a.filter(e=>e.kind===`word`),l=c.length,u=c.filter(e=>e.lemmas.length>0&&!(e.lemmas.length===1&&e.lemmas[0]===e.original.toLowerCase())).length,d=c.filter(e=>e.lemmas.length>1).length,f=c.filter(e=>e.confidence!==void 0).map(e=>e.confidence),p=f.length>0?f.reduce((e,t)=>e+t,0)/f.length:0,m=c.filter(e=>e.compoundSplit?.isCompound).length,h=a.filter(e=>e.isEntity).length;return{wordCount:l,lemmatizedCount:u,coverage:l>0?u/l:0,ambiguousCount:d,ambiguityRate:l>0?d/l:0,avgConfidence:p,compoundsFound:m,entitiesSkipped:h,uniqueLemmas:o.size,timeMs:s}}export{d as BinaryLemmatizer,M as CASE_NAMES,r as CONTEXTUAL_STOPWORDS,B as CompoundSplitter,f as DISAMBIGUATION_RULES,O as Disambiguator,N as GENDER_NAMES,g as NOMINATIVE_PRONOUNS,P as NUMBER_NAMES,h as PREPOSITION_CASES,I as PROTECTED_LEMMAS,U as STATIC_PHRASES,t as STOPWORDS_IS,A as WORD_CLASS_NAMES,j as WORD_CLASS_NAMES_IS,x as applyGrammarRules,b as applyNounAfterPrepositionRule,v as applyPrepositionRule,y as applyPronounVerbRule,$ as buildSearchQuery,_ as canGovernCase,H as createKnownLemmaFilter,V as createKnownLemmaSet,k as extractDisambiguatedLemmas,Q as extractIndexableLemmas,C as getGovernedCases,K as getPhraseInfo,p as getRulesForWord,m as hasDisambiguationRules,i as isContextualStopword,G as isKnownPhrase,S as isKnownPreposition,n as isStopword,W as matchPhrase,Z as processText,a as removeStopwords,ee as runBenchmark};
|
|
2
2
|
//# sourceMappingURL=index.mjs.map
|
package/dist/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":[],"sources":["../src/stopwords.ts","../src/binary-lemmatizer.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/disambiguate.ts","../src/types.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"sourcesContent":["/**\n * Icelandic stopwords for search indexing.\n *\n * Source: https://github.com/atlijas/icelandic-stop-words\n * Data from DIM (Database of Icelandic Morphology) by Árni Magnússon Institute.\n *\n * Includes all inflected forms of pronouns, prepositions, conjunctions, etc.\n */\n\n// prettier-ignore\nexport const STOPWORDS_IS = new Set([\n \"á\",\"að\",\"aðra\",\"aðrar\",\"aðrir\",\"af\",\"alla\",\"allan\",\"allar\",\"allir\",\n \"allnokkra\",\"allnokkrar\",\"allnokkrir\",\"allnokkru\",\"allnokkrum\",\"allnokkuð\",\n \"allnokkur\",\"allnokkurn\",\"allnokkurra\",\"allnokkurrar\",\"allnokkurri\",\"allnokkurs\",\n \"allnokkurt\",\"allra\",\"allrar\",\"allri\",\"alls\",\"allt\",\"alltað\",\"allur\",\"án\",\n \"andspænis\",\"annað\",\"annaðhvort\",\"annan\",\"annar\",\"annarra\",\"annarrar\",\"annarri\",\n \"annars\",\"árla\",\"ásamt\",\"auk\",\"austan\",\"austanundir\",\"austur\",\"báða\",\"báðar\",\n \"báðir\",\"báðum\",\"bæði\",\"bak\",\"beggja\",\"eða\",\"eður\",\"ef\",\"eftir\",\"ég\",\"ein\",\n \"eina\",\"einar\",\"einhver\",\"einhverja\",\"einhverjar\",\"einhverjir\",\"einhverju\",\n \"einhverjum\",\"einhvern\",\"einhverra\",\"einhverrar\",\"einhverri\",\"einhvers\",\"einir\",\n \"einn\",\"einna\",\"einnar\",\"einni\",\"eins\",\"einskis\",\"einu\",\"einum\",\"eitt\",\"eitthvað\",\n \"eitthvert\",\"ekkert\",\"ella\",\"ellegar\",\"en\",\"enda\",\"enga\",\"engan\",\"engar\",\"engin\",\n \"enginn\",\"engir\",\"engra\",\"engrar\",\"engri\",\"engu\",\"engum\",\"er\",\"fáein\",\"fáeina\",\n \"fáeinar\",\"fáeinir\",\"fáeinna\",\"fáeinum\",\"fjær\",\"fjarri\",\"flestalla\",\"flestallan\",\n \"flestallar\",\"flestallir\",\"flestallra\",\"flestallrar\",\"flestallri\",\"flestalls\",\n 
\"flestallt\",\"flestallur\",\"flestöll\",\"flestöllu\",\"flestöllum\",\"frá\",\"fram\",\"fyrir\",\n \"fyrst\",\"gagnstætt\",\"gagnvart\",\"gegn\",\"gegnt\",\"gegnum\",\"hana\",\"handa\",\"handan\",\n \"hann\",\"hans\",\"heldur\",\"hennar\",\"henni\",\"hið\",\"hin\",\"hina\",\"hinar\",\"hinir\",\"hinn\",\n \"hinna\",\"hinnar\",\"hinni\",\"hins\",\"hinu\",\"hinum\",\"hitt\",\"hjá\",\"honum\",\"hún\",\"hvað\",\n \"hvaða\",\"hvenær\",\"hver\",\"hverja\",\"hverjar\",\"hverjir\",\"hverju\",\"hverjum\",\"hvern\",\n \"hverra\",\"hverrar\",\"hverri\",\"hvers\",\"hvert\",\"hvílík\",\"hvílíka\",\"hvílíkan\",\n \"hvílíkar\",\"hvílíkir\",\"hvílíkra\",\"hvílíkrar\",\"hvílíkri\",\"hvílíks\",\"hvílíkt\",\n \"hvílíku\",\"hvílíkum\",\"hvílíkur\",\"hvor\",\"hvora\",\"hvorar\",\"hvorir\",\"hvorki\",\"hvorn\",\n \"hvorra\",\"hvorrar\",\"hvorri\",\"hvors\",\"hvort\",\"hvoru\",\"hvorug\",\"hvoruga\",\"hvorugan\",\n \"hvorugar\",\"hvorugir\",\"hvorugra\",\"hvorugrar\",\"hvorugri\",\"hvorugs\",\"hvorugt\",\n \"hvorugu\",\"hvorugum\",\"hvorugur\",\"hvorum\",\"í\",\"inn\",\"innan\",\"innanundir\",\"jafnframt\",\n \"jafnhliða\",\"kring\",\"kringum\",\"með\",\"meðal\",\"meðan\",\"meður\",\"mér\",\"mestalla\",\n \"mestallan\",\"mestallar\",\"mestallir\",\"mestallra\",\"mestallrar\",\"mestallri\",\"mestalls\",\n \"mestallt\",\"mestallur\",\"mestöll\",\"mestöllu\",\"mestöllum\",\"miðli\",\"mig\",\"milli\",\n \"millum\",\"mín\",\"mína\",\"mínar\",\"mínir\",\"minn\",\"minna\",\"minnar\",\"minni\",\"míns\",\n \"mínu\",\"mínum\",\"mitt\",\"mót\",\"móti\",\"nær\",\"nærri\",\"næst\",\"næstum\",\"nálægt\",\"né\",\n \"neðan\",\"nein\",\"neina\",\"neinar\",\"neinir\",\"neinn\",\"neinna\",\"neinnar\",\"neinni\",\n \"neins\",\"neinu\",\"neinum\",\"neitt\",\"nema\",\"niður\",\"nokkra\",\"nokkrar\",\"nokkrir\",\n \"nokkru\",\"nokkrum\",\"nokkuð\",\"nokkur\",\"nokkurn\",\"nokkurra\",\"nokkurrar\",\"nokkurri\",\n 
\"nokkurs\",\"nokkurt\",\"norðan\",\"nú\",\"öðru\",\"öðrum\",\"of\",\"ofan\",\"ofar\",\"og\",\"óháð\",\n \"okkar\",\"okkur\",\"öll\",\"öllu\",\"öllum\",\"önnur\",\"órafjarri\",\"oss\",\"sá\",\"sakir\",\n \"sama\",\"saman\",\"samar\",\"samfara\",\"samhliða\",\"sami\",\"samir\",\"samkvæmt\",\"samra\",\n \"samrar\",\"samri\",\"sams\",\"samskipa\",\"samt\",\"samtímis\",\"samur\",\"sem\",\"sér\",\"sérhvað\",\n \"sérhver\",\"sérhverja\",\"sérhverjar\",\"sérhverjir\",\"sérhverju\",\"sérhverjum\",\"sérhvern\",\n \"sérhverra\",\"sérhverrar\",\"sérhverri\",\"sérhvers\",\"sérhvert\",\"síðan\",\"síðla\",\"sig\",\n \"sín\",\"sína\",\"sínar\",\"sínhver\",\"sínhverja\",\"sínhverjar\",\"sínhverjir\",\"sínhverju\",\n \"sínhverjum\",\"sínhvern\",\"sínhverra\",\"sínhverrar\",\"sínhverri\",\"sínhvers\",\"sínhvert\",\n \"sínhvor\",\"sínhvora\",\"sínhvorar\",\"sínhvorir\",\"sínhvorn\",\"sínhvorra\",\"sínhvorrar\",\n \"sínhvorri\",\"sínhvors\",\"sínhvort\",\"sínhvoru\",\"sínhvorum\",\"sínir\",\"sinn\",\"sinna\",\n \"sinnar\",\"sinnhver\",\"sinnhverja\",\"sinnhverjar\",\"sinnhverjir\",\"sinnhverju\",\n \"sinnhverjum\",\"sinnhvern\",\"sinnhverra\",\"sinnhverrar\",\"sinnhverri\",\"sinnhvers\",\n \"sinnhvert\",\"sinnhvor\",\"sinnhvora\",\"sinnhvorar\",\"sinnhvorir\",\"sinnhvorn\",\n \"sinnhvorra\",\"sinnhvorrar\",\"sinnhvorri\",\"sinnhvors\",\"sinnhvort\",\"sinnhvoru\",\n \"sinnhvorum\",\"sinni\",\"síns\",\"sínu\",\"sínum\",\"sitt\",\"sitthvað\",\"sitthver\",\n \"sitthverja\",\"sitthverjar\",\"sitthverjir\",\"sitthverju\",\"sitthverjum\",\"sitthvern\",\n \"sitthverra\",\"sitthverrar\",\"sitthverri\",\"sitthvers\",\"sitthvert\",\"sitthvor\",\n \"sitthvora\",\"sitthvorar\",\"sitthvorir\",\"sitthvorn\",\"sitthvorra\",\"sitthvorrar\",\n \"sitthvorri\",\"sitthvors\",\"sitthvort\",\"sitthvoru\",\"sitthvorum\",\"sjálf\",\"sjálfa\",\n \"sjálfan\",\"sjálfar\",\"sjálfir\",\"sjálfra\",\"sjálfrar\",\"sjálfri\",\"sjálfs\",\"sjálft\",\n 
\"sjálfu\",\"sjálfum\",\"sjálfur\",\"slík\",\"slíka\",\"slíkan\",\"slíkar\",\"slíkir\",\"slíkra\",\n \"slíkrar\",\"slíkri\",\"slíks\",\"slíkt\",\"slíku\",\"slíkum\",\"slíkur\",\"snemma\",\"sökum\",\n \"söm\",\"sömu\",\"sömum\",\"sú\",\"sum\",\"suma\",\"suman\",\"sumar\",\"sumir\",\"sumra\",\"sumrar\",\n \"sumri\",\"sums\",\"sumt\",\"sumu\",\"sumum\",\"sumur\",\"sunnan\",\"svo\",\"til\",\"tráss\",\"um\",\n \"umfram\",\"umhverfis\",\"undan\",\"undir\",\"uns\",\"upp\",\"úr\",\"út\",\"utan\",\"útundan\",\n \"vegna\",\"vér\",\"vestan\",\"vestur\",\"vettugi\",\"við\",\"viður\",\"vor\",\"vora\",\"vorar\",\n \"vorir\",\"vorn\",\"vorra\",\"vorrar\",\"vorri\",\"vors\",\"vort\",\"voru\",\"vorum\",\"yðar\",\n \"yður\",\"yfir\",\"ykkar\",\"ykkur\",\"ýmis\",\"ýmiss\",\"ýmissa\",\"ýmissar\",\"ýmissi\",\"ýmist\",\n \"ýmsa\",\"ýmsan\",\"ýmsar\",\"ýmsir\",\"ýmsu\",\"ýmsum\",\"þá\",\"það\",\"þær\",\"þann\",\"þar\",\n \"þau\",\"þegar\",\"þeim\",\"þeir\",\"þeirra\",\"þeirrar\",\"þeirri\",\"þennan\",\"þér\",\"þess\",\n \"þessa\",\"þessar\",\"þessara\",\"þessarar\",\"þessari\",\"þessi\",\"þessir\",\"þessu\",\n \"þessum\",\"þetta\",\"þið\",\"þig\",\"þín\",\"þína\",\"þínar\",\"þínir\",\"þinn\",\"þinna\",\n \"þinnar\",\"þinni\",\"þíns\",\"þínu\",\"þínum\",\"þitt\",\"þó\",\"þónokkra\",\"þónokkrar\",\n \"þónokkrir\",\"þónokkru\",\"þónokkrum\",\"þónokkuð\",\"þónokkur\",\"þónokkurn\",\"þónokkurra\",\n \"þónokkurrar\",\"þónokkurri\",\"þónokkurs\",\"þónokkurt\",\"þótt\",\"þú\",\"því\",\"þvílík\",\n \"þvílíka\",\"þvílíkan\",\"þvílíkar\",\"þvílíkir\",\"þvílíkra\",\"þvílíkrar\",\"þvílíkri\",\n \"þvílíks\",\"þvílíkt\",\"þvílíku\",\"þvílíkum\",\"þvílíkur\",\n]);\n\n/**\n * Check if a word is a stopword.\n */\nexport function isStopword(word: string): boolean {\n return STOPWORDS_IS.has(word.toLowerCase());\n}\n\n/**\n * Contextual stopword rules for ambiguous words.\n *\n * Some words are stopwords in certain grammatical contexts but not others:\n * - \"á\" as preposition (fs) or adverb (ao) = 
stopword\n * - \"á\" as verb \"eiga\" (so) = NOT a stopword (\"Ég á bíl\")\n * - \"á\" as noun \"river\" (no) = NOT a stopword (\"við ána\")\n *\n * Map: lemma -> Set of POS codes where it IS a stopword\n */\nexport const CONTEXTUAL_STOPWORDS: Map<string, Set<string>> = new Map([\n // \"á\" - prep/adverb = stop, verb/noun = keep\n [\"á\", new Set([\"fs\", \"ao\"])],\n // \"við\" - prep = stop, pronoun \"we\" = stop, noun \"viður\" = keep\n [\"við\", new Set([\"fs\", \"fn\"])],\n // \"af\" - prep/adverb = stop\n [\"af\", new Set([\"fs\", \"ao\"])],\n // \"til\" - prep = stop\n [\"til\", new Set([\"fs\"])],\n // \"um\" - prep = stop\n [\"um\", new Set([\"fs\"])],\n // \"frá\" - prep = stop\n [\"frá\", new Set([\"fs\"])],\n // \"yfir\" - prep/adverb = stop\n [\"yfir\", new Set([\"fs\", \"ao\"])],\n // \"undir\" - prep/adverb = stop\n [\"undir\", new Set([\"fs\", \"ao\"])],\n // \"fyrir\" - prep/adverb = stop\n [\"fyrir\", new Set([\"fs\", \"ao\"])],\n // \"eftir\" - prep/adverb = stop\n [\"eftir\", new Set([\"fs\", \"ao\"])],\n // \"gegn\" - prep = stop\n [\"gegn\", new Set([\"fs\"])],\n // \"hjá\" - prep = stop\n [\"hjá\", new Set([\"fs\"])],\n // \"úr\" - prep = stop, noun \"úr\" (watch) = keep\n [\"úr\", new Set([\"fs\"])],\n // \"í\" - prep = stop\n [\"í\", new Set([\"fs\"])],\n]);\n\n/**\n * Check if a lemma is a stopword in a specific grammatical context.\n *\n * For ambiguous words, uses POS to determine stopword status.\n * For unambiguous words, falls back to standard stopword check.\n *\n * @param lemma - The lemmatized word\n * @param pos - Part of speech code (fs, ao, so, no, etc.)\n * @returns true if the word should be treated as a stopword\n */\nexport function isContextualStopword(lemma: string, pos?: string): boolean {\n const normalized = lemma.toLowerCase();\n\n // Check if this lemma has context-dependent rules\n const contextRule = CONTEXTUAL_STOPWORDS.get(normalized);\n if (contextRule && pos) {\n // Use the rule: stopword only if POS is in the 
stopword set\n return contextRule.has(pos);\n }\n\n // Fall back to standard stopword check\n return STOPWORDS_IS.has(normalized);\n}\n\n/**\n * Filter stopwords from an array of words/lemmas.\n */\nexport function removeStopwords<T extends string>(words: T[]): T[] {\n return words.filter((w) => !isStopword(w));\n}\n","/**\n * Binary format lemmatizer for efficient memory usage.\n *\n * Uses ArrayBuffer with TypedArray views and binary search for O(log n) lookups.\n * Target memory: ~70MB vs ~1.2GB for JS Map-based approach.\n *\n * Binary file format:\n * - Header (32 bytes): magic, version, counts\n * - String pool: all strings concatenated UTF-8\n * - Lemma index: offsets + lengths\n * - Word index: offsets + lengths (sorted alphabetically)\n * - Entry offsets: start/end of entries for each word\n * - Entries: packed lemmaIdx:20 + posCode:4\n * - Bigrams: word1/word2 offsets + lengths + frequencies (sorted)\n */\n\nimport type {\n WordClass,\n LemmaWithPOS,\n LemmaWithMorph,\n LemmatizerLike,\n BigramProvider,\n GrammaticalCase,\n GrammaticalGender,\n GrammaticalNumber,\n MorphFeatures,\n} from \"./types.js\";\n\nconst MAGIC = 0x4c454d41; // \"LEMA\"\n\n// POS code to string mapping (must match build-binary.py)\nconst CODE_TO_POS: WordClass[] = [\n \"no\",\n \"so\",\n \"lo\",\n \"ao\",\n \"fs\",\n \"fn\",\n \"st\",\n \"to\",\n \"gr\",\n \"uh\",\n];\n\n// Case code to string mapping (must match build-binary.py)\n// 0=none, 1=nf, 2=þf, 3=þgf, 4=ef\nconst CODE_TO_CASE: (GrammaticalCase | undefined)[] = [\n undefined, // 0 = none\n \"nf\", // 1 = nominative\n \"þf\", // 2 = accusative\n \"þgf\", // 3 = dative\n \"ef\", // 4 = genitive\n];\n\n// Gender code to string mapping (must match build-binary.py)\n// 0=none, 1=kk, 2=kvk, 3=hk\nconst CODE_TO_GENDER: (GrammaticalGender | undefined)[] = [\n undefined, // 0 = none\n \"kk\", // 1 = masculine\n \"kvk\", // 2 = feminine\n \"hk\", // 3 = neuter\n];\n\n// Number code to string mapping (must match build-binary.py)\n// 
0=et/none, 1=ft\nconst CODE_TO_NUMBER: (GrammaticalNumber | undefined)[] = [\n \"et\", // 0 = singular (or none)\n \"ft\", // 1 = plural\n];\n\nexport interface BinaryLemmatizerOptions {\n fetch?: typeof fetch;\n}\n\nexport interface BinaryLemmatizeOptions {\n wordClass?: WordClass;\n}\n\nexport class BinaryLemmatizer implements LemmatizerLike, BigramProvider {\n private buffer: ArrayBuffer;\n private stringPool: Uint8Array;\n private lemmaOffsets: Uint32Array;\n private lemmaLengths: Uint8Array;\n private wordOffsets: Uint32Array;\n private wordLengths: Uint8Array;\n private entryOffsets: Uint32Array;\n private entries: Uint32Array;\n private bigramW1Offsets: Uint32Array;\n private bigramW1Lengths: Uint8Array;\n private bigramW2Offsets: Uint32Array;\n private bigramW2Lengths: Uint8Array;\n private bigramFreqs: Uint32Array;\n\n private lemmaCount: number;\n private wordCount: number;\n private entryCount: number;\n private bigramCount: number;\n private version: number;\n\n private decoder = new TextDecoder(\"utf-8\");\n\n private constructor(buffer: ArrayBuffer) {\n this.buffer = buffer;\n const view = new DataView(buffer);\n\n // Read header\n const magic = view.getUint32(0, true);\n if (magic !== MAGIC) {\n throw new Error(\n `Invalid binary format: expected magic 0x${MAGIC.toString(16)}, got 0x${magic.toString(16)}`\n );\n }\n\n this.version = view.getUint32(4, true);\n if (this.version !== 1 && this.version !== 2) {\n throw new Error(`Unsupported version: ${this.version}`);\n }\n\n const stringPoolSize = view.getUint32(8, true);\n this.lemmaCount = view.getUint32(12, true);\n this.wordCount = view.getUint32(16, true);\n this.entryCount = view.getUint32(20, true);\n this.bigramCount = view.getUint32(24, true);\n // reserved at 28\n\n // Calculate section offsets\n let offset = 32;\n\n // String pool\n this.stringPool = new Uint8Array(buffer, offset, stringPoolSize);\n offset += stringPoolSize;\n\n // Lemma offsets (u32 × lemmaCount)\n this.lemmaOffsets = new 
Uint32Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount * 4;\n\n // Lemma lengths (u8 × lemmaCount)\n this.lemmaLengths = new Uint8Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Word offsets (u32 × wordCount)\n this.wordOffsets = new Uint32Array(buffer, offset, this.wordCount);\n offset += this.wordCount * 4;\n\n // Word lengths (u8 × wordCount)\n this.wordLengths = new Uint8Array(buffer, offset, this.wordCount);\n offset += this.wordCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Entry offsets (u32 × (wordCount + 1))\n this.entryOffsets = new Uint32Array(buffer, offset, this.wordCount + 1);\n offset += (this.wordCount + 1) * 4;\n\n // Entries (u32 × entryCount)\n this.entries = new Uint32Array(buffer, offset, this.entryCount);\n offset += this.entryCount * 4;\n\n // Bigram word1 offsets\n this.bigramW1Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word1 lengths\n this.bigramW1Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram word2 offsets\n this.bigramW2Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word2 lengths\n this.bigramW2Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram frequencies\n this.bigramFreqs = new Uint32Array(buffer, offset, this.bigramCount);\n }\n\n /**\n * Load binary lemmatizer from URL.\n */\n static async load(\n url: string,\n options: BinaryLemmatizerOptions = {}\n ): Promise<BinaryLemmatizer> {\n const fetchFn = options.fetch ?? 
fetch;\n const response = await fetchFn(url);\n\n if (!response.ok) {\n throw new Error(`Failed to load binary data: ${response.status}`);\n }\n\n const buffer = await response.arrayBuffer();\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Load from ArrayBuffer (for Node.js or pre-loaded data).\n */\n static loadFromBuffer(buffer: ArrayBuffer): BinaryLemmatizer {\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Get string from string pool.\n */\n private getString(offset: number, length: number): string {\n return this.decoder.decode(this.stringPool.subarray(offset, offset + length));\n }\n\n /**\n * Get lemma by index.\n */\n private getLemma(index: number): string {\n return this.getString(this.lemmaOffsets[index], this.lemmaLengths[index]);\n }\n\n /**\n * Get word by index.\n */\n private getWord(index: number): string {\n return this.getString(this.wordOffsets[index], this.wordLengths[index]);\n }\n\n /**\n * Binary search for word in sorted word array.\n * Returns index or -1 if not found.\n */\n private findWord(word: string): number {\n let left = 0;\n let right = this.wordCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midWord = this.getWord(mid);\n\n if (midWord === word) {\n return mid;\n }\n if (midWord < word) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n\n return -1;\n }\n\n /**\n * Look up possible lemmas for a word form.\n * Results are sorted by corpus frequency (most common first).\n * Duplicates are removed (same lemma with different morph features).\n */\n lemmatize(word: string, options: BinaryLemmatizeOptions = {}): string[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [normalized];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n\n const { wordClass } = options;\n const seen = new Set<string>();\n const result: string[] = [];\n\n for (let i = start; i < end; i++) {\n const 
{ lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const pos = CODE_TO_POS[posCode];\n\n if (wordClass && pos !== wordClass) {\n continue;\n }\n\n const lemma = this.getLemma(lemmaIdx);\n if (!seen.has(lemma)) {\n seen.add(lemma);\n result.push(lemma);\n }\n }\n\n if (result.length === 0) {\n return [normalized];\n }\n\n return result;\n }\n\n /**\n * Unpack entry based on binary format version.\n * Version 1: bits 0-3=pos, bits 4-23=lemmaIdx\n * Version 2: bits 0-3=pos, bits 4-6=case, bits 7-8=gender, bit 9=number, bits 10-29=lemmaIdx\n */\n private unpackEntry(entry: number): {\n lemmaIdx: number;\n posCode: number;\n caseCode: number;\n genderCode: number;\n numberCode: number;\n } {\n if (this.version === 1) {\n return {\n lemmaIdx: entry >>> 4,\n posCode: entry & 0xf,\n caseCode: 0,\n genderCode: 0,\n numberCode: 0,\n };\n }\n // Version 2\n return {\n lemmaIdx: entry >>> 10,\n posCode: entry & 0xf,\n caseCode: (entry >>> 4) & 0x7,\n genderCode: (entry >>> 7) & 0x3,\n numberCode: (entry >>> 9) & 0x1,\n };\n }\n\n /**\n * Look up lemmas with their word class (POS) tags.\n * Duplicates are removed (same lemma+pos with different morph features).\n */\n lemmatizeWithPOS(word: string): LemmaWithPOS[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const seen = new Set<string>();\n const result: LemmaWithPOS[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const lemma = this.getLemma(lemmaIdx);\n const pos = CODE_TO_POS[posCode] ?? 
(\"\" as WordClass);\n const key = `${lemma}:${pos}`;\n\n if (!seen.has(key)) {\n seen.add(key);\n result.push({ lemma, pos });\n }\n }\n\n return result;\n }\n\n /**\n * Look up lemmas with word class and morphological features.\n * Only available with version 2 binary format.\n */\n lemmatizeWithMorph(word: string): LemmaWithMorph[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const result: LemmaWithMorph[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode, caseCode, genderCode, numberCode } =\n this.unpackEntry(this.entries[i]);\n\n const morph: MorphFeatures = {};\n const caseVal = CODE_TO_CASE[caseCode];\n const genderVal = CODE_TO_GENDER[genderCode];\n const numberVal = CODE_TO_NUMBER[numberCode];\n\n if (caseVal) morph.case = caseVal;\n if (genderVal) morph.gender = genderVal;\n if (numberVal) morph.number = numberVal;\n\n result.push({\n lemma: this.getLemma(lemmaIdx),\n pos: CODE_TO_POS[posCode] ?? (\"\" as WordClass),\n morph: Object.keys(morph).length > 0 ? morph : undefined,\n });\n }\n\n return result;\n }\n\n /**\n * Check if morphological features are available (version 2+).\n */\n hasMorphFeatures(): boolean {\n return this.version >= 2;\n }\n\n /**\n * Get the binary format version.\n */\n getVersion(): number {\n return this.version;\n }\n\n /**\n * Binary search for bigram. 
Returns index or -1.\n */\n private findBigram(word1: string, word2: string): number {\n let left = 0;\n let right = this.bigramCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midW1 = this.getString(\n this.bigramW1Offsets[mid],\n this.bigramW1Lengths[mid]\n );\n\n if (midW1 < word1) {\n left = mid + 1;\n } else if (midW1 > word1) {\n right = mid - 1;\n } else {\n // word1 matches, compare word2\n const midW2 = this.getString(\n this.bigramW2Offsets[mid],\n this.bigramW2Lengths[mid]\n );\n\n if (midW2 === word2) {\n return mid;\n }\n if (midW2 < word2) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n }\n\n return -1;\n }\n\n /**\n * Get bigram frequency.\n * @returns Frequency count, or 0 if not found\n */\n bigramFreq(word1: string, word2: string): number {\n const idx = this.findBigram(word1.toLowerCase(), word2.toLowerCase());\n return idx === -1 ? 0 : this.bigramFreqs[idx];\n }\n\n /**\n * Alias for bigramFreq to satisfy BigramProvider interface.\n * @returns Frequency count, or 0 if not found\n */\n freq(word1: string, word2: string): number {\n return this.bigramFreq(word1, word2);\n }\n\n /**\n * Check if a word is known to the lemmatizer.\n */\n isKnown(word: string): boolean {\n return this.findWord(word.toLowerCase()) !== -1;\n }\n\n /**\n * Get the total number of lemmas in the database.\n */\n get lemmaCountValue(): number {\n return this.lemmaCount;\n }\n\n /**\n * Get the total number of word forms.\n */\n get wordFormCount(): number {\n return this.wordCount;\n }\n\n /**\n * Get the total number of bigrams.\n */\n get bigramCountValue(): number {\n return this.bigramCount;\n }\n\n /**\n * Get raw buffer size (approximate memory usage).\n */\n get bufferSize(): number {\n return this.buffer.byteLength;\n }\n\n /**\n * Get all unique lemmas from the binary data.\n * Useful for compound splitting.\n */\n getAllLemmas(): string[] {\n const lemmas: string[] = [];\n for (let i = 0; i < this.lemmaCount; i++) {\n 
lemmas.push(this.getLemma(i));\n }\n return lemmas;\n }\n}\n","/**\n * Disambiguation rules for Icelandic.\n *\n * Based on GreynirEngine's Prefs.conf and linguistic patterns.\n * These rules help resolve ambiguous words by considering context.\n */\n\nimport type { WordClass } from \"./types.js\";\n\n/**\n * A disambiguation preference rule.\n *\n * When the word matches and the context condition is met,\n * prefer `prefer` POS over `over` POS.\n */\nexport interface DisambiguationRule {\n /** The ambiguous word (lowercase) */\n word: string;\n /** Preferred part of speech in this context */\n prefer: WordClass;\n /** Dispreferred part of speech */\n over: WordClass;\n /** Context condition for when to apply this rule */\n context: \"before_noun\" | \"before_verb\" | \"after_pronoun\" | \"sentence_start\" | \"any\";\n /** Optional description */\n description?: string;\n}\n\n/**\n * Disambiguation rules extracted from Greynir's patterns.\n *\n * Format: { word, prefer, over, context }\n *\n * Common patterns:\n * - \"á\" as preposition (fs) when before noun, as verb \"eiga\" (so) after pronoun\n * - \"við\" as preposition (fs) when before noun, as pronoun (fn) at sentence start\n */\nexport const DISAMBIGUATION_RULES: DisambiguationRule[] = [\n // \"á\" - one of the most ambiguous words\n // Preposition: \"á borðinu\", \"á Íslandi\"\n // Verb (eiga): \"Ég á bíl\", \"Hún á hest\"\n // Noun (river): \"við ána\"\n {\n word: \"á\",\n prefer: \"so\", // verb \"eiga\"\n over: \"fs\", // preposition\n context: \"after_pronoun\",\n description: \"á after pronoun = verb 'eiga' (I own, you own)\",\n },\n {\n word: \"á\",\n prefer: \"fs\", // preposition\n over: \"so\", // verb\n context: \"before_noun\",\n description: \"á before noun = preposition (on, at)\",\n },\n\n // \"við\" - preposition vs pronoun\n // Preposition: \"við gluggann\", \"við borðið\"\n // Pronoun: \"Við erum hér\" (we are here)\n {\n word: \"við\",\n prefer: \"fn\", // pronoun \"we\"\n over: \"fs\", // 
preposition\n context: \"sentence_start\",\n description: \"við at sentence start = pronoun 'we'\",\n },\n {\n word: \"við\",\n prefer: \"fs\", // preposition\n over: \"fn\", // pronoun\n context: \"before_noun\",\n description: \"við before noun = preposition 'by/at'\",\n },\n\n // \"af\" - preposition vs adverb\n {\n word: \"af\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"af before noun = preposition 'of/from'\",\n },\n\n // \"til\" - preposition\n {\n word: \"til\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"til before noun = preposition 'to'\",\n },\n\n // \"um\" - preposition vs adverb\n {\n word: \"um\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"um before noun = preposition 'about/around'\",\n },\n\n // \"yfir\" - preposition vs adverb\n {\n word: \"yfir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"yfir before noun = preposition 'over'\",\n },\n\n // \"undir\" - preposition vs adverb\n {\n word: \"undir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"undir before noun = preposition 'under'\",\n },\n\n // \"fyrir\" - preposition vs adverb\n {\n word: \"fyrir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"fyrir before noun = preposition 'for/before'\",\n },\n\n // \"eftir\" - preposition vs adverb\n {\n word: \"eftir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"eftir before noun = preposition 'after'\",\n },\n\n // \"frá\" - preposition\n {\n word: \"frá\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"frá before noun = preposition 'from'\",\n },\n\n // \"með\" - preposition vs adverb\n {\n word: \"með\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"með before noun = preposition 'with'\",\n },\n\n // \"í\" - preposition\n {\n word: \"í\",\n 
prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"í before noun = preposition 'in'\",\n },\n\n // \"úr\" - preposition vs noun (watch)\n {\n word: \"úr\",\n prefer: \"fs\",\n over: \"no\",\n context: \"before_noun\",\n description: \"úr before noun = preposition 'out of'\",\n },\n];\n\n/**\n * Look up rules that apply to a specific word.\n */\nexport function getRulesForWord(word: string): DisambiguationRule[] {\n const normalized = word.toLowerCase();\n return DISAMBIGUATION_RULES.filter((r) => r.word === normalized);\n}\n\n/**\n * Check if a word has disambiguation rules.\n */\nexport function hasDisambiguationRules(word: string): boolean {\n return DISAMBIGUATION_RULES.some((r) => r.word === word.toLowerCase());\n}\n","/**\n * Mini-grammar disambiguation rules for Icelandic.\n *\n * Uses case government (forsetningar stjórna falli) to disambiguate\n * prepositions from other parts of speech. For example:\n * - \"á\" + dative noun = preposition \"on/at\"\n * - \"á\" after pronoun = verb \"eiga\" (to own)\n *\n * Based on Greynir's Prepositions.conf but simplified for fast lookup.\n */\n\nimport type {\n GrammaticalCase,\n LemmaWithMorph,\n LemmaWithPOS,\n WordClass,\n} from \"./types.js\";\n\n/**\n * Interface for lemmatizer used in grammar rules.\n */\nexport interface GrammarLemmatizerLike {\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Preposition case government rules.\n *\n * Maps preposition lemma to the grammatical cases it governs.\n * When a preposition is followed by a noun in one of these cases,\n * we can be confident it's being used as a preposition.\n *\n * Source: Greynir's Prepositions.conf\n */\nexport const PREPOSITION_CASES: Map<string, Set<GrammaticalCase>> = new Map<string, Set<GrammaticalCase>>([\n // Both accusative and dative\n [\"á\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // on/at (þf=direction, þgf=location)\n [\"í\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // in (þf=into, 
þgf=inside)\n [\"við\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // at/by (þf=against, þgf=near)\n [\"með\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // with (þf=bring, þgf=accompany)\n [\"undir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // under (þf=motion, þgf=position)\n [\"yfir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // over (þf=motion, þgf=position)\n [\"fyrir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // for/before (þf=in exchange, þgf=in front)\n\n // Accusative only\n [\"um\", new Set<GrammaticalCase>([\"þf\"])], // about/around\n [\"gegnum\", new Set<GrammaticalCase>([\"þf\"])], // through\n [\"kringum\", new Set<GrammaticalCase>([\"þf\"])], // around\n [\"umhverfis\", new Set<GrammaticalCase>([\"þf\"])], // around/surrounding\n\n // Dative only\n [\"af\", new Set<GrammaticalCase>([\"þgf\"])], // of/from\n [\"frá\", new Set<GrammaticalCase>([\"þgf\"])], // from\n [\"hjá\", new Set<GrammaticalCase>([\"þgf\"])], // at/with (someone's place)\n [\"úr\", new Set<GrammaticalCase>([\"þgf\"])], // out of\n [\"að\", new Set<GrammaticalCase>([\"þgf\"])], // to/at\n [\"móti\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"nálægt\", new Set<GrammaticalCase>([\"þgf\"])], // near\n [\"gegn\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"gagnvart\", new Set<GrammaticalCase>([\"þgf\"])], // towards/regarding\n [\"handa\", new Set<GrammaticalCase>([\"þgf\"])], // for (someone)\n [\"meðal\", new Set<GrammaticalCase>([\"ef\"])], // among (actually genitive)\n\n // Genitive only\n [\"til\", new Set<GrammaticalCase>([\"ef\"])], // to\n [\"án\", new Set<GrammaticalCase>([\"ef\"])], // without\n [\"vegna\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"sakir\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"utan\", new Set<GrammaticalCase>([\"ef\"])], // outside\n [\"innan\", new Set<GrammaticalCase>([\"ef\"])], // inside\n [\"meðfram\", new Set<GrammaticalCase>([\"þgf\"])], // along\n [\"milli\", new 
Set<GrammaticalCase>([\"ef\"])], // between\n [\"auk\", new Set<GrammaticalCase>([\"ef\"])], // in addition to\n [\"í stað\", new Set<GrammaticalCase>([\"ef\"])], // instead of\n]);\n\n/**\n * Nominative-case pronouns that can precede verbs.\n * When one of these is followed by a potentially ambiguous word,\n * prefer the verb reading.\n */\nexport const NOMINATIVE_PRONOUNS = new Set([\n \"ég\",\n \"þú\",\n \"hann\",\n \"hún\",\n \"það\",\n \"við\",\n \"þið\",\n \"þeir\",\n \"þær\",\n \"þau\",\n]);\n\n/**\n * Result of applying a mini-grammar rule.\n */\nexport interface GrammarRuleMatch {\n /** The preferred lemma */\n lemma: string;\n /** The preferred POS */\n pos: WordClass;\n /** Rule that matched */\n rule: string;\n /** Confidence score (0-1) */\n confidence: number;\n}\n\n/**\n * Check if a preposition candidate can govern the case of the following word.\n *\n * @param prepLemma - The potential preposition lemma\n * @param nextWordMorph - Morphological features of the next word\n * @returns True if the preposition can govern this case\n */\nexport function canGovernCase(\n prepLemma: string,\n nextWordCase: GrammaticalCase | undefined\n): boolean {\n if (!nextWordCase) return false;\n const cases = PREPOSITION_CASES.get(prepLemma);\n return cases?.has(nextWordCase) ?? 
false;\n}\n\n/**\n * Apply preposition+case rule to disambiguate.\n *\n * If the current word can be a preposition and the next word has\n * a case governed by that preposition, prefer the preposition reading.\n *\n * @param candidates - All possible readings of the current word\n * @param nextWordMorph - Morphological analyses of the next word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPrepositionRule(\n candidates: LemmaWithMorph[],\n nextWordMorph: LemmaWithMorph[]\n): GrammarRuleMatch | null {\n // Find preposition candidates\n const prepCandidates = candidates.filter((c) => c.pos === \"fs\");\n if (prepCandidates.length === 0) return null;\n\n // Check if any next word form has a case governed by any prep candidate\n for (const prep of prepCandidates) {\n for (const nextForm of nextWordMorph) {\n if (nextForm.morph?.case && canGovernCase(prep.lemma, nextForm.morph.case)) {\n return {\n lemma: prep.lemma,\n pos: \"fs\",\n rule: `prep+${nextForm.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n }\n\n return null;\n}\n\n/**\n * Apply pronoun+verb rule to disambiguate.\n *\n * If the previous word is a nominative pronoun and the current word\n * can be a verb, prefer the verb reading.\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPronounVerbRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null\n): GrammarRuleMatch | null {\n if (!prevWord) return null;\n\n const prevLower = prevWord.toLowerCase();\n if (!NOMINATIVE_PRONOUNS.has(prevLower)) return null;\n\n // Find verb candidates\n const verbCandidates = candidates.filter((c) => c.pos === \"so\");\n if (verbCandidates.length === 0) return null;\n\n // Prefer verb over preposition/noun when after pronoun\n const hasNonVerb = candidates.some((c) => c.pos !== \"so\");\n if (!hasNonVerb) return 
null;\n\n // Return the verb candidate (prefer eiga for \"á\")\n const eigaCandidate = verbCandidates.find((c) => c.lemma === \"eiga\");\n const verbCandidate = eigaCandidate ?? verbCandidates[0];\n\n return {\n lemma: verbCandidate.lemma,\n pos: \"so\",\n rule: \"pronoun+verb\",\n confidence: 0.85,\n };\n}\n\n/**\n * Apply noun-after-preposition rule to disambiguate.\n *\n * If the previous word is a preposition and the current word has a\n * noun candidate with a case governed by that preposition, prefer\n * the noun reading.\n *\n * This rule only applies when:\n * - The previous word is UNAMBIGUOUSLY a preposition (no pronoun reading), OR\n * - The current word has no verb candidate\n *\n * Example: \"til fundar\" → \"fundar\" is noun \"fundur\" (genitive), not verb \"funda\"\n * Counter-example: \"við fórum\" → \"við\" is pronoun, \"fórum\" is verb \"fara\"\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @param lemmatizer - Lemmatizer for looking up the previous word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyNounAfterPrepositionRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n lemmatizer: GrammarLemmatizerLike | null\n): GrammarRuleMatch | null {\n if (!prevWord || !lemmatizer?.lemmatizeWithPOS) return null;\n\n // Check if previous word is a preposition\n const prevLemmas = lemmatizer.lemmatizeWithPOS(prevWord);\n const prepCandidate = prevLemmas.find((l) => l.pos === \"fs\");\n if (!prepCandidate) return null;\n\n // Check if the previous word could also be a pronoun\n const hasPronounReading = prevLemmas.some((l) => l.pos === \"fn\");\n\n // Check if current word has a verb candidate\n const hasVerbCandidate = candidates.some((c) => c.pos === \"so\");\n\n // If prevWord is ambiguously pronoun/preposition AND current word can be a verb,\n // don't apply this rule (let pronoun+verb rule or bigrams handle it)\n if 
(hasPronounReading && hasVerbCandidate) {\n return null;\n }\n\n // Get cases this preposition governs\n const governedCases = PREPOSITION_CASES.get(prepCandidate.lemma);\n if (!governedCases) return null;\n\n // Find noun candidate with matching case\n const nounCandidates = candidates.filter((c) => c.pos === \"no\");\n for (const noun of nounCandidates) {\n if (noun.morph?.case && governedCases.has(noun.morph.case)) {\n return {\n lemma: noun.lemma,\n pos: \"no\",\n rule: `noun_after_prep+${noun.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n\n return null;\n}\n\n/**\n * Apply all mini-grammar rules in sequence.\n *\n * Rules are applied in order of specificity:\n * 1. Preposition + case government (most reliable)\n * 2. Noun after preposition (governed case)\n * 3. Pronoun + verb pattern\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - Previous word (raw form)\n * @param nextWordMorph - Morphological analyses of the next word\n * @param lemmatizer - Optional lemmatizer for looking up previous word POS\n * @returns GrammarRuleMatch if any rule applies, null otherwise\n */\nexport function applyGrammarRules(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n nextWordMorph: LemmaWithMorph[],\n lemmatizer: GrammarLemmatizerLike | null = null\n): GrammarRuleMatch | null {\n // Rule 1: Preposition + governed case\n const prepRule = applyPrepositionRule(candidates, nextWordMorph);\n if (prepRule) return prepRule;\n\n // Rule 2: Noun after preposition with governed case\n const nounAfterPrepRule = applyNounAfterPrepositionRule(candidates, prevWord, lemmatizer);\n if (nounAfterPrepRule) return nounAfterPrepRule;\n\n // Rule 3: Pronoun + verb\n const verbRule = applyPronounVerbRule(candidates, prevWord);\n if (verbRule) return verbRule;\n\n return null;\n}\n\n/**\n * Check if a word is a known preposition.\n */\nexport function isKnownPreposition(lemma: string): boolean {\n return 
PREPOSITION_CASES.has(lemma);\n}\n\n/**\n * Get the cases governed by a preposition.\n */\nexport function getGovernedCases(prepLemma: string): Set<GrammaticalCase> | undefined {\n return PREPOSITION_CASES.get(prepLemma);\n}\n","/**\n * Disambiguation algorithm using a multi-phase pipeline.\n *\n * When a word has multiple possible lemmas, use surrounding context\n * and linguistic rules to select the most likely one.\n *\n * Pipeline phases:\n * 1. Unambiguous - words with only one lemma candidate\n * 2. Phrase rules - multi-word expressions and fixed phrases\n * 3. Disambiguation rules - contextual preferences (e.g., \"á\" after pronoun = verb)\n * 4. Grammar rules - case government (preposition + case noun)\n * 5. Word bigrams - statistical scoring using bigram frequencies\n * 6. Fallback - use first lemma if no other evidence\n */\n\nimport { STOPWORDS_IS } from \"./stopwords.js\";\nimport type { LemmatizerLike, LemmaWithPOS, LemmaWithMorph, BigramProvider, WordClass } from \"./types.js\";\nimport { DISAMBIGUATION_RULES, type DisambiguationRule } from \"./disambiguation-rules.js\";\nimport { applyGrammarRules } from \"./mini-grammar.js\";\n\nexport interface DisambiguatorOptions {\n /** Weight for left context (previous word) */\n leftWeight?: number;\n /** Weight for right context (next word) */\n rightWeight?: number;\n /** Enable preference rules (e.g., \"á\" context rules) */\n usePreferenceRules?: boolean;\n /** Enable grammar rules (case government) */\n useGrammarRules?: boolean;\n}\n\nexport interface DisambiguatedToken {\n /** Original token */\n token: string;\n /** Chosen lemma */\n lemma: string;\n /** Part of speech (if available) */\n pos?: WordClass;\n /** All candidate lemmas */\n candidates: string[];\n /** Candidates with POS (if available) */\n candidatesWithPOS?: LemmaWithPOS[];\n /** Was disambiguation needed? 
*/\n ambiguous: boolean;\n /** Confidence score (0-1) */\n confidence: number;\n /** Which phase resolved this token */\n resolvedBy?: string;\n}\n\n/**\n * Extended lemmatizer interface that supports morphological lookup.\n */\ninterface MorphLemmatizerLike extends LemmatizerLike {\n lemmatizeWithMorph?(word: string): LemmaWithMorph[];\n}\n\n/**\n * Context for disambiguation, including surrounding tokens.\n */\ninterface DisambiguationContext {\n /** Previous word (if any) */\n prevWord: string | null;\n /** Next word (if any) */\n nextWord: string | null;\n /** Previous token's lemmas (if available) */\n prevLemmas?: string[];\n /** Next token's lemmas (if available) */\n nextLemmas?: string[];\n /** Next word's morphological analyses (if available) */\n nextWordMorph?: LemmaWithMorph[];\n /** All tokens in the sequence */\n allTokens: string[];\n /** Current index in the sequence */\n index: number;\n}\n\nexport interface DisambiguationContextHint {\n prevLemmas?: string[];\n nextLemmas?: string[];\n}\n\n/**\n * A disambiguation phase that processes candidates.\n */\ninterface DisambiguationPhase {\n name: string;\n run(\n candidates: LemmaWithPOS[],\n context: DisambiguationContext,\n disambiguator: Disambiguator\n ): { lemma: string; pos?: WordClass; confidence: number } | null;\n}\n\n/**\n * Phase 1: Handle unambiguous cases (single candidate).\n */\nconst unambiguousPhase: DisambiguationPhase = {\n name: \"unambiguous\",\n run(candidates) {\n if (candidates.length === 1) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1.0,\n };\n }\n return null;\n },\n};\n\n/**\n * Phase 2: Apply disambiguation rules based on context.\n */\nconst preferenceRulesPhase: DisambiguationPhase = {\n name: \"preference_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.usePreferenceRules) return null;\n\n for (const rule of DISAMBIGUATION_RULES) {\n const match = applyRule(rule, candidates, context);\n if (match) {\n 
return {\n lemma: match.lemma,\n pos: match.pos,\n confidence: 0.85,\n };\n }\n }\n return null;\n },\n};\n\n/**\n * Apply a single disambiguation rule.\n */\nfunction applyRule(\n rule: DisambiguationRule,\n candidates: LemmaWithPOS[],\n context: DisambiguationContext\n): LemmaWithPOS | null {\n // Find candidates matching the word and preferred POS\n const preferredCandidate = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.prefer\n );\n const dispreferred = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.over\n );\n\n if (!preferredCandidate || !dispreferred) {\n return null;\n }\n\n // Check context condition\n if (rule.context === \"before_noun\") {\n // Next word should be a noun (starts with uppercase or known noun)\n const next = context.nextWord;\n if (next && /^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"before_verb\") {\n // Next word suggests a verb context (harder to detect without POS)\n // Simple heuristic: if next word is lowercase and not a common noun determiner\n const next = context.nextWord?.toLowerCase();\n if (next && ![\"þessi\", \"þetta\", \"sá\", \"sú\", \"það\", \"hinn\", \"hin\", \"hið\"].includes(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"after_pronoun\") {\n // Previous word is a pronoun\n const prev = context.prevWord?.toLowerCase();\n const pronouns = [\"ég\", \"þú\", \"hann\", \"hún\", \"það\", \"við\", \"þið\", \"þeir\", \"þær\", \"þau\"];\n if (prev && pronouns.includes(prev)) {\n return preferredCandidate;\n }\n }\n\n return null;\n}\n\n/**\n * Phase 3: Apply grammar rules (case government).\n *\n * Uses morphological features to apply preposition+case and pronoun+verb rules.\n */\nconst grammarRulesPhase: DisambiguationPhase = {\n name: \"grammar_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.useGrammarRules) return null;\n\n // 
Convert LemmaWithPOS to LemmaWithMorph if needed\n const candidatesWithMorph: LemmaWithMorph[] = candidates.map((c) => ({\n ...c,\n morph: undefined,\n }));\n\n // Get morphological info for candidates if available\n if (disambiguator.lemmatizer.lemmatizeWithMorph) {\n const currentWord = context.allTokens[context.index];\n if (currentWord) {\n const morphCandidates = disambiguator.lemmatizer.lemmatizeWithMorph(currentWord);\n // Replace with morph-enriched candidates\n candidatesWithMorph.length = 0;\n candidatesWithMorph.push(...morphCandidates);\n }\n }\n\n // Apply grammar rules\n const result = applyGrammarRules(\n candidatesWithMorph,\n context.prevWord,\n context.nextWordMorph ?? [],\n disambiguator.lemmatizer\n );\n\n if (result) {\n return {\n lemma: result.lemma,\n pos: result.pos,\n confidence: result.confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 4: Score using bigram frequencies.\n */\nconst bigramPhase: DisambiguationPhase = {\n name: \"word_bigrams\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.bigrams) return null;\n if (candidates.length === 0) return null;\n\n const scores: { candidate: LemmaWithPOS; score: number }[] = [];\n\n for (const candidate of candidates) {\n let score = 0;\n\n // Left context: bigram(prevWord, lemma)\n if (context.prevWord) {\n const prevLemmas = context.prevLemmas || disambiguator.lemmatizer.lemmatize(context.prevWord);\n for (const prevLemma of prevLemmas) {\n const freq = disambiguator.bigrams.freq(prevLemma, candidate.lemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.leftWeight;\n }\n }\n }\n\n // Right context: bigram(lemma, nextWord)\n if (context.nextWord) {\n const nextLemmas = context.nextLemmas || disambiguator.lemmatizer.lemmatize(context.nextWord);\n for (const nextLemma of nextLemmas) {\n const freq = disambiguator.bigrams.freq(candidate.lemma, nextLemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.rightWeight;\n }\n }\n }\n\n 
scores.push({ candidate, score });\n }\n\n // Sort by score\n scores.sort((a, b) => b.score - a.score);\n\n // Check if we have scores and if top score is positive\n if (scores.length > 0 && scores[0].score > 0) {\n const topScore = scores[0].score;\n const totalScore = scores.reduce((sum, s) => sum + Math.exp(s.score), 0);\n const confidence = totalScore > 0 ? Math.exp(topScore) / totalScore : 0.5;\n\n return {\n lemma: scores[0].candidate.lemma,\n pos: scores[0].candidate.pos,\n confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 5: Fallback to first candidate.\n */\nconst fallbackPhase: DisambiguationPhase = {\n name: \"fallback\",\n run(candidates) {\n if (candidates.length > 0) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1 / candidates.length,\n };\n }\n return null;\n },\n};\n\n/**\n * All disambiguation phases in order.\n */\nconst PHASES: DisambiguationPhase[] = [\n unambiguousPhase,\n preferenceRulesPhase,\n grammarRulesPhase,\n bigramPhase,\n fallbackPhase,\n];\n\n/**\n * Disambiguate lemmas using a multi-phase pipeline.\n */\nexport class Disambiguator {\n lemmatizer: MorphLemmatizerLike;\n bigrams: BigramProvider | null;\n leftWeight: number;\n rightWeight: number;\n usePreferenceRules: boolean;\n useGrammarRules: boolean;\n\n constructor(\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider | null = null,\n options: DisambiguatorOptions = {}\n ) {\n this.lemmatizer = lemmatizer as MorphLemmatizerLike;\n this.bigrams = bigrams;\n this.leftWeight = options.leftWeight ?? 1.0;\n this.rightWeight = options.rightWeight ?? 1.0;\n this.usePreferenceRules = options.usePreferenceRules ?? true;\n this.useGrammarRules = options.useGrammarRules ?? 
true;\n }\n\n /**\n * Disambiguate a single word given context.\n *\n * @param word - The word to lemmatize\n * @param prevWord - Previous word (left context), or null\n * @param nextWord - Next word (right context), or null\n */\n disambiguate(\n word: string,\n prevWord: string | null,\n nextWord: string | null,\n hint: DisambiguationContextHint = {}\n ): DisambiguatedToken {\n // Get candidates with POS if available\n let candidatesWithPOS: LemmaWithPOS[];\n if (this.lemmatizer.lemmatizeWithPOS) {\n candidatesWithPOS = this.lemmatizer.lemmatizeWithPOS(word);\n } else {\n // Fall back to plain lemmatization\n const lemmas = this.lemmatizer.lemmatize(word);\n candidatesWithPOS = lemmas.map((l) => ({ lemma: l, pos: \"no\" as WordClass }));\n }\n\n const candidates = candidatesWithPOS.map((c) => c.lemma);\n const token = word;\n\n // Get morphological info for next word if available\n let nextWordMorph: LemmaWithMorph[] | undefined;\n if (nextWord && this.lemmatizer.lemmatizeWithMorph) {\n nextWordMorph = this.lemmatizer.lemmatizeWithMorph(nextWord);\n }\n\n // Build context\n const context: DisambiguationContext = {\n prevWord,\n nextWord,\n prevLemmas: hint.prevLemmas,\n nextLemmas: hint.nextLemmas,\n nextWordMorph,\n allTokens: [word],\n index: 0,\n };\n\n // Run through phases\n for (const phase of PHASES) {\n const result = phase.run(candidatesWithPOS, context, this);\n if (result) {\n return {\n token,\n lemma: result.lemma,\n pos: result.pos,\n candidates,\n candidatesWithPOS,\n ambiguous: candidates.length > 1,\n confidence: result.confidence,\n resolvedBy: phase.name,\n };\n }\n }\n\n // Should never reach here due to fallback phase\n return {\n token,\n lemma: word.toLowerCase(),\n candidates,\n candidatesWithPOS,\n ambiguous: false,\n confidence: 0,\n resolvedBy: \"none\",\n };\n }\n\n /**\n * Disambiguate an array of tokens.\n *\n * @param tokens - Array of word tokens\n * @returns Array of disambiguated tokens\n */\n disambiguateAll(tokens: string[]): 
DisambiguatedToken[] {\n const results: DisambiguatedToken[] = [];\n\n for (let i = 0; i < tokens.length; i++) {\n const word = tokens[i];\n const prevWord = i > 0 ? tokens[i - 1] : null;\n const nextWord = i < tokens.length - 1 ? tokens[i + 1] : null;\n\n results.push(this.disambiguate(word, prevWord, nextWord));\n }\n\n return results;\n }\n\n /**\n * Extract unique lemmas from text with disambiguation.\n *\n * @param tokens - Array of word tokens\n * @returns Set of unique lemmas (best guess for each ambiguous word)\n */\n extractLemmas(tokens: string[]): Set<string> {\n const lemmas = new Set<string>();\n const disambiguated = this.disambiguateAll(tokens);\n\n for (const result of disambiguated) {\n lemmas.add(result.lemma);\n }\n\n return lemmas;\n }\n}\n\n/**\n * Shortcut for simple lemma extraction with disambiguation.\n */\nexport function extractDisambiguatedLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider,\n options: {\n tokenize?: (text: string) => string[];\n removeStopwords?: boolean;\n } = {}\n): Set<string> {\n const { tokenize, removeStopwords } = options;\n\n // Tokenize\n const tokens = tokenize\n ? 
tokenize(text)\n : text\n .split(/\\s+/)\n .filter((t) => t.length > 0)\n .map((t) => t.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\"))\n .filter((t) => t.length > 0);\n\n // Disambiguate\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n const lemmas = disambiguator.extractLemmas(tokens);\n\n // Filter stopwords if requested\n if (removeStopwords) {\n for (const lemma of lemmas) {\n if (STOPWORDS_IS.has(lemma)) {\n lemmas.delete(lemma);\n }\n }\n }\n\n return lemmas;\n}\n","/**\n * Shared type definitions to avoid circular imports.\n */\n\n/**\n * Word class (part-of-speech) codes from BÍN.\n *\n * These are simplified from BÍN's detailed categories:\n * - kk/kvk/hk (gendered nouns) → 'no'\n * - pfn (personal pronoun) → 'fn'\n */\nexport type WordClass =\n | \"no\" // nafnorð (noun)\n | \"so\" // sagnorð (verb)\n | \"lo\" // lýsingarorð (adjective)\n | \"ao\" // atviksorð (adverb)\n | \"fs\" // forsetning (preposition)\n | \"fn\" // fornafn (pronoun)\n | \"st\" // samtenging (conjunction)\n | \"to\" // töluorð (numeral)\n | \"gr\" // greinir (article)\n | \"uh\"; // upphrópun (interjection)\n\n/**\n * Human-readable names for word classes.\n */\nexport const WORD_CLASS_NAMES: Record<WordClass, string> = {\n no: \"noun\",\n so: \"verb\",\n lo: \"adjective\",\n ao: \"adverb\",\n fs: \"preposition\",\n fn: \"pronoun\",\n st: \"conjunction\",\n to: \"numeral\",\n gr: \"article\",\n uh: \"interjection\",\n};\n\n/**\n * Icelandic names for word classes.\n */\nexport const WORD_CLASS_NAMES_IS: Record<WordClass, string> = {\n no: \"nafnorð\",\n so: \"sagnorð\",\n lo: \"lýsingarorð\",\n ao: \"atviksorð\",\n fs: \"forsetning\",\n fn: \"fornafn\",\n st: \"samtenging\",\n to: \"töluorð\",\n gr: \"greinir\",\n uh: \"upphrópun\",\n};\n\n/**\n * Grammatical case (fall) in Icelandic.\n */\nexport type GrammaticalCase = \"nf\" | \"þf\" | \"þgf\" | \"ef\";\n\n/**\n * Grammatical gender (kyn) in Icelandic.\n */\nexport type GrammaticalGender = \"kk\" | \"kvk\" 
| \"hk\";\n\n/**\n * Grammatical number (tala) in Icelandic.\n */\nexport type GrammaticalNumber = \"et\" | \"ft\";\n\n/**\n * Human-readable names for cases.\n */\nexport const CASE_NAMES: Record<GrammaticalCase, string> = {\n nf: \"nominative\",\n þf: \"accusative\",\n þgf: \"dative\",\n ef: \"genitive\",\n};\n\n/**\n * Human-readable names for genders.\n */\nexport const GENDER_NAMES: Record<GrammaticalGender, string> = {\n kk: \"masculine\",\n kvk: \"feminine\",\n hk: \"neuter\",\n};\n\n/**\n * Human-readable names for numbers.\n */\nexport const NUMBER_NAMES: Record<GrammaticalNumber, string> = {\n et: \"singular\",\n ft: \"plural\",\n};\n\n/**\n * Morphological features extracted from BÍN.\n */\nexport interface MorphFeatures {\n case?: GrammaticalCase;\n gender?: GrammaticalGender;\n number?: GrammaticalNumber;\n}\n\n/**\n * A lemma with its word class.\n */\nexport interface LemmaWithPOS {\n lemma: string;\n pos: WordClass;\n}\n\n/**\n * A lemma with word class and morphological features.\n */\nexport interface LemmaWithMorph extends LemmaWithPOS {\n morph?: MorphFeatures;\n}\n\n/**\n * Interface for lemmatizer-like objects.\n * Used to avoid circular dependency between modules.\n */\nexport interface LemmatizerLike {\n lemmatize(word: string): string[];\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Interface for bigram frequency lookup.\n * Used for disambiguation scoring.\n */\nexport interface BigramProvider {\n freq(word1: string, word2: string): number;\n}\n","/**\n * Compound word splitting for Icelandic.\n *\n * Icelandic compounds are written as single words:\n * - \"bílstjóri\" = \"bíl\" (car) + \"stjóri\" (driver)\n * - \"sjúkrahús\" = \"sjúkra\" (sick-GEN) + \"hús\" (house)\n *\n * Strategy:\n * 1. Try splitting at each position\n * 2. Check if both parts are known words\n * 3. Handle common compound linking letters (s, u, a)\n * 4. 
Score by part lengths (prefer balanced splits)\n */\n\nimport type { LemmatizerLike } from \"./types.js\";\n\n/**\n * Protected lemmas that should NEVER be split as compounds.\n * Mostly place names that happen to end in common word parts.\n */\nexport const PROTECTED_LEMMAS = new Set([\n // Countries ending in -land\n \"ísland\",\n \"england\",\n \"írland\",\n \"skotland\",\n \"finnland\",\n \"grænland\",\n \"holland\",\n \"þýskaland\",\n \"frakkland\",\n \"pólland\",\n \"tékkland\",\n \"svissland\",\n \"rússland\",\n \"eistland\",\n \"lettland\",\n \"litháen\",\n // Other countries/regions\n \"danmörk\",\n \"noregur\",\n \"svíþjóð\",\n \"bandaríkin\",\n \"spánn\",\n \"portúgal\",\n \"ítalía\",\n \"grikkland\",\n // Icelandic place names (from BÍN)\n \"þingvellir\",\n \"akureyri\",\n \"ísafjörður\",\n \"reykjavík\",\n \"keflavík\",\n \"hafnarfjörður\",\n \"kópavogur\",\n \"seltjarnarnes\",\n \"garðabær\",\n \"mosfellsbær\",\n \"vestmannaeyjar\",\n \"húsavík\",\n \"sauðárkrókur\",\n \"siglufjörður\",\n \"ólafsfjörður\",\n \"dalvík\",\n \"egilsstaðir\",\n \"neskaupstaður\",\n \"seyðisfjörður\",\n \"eskifjörður\",\n \"reyðarfjörður\",\n \"fáskrúðsfjörður\",\n \"stöðvarfjörður\",\n \"djúpivogur\",\n \"höfn\",\n \"vík\",\n \"selfoss\",\n \"hveragerði\",\n \"þorlákshöfn\",\n \"grindavík\",\n \"sandgerði\",\n \"borgarnes\",\n \"stykkishólmur\",\n \"grundarfjörður\",\n \"ólafsvík\",\n \"búðardalur\",\n \"patreksfjörður\",\n \"flateyri\",\n \"suðureyri\",\n \"bolungarvík\",\n \"hólmavík\",\n \"hvammstangi\",\n \"blönduós\",\n \"skagaströnd\",\n \"varmahlíð\",\n // Literary/historical places\n \"hlíðarendi\",\n \"bergþórshvol\",\n // Company names\n \"íslandsbanki\",\n \"landsbankinn\",\n \"arionbanki\",\n // Institutions\n \"alþingi\",\n]);\n\nexport interface CompoundSplit {\n /** Original word */\n word: string;\n /** Constituent parts (lemmatized) - all variants for indexing */\n parts: string[];\n /** All index terms: parts + original word */\n indexTerms: string[];\n 
/** Split confidence (0-1) */\n confidence: number;\n /** Is this a compound? */\n isCompound: boolean;\n}\n\n/**\n * Splitting mode for compound words.\n *\n * - \"aggressive\": Try to split all words, even known BÍN entries\n * - \"balanced\": Split unknown words; split known words only if high confidence\n * - \"conservative\": Only split at hyphens or very high confidence cases\n */\nexport type CompoundSplitMode = \"aggressive\" | \"balanced\" | \"conservative\";\n\nexport interface CompoundSplitterOptions {\n /**\n * Minimum part length.\n * Default: 3. Set to 2 for more aggressive splitting (e.g., \"ís\" in \"ísland\").\n */\n minPartLength?: number;\n /** Try removing linking letters (s, u, a) */\n tryLinkingLetters?: boolean;\n /**\n * Splitting mode.\n * Default: \"balanced\"\n */\n mode?: CompoundSplitMode;\n}\n\n/**\n * Common compound tail words in Icelandic.\n * These are often the second part of compounds and boost split confidence.\n */\nconst COMMON_COMPOUND_TAILS = new Set([\n // People/roles\n \"maður\",\n \"kona\",\n \"stjóri\",\n \"ráðherra\",\n \"forseti\",\n \"formaður\",\n \"fulltrúi\",\n \"starfsmaður\",\n // Places\n \"hús\",\n \"staður\",\n \"vegur\",\n \"borg\",\n \"bær\",\n \"dalur\",\n \"fjörður\",\n // Organizations\n \"félag\",\n \"banki\",\n \"sjóður\",\n \"stofnun\",\n \"ráð\",\n // Things/concepts\n \"rannsókn\",\n \"greiðsla\",\n \"mál\",\n \"kerfi\",\n \"verk\",\n \"þjónusta\",\n \"rekstur\",\n \"viðskipti\",\n \"verð\",\n \"kostnaður\",\n]);\n\n/**\n * Very common standalone words that should rarely be compound parts.\n * Penalize splits where BOTH parts are common standalone words.\n */\nconst COMMON_STANDALONE = new Set([\n \"vera\",\n \"hafa\",\n \"gera\",\n \"fara\",\n \"koma\",\n \"segja\",\n \"vilja\",\n \"mega\",\n \"þurfa\",\n \"verða\",\n \"geta\",\n \"sjá\",\n \"taka\",\n \"eiga\",\n \"láta\",\n \"halda\",\n \"leyfa\",\n \"búa\",\n]);\n\n/**\n * Common compound linking patterns in Icelandic.\n * These letters often 
join compound parts:\n * - \"s\" (genitive): húss + eigandi -> \"húseigandi\"\n * - \"u\" (genitive/linking): vatnu + fall -> \"vatnufall\" (rare)\n * - \"a\" (genitive): daga + blað -> \"dagablað\"\n */\nconst LINKING_PATTERNS = [\"s\", \"u\", \"a\"];\n\nexport class CompoundSplitter {\n private lemmatizer: LemmatizerLike;\n private minPartLength: number;\n private tryLinkingLetters: boolean;\n private knownLemmas: Set<string>;\n private mode: CompoundSplitMode;\n\n constructor(\n lemmatizer: LemmatizerLike,\n knownLemmas: Set<string>,\n options: CompoundSplitterOptions = {}\n ) {\n this.lemmatizer = lemmatizer;\n this.knownLemmas = knownLemmas;\n this.minPartLength = options.minPartLength ?? 3;\n this.tryLinkingLetters = options.tryLinkingLetters ?? true;\n this.mode = options.mode ?? \"balanced\";\n }\n\n /**\n * Helper to create a no-split result.\n */\n private noSplit(word: string, lemmas: string[]): CompoundSplit {\n return {\n word,\n parts: lemmas,\n indexTerms: lemmas,\n confidence: 0,\n isCompound: false,\n };\n }\n\n /**\n * Try to split a word into compound parts.\n *\n * Uses a lookup-first strategy:\n * 1. Check protected lemmas - never split\n * 2. Check if word is known in BÍN and unambiguous - don't split\n * 3. 
Apply mode-based splitting rules\n */\n split(word: string): CompoundSplit {\n const normalized = word.toLowerCase();\n\n // Step 1: Check protected lemmas - never split these\n const directLemmas = this.lemmatizer.lemmatize(word);\n const primaryLemma = directLemmas[0]?.toLowerCase();\n if (primaryLemma && PROTECTED_LEMMAS.has(primaryLemma)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Also check if the word itself is protected (for inflected forms)\n if (PROTECTED_LEMMAS.has(normalized)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 2: Check if known in BÍN and unambiguous\n // A word is \"known\" if lemmatization returned something other than the word itself\n const isKnownWord =\n directLemmas.length > 0 && directLemmas[0].toLowerCase() !== normalized;\n const isUnambiguous = directLemmas.length === 1;\n\n // For conservative mode, only split at hyphens\n if (this.mode === \"conservative\") {\n if (word.includes(\"-\")) {\n return this.splitAtHyphen(word, directLemmas);\n }\n return this.noSplit(word, directLemmas);\n }\n\n // For balanced mode, don't split unambiguous known words\n if (this.mode === \"balanced\" && isKnownWord && isUnambiguous) {\n // Exception: still try if the word is very long (likely a compound)\n if (normalized.length < 12) {\n return this.noSplit(word, directLemmas);\n }\n }\n\n // Too short to be a compound\n if (normalized.length < this.minPartLength * 2) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 3: Try algorithmic splitting\n const candidates: {\n leftParts: string[];\n rightParts: string[];\n score: number;\n }[] = [];\n\n for (\n let i = this.minPartLength;\n i <= normalized.length - this.minPartLength;\n i++\n ) {\n const leftPart = normalized.slice(0, i);\n const rightPart = normalized.slice(i);\n\n // Try direct split\n const directResult = this.trySplit(leftPart, rightPart);\n if (directResult) {\n candidates.push(directResult);\n }\n\n // Try with linking letters removed from split 
point\n if (this.tryLinkingLetters) {\n for (const linker of LINKING_PATTERNS) {\n // Remove linking letter from end of left part\n if (leftPart.endsWith(linker) && leftPart.length > this.minPartLength) {\n const trimmedLeft = leftPart.slice(0, -1);\n const result = this.trySplit(trimmedLeft, rightPart);\n if (result) {\n // Slightly lower score for linked compounds\n candidates.push({ ...result, score: result.score * 0.95 });\n }\n }\n }\n }\n }\n\n if (candidates.length === 0) {\n return this.noSplit(word, directLemmas);\n }\n\n // Pick best candidate by score\n candidates.sort((a, b) => b.score - a.score);\n const best = candidates[0];\n\n // In balanced mode, require higher confidence for known words\n if (this.mode === \"balanced\" && isKnownWord && best.score < 0.6) {\n return this.noSplit(word, directLemmas);\n }\n\n // Collect all unique parts from best split\n const parts = [...new Set([...best.leftParts, ...best.rightParts])];\n // Index terms include parts + original word for search\n const indexTerms = [...new Set([...parts, normalized])];\n\n return {\n word,\n parts,\n indexTerms,\n confidence: Math.min(best.score, 1),\n isCompound: true,\n };\n }\n\n /**\n * Split a hyphenated word.\n */\n private splitAtHyphen(word: string, directLemmas: string[]): CompoundSplit {\n const parts = word.split(\"-\").filter((p) => p.length > 0);\n if (parts.length < 2) {\n return this.noSplit(word, directLemmas);\n }\n\n const allParts: string[] = [];\n for (const part of parts) {\n const lemmas = this.lemmatizer.lemmatize(part);\n allParts.push(...lemmas);\n }\n\n const uniqueParts = [...new Set(allParts)];\n const indexTerms = [...new Set([...uniqueParts, word.toLowerCase()])];\n\n return {\n word,\n parts: uniqueParts,\n indexTerms,\n confidence: 0.9,\n isCompound: true,\n };\n }\n\n private trySplit(\n leftPart: string,\n rightPart: string\n ): { leftParts: string[]; rightParts: string[]; score: number } | null {\n // Get lemmas for both parts\n const leftLemmas = 
this.lemmatizer.lemmatize(leftPart);\n const rightLemmas = this.lemmatizer.lemmatize(rightPart);\n\n // Filter to known lemmas only, deduplicated\n const leftKnown = [...new Set(leftLemmas.filter((l) => this.knownLemmas.has(l)))];\n const rightKnown = [...new Set(rightLemmas.filter((l) => this.knownLemmas.has(l)))];\n\n if (leftKnown.length === 0 || rightKnown.length === 0) {\n return null;\n }\n\n // Calculate score with multiple factors\n let score = 0;\n\n // Factor 1: Length balance (20% weight)\n // Prefer balanced splits, but not too strictly\n const lengthBalance =\n 1 - Math.abs(leftPart.length - rightPart.length) / (leftPart.length + rightPart.length);\n score += lengthBalance * 0.2;\n\n // Factor 2: Part length bonus (20% weight)\n // Prefer longer parts (more likely to be real words)\n const avgLength = (leftPart.length + rightPart.length) / 2;\n const lengthBonus = Math.min(avgLength / 6, 1);\n score += lengthBonus * 0.2;\n\n // Factor 3: Common compound tail bonus (30% weight)\n // Strongly prefer splits where right part is a known compound tail\n const hasCompoundTail = rightKnown.some((lemma) => COMMON_COMPOUND_TAILS.has(lemma));\n if (hasCompoundTail) {\n score += 0.3;\n }\n\n // Factor 4: Penalty for both parts being common standalone words (30% weight)\n // E.g., \"ísland\" -> \"ís\" + \"land\" should be penalized\n const leftIsCommon = leftKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n const rightIsCommon = rightKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n if (leftIsCommon && rightIsCommon) {\n // Strong penalty if both parts are very common standalone\n score -= 0.3;\n } else if (!leftIsCommon && !rightIsCommon) {\n // Bonus if neither is a common standalone (more likely a real compound)\n score += 0.2;\n }\n\n // Factor 5: Minimum part length requirement\n // Very short parts (2-3 chars) get a penalty\n if (leftPart.length < 4 || rightPart.length < 4) {\n score -= 0.15;\n }\n\n // Return all known lemmas from both parts\n 
return {\n leftParts: leftKnown,\n rightParts: rightKnown,\n score: Math.max(0, score), // Ensure non-negative\n };\n }\n\n /**\n * Get all lemmas for a word, including compound parts.\n * Useful for search indexing.\n */\n getAllLemmas(word: string): string[] {\n const split = this.split(word);\n return split.indexTerms;\n }\n}\n\n/**\n * Create a set of known lemmas from the lemmatizer.\n * This is used to check if compound parts are valid words.\n */\nexport function createKnownLemmaSet(lemmas: string[]): Set<string> {\n return new Set(lemmas.map((l) => l.toLowerCase()));\n}\n","/**\n * Static multi-word phrases for Icelandic.\n *\n * Source: Extracted from GreynirEngine's Phrases.conf (MIT License)\n * https://github.com/mideind/GreynirEngine\n *\n * These phrases should be recognized as units rather than individual words,\n * enabling better stopword detection and lemmatization.\n */\n\n/**\n * A static phrase definition.\n */\nexport interface StaticPhrase {\n /** The canonical/lemma form of the phrase */\n lemma: string;\n /** Whether this phrase functions as a stopword (e.g., \"til dæmis\") */\n isStopword: boolean;\n /** Part of speech category */\n pos?: \"ao\" | \"fs\" | \"st\" | \"entity\";\n}\n\n/**\n * Common Icelandic multi-word phrases.\n * Keys are lowercase, normalized forms.\n */\nexport const STATIC_PHRASES: Map<string, StaticPhrase> = new Map([\n // Adverbial phrases (ao frasi) - often function as stopwords\n [\"til dæmis\", { lemma: \"til dæmi\", isStopword: true, pos: \"ao\" }],\n [\"með öðrum orðum\", { lemma: \"með annar orð\", isStopword: true, pos: \"ao\" }],\n [\"í raun\", { lemma: \"í raun\", isStopword: true, pos: \"ao\" }],\n [\"í raun og veru\", { lemma: \"í raun og vera\", isStopword: true, pos: \"ao\" }],\n [\"af og til\", { lemma: \"af og til\", isStopword: true, pos: \"ao\" }],\n [\"aftur á móti\", { lemma: \"aftur á mót\", isStopword: true, pos: \"ao\" }],\n [\"alla vega\", { lemma: \"allur vegur\", isStopword: true, pos: \"ao\" 
}],\n [\"alls ekki\", { lemma: \"alls ekki\", isStopword: true, pos: \"ao\" }],\n [\"alls staðar\", { lemma: \"allur staður\", isStopword: true, pos: \"ao\" }],\n [\"allt í allt\", { lemma: \"allur í allur\", isStopword: true, pos: \"ao\" }],\n [\"annars vegar\", { lemma: \"annar vegur\", isStopword: true, pos: \"ao\" }],\n [\"auk þess\", { lemma: \"auk það\", isStopword: true, pos: \"ao\" }],\n [\"að auki\", { lemma: \"að auki\", isStopword: true, pos: \"ao\" }],\n [\"að vísu\", { lemma: \"að vís\", isStopword: true, pos: \"ao\" }],\n [\"að sjálfsögðu\", { lemma: \"að sjálfsagður\", isStopword: true, pos: \"ao\" }],\n [\"að minnsta kosti\", { lemma: \"að lítill kostur\", isStopword: true, pos: \"ao\" }],\n [\"að öllu leyti\", { lemma: \"að allur leyti\", isStopword: true, pos: \"ao\" }],\n [\"að nokkru leyti\", { lemma: \"að nokkur leyti\", isStopword: true, pos: \"ao\" }],\n [\"ef til vill\", { lemma: \"ef til vilja\", isStopword: true, pos: \"ao\" }],\n [\"einhvers staðar\", { lemma: \"einhver staður\", isStopword: true, pos: \"ao\" }],\n [\"einhvern veginn\", { lemma: \"einhver vegur\", isStopword: true, pos: \"ao\" }],\n [\"ekki síst\", { lemma: \"ekki síður\", isStopword: true, pos: \"ao\" }],\n [\"engu að síður\", { lemma: \"enginn að síður\", isStopword: true, pos: \"ao\" }],\n [\"fyrst og fremst\", { lemma: \"snemma og fremri\", isStopword: true, pos: \"ao\" }],\n [\"hins vegar\", { lemma: \"hinn vegur\", isStopword: true, pos: \"ao\" }],\n [\"hér og þar\", { lemma: \"hér og þar\", isStopword: true, pos: \"ao\" }],\n [\"hér um bil\", { lemma: \"hér um bil\", isStopword: true, pos: \"ao\" }],\n [\"hér á landi\", { lemma: \"hér á land\", isStopword: true, pos: \"ao\" }],\n [\"hvað mest\", { lemma: \"hvað mjög\", isStopword: true, pos: \"ao\" }],\n [\"hverju sinni\", { lemma: \"hver sinn\", isStopword: true, pos: \"ao\" }],\n [\"hvorki né\", { lemma: \"hvorki né\", isStopword: true, pos: \"ao\" }],\n [\"í burtu\", { lemma: \"í burtu\", isStopword: true, pos: 
\"ao\" }],\n [\"í gær\", { lemma: \"í gær\", isStopword: true, pos: \"ao\" }],\n [\"í senn\", { lemma: \"í senn\", isStopword: true, pos: \"ao\" }],\n [\"í sífellu\", { lemma: \"í sífella\", isStopword: true, pos: \"ao\" }],\n [\"lengi vel\", { lemma: \"lengi vel\", isStopword: true, pos: \"ao\" }],\n [\"meira að segja\", { lemma: \"mikill að segja\", isStopword: true, pos: \"ao\" }],\n [\"meira og minna\", { lemma: \"mikill og lítill\", isStopword: true, pos: \"ao\" }],\n [\"meðal annars\", { lemma: \"meðal annar\", isStopword: true, pos: \"ao\" }],\n [\"nokkurn veginn\", { lemma: \"nokkur vegur\", isStopword: true, pos: \"ao\" }],\n [\"og svo framvegis\", { lemma: \"og svo framvegis\", isStopword: true, pos: \"ao\" }],\n [\"satt að segja\", { lemma: \"sannur að segja\", isStopword: true, pos: \"ao\" }],\n [\"sem betur fer\", { lemma: \"sem vel fara\", isStopword: true, pos: \"ao\" }],\n [\"smám saman\", { lemma: \"smátt saman\", isStopword: true, pos: \"ao\" }],\n [\"svo sem\", { lemma: \"svo sem\", isStopword: true, pos: \"ao\" }],\n [\"sér í lagi\", { lemma: \"sér í lag\", isStopword: true, pos: \"ao\" }],\n [\"til og frá\", { lemma: \"til og frá\", isStopword: true, pos: \"ao\" }],\n [\"til baka\", { lemma: \"til baka\", isStopword: true, pos: \"ao\" }],\n [\"vítt og breitt\", { lemma: \"vítt og breitt\", isStopword: true, pos: \"ao\" }],\n [\"á ný\", { lemma: \"á ný\", isStopword: true, pos: \"ao\" }],\n [\"á meðan\", { lemma: \"á meðan\", isStopword: true, pos: \"ao\" }],\n [\"á sama tíma\", { lemma: \"á samur tími\", isStopword: true, pos: \"ao\" }],\n [\"á hinn bóginn\", { lemma: \"á hinn bógur\", isStopword: true, pos: \"ao\" }],\n [\"þar af leiðandi\", { lemma: \"þar af leiða\", isStopword: true, pos: \"ao\" }],\n [\"þar að auki\", { lemma: \"þar að auki\", isStopword: true, pos: \"ao\" }],\n [\"það er að segja\", { lemma: \"það vera að segja\", isStopword: true, pos: \"ao\" }],\n [\"þess vegna\", { lemma: \"það vegna\", isStopword: true, pos: \"ao\" 
}],\n [\"því miður\", { lemma: \"það lítt\", isStopword: true, pos: \"ao\" }],\n [\"þrátt fyrir\", { lemma: \"þrátt fyrir\", isStopword: true, pos: \"ao\" }],\n\n // Time expressions\n [\"á dögunum\", { lemma: \"á dagur\", isStopword: true, pos: \"ao\" }],\n [\"á sínum tíma\", { lemma: \"á sinn tími\", isStopword: true, pos: \"ao\" }],\n [\"á endanum\", { lemma: \"á endi\", isStopword: true, pos: \"ao\" }],\n [\"einu sinni\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"eitt sinn\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"í fyrsta sinn\", { lemma: \"í fyrstur sinn\", isStopword: false, pos: \"ao\" }],\n [\"í kvöld\", { lemma: \"í kvöld\", isStopword: false, pos: \"ao\" }],\n [\"í morgun\", { lemma: \"í morgunn\", isStopword: false, pos: \"ao\" }],\n [\"á morgun\", { lemma: \"á morgunn\", isStopword: false, pos: \"ao\" }],\n\n // Prepositional phrases (fs frasi)\n [\"fyrir hönd\", { lemma: \"fyrir hönd\", isStopword: false, pos: \"fs\" }],\n [\"með tilliti til\", { lemma: \"með tillit til\", isStopword: false, pos: \"fs\" }],\n [\"í ljósi\", { lemma: \"í ljós\", isStopword: false, pos: \"fs\" }],\n [\"í stað\", { lemma: \"í staður\", isStopword: false, pos: \"fs\" }],\n [\"fyrir aftan\", { lemma: \"fyrir aftan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir austan\", { lemma: \"fyrir austan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir framan\", { lemma: \"fyrir framan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir handan\", { lemma: \"fyrir handan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir innan\", { lemma: \"fyrir innan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir neðan\", { lemma: \"fyrir neðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir norðan\", { lemma: \"fyrir norðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir ofan\", { lemma: \"fyrir ofan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir sunnan\", { lemma: \"fyrir sunnan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir utan\", { lemma: 
\"fyrir utan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir vestan\", { lemma: \"fyrir vestan\", isStopword: false, pos: \"fs\" }],\n [\"í gegnum\", { lemma: \"í gegnum\", isStopword: false, pos: \"fs\" }],\n [\"í kringum\", { lemma: \"í kringum\", isStopword: false, pos: \"fs\" }],\n [\"innan við\", { lemma: \"innan við\", isStopword: false, pos: \"fs\" }],\n [\"upp úr\", { lemma: \"upp úr\", isStopword: false, pos: \"fs\" }],\n [\"þvert á\", { lemma: \"þvert á\", isStopword: false, pos: \"fs\" }],\n\n // Conjunction-like phrases (st frasi)\n [\"þar eð\", { lemma: \"þar eð\", isStopword: true, pos: \"st\" }],\n\n // Named entities - organizations/institutions (NOT stopwords)\n [\"sameinuðu þjóðirnar\", { lemma: \"Sameinuðu þjóðirnar\", isStopword: false, pos: \"entity\" }],\n [\"evrópusambandið\", { lemma: \"Evrópusambandið\", isStopword: false, pos: \"entity\" }],\n [\"nato\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n [\"nató\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n]);\n\n/**\n * Check if a phrase starting at the given position exists.\n * Returns the phrase info and length if found, null otherwise.\n */\nexport function matchPhrase(\n words: string[],\n startIndex: number\n): { phrase: StaticPhrase; wordCount: number } | null {\n // Try longest matches first (up to 4 words)\n for (let len = Math.min(4, words.length - startIndex); len >= 2; len--) {\n const phraseWords = words.slice(startIndex, startIndex + len);\n const phraseKey = phraseWords.join(\" \").toLowerCase();\n const phrase = STATIC_PHRASES.get(phraseKey);\n if (phrase) {\n return { phrase, wordCount: len };\n }\n }\n return null;\n}\n\n/**\n * Check if a normalized string is a known phrase.\n */\nexport function isKnownPhrase(text: string): boolean {\n return STATIC_PHRASES.has(text.toLowerCase());\n}\n\n/**\n * Get phrase info for a normalized string.\n */\nexport function getPhraseInfo(text: string): StaticPhrase | undefined {\n return 
STATIC_PHRASES.get(text.toLowerCase());\n}\n","/**\n * Unified text processing pipeline integrating tokenize-is with lemmatization.\n *\n * Provides proper tokenization that handles Icelandic-specific patterns\n * (abbreviations, dates, times, etc.) before lemmatization.\n */\n\nimport { tokenize, type Token } from \"tokenize-is\";\nimport { Disambiguator, type DisambiguatedToken } from \"./disambiguate.js\";\nimport { CompoundSplitter, type CompoundSplit } from \"./compounds.js\";\nimport { STOPWORDS_IS, isContextualStopword } from \"./stopwords.js\";\nimport type { LemmatizerLike, BigramProvider } from \"./types.js\";\n\n/**\n * Token kinds that should be lemmatized.\n */\nconst LEMMATIZABLE_KINDS = new Set([\"word\"]);\n\n/**\n * Token kinds that represent named entities (skip lemmatization).\n */\nconst ENTITY_KINDS = new Set([\"person\", \"company\", \"entity\"]);\n\n/**\n * Token kinds to skip entirely (not useful for indexing).\n */\nconst SKIP_KINDS = new Set([\n \"punctuation\",\n \"s_begin\",\n \"s_end\",\n \"s_split\",\n \"unknown\",\n]);\n\n/**\n * A processed token with lemmatization results.\n */\nexport interface ProcessedToken {\n /** Original token text */\n original: string;\n /** Token kind from tokenize-is */\n kind: string;\n /** Candidate lemmas (for word tokens) */\n lemmas: string[];\n /** Is this a named entity? 
*/\n isEntity: boolean;\n /** Best lemma guess after disambiguation */\n disambiguated?: string;\n /** Disambiguation confidence (0-1) */\n confidence?: number;\n /** Compound split result if applicable */\n compoundSplit?: CompoundSplit;\n /** Lemmas derived from compound parts (if any) */\n compoundLemmas?: string[];\n}\n\n/**\n * Options for text processing.\n */\nexport interface ProcessOptions {\n /** Bigram provider for disambiguation */\n bigrams?: BigramProvider;\n /** Compound splitter for compound word detection */\n compoundSplitter?: CompoundSplitter;\n /** Remove stopwords from results */\n removeStopwords?: boolean;\n /**\n * Use contextual stopword detection (requires POS info).\n * When true, words like \"á\" are only filtered as stopwords when used\n * as prepositions, not when used as verbs (\"eiga\") or nouns (river).\n * Default: false (use simple stopword list)\n */\n useContextualStopwords?: boolean;\n /** Include numbers in results */\n includeNumbers?: boolean;\n /**\n * Index all candidate lemmas, not just the disambiguated one.\n * Better recall for search (finds more matches), worse precision.\n * Set to false if you only want the most likely lemma.\n * Default: true\n */\n indexAllCandidates?: boolean;\n /**\n * Try compound splitting even for known words.\n * Useful when BÍN contains the compound but you still want parts indexed.\n * Set to false to only split unknown words.\n * Default: true\n */\n alwaysTryCompounds?: boolean;\n}\n\n/**\n * Process text through the full pipeline.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Array of processed tokens\n */\nexport function processText(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): ProcessedToken[] {\n const {\n bigrams,\n compoundSplitter,\n includeNumbers = false,\n alwaysTryCompounds = true,\n } = options;\n\n // Step 1: Tokenize\n const tokens = tokenize(text);\n\n 
// Step 2: Process each token\n const results: ProcessedToken[] = [];\n const wordTokens: { index: number; token: Token }[] = [];\n const lemmaCache = new Map<string, string[]>();\n\n const getLemmas = (raw: string): string[] => {\n const key = raw.toLowerCase();\n const cached = lemmaCache.get(key);\n if (cached) return cached;\n const lemmas = lemmatizer.lemmatize(raw);\n lemmaCache.set(key, lemmas);\n return lemmas;\n };\n\n for (let i = 0; i < tokens.length; i++) {\n const token = tokens[i];\n\n // Skip unwanted tokens\n if (SKIP_KINDS.has(token.kind)) {\n continue;\n }\n\n // Handle named entities\n if (ENTITY_KINDS.has(token.kind)) {\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: true,\n });\n continue;\n }\n\n // Handle numbers if requested\n if (token.kind === \"number\" || token.kind === \"ordinal\") {\n if (includeNumbers) {\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: false,\n });\n }\n continue;\n }\n\n // Handle word tokens\n if (LEMMATIZABLE_KINDS.has(token.kind)) {\n const tokenText = token.text ?? 
\"\";\n const lemmas = getLemmas(tokenText);\n\n const processed: ProcessedToken = {\n original: tokenText,\n kind: token.kind,\n lemmas,\n isEntity: false,\n };\n\n // Try compound splitting\n // - Always if alwaysTryCompounds is set (for better search recall)\n // - Otherwise only if lemmatization returns unknown word\n const isUnknownWord = lemmas.length === 1 && lemmas[0] === tokenText.toLowerCase();\n if (compoundSplitter && (alwaysTryCompounds || isUnknownWord)) {\n const split = compoundSplitter.split(tokenText);\n if (split.isCompound) {\n processed.compoundSplit = split;\n // Add component lemmas from parts (in addition to direct lemmas)\n const partLemmas = split.parts.flatMap((c) => getLemmas(c));\n processed.compoundLemmas = partLemmas;\n processed.lemmas = [...new Set([...lemmas, ...partLemmas])];\n }\n }\n\n results.push(processed);\n wordTokens.push({ index: results.length - 1, token });\n continue;\n }\n\n // Pass through other tokens (time, date, url, etc.)\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: false,\n });\n }\n\n // Step 3: Disambiguate if we have bigram data\n if (bigrams && wordTokens.length > 0) {\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n\n for (let i = 0; i < wordTokens.length; i++) {\n const { index, token } = wordTokens[i];\n const prevToken = i > 0 ? wordTokens[i - 1].token : null;\n const nextToken = i < wordTokens.length - 1 ? wordTokens[i + 1].token : null;\n\n const result = disambiguator.disambiguate(\n token.text ?? \"\",\n prevToken?.text ?? null,\n nextToken?.text ?? null,\n {\n prevLemmas: prevToken?.text ? getLemmas(prevToken.text) : undefined,\n nextLemmas: nextToken?.text ? 
getLemmas(nextToken.text) : undefined,\n }\n );\n\n results[index].disambiguated = result.lemma;\n results[index].confidence = result.confidence;\n }\n } else {\n // No disambiguation - use first lemma\n for (const { index } of wordTokens) {\n const processed = results[index];\n if (processed.lemmas.length > 0) {\n processed.disambiguated = processed.lemmas[0];\n processed.confidence = processed.lemmas.length === 1 ? 1.0 : 0.5;\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract unique indexable lemmas from text.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Set of unique lemmas suitable for search indexing\n */\nexport function extractIndexableLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): Set<string> {\n const {\n removeStopwords = false,\n indexAllCandidates = true,\n useContextualStopwords = false,\n } = options;\n\n const processed = processText(text, lemmatizer, options);\n const lemmas = new Set<string>();\n\n /**\n * Check if a lemma should be filtered as a stopword.\n * Uses contextual rules when enabled and POS is available.\n */\n const shouldFilter = (lemma: string, pos?: string): boolean => {\n if (!removeStopwords) return false;\n if (useContextualStopwords) {\n return isContextualStopword(lemma, pos);\n }\n return STOPWORDS_IS.has(lemma);\n };\n\n for (const token of processed) {\n // Skip entities\n if (token.isEntity) {\n continue;\n }\n\n if (indexAllCandidates) {\n // Index ALL candidate lemmas for better search recall\n for (const lemma of token.lemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n } else {\n // Use disambiguated lemma if available (better precision)\n if (token.disambiguated) {\n // Note: We don't have POS info easily available in disambiguated result\n // This would need enhancement to pass through POS from disambiguation\n if (!shouldFilter(token.disambiguated)) {\n 
lemmas.add(token.disambiguated);\n }\n }\n }\n\n // Also add compound parts if split\n if (token.compoundSplit?.isCompound) {\n const partLemmas = token.compoundLemmas\n ? token.compoundLemmas\n : token.compoundSplit.parts.flatMap((p) => lemmatizer.lemmatize(p));\n for (const lemma of partLemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n }\n }\n\n return lemmas;\n}\n\n/**\n * Strategy for benchmark comparisons.\n */\nexport type ProcessingStrategy = \"naive\" | \"tokenized\" | \"disambiguated\" | \"full\";\n\n/**\n * Metrics from processing a text.\n */\nexport interface ProcessingMetrics {\n /** Total word count */\n wordCount: number;\n /** Words successfully lemmatized (not returned as-is) */\n lemmatizedCount: number;\n /** Coverage: lemmatized / total */\n coverage: number;\n /** Words with multiple candidate lemmas */\n ambiguousCount: number;\n /** Ambiguity rate: ambiguous / total */\n ambiguityRate: number;\n /** Average disambiguation confidence */\n avgConfidence: number;\n /** Compounds detected and split */\n compoundsFound: number;\n /** Named entities skipped */\n entitiesSkipped: number;\n /** Unique lemmas extracted */\n uniqueLemmas: number;\n /** Processing time in milliseconds */\n timeMs: number;\n}\n\n/**\n * Run benchmark with a specific strategy and collect metrics.\n */\nexport function runBenchmark(\n text: string,\n lemmatizer: LemmatizerLike,\n strategy: ProcessingStrategy,\n resources: {\n bigrams?: BigramProvider;\n compoundSplitter?: CompoundSplitter;\n } = {}\n): ProcessingMetrics {\n const start = performance.now();\n\n let processed: ProcessedToken[];\n let lemmas: Set<string>;\n\n switch (strategy) {\n case \"naive\": {\n // Simple whitespace split + lemmatize\n const tokens = text.split(/\\s+/).filter((t) => t.length > 0);\n const naiveProcessed: ProcessedToken[] = [];\n\n for (const token of tokens) {\n const cleaned = token.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\");\n if (cleaned) {\n const 
tokenLemmas = lemmatizer.lemmatize(cleaned);\n naiveProcessed.push({\n original: cleaned,\n kind: \"word\",\n lemmas: tokenLemmas,\n isEntity: false,\n disambiguated: tokenLemmas[0],\n confidence: tokenLemmas.length === 1 ? 1.0 : 0.5,\n });\n }\n }\n processed = naiveProcessed;\n lemmas = new Set(naiveProcessed.map((p) => p.disambiguated!).filter(Boolean));\n break;\n }\n\n case \"tokenized\": {\n // tokenize-is + lemmatize word tokens\n processed = processText(text, lemmatizer);\n lemmas = new Set(\n processed\n .filter((p) => p.kind === \"word\" && p.lemmas.length > 0)\n .map((p) => p.lemmas[0])\n );\n break;\n }\n\n case \"disambiguated\": {\n // tokenized + bigram disambiguation\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n break;\n }\n\n case \"full\": {\n // disambiguated + compounds\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n break;\n }\n }\n\n const timeMs = performance.now() - start;\n\n // Calculate metrics\n const wordTokens = processed.filter((p) => p.kind === \"word\");\n const wordCount = wordTokens.length;\n\n const lemmatizedCount = wordTokens.filter((p) => {\n // Considered lemmatized if not returned as-is\n return (\n p.lemmas.length > 0 &&\n !(p.lemmas.length === 1 && p.lemmas[0] === p.original.toLowerCase())\n );\n }).length;\n\n const ambiguousCount = wordTokens.filter((p) => p.lemmas.length > 1).length;\n\n const confidences = wordTokens\n .filter((p) => p.confidence !== undefined)\n .map((p) => p.confidence!);\n const avgConfidence =\n confidences.length > 0\n ? 
confidences.reduce((a, b) => a + b, 0) / confidences.length\n : 0;\n\n const compoundsFound = wordTokens.filter((p) => p.compoundSplit?.isCompound).length;\n const entitiesSkipped = processed.filter((p) => p.isEntity).length;\n\n return {\n wordCount,\n lemmatizedCount,\n coverage: wordCount > 0 ? lemmatizedCount / wordCount : 0,\n ambiguousCount,\n ambiguityRate: wordCount > 0 ? ambiguousCount / wordCount : 0,\n avgConfidence,\n compoundsFound,\n entitiesSkipped,\n uniqueLemmas: lemmas.size,\n timeMs,\n };\n}\n"],"mappings":"uCAUA,MAAa,EAAe,IAAI,IAAI,8rIAuEnC,CAAC,CAKF,SAAgB,EAAW,EAAuB,CAChD,OAAO,EAAa,IAAI,EAAK,aAAa,CAAC,CAa7C,MAAa,EAAiD,IAAI,IAAI,CAEpE,CAAC,IAAK,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE5B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE9B,CAAC,KAAM,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE7B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE/B,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEzB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,IAAK,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CACvB,CAAC,CAYF,SAAgB,EAAqB,EAAe,EAAuB,CACzE,IAAM,EAAa,EAAM,aAAa,CAGhC,EAAc,EAAqB,IAAI,EAAW,CAOxD,OANI,GAAe,EAEV,EAAY,IAAI,EAAI,CAItB,EAAa,IAAI,EAAW,CAMrC,SAAgB,EAAkC,EAAiB,CACjE,OAAO,EAAM,OAAQ,GAAM,CAAC,EAAW,EAAE,CAAC,CCnI5C,MAAM,EAAQ,WAGR,EAA2B,CAC/B,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACD,CAIK,EAAgD,CACpD,IAAA,GACA,KACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,IAAA,GACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,KACA,KACD,CAUD,IAAa,EAAb,MAAa,CAA2D,CACtE,OACA,WACA,aACA,aACA,YACA,YACA,aACA,QACA,gBACA,gBACA,gBACA,gBACA,YAEA,WACA,UACA,WACA,YACA,QAEA,QAAkB,IAAI,YAAY,QAAQ,CAE1C,YAAoB,EAAqB,CACvC,KAAK,OAAS,EACd,IAAM,EAAO,IAAI,SAAS,EAAO,CAG3B,EAAQ,EAAK,UAAU,EAAG,GAAK,CAC
rC,GAAI,IAAU,EACZ,MAAU,MACR,2CAA2C,EAAM,SAAS,GAAG,CAAC,UAAU,EAAM,SAAS,GAAG,GAC3F,CAIH,GADA,KAAK,QAAU,EAAK,UAAU,EAAG,GAAK,CAClC,KAAK,UAAY,GAAK,KAAK,UAAY,EACzC,MAAU,MAAM,wBAAwB,KAAK,UAAU,CAGzD,IAAM,EAAiB,EAAK,UAAU,EAAG,GAAK,CAC9C,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,UAAY,EAAK,UAAU,GAAI,GAAK,CACzC,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,YAAc,EAAK,UAAU,GAAI,GAAK,CAI3C,IAAI,EAAS,GAGb,KAAK,WAAa,IAAI,WAAW,EAAQ,EAAQ,EAAe,CAChE,GAAU,EAGV,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CACpE,GAAU,KAAK,WAAa,EAG5B,KAAK,aAAe,IAAI,WAAW,EAAQ,EAAQ,KAAK,WAAW,CACnE,GAAU,KAAK,WAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAU,CAClE,GAAU,KAAK,UAAY,EAG3B,KAAK,YAAc,IAAI,WAAW,EAAQ,EAAQ,KAAK,UAAU,CACjE,GAAU,KAAK,UAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAY,EAAE,CACvE,IAAW,KAAK,UAAY,GAAK,EAGjC,KAAK,QAAU,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CAC/D,GAAU,KAAK,WAAa,EAG5B,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CAMtE,aAAa,KACX,EACA,EAAmC,EAAE,CACV,CAE3B,IAAM,EAAW,MADD,EAAQ,OAAS,OACF,EAAI,CAEnC,GAAI,CAAC,EAAS,GACZ,MAAU,MAAM,+BAA+B,EAAS,SAAS,CAInE,OAAO,IAAI,EADI,MAAM,EAAS,aAAa,CACR,CAMrC,OAAO,eAAe,EAAuC,CAC3D,OAAO,IAAI,EAAiB,EAAO,CAMrC,UAAkB,EAAgB,EAAwB,CACxD,OAAO,KAAK,QAAQ,OAAO,KAAK,WAAW,SAAS,EAAQ,EAAS,EAAO,CAAC,CAM/E,SAAiB,EAAuB,CACtC,OAAO,KAAK,UAAU,KAAK,aAAa,GAAQ,KAAK,aAAa,GAAO,CAM3E,QAAgB,EAAuB,CACrC,OAAO,KAAK,UAAU,KAAK,YAAY,GAAQ,KAAK,YAAY,GAAO,CAOzE,SAAiB,EAAsB,CACrC,IAAI,EAAO,EACP,EAAQ,KAAK,UAAY,EAE7B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAU,KAAK,QAAQ,EAAI,CAEjC,GAAI,IAAY,EACd,OAAO,EAEL,EAAU,EACZ,EAAO,EAAM,EAEb,EAAQ,EAAM,EAIlB,MAAO,GAQT,UAAU,EAAc,EAAkC,EAAE,CAAY,CACtE,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,CAAC,EAAW,CAGrB,I
AAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAE9B,CAAE,aAAc,EAChB,EAAO,IAAI,IACX,EAAmB,EAAE,CAE3B,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAM,EAAY,GAExB,GAAI,GAAa,IAAQ,EACvB,SAGF,IAAM,EAAQ,KAAK,SAAS,EAAS,CAChC,EAAK,IAAI,EAAM,GAClB,EAAK,IAAI,EAAM,CACf,EAAO,KAAK,EAAM,EAQtB,OAJI,EAAO,SAAW,EACb,CAAC,EAAW,CAGd,EAQT,YAAoB,EAMlB,CAWA,OAVI,KAAK,UAAY,EACZ,CACL,SAAU,IAAU,EACpB,QAAS,EAAQ,GACjB,SAAU,EACV,WAAY,EACZ,WAAY,EACb,CAGI,CACL,SAAU,IAAU,GACpB,QAAS,EAAQ,GACjB,SAAW,IAAU,EAAK,EAC1B,WAAa,IAAU,EAAK,EAC5B,WAAa,IAAU,EAAK,EAC7B,CAOH,iBAAiB,EAA8B,CAC7C,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAAO,IAAI,IACX,EAAyB,EAAE,CAEjC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAQ,KAAK,SAAS,EAAS,CAC/B,EAAM,EAAY,IAAa,GAC/B,EAAM,GAAG,EAAM,GAAG,IAEnB,EAAK,IAAI,EAAI,GAChB,EAAK,IAAI,EAAI,CACb,EAAO,KAAK,CAAE,QAAO,MAAK,CAAC,EAI/B,OAAO,EAOT,mBAAmB,EAAgC,CACjD,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAA2B,EAAE,CAEnC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,UAAS,WAAU,aAAY,cAC/C,KAAK,YAAY,KAAK,QAAQ,GAAG,CAE7B,EAAuB,EAAE,CACzB,EAAU,EAAa,GACvB,EAAY,EAAe,GAC3B,EAAY,EAAe,GAE7B,IAAS,EAAM,KAAO,GACtB,IAAW,EAAM,OAAS,GAC1B,IAAW,EAAM,OAAS,GAE9B,EAAO,KAAK,CACV,MAAO,KAAK,SAAS,EAAS,CAC9B,IAAK,EAAY,IAAa,GAC9B,MAAO,OAAO,KAAK,EAAM,CAAC,OAAS,EAAI,EAAQ,IAAA,GAChD,CAAC,CAGJ,OAAO,EAMT,kBAA4B,CAC1B,OAAO,KAAK,SAAW,EAMzB,YAAqB,CACnB,OAAO,KAAK,QAMd,WAAmB,EAAe,EAAuB,CACvD,IAAI,EAAO,EACP,EAAQ,KAAK,YAAc,EAE/B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,EAAQ,EACV,EAAO,EAAM,UACJ,EAAQ,EACjB,EAAQ,EAAM,MACT,CAEL,IAAM,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,IAAU,EACZ,OAAO,EAEL,EAAQ,EACV,EAAO,EAAM,EAEb,EAAQ,EAAM,GAKpB,MAAO,GAOT,WAAW,EAAe,EAAuB,CAC/C
,IAAM,EAAM,KAAK,WAAW,EAAM,aAAa,CAAE,EAAM,aAAa,CAAC,CACrE,OAAO,IAAQ,GAAK,EAAI,KAAK,YAAY,GAO3C,KAAK,EAAe,EAAuB,CACzC,OAAO,KAAK,WAAW,EAAO,EAAM,CAMtC,QAAQ,EAAuB,CAC7B,OAAO,KAAK,SAAS,EAAK,aAAa,CAAC,GAAK,GAM/C,IAAI,iBAA0B,CAC5B,OAAO,KAAK,WAMd,IAAI,eAAwB,CAC1B,OAAO,KAAK,UAMd,IAAI,kBAA2B,CAC7B,OAAO,KAAK,YAMd,IAAI,YAAqB,CACvB,OAAO,KAAK,OAAO,WAOrB,cAAyB,CACvB,IAAM,EAAmB,EAAE,CAC3B,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,WAAY,IACnC,EAAO,KAAK,KAAK,SAAS,EAAE,CAAC,CAE/B,OAAO,IC5dX,MAAa,EAA6C,CAKxD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,gBACT,YAAa,iDACd,CACD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAKD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,iBACT,YAAa,uCACd,CACD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,yCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,qCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,8CACd,CAGD,CACE,KAAM,OACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,+CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,mCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CACF,CAKD,SAAgB,EAAgB,EAAoC,CAClE,IAAM,EAAa,EAAK,aAAa,CACrC,OAAO,EAAqB,OAAQ,GAAM,EAAE,OAAS,EAAW,CAMlE,SAAgB,EAAuB,EAAuB,CAC5D,OAAO,EAAqB,KAAM,GAAM,EAAE,OAAS,EAAK,aAAa,CAAC,CCzJxE,MAAa,EAAuD,IAAI,IAAkC,CAExG,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAClD,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CACjD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAGlD,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CA
AC,CACxC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC5C,CAAC,UAAW,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,YAAa,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG/C,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,SAAU,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC7C,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,WAAY,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC/C,CAAC,QAAS,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC5C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACxC,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC1C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,UAAW,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC9C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,CAOW,EAAsB,IAAI,IAAI,CACzC,KACA,KACA,OACA,MACA,MACA,MACA,MACA,OACA,MACA,MACD,CAAC,CAuBF,SAAgB,EACd,EACA,EACS,CAGT,OAFK,EACS,EAAkB,IAAI,EAAU,EAChC,IAAI,EAAa,EAAI,GAFT,GAe5B,SAAgB,EACd,EACA,EACyB,CAEzB,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,GAAI,EAAe,SAAW,EAAG,OAAO,KAGxC,IAAK,IAAM,KAAQ,EACjB,IAAK,IAAM,KAAY,EACrB,GAAI,EAAS,OAAO,MAAQ,EAAc,EAAK,MAAO,EAAS,MAAM,KAAK,CACxE,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,QAAQ,EAAS,MAAM,OAC7B,WAAY,GACb,CAKP,OAAO,KAaT,SAAgB,EACd,EACA,EACyB,CACzB,GAAI,CAAC,EAAU,OAAO,KAEtB,IAAM,EAAY,EAAS,aAAa,CACxC,GAAI,CAAC,EAAoB,IAAI,EAAU,CAAE,OAAO,KAGhD,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAW/D,OAVI,EAAe,SAAW,GAI1B,CADe,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CACjC,KAMjB,CACL,OAJoB,EAAe,KAAM,GAAM,EAAE,QAAU,OAAO,EAC7B,EAAe,IAG/B,MACrB,IAAK,KACL,KAAM,eACN,WAAY,IACb,CAsBH,SAAgB,EACd,EACA,EACA,EACyB,CACzB,GAAI,CAAC,GAAY,CAAC,GAAY,i
BAAkB,OAAO,KAGvD,IAAM,EAAa,EAAW,iBAAiB,EAAS,CAClD,EAAgB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAC5D,GAAI,CAAC,EAAe,OAAO,KAG3B,IAAM,EAAoB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAG1D,EAAmB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAI/D,GAAI,GAAqB,EACvB,OAAO,KAIT,IAAM,EAAgB,EAAkB,IAAI,EAAc,MAAM,CAChE,GAAI,CAAC,EAAe,OAAO,KAG3B,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,IAAK,IAAM,KAAQ,EACjB,GAAI,EAAK,OAAO,MAAQ,EAAc,IAAI,EAAK,MAAM,KAAK,CACxD,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,mBAAmB,EAAK,MAAM,OACpC,WAAY,GACb,CAIL,OAAO,KAiBT,SAAgB,EACd,EACA,EACA,EACA,EAA2C,KAClB,CAazB,OAXiB,EAAqB,EAAY,EAAc,EAItC,EAA8B,EAAY,EAAU,EAAW,EAIxE,EAAqB,EAAY,EAAS,EAGpD,KAMT,SAAgB,EAAmB,EAAwB,CACzD,OAAO,EAAkB,IAAI,EAAM,CAMrC,SAAgB,EAAiB,EAAqD,CACpF,OAAO,EAAkB,IAAI,EAAU,CClNzC,MAAM,EAAwC,CAC5C,KAAM,cACN,IAAI,EAAY,CAQd,OAPI,EAAW,SAAW,EACjB,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EACb,CAEI,MAEV,CAKK,EAA4C,CAChD,KAAM,mBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,mBAAoB,OAAO,KAE9C,IAAK,IAAM,KAAQ,EAAsB,CACvC,IAAM,EAAQ,EAAU,EAAM,EAAY,EAAQ,CAClD,GAAI,EACF,MAAO,CACL,MAAO,EAAM,MACb,IAAK,EAAM,IACX,WAAY,IACb,CAGL,OAAO,MAEV,CAKD,SAAS,EACP,EACA,EACA,EACqB,CAErB,IAAM,EAAqB,EAAW,KACnC,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,OAC5E,CACK,EAAe,EAAW,KAC7B,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,KAC5E,CAED,GAAI,CAAC,GAAsB,CAAC,EAC1B,OAAO,KAIT,GAAI,EAAK,UAAY,cAAe,CAElC,IAAM,EAAO,EAAQ,SACrB,GAAI,GAAQ,kBAAkB,KAAK,EAAK,CACtC,OAAO,UAEA,EAAK,UAAY,cAAe,CAGzC,IAAM,EAAO,EAAQ,UAAU,aAAa,CAC5C,GAAI,GAAQ,CAAC,CAAC,QAAS,QAAS,KAAM,KAAM,MAAO,OAAQ,MAAO,MAAM,CAAC,SAAS,EAAK,CACrF,OAAO,UAEA,EAAK,UAAY,gBAAiB,CAE3C,IAAM,EAAO,EAAQ,UAAU,aAAa,CAE5C,GAAI,GADa,CAAC,KAAM,KAAM,OAAQ,MAAO,MAAO,MAAO,MAAO,OAAQ,MAAO,MAAM,CAClE,SAAS,EAAK,CACjC,OAAO,EAIX,OAAO,KAiIT,MAAM,EAAgC,CACpC,EACA,EA3H6C,CAC7C,KAAM,gBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,gBAAiB,OAAO,KAG3C,IAAM,EAAwC,EAAW,IAAK,IAAO,CACnE,GAAG,EACH,MAAO,IAAA,GACR,EAAE,CAGH,GAAI,EAAc,WAAW,mBAAoB,CAC/C,IAAM,EAAc,EAAQ,UAAU,EAAQ,OAC9C,GAAI,EAAa,CACf,IAAM,EAAkB,EAAc
,WAAW,mBAAmB,EAAY,CAEhF,EAAoB,OAAS,EAC7B,EAAoB,KAAK,GAAG,EAAgB,EAKhD,IAAM,EAAS,EACb,EACA,EAAQ,SACR,EAAQ,eAAiB,EAAE,CAC3B,EAAc,WACf,CAUD,OARI,EACK,CACL,MAAO,EAAO,MACd,IAAK,EAAO,IACZ,WAAY,EAAO,WACpB,CAGI,MAEV,CAKwC,CACvC,KAAM,eACN,IAAI,EAAY,EAAS,EAAe,CAEtC,GADI,CAAC,EAAc,SACf,EAAW,SAAW,EAAG,OAAO,KAEpC,IAAM,EAAuD,EAAE,CAE/D,IAAK,IAAM,KAAa,EAAY,CAClC,IAAI,EAAQ,EAGZ,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAW,EAAU,MAAM,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,aAMlD,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAU,MAAO,EAAU,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,cAKlD,EAAO,KAAK,CAAE,YAAW,QAAO,CAAC,CAOnC,GAHA,EAAO,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAGpC,EAAO,OAAS,GAAK,EAAO,GAAG,MAAQ,EAAG,CAC5C,IAAM,EAAW,EAAO,GAAG,MACrB,EAAa,EAAO,QAAQ,EAAK,IAAM,EAAM,KAAK,IAAI,EAAE,MAAM,CAAE,EAAE,CAClE,EAAa,EAAa,EAAI,KAAK,IAAI,EAAS,CAAG,EAAa,GAEtE,MAAO,CACL,MAAO,EAAO,GAAG,UAAU,MAC3B,IAAK,EAAO,GAAG,UAAU,IACzB,aACD,CAGH,OAAO,MAEV,CAK0C,CACzC,KAAM,WACN,IAAI,EAAY,CAQd,OAPI,EAAW,OAAS,EACf,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EAAI,EAAW,OAC5B,CAEI,MAEV,CAWA,CAKD,IAAa,EAAb,KAA2B,CACzB,WACA,QACA,WACA,YACA,mBACA,gBAEA,YACE,EACA,EAAiC,KACjC,EAAgC,EAAE,CAClC,CACA,KAAK,WAAa,EAClB,KAAK,QAAU,EACf,KAAK,WAAa,EAAQ,YAAc,EACxC,KAAK,YAAc,EAAQ,aAAe,EAC1C,KAAK,mBAAqB,EAAQ,oBAAsB,GACxD,KAAK,gBAAkB,EAAQ,iBAAmB,GAUpD,aACE,EACA,EACA,EACA,EAAkC,EAAE,CAChB,CAEpB,IAAI,EACJ,AAKE,EALE,KAAK,WAAW,iBACE,KAAK,WAAW,iBAAiB,EAAK,CAG3C,KAAK,WAAW,UAAU,EAAK,CACnB,IAAK,IAAO,CAAE,MAAO,EAAG,IAAK,KAAmB,EAAE,CAG/E,IAAM,EAAa,EAAkB,IAAK,GAAM,EAAE,MAAM,CAClD,EAAQ,EAGV,EACA,GAAY,KAAK,WAAW,qBAC9B,EAAgB,KAAK,WAAW,mBAAmB,EAAS,EAI9D,IAAM,EAAiC,CACrC,WACA,WACA,WAAY,EAAK,WACjB,WAAY,EAAK,WACjB,gBACA,UAAW,CAAC,EAAK,CACjB,MAAO,EACR,CAGD,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAS,EAAM,IAAI,EAAmB,EAAS,KAAK,CAC1D,GAAI,EACF,MAAO,CACL,QACA,MAAO,EAAO,MACd,IAAK,EAAO,
IACZ,aACA,oBACA,UAAW,EAAW,OAAS,EAC/B,WAAY,EAAO,WACnB,WAAY,EAAM,KACnB,CAKL,MAAO,CACL,QACA,MAAO,EAAK,aAAa,CACzB,aACA,oBACA,UAAW,GACX,WAAY,EACZ,WAAY,OACb,CASH,gBAAgB,EAAwC,CACtD,IAAM,EAAgC,EAAE,CAExC,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAO,EAAO,GACd,EAAW,EAAI,EAAI,EAAO,EAAI,GAAK,KACnC,EAAW,EAAI,EAAO,OAAS,EAAI,EAAO,EAAI,GAAK,KAEzD,EAAQ,KAAK,KAAK,aAAa,EAAM,EAAU,EAAS,CAAC,CAG3D,OAAO,EAST,cAAc,EAA+B,CAC3C,IAAM,EAAS,IAAI,IACb,EAAgB,KAAK,gBAAgB,EAAO,CAElD,IAAK,IAAM,KAAU,EACnB,EAAO,IAAI,EAAO,MAAM,CAG1B,OAAO,IAOX,SAAgB,EACd,EACA,EACA,EACA,EAGI,EAAE,CACO,CACb,GAAM,CAAE,WAAU,mBAAoB,EAGhC,EAAS,EACX,EAAS,EAAK,CACd,EACG,MAAM,MAAM,CACZ,OAAQ,GAAM,EAAE,OAAS,EAAE,CAC3B,IAAK,GAAM,EAAE,QAAQ,oCAAqC,GAAG,CAAC,CAC9D,OAAQ,GAAM,EAAE,OAAS,EAAE,CAI5B,EADgB,IAAI,EAAc,EAAY,EAAQ,CAC/B,cAAc,EAAO,CAGlD,GAAI,MACG,IAAM,KAAS,EACd,EAAa,IAAI,EAAM,EACzB,EAAO,OAAO,EAAM,CAK1B,OAAO,ECzcT,MAAa,EAA8C,CACzD,GAAI,OACJ,GAAI,OACJ,GAAI,YACJ,GAAI,SACJ,GAAI,cACJ,GAAI,UACJ,GAAI,cACJ,GAAI,UACJ,GAAI,UACJ,GAAI,eACL,CAKY,EAAiD,CAC5D,GAAI,UACJ,GAAI,UACJ,GAAI,cACJ,GAAI,YACJ,GAAI,aACJ,GAAI,UACJ,GAAI,aACJ,GAAI,UACJ,GAAI,UACJ,GAAI,YACL,CAoBY,EAA8C,CACzD,GAAI,aACJ,GAAI,aACJ,IAAK,SACL,GAAI,WACL,CAKY,EAAkD,CAC7D,GAAI,YACJ,IAAK,WACL,GAAI,SACL,CAKY,EAAkD,CAC7D,GAAI,WACJ,GAAI,SACL,CC3EY,EAAmB,IAAI,IAAI,sxBAkFvC,CAAC,CA2CI,EAAwB,IAAI,IAAI,+NAmCrC,CAAC,CAMI,EAAoB,IAAI,IAAI,CAChC,OACA,OACA,OACA,OACA,OACA,QACA,QACA,OACA,QACA,QACA,OACA,MACA,OACA,OACA,OACA,QACA,QACA,MACD,CAAC,CASI,EAAmB,CAAC,IAAK,IAAK,IAAI,CAExC,IAAa,EAAb,KAA8B,CAC5B,WACA,cACA,kBACA,YACA,KAEA,YACE,EACA,EACA,EAAmC,EAAE,CACrC,CACA,KAAK,WAAa,EAClB,KAAK,YAAc,EACnB,KAAK,cAAgB,EAAQ,eAAiB,EAC9C,KAAK,kBAAoB,EAAQ,mBAAqB,GACtD,KAAK,KAAO,EAAQ,MAAQ,WAM9B,QAAgB,EAAc,EAAiC,CAC7D,MAAO,CACL,OACA,MAAO,EACP,WAAY,EACZ,WAAY,EACZ,WAAY,GACb,CAWH,MAAM,EAA6B,CACjC,IAAM,EAAa,EAAK,aAAa,CAG/B,EAAe,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAe,EAAa,IAAI,aAAa,CAMnD,GALI,GAAgB,EAAiB,IAAI,EAAa,EAKlD,EAAiB,IAAI,EAAW,CAClC,OAAO,KAAK,QAAQ,EAAM,EAAa,CAKzC,IAAM,EACJ,EAAa,OAAS,GAAK,EAAa,GAAG,aAAa,GAAK,EACzD
,EAAgB,EAAa,SAAW,EAG9C,GAAI,KAAK,OAAS,eAIhB,OAHI,EAAK,SAAS,IAAI,CACb,KAAK,cAAc,EAAM,EAAa,CAExC,KAAK,QAAQ,EAAM,EAAa,CAYzC,GARI,KAAK,OAAS,YAAc,GAAe,GAEzC,EAAW,OAAS,IAMtB,EAAW,OAAS,KAAK,cAAgB,EAC3C,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAIA,EAAE,CAER,IACE,IAAI,EAAI,KAAK,cACb,GAAK,EAAW,OAAS,KAAK,cAC9B,IACA,CACA,IAAM,EAAW,EAAW,MAAM,EAAG,EAAE,CACjC,EAAY,EAAW,MAAM,EAAE,CAG/B,EAAe,KAAK,SAAS,EAAU,EAAU,CAMvD,GALI,GACF,EAAW,KAAK,EAAa,CAI3B,KAAK,uBACF,IAAM,KAAU,EAEnB,GAAI,EAAS,SAAS,EAAO,EAAI,EAAS,OAAS,KAAK,cAAe,CACrE,IAAM,EAAc,EAAS,MAAM,EAAG,GAAG,CACnC,EAAS,KAAK,SAAS,EAAa,EAAU,CAChD,GAEF,EAAW,KAAK,CAAE,GAAG,EAAQ,MAAO,EAAO,MAAQ,IAAM,CAAC,GAOpE,GAAI,EAAW,SAAW,EACxB,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,EAAW,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAC5C,IAAM,EAAO,EAAW,GAGxB,GAAI,KAAK,OAAS,YAAc,GAAe,EAAK,MAAQ,GAC1D,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAAQ,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAK,UAAW,GAAG,EAAK,WAAW,CAAC,CAAC,CAInE,MAAO,CACL,OACA,QACA,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAO,EAAW,CAAC,CAAC,CAMrD,WAAY,KAAK,IAAI,EAAK,MAAO,EAAE,CACnC,WAAY,GACb,CAMH,cAAsB,EAAc,EAAuC,CACzE,IAAM,EAAQ,EAAK,MAAM,IAAI,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACzD,GAAI,EAAM,OAAS,EACjB,OAAO,KAAK,QAAQ,EAAM,EAAa,CAGzC,IAAM,EAAqB,EAAE,CAC7B,IAAK,IAAM,KAAQ,EAAO,CACxB,IAAM,EAAS,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAS,KAAK,GAAG,EAAO,CAG1B,IAAM,EAAc,CAAC,GAAG,IAAI,IAAI,EAAS,CAAC,CAG1C,MAAO,CACL,OACA,MAAO,EACP,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAa,EAAK,aAAa,CAAC,CAAC,CAAC,CAMnE,WAAY,GACZ,WAAY,GACb,CAGH,SACE,EACA,EACqE,CAErE,IAAM,EAAa,KAAK,WAAW,UAAU,EAAS,CAChD,EAAc,KAAK,WAAW,UAAU,EAAU,CAGlD,EAAY,CAAC,GAAG,IAAI,IAAI,EAAW,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAC3E,EAAa,CAAC,GAAG,IAAI,IAAI,EAAY,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAEnF,GAAI,EAAU,SAAW,GAAK,EAAW,SAAW,EAClD,OAAO,KAIT,IAAI,EAAQ,EAIN,EACJ,EAAI,KAAK,IAAI,EAAS,OAAS,EAAU,OAAO,EAAI,EAAS,OAAS,EAAU,QAClF,GAAS,EAAgB,GAIzB,IAAM,GAAa,EAAS,OAAS,EAAU,QAAU,EACnD,EAAc,KAAK,IAAI,EAAY,EAAG,EAAE,CAC9C,GAAS,EAAc,GAIC,EAAW,KAAM,GAAU,EAAsB,IAAI,EAAM,CAAC,GAElF,GAAS,IAKX,IAAM,EAAe,
EAAU,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CACtE,EAAgB,EAAW,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CAgB9E,OAfI,GAAgB,EAElB,GAAS,GACA,CAAC,GAAgB,CAAC,IAE3B,GAAS,KAKP,EAAS,OAAS,GAAK,EAAU,OAAS,KAC5C,GAAS,KAIJ,CACL,UAAW,EACX,WAAY,EACZ,MAAO,KAAK,IAAI,EAAG,EAAM,CAC1B,CAOH,aAAa,EAAwB,CAEnC,OADc,KAAK,MAAM,EAAK,CACjB,aAQjB,SAAgB,EAAoB,EAA+B,CACjE,OAAO,IAAI,IAAI,EAAO,IAAK,GAAM,EAAE,aAAa,CAAC,CAAC,CC1bpD,MAAa,EAA4C,IAAI,IAAI,CAE/D,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CACxE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,WAAY,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7D,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,kBAAmB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/E,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,cAAe,CAAE,MAAO,aAAc,WAAY,GAA
M,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,QAAS,CAAE,MAAO,QAAS,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,iBAAkB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,iBAAkB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,iBAAkB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAM,IAAK,KAAM,CAAC,CACxD,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,kBAAmB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACtE,CAAC,kBAAmB,CAAE,MAAO,oBAAqB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CAGtE,CAAC,YAAa,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAA
O,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC5E,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAClE,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAGlE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9E,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9D,CAAC,SAAU,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CACjE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC7D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAG/D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAG5D,CAAC,sBAAuB,CAAE,MAAO,sBAAuB,WAAY,GAAO,IAAK,SAAU,CAAC,CAC3F,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAO,IAAK,SAAU,CAAC,CACnF,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC7D,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC9D,CAAC,CAMF,SAAgB,EACd,EACA,EACoD,CAEpD,IAAK,IAAI,EAAM,KAAK,IAAI,EAAG,EAAM,OAAS,EAAW,CAAE,GAAO,EAAG,IAAO,CAEtE,IAAM,EADc,EAAM,MAAM,EAAY,EAAa,EAAI,CAC/B,KAAK,IAAI,CAAC,aAAa,CAC/C,EAAS,EAAe,IAAI,EAAU,CAC5C,GAAI,EACF,MAAO,CAAE,SAAQ,UAAW,EAAK,CAGrC,OAAO,KAMT,SAAgB,EAAc,EAAuB
,CACnD,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CAM/C,SAAgB,EAAc,EAAwC,CACpE,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CClJ/C,MAAM,EAAqB,IAAI,IAAI,CAAC,OAAO,CAAC,CAKtC,EAAe,IAAI,IAAI,CAAC,SAAU,UAAW,SAAS,CAAC,CAKvD,EAAa,IAAI,IAAI,CACzB,cACA,UACA,QACA,UACA,UACD,CAAC,CAmEF,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACV,CAClB,GAAM,CACJ,UACA,mBACA,iBAAiB,GACjB,qBAAqB,IACnB,EAGE,EAAS,EAAS,EAAK,CAGvB,EAA4B,EAAE,CAC9B,EAAgD,EAAE,CAClD,EAAa,IAAI,IAEjB,EAAa,GAA0B,CAC3C,IAAM,EAAM,EAAI,aAAa,CACvB,EAAS,EAAW,IAAI,EAAI,CAClC,GAAI,EAAQ,OAAO,EACnB,IAAM,EAAS,EAAW,UAAU,EAAI,CAExC,OADA,EAAW,IAAI,EAAK,EAAO,CACpB,GAGT,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAQ,EAAO,GAGjB,MAAW,IAAI,EAAM,KAAK,CAK9B,IAAI,EAAa,IAAI,EAAM,KAAK,CAAE,CAChC,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CACF,SAIF,GAAI,EAAM,OAAS,UAAY,EAAM,OAAS,UAAW,CACnD,GACF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CAEJ,SAIF,GAAI,EAAmB,IAAI,EAAM,KAAK,CAAE,CACtC,IAAM,EAAY,EAAM,MAAQ,GAC1B,EAAS,EAAU,EAAU,CAE7B,EAA4B,CAChC,SAAU,EACV,KAAM,EAAM,KACZ,SACA,SAAU,GACX,CAKK,EAAgB,EAAO,SAAW,GAAK,EAAO,KAAO,EAAU,aAAa,CAClF,GAAI,IAAqB,GAAsB,GAAgB,CAC7D,IAAM,EAAQ,EAAiB,MAAM,EAAU,CAC/C,GAAI,EAAM,WAAY,CACpB,EAAU,cAAgB,EAE1B,IAAM,EAAa,EAAM,MAAM,QAAS,GAAM,EAAU,EAAE,CAAC,CAC3D,EAAU,eAAiB,EAC3B,EAAU,OAAS,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAQ,GAAG,EAAW,CAAC,CAAC,EAI/D,EAAQ,KAAK,EAAU,CACvB,EAAW,KAAK,CAAE,MAAO,EAAQ,OAAS,EAAG,QAAO,CAAC,CACrD,SAIF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,EAIJ,GAAI,GAAW,EAAW,OAAS,EAAG,CACpC,IAAM,EAAgB,IAAI,EAAc,EAAY,EAAQ,CAE5D,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,OAAQ,IAAK,CAC1C,GAAM,CAAE,QAAO,SAAU,EAAW,GAC9B,EAAY,EAAI,EAAI,EAAW,EAAI,GAAG,MAAQ,KAC9C,EAAY,EAAI,EAAW,OAAS,EAAI,EAAW,EAAI,GAAG,MAAQ,KAElE,EAAS,EAAc,aAC3B,EAAM,MAAQ,GACd,GAAW,MAAQ,KACnB,GAAW,MAAQ,KACnB,CACE,WAAY,GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC1D,WAAY,GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC3D,CACF,CAED,EAAQ,GAAO,cAAgB,EAAO,MACtC,EAAQ,GAAO,WAAa,EAAO,iBAIrC,IAAK,GAAM,CAAE,WAAW,EA
AY,CAClC,IAAM,EAAY,EAAQ,GACtB,EAAU,OAAO,OAAS,IAC5B,EAAU,cAAgB,EAAU,OAAO,GAC3C,EAAU,WAAa,EAAU,OAAO,SAAW,EAAI,EAAM,IAKnE,OAAO,EAWT,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACf,CACb,GAAM,CACJ,kBAAkB,GAClB,qBAAqB,GACrB,yBAAyB,IACvB,EAEE,EAAY,EAAY,EAAM,EAAY,EAAQ,CAClD,EAAS,IAAI,IAMb,GAAgB,EAAe,IAC9B,EACD,EACK,EAAqB,EAAO,EAAI,CAElC,EAAa,IAAI,EAAM,CAJD,GAO/B,IAAK,IAAM,KAAS,EAEd,MAAM,SAIV,IAAI,MAEG,IAAM,KAAS,EAAM,OACnB,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,MAKjB,EAAM,gBAGH,EAAa,EAAM,cAAc,EACpC,EAAO,IAAI,EAAM,cAAc,EAMrC,GAAI,EAAM,eAAe,WAAY,CACnC,IAAM,EAAa,EAAM,eACrB,EAAM,eACN,EAAM,cAAc,MAAM,QAAS,GAAM,EAAW,UAAU,EAAE,CAAC,CACrE,IAAK,IAAM,KAAS,EACX,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,EAM3B,OAAO,EAqCT,SAAgB,EACd,EACA,EACA,EACA,EAGI,EAAE,CACa,CACnB,IAAM,EAAQ,YAAY,KAAK,CAE3B,EACA,EAEJ,OAAQ,EAAR,CACE,IAAK,QAAS,CAEZ,IAAM,EAAS,EAAK,MAAM,MAAM,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACtD,EAAmC,EAAE,CAE3C,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAU,EAAM,QAAQ,oCAAqC,GAAG,CACtE,GAAI,EAAS,CACX,IAAM,EAAc,EAAW,UAAU,EAAQ,CACjD,EAAe,KAAK,CAClB,SAAU,EACV,KAAM,OACN,OAAQ,EACR,SAAU,GACV,cAAe,EAAY,GAC3B,WAAY,EAAY,SAAW,EAAI,EAAM,GAC9C,CAAC,EAGN,EAAY,EACZ,EAAS,IAAI,IAAI,EAAe,IAAK,GAAM,EAAE,cAAe,CAAC,OAAO,QAAQ,CAAC,CAC7E,MAGF,IAAK,YAEH,EAAY,EAAY,EAAM,EAAW,CACzC,EAAS,IAAI,IACX,EACG,OAAQ,GAAM,EAAE,OAAS,QAAU,EAAE,OAAO,OAAS,EAAE,CACvD,IAAK,GAAM,EAAE,OAAO,GAAG,CAC3B,CACD,MAGF,IAAK,gBAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACpB,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACpB,CAAC,CACF,MAGF,IAAK,OAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,MAIJ,IAAM,EAAS,YAAY,KAAK,CAAG,EAG7B,EAAa,EAAU,OAAQ,GAAM,EAAE,OAAS,OAAO,CACvD,EAAY,EAAW,OAEvB,EAAkB,EAAW,OAAQ,GAGvC,EAAE,OAAO,OAAS,GAClB,EAAE,EAAE,OAAO,SAAW,GAAK,EAAE,OAAO,KAAO,EAAE,SAAS,aAAa,EAErE,CAAC,OAEG,EAAiB,EAAW,OAAQ,GAAM,EAAE,OAAO,OAAS,EAAE,CAAC,OAE/D,EAAc,EACjB,OAAQ,GAAM,EAAE,aAAe,IAAA,GAAU,CACzC,IAAK,GAAM,EAAE,WAAY,CACtB,EACJ,EAAY,OAAS,EACjB,EAAY,QAAQ,EAAG,IAAM,EAAI,EAAG,EAAE,CAAG,EAAY,OACrD,E
AEA,EAAiB,EAAW,OAAQ,GAAM,EAAE,eAAe,WAAW,CAAC,OACvE,EAAkB,EAAU,OAAQ,GAAM,EAAE,SAAS,CAAC,OAE5D,MAAO,CACL,YACA,kBACA,SAAU,EAAY,EAAI,EAAkB,EAAY,EACxD,iBACA,cAAe,EAAY,EAAI,EAAiB,EAAY,EAC5D,gBACA,iBACA,kBACA,aAAc,EAAO,KACrB,SACD"}
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../src/stopwords.ts","../src/binary-lemmatizer.ts","../src/disambiguation-rules.ts","../src/mini-grammar.ts","../src/disambiguate.ts","../src/types.ts","../src/bloom.ts","../src/compounds.ts","../src/phrases.ts","../src/pipeline.ts"],"sourcesContent":["/**\n * Icelandic stopwords for search indexing.\n *\n * Source: https://github.com/atlijas/icelandic-stop-words\n * Data from DIM (Database of Icelandic Morphology) by Árni Magnússon Institute.\n *\n * Includes all inflected forms of pronouns, prepositions, conjunctions, etc.\n */\n\n// prettier-ignore\nexport const STOPWORDS_IS = new Set([\n \"á\",\"að\",\"aðra\",\"aðrar\",\"aðrir\",\"af\",\"alla\",\"allan\",\"allar\",\"allir\",\n \"allnokkra\",\"allnokkrar\",\"allnokkrir\",\"allnokkru\",\"allnokkrum\",\"allnokkuð\",\n \"allnokkur\",\"allnokkurn\",\"allnokkurra\",\"allnokkurrar\",\"allnokkurri\",\"allnokkurs\",\n \"allnokkurt\",\"allra\",\"allrar\",\"allri\",\"alls\",\"allt\",\"alltað\",\"allur\",\"án\",\n \"andspænis\",\"annað\",\"annaðhvort\",\"annan\",\"annar\",\"annarra\",\"annarrar\",\"annarri\",\n \"annars\",\"árla\",\"ásamt\",\"auk\",\"austan\",\"austanundir\",\"austur\",\"báða\",\"báðar\",\n \"báðir\",\"báðum\",\"bæði\",\"bak\",\"beggja\",\"eða\",\"eður\",\"ef\",\"eftir\",\"ég\",\"ein\",\n \"eina\",\"einar\",\"einhver\",\"einhverja\",\"einhverjar\",\"einhverjir\",\"einhverju\",\n \"einhverjum\",\"einhvern\",\"einhverra\",\"einhverrar\",\"einhverri\",\"einhvers\",\"einir\",\n \"einn\",\"einna\",\"einnar\",\"einni\",\"eins\",\"einskis\",\"einu\",\"einum\",\"eitt\",\"eitthvað\",\n \"eitthvert\",\"ekkert\",\"ella\",\"ellegar\",\"en\",\"enda\",\"enga\",\"engan\",\"engar\",\"engin\",\n \"enginn\",\"engir\",\"engra\",\"engrar\",\"engri\",\"engu\",\"engum\",\"er\",\"fáein\",\"fáeina\",\n \"fáeinar\",\"fáeinir\",\"fáeinna\",\"fáeinum\",\"fjær\",\"fjarri\",\"flestalla\",\"flestallan\",\n 
\"flestallar\",\"flestallir\",\"flestallra\",\"flestallrar\",\"flestallri\",\"flestalls\",\n \"flestallt\",\"flestallur\",\"flestöll\",\"flestöllu\",\"flestöllum\",\"frá\",\"fram\",\"fyrir\",\n \"fyrst\",\"gagnstætt\",\"gagnvart\",\"gegn\",\"gegnt\",\"gegnum\",\"hana\",\"handa\",\"handan\",\n \"hann\",\"hans\",\"heldur\",\"hennar\",\"henni\",\"hið\",\"hin\",\"hina\",\"hinar\",\"hinir\",\"hinn\",\n \"hinna\",\"hinnar\",\"hinni\",\"hins\",\"hinu\",\"hinum\",\"hitt\",\"hjá\",\"honum\",\"hún\",\"hvað\",\n \"hvaða\",\"hvenær\",\"hver\",\"hverja\",\"hverjar\",\"hverjir\",\"hverju\",\"hverjum\",\"hvern\",\n \"hverra\",\"hverrar\",\"hverri\",\"hvers\",\"hvert\",\"hvílík\",\"hvílíka\",\"hvílíkan\",\n \"hvílíkar\",\"hvílíkir\",\"hvílíkra\",\"hvílíkrar\",\"hvílíkri\",\"hvílíks\",\"hvílíkt\",\n \"hvílíku\",\"hvílíkum\",\"hvílíkur\",\"hvor\",\"hvora\",\"hvorar\",\"hvorir\",\"hvorki\",\"hvorn\",\n \"hvorra\",\"hvorrar\",\"hvorri\",\"hvors\",\"hvort\",\"hvoru\",\"hvorug\",\"hvoruga\",\"hvorugan\",\n \"hvorugar\",\"hvorugir\",\"hvorugra\",\"hvorugrar\",\"hvorugri\",\"hvorugs\",\"hvorugt\",\n \"hvorugu\",\"hvorugum\",\"hvorugur\",\"hvorum\",\"í\",\"inn\",\"innan\",\"innanundir\",\"jafnframt\",\n \"jafnhliða\",\"kring\",\"kringum\",\"með\",\"meðal\",\"meðan\",\"meður\",\"mér\",\"mestalla\",\n \"mestallan\",\"mestallar\",\"mestallir\",\"mestallra\",\"mestallrar\",\"mestallri\",\"mestalls\",\n \"mestallt\",\"mestallur\",\"mestöll\",\"mestöllu\",\"mestöllum\",\"miðli\",\"mig\",\"milli\",\n \"millum\",\"mín\",\"mína\",\"mínar\",\"mínir\",\"minn\",\"minna\",\"minnar\",\"minni\",\"míns\",\n \"mínu\",\"mínum\",\"mitt\",\"mót\",\"móti\",\"nær\",\"nærri\",\"næst\",\"næstum\",\"nálægt\",\"né\",\n \"neðan\",\"nein\",\"neina\",\"neinar\",\"neinir\",\"neinn\",\"neinna\",\"neinnar\",\"neinni\",\n \"neins\",\"neinu\",\"neinum\",\"neitt\",\"nema\",\"niður\",\"nokkra\",\"nokkrar\",\"nokkrir\",\n \"nokkru\",\"nokkrum\",\"nokkuð\",\"nokkur\",\"nokkurn\",\"nokkurra\",\"nokkurrar\",\"nokkurri\",\n 
\"nokkurs\",\"nokkurt\",\"norðan\",\"nú\",\"öðru\",\"öðrum\",\"of\",\"ofan\",\"ofar\",\"og\",\"óháð\",\n \"okkar\",\"okkur\",\"öll\",\"öllu\",\"öllum\",\"önnur\",\"órafjarri\",\"oss\",\"sá\",\"sakir\",\n \"sama\",\"saman\",\"samar\",\"samfara\",\"samhliða\",\"sami\",\"samir\",\"samkvæmt\",\"samra\",\n \"samrar\",\"samri\",\"sams\",\"samskipa\",\"samt\",\"samtímis\",\"samur\",\"sem\",\"sér\",\"sérhvað\",\n \"sérhver\",\"sérhverja\",\"sérhverjar\",\"sérhverjir\",\"sérhverju\",\"sérhverjum\",\"sérhvern\",\n \"sérhverra\",\"sérhverrar\",\"sérhverri\",\"sérhvers\",\"sérhvert\",\"síðan\",\"síðla\",\"sig\",\n \"sín\",\"sína\",\"sínar\",\"sínhver\",\"sínhverja\",\"sínhverjar\",\"sínhverjir\",\"sínhverju\",\n \"sínhverjum\",\"sínhvern\",\"sínhverra\",\"sínhverrar\",\"sínhverri\",\"sínhvers\",\"sínhvert\",\n \"sínhvor\",\"sínhvora\",\"sínhvorar\",\"sínhvorir\",\"sínhvorn\",\"sínhvorra\",\"sínhvorrar\",\n \"sínhvorri\",\"sínhvors\",\"sínhvort\",\"sínhvoru\",\"sínhvorum\",\"sínir\",\"sinn\",\"sinna\",\n \"sinnar\",\"sinnhver\",\"sinnhverja\",\"sinnhverjar\",\"sinnhverjir\",\"sinnhverju\",\n \"sinnhverjum\",\"sinnhvern\",\"sinnhverra\",\"sinnhverrar\",\"sinnhverri\",\"sinnhvers\",\n \"sinnhvert\",\"sinnhvor\",\"sinnhvora\",\"sinnhvorar\",\"sinnhvorir\",\"sinnhvorn\",\n \"sinnhvorra\",\"sinnhvorrar\",\"sinnhvorri\",\"sinnhvors\",\"sinnhvort\",\"sinnhvoru\",\n \"sinnhvorum\",\"sinni\",\"síns\",\"sínu\",\"sínum\",\"sitt\",\"sitthvað\",\"sitthver\",\n \"sitthverja\",\"sitthverjar\",\"sitthverjir\",\"sitthverju\",\"sitthverjum\",\"sitthvern\",\n \"sitthverra\",\"sitthverrar\",\"sitthverri\",\"sitthvers\",\"sitthvert\",\"sitthvor\",\n \"sitthvora\",\"sitthvorar\",\"sitthvorir\",\"sitthvorn\",\"sitthvorra\",\"sitthvorrar\",\n \"sitthvorri\",\"sitthvors\",\"sitthvort\",\"sitthvoru\",\"sitthvorum\",\"sjálf\",\"sjálfa\",\n \"sjálfan\",\"sjálfar\",\"sjálfir\",\"sjálfra\",\"sjálfrar\",\"sjálfri\",\"sjálfs\",\"sjálft\",\n 
\"sjálfu\",\"sjálfum\",\"sjálfur\",\"slík\",\"slíka\",\"slíkan\",\"slíkar\",\"slíkir\",\"slíkra\",\n \"slíkrar\",\"slíkri\",\"slíks\",\"slíkt\",\"slíku\",\"slíkum\",\"slíkur\",\"snemma\",\"sökum\",\n \"söm\",\"sömu\",\"sömum\",\"sú\",\"sum\",\"suma\",\"suman\",\"sumar\",\"sumir\",\"sumra\",\"sumrar\",\n \"sumri\",\"sums\",\"sumt\",\"sumu\",\"sumum\",\"sumur\",\"sunnan\",\"svo\",\"til\",\"tráss\",\"um\",\n \"umfram\",\"umhverfis\",\"undan\",\"undir\",\"uns\",\"upp\",\"úr\",\"út\",\"utan\",\"útundan\",\n \"vegna\",\"vér\",\"vestan\",\"vestur\",\"vettugi\",\"við\",\"viður\",\"vor\",\"vora\",\"vorar\",\n \"vorir\",\"vorn\",\"vorra\",\"vorrar\",\"vorri\",\"vors\",\"vort\",\"voru\",\"vorum\",\"yðar\",\n \"yður\",\"yfir\",\"ykkar\",\"ykkur\",\"ýmis\",\"ýmiss\",\"ýmissa\",\"ýmissar\",\"ýmissi\",\"ýmist\",\n \"ýmsa\",\"ýmsan\",\"ýmsar\",\"ýmsir\",\"ýmsu\",\"ýmsum\",\"þá\",\"það\",\"þær\",\"þann\",\"þar\",\n \"þau\",\"þegar\",\"þeim\",\"þeir\",\"þeirra\",\"þeirrar\",\"þeirri\",\"þennan\",\"þér\",\"þess\",\n \"þessa\",\"þessar\",\"þessara\",\"þessarar\",\"þessari\",\"þessi\",\"þessir\",\"þessu\",\n \"þessum\",\"þetta\",\"þið\",\"þig\",\"þín\",\"þína\",\"þínar\",\"þínir\",\"þinn\",\"þinna\",\n \"þinnar\",\"þinni\",\"þíns\",\"þínu\",\"þínum\",\"þitt\",\"þó\",\"þónokkra\",\"þónokkrar\",\n \"þónokkrir\",\"þónokkru\",\"þónokkrum\",\"þónokkuð\",\"þónokkur\",\"þónokkurn\",\"þónokkurra\",\n \"þónokkurrar\",\"þónokkurri\",\"þónokkurs\",\"þónokkurt\",\"þótt\",\"þú\",\"því\",\"þvílík\",\n \"þvílíka\",\"þvílíkan\",\"þvílíkar\",\"þvílíkir\",\"þvílíkra\",\"þvílíkrar\",\"þvílíkri\",\n \"þvílíks\",\"þvílíkt\",\"þvílíku\",\"þvílíkum\",\"þvílíkur\",\n]);\n\n/**\n * Check if a word is a stopword.\n */\nexport function isStopword(word: string): boolean {\n return STOPWORDS_IS.has(word.toLowerCase());\n}\n\n/**\n * Contextual stopword rules for ambiguous words.\n *\n * Some words are stopwords in certain grammatical contexts but not others:\n * - \"á\" as preposition (fs) or adverb (ao) = 
stopword\n * - \"á\" as verb \"eiga\" (so) = NOT a stopword (\"Ég á bíl\")\n * - \"á\" as noun \"river\" (no) = NOT a stopword (\"við ána\")\n *\n * Map: lemma -> Set of POS codes where it IS a stopword\n */\nexport const CONTEXTUAL_STOPWORDS: Map<string, Set<string>> = new Map([\n // \"á\" - prep/adverb = stop, verb/noun = keep\n [\"á\", new Set([\"fs\", \"ao\"])],\n // \"við\" - prep = stop, pronoun \"we\" = stop, noun \"viður\" = keep\n [\"við\", new Set([\"fs\", \"fn\"])],\n // \"af\" - prep/adverb = stop\n [\"af\", new Set([\"fs\", \"ao\"])],\n // \"til\" - prep = stop\n [\"til\", new Set([\"fs\"])],\n // \"um\" - prep = stop\n [\"um\", new Set([\"fs\"])],\n // \"frá\" - prep = stop\n [\"frá\", new Set([\"fs\"])],\n // \"yfir\" - prep/adverb = stop\n [\"yfir\", new Set([\"fs\", \"ao\"])],\n // \"undir\" - prep/adverb = stop\n [\"undir\", new Set([\"fs\", \"ao\"])],\n // \"fyrir\" - prep/adverb = stop\n [\"fyrir\", new Set([\"fs\", \"ao\"])],\n // \"eftir\" - prep/adverb = stop\n [\"eftir\", new Set([\"fs\", \"ao\"])],\n // \"gegn\" - prep = stop\n [\"gegn\", new Set([\"fs\"])],\n // \"hjá\" - prep = stop\n [\"hjá\", new Set([\"fs\"])],\n // \"úr\" - prep = stop, noun \"úr\" (watch) = keep\n [\"úr\", new Set([\"fs\"])],\n // \"í\" - prep = stop\n [\"í\", new Set([\"fs\"])],\n]);\n\n/**\n * Check if a lemma is a stopword in a specific grammatical context.\n *\n * For ambiguous words, uses POS to determine stopword status.\n * For unambiguous words, falls back to standard stopword check.\n *\n * @param lemma - The lemmatized word\n * @param pos - Part of speech code (fs, ao, so, no, etc.)\n * @returns true if the word should be treated as a stopword\n */\nexport function isContextualStopword(lemma: string, pos?: string): boolean {\n const normalized = lemma.toLowerCase();\n\n // Check if this lemma has context-dependent rules\n const contextRule = CONTEXTUAL_STOPWORDS.get(normalized);\n if (contextRule && pos) {\n // Use the rule: stopword only if POS is in the 
stopword set\n return contextRule.has(pos);\n }\n\n // Fall back to standard stopword check\n return STOPWORDS_IS.has(normalized);\n}\n\n/**\n * Filter stopwords from an array of words/lemmas.\n */\nexport function removeStopwords<T extends string>(words: T[]): T[] {\n return words.filter((w) => !isStopword(w));\n}\n","/**\n * Binary format lemmatizer for efficient memory usage.\n *\n * Uses ArrayBuffer with TypedArray views and binary search for O(log n) lookups.\n * Target memory: ~70MB vs ~1.2GB for JS Map-based approach.\n *\n * Binary file format:\n * - Header (32 bytes): magic, version, counts\n * - String pool: all strings concatenated UTF-8\n * - Lemma index: offsets + lengths\n * - Word index: offsets + lengths (sorted alphabetically)\n * - Entry offsets: start/end of entries for each word\n * - Entries: packed lemmaIdx:20 + posCode:4\n * - Bigrams: word1/word2 offsets + lengths + frequencies (sorted)\n */\n\nimport type {\n WordClass,\n LemmaWithPOS,\n LemmaWithMorph,\n LemmatizerLike,\n BigramProvider,\n GrammaticalCase,\n GrammaticalGender,\n GrammaticalNumber,\n MorphFeatures,\n} from \"./types.js\";\n\nconst MAGIC = 0x4c454d41; // \"LEMA\"\n\n// POS code to string mapping (must match build-binary.py)\nconst CODE_TO_POS: WordClass[] = [\n \"no\",\n \"so\",\n \"lo\",\n \"ao\",\n \"fs\",\n \"fn\",\n \"st\",\n \"to\",\n \"gr\",\n \"uh\",\n];\n\n// Case code to string mapping (must match build-binary.py)\n// 0=none, 1=nf, 2=þf, 3=þgf, 4=ef\nconst CODE_TO_CASE: (GrammaticalCase | undefined)[] = [\n undefined, // 0 = none\n \"nf\", // 1 = nominative\n \"þf\", // 2 = accusative\n \"þgf\", // 3 = dative\n \"ef\", // 4 = genitive\n];\n\n// Gender code to string mapping (must match build-binary.py)\n// 0=none, 1=kk, 2=kvk, 3=hk\nconst CODE_TO_GENDER: (GrammaticalGender | undefined)[] = [\n undefined, // 0 = none\n \"kk\", // 1 = masculine\n \"kvk\", // 2 = feminine\n \"hk\", // 3 = neuter\n];\n\n// Number code to string mapping (must match build-binary.py)\n// 
0=et/none, 1=ft\nconst CODE_TO_NUMBER: (GrammaticalNumber | undefined)[] = [\n \"et\", // 0 = singular (or none)\n \"ft\", // 1 = plural\n];\n\nexport interface BinaryLemmatizerOptions {\n fetch?: typeof fetch;\n}\n\nexport interface BinaryLemmatizeOptions {\n wordClass?: WordClass;\n}\n\nexport class BinaryLemmatizer implements LemmatizerLike, BigramProvider {\n private buffer: ArrayBuffer;\n private stringPool: Uint8Array;\n private lemmaOffsets: Uint32Array;\n private lemmaLengths: Uint8Array;\n private wordOffsets: Uint32Array;\n private wordLengths: Uint8Array;\n private entryOffsets: Uint32Array;\n private entries: Uint32Array;\n private bigramW1Offsets: Uint32Array;\n private bigramW1Lengths: Uint8Array;\n private bigramW2Offsets: Uint32Array;\n private bigramW2Lengths: Uint8Array;\n private bigramFreqs: Uint32Array;\n\n private lemmaCount: number;\n private wordCount: number;\n private entryCount: number;\n private bigramCount: number;\n private version: number;\n\n private decoder = new TextDecoder(\"utf-8\");\n\n private constructor(buffer: ArrayBuffer) {\n this.buffer = buffer;\n const view = new DataView(buffer);\n\n // Read header\n const magic = view.getUint32(0, true);\n if (magic !== MAGIC) {\n throw new Error(\n `Invalid binary format: expected magic 0x${MAGIC.toString(16)}, got 0x${magic.toString(16)}`\n );\n }\n\n this.version = view.getUint32(4, true);\n if (this.version !== 1 && this.version !== 2) {\n throw new Error(`Unsupported version: ${this.version}`);\n }\n\n const stringPoolSize = view.getUint32(8, true);\n this.lemmaCount = view.getUint32(12, true);\n this.wordCount = view.getUint32(16, true);\n this.entryCount = view.getUint32(20, true);\n this.bigramCount = view.getUint32(24, true);\n // reserved at 28\n\n // Calculate section offsets\n let offset = 32;\n\n // String pool\n this.stringPool = new Uint8Array(buffer, offset, stringPoolSize);\n offset += stringPoolSize;\n\n // Lemma offsets (u32 × lemmaCount)\n this.lemmaOffsets = new 
Uint32Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount * 4;\n\n // Lemma lengths (u8 × lemmaCount)\n this.lemmaLengths = new Uint8Array(buffer, offset, this.lemmaCount);\n offset += this.lemmaCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Word offsets (u32 × wordCount)\n this.wordOffsets = new Uint32Array(buffer, offset, this.wordCount);\n offset += this.wordCount * 4;\n\n // Word lengths (u8 × wordCount)\n this.wordLengths = new Uint8Array(buffer, offset, this.wordCount);\n offset += this.wordCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Entry offsets (u32 × (wordCount + 1))\n this.entryOffsets = new Uint32Array(buffer, offset, this.wordCount + 1);\n offset += (this.wordCount + 1) * 4;\n\n // Entries (u32 × entryCount)\n this.entries = new Uint32Array(buffer, offset, this.entryCount);\n offset += this.entryCount * 4;\n\n // Bigram word1 offsets\n this.bigramW1Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word1 lengths\n this.bigramW1Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram word2 offsets\n this.bigramW2Offsets = new Uint32Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount * 4;\n\n // Bigram word2 lengths\n this.bigramW2Lengths = new Uint8Array(buffer, offset, this.bigramCount);\n offset += this.bigramCount;\n // Align to 4 bytes\n offset = (offset + 3) & ~3;\n\n // Bigram frequencies\n this.bigramFreqs = new Uint32Array(buffer, offset, this.bigramCount);\n }\n\n /**\n * Load binary lemmatizer from URL.\n */\n static async load(\n url: string,\n options: BinaryLemmatizerOptions = {}\n ): Promise<BinaryLemmatizer> {\n const fetchFn = options.fetch ?? 
fetch;\n const response = await fetchFn(url);\n\n if (!response.ok) {\n throw new Error(`Failed to load binary data: ${response.status}`);\n }\n\n const buffer = await response.arrayBuffer();\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Load from ArrayBuffer (for Node.js or pre-loaded data).\n */\n static loadFromBuffer(buffer: ArrayBuffer): BinaryLemmatizer {\n return new BinaryLemmatizer(buffer);\n }\n\n /**\n * Get string from string pool.\n */\n private getString(offset: number, length: number): string {\n return this.decoder.decode(this.stringPool.subarray(offset, offset + length));\n }\n\n /**\n * Get lemma by index.\n */\n private getLemma(index: number): string {\n return this.getString(this.lemmaOffsets[index], this.lemmaLengths[index]);\n }\n\n /**\n * Get word by index.\n */\n private getWord(index: number): string {\n return this.getString(this.wordOffsets[index], this.wordLengths[index]);\n }\n\n /**\n * Binary search for word in sorted word array.\n * Returns index or -1 if not found.\n */\n private findWord(word: string): number {\n let left = 0;\n let right = this.wordCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midWord = this.getWord(mid);\n\n if (midWord === word) {\n return mid;\n }\n if (midWord < word) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n\n return -1;\n }\n\n /**\n * Look up possible lemmas for a word form.\n * Results are sorted by corpus frequency (most common first).\n * Duplicates are removed (same lemma with different morph features).\n */\n lemmatize(word: string, options: BinaryLemmatizeOptions = {}): string[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [normalized];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n\n const { wordClass } = options;\n const seen = new Set<string>();\n const result: string[] = [];\n\n for (let i = start; i < end; i++) {\n const 
{ lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const pos = CODE_TO_POS[posCode];\n\n if (wordClass && pos !== wordClass) {\n continue;\n }\n\n const lemma = this.getLemma(lemmaIdx);\n if (!seen.has(lemma)) {\n seen.add(lemma);\n result.push(lemma);\n }\n }\n\n if (result.length === 0) {\n return [normalized];\n }\n\n return result;\n }\n\n /**\n * Unpack entry based on binary format version.\n * Version 1: bits 0-3=pos, bits 4-23=lemmaIdx\n * Version 2: bits 0-3=pos, bits 4-6=case, bits 7-8=gender, bit 9=number, bits 10-29=lemmaIdx\n */\n private unpackEntry(entry: number): {\n lemmaIdx: number;\n posCode: number;\n caseCode: number;\n genderCode: number;\n numberCode: number;\n } {\n if (this.version === 1) {\n return {\n lemmaIdx: entry >>> 4,\n posCode: entry & 0xf,\n caseCode: 0,\n genderCode: 0,\n numberCode: 0,\n };\n }\n // Version 2\n return {\n lemmaIdx: entry >>> 10,\n posCode: entry & 0xf,\n caseCode: (entry >>> 4) & 0x7,\n genderCode: (entry >>> 7) & 0x3,\n numberCode: (entry >>> 9) & 0x1,\n };\n }\n\n /**\n * Look up lemmas with their word class (POS) tags.\n * Duplicates are removed (same lemma+pos with different morph features).\n */\n lemmatizeWithPOS(word: string): LemmaWithPOS[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const seen = new Set<string>();\n const result: LemmaWithPOS[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode } = this.unpackEntry(this.entries[i]);\n const lemma = this.getLemma(lemmaIdx);\n const pos = CODE_TO_POS[posCode] ?? 
(\"\" as WordClass);\n const key = `${lemma}:${pos}`;\n\n if (!seen.has(key)) {\n seen.add(key);\n result.push({ lemma, pos });\n }\n }\n\n return result;\n }\n\n /**\n * Look up lemmas with word class and morphological features.\n * Only available with version 2 binary format.\n */\n lemmatizeWithMorph(word: string): LemmaWithMorph[] {\n const normalized = word.toLowerCase();\n const idx = this.findWord(normalized);\n\n if (idx === -1) {\n return [];\n }\n\n const start = this.entryOffsets[idx];\n const end = this.entryOffsets[idx + 1];\n const result: LemmaWithMorph[] = [];\n\n for (let i = start; i < end; i++) {\n const { lemmaIdx, posCode, caseCode, genderCode, numberCode } =\n this.unpackEntry(this.entries[i]);\n\n const morph: MorphFeatures = {};\n const caseVal = CODE_TO_CASE[caseCode];\n const genderVal = CODE_TO_GENDER[genderCode];\n const numberVal = CODE_TO_NUMBER[numberCode];\n\n if (caseVal) morph.case = caseVal;\n if (genderVal) morph.gender = genderVal;\n if (numberVal) morph.number = numberVal;\n\n result.push({\n lemma: this.getLemma(lemmaIdx),\n pos: CODE_TO_POS[posCode] ?? (\"\" as WordClass),\n morph: Object.keys(morph).length > 0 ? morph : undefined,\n });\n }\n\n return result;\n }\n\n /**\n * Check if morphological features are available (version 2+).\n */\n hasMorphFeatures(): boolean {\n return this.version >= 2;\n }\n\n /**\n * Get the binary format version.\n */\n getVersion(): number {\n return this.version;\n }\n\n /**\n * Binary search for bigram. 
Returns index or -1.\n */\n private findBigram(word1: string, word2: string): number {\n let left = 0;\n let right = this.bigramCount - 1;\n\n while (left <= right) {\n const mid = (left + right) >>> 1;\n const midW1 = this.getString(\n this.bigramW1Offsets[mid],\n this.bigramW1Lengths[mid]\n );\n\n if (midW1 < word1) {\n left = mid + 1;\n } else if (midW1 > word1) {\n right = mid - 1;\n } else {\n // word1 matches, compare word2\n const midW2 = this.getString(\n this.bigramW2Offsets[mid],\n this.bigramW2Lengths[mid]\n );\n\n if (midW2 === word2) {\n return mid;\n }\n if (midW2 < word2) {\n left = mid + 1;\n } else {\n right = mid - 1;\n }\n }\n }\n\n return -1;\n }\n\n /**\n * Get bigram frequency.\n * @returns Frequency count, or 0 if not found\n */\n bigramFreq(word1: string, word2: string): number {\n const idx = this.findBigram(word1.toLowerCase(), word2.toLowerCase());\n return idx === -1 ? 0 : this.bigramFreqs[idx];\n }\n\n /**\n * Alias for bigramFreq to satisfy BigramProvider interface.\n * @returns Frequency count, or 0 if not found\n */\n freq(word1: string, word2: string): number {\n return this.bigramFreq(word1, word2);\n }\n\n /**\n * Check if a word is known to the lemmatizer.\n */\n isKnown(word: string): boolean {\n return this.findWord(word.toLowerCase()) !== -1;\n }\n\n /**\n * Get the total number of lemmas in the database.\n */\n get lemmaCountValue(): number {\n return this.lemmaCount;\n }\n\n /**\n * Get the total number of word forms.\n */\n get wordFormCount(): number {\n return this.wordCount;\n }\n\n /**\n * Get the total number of bigrams.\n */\n get bigramCountValue(): number {\n return this.bigramCount;\n }\n\n /**\n * Get raw buffer size (approximate memory usage).\n */\n get bufferSize(): number {\n return this.buffer.byteLength;\n }\n\n /**\n * Get all unique lemmas from the binary data.\n * Useful for compound splitting.\n */\n getAllLemmas(): string[] {\n const lemmas: string[] = [];\n for (let i = 0; i < this.lemmaCount; i++) {\n 
lemmas.push(this.getLemma(i));\n }\n return lemmas;\n }\n}\n","/**\n * Disambiguation rules for Icelandic.\n *\n * Based on GreynirEngine's Prefs.conf and linguistic patterns.\n * These rules help resolve ambiguous words by considering context.\n */\n\nimport type { WordClass } from \"./types.js\";\n\n/**\n * A disambiguation preference rule.\n *\n * When the word matches and the context condition is met,\n * prefer `prefer` POS over `over` POS.\n */\nexport interface DisambiguationRule {\n /** The ambiguous word (lowercase) */\n word: string;\n /** Preferred part of speech in this context */\n prefer: WordClass;\n /** Dispreferred part of speech */\n over: WordClass;\n /** Context condition for when to apply this rule */\n context: \"before_noun\" | \"before_verb\" | \"after_pronoun\" | \"sentence_start\" | \"any\";\n /** Optional description */\n description?: string;\n}\n\n/**\n * Disambiguation rules extracted from Greynir's patterns.\n *\n * Format: { word, prefer, over, context }\n *\n * Common patterns:\n * - \"á\" as preposition (fs) when before noun, as verb \"eiga\" (so) after pronoun\n * - \"við\" as preposition (fs) when before noun, as pronoun (fn) at sentence start\n */\nexport const DISAMBIGUATION_RULES: DisambiguationRule[] = [\n // \"á\" - one of the most ambiguous words\n // Preposition: \"á borðinu\", \"á Íslandi\"\n // Verb (eiga): \"Ég á bíl\", \"Hún á hest\"\n // Noun (river): \"við ána\"\n {\n word: \"á\",\n prefer: \"so\", // verb \"eiga\"\n over: \"fs\", // preposition\n context: \"after_pronoun\",\n description: \"á after pronoun = verb 'eiga' (I own, you own)\",\n },\n {\n word: \"á\",\n prefer: \"fs\", // preposition\n over: \"so\", // verb\n context: \"before_noun\",\n description: \"á before noun = preposition (on, at)\",\n },\n\n // \"við\" - preposition vs pronoun\n // Preposition: \"við gluggann\", \"við borðið\"\n // Pronoun: \"Við erum hér\" (we are here)\n {\n word: \"við\",\n prefer: \"fn\", // pronoun \"we\"\n over: \"fs\", // 
preposition\n context: \"sentence_start\",\n description: \"við at sentence start = pronoun 'we'\",\n },\n {\n word: \"við\",\n prefer: \"fs\", // preposition\n over: \"fn\", // pronoun\n context: \"before_noun\",\n description: \"við before noun = preposition 'by/at'\",\n },\n\n // \"af\" - preposition vs adverb\n {\n word: \"af\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"af before noun = preposition 'of/from'\",\n },\n\n // \"til\" - preposition\n {\n word: \"til\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"til before noun = preposition 'to'\",\n },\n\n // \"um\" - preposition vs adverb\n {\n word: \"um\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"um before noun = preposition 'about/around'\",\n },\n\n // \"yfir\" - preposition vs adverb\n {\n word: \"yfir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"yfir before noun = preposition 'over'\",\n },\n\n // \"undir\" - preposition vs adverb\n {\n word: \"undir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"undir before noun = preposition 'under'\",\n },\n\n // \"fyrir\" - preposition vs adverb\n {\n word: \"fyrir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"fyrir before noun = preposition 'for/before'\",\n },\n\n // \"eftir\" - preposition vs adverb\n {\n word: \"eftir\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"eftir before noun = preposition 'after'\",\n },\n\n // \"frá\" - preposition\n {\n word: \"frá\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"frá before noun = preposition 'from'\",\n },\n\n // \"með\" - preposition vs adverb\n {\n word: \"með\",\n prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"með before noun = preposition 'with'\",\n },\n\n // \"í\" - preposition\n {\n word: \"í\",\n 
prefer: \"fs\",\n over: \"ao\",\n context: \"before_noun\",\n description: \"í before noun = preposition 'in'\",\n },\n\n // \"úr\" - preposition vs noun (watch)\n {\n word: \"úr\",\n prefer: \"fs\",\n over: \"no\",\n context: \"before_noun\",\n description: \"úr before noun = preposition 'out of'\",\n },\n];\n\n/**\n * Look up rules that apply to a specific word.\n */\nexport function getRulesForWord(word: string): DisambiguationRule[] {\n const normalized = word.toLowerCase();\n return DISAMBIGUATION_RULES.filter((r) => r.word === normalized);\n}\n\n/**\n * Check if a word has disambiguation rules.\n */\nexport function hasDisambiguationRules(word: string): boolean {\n return DISAMBIGUATION_RULES.some((r) => r.word === word.toLowerCase());\n}\n","/**\n * Mini-grammar disambiguation rules for Icelandic.\n *\n * Uses case government (forsetningar stjórna falli) to disambiguate\n * prepositions from other parts of speech. For example:\n * - \"á\" + dative noun = preposition \"on/at\"\n * - \"á\" after pronoun = verb \"eiga\" (to own)\n *\n * Based on Greynir's Prepositions.conf but simplified for fast lookup.\n */\n\nimport type {\n GrammaticalCase,\n LemmaWithMorph,\n LemmaWithPOS,\n WordClass,\n} from \"./types.js\";\n\n/**\n * Interface for lemmatizer used in grammar rules.\n */\nexport interface GrammarLemmatizerLike {\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Preposition case government rules.\n *\n * Maps preposition lemma to the grammatical cases it governs.\n * When a preposition is followed by a noun in one of these cases,\n * we can be confident it's being used as a preposition.\n *\n * Source: Greynir's Prepositions.conf\n */\nexport const PREPOSITION_CASES: Map<string, Set<GrammaticalCase>> = new Map<string, Set<GrammaticalCase>>([\n // Both accusative and dative\n [\"á\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // on/at (þf=direction, þgf=location)\n [\"í\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // in (þf=into, 
þgf=inside)\n [\"við\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // at/by (þf=against, þgf=near)\n [\"með\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // with (þf=bring, þgf=accompany)\n [\"undir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // under (þf=motion, þgf=position)\n [\"yfir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // over (þf=motion, þgf=position)\n [\"fyrir\", new Set<GrammaticalCase>([\"þf\", \"þgf\"])], // for/before (þf=in exchange, þgf=in front)\n\n // Accusative only\n [\"um\", new Set<GrammaticalCase>([\"þf\"])], // about/around\n [\"gegnum\", new Set<GrammaticalCase>([\"þf\"])], // through\n [\"kringum\", new Set<GrammaticalCase>([\"þf\"])], // around\n [\"umhverfis\", new Set<GrammaticalCase>([\"þf\"])], // around/surrounding\n\n // Dative only\n [\"af\", new Set<GrammaticalCase>([\"þgf\"])], // of/from\n [\"frá\", new Set<GrammaticalCase>([\"þgf\"])], // from\n [\"hjá\", new Set<GrammaticalCase>([\"þgf\"])], // at/with (someone's place)\n [\"úr\", new Set<GrammaticalCase>([\"þgf\"])], // out of\n [\"að\", new Set<GrammaticalCase>([\"þgf\"])], // to/at\n [\"móti\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"nálægt\", new Set<GrammaticalCase>([\"þgf\"])], // near\n [\"gegn\", new Set<GrammaticalCase>([\"þgf\"])], // against\n [\"gagnvart\", new Set<GrammaticalCase>([\"þgf\"])], // towards/regarding\n [\"handa\", new Set<GrammaticalCase>([\"þgf\"])], // for (someone)\n [\"meðal\", new Set<GrammaticalCase>([\"ef\"])], // among (actually genitive)\n\n // Genitive only\n [\"til\", new Set<GrammaticalCase>([\"ef\"])], // to\n [\"án\", new Set<GrammaticalCase>([\"ef\"])], // without\n [\"vegna\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"sakir\", new Set<GrammaticalCase>([\"ef\"])], // because of\n [\"utan\", new Set<GrammaticalCase>([\"ef\"])], // outside\n [\"innan\", new Set<GrammaticalCase>([\"ef\"])], // inside\n [\"meðfram\", new Set<GrammaticalCase>([\"þgf\"])], // along\n [\"milli\", new 
Set<GrammaticalCase>([\"ef\"])], // between\n [\"auk\", new Set<GrammaticalCase>([\"ef\"])], // in addition to\n [\"í stað\", new Set<GrammaticalCase>([\"ef\"])], // instead of\n]);\n\n/**\n * Nominative-case pronouns that can precede verbs.\n * When one of these is followed by a potentially ambiguous word,\n * prefer the verb reading.\n */\nexport const NOMINATIVE_PRONOUNS = new Set([\n \"ég\",\n \"þú\",\n \"hann\",\n \"hún\",\n \"það\",\n \"við\",\n \"þið\",\n \"þeir\",\n \"þær\",\n \"þau\",\n]);\n\n/**\n * Result of applying a mini-grammar rule.\n */\nexport interface GrammarRuleMatch {\n /** The preferred lemma */\n lemma: string;\n /** The preferred POS */\n pos: WordClass;\n /** Rule that matched */\n rule: string;\n /** Confidence score (0-1) */\n confidence: number;\n}\n\n/**\n * Check if a preposition candidate can govern the case of the following word.\n *\n * @param prepLemma - The potential preposition lemma\n * @param nextWordMorph - Morphological features of the next word\n * @returns True if the preposition can govern this case\n */\nexport function canGovernCase(\n prepLemma: string,\n nextWordCase: GrammaticalCase | undefined\n): boolean {\n if (!nextWordCase) return false;\n const cases = PREPOSITION_CASES.get(prepLemma);\n return cases?.has(nextWordCase) ?? 
false;\n}\n\n/**\n * Apply preposition+case rule to disambiguate.\n *\n * If the current word can be a preposition and the next word has\n * a case governed by that preposition, prefer the preposition reading.\n *\n * @param candidates - All possible readings of the current word\n * @param nextWordMorph - Morphological analyses of the next word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPrepositionRule(\n candidates: LemmaWithMorph[],\n nextWordMorph: LemmaWithMorph[]\n): GrammarRuleMatch | null {\n // Find preposition candidates\n const prepCandidates = candidates.filter((c) => c.pos === \"fs\");\n if (prepCandidates.length === 0) return null;\n\n // Check if any next word form has a case governed by any prep candidate\n for (const prep of prepCandidates) {\n for (const nextForm of nextWordMorph) {\n if (nextForm.morph?.case && canGovernCase(prep.lemma, nextForm.morph.case)) {\n return {\n lemma: prep.lemma,\n pos: \"fs\",\n rule: `prep+${nextForm.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n }\n\n return null;\n}\n\n/**\n * Apply pronoun+verb rule to disambiguate.\n *\n * If the previous word is a nominative pronoun and the current word\n * can be a verb, prefer the verb reading.\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyPronounVerbRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null\n): GrammarRuleMatch | null {\n if (!prevWord) return null;\n\n const prevLower = prevWord.toLowerCase();\n if (!NOMINATIVE_PRONOUNS.has(prevLower)) return null;\n\n // Find verb candidates\n const verbCandidates = candidates.filter((c) => c.pos === \"so\");\n if (verbCandidates.length === 0) return null;\n\n // Prefer verb over preposition/noun when after pronoun\n const hasNonVerb = candidates.some((c) => c.pos !== \"so\");\n if (!hasNonVerb) return 
null;\n\n // Return the verb candidate (prefer eiga for \"á\")\n const eigaCandidate = verbCandidates.find((c) => c.lemma === \"eiga\");\n const verbCandidate = eigaCandidate ?? verbCandidates[0];\n\n return {\n lemma: verbCandidate.lemma,\n pos: \"so\",\n rule: \"pronoun+verb\",\n confidence: 0.85,\n };\n}\n\n/**\n * Apply noun-after-preposition rule to disambiguate.\n *\n * If the previous word is a preposition and the current word has a\n * noun candidate with a case governed by that preposition, prefer\n * the noun reading.\n *\n * This rule only applies when:\n * - The previous word is UNAMBIGUOUSLY a preposition (no pronoun reading), OR\n * - The current word has no verb candidate\n *\n * Example: \"til fundar\" → \"fundar\" is noun \"fundur\" (genitive), not verb \"funda\"\n * Counter-example: \"við fórum\" → \"við\" is pronoun, \"fórum\" is verb \"fara\"\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - The previous word (raw form)\n * @param lemmatizer - Lemmatizer for looking up the previous word\n * @returns GrammarRuleMatch if a rule applies, null otherwise\n */\nexport function applyNounAfterPrepositionRule(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n lemmatizer: GrammarLemmatizerLike | null\n): GrammarRuleMatch | null {\n if (!prevWord || !lemmatizer?.lemmatizeWithPOS) return null;\n\n // Check if previous word is a preposition\n const prevLemmas = lemmatizer.lemmatizeWithPOS(prevWord);\n const prepCandidate = prevLemmas.find((l) => l.pos === \"fs\");\n if (!prepCandidate) return null;\n\n // Check if the previous word could also be a pronoun\n const hasPronounReading = prevLemmas.some((l) => l.pos === \"fn\");\n\n // Check if current word has a verb candidate\n const hasVerbCandidate = candidates.some((c) => c.pos === \"so\");\n\n // If prevWord is ambiguously pronoun/preposition AND current word can be a verb,\n // don't apply this rule (let pronoun+verb rule or bigrams handle it)\n if 
(hasPronounReading && hasVerbCandidate) {\n return null;\n }\n\n // Get cases this preposition governs\n const governedCases = PREPOSITION_CASES.get(prepCandidate.lemma);\n if (!governedCases) return null;\n\n // Find noun candidate with matching case\n const nounCandidates = candidates.filter((c) => c.pos === \"no\");\n for (const noun of nounCandidates) {\n if (noun.morph?.case && governedCases.has(noun.morph.case)) {\n return {\n lemma: noun.lemma,\n pos: \"no\",\n rule: `noun_after_prep+${noun.morph.case}`,\n confidence: 0.9,\n };\n }\n }\n\n return null;\n}\n\n/**\n * Apply all mini-grammar rules in sequence.\n *\n * Rules are applied in order of specificity:\n * 1. Preposition + case government (most reliable)\n * 2. Noun after preposition (governed case)\n * 3. Pronoun + verb pattern\n *\n * @param candidates - All possible readings of the current word\n * @param prevWord - Previous word (raw form)\n * @param nextWordMorph - Morphological analyses of the next word\n * @param lemmatizer - Optional lemmatizer for looking up previous word POS\n * @returns GrammarRuleMatch if any rule applies, null otherwise\n */\nexport function applyGrammarRules(\n candidates: LemmaWithMorph[],\n prevWord: string | null,\n nextWordMorph: LemmaWithMorph[],\n lemmatizer: GrammarLemmatizerLike | null = null\n): GrammarRuleMatch | null {\n // Rule 1: Preposition + governed case\n const prepRule = applyPrepositionRule(candidates, nextWordMorph);\n if (prepRule) return prepRule;\n\n // Rule 2: Noun after preposition with governed case\n const nounAfterPrepRule = applyNounAfterPrepositionRule(candidates, prevWord, lemmatizer);\n if (nounAfterPrepRule) return nounAfterPrepRule;\n\n // Rule 3: Pronoun + verb\n const verbRule = applyPronounVerbRule(candidates, prevWord);\n if (verbRule) return verbRule;\n\n return null;\n}\n\n/**\n * Check if a word is a known preposition.\n */\nexport function isKnownPreposition(lemma: string): boolean {\n return 
PREPOSITION_CASES.has(lemma);\n}\n\n/**\n * Get the cases governed by a preposition.\n */\nexport function getGovernedCases(prepLemma: string): Set<GrammaticalCase> | undefined {\n return PREPOSITION_CASES.get(prepLemma);\n}\n","/**\n * Disambiguation algorithm using a multi-phase pipeline.\n *\n * When a word has multiple possible lemmas, use surrounding context\n * and linguistic rules to select the most likely one.\n *\n * Pipeline phases:\n * 1. Unambiguous - words with only one lemma candidate\n * 2. Phrase rules - multi-word expressions and fixed phrases\n * 3. Disambiguation rules - contextual preferences (e.g., \"á\" after pronoun = verb)\n * 4. Grammar rules - case government (preposition + case noun)\n * 5. Word bigrams - statistical scoring using bigram frequencies\n * 6. Fallback - use first lemma if no other evidence\n */\n\nimport { STOPWORDS_IS } from \"./stopwords.js\";\nimport type { LemmatizerLike, LemmaWithPOS, LemmaWithMorph, BigramProvider, WordClass } from \"./types.js\";\nimport { DISAMBIGUATION_RULES, type DisambiguationRule } from \"./disambiguation-rules.js\";\nimport { applyGrammarRules } from \"./mini-grammar.js\";\n\nexport interface DisambiguatorOptions {\n /** Weight for left context (previous word) */\n leftWeight?: number;\n /** Weight for right context (next word) */\n rightWeight?: number;\n /** Enable preference rules (e.g., \"á\" context rules) */\n usePreferenceRules?: boolean;\n /** Enable grammar rules (case government) */\n useGrammarRules?: boolean;\n}\n\nexport interface DisambiguatedToken {\n /** Original token */\n token: string;\n /** Chosen lemma */\n lemma: string;\n /** Part of speech (if available) */\n pos?: WordClass;\n /** All candidate lemmas */\n candidates: string[];\n /** Candidates with POS (if available) */\n candidatesWithPOS?: LemmaWithPOS[];\n /** Was disambiguation needed? 
*/\n ambiguous: boolean;\n /** Confidence score (0-1) */\n confidence: number;\n /** Which phase resolved this token */\n resolvedBy?: string;\n}\n\n/**\n * Extended lemmatizer interface that supports morphological lookup.\n */\ninterface MorphLemmatizerLike extends LemmatizerLike {\n lemmatizeWithMorph?(word: string): LemmaWithMorph[];\n}\n\n/**\n * Context for disambiguation, including surrounding tokens.\n */\ninterface DisambiguationContext {\n /** Previous word (if any) */\n prevWord: string | null;\n /** Next word (if any) */\n nextWord: string | null;\n /** Previous token's lemmas (if available) */\n prevLemmas?: string[];\n /** Next token's lemmas (if available) */\n nextLemmas?: string[];\n /** Next word's morphological analyses (if available) */\n nextWordMorph?: LemmaWithMorph[];\n /** All tokens in the sequence */\n allTokens: string[];\n /** Current index in the sequence */\n index: number;\n}\n\nexport interface DisambiguationContextHint {\n prevLemmas?: string[];\n nextLemmas?: string[];\n}\n\n/**\n * A disambiguation phase that processes candidates.\n */\ninterface DisambiguationPhase {\n name: string;\n run(\n candidates: LemmaWithPOS[],\n context: DisambiguationContext,\n disambiguator: Disambiguator\n ): { lemma: string; pos?: WordClass; confidence: number } | null;\n}\n\n/**\n * Phase 1: Handle unambiguous cases (single candidate).\n */\nconst unambiguousPhase: DisambiguationPhase = {\n name: \"unambiguous\",\n run(candidates) {\n if (candidates.length === 1) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1.0,\n };\n }\n return null;\n },\n};\n\n/**\n * Phase 2: Apply disambiguation rules based on context.\n */\nconst preferenceRulesPhase: DisambiguationPhase = {\n name: \"preference_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.usePreferenceRules) return null;\n\n for (const rule of DISAMBIGUATION_RULES) {\n const match = applyRule(rule, candidates, context);\n if (match) {\n 
return {\n lemma: match.lemma,\n pos: match.pos,\n confidence: 0.85,\n };\n }\n }\n return null;\n },\n};\n\n/**\n * Apply a single disambiguation rule.\n */\nfunction applyRule(\n rule: DisambiguationRule,\n candidates: LemmaWithPOS[],\n context: DisambiguationContext\n): LemmaWithPOS | null {\n // Find candidates matching the word and preferred POS\n const preferredCandidate = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.prefer\n );\n const dispreferred = candidates.find(\n (c) => c.lemma.toLowerCase() === rule.word.toLowerCase() && c.pos === rule.over\n );\n\n if (!preferredCandidate || !dispreferred) {\n return null;\n }\n\n // Check context condition\n if (rule.context === \"before_noun\") {\n // Next word should be a noun (starts with uppercase or known noun)\n const next = context.nextWord;\n if (next && /^[A-ZÁÉÍÓÚÝÞÆÖ]/.test(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"before_verb\") {\n // Next word suggests a verb context (harder to detect without POS)\n // Simple heuristic: if next word is lowercase and not a common noun determiner\n const next = context.nextWord?.toLowerCase();\n if (next && ![\"þessi\", \"þetta\", \"sá\", \"sú\", \"það\", \"hinn\", \"hin\", \"hið\"].includes(next)) {\n return preferredCandidate;\n }\n } else if (rule.context === \"after_pronoun\") {\n // Previous word is a pronoun\n const prev = context.prevWord?.toLowerCase();\n const pronouns = [\"ég\", \"þú\", \"hann\", \"hún\", \"það\", \"við\", \"þið\", \"þeir\", \"þær\", \"þau\"];\n if (prev && pronouns.includes(prev)) {\n return preferredCandidate;\n }\n }\n\n return null;\n}\n\n/**\n * Phase 3: Apply grammar rules (case government).\n *\n * Uses morphological features to apply preposition+case and pronoun+verb rules.\n */\nconst grammarRulesPhase: DisambiguationPhase = {\n name: \"grammar_rules\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.useGrammarRules) return null;\n\n // 
Convert LemmaWithPOS to LemmaWithMorph if needed\n const candidatesWithMorph: LemmaWithMorph[] = candidates.map((c) => ({\n ...c,\n morph: undefined,\n }));\n\n // Get morphological info for candidates if available\n const currentWord = context.allTokens[context.index];\n if (currentWord) {\n const morphCandidates = disambiguator.getMorph(currentWord);\n if (morphCandidates) {\n // Replace with morph-enriched candidates\n candidatesWithMorph.length = 0;\n candidatesWithMorph.push(...morphCandidates);\n }\n }\n\n // Apply grammar rules\n const result = applyGrammarRules(\n candidatesWithMorph,\n context.prevWord,\n context.nextWordMorph ?? [],\n disambiguator.lemmatizer\n );\n\n if (result) {\n return {\n lemma: result.lemma,\n pos: result.pos,\n confidence: result.confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 4: Score using bigram frequencies.\n */\nconst bigramPhase: DisambiguationPhase = {\n name: \"word_bigrams\",\n run(candidates, context, disambiguator) {\n if (!disambiguator.bigrams) return null;\n if (candidates.length === 0) return null;\n\n const scores: { candidate: LemmaWithPOS; score: number }[] = [];\n\n for (const candidate of candidates) {\n let score = 0;\n\n // Left context: bigram(prevWord, lemma)\n if (context.prevWord) {\n const prevLemmas = context.prevLemmas || disambiguator.lemmatizer.lemmatize(context.prevWord);\n for (const prevLemma of prevLemmas) {\n const freq = disambiguator.bigrams.freq(prevLemma, candidate.lemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.leftWeight;\n }\n }\n }\n\n // Right context: bigram(lemma, nextWord)\n if (context.nextWord) {\n const nextLemmas = context.nextLemmas || disambiguator.lemmatizer.lemmatize(context.nextWord);\n for (const nextLemma of nextLemmas) {\n const freq = disambiguator.bigrams.freq(candidate.lemma, nextLemma);\n if (freq > 0) {\n score += Math.log(freq + 1) * disambiguator.rightWeight;\n }\n }\n }\n\n scores.push({ candidate, score });\n }\n\n // Sort 
by score\n scores.sort((a, b) => b.score - a.score);\n\n // Check if we have scores and if top score is positive\n if (scores.length > 0 && scores[0].score > 0) {\n const topScore = scores[0].score;\n const totalScore = scores.reduce((sum, s) => sum + Math.exp(s.score), 0);\n const confidence = totalScore > 0 ? Math.exp(topScore) / totalScore : 0.5;\n\n return {\n lemma: scores[0].candidate.lemma,\n pos: scores[0].candidate.pos,\n confidence,\n };\n }\n\n return null;\n },\n};\n\n/**\n * Phase 5: Fallback to first candidate.\n */\nconst fallbackPhase: DisambiguationPhase = {\n name: \"fallback\",\n run(candidates) {\n if (candidates.length > 0) {\n return {\n lemma: candidates[0].lemma,\n pos: candidates[0].pos,\n confidence: 1 / candidates.length,\n };\n }\n return null;\n },\n};\n\n/**\n * All disambiguation phases in order.\n */\nconst PHASES: DisambiguationPhase[] = [\n unambiguousPhase,\n preferenceRulesPhase,\n grammarRulesPhase,\n bigramPhase,\n fallbackPhase,\n];\n\n/**\n * Disambiguate lemmas using a multi-phase pipeline.\n */\nexport class Disambiguator {\n lemmatizer: MorphLemmatizerLike;\n bigrams: BigramProvider | null;\n leftWeight: number;\n rightWeight: number;\n usePreferenceRules: boolean;\n useGrammarRules: boolean;\n private morphCache: Map<string, LemmaWithMorph[]> | null;\n\n constructor(\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider | null = null,\n options: DisambiguatorOptions = {}\n ) {\n this.lemmatizer = lemmatizer as MorphLemmatizerLike;\n this.bigrams = bigrams;\n this.leftWeight = options.leftWeight ?? 1.0;\n this.rightWeight = options.rightWeight ?? 1.0;\n this.usePreferenceRules = options.usePreferenceRules ?? true;\n this.useGrammarRules = options.useGrammarRules ?? true;\n this.morphCache = this.lemmatizer.lemmatizeWithMorph ? 
new Map() : null;\n }\n\n private getMorph(word: string): LemmaWithMorph[] | undefined {\n if (!this.lemmatizer.lemmatizeWithMorph || !this.morphCache) return undefined;\n const key = word.toLowerCase();\n const cached = this.morphCache.get(key);\n if (cached) return cached;\n const morph = this.lemmatizer.lemmatizeWithMorph(word);\n this.morphCache.set(key, morph);\n return morph;\n }\n\n /**\n * Disambiguate a single word given context.\n *\n * @param word - The word to lemmatize\n * @param prevWord - Previous word (left context), or null\n * @param nextWord - Next word (right context), or null\n */\n disambiguate(\n word: string,\n prevWord: string | null,\n nextWord: string | null,\n hint: DisambiguationContextHint = {}\n ): DisambiguatedToken {\n // Get candidates with POS if available\n let candidatesWithPOS: LemmaWithPOS[];\n if (this.lemmatizer.lemmatizeWithPOS) {\n candidatesWithPOS = this.lemmatizer.lemmatizeWithPOS(word);\n } else {\n // Fall back to plain lemmatization\n const lemmas = this.lemmatizer.lemmatize(word);\n candidatesWithPOS = lemmas.map((l) => ({ lemma: l, pos: \"no\" as WordClass }));\n }\n\n const candidates = candidatesWithPOS.map((c) => c.lemma);\n const token = word;\n\n // Get morphological info for next word if available\n let nextWordMorph: LemmaWithMorph[] | undefined;\n if (nextWord) {\n nextWordMorph = this.getMorph(nextWord);\n }\n\n // Build context\n const context: DisambiguationContext = {\n prevWord,\n nextWord,\n prevLemmas: hint.prevLemmas,\n nextLemmas: hint.nextLemmas,\n nextWordMorph,\n allTokens: [word],\n index: 0,\n };\n\n // Run through phases\n for (const phase of PHASES) {\n const result = phase.run(candidatesWithPOS, context, this);\n if (result) {\n return {\n token,\n lemma: result.lemma,\n pos: result.pos,\n candidates,\n candidatesWithPOS,\n ambiguous: candidates.length > 1,\n confidence: result.confidence,\n resolvedBy: phase.name,\n };\n }\n }\n\n // Should never reach here due to fallback phase\n return 
{\n token,\n lemma: word.toLowerCase(),\n candidates,\n candidatesWithPOS,\n ambiguous: false,\n confidence: 0,\n resolvedBy: \"none\",\n };\n }\n\n /**\n * Disambiguate an array of tokens.\n *\n * @param tokens - Array of word tokens\n * @returns Array of disambiguated tokens\n */\n disambiguateAll(tokens: string[]): DisambiguatedToken[] {\n const results: DisambiguatedToken[] = [];\n\n for (let i = 0; i < tokens.length; i++) {\n const word = tokens[i];\n const prevWord = i > 0 ? tokens[i - 1] : null;\n const nextWord = i < tokens.length - 1 ? tokens[i + 1] : null;\n\n results.push(this.disambiguate(word, prevWord, nextWord));\n }\n\n return results;\n }\n\n /**\n * Extract unique lemmas from text with disambiguation.\n *\n * @param tokens - Array of word tokens\n * @returns Set of unique lemmas (best guess for each ambiguous word)\n */\n extractLemmas(tokens: string[]): Set<string> {\n const lemmas = new Set<string>();\n const disambiguated = this.disambiguateAll(tokens);\n\n for (const result of disambiguated) {\n lemmas.add(result.lemma);\n }\n\n return lemmas;\n }\n}\n\n/**\n * Shortcut for simple lemma extraction with disambiguation.\n */\nexport function extractDisambiguatedLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n bigrams: BigramProvider,\n options: {\n tokenize?: (text: string) => string[];\n removeStopwords?: boolean;\n } = {}\n): Set<string> {\n const { tokenize, removeStopwords } = options;\n\n // Tokenize\n const tokens = tokenize\n ? 
tokenize(text)\n : text\n .split(/\\s+/)\n .filter((t) => t.length > 0)\n .map((t) => t.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\"))\n .filter((t) => t.length > 0);\n\n // Disambiguate\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n const lemmas = disambiguator.extractLemmas(tokens);\n\n // Filter stopwords if requested\n if (removeStopwords) {\n for (const lemma of lemmas) {\n if (STOPWORDS_IS.has(lemma)) {\n lemmas.delete(lemma);\n }\n }\n }\n\n return lemmas;\n}\n","/**\n * Shared type definitions to avoid circular imports.\n */\n\n/**\n * Word class (part-of-speech) codes from BÍN.\n *\n * These are simplified from BÍN's detailed categories:\n * - kk/kvk/hk (gendered nouns) → 'no'\n * - pfn (personal pronoun) → 'fn'\n */\nexport type WordClass =\n | \"no\" // nafnorð (noun)\n | \"so\" // sagnorð (verb)\n | \"lo\" // lýsingarorð (adjective)\n | \"ao\" // atviksorð (adverb)\n | \"fs\" // forsetning (preposition)\n | \"fn\" // fornafn (pronoun)\n | \"st\" // samtenging (conjunction)\n | \"to\" // töluorð (numeral)\n | \"gr\" // greinir (article)\n | \"uh\"; // upphrópun (interjection)\n\n/**\n * Human-readable names for word classes.\n */\nexport const WORD_CLASS_NAMES: Record<WordClass, string> = {\n no: \"noun\",\n so: \"verb\",\n lo: \"adjective\",\n ao: \"adverb\",\n fs: \"preposition\",\n fn: \"pronoun\",\n st: \"conjunction\",\n to: \"numeral\",\n gr: \"article\",\n uh: \"interjection\",\n};\n\n/**\n * Icelandic names for word classes.\n */\nexport const WORD_CLASS_NAMES_IS: Record<WordClass, string> = {\n no: \"nafnorð\",\n so: \"sagnorð\",\n lo: \"lýsingarorð\",\n ao: \"atviksorð\",\n fs: \"forsetning\",\n fn: \"fornafn\",\n st: \"samtenging\",\n to: \"töluorð\",\n gr: \"greinir\",\n uh: \"upphrópun\",\n};\n\n/**\n * Grammatical case (fall) in Icelandic.\n */\nexport type GrammaticalCase = \"nf\" | \"þf\" | \"þgf\" | \"ef\";\n\n/**\n * Grammatical gender (kyn) in Icelandic.\n */\nexport type GrammaticalGender = \"kk\" | \"kvk\" 
| \"hk\";\n\n/**\n * Grammatical number (tala) in Icelandic.\n */\nexport type GrammaticalNumber = \"et\" | \"ft\";\n\n/**\n * Human-readable names for cases.\n */\nexport const CASE_NAMES: Record<GrammaticalCase, string> = {\n nf: \"nominative\",\n þf: \"accusative\",\n þgf: \"dative\",\n ef: \"genitive\",\n};\n\n/**\n * Human-readable names for genders.\n */\nexport const GENDER_NAMES: Record<GrammaticalGender, string> = {\n kk: \"masculine\",\n kvk: \"feminine\",\n hk: \"neuter\",\n};\n\n/**\n * Human-readable names for numbers.\n */\nexport const NUMBER_NAMES: Record<GrammaticalNumber, string> = {\n et: \"singular\",\n ft: \"plural\",\n};\n\n/**\n * Morphological features extracted from BÍN.\n */\nexport interface MorphFeatures {\n case?: GrammaticalCase;\n gender?: GrammaticalGender;\n number?: GrammaticalNumber;\n}\n\n/**\n * A lemma with its word class.\n */\nexport interface LemmaWithPOS {\n lemma: string;\n pos: WordClass;\n}\n\n/**\n * A lemma with word class and morphological features.\n */\nexport interface LemmaWithMorph extends LemmaWithPOS {\n morph?: MorphFeatures;\n}\n\n/**\n * Interface for lemmatizer-like objects.\n * Used to avoid circular dependency between modules.\n */\nexport interface LemmatizerLike {\n lemmatize(word: string): string[];\n lemmatizeWithPOS?(word: string): LemmaWithPOS[];\n}\n\n/**\n * Interface for bigram frequency lookup.\n * Used for disambiguation scoring.\n */\nexport interface BigramProvider {\n freq(word1: string, word2: string): number;\n}\n","/**\n * Minimal Bloom filter for compact set membership checks.\n */\n\nexport interface BloomFilterOptions {\n falsePositiveRate?: number;\n maxHashFunctions?: number;\n}\n\nexport class BloomFilter {\n private bits: Uint8Array;\n private sizeBits: number;\n private hashCount: number;\n\n private constructor(bits: Uint8Array, sizeBits: number, hashCount: number) {\n this.bits = bits;\n this.sizeBits = sizeBits;\n this.hashCount = hashCount;\n }\n\n static fromValues(values: 
string[], options: BloomFilterOptions = {}): BloomFilter {\n const n = Math.max(values.length, 1);\n const p = options.falsePositiveRate ?? 0.01;\n\n const m = Math.max(1, Math.ceil((-n * Math.log(p)) / (Math.LN2 * Math.LN2)));\n const k = Math.max(1, Math.round((m / n) * Math.LN2));\n const hashCount = options.maxHashFunctions\n ? Math.min(k, options.maxHashFunctions)\n : k;\n\n const bytes = Math.ceil(m / 8);\n const bits = new Uint8Array(bytes);\n const filter = new BloomFilter(bits, m, hashCount);\n\n for (const value of values) {\n filter.add(value);\n }\n\n return filter;\n }\n\n add(value: string): void {\n const [h1, h2] = this.hashes(value);\n for (let i = 0; i < this.hashCount; i++) {\n const combined = (h1 + i * h2) % this.sizeBits;\n this.setBit(combined);\n }\n }\n\n has(value: string): boolean {\n const [h1, h2] = this.hashes(value);\n for (let i = 0; i < this.hashCount; i++) {\n const combined = (h1 + i * h2) % this.sizeBits;\n if (!this.getBit(combined)) return false;\n }\n return true;\n }\n\n private setBit(index: number): void {\n const byteIndex = index >>> 3;\n const bit = index & 7;\n this.bits[byteIndex] |= 1 << bit;\n }\n\n private getBit(index: number): boolean {\n const byteIndex = index >>> 3;\n const bit = index & 7;\n return (this.bits[byteIndex] & (1 << bit)) !== 0;\n }\n\n private hashes(value: string): [number, number] {\n const str = value.toLowerCase();\n let hash1 = 2166136261 >>> 0;\n let hash2 = 2166136261 >>> 0;\n\n for (let i = 0; i < str.length; i++) {\n const code = str.charCodeAt(i);\n hash1 ^= code;\n hash1 = Math.imul(hash1, 16777619) >>> 0;\n\n hash2 ^= code;\n hash2 = Math.imul(hash2, 2166136261) >>> 0;\n }\n\n hash2 ^= hash2 >>> 13;\n hash2 = Math.imul(hash2, 0x85ebca6b) >>> 0;\n hash2 ^= hash2 >>> 16;\n\n return [hash1 >>> 0, hash2 >>> 0 || 0x27d4eb2d];\n }\n}\n","/**\n * Compound word splitting for Icelandic.\n *\n * Icelandic compounds are written as single words:\n * - \"bílstjóri\" = \"bíl\" (car) + \"stjóri\" 
(driver)\n * - \"sjúkrahús\" = \"sjúkra\" (sick-GEN) + \"hús\" (house)\n *\n * Strategy:\n * 1. Try splitting at each position\n * 2. Check if both parts are known words\n * 3. Handle common compound linking letters (s, u, a)\n * 4. Score by part lengths (prefer balanced splits)\n */\n\nimport type { LemmatizerLike } from \"./types.js\";\nimport { BloomFilter, type BloomFilterOptions } from \"./bloom.js\";\n\n/**\n * Protected lemmas that should NEVER be split as compounds.\n * Mostly place names that happen to end in common word parts.\n */\nexport const PROTECTED_LEMMAS = new Set([\n // Countries ending in -land\n \"ísland\",\n \"england\",\n \"írland\",\n \"skotland\",\n \"finnland\",\n \"grænland\",\n \"holland\",\n \"þýskaland\",\n \"frakkland\",\n \"pólland\",\n \"tékkland\",\n \"svissland\",\n \"rússland\",\n \"eistland\",\n \"lettland\",\n \"litháen\",\n // Other countries/regions\n \"danmörk\",\n \"noregur\",\n \"svíþjóð\",\n \"bandaríkin\",\n \"spánn\",\n \"portúgal\",\n \"ítalía\",\n \"grikkland\",\n // Icelandic place names (from BÍN)\n \"þingvellir\",\n \"akureyri\",\n \"ísafjörður\",\n \"reykjavík\",\n \"keflavík\",\n \"hafnarfjörður\",\n \"kópavogur\",\n \"seltjarnarnes\",\n \"garðabær\",\n \"mosfellsbær\",\n \"vestmannaeyjar\",\n \"húsavík\",\n \"sauðárkrókur\",\n \"siglufjörður\",\n \"ólafsfjörður\",\n \"dalvík\",\n \"egilsstaðir\",\n \"neskaupstaður\",\n \"seyðisfjörður\",\n \"eskifjörður\",\n \"reyðarfjörður\",\n \"fáskrúðsfjörður\",\n \"stöðvarfjörður\",\n \"djúpivogur\",\n \"höfn\",\n \"vík\",\n \"selfoss\",\n \"hveragerði\",\n \"þorlákshöfn\",\n \"grindavík\",\n \"sandgerði\",\n \"borgarnes\",\n \"stykkishólmur\",\n \"grundarfjörður\",\n \"ólafsvík\",\n \"búðardalur\",\n \"patreksfjörður\",\n \"flateyri\",\n \"suðureyri\",\n \"bolungarvík\",\n \"hólmavík\",\n \"hvammstangi\",\n \"blönduós\",\n \"skagaströnd\",\n \"varmahlíð\",\n // Literary/historical places\n \"hlíðarendi\",\n \"bergþórshvol\",\n // Company names\n \"íslandsbanki\",\n 
\"landsbankinn\",\n \"arionbanki\",\n // Institutions\n \"alþingi\",\n]);\n\nexport interface CompoundSplit {\n /** Original word */\n word: string;\n /** Constituent parts (lemmatized) - all variants for indexing */\n parts: string[];\n /** All index terms: parts + original word */\n indexTerms: string[];\n /** Split confidence (0-1) */\n confidence: number;\n /** Is this a compound? */\n isCompound: boolean;\n}\n\n/**\n * Splitting mode for compound words.\n *\n * - \"aggressive\": Try to split all words, even known BÍN entries\n * - \"balanced\": Split unknown words; split known words only if high confidence\n * - \"conservative\": Only split at hyphens or very high confidence cases\n */\nexport type CompoundSplitMode = \"aggressive\" | \"balanced\" | \"conservative\";\n\nexport interface CompoundSplitterOptions {\n /**\n * Minimum part length.\n * Default: 3. Set to 2 for more aggressive splitting (e.g., \"ís\" in \"ísland\").\n */\n minPartLength?: number;\n /** Try removing linking letters (s, u, a) */\n tryLinkingLetters?: boolean;\n /**\n * Splitting mode.\n * Default: \"balanced\"\n */\n mode?: CompoundSplitMode;\n}\n\n/**\n * Common compound tail words in Icelandic.\n * These are often the second part of compounds and boost split confidence.\n */\nconst COMMON_COMPOUND_TAILS = new Set([\n // People/roles\n \"maður\",\n \"kona\",\n \"stjóri\",\n \"ráðherra\",\n \"forseti\",\n \"formaður\",\n \"fulltrúi\",\n \"starfsmaður\",\n // Places\n \"hús\",\n \"staður\",\n \"vegur\",\n \"borg\",\n \"bær\",\n \"dalur\",\n \"fjörður\",\n // Organizations\n \"félag\",\n \"banki\",\n \"sjóður\",\n \"stofnun\",\n \"ráð\",\n // Things/concepts\n \"rannsókn\",\n \"greiðsla\",\n \"mál\",\n \"kerfi\",\n \"verk\",\n \"þjónusta\",\n \"rekstur\",\n \"viðskipti\",\n \"verð\",\n \"kostnaður\",\n]);\n\n/**\n * Very common standalone words that should rarely be compound parts.\n * Penalize splits where BOTH parts are common standalone words.\n */\nconst COMMON_STANDALONE = new 
Set([\n \"vera\",\n \"hafa\",\n \"gera\",\n \"fara\",\n \"koma\",\n \"segja\",\n \"vilja\",\n \"mega\",\n \"þurfa\",\n \"verða\",\n \"geta\",\n \"sjá\",\n \"taka\",\n \"eiga\",\n \"láta\",\n \"halda\",\n \"leyfa\",\n \"búa\",\n]);\n\n/**\n * Common compound linking patterns in Icelandic.\n * These letters often join compound parts:\n * - \"s\" (genitive): húss + eigandi -> \"húseigandi\"\n * - \"u\" (genitive/linking): vatnu + fall -> \"vatnufall\" (rare)\n * - \"a\" (genitive): daga + blað -> \"dagablað\"\n */\nconst LINKING_PATTERNS = [\"s\", \"u\", \"a\"];\n\nexport class CompoundSplitter {\n private lemmatizer: LemmatizerLike;\n private minPartLength: number;\n private tryLinkingLetters: boolean;\n private knownLemmas: KnownLemmaLookup;\n private mode: CompoundSplitMode;\n\n constructor(\n lemmatizer: LemmatizerLike,\n knownLemmas: KnownLemmaLookup,\n options: CompoundSplitterOptions = {}\n ) {\n this.lemmatizer = lemmatizer;\n this.knownLemmas = knownLemmas;\n this.minPartLength = options.minPartLength ?? 3;\n this.tryLinkingLetters = options.tryLinkingLetters ?? true;\n this.mode = options.mode ?? \"balanced\";\n }\n\n /**\n * Helper to create a no-split result.\n */\n private noSplit(word: string, lemmas: string[]): CompoundSplit {\n return {\n word,\n parts: lemmas,\n indexTerms: lemmas,\n confidence: 0,\n isCompound: false,\n };\n }\n\n /**\n * Try to split a word into compound parts.\n *\n * Uses a lookup-first strategy:\n * 1. Check protected lemmas - never split\n * 2. Check if word is known in BÍN and unambiguous - don't split\n * 3. 
Apply mode-based splitting rules\n */\n split(word: string): CompoundSplit {\n const normalized = word.toLowerCase();\n\n // Step 1: Check protected lemmas - never split these\n const directLemmas = this.lemmatizer.lemmatize(word);\n const primaryLemma = directLemmas[0]?.toLowerCase();\n if (primaryLemma && PROTECTED_LEMMAS.has(primaryLemma)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Also check if the word itself is protected (for inflected forms)\n if (PROTECTED_LEMMAS.has(normalized)) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 2: Check if known in BÍN and unambiguous\n // A word is \"known\" if lemmatization returned something other than the word itself\n const isKnownWord =\n directLemmas.length > 0 && directLemmas[0].toLowerCase() !== normalized;\n const isUnambiguous = directLemmas.length === 1;\n\n // For conservative mode, only split at hyphens\n if (this.mode === \"conservative\") {\n if (word.includes(\"-\")) {\n return this.splitAtHyphen(word, directLemmas);\n }\n return this.noSplit(word, directLemmas);\n }\n\n // For balanced mode, don't split unambiguous known words\n if (this.mode === \"balanced\" && isKnownWord && isUnambiguous) {\n // Exception: still try if the word is very long (likely a compound)\n if (normalized.length < 12) {\n return this.noSplit(word, directLemmas);\n }\n }\n\n // Too short to be a compound\n if (normalized.length < this.minPartLength * 2) {\n return this.noSplit(word, directLemmas);\n }\n\n // Step 3: Try algorithmic splitting\n const candidates: {\n leftParts: string[];\n rightParts: string[];\n score: number;\n }[] = [];\n\n for (\n let i = this.minPartLength;\n i <= normalized.length - this.minPartLength;\n i++\n ) {\n const leftPart = normalized.slice(0, i);\n const rightPart = normalized.slice(i);\n\n // Try direct split\n const directResult = this.trySplit(leftPart, rightPart);\n if (directResult) {\n candidates.push(directResult);\n }\n\n // Try with linking letters removed from split 
point\n if (this.tryLinkingLetters) {\n for (const linker of LINKING_PATTERNS) {\n // Remove linking letter from end of left part\n if (leftPart.endsWith(linker) && leftPart.length > this.minPartLength) {\n const trimmedLeft = leftPart.slice(0, -1);\n const result = this.trySplit(trimmedLeft, rightPart);\n if (result) {\n // Slightly lower score for linked compounds\n candidates.push({ ...result, score: result.score * 0.95 });\n }\n }\n }\n }\n }\n\n if (candidates.length === 0) {\n return this.noSplit(word, directLemmas);\n }\n\n // Pick best candidate by score\n candidates.sort((a, b) => b.score - a.score);\n const best = candidates[0];\n\n // In balanced mode, require higher confidence for known words\n if (this.mode === \"balanced\" && isKnownWord && best.score < 0.6) {\n return this.noSplit(word, directLemmas);\n }\n\n // Collect all unique parts from best split\n const parts = [...new Set([...best.leftParts, ...best.rightParts])];\n // Index terms include parts + original word for search\n const indexTerms = [...new Set([...parts, normalized])];\n\n return {\n word,\n parts,\n indexTerms,\n confidence: Math.min(best.score, 1),\n isCompound: true,\n };\n }\n\n /**\n * Split a hyphenated word.\n */\n private splitAtHyphen(word: string, directLemmas: string[]): CompoundSplit {\n const parts = word.split(\"-\").filter((p) => p.length > 0);\n if (parts.length < 2) {\n return this.noSplit(word, directLemmas);\n }\n\n const allParts: string[] = [];\n for (const part of parts) {\n const lemmas = this.lemmatizer.lemmatize(part);\n allParts.push(...lemmas);\n }\n\n const uniqueParts = [...new Set(allParts)];\n const indexTerms = [...new Set([...uniqueParts, word.toLowerCase()])];\n\n return {\n word,\n parts: uniqueParts,\n indexTerms,\n confidence: 0.9,\n isCompound: true,\n };\n }\n\n private trySplit(\n leftPart: string,\n rightPart: string\n ): { leftParts: string[]; rightParts: string[]; score: number } | null {\n // Get lemmas for both parts\n const leftLemmas = 
this.lemmatizer.lemmatize(leftPart);\n const rightLemmas = this.lemmatizer.lemmatize(rightPart);\n\n // Filter to known lemmas only, deduplicated\n const leftKnown = [...new Set(leftLemmas.filter((l) => this.knownLemmas.has(l)))];\n const rightKnown = [...new Set(rightLemmas.filter((l) => this.knownLemmas.has(l)))];\n\n if (leftKnown.length === 0 || rightKnown.length === 0) {\n return null;\n }\n\n // Calculate score with multiple factors\n let score = 0;\n\n // Factor 1: Length balance (20% weight)\n // Prefer balanced splits, but not too strictly\n const lengthBalance =\n 1 - Math.abs(leftPart.length - rightPart.length) / (leftPart.length + rightPart.length);\n score += lengthBalance * 0.2;\n\n // Factor 2: Part length bonus (20% weight)\n // Prefer longer parts (more likely to be real words)\n const avgLength = (leftPart.length + rightPart.length) / 2;\n const lengthBonus = Math.min(avgLength / 6, 1);\n score += lengthBonus * 0.2;\n\n // Factor 3: Common compound tail bonus (30% weight)\n // Strongly prefer splits where right part is a known compound tail\n const hasCompoundTail = rightKnown.some((lemma) => COMMON_COMPOUND_TAILS.has(lemma));\n if (hasCompoundTail) {\n score += 0.3;\n }\n\n // Factor 4: Penalty for both parts being common standalone words (30% weight)\n // E.g., \"ísland\" -> \"ís\" + \"land\" should be penalized\n const leftIsCommon = leftKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n const rightIsCommon = rightKnown.some((lemma) => COMMON_STANDALONE.has(lemma));\n if (leftIsCommon && rightIsCommon) {\n // Strong penalty if both parts are very common standalone\n score -= 0.3;\n } else if (!leftIsCommon && !rightIsCommon) {\n // Bonus if neither is a common standalone (more likely a real compound)\n score += 0.2;\n }\n\n // Factor 5: Minimum part length requirement\n // Very short parts (2-3 chars) get a penalty\n if (leftPart.length < 4 || rightPart.length < 4) {\n score -= 0.15;\n }\n\n // Return all known lemmas from both parts\n 
return {\n leftParts: leftKnown,\n rightParts: rightKnown,\n score: Math.max(0, score), // Ensure non-negative\n };\n }\n\n /**\n * Get all lemmas for a word, including compound parts.\n * Useful for search indexing.\n */\n getAllLemmas(word: string): string[] {\n const split = this.split(word);\n return split.indexTerms;\n }\n}\n\n/**\n * Create a set of known lemmas from the lemmatizer.\n * This is used to check if compound parts are valid words.\n */\nexport function createKnownLemmaSet(lemmas: string[]): Set<string> {\n return new Set(lemmas.map((l) => l.toLowerCase()));\n}\n\nexport interface KnownLemmaLookup {\n has(lemma: string): boolean;\n}\n\nexport interface KnownLemmaFilterOptions extends BloomFilterOptions {}\n\n/**\n * Create a compact lookup for known lemmas using a Bloom filter.\n * False positives are possible (more splits), false negatives are not.\n */\nexport function createKnownLemmaFilter(\n lemmas: string[],\n options: KnownLemmaFilterOptions = {}\n): KnownLemmaLookup {\n const normalized = lemmas.map((l) => l.toLowerCase());\n return BloomFilter.fromValues(normalized, options);\n}\n","/**\n * Static multi-word phrases for Icelandic.\n *\n * Source: Extracted from GreynirEngine's Phrases.conf (MIT License)\n * https://github.com/mideind/GreynirEngine\n *\n * These phrases should be recognized as units rather than individual words,\n * enabling better stopword detection and lemmatization.\n */\n\n/**\n * A static phrase definition.\n */\nexport interface StaticPhrase {\n /** The canonical/lemma form of the phrase */\n lemma: string;\n /** Whether this phrase functions as a stopword (e.g., \"til dæmis\") */\n isStopword: boolean;\n /** Part of speech category */\n pos?: \"ao\" | \"fs\" | \"st\" | \"entity\";\n}\n\n/**\n * Common Icelandic multi-word phrases.\n * Keys are lowercase, normalized forms.\n */\nexport const STATIC_PHRASES: Map<string, StaticPhrase> = new Map([\n // Adverbial phrases (ao frasi) - often function as stopwords\n [\"til 
dæmis\", { lemma: \"til dæmi\", isStopword: true, pos: \"ao\" }],\n [\"með öðrum orðum\", { lemma: \"með annar orð\", isStopword: true, pos: \"ao\" }],\n [\"í raun\", { lemma: \"í raun\", isStopword: true, pos: \"ao\" }],\n [\"í raun og veru\", { lemma: \"í raun og vera\", isStopword: true, pos: \"ao\" }],\n [\"af og til\", { lemma: \"af og til\", isStopword: true, pos: \"ao\" }],\n [\"aftur á móti\", { lemma: \"aftur á mót\", isStopword: true, pos: \"ao\" }],\n [\"alla vega\", { lemma: \"allur vegur\", isStopword: true, pos: \"ao\" }],\n [\"alls ekki\", { lemma: \"alls ekki\", isStopword: true, pos: \"ao\" }],\n [\"alls staðar\", { lemma: \"allur staður\", isStopword: true, pos: \"ao\" }],\n [\"allt í allt\", { lemma: \"allur í allur\", isStopword: true, pos: \"ao\" }],\n [\"annars vegar\", { lemma: \"annar vegur\", isStopword: true, pos: \"ao\" }],\n [\"auk þess\", { lemma: \"auk það\", isStopword: true, pos: \"ao\" }],\n [\"að auki\", { lemma: \"að auki\", isStopword: true, pos: \"ao\" }],\n [\"að vísu\", { lemma: \"að vís\", isStopword: true, pos: \"ao\" }],\n [\"að sjálfsögðu\", { lemma: \"að sjálfsagður\", isStopword: true, pos: \"ao\" }],\n [\"að minnsta kosti\", { lemma: \"að lítill kostur\", isStopword: true, pos: \"ao\" }],\n [\"að öllu leyti\", { lemma: \"að allur leyti\", isStopword: true, pos: \"ao\" }],\n [\"að nokkru leyti\", { lemma: \"að nokkur leyti\", isStopword: true, pos: \"ao\" }],\n [\"ef til vill\", { lemma: \"ef til vilja\", isStopword: true, pos: \"ao\" }],\n [\"einhvers staðar\", { lemma: \"einhver staður\", isStopword: true, pos: \"ao\" }],\n [\"einhvern veginn\", { lemma: \"einhver vegur\", isStopword: true, pos: \"ao\" }],\n [\"ekki síst\", { lemma: \"ekki síður\", isStopword: true, pos: \"ao\" }],\n [\"engu að síður\", { lemma: \"enginn að síður\", isStopword: true, pos: \"ao\" }],\n [\"fyrst og fremst\", { lemma: \"snemma og fremri\", isStopword: true, pos: \"ao\" }],\n [\"hins vegar\", { lemma: \"hinn vegur\", isStopword: true, pos: 
\"ao\" }],\n [\"hér og þar\", { lemma: \"hér og þar\", isStopword: true, pos: \"ao\" }],\n [\"hér um bil\", { lemma: \"hér um bil\", isStopword: true, pos: \"ao\" }],\n [\"hér á landi\", { lemma: \"hér á land\", isStopword: true, pos: \"ao\" }],\n [\"hvað mest\", { lemma: \"hvað mjög\", isStopword: true, pos: \"ao\" }],\n [\"hverju sinni\", { lemma: \"hver sinn\", isStopword: true, pos: \"ao\" }],\n [\"hvorki né\", { lemma: \"hvorki né\", isStopword: true, pos: \"ao\" }],\n [\"í burtu\", { lemma: \"í burtu\", isStopword: true, pos: \"ao\" }],\n [\"í gær\", { lemma: \"í gær\", isStopword: true, pos: \"ao\" }],\n [\"í senn\", { lemma: \"í senn\", isStopword: true, pos: \"ao\" }],\n [\"í sífellu\", { lemma: \"í sífella\", isStopword: true, pos: \"ao\" }],\n [\"lengi vel\", { lemma: \"lengi vel\", isStopword: true, pos: \"ao\" }],\n [\"meira að segja\", { lemma: \"mikill að segja\", isStopword: true, pos: \"ao\" }],\n [\"meira og minna\", { lemma: \"mikill og lítill\", isStopword: true, pos: \"ao\" }],\n [\"meðal annars\", { lemma: \"meðal annar\", isStopword: true, pos: \"ao\" }],\n [\"nokkurn veginn\", { lemma: \"nokkur vegur\", isStopword: true, pos: \"ao\" }],\n [\"og svo framvegis\", { lemma: \"og svo framvegis\", isStopword: true, pos: \"ao\" }],\n [\"satt að segja\", { lemma: \"sannur að segja\", isStopword: true, pos: \"ao\" }],\n [\"sem betur fer\", { lemma: \"sem vel fara\", isStopword: true, pos: \"ao\" }],\n [\"smám saman\", { lemma: \"smátt saman\", isStopword: true, pos: \"ao\" }],\n [\"svo sem\", { lemma: \"svo sem\", isStopword: true, pos: \"ao\" }],\n [\"sér í lagi\", { lemma: \"sér í lag\", isStopword: true, pos: \"ao\" }],\n [\"til og frá\", { lemma: \"til og frá\", isStopword: true, pos: \"ao\" }],\n [\"til baka\", { lemma: \"til baka\", isStopword: true, pos: \"ao\" }],\n [\"vítt og breitt\", { lemma: \"vítt og breitt\", isStopword: true, pos: \"ao\" }],\n [\"á ný\", { lemma: \"á ný\", isStopword: true, pos: \"ao\" }],\n [\"á meðan\", { lemma: \"á 
meðan\", isStopword: true, pos: \"ao\" }],\n [\"á sama tíma\", { lemma: \"á samur tími\", isStopword: true, pos: \"ao\" }],\n [\"á hinn bóginn\", { lemma: \"á hinn bógur\", isStopword: true, pos: \"ao\" }],\n [\"þar af leiðandi\", { lemma: \"þar af leiða\", isStopword: true, pos: \"ao\" }],\n [\"þar að auki\", { lemma: \"þar að auki\", isStopword: true, pos: \"ao\" }],\n [\"það er að segja\", { lemma: \"það vera að segja\", isStopword: true, pos: \"ao\" }],\n [\"þess vegna\", { lemma: \"það vegna\", isStopword: true, pos: \"ao\" }],\n [\"því miður\", { lemma: \"það lítt\", isStopword: true, pos: \"ao\" }],\n [\"þrátt fyrir\", { lemma: \"þrátt fyrir\", isStopword: true, pos: \"ao\" }],\n\n // Time expressions\n [\"á dögunum\", { lemma: \"á dagur\", isStopword: true, pos: \"ao\" }],\n [\"á sínum tíma\", { lemma: \"á sinn tími\", isStopword: true, pos: \"ao\" }],\n [\"á endanum\", { lemma: \"á endi\", isStopword: true, pos: \"ao\" }],\n [\"einu sinni\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"eitt sinn\", { lemma: \"einn sinn\", isStopword: false, pos: \"ao\" }],\n [\"í fyrsta sinn\", { lemma: \"í fyrstur sinn\", isStopword: false, pos: \"ao\" }],\n [\"í kvöld\", { lemma: \"í kvöld\", isStopword: false, pos: \"ao\" }],\n [\"í morgun\", { lemma: \"í morgunn\", isStopword: false, pos: \"ao\" }],\n [\"á morgun\", { lemma: \"á morgunn\", isStopword: false, pos: \"ao\" }],\n\n // Prepositional phrases (fs frasi)\n [\"fyrir hönd\", { lemma: \"fyrir hönd\", isStopword: false, pos: \"fs\" }],\n [\"með tilliti til\", { lemma: \"með tillit til\", isStopword: false, pos: \"fs\" }],\n [\"í ljósi\", { lemma: \"í ljós\", isStopword: false, pos: \"fs\" }],\n [\"í stað\", { lemma: \"í staður\", isStopword: false, pos: \"fs\" }],\n [\"fyrir aftan\", { lemma: \"fyrir aftan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir austan\", { lemma: \"fyrir austan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir framan\", { lemma: \"fyrir framan\", isStopword: false, pos: 
\"fs\" }],\n [\"fyrir handan\", { lemma: \"fyrir handan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir innan\", { lemma: \"fyrir innan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir neðan\", { lemma: \"fyrir neðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir norðan\", { lemma: \"fyrir norðan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir ofan\", { lemma: \"fyrir ofan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir sunnan\", { lemma: \"fyrir sunnan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir utan\", { lemma: \"fyrir utan\", isStopword: false, pos: \"fs\" }],\n [\"fyrir vestan\", { lemma: \"fyrir vestan\", isStopword: false, pos: \"fs\" }],\n [\"í gegnum\", { lemma: \"í gegnum\", isStopword: false, pos: \"fs\" }],\n [\"í kringum\", { lemma: \"í kringum\", isStopword: false, pos: \"fs\" }],\n [\"innan við\", { lemma: \"innan við\", isStopword: false, pos: \"fs\" }],\n [\"upp úr\", { lemma: \"upp úr\", isStopword: false, pos: \"fs\" }],\n [\"þvert á\", { lemma: \"þvert á\", isStopword: false, pos: \"fs\" }],\n\n // Conjunction-like phrases (st frasi)\n [\"þar eð\", { lemma: \"þar eð\", isStopword: true, pos: \"st\" }],\n\n // Named entities - organizations/institutions (NOT stopwords)\n [\"sameinuðu þjóðirnar\", { lemma: \"Sameinuðu þjóðirnar\", isStopword: false, pos: \"entity\" }],\n [\"evrópusambandið\", { lemma: \"Evrópusambandið\", isStopword: false, pos: \"entity\" }],\n [\"nato\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n [\"nató\", { lemma: \"NATO\", isStopword: false, pos: \"entity\" }],\n]);\n\n/**\n * Check if a phrase starting at the given position exists.\n * Returns the phrase info and length if found, null otherwise.\n */\nexport function matchPhrase(\n words: string[],\n startIndex: number\n): { phrase: StaticPhrase; wordCount: number } | null {\n // Try longest matches first (up to 4 words)\n for (let len = Math.min(4, words.length - startIndex); len >= 2; len--) {\n const phraseWords = words.slice(startIndex, 
startIndex + len);\n const phraseKey = phraseWords.join(\" \").toLowerCase();\n const phrase = STATIC_PHRASES.get(phraseKey);\n if (phrase) {\n return { phrase, wordCount: len };\n }\n }\n return null;\n}\n\n/**\n * Check if a normalized string is a known phrase.\n */\nexport function isKnownPhrase(text: string): boolean {\n return STATIC_PHRASES.has(text.toLowerCase());\n}\n\n/**\n * Get phrase info for a normalized string.\n */\nexport function getPhraseInfo(text: string): StaticPhrase | undefined {\n return STATIC_PHRASES.get(text.toLowerCase());\n}\n","/**\n * Unified text processing pipeline integrating tokenize-is with lemmatization.\n *\n * Provides proper tokenization that handles Icelandic-specific patterns\n * (abbreviations, dates, times, etc.) before lemmatization.\n */\n\nimport { tokenize, type Token } from \"tokenize-is\";\nimport { Disambiguator, type DisambiguatedToken } from \"./disambiguate.js\";\nimport { CompoundSplitter, type CompoundSplit } from \"./compounds.js\";\nimport { STOPWORDS_IS, isContextualStopword } from \"./stopwords.js\";\nimport type { LemmatizerLike, BigramProvider } from \"./types.js\";\n\n/**\n * Token kinds that should be lemmatized.\n */\nconst LEMMATIZABLE_KINDS = new Set([\"word\"]);\n\n/**\n * Token kinds that represent named entities (skip lemmatization).\n */\nconst ENTITY_KINDS = new Set([\"person\", \"company\", \"entity\"]);\n\n/**\n * Token kinds to skip entirely (not useful for indexing).\n */\nconst SKIP_KINDS = new Set([\n \"punctuation\",\n \"s_begin\",\n \"s_end\",\n \"s_split\",\n \"unknown\",\n]);\n\nconst UNKNOWN_SUFFIXES = [\n \"arinnar\",\n \"anna\",\n \"unum\",\n \"um\",\n \"ir\",\n \"ar\",\n \"ur\",\n \"a\",\n \"i\",\n \"ið\",\n \"inn\",\n \"in\",\n];\n\nconst MIN_UNKNOWN_WORD_LENGTH = 6;\nconst MIN_STRIPPED_LENGTH = 3;\nconst MAX_SUFFIX_STRIPS = 2;\n\n/**\n * A processed token with lemmatization results.\n */\nexport interface ProcessedToken {\n /** Original token text */\n original: string;\n /** 
Token kind from tokenize-is */\n kind: string;\n /** Candidate lemmas (for word tokens) */\n lemmas: string[];\n /** Is this a named entity? */\n isEntity: boolean;\n /** Best lemma guess after disambiguation */\n disambiguated?: string;\n /** Disambiguation confidence (0-1) */\n confidence?: number;\n /** Compound split result if applicable */\n compoundSplit?: CompoundSplit;\n /** Lemmas derived from compound parts (if any) */\n compoundLemmas?: string[];\n}\n\n/**\n * Options for text processing.\n */\nexport interface ProcessOptions {\n /** Bigram provider for disambiguation */\n bigrams?: BigramProvider;\n /** Compound splitter for compound word detection */\n compoundSplitter?: CompoundSplitter;\n /** Remove stopwords from results */\n removeStopwords?: boolean;\n /**\n * Use contextual stopword detection (requires POS info).\n * When true, words like \"á\" are only filtered as stopwords when used\n * as prepositions, not when used as verbs (\"eiga\") or nouns (river).\n * Default: false (use simple stopword list)\n */\n useContextualStopwords?: boolean;\n /** Include numbers in results */\n includeNumbers?: boolean;\n /**\n * Index all candidate lemmas, not just the disambiguated one.\n * Better recall for search (finds more matches), worse precision.\n * Set to false if you only want the most likely lemma.\n * Default: true\n */\n indexAllCandidates?: boolean;\n /**\n * Try compound splitting even for known words.\n * Useful when BÍN contains the compound but you still want parts indexed.\n * Set to false to only split unknown words.\n * Default: true\n */\n alwaysTryCompounds?: boolean;\n}\n\n/**\n * Process text through the full pipeline.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Array of processed tokens\n */\nexport function processText(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): ProcessedToken[] {\n const {\n bigrams,\n 
compoundSplitter,\n includeNumbers = false,\n alwaysTryCompounds = true,\n } = options;\n\n // Step 1: Tokenize\n const tokens = tokenize(text);\n\n // Step 2: Process each token\n const results: ProcessedToken[] = [];\n const wordTokens: { index: number; token: Token }[] = [];\n const lemmaCache = new Map<string, string[]>();\n const allowSuffixFallback =\n \"bigramCountValue\" in lemmatizer\n ? (lemmatizer as { bigramCountValue?: number }).bigramCountValue === 0\n : false;\n\n const isUnknownLemma = (raw: string, lemmas: string[]): boolean =>\n lemmas.length === 1 && lemmas[0] === raw.toLowerCase();\n\n const trySuffixFallback = (raw: string): string[] | null => {\n let current = raw;\n let strippedCandidate: string | null = null;\n\n for (let attempt = 0; attempt < MAX_SUFFIX_STRIPS; attempt++) {\n const lower = current.toLowerCase();\n strippedCandidate = null;\n\n for (const suffix of UNKNOWN_SUFFIXES) {\n if (!lower.endsWith(suffix)) continue;\n\n const next = current.slice(0, current.length - suffix.length);\n if (next.length < MIN_STRIPPED_LENGTH) continue;\n\n const nextLemmas = lemmatizer.lemmatize(next);\n if (!isUnknownLemma(next, nextLemmas)) {\n return nextLemmas;\n }\n\n if (!strippedCandidate) {\n strippedCandidate = next;\n }\n }\n\n if (!strippedCandidate || strippedCandidate.length < MIN_UNKNOWN_WORD_LENGTH) {\n break;\n }\n\n current = strippedCandidate;\n }\n\n return null;\n };\n\n const getLemmas = (raw: string): string[] => {\n const key = raw.toLowerCase();\n const cached = lemmaCache.get(key);\n if (cached) return cached;\n const lemmas = lemmatizer.lemmatize(raw);\n if (\n allowSuffixFallback &&\n isUnknownLemma(raw, lemmas) &&\n raw.length >= MIN_UNKNOWN_WORD_LENGTH\n ) {\n const fallbackLemmas = trySuffixFallback(raw);\n if (fallbackLemmas) {\n lemmaCache.set(key, fallbackLemmas);\n return fallbackLemmas;\n }\n }\n lemmaCache.set(key, lemmas);\n return lemmas;\n };\n\n for (let i = 0; i < tokens.length; i++) {\n const token = 
tokens[i];\n\n // Skip unwanted tokens\n if (SKIP_KINDS.has(token.kind)) {\n continue;\n }\n\n // Handle named entities\n if (ENTITY_KINDS.has(token.kind)) {\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: true,\n });\n continue;\n }\n\n // Handle numbers if requested\n if (token.kind === \"number\" || token.kind === \"ordinal\") {\n if (includeNumbers) {\n results.push({\n original: token.text ?? \"\",\n kind: token.kind,\n lemmas: [],\n isEntity: false,\n });\n }\n continue;\n }\n\n // Handle word tokens\n if (LEMMATIZABLE_KINDS.has(token.kind)) {\n const tokenText = token.text ?? \"\";\n const lemmas = getLemmas(tokenText);\n\n const processed: ProcessedToken = {\n original: tokenText,\n kind: token.kind,\n lemmas,\n isEntity: false,\n };\n\n // Try compound splitting\n // - Always if alwaysTryCompounds is set (for better search recall)\n // - Otherwise only if lemmatization returns unknown word\n const isUnknownWord = lemmas.length === 1 && lemmas[0] === tokenText.toLowerCase();\n if (compoundSplitter && (alwaysTryCompounds || isUnknownWord)) {\n const split = compoundSplitter.split(tokenText);\n if (split.isCompound) {\n processed.compoundSplit = split;\n // Add component lemmas from parts (in addition to direct lemmas)\n const partLemmas = split.parts.flatMap((c) => getLemmas(c));\n processed.compoundLemmas = partLemmas;\n processed.lemmas = [...new Set([...lemmas, ...partLemmas])];\n }\n }\n\n results.push(processed);\n wordTokens.push({ index: results.length - 1, token });\n continue;\n }\n\n // Pass through other tokens (time, date, url, etc.)\n results.push({\n original: token.text ?? 
\"\",\n kind: token.kind,\n lemmas: [],\n isEntity: false,\n });\n }\n\n // Step 3: Disambiguate if we have bigram data\n if (bigrams && wordTokens.length > 0) {\n const disambiguator = new Disambiguator(lemmatizer, bigrams);\n\n for (let i = 0; i < wordTokens.length; i++) {\n const { index, token } = wordTokens[i];\n const prevToken = i > 0 ? wordTokens[i - 1].token : null;\n const nextToken = i < wordTokens.length - 1 ? wordTokens[i + 1].token : null;\n\n const result = disambiguator.disambiguate(\n token.text ?? \"\",\n prevToken?.text ?? null,\n nextToken?.text ?? null,\n {\n prevLemmas: prevToken?.text ? getLemmas(prevToken.text) : undefined,\n nextLemmas: nextToken?.text ? getLemmas(nextToken.text) : undefined,\n }\n );\n\n results[index].disambiguated = result.lemma;\n results[index].confidence = result.confidence;\n }\n } else {\n // No disambiguation - use first lemma\n for (const { index } of wordTokens) {\n const processed = results[index];\n if (processed.lemmas.length > 0) {\n processed.disambiguated = processed.lemmas[0];\n processed.confidence = processed.lemmas.length === 1 ? 
1.0 : 0.5;\n }\n }\n }\n\n return results;\n}\n\n/**\n * Extract unique indexable lemmas from text.\n *\n * @param text - Input text\n * @param lemmatizer - Lemmatizer instance\n * @param options - Processing options\n * @returns Set of unique lemmas suitable for search indexing\n */\nexport function extractIndexableLemmas(\n text: string,\n lemmatizer: LemmatizerLike,\n options: ProcessOptions = {}\n): Set<string> {\n const {\n removeStopwords = false,\n indexAllCandidates = true,\n useContextualStopwords = false,\n } = options;\n\n const processed = processText(text, lemmatizer, options);\n const lemmas = new Set<string>();\n\n /**\n * Check if a lemma should be filtered as a stopword.\n * Uses contextual rules when enabled and POS is available.\n */\n const shouldFilter = (lemma: string, pos?: string): boolean => {\n if (!removeStopwords) return false;\n if (useContextualStopwords) {\n return isContextualStopword(lemma, pos);\n }\n return STOPWORDS_IS.has(lemma);\n };\n\n for (const token of processed) {\n // Skip entities\n if (token.isEntity) {\n continue;\n }\n\n if (indexAllCandidates) {\n // Index ALL candidate lemmas for better search recall\n for (const lemma of token.lemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n } else {\n // Use disambiguated lemma if available (better precision)\n if (token.disambiguated) {\n // Note: We don't have POS info easily available in disambiguated result\n // This would need enhancement to pass through POS from disambiguation\n if (!shouldFilter(token.disambiguated)) {\n lemmas.add(token.disambiguated);\n }\n }\n }\n\n // Also add compound parts if split\n if (token.compoundSplit?.isCompound) {\n const partLemmas = token.compoundLemmas\n ? 
token.compoundLemmas\n : token.compoundSplit.parts.flatMap((p) => lemmatizer.lemmatize(p));\n for (const lemma of partLemmas) {\n if (!shouldFilter(lemma)) {\n lemmas.add(lemma);\n }\n }\n }\n }\n\n return lemmas;\n}\n\n/**\n * Options for building a backend-agnostic boolean search query.\n */\nexport interface SearchQueryOptions extends ProcessOptions {\n /** Operator between token groups (AND). Default: \" & \" */\n andOperator?: string;\n /** Operator between candidate lemmas within a group (OR). Default: \" | \" */\n orOperator?: string;\n /** Wrap groups with multiple terms in parentheses. Default: true */\n wrapGroups?: boolean;\n /**\n * Include the original token (lowercased) in each group for recall.\n * Useful for unknown words or when you want a fallback.\n * Default: false\n */\n includeOriginal?: boolean;\n /** Lowercase original tokens when includeOriginal is true. Default: true */\n lowercaseOriginal?: boolean;\n}\n\n/**\n * Result for a backend-agnostic boolean search query.\n */\nexport interface SearchQueryResult {\n /** Lemma groups per token (OR within group, AND between groups) */\n groups: string[][];\n /** Boolean query string using provided operators */\n query: string;\n}\n\n/**\n * Build a backend-agnostic boolean query string from user input.\n *\n * Use the same lemmatization pipeline as indexing, then:\n * - OR within a token's candidate lemmas\n * - AND across tokens\n *\n * @param text - User search input\n * @param lemmatizer - Lemmatizer instance\n * @param options - Query + processing options\n */\nexport function buildSearchQuery(\n text: string,\n lemmatizer: LemmatizerLike,\n options: SearchQueryOptions = {}\n): SearchQueryResult {\n const {\n removeStopwords = false,\n indexAllCandidates = true,\n useContextualStopwords = false,\n andOperator = \" & \",\n orOperator = \" | \",\n wrapGroups = true,\n includeOriginal = false,\n lowercaseOriginal = true,\n } = options;\n\n const processed = processText(text, lemmatizer, 
options);\n const groups: string[][] = [];\n\n /**\n * Check if a lemma should be filtered as a stopword.\n * Uses contextual rules when enabled and POS is available.\n */\n const shouldFilter = (lemma: string, pos?: string): boolean => {\n if (!removeStopwords) return false;\n if (useContextualStopwords) {\n return isContextualStopword(lemma, pos);\n }\n return STOPWORDS_IS.has(lemma);\n };\n\n for (const token of processed) {\n // Mirror indexing behavior: skip entities\n if (token.isEntity) continue;\n\n let candidates: string[] = [];\n if (indexAllCandidates) {\n candidates = token.lemmas;\n } else if (token.disambiguated) {\n candidates = [token.disambiguated];\n }\n\n if (includeOriginal) {\n const raw = token.original ?? \"\";\n if (raw.length > 0) {\n const original = lowercaseOriginal ? raw.toLowerCase() : raw;\n candidates = [...candidates, original];\n }\n }\n\n const unique = [\n ...new Set(candidates.filter((lemma) => lemma && !shouldFilter(lemma))),\n ];\n\n if (unique.length > 0) {\n groups.push(unique);\n }\n }\n\n const query = groups\n .map((group) => {\n const joined = group.join(orOperator);\n if (wrapGroups && group.length > 1) {\n return `(${joined})`;\n }\n return joined;\n })\n .filter((part) => part.length > 0)\n .join(andOperator);\n\n return { groups, query };\n}\n\n/**\n * Strategy for benchmark comparisons.\n */\nexport type ProcessingStrategy = \"naive\" | \"tokenized\" | \"disambiguated\" | \"full\";\n\n/**\n * Metrics from processing a text.\n */\nexport interface ProcessingMetrics {\n /** Total word count */\n wordCount: number;\n /** Words successfully lemmatized (not returned as-is) */\n lemmatizedCount: number;\n /** Coverage: lemmatized / total */\n coverage: number;\n /** Words with multiple candidate lemmas */\n ambiguousCount: number;\n /** Ambiguity rate: ambiguous / total */\n ambiguityRate: number;\n /** Average disambiguation confidence */\n avgConfidence: number;\n /** Compounds detected and split */\n compoundsFound: 
number;\n /** Named entities skipped */\n entitiesSkipped: number;\n /** Unique lemmas extracted */\n uniqueLemmas: number;\n /** Processing time in milliseconds */\n timeMs: number;\n}\n\n/**\n * Run benchmark with a specific strategy and collect metrics.\n */\nexport function runBenchmark(\n text: string,\n lemmatizer: LemmatizerLike,\n strategy: ProcessingStrategy,\n resources: {\n bigrams?: BigramProvider;\n compoundSplitter?: CompoundSplitter;\n } = {}\n): ProcessingMetrics {\n const start = performance.now();\n\n let processed: ProcessedToken[];\n let lemmas: Set<string>;\n\n switch (strategy) {\n case \"naive\": {\n // Simple whitespace split + lemmatize\n const tokens = text.split(/\\s+/).filter((t) => t.length > 0);\n const naiveProcessed: ProcessedToken[] = [];\n\n for (const token of tokens) {\n const cleaned = token.replace(/^[^\\p{L}\\p{N}]+|[^\\p{L}\\p{N}]+$/gu, \"\");\n if (cleaned) {\n const tokenLemmas = lemmatizer.lemmatize(cleaned);\n naiveProcessed.push({\n original: cleaned,\n kind: \"word\",\n lemmas: tokenLemmas,\n isEntity: false,\n disambiguated: tokenLemmas[0],\n confidence: tokenLemmas.length === 1 ? 
1.0 : 0.5,\n });\n }\n }\n processed = naiveProcessed;\n lemmas = new Set(naiveProcessed.map((p) => p.disambiguated!).filter(Boolean));\n break;\n }\n\n case \"tokenized\": {\n // tokenize-is + lemmatize word tokens\n processed = processText(text, lemmatizer);\n lemmas = new Set(\n processed\n .filter((p) => p.kind === \"word\" && p.lemmas.length > 0)\n .map((p) => p.lemmas[0])\n );\n break;\n }\n\n case \"disambiguated\": {\n // tokenized + bigram disambiguation\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n });\n break;\n }\n\n case \"full\": {\n // disambiguated + compounds\n processed = processText(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n lemmas = extractIndexableLemmas(text, lemmatizer, {\n bigrams: resources.bigrams,\n compoundSplitter: resources.compoundSplitter,\n });\n break;\n }\n }\n\n const timeMs = performance.now() - start;\n\n // Calculate metrics\n const wordTokens = processed.filter((p) => p.kind === \"word\");\n const wordCount = wordTokens.length;\n\n const lemmatizedCount = wordTokens.filter((p) => {\n // Considered lemmatized if not returned as-is\n return (\n p.lemmas.length > 0 &&\n !(p.lemmas.length === 1 && p.lemmas[0] === p.original.toLowerCase())\n );\n }).length;\n\n const ambiguousCount = wordTokens.filter((p) => p.lemmas.length > 1).length;\n\n const confidences = wordTokens\n .filter((p) => p.confidence !== undefined)\n .map((p) => p.confidence!);\n const avgConfidence =\n confidences.length > 0\n ? confidences.reduce((a, b) => a + b, 0) / confidences.length\n : 0;\n\n const compoundsFound = wordTokens.filter((p) => p.compoundSplit?.isCompound).length;\n const entitiesSkipped = processed.filter((p) => p.isEntity).length;\n\n return {\n wordCount,\n lemmatizedCount,\n coverage: wordCount > 0 ? 
lemmatizedCount / wordCount : 0,\n ambiguousCount,\n ambiguityRate: wordCount > 0 ? ambiguousCount / wordCount : 0,\n avgConfidence,\n compoundsFound,\n entitiesSkipped,\n uniqueLemmas: lemmas.size,\n timeMs,\n };\n}\n"],"mappings":"uCAUA,MAAa,EAAe,IAAI,IAAI,8rIAuEnC,CAAC,CAKF,SAAgB,EAAW,EAAuB,CAChD,OAAO,EAAa,IAAI,EAAK,aAAa,CAAC,CAa7C,MAAa,EAAiD,IAAI,IAAI,CAEpE,CAAC,IAAK,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE5B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE9B,CAAC,KAAM,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE7B,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAE/B,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,QAAS,IAAI,IAAI,CAAC,KAAM,KAAK,CAAC,CAAC,CAEhC,CAAC,OAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEzB,CAAC,MAAO,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAExB,CAAC,KAAM,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAEvB,CAAC,IAAK,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CACvB,CAAC,CAYF,SAAgB,EAAqB,EAAe,EAAuB,CACzE,IAAM,EAAa,EAAM,aAAa,CAGhC,EAAc,EAAqB,IAAI,EAAW,CAOxD,OANI,GAAe,EAEV,EAAY,IAAI,EAAI,CAItB,EAAa,IAAI,EAAW,CAMrC,SAAgB,EAAkC,EAAiB,CACjE,OAAO,EAAM,OAAQ,GAAM,CAAC,EAAW,EAAE,CAAC,CCnI5C,MAAM,EAAQ,WAGR,EAA2B,CAC/B,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACA,KACD,CAIK,EAAgD,CACpD,IAAA,GACA,KACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,IAAA,GACA,KACA,MACA,KACD,CAIK,EAAoD,CACxD,KACA,KACD,CAUD,IAAa,EAAb,MAAa,CAA2D,CACtE,OACA,WACA,aACA,aACA,YACA,YACA,aACA,QACA,gBACA,gBACA,gBACA,gBACA,YAEA,WACA,UACA,WACA,YACA,QAEA,QAAkB,IAAI,YAAY,QAAQ,CAE1C,YAAoB,EAAqB,CACvC,KAAK,OAAS,EACd,IAAM,EAAO,IAAI,SAAS,EAAO,CAG3B,EAAQ,EAAK,UAAU,EAAG,GAAK,CACrC,GAAI,IAAU,EACZ,MAAU,MACR,2CAA2C,EAAM,SAAS,GAAG,CAAC,UAAU,EAAM,SAAS,GAAG,GAC3F,CAIH,GADA,KAAK,QAAU,EAAK,UAAU,EAAG,GAAK,CAClC,KAAK,UAAY,GAAK,KAAK,UAAY,EACzC,MAAU,MAAM,wBAAwB,KAAK,UAAU,CAGzD,IAAM,EAAiB,EAAK,UAAU,EAAG,GAAK,CAC9C,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,UAAY,EAAK,UAAU,GAAI,GAAK,CACzC
,KAAK,WAAa,EAAK,UAAU,GAAI,GAAK,CAC1C,KAAK,YAAc,EAAK,UAAU,GAAI,GAAK,CAI3C,IAAI,EAAS,GAGb,KAAK,WAAa,IAAI,WAAW,EAAQ,EAAQ,EAAe,CAChE,GAAU,EAGV,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CACpE,GAAU,KAAK,WAAa,EAG5B,KAAK,aAAe,IAAI,WAAW,EAAQ,EAAQ,KAAK,WAAW,CACnE,GAAU,KAAK,WAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAU,CAClE,GAAU,KAAK,UAAY,EAG3B,KAAK,YAAc,IAAI,WAAW,EAAQ,EAAQ,KAAK,UAAU,CACjE,GAAU,KAAK,UAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,aAAe,IAAI,YAAY,EAAQ,EAAQ,KAAK,UAAY,EAAE,CACvE,IAAW,KAAK,UAAY,GAAK,EAGjC,KAAK,QAAU,IAAI,YAAY,EAAQ,EAAQ,KAAK,WAAW,CAC/D,GAAU,KAAK,WAAa,EAG5B,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,gBAAkB,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CACxE,GAAU,KAAK,YAAc,EAG7B,KAAK,gBAAkB,IAAI,WAAW,EAAQ,EAAQ,KAAK,YAAY,CACvE,GAAU,KAAK,YAEf,EAAU,EAAS,EAAK,GAGxB,KAAK,YAAc,IAAI,YAAY,EAAQ,EAAQ,KAAK,YAAY,CAMtE,aAAa,KACX,EACA,EAAmC,EAAE,CACV,CAE3B,IAAM,EAAW,MADD,EAAQ,OAAS,OACF,EAAI,CAEnC,GAAI,CAAC,EAAS,GACZ,MAAU,MAAM,+BAA+B,EAAS,SAAS,CAInE,OAAO,IAAI,EADI,MAAM,EAAS,aAAa,CACR,CAMrC,OAAO,eAAe,EAAuC,CAC3D,OAAO,IAAI,EAAiB,EAAO,CAMrC,UAAkB,EAAgB,EAAwB,CACxD,OAAO,KAAK,QAAQ,OAAO,KAAK,WAAW,SAAS,EAAQ,EAAS,EAAO,CAAC,CAM/E,SAAiB,EAAuB,CACtC,OAAO,KAAK,UAAU,KAAK,aAAa,GAAQ,KAAK,aAAa,GAAO,CAM3E,QAAgB,EAAuB,CACrC,OAAO,KAAK,UAAU,KAAK,YAAY,GAAQ,KAAK,YAAY,GAAO,CAOzE,SAAiB,EAAsB,CACrC,IAAI,EAAO,EACP,EAAQ,KAAK,UAAY,EAE7B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAU,KAAK,QAAQ,EAAI,CAEjC,GAAI,IAAY,EACd,OAAO,EAEL,EAAU,EACZ,EAAO,EAAM,EAEb,EAAQ,EAAM,EAIlB,MAAO,GAQT,UAAU,EAAc,EAAkC,EAAE,CAAY,CACtE,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,CAAC,EAAW,CAGrB,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAE9B,CAAE,aAAc,EAChB,EAAO,IAAI,IACX,EAAmB,EAAE,CAE3B,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAM,EAAY,GAExB,GAAI,GAAa,IAAQ,EACvB,SAGF,IAAM,EAAQ,KAAK,SAAS,EAAS,CAChC,EAAK,IAAI,EAAM,GAClB,EAAK,IAAI,EAAM
,CACf,EAAO,KAAK,EAAM,EAQtB,OAJI,EAAO,SAAW,EACb,CAAC,EAAW,CAGd,EAQT,YAAoB,EAMlB,CAWA,OAVI,KAAK,UAAY,EACZ,CACL,SAAU,IAAU,EACpB,QAAS,EAAQ,GACjB,SAAU,EACV,WAAY,EACZ,WAAY,EACb,CAGI,CACL,SAAU,IAAU,GACpB,QAAS,EAAQ,GACjB,SAAW,IAAU,EAAK,EAC1B,WAAa,IAAU,EAAK,EAC5B,WAAa,IAAU,EAAK,EAC7B,CAOH,iBAAiB,EAA8B,CAC7C,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAAO,IAAI,IACX,EAAyB,EAAE,CAEjC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,WAAY,KAAK,YAAY,KAAK,QAAQ,GAAG,CACzD,EAAQ,KAAK,SAAS,EAAS,CAC/B,EAAM,EAAY,IAAa,GAC/B,EAAM,GAAG,EAAM,GAAG,IAEnB,EAAK,IAAI,EAAI,GAChB,EAAK,IAAI,EAAI,CACb,EAAO,KAAK,CAAE,QAAO,MAAK,CAAC,EAI/B,OAAO,EAOT,mBAAmB,EAAgC,CACjD,IAAM,EAAa,EAAK,aAAa,CAC/B,EAAM,KAAK,SAAS,EAAW,CAErC,GAAI,IAAQ,GACV,MAAO,EAAE,CAGX,IAAM,EAAQ,KAAK,aAAa,GAC1B,EAAM,KAAK,aAAa,EAAM,GAC9B,EAA2B,EAAE,CAEnC,IAAK,IAAI,EAAI,EAAO,EAAI,EAAK,IAAK,CAChC,GAAM,CAAE,WAAU,UAAS,WAAU,aAAY,cAC/C,KAAK,YAAY,KAAK,QAAQ,GAAG,CAE7B,EAAuB,EAAE,CACzB,EAAU,EAAa,GACvB,EAAY,EAAe,GAC3B,EAAY,EAAe,GAE7B,IAAS,EAAM,KAAO,GACtB,IAAW,EAAM,OAAS,GAC1B,IAAW,EAAM,OAAS,GAE9B,EAAO,KAAK,CACV,MAAO,KAAK,SAAS,EAAS,CAC9B,IAAK,EAAY,IAAa,GAC9B,MAAO,OAAO,KAAK,EAAM,CAAC,OAAS,EAAI,EAAQ,IAAA,GAChD,CAAC,CAGJ,OAAO,EAMT,kBAA4B,CAC1B,OAAO,KAAK,SAAW,EAMzB,YAAqB,CACnB,OAAO,KAAK,QAMd,WAAmB,EAAe,EAAuB,CACvD,IAAI,EAAO,EACP,EAAQ,KAAK,YAAc,EAE/B,KAAO,GAAQ,GAAO,CACpB,IAAM,EAAO,EAAO,IAAW,EACzB,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,EAAQ,EACV,EAAO,EAAM,UACJ,EAAQ,EACjB,EAAQ,EAAM,MACT,CAEL,IAAM,EAAQ,KAAK,UACjB,KAAK,gBAAgB,GACrB,KAAK,gBAAgB,GACtB,CAED,GAAI,IAAU,EACZ,OAAO,EAEL,EAAQ,EACV,EAAO,EAAM,EAEb,EAAQ,EAAM,GAKpB,MAAO,GAOT,WAAW,EAAe,EAAuB,CAC/C,IAAM,EAAM,KAAK,WAAW,EAAM,aAAa,CAAE,EAAM,aAAa,CAAC,CACrE,OAAO,IAAQ,GAAK,EAAI,KAAK,YAAY,GAO3C,KAAK,EAAe,EAAuB,CACzC,OAAO,KAAK,WAAW,EAAO,EAAM,CAMtC,QAAQ,EAAuB,CAC7B,OAAO,KAAK,SAAS,EAAK,aAAa,CAAC,GAAK,GAM/C,IAAI,iBAA0B,CAC5B,OAAO,KAAK,WAMd,IAAI,eAAwB,CAC1B,OAAO,KAAK,UAMd,IAAI,kBAA2B,CAC7B,OAAO,KAAK,YA
Md,IAAI,YAAqB,CACvB,OAAO,KAAK,OAAO,WAOrB,cAAyB,CACvB,IAAM,EAAmB,EAAE,CAC3B,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,WAAY,IACnC,EAAO,KAAK,KAAK,SAAS,EAAE,CAAC,CAE/B,OAAO,IC5dX,MAAa,EAA6C,CAKxD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,gBACT,YAAa,iDACd,CACD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAKD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,iBACT,YAAa,uCACd,CACD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,yCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,qCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,8CACd,CAGD,CACE,KAAM,OACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,+CACd,CAGD,CACE,KAAM,QACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,0CACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,MACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,uCACd,CAGD,CACE,KAAM,IACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,mCACd,CAGD,CACE,KAAM,KACN,OAAQ,KACR,KAAM,KACN,QAAS,cACT,YAAa,wCACd,CACF,CAKD,SAAgB,EAAgB,EAAoC,CAClE,IAAM,EAAa,EAAK,aAAa,CACrC,OAAO,EAAqB,OAAQ,GAAM,EAAE,OAAS,EAAW,CAMlE,SAAgB,EAAuB,EAAuB,CAC5D,OAAO,EAAqB,KAAM,GAAM,EAAE,OAAS,EAAK,aAAa,CAAC,CCzJxE,MAAa,EAAuD,IAAI,IAAkC,CAExG,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,IAAK,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAC9C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAChD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAClD,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CACjD,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAM,MAAM,CAAC,CAAC,CAGlD,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACxC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC5C,CAAC,UAAW,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,YAAa,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG/C,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,MAAO,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC1C,CAAC,KAA
M,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CACzC,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,SAAU,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC7C,CAAC,OAAQ,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC3C,CAAC,WAAY,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC/C,CAAC,QAAS,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC5C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAG3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,KAAM,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACxC,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,OAAQ,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC1C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,UAAW,IAAI,IAAqB,CAAC,MAAM,CAAC,CAAC,CAC9C,CAAC,QAAS,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC3C,CAAC,MAAO,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CACzC,CAAC,SAAU,IAAI,IAAqB,CAAC,KAAK,CAAC,CAAC,CAC7C,CAAC,CAOW,EAAsB,IAAI,IAAI,CACzC,KACA,KACA,OACA,MACA,MACA,MACA,MACA,OACA,MACA,MACD,CAAC,CAuBF,SAAgB,EACd,EACA,EACS,CAGT,OAFK,EACS,EAAkB,IAAI,EAAU,EAChC,IAAI,EAAa,EAAI,GAFT,GAe5B,SAAgB,EACd,EACA,EACyB,CAEzB,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,GAAI,EAAe,SAAW,EAAG,OAAO,KAGxC,IAAK,IAAM,KAAQ,EACjB,IAAK,IAAM,KAAY,EACrB,GAAI,EAAS,OAAO,MAAQ,EAAc,EAAK,MAAO,EAAS,MAAM,KAAK,CACxE,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,QAAQ,EAAS,MAAM,OAC7B,WAAY,GACb,CAKP,OAAO,KAaT,SAAgB,EACd,EACA,EACyB,CACzB,GAAI,CAAC,EAAU,OAAO,KAEtB,IAAM,EAAY,EAAS,aAAa,CACxC,GAAI,CAAC,EAAoB,IAAI,EAAU,CAAE,OAAO,KAGhD,IAAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAW/D,OAVI,EAAe,SAAW,GAI1B,CADe,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CACjC,KAMjB,CACL,OAJoB,EAAe,KAAM,GAAM,EAAE,QAAU,OAAO,EAC7B,EAAe,IAG/B,MACrB,IAAK,KACL,KAAM,eACN,WAAY,IACb,CAsBH,SAAgB,EACd,EACA,EACA,EACyB,CACzB,GAAI,CAAC,GAAY,CAAC,GAAY,iBAAkB,OAAO,KAGvD,IAAM,EAAa,EAAW,iBAAiB,EAAS,CAClD,EAAgB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAC5D,GAAI,CAAC,EAAe,OAAO,KAG3B,IAAM,EAAoB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAG1D,EAAmB,EAAW,KAAM,GAAM,EAAE,MAAQ,KAAK,CAI/D,GAAI,GAAqB,EACvB,OAAO,KAIT,IAAM,EAAgB,EAAkB,IAAI,EAAc,MAAM,CAChE,GAAI,CAAC,EAAe,OAAO,KAG3B,I
AAM,EAAiB,EAAW,OAAQ,GAAM,EAAE,MAAQ,KAAK,CAC/D,IAAK,IAAM,KAAQ,EACjB,GAAI,EAAK,OAAO,MAAQ,EAAc,IAAI,EAAK,MAAM,KAAK,CACxD,MAAO,CACL,MAAO,EAAK,MACZ,IAAK,KACL,KAAM,mBAAmB,EAAK,MAAM,OACpC,WAAY,GACb,CAIL,OAAO,KAiBT,SAAgB,EACd,EACA,EACA,EACA,EAA2C,KAClB,CAazB,OAXiB,EAAqB,EAAY,EAAc,EAItC,EAA8B,EAAY,EAAU,EAAW,EAIxE,EAAqB,EAAY,EAAS,EAGpD,KAMT,SAAgB,EAAmB,EAAwB,CACzD,OAAO,EAAkB,IAAI,EAAM,CAMrC,SAAgB,EAAiB,EAAqD,CACpF,OAAO,EAAkB,IAAI,EAAU,CClNzC,MAAM,EAAwC,CAC5C,KAAM,cACN,IAAI,EAAY,CAQd,OAPI,EAAW,SAAW,EACjB,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EACb,CAEI,MAEV,CAKK,EAA4C,CAChD,KAAM,mBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,mBAAoB,OAAO,KAE9C,IAAK,IAAM,KAAQ,EAAsB,CACvC,IAAM,EAAQ,EAAU,EAAM,EAAY,EAAQ,CAClD,GAAI,EACF,MAAO,CACL,MAAO,EAAM,MACb,IAAK,EAAM,IACX,WAAY,IACb,CAGL,OAAO,MAEV,CAKD,SAAS,EACP,EACA,EACA,EACqB,CAErB,IAAM,EAAqB,EAAW,KACnC,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,OAC5E,CACK,EAAe,EAAW,KAC7B,GAAM,EAAE,MAAM,aAAa,GAAK,EAAK,KAAK,aAAa,EAAI,EAAE,MAAQ,EAAK,KAC5E,CAED,GAAI,CAAC,GAAsB,CAAC,EAC1B,OAAO,KAIT,GAAI,EAAK,UAAY,cAAe,CAElC,IAAM,EAAO,EAAQ,SACrB,GAAI,GAAQ,kBAAkB,KAAK,EAAK,CACtC,OAAO,UAEA,EAAK,UAAY,cAAe,CAGzC,IAAM,EAAO,EAAQ,UAAU,aAAa,CAC5C,GAAI,GAAQ,CAAC,CAAC,QAAS,QAAS,KAAM,KAAM,MAAO,OAAQ,MAAO,MAAM,CAAC,SAAS,EAAK,CACrF,OAAO,UAEA,EAAK,UAAY,gBAAiB,CAE3C,IAAM,EAAO,EAAQ,UAAU,aAAa,CAE5C,GAAI,GADa,CAAC,KAAM,KAAM,OAAQ,MAAO,MAAO,MAAO,MAAO,OAAQ,MAAO,MAAM,CAClE,SAAS,EAAK,CACjC,OAAO,EAIX,OAAO,KAiIT,MAAM,EAAgC,CACpC,EACA,EA3H6C,CAC7C,KAAM,gBACN,IAAI,EAAY,EAAS,EAAe,CACtC,GAAI,CAAC,EAAc,gBAAiB,OAAO,KAG3C,IAAM,EAAwC,EAAW,IAAK,IAAO,CACnE,GAAG,EACH,MAAO,IAAA,GACR,EAAE,CAGG,EAAc,EAAQ,UAAU,EAAQ,OAC9C,GAAI,EAAa,CACf,IAAM,EAAkB,EAAc,SAAS,EAAY,CACvD,IAEF,EAAoB,OAAS,EAC7B,EAAoB,KAAK,GAAG,EAAgB,EAKhD,IAAM,EAAS,EACb,EACA,EAAQ,SACR,EAAQ,eAAiB,EAAE,CAC3B,EAAc,WACf,CAUD,OARI,EACK,CACL,MAAO,EAAO,MACd,IAAK,EAAO,IACZ,WAAY,EAAO,WACpB,CAGI,MAEV,CAKwC,CACvC,KAAM,eACN,IAAI,EAAY,EAAS,EAAe,CAEtC,GADI,CAAC,EAAc,SACf,EAAW,SAAW,EAAG,OAAO,KAEpC,IAAM,EAAuD,EAAE,CAE/D,IAAK,IAAM,K
AAa,EAAY,CAClC,IAAI,EAAQ,EAGZ,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAW,EAAU,MAAM,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,aAMlD,GAAI,EAAQ,SAAU,CACpB,IAAM,EAAa,EAAQ,YAAc,EAAc,WAAW,UAAU,EAAQ,SAAS,CAC7F,IAAK,IAAM,KAAa,EAAY,CAClC,IAAM,EAAO,EAAc,QAAQ,KAAK,EAAU,MAAO,EAAU,CAC/D,EAAO,IACT,GAAS,KAAK,IAAI,EAAO,EAAE,CAAG,EAAc,cAKlD,EAAO,KAAK,CAAE,YAAW,QAAO,CAAC,CAOnC,GAHA,EAAO,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAGpC,EAAO,OAAS,GAAK,EAAO,GAAG,MAAQ,EAAG,CAC5C,IAAM,EAAW,EAAO,GAAG,MACrB,EAAa,EAAO,QAAQ,EAAK,IAAM,EAAM,KAAK,IAAI,EAAE,MAAM,CAAE,EAAE,CAClE,EAAa,EAAa,EAAI,KAAK,IAAI,EAAS,CAAG,EAAa,GAEtE,MAAO,CACL,MAAO,EAAO,GAAG,UAAU,MAC3B,IAAK,EAAO,GAAG,UAAU,IACzB,aACD,CAGH,OAAO,MAEV,CAK0C,CACzC,KAAM,WACN,IAAI,EAAY,CAQd,OAPI,EAAW,OAAS,EACf,CACL,MAAO,EAAW,GAAG,MACrB,IAAK,EAAW,GAAG,IACnB,WAAY,EAAI,EAAW,OAC5B,CAEI,MAEV,CAWA,CAKD,IAAa,EAAb,KAA2B,CACzB,WACA,QACA,WACA,YACA,mBACA,gBACA,WAEA,YACE,EACA,EAAiC,KACjC,EAAgC,EAAE,CAClC,CACA,KAAK,WAAa,EAClB,KAAK,QAAU,EACf,KAAK,WAAa,EAAQ,YAAc,EACxC,KAAK,YAAc,EAAQ,aAAe,EAC1C,KAAK,mBAAqB,EAAQ,oBAAsB,GACxD,KAAK,gBAAkB,EAAQ,iBAAmB,GAClD,KAAK,WAAa,KAAK,WAAW,mBAAqB,IAAI,IAAQ,KAGrE,SAAiB,EAA4C,CAC3D,GAAI,CAAC,KAAK,WAAW,oBAAsB,CAAC,KAAK,WAAY,OAC7D,IAAM,EAAM,EAAK,aAAa,CACxB,EAAS,KAAK,WAAW,IAAI,EAAI,CACvC,GAAI,EAAQ,OAAO,EACnB,IAAM,EAAQ,KAAK,WAAW,mBAAmB,EAAK,CAEtD,OADA,KAAK,WAAW,IAAI,EAAK,EAAM,CACxB,EAUT,aACE,EACA,EACA,EACA,EAAkC,EAAE,CAChB,CAEpB,IAAI,EACJ,AAKE,EALE,KAAK,WAAW,iBACE,KAAK,WAAW,iBAAiB,EAAK,CAG3C,KAAK,WAAW,UAAU,EAAK,CACnB,IAAK,IAAO,CAAE,MAAO,EAAG,IAAK,KAAmB,EAAE,CAG/E,IAAM,EAAa,EAAkB,IAAK,GAAM,EAAE,MAAM,CAClD,EAAQ,EAGV,EACA,IACF,EAAgB,KAAK,SAAS,EAAS,EAIzC,IAAM,EAAiC,CACrC,WACA,WACA,WAAY,EAAK,WACjB,WAAY,EAAK,WACjB,gBACA,UAAW,CAAC,EAAK,CACjB,MAAO,EACR,CAGD,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAS,EAAM,IAAI,EAAmB,EAAS,KAAK,CAC1D,GAAI,EACF,MAAO,CACL,QACA,MAAO,EAAO,MACd,IAAK,EAAO,IACZ,aACA,oBACA,UAAW,EAAW,OAAS,EAC/B,WAAY,EAAO,WACnB,WAAY,EAAM,KACnB,CAKL,MAAO,CACL,QAC
A,MAAO,EAAK,aAAa,CACzB,aACA,oBACA,UAAW,GACX,WAAY,EACZ,WAAY,OACb,CASH,gBAAgB,EAAwC,CACtD,IAAM,EAAgC,EAAE,CAExC,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAO,EAAO,GACd,EAAW,EAAI,EAAI,EAAO,EAAI,GAAK,KACnC,EAAW,EAAI,EAAO,OAAS,EAAI,EAAO,EAAI,GAAK,KAEzD,EAAQ,KAAK,KAAK,aAAa,EAAM,EAAU,EAAS,CAAC,CAG3D,OAAO,EAST,cAAc,EAA+B,CAC3C,IAAM,EAAS,IAAI,IACb,EAAgB,KAAK,gBAAgB,EAAO,CAElD,IAAK,IAAM,KAAU,EACnB,EAAO,IAAI,EAAO,MAAM,CAG1B,OAAO,IAOX,SAAgB,EACd,EACA,EACA,EACA,EAGI,EAAE,CACO,CACb,GAAM,CAAE,WAAU,mBAAoB,EAGhC,EAAS,EACX,EAAS,EAAK,CACd,EACG,MAAM,MAAM,CACZ,OAAQ,GAAM,EAAE,OAAS,EAAE,CAC3B,IAAK,GAAM,EAAE,QAAQ,oCAAqC,GAAG,CAAC,CAC9D,OAAQ,GAAM,EAAE,OAAS,EAAE,CAI5B,EADgB,IAAI,EAAc,EAAY,EAAQ,CAC/B,cAAc,EAAO,CAGlD,GAAI,MACG,IAAM,KAAS,EACd,EAAa,IAAI,EAAM,EACzB,EAAO,OAAO,EAAM,CAK1B,OAAO,ECrdT,MAAa,EAA8C,CACzD,GAAI,OACJ,GAAI,OACJ,GAAI,YACJ,GAAI,SACJ,GAAI,cACJ,GAAI,UACJ,GAAI,cACJ,GAAI,UACJ,GAAI,UACJ,GAAI,eACL,CAKY,EAAiD,CAC5D,GAAI,UACJ,GAAI,UACJ,GAAI,cACJ,GAAI,YACJ,GAAI,aACJ,GAAI,UACJ,GAAI,aACJ,GAAI,UACJ,GAAI,UACJ,GAAI,YACL,CAoBY,EAA8C,CACzD,GAAI,aACJ,GAAI,aACJ,IAAK,SACL,GAAI,WACL,CAKY,EAAkD,CAC7D,GAAI,YACJ,IAAK,WACL,GAAI,SACL,CAKY,EAAkD,CAC7D,GAAI,WACJ,GAAI,SACL,CCtFD,IAAa,EAAb,MAAa,CAAY,CACvB,KACA,SACA,UAEA,YAAoB,EAAkB,EAAkB,EAAmB,CACzE,KAAK,KAAO,EACZ,KAAK,SAAW,EAChB,KAAK,UAAY,EAGnB,OAAO,WAAW,EAAkB,EAA8B,EAAE,CAAe,CACjF,IAAM,EAAI,KAAK,IAAI,EAAO,OAAQ,EAAE,CAC9B,EAAI,EAAQ,mBAAqB,IAEjC,EAAI,KAAK,IAAI,EAAG,KAAK,KAAM,CAAC,EAAI,KAAK,IAAI,EAAE,EAAK,KAAK,IAAM,KAAK,KAAK,CAAC,CACtE,EAAI,KAAK,IAAI,EAAG,KAAK,MAAO,EAAI,EAAK,KAAK,IAAI,CAAC,CAC/C,EAAY,EAAQ,iBACtB,KAAK,IAAI,EAAG,EAAQ,iBAAiB,CACrC,EAEE,EAAQ,KAAK,KAAK,EAAI,EAAE,CAExB,EAAS,IAAI,EADN,IAAI,WAAW,EAAM,CACG,EAAG,EAAU,CAElD,IAAK,IAAM,KAAS,EAClB,EAAO,IAAI,EAAM,CAGnB,OAAO,EAGT,IAAI,EAAqB,CACvB,GAAM,CAAC,EAAI,GAAM,KAAK,OAAO,EAAM,CACnC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAW,IAAK,CACvC,IAAM,GAAY,EAAK,EAAI,GAAM,KAAK,SACtC,KAAK,OAAO,EAAS,EAIzB,IAAI,EAAwB,CAC1B,GAAM,CAAC,EAAI,GAAM,KAAK,OAAO,EAAM,CACnC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAW,IAAK,CACvC,IAAM,
GAAY,EAAK,EAAI,GAAM,KAAK,SACtC,GAAI,CAAC,KAAK,OAAO,EAAS,CAAE,MAAO,GAErC,MAAO,GAGT,OAAe,EAAqB,CAClC,IAAM,EAAY,IAAU,EACtB,EAAM,EAAQ,EACpB,KAAK,KAAK,IAAc,GAAK,EAG/B,OAAe,EAAwB,CACrC,IAAM,EAAY,IAAU,EACtB,EAAM,EAAQ,EACpB,OAAQ,KAAK,KAAK,GAAc,GAAK,IAAU,EAGjD,OAAe,EAAiC,CAC9C,IAAM,EAAM,EAAM,aAAa,CAC3B,EAAQ,WACR,EAAQ,WAEZ,IAAK,IAAI,EAAI,EAAG,EAAI,EAAI,OAAQ,IAAK,CACnC,IAAM,EAAO,EAAI,WAAW,EAAE,CAC9B,GAAS,EACT,EAAQ,KAAK,KAAK,EAAO,SAAS,GAAK,EAEvC,GAAS,EACT,EAAQ,KAAK,KAAK,EAAO,WAAW,GAAK,EAO3C,MAJA,IAAS,IAAU,GACnB,EAAQ,KAAK,KAAK,EAAO,WAAW,GAAK,EACzC,GAAS,IAAU,GAEZ,CAAC,IAAU,EAAG,IAAU,GAAK,UAAW,GCnEnD,MAAa,EAAmB,IAAI,IAAI,sxBAkFvC,CAAC,CA2CI,EAAwB,IAAI,IAAI,+NAmCrC,CAAC,CAMI,EAAoB,IAAI,IAAI,CAChC,OACA,OACA,OACA,OACA,OACA,QACA,QACA,OACA,QACA,QACA,OACA,MACA,OACA,OACA,OACA,QACA,QACA,MACD,CAAC,CASI,EAAmB,CAAC,IAAK,IAAK,IAAI,CAExC,IAAa,EAAb,KAA8B,CAC5B,WACA,cACA,kBACA,YACA,KAEA,YACE,EACA,EACA,EAAmC,EAAE,CACrC,CACA,KAAK,WAAa,EAClB,KAAK,YAAc,EACnB,KAAK,cAAgB,EAAQ,eAAiB,EAC9C,KAAK,kBAAoB,EAAQ,mBAAqB,GACtD,KAAK,KAAO,EAAQ,MAAQ,WAM9B,QAAgB,EAAc,EAAiC,CAC7D,MAAO,CACL,OACA,MAAO,EACP,WAAY,EACZ,WAAY,EACZ,WAAY,GACb,CAWH,MAAM,EAA6B,CACjC,IAAM,EAAa,EAAK,aAAa,CAG/B,EAAe,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAe,EAAa,IAAI,aAAa,CAMnD,GALI,GAAgB,EAAiB,IAAI,EAAa,EAKlD,EAAiB,IAAI,EAAW,CAClC,OAAO,KAAK,QAAQ,EAAM,EAAa,CAKzC,IAAM,EACJ,EAAa,OAAS,GAAK,EAAa,GAAG,aAAa,GAAK,EACzD,EAAgB,EAAa,SAAW,EAG9C,GAAI,KAAK,OAAS,eAIhB,OAHI,EAAK,SAAS,IAAI,CACb,KAAK,cAAc,EAAM,EAAa,CAExC,KAAK,QAAQ,EAAM,EAAa,CAYzC,GARI,KAAK,OAAS,YAAc,GAAe,GAEzC,EAAW,OAAS,IAMtB,EAAW,OAAS,KAAK,cAAgB,EAC3C,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAIA,EAAE,CAER,IACE,IAAI,EAAI,KAAK,cACb,GAAK,EAAW,OAAS,KAAK,cAC9B,IACA,CACA,IAAM,EAAW,EAAW,MAAM,EAAG,EAAE,CACjC,EAAY,EAAW,MAAM,EAAE,CAG/B,EAAe,KAAK,SAAS,EAAU,EAAU,CAMvD,GALI,GACF,EAAW,KAAK,EAAa,CAI3B,KAAK,uBACF,IAAM,KAAU,EAEnB,GAAI,EAAS,SAAS,EAAO,EAAI,EAAS,OAAS,KAAK,cAAe,CACrE,IAAM,EAAc,EAAS,MAAM,EAAG,GAAG,CACnC,EAAS,KAAK,SAAS,EAAa,EAAU,CAChD,GAEF,EAAW,KAAK,CAAE,GAAG,EAAQ,MAAO,EAAO,MAAQ,IAAM,CAAC,GAOpE,GAAI,EAAW,SAAW,EACxB,
OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,EAAW,MAAM,EAAG,IAAM,EAAE,MAAQ,EAAE,MAAM,CAC5C,IAAM,EAAO,EAAW,GAGxB,GAAI,KAAK,OAAS,YAAc,GAAe,EAAK,MAAQ,GAC1D,OAAO,KAAK,QAAQ,EAAM,EAAa,CAIzC,IAAM,EAAQ,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAK,UAAW,GAAG,EAAK,WAAW,CAAC,CAAC,CAInE,MAAO,CACL,OACA,QACA,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAO,EAAW,CAAC,CAAC,CAMrD,WAAY,KAAK,IAAI,EAAK,MAAO,EAAE,CACnC,WAAY,GACb,CAMH,cAAsB,EAAc,EAAuC,CACzE,IAAM,EAAQ,EAAK,MAAM,IAAI,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACzD,GAAI,EAAM,OAAS,EACjB,OAAO,KAAK,QAAQ,EAAM,EAAa,CAGzC,IAAM,EAAqB,EAAE,CAC7B,IAAK,IAAM,KAAQ,EAAO,CACxB,IAAM,EAAS,KAAK,WAAW,UAAU,EAAK,CAC9C,EAAS,KAAK,GAAG,EAAO,CAG1B,IAAM,EAAc,CAAC,GAAG,IAAI,IAAI,EAAS,CAAC,CAG1C,MAAO,CACL,OACA,MAAO,EACP,WALiB,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAa,EAAK,aAAa,CAAC,CAAC,CAAC,CAMnE,WAAY,GACZ,WAAY,GACb,CAGH,SACE,EACA,EACqE,CAErE,IAAM,EAAa,KAAK,WAAW,UAAU,EAAS,CAChD,EAAc,KAAK,WAAW,UAAU,EAAU,CAGlD,EAAY,CAAC,GAAG,IAAI,IAAI,EAAW,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAC3E,EAAa,CAAC,GAAG,IAAI,IAAI,EAAY,OAAQ,GAAM,KAAK,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC,CAEnF,GAAI,EAAU,SAAW,GAAK,EAAW,SAAW,EAClD,OAAO,KAIT,IAAI,EAAQ,EAIN,EACJ,EAAI,KAAK,IAAI,EAAS,OAAS,EAAU,OAAO,EAAI,EAAS,OAAS,EAAU,QAClF,GAAS,EAAgB,GAIzB,IAAM,GAAa,EAAS,OAAS,EAAU,QAAU,EACnD,EAAc,KAAK,IAAI,EAAY,EAAG,EAAE,CAC9C,GAAS,EAAc,GAIC,EAAW,KAAM,GAAU,EAAsB,IAAI,EAAM,CAAC,GAElF,GAAS,IAKX,IAAM,EAAe,EAAU,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CACtE,EAAgB,EAAW,KAAM,GAAU,EAAkB,IAAI,EAAM,CAAC,CAgB9E,OAfI,GAAgB,EAElB,GAAS,GACA,CAAC,GAAgB,CAAC,IAE3B,GAAS,KAKP,EAAS,OAAS,GAAK,EAAU,OAAS,KAC5C,GAAS,KAIJ,CACL,UAAW,EACX,WAAY,EACZ,MAAO,KAAK,IAAI,EAAG,EAAM,CAC1B,CAOH,aAAa,EAAwB,CAEnC,OADc,KAAK,MAAM,EAAK,CACjB,aAQjB,SAAgB,EAAoB,EAA+B,CACjE,OAAO,IAAI,IAAI,EAAO,IAAK,GAAM,EAAE,aAAa,CAAC,CAAC,CAapD,SAAgB,EACd,EACA,EAAmC,EAAE,CACnB,CAClB,IAAM,EAAa,EAAO,IAAK,GAAM,EAAE,aAAa,CAAC,CACrD,OAAO,EAAY,WAAW,EAAY,EAAQ,CC7cpD,MAAa,EAA4C,IAAI,IAAI,CAE/D,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,SAAU,CAAE,MA
AO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CACxE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,WAAY,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7D,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,kBAAmB,CAAE,MAAO,gBAAiB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,YAAa,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,kBAAmB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/E,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,cAAe,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,eAAgB,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,QAAS,CAAE,MAAO,QAAS,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5D,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CAClE,CAAC,iBAAkB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC7E,CAAC,iBAAk
B,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9E,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,iBAAkB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC1E,CAAC,mBAAoB,CAAE,MAAO,mBAAoB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,gBAAiB,CAAE,MAAO,kBAAmB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACrE,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAM,IAAK,KAAM,CAAC,CACpE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,iBAAkB,CAAE,MAAO,iBAAkB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC5E,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAM,IAAK,KAAM,CAAC,CACxD,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAC9D,CAAC,cAAe,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,gBAAiB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CACzE,CAAC,kBAAmB,CAAE,MAAO,eAAgB,WAAY,GAAM,IAAK,KAAM,CAAC,CAC3E,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACtE,CAAC,kBAAmB,CAAE,MAAO,oBAAqB,WAAY,GAAM,IAAK,KAAM,CAAC,CAChF,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAM,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,WAAY,WAAY,GAAM,IAAK,KAAM,CAAC,CACjE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CAGtE,CAAC,YAAa,CAAE,MAAO,UAAW,WAAY,GAAM,IAAK,KAAM,CAAC,CAChE,CAAC,eAAgB,CAAE,MAAO,cAAe,WAAY,GAAM,IAAK,KAAM,CAAC,CACvE,CAAC,YAAa,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAC/D,CAAC,aAAc,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACpE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,gBAAiB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC5E,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAClE,CAAC,WAAY,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CAGlE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,kBAAmB,CAAE,MAAO,iBAAkB,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9E,CAAC,UAAW,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC9D,CAAC,SAAU,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CAC/D,CAAC,cA
Ae,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,cAAe,CAAE,MAAO,cAAe,WAAY,GAAO,IAAK,KAAM,CAAC,CACvE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,aAAc,CAAE,MAAO,aAAc,WAAY,GAAO,IAAK,KAAM,CAAC,CACrE,CAAC,eAAgB,CAAE,MAAO,eAAgB,WAAY,GAAO,IAAK,KAAM,CAAC,CACzE,CAAC,WAAY,CAAE,MAAO,WAAY,WAAY,GAAO,IAAK,KAAM,CAAC,CACjE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,YAAa,CAAE,MAAO,YAAa,WAAY,GAAO,IAAK,KAAM,CAAC,CACnE,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAO,IAAK,KAAM,CAAC,CAC7D,CAAC,UAAW,CAAE,MAAO,UAAW,WAAY,GAAO,IAAK,KAAM,CAAC,CAG/D,CAAC,SAAU,CAAE,MAAO,SAAU,WAAY,GAAM,IAAK,KAAM,CAAC,CAG5D,CAAC,sBAAuB,CAAE,MAAO,sBAAuB,WAAY,GAAO,IAAK,SAAU,CAAC,CAC3F,CAAC,kBAAmB,CAAE,MAAO,kBAAmB,WAAY,GAAO,IAAK,SAAU,CAAC,CACnF,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC7D,CAAC,OAAQ,CAAE,MAAO,OAAQ,WAAY,GAAO,IAAK,SAAU,CAAC,CAC9D,CAAC,CAMF,SAAgB,EACd,EACA,EACoD,CAEpD,IAAK,IAAI,EAAM,KAAK,IAAI,EAAG,EAAM,OAAS,EAAW,CAAE,GAAO,EAAG,IAAO,CAEtE,IAAM,EADc,EAAM,MAAM,EAAY,EAAa,EAAI,CAC/B,KAAK,IAAI,CAAC,aAAa,CAC/C,EAAS,EAAe,IAAI,EAAU,CAC5C,GAAI,EACF,MAAO,CAAE,SAAQ,UAAW,EAAK,CAGrC,OAAO,KAMT,SAAgB,EAAc,EAAuB,CACnD,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CAM/C,SAAgB,EAAc,EAAwC,CACpE,OAAO,EAAe,IAAI,EAAK,aAAa,CAAC,CClJ/C,MAAM,EAAqB,IAAI,IAAI,CAAC,OAAO,CAAC,CAKtC,EAAe,IAAI,IAAI,CAAC,SAAU,UAAW,SAAS,CAAC,CAKvD,EAAa,IAAI,IAAI,CACzB,cACA,UACA,QACA,UACA,UACD,CAAC,CAEI,EAAmB,CACvB,UACA,OACA,OACA,KACA,KACA,KACA,KACA,IACA,IACA,KACA,MACA,KACD,CAuED,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACV,CAClB,GAAM,CACJ,UACA,mBACA,iBAAiB,GACjB,qBAAqB,IACnB,EAGE,EAAS,EAAS,EAAK,CAGvB,EAA4B,EAAE,CAC9B,EAAgD,EAAE,CAClD,EAAa,IAAI,IACjB,EACJ,qBAAsB,EACjB,EAA6C,mBAAqB,EACnE,GAEA,GAAkB,EAA
a,IACnC,EAAO,SAAW,GAAK,EAAO,KAAO,EAAI,aAAa,CAElD,EAAqB,GAAiC,CAC1D,IAAI,EAAU,EACV,EAAmC,KAEvC,IAAK,IAAI,EAAU,EAAG,EAAU,EAAmB,IAAW,CAC5D,IAAM,EAAQ,EAAQ,aAAa,CACnC,EAAoB,KAEpB,IAAK,IAAM,KAAU,EAAkB,CACrC,GAAI,CAAC,EAAM,SAAS,EAAO,CAAE,SAE7B,IAAM,EAAO,EAAQ,MAAM,EAAG,EAAQ,OAAS,EAAO,OAAO,CAC7D,GAAI,EAAK,OAAS,EAAqB,SAEvC,IAAM,EAAa,EAAW,UAAU,EAAK,CAC7C,GAAI,CAAC,EAAe,EAAM,EAAW,CACnC,OAAO,EAGT,AACE,IAAoB,EAIxB,GAAI,CAAC,GAAqB,EAAkB,OAAS,EACnD,MAGF,EAAU,EAGZ,OAAO,MAGH,EAAa,GAA0B,CAC3C,IAAM,EAAM,EAAI,aAAa,CACvB,EAAS,EAAW,IAAI,EAAI,CAClC,GAAI,EAAQ,OAAO,EACnB,IAAM,EAAS,EAAW,UAAU,EAAI,CACxC,GACE,GACA,EAAe,EAAK,EAAO,EAC3B,EAAI,QAAU,EACd,CACA,IAAM,EAAiB,EAAkB,EAAI,CAC7C,GAAI,EAEF,OADA,EAAW,IAAI,EAAK,EAAe,CAC5B,EAIX,OADA,EAAW,IAAI,EAAK,EAAO,CACpB,GAGT,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACtC,IAAM,EAAQ,EAAO,GAGjB,MAAW,IAAI,EAAM,KAAK,CAK9B,IAAI,EAAa,IAAI,EAAM,KAAK,CAAE,CAChC,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CACF,SAIF,GAAI,EAAM,OAAS,UAAY,EAAM,OAAS,UAAW,CACnD,GACF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,CAEJ,SAIF,GAAI,EAAmB,IAAI,EAAM,KAAK,CAAE,CACtC,IAAM,EAAY,EAAM,MAAQ,GAC1B,EAAS,EAAU,EAAU,CAE7B,EAA4B,CAChC,SAAU,EACV,KAAM,EAAM,KACZ,SACA,SAAU,GACX,CAKK,EAAgB,EAAO,SAAW,GAAK,EAAO,KAAO,EAAU,aAAa,CAClF,GAAI,IAAqB,GAAsB,GAAgB,CAC7D,IAAM,EAAQ,EAAiB,MAAM,EAAU,CAC/C,GAAI,EAAM,WAAY,CACpB,EAAU,cAAgB,EAE1B,IAAM,EAAa,EAAM,MAAM,QAAS,GAAM,EAAU,EAAE,CAAC,CAC3D,EAAU,eAAiB,EAC3B,EAAU,OAAS,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,EAAQ,GAAG,EAAW,CAAC,CAAC,EAI/D,EAAQ,KAAK,EAAU,CACvB,EAAW,KAAK,CAAE,MAAO,EAAQ,OAAS,EAAG,QAAO,CAAC,CACrD,SAIF,EAAQ,KAAK,CACX,SAAU,EAAM,MAAQ,GACxB,KAAM,EAAM,KACZ,OAAQ,EAAE,CACV,SAAU,GACX,CAAC,EAIJ,GAAI,GAAW,EAAW,OAAS,EAAG,CACpC,IAAM,EAAgB,IAAI,EAAc,EAAY,EAAQ,CAE5D,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,OAAQ,IAAK,CAC1C,GAAM,CAAE,QAAO,SAAU,EAAW,GAC9B,EAAY,EAAI,EAAI,EAAW,EAAI,GAAG,MAAQ,KAC9C,EAAY,EAAI,EAAW,OAAS,EAAI,EAAW,EAAI,GAAG,MAAQ,KAElE,EAAS,EAAc,aAC3B,EAAM,MAAQ,GACd,GAAW,MAAQ,KACnB,GAAW,MAAQ,KACnB,CACE,WAAY,
GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC1D,WAAY,GAAW,KAAO,EAAU,EAAU,KAAK,CAAG,IAAA,GAC3D,CACF,CAED,EAAQ,GAAO,cAAgB,EAAO,MACtC,EAAQ,GAAO,WAAa,EAAO,iBAIrC,IAAK,GAAM,CAAE,WAAW,EAAY,CAClC,IAAM,EAAY,EAAQ,GACtB,EAAU,OAAO,OAAS,IAC5B,EAAU,cAAgB,EAAU,OAAO,GAC3C,EAAU,WAAa,EAAU,OAAO,SAAW,EAAI,EAAM,IAKnE,OAAO,EAWT,SAAgB,EACd,EACA,EACA,EAA0B,EAAE,CACf,CACb,GAAM,CACJ,kBAAkB,GAClB,qBAAqB,GACrB,yBAAyB,IACvB,EAEE,EAAY,EAAY,EAAM,EAAY,EAAQ,CAClD,EAAS,IAAI,IAMb,GAAgB,EAAe,IAC9B,EACD,EACK,EAAqB,EAAO,EAAI,CAElC,EAAa,IAAI,EAAM,CAJD,GAO/B,IAAK,IAAM,KAAS,EAEd,MAAM,SAIV,IAAI,MAEG,IAAM,KAAS,EAAM,OACnB,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,MAKjB,EAAM,gBAGH,EAAa,EAAM,cAAc,EACpC,EAAO,IAAI,EAAM,cAAc,EAMrC,GAAI,EAAM,eAAe,WAAY,CACnC,IAAM,EAAa,EAAM,eACrB,EAAM,eACN,EAAM,cAAc,MAAM,QAAS,GAAM,EAAW,UAAU,EAAE,CAAC,CACrE,IAAK,IAAM,KAAS,EACX,EAAa,EAAM,EACtB,EAAO,IAAI,EAAM,EAM3B,OAAO,EA4CT,SAAgB,EACd,EACA,EACA,EAA8B,EAAE,CACb,CACnB,GAAM,CACJ,kBAAkB,GAClB,qBAAqB,GACrB,yBAAyB,GACzB,cAAc,MACd,aAAa,MACb,aAAa,GACb,kBAAkB,GAClB,oBAAoB,IAClB,EAEE,EAAY,EAAY,EAAM,EAAY,EAAQ,CAClD,EAAqB,EAAE,CAMvB,GAAgB,EAAe,IAC9B,EACD,EACK,EAAqB,EAAO,EAAI,CAElC,EAAa,IAAI,EAAM,CAJD,GAO/B,IAAK,IAAM,KAAS,EAAW,CAE7B,GAAI,EAAM,SAAU,SAEpB,IAAI,EAAuB,EAAE,CAO7B,GANI,EACF,EAAa,EAAM,OACV,EAAM,gBACf,EAAa,CAAC,EAAM,cAAc,EAGhC,EAAiB,CACnB,IAAM,EAAM,EAAM,UAAY,GAC9B,GAAI,EAAI,OAAS,EAAG,CAClB,IAAM,EAAW,EAAoB,EAAI,aAAa,CAAG,EACzD,EAAa,CAAC,GAAG,EAAY,EAAS,EAI1C,IAAM,EAAS,CACb,GAAG,IAAI,IAAI,EAAW,OAAQ,GAAU,GAAS,CAAC,EAAa,EAAM,CAAC,CAAC,CACxE,CAEG,EAAO,OAAS,GAClB,EAAO,KAAK,EAAO,CAevB,MAAO,CAAE,SAAQ,MAXH,EACX,IAAK,GAAU,CACd,IAAM,EAAS,EAAM,KAAK,EAAW,CAIrC,OAHI,GAAc,EAAM,OAAS,EACxB,IAAI,EAAO,GAEb,GACP,CACD,OAAQ,GAAS,EAAK,OAAS,EAAE,CACjC,KAAK,EAAY,CAEI,CAqC1B,SAAgB,GACd,EACA,EACA,EACA,EAGI,EAAE,CACa,CACnB,IAAM,EAAQ,YAAY,KAAK,CAE3B,EACA,EAEJ,OAAQ,EAAR,CACE,IAAK,QAAS,CAEZ,IAAM,EAAS,EAAK,MAAM,MAAM,CAAC,OAAQ,GAAM,EAAE,OAAS,EAAE,CACtD,EAAmC,EAAE,CAE3C,IAAK,IAAM,KAAS,EAAQ,CAC1B,IAAM,EAAU,EAAM,QAAQ,oCAAqC,GAAG,CACtE,GAAI,EAAS,CACX,IAAM,EAAc,EAAW,UAAU,EAAQ,CACjD,EAAe,KAAK,C
AClB,SAAU,EACV,KAAM,OACN,OAAQ,EACR,SAAU,GACV,cAAe,EAAY,GAC3B,WAAY,EAAY,SAAW,EAAI,EAAM,GAC9C,CAAC,EAGN,EAAY,EACZ,EAAS,IAAI,IAAI,EAAe,IAAK,GAAM,EAAE,cAAe,CAAC,OAAO,QAAQ,CAAC,CAC7E,MAGF,IAAK,YAEH,EAAY,EAAY,EAAM,EAAW,CACzC,EAAS,IAAI,IACX,EACG,OAAQ,GAAM,EAAE,OAAS,QAAU,EAAE,OAAO,OAAS,EAAE,CACvD,IAAK,GAAM,EAAE,OAAO,GAAG,CAC3B,CACD,MAGF,IAAK,gBAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACpB,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACpB,CAAC,CACF,MAGF,IAAK,OAEH,EAAY,EAAY,EAAM,EAAY,CACxC,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,EAAS,EAAuB,EAAM,EAAY,CAChD,QAAS,EAAU,QACnB,iBAAkB,EAAU,iBAC7B,CAAC,CACF,MAIJ,IAAM,EAAS,YAAY,KAAK,CAAG,EAG7B,EAAa,EAAU,OAAQ,GAAM,EAAE,OAAS,OAAO,CACvD,EAAY,EAAW,OAEvB,EAAkB,EAAW,OAAQ,GAGvC,EAAE,OAAO,OAAS,GAClB,EAAE,EAAE,OAAO,SAAW,GAAK,EAAE,OAAO,KAAO,EAAE,SAAS,aAAa,EAErE,CAAC,OAEG,EAAiB,EAAW,OAAQ,GAAM,EAAE,OAAO,OAAS,EAAE,CAAC,OAE/D,EAAc,EACjB,OAAQ,GAAM,EAAE,aAAe,IAAA,GAAU,CACzC,IAAK,GAAM,EAAE,WAAY,CACtB,EACJ,EAAY,OAAS,EACjB,EAAY,QAAQ,EAAG,IAAM,EAAI,EAAG,EAAE,CAAG,EAAY,OACrD,EAEA,EAAiB,EAAW,OAAQ,GAAM,EAAE,eAAe,WAAW,CAAC,OACvE,EAAkB,EAAU,OAAQ,GAAM,EAAE,SAAS,CAAC,OAE5D,MAAO,CACL,YACA,kBACA,SAAU,EAAY,EAAI,EAAkB,EAAY,EACxD,iBACA,cAAe,EAAY,EAAI,EAAiB,EAAY,EAC5D,gBACA,iBACA,kBACA,aAAc,EAAO,KACrB,SACD"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "lemma-is",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Icelandic word form to lemma lookup for browser and Node.js",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"icelandic",
|
|
@@ -27,6 +27,17 @@
|
|
|
27
27
|
"data-dist/lemma-is.core.bin",
|
|
28
28
|
"README.md"
|
|
29
29
|
],
|
|
30
|
+
"scripts": {
|
|
31
|
+
"build": "tsdown",
|
|
32
|
+
"build:data": "uv run python scripts/build-data.py",
|
|
33
|
+
"build:binary": "uv run python scripts/build-binary.py",
|
|
34
|
+
"build:core": "uv run python scripts/build-binary.py --no-bigrams --no-morph --top-words 350000 --output data-dist/lemma-is.core.bin",
|
|
35
|
+
"benchmark:core-sweep": "node --import=tsx scripts/benchmark/core-sweep.ts",
|
|
36
|
+
"test": "NODE_OPTIONS='--max-old-space-size=8192' vitest run",
|
|
37
|
+
"test:watch": "NODE_OPTIONS='--max-old-space-size=8192' vitest",
|
|
38
|
+
"typecheck": "tsc --noEmit",
|
|
39
|
+
"serve": "python3 -m http.server 8080"
|
|
40
|
+
},
|
|
30
41
|
"devDependencies": {
|
|
31
42
|
"@types/node": "^22.0.0",
|
|
32
43
|
"tsdown": "^0.20.1",
|
|
@@ -34,21 +45,11 @@
|
|
|
34
45
|
"typescript": "^5.9.3",
|
|
35
46
|
"vitest": "^4.0.18"
|
|
36
47
|
},
|
|
48
|
+
"packageManager": "pnpm@10.10.0",
|
|
37
49
|
"dependencies": {
|
|
38
50
|
"dawg-lookup": "^2.2.1",
|
|
39
51
|
"dawg-set": "^0.0.0",
|
|
40
52
|
"tokenize-is": "^0.1.0",
|
|
41
53
|
"trie-mapping": "^4.0.0"
|
|
42
|
-
},
|
|
43
|
-
"scripts": {
|
|
44
|
-
"build": "tsdown",
|
|
45
|
-
"build:data": "uv run python scripts/build-data.py",
|
|
46
|
-
"build:binary": "uv run python scripts/build-binary.py",
|
|
47
|
-
"build:core": "uv run python scripts/build-binary.py --no-bigrams --no-morph --top-words 350000 --output data-dist/lemma-is.core.bin",
|
|
48
|
-
"benchmark:core-sweep": "node --import=tsx scripts/benchmark/core-sweep.ts",
|
|
49
|
-
"test": "NODE_OPTIONS='--max-old-space-size=8192' vitest run",
|
|
50
|
-
"test:watch": "NODE_OPTIONS='--max-old-space-size=8192' vitest",
|
|
51
|
-
"typecheck": "tsc --noEmit",
|
|
52
|
-
"serve": "python3 -m http.server 8080"
|
|
53
54
|
}
|
|
54
|
-
}
|
|
55
|
+
}
|