tokenize-is 0.1.0 → 0.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/README.md CHANGED
@@ -67,9 +67,24 @@ All tokens have a `kind` discriminator for TypeScript narrowing:
67
67
  tokenize(text, {
68
68
  replaceCompositeGlyphs: true, // Normalize Unicode (a + ́ → á)
69
69
  includeSentenceMarkers: false, // Add s_begin/s_end tokens
70
+ includeOffsets: false, // Add span.start/end character offsets
70
71
  });
71
72
  ```
72
73
 
74
+ ### Token Offsets
75
+
76
+ When `includeOffsets: true`, each token includes a `span` giving its character offsets in the original text (`start` is inclusive, `end` is exclusive):
77
+
78
+ ```typescript
79
+ const tokens = tokenize("Halló heimur", { includeOffsets: true });
80
+ // tokens[0] = { kind: "word", text: "Halló", span: { start: 0, end: 5 } }
81
+ // tokens[1] = { kind: "word", text: "heimur", span: { start: 6, end: 12 } }
82
+
83
+ // Extract original text from spans
84
+ const text = "Halló heimur";
85
+ text.slice(tokens[0].span.start, tokens[0].span.end); // "Halló"
86
+ ```
87
+
73
88
  ## Port Fidelity
74
89
 
75
90
  This is a TypeScript port of [Miðeind's Tokenizer](https://github.com/mideind/Tokenizer) (MIT licensed).
package/dist/index.d.mts CHANGED
@@ -11,16 +11,19 @@ type Token = WordToken | NumberToken | OrdinalToken | TimeToken | DateToken | Da
11
11
  interface WordToken {
12
12
  kind: "word";
13
13
  text: string;
14
+ span?: TokenSpan;
14
15
  }
15
16
  interface NumberToken {
16
17
  kind: "number";
17
18
  text: string;
18
19
  value: number;
20
+ span?: TokenSpan;
19
21
  }
20
22
  interface OrdinalToken {
21
23
  kind: "ordinal";
22
24
  text: string;
23
25
  value: number;
26
+ span?: TokenSpan;
24
27
  }
25
28
  interface TimeToken {
26
29
  kind: "time";
@@ -28,6 +31,7 @@ interface TimeToken {
28
31
  hour: number;
29
32
  minute: number;
30
33
  second: number;
34
+ span?: TokenSpan;
31
35
  }
32
36
  interface DateToken {
33
37
  kind: "date";
@@ -35,6 +39,7 @@ interface DateToken {
35
39
  year: number;
36
40
  month: number;
37
41
  day: number;
42
+ span?: TokenSpan;
38
43
  }
39
44
  interface DateAbsToken {
40
45
  kind: "dateabs";
@@ -42,6 +47,7 @@ interface DateAbsToken {
42
47
  year: number;
43
48
  month: number;
44
49
  day: number;
50
+ span?: TokenSpan;
45
51
  }
46
52
  interface DateRelToken {
47
53
  kind: "daterel";
@@ -49,85 +55,102 @@ interface DateRelToken {
49
55
  year: number;
50
56
  month: number;
51
57
  day: number;
58
+ span?: TokenSpan;
52
59
  }
53
60
  interface YearToken {
54
61
  kind: "year";
55
62
  text: string;
56
63
  value: number;
64
+ span?: TokenSpan;
57
65
  }
58
66
  interface AmountToken {
59
67
  kind: "amount";
60
68
  text: string;
61
69
  value: number;
62
70
  currency: string;
71
+ span?: TokenSpan;
63
72
  }
64
73
  interface CurrencyToken {
65
74
  kind: "currency";
66
75
  text: string;
67
76
  iso: string;
77
+ span?: TokenSpan;
68
78
  }
69
79
  interface MeasurementToken {
70
80
  kind: "measurement";
71
81
  text: string;
72
82
  value: number;
73
83
  unit: string;
84
+ span?: TokenSpan;
74
85
  }
75
86
  interface PercentToken {
76
87
  kind: "percent";
77
88
  text: string;
78
89
  value: number;
90
+ span?: TokenSpan;
79
91
  }
80
92
  interface PunctuationToken {
81
93
  kind: "punctuation";
82
94
  text: string;
83
95
  normalized: string;
84
96
  position: PunctuationType;
97
+ span?: TokenSpan;
85
98
  }
86
99
  interface UrlToken {
87
100
  kind: "url";
88
101
  text: string;
102
+ span?: TokenSpan;
89
103
  }
90
104
  interface DomainToken {
91
105
  kind: "domain";
92
106
  text: string;
107
+ span?: TokenSpan;
93
108
  }
94
109
  interface EmailToken {
95
110
  kind: "email";
96
111
  text: string;
112
+ span?: TokenSpan;
97
113
  }
98
114
  interface HashtagToken {
99
115
  kind: "hashtag";
100
116
  text: string;
117
+ span?: TokenSpan;
101
118
  }
102
119
  interface UsernameToken {
103
120
  kind: "username";
104
121
  text: string;
105
122
  username: string;
123
+ span?: TokenSpan;
106
124
  }
107
125
  interface NumberWithLetterToken {
108
126
  kind: "numwletter";
109
127
  text: string;
110
128
  value: number;
111
129
  letter: string;
130
+ span?: TokenSpan;
112
131
  }
113
132
  interface TelnoToken {
114
133
  kind: "telno";
115
134
  text: string;
116
135
  cc: string;
117
136
  number: string;
137
+ span?: TokenSpan;
118
138
  }
119
139
  interface MoleculeToken {
120
140
  kind: "molecule";
121
141
  text: string;
142
+ span?: TokenSpan;
122
143
  }
123
144
  interface SsnToken {
124
145
  kind: "ssn";
125
146
  text: string;
126
147
  value: string;
148
+ span?: TokenSpan;
127
149
  }
128
150
  interface SerialNumberToken {
129
151
  kind: "serialnumber";
130
152
  text: string;
153
+ span?: TokenSpan;
131
154
  }
132
155
  interface TimestampToken {
133
156
  kind: "timestamp";
@@ -138,6 +161,7 @@ interface TimestampToken {
138
161
  hour: number;
139
162
  minute: number;
140
163
  second: number;
164
+ span?: TokenSpan;
141
165
  }
142
166
  interface TimestampAbsToken {
143
167
  kind: "timestampabs";
@@ -148,6 +172,7 @@ interface TimestampAbsToken {
148
172
  hour: number;
149
173
  minute: number;
150
174
  second: number;
175
+ span?: TokenSpan;
151
176
  }
152
177
  interface TimestampRelToken {
153
178
  kind: "timestamprel";
@@ -158,34 +183,42 @@ interface TimestampRelToken {
158
183
  hour: number;
159
184
  minute: number;
160
185
  second: number;
186
+ span?: TokenSpan;
161
187
  }
162
188
  interface CompanyToken {
163
189
  kind: "company";
164
190
  text: string;
191
+ span?: TokenSpan;
165
192
  }
166
193
  interface PersonToken {
167
194
  kind: "person";
168
195
  text: string;
196
+ span?: TokenSpan;
169
197
  }
170
198
  interface EntityToken {
171
199
  kind: "entity";
172
200
  text: string;
201
+ span?: TokenSpan;
173
202
  }
174
203
  interface UnknownToken {
175
204
  kind: "unknown";
176
205
  text: string;
206
+ span?: TokenSpan;
177
207
  }
178
208
  interface SentenceBeginToken {
179
209
  kind: "s_begin";
180
210
  text: null;
211
+ span?: TokenSpan;
181
212
  }
182
213
  interface SentenceEndToken {
183
214
  kind: "s_end";
184
215
  text: null;
216
+ span?: TokenSpan;
185
217
  }
186
218
  interface SentenceSplitToken {
187
219
  kind: "s_split";
188
220
  text: null;
221
+ span?: TokenSpan;
189
222
  }
190
223
  /**
191
224
  * Options for the tokenize function
@@ -201,6 +234,21 @@ interface TokenizeOptions {
201
234
  * @default false
202
235
  */
203
236
  includeSentenceMarkers?: boolean;
237
+ /**
238
+ * Whether to include character offsets (start/end) on each token.
239
+ * Useful for highlighting matched terms in original text.
240
+ * @default false
241
+ */
242
+ includeOffsets?: boolean;
243
+ }
244
+ /**
245
+ * Token span in original text. Only present when includeOffsets is true.
246
+ */
247
+ interface TokenSpan {
248
+ /** Start offset (inclusive) in original text */
249
+ start: number;
250
+ /** End offset (exclusive) in original text */
251
+ end: number;
204
252
  }
205
253
  //#endregion
206
254
  //#region src/tokenize.d.ts
@@ -249,5 +297,5 @@ declare function splitIntoSentences(text: string): string[];
249
297
  */
250
298
  declare function normalizeUnicode(text: string): string;
251
299
  //#endregion
252
- export { type AmountToken, type CompanyToken, type CurrencyToken, type DateAbsToken, type DateRelToken, type DateToken, type DomainToken, type EmailToken, type EntityToken, type HashtagToken, type MeasurementToken, type MoleculeToken, type NumberToken, type NumberWithLetterToken, type OrdinalToken, type PercentToken, type PersonToken, type PunctuationToken, type PunctuationType, type SentenceBeginToken, type SentenceEndToken, type SentenceSplitToken, type SerialNumberToken, type SsnToken, type TelnoToken, type TimeToken, type TimestampAbsToken, type TimestampRelToken, type TimestampToken, type Token, type TokenizeOptions, type UnknownToken, type UrlToken, type UsernameToken, type WordToken, type YearToken, normalizeUnicode, splitIntoSentences, tokenize };
300
+ export { type AmountToken, type CompanyToken, type CurrencyToken, type DateAbsToken, type DateRelToken, type DateToken, type DomainToken, type EmailToken, type EntityToken, type HashtagToken, type MeasurementToken, type MoleculeToken, type NumberToken, type NumberWithLetterToken, type OrdinalToken, type PercentToken, type PersonToken, type PunctuationToken, type PunctuationType, type SentenceBeginToken, type SentenceEndToken, type SentenceSplitToken, type SerialNumberToken, type SsnToken, type TelnoToken, type TimeToken, type TimestampAbsToken, type TimestampRelToken, type TimestampToken, type Token, type TokenSpan, type TokenizeOptions, type UnknownToken, type UrlToken, type UsernameToken, type WordToken, type YearToken, normalizeUnicode, splitIntoSentences, tokenize };
253
301
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/types.ts","../src/tokenize.ts","../src/split-sentences.ts","../src/pipeline/lexer.ts"],"mappings":";;AAGA;;KAAY,eAAA;;;AAMZ;;KAAY,KAAA,GACR,SAAA,GACA,WAAA,GACA,YAAA,GACA,SAAA,GACA,SAAA,GACA,YAAA,GACA,YAAA,GACA,SAAA,GACA,WAAA,GACA,aAAA,GACA,gBAAA,GACA,YAAA,GACA,gBAAA,GACA,QAAA,GACA,WAAA,GACA,UAAA,GACA,YAAA,GACA,aAAA,GACA,qBAAA,GACA,UAAA,GACA,aAAA,GACA,QAAA,GACA,iBAAA,GACA,cAAA,GACA,iBAAA,GACA,iBAAA,GACA,YAAA,GACA,WAAA,GACA,WAAA,GACA,YAAA,GACA,kBAAA,GACA,gBAAA,GACA,kBAAA;AAAA,UAEa,SAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,WAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;AAAA;AAAA,UAGe,YAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;AAAA;AAAA,UAGe,SAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;AAAA;AAAA,UAGe,SAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;AAAA;AAAA,UAGe,YAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;AAAA;AAAA,UAGe,YAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;AAAA;AAAA,UAGe,SAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;AAAA;AAAA,UAGe,WAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,QAAA;AAAA;AAAA,UAGe,aAAA;EACf,IAAA;EACA,IAAA;EACA,GAAA;AAAA;AAAA,UAGe,gBAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,IAAA;AAAA;AAAA,UAGe,YAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;AAAA;AAAA,UAGe,gBAAA;EACf,IAAA;EACA,IAAA;EACA,UAAA;EACA,QAAA,EAAU,eAAA;AAAA;AAAA,UAGK,QAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,WAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,UAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,YAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,aAAA;EACf,IAAA;EACA,IAAA;EACA,QAAA;AAAA;AAAA,UAGe,qBAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,MAAA;AAAA;AAAA,UAGe,UAAA;EACf,IAAA;EACA,IAAA;EACA,EAAA;EACA,MAAA;AAAA;AAAA,UAGe,aAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,QAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;AAAA;AAAA,UAGe,iBAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,cAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;AAAA;AAAA,UAGe,iBAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;AAAA;AAAA,UAGe,iBAAA;EACf,IAAA;EACA,IAAA
;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;AAAA;AAAA,UAGe,YAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,WAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,WAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,YAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,kBAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,gBAAA;EACf,IAAA;EACA,IAAA;AAAA;AAAA,UAGe,kBAAA;EACf,IAAA;EACA,IAAA;AAAA;;;;UAMe,eAAA;EAzIV;AAGP;;;EA2IE,sBAAA;EA1IA;;;;EAgJA,sBAAA;AAAA;;;AArQF;;;;;;;;;;;;;;;;;AAAA,iBCoBgB,QAAA,CAAS,IAAA,UAAc,OAAA,GAAS,eAAA,GAAuB,KAAA;;;;AD1BvE;;;;;AAMA;;;;;;;;;;;iBEYgB,kBAAA,CAAmB,IAAA;;;;;;iBCsCnB,gBAAA,CAAiB,IAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/types.ts","../src/tokenize.ts","../src/split-sentences.ts","../src/pipeline/lexer.ts"],"mappings":";;AAGA;;KAAY,eAAA;;;AAMZ;;KAAY,KAAA,GACR,SAAA,GACA,WAAA,GACA,YAAA,GACA,SAAA,GACA,SAAA,GACA,YAAA,GACA,YAAA,GACA,SAAA,GACA,WAAA,GACA,aAAA,GACA,gBAAA,GACA,YAAA,GACA,gBAAA,GACA,QAAA,GACA,WAAA,GACA,UAAA,GACA,YAAA,GACA,aAAA,GACA,qBAAA,GACA,UAAA,GACA,aAAA,GACA,QAAA,GACA,iBAAA,GACA,cAAA,GACA,iBAAA,GACA,iBAAA,GACA,YAAA,GACA,WAAA,GACA,WAAA,GACA,YAAA,GACA,kBAAA,GACA,gBAAA,GACA,kBAAA;AAAA,UAEa,SAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,WAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,YAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,SAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,SAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,YAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,YAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,SAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,WAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,QAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,aAAA;EACf,IAAA;EACA,IAAA;EACA,GAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,gBAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,YAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,gBAAA;EACf,IAAA;EACA,IAAA;EACA,UAAA;EACA,QAAA,EAAU,eAAA;EACV,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,QAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,WAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,UAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,YAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,aAAA;EACf,IAAA;EACA,IAAA;EACA,QAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,qBAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,MAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,UAAA;EACf,IAAA;EACA,IAAA;EACA,EAAA;EA
CA,MAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,aAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,QAAA;EACf,IAAA;EACA,IAAA;EACA,KAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,iBAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,cAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,iBAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,iBAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,GAAA;EACA,IAAA;EACA,MAAA;EACA,MAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,YAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,WAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,WAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,YAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,kBAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,gBAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;AAAA,UAGQ,kBAAA;EACf,IAAA;EACA,IAAA;EACA,IAAA,GAAO,SAAA;AAAA;;;;UAMQ,eAAA;EAzJf;;;;EA8JA,sBAAA;EA3JO;;;AAGT;EA8JE,sBAAA;;;;;;EAOA,cAAA;AAAA;;AA/JF;;UAqKiB,SAAA;EAlKC;EAoKhB,KAAA;EArKA;EAuKA,GAAA;AAAA;;;AAvTF;;;;;;;;;;;;;;;;;AAAA,iBCoBgB,QAAA,CAAS,IAAA,UAAc,OAAA,GAAS,eAAA,GAAuB,KAAA;;;;AD1BvE;;;;;AAMA;;;;;;;;;;;iBEYgB,kBAAA,CAAmB,IAAA;;;;;;iBCsCnB,gBAAA,CAAiB,IAAA"}
package/dist/index.mjs CHANGED
@@ -1,2 +1,2 @@
1
- const e={á:`á`,é:`é`,í:`í`,ó:`ó`,ú:`ú`,ý:`ý`,Á:`Á`,É:`É`,Í:`Í`,Ó:`Ó`,Ú:`Ú`,Ý:`Ý`,ä:`ä`,ë:`ë`,ö:`ö`,ü:`ü`,Ä:`Ä`,Ë:`Ë`,Ö:`Ö`,Ü:`Ü`,"­":``,"​":``,"":``},t=`-–—`,n=`([„‚«#$€£¥₽<`;``+t,n+``;const r=new Set(`([„‚«#$€£¥₽<"*•&+=@©|.,:;)]!%‰?»“’‛‘…>°^/±'´~\\-–—`),i=new Set(n),a=new Set(`.,:;)]!%‰?»“’‛‘…>°`),o=new Set(`^/±'´~\\-–—`),s=`'‚‛’´`,c=`"“„”«»`,l=new Set([`.`,`?`,`!`,`…`]),u=new Set([`)`,`]`,`“`,`»`,`”`,`’`,`"`,`[…]`]),d=new Set([`?`,`!`,`…`]),f=new Set(`0123456789`),p=new Set([`+`,`-`]),m={janúar:1,janúars:1,febrúar:2,febrúars:2,mars:3,apríl:4,apríls:4,maí:5,maís:5,júní:6,júnís:6,júlí:7,júlís:7,ágúst:8,ágústs:8,september:9,septembers:9,október:10,októbers:10,nóvember:11,nóvembers:11,desember:12,desembers:12,"jan.":1,"feb.":2,"mar.":3,"apr.":4,"jún.":6,"júl.":7,"ág.":8,"ágú.":8,"sep.":9,"sept.":9,"okt.":10,"nóv.":11,"des.":12,jan:1,feb:2,mar:3,apr:4,jún:6,júl:7,ág:8,ágú:8,sep:9,sept:9,okt:10,nóv:11,des:12},h=new Set([`Ágúst`]),g=[0,31,29,31,30,31,30,31,31,30,31,30,31],_=new Set([`kl`,`kl.`,`klukkan`]),ee={eitt:[1,0,0],tvö:[2,0,0],þrjú:[3,0,0],fjögur:[4,0,0],fimm:[5,0,0],sex:[6,0,0],sjö:[7,0,0],átta:[8,0,0],níu:[9,0,0],tíu:[10,0,0],ellefu:[11,0,0],tólf:[12,0,0],hálfeitt:[12,30,0],hálftvö:[1,30,0],hálfþrjú:[2,30,0],hálffjögur:[3,30,0],hálffimm:[4,30,0],hálfsex:[5,30,0],hálfsjö:[6,30,0],hálfátta:[7,30,0],hálfníu:[8,30,0],hálftíu:[9,30,0],hálfellefu:[10,30,0],hálftólf:[11,30,0]},v=new Set([`e.Kr`,`e.Kr.`]),y=new Set([`f.Kr`,`f.Kr.`]);new Set([...v,...y]);const b={$:`USD`,"€":`EUR`,"£":`GBP`,"¥":`JPY`,"₽":`RUB`},x=new 
Set([`ISK`,`DKK`,`NOK`,`SEK`,`GBP`,`USD`,`EUR`,`CAD`,`AUD`,`CHF`,`JPY`,`PLN`,`RUB`,`CZK`,`INR`,`CNY`,`RMB`,`HKD`,`NZD`,`SGD`,`MXN`,`ZAR`]),S={"kr.":1,kr:1,krónur:1,"þ.kr.":1e3,"þ.kr":1e3,"þús.kr.":1e3,"þús.kr":1e3,"m.kr.":1e6,"m.kr":1e6,"mkr.":1e6,mkr:1e6,"millj.kr.":1e6,"millj.kr":1e6,"ma.kr.":1e9,"ma.kr":1e9,"mlja.kr.":1e9,"mlja.kr":1e9},C={m:[`m`,1],mm:[`m`,.001],μm:[`m`,1e-6],cm:[`m`,.01],sm:[`m`,.01],km:[`m`,1e3],ft:[`m`,.3048],mi:[`m`,1609.34],"m²":[`m²`,1],fm:[`m²`,1],"km²":[`m²`,1e6],"cm²":[`m²`,.01],ha:[`m²`,1e4],"m³":[`m³`,1],"cm³":[`m³`,1e-6],"km³":[`m³`,1e9],l:[`m³`,.001],ltr:[`m³`,.001],dl:[`m³`,1e-4],cl:[`m³`,1e-5],ml:[`m³`,1e-6],gal:[`m³`,.00378541],bbl:[`m³`,.158987294928],K:[`K`,1],"°K":[`K`,1],g:[`kg`,.001],gr:[`kg`,.001],kg:[`kg`,1],t:[`kg`,1e3],mg:[`kg`,1e-6],μg:[`kg`,1e-9],tn:[`kg`,1e3],lb:[`kg`,.453592],s:[`s`,1],ms:[`s`,.001],μs:[`s`,1e-6],klst:[`s`,3600],mín:[`s`,60],N:[`N`,1],kN:[`N`,1e3],Nm:[`J`,1],J:[`J`,1],kJ:[`J`,1e3],MJ:[`J`,1e6],GJ:[`J`,1e9],TJ:[`J`,0xe8d4a51000],kWh:[`J`,36e5],MWh:[`J`,36e8],kWst:[`J`,36e5],MWst:[`J`,36e8],kcal:[`J`,4184],cal:[`J`,4.184],W:[`W`,1],mW:[`W`,.001],kW:[`W`,1e3],MW:[`W`,1e6],GW:[`W`,1e9],TW:[`W`,0xe8d4a51000],V:[`V`,1],mV:[`V`,.001],kV:[`V`,1e3],A:[`A`,1],mA:[`A`,.001],Hz:[`Hz`,1],kHz:[`Hz`,1e3],MHz:[`Hz`,1e6],GHz:[`Hz`,1e9],Pa:[`Pa`,1],hPa:[`Pa`,100],"°":[`°`,1],"%":[`%`,1],"‰":[`‰`,.1]},w=new Set(Object.keys(C));function T(){let e=Object.keys(C).sort((e,t)=>t.length-e.length).map(e=>{let t=e.replace(/[.*+?^${}()|[\]\\]/g,`\\$&`);return e[e.length-1].match(/[a-zA-Z]/)?`${t}(?!\\w)`:t});return RegExp(`^(${e.join(`|`)})`,`u`)}const E=T();function te(){let e=Object.keys(b).sort((e,t)=>t.length-e.length).map(e=>e.replace(/[.*+?^${}()|[\]\\]/g,`\\$&`));return RegExp(`^(${e.join(`|`)})`,`u`)}te();function ne(){let e=[...Object.keys(C),...Object.keys(b)].sort((e,t)=>t.length-e.length).map(e=>{let t=e.replace(/[.*+?^${}()|[\]\\]/g,`\\$&`);return e[e.length-1].match(/[a-zA-Z]/)?`${t}(?!\\w)`:t});return 
RegExp(`(${e.join(`|`)})$`)}ne();const re=/^(\d{1,2}):(\d{2}):(\d{2}),(\d{2})(?!\d)/,ie=/^(\d{1,2}):(\d{2}):(\d{2})(?!\d)/,ae=/^(\d{1,2}):(\d{2})(?!\d)/,oe=/^(\d{4})[-/](\d{2})[-/](\d{2})(?!\d)/,se=/^(\d{1,2})[./-](\d{1,2})[./-](\d{2,4})(?!\d)/,ce=/^(\d{2})\.(\d{2})(?!\d)/,D=/^(\d{2})[.-](\d{4})(?!\d)/,O=/^(\d+)([a-zA-Z])(?!\w)/u,k=/^[^@\s]+@[^@\s]+(\.[^@\s.,/:;"()%#!?]+)+/,A=/^(https?:\/\/|ftp:\/\/|file:\/\/|mailto:|www\.)/i,j=/^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z]{2,})+/,M=/^#\w+/u,N=/^@[0-9a-z_]+/i,P=/^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/;function F(e){return P.test(e)}const I=/^\+(\d{1,3})[-\s]?(\d{3})[-\s]?(\d{4})(?!\d)/,L=/^[A-Z][a-z]?\d*(?:[A-Z][a-z]?\d*)+$/,R=/^(\d{6})-?(\d{4})(?!\d)/,z=/^(\d+)-(\d+)(?:-\d+)*(?!\d)/,B=/^(\d{4})[-/](\d{2})[-/](\d{2})[T\s](\d{1,2}):(\d{2}):(\d{2})(?!\d)/,V=/^(\d{4})[-/](\d{2})[-/](\d{2})[T\s](\d{1,2}):(\d{2})(?!\d)/,H=/^\[company[:\s]([^\]]+)\]/i,U=/^\[person[:\s]([^\]]+)\]/i,W=/^\[entity[:\s]([^\]]+)\]/i,G=new Set(`H.He.Li.Be.B.C.N.O.F.Ne.Na.Mg.Al.Si.P.S.Cl.Ar.K.Ca.Sc.Ti.V.Cr.Mn.Fe.Co.Ni.Cu.Zn.Ga.Ge.As.Se.Br.Kr.Rb.Sr.Y.Zr.Nb.Mo.Tc.Ru.Rh.Pd.Ag.Cd.In.Sn.Sb.Te.I.Xe.Cs.Ba.La.Ce.Pr.Nd.Pm.Sm.Eu.Gd.Tb.Dy.Ho.Er.Tm.Yb.Lu.Hf.Ta.W.Re.Os.Ir.Pt.Au.Hg.Tl.Pb.Bi.Po.At.Rn.Fr.Ra.Ac.Th.Pa.U.Np.Pu.Am.Cm.Bk.Cf.Es.Fm.Md.No.Lr.Rf.Db.Sg.Bh.Hs.Mt.Ds.Rg.Cn.Nh.Fl.Mc.Lv.Ts.Og`.split(`.`));function le(e){if(e.length!==10||!/^\d{10}$/.test(e))return!1;let t=parseInt(e[0],10),n=parseInt(e[1],10),r=parseInt(e[2],10),i=parseInt(e[3],10),a=parseInt(e[4],10),o=parseInt(e[5],10),s=parseInt(e[6],10),c=parseInt(e[7],10),l=parseInt(e[8],10),u=parseInt(e[9],10);if(u!==9&&u!==0)return!1;let d=t*10+n,f=r*10+i,p=d>40?d-40:d;if(p<1||p>31||f<1||f>12)return!1;let m=(3*t+2*n+7*r+6*i+5*a+4*o+3*s+2*c)%11,h=m===0?0:11-m;return h===10?!1:l===h}function K(e){if(!L.test(e))return!1;let t=/([A-Z][a-z]?)(\d*)/g,n,r=0;for(;(n=t.exec(e))!==null;){let e=n[1];if(!G.has(e))return!1;r++}return r>=2}function q(t){let n=t;for(let[t,r]of 
Object.entries(e))n=n.replaceAll(t,r);return n}function J(e,t,n){return!(e<1776||e>2100||t<1||t>12||n<1||n>g[t]||t===2&&n===29&&!(e%4==0&&e%100!=0||e%400==0))}function ue(e){return i.has(e)?`left`:a.has(e)?`right`:o.has(e)?`none`:`center`}function Y(e,t){let n=t??e;return{kind:`punctuation`,text:e,normalized:n,position:n.length===1?ue(n):`center`}}function X(e){let t=B.exec(e);if(t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10),i=parseInt(t[4],10),a=parseInt(t[5],10),o=parseInt(t[6],10);if(J(e,n,r)&&i>=0&&i<24&&a>=0&&a<60&&o>=0&&o<60)return[{kind:`timestamp`,text:t[0],year:e,month:n,day:r,hour:i,minute:a,second:o},t[0].length]}if(t=V.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10),i=parseInt(t[4],10),a=parseInt(t[5],10);if(J(e,n,r)&&i>=0&&i<24&&a>=0&&a<60)return[{kind:`timestamp`,text:t[0],year:e,month:n,day:r,hour:i,minute:a,second:0},t[0].length]}if(t=re.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(e>=0&&e<24&&n>=0&&n<60&&r>=0&&r<60)return[{kind:`time`,text:t[0],hour:e,minute:n,second:r},t[0].length]}if(t=ie.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(e>=0&&e<24&&n>=0&&n<60&&r>=0&&r<60)return[{kind:`time`,text:t[0],hour:e,minute:n,second:r},t[0].length]}if(t=ae.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10);if(e>=0&&e<24&&n>=0&&n<60)return[{kind:`time`,text:t[0],hour:e,minute:n,second:0},t[0].length]}if(t=oe.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(J(e,n,r))return[{kind:`date`,text:t[0],year:e,month:n,day:r},t[0].length]}if(t=R.exec(e),t){let e=t[1]+t[2];if(le(e))return[{kind:`ssn`,text:t[0],value:e},t[0].length]}if(t=z.exec(e),t)return[{kind:`serialnumber`,text:t[0]},t[0].length];let n=e.match(/^(\d{7})(?!\d)/);if(n){let e=n[1];return[{kind:`telno`,text:n[0],cc:``,number:e},n[0].length]}if(t=se.exec(e),t){let 
e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(r<=99&&(r+=r>50?1900:2e3),n>12&&e<=12&&([e,n]=[n,e]),J(r,n,e))return[{kind:`date`,text:t[0],year:r,month:n,day:e},t[0].length]}if(t=ce.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10);if(n>=1&&n<=12&&e>=1&&e<=g[n])return[{kind:`daterel`,text:t[0],year:0,month:n,day:e},t[0].length]}if(t=D.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10);if(n>=1776&&n<=2100&&e>=1&&e<=12)return[{kind:`daterel`,text:t[0],year:n,month:e,day:0},t[0].length]}if(t=O.exec(e),t){let e=t[2];if(!w.has(e)){let n=parseInt(t[1],10);return[{kind:`numwletter`,text:t[0],value:n,letter:e},t[0].length]}}let r=e.match(/^([-+]?\d+(?:\.\d{3})*(?:,\d+)?)/);if(r){let t=r[1],n=e.slice(t.length),i=E.exec(n);if(i){let e=i[1],n=t+e,r=parseFloat(t.replace(/\./g,``).replace(`,`,`.`));if(e in b)return[{kind:`amount`,text:n,value:r,currency:b[e]},n.length];let[a]=C[e];return e===`%`||e===`‰`?[{kind:`percent`,text:n,value:r},n.length]:[{kind:`measurement`,text:n,value:r,unit:a},n.length]}}let i=e.match(/^([-+]?\d+(?:\.\d{3})*(?:,\d+)?)(?!\d)/);if(i&&i[1].includes(`,`)){let e=parseFloat(i[1].replace(/\./g,``).replace(`,`,`.`));return[{kind:`number`,text:i[1],value:e},i[1].length]}let a=e.match(/^([-+]?\d+(?:,\d{3})*(?:\.\d+)?)(?!\d)/);if(a&&(a[1].includes(`,`)||a[1].includes(`.`))){let e=parseFloat(a[1].replace(/,/g,``));return[{kind:`number`,text:a[1],value:e},a[1].length]}let o=e.match(/^([-+]?\d+)(?!\d)/);if(o){let e=parseInt(o[1],10);return[{kind:`number`,text:o[1],value:e},o[1].length]}return[{kind:`unknown`,text:e[0]},1]}function*de(e){if(!e){yield{kind:`s_split`,text:null};return}if(/^[\p{L}]+$/u.test(e)||w.has(e)){if(K(e)){yield{kind:`molecule`,text:e};return}yield{kind:`word`,text:e};return}if(e.startsWith(`+`)&&e.length>=10){let t=I.exec(e);if(t){let n=t[1],r=t[2]+t[3];if(yield{kind:`telno`,text:t[0],cc:n,number:r},e=e.slice(t[0].length),!e)return}}if(e.length>1&&p.has(e[0])&&f.has(e[1])){let[t,n]=X(e);if(yield 
t,e=e.slice(n),!e)return}if(e.length>1&&`-–`.includes(e[0])&&/\p{L}/u.test(e[1])){let t=2;for(;t<e.length&&/\p{L}/u.test(e[t]);)t++;let n=e.slice(0,t);(n.slice(1).toLowerCase()===n.slice(1)||t>2&&n.slice(1).toUpperCase()===n.slice(1))&&(yield{kind:`word`,text:n},e=e.slice(t))}if(e.length>=3){if(c.includes(e[0])&&c.includes(e[e.length-1])){let t=e.slice(1,-1);if(/^[\p{L}]+$/u.test(t)){yield Y(e[0],`„`),yield{kind:`word`,text:t},yield Y(e[e.length-1],`“`);return}}if(s.includes(e[0])&&s.includes(e[e.length-1])){let t=e.slice(1,-1);if(/^[\p{L}]+$/u.test(t)){yield Y(e[0],`‚`),yield{kind:`word`,text:t},yield Y(e[e.length-1],`’`);return}}}for(e.length>1&&(c.includes(e[0])?(yield Y(e[0],`„`),e=e.slice(1)):s.includes(e[0])&&(yield Y(e[0],`‚`),e=e.slice(1)));e;){for(;e&&r.has(e[0]);){if(e.startsWith(`[`)){let t=H.exec(e);if(t){yield{kind:`company`,text:t[1]},e=e.slice(t[0].length);continue}let n=U.exec(e);if(n){yield{kind:`person`,text:n[1]},e=e.slice(n[0].length);continue}let r=W.exec(e);if(r){yield{kind:`entity`,text:r[1]},e=e.slice(r[0].length);continue}}if(e.startsWith(`[...]`)){yield Y(`[...]`,`[…]`),e=e.slice(5);continue}if(e.startsWith(`[…]`)){yield Y(`[…]`),e=e.slice(3);continue}if(e.startsWith(`...`)){let t=`...`,n=e.slice(3);for(;n.startsWith(`.`);)t+=`.`,n=n.slice(1);yield Y(t,`…`),e=n;continue}if(e.startsWith(`…`)){yield Y(`…`),e=e.slice(1);continue}if(e===`,,`){yield Y(`,,`,`,`),e=``;continue}if(e.startsWith(`,,`)){yield Y(`,,`,`„`),e=e.slice(2);continue}if(e===`[[`||e===`]]`){yield Y(e),e=``;continue}if(t.includes(e[0])){yield Y(e[0],`-`),e=e.slice(1);continue}if(c.includes(e[0])){yield Y(e[0],`“`),e=e.slice(1);continue}if(s.includes(e[0])){yield Y(e[0],`’`),e=e.slice(1);continue}if(e.startsWith(`#`)&&e.length>1){let t=M.exec(e);if(t){/^#\d+$/.test(t[0])?yield{kind:`ordinal`,text:t[0],value:parseInt(t[0].slice(1),10)}:yield{kind:`hashtag`,text:t[0]},e=e.slice(t[0].length);continue}}if(e.startsWith(`@`)&&e.length>1){let 
t=N.exec(e);if(t){yield{kind:`username`,text:t[0],username:t[0].slice(1)},e=e.slice(t[0].length);continue}}if(e.startsWith(`+`)&&e.length>1&&f.has(e[1])){let t=I.exec(e);if(t){let n=t[1],r=t[2]+t[3];yield{kind:`telno`,text:t[0],cc:n,number:r},e=e.slice(t[0].length);continue}}yield Y(e[0]),e=e.slice(1)}if(!e)break;if(e.includes(`@`)){let t=k.exec(e);if(t){yield{kind:`email`,text:t[0]},e=e.slice(t[0].length);continue}}if(A.test(e)){let t=e,n=``;for(;t&&a.has(t[t.length-1]);)n=t[t.length-1]+n,t=t.slice(0,-1);yield{kind:`url`,text:t},e=n;continue}if(e.length>=4&&/^[a-zA-Z0-9]/.test(e)&&e.includes(`.`)){let t=j.exec(e);if(t){let n=t[0],i=e.slice(n.length);for(;n&&r.has(n[n.length-1]);)i=n[n.length-1]+i,n=n.slice(0,-1);if(n.includes(`.`)){yield{kind:`domain`,text:n},e=i;continue}}}if(f.has(e[0])||p.has(e[0])&&e.length>1&&f.has(e[1])){let[t,n]=X(e);if(yield t,e=e.slice(n),e){let t=E.exec(e);t&&(yield{kind:`word`,text:t[1]},e=e.slice(t[1].length))}continue}if(/^\p{L}/u.test(e)){let t=1,n=new Set([`.`,`'`,`'`,`´`,`'`,`-`,`–`]),r=new Set([`'`,`²`,`³`]);for(;t<e.length;)if(/\p{L}/u.test(e[t]))t++;else if(f.has(e[t]))t++;else if(n.has(e[t])&&t+1<e.length&&/\p{L}/u.test(e[t+1]))t++;else break;t<e.length&&r.has(e[t])&&t++;let i=e.slice(0,t);if(K(i)){yield{kind:`molecule`,text:i},e=e.slice(t);continue}yield{kind:`word`,text:i},e=e.slice(t);continue}yield{kind:`unknown`,text:e[0]},e=e.slice(1)}}function*fe(e,t){let n=(t?q(e):e).split(/\n\s*\n/),r=!0;for(let e of n){r||(yield``),r=!1;for(let t of e.split(/\s+/))t&&(yield t)}}function pe(e,t=!0){let n=[];for(let r of fe(e,t))for(let e of de(r))n.push(e);return n}const me={hr:`herra`,"hr.":`herra`,frú:`frú`,"frú.":`frú`,sr:`séra`,"sr.":`séra`,dr:`doktor`,"dr.":`doktor`,prof:`prófessor`,"prof.":`prófessor`,hf:`hlutafélag`,"hf.":`hlutafélag`,ehf:`einkahlutafélag`,"ehf.":`einkahlutafélag`,ohf:`opinbert hlutafélag`,"ohf.":`opinbert 
hlutafélag`,sf:`sameignarfélag`,"sf.":`sameignarfélag`,slf:`samlagsfélag`,"slf.":`samlagsfélag`,ses:`sjálfseignarstofnun`,"ses.":`sjálfseignarstofnun`,ofl:`og fleiri`,"o.fl.":`og fleiri`,osfrv:`og svo framvegis`,"o.s.frv.":`og svo framvegis`,oþh:`og þess háttar`,"o.þ.h.":`og þess háttar`,þe:`það er`,"þ.e.":`það er`,þea:`það er að segja`,"þ.e.a.s.":`það er að segja`,sbr:`samanber`,"sbr.":`samanber`,skv:`samkvæmt`,"skv.":`samkvæmt`,mtt:`með tilliti til`,"m.t.t.":`með tilliti til`,ath:`athugasemd`,"ath.":`athugasemd`,gr:`grein`,"gr.":`grein`,mgr:`málsgrein`,"mgr.":`málsgrein`,tölul:`töluliður`,"tölul.":`töluliður`,nr:`númer`,"nr.":`númer`,sl:`síðastliðinn`,"sl.":`síðastliðinn`,nk:`næstkomandi`,"n.k.":`næstkomandi`,kl:`klukkan`,"kl.":`klukkan`,ca:`circa`,"ca.":`circa`,bs:`Bachelor of Science`,"B.S.":`Bachelor of Science`,ms:`Master of Science`,"M.S.":`Master of Science`,ba:`Bachelor of Arts`,"B.A.":`Bachelor of Arts`,"M.A.":`Master of Arts`,phd:`Doctor of Philosophy`,"Ph.D.":`Doctor of Philosophy`,mba:`Master of Business Administration`,MBA:`Master of Business Administration`,Rvk:`Reykjavík`,"Rvk.":`Reykjavík`,Akr:`Akranes`,"Akr.":`Akranes`,Ak:`Akureyri`,"Ak.":`Akureyri`,n:`norður`,"n.":`norður`,s:`suður`,"s.":`suður`,a:`austur`,"a.":`austur`,v:`vestur`,"v.":`vestur`,na:`norðaustur`,"n.a.":`norðaustur`,nv:`norðvestur`,"n.v.":`norðvestur`,sa:`suðaustur`,"s.a.":`suðaustur`,sv:`suðvestur`,"s.v.":`suðvestur`,þús:`þúsund`,"þús.":`þúsund`,millj:`milljón`,"millj.":`milljón`,mljó:`milljón`,"mljó.":`milljón`,ma:`milljarður`,"ma.":`milljarður`,mrð:`milljarður`,"mrð.":`milljarður`},he=new Set([`o.fl`,`o.s.frv`,`o.þ.h`,`þ.e`,`þ.e.a.s`,`m.t.t`,`n.k`]);function ge(e){let t=[],n=0;for(;n<e.length;){let r=e[n],i=e[n+1];if(r.kind===`word`&&i?.kind===`punctuation`&&i.text===`.`){let e=r.text+`.`;if(e in me||e in S){t.push({kind:`word`,text:e}),n+=2;continue}}if(r.kind===`punctuation`&&r.text in b&&i?.kind===`number`){let 
e=b[r.text];t.push({kind:`amount`,text:r.text+i.text,value:i.value,currency:e}),n+=2;continue}if(r.kind===`number`&&i?.kind===`word`){let e=i.text;if(x.has(e)){t.push({kind:`amount`,text:r.text+` `+i.text,value:r.value,currency:e}),n+=2;continue}if(e in S){let a=S[e];t.push({kind:`amount`,text:r.text+` `+i.text,value:r.value*a,currency:`ISK`}),n+=2;continue}}if(r.kind===`number`&&i?.kind===`word`&&[`prósent`,`prósenta`,`prósenti`,`hundraðshluti`].includes(i.text.toLowerCase())){t.push({kind:`percent`,text:r.text+` `+i.text,value:r.value}),n+=2;continue}if((r.kind===`date`||r.kind===`dateabs`)&&i?.kind===`time`){t.push({kind:`timestamp`,text:r.text+` `+i.text,year:r.year,month:r.month,day:r.day,hour:i.hour,minute:i.minute,second:i.second}),n+=2;continue}t.push(r),n++}return t}function _e(e){if(e.kind===`s_end`||e.kind===`s_split`)return!0;if(e.kind===`word`&&e.text.length>0){let t=e.text[0];if(t===t.toUpperCase()&&t!==t.toLowerCase())return!(e.text.toLowerCase()in m||F(e.text)||x.has(e.text))}return!1}function ve(e){if(e.length===0)return[];let t=[],n=!1,r=0,i=()=>({kind:`s_begin`,text:null}),a=()=>({kind:`s_end`,text:null});for(;r<e.length;){let o=e[r],s=e[r+1];if(o.kind===`s_split`){n&&=(t.push(a()),!1),r++;continue}if(n||=(t.push(i()),!0),o.kind===`punctuation`&&l.has(o.normalized)){if(o.normalized===`…`&&s&&!_e(s)){t.push(o),r++;continue}let i=o.text,c=r+1;for(;c<e.length;){let t=e[c];if(t.kind!==`punctuation`||!d.has(t.normalized))break;i+=t.text,c++}for(c>r+1?(t.push({...o,text:i}),r=c):(t.push(o),r++);r<e.length;){let n=e[r];if(n.kind!==`punctuation`||!u.has(n.normalized))break;t.push(n),r++}t.push(a()),n=!1;continue}t.push(o),r++}return n&&t.push(a()),t}const ye=new Set([`-`,`–`]);function be(e,t=!1){return e.kind!==`word`||!t&&h.has(e.text)?null:m[e.text.toLowerCase()]??null}function xe(e){return e.kind===`punctuation`&&ye.has(e.text)}function Se(e,t){let n=[],r=t;for(;r<e.length;){let 
t=e[r],i=e[r+1];if(t?.kind!==`word`||!i||!xe(i))break;n.push(t),n.push(i),r+=2;let a=e[r];a?.kind===`punctuation`&&a.text===`,`&&(n.push(a),r++)}if(n.length===0)return null;let i=e[r];if(!i||i.kind!==`word`||i.text.toLowerCase()!==`og`&&i.text.toLowerCase()!==`eða`)return null;let a=e[r+1];if(!a||a.kind!==`word`)return null;let o=[...n,i,a].map(e=>e.text).join(` `);return o=o.replace(/ -/g,`-`).replace(/ ,/g,`,`),[{kind:`word`,text:o},r+2]}function Z(e){let t=[],n=0;for(;n<e.length;){let r=e[n],i=e[n+1],a=Se(e,n);if(a){t.push(a[0]),n=a[1];continue}if(r.kind===`word`&&i?.kind===`punctuation`&&i.text===`.`){let e=r.text.replace(/\.$/,``);if(he.has(e)){t.push({kind:`word`,text:r.text+`.`}),n+=2;continue}}if((r.kind===`year`||r.kind===`number`)&&i?.kind===`word`){let a=(r.kind,r.value),o=null;if(y.has(i.text)?o=-a:v.has(i.text)&&(o=a),o!==null){let a=r.text+` `+i.text;n+=2,e[n]?.kind===`punctuation`&&e[n].text===`.`&&(a+=`.`,n++),t.push({kind:`year`,text:a,value:o});continue}}if((r.kind===`ordinal`||r.kind===`number`)&&i?.kind===`word`){let e=be(i,!0);if(e!==null){let a=(r.kind,r.value);t.push({kind:`daterel`,text:r.text+` `+i.text,year:0,month:e,day:a}),n+=2;continue}}if((r.kind===`date`||r.kind===`daterel`)&&r.year===0&&i?.kind===`number`){let e=i.value;if(e>=1776&&e<=2100){t.push({kind:`dateabs`,text:r.text+` `+i.text,year:e,month:r.month,day:r.day}),n+=2;continue}}if((r.kind===`date`||r.kind===`daterel`)&&r.year===0&&i?.kind===`year`){t.push({kind:`dateabs`,text:r.text+` `+i.text,year:i.value,month:r.month,day:r.day}),n+=2;continue}if(r.kind===`word`&&_.has(r.text.toLowerCase())&&i?.kind===`time`){t.push({...i,text:r.text+` `+i.text}),n+=2;continue}if(r.kind===`word`&&_.has(r.text.toLowerCase())&&i?.kind===`word`){let e=ee[i.text.toLowerCase()];if(e){t.push({kind:`time`,text:r.text+` `+i.text,hour:e[0],minute:e[1],second:e[2]}),n+=2;continue}}t.push(r),n++}return t}function Q(e,t={}){let{replaceCompositeGlyphs:n=!0,includeSentenceMarkers:r=!1}=t,i=pe(e,n);return 
i=ge(i),i=Z(i),i=r?ve(i):i.filter(e=>e.kind!==`s_split`),i}function Ce(e){let t=Q(e,{includeSentenceMarkers:!0}),n=[],r=[];for(let e of t)e.kind===`s_begin`?r=[]:e.kind===`s_end`?(r.length>0&&n.push($(r)),r=[]):e.text!==null&&r.push(we(e));return r.length>0&&n.push($(r)),n}function we(e){return e.kind===`punctuation`?e.normalized:e.text??``}function $(e){if(e.length===0)return``;let t=e[0];for(let n=1;n<e.length;n++){let r=e[n-1],i=e[n];Te(r,i)?t+=` `+i:t+=i}return t}function Te(e,t){if(!e||!t)return!1;let n=e[e.length-1],r=t[0];return!(new Set([`(`,`[`,`„`,`‚`,`«`,`<`]).has(n)||new Set([`.`,`,`,`;`,`:`,`!`,`?`,`)`,`]`,`“`,`’`,`»`,`>`,`…`]).has(r)||n===`-`||r===`-`)}export{q as normalizeUnicode,Ce as splitIntoSentences,Q as tokenize};
1
+ const e={á:`á`,é:`é`,í:`í`,ó:`ó`,ú:`ú`,ý:`ý`,Á:`Á`,É:`É`,Í:`Í`,Ó:`Ó`,Ú:`Ú`,Ý:`Ý`,ä:`ä`,ë:`ë`,ö:`ö`,ü:`ü`,Ä:`Ä`,Ë:`Ë`,Ö:`Ö`,Ü:`Ü`,"­":``,"​":``,"":``},t=`-–—`,n=`([„‚«#$€£¥₽<`;``+t,n+``;const r=new Set(`([„‚«#$€£¥₽<"*•&+=@©|.,:;)]!%‰?»“’‛‘…>°^/±'´~\\-–—`),i=new Set(n),a=new Set(`.,:;)]!%‰?»“’‛‘…>°`),o=new Set(`^/±'´~\\-–—`),s=`'‚‛’´`,c=`"“„”«»`,l=new Set([`.`,`?`,`!`,`…`]),u=new Set([`)`,`]`,`“`,`»`,`”`,`’`,`"`,`[…]`]),d=new Set([`?`,`!`,`…`]),f=new Set(`0123456789`),p=new Set([`+`,`-`]),m={janúar:1,janúars:1,febrúar:2,febrúars:2,mars:3,apríl:4,apríls:4,maí:5,maís:5,júní:6,júnís:6,júlí:7,júlís:7,ágúst:8,ágústs:8,september:9,septembers:9,október:10,októbers:10,nóvember:11,nóvembers:11,desember:12,desembers:12,"jan.":1,"feb.":2,"mar.":3,"apr.":4,"jún.":6,"júl.":7,"ág.":8,"ágú.":8,"sep.":9,"sept.":9,"okt.":10,"nóv.":11,"des.":12,jan:1,feb:2,mar:3,apr:4,jún:6,júl:7,ág:8,ágú:8,sep:9,sept:9,okt:10,nóv:11,des:12},h=new Set([`Ágúst`]),g=[0,31,29,31,30,31,30,31,31,30,31,30,31],_=new Set([`kl`,`kl.`,`klukkan`]),ee={eitt:[1,0,0],tvö:[2,0,0],þrjú:[3,0,0],fjögur:[4,0,0],fimm:[5,0,0],sex:[6,0,0],sjö:[7,0,0],átta:[8,0,0],níu:[9,0,0],tíu:[10,0,0],ellefu:[11,0,0],tólf:[12,0,0],hálfeitt:[12,30,0],hálftvö:[1,30,0],hálfþrjú:[2,30,0],hálffjögur:[3,30,0],hálffimm:[4,30,0],hálfsex:[5,30,0],hálfsjö:[6,30,0],hálfátta:[7,30,0],hálfníu:[8,30,0],hálftíu:[9,30,0],hálfellefu:[10,30,0],hálftólf:[11,30,0]},v=new Set([`e.Kr`,`e.Kr.`]),y=new Set([`f.Kr`,`f.Kr.`]);new Set([...v,...y]);const b={$:`USD`,"€":`EUR`,"£":`GBP`,"¥":`JPY`,"₽":`RUB`},x=new 
Set([`ISK`,`DKK`,`NOK`,`SEK`,`GBP`,`USD`,`EUR`,`CAD`,`AUD`,`CHF`,`JPY`,`PLN`,`RUB`,`CZK`,`INR`,`CNY`,`RMB`,`HKD`,`NZD`,`SGD`,`MXN`,`ZAR`]),S={"kr.":1,kr:1,krónur:1,"þ.kr.":1e3,"þ.kr":1e3,"þús.kr.":1e3,"þús.kr":1e3,"m.kr.":1e6,"m.kr":1e6,"mkr.":1e6,mkr:1e6,"millj.kr.":1e6,"millj.kr":1e6,"ma.kr.":1e9,"ma.kr":1e9,"mlja.kr.":1e9,"mlja.kr":1e9},C={m:[`m`,1],mm:[`m`,.001],μm:[`m`,1e-6],cm:[`m`,.01],sm:[`m`,.01],km:[`m`,1e3],ft:[`m`,.3048],mi:[`m`,1609.34],"m²":[`m²`,1],fm:[`m²`,1],"km²":[`m²`,1e6],"cm²":[`m²`,.01],ha:[`m²`,1e4],"m³":[`m³`,1],"cm³":[`m³`,1e-6],"km³":[`m³`,1e9],l:[`m³`,.001],ltr:[`m³`,.001],dl:[`m³`,1e-4],cl:[`m³`,1e-5],ml:[`m³`,1e-6],gal:[`m³`,.00378541],bbl:[`m³`,.158987294928],K:[`K`,1],"°K":[`K`,1],g:[`kg`,.001],gr:[`kg`,.001],kg:[`kg`,1],t:[`kg`,1e3],mg:[`kg`,1e-6],μg:[`kg`,1e-9],tn:[`kg`,1e3],lb:[`kg`,.453592],s:[`s`,1],ms:[`s`,.001],μs:[`s`,1e-6],klst:[`s`,3600],mín:[`s`,60],N:[`N`,1],kN:[`N`,1e3],Nm:[`J`,1],J:[`J`,1],kJ:[`J`,1e3],MJ:[`J`,1e6],GJ:[`J`,1e9],TJ:[`J`,0xe8d4a51000],kWh:[`J`,36e5],MWh:[`J`,36e8],kWst:[`J`,36e5],MWst:[`J`,36e8],kcal:[`J`,4184],cal:[`J`,4.184],W:[`W`,1],mW:[`W`,.001],kW:[`W`,1e3],MW:[`W`,1e6],GW:[`W`,1e9],TW:[`W`,0xe8d4a51000],V:[`V`,1],mV:[`V`,.001],kV:[`V`,1e3],A:[`A`,1],mA:[`A`,.001],Hz:[`Hz`,1],kHz:[`Hz`,1e3],MHz:[`Hz`,1e6],GHz:[`Hz`,1e9],Pa:[`Pa`,1],hPa:[`Pa`,100],"°":[`°`,1],"%":[`%`,1],"‰":[`‰`,.1]},w=new Set(Object.keys(C));function T(){let e=Object.keys(C).sort((e,t)=>t.length-e.length).map(e=>{let t=e.replace(/[.*+?^${}()|[\]\\]/g,`\\$&`);return e[e.length-1].match(/[a-zA-Z]/)?`${t}(?!\\w)`:t});return RegExp(`^(${e.join(`|`)})`,`u`)}const E=T();function te(){let e=Object.keys(b).sort((e,t)=>t.length-e.length).map(e=>e.replace(/[.*+?^${}()|[\]\\]/g,`\\$&`));return RegExp(`^(${e.join(`|`)})`,`u`)}te();function ne(){let e=[...Object.keys(C),...Object.keys(b)].sort((e,t)=>t.length-e.length).map(e=>{let t=e.replace(/[.*+?^${}()|[\]\\]/g,`\\$&`);return e[e.length-1].match(/[a-zA-Z]/)?`${t}(?!\\w)`:t});return 
RegExp(`(${e.join(`|`)})$`)}ne();const re=/^(\d{1,2}):(\d{2}):(\d{2}),(\d{2})(?!\d)/,ie=/^(\d{1,2}):(\d{2}):(\d{2})(?!\d)/,ae=/^(\d{1,2}):(\d{2})(?!\d)/,oe=/^(\d{4})[-/](\d{2})[-/](\d{2})(?!\d)/,se=/^(\d{1,2})[./-](\d{1,2})[./-](\d{2,4})(?!\d)/,ce=/^(\d{2})\.(\d{2})(?!\d)/,le=/^(\d{2})[.-](\d{4})(?!\d)/,ue=/^(\d+)([a-zA-Z])(?!\w)/u,D=/^[^@\s]+@[^@\s]+(\.[^@\s.,/:;"()%#!?]+)+/,O=/^(https?:\/\/|ftp:\/\/|file:\/\/|mailto:|www\.)/i,k=/^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z]{2,})+/,A=/^#\w+/u,j=/^@[0-9a-z_]+/i,M=/^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/;function N(e){return M.test(e)}const P=/^\+(\d{1,3})[-\s]?(\d{3})[-\s]?(\d{4})(?!\d)/,F=/^[A-Z][a-z]?\d*(?:[A-Z][a-z]?\d*)+$/,I=/^(\d{6})-?(\d{4})(?!\d)/,L=/^(\d+)-(\d+)(?:-\d+)*(?!\d)/,R=/^(\d{4})[-/](\d{2})[-/](\d{2})[T\s](\d{1,2}):(\d{2}):(\d{2})(?!\d)/,z=/^(\d{4})[-/](\d{2})[-/](\d{2})[T\s](\d{1,2}):(\d{2})(?!\d)/,B=/^\[company[:\s]([^\]]+)\]/i,V=/^\[person[:\s]([^\]]+)\]/i,H=/^\[entity[:\s]([^\]]+)\]/i,U=new Set(`H.He.Li.Be.B.C.N.O.F.Ne.Na.Mg.Al.Si.P.S.Cl.Ar.K.Ca.Sc.Ti.V.Cr.Mn.Fe.Co.Ni.Cu.Zn.Ga.Ge.As.Se.Br.Kr.Rb.Sr.Y.Zr.Nb.Mo.Tc.Ru.Rh.Pd.Ag.Cd.In.Sn.Sb.Te.I.Xe.Cs.Ba.La.Ce.Pr.Nd.Pm.Sm.Eu.Gd.Tb.Dy.Ho.Er.Tm.Yb.Lu.Hf.Ta.W.Re.Os.Ir.Pt.Au.Hg.Tl.Pb.Bi.Po.At.Rn.Fr.Ra.Ac.Th.Pa.U.Np.Pu.Am.Cm.Bk.Cf.Es.Fm.Md.No.Lr.Rf.Db.Sg.Bh.Hs.Mt.Ds.Rg.Cn.Nh.Fl.Mc.Lv.Ts.Og`.split(`.`));function de(e){if(e.length!==10||!/^\d{10}$/.test(e))return!1;let t=parseInt(e[0],10),n=parseInt(e[1],10),r=parseInt(e[2],10),i=parseInt(e[3],10),a=parseInt(e[4],10),o=parseInt(e[5],10),s=parseInt(e[6],10),c=parseInt(e[7],10),l=parseInt(e[8],10),u=parseInt(e[9],10);if(u!==9&&u!==0)return!1;let d=t*10+n,f=r*10+i,p=d>40?d-40:d;if(p<1||p>31||f<1||f>12)return!1;let m=(3*t+2*n+7*r+6*i+5*a+4*o+3*s+2*c)%11,h=m===0?0:11-m;return h===10?!1:l===h}function W(e){if(!F.test(e))return!1;let t=/([A-Z][a-z]?)(\d*)/g,n,r=0;for(;(n=t.exec(e))!==null;){let e=n[1];if(!U.has(e))return!1;r++}return r>=2}function G(t){let n=t;for(let[t,r]of 
Object.entries(e))n=n.replaceAll(t,r);return n}function K(e,t,n){return!(e<1776||e>2100||t<1||t>12||n<1||n>g[t]||t===2&&n===29&&!(e%4==0&&e%100!=0||e%400==0))}function fe(e){return i.has(e)?`left`:a.has(e)?`right`:o.has(e)?`none`:`center`}function q(e,t){let n=t??e;return{kind:`punctuation`,text:e,normalized:n,position:n.length===1?fe(n):`center`}}function J(e){let t=R.exec(e);if(t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10),i=parseInt(t[4],10),a=parseInt(t[5],10),o=parseInt(t[6],10);if(K(e,n,r)&&i>=0&&i<24&&a>=0&&a<60&&o>=0&&o<60)return[{kind:`timestamp`,text:t[0],year:e,month:n,day:r,hour:i,minute:a,second:o},t[0].length]}if(t=z.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10),i=parseInt(t[4],10),a=parseInt(t[5],10);if(K(e,n,r)&&i>=0&&i<24&&a>=0&&a<60)return[{kind:`timestamp`,text:t[0],year:e,month:n,day:r,hour:i,minute:a,second:0},t[0].length]}if(t=re.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(e>=0&&e<24&&n>=0&&n<60&&r>=0&&r<60)return[{kind:`time`,text:t[0],hour:e,minute:n,second:r},t[0].length]}if(t=ie.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(e>=0&&e<24&&n>=0&&n<60&&r>=0&&r<60)return[{kind:`time`,text:t[0],hour:e,minute:n,second:r},t[0].length]}if(t=ae.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10);if(e>=0&&e<24&&n>=0&&n<60)return[{kind:`time`,text:t[0],hour:e,minute:n,second:0},t[0].length]}if(t=oe.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(K(e,n,r))return[{kind:`date`,text:t[0],year:e,month:n,day:r},t[0].length]}if(t=I.exec(e),t){let e=t[1]+t[2];if(de(e))return[{kind:`ssn`,text:t[0],value:e},t[0].length]}if(t=L.exec(e),t)return[{kind:`serialnumber`,text:t[0]},t[0].length];let n=e.match(/^(\d{7})(?!\d)/);if(n){let e=n[1];return[{kind:`telno`,text:n[0],cc:``,number:e},n[0].length]}if(t=se.exec(e),t){let 
e=parseInt(t[1],10),n=parseInt(t[2],10),r=parseInt(t[3],10);if(r<=99&&(r+=r>50?1900:2e3),n>12&&e<=12&&([e,n]=[n,e]),K(r,n,e))return[{kind:`date`,text:t[0],year:r,month:n,day:e},t[0].length]}if(t=ce.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10);if(n>=1&&n<=12&&e>=1&&e<=g[n])return[{kind:`daterel`,text:t[0],year:0,month:n,day:e},t[0].length]}if(t=le.exec(e),t){let e=parseInt(t[1],10),n=parseInt(t[2],10);if(n>=1776&&n<=2100&&e>=1&&e<=12)return[{kind:`daterel`,text:t[0],year:n,month:e,day:0},t[0].length]}if(t=ue.exec(e),t){let e=t[2];if(!w.has(e)){let n=parseInt(t[1],10);return[{kind:`numwletter`,text:t[0],value:n,letter:e},t[0].length]}}let r=e.match(/^([-+]?\d+(?:\.\d{3})*(?:,\d+)?)/);if(r){let t=r[1],n=e.slice(t.length),i=E.exec(n);if(i){let e=i[1],n=t+e,r=parseFloat(t.replace(/\./g,``).replace(`,`,`.`));if(e in b)return[{kind:`amount`,text:n,value:r,currency:b[e]},n.length];let[a]=C[e];return e===`%`||e===`‰`?[{kind:`percent`,text:n,value:r},n.length]:[{kind:`measurement`,text:n,value:r,unit:a},n.length]}}let i=e.match(/^([-+]?\d+(?:\.\d{3})*(?:,\d+)?)(?!\d)/);if(i&&i[1].includes(`,`)){let e=parseFloat(i[1].replace(/\./g,``).replace(`,`,`.`));return[{kind:`number`,text:i[1],value:e},i[1].length]}let a=e.match(/^([-+]?\d+(?:,\d{3})*(?:\.\d+)?)(?!\d)/);if(a&&(a[1].includes(`,`)||a[1].includes(`.`))){let e=parseFloat(a[1].replace(/,/g,``));return[{kind:`number`,text:a[1],value:e},a[1].length]}let o=e.match(/^([-+]?\d+)(?!\d)/);if(o){let e=parseInt(o[1],10);return[{kind:`number`,text:o[1],value:e},o[1].length]}return[{kind:`unknown`,text:e[0]},1]}function*pe(e){if(!e){yield{kind:`s_split`,text:null};return}if(/^[\p{L}]+$/u.test(e)||w.has(e)){if(W(e)){yield{kind:`molecule`,text:e};return}yield{kind:`word`,text:e};return}if(e.startsWith(`+`)&&e.length>=10){let t=P.exec(e);if(t){let n=t[1],r=t[2]+t[3];if(yield{kind:`telno`,text:t[0],cc:n,number:r},e=e.slice(t[0].length),!e)return}}if(e.length>1&&p.has(e[0])&&f.has(e[1])){let[t,n]=J(e);if(yield 
t,e=e.slice(n),!e)return}if(e.length>1&&`-–`.includes(e[0])&&/\p{L}/u.test(e[1])){let t=2;for(;t<e.length&&/\p{L}/u.test(e[t]);)t++;let n=e.slice(0,t);(n.slice(1).toLowerCase()===n.slice(1)||t>2&&n.slice(1).toUpperCase()===n.slice(1))&&(yield{kind:`word`,text:n},e=e.slice(t))}if(e.length>=3){if(c.includes(e[0])&&c.includes(e[e.length-1])){let t=e.slice(1,-1);if(/^[\p{L}]+$/u.test(t)){yield q(e[0],`„`),yield{kind:`word`,text:t},yield q(e[e.length-1],`“`);return}}if(s.includes(e[0])&&s.includes(e[e.length-1])){let t=e.slice(1,-1);if(/^[\p{L}]+$/u.test(t)){yield q(e[0],`‚`),yield{kind:`word`,text:t},yield q(e[e.length-1],`’`);return}}}for(e.length>1&&(c.includes(e[0])?(yield q(e[0],`„`),e=e.slice(1)):s.includes(e[0])&&(yield q(e[0],`‚`),e=e.slice(1)));e;){for(;e&&r.has(e[0]);){if(e.startsWith(`[`)){let t=B.exec(e);if(t){yield{kind:`company`,text:t[1]},e=e.slice(t[0].length);continue}let n=V.exec(e);if(n){yield{kind:`person`,text:n[1]},e=e.slice(n[0].length);continue}let r=H.exec(e);if(r){yield{kind:`entity`,text:r[1]},e=e.slice(r[0].length);continue}}if(e.startsWith(`[...]`)){yield q(`[...]`,`[…]`),e=e.slice(5);continue}if(e.startsWith(`[…]`)){yield q(`[…]`),e=e.slice(3);continue}if(e.startsWith(`...`)){let t=`...`,n=e.slice(3);for(;n.startsWith(`.`);)t+=`.`,n=n.slice(1);yield q(t,`…`),e=n;continue}if(e.startsWith(`…`)){yield q(`…`),e=e.slice(1);continue}if(e===`,,`){yield q(`,,`,`,`),e=``;continue}if(e.startsWith(`,,`)){yield q(`,,`,`„`),e=e.slice(2);continue}if(e===`[[`||e===`]]`){yield q(e),e=``;continue}if(t.includes(e[0])){yield q(e[0],`-`),e=e.slice(1);continue}if(c.includes(e[0])){yield q(e[0],`“`),e=e.slice(1);continue}if(s.includes(e[0])){yield q(e[0],`’`),e=e.slice(1);continue}if(e.startsWith(`#`)&&e.length>1){let t=A.exec(e);if(t){/^#\d+$/.test(t[0])?yield{kind:`ordinal`,text:t[0],value:parseInt(t[0].slice(1),10)}:yield{kind:`hashtag`,text:t[0]},e=e.slice(t[0].length);continue}}if(e.startsWith(`@`)&&e.length>1){let 
t=j.exec(e);if(t){yield{kind:`username`,text:t[0],username:t[0].slice(1)},e=e.slice(t[0].length);continue}}if(e.startsWith(`+`)&&e.length>1&&f.has(e[1])){let t=P.exec(e);if(t){let n=t[1],r=t[2]+t[3];yield{kind:`telno`,text:t[0],cc:n,number:r},e=e.slice(t[0].length);continue}}yield q(e[0]),e=e.slice(1)}if(!e)break;if(e.includes(`@`)){let t=D.exec(e);if(t){yield{kind:`email`,text:t[0]},e=e.slice(t[0].length);continue}}if(O.test(e)){let t=e,n=``;for(;t&&a.has(t[t.length-1]);)n=t[t.length-1]+n,t=t.slice(0,-1);yield{kind:`url`,text:t},e=n;continue}if(e.length>=4&&/^[a-zA-Z0-9]/.test(e)&&e.includes(`.`)){let t=k.exec(e);if(t){let n=t[0],i=e.slice(n.length);for(;n&&r.has(n[n.length-1]);)i=n[n.length-1]+i,n=n.slice(0,-1);if(n.includes(`.`)){yield{kind:`domain`,text:n},e=i;continue}}}if(f.has(e[0])||p.has(e[0])&&e.length>1&&f.has(e[1])){let[t,n]=J(e);if(yield t,e=e.slice(n),e){let t=E.exec(e);t&&(yield{kind:`word`,text:t[1]},e=e.slice(t[1].length))}continue}if(/^\p{L}/u.test(e)){let t=1,n=new Set([`.`,`'`,`'`,`´`,`'`,`-`,`–`]),r=new Set([`'`,`²`,`³`]);for(;t<e.length;)if(/\p{L}/u.test(e[t]))t++;else if(f.has(e[t]))t++;else if(n.has(e[t])&&t+1<e.length&&/\p{L}/u.test(e[t+1]))t++;else break;t<e.length&&r.has(e[t])&&t++;let i=e.slice(0,t);if(W(i)){yield{kind:`molecule`,text:i},e=e.slice(t);continue}yield{kind:`word`,text:i},e=e.slice(t);continue}yield{kind:`unknown`,text:e[0]},e=e.slice(1)}}function*me(e,t){let n=t?G(e):e,r=0,i=/(\S+)|(\n\s*\n)|(\s+)/g,a,o=!1;for(;(a=i.exec(n))!==null;)a[1]?(o&&=(yield{text:``,start:r,end:r},!1),yield{text:a[1],start:a.index,end:a.index+a[1].length}):a[2]&&(o=!0),r=a.index+a[0].length}function he(e,t=!0,n=!1){let r=[];for(let i of me(e,t)){let e=0;for(let t of pe(i.text)){if(n&&t.text!==null){let n=i.text.indexOf(t.text,e);n!==-1&&(t.span={start:i.start+n,end:i.start+n+t.text.length},e=n+t.text.length)}else n&&t.text===null&&(t.span={start:i.start,end:i.start});r.push(t)}}return r}const 
ge={hr:`herra`,"hr.":`herra`,frú:`frú`,"frú.":`frú`,sr:`séra`,"sr.":`séra`,dr:`doktor`,"dr.":`doktor`,prof:`prófessor`,"prof.":`prófessor`,hf:`hlutafélag`,"hf.":`hlutafélag`,ehf:`einkahlutafélag`,"ehf.":`einkahlutafélag`,ohf:`opinbert hlutafélag`,"ohf.":`opinbert hlutafélag`,sf:`sameignarfélag`,"sf.":`sameignarfélag`,slf:`samlagsfélag`,"slf.":`samlagsfélag`,ses:`sjálfseignarstofnun`,"ses.":`sjálfseignarstofnun`,ofl:`og fleiri`,"o.fl.":`og fleiri`,osfrv:`og svo framvegis`,"o.s.frv.":`og svo framvegis`,oþh:`og þess háttar`,"o.þ.h.":`og þess háttar`,þe:`það er`,"þ.e.":`það er`,þea:`það er að segja`,"þ.e.a.s.":`það er að segja`,sbr:`samanber`,"sbr.":`samanber`,skv:`samkvæmt`,"skv.":`samkvæmt`,mtt:`með tilliti til`,"m.t.t.":`með tilliti til`,ath:`athugasemd`,"ath.":`athugasemd`,gr:`grein`,"gr.":`grein`,mgr:`málsgrein`,"mgr.":`málsgrein`,tölul:`töluliður`,"tölul.":`töluliður`,nr:`númer`,"nr.":`númer`,sl:`síðastliðinn`,"sl.":`síðastliðinn`,nk:`næstkomandi`,"n.k.":`næstkomandi`,kl:`klukkan`,"kl.":`klukkan`,ca:`circa`,"ca.":`circa`,bs:`Bachelor of Science`,"B.S.":`Bachelor of Science`,ms:`Master of Science`,"M.S.":`Master of Science`,ba:`Bachelor of Arts`,"B.A.":`Bachelor of Arts`,"M.A.":`Master of Arts`,phd:`Doctor of Philosophy`,"Ph.D.":`Doctor of Philosophy`,mba:`Master of Business Administration`,MBA:`Master of Business Administration`,Rvk:`Reykjavík`,"Rvk.":`Reykjavík`,Akr:`Akranes`,"Akr.":`Akranes`,Ak:`Akureyri`,"Ak.":`Akureyri`,n:`norður`,"n.":`norður`,s:`suður`,"s.":`suður`,a:`austur`,"a.":`austur`,v:`vestur`,"v.":`vestur`,na:`norðaustur`,"n.a.":`norðaustur`,nv:`norðvestur`,"n.v.":`norðvestur`,sa:`suðaustur`,"s.a.":`suðaustur`,sv:`suðvestur`,"s.v.":`suðvestur`,þús:`þúsund`,"þús.":`þúsund`,millj:`milljón`,"millj.":`milljón`,mljó:`milljón`,"mljó.":`milljón`,ma:`milljarður`,"ma.":`milljarður`,mrð:`milljarður`,"mrð.":`milljarður`},_e=new Set([`o.fl`,`o.s.frv`,`o.þ.h`,`þ.e`,`þ.e.a.s`,`m.t.t`,`n.k`]);function Y(e,t){return 
e.span&&t.span?{start:e.span.start,end:t.span.end}:e.span??t.span}function ve(e){let t=[],n=0;for(;n<e.length;){let r=e[n],i=e[n+1];if(r.kind===`word`&&i?.kind===`punctuation`&&i.text===`.`){let e=r.text+`.`;if(e in ge||e in S){t.push({kind:`word`,text:e,span:Y(r,i)}),n+=2;continue}}if(r.kind===`punctuation`&&r.text in b&&i?.kind===`number`){let e=b[r.text];t.push({kind:`amount`,text:r.text+i.text,value:i.value,currency:e,span:Y(r,i)}),n+=2;continue}if(r.kind===`number`&&i?.kind===`word`){let e=i.text;if(x.has(e)){t.push({kind:`amount`,text:r.text+` `+i.text,value:r.value,currency:e,span:Y(r,i)}),n+=2;continue}if(e in S){let a=S[e];t.push({kind:`amount`,text:r.text+` `+i.text,value:r.value*a,currency:`ISK`,span:Y(r,i)}),n+=2;continue}}if(r.kind===`number`&&i?.kind===`word`&&[`prósent`,`prósenta`,`prósenti`,`hundraðshluti`].includes(i.text.toLowerCase())){t.push({kind:`percent`,text:r.text+` `+i.text,value:r.value,span:Y(r,i)}),n+=2;continue}if((r.kind===`date`||r.kind===`dateabs`)&&i?.kind===`time`){t.push({kind:`timestamp`,text:r.text+` `+i.text,year:r.year,month:r.month,day:r.day,hour:i.hour,minute:i.minute,second:i.second,span:Y(r,i)}),n+=2;continue}t.push(r),n++}return t}function ye(e){if(e.kind===`s_end`||e.kind===`s_split`)return!0;if(e.kind===`word`&&e.text.length>0){let t=e.text[0];if(t===t.toUpperCase()&&t!==t.toLowerCase())return!(e.text.toLowerCase()in m||N(e.text)||x.has(e.text))}return!1}function be(e){if(e.length===0)return[];let t=[],n=!1,r=0,i=()=>({kind:`s_begin`,text:null}),a=()=>({kind:`s_end`,text:null});for(;r<e.length;){let o=e[r],s=e[r+1];if(o.kind===`s_split`){n&&=(t.push(a()),!1),r++;continue}if(n||=(t.push(i()),!0),o.kind===`punctuation`&&l.has(o.normalized)){if(o.normalized===`…`&&s&&!ye(s)){t.push(o),r++;continue}let i=o.text,c=r+1;for(;c<e.length;){let t=e[c];if(t.kind!==`punctuation`||!d.has(t.normalized))break;i+=t.text,c++}for(c>r+1?(t.push({...o,text:i}),r=c):(t.push(o),r++);r<e.length;){let 
n=e[r];if(n.kind!==`punctuation`||!u.has(n.normalized))break;t.push(n),r++}t.push(a()),n=!1;continue}t.push(o),r++}return n&&t.push(a()),t}function X(e,t){return e.span&&t.span?{start:e.span.start,end:t.span.end}:e.span??t.span}const xe=new Set([`-`,`–`]);function Se(e,t=!1){return e.kind!==`word`||!t&&h.has(e.text)?null:m[e.text.toLowerCase()]??null}function Z(e){return e.kind===`punctuation`&&xe.has(e.text)}function Ce(e,t){let n=[],r=t;for(;r<e.length;){let t=e[r],i=e[r+1];if(t?.kind!==`word`||!i||!Z(i))break;n.push(t),n.push(i),r+=2;let a=e[r];a?.kind===`punctuation`&&a.text===`,`&&(n.push(a),r++)}if(n.length===0)return null;let i=e[r];if(!i||i.kind!==`word`||i.text.toLowerCase()!==`og`&&i.text.toLowerCase()!==`eða`)return null;let a=e[r+1];if(!a||a.kind!==`word`)return null;let o=[...n,i,a].map(e=>e.text).join(` `);o=o.replace(/ -/g,`-`).replace(/ ,/g,`,`);let s=X(e[t],a);return[{kind:`word`,text:o,span:s},r+2]}function we(e){let t=[],n=0;for(;n<e.length;){let r=e[n],i=e[n+1],a=Ce(e,n);if(a){t.push(a[0]),n=a[1];continue}if(r.kind===`word`&&i?.kind===`punctuation`&&i.text===`.`){let e=r.text.replace(/\.$/,``);if(_e.has(e)){t.push({kind:`word`,text:r.text+`.`,span:X(r,i)}),n+=2;continue}}if((r.kind===`year`||r.kind===`number`)&&i?.kind===`word`){let a=(r.kind,r.value),o=null;if(y.has(i.text)?o=-a:v.has(i.text)&&(o=a),o!==null){let a=r.text+` `+i.text,s=i;n+=2,e[n]?.kind===`punctuation`&&e[n].text===`.`&&(a+=`.`,s=e[n],n++),t.push({kind:`year`,text:a,value:o,span:X(r,s)});continue}}if((r.kind===`ordinal`||r.kind===`number`)&&i?.kind===`word`){let e=Se(i,!0);if(e!==null){let a=(r.kind,r.value);t.push({kind:`daterel`,text:r.text+` `+i.text,year:0,month:e,day:a,span:X(r,i)}),n+=2;continue}}if((r.kind===`date`||r.kind===`daterel`)&&r.year===0&&i?.kind===`number`){let e=i.value;if(e>=1776&&e<=2100){t.push({kind:`dateabs`,text:r.text+` 
`+i.text,year:e,month:r.month,day:r.day,span:X(r,i)}),n+=2;continue}}if((r.kind===`date`||r.kind===`daterel`)&&r.year===0&&i?.kind===`year`){t.push({kind:`dateabs`,text:r.text+` `+i.text,year:i.value,month:r.month,day:r.day,span:X(r,i)}),n+=2;continue}if(r.kind===`word`&&_.has(r.text.toLowerCase())&&i?.kind===`time`){t.push({...i,text:r.text+` `+i.text,span:X(r,i)}),n+=2;continue}if(r.kind===`word`&&_.has(r.text.toLowerCase())&&i?.kind===`word`){let e=ee[i.text.toLowerCase()];if(e){t.push({kind:`time`,text:r.text+` `+i.text,hour:e[0],minute:e[1],second:e[2],span:X(r,i)}),n+=2;continue}}t.push(r),n++}return t}function Q(e,t={}){let{replaceCompositeGlyphs:n=!0,includeSentenceMarkers:r=!1,includeOffsets:i=!1}=t,a=he(e,n,i);return a=ve(a),a=we(a),a=r?be(a):a.filter(e=>e.kind!==`s_split`),a}function Te(e){let t=Q(e,{includeSentenceMarkers:!0}),n=[],r=[];for(let e of t)e.kind===`s_begin`?r=[]:e.kind===`s_end`?(r.length>0&&n.push($(r)),r=[]):e.text!==null&&r.push(Ee(e));return r.length>0&&n.push($(r)),n}function Ee(e){return e.kind===`punctuation`?e.normalized:e.text??``}function $(e){if(e.length===0)return``;let t=e[0];for(let n=1;n<e.length;n++){let r=e[n-1],i=e[n];De(r,i)?t+=` `+i:t+=i}return t}function De(e,t){if(!e||!t)return!1;let n=e[e.length-1],r=t[0];return!(new Set([`(`,`[`,`„`,`‚`,`«`,`<`]).has(n)||new Set([`.`,`,`,`;`,`:`,`!`,`?`,`)`,`]`,`“`,`’`,`»`,`>`,`…`]).has(r)||n===`-`||r===`-`)}export{G as normalizeUnicode,Te as splitIntoSentences,Q as tokenize};
2
2
  //# sourceMappingURL=index.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.mjs","names":["COMPOSITE_HYPHENS","COMPOSITE_HYPHENS"],"sources":["../src/data/constants.ts","../src/data/units.ts","../src/data/patterns.ts","../src/pipeline/lexer.ts","../src/data/abbreviations.ts","../src/pipeline/particles.ts","../src/pipeline/sentences.ts","../src/pipeline/phrases.ts","../src/tokenize.ts","../src/split-sentences.ts"],"sourcesContent":["/**\n * Unicode replacements for composite glyphs\n */\nexport const UNICODE_REPLACEMENTS: Record<string, string> = {\n // Vowel + combining acute accent (U+0301)\n \"a\\u0301\": \"\\u00E1\", // á\n \"e\\u0301\": \"\\u00E9\", // é\n \"i\\u0301\": \"\\u00ED\", // í\n \"o\\u0301\": \"\\u00F3\", // ó\n \"u\\u0301\": \"\\u00FA\", // ú\n \"y\\u0301\": \"\\u00FD\", // ý\n \"A\\u0301\": \"\\u00C1\", // Á\n \"E\\u0301\": \"\\u00C9\", // É\n \"I\\u0301\": \"\\u00CD\", // Í\n \"O\\u0301\": \"\\u00D3\", // Ó\n \"U\\u0301\": \"\\u00DA\", // Ú\n \"Y\\u0301\": \"\\u00DD\", // Ý\n // Vowel + combining diaeresis (U+0308)\n \"a\\u0308\": \"\\u00E4\", // ä\n \"e\\u0308\": \"\\u00EB\", // ë\n \"o\\u0308\": \"\\u00F6\", // ö\n \"u\\u0308\": \"\\u00FC\", // ü\n \"A\\u0308\": \"\\u00C4\", // Ä\n \"E\\u0308\": \"\\u00CB\", // Ë\n \"O\\u0308\": \"\\u00D6\", // Ö\n \"U\\u0308\": \"\\u00DC\", // Ü\n // Remove unwanted characters\n \"\\u00AD\": \"\", // Soft hyphen\n \"\\u200B\": \"\", // Zero-width space\n \"\\uFEFF\": \"\", // Zero-width nbsp (BOM)\n};\n\n/**\n * Hyphen characters\n */\nexport const HYPHEN = \"-\";\nexport const EN_DASH = \"\\u2013\"; // –\nexport const EM_DASH = \"\\u2014\"; // —\nexport const HYPHENS = HYPHEN + EN_DASH + EM_DASH;\nexport const COMPOSITE_HYPHENS = HYPHEN + EN_DASH;\n\n/**\n * Punctuation character sets\n * Using Unicode escapes to avoid parsing issues\n */\n// Left: ( [ „ ‚ « # $ € £ ¥ ₽ <\nexport const LEFT_PUNCTUATION = \"([\\u201E\\u201A\\u00AB#$\\u20AC\\u00A3\\u00A5\\u20BD<\";\n// Right: . , : ; ) ] ! % ‰ ? 
» \" ' ‛ ' … > °\nexport const RIGHT_PUNCTUATION = \".,:;)]!%\\u2030?\\u00BB\\u201C\\u2019\\u201B\\u2018\\u2026>\\u00B0\";\n// Center: \" * • & + = @ © |\nexport const CENTER_PUNCTUATION = '\"*\\u2022&+=@\\u00A9|';\n// None: ^ / ± ' ´ ~ \\ -\nexport const NONE_PUNCTUATION = \"^/\\u00B1'\\u00B4~\\\\\" + HYPHENS;\nexport const PUNCTUATION =\n LEFT_PUNCTUATION + CENTER_PUNCTUATION + RIGHT_PUNCTUATION + NONE_PUNCTUATION;\n\nexport const PUNCTUATION_SET = new Set(PUNCTUATION);\nexport const LEFT_PUNCTUATION_SET = new Set(LEFT_PUNCTUATION);\nexport const RIGHT_PUNCTUATION_SET = new Set(RIGHT_PUNCTUATION);\nexport const NONE_PUNCTUATION_SET = new Set(NONE_PUNCTUATION);\n\n/**\n * Quote characters\n */\n// Single quotes: ' ‚ ‛ ' ´\nexport const SINGLE_QUOTES = \"'\\u201A\\u201B\\u2019\\u00B4\";\n// Double quotes: \" \" „ \" « »\nexport const DOUBLE_QUOTES = '\"\\u201C\\u201E\\u201D\\u00AB\\u00BB';\n\n/**\n * Normalized quote characters (for output)\n */\nexport const OPEN_DOUBLE_QUOTE = \"\\u201E\"; // „\nexport const CLOSE_DOUBLE_QUOTE = \"\\u201C\"; // \"\nexport const OPEN_SINGLE_QUOTE = \"\\u201A\"; // ‚\nexport const CLOSE_SINGLE_QUOTE = \"\\u2019\"; // '\n\n/**\n * Sentence-ending punctuation\n */\nexport const END_OF_SENTENCE = new Set([\".\", \"?\", \"!\", \"\\u2026\"]); // … = ellipsis\nexport const SENTENCE_FINISHERS = new Set([\n \")\",\n \"]\",\n \"\\u201C\", // \"\n \"\\u00BB\", // »\n \"\\u201D\", // \"\n \"\\u2019\", // '\n '\"',\n \"[\\u2026]\", // […]\n]);\n\n/**\n * Punctuation that may occur inside words\n */\nexport const PUNCT_INSIDE_WORD = new Set([\n \".\",\n \"'\",\n \"\\u2019\", // '\n \"\\u00B4\", // ´\n \"\\u2018\", // '\n HYPHEN,\n EN_DASH,\n]);\nexport const PUNCT_ENDING_WORD = new Set([\"'\", \"\\u00B2\", \"\\u00B3\"]); // ² ³\nexport const PUNCT_COMBINATIONS = new Set([\"?\", \"!\", \"\\u2026\"]); // …\n\n/**\n * Digit-related sets\n */\nexport const DIGITS = new Set(\"0123456789\");\nexport const SIGN_PREFIX = new Set([\"+\", 
\"-\"]);\n\n/**\n * Icelandic month names to month numbers\n */\nexport const MONTHS: Record<string, number> = {\n \"jan\\u00FAar\": 1, // janúar\n \"jan\\u00FAars\": 1, // janúars\n \"febr\\u00FAar\": 2, // febrúar\n \"febr\\u00FAars\": 2, // febrúars\n mars: 3,\n \"apr\\u00EDl\": 4, // apríl\n \"apr\\u00EDls\": 4, // apríls\n \"ma\\u00ED\": 5, // maí\n \"ma\\u00EDs\": 5, // maís\n \"j\\u00FAn\\u00ED\": 6, // júní\n \"j\\u00FAn\\u00EDs\": 6, // júnís\n \"j\\u00FAl\\u00ED\": 7, // júlí\n \"j\\u00FAl\\u00EDs\": 7, // júlís\n \"\\u00E1g\\u00FAst\": 8, // ágúst\n \"\\u00E1g\\u00FAsts\": 8, // ágústs\n september: 9,\n septembers: 9,\n \"okt\\u00F3ber\": 10, // október\n \"okt\\u00F3bers\": 10, // októbers\n \"n\\u00F3vember\": 11, // nóvember\n \"n\\u00F3vembers\": 11, // nóvembers\n desember: 12,\n desembers: 12,\n // Abbreviated forms\n \"jan.\": 1,\n \"feb.\": 2,\n \"mar.\": 3,\n \"apr.\": 4,\n \"j\\u00FAn.\": 6, // jún.\n \"j\\u00FAl.\": 7, // júl.\n \"\\u00E1g.\": 8, // ág.\n \"\\u00E1g\\u00FA.\": 8, // ágú.\n \"sep.\": 9,\n \"sept.\": 9,\n \"okt.\": 10,\n \"n\\u00F3v.\": 11, // nóv.\n \"des.\": 12,\n jan: 1,\n feb: 2,\n mar: 3,\n apr: 4,\n \"j\\u00FAn\": 6, // jún\n \"j\\u00FAl\": 7, // júl\n \"\\u00E1g\": 8, // ág\n \"\\u00E1g\\u00FA\": 8, // ágú\n sep: 9,\n sept: 9,\n okt: 10,\n \"n\\u00F3v\": 11, // nóv\n des: 12,\n};\n\n/**\n * Month name blacklist (Ágúst is also a masculine name)\n */\nexport const MONTH_BLACKLIST = new Set([\"\\u00C1g\\u00FAst\"]); // Ágúst\n\n/**\n * Max days in each month (index 0 unused, 1=January)\n */\nexport const DAYS_IN_MONTH = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];\n\n/**\n * Clock abbreviations\n */\nexport const CLOCK_ABBREVS = new Set([\"kl\", \"kl.\", \"klukkan\"]);\n\n/**\n * Time expressions spelled out in Icelandic\n */\nexport const CLOCK_NUMBERS: Record<string, [number, number, number]> = {\n eitt: [1, 0, 0],\n \"tv\\u00F6\": [2, 0, 0], // tvö\n \"\\u00FErj\\u00FA\": [3, 0, 0], // þrjú\n \"fj\\u00F6gur\": [4, 
0, 0], // fjögur\n fimm: [5, 0, 0],\n sex: [6, 0, 0],\n \"sj\\u00F6\": [7, 0, 0], // sjö\n \"\\u00E1tta\": [8, 0, 0], // átta\n \"n\\u00EDu\": [9, 0, 0], // níu\n \"t\\u00EDu\": [10, 0, 0], // tíu\n ellefu: [11, 0, 0],\n \"t\\u00F3lf\": [12, 0, 0], // tólf\n \"h\\u00E1lfeitt\": [12, 30, 0], // hálfeitt\n \"h\\u00E1lftv\\u00F6\": [1, 30, 0], // hálftvö\n \"h\\u00E1lf\\u00FErj\\u00FA\": [2, 30, 0], // hálfþrjú\n \"h\\u00E1lffj\\u00F6gur\": [3, 30, 0], // hálffjögur\n \"h\\u00E1lffimm\": [4, 30, 0], // hálffimm\n \"h\\u00E1lfsex\": [5, 30, 0], // hálfsex\n \"h\\u00E1lfsj\\u00F6\": [6, 30, 0], // hálfsjö\n \"h\\u00E1lf\\u00E1tta\": [7, 30, 0], // hálfátta\n \"h\\u00E1lfn\\u00EDu\": [8, 30, 0], // hálfníu\n \"h\\u00E1lft\\u00EDu\": [9, 30, 0], // hálftíu\n \"h\\u00E1lfellefu\": [10, 30, 0], // hálfellefu\n \"h\\u00E1lft\\u00F3lf\": [11, 30, 0], // hálftólf\n};\n\n/**\n * Before/After Common Era markers\n */\nexport const CE = new Set([\"e.Kr\", \"e.Kr.\"]);\nexport const BCE = new Set([\"f.Kr\", \"f.Kr.\"]);\nexport const CE_BCE = new Set([...CE, ...BCE]);\n\n/**\n * URL prefixes\n */\nexport const URL_PREFIXES = [\"http://\", \"https://\", \"ftp://\", \"file://\", \"mailto:\", \"www.\"];\n","/**\n * Currency symbols to ISO codes\n */\nexport const CURRENCY_SYMBOLS: Record<string, string> = {\n $: \"USD\",\n \"€\": \"EUR\",\n \"£\": \"GBP\",\n \"¥\": \"JPY\",\n \"₽\": \"RUB\",\n};\n\n/**\n * ISO 4217 currency codes\n */\nexport const CURRENCY_ABBREV = new Set([\n \"ISK\",\n \"DKK\",\n \"NOK\",\n \"SEK\",\n \"GBP\",\n \"USD\",\n \"EUR\",\n \"CAD\",\n \"AUD\",\n \"CHF\",\n \"JPY\",\n \"PLN\",\n \"RUB\",\n \"CZK\",\n \"INR\",\n \"CNY\",\n \"RMB\",\n \"HKD\",\n \"NZD\",\n \"SGD\",\n \"MXN\",\n \"ZAR\",\n]);\n\n/**\n * ISK amount abbreviations (króna-specific)\n */\nexport const AMOUNT_ABBREV: Record<string, number> = {\n \"kr.\": 1,\n kr: 1,\n krónur: 1,\n \"þ.kr.\": 1e3,\n \"þ.kr\": 1e3,\n \"þús.kr.\": 1e3,\n \"þús.kr\": 1e3,\n \"m.kr.\": 1e6,\n \"m.kr\": 1e6,\n \"mkr.\": 
1e6,\n mkr: 1e6,\n \"millj.kr.\": 1e6,\n \"millj.kr\": 1e6,\n \"ma.kr.\": 1e9,\n \"ma.kr\": 1e9,\n \"mlja.kr.\": 1e9,\n \"mlja.kr\": 1e9,\n};\n\nexport const ISK_AMOUNT_PRECEDING = new Set([\"kr.\", \"kr\", \"krónur\"]);\n\n/**\n * SI units: unit → [base unit, conversion factor]\n * Conversion factor is number or null (for temperature that needs functions)\n */\nexport const SI_UNITS: Record<string, [string, number]> = {\n // Distance\n m: [\"m\", 1.0],\n mm: [\"m\", 1.0e-3],\n μm: [\"m\", 1.0e-6],\n cm: [\"m\", 1.0e-2],\n sm: [\"m\", 1.0e-2],\n km: [\"m\", 1.0e3],\n ft: [\"m\", 0.3048],\n mi: [\"m\", 1609.34],\n // Area\n \"m²\": [\"m²\", 1.0],\n fm: [\"m²\", 1.0],\n \"km²\": [\"m²\", 1.0e6],\n \"cm²\": [\"m²\", 1.0e-2],\n ha: [\"m²\", 1.0e4],\n // Volume\n \"m³\": [\"m³\", 1.0],\n \"cm³\": [\"m³\", 1.0e-6],\n \"km³\": [\"m³\", 1.0e9],\n l: [\"m³\", 1.0e-3],\n ltr: [\"m³\", 1.0e-3],\n dl: [\"m³\", 1.0e-4],\n cl: [\"m³\", 1.0e-5],\n ml: [\"m³\", 1.0e-6],\n gal: [\"m³\", 3.78541e-3],\n bbl: [\"m³\", 158.987294928e-3],\n // Temperature\n K: [\"K\", 1.0],\n \"°K\": [\"K\", 1.0],\n // Mass\n g: [\"kg\", 1.0e-3],\n gr: [\"kg\", 1.0e-3],\n kg: [\"kg\", 1.0],\n t: [\"kg\", 1.0e3],\n mg: [\"kg\", 1.0e-6],\n μg: [\"kg\", 1.0e-9],\n tn: [\"kg\", 1.0e3],\n lb: [\"kg\", 0.453592],\n // Duration\n s: [\"s\", 1.0],\n ms: [\"s\", 1.0e-3],\n μs: [\"s\", 1.0e-6],\n klst: [\"s\", 3600.0],\n mín: [\"s\", 60.0],\n // Force\n N: [\"N\", 1.0],\n kN: [\"N\", 1.0e3],\n // Energy\n Nm: [\"J\", 1.0],\n J: [\"J\", 1.0],\n kJ: [\"J\", 1.0e3],\n MJ: [\"J\", 1.0e6],\n GJ: [\"J\", 1.0e9],\n TJ: [\"J\", 1.0e12],\n kWh: [\"J\", 3.6e6],\n MWh: [\"J\", 3.6e9],\n kWst: [\"J\", 3.6e6],\n MWst: [\"J\", 3.6e9],\n kcal: [\"J\", 4184],\n cal: [\"J\", 4.184],\n // Power\n W: [\"W\", 1.0],\n mW: [\"W\", 1.0e-3],\n kW: [\"W\", 1.0e3],\n MW: [\"W\", 1.0e6],\n GW: [\"W\", 1.0e9],\n TW: [\"W\", 1.0e12],\n // Electric potential\n V: [\"V\", 1.0],\n mV: [\"V\", 1.0e-3],\n kV: [\"V\", 1.0e3],\n // Electric 
current\n A: [\"A\", 1.0],\n mA: [\"A\", 1.0e-3],\n // Frequency\n Hz: [\"Hz\", 1.0],\n kHz: [\"Hz\", 1.0e3],\n MHz: [\"Hz\", 1.0e6],\n GHz: [\"Hz\", 1.0e9],\n // Pressure\n Pa: [\"Pa\", 1.0],\n hPa: [\"Pa\", 1.0e2],\n // Angle\n \"°\": [\"°\", 1.0],\n // Percentage\n \"%\": [\"%\", 1.0],\n \"‰\": [\"‰\", 0.1],\n};\n\nexport const SI_UNITS_SET = new Set(Object.keys(SI_UNITS));\n\n/**\n * Build regex for SI units (sorted by length descending)\n */\nfunction buildUnitsRegex(): RegExp {\n const units = Object.keys(SI_UNITS).sort((a, b) => b.length - a.length);\n const patterns = units.map((unit) => {\n const escaped = unit.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n // If unit ends with letter, require word boundary\n return unit[unit.length - 1].match(/[a-zA-Z]/) ? `${escaped}(?!\\\\w)` : escaped;\n });\n return new RegExp(`^(${patterns.join(\"|\")})`, \"u\");\n}\n\nexport const SI_UNITS_REGEX = buildUnitsRegex();\n\n/**\n * Build regex for currency symbols\n */\nfunction buildCurrencyRegex(): RegExp {\n const symbols = Object.keys(CURRENCY_SYMBOLS).sort((a, b) => b.length - a.length);\n const patterns = symbols.map((s) => s.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\"));\n return new RegExp(`^(${patterns.join(\"|\")})`, \"u\");\n}\n\nexport const CURRENCY_REGEX = buildCurrencyRegex();\n\n/**\n * Combined unit regex (SI + currency)\n */\nfunction buildCombinedUnitRegex(): RegExp {\n const allUnits = [...Object.keys(SI_UNITS), ...Object.keys(CURRENCY_SYMBOLS)].sort(\n (a, b) => b.length - a.length,\n );\n const patterns = allUnits.map((unit) => {\n const escaped = unit.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n return unit[unit.length - 1].match(/[a-zA-Z]/) ? 
`${escaped}(?!\\\\w)` : escaped;\n });\n return new RegExp(`(${patterns.join(\"|\")})$`);\n}\n\nexport const UNIT_REGEX = buildCombinedUnitRegex();\n","/**\n * Regular expression patterns for tokenization\n */\n\n// Time patterns\nexport const TIME_HMS_MS = /^(\\d{1,2}):(\\d{2}):(\\d{2}),(\\d{2})(?!\\d)/;\nexport const TIME_HMS = /^(\\d{1,2}):(\\d{2}):(\\d{2})(?!\\d)/;\nexport const TIME_HM = /^(\\d{1,2}):(\\d{2})(?!\\d)/;\n\n// Date patterns\nexport const DATE_ISO = /^(\\d{4})[-/](\\d{2})[-/](\\d{2})(?!\\d)/;\nexport const DATE_DMY = /^(\\d{1,2})[./-](\\d{1,2})[./-](\\d{2,4})(?!\\d)/;\nexport const DATE_DM = /^(\\d{2})\\.(\\d{2})(?!\\d)/;\nexport const DATE_MY = /^(\\d{2})[.-](\\d{4})(?!\\d)/;\n\n// Number patterns\n// Icelandic style: 1.234,56 (dot as thousands, comma as decimal)\nexport const NUMBER_ICELANDIC = /^[-+]?\\d+(\\.\\d{3})*(,\\d+)?(?!\\d)/;\n// English style: 1,234.56 (comma as thousands, dot as decimal)\nexport const NUMBER_ENGLISH = /^[-+]?\\d+(,\\d{3})*(\\.\\d+)?(?!\\d)/;\n// Simple integer\nexport const NUMBER_INTEGER = /^[-+]?\\d+(?!\\d)/;\n\n// Number followed by letter (e.g., 14b, 33C)\nexport const NUMBER_WITH_LETTER = /^(\\d+)([a-zA-Z])(?!\\w)/u;\n\n// Email pattern\nexport const EMAIL = /^[^@\\s]+@[^@\\s]+(\\.[^@\\s.,/:;\"()%#!?]+)+/;\n\n// URL detection\nexport const URL_PREFIX = /^(https?:\\/\\/|ftp:\\/\\/|file:\\/\\/|mailto:|www\\.)/i;\n\n// Domain pattern (simplified)\nexport const DOMAIN = /^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\\.[a-zA-Z]{2,})+/;\n\n// Hashtag\nexport const HASHTAG = /^#\\w+/u;\n\n// Username (@handle)\nexport const USERNAME = /^@[0-9a-z_]+/i;\n\n// Roman numerals\nexport const ROMAN_NUMERAL = /^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/;\n\n// Unicode vulgar fractions\nexport const VULGAR_FRACTIONS = /^[\\u00BC-\\u00BE\\u2150-\\u215E]/;\n\n// Ordinal suffixes (Icelandic kludgy ordinals like \"1sti\", \"3ji\")\nexport const KLUDGY_ORDINAL = 
/^(1st[iau]|3j[iau]|4ð[iau]|5t[iau]|2svar|3svar|2ja|3ja|4ra)(?!\\w)/;\n\n/**\n * Map of kludgy ordinals to their correct forms\n */\nexport const ORDINAL_CORRECTIONS: Record<string, string> = {\n \"1sti\": \"fyrsti\",\n \"1sta\": \"fyrsta\",\n \"1stu\": \"fyrstu\",\n \"3ji\": \"þriðji\",\n \"3ju\": \"þriðju\",\n \"4ði\": \"fjórði\",\n \"4ða\": \"fjórða\",\n \"4ðu\": \"fjórðu\",\n \"5ti\": \"fimmti\",\n \"5ta\": \"fimmta\",\n \"5tu\": \"fimmtu\",\n \"2svar\": \"tvisvar\",\n \"3svar\": \"þrisvar\",\n \"2ja\": \"tveggja\",\n \"3ja\": \"þriggja\",\n \"4ra\": \"fjögurra\",\n};\n\n/**\n * Ordinals that can be converted to numbers\n */\nexport const ORDINAL_NUMBERS: Record<string, number> = {\n \"1sti\": 1,\n \"1sta\": 1,\n \"1stu\": 1,\n \"3ji\": 3,\n \"3ja\": 3,\n \"3ju\": 3,\n \"4ði\": 4,\n \"4ða\": 4,\n \"4ðu\": 4,\n \"5ti\": 5,\n \"5ta\": 5,\n \"5tu\": 5,\n};\n\n/**\n * Convert Roman numeral to integer\n */\nexport function romanToInt(s: string): number {\n const values: [number, string][] = [\n [1000, \"M\"],\n [900, \"CM\"],\n [500, \"D\"],\n [400, \"CD\"],\n [100, \"C\"],\n [90, \"XC\"],\n [50, \"L\"],\n [40, \"XL\"],\n [10, \"X\"],\n [9, \"IX\"],\n [5, \"V\"],\n [4, \"IV\"],\n [1, \"I\"],\n ];\n\n let i = 0;\n let result = 0;\n for (const [value, numeral] of values) {\n while (s.substring(i, i + numeral.length) === numeral) {\n result += value;\n i += numeral.length;\n }\n }\n return result;\n}\n\n/**\n * Check if string is a valid Roman numeral\n */\nexport function isRomanNumeral(s: string): boolean {\n return ROMAN_NUMERAL.test(s);\n}\n\n// Telephone number patterns\n// Icelandic: 7 digits, optionally with country code +354\n// Format: +XXX XXXXXXX or +XXXXXXXXXXX (10 digits total for Iceland)\nexport const TELNO_WITH_CC = /^\\+(\\d{1,3})[-\\s]?(\\d{3})[-\\s]?(\\d{4})(?!\\d)/;\n\n// Chemical molecule pattern\n// Matches element symbols (uppercase + optional lowercase) followed by optional digit count\n// Examples: H2O, CO2, NaCl, H2SO4, C6H12O6\n// Must have 
at least 2 element symbols (single element like \"O\" or \"O2\" is not a molecule)\nexport const MOLECULE = /^[A-Z][a-z]?\\d*(?:[A-Z][a-z]?\\d*)+$/;\n\n// Icelandic SSN (kennitala) pattern: DDMMYY-XXXX\n// Last digit is century indicator (9=1900s, 0=2000s)\nexport const SSN = /^(\\d{6})-?(\\d{4})(?!\\d)/;\n\n// Serial number pattern: groups of digits separated by hyphens\n// Examples: 394-8362, 123-456-789\nexport const SERIAL_NUMBER = /^(\\d+)-(\\d+)(?:-\\d+)*(?!\\d)/;\n\n// Timestamp patterns (date + time combined)\n// ISO timestamp: YYYY-MM-DD HH:MM:SS or YYYY-MM-DDTHH:MM:SS\nexport const TIMESTAMP_ISO = /^(\\d{4})[-/](\\d{2})[-/](\\d{2})[T\\s](\\d{1,2}):(\\d{2}):(\\d{2})(?!\\d)/;\nexport const TIMESTAMP_ISO_HM = /^(\\d{4})[-/](\\d{2})[-/](\\d{2})[T\\s](\\d{1,2}):(\\d{2})(?!\\d)/;\n\n// Company/Person/Entity markers (for annotated text with brackets)\n// Format: [company:Name] or [company Name] (without spaces in single-token form)\nexport const COMPANY_MARKER = /^\\[company[:\\s]([^\\]]+)\\]/i;\nexport const PERSON_MARKER = /^\\[person[:\\s]([^\\]]+)\\]/i;\nexport const ENTITY_MARKER = /^\\[entity[:\\s]([^\\]]+)\\]/i;\n\n/**\n * Valid chemical element symbols\n */\nexport const ELEMENT_SYMBOLS = new Set([\n \"H\",\n \"He\",\n \"Li\",\n \"Be\",\n \"B\",\n \"C\",\n \"N\",\n \"O\",\n \"F\",\n \"Ne\",\n \"Na\",\n \"Mg\",\n \"Al\",\n \"Si\",\n \"P\",\n \"S\",\n \"Cl\",\n \"Ar\",\n \"K\",\n \"Ca\",\n \"Sc\",\n \"Ti\",\n \"V\",\n \"Cr\",\n \"Mn\",\n \"Fe\",\n \"Co\",\n \"Ni\",\n \"Cu\",\n \"Zn\",\n \"Ga\",\n \"Ge\",\n \"As\",\n \"Se\",\n \"Br\",\n \"Kr\",\n \"Rb\",\n \"Sr\",\n \"Y\",\n \"Zr\",\n \"Nb\",\n \"Mo\",\n \"Tc\",\n \"Ru\",\n \"Rh\",\n \"Pd\",\n \"Ag\",\n \"Cd\",\n \"In\",\n \"Sn\",\n \"Sb\",\n \"Te\",\n \"I\",\n \"Xe\",\n \"Cs\",\n \"Ba\",\n \"La\",\n \"Ce\",\n \"Pr\",\n \"Nd\",\n \"Pm\",\n \"Sm\",\n \"Eu\",\n \"Gd\",\n \"Tb\",\n \"Dy\",\n \"Ho\",\n \"Er\",\n \"Tm\",\n \"Yb\",\n \"Lu\",\n \"Hf\",\n \"Ta\",\n \"W\",\n \"Re\",\n \"Os\",\n \"Ir\",\n \"Pt\",\n 
\"Au\",\n \"Hg\",\n \"Tl\",\n \"Pb\",\n \"Bi\",\n \"Po\",\n \"At\",\n \"Rn\",\n \"Fr\",\n \"Ra\",\n \"Ac\",\n \"Th\",\n \"Pa\",\n \"U\",\n \"Np\",\n \"Pu\",\n \"Am\",\n \"Cm\",\n \"Bk\",\n \"Cf\",\n \"Es\",\n \"Fm\",\n \"Md\",\n \"No\",\n \"Lr\",\n \"Rf\",\n \"Db\",\n \"Sg\",\n \"Bh\",\n \"Hs\",\n \"Mt\",\n \"Ds\",\n \"Rg\",\n \"Cn\",\n \"Nh\",\n \"Fl\",\n \"Mc\",\n \"Lv\",\n \"Ts\",\n \"Og\",\n]);\n\n/**\n * Validate Icelandic kennitala checksum\n * Returns true if valid, false otherwise\n */\nexport function validateKennitala(digits: string): boolean {\n if (digits.length !== 10) return false;\n\n // All characters must be digits\n if (!/^\\d{10}$/.test(digits)) return false;\n\n // Extract components\n const d1 = parseInt(digits[0], 10);\n const d2 = parseInt(digits[1], 10);\n const d3 = parseInt(digits[2], 10);\n const d4 = parseInt(digits[3], 10);\n const d5 = parseInt(digits[4], 10);\n const d6 = parseInt(digits[5], 10);\n const d7 = parseInt(digits[6], 10);\n const d8 = parseInt(digits[7], 10);\n const checkDigit = parseInt(digits[8], 10);\n const century = parseInt(digits[9], 10);\n\n // Century must be 9 (1900s) or 0 (2000s)\n if (century !== 9 && century !== 0) return false;\n\n // Validate date components (DD MM YY)\n const day = d1 * 10 + d2;\n const month = d3 * 10 + d4;\n\n // Day must be 1-31 (or 41-71 for companies: day + 40)\n const isCompany = day > 40;\n const actualDay = isCompany ? day - 40 : day;\n if (actualDay < 1 || actualDay > 31) return false;\n if (month < 1 || month > 12) return false;\n\n // Calculate checksum: 11 - ((3×d1 + 2×d2 + 7×d3 + 6×d4 + 5×d5 + 4×d6 + 3×d7 + 2×d8) mod 11)\n const sum = 3 * d1 + 2 * d2 + 7 * d3 + 6 * d4 + 5 * d5 + 4 * d6 + 3 * d7 + 2 * d8;\n const remainder = sum % 11;\n const expected = remainder === 0 ? 
0 : 11 - remainder;\n\n // If expected is 10, the kennitala is invalid\n if (expected === 10) return false;\n\n return checkDigit === expected;\n}\n\n/**\n * Check if a string is a valid chemical formula\n * Validates that all symbols are real element symbols\n * Requires at least 2 elements (otherwise it's just an element symbol, not a molecule)\n */\nexport function isValidMolecule(s: string): boolean {\n // Must match the molecule pattern\n if (!MOLECULE.test(s)) return false;\n\n // Extract and validate all element symbols\n const elementRegex = /([A-Z][a-z]?)(\\d*)/g;\n let match;\n let elementCount = 0;\n\n while ((match = elementRegex.exec(s)) !== null) {\n const element = match[1];\n if (!ELEMENT_SYMBOLS.has(element)) {\n return false;\n }\n elementCount++;\n }\n\n // Must have at least 2 element symbols to be a molecule\n return elementCount >= 2;\n}\n","/**\n * Lexer: Split text into initial tokens\n *\n * This is the first stage of the pipeline. It:\n * 1. Splits text on whitespace\n * 2. Extracts punctuation\n * 3. 
Classifies tokens (words, numbers, dates, times, etc.)\n */\n\nimport type { Token, PunctuationType } from \"../types.js\";\nimport {\n PUNCTUATION_SET,\n LEFT_PUNCTUATION_SET,\n RIGHT_PUNCTUATION_SET,\n NONE_PUNCTUATION_SET,\n SINGLE_QUOTES,\n DOUBLE_QUOTES,\n HYPHENS,\n HYPHEN,\n COMPOSITE_HYPHENS,\n DIGITS,\n SIGN_PREFIX,\n DAYS_IN_MONTH,\n UNICODE_REPLACEMENTS,\n OPEN_DOUBLE_QUOTE,\n CLOSE_DOUBLE_QUOTE,\n OPEN_SINGLE_QUOTE,\n CLOSE_SINGLE_QUOTE,\n} from \"../data/constants.js\";\nimport { SI_UNITS, SI_UNITS_SET, SI_UNITS_REGEX, CURRENCY_SYMBOLS } from \"../data/units.js\";\nimport {\n TIME_HMS_MS,\n TIME_HMS,\n TIME_HM,\n DATE_ISO,\n DATE_DMY,\n DATE_DM,\n DATE_MY,\n NUMBER_WITH_LETTER,\n EMAIL,\n URL_PREFIX,\n DOMAIN,\n HASHTAG,\n USERNAME,\n TELNO_WITH_CC,\n SSN,\n SERIAL_NUMBER,\n TIMESTAMP_ISO,\n TIMESTAMP_ISO_HM,\n COMPANY_MARKER,\n PERSON_MARKER,\n ENTITY_MARKER,\n validateKennitala,\n isValidMolecule,\n} from \"../data/patterns.js\";\n\n/**\n * Replace composite Unicode glyphs with single characters\n */\nexport function normalizeUnicode(text: string): string {\n let result = text;\n for (const [from, to] of Object.entries(UNICODE_REPLACEMENTS)) {\n result = result.replaceAll(from, to);\n }\n return result;\n}\n\n/**\n * Check if a date is valid\n */\nfunction isValidDate(y: number, m: number, d: number): boolean {\n if (y < 1776 || y > 2100) return false;\n if (m < 1 || m > 12) return false;\n if (d < 1 || d > DAYS_IN_MONTH[m]) return false;\n // Check Feb 29 in non-leap years\n if (m === 2 && d === 29) {\n const isLeap = (y % 4 === 0 && y % 100 !== 0) || y % 400 === 0;\n if (!isLeap) return false;\n }\n return true;\n}\n\n/**\n * Get punctuation position type\n */\nfunction getPunctType(char: string): PunctuationType {\n if (LEFT_PUNCTUATION_SET.has(char)) return \"left\";\n if (RIGHT_PUNCTUATION_SET.has(char)) return \"right\";\n if (NONE_PUNCTUATION_SET.has(char)) return \"none\";\n return \"center\";\n}\n\n/**\n * Create a punctuation token\n 
*/\nfunction punct(text: string, normalized?: string): Token {\n const norm = normalized ?? text;\n const position = norm.length === 1 ? getPunctType(norm) : \"center\";\n return { kind: \"punctuation\", text, normalized: norm, position };\n}\n\n/**\n * Parse a token starting with digits\n */\nfunction parseDigits(w: string): [Token, number] {\n // Timestamp with full time: YYYY-MM-DD HH:MM:SS or YYYY-MM-DDTHH:MM:SS\n let match = TIMESTAMP_ISO.exec(w);\n if (match) {\n const y = parseInt(match[1], 10);\n const mo = parseInt(match[2], 10);\n const d = parseInt(match[3], 10);\n const h = parseInt(match[4], 10);\n const mi = parseInt(match[5], 10);\n const s = parseInt(match[6], 10);\n if (isValidDate(y, mo, d) && h >= 0 && h < 24 && mi >= 0 && mi < 60 && s >= 0 && s < 60) {\n return [\n {\n kind: \"timestamp\",\n text: match[0],\n year: y,\n month: mo,\n day: d,\n hour: h,\n minute: mi,\n second: s,\n },\n match[0].length,\n ];\n }\n }\n\n // Timestamp with HH:MM only: YYYY-MM-DD HH:MM or YYYY-MM-DDTHH:MM\n match = TIMESTAMP_ISO_HM.exec(w);\n if (match) {\n const y = parseInt(match[1], 10);\n const mo = parseInt(match[2], 10);\n const d = parseInt(match[3], 10);\n const h = parseInt(match[4], 10);\n const mi = parseInt(match[5], 10);\n if (isValidDate(y, mo, d) && h >= 0 && h < 24 && mi >= 0 && mi < 60) {\n return [\n {\n kind: \"timestamp\",\n text: match[0],\n year: y,\n month: mo,\n day: d,\n hour: h,\n minute: mi,\n second: 0,\n },\n match[0].length,\n ];\n }\n }\n\n // Time with milliseconds: H:M:S,MS\n match = TIME_HMS_MS.exec(w);\n if (match) {\n const h = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n const s = parseInt(match[3], 10);\n if (h >= 0 && h < 24 && m >= 0 && m < 60 && s >= 0 && s < 60) {\n return [{ kind: \"time\", text: match[0], hour: h, minute: m, second: s }, match[0].length];\n }\n }\n\n // Time H:M:S\n match = TIME_HMS.exec(w);\n if (match) {\n const h = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n const s = 
parseInt(match[3], 10);\n if (h >= 0 && h < 24 && m >= 0 && m < 60 && s >= 0 && s < 60) {\n return [{ kind: \"time\", text: match[0], hour: h, minute: m, second: s }, match[0].length];\n }\n }\n\n // Time H:M\n match = TIME_HM.exec(w);\n if (match) {\n const h = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n if (h >= 0 && h < 24 && m >= 0 && m < 60) {\n return [{ kind: \"time\", text: match[0], hour: h, minute: m, second: 0 }, match[0].length];\n }\n }\n\n // ISO date: YYYY-MM-DD or YYYY/MM/DD\n match = DATE_ISO.exec(w);\n if (match) {\n const y = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n const d = parseInt(match[3], 10);\n if (isValidDate(y, m, d)) {\n return [{ kind: \"date\", text: match[0], year: y, month: m, day: d }, match[0].length];\n }\n }\n\n // Icelandic SSN (kennitala): DDMMYY-XXXX\n match = SSN.exec(w);\n if (match) {\n const digits = match[1] + match[2];\n if (validateKennitala(digits)) {\n return [{ kind: \"ssn\", text: match[0], value: digits }, match[0].length];\n }\n }\n\n // Serial number: XXX-XXXX or similar patterns with hyphens\n // This includes invalid SSN-like patterns (6-4 digits that failed checksum validation)\n match = SERIAL_NUMBER.exec(w);\n if (match) {\n return [{ kind: \"serialnumber\", text: match[0] }, match[0].length];\n }\n\n // Icelandic phone number (7 digits without hyphen): XXXXXXX\n // Only match continuous 7 digits - hyphenated patterns go to serial number\n const telnoMatch = w.match(/^(\\d{7})(?!\\d)/);\n if (telnoMatch) {\n const number = telnoMatch[1];\n return [{ kind: \"telno\", text: telnoMatch[0], cc: \"\", number }, telnoMatch[0].length];\n }\n\n // Date with day, month, year: D.M.Y or D/M/Y or D-M-Y\n match = DATE_DMY.exec(w);\n if (match) {\n let d = parseInt(match[1], 10);\n let m = parseInt(match[2], 10);\n let y = parseInt(match[3], 10);\n // Handle 2-digit years\n if (y <= 99) {\n y += y > 50 ? 
1900 : 2000;\n }\n // Swap if American format (month > 12 but day <= 12)\n if (m > 12 && d <= 12) {\n [d, m] = [m, d];\n }\n if (isValidDate(y, m, d)) {\n return [{ kind: \"date\", text: match[0], year: y, month: m, day: d }, match[0].length];\n }\n }\n\n // Relative date: DD.MM (day and month only)\n match = DATE_DM.exec(w);\n if (match) {\n const d = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n if (m >= 1 && m <= 12 && d >= 1 && d <= DAYS_IN_MONTH[m]) {\n return [{ kind: \"daterel\", text: match[0], year: 0, month: m, day: d }, match[0].length];\n }\n }\n\n // Relative date: MM.YYYY or MM-YYYY\n match = DATE_MY.exec(w);\n if (match) {\n const m = parseInt(match[1], 10);\n const y = parseInt(match[2], 10);\n if (y >= 1776 && y <= 2100 && m >= 1 && m <= 12) {\n return [{ kind: \"daterel\", text: match[0], year: y, month: m, day: 0 }, match[0].length];\n }\n }\n\n // Number with trailing letter: 14b, 33C\n match = NUMBER_WITH_LETTER.exec(w);\n if (match) {\n const letter = match[2];\n // Don't match if the letter is an SI unit\n if (!SI_UNITS_SET.has(letter)) {\n const n = parseInt(match[1], 10);\n return [{ kind: \"numwletter\", text: match[0], value: n, letter }, match[0].length];\n }\n }\n\n // Number with unit (Icelandic style: 1.234,56km)\n const icelandicMatch = w.match(/^([-+]?\\d+(?:\\.\\d{3})*(?:,\\d+)?)/);\n if (icelandicMatch) {\n const numPart = icelandicMatch[1];\n const rest = w.slice(numPart.length);\n const unitMatch = SI_UNITS_REGEX.exec(rest);\n if (unitMatch) {\n const unit = unitMatch[1];\n const fullText = numPart + unit;\n const value = parseFloat(numPart.replace(/\\./g, \"\").replace(\",\", \".\"));\n if (unit in CURRENCY_SYMBOLS) {\n const iso = CURRENCY_SYMBOLS[unit];\n return [{ kind: \"amount\", text: fullText, value, currency: iso }, fullText.length];\n }\n const [baseUnit] = SI_UNITS[unit];\n if (unit === \"%\" || unit === \"‰\") {\n return [{ kind: \"percent\", text: fullText, value }, fullText.length];\n }\n return [{ 
kind: \"measurement\", text: fullText, value, unit: baseUnit }, fullText.length];\n }\n }\n\n // Plain number (try Icelandic style first, then English)\n // Icelandic: 1.234,56\n const iceNum = w.match(/^([-+]?\\d+(?:\\.\\d{3})*(?:,\\d+)?)(?!\\d)/);\n if (iceNum && iceNum[1].includes(\",\")) {\n const value = parseFloat(iceNum[1].replace(/\\./g, \"\").replace(\",\", \".\"));\n return [{ kind: \"number\", text: iceNum[1], value }, iceNum[1].length];\n }\n\n // English: 1,234.56\n const engNum = w.match(/^([-+]?\\d+(?:,\\d{3})*(?:\\.\\d+)?)(?!\\d)/);\n if (engNum && (engNum[1].includes(\",\") || engNum[1].includes(\".\"))) {\n const value = parseFloat(engNum[1].replace(/,/g, \"\"));\n return [{ kind: \"number\", text: engNum[1], value }, engNum[1].length];\n }\n\n // Simple integer\n const intMatch = w.match(/^([-+]?\\d+)(?!\\d)/);\n if (intMatch) {\n const value = parseInt(intMatch[1], 10);\n return [{ kind: \"number\", text: intMatch[1], value }, intMatch[1].length];\n }\n\n // Fallback: unknown\n return [{ kind: \"unknown\", text: w[0] }, 1];\n}\n\n/**\n * Parse a single whitespace-separated token\n */\nfunction* parseRawToken(w: string): Generator<Token> {\n // Empty string signals sentence split\n if (!w) {\n yield { kind: \"s_split\", text: null };\n return;\n }\n\n // Pure alphabetic word (most common case)\n if (/^[\\p{L}]+$/u.test(w) || SI_UNITS_SET.has(w)) {\n // Check if it's a chemical molecule (e.g., NaCl, CaCO3)\n if (isValidMolecule(w)) {\n yield { kind: \"molecule\", text: w };\n return;\n }\n yield { kind: \"word\", text: w };\n return;\n }\n\n // Phone number with country code: +XXX XXXXXXX (before signed number handling)\n if (w.startsWith(\"+\") && w.length >= 10) {\n const telMatch = TELNO_WITH_CC.exec(w);\n if (telMatch) {\n const cc = telMatch[1];\n const number = telMatch[2] + telMatch[3];\n yield { kind: \"telno\", text: telMatch[0], cc, number };\n w = w.slice(telMatch[0].length);\n if (!w) return;\n }\n }\n\n // Handle signed numbers at 
start\n if (w.length > 1 && SIGN_PREFIX.has(w[0]) && DIGITS.has(w[1])) {\n const [token, eaten] = parseDigits(w);\n yield token;\n w = w.slice(eaten);\n if (!w) return;\n }\n\n // Handle composite hyphen prefix: -menn in \"þingkonur og -menn\"\n if (w.length > 1 && COMPOSITE_HYPHENS.includes(w[0]) && /\\p{L}/u.test(w[1])) {\n let i = 2;\n while (i < w.length && /\\p{L}/u.test(w[i])) i++;\n const word = w.slice(0, i);\n if (\n word.slice(1).toLowerCase() === word.slice(1) ||\n (i > 2 && word.slice(1).toUpperCase() === word.slice(1))\n ) {\n yield { kind: \"word\", text: word };\n w = w.slice(i);\n }\n }\n\n // Shortcut for quoted single words: \"word\" or 'word'\n if (w.length >= 3) {\n if (DOUBLE_QUOTES.includes(w[0]) && DOUBLE_QUOTES.includes(w[w.length - 1])) {\n const inner = w.slice(1, -1);\n if (/^[\\p{L}]+$/u.test(inner)) {\n yield punct(w[0], OPEN_DOUBLE_QUOTE);\n yield { kind: \"word\", text: inner };\n yield punct(w[w.length - 1], CLOSE_DOUBLE_QUOTE);\n return;\n }\n }\n if (SINGLE_QUOTES.includes(w[0]) && SINGLE_QUOTES.includes(w[w.length - 1])) {\n const inner = w.slice(1, -1);\n if (/^[\\p{L}]+$/u.test(inner)) {\n yield punct(w[0], OPEN_SINGLE_QUOTE);\n yield { kind: \"word\", text: inner };\n yield punct(w[w.length - 1], CLOSE_SINGLE_QUOTE);\n return;\n }\n }\n }\n\n // Leading quote → opening quote\n if (w.length > 1) {\n if (DOUBLE_QUOTES.includes(w[0])) {\n yield punct(w[0], OPEN_DOUBLE_QUOTE);\n w = w.slice(1);\n } else if (SINGLE_QUOTES.includes(w[0])) {\n yield punct(w[0], OPEN_SINGLE_QUOTE);\n w = w.slice(1);\n }\n }\n\n // Process remaining characters\n while (w) {\n // Handle leading punctuation\n while (w && PUNCTUATION_SET.has(w[0])) {\n // Company/Person/Entity markers - check before other punctuation\n if (w.startsWith(\"[\")) {\n const companyMatch = COMPANY_MARKER.exec(w);\n if (companyMatch) {\n yield { kind: \"company\", text: companyMatch[1] };\n w = w.slice(companyMatch[0].length);\n continue;\n }\n const personMatch = 
PERSON_MARKER.exec(w);\n if (personMatch) {\n yield { kind: \"person\", text: personMatch[1] };\n w = w.slice(personMatch[0].length);\n continue;\n }\n const entityMatch = ENTITY_MARKER.exec(w);\n if (entityMatch) {\n yield { kind: \"entity\", text: entityMatch[1] };\n w = w.slice(entityMatch[0].length);\n continue;\n }\n }\n // Ellipsis variations\n if (w.startsWith(\"[...]\")) {\n yield punct(\"[...]\", \"[…]\");\n w = w.slice(5);\n continue;\n }\n if (w.startsWith(\"[…]\")) {\n yield punct(\"[…]\");\n w = w.slice(3);\n continue;\n }\n if (w.startsWith(\"...\")) {\n let dots = \"...\";\n let rest = w.slice(3);\n while (rest.startsWith(\".\")) {\n dots += \".\";\n rest = rest.slice(1);\n }\n yield punct(dots, \"…\");\n w = rest;\n continue;\n }\n if (w.startsWith(\"…\")) {\n yield punct(\"…\");\n w = w.slice(1);\n continue;\n }\n // Double comma → single comma or opening quote\n if (w === \",,\") {\n yield punct(\",,\", \",\");\n w = \"\";\n continue;\n }\n if (w.startsWith(\",,\")) {\n yield punct(\",,\", OPEN_DOUBLE_QUOTE);\n w = w.slice(2);\n continue;\n }\n // Paragraph markers\n if (w === \"[[\" || w === \"]]\") {\n // Skip paragraph markers for now (just punctuation)\n yield punct(w);\n w = \"\";\n continue;\n }\n // Hyphens\n if (HYPHENS.includes(w[0])) {\n yield punct(w[0], HYPHEN);\n w = w.slice(1);\n continue;\n }\n // Closing quotes\n if (DOUBLE_QUOTES.includes(w[0])) {\n yield punct(w[0], CLOSE_DOUBLE_QUOTE);\n w = w.slice(1);\n continue;\n }\n if (SINGLE_QUOTES.includes(w[0])) {\n yield punct(w[0], CLOSE_SINGLE_QUOTE);\n w = w.slice(1);\n continue;\n }\n // Hashtag check\n if (w.startsWith(\"#\") && w.length > 1) {\n const hashMatch = HASHTAG.exec(w);\n if (hashMatch) {\n // Check if it's a number sign: #12\n if (/^#\\d+$/.test(hashMatch[0])) {\n yield {\n kind: \"ordinal\",\n text: hashMatch[0],\n value: parseInt(hashMatch[0].slice(1), 10),\n };\n } else {\n yield { kind: \"hashtag\", text: hashMatch[0] };\n }\n w = w.slice(hashMatch[0].length);\n 
continue;\n }\n }\n // Username check\n if (w.startsWith(\"@\") && w.length > 1) {\n const userMatch = USERNAME.exec(w);\n if (userMatch) {\n yield { kind: \"username\", text: userMatch[0], username: userMatch[0].slice(1) };\n w = w.slice(userMatch[0].length);\n continue;\n }\n }\n // Phone number with country code: +XXX XXXXXXX\n if (w.startsWith(\"+\") && w.length > 1 && DIGITS.has(w[1])) {\n const telMatch = TELNO_WITH_CC.exec(w);\n if (telMatch) {\n const cc = telMatch[1];\n const number = telMatch[2] + telMatch[3];\n yield { kind: \"telno\", text: telMatch[0], cc, number };\n w = w.slice(telMatch[0].length);\n continue;\n }\n }\n // Default: single punctuation character\n yield punct(w[0]);\n w = w.slice(1);\n }\n\n if (!w) break;\n\n // Email check\n if (w.includes(\"@\")) {\n const emailMatch = EMAIL.exec(w);\n if (emailMatch) {\n yield { kind: \"email\", text: emailMatch[0] };\n w = w.slice(emailMatch[0].length);\n continue;\n }\n }\n\n // URL check\n if (URL_PREFIX.test(w)) {\n // Cut trailing punctuation\n let url = w;\n let trailing = \"\";\n while (url && RIGHT_PUNCTUATION_SET.has(url[url.length - 1])) {\n trailing = url[url.length - 1] + trailing;\n url = url.slice(0, -1);\n }\n yield { kind: \"url\", text: url };\n w = trailing;\n continue;\n }\n\n // Domain check\n if (w.length >= 4 && /^[a-zA-Z0-9]/.test(w) && w.includes(\".\")) {\n const domainMatch = DOMAIN.exec(w);\n if (domainMatch) {\n let domain = domainMatch[0];\n let trailing = w.slice(domain.length);\n // Cut trailing punctuation from domain\n while (domain && PUNCTUATION_SET.has(domain[domain.length - 1])) {\n trailing = domain[domain.length - 1] + trailing;\n domain = domain.slice(0, -1);\n }\n if (domain.includes(\".\")) {\n yield { kind: \"domain\", text: domain };\n w = trailing;\n continue;\n }\n }\n }\n\n // Numbers\n if (DIGITS.has(w[0]) || (SIGN_PREFIX.has(w[0]) && w.length > 1 && DIGITS.has(w[1]))) {\n const [token, eaten] = parseDigits(w);\n yield token;\n w = w.slice(eaten);\n\n 
// Check for SI unit immediately following\n if (w) {\n const unitMatch = SI_UNITS_REGEX.exec(w);\n if (unitMatch) {\n yield { kind: \"word\", text: unitMatch[1] };\n w = w.slice(unitMatch[1].length);\n }\n }\n continue;\n }\n\n // Words (alphabetic sequences)\n if (/^\\p{L}/u.test(w)) {\n let i = 1;\n const PUNCT_INSIDE = new Set([\".\", \"'\", \"'\", \"´\", \"'\", HYPHEN, \"\\u2013\"]);\n const PUNCT_ENDING = new Set([\"'\", \"²\", \"³\"]);\n\n while (i < w.length) {\n if (/\\p{L}/u.test(w[i])) {\n i++;\n } else if (DIGITS.has(w[i])) {\n // Could be a molecule like H2O - extend to include digits\n i++;\n } else if (PUNCT_INSIDE.has(w[i]) && i + 1 < w.length && /\\p{L}/u.test(w[i + 1])) {\n i++;\n } else {\n break;\n }\n }\n // Allow ending punctuation\n if (i < w.length && PUNCT_ENDING.has(w[i])) {\n i++;\n }\n const wordCandidate = w.slice(0, i);\n\n // Check if this is a chemical molecule (e.g., H2O, CO2, NaCl)\n if (isValidMolecule(wordCandidate)) {\n yield { kind: \"molecule\", text: wordCandidate };\n w = w.slice(i);\n continue;\n }\n\n yield { kind: \"word\", text: wordCandidate };\n w = w.slice(i);\n continue;\n }\n\n // Unknown character - emit as unknown\n yield { kind: \"unknown\", text: w[0] };\n w = w.slice(1);\n }\n}\n\n/**\n * Split text into rough tokens on whitespace, handling paragraph breaks\n */\nfunction* generateRoughTokens(text: string, replaceCompositeGlyphs: boolean): Generator<string> {\n let normalized = replaceCompositeGlyphs ? 
normalizeUnicode(text) : text;\n\n // Split on double newlines (paragraph breaks)\n const paragraphs = normalized.split(/\\n\\s*\\n/);\n let first = true;\n\n for (const paragraph of paragraphs) {\n if (!first) {\n // Yield empty string to signal sentence split\n yield \"\";\n }\n first = false;\n\n // Split on whitespace\n for (const word of paragraph.split(/\\s+/)) {\n if (word) {\n yield word;\n }\n }\n }\n}\n\n/**\n * Lexer: Convert text to initial token stream\n */\nexport function lex(text: string, replaceCompositeGlyphs = true): Token[] {\n const tokens: Token[] = [];\n\n for (const rawToken of generateRoughTokens(text, replaceCompositeGlyphs)) {\n for (const token of parseRawToken(rawToken)) {\n tokens.push(token);\n }\n }\n\n return tokens;\n}\n","/**\n * Common Icelandic abbreviations\n * Format: abbreviation → expansion\n *\n * This is a curated subset (~100) of common abbreviations.\n * The full Miðeind tokenizer has ~1500.\n */\nexport const ABBREVIATIONS: Record<string, string> = {\n // Titles and honorifics\n hr: \"herra\",\n \"hr.\": \"herra\",\n frú: \"frú\",\n \"frú.\": \"frú\",\n sr: \"séra\",\n \"sr.\": \"séra\",\n dr: \"doktor\",\n \"dr.\": \"doktor\",\n prof: \"prófessor\",\n \"prof.\": \"prófessor\",\n\n // Organizations\n hf: \"hlutafélag\",\n \"hf.\": \"hlutafélag\",\n ehf: \"einkahlutafélag\",\n \"ehf.\": \"einkahlutafélag\",\n ohf: \"opinbert hlutafélag\",\n \"ohf.\": \"opinbert hlutafélag\",\n sf: \"sameignarfélag\",\n \"sf.\": \"sameignarfélag\",\n slf: \"samlagsfélag\",\n \"slf.\": \"samlagsfélag\",\n ses: \"sjálfseignarstofnun\",\n \"ses.\": \"sjálfseignarstofnun\",\n\n // Common abbreviations\n ofl: \"og fleiri\",\n \"o.fl.\": \"og fleiri\",\n osfrv: \"og svo framvegis\",\n \"o.s.frv.\": \"og svo framvegis\",\n oþh: \"og þess háttar\",\n \"o.þ.h.\": \"og þess háttar\",\n þe: \"það er\",\n \"þ.e.\": \"það er\",\n þea: \"það er að segja\",\n \"þ.e.a.s.\": \"það er að segja\",\n sbr: \"samanber\",\n \"sbr.\": \"samanber\",\n skv: 
\"samkvæmt\",\n \"skv.\": \"samkvæmt\",\n mtt: \"með tilliti til\",\n \"m.t.t.\": \"með tilliti til\",\n ath: \"athugasemd\",\n \"ath.\": \"athugasemd\",\n gr: \"grein\",\n \"gr.\": \"grein\",\n mgr: \"málsgrein\",\n \"mgr.\": \"málsgrein\",\n tölul: \"töluliður\",\n \"tölul.\": \"töluliður\",\n nr: \"númer\",\n \"nr.\": \"númer\",\n sl: \"síðastliðinn\",\n \"sl.\": \"síðastliðinn\",\n nk: \"næstkomandi\",\n \"n.k.\": \"næstkomandi\",\n\n // Time-related\n kl: \"klukkan\",\n \"kl.\": \"klukkan\",\n ca: \"circa\",\n \"ca.\": \"circa\",\n\n // Academic/Professional\n bs: \"Bachelor of Science\",\n \"B.S.\": \"Bachelor of Science\",\n ms: \"Master of Science\",\n \"M.S.\": \"Master of Science\",\n ba: \"Bachelor of Arts\",\n \"B.A.\": \"Bachelor of Arts\",\n // Note: \"ma\" also means \"milljarður\" - using M.A. for Master of Arts\n \"M.A.\": \"Master of Arts\",\n phd: \"Doctor of Philosophy\",\n \"Ph.D.\": \"Doctor of Philosophy\",\n mba: \"Master of Business Administration\",\n MBA: \"Master of Business Administration\",\n\n // Places\n Rvk: \"Reykjavík\",\n \"Rvk.\": \"Reykjavík\",\n Akr: \"Akranes\",\n \"Akr.\": \"Akranes\",\n Ak: \"Akureyri\",\n \"Ak.\": \"Akureyri\",\n\n // Directions\n n: \"norður\",\n \"n.\": \"norður\",\n s: \"suður\",\n \"s.\": \"suður\",\n a: \"austur\",\n \"a.\": \"austur\",\n v: \"vestur\",\n \"v.\": \"vestur\",\n na: \"norðaustur\",\n \"n.a.\": \"norðaustur\",\n nv: \"norðvestur\",\n \"n.v.\": \"norðvestur\",\n sa: \"suðaustur\",\n \"s.a.\": \"suðaustur\",\n sv: \"suðvestur\",\n \"s.v.\": \"suðvestur\",\n\n // Measurements (that don't have SI unit meanings)\n þús: \"þúsund\",\n \"þús.\": \"þúsund\",\n millj: \"milljón\",\n \"millj.\": \"milljón\",\n mljó: \"milljón\",\n \"mljó.\": \"milljón\",\n ma: \"milljarður\",\n \"ma.\": \"milljarður\",\n mrð: \"milljarður\",\n \"mrð.\": \"milljarður\",\n};\n\n/**\n * Abbreviations that can end a sentence (followed by period)\n */\nexport const FINISHER_ABBREVIATIONS = new Set([\n \"o.fl\",\n 
\"o.s.frv\",\n \"o.þ.h\",\n \"þ.e\",\n \"þ.e.a.s\",\n \"m.t.t\",\n \"n.k\",\n]);\n\n/**\n * Check if an abbreviation exists\n */\nexport function hasAbbreviation(text: string): boolean {\n return text in ABBREVIATIONS;\n}\n\n/**\n * Get the expansion of an abbreviation\n */\nexport function getAbbreviationMeaning(text: string): string | undefined {\n return ABBREVIATIONS[text];\n}\n","/**\n * Particles: Coalesce abbreviations, currency+number, etc.\n *\n * This stage combines tokens that belong together:\n * - Abbreviation + period\n * - Currency symbol + number → amount\n * - Number + currency code → amount\n */\n\nimport type { Token } from \"../types.js\";\nimport { CURRENCY_SYMBOLS, CURRENCY_ABBREV, AMOUNT_ABBREV } from \"../data/units.js\";\nimport { ABBREVIATIONS } from \"../data/abbreviations.js\";\n\n/**\n * Process particles: combine related tokens\n */\nexport function processParticles(tokens: Token[]): Token[] {\n const result: Token[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const token = tokens[i];\n const next = tokens[i + 1];\n\n // Word + period → check if it's an abbreviation\n if (token.kind === \"word\" && next?.kind === \"punctuation\" && next.text === \".\") {\n const abbrevWithPeriod = token.text + \".\";\n if (abbrevWithPeriod in ABBREVIATIONS || abbrevWithPeriod in AMOUNT_ABBREV) {\n result.push({ kind: \"word\", text: abbrevWithPeriod });\n i += 2;\n continue;\n }\n }\n\n // Currency symbol + number → amount (e.g., $100)\n if (token.kind === \"punctuation\" && token.text in CURRENCY_SYMBOLS && next?.kind === \"number\") {\n const iso = CURRENCY_SYMBOLS[token.text];\n result.push({\n kind: \"amount\",\n text: token.text + next.text,\n value: next.value,\n currency: iso,\n });\n i += 2;\n continue;\n }\n\n // Number + currency code → amount (e.g., 100 USD, 100 kr.)\n if (token.kind === \"number\" && next?.kind === \"word\") {\n const currencyText = next.text;\n if (CURRENCY_ABBREV.has(currencyText)) {\n result.push({\n kind: 
\"amount\",\n text: token.text + \" \" + next.text,\n value: token.value,\n currency: currencyText,\n });\n i += 2;\n continue;\n }\n // Check for ISK abbreviations (kr., m.kr., etc.)\n if (currencyText in AMOUNT_ABBREV) {\n const multiplier = AMOUNT_ABBREV[currencyText];\n result.push({\n kind: \"amount\",\n text: token.text + \" \" + next.text,\n value: token.value * multiplier,\n currency: \"ISK\",\n });\n i += 2;\n continue;\n }\n }\n\n // Percent word after number\n if (\n token.kind === \"number\" &&\n next?.kind === \"word\" &&\n [\"prósent\", \"prósenta\", \"prósenti\", \"hundraðshluti\"].includes(next.text.toLowerCase())\n ) {\n result.push({\n kind: \"percent\",\n text: token.text + \" \" + next.text,\n value: token.value,\n });\n i += 2;\n continue;\n }\n\n // Date + time → timestamp\n if ((token.kind === \"date\" || token.kind === \"dateabs\") && next?.kind === \"time\") {\n result.push({\n kind: \"timestamp\",\n text: token.text + \" \" + next.text,\n year: token.year,\n month: token.month,\n day: token.day,\n hour: next.hour,\n minute: next.minute,\n second: next.second,\n });\n i += 2;\n continue;\n }\n\n // Default: pass through\n result.push(token);\n i++;\n }\n\n return result;\n}\n","/**\n * Sentences: Add sentence boundary markers\n *\n * This stage detects sentence boundaries and inserts S_BEGIN/S_END markers.\n */\n\nimport type { Token } from \"../types.js\";\nimport {\n END_OF_SENTENCE,\n SENTENCE_FINISHERS,\n PUNCT_COMBINATIONS,\n MONTHS,\n} from \"../data/constants.js\";\nimport { isRomanNumeral } from \"../data/patterns.js\";\nimport { CURRENCY_ABBREV } from \"../data/units.js\";\n\n/**\n * Check if the next token could be ending a sentence or starting a new one\n */\nfunction couldBeEndOfSentence(nextToken: Token): boolean {\n // Sentence markers definitely end/start\n if (nextToken.kind === \"s_end\" || nextToken.kind === \"s_split\") {\n return true;\n }\n\n // Uppercase word (except month names and roman numerals)\n if (nextToken.kind 
=== \"word\" && nextToken.text.length > 0) {\n const firstChar = nextToken.text[0];\n if (firstChar === firstChar.toUpperCase() && firstChar !== firstChar.toLowerCase()) {\n // It's capitalized\n const lower = nextToken.text.toLowerCase();\n // Don't treat month names as sentence starters\n if (lower in MONTHS) return false;\n // Don't treat roman numerals as sentence starters\n if (isRomanNumeral(nextToken.text)) return false;\n // Don't treat currency abbreviations as sentence starters\n if (CURRENCY_ABBREV.has(nextToken.text)) return false;\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Add sentence boundary markers\n */\nexport function addSentenceMarkers(tokens: Token[]): Token[] {\n if (tokens.length === 0) return [];\n\n const result: Token[] = [];\n let inSentence = false;\n let i = 0;\n\n const beginSentence = (): Token => ({ kind: \"s_begin\", text: null });\n const endSentence = (): Token => ({ kind: \"s_end\", text: null });\n\n while (i < tokens.length) {\n const token = tokens[i];\n const next = tokens[i + 1];\n\n // Handle sentence split marker\n if (token.kind === \"s_split\") {\n if (inSentence) {\n result.push(endSentence());\n inSentence = false;\n }\n // Don't emit the split marker itself\n i++;\n continue;\n }\n\n // Start a new sentence if needed\n if (!inSentence) {\n result.push(beginSentence());\n inSentence = true;\n }\n\n // Check for sentence-ending punctuation\n if (token.kind === \"punctuation\" && END_OF_SENTENCE.has(token.normalized)) {\n // Handle ellipsis mid-sentence (don't end if next token doesn't look like sentence start)\n if (token.normalized === \"…\" && next && !couldBeEndOfSentence(next)) {\n result.push(token);\n i++;\n continue;\n }\n\n // Combine consecutive punctuation (??!, etc.)\n let combinedText = token.text;\n let j = i + 1;\n while (j < tokens.length) {\n const nextTok = tokens[j];\n if (nextTok.kind !== \"punctuation\") break;\n if (!PUNCT_COMBINATIONS.has(nextTok.normalized)) break;\n combinedText += 
nextTok.text;\n j++;\n }\n\n // Emit combined punctuation if any\n if (j > i + 1) {\n result.push({ ...token, text: combinedText });\n i = j;\n } else {\n result.push(token);\n i++;\n }\n\n // Collect any sentence finishers (closing quotes, brackets)\n while (i < tokens.length) {\n const tok = tokens[i];\n if (tok.kind !== \"punctuation\") break;\n if (!SENTENCE_FINISHERS.has(tok.normalized)) break;\n result.push(tok);\n i++;\n }\n\n // End the sentence\n result.push(endSentence());\n inSentence = false;\n continue;\n }\n\n // Regular token\n result.push(token);\n i++;\n }\n\n // Close any open sentence\n if (inSentence) {\n result.push(endSentence());\n }\n\n return result;\n}\n","/**\n * Phrases: Combine date+year, ordinal+month, clock+time, compounds, etc.\n *\n * This stage combines tokens that form higher-level constructs:\n * - \"5. mars\" → date\n * - \"2024\" after date → add year to date\n * - \"kl. 14:30\" → time with prefix\n * - \"1920 f.Kr.\" → year BCE\n * - \"stjórnskipunar- og eftirlitsnefnd\" → compound word\n */\n\nimport type { Token } from \"../types.js\";\nimport {\n MONTHS,\n MONTH_BLACKLIST,\n CE,\n BCE,\n CLOCK_ABBREVS,\n CLOCK_NUMBERS,\n HYPHEN,\n EN_DASH,\n} from \"../data/constants.js\";\nimport { FINISHER_ABBREVIATIONS } from \"../data/abbreviations.js\";\n\n/** Hyphens that can appear in compound words */\nconst COMPOSITE_HYPHENS = new Set([HYPHEN, EN_DASH]);\n\n/**\n * Get month number from token, or null if not a month\n */\nfunction getMonth(token: Token, afterOrdinal = false): number | null {\n if (token.kind !== \"word\") return null;\n // Check blacklist (Ágúst as a name)\n if (!afterOrdinal && MONTH_BLACKLIST.has(token.text)) return null;\n const lower = token.text.toLowerCase();\n return MONTHS[lower] ?? 
null;\n}\n\n/**\n * Check if a token is a composite hyphen (- or –)\n */\nfunction isCompositeHyphen(token: Token): boolean {\n return token.kind === \"punctuation\" && COMPOSITE_HYPHENS.has(token.text);\n}\n\n/**\n * Try to parse a compound word pattern starting at index i.\n * Pattern: (word- [,])+ (og|eða) word\n * Examples:\n * - \"stjórnskipunar- og eftirlitsnefnd\"\n * - \"dómsmála-, viðskipta- og iðnaðarráðherra\"\n *\n * Returns [combined token, new index] or null if no match.\n */\nfunction tryParseCompound(tokens: Token[], startIndex: number): [Token, number] | null {\n const prefixes: Token[] = [];\n let i = startIndex;\n\n // Accumulate prefix patterns: word + hyphen [+ comma]\n while (i < tokens.length) {\n const word = tokens[i];\n const hyphen = tokens[i + 1];\n\n // Must be word followed by composite hyphen\n if (word?.kind !== \"word\" || !hyphen || !isCompositeHyphen(hyphen)) {\n break;\n }\n\n prefixes.push(word);\n prefixes.push(hyphen);\n i += 2;\n\n // Check for optional comma\n const maybeComma = tokens[i];\n if (maybeComma?.kind === \"punctuation\" && maybeComma.text === \",\") {\n prefixes.push(maybeComma);\n i++;\n }\n }\n\n // Must have at least one prefix\n if (prefixes.length === 0) {\n return null;\n }\n\n // Next must be \"og\" or \"eða\"\n const conjunction = tokens[i];\n if (\n !conjunction ||\n conjunction.kind !== \"word\" ||\n (conjunction.text.toLowerCase() !== \"og\" && conjunction.text.toLowerCase() !== \"eða\")\n ) {\n return null;\n }\n\n // After conjunction must be a word (the suffix)\n const suffix = tokens[i + 1];\n if (!suffix || suffix.kind !== \"word\") {\n return null;\n }\n\n // Build the combined text\n // Join all parts: \"stjórnskipunar\", \"-\", \"og\", \"eftirlitsnefnd\"\n // Then normalize spacing: remove space before hyphen/comma\n const parts = [...prefixes, conjunction, suffix];\n let text = parts.map((t) => t.text).join(\" \");\n text = text.replace(/ -/g, \"-\").replace(/ ,/g, \",\");\n\n return [{ kind: 
\"word\", text }, i + 2];\n}\n\n/**\n * Process phrases: combine date/time constructs and compound words\n */\nexport function processPhrases(tokens: Token[]): Token[] {\n const result: Token[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const token = tokens[i];\n const next = tokens[i + 1];\n\n // Try compound word pattern first\n const compound = tryParseCompound(tokens, i);\n if (compound) {\n result.push(compound[0]);\n i = compound[1];\n continue;\n }\n\n // Word + \".\" → check if it's an abbreviation that ends sentences\n if (token.kind === \"word\" && next?.kind === \"punctuation\" && next.text === \".\") {\n const base = token.text.replace(/\\.$/, \"\");\n if (FINISHER_ABBREVIATIONS.has(base)) {\n // Coalesce abbreviation with period\n result.push({ kind: \"word\", text: token.text + \".\" });\n i += 2;\n continue;\n }\n }\n\n // Year/number + \"e.Kr.\" or \"f.Kr.\" → year with era\n if ((token.kind === \"year\" || token.kind === \"number\") && next?.kind === \"word\") {\n const val = token.kind === \"year\" ? token.value : token.value;\n let newVal: number | null = null;\n if (BCE.has(next.text)) {\n newVal = -val;\n } else if (CE.has(next.text)) {\n newVal = val;\n }\n if (newVal !== null) {\n let text = token.text + \" \" + next.text;\n i += 2;\n // Handle trailing period\n if (tokens[i]?.kind === \"punctuation\" && tokens[i].text === \".\") {\n text += \".\";\n i++;\n }\n result.push({ kind: \"year\", text, value: newVal });\n continue;\n }\n }\n\n // Ordinal/number + month name → date\n if ((token.kind === \"ordinal\" || token.kind === \"number\") && next?.kind === \"word\") {\n const month = getMonth(next, true);\n if (month !== null) {\n const day = token.kind === \"ordinal\" ? 
token.value : token.value;\n result.push({\n kind: \"daterel\",\n text: token.text + \" \" + next.text,\n year: 0,\n month,\n day,\n });\n i += 2;\n continue;\n }\n }\n\n // Date + year → add year to date\n if (\n (token.kind === \"date\" || token.kind === \"daterel\") &&\n token.year === 0 &&\n next?.kind === \"number\"\n ) {\n const year = next.value;\n if (year >= 1776 && year <= 2100) {\n result.push({\n kind: \"dateabs\",\n text: token.text + \" \" + next.text,\n year,\n month: token.month,\n day: token.day,\n });\n i += 2;\n continue;\n }\n }\n\n // Date + year token\n if (\n (token.kind === \"date\" || token.kind === \"daterel\") &&\n token.year === 0 &&\n next?.kind === \"year\"\n ) {\n result.push({\n kind: \"dateabs\",\n text: token.text + \" \" + next.text,\n year: next.value,\n month: token.month,\n day: token.day,\n });\n i += 2;\n continue;\n }\n\n // Clock abbreviation + time → time (keep as-is but combine text)\n if (\n token.kind === \"word\" &&\n CLOCK_ABBREVS.has(token.text.toLowerCase()) &&\n next?.kind === \"time\"\n ) {\n result.push({\n ...next,\n text: token.text + \" \" + next.text,\n });\n i += 2;\n continue;\n }\n\n // Clock abbreviation + spelled-out time (kl. 
tvö → time)\n if (\n token.kind === \"word\" &&\n CLOCK_ABBREVS.has(token.text.toLowerCase()) &&\n next?.kind === \"word\"\n ) {\n const timeValue = CLOCK_NUMBERS[next.text.toLowerCase()];\n if (timeValue) {\n result.push({\n kind: \"time\",\n text: token.text + \" \" + next.text,\n hour: timeValue[0],\n minute: timeValue[1],\n second: timeValue[2],\n });\n i += 2;\n continue;\n }\n }\n\n // Default: pass through\n result.push(token);\n i++;\n }\n\n return result;\n}\n","/**\n * Main tokenize function\n *\n * Chains the pipeline stages to produce a stream of tokens.\n */\n\nimport type { Token, TokenizeOptions } from \"./types.js\";\nimport { lex } from \"./pipeline/lexer.js\";\nimport { processParticles } from \"./pipeline/particles.js\";\nimport { addSentenceMarkers } from \"./pipeline/sentences.js\";\nimport { processPhrases } from \"./pipeline/phrases.js\";\n\n/**\n * Tokenize Icelandic text into an array of tokens.\n *\n * @param text - The text to tokenize\n * @param options - Tokenization options\n * @returns Array of tokens\n *\n * @example\n * ```ts\n * const tokens = tokenize(\"Þetta er próf.\");\n * // → [word(\"Þetta\"), word(\"er\"), word(\"próf\"), punctuation(\".\")]\n *\n * // With sentence markers:\n * const tokens = tokenize(\"Þetta er próf.\", { includeSentenceMarkers: true });\n * // → [s_begin, word(\"Þetta\"), word(\"er\"), word(\"próf\"), punctuation(\".\"), s_end]\n * ```\n */\nexport function tokenize(text: string, options: TokenizeOptions = {}): Token[] {\n const { replaceCompositeGlyphs = true, includeSentenceMarkers = false } = options;\n\n // Pipeline:\n // 1. Lexer: split text into initial tokens\n let tokens = lex(text, replaceCompositeGlyphs);\n\n // 2. Particles: coalesce abbreviations, currency+number\n tokens = processParticles(tokens);\n\n // 3. Phrases: combine date+year, ordinal+month, etc.\n tokens = processPhrases(tokens);\n\n // 4. 
Sentences: add boundary markers if requested\n if (includeSentenceMarkers) {\n tokens = addSentenceMarkers(tokens);\n } else {\n // Filter out internal sentence markers (s_split)\n tokens = tokens.filter((t) => t.kind !== \"s_split\");\n }\n\n return tokens;\n}\n","/**\n * Split text into sentences\n *\n * A higher-level function that returns sentence strings rather than tokens.\n */\n\nimport { tokenize } from \"./tokenize.js\";\nimport type { Token } from \"./types.js\";\n\n/**\n * Split Icelandic text into an array of sentence strings.\n *\n * @param text - The text to split\n * @returns Array of sentence strings\n *\n * @example\n * ```ts\n * const sentences = splitIntoSentences(\"Þetta er fyrsta setning. Þetta er önnur.\");\n * // → [\"Þetta er fyrsta setning.\", \"Þetta er önnur.\"]\n * ```\n */\nexport function splitIntoSentences(text: string): string[] {\n const tokens = tokenize(text, { includeSentenceMarkers: true });\n const sentences: string[] = [];\n let currentSentence: string[] = [];\n\n for (const token of tokens) {\n if (token.kind === \"s_begin\") {\n currentSentence = [];\n } else if (token.kind === \"s_end\") {\n if (currentSentence.length > 0) {\n sentences.push(joinTokens(currentSentence));\n }\n currentSentence = [];\n } else if (token.text !== null) {\n currentSentence.push(getTokenText(token));\n }\n }\n\n // Handle any remaining tokens (shouldn't happen with proper markers)\n if (currentSentence.length > 0) {\n sentences.push(joinTokens(currentSentence));\n }\n\n return sentences;\n}\n\n/**\n * Get display text from a token\n */\nfunction getTokenText(token: Token): string {\n if (token.kind === \"punctuation\") {\n return token.normalized;\n }\n return token.text ?? 
\"\";\n}\n\n/**\n * Join token texts with appropriate spacing\n */\nfunction joinTokens(texts: string[]): string {\n if (texts.length === 0) return \"\";\n\n let result = texts[0];\n\n for (let i = 1; i < texts.length; i++) {\n const prev = texts[i - 1];\n const curr = texts[i];\n\n // Determine if we need a space\n const needsSpace = shouldAddSpace(prev, curr);\n if (needsSpace) {\n result += \" \" + curr;\n } else {\n result += curr;\n }\n }\n\n return result;\n}\n\n/**\n * Determine if a space should be added between two tokens\n */\nfunction shouldAddSpace(prev: string, curr: string): boolean {\n if (!prev || !curr) return false;\n\n const lastChar = prev[prev.length - 1];\n const firstChar = curr[0];\n\n // Opening punctuation: no space after\n // ( [ „ ‚ « <\n const openingPunct = new Set([\"(\", \"[\", \"\\u201E\", \"\\u201A\", \"\\u00AB\", \"<\"]);\n if (openingPunct.has(lastChar)) return false;\n\n // Closing/ending punctuation: no space before\n // . , ; : ! ? ) ] \" ' » > …\n const closingPunct = new Set([\n \".\",\n \",\",\n \";\",\n \":\",\n \"!\",\n \"?\",\n \")\",\n \"]\",\n \"\\u201C\", // \"\n \"\\u2019\", // '\n \"\\u00BB\", // »\n \">\",\n \"\\u2026\", // …\n ]);\n if (closingPunct.has(firstChar)) return false;\n\n // Hyphen handling\n if (lastChar === \"-\" || firstChar === \"-\") return false;\n\n return 
true;\n}\n"],"mappings":"AAGA,MAAa,EAA+C,CAE1D,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IAEX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IAEX,IAAU,GACV,IAAU,GACV,IAAU,GACX,CAQY,EAAU,MAQV,EAAmB,eAMA,GAAuB,EAErD,EAAmB,GAnDrB,MAqDa,EAAkB,IAAI,IAAI,qDAAY,CACtC,EAAuB,IAAI,IAAI,EAAiB,CAChD,EAAwB,IAAI,IAAI,qBAAkB,CAClD,EAAuB,IAAI,IAAI,cAAiB,CAMhD,EAAgB,QAEhB,EAAgB,SAahB,EAAkB,IAAI,IAAI,CAAC,IAAK,IAAK,IAAK,IAAS,CAAC,CACpD,EAAqB,IAAI,IAAI,CACxC,IACA,IACA,IACA,IACA,IACA,IACA,IACA,MACD,CAAC,CAeW,EAAqB,IAAI,IAAI,CAAC,IAAK,IAAK,IAAS,CAAC,CAKlD,EAAS,IAAI,IAAI,aAAa,CAC9B,EAAc,IAAI,IAAI,CAAC,IAAK,IAAI,CAAC,CAKjC,EAAiC,CAC5C,OAAe,EACf,QAAgB,EAChB,QAAgB,EAChB,SAAiB,EACjB,KAAM,EACN,MAAc,EACd,OAAe,EACf,IAAY,EACZ,KAAa,EACb,KAAkB,EAClB,MAAmB,EACnB,KAAkB,EAClB,MAAmB,EACnB,MAAmB,EACnB,OAAoB,EACpB,UAAW,EACX,WAAY,EACZ,QAAgB,GAChB,SAAiB,GACjB,SAAiB,GACjB,UAAkB,GAClB,SAAU,GACV,UAAW,GAEX,OAAQ,EACR,OAAQ,EACR,OAAQ,EACR,OAAQ,EACR,OAAa,EACb,OAAa,EACb,MAAY,EACZ,OAAkB,EAClB,OAAQ,EACR,QAAS,EACT,OAAQ,GACR,OAAa,GACb,OAAQ,GACR,IAAK,EACL,IAAK,EACL,IAAK,EACL,IAAK,EACL,IAAY,EACZ,IAAY,EACZ,GAAW,EACX,IAAiB,EACjB,IAAK,EACL,KAAM,EACN,IAAK,GACL,IAAY,GACZ,IAAK,GACN,CAKY,EAAkB,IAAI,IAAI,CAAC,QAAkB,CAAC,CAK9C,EAAgB,CAAC,EAAG,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAG,CAKnE,EAAgB,IAAI,IAAI,CAAC,KAAM,MAAO,UAAU,CAAC,CAKjD,GAA0D,CACrE,KAAM,CAAC,EAAG,EAAG,EAAE,CACf,IAAY,CAAC,EAAG,EAAG,EAAE,CACrB,KAAkB,CAAC,EAAG,EAAG,EAAE,CAC3B,OAAe,CAAC,EAAG,EAAG,EAAE,CACxB,KAAM,CAAC,EAAG,EAAG,EAAE,CACf,IAAK,CAAC,EAAG,EAAG,EAAE,CACd,IAAY,CAAC,EAAG,EAAG,EAAE,CACrB,KAAa,CAAC,EAAG,EAAG,EAAE,CACtB,IAAY,CAAC,EAAG,EAAG,EAAE,CACrB,IAAY,CAAC,GAAI,EAAG,EAAE,CACtB,OAAQ,CAAC,GAAI,EAAG,EAAE,CAClB,KAAa,CAAC,GAAI,EAAG,EAAE,CACvB,SAAiB,CAAC,GAAI,GAAI,EAAE,CAC5B,QAAqB,CAAC,EAAG,GAAI,EAAE,CAC/B,SAA2B,CAAC,EAAG,GAAI,EAAE,CACrC,WAAwB,CAAC,EAAG,GAAI,EAAE,CAClC,SAAiB,CAAC,EAAG,GAAI,EAAE,CAC3B,QAAgB,CAAC,EAAG,GAAI,EAAE,CAC1B,QAAqB,CAAC,EA
AG,GAAI,EAAE,CAC/B,SAAsB,CAAC,EAAG,GAAI,EAAE,CAChC,QAAqB,CAAC,EAAG,GAAI,EAAE,CAC/B,QAAqB,CAAC,EAAG,GAAI,EAAE,CAC/B,WAAmB,CAAC,GAAI,GAAI,EAAE,CAC9B,SAAsB,CAAC,GAAI,GAAI,EAAE,CAClC,CAKY,EAAK,IAAI,IAAI,CAAC,OAAQ,QAAQ,CAAC,CAC/B,EAAM,IAAI,IAAI,CAAC,OAAQ,QAAQ,CAAC,CACvB,IAAI,IAAI,CAAC,GAAG,EAAI,GAAG,EAAI,CAAC,CCxN9C,MAAa,EAA2C,CACtD,EAAG,MACH,IAAK,MACL,IAAK,MACL,IAAK,MACL,IAAK,MACN,CAKY,EAAkB,IAAI,IAAI,CACrC,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACD,CAAC,CAKW,EAAwC,CACnD,MAAO,EACP,GAAI,EACJ,OAAQ,EACR,QAAS,IACT,OAAQ,IACR,UAAW,IACX,SAAU,IACV,QAAS,IACT,OAAQ,IACR,OAAQ,IACR,IAAK,IACL,YAAa,IACb,WAAY,IACZ,SAAU,IACV,QAAS,IACT,WAAY,IACZ,UAAW,IACZ,CAQY,EAA6C,CAExD,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,IAAO,CACjB,GAAI,CAAC,IAAK,IAAO,CACjB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,MAAO,CACjB,GAAI,CAAC,IAAK,QAAQ,CAElB,KAAM,CAAC,KAAM,EAAI,CACjB,GAAI,CAAC,KAAM,EAAI,CACf,MAAO,CAAC,KAAM,IAAM,CACpB,MAAO,CAAC,KAAM,IAAO,CACrB,GAAI,CAAC,KAAM,IAAM,CAEjB,KAAM,CAAC,KAAM,EAAI,CACjB,MAAO,CAAC,KAAM,KAAO,CACrB,MAAO,CAAC,KAAM,IAAM,CACpB,EAAG,CAAC,KAAM,KAAO,CACjB,IAAK,CAAC,KAAM,KAAO,CACnB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,KAAO,CAClB,IAAK,CAAC,KAAM,UAAW,CACvB,IAAK,CAAC,KAAM,cAAiB,CAE7B,EAAG,CAAC,IAAK,EAAI,CACb,KAAM,CAAC,IAAK,EAAI,CAEhB,EAAG,CAAC,KAAM,KAAO,CACjB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,EAAI,CACf,EAAG,CAAC,KAAM,IAAM,CAChB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,IAAM,CACjB,GAAI,CAAC,KAAM,QAAS,CAEpB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,KAAO,CACjB,KAAM,CAAC,IAAK,KAAO,CACnB,IAAK,CAAC,IAAK,GAAK,CAEhB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,IAAM,CAEhB,GAAI,CAAC,IAAK,EAAI,CACd,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,aAAO,CACjB,IAAK,CAAC,IAAK,KAAM,CACjB,IAAK,CAAC,IAAK,KAAM,CACjB,KAAM,CAAC,IAAK,KAAM,CAClB,KAAM,CA
AC,IAAK,KAAM,CAClB,KAAM,CAAC,IAAK,KAAK,CACjB,IAAK,CAAC,IAAK,MAAM,CAEjB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,aAAO,CAEjB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,IAAM,CAEhB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CAEjB,GAAI,CAAC,KAAM,EAAI,CACf,IAAK,CAAC,KAAM,IAAM,CAClB,IAAK,CAAC,KAAM,IAAM,CAClB,IAAK,CAAC,KAAM,IAAM,CAElB,GAAI,CAAC,KAAM,EAAI,CACf,IAAK,CAAC,KAAM,IAAM,CAElB,IAAK,CAAC,IAAK,EAAI,CAEf,IAAK,CAAC,IAAK,EAAI,CACf,IAAK,CAAC,IAAK,GAAI,CAChB,CAEY,EAAe,IAAI,IAAI,OAAO,KAAK,EAAS,CAAC,CAK1D,SAAS,GAA0B,CAEjC,IAAM,EADQ,OAAO,KAAK,EAAS,CAAC,MAAM,EAAG,IAAM,EAAE,OAAS,EAAE,OAAO,CAChD,IAAK,GAAS,CACnC,IAAM,EAAU,EAAK,QAAQ,sBAAuB,OAAO,CAE3D,OAAO,EAAK,EAAK,OAAS,GAAG,MAAM,WAAW,CAAG,GAAG,EAAQ,SAAW,GACvE,CACF,OAAW,OAAO,KAAK,EAAS,KAAK,IAAI,CAAC,GAAI,IAAI,CAGpD,MAAa,EAAiB,GAAiB,CAK/C,SAAS,IAA6B,CAEpC,IAAM,EADU,OAAO,KAAK,EAAiB,CAAC,MAAM,EAAG,IAAM,EAAE,OAAS,EAAE,OAAO,CACxD,IAAK,GAAM,EAAE,QAAQ,sBAAuB,OAAO,CAAC,CAC7E,OAAW,OAAO,KAAK,EAAS,KAAK,IAAI,CAAC,GAAI,IAAI,CAGtB,IAAoB,CAKlD,SAAS,IAAiC,CAIxC,IAAM,EAHW,CAAC,GAAG,OAAO,KAAK,EAAS,CAAE,GAAG,OAAO,KAAK,EAAiB,CAAC,CAAC,MAC3E,EAAG,IAAM,EAAE,OAAS,EAAE,OACxB,CACyB,IAAK,GAAS,CACtC,IAAM,EAAU,EAAK,QAAQ,sBAAuB,OAAO,CAC3D,OAAO,EAAK,EAAK,OAAS,GAAG,MAAM,WAAW,CAAG,GAAG,EAAQ,SAAW,GACvE,CACF,OAAW,OAAO,IAAI,EAAS,KAAK,IAAI,CAAC,IAAI,CAGrB,IAAwB,CCnMlD,MAAa,GAAc,2CACd,GAAW,mCACX,GAAU,2BAGV,GAAW,uCACX,GAAW,+CACX,GAAU,0BACV,EAAU,4BAWV,EAAqB,0BAGrB,EAAQ,2CAGR,EAAa,mDAGb,EAAS,gEAGT,EAAU,SAGV,EAAW,gBAGX,EAAgB,2DAkF7B,SAAgB,EAAe,EAAoB,CACjD,OAAO,EAAc,KAAK,EAAE,CAM9B,MAAa,EAAgB,+CAMhB,EAAW,sCAIX,EAAM,0BAIN,EAAgB,8BAIhB,EAAgB,qEAChB,EAAmB,6DAInB,EAAiB,6BACjB,EAAgB,4BAChB,EAAgB,4BAKhB,EAAkB,IAAI,IAAI,gWAuHtC,CAAC,CAMF,SAAgB,GAAkB,EAAyB,CAIzD,GAHI,EAAO,SAAW,IAGlB,CAAC,WAAW,KAAK,EAAO,CAAE,MAAO,GAGrC,IAAM,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS
,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAa,SAAS,EAAO,GAAI,GAAG,CACpC,EAAU,SAAS,EAAO,GAAI,GAAG,CAGvC,GAAI,IAAY,GAAK,IAAY,EAAG,MAAO,GAG3C,IAAM,EAAM,EAAK,GAAK,EAChB,EAAQ,EAAK,GAAK,EAIlB,EADY,EAAM,GACM,EAAM,GAAK,EAEzC,GADI,EAAY,GAAK,EAAY,IAC7B,EAAQ,GAAK,EAAQ,GAAI,MAAO,GAIpC,IAAM,GADM,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,GACvD,GAClB,EAAW,IAAc,EAAI,EAAI,GAAK,EAK5C,OAFI,IAAa,GAAW,GAErB,IAAe,EAQxB,SAAgB,EAAgB,EAAoB,CAElD,GAAI,CAAC,EAAS,KAAK,EAAE,CAAE,MAAO,GAG9B,IAAM,EAAe,sBACjB,EACA,EAAe,EAEnB,MAAQ,EAAQ,EAAa,KAAK,EAAE,IAAM,MAAM,CAC9C,IAAM,EAAU,EAAM,GACtB,GAAI,CAAC,EAAgB,IAAI,EAAQ,CAC/B,MAAO,GAET,IAIF,OAAO,GAAgB,ECpSzB,SAAgB,EAAiB,EAAsB,CACrD,IAAI,EAAS,EACb,IAAK,GAAM,CAAC,EAAM,KAAO,OAAO,QAAQ,EAAqB,CAC3D,EAAS,EAAO,WAAW,EAAM,EAAG,CAEtC,OAAO,EAMT,SAAS,EAAY,EAAW,EAAW,EAAoB,CAS7D,MAJA,EAJI,EAAI,MAAQ,EAAI,MAChB,EAAI,GAAK,EAAI,IACb,EAAI,GAAK,EAAI,EAAc,IAE3B,IAAM,GAAK,IAAM,IAEf,EADY,EAAI,GAAM,GAAK,EAAI,KAAQ,GAAM,EAAI,KAAQ,IASjE,SAAS,GAAa,EAA+B,CAInD,OAHI,EAAqB,IAAI,EAAK,CAAS,OACvC,EAAsB,IAAI,EAAK,CAAS,QACxC,EAAqB,IAAI,EAAK,CAAS,OACpC,SAMT,SAAS,EAAM,EAAc,EAA4B,CACvD,IAAM,EAAO,GAAc,EAE3B,MAAO,CAAE,KAAM,cAAe,OAAM,WAAY,EAAM,SADrC,EAAK,SAAW,EAAI,GAAa,EAAK,CAAG,SACM,CAMlE,SAAS,EAAY,EAA4B,CAE/C,IAAI,EAAQ,EAAc,KAAK,EAAE,CACjC,GAAI,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CAC3B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CAC3B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,EAAY,EAAG,EAAI,EAAE,EAAI,GAAK,GAAK,EAAI,IAAM,GAAM,GAAK,EAAK,IAAM,GAAK,GAAK,EAAI,GACnF,MAAO,CACL,CACE,KAAM,YACN,KAAM,EAAM,GACZ,KAAM,EACN,MAAO,EACP,IAAK,EACL,KAAM,EACN,OAAQ,EACR,OAAQ,EACT,CACD,EAAM,GAAG,OACV,CAML,GADA,EAAQ,EAAiB,KAAK,EAAE,CAC5B,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CAC3B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CACjC,GAAI,EAAY,EAAG,EAAI,EAAE,EAAI,GAAK,GAAK,
EAAI,IAAM,GAAM,GAAK,EAAK,GAC/D,MAAO,CACL,CACE,KAAM,YACN,KAAM,EAAM,GACZ,KAAM,EACN,MAAO,EACP,IAAK,EACL,KAAM,EACN,OAAQ,EACR,OAAQ,EACT,CACD,EAAM,GAAG,OACV,CAML,GADA,EAAQ,GAAY,KAAK,EAAE,CACvB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,GACxD,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,OAAQ,EAAG,OAAQ,EAAG,CAAE,EAAM,GAAG,OAAO,CAM7F,GADA,EAAQ,GAAS,KAAK,EAAE,CACpB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,GACxD,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,OAAQ,EAAG,OAAQ,EAAG,CAAE,EAAM,GAAG,OAAO,CAM7F,GADA,EAAQ,GAAQ,KAAK,EAAE,CACnB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,GACpC,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,OAAQ,EAAG,OAAQ,EAAG,CAAE,EAAM,GAAG,OAAO,CAM7F,GADA,EAAQ,GAAS,KAAK,EAAE,CACpB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,EAAY,EAAG,EAAG,EAAE,CACtB,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAMzF,GADA,EAAQ,EAAI,KAAK,EAAE,CACf,EAAO,CACT,IAAM,EAAS,EAAM,GAAK,EAAM,GAChC,GAAI,GAAkB,EAAO,CAC3B,MAAO,CAAC,CAAE,KAAM,MAAO,KAAM,EAAM,GAAI,MAAO,EAAQ,CAAE,EAAM,GAAG,OAAO,CAO5E,GADA,EAAQ,EAAc,KAAK,EAAE,CACzB,EACF,MAAO,CAAC,CAAE,KAAM,eAAgB,KAAM,EAAM,GAAI,CAAE,EAAM,GAAG,OAAO,CAKpE,IAAM,EAAa,EAAE,MAAM,iBAAiB,CAC5C,GAAI,EAAY,CACd,IAAM,EAAS,EAAW,GAC1B,MAAO,CAAC,CAAE,KAAM,QAAS,KAAM,EAAW,GAAI,GAAI,GAAI,SAAQ,CAAE,EAAW,GAAG,OAAO,CAKvF,GADA,EAAQ,GAAS,KAAK,EAAE,CACpB,EAAO,CACT,IAAI,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAS9B,GAPI,GAAK,KACP,GAAK,EAAI,GAAK,KAAO,KAGnB,EAAI,IAAM,GAAK,KACjB,CAAC,EAAG,GAAK,CAAC,EAAG,EAAE,EAEb,EAAY,EAAG,EAAG,EAAE,CACtB,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KA
AM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAMzF,GADA,EAAQ,GAAQ,KAAK,EAAE,CACnB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,GAAK,IAAM,GAAK,GAAK,GAAK,EAAc,GACpD,MAAO,CAAC,CAAE,KAAM,UAAW,KAAM,EAAM,GAAI,KAAM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAM5F,GADA,EAAQ,EAAQ,KAAK,EAAE,CACnB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,MAAQ,GAAK,MAAQ,GAAK,GAAK,GAAK,GAC3C,MAAO,CAAC,CAAE,KAAM,UAAW,KAAM,EAAM,GAAI,KAAM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAM5F,GADA,EAAQ,EAAmB,KAAK,EAAE,CAC9B,EAAO,CACT,IAAM,EAAS,EAAM,GAErB,GAAI,CAAC,EAAa,IAAI,EAAO,CAAE,CAC7B,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,MAAO,CAAC,CAAE,KAAM,aAAc,KAAM,EAAM,GAAI,MAAO,EAAG,SAAQ,CAAE,EAAM,GAAG,OAAO,EAKtF,IAAM,EAAiB,EAAE,MAAM,mCAAmC,CAClE,GAAI,EAAgB,CAClB,IAAM,EAAU,EAAe,GACzB,EAAO,EAAE,MAAM,EAAQ,OAAO,CAC9B,EAAY,EAAe,KAAK,EAAK,CAC3C,GAAI,EAAW,CACb,IAAM,EAAO,EAAU,GACjB,EAAW,EAAU,EACrB,EAAQ,WAAW,EAAQ,QAAQ,MAAO,GAAG,CAAC,QAAQ,IAAK,IAAI,CAAC,CACtE,GAAI,KAAQ,EAEV,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAU,QAAO,SADrC,EAAiB,GACmC,CAAE,EAAS,OAAO,CAEpF,GAAM,CAAC,GAAY,EAAS,GAI5B,OAHI,IAAS,KAAO,IAAS,IACpB,CAAC,CAAE,KAAM,UAAW,KAAM,EAAU,QAAO,CAAE,EAAS,OAAO,CAE/D,CAAC,CAAE,KAAM,cAAe,KAAM,EAAU,QAAO,KAAM,EAAU,CAAE,EAAS,OAAO,EAM5F,IAAM,EAAS,EAAE,MAAM,yCAAyC,CAChE,GAAI,GAAU,EAAO,GAAG,SAAS,IAAI,CAAE,CACrC,IAAM,EAAQ,WAAW,EAAO,GAAG,QAAQ,MAAO,GAAG,CAAC,QAAQ,IAAK,IAAI,CAAC,CACxE,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAO,GAAI,QAAO,CAAE,EAAO,GAAG,OAAO,CAIvE,IAAM,EAAS,EAAE,MAAM,yCAAyC,CAChE,GAAI,IAAW,EAAO,GAAG,SAAS,IAAI,EAAI,EAAO,GAAG,SAAS,IAAI,EAAG,CAClE,IAAM,EAAQ,WAAW,EAAO,GAAG,QAAQ,KAAM,GAAG,CAAC,CACrD,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAO,GAAI,QAAO,CAAE,EAAO,GAAG,OAAO,CAIvE,IAAM,EAAW,EAAE,MAAM,oBAAoB,CAC7C,GAAI,EAAU,CACZ,IAAM,EAAQ,SAAS,EAAS,GAAI,GAAG,CACvC,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAS,GAAI,QAAO,CAAE,EAAS,GAAG,OAAO,CAI3E,MAAO,CAAC,CAAE,KAAM,UAAW,KAAM,EAAE,GAAI,CAAE,EAAE,CAM7C,SAAU,GAAc,EAA6B,CAEnD,GAAI,CAAC,EAAG,CACN,KAAM,CAAE,KAAM,UAAW,KAAM,
KAAM,CACrC,OAIF,GAAI,cAAc,KAAK,EAAE,EAAI,EAAa,IAAI,EAAE,CAAE,CAEhD,GAAI,EAAgB,EAAE,CAAE,CACtB,KAAM,CAAE,KAAM,WAAY,KAAM,EAAG,CACnC,OAEF,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAG,CAC/B,OAIF,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,QAAU,GAAI,CACvC,IAAM,EAAW,EAAc,KAAK,EAAE,CACtC,GAAI,EAAU,CACZ,IAAM,EAAK,EAAS,GACd,EAAS,EAAS,GAAK,EAAS,GAGtC,GAFA,KAAM,CAAE,KAAM,QAAS,KAAM,EAAS,GAAI,KAAI,SAAQ,CACtD,EAAI,EAAE,MAAM,EAAS,GAAG,OAAO,CAC3B,CAAC,EAAG,QAKZ,GAAI,EAAE,OAAS,GAAK,EAAY,IAAI,EAAE,GAAG,EAAI,EAAO,IAAI,EAAE,GAAG,CAAE,CAC7D,GAAM,CAAC,EAAO,GAAS,EAAY,EAAE,CAGrC,GAFA,MAAM,EACN,EAAI,EAAE,MAAM,EAAM,CACd,CAAC,EAAG,OAIV,GAAI,EAAE,OAAS,GAAKC,KAAkB,SAAS,EAAE,GAAG,EAAI,SAAS,KAAK,EAAE,GAAG,CAAE,CAC3E,IAAI,EAAI,EACR,KAAO,EAAI,EAAE,QAAU,SAAS,KAAK,EAAE,GAAG,EAAE,IAC5C,IAAM,EAAO,EAAE,MAAM,EAAG,EAAE,EAExB,EAAK,MAAM,EAAE,CAAC,aAAa,GAAK,EAAK,MAAM,EAAE,EAC5C,EAAI,GAAK,EAAK,MAAM,EAAE,CAAC,aAAa,GAAK,EAAK,MAAM,EAAE,IAEvD,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAM,CAClC,EAAI,EAAE,MAAM,EAAE,EAKlB,GAAI,EAAE,QAAU,EAAG,CACjB,GAAI,EAAc,SAAS,EAAE,GAAG,EAAI,EAAc,SAAS,EAAE,EAAE,OAAS,GAAG,CAAE,CAC3E,IAAM,EAAQ,EAAE,MAAM,EAAG,GAAG,CAC5B,GAAI,cAAc,KAAK,EAAM,CAAE,CAC7B,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAO,CACnC,MAAM,EAAM,EAAE,EAAE,OAAS,GAAI,IAAmB,CAChD,QAGJ,GAAI,EAAc,SAAS,EAAE,GAAG,EAAI,EAAc,SAAS,EAAE,EAAE,OAAS,GAAG,CAAE,CAC3E,IAAM,EAAQ,EAAE,MAAM,EAAG,GAAG,CAC5B,GAAI,cAAc,KAAK,EAAM,CAAE,CAC7B,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAO,CACnC,MAAM,EAAM,EAAE,EAAE,OAAS,GAAI,IAAmB,CAChD,SAiBN,IAXI,EAAE,OAAS,IACT,EAAc,SAAS,EAAE,GAAG,EAC9B,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,EAAI,EAAE,MAAM,EAAE,EACL,EAAc,SAAS,EAAE,GAAG,GACrC,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,EAAI,EAAE,MAAM,EAAE,GAKX,GAAG,CAER,KAAO,GAAK,EAAgB,IAAI,EAAE,GAAG,EAAE,CAErC,GAAI,EAAE,WAAW,IAAI,CAAE,CACrB,IAAM,EAAe,EAAe,KAAK,EAAE,CAC3C,GAAI,EAAc,CAChB,KAAM,CAAE,KAAM,UAAW,KAAM,EAAa,GAAI,CAChD,EAAI,EAAE,MAAM,EAAa,GAAG,OAAO,CACnC,SAEF,IAAM,EAAc,EAAc,KAAK,EAAE,CACzC,GAAI,EAAa,CACf,KAAM,CAAE,KAAM,SAAU,KAAM,EAAY,GAAI,CAC9C,EAAI,EAAE,MAAM,EAAY,GAAG,OAAO,CAClC,SAEF
,IAAM,EAAc,EAAc,KAAK,EAAE,CACzC,GAAI,EAAa,CACf,KAAM,CAAE,KAAM,SAAU,KAAM,EAAY,GAAI,CAC9C,EAAI,EAAE,MAAM,EAAY,GAAG,OAAO,CAClC,UAIJ,GAAI,EAAE,WAAW,QAAQ,CAAE,CACzB,MAAM,EAAM,QAAS,MAAM,CAC3B,EAAI,EAAE,MAAM,EAAE,CACd,SAEF,GAAI,EAAE,WAAW,MAAM,CAAE,CACvB,MAAM,EAAM,MAAM,CAClB,EAAI,EAAE,MAAM,EAAE,CACd,SAEF,GAAI,EAAE,WAAW,MAAM,CAAE,CACvB,IAAI,EAAO,MACP,EAAO,EAAE,MAAM,EAAE,CACrB,KAAO,EAAK,WAAW,IAAI,EACzB,GAAQ,IACR,EAAO,EAAK,MAAM,EAAE,CAEtB,MAAM,EAAM,EAAM,IAAI,CACtB,EAAI,EACJ,SAEF,GAAI,EAAE,WAAW,IAAI,CAAE,CACrB,MAAM,EAAM,IAAI,CAChB,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,IAAM,KAAM,CACd,MAAM,EAAM,KAAM,IAAI,CACtB,EAAI,GACJ,SAEF,GAAI,EAAE,WAAW,KAAK,CAAE,CACtB,MAAM,EAAM,KAAM,IAAkB,CACpC,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,IAAM,MAAQ,IAAM,KAAM,CAE5B,MAAM,EAAM,EAAE,CACd,EAAI,GACJ,SAGF,GAAI,EAAQ,SAAS,EAAE,GAAG,CAAE,CAC1B,MAAM,EAAM,EAAE,GAAI,IAAO,CACzB,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,EAAc,SAAS,EAAE,GAAG,CAAE,CAChC,MAAM,EAAM,EAAE,GAAI,IAAmB,CACrC,EAAI,EAAE,MAAM,EAAE,CACd,SAEF,GAAI,EAAc,SAAS,EAAE,GAAG,CAAE,CAChC,MAAM,EAAM,EAAE,GAAI,IAAmB,CACrC,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,OAAS,EAAG,CACrC,IAAM,EAAY,EAAQ,KAAK,EAAE,CACjC,GAAI,EAAW,CAET,SAAS,KAAK,EAAU,GAAG,CAC7B,KAAM,CACJ,KAAM,UACN,KAAM,EAAU,GAChB,MAAO,SAAS,EAAU,GAAG,MAAM,EAAE,CAAE,GAAG,CAC3C,CAED,KAAM,CAAE,KAAM,UAAW,KAAM,EAAU,GAAI,CAE/C,EAAI,EAAE,MAAM,EAAU,GAAG,OAAO,CAChC,UAIJ,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,OAAS,EAAG,CACrC,IAAM,EAAY,EAAS,KAAK,EAAE,CAClC,GAAI,EAAW,CACb,KAAM,CAAE,KAAM,WAAY,KAAM,EAAU,GAAI,SAAU,EAAU,GAAG,MAAM,EAAE,CAAE,CAC/E,EAAI,EAAE,MAAM,EAAU,GAAG,OAAO,CAChC,UAIJ,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,OAAS,GAAK,EAAO,IAAI,EAAE,GAAG,CAAE,CACzD,IAAM,EAAW,EAAc,KAAK,EAAE,CACtC,GAAI,EAAU,CACZ,IAAM,EAAK,EAAS,GACd,EAAS,EAAS,GAAK,EAAS,GACtC,KAAM,CAAE,KAAM,QAAS,KAAM,EAAS,GAAI,KAAI,SAAQ,CACtD,EAAI,EAAE,MAAM,EAAS,GAAG,OAAO,CAC/B,UAIJ,MAAM,EAAM,EAAE,GAAG,CACjB,EAAI,EAAE,MAAM,EAAE,CAGhB,GAAI,CAAC,EAAG,MAGR,GAAI,EAAE,SAAS,IAAI,CAAE,CACnB,IAAM,EAAa,EAAM,KAAK,EAAE,CAChC,GAAI,EAAY,CACd,KAAM,CAAE,KAAM,QAAS,KAAM,EAAW,GAAI,CAC5C,E
AAI,EAAE,MAAM,EAAW,GAAG,OAAO,CACjC,UAKJ,GAAI,EAAW,KAAK,EAAE,CAAE,CAEtB,IAAI,EAAM,EACN,EAAW,GACf,KAAO,GAAO,EAAsB,IAAI,EAAI,EAAI,OAAS,GAAG,EAC1D,EAAW,EAAI,EAAI,OAAS,GAAK,EACjC,EAAM,EAAI,MAAM,EAAG,GAAG,CAExB,KAAM,CAAE,KAAM,MAAO,KAAM,EAAK,CAChC,EAAI,EACJ,SAIF,GAAI,EAAE,QAAU,GAAK,eAAe,KAAK,EAAE,EAAI,EAAE,SAAS,IAAI,CAAE,CAC9D,IAAM,EAAc,EAAO,KAAK,EAAE,CAClC,GAAI,EAAa,CACf,IAAI,EAAS,EAAY,GACrB,EAAW,EAAE,MAAM,EAAO,OAAO,CAErC,KAAO,GAAU,EAAgB,IAAI,EAAO,EAAO,OAAS,GAAG,EAC7D,EAAW,EAAO,EAAO,OAAS,GAAK,EACvC,EAAS,EAAO,MAAM,EAAG,GAAG,CAE9B,GAAI,EAAO,SAAS,IAAI,CAAE,CACxB,KAAM,CAAE,KAAM,SAAU,KAAM,EAAQ,CACtC,EAAI,EACJ,WAMN,GAAI,EAAO,IAAI,EAAE,GAAG,EAAK,EAAY,IAAI,EAAE,GAAG,EAAI,EAAE,OAAS,GAAK,EAAO,IAAI,EAAE,GAAG,CAAG,CACnF,GAAM,CAAC,EAAO,GAAS,EAAY,EAAE,CAKrC,GAJA,MAAM,EACN,EAAI,EAAE,MAAM,EAAM,CAGd,EAAG,CACL,IAAM,EAAY,EAAe,KAAK,EAAE,CACpC,IACF,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAU,GAAI,CAC1C,EAAI,EAAE,MAAM,EAAU,GAAG,OAAO,EAGpC,SAIF,GAAI,UAAU,KAAK,EAAE,CAAE,CACrB,IAAI,EAAI,EACF,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,IAAK,IAAK,IAAK,IAAQ,IAAS,CAAC,CACnE,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,IAAI,CAAC,CAE7C,KAAO,EAAI,EAAE,QACX,GAAI,SAAS,KAAK,EAAE,GAAG,CACrB,YACS,EAAO,IAAI,EAAE,GAAG,CAEzB,YACS,EAAa,IAAI,EAAE,GAAG,EAAI,EAAI,EAAI,EAAE,QAAU,SAAS,KAAK,EAAE,EAAI,GAAG,CAC9E,SAEA,MAIA,EAAI,EAAE,QAAU,EAAa,IAAI,EAAE,GAAG,EACxC,IAEF,IAAM,EAAgB,EAAE,MAAM,EAAG,EAAE,CAGnC,GAAI,EAAgB,EAAc,CAAE,CAClC,KAAM,CAAE,KAAM,WAAY,KAAM,EAAe,CAC/C,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAe,CAC3C,EAAI,EAAE,MAAM,EAAE,CACd,SAIF,KAAM,CAAE,KAAM,UAAW,KAAM,EAAE,GAAI,CACrC,EAAI,EAAE,MAAM,EAAE,EAOlB,SAAU,GAAoB,EAAc,EAAoD,CAI9F,IAAM,GAHW,EAAyB,EAAiB,EAAK,CAAG,GAGrC,MAAM,UAAU,CAC1C,EAAQ,GAEZ,IAAK,IAAM,KAAa,EAAY,CAC7B,IAEH,KAAM,IAER,EAAQ,GAGR,IAAK,IAAM,KAAQ,EAAU,MAAM,MAAM,CACnC,IACF,MAAM,IASd,SAAgB,GAAI,EAAc,EAAyB,GAAe,CACxE,IAAM,EAAkB,EAAE,CAE1B,IAAK,IAAM,KAAY,GAAoB,EAAM,EAAuB,CACtE,IAAK,IAAM,KAAS,GAAc,EAAS,CACzC,EAAO,KAAK,EAAM,CAItB,OAAO,EClqBT,MAAa,GAAwC,CAEnD,GAAI,QACJ,MAAO,QACP,IAAK,MACL,OAAQ,MACR,GAAI,OACJ,MAAO,OACP,GAAI,SACJ,MAAO,S
ACP,KAAM,YACN,QAAS,YAGT,GAAI,aACJ,MAAO,aACP,IAAK,kBACL,OAAQ,kBACR,IAAK,sBACL,OAAQ,sBACR,GAAI,iBACJ,MAAO,iBACP,IAAK,eACL,OAAQ,eACR,IAAK,sBACL,OAAQ,sBAGR,IAAK,YACL,QAAS,YACT,MAAO,mBACP,WAAY,mBACZ,IAAK,iBACL,SAAU,iBACV,GAAI,SACJ,OAAQ,SACR,IAAK,kBACL,WAAY,kBACZ,IAAK,WACL,OAAQ,WACR,IAAK,WACL,OAAQ,WACR,IAAK,kBACL,SAAU,kBACV,IAAK,aACL,OAAQ,aACR,GAAI,QACJ,MAAO,QACP,IAAK,YACL,OAAQ,YACR,MAAO,YACP,SAAU,YACV,GAAI,QACJ,MAAO,QACP,GAAI,eACJ,MAAO,eACP,GAAI,cACJ,OAAQ,cAGR,GAAI,UACJ,MAAO,UACP,GAAI,QACJ,MAAO,QAGP,GAAI,sBACJ,OAAQ,sBACR,GAAI,oBACJ,OAAQ,oBACR,GAAI,mBACJ,OAAQ,mBAER,OAAQ,iBACR,IAAK,uBACL,QAAS,uBACT,IAAK,oCACL,IAAK,oCAGL,IAAK,YACL,OAAQ,YACR,IAAK,UACL,OAAQ,UACR,GAAI,WACJ,MAAO,WAGP,EAAG,SACH,KAAM,SACN,EAAG,QACH,KAAM,QACN,EAAG,SACH,KAAM,SACN,EAAG,SACH,KAAM,SACN,GAAI,aACJ,OAAQ,aACR,GAAI,aACJ,OAAQ,aACR,GAAI,YACJ,OAAQ,YACR,GAAI,YACJ,OAAQ,YAGR,IAAK,SACL,OAAQ,SACR,MAAO,UACP,SAAU,UACV,KAAM,UACN,QAAS,UACT,GAAI,aACJ,MAAO,aACP,IAAK,aACL,OAAQ,aACT,CAKY,GAAyB,IAAI,IAAI,CAC5C,OACA,UACA,QACA,MACA,UACA,QACA,MACD,CAAC,CCxHF,SAAgB,GAAiB,EAA0B,CACzD,IAAM,EAAkB,EAAE,CACtB,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAQ,EAAO,GACf,EAAO,EAAO,EAAI,GAGxB,GAAI,EAAM,OAAS,QAAU,GAAM,OAAS,eAAiB,EAAK,OAAS,IAAK,CAC9E,IAAM,EAAmB,EAAM,KAAO,IACtC,GAAI,KAAoB,IAAiB,KAAoB,EAAe,CAC1E,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAkB,CAAC,CACrD,GAAK,EACL,UAKJ,GAAI,EAAM,OAAS,eAAiB,EAAM,QAAQ,GAAoB,GAAM,OAAS,SAAU,CAC7F,IAAM,EAAM,EAAiB,EAAM,MACnC,EAAO,KAAK,CACV,KAAM,SACN,KAAM,EAAM,KAAO,EAAK,KACxB,MAAO,EAAK,MACZ,SAAU,EACX,CAAC,CACF,GAAK,EACL,SAIF,GAAI,EAAM,OAAS,UAAY,GAAM,OAAS,OAAQ,CACpD,IAAM,EAAe,EAAK,KAC1B,GAAI,EAAgB,IAAI,EAAa,CAAE,CACrC,EAAO,KAAK,CACV,KAAM,SACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,MAAO,EAAM,MACb,SAAU,EACX,CAAC,CACF,GAAK,EACL,SAGF,GAAI,KAAgB,EAAe,CACjC,IAAM,EAAa,EAAc,GACjC,EAAO,KAAK,CACV,KAAM,SACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,MAAO,EAAM,MAAQ,EACrB,SAAU,MACX,CAAC,CACF,GAAK,EACL,UAKJ,GACE,EAAM,OAAS,UACf,GAAM,OAAS,QACf,CAAC,UAAW,WAAY,WAAY,gBAAgB,CAAC,SAAS,EAAK,KAAK,aAAa,CAAC,CACtF,CACA,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO
,IAAM,EAAK,KAC9B,MAAO,EAAM,MACd,CAAC,CACF,GAAK,EACL,SAIF,IAAK,EAAM,OAAS,QAAU,EAAM,OAAS,YAAc,GAAM,OAAS,OAAQ,CAChF,EAAO,KAAK,CACV,KAAM,YACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EAAM,KACZ,MAAO,EAAM,MACb,IAAK,EAAM,IACX,KAAM,EAAK,KACX,OAAQ,EAAK,OACb,OAAQ,EAAK,OACd,CAAC,CACF,GAAK,EACL,SAIF,EAAO,KAAK,EAAM,CAClB,IAGF,OAAO,EC3FT,SAAS,GAAqB,EAA2B,CAEvD,GAAI,EAAU,OAAS,SAAW,EAAU,OAAS,UACnD,MAAO,GAIT,GAAI,EAAU,OAAS,QAAU,EAAU,KAAK,OAAS,EAAG,CAC1D,IAAM,EAAY,EAAU,KAAK,GACjC,GAAI,IAAc,EAAU,aAAa,EAAI,IAAc,EAAU,aAAa,CAShF,MADA,EANc,EAAU,KAAK,aAAa,GAE7B,GAET,EAAe,EAAU,KAAK,EAE9B,EAAgB,IAAI,EAAU,KAAK,EAK3C,MAAO,GAMT,SAAgB,GAAmB,EAA0B,CAC3D,GAAI,EAAO,SAAW,EAAG,MAAO,EAAE,CAElC,IAAM,EAAkB,EAAE,CACtB,EAAa,GACb,EAAI,EAEF,OAA8B,CAAE,KAAM,UAAW,KAAM,KAAM,EAC7D,OAA4B,CAAE,KAAM,QAAS,KAAM,KAAM,EAE/D,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAQ,EAAO,GACf,EAAO,EAAO,EAAI,GAGxB,GAAI,EAAM,OAAS,UAAW,CAC5B,AAEE,KADA,EAAO,KAAK,GAAa,CAAC,CACb,IAGf,IACA,SAUF,GANA,AAEE,KADA,EAAO,KAAK,GAAe,CAAC,CACf,IAIX,EAAM,OAAS,eAAiB,EAAgB,IAAI,EAAM,WAAW,CAAE,CAEzE,GAAI,EAAM,aAAe,KAAO,GAAQ,CAAC,GAAqB,EAAK,CAAE,CACnE,EAAO,KAAK,EAAM,CAClB,IACA,SAIF,IAAI,EAAe,EAAM,KACrB,EAAI,EAAI,EACZ,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAU,EAAO,GAEvB,GADI,EAAQ,OAAS,eACjB,CAAC,EAAmB,IAAI,EAAQ,WAAW,CAAE,MACjD,GAAgB,EAAQ,KACxB,IAaF,IATI,EAAI,EAAI,GACV,EAAO,KAAK,CAAE,GAAG,EAAO,KAAM,EAAc,CAAC,CAC7C,EAAI,IAEJ,EAAO,KAAK,EAAM,CAClB,KAIK,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAM,EAAO,GAEnB,GADI,EAAI,OAAS,eACb,CAAC,EAAmB,IAAI,EAAI,WAAW,CAAE,MAC7C,EAAO,KAAK,EAAI,CAChB,IAIF,EAAO,KAAK,GAAa,CAAC,CAC1B,EAAa,GACb,SAIF,EAAO,KAAK,EAAM,CAClB,IAQF,OAJI,GACF,EAAO,KAAK,GAAa,CAAC,CAGrB,EC3GT,MAAM,GAAoB,IAAI,IAAI,CAAC,IAAQ,IAAQ,CAAC,CAKpD,SAAS,GAAS,EAAc,EAAe,GAAsB,CAKnE,OAJI,EAAM,OAAS,QAEf,CAAC,GAAgB,EAAgB,IAAI,EAAM,KAAK,CAAS,KAEtD,EADO,EAAM,KAAK,aAAa,GACd,KAM1B,SAAS,GAAkB,EAAuB,CAChD,OAAO,EAAM,OAAS,eAAiB,GAAkB,IAAI,EAAM,KAAK,CAY1E,SAAS,GAAiB,EAAiB,EAA4C,CACrF,IAAM,EAAoB,EAAE,CACxB,EAAI,EAGR,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAO,EAAO,GACd,EAAS,EAAO,EAAI,GAG1B,GAAI,GAAM,OAAS,QAAU,CAAC,GAAU,CA
AC,GAAkB,EAAO,CAChE,MAGF,EAAS,KAAK,EAAK,CACnB,EAAS,KAAK,EAAO,CACrB,GAAK,EAGL,IAAM,EAAa,EAAO,GACtB,GAAY,OAAS,eAAiB,EAAW,OAAS,MAC5D,EAAS,KAAK,EAAW,CACzB,KAKJ,GAAI,EAAS,SAAW,EACtB,OAAO,KAIT,IAAM,EAAc,EAAO,GAC3B,GACE,CAAC,GACD,EAAY,OAAS,QACpB,EAAY,KAAK,aAAa,GAAK,MAAQ,EAAY,KAAK,aAAa,GAAK,MAE/E,OAAO,KAIT,IAAM,EAAS,EAAO,EAAI,GAC1B,GAAI,CAAC,GAAU,EAAO,OAAS,OAC7B,OAAO,KAOT,IAAI,EADU,CAAC,GAAG,EAAU,EAAa,EAAO,CAC/B,IAAK,GAAM,EAAE,KAAK,CAAC,KAAK,IAAI,CAG7C,MAFA,GAAO,EAAK,QAAQ,MAAO,IAAI,CAAC,QAAQ,MAAO,IAAI,CAE5C,CAAC,CAAE,KAAM,OAAQ,OAAM,CAAE,EAAI,EAAE,CAMxC,SAAgB,EAAe,EAA0B,CACvD,IAAM,EAAkB,EAAE,CACtB,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAQ,EAAO,GACf,EAAO,EAAO,EAAI,GAGlB,EAAW,GAAiB,EAAQ,EAAE,CAC5C,GAAI,EAAU,CACZ,EAAO,KAAK,EAAS,GAAG,CACxB,EAAI,EAAS,GACb,SAIF,GAAI,EAAM,OAAS,QAAU,GAAM,OAAS,eAAiB,EAAK,OAAS,IAAK,CAC9E,IAAM,EAAO,EAAM,KAAK,QAAQ,MAAO,GAAG,CAC1C,GAAI,GAAuB,IAAI,EAAK,CAAE,CAEpC,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAM,KAAO,IAAK,CAAC,CACrD,GAAK,EACL,UAKJ,IAAK,EAAM,OAAS,QAAU,EAAM,OAAS,WAAa,GAAM,OAAS,OAAQ,CAC/E,IAAM,GAAM,EAAM,KAAkB,EAAM,OACtC,EAAwB,KAM5B,GALI,EAAI,IAAI,EAAK,KAAK,CACpB,EAAS,CAAC,EACD,EAAG,IAAI,EAAK,KAAK,GAC1B,EAAS,GAEP,IAAW,KAAM,CACnB,IAAI,EAAO,EAAM,KAAO,IAAM,EAAK,KACnC,GAAK,EAED,EAAO,IAAI,OAAS,eAAiB,EAAO,GAAG,OAAS,MAC1D,GAAQ,IACR,KAEF,EAAO,KAAK,CAAE,KAAM,OAAQ,OAAM,MAAO,EAAQ,CAAC,CAClD,UAKJ,IAAK,EAAM,OAAS,WAAa,EAAM,OAAS,WAAa,GAAM,OAAS,OAAQ,CAClF,IAAM,EAAQ,GAAS,EAAM,GAAK,CAClC,GAAI,IAAU,KAAM,CAClB,IAAM,GAAM,EAAM,KAAqB,EAAM,OAC7C,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EACN,QACA,MACD,CAAC,CACF,GAAK,EACL,UAKJ,IACG,EAAM,OAAS,QAAU,EAAM,OAAS,YACzC,EAAM,OAAS,GACf,GAAM,OAAS,SACf,CACA,IAAM,EAAO,EAAK,MAClB,GAAI,GAAQ,MAAQ,GAAQ,KAAM,CAChC,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,OACA,MAAO,EAAM,MACb,IAAK,EAAM,IACZ,CAAC,CACF,GAAK,EACL,UAKJ,IACG,EAAM,OAAS,QAAU,EAAM,OAAS,YACzC,EAAM,OAAS,GACf,GAAM,OAAS,OACf,CACA,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EAAK,MACX,MAAO,EAAM,MACb,IAAK,EAAM,IACZ,CAAC,CACF,GAAK,EACL,SAIF,GACE
,EAAM,OAAS,QACf,EAAc,IAAI,EAAM,KAAK,aAAa,CAAC,EAC3C,GAAM,OAAS,OACf,CACA,EAAO,KAAK,CACV,GAAG,EACH,KAAM,EAAM,KAAO,IAAM,EAAK,KAC/B,CAAC,CACF,GAAK,EACL,SAIF,GACE,EAAM,OAAS,QACf,EAAc,IAAI,EAAM,KAAK,aAAa,CAAC,EAC3C,GAAM,OAAS,OACf,CACA,IAAM,EAAY,GAAc,EAAK,KAAK,aAAa,EACvD,GAAI,EAAW,CACb,EAAO,KAAK,CACV,KAAM,OACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EAAU,GAChB,OAAQ,EAAU,GAClB,OAAQ,EAAU,GACnB,CAAC,CACF,GAAK,EACL,UAKJ,EAAO,KAAK,EAAM,CAClB,IAGF,OAAO,ECnOT,SAAgB,EAAS,EAAc,EAA2B,EAAE,CAAW,CAC7E,GAAM,CAAE,yBAAyB,GAAM,yBAAyB,IAAU,EAItE,EAAS,GAAI,EAAM,EAAuB,CAgB9C,MAbA,GAAS,GAAiB,EAAO,CAGjC,EAAS,EAAe,EAAO,CAG/B,AAIE,EAJE,EACO,GAAmB,EAAO,CAG1B,EAAO,OAAQ,GAAM,EAAE,OAAS,UAAU,CAG9C,EC7BT,SAAgB,GAAmB,EAAwB,CACzD,IAAM,EAAS,EAAS,EAAM,CAAE,uBAAwB,GAAM,CAAC,CACzD,EAAsB,EAAE,CAC1B,EAA4B,EAAE,CAElC,IAAK,IAAM,KAAS,EACd,EAAM,OAAS,UACjB,EAAkB,EAAE,CACX,EAAM,OAAS,SACpB,EAAgB,OAAS,GAC3B,EAAU,KAAK,EAAW,EAAgB,CAAC,CAE7C,EAAkB,EAAE,EACX,EAAM,OAAS,MACxB,EAAgB,KAAK,GAAa,EAAM,CAAC,CAS7C,OAJI,EAAgB,OAAS,GAC3B,EAAU,KAAK,EAAW,EAAgB,CAAC,CAGtC,EAMT,SAAS,GAAa,EAAsB,CAI1C,OAHI,EAAM,OAAS,cACV,EAAM,WAER,EAAM,MAAQ,GAMvB,SAAS,EAAW,EAAyB,CAC3C,GAAI,EAAM,SAAW,EAAG,MAAO,GAE/B,IAAI,EAAS,EAAM,GAEnB,IAAK,IAAI,EAAI,EAAG,EAAI,EAAM,OAAQ,IAAK,CACrC,IAAM,EAAO,EAAM,EAAI,GACjB,EAAO,EAAM,GAGA,GAAe,EAAM,EAAK,CAE3C,GAAU,IAAM,EAEhB,GAAU,EAId,OAAO,EAMT,SAAS,GAAe,EAAc,EAAuB,CAC3D,GAAI,CAAC,GAAQ,CAAC,EAAM,MAAO,GAE3B,IAAM,EAAW,EAAK,EAAK,OAAS,GAC9B,EAAY,EAAK,GA6BvB,MAFA,EAvBqB,IAAI,IAAI,CAAC,IAAK,IAAK,IAAU,IAAU,IAAU,IAAI,CAAC,CAC1D,IAAI,EAAS,EAIT,IAAI,IAAI,CAC3B,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACD,CAAC,CACe,IAAI,EAAU,EAG3B,IAAa,KAAO,IAAc"}
1
+ {"version":3,"file":"index.mjs","names":["COMPOSITE_HYPHENS","COMPOSITE_HYPHENS","mergeSpans"],"sources":["../src/data/constants.ts","../src/data/units.ts","../src/data/patterns.ts","../src/pipeline/lexer.ts","../src/data/abbreviations.ts","../src/pipeline/particles.ts","../src/pipeline/sentences.ts","../src/pipeline/phrases.ts","../src/tokenize.ts","../src/split-sentences.ts"],"sourcesContent":["/**\n * Unicode replacements for composite glyphs\n */\nexport const UNICODE_REPLACEMENTS: Record<string, string> = {\n // Vowel + combining acute accent (U+0301)\n \"a\\u0301\": \"\\u00E1\", // á\n \"e\\u0301\": \"\\u00E9\", // é\n \"i\\u0301\": \"\\u00ED\", // í\n \"o\\u0301\": \"\\u00F3\", // ó\n \"u\\u0301\": \"\\u00FA\", // ú\n \"y\\u0301\": \"\\u00FD\", // ý\n \"A\\u0301\": \"\\u00C1\", // Á\n \"E\\u0301\": \"\\u00C9\", // É\n \"I\\u0301\": \"\\u00CD\", // Í\n \"O\\u0301\": \"\\u00D3\", // Ó\n \"U\\u0301\": \"\\u00DA\", // Ú\n \"Y\\u0301\": \"\\u00DD\", // Ý\n // Vowel + combining diaeresis (U+0308)\n \"a\\u0308\": \"\\u00E4\", // ä\n \"e\\u0308\": \"\\u00EB\", // ë\n \"o\\u0308\": \"\\u00F6\", // ö\n \"u\\u0308\": \"\\u00FC\", // ü\n \"A\\u0308\": \"\\u00C4\", // Ä\n \"E\\u0308\": \"\\u00CB\", // Ë\n \"O\\u0308\": \"\\u00D6\", // Ö\n \"U\\u0308\": \"\\u00DC\", // Ü\n // Remove unwanted characters\n \"\\u00AD\": \"\", // Soft hyphen\n \"\\u200B\": \"\", // Zero-width space\n \"\\uFEFF\": \"\", // Zero-width nbsp (BOM)\n};\n\n/**\n * Hyphen characters\n */\nexport const HYPHEN = \"-\";\nexport const EN_DASH = \"\\u2013\"; // –\nexport const EM_DASH = \"\\u2014\"; // —\nexport const HYPHENS = HYPHEN + EN_DASH + EM_DASH;\nexport const COMPOSITE_HYPHENS = HYPHEN + EN_DASH;\n\n/**\n * Punctuation character sets\n * Using Unicode escapes to avoid parsing issues\n */\n// Left: ( [ „ ‚ « # $ € £ ¥ ₽ <\nexport const LEFT_PUNCTUATION = \"([\\u201E\\u201A\\u00AB#$\\u20AC\\u00A3\\u00A5\\u20BD<\";\n// Right: . , : ; ) ] ! % ‰ ? 
» \" ' ‛ ' … > °\nexport const RIGHT_PUNCTUATION = \".,:;)]!%\\u2030?\\u00BB\\u201C\\u2019\\u201B\\u2018\\u2026>\\u00B0\";\n// Center: \" * • & + = @ © |\nexport const CENTER_PUNCTUATION = '\"*\\u2022&+=@\\u00A9|';\n// None: ^ / ± ' ´ ~ \\ -\nexport const NONE_PUNCTUATION = \"^/\\u00B1'\\u00B4~\\\\\" + HYPHENS;\nexport const PUNCTUATION =\n LEFT_PUNCTUATION + CENTER_PUNCTUATION + RIGHT_PUNCTUATION + NONE_PUNCTUATION;\n\nexport const PUNCTUATION_SET = new Set(PUNCTUATION);\nexport const LEFT_PUNCTUATION_SET = new Set(LEFT_PUNCTUATION);\nexport const RIGHT_PUNCTUATION_SET = new Set(RIGHT_PUNCTUATION);\nexport const NONE_PUNCTUATION_SET = new Set(NONE_PUNCTUATION);\n\n/**\n * Quote characters\n */\n// Single quotes: ' ‚ ‛ ' ´\nexport const SINGLE_QUOTES = \"'\\u201A\\u201B\\u2019\\u00B4\";\n// Double quotes: \" \" „ \" « »\nexport const DOUBLE_QUOTES = '\"\\u201C\\u201E\\u201D\\u00AB\\u00BB';\n\n/**\n * Normalized quote characters (for output)\n */\nexport const OPEN_DOUBLE_QUOTE = \"\\u201E\"; // „\nexport const CLOSE_DOUBLE_QUOTE = \"\\u201C\"; // \"\nexport const OPEN_SINGLE_QUOTE = \"\\u201A\"; // ‚\nexport const CLOSE_SINGLE_QUOTE = \"\\u2019\"; // '\n\n/**\n * Sentence-ending punctuation\n */\nexport const END_OF_SENTENCE = new Set([\".\", \"?\", \"!\", \"\\u2026\"]); // … = ellipsis\nexport const SENTENCE_FINISHERS = new Set([\n \")\",\n \"]\",\n \"\\u201C\", // \"\n \"\\u00BB\", // »\n \"\\u201D\", // \"\n \"\\u2019\", // '\n '\"',\n \"[\\u2026]\", // […]\n]);\n\n/**\n * Punctuation that may occur inside words\n */\nexport const PUNCT_INSIDE_WORD = new Set([\n \".\",\n \"'\",\n \"\\u2019\", // '\n \"\\u00B4\", // ´\n \"\\u2018\", // '\n HYPHEN,\n EN_DASH,\n]);\nexport const PUNCT_ENDING_WORD = new Set([\"'\", \"\\u00B2\", \"\\u00B3\"]); // ² ³\nexport const PUNCT_COMBINATIONS = new Set([\"?\", \"!\", \"\\u2026\"]); // …\n\n/**\n * Digit-related sets\n */\nexport const DIGITS = new Set(\"0123456789\");\nexport const SIGN_PREFIX = new Set([\"+\", 
\"-\"]);\n\n/**\n * Icelandic month names to month numbers\n */\nexport const MONTHS: Record<string, number> = {\n \"jan\\u00FAar\": 1, // janúar\n \"jan\\u00FAars\": 1, // janúars\n \"febr\\u00FAar\": 2, // febrúar\n \"febr\\u00FAars\": 2, // febrúars\n mars: 3,\n \"apr\\u00EDl\": 4, // apríl\n \"apr\\u00EDls\": 4, // apríls\n \"ma\\u00ED\": 5, // maí\n \"ma\\u00EDs\": 5, // maís\n \"j\\u00FAn\\u00ED\": 6, // júní\n \"j\\u00FAn\\u00EDs\": 6, // júnís\n \"j\\u00FAl\\u00ED\": 7, // júlí\n \"j\\u00FAl\\u00EDs\": 7, // júlís\n \"\\u00E1g\\u00FAst\": 8, // ágúst\n \"\\u00E1g\\u00FAsts\": 8, // ágústs\n september: 9,\n septembers: 9,\n \"okt\\u00F3ber\": 10, // október\n \"okt\\u00F3bers\": 10, // októbers\n \"n\\u00F3vember\": 11, // nóvember\n \"n\\u00F3vembers\": 11, // nóvembers\n desember: 12,\n desembers: 12,\n // Abbreviated forms\n \"jan.\": 1,\n \"feb.\": 2,\n \"mar.\": 3,\n \"apr.\": 4,\n \"j\\u00FAn.\": 6, // jún.\n \"j\\u00FAl.\": 7, // júl.\n \"\\u00E1g.\": 8, // ág.\n \"\\u00E1g\\u00FA.\": 8, // ágú.\n \"sep.\": 9,\n \"sept.\": 9,\n \"okt.\": 10,\n \"n\\u00F3v.\": 11, // nóv.\n \"des.\": 12,\n jan: 1,\n feb: 2,\n mar: 3,\n apr: 4,\n \"j\\u00FAn\": 6, // jún\n \"j\\u00FAl\": 7, // júl\n \"\\u00E1g\": 8, // ág\n \"\\u00E1g\\u00FA\": 8, // ágú\n sep: 9,\n sept: 9,\n okt: 10,\n \"n\\u00F3v\": 11, // nóv\n des: 12,\n};\n\n/**\n * Month name blacklist (Ágúst is also a masculine name)\n */\nexport const MONTH_BLACKLIST = new Set([\"\\u00C1g\\u00FAst\"]); // Ágúst\n\n/**\n * Max days in each month (index 0 unused, 1=January)\n */\nexport const DAYS_IN_MONTH = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];\n\n/**\n * Clock abbreviations\n */\nexport const CLOCK_ABBREVS = new Set([\"kl\", \"kl.\", \"klukkan\"]);\n\n/**\n * Time expressions spelled out in Icelandic\n */\nexport const CLOCK_NUMBERS: Record<string, [number, number, number]> = {\n eitt: [1, 0, 0],\n \"tv\\u00F6\": [2, 0, 0], // tvö\n \"\\u00FErj\\u00FA\": [3, 0, 0], // þrjú\n \"fj\\u00F6gur\": [4, 
0, 0], // fjögur\n fimm: [5, 0, 0],\n sex: [6, 0, 0],\n \"sj\\u00F6\": [7, 0, 0], // sjö\n \"\\u00E1tta\": [8, 0, 0], // átta\n \"n\\u00EDu\": [9, 0, 0], // níu\n \"t\\u00EDu\": [10, 0, 0], // tíu\n ellefu: [11, 0, 0],\n \"t\\u00F3lf\": [12, 0, 0], // tólf\n \"h\\u00E1lfeitt\": [12, 30, 0], // hálfeitt\n \"h\\u00E1lftv\\u00F6\": [1, 30, 0], // hálftvö\n \"h\\u00E1lf\\u00FErj\\u00FA\": [2, 30, 0], // hálfþrjú\n \"h\\u00E1lffj\\u00F6gur\": [3, 30, 0], // hálffjögur\n \"h\\u00E1lffimm\": [4, 30, 0], // hálffimm\n \"h\\u00E1lfsex\": [5, 30, 0], // hálfsex\n \"h\\u00E1lfsj\\u00F6\": [6, 30, 0], // hálfsjö\n \"h\\u00E1lf\\u00E1tta\": [7, 30, 0], // hálfátta\n \"h\\u00E1lfn\\u00EDu\": [8, 30, 0], // hálfníu\n \"h\\u00E1lft\\u00EDu\": [9, 30, 0], // hálftíu\n \"h\\u00E1lfellefu\": [10, 30, 0], // hálfellefu\n \"h\\u00E1lft\\u00F3lf\": [11, 30, 0], // hálftólf\n};\n\n/**\n * Before/After Common Era markers\n */\nexport const CE = new Set([\"e.Kr\", \"e.Kr.\"]);\nexport const BCE = new Set([\"f.Kr\", \"f.Kr.\"]);\nexport const CE_BCE = new Set([...CE, ...BCE]);\n\n/**\n * URL prefixes\n */\nexport const URL_PREFIXES = [\"http://\", \"https://\", \"ftp://\", \"file://\", \"mailto:\", \"www.\"];\n","/**\n * Currency symbols to ISO codes\n */\nexport const CURRENCY_SYMBOLS: Record<string, string> = {\n $: \"USD\",\n \"€\": \"EUR\",\n \"£\": \"GBP\",\n \"¥\": \"JPY\",\n \"₽\": \"RUB\",\n};\n\n/**\n * ISO 4217 currency codes\n */\nexport const CURRENCY_ABBREV = new Set([\n \"ISK\",\n \"DKK\",\n \"NOK\",\n \"SEK\",\n \"GBP\",\n \"USD\",\n \"EUR\",\n \"CAD\",\n \"AUD\",\n \"CHF\",\n \"JPY\",\n \"PLN\",\n \"RUB\",\n \"CZK\",\n \"INR\",\n \"CNY\",\n \"RMB\",\n \"HKD\",\n \"NZD\",\n \"SGD\",\n \"MXN\",\n \"ZAR\",\n]);\n\n/**\n * ISK amount abbreviations (króna-specific)\n */\nexport const AMOUNT_ABBREV: Record<string, number> = {\n \"kr.\": 1,\n kr: 1,\n krónur: 1,\n \"þ.kr.\": 1e3,\n \"þ.kr\": 1e3,\n \"þús.kr.\": 1e3,\n \"þús.kr\": 1e3,\n \"m.kr.\": 1e6,\n \"m.kr\": 1e6,\n \"mkr.\": 
1e6,\n mkr: 1e6,\n \"millj.kr.\": 1e6,\n \"millj.kr\": 1e6,\n \"ma.kr.\": 1e9,\n \"ma.kr\": 1e9,\n \"mlja.kr.\": 1e9,\n \"mlja.kr\": 1e9,\n};\n\nexport const ISK_AMOUNT_PRECEDING = new Set([\"kr.\", \"kr\", \"krónur\"]);\n\n/**\n * SI units: unit → [base unit, conversion factor]\n * Conversion factor is number or null (for temperature that needs functions)\n */\nexport const SI_UNITS: Record<string, [string, number]> = {\n // Distance\n m: [\"m\", 1.0],\n mm: [\"m\", 1.0e-3],\n μm: [\"m\", 1.0e-6],\n cm: [\"m\", 1.0e-2],\n sm: [\"m\", 1.0e-2],\n km: [\"m\", 1.0e3],\n ft: [\"m\", 0.3048],\n mi: [\"m\", 1609.34],\n // Area\n \"m²\": [\"m²\", 1.0],\n fm: [\"m²\", 1.0],\n \"km²\": [\"m²\", 1.0e6],\n \"cm²\": [\"m²\", 1.0e-2],\n ha: [\"m²\", 1.0e4],\n // Volume\n \"m³\": [\"m³\", 1.0],\n \"cm³\": [\"m³\", 1.0e-6],\n \"km³\": [\"m³\", 1.0e9],\n l: [\"m³\", 1.0e-3],\n ltr: [\"m³\", 1.0e-3],\n dl: [\"m³\", 1.0e-4],\n cl: [\"m³\", 1.0e-5],\n ml: [\"m³\", 1.0e-6],\n gal: [\"m³\", 3.78541e-3],\n bbl: [\"m³\", 158.987294928e-3],\n // Temperature\n K: [\"K\", 1.0],\n \"°K\": [\"K\", 1.0],\n // Mass\n g: [\"kg\", 1.0e-3],\n gr: [\"kg\", 1.0e-3],\n kg: [\"kg\", 1.0],\n t: [\"kg\", 1.0e3],\n mg: [\"kg\", 1.0e-6],\n μg: [\"kg\", 1.0e-9],\n tn: [\"kg\", 1.0e3],\n lb: [\"kg\", 0.453592],\n // Duration\n s: [\"s\", 1.0],\n ms: [\"s\", 1.0e-3],\n μs: [\"s\", 1.0e-6],\n klst: [\"s\", 3600.0],\n mín: [\"s\", 60.0],\n // Force\n N: [\"N\", 1.0],\n kN: [\"N\", 1.0e3],\n // Energy\n Nm: [\"J\", 1.0],\n J: [\"J\", 1.0],\n kJ: [\"J\", 1.0e3],\n MJ: [\"J\", 1.0e6],\n GJ: [\"J\", 1.0e9],\n TJ: [\"J\", 1.0e12],\n kWh: [\"J\", 3.6e6],\n MWh: [\"J\", 3.6e9],\n kWst: [\"J\", 3.6e6],\n MWst: [\"J\", 3.6e9],\n kcal: [\"J\", 4184],\n cal: [\"J\", 4.184],\n // Power\n W: [\"W\", 1.0],\n mW: [\"W\", 1.0e-3],\n kW: [\"W\", 1.0e3],\n MW: [\"W\", 1.0e6],\n GW: [\"W\", 1.0e9],\n TW: [\"W\", 1.0e12],\n // Electric potential\n V: [\"V\", 1.0],\n mV: [\"V\", 1.0e-3],\n kV: [\"V\", 1.0e3],\n // Electric 
current\n A: [\"A\", 1.0],\n mA: [\"A\", 1.0e-3],\n // Frequency\n Hz: [\"Hz\", 1.0],\n kHz: [\"Hz\", 1.0e3],\n MHz: [\"Hz\", 1.0e6],\n GHz: [\"Hz\", 1.0e9],\n // Pressure\n Pa: [\"Pa\", 1.0],\n hPa: [\"Pa\", 1.0e2],\n // Angle\n \"°\": [\"°\", 1.0],\n // Percentage\n \"%\": [\"%\", 1.0],\n \"‰\": [\"‰\", 0.1],\n};\n\nexport const SI_UNITS_SET = new Set(Object.keys(SI_UNITS));\n\n/**\n * Build regex for SI units (sorted by length descending)\n */\nfunction buildUnitsRegex(): RegExp {\n const units = Object.keys(SI_UNITS).sort((a, b) => b.length - a.length);\n const patterns = units.map((unit) => {\n const escaped = unit.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n // If unit ends with letter, require word boundary\n return unit[unit.length - 1].match(/[a-zA-Z]/) ? `${escaped}(?!\\\\w)` : escaped;\n });\n return new RegExp(`^(${patterns.join(\"|\")})`, \"u\");\n}\n\nexport const SI_UNITS_REGEX = buildUnitsRegex();\n\n/**\n * Build regex for currency symbols\n */\nfunction buildCurrencyRegex(): RegExp {\n const symbols = Object.keys(CURRENCY_SYMBOLS).sort((a, b) => b.length - a.length);\n const patterns = symbols.map((s) => s.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\"));\n return new RegExp(`^(${patterns.join(\"|\")})`, \"u\");\n}\n\nexport const CURRENCY_REGEX = buildCurrencyRegex();\n\n/**\n * Combined unit regex (SI + currency)\n */\nfunction buildCombinedUnitRegex(): RegExp {\n const allUnits = [...Object.keys(SI_UNITS), ...Object.keys(CURRENCY_SYMBOLS)].sort(\n (a, b) => b.length - a.length,\n );\n const patterns = allUnits.map((unit) => {\n const escaped = unit.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n return unit[unit.length - 1].match(/[a-zA-Z]/) ? 
`${escaped}(?!\\\\w)` : escaped;\n });\n return new RegExp(`(${patterns.join(\"|\")})$`);\n}\n\nexport const UNIT_REGEX = buildCombinedUnitRegex();\n","/**\n * Regular expression patterns for tokenization\n */\n\n// Time patterns\nexport const TIME_HMS_MS = /^(\\d{1,2}):(\\d{2}):(\\d{2}),(\\d{2})(?!\\d)/;\nexport const TIME_HMS = /^(\\d{1,2}):(\\d{2}):(\\d{2})(?!\\d)/;\nexport const TIME_HM = /^(\\d{1,2}):(\\d{2})(?!\\d)/;\n\n// Date patterns\nexport const DATE_ISO = /^(\\d{4})[-/](\\d{2})[-/](\\d{2})(?!\\d)/;\nexport const DATE_DMY = /^(\\d{1,2})[./-](\\d{1,2})[./-](\\d{2,4})(?!\\d)/;\nexport const DATE_DM = /^(\\d{2})\\.(\\d{2})(?!\\d)/;\nexport const DATE_MY = /^(\\d{2})[.-](\\d{4})(?!\\d)/;\n\n// Number patterns\n// Icelandic style: 1.234,56 (dot as thousands, comma as decimal)\nexport const NUMBER_ICELANDIC = /^[-+]?\\d+(\\.\\d{3})*(,\\d+)?(?!\\d)/;\n// English style: 1,234.56 (comma as thousands, dot as decimal)\nexport const NUMBER_ENGLISH = /^[-+]?\\d+(,\\d{3})*(\\.\\d+)?(?!\\d)/;\n// Simple integer\nexport const NUMBER_INTEGER = /^[-+]?\\d+(?!\\d)/;\n\n// Number followed by letter (e.g., 14b, 33C)\nexport const NUMBER_WITH_LETTER = /^(\\d+)([a-zA-Z])(?!\\w)/u;\n\n// Email pattern\nexport const EMAIL = /^[^@\\s]+@[^@\\s]+(\\.[^@\\s.,/:;\"()%#!?]+)+/;\n\n// URL detection\nexport const URL_PREFIX = /^(https?:\\/\\/|ftp:\\/\\/|file:\\/\\/|mailto:|www\\.)/i;\n\n// Domain pattern (simplified)\nexport const DOMAIN = /^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\\.[a-zA-Z]{2,})+/;\n\n// Hashtag\nexport const HASHTAG = /^#\\w+/u;\n\n// Username (@handle)\nexport const USERNAME = /^@[0-9a-z_]+/i;\n\n// Roman numerals\nexport const ROMAN_NUMERAL = /^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/;\n\n// Unicode vulgar fractions\nexport const VULGAR_FRACTIONS = /^[\\u00BC-\\u00BE\\u2150-\\u215E]/;\n\n// Ordinal suffixes (Icelandic kludgy ordinals like \"1sti\", \"3ji\")\nexport const KLUDGY_ORDINAL = 
/^(1st[iau]|3j[iau]|4ð[iau]|5t[iau]|2svar|3svar|2ja|3ja|4ra)(?!\\w)/;\n\n/**\n * Map of kludgy ordinals to their correct forms\n */\nexport const ORDINAL_CORRECTIONS: Record<string, string> = {\n \"1sti\": \"fyrsti\",\n \"1sta\": \"fyrsta\",\n \"1stu\": \"fyrstu\",\n \"3ji\": \"þriðji\",\n \"3ju\": \"þriðju\",\n \"4ði\": \"fjórði\",\n \"4ða\": \"fjórða\",\n \"4ðu\": \"fjórðu\",\n \"5ti\": \"fimmti\",\n \"5ta\": \"fimmta\",\n \"5tu\": \"fimmtu\",\n \"2svar\": \"tvisvar\",\n \"3svar\": \"þrisvar\",\n \"2ja\": \"tveggja\",\n \"3ja\": \"þriggja\",\n \"4ra\": \"fjögurra\",\n};\n\n/**\n * Ordinals that can be converted to numbers\n */\nexport const ORDINAL_NUMBERS: Record<string, number> = {\n \"1sti\": 1,\n \"1sta\": 1,\n \"1stu\": 1,\n \"3ji\": 3,\n \"3ja\": 3,\n \"3ju\": 3,\n \"4ði\": 4,\n \"4ða\": 4,\n \"4ðu\": 4,\n \"5ti\": 5,\n \"5ta\": 5,\n \"5tu\": 5,\n};\n\n/**\n * Convert Roman numeral to integer\n */\nexport function romanToInt(s: string): number {\n const values: [number, string][] = [\n [1000, \"M\"],\n [900, \"CM\"],\n [500, \"D\"],\n [400, \"CD\"],\n [100, \"C\"],\n [90, \"XC\"],\n [50, \"L\"],\n [40, \"XL\"],\n [10, \"X\"],\n [9, \"IX\"],\n [5, \"V\"],\n [4, \"IV\"],\n [1, \"I\"],\n ];\n\n let i = 0;\n let result = 0;\n for (const [value, numeral] of values) {\n while (s.substring(i, i + numeral.length) === numeral) {\n result += value;\n i += numeral.length;\n }\n }\n return result;\n}\n\n/**\n * Check if string is a valid Roman numeral\n */\nexport function isRomanNumeral(s: string): boolean {\n return ROMAN_NUMERAL.test(s);\n}\n\n// Telephone number patterns\n// Icelandic: 7 digits, optionally with country code +354\n// Format: +XXX XXXXXXX or +XXXXXXXXXXX (10 digits total for Iceland)\nexport const TELNO_WITH_CC = /^\\+(\\d{1,3})[-\\s]?(\\d{3})[-\\s]?(\\d{4})(?!\\d)/;\n\n// Chemical molecule pattern\n// Matches element symbols (uppercase + optional lowercase) followed by optional digit count\n// Examples: H2O, CO2, NaCl, H2SO4, C6H12O6\n// Must have 
at least 2 element symbols (single element like \"O\" or \"O2\" is not a molecule)\nexport const MOLECULE = /^[A-Z][a-z]?\\d*(?:[A-Z][a-z]?\\d*)+$/;\n\n// Icelandic SSN (kennitala) pattern: DDMMYY-XXXX\n// Last digit is century indicator (9=1900s, 0=2000s)\nexport const SSN = /^(\\d{6})-?(\\d{4})(?!\\d)/;\n\n// Serial number pattern: groups of digits separated by hyphens\n// Examples: 394-8362, 123-456-789\nexport const SERIAL_NUMBER = /^(\\d+)-(\\d+)(?:-\\d+)*(?!\\d)/;\n\n// Timestamp patterns (date + time combined)\n// ISO timestamp: YYYY-MM-DD HH:MM:SS or YYYY-MM-DDTHH:MM:SS\nexport const TIMESTAMP_ISO = /^(\\d{4})[-/](\\d{2})[-/](\\d{2})[T\\s](\\d{1,2}):(\\d{2}):(\\d{2})(?!\\d)/;\nexport const TIMESTAMP_ISO_HM = /^(\\d{4})[-/](\\d{2})[-/](\\d{2})[T\\s](\\d{1,2}):(\\d{2})(?!\\d)/;\n\n// Company/Person/Entity markers (for annotated text with brackets)\n// Format: [company:Name] or [company Name] (without spaces in single-token form)\nexport const COMPANY_MARKER = /^\\[company[:\\s]([^\\]]+)\\]/i;\nexport const PERSON_MARKER = /^\\[person[:\\s]([^\\]]+)\\]/i;\nexport const ENTITY_MARKER = /^\\[entity[:\\s]([^\\]]+)\\]/i;\n\n/**\n * Valid chemical element symbols\n */\nexport const ELEMENT_SYMBOLS = new Set([\n \"H\",\n \"He\",\n \"Li\",\n \"Be\",\n \"B\",\n \"C\",\n \"N\",\n \"O\",\n \"F\",\n \"Ne\",\n \"Na\",\n \"Mg\",\n \"Al\",\n \"Si\",\n \"P\",\n \"S\",\n \"Cl\",\n \"Ar\",\n \"K\",\n \"Ca\",\n \"Sc\",\n \"Ti\",\n \"V\",\n \"Cr\",\n \"Mn\",\n \"Fe\",\n \"Co\",\n \"Ni\",\n \"Cu\",\n \"Zn\",\n \"Ga\",\n \"Ge\",\n \"As\",\n \"Se\",\n \"Br\",\n \"Kr\",\n \"Rb\",\n \"Sr\",\n \"Y\",\n \"Zr\",\n \"Nb\",\n \"Mo\",\n \"Tc\",\n \"Ru\",\n \"Rh\",\n \"Pd\",\n \"Ag\",\n \"Cd\",\n \"In\",\n \"Sn\",\n \"Sb\",\n \"Te\",\n \"I\",\n \"Xe\",\n \"Cs\",\n \"Ba\",\n \"La\",\n \"Ce\",\n \"Pr\",\n \"Nd\",\n \"Pm\",\n \"Sm\",\n \"Eu\",\n \"Gd\",\n \"Tb\",\n \"Dy\",\n \"Ho\",\n \"Er\",\n \"Tm\",\n \"Yb\",\n \"Lu\",\n \"Hf\",\n \"Ta\",\n \"W\",\n \"Re\",\n \"Os\",\n \"Ir\",\n \"Pt\",\n 
\"Au\",\n \"Hg\",\n \"Tl\",\n \"Pb\",\n \"Bi\",\n \"Po\",\n \"At\",\n \"Rn\",\n \"Fr\",\n \"Ra\",\n \"Ac\",\n \"Th\",\n \"Pa\",\n \"U\",\n \"Np\",\n \"Pu\",\n \"Am\",\n \"Cm\",\n \"Bk\",\n \"Cf\",\n \"Es\",\n \"Fm\",\n \"Md\",\n \"No\",\n \"Lr\",\n \"Rf\",\n \"Db\",\n \"Sg\",\n \"Bh\",\n \"Hs\",\n \"Mt\",\n \"Ds\",\n \"Rg\",\n \"Cn\",\n \"Nh\",\n \"Fl\",\n \"Mc\",\n \"Lv\",\n \"Ts\",\n \"Og\",\n]);\n\n/**\n * Validate Icelandic kennitala checksum\n * Returns true if valid, false otherwise\n */\nexport function validateKennitala(digits: string): boolean {\n if (digits.length !== 10) return false;\n\n // All characters must be digits\n if (!/^\\d{10}$/.test(digits)) return false;\n\n // Extract components\n const d1 = parseInt(digits[0], 10);\n const d2 = parseInt(digits[1], 10);\n const d3 = parseInt(digits[2], 10);\n const d4 = parseInt(digits[3], 10);\n const d5 = parseInt(digits[4], 10);\n const d6 = parseInt(digits[5], 10);\n const d7 = parseInt(digits[6], 10);\n const d8 = parseInt(digits[7], 10);\n const checkDigit = parseInt(digits[8], 10);\n const century = parseInt(digits[9], 10);\n\n // Century must be 9 (1900s) or 0 (2000s)\n if (century !== 9 && century !== 0) return false;\n\n // Validate date components (DD MM YY)\n const day = d1 * 10 + d2;\n const month = d3 * 10 + d4;\n\n // Day must be 1-31 (or 41-71 for companies: day + 40)\n const isCompany = day > 40;\n const actualDay = isCompany ? day - 40 : day;\n if (actualDay < 1 || actualDay > 31) return false;\n if (month < 1 || month > 12) return false;\n\n // Calculate checksum: 11 - ((3×d1 + 2×d2 + 7×d3 + 6×d4 + 5×d5 + 4×d6 + 3×d7 + 2×d8) mod 11)\n const sum = 3 * d1 + 2 * d2 + 7 * d3 + 6 * d4 + 5 * d5 + 4 * d6 + 3 * d7 + 2 * d8;\n const remainder = sum % 11;\n const expected = remainder === 0 ? 
0 : 11 - remainder;\n\n // If expected is 10, the kennitala is invalid\n if (expected === 10) return false;\n\n return checkDigit === expected;\n}\n\n/**\n * Check if a string is a valid chemical formula\n * Validates that all symbols are real element symbols\n * Requires at least 2 elements (otherwise it's just an element symbol, not a molecule)\n */\nexport function isValidMolecule(s: string): boolean {\n // Must match the molecule pattern\n if (!MOLECULE.test(s)) return false;\n\n // Extract and validate all element symbols\n const elementRegex = /([A-Z][a-z]?)(\\d*)/g;\n let match;\n let elementCount = 0;\n\n while ((match = elementRegex.exec(s)) !== null) {\n const element = match[1];\n if (!ELEMENT_SYMBOLS.has(element)) {\n return false;\n }\n elementCount++;\n }\n\n // Must have at least 2 element symbols to be a molecule\n return elementCount >= 2;\n}\n","/**\n * Lexer: Split text into initial tokens\n *\n * This is the first stage of the pipeline. It:\n * 1. Splits text on whitespace\n * 2. Extracts punctuation\n * 3. 
Classifies tokens (words, numbers, dates, times, etc.)\n */\n\nimport type { Token, PunctuationType } from \"../types.js\";\nimport {\n PUNCTUATION_SET,\n LEFT_PUNCTUATION_SET,\n RIGHT_PUNCTUATION_SET,\n NONE_PUNCTUATION_SET,\n SINGLE_QUOTES,\n DOUBLE_QUOTES,\n HYPHENS,\n HYPHEN,\n COMPOSITE_HYPHENS,\n DIGITS,\n SIGN_PREFIX,\n DAYS_IN_MONTH,\n UNICODE_REPLACEMENTS,\n OPEN_DOUBLE_QUOTE,\n CLOSE_DOUBLE_QUOTE,\n OPEN_SINGLE_QUOTE,\n CLOSE_SINGLE_QUOTE,\n} from \"../data/constants.js\";\nimport { SI_UNITS, SI_UNITS_SET, SI_UNITS_REGEX, CURRENCY_SYMBOLS } from \"../data/units.js\";\nimport {\n TIME_HMS_MS,\n TIME_HMS,\n TIME_HM,\n DATE_ISO,\n DATE_DMY,\n DATE_DM,\n DATE_MY,\n NUMBER_WITH_LETTER,\n EMAIL,\n URL_PREFIX,\n DOMAIN,\n HASHTAG,\n USERNAME,\n TELNO_WITH_CC,\n SSN,\n SERIAL_NUMBER,\n TIMESTAMP_ISO,\n TIMESTAMP_ISO_HM,\n COMPANY_MARKER,\n PERSON_MARKER,\n ENTITY_MARKER,\n validateKennitala,\n isValidMolecule,\n} from \"../data/patterns.js\";\n\n/**\n * Replace composite Unicode glyphs with single characters\n */\nexport function normalizeUnicode(text: string): string {\n let result = text;\n for (const [from, to] of Object.entries(UNICODE_REPLACEMENTS)) {\n result = result.replaceAll(from, to);\n }\n return result;\n}\n\n/**\n * Check if a date is valid\n */\nfunction isValidDate(y: number, m: number, d: number): boolean {\n if (y < 1776 || y > 2100) return false;\n if (m < 1 || m > 12) return false;\n if (d < 1 || d > DAYS_IN_MONTH[m]) return false;\n // Check Feb 29 in non-leap years\n if (m === 2 && d === 29) {\n const isLeap = (y % 4 === 0 && y % 100 !== 0) || y % 400 === 0;\n if (!isLeap) return false;\n }\n return true;\n}\n\n/**\n * Get punctuation position type\n */\nfunction getPunctType(char: string): PunctuationType {\n if (LEFT_PUNCTUATION_SET.has(char)) return \"left\";\n if (RIGHT_PUNCTUATION_SET.has(char)) return \"right\";\n if (NONE_PUNCTUATION_SET.has(char)) return \"none\";\n return \"center\";\n}\n\n/**\n * Create a punctuation token\n 
*/\nfunction punct(text: string, normalized?: string): Token {\n const norm = normalized ?? text;\n const position = norm.length === 1 ? getPunctType(norm) : \"center\";\n return { kind: \"punctuation\", text, normalized: norm, position };\n}\n\n/**\n * Parse a token starting with digits\n */\nfunction parseDigits(w: string): [Token, number] {\n // Timestamp with full time: YYYY-MM-DD HH:MM:SS or YYYY-MM-DDTHH:MM:SS\n let match = TIMESTAMP_ISO.exec(w);\n if (match) {\n const y = parseInt(match[1], 10);\n const mo = parseInt(match[2], 10);\n const d = parseInt(match[3], 10);\n const h = parseInt(match[4], 10);\n const mi = parseInt(match[5], 10);\n const s = parseInt(match[6], 10);\n if (isValidDate(y, mo, d) && h >= 0 && h < 24 && mi >= 0 && mi < 60 && s >= 0 && s < 60) {\n return [\n {\n kind: \"timestamp\",\n text: match[0],\n year: y,\n month: mo,\n day: d,\n hour: h,\n minute: mi,\n second: s,\n },\n match[0].length,\n ];\n }\n }\n\n // Timestamp with HH:MM only: YYYY-MM-DD HH:MM or YYYY-MM-DDTHH:MM\n match = TIMESTAMP_ISO_HM.exec(w);\n if (match) {\n const y = parseInt(match[1], 10);\n const mo = parseInt(match[2], 10);\n const d = parseInt(match[3], 10);\n const h = parseInt(match[4], 10);\n const mi = parseInt(match[5], 10);\n if (isValidDate(y, mo, d) && h >= 0 && h < 24 && mi >= 0 && mi < 60) {\n return [\n {\n kind: \"timestamp\",\n text: match[0],\n year: y,\n month: mo,\n day: d,\n hour: h,\n minute: mi,\n second: 0,\n },\n match[0].length,\n ];\n }\n }\n\n // Time with milliseconds: H:M:S,MS\n match = TIME_HMS_MS.exec(w);\n if (match) {\n const h = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n const s = parseInt(match[3], 10);\n if (h >= 0 && h < 24 && m >= 0 && m < 60 && s >= 0 && s < 60) {\n return [{ kind: \"time\", text: match[0], hour: h, minute: m, second: s }, match[0].length];\n }\n }\n\n // Time H:M:S\n match = TIME_HMS.exec(w);\n if (match) {\n const h = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n const s = 
parseInt(match[3], 10);\n if (h >= 0 && h < 24 && m >= 0 && m < 60 && s >= 0 && s < 60) {\n return [{ kind: \"time\", text: match[0], hour: h, minute: m, second: s }, match[0].length];\n }\n }\n\n // Time H:M\n match = TIME_HM.exec(w);\n if (match) {\n const h = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n if (h >= 0 && h < 24 && m >= 0 && m < 60) {\n return [{ kind: \"time\", text: match[0], hour: h, minute: m, second: 0 }, match[0].length];\n }\n }\n\n // ISO date: YYYY-MM-DD or YYYY/MM/DD\n match = DATE_ISO.exec(w);\n if (match) {\n const y = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n const d = parseInt(match[3], 10);\n if (isValidDate(y, m, d)) {\n return [{ kind: \"date\", text: match[0], year: y, month: m, day: d }, match[0].length];\n }\n }\n\n // Icelandic SSN (kennitala): DDMMYY-XXXX\n match = SSN.exec(w);\n if (match) {\n const digits = match[1] + match[2];\n if (validateKennitala(digits)) {\n return [{ kind: \"ssn\", text: match[0], value: digits }, match[0].length];\n }\n }\n\n // Serial number: XXX-XXXX or similar patterns with hyphens\n // This includes invalid SSN-like patterns (6-4 digits that failed checksum validation)\n match = SERIAL_NUMBER.exec(w);\n if (match) {\n return [{ kind: \"serialnumber\", text: match[0] }, match[0].length];\n }\n\n // Icelandic phone number (7 digits without hyphen): XXXXXXX\n // Only match continuous 7 digits - hyphenated patterns go to serial number\n const telnoMatch = w.match(/^(\\d{7})(?!\\d)/);\n if (telnoMatch) {\n const number = telnoMatch[1];\n return [{ kind: \"telno\", text: telnoMatch[0], cc: \"\", number }, telnoMatch[0].length];\n }\n\n // Date with day, month, year: D.M.Y or D/M/Y or D-M-Y\n match = DATE_DMY.exec(w);\n if (match) {\n let d = parseInt(match[1], 10);\n let m = parseInt(match[2], 10);\n let y = parseInt(match[3], 10);\n // Handle 2-digit years\n if (y <= 99) {\n y += y > 50 ? 
1900 : 2000;\n }\n // Swap if American format (month > 12 but day <= 12)\n if (m > 12 && d <= 12) {\n [d, m] = [m, d];\n }\n if (isValidDate(y, m, d)) {\n return [{ kind: \"date\", text: match[0], year: y, month: m, day: d }, match[0].length];\n }\n }\n\n // Relative date: DD.MM (day and month only)\n match = DATE_DM.exec(w);\n if (match) {\n const d = parseInt(match[1], 10);\n const m = parseInt(match[2], 10);\n if (m >= 1 && m <= 12 && d >= 1 && d <= DAYS_IN_MONTH[m]) {\n return [{ kind: \"daterel\", text: match[0], year: 0, month: m, day: d }, match[0].length];\n }\n }\n\n // Relative date: MM.YYYY or MM-YYYY\n match = DATE_MY.exec(w);\n if (match) {\n const m = parseInt(match[1], 10);\n const y = parseInt(match[2], 10);\n if (y >= 1776 && y <= 2100 && m >= 1 && m <= 12) {\n return [{ kind: \"daterel\", text: match[0], year: y, month: m, day: 0 }, match[0].length];\n }\n }\n\n // Number with trailing letter: 14b, 33C\n match = NUMBER_WITH_LETTER.exec(w);\n if (match) {\n const letter = match[2];\n // Don't match if the letter is an SI unit\n if (!SI_UNITS_SET.has(letter)) {\n const n = parseInt(match[1], 10);\n return [{ kind: \"numwletter\", text: match[0], value: n, letter }, match[0].length];\n }\n }\n\n // Number with unit (Icelandic style: 1.234,56km)\n const icelandicMatch = w.match(/^([-+]?\\d+(?:\\.\\d{3})*(?:,\\d+)?)/);\n if (icelandicMatch) {\n const numPart = icelandicMatch[1];\n const rest = w.slice(numPart.length);\n const unitMatch = SI_UNITS_REGEX.exec(rest);\n if (unitMatch) {\n const unit = unitMatch[1];\n const fullText = numPart + unit;\n const value = parseFloat(numPart.replace(/\\./g, \"\").replace(\",\", \".\"));\n if (unit in CURRENCY_SYMBOLS) {\n const iso = CURRENCY_SYMBOLS[unit];\n return [{ kind: \"amount\", text: fullText, value, currency: iso }, fullText.length];\n }\n const [baseUnit] = SI_UNITS[unit];\n if (unit === \"%\" || unit === \"‰\") {\n return [{ kind: \"percent\", text: fullText, value }, fullText.length];\n }\n return [{ 
kind: \"measurement\", text: fullText, value, unit: baseUnit }, fullText.length];\n }\n }\n\n // Plain number (try Icelandic style first, then English)\n // Icelandic: 1.234,56\n const iceNum = w.match(/^([-+]?\\d+(?:\\.\\d{3})*(?:,\\d+)?)(?!\\d)/);\n if (iceNum && iceNum[1].includes(\",\")) {\n const value = parseFloat(iceNum[1].replace(/\\./g, \"\").replace(\",\", \".\"));\n return [{ kind: \"number\", text: iceNum[1], value }, iceNum[1].length];\n }\n\n // English: 1,234.56\n const engNum = w.match(/^([-+]?\\d+(?:,\\d{3})*(?:\\.\\d+)?)(?!\\d)/);\n if (engNum && (engNum[1].includes(\",\") || engNum[1].includes(\".\"))) {\n const value = parseFloat(engNum[1].replace(/,/g, \"\"));\n return [{ kind: \"number\", text: engNum[1], value }, engNum[1].length];\n }\n\n // Simple integer\n const intMatch = w.match(/^([-+]?\\d+)(?!\\d)/);\n if (intMatch) {\n const value = parseInt(intMatch[1], 10);\n return [{ kind: \"number\", text: intMatch[1], value }, intMatch[1].length];\n }\n\n // Fallback: unknown\n return [{ kind: \"unknown\", text: w[0] }, 1];\n}\n\n/**\n * Parse a single whitespace-separated token\n */\nfunction* parseRawToken(w: string): Generator<Token> {\n // Empty string signals sentence split\n if (!w) {\n yield { kind: \"s_split\", text: null };\n return;\n }\n\n // Pure alphabetic word (most common case)\n if (/^[\\p{L}]+$/u.test(w) || SI_UNITS_SET.has(w)) {\n // Check if it's a chemical molecule (e.g., NaCl, CaCO3)\n if (isValidMolecule(w)) {\n yield { kind: \"molecule\", text: w };\n return;\n }\n yield { kind: \"word\", text: w };\n return;\n }\n\n // Phone number with country code: +XXX XXXXXXX (before signed number handling)\n if (w.startsWith(\"+\") && w.length >= 10) {\n const telMatch = TELNO_WITH_CC.exec(w);\n if (telMatch) {\n const cc = telMatch[1];\n const number = telMatch[2] + telMatch[3];\n yield { kind: \"telno\", text: telMatch[0], cc, number };\n w = w.slice(telMatch[0].length);\n if (!w) return;\n }\n }\n\n // Handle signed numbers at 
start\n if (w.length > 1 && SIGN_PREFIX.has(w[0]) && DIGITS.has(w[1])) {\n const [token, eaten] = parseDigits(w);\n yield token;\n w = w.slice(eaten);\n if (!w) return;\n }\n\n // Handle composite hyphen prefix: -menn in \"þingkonur og -menn\"\n if (w.length > 1 && COMPOSITE_HYPHENS.includes(w[0]) && /\\p{L}/u.test(w[1])) {\n let i = 2;\n while (i < w.length && /\\p{L}/u.test(w[i])) i++;\n const word = w.slice(0, i);\n if (\n word.slice(1).toLowerCase() === word.slice(1) ||\n (i > 2 && word.slice(1).toUpperCase() === word.slice(1))\n ) {\n yield { kind: \"word\", text: word };\n w = w.slice(i);\n }\n }\n\n // Shortcut for quoted single words: \"word\" or 'word'\n if (w.length >= 3) {\n if (DOUBLE_QUOTES.includes(w[0]) && DOUBLE_QUOTES.includes(w[w.length - 1])) {\n const inner = w.slice(1, -1);\n if (/^[\\p{L}]+$/u.test(inner)) {\n yield punct(w[0], OPEN_DOUBLE_QUOTE);\n yield { kind: \"word\", text: inner };\n yield punct(w[w.length - 1], CLOSE_DOUBLE_QUOTE);\n return;\n }\n }\n if (SINGLE_QUOTES.includes(w[0]) && SINGLE_QUOTES.includes(w[w.length - 1])) {\n const inner = w.slice(1, -1);\n if (/^[\\p{L}]+$/u.test(inner)) {\n yield punct(w[0], OPEN_SINGLE_QUOTE);\n yield { kind: \"word\", text: inner };\n yield punct(w[w.length - 1], CLOSE_SINGLE_QUOTE);\n return;\n }\n }\n }\n\n // Leading quote → opening quote\n if (w.length > 1) {\n if (DOUBLE_QUOTES.includes(w[0])) {\n yield punct(w[0], OPEN_DOUBLE_QUOTE);\n w = w.slice(1);\n } else if (SINGLE_QUOTES.includes(w[0])) {\n yield punct(w[0], OPEN_SINGLE_QUOTE);\n w = w.slice(1);\n }\n }\n\n // Process remaining characters\n while (w) {\n // Handle leading punctuation\n while (w && PUNCTUATION_SET.has(w[0])) {\n // Company/Person/Entity markers - check before other punctuation\n if (w.startsWith(\"[\")) {\n const companyMatch = COMPANY_MARKER.exec(w);\n if (companyMatch) {\n yield { kind: \"company\", text: companyMatch[1] };\n w = w.slice(companyMatch[0].length);\n continue;\n }\n const personMatch = 
PERSON_MARKER.exec(w);\n if (personMatch) {\n yield { kind: \"person\", text: personMatch[1] };\n w = w.slice(personMatch[0].length);\n continue;\n }\n const entityMatch = ENTITY_MARKER.exec(w);\n if (entityMatch) {\n yield { kind: \"entity\", text: entityMatch[1] };\n w = w.slice(entityMatch[0].length);\n continue;\n }\n }\n // Ellipsis variations\n if (w.startsWith(\"[...]\")) {\n yield punct(\"[...]\", \"[…]\");\n w = w.slice(5);\n continue;\n }\n if (w.startsWith(\"[…]\")) {\n yield punct(\"[…]\");\n w = w.slice(3);\n continue;\n }\n if (w.startsWith(\"...\")) {\n let dots = \"...\";\n let rest = w.slice(3);\n while (rest.startsWith(\".\")) {\n dots += \".\";\n rest = rest.slice(1);\n }\n yield punct(dots, \"…\");\n w = rest;\n continue;\n }\n if (w.startsWith(\"…\")) {\n yield punct(\"…\");\n w = w.slice(1);\n continue;\n }\n // Double comma → single comma or opening quote\n if (w === \",,\") {\n yield punct(\",,\", \",\");\n w = \"\";\n continue;\n }\n if (w.startsWith(\",,\")) {\n yield punct(\",,\", OPEN_DOUBLE_QUOTE);\n w = w.slice(2);\n continue;\n }\n // Paragraph markers\n if (w === \"[[\" || w === \"]]\") {\n // Skip paragraph markers for now (just punctuation)\n yield punct(w);\n w = \"\";\n continue;\n }\n // Hyphens\n if (HYPHENS.includes(w[0])) {\n yield punct(w[0], HYPHEN);\n w = w.slice(1);\n continue;\n }\n // Closing quotes\n if (DOUBLE_QUOTES.includes(w[0])) {\n yield punct(w[0], CLOSE_DOUBLE_QUOTE);\n w = w.slice(1);\n continue;\n }\n if (SINGLE_QUOTES.includes(w[0])) {\n yield punct(w[0], CLOSE_SINGLE_QUOTE);\n w = w.slice(1);\n continue;\n }\n // Hashtag check\n if (w.startsWith(\"#\") && w.length > 1) {\n const hashMatch = HASHTAG.exec(w);\n if (hashMatch) {\n // Check if it's a number sign: #12\n if (/^#\\d+$/.test(hashMatch[0])) {\n yield {\n kind: \"ordinal\",\n text: hashMatch[0],\n value: parseInt(hashMatch[0].slice(1), 10),\n };\n } else {\n yield { kind: \"hashtag\", text: hashMatch[0] };\n }\n w = w.slice(hashMatch[0].length);\n 
continue;\n }\n }\n // Username check\n if (w.startsWith(\"@\") && w.length > 1) {\n const userMatch = USERNAME.exec(w);\n if (userMatch) {\n yield { kind: \"username\", text: userMatch[0], username: userMatch[0].slice(1) };\n w = w.slice(userMatch[0].length);\n continue;\n }\n }\n // Phone number with country code: +XXX XXXXXXX\n if (w.startsWith(\"+\") && w.length > 1 && DIGITS.has(w[1])) {\n const telMatch = TELNO_WITH_CC.exec(w);\n if (telMatch) {\n const cc = telMatch[1];\n const number = telMatch[2] + telMatch[3];\n yield { kind: \"telno\", text: telMatch[0], cc, number };\n w = w.slice(telMatch[0].length);\n continue;\n }\n }\n // Default: single punctuation character\n yield punct(w[0]);\n w = w.slice(1);\n }\n\n if (!w) break;\n\n // Email check\n if (w.includes(\"@\")) {\n const emailMatch = EMAIL.exec(w);\n if (emailMatch) {\n yield { kind: \"email\", text: emailMatch[0] };\n w = w.slice(emailMatch[0].length);\n continue;\n }\n }\n\n // URL check\n if (URL_PREFIX.test(w)) {\n // Cut trailing punctuation\n let url = w;\n let trailing = \"\";\n while (url && RIGHT_PUNCTUATION_SET.has(url[url.length - 1])) {\n trailing = url[url.length - 1] + trailing;\n url = url.slice(0, -1);\n }\n yield { kind: \"url\", text: url };\n w = trailing;\n continue;\n }\n\n // Domain check\n if (w.length >= 4 && /^[a-zA-Z0-9]/.test(w) && w.includes(\".\")) {\n const domainMatch = DOMAIN.exec(w);\n if (domainMatch) {\n let domain = domainMatch[0];\n let trailing = w.slice(domain.length);\n // Cut trailing punctuation from domain\n while (domain && PUNCTUATION_SET.has(domain[domain.length - 1])) {\n trailing = domain[domain.length - 1] + trailing;\n domain = domain.slice(0, -1);\n }\n if (domain.includes(\".\")) {\n yield { kind: \"domain\", text: domain };\n w = trailing;\n continue;\n }\n }\n }\n\n // Numbers\n if (DIGITS.has(w[0]) || (SIGN_PREFIX.has(w[0]) && w.length > 1 && DIGITS.has(w[1]))) {\n const [token, eaten] = parseDigits(w);\n yield token;\n w = w.slice(eaten);\n\n 
// Check for SI unit immediately following\n if (w) {\n const unitMatch = SI_UNITS_REGEX.exec(w);\n if (unitMatch) {\n yield { kind: \"word\", text: unitMatch[1] };\n w = w.slice(unitMatch[1].length);\n }\n }\n continue;\n }\n\n // Words (alphabetic sequences)\n if (/^\\p{L}/u.test(w)) {\n let i = 1;\n const PUNCT_INSIDE = new Set([\".\", \"'\", \"'\", \"´\", \"'\", HYPHEN, \"\\u2013\"]);\n const PUNCT_ENDING = new Set([\"'\", \"²\", \"³\"]);\n\n while (i < w.length) {\n if (/\\p{L}/u.test(w[i])) {\n i++;\n } else if (DIGITS.has(w[i])) {\n // Could be a molecule like H2O - extend to include digits\n i++;\n } else if (PUNCT_INSIDE.has(w[i]) && i + 1 < w.length && /\\p{L}/u.test(w[i + 1])) {\n i++;\n } else {\n break;\n }\n }\n // Allow ending punctuation\n if (i < w.length && PUNCT_ENDING.has(w[i])) {\n i++;\n }\n const wordCandidate = w.slice(0, i);\n\n // Check if this is a chemical molecule (e.g., H2O, CO2, NaCl)\n if (isValidMolecule(wordCandidate)) {\n yield { kind: \"molecule\", text: wordCandidate };\n w = w.slice(i);\n continue;\n }\n\n yield { kind: \"word\", text: wordCandidate };\n w = w.slice(i);\n continue;\n }\n\n // Unknown character - emit as unknown\n yield { kind: \"unknown\", text: w[0] };\n w = w.slice(1);\n }\n}\n\n/** A rough token with its position in the original text */\ninterface RoughToken {\n text: string;\n start: number;\n end: number;\n}\n\n/**\n * Split text into rough tokens on whitespace, handling paragraph breaks.\n * Tracks character offsets for each token.\n */\nfunction* generateRoughTokens(\n text: string,\n replaceCompositeGlyphs: boolean,\n): Generator<RoughToken> {\n const normalized = replaceCompositeGlyphs ? 
normalizeUnicode(text) : text;\n\n // Track position in normalized text\n let cursor = 0;\n\n // Match tokens and whitespace\n const tokenRegex = /(\\S+)|(\\n\\s*\\n)|(\\s+)/g;\n let match: RegExpExecArray | null;\n let lastParagraphBreak = false;\n\n while ((match = tokenRegex.exec(normalized)) !== null) {\n if (match[1]) {\n // Non-whitespace token\n if (lastParagraphBreak) {\n // Emit sentence split marker before this token\n yield { text: \"\", start: cursor, end: cursor };\n lastParagraphBreak = false;\n }\n yield { text: match[1], start: match.index, end: match.index + match[1].length };\n } else if (match[2]) {\n // Paragraph break (double newline)\n lastParagraphBreak = true;\n }\n // Single whitespace (match[3]) is ignored\n cursor = match.index + match[0].length;\n }\n}\n\n/**\n * Lexer: Convert text to initial token stream\n */\nexport function lex(text: string, replaceCompositeGlyphs = true, includeOffsets = false): Token[] {\n const tokens: Token[] = [];\n\n for (const roughToken of generateRoughTokens(text, replaceCompositeGlyphs)) {\n // Track position within the rough token for offset computation\n let localPos = 0;\n\n for (const token of parseRawToken(roughToken.text)) {\n if (includeOffsets && token.text !== null) {\n // Find token text within remaining rough token\n const idx = roughToken.text.indexOf(token.text, localPos);\n if (idx !== -1) {\n token.span = {\n start: roughToken.start + idx,\n end: roughToken.start + idx + token.text.length,\n };\n localPos = idx + token.text.length;\n }\n } else if (includeOffsets && token.text === null) {\n // Sentence markers get zero-width span at rough token position\n token.span = { start: roughToken.start, end: roughToken.start };\n }\n tokens.push(token);\n }\n }\n\n return tokens;\n}\n","/**\n * Common Icelandic abbreviations\n * Format: abbreviation → expansion\n *\n * This is a curated subset (~100) of common abbreviations.\n * The full Miðeind tokenizer has ~1500.\n */\nexport const ABBREVIATIONS: 
Record<string, string> = {\n // Titles and honorifics\n hr: \"herra\",\n \"hr.\": \"herra\",\n frú: \"frú\",\n \"frú.\": \"frú\",\n sr: \"séra\",\n \"sr.\": \"séra\",\n dr: \"doktor\",\n \"dr.\": \"doktor\",\n prof: \"prófessor\",\n \"prof.\": \"prófessor\",\n\n // Organizations\n hf: \"hlutafélag\",\n \"hf.\": \"hlutafélag\",\n ehf: \"einkahlutafélag\",\n \"ehf.\": \"einkahlutafélag\",\n ohf: \"opinbert hlutafélag\",\n \"ohf.\": \"opinbert hlutafélag\",\n sf: \"sameignarfélag\",\n \"sf.\": \"sameignarfélag\",\n slf: \"samlagsfélag\",\n \"slf.\": \"samlagsfélag\",\n ses: \"sjálfseignarstofnun\",\n \"ses.\": \"sjálfseignarstofnun\",\n\n // Common abbreviations\n ofl: \"og fleiri\",\n \"o.fl.\": \"og fleiri\",\n osfrv: \"og svo framvegis\",\n \"o.s.frv.\": \"og svo framvegis\",\n oþh: \"og þess háttar\",\n \"o.þ.h.\": \"og þess háttar\",\n þe: \"það er\",\n \"þ.e.\": \"það er\",\n þea: \"það er að segja\",\n \"þ.e.a.s.\": \"það er að segja\",\n sbr: \"samanber\",\n \"sbr.\": \"samanber\",\n skv: \"samkvæmt\",\n \"skv.\": \"samkvæmt\",\n mtt: \"með tilliti til\",\n \"m.t.t.\": \"með tilliti til\",\n ath: \"athugasemd\",\n \"ath.\": \"athugasemd\",\n gr: \"grein\",\n \"gr.\": \"grein\",\n mgr: \"málsgrein\",\n \"mgr.\": \"málsgrein\",\n tölul: \"töluliður\",\n \"tölul.\": \"töluliður\",\n nr: \"númer\",\n \"nr.\": \"númer\",\n sl: \"síðastliðinn\",\n \"sl.\": \"síðastliðinn\",\n nk: \"næstkomandi\",\n \"n.k.\": \"næstkomandi\",\n\n // Time-related\n kl: \"klukkan\",\n \"kl.\": \"klukkan\",\n ca: \"circa\",\n \"ca.\": \"circa\",\n\n // Academic/Professional\n bs: \"Bachelor of Science\",\n \"B.S.\": \"Bachelor of Science\",\n ms: \"Master of Science\",\n \"M.S.\": \"Master of Science\",\n ba: \"Bachelor of Arts\",\n \"B.A.\": \"Bachelor of Arts\",\n // Note: \"ma\" also means \"milljarður\" - using M.A. 
for Master of Arts\n \"M.A.\": \"Master of Arts\",\n phd: \"Doctor of Philosophy\",\n \"Ph.D.\": \"Doctor of Philosophy\",\n mba: \"Master of Business Administration\",\n MBA: \"Master of Business Administration\",\n\n // Places\n Rvk: \"Reykjavík\",\n \"Rvk.\": \"Reykjavík\",\n Akr: \"Akranes\",\n \"Akr.\": \"Akranes\",\n Ak: \"Akureyri\",\n \"Ak.\": \"Akureyri\",\n\n // Directions\n n: \"norður\",\n \"n.\": \"norður\",\n s: \"suður\",\n \"s.\": \"suður\",\n a: \"austur\",\n \"a.\": \"austur\",\n v: \"vestur\",\n \"v.\": \"vestur\",\n na: \"norðaustur\",\n \"n.a.\": \"norðaustur\",\n nv: \"norðvestur\",\n \"n.v.\": \"norðvestur\",\n sa: \"suðaustur\",\n \"s.a.\": \"suðaustur\",\n sv: \"suðvestur\",\n \"s.v.\": \"suðvestur\",\n\n // Measurements (that don't have SI unit meanings)\n þús: \"þúsund\",\n \"þús.\": \"þúsund\",\n millj: \"milljón\",\n \"millj.\": \"milljón\",\n mljó: \"milljón\",\n \"mljó.\": \"milljón\",\n ma: \"milljarður\",\n \"ma.\": \"milljarður\",\n mrð: \"milljarður\",\n \"mrð.\": \"milljarður\",\n};\n\n/**\n * Abbreviations that can end a sentence (followed by period)\n */\nexport const FINISHER_ABBREVIATIONS = new Set([\n \"o.fl\",\n \"o.s.frv\",\n \"o.þ.h\",\n \"þ.e\",\n \"þ.e.a.s\",\n \"m.t.t\",\n \"n.k\",\n]);\n\n/**\n * Check if an abbreviation exists\n */\nexport function hasAbbreviation(text: string): boolean {\n return text in ABBREVIATIONS;\n}\n\n/**\n * Get the expansion of an abbreviation\n */\nexport function getAbbreviationMeaning(text: string): string | undefined {\n return ABBREVIATIONS[text];\n}\n","/**\n * Particles: Coalesce abbreviations, currency+number, etc.\n *\n * This stage combines tokens that belong together:\n * - Abbreviation + period\n * - Currency symbol + number → amount\n * - Number + currency code → amount\n */\n\nimport type { Token, TokenSpan } from \"../types.js\";\nimport { CURRENCY_SYMBOLS, CURRENCY_ABBREV, AMOUNT_ABBREV } from \"../data/units.js\";\nimport { ABBREVIATIONS } from 
\"../data/abbreviations.js\";\n\n/** Merge spans from two tokens */\nfunction mergeSpans(first: Token, last: Token): TokenSpan | undefined {\n if (first.span && last.span) {\n return { start: first.span.start, end: last.span.end };\n }\n return first.span ?? last.span;\n}\n\n/**\n * Process particles: combine related tokens\n */\nexport function processParticles(tokens: Token[]): Token[] {\n const result: Token[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const token = tokens[i];\n const next = tokens[i + 1];\n\n // Word + period → check if it's an abbreviation\n if (token.kind === \"word\" && next?.kind === \"punctuation\" && next.text === \".\") {\n const abbrevWithPeriod = token.text + \".\";\n if (abbrevWithPeriod in ABBREVIATIONS || abbrevWithPeriod in AMOUNT_ABBREV) {\n result.push({ kind: \"word\", text: abbrevWithPeriod, span: mergeSpans(token, next) });\n i += 2;\n continue;\n }\n }\n\n // Currency symbol + number → amount (e.g., $100)\n if (token.kind === \"punctuation\" && token.text in CURRENCY_SYMBOLS && next?.kind === \"number\") {\n const iso = CURRENCY_SYMBOLS[token.text];\n result.push({\n kind: \"amount\",\n text: token.text + next.text,\n value: next.value,\n currency: iso,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n\n // Number + currency code → amount (e.g., 100 USD, 100 kr.)\n if (token.kind === \"number\" && next?.kind === \"word\") {\n const currencyText = next.text;\n if (CURRENCY_ABBREV.has(currencyText)) {\n result.push({\n kind: \"amount\",\n text: token.text + \" \" + next.text,\n value: token.value,\n currency: currencyText,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n // Check for ISK abbreviations (kr., m.kr., etc.)\n if (currencyText in AMOUNT_ABBREV) {\n const multiplier = AMOUNT_ABBREV[currencyText];\n result.push({\n kind: \"amount\",\n text: token.text + \" \" + next.text,\n value: token.value * multiplier,\n currency: \"ISK\",\n span: mergeSpans(token, next),\n });\n i += 
2;\n continue;\n }\n }\n\n // Percent word after number\n if (\n token.kind === \"number\" &&\n next?.kind === \"word\" &&\n [\"prósent\", \"prósenta\", \"prósenti\", \"hundraðshluti\"].includes(next.text.toLowerCase())\n ) {\n result.push({\n kind: \"percent\",\n text: token.text + \" \" + next.text,\n value: token.value,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n\n // Date + time → timestamp\n if ((token.kind === \"date\" || token.kind === \"dateabs\") && next?.kind === \"time\") {\n result.push({\n kind: \"timestamp\",\n text: token.text + \" \" + next.text,\n year: token.year,\n month: token.month,\n day: token.day,\n hour: next.hour,\n minute: next.minute,\n second: next.second,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n\n // Default: pass through\n result.push(token);\n i++;\n }\n\n return result;\n}\n","/**\n * Sentences: Add sentence boundary markers\n *\n * This stage detects sentence boundaries and inserts S_BEGIN/S_END markers.\n */\n\nimport type { Token } from \"../types.js\";\nimport {\n END_OF_SENTENCE,\n SENTENCE_FINISHERS,\n PUNCT_COMBINATIONS,\n MONTHS,\n} from \"../data/constants.js\";\nimport { isRomanNumeral } from \"../data/patterns.js\";\nimport { CURRENCY_ABBREV } from \"../data/units.js\";\n\n/**\n * Check if the next token could be ending a sentence or starting a new one\n */\nfunction couldBeEndOfSentence(nextToken: Token): boolean {\n // Sentence markers definitely end/start\n if (nextToken.kind === \"s_end\" || nextToken.kind === \"s_split\") {\n return true;\n }\n\n // Uppercase word (except month names and roman numerals)\n if (nextToken.kind === \"word\" && nextToken.text.length > 0) {\n const firstChar = nextToken.text[0];\n if (firstChar === firstChar.toUpperCase() && firstChar !== firstChar.toLowerCase()) {\n // It's capitalized\n const lower = nextToken.text.toLowerCase();\n // Don't treat month names as sentence starters\n if (lower in MONTHS) return false;\n // Don't treat roman 
numerals as sentence starters\n if (isRomanNumeral(nextToken.text)) return false;\n // Don't treat currency abbreviations as sentence starters\n if (CURRENCY_ABBREV.has(nextToken.text)) return false;\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Add sentence boundary markers\n */\nexport function addSentenceMarkers(tokens: Token[]): Token[] {\n if (tokens.length === 0) return [];\n\n const result: Token[] = [];\n let inSentence = false;\n let i = 0;\n\n const beginSentence = (): Token => ({ kind: \"s_begin\", text: null });\n const endSentence = (): Token => ({ kind: \"s_end\", text: null });\n\n while (i < tokens.length) {\n const token = tokens[i];\n const next = tokens[i + 1];\n\n // Handle sentence split marker\n if (token.kind === \"s_split\") {\n if (inSentence) {\n result.push(endSentence());\n inSentence = false;\n }\n // Don't emit the split marker itself\n i++;\n continue;\n }\n\n // Start a new sentence if needed\n if (!inSentence) {\n result.push(beginSentence());\n inSentence = true;\n }\n\n // Check for sentence-ending punctuation\n if (token.kind === \"punctuation\" && END_OF_SENTENCE.has(token.normalized)) {\n // Handle ellipsis mid-sentence (don't end if next token doesn't look like sentence start)\n if (token.normalized === \"…\" && next && !couldBeEndOfSentence(next)) {\n result.push(token);\n i++;\n continue;\n }\n\n // Combine consecutive punctuation (??!, etc.)\n let combinedText = token.text;\n let j = i + 1;\n while (j < tokens.length) {\n const nextTok = tokens[j];\n if (nextTok.kind !== \"punctuation\") break;\n if (!PUNCT_COMBINATIONS.has(nextTok.normalized)) break;\n combinedText += nextTok.text;\n j++;\n }\n\n // Emit combined punctuation if any\n if (j > i + 1) {\n result.push({ ...token, text: combinedText });\n i = j;\n } else {\n result.push(token);\n i++;\n }\n\n // Collect any sentence finishers (closing quotes, brackets)\n while (i < tokens.length) {\n const tok = tokens[i];\n if (tok.kind !== \"punctuation\") break;\n 
if (!SENTENCE_FINISHERS.has(tok.normalized)) break;\n result.push(tok);\n i++;\n }\n\n // End the sentence\n result.push(endSentence());\n inSentence = false;\n continue;\n }\n\n // Regular token\n result.push(token);\n i++;\n }\n\n // Close any open sentence\n if (inSentence) {\n result.push(endSentence());\n }\n\n return result;\n}\n","/**\n * Phrases: Combine date+year, ordinal+month, clock+time, compounds, etc.\n *\n * This stage combines tokens that form higher-level constructs:\n * - \"5. mars\" → date\n * - \"2024\" after date → add year to date\n * - \"kl. 14:30\" → time with prefix\n * - \"1920 f.Kr.\" → year BCE\n * - \"stjórnskipunar- og eftirlitsnefnd\" → compound word\n */\n\nimport type { Token, TokenSpan } from \"../types.js\";\n\n/** Merge spans from first and last tokens */\nfunction mergeSpans(first: Token, last: Token): TokenSpan | undefined {\n if (first.span && last.span) {\n return { start: first.span.start, end: last.span.end };\n }\n return first.span ?? last.span;\n}\nimport {\n MONTHS,\n MONTH_BLACKLIST,\n CE,\n BCE,\n CLOCK_ABBREVS,\n CLOCK_NUMBERS,\n HYPHEN,\n EN_DASH,\n} from \"../data/constants.js\";\nimport { FINISHER_ABBREVIATIONS } from \"../data/abbreviations.js\";\n\n/** Hyphens that can appear in compound words */\nconst COMPOSITE_HYPHENS = new Set([HYPHEN, EN_DASH]);\n\n/**\n * Get month number from token, or null if not a month\n */\nfunction getMonth(token: Token, afterOrdinal = false): number | null {\n if (token.kind !== \"word\") return null;\n // Check blacklist (Ágúst as a name)\n if (!afterOrdinal && MONTH_BLACKLIST.has(token.text)) return null;\n const lower = token.text.toLowerCase();\n return MONTHS[lower] ?? 
null;\n}\n\n/**\n * Check if a token is a composite hyphen (- or –)\n */\nfunction isCompositeHyphen(token: Token): boolean {\n return token.kind === \"punctuation\" && COMPOSITE_HYPHENS.has(token.text);\n}\n\n/**\n * Try to parse a compound word pattern starting at index i.\n * Pattern: (word- [,])+ (og|eða) word\n * Examples:\n * - \"stjórnskipunar- og eftirlitsnefnd\"\n * - \"dómsmála-, viðskipta- og iðnaðarráðherra\"\n *\n * Returns [combined token, new index] or null if no match.\n */\nfunction tryParseCompound(tokens: Token[], startIndex: number): [Token, number] | null {\n const prefixes: Token[] = [];\n let i = startIndex;\n\n // Accumulate prefix patterns: word + hyphen [+ comma]\n while (i < tokens.length) {\n const word = tokens[i];\n const hyphen = tokens[i + 1];\n\n // Must be word followed by composite hyphen\n if (word?.kind !== \"word\" || !hyphen || !isCompositeHyphen(hyphen)) {\n break;\n }\n\n prefixes.push(word);\n prefixes.push(hyphen);\n i += 2;\n\n // Check for optional comma\n const maybeComma = tokens[i];\n if (maybeComma?.kind === \"punctuation\" && maybeComma.text === \",\") {\n prefixes.push(maybeComma);\n i++;\n }\n }\n\n // Must have at least one prefix\n if (prefixes.length === 0) {\n return null;\n }\n\n // Next must be \"og\" or \"eða\"\n const conjunction = tokens[i];\n if (\n !conjunction ||\n conjunction.kind !== \"word\" ||\n (conjunction.text.toLowerCase() !== \"og\" && conjunction.text.toLowerCase() !== \"eða\")\n ) {\n return null;\n }\n\n // After conjunction must be a word (the suffix)\n const suffix = tokens[i + 1];\n if (!suffix || suffix.kind !== \"word\") {\n return null;\n }\n\n // Build the combined text\n // Join all parts: \"stjórnskipunar\", \"-\", \"og\", \"eftirlitsnefnd\"\n // Then normalize spacing: remove space before hyphen/comma\n const parts = [...prefixes, conjunction, suffix];\n let text = parts.map((t) => t.text).join(\" \");\n text = text.replace(/ -/g, \"-\").replace(/ ,/g, \",\");\n\n // Merge span 
from first prefix to suffix\n const span = mergeSpans(tokens[startIndex], suffix);\n return [{ kind: \"word\", text, span }, i + 2];\n}\n\n/**\n * Process phrases: combine date/time constructs and compound words\n */\nexport function processPhrases(tokens: Token[]): Token[] {\n const result: Token[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const token = tokens[i];\n const next = tokens[i + 1];\n\n // Try compound word pattern first\n const compound = tryParseCompound(tokens, i);\n if (compound) {\n result.push(compound[0]);\n i = compound[1];\n continue;\n }\n\n // Word + \".\" → check if it's an abbreviation that ends sentences\n if (token.kind === \"word\" && next?.kind === \"punctuation\" && next.text === \".\") {\n const base = token.text.replace(/\\.$/, \"\");\n if (FINISHER_ABBREVIATIONS.has(base)) {\n // Coalesce abbreviation with period\n result.push({ kind: \"word\", text: token.text + \".\", span: mergeSpans(token, next) });\n i += 2;\n continue;\n }\n }\n\n // Year/number + \"e.Kr.\" or \"f.Kr.\" → year with era\n if ((token.kind === \"year\" || token.kind === \"number\") && next?.kind === \"word\") {\n const val = token.kind === \"year\" ? token.value : token.value;\n let newVal: number | null = null;\n if (BCE.has(next.text)) {\n newVal = -val;\n } else if (CE.has(next.text)) {\n newVal = val;\n }\n if (newVal !== null) {\n let text = token.text + \" \" + next.text;\n let lastToken: Token = next;\n i += 2;\n // Handle trailing period\n if (tokens[i]?.kind === \"punctuation\" && tokens[i].text === \".\") {\n text += \".\";\n lastToken = tokens[i];\n i++;\n }\n result.push({ kind: \"year\", text, value: newVal, span: mergeSpans(token, lastToken) });\n continue;\n }\n }\n\n // Ordinal/number + month name → date\n if ((token.kind === \"ordinal\" || token.kind === \"number\") && next?.kind === \"word\") {\n const month = getMonth(next, true);\n if (month !== null) {\n const day = token.kind === \"ordinal\" ? 
token.value : token.value;\n result.push({\n kind: \"daterel\",\n text: token.text + \" \" + next.text,\n year: 0,\n month,\n day,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n }\n\n // Date + year → add year to date\n if (\n (token.kind === \"date\" || token.kind === \"daterel\") &&\n token.year === 0 &&\n next?.kind === \"number\"\n ) {\n const year = next.value;\n if (year >= 1776 && year <= 2100) {\n result.push({\n kind: \"dateabs\",\n text: token.text + \" \" + next.text,\n year,\n month: token.month,\n day: token.day,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n }\n\n // Date + year token\n if (\n (token.kind === \"date\" || token.kind === \"daterel\") &&\n token.year === 0 &&\n next?.kind === \"year\"\n ) {\n result.push({\n kind: \"dateabs\",\n text: token.text + \" \" + next.text,\n year: next.value,\n month: token.month,\n day: token.day,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n\n // Clock abbreviation + time → time (keep as-is but combine text)\n if (\n token.kind === \"word\" &&\n CLOCK_ABBREVS.has(token.text.toLowerCase()) &&\n next?.kind === \"time\"\n ) {\n result.push({\n ...next,\n text: token.text + \" \" + next.text,\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n\n // Clock abbreviation + spelled-out time (kl. 
tvö → time)\n if (\n token.kind === \"word\" &&\n CLOCK_ABBREVS.has(token.text.toLowerCase()) &&\n next?.kind === \"word\"\n ) {\n const timeValue = CLOCK_NUMBERS[next.text.toLowerCase()];\n if (timeValue) {\n result.push({\n kind: \"time\",\n text: token.text + \" \" + next.text,\n hour: timeValue[0],\n minute: timeValue[1],\n second: timeValue[2],\n span: mergeSpans(token, next),\n });\n i += 2;\n continue;\n }\n }\n\n // Default: pass through\n result.push(token);\n i++;\n }\n\n return result;\n}\n","/**\n * Main tokenize function\n *\n * Chains the pipeline stages to produce a stream of tokens.\n */\n\nimport type { Token, TokenizeOptions } from \"./types.js\";\nimport { lex } from \"./pipeline/lexer.js\";\nimport { processParticles } from \"./pipeline/particles.js\";\nimport { addSentenceMarkers } from \"./pipeline/sentences.js\";\nimport { processPhrases } from \"./pipeline/phrases.js\";\n\n/**\n * Tokenize Icelandic text into an array of tokens.\n *\n * @param text - The text to tokenize\n * @param options - Tokenization options\n * @returns Array of tokens\n *\n * @example\n * ```ts\n * const tokens = tokenize(\"Þetta er próf.\");\n * // → [word(\"Þetta\"), word(\"er\"), word(\"próf\"), punctuation(\".\")]\n *\n * // With sentence markers:\n * const tokens = tokenize(\"Þetta er próf.\", { includeSentenceMarkers: true });\n * // → [s_begin, word(\"Þetta\"), word(\"er\"), word(\"próf\"), punctuation(\".\"), s_end]\n * ```\n */\nexport function tokenize(text: string, options: TokenizeOptions = {}): Token[] {\n const {\n replaceCompositeGlyphs = true,\n includeSentenceMarkers = false,\n includeOffsets = false,\n } = options;\n\n // Pipeline:\n // 1. Lexer: split text into initial tokens\n let tokens = lex(text, replaceCompositeGlyphs, includeOffsets);\n\n // 2. Particles: coalesce abbreviations, currency+number\n tokens = processParticles(tokens);\n\n // 3. Phrases: combine date+year, ordinal+month, etc.\n tokens = processPhrases(tokens);\n\n // 4. 
Sentences: add boundary markers if requested\n if (includeSentenceMarkers) {\n tokens = addSentenceMarkers(tokens);\n } else {\n // Filter out internal sentence markers (s_split)\n tokens = tokens.filter((t) => t.kind !== \"s_split\");\n }\n\n return tokens;\n}\n","/**\n * Split text into sentences\n *\n * A higher-level function that returns sentence strings rather than tokens.\n */\n\nimport { tokenize } from \"./tokenize.js\";\nimport type { Token } from \"./types.js\";\n\n/**\n * Split Icelandic text into an array of sentence strings.\n *\n * @param text - The text to split\n * @returns Array of sentence strings\n *\n * @example\n * ```ts\n * const sentences = splitIntoSentences(\"Þetta er fyrsta setning. Þetta er önnur.\");\n * // → [\"Þetta er fyrsta setning.\", \"Þetta er önnur.\"]\n * ```\n */\nexport function splitIntoSentences(text: string): string[] {\n const tokens = tokenize(text, { includeSentenceMarkers: true });\n const sentences: string[] = [];\n let currentSentence: string[] = [];\n\n for (const token of tokens) {\n if (token.kind === \"s_begin\") {\n currentSentence = [];\n } else if (token.kind === \"s_end\") {\n if (currentSentence.length > 0) {\n sentences.push(joinTokens(currentSentence));\n }\n currentSentence = [];\n } else if (token.text !== null) {\n currentSentence.push(getTokenText(token));\n }\n }\n\n // Handle any remaining tokens (shouldn't happen with proper markers)\n if (currentSentence.length > 0) {\n sentences.push(joinTokens(currentSentence));\n }\n\n return sentences;\n}\n\n/**\n * Get display text from a token\n */\nfunction getTokenText(token: Token): string {\n if (token.kind === \"punctuation\") {\n return token.normalized;\n }\n return token.text ?? 
\"\";\n}\n\n/**\n * Join token texts with appropriate spacing\n */\nfunction joinTokens(texts: string[]): string {\n if (texts.length === 0) return \"\";\n\n let result = texts[0];\n\n for (let i = 1; i < texts.length; i++) {\n const prev = texts[i - 1];\n const curr = texts[i];\n\n // Determine if we need a space\n const needsSpace = shouldAddSpace(prev, curr);\n if (needsSpace) {\n result += \" \" + curr;\n } else {\n result += curr;\n }\n }\n\n return result;\n}\n\n/**\n * Determine if a space should be added between two tokens\n */\nfunction shouldAddSpace(prev: string, curr: string): boolean {\n if (!prev || !curr) return false;\n\n const lastChar = prev[prev.length - 1];\n const firstChar = curr[0];\n\n // Opening punctuation: no space after\n // ( [ „ ‚ « <\n const openingPunct = new Set([\"(\", \"[\", \"\\u201E\", \"\\u201A\", \"\\u00AB\", \"<\"]);\n if (openingPunct.has(lastChar)) return false;\n\n // Closing/ending punctuation: no space before\n // . , ; : ! ? ) ] \" ' » > …\n const closingPunct = new Set([\n \".\",\n \",\",\n \";\",\n \":\",\n \"!\",\n \"?\",\n \")\",\n \"]\",\n \"\\u201C\", // \"\n \"\\u2019\", // '\n \"\\u00BB\", // »\n \">\",\n \"\\u2026\", // …\n ]);\n if (closingPunct.has(firstChar)) return false;\n\n // Hyphen handling\n if (lastChar === \"-\" || firstChar === \"-\") return false;\n\n return 
true;\n}\n"],"mappings":"AAGA,MAAa,EAA+C,CAE1D,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IAEX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IACX,GAAW,IAEX,IAAU,GACV,IAAU,GACV,IAAU,GACX,CAQY,EAAU,MAQV,EAAmB,eAMA,GAAuB,EAErD,EAAmB,GAnDrB,MAqDa,EAAkB,IAAI,IAAI,qDAAY,CACtC,EAAuB,IAAI,IAAI,EAAiB,CAChD,EAAwB,IAAI,IAAI,qBAAkB,CAClD,EAAuB,IAAI,IAAI,cAAiB,CAMhD,EAAgB,QAEhB,EAAgB,SAahB,EAAkB,IAAI,IAAI,CAAC,IAAK,IAAK,IAAK,IAAS,CAAC,CACpD,EAAqB,IAAI,IAAI,CACxC,IACA,IACA,IACA,IACA,IACA,IACA,IACA,MACD,CAAC,CAeW,EAAqB,IAAI,IAAI,CAAC,IAAK,IAAK,IAAS,CAAC,CAKlD,EAAS,IAAI,IAAI,aAAa,CAC9B,EAAc,IAAI,IAAI,CAAC,IAAK,IAAI,CAAC,CAKjC,EAAiC,CAC5C,OAAe,EACf,QAAgB,EAChB,QAAgB,EAChB,SAAiB,EACjB,KAAM,EACN,MAAc,EACd,OAAe,EACf,IAAY,EACZ,KAAa,EACb,KAAkB,EAClB,MAAmB,EACnB,KAAkB,EAClB,MAAmB,EACnB,MAAmB,EACnB,OAAoB,EACpB,UAAW,EACX,WAAY,EACZ,QAAgB,GAChB,SAAiB,GACjB,SAAiB,GACjB,UAAkB,GAClB,SAAU,GACV,UAAW,GAEX,OAAQ,EACR,OAAQ,EACR,OAAQ,EACR,OAAQ,EACR,OAAa,EACb,OAAa,EACb,MAAY,EACZ,OAAkB,EAClB,OAAQ,EACR,QAAS,EACT,OAAQ,GACR,OAAa,GACb,OAAQ,GACR,IAAK,EACL,IAAK,EACL,IAAK,EACL,IAAK,EACL,IAAY,EACZ,IAAY,EACZ,GAAW,EACX,IAAiB,EACjB,IAAK,EACL,KAAM,EACN,IAAK,GACL,IAAY,GACZ,IAAK,GACN,CAKY,EAAkB,IAAI,IAAI,CAAC,QAAkB,CAAC,CAK9C,EAAgB,CAAC,EAAG,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAI,GAAG,CAKnE,EAAgB,IAAI,IAAI,CAAC,KAAM,MAAO,UAAU,CAAC,CAKjD,GAA0D,CACrE,KAAM,CAAC,EAAG,EAAG,EAAE,CACf,IAAY,CAAC,EAAG,EAAG,EAAE,CACrB,KAAkB,CAAC,EAAG,EAAG,EAAE,CAC3B,OAAe,CAAC,EAAG,EAAG,EAAE,CACxB,KAAM,CAAC,EAAG,EAAG,EAAE,CACf,IAAK,CAAC,EAAG,EAAG,EAAE,CACd,IAAY,CAAC,EAAG,EAAG,EAAE,CACrB,KAAa,CAAC,EAAG,EAAG,EAAE,CACtB,IAAY,CAAC,EAAG,EAAG,EAAE,CACrB,IAAY,CAAC,GAAI,EAAG,EAAE,CACtB,OAAQ,CAAC,GAAI,EAAG,EAAE,CAClB,KAAa,CAAC,GAAI,EAAG,EAAE,CACvB,SAAiB,CAAC,GAAI,GAAI,EAAE,CAC5B,QAAqB,CAAC,EAAG,GAAI,EAAE,CAC/B,SAA2B,CAAC,EAAG,GAAI,EAAE,CACrC,WAAwB,CAAC,EAAG,GAAI,EAAE,CAClC,SAAiB,CAAC,EAAG,GAAI,EAAE,CAC3B,QAAgB,CAAC,EAAG,GAAI,EAAE,CAC1B,QAAqB,CAAC,EA
AG,GAAI,EAAE,CAC/B,SAAsB,CAAC,EAAG,GAAI,EAAE,CAChC,QAAqB,CAAC,EAAG,GAAI,EAAE,CAC/B,QAAqB,CAAC,EAAG,GAAI,EAAE,CAC/B,WAAmB,CAAC,GAAI,GAAI,EAAE,CAC9B,SAAsB,CAAC,GAAI,GAAI,EAAE,CAClC,CAKY,EAAK,IAAI,IAAI,CAAC,OAAQ,QAAQ,CAAC,CAC/B,EAAM,IAAI,IAAI,CAAC,OAAQ,QAAQ,CAAC,CACvB,IAAI,IAAI,CAAC,GAAG,EAAI,GAAG,EAAI,CAAC,CCxN9C,MAAa,EAA2C,CACtD,EAAG,MACH,IAAK,MACL,IAAK,MACL,IAAK,MACL,IAAK,MACN,CAKY,EAAkB,IAAI,IAAI,CACrC,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACA,MACD,CAAC,CAKW,EAAwC,CACnD,MAAO,EACP,GAAI,EACJ,OAAQ,EACR,QAAS,IACT,OAAQ,IACR,UAAW,IACX,SAAU,IACV,QAAS,IACT,OAAQ,IACR,OAAQ,IACR,IAAK,IACL,YAAa,IACb,WAAY,IACZ,SAAU,IACV,QAAS,IACT,WAAY,IACZ,UAAW,IACZ,CAQY,EAA6C,CAExD,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,IAAO,CACjB,GAAI,CAAC,IAAK,IAAO,CACjB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,MAAO,CACjB,GAAI,CAAC,IAAK,QAAQ,CAElB,KAAM,CAAC,KAAM,EAAI,CACjB,GAAI,CAAC,KAAM,EAAI,CACf,MAAO,CAAC,KAAM,IAAM,CACpB,MAAO,CAAC,KAAM,IAAO,CACrB,GAAI,CAAC,KAAM,IAAM,CAEjB,KAAM,CAAC,KAAM,EAAI,CACjB,MAAO,CAAC,KAAM,KAAO,CACrB,MAAO,CAAC,KAAM,IAAM,CACpB,EAAG,CAAC,KAAM,KAAO,CACjB,IAAK,CAAC,KAAM,KAAO,CACnB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,KAAO,CAClB,IAAK,CAAC,KAAM,UAAW,CACvB,IAAK,CAAC,KAAM,cAAiB,CAE7B,EAAG,CAAC,IAAK,EAAI,CACb,KAAM,CAAC,IAAK,EAAI,CAEhB,EAAG,CAAC,KAAM,KAAO,CACjB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,EAAI,CACf,EAAG,CAAC,KAAM,IAAM,CAChB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,KAAO,CAClB,GAAI,CAAC,KAAM,IAAM,CACjB,GAAI,CAAC,KAAM,QAAS,CAEpB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,KAAO,CACjB,KAAM,CAAC,IAAK,KAAO,CACnB,IAAK,CAAC,IAAK,GAAK,CAEhB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,IAAM,CAEhB,GAAI,CAAC,IAAK,EAAI,CACd,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,aAAO,CACjB,IAAK,CAAC,IAAK,KAAM,CACjB,IAAK,CAAC,IAAK,KAAM,CACjB,KAAM,CAAC,IAAK,KAAM,CAClB,KAAM,CA
AC,IAAK,KAAM,CAClB,KAAM,CAAC,IAAK,KAAK,CACjB,IAAK,CAAC,IAAK,MAAM,CAEjB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,IAAM,CAChB,GAAI,CAAC,IAAK,aAAO,CAEjB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CACjB,GAAI,CAAC,IAAK,IAAM,CAEhB,EAAG,CAAC,IAAK,EAAI,CACb,GAAI,CAAC,IAAK,KAAO,CAEjB,GAAI,CAAC,KAAM,EAAI,CACf,IAAK,CAAC,KAAM,IAAM,CAClB,IAAK,CAAC,KAAM,IAAM,CAClB,IAAK,CAAC,KAAM,IAAM,CAElB,GAAI,CAAC,KAAM,EAAI,CACf,IAAK,CAAC,KAAM,IAAM,CAElB,IAAK,CAAC,IAAK,EAAI,CAEf,IAAK,CAAC,IAAK,EAAI,CACf,IAAK,CAAC,IAAK,GAAI,CAChB,CAEY,EAAe,IAAI,IAAI,OAAO,KAAK,EAAS,CAAC,CAK1D,SAAS,GAA0B,CAEjC,IAAM,EADQ,OAAO,KAAK,EAAS,CAAC,MAAM,EAAG,IAAM,EAAE,OAAS,EAAE,OAAO,CAChD,IAAK,GAAS,CACnC,IAAM,EAAU,EAAK,QAAQ,sBAAuB,OAAO,CAE3D,OAAO,EAAK,EAAK,OAAS,GAAG,MAAM,WAAW,CAAG,GAAG,EAAQ,SAAW,GACvE,CACF,OAAW,OAAO,KAAK,EAAS,KAAK,IAAI,CAAC,GAAI,IAAI,CAGpD,MAAa,EAAiB,GAAiB,CAK/C,SAAS,IAA6B,CAEpC,IAAM,EADU,OAAO,KAAK,EAAiB,CAAC,MAAM,EAAG,IAAM,EAAE,OAAS,EAAE,OAAO,CACxD,IAAK,GAAM,EAAE,QAAQ,sBAAuB,OAAO,CAAC,CAC7E,OAAW,OAAO,KAAK,EAAS,KAAK,IAAI,CAAC,GAAI,IAAI,CAGtB,IAAoB,CAKlD,SAAS,IAAiC,CAIxC,IAAM,EAHW,CAAC,GAAG,OAAO,KAAK,EAAS,CAAE,GAAG,OAAO,KAAK,EAAiB,CAAC,CAAC,MAC3E,EAAG,IAAM,EAAE,OAAS,EAAE,OACxB,CACyB,IAAK,GAAS,CACtC,IAAM,EAAU,EAAK,QAAQ,sBAAuB,OAAO,CAC3D,OAAO,EAAK,EAAK,OAAS,GAAG,MAAM,WAAW,CAAG,GAAG,EAAQ,SAAW,GACvE,CACF,OAAW,OAAO,IAAI,EAAS,KAAK,IAAI,CAAC,IAAI,CAGrB,IAAwB,CCnMlD,MAAa,GAAc,2CACd,GAAW,mCACX,GAAU,2BAGV,GAAW,uCACX,GAAW,+CACX,GAAU,0BACV,GAAU,4BAWV,GAAqB,0BAGrB,EAAQ,2CAGR,EAAa,mDAGb,EAAS,gEAGT,EAAU,SAGV,EAAW,gBAGX,EAAgB,2DAkF7B,SAAgB,EAAe,EAAoB,CACjD,OAAO,EAAc,KAAK,EAAE,CAM9B,MAAa,EAAgB,+CAMhB,EAAW,sCAIX,EAAM,0BAIN,EAAgB,8BAIhB,EAAgB,qEAChB,EAAmB,6DAInB,EAAiB,6BACjB,EAAgB,4BAChB,EAAgB,4BAKhB,EAAkB,IAAI,IAAI,gWAuHtC,CAAC,CAMF,SAAgB,GAAkB,EAAyB,CAIzD,GAHI,EAAO,SAAW,IAGlB,CAAC,WAAW,KAAK,EAAO,CAAE,MAAO,GAGrC,IAAM,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS
,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAK,SAAS,EAAO,GAAI,GAAG,CAC5B,EAAa,SAAS,EAAO,GAAI,GAAG,CACpC,EAAU,SAAS,EAAO,GAAI,GAAG,CAGvC,GAAI,IAAY,GAAK,IAAY,EAAG,MAAO,GAG3C,IAAM,EAAM,EAAK,GAAK,EAChB,EAAQ,EAAK,GAAK,EAIlB,EADY,EAAM,GACM,EAAM,GAAK,EAEzC,GADI,EAAY,GAAK,EAAY,IAC7B,EAAQ,GAAK,EAAQ,GAAI,MAAO,GAIpC,IAAM,GADM,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,EAAK,EAAI,GACvD,GAClB,EAAW,IAAc,EAAI,EAAI,GAAK,EAK5C,OAFI,IAAa,GAAW,GAErB,IAAe,EAQxB,SAAgB,EAAgB,EAAoB,CAElD,GAAI,CAAC,EAAS,KAAK,EAAE,CAAE,MAAO,GAG9B,IAAM,EAAe,sBACjB,EACA,EAAe,EAEnB,MAAQ,EAAQ,EAAa,KAAK,EAAE,IAAM,MAAM,CAC9C,IAAM,EAAU,EAAM,GACtB,GAAI,CAAC,EAAgB,IAAI,EAAQ,CAC/B,MAAO,GAET,IAIF,OAAO,GAAgB,ECpSzB,SAAgB,EAAiB,EAAsB,CACrD,IAAI,EAAS,EACb,IAAK,GAAM,CAAC,EAAM,KAAO,OAAO,QAAQ,EAAqB,CAC3D,EAAS,EAAO,WAAW,EAAM,EAAG,CAEtC,OAAO,EAMT,SAAS,EAAY,EAAW,EAAW,EAAoB,CAS7D,MAJA,EAJI,EAAI,MAAQ,EAAI,MAChB,EAAI,GAAK,EAAI,IACb,EAAI,GAAK,EAAI,EAAc,IAE3B,IAAM,GAAK,IAAM,IAEf,EADY,EAAI,GAAM,GAAK,EAAI,KAAQ,GAAM,EAAI,KAAQ,IASjE,SAAS,GAAa,EAA+B,CAInD,OAHI,EAAqB,IAAI,EAAK,CAAS,OACvC,EAAsB,IAAI,EAAK,CAAS,QACxC,EAAqB,IAAI,EAAK,CAAS,OACpC,SAMT,SAAS,EAAM,EAAc,EAA4B,CACvD,IAAM,EAAO,GAAc,EAE3B,MAAO,CAAE,KAAM,cAAe,OAAM,WAAY,EAAM,SADrC,EAAK,SAAW,EAAI,GAAa,EAAK,CAAG,SACM,CAMlE,SAAS,EAAY,EAA4B,CAE/C,IAAI,EAAQ,EAAc,KAAK,EAAE,CACjC,GAAI,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CAC3B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CAC3B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,EAAY,EAAG,EAAI,EAAE,EAAI,GAAK,GAAK,EAAI,IAAM,GAAM,GAAK,EAAK,IAAM,GAAK,GAAK,EAAI,GACnF,MAAO,CACL,CACE,KAAM,YACN,KAAM,EAAM,GACZ,KAAM,EACN,MAAO,EACP,IAAK,EACL,KAAM,EACN,OAAQ,EACR,OAAQ,EACT,CACD,EAAM,GAAG,OACV,CAML,GADA,EAAQ,EAAiB,KAAK,EAAE,CAC5B,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CAC3B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAK,SAAS,EAAM,GAAI,GAAG,CACjC,GAAI,EAAY,EAAG,EAAI,EAAE,EAAI,GAAK,GAAK,
EAAI,IAAM,GAAM,GAAK,EAAK,GAC/D,MAAO,CACL,CACE,KAAM,YACN,KAAM,EAAM,GACZ,KAAM,EACN,MAAO,EACP,IAAK,EACL,KAAM,EACN,OAAQ,EACR,OAAQ,EACT,CACD,EAAM,GAAG,OACV,CAML,GADA,EAAQ,GAAY,KAAK,EAAE,CACvB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,GACxD,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,OAAQ,EAAG,OAAQ,EAAG,CAAE,EAAM,GAAG,OAAO,CAM7F,GADA,EAAQ,GAAS,KAAK,EAAE,CACpB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,GACxD,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,OAAQ,EAAG,OAAQ,EAAG,CAAE,EAAM,GAAG,OAAO,CAM7F,GADA,EAAQ,GAAQ,KAAK,EAAE,CACnB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,EAAI,IAAM,GAAK,GAAK,EAAI,GACpC,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,OAAQ,EAAG,OAAQ,EAAG,CAAE,EAAM,GAAG,OAAO,CAM7F,GADA,EAAQ,GAAS,KAAK,EAAE,CACpB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,EAAY,EAAG,EAAG,EAAE,CACtB,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KAAM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAMzF,GADA,EAAQ,EAAI,KAAK,EAAE,CACf,EAAO,CACT,IAAM,EAAS,EAAM,GAAK,EAAM,GAChC,GAAI,GAAkB,EAAO,CAC3B,MAAO,CAAC,CAAE,KAAM,MAAO,KAAM,EAAM,GAAI,MAAO,EAAQ,CAAE,EAAM,GAAG,OAAO,CAO5E,GADA,EAAQ,EAAc,KAAK,EAAE,CACzB,EACF,MAAO,CAAC,CAAE,KAAM,eAAgB,KAAM,EAAM,GAAI,CAAE,EAAM,GAAG,OAAO,CAKpE,IAAM,EAAa,EAAE,MAAM,iBAAiB,CAC5C,GAAI,EAAY,CACd,IAAM,EAAS,EAAW,GAC1B,MAAO,CAAC,CAAE,KAAM,QAAS,KAAM,EAAW,GAAI,GAAI,GAAI,SAAQ,CAAE,EAAW,GAAG,OAAO,CAKvF,GADA,EAAQ,GAAS,KAAK,EAAE,CACpB,EAAO,CACT,IAAI,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAS9B,GAPI,GAAK,KACP,GAAK,EAAI,GAAK,KAAO,KAGnB,EAAI,IAAM,GAAK,KACjB,CAAC,EAAG,GAAK,CAAC,EAAG,EAAE,EAEb,EAAY,EAAG,EAAG,EAAE,CACtB,MAAO,CAAC,CAAE,KAAM,OAAQ,KAAM,EAAM,GAAI,KA
AM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAMzF,GADA,EAAQ,GAAQ,KAAK,EAAE,CACnB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,GAAK,GAAK,IAAM,GAAK,GAAK,GAAK,EAAc,GACpD,MAAO,CAAC,CAAE,KAAM,UAAW,KAAM,EAAM,GAAI,KAAM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAM5F,GADA,EAAQ,GAAQ,KAAK,EAAE,CACnB,EAAO,CACT,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAC1B,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,GAAI,GAAK,MAAQ,GAAK,MAAQ,GAAK,GAAK,GAAK,GAC3C,MAAO,CAAC,CAAE,KAAM,UAAW,KAAM,EAAM,GAAI,KAAM,EAAG,MAAO,EAAG,IAAK,EAAG,CAAE,EAAM,GAAG,OAAO,CAM5F,GADA,EAAQ,GAAmB,KAAK,EAAE,CAC9B,EAAO,CACT,IAAM,EAAS,EAAM,GAErB,GAAI,CAAC,EAAa,IAAI,EAAO,CAAE,CAC7B,IAAM,EAAI,SAAS,EAAM,GAAI,GAAG,CAChC,MAAO,CAAC,CAAE,KAAM,aAAc,KAAM,EAAM,GAAI,MAAO,EAAG,SAAQ,CAAE,EAAM,GAAG,OAAO,EAKtF,IAAM,EAAiB,EAAE,MAAM,mCAAmC,CAClE,GAAI,EAAgB,CAClB,IAAM,EAAU,EAAe,GACzB,EAAO,EAAE,MAAM,EAAQ,OAAO,CAC9B,EAAY,EAAe,KAAK,EAAK,CAC3C,GAAI,EAAW,CACb,IAAM,EAAO,EAAU,GACjB,EAAW,EAAU,EACrB,EAAQ,WAAW,EAAQ,QAAQ,MAAO,GAAG,CAAC,QAAQ,IAAK,IAAI,CAAC,CACtE,GAAI,KAAQ,EAEV,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAU,QAAO,SADrC,EAAiB,GACmC,CAAE,EAAS,OAAO,CAEpF,GAAM,CAAC,GAAY,EAAS,GAI5B,OAHI,IAAS,KAAO,IAAS,IACpB,CAAC,CAAE,KAAM,UAAW,KAAM,EAAU,QAAO,CAAE,EAAS,OAAO,CAE/D,CAAC,CAAE,KAAM,cAAe,KAAM,EAAU,QAAO,KAAM,EAAU,CAAE,EAAS,OAAO,EAM5F,IAAM,EAAS,EAAE,MAAM,yCAAyC,CAChE,GAAI,GAAU,EAAO,GAAG,SAAS,IAAI,CAAE,CACrC,IAAM,EAAQ,WAAW,EAAO,GAAG,QAAQ,MAAO,GAAG,CAAC,QAAQ,IAAK,IAAI,CAAC,CACxE,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAO,GAAI,QAAO,CAAE,EAAO,GAAG,OAAO,CAIvE,IAAM,EAAS,EAAE,MAAM,yCAAyC,CAChE,GAAI,IAAW,EAAO,GAAG,SAAS,IAAI,EAAI,EAAO,GAAG,SAAS,IAAI,EAAG,CAClE,IAAM,EAAQ,WAAW,EAAO,GAAG,QAAQ,KAAM,GAAG,CAAC,CACrD,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAO,GAAI,QAAO,CAAE,EAAO,GAAG,OAAO,CAIvE,IAAM,EAAW,EAAE,MAAM,oBAAoB,CAC7C,GAAI,EAAU,CACZ,IAAM,EAAQ,SAAS,EAAS,GAAI,GAAG,CACvC,MAAO,CAAC,CAAE,KAAM,SAAU,KAAM,EAAS,GAAI,QAAO,CAAE,EAAS,GAAG,OAAO,CAI3E,MAAO,CAAC,CAAE,KAAM,UAAW,KAAM,EAAE,GAAI,CAAE,EAAE,CAM7C,SAAU,GAAc,EAA6B,CAEnD,GAAI,CAAC,EAAG,CACN,KAAM,CAAE,KAAM,UAAW,KAAM,
KAAM,CACrC,OAIF,GAAI,cAAc,KAAK,EAAE,EAAI,EAAa,IAAI,EAAE,CAAE,CAEhD,GAAI,EAAgB,EAAE,CAAE,CACtB,KAAM,CAAE,KAAM,WAAY,KAAM,EAAG,CACnC,OAEF,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAG,CAC/B,OAIF,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,QAAU,GAAI,CACvC,IAAM,EAAW,EAAc,KAAK,EAAE,CACtC,GAAI,EAAU,CACZ,IAAM,EAAK,EAAS,GACd,EAAS,EAAS,GAAK,EAAS,GAGtC,GAFA,KAAM,CAAE,KAAM,QAAS,KAAM,EAAS,GAAI,KAAI,SAAQ,CACtD,EAAI,EAAE,MAAM,EAAS,GAAG,OAAO,CAC3B,CAAC,EAAG,QAKZ,GAAI,EAAE,OAAS,GAAK,EAAY,IAAI,EAAE,GAAG,EAAI,EAAO,IAAI,EAAE,GAAG,CAAE,CAC7D,GAAM,CAAC,EAAO,GAAS,EAAY,EAAE,CAGrC,GAFA,MAAM,EACN,EAAI,EAAE,MAAM,EAAM,CACd,CAAC,EAAG,OAIV,GAAI,EAAE,OAAS,GAAKC,KAAkB,SAAS,EAAE,GAAG,EAAI,SAAS,KAAK,EAAE,GAAG,CAAE,CAC3E,IAAI,EAAI,EACR,KAAO,EAAI,EAAE,QAAU,SAAS,KAAK,EAAE,GAAG,EAAE,IAC5C,IAAM,EAAO,EAAE,MAAM,EAAG,EAAE,EAExB,EAAK,MAAM,EAAE,CAAC,aAAa,GAAK,EAAK,MAAM,EAAE,EAC5C,EAAI,GAAK,EAAK,MAAM,EAAE,CAAC,aAAa,GAAK,EAAK,MAAM,EAAE,IAEvD,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAM,CAClC,EAAI,EAAE,MAAM,EAAE,EAKlB,GAAI,EAAE,QAAU,EAAG,CACjB,GAAI,EAAc,SAAS,EAAE,GAAG,EAAI,EAAc,SAAS,EAAE,EAAE,OAAS,GAAG,CAAE,CAC3E,IAAM,EAAQ,EAAE,MAAM,EAAG,GAAG,CAC5B,GAAI,cAAc,KAAK,EAAM,CAAE,CAC7B,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAO,CACnC,MAAM,EAAM,EAAE,EAAE,OAAS,GAAI,IAAmB,CAChD,QAGJ,GAAI,EAAc,SAAS,EAAE,GAAG,EAAI,EAAc,SAAS,EAAE,EAAE,OAAS,GAAG,CAAE,CAC3E,IAAM,EAAQ,EAAE,MAAM,EAAG,GAAG,CAC5B,GAAI,cAAc,KAAK,EAAM,CAAE,CAC7B,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAO,CACnC,MAAM,EAAM,EAAE,EAAE,OAAS,GAAI,IAAmB,CAChD,SAiBN,IAXI,EAAE,OAAS,IACT,EAAc,SAAS,EAAE,GAAG,EAC9B,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,EAAI,EAAE,MAAM,EAAE,EACL,EAAc,SAAS,EAAE,GAAG,GACrC,MAAM,EAAM,EAAE,GAAI,IAAkB,CACpC,EAAI,EAAE,MAAM,EAAE,GAKX,GAAG,CAER,KAAO,GAAK,EAAgB,IAAI,EAAE,GAAG,EAAE,CAErC,GAAI,EAAE,WAAW,IAAI,CAAE,CACrB,IAAM,EAAe,EAAe,KAAK,EAAE,CAC3C,GAAI,EAAc,CAChB,KAAM,CAAE,KAAM,UAAW,KAAM,EAAa,GAAI,CAChD,EAAI,EAAE,MAAM,EAAa,GAAG,OAAO,CACnC,SAEF,IAAM,EAAc,EAAc,KAAK,EAAE,CACzC,GAAI,EAAa,CACf,KAAM,CAAE,KAAM,SAAU,KAAM,EAAY,GAAI,CAC9C,EAAI,EAAE,MAAM,EAAY,GAAG,OAAO,CAClC,SAEF
,IAAM,EAAc,EAAc,KAAK,EAAE,CACzC,GAAI,EAAa,CACf,KAAM,CAAE,KAAM,SAAU,KAAM,EAAY,GAAI,CAC9C,EAAI,EAAE,MAAM,EAAY,GAAG,OAAO,CAClC,UAIJ,GAAI,EAAE,WAAW,QAAQ,CAAE,CACzB,MAAM,EAAM,QAAS,MAAM,CAC3B,EAAI,EAAE,MAAM,EAAE,CACd,SAEF,GAAI,EAAE,WAAW,MAAM,CAAE,CACvB,MAAM,EAAM,MAAM,CAClB,EAAI,EAAE,MAAM,EAAE,CACd,SAEF,GAAI,EAAE,WAAW,MAAM,CAAE,CACvB,IAAI,EAAO,MACP,EAAO,EAAE,MAAM,EAAE,CACrB,KAAO,EAAK,WAAW,IAAI,EACzB,GAAQ,IACR,EAAO,EAAK,MAAM,EAAE,CAEtB,MAAM,EAAM,EAAM,IAAI,CACtB,EAAI,EACJ,SAEF,GAAI,EAAE,WAAW,IAAI,CAAE,CACrB,MAAM,EAAM,IAAI,CAChB,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,IAAM,KAAM,CACd,MAAM,EAAM,KAAM,IAAI,CACtB,EAAI,GACJ,SAEF,GAAI,EAAE,WAAW,KAAK,CAAE,CACtB,MAAM,EAAM,KAAM,IAAkB,CACpC,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,IAAM,MAAQ,IAAM,KAAM,CAE5B,MAAM,EAAM,EAAE,CACd,EAAI,GACJ,SAGF,GAAI,EAAQ,SAAS,EAAE,GAAG,CAAE,CAC1B,MAAM,EAAM,EAAE,GAAI,IAAO,CACzB,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,EAAc,SAAS,EAAE,GAAG,CAAE,CAChC,MAAM,EAAM,EAAE,GAAI,IAAmB,CACrC,EAAI,EAAE,MAAM,EAAE,CACd,SAEF,GAAI,EAAc,SAAS,EAAE,GAAG,CAAE,CAChC,MAAM,EAAM,EAAE,GAAI,IAAmB,CACrC,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,OAAS,EAAG,CACrC,IAAM,EAAY,EAAQ,KAAK,EAAE,CACjC,GAAI,EAAW,CAET,SAAS,KAAK,EAAU,GAAG,CAC7B,KAAM,CACJ,KAAM,UACN,KAAM,EAAU,GAChB,MAAO,SAAS,EAAU,GAAG,MAAM,EAAE,CAAE,GAAG,CAC3C,CAED,KAAM,CAAE,KAAM,UAAW,KAAM,EAAU,GAAI,CAE/C,EAAI,EAAE,MAAM,EAAU,GAAG,OAAO,CAChC,UAIJ,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,OAAS,EAAG,CACrC,IAAM,EAAY,EAAS,KAAK,EAAE,CAClC,GAAI,EAAW,CACb,KAAM,CAAE,KAAM,WAAY,KAAM,EAAU,GAAI,SAAU,EAAU,GAAG,MAAM,EAAE,CAAE,CAC/E,EAAI,EAAE,MAAM,EAAU,GAAG,OAAO,CAChC,UAIJ,GAAI,EAAE,WAAW,IAAI,EAAI,EAAE,OAAS,GAAK,EAAO,IAAI,EAAE,GAAG,CAAE,CACzD,IAAM,EAAW,EAAc,KAAK,EAAE,CACtC,GAAI,EAAU,CACZ,IAAM,EAAK,EAAS,GACd,EAAS,EAAS,GAAK,EAAS,GACtC,KAAM,CAAE,KAAM,QAAS,KAAM,EAAS,GAAI,KAAI,SAAQ,CACtD,EAAI,EAAE,MAAM,EAAS,GAAG,OAAO,CAC/B,UAIJ,MAAM,EAAM,EAAE,GAAG,CACjB,EAAI,EAAE,MAAM,EAAE,CAGhB,GAAI,CAAC,EAAG,MAGR,GAAI,EAAE,SAAS,IAAI,CAAE,CACnB,IAAM,EAAa,EAAM,KAAK,EAAE,CAChC,GAAI,EAAY,CACd,KAAM,CAAE,KAAM,QAAS,KAAM,EAAW,GAAI,CAC5C,E
AAI,EAAE,MAAM,EAAW,GAAG,OAAO,CACjC,UAKJ,GAAI,EAAW,KAAK,EAAE,CAAE,CAEtB,IAAI,EAAM,EACN,EAAW,GACf,KAAO,GAAO,EAAsB,IAAI,EAAI,EAAI,OAAS,GAAG,EAC1D,EAAW,EAAI,EAAI,OAAS,GAAK,EACjC,EAAM,EAAI,MAAM,EAAG,GAAG,CAExB,KAAM,CAAE,KAAM,MAAO,KAAM,EAAK,CAChC,EAAI,EACJ,SAIF,GAAI,EAAE,QAAU,GAAK,eAAe,KAAK,EAAE,EAAI,EAAE,SAAS,IAAI,CAAE,CAC9D,IAAM,EAAc,EAAO,KAAK,EAAE,CAClC,GAAI,EAAa,CACf,IAAI,EAAS,EAAY,GACrB,EAAW,EAAE,MAAM,EAAO,OAAO,CAErC,KAAO,GAAU,EAAgB,IAAI,EAAO,EAAO,OAAS,GAAG,EAC7D,EAAW,EAAO,EAAO,OAAS,GAAK,EACvC,EAAS,EAAO,MAAM,EAAG,GAAG,CAE9B,GAAI,EAAO,SAAS,IAAI,CAAE,CACxB,KAAM,CAAE,KAAM,SAAU,KAAM,EAAQ,CACtC,EAAI,EACJ,WAMN,GAAI,EAAO,IAAI,EAAE,GAAG,EAAK,EAAY,IAAI,EAAE,GAAG,EAAI,EAAE,OAAS,GAAK,EAAO,IAAI,EAAE,GAAG,CAAG,CACnF,GAAM,CAAC,EAAO,GAAS,EAAY,EAAE,CAKrC,GAJA,MAAM,EACN,EAAI,EAAE,MAAM,EAAM,CAGd,EAAG,CACL,IAAM,EAAY,EAAe,KAAK,EAAE,CACpC,IACF,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAU,GAAI,CAC1C,EAAI,EAAE,MAAM,EAAU,GAAG,OAAO,EAGpC,SAIF,GAAI,UAAU,KAAK,EAAE,CAAE,CACrB,IAAI,EAAI,EACF,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,IAAK,IAAK,IAAK,IAAQ,IAAS,CAAC,CACnE,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,IAAI,CAAC,CAE7C,KAAO,EAAI,EAAE,QACX,GAAI,SAAS,KAAK,EAAE,GAAG,CACrB,YACS,EAAO,IAAI,EAAE,GAAG,CAEzB,YACS,EAAa,IAAI,EAAE,GAAG,EAAI,EAAI,EAAI,EAAE,QAAU,SAAS,KAAK,EAAE,EAAI,GAAG,CAC9E,SAEA,MAIA,EAAI,EAAE,QAAU,EAAa,IAAI,EAAE,GAAG,EACxC,IAEF,IAAM,EAAgB,EAAE,MAAM,EAAG,EAAE,CAGnC,GAAI,EAAgB,EAAc,CAAE,CAClC,KAAM,CAAE,KAAM,WAAY,KAAM,EAAe,CAC/C,EAAI,EAAE,MAAM,EAAE,CACd,SAGF,KAAM,CAAE,KAAM,OAAQ,KAAM,EAAe,CAC3C,EAAI,EAAE,MAAM,EAAE,CACd,SAIF,KAAM,CAAE,KAAM,UAAW,KAAM,EAAE,GAAI,CACrC,EAAI,EAAE,MAAM,EAAE,EAelB,SAAU,GACR,EACA,EACuB,CACvB,IAAM,EAAa,EAAyB,EAAiB,EAAK,CAAG,EAGjE,EAAS,EAGP,EAAa,yBACf,EACA,EAAqB,GAEzB,MAAQ,EAAQ,EAAW,KAAK,EAAW,IAAM,MAC3C,EAAM,IAER,AAGE,KADA,KAAM,CAAE,KAAM,GAAI,MAAO,EAAQ,IAAK,EAAQ,CACzB,IAEvB,KAAM,CAAE,KAAM,EAAM,GAAI,MAAO,EAAM,MAAO,IAAK,EAAM,MAAQ,EAAM,GAAG,OAAQ,EACvE,EAAM,KAEf,EAAqB,IAGvB,EAAS,EAAM,MAAQ,EAAM,GAAG,OAOpC,SAAgB,GAAI,EAAc,EAAyB,GAAM,EAAiB,GAAgB,CAChG,IAAM,EAAkB,EAAE,CAE1B,IAAK,IAAM,KAAc,GAAoB,EAAM,EAAuB,CAAE,
CAE1E,IAAI,EAAW,EAEf,IAAK,IAAM,KAAS,GAAc,EAAW,KAAK,CAAE,CAClD,GAAI,GAAkB,EAAM,OAAS,KAAM,CAEzC,IAAM,EAAM,EAAW,KAAK,QAAQ,EAAM,KAAM,EAAS,CACrD,IAAQ,KACV,EAAM,KAAO,CACX,MAAO,EAAW,MAAQ,EAC1B,IAAK,EAAW,MAAQ,EAAM,EAAM,KAAK,OAC1C,CACD,EAAW,EAAM,EAAM,KAAK,aAErB,GAAkB,EAAM,OAAS,OAE1C,EAAM,KAAO,CAAE,MAAO,EAAW,MAAO,IAAK,EAAW,MAAO,EAEjE,EAAO,KAAK,EAAM,EAItB,OAAO,ECpsBT,MAAa,GAAwC,CAEnD,GAAI,QACJ,MAAO,QACP,IAAK,MACL,OAAQ,MACR,GAAI,OACJ,MAAO,OACP,GAAI,SACJ,MAAO,SACP,KAAM,YACN,QAAS,YAGT,GAAI,aACJ,MAAO,aACP,IAAK,kBACL,OAAQ,kBACR,IAAK,sBACL,OAAQ,sBACR,GAAI,iBACJ,MAAO,iBACP,IAAK,eACL,OAAQ,eACR,IAAK,sBACL,OAAQ,sBAGR,IAAK,YACL,QAAS,YACT,MAAO,mBACP,WAAY,mBACZ,IAAK,iBACL,SAAU,iBACV,GAAI,SACJ,OAAQ,SACR,IAAK,kBACL,WAAY,kBACZ,IAAK,WACL,OAAQ,WACR,IAAK,WACL,OAAQ,WACR,IAAK,kBACL,SAAU,kBACV,IAAK,aACL,OAAQ,aACR,GAAI,QACJ,MAAO,QACP,IAAK,YACL,OAAQ,YACR,MAAO,YACP,SAAU,YACV,GAAI,QACJ,MAAO,QACP,GAAI,eACJ,MAAO,eACP,GAAI,cACJ,OAAQ,cAGR,GAAI,UACJ,MAAO,UACP,GAAI,QACJ,MAAO,QAGP,GAAI,sBACJ,OAAQ,sBACR,GAAI,oBACJ,OAAQ,oBACR,GAAI,mBACJ,OAAQ,mBAER,OAAQ,iBACR,IAAK,uBACL,QAAS,uBACT,IAAK,oCACL,IAAK,oCAGL,IAAK,YACL,OAAQ,YACR,IAAK,UACL,OAAQ,UACR,GAAI,WACJ,MAAO,WAGP,EAAG,SACH,KAAM,SACN,EAAG,QACH,KAAM,QACN,EAAG,SACH,KAAM,SACN,EAAG,SACH,KAAM,SACN,GAAI,aACJ,OAAQ,aACR,GAAI,aACJ,OAAQ,aACR,GAAI,YACJ,OAAQ,YACR,GAAI,YACJ,OAAQ,YAGR,IAAK,SACL,OAAQ,SACR,MAAO,UACP,SAAU,UACV,KAAM,UACN,QAAS,UACT,GAAI,aACJ,MAAO,aACP,IAAK,aACL,OAAQ,aACT,CAKY,GAAyB,IAAI,IAAI,CAC5C,OACA,UACA,QACA,MACA,UACA,QACA,MACD,CAAC,CC1HF,SAASC,EAAW,EAAc,EAAoC,CAIpE,OAHI,EAAM,MAAQ,EAAK,KACd,CAAE,MAAO,EAAM,KAAK,MAAO,IAAK,EAAK,KAAK,IAAK,CAEjD,EAAM,MAAQ,EAAK,KAM5B,SAAgB,GAAiB,EAA0B,CACzD,IAAM,EAAkB,EAAE,CACtB,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAQ,EAAO,GACf,EAAO,EAAO,EAAI,GAGxB,GAAI,EAAM,OAAS,QAAU,GAAM,OAAS,eAAiB,EAAK,OAAS,IAAK,CAC9E,IAAM,EAAmB,EAAM,KAAO,IACtC,GAAI,KAAoB,IAAiB,KAAoB,EAAe,CAC1E,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAkB,KAAMA,EAAW,EAAO,EAAK,CAAE,CAAC,CACpF,GAAK,EACL,UAKJ,GAAI,EAAM,OAAS,eAAiB,EAAM,QAAQ,GAAoB,GAAM,OAAS,SAAU,CAC7F,IAAM,EAAM,EAAiB,EAAM,M
ACnC,EAAO,KAAK,CACV,KAAM,SACN,KAAM,EAAM,KAAO,EAAK,KACxB,MAAO,EAAK,MACZ,SAAU,EACV,KAAMA,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,SAIF,GAAI,EAAM,OAAS,UAAY,GAAM,OAAS,OAAQ,CACpD,IAAM,EAAe,EAAK,KAC1B,GAAI,EAAgB,IAAI,EAAa,CAAE,CACrC,EAAO,KAAK,CACV,KAAM,SACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,MAAO,EAAM,MACb,SAAU,EACV,KAAMA,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,SAGF,GAAI,KAAgB,EAAe,CACjC,IAAM,EAAa,EAAc,GACjC,EAAO,KAAK,CACV,KAAM,SACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,MAAO,EAAM,MAAQ,EACrB,SAAU,MACV,KAAMA,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,UAKJ,GACE,EAAM,OAAS,UACf,GAAM,OAAS,QACf,CAAC,UAAW,WAAY,WAAY,gBAAgB,CAAC,SAAS,EAAK,KAAK,aAAa,CAAC,CACtF,CACA,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,MAAO,EAAM,MACb,KAAMA,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,SAIF,IAAK,EAAM,OAAS,QAAU,EAAM,OAAS,YAAc,GAAM,OAAS,OAAQ,CAChF,EAAO,KAAK,CACV,KAAM,YACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EAAM,KACZ,MAAO,EAAM,MACb,IAAK,EAAM,IACX,KAAM,EAAK,KACX,OAAQ,EAAK,OACb,OAAQ,EAAK,OACb,KAAMA,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,SAIF,EAAO,KAAK,EAAM,CAClB,IAGF,OAAO,ECxGT,SAAS,GAAqB,EAA2B,CAEvD,GAAI,EAAU,OAAS,SAAW,EAAU,OAAS,UACnD,MAAO,GAIT,GAAI,EAAU,OAAS,QAAU,EAAU,KAAK,OAAS,EAAG,CAC1D,IAAM,EAAY,EAAU,KAAK,GACjC,GAAI,IAAc,EAAU,aAAa,EAAI,IAAc,EAAU,aAAa,CAShF,MADA,EANc,EAAU,KAAK,aAAa,GAE7B,GAET,EAAe,EAAU,KAAK,EAE9B,EAAgB,IAAI,EAAU,KAAK,EAK3C,MAAO,GAMT,SAAgB,GAAmB,EAA0B,CAC3D,GAAI,EAAO,SAAW,EAAG,MAAO,EAAE,CAElC,IAAM,EAAkB,EAAE,CACtB,EAAa,GACb,EAAI,EAEF,OAA8B,CAAE,KAAM,UAAW,KAAM,KAAM,EAC7D,OAA4B,CAAE,KAAM,QAAS,KAAM,KAAM,EAE/D,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAQ,EAAO,GACf,EAAO,EAAO,EAAI,GAGxB,GAAI,EAAM,OAAS,UAAW,CAC5B,AAEE,KADA,EAAO,KAAK,GAAa,CAAC,CACb,IAGf,IACA,SAUF,GANA,AAEE,KADA,EAAO,KAAK,GAAe,CAAC,CACf,IAIX,EAAM,OAAS,eAAiB,EAAgB,IAAI,EAAM,WAAW,CAAE,CAEzE,GAAI,EAAM,aAAe,KAAO,GAAQ,CAAC,GAAqB,EAAK,CAAE,CACnE,EAAO,KAAK,EAAM,CAClB,IACA,SAIF,IAAI,EAAe,EAAM,KACrB,EAAI,EAAI,EACZ,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAU,EAAO,GAEvB,GADI,EAAQ,OAAS,eACjB,CAAC,EAAmB,IAAI,EAAQ,WAAW,CAAE,MACjD,GAAgB,EAAQ,KACxB,IAaF,IA
TI,EAAI,EAAI,GACV,EAAO,KAAK,CAAE,GAAG,EAAO,KAAM,EAAc,CAAC,CAC7C,EAAI,IAEJ,EAAO,KAAK,EAAM,CAClB,KAIK,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAM,EAAO,GAEnB,GADI,EAAI,OAAS,eACb,CAAC,EAAmB,IAAI,EAAI,WAAW,CAAE,MAC7C,EAAO,KAAK,EAAI,CAChB,IAIF,EAAO,KAAK,GAAa,CAAC,CAC1B,EAAa,GACb,SAIF,EAAO,KAAK,EAAM,CAClB,IAQF,OAJI,GACF,EAAO,KAAK,GAAa,CAAC,CAGrB,ECtHT,SAAS,EAAW,EAAc,EAAoC,CAIpE,OAHI,EAAM,MAAQ,EAAK,KACd,CAAE,MAAO,EAAM,KAAK,MAAO,IAAK,EAAK,KAAK,IAAK,CAEjD,EAAM,MAAQ,EAAK,KAe5B,MAAM,GAAoB,IAAI,IAAI,CAAC,IAAQ,IAAQ,CAAC,CAKpD,SAAS,GAAS,EAAc,EAAe,GAAsB,CAKnE,OAJI,EAAM,OAAS,QAEf,CAAC,GAAgB,EAAgB,IAAI,EAAM,KAAK,CAAS,KAEtD,EADO,EAAM,KAAK,aAAa,GACd,KAM1B,SAAS,EAAkB,EAAuB,CAChD,OAAO,EAAM,OAAS,eAAiB,GAAkB,IAAI,EAAM,KAAK,CAY1E,SAAS,GAAiB,EAAiB,EAA4C,CACrF,IAAM,EAAoB,EAAE,CACxB,EAAI,EAGR,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAO,EAAO,GACd,EAAS,EAAO,EAAI,GAG1B,GAAI,GAAM,OAAS,QAAU,CAAC,GAAU,CAAC,EAAkB,EAAO,CAChE,MAGF,EAAS,KAAK,EAAK,CACnB,EAAS,KAAK,EAAO,CACrB,GAAK,EAGL,IAAM,EAAa,EAAO,GACtB,GAAY,OAAS,eAAiB,EAAW,OAAS,MAC5D,EAAS,KAAK,EAAW,CACzB,KAKJ,GAAI,EAAS,SAAW,EACtB,OAAO,KAIT,IAAM,EAAc,EAAO,GAC3B,GACE,CAAC,GACD,EAAY,OAAS,QACpB,EAAY,KAAK,aAAa,GAAK,MAAQ,EAAY,KAAK,aAAa,GAAK,MAE/E,OAAO,KAIT,IAAM,EAAS,EAAO,EAAI,GAC1B,GAAI,CAAC,GAAU,EAAO,OAAS,OAC7B,OAAO,KAOT,IAAI,EADU,CAAC,GAAG,EAAU,EAAa,EAAO,CAC/B,IAAK,GAAM,EAAE,KAAK,CAAC,KAAK,IAAI,CAC7C,EAAO,EAAK,QAAQ,MAAO,IAAI,CAAC,QAAQ,MAAO,IAAI,CAGnD,IAAM,EAAO,EAAW,EAAO,GAAa,EAAO,CACnD,MAAO,CAAC,CAAE,KAAM,OAAQ,OAAM,OAAM,CAAE,EAAI,EAAE,CAM9C,SAAgB,GAAe,EAA0B,CACvD,IAAM,EAAkB,EAAE,CACtB,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,IAAM,EAAQ,EAAO,GACf,EAAO,EAAO,EAAI,GAGlB,EAAW,GAAiB,EAAQ,EAAE,CAC5C,GAAI,EAAU,CACZ,EAAO,KAAK,EAAS,GAAG,CACxB,EAAI,EAAS,GACb,SAIF,GAAI,EAAM,OAAS,QAAU,GAAM,OAAS,eAAiB,EAAK,OAAS,IAAK,CAC9E,IAAM,EAAO,EAAM,KAAK,QAAQ,MAAO,GAAG,CAC1C,GAAI,GAAuB,IAAI,EAAK,CAAE,CAEpC,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAM,KAAO,IAAK,KAAM,EAAW,EAAO,EAAK,CAAE,CAAC,CACpF,GAAK,EACL,UAKJ,IAAK,EAAM,OAAS,QAAU,EAAM,OAAS,WAAa,GAAM,OAAS,OAAQ,CAC/E,IAAM,GAAM,EAAM,KAAkB,EAAM,OACtC,EAAwB,KAM5B,GALI,EAAI,
IAAI,EAAK,KAAK,CACpB,EAAS,CAAC,EACD,EAAG,IAAI,EAAK,KAAK,GAC1B,EAAS,GAEP,IAAW,KAAM,CACnB,IAAI,EAAO,EAAM,KAAO,IAAM,EAAK,KAC/B,EAAmB,EACvB,GAAK,EAED,EAAO,IAAI,OAAS,eAAiB,EAAO,GAAG,OAAS,MAC1D,GAAQ,IACR,EAAY,EAAO,GACnB,KAEF,EAAO,KAAK,CAAE,KAAM,OAAQ,OAAM,MAAO,EAAQ,KAAM,EAAW,EAAO,EAAU,CAAE,CAAC,CACtF,UAKJ,IAAK,EAAM,OAAS,WAAa,EAAM,OAAS,WAAa,GAAM,OAAS,OAAQ,CAClF,IAAM,EAAQ,GAAS,EAAM,GAAK,CAClC,GAAI,IAAU,KAAM,CAClB,IAAM,GAAM,EAAM,KAAqB,EAAM,OAC7C,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EACN,QACA,MACA,KAAM,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,UAKJ,IACG,EAAM,OAAS,QAAU,EAAM,OAAS,YACzC,EAAM,OAAS,GACf,GAAM,OAAS,SACf,CACA,IAAM,EAAO,EAAK,MAClB,GAAI,GAAQ,MAAQ,GAAQ,KAAM,CAChC,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,OACA,MAAO,EAAM,MACb,IAAK,EAAM,IACX,KAAM,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,UAKJ,IACG,EAAM,OAAS,QAAU,EAAM,OAAS,YACzC,EAAM,OAAS,GACf,GAAM,OAAS,OACf,CACA,EAAO,KAAK,CACV,KAAM,UACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EAAK,MACX,MAAO,EAAM,MACb,IAAK,EAAM,IACX,KAAM,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,SAIF,GACE,EAAM,OAAS,QACf,EAAc,IAAI,EAAM,KAAK,aAAa,CAAC,EAC3C,GAAM,OAAS,OACf,CACA,EAAO,KAAK,CACV,GAAG,EACH,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,SAIF,GACE,EAAM,OAAS,QACf,EAAc,IAAI,EAAM,KAAK,aAAa,CAAC,EAC3C,GAAM,OAAS,OACf,CACA,IAAM,EAAY,GAAc,EAAK,KAAK,aAAa,EACvD,GAAI,EAAW,CACb,EAAO,KAAK,CACV,KAAM,OACN,KAAM,EAAM,KAAO,IAAM,EAAK,KAC9B,KAAM,EAAU,GAChB,OAAQ,EAAU,GAClB,OAAQ,EAAU,GAClB,KAAM,EAAW,EAAO,EAAK,CAC9B,CAAC,CACF,GAAK,EACL,UAKJ,EAAO,KAAK,EAAM,CAClB,IAGF,OAAO,ECpPT,SAAgB,EAAS,EAAc,EAA2B,EAAE,CAAW,CAC7E,GAAM,CACJ,yBAAyB,GACzB,yBAAyB,GACzB,iBAAiB,IACf,EAIA,EAAS,GAAI,EAAM,EAAwB,EAAe,CAgB9D,MAbA,GAAS,GAAiB,EAAO,CAGjC,EAAS,GAAe,EAAO,CAG/B,AAIE,EAJE,EACO,GAAmB,EAAO,CAG1B,EAAO,OAAQ,GAAM,EAAE,OAAS,UAAU,CAG9C,ECjCT,SAAgB,GAAmB,EAAwB,CACzD,IAAM,EAAS,EAAS,EAAM,CAAE,uBAAwB,GAAM,CAAC,CACzD,EAAsB,EAAE,CAC1B,EAA4B,EAAE,CAElC,IAAK,IAAM,KAAS,EACd,EAAM,OAAS,UACjB,EAAkB,EAAE,CACX,EAAM,OAAS,SACpB,EAAgB,OAAS,GAC3B,EAAU,K
AAK,EAAW,EAAgB,CAAC,CAE7C,EAAkB,EAAE,EACX,EAAM,OAAS,MACxB,EAAgB,KAAK,GAAa,EAAM,CAAC,CAS7C,OAJI,EAAgB,OAAS,GAC3B,EAAU,KAAK,EAAW,EAAgB,CAAC,CAGtC,EAMT,SAAS,GAAa,EAAsB,CAI1C,OAHI,EAAM,OAAS,cACV,EAAM,WAER,EAAM,MAAQ,GAMvB,SAAS,EAAW,EAAyB,CAC3C,GAAI,EAAM,SAAW,EAAG,MAAO,GAE/B,IAAI,EAAS,EAAM,GAEnB,IAAK,IAAI,EAAI,EAAG,EAAI,EAAM,OAAQ,IAAK,CACrC,IAAM,EAAO,EAAM,EAAI,GACjB,EAAO,EAAM,GAGA,GAAe,EAAM,EAAK,CAE3C,GAAU,IAAM,EAEhB,GAAU,EAId,OAAO,EAMT,SAAS,GAAe,EAAc,EAAuB,CAC3D,GAAI,CAAC,GAAQ,CAAC,EAAM,MAAO,GAE3B,IAAM,EAAW,EAAK,EAAK,OAAS,GAC9B,EAAY,EAAK,GA6BvB,MAFA,EAvBqB,IAAI,IAAI,CAAC,IAAK,IAAK,IAAU,IAAU,IAAU,IAAI,CAAC,CAC1D,IAAI,EAAS,EAIT,IAAI,IAAI,CAC3B,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACA,IACD,CAAC,CACe,IAAI,EAAU,EAG3B,IAAa,KAAO,IAAc"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tokenize-is",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "TypeScript tokenizer for Icelandic text",
5
5
  "keywords": [
6
6
  "icelandic",
@@ -13,7 +13,7 @@
13
13
  "author": "Jökull Sólberg",
14
14
  "repository": {
15
15
  "type": "git",
16
- "url": "https://github.com/jokull/tokenize-ts"
16
+ "url": "git+https://github.com/jokull/tokenize-ts.git"
17
17
  },
18
18
  "files": [
19
19
  "dist"
@@ -27,6 +27,16 @@
27
27
  },
28
28
  "./package.json": "./package.json"
29
29
  },
30
+ "devDependencies": {
31
+ "@changesets/cli": "^2.29.8",
32
+ "@types/node": "^25.0.10",
33
+ "lefthook": "^2.0.15",
34
+ "oxlint": "^1.41.0",
35
+ "oxlint-tsgolint": "^0.11.1",
36
+ "tsdown": "^0.20.1",
37
+ "typescript": "^5.9.3",
38
+ "vitest": "^4.0.18"
39
+ },
30
40
  "scripts": {
31
41
  "build": "tsdown",
32
42
  "test": "vitest run",
@@ -39,18 +49,6 @@
39
49
  "check": "pnpm lint && pnpm format:check && pnpm typecheck",
40
50
  "changeset": "changeset",
41
51
  "version": "changeset version",
42
- "release": "pnpm build && changeset publish",
43
- "prepare": "lefthook install"
44
- },
45
- "devDependencies": {
46
- "@changesets/cli": "^2.29.8",
47
- "@types/node": "^25.0.10",
48
- "lefthook": "^2.0.15",
49
- "oxlint": "^1.41.0",
50
- "oxlint-tsgolint": "^0.11.1",
51
- "tsdown": "^0.20.1",
52
- "typescript": "^5.9.3",
53
- "vitest": "^4.0.18"
54
- },
55
- "packageManager": "pnpm@10.10.0"
56
- }
52
+ "release": "pnpm build && changeset publish"
53
+ }
54
+ }