@formatjs/intl-segmenter 12.0.7 → 12.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/segmenter.js CHANGED
@@ -1,346 +1,350 @@
1
- import { __assign, __spreadArray } from "tslib";
2
- import { CanonicalizeLocaleList, GetOption, GetOptionsObject, SupportedLocales, getInternalSlot, getMultiInternalSlots, setInternalSlot, } from '@formatjs/ecma402-abstract';
3
- import { ResolveLocale } from '@formatjs/intl-localematcher';
4
- import { SegmentationRules } from './cldr-segmentation-rules.generated.js';
5
- import { isSurrogate, replaceVariables } from './segmentation-utils.js';
1
+ import { CanonicalizeLocaleList, GetOption, GetOptionsObject, SupportedLocales, getInternalSlot, getMultiInternalSlots, setInternalSlot } from "@formatjs/ecma402-abstract";
2
+ import { ResolveLocale } from "@formatjs/intl-localematcher";
3
+ import { SegmentationRules } from "./cldr-segmentation-rules.generated.js";
4
+ import { isSurrogate, replaceVariables } from "./segmentation-utils.js";
6
5
  // Cached regex patterns for word character detection
7
6
  // Note: Unicode property escape regex is created at runtime in try-catch
8
7
  // to avoid compile-time errors when targeting ES5
9
- var WORD_CHARACTERS_BASIC_REGEX = /\w/;
8
+ const WORD_CHARACTERS_BASIC_REGEX = /\w/;
10
9
  // Lazy-initialized Unicode word character regex (null if not supported)
11
- var WORD_CHARACTERS_UNICODE_REGEX = undefined;
10
+ let WORD_CHARACTERS_UNICODE_REGEX = undefined;
12
11
  /**
13
- * Adds $ to before rules and ^ to after rules for strictness
14
- * Replaces variables
15
- * Initializes the RegExp
16
- *
17
- * @param rule raw rule string from cldr-segmentation-rules.generated
18
- * @param variables
19
- * @param after appends ^ if true and $ if false
20
- * @returns
21
- */
22
- var generateRuleRegex = function (rule, variables, after) {
23
- return new RegExp("".concat(after ? '^' : '').concat(replaceVariables(variables, rule)).concat(after ? '' : '$'));
12
+ * Adds $ to before rules and ^ to after rules for strictness
13
+ * Replaces variables
14
+ * Initializes the RegExp
15
+ *
16
+ * @param rule raw rule string from cldr-segmentation-rules.generated
17
+ * @param variables
18
+ * @param after appends ^ if true and $ if false
19
+ * @returns
20
+ */
21
+ const generateRuleRegex = (rule, variables, after) => {
22
+ return new RegExp(`${after ? "^" : ""}${replaceVariables(variables, rule)}${after ? "" : "$"}`);
24
23
  };
25
- var prepareLocaleSegmentationRules = function (segmentationTypeValue) {
26
- var preparedRules = {};
27
- for (var _i = 0, _a = Object.keys(segmentationTypeValue.segmentRules); _i < _a.length; _i++) {
28
- var ruleNr = _a[_i];
29
- var ruleValue = segmentationTypeValue.segmentRules[ruleNr];
30
- var preparedRule = {
31
- breaks: ruleValue.breaks,
32
- };
33
- if ('before' in ruleValue && ruleValue.before) {
34
- preparedRule.before = generateRuleRegex(ruleValue.before, segmentationTypeValue.variables, false);
35
- }
36
- if ('after' in ruleValue && ruleValue.after) {
37
- preparedRule.after = generateRuleRegex(ruleValue.after, segmentationTypeValue.variables, true);
38
- }
39
- preparedRules[ruleNr] = preparedRule;
40
- }
41
- return preparedRules;
24
+ const prepareLocaleSegmentationRules = (segmentationTypeValue) => {
25
+ const preparedRules = {};
26
+ for (const ruleNr of Object.keys(segmentationTypeValue.segmentRules)) {
27
+ const ruleValue = segmentationTypeValue.segmentRules[ruleNr];
28
+ const preparedRule = { breaks: ruleValue.breaks };
29
+ if ("before" in ruleValue && ruleValue.before) {
30
+ preparedRule.before = generateRuleRegex(ruleValue.before, segmentationTypeValue.variables, false);
31
+ }
32
+ if ("after" in ruleValue && ruleValue.after) {
33
+ preparedRule.after = generateRuleRegex(ruleValue.after, segmentationTypeValue.variables, true);
34
+ }
35
+ preparedRules[ruleNr] = preparedRule;
36
+ }
37
+ return preparedRules;
42
38
  };
43
- var breaksAtResult = function (breaks, matchingRule) { return ({
44
- breaks: breaks,
45
- matchingRule: matchingRule,
46
- }); };
47
- var Segmenter = /** @class */ (function () {
48
- function Segmenter(locales, options) {
49
- var _newTarget = this.constructor;
50
- if (_newTarget === undefined) {
51
- throw TypeError("Constructor Intl.Segmenter requires 'new'");
52
- }
53
- var requestedLocales = CanonicalizeLocaleList(locales);
54
- options = GetOptionsObject(options);
55
- var opt = Object.create(null);
56
- var matcher = GetOption(options, 'localeMatcher', 'string', ['lookup', 'best fit'], 'best fit');
57
- opt.localeMatcher = matcher;
58
- var granularity = GetOption(options, 'granularity', 'string', ['word', 'sentence', 'grapheme'], 'grapheme');
59
- setSlot(this, 'granularity', granularity);
60
- //TODO: figure out correct availible locales
61
- var r = ResolveLocale(Segmenter.availableLocales, //availible locales
62
- requestedLocales, opt, [], // there is no relevantExtensionKeys
63
- {}, function () { return ''; } //use only root rules
64
- );
65
- setSlot(this, 'locale', r.locale);
66
- //root rules based on granularity
67
- this.mergedSegmentationTypeValue = SegmentationRules.root[granularity];
68
- //merge root rules with locale ones if locale is specified
69
- if (r.locale.length) {
70
- var localeOverrides = SegmentationRules[r.locale];
71
- if (granularity in localeOverrides) {
72
- var localeSegmentationTypeValue = localeOverrides[granularity];
73
- this.mergedSegmentationTypeValue.variables = __assign(__assign({}, this.mergedSegmentationTypeValue.variables), localeSegmentationTypeValue.variables);
74
- this.mergedSegmentationTypeValue.segmentRules = __assign(__assign({}, this.mergedSegmentationTypeValue.segmentRules), localeSegmentationTypeValue.segmentRules);
75
- this.mergedSegmentationTypeValue.suppressions = __spreadArray(__spreadArray([], this.mergedSegmentationTypeValue.suppressions, true), localeSegmentationTypeValue.suppressions, true);
76
- }
77
- }
78
- //prepare rules
79
- this.rules = prepareLocaleSegmentationRules(this.mergedSegmentationTypeValue);
80
- //order rule keys
81
- this.ruleSortedKeys = Object.keys(this.rules).sort(function (a, b) { return Number(a) - Number(b); });
82
- }
83
- Segmenter.prototype.breaksAt = function (position, input) {
84
- var ruleSortedKeys = this.ruleSortedKeys;
85
- var rules = this.rules;
86
- var mergedSegmentationTypeValue = this.mergedSegmentationTypeValue;
87
- //artificial rule 0.2
88
- if (position === 0) {
89
- return breaksAtResult(true, '0.2');
90
- }
91
- if (position === input.length) {
92
- //rule 0.3
93
- return breaksAtResult(true, '0.3');
94
- }
95
- //artificial rule 0.1: js specific, due to es5 regex not being unicode aware
96
- //number 0.1 chosen to mimic java implementation, but needs to execute after 0.2 and 0.3 to be inside the string bounds
97
- if (isSurrogate(input, position)) {
98
- return breaksAtResult(false, '0.1');
99
- }
100
- var stringBeforeBreak = input.substring(0, position);
101
- var stringAfterBreak = input.substring(position);
102
- //artificial rule 0.4: handle suppressions
103
- if ('suppressions' in mergedSegmentationTypeValue) {
104
- for (var _i = 0, _a = mergedSegmentationTypeValue.suppressions; _i < _a.length; _i++) {
105
- var suppressions = _a[_i];
106
- if (stringBeforeBreak.trim().endsWith(suppressions)) {
107
- return breaksAtResult(false, '0.4');
108
- }
109
- }
110
- }
111
- // loop through rules and find a match
112
- for (var _b = 0, ruleSortedKeys_1 = ruleSortedKeys; _b < ruleSortedKeys_1.length; _b++) {
113
- var ruleKey = ruleSortedKeys_1[_b];
114
- var _c = rules[ruleKey], before = _c.before, after = _c.after, breaks = _c.breaks;
115
- // for debugging
116
- // if (ruleKey === '16' && position === 4) {
117
- // console.log({before, after, stringBeforeBreak, stringAfterBreak})
118
- // }
119
- if (before) {
120
- if (!before.test(stringBeforeBreak)) {
121
- //didn't match the before part, therfore skipping
122
- continue;
123
- }
124
- }
125
- if (after) {
126
- if (!after.test(stringAfterBreak)) {
127
- //didn't match the after part, therfore skipping
128
- continue;
129
- }
130
- }
131
- return breaksAtResult(breaks, ruleKey);
132
- }
133
- //artificial rule 999: if no rule matched is Any ÷ Any so return true
134
- return breaksAtResult(true, '999');
135
- };
136
- Segmenter.prototype.segment = function (input) {
137
- checkReceiver(this, 'segment');
138
- return new SegmentIterator(this, input);
139
- };
140
- Segmenter.prototype.resolvedOptions = function () {
141
- checkReceiver(this, 'resolvedOptions');
142
- return __assign({}, getMultiInternalSlots(__INTERNAL_SLOT_MAP__, this, 'locale', 'granularity'));
143
- };
144
- Segmenter.supportedLocalesOf = function (locales, options) {
145
- return SupportedLocales(Segmenter.availableLocales, CanonicalizeLocaleList(locales), options);
146
- };
147
- Segmenter.availableLocales = new Set(Object.keys(SegmentationRules).filter(function (key) { return key !== 'root'; }));
148
- Segmenter.polyfilled = true;
149
- return Segmenter;
150
- }());
151
- export { Segmenter };
39
+ const breaksAtResult = (breaks, matchingRule) => ({
40
+ breaks,
41
+ matchingRule
42
+ });
43
+ export class Segmenter {
44
+ rules;
45
+ ruleSortedKeys;
46
+ mergedSegmentationTypeValue;
47
+ constructor(locales, options) {
48
+ if (new.target === undefined) {
49
+ throw TypeError(`Constructor Intl.Segmenter requires 'new'`);
50
+ }
51
+ const requestedLocales = CanonicalizeLocaleList(locales);
52
+ options = GetOptionsObject(options);
53
+ const opt = Object.create(null);
54
+ const matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
55
+ opt.localeMatcher = matcher;
56
+ const granularity = GetOption(options, "granularity", "string", [
57
+ "word",
58
+ "sentence",
59
+ "grapheme"
60
+ ], "grapheme");
61
+ setSlot(this, "granularity", granularity);
62
+ //TODO: figure out correct available locales
63
+ const r = ResolveLocale(Segmenter.availableLocales, requestedLocales, opt, [], {}, () => "");
64
+ setSlot(this, "locale", r.locale);
65
+ //root rules based on granularity
66
+ this.mergedSegmentationTypeValue = SegmentationRules.root[granularity];
67
+ //merge root rules with locale ones if locale is specified
68
+ if (r.locale.length) {
69
+ const localeOverrides = SegmentationRules[r.locale];
70
+ if (granularity in localeOverrides) {
71
+ const localeSegmentationTypeValue = localeOverrides[granularity];
72
+ this.mergedSegmentationTypeValue.variables = {
73
+ ...this.mergedSegmentationTypeValue.variables,
74
+ ...localeSegmentationTypeValue.variables
75
+ };
76
+ this.mergedSegmentationTypeValue.segmentRules = {
77
+ ...this.mergedSegmentationTypeValue.segmentRules,
78
+ ...localeSegmentationTypeValue.segmentRules
79
+ };
80
+ this.mergedSegmentationTypeValue.suppressions = [...this.mergedSegmentationTypeValue.suppressions, ...localeSegmentationTypeValue.suppressions];
81
+ }
82
+ }
83
+ //prepare rules
84
+ this.rules = prepareLocaleSegmentationRules(this.mergedSegmentationTypeValue);
85
+ //order rule keys
86
+ this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b));
87
+ }
88
+ breaksAt(position, input) {
89
+ const ruleSortedKeys = this.ruleSortedKeys;
90
+ const rules = this.rules;
91
+ const mergedSegmentationTypeValue = this.mergedSegmentationTypeValue;
92
+ //artificial rule 0.2
93
+ if (position === 0) {
94
+ return breaksAtResult(true, "0.2");
95
+ }
96
+ if (position === input.length) {
97
+ //rule 0.3
98
+ return breaksAtResult(true, "0.3");
99
+ }
100
+ //artificial rule 0.1: js specific, due to es5 regex not being unicode aware
101
+ //number 0.1 chosen to mimic java implementation, but needs to execute after 0.2 and 0.3 to be inside the string bounds
102
+ if (isSurrogate(input, position)) {
103
+ return breaksAtResult(false, "0.1");
104
+ }
105
+ const stringBeforeBreak = input.substring(0, position);
106
+ const stringAfterBreak = input.substring(position);
107
+ //artificial rule 0.4: handle suppressions
108
+ if ("suppressions" in mergedSegmentationTypeValue) {
109
+ for (const suppressions of mergedSegmentationTypeValue.suppressions) {
110
+ if (stringBeforeBreak.trim().endsWith(suppressions)) {
111
+ return breaksAtResult(false, "0.4");
112
+ }
113
+ }
114
+ }
115
+ // loop through rules and find a match
116
+ for (const ruleKey of ruleSortedKeys) {
117
+ const { before, after, breaks } = rules[ruleKey];
118
+ // for debugging
119
+ // if (ruleKey === '16' && position === 4) {
120
+ // console.log({before, after, stringBeforeBreak, stringAfterBreak})
121
+ // }
122
+ if (before) {
123
+ if (!before.test(stringBeforeBreak)) {
124
+ //didn't match the before part, therefore skipping
125
+ continue;
126
+ }
127
+ }
128
+ if (after) {
129
+ if (!after.test(stringAfterBreak)) {
130
+ //didn't match the after part, therefore skipping
131
+ continue;
132
+ }
133
+ }
134
+ return breaksAtResult(breaks, ruleKey);
135
+ }
136
+ //artificial rule 999: if no rule matched, the default is Any ÷ Any, so return true
137
+ return breaksAtResult(true, "999");
138
+ }
139
+ segment(input) {
140
+ checkReceiver(this, "segment");
141
+ return new SegmentIterator(this, input);
142
+ }
143
+ resolvedOptions() {
144
+ checkReceiver(this, "resolvedOptions");
145
+ return { ...getMultiInternalSlots(__INTERNAL_SLOT_MAP__, this, "locale", "granularity") };
146
+ }
147
+ static availableLocales = new Set(Object.keys(SegmentationRules).filter((key) => key !== "root"));
148
+ static supportedLocalesOf(locales, options) {
149
+ return SupportedLocales(Segmenter.availableLocales, CanonicalizeLocaleList(locales), options);
150
+ }
151
+ static polyfilled = true;
152
+ }
152
153
  /**
153
- * Determines if a segment is word-like according to Unicode Word Break rules.
154
- *
155
- * A segment is considered word-like if it contains alphabetic characters,
156
- * numbers, or ideographs. Segments containing only whitespace, punctuation,
157
- * or symbols are not word-like.
158
- *
159
- * Per Unicode Word Break (UAX #29) and native Intl.Segmenter implementations,
160
- * this matches segments that contain characters from word character classes:
161
- * ALetter, Hebrew_Letter, Numeric, Katakana, Hiragana, and Ideographic.
162
- *
163
- * @param segment - The text segment to check
164
- * @param matchingRule - The word break rule that created this segment
165
- * @returns true if the segment is word-like
166
- */
154
+ * Determines if a segment is word-like according to Unicode Word Break rules.
155
+ *
156
+ * A segment is considered word-like if it contains alphabetic characters,
157
+ * numbers, or ideographs. Segments containing only whitespace, punctuation,
158
+ * or symbols are not word-like.
159
+ *
160
+ * Per Unicode Word Break (UAX #29) and native Intl.Segmenter implementations,
161
+ * this matches segments that contain characters from word character classes:
162
+ * ALetter, Hebrew_Letter, Numeric, Katakana, Hiragana, and Ideographic.
163
+ *
164
+ * @param segment - The text segment to check
165
+ * @param matchingRule - The word break rule that created this segment
166
+ * @returns true if the segment is word-like
167
+ */
167
168
  function isSegmentWordLike(segment, matchingRule) {
168
- // Primary check: Does the segment contain word characters?
169
- // Word-like segments contain letters (including ideographs), numbers,
170
- // or connecting characters like apostrophes within words
171
- //
172
- // Regex matches:
173
- // - Letters: \p{L} (all Unicode letters)
174
- // - Numbers: \p{N} (all Unicode numbers)
175
- // - Marks: \p{M} (combining marks, typically part of letters)
176
- //
177
- // Note: Using Unicode property escapes which work in modern JS engines
178
- // and are necessary for proper internationalization
179
- // Lazy-initialize Unicode regex on first use
180
- if (WORD_CHARACTERS_UNICODE_REGEX === undefined) {
181
- try {
182
- // Create Unicode property escape regex at runtime to avoid compile-time TS1501 error
183
- WORD_CHARACTERS_UNICODE_REGEX = new RegExp('[\\p{L}\\p{N}\\p{M}]', 'u');
184
- }
185
- catch (_a) {
186
- // Environment doesn't support Unicode property escapes
187
- WORD_CHARACTERS_UNICODE_REGEX = null;
188
- }
189
- }
190
- var hasWordCharacters;
191
- if (WORD_CHARACTERS_UNICODE_REGEX) {
192
- // Check if segment contains word characters using Unicode property escapes
193
- // This matches the behavior of native Intl.Segmenter in Chrome/Firefox
194
- hasWordCharacters = WORD_CHARACTERS_UNICODE_REGEX.test(segment);
195
- }
196
- else {
197
- // Fallback for environments without Unicode property escapes
198
- // Match basic word characters: letters, numbers, underscores
199
- hasWordCharacters = WORD_CHARACTERS_BASIC_REGEX.test(segment);
200
- }
201
- // If segment contains word characters, it's word-like
202
- if (hasWordCharacters) {
203
- return true;
204
- }
205
- // If no word characters, check if it's definitely not word-like via rules
206
- // Non-word-like rules per Unicode Word Break specification (UAX #29):
207
- // https://unicode.org/reports/tr29/#Word_Boundaries
208
- //
209
- // WB3a (3.1): Break before newlines (sot ÷ (Newline | CR | LF))
210
- // WB3b (3.2): Break after newlines ((Newline | CR | LF) ÷ eot)
211
- // WB3d (3.4): Keep horizontal whitespace together (WSegSpace × WSegSpace)
212
- //
213
- // These rules specifically identify non-word segments like line breaks and whitespace
214
- var definitelyNotWordLikeRules = ['3.1', '3.2', '3.4'];
215
- if (definitelyNotWordLikeRules.includes(matchingRule)) {
216
- return false;
217
- }
218
- // For segments without word characters and not matching specific non-word rules,
219
- // return false (e.g., punctuation, symbols, whitespace via rule 999)
220
- return false;
169
+ // Primary check: Does the segment contain word characters?
170
+ // Word-like segments contain letters (including ideographs), numbers,
171
+ // or connecting characters like apostrophes within words
172
+ //
173
+ // Regex matches:
174
+ // - Letters: \p{L} (all Unicode letters)
175
+ // - Numbers: \p{N} (all Unicode numbers)
176
+ // - Marks: \p{M} (combining marks, typically part of letters)
177
+ //
178
+ // Note: Using Unicode property escapes which work in modern JS engines
179
+ // and are necessary for proper internationalization
180
+ // Lazy-initialize Unicode regex on first use
181
+ if (WORD_CHARACTERS_UNICODE_REGEX === undefined) {
182
+ try {
183
+ // Create Unicode property escape regex at runtime to avoid compile-time TS1501 error
184
+ WORD_CHARACTERS_UNICODE_REGEX = new RegExp("[\\p{L}\\p{N}\\p{M}]", "u");
185
+ } catch {
186
+ // Environment doesn't support Unicode property escapes
187
+ WORD_CHARACTERS_UNICODE_REGEX = null;
188
+ }
189
+ }
190
+ let hasWordCharacters;
191
+ if (WORD_CHARACTERS_UNICODE_REGEX) {
192
+ // Check if segment contains word characters using Unicode property escapes
193
+ // This matches the behavior of native Intl.Segmenter in Chrome/Firefox
194
+ hasWordCharacters = WORD_CHARACTERS_UNICODE_REGEX.test(segment);
195
+ } else {
196
+ // Fallback for environments without Unicode property escapes
197
+ // Match basic word characters: letters, numbers, underscores
198
+ hasWordCharacters = WORD_CHARACTERS_BASIC_REGEX.test(segment);
199
+ }
200
+ // If segment contains word characters, it's word-like
201
+ if (hasWordCharacters) {
202
+ return true;
203
+ }
204
+ // If no word characters, check if it's definitely not word-like via rules
205
+ // Non-word-like rules per Unicode Word Break specification (UAX #29):
206
+ // https://unicode.org/reports/tr29/#Word_Boundaries
207
+ //
208
+ // WB3a (3.1): Break before newlines (sot ÷ (Newline | CR | LF))
209
+ // WB3b (3.2): Break after newlines ((Newline | CR | LF) ÷ eot)
210
+ // WB3d (3.4): Keep horizontal whitespace together (WSegSpace × WSegSpace)
211
+ //
212
+ // These rules specifically identify non-word segments like line breaks and whitespace
213
+ const definitelyNotWordLikeRules = [
214
+ "3.1",
215
+ "3.2",
216
+ "3.4"
217
+ ];
218
+ if (definitelyNotWordLikeRules.includes(matchingRule)) {
219
+ return false;
220
+ }
221
+ // For segments without word characters and not matching specific non-word rules,
222
+ // return false (e.g., punctuation, symbols, whitespace via rule 999)
223
+ return false;
221
224
  }
222
- var createSegmentDataObject = function (segmenter, segment, index, input, matchingRule) {
223
- var returnValue = {
224
- segment: segment,
225
- index: index,
226
- input: input,
227
- };
228
- if (getSlot(segmenter, 'granularity') === 'word') {
229
- returnValue.isWordLike = isSegmentWordLike(segment, matchingRule);
230
- }
231
- return returnValue;
225
+ const createSegmentDataObject = (segmenter, segment, index, input, matchingRule) => {
226
+ const returnValue = {
227
+ segment,
228
+ index,
229
+ input
230
+ };
231
+ if (getSlot(segmenter, "granularity") === "word") {
232
+ returnValue.isWordLike = isSegmentWordLike(segment, matchingRule);
233
+ }
234
+ return returnValue;
232
235
  };
233
- var SegmentIterator = /** @class */ (function () {
234
- function SegmentIterator(segmenter, input) {
235
- this.segmenter = segmenter;
236
- this.lastSegmentIndex = 0;
237
- if (typeof input == 'symbol') {
238
- throw TypeError("Input must not be a symbol");
239
- }
240
- this.input = String(input);
241
- }
242
- SegmentIterator.prototype[Symbol.iterator] = function () {
243
- return new SegmentIterator(this.segmenter, this.input);
244
- };
245
- SegmentIterator.prototype.next = function () {
246
- //using only the relevant bit of the string
247
- var checkString = this.input.substring(this.lastSegmentIndex);
248
- //loop from the start of the checkString, until exactly length (breaksAt returns break at pos=== lenght)
249
- for (var position = 1; position <= checkString.length; position++) {
250
- var _a = this.segmenter.breaksAt(position, checkString), breaks = _a.breaks, matchingRule = _a.matchingRule;
251
- if (breaks) {
252
- var segment = checkString.substring(0, position);
253
- var index = this.lastSegmentIndex;
254
- this.lastSegmentIndex += position;
255
- return {
256
- done: false,
257
- value: createSegmentDataObject(this.segmenter, segment, index, this.input, matchingRule),
258
- };
259
- }
260
- }
261
- //no segment was found by the loop, therefore the segmentation is done
262
- return { done: true, value: undefined };
263
- };
264
- SegmentIterator.prototype.containing = function (positionInput) {
265
- if (typeof positionInput === 'bigint') {
266
- throw TypeError('Index must not be a BigInt');
267
- }
268
- var position = Number(positionInput);
269
- //https://tc39.es/ecma262/#sec-tointegerorinfinity
270
- // 2. If number is NaN, +0𝔽, or -0𝔽, return 0.
271
- if (isNaN(position) || !position) {
272
- position = 0;
273
- }
274
- // 5. Let integer be floor(abs(ℝ(number))).
275
- // 6. If number < -0𝔽, set integer to -integer.
276
- position = Math.floor(Math.abs(position)) * (position < 0 ? -1 : 1);
277
- if (position < 0 || position >= this.input.length) {
278
- return undefined;
279
- }
280
- //find previous break point
281
- var previousBreakPoint = 0;
282
- if (position === 0) {
283
- previousBreakPoint = 0;
284
- }
285
- else {
286
- var checkString_1 = this.input;
287
- for (var cursor = position; cursor >= 0; cursor--) {
288
- var breaks = this.segmenter.breaksAt(cursor, checkString_1).breaks;
289
- if (breaks) {
290
- previousBreakPoint = cursor;
291
- break;
292
- }
293
- }
294
- }
295
- var checkString = this.input.substring(previousBreakPoint);
296
- //find next break point
297
- for (var cursor = 1; cursor <= checkString.length; cursor++) {
298
- var _a = this.segmenter.breaksAt(cursor, checkString), breaks = _a.breaks, matchingRule = _a.matchingRule;
299
- if (breaks) {
300
- var segment = checkString.substring(0, cursor);
301
- return createSegmentDataObject(this.segmenter, segment, previousBreakPoint, this.input, matchingRule);
302
- }
303
- }
304
- };
305
- return SegmentIterator;
306
- }());
307
- var __INTERNAL_SLOT_MAP__ = new WeakMap();
236
+ class SegmentIterator {
237
+ segmenter;
238
+ lastSegmentIndex;
239
+ input;
240
+ constructor(segmenter, input) {
241
+ this.segmenter = segmenter;
242
+ this.lastSegmentIndex = 0;
243
+ if (typeof input == "symbol") {
244
+ throw TypeError(`Input must not be a symbol`);
245
+ }
246
+ this.input = String(input);
247
+ }
248
+ [Symbol.iterator]() {
249
+ return new SegmentIterator(this.segmenter, this.input);
250
+ }
251
+ next() {
252
+ //using only the relevant bit of the string
253
+ let checkString = this.input.substring(this.lastSegmentIndex);
254
+ //loop from the start of the checkString up to and including its length (breaksAt reports a break at position === length)
255
+ for (let position = 1; position <= checkString.length; position++) {
256
+ const { breaks, matchingRule } = this.segmenter.breaksAt(position, checkString);
257
+ if (breaks) {
258
+ const segment = checkString.substring(0, position);
259
+ const index = this.lastSegmentIndex;
260
+ this.lastSegmentIndex += position;
261
+ return {
262
+ done: false,
263
+ value: createSegmentDataObject(this.segmenter, segment, index, this.input, matchingRule)
264
+ };
265
+ }
266
+ }
267
+ //no segment was found by the loop, therefore the segmentation is done
268
+ return {
269
+ done: true,
270
+ value: undefined
271
+ };
272
+ }
273
+ containing(positionInput) {
274
+ if (typeof positionInput === "bigint") {
275
+ throw TypeError("Index must not be a BigInt");
276
+ }
277
+ let position = Number(positionInput);
278
+ //https://tc39.es/ecma262/#sec-tointegerorinfinity
279
+ // 2. If number is NaN, +0𝔽, or -0𝔽, return 0.
280
+ if (isNaN(position) || !position) {
281
+ position = 0;
282
+ }
283
+ // 5. Let integer be floor(abs(ℝ(number))).
284
+ // 6. If number < -0𝔽, set integer to -integer.
285
+ position = Math.floor(Math.abs(position)) * (position < 0 ? -1 : 1);
286
+ if (position < 0 || position >= this.input.length) {
287
+ return undefined;
288
+ }
289
+ //find previous break point
290
+ let previousBreakPoint = 0;
291
+ if (position === 0) {
292
+ previousBreakPoint = 0;
293
+ } else {
294
+ const checkString = this.input;
295
+ for (let cursor = position; cursor >= 0; cursor--) {
296
+ const { breaks } = this.segmenter.breaksAt(cursor, checkString);
297
+ if (breaks) {
298
+ previousBreakPoint = cursor;
299
+ break;
300
+ }
301
+ }
302
+ }
303
+ let checkString = this.input.substring(previousBreakPoint);
304
+ //find next break point
305
+ for (let cursor = 1; cursor <= checkString.length; cursor++) {
306
+ const { breaks, matchingRule } = this.segmenter.breaksAt(cursor, checkString);
307
+ if (breaks) {
308
+ const segment = checkString.substring(0, cursor);
309
+ return createSegmentDataObject(this.segmenter, segment, previousBreakPoint, this.input, matchingRule);
310
+ }
311
+ }
312
+ }
313
+ }
314
+ const __INTERNAL_SLOT_MAP__ = new WeakMap();
308
315
  function getSlot(instance, key) {
309
- return getInternalSlot(__INTERNAL_SLOT_MAP__, instance, key);
316
+ return getInternalSlot(__INTERNAL_SLOT_MAP__, instance, key);
310
317
  }
311
318
  function setSlot(instance, key, value) {
312
- setInternalSlot(__INTERNAL_SLOT_MAP__, instance, key, value);
319
+ setInternalSlot(__INTERNAL_SLOT_MAP__, instance, key, value);
313
320
  }
314
321
  function checkReceiver(receiver, methodName) {
315
- if (!(receiver instanceof Segmenter)) {
316
- throw TypeError("Method Intl.Segmenter.prototype.".concat(methodName, " called on incompatible receiver"));
317
- }
322
+ if (!(receiver instanceof Segmenter)) {
323
+ throw TypeError(`Method Intl.Segmenter.prototype.${methodName} called on incompatible receiver`);
324
+ }
318
325
  }
319
326
  try {
320
- // IE11 does not have Symbol
321
- if (typeof Symbol !== 'undefined') {
322
- Object.defineProperty(Segmenter.prototype, Symbol.toStringTag, {
323
- value: 'Intl.Segmenter',
324
- writable: false,
325
- enumerable: false,
326
- configurable: true,
327
- });
328
- }
329
- //github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/length.js
330
- Object.defineProperty(Segmenter.prototype.constructor, 'length', {
331
- value: 0,
332
- writable: false,
333
- enumerable: false,
334
- configurable: true,
335
- });
336
- // https://github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/supportedLocalesOf/length.js
337
- Object.defineProperty(Segmenter.supportedLocalesOf, 'length', {
338
- value: 1,
339
- writable: false,
340
- enumerable: false,
341
- configurable: true,
342
- });
343
- }
344
- catch (_a) {
345
- // Meta fix so we're test262-compliant, not important
346
- }
327
+ // IE11 does not have Symbol
328
+ if (typeof Symbol !== "undefined") {
329
+ Object.defineProperty(Segmenter.prototype, Symbol.toStringTag, {
330
+ value: "Intl.Segmenter",
331
+ writable: false,
332
+ enumerable: false,
333
+ configurable: true
334
+ });
335
+ }
336
+ // https://github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/length.js
337
+ Object.defineProperty(Segmenter.prototype.constructor, "length", {
338
+ value: 0,
339
+ writable: false,
340
+ enumerable: false,
341
+ configurable: true
342
+ });
343
+ // https://github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/supportedLocalesOf/length.js
344
+ Object.defineProperty(Segmenter.supportedLocalesOf, "length", {
345
+ value: 1,
346
+ writable: false,
347
+ enumerable: false,
348
+ configurable: true
349
+ });
350
+ } catch {}