@formatjs/intl-segmenter 11.7.12 → 12.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -5
- package/polyfill-force.js +2 -4
- package/polyfill.iife.js +38 -40
- package/polyfill.js +4 -6
- package/should-polyfill.js +1 -4
- package/src/cldr-segmentation-rules.generated.js +1 -4
- package/src/segmentation-utils.js +2 -7
- package/src/segmenter.js +25 -28
- package/test262-main.d.ts +1 -1
- package/test262-main.js +1 -3
- package/lib/polyfill-force.d.ts +0 -1
- package/lib/polyfill-force.js +0 -7
- package/lib/polyfill.d.ts +0 -1
- package/lib/polyfill.js +0 -10
- package/lib/should-polyfill.d.ts +0 -1
- package/lib/should-polyfill.js +0 -3
- package/lib/src/cldr-segmentation-rules.generated.d.ts +0 -384
- package/lib/src/cldr-segmentation-rules.generated.js +0 -1266
- package/lib/src/segmentation-utils.d.ts +0 -2
- package/lib/src/segmentation-utils.js +0 -32
- package/lib/src/segmenter.d.ts +0 -55
- package/lib/src/segmenter.js +0 -270
- package/lib/test262-main.d.ts +0 -1
- package/lib/test262-main.js +0 -3
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
export var replaceVariables = function (variables, input) {
|
|
2
|
-
var findVarRegex = /\$[A-Za-z0-9_]+/gm;
|
|
3
|
-
return input.replaceAll(findVarRegex, function (match) {
|
|
4
|
-
if (!(match in variables)) {
|
|
5
|
-
throw new Error("No such variable ".concat(match));
|
|
6
|
-
}
|
|
7
|
-
return variables[match];
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
|
-
export var isSurrogate = function (str, pos) {
|
|
11
|
-
return (0xd800 <= str.charCodeAt(pos - 1) &&
|
|
12
|
-
str.charCodeAt(pos - 1) <= 0xdbff &&
|
|
13
|
-
0xdc00 <= str.charCodeAt(pos) &&
|
|
14
|
-
str.charCodeAt(pos) <= 0xdfff);
|
|
15
|
-
};
|
|
16
|
-
// alternative surrogate check mimicking the java implementation
|
|
17
|
-
// const TRAIL_SURROGATE_BITMASK = 0xfffffc00
|
|
18
|
-
// const TRAIL_SURROGATE_BITS = 0xdc00
|
|
19
|
-
// const LEAD_SURROGATE_BITMASK = 0xfffffc00
|
|
20
|
-
// const LEAD_SURROGATE_BITS = 0xd800
|
|
21
|
-
// const isSurrogate = (text: string, position: number) => {
|
|
22
|
-
// if (
|
|
23
|
-
// (text.charCodeAt(position - 1) & LEAD_SURROGATE_BITMASK) ==
|
|
24
|
-
// LEAD_SURROGATE_BITS &&
|
|
25
|
-
// (text.charCodeAt(position) & TRAIL_SURROGATE_BITMASK) ==
|
|
26
|
-
// TRAIL_SURROGATE_BITS
|
|
27
|
-
// ) {
|
|
28
|
-
// return true
|
|
29
|
-
// } else {
|
|
30
|
-
// return false
|
|
31
|
-
// }
|
|
32
|
-
// }
|
package/lib/src/segmenter.d.ts
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
type SegmentResult = {
|
|
2
|
-
segment: string;
|
|
3
|
-
breakingRule?: string;
|
|
4
|
-
nonBreakingRules?: string[];
|
|
5
|
-
} | undefined;
|
|
6
|
-
export interface SegmenterOptions {
|
|
7
|
-
localeMatcher?: 'lookup' | 'best fit';
|
|
8
|
-
granularity?: 'word' | 'sentence' | 'grapheme';
|
|
9
|
-
}
|
|
10
|
-
export interface SegmenterResolvedOptions {
|
|
11
|
-
locale: string;
|
|
12
|
-
granularity: NonNullable<SegmenterOptions['granularity']>;
|
|
13
|
-
}
|
|
14
|
-
declare const breaksAtResult: (breaks: boolean, matchingRule: string) => {
|
|
15
|
-
breaks: boolean;
|
|
16
|
-
matchingRule: string;
|
|
17
|
-
};
|
|
18
|
-
export declare class Segmenter {
|
|
19
|
-
private readonly rules;
|
|
20
|
-
private readonly ruleSortedKeys;
|
|
21
|
-
private readonly mergedSegmentationTypeValue;
|
|
22
|
-
constructor(locales: string | string[] | undefined, options: SegmenterOptions);
|
|
23
|
-
breaksAt(position: number, input: string): ReturnType<typeof breaksAtResult>;
|
|
24
|
-
segment(input: string): SegmentIterator;
|
|
25
|
-
resolvedOptions(): SegmenterResolvedOptions;
|
|
26
|
-
static availableLocales: Set<string>;
|
|
27
|
-
static supportedLocalesOf(locales?: string | string[], options?: Pick<SegmenterOptions, 'localeMatcher'>): string[];
|
|
28
|
-
static readonly polyfilled = true;
|
|
29
|
-
}
|
|
30
|
-
declare class SegmentIterator implements Iterable<SegmentResult>, Iterator<SegmentResult> {
|
|
31
|
-
private readonly segmenter;
|
|
32
|
-
private lastSegmentIndex;
|
|
33
|
-
private input;
|
|
34
|
-
constructor(segmenter: Segmenter, input: string);
|
|
35
|
-
[Symbol.iterator](): SegmentIterator;
|
|
36
|
-
next(): {
|
|
37
|
-
done: boolean;
|
|
38
|
-
value: {
|
|
39
|
-
segment: string;
|
|
40
|
-
index: number;
|
|
41
|
-
input: string;
|
|
42
|
-
isWordLike?: boolean;
|
|
43
|
-
};
|
|
44
|
-
} | {
|
|
45
|
-
done: boolean;
|
|
46
|
-
value: undefined;
|
|
47
|
-
};
|
|
48
|
-
containing(positionInput: number): {
|
|
49
|
-
segment: string;
|
|
50
|
-
index: number;
|
|
51
|
-
input: string;
|
|
52
|
-
isWordLike?: boolean;
|
|
53
|
-
} | undefined;
|
|
54
|
-
}
|
|
55
|
-
export type { SegmentIterator };
|
package/lib/src/segmenter.js
DELETED
|
@@ -1,270 +0,0 @@
|
|
|
1
|
-
import { __assign, __spreadArray } from "tslib";
|
|
2
|
-
import { CanonicalizeLocaleList, GetOption, GetOptionsObject, SupportedLocales, getInternalSlot, getMultiInternalSlots, setInternalSlot, } from '@formatjs/ecma402-abstract';
|
|
3
|
-
import { ResolveLocale } from '@formatjs/intl-localematcher';
|
|
4
|
-
import { SegmentationRules } from './cldr-segmentation-rules.generated';
|
|
5
|
-
import { isSurrogate, replaceVariables } from './segmentation-utils';
|
|
6
|
-
/**
|
|
7
|
-
* Adds $ to before rules and ^ to after rules for strictness
|
|
8
|
-
* Replaces variables
|
|
9
|
-
* Initializes the RegExp
|
|
10
|
-
*
|
|
11
|
-
* @param rule raw rule string from cldr-segmentation-rules.generated
|
|
12
|
-
* @param variables
|
|
13
|
-
* @param after appends ^ if true and $ if false
|
|
14
|
-
* @returns
|
|
15
|
-
*/
|
|
16
|
-
var generateRuleRegex = function (rule, variables, after) {
|
|
17
|
-
return new RegExp("".concat(after ? '^' : '').concat(replaceVariables(variables, rule)).concat(after ? '' : '$'));
|
|
18
|
-
};
|
|
19
|
-
var prepareLocaleSegmentationRules = function (segmentationTypeValue) {
|
|
20
|
-
var preparedRules = {};
|
|
21
|
-
for (var _i = 0, _a = Object.keys(segmentationTypeValue.segmentRules); _i < _a.length; _i++) {
|
|
22
|
-
var ruleNr = _a[_i];
|
|
23
|
-
var ruleValue = segmentationTypeValue.segmentRules[ruleNr];
|
|
24
|
-
var preparedRule = {
|
|
25
|
-
breaks: ruleValue.breaks,
|
|
26
|
-
};
|
|
27
|
-
if ('before' in ruleValue && ruleValue.before) {
|
|
28
|
-
preparedRule.before = generateRuleRegex(ruleValue.before, segmentationTypeValue.variables, false);
|
|
29
|
-
}
|
|
30
|
-
if ('after' in ruleValue && ruleValue.after) {
|
|
31
|
-
preparedRule.after = generateRuleRegex(ruleValue.after, segmentationTypeValue.variables, true);
|
|
32
|
-
}
|
|
33
|
-
preparedRules[ruleNr] = preparedRule;
|
|
34
|
-
}
|
|
35
|
-
return preparedRules;
|
|
36
|
-
};
|
|
37
|
-
var breaksAtResult = function (breaks, matchingRule) { return ({
|
|
38
|
-
breaks: breaks,
|
|
39
|
-
matchingRule: matchingRule,
|
|
40
|
-
}); };
|
|
41
|
-
var Segmenter = /** @class */ (function () {
|
|
42
|
-
function Segmenter(locales, options) {
|
|
43
|
-
var _newTarget = this.constructor;
|
|
44
|
-
if (_newTarget === undefined) {
|
|
45
|
-
throw TypeError("Constructor Intl.Segmenter requires 'new'");
|
|
46
|
-
}
|
|
47
|
-
var requestedLocales = CanonicalizeLocaleList(locales);
|
|
48
|
-
options = GetOptionsObject(options);
|
|
49
|
-
var opt = Object.create(null);
|
|
50
|
-
var matcher = GetOption(options, 'localeMatcher', 'string', ['lookup', 'best fit'], 'best fit');
|
|
51
|
-
opt.localeMatcher = matcher;
|
|
52
|
-
var granularity = GetOption(options, 'granularity', 'string', ['word', 'sentence', 'grapheme'], 'grapheme');
|
|
53
|
-
setSlot(this, 'granularity', granularity);
|
|
54
|
-
//TODO: figure out correct availible locales
|
|
55
|
-
var r = ResolveLocale(Segmenter.availableLocales, //availible locales
|
|
56
|
-
requestedLocales, opt, [], // there is no relevantExtensionKeys
|
|
57
|
-
{}, function () { return ''; } //use only root rules
|
|
58
|
-
);
|
|
59
|
-
setSlot(this, 'locale', r.locale);
|
|
60
|
-
//root rules based on granularity
|
|
61
|
-
this.mergedSegmentationTypeValue = SegmentationRules.root[granularity];
|
|
62
|
-
//merge root rules with locale ones if locale is specified
|
|
63
|
-
if (r.locale.length) {
|
|
64
|
-
var localeOverrides = SegmentationRules[r.locale];
|
|
65
|
-
if (granularity in localeOverrides) {
|
|
66
|
-
var localeSegmentationTypeValue = localeOverrides[granularity];
|
|
67
|
-
this.mergedSegmentationTypeValue.variables = __assign(__assign({}, this.mergedSegmentationTypeValue.variables), localeSegmentationTypeValue.variables);
|
|
68
|
-
this.mergedSegmentationTypeValue.segmentRules = __assign(__assign({}, this.mergedSegmentationTypeValue.segmentRules), localeSegmentationTypeValue.segmentRules);
|
|
69
|
-
this.mergedSegmentationTypeValue.suppressions = __spreadArray(__spreadArray([], this.mergedSegmentationTypeValue.suppressions, true), localeSegmentationTypeValue.suppressions, true);
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
//prepare rules
|
|
73
|
-
this.rules = prepareLocaleSegmentationRules(this.mergedSegmentationTypeValue);
|
|
74
|
-
//order rule keys
|
|
75
|
-
this.ruleSortedKeys = Object.keys(this.rules).sort(function (a, b) { return Number(a) - Number(b); });
|
|
76
|
-
}
|
|
77
|
-
Segmenter.prototype.breaksAt = function (position, input) {
|
|
78
|
-
var ruleSortedKeys = this.ruleSortedKeys;
|
|
79
|
-
var rules = this.rules;
|
|
80
|
-
var mergedSegmentationTypeValue = this.mergedSegmentationTypeValue;
|
|
81
|
-
//artificial rule 0.2
|
|
82
|
-
if (position === 0) {
|
|
83
|
-
return breaksAtResult(true, '0.2');
|
|
84
|
-
}
|
|
85
|
-
if (position === input.length) {
|
|
86
|
-
//rule 0.3
|
|
87
|
-
return breaksAtResult(true, '0.3');
|
|
88
|
-
}
|
|
89
|
-
//artificial rule 0.1: js specific, due to es5 regex not being unicode aware
|
|
90
|
-
//number 0.1 chosen to mimic java implementation, but needs to execute after 0.2 and 0.3 to be inside the string bounds
|
|
91
|
-
if (isSurrogate(input, position)) {
|
|
92
|
-
return breaksAtResult(false, '0.1');
|
|
93
|
-
}
|
|
94
|
-
var stringBeforeBreak = input.substring(0, position);
|
|
95
|
-
var stringAfterBreak = input.substring(position);
|
|
96
|
-
//artificial rule 0.4: handle suppressions
|
|
97
|
-
if ('suppressions' in mergedSegmentationTypeValue) {
|
|
98
|
-
for (var _i = 0, _a = mergedSegmentationTypeValue.suppressions; _i < _a.length; _i++) {
|
|
99
|
-
var suppressions = _a[_i];
|
|
100
|
-
if (stringBeforeBreak.trim().endsWith(suppressions)) {
|
|
101
|
-
return breaksAtResult(false, '0.4');
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
// loop through rules and find a match
|
|
106
|
-
for (var _b = 0, ruleSortedKeys_1 = ruleSortedKeys; _b < ruleSortedKeys_1.length; _b++) {
|
|
107
|
-
var ruleKey = ruleSortedKeys_1[_b];
|
|
108
|
-
var _c = rules[ruleKey], before = _c.before, after = _c.after, breaks = _c.breaks;
|
|
109
|
-
// for debugging
|
|
110
|
-
// if (ruleKey === '16' && position === 4) {
|
|
111
|
-
// console.log({before, after, stringBeforeBreak, stringAfterBreak})
|
|
112
|
-
// }
|
|
113
|
-
if (before) {
|
|
114
|
-
if (!before.test(stringBeforeBreak)) {
|
|
115
|
-
//didn't match the before part, therfore skipping
|
|
116
|
-
continue;
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
if (after) {
|
|
120
|
-
if (!after.test(stringAfterBreak)) {
|
|
121
|
-
//didn't match the after part, therfore skipping
|
|
122
|
-
continue;
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
return breaksAtResult(breaks, ruleKey);
|
|
126
|
-
}
|
|
127
|
-
//artificial rule 999: if no rule matched is Any ÷ Any so return true
|
|
128
|
-
return breaksAtResult(true, '999');
|
|
129
|
-
};
|
|
130
|
-
Segmenter.prototype.segment = function (input) {
|
|
131
|
-
checkReceiver(this, 'segment');
|
|
132
|
-
return new SegmentIterator(this, input);
|
|
133
|
-
};
|
|
134
|
-
Segmenter.prototype.resolvedOptions = function () {
|
|
135
|
-
checkReceiver(this, 'resolvedOptions');
|
|
136
|
-
return __assign({}, getMultiInternalSlots(__INTERNAL_SLOT_MAP__, this, 'locale', 'granularity'));
|
|
137
|
-
};
|
|
138
|
-
Segmenter.supportedLocalesOf = function (locales, options) {
|
|
139
|
-
return SupportedLocales(Segmenter.availableLocales, CanonicalizeLocaleList(locales), options);
|
|
140
|
-
};
|
|
141
|
-
Segmenter.availableLocales = new Set(Object.keys(SegmentationRules).filter(function (key) { return key !== 'root'; }));
|
|
142
|
-
Segmenter.polyfilled = true;
|
|
143
|
-
return Segmenter;
|
|
144
|
-
}());
|
|
145
|
-
export { Segmenter };
|
|
146
|
-
var createSegmentDataObject = function (segmenter, segment, index, input, matchingRule) {
|
|
147
|
-
var returnValue = {
|
|
148
|
-
segment: segment,
|
|
149
|
-
index: index,
|
|
150
|
-
input: input,
|
|
151
|
-
};
|
|
152
|
-
if (getSlot(segmenter, 'granularity') === 'word') {
|
|
153
|
-
returnValue.isWordLike = matchingRule !== '3.1' && matchingRule !== '3.2';
|
|
154
|
-
}
|
|
155
|
-
return returnValue;
|
|
156
|
-
};
|
|
157
|
-
var SegmentIterator = /** @class */ (function () {
|
|
158
|
-
function SegmentIterator(segmenter, input) {
|
|
159
|
-
this.segmenter = segmenter;
|
|
160
|
-
this.lastSegmentIndex = 0;
|
|
161
|
-
if (typeof input == 'symbol') {
|
|
162
|
-
throw TypeError("Input must not be a symbol");
|
|
163
|
-
}
|
|
164
|
-
this.input = String(input);
|
|
165
|
-
}
|
|
166
|
-
SegmentIterator.prototype[Symbol.iterator] = function () {
|
|
167
|
-
return new SegmentIterator(this.segmenter, this.input);
|
|
168
|
-
};
|
|
169
|
-
SegmentIterator.prototype.next = function () {
|
|
170
|
-
//using only the relevant bit of the string
|
|
171
|
-
var checkString = this.input.substring(this.lastSegmentIndex);
|
|
172
|
-
//loop from the start of the checkString, until exactly length (breaksAt returns break at pos=== lenght)
|
|
173
|
-
for (var position = 1; position <= checkString.length; position++) {
|
|
174
|
-
var _a = this.segmenter.breaksAt(position, checkString), breaks = _a.breaks, matchingRule = _a.matchingRule;
|
|
175
|
-
if (breaks) {
|
|
176
|
-
var segment = checkString.substring(0, position);
|
|
177
|
-
var index = this.lastSegmentIndex;
|
|
178
|
-
this.lastSegmentIndex += position;
|
|
179
|
-
return {
|
|
180
|
-
done: false,
|
|
181
|
-
value: createSegmentDataObject(this.segmenter, segment, index, this.input, matchingRule),
|
|
182
|
-
};
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
//no segment was found by the loop, therefore the segmentation is done
|
|
186
|
-
return { done: true, value: undefined };
|
|
187
|
-
};
|
|
188
|
-
SegmentIterator.prototype.containing = function (positionInput) {
|
|
189
|
-
if (typeof positionInput === 'bigint') {
|
|
190
|
-
throw TypeError('Index must not be a BigInt');
|
|
191
|
-
}
|
|
192
|
-
var position = Number(positionInput);
|
|
193
|
-
//https://tc39.es/ecma262/#sec-tointegerorinfinity
|
|
194
|
-
// 2. If number is NaN, +0𝔽, or -0𝔽, return 0.
|
|
195
|
-
if (isNaN(position) || !position) {
|
|
196
|
-
position = 0;
|
|
197
|
-
}
|
|
198
|
-
// 5. Let integer be floor(abs(ℝ(number))).
|
|
199
|
-
// 6. If number < -0𝔽, set integer to -integer.
|
|
200
|
-
position = Math.floor(Math.abs(position)) * (position < 0 ? -1 : 1);
|
|
201
|
-
if (position < 0 || position >= this.input.length) {
|
|
202
|
-
return undefined;
|
|
203
|
-
}
|
|
204
|
-
//find previous break point
|
|
205
|
-
var previousBreakPoint = 0;
|
|
206
|
-
if (position === 0) {
|
|
207
|
-
previousBreakPoint = 0;
|
|
208
|
-
}
|
|
209
|
-
else {
|
|
210
|
-
var checkString_1 = this.input;
|
|
211
|
-
for (var cursor = position; cursor >= 0; cursor--) {
|
|
212
|
-
var breaks = this.segmenter.breaksAt(cursor, checkString_1).breaks;
|
|
213
|
-
if (breaks) {
|
|
214
|
-
previousBreakPoint = cursor;
|
|
215
|
-
break;
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
var checkString = this.input.substring(previousBreakPoint);
|
|
220
|
-
//find next break point
|
|
221
|
-
for (var cursor = 1; cursor <= checkString.length; cursor++) {
|
|
222
|
-
var _a = this.segmenter.breaksAt(cursor, checkString), breaks = _a.breaks, matchingRule = _a.matchingRule;
|
|
223
|
-
if (breaks) {
|
|
224
|
-
var segment = checkString.substring(0, cursor);
|
|
225
|
-
return createSegmentDataObject(this.segmenter, segment, previousBreakPoint, this.input, matchingRule);
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
};
|
|
229
|
-
return SegmentIterator;
|
|
230
|
-
}());
|
|
231
|
-
var __INTERNAL_SLOT_MAP__ = new WeakMap();
|
|
232
|
-
function getSlot(instance, key) {
|
|
233
|
-
return getInternalSlot(__INTERNAL_SLOT_MAP__, instance, key);
|
|
234
|
-
}
|
|
235
|
-
function setSlot(instance, key, value) {
|
|
236
|
-
setInternalSlot(__INTERNAL_SLOT_MAP__, instance, key, value);
|
|
237
|
-
}
|
|
238
|
-
function checkReceiver(receiver, methodName) {
|
|
239
|
-
if (!(receiver instanceof Segmenter)) {
|
|
240
|
-
throw TypeError("Method Intl.Segmenter.prototype.".concat(methodName, " called on incompatible receiver"));
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
try {
|
|
244
|
-
// IE11 does not have Symbol
|
|
245
|
-
if (typeof Symbol !== 'undefined') {
|
|
246
|
-
Object.defineProperty(Segmenter.prototype, Symbol.toStringTag, {
|
|
247
|
-
value: 'Intl.Segmenter',
|
|
248
|
-
writable: false,
|
|
249
|
-
enumerable: false,
|
|
250
|
-
configurable: true,
|
|
251
|
-
});
|
|
252
|
-
}
|
|
253
|
-
//github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/length.js
|
|
254
|
-
https: Object.defineProperty(Segmenter.prototype.constructor, 'length', {
|
|
255
|
-
value: 0,
|
|
256
|
-
writable: false,
|
|
257
|
-
enumerable: false,
|
|
258
|
-
configurable: true,
|
|
259
|
-
});
|
|
260
|
-
// https://github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/supportedLocalesOf/length.js
|
|
261
|
-
Object.defineProperty(Segmenter.supportedLocalesOf, 'length', {
|
|
262
|
-
value: 1,
|
|
263
|
-
writable: false,
|
|
264
|
-
enumerable: false,
|
|
265
|
-
configurable: true,
|
|
266
|
-
});
|
|
267
|
-
}
|
|
268
|
-
catch (e) {
|
|
269
|
-
// Meta fix so we're test262-compliant, not important
|
|
270
|
-
}
|
package/lib/test262-main.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
import './polyfill-force';
|
package/lib/test262-main.js
DELETED