@marijn/find-cluster-break 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +85 -0
- package/package.json +4 -4
- package/src/index.js +1 -1
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// These are filled with ranges (rangeFrom[i] up to but not including
|
|
4
|
+
// rangeTo[i]) of code points that count as extending characters.
|
|
5
|
+
let rangeFrom = [], rangeTo = []
|
|
6
|
+
|
|
7
|
+
;(() => {
|
|
8
|
+
// Compressed representation of the Grapheme_Cluster_Break=Extend
|
|
9
|
+
// information from
|
|
10
|
+
// http://www.unicode.org/Public/16.0.0/ucd/auxiliary/GraphemeBreakProperty.txt.
|
|
11
|
+
// Each pair of elements represents a range, as an offset from the
|
|
12
|
+
// previous range and a length. Numbers are in base-36, with the empty
|
|
13
|
+
// string being a shorthand for 1.
|
|
14
|
+
let numbers = "lc,34,7n,7,7b,19,,,,2,,2,,,20,b,1c,l,g,,2t,7,2,6,2,2,,4,z,,u,r,2j,b,1m,9,9,,o,4,,9,,3,,5,17,3,3b,f,,w,1j,,,,4,8,4,,3,7,a,2,t,,1m,,,,2,4,8,,9,,a,2,q,,2,2,1l,,4,2,4,2,2,3,3,,u,2,3,,b,2,1l,,4,5,,2,4,,k,2,m,6,,,1m,,,2,,4,8,,7,3,a,2,u,,1n,,,,c,,9,,14,,3,,1l,3,5,3,,4,7,2,b,2,t,,1m,,2,,2,,3,,5,2,7,2,b,2,s,2,1l,2,,,2,4,8,,9,,a,2,t,,20,,4,,2,3,,,8,,29,,2,7,c,8,2q,,2,9,b,6,22,2,r,,,,,,1j,e,,5,,2,5,b,,10,9,,2u,4,,6,,2,2,2,p,2,4,3,g,4,d,,2,2,6,,f,,jj,3,qa,3,t,3,t,2,u,2,1s,2,,7,8,,2,b,9,,19,3,3b,2,y,,3a,3,4,2,9,,6,3,63,2,2,,1m,,,7,,,,,2,8,6,a,2,,1c,h,1r,4,1c,7,,,5,,14,9,c,2,w,4,2,2,,3,1k,,,2,3,,,3,1m,8,2,2,48,3,,d,,7,4,,6,,3,2,5i,1m,,5,ek,,5f,x,2da,3,3x,,2o,w,fe,6,2x,2,n9w,4,,a,w,2,28,2,7k,,3,,4,,p,2,5,,47,2,q,i,d,,12,8,p,b,1a,3,1c,,2,4,2,2,13,,1v,6,2,2,2,2,c,,8,,1b,,1f,,,3,2,2,5,2,,,16,2,8,,6m,,2,,4,,fn4,,kh,g,g,g,a6,2,gt,,6a,,45,5,1ae,3,,2,5,4,14,3,4,,4l,2,fx,4,ar,2,49,b,4w,,1i,f,1k,3,1d,4,2,2,1x,3,10,5,,8,1q,,c,2,1g,9,a,4,2,,2n,3,2,,,2,6,,4g,,3,8,l,2,1l,2,,,,,m,,e,7,3,5,5f,8,2,3,,,n,,29,,2,6,,,2,,,2,,2,6j,,2,4,6,2,,2,r,2,2d,8,2,,,2,2y,,,,2,6,,,2t,3,2,4,,5,77,9,,2,6t,,a,2,,,4,,40,4,2,2,4,,w,a,14,6,2,4,8,,9,6,2,3,1a,d,,2,ba,7,,6,,,2a,m,2,7,,2,,2,3e,6,3,,,2,,7,,,20,2,3,,,,9n,2,f0b,5,1n,7,t4,,1r,4,29,,f5k,2,43q,,,3,4,5,8,8,2,7,u,4,44,3,1iz,1j,4,1e,8,,e,,m,5,,f,11s,7,,h,2,7,,2,,5,79,7,c5,4,15s,7,31,7,240,5,gx7k,2o,3k,6o".split(",").map(s => s ? parseInt(s, 36) : 1);
|
|
15
|
+
for (let i = 0, n = 0; i < numbers.length; i++)
|
|
16
|
+
(i % 2 ? rangeTo : rangeFrom).push(n = n + numbers[i]);
|
|
17
|
+
})();
|
|
18
|
+
|
|
19
|
+
function isExtendingChar(code) {
|
|
20
|
+
if (code < 768) return false
|
|
21
|
+
for (let from = 0, to = rangeFrom.length;;) {
|
|
22
|
+
let mid = (from + to) >> 1;
|
|
23
|
+
if (code < rangeFrom[mid]) to = mid;
|
|
24
|
+
else if (code >= rangeTo[mid]) from = mid + 1;
|
|
25
|
+
else return true
|
|
26
|
+
if (from == to) return false
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function isRegionalIndicator(code) {
|
|
31
|
+
return code >= 0x1F1E6 && code <= 0x1F1FF
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const ZWJ = 0x200d;
|
|
35
|
+
|
|
36
|
+
function findClusterBreak(str, pos, forward = true, includeExtending = true) {
|
|
37
|
+
return (forward ? nextClusterBreak : prevClusterBreak)(str, pos, includeExtending)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
function nextClusterBreak(str, pos, includeExtending) {
|
|
41
|
+
if (pos == str.length) return pos
|
|
42
|
+
// If pos is in the middle of a surrogate pair, move to its start
|
|
43
|
+
if (pos && surrogateLow(str.charCodeAt(pos)) && surrogateHigh(str.charCodeAt(pos - 1))) pos--;
|
|
44
|
+
let prev = codePointAt(str, pos);
|
|
45
|
+
pos += codePointSize(prev);
|
|
46
|
+
while (pos < str.length) {
|
|
47
|
+
let next = codePointAt(str, pos);
|
|
48
|
+
if (prev == ZWJ || next == ZWJ || includeExtending && isExtendingChar(next)) {
|
|
49
|
+
pos += codePointSize(next);
|
|
50
|
+
prev = next;
|
|
51
|
+
} else if (isRegionalIndicator(next)) {
|
|
52
|
+
let countBefore = 0, i = pos - 2;
|
|
53
|
+
while (i >= 0 && isRegionalIndicator(codePointAt(str, i))) { countBefore++; i -= 2; }
|
|
54
|
+
if (countBefore % 2 == 0) break
|
|
55
|
+
else pos += 2;
|
|
56
|
+
} else {
|
|
57
|
+
break
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
return pos
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function prevClusterBreak(str, pos, includeExtending) {
|
|
64
|
+
while (pos > 1) {
|
|
65
|
+
let found = nextClusterBreak(str, pos - 2, includeExtending);
|
|
66
|
+
if (found < pos) return found
|
|
67
|
+
pos--;
|
|
68
|
+
}
|
|
69
|
+
return 0
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
function codePointAt(str, pos) {
|
|
73
|
+
let code0 = str.charCodeAt(pos);
|
|
74
|
+
if (!surrogateHigh(code0) || pos + 1 == str.length) return code0
|
|
75
|
+
let code1 = str.charCodeAt(pos + 1);
|
|
76
|
+
if (!surrogateLow(code1)) return code0
|
|
77
|
+
return ((code0 - 0xd800) << 10) + (code1 - 0xdc00) + 0x10000
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
function surrogateLow(ch) { return ch >= 0xDC00 && ch < 0xE000 }
|
|
81
|
+
function surrogateHigh(ch) { return ch >= 0xD800 && ch < 0xDC00 }
|
|
82
|
+
function codePointSize(code) { return code < 0x10000 ? 1 : 2 }
|
|
83
|
+
|
|
84
|
+
exports.findClusterBreak = findClusterBreak;
|
|
85
|
+
exports.isExtendingChar = isExtendingChar;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@marijn/find-cluster-break",
|
|
3
|
-
"version": "1.0.1",
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Find the position of grapheme cluster breaks in a string",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"repository": {
|
|
16
16
|
"type": "git",
|
|
17
|
-
"url": "git+https://
|
|
17
|
+
"url": "git+https://code.haverbeke.berlin/marijn/find-cluster-break.git"
|
|
18
18
|
},
|
|
19
19
|
"keywords": [
|
|
20
20
|
"unicode",
|
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
"author": "Marijn Haverbeke <marijn@haverbeke.berlin>",
|
|
26
26
|
"license": "MIT",
|
|
27
27
|
"bugs": {
|
|
28
|
-
"url": "https://
|
|
28
|
+
"url": "https://code.haverbeke.berlin/marijn/find-cluster-break/issues"
|
|
29
29
|
},
|
|
30
|
-
"homepage": "https://
|
|
30
|
+
"homepage": "https://code.haverbeke.berlin/marijn/find-cluster-break",
|
|
31
31
|
"devDependencies": {
|
|
32
32
|
"mocha": "^10.7.3",
|
|
33
33
|
"rollup": "^4.28.1"
|
package/src/index.js
CHANGED
|
@@ -66,7 +66,7 @@ function nextClusterBreak(str, pos, includeExtending) {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
function prevClusterBreak(str, pos, includeExtending) {
|
|
69
|
-
while (pos >
|
|
69
|
+
while (pos > 1) {
|
|
70
70
|
let found = nextClusterBreak(str, pos - 2, includeExtending)
|
|
71
71
|
if (found < pos) return found
|
|
72
72
|
pos--
|