nv-string-foreach-byt 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +122 -0
- package/package.json +11 -0
package/index.js
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
|
|
2
|
+
/**
 * Visits the UTF-16 code units of `s` in the half-open range [si, ei) and
 * reports each unit as two bytes in little-endian order (low byte first).
 *
 * `callback` is invoked as (byte, byteIndex, byteOffset, unitIndex):
 *   - byte:       the emitted byte value (0-255)
 *   - byteIndex:  running count of bytes emitted so far, starting at 0 for
 *                 the first byte of the unit at `si`
 *   - byteOffset: 0 for the low byte, 1 for the high byte
 *   - unitIndex:  index in `s` of the code unit the byte belongs to
 *
 * @param {string} s - Source string.
 * @param {Function} callback - Called once per emitted byte.
 * @param {number} [si=0] - First code-unit index (inclusive).
 * @param {number} [ei=s.length] - End code-unit index (exclusive).
 */
const foreach_utf16le = (s, callback, si = 0, ei = s.length) => {
  let emitted = 0; // bytes reported so far (relative to si, not to the whole string)
  let unitIdx = si;
  while (unitIdx < ei) {
    const unit = s.charCodeAt(unitIdx);
    const lowByte = unit & 0xFF;
    const highByte = (unit >>> 8) & 0xFF;
    // Little-endian: least significant byte goes out first.
    callback(lowByte, emitted, 0, unitIdx);
    emitted++;
    callback(highByte, emitted, 1, unitIdx);
    emitted++;
    ++unitIdx;
  }
};
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
/**
 * Visits the UTF-16 code units of `s` in the half-open range [si, ei) and
 * reports each unit as two bytes in big-endian order (high byte first).
 *
 * `callback` is invoked as (byte, byteIndex, byteOffset, unitIndex):
 *   - byte:       the emitted byte value (0-255)
 *   - byteIndex:  running count of bytes emitted so far, starting at 0 for
 *                 the first byte of the unit at `si`
 *   - byteOffset: 0 for the high byte, 1 for the low byte
 *   - unitIndex:  index in `s` of the code unit the byte belongs to
 *
 * @param {string} s - Source string.
 * @param {Function} callback - Called once per emitted byte.
 * @param {number} [si=0] - First code-unit index (inclusive).
 * @param {number} [ei=s.length] - End code-unit index (exclusive).
 */
const foreach_utf16be = (s, callback, si = 0, ei = s.length) => {
  let byteCount = 0; // bytes reported so far (relative to si, not to the whole string)
  for (let pos = si; pos < ei; ++pos) {
    const unit = s.charCodeAt(pos);
    // Big-endian: most significant byte goes out first.
    callback((unit >>> 8) & 0xFF, byteCount++, 0, pos);
    callback(unit & 0xFF, byteCount++, 1, pos);
  }
};
|
|
28
|
+
|
|
29
|
+
// True when `cd` is a UTF-16 high (leading) surrogate: 0xD800..0xDBFF.
const is_u16_hi_cd = (cd) => cd >= 0xD800 && cd <= 0xDBFF;
|
|
30
|
+
// True when `cd` is a UTF-16 low (trailing) surrogate: 0xDC00..0xDFFF.
const is_u16_lo_cd = (cd) => cd >= 0xDC00 && cd <= 0xDFFF;
|
|
31
|
+
// True when `cd` is any UTF-16 surrogate (high or low): 0xD800..0xDFFF.
const is_u16_hilo_cd = (cd) => cd >= 0xD800 && cd <= 0xDFFF;
|
|
32
|
+
|
|
33
|
+
/**
 * Encodes the UTF-16 code units of `s` in the half-open range [si, ei) as
 * UTF-8 and reports every resulting byte through `callback`.
 *
 * `callback` is invoked as (byte, byteIndex, byteOffset, unitIndex):
 *   - byte:       the emitted byte value (0-255)
 *   - byteIndex:  running count of bytes emitted so far, starting at 0 for
 *                 the first byte produced from the unit at `si`
 *   - byteOffset: position of the byte inside its encoded character (0-3)
 *   - unitIndex:  index in `s` of the first code unit of that character
 *
 * A surrogate pair is combined into one 4-byte sequence only when the high
 * surrogate is immediately followed, inside [si, ei), by a low surrogate.
 * Any unpaired surrogate (a high surrogate without a following low one, a
 * low surrogate on its own, or a pair cut by `ei`) is emitted as U+FFFD
 * (EF BF BD), the same substitution `TextEncoder` performs, so the output is
 * always well-formed UTF-8 and no code unit outside the range is read.
 *
 * @param {string} s - Source string.
 * @param {Function} callback - Called once per emitted byte.
 * @param {number} [si=0] - First code-unit index (inclusive).
 * @param {number} [ei=s.length] - End code-unit index (exclusive).
 */
const foreach_utf8 = (s, callback, si = 0, ei = s.length) => {
  let byteIndex = 0; // bytes reported so far (relative to si)

  // Reports all bytes of one encoded character that starts at unitIndex.
  const emit = (bytes, unitIndex) => {
    for (let k = 0; k < bytes.length; ++k) {
      callback(bytes[k], byteIndex, k, unitIndex);
      byteIndex++;
    }
  };

  let i = si;
  while (i < ei) {
    const cd = s.charCodeAt(i);
    const isHigh = cd >= 0xD800 && cd <= 0xDBFF;
    const isLow = cd >= 0xDC00 && cd <= 0xDFFF;

    if (isHigh) {
      // Only look at the next unit when it is still inside the range.
      const next = i + 1 < ei ? s.charCodeAt(i + 1) : NaN;
      if (next >= 0xDC00 && next <= 0xDFFF) {
        // Valid surrogate pair -> supplementary code point -> 4 bytes.
        const codePoint = 0x10000 + ((cd - 0xD800) << 10) + (next - 0xDC00);
        emit(
          [
            0xF0 | (codePoint >> 18),
            0x80 | ((codePoint >> 12) & 0x3F),
            0x80 | ((codePoint >> 6) & 0x3F),
            0x80 | (codePoint & 0x3F),
          ],
          i
        );
        i += 2;
        continue;
      }
    }

    if (isHigh || isLow) {
      // Unpaired surrogate: substitute U+FFFD and consume just this unit,
      // so the following character is not swallowed.
      emit([0xEF, 0xBF, 0xBD], i);
    } else if (cd < 0x80) {
      // 1-byte UTF-8 (ASCII).
      emit([cd], i);
    } else if (cd < 0x800) {
      // 2-byte UTF-8.
      emit([0xC0 | (cd >> 6), 0x80 | (cd & 0x3F)], i);
    } else {
      // 3-byte UTF-8.
      emit([0xE0 | (cd >> 12), 0x80 | ((cd >> 6) & 0x3F), 0x80 | (cd & 0x3F)], i);
    }
    ++i;
  }
};
|
|
86
|
+
|
|
87
|
+
/**
 * Splits `s` into three pieces so that the middle piece neither begins with
 * a low surrogate nor ends with a high surrogate.
 *
 * @param {string} s - Source string.
 * @returns {{bfr: string, mid: string, aft: string}}
 *   bfr: the first code unit when it is a low surrogate (0xDC00..0xDFFF),
 *        otherwise "";
 *   aft: the last code unit when it is a high surrogate (0xD800..0xDBFF),
 *        otherwise "";
 *   mid: everything between bfr and aft.
 */
const fmt_for_utf8 = (s) => {
  const len = s.length;
  // NaN fails both range checks below, which covers the empty string.
  const head = len > 0 ? s.charCodeAt(0) : NaN;
  const tail = len > 0 ? s.charCodeAt(len - 1) : NaN;

  // A leading low surrogate is the second half of a pair that started earlier.
  const bfr = head >= 0xDC00 && head <= 0xDFFF ? s.slice(0, 1) : "";
  // A trailing high surrogate is the first half of a pair that ends later.
  const aft = tail >= 0xD800 && tail <= 0xDBFF ? s.slice(len - 1) : "";

  const mid = s.slice(bfr.length, len - aft.length);
  return { bfr, mid, aft };
};
|
|
116
|
+
|
|
117
|
+
module.exports = {
|
|
118
|
+
fmt_for_utf8,
|
|
119
|
+
foreach_utf8,
|
|
120
|
+
foreach_utf16le,
|
|
121
|
+
foreach_utf16be,
|
|
122
|
+
}
|