nv-string-foreach-byt 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.js +122 -0
  2. package/package.json +11 -0
package/index.js ADDED
@@ -0,0 +1,122 @@
1
+
2
/**
 * Feeds the UTF-16 little-endian bytes of `s` to `callback`, one byte at a time.
 *
 * For every code unit in `[si, ei)` the low byte is reported first, then the
 * high byte.
 *
 * @param {string} s - Source string.
 * @param {(byte: number, byteIndex: number, offset: number, unitIndex: number) => void} callback
 *   Receives the byte value, the running byte index (counted from 0 at `si`),
 *   the byte's position inside the code unit (0 or 1) and the index of the
 *   code unit in `s`.
 * @param {number} [si=0] - Index of the first code unit to visit.
 * @param {number} [ei=s.length] - Index one past the last code unit to visit.
 * @returns {void}
 */
const foreach_utf16le = (s, callback, si = 0, ei = s.length) => {
    let byteIndex = 0; // running byte counter across the whole range
    let idx = si;
    while (idx < ei) {
        const unit = s.charCodeAt(idx);
        // Little-endian: low 8 bits go out first.
        callback(unit & 0xFF, byteIndex, 0, idx);
        // Then the high 8 bits.
        callback((unit >>> 8) & 0xFF, byteIndex + 1, 1, idx);
        byteIndex += 2;
        idx++;
    }
};
14
+
15
+
16
/**
 * Feeds the UTF-16 big-endian bytes of `s` to `callback`, one byte at a time.
 *
 * For every code unit in `[si, ei)` the high byte is reported first, then the
 * low byte.
 *
 * @param {string} s - Source string.
 * @param {(byte: number, byteIndex: number, offset: number, unitIndex: number) => void} callback
 *   Receives the byte value, the running byte index (counted from 0 at `si`),
 *   the byte's position inside the code unit (0 or 1) and the index of the
 *   code unit in `s`.
 * @param {number} [si=0] - Index of the first code unit to visit.
 * @param {number} [ei=s.length] - Index one past the last code unit to visit.
 * @returns {void}
 */
const foreach_utf16be = (s, callback, si = 0, ei = s.length) => {
    let byteIndex = 0; // running byte counter across the whole range
    let idx = si;
    while (idx < ei) {
        const unit = s.charCodeAt(idx);
        // Big-endian: high 8 bits go out first.
        callback((unit >>> 8) & 0xFF, byteIndex, 0, idx);
        // Then the low 8 bits.
        callback(unit & 0xFF, byteIndex + 1, 1, idx);
        byteIndex += 2;
        idx++;
    }
};
28
+
29
// UTF-16 surrogate classification for a single code unit value.
// High (leading) surrogates: 0xD800..0xDBFF.
const is_u16_hi_cd = (cd) => {
    return cd >= 0xD800 && cd <= 0xDBFF;
};
// Low (trailing) surrogates: 0xDC00..0xDFFF.
const is_u16_lo_cd = (cd) => {
    return cd >= 0xDC00 && cd <= 0xDFFF;
};
// Any surrogate, high or low: 0xD800..0xDFFF.
const is_u16_hilo_cd = (cd) => {
    return cd >= 0xD800 && cd <= 0xDFFF;
};
32
+
33
/**
 * Feeds the UTF-8 encoding of the code units `[si, ei)` of `s` to `callback`,
 * one byte at a time.
 *
 * A high surrogate immediately followed (inside the range) by a low surrogate
 * is encoded as one 4-byte sequence. Any other surrogate code unit is
 * unpaired and is encoded as U+FFFD (EF BF BD), so the emitted bytes are
 * always well-formed UTF-8 and no following code unit is swallowed.
 *
 * `si` and `ei` are expected to lie within the string; they are not clamped.
 *
 * @param {string} s - Source string.
 * @param {(byte: number, byteIndex: number, offset: number, unitIndex: number) => void} callback
 *   Receives the byte value, the running byte index (counted from 0 at `si`),
 *   the byte's position inside the encoded character (0..3) and the index in
 *   `s` of the first code unit of that character.
 * @param {number} [si=0] - Index of the first code unit to visit.
 * @param {number} [ei=s.length] - Index one past the last code unit to visit.
 * @returns {void}
 */
const foreach_utf8 = (s, callback, si = 0, ei = s.length) => {
    let byteIndex = 0; // running byte counter across the whole range
    let i = si;
    while (i < ei) {
        const cd = s.charCodeAt(i);

        // Surrogate pair: only when the partner is inside the range and really
        // is a low surrogate. Otherwise fall through and treat `cd` alone.
        if (is_u16_hi_cd(cd) && i + 1 < ei) {
            const lo = s.charCodeAt(i + 1);
            if (is_u16_lo_cd(lo)) {
                const codePoint = 0x10000 + ((cd - 0xD800) << 10) + (lo - 0xDC00);
                callback(0xF0 | (codePoint >> 18), byteIndex++, 0, i);
                callback(0x80 | ((codePoint >> 12) & 0x3F), byteIndex++, 1, i);
                callback(0x80 | ((codePoint >> 6) & 0x3F), byteIndex++, 2, i);
                callback(0x80 | (codePoint & 0x3F), byteIndex++, 3, i);
                i += 2;
                continue;
            }
        }

        // Single code unit. An unpaired surrogate has no valid UTF-8 form, so
        // it is replaced by U+FFFD.
        const cp = is_u16_hilo_cd(cd) ? 0xFFFD : cd;
        if (cp < 128) {
            // 1-byte sequence
            callback(cp, byteIndex++, 0, i);
        } else if (cp < 2048) {
            // 2-byte sequence
            callback(0xC0 | (cp >> 6), byteIndex++, 0, i);
            callback(0x80 | (cp & 0x3F), byteIndex++, 1, i);
        } else {
            // 3-byte sequence
            callback(0xE0 | (cp >> 12), byteIndex++, 0, i);
            callback(0x80 | ((cp >> 6) & 0x3F), byteIndex++, 1, i);
            callback(0x80 | (cp & 0x3F), byteIndex++, 2, i);
        }
        ++i;
    }
};
86
+
87
/**
 * Splits `s` into a leading fragment, a middle part and a trailing fragment.
 *
 * `bfr` is the first code unit when it is a low surrogate (the second half of
 * a pair whose first half is missing), otherwise "". `aft` is the last code
 * unit when it is a high surrogate (the first half of a pair whose second half
 * is missing), otherwise "". `mid` is everything in between.
 *
 * @param {string} s - Source string.
 * @returns {{bfr: string, mid: string, aft: string}} The three pieces; their
 *   concatenation equals `s`.
 */
const fmt_for_utf8 = (s) => {
    const total = s.length;
    const hasUnits = total > 0;

    // Number of code units peeled off each end (0 or 1).
    const headLen = hasUnits && is_u16_lo_cd(s.charCodeAt(0)) ? 1 : 0;
    const tailLen = hasUnits && is_u16_hi_cd(s.charCodeAt(total - 1)) ? 1 : 0;

    const bfr = s.slice(0, headLen);
    const aft = tailLen === 1 ? s.slice(total - 1) : "";
    const mid = s.slice(headLen, total - tailLen);

    return { bfr, mid, aft };
};
116
+
117
+ module.exports = {
118
+ fmt_for_utf8,
119
+ foreach_utf8,
120
+ foreach_utf16le,
121
+ foreach_utf16be,
122
+ }
package/package.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "name": "nv-string-foreach-byt",
3
+ "version": "1.0.0",
4
+ "main": "index.js",
5
+ "scripts": {
6
+ "test": "echo \"Error: no test specified\" && exit 1"
7
+ },
8
+ "author": "",
9
+ "license": "ISC",
10
+ "description": ""
11
+ }