mol_vary 0.0.96 → 0.0.98
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node.d.ts +1 -4
- package/node.d.ts.map +1 -1
- package/node.deps.json +1 -1
- package/node.js +116 -66
- package/node.js.map +1 -1
- package/node.mjs +116 -66
- package/node.test.js +162 -96
- package/node.test.js.map +1 -1
- package/package.json +1 -3
- package/web.d.ts +1 -4
- package/web.d.ts.map +1 -1
- package/web.deps.json +1 -1
- package/web.js +116 -66
- package/web.js.map +1 -1
- package/web.mjs +116 -66
- package/web.test.js +46 -30
- package/web.test.js.map +1 -1
package/web.mjs
CHANGED
|
@@ -77,57 +77,146 @@ var $;
|
|
|
77
77
|
"use strict";
|
|
78
78
|
var $;
|
|
79
79
|
(function ($) {
|
|
80
|
+
const ascii_set = [...`0123456789.,:;()'"- \n`].map(c => c.charCodeAt(0));
|
|
81
|
+
const ascii_map = new Array(0x80).fill(0);
|
|
82
|
+
for (let i = 0; i < ascii_set.length; ++i)
|
|
83
|
+
ascii_map[ascii_set[i]] = i | 0x80;
|
|
84
|
+
const diacr_set = [
|
|
85
|
+
0x00, 0x01, 0x0F, 0x0B, 0x07, 0x08, 0x12, 0x13,
|
|
86
|
+
0x02, 0x0C, 0x06, 0x11, 0x03, 0x09, 0x0A, 0x04,
|
|
87
|
+
0x28, 0x31, 0x27, 0x26, 0x23,
|
|
88
|
+
];
|
|
89
|
+
const diacr_map = new Array(0x80).fill(0);
|
|
90
|
+
for (let i = 0; i < diacr_set.length; ++i)
|
|
91
|
+
diacr_map[diacr_set[i]] = i | 0x80;
|
|
92
|
+
const wide_offset = 0x0E_00;
|
|
93
|
+
const wide_limit = 128 * 128 * 8 + wide_offset;
|
|
94
|
+
const tiny_limit = 128 * 98;
|
|
95
|
+
const full_mode = 0x95;
|
|
96
|
+
const wide_mode = 0x96;
|
|
97
|
+
const tiny_mode = 0x9E;
|
|
80
98
|
function $mol_charset_ucf_encode(str) {
|
|
81
99
|
const buf = $mol_charset_buffer(str.length * 3);
|
|
82
100
|
return buf.slice(0, $mol_charset_ucf_encode_to(str, buf));
|
|
83
101
|
}
|
|
84
102
|
$.$mol_charset_ucf_encode = $mol_charset_ucf_encode;
|
|
85
|
-
const fast_char = `0123456789.,:;()?!-'" \n`;
|
|
86
|
-
const fast_map = new Array(0x80).fill(0);
|
|
87
|
-
for (let i = 0; i < fast_char.length; ++i)
|
|
88
|
-
fast_map[fast_char[i].charCodeAt(0)] = i | 0x80;
|
|
89
103
|
function $mol_charset_ucf_encode_to(str, buf, from = 0) {
|
|
90
104
|
let pos = from;
|
|
91
|
-
let mode =
|
|
105
|
+
let mode = tiny_mode;
|
|
106
|
+
const write_high = (code) => {
|
|
107
|
+
buf[pos++] = ((code + 128 - mode) & 0x7F) | 0x80;
|
|
108
|
+
};
|
|
109
|
+
const write_remap = (code) => {
|
|
110
|
+
const fast = ascii_map[code];
|
|
111
|
+
if (fast)
|
|
112
|
+
write_high(fast);
|
|
113
|
+
else
|
|
114
|
+
buf[pos++] = code;
|
|
115
|
+
};
|
|
116
|
+
const write_mode = (m) => {
|
|
117
|
+
write_high(m);
|
|
118
|
+
mode = m;
|
|
119
|
+
};
|
|
92
120
|
for (let i = 0; i < str.length; i++) {
|
|
93
121
|
let code = str.charCodeAt(i);
|
|
94
|
-
if (code >=
|
|
122
|
+
if (code >= 0xD8_00 && code < 0xDC_00)
|
|
95
123
|
code = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
|
|
96
124
|
if (code < 0x80) {
|
|
97
|
-
if (mode !==
|
|
98
|
-
const fast =
|
|
99
|
-
if (fast)
|
|
100
|
-
|
|
101
|
-
else
|
|
102
|
-
buf[pos++] = mode = 0x9C;
|
|
125
|
+
if (mode !== tiny_mode) {
|
|
126
|
+
const fast = ascii_map[code];
|
|
127
|
+
if (!fast)
|
|
128
|
+
write_mode(tiny_mode);
|
|
103
129
|
}
|
|
104
130
|
buf[pos++] = code;
|
|
105
131
|
}
|
|
106
|
-
else if (code <
|
|
107
|
-
const page = (code >> 7) +
|
|
132
|
+
else if (code < tiny_limit) {
|
|
133
|
+
const page = (code >> 7) + tiny_mode;
|
|
134
|
+
code &= 0x7F;
|
|
135
|
+
if (page === 164) {
|
|
136
|
+
const fast = diacr_map[code];
|
|
137
|
+
if (fast) {
|
|
138
|
+
if (mode !== tiny_mode)
|
|
139
|
+
write_mode(tiny_mode);
|
|
140
|
+
write_high(fast);
|
|
141
|
+
continue;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
108
144
|
if (mode !== page)
|
|
109
|
-
|
|
110
|
-
|
|
145
|
+
write_mode(page);
|
|
146
|
+
write_remap(code);
|
|
111
147
|
}
|
|
112
|
-
else if (code <
|
|
113
|
-
code -=
|
|
114
|
-
const page = (code >>
|
|
148
|
+
else if (code < wide_limit) {
|
|
149
|
+
code -= wide_offset;
|
|
150
|
+
const page = (code >> 14) + wide_mode;
|
|
115
151
|
if (mode !== page)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
152
|
+
write_mode(page);
|
|
153
|
+
write_remap(code & 0x7F);
|
|
154
|
+
write_remap((code >> 7) & 0x7F);
|
|
119
155
|
}
|
|
120
156
|
else {
|
|
121
|
-
if (mode !==
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
157
|
+
if (mode !== full_mode)
|
|
158
|
+
write_mode(full_mode);
|
|
159
|
+
write_remap(code & 0x7F);
|
|
160
|
+
write_remap((code >> 7) & 0x7F);
|
|
161
|
+
write_remap(code >> 14);
|
|
126
162
|
}
|
|
127
163
|
}
|
|
164
|
+
if (mode !== tiny_mode)
|
|
165
|
+
write_mode(tiny_mode);
|
|
128
166
|
return pos - from;
|
|
129
167
|
}
|
|
130
168
|
$.$mol_charset_ucf_encode_to = $mol_charset_ucf_encode_to;
|
|
169
|
+
function $mol_charset_ucf_decode(buffer, mode = tiny_mode) {
|
|
170
|
+
let text = '';
|
|
171
|
+
let pos = 0;
|
|
172
|
+
let page_offset = 0;
|
|
173
|
+
const read_code = () => {
|
|
174
|
+
let code = buffer[pos++];
|
|
175
|
+
if (code > 0x80)
|
|
176
|
+
code = ((mode + code) & 0x7F) | 0x80;
|
|
177
|
+
return code;
|
|
178
|
+
};
|
|
179
|
+
const read_remap = () => {
|
|
180
|
+
let code = read_code();
|
|
181
|
+
if (code >= 0x80)
|
|
182
|
+
code = ascii_set[code - 0x80];
|
|
183
|
+
return code;
|
|
184
|
+
};
|
|
185
|
+
while (pos < buffer.length) {
|
|
186
|
+
let code = read_code();
|
|
187
|
+
if (code < full_mode) {
|
|
188
|
+
if (mode === tiny_mode) {
|
|
189
|
+
if (code > 0x80) {
|
|
190
|
+
code = diacr_set[code - 0x080] | (6 << 7);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
else if (!ascii_map[code]) {
|
|
194
|
+
if (code >= 0x80)
|
|
195
|
+
code = ascii_set[code - 0x80];
|
|
196
|
+
if (mode < tiny_mode)
|
|
197
|
+
code |= read_remap() << 7;
|
|
198
|
+
if (mode === full_mode)
|
|
199
|
+
code |= read_remap() << 14;
|
|
200
|
+
code += page_offset;
|
|
201
|
+
}
|
|
202
|
+
text += String.fromCodePoint(code);
|
|
203
|
+
}
|
|
204
|
+
else if (code >= tiny_mode) {
|
|
205
|
+
mode = code;
|
|
206
|
+
page_offset = (mode - tiny_mode) << 7;
|
|
207
|
+
}
|
|
208
|
+
else if (code === full_mode) {
|
|
209
|
+
mode = code;
|
|
210
|
+
page_offset = 0;
|
|
211
|
+
}
|
|
212
|
+
else {
|
|
213
|
+
mode = code;
|
|
214
|
+
page_offset = ((mode - wide_mode) << 14) + wide_offset;
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
return text;
|
|
218
|
+
}
|
|
219
|
+
$.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
|
|
131
220
|
})($ || ($ = {}));
|
|
132
221
|
|
|
133
222
|
;
|
|
@@ -156,45 +245,6 @@ var $;
|
|
|
156
245
|
$.$mol_bigint_decode = $mol_bigint_decode;
|
|
157
246
|
})($ || ($ = {}));
|
|
158
247
|
|
|
159
|
-
;
|
|
160
|
-
"use strict";
|
|
161
|
-
var $;
|
|
162
|
-
(function ($) {
|
|
163
|
-
const fast_char = `0123456789.,:;()?!-'" \n`;
|
|
164
|
-
function $mol_charset_ucf_decode(buffer, mode = 0x9C) {
|
|
165
|
-
let text = '';
|
|
166
|
-
let pos = 0;
|
|
167
|
-
let page_offset = 0;
|
|
168
|
-
while (pos < buffer.length) {
|
|
169
|
-
let code = buffer[pos++];
|
|
170
|
-
if (code < 0x80) {
|
|
171
|
-
if (mode < 0x9C)
|
|
172
|
-
code |= buffer[pos++] << 7;
|
|
173
|
-
if (mode === 0x97)
|
|
174
|
-
code |= buffer[pos++] << 15;
|
|
175
|
-
text += String.fromCodePoint(page_offset + code);
|
|
176
|
-
}
|
|
177
|
-
else if (code < 0x97) {
|
|
178
|
-
text += fast_char[code - 0x80];
|
|
179
|
-
}
|
|
180
|
-
else if (code >= 0x9C) {
|
|
181
|
-
mode = code;
|
|
182
|
-
page_offset = (mode - 0x9C) << 7;
|
|
183
|
-
}
|
|
184
|
-
else if (code === 0x97) {
|
|
185
|
-
mode = code;
|
|
186
|
-
page_offset = 0;
|
|
187
|
-
}
|
|
188
|
-
else {
|
|
189
|
-
mode = code;
|
|
190
|
-
page_offset = ((mode - 0x98) << 15) + 0x20_00;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
return text;
|
|
194
|
-
}
|
|
195
|
-
$.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
|
|
196
|
-
})($ || ($ = {}));
|
|
197
|
-
|
|
198
248
|
;
|
|
199
249
|
"use strict";
|
|
200
250
|
var $;
|
package/web.test.js
CHANGED
|
@@ -1332,15 +1332,51 @@ var $;
|
|
|
1332
1332
|
(function ($_1) {
|
|
1333
1333
|
var $$;
|
|
1334
1334
|
(function ($$) {
|
|
1335
|
+
function check(text, bytes) {
|
|
1336
|
+
const ideal = new Uint8Array(bytes);
|
|
1337
|
+
const actual = $mol_charset_ucf_encode(text);
|
|
1338
|
+
$mol_assert_equal($mol_charset_ucf_decode(actual), text);
|
|
1339
|
+
$mol_assert_equal(actual, ideal);
|
|
1340
|
+
}
|
|
1335
1341
|
$mol_test({
|
|
1336
|
-
"
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
])
|
|
1342
|
+
"Full ASCII compatible"($) {
|
|
1343
|
+
check('hi', [0x68, 0x69]);
|
|
1344
|
+
},
|
|
1345
|
+
"1B ASCII with diacritic"($) {
|
|
1346
|
+
check('allo\u0302', [0x61, 0x6C, 0x6C, 0x6F, 0xEA]);
|
|
1347
|
+
},
|
|
1348
|
+
"1B Cyrillic"($) {
|
|
1349
|
+
check('мир', [0x88, 0x3C, 0xE2, 0x40, 0xF8]);
|
|
1350
|
+
},
|
|
1351
|
+
"1B Cyrillic with nummbers and punctuation"($) {
|
|
1352
|
+
check('м.1', [0x88, 0x3C, 0x2E, 0x31, 0xF8]);
|
|
1353
|
+
},
|
|
1354
|
+
"2B Kanji"($) {
|
|
1355
|
+
check('美', [0xF9, 0x0E, 0x63, 0x87]);
|
|
1356
|
+
},
|
|
1357
|
+
"3B rare Kanji"($) {
|
|
1358
|
+
check('𲎯', [0xF7, 0x2F, 0x47, 0x0C, 0x89]);
|
|
1359
|
+
},
|
|
1360
|
+
"1B Kana"($) {
|
|
1361
|
+
check('しい', [0xE0, 0x57, 0x44, 0xA0]);
|
|
1362
|
+
},
|
|
1363
|
+
"2B Emoji"($) {
|
|
1364
|
+
check('🏴', [0xFF, 0x74, 0x4B, 0x81]);
|
|
1365
|
+
},
|
|
1366
|
+
"2B Emoji with 1B modifiers"($) {
|
|
1367
|
+
check('🏴☠', [0xFF, 0x74, 0x4B, 0xC1, 0x0D, 0x8C, 0xA9, 0xB4]);
|
|
1368
|
+
},
|
|
1369
|
+
"2B Emoji with 3B Tag"($) {
|
|
1370
|
+
check('🏴\u{E007F}', [0xFF, 0x74, 0x4B, 0xF8, 0x7F, 0x00, 0xF3, 0x89]);
|
|
1371
|
+
},
|
|
1372
|
+
"Mixed scripts"($) {
|
|
1373
|
+
check('allô 美しい мир, 🏴☠\n', [
|
|
1374
|
+
0x61, 0x6C, 0x6C, 0x6F, 0xEA, 0x20,
|
|
1375
|
+
0xF9, 0x0E, 0x63, 0xE7, 0x57, 0x44, 0x20,
|
|
1376
|
+
0xA8, 0x3C, 0xE2, 0x40, 0x2C, 0x20,
|
|
1377
|
+
0xF7, 0x74, 0x4B, 0xC1, 0x0D, 0x8C, 0xA9, 0x0A,
|
|
1378
|
+
0xB4,
|
|
1379
|
+
]);
|
|
1344
1380
|
},
|
|
1345
1381
|
});
|
|
1346
1382
|
})($$ = $_1.$$ || ($_1.$$ = {}));
|
|
@@ -1386,26 +1422,6 @@ var $;
|
|
|
1386
1422
|
})($$ = $_1.$$ || ($_1.$$ = {}));
|
|
1387
1423
|
})($ || ($ = {}));
|
|
1388
1424
|
|
|
1389
|
-
;
|
|
1390
|
-
"use strict";
|
|
1391
|
-
var $;
|
|
1392
|
-
(function ($_1) {
|
|
1393
|
-
var $$;
|
|
1394
|
-
(function ($$) {
|
|
1395
|
-
$mol_test({
|
|
1396
|
-
"Complex UCF eecoding"($) {
|
|
1397
|
-
$mol_assert_equal('hi мир, 美しい 世界 🏴☠\t\n', $mol_charset_ucf_decode(new Uint8Array([
|
|
1398
|
-
0x68, 0x69, 0x20,
|
|
1399
|
-
0xA4, 0x3C, 0x38, 0x40, 0x8B, 0x95,
|
|
1400
|
-
0x98, 0x0E, 0xBF, 0xFC, 0x57, 0x44, 0x95,
|
|
1401
|
-
0x98, 0x16, 0x5C, 0x4C, 0xAA, 0x95,
|
|
1402
|
-
0x9B, 0x74, 0xA7, 0xDC, 0x0D, 0xE8, 0x20, 0x9C, 0x09, 0x0A,
|
|
1403
|
-
])));
|
|
1404
|
-
},
|
|
1405
|
-
});
|
|
1406
|
-
})($$ = $_1.$$ || ($_1.$$ = {}));
|
|
1407
|
-
})($ || ($ = {}));
|
|
1408
|
-
|
|
1409
1425
|
;
|
|
1410
1426
|
"use strict";
|
|
1411
1427
|
var $;
|
|
@@ -1649,11 +1665,11 @@ var $;
|
|
|
1649
1665
|
},
|
|
1650
1666
|
"vary pack text"($) {
|
|
1651
1667
|
check(['foo'], [text | 3, ...str('foo')]);
|
|
1652
|
-
check(['абв'], [text |
|
|
1668
|
+
check(['абв'], [text | 5, ...str('абв')]);
|
|
1653
1669
|
const long_lat = 'abcdefghijklmnopqrst';
|
|
1654
1670
|
check([long_lat], [text | L1, 20, ...str(long_lat)]);
|
|
1655
1671
|
const long_cyr = 'абвгдеёжзийклмнопрст';
|
|
1656
|
-
check([long_cyr], [text | L1,
|
|
1672
|
+
check([long_cyr], [text | L1, 22, ...str(long_cyr)]);
|
|
1657
1673
|
},
|
|
1658
1674
|
"vary pack dedup text"($) {
|
|
1659
1675
|
check([["f", "f"]], [list | 2, text | 1, ...str('f'), link | 0]);
|