mol_vary 0.0.93 → 0.0.95

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/web.mjs CHANGED
@@ -77,54 +77,57 @@ var $;
77
77
  "use strict";
78
78
  var $;
79
79
  (function ($) {
80
- function $mol_charset_encode(str) {
80
+ function $mol_charset_ucf_encode(str) {
81
81
  const buf = $mol_charset_buffer(str.length * 3);
82
- return buf.slice(0, $mol_charset_encode_to(str, buf));
82
+ return buf.slice(0, $mol_charset_ucf_encode_to(str, buf));
83
83
  }
84
- $.$mol_charset_encode = $mol_charset_encode;
85
- function $mol_charset_encode_to(str, buf, from = 0) {
84
+ $.$mol_charset_ucf_encode = $mol_charset_ucf_encode;
85
+ const fast_char = `0123456789.,:;()?!-'" \n`;
86
+ const fast_map = new Array(0x80).fill(0);
87
+ for (let i = 0; i < fast_char.length; ++i)
88
+ fast_map[fast_char[i].charCodeAt(0)] = i | 0x80;
89
+ function $mol_charset_ucf_encode_to(str, buf, from = 0) {
86
90
  let pos = from;
91
+ let mode = 0x9C;
87
92
  for (let i = 0; i < str.length; i++) {
88
93
  let code = str.charCodeAt(i);
94
+ if (code >= 0xd800 && code < 0xe000)
95
+ code = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
89
96
  if (code < 0x80) {
97
+ if (mode !== 0x9C) {
98
+ const fast = fast_map[code];
99
+ if (fast)
100
+ code = fast;
101
+ else
102
+ buf[pos++] = mode = 0x9C;
103
+ }
90
104
  buf[pos++] = code;
91
105
  }
92
- else if (code < 0x800) {
93
- buf[pos++] = 0xc0 | (code >> 6);
94
- buf[pos++] = 0x80 | (code & 0x3f);
106
+ else if (code < 0x32_00) {
107
+ const page = (code >> 7) + 0x9C;
108
+ if (mode !== page)
109
+ buf[pos++] = mode = page;
110
+ buf[pos++] = code & 0x7F;
95
111
  }
96
- else if (code < 0xd800 || code >= 0xe000) {
97
- buf[pos++] = 0xe0 | (code >> 12);
98
- buf[pos++] = 0x80 | ((code >> 6) & 0x3f);
99
- buf[pos++] = 0x80 | (code & 0x3f);
112
+ else if (code < 0x04_20_00) {
113
+ code -= 0x2000;
114
+ const page = (code >> 15) + 0x98;
115
+ if (mode !== page)
116
+ buf[pos++] = mode = page;
117
+ buf[pos++] = code & 0x7F;
118
+ buf[pos++] = code >> 7;
100
119
  }
101
120
  else {
102
- const point = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
103
- buf[pos++] = 0xf0 | (point >> 18);
104
- buf[pos++] = 0x80 | ((point >> 12) & 0x3f);
105
- buf[pos++] = 0x80 | ((point >> 6) & 0x3f);
106
- buf[pos++] = 0x80 | (point & 0x3f);
121
+ if (mode !== 0x97)
122
+ buf[pos++] = mode = 0x97;
123
+ buf[pos++] = code & 0x7F;
124
+ buf[pos++] = code >> 7;
125
+ buf[pos++] = code >> 15;
107
126
  }
108
127
  }
109
128
  return pos - from;
110
129
  }
111
- $.$mol_charset_encode_to = $mol_charset_encode_to;
112
- function $mol_charset_encode_size(str) {
113
- let size = 0;
114
- for (let i = 0; i < str.length; i++) {
115
- let code = str.charCodeAt(i);
116
- if (code < 0x80)
117
- size += 1;
118
- else if (code < 0x800)
119
- size += 2;
120
- else if (code < 0xd800 || code >= 0xe000)
121
- size += 3;
122
- else
123
- size += 4;
124
- }
125
- return size;
126
- }
127
- $.$mol_charset_encode_size = $mol_charset_encode_size;
130
+ $.$mol_charset_ucf_encode_to = $mol_charset_ucf_encode_to;
128
131
  })($ || ($ = {}));
129
132
 
130
133
  ;
@@ -153,21 +156,43 @@ var $;
153
156
  $.$mol_bigint_decode = $mol_bigint_decode;
154
157
  })($ || ($ = {}));
155
158
 
156
- ;
157
- "use strict";
158
-
159
159
  ;
160
160
  "use strict";
161
161
  var $;
162
162
  (function ($) {
163
- const decoders = {};
164
- function $mol_charset_decode(buffer, encoding = 'utf8') {
165
- let decoder = decoders[encoding];
166
- if (!decoder)
167
- decoder = decoders[encoding] = new TextDecoder(encoding);
168
- return decoder.decode(buffer);
163
+ const fast_char = `0123456789.,:;()?!-'" \n`;
164
+ function $mol_charset_ucf_decode(buffer, mode = 0x9C) {
165
+ let text = '';
166
+ let pos = 0;
167
+ let page_offset = 0;
168
+ while (pos < buffer.length) {
169
+ let code = buffer[pos++];
170
+ if (code < 0x80) {
171
+ if (mode < 0x9C)
172
+ code |= buffer[pos++] << 7;
173
+ if (mode === 0x97)
174
+ code |= buffer[pos++] << 15;
175
+ text += String.fromCodePoint(page_offset + code);
176
+ }
177
+ else if (code < 0x97) {
178
+ text += fast_char[code - 0x80];
179
+ }
180
+ else if (code >= 0x9C) {
181
+ mode = code;
182
+ page_offset = (mode - 0x9C) << 7;
183
+ }
184
+ else if (code === 0x97) {
185
+ mode = code;
186
+ page_offset = 0;
187
+ }
188
+ else {
189
+ mode = code;
190
+ page_offset = ((mode - 0x98) << 15) + 0x20_00;
191
+ }
192
+ }
193
+ return text;
169
194
  }
170
- $.$mol_charset_decode = $mol_charset_decode;
195
+ $.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
171
196
  })($ || ($ = {}));
172
197
 
173
198
  ;
@@ -395,7 +420,7 @@ var $;
395
420
  const len_max = val.length * 3;
396
421
  const len_size = calc_size(len_max);
397
422
  acquire(len_max);
398
- const len = $mol_charset_encode_to(val, this.array, pos + len_size);
423
+ const len = $mol_charset_ucf_encode_to(val, this.array, pos + len_size);
399
424
  dump_unum($mol_vary_tip.text, len, len_max);
400
425
  pos += len;
401
426
  release(len_max - len);
@@ -604,7 +629,7 @@ var $;
604
629
  };
605
630
  const read_text = (kind) => {
606
631
  const len = read_unum(kind);
607
- const text = $mol_charset_decode(new Uint8Array(array.buffer, array.byteOffset + pos, len));
632
+ const text = $mol_charset_ucf_decode(new Uint8Array(array.buffer, array.byteOffset + pos, len));
608
633
  pos += len;
609
634
  stream.push(text);
610
635
  return text;
package/web.test.js CHANGED
@@ -1329,27 +1329,21 @@ var $;
1329
1329
  ;
1330
1330
  "use strict";
1331
1331
  var $;
1332
- (function ($) {
1333
- $mol_test({
1334
- 'encode empty'() {
1335
- $mol_assert_equal($mol_charset_encode(''), new Uint8Array([]));
1336
- },
1337
- 'encode 1 octet'() {
1338
- $mol_assert_equal($mol_charset_encode('F'), new Uint8Array([0x46]));
1339
- },
1340
- 'encode 2 octet'() {
1341
- $mol_assert_equal($mol_charset_encode('Б'), new Uint8Array([0xd0, 0x91]));
1342
- },
1343
- 'encode 3 octet'() {
1344
- $mol_assert_equal($mol_charset_encode('ह'), new Uint8Array([0xe0, 0xa4, 0xb9]));
1345
- },
1346
- 'encode 4 octet'() {
1347
- $mol_assert_equal($mol_charset_encode('𐍈'), new Uint8Array([0xf0, 0x90, 0x8d, 0x88]));
1348
- },
1349
- 'encode surrogate pair'() {
1350
- $mol_assert_equal($mol_charset_encode('😀'), new Uint8Array([0xf0, 0x9f, 0x98, 0x80]));
1351
- },
1352
- });
1332
+ (function ($_1) {
1333
+ var $$;
1334
+ (function ($$) {
1335
+ $mol_test({
1336
+ "Complex UCF encoding"($) {
1337
+ $mol_assert_equal($mol_charset_ucf_encode('hi мир, 美しい 世界 🏴‍☠\t\n'), new Uint8Array([
1338
+ 0x68, 0x69, 0x20,
1339
+ 0xA4, 0x3C, 0x38, 0x40, 0x8B, 0x95,
1340
+ 0x98, 0x0E, 0xBF, 0xFC, 0x57, 0x44, 0x95,
1341
+ 0x98, 0x16, 0x5C, 0x4C, 0xAA, 0x95,
1342
+ 0x9B, 0x74, 0xA7, 0xDC, 0x0D, 0xE8, 0x20, 0x9C, 0x09, 0x0A,
1343
+ ]));
1344
+ },
1345
+ });
1346
+ })($$ = $_1.$$ || ($_1.$$ = {}));
1353
1347
  })($ || ($ = {}));
1354
1348
 
1355
1349
  ;
@@ -1395,19 +1389,21 @@ var $;
1395
1389
  ;
1396
1390
  "use strict";
1397
1391
  var $;
1398
- (function ($) {
1399
- $mol_test({
1400
- 'decode utf8 string'() {
1401
- const str = 'Hello, ΧΨΩЫ';
1402
- const encoded = new Uint8Array([72, 101, 108, 108, 111, 44, 32, 206, 167, 206, 168, 206, 169, 208, 171]);
1403
- $mol_assert_equal($mol_charset_decode(encoded), str);
1404
- $mol_assert_equal($mol_charset_decode(encoded, 'utf8'), str);
1405
- },
1406
- 'decode empty string'() {
1407
- const encoded = new Uint8Array([]);
1408
- $mol_assert_equal($mol_charset_decode(encoded), '');
1409
- },
1410
- });
1392
+ (function ($_1) {
1393
+ var $$;
1394
+ (function ($$) {
1395
+ $mol_test({
1396
+ "Complex UCF eecoding"($) {
1397
+ $mol_assert_equal('hi мир, 美しい 世界 🏴‍☠\t\n', $mol_charset_ucf_decode(new Uint8Array([
1398
+ 0x68, 0x69, 0x20,
1399
+ 0xA4, 0x3C, 0x38, 0x40, 0x8B, 0x95,
1400
+ 0x98, 0x0E, 0xBF, 0xFC, 0x57, 0x44, 0x95,
1401
+ 0x98, 0x16, 0x5C, 0x4C, 0xAA, 0x95,
1402
+ 0x9B, 0x74, 0xA7, 0xDC, 0x0D, 0xE8, 0x20, 0x9C, 0x09, 0x0A,
1403
+ ])));
1404
+ },
1405
+ });
1406
+ })($$ = $_1.$$ || ($_1.$$ = {}));
1411
1407
  })($ || ($ = {}));
1412
1408
 
1413
1409
  ;
@@ -1564,7 +1560,7 @@ var $;
1564
1560
  const { uint, link, spec, blob, text, list, tupl, sint } = $mol_vary_tip;
1565
1561
  const { none, both, fp16, fp32, fp64 } = $mol_vary_spec;
1566
1562
  const { L1, L2, L4, L8, LA } = $mol_vary_len;
1567
- const str = $mol_charset_encode;
1563
+ const str = $mol_charset_ucf_encode;
1568
1564
  function check(vary, ideal, Vary = $mol_vary) {
1569
1565
  const pack = Vary.pack(vary);
1570
1566
  $mol_assert_equal(Vary.take(pack), vary);
@@ -1653,11 +1649,11 @@ var $;
1653
1649
  },
1654
1650
  "vary pack text"($) {
1655
1651
  check(['foo'], [text | 3, ...str('foo')]);
1656
- check(['абв'], [text | 6, ...str('абв')]);
1652
+ check(['абв'], [text | 4, ...str('абв')]);
1657
1653
  const long_lat = 'abcdefghijklmnopqrst';
1658
1654
  check([long_lat], [text | L1, 20, ...str(long_lat)]);
1659
1655
  const long_cyr = 'абвгдеёжзийклмнопрст';
1660
- check([long_cyr], [text | L1, 40, ...str(long_cyr)]);
1656
+ check([long_cyr], [text | L1, 21, ...str(long_cyr)]);
1661
1657
  },
1662
1658
  "vary pack dedup text"($) {
1663
1659
  check([["f", "f"]], [list | 2, text | 1, ...str('f'), link | 0]);