mol_vary 0.0.94 → 0.0.96

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in the public registry.
package/node.mjs CHANGED
@@ -77,54 +77,57 @@ var $;
77
77
  "use strict";
78
78
  var $;
79
79
  (function ($) {
80
- function $mol_charset_encode(str) {
80
+ function $mol_charset_ucf_encode(str) {
81
81
  const buf = $mol_charset_buffer(str.length * 3);
82
- return buf.slice(0, $mol_charset_encode_to(str, buf));
83
- }
84
- $.$mol_charset_encode = $mol_charset_encode;
85
- function $mol_charset_encode_to(str, buf, from = 0) {
82
+ return buf.slice(0, $mol_charset_ucf_encode_to(str, buf));
83
+ }
84
+ $.$mol_charset_ucf_encode = $mol_charset_ucf_encode;
85
+ const fast_char = `0123456789.,:;()?!-'" \n`;
86
+ const fast_map = new Array(0x80).fill(0);
87
+ for (let i = 0; i < fast_char.length; ++i)
88
+ fast_map[fast_char[i].charCodeAt(0)] = i | 0x80;
89
+ function $mol_charset_ucf_encode_to(str, buf, from = 0) {
86
90
  let pos = from;
91
+ let mode = 0x9C;
87
92
  for (let i = 0; i < str.length; i++) {
88
93
  let code = str.charCodeAt(i);
94
+ if (code >= 0xd800 && code < 0xe000)
95
+ code = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
89
96
  if (code < 0x80) {
97
+ if (mode !== 0x9C) {
98
+ const fast = fast_map[code];
99
+ if (fast)
100
+ code = fast;
101
+ else
102
+ buf[pos++] = mode = 0x9C;
103
+ }
90
104
  buf[pos++] = code;
91
105
  }
92
- else if (code < 0x800) {
93
- buf[pos++] = 0xc0 | (code >> 6);
94
- buf[pos++] = 0x80 | (code & 0x3f);
106
+ else if (code < 0x32_00) {
107
+ const page = (code >> 7) + 0x9C;
108
+ if (mode !== page)
109
+ buf[pos++] = mode = page;
110
+ buf[pos++] = code & 0x7F;
95
111
  }
96
- else if (code < 0xd800 || code >= 0xe000) {
97
- buf[pos++] = 0xe0 | (code >> 12);
98
- buf[pos++] = 0x80 | ((code >> 6) & 0x3f);
99
- buf[pos++] = 0x80 | (code & 0x3f);
112
+ else if (code < 0x04_20_00) {
113
+ code -= 0x2000;
114
+ const page = (code >> 15) + 0x98;
115
+ if (mode !== page)
116
+ buf[pos++] = mode = page;
117
+ buf[pos++] = code & 0x7F;
118
+ buf[pos++] = code >> 7;
100
119
  }
101
120
  else {
102
- const point = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
103
- buf[pos++] = 0xf0 | (point >> 18);
104
- buf[pos++] = 0x80 | ((point >> 12) & 0x3f);
105
- buf[pos++] = 0x80 | ((point >> 6) & 0x3f);
106
- buf[pos++] = 0x80 | (point & 0x3f);
121
+ if (mode !== 0x97)
122
+ buf[pos++] = mode = 0x97;
123
+ buf[pos++] = code & 0x7F;
124
+ buf[pos++] = code >> 7;
125
+ buf[pos++] = code >> 15;
107
126
  }
108
127
  }
109
128
  return pos - from;
110
129
  }
111
- $.$mol_charset_encode_to = $mol_charset_encode_to;
112
- function $mol_charset_encode_size(str) {
113
- let size = 0;
114
- for (let i = 0; i < str.length; i++) {
115
- let code = str.charCodeAt(i);
116
- if (code < 0x80)
117
- size += 1;
118
- else if (code < 0x800)
119
- size += 2;
120
- else if (code < 0xd800 || code >= 0xe000)
121
- size += 3;
122
- else
123
- size += 4;
124
- }
125
- return size;
126
- }
127
- $.$mol_charset_encode_size = $mol_charset_encode_size;
130
+ $.$mol_charset_ucf_encode_to = $mol_charset_ucf_encode_to;
128
131
  })($ || ($ = {}));
129
132
 
130
133
  ;
@@ -153,21 +156,43 @@ var $;
153
156
  $.$mol_bigint_decode = $mol_bigint_decode;
154
157
  })($ || ($ = {}));
155
158
 
156
- ;
157
- "use strict";
158
-
159
159
  ;
160
160
  "use strict";
161
161
  var $;
162
162
  (function ($) {
163
- const decoders = {};
164
- function $mol_charset_decode(buffer, encoding = 'utf8') {
165
- let decoder = decoders[encoding];
166
- if (!decoder)
167
- decoder = decoders[encoding] = new TextDecoder(encoding);
168
- return decoder.decode(buffer);
169
- }
170
- $.$mol_charset_decode = $mol_charset_decode;
163
+ const fast_char = `0123456789.,:;()?!-'" \n`;
164
+ function $mol_charset_ucf_decode(buffer, mode = 0x9C) {
165
+ let text = '';
166
+ let pos = 0;
167
+ let page_offset = 0;
168
+ while (pos < buffer.length) {
169
+ let code = buffer[pos++];
170
+ if (code < 0x80) {
171
+ if (mode < 0x9C)
172
+ code |= buffer[pos++] << 7;
173
+ if (mode === 0x97)
174
+ code |= buffer[pos++] << 15;
175
+ text += String.fromCodePoint(page_offset + code);
176
+ }
177
+ else if (code < 0x97) {
178
+ text += fast_char[code - 0x80];
179
+ }
180
+ else if (code >= 0x9C) {
181
+ mode = code;
182
+ page_offset = (mode - 0x9C) << 7;
183
+ }
184
+ else if (code === 0x97) {
185
+ mode = code;
186
+ page_offset = 0;
187
+ }
188
+ else {
189
+ mode = code;
190
+ page_offset = ((mode - 0x98) << 15) + 0x20_00;
191
+ }
192
+ }
193
+ return text;
194
+ }
195
+ $.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
171
196
  })($ || ($ = {}));
172
197
 
173
198
  ;
@@ -2474,7 +2499,7 @@ var $;
2474
2499
  const len_max = val.length * 3;
2475
2500
  const len_size = calc_size(len_max);
2476
2501
  acquire(len_max);
2477
- const len = $mol_charset_encode_to(val, this.array, pos + len_size);
2502
+ const len = $mol_charset_ucf_encode_to(val, this.array, pos + len_size);
2478
2503
  dump_unum($mol_vary_tip.text, len, len_max);
2479
2504
  pos += len;
2480
2505
  release(len_max - len);
@@ -2683,7 +2708,7 @@ var $;
2683
2708
  };
2684
2709
  const read_text = (kind) => {
2685
2710
  const len = read_unum(kind);
2686
- const text = $mol_charset_decode(new Uint8Array(array.buffer, array.byteOffset + pos, len));
2711
+ const text = $mol_charset_ucf_decode(new Uint8Array(array.buffer, array.byteOffset + pos, len));
2687
2712
  pos += len;
2688
2713
  stream.push(text);
2689
2714
  return text;
package/node.test.js CHANGED
@@ -68,54 +68,57 @@ var $;
68
68
  "use strict";
69
69
  var $;
70
70
  (function ($) {
71
- function $mol_charset_encode(str) {
71
+ function $mol_charset_ucf_encode(str) {
72
72
  const buf = $mol_charset_buffer(str.length * 3);
73
- return buf.slice(0, $mol_charset_encode_to(str, buf));
73
+ return buf.slice(0, $mol_charset_ucf_encode_to(str, buf));
74
74
  }
75
- $.$mol_charset_encode = $mol_charset_encode;
76
- function $mol_charset_encode_to(str, buf, from = 0) {
75
+ $.$mol_charset_ucf_encode = $mol_charset_ucf_encode;
76
+ const fast_char = `0123456789.,:;()?!-'" \n`;
77
+ const fast_map = new Array(0x80).fill(0);
78
+ for (let i = 0; i < fast_char.length; ++i)
79
+ fast_map[fast_char[i].charCodeAt(0)] = i | 0x80;
80
+ function $mol_charset_ucf_encode_to(str, buf, from = 0) {
77
81
  let pos = from;
82
+ let mode = 0x9C;
78
83
  for (let i = 0; i < str.length; i++) {
79
84
  let code = str.charCodeAt(i);
85
+ if (code >= 0xd800 && code < 0xe000)
86
+ code = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
80
87
  if (code < 0x80) {
88
+ if (mode !== 0x9C) {
89
+ const fast = fast_map[code];
90
+ if (fast)
91
+ code = fast;
92
+ else
93
+ buf[pos++] = mode = 0x9C;
94
+ }
81
95
  buf[pos++] = code;
82
96
  }
83
- else if (code < 0x800) {
84
- buf[pos++] = 0xc0 | (code >> 6);
85
- buf[pos++] = 0x80 | (code & 0x3f);
97
+ else if (code < 0x32_00) {
98
+ const page = (code >> 7) + 0x9C;
99
+ if (mode !== page)
100
+ buf[pos++] = mode = page;
101
+ buf[pos++] = code & 0x7F;
86
102
  }
87
- else if (code < 0xd800 || code >= 0xe000) {
88
- buf[pos++] = 0xe0 | (code >> 12);
89
- buf[pos++] = 0x80 | ((code >> 6) & 0x3f);
90
- buf[pos++] = 0x80 | (code & 0x3f);
103
+ else if (code < 0x04_20_00) {
104
+ code -= 0x2000;
105
+ const page = (code >> 15) + 0x98;
106
+ if (mode !== page)
107
+ buf[pos++] = mode = page;
108
+ buf[pos++] = code & 0x7F;
109
+ buf[pos++] = code >> 7;
91
110
  }
92
111
  else {
93
- const point = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
94
- buf[pos++] = 0xf0 | (point >> 18);
95
- buf[pos++] = 0x80 | ((point >> 12) & 0x3f);
96
- buf[pos++] = 0x80 | ((point >> 6) & 0x3f);
97
- buf[pos++] = 0x80 | (point & 0x3f);
112
+ if (mode !== 0x97)
113
+ buf[pos++] = mode = 0x97;
114
+ buf[pos++] = code & 0x7F;
115
+ buf[pos++] = code >> 7;
116
+ buf[pos++] = code >> 15;
98
117
  }
99
118
  }
100
119
  return pos - from;
101
120
  }
102
- $.$mol_charset_encode_to = $mol_charset_encode_to;
103
- function $mol_charset_encode_size(str) {
104
- let size = 0;
105
- for (let i = 0; i < str.length; i++) {
106
- let code = str.charCodeAt(i);
107
- if (code < 0x80)
108
- size += 1;
109
- else if (code < 0x800)
110
- size += 2;
111
- else if (code < 0xd800 || code >= 0xe000)
112
- size += 3;
113
- else
114
- size += 4;
115
- }
116
- return size;
117
- }
118
- $.$mol_charset_encode_size = $mol_charset_encode_size;
121
+ $.$mol_charset_ucf_encode_to = $mol_charset_ucf_encode_to;
119
122
  })($ || ($ = {}));
120
123
 
121
124
  ;
@@ -144,21 +147,43 @@ var $;
144
147
  $.$mol_bigint_decode = $mol_bigint_decode;
145
148
  })($ || ($ = {}));
146
149
 
147
- ;
148
- "use strict";
149
-
150
150
  ;
151
151
  "use strict";
152
152
  var $;
153
153
  (function ($) {
154
- const decoders = {};
155
- function $mol_charset_decode(buffer, encoding = 'utf8') {
156
- let decoder = decoders[encoding];
157
- if (!decoder)
158
- decoder = decoders[encoding] = new TextDecoder(encoding);
159
- return decoder.decode(buffer);
154
+ const fast_char = `0123456789.,:;()?!-'" \n`;
155
+ function $mol_charset_ucf_decode(buffer, mode = 0x9C) {
156
+ let text = '';
157
+ let pos = 0;
158
+ let page_offset = 0;
159
+ while (pos < buffer.length) {
160
+ let code = buffer[pos++];
161
+ if (code < 0x80) {
162
+ if (mode < 0x9C)
163
+ code |= buffer[pos++] << 7;
164
+ if (mode === 0x97)
165
+ code |= buffer[pos++] << 15;
166
+ text += String.fromCodePoint(page_offset + code);
167
+ }
168
+ else if (code < 0x97) {
169
+ text += fast_char[code - 0x80];
170
+ }
171
+ else if (code >= 0x9C) {
172
+ mode = code;
173
+ page_offset = (mode - 0x9C) << 7;
174
+ }
175
+ else if (code === 0x97) {
176
+ mode = code;
177
+ page_offset = 0;
178
+ }
179
+ else {
180
+ mode = code;
181
+ page_offset = ((mode - 0x98) << 15) + 0x20_00;
182
+ }
183
+ }
184
+ return text;
160
185
  }
161
- $.$mol_charset_decode = $mol_charset_decode;
186
+ $.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
162
187
  })($ || ($ = {}));
163
188
 
164
189
  ;
@@ -2465,7 +2490,7 @@ var $;
2465
2490
  const len_max = val.length * 3;
2466
2491
  const len_size = calc_size(len_max);
2467
2492
  acquire(len_max);
2468
- const len = $mol_charset_encode_to(val, this.array, pos + len_size);
2493
+ const len = $mol_charset_ucf_encode_to(val, this.array, pos + len_size);
2469
2494
  dump_unum($mol_vary_tip.text, len, len_max);
2470
2495
  pos += len;
2471
2496
  release(len_max - len);
@@ -2674,7 +2699,7 @@ var $;
2674
2699
  };
2675
2700
  const read_text = (kind) => {
2676
2701
  const len = read_unum(kind);
2677
- const text = $mol_charset_decode(new Uint8Array(array.buffer, array.byteOffset + pos, len));
2702
+ const text = $mol_charset_ucf_decode(new Uint8Array(array.buffer, array.byteOffset + pos, len));
2678
2703
  pos += len;
2679
2704
  stream.push(text);
2680
2705
  return text;
@@ -4451,27 +4476,21 @@ var $;
4451
4476
  ;
4452
4477
  "use strict";
4453
4478
  var $;
4454
- (function ($) {
4455
- $mol_test({
4456
- 'encode empty'() {
4457
- $mol_assert_equal($mol_charset_encode(''), new Uint8Array([]));
4458
- },
4459
- 'encode 1 octet'() {
4460
- $mol_assert_equal($mol_charset_encode('F'), new Uint8Array([0x46]));
4461
- },
4462
- 'encode 2 octet'() {
4463
- $mol_assert_equal($mol_charset_encode('Б'), new Uint8Array([0xd0, 0x91]));
4464
- },
4465
- 'encode 3 octet'() {
4466
- $mol_assert_equal($mol_charset_encode('ह'), new Uint8Array([0xe0, 0xa4, 0xb9]));
4467
- },
4468
- 'encode 4 octet'() {
4469
- $mol_assert_equal($mol_charset_encode('𐍈'), new Uint8Array([0xf0, 0x90, 0x8d, 0x88]));
4470
- },
4471
- 'encode surrogate pair'() {
4472
- $mol_assert_equal($mol_charset_encode('😀'), new Uint8Array([0xf0, 0x9f, 0x98, 0x80]));
4473
- },
4474
- });
4479
+ (function ($_1) {
4480
+ var $$;
4481
+ (function ($$) {
4482
+ $mol_test({
4483
+ "Complex UCF encoding"($) {
4484
+ $mol_assert_equal($mol_charset_ucf_encode('hi мир, 美しい 世界 🏴‍☠\t\n'), new Uint8Array([
4485
+ 0x68, 0x69, 0x20,
4486
+ 0xA4, 0x3C, 0x38, 0x40, 0x8B, 0x95,
4487
+ 0x98, 0x0E, 0xBF, 0xFC, 0x57, 0x44, 0x95,
4488
+ 0x98, 0x16, 0x5C, 0x4C, 0xAA, 0x95,
4489
+ 0x9B, 0x74, 0xA7, 0xDC, 0x0D, 0xE8, 0x20, 0x9C, 0x09, 0x0A,
4490
+ ]));
4491
+ },
4492
+ });
4493
+ })($$ = $_1.$$ || ($_1.$$ = {}));
4475
4494
  })($ || ($ = {}));
4476
4495
 
4477
4496
  ;
@@ -4517,19 +4536,21 @@ var $;
4517
4536
  ;
4518
4537
  "use strict";
4519
4538
  var $;
4520
- (function ($) {
4521
- $mol_test({
4522
- 'decode utf8 string'() {
4523
- const str = 'Hello, ΧΨΩЫ';
4524
- const encoded = new Uint8Array([72, 101, 108, 108, 111, 44, 32, 206, 167, 206, 168, 206, 169, 208, 171]);
4525
- $mol_assert_equal($mol_charset_decode(encoded), str);
4526
- $mol_assert_equal($mol_charset_decode(encoded, 'utf8'), str);
4527
- },
4528
- 'decode empty string'() {
4529
- const encoded = new Uint8Array([]);
4530
- $mol_assert_equal($mol_charset_decode(encoded), '');
4531
- },
4532
- });
4539
+ (function ($_1) {
4540
+ var $$;
4541
+ (function ($$) {
4542
+ $mol_test({
4543
+ "Complex UCF eecoding"($) {
4544
+ $mol_assert_equal('hi мир, 美しい 世界 🏴‍☠\t\n', $mol_charset_ucf_decode(new Uint8Array([
4545
+ 0x68, 0x69, 0x20,
4546
+ 0xA4, 0x3C, 0x38, 0x40, 0x8B, 0x95,
4547
+ 0x98, 0x0E, 0xBF, 0xFC, 0x57, 0x44, 0x95,
4548
+ 0x98, 0x16, 0x5C, 0x4C, 0xAA, 0x95,
4549
+ 0x9B, 0x74, 0xA7, 0xDC, 0x0D, 0xE8, 0x20, 0x9C, 0x09, 0x0A,
4550
+ ])));
4551
+ },
4552
+ });
4553
+ })($$ = $_1.$$ || ($_1.$$ = {}));
4533
4554
  })($ || ($ = {}));
4534
4555
 
4535
4556
  ;
@@ -4887,7 +4908,7 @@ var $;
4887
4908
  const { uint, link, spec, blob, text, list, tupl, sint } = $mol_vary_tip;
4888
4909
  const { none, both, fp16, fp32, fp64 } = $mol_vary_spec;
4889
4910
  const { L1, L2, L4, L8, LA } = $mol_vary_len;
4890
- const str = $mol_charset_encode;
4911
+ const str = $mol_charset_ucf_encode;
4891
4912
  function check(vary, ideal, Vary = $mol_vary) {
4892
4913
  const pack = Vary.pack(vary);
4893
4914
  $mol_assert_equal(Vary.take(pack), vary);
@@ -4976,11 +4997,11 @@ var $;
4976
4997
  },
4977
4998
  "vary pack text"($) {
4978
4999
  check(['foo'], [text | 3, ...str('foo')]);
4979
- check(['абв'], [text | 6, ...str('абв')]);
5000
+ check(['абв'], [text | 4, ...str('абв')]);
4980
5001
  const long_lat = 'abcdefghijklmnopqrst';
4981
5002
  check([long_lat], [text | L1, 20, ...str(long_lat)]);
4982
5003
  const long_cyr = 'абвгдеёжзийклмнопрст';
4983
- check([long_cyr], [text | L1, 40, ...str(long_cyr)]);
5004
+ check([long_cyr], [text | L1, 21, ...str(long_cyr)]);
4984
5005
  },
4985
5006
  "vary pack dedup text"($) {
4986
5007
  check([["f", "f"]], [list | 2, text | 1, ...str('f'), link | 0]);