mol_vary 0.0.96 → 0.0.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/node.mjs CHANGED
@@ -77,57 +77,146 @@ var $;
77
77
  "use strict";
78
78
  var $;
79
79
  (function ($) {
80
+ const ascii_set = [...`0123456789.,:;()'"- \n`].map(c => c.charCodeAt(0));
81
+ const ascii_map = new Array(0x80).fill(0);
82
+ for (let i = 0; i < ascii_set.length; ++i)
83
+ ascii_map[ascii_set[i]] = i | 0x80;
84
+ const diacr_set = [
85
+ 0x00, 0x01, 0x0F, 0x0B, 0x07, 0x08, 0x12, 0x13,
86
+ 0x02, 0x0C, 0x06, 0x11, 0x03, 0x09, 0x0A, 0x04,
87
+ 0x28, 0x31, 0x27, 0x26, 0x23,
88
+ ];
89
+ const diacr_map = new Array(0x80).fill(0);
90
+ for (let i = 0; i < diacr_set.length; ++i)
91
+ diacr_map[diacr_set[i]] = i | 0x80;
92
+ const wide_offset = 0x0E_00;
93
+ const wide_limit = 128 * 128 * 8 + wide_offset;
94
+ const tiny_limit = 128 * 98;
95
+ const full_mode = 0x95;
96
+ const wide_mode = 0x96;
97
+ const tiny_mode = 0x9E;
80
98
  function $mol_charset_ucf_encode(str) {
81
99
  const buf = $mol_charset_buffer(str.length * 3);
82
100
  return buf.slice(0, $mol_charset_ucf_encode_to(str, buf));
83
101
  }
84
102
  $.$mol_charset_ucf_encode = $mol_charset_ucf_encode;
85
- const fast_char = `0123456789.,:;()?!-'" \n`;
86
- const fast_map = new Array(0x80).fill(0);
87
- for (let i = 0; i < fast_char.length; ++i)
88
- fast_map[fast_char[i].charCodeAt(0)] = i | 0x80;
89
103
  function $mol_charset_ucf_encode_to(str, buf, from = 0) {
90
104
  let pos = from;
91
- let mode = 0x9C;
105
+ let mode = tiny_mode;
106
+ const write_high = (code) => {
107
+ buf[pos++] = ((code + 128 - mode) & 0x7F) | 0x80;
108
+ };
109
+ const write_remap = (code) => {
110
+ const fast = ascii_map[code];
111
+ if (fast)
112
+ write_high(fast);
113
+ else
114
+ buf[pos++] = code;
115
+ };
116
+ const write_mode = (m) => {
117
+ write_high(m);
118
+ mode = m;
119
+ };
92
120
  for (let i = 0; i < str.length; i++) {
93
121
  let code = str.charCodeAt(i);
94
- if (code >= 0xd800 && code < 0xe000)
122
+ if (code >= 0xD8_00 && code < 0xDC_00)
95
123
  code = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
96
124
  if (code < 0x80) {
97
- if (mode !== 0x9C) {
98
- const fast = fast_map[code];
99
- if (fast)
100
- code = fast;
101
- else
102
- buf[pos++] = mode = 0x9C;
125
+ if (mode !== tiny_mode) {
126
+ const fast = ascii_map[code];
127
+ if (!fast)
128
+ write_mode(tiny_mode);
103
129
  }
104
130
  buf[pos++] = code;
105
131
  }
106
- else if (code < 0x32_00) {
107
- const page = (code >> 7) + 0x9C;
132
+ else if (code < tiny_limit) {
133
+ const page = (code >> 7) + tiny_mode;
134
+ code &= 0x7F;
135
+ if (page === 164) {
136
+ const fast = diacr_map[code];
137
+ if (fast) {
138
+ if (mode !== tiny_mode)
139
+ write_mode(tiny_mode);
140
+ write_high(fast);
141
+ continue;
142
+ }
143
+ }
108
144
  if (mode !== page)
109
- buf[pos++] = mode = page;
110
- buf[pos++] = code & 0x7F;
145
+ write_mode(page);
146
+ write_remap(code);
111
147
  }
112
- else if (code < 0x04_20_00) {
113
- code -= 0x2000;
114
- const page = (code >> 15) + 0x98;
148
+ else if (code < wide_limit) {
149
+ code -= wide_offset;
150
+ const page = (code >> 14) + wide_mode;
115
151
  if (mode !== page)
116
- buf[pos++] = mode = page;
117
- buf[pos++] = code & 0x7F;
118
- buf[pos++] = code >> 7;
152
+ write_mode(page);
153
+ write_remap(code & 0x7F);
154
+ write_remap((code >> 7) & 0x7F);
119
155
  }
120
156
  else {
121
- if (mode !== 0x97)
122
- buf[pos++] = mode = 0x97;
123
- buf[pos++] = code & 0x7F;
124
- buf[pos++] = code >> 7;
125
- buf[pos++] = code >> 15;
157
+ if (mode !== full_mode)
158
+ write_mode(full_mode);
159
+ write_remap(code & 0x7F);
160
+ write_remap((code >> 7) & 0x7F);
161
+ write_remap(code >> 14);
126
162
  }
127
163
  }
164
+ if (mode !== tiny_mode)
165
+ write_mode(tiny_mode);
128
166
  return pos - from;
129
167
  }
130
168
  $.$mol_charset_ucf_encode_to = $mol_charset_ucf_encode_to;
169
+ function $mol_charset_ucf_decode(buffer, mode = tiny_mode) {
170
+ let text = '';
171
+ let pos = 0;
172
+ let page_offset = 0;
173
+ const read_code = () => {
174
+ let code = buffer[pos++];
175
+ if (code > 0x80)
176
+ code = ((mode + code) & 0x7F) | 0x80;
177
+ return code;
178
+ };
179
+ const read_remap = () => {
180
+ let code = read_code();
181
+ if (code >= 0x80)
182
+ code = ascii_set[code - 0x80];
183
+ return code;
184
+ };
185
+ while (pos < buffer.length) {
186
+ let code = read_code();
187
+ if (code < full_mode) {
188
+ if (mode === tiny_mode) {
189
+ if (code > 0x80) {
190
+ code = diacr_set[code - 0x080] | (6 << 7);
191
+ }
192
+ }
193
+ else if (!ascii_map[code]) {
194
+ if (code >= 0x80)
195
+ code = ascii_set[code - 0x80];
196
+ if (mode < tiny_mode)
197
+ code |= read_remap() << 7;
198
+ if (mode === full_mode)
199
+ code |= read_remap() << 14;
200
+ code += page_offset;
201
+ }
202
+ text += String.fromCodePoint(code);
203
+ }
204
+ else if (code >= tiny_mode) {
205
+ mode = code;
206
+ page_offset = (mode - tiny_mode) << 7;
207
+ }
208
+ else if (code === full_mode) {
209
+ mode = code;
210
+ page_offset = 0;
211
+ }
212
+ else {
213
+ mode = code;
214
+ page_offset = ((mode - wide_mode) << 14) + wide_offset;
215
+ }
216
+ }
217
+ return text;
218
+ }
219
+ $.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
131
220
  })($ || ($ = {}));
132
221
 
133
222
  ;
@@ -156,45 +245,6 @@ var $;
156
245
  $.$mol_bigint_decode = $mol_bigint_decode;
157
246
  })($ || ($ = {}));
158
247
 
159
- ;
160
- "use strict";
161
- var $;
162
- (function ($) {
163
- const fast_char = `0123456789.,:;()?!-'" \n`;
164
- function $mol_charset_ucf_decode(buffer, mode = 0x9C) {
165
- let text = '';
166
- let pos = 0;
167
- let page_offset = 0;
168
- while (pos < buffer.length) {
169
- let code = buffer[pos++];
170
- if (code < 0x80) {
171
- if (mode < 0x9C)
172
- code |= buffer[pos++] << 7;
173
- if (mode === 0x97)
174
- code |= buffer[pos++] << 15;
175
- text += String.fromCodePoint(page_offset + code);
176
- }
177
- else if (code < 0x97) {
178
- text += fast_char[code - 0x80];
179
- }
180
- else if (code >= 0x9C) {
181
- mode = code;
182
- page_offset = (mode - 0x9C) << 7;
183
- }
184
- else if (code === 0x97) {
185
- mode = code;
186
- page_offset = 0;
187
- }
188
- else {
189
- mode = code;
190
- page_offset = ((mode - 0x98) << 15) + 0x20_00;
191
- }
192
- }
193
- return text;
194
- }
195
- $.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
196
- })($ || ($ = {}));
197
-
198
248
  ;
199
249
  "use strict";
200
250
  var $;
package/node.test.js CHANGED
@@ -68,57 +68,146 @@ var $;
68
68
  "use strict";
69
69
  var $;
70
70
  (function ($) {
71
+ const ascii_set = [...`0123456789.,:;()'"- \n`].map(c => c.charCodeAt(0));
72
+ const ascii_map = new Array(0x80).fill(0);
73
+ for (let i = 0; i < ascii_set.length; ++i)
74
+ ascii_map[ascii_set[i]] = i | 0x80;
75
+ const diacr_set = [
76
+ 0x00, 0x01, 0x0F, 0x0B, 0x07, 0x08, 0x12, 0x13,
77
+ 0x02, 0x0C, 0x06, 0x11, 0x03, 0x09, 0x0A, 0x04,
78
+ 0x28, 0x31, 0x27, 0x26, 0x23,
79
+ ];
80
+ const diacr_map = new Array(0x80).fill(0);
81
+ for (let i = 0; i < diacr_set.length; ++i)
82
+ diacr_map[diacr_set[i]] = i | 0x80;
83
+ const wide_offset = 0x0E_00;
84
+ const wide_limit = 128 * 128 * 8 + wide_offset;
85
+ const tiny_limit = 128 * 98;
86
+ const full_mode = 0x95;
87
+ const wide_mode = 0x96;
88
+ const tiny_mode = 0x9E;
71
89
  function $mol_charset_ucf_encode(str) {
72
90
  const buf = $mol_charset_buffer(str.length * 3);
73
91
  return buf.slice(0, $mol_charset_ucf_encode_to(str, buf));
74
92
  }
75
93
  $.$mol_charset_ucf_encode = $mol_charset_ucf_encode;
76
- const fast_char = `0123456789.,:;()?!-'" \n`;
77
- const fast_map = new Array(0x80).fill(0);
78
- for (let i = 0; i < fast_char.length; ++i)
79
- fast_map[fast_char[i].charCodeAt(0)] = i | 0x80;
80
94
  function $mol_charset_ucf_encode_to(str, buf, from = 0) {
81
95
  let pos = from;
82
- let mode = 0x9C;
96
+ let mode = tiny_mode;
97
+ const write_high = (code) => {
98
+ buf[pos++] = ((code + 128 - mode) & 0x7F) | 0x80;
99
+ };
100
+ const write_remap = (code) => {
101
+ const fast = ascii_map[code];
102
+ if (fast)
103
+ write_high(fast);
104
+ else
105
+ buf[pos++] = code;
106
+ };
107
+ const write_mode = (m) => {
108
+ write_high(m);
109
+ mode = m;
110
+ };
83
111
  for (let i = 0; i < str.length; i++) {
84
112
  let code = str.charCodeAt(i);
85
- if (code >= 0xd800 && code < 0xe000)
113
+ if (code >= 0xD8_00 && code < 0xDC_00)
86
114
  code = ((code - 0xd800) << 10) + str.charCodeAt(++i) + 0x2400;
87
115
  if (code < 0x80) {
88
- if (mode !== 0x9C) {
89
- const fast = fast_map[code];
90
- if (fast)
91
- code = fast;
92
- else
93
- buf[pos++] = mode = 0x9C;
116
+ if (mode !== tiny_mode) {
117
+ const fast = ascii_map[code];
118
+ if (!fast)
119
+ write_mode(tiny_mode);
94
120
  }
95
121
  buf[pos++] = code;
96
122
  }
97
- else if (code < 0x32_00) {
98
- const page = (code >> 7) + 0x9C;
123
+ else if (code < tiny_limit) {
124
+ const page = (code >> 7) + tiny_mode;
125
+ code &= 0x7F;
126
+ if (page === 164) {
127
+ const fast = diacr_map[code];
128
+ if (fast) {
129
+ if (mode !== tiny_mode)
130
+ write_mode(tiny_mode);
131
+ write_high(fast);
132
+ continue;
133
+ }
134
+ }
99
135
  if (mode !== page)
100
- buf[pos++] = mode = page;
101
- buf[pos++] = code & 0x7F;
136
+ write_mode(page);
137
+ write_remap(code);
102
138
  }
103
- else if (code < 0x04_20_00) {
104
- code -= 0x2000;
105
- const page = (code >> 15) + 0x98;
139
+ else if (code < wide_limit) {
140
+ code -= wide_offset;
141
+ const page = (code >> 14) + wide_mode;
106
142
  if (mode !== page)
107
- buf[pos++] = mode = page;
108
- buf[pos++] = code & 0x7F;
109
- buf[pos++] = code >> 7;
143
+ write_mode(page);
144
+ write_remap(code & 0x7F);
145
+ write_remap((code >> 7) & 0x7F);
110
146
  }
111
147
  else {
112
- if (mode !== 0x97)
113
- buf[pos++] = mode = 0x97;
114
- buf[pos++] = code & 0x7F;
115
- buf[pos++] = code >> 7;
116
- buf[pos++] = code >> 15;
148
+ if (mode !== full_mode)
149
+ write_mode(full_mode);
150
+ write_remap(code & 0x7F);
151
+ write_remap((code >> 7) & 0x7F);
152
+ write_remap(code >> 14);
117
153
  }
118
154
  }
155
+ if (mode !== tiny_mode)
156
+ write_mode(tiny_mode);
119
157
  return pos - from;
120
158
  }
121
159
  $.$mol_charset_ucf_encode_to = $mol_charset_ucf_encode_to;
160
+ function $mol_charset_ucf_decode(buffer, mode = tiny_mode) {
161
+ let text = '';
162
+ let pos = 0;
163
+ let page_offset = 0;
164
+ const read_code = () => {
165
+ let code = buffer[pos++];
166
+ if (code > 0x80)
167
+ code = ((mode + code) & 0x7F) | 0x80;
168
+ return code;
169
+ };
170
+ const read_remap = () => {
171
+ let code = read_code();
172
+ if (code >= 0x80)
173
+ code = ascii_set[code - 0x80];
174
+ return code;
175
+ };
176
+ while (pos < buffer.length) {
177
+ let code = read_code();
178
+ if (code < full_mode) {
179
+ if (mode === tiny_mode) {
180
+ if (code > 0x80) {
181
+ code = diacr_set[code - 0x080] | (6 << 7);
182
+ }
183
+ }
184
+ else if (!ascii_map[code]) {
185
+ if (code >= 0x80)
186
+ code = ascii_set[code - 0x80];
187
+ if (mode < tiny_mode)
188
+ code |= read_remap() << 7;
189
+ if (mode === full_mode)
190
+ code |= read_remap() << 14;
191
+ code += page_offset;
192
+ }
193
+ text += String.fromCodePoint(code);
194
+ }
195
+ else if (code >= tiny_mode) {
196
+ mode = code;
197
+ page_offset = (mode - tiny_mode) << 7;
198
+ }
199
+ else if (code === full_mode) {
200
+ mode = code;
201
+ page_offset = 0;
202
+ }
203
+ else {
204
+ mode = code;
205
+ page_offset = ((mode - wide_mode) << 14) + wide_offset;
206
+ }
207
+ }
208
+ return text;
209
+ }
210
+ $.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
122
211
  })($ || ($ = {}));
123
212
 
124
213
  ;
@@ -147,45 +236,6 @@ var $;
147
236
  $.$mol_bigint_decode = $mol_bigint_decode;
148
237
  })($ || ($ = {}));
149
238
 
150
- ;
151
- "use strict";
152
- var $;
153
- (function ($) {
154
- const fast_char = `0123456789.,:;()?!-'" \n`;
155
- function $mol_charset_ucf_decode(buffer, mode = 0x9C) {
156
- let text = '';
157
- let pos = 0;
158
- let page_offset = 0;
159
- while (pos < buffer.length) {
160
- let code = buffer[pos++];
161
- if (code < 0x80) {
162
- if (mode < 0x9C)
163
- code |= buffer[pos++] << 7;
164
- if (mode === 0x97)
165
- code |= buffer[pos++] << 15;
166
- text += String.fromCodePoint(page_offset + code);
167
- }
168
- else if (code < 0x97) {
169
- text += fast_char[code - 0x80];
170
- }
171
- else if (code >= 0x9C) {
172
- mode = code;
173
- page_offset = (mode - 0x9C) << 7;
174
- }
175
- else if (code === 0x97) {
176
- mode = code;
177
- page_offset = 0;
178
- }
179
- else {
180
- mode = code;
181
- page_offset = ((mode - 0x98) << 15) + 0x20_00;
182
- }
183
- }
184
- return text;
185
- }
186
- $.$mol_charset_ucf_decode = $mol_charset_ucf_decode;
187
- })($ || ($ = {}));
188
-
189
239
  ;
190
240
  "use strict";
191
241
  var $;
@@ -4479,15 +4529,51 @@ var $;
4479
4529
  (function ($_1) {
4480
4530
  var $$;
4481
4531
  (function ($$) {
4532
+ function check(text, bytes) {
4533
+ const ideal = new Uint8Array(bytes);
4534
+ const actual = $mol_charset_ucf_encode(text);
4535
+ $mol_assert_equal($mol_charset_ucf_decode(actual), text);
4536
+ $mol_assert_equal(actual, ideal);
4537
+ }
4482
4538
  $mol_test({
4483
- "Complex UCF encoding"($) {
4484
- $mol_assert_equal($mol_charset_ucf_encode('hi мир, 美しい 世界 🏴‍☠\t\n'), new Uint8Array([
4485
- 0x68, 0x69, 0x20,
4486
- 0xA4, 0x3C, 0x38, 0x40, 0x8B, 0x95,
4487
- 0x98, 0x0E, 0xBF, 0xFC, 0x57, 0x44, 0x95,
4488
- 0x98, 0x16, 0x5C, 0x4C, 0xAA, 0x95,
4489
- 0x9B, 0x74, 0xA7, 0xDC, 0x0D, 0xE8, 0x20, 0x9C, 0x09, 0x0A,
4490
- ]));
4539
+ "Full ASCII compatible"($) {
4540
+ check('hi', [0x68, 0x69]);
4541
+ },
4542
+ "1B ASCII with diacritic"($) {
4543
+ check('allo\u0302', [0x61, 0x6C, 0x6C, 0x6F, 0xEA]);
4544
+ },
4545
+ "1B Cyrillic"($) {
4546
+ check('мир', [0x88, 0x3C, 0xE2, 0x40, 0xF8]);
4547
+ },
4548
+ "1B Cyrillic with nummbers and punctuation"($) {
4549
+ check('м.1', [0x88, 0x3C, 0x2E, 0x31, 0xF8]);
4550
+ },
4551
+ "2B Kanji"($) {
4552
+ check('美', [0xF9, 0x0E, 0x63, 0x87]);
4553
+ },
4554
+ "3B rare Kanji"($) {
4555
+ check('𲎯', [0xF7, 0x2F, 0x47, 0x0C, 0x89]);
4556
+ },
4557
+ "1B Kana"($) {
4558
+ check('しい', [0xE0, 0x57, 0x44, 0xA0]);
4559
+ },
4560
+ "2B Emoji"($) {
4561
+ check('🏴', [0xFF, 0x74, 0x4B, 0x81]);
4562
+ },
4563
+ "2B Emoji with 1B modifiers"($) {
4564
+ check('🏴‍☠', [0xFF, 0x74, 0x4B, 0xC1, 0x0D, 0x8C, 0xA9, 0xB4]);
4565
+ },
4566
+ "2B Emoji with 3B Tag"($) {
4567
+ check('🏴\u{E007F}', [0xFF, 0x74, 0x4B, 0xF8, 0x7F, 0x00, 0xF3, 0x89]);
4568
+ },
4569
+ "Mixed scripts"($) {
4570
+ check('allô 美しい мир, 🏴‍☠\n', [
4571
+ 0x61, 0x6C, 0x6C, 0x6F, 0xEA, 0x20,
4572
+ 0xF9, 0x0E, 0x63, 0xE7, 0x57, 0x44, 0x20,
4573
+ 0xA8, 0x3C, 0xE2, 0x40, 0x2C, 0x20,
4574
+ 0xF7, 0x74, 0x4B, 0xC1, 0x0D, 0x8C, 0xA9, 0x0A,
4575
+ 0xB4,
4576
+ ]);
4491
4577
  },
4492
4578
  });
4493
4579
  })($$ = $_1.$$ || ($_1.$$ = {}));
@@ -4533,26 +4619,6 @@ var $;
4533
4619
  })($$ = $_1.$$ || ($_1.$$ = {}));
4534
4620
  })($ || ($ = {}));
4535
4621
 
4536
- ;
4537
- "use strict";
4538
- var $;
4539
- (function ($_1) {
4540
- var $$;
4541
- (function ($$) {
4542
- $mol_test({
4543
- "Complex UCF eecoding"($) {
4544
- $mol_assert_equal('hi мир, 美しい 世界 🏴‍☠\t\n', $mol_charset_ucf_decode(new Uint8Array([
4545
- 0x68, 0x69, 0x20,
4546
- 0xA4, 0x3C, 0x38, 0x40, 0x8B, 0x95,
4547
- 0x98, 0x0E, 0xBF, 0xFC, 0x57, 0x44, 0x95,
4548
- 0x98, 0x16, 0x5C, 0x4C, 0xAA, 0x95,
4549
- 0x9B, 0x74, 0xA7, 0xDC, 0x0D, 0xE8, 0x20, 0x9C, 0x09, 0x0A,
4550
- ])));
4551
- },
4552
- });
4553
- })($$ = $_1.$$ || ($_1.$$ = {}));
4554
- })($ || ($ = {}));
4555
-
4556
4622
  ;
4557
4623
  "use strict";
4558
4624
  var $;
@@ -4997,11 +5063,11 @@ var $;
4997
5063
  },
4998
5064
  "vary pack text"($) {
4999
5065
  check(['foo'], [text | 3, ...str('foo')]);
5000
- check(['абв'], [text | 4, ...str('абв')]);
5066
+ check(['абв'], [text | 5, ...str('абв')]);
5001
5067
  const long_lat = 'abcdefghijklmnopqrst';
5002
5068
  check([long_lat], [text | L1, 20, ...str(long_lat)]);
5003
5069
  const long_cyr = 'абвгдеёжзийклмнопрст';
5004
- check([long_cyr], [text | L1, 21, ...str(long_cyr)]);
5070
+ check([long_cyr], [text | L1, 22, ...str(long_cyr)]);
5005
5071
  },
5006
5072
  "vary pack dedup text"($) {
5007
5073
  check([["f", "f"]], [list | 2, text | 1, ...str('f'), link | 0]);