@cloudpss/ubjson 0.5.35 → 0.5.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{benchmark-string.js → benchmark-string-decode.js} +4 -4
- package/benchmark-string-encode.js +32 -0
- package/benchmark-string-size-caculation.js +9 -11
- package/benchmark.js +1 -0
- package/dist/common/decoder.js +2 -1
- package/dist/common/decoder.js.map +1 -1
- package/dist/common/encoder.d.ts +4 -2
- package/dist/common/encoder.js +106 -45
- package/dist/common/encoder.js.map +1 -1
- package/dist/common/errors.d.ts +4 -0
- package/dist/common/errors.js +14 -0
- package/dist/common/errors.js.map +1 -0
- package/dist/common/string-decoder.d.ts +5 -3
- package/dist/common/string-decoder.js +23 -14
- package/dist/common/string-decoder.js.map +1 -1
- package/dist/common/string-encoder.d.ts +32 -2
- package/dist/common/string-encoder.js +105 -12
- package/dist/common/string-encoder.js.map +1 -1
- package/dist/stream-helper/encoder.d.ts +4 -4
- package/dist/stream-helper/encoder.js +116 -41
- package/dist/stream-helper/encoder.js.map +1 -1
- package/package.json +3 -3
- package/src/common/decoder.ts +2 -1
- package/src/common/encoder.ts +100 -42
- package/src/common/errors.ts +14 -0
- package/src/common/string-decoder.ts +26 -17
- package/src/common/string-encoder.ts +103 -14
- package/src/stream-helper/encoder.ts +118 -39
- package/tests/.utils.js +10 -0
- package/tests/e2e/.data.js +470 -0
- package/tests/e2e/no-buffer-text.js +37 -0
- package/tests/e2e/no-buffer.js +30 -0
- package/tests/e2e/no-encode-into.js +32 -0
- package/tests/e2e/no-textencoder-decoder.js +34 -0
- package/tests/e2e/normal.js +27 -0
- package/tests/e2e/stream.js +20 -0
- package/tests/encode.js +11 -19
- package/tests/huge-string.js +7 -9
- package/tests/rxjs/encode.js +4 -18
- package/tests/stream/encode.js +0 -15
- package/tests/string-encoding.js +3 -2
- package/tests/tsconfig.json +2 -1
- package/tests/e2e.js +0 -415
package/src/common/encoder.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { constants } from './constants.js';
|
|
2
|
-
import {
|
|
2
|
+
import { unsupportedType, unsupportedView } from './errors.js';
|
|
3
|
+
import { stringByteLength, encodeInto } from './string-encoder.js';
|
|
4
|
+
|
|
5
|
+
const LARGE_DATA_LENGTH = 65536;
|
|
3
6
|
|
|
4
7
|
/** 编码至 ubjson */
|
|
5
8
|
export abstract class EncoderBase {
|
|
6
|
-
protected readonly stringByteLength = getStringByteLength();
|
|
7
|
-
protected readonly encodeInto = getEncodeInto();
|
|
8
9
|
/** 当前写指针位置 */
|
|
9
10
|
protected length = 0;
|
|
10
11
|
/** 数据 */
|
|
@@ -79,7 +80,9 @@ export abstract class EncoderBase {
|
|
|
79
80
|
if (value === null) {
|
|
80
81
|
this.ensureCapacity(1);
|
|
81
82
|
this.buffer[this.length++] = constants.NULL;
|
|
82
|
-
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
if (Array.isArray(value)) {
|
|
83
86
|
this.ensureCapacity(1);
|
|
84
87
|
this.buffer[this.length++] = constants.ARRAY;
|
|
85
88
|
const size = value.length;
|
|
@@ -95,7 +98,9 @@ export abstract class EncoderBase {
|
|
|
95
98
|
}
|
|
96
99
|
this.ensureCapacity(1);
|
|
97
100
|
this.buffer[this.length++] = constants.ARRAY_END;
|
|
98
|
-
|
|
101
|
+
return;
|
|
102
|
+
}
|
|
103
|
+
if (!ArrayBuffer.isView(value)) {
|
|
99
104
|
const { toJSON } = value as Record<string, unknown>;
|
|
100
105
|
if (typeof toJSON == 'function') {
|
|
101
106
|
this.write(toJSON.call(value));
|
|
@@ -115,6 +120,33 @@ export abstract class EncoderBase {
|
|
|
115
120
|
}
|
|
116
121
|
this.ensureCapacity(1);
|
|
117
122
|
this.buffer[this.length++] = constants.OBJECT_END;
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
if (value.byteLength > LARGE_DATA_LENGTH) {
|
|
126
|
+
// ARRAY(1) + TYPE_MARKER(1) + TYPE(1) + COUNT_MARKER(1) + COUNT(5)
|
|
127
|
+
this.ensureCapacity(9);
|
|
128
|
+
this.buffer[this.length++] = constants.ARRAY;
|
|
129
|
+
this.buffer[this.length++] = constants.TYPE_MARKER;
|
|
130
|
+
if (value instanceof Uint8Array) {
|
|
131
|
+
this.buffer[this.length++] = constants.UINT8;
|
|
132
|
+
} else if (value instanceof Int8Array) {
|
|
133
|
+
this.buffer[this.length++] = constants.INT8;
|
|
134
|
+
} else if (value instanceof Int16Array) {
|
|
135
|
+
this.buffer[this.length++] = constants.INT16;
|
|
136
|
+
} else if (value instanceof Int32Array) {
|
|
137
|
+
this.buffer[this.length++] = constants.INT32;
|
|
138
|
+
} else if (value instanceof Float32Array) {
|
|
139
|
+
this.buffer[this.length++] = constants.FLOAT32;
|
|
140
|
+
} else if (value instanceof Float64Array) {
|
|
141
|
+
this.buffer[this.length++] = constants.FLOAT64;
|
|
142
|
+
} else if (value instanceof BigInt64Array) {
|
|
143
|
+
this.buffer[this.length++] = constants.INT64;
|
|
144
|
+
} else {
|
|
145
|
+
unsupportedView(value);
|
|
146
|
+
}
|
|
147
|
+
this.buffer[this.length++] = constants.COUNT_MARKER;
|
|
148
|
+
this.setLength(value.length);
|
|
149
|
+
this.writeLargeTypedArrayData(value);
|
|
118
150
|
} else {
|
|
119
151
|
// ARRAY(1) + TYPE_MARKER(1) + TYPE(1) + COUNT_MARKER(1) + COUNT(MAX5) + DATA
|
|
120
152
|
this.ensureCapacity(9 + value.byteLength);
|
|
@@ -175,7 +207,7 @@ export abstract class EncoderBase {
|
|
|
175
207
|
this.length += elementSize;
|
|
176
208
|
}
|
|
177
209
|
} else {
|
|
178
|
-
|
|
210
|
+
unsupportedView(value);
|
|
179
211
|
}
|
|
180
212
|
}
|
|
181
213
|
return;
|
|
@@ -200,18 +232,18 @@ export abstract class EncoderBase {
|
|
|
200
232
|
}
|
|
201
233
|
return;
|
|
202
234
|
default:
|
|
203
|
-
|
|
235
|
+
unsupportedType(value);
|
|
204
236
|
}
|
|
205
237
|
}
|
|
206
238
|
|
|
207
239
|
/** writeStringData */
|
|
208
240
|
private writeStringData(value: string): void {
|
|
209
241
|
const strLength = value.length;
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
242
|
+
if (strLength > LARGE_DATA_LENGTH) {
|
|
243
|
+
return this.writeLargeStringData(value);
|
|
244
|
+
}
|
|
245
|
+
// 对于短字符串,直接计算最大使用空间
|
|
246
|
+
const maxUsage = strLength * 3;
|
|
215
247
|
// 一次性分配 setLength 和 encodeInto 的空间,避免无法回溯
|
|
216
248
|
// 额外分配 3 字节,避免 encodeInto 无法写入最后一个字符
|
|
217
249
|
this.ensureCapacity(maxUsage + 5 + 3);
|
|
@@ -219,36 +251,7 @@ export abstract class EncoderBase {
|
|
|
219
251
|
// 预估头部大小
|
|
220
252
|
const headerSize = strLength < 128 ? 2 : strLength < 32768 ? 3 : 5;
|
|
221
253
|
const headerPos = this.length;
|
|
222
|
-
|
|
223
|
-
// 优化小字符串
|
|
224
|
-
if (strLength < 0x40 || !this.encodeInto) {
|
|
225
|
-
let c1, c2;
|
|
226
|
-
let strPosition = headerPos + headerSize;
|
|
227
|
-
const target = this.buffer;
|
|
228
|
-
for (let i = 0; i < strLength; i++) {
|
|
229
|
-
c1 = value.charCodeAt(i);
|
|
230
|
-
if (c1 < 0x80) {
|
|
231
|
-
target[strPosition++] = c1;
|
|
232
|
-
} else if (c1 < 0x800) {
|
|
233
|
-
target[strPosition++] = (c1 >> 6) | 0xc0;
|
|
234
|
-
target[strPosition++] = (c1 & 0x3f) | 0x80;
|
|
235
|
-
} else if ((c1 & 0xfc00) === 0xd800 && ((c2 = value.charCodeAt(i + 1)) & 0xfc00) === 0xdc00) {
|
|
236
|
-
c1 = 0x1_0000 + ((c1 & 0x03ff) << 10) + (c2 & 0x03ff);
|
|
237
|
-
i++;
|
|
238
|
-
target[strPosition++] = (c1 >> 18) | 0xf0;
|
|
239
|
-
target[strPosition++] = ((c1 >> 12) & 0x3f) | 0x80;
|
|
240
|
-
target[strPosition++] = ((c1 >> 6) & 0x3f) | 0x80;
|
|
241
|
-
target[strPosition++] = (c1 & 0x3f) | 0x80;
|
|
242
|
-
} else {
|
|
243
|
-
target[strPosition++] = (c1 >> 12) | 0xe0;
|
|
244
|
-
target[strPosition++] = ((c1 >> 6) & 0x3f) | 0x80;
|
|
245
|
-
target[strPosition++] = (c1 & 0x3f) | 0x80;
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
bufLength = strPosition - headerPos - headerSize;
|
|
249
|
-
} else {
|
|
250
|
-
bufLength = this.encodeInto(value, this.buffer, headerPos + headerSize);
|
|
251
|
-
}
|
|
254
|
+
const bufLength = encodeInto(value, this.buffer, this.length + headerSize);
|
|
252
255
|
if (bufLength < 128) {
|
|
253
256
|
this.buffer[this.length++] = constants.INT8;
|
|
254
257
|
this.buffer[this.length++] = bufLength;
|
|
@@ -270,6 +273,61 @@ export abstract class EncoderBase {
|
|
|
270
273
|
this.length += bufLength;
|
|
271
274
|
}
|
|
272
275
|
|
|
276
|
+
/**
 * Writes the length header and UTF-8 payload of a large string.
 *
 * The byte length is measured up front with `stringByteLength`, emitted as an
 * INT32 marker followed by a 32-bit length, and the string is then encoded
 * into the buffer at the current write position.
 *
 * @param value - String whose data should be appended.
 */
protected writeLargeStringData(value: string): void {
    const byteCount = stringByteLength(value);

    // Header: INT32 marker (1 byte) + 32-bit length (4 bytes).
    this.ensureCapacity(5);
    this.buffer[this.length] = constants.INT32;
    this.length += 1;
    this.view.setInt32(this.length, byteCount);
    this.length += 4;

    // Space for the payload is requested only after the header has been written.
    this.ensureCapacity(byteCount);
    encodeInto(value, this.buffer, this.length);
    this.length += byteCount;
}
|
|
287
|
+
|
|
288
|
+
/**
 * Writes the element data of a large typed array into the buffer.
 *
 * Capacity for the whole payload is requested first. One-byte element arrays
 * are copied with a single `set`; wider element types are written one element
 * at a time through the `DataView` setters (called without a little-endian
 * flag). Any other view type is rejected via `unsupportedView`.
 *
 * @param value - Typed array whose elements should be appended.
 */
protected writeLargeTypedArrayData(value: ArrayBufferView): void {
    const { byteLength } = value;
    this.ensureCapacity(byteLength);

    if (value instanceof Uint8Array || value instanceof Int8Array) {
        // Elements are single bytes, so a bulk copy is enough.
        this.buffer.set(value, this.length);
        this.length += byteLength;
        return;
    }

    if (value instanceof Int16Array) {
        const step = value.BYTES_PER_ELEMENT;
        for (const item of value) {
            this.view.setInt16(this.length, item);
            this.length += step;
        }
        return;
    }
    if (value instanceof Int32Array) {
        const step = value.BYTES_PER_ELEMENT;
        for (const item of value) {
            this.view.setInt32(this.length, item);
            this.length += step;
        }
        return;
    }
    if (value instanceof Float32Array) {
        const step = value.BYTES_PER_ELEMENT;
        for (const item of value) {
            this.view.setFloat32(this.length, item);
            this.length += step;
        }
        return;
    }
    if (value instanceof Float64Array) {
        const step = value.BYTES_PER_ELEMENT;
        for (const item of value) {
            this.view.setFloat64(this.length, item);
            this.length += step;
        }
        return;
    }
    if (value instanceof BigInt64Array) {
        const step = value.BYTES_PER_ELEMENT;
        for (const item of value) {
            this.view.setBigInt64(this.length, item);
            this.length += step;
        }
        return;
    }

    unsupportedView(value);
}
|
|
330
|
+
|
|
273
331
|
/**
|
|
274
332
|
* 写入整形数字,选取合适的大小,需提前分配空间
|
|
275
333
|
*/
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Rejects an array buffer view that cannot be encoded.
 *
 * @param view - The offending view.
 * @throws Error naming the view's `Object.prototype.toString` tag (e.g. `Uint16Array`).
 */
export function unsupportedView(view: ArrayBufferView): never {
    // "[object Uint16Array]" -> "Uint16Array"
    const tag: string = Object.prototype.toString.call(view);
    throw new Error(`Unsupported array buffer view of type ${tag.slice(8, -1)}`);
}
|
|
6
|
+
|
|
7
|
+
/**
 * Rejects a value whose type cannot be handled.
 *
 * A non-empty string is reported verbatim; every other value is reported by
 * its `Object.prototype.toString` tag (e.g. `Symbol`, `Function`).
 *
 * @param value - The offending value, or a string describing the type.
 * @throws Error with the message `Unsupported type <label>`.
 */
export function unsupportedType(value: unknown): never {
    const label: string =
        typeof value == 'string' && value
            ? value
            : Object.prototype.toString.call(value).slice(8, -1);
    throw new Error(`Unsupported type ${label}`);
}
|
|
@@ -1,17 +1,10 @@
|
|
|
1
|
-
/* c8 ignore next 2: TextDecoder always present, fallback tested */
|
|
2
|
-
export const textDecoder =
|
|
3
|
-
typeof TextDecoder == 'function' ? new TextDecoder('utf8', { ignoreBOM: true, fatal: false }) : null;
|
|
4
|
-
|
|
5
|
-
/* c8 ignore next: TextDecoder always present, fallback tested */
|
|
6
|
-
export const TEXT_ENCODER_THRESHOLD = textDecoder == null ? 0xffff_ffff : 200;
|
|
7
|
-
|
|
8
1
|
const CHUNK_SIZE = 0x1000;
|
|
9
2
|
const REPLACE_CHAR = 0xfffd;
|
|
10
3
|
|
|
11
4
|
const fromCharCode = String.fromCharCode;
|
|
12
5
|
|
|
13
6
|
/** 解码 */
|
|
14
|
-
export function
|
|
7
|
+
export function jsDecode(bytes: Uint8Array, begin: number, end: number): string {
|
|
15
8
|
let offset = begin;
|
|
16
9
|
|
|
17
10
|
const units: number[] = [];
|
|
@@ -190,6 +183,14 @@ function shortStringInJS(buf: Uint8Array, begin: number, length: number): string
|
|
|
190
183
|
}
|
|
191
184
|
}
|
|
192
185
|
|
|
186
|
+
let TEXT_DECODER: TextDecoder | null;
|
|
187
|
+
let TEXT_DECODER_THRESHOLD: number;
|
|
188
|
+
|
|
189
|
+
/**
 * Decodes `data[begin, end)` with the module-level `TextDecoder`.
 *
 * The decoder is asserted to be non-null here, so callers must only use this
 * function when a `TextDecoder` has been set up.
 *
 * @param data - Source bytes.
 * @param begin - Index of the first byte to decode.
 * @param end - Index one past the last byte to decode.
 * @returns The decoded string.
 */
export function nativeDecode(data: Uint8Array, begin: number, end: number): string {
    const window = data.subarray(begin, end);
    return TEXT_DECODER!.decode(window);
}
|
|
193
|
+
|
|
193
194
|
/** 字符串解码,无缓存 */
|
|
194
195
|
export function decode(data: Uint8Array, begin: number, end: number): string {
|
|
195
196
|
const length = end - begin;
|
|
@@ -198,22 +199,22 @@ export function decode(data: Uint8Array, begin: number, end: number): string {
|
|
|
198
199
|
if (result != null) return result;
|
|
199
200
|
}
|
|
200
201
|
// 只有小字符串有优化价值,见 benchmark-string.js
|
|
201
|
-
if (length <
|
|
202
|
+
if (length < TEXT_DECODER_THRESHOLD) {
|
|
202
203
|
// 为小字符串优化
|
|
203
|
-
return
|
|
204
|
+
return jsDecode(data, begin, end);
|
|
204
205
|
}
|
|
205
206
|
// 使用系统解码
|
|
206
|
-
return
|
|
207
|
+
return nativeDecode(data, begin, end);
|
|
207
208
|
}
|
|
208
209
|
|
|
209
|
-
const
|
|
210
|
+
const KEY_CACHE = Array.from<{ value: string; buffer: Uint8Array } | undefined>({ length: 4096 });
|
|
210
211
|
|
|
211
212
|
/** 字符串解码,使用缓存 */
|
|
212
213
|
export function decodeKey(data: Uint8Array, begin: number, end: number): string {
|
|
213
214
|
const length = end - begin;
|
|
214
215
|
const cacheKey =
|
|
215
216
|
((length << 5) ^ (length > 1 ? data[begin]! & (data[begin + 1]! << 8) : length > 0 ? data[begin]! : 0)) & 0xfff;
|
|
216
|
-
let entry =
|
|
217
|
+
let entry = KEY_CACHE[cacheKey];
|
|
217
218
|
if (entry != null && entry.buffer.byteLength === length) {
|
|
218
219
|
let i = 0;
|
|
219
220
|
for (; i < length; i++) {
|
|
@@ -225,15 +226,23 @@ export function decodeKey(data: Uint8Array, begin: number, end: number): string
|
|
|
225
226
|
let str = length < 16 ? shortStringInJS(data, begin, length) : longStringInJS(data, begin, length);
|
|
226
227
|
if (str == null) {
|
|
227
228
|
// 只有小字符串有优化价值,见 benchmark-string.js
|
|
228
|
-
if (length <
|
|
229
|
+
if (length < TEXT_DECODER_THRESHOLD) {
|
|
229
230
|
// 为小字符串优化
|
|
230
|
-
str =
|
|
231
|
+
str = jsDecode(data, begin, end);
|
|
231
232
|
} else {
|
|
232
233
|
// 使用系统解码
|
|
233
|
-
str =
|
|
234
|
+
str = nativeDecode(data, begin, end);
|
|
234
235
|
}
|
|
235
236
|
}
|
|
236
237
|
entry = { value: str, buffer: data.slice(begin, end) };
|
|
237
|
-
|
|
238
|
+
KEY_CACHE[cacheKey] = entry;
|
|
238
239
|
return str;
|
|
239
240
|
}
|
|
241
|
+
|
|
242
|
+
/**
 * Re-detects the runtime environment and clears cached state.
 *
 * Sets up the module-level `TextDecoder` (or `null` when the global is
 * missing), picks the matching size threshold below which `jsDecode` is used,
 * and empties the key cache.
 */
export function resetEnv(): void {
    if (typeof TextDecoder == 'function') {
        TEXT_DECODER = new TextDecoder('utf8', { ignoreBOM: true, fatal: false });
        TEXT_DECODER_THRESHOLD = 16;
    } else {
        TEXT_DECODER = null;
        // Without a native decoder every length must stay below the threshold.
        TEXT_DECODER_THRESHOLD = 0xffff_ffff;
    }
    KEY_CACHE.fill(undefined);
}
|
|
248
|
+
resetEnv();
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
// https://github.com/ehmicky/string-byte-length/blob/main/src/char_code.js
|
|
2
2
|
|
|
3
3
|
// First code point past the ASCII range (ASCII characters take 1 byte)
|
|
4
|
-
const LAST_ASCII_CODEPOINT =
|
|
4
|
+
const LAST_ASCII_CODEPOINT = 0x80;
|
|
5
5
|
// First code point past the 2-byte range
|
|
6
|
-
const LAST_TWO_BYTES_CODEPOINT =
|
|
6
|
+
const LAST_TWO_BYTES_CODEPOINT = 0x800;
|
|
7
7
|
// Others are 3 bytes characters
|
|
8
8
|
// However, U+d800 to U+dbff:
|
|
9
9
|
// - Followed by U+dc00 to U+dfff -> 4 bytes together (astral character)
|
|
@@ -13,18 +13,18 @@ const LAST_HIGH_SURROGATE = 0xdbff;
|
|
|
13
13
|
const FIRST_LOW_SURROGATE = 0xdc00;
|
|
14
14
|
const LAST_LOW_SURROGATE = 0xdfff;
|
|
15
15
|
/** 计算使用的空间 */
|
|
16
|
-
function
|
|
16
|
+
export function jsStringByteLength(string: string): number {
|
|
17
17
|
const charLength = string.length;
|
|
18
18
|
let byteLength = charLength;
|
|
19
19
|
|
|
20
20
|
for (let charIndex = 0; charIndex < charLength; charIndex += 1) {
|
|
21
21
|
const codepoint = string.charCodeAt(charIndex);
|
|
22
22
|
|
|
23
|
-
if (codepoint
|
|
23
|
+
if (codepoint < LAST_ASCII_CODEPOINT) {
|
|
24
24
|
continue;
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
if (codepoint
|
|
27
|
+
if (codepoint < LAST_TWO_BYTES_CODEPOINT) {
|
|
28
28
|
byteLength += 1;
|
|
29
29
|
continue;
|
|
30
30
|
}
|
|
@@ -52,15 +52,104 @@ function getCharCodeByteLength(string: string): number {
|
|
|
52
52
|
|
|
53
53
|
return byteLength;
|
|
54
54
|
}
|
|
55
|
+
/**
 * Computes the UTF-8 byte length of a string using `Buffer.byteLength`.
 *
 * @param string - String to measure.
 * @returns Number of bytes the string occupies when encoded as UTF-8.
 */
export function nodeStringByteLength(string: string): number {
    const byteCount = Buffer.byteLength(string, 'utf8');
    return byteCount;
}
|
|
59
|
+
|
|
60
|
+
/** 计算使用的空间 */
|
|
61
|
+
export let stringByteLength: (v: string) => number;
|
|
55
62
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
63
|
+
/**
 * Encodes a string as UTF-8 into `buf`, starting at `offset`.
 *
 * Valid surrogate pairs become one 4-byte sequence; a surrogate without its
 * partner is written as the bytes `EF BF BD` (U+FFFD).
 * The caller MUST provide a buffer with enough room; no bounds are checked.
 *
 * @param v - String to encode.
 * @param buf - Destination buffer.
 * @param offset - Index in `buf` at which writing starts.
 * @returns Number of bytes written.
 */
export function jsEncodeInto(v: string, buf: Uint8Array, offset: number): number {
    const total = v.length;
    let out = offset;
    let index = 0;
    while (index < total) {
        const unit = v.charCodeAt(index++);

        if (unit < LAST_ASCII_CODEPOINT) {
            buf[out++] = unit;
            continue;
        }
        if (unit < LAST_TWO_BYTES_CODEPOINT) {
            buf[out++] = 0xc0 | (unit >> 6);
            buf[out++] = 0x80 | (unit & 0x3f);
            continue;
        }

        const isSurrogate = unit >= FIRST_HIGH_SURROGATE && unit <= LAST_LOW_SURROGATE;
        if (!isSurrogate) {
            buf[out++] = 0xe0 | (unit >> 12);
            buf[out++] = 0x80 | ((unit >> 6) & 0x3f);
            buf[out++] = 0x80 | (unit & 0x3f);
            continue;
        }

        // A high surrogate must be followed immediately by a low surrogate.
        if (unit <= LAST_HIGH_SURROGATE && index < total) {
            const next = v.charCodeAt(index);
            if (next >= FIRST_LOW_SURROGATE && next <= LAST_LOW_SURROGATE) {
                index++;
                const codePoint = ((unit & 0x3ff) << 10) + (next & 0x3ff) + 0x1_0000;
                buf[out++] = 0xf0 | (codePoint >> 18);
                buf[out++] = 0x80 | ((codePoint >> 12) & 0x3f);
                buf[out++] = 0x80 | ((codePoint >> 6) & 0x3f);
                buf[out++] = 0x80 | (codePoint & 0x3f);
                continue;
            }
        }

        // Unpaired surrogate (lone low, or high not followed by low).
        buf[out++] = 0xef;
        buf[out++] = 0xbf;
        buf[out++] = 0xbd;
    }
    return out - offset;
}
|
|
60
105
|
|
|
61
106
|
/* c8 ignore next 1 */
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
107
|
+
let TEXT_ENCODER: TextEncoder | null;
|
|
108
|
+
/**
 * Encodes a string as UTF-8 into `buf` using `TextEncoder.encodeInto`.
 *
 * The module-level encoder is asserted to be non-null here.
 * The caller MUST provide a buffer with enough room after `offset`.
 *
 * @param v - String to encode.
 * @param buf - Destination buffer.
 * @param offset - Index in `buf` at which writing starts.
 * @returns Number of bytes written, as reported by the encoder.
 */
export function nativeEncodeInto(v: string, buf: Uint8Array, offset: number): number {
    const target = buf.subarray(offset);
    const { written } = TEXT_ENCODER!.encodeInto(v, target);
    return written;
}
|
|
116
|
+
/**
 * Encodes a string as UTF-8 into `buf`, choosing the implementation by size.
 *
 * Strings shorter than 55 UTF-16 code units go through `jsEncodeInto`; longer
 * ones go through `nativeEncodeInto`.
 * The caller MUST provide a buffer with enough room after `offset`.
 *
 * @param v - String to encode.
 * @param buf - Destination buffer.
 * @param offset - Index in `buf` at which writing starts.
 * @returns Number of bytes written.
 */
export function myEncodeInto(v: string, buf: Uint8Array, offset: number): number {
    return v.length < 55 ? jsEncodeInto(v, buf, offset) : nativeEncodeInto(v, buf, offset);
}
|
|
126
|
+
/**
|
|
127
|
+
* Encode string into utf-8.
|
|
128
|
+
* Provided `buf` MUST have enough space.
|
|
129
|
+
*/
|
|
130
|
+
export let encodeInto: (v: string, buf: Uint8Array, offset: number) => number;
|
|
131
|
+
/**
|
|
132
|
+
* Encode string into utf-8.
|
|
133
|
+
*/
|
|
134
|
+
export let encode: (v: string) => Uint8Array;
|
|
135
|
+
|
|
136
|
+
/**
 * Re-detects the runtime environment and rebinds the exported helpers.
 *
 * - `stringByteLength` uses `Buffer.byteLength` when available, else the JS fallback.
 * - `encodeInto` uses the size-switching `myEncodeInto` when `TextEncoder.encodeInto`
 *   exists, else the JS fallback.
 * - `encode` prefers `Buffer.from`, then `TextEncoder.encode`, then a JS fallback
 *   that sizes the buffer with `stringByteLength`.
 */
export function resetEnv(): void {
    TEXT_ENCODER = typeof TextEncoder == 'function' ? new TextEncoder() : null;

    const hasBuffer = typeof Buffer == 'function';
    if (hasBuffer && typeof Buffer.byteLength == 'function') {
        stringByteLength = nodeStringByteLength;
    } else {
        stringByteLength = jsStringByteLength;
    }

    if (typeof TEXT_ENCODER?.encodeInto == 'function') {
        encodeInto = myEncodeInto;
    } else {
        encodeInto = jsEncodeInto;
    }

    if (hasBuffer && Buffer.from) {
        encode = (v) => Buffer.from(v, 'utf8');
    } else if (TEXT_ENCODER) {
        encode = (v) => TEXT_ENCODER!.encode(v);
    } else {
        encode = (v) => {
            const buf = new Uint8Array(stringByteLength(v));
            jsEncodeInto(v, buf, 0);
            return buf;
        };
    }
}
|
|
155
|
+
resetEnv();
|
|
@@ -1,33 +1,49 @@
|
|
|
1
|
+
import { constants } from '../common/constants.js';
|
|
1
2
|
import { EncoderBase } from '../common/encoder.js';
|
|
3
|
+
import { unsupportedView } from '../common/errors.js';
|
|
4
|
+
import { encode, stringByteLength } from '../common/string-encoder.js';
|
|
2
5
|
|
|
3
|
-
const BLOCK_SIZE = 1024 *
|
|
4
|
-
const MAX_SIZE = 1024 * 1024 *
|
|
6
|
+
const BLOCK_SIZE = 1024 * 64; // 64 KiB
|
|
7
|
+
const MAX_SIZE = 1024 * 1024 * 32; // 32 MiB
|
|
5
8
|
|
|
6
9
|
/** 保存一个内存池以减少重复分配 */
|
|
7
10
|
let POOL: Uint8Array | null = null;
|
|
8
11
|
|
|
12
|
+
/**
 * Returns a buffer of `size` bytes, reusing the pooled block when possible.
 *
 * The pooled buffer is handed out (and the pool emptied) only for requests of
 * exactly `BLOCK_SIZE`; every other request gets a fresh `Uint8Array`.
 *
 * @param size - Requested size in bytes.
 */
function alloc(size: number): Uint8Array {
    if (size === BLOCK_SIZE && POOL != null) {
        const pooled = POOL;
        POOL = null;
        return pooled;
    }
    return new Uint8Array(size);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Offers a buffer back to the pool.
 *
 * The buffer is kept only when the pool is currently empty and the buffer is
 * exactly `BLOCK_SIZE` bytes long.
 *
 * @param buf - Buffer that is no longer needed by the caller.
 * @returns `true` when the pool took the buffer (it may be reused later), else `false`.
 */
function free(buf: Uint8Array): boolean {
    const accepted = POOL == null && buf.byteLength === BLOCK_SIZE;
    if (accepted) {
        POOL = buf;
    }
    return accepted;
}
|
|
30
|
+
|
|
9
31
|
/** 流式编码 UBJSON */
|
|
10
32
|
export class StreamEncoderHelper extends EncoderBase {
|
|
11
33
|
constructor(protected readonly onChunk: (chunk: Uint8Array) => void) {
|
|
12
34
|
super();
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
POOL = null;
|
|
16
|
-
} else {
|
|
17
|
-
this.pool = new Uint8Array(BLOCK_SIZE);
|
|
18
|
-
}
|
|
35
|
+
this.buffer = alloc(BLOCK_SIZE);
|
|
36
|
+
this.view = new DataView(this.buffer.buffer);
|
|
19
37
|
}
|
|
20
38
|
/**
|
|
21
39
|
* 销毁实例,释放内存池
|
|
22
40
|
*/
|
|
23
41
|
destroy(): void {
|
|
24
|
-
|
|
25
|
-
const self = this as unknown as {
|
|
26
|
-
self.
|
|
42
|
+
free(this.buffer);
|
|
43
|
+
const self = this as unknown as { view: DataView | null; buffer: Uint8Array | null };
|
|
44
|
+
self.view = null;
|
|
27
45
|
self.buffer = null;
|
|
28
46
|
}
|
|
29
|
-
/** 通过内存池减少分配 */
|
|
30
|
-
private readonly pool;
|
|
31
47
|
/**
|
|
32
48
|
* 确保 buffer 还有 capacity 的空闲空间
|
|
33
49
|
*/
|
|
@@ -36,45 +52,108 @@ export class StreamEncoderHelper extends EncoderBase {
|
|
|
36
52
|
// 超过最大尺寸限制
|
|
37
53
|
throw new Error('Buffer has exceed max size');
|
|
38
54
|
}
|
|
39
|
-
if (capacity < 0) {
|
|
40
|
-
// 结束流
|
|
41
|
-
if (this.buffer === this.pool) {
|
|
42
|
-
this.onChunk(this.buffer.slice(0, this.length));
|
|
43
|
-
} else {
|
|
44
|
-
this.onChunk(this.buffer.subarray(0, this.length));
|
|
45
|
-
}
|
|
46
|
-
return;
|
|
47
|
-
}
|
|
48
55
|
// 无需扩容
|
|
49
|
-
if (this.buffer.byteLength >= this.length + capacity) return;
|
|
56
|
+
if (capacity >= 0 && this.buffer.byteLength >= this.length + capacity) return;
|
|
50
57
|
|
|
58
|
+
const CURRENT_SIZE = this.buffer.byteLength;
|
|
59
|
+
const NEXT_SIZE = capacity < BLOCK_SIZE ? BLOCK_SIZE : capacity;
|
|
60
|
+
const REUSE_BUF =
|
|
61
|
+
CURRENT_SIZE >= NEXT_SIZE && // 满足容量需求
|
|
62
|
+
CURRENT_SIZE - BLOCK_SIZE < NEXT_SIZE; // 不过于浪费
|
|
51
63
|
// 提交目前的数据
|
|
52
|
-
if (
|
|
64
|
+
if (REUSE_BUF) {
|
|
53
65
|
this.onChunk(this.buffer.slice(0, this.length));
|
|
54
66
|
} else {
|
|
55
|
-
|
|
67
|
+
if (free(this.buffer)) {
|
|
68
|
+
// 归还内存池成功,buffer 可能重用,需要拷贝数据
|
|
69
|
+
this.onChunk(this.buffer.slice(0, this.length));
|
|
70
|
+
} else {
|
|
71
|
+
this.onChunk(this.buffer.subarray(0, this.length));
|
|
72
|
+
}
|
|
73
|
+
// 重新分配缓冲区
|
|
74
|
+
this.buffer = alloc(NEXT_SIZE);
|
|
75
|
+
this.view = new DataView(this.buffer.buffer);
|
|
56
76
|
}
|
|
77
|
+
this.length = 0;
|
|
78
|
+
}
|
|
79
|
+
/**
 * Streams the length header and UTF-8 payload of a large string.
 *
 * The INT32 length header goes through the internal buffer, which is then
 * submitted with `ensureCapacity(-1)`. The string itself is cut into slices of
 * at most `BLOCK_SIZE` UTF-16 code units; each slice is encoded on its own and
 * passed straight to `onChunk`.
 *
 * @inheritdoc
 */
protected override writeLargeStringData(value: string): void {
    const total = value.length;
    const byteCount = stringByteLength(value);

    // Header: INT32 marker (1 byte) + 32-bit length (4 bytes).
    this.ensureCapacity(5);
    this.buffer[this.length] = constants.INT32;
    this.length += 1;
    this.view.setInt32(this.length, byteCount);
    this.length += 4;
    this.ensureCapacity(-1);

    let start = 0;
    while (start < total) {
        let stop = start + BLOCK_SIZE;
        // If the next slice would begin with a low surrogate, end this slice one
        // unit earlier so a surrogate pair is never split across slices.
        if (stop < total && (value.charCodeAt(stop) & 0xfc00) === 0xdc00) {
            stop -= 1;
        }
        this.onChunk(encode(value.slice(start, stop)));
        start = stop;
    }
}
|
|
62
|
-
/**
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
104
|
+
/**
 * Streams the element data of a large typed array.
 *
 * Buffered bytes are submitted first with `ensureCapacity(-1)`. One-byte
 * element arrays are then copied out of the underlying `ArrayBuffer` in pieces
 * of at most `BLOCK_SIZE` bytes and passed directly to `onChunk`. Wider element
 * types are written one element at a time through the `DataView` setters
 * (called without a little-endian flag), requesting capacity per element.
 * Any other view type is rejected via `unsupportedView`.
 *
 * @inheritdoc
 */
protected override writeLargeTypedArrayData(value: ArrayBufferView): void {
    this.ensureCapacity(-1);
    if (value instanceof Uint8Array || value instanceof Int8Array) {
        // Fast path for typed arrays with `BYTES_PER_ELEMENT` of 1.
        const { buffer, byteOffset, byteLength } = value;
        for (let i = 0; i < byteLength; i += BLOCK_SIZE) {
            // Clamp to the end of the view: `value` may cover only part of a larger
            // ArrayBuffer, and `ArrayBuffer.prototype.slice` clamps to the buffer,
            // not to the view, so an unclamped end would emit bytes past the view.
            const chunkEnd = Math.min(i + BLOCK_SIZE, byteLength);
            this.onChunk(new Uint8Array(buffer.slice(byteOffset + i, byteOffset + chunkEnd)));
        }
        return;
    }

    const arrayLength = (value as Int16Array | Int32Array | BigInt64Array | Float32Array | Float64Array).length;
    const elementSize = (value as Int16Array | Int32Array | BigInt64Array | Float32Array | Float64Array)
        .BYTES_PER_ELEMENT;
    if (value instanceof Int16Array) {
        for (let i = 0; i < arrayLength; i++) {
            this.ensureCapacity(elementSize);
            this.view.setInt16(this.length, value[i]!);
            this.length += elementSize;
        }
    } else if (value instanceof Int32Array) {
        for (let i = 0; i < arrayLength; i++) {
            this.ensureCapacity(elementSize);
            this.view.setInt32(this.length, value[i]!);
            this.length += elementSize;
        }
    } else if (value instanceof Float32Array) {
        for (let i = 0; i < arrayLength; i++) {
            this.ensureCapacity(elementSize);
            this.view.setFloat32(this.length, value[i]!);
            this.length += elementSize;
        }
    } else if (value instanceof Float64Array) {
        for (let i = 0; i < arrayLength; i++) {
            this.ensureCapacity(elementSize);
            this.view.setFloat64(this.length, value[i]!);
            this.length += elementSize;
        }
    } else if (value instanceof BigInt64Array) {
        for (let i = 0; i < arrayLength; i++) {
            this.ensureCapacity(elementSize);
            this.view.setBigInt64(this.length, value[i]!);
            this.length += elementSize;
        }
    } else {
        unsupportedView(value);
    }
}
|
|
75
155
|
/** 获取写入结果 */
|
|
76
156
|
encode(value: unknown): void {
|
|
77
|
-
this.allocUnsafe(BLOCK_SIZE);
|
|
78
157
|
this.writeValue(value);
|
|
79
158
|
this.ensureCapacity(-1);
|
|
80
159
|
}
|
package/tests/.utils.js
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
import { resetEnv as resetDecoderEnv } from '../dist/common/string-decoder.js';
|
|
2
|
+
import { resetEnv as resetEncoderEnv } from '../dist/common/string-encoder.js';
|
|
3
|
+
import { resetEncoder } from '../dist/encoder.js';
|
|
4
|
+
|
|
5
|
+
/** Runs the decoder, string-encoder and encoder reset hooks, in that order. */
export function resetEnv() {
    for (const reset of [resetDecoderEnv, resetEncoderEnv, resetEncoder]) {
        reset();
    }
}
|
|
10
|
+
|
|
1
11
|
/**
|
|
2
12
|
* 输入转为数字数组以便比较
|
|
3
13
|
*
|