@cloudpss/ubjson 0.5.34 → 0.5.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{benchmark-string.js → benchmark-string-decode.js} +4 -4
- package/benchmark-string-encode.js +32 -0
- package/benchmark-string-size-caculation.js +9 -11
- package/benchmark.js +1 -0
- package/dist/common/decoder.js +2 -1
- package/dist/common/decoder.js.map +1 -1
- package/dist/common/encoder.d.ts +4 -2
- package/dist/common/encoder.js +106 -45
- package/dist/common/encoder.js.map +1 -1
- package/dist/common/errors.d.ts +4 -0
- package/dist/common/errors.js +14 -0
- package/dist/common/errors.js.map +1 -0
- package/dist/common/string-decoder.d.ts +5 -3
- package/dist/common/string-decoder.js +23 -14
- package/dist/common/string-decoder.js.map +1 -1
- package/dist/common/string-encoder.d.ts +32 -2
- package/dist/common/string-encoder.js +105 -12
- package/dist/common/string-encoder.js.map +1 -1
- package/dist/stream-helper/encoder.d.ts +4 -4
- package/dist/stream-helper/encoder.js +116 -41
- package/dist/stream-helper/encoder.js.map +1 -1
- package/package.json +3 -3
- package/src/common/decoder.ts +4 -3
- package/src/common/encoder.ts +106 -48
- package/src/common/errors.ts +14 -0
- package/src/common/string-decoder.ts +63 -54
- package/src/common/string-encoder.ts +103 -14
- package/src/stream-helper/encoder.ts +118 -39
- package/tests/.utils.js +10 -0
- package/tests/e2e/.data.js +470 -0
- package/tests/e2e/no-buffer-text.js +37 -0
- package/tests/e2e/no-buffer.js +30 -0
- package/tests/e2e/no-encode-into.js +32 -0
- package/tests/e2e/no-textencoder-decoder.js +34 -0
- package/tests/e2e/normal.js +27 -0
- package/tests/e2e/stream.js +20 -0
- package/tests/encode.js +11 -19
- package/tests/huge-string.js +7 -9
- package/tests/rxjs/encode.js +4 -18
- package/tests/stream/encode.js +0 -15
- package/tests/string-encoding.js +3 -2
- package/tests/tsconfig.json +2 -1
- package/tests/e2e.js +0 -415
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
// https://github.com/ehmicky/string-byte-length/blob/main/src/char_code.js
|
|
2
2
|
|
|
3
3
|
// Last ASCII character (1 byte)
|
|
4
|
-
const LAST_ASCII_CODEPOINT =
|
|
4
|
+
const LAST_ASCII_CODEPOINT = 0x80;
|
|
5
5
|
// Last 2-bytes character
|
|
6
|
-
const LAST_TWO_BYTES_CODEPOINT =
|
|
6
|
+
const LAST_TWO_BYTES_CODEPOINT = 0x800;
|
|
7
7
|
// Others are 3 bytes characters
|
|
8
8
|
// However, U+d800 to U+dbff:
|
|
9
9
|
// - Followed by U+dc00 to U+dfff -> 4 bytes together (astral character)
|
|
@@ -13,18 +13,18 @@ const LAST_HIGH_SURROGATE = 0xdbff;
|
|
|
13
13
|
const FIRST_LOW_SURROGATE = 0xdc00;
|
|
14
14
|
const LAST_LOW_SURROGATE = 0xdfff;
|
|
15
15
|
/** 计算使用的空间 */
|
|
16
|
-
function
|
|
16
|
+
export function jsStringByteLength(string: string): number {
|
|
17
17
|
const charLength = string.length;
|
|
18
18
|
let byteLength = charLength;
|
|
19
19
|
|
|
20
20
|
for (let charIndex = 0; charIndex < charLength; charIndex += 1) {
|
|
21
21
|
const codepoint = string.charCodeAt(charIndex);
|
|
22
22
|
|
|
23
|
-
if (codepoint
|
|
23
|
+
if (codepoint < LAST_ASCII_CODEPOINT) {
|
|
24
24
|
continue;
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
if (codepoint
|
|
27
|
+
if (codepoint < LAST_TWO_BYTES_CODEPOINT) {
|
|
28
28
|
byteLength += 1;
|
|
29
29
|
continue;
|
|
30
30
|
}
|
|
@@ -52,15 +52,104 @@ function getCharCodeByteLength(string: string): number {
|
|
|
52
52
|
|
|
53
53
|
return byteLength;
|
|
54
54
|
}
|
|
55
|
+
/** 计算使用的空间 */
|
|
56
|
+
export function nodeStringByteLength(string: string): number {
|
|
57
|
+
return Buffer.byteLength(string, 'utf8');
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/** 计算使用的空间 */
|
|
61
|
+
export let stringByteLength: (v: string) => number;
|
|
55
62
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
63
|
+
/**
|
|
64
|
+
* Encode string into utf-8.
|
|
65
|
+
* Provided `buf` MUST have enough space.
|
|
66
|
+
*/
|
|
67
|
+
export function jsEncodeInto(v: string, buf: Uint8Array, offset: number): number {
|
|
68
|
+
let ptr = offset;
|
|
69
|
+
const strLen = v.length;
|
|
70
|
+
for (let i = 0; i < strLen; i++) {
|
|
71
|
+
const c1 = v.charCodeAt(i);
|
|
72
|
+
if (c1 < LAST_ASCII_CODEPOINT) {
|
|
73
|
+
buf[ptr++] = c1;
|
|
74
|
+
} else if (c1 < LAST_TWO_BYTES_CODEPOINT) {
|
|
75
|
+
buf[ptr++] = 0xc0 | (c1 >> 6);
|
|
76
|
+
buf[ptr++] = 0x80 | (c1 & 0x3f);
|
|
77
|
+
} else if (c1 < FIRST_HIGH_SURROGATE || c1 > LAST_LOW_SURROGATE) {
|
|
78
|
+
buf[ptr++] = 0xe0 | (c1 >> 12);
|
|
79
|
+
buf[ptr++] = 0x80 | ((c1 >> 6) & 0x3f);
|
|
80
|
+
buf[ptr++] = 0x80 | (c1 & 0x3f);
|
|
81
|
+
} else if (c1 > LAST_HIGH_SURROGATE) {
|
|
82
|
+
// low surrogate without high surrogate
|
|
83
|
+
buf[ptr++] = 0xef;
|
|
84
|
+
buf[ptr++] = 0xbf;
|
|
85
|
+
buf[ptr++] = 0xbd;
|
|
86
|
+
} else {
|
|
87
|
+
const c2 = v.charCodeAt(++i);
|
|
88
|
+
if (i >= strLen || c2 < FIRST_LOW_SURROGATE || c2 > LAST_LOW_SURROGATE) {
|
|
89
|
+
// high surrogate not followed by low surrogate
|
|
90
|
+
buf[ptr++] = 0xef;
|
|
91
|
+
buf[ptr++] = 0xbf;
|
|
92
|
+
buf[ptr++] = 0xbd;
|
|
93
|
+
i--;
|
|
94
|
+
continue;
|
|
95
|
+
}
|
|
96
|
+
const c = ((c1 & 0x3ff) << 10) + (c2 & 0x3ff) + 0x1_0000;
|
|
97
|
+
buf[ptr++] = 0xf0 | (c >> 18);
|
|
98
|
+
buf[ptr++] = 0x80 | ((c >> 12) & 0x3f);
|
|
99
|
+
buf[ptr++] = 0x80 | ((c >> 6) & 0x3f);
|
|
100
|
+
buf[ptr++] = 0x80 | (c & 0x3f);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
return ptr - offset;
|
|
104
|
+
}
|
|
60
105
|
|
|
61
106
|
/* c8 ignore next 1 */
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
107
|
+
let TEXT_ENCODER: TextEncoder | null;
|
|
108
|
+
/**
|
|
109
|
+
* Encode string into utf-8.
|
|
110
|
+
* Provided `buf` MUST have enough space.
|
|
111
|
+
*/
|
|
112
|
+
export function nativeEncodeInto(v: string, buf: Uint8Array, offset: number): number {
|
|
113
|
+
const encoded = TEXT_ENCODER!.encodeInto(v, buf.subarray(offset));
|
|
114
|
+
return encoded.written;
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Encode string into utf-8.
|
|
118
|
+
* Provided `buf` MUST have enough space.
|
|
119
|
+
*/
|
|
120
|
+
export function myEncodeInto(v: string, buf: Uint8Array, offset: number): number {
|
|
121
|
+
if (v.length < 55) {
|
|
122
|
+
return jsEncodeInto(v, buf, offset);
|
|
123
|
+
}
|
|
124
|
+
return nativeEncodeInto(v, buf, offset);
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Encode string into utf-8.
|
|
128
|
+
* Provided `buf` MUST have enough space.
|
|
129
|
+
*/
|
|
130
|
+
export let encodeInto: (v: string, buf: Uint8Array, offset: number) => number;
|
|
131
|
+
/**
|
|
132
|
+
* Encode string into utf-8.
|
|
133
|
+
*/
|
|
134
|
+
export let encode: (v: string) => Uint8Array;
|
|
135
|
+
|
|
136
|
+
/** 重设环境 */
|
|
137
|
+
export function resetEnv(): void {
|
|
138
|
+
TEXT_ENCODER = typeof TextEncoder == 'function' ? new TextEncoder() : null;
|
|
139
|
+
stringByteLength =
|
|
140
|
+
typeof Buffer == 'function' && typeof Buffer.byteLength == 'function'
|
|
141
|
+
? nodeStringByteLength
|
|
142
|
+
: jsStringByteLength;
|
|
143
|
+
encodeInto = typeof TEXT_ENCODER?.encodeInto == 'function' ? myEncodeInto : jsEncodeInto;
|
|
144
|
+
encode =
|
|
145
|
+
typeof Buffer == 'function' && Buffer.from
|
|
146
|
+
? (v) => Buffer.from(v, 'utf8')
|
|
147
|
+
: TEXT_ENCODER
|
|
148
|
+
? (v) => TEXT_ENCODER!.encode(v)
|
|
149
|
+
: (v) => {
|
|
150
|
+
const buf = new Uint8Array(stringByteLength(v));
|
|
151
|
+
jsEncodeInto(v, buf, 0);
|
|
152
|
+
return buf;
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
resetEnv();
|
|
@@ -1,33 +1,49 @@
|
|
|
1
|
+
import { constants } from '../common/constants.js';
|
|
1
2
|
import { EncoderBase } from '../common/encoder.js';
|
|
3
|
+
import { unsupportedView } from '../common/errors.js';
|
|
4
|
+
import { encode, stringByteLength } from '../common/string-encoder.js';
|
|
2
5
|
|
|
3
|
-
const BLOCK_SIZE = 1024 *
|
|
4
|
-
const MAX_SIZE = 1024 * 1024 *
|
|
6
|
+
const BLOCK_SIZE = 1024 * 64; // 64 KiB
|
|
7
|
+
const MAX_SIZE = 1024 * 1024 * 32; // 32 MiB
|
|
5
8
|
|
|
6
9
|
/** 保存一个内存池以减少重复分配 */
|
|
7
10
|
let POOL: Uint8Array | null = null;
|
|
8
11
|
|
|
12
|
+
/** 获取内存池 */
|
|
13
|
+
function alloc(size: number): Uint8Array {
|
|
14
|
+
if (POOL == null || size !== BLOCK_SIZE) {
|
|
15
|
+
return new Uint8Array(size);
|
|
16
|
+
}
|
|
17
|
+
const pool = POOL;
|
|
18
|
+
POOL = null;
|
|
19
|
+
return pool;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/** 归还内存池 */
|
|
23
|
+
function free(buf: Uint8Array): boolean {
|
|
24
|
+
if (POOL == null && buf.byteLength === BLOCK_SIZE) {
|
|
25
|
+
POOL = buf;
|
|
26
|
+
return true;
|
|
27
|
+
}
|
|
28
|
+
return false;
|
|
29
|
+
}
|
|
30
|
+
|
|
9
31
|
/** 流式编码 UBJSON */
|
|
10
32
|
export class StreamEncoderHelper extends EncoderBase {
|
|
11
33
|
constructor(protected readonly onChunk: (chunk: Uint8Array) => void) {
|
|
12
34
|
super();
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
POOL = null;
|
|
16
|
-
} else {
|
|
17
|
-
this.pool = new Uint8Array(BLOCK_SIZE);
|
|
18
|
-
}
|
|
35
|
+
this.buffer = alloc(BLOCK_SIZE);
|
|
36
|
+
this.view = new DataView(this.buffer.buffer);
|
|
19
37
|
}
|
|
20
38
|
/**
|
|
21
39
|
* 销毁实例,释放内存池
|
|
22
40
|
*/
|
|
23
41
|
destroy(): void {
|
|
24
|
-
|
|
25
|
-
const self = this as unknown as {
|
|
26
|
-
self.
|
|
42
|
+
free(this.buffer);
|
|
43
|
+
const self = this as unknown as { view: DataView | null; buffer: Uint8Array | null };
|
|
44
|
+
self.view = null;
|
|
27
45
|
self.buffer = null;
|
|
28
46
|
}
|
|
29
|
-
/** 通过内存池减少分配 */
|
|
30
|
-
private readonly pool;
|
|
31
47
|
/**
|
|
32
48
|
* 确保 buffer 还有 capacity 的空闲空间
|
|
33
49
|
*/
|
|
@@ -36,45 +52,108 @@ export class StreamEncoderHelper extends EncoderBase {
|
|
|
36
52
|
// 超过最大尺寸限制
|
|
37
53
|
throw new Error('Buffer has exceed max size');
|
|
38
54
|
}
|
|
39
|
-
if (capacity < 0) {
|
|
40
|
-
// 结束流
|
|
41
|
-
if (this.buffer === this.pool) {
|
|
42
|
-
this.onChunk(this.buffer.slice(0, this.length));
|
|
43
|
-
} else {
|
|
44
|
-
this.onChunk(this.buffer.subarray(0, this.length));
|
|
45
|
-
}
|
|
46
|
-
return;
|
|
47
|
-
}
|
|
48
55
|
// 无需扩容
|
|
49
|
-
if (this.buffer.byteLength >= this.length + capacity) return;
|
|
56
|
+
if (capacity >= 0 && this.buffer.byteLength >= this.length + capacity) return;
|
|
50
57
|
|
|
58
|
+
const CURRENT_SIZE = this.buffer.byteLength;
|
|
59
|
+
const NEXT_SIZE = capacity < BLOCK_SIZE ? BLOCK_SIZE : capacity;
|
|
60
|
+
const REUSE_BUF =
|
|
61
|
+
CURRENT_SIZE >= NEXT_SIZE && // 满足容量需求
|
|
62
|
+
CURRENT_SIZE - BLOCK_SIZE < NEXT_SIZE; // 不过于浪费
|
|
51
63
|
// 提交目前的数据
|
|
52
|
-
if (
|
|
64
|
+
if (REUSE_BUF) {
|
|
53
65
|
this.onChunk(this.buffer.slice(0, this.length));
|
|
54
66
|
} else {
|
|
55
|
-
|
|
67
|
+
if (free(this.buffer)) {
|
|
68
|
+
// 归还内存池成功,buffer 可能重用,需要拷贝数据
|
|
69
|
+
this.onChunk(this.buffer.slice(0, this.length));
|
|
70
|
+
} else {
|
|
71
|
+
this.onChunk(this.buffer.subarray(0, this.length));
|
|
72
|
+
}
|
|
73
|
+
// 重新分配缓冲区
|
|
74
|
+
this.buffer = alloc(NEXT_SIZE);
|
|
75
|
+
this.view = new DataView(this.buffer.buffer);
|
|
56
76
|
}
|
|
77
|
+
this.length = 0;
|
|
78
|
+
}
|
|
79
|
+
/** @inheritdoc */
|
|
80
|
+
protected override writeLargeStringData(value: string): void {
|
|
81
|
+
const strLen = value.length;
|
|
82
|
+
const binLen = stringByteLength(value);
|
|
83
|
+
this.ensureCapacity(5);
|
|
84
|
+
this.buffer[this.length++] = constants.INT32;
|
|
85
|
+
this.view.setInt32(this.length, binLen);
|
|
86
|
+
this.length += 4;
|
|
87
|
+
this.ensureCapacity(-1);
|
|
57
88
|
|
|
58
|
-
//
|
|
59
|
-
|
|
60
|
-
|
|
89
|
+
// divide string to 64k chunks
|
|
90
|
+
for (let i = 0; i < strLen; i += BLOCK_SIZE) {
|
|
91
|
+
let end = i + BLOCK_SIZE;
|
|
92
|
+
// avoid split surrogate pair
|
|
93
|
+
const endAtSurrogate = end < strLen && (value.charCodeAt(end) & 0xfc00) === 0xdc00;
|
|
94
|
+
if (endAtSurrogate) {
|
|
95
|
+
end--;
|
|
96
|
+
}
|
|
97
|
+
const chunk = value.slice(i, end);
|
|
98
|
+
this.onChunk(encode(chunk));
|
|
99
|
+
if (endAtSurrogate) {
|
|
100
|
+
i--;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
61
103
|
}
|
|
62
|
-
/**
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
104
|
+
/** @inheritdoc */
|
|
105
|
+
protected override writeLargeTypedArrayData(value: ArrayBufferView): void {
|
|
106
|
+
this.ensureCapacity(-1);
|
|
107
|
+
if (value instanceof Uint8Array || value instanceof Int8Array) {
|
|
108
|
+
// fast path for typed arrays with `BYTES_PER_ELEMENT` of 1
|
|
109
|
+
// divide buffer to 64k chunks
|
|
110
|
+
for (let i = 0; i < value.byteLength; i += BLOCK_SIZE) {
|
|
111
|
+
this.onChunk(
|
|
112
|
+
new Uint8Array(value.buffer.slice(value.byteOffset + i, value.byteOffset + i + BLOCK_SIZE)),
|
|
113
|
+
);
|
|
114
|
+
}
|
|
69
115
|
return;
|
|
70
116
|
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
117
|
+
|
|
118
|
+
const arrayLength = (value as Int16Array | Int32Array | BigInt64Array | Float32Array | Float64Array).length;
|
|
119
|
+
const elementSize = (value as Int16Array | Int32Array | BigInt64Array | Float32Array | Float64Array)
|
|
120
|
+
.BYTES_PER_ELEMENT;
|
|
121
|
+
if (value instanceof Int16Array) {
|
|
122
|
+
for (let i = 0; i < arrayLength; i++) {
|
|
123
|
+
this.ensureCapacity(elementSize);
|
|
124
|
+
this.view.setInt16(this.length, value[i]!);
|
|
125
|
+
this.length += elementSize;
|
|
126
|
+
}
|
|
127
|
+
} else if (value instanceof Int32Array) {
|
|
128
|
+
for (let i = 0; i < arrayLength; i++) {
|
|
129
|
+
this.ensureCapacity(elementSize);
|
|
130
|
+
this.view.setInt32(this.length, value[i]!);
|
|
131
|
+
this.length += elementSize;
|
|
132
|
+
}
|
|
133
|
+
} else if (value instanceof Float32Array) {
|
|
134
|
+
for (let i = 0; i < arrayLength; i++) {
|
|
135
|
+
this.ensureCapacity(elementSize);
|
|
136
|
+
this.view.setFloat32(this.length, value[i]!);
|
|
137
|
+
this.length += elementSize;
|
|
138
|
+
}
|
|
139
|
+
} else if (value instanceof Float64Array) {
|
|
140
|
+
for (let i = 0; i < arrayLength; i++) {
|
|
141
|
+
this.ensureCapacity(elementSize);
|
|
142
|
+
this.view.setFloat64(this.length, value[i]!);
|
|
143
|
+
this.length += elementSize;
|
|
144
|
+
}
|
|
145
|
+
} else if (value instanceof BigInt64Array) {
|
|
146
|
+
for (let i = 0; i < arrayLength; i++) {
|
|
147
|
+
this.ensureCapacity(elementSize);
|
|
148
|
+
this.view.setBigInt64(this.length, value[i]!);
|
|
149
|
+
this.length += elementSize;
|
|
150
|
+
}
|
|
151
|
+
} else {
|
|
152
|
+
unsupportedView(value);
|
|
153
|
+
}
|
|
74
154
|
}
|
|
75
155
|
/** 获取写入结果 */
|
|
76
156
|
encode(value: unknown): void {
|
|
77
|
-
this.allocUnsafe(BLOCK_SIZE);
|
|
78
157
|
this.writeValue(value);
|
|
79
158
|
this.ensureCapacity(-1);
|
|
80
159
|
}
|
package/tests/.utils.js
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
import { resetEnv as resetDecoderEnv } from '../dist/common/string-decoder.js';
|
|
2
|
+
import { resetEnv as resetEncoderEnv } from '../dist/common/string-encoder.js';
|
|
3
|
+
import { resetEncoder } from '../dist/encoder.js';
|
|
4
|
+
|
|
5
|
+
export function resetEnv() {
|
|
6
|
+
resetDecoderEnv();
|
|
7
|
+
resetEncoderEnv();
|
|
8
|
+
resetEncoder();
|
|
9
|
+
}
|
|
10
|
+
|
|
1
11
|
/**
|
|
2
12
|
* 输入转为数字数组以便比较
|
|
3
13
|
*
|