@cloudpss/ubjson 0.5.34 → 0.5.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/{benchmark-string.js → benchmark-string-decode.js} +4 -4
  2. package/benchmark-string-encode.js +32 -0
  3. package/benchmark-string-size-caculation.js +9 -11
  4. package/benchmark.js +1 -0
  5. package/dist/common/decoder.js +2 -1
  6. package/dist/common/decoder.js.map +1 -1
  7. package/dist/common/encoder.d.ts +4 -2
  8. package/dist/common/encoder.js +106 -45
  9. package/dist/common/encoder.js.map +1 -1
  10. package/dist/common/errors.d.ts +4 -0
  11. package/dist/common/errors.js +14 -0
  12. package/dist/common/errors.js.map +1 -0
  13. package/dist/common/string-decoder.d.ts +5 -3
  14. package/dist/common/string-decoder.js +23 -14
  15. package/dist/common/string-decoder.js.map +1 -1
  16. package/dist/common/string-encoder.d.ts +32 -2
  17. package/dist/common/string-encoder.js +105 -12
  18. package/dist/common/string-encoder.js.map +1 -1
  19. package/dist/stream-helper/encoder.d.ts +4 -4
  20. package/dist/stream-helper/encoder.js +116 -41
  21. package/dist/stream-helper/encoder.js.map +1 -1
  22. package/package.json +3 -3
  23. package/src/common/decoder.ts +4 -3
  24. package/src/common/encoder.ts +106 -48
  25. package/src/common/errors.ts +14 -0
  26. package/src/common/string-decoder.ts +63 -54
  27. package/src/common/string-encoder.ts +103 -14
  28. package/src/stream-helper/encoder.ts +118 -39
  29. package/tests/.utils.js +10 -0
  30. package/tests/e2e/.data.js +470 -0
  31. package/tests/e2e/no-buffer-text.js +37 -0
  32. package/tests/e2e/no-buffer.js +30 -0
  33. package/tests/e2e/no-encode-into.js +32 -0
  34. package/tests/e2e/no-textencoder-decoder.js +34 -0
  35. package/tests/e2e/normal.js +27 -0
  36. package/tests/e2e/stream.js +20 -0
  37. package/tests/encode.js +11 -19
  38. package/tests/huge-string.js +7 -9
  39. package/tests/rxjs/encode.js +4 -18
  40. package/tests/stream/encode.js +0 -15
  41. package/tests/string-encoding.js +3 -2
  42. package/tests/tsconfig.json +2 -1
  43. package/tests/e2e.js +0 -415
package/src/common/string-encoder.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  // https://github.com/ehmicky/string-byte-length/blob/main/src/char_code.js
2
2
 
3
3
  // Last ASCII character (1 byte)
4
- const LAST_ASCII_CODEPOINT = 0x7f;
4
+ const LAST_ASCII_CODEPOINT = 0x80;
5
5
  // Last 2-bytes character
6
- const LAST_TWO_BYTES_CODEPOINT = 0x7ff;
6
+ const LAST_TWO_BYTES_CODEPOINT = 0x800;
7
7
  // Others are 3 bytes characters
8
8
  // However, U+d800 to U+dbff:
9
9
  // - Followed by U+dc00 to U+dfff -> 4 bytes together (astral character)
@@ -13,18 +13,18 @@ const LAST_HIGH_SURROGATE = 0xdbff;
13
13
  const FIRST_LOW_SURROGATE = 0xdc00;
14
14
  const LAST_LOW_SURROGATE = 0xdfff;
15
15
  /** 计算使用的空间 */
16
- function getCharCodeByteLength(string: string): number {
16
+ export function jsStringByteLength(string: string): number {
17
17
  const charLength = string.length;
18
18
  let byteLength = charLength;
19
19
 
20
20
  for (let charIndex = 0; charIndex < charLength; charIndex += 1) {
21
21
  const codepoint = string.charCodeAt(charIndex);
22
22
 
23
- if (codepoint <= LAST_ASCII_CODEPOINT) {
23
+ if (codepoint < LAST_ASCII_CODEPOINT) {
24
24
  continue;
25
25
  }
26
26
 
27
- if (codepoint <= LAST_TWO_BYTES_CODEPOINT) {
27
+ if (codepoint < LAST_TWO_BYTES_CODEPOINT) {
28
28
  byteLength += 1;
29
29
  continue;
30
30
  }
@@ -52,15 +52,104 @@ function getCharCodeByteLength(string: string): number {
52
52
 
53
53
  return byteLength;
54
54
  }
55
+ /** 计算使用的空间 */
56
+ export function nodeStringByteLength(string: string): number {
57
+ return Buffer.byteLength(string, 'utf8');
58
+ }
59
+
60
+ /** 计算使用的空间 */
61
+ export let stringByteLength: (v: string) => number;
55
62
 
56
- export const getStringByteLength = (): ((v: string) => number) =>
57
- typeof Buffer == 'function' && typeof Buffer.byteLength == 'function'
58
- ? (v) => Buffer.byteLength(v, 'utf8')
59
- : getCharCodeByteLength;
63
+ /**
64
+ * Encode string into utf-8.
65
+ * Provided `buf` MUST have enough space.
66
+ */
67
+ export function jsEncodeInto(v: string, buf: Uint8Array, offset: number): number {
68
+ let ptr = offset;
69
+ const strLen = v.length;
70
+ for (let i = 0; i < strLen; i++) {
71
+ const c1 = v.charCodeAt(i);
72
+ if (c1 < LAST_ASCII_CODEPOINT) {
73
+ buf[ptr++] = c1;
74
+ } else if (c1 < LAST_TWO_BYTES_CODEPOINT) {
75
+ buf[ptr++] = 0xc0 | (c1 >> 6);
76
+ buf[ptr++] = 0x80 | (c1 & 0x3f);
77
+ } else if (c1 < FIRST_HIGH_SURROGATE || c1 > LAST_LOW_SURROGATE) {
78
+ buf[ptr++] = 0xe0 | (c1 >> 12);
79
+ buf[ptr++] = 0x80 | ((c1 >> 6) & 0x3f);
80
+ buf[ptr++] = 0x80 | (c1 & 0x3f);
81
+ } else if (c1 > LAST_HIGH_SURROGATE) {
82
+ // low surrogate without high surrogate
83
+ buf[ptr++] = 0xef;
84
+ buf[ptr++] = 0xbf;
85
+ buf[ptr++] = 0xbd;
86
+ } else {
87
+ const c2 = v.charCodeAt(++i);
88
+ if (i >= strLen || c2 < FIRST_LOW_SURROGATE || c2 > LAST_LOW_SURROGATE) {
89
+ // high surrogate not followed by low surrogate
90
+ buf[ptr++] = 0xef;
91
+ buf[ptr++] = 0xbf;
92
+ buf[ptr++] = 0xbd;
93
+ i--;
94
+ continue;
95
+ }
96
+ const c = ((c1 & 0x3ff) << 10) + (c2 & 0x3ff) + 0x1_0000;
97
+ buf[ptr++] = 0xf0 | (c >> 18);
98
+ buf[ptr++] = 0x80 | ((c >> 12) & 0x3f);
99
+ buf[ptr++] = 0x80 | ((c >> 6) & 0x3f);
100
+ buf[ptr++] = 0x80 | (c & 0x3f);
101
+ }
102
+ }
103
+ return ptr - offset;
104
+ }
60
105
 
61
106
  /* c8 ignore next 1 */
62
- const encoder = typeof TextEncoder == 'function' ? new TextEncoder() : undefined;
63
- export const getEncodeInto = (): ((v: string, buf: Uint8Array, offset: number) => number) | undefined =>
64
- typeof encoder?.encodeInto == 'function'
65
- ? (v, buf, offset) => encoder.encodeInto(v, buf.subarray(offset)).written
66
- : undefined;
107
+ let TEXT_ENCODER: TextEncoder | null;
108
+ /**
109
+ * Encode string into utf-8.
110
+ * Provided `buf` MUST have enough space.
111
+ */
112
+ export function nativeEncodeInto(v: string, buf: Uint8Array, offset: number): number {
113
+ const encoded = TEXT_ENCODER!.encodeInto(v, buf.subarray(offset));
114
+ return encoded.written;
115
+ }
116
+ /**
117
+ * Encode string into utf-8.
118
+ * Provided `buf` MUST have enough space.
119
+ */
120
+ export function myEncodeInto(v: string, buf: Uint8Array, offset: number): number {
121
+ if (v.length < 55) {
122
+ return jsEncodeInto(v, buf, offset);
123
+ }
124
+ return nativeEncodeInto(v, buf, offset);
125
+ }
126
+ /**
127
+ * Encode string into utf-8.
128
+ * Provided `buf` MUST have enough space.
129
+ */
130
+ export let encodeInto: (v: string, buf: Uint8Array, offset: number) => number;
131
+ /**
132
+ * Encode string into utf-8.
133
+ */
134
+ export let encode: (v: string) => Uint8Array;
135
+
136
+ /** 重设环境 */
137
+ export function resetEnv(): void {
138
+ TEXT_ENCODER = typeof TextEncoder == 'function' ? new TextEncoder() : null;
139
+ stringByteLength =
140
+ typeof Buffer == 'function' && typeof Buffer.byteLength == 'function'
141
+ ? nodeStringByteLength
142
+ : jsStringByteLength;
143
+ encodeInto = typeof TEXT_ENCODER?.encodeInto == 'function' ? myEncodeInto : jsEncodeInto;
144
+ encode =
145
+ typeof Buffer == 'function' && Buffer.from
146
+ ? (v) => Buffer.from(v, 'utf8')
147
+ : TEXT_ENCODER
148
+ ? (v) => TEXT_ENCODER!.encode(v)
149
+ : (v) => {
150
+ const buf = new Uint8Array(stringByteLength(v));
151
+ jsEncodeInto(v, buf, 0);
152
+ return buf;
153
+ };
154
+ }
155
+ resetEnv();
package/src/stream-helper/encoder.ts CHANGED
@@ -1,33 +1,49 @@
1
+ import { constants } from '../common/constants.js';
1
2
  import { EncoderBase } from '../common/encoder.js';
3
+ import { unsupportedView } from '../common/errors.js';
4
+ import { encode, stringByteLength } from '../common/string-encoder.js';
2
5
 
3
- const BLOCK_SIZE = 1024 * 8; // 8 KiB
4
- const MAX_SIZE = 1024 * 1024 * 256; // 256 MiB
6
+ const BLOCK_SIZE = 1024 * 64; // 64 KiB
7
+ const MAX_SIZE = 1024 * 1024 * 32; // 32 MiB
5
8
 
6
9
  /** 保存一个内存池以减少重复分配 */
7
10
  let POOL: Uint8Array | null = null;
8
11
 
12
+ /** 获取内存池 */
13
+ function alloc(size: number): Uint8Array {
14
+ if (POOL == null || size !== BLOCK_SIZE) {
15
+ return new Uint8Array(size);
16
+ }
17
+ const pool = POOL;
18
+ POOL = null;
19
+ return pool;
20
+ }
21
+
22
+ /** 归还内存池 */
23
+ function free(buf: Uint8Array): boolean {
24
+ if (POOL == null && buf.byteLength === BLOCK_SIZE) {
25
+ POOL = buf;
26
+ return true;
27
+ }
28
+ return false;
29
+ }
30
+
9
31
  /** 流式编码 UBJSON */
10
32
  export class StreamEncoderHelper extends EncoderBase {
11
33
  constructor(protected readonly onChunk: (chunk: Uint8Array) => void) {
12
34
  super();
13
- if (POOL != null) {
14
- this.pool = POOL;
15
- POOL = null;
16
- } else {
17
- this.pool = new Uint8Array(BLOCK_SIZE);
18
- }
35
+ this.buffer = alloc(BLOCK_SIZE);
36
+ this.view = new DataView(this.buffer.buffer);
19
37
  }
20
38
  /**
21
39
  * 销毁实例,释放内存池
22
40
  */
23
41
  destroy(): void {
24
- POOL ??= this.pool;
25
- const self = this as unknown as { pool: Uint8Array | null; buffer: Uint8Array | null };
26
- self.pool = null;
42
+ free(this.buffer);
43
+ const self = this as unknown as { view: DataView | null; buffer: Uint8Array | null };
44
+ self.view = null;
27
45
  self.buffer = null;
28
46
  }
29
- /** 通过内存池减少分配 */
30
- private readonly pool;
31
47
  /**
32
48
  * 确保 buffer 还有 capacity 的空闲空间
33
49
  */
@@ -36,45 +52,108 @@ export class StreamEncoderHelper extends EncoderBase {
36
52
  // 超过最大尺寸限制
37
53
  throw new Error('Buffer has exceed max size');
38
54
  }
39
- if (capacity < 0) {
40
- // 结束流
41
- if (this.buffer === this.pool) {
42
- this.onChunk(this.buffer.slice(0, this.length));
43
- } else {
44
- this.onChunk(this.buffer.subarray(0, this.length));
45
- }
46
- return;
47
- }
48
55
  // 无需扩容
49
- if (this.buffer.byteLength >= this.length + capacity) return;
56
+ if (capacity >= 0 && this.buffer.byteLength >= this.length + capacity) return;
50
57
 
58
+ const CURRENT_SIZE = this.buffer.byteLength;
59
+ const NEXT_SIZE = capacity < BLOCK_SIZE ? BLOCK_SIZE : capacity;
60
+ const REUSE_BUF =
61
+ CURRENT_SIZE >= NEXT_SIZE && // 满足容量需求
62
+ CURRENT_SIZE - BLOCK_SIZE < NEXT_SIZE; // 不过于浪费
51
63
  // 提交目前的数据
52
- if (this.buffer === this.pool) {
64
+ if (REUSE_BUF) {
53
65
  this.onChunk(this.buffer.slice(0, this.length));
54
66
  } else {
55
- this.onChunk(this.buffer.subarray(0, this.length));
67
+ if (free(this.buffer)) {
68
+ // 归还内存池成功,buffer 可能重用,需要拷贝数据
69
+ this.onChunk(this.buffer.slice(0, this.length));
70
+ } else {
71
+ this.onChunk(this.buffer.subarray(0, this.length));
72
+ }
73
+ // 重新分配缓冲区
74
+ this.buffer = alloc(NEXT_SIZE);
75
+ this.view = new DataView(this.buffer.buffer);
56
76
  }
77
+ this.length = 0;
78
+ }
79
+ /** @inheritdoc */
80
+ protected override writeLargeStringData(value: string): void {
81
+ const strLen = value.length;
82
+ const binLen = stringByteLength(value);
83
+ this.ensureCapacity(5);
84
+ this.buffer[this.length++] = constants.INT32;
85
+ this.view.setInt32(this.length, binLen);
86
+ this.length += 4;
87
+ this.ensureCapacity(-1);
57
88
 
58
- // 重新分配缓冲区
59
- if (capacity < BLOCK_SIZE) capacity = BLOCK_SIZE;
60
- this.allocUnsafe(capacity);
89
+ // divide string to 64k chunks
90
+ for (let i = 0; i < strLen; i += BLOCK_SIZE) {
91
+ let end = i + BLOCK_SIZE;
92
+ // avoid split surrogate pair
93
+ const endAtSurrogate = end < strLen && (value.charCodeAt(end) & 0xfc00) === 0xdc00;
94
+ if (endAtSurrogate) {
95
+ end--;
96
+ }
97
+ const chunk = value.slice(i, end);
98
+ this.onChunk(encode(chunk));
99
+ if (endAtSurrogate) {
100
+ i--;
101
+ }
102
+ }
61
103
  }
62
- /** 分配 buffer */
63
- private allocUnsafe(size: number): void {
64
- if (size === this.pool.byteLength) {
65
- // pool 中获取
66
- this.buffer = this.pool;
67
- this.view = new DataView(this.buffer.buffer);
68
- this.length = 0;
104
+ /** @inheritdoc */
105
+ protected override writeLargeTypedArrayData(value: ArrayBufferView): void {
106
+ this.ensureCapacity(-1);
107
+ if (value instanceof Uint8Array || value instanceof Int8Array) {
108
+ // fast path for typed arrays with `BYTES_PER_ELEMENT` of 1
109
+ // divide buffer to 64k chunks
110
+ for (let i = 0; i < value.byteLength; i += BLOCK_SIZE) {
111
+ this.onChunk(
112
+ new Uint8Array(value.buffer.slice(value.byteOffset + i, value.byteOffset + i + BLOCK_SIZE)),
113
+ );
114
+ }
69
115
  return;
70
116
  }
71
- this.buffer = new Uint8Array(size);
72
- this.view = new DataView(this.buffer.buffer);
73
- this.length = 0;
117
+
118
+ const arrayLength = (value as Int16Array | Int32Array | BigInt64Array | Float32Array | Float64Array).length;
119
+ const elementSize = (value as Int16Array | Int32Array | BigInt64Array | Float32Array | Float64Array)
120
+ .BYTES_PER_ELEMENT;
121
+ if (value instanceof Int16Array) {
122
+ for (let i = 0; i < arrayLength; i++) {
123
+ this.ensureCapacity(elementSize);
124
+ this.view.setInt16(this.length, value[i]!);
125
+ this.length += elementSize;
126
+ }
127
+ } else if (value instanceof Int32Array) {
128
+ for (let i = 0; i < arrayLength; i++) {
129
+ this.ensureCapacity(elementSize);
130
+ this.view.setInt32(this.length, value[i]!);
131
+ this.length += elementSize;
132
+ }
133
+ } else if (value instanceof Float32Array) {
134
+ for (let i = 0; i < arrayLength; i++) {
135
+ this.ensureCapacity(elementSize);
136
+ this.view.setFloat32(this.length, value[i]!);
137
+ this.length += elementSize;
138
+ }
139
+ } else if (value instanceof Float64Array) {
140
+ for (let i = 0; i < arrayLength; i++) {
141
+ this.ensureCapacity(elementSize);
142
+ this.view.setFloat64(this.length, value[i]!);
143
+ this.length += elementSize;
144
+ }
145
+ } else if (value instanceof BigInt64Array) {
146
+ for (let i = 0; i < arrayLength; i++) {
147
+ this.ensureCapacity(elementSize);
148
+ this.view.setBigInt64(this.length, value[i]!);
149
+ this.length += elementSize;
150
+ }
151
+ } else {
152
+ unsupportedView(value);
153
+ }
74
154
  }
75
155
  /** 获取写入结果 */
76
156
  encode(value: unknown): void {
77
- this.allocUnsafe(BLOCK_SIZE);
78
157
  this.writeValue(value);
79
158
  this.ensureCapacity(-1);
80
159
  }
package/tests/.utils.js CHANGED
@@ -1,3 +1,13 @@
1
+ import { resetEnv as resetDecoderEnv } from '../dist/common/string-decoder.js';
2
+ import { resetEnv as resetEncoderEnv } from '../dist/common/string-encoder.js';
3
+ import { resetEncoder } from '../dist/encoder.js';
4
+
5
+ export function resetEnv() {
6
+ resetDecoderEnv();
7
+ resetEncoderEnv();
8
+ resetEncoder();
9
+ }
10
+
1
11
  /**
2
12
  * 输入转为数字数组以便比较
3
13
  *