@gjsify/string_decoder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
+ import { describe, it, expect } from '@gjsify/unit';
2
+ import { StringDecoder } from 'node:string_decoder';
3
+ import { Buffer } from 'node:buffer';
4
+
5
+ // Ported from refs/node/test/parallel/test-string-decoder.js
6
+ // and test-string-decoder-end.js
7
+ // Original: MIT license, Node.js contributors
8
+
9
// Helper: test that decoding input with given encoding produces expected output.
// The input is fed to a fresh decoder in every possible way of cutting it into
// consecutive chunks (all-at-once, byte-at-a-time, and every split in between).
// Each run is finished with end() and must yield exactly `expected`; the helper
// returns true only if every run matches. An empty input results in one run that
// writes the empty buffer once and one run that only calls end().
// The number of chunkings doubles with every extra byte, so inputs with more than
// MAX_EXHAUSTIVE_GAPS + 1 bytes are only checked all-at-once, byte-at-a-time and
// with every two-chunk split.
function testDecode(encoding: string, input: Buffer, expected: string): boolean {
  const MAX_EXHAUSTIVE_GAPS = 12;
  const size = input.length;

  // Decodes `input` with a fresh decoder, issuing one write() per entry of
  // `chunkEnds` (ascending, exclusive end offsets) and then flushing with end().
  const decodeInChunks = (chunkEnds: number[]): string => {
    const decoder = new StringDecoder(encoding);
    let decoded = '';
    let chunkStart = 0;
    for (const chunkEnd of chunkEnds) {
      decoded += decoder.write(input.subarray(chunkStart, chunkEnd));
      chunkStart = chunkEnd;
    }
    return decoded + decoder.end();
  };

  // Write all at once
  if (decodeInChunks([size]) !== expected) return false;

  // Write one byte at a time
  const everyByte = Array.from({ length: size }, (_, index) => index + 1);
  if (decodeInChunks(everyByte) !== expected) return false;

  // A "gap" is a position between two adjacent bytes where a chunk may end.
  const gaps = size - 1;

  if (gaps > MAX_EXHAUSTIVE_GAPS) {
    // Too many chunkings to enumerate: try each single cut position instead.
    for (let cut = 1; cut < size; cut++) {
      if (decodeInChunks([cut, size]) !== expected) return false;
    }
    return true;
  }

  // With fewer than two gaps the two runs above already cover every chunking.
  if (gaps < 2) return true;

  // Bit g of `mask` set means "end a chunk after byte g". Mask 0 (no cut) and
  // the all-ones mask (cut everywhere) were already exercised above.
  const cutEverywhere = (1 << gaps) - 1;
  for (let mask = 1; mask < cutEverywhere; mask++) {
    const chunkEnds: number[] = [];
    for (let gap = 0; gap < gaps; gap++) {
      if (mask & (1 << gap)) chunkEnds.push(gap + 1);
    }
    chunkEnds.push(size);
    if (decodeInChunks(chunkEnds) !== expected) return false;
  }

  return true;
}
28
+
29
// Helper: check that a decoder can be reused after end().
// A single decoder receives `incomplete`, is flushed with end(), receives `next`,
// and is flushed again. The four returned strings, concatenated in that order,
// must equal `expected`; the comparison result is returned.
function testEnd(encoding: string, incomplete: Buffer, next: Buffer, expected: string): boolean {
  const decoder = new StringDecoder(encoding);
  // Array elements are evaluated left to right, so the calls happen in this order.
  const pieces = [
    decoder.write(incomplete),
    decoder.end(),
    decoder.write(next),
    decoder.end(),
  ];
  return pieces.join('') === expected;
}
39
+
40
export default async () => {
  // Drives one fresh decoder through a scripted session: every chunk is passed
  // to write() and the returned text is compared with the string paired with it.
  // When `flushed` is given, end() is called afterwards and compared with it;
  // when it is omitted, end() is not called at all.
  const expectSession = (
    encoding: string,
    writes: Array<[Uint8Array, string]>,
    flushed?: string,
  ): void => {
    const decoder = new StringDecoder(encoding);
    for (const [chunk, emitted] of writes) {
      expect(decoder.write(chunk)).toBe(emitted);
    }
    if (flushed !== undefined) {
      expect(decoder.end()).toBe(flushed);
    }
  };

  // -------------------- constructor --------------------

  await describe('StringDecoder: constructor', async () => {
    await it('should create a utf8 decoder by default', async () => {
      expect(new StringDecoder().encoding).toBe('utf8');
    });

    // [test title, encoding passed to the constructor, value reported by .encoding]
    const accepted: Array<[string, string, string]> = [
      ['should accept encoding parameter', 'utf-8', 'utf8'],
      ['should accept latin1 encoding', 'latin1', 'latin1'],
      ['should accept hex encoding', 'hex', 'hex'],
      ['should accept base64 encoding', 'base64', 'base64'],
      ['should accept utf16le encoding', 'utf16le', 'utf16le'],
      ['should accept ucs2 encoding (alias for utf16le)', 'ucs2', 'utf16le'],
    ];
    for (const [title, requested, reported] of accepted) {
      await it(title, async () => {
        expect(new StringDecoder(requested).encoding).toBe(reported);
      });
    }
  });

  // -------------------- UTF-8 basic --------------------

  await describe('StringDecoder: utf8 basic', async () => {
    // [test title, text that is encoded to UTF-8 and must decode back to itself]
    const singleChars: Array<[string, string]> = [
      ['should decode ASCII ($)', '$'],
      ['should decode 2-byte char (¢)', '¢'],
      ['should decode 3-byte char (€)', '€'],
      ['should decode 4-byte char (𤭢)', '𤭢'],
    ];
    for (const [title, text] of singleChars) {
      await it(title, async () => {
        expect(testDecode('utf-8', Buffer.from(text, 'utf-8'), text)).toBeTruthy();
      });
    }

    await it('should decode mixed ascii and non-ascii', async () => {
      // Byte layout: CB A4 = U+02E4, 64 = U+0064, E1 8B A4 = U+12E4, 30 = U+0030, E3 81 85 = U+3045
      const mixed = Buffer.from([0xCB, 0xA4, 0x64, 0xE1, 0x8B, 0xA4, 0x30, 0xE3, 0x81, 0x85]);
      expect(testDecode('utf-8', mixed, '\u02e4\u0064\u12e4\u0030\u3045')).toBeTruthy();
    });

    await it('should decode complete ASCII buffer', async () => {
      expectSession('utf8', [[new Uint8Array([72, 101, 108, 108, 111]), 'Hello']]);
    });

    await it('should handle multi-byte characters split across writes', async () => {
      const decoder = new StringDecoder('utf8');
      // Only the concatenation of both write() results is checked here.
      const pieces = [
        decoder.write(new Uint8Array([0xE2])),
        decoder.write(new Uint8Array([0x82, 0xAC])),
      ];
      expect(pieces.join('')).toBe('€');
    });

    await it('should handle empty buffer', async () => {
      expectSession('utf8', [[new Uint8Array(0), '']]);
    });
  });

  // -------------------- UTF-8 invalid sequences --------------------

  await describe('StringDecoder: utf8 invalid sequences', async () => {
    // [test title, input bytes as hex, expected decoded text]
    const malformed: Array<[string, string, string]> = [
      ['should handle C9B5A941 → \\u0275\\ufffdA', 'C9B5A941', '\u0275\ufffdA'],
      ['should handle lone E2 → \\ufffd', 'E2', '\ufffd'],
      ['should handle E241 → \\ufffdA', 'E241', '\ufffdA'],
      ['should handle CCCCB8 → \\ufffd\\u0338', 'CCCCB8', '\ufffd\u0338'],
      ['should handle F0B841 → \\ufffdA', 'F0B841', '\ufffdA'],
      ['should handle F1CCB8 → \\ufffd\\u0338', 'F1CCB8', '\ufffd\u0338'],
      ['should handle F0FB00 → \\ufffd\\ufffd\\0', 'F0FB00', '\ufffd\ufffd\0'],
      ['should handle CCE2B8B8 → \\ufffd\\u2e38', 'CCE2B8B8', '\ufffd\u2e38'],
      ['should handle E2B8CCB8 → \\ufffd\\u0338', 'E2B8CCB8', '\ufffd\u0338'],
      ['should handle E2FBCC01 → \\ufffd\\ufffd\\ufffd\\u0001', 'E2FBCC01', '\ufffd\ufffd\ufffd\u0001'],
      ['should handle CCB8CDB9 → \\u0338\\u0379', 'CCB8CDB9', '\u0338\u0379'],
      [
        'should handle CESU-8 of U+1D40D as 6 replacement chars',
        'EDA0B5EDB08D',
        '\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd',
      ],
    ];
    for (const [title, hex, decoded] of malformed) {
      await it(title, async () => {
        expect(testDecode('utf-8', Buffer.from(hex, 'hex'), decoded)).toBeTruthy();
      });
    }
  });

  // -------------------- UTF-8 streaming edge cases --------------------

  await describe('StringDecoder: utf8 streaming', async () => {
    await it('should buffer incomplete E1 and output replacement on end()', async () => {
      expectSession('utf8', [[Buffer.from('E1', 'hex'), '']], '\ufffd');
    });

    await it('should handle E18B incomplete → replacement on end()', async () => {
      expectSession('utf8', [[Buffer.from('E18B', 'hex'), '']], '\ufffd');
    });

    await it('should pass through replacement char directly', async () => {
      expectSession('utf8', [[Buffer.from('\ufffd'), '\ufffd']], '');
    });

    await it('should pass through multiple replacement chars', async () => {
      expectSession('utf8', [[Buffer.from('\ufffd\ufffd\ufffd'), '\ufffd\ufffd\ufffd']], '');
    });

    await it('should handle EFBFBDE2 → replacement + incomplete', async () => {
      expectSession('utf8', [[Buffer.from('EFBFBDE2', 'hex'), '\ufffd']], '\ufffd');
    });

    await it('should handle F1 then 41F2 sequence', async () => {
      expectSession(
        'utf8',
        [
          [Buffer.from('F1', 'hex'), ''],
          [Buffer.from('41F2', 'hex'), '\ufffdA'],
        ],
        '\ufffd',
      );
    });
  });

  // -------------------- UTF-16LE --------------------

  await describe('StringDecoder: utf16le', async () => {
    await it('should decode UCS-2 text', async () => {
      expect(testDecode('ucs2', Buffer.from('ababc', 'ucs2'), 'ababc')).toBeTruthy();
    });

    await it('should decode UTF-16LE surrogate pair (thumbs up)', async () => {
      expect(testDecode('utf16le', Buffer.from('3DD84DDC', 'hex'), '\ud83d\udc4d')).toBeTruthy();
    });

    await it('should handle surrogate pair split across 3 writes', async () => {
      expectSession(
        'utf16le',
        [
          [Buffer.from('3DD8', 'hex'), ''],
          [Buffer.from('4D', 'hex'), ''],
          [Buffer.from('DC', 'hex'), '\ud83d\udc4d'],
        ],
        '',
      );
    });

    await it('should output high surrogate on end() when incomplete', async () => {
      expectSession('utf16le', [[Buffer.from('3DD8', 'hex'), '']], '\ud83d');
    });

    await it('should output high surrogate on end() with 3 bytes', async () => {
      expectSession(
        'utf16le',
        [
          [Buffer.from('3DD8', 'hex'), ''],
          [Buffer.from('4D', 'hex'), ''],
        ],
        '\ud83d',
      );
    });

    await it('should handle 3-byte buffer as char + leftover', async () => {
      expectSession('utf16le', [[Buffer.from('3DD84D', 'hex'), '\ud83d']], '');
    });
  });

  // -------------------- latin1 --------------------

  await describe('StringDecoder: latin1', async () => {
    await it('should decode latin1 buffer', async () => {
      expectSession('latin1', [[new Uint8Array([72, 101, 108, 108, 111]), 'Hello']]);
    });

    await it('should handle high bytes (é)', async () => {
      expectSession('latin1', [[new Uint8Array([0xE9]), '\u00e9']]);
    });
  });

  // -------------------- hex --------------------

  await describe('StringDecoder: hex', async () => {
    await it('should decode buffer as hex', async () => {
      expectSession('hex', [[new Uint8Array([0xff, 0x00, 0x0a]), 'ff000a']]);
    });
  });

  // -------------------- ascii --------------------

  await describe('StringDecoder: ascii', async () => {
    await it('should decode ASCII buffer', async () => {
      expectSession('ascii', [[new Uint8Array([72, 101, 108, 108, 111]), 'Hello']]);
    });
  });

  // -------------------- base64 --------------------

  await describe('StringDecoder: base64', async () => {
    await it('should encode single byte on end()', async () => {
      expectSession('base64', [[Buffer.of(0x61), '']], 'YQ==');
    });

    await it('should encode two bytes on end()', async () => {
      expectSession('base64', [[Buffer.of(0x61, 0x61), '']], 'YWE=');
    });

    await it('should encode three bytes immediately', async () => {
      expectSession('base64', [[Buffer.of(0x61, 0x61, 0x61), 'YWFh']], '');
    });

    await it('should handle 3+1 byte split', async () => {
      expectSession(
        'base64',
        [
          [Buffer.of(0x61, 0x61, 0x61), 'YWFh'],
          [Buffer.of(0x61), ''],
        ],
        'YQ==',
      );
    });
  });

  // -------------------- end() behavior --------------------

  await describe('StringDecoder: end()', async () => {
    await it('should accept buffer in end()', async () => {
      const decoder = new StringDecoder('utf8');
      expect(decoder.end(new Uint8Array([72, 105]))).toBe('Hi');
    });

    await it('should return empty string with no pending data', async () => {
      expectSession('utf8', [], '');
    });

    // [test title, encoding, bytes written before the first end(),
    //  bytes written before the second end(), expected concatenated output]
    const reuseCases: Array<[string, string, number[], number[], string]> = [
      // UTF-8
      ['utf8: E2 then 61 → \\ufffd a', 'utf8', [0xE2], [0x61], '\uFFFDa'],
      ['utf8: E2 then 82 → \\ufffd\\ufffd', 'utf8', [0xE2], [0x82], '\uFFFD\uFFFD'],
      ['utf8: E2 then E2 → \\ufffd\\ufffd', 'utf8', [0xE2], [0xE2], '\uFFFD\uFFFD'],
      ['utf8: E2,82 then 61 → \\ufffd a', 'utf8', [0xE2, 0x82], [0x61], '\uFFFDa'],
      ['utf8: E2,82,AC then 61 → €a', 'utf8', [0xE2, 0x82, 0xAC], [0x61], '€a'],
      // UTF-16LE
      ['utf16le: 3D then 61,00 → a', 'utf16le', [0x3D], [0x61, 0x00], 'a'],
      ['utf16le: 3D,D8 then empty → \\uD83D', 'utf16le', [0x3D, 0xD8], [], '\uD83D'],
      ['utf16le: 3D,D8 then 61,00 → \\uD83D a', 'utf16le', [0x3D, 0xD8], [0x61, 0x00], '\uD83Da'],
      ['utf16le: 3D,D8 then 4D,DC → \\uD83D\\uDC4D', 'utf16le', [0x3D, 0xD8], [0x4D, 0xDC], '\uD83D\uDC4D'],
      ['utf16le: 3D,D8,4D,DC then 61,00 → 👍a', 'utf16le', [0x3D, 0xD8, 0x4D, 0xDC], [0x61, 0x00], '👍a'],
      // Base64
      ['base64: 61 then empty → YQ==', 'base64', [0x61], [], 'YQ=='],
      ['base64: 61 then 61 → YQ==YQ==', 'base64', [0x61], [0x61], 'YQ==YQ=='],
      ['base64: 61,61 then empty → YWE=', 'base64', [0x61, 0x61], [], 'YWE='],
      ['base64: 61,61,61 then empty → YWFh', 'base64', [0x61, 0x61, 0x61], [], 'YWFh'],
      ['base64: 61,61,61 then 61 → YWFhYQ==', 'base64', [0x61, 0x61, 0x61], [0x61], 'YWFhYQ=='],
    ];
    for (const [title, encoding, firstBytes, secondBytes, output] of reuseCases) {
      await it(title, async () => {
        expect(testEnd(encoding, Buffer.of(...firstBytes), Buffer.of(...secondBytes), output)).toBeTruthy();
      });
    }
  });

  // -------------------- byte-at-a-time consistency --------------------

  await describe('StringDecoder: byte-at-a-time consistency', async () => {
    const samples = ['Hello', 'asdf'];

    for (const encoding of ['hex', 'utf8', 'utf16le', 'latin1', 'ascii']) {
      for (const sample of samples) {
        await it(`${encoding}: byte-at-a-time matches toString for "${sample}"`, async () => {
          const bytes = Buffer.from(sample);
          // Buffer#toString with the same encoding is the reference result.
          const reference = bytes.toString(encoding as BufferEncoding);

          // Feed the decoder a single byte per write(), then flush.
          const stepwise = new StringDecoder(encoding);
          let collected = '';
          for (let offset = 0; offset < bytes.length; offset += 1) {
            collected += stepwise.write(bytes.subarray(offset, offset + 1));
          }
          collected += stepwise.end();
          expect(collected).toBe(reference);

          // Feed a second decoder the whole buffer in one write(), then flush.
          const oneShot = new StringDecoder(encoding);
          expect(oneShot.write(bytes) + oneShot.end()).toBe(reference);
        });
      }
    }
  });
};