react-native-nitro-buffer 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -124,6 +124,21 @@ This library achieves **100% API compatibility** with Node.js `Buffer`.
124
124
  const cBuf = CraftzBuffer.from(nBuf); // Works!
125
125
  ```
126
126
 
127
+ ## ⚠️ Compatibility Notes
128
+
129
+ ### `toString('ascii')` Behavior
130
+
131
+ When decoding binary data with non-ASCII bytes (0x80-0xFF), `react-native-nitro-buffer` follows the **Node.js standard** by replacing invalid bytes with the Unicode replacement character (`U+FFFD`, displayed as `�`).
132
+
133
+ ```javascript
134
+ const buf = Buffer.from([0x48, 0x69, 0x80, 0xFF, 0x21]); // "Hi" + invalid bytes + "!"
135
+ buf.toString('ascii');
136
+ // Nitro (Node.js compatible): "Hi��!" (length: 5)
137
+ // @craftzdog/react-native-buffer: "Hi\x00\x7F!" (length: 5) - masks the high bit, so the invalid bytes become invisible control characters
138
+ ```
139
+
140
+ This ensures consistent behavior with Node.js when handling binary protocols like WebSocket messages containing mixed text and binary data (e.g., Microsoft TTS audio streams).
141
+
127
142
  ## 📄 License
128
143
 
129
144
  ISC
@@ -217,6 +217,177 @@ double HybridNitroBuffer::write(const std::shared_ptr<ArrayBuffer> &buffer,
217
217
  return actualWrite;
218
218
  }
219
219
 
220
// UTF-8 replacement character (U+FFFD) encoded as UTF-8
static const char UTF8_REPLACEMENT[] = "\xEF\xBF\xBD";

// Decode a byte range as UTF-8, substituting U+FFFD for ill-formed input the
// way the WHATWG Encoding Standard's UTF-8 decoder does.
//
// Each *maximal ill-formed subpart* yields exactly ONE U+FFFD: a lead byte plus
// however many valid continuation bytes follow it before the sequence is cut
// short (by end of input or by a byte outside the allowed range). The byte that
// interrupted the sequence is not consumed; it is decoded again as the start of
// a new sequence.
//
// @param data  Pointer to the first byte to decode (not dereferenced when
//              len == 0).
// @param len   Number of bytes to decode.
// @return      A well-formed UTF-8 string; valid sequences are copied verbatim.
static std::string decodeUtf8WithReplacement(const uint8_t *data, size_t len) {
  std::string result;
  result.reserve(len); // Minimum reservation; replacements may grow it

  size_t i = 0;
  while (i < len) {
    const uint8_t lead = data[i];

    // ASCII (0x00-0x7F) passes through unchanged.
    if (lead <= 0x7F) {
      result.push_back(static_cast<char>(lead));
      ++i;
      continue;
    }

    // Number of continuation bytes the lead byte announces, and the range the
    // FIRST continuation byte must fall in. The narrowed ranges reject
    // overlong forms (E0, F0), surrogates (ED) and code points above U+10FFFF
    // (F4) as early as the standard requires.
    size_t needed = 0;
    uint8_t lower = 0x80;
    uint8_t upper = 0xBF;
    if (lead >= 0xC2 && lead <= 0xDF) {
      needed = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      needed = 2;
      if (lead == 0xE0) {
        lower = 0xA0;
      } else if (lead == 0xED) {
        upper = 0x9F;
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      needed = 3;
      if (lead == 0xF0) {
        lower = 0x90;
      } else if (lead == 0xF4) {
        upper = 0x8F;
      }
    } else {
      // 0x80-0xC1 (stray continuation / overlong lead) or 0xF5-0xFF can never
      // start a sequence.
      result.append(UTF8_REPLACEMENT);
      ++i;
      continue;
    }

    // Consume continuation bytes while they are in range.
    size_t seen = 0;
    while (seen < needed && i + 1 + seen < len) {
      const uint8_t next = data[i + 1 + seen];
      if (next < lower || next > upper) {
        break;
      }
      // Only the first continuation byte has a restricted range.
      lower = 0x80;
      upper = 0xBF;
      ++seen;
    }

    if (seen == needed) {
      // Complete, valid sequence: copy lead + continuation bytes as-is.
      result.append(reinterpret_cast<const char *>(data + i), needed + 1);
    } else {
      // Truncated or interrupted: one replacement for the whole subpart.
      result.append(UTF8_REPLACEMENT);
    }
    i += seen + 1;
  }

  return result;
}
353
+
354
// Interpret every input byte as the Unicode code point of the same value
// (U+0000-U+00FF) and return the text encoded as UTF-8.
//
// @param data  Pointer to the first byte to decode.
// @param len   Number of bytes to decode.
// @return      UTF-8 string; bytes above 0x7F expand to two bytes each.
static std::string decodeLatin1(const uint8_t *data, size_t len) {
  std::string utf8;
  utf8.reserve(len * 2); // upper bound: every byte expands to two

  const uint8_t *const end = data + len;
  for (const uint8_t *cursor = data; cursor != end; ++cursor) {
    const unsigned codePoint = *cursor;
    if (codePoint > 0x7F) {
      // U+0080-U+00FF: 110xxxxx 10xxxxxx
      utf8 += static_cast<char>(0xC0 | (codePoint >> 6));
      utf8 += static_cast<char>(0x80 | (codePoint & 0x3F));
    } else {
      utf8 += static_cast<char>(codePoint);
    }
  }

  return utf8;
}
373
+
374
+ // Decode as ASCII - bytes > 0x7F are replaced with U+FFFD
375
+ static std::string decodeAscii(const uint8_t *data, size_t len) {
376
+ std::string result;
377
+ result.reserve(len);
378
+
379
+ for (size_t i = 0; i < len; i++) {
380
+ uint8_t byte = data[i];
381
+ if (byte <= 0x7F) {
382
+ result.push_back(static_cast<char>(byte));
383
+ } else {
384
+ result.append(UTF8_REPLACEMENT);
385
+ }
386
+ }
387
+
388
+ return result;
389
+ }
390
+
220
391
  std::string
221
392
  HybridNitroBuffer::decode(const std::shared_ptr<ArrayBuffer> &buffer,
222
393
  double offset, double length,
@@ -232,8 +403,15 @@ HybridNitroBuffer::decode(const std::shared_ptr<ArrayBuffer> &buffer,
232
403
  size_t actualRead = std::min(available, count);
233
404
 
234
405
  if (encoding == "utf8" || encoding == "utf-8") {
235
- // Check for null termination? No, ArrayBuffer may not be null terminated.
236
- return std::string((char *)(data + start), actualRead);
406
+ // WHATWG-compliant UTF-8 decoding with replacement character for invalid
407
+ // sequences
408
+ return decodeUtf8WithReplacement(data + start, actualRead);
409
+ } else if (encoding == "latin1" || encoding == "binary") {
410
+ // Each byte maps to Unicode code point 0x00-0xFF
411
+ return decodeLatin1(data + start, actualRead);
412
+ } else if (encoding == "ascii") {
413
+ // ASCII with replacement for non-ASCII bytes
414
+ return decodeAscii(data + start, actualRead);
237
415
  } else if (encoding == "hex") {
238
416
  std::string hex;
239
417
  hex.reserve(actualRead * 2);
@@ -248,7 +426,8 @@ HybridNitroBuffer::decode(const std::shared_ptr<ArrayBuffer> &buffer,
248
426
  return base64_encode(data + start, (unsigned int)actualRead);
249
427
  }
250
428
 
251
- return std::string((char *)(data + start), actualRead);
429
+ // Default: UTF-8 with replacement
430
+ return decodeUtf8WithReplacement(data + start, actualRead);
252
431
  }
253
432
 
254
433
  double HybridNitroBuffer::compare(const std::shared_ptr<ArrayBuffer> &a,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-nitro-buffer",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "description": "Node.js compatible buffer module for React Native",
5
5
  "main": "lib/index.js",
6
6
  "module": "lib/index.js",
@@ -47,4 +47,4 @@
47
47
  "nitrogen/",
48
48
  "*.podspec"
49
49
  ]
50
- }
50
+ }