cborg 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -246,6 +246,7 @@ Decode valid CBOR bytes from a `Uint8Array` (or `Buffer`) and return a JavaScrip
246
246
  * `strict` (boolean, default `false`): when decoding integers, including for lengths (arrays, maps, strings, bytes), values will be checked to see whether they were encoded in their smallest possible form. If not, an error will be thrown.
247
247
  * Currently, this form of deterministic strictness cannot be enforced for float representations, or map key ordering (pull requests _very_ welcome).
248
248
  * `useMaps` (boolean, default `false`): when decoding major 5 (map) entries, use a `Map` rather than a plain `Object`. This will nest for any encountered map. During encode, a `Map` will be interpreted as an `Object` and will round-trip as such unless `useMaps` is supplied, in which case, all `Map`s and `Object`s will round-trip as `Map`s. There is no way to retain the distinction during round-trip without using a custom tag.
249
+ * `retainStringBytes` (boolean, default `false`): when decoding strings, retain the original bytes on the `Token` object as `byteValue`. CBOR strings may contain byte sequences that are not valid UTF-8, and JavaScript does not preserve such bytes when converting them to a string (`TextDecoder` or `Buffer`), so decoding can result in a loss of data (and an inability to round-trip). Where this is important, a token stream should be consumed instead of a plain `decode()` and the `byteValue` property on string tokens can be inspected (see [lib/diagnostic.js](lib/diagnostic.js) for an example of its use).
249
250
  * `tags` (array): a mapping of tag number to tag decoder function. By default no tags are supported. See [Tag decoders](#tag-decoders).
250
251
  * `tokenizer` (object): an object with two methods, `next()` which returns a `Token` and `done()` which returns a `boolean`. Can be used to implement custom input decoding. See the source code for examples.
251
252
 
@@ -331,4 +331,19 @@ Input may either be supplied as an argument or piped via stdin
331
331
  `);
332
332
  });
333
333
  });
334
+ it('diag non-utf8 and non-printable ascii', async () => {
335
+ const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35';
336
+ let {stdout, stderr} = await execBin(`hex2diag ${ input }`);
337
+ assert.strictEqual(stderr, '');
338
+ assert.strictEqual(stdout, `78 64 # string(86)
339
+ f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b"
340
+ 4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c"
341
+ 11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85"
342
+ 9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82"
343
+ f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5"
344
+ `);
345
+ ({stdout, stderr} = await execBin('diag2hex', stdout));
346
+ assert.strictEqual(stderr, '');
347
+ assert.strictEqual(stdout, `${ input }\n`);
348
+ });
334
349
  });
@@ -8,29 +8,33 @@ var _0uint = require('./0uint.js');
8
8
  var _2bytes = require('./2bytes.js');
9
9
  var byteUtils = require('./byte-utils.js');
10
10
 
11
- function toToken(data, pos, prefix, length) {
11
+ function toToken(data, pos, prefix, length, options) {
12
12
  const totLength = prefix + length;
13
13
  common.assertEnoughData(data, pos, totLength);
14
- return new token.Token(token.Type.string, byteUtils.toString(data, pos + prefix, pos + totLength), totLength);
14
+ const tok = new token.Token(token.Type.string, byteUtils.toString(data, pos + prefix, pos + totLength), totLength);
15
+ if (options.retainStringBytes === true) {
16
+ tok.byteValue = byteUtils.slice(data, pos + prefix, pos + totLength);
17
+ }
18
+ return tok;
15
19
  }
16
- function decodeStringCompact(data, pos, minor, _options) {
17
- return toToken(data, pos, 1, minor);
20
+ function decodeStringCompact(data, pos, minor, options) {
21
+ return toToken(data, pos, 1, minor, options);
18
22
  }
19
23
  function decodeString8(data, pos, _minor, options) {
20
- return toToken(data, pos, 2, _0uint.readUint8(data, pos + 1, options));
24
+ return toToken(data, pos, 2, _0uint.readUint8(data, pos + 1, options), options);
21
25
  }
22
26
  function decodeString16(data, pos, _minor, options) {
23
- return toToken(data, pos, 3, _0uint.readUint16(data, pos + 1, options));
27
+ return toToken(data, pos, 3, _0uint.readUint16(data, pos + 1, options), options);
24
28
  }
25
29
  function decodeString32(data, pos, _minor, options) {
26
- return toToken(data, pos, 5, _0uint.readUint32(data, pos + 1, options));
30
+ return toToken(data, pos, 5, _0uint.readUint32(data, pos + 1, options), options);
27
31
  }
28
32
  function decodeString64(data, pos, _minor, options) {
29
33
  const l = _0uint.readUint64(data, pos + 1, options);
30
34
  if (typeof l === 'bigint') {
31
35
  throw new Error(`${ common.decodeErrPrefix } 64-bit integer string lengths not supported`);
32
36
  }
33
- return toToken(data, pos, 9, l);
37
+ return toToken(data, pos, 9, l, options);
34
38
  }
35
39
  const encodeString = _2bytes.encodeBytes;
36
40
 
@@ -9,7 +9,7 @@ var _0uint = require('./0uint.js');
9
9
  const utf8Encoder = new TextEncoder();
10
10
  const utf8Decoder = new TextDecoder();
11
11
  function* tokensToDiagnostic(inp, width = 100) {
12
- const tokeniser = new decode.Tokeniser(inp);
12
+ const tokeniser = new decode.Tokeniser(inp, { retainStringBytes: true });
13
13
  let pos = 0;
14
14
  const indent = [];
15
15
  const slc = (start, length) => {
@@ -59,15 +59,22 @@ function* tokensToDiagnostic(inp, width = 100) {
59
59
  }
60
60
  yield outp;
61
61
  if (str) {
62
+ let asString = token.type.name === 'string';
62
63
  margin += ' ';
63
- const repr = token.type.name === 'bytes' ? token.value : utf8Encoder.encode(token.value);
64
+ let repr = asString ? utf8Encoder.encode(token.value) : token.value;
65
+ if (asString && token.byteValue !== undefined) {
66
+ if (repr.length !== token.byteValue.length) {
67
+ repr = token.byteValue;
68
+ asString = false;
69
+ }
70
+ }
64
71
  const wh = (width / 2 - margin.length - 1) / 2;
65
72
  let snip = 0;
66
73
  while (repr.length - snip > 0) {
67
74
  const piece = repr.slice(snip, snip + wh);
68
75
  snip += piece.length;
69
- const st = token.type.name === 'string' ? utf8Decoder.decode(piece) : piece.reduce((p, c) => {
70
- if (c < 32 || c === 127) {
76
+ const st = asString ? utf8Decoder.decode(piece) : piece.reduce((p, c) => {
77
+ if (c < 32 || c >= 127 && c < 161 || c === 173) {
71
78
  return `${ p }\\x${ c.toString(16).padStart(2, '0') }`;
72
79
  }
73
80
  return `${ p }${ String.fromCharCode(c) }`;
package/cjs/lib/token.js CHANGED
@@ -35,6 +35,7 @@ class Token {
35
35
  this.value = value;
36
36
  this.encodedLength = encodedLength;
37
37
  this.encodedBytes = undefined;
38
+ this.byteValue = undefined;
38
39
  }
39
40
  toString() {
40
41
  return `Token[${ this.type }].${ this.value }`;
@@ -331,4 +331,19 @@ Input may either be supplied as an argument or piped via stdin
331
331
  `);
332
332
  });
333
333
  });
334
+ it('diag non-utf8 and non-printable ascii', async () => {
335
+ const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35';
336
+ let {stdout, stderr} = await execBin(`hex2diag ${ input }`);
337
+ assert.strictEqual(stderr, '');
338
+ assert.strictEqual(stdout, `78 64 # string(86)
339
+ f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b"
340
+ 4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c"
341
+ 11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85"
342
+ 9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82"
343
+ f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5"
344
+ `);
345
+ ({stdout, stderr} = await execBin('diag2hex', stdout));
346
+ assert.strictEqual(stderr, '');
347
+ assert.strictEqual(stdout, `${ input }\n`);
348
+ });
334
349
  });
@@ -322,4 +322,19 @@ Input may either be supplied as an argument or piped via stdin
322
322
  `);
323
323
  });
324
324
  });
325
+ it('diag non-utf8 and non-printable ascii', async () => {
326
+ const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35';
327
+ let {stdout, stderr} = await execBin(`hex2diag ${ input }`);
328
+ assert.strictEqual(stderr, '');
329
+ assert.strictEqual(stdout, `78 64 # string(86)
330
+ f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b"
331
+ 4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c"
332
+ 11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85"
333
+ 9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82"
334
+ f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5"
335
+ `);
336
+ ({stdout, stderr} = await execBin('diag2hex', stdout));
337
+ assert.strictEqual(stderr, '');
338
+ assert.strictEqual(stdout, `${ input }\n`);
339
+ });
325
340
  });
@@ -8,29 +8,36 @@ import {
8
8
  } from './common.js';
9
9
  import * as uint from './0uint.js';
10
10
  import { encodeBytes } from './2bytes.js';
11
- import { toString } from './byte-utils.js';
12
- function toToken(data, pos, prefix, length) {
11
+ import {
12
+ toString,
13
+ slice
14
+ } from './byte-utils.js';
15
+ function toToken(data, pos, prefix, length, options) {
13
16
  const totLength = prefix + length;
14
17
  assertEnoughData(data, pos, totLength);
15
- return new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength);
18
+ const tok = new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength);
19
+ if (options.retainStringBytes === true) {
20
+ tok.byteValue = slice(data, pos + prefix, pos + totLength);
21
+ }
22
+ return tok;
16
23
  }
17
- export function decodeStringCompact(data, pos, minor, _options) {
18
- return toToken(data, pos, 1, minor);
24
+ export function decodeStringCompact(data, pos, minor, options) {
25
+ return toToken(data, pos, 1, minor, options);
19
26
  }
20
27
  export function decodeString8(data, pos, _minor, options) {
21
- return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options));
28
+ return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options), options);
22
29
  }
23
30
  export function decodeString16(data, pos, _minor, options) {
24
- return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options));
31
+ return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options), options);
25
32
  }
26
33
  export function decodeString32(data, pos, _minor, options) {
27
- return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options));
34
+ return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options), options);
28
35
  }
29
36
  export function decodeString64(data, pos, _minor, options) {
30
37
  const l = uint.readUint64(data, pos + 1, options);
31
38
  if (typeof l === 'bigint') {
32
39
  throw new Error(`${ decodeErrPrefix } 64-bit integer string lengths not supported`);
33
40
  }
34
- return toToken(data, pos, 9, l);
41
+ return toToken(data, pos, 9, l, options);
35
42
  }
36
43
  export const encodeString = encodeBytes;
@@ -7,7 +7,7 @@ import { uintBoundaries } from './0uint.js';
7
7
  const utf8Encoder = new TextEncoder();
8
8
  const utf8Decoder = new TextDecoder();
9
9
  function* tokensToDiagnostic(inp, width = 100) {
10
- const tokeniser = new Tokeniser(inp);
10
+ const tokeniser = new Tokeniser(inp, { retainStringBytes: true });
11
11
  let pos = 0;
12
12
  const indent = [];
13
13
  const slc = (start, length) => {
@@ -57,15 +57,22 @@ function* tokensToDiagnostic(inp, width = 100) {
57
57
  }
58
58
  yield outp;
59
59
  if (str) {
60
+ let asString = token.type.name === 'string';
60
61
  margin += ' ';
61
- const repr = token.type.name === 'bytes' ? token.value : utf8Encoder.encode(token.value);
62
+ let repr = asString ? utf8Encoder.encode(token.value) : token.value;
63
+ if (asString && token.byteValue !== undefined) {
64
+ if (repr.length !== token.byteValue.length) {
65
+ repr = token.byteValue;
66
+ asString = false;
67
+ }
68
+ }
62
69
  const wh = (width / 2 - margin.length - 1) / 2;
63
70
  let snip = 0;
64
71
  while (repr.length - snip > 0) {
65
72
  const piece = repr.slice(snip, snip + wh);
66
73
  snip += piece.length;
67
- const st = token.type.name === 'string' ? utf8Decoder.decode(piece) : piece.reduce((p, c) => {
68
- if (c < 32 || c === 127) {
74
+ const st = asString ? utf8Decoder.decode(piece) : piece.reduce((p, c) => {
75
+ if (c < 32 || c >= 127 && c < 161 || c === 173) {
69
76
  return `${ p }\\x${ c.toString(16).padStart(2, '0') }`;
70
77
  }
71
78
  return `${ p }${ String.fromCharCode(c) }`;
package/esm/lib/token.js CHANGED
@@ -31,6 +31,7 @@ class Token {
31
31
  this.value = value;
32
32
  this.encodedLength = encodedLength;
33
33
  this.encodedBytes = undefined;
34
+ this.byteValue = undefined;
34
35
  }
35
36
  toString() {
36
37
  return `Token[${ this.type }].${ this.value }`;
@@ -322,4 +322,19 @@ Input may either be supplied as an argument or piped via stdin
322
322
  `);
323
323
  });
324
324
  });
325
+ it('diag non-utf8 and non-printable ascii', async () => {
326
+ const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35';
327
+ let {stdout, stderr} = await execBin(`hex2diag ${ input }`);
328
+ assert.strictEqual(stderr, '');
329
+ assert.strictEqual(stdout, `78 64 # string(86)
330
+ f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b"
331
+ 4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c"
332
+ 11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85"
333
+ 9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82"
334
+ f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5"
335
+ `);
336
+ ({stdout, stderr} = await execBin('diag2hex', stdout));
337
+ assert.strictEqual(stderr, '');
338
+ assert.strictEqual(stdout, `${ input }\n`);
339
+ });
325
340
  });
package/interface.ts CHANGED
@@ -39,6 +39,7 @@ export interface DecodeOptions {
39
39
  allowBigInt?: boolean
40
40
  strict?: boolean
41
41
  useMaps?: boolean
42
+ retainStringBytes?: boolean
42
43
  tags?: TagDecoder[],
43
44
  tokenizer?: DecodeTokenizer
44
45
  }
package/lib/3string.js CHANGED
@@ -2,7 +2,7 @@ import { Token, Type } from './token.js'
2
2
  import { assertEnoughData, decodeErrPrefix } from './common.js'
3
3
  import * as uint from './0uint.js'
4
4
  import { encodeBytes } from './2bytes.js'
5
- import { toString } from './byte-utils.js'
5
+ import { toString, slice } from './byte-utils.js'
6
6
 
7
7
  /**
8
8
  * @typedef {import('./bl.js').Bl} Bl
@@ -14,23 +14,28 @@ import { toString } from './byte-utils.js'
14
14
  * @param {number} pos
15
15
  * @param {number} prefix
16
16
  * @param {number} length
17
+ * @param {DecodeOptions} options
17
18
  * @returns {Token}
18
19
  */
19
- function toToken (data, pos, prefix, length) {
20
+ function toToken (data, pos, prefix, length, options) {
20
21
  const totLength = prefix + length
21
22
  assertEnoughData(data, pos, totLength)
22
- return new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength)
23
+ const tok = new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength)
24
+ if (options.retainStringBytes === true) {
25
+ tok.byteValue = slice(data, pos + prefix, pos + totLength)
26
+ }
27
+ return tok
23
28
  }
24
29
 
25
30
  /**
26
31
  * @param {Uint8Array} data
27
32
  * @param {number} pos
28
33
  * @param {number} minor
29
- * @param {DecodeOptions} _options
34
+ * @param {DecodeOptions} options
30
35
  * @returns {Token}
31
36
  */
32
- export function decodeStringCompact (data, pos, minor, _options) {
33
- return toToken(data, pos, 1, minor)
37
+ export function decodeStringCompact (data, pos, minor, options) {
38
+ return toToken(data, pos, 1, minor, options)
34
39
  }
35
40
 
36
41
  /**
@@ -41,7 +46,7 @@ export function decodeStringCompact (data, pos, minor, _options) {
41
46
  * @returns {Token}
42
47
  */
43
48
  export function decodeString8 (data, pos, _minor, options) {
44
- return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options))
49
+ return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options), options)
45
50
  }
46
51
 
47
52
  /**
@@ -52,7 +57,7 @@ export function decodeString8 (data, pos, _minor, options) {
52
57
  * @returns {Token}
53
58
  */
54
59
  export function decodeString16 (data, pos, _minor, options) {
55
- return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options))
60
+ return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options), options)
56
61
  }
57
62
 
58
63
  /**
@@ -63,7 +68,7 @@ export function decodeString16 (data, pos, _minor, options) {
63
68
  * @returns {Token}
64
69
  */
65
70
  export function decodeString32 (data, pos, _minor, options) {
66
- return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options))
71
+ return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options), options)
67
72
  }
68
73
 
69
74
  // TODO: maybe we shouldn't support this ..
@@ -79,7 +84,7 @@ export function decodeString64 (data, pos, _minor, options) {
79
84
  if (typeof l === 'bigint') {
80
85
  throw new Error(`${decodeErrPrefix} 64-bit integer string lengths not supported`)
81
86
  }
82
- return toToken(data, pos, 9, l)
87
+ return toToken(data, pos, 9, l, options)
83
88
  }
84
89
 
85
90
  export const encodeString = encodeBytes
package/lib/diagnostic.js CHANGED
@@ -10,7 +10,7 @@ const utf8Decoder = new TextDecoder()
10
10
  * @param {number} [width]
11
11
  */
12
12
  function * tokensToDiagnostic (inp, width = 100) {
13
- const tokeniser = new Tokeniser(inp)
13
+ const tokeniser = new Tokeniser(inp, { retainStringBytes: true })
14
14
  let pos = 0
15
15
  const indent = []
16
16
 
@@ -77,19 +77,26 @@ function * tokensToDiagnostic (inp, width = 100) {
77
77
  yield outp
78
78
 
79
79
  if (str) {
80
+ let asString = token.type.name === 'string'
80
81
  margin += ' '
81
- const repr = token.type.name === 'bytes' ? token.value : utf8Encoder.encode(token.value)
82
+ let repr = asString ? utf8Encoder.encode(token.value) : token.value
83
+ if (asString && token.byteValue !== undefined) {
84
+ if (repr.length !== token.byteValue.length) {
85
+ // bail on printing this as a string, it's probably not utf8, so treat it as bytes
86
+ // (you can probably blame a Go programmer for this)
87
+ repr = token.byteValue
88
+ asString = false
89
+ }
90
+ }
82
91
  const wh = ((width / 2) - margin.length - 1) / 2
83
92
  let snip = 0
84
93
  while (repr.length - snip > 0) {
85
94
  const piece = repr.slice(snip, snip + wh)
86
95
  snip += piece.length
87
- // the assumption that we can utf8 a byte-sliced version is a stretch,
88
- // we could be slicing in the middle of a multi-byte character
89
- const st = token.type.name === 'string'
96
+ const st = asString
90
97
  ? utf8Decoder.decode(piece)
91
98
  : piece.reduce((/** @type {string} */ p, /** @type {number} */ c) => {
92
- if (c < 0x20 || c === 0x7f) {
99
+ if (c < 0x20 || (c >= 0x7f && c < 0xa1) || c === 0xad) {
93
100
  return `${p}\\x${c.toString(16).padStart(2, '0')}`
94
101
  }
95
102
  return `${p}${String.fromCharCode(c)}`
package/lib/token.js CHANGED
@@ -54,6 +54,8 @@ class Token {
54
54
  this.encodedLength = encodedLength
55
55
  /** @type {Uint8Array|undefined} */
56
56
  this.encodedBytes = undefined
57
+ /** @type {Uint8Array|undefined} */
58
+ this.byteValue = undefined
57
59
  }
58
60
 
59
61
  /* c8 ignore next 3 */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cborg",
3
- "version": "1.7.0",
3
+ "version": "1.8.0",
4
4
  "description": "Fast CBOR with a focus on strictness",
5
5
  "main": "./cjs/cborg.js",
6
6
  "bin": {
@@ -382,4 +382,23 @@ Input may either be supplied as an argument or piped via stdin
382
382
  `)
383
383
  })
384
384
  })
385
+
386
+ it('diag non-utf8 and non-printable ascii', async () => {
387
+ const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35'
388
+ let { stdout, stderr } = await execBin(`hex2diag ${input}`)
389
+ assert.strictEqual(stderr, '')
390
+ assert.strictEqual(stdout,
391
+ `78 64 # string(86)
392
+ f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b"
393
+ 4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c"
394
+ 11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85"
395
+ 9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82"
396
+ f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5"
397
+ `)
398
+
399
+ // round-trip
400
+ ;({ stdout, stderr } = await execBin('diag2hex', stdout))
401
+ assert.strictEqual(stderr, '')
402
+ assert.strictEqual(stdout, `${input}\n`)
403
+ })
385
404
  })
@@ -29,6 +29,7 @@ export interface DecodeOptions {
29
29
  allowBigInt?: boolean;
30
30
  strict?: boolean;
31
31
  useMaps?: boolean;
32
+ retainStringBytes?: boolean;
32
33
  tags?: TagDecoder[];
33
34
  tokenizer?: DecodeTokenizer;
34
35
  }
@@ -1 +1 @@
1
- {"version":3,"file":"interface.d.ts","sourceRoot":"","sources":["../interface.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAA;AACnC,OAAO,EAAE,EAAE,EAAE,MAAM,UAAU,CAAA;AAE7B,oBAAY,mBAAmB,GAAG,KAAK,GAAG,KAAK,EAAE,GAAG,mBAAmB,EAAE,CAAA;AAEzE,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,SAAS,GAAG,SAAS,CAAA;IAC7B,GAAG,EAAE,MAAM,GAAG,GAAG,EAAE,CAAA;IACnB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAA;CACvC;AAED,oBAAY,mBAAmB,GAAG,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,CAAC,EAAE,SAAS,KAAK,mBAAmB,GAAG,IAAI,CAAA;AAEtI,oBAAY,iBAAiB,GAAG,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,CAAC,EAAE,SAAS,KAAK,mBAAmB,CAAA;AAE7H,oBAAY,gBAAgB,GAAG;IAC7B,CAAC,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IACvD,aAAa,CAAC,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,GAAG,MAAM,CAAC;IAC5C,WAAW,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;CAC7D,CAAA;AAED,oBAAY,SAAS,GAAG,CAAC,EAAE,EAAE,CAAC,KAAK,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,KAAK,GAAG,KAAK,EAAE,CAAC,EAAE,KAAK,MAAM,CAAA;AAEpF,oBAAY,gBAAgB,GAAG,CAAC,KAAK,EAAE,KAAK,KAAK,UAAU,GAAG,SAAS,CAAA;AAEvE,MAAM,WAAW,eAAe;IAC9B,IAAI,IAAI,OAAO,CAAC;IAChB,IAAI,IAAI,KAAK,CAAA;CACd;AAED,oBAAY,UAAU,GAAG,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAAA;AAE5C,MAAM,WAAW,aAAa;IAC5B,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAC/B,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,IAAI,CAAC,EAAE,UAAU,EAAE,CAAC;IACpB,SAAS,CAAC,EAAE,eAAe,CAAA;CAC5B;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC,YAAY,CAAC,EAAE;QAAE,CAAC,QAAQ,EAAE,MAAM,GAAG,mBAAmB,CAAA;KAAE,CAAA;CAC3D"}
1
+ {"version":3,"file":"interface.d.ts","sourceRoot":"","sources":["../interface.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAA;AACnC,OAAO,EAAE,EAAE,EAAE,MAAM,UAAU,CAAA;AAE7B,oBAAY,mBAAmB,GAAG,KAAK,GAAG,KAAK,EAAE,GAAG,mBAAmB,EAAE,CAAA;AAEzE,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,SAAS,GAAG,SAAS,CAAA;IAC7B,GAAG,EAAE,MAAM,GAAG,GAAG,EAAE,CAAA;IACnB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAA;CACvC;AAED,oBAAY,mBAAmB,GAAG,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,CAAC,EAAE,SAAS,KAAK,mBAAmB,GAAG,IAAI,CAAA;AAEtI,oBAAY,iBAAiB,GAAG,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,CAAC,EAAE,SAAS,KAAK,mBAAmB,CAAA;AAE7H,oBAAY,gBAAgB,GAAG;IAC7B,CAAC,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IACvD,aAAa,CAAC,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,GAAG,MAAM,CAAC;IAC5C,WAAW,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;CAC7D,CAAA;AAED,oBAAY,SAAS,GAAG,CAAC,EAAE,EAAE,CAAC,KAAK,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,KAAK,GAAG,KAAK,EAAE,CAAC,EAAE,KAAK,MAAM,CAAA;AAEpF,oBAAY,gBAAgB,GAAG,CAAC,KAAK,EAAE,KAAK,KAAK,UAAU,GAAG,SAAS,CAAA;AAEvE,MAAM,WAAW,eAAe;IAC9B,IAAI,IAAI,OAAO,CAAC;IAChB,IAAI,IAAI,KAAK,CAAA;CACd;AAED,oBAAY,UAAU,GAAG,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAAA;AAE5C,MAAM,WAAW,aAAa;IAC5B,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAC/B,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iBAAiB,CAAC,EAAE,OAAO,CAAA;IAC3B,IAAI,CAAC,EAAE,UAAU,EAAE,CAAC;IACpB,SAAS,CAAC,EAAE,eAAe,CAAA;CAC5B;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC,YAAY,CAAC,EAAE;QAAE,CAAC,QAAQ,EAAE,MAAM,GAAG,mBAAmB,CAAA;KAAE,CAAA;CAC3D"}
@@ -2,10 +2,10 @@
2
2
  * @param {Uint8Array} data
3
3
  * @param {number} pos
4
4
  * @param {number} minor
5
- * @param {DecodeOptions} _options
5
+ * @param {DecodeOptions} options
6
6
  * @returns {Token}
7
7
  */
8
- export function decodeStringCompact(data: Uint8Array, pos: number, minor: number, _options: DecodeOptions): Token;
8
+ export function decodeStringCompact(data: Uint8Array, pos: number, minor: number, options: DecodeOptions): Token;
9
9
  /**
10
10
  * @param {Uint8Array} data
11
11
  * @param {number} pos
@@ -1 +1 @@
1
- {"version":3,"file":"3string.d.ts","sourceRoot":"","sources":["../../lib/3string.js"],"names":[],"mappings":"AAwBA;;;;;;GAMG;AACH,0CANW,UAAU,OACV,MAAM,SACN,MAAM,YACN,aAAa,GACX,KAAK,CAIjB;AAED;;;;;;GAMG;AACH,oCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAIjB;AAED;;;;;;GAMG;AACH,qCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAIjB;AAED;;;;;;GAMG;AACH,qCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAIjB;AAGD;;;;;;GAMG;AACH,qCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAQjB;AAED,8CAAuC;iBA7E1B,OAAO,SAAS,EAAE,EAAE;4BACpB,OAAO,cAAc,EAAE,aAAa"}
1
+ {"version":3,"file":"3string.d.ts","sourceRoot":"","sources":["../../lib/3string.js"],"names":[],"mappings":"AA6BA;;;;;;GAMG;AACH,0CANW,UAAU,OACV,MAAM,SACN,MAAM,WACN,aAAa,GACX,KAAK,CAIjB;AAED;;;;;;GAMG;AACH,oCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAIjB;AAED;;;;;;GAMG;AACH,qCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAIjB;AAED;;;;;;GAMG;AACH,qCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAIjB;AAGD;;;;;;GAMG;AACH,qCANW,UAAU,OACV,MAAM,UACN,MAAM,WACN,aAAa,GACX,KAAK,CAQjB;AAED,8CAAuC;iBAlF1B,OAAO,SAAS,EAAE,EAAE;4BACpB,OAAO,cAAc,EAAE,aAAa"}
@@ -1 +1 @@
1
- {"version":3,"file":"diagnostic.d.ts","sourceRoot":"","sources":["../../lib/diagnostic.js"],"names":[],"mappings":"AAOA;;;GAGG;AACH,wCAHW,UAAU,gEAsHpB;AAED;;;;GAIG;AACH,gCAHW,MAAM,GACJ,UAAU,CAatB"}
1
+ {"version":3,"file":"diagnostic.d.ts","sourceRoot":"","sources":["../../lib/diagnostic.js"],"names":[],"mappings":"AAOA;;;GAGG;AACH,wCAHW,UAAU,gEA6HpB;AAED;;;;GAIG;AACH,gCAHW,MAAM,GACJ,UAAU,CAatB"}
@@ -47,6 +47,8 @@ export class Token {
47
47
  encodedLength: number | undefined;
48
48
  /** @type {Uint8Array|undefined} */
49
49
  encodedBytes: Uint8Array | undefined;
50
+ /** @type {Uint8Array|undefined} */
51
+ byteValue: Uint8Array | undefined;
50
52
  toString(): string;
51
53
  }
52
54
  //# sourceMappingURL=token.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../lib/token.js"],"names":[],"mappings":"AAAA;IACE;;;;OAIG;IACH,mBAJW,MAAM,QACN,MAAM,YACN,OAAO,EAOjB;IAJC,cAAkB;IAClB,qBAA8B;IAC9B,aAAgB;IAChB,kBAAwB;IAI1B,mBAEC;IAED;;;OAGG;IACH,aAHW,IAAI,GACF,MAAM,CAKlB;CACF;;;;;;;;;;;;;;;;;;;;AAkBD;IACE;;;;OAIG;IACH,kBAJW,IAAI,UACJ,GAAG,sCASb;IALC,WAAgB;IAChB,WAAkB;IAClB,kCAAkC;IAClC,mCAAmC;IACnC,cADW,UAAU,GAAC,SAAS,CACF;IAI/B,mBAEC;CACF"}
1
+ {"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../lib/token.js"],"names":[],"mappings":"AAAA;IACE;;;;OAIG;IACH,mBAJW,MAAM,QACN,MAAM,YACN,OAAO,EAOjB;IAJC,cAAkB;IAClB,qBAA8B;IAC9B,aAAgB;IAChB,kBAAwB;IAI1B,mBAEC;IAED;;;OAGG;IACH,aAHW,IAAI,GACF,MAAM,CAKlB;CACF;;;;;;;;;;;;;;;;;;;;AAkBD;IACE;;;;OAIG;IACH,kBAJW,IAAI,UACJ,GAAG,sCAWb;IAPC,WAAgB;IAChB,WAAkB;IAClB,kCAAkC;IAClC,mCAAmC;IACnC,cADW,UAAU,GAAC,SAAS,CACF;IAC7B,mCAAmC;IACnC,WADW,UAAU,GAAC,SAAS,CACL;IAI5B,mBAEC;CACF"}