@atproto/lex-data 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/dist/blob.d.ts +28 -2
  3. package/dist/blob.d.ts.map +1 -1
  4. package/dist/blob.js +43 -6
  5. package/dist/blob.js.map +1 -1
  6. package/dist/index.d.ts +1 -0
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +1 -0
  9. package/dist/index.js.map +1 -1
  10. package/dist/lex-error.d.ts +17 -0
  11. package/dist/lex-error.d.ts.map +1 -0
  12. package/dist/lex-error.js +26 -0
  13. package/dist/lex-error.js.map +1 -0
  14. package/dist/lib/nodejs-buffer.d.ts +1 -0
  15. package/dist/lib/nodejs-buffer.d.ts.map +1 -1
  16. package/dist/lib/nodejs-buffer.js +1 -1
  17. package/dist/lib/nodejs-buffer.js.map +1 -1
  18. package/dist/uint8array-concat.d.ts +3 -0
  19. package/dist/uint8array-concat.d.ts.map +1 -0
  20. package/dist/uint8array-concat.js +24 -0
  21. package/dist/uint8array-concat.js.map +1 -0
  22. package/dist/uint8array-from-base64.d.ts.map +1 -1
  23. package/dist/uint8array-from-base64.js +2 -2
  24. package/dist/uint8array-from-base64.js.map +1 -1
  25. package/dist/uint8array-to-base64.d.ts.map +1 -1
  26. package/dist/uint8array-to-base64.js +2 -2
  27. package/dist/uint8array-to-base64.js.map +1 -1
  28. package/dist/uint8array.d.ts +1 -0
  29. package/dist/uint8array.d.ts.map +1 -1
  30. package/dist/uint8array.js +14 -3
  31. package/dist/uint8array.js.map +1 -1
  32. package/dist/utf8-from-base64.d.ts +4 -0
  33. package/dist/utf8-from-base64.d.ts.map +1 -0
  34. package/dist/utf8-from-base64.js +18 -0
  35. package/dist/utf8-from-base64.js.map +1 -0
  36. package/dist/utf8-grapheme-len.d.ts.map +1 -1
  37. package/dist/utf8-grapheme-len.js +2 -2
  38. package/dist/utf8-grapheme-len.js.map +1 -1
  39. package/dist/utf8-len.d.ts.map +1 -1
  40. package/dist/utf8-len.js +1 -1
  41. package/dist/utf8-len.js.map +1 -1
  42. package/dist/utf8-to-base64.d.ts +4 -0
  43. package/dist/utf8-to-base64.d.ts.map +1 -0
  44. package/dist/utf8-to-base64.js +20 -0
  45. package/dist/utf8-to-base64.js.map +1 -0
  46. package/dist/utf8.d.ts +3 -0
  47. package/dist/utf8.d.ts.map +1 -1
  48. package/dist/utf8.js +16 -3
  49. package/dist/utf8.js.map +1 -1
  50. package/package.json +5 -5
  51. package/src/blob.test.ts +223 -20
  52. package/src/blob.ts +82 -10
  53. package/src/cid.test.ts +126 -0
  54. package/src/index.ts +1 -0
  55. package/src/language.test.ts +1 -0
  56. package/src/lex-equals.test.ts +30 -0
  57. package/src/lex-error.ts +34 -0
  58. package/src/lex.test.ts +65 -13
  59. package/src/lib/nodejs-buffer.ts +2 -1
  60. package/src/object.test.ts +2 -0
  61. package/src/uint8array-concat.test.ts +197 -0
  62. package/src/uint8array-concat.ts +21 -0
  63. package/src/uint8array-from-base64.test.ts +4 -1
  64. package/src/uint8array-from-base64.ts +2 -2
  65. package/src/uint8array-to-base64.test.ts +3 -3
  66. package/src/uint8array-to-base64.ts +2 -2
  67. package/src/uint8array.test.ts +484 -0
  68. package/src/uint8array.ts +14 -2
  69. package/src/utf8-from-base64.test.ts +39 -0
  70. package/src/utf8-from-base64.ts +23 -0
  71. package/src/utf8-grapheme-len.test.ts +3 -2
  72. package/src/utf8-grapheme-len.ts +2 -2
  73. package/src/utf8-len.test.ts +3 -2
  74. package/src/utf8-len.ts +1 -1
  75. package/src/utf8-to-base64.test.ts +35 -0
  76. package/src/utf8-to-base64.ts +22 -0
  77. package/src/utf8.ts +23 -2
  78. package/tsconfig.tests.json +1 -1
@@ -1 +1 @@
1
- {"version":3,"file":"utf8-len.d.ts","sourceRoot":"","sources":["../src/utf8-len.ts"],"names":[],"mappings":"AAMA,eAAO,MAAM,WAAW,YACS,MAAM,KAAG,MAAM,QAGxC,CAAA;AAER,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAsCrD"}
1
+ {"version":3,"file":"utf8-len.d.ts","sourceRoot":"","sources":["../src/utf8-len.ts"],"names":[],"mappings":"AAMA,eAAO,MAAM,WAAW,YACS,MAAM,KAAG,MAAM,QAGN,CAAA;AAE1C,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAsCrD"}
package/dist/utf8-len.js CHANGED
@@ -10,7 +10,7 @@ exports.utf8LenNode = nodejs_buffer_js_1.NodeJSBuffer
10
10
  ? function utf8LenNode(string) {
11
11
  return nodejs_buffer_js_1.NodeJSBuffer.byteLength(string, 'utf8');
12
12
  }
13
- : null;
13
+ : /* v8 ignore next -- @preserve */ null;
14
14
  function utf8LenCompute(string) {
15
15
  // The code below is similar to TextEncoder's implementation of UTF-8
16
16
  // encoding. However, using TextEncoder to get the byte length is slower
@@ -1 +1 @@
1
- {"version":3,"file":"utf8-len.js","sourceRoot":"","sources":["../src/utf8-len.ts"],"names":[],"mappings":";;;AAYA,wCAsCC;AAlDD,6DAAqD;AAErD,8EAA8E;AAC9E,+EAA+E;AAC/E,8EAA8E;AAEjE,QAAA,WAAW,GAAG,+BAAY;IACrC,CAAC,CAAC,SAAS,WAAW,CAAC,MAAc;QACjC,OAAO,+BAAa,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACjD,CAAC;IACH,CAAC,CAAC,IAAI,CAAA;AAER,SAAgB,cAAc,CAAC,MAAc;IAC3C,qEAAqE;IACrE,wEAAwE;IACxE,+DAA+D;IAE/D,qDAAqD;IAErD,mDAAmD;IACnD,IAAI,GAAG,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,IAAI,IAAY,CAAA;IAEhB,gEAAgE;IAChE,uBAAuB;IACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;QAE3B,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;YACjB,gBAAgB;QAClB,CAAC;aAAM,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,eAAe;YACf,GAAG,IAAI,CAAC,CAAA;QACV,CAAC;aAAM,CAAC;YACN,eAAe;YACf,GAAG,IAAI,CAAC,CAAA;YACR,sEAAsE;YACtE,+DAA+D;YAC/D,wEAAwE;YACxE,OAAO;YACP,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;gBACrC,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;gBAC/B,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;oBACrC,CAAC,EAAE,CAAA;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAA;AACZ,CAAC","sourcesContent":["import { NodeJSBuffer } from './lib/nodejs-buffer.js'\n\n// @NOTE This file is not meant to be exported directly. Instead, we re-export\n// public functions from ./utf8.ts. The reason for this separation is that this\n// file allows to test both the NodeJS-optimized and ponyfill implementations.\n\nexport const utf8LenNode = NodeJSBuffer\n ? function utf8LenNode(string: string): number {\n return NodeJSBuffer!.byteLength(string, 'utf8')\n }\n : null\n\nexport function utf8LenCompute(string: string): number {\n // The code below is similar to TextEncoder's implementation of UTF-8\n // encoding. However, using TextEncoder to get the byte length is slower\n // as it requires allocating a new Uint8Array and copying data:\n\n // return new TextEncoder().encode(string).byteLength\n\n // The base length is the string length (all ASCII)\n let len = string.length\n let code: number\n\n // The loop calculates the number of additional bytes needed for\n // non-ASCII characters\n for (let i = 0; i < string.length; i += 1) {\n code = string.charCodeAt(i)\n\n if (code <= 0x7f) {\n // ASCII, 1 byte\n } else if (code <= 0x7ff) {\n // 2 bytes char\n len += 1\n } else {\n // 3 bytes char\n len += 2\n // If the current char is a high surrogate, and the next char is a low\n // surrogate, skip the next char as the total is a 4 bytes char\n // (represented as a surrogate pair in UTF-16) and was already accounted\n // for.\n if (code >= 0xd800 && code <= 0xdbff) {\n code = string.charCodeAt(i + 1)\n if (code >= 0xdc00 && code <= 0xdfff) {\n i++\n }\n }\n }\n }\n\n return len\n}\n"]}
1
+ {"version":3,"file":"utf8-len.js","sourceRoot":"","sources":["../src/utf8-len.ts"],"names":[],"mappings":";;;AAYA,wCAsCC;AAlDD,6DAAqD;AAErD,8EAA8E;AAC9E,+EAA+E;AAC/E,8EAA8E;AAEjE,QAAA,WAAW,GAAG,+BAAY;IACrC,CAAC,CAAC,SAAS,WAAW,CAAC,MAAc;QACjC,OAAO,+BAAa,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACjD,CAAC;IACH,CAAC,CAAC,iCAAiC,CAAC,IAAI,CAAA;AAE1C,SAAgB,cAAc,CAAC,MAAc;IAC3C,qEAAqE;IACrE,wEAAwE;IACxE,+DAA+D;IAE/D,qDAAqD;IAErD,mDAAmD;IACnD,IAAI,GAAG,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,IAAI,IAAY,CAAA;IAEhB,gEAAgE;IAChE,uBAAuB;IACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;QAE3B,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;YACjB,gBAAgB;QAClB,CAAC;aAAM,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,eAAe;YACf,GAAG,IAAI,CAAC,CAAA;QACV,CAAC;aAAM,CAAC;YACN,eAAe;YACf,GAAG,IAAI,CAAC,CAAA;YACR,sEAAsE;YACtE,+DAA+D;YAC/D,wEAAwE;YACxE,OAAO;YACP,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;gBACrC,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;gBAC/B,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;oBACrC,CAAC,EAAE,CAAA;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAA;AACZ,CAAC","sourcesContent":["import { NodeJSBuffer } from './lib/nodejs-buffer.js'\n\n// @NOTE This file is not meant to be exported directly. Instead, we re-export\n// public functions from ./utf8.ts. The reason for this separation is that this\n// file allows to test both the NodeJS-optimized and ponyfill implementations.\n\nexport const utf8LenNode = NodeJSBuffer\n ? function utf8LenNode(string: string): number {\n return NodeJSBuffer!.byteLength(string, 'utf8')\n }\n : /* v8 ignore next -- @preserve */ null\n\nexport function utf8LenCompute(string: string): number {\n // The code below is similar to TextEncoder's implementation of UTF-8\n // encoding. However, using TextEncoder to get the byte length is slower\n // as it requires allocating a new Uint8Array and copying data:\n\n // return new TextEncoder().encode(string).byteLength\n\n // The base length is the string length (all ASCII)\n let len = string.length\n let code: number\n\n // The loop calculates the number of additional bytes needed for\n // non-ASCII characters\n for (let i = 0; i < string.length; i += 1) {\n code = string.charCodeAt(i)\n\n if (code <= 0x7f) {\n // ASCII, 1 byte\n } else if (code <= 0x7ff) {\n // 2 bytes char\n len += 1\n } else {\n // 3 bytes char\n len += 2\n // If the current char is a high surrogate, and the next char is a low\n // surrogate, skip the next char as the total is a 4 bytes char\n // (represented as a surrogate pair in UTF-16) and was already accounted\n // for.\n if (code >= 0xd800 && code <= 0xdbff) {\n code = string.charCodeAt(i + 1)\n if (code >= 0xdc00 && code <= 0xdfff) {\n i++\n }\n }\n }\n }\n\n return len\n}\n"]}
@@ -0,0 +1,4 @@
1
+ import { Base64Alphabet } from './uint8array-base64.js';
2
+ export declare const utf8ToBase64Node: ((text: string, alphabet?: Base64Alphabet) => string) | null;
3
+ export declare function utf8ToBase64Ponyfill(text: string, alphabet?: Base64Alphabet): string;
4
+ //# sourceMappingURL=utf8-to-base64.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utf8-to-base64.d.ts","sourceRoot":"","sources":["../src/utf8-to-base64.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAA;AAKvD,eAAO,MAAM,gBAAgB,UACO,MAAM,aAAa,cAAc,KAAG,MAAM,QAIpC,CAAA;AAG1C,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,cAAc,GACxB,MAAM,CAGR"}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.utf8ToBase64Node = void 0;
4
+ exports.utf8ToBase64Ponyfill = utf8ToBase64Ponyfill;
5
+ const to_string_1 = require("uint8arrays/to-string");
6
+ const nodejs_buffer_js_1 = require("./lib/nodejs-buffer.js");
7
+ const uint8array_to_base64_js_1 = require("./uint8array-to-base64.js");
8
+ const Buffer = nodejs_buffer_js_1.NodeJSBuffer;
9
+ exports.utf8ToBase64Node = Buffer
10
+ ? function utf8ToBase64Node(text, alphabet) {
11
+ const buffer = Buffer.from(text, 'utf8');
12
+ return uint8array_to_base64_js_1.toBase64Node(buffer, alphabet);
13
+ }
14
+ : /* v8 ignore next -- @preserve */ null;
15
+ const textEncoder = /*#__PURE__*/ new TextEncoder();
16
+ function utf8ToBase64Ponyfill(text, alphabet) {
17
+ const bytes = textEncoder.encode(text);
18
+ return (0, to_string_1.toString)(bytes, alphabet);
19
+ }
20
+ //# sourceMappingURL=utf8-to-base64.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utf8-to-base64.js","sourceRoot":"","sources":["../src/utf8-to-base64.ts"],"names":[],"mappings":";;;AAeA,oDAMC;AArBD,qDAAgD;AAChD,6DAAqD;AAErD,uEAAwD;AAExD,MAAM,MAAM,GAAG,+BAAY,CAAA;AAEd,QAAA,gBAAgB,GAAG,MAAM;IACpC,CAAC,CAAC,SAAS,gBAAgB,CAAC,IAAY,EAAE,QAAyB;QAC/D,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;QACxC,OAAO,sCAAa,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAA;IACxC,CAAC;IACH,CAAC,CAAC,iCAAiC,CAAC,IAAI,CAAA;AAE1C,MAAM,WAAW,GAAG,aAAa,CAAC,IAAI,WAAW,EAAE,CAAA;AACnD,SAAgB,oBAAoB,CAClC,IAAY,EACZ,QAAyB;IAEzB,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;IACtC,OAAO,IAAA,oBAAQ,EAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;AAClC,CAAC","sourcesContent":["import { toString } from 'uint8arrays/to-string'\nimport { NodeJSBuffer } from './lib/nodejs-buffer.js'\nimport { Base64Alphabet } from './uint8array-base64.js'\nimport { toBase64Node } from './uint8array-to-base64.js'\n\nconst Buffer = NodeJSBuffer\n\nexport const utf8ToBase64Node = Buffer\n ? function utf8ToBase64Node(text: string, alphabet?: Base64Alphabet): string {\n const buffer = Buffer.from(text, 'utf8')\n return toBase64Node!(buffer, alphabet)\n }\n : /* v8 ignore next -- @preserve */ null\n\nconst textEncoder = /*#__PURE__*/ new TextEncoder()\nexport function utf8ToBase64Ponyfill(\n text: string,\n alphabet?: Base64Alphabet,\n): string {\n const bytes = textEncoder.encode(text)\n return toString(bytes, alphabet)\n}\n"]}
package/dist/utf8.d.ts CHANGED
@@ -1,3 +1,6 @@
1
+ import { Base64Alphabet } from './uint8array.js';
1
2
  export declare const graphemeLen: (str: string) => number;
2
3
  export declare const utf8Len: (string: string) => number;
4
+ export declare const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string;
5
+ export declare const utf8FromBase64: (b64: string, alphabet?: Base64Alphabet) => string;
3
6
  //# sourceMappingURL=utf8.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"utf8.d.ts","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,WAAW,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MACD,CAAA;AAS1C,eAAO,MAAM,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,MAAsC,CAAA"}
1
+ {"version":3,"file":"utf8.d.ts","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAShD,eAAO,MAAM,WAAW,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MAEY,CAAA;AAUvD,eAAO,MAAM,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,MAEQ,CAAA;AAElD,eAAO,MAAM,YAAY,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,cAAc,KAAK,MAEf,CAAA;AAExD,eAAO,MAAM,cAAc,EAAE,CAC3B,GAAG,EAAE,MAAM,EACX,QAAQ,CAAC,EAAE,cAAc,KACtB,MAEqD,CAAA"}
package/dist/utf8.js CHANGED
@@ -1,12 +1,25 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.utf8Len = exports.graphemeLen = void 0;
3
+ exports.utf8FromBase64 = exports.utf8ToBase64 = exports.utf8Len = exports.graphemeLen = void 0;
4
+ const utf8_from_base64_js_1 = require("./utf8-from-base64.js");
4
5
  const utf8_grapheme_len_js_1 = require("./utf8-grapheme-len.js");
5
6
  const utf8_len_js_1 = require("./utf8-len.js");
6
- exports.graphemeLen = utf8_grapheme_len_js_1.graphemeLenNative ?? utf8_grapheme_len_js_1.graphemeLenPonyfill;
7
+ const utf8_to_base64_js_1 = require("./utf8-to-base64.js");
8
+ exports.graphemeLen =
9
+ /* v8 ignore next -- @preserve */ utf8_grapheme_len_js_1.graphemeLenNative ??
10
+ /* v8 ignore next -- @preserve */ utf8_grapheme_len_js_1.graphemeLenPonyfill;
11
+ /* v8 ignore next -- @preserve */
7
12
  if (exports.graphemeLen === utf8_grapheme_len_js_1.graphemeLenPonyfill) {
8
13
  /*#__PURE__*/
9
14
  console.warn('[@atproto/lex-data]: Intl.Segmenter is not available in this environment. Falling back to ponyfill implementation.');
10
15
  }
11
- exports.utf8Len = utf8_len_js_1.utf8LenNode ?? utf8_len_js_1.utf8LenCompute;
16
+ exports.utf8Len =
17
+ /* v8 ignore next -- @preserve */ utf8_len_js_1.utf8LenNode ??
18
+ /* v8 ignore next -- @preserve */ utf8_len_js_1.utf8LenCompute;
19
+ exports.utf8ToBase64 =
20
+ /* v8 ignore next -- @preserve */ utf8_to_base64_js_1.utf8ToBase64Node ??
21
+ /* v8 ignore next -- @preserve */ utf8_to_base64_js_1.utf8ToBase64Ponyfill;
22
+ exports.utf8FromBase64 =
23
+ /* v8 ignore next -- @preserve */ utf8_from_base64_js_1.utf8FromBase64Node ??
24
+ /* v8 ignore next -- @preserve */ utf8_from_base64_js_1.utf8FromBase64Ponyfill;
12
25
  //# sourceMappingURL=utf8.js.map
package/dist/utf8.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"utf8.js","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":";;;AAAA,iEAA+E;AAC/E,+CAA2D;AAE9C,QAAA,WAAW,GACtB,wCAAiB,IAAI,0CAAmB,CAAA;AAE1C,IAAI,mBAAW,KAAK,0CAAmB,EAAE,CAAC;IACxC,aAAa;IACb,OAAO,CAAC,IAAI,CACV,oHAAoH,CACrH,CAAA;AACH,CAAC;AAEY,QAAA,OAAO,GAA+B,yBAAW,IAAI,4BAAc,CAAA","sourcesContent":["import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'\nimport { utf8LenCompute, utf8LenNode } from './utf8-len.js'\n\nexport const graphemeLen: (str: string) => number =\n graphemeLenNative ?? graphemeLenPonyfill\n\nif (graphemeLen === graphemeLenPonyfill) {\n /*#__PURE__*/\n console.warn(\n '[@atproto/lex-data]: Intl.Segmenter is not available in this environment. Falling back to ponyfill implementation.',\n )\n}\n\nexport const utf8Len: (string: string) => number = utf8LenNode ?? utf8LenCompute\n"]}
1
+ {"version":3,"file":"utf8.js","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":";;;AACA,+DAG8B;AAC9B,iEAA+E;AAC/E,+CAA2D;AAC3D,2DAA4E;AAE/D,QAAA,WAAW;AACtB,iCAAiC,CAAC,wCAAiB;IACnD,iCAAiC,CAAC,0CAAmB,CAAA;AAEvD,iCAAiC;AACjC,IAAI,mBAAW,KAAK,0CAAmB,EAAE,CAAC;IACxC,aAAa;IACb,OAAO,CAAC,IAAI,CACV,oHAAoH,CACrH,CAAA;AACH,CAAC;AAEY,QAAA,OAAO;AAClB,iCAAiC,CAAC,yBAAW;IAC7C,iCAAiC,CAAC,4BAAc,CAAA;AAErC,QAAA,YAAY;AACvB,iCAAiC,CAAC,oCAAgB;IAClD,iCAAiC,CAAC,wCAAoB,CAAA;AAE3C,QAAA,cAAc;AAIzB,iCAAiC,CAAC,wCAAkB;IACpD,iCAAiC,CAAC,4CAAsB,CAAA","sourcesContent":["import { Base64Alphabet } from './uint8array.js'\nimport {\n utf8FromBase64Node,\n utf8FromBase64Ponyfill,\n} from './utf8-from-base64.js'\nimport { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'\nimport { utf8LenCompute, utf8LenNode } from './utf8-len.js'\nimport { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'\n\nexport const graphemeLen: (str: string) => number =\n /* v8 ignore next -- @preserve */ graphemeLenNative ??\n /* v8 ignore next -- @preserve */ graphemeLenPonyfill\n\n/* v8 ignore next -- @preserve */\nif (graphemeLen === graphemeLenPonyfill) {\n /*#__PURE__*/\n console.warn(\n '[@atproto/lex-data]: Intl.Segmenter is not available in this environment. Falling back to ponyfill implementation.',\n )\n}\n\nexport const utf8Len: (string: string) => number =\n /* v8 ignore next -- @preserve */ utf8LenNode ??\n /* v8 ignore next -- @preserve */ utf8LenCompute\n\nexport const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =\n /* v8 ignore next -- @preserve */ utf8ToBase64Node ??\n /* v8 ignore next -- @preserve */ utf8ToBase64Ponyfill\n\nexport const utf8FromBase64: (\n b64: string,\n alphabet?: Base64Alphabet,\n) => string =\n /* v8 ignore next -- @preserve */ utf8FromBase64Node ??\n /* v8 ignore next -- @preserve */ utf8FromBase64Ponyfill\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@atproto/lex-data",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "license": "MIT",
5
5
  "description": "Core utilities for AT Lexicons",
6
6
  "keywords": [
@@ -28,10 +28,10 @@
28
28
  "types": "./dist/index.d.ts",
29
29
  "exports": {
30
30
  ".": {
31
+ "types": "./dist/index.d.ts",
31
32
  "browser": "./dist/index.js",
32
33
  "import": "./dist/index.js",
33
- "require": "./dist/index.js",
34
- "types": "./dist/index.d.ts"
34
+ "require": "./dist/index.js"
35
35
  }
36
36
  },
37
37
  "dependencies": {
@@ -43,10 +43,10 @@
43
43
  },
44
44
  "devDependencies": {
45
45
  "core-js": "^3",
46
- "jest": "^28.1.2"
46
+ "vitest": "^4.0.16"
47
47
  },
48
48
  "scripts": {
49
49
  "build": "tsc --build tsconfig.build.json",
50
- "test": "jest"
50
+ "test": "vitest run"
51
51
  }
52
52
  }
package/src/blob.test.ts CHANGED
@@ -1,28 +1,36 @@
1
- import { isBlobRef, isLegacyBlobRef } from './blob.js'
1
+ import { describe, expect, it } from 'vitest'
2
+ import {
3
+ BlobRef,
4
+ LegacyBlobRef,
5
+ enumBlobRefs,
6
+ isBlobRef,
7
+ isLegacyBlobRef,
8
+ } from './blob.js'
2
9
  import { parseCid } from './cid.js'
10
+ import { LexArray, LexMap, LexValue } from './lex.js'
3
11
 
4
12
  // await cidForRawBytes(Buffer.from('Hello, World!'))
5
- const blobCid = parseCid(
13
+ const validBlobCid = parseCid(
6
14
  'bafkreig77vqcdozl2wyk6z3cscaj5q5fggi53aoh64fewkdiri3cdauyn4',
7
15
  )
8
16
  // await cidForLex(Buffer.from('Hello, World!'))
9
- const lexCid = parseCid(
17
+ const invalidBlobCid = parseCid(
10
18
  'bafyreic52vzks7wdklat4evp3vimohl55i2unzqpshz2ytka5omzr7exdy',
11
19
  )
12
20
 
13
- describe('isBlobRef', () => {
21
+ describe(isBlobRef, () => {
14
22
  it('tests valid blobCid and lexCid', () => {
15
- expect(blobCid.code).toBe(0x55) // raw
16
- expect(blobCid.multihash.code).toBe(0x12) // sha2-256
17
- expect(lexCid.code).toBe(0x71) // dag-cbor
18
- expect(lexCid.multihash.code).toBe(0x12) // sha2-256
23
+ expect(validBlobCid.code).toBe(0x55) // raw
24
+ expect(validBlobCid.multihash.code).toBe(0x12) // sha2-256
25
+ expect(invalidBlobCid.code).toBe(0x71) // dag-cbor
26
+ expect(invalidBlobCid.multihash.code).toBe(0x12) // sha2-256
19
27
  })
20
28
 
21
29
  it('parses valid blob', () => {
22
30
  expect(
23
31
  isBlobRef({
24
32
  $type: 'blob',
25
- ref: blobCid,
33
+ ref: validBlobCid,
26
34
  mimeType: 'image/jpeg',
27
35
  size: 10000,
28
36
  }),
@@ -32,7 +40,7 @@ describe('isBlobRef', () => {
32
40
  isBlobRef(
33
41
  {
34
42
  $type: 'blob',
35
- ref: lexCid,
43
+ ref: invalidBlobCid,
36
44
  mimeType: 'image/jpeg',
37
45
  size: 10000,
38
46
  },
@@ -42,20 +50,50 @@ describe('isBlobRef', () => {
42
50
  ).toBe(true)
43
51
  })
44
52
 
53
+ it('performs strict validation by default', () => {
54
+ expect(
55
+ isBlobRef({
56
+ $type: 'blob',
57
+ ref: invalidBlobCid,
58
+ mimeType: 'image/jpeg',
59
+ size: 10000,
60
+ }),
61
+ ).toBe(false)
62
+ })
63
+
45
64
  it('rejects invalid inputs', () => {
46
65
  expect(
47
66
  isBlobRef({
48
67
  $type: 'blob',
49
- ref: { $link: blobCid.toString() },
68
+ ref: { $link: validBlobCid.toString() },
50
69
  mimeType: 'image/jpeg',
51
70
  size: '10000',
52
71
  }),
53
72
  ).toBe(false)
73
+
74
+ expect(
75
+ isBlobRef({
76
+ // $type: 'blob',
77
+ ref: validBlobCid,
78
+ mimeType: 'image/jpeg',
79
+ size: 10000,
80
+ }),
81
+ ).toBe(false)
82
+
83
+ expect(
84
+ isBlobRef({
85
+ $type: 'blob',
86
+ ref: validBlobCid,
87
+ mimeType: { toString: () => 'image/jpeg' },
88
+ size: 10000,
89
+ }),
90
+ ).toBe(false)
91
+
54
92
  expect(
55
93
  isBlobRef(
56
94
  {
57
95
  $type: 'blob',
58
- ref: { $link: blobCid.toString() },
96
+ ref: { $link: validBlobCid.toString() },
59
97
  mimeType: 'image/jpeg',
60
98
  size: '10000',
61
99
  },
@@ -81,6 +119,22 @@ describe('isBlobRef', () => {
81
119
  { strict: true },
82
120
  ),
83
121
  ).toBe(false)
122
+
123
+ expect(isBlobRef('not an object')).toBe(false)
124
+ expect(isBlobRef([])).toBe(false)
125
+ expect(isBlobRef(new Date())).toBe(false)
126
+ expect(isBlobRef(new Map())).toBe(false)
127
+ })
128
+
129
+ it('rejects non-integer size', () => {
130
+ expect(
131
+ isBlobRef({
132
+ $type: 'blob',
133
+ ref: validBlobCid,
134
+ mimeType: 'image/jpeg',
135
+ size: 10000.5,
136
+ }),
137
+ ).toBe(false)
84
138
  })
85
139
 
86
140
  it('rejects invalid CID/multihash code', () => {
@@ -88,7 +142,7 @@ describe('isBlobRef', () => {
88
142
  isBlobRef(
89
143
  {
90
144
  $type: 'blob',
91
- ref: blobCid,
145
+ ref: validBlobCid,
92
146
  mimeType: 'image/jpeg',
93
147
  size: 10000,
94
148
  },
@@ -100,7 +154,7 @@ describe('isBlobRef', () => {
100
154
  isBlobRef(
101
155
  {
102
156
  $type: 'blob',
103
- ref: lexCid,
157
+ ref: invalidBlobCid,
104
158
  mimeType: 'image/jpeg',
105
159
  size: 10000,
106
160
  },
@@ -113,7 +167,7 @@ describe('isBlobRef', () => {
113
167
  expect(
114
168
  isBlobRef({
115
169
  $type: 'blob',
116
- ref: blobCid,
170
+ ref: validBlobCid,
117
171
  mimeType: 'image/jpeg',
118
172
  size: 10000,
119
173
  extra: 'not allowed',
@@ -124,7 +178,7 @@ describe('isBlobRef', () => {
124
178
  isBlobRef(
125
179
  {
126
180
  $type: 'blob',
127
- ref: blobCid,
181
+ ref: validBlobCid,
128
182
  mimeType: 'image/jpeg',
129
183
  size: 10000,
130
184
  extra: 'not allowed',
@@ -133,20 +187,39 @@ describe('isBlobRef', () => {
133
187
  ),
134
188
  ).toBe(false)
135
189
  })
190
+
191
+ describe('strict mode', () => {
192
+ it('rejects invalid CID version', () => {
193
+ const cidV0 = parseCid(
194
+ 'QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG', // CID v0
195
+ )
196
+ expect(
197
+ isBlobRef(
198
+ {
199
+ $type: 'blob',
200
+ ref: cidV0,
201
+ mimeType: 'image/jpeg',
202
+ size: 10000,
203
+ },
204
+ { strict: true },
205
+ ),
206
+ ).toBe(false)
207
+ })
208
+ })
136
209
  })
137
210
 
138
- describe('isLegacyBlobRef', () => {
211
+ describe(isLegacyBlobRef, () => {
139
212
  it('parses valid legacy blob', () => {
140
213
  expect(
141
214
  isLegacyBlobRef({
142
- cid: blobCid.toString(),
215
+ cid: validBlobCid.toString(),
143
216
  mimeType: 'image/jpeg',
144
217
  }),
145
218
  ).toBe(true)
146
219
 
147
220
  expect(
148
221
  isLegacyBlobRef({
149
- cid: lexCid.toString(),
222
+ cid: invalidBlobCid.toString(),
150
223
  mimeType: 'image/jpeg',
151
224
  }),
152
225
  ).toBe(true)
@@ -172,15 +245,145 @@ describe('isLegacyBlobRef', () => {
172
245
  mimeType: 'image/jpeg',
173
246
  }),
174
247
  ).toBe(false)
248
+
249
+ expect(
250
+ isLegacyBlobRef({
251
+ cid: invalidBlobCid.toString(),
252
+ mimeType: { toString: () => 'image/jpeg' },
253
+ }),
254
+ ).toBe(false)
255
+
256
+ expect(
257
+ isLegacyBlobRef({
258
+ cid: invalidBlobCid.toString(),
259
+ mimeType: 3,
260
+ }),
261
+ ).toBe(false)
262
+
263
+ expect(
264
+ isLegacyBlobRef({
265
+ cid: invalidBlobCid.toString(),
266
+ mimeType: '',
267
+ }),
268
+ ).toBe(false)
269
+
270
+ expect(isLegacyBlobRef([])).toBe(false)
271
+ expect(isLegacyBlobRef('not an object')).toBe(false)
272
+ expect(isLegacyBlobRef(new Date())).toBe(false)
273
+ expect(isLegacyBlobRef(new Map())).toBe(false)
175
274
  })
176
275
 
177
276
  it('rejects extra keys', () => {
178
277
  expect(
179
278
  isLegacyBlobRef({
180
- cid: blobCid.toString(),
279
+ cid: validBlobCid.toString(),
181
280
  mimeType: 'image/jpeg',
182
281
  extra: 'not allowed',
183
282
  }),
184
283
  ).toBe(false)
185
284
  })
186
285
  })
286
+
287
+ describe(enumBlobRefs, () => {
288
+ const valid1: BlobRef = {
289
+ $type: 'blob',
290
+ ref: validBlobCid,
291
+ mimeType: 'image/png',
292
+ size: 2048,
293
+ }
294
+
295
+ const valid2: BlobRef = {
296
+ $type: 'blob',
297
+ ref: validBlobCid,
298
+ mimeType: 'image/jpeg',
299
+ size: 1024,
300
+ }
301
+
302
+ const invalid: BlobRef = {
303
+ $type: 'blob',
304
+ ref: invalidBlobCid,
305
+ mimeType: 'image/jpeg',
306
+ size: 1024,
307
+ }
308
+
309
+ const legacy: LegacyBlobRef = {
310
+ cid: validBlobCid.toString(),
311
+ mimeType: 'image/gif',
312
+ }
313
+
314
+ const data: LexValue = {
315
+ name: 'example',
316
+ file: { deeply: { nested: { in: { object: { valid1 } } } } },
317
+ attachments: [valid2, invalid, legacy, { description: 'not a blob' }],
318
+ }
319
+
320
+ it('enumerates valid BlobRefs by default', () => {
321
+ const refs = Array.from(enumBlobRefs(data))
322
+ expect(refs).toHaveLength(2)
323
+ expect(refs.includes(valid1)).toBe(true)
324
+ expect(refs.includes(valid2)).toBe(true)
325
+ })
326
+
327
+ describe('strict support', () => {
328
+ it('enumerates valid BlobRefs in strict mode', () => {
329
+ const refs = Array.from(enumBlobRefs(data, { strict: true }))
330
+ expect(refs).toHaveLength(2)
331
+ expect(refs.includes(valid1)).toBe(true)
332
+ expect(refs.includes(valid2)).toBe(true)
333
+ })
334
+
335
+ it('enumerates all BlobRefs in non-strict mode', () => {
336
+ const refs = Array.from(enumBlobRefs(data, { strict: false }))
337
+ expect(refs).toHaveLength(3)
338
+ expect(refs.includes(valid1)).toBe(true)
339
+ expect(refs.includes(valid2)).toBe(true)
340
+ expect(refs.includes(invalid)).toBe(true)
341
+ })
342
+ })
343
+
344
+ describe('legacy support', () => {
345
+ it('returns LegacyBlobRefs when legacy option is enabled', () => {
346
+ const refs = Array.from(enumBlobRefs(data, { allowLegacy: true }))
347
+ expect(refs).toHaveLength(3)
348
+ expect(refs.includes(valid1)).toBe(true)
349
+ expect(refs.includes(valid2)).toBe(true)
350
+ expect(refs.includes(legacy)).toBe(true)
351
+ })
352
+ })
353
+
354
+ describe('safety', () => {
355
+ it('handles cyclic structures without infinite loops', () => {
356
+ const cyclicArray: LexArray = [valid2]
357
+ const cyclicObject: LexMap = {
358
+ name: 'cyclic',
359
+ blob: valid1,
360
+ }
361
+
362
+ // Creating a cycle
363
+ cyclicArray.push(cyclicArray)
364
+ cyclicObject.self = cyclicObject
365
+
366
+ const refs = Array.from(
367
+ enumBlobRefs({
368
+ cyclicObject,
369
+ cyclicArray,
370
+ }),
371
+ )
372
+ expect(refs).toHaveLength(2)
373
+ expect(refs.includes(valid1)).toBe(true)
374
+ expect(refs.includes(valid2)).toBe(true)
375
+ })
376
+
377
+ it('handles deep structures without exceeding call stack', () => {
378
+ // Creating a deep nested structure
379
+ let deepData: LexMap = { blob: valid1 }
380
+ for (let i = 0; i < 100_000; i++) {
381
+ deepData = { nested: deepData }
382
+ }
383
+
384
+ const refs = Array.from(enumBlobRefs(deepData))
385
+ expect(refs).toHaveLength(1)
386
+ expect(refs[0]).toBe(valid1)
387
+ })
388
+ })
389
+ })
package/src/blob.ts CHANGED
@@ -5,7 +5,8 @@ import {
5
5
  asCid,
6
6
  parseCid,
7
7
  } from './cid.js'
8
- import { isPlainObject } from './object.js'
8
+ import { LexValue } from './lex.js'
9
+ import { isPlainObject, isPlainProto } from './object.js'
9
10
 
10
11
  /**
11
12
  * @note {@link BlobRef} is just a {@link LexMap} with a specific shape.
@@ -17,9 +18,20 @@ export type BlobRef = {
17
18
  size: number
18
19
  }
19
20
 
21
+ export type BlobRefValidationOptions = {
22
+ /**
23
+ * If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
24
+ * any valid CID. Otherwise, validates that the CID is v1, uses the raw
25
+ * multicodec, and has a sha256 multihash.
26
+ *
27
+ * @defaults to `true`
28
+ */
29
+ strict?: boolean
30
+ }
31
+
20
32
  export function isBlobRef(
21
33
  input: unknown,
22
- options?: { strict?: boolean },
34
+ options?: BlobRefValidationOptions,
23
35
  ): input is BlobRef {
24
36
  if (!isPlainObject(input)) {
25
37
  return false
@@ -35,12 +47,7 @@ export function isBlobRef(
35
47
  return false
36
48
  }
37
49
 
38
- if (
39
- typeof size !== 'number' ||
40
- size < 0 ||
41
- !Number.isInteger(size) ||
42
- !Number.isSafeInteger(size)
43
- ) {
50
+ if (typeof size !== 'number' || size < 0 || !Number.isSafeInteger(size)) {
44
51
  return false
45
52
  }
46
53
 
@@ -64,7 +71,7 @@ export function isBlobRef(
64
71
  return false
65
72
  }
66
73
 
67
- if (options?.strict) {
74
+ if (options?.strict !== false) {
68
75
  if (cid.version !== 1) {
69
76
  return false
70
77
  }
@@ -97,7 +104,7 @@ export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
97
104
  return false
98
105
  }
99
106
 
100
- if (typeof mimeType !== 'string') {
107
+ if (typeof mimeType !== 'string' || mimeType.length === 0) {
101
108
  return false
102
109
  }
103
110
 
@@ -115,3 +122,68 @@ export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
115
122
 
116
123
  return true
117
124
  }
125
+
126
+ export type EnumBlobRefsOptions = BlobRefValidationOptions & {
127
+ /**
128
+ * @defaults to `false`
129
+ */
130
+ allowLegacy?: boolean
131
+ }
132
+
133
+ /**
134
+ * Enumerates all {@link BlobRef}s (and, optionally, {@link LegacyBlobRef}s)
135
+ * found within a {@link LexValue}.
136
+ */
137
+ export function enumBlobRefs(
138
+ input: LexValue,
139
+ options: EnumBlobRefsOptions & { allowLegacy: true },
140
+ ): Generator<BlobRef | LegacyBlobRef, void, unknown>
141
+ export function enumBlobRefs(
142
+ input: LexValue,
143
+ options?: EnumBlobRefsOptions & { allowLegacy?: false },
144
+ ): Generator<BlobRef, void, unknown>
145
+ export function enumBlobRefs(
146
+ input: LexValue,
147
+ options?: EnumBlobRefsOptions,
148
+ ): Generator<BlobRef | LegacyBlobRef, void, unknown>
149
+ export function* enumBlobRefs(
150
+ input: LexValue,
151
+ options?: EnumBlobRefsOptions,
152
+ ): Generator<BlobRef | LegacyBlobRef, void, unknown> {
153
+ const includeLegacy = options?.allowLegacy === true
154
+
155
+ // Using a stack to avoid recursion depth issues.
156
+ const stack: LexValue[] = [input]
157
+
158
+ // Since we are using a stack, we could end-up in an infinite loop with cyclic
159
+ // structures. Cyclic structures are not valid LexValues and should, thus,
160
+ // never occur, but let's be safe.
161
+ const visited = new Set<object>()
162
+
163
+ do {
164
+ const value = stack.pop()!
165
+
166
+ if (value != null && typeof value === 'object') {
167
+ if (Array.isArray(value)) {
168
+ if (visited.has(value)) continue
169
+ visited.add(value)
170
+ stack.push(...value)
171
+ } else if (isPlainProto(value)) {
172
+ if (visited.has(value)) continue
173
+ visited.add(value)
174
+ if (isBlobRef(value, options)) {
175
+ yield value
176
+ } else if (includeLegacy && isLegacyBlobRef(value)) {
177
+ yield value
178
+ } else {
179
+ for (const v of Object.values(value)) {
180
+ if (v != null) stack.push(v)
181
+ }
182
+ }
183
+ }
184
+ }
185
+ } while (stack.length > 0)
186
+
187
+ // Optimization: ease GC's work
188
+ visited.clear()
189
+ }