@atproto/lex-data 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/blob.d.ts +16 -0
  2. package/dist/blob.d.ts.map +1 -0
  3. package/dist/blob.js +73 -0
  4. package/dist/blob.js.map +1 -0
  5. package/dist/cid.d.ts +12 -0
  6. package/dist/cid.d.ts.map +1 -0
  7. package/dist/cid.js +47 -0
  8. package/dist/cid.js.map +1 -0
  9. package/dist/index.d.ts +9 -0
  10. package/dist/index.d.ts.map +1 -0
  11. package/dist/index.js +12 -0
  12. package/dist/index.js.map +1 -0
  13. package/dist/language.d.ts +18 -0
  14. package/dist/language.d.ts.map +1 -0
  15. package/dist/language.js +30 -0
  16. package/dist/language.js.map +1 -0
  17. package/dist/lex-equals.d.ts +3 -0
  18. package/dist/lex-equals.d.ts.map +1 -0
  19. package/dist/lex-equals.js +78 -0
  20. package/dist/lex-equals.js.map +1 -0
  21. package/dist/lex.d.ts +18 -0
  22. package/dist/lex.d.ts.map +1 -0
  23. package/dist/lex.js +83 -0
  24. package/dist/lex.js.map +1 -0
  25. package/dist/lib/nodejs-buffer.d.ts +15 -0
  26. package/dist/lib/nodejs-buffer.d.ts.map +1 -0
  27. package/dist/lib/nodejs-buffer.js +12 -0
  28. package/dist/lib/nodejs-buffer.js.map +1 -0
  29. package/dist/object.d.ts +3 -0
  30. package/dist/object.d.ts.map +1 -0
  31. package/dist/object.js +22 -0
  32. package/dist/object.js.map +1 -0
  33. package/dist/uint8array-from-base64.d.ts +16 -0
  34. package/dist/uint8array-from-base64.d.ts.map +1 -0
  35. package/dist/uint8array-from-base64.js +60 -0
  36. package/dist/uint8array-from-base64.js.map +1 -0
  37. package/dist/uint8array-to-base64.d.ts +16 -0
  38. package/dist/uint8array-to-base64.d.ts.map +1 -0
  39. package/dist/uint8array-to-base64.js +30 -0
  40. package/dist/uint8array-to-base64.js.map +1 -0
  41. package/dist/uint8array.d.ts +21 -0
  42. package/dist/uint8array.d.ts.map +1 -0
  43. package/dist/uint8array.js +57 -0
  44. package/dist/uint8array.js.map +1 -0
  45. package/dist/utf8-grapheme-len.d.ts +3 -0
  46. package/dist/utf8-grapheme-len.d.ts.map +1 -0
  47. package/dist/utf8-grapheme-len.js +23 -0
  48. package/dist/utf8-grapheme-len.js.map +1 -0
  49. package/dist/utf8-len.d.ts +3 -0
  50. package/dist/utf8-len.d.ts.map +1 -0
  51. package/dist/utf8-len.js +50 -0
  52. package/dist/utf8-len.js.map +1 -0
  53. package/dist/utf8.d.ts +3 -0
  54. package/dist/utf8.d.ts.map +1 -0
  55. package/dist/utf8.js +12 -0
  56. package/dist/utf8.js.map +1 -0
  57. package/package.json +51 -0
  58. package/src/blob.test.ts +186 -0
  59. package/src/blob.ts +99 -0
  60. package/src/cid.ts +50 -0
  61. package/src/index.ts +8 -0
  62. package/src/language.test.ts +87 -0
  63. package/src/language.ts +39 -0
  64. package/src/lex-equals.test.ts +153 -0
  65. package/src/lex-equals.ts +85 -0
  66. package/src/lex.test.ts +124 -0
  67. package/src/lex.ts +78 -0
  68. package/src/lib/nodejs-buffer.ts +27 -0
  69. package/src/object.test.ts +78 -0
  70. package/src/object.ts +21 -0
  71. package/src/uint8array-from-base64.test.ts +113 -0
  72. package/src/uint8array-from-base64.ts +85 -0
  73. package/src/uint8array-to-base64.ts +45 -0
  74. package/src/uint8array.ts +78 -0
  75. package/src/utf8-grapheme-len.test.ts +37 -0
  76. package/src/utf8-grapheme-len.ts +21 -0
  77. package/src/utf8-len.test.ts +31 -0
  78. package/src/utf8-len.ts +51 -0
  79. package/src/utf8.ts +14 -0
  80. package/tsconfig.build.json +12 -0
  81. package/tsconfig.json +7 -0
  82. package/tsconfig.tests.json +9 -0
package/src/lex.ts ADDED
@@ -0,0 +1,78 @@
1
+ import { CID, isCid } from './cid.js'
2
+ import { isPlainObject } from './object.js'
3
+
4
+ // @NOTE BlobRef is just a special case of LexMap.
5
+
6
/** Non-container Lex values: primitives, `null`, {@link CID} and `Uint8Array`. */
export type LexScalar = string | number | boolean | null | Uint8Array | CID
/** A list whose items are all Lex values. */
export type LexArray = LexValue[]
/** A string-keyed object whose (optional) properties are all Lex values. */
export type LexMap = { [_ in string]?: LexValue }
/** Any Lex value: a scalar, an array of Lex values or a map of Lex values. */
export type LexValue = LexScalar | LexArray | LexMap
10
+
11
/**
 * Type guard for {@link LexMap}.
 *
 * @returns `true` when `value` is a plain object and every property visited by
 * a `for...in` enumeration holds a valid Lex value.
 */
export function isLexMap(value: unknown): value is LexMap {
  if (isPlainObject(value)) {
    for (const prop in value) {
      const entry = value[prop]
      if (!isLexValue(entry)) return false
    }
    return true
  }
  return false
}
18
+
19
/**
 * Type guard for {@link LexArray}.
 *
 * @returns `true` when `value` is an array and each of its items (holes
 * included, which read as `undefined`) is a valid Lex value.
 */
export function isLexArray(value: unknown): value is LexArray {
  if (Array.isArray(value)) {
    for (const item of value) {
      if (!isLexValue(item)) return false
    }
    return true
  }
  return false
}
26
+
27
/**
 * Type guard for {@link LexScalar}.
 *
 * Accepts strings, booleans, integer numbers, `null`, `Uint8Array` instances
 * and CIDs. Anything else - including non-integer numbers, `undefined`,
 * functions, symbols and bigints - yields `false`.
 *
 * As a type predicate this function never throws; it reports unsupported
 * input by returning `false`, exactly like {@link isLexValue} does.
 */
export function isLexScalar(value: unknown): value is LexScalar {
  switch (typeof value) {
    case 'string':
    case 'boolean':
      return true
    case 'number':
      // Only integers are valid Lex numbers (NaN and Infinity are rejected too)
      return Number.isInteger(value)
    case 'object':
      return value === null || value instanceof Uint8Array || isCid(value)
    default:
      return false
  }
}
42
+
43
/**
 * Type guard for {@link LexValue}. Recursively validates arrays and plain
 * objects.
 *
 * @returns `true` for strings, booleans, integer numbers, `null`,
 * `Uint8Array` instances, CIDs, and arrays / plain objects made exclusively of
 * such values; `false` otherwise.
 */
export function isLexValue(value: unknown): value is LexValue {
  if (value === null) return true

  if (typeof value === 'string' || typeof value === 'boolean') return true
  if (typeof value === 'number') return Number.isInteger(value)
  if (typeof value !== 'object') return false

  // Containers: every item / property must itself be a Lex value
  if (Array.isArray(value)) {
    for (const item of value) {
      if (!isLexValue(item)) return false
    }
    return true
  }

  if (isPlainObject(value)) {
    for (const prop in value) {
      if (!isLexValue(value[prop])) return false
    }
    return true
  }

  // Remaining supported object types
  return value instanceof Uint8Array || isCid(value)
}
72
+
73
/** A {@link LexMap} carrying a string `$type` property. */
export type TypedLexMap = LexMap & { $type: string }

/**
 * Type guard for {@link TypedLexMap}.
 *
 * @returns `true` when `value` is a Lex map whose `$type` is a non-empty
 * string.
 */
export function isTypedLexMap(value: LexValue): value is TypedLexMap {
  if (!isLexMap(value)) return false
  const { $type } = value
  return typeof $type === 'string' && $type.length > 0
}
@@ -0,0 +1,27 @@
1
/** String encodings this module's Buffer typings accept. */
type Encoding = 'utf8' | 'base64' | 'base64url'

/** Structural typing of the parts of a Node.js `Buffer` instance used here. */
interface NodeJSBuffer<TArrayBuffer extends ArrayBufferLike = ArrayBufferLike>
  extends Uint8Array<TArrayBuffer> {
  byteLength: number
  toString(encoding?: Encoding): string
}

/** Structural typing of the parts of the Node.js `Buffer` constructor used here. */
interface NodeJSBufferConstructor {
  new (input: string, encoding?: Encoding): NodeJSBuffer
  from(
    input: Uint8Array | ArrayBuffer | ArrayBufferView,
  ): NodeJSBuffer<ArrayBuffer>
  from(input: string, encoding?: Encoding): NodeJSBuffer<ArrayBuffer>
  byteLength(input: string, encoding?: Encoding): number
  prototype: NodeJSBuffer
}

// The global's name is assembled at runtime so that the source never contains
// a direct reference to Node.js Buffer, which might not exist in some
// environments (e.g. browsers, Deno, Bun). This prevents bundlers from trying
// to include polyfills.
const BUFFER = /*#__PURE__*/ (() =>
  ['Bu', 'f'.repeat(2), 'er'].join(''))() as 'Buffer'

// Whatever the environment exposes under that global name (possibly nothing)
const bufferCandidate = (globalThis as any)?.[BUFFER]

// Only trust the global if it looks like the real thing: its prototype derives
// from Uint8Array and it exposes a "byteLength" member.
const looksLikeNodeJSBuffer =
  bufferCandidate?.prototype instanceof Uint8Array &&
  'byteLength' in bufferCandidate

/**
 * The Node.js `Buffer` constructor when available in the current environment,
 * `null` otherwise.
 */
export const NodeJSBuffer: NodeJSBufferConstructor | null =
  looksLikeNodeJSBuffer ? (bufferCandidate as NodeJSBufferConstructor) : null
@@ -0,0 +1,78 @@
1
+ import { CID } from './cid.js'
2
+ import { isObject, isPlainObject } from './object.js'
3
// isObject() must accept every non-null value whose typeof is "object"
describe('isObject', () => {
  it('returns true for plain objects', () => {
    for (const value of [{}, { a: 1 }]) {
      expect(isObject(value)).toBe(true)
    }
  })

  it('returns true for CIDs', () => {
    const parsed = CID.parse(
      'bafyreidfayvfuwqa7qlnopdjiqrxzs6blmoeu4rujcjtnci5beludirz2a',
    )
    expect(isObject(parsed)).toBe(true)
  })

  it('returns true for class instances', () => {
    class MyClass {}
    const instance = new MyClass()
    expect(isObject(instance)).toBe(true)
  })

  it('returns true for arrays', () => {
    for (const value of [[], [1, 2, 3]]) {
      expect(isObject(value)).toBe(true)
    }
  })

  it('returns false for null', () => {
    expect(isObject(null)).toBe(false)
  })

  it('returns false for non-objects', () => {
    for (const value of [42, 'string', undefined, true]) {
      expect(isObject(value)).toBe(false)
    }
  })
})
37
+
38
// isPlainObject() must only accept object literals and null-prototype objects
describe('isPlainObject', () => {
  it('returns true for plain objects', () => {
    for (const value of [{}, { a: 1 }]) {
      expect(isPlainObject(value)).toBe(true)
    }
  })

  it('returns true for objects with null prototype', () => {
    const bare = Object.create(null)
    bare.a = 1
    expect(isPlainObject(bare)).toBe(true)
    expect(isPlainObject({ __proto__: null, foo: 'bar' })).toBe(true)
  })

  it('returns false for class instances', () => {
    class MyClass {}
    const instance = new MyClass()
    expect(isPlainObject(instance)).toBe(false)
  })

  it('returns false for CIDs', () => {
    const parsed = CID.parse(
      'bafyreidfayvfuwqa7qlnopdjiqrxzs6blmoeu4rujcjtnci5beludirz2a',
    )
    expect(isPlainObject(parsed)).toBe(false)
  })

  it('returns false for arrays', () => {
    for (const value of [[], [1, 2, 3]]) {
      expect(isPlainObject(value)).toBe(false)
    }
  })

  it('returns false for null', () => {
    expect(isPlainObject(null)).toBe(false)
  })

  it('returns false for non-objects', () => {
    for (const value of [42, 'string', undefined, true]) {
      expect(isPlainObject(value)).toBe(false)
    }
  })
})
package/src/object.ts ADDED
@@ -0,0 +1,21 @@
1
/**
 * Type guard telling whether `input` is a non-null value whose `typeof` is
 * `"object"` (this includes arrays and class instances).
 */
export function isObject(input: unknown): input is object {
  return typeof input === 'object' && input !== null
}
4
+
5
const ObjectProto = Object.prototype
const ObjectToString = Object.prototype.toString

/**
 * Type guard telling whether `input` is a "plain" object: either an object
 * without prototype, or an object whose prototype is a root prototype (one
 * that itself has no prototype) and whose string tag is `[object Object]`.
 */
export function isPlainObject(
  input: unknown,
): input is object & Record<string, unknown> {
  if (typeof input !== 'object' || !input) return false

  const proto = Object.getPrototypeOf(input)

  // e.g. Object.create(null)
  if (proto === null) return true

  // Besides this realm's Object.prototype, any prototype that sits at the root
  // of its chain is accepted. This is needed to support NodeJS's
  // `runInNewContext`, which produces objects with a different prototype.
  const hasRootProto =
    proto === ObjectProto || Object.getPrototypeOf(proto) === null
  if (!hasRootProto) return false

  return ObjectToString.call(input) === '[object Object]'
}
@@ -0,0 +1,113 @@
1
+ import 'core-js/modules/es.uint8-array.from-base64.js'
2
+ import 'core-js/modules/es.uint8-array.to-base64.js'
3
+ import assert from 'node:assert'
4
+ import {
5
+ fromBase64Native,
6
+ fromBase64Node,
7
+ fromBase64Ponyfill,
8
+ } from './uint8array-from-base64.js'
9
+ import { ui8Equals } from './uint8array.js'
10
+
11
+ // @NOTE This test suite relies on the NodeJS Buffer implementation to generate
12
+ // valid base64 strings for testing.
13
+
14
+ // @NOTE b64 needs a test suite because fromBase64 implementations differ in
15
+ // their behavior when encountering invalid base64 strings. This is not the case
16
+ // for toBase64, which is straightforward and has no edge cases.
17
+
18
// Every decoder variant is run against the exact same fixtures.
const implementations = [
  fromBase64Native,
  fromBase64Node,
  fromBase64Ponyfill,
] as const

// Texts that are UTF-8 encoded, turned into base64 through Buffer, and must
// decode back to the same bytes (with and without padding).
const validTexts = [
  '',
  '\0\0',
  '\0\0\0',
  '\0\0\0\0',
  '__',
  'é',
  'àç',
  '\0éàç',
  '```',
  'aaa',
  'Hello, World!',
  '😀😃😄😁😆😅😂🤣😊😇',
  '👩‍💻👨‍💻👩‍🔬👨‍🔬👩‍🚀👨‍🚀',
  '🌍🌎🌏🌐🪐🌟✨⚡🔥💧',
] as const

// Inputs every decoder variant must reject.
const invalidInputs = [
  'çç',
  'é',
  'YWJjZGU$$$',
  '@@@@',
  'abcd!',
  'ab=cd',
  // "YWFh" is "aaa" in base64
  'YWFh' + 'é',
  'YWFh' + 'éé',
  'YWFh' + 'ééé',
  'YWFh' + 'éééé',
  // Invalid padding
  'YWFh' + '=',
  'YWFh' + '==',
  'YWFh' + '===',
  'YWFh' + '====',
  'YWFh' + '=====',
  'YWFh' + '======',
  // More invalid padding
  // 'TWE=', // 'Ma'
  'TWE=' + '=',
  'TWE=' + '==',
  // 'TQ==', // 'M'
  'TQ==' + '=',
  'TQ==' + '==',
] as const

for (const fromBase64 of implementations) {
  // Tests should run in NodeJS where implementations are either available or
  // polyfilled (see core-js imports above).
  assert(fromBase64 !== null, 'fromBase64 implementation should not be null')

  describe(fromBase64.name, () => {
    // Decodes `encoded` and checks the result is a Uint8Array holding `expected`
    const expectDecodesTo = (encoded: string, expected: Uint8Array): void => {
      const decoded = fromBase64(encoded)
      expect(decoded).toBeInstanceOf(Uint8Array)
      expect(ui8Equals(decoded, expected)).toBe(true)
    }

    describe('valid base64 strings', () => {
      it('decodes empty string', () => {
        const decoded = fromBase64('')
        expect(decoded).toBeInstanceOf(Uint8Array)
        expect(decoded.length).toBe(0)
      })

      it('decodes 10MB', () => {
        const bytes = Buffer.allocUnsafe(10_000_000).fill('🐩')
        expectDecodesTo(bytes.toString('base64'), bytes)
      })

      for (const string of validTexts) {
        const buffer = Buffer.from(string, 'utf8')
        const base64 = buffer.toString('base64')
        const base64Unpadded = base64.replace(/=+$/, '')

        it(`decodes ${JSON.stringify(string)}`, () => {
          expectDecodesTo(base64, buffer)
        })

        // Only worth a dedicated test when stripping padding changed something
        if (base64 !== base64Unpadded) {
          it(`decodes ${JSON.stringify(string)} (unpadded)`, () => {
            expectDecodesTo(base64Unpadded, buffer)
          })
        }
      }
    })

    describe('invalid base64 strings', () => {
      for (const invalidB64 of invalidInputs) {
        it(`throws on invalid base64 string "${invalidB64}"`, () => {
          expect(() => fromBase64(invalidB64)).toThrow()
        })
      }
    })
  })
}
@@ -0,0 +1,85 @@
1
+ import { fromString } from 'uint8arrays/from-string'
2
+ import { NodeJSBuffer } from './lib/nodejs-buffer.js'
3
+
4
+ const Buffer = NodeJSBuffer
5
+
6
// Ambient typing for the (optional) native base64 decoder, which TypeScript's
// bundled lib may not describe yet.
declare global {
  interface Uint8ArrayConstructor {
    /**
     * @see {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array/fromBase64 Uint8Array.fromBase64()}
     */
    fromBase64?: (
      b64: string,
      options?: {
        /** @default 'base64' */
        alphabet?: 'base64' | 'base64url'
        lastChunkHandling?: 'loose' | 'strict' | 'stop-before-partial'
      },
    ) => Uint8Array
  }
}

/**
 * Base64 decoder backed by the runtime's own `Uint8Array.fromBase64()` (called
 * with `lastChunkHandling: 'loose'`), or `null` when the runtime does not
 * provide that function.
 */
export const fromBase64Native =
  typeof Uint8Array.fromBase64 !== 'function'
    ? null
    : function fromBase64Native(b64: string): Uint8Array {
        const options = { lastChunkHandling: 'loose' } as const
        return Uint8Array.fromBase64!(b64, options)
      }
28
+
29
/**
 * Base64 decoder backed by Node.js' Buffer, or `null` when Buffer is not
 * available. The decoded bytes are checked against the input and an error is
 * thrown when the input was not valid base64.
 */
export const fromBase64Node = Buffer
  ? function fromBase64Node(b64: string): Uint8Array {
      const decoded = Buffer.from(b64, 'base64')
      verifyBase64ForBytes(b64, decoded)

      // Expose the very same memory through a bare Uint8Array: even though
      // Buffer is a sub class of Uint8Array, it serializes differently to
      // Uint8Array (e.g. in JSON) and results in unexpected behavior
      // downstream (e.g. in tests)
      const { buffer, byteOffset, byteLength } = decoded
      return new Uint8Array(buffer, byteOffset, byteLength)
    }
  : null
39
+
40
/**
 * Base64 decoder backed by the "uint8arrays" package. The decoded bytes are
 * checked against the input and an error is thrown when the input was not
 * valid base64.
 */
export function fromBase64Ponyfill(b64: string): Uint8Array {
  const decoded = fromString(b64, 'base64')
  // Throws when `b64` cannot be the encoding of `decoded`
  verifyBase64ForBytes(b64, decoded)
  return decoded
}
45
+
46
// @NOTE NodeJS will silently stop decoding at the first invalid character,
// while "uint8arrays/from-string" will not validate that the padding is
// correct. The following function performs basic validation to ensure that the
// input was a valid base64 string. The availability of the "bytes" allows
// to perform these checks with O(1) complexity.
/**
 * Ensures that `b64` is a well-formed base64 encoding of the already decoded
 * `bytes`.
 *
 * The following rules are enforced, using integer arithmetic only:
 * - once (up to two) trailing `=` are removed, the number of remaining
 *   characters must be exactly what is needed to encode `bytes.length` bytes
 *   (a decoder that skipped or stopped at an invalid character returns fewer
 *   bytes than the input length calls for);
 * - a single dangling character in the last group of four never encodes
 *   anything and is rejected;
 * - padding is optional, but when present it must complete the last group of
 *   four characters. Partial padding (e.g. `"TQ="`) is rejected, as
 *   `Uint8Array.fromBase64()` does in its "loose" mode.
 *
 * @param b64 - the string that was handed to the decoder
 * @param bytes - the bytes the decoder produced for `b64`
 * @throws {Error} "Invalid base64 string" if any of the rules above is broken
 */
function verifyBase64ForBytes(b64: string, bytes: Uint8Array): void {
  const paddingCount = b64.endsWith('==') ? 2 : b64.endsWith('=') ? 1 : 0
  const trimmedLength = b64.length - paddingCount

  // Number of characters in the last, possibly incomplete, group of four
  const lastGroupLength = trimmedLength % 4

  // Every character carries 6 bits, a single one cannot encode a whole byte
  const hasDanglingChar = lastGroupLength === 1

  const hasUnexpectedByteLength =
    bytes.length !== Math.floor((trimmedLength * 3) / 4)

  // Padding may be omitted altogether, but cannot be partial or superfluous
  const hasInvalidPadding =
    paddingCount !== 0 && lastGroupLength + paddingCount !== 4

  if (hasDanglingChar || hasUnexpectedByteLength || hasInvalidPadding) {
    throw new Error('Invalid base64 string')
  }
}
@@ -0,0 +1,45 @@
1
+ import { toString } from 'uint8arrays/to-string'
2
+ import { NodeJSBuffer } from './lib/nodejs-buffer.js'
3
+
4
+ const Buffer = NodeJSBuffer
5
+
6
// Ambient typing for the (optional) native base64 encoder, which TypeScript's
// bundled lib may not describe yet.
declare global {
  interface Uint8Array {
    /**
     * @see {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array/toBase64 Uint8Array.prototype.toBase64()}
     */
    toBase64?: (options?: {
      /** @default 'base64' */
      alphabet?: 'base64' | 'base64url'
      omitPadding?: boolean
    }) => string
  }
}

/**
 * Base64 encoder backed by the runtime's own `Uint8Array.prototype.toBase64()`
 * (called with `omitPadding: true`), or `null` when the runtime does not
 * provide that method.
 */
export const toBase64Native =
  typeof Uint8Array.prototype.toBase64 !== 'function'
    ? null
    : function toBase64Native(bytes: Uint8Array): string {
        const options = { omitPadding: true }
        return bytes.toBase64!(options)
      }
25
+
26
/**
 * Base64 encoder backed by Node.js' Buffer, or `null` when Buffer is not
 * available. The returned string never carries trailing `=` padding.
 */
export const toBase64Node = Buffer
  ? function toBase64Node(bytes: Uint8Array): string {
      // Inputs that already are Buffers are encoded directly
      const source = bytes instanceof Buffer ? bytes : Buffer.from(bytes)
      const encoded = source.toString('base64')

      // @NOTE We strip padding for strict compatibility with
      // uint8arrays.toString behavior. Tests failing because of the presence of
      // padding are not really synonymous with an actual error and we might
      // (should?) actually want to keep the padding at some point.
      if (encoded.endsWith('==')) return encoded.slice(0, -2)
      if (encoded.endsWith('=')) return encoded.slice(0, -1)
      return encoded
    }
  : null
42
+
43
/**
 * Base64 encoder backed by the "uint8arrays" package, usable when no faster
 * implementation is available.
 */
export function toBase64Ponyfill(bytes: Uint8Array): string {
  const encoded = toString(bytes, 'base64')
  return encoded
}
@@ -0,0 +1,78 @@
1
+ import {
2
+ fromBase64Native,
3
+ fromBase64Node,
4
+ fromBase64Ponyfill,
5
+ } from './uint8array-from-base64.js'
6
+ import {
7
+ toBase64Native,
8
+ toBase64Node,
9
+ toBase64Ponyfill,
10
+ } from './uint8array-to-base64.js'
11
+
12
+ // @TODO drop dependency on uint8arrays package once Uint8Array.fromBase64 /
13
+ // Uint8Array.prototype.toBase64 is widely supported, and mark fromBase64 /
14
+ // toBase64 as deprecated. We can also drop NodeJS specific implementations
15
+ // once NodeJS <24 is no longer supported.
16
+
17
/**
 * Decodes a base64 string into a Uint8Array, using the first available
 * implementation among: native, NodeJS Buffer, ponyfill.
 *
 * @returns The decoded {@link Uint8Array}
 * @throws If the input is not a valid base64 string
 */
export const fromBase64: (b64: string) => Uint8Array =
  fromBase64Native ?? fromBase64Node ?? fromBase64Ponyfill

/**
 * Encodes a Uint8Array into a base64 string, using the first available
 * implementation among: native, NodeJS Buffer, ponyfill.
 *
 * @returns The base64 encoded string
 */
export const toBase64: (bytes: Uint8Array) => string =
  toBase64Native ?? toBase64Node ?? toBase64Ponyfill

// Let developers know when either direction ended up on the ponyfill
const usesPonyfill =
  fromBase64 === fromBase64Ponyfill || toBase64 === toBase64Ponyfill

if (usesPonyfill) {
  /*#__PURE__*/
  console.warn(
    '[@atproto/lex-data]: Uint8Array.fromBase64 / Uint8Array.prototype.toBase64 not available in this environment. Falling back to ponyfill implementation.',
  )
}
40
+
41
/**
 * Coerces various binary data representations into a Uint8Array:
 * - a Uint8Array is returned as is;
 * - any other ArrayBuffer view (typed array, DataView) yields a Uint8Array
 *   over the same memory region, without copying;
 * - an ArrayBuffer yields a Uint8Array spanning the whole buffer.
 *
 * @return `undefined` if the input could not be coerced into a {@link Uint8Array}.
 */
export function asUint8Array(input: unknown): Uint8Array | undefined {
  if (input instanceof Uint8Array) return input

  if (ArrayBuffer.isView(input)) {
    const { buffer, byteOffset, byteLength } = input
    const length = byteLength / Uint8Array.BYTES_PER_ELEMENT
    return new Uint8Array(buffer, byteOffset, length)
  }

  return input instanceof ArrayBuffer ? new Uint8Array(input) : undefined
}
65
+
66
/**
 * Compares two byte arrays.
 *
 * @returns `true` when both arrays have the same byte length and hold the same
 * value at every index.
 */
export function ui8Equals(a: Uint8Array, b: Uint8Array): boolean {
  const size = a.byteLength
  if (size !== b.byteLength) return false

  let index = 0
  while (index < size) {
    if (a[index] !== b[index]) return false
    index += 1
  }

  return true
}
@@ -0,0 +1,37 @@
1
+ import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
2
+
3
// Exercises the Intl.Segmenter based implementation
describe('graphemeLenSegmenter', () => {
  it('computes grapheme length', () => {
    const expectations: Array<[input: string, graphemes: number]> = [
      ['a', 1],
      ['~', 1],
      ['ö', 1],
      ['ñ', 1],
      ['©', 1],
      ['⽘', 1],
      ['☎', 1],
      ['𓋓', 1],
      ['😀', 1],
      ['👨‍👩‍👧‍👧', 1],
      ['a~öñ©⽘☎𓋓😀👨‍👩‍👧‍👧', 10],
      // https://github.com/bluesky-social/atproto/issues/4321
      ['नमस्ते', 3],
    ]

    for (const [input, graphemes] of expectations) {
      expect(graphemeLenNative!(input)).toBe(graphemes)
    }
  })
})
20
+
21
// Exercises the "unicode-segmenter" based implementation
describe('graphemeLenInternal', () => {
  it('computes grapheme length', () => {
    const expectations: Array<[input: string, graphemes: number]> = [
      ['a', 1],
      ['~', 1],
      ['ö', 1],
      ['ñ', 1],
      ['©', 1],
      ['⽘', 1],
      ['☎', 1],
      ['𓋓', 1],
      ['😀', 1],
      ['👨‍👩‍👧‍👧', 1],
      ['a~öñ©⽘☎𓋓😀👨‍👩‍👧‍👧', 10],
      // https://github.com/bluesky-social/atproto/issues/4321
      ['नमस्ते', 3],
    ]

    for (const [input, graphemes] of expectations) {
      expect(graphemeLenPonyfill(input)).toBe(graphemes)
    }
  })
})
@@ -0,0 +1,21 @@
1
+ import { countGraphemes } from 'unicode-segmenter/grapheme'
2
+
3
// @TODO: Drop usage of "unicode-segmenter" package when Intl.Segmenter is
// widely supported.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter
const segmenter =
  'Segmenter' in Intl && typeof Intl.Segmenter === 'function'
    ? /*#__PURE__*/ new Intl.Segmenter()
    : null

/**
 * Counts the segments `Intl.Segmenter` (with its default options) finds in a
 * string. `null` when `Intl.Segmenter` is not available.
 */
export const graphemeLenNative = segmenter
  ? function graphemeLenNative(str: string): number {
      // Walk the segments without materializing them
      const iterator = segmenter.segment(str)[Symbol.iterator]()
      let count = 0
      while (!iterator.next().done) count += 1
      return count
    }
  : null
18
+
19
/**
 * Counts the graphemes of a string using the "unicode-segmenter" package,
 * for environments lacking `Intl.Segmenter`.
 */
export function graphemeLenPonyfill(str: string): number {
  const count = countGraphemes(str)
  return count
}
@@ -0,0 +1,31 @@
1
+ import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
2
+
3
// Exercises the NodeJS Buffer based implementation
describe('utf8LenNode', () => {
  it('computes utf8 string length', () => {
    const expectations: Array<[input: string, bytes: number]> = [
      ['a', 1],
      ['~', 1],
      ['ö', 2],
      ['ñ', 2],
      ['©', 2],
      ['⽘', 3],
      ['☎', 3],
      ['𓋓', 4],
      ['😀', 4],
      ['👨‍👩‍👧‍👧', 25],
    ]

    for (const [input, bytes] of expectations) {
      expect(utf8LenNode!(input)).toBe(bytes)
    }
  })
})
17
+
18
// Exercises the pure JavaScript implementation
describe('utf8LenInternal', () => {
  it('computes utf8 string length', () => {
    const expectations: Array<[input: string, bytes: number]> = [
      ['a', 1],
      ['~', 1],
      ['ö', 2],
      ['ñ', 2],
      ['©', 2],
      ['⽘', 3],
      ['☎', 3],
      ['𓋓', 4],
      ['😀', 4],
      ['👨‍👩‍👧‍👧', 25],
    ]

    for (const [input, bytes] of expectations) {
      expect(utf8LenCompute(input)).toBe(bytes)
    }
  })
})
@@ -0,0 +1,51 @@
1
+ import { NodeJSBuffer } from './lib/nodejs-buffer.js'
2
+
3
+ // @NOTE This file is not meant to be exported directly. Instead, we re-export
4
+ // public functions from ./utf8.ts. The reason for this separation is that this
5
+ // file allows to test both the NodeJS-optimized and ponyfill implementations.
6
+
7
/**
 * Computes the UTF-8 byte length of a string through NodeJS'
 * `Buffer.byteLength()`. `null` when Buffer is not available.
 */
export const utf8LenNode =
  NodeJSBuffer === null
    ? null
    : function utf8LenNode(string: string): number {
        const byteCount = NodeJSBuffer!.byteLength(string, 'utf8')
        return byteCount
      }
12
+
13
/**
 * Computes the number of bytes needed to encode a string as UTF-8, without
 * actually encoding it.
 *
 * The result matches `new TextEncoder().encode(string).byteLength`, which is
 * slower as it requires allocating a new Uint8Array and copying data.
 */
export function utf8LenCompute(string: string): number {
  const unitCount = string.length
  let bytes = 0

  for (let index = 0; index < unitCount; index++) {
    const unit = string.charCodeAt(index)

    if (unit <= 0x7f) {
      // ASCII
      bytes += 1
      continue
    }

    if (unit <= 0x7ff) {
      bytes += 2
      continue
    }

    if (unit >= 0xd800 && unit <= 0xdbff) {
      // High surrogate: when followed by a low surrogate, both UTF-16 units
      // form a single code point that takes 4 bytes. The low surrogate is
      // consumed here so that it does not get counted a second time.
      // (Past the end of the string charCodeAt() yields NaN, failing the test.)
      const next = string.charCodeAt(index + 1)
      if (next >= 0xdc00 && next <= 0xdfff) {
        bytes += 4
        index++
        continue
      }
    }

    // Any other UTF-16 unit (unpaired surrogates included) counts for 3 bytes
    bytes += 3
  }

  return bytes
}
package/src/utf8.ts ADDED
@@ -0,0 +1,14 @@
1
+ import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
2
+ import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
3
+
4
/**
 * Computes the UTF-8 byte length of a string, through NodeJS' Buffer when
 * available and a pure JavaScript computation otherwise.
 */
export const utf8Len: (string: string) => number = utf8LenNode ?? utf8LenCompute

/**
 * Counts the graphemes of a string, through `Intl.Segmenter` when available
 * and a ponyfill otherwise.
 */
export const graphemeLen: (str: string) => number =
  graphemeLenNative ?? graphemeLenPonyfill

// Let developers know when the ponyfill ended up being selected
const usesGraphemePonyfill = graphemeLen === graphemeLenPonyfill

if (usesGraphemePonyfill) {
  /*#__PURE__*/
  console.warn(
    '[@atproto/lex-data]: Intl.Segmenter is not available in this environment. Falling back to ponyfill implementation.',
  )
}