@atproto/lex-data 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/blob.d.ts +143 -6
- package/dist/blob.d.ts.map +1 -1
- package/dist/blob.js +23 -0
- package/dist/blob.js.map +1 -1
- package/dist/cid.d.ts +211 -27
- package/dist/cid.d.ts.map +1 -1
- package/dist/cid.js +115 -19
- package/dist/cid.js.map +1 -1
- package/dist/lex-equals.d.ts +38 -0
- package/dist/lex-equals.d.ts.map +1 -1
- package/dist/lex-equals.js +38 -0
- package/dist/lex-equals.js.map +1 -1
- package/dist/lex-error.d.ts +80 -1
- package/dist/lex-error.d.ts.map +1 -1
- package/dist/lex-error.js +49 -1
- package/dist/lex-error.js.map +1 -1
- package/dist/lex.d.ts +184 -2
- package/dist/lex.d.ts.map +1 -1
- package/dist/lex.js +99 -0
- package/dist/lex.js.map +1 -1
- package/dist/object.d.ts +54 -4
- package/dist/object.d.ts.map +1 -1
- package/dist/object.js +54 -4
- package/dist/object.js.map +1 -1
- package/dist/uint8array.d.ts +96 -4
- package/dist/uint8array.d.ts.map +1 -1
- package/dist/uint8array.js +96 -4
- package/dist/uint8array.js.map +1 -1
- package/dist/utf8.d.ts +77 -0
- package/dist/utf8.d.ts.map +1 -1
- package/dist/utf8.js +77 -0
- package/dist/utf8.js.map +1 -1
- package/package.json +2 -2
- package/src/blob.ts +143 -6
- package/src/cid-implementation.test.ts +7 -13
- package/src/cid.test.ts +14 -14
- package/src/cid.ts +228 -52
- package/src/lex-equals.ts +38 -0
- package/src/lex-error.ts +80 -1
- package/src/lex.ts +187 -1
- package/src/object.ts +54 -4
- package/src/uint8array.ts +96 -4
- package/src/utf8.ts +77 -0
package/src/lex.ts
CHANGED
|
@@ -1,19 +1,142 @@
|
|
|
1
1
|
import { Cid, isCid } from './cid.js'
|
|
2
2
|
import { isPlainObject, isPlainProto } from './object.js'
|
|
3
3
|
|
|
4
|
+
/**
|
|
5
|
+
* Primitive values in the Lexicon data model.
|
|
6
|
+
*
|
|
7
|
+
* Represents the basic scalar types that can appear in AT Protocol data:
|
|
8
|
+
* - `number` - Integer values only (no floats)
|
|
9
|
+
* - `string` - UTF-8 text
|
|
10
|
+
* - `boolean` - true or false
|
|
11
|
+
* - `null` - Explicit null value
|
|
12
|
+
* - `Cid` - Content Identifier (link by hash)
|
|
13
|
+
* - `Uint8Array` - Binary data (bytes)
|
|
14
|
+
*
|
|
15
|
+
* @see {@link LexValue} for the complete recursive value type
|
|
16
|
+
*/
|
|
4
17
|
export type LexScalar = number | string | boolean | null | Cid | Uint8Array
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Any valid Lexicon value (recursive type).
|
|
21
|
+
*
|
|
22
|
+
* This is the union of all types that can appear in AT Protocol Lexicon data:
|
|
23
|
+
* - {@link LexScalar} - Primitive values (number, string, boolean, null, Cid, Uint8Array)
|
|
24
|
+
* - `LexValue[]` - Arrays of LexValues
|
|
25
|
+
* - `{ [_ in string]?: LexValue }` - Objects with string keys and LexValue values
|
|
26
|
+
*
|
|
27
|
+
* @example
|
|
28
|
+
* ```typescript
|
|
29
|
+
* import type { LexValue } from '@atproto/lex-data'
|
|
30
|
+
*
|
|
31
|
+
* const scalar: LexValue = 'hello'
|
|
32
|
+
* const array: LexValue = [1, 2, 3]
|
|
33
|
+
* const object: LexValue = { name: 'Alice', age: 30 }
|
|
34
|
+
* ```
|
|
35
|
+
*
|
|
36
|
+
* @see {@link LexScalar} for primitive value types
|
|
37
|
+
* @see {@link LexMap} for object types
|
|
38
|
+
* @see {@link LexArray} for array types
|
|
39
|
+
*/
|
|
5
40
|
export type LexValue = LexScalar | LexValue[] | { [_ in string]?: LexValue }
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Object with string keys and LexValue values.
|
|
44
|
+
*
|
|
45
|
+
* Represents a plain object in the Lexicon data model where all values
|
|
46
|
+
* must be valid {@link LexValue} types.
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```typescript
|
|
50
|
+
* import type { LexMap } from '@atproto/lex-data'
|
|
51
|
+
*
|
|
52
|
+
* const user: LexMap = {
|
|
53
|
+
* name: 'Alice',
|
|
54
|
+
* age: 30,
|
|
55
|
+
* tags: ['admin', 'user']
|
|
56
|
+
* }
|
|
57
|
+
* ```
|
|
58
|
+
*
|
|
59
|
+
* @see {@link TypedLexMap} for objects with a required `$type` property
|
|
60
|
+
*/
|
|
6
61
|
export type LexMap = { [_ in string]?: LexValue }
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Array of {@link LexValue} elements.
|
|
65
|
+
*
|
|
66
|
+
* @example
|
|
67
|
+
* ```typescript
|
|
68
|
+
* import type { LexArray } from '@atproto/lex-data'
|
|
69
|
+
*
|
|
70
|
+
* const items: LexArray = [1, 'two', { three: 3 }]
|
|
71
|
+
* ```
|
|
72
|
+
*/
|
|
7
73
|
export type LexArray = LexValue[]
|
|
8
74
|
|
|
75
|
+
/**
|
|
76
|
+
* Type guard to check if a value is a valid {@link LexMap}.
|
|
77
|
+
*
|
|
78
|
+
* Returns true if the value is a plain object where all values are valid
|
|
79
|
+
* {@link LexValue} types.
|
|
80
|
+
*
|
|
81
|
+
* @param value - The value to check
|
|
82
|
+
* @returns `true` if the value is a valid LexMap
|
|
83
|
+
*
|
|
84
|
+
* @example
|
|
85
|
+
* ```typescript
|
|
86
|
+
* import { isLexMap } from '@atproto/lex-data'
|
|
87
|
+
*
|
|
88
|
+
* if (isLexMap(data)) {
|
|
89
|
+
* // data is narrowed to LexMap
|
|
90
|
+
* console.log(Object.keys(data))
|
|
91
|
+
* }
|
|
92
|
+
* ```
|
|
93
|
+
*/
|
|
9
94
|
export function isLexMap(value: unknown): value is LexMap {
|
|
10
95
|
return isPlainObject(value) && Object.values(value).every(isLexValue)
|
|
11
96
|
}
|
|
12
97
|
|
|
98
|
+
/**
|
|
99
|
+
* Type guard to check if a value is a valid {@link LexArray}.
|
|
100
|
+
*
|
|
101
|
+
* Returns true if the value is an array where all elements are valid
|
|
102
|
+
* {@link LexValue} types.
|
|
103
|
+
*
|
|
104
|
+
* @param value - The value to check
|
|
105
|
+
* @returns `true` if the value is a valid LexArray
|
|
106
|
+
*
|
|
107
|
+
* @example
|
|
108
|
+
* ```typescript
|
|
109
|
+
* import { isLexArray } from '@atproto/lex-data'
|
|
110
|
+
*
|
|
111
|
+
* if (isLexArray(data)) {
|
|
112
|
+
* // data is narrowed to LexArray
|
|
113
|
+
* data.forEach(item => console.log(item))
|
|
114
|
+
* }
|
|
115
|
+
* ```
|
|
116
|
+
*/
|
|
13
117
|
export function isLexArray(value: unknown): value is LexArray {
|
|
14
118
|
return Array.isArray(value) && value.every(isLexValue)
|
|
15
119
|
}
|
|
16
120
|
|
|
121
|
+
/**
|
|
122
|
+
* Type guard to check if a value is a valid {@link LexScalar}.
|
|
123
|
+
*
|
|
124
|
+
* Returns true if the value is one of the primitive Lexicon types:
|
|
125
|
+
* number (integer only), string, boolean, null, Cid, or Uint8Array.
|
|
126
|
+
*
|
|
127
|
+
* @param value - The value to check
|
|
128
|
+
* @returns `true` if the value is a valid LexScalar
|
|
129
|
+
*
|
|
130
|
+
* @example
|
|
131
|
+
* ```typescript
|
|
132
|
+
* import { isLexScalar } from '@atproto/lex-data'
|
|
133
|
+
*
|
|
134
|
+
* isLexScalar('hello') // true
|
|
135
|
+
* isLexScalar(42) // true
|
|
136
|
+
* isLexScalar(3.14) // false (floats not allowed)
|
|
137
|
+
* isLexScalar([1, 2]) // false (arrays are not scalars)
|
|
138
|
+
* ```
|
|
139
|
+
*/
|
|
17
140
|
export function isLexScalar(value: unknown): value is LexScalar {
|
|
18
141
|
switch (typeof value) {
|
|
19
142
|
case 'object':
|
|
@@ -29,6 +152,28 @@ export function isLexScalar(value: unknown): value is LexScalar {
|
|
|
29
152
|
}
|
|
30
153
|
}
|
|
31
154
|
|
|
155
|
+
/**
|
|
156
|
+
* Type guard to check if a value is a valid {@link LexValue}.
|
|
157
|
+
*
|
|
158
|
+
* Performs a deep check to validate that the value (and all nested values)
|
|
159
|
+
* conform to the Lexicon data model. This includes checking for:
|
|
160
|
+
* - Valid scalar types (number, string, boolean, null, Cid, Uint8Array)
|
|
161
|
+
* - Arrays containing only valid LexValues
|
|
162
|
+
* - Plain objects with string keys and valid LexValue values
|
|
163
|
+
* - No cyclic references (which cannot be serialized to JSON or CBOR)
|
|
164
|
+
*
|
|
165
|
+
* @param value - The value to check
|
|
166
|
+
* @returns `true` if the value is a valid LexValue
|
|
167
|
+
*
|
|
168
|
+
* @example
|
|
169
|
+
* ```typescript
|
|
170
|
+
* import { isLexValue } from '@atproto/lex-data'
|
|
171
|
+
*
|
|
172
|
+
* isLexValue({ name: 'Alice', tags: ['admin'] }) // true
|
|
173
|
+
* isLexValue(new Date()) // false (not a plain object)
|
|
174
|
+
* isLexValue({ fn: () => {} }) // false (functions not allowed)
|
|
175
|
+
* ```
|
|
176
|
+
*/
|
|
32
177
|
export function isLexValue(value: unknown): value is LexValue {
|
|
33
178
|
// Using a stack to avoid recursion depth issues.
|
|
34
179
|
const stack: unknown[] = [value]
|
|
@@ -77,7 +222,48 @@ export function isLexValue(value: unknown): value is LexValue {
|
|
|
77
222
|
return true
|
|
78
223
|
}
|
|
79
224
|
|
|
80
|
-
|
|
225
|
+
/**
|
|
226
|
+
* A {@link LexMap} with a required `$type` property.
|
|
227
|
+
*
|
|
228
|
+
* Used to represent typed objects in the Lexicon data model, where the
|
|
229
|
+
* `$type` property identifies the Lexicon schema that defines the object's
|
|
230
|
+
* structure.
|
|
231
|
+
*
|
|
232
|
+
* @example
|
|
233
|
+
* ```typescript
|
|
234
|
+
* import type { TypedLexMap } from '@atproto/lex-data'
|
|
235
|
+
*
|
|
236
|
+
* const post: TypedLexMap = {
|
|
237
|
+
* $type: 'app.bsky.feed.post',
|
|
238
|
+
* text: 'Hello world!',
|
|
239
|
+
* createdAt: '2024-01-01T00:00:00Z'
|
|
240
|
+
* }
|
|
241
|
+
* ```
|
|
242
|
+
*
|
|
243
|
+
* @see {@link isTypedLexMap} to check if a value is a TypedLexMap
|
|
244
|
+
*/
|
|
245
|
+
export type TypedLexMap<T extends string = string> = LexMap & { $type: T }
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Type guard to check if a value is a {@link TypedLexMap}.
|
|
249
|
+
*
|
|
250
|
+
* Returns true if the value is a valid {@link LexMap} with a non-empty
|
|
251
|
+
* `$type` string property.
|
|
252
|
+
*
|
|
253
|
+
* @param value - The LexValue to check
|
|
254
|
+
* @returns `true` if the value is a TypedLexMap
|
|
255
|
+
*
|
|
256
|
+
* @example
|
|
257
|
+
* ```typescript
|
|
258
|
+
* import { isTypedLexMap } from '@atproto/lex-data'
|
|
259
|
+
*
|
|
260
|
+
* const data = { $type: 'app.bsky.feed.post', text: 'Hello' }
|
|
261
|
+
*
|
|
262
|
+
* if (isTypedLexMap(data)) {
|
|
263
|
+
* console.log(data.$type) // 'app.bsky.feed.post'
|
|
264
|
+
* }
|
|
265
|
+
* ```
|
|
266
|
+
*/
|
|
81
267
|
export function isTypedLexMap(value: LexValue): value is TypedLexMap {
|
|
82
268
|
return (
|
|
83
269
|
isLexMap(value) && typeof value.$type === 'string' && value.$type.length > 0
|
package/src/object.ts
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Checks whether the input is an object (not null).
|
|
3
|
+
*
|
|
4
|
+
* Returns true for any non-null value with typeof 'object', including
|
|
5
|
+
* arrays, plain objects, class instances, etc.
|
|
6
|
+
*
|
|
7
|
+
* @param input - The value to check
|
|
8
|
+
* @returns `true` if the input is an object (not null)
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* import { isObject } from '@atproto/lex-data'
|
|
13
|
+
*
|
|
14
|
+
* isObject({}) // true
|
|
15
|
+
* isObject([1, 2, 3]) // true
|
|
16
|
+
* isObject(new Date()) // true
|
|
17
|
+
* isObject(null) // false
|
|
18
|
+
* isObject('string') // false
|
|
19
|
+
* ```
|
|
3
20
|
*/
|
|
4
21
|
export function isObject(input: unknown): input is object {
|
|
5
22
|
return input != null && typeof input === 'object'
|
|
@@ -9,16 +26,49 @@ const ObjectProto = Object.prototype
|
|
|
9
26
|
const ObjectToString = Object.prototype.toString
|
|
10
27
|
|
|
11
28
|
/**
|
|
12
|
-
* Checks whether the input is
|
|
13
|
-
*
|
|
29
|
+
* Checks whether the input is a plain object.
|
|
30
|
+
*
|
|
31
|
+
* A plain object is an object (not null) whose prototype is either null
|
|
32
|
+
* or `Object.prototype`. This excludes arrays, class instances, and other
|
|
33
|
+
* special objects.
|
|
34
|
+
*
|
|
35
|
+
* @param input - The value to check
|
|
36
|
+
* @returns `true` if the input is a plain object
|
|
37
|
+
*
|
|
38
|
+
* @example
|
|
39
|
+
* ```typescript
|
|
40
|
+
* import { isPlainObject } from '@atproto/lex-data'
|
|
41
|
+
*
|
|
42
|
+
* isPlainObject({}) // true
|
|
43
|
+
* isPlainObject({ a: 1 }) // true
|
|
44
|
+
* isPlainObject(Object.create(null)) // true
|
|
45
|
+
* isPlainObject([1, 2, 3]) // false
|
|
46
|
+
* isPlainObject(new Date()) // false
|
|
47
|
+
* isPlainObject(null) // false
|
|
48
|
+
* ```
|
|
14
49
|
*/
|
|
15
50
|
export function isPlainObject(input: unknown) {
|
|
16
51
|
return isObject(input) && isPlainProto(input)
|
|
17
52
|
}
|
|
18
53
|
|
|
19
54
|
/**
|
|
20
|
-
* Checks whether the prototype of
|
|
21
|
-
*
|
|
55
|
+
* Checks whether the prototype of an object is plain (null or Object.prototype).
|
|
56
|
+
*
|
|
57
|
+
* Use this when the input is already known to be a non-null object, so the
|
|
58
|
+
* null check performed by {@link isPlainObject} does not have to be repeated.
|
|
59
|
+
*
|
|
60
|
+
* @param input - The object to check (must be non-null)
|
|
61
|
+
* @returns `true` if the object's prototype is plain
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* ```typescript
|
|
65
|
+
* import { isPlainProto } from '@atproto/lex-data'
|
|
66
|
+
*
|
|
67
|
+
* isPlainProto({}) // true
|
|
68
|
+
* isPlainProto(Object.create(null)) // true
|
|
69
|
+
* isPlainProto([1, 2, 3]) // false (Array.prototype)
|
|
70
|
+
* isPlainProto(new Date()) // false (Date.prototype)
|
|
71
|
+
* ```
|
|
22
72
|
*/
|
|
23
73
|
export function isPlainProto(input: object): input is Record<string, unknown> {
|
|
24
74
|
const proto = Object.getPrototypeOf(input)
|
package/src/uint8array.ts
CHANGED
|
@@ -21,7 +21,21 @@ export type { Base64Alphabet }
|
|
|
21
21
|
/**
|
|
22
22
|
* Encodes a Uint8Array into a base64 string.
|
|
23
23
|
*
|
|
24
|
+
* Uses native Uint8Array.prototype.toBase64 when available (Node.js 25+, modern browsers),
|
|
25
|
+
* falling back to Node.js Buffer or a ponyfill implementation.
|
|
26
|
+
*
|
|
27
|
+
* @param bytes - The binary data to encode
|
|
28
|
+
* @param alphabet - The base64 alphabet to use ('base64' or 'base64url'), defaults to 'base64'
|
|
24
29
|
* @returns The base64 encoded string
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```typescript
|
|
33
|
+
* import { toBase64 } from '@atproto/lex-data'
|
|
34
|
+
*
|
|
35
|
+
* const bytes = new Uint8Array([72, 101, 108, 108, 111])
|
|
36
|
+
* toBase64(bytes) // 'SGVsbG8='
|
|
37
|
+
* toBase64(bytes, 'base64url') // 'SGVsbG8' (URL-safe, no padding)
|
|
38
|
+
* ```
|
|
25
39
|
*/
|
|
26
40
|
export const toBase64: (
|
|
27
41
|
bytes: Uint8Array,
|
|
@@ -32,11 +46,24 @@ export const toBase64: (
|
|
|
32
46
|
toBase64Ponyfill
|
|
33
47
|
|
|
34
48
|
/**
|
|
35
|
-
* Decodes a base64 string into a Uint8Array.
|
|
36
|
-
*
|
|
49
|
+
* Decodes a base64 string into a Uint8Array.
|
|
50
|
+
*
|
|
51
|
+
* Supports both padded and unpadded base64 strings. Uses native
|
|
52
|
+
* Uint8Array.fromBase64 when available, falling back to Node.js Buffer
|
|
53
|
+
* or a ponyfill implementation.
|
|
37
54
|
*
|
|
38
|
-
* @
|
|
55
|
+
* @param b64 - The base64 string to decode
|
|
56
|
+
* @param alphabet - The base64 alphabet to use ('base64' or 'base64url'), defaults to 'base64'
|
|
57
|
+
* @returns The decoded binary data
|
|
39
58
|
* @throws If the input is not a valid base64 string
|
|
59
|
+
*
|
|
60
|
+
* @example
|
|
61
|
+
* ```typescript
|
|
62
|
+
* import { fromBase64 } from '@atproto/lex-data'
|
|
63
|
+
*
|
|
64
|
+
* fromBase64('SGVsbG8=') // Uint8Array([72, 101, 108, 108, 111])
|
|
65
|
+
* fromBase64('SGVsbG8', 'base64url') // Same, URL-safe alphabet
|
|
66
|
+
* ```
|
|
40
67
|
*/
|
|
41
68
|
export const fromBase64: (
|
|
42
69
|
b64: string,
|
|
@@ -54,6 +81,21 @@ if (toBase64 === toBase64Ponyfill || fromBase64 === fromBase64Ponyfill) {
|
|
|
54
81
|
)
|
|
55
82
|
}
|
|
56
83
|
|
|
84
|
+
/**
|
|
85
|
+
* Returns the input if it is a Uint8Array, otherwise returns undefined.
|
|
86
|
+
*
|
|
87
|
+
* @param input - The value to check
|
|
88
|
+
* @returns The input if it's a Uint8Array, otherwise undefined
|
|
89
|
+
*
|
|
90
|
+
* @example
|
|
91
|
+
* ```typescript
|
|
92
|
+
* import { ifUint8Array } from '@atproto/lex-data'
|
|
93
|
+
*
|
|
94
|
+
* ifUint8Array(new Uint8Array([1, 2])) // Uint8Array([1, 2])
|
|
95
|
+
* ifUint8Array('not binary') // undefined
|
|
96
|
+
* ifUint8Array(new ArrayBuffer(4)) // undefined
|
|
97
|
+
* ```
|
|
98
|
+
*/
|
|
57
99
|
export function ifUint8Array(input: unknown): Uint8Array | undefined {
|
|
58
100
|
if (input instanceof Uint8Array) {
|
|
59
101
|
return input
|
|
@@ -65,7 +107,23 @@ export function ifUint8Array(input: unknown): Uint8Array | undefined {
|
|
|
65
107
|
/**
|
|
66
108
|
* Coerces various binary data representations into a Uint8Array.
|
|
67
109
|
*
|
|
68
|
-
*
|
|
110
|
+
* Handles the following input types:
|
|
111
|
+
* - `Uint8Array` - Returned as-is
|
|
112
|
+
* - `ArrayBufferView` (e.g., DataView, other TypedArrays) - Converted to Uint8Array
|
|
113
|
+
* - `ArrayBuffer` - Wrapped in a Uint8Array
|
|
114
|
+
*
|
|
115
|
+
* @param input - The value to convert
|
|
116
|
+
* @returns A Uint8Array, or `undefined` if the input could not be converted
|
|
117
|
+
*
|
|
118
|
+
* @example
|
|
119
|
+
* ```typescript
|
|
120
|
+
* import { asUint8Array } from '@atproto/lex-data'
|
|
121
|
+
*
|
|
122
|
+
* asUint8Array(new Uint8Array([1, 2])) // Uint8Array([1, 2])
|
|
123
|
+
* asUint8Array(new ArrayBuffer(4)) // Uint8Array of length 4
|
|
124
|
+
* asUint8Array(new Int16Array([1, 2])) // Uint8Array view of the buffer
|
|
125
|
+
* asUint8Array('string') // undefined
|
|
126
|
+
* ```
|
|
69
127
|
*/
|
|
70
128
|
export function asUint8Array(input: unknown): Uint8Array | undefined {
|
|
71
129
|
if (input instanceof Uint8Array) {
|
|
@@ -87,6 +145,22 @@ export function asUint8Array(input: unknown): Uint8Array | undefined {
|
|
|
87
145
|
return undefined
|
|
88
146
|
}
|
|
89
147
|
|
|
148
|
+
/**
|
|
149
|
+
* Compares two Uint8Arrays for byte-by-byte equality.
|
|
150
|
+
*
|
|
151
|
+
* @param a - First Uint8Array to compare
|
|
152
|
+
* @param b - Second Uint8Array to compare
|
|
153
|
+
* @returns `true` if both arrays have the same length and identical bytes
|
|
154
|
+
*
|
|
155
|
+
* @example
|
|
156
|
+
* ```typescript
|
|
157
|
+
* import { ui8Equals } from '@atproto/lex-data'
|
|
158
|
+
*
|
|
159
|
+
* ui8Equals(new Uint8Array([1, 2]), new Uint8Array([1, 2])) // true
|
|
160
|
+
* ui8Equals(new Uint8Array([1, 2]), new Uint8Array([1, 3])) // false
|
|
161
|
+
* ui8Equals(new Uint8Array([1]), new Uint8Array([1, 2])) // false
|
|
162
|
+
* ```
|
|
163
|
+
*/
|
|
90
164
|
export function ui8Equals(a: Uint8Array, b: Uint8Array): boolean {
|
|
91
165
|
if (a.byteLength !== b.byteLength) {
|
|
92
166
|
return false
|
|
@@ -101,5 +175,23 @@ export function ui8Equals(a: Uint8Array, b: Uint8Array): boolean {
|
|
|
101
175
|
return true
|
|
102
176
|
}
|
|
103
177
|
|
|
178
|
+
/**
|
|
179
|
+
* Concatenates multiple Uint8Arrays into a single Uint8Array.
|
|
180
|
+
*
|
|
181
|
+
* Uses Node.js Buffer.concat when available for performance,
|
|
182
|
+
* falling back to a ponyfill implementation.
|
|
183
|
+
*
|
|
184
|
+
* @param arrays - The Uint8Arrays to concatenate
|
|
185
|
+
* @returns A new Uint8Array containing all input bytes in order
|
|
186
|
+
*
|
|
187
|
+
* @example
|
|
188
|
+
* ```typescript
|
|
189
|
+
* import { ui8Concat } from '@atproto/lex-data'
|
|
190
|
+
*
|
|
191
|
+
* const a = new Uint8Array([1, 2])
|
|
192
|
+
* const b = new Uint8Array([3, 4])
|
|
193
|
+
* ui8Concat([a, b]) // Uint8Array([1, 2, 3, 4])
|
|
194
|
+
* ```
|
|
195
|
+
*/
|
|
104
196
|
export const ui8Concat =
|
|
105
197
|
/* v8 ignore next -- @preserve */ ui8ConcatNode ?? ui8ConcatPonyfill
|
package/src/utf8.ts
CHANGED
|
@@ -7,6 +7,29 @@ import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
|
|
|
7
7
|
import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
|
|
8
8
|
import { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'
|
|
9
9
|
|
|
10
|
+
/**
|
|
11
|
+
* Counts the number of grapheme clusters (user-perceived characters) in a string.
|
|
12
|
+
*
|
|
13
|
+
* Grapheme clusters represent what users typically think of as "characters",
|
|
14
|
+
* handling complex cases like:
|
|
15
|
+
* - Emoji with skin tones and ZWJ sequences (e.g., family emoji)
|
|
16
|
+
* - Combined characters (e.g., 'e' + combining accent)
|
|
17
|
+
* - Regional indicator pairs (flag emoji)
|
|
18
|
+
*
|
|
19
|
+
* Uses native {@link Intl.Segmenter} when available, falling back to a ponyfill.
|
|
20
|
+
*
|
|
21
|
+
* @param str - The string to measure
|
|
22
|
+
* @returns The number of grapheme clusters
|
|
23
|
+
*
|
|
24
|
+
* @example
|
|
25
|
+
* ```typescript
|
|
26
|
+
* import { graphemeLen } from '@atproto/lex-data'
|
|
27
|
+
*
|
|
28
|
+
* graphemeLen('hello') // 5
|
|
29
|
+
* graphemeLen('cafe\u0301') // 4 (cafe with combining accent)
|
|
30
|
+
* graphemeLen('\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}') // 1 (family emoji)
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
10
33
|
export const graphemeLen: (str: string) => number =
|
|
11
34
|
/* v8 ignore next -- @preserve */ graphemeLenNative ?? graphemeLenPonyfill
|
|
12
35
|
|
|
@@ -18,12 +41,66 @@ if (graphemeLen === graphemeLenPonyfill) {
|
|
|
18
41
|
)
|
|
19
42
|
}
|
|
20
43
|
|
|
44
|
+
/**
|
|
45
|
+
* Calculates the UTF-8 byte length of a string.
|
|
46
|
+
*
|
|
47
|
+
* Returns the number of bytes the string would occupy when encoded as UTF-8.
|
|
48
|
+
* This is important for Lexicon validation where schemas specify byte limits.
|
|
49
|
+
*
|
|
50
|
+
* Uses Node.js Buffer.byteLength when available for performance,
|
|
51
|
+
* falling back to a computed implementation.
|
|
52
|
+
*
|
|
53
|
+
* @param str - The string to measure
|
|
54
|
+
* @returns The UTF-8 byte length
|
|
55
|
+
*
|
|
56
|
+
* @example
|
|
57
|
+
* ```typescript
|
|
58
|
+
* import { utf8Len } from '@atproto/lex-data'
|
|
59
|
+
*
|
|
60
|
+
* utf8Len('hello') // 5 (ASCII: 1 byte per char)
|
|
61
|
+
* utf8Len('\u00e9') // 2 (e with accent: 2 bytes)
|
|
62
|
+
* utf8Len('\u{1F600}') // 4 (emoji: 4 bytes)
|
|
63
|
+
* utf8Len('\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}') // 25 (family emoji)
|
|
64
|
+
* ```
|
|
65
|
+
*/
|
|
21
66
|
export const utf8Len: (string: string) => number =
|
|
22
67
|
/* v8 ignore next -- @preserve */ utf8LenNode ?? utf8LenCompute
|
|
23
68
|
|
|
69
|
+
/**
|
|
70
|
+
* Encodes a UTF-8 string to base64.
|
|
71
|
+
*
|
|
72
|
+
* First encodes the string as UTF-8 bytes, then encodes those bytes as base64.
|
|
73
|
+
*
|
|
74
|
+
* @param str - The string to encode
|
|
75
|
+
* @param alphabet - The base64 alphabet to use ('base64' or 'base64url')
|
|
76
|
+
* @returns The base64-encoded string
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* ```typescript
|
|
80
|
+
* import { utf8ToBase64 } from '@atproto/lex-data'
|
|
81
|
+
*
|
|
82
|
+
* utf8ToBase64('Hello') // 'SGVsbG8='
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
24
85
|
export const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =
|
|
25
86
|
/* v8 ignore next -- @preserve */ utf8ToBase64Node ?? utf8ToBase64Ponyfill
|
|
26
87
|
|
|
88
|
+
/**
|
|
89
|
+
* Decodes a base64 string to UTF-8.
|
|
90
|
+
*
|
|
91
|
+
* Decodes the base64 to bytes, then interprets those bytes as UTF-8 text.
|
|
92
|
+
*
|
|
93
|
+
* @param b64 - The base64 string to decode
|
|
94
|
+
* @param alphabet - The base64 alphabet to use ('base64' or 'base64url')
|
|
95
|
+
* @returns The decoded UTF-8 string
|
|
96
|
+
*
|
|
97
|
+
* @example
|
|
98
|
+
* ```typescript
|
|
99
|
+
* import { utf8FromBase64 } from '@atproto/lex-data'
|
|
100
|
+
*
|
|
101
|
+
* utf8FromBase64('SGVsbG8=') // 'Hello'
|
|
102
|
+
* ```
|
|
103
|
+
*/
|
|
27
104
|
export const utf8FromBase64: (
|
|
28
105
|
b64: string,
|
|
29
106
|
alphabet?: Base64Alphabet,
|