@atproto/lex-data 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lex.ts CHANGED
@@ -1,19 +1,142 @@
1
1
  import { Cid, isCid } from './cid.js'
2
2
  import { isPlainObject, isPlainProto } from './object.js'
3
3
 
4
+ /**
5
+ * Primitive values in the Lexicon data model.
6
+ *
7
+ * Represents the basic scalar types that can appear in AT Protocol data:
8
+ * - `number` - Integer values only (no floats)
9
+ * - `string` - UTF-8 text
10
+ * - `boolean` - true or false
11
+ * - `null` - Explicit null value
12
+ * - `Cid` - Content Identifier (link by hash)
13
+ * - `Uint8Array` - Binary data (bytes)
14
+ *
15
+ * @see {@link LexValue} for the complete recursive value type
16
+ */
4
17
  export type LexScalar = number | string | boolean | null | Cid | Uint8Array
18
+
19
+ /**
20
+ * Any valid Lexicon value (recursive type).
21
+ *
22
+ * This is the union of all types that can appear in AT Protocol Lexicon data:
23
+ * - {@link LexScalar} - Primitive values (number, string, boolean, null, Cid, Uint8Array)
24
+ * - `LexValue[]` - Arrays of LexValues
25
+ * - `{ [_ in string]?: LexValue }` - Objects with string keys and optional LexValue values
26
+ *
27
+ * @example
28
+ * ```typescript
29
+ * import type { LexValue } from '@atproto/lex'
30
+ *
31
+ * const scalar: LexValue = 'hello'
32
+ * const array: LexValue = [1, 2, 3]
33
+ * const object: LexValue = { name: 'Alice', age: 30 }
34
+ * ```
35
+ *
36
+ * @see {@link LexScalar} for primitive value types
37
+ * @see {@link LexMap} for object types
38
+ * @see {@link LexArray} for array types
39
+ */
5
40
  export type LexValue = LexScalar | LexValue[] | { [_ in string]?: LexValue }
41
+
42
+ /**
43
+ * Object with string keys and LexValue values.
44
+ *
45
+ * Represents a plain object in the Lexicon data model where all values
46
+ * must be valid {@link LexValue} types.
47
+ *
48
+ * @example
49
+ * ```typescript
50
+ * import type { LexMap } from '@atproto/lex'
51
+ *
52
+ * const user: LexMap = {
53
+ * name: 'Alice',
54
+ * age: 30,
55
+ * tags: ['admin', 'user']
56
+ * }
57
+ * ```
58
+ *
59
+ * @see {@link TypedLexMap} for objects with a required `$type` property
60
+ */
6
61
  export type LexMap = { [_ in string]?: LexValue }
62
+
63
+ /**
64
+ * Array of {@link LexValue} elements.
65
+ *
66
+ * @example
67
+ * ```typescript
68
+ * import type { LexArray } from '@atproto/lex'
69
+ *
70
+ * const items: LexArray = [1, 'two', { three: 3 }]
71
+ * ```
72
+ */
7
73
  export type LexArray = LexValue[]
8
74
 
75
+ /**
76
+ * Type guard to check if a value is a valid {@link LexMap}.
77
+ *
78
+ * Returns true if the value is a plain object where all values are valid
79
+ * {@link LexValue} types.
80
+ *
81
+ * @param value - The value to check
82
+ * @returns `true` if the value is a valid LexMap
83
+ *
84
+ * @example
85
+ * ```typescript
86
+ * import { isLexMap } from '@atproto/lex'
87
+ *
88
+ * if (isLexMap(data)) {
89
+ * // data is narrowed to LexMap
90
+ * console.log(Object.keys(data))
91
+ * }
92
+ * ```
93
+ */
9
94
  export function isLexMap(value: unknown): value is LexMap {
10
95
  return isPlainObject(value) && Object.values(value).every(isLexValue)
11
96
  }
12
97
 
98
+ /**
99
+ * Type guard to check if a value is a valid {@link LexArray}.
100
+ *
101
+ * Returns true if the value is an array where all elements are valid
102
+ * {@link LexValue} types.
103
+ *
104
+ * @param value - The value to check
105
+ * @returns `true` if the value is a valid LexArray
106
+ *
107
+ * @example
108
+ * ```typescript
109
+ * import { isLexArray } from '@atproto/lex'
110
+ *
111
+ * if (isLexArray(data)) {
112
+ * // data is narrowed to LexArray
113
+ * data.forEach(item => console.log(item))
114
+ * }
115
+ * ```
116
+ */
13
117
  export function isLexArray(value: unknown): value is LexArray {
14
118
  return Array.isArray(value) && value.every(isLexValue)
15
119
  }
16
120
 
121
+ /**
122
+ * Type guard to check if a value is a valid {@link LexScalar}.
123
+ *
124
+ * Returns true if the value is one of the primitive Lexicon types:
125
+ * number (integer only), string, boolean, null, Cid, or Uint8Array.
126
+ *
127
+ * @param value - The value to check
128
+ * @returns `true` if the value is a valid LexScalar
129
+ *
130
+ * @example
131
+ * ```typescript
132
+ * import { isLexScalar } from '@atproto/lex'
133
+ *
134
+ * isLexScalar('hello') // true
135
+ * isLexScalar(42) // true
136
+ * isLexScalar(3.14) // false (floats not allowed)
137
+ * isLexScalar([1, 2]) // false (arrays are not scalars)
138
+ * ```
139
+ */
17
140
  export function isLexScalar(value: unknown): value is LexScalar {
18
141
  switch (typeof value) {
19
142
  case 'object':
@@ -29,6 +152,28 @@ export function isLexScalar(value: unknown): value is LexScalar {
29
152
  }
30
153
  }
31
154
 
155
+ /**
156
+ * Type guard to check if a value is a valid {@link LexValue}.
157
+ *
158
+ * Performs a deep check to validate that the value (and all nested values)
159
+ * conform to the Lexicon data model. This includes checking for:
160
+ * - Valid scalar types (number, string, boolean, null, Cid, Uint8Array)
161
+ * - Arrays containing only valid LexValues
162
+ * - Plain objects with string keys and valid LexValue values
163
+ * - No cyclic references (which cannot be serialized to JSON or CBOR)
164
+ *
165
+ * @param value - The value to check
166
+ * @returns `true` if the value is a valid LexValue
167
+ *
168
+ * @example
169
+ * ```typescript
170
+ * import { isLexValue } from '@atproto/lex'
171
+ *
172
+ * isLexValue({ name: 'Alice', tags: ['admin'] }) // true
173
+ * isLexValue(new Date()) // false (not a plain object)
174
+ * isLexValue({ fn: () => {} }) // false (functions not allowed)
175
+ * ```
176
+ */
32
177
  export function isLexValue(value: unknown): value is LexValue {
33
178
  // Using a stack to avoid recursion depth issues.
34
179
  const stack: unknown[] = [value]
@@ -77,7 +222,48 @@ export function isLexValue(value: unknown): value is LexValue {
77
222
  return true
78
223
  }
79
224
 
80
- export type TypedLexMap = LexMap & { $type: string }
225
+ /**
226
+ * A {@link LexMap} with a required `$type` property.
227
+ *
228
+ * Used to represent typed objects in the Lexicon data model, where the
229
+ * `$type` property identifies the Lexicon schema that defines the object's
230
+ * structure.
231
+ *
232
+ * @example
233
+ * ```typescript
234
+ * import type { TypedLexMap } from '@atproto/lex'
235
+ *
236
+ * const post: TypedLexMap = {
237
+ * $type: 'app.bsky.feed.post',
238
+ * text: 'Hello world!',
239
+ * createdAt: '2024-01-01T00:00:00Z'
240
+ * }
241
+ * ```
242
+ *
243
+ * @see {@link isTypedLexMap} to check if a value is a TypedLexMap
244
+ */
245
+ export type TypedLexMap<T extends string = string> = LexMap & { $type: T }
246
+
247
+ /**
248
+ * Type guard to check if a value is a {@link TypedLexMap}.
249
+ *
250
+ * Returns true if the value is a valid {@link LexMap} with a non-empty
251
+ * `$type` string property.
252
+ *
253
+ * @param value - The LexValue to check
254
+ * @returns `true` if the value is a TypedLexMap
255
+ *
256
+ * @example
257
+ * ```typescript
258
+ * import { isTypedLexMap } from '@atproto/lex'
259
+ *
260
+ * const data = { $type: 'app.bsky.feed.post', text: 'Hello' }
261
+ *
262
+ * if (isTypedLexMap(data)) {
263
+ * console.log(data.$type) // 'app.bsky.feed.post'
264
+ * }
265
+ * ```
266
+ */
81
267
  export function isTypedLexMap(value: LexValue): value is TypedLexMap {
82
268
  return (
83
269
  isLexMap(value) && typeof value.$type === 'string' && value.$type.length > 0
package/src/object.ts CHANGED
@@ -1,5 +1,22 @@
1
1
  /**
2
2
  * Checks whether the input is an object (not null).
3
+ *
4
+ * Returns true for any non-null value with typeof 'object', including
5
+ * arrays, plain objects, class instances, etc.
6
+ *
7
+ * @param input - The value to check
8
+ * @returns `true` if the input is an object (not null)
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import { isObject } from '@atproto/lex-data'
13
+ *
14
+ * isObject({}) // true
15
+ * isObject([1, 2, 3]) // true
16
+ * isObject(new Date()) // true
17
+ * isObject(null) // false
18
+ * isObject('string') // false
19
+ * ```
3
20
  */
4
21
  export function isObject(input: unknown): input is object {
5
22
  return input != null && typeof input === 'object'
@@ -9,16 +26,49 @@ const ObjectProto = Object.prototype
9
26
  const ObjectToString = Object.prototype.toString
10
27
 
11
28
  /**
12
- * Checks whether the input is an object (not null) whose prototype is either
13
- * null or `Object.prototype`.
29
+ * Checks whether the input is a plain object.
30
+ *
31
+ * A plain object is an object (not null) whose prototype is either null
32
+ * or `Object.prototype`. This excludes arrays, class instances, and other
33
+ * special objects.
34
+ *
35
+ * @param input - The value to check
36
+ * @returns `true` if the input is a plain object
37
+ *
38
+ * @example
39
+ * ```typescript
40
+ * import { isPlainObject } from '@atproto/lex-data'
41
+ *
42
+ * isPlainObject({}) // true
43
+ * isPlainObject({ a: 1 }) // true
44
+ * isPlainObject(Object.create(null)) // true
45
+ * isPlainObject([1, 2, 3]) // false
46
+ * isPlainObject(new Date()) // false
47
+ * isPlainObject(null) // false
48
+ * ```
14
49
  */
15
50
  export function isPlainObject(input: unknown) {
16
51
  return isObject(input) && isPlainProto(input)
17
52
  }
18
53
 
19
54
  /**
20
- * Checks whether the prototype of the input object is either null or
21
- * `Object.prototype`.
55
+ * Checks whether the prototype of an object is plain (null or Object.prototype).
56
+ *
57
+ * Use this when the caller has already established that the input is a
58
+ * non-null object, so that the null check does not have to be repeated.
59
+ *
60
+ * @param input - The object to check (must be non-null)
61
+ * @returns `true` if the object's prototype is plain
62
+ *
63
+ * @example
64
+ * ```typescript
65
+ * import { isPlainProto } from '@atproto/lex-data'
66
+ *
67
+ * isPlainProto({}) // true
68
+ * isPlainProto(Object.create(null)) // true
69
+ * isPlainProto([1, 2, 3]) // false (Array.prototype)
70
+ * isPlainProto(new Date()) // false (Date.prototype)
71
+ * ```
22
72
  */
23
73
  export function isPlainProto(input: object): input is Record<string, unknown> {
24
74
  const proto = Object.getPrototypeOf(input)
package/src/uint8array.ts CHANGED
@@ -21,7 +21,21 @@ export type { Base64Alphabet }
21
21
  /**
22
22
  * Encodes a Uint8Array into a base64 string.
23
23
  *
24
+ * Uses native Uint8Array.prototype.toBase64 when available (Node.js 24+, modern browsers),
25
+ * falling back to Node.js Buffer or a ponyfill implementation.
26
+ *
27
+ * @param bytes - The binary data to encode
28
+ * @param alphabet - The base64 alphabet to use ('base64' or 'base64url'), defaults to 'base64'
24
29
  * @returns The base64 encoded string
30
+ *
31
+ * @example
32
+ * ```typescript
33
+ * import { toBase64 } from '@atproto/lex-data'
34
+ *
35
+ * const bytes = new Uint8Array([72, 101, 108, 108, 111])
36
+ * toBase64(bytes) // 'SGVsbG8='
37
+ * toBase64(bytes, 'base64url') // 'SGVsbG8' (URL-safe, no padding)
38
+ * ```
25
39
  */
26
40
  export const toBase64: (
27
41
  bytes: Uint8Array,
@@ -32,11 +46,24 @@ export const toBase64: (
32
46
  toBase64Ponyfill
33
47
 
34
48
  /**
35
- * Decodes a base64 string into a Uint8Array. This function supports both padded
36
- * and unpadded base64 strings.
49
+ * Decodes a base64 string into a Uint8Array.
50
+ *
51
+ * Supports both padded and unpadded base64 strings. Uses native
52
+ * Uint8Array.fromBase64 when available, falling back to Node.js Buffer
53
+ * or a ponyfill implementation.
37
54
  *
38
- * @returns The decoded {@link Uint8Array}
55
+ * @param b64 - The base64 string to decode
56
+ * @param alphabet - The base64 alphabet to use ('base64' or 'base64url'), defaults to 'base64'
57
+ * @returns The decoded binary data
39
58
  * @throws If the input is not a valid base64 string
59
+ *
60
+ * @example
61
+ * ```typescript
62
+ * import { fromBase64 } from '@atproto/lex-data'
63
+ *
64
+ * fromBase64('SGVsbG8=') // Uint8Array([72, 101, 108, 108, 111])
65
+ * fromBase64('SGVsbG8', 'base64url') // Same, URL-safe alphabet
66
+ * ```
40
67
  */
41
68
  export const fromBase64: (
42
69
  b64: string,
@@ -54,6 +81,21 @@ if (toBase64 === toBase64Ponyfill || fromBase64 === fromBase64Ponyfill) {
54
81
  )
55
82
  }
56
83
 
84
+ /**
85
+ * Returns the input if it is a Uint8Array, otherwise returns undefined.
86
+ *
87
+ * @param input - The value to check
88
+ * @returns The input if it's a Uint8Array, otherwise undefined
89
+ *
90
+ * @example
91
+ * ```typescript
92
+ * import { ifUint8Array } from '@atproto/lex-data'
93
+ *
94
+ * ifUint8Array(new Uint8Array([1, 2])) // Uint8Array([1, 2])
95
+ * ifUint8Array('not binary') // undefined
96
+ * ifUint8Array(new ArrayBuffer(4)) // undefined
97
+ * ```
98
+ */
57
99
  export function ifUint8Array(input: unknown): Uint8Array | undefined {
58
100
  if (input instanceof Uint8Array) {
59
101
  return input
@@ -65,7 +107,23 @@ export function ifUint8Array(input: unknown): Uint8Array | undefined {
65
107
  /**
66
108
  * Coerces various binary data representations into a Uint8Array.
67
109
  *
68
- * @return `undefined` if the input could not be coerced into a {@link Uint8Array}.
110
+ * Handles the following input types:
111
+ * - `Uint8Array` - Returned as-is
112
+ * - `ArrayBufferView` (e.g., DataView, other TypedArrays) - Converted to Uint8Array
113
+ * - `ArrayBuffer` - Wrapped in a Uint8Array
114
+ *
115
+ * @param input - The value to convert
116
+ * @returns A Uint8Array, or `undefined` if the input could not be converted
117
+ *
118
+ * @example
119
+ * ```typescript
120
+ * import { asUint8Array } from '@atproto/lex-data'
121
+ *
122
+ * asUint8Array(new Uint8Array([1, 2])) // Uint8Array([1, 2])
123
+ * asUint8Array(new ArrayBuffer(4)) // Uint8Array of length 4
124
+ * asUint8Array(new Int16Array([1, 2])) // Uint8Array view of the buffer
125
+ * asUint8Array('string') // undefined
126
+ * ```
69
127
  */
70
128
  export function asUint8Array(input: unknown): Uint8Array | undefined {
71
129
  if (input instanceof Uint8Array) {
@@ -87,6 +145,22 @@ export function asUint8Array(input: unknown): Uint8Array | undefined {
87
145
  return undefined
88
146
  }
89
147
 
148
+ /**
149
+ * Compares two Uint8Arrays for byte-by-byte equality.
150
+ *
151
+ * @param a - First Uint8Array to compare
152
+ * @param b - Second Uint8Array to compare
153
+ * @returns `true` if both arrays have the same length and identical bytes
154
+ *
155
+ * @example
156
+ * ```typescript
157
+ * import { ui8Equals } from '@atproto/lex-data'
158
+ *
159
+ * ui8Equals(new Uint8Array([1, 2]), new Uint8Array([1, 2])) // true
160
+ * ui8Equals(new Uint8Array([1, 2]), new Uint8Array([1, 3])) // false
161
+ * ui8Equals(new Uint8Array([1]), new Uint8Array([1, 2])) // false
162
+ * ```
163
+ */
90
164
  export function ui8Equals(a: Uint8Array, b: Uint8Array): boolean {
91
165
  if (a.byteLength !== b.byteLength) {
92
166
  return false
@@ -101,5 +175,23 @@ export function ui8Equals(a: Uint8Array, b: Uint8Array): boolean {
101
175
  return true
102
176
  }
103
177
 
178
+ /**
179
+ * Concatenates multiple Uint8Arrays into a single Uint8Array.
180
+ *
181
+ * Uses Node.js Buffer.concat when available for performance,
182
+ * falling back to a ponyfill implementation.
183
+ *
184
+ * @param arrays - The Uint8Arrays to concatenate
185
+ * @returns A new Uint8Array containing all input bytes in order
186
+ *
187
+ * @example
188
+ * ```typescript
189
+ * import { ui8Concat } from '@atproto/lex-data'
190
+ *
191
+ * const a = new Uint8Array([1, 2])
192
+ * const b = new Uint8Array([3, 4])
193
+ * ui8Concat([a, b]) // Uint8Array([1, 2, 3, 4])
194
+ * ```
195
+ */
104
196
  export const ui8Concat =
105
197
  /* v8 ignore next -- @preserve */ ui8ConcatNode ?? ui8ConcatPonyfill
package/src/utf8.ts CHANGED
@@ -7,6 +7,29 @@ import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
7
7
  import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
8
8
  import { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'
9
9
 
10
+ /**
11
+ * Counts the number of grapheme clusters (user-perceived characters) in a string.
12
+ *
13
+ * Grapheme clusters represent what users typically think of as "characters",
14
+ * handling complex cases like:
15
+ * - Emoji with skin tones and ZWJ sequences (e.g., family emoji)
16
+ * - Combined characters (e.g., 'e' + combining accent)
17
+ * - Regional indicator pairs (flag emoji)
18
+ *
19
+ * Uses native {@link Intl.Segmenter} when available, falling back to a ponyfill.
20
+ *
21
+ * @param str - The string to measure
22
+ * @returns The number of grapheme clusters
23
+ *
24
+ * @example
25
+ * ```typescript
26
+ * import { graphemeLen } from '@atproto/lex-data'
27
+ *
28
+ * graphemeLen('hello') // 5
29
+ * graphemeLen('cafe\u0301') // 4 (cafe with combining accent)
30
+ * graphemeLen('\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}') // 1 (family emoji)
31
+ * ```
32
+ */
10
33
  export const graphemeLen: (str: string) => number =
11
34
  /* v8 ignore next -- @preserve */ graphemeLenNative ?? graphemeLenPonyfill
12
35
 
@@ -18,12 +41,66 @@ if (graphemeLen === graphemeLenPonyfill) {
18
41
  )
19
42
  }
20
43
 
44
+ /**
45
+ * Calculates the UTF-8 byte length of a string.
46
+ *
47
+ * Returns the number of bytes the string would occupy when encoded as UTF-8.
48
+ * This is important for Lexicon validation where schemas specify byte limits.
49
+ *
50
+ * Uses Node.js Buffer.byteLength when available for performance,
51
+ * falling back to a computed implementation.
52
+ *
53
+ * @param str - The string to measure
54
+ * @returns The UTF-8 byte length
55
+ *
56
+ * @example
57
+ * ```typescript
58
+ * import { utf8Len } from '@atproto/lex-data'
59
+ *
60
+ * utf8Len('hello') // 5 (ASCII: 1 byte per char)
61
+ * utf8Len('\u00e9') // 2 (e with accent: 2 bytes)
62
+ * utf8Len('\u{1F600}') // 4 (emoji: 4 bytes)
63
+ * utf8Len('\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}') // 25 (family emoji)
64
+ * ```
65
+ */
21
66
  export const utf8Len: (string: string) => number =
22
67
  /* v8 ignore next -- @preserve */ utf8LenNode ?? utf8LenCompute
23
68
 
69
+ /**
70
+ * Encodes a string to base64 via its UTF-8 byte representation.
71
+ *
72
+ * First encodes the string as UTF-8 bytes, then encodes those bytes as base64.
73
+ *
74
+ * @param str - The string to encode
75
+ * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')
76
+ * @returns The base64-encoded string
77
+ *
78
+ * @example
79
+ * ```typescript
80
+ * import { utf8ToBase64 } from '@atproto/lex-data'
81
+ *
82
+ * utf8ToBase64('Hello') // 'SGVsbG8='
83
+ * ```
84
+ */
24
85
  export const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =
25
86
  /* v8 ignore next -- @preserve */ utf8ToBase64Node ?? utf8ToBase64Ponyfill
26
87
 
88
+ /**
89
+ * Decodes a base64 string into the UTF-8 text it encodes.
90
+ *
91
+ * Decodes the base64 to bytes, then interprets those bytes as UTF-8 text.
92
+ *
93
+ * @param b64 - The base64 string to decode
94
+ * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')
95
+ * @returns The decoded UTF-8 string
96
+ *
97
+ * @example
98
+ * ```typescript
99
+ * import { utf8FromBase64 } from '@atproto/lex-data'
100
+ *
101
+ * utf8FromBase64('SGVsbG8=') // 'Hello'
102
+ * ```
103
+ */
27
104
  export const utf8FromBase64: (
28
105
  b64: string,
29
106
  alphabet?: Base64Alphabet,