@atproto/lex-data 0.0.14 → 0.1.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/CHANGELOG.md +28 -0
  2. package/dist/blob.d.ts +118 -39
  3. package/dist/blob.d.ts.map +1 -1
  4. package/dist/blob.js +68 -22
  5. package/dist/blob.js.map +1 -1
  6. package/dist/cid.d.ts.map +1 -1
  7. package/dist/cid.js +75 -79
  8. package/dist/cid.js.map +1 -1
  9. package/dist/index.js +8 -11
  10. package/dist/index.js.map +1 -1
  11. package/dist/lex-equals.js +9 -12
  12. package/dist/lex-equals.js.map +1 -1
  13. package/dist/lex-error.js +2 -7
  14. package/dist/lex-error.js.map +1 -1
  15. package/dist/lex.js +10 -17
  16. package/dist/lex.js.map +1 -1
  17. package/dist/lib/nodejs-buffer.d.ts +4 -0
  18. package/dist/lib/nodejs-buffer.d.ts.map +1 -1
  19. package/dist/lib/nodejs-buffer.js +1 -4
  20. package/dist/lib/nodejs-buffer.js.map +1 -1
  21. package/dist/lib/util.js +2 -6
  22. package/dist/lib/util.js.map +1 -1
  23. package/dist/object.js +3 -8
  24. package/dist/object.js.map +1 -1
  25. package/dist/uint8array-base64.js +1 -2
  26. package/dist/uint8array-concat.d.ts +2 -2
  27. package/dist/uint8array-concat.d.ts.map +1 -1
  28. package/dist/uint8array-concat.js +4 -8
  29. package/dist/uint8array-concat.js.map +1 -1
  30. package/dist/uint8array-from-base64.js +7 -11
  31. package/dist/uint8array-from-base64.js.map +1 -1
  32. package/dist/uint8array-to-base64.js +7 -11
  33. package/dist/uint8array-to-base64.js.map +1 -1
  34. package/dist/uint8array.d.ts +1 -1
  35. package/dist/uint8array.d.ts.map +1 -1
  36. package/dist/uint8array.js +17 -23
  37. package/dist/uint8array.js.map +1 -1
  38. package/dist/utf8-from-base64.js +6 -10
  39. package/dist/utf8-from-base64.js.map +1 -1
  40. package/dist/utf8-from-bytes.d.ts +3 -0
  41. package/dist/utf8-from-bytes.d.ts.map +1 -0
  42. package/dist/utf8-from-bytes.js +15 -0
  43. package/dist/utf8-from-bytes.js.map +1 -0
  44. package/dist/utf8-grapheme-len.js +4 -8
  45. package/dist/utf8-grapheme-len.js.map +1 -1
  46. package/dist/utf8-len.js +4 -8
  47. package/dist/utf8-len.js.map +1 -1
  48. package/dist/utf8-to-base64.js +8 -12
  49. package/dist/utf8-to-base64.js.map +1 -1
  50. package/dist/utf8.d.ts +18 -0
  51. package/dist/utf8.d.ts.map +1 -1
  52. package/dist/utf8.js +32 -16
  53. package/dist/utf8.js.map +1 -1
  54. package/package.json +7 -8
  55. package/src/blob.test.ts +38 -25
  56. package/src/blob.ts +190 -52
  57. package/src/cid-implementation.test.ts +3 -3
  58. package/src/cid.ts +1 -0
  59. package/src/core-js.d.ts +2 -0
  60. package/src/lib/nodejs-buffer.ts +10 -0
  61. package/src/uint8array-concat.ts +7 -3
  62. package/src/uint8array-from-base64.test.ts +2 -2
  63. package/src/uint8array-to-base64.test.ts +2 -2
  64. package/src/uint8array.test.ts +2 -2
  65. package/src/utf8-from-bytes.test.ts +43 -0
  66. package/src/utf8-from-bytes.ts +21 -0
  67. package/src/utf8.ts +20 -0
  68. package/tsconfig.tests.json +1 -1
package/src/blob.ts CHANGED
@@ -1,25 +1,172 @@
1
- import { Cid, RawCid, ifCid, validateCidString } from './cid.js'
1
+ import {
2
+ CheckCidOptions,
3
+ Cid,
4
+ RawCid,
5
+ ifCid,
6
+ parseCid,
7
+ validateCidString,
8
+ } from './cid.js'
2
9
  import { LexValue } from './lex.js'
3
10
  import { isPlainObject, isPlainProto } from './object.js'
4
11
 
12
+ /**
13
+ * Options to use with {@link ifCid}, {@link validateCidString}, and related CID
14
+ * validation functions when validating CIDs in BlobRefs, in strict mode. This
15
+ * ensures that the CID is a {@link RawCid} (CID v1, raw multicodec, sha256
16
+ * multihash), which is the expected format for blob references in the AT
17
+ * Protocol data model.
18
+ */
19
+ const STRICT_CID_CHECK_OPTIONS: CheckCidOptions = { flavor: 'raw' }
20
+
5
21
  // Number.isSafeInteger is actually safe to use with non-number values, so we
6
22
  // can use it as a type guard.
7
23
  const isSafeInteger = Number.isSafeInteger as (v: unknown) => v is number
8
24
 
25
+ /**
26
+ * Reference to binary data (like images, videos, etc.) in the AT Protocol data
27
+ * model.
28
+ *
29
+ * This type represents a reference to a blob of binary data, identified by its
30
+ * content hash (CID) and accompanied by metadata such as MIME type and size.
31
+ *
32
+ * The {@link BlobRef} type is a union of the current {@link TypedBlobRef}
33
+ * format and the legacy {@link LegacyBlobRef} format.
34
+ */
35
+ export type BlobRef<Ref extends Cid = Cid> = TypedBlobRef<Ref> | LegacyBlobRef
36
+
37
+ /**
38
+ * Options for validating a {@link BlobRef}.
39
+ */
40
+ export type BlobRefCheckOptions = {
41
+ /**
42
+ * If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
43
+ * any valid CID. Otherwise, validates that the CID is v1, uses the raw
44
+ * multicodec, and has a sha256 multihash.
45
+ *
46
+ * @default true
47
+ */
48
+ strict?: boolean
49
+ }
50
+
51
+ /**
52
+ * Type guard to check if a value is a valid {@link BlobRef}, which can be
53
+ * either a {@link TypedBlobRef} or a {@link LegacyBlobRef}. By default, strict
54
+ * CID validation is applied to ensure that the CID in the blob reference is in
55
+ * the expected format for the AT Protocol, but this can be relaxed with the
56
+ * `strict: false` option.
57
+ */
58
+ export function isBlobRef(input: unknown): input is BlobRef<RawCid>
59
+ export function isBlobRef<TOptions extends BlobRefCheckOptions>(
60
+ input: unknown,
61
+ options: TOptions,
62
+ ): input is LegacyBlobRef | InferTypedBlobRef<TOptions>
63
+ export function isBlobRef(
64
+ input: unknown,
65
+ options?: BlobRefCheckOptions,
66
+ ): input is BlobRef<RawCid>
67
+ export function isBlobRef(
68
+ input: unknown,
69
+ options?: BlobRefCheckOptions,
70
+ ): input is BlobRef {
71
+ return (input as any)?.$type === 'blob'
72
+ ? isTypedBlobRef(input, options)
73
+ : isLegacyBlobRef(input, options)
74
+ }
75
+
76
+ /**
77
+ * Extracts the MIME type from a {@link BlobRef}.
78
+ *
79
+ * @example
80
+ * ```ts
81
+ * const mimeType = getBlobMime(blobRef)
82
+ * console.log(mimeType) // e.g., 'image/jpeg'
83
+ * ```
84
+ */
85
+ export function getBlobMime(blob: BlobRef): string
86
+ export function getBlobMime(blob?: BlobRef): string | undefined
87
+ export function getBlobMime(blob?: BlobRef): string | undefined {
88
+ return blob?.mimeType
89
+ }
90
+
91
+ /**
92
+ * Extracts the size (in bytes) from a {@link TypedBlobRef}. For
93
+ * {@link LegacyBlobRef}, size information is not available, so this function
94
+ * returns `undefined` for legacy refs.
95
+ *
96
+ * @note The size property, in blob refs, cannot be 100% trusted since the PDS
97
+ * might not have a local copy of the blob (to check the size against) and might
98
+ * just be passing through the blob ref from the client without validating it.
99
+ * So, while this function can be useful for getting size information when
100
+ * available, it should not be solely relied upon for critical functionality
101
+ * without additional validation.
102
+ *
103
+ * @example
104
+ * ```ts
105
+ * const size = getBlobSize(blobRef)
106
+ * if (size !== undefined) {
107
+ * console.log(`Blob size: ${size} bytes`)
108
+ * } else {
109
+ * console.log('Size information not available for legacy blob ref')
110
+ * }
111
+ * ```
112
+ */
113
+ export function getBlobSize(blob: BlobRef): number | undefined {
114
+ if ('$type' in blob && blob.size >= 0) return blob.size
115
+ // LegacyBlobRef doesn't have size information
116
+ return undefined
117
+ }
118
+
119
+ /**
120
+ * Extracts the {@link Cid} from a {@link BlobRef}.
121
+ *
122
+ * @throws If the input is a {@link LegacyBlobRef} with an invalid CID string
123
+ * @example
124
+ * ```ts
125
+ * const cid = getBlobCid(blobRef)
126
+ * console.log(cid.bytes)
127
+ * ```
128
+ */
129
+ export function getBlobCid(blob: BlobRef): Cid
130
+ export function getBlobCid(blob?: BlobRef): Cid | undefined
131
+ export function getBlobCid(blob?: BlobRef): Cid | undefined {
132
+ if (!blob) return undefined
133
+ return '$type' in blob ? blob.ref : parseCid(blob.cid)
134
+ }
135
+
136
+ /**
137
+ * Extracts the CID string from a {@link BlobRef}.
138
+ *
139
+ * This is similar to `getBlobCid(blob).toString()` but is more optimized since
140
+ * the CID string is already available in the legacy format and we can avoid
141
+ * parsing it into a CID object just to convert it back to a string.
142
+ *
143
+ * @example
144
+ * ```ts
145
+ * const cidString = getBlobCidString(blobRef)
146
+ * console.log(cidString)
147
+ * ```
148
+ */
149
+ export function getBlobCidString(blob: BlobRef): string
150
+ export function getBlobCidString(blob?: BlobRef): string | undefined
151
+ export function getBlobCidString(blob?: BlobRef): string | undefined {
152
+ if (!blob) return undefined
153
+ return '$type' in blob ? blob.ref.toString() : blob.cid
154
+ }
155
+
9
156
  /**
10
157
  * Reference to binary data (like images, videos, etc.) in the AT Protocol data model.
11
158
  *
12
- * A BlobRef is a {@link LexMap} with a specific structure that identifies binary
13
- * content by its content hash (CID), along with metadata about the content type
14
- * and size.
159
+ * A {@link TypedBlobRef} is a {@link LexMap} with a specific structure that
160
+ * identifies binary content by its content hash (CID), along with metadata
161
+ * about the content type and size.
15
162
  *
16
163
  * @typeParam Ref - The type of CID reference, defaults to any {@link Cid}
17
164
  *
18
165
  * @example
19
166
  * ```typescript
20
- * import type { BlobRef } from '@atproto/lex-data'
167
+ * import type { TypedBlobRef } from '@atproto/lex-data'
21
168
  *
22
- * const imageRef: BlobRef = {
169
+ * const imageRef: TypedBlobRef = {
23
170
  * $type: 'blob',
24
171
  * mimeType: 'image/jpeg',
25
172
  * ref: cid, // CID of the blob content
@@ -27,41 +174,27 @@ const isSafeInteger = Number.isSafeInteger as (v: unknown) => v is number
27
174
  * }
28
175
  * ```
29
176
  *
30
- * @see {@link isBlobRef} to check if a value is a valid BlobRef
177
+ * @see {@link isTypedBlobRef} to check if a value is a valid {@link TypedBlobRef}
31
178
  * @see {@link LegacyBlobRef} for the older blob reference format
32
179
  */
33
- export type BlobRef<Ref extends Cid = Cid> = {
180
+ export type TypedBlobRef<Ref extends Cid = Cid> = {
34
181
  $type: 'blob'
35
182
  mimeType: string
36
183
  ref: Ref
37
184
  size: number
38
185
  }
39
186
 
40
- /**
41
- * Options for validating a {@link BlobRef}.
42
- */
43
- export type BlobRefCheckOptions = {
44
- /**
45
- * If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
46
- * any valid CID. Otherwise, validates that the CID is v1, uses the raw
47
- * multicodec, and has a sha256 multihash.
48
- *
49
- * @default true
50
- */
51
- strict?: boolean
52
- }
53
-
54
187
  /**
55
188
  * Infers the BlobRef type based on the check options.
56
189
  *
57
190
  * @typeParam TOptions - The options used for checking
58
191
  */
59
- export type InferCheckedBlobRef<TOptions extends BlobRefCheckOptions> =
192
+ export type InferTypedBlobRef<TOptions extends BlobRefCheckOptions> =
60
193
  TOptions extends { strict: false }
61
- ? BlobRef
194
+ ? TypedBlobRef
62
195
  : { strict: boolean } extends TOptions
63
- ? BlobRef
64
- : BlobRef<RawCid>
196
+ ? TypedBlobRef
197
+ : TypedBlobRef<RawCid>
65
198
 
66
199
  /**
67
200
  * Type guard to check if a value is a valid {@link BlobRef}.
@@ -78,32 +211,32 @@ export type InferCheckedBlobRef<TOptions extends BlobRefCheckOptions> =
78
211
  *
79
212
  * @example
80
213
  * ```typescript
81
- * import { isBlobRef } from '@atproto/lex-data'
214
+ * import { isTypedBlobRef } from '@atproto/lex-data'
82
215
  *
83
- * if (isBlobRef(data)) {
216
+ * if (isTypedBlobRef(data)) {
84
217
  * console.log(data.mimeType) // e.g., 'image/jpeg'
85
218
  * console.log(data.size) // e.g., 12345
86
219
  * }
87
220
  *
88
221
  * // Allow any valid CID (not just raw CIDs)
89
- * if (isBlobRef(data, { strict: false })) {
222
+ * if (isTypedBlobRef(data, { strict: false })) {
90
223
  * // ...
91
224
  * }
92
225
  * ```
93
226
  */
94
- export function isBlobRef(input: unknown): input is BlobRef<RawCid>
95
- export function isBlobRef<TOptions extends BlobRefCheckOptions>(
227
+ export function isTypedBlobRef(input: unknown): input is TypedBlobRef<RawCid>
228
+ export function isTypedBlobRef<TOptions extends BlobRefCheckOptions>(
96
229
  input: unknown,
97
230
  options: TOptions,
98
- ): input is InferCheckedBlobRef<TOptions>
99
- export function isBlobRef(
231
+ ): input is InferTypedBlobRef<TOptions>
232
+ export function isTypedBlobRef(
100
233
  input: unknown,
101
234
  options?: BlobRefCheckOptions,
102
- ): input is BlobRef
103
- export function isBlobRef(
235
+ ): input is TypedBlobRef<RawCid>
236
+ export function isTypedBlobRef(
104
237
  input: unknown,
105
238
  options?: BlobRefCheckOptions,
106
- ): input is BlobRef {
239
+ ): input is TypedBlobRef {
107
240
  if (!isPlainObject(input)) {
108
241
  return false
109
242
  }
@@ -143,7 +276,7 @@ export function isBlobRef(
143
276
  const cid = ifCid(
144
277
  ref,
145
278
  // Strict unless explicitly disabled
146
- options?.strict === false ? undefined : { flavor: 'raw' },
279
+ options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
147
280
  )
148
281
  if (!cid) {
149
282
  return false
@@ -185,9 +318,6 @@ export type LegacyBlobRef = {
185
318
  * - `mimeType` must be a non-empty string
186
319
  * - No additional properties allowed
187
320
  *
188
- * @param input - The value to check
189
- * @returns `true` if the input is a valid LegacyBlobRef
190
- *
191
321
  * @example
192
322
  * ```typescript
193
323
  * import { isLegacyBlobRef } from '@atproto/lex-data'
@@ -198,9 +328,12 @@ export type LegacyBlobRef = {
198
328
  * }
199
329
  * ```
200
330
  *
201
- * @see {@link isBlobRef} for checking the current blob reference format
331
+ * @see {@link isTypedBlobRef} for checking the current blob reference format
202
332
  */
203
- export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
333
+ export function isLegacyBlobRef(
334
+ input: unknown,
335
+ options?: BlobRefCheckOptions,
336
+ ): input is LegacyBlobRef {
204
337
  if (!isPlainObject(input)) {
205
338
  return false
206
339
  }
@@ -220,7 +353,12 @@ export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
220
353
  }
221
354
  }
222
355
 
223
- if (!validateCidString(cid)) {
356
+ if (
357
+ !validateCidString(
358
+ cid,
359
+ options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
360
+ )
361
+ ) {
224
362
  return false
225
363
  }
226
364
 
@@ -247,10 +385,10 @@ export type EnumBlobRefsOptions = BlobRefCheckOptions & {
247
385
  */
248
386
  export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
249
387
  TOptions extends { allowLegacy: true }
250
- ? InferCheckedBlobRef<TOptions> | LegacyBlobRef
388
+ ? InferTypedBlobRef<TOptions> | LegacyBlobRef
251
389
  : { allowLegacy: boolean } extends TOptions
252
- ? InferCheckedBlobRef<TOptions> | LegacyBlobRef
253
- : InferCheckedBlobRef<TOptions>
390
+ ? InferTypedBlobRef<TOptions> | LegacyBlobRef
391
+ : InferTypedBlobRef<TOptions>
254
392
 
255
393
  /**
256
394
  * Generator that enumerates all {@link BlobRef}s (and, optionally,
@@ -280,8 +418,8 @@ export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
280
418
  * }
281
419
  *
282
420
  * // Include legacy blob references
283
- * for (const ref of enumBlobRefs(record, { allowLegacy: true })) {
284
- * // ref may be BlobRef or LegacyBlobRef
421
+ * for (const ref of enumBlobRefs(record, { allowLegacy: true, strict: false })) {
422
+ * // ref may be BlobRef or LegacyBlobRef, with relaxed CID validation
285
423
  * }
286
424
  * ```
287
425
  */
@@ -295,11 +433,11 @@ export function enumBlobRefs<TOptions extends EnumBlobRefsOptions>(
295
433
  export function enumBlobRefs(
296
434
  input: LexValue,
297
435
  options?: EnumBlobRefsOptions,
298
- ): Generator<BlobRef | LegacyBlobRef, void, unknown>
436
+ ): Generator<BlobRef, void, unknown>
299
437
  export function* enumBlobRefs(
300
438
  input: LexValue,
301
439
  options?: EnumBlobRefsOptions,
302
- ): Generator<BlobRef | LegacyBlobRef, void, unknown> {
440
+ ): Generator<BlobRef, void, unknown> {
303
441
  // LegacyBlobRef not included by default
304
442
  const includeLegacy = options?.allowLegacy === true
305
443
 
@@ -322,9 +460,9 @@ export function* enumBlobRefs(
322
460
  } else if (isPlainProto(value)) {
323
461
  if (visited.has(value)) continue
324
462
  visited.add(value)
325
- if (isBlobRef(value, options)) {
463
+ if (isTypedBlobRef(value, options)) {
326
464
  yield value
327
- } else if (includeLegacy && isLegacyBlobRef(value)) {
465
+ } else if (includeLegacy && isLegacyBlobRef(value, options)) {
328
466
  yield value
329
467
  } else {
330
468
  for (const v of Object.values(value)) {
@@ -58,13 +58,13 @@ describe(BytesCid, () => {
58
58
  it('throws an error for invalid CID bytes', () => {
59
59
  expect(
60
60
  () => new BytesCid(new Uint8Array([2, 0x55, 0x12, 3, 1, 2, 3])),
61
- ).toThrowError('Unsupported CID version')
62
- expect(() => new BytesCid(new Uint8Array([1, 0x55, 0x12]))).toThrowError(
61
+ ).toThrow('Unsupported CID version')
62
+ expect(() => new BytesCid(new Uint8Array([1, 0x55, 0x12]))).toThrow(
63
63
  'CID bytes are too short',
64
64
  )
65
65
  expect(
66
66
  () => new BytesCid(new Uint8Array([1, 0x55, 0x12, 4, 1, 2, 3])),
67
- ).toThrowError('CID bytes length mismatch')
67
+ ).toThrow('CID bytes length mismatch')
68
68
  })
69
69
  })
70
70
 
package/src/cid.ts CHANGED
@@ -87,6 +87,7 @@ declare module 'multiformats/cid' {
87
87
  * we update or swap out `multiformats`, `@atproto/lex-data` provides its own
88
88
  * stable {@link Cid} interface.
89
89
  */
90
+ // eslint-disable-next-line @typescript-eslint/no-empty-object-type
90
91
  interface CID {}
91
92
  }
92
93
 
@@ -0,0 +1,2 @@
1
+ declare module 'core-js/es/typed-array/from-base64.js'
2
+ declare module 'core-js/es/typed-array/to-base64.js'
@@ -1,5 +1,10 @@
1
1
  type Encoding = 'utf8' | 'base64' | 'base64url'
2
2
 
3
+ // Node's buffer module declares this type internally, but referencing it here
4
+ // would couple this file to @types/node. Local copy keeps this module
5
+ // standalone so it compiles in any environment (see tsconfig/isomorphic.json).
6
+ type WithImplicitCoercion<T> = T | { valueOf(): T }
7
+
3
8
  interface NodeJSBuffer<TArrayBuffer extends ArrayBufferLike = ArrayBufferLike>
4
9
  extends Uint8Array<TArrayBuffer> {
5
10
  byteLength: number
@@ -12,6 +17,11 @@ interface NodeJSBufferConstructor {
12
17
  input: Uint8Array | ArrayBuffer | ArrayBufferView,
13
18
  ): NodeJSBuffer<ArrayBuffer>
14
19
  from(input: string, encoding?: Encoding): NodeJSBuffer<ArrayBuffer>
20
+ from<TArrayBuffer extends ArrayBufferLike>(
21
+ arrayBuffer: WithImplicitCoercion<TArrayBuffer>,
22
+ byteOffset?: number,
23
+ length?: number,
24
+ ): Buffer<TArrayBuffer>
15
25
  concat(list: readonly Uint8Array[], totalLength?: number): NodeJSBuffer
16
26
  byteLength(input: string, encoding?: Encoding): number
17
27
  prototype: NodeJSBuffer
@@ -3,12 +3,16 @@ import { NodeJSBuffer } from './lib/nodejs-buffer.js'
3
3
  const Buffer = NodeJSBuffer
4
4
 
5
5
  export const ui8ConcatNode = Buffer
6
- ? function ui8ConcatNode(array: readonly Uint8Array[]): Uint8Array {
7
- return Buffer.concat(array)
6
+ ? function ui8ConcatNode(
7
+ array: readonly Uint8Array[],
8
+ ): Uint8Array<ArrayBuffer> {
9
+ return Buffer.concat(array) as Uint8Array<ArrayBuffer>
8
10
  }
9
11
  : /* v8 ignore next -- @preserve */ null
10
12
 
11
- export function ui8ConcatPonyfill(array: readonly Uint8Array[]): Uint8Array {
13
+ export function ui8ConcatPonyfill(
14
+ array: readonly Uint8Array[],
15
+ ): Uint8Array<ArrayBuffer> {
12
16
  let totalLength = 0
13
17
  for (const arr of array) totalLength += arr.length
14
18
  const result = new Uint8Array(totalLength)
@@ -1,5 +1,5 @@
1
- import 'core-js/modules/es.uint8-array.from-base64.js'
2
- import 'core-js/modules/es.uint8-array.to-base64.js'
1
+ import 'core-js/es/typed-array/from-base64.js'
2
+ import 'core-js/es/typed-array/to-base64.js'
3
3
 
4
4
  import { assert, describe, expect, it } from 'vitest'
5
5
  import {
@@ -1,5 +1,5 @@
1
- import 'core-js/modules/es.uint8-array.from-base64.js'
2
- import 'core-js/modules/es.uint8-array.to-base64.js'
1
+ import 'core-js/es/typed-array/from-base64.js'
2
+ import 'core-js/es/typed-array/to-base64.js'
3
3
  import { assert, describe, expect, it } from 'vitest'
4
4
  import {
5
5
  toBase64Native,
@@ -1,5 +1,5 @@
1
- import 'core-js/modules/es.uint8-array.from-base64.js'
2
- import 'core-js/modules/es.uint8-array.to-base64.js'
1
+ import 'core-js/es/typed-array/from-base64.js'
2
+ import 'core-js/es/typed-array/to-base64.js'
3
3
 
4
4
  import { describe, expect, it } from 'vitest'
5
5
  import {
@@ -0,0 +1,43 @@
1
+ import { assert, describe, expect, it } from 'vitest'
2
+ import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
3
+
4
+ for (const utf8FromBytes of [utf8FromBytesNode, utf8FromBytesNative] as const) {
5
+ assert(utf8FromBytes, 'utf8FromBytes implementation should not be null')
6
+ describe(utf8FromBytes, () => {
7
+ it('decodes empty Uint8Array', () => {
8
+ const decoded = utf8FromBytes(new Uint8Array(0))
9
+ expect(typeof decoded).toBe('string')
10
+ expect(decoded).toBe('')
11
+ })
12
+
13
+ it('decodes 10MB', () => {
14
+ const bytes = Buffer.allocUnsafe(10_000_000).fill('๐Ÿฉ')
15
+ const decoded = utf8FromBytes(bytes)
16
+ expect(decoded).toBe('๐Ÿฉ'.repeat(10_000_000 / 4))
17
+ })
18
+
19
+ for (const string of [
20
+ '',
21
+ '\0\0',
22
+ '\0\0\0',
23
+ '\0\0\0\0',
24
+ '__',
25
+ 'é',
26
+ 'àç',
27
+ '\0éàç',
28
+ '```\x1b',
29
+ 'aaa',
30
+ 'Hello, World!',
31
+ '😀😃😄😁😆😅😂🤣😊😇',
32
+ '👩‍💻👨‍💻👩‍🔬👨‍🔬👩‍🚀👨‍🚀',
33
+ '🌍🌎🌏🌐🪐🌟✨⚡🔥💧',
34
+ ] as const) {
35
+ const buffer = Buffer.from(string, 'utf8')
36
+
37
+ it(`decodes ${JSON.stringify(string)}`, () => {
38
+ const decoded = utf8FromBytes(buffer)
39
+ expect(decoded).toBe(string)
40
+ })
41
+ }
42
+ })
43
+ }
@@ -0,0 +1,21 @@
1
+ import { NodeJSBuffer } from './lib/nodejs-buffer.js'
2
+
3
+ const Buffer = NodeJSBuffer
4
+
5
+ export const utf8FromBytesNode = Buffer
6
+ ? function utf8FromBytesNode(bytes: Uint8Array): string {
7
+ // @NOTE Buffer.from(bytes) creates a copy of the ArrayBuffer. The following
8
+ // allows us to avoid the copy by creating a Buffer that shares the same
9
+ // memory as the input Uint8Array.
10
+ const buffer = Buffer.from(
11
+ bytes.buffer,
12
+ bytes.byteOffset,
13
+ bytes.byteLength,
14
+ )
15
+ return buffer.toString('utf8')
16
+ }
17
+ : /* v8 ignore next -- @preserve */ null
18
+
19
+ export function utf8FromBytesNative(bytes: Uint8Array): string {
20
+ return new TextDecoder('utf-8').decode(bytes)
21
+ }
package/src/utf8.ts CHANGED
@@ -3,10 +3,30 @@ import {
3
3
  utf8FromBase64Node,
4
4
  utf8FromBase64Ponyfill,
5
5
  } from './utf8-from-base64.js'
6
+ import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
6
7
  import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
7
8
  import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
8
9
  import { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'
9
10
 
11
+ /**
12
+ * Converts a Uint8Array to a UTF-8 string.
13
+ *
14
+ * Uses Node.js Buffer when available for performance, falling back to
15
+ * TextDecoder in environments without Buffer support.
16
+ *
17
+ * @param bytes - The binary data to decode
18
+ * @returns The decoded string (as UTF-16 JavaScript string)
19
+ *
20
+ * @example
21
+ * ```typescript
22
+ * import { utf8FromBytes } from '@atproto/lex-data'
23
+ *
24
+ * const bytes = new Uint8Array([72, 101, 108, 108, 111])
25
+ * utf8FromBytes(bytes) // 'Hello'
26
+ * ```
27
+ */
28
+ export const utf8FromBytes = utf8FromBytesNode ?? utf8FromBytesNative
29
+
10
30
  /**
11
31
  * Counts the number of grapheme clusters (user-perceived characters) in a string.
12
32
  *
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "extends": "../../../tsconfig/vitest.json",
3
- "include": ["./tests", "./src/**/*.test.ts"],
3
+ "include": ["./tests", "./src/**/*.test.ts", "./src/core-js.d.ts"],
4
4
  "compilerOptions": {
5
5
  "noImplicitAny": true,
6
6
  "rootDir": "./",