@atproto/lex-data 0.0.14 → 0.1.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/dist/blob.d.ts +118 -39
- package/dist/blob.d.ts.map +1 -1
- package/dist/blob.js +68 -22
- package/dist/blob.js.map +1 -1
- package/dist/cid.d.ts.map +1 -1
- package/dist/cid.js +75 -79
- package/dist/cid.js.map +1 -1
- package/dist/index.js +8 -11
- package/dist/index.js.map +1 -1
- package/dist/lex-equals.js +9 -12
- package/dist/lex-equals.js.map +1 -1
- package/dist/lex-error.js +2 -7
- package/dist/lex-error.js.map +1 -1
- package/dist/lex.js +10 -17
- package/dist/lex.js.map +1 -1
- package/dist/lib/nodejs-buffer.d.ts +4 -0
- package/dist/lib/nodejs-buffer.d.ts.map +1 -1
- package/dist/lib/nodejs-buffer.js +1 -4
- package/dist/lib/nodejs-buffer.js.map +1 -1
- package/dist/lib/util.js +2 -6
- package/dist/lib/util.js.map +1 -1
- package/dist/object.js +3 -8
- package/dist/object.js.map +1 -1
- package/dist/uint8array-base64.js +1 -2
- package/dist/uint8array-concat.d.ts +2 -2
- package/dist/uint8array-concat.d.ts.map +1 -1
- package/dist/uint8array-concat.js +4 -8
- package/dist/uint8array-concat.js.map +1 -1
- package/dist/uint8array-from-base64.js +7 -11
- package/dist/uint8array-from-base64.js.map +1 -1
- package/dist/uint8array-to-base64.js +7 -11
- package/dist/uint8array-to-base64.js.map +1 -1
- package/dist/uint8array.d.ts +1 -1
- package/dist/uint8array.d.ts.map +1 -1
- package/dist/uint8array.js +17 -23
- package/dist/uint8array.js.map +1 -1
- package/dist/utf8-from-base64.js +6 -10
- package/dist/utf8-from-base64.js.map +1 -1
- package/dist/utf8-from-bytes.d.ts +3 -0
- package/dist/utf8-from-bytes.d.ts.map +1 -0
- package/dist/utf8-from-bytes.js +15 -0
- package/dist/utf8-from-bytes.js.map +1 -0
- package/dist/utf8-grapheme-len.js +4 -8
- package/dist/utf8-grapheme-len.js.map +1 -1
- package/dist/utf8-len.js +4 -8
- package/dist/utf8-len.js.map +1 -1
- package/dist/utf8-to-base64.js +8 -12
- package/dist/utf8-to-base64.js.map +1 -1
- package/dist/utf8.d.ts +18 -0
- package/dist/utf8.d.ts.map +1 -1
- package/dist/utf8.js +32 -16
- package/dist/utf8.js.map +1 -1
- package/package.json +7 -8
- package/src/blob.test.ts +38 -25
- package/src/blob.ts +190 -52
- package/src/cid-implementation.test.ts +3 -3
- package/src/cid.ts +1 -0
- package/src/core-js.d.ts +2 -0
- package/src/lib/nodejs-buffer.ts +10 -0
- package/src/uint8array-concat.ts +7 -3
- package/src/uint8array-from-base64.test.ts +2 -2
- package/src/uint8array-to-base64.test.ts +2 -2
- package/src/uint8array.test.ts +2 -2
- package/src/utf8-from-bytes.test.ts +43 -0
- package/src/utf8-from-bytes.ts +21 -0
- package/src/utf8.ts +20 -0
- package/tsconfig.tests.json +1 -1
package/src/blob.ts
CHANGED
|
@@ -1,25 +1,172 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
CheckCidOptions,
|
|
3
|
+
Cid,
|
|
4
|
+
RawCid,
|
|
5
|
+
ifCid,
|
|
6
|
+
parseCid,
|
|
7
|
+
validateCidString,
|
|
8
|
+
} from './cid.js'
|
|
2
9
|
import { LexValue } from './lex.js'
|
|
3
10
|
import { isPlainObject, isPlainProto } from './object.js'
|
|
4
11
|
|
|
12
|
+
/**
|
|
13
|
+
* Options to use with {@link ifCid}, {@link validateCidString}, and related CID
|
|
14
|
+
* validation functions when validating CIDs in BlobRefs, in strict mode. This
|
|
15
|
+
* ensures that the CID is a {@link RawCid} (CID v1, raw multicodec, sha256
|
|
16
|
+
* multihash), which is the expected format for blob references in the AT
|
|
17
|
+
* Protocol data model.
|
|
18
|
+
*/
|
|
19
|
+
const STRICT_CID_CHECK_OPTIONS: CheckCidOptions = { flavor: 'raw' }
|
|
20
|
+
|
|
5
21
|
// Number.isSafeInteger is actually safe to use with non-number values, so we
|
|
6
22
|
// can use it as a type guard.
|
|
7
23
|
const isSafeInteger = Number.isSafeInteger as (v: unknown) => v is number
|
|
8
24
|
|
|
25
|
+
/**
|
|
26
|
+
* Reference to binary data (like images, videos, etc.) in the AT Protocol data
|
|
27
|
+
* model.
|
|
28
|
+
*
|
|
29
|
+
* This type represents a reference to a blob of binary data, identified by its
|
|
30
|
+
* content hash (CID) and accompanied by metadata such as MIME type and size.
|
|
31
|
+
*
|
|
32
|
+
* The {@link BlobRef} type is a union of the current {@link TypedBlobRef}
|
|
33
|
+
* format and the legacy {@link LegacyBlobRef} format.
|
|
34
|
+
*/
|
|
35
|
+
export type BlobRef<Ref extends Cid = Cid> = TypedBlobRef<Ref> | LegacyBlobRef
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Options for validating a {@link BlobRef}.
|
|
39
|
+
*/
|
|
40
|
+
export type BlobRefCheckOptions = {
|
|
41
|
+
/**
|
|
42
|
+
* If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
|
|
43
|
+
* any valid CID. Otherwise, validates that the CID is v1, uses the raw
|
|
44
|
+
* multicodec, and has a sha256 multihash.
|
|
45
|
+
*
|
|
46
|
+
* @default true
|
|
47
|
+
*/
|
|
48
|
+
strict?: boolean
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Type guard to check if a value is a valid {@link BlobRef}, which can be
|
|
53
|
+
* either a {@link TypedBlobRef} or a {@link LegacyBlobRef}. By default, strict
|
|
54
|
+
* CID validation is applied to ensure that the CID in the blob reference is in
|
|
55
|
+
* the expected format for the AT Protocol, but this can be relaxed with the
|
|
56
|
+
* `strict: false` option.
|
|
57
|
+
*/
|
|
58
|
+
export function isBlobRef(input: unknown): input is BlobRef<RawCid>
|
|
59
|
+
export function isBlobRef<TOptions extends BlobRefCheckOptions>(
|
|
60
|
+
input: unknown,
|
|
61
|
+
options: TOptions,
|
|
62
|
+
): input is LegacyBlobRef | InferTypedBlobRef<TOptions>
|
|
63
|
+
export function isBlobRef(
|
|
64
|
+
input: unknown,
|
|
65
|
+
options?: BlobRefCheckOptions,
|
|
66
|
+
): input is BlobRef<RawCid>
|
|
67
|
+
export function isBlobRef(
|
|
68
|
+
input: unknown,
|
|
69
|
+
options?: BlobRefCheckOptions,
|
|
70
|
+
): input is BlobRef {
|
|
71
|
+
return (input as any)?.$type === 'blob'
|
|
72
|
+
? isTypedBlobRef(input, options)
|
|
73
|
+
: isLegacyBlobRef(input, options)
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Extracts the MIME type from a {@link BlobRef}.
|
|
78
|
+
*
|
|
79
|
+
* @example
|
|
80
|
+
* ```ts
|
|
81
|
+
* const mimeType = getBlobMime(blobRef)
|
|
82
|
+
* console.log(mimeType) // e.g., 'image/jpeg'
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
export function getBlobMime(blob: BlobRef): string
|
|
86
|
+
export function getBlobMime(blob?: BlobRef): string | undefined
|
|
87
|
+
export function getBlobMime(blob?: BlobRef): string | undefined {
|
|
88
|
+
return blob?.mimeType
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Extracts the size (in bytes) from a {@link TypedBlobRef}. For
|
|
93
|
+
* {@link LegacyBlobRef}, size information is not available, so this function
|
|
94
|
+
* returns `undefined` for legacy refs.
|
|
95
|
+
*
|
|
96
|
+
* @note The size property, in blob refs, cannot be 100% trusted since the PDS
|
|
97
|
+
* might not have a local copy of the blob (to check the size against) and might
|
|
98
|
+
* just be passing through the blob ref from the client without validating it.
|
|
99
|
+
* So, while this function can be useful for getting size information when
|
|
100
|
+
* available, it should not be solely relied upon for critical functionality
|
|
101
|
+
* without additional validation.
|
|
102
|
+
*
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* const size = getBlobSize(blobRef)
|
|
106
|
+
* if (size !== undefined) {
|
|
107
|
+
* console.log(`Blob size: ${size} bytes`)
|
|
108
|
+
* } else {
|
|
109
|
+
* console.log('Size information not available for legacy blob ref')
|
|
110
|
+
* }
|
|
111
|
+
* ```
|
|
112
|
+
*/
|
|
113
|
+
export function getBlobSize(blob: BlobRef): number | undefined {
|
|
114
|
+
if ('$type' in blob && blob.size >= 0) return blob.size
|
|
115
|
+
// LegacyBlobRef doesn't have size information
|
|
116
|
+
return undefined
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* Extracts the {@link Cid} from a {@link BlobRef}.
|
|
121
|
+
*
|
|
122
|
+
* @throws If the input is a {@link LegacyBlobRef} with an invalid CID string
|
|
123
|
+
* @example
|
|
124
|
+
* ```ts
|
|
125
|
+
* const cid = getBlobCid(blobRef)
|
|
126
|
+
* console.log(cid.bytes)
|
|
127
|
+
* ```
|
|
128
|
+
*/
|
|
129
|
+
export function getBlobCid(blob: BlobRef): Cid
|
|
130
|
+
export function getBlobCid(blob?: BlobRef): Cid | undefined
|
|
131
|
+
export function getBlobCid(blob?: BlobRef): Cid | undefined {
|
|
132
|
+
if (!blob) return undefined
|
|
133
|
+
return '$type' in blob ? blob.ref : parseCid(blob.cid)
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Extracts the CID string from a {@link BlobRef}.
|
|
138
|
+
*
|
|
139
|
+
* This is similar to `getBlobCid(blob).toString()` but is more optimized since
|
|
140
|
+
* the CID string is already available in the legacy format and we can avoid
|
|
141
|
+
* parsing it into a CID object just to convert it back to a string.
|
|
142
|
+
*
|
|
143
|
+
* @example
|
|
144
|
+
* ```ts
|
|
145
|
+
* const cidString = getBlobCidString(blobRef)
|
|
146
|
+
* console.log(cidString)
|
|
147
|
+
* ```
|
|
148
|
+
*/
|
|
149
|
+
export function getBlobCidString(blob: BlobRef): string
|
|
150
|
+
export function getBlobCidString(blob?: BlobRef): string | undefined
|
|
151
|
+
export function getBlobCidString(blob?: BlobRef): string | undefined {
|
|
152
|
+
if (!blob) return undefined
|
|
153
|
+
return '$type' in blob ? blob.ref.toString() : blob.cid
|
|
154
|
+
}
|
|
155
|
+
|
|
9
156
|
/**
|
|
10
157
|
* Reference to binary data (like images, videos, etc.) in the AT Protocol data model.
|
|
11
158
|
*
|
|
12
|
-
* A
|
|
13
|
-
* content by its content hash (CID), along with metadata
|
|
14
|
-
* and size.
|
|
159
|
+
* A {@link TypedBlobRef} is a {@link LexMap} with a specific structure that
|
|
160
|
+
* identifies binary content by its content hash (CID), along with metadata
|
|
161
|
+
* about the content type and size.
|
|
15
162
|
*
|
|
16
163
|
* @typeParam Ref - The type of CID reference, defaults to any {@link Cid}
|
|
17
164
|
*
|
|
18
165
|
* @example
|
|
19
166
|
* ```typescript
|
|
20
|
-
* import type {
|
|
167
|
+
* import type { TypedBlobRef } from '@atproto/lex-data'
|
|
21
168
|
*
|
|
22
|
-
* const imageRef:
|
|
169
|
+
* const imageRef: TypedBlobRef = {
|
|
23
170
|
* $type: 'blob',
|
|
24
171
|
* mimeType: 'image/jpeg',
|
|
25
172
|
* ref: cid, // CID of the blob content
|
|
@@ -27,41 +174,27 @@ const isSafeInteger = Number.isSafeInteger as (v: unknown) => v is number
|
|
|
27
174
|
* }
|
|
28
175
|
* ```
|
|
29
176
|
*
|
|
30
|
-
* @see {@link
|
|
177
|
+
* @see {@link isTypedBlobRef} to check if a value is a valid {@link TypedBlobRef}
|
|
31
178
|
* @see {@link LegacyBlobRef} for the older blob reference format
|
|
32
179
|
*/
|
|
33
|
-
export type
|
|
180
|
+
export type TypedBlobRef<Ref extends Cid = Cid> = {
|
|
34
181
|
$type: 'blob'
|
|
35
182
|
mimeType: string
|
|
36
183
|
ref: Ref
|
|
37
184
|
size: number
|
|
38
185
|
}
|
|
39
186
|
|
|
40
|
-
/**
|
|
41
|
-
* Options for validating a {@link BlobRef}.
|
|
42
|
-
*/
|
|
43
|
-
export type BlobRefCheckOptions = {
|
|
44
|
-
/**
|
|
45
|
-
* If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
|
|
46
|
-
* any valid CID. Otherwise, validates that the CID is v1, uses the raw
|
|
47
|
-
* multicodec, and has a sha256 multihash.
|
|
48
|
-
*
|
|
49
|
-
* @default true
|
|
50
|
-
*/
|
|
51
|
-
strict?: boolean
|
|
52
|
-
}
|
|
53
|
-
|
|
54
187
|
/**
|
|
55
188
|
* Infers the BlobRef type based on the check options.
|
|
56
189
|
*
|
|
57
190
|
* @typeParam TOptions - The options used for checking
|
|
58
191
|
*/
|
|
59
|
-
export type
|
|
192
|
+
export type InferTypedBlobRef<TOptions extends BlobRefCheckOptions> =
|
|
60
193
|
TOptions extends { strict: false }
|
|
61
|
-
?
|
|
194
|
+
? TypedBlobRef
|
|
62
195
|
: { strict: boolean } extends TOptions
|
|
63
|
-
?
|
|
64
|
-
:
|
|
196
|
+
? TypedBlobRef
|
|
197
|
+
: TypedBlobRef<RawCid>
|
|
65
198
|
|
|
66
199
|
/**
|
|
67
200
|
* Type guard to check if a value is a valid {@link BlobRef}.
|
|
@@ -78,32 +211,32 @@ export type InferCheckedBlobRef<TOptions extends BlobRefCheckOptions> =
|
|
|
78
211
|
*
|
|
79
212
|
* @example
|
|
80
213
|
* ```typescript
|
|
81
|
-
* import {
|
|
214
|
+
* import { isTypedBlobRef } from '@atproto/lex-data'
|
|
82
215
|
*
|
|
83
|
-
* if (
|
|
216
|
+
* if (isTypedBlobRef(data)) {
|
|
84
217
|
* console.log(data.mimeType) // e.g., 'image/jpeg'
|
|
85
218
|
* console.log(data.size) // e.g., 12345
|
|
86
219
|
* }
|
|
87
220
|
*
|
|
88
221
|
* // Allow any valid CID (not just raw CIDs)
|
|
89
|
-
* if (
|
|
222
|
+
* if (isTypedBlobRef(data, { strict: false })) {
|
|
90
223
|
* // ...
|
|
91
224
|
* }
|
|
92
225
|
* ```
|
|
93
226
|
*/
|
|
94
|
-
export function
|
|
95
|
-
export function
|
|
227
|
+
export function isTypedBlobRef(input: unknown): input is TypedBlobRef<RawCid>
|
|
228
|
+
export function isTypedBlobRef<TOptions extends BlobRefCheckOptions>(
|
|
96
229
|
input: unknown,
|
|
97
230
|
options: TOptions,
|
|
98
|
-
): input is
|
|
99
|
-
export function
|
|
231
|
+
): input is InferTypedBlobRef<TOptions>
|
|
232
|
+
export function isTypedBlobRef(
|
|
100
233
|
input: unknown,
|
|
101
234
|
options?: BlobRefCheckOptions,
|
|
102
|
-
): input is
|
|
103
|
-
export function
|
|
235
|
+
): input is TypedBlobRef<RawCid>
|
|
236
|
+
export function isTypedBlobRef(
|
|
104
237
|
input: unknown,
|
|
105
238
|
options?: BlobRefCheckOptions,
|
|
106
|
-
): input is
|
|
239
|
+
): input is TypedBlobRef {
|
|
107
240
|
if (!isPlainObject(input)) {
|
|
108
241
|
return false
|
|
109
242
|
}
|
|
@@ -143,7 +276,7 @@ export function isBlobRef(
|
|
|
143
276
|
const cid = ifCid(
|
|
144
277
|
ref,
|
|
145
278
|
// Strict unless explicitly disabled
|
|
146
|
-
options?.strict === false ? undefined :
|
|
279
|
+
options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
|
|
147
280
|
)
|
|
148
281
|
if (!cid) {
|
|
149
282
|
return false
|
|
@@ -185,9 +318,6 @@ export type LegacyBlobRef = {
|
|
|
185
318
|
* - `mimeType` must be a non-empty string
|
|
186
319
|
* - No additional properties allowed
|
|
187
320
|
*
|
|
188
|
-
* @param input - The value to check
|
|
189
|
-
* @returns `true` if the input is a valid LegacyBlobRef
|
|
190
|
-
*
|
|
191
321
|
* @example
|
|
192
322
|
* ```typescript
|
|
193
323
|
* import { isLegacyBlobRef } from '@atproto/lex-data'
|
|
@@ -198,9 +328,12 @@ export type LegacyBlobRef = {
|
|
|
198
328
|
* }
|
|
199
329
|
* ```
|
|
200
330
|
*
|
|
201
|
-
* @see {@link
|
|
331
|
+
* @see {@link isTypedBlobRef} for checking the current blob reference format
|
|
202
332
|
*/
|
|
203
|
-
export function isLegacyBlobRef(
|
|
333
|
+
export function isLegacyBlobRef(
|
|
334
|
+
input: unknown,
|
|
335
|
+
options?: BlobRefCheckOptions,
|
|
336
|
+
): input is LegacyBlobRef {
|
|
204
337
|
if (!isPlainObject(input)) {
|
|
205
338
|
return false
|
|
206
339
|
}
|
|
@@ -220,7 +353,12 @@ export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
|
|
|
220
353
|
}
|
|
221
354
|
}
|
|
222
355
|
|
|
223
|
-
if (
|
|
356
|
+
if (
|
|
357
|
+
!validateCidString(
|
|
358
|
+
cid,
|
|
359
|
+
options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
|
|
360
|
+
)
|
|
361
|
+
) {
|
|
224
362
|
return false
|
|
225
363
|
}
|
|
226
364
|
|
|
@@ -247,10 +385,10 @@ export type EnumBlobRefsOptions = BlobRefCheckOptions & {
|
|
|
247
385
|
*/
|
|
248
386
|
export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
|
|
249
387
|
TOptions extends { allowLegacy: true }
|
|
250
|
-
?
|
|
388
|
+
? InferTypedBlobRef<TOptions> | LegacyBlobRef
|
|
251
389
|
: { allowLegacy: boolean } extends TOptions
|
|
252
|
-
?
|
|
253
|
-
:
|
|
390
|
+
? InferTypedBlobRef<TOptions> | LegacyBlobRef
|
|
391
|
+
: InferTypedBlobRef<TOptions>
|
|
254
392
|
|
|
255
393
|
/**
|
|
256
394
|
* Generator that enumerates all {@link BlobRef}s (and, optionally,
|
|
@@ -280,8 +418,8 @@ export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
|
|
|
280
418
|
* }
|
|
281
419
|
*
|
|
282
420
|
* // Include legacy blob references
|
|
283
|
-
* for (const ref of enumBlobRefs(record, { allowLegacy: true })) {
|
|
284
|
-
* // ref may be BlobRef or LegacyBlobRef
|
|
421
|
+
* for (const ref of enumBlobRefs(record, { allowLegacy: true, strict: false })) {
|
|
422
|
+
* // ref may be BlobRef or LegacyBlobRef, with relaxed CID validation
|
|
285
423
|
* }
|
|
286
424
|
* ```
|
|
287
425
|
*/
|
|
@@ -295,11 +433,11 @@ export function enumBlobRefs<TOptions extends EnumBlobRefsOptions>(
|
|
|
295
433
|
export function enumBlobRefs(
|
|
296
434
|
input: LexValue,
|
|
297
435
|
options?: EnumBlobRefsOptions,
|
|
298
|
-
): Generator<BlobRef
|
|
436
|
+
): Generator<BlobRef, void, unknown>
|
|
299
437
|
export function* enumBlobRefs(
|
|
300
438
|
input: LexValue,
|
|
301
439
|
options?: EnumBlobRefsOptions,
|
|
302
|
-
): Generator<BlobRef
|
|
440
|
+
): Generator<BlobRef, void, unknown> {
|
|
303
441
|
// LegacyBlobRef not included by default
|
|
304
442
|
const includeLegacy = options?.allowLegacy === true
|
|
305
443
|
|
|
@@ -322,9 +460,9 @@ export function* enumBlobRefs(
|
|
|
322
460
|
} else if (isPlainProto(value)) {
|
|
323
461
|
if (visited.has(value)) continue
|
|
324
462
|
visited.add(value)
|
|
325
|
-
if (
|
|
463
|
+
if (isTypedBlobRef(value, options)) {
|
|
326
464
|
yield value
|
|
327
|
-
} else if (includeLegacy && isLegacyBlobRef(value)) {
|
|
465
|
+
} else if (includeLegacy && isLegacyBlobRef(value, options)) {
|
|
328
466
|
yield value
|
|
329
467
|
} else {
|
|
330
468
|
for (const v of Object.values(value)) {
|
|
@@ -58,13 +58,13 @@ describe(BytesCid, () => {
|
|
|
58
58
|
it('throws an error for invalid CID bytes', () => {
|
|
59
59
|
expect(
|
|
60
60
|
() => new BytesCid(new Uint8Array([2, 0x55, 0x12, 3, 1, 2, 3])),
|
|
61
|
-
).
|
|
62
|
-
expect(() => new BytesCid(new Uint8Array([1, 0x55, 0x12]))).
|
|
61
|
+
).toThrow('Unsupported CID version')
|
|
62
|
+
expect(() => new BytesCid(new Uint8Array([1, 0x55, 0x12]))).toThrow(
|
|
63
63
|
'CID bytes are too short',
|
|
64
64
|
)
|
|
65
65
|
expect(
|
|
66
66
|
() => new BytesCid(new Uint8Array([1, 0x55, 0x12, 4, 1, 2, 3])),
|
|
67
|
-
).
|
|
67
|
+
).toThrow('CID bytes length mismatch')
|
|
68
68
|
})
|
|
69
69
|
})
|
|
70
70
|
|
package/src/cid.ts
CHANGED
package/src/core-js.d.ts
ADDED
package/src/lib/nodejs-buffer.ts
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
type Encoding = 'utf8' | 'base64' | 'base64url'
|
|
2
2
|
|
|
3
|
+
// Node's buffer module declares this type internally, but referencing it here
|
|
4
|
+
// would couple this file to @types/node. Local copy keeps this module
|
|
5
|
+
// standalone so it compiles in any environment (see tsconfig/isomorphic.json).
|
|
6
|
+
type WithImplicitCoercion<T> = T | { valueOf(): T }
|
|
7
|
+
|
|
3
8
|
interface NodeJSBuffer<TArrayBuffer extends ArrayBufferLike = ArrayBufferLike>
|
|
4
9
|
extends Uint8Array<TArrayBuffer> {
|
|
5
10
|
byteLength: number
|
|
@@ -12,6 +17,11 @@ interface NodeJSBufferConstructor {
|
|
|
12
17
|
input: Uint8Array | ArrayBuffer | ArrayBufferView,
|
|
13
18
|
): NodeJSBuffer<ArrayBuffer>
|
|
14
19
|
from(input: string, encoding?: Encoding): NodeJSBuffer<ArrayBuffer>
|
|
20
|
+
from<TArrayBuffer extends ArrayBufferLike>(
|
|
21
|
+
arrayBuffer: WithImplicitCoercion<TArrayBuffer>,
|
|
22
|
+
byteOffset?: number,
|
|
23
|
+
length?: number,
|
|
24
|
+
): NodeJSBuffer<TArrayBuffer>
|
|
15
25
|
concat(list: readonly Uint8Array[], totalLength?: number): NodeJSBuffer
|
|
16
26
|
byteLength(input: string, encoding?: Encoding): number
|
|
17
27
|
prototype: NodeJSBuffer
|
package/src/uint8array-concat.ts
CHANGED
|
@@ -3,12 +3,16 @@ import { NodeJSBuffer } from './lib/nodejs-buffer.js'
|
|
|
3
3
|
const Buffer = NodeJSBuffer
|
|
4
4
|
|
|
5
5
|
export const ui8ConcatNode = Buffer
|
|
6
|
-
? function ui8ConcatNode(
|
|
7
|
-
|
|
6
|
+
? function ui8ConcatNode(
|
|
7
|
+
array: readonly Uint8Array[],
|
|
8
|
+
): Uint8Array<ArrayBuffer> {
|
|
9
|
+
return Buffer.concat(array) as Uint8Array<ArrayBuffer>
|
|
8
10
|
}
|
|
9
11
|
: /* v8 ignore next -- @preserve */ null
|
|
10
12
|
|
|
11
|
-
export function ui8ConcatPonyfill(
|
|
13
|
+
export function ui8ConcatPonyfill(
|
|
14
|
+
array: readonly Uint8Array[],
|
|
15
|
+
): Uint8Array<ArrayBuffer> {
|
|
12
16
|
let totalLength = 0
|
|
13
17
|
for (const arr of array) totalLength += arr.length
|
|
14
18
|
const result = new Uint8Array(totalLength)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import 'core-js/
|
|
2
|
-
import 'core-js/
|
|
1
|
+
import 'core-js/es/typed-array/from-base64.js'
|
|
2
|
+
import 'core-js/es/typed-array/to-base64.js'
|
|
3
3
|
|
|
4
4
|
import { assert, describe, expect, it } from 'vitest'
|
|
5
5
|
import {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import 'core-js/
|
|
2
|
-
import 'core-js/
|
|
1
|
+
import 'core-js/es/typed-array/from-base64.js'
|
|
2
|
+
import 'core-js/es/typed-array/to-base64.js'
|
|
3
3
|
import { assert, describe, expect, it } from 'vitest'
|
|
4
4
|
import {
|
|
5
5
|
toBase64Native,
|
package/src/uint8array.test.ts
CHANGED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { assert, describe, expect, it } from 'vitest'
|
|
2
|
+
import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
|
|
3
|
+
|
|
4
|
+
for (const utf8FromBytes of [utf8FromBytesNode, utf8FromBytesNative] as const) {
|
|
5
|
+
assert(utf8FromBytes, 'utf8FromBytes implementation should not be null')
|
|
6
|
+
describe(utf8FromBytes, () => {
|
|
7
|
+
it('decodes empty Uint8Array', () => {
|
|
8
|
+
const decoded = utf8FromBytes(new Uint8Array(0))
|
|
9
|
+
expect(typeof decoded).toBe('string')
|
|
10
|
+
expect(decoded).toBe('')
|
|
11
|
+
})
|
|
12
|
+
|
|
13
|
+
it('decodes 10MB', () => {
|
|
14
|
+
const bytes = Buffer.allocUnsafe(10_000_000).fill('🐩')
|
|
15
|
+
const decoded = utf8FromBytes(bytes)
|
|
16
|
+
expect(decoded).toBe('🐩'.repeat(10_000_000 / 4))
|
|
17
|
+
})
|
|
18
|
+
|
|
19
|
+
for (const string of [
|
|
20
|
+
'',
|
|
21
|
+
'\0\0',
|
|
22
|
+
'\0\0\0',
|
|
23
|
+
'\0\0\0\0',
|
|
24
|
+
'__',
|
|
25
|
+
'é',
|
|
26
|
+
'àç',
|
|
27
|
+
'\0éàç',
|
|
28
|
+
'```\x1b',
|
|
29
|
+
'aaa',
|
|
30
|
+
'Hello, World!',
|
|
31
|
+
'😀😃😄😁😆😅😂🤣😊😇',
|
|
32
|
+
'👩‍💻👨‍💻👩‍🔬👨‍🔬👩‍🚀👨‍🚀',
|
|
33
|
+
'🌍🌎🌏🌐🪐🌟✨⚡🔥💧',
|
|
34
|
+
] as const) {
|
|
35
|
+
const buffer = Buffer.from(string, 'utf8')
|
|
36
|
+
|
|
37
|
+
it(`decodes ${JSON.stringify(string)}`, () => {
|
|
38
|
+
const decoded = utf8FromBytes(buffer)
|
|
39
|
+
expect(decoded).toBe(string)
|
|
40
|
+
})
|
|
41
|
+
}
|
|
42
|
+
})
|
|
43
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { NodeJSBuffer } from './lib/nodejs-buffer.js'
|
|
2
|
+
|
|
3
|
+
const Buffer = NodeJSBuffer
|
|
4
|
+
|
|
5
|
+
export const utf8FromBytesNode = Buffer
|
|
6
|
+
? function utf8FromBytesNode(bytes: Uint8Array): string {
|
|
7
|
+
// @NOTE Buffer.from(bytes) creates a copy of the ArrayBuffer. The following
|
|
8
|
+
// allows us to avoid the copy by creating a Buffer that shares the same
|
|
9
|
+
// memory as the input Uint8Array.
|
|
10
|
+
const buffer = Buffer.from(
|
|
11
|
+
bytes.buffer,
|
|
12
|
+
bytes.byteOffset,
|
|
13
|
+
bytes.byteLength,
|
|
14
|
+
)
|
|
15
|
+
return buffer.toString('utf8')
|
|
16
|
+
}
|
|
17
|
+
: /* v8 ignore next -- @preserve */ null
|
|
18
|
+
|
|
19
|
+
export function utf8FromBytesNative(bytes: Uint8Array): string {
|
|
20
|
+
return new TextDecoder('utf-8').decode(bytes)
|
|
21
|
+
}
|
package/src/utf8.ts
CHANGED
|
@@ -3,10 +3,30 @@ import {
|
|
|
3
3
|
utf8FromBase64Node,
|
|
4
4
|
utf8FromBase64Ponyfill,
|
|
5
5
|
} from './utf8-from-base64.js'
|
|
6
|
+
import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
|
|
6
7
|
import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
|
|
7
8
|
import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
|
|
8
9
|
import { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'
|
|
9
10
|
|
|
11
|
+
/**
|
|
12
|
+
* Converts a Uint8Array to a UTF-8 string.
|
|
13
|
+
*
|
|
14
|
+
* Uses Node.js Buffer when available for performance, falling back to
|
|
15
|
+
* TextDecoder in environments without Buffer support.
|
|
16
|
+
*
|
|
17
|
+
* @param bytes - The binary data to decode
|
|
18
|
+
* @returns The decoded string (as UTF-16 JavaScript string)
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* import { utf8FromBytes } from '@atproto/lex-data'
|
|
23
|
+
*
|
|
24
|
+
* const bytes = new Uint8Array([72, 101, 108, 108, 111])
|
|
25
|
+
* utf8FromBytes(bytes) // 'Hello'
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
export const utf8FromBytes = utf8FromBytesNode ?? utf8FromBytesNative
|
|
29
|
+
|
|
10
30
|
/**
|
|
11
31
|
* Counts the number of grapheme clusters (user-perceived characters) in a string.
|
|
12
32
|
*
|
package/tsconfig.tests.json
CHANGED