@atproto/lex-data 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/utf8.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"utf8.js","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":";;;AACA,+DAG8B;AAC9B,iEAA+E;AAC/E,+CAA2D;AAC3D,2DAA4E;AAE5E;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACU,QAAA,WAAW;AACtB,iCAAiC,CAAC,wCAAiB,IAAI,0CAAmB,CAAA;AAE5E,iCAAiC;AACjC,IAAI,mBAAW,KAAK,0CAAmB,EAAE,CAAC;IACxC,aAAa;IACb,OAAO,CAAC,IAAI,CACV,oHAAoH,CACrH,CAAA;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACU,QAAA,OAAO;AAClB,iCAAiC,CAAC,yBAAW,IAAI,4BAAc,CAAA;AAEjE;;;;;;;;;;;;;;;GAeG;AACU,QAAA,YAAY;AACvB,iCAAiC,CAAC,oCAAgB,IAAI,wCAAoB,CAAA;AAE5E;;;;;;;;;;;;;;;GAeG;AACU,QAAA,cAAc;AAIzB,iCAAiC,CAAC,wCAAkB,IAAI,4CAAsB,CAAA","sourcesContent":["import { Base64Alphabet } from './uint8array.js'\nimport {\n utf8FromBase64Node,\n utf8FromBase64Ponyfill,\n} from './utf8-from-base64.js'\nimport { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'\nimport { utf8LenCompute, utf8LenNode } from './utf8-len.js'\nimport { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'\n\n/**\n * Counts the number of grapheme clusters (user-perceived characters) in a string.\n *\n * Grapheme clusters represent what users typically think of as \"characters\",\n * handling complex cases like:\n * - Emoji with skin tones and ZWJ sequences (e.g., family emoji)\n * - Combined characters (e.g., 'e' + combining accent)\n * - Regional indicator pairs (flag emoji)\n *\n * Uses native {@link Intl.Segmenter} when available, falling back to a ponyfill.\n *\n * @param str - The string to measure\n * @returns The number of grapheme clusters\n *\n * @example\n * ```typescript\n * import { graphemeLen } from '@atproto/lex-data'\n *\n * graphemeLen('hello') // 5\n * graphemeLen('cafe\\u0301') // 4 (cafe with combining accent)\n * graphemeLen('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 1 (family emoji)\n * ```\n */\nexport const graphemeLen: (str: string) => number =\n /* v8 ignore next -- @preserve */ graphemeLenNative ?? 
graphemeLenPonyfill\n\n/* v8 ignore next -- @preserve */\nif (graphemeLen === graphemeLenPonyfill) {\n /*#__PURE__*/\n console.warn(\n '[@atproto/lex-data]: Intl.Segmenter is not available in this environment. Falling back to ponyfill implementation.',\n )\n}\n\n/**\n * Calculates the UTF-8 byte length of a string.\n *\n * Returns the number of bytes the string would occupy when encoded as UTF-8.\n * This is important for Lexicon validation where schemas specify byte limits.\n *\n * Uses Node.js Buffer.byteLength when available for performance,\n * falling back to a computed implementation.\n *\n * @param str - The string to measure\n * @returns The UTF-8 byte length\n *\n * @example\n * ```typescript\n * import { utf8Len } from '@atproto/lex-data'\n *\n * utf8Len('hello') // 5 (ASCII: 1 byte per char)\n * utf8Len('\\u00e9') // 2 (e with accent: 2 bytes)\n * utf8Len('\\u{1F600}') // 4 (emoji: 4 bytes)\n * utf8Len('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 25 (family emoji)\n * ```\n */\nexport const utf8Len: (string: string) => number =\n /* v8 ignore next -- @preserve */ utf8LenNode ?? utf8LenCompute\n\n/**\n * Encodes a UTF-8 string to base64.\n *\n * First encodes the string as UTF-8 bytes, then encodes those bytes as base64.\n *\n * @param str - The string to encode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The base64-encoded string\n *\n * @example\n * ```typescript\n * import { utf8ToBase64 } from '@atproto/lex-data'\n *\n * utf8ToBase64('Hello') // 'SGVsbG8='\n * ```\n */\nexport const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =\n /* v8 ignore next -- @preserve */ utf8ToBase64Node ?? 
utf8ToBase64Ponyfill\n\n/**\n * Decodes a base64 string to UTF-8.\n *\n * Decodes the base64 to bytes, then interprets those bytes as UTF-8 text.\n *\n * @param b64 - The base64 string to decode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The decoded UTF-8 string\n *\n * @example\n * ```typescript\n * import { utf8FromBase64 } from '@atproto/lex-data'\n *\n * utf8FromBase64('SGVsbG8=') // 'Hello'\n * ```\n */\nexport const utf8FromBase64: (\n b64: string,\n alphabet?: Base64Alphabet,\n) => string =\n /* v8 ignore next -- @preserve */ utf8FromBase64Node ?? utf8FromBase64Ponyfill\n"]}
1
+ {"version":3,"file":"utf8.js","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":";;;AACA,+DAG8B;AAC9B,6DAA6E;AAC7E,iEAA+E;AAC/E,+CAA2D;AAC3D,2DAA4E;AAE5E;;;;;;;;;;;;;;;;GAgBG;AACU,QAAA,aAAa,GAAG,sCAAiB,IAAI,wCAAmB,CAAA;AAErE;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACU,QAAA,WAAW;AACtB,iCAAiC,CAAC,wCAAiB,IAAI,0CAAmB,CAAA;AAE5E,iCAAiC;AACjC,IAAI,mBAAW,KAAK,0CAAmB,EAAE,CAAC;IACxC,aAAa;IACb,OAAO,CAAC,IAAI,CACV,oHAAoH,CACrH,CAAA;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACU,QAAA,OAAO;AAClB,iCAAiC,CAAC,yBAAW,IAAI,4BAAc,CAAA;AAEjE;;;;;;;;;;;;;;;GAeG;AACU,QAAA,YAAY;AACvB,iCAAiC,CAAC,oCAAgB,IAAI,wCAAoB,CAAA;AAE5E;;;;;;;;;;;;;;;GAeG;AACU,QAAA,cAAc;AAIzB,iCAAiC,CAAC,wCAAkB,IAAI,4CAAsB,CAAA","sourcesContent":["import { Base64Alphabet } from './uint8array.js'\nimport {\n utf8FromBase64Node,\n utf8FromBase64Ponyfill,\n} from './utf8-from-base64.js'\nimport { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'\nimport { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'\nimport { utf8LenCompute, utf8LenNode } from './utf8-len.js'\nimport { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'\n\n/**\n * Converts a Uint8Array to a UTF-8 string.\n *\n * Uses Node.js Buffer when available for performance, falling back to\n * TextDecoder in environments without Buffer support.\n *\n * @param bytes - The binary data to decode\n * @returns The decoded string (as UTF-16 JavaScript string)\n *\n * @example\n * ```typescript\n * import { utf8FromBytes } from '@atproto/lex-data'\n *\n * const bytes = new Uint8Array([72, 101, 108, 108, 111])\n * utf8FromBytes(bytes) // 'Hello'\n * ```\n */\nexport const utf8FromBytes = utf8FromBytesNode ?? 
utf8FromBytesNative\n\n/**\n * Counts the number of grapheme clusters (user-perceived characters) in a string.\n *\n * Grapheme clusters represent what users typically think of as \"characters\",\n * handling complex cases like:\n * - Emoji with skin tones and ZWJ sequences (e.g., family emoji)\n * - Combined characters (e.g., 'e' + combining accent)\n * - Regional indicator pairs (flag emoji)\n *\n * Uses native {@link Intl.Segmenter} when available, falling back to a ponyfill.\n *\n * @param str - The string to measure\n * @returns The number of grapheme clusters\n *\n * @example\n * ```typescript\n * import { graphemeLen } from '@atproto/lex-data'\n *\n * graphemeLen('hello') // 5\n * graphemeLen('cafe\\u0301') // 4 (cafe with combining accent)\n * graphemeLen('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 1 (family emoji)\n * ```\n */\nexport const graphemeLen: (str: string) => number =\n /* v8 ignore next -- @preserve */ graphemeLenNative ?? graphemeLenPonyfill\n\n/* v8 ignore next -- @preserve */\nif (graphemeLen === graphemeLenPonyfill) {\n /*#__PURE__*/\n console.warn(\n '[@atproto/lex-data]: Intl.Segmenter is not available in this environment. 
Falling back to ponyfill implementation.',\n )\n}\n\n/**\n * Calculates the UTF-8 byte length of a string.\n *\n * Returns the number of bytes the string would occupy when encoded as UTF-8.\n * This is important for Lexicon validation where schemas specify byte limits.\n *\n * Uses Node.js Buffer.byteLength when available for performance,\n * falling back to a computed implementation.\n *\n * @param str - The string to measure\n * @returns The UTF-8 byte length\n *\n * @example\n * ```typescript\n * import { utf8Len } from '@atproto/lex-data'\n *\n * utf8Len('hello') // 5 (ASCII: 1 byte per char)\n * utf8Len('\\u00e9') // 2 (e with accent: 2 bytes)\n * utf8Len('\\u{1F600}') // 4 (emoji: 4 bytes)\n * utf8Len('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 25 (family emoji)\n * ```\n */\nexport const utf8Len: (string: string) => number =\n /* v8 ignore next -- @preserve */ utf8LenNode ?? utf8LenCompute\n\n/**\n * Encodes a UTF-8 string to base64.\n *\n * First encodes the string as UTF-8 bytes, then encodes those bytes as base64.\n *\n * @param str - The string to encode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The base64-encoded string\n *\n * @example\n * ```typescript\n * import { utf8ToBase64 } from '@atproto/lex-data'\n *\n * utf8ToBase64('Hello') // 'SGVsbG8='\n * ```\n */\nexport const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =\n /* v8 ignore next -- @preserve */ utf8ToBase64Node ?? 
utf8ToBase64Ponyfill\n\n/**\n * Decodes a base64 string to UTF-8.\n *\n * Decodes the base64 to bytes, then interprets those bytes as UTF-8 text.\n *\n * @param b64 - The base64 string to decode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The decoded UTF-8 string\n *\n * @example\n * ```typescript\n * import { utf8FromBase64 } from '@atproto/lex-data'\n *\n * utf8FromBase64('SGVsbG8=') // 'Hello'\n * ```\n */\nexport const utf8FromBase64: (\n b64: string,\n alphabet?: Base64Alphabet,\n) => string =\n /* v8 ignore next -- @preserve */ utf8FromBase64Node ?? utf8FromBase64Ponyfill\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@atproto/lex-data",
3
- "version": "0.0.13",
3
+ "version": "0.0.15",
4
4
  "license": "MIT",
5
5
  "description": "Core utilities for AT Lexicons",
6
6
  "keywords": [
package/src/blob.test.ts CHANGED
@@ -3,8 +3,8 @@ import {
3
3
  BlobRef,
4
4
  LegacyBlobRef,
5
5
  enumBlobRefs,
6
- isBlobRef,
7
6
  isLegacyBlobRef,
7
+ isTypedBlobRef,
8
8
  } from './blob.js'
9
9
  import { RawCid, parseCid } from './cid.js'
10
10
  import { LexArray, LexMap, LexValue } from './lex.js'
@@ -21,7 +21,7 @@ const invalidBlobCid = parseCid(
21
21
  { flavor: 'cbor' },
22
22
  )
23
23
 
24
- describe(isBlobRef, () => {
24
+ describe(isTypedBlobRef, () => {
25
25
  it('tests valid blobCid and lexCid', () => {
26
26
  expect(validBlobCid.code).toBe(0x55) // raw
27
27
  expect(validBlobCid.multihash.code).toBe(0x12) // sha2-256
@@ -31,7 +31,7 @@ describe(isBlobRef, () => {
31
31
 
32
32
  it('parses valid blob', () => {
33
33
  expect(
34
- isBlobRef({
34
+ isTypedBlobRef({
35
35
  $type: 'blob',
36
36
  ref: validBlobCid,
37
37
  mimeType: 'image/jpeg',
@@ -40,7 +40,7 @@ describe(isBlobRef, () => {
40
40
  ).toBe(true)
41
41
 
42
42
  expect(
43
- isBlobRef(
43
+ isTypedBlobRef(
44
44
  {
45
45
  $type: 'blob',
46
46
  ref: invalidBlobCid,
@@ -55,7 +55,7 @@ describe(isBlobRef, () => {
55
55
 
56
56
  it('performs strict validation by default', () => {
57
57
  expect(
58
- isBlobRef({
58
+ isTypedBlobRef({
59
59
  $type: 'blob',
60
60
  ref: invalidBlobCid,
61
61
  mimeType: 'image/jpeg',
@@ -66,7 +66,7 @@ describe(isBlobRef, () => {
66
66
 
67
67
  it('rejects invalid inputs', () => {
68
68
  expect(
69
- isBlobRef({
69
+ isTypedBlobRef({
70
70
  $type: 'blob',
71
71
  ref: { $link: validBlobCid.toString() },
72
72
  mimeType: 'image/jpeg',
@@ -75,7 +75,7 @@ describe(isBlobRef, () => {
75
75
  ).toBe(false)
76
76
 
77
77
  expect(
78
- isBlobRef({
78
+ isTypedBlobRef({
79
79
  // $type: 'blob',
80
80
  ref: validBlobCid,
81
81
  mimeType: 'image/jpeg',
@@ -84,7 +84,7 @@ describe(isBlobRef, () => {
84
84
  ).toBe(false)
85
85
 
86
86
  expect(
87
- isBlobRef({
87
+ isTypedBlobRef({
88
88
  $type: 'blob',
89
89
  ref: validBlobCid,
90
90
  mimeType: { toString: () => 'image/jpeg' },
@@ -93,7 +93,7 @@ describe(isBlobRef, () => {
93
93
  ).toBe(false)
94
94
 
95
95
  expect(
96
- isBlobRef(
96
+ isTypedBlobRef(
97
97
  {
98
98
  $type: 'blob',
99
99
  ref: { $link: validBlobCid.toString() },
@@ -105,7 +105,7 @@ describe(isBlobRef, () => {
105
105
  ).toBe(false)
106
106
 
107
107
  expect(
108
- isBlobRef({
108
+ isTypedBlobRef({
109
109
  $type: 'blob',
110
110
  mimeType: 'image/jpeg',
111
111
  size: 10000,
@@ -113,7 +113,7 @@ describe(isBlobRef, () => {
113
113
  ).toBe(false)
114
114
 
115
115
  expect(
116
- isBlobRef(
116
+ isTypedBlobRef(
117
117
  {
118
118
  $type: 'blob',
119
119
  mimeType: 'image/jpeg',
@@ -123,15 +123,15 @@ describe(isBlobRef, () => {
123
123
  ),
124
124
  ).toBe(false)
125
125
 
126
- expect(isBlobRef('not an object')).toBe(false)
127
- expect(isBlobRef([])).toBe(false)
128
- expect(isBlobRef(new Date())).toBe(false)
129
- expect(isBlobRef(new Map())).toBe(false)
126
+ expect(isTypedBlobRef('not an object')).toBe(false)
127
+ expect(isTypedBlobRef([])).toBe(false)
128
+ expect(isTypedBlobRef(new Date())).toBe(false)
129
+ expect(isTypedBlobRef(new Map())).toBe(false)
130
130
  })
131
131
 
132
132
  it('rejects non-integer size', () => {
133
133
  expect(
134
- isBlobRef({
134
+ isTypedBlobRef({
135
135
  $type: 'blob',
136
136
  ref: validBlobCid,
137
137
  mimeType: 'image/jpeg',
@@ -142,7 +142,7 @@ describe(isBlobRef, () => {
142
142
 
143
143
  it('rejects invalid CID/multihash code', () => {
144
144
  expect(
145
- isBlobRef(
145
+ isTypedBlobRef(
146
146
  {
147
147
  $type: 'blob',
148
148
  ref: validBlobCid,
@@ -154,7 +154,7 @@ describe(isBlobRef, () => {
154
154
  ).toBe(true)
155
155
 
156
156
  expect(
157
- isBlobRef(
157
+ isTypedBlobRef(
158
158
  {
159
159
  $type: 'blob',
160
160
  ref: invalidBlobCid,
@@ -168,7 +168,7 @@ describe(isBlobRef, () => {
168
168
 
169
169
  it('rejects extra keys', () => {
170
170
  expect(
171
- isBlobRef({
171
+ isTypedBlobRef({
172
172
  $type: 'blob',
173
173
  ref: validBlobCid,
174
174
  mimeType: 'image/jpeg',
@@ -178,7 +178,7 @@ describe(isBlobRef, () => {
178
178
  ).toBe(false)
179
179
 
180
180
  expect(
181
- isBlobRef(
181
+ isTypedBlobRef(
182
182
  {
183
183
  $type: 'blob',
184
184
  ref: validBlobCid,
@@ -197,7 +197,7 @@ describe(isBlobRef, () => {
197
197
  'QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG', // CID v0
198
198
  )
199
199
  expect(
200
- isBlobRef(
200
+ isTypedBlobRef(
201
201
  {
202
202
  $type: 'blob',
203
203
  ref: cidV0,
@@ -221,14 +221,27 @@ describe(isLegacyBlobRef, () => {
221
221
  ).toBe(true)
222
222
 
223
223
  expect(
224
- isLegacyBlobRef({
225
- cid: invalidBlobCid.toString(),
226
- mimeType: 'image/jpeg',
227
- }),
224
+ isLegacyBlobRef(
225
+ {
226
+ cid: invalidBlobCid.toString(),
227
+ mimeType: 'image/jpeg',
228
+ },
229
+ { strict: false },
230
+ ),
228
231
  ).toBe(true)
229
232
  })
230
233
 
231
234
  it('rejects invalid inputs', () => {
235
+ expect(
236
+ isLegacyBlobRef(
237
+ {
238
+ cid: invalidBlobCid.toString(),
239
+ mimeType: 'image/jpeg',
240
+ },
241
+ { strict: true },
242
+ ),
243
+ ).toBe(false)
244
+
232
245
  expect(
233
246
  isLegacyBlobRef({
234
247
  cid: 'babbaaa',
package/src/blob.ts CHANGED
@@ -1,21 +1,172 @@
1
- import { Cid, RawCid, ifCid, validateCidString } from './cid.js'
1
+ import {
2
+ CheckCidOptions,
3
+ Cid,
4
+ RawCid,
5
+ ifCid,
6
+ parseCid,
7
+ validateCidString,
8
+ } from './cid.js'
2
9
  import { LexValue } from './lex.js'
3
10
  import { isPlainObject, isPlainProto } from './object.js'
4
11
 
12
+ /**
13
+ * Options to use with {@link ifCid}, {@link validateCidString}, and related CID
14
+ * validation functions when validating CIDs in BlobRefs, in strict mode. This
15
+ * ensures that the CID is a {@link RawCid} (CID v1, raw multicodec, sha256
16
+ * multihash), which is the expected format for blob references in the AT
17
+ * Protocol data model.
18
+ */
19
+ const STRICT_CID_CHECK_OPTIONS: CheckCidOptions = { flavor: 'raw' }
20
+
21
+ // Number.isSafeInteger is actually safe to use with non-number values, so we
22
+ // can use it as a type guard.
23
+ const isSafeInteger = Number.isSafeInteger as (v: unknown) => v is number
24
+
25
+ /**
26
+ * Reference to binary data (like images, videos, etc.) in the AT Protocol data
27
+ * model.
28
+ *
29
+ * This type represents a reference to a blob of binary data, identified by its
30
+ * content hash (CID) and accompanied by metadata such as MIME type and size.
31
+ *
32
+ * The {@link BlobRef} type is a union of the current {@link TypedBlobRef}
33
+ * format and the legacy {@link LegacyBlobRef} format.
34
+ */
35
+ export type BlobRef<Ref extends Cid = Cid> = TypedBlobRef<Ref> | LegacyBlobRef
36
+
37
+ /**
38
+ * Options for validating a {@link BlobRef}.
39
+ */
40
+ export type BlobRefCheckOptions = {
41
+ /**
42
+ * If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
43
+ * any valid CID. Otherwise, validates that the CID is v1, uses the raw
44
+ * multicodec, and has a sha256 multihash.
45
+ *
46
+ * @default true
47
+ */
48
+ strict?: boolean
49
+ }
50
+
51
+ /**
52
+ * Type guard to check if a value is a valid {@link BlobRef}, which can be
53
+ * either a {@link TypedBlobRef} or a {@link LegacyBlobRef}. By default, strict
54
+ * CID validation is applied to ensure that the CID in the blob reference is in
55
+ * the expected format for the AT Protocol, but this can be relaxed with the
56
+ * `strict: false` option.
57
+ */
58
+ export function isBlobRef(input: unknown): input is BlobRef<RawCid>
59
+ export function isBlobRef<TOptions extends BlobRefCheckOptions>(
60
+ input: unknown,
61
+ options: TOptions,
62
+ ): input is LegacyBlobRef | InferTypedBlobRef<TOptions>
63
+ export function isBlobRef(
64
+ input: unknown,
65
+ options?: BlobRefCheckOptions,
66
+ ): input is BlobRef<RawCid>
67
+ export function isBlobRef(
68
+ input: unknown,
69
+ options?: BlobRefCheckOptions,
70
+ ): input is BlobRef {
71
+ return (input as any)?.$type === 'blob'
72
+ ? isTypedBlobRef(input, options)
73
+ : isLegacyBlobRef(input, options)
74
+ }
75
+
76
+ /**
77
+ * Extracts the MIME type from a {@link BlobRef}.
78
+ *
79
+ * @example
80
+ * ```ts
81
+ * const mimeType = getBlobMime(blobRef)
82
+ * console.log(mimeType) // e.g., 'image/jpeg'
83
+ * ```
84
+ */
85
+ export function getBlobMime(blob: BlobRef): string
86
+ export function getBlobMime(blob?: BlobRef): string | undefined
87
+ export function getBlobMime(blob?: BlobRef): string | undefined {
88
+ return blob?.mimeType
89
+ }
90
+
91
+ /**
92
+ * Extracts the size (in bytes) from a {@link TypedBlobRef}. For
93
+ * {@link LegacyBlobRef}, size information is not available, so this function
94
+ * returns `undefined` for legacy refs.
95
+ *
96
+ * @note The size property, in blob refs, cannot be 100% trusted since the PDS
97
+ * might not have a local copy of the blob (to check the size against) and might
98
+ * just be passing through the blob ref from the client without validating it.
99
+ * So, while this function can be useful for getting size information when
100
+ * available, it should not be solely relied upon for critical functionality
101
+ * without additional validation.
102
+ *
103
+ * @example
104
+ * ```ts
105
+ * const size = getBlobSize(blobRef)
106
+ * if (size !== undefined) {
107
+ * console.log(`Blob size: ${size} bytes`)
108
+ * } else {
109
+ * console.log('Size information not available for legacy blob ref')
110
+ * }
111
+ * ```
112
+ */
113
+ export function getBlobSize(blob: BlobRef): number | undefined {
114
+ if ('$type' in blob && blob.size >= 0) return blob.size
115
+ // LegacyBlobRef doesn't have size information
116
+ return undefined
117
+ }
118
+
119
+ /**
120
+ * Extracts the {@link Cid} from a {@link BlobRef}.
121
+ *
122
+ * @throws If the input is a {@link LegacyBlobRef} with an invalid CID string
123
+ * @example
124
+ * ```ts
125
+ * const cid = getBlobCid(blobRef)
126
+ * console.log(cid.bytes)
127
+ * ```
128
+ */
129
+ export function getBlobCid(blob: BlobRef): Cid
130
+ export function getBlobCid(blob?: BlobRef): Cid | undefined
131
+ export function getBlobCid(blob?: BlobRef): Cid | undefined {
132
+ if (!blob) return undefined
133
+ return '$type' in blob ? blob.ref : parseCid(blob.cid)
134
+ }
135
+
136
+ /**
137
+ * Extracts the CID string from a {@link BlobRef}.
138
+ *
139
+ * This is similar to `getBlobCid(blob).toString()` but is more optimized since
140
+ * the CID string is already available in the legacy format and we can avoid
141
+ * parsing it into a CID object just to convert it back to a string.
142
+ *
143
+ * @example
144
+ * ```ts
145
+ * const cidString = getBlobCidString(blobRef)
146
+ * console.log(cidString)
147
+ * ```
148
+ */
149
+ export function getBlobCidString(blob: BlobRef): string
150
+ export function getBlobCidString(blob?: BlobRef): string | undefined
151
+ export function getBlobCidString(blob?: BlobRef): string | undefined {
152
+ if (!blob) return undefined
153
+ return '$type' in blob ? blob.ref.toString() : blob.cid
154
+ }
155
+
5
156
  /**
6
157
  * Reference to binary data (like images, videos, etc.) in the AT Protocol data model.
7
158
  *
8
- * A BlobRef is a {@link LexMap} with a specific structure that identifies binary
9
- * content by its content hash (CID), along with metadata about the content type
10
- * and size.
159
+ * A {@link TypedBlobRef} is a {@link LexMap} with a specific structure that
160
+ * identifies binary content by its content hash (CID), along with metadata
161
+ * about the content type and size.
11
162
  *
12
163
  * @typeParam Ref - The type of CID reference, defaults to any {@link Cid}
13
164
  *
14
165
  * @example
15
166
  * ```typescript
16
- * import type { BlobRef } from '@atproto/lex-data'
167
+ * import type { TypedBlobRef } from '@atproto/lex-data'
17
168
  *
18
- * const imageRef: BlobRef = {
169
+ * const imageRef: TypedBlobRef = {
19
170
  * $type: 'blob',
20
171
  * mimeType: 'image/jpeg',
21
172
  * ref: cid, // CID of the blob content
@@ -23,41 +174,27 @@ import { isPlainObject, isPlainProto } from './object.js'
23
174
  * }
24
175
  * ```
25
176
  *
26
- * @see {@link isBlobRef} to check if a value is a valid BlobRef
177
+ * @see {@link isTypedBlobRef} to check if a value is a valid {@link TypedBlobRef}
27
178
  * @see {@link LegacyBlobRef} for the older blob reference format
28
179
  */
29
- export type BlobRef<Ref extends Cid = Cid> = {
180
+ export type TypedBlobRef<Ref extends Cid = Cid> = {
30
181
  $type: 'blob'
31
182
  mimeType: string
32
183
  ref: Ref
33
184
  size: number
34
185
  }
35
186
 
36
- /**
37
- * Options for validating a {@link BlobRef}.
38
- */
39
- export type BlobRefCheckOptions = {
40
- /**
41
- * If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
42
- * any valid CID. Otherwise, validates that the CID is v1, uses the raw
43
- * multicodec, and has a sha256 multihash.
44
- *
45
- * @default true
46
- */
47
- strict?: boolean
48
- }
49
-
50
187
  /**
51
188
  * Infers the BlobRef type based on the check options.
52
189
  *
53
190
  * @typeParam TOptions - The options used for checking
54
191
  */
55
- export type InferCheckedBlobRef<TOptions extends BlobRefCheckOptions> =
192
+ export type InferTypedBlobRef<TOptions extends BlobRefCheckOptions> =
56
193
  TOptions extends { strict: false }
57
- ? BlobRef
194
+ ? TypedBlobRef
58
195
  : { strict: boolean } extends TOptions
59
- ? BlobRef
60
- : BlobRef<RawCid>
196
+ ? TypedBlobRef
197
+ : TypedBlobRef<RawCid>
61
198
 
62
199
  /**
63
200
  * Type guard to check if a value is a valid {@link BlobRef}.
@@ -74,32 +211,32 @@ export type InferCheckedBlobRef<TOptions extends BlobRefCheckOptions> =
74
211
  *
75
212
  * @example
76
213
  * ```typescript
77
- * import { isBlobRef } from '@atproto/lex-data'
214
+ * import { isTypedBlobRef } from '@atproto/lex-data'
78
215
  *
79
- * if (isBlobRef(data)) {
216
+ * if (isTypedBlobRef(data)) {
80
217
  * console.log(data.mimeType) // e.g., 'image/jpeg'
81
218
  * console.log(data.size) // e.g., 12345
82
219
  * }
83
220
  *
84
221
  * // Allow any valid CID (not just raw CIDs)
85
- * if (isBlobRef(data, { strict: false })) {
222
+ * if (isTypedBlobRef(data, { strict: false })) {
86
223
  * // ...
87
224
  * }
88
225
  * ```
89
226
  */
90
- export function isBlobRef(input: unknown): input is BlobRef<RawCid>
91
- export function isBlobRef<TOptions extends BlobRefCheckOptions>(
227
+ export function isTypedBlobRef(input: unknown): input is TypedBlobRef<RawCid>
228
+ export function isTypedBlobRef<TOptions extends BlobRefCheckOptions>(
92
229
  input: unknown,
93
230
  options: TOptions,
94
- ): input is InferCheckedBlobRef<TOptions>
95
- export function isBlobRef(
231
+ ): input is InferTypedBlobRef<TOptions>
232
+ export function isTypedBlobRef(
96
233
  input: unknown,
97
234
  options?: BlobRefCheckOptions,
98
- ): input is BlobRef
99
- export function isBlobRef(
235
+ ): input is TypedBlobRef<RawCid>
236
+ export function isTypedBlobRef(
100
237
  input: unknown,
101
238
  options?: BlobRefCheckOptions,
102
- ): input is BlobRef {
239
+ ): input is TypedBlobRef {
103
240
  if (!isPlainObject(input)) {
104
241
  return false
105
242
  }
@@ -114,7 +251,10 @@ export function isBlobRef(
114
251
  return false
115
252
  }
116
253
 
117
- if (typeof size !== 'number' || size < 0 || !Number.isSafeInteger(size)) {
254
+ if (size === -1 && options?.strict === false) {
255
+ // In non-strict mode, allow size to be -1 to accommodate legacy blob refs
256
+ // that don't include size information.
257
+ } else if (!isSafeInteger(size) || size < 0) {
118
258
  return false
119
259
  }
120
260
 
@@ -136,7 +276,7 @@ export function isBlobRef(
136
276
  const cid = ifCid(
137
277
  ref,
138
278
  // Strict unless explicitly disabled
139
- options?.strict === false ? undefined : { flavor: 'raw' },
279
+ options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
140
280
  )
141
281
  if (!cid) {
142
282
  return false
@@ -178,9 +318,6 @@ export type LegacyBlobRef = {
178
318
  * - `mimeType` must be a non-empty string
179
319
  * - No additional properties allowed
180
320
  *
181
- * @param input - The value to check
182
- * @returns `true` if the input is a valid LegacyBlobRef
183
- *
184
321
  * @example
185
322
  * ```typescript
186
323
  * import { isLegacyBlobRef } from '@atproto/lex-data'
@@ -191,9 +328,12 @@ export type LegacyBlobRef = {
191
328
  * }
192
329
  * ```
193
330
  *
194
- * @see {@link isBlobRef} for checking the current blob reference format
331
+ * @see {@link isTypedBlobRef} for checking the current blob reference format
195
332
  */
196
- export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
333
+ export function isLegacyBlobRef(
334
+ input: unknown,
335
+ options?: BlobRefCheckOptions,
336
+ ): input is LegacyBlobRef {
197
337
  if (!isPlainObject(input)) {
198
338
  return false
199
339
  }
@@ -213,7 +353,12 @@ export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
213
353
  }
214
354
  }
215
355
 
216
- if (!validateCidString(cid)) {
356
+ if (
357
+ !validateCidString(
358
+ cid,
359
+ options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
360
+ )
361
+ ) {
217
362
  return false
218
363
  }
219
364
 
@@ -240,10 +385,10 @@ export type EnumBlobRefsOptions = BlobRefCheckOptions & {
240
385
  */
241
386
  export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
242
387
  TOptions extends { allowLegacy: true }
243
- ? InferCheckedBlobRef<TOptions> | LegacyBlobRef
388
+ ? InferTypedBlobRef<TOptions> | LegacyBlobRef
244
389
  : { allowLegacy: boolean } extends TOptions
245
- ? InferCheckedBlobRef<TOptions> | LegacyBlobRef
246
- : InferCheckedBlobRef<TOptions>
390
+ ? InferTypedBlobRef<TOptions> | LegacyBlobRef
391
+ : InferTypedBlobRef<TOptions>
247
392
 
248
393
  /**
249
394
  * Generator that enumerates all {@link BlobRef}s (and, optionally,
@@ -273,8 +418,8 @@ export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
273
418
  * }
274
419
  *
275
420
  * // Include legacy blob references
276
- * for (const ref of enumBlobRefs(record, { allowLegacy: true })) {
277
- * // ref may be BlobRef or LegacyBlobRef
421
+ * for (const ref of enumBlobRefs(record, { allowLegacy: true, strict: false })) {
422
+ * // ref may be BlobRef or LegacyBlobRef, with relaxed CID validation
278
423
  * }
279
424
  * ```
280
425
  */
@@ -288,11 +433,11 @@ export function enumBlobRefs<TOptions extends EnumBlobRefsOptions>(
288
433
  export function enumBlobRefs(
289
434
  input: LexValue,
290
435
  options?: EnumBlobRefsOptions,
291
- ): Generator<BlobRef | LegacyBlobRef, void, unknown>
436
+ ): Generator<BlobRef, void, unknown>
292
437
  export function* enumBlobRefs(
293
438
  input: LexValue,
294
439
  options?: EnumBlobRefsOptions,
295
- ): Generator<BlobRef | LegacyBlobRef, void, unknown> {
440
+ ): Generator<BlobRef, void, unknown> {
296
441
  // LegacyBlobRef not included by default
297
442
  const includeLegacy = options?.allowLegacy === true
298
443
 
@@ -315,9 +460,9 @@ export function* enumBlobRefs(
315
460
  } else if (isPlainProto(value)) {
316
461
  if (visited.has(value)) continue
317
462
  visited.add(value)
318
- if (isBlobRef(value, options)) {
463
+ if (isTypedBlobRef(value, options)) {
319
464
  yield value
320
- } else if (includeLegacy && isLegacyBlobRef(value)) {
465
+ } else if (includeLegacy && isLegacyBlobRef(value, options)) {
321
466
  yield value
322
467
  } else {
323
468
  for (const v of Object.values(value)) {
@@ -12,6 +12,11 @@ interface NodeJSBufferConstructor {
12
12
  input: Uint8Array | ArrayBuffer | ArrayBufferView,
13
13
  ): NodeJSBuffer<ArrayBuffer>
14
14
  from(input: string, encoding?: Encoding): NodeJSBuffer<ArrayBuffer>
15
+ from<TArrayBuffer extends ArrayBufferLike>(
16
+ arrayBuffer: WithImplicitCoercion<TArrayBuffer>,
17
+ byteOffset?: number,
18
+ length?: number,
19
+ ): Buffer<TArrayBuffer>
15
20
  concat(list: readonly Uint8Array[], totalLength?: number): NodeJSBuffer
16
21
  byteLength(input: string, encoding?: Encoding): number
17
22
  prototype: NodeJSBuffer
@@ -0,0 +1,43 @@
1
+ import { assert, describe, expect, it } from 'vitest'
2
+ import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
3
+
4
+ for (const utf8FromBytes of [utf8FromBytesNode, utf8FromBytesNative] as const) {
5
+ assert(utf8FromBytes, 'utf8FromBytes implementation should not be null')
6
+ describe(utf8FromBytes, () => {
7
+ it('decodes empty Uint8Array', () => {
8
+ const decoded = utf8FromBytes(new Uint8Array(0))
9
+ expect(typeof decoded).toBe('string')
10
+ expect(decoded).toBe('')
11
+ })
12
+
13
+ it('decodes 10MB', () => {
14
+ const bytes = Buffer.allocUnsafe(10_000_000).fill('๐Ÿฉ')
15
+ const decoded = utf8FromBytes(bytes)
16
+ expect(decoded).toBe('๐Ÿฉ'.repeat(10_000_000 / 4))
17
+ })
18
+
19
+ for (const string of [
20
+ '',
21
+ '\0\0',
22
+ '\0\0\0',
23
+ '\0\0\0\0',
24
+ '__',
25
+ 'é',
26
+ 'àç',
27
+ '\0éàç',
28
+ '```\x1b',
29
+ 'aaa',
30
+ 'Hello, World!',
31
+ '๐Ÿ˜€๐Ÿ˜ƒ๐Ÿ˜„๐Ÿ˜๐Ÿ˜†๐Ÿ˜…๐Ÿ˜‚๐Ÿคฃ๐Ÿ˜Š๐Ÿ˜‡',
32
+ '๐Ÿ‘ฉโ€๐Ÿ’ป๐Ÿ‘จโ€๐Ÿ’ป๐Ÿ‘ฉโ€๐Ÿ”ฌ๐Ÿ‘จโ€๐Ÿ”ฌ๐Ÿ‘ฉโ€๐Ÿš€๐Ÿ‘จโ€๐Ÿš€',
33
+ '๐ŸŒ๐ŸŒŽ๐ŸŒ๐ŸŒ๐Ÿช๐ŸŒŸโœจโšก๐Ÿ”ฅ๐Ÿ’ง',
34
+ ] as const) {
35
+ const buffer = Buffer.from(string, 'utf8')
36
+
37
+ it(`decodes ${JSON.stringify(string)}`, () => {
38
+ const decoded = utf8FromBytes(buffer)
39
+ expect(decoded).toBe(string)
40
+ })
41
+ }
42
+ })
43
+ }
@@ -0,0 +1,21 @@
1
+ import { NodeJSBuffer } from './lib/nodejs-buffer.js'
2
+
3
+ const Buffer = NodeJSBuffer
4
+
5
+ export const utf8FromBytesNode = Buffer
6
+ ? function utf8FromBytesNode(bytes: Uint8Array): string {
7
+ // @NOTE Buffer.from(bytes) creates a copy of the ArrayBuffer. The following
8
+ // allows us to avoid the copy by creating a Buffer that shares the same
9
+ // memory as the input Uint8Array.
10
+ const buffer = Buffer.from(
11
+ bytes.buffer,
12
+ bytes.byteOffset,
13
+ bytes.byteLength,
14
+ )
15
+ return buffer.toString('utf8')
16
+ }
17
+ : /* v8 ignore next -- @preserve */ null
18
+
19
+ export function utf8FromBytesNative(bytes: Uint8Array): string {
20
+ return new TextDecoder('utf-8').decode(bytes)
21
+ }