@atproto/lex-data 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/blob.d.ts +118 -39
- package/dist/blob.d.ts.map +1 -1
- package/dist/blob.js +73 -10
- package/dist/blob.js.map +1 -1
- package/dist/lib/nodejs-buffer.d.ts +1 -0
- package/dist/lib/nodejs-buffer.d.ts.map +1 -1
- package/dist/lib/nodejs-buffer.js.map +1 -1
- package/dist/utf8-from-bytes.d.ts +3 -0
- package/dist/utf8-from-bytes.d.ts.map +1 -0
- package/dist/utf8-from-bytes.js +19 -0
- package/dist/utf8-from-bytes.js.map +1 -0
- package/dist/utf8.d.ts +18 -0
- package/dist/utf8.d.ts.map +1 -1
- package/dist/utf8.js +20 -1
- package/dist/utf8.js.map +1 -1
- package/package.json +1 -1
- package/src/blob.test.ts +38 -25
- package/src/blob.ts +198 -53
- package/src/lib/nodejs-buffer.ts +5 -0
- package/src/utf8-from-bytes.test.ts +43 -0
- package/src/utf8-from-bytes.ts +21 -0
- package/src/utf8.ts +20 -0
package/dist/utf8.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utf8.js","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":";;;AACA,+DAG8B;AAC9B,iEAA+E;AAC/E,+CAA2D;AAC3D,2DAA4E;AAE5E;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACU,QAAA,WAAW;AACtB,iCAAiC,CAAC,wCAAiB,IAAI,0CAAmB,CAAA;AAE5E,iCAAiC;AACjC,IAAI,mBAAW,KAAK,0CAAmB,EAAE,CAAC;IACxC,aAAa;IACb,OAAO,CAAC,IAAI,CACV,oHAAoH,CACrH,CAAA;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACU,QAAA,OAAO;AAClB,iCAAiC,CAAC,yBAAW,IAAI,4BAAc,CAAA;AAEjE;;;;;;;;;;;;;;;GAeG;AACU,QAAA,YAAY;AACvB,iCAAiC,CAAC,oCAAgB,IAAI,wCAAoB,CAAA;AAE5E;;;;;;;;;;;;;;;GAeG;AACU,QAAA,cAAc;AAIzB,iCAAiC,CAAC,wCAAkB,IAAI,4CAAsB,CAAA","sourcesContent":["import { Base64Alphabet } from './uint8array.js'\nimport {\n utf8FromBase64Node,\n utf8FromBase64Ponyfill,\n} from './utf8-from-base64.js'\nimport { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'\nimport { utf8LenCompute, utf8LenNode } from './utf8-len.js'\nimport { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'\n\n/**\n * Counts the number of grapheme clusters (user-perceived characters) in a string.\n *\n * Grapheme clusters represent what users typically think of as \"characters\",\n * handling complex cases like:\n * - Emoji with skin tones and ZWJ sequences (e.g., family emoji)\n * - Combined characters (e.g., 'e' + combining accent)\n * - Regional indicator pairs (flag emoji)\n *\n * Uses native {@link Intl.Segmenter} when available, falling back to a ponyfill.\n *\n * @param str - The string to measure\n * @returns The number of grapheme clusters\n *\n * @example\n * ```typescript\n * import { graphemeLen } from '@atproto/lex-data'\n *\n * graphemeLen('hello') // 5\n * graphemeLen('cafe\\u0301') // 4 (cafe with combining accent)\n * graphemeLen('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 1 (family emoji)\n * ```\n */\nexport const graphemeLen: (str: string) => number =\n /* v8 ignore next -- @preserve */ graphemeLenNative ?? 
graphemeLenPonyfill\n\n/* v8 ignore next -- @preserve */\nif (graphemeLen === graphemeLenPonyfill) {\n /*#__PURE__*/\n console.warn(\n '[@atproto/lex-data]: Intl.Segmenter is not available in this environment. Falling back to ponyfill implementation.',\n )\n}\n\n/**\n * Calculates the UTF-8 byte length of a string.\n *\n * Returns the number of bytes the string would occupy when encoded as UTF-8.\n * This is important for Lexicon validation where schemas specify byte limits.\n *\n * Uses Node.js Buffer.byteLength when available for performance,\n * falling back to a computed implementation.\n *\n * @param str - The string to measure\n * @returns The UTF-8 byte length\n *\n * @example\n * ```typescript\n * import { utf8Len } from '@atproto/lex-data'\n *\n * utf8Len('hello') // 5 (ASCII: 1 byte per char)\n * utf8Len('\\u00e9') // 2 (e with accent: 2 bytes)\n * utf8Len('\\u{1F600}') // 4 (emoji: 4 bytes)\n * utf8Len('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 25 (family emoji)\n * ```\n */\nexport const utf8Len: (string: string) => number =\n /* v8 ignore next -- @preserve */ utf8LenNode ?? utf8LenCompute\n\n/**\n * Encodes a UTF-8 string to base64.\n *\n * First encodes the string as UTF-8 bytes, then encodes those bytes as base64.\n *\n * @param str - The string to encode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The base64-encoded string\n *\n * @example\n * ```typescript\n * import { utf8ToBase64 } from '@atproto/lex-data'\n *\n * utf8ToBase64('Hello') // 'SGVsbG8='\n * ```\n */\nexport const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =\n /* v8 ignore next -- @preserve */ utf8ToBase64Node ?? 
utf8ToBase64Ponyfill\n\n/**\n * Decodes a base64 string to UTF-8.\n *\n * Decodes the base64 to bytes, then interprets those bytes as UTF-8 text.\n *\n * @param b64 - The base64 string to decode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The decoded UTF-8 string\n *\n * @example\n * ```typescript\n * import { utf8FromBase64 } from '@atproto/lex-data'\n *\n * utf8FromBase64('SGVsbG8=') // 'Hello'\n * ```\n */\nexport const utf8FromBase64: (\n b64: string,\n alphabet?: Base64Alphabet,\n) => string =\n /* v8 ignore next -- @preserve */ utf8FromBase64Node ?? utf8FromBase64Ponyfill\n"]}
|
|
1
|
+
{"version":3,"file":"utf8.js","sourceRoot":"","sources":["../src/utf8.ts"],"names":[],"mappings":";;;AACA,+DAG8B;AAC9B,6DAA6E;AAC7E,iEAA+E;AAC/E,+CAA2D;AAC3D,2DAA4E;AAE5E;;;;;;;;;;;;;;;;GAgBG;AACU,QAAA,aAAa,GAAG,sCAAiB,IAAI,wCAAmB,CAAA;AAErE;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACU,QAAA,WAAW;AACtB,iCAAiC,CAAC,wCAAiB,IAAI,0CAAmB,CAAA;AAE5E,iCAAiC;AACjC,IAAI,mBAAW,KAAK,0CAAmB,EAAE,CAAC;IACxC,aAAa;IACb,OAAO,CAAC,IAAI,CACV,oHAAoH,CACrH,CAAA;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACU,QAAA,OAAO;AAClB,iCAAiC,CAAC,yBAAW,IAAI,4BAAc,CAAA;AAEjE;;;;;;;;;;;;;;;GAeG;AACU,QAAA,YAAY;AACvB,iCAAiC,CAAC,oCAAgB,IAAI,wCAAoB,CAAA;AAE5E;;;;;;;;;;;;;;;GAeG;AACU,QAAA,cAAc;AAIzB,iCAAiC,CAAC,wCAAkB,IAAI,4CAAsB,CAAA","sourcesContent":["import { Base64Alphabet } from './uint8array.js'\nimport {\n utf8FromBase64Node,\n utf8FromBase64Ponyfill,\n} from './utf8-from-base64.js'\nimport { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'\nimport { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'\nimport { utf8LenCompute, utf8LenNode } from './utf8-len.js'\nimport { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'\n\n/**\n * Converts a Uint8Array to a UTF-8 string.\n *\n * Uses Node.js Buffer when available for performance, falling back to\n * TextDecoder in environments without Buffer support.\n *\n * @param bytes - The binary data to decode\n * @returns The decoded string (as UTF-16 JavaScript string)\n *\n * @example\n * ```typescript\n * import { utf8FromBytes } from '@atproto/lex-data'\n *\n * const bytes = new Uint8Array([72, 101, 108, 108, 111])\n * utf8FromBytes(bytes) // 'Hello'\n * ```\n */\nexport const utf8FromBytes = utf8FromBytesNode ?? 
utf8FromBytesNative\n\n/**\n * Counts the number of grapheme clusters (user-perceived characters) in a string.\n *\n * Grapheme clusters represent what users typically think of as \"characters\",\n * handling complex cases like:\n * - Emoji with skin tones and ZWJ sequences (e.g., family emoji)\n * - Combined characters (e.g., 'e' + combining accent)\n * - Regional indicator pairs (flag emoji)\n *\n * Uses native {@link Intl.Segmenter} when available, falling back to a ponyfill.\n *\n * @param str - The string to measure\n * @returns The number of grapheme clusters\n *\n * @example\n * ```typescript\n * import { graphemeLen } from '@atproto/lex-data'\n *\n * graphemeLen('hello') // 5\n * graphemeLen('cafe\\u0301') // 4 (cafe with combining accent)\n * graphemeLen('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 1 (family emoji)\n * ```\n */\nexport const graphemeLen: (str: string) => number =\n /* v8 ignore next -- @preserve */ graphemeLenNative ?? graphemeLenPonyfill\n\n/* v8 ignore next -- @preserve */\nif (graphemeLen === graphemeLenPonyfill) {\n /*#__PURE__*/\n console.warn(\n '[@atproto/lex-data]: Intl.Segmenter is not available in this environment. 
Falling back to ponyfill implementation.',\n )\n}\n\n/**\n * Calculates the UTF-8 byte length of a string.\n *\n * Returns the number of bytes the string would occupy when encoded as UTF-8.\n * This is important for Lexicon validation where schemas specify byte limits.\n *\n * Uses Node.js Buffer.byteLength when available for performance,\n * falling back to a computed implementation.\n *\n * @param str - The string to measure\n * @returns The UTF-8 byte length\n *\n * @example\n * ```typescript\n * import { utf8Len } from '@atproto/lex-data'\n *\n * utf8Len('hello') // 5 (ASCII: 1 byte per char)\n * utf8Len('\\u00e9') // 2 (e with accent: 2 bytes)\n * utf8Len('\\u{1F600}') // 4 (emoji: 4 bytes)\n * utf8Len('\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F467}\\u{200D}\\u{1F466}') // 25 (family emoji)\n * ```\n */\nexport const utf8Len: (string: string) => number =\n /* v8 ignore next -- @preserve */ utf8LenNode ?? utf8LenCompute\n\n/**\n * Encodes a UTF-8 string to base64.\n *\n * First encodes the string as UTF-8 bytes, then encodes those bytes as base64.\n *\n * @param str - The string to encode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The base64-encoded string\n *\n * @example\n * ```typescript\n * import { utf8ToBase64 } from '@atproto/lex-data'\n *\n * utf8ToBase64('Hello') // 'SGVsbG8='\n * ```\n */\nexport const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =\n /* v8 ignore next -- @preserve */ utf8ToBase64Node ?? 
utf8ToBase64Ponyfill\n\n/**\n * Decodes a base64 string to UTF-8.\n *\n * Decodes the base64 to bytes, then interprets those bytes as UTF-8 text.\n *\n * @param b64 - The base64 string to decode\n * @param alphabet - The base64 alphabet to use ('base64' or 'base64url')\n * @returns The decoded UTF-8 string\n *\n * @example\n * ```typescript\n * import { utf8FromBase64 } from '@atproto/lex-data'\n *\n * utf8FromBase64('SGVsbG8=') // 'Hello'\n * ```\n */\nexport const utf8FromBase64: (\n b64: string,\n alphabet?: Base64Alphabet,\n) => string =\n /* v8 ignore next -- @preserve */ utf8FromBase64Node ?? utf8FromBase64Ponyfill\n"]}
|
package/package.json
CHANGED
package/src/blob.test.ts
CHANGED
|
@@ -3,8 +3,8 @@ import {
|
|
|
3
3
|
BlobRef,
|
|
4
4
|
LegacyBlobRef,
|
|
5
5
|
enumBlobRefs,
|
|
6
|
-
isBlobRef,
|
|
7
6
|
isLegacyBlobRef,
|
|
7
|
+
isTypedBlobRef,
|
|
8
8
|
} from './blob.js'
|
|
9
9
|
import { RawCid, parseCid } from './cid.js'
|
|
10
10
|
import { LexArray, LexMap, LexValue } from './lex.js'
|
|
@@ -21,7 +21,7 @@ const invalidBlobCid = parseCid(
|
|
|
21
21
|
{ flavor: 'cbor' },
|
|
22
22
|
)
|
|
23
23
|
|
|
24
|
-
describe(
|
|
24
|
+
describe(isTypedBlobRef, () => {
|
|
25
25
|
it('tests valid blobCid and lexCid', () => {
|
|
26
26
|
expect(validBlobCid.code).toBe(0x55) // raw
|
|
27
27
|
expect(validBlobCid.multihash.code).toBe(0x12) // sha2-256
|
|
@@ -31,7 +31,7 @@ describe(isBlobRef, () => {
|
|
|
31
31
|
|
|
32
32
|
it('parses valid blob', () => {
|
|
33
33
|
expect(
|
|
34
|
-
|
|
34
|
+
isTypedBlobRef({
|
|
35
35
|
$type: 'blob',
|
|
36
36
|
ref: validBlobCid,
|
|
37
37
|
mimeType: 'image/jpeg',
|
|
@@ -40,7 +40,7 @@ describe(isBlobRef, () => {
|
|
|
40
40
|
).toBe(true)
|
|
41
41
|
|
|
42
42
|
expect(
|
|
43
|
-
|
|
43
|
+
isTypedBlobRef(
|
|
44
44
|
{
|
|
45
45
|
$type: 'blob',
|
|
46
46
|
ref: invalidBlobCid,
|
|
@@ -55,7 +55,7 @@ describe(isBlobRef, () => {
|
|
|
55
55
|
|
|
56
56
|
it('performs strict validation by default', () => {
|
|
57
57
|
expect(
|
|
58
|
-
|
|
58
|
+
isTypedBlobRef({
|
|
59
59
|
$type: 'blob',
|
|
60
60
|
ref: invalidBlobCid,
|
|
61
61
|
mimeType: 'image/jpeg',
|
|
@@ -66,7 +66,7 @@ describe(isBlobRef, () => {
|
|
|
66
66
|
|
|
67
67
|
it('rejects invalid inputs', () => {
|
|
68
68
|
expect(
|
|
69
|
-
|
|
69
|
+
isTypedBlobRef({
|
|
70
70
|
$type: 'blob',
|
|
71
71
|
ref: { $link: validBlobCid.toString() },
|
|
72
72
|
mimeType: 'image/jpeg',
|
|
@@ -75,7 +75,7 @@ describe(isBlobRef, () => {
|
|
|
75
75
|
).toBe(false)
|
|
76
76
|
|
|
77
77
|
expect(
|
|
78
|
-
|
|
78
|
+
isTypedBlobRef({
|
|
79
79
|
// $type: 'blob',
|
|
80
80
|
ref: validBlobCid,
|
|
81
81
|
mimeType: 'image/jpeg',
|
|
@@ -84,7 +84,7 @@ describe(isBlobRef, () => {
|
|
|
84
84
|
).toBe(false)
|
|
85
85
|
|
|
86
86
|
expect(
|
|
87
|
-
|
|
87
|
+
isTypedBlobRef({
|
|
88
88
|
$type: 'blob',
|
|
89
89
|
ref: validBlobCid,
|
|
90
90
|
mimeType: { toString: () => 'image/jpeg' },
|
|
@@ -93,7 +93,7 @@ describe(isBlobRef, () => {
|
|
|
93
93
|
).toBe(false)
|
|
94
94
|
|
|
95
95
|
expect(
|
|
96
|
-
|
|
96
|
+
isTypedBlobRef(
|
|
97
97
|
{
|
|
98
98
|
$type: 'blob',
|
|
99
99
|
ref: { $link: validBlobCid.toString() },
|
|
@@ -105,7 +105,7 @@ describe(isBlobRef, () => {
|
|
|
105
105
|
).toBe(false)
|
|
106
106
|
|
|
107
107
|
expect(
|
|
108
|
-
|
|
108
|
+
isTypedBlobRef({
|
|
109
109
|
$type: 'blob',
|
|
110
110
|
mimeType: 'image/jpeg',
|
|
111
111
|
size: 10000,
|
|
@@ -113,7 +113,7 @@ describe(isBlobRef, () => {
|
|
|
113
113
|
).toBe(false)
|
|
114
114
|
|
|
115
115
|
expect(
|
|
116
|
-
|
|
116
|
+
isTypedBlobRef(
|
|
117
117
|
{
|
|
118
118
|
$type: 'blob',
|
|
119
119
|
mimeType: 'image/jpeg',
|
|
@@ -123,15 +123,15 @@ describe(isBlobRef, () => {
|
|
|
123
123
|
),
|
|
124
124
|
).toBe(false)
|
|
125
125
|
|
|
126
|
-
expect(
|
|
127
|
-
expect(
|
|
128
|
-
expect(
|
|
129
|
-
expect(
|
|
126
|
+
expect(isTypedBlobRef('not an object')).toBe(false)
|
|
127
|
+
expect(isTypedBlobRef([])).toBe(false)
|
|
128
|
+
expect(isTypedBlobRef(new Date())).toBe(false)
|
|
129
|
+
expect(isTypedBlobRef(new Map())).toBe(false)
|
|
130
130
|
})
|
|
131
131
|
|
|
132
132
|
it('rejects non-integer size', () => {
|
|
133
133
|
expect(
|
|
134
|
-
|
|
134
|
+
isTypedBlobRef({
|
|
135
135
|
$type: 'blob',
|
|
136
136
|
ref: validBlobCid,
|
|
137
137
|
mimeType: 'image/jpeg',
|
|
@@ -142,7 +142,7 @@ describe(isBlobRef, () => {
|
|
|
142
142
|
|
|
143
143
|
it('rejects invalid CID/multihash code', () => {
|
|
144
144
|
expect(
|
|
145
|
-
|
|
145
|
+
isTypedBlobRef(
|
|
146
146
|
{
|
|
147
147
|
$type: 'blob',
|
|
148
148
|
ref: validBlobCid,
|
|
@@ -154,7 +154,7 @@ describe(isBlobRef, () => {
|
|
|
154
154
|
).toBe(true)
|
|
155
155
|
|
|
156
156
|
expect(
|
|
157
|
-
|
|
157
|
+
isTypedBlobRef(
|
|
158
158
|
{
|
|
159
159
|
$type: 'blob',
|
|
160
160
|
ref: invalidBlobCid,
|
|
@@ -168,7 +168,7 @@ describe(isBlobRef, () => {
|
|
|
168
168
|
|
|
169
169
|
it('rejects extra keys', () => {
|
|
170
170
|
expect(
|
|
171
|
-
|
|
171
|
+
isTypedBlobRef({
|
|
172
172
|
$type: 'blob',
|
|
173
173
|
ref: validBlobCid,
|
|
174
174
|
mimeType: 'image/jpeg',
|
|
@@ -178,7 +178,7 @@ describe(isBlobRef, () => {
|
|
|
178
178
|
).toBe(false)
|
|
179
179
|
|
|
180
180
|
expect(
|
|
181
|
-
|
|
181
|
+
isTypedBlobRef(
|
|
182
182
|
{
|
|
183
183
|
$type: 'blob',
|
|
184
184
|
ref: validBlobCid,
|
|
@@ -197,7 +197,7 @@ describe(isBlobRef, () => {
|
|
|
197
197
|
'QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG', // CID v0
|
|
198
198
|
)
|
|
199
199
|
expect(
|
|
200
|
-
|
|
200
|
+
isTypedBlobRef(
|
|
201
201
|
{
|
|
202
202
|
$type: 'blob',
|
|
203
203
|
ref: cidV0,
|
|
@@ -221,14 +221,27 @@ describe(isLegacyBlobRef, () => {
|
|
|
221
221
|
).toBe(true)
|
|
222
222
|
|
|
223
223
|
expect(
|
|
224
|
-
isLegacyBlobRef(
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
224
|
+
isLegacyBlobRef(
|
|
225
|
+
{
|
|
226
|
+
cid: invalidBlobCid.toString(),
|
|
227
|
+
mimeType: 'image/jpeg',
|
|
228
|
+
},
|
|
229
|
+
{ strict: false },
|
|
230
|
+
),
|
|
228
231
|
).toBe(true)
|
|
229
232
|
})
|
|
230
233
|
|
|
231
234
|
it('rejects invalid inputs', () => {
|
|
235
|
+
expect(
|
|
236
|
+
isLegacyBlobRef(
|
|
237
|
+
{
|
|
238
|
+
cid: invalidBlobCid.toString(),
|
|
239
|
+
mimeType: 'image/jpeg',
|
|
240
|
+
},
|
|
241
|
+
{ strict: true },
|
|
242
|
+
),
|
|
243
|
+
).toBe(false)
|
|
244
|
+
|
|
232
245
|
expect(
|
|
233
246
|
isLegacyBlobRef({
|
|
234
247
|
cid: 'babbaaa',
|
package/src/blob.ts
CHANGED
|
@@ -1,21 +1,172 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
CheckCidOptions,
|
|
3
|
+
Cid,
|
|
4
|
+
RawCid,
|
|
5
|
+
ifCid,
|
|
6
|
+
parseCid,
|
|
7
|
+
validateCidString,
|
|
8
|
+
} from './cid.js'
|
|
2
9
|
import { LexValue } from './lex.js'
|
|
3
10
|
import { isPlainObject, isPlainProto } from './object.js'
|
|
4
11
|
|
|
12
|
+
/**
|
|
13
|
+
* Options to use with {@link ifCid}, {@link validateCidString}, and related CID
|
|
14
|
+
* validation functions when validating CIDs in BlobRefs, in strict mode. This
|
|
15
|
+
* ensures that the CID is a {@link RawCid} (CID v1, raw multicodec, sha256
|
|
16
|
+
* multihash), which is the expected format for blob references in the AT
|
|
17
|
+
* Protocol data model.
|
|
18
|
+
*/
|
|
19
|
+
const STRICT_CID_CHECK_OPTIONS: CheckCidOptions = { flavor: 'raw' }
|
|
20
|
+
|
|
21
|
+
// Number.isSafeInteger is actually safe to use with non-number values, so we
|
|
22
|
+
// can use it as a type guard.
|
|
23
|
+
const isSafeInteger = Number.isSafeInteger as (v: unknown) => v is number
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Reference to binary data (like images, videos, etc.) in the AT Protocol data
|
|
27
|
+
* model.
|
|
28
|
+
*
|
|
29
|
+
* This type represents a reference to a blob of binary data, identified by its
|
|
30
|
+
* content hash (CID) and accompanied by metadata such as MIME type and size.
|
|
31
|
+
*
|
|
32
|
+
* The {@link BlobRef} type is a union of the current {@link TypedBlobRef}
|
|
33
|
+
* format and the legacy {@link LegacyBlobRef} format.
|
|
34
|
+
*/
|
|
35
|
+
export type BlobRef<Ref extends Cid = Cid> = TypedBlobRef<Ref> | LegacyBlobRef
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Options for validating a {@link BlobRef}.
|
|
39
|
+
*/
|
|
40
|
+
export type BlobRefCheckOptions = {
|
|
41
|
+
/**
|
|
42
|
+
* If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
|
|
43
|
+
* any valid CID. Otherwise, validates that the CID is v1, uses the raw
|
|
44
|
+
* multicodec, and has a sha256 multihash.
|
|
45
|
+
*
|
|
46
|
+
* @default true
|
|
47
|
+
*/
|
|
48
|
+
strict?: boolean
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Type guard to check if a value is a valid {@link BlobRef}, which can be
|
|
53
|
+
* either a {@link TypedBlobRef} or a {@link LegacyBlobRef}. By default, strict
|
|
54
|
+
* CID validation is applied to ensure that the CID in the blob reference is in
|
|
55
|
+
* the expected format for the AT Protocol, but this can be relaxed with the
|
|
56
|
+
* `strict: false` option.
|
|
57
|
+
*/
|
|
58
|
+
export function isBlobRef(input: unknown): input is BlobRef<RawCid>
|
|
59
|
+
export function isBlobRef<TOptions extends BlobRefCheckOptions>(
|
|
60
|
+
input: unknown,
|
|
61
|
+
options: TOptions,
|
|
62
|
+
): input is LegacyBlobRef | InferTypedBlobRef<TOptions>
|
|
63
|
+
export function isBlobRef(
|
|
64
|
+
input: unknown,
|
|
65
|
+
options?: BlobRefCheckOptions,
|
|
66
|
+
): input is BlobRef<RawCid>
|
|
67
|
+
export function isBlobRef(
|
|
68
|
+
input: unknown,
|
|
69
|
+
options?: BlobRefCheckOptions,
|
|
70
|
+
): input is BlobRef {
|
|
71
|
+
return (input as any)?.$type === 'blob'
|
|
72
|
+
? isTypedBlobRef(input, options)
|
|
73
|
+
: isLegacyBlobRef(input, options)
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Extracts the MIME type from a {@link BlobRef}.
|
|
78
|
+
*
|
|
79
|
+
* @example
|
|
80
|
+
* ```ts
|
|
81
|
+
* const mimeType = getBlobMime(blobRef)
|
|
82
|
+
* console.log(mimeType) // e.g., 'image/jpeg'
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
export function getBlobMime(blob: BlobRef): string
|
|
86
|
+
export function getBlobMime(blob?: BlobRef): string | undefined
|
|
87
|
+
export function getBlobMime(blob?: BlobRef): string | undefined {
|
|
88
|
+
return blob?.mimeType
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Extracts the size (in bytes) from a {@link TypedBlobRef}. For
|
|
93
|
+
* {@link LegacyBlobRef}, size information is not available, so this function
|
|
94
|
+
* returns `undefined` for legacy refs.
|
|
95
|
+
*
|
|
96
|
+
* @note The size property, in blob refs, cannot be 100% trusted since the PDS
|
|
97
|
+
* might not have a local copy of the blob (to check the size against) and might
|
|
98
|
+
* just be passing through the blob ref from the client without validating it.
|
|
99
|
+
* So, while this function can be useful for getting size information when
|
|
100
|
+
* available, it should not be solely relied upon for critical functionality
|
|
101
|
+
* without additional validation.
|
|
102
|
+
*
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* const size = getBlobSize(blobRef)
|
|
106
|
+
* if (size !== undefined) {
|
|
107
|
+
* console.log(`Blob size: ${size} bytes`)
|
|
108
|
+
* } else {
|
|
109
|
+
* console.log('Size information not available for legacy blob ref')
|
|
110
|
+
* }
|
|
111
|
+
* ```
|
|
112
|
+
*/
|
|
113
|
+
export function getBlobSize(blob: BlobRef): number | undefined {
|
|
114
|
+
if ('$type' in blob && blob.size >= 0) return blob.size
|
|
115
|
+
// LegacyBlobRef doesn't have size information
|
|
116
|
+
return undefined
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* Extracts the {@link Cid} from a {@link BlobRef}.
|
|
121
|
+
*
|
|
122
|
+
* @throws If the input is a {@link LegacyBlobRef} with an invalid CID string
|
|
123
|
+
* @example
|
|
124
|
+
* ```ts
|
|
125
|
+
* const cid = getBlobCid(blobRef)
|
|
126
|
+
* console.log(cid.bytes)
|
|
127
|
+
* ```
|
|
128
|
+
*/
|
|
129
|
+
export function getBlobCid(blob: BlobRef): Cid
|
|
130
|
+
export function getBlobCid(blob?: BlobRef): Cid | undefined
|
|
131
|
+
export function getBlobCid(blob?: BlobRef): Cid | undefined {
|
|
132
|
+
if (!blob) return undefined
|
|
133
|
+
return '$type' in blob ? blob.ref : parseCid(blob.cid)
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Extracts the CID string from a {@link BlobRef}.
|
|
138
|
+
*
|
|
139
|
+
* This is similar to `getBlobCid(blob).toString()` but is more optimized since
|
|
140
|
+
* the CID string is already available in the legacy format and we can avoid
|
|
141
|
+
* parsing it into a CID object just to convert it back to a string.
|
|
142
|
+
*
|
|
143
|
+
* @example
|
|
144
|
+
* ```ts
|
|
145
|
+
* const cidString = getBlobCidString(blobRef)
|
|
146
|
+
* console.log(cidString)
|
|
147
|
+
* ```
|
|
148
|
+
*/
|
|
149
|
+
export function getBlobCidString(blob: BlobRef): string
|
|
150
|
+
export function getBlobCidString(blob?: BlobRef): string | undefined
|
|
151
|
+
export function getBlobCidString(blob?: BlobRef): string | undefined {
|
|
152
|
+
if (!blob) return undefined
|
|
153
|
+
return '$type' in blob ? blob.ref.toString() : blob.cid
|
|
154
|
+
}
|
|
155
|
+
|
|
5
156
|
/**
|
|
6
157
|
* Reference to binary data (like images, videos, etc.) in the AT Protocol data model.
|
|
7
158
|
*
|
|
8
|
-
* A
|
|
9
|
-
* content by its content hash (CID), along with metadata
|
|
10
|
-
* and size.
|
|
159
|
+
* A {@link TypedBlobRef} is a {@link LexMap} with a specific structure that
|
|
160
|
+
* identifies binary content by its content hash (CID), along with metadata
|
|
161
|
+
* about the content type and size.
|
|
11
162
|
*
|
|
12
163
|
* @typeParam Ref - The type of CID reference, defaults to any {@link Cid}
|
|
13
164
|
*
|
|
14
165
|
* @example
|
|
15
166
|
* ```typescript
|
|
16
|
-
* import type {
|
|
167
|
+
* import type { TypedBlobRef } from '@atproto/lex-data'
|
|
17
168
|
*
|
|
18
|
-
* const imageRef:
|
|
169
|
+
* const imageRef: TypedBlobRef = {
|
|
19
170
|
* $type: 'blob',
|
|
20
171
|
* mimeType: 'image/jpeg',
|
|
21
172
|
* ref: cid, // CID of the blob content
|
|
@@ -23,41 +174,27 @@ import { isPlainObject, isPlainProto } from './object.js'
|
|
|
23
174
|
* }
|
|
24
175
|
* ```
|
|
25
176
|
*
|
|
26
|
-
* @see {@link
|
|
177
|
+
* @see {@link isTypedBlobRef} to check if a value is a valid {@link TypedBlobRef}
|
|
27
178
|
* @see {@link LegacyBlobRef} for the older blob reference format
|
|
28
179
|
*/
|
|
29
|
-
export type
|
|
180
|
+
export type TypedBlobRef<Ref extends Cid = Cid> = {
|
|
30
181
|
$type: 'blob'
|
|
31
182
|
mimeType: string
|
|
32
183
|
ref: Ref
|
|
33
184
|
size: number
|
|
34
185
|
}
|
|
35
186
|
|
|
36
|
-
/**
|
|
37
|
-
* Options for validating a {@link BlobRef}.
|
|
38
|
-
*/
|
|
39
|
-
export type BlobRefCheckOptions = {
|
|
40
|
-
/**
|
|
41
|
-
* If `false`, skips strict CID validation of {@link BlobRef.ref}, allowing
|
|
42
|
-
* any valid CID. Otherwise, validates that the CID is v1, uses the raw
|
|
43
|
-
* multicodec, and has a sha256 multihash.
|
|
44
|
-
*
|
|
45
|
-
* @default true
|
|
46
|
-
*/
|
|
47
|
-
strict?: boolean
|
|
48
|
-
}
|
|
49
|
-
|
|
50
187
|
/**
|
|
51
188
|
* Infers the BlobRef type based on the check options.
|
|
52
189
|
*
|
|
53
190
|
* @typeParam TOptions - The options used for checking
|
|
54
191
|
*/
|
|
55
|
-
export type
|
|
192
|
+
export type InferTypedBlobRef<TOptions extends BlobRefCheckOptions> =
|
|
56
193
|
TOptions extends { strict: false }
|
|
57
|
-
?
|
|
194
|
+
? TypedBlobRef
|
|
58
195
|
: { strict: boolean } extends TOptions
|
|
59
|
-
?
|
|
60
|
-
:
|
|
196
|
+
? TypedBlobRef
|
|
197
|
+
: TypedBlobRef<RawCid>
|
|
61
198
|
|
|
62
199
|
/**
|
|
63
200
|
* Type guard to check if a value is a valid {@link BlobRef}.
|
|
@@ -74,32 +211,32 @@ export type InferCheckedBlobRef<TOptions extends BlobRefCheckOptions> =
|
|
|
74
211
|
*
|
|
75
212
|
* @example
|
|
76
213
|
* ```typescript
|
|
77
|
-
* import {
|
|
214
|
+
* import { isTypedBlobRef } from '@atproto/lex-data'
|
|
78
215
|
*
|
|
79
|
-
* if (
|
|
216
|
+
* if (isTypedBlobRef(data)) {
|
|
80
217
|
* console.log(data.mimeType) // e.g., 'image/jpeg'
|
|
81
218
|
* console.log(data.size) // e.g., 12345
|
|
82
219
|
* }
|
|
83
220
|
*
|
|
84
221
|
* // Allow any valid CID (not just raw CIDs)
|
|
85
|
-
* if (
|
|
222
|
+
* if (isTypedBlobRef(data, { strict: false })) {
|
|
86
223
|
* // ...
|
|
87
224
|
* }
|
|
88
225
|
* ```
|
|
89
226
|
*/
|
|
90
|
-
export function
|
|
91
|
-
export function
|
|
227
|
+
export function isTypedBlobRef(input: unknown): input is TypedBlobRef<RawCid>
|
|
228
|
+
export function isTypedBlobRef<TOptions extends BlobRefCheckOptions>(
|
|
92
229
|
input: unknown,
|
|
93
230
|
options: TOptions,
|
|
94
|
-
): input is
|
|
95
|
-
export function
|
|
231
|
+
): input is InferTypedBlobRef<TOptions>
|
|
232
|
+
export function isTypedBlobRef(
|
|
96
233
|
input: unknown,
|
|
97
234
|
options?: BlobRefCheckOptions,
|
|
98
|
-
): input is
|
|
99
|
-
export function
|
|
235
|
+
): input is TypedBlobRef<RawCid>
|
|
236
|
+
export function isTypedBlobRef(
|
|
100
237
|
input: unknown,
|
|
101
238
|
options?: BlobRefCheckOptions,
|
|
102
|
-
): input is
|
|
239
|
+
): input is TypedBlobRef {
|
|
103
240
|
if (!isPlainObject(input)) {
|
|
104
241
|
return false
|
|
105
242
|
}
|
|
@@ -114,7 +251,10 @@ export function isBlobRef(
|
|
|
114
251
|
return false
|
|
115
252
|
}
|
|
116
253
|
|
|
117
|
-
if (
|
|
254
|
+
if (size === -1 && options?.strict === false) {
|
|
255
|
+
// In non-strict mode, allow size to be -1 to accommodate legacy blob refs
|
|
256
|
+
// that don't include size information.
|
|
257
|
+
} else if (!isSafeInteger(size) || size < 0) {
|
|
118
258
|
return false
|
|
119
259
|
}
|
|
120
260
|
|
|
@@ -136,7 +276,7 @@ export function isBlobRef(
|
|
|
136
276
|
const cid = ifCid(
|
|
137
277
|
ref,
|
|
138
278
|
// Strict unless explicitly disabled
|
|
139
|
-
options?.strict === false ? undefined :
|
|
279
|
+
options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
|
|
140
280
|
)
|
|
141
281
|
if (!cid) {
|
|
142
282
|
return false
|
|
@@ -178,9 +318,6 @@ export type LegacyBlobRef = {
|
|
|
178
318
|
* - `mimeType` must be a non-empty string
|
|
179
319
|
* - No additional properties allowed
|
|
180
320
|
*
|
|
181
|
-
* @param input - The value to check
|
|
182
|
-
* @returns `true` if the input is a valid LegacyBlobRef
|
|
183
|
-
*
|
|
184
321
|
* @example
|
|
185
322
|
* ```typescript
|
|
186
323
|
* import { isLegacyBlobRef } from '@atproto/lex-data'
|
|
@@ -191,9 +328,12 @@ export type LegacyBlobRef = {
|
|
|
191
328
|
* }
|
|
192
329
|
* ```
|
|
193
330
|
*
|
|
194
|
-
* @see {@link
|
|
331
|
+
* @see {@link isTypedBlobRef} for checking the current blob reference format
|
|
195
332
|
*/
|
|
196
|
-
export function isLegacyBlobRef(
|
|
333
|
+
export function isLegacyBlobRef(
|
|
334
|
+
input: unknown,
|
|
335
|
+
options?: BlobRefCheckOptions,
|
|
336
|
+
): input is LegacyBlobRef {
|
|
197
337
|
if (!isPlainObject(input)) {
|
|
198
338
|
return false
|
|
199
339
|
}
|
|
@@ -213,7 +353,12 @@ export function isLegacyBlobRef(input: unknown): input is LegacyBlobRef {
|
|
|
213
353
|
}
|
|
214
354
|
}
|
|
215
355
|
|
|
216
|
-
if (
|
|
356
|
+
if (
|
|
357
|
+
!validateCidString(
|
|
358
|
+
cid,
|
|
359
|
+
options?.strict === false ? undefined : STRICT_CID_CHECK_OPTIONS,
|
|
360
|
+
)
|
|
361
|
+
) {
|
|
217
362
|
return false
|
|
218
363
|
}
|
|
219
364
|
|
|
@@ -240,10 +385,10 @@ export type EnumBlobRefsOptions = BlobRefCheckOptions & {
|
|
|
240
385
|
*/
|
|
241
386
|
export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
|
|
242
387
|
TOptions extends { allowLegacy: true }
|
|
243
|
-
?
|
|
388
|
+
? InferTypedBlobRef<TOptions> | LegacyBlobRef
|
|
244
389
|
: { allowLegacy: boolean } extends TOptions
|
|
245
|
-
?
|
|
246
|
-
:
|
|
390
|
+
? InferTypedBlobRef<TOptions> | LegacyBlobRef
|
|
391
|
+
: InferTypedBlobRef<TOptions>
|
|
247
392
|
|
|
248
393
|
/**
|
|
249
394
|
* Generator that enumerates all {@link BlobRef}s (and, optionally,
|
|
@@ -273,8 +418,8 @@ export type InferEnumBlobRefs<TOptions extends EnumBlobRefsOptions> =
|
|
|
273
418
|
* }
|
|
274
419
|
*
|
|
275
420
|
* // Include legacy blob references
|
|
276
|
-
* for (const ref of enumBlobRefs(record, { allowLegacy: true })) {
|
|
277
|
-
* // ref may be BlobRef or LegacyBlobRef
|
|
421
|
+
* for (const ref of enumBlobRefs(record, { allowLegacy: true, strict: false })) {
|
|
422
|
+
* // ref may be BlobRef or LegacyBlobRef, with relaxed CID validation
|
|
278
423
|
* }
|
|
279
424
|
* ```
|
|
280
425
|
*/
|
|
@@ -288,11 +433,11 @@ export function enumBlobRefs<TOptions extends EnumBlobRefsOptions>(
|
|
|
288
433
|
export function enumBlobRefs(
|
|
289
434
|
input: LexValue,
|
|
290
435
|
options?: EnumBlobRefsOptions,
|
|
291
|
-
): Generator<BlobRef
|
|
436
|
+
): Generator<BlobRef, void, unknown>
|
|
292
437
|
export function* enumBlobRefs(
|
|
293
438
|
input: LexValue,
|
|
294
439
|
options?: EnumBlobRefsOptions,
|
|
295
|
-
): Generator<BlobRef
|
|
440
|
+
): Generator<BlobRef, void, unknown> {
|
|
296
441
|
// LegacyBlobRef not included by default
|
|
297
442
|
const includeLegacy = options?.allowLegacy === true
|
|
298
443
|
|
|
@@ -315,9 +460,9 @@ export function* enumBlobRefs(
|
|
|
315
460
|
} else if (isPlainProto(value)) {
|
|
316
461
|
if (visited.has(value)) continue
|
|
317
462
|
visited.add(value)
|
|
318
|
-
if (
|
|
463
|
+
if (isTypedBlobRef(value, options)) {
|
|
319
464
|
yield value
|
|
320
|
-
} else if (includeLegacy && isLegacyBlobRef(value)) {
|
|
465
|
+
} else if (includeLegacy && isLegacyBlobRef(value, options)) {
|
|
321
466
|
yield value
|
|
322
467
|
} else {
|
|
323
468
|
for (const v of Object.values(value)) {
|
package/src/lib/nodejs-buffer.ts
CHANGED
|
@@ -12,6 +12,11 @@ interface NodeJSBufferConstructor {
|
|
|
12
12
|
input: Uint8Array | ArrayBuffer | ArrayBufferView,
|
|
13
13
|
): NodeJSBuffer<ArrayBuffer>
|
|
14
14
|
from(input: string, encoding?: Encoding): NodeJSBuffer<ArrayBuffer>
|
|
15
|
+
from<TArrayBuffer extends ArrayBufferLike>(
|
|
16
|
+
arrayBuffer: WithImplicitCoercion<TArrayBuffer>,
|
|
17
|
+
byteOffset?: number,
|
|
18
|
+
length?: number,
|
|
19
|
+
): Buffer<TArrayBuffer>
|
|
15
20
|
concat(list: readonly Uint8Array[], totalLength?: number): NodeJSBuffer
|
|
16
21
|
byteLength(input: string, encoding?: Encoding): number
|
|
17
22
|
prototype: NodeJSBuffer
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { assert, describe, expect, it } from 'vitest'
|
|
2
|
+
import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
|
|
3
|
+
|
|
4
|
+
for (const utf8FromBytes of [utf8FromBytesNode, utf8FromBytesNative] as const) {
|
|
5
|
+
assert(utf8FromBytes, 'utf8FromBytes implementation should not be null')
|
|
6
|
+
describe(utf8FromBytes, () => {
|
|
7
|
+
it('decodes empty Uint8Array', () => {
|
|
8
|
+
const decoded = utf8FromBytes(new Uint8Array(0))
|
|
9
|
+
expect(typeof decoded).toBe('string')
|
|
10
|
+
expect(decoded).toBe('')
|
|
11
|
+
})
|
|
12
|
+
|
|
13
|
+
it('decodes 10MB', () => {
|
|
14
|
+
const bytes = Buffer.allocUnsafe(10_000_000).fill('๐ฉ')
|
|
15
|
+
const decoded = utf8FromBytes(bytes)
|
|
16
|
+
expect(decoded).toBe('๐ฉ'.repeat(10_000_000 / 4))
|
|
17
|
+
})
|
|
18
|
+
|
|
19
|
+
for (const string of [
|
|
20
|
+
'',
|
|
21
|
+
'\0\0',
|
|
22
|
+
'\0\0\0',
|
|
23
|
+
'\0\0\0\0',
|
|
24
|
+
'__',
|
|
25
|
+
'é',
|
|
26
|
+
'àç',
|
|
27
|
+
'\0éàç',
|
|
28
|
+
'```\x1b',
|
|
29
|
+
'aaa',
|
|
30
|
+
'Hello, World!',
|
|
31
|
+
'๐๐๐๐๐๐
๐๐คฃ๐๐',
|
|
32
|
+
'๐ฉโ๐ป๐จโ๐ป๐ฉโ๐ฌ๐จโ๐ฌ๐ฉโ๐๐จโ๐',
|
|
33
|
+
'๐๐๐๐๐ช๐โจโก๐ฅ๐ง',
|
|
34
|
+
] as const) {
|
|
35
|
+
const buffer = Buffer.from(string, 'utf8')
|
|
36
|
+
|
|
37
|
+
it(`decodes ${JSON.stringify(string)}`, () => {
|
|
38
|
+
const decoded = utf8FromBytes(buffer)
|
|
39
|
+
expect(decoded).toBe(string)
|
|
40
|
+
})
|
|
41
|
+
}
|
|
42
|
+
})
|
|
43
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { NodeJSBuffer } from './lib/nodejs-buffer.js'
|
|
2
|
+
|
|
3
|
+
const Buffer = NodeJSBuffer
|
|
4
|
+
|
|
5
|
+
export const utf8FromBytesNode = Buffer
|
|
6
|
+
? function utf8FromBytesNode(bytes: Uint8Array): string {
|
|
7
|
+
// @NOTE Buffer.from(bytes) creates a copy of the ArrayBuffer. The following
|
|
8
|
+
// allows us to avoid the copy by creating a Buffer that shares the same
|
|
9
|
+
// memory as the input Uint8Array.
|
|
10
|
+
const buffer = Buffer.from(
|
|
11
|
+
bytes.buffer,
|
|
12
|
+
bytes.byteOffset,
|
|
13
|
+
bytes.byteLength,
|
|
14
|
+
)
|
|
15
|
+
return buffer.toString('utf8')
|
|
16
|
+
}
|
|
17
|
+
: /* v8 ignore next -- @preserve */ null
|
|
18
|
+
|
|
19
|
+
export function utf8FromBytesNative(bytes: Uint8Array): string {
|
|
20
|
+
return new TextDecoder('utf-8').decode(bytes)
|
|
21
|
+
}
|