@atproto/lex-data 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/blob.d.ts +2 -2
- package/dist/blob.d.ts.map +1 -1
- package/dist/blob.js +1 -1
- package/dist/blob.js.map +1 -1
- package/dist/lex-equals.d.ts +1 -1
- package/dist/lex-equals.d.ts.map +1 -1
- package/dist/lex-equals.js.map +1 -1
- package/dist/lex.d.ts +1 -1
- package/dist/lex.d.ts.map +1 -1
- package/dist/lex.js.map +1 -1
- package/dist/lib/nodejs-buffer.js.map +1 -1
- package/dist/uint8array-from-base64.d.ts +1 -1
- package/dist/uint8array-from-base64.d.ts.map +1 -1
- package/dist/uint8array-from-base64.js.map +1 -1
- package/dist/uint8array-to-base64.d.ts +1 -1
- package/dist/uint8array-to-base64.d.ts.map +1 -1
- package/dist/uint8array-to-base64.js.map +1 -1
- package/dist/uint8array.d.ts +1 -1
- package/dist/uint8array.d.ts.map +1 -1
- package/dist/uint8array.js.map +1 -1
- package/dist/utf8-from-base64.d.ts +1 -1
- package/dist/utf8-from-base64.d.ts.map +1 -1
- package/dist/utf8-from-base64.js.map +1 -1
- package/dist/utf8-to-base64.d.ts +1 -1
- package/dist/utf8-to-base64.d.ts.map +1 -1
- package/dist/utf8-to-base64.js.map +1 -1
- package/dist/utf8.d.ts +1 -1
- package/dist/utf8.d.ts.map +1 -1
- package/dist/utf8.js.map +1 -1
- package/package.json +4 -8
- package/src/blob.test.ts +0 -405
- package/src/blob.ts +0 -478
- package/src/cid-implementation.test.ts +0 -129
- package/src/cid.test.ts +0 -350
- package/src/cid.ts +0 -603
- package/src/core-js.d.ts +0 -2
- package/src/index.ts +0 -8
- package/src/lex-equals.test.ts +0 -183
- package/src/lex-equals.ts +0 -123
- package/src/lex-error.test.ts +0 -54
- package/src/lex-error.ts +0 -83
- package/src/lex.test.ts +0 -279
- package/src/lex.ts +0 -253
- package/src/lib/nodejs-buffer.ts +0 -46
- package/src/lib/util.test.ts +0 -49
- package/src/lib/util.ts +0 -7
- package/src/object.test.ts +0 -80
- package/src/object.ts +0 -83
- package/src/uint8array-base64.ts +0 -2
- package/src/uint8array-concat.test.ts +0 -197
- package/src/uint8array-concat.ts +0 -25
- package/src/uint8array-from-base64.test.ts +0 -130
- package/src/uint8array-from-base64.ts +0 -98
- package/src/uint8array-to-base64.test.ts +0 -170
- package/src/uint8array-to-base64.ts +0 -55
- package/src/uint8array.test.ts +0 -503
- package/src/uint8array.ts +0 -197
- package/src/utf8-from-base64.test.ts +0 -39
- package/src/utf8-from-base64.ts +0 -23
- package/src/utf8-from-bytes.test.ts +0 -43
- package/src/utf8-from-bytes.ts +0 -21
- package/src/utf8-grapheme-len.test.ts +0 -38
- package/src/utf8-grapheme-len.ts +0 -21
- package/src/utf8-len.test.ts +0 -21
- package/src/utf8-len.ts +0 -51
- package/src/utf8-to-base64.test.ts +0 -35
- package/src/utf8-to-base64.ts +0 -22
- package/src/utf8.ts +0 -128
- package/tsconfig.build.json +0 -12
- package/tsconfig.json +0 -7
- package/tsconfig.tests.json +0 -8
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import { assert, describe, expect, it } from 'vitest'
|
|
2
|
-
import {
|
|
3
|
-
utf8FromBase64Node,
|
|
4
|
-
utf8FromBase64Ponyfill,
|
|
5
|
-
} from './utf8-from-base64.js'
|
|
6
|
-
|
|
7
|
-
const strings = [
|
|
8
|
-
'Hello, World!',
|
|
9
|
-
'¡Hola, Mundo!',
|
|
10
|
-
'こんにちは世界',
|
|
11
|
-
'😀👩💻🌍',
|
|
12
|
-
'',
|
|
13
|
-
'𓀀𓁐𓂀𓃰𓄿𓅱𓆑𓇋𓈖𓉔𓊃𓋴𓌳𓍿𓎛𓏏',
|
|
14
|
-
]
|
|
15
|
-
|
|
16
|
-
for (const utf8FromBase64 of [
|
|
17
|
-
utf8FromBase64Node,
|
|
18
|
-
utf8FromBase64Ponyfill,
|
|
19
|
-
] as const) {
|
|
20
|
-
assert(utf8FromBase64, 'implementation should not be null')
|
|
21
|
-
|
|
22
|
-
describe(utf8FromBase64, () => {
|
|
23
|
-
it('decodes base64 to utf8 string', () => {
|
|
24
|
-
for (const text of strings) {
|
|
25
|
-
const b64 = Buffer.from(text, 'utf8').toString('base64')
|
|
26
|
-
const decoded = utf8FromBase64(b64, 'base64')
|
|
27
|
-
expect(decoded).toBe(text)
|
|
28
|
-
}
|
|
29
|
-
})
|
|
30
|
-
|
|
31
|
-
it('decodes base64url to utf8 string', () => {
|
|
32
|
-
for (const text of strings) {
|
|
33
|
-
const b64u = Buffer.from(text, 'utf8').toString('base64url')
|
|
34
|
-
const decoded = utf8FromBase64(b64u, 'base64url')
|
|
35
|
-
expect(decoded).toBe(text)
|
|
36
|
-
}
|
|
37
|
-
})
|
|
38
|
-
})
|
|
39
|
-
}
|
package/src/utf8-from-base64.ts
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { fromString } from 'uint8arrays/from-string'
|
|
2
|
-
import { NodeJSBuffer } from './lib/nodejs-buffer.js'
|
|
3
|
-
import { Base64Alphabet } from './uint8array-base64.js'
|
|
4
|
-
|
|
5
|
-
const Buffer = NodeJSBuffer
|
|
6
|
-
|
|
7
|
-
/**
 * Buffer-backed base64 → UTF-8 text decoder.
 *
 * Evaluates to `null` when `Buffer` is falsy, so callers must check for
 * `null` (or fall back to another implementation) before invoking it.
 *
 * @param b64 - The base64 (or base64url) encoded text
 * @param alphabet - Which base64 alphabet `b64` uses; defaults to `'base64'`
 * @returns The decoded bytes interpreted as a UTF-8 string
 */
export const utf8FromBase64Node = Buffer
  ? function utf8FromBase64Node(
      b64: string,
      alphabet: Base64Alphabet = 'base64',
    ): string {
      // Step 1: base64 text -> raw bytes. Step 2: raw bytes -> UTF-8 text.
      const decodedBytes = Buffer.from(b64, alphabet)
      const text = decodedBytes.toString('utf8')
      return text
    }
  : /* v8 ignore next -- @preserve */ null
|
|
15
|
-
|
|
16
|
-
// Shared decoder instance. `ignoreBOM: true` keeps a leading U+FEFF in the
// decoded text instead of silently dropping it, matching what
// `Buffer#toString('utf8')` does in the Buffer-backed implementation.
const textDecoder = /*#__PURE__*/ new TextDecoder('utf-8', { ignoreBOM: true })

/**
 * Portable base64 → UTF-8 text decoder built on `uint8arrays` and
 * `TextDecoder`.
 *
 * @param b64 - The base64 (or base64url) encoded text
 * @param alphabet - Which base64 alphabet `b64` uses; defaults to `'base64'`,
 * like the Buffer-backed implementation
 * @returns The decoded bytes interpreted as a UTF-8 string
 */
export function utf8FromBase64Ponyfill(
  b64: string,
  // An explicit default is required here: `fromString()` falls back to the
  // 'utf8' encoding when given `undefined`, which would hand the base64 text
  // back undecoded.
  alphabet: Base64Alphabet = 'base64',
): string {
  const bytes = fromString(b64, alphabet)
  return textDecoder.decode(bytes)
}
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import { assert, describe, expect, it } from 'vitest'
|
|
2
|
-
import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
|
|
3
|
-
|
|
4
|
-
for (const utf8FromBytes of [utf8FromBytesNode, utf8FromBytesNative] as const) {
|
|
5
|
-
assert(utf8FromBytes, 'utf8FromBytes implementation should not be null')
|
|
6
|
-
describe(utf8FromBytes, () => {
|
|
7
|
-
it('decodes empty Uint8Array', () => {
|
|
8
|
-
const decoded = utf8FromBytes(new Uint8Array(0))
|
|
9
|
-
expect(typeof decoded).toBe('string')
|
|
10
|
-
expect(decoded).toBe('')
|
|
11
|
-
})
|
|
12
|
-
|
|
13
|
-
it('decodes 10MB', () => {
|
|
14
|
-
const bytes = Buffer.allocUnsafe(10_000_000).fill('🐩')
|
|
15
|
-
const decoded = utf8FromBytes(bytes)
|
|
16
|
-
expect(decoded).toBe('🐩'.repeat(10_000_000 / 4))
|
|
17
|
-
})
|
|
18
|
-
|
|
19
|
-
for (const string of [
|
|
20
|
-
'',
|
|
21
|
-
'\0\0',
|
|
22
|
-
'\0\0\0',
|
|
23
|
-
'\0\0\0\0',
|
|
24
|
-
'__',
|
|
25
|
-
'é',
|
|
26
|
-
'àç',
|
|
27
|
-
'\0éàç',
|
|
28
|
-
'```\x1b',
|
|
29
|
-
'aaa',
|
|
30
|
-
'Hello, World!',
|
|
31
|
-
'😀😃😄😁😆😅😂🤣😊😇',
|
|
32
|
-
'👩💻👨💻👩🔬👨🔬👩🚀👨🚀',
|
|
33
|
-
'🌍🌎🌏🌐🪐🌟✨⚡🔥💧',
|
|
34
|
-
] as const) {
|
|
35
|
-
const buffer = Buffer.from(string, 'utf8')
|
|
36
|
-
|
|
37
|
-
it(`decodes ${JSON.stringify(string)}`, () => {
|
|
38
|
-
const decoded = utf8FromBytes(buffer)
|
|
39
|
-
expect(decoded).toBe(string)
|
|
40
|
-
})
|
|
41
|
-
}
|
|
42
|
-
})
|
|
43
|
-
}
|
package/src/utf8-from-bytes.ts
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { NodeJSBuffer } from './lib/nodejs-buffer.js'
|
|
2
|
-
|
|
3
|
-
const Buffer = NodeJSBuffer
|
|
4
|
-
|
|
5
|
-
/**
 * Buffer-backed UTF-8 decoder for binary data.
 *
 * Evaluates to `null` when `Buffer` is falsy.
 *
 * @param bytes - The binary data to decode
 * @returns The bytes interpreted as a UTF-8 string
 */
export const utf8FromBytesNode = Buffer
  ? function utf8FromBytesNode(bytes: Uint8Array): string {
      // @NOTE Passing the Uint8Array itself to Buffer.from() would copy its
      // content. Passing the underlying ArrayBuffer together with the view's
      // offset and length yields a Buffer over the same memory instead.
      const { buffer, byteOffset, byteLength } = bytes
      return Buffer.from(buffer, byteOffset, byteLength).toString('utf8')
    }
  : /* v8 ignore next -- @preserve */ null
|
|
18
|
-
|
|
19
|
-
// Decoder shared by every call (decode() without the `stream` option keeps no
// state between invocations). `ignoreBOM: true` preserves a leading U+FEFF in
// the output, which is how `Buffer#toString('utf8')` behaves; the default
// TextDecoder would drop it and make the two implementations disagree.
const utf8Decoder = /*#__PURE__*/ new TextDecoder('utf-8', { ignoreBOM: true })

/**
 * TextDecoder-backed UTF-8 decoder for binary data.
 *
 * @param bytes - The binary data to decode
 * @returns The bytes interpreted as a UTF-8 string
 */
export function utf8FromBytesNative(bytes: Uint8Array): string {
  return utf8Decoder.decode(bytes)
}
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest'
|
|
2
|
-
import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
|
|
3
|
-
|
|
4
|
-
describe(graphemeLenNative!, () => {
|
|
5
|
-
it('computes grapheme length', () => {
|
|
6
|
-
expect(graphemeLenNative!('a')).toBe(1)
|
|
7
|
-
expect(graphemeLenNative!('~')).toBe(1)
|
|
8
|
-
expect(graphemeLenNative!('ö')).toBe(1)
|
|
9
|
-
expect(graphemeLenNative!('ñ')).toBe(1)
|
|
10
|
-
expect(graphemeLenNative!('©')).toBe(1)
|
|
11
|
-
expect(graphemeLenNative!('⽘')).toBe(1)
|
|
12
|
-
expect(graphemeLenNative!('☎')).toBe(1)
|
|
13
|
-
expect(graphemeLenNative!('𓋓')).toBe(1)
|
|
14
|
-
expect(graphemeLenNative!('😀')).toBe(1)
|
|
15
|
-
expect(graphemeLenNative!('👨👩👧👧')).toBe(1)
|
|
16
|
-
expect(graphemeLenNative!('a~öñ©⽘☎𓋓😀👨👩👧👧')).toBe(10)
|
|
17
|
-
// https://github.com/bluesky-social/atproto/issues/4321
|
|
18
|
-
expect(graphemeLenNative!('नमस्ते')).toBe(3)
|
|
19
|
-
})
|
|
20
|
-
})
|
|
21
|
-
|
|
22
|
-
describe(graphemeLenPonyfill, () => {
|
|
23
|
-
it('computes grapheme length', () => {
|
|
24
|
-
expect(graphemeLenPonyfill('a')).toBe(1)
|
|
25
|
-
expect(graphemeLenPonyfill('~')).toBe(1)
|
|
26
|
-
expect(graphemeLenPonyfill('ö')).toBe(1)
|
|
27
|
-
expect(graphemeLenPonyfill('ñ')).toBe(1)
|
|
28
|
-
expect(graphemeLenPonyfill('©')).toBe(1)
|
|
29
|
-
expect(graphemeLenPonyfill('⽘')).toBe(1)
|
|
30
|
-
expect(graphemeLenPonyfill('☎')).toBe(1)
|
|
31
|
-
expect(graphemeLenPonyfill('𓋓')).toBe(1)
|
|
32
|
-
expect(graphemeLenPonyfill('😀')).toBe(1)
|
|
33
|
-
expect(graphemeLenPonyfill('👨👩👧👧')).toBe(1)
|
|
34
|
-
expect(graphemeLenPonyfill('a~öñ©⽘☎𓋓😀👨👩👧👧')).toBe(10)
|
|
35
|
-
// https://github.com/bluesky-social/atproto/issues/4321
|
|
36
|
-
expect(graphemeLenPonyfill('नमस्ते')).toBe(3)
|
|
37
|
-
})
|
|
38
|
-
})
|
package/src/utf8-grapheme-len.ts
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { countGraphemes } from 'unicode-segmenter/grapheme'
|
|
2
|
-
|
|
3
|
-
// @TODO: Drop usage of "unicode-segmenter" package when Intl.Segmenter is
|
|
4
|
-
// widely supported.
|
|
5
|
-
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter
|
|
6
|
-
// Single Intl.Segmenter instance (default options), or `null` when the
// runtime does not expose a usable `Intl.Segmenter` constructor.
const segmenter =
  'Segmenter' in Intl && typeof Intl.Segmenter === 'function'
    ? /*#__PURE__*/ new Intl.Segmenter()
    : /* v8 ignore next -- @preserve */ null

/**
 * Counts the segments that `Intl.Segmenter` yields for a string.
 *
 * Evaluates to `null` when no segmenter could be created.
 *
 * @param str - The string to measure
 * @returns The number of segments produced for `str`
 */
export const graphemeLenNative = segmenter
  ? function graphemeLenNative(str: string): number {
      // Only the number of segments matters, so walk the iterator by hand
      // and discard every yielded value.
      const cursor = segmenter.segment(str)[Symbol.iterator]()
      let count = 0
      while (!cursor.next().done) {
        count += 1
      }
      return count
    }
  : /* v8 ignore next -- @preserve */ null
|
|
18
|
-
|
|
19
|
-
/**
 * Grapheme counter backed by the "unicode-segmenter" package.
 *
 * @param str - The string to measure
 * @returns Whatever `countGraphemes()` reports for `str`
 */
export function graphemeLenPonyfill(str: string): number {
  const clusterCount = countGraphemes(str)
  return clusterCount
}
|
package/src/utf8-len.test.ts
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest'
|
|
2
|
-
import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
|
|
3
|
-
|
|
4
|
-
describe.each([utf8LenNode!, utf8LenCompute!] as const)('%o', (utf8Len) => {
|
|
5
|
-
it('computes utf8 string length', () => {
|
|
6
|
-
expect(utf8Len('a')).toBe(1)
|
|
7
|
-
expect(utf8Len('~')).toBe(1)
|
|
8
|
-
expect(utf8Len('ö')).toBe(2)
|
|
9
|
-
expect(utf8Len('ñ')).toBe(2)
|
|
10
|
-
expect(utf8Len('©')).toBe(2)
|
|
11
|
-
expect(utf8Len('⽘')).toBe(3)
|
|
12
|
-
expect(utf8Len('☎')).toBe(3)
|
|
13
|
-
expect(utf8Len('𓋓')).toBe(4)
|
|
14
|
-
expect(utf8Len('😀')).toBe(4)
|
|
15
|
-
expect(utf8Len('👨👩👧👧')).toBe(25)
|
|
16
|
-
// high surrogate with no low surrogate
|
|
17
|
-
expect(utf8Len('\uD83D')).toBe(3)
|
|
18
|
-
// low surrogate with no high surrogate
|
|
19
|
-
expect(utf8Len('\uDC00')).toBe(3)
|
|
20
|
-
})
|
|
21
|
-
})
|
package/src/utf8-len.ts
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import { NodeJSBuffer } from './lib/nodejs-buffer.js'
|
|
2
|
-
|
|
3
|
-
// @NOTE This file is not meant to be exported directly. Instead, we re-export
|
|
4
|
-
// public functions from ./utf8.ts. The reason for this separation is that this
|
|
5
|
-
// file allows to test both the NodeJS-optimized and ponyfill implementations.
|
|
6
|
-
|
|
7
|
-
/**
 * UTF-8 byte length of a string, computed with `NodeJSBuffer.byteLength`.
 *
 * Evaluates to `null` when `NodeJSBuffer` is falsy.
 *
 * @param string - The string to measure
 * @returns The number of bytes reported for the 'utf8' encoding
 */
export const utf8LenNode = NodeJSBuffer
  ? function utf8LenNode(string: string): number {
      const byteCount = NodeJSBuffer!.byteLength(string, 'utf8')
      return byteCount
    }
  : /* v8 ignore next -- @preserve */ null
|
|
12
|
-
|
|
13
|
-
/**
 * Computes the UTF-8 byte length of a string without encoding it.
 *
 * Equivalent in result to `new TextEncoder().encode(string).byteLength`, but
 * it only walks the UTF-16 code units and therefore avoids allocating and
 * filling a byte array.
 *
 * @param string - The string to measure
 * @returns The number of bytes `string` occupies once encoded as UTF-8
 */
export function utf8LenCompute(string: string): number {
  const unitCount = string.length

  // Every code unit costs at least one byte; this tracks what comes on top.
  let extraBytes = 0
  let index = 0

  while (index < unitCount) {
    const unit = string.charCodeAt(index)
    index += 1

    // ASCII: exactly one byte, nothing to add.
    if (unit <= 0x7f) continue

    // Two-byte sequence: one byte on top of the base count.
    if (unit <= 0x7ff) {
      extraBytes += 1
      continue
    }

    // Three-byte sequence: two bytes on top of the base count.
    extraBytes += 2

    const isHighSurrogate = unit >= 0xd800 && unit <= 0xdbff
    if (!isHighSurrogate) continue

    // A high surrogate followed by a low surrogate is a single four-byte
    // character. Its two code units plus the two extra bytes added above
    // already total four, so the low surrogate is skipped rather than
    // counted again. Past the end of the string charCodeAt() yields NaN,
    // which fails both comparisons.
    const following = string.charCodeAt(index)
    if (following >= 0xdc00 && following <= 0xdfff) {
      index += 1
    }
  }

  return unitCount + extraBytes
}
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import { assert, describe, expect, it } from 'vitest'
|
|
2
|
-
import { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'
|
|
3
|
-
|
|
4
|
-
const strings = [
|
|
5
|
-
'Hello, World!',
|
|
6
|
-
'¡Hola, Mundo!',
|
|
7
|
-
'こんにちは世界',
|
|
8
|
-
'😀👩💻🌍',
|
|
9
|
-
'',
|
|
10
|
-
'𓀀𓁐𓂀𓃰𓄿𓅱𓆑𓇋𓈖𓉔𓊃𓋴𓌳𓍿𓎛𓏏',
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
for (const utf8ToBase64 of [utf8ToBase64Node, utf8ToBase64Ponyfill] as const) {
|
|
14
|
-
assert(utf8ToBase64, 'implementation should not be null')
|
|
15
|
-
|
|
16
|
-
describe(utf8ToBase64, () => {
|
|
17
|
-
it('encodes utf8 string to base64', () => {
|
|
18
|
-
for (const text of strings) {
|
|
19
|
-
const b64 = Buffer.from(text, 'utf8')
|
|
20
|
-
.toString('base64')
|
|
21
|
-
.replaceAll('=', '') // utf8ToBase64 omits padding
|
|
22
|
-
const encoded = utf8ToBase64(text, 'base64')
|
|
23
|
-
expect(encoded).toBe(b64)
|
|
24
|
-
}
|
|
25
|
-
})
|
|
26
|
-
|
|
27
|
-
it('encodes utf8 string to base64url', () => {
|
|
28
|
-
for (const text of strings) {
|
|
29
|
-
const b64u = Buffer.from(text, 'utf8').toString('base64url')
|
|
30
|
-
const encoded = utf8ToBase64(text, 'base64url')
|
|
31
|
-
expect(encoded).toBe(b64u)
|
|
32
|
-
}
|
|
33
|
-
})
|
|
34
|
-
})
|
|
35
|
-
}
|
package/src/utf8-to-base64.ts
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { toString } from 'uint8arrays/to-string'
|
|
2
|
-
import { NodeJSBuffer } from './lib/nodejs-buffer.js'
|
|
3
|
-
import { Base64Alphabet } from './uint8array-base64.js'
|
|
4
|
-
import { toBase64Node } from './uint8array-to-base64.js'
|
|
5
|
-
|
|
6
|
-
const Buffer = NodeJSBuffer
|
|
7
|
-
|
|
8
|
-
/**
 * Buffer-backed UTF-8 text → base64 encoder.
 *
 * Evaluates to `null` when `Buffer` is falsy.
 *
 * @param text - The string to encode
 * @param alphabet - Base64 alphabet, forwarded unchanged to `toBase64Node`
 * @returns The value produced by `toBase64Node` for the UTF-8 bytes of `text`
 */
export const utf8ToBase64Node = Buffer
  ? function utf8ToBase64Node(text: string, alphabet?: Base64Alphabet): string {
      // Encode the text as UTF-8 bytes, then let the Buffer-based base64
      // encoder render those bytes.
      return toBase64Node!(Buffer.from(text, 'utf8'), alphabet)
    }
  : /* v8 ignore next -- @preserve */ null
|
|
14
|
-
|
|
15
|
-
// Shared encoder instance; TextEncoder always produces UTF-8.
const textEncoder = /*#__PURE__*/ new TextEncoder()

/**
 * Portable UTF-8 text → base64 encoder built on `TextEncoder` and
 * `uint8arrays`.
 *
 * @param text - The string to encode
 * @param alphabet - The base64 alphabet to use; defaults to `'base64'`
 * @returns The base64 rendering of the UTF-8 bytes of `text`
 */
export function utf8ToBase64Ponyfill(
  text: string,
  // An explicit default is required here: `toString()` falls back to the
  // 'utf8' encoding when given `undefined`, which would return the input text
  // itself rather than its base64 form.
  alphabet: Base64Alphabet = 'base64',
): string {
  const bytes = textEncoder.encode(text)
  return toString(bytes, alphabet)
}
|
package/src/utf8.ts
DELETED
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
import { Base64Alphabet } from './uint8array.js'
|
|
2
|
-
import {
|
|
3
|
-
utf8FromBase64Node,
|
|
4
|
-
utf8FromBase64Ponyfill,
|
|
5
|
-
} from './utf8-from-base64.js'
|
|
6
|
-
import { utf8FromBytesNative, utf8FromBytesNode } from './utf8-from-bytes.js'
|
|
7
|
-
import { graphemeLenNative, graphemeLenPonyfill } from './utf8-grapheme-len.js'
|
|
8
|
-
import { utf8LenCompute, utf8LenNode } from './utf8-len.js'
|
|
9
|
-
import { utf8ToBase64Node, utf8ToBase64Ponyfill } from './utf8-to-base64.js'
|
|
10
|
-
|
|
11
|
-
/**
|
|
12
|
-
* Converts a Uint8Array to a UTF-8 string.
|
|
13
|
-
*
|
|
14
|
-
* Uses Node.js Buffer when available for performance, falling back to
|
|
15
|
-
* TextDecoder in environments without Buffer support.
|
|
16
|
-
*
|
|
17
|
-
* @param bytes - The binary data to decode
|
|
18
|
-
* @returns The decoded string (as UTF-16 JavaScript string)
|
|
19
|
-
*
|
|
20
|
-
* @example
|
|
21
|
-
* ```typescript
|
|
22
|
-
* import { utf8FromBytes } from '@atproto/lex-data'
|
|
23
|
-
*
|
|
24
|
-
* const bytes = new Uint8Array([72, 101, 108, 108, 111])
|
|
25
|
-
* utf8FromBytes(bytes) // 'Hello'
|
|
26
|
-
* ```
|
|
27
|
-
*/
|
|
28
|
-
export const utf8FromBytes = utf8FromBytesNode ?? utf8FromBytesNative
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Counts the number of grapheme clusters (user-perceived characters) in a string.
|
|
32
|
-
*
|
|
33
|
-
* Grapheme clusters represent what users typically think of as "characters",
|
|
34
|
-
* handling complex cases like:
|
|
35
|
-
* - Emoji with skin tones and ZWJ sequences (e.g., family emoji)
|
|
36
|
-
* - Combined characters (e.g., 'e' + combining accent)
|
|
37
|
-
* - Regional indicator pairs (flag emoji)
|
|
38
|
-
*
|
|
39
|
-
* Uses native {@link Intl.Segmenter} when available, falling back to a ponyfill.
|
|
40
|
-
*
|
|
41
|
-
* @param str - The string to measure
|
|
42
|
-
* @returns The number of grapheme clusters
|
|
43
|
-
*
|
|
44
|
-
* @example
|
|
45
|
-
* ```typescript
|
|
46
|
-
* import { graphemeLen } from '@atproto/lex-data'
|
|
47
|
-
*
|
|
48
|
-
* graphemeLen('hello') // 5
|
|
49
|
-
* graphemeLen('cafe\u0301') // 4 (cafe with combining accent)
|
|
50
|
-
* graphemeLen('\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}') // 1 (family emoji)
|
|
51
|
-
* ```
|
|
52
|
-
*/
|
|
53
|
-
export const graphemeLen: (str: string) => number =
|
|
54
|
-
/* v8 ignore next -- @preserve */ graphemeLenNative ?? graphemeLenPonyfill
|
|
55
|
-
|
|
56
|
-
/* v8 ignore next -- @preserve */
|
|
57
|
-
if (graphemeLen === graphemeLenPonyfill) {
|
|
58
|
-
/*#__PURE__*/
|
|
59
|
-
console.warn(
|
|
60
|
-
'[@atproto/lex-data]: Intl.Segmenter is not available in this environment. Falling back to ponyfill implementation.',
|
|
61
|
-
)
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
/**
|
|
65
|
-
* Calculates the UTF-8 byte length of a string.
|
|
66
|
-
*
|
|
67
|
-
* Returns the number of bytes the string would occupy when encoded as UTF-8.
|
|
68
|
-
* This is important for Lexicon validation where schemas specify byte limits.
|
|
69
|
-
*
|
|
70
|
-
* Uses Node.js Buffer.byteLength when available for performance,
|
|
71
|
-
* falling back to a computed implementation.
|
|
72
|
-
*
|
|
73
|
-
* @param str - The string to measure
|
|
74
|
-
* @returns The UTF-8 byte length
|
|
75
|
-
*
|
|
76
|
-
* @example
|
|
77
|
-
* ```typescript
|
|
78
|
-
* import { utf8Len } from '@atproto/lex-data'
|
|
79
|
-
*
|
|
80
|
-
* utf8Len('hello') // 5 (ASCII: 1 byte per char)
|
|
81
|
-
* utf8Len('\u00e9') // 2 (e with accent: 2 bytes)
|
|
82
|
-
* utf8Len('\u{1F600}') // 4 (emoji: 4 bytes)
|
|
83
|
-
* utf8Len('\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}') // 25 (family emoji)
|
|
84
|
-
* ```
|
|
85
|
-
*/
|
|
86
|
-
export const utf8Len: (string: string) => number =
|
|
87
|
-
/* v8 ignore next -- @preserve */ utf8LenNode ?? utf8LenCompute
|
|
88
|
-
|
|
89
|
-
/**
|
|
90
|
-
* Encodes a UTF-8 string to base64.
|
|
91
|
-
*
|
|
92
|
-
* First encodes the string as UTF-8 bytes, then encodes those bytes as base64.
|
|
93
|
-
*
|
|
94
|
-
* @param str - The string to encode
|
|
95
|
-
* @param alphabet - The base64 alphabet to use ('base64' or 'base64url')
|
|
96
|
-
* @returns The base64-encoded string
|
|
97
|
-
*
|
|
98
|
-
* @example
|
|
99
|
-
* ```typescript
|
|
100
|
-
* import { utf8ToBase64 } from '@atproto/lex-data'
|
|
101
|
-
*
|
|
102
|
-
* utf8ToBase64('Hello') // 'SGVsbG8' (the output carries no '=' padding)
|
|
103
|
-
* ```
|
|
104
|
-
*/
|
|
105
|
-
export const utf8ToBase64: (str: string, alphabet?: Base64Alphabet) => string =
|
|
106
|
-
/* v8 ignore next -- @preserve */ utf8ToBase64Node ?? utf8ToBase64Ponyfill
|
|
107
|
-
|
|
108
|
-
/**
|
|
109
|
-
* Decodes a base64 string to UTF-8.
|
|
110
|
-
*
|
|
111
|
-
* Decodes the base64 to bytes, then interprets those bytes as UTF-8 text.
|
|
112
|
-
*
|
|
113
|
-
* @param b64 - The base64 string to decode
|
|
114
|
-
* @param alphabet - The base64 alphabet to use ('base64' or 'base64url')
|
|
115
|
-
* @returns The decoded UTF-8 string
|
|
116
|
-
*
|
|
117
|
-
* @example
|
|
118
|
-
* ```typescript
|
|
119
|
-
* import { utf8FromBase64 } from '@atproto/lex-data'
|
|
120
|
-
*
|
|
121
|
-
* utf8FromBase64('SGVsbG8=') // 'Hello'
|
|
122
|
-
* ```
|
|
123
|
-
*/
|
|
124
|
-
export const utf8FromBase64: (
|
|
125
|
-
b64: string,
|
|
126
|
-
alphabet?: Base64Alphabet,
|
|
127
|
-
) => string =
|
|
128
|
-
/* v8 ignore next -- @preserve */ utf8FromBase64Node ?? utf8FromBase64Ponyfill
|
package/tsconfig.build.json
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"extends": ["../../../tsconfig/isomorphic.json"],
|
|
3
|
-
"include": ["./src"],
|
|
4
|
-
"exclude": ["**/*.test.ts"],
|
|
5
|
-
"compilerOptions": {
|
|
6
|
-
"noImplicitAny": true,
|
|
7
|
-
"importHelpers": true,
|
|
8
|
-
"target": "ES2023",
|
|
9
|
-
"rootDir": "./src",
|
|
10
|
-
"outDir": "./dist",
|
|
11
|
-
},
|
|
12
|
-
}
|
package/tsconfig.json
DELETED