@atproto/api 0.10.3 → 0.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +16 -4
- package/dist/index.js.map +3 -3
- package/dist/rich-text/util.d.ts +4 -0
- package/package.json +2 -2
- package/src/index.ts +1 -0
- package/src/rich-text/detection.ts +13 -5
- package/src/rich-text/util.ts +11 -0
- package/tests/rich-text-detection.test.ts +28 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@atproto/api",
|
|
3
|
-
"version": "0.10.3",
|
|
3
|
+
"version": "0.10.4",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"description": "Client library for atproto and Bluesky",
|
|
6
6
|
"keywords": [
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"devDependencies": {
|
|
29
29
|
"common-tags": "^1.8.2",
|
|
30
30
|
"@atproto/lex-cli": "^0.3.1",
|
|
31
|
-
"@atproto/dev-env": "^0.2.
|
|
31
|
+
"@atproto/dev-env": "^0.2.36"
|
|
32
32
|
},
|
|
33
33
|
"scripts": {
|
|
34
34
|
"codegen": "pnpm docgen && node ./scripts/generate-code.mjs && lex gen-api ./src/client ../../lexicons/com/atproto/*/* ../../lexicons/app/bsky/*/*",
|
package/src/index.ts
CHANGED
|
@@ -14,6 +14,7 @@ export * from './agent'
|
|
|
14
14
|
export * from './rich-text/rich-text'
|
|
15
15
|
export * from './rich-text/sanitization'
|
|
16
16
|
export * from './rich-text/unicode'
|
|
17
|
+
export * from './rich-text/util'
|
|
17
18
|
export * from './moderation'
|
|
18
19
|
export * from './moderation/types'
|
|
19
20
|
export { LABELS } from './moderation/const/labels'
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
import TLDs from 'tlds'
|
|
2
2
|
import { AppBskyRichtextFacet } from '../client'
|
|
3
3
|
import { UnicodeString } from './unicode'
|
|
4
|
+
import {
|
|
5
|
+
URL_REGEX,
|
|
6
|
+
MENTION_REGEX,
|
|
7
|
+
TAG_REGEX,
|
|
8
|
+
TRAILING_PUNCTUATION_REGEX,
|
|
9
|
+
} from './util'
|
|
4
10
|
|
|
5
11
|
export type Facet = AppBskyRichtextFacet.Main
|
|
6
12
|
|
|
@@ -9,7 +15,7 @@ export function detectFacets(text: UnicodeString): Facet[] | undefined {
|
|
|
9
15
|
const facets: Facet[] = []
|
|
10
16
|
{
|
|
11
17
|
// mentions
|
|
12
|
-
const re =
|
|
18
|
+
const re = MENTION_REGEX
|
|
13
19
|
while ((match = re.exec(text.utf16))) {
|
|
14
20
|
if (!isValidDomain(match[3]) && !match[3].endsWith('.test')) {
|
|
15
21
|
continue // probably not a handle
|
|
@@ -33,8 +39,7 @@ export function detectFacets(text: UnicodeString): Facet[] | undefined {
|
|
|
33
39
|
}
|
|
34
40
|
{
|
|
35
41
|
// links
|
|
36
|
-
const re =
|
|
37
|
-
/(^|\s|\()((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))/gim
|
|
42
|
+
const re = URL_REGEX
|
|
38
43
|
while ((match = re.exec(text.utf16))) {
|
|
39
44
|
let uri = match[2]
|
|
40
45
|
if (!uri.startsWith('http')) {
|
|
@@ -70,11 +75,14 @@ export function detectFacets(text: UnicodeString): Facet[] | undefined {
|
|
|
70
75
|
}
|
|
71
76
|
}
|
|
72
77
|
{
|
|
73
|
-
const re =
|
|
78
|
+
const re = TAG_REGEX
|
|
74
79
|
while ((match = re.exec(text.utf16))) {
|
|
75
80
|
let [, leading, tag] = match
|
|
76
81
|
|
|
77
|
-
|
|
82
|
+
if (!tag) continue
|
|
83
|
+
|
|
84
|
+
// strip ending punctuation and any spaces
|
|
85
|
+
tag = tag.trim().replace(TRAILING_PUNCTUATION_REGEX, '')
|
|
78
86
|
|
|
79
87
|
if (tag.length === 0 || tag.length > 64) continue
|
|
80
88
|
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export const MENTION_REGEX = /(^|\s|\()(@)([a-zA-Z0-9.-]+)(\b)/g
|
|
2
|
+
export const URL_REGEX =
|
|
3
|
+
/(^|\s|\()((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))/gim
|
|
4
|
+
export const TRAILING_PUNCTUATION_REGEX = /\p{P}+$/gu
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* `\ufe0f` emoji modifier
|
|
8
|
+
* `\u00AD\u2060\u200A\u200B\u200C\u200D\u20e2` zero-width spaces (likely incomplete)
|
|
9
|
+
*/
|
|
10
|
+
export const TAG_REGEX =
|
|
11
|
+
/(^|\s)[#＃]((?!\ufe0f)[^\s\u00AD\u2060\u200A\u200B\u200C\u200D\u20e2]*[^\d\s\p{P}\u00AD\u2060\u200A\u200B\u200C\u200D\u20e2]+[^\s\u00AD\u2060\u200A\u200B\u200C\u200D\u20e2]*)?/gu
|
|
@@ -218,7 +218,7 @@ describe('detectFacets', () => {
|
|
|
218
218
|
}
|
|
219
219
|
})
|
|
220
220
|
|
|
221
|
-
|
|
221
|
+
describe('correctly detects tags inline', () => {
|
|
222
222
|
const inputs: [
|
|
223
223
|
string,
|
|
224
224
|
string[],
|
|
@@ -234,11 +234,13 @@ describe('detectFacets', () => {
|
|
|
234
234
|
],
|
|
235
235
|
],
|
|
236
236
|
['#1', [], []],
|
|
237
|
+
['#1a', ['1a'], [{ byteStart: 0, byteEnd: 3 }]],
|
|
237
238
|
['#tag', ['tag'], [{ byteStart: 0, byteEnd: 4 }]],
|
|
238
239
|
['body #tag', ['tag'], [{ byteStart: 5, byteEnd: 9 }]],
|
|
239
240
|
['#tag body', ['tag'], [{ byteStart: 0, byteEnd: 4 }]],
|
|
240
241
|
['body #tag body', ['tag'], [{ byteStart: 5, byteEnd: 9 }]],
|
|
241
242
|
['body #1', [], []],
|
|
243
|
+
['body #1a', ['1a'], [{ byteStart: 5, byteEnd: 8 }]],
|
|
242
244
|
['body #a1', ['a1'], [{ byteStart: 5, byteEnd: 8 }]],
|
|
243
245
|
['#', [], []],
|
|
244
246
|
['#?', [], []],
|
|
@@ -254,12 +256,18 @@ describe('detectFacets', () => {
|
|
|
254
256
|
[],
|
|
255
257
|
[],
|
|
256
258
|
],
|
|
259
|
+
[
|
|
260
|
+
'body #thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!',
|
|
261
|
+
['thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'],
|
|
262
|
+
[{ byteStart: 5, byteEnd: 70 }],
|
|
263
|
+
],
|
|
257
264
|
[
|
|
258
265
|
'its a #double#rainbow',
|
|
259
266
|
['double#rainbow'],
|
|
260
267
|
[{ byteStart: 6, byteEnd: 21 }],
|
|
261
268
|
],
|
|
262
269
|
['##hashash', ['#hashash'], [{ byteStart: 0, byteEnd: 9 }]],
|
|
270
|
+
['##', [], []],
|
|
263
271
|
['some #n0n3s@n5e!', ['n0n3s@n5e'], [{ byteStart: 5, byteEnd: 15 }]],
|
|
264
272
|
[
|
|
265
273
|
'works #with,punctuation',
|
|
@@ -319,9 +327,26 @@ describe('detectFacets', () => {
|
|
|
319
327
|
},
|
|
320
328
|
],
|
|
321
329
|
],
|
|
330
|
+
['no match (\\u200B): #', [], []],
|
|
331
|
+
['no match (\\u200Ba): #a', [], []],
|
|
332
|
+
['match (a\\u200Bb): #ab', ['a'], [{ byteStart: 18, byteEnd: 20 }]],
|
|
333
|
+
['match (ab\\u200B): #ab', ['ab'], [{ byteStart: 18, byteEnd: 21 }]],
|
|
334
|
+
['no match (\\u20e2tag): #⃢tag', [], []],
|
|
335
|
+
['no match (a\\u20e2b): #a⃢b', ['a'], [{ byteStart: 21, byteEnd: 23 }]],
|
|
336
|
+
[
|
|
337
|
+
'match full width number sign (tag): ＃tag',
|
|
338
|
+
['tag'],
|
|
339
|
+
[{ byteStart: 36, byteEnd: 42 }],
|
|
340
|
+
],
|
|
341
|
+
[
|
|
342
|
+
'match full width number sign (tag): ＃#️⃣tag',
|
|
343
|
+
['#️⃣tag'],
|
|
344
|
+
[{ byteStart: 36, byteEnd: 49 }],
|
|
345
|
+
],
|
|
346
|
+
['no match 1?: #1?', [], []],
|
|
322
347
|
]
|
|
323
348
|
|
|
324
|
-
|
|
349
|
+
it.each(inputs)('%s', async (input, tags, indices) => {
|
|
325
350
|
const rt = new RichText({ text: input })
|
|
326
351
|
await rt.detectFacets(agent)
|
|
327
352
|
|
|
@@ -340,7 +365,7 @@ describe('detectFacets', () => {
|
|
|
340
365
|
|
|
341
366
|
expect(detectedTags).toEqual(tags)
|
|
342
367
|
expect(detectedIndices).toEqual(indices)
|
|
343
|
-
}
|
|
368
|
+
})
|
|
344
369
|
})
|
|
345
370
|
})
|
|
346
371
|
|