@atproto/sync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/LICENSE.txt +7 -0
  3. package/README.md +93 -0
  4. package/dist/events.d.ts +49 -0
  5. package/dist/events.d.ts.map +1 -0
  6. package/dist/events.js +3 -0
  7. package/dist/events.js.map +1 -0
  8. package/dist/firehose/index.d.ts +50 -0
  9. package/dist/firehose/index.d.ts.map +1 -0
  10. package/dist/firehose/index.js +309 -0
  11. package/dist/firehose/index.js.map +1 -0
  12. package/dist/firehose/lexicons.d.ts +118 -0
  13. package/dist/firehose/lexicons.d.ts.map +1 -0
  14. package/dist/firehose/lexicons.js +265 -0
  15. package/dist/firehose/lexicons.js.map +1 -0
  16. package/dist/index.d.ts +4 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +20 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/runner/consecutive-list.d.ts +27 -0
  21. package/dist/runner/consecutive-list.d.ts.map +1 -0
  22. package/dist/runner/consecutive-list.js +68 -0
  23. package/dist/runner/consecutive-list.js.map +1 -0
  24. package/dist/runner/index.d.ts +4 -0
  25. package/dist/runner/index.d.ts.map +1 -0
  26. package/dist/runner/index.js +20 -0
  27. package/dist/runner/index.js.map +1 -0
  28. package/dist/runner/memory-runner.d.ts +24 -0
  29. package/dist/runner/memory-runner.d.ts.map +1 -0
  30. package/dist/runner/memory-runner.js +92 -0
  31. package/dist/runner/memory-runner.js.map +1 -0
  32. package/dist/runner/types.d.ts +5 -0
  33. package/dist/runner/types.d.ts.map +1 -0
  34. package/dist/runner/types.js +3 -0
  35. package/dist/runner/types.js.map +1 -0
  36. package/dist/util.d.ts +6 -0
  37. package/dist/util.d.ts.map +1 -0
  38. package/dist/util.js +13 -0
  39. package/dist/util.js.map +1 -0
  40. package/jest.config.js +8 -0
  41. package/package.json +37 -0
  42. package/src/events.ts +61 -0
  43. package/src/firehose/index.ts +357 -0
  44. package/src/firehose/lexicons.ts +407 -0
  45. package/src/index.ts +3 -0
  46. package/src/runner/consecutive-list.ts +44 -0
  47. package/src/runner/index.ts +3 -0
  48. package/src/runner/memory-runner.ts +72 -0
  49. package/src/runner/types.ts +8 -0
  50. package/src/util.ts +10 -0
  51. package/tests/firehose.test.ts +180 -0
  52. package/tests/runner.test.ts +122 -0
  53. package/tsconfig.build.json +8 -0
  54. package/tsconfig.json +4 -0
@@ -0,0 +1,407 @@
1
+ import type { IncomingMessage } from 'node:http'
2
+
3
+ import { type LexiconDoc, Lexicons } from '@atproto/lexicon'
4
+ import type { ErrorFrame, HandlerAuth } from '@atproto/xrpc-server'
5
+ import type { CID } from 'multiformats/cid'
6
+
7
+ // @NOTE: this file is an ugly copy job of codegen output. I'd like to clean this whole thing up
8
+
9
/**
 * Type guard for non-null values whose `typeof` is `'object'`.
 * Arrays satisfy this check as well, since `typeof [] === 'object'`.
 */
export function isObj(v: unknown): v is Record<string, unknown> {
  if (v === null) return false
  return typeof v === 'object'
}
12
+
13
/**
 * Type guard reporting whether `prop` is present on `data`.
 * Inherited properties count, exactly as with the `in` operator.
 */
export function hasProp<K extends PropertyKey>(
  data: object,
  prop: K,
): data is Record<K, unknown> {
  // Reflect.has is the function form of the `in` operator.
  return Reflect.has(data, prop)
}
19
+
20
/** Query parameters of the `com.atproto.sync.subscribeRepos` subscription. */
export interface QueryParams {
  /** Last known event seq number; backfill starts from it. */
  cursor?: number
}
24
+
25
/**
 * Every message shape the subscription may deliver. The trailing open-ended
 * member keeps the union extensible: any object carrying a `$type` string is
 * accepted at the type level.
 */
export type RepoEvent =
  | Commit
  | Identity
  | Account
  | Handle
  | Migrate
  | Tombstone
  | Info
  | {
      $type: string
      [k: string]: unknown
    }
34
/** Error frame limited to the two error names the subscription declares. */
export type HandlerError = ErrorFrame<'FutureCursor' | 'ConsumerTooSlow'>

/** Anything a handler may yield: an error frame or a repo event. */
export type HandlerOutput = HandlerError | RepoEvent

/** Context object handed to a subscription handler. */
export type HandlerReqCtx<HA extends HandlerAuth = never> = {
  auth: HA
  params: QueryParams
  req: IncomingMessage
  signal: AbortSignal
}

/** A subscription handler: takes the request context, yields outputs asynchronously. */
export type Handler<HA extends HandlerAuth = never> = (
  ctx: HandlerReqCtx<HA>,
) => AsyncIterable<HandlerOutput>
45
+
46
/**
 * An update of repository state. Empty commits are permitted: they carry no
 * repo data changes, only an updated rev and signature.
 */
export interface Commit {
  /** Stream sequence number of this message. */
  seq: number
  /** DEPRECATED -- unused */
  rebase: boolean
  /**
   * Set when the commit held too many ops or too much data. Consumers then
   * need a separate request to fetch the missing data.
   */
  tooBig: boolean
  /** The repo this event comes from. */
  repo: string
  /** CID of the repo commit object. */
  commit: CID
  /**
   * DEPRECATED -- unused. WARNING -- nullable and optional; stick with
   * optional to ensure golang interoperability.
   */
  prev?: CID | null
  /**
   * Rev of the emitted commit. The same value is present in the commit object
   * inside `blocks`, unless this is a tooBig event.
   */
  rev: string
  /** Rev of the last emitted commit from this repo, if any. */
  since: string | null
  /** CAR file with the relevant blocks, as a diff since the previous repo state. */
  blocks: Uint8Array
  ops: RepoOp[]
  blobs: CID[]
  /** Timestamp of when this message was originally broadcast. */
  time: string
  [k: string]: unknown
}
72
+
73
/**
 * Type guard for `#commit` events. Only the `$type` tag is inspected; the
 * remaining fields are not validated here.
 */
export function isCommit(v: unknown): v is Commit {
  if (!isObj(v)) return false
  if (!hasProp(v, '$type')) return false
  return v.$type === 'com.atproto.sync.subscribeRepos#commit'
}
80
+
81
/**
 * A change to an account's identity: possibly an updated handle, signing key,
 * or pds hosting endpoint. Acts as a prod for downstream services to refresh
 * their identity cache.
 */
export interface Identity {
  seq: number
  did: string
  time: string
  /**
   * Current handle for the account, or 'handle.invalid' if validation fails.
   * Optional; it may have been validated or passed through from an upstream
   * source. Semantics and behaviors for PDS vs Relay may evolve in the future;
   * see atproto specs for more details.
   */
  handle?: string
  [k: string]: unknown
}
90
+
91
/**
 * Type guard for `#identity` events. Matches on the `$type` tag alone and
 * performs no field validation.
 */
export function isIdentity(v: unknown): v is Identity {
  const tagged =
    isObj(v) &&
    hasProp(v, '$type') &&
    v.$type === 'com.atproto.sync.subscribeRepos#identity'
  return tagged
}
98
+
99
/**
 * A change to an account's status on a host (eg, PDS or Relay). The status
 * describes the host that emitted the event, not necessarily the currently
 * active PDS. Eg, a Relay takedown would emit a takedown with active=false,
 * even if the PDS is still active.
 */
export interface Account {
  seq: number
  did: string
  time: string
  /** Whether the account has a repository that can be fetched from the emitting host. */
  active: boolean
  /** When active=false, an optional reason why the account is not active. */
  status?: 'takendown' | 'suspended' | 'deleted' | 'deactivated' | string
  [k: string]: unknown
}
110
+
111
/**
 * Type guard for `#account` events. Matches on the `$type` tag alone and
 * performs no field validation.
 */
export function isAccount(v: unknown): v is Account {
  if (!isObj(v) || !hasProp(v, '$type')) {
    return false
  }
  return v.$type === 'com.atproto.sync.subscribeRepos#account'
}
118
+
119
/**
 * Handle-change event.
 *
 * DEPRECATED -- Use #identity event instead
 */
export interface Handle {
  seq: number
  did: string
  handle: string
  time: string
  [k: string]: unknown
}
127
+
128
/**
 * Type guard for the deprecated `#handle` events. Matches on the `$type` tag
 * alone and performs no field validation.
 */
export function isHandle(v: unknown): v is Handle {
  if (!isObj(v)) return false
  return hasProp(v, '$type')
    ? v.$type === 'com.atproto.sync.subscribeRepos#handle'
    : false
}
135
+
136
/**
 * Migration event; `migrateTo` may be null.
 *
 * DEPRECATED -- Use #account event instead
 */
export interface Migrate {
  seq: number
  did: string
  migrateTo: string | null
  time: string
  [k: string]: unknown
}
144
+
145
/**
 * Type guard for the deprecated `#migrate` events. Matches on the `$type` tag
 * alone and performs no field validation.
 */
export function isMigrate(v: unknown): v is Migrate {
  if (!isObj(v)) return false
  if (!hasProp(v, '$type')) return false
  return v.$type === 'com.atproto.sync.subscribeRepos#migrate'
}
152
+
153
/**
 * Tombstone event.
 *
 * DEPRECATED -- Use #account event instead
 */
export interface Tombstone {
  seq: number
  did: string
  time: string
  [k: string]: unknown
}
160
+
161
/**
 * Type guard for the deprecated `#tombstone` events. Matches on the `$type`
 * tag alone and performs no field validation.
 */
export function isTombstone(v: unknown): v is Tombstone {
  const tagged =
    isObj(v) &&
    hasProp(v, '$type') &&
    v.$type === 'com.atproto.sync.subscribeRepos#tombstone'
  return tagged
}
168
+
169
/** Informational message; `'OutdatedCursor'` is the one known `name` value. */
export interface Info {
  name: 'OutdatedCursor' | string
  /** Optional free-form text accompanying the info name. */
  message?: string
  [k: string]: unknown
}
174
+
175
/**
 * Type guard for `#info` messages. Matches on the `$type` tag alone and
 * performs no field validation.
 */
export function isInfo(v: unknown): v is Info {
  if (!isObj(v) || !hasProp(v, '$type')) {
    return false
  }
  return v.$type === 'com.atproto.sync.subscribeRepos#info'
}
182
+
183
/** One repo operation, ie a mutation of a single record. */
export interface RepoOp {
  /** Known values are 'create', 'update' and 'delete'; other strings are allowed by the type. */
  action: 'create' | 'update' | 'delete' | string
  path: string
  /** The new record CID for creates and updates; null for deletions. */
  cid: CID | null
  [k: string]: unknown
}
191
+
192
/**
 * Type guard for objects tagged with the `#repoOp` `$type`. Matches on the
 * tag alone and performs no field validation.
 */
export function isRepoOp(v: unknown): v is RepoOp {
  if (!isObj(v)) return false
  return hasProp(v, '$type')
    ? v.$type === 'com.atproto.sync.subscribeRepos#repoOp'
    : false
}
199
+
200
/**
 * Lexicon document for the `com.atproto.sync.subscribeRepos` subscription.
 *
 * The message union lists every event type that has a schema in `defs`. The
 * `identity` and `account` defs mirror the `Identity` and `Account`
 * interfaces of this module, so that those events are checked against a
 * schema like the others instead of falling outside the union's refs.
 */
export const ComAtprotoSyncSubscribeRepos: LexiconDoc = {
  lexicon: 1,
  id: 'com.atproto.sync.subscribeRepos',
  defs: {
    main: {
      type: 'subscription',
      description: 'Subscribe to repo updates',
      parameters: {
        type: 'params',
        properties: {
          cursor: {
            type: 'integer',
            description: 'The last known event to backfill from.',
          },
        },
      },
      message: {
        schema: {
          type: 'union',
          refs: [
            'lex:com.atproto.sync.subscribeRepos#commit',
            'lex:com.atproto.sync.subscribeRepos#identity',
            'lex:com.atproto.sync.subscribeRepos#account',
            'lex:com.atproto.sync.subscribeRepos#handle',
            'lex:com.atproto.sync.subscribeRepos#migrate',
            'lex:com.atproto.sync.subscribeRepos#tombstone',
            'lex:com.atproto.sync.subscribeRepos#info',
          ],
        },
      },
      errors: [
        {
          name: 'FutureCursor',
        },
        {
          name: 'ConsumerTooSlow',
        },
      ],
    },
    commit: {
      type: 'object',
      required: [
        'seq',
        'rebase',
        'tooBig',
        'repo',
        'commit',
        'rev',
        'since',
        'blocks',
        'ops',
        'blobs',
        'time',
      ],
      nullable: ['prev', 'since'],
      properties: {
        seq: {
          type: 'integer',
        },
        rebase: {
          type: 'boolean',
        },
        tooBig: {
          type: 'boolean',
        },
        repo: {
          type: 'string',
          format: 'did',
        },
        commit: {
          type: 'cid-link',
        },
        prev: {
          type: 'cid-link',
        },
        rev: {
          type: 'string',
          description: 'The rev of the emitted commit',
        },
        since: {
          type: 'string',
          description: 'The rev of the last emitted commit from this repo',
        },
        blocks: {
          type: 'bytes',
          description: 'CAR file containing relevant blocks',
          maxLength: 1000000,
        },
        ops: {
          type: 'array',
          items: {
            type: 'ref',
            ref: 'lex:com.atproto.sync.subscribeRepos#repoOp',
          },
          maxLength: 200,
        },
        blobs: {
          type: 'array',
          items: {
            type: 'cid-link',
          },
        },
        time: {
          type: 'string',
          format: 'datetime',
        },
      },
    },
    // Schema for the `Identity` interface; `handle` is the only optional field.
    identity: {
      type: 'object',
      required: ['seq', 'did', 'time'],
      properties: {
        seq: {
          type: 'integer',
        },
        did: {
          type: 'string',
          format: 'did',
        },
        time: {
          type: 'string',
          format: 'datetime',
        },
        handle: {
          type: 'string',
          format: 'handle',
        },
      },
    },
    // Schema for the `Account` interface; `status` is an open string with known values.
    account: {
      type: 'object',
      required: ['seq', 'did', 'time', 'active'],
      properties: {
        seq: {
          type: 'integer',
        },
        did: {
          type: 'string',
          format: 'did',
        },
        time: {
          type: 'string',
          format: 'datetime',
        },
        active: {
          type: 'boolean',
        },
        status: {
          type: 'string',
          knownValues: ['takendown', 'suspended', 'deleted', 'deactivated'],
        },
      },
    },
    handle: {
      type: 'object',
      required: ['seq', 'did', 'handle', 'time'],
      properties: {
        seq: {
          type: 'integer',
        },
        did: {
          type: 'string',
          format: 'did',
        },
        handle: {
          type: 'string',
          format: 'handle',
        },
        time: {
          type: 'string',
          format: 'datetime',
        },
      },
    },
    migrate: {
      type: 'object',
      required: ['seq', 'did', 'migrateTo', 'time'],
      nullable: ['migrateTo'],
      properties: {
        seq: {
          type: 'integer',
        },
        did: {
          type: 'string',
          format: 'did',
        },
        migrateTo: {
          type: 'string',
        },
        time: {
          type: 'string',
          format: 'datetime',
        },
      },
    },
    tombstone: {
      type: 'object',
      required: ['seq', 'did', 'time'],
      properties: {
        seq: {
          type: 'integer',
        },
        did: {
          type: 'string',
          format: 'did',
        },
        time: {
          type: 'string',
          format: 'datetime',
        },
      },
    },
    info: {
      type: 'object',
      required: ['name'],
      properties: {
        name: {
          type: 'string',
          knownValues: ['OutdatedCursor'],
        },
        message: {
          type: 'string',
        },
      },
    },
    repoOp: {
      type: 'object',
      description:
        "A repo operation, ie a write of a single record. For creates and updates, cid is the record's CID as of this operation. For deletes, it's null.",
      required: ['action', 'path', 'cid'],
      nullable: ['cid'],
      properties: {
        action: {
          type: 'string',
          knownValues: ['create', 'update', 'delete'],
        },
        path: {
          type: 'string',
        },
        cid: {
          type: 'cid-link',
        },
      },
    },
  },
}
399
+
400
// Lexicon registry holding only the subscribeRepos document.
const lexicons = new Lexicons([ComAtprotoSyncSubscribeRepos])

/**
 * Runs `evt` through `assertValidXrpcMessage` for the subscribeRepos schema.
 *
 * Despite the `is` prefix this does not yield a boolean: it returns whatever
 * the assertion returns, typed as `RepoEvent`.
 * NOTE(review): presumably throws when validation fails -- confirm against
 * @atproto/lexicon.
 */
export const isValidRepoEvent = (evt: unknown) =>
  lexicons.assertValidXrpcMessage<RepoEvent>(
    'com.atproto.sync.subscribeRepos',
    evt,
  )
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
1
+ export * from './runner'
2
+ export * from './firehose'
3
+ export * from './events'
@@ -0,0 +1,44 @@
1
/**
 * Tracks items in insertion order and lets each one be marked complete
 * independently. Whenever an item completes, the run of completed items at
 * the head of the list is removed and its values are returned. Example:
 *
 * const consecutive = new ConsecutiveList<number>()
 * const item1 = consecutive.push(1)
 * const item2 = consecutive.push(2)
 * const item3 = consecutive.push(3)
 * item2.complete() // []
 * item1.complete() // [1, 2]
 * item3.complete() // [3]
 *
 */
export class ConsecutiveList<T> {
  list: ConsecutiveItem<T>[] = []

  /** Appends `value` to the tail and returns its handle. */
  push(value: T) {
    const entry = new ConsecutiveItem<T>(this, value)
    this.list.push(entry)
    return entry
  }

  /** Removes the leading run of completed items and returns their values. */
  complete(): T[] {
    const firstPending = this.list.findIndex((entry) => !entry.isComplete)
    // No pending entry means the whole list is done.
    const doneCount = firstPending === -1 ? this.list.length : firstPending
    const finished = this.list.splice(0, doneCount)
    return finished.map((entry) => entry.value)
  }
}

/** Handle for one value tracked by a ConsecutiveList. */
export class ConsecutiveItem<T> {
  isComplete = false
  constructor(
    private consecutive: ConsecutiveList<T>,
    public value: T,
  ) {}

  /** Marks this item done and returns what the owning list flushes as a result. */
  complete() {
    this.isComplete = true
    const flushed = this.consecutive.complete()
    return flushed
  }
}
@@ -0,0 +1,3 @@
1
+ export * from './consecutive-list'
2
+ export * from './memory-runner'
3
+ export * from './types'
@@ -0,0 +1,72 @@
1
+ import PQueue from 'p-queue'
2
+ import { ConsecutiveList } from './consecutive-list'
3
+ import { EventRunner } from './types'
4
+
5
+ export { ConsecutiveList }
6
+
7
/** Construction options for MemoryRunner; every field is optional. */
export type MemoryRunnerOptions = {
  /** Invoked with the new cursor value each time the runner's cursor advances. */
  setCursor?: (cursor: number) => Promise<void>
  /** Concurrency of the runner's main queue; unbounded when omitted. */
  concurrency?: number
  /** Initial value of the runner's cursor. */
  startCursor?: number
}
12
+
13
/**
 * A queue with arbitrarily many partitions, each processing work sequentially.
 * Partitions are created lazily and dropped from memory once they go idle.
 *
 * Every task passes through `mainQueue` (whose concurrency comes from the
 * options) and is then run on the single-concurrency queue of its partition.
 * `trackEvent` additionally records each seq in a ConsecutiveList so that the
 * cursor only moves to the end of the completed run at the head of that list.
 */
export class MemoryRunner implements EventRunner {
  consecutive = new ConsecutiveList<number>()
  mainQueue: PQueue
  partitions = new Map<string, PQueue>()
  cursor: number | undefined

  constructor(public opts: MemoryRunnerOptions = {}) {
    const concurrency = opts.concurrency ?? Infinity
    this.mainQueue = new PQueue({ concurrency })
    this.cursor = opts.startCursor
  }

  /** Returns the latest cursor value, or the start cursor if nothing advanced it. */
  getCursor() {
    return this.cursor
  }

  /**
   * Schedules `task` on the partition identified by `partitionId`.
   * Does nothing when the main queue is paused.
   */
  async addTask(partitionId: string, task: () => Promise<void>) {
    if (this.mainQueue.isPaused) return
    return this.mainQueue.add(() => this.getPartition(partitionId).add(task))
  }

  /** Looks up the partition queue, creating and registering it on first use. */
  private getPartition(partitionId: string) {
    const existing = this.partitions.get(partitionId)
    if (existing) return existing

    const created = new PQueue({ concurrency: 1 })
    // Forget the partition on its first 'idle' event.
    created.once('idle', () => this.partitions.delete(partitionId))
    this.partitions.set(partitionId, created)
    return created
  }

  /**
   * Runs `handler` on the partition for `did` and, once it finishes, advances
   * the cursor to the last seq of the completed run at the head of the list.
   * `opts.setCursor`, when provided, is awaited with the new cursor.
   * Does nothing when the main queue is paused.
   */
  async trackEvent(did: string, seq: number, handler: () => Promise<void>) {
    if (this.mainQueue.isPaused) return
    const item = this.consecutive.push(seq)
    const runAndAdvance = async () => {
      await handler()
      const flushed = item.complete()
      const latest = flushed.at(-1)
      // Nothing flushed: an earlier seq is still outstanding.
      if (latest === undefined) return
      this.cursor = latest
      if (this.opts.setCursor) {
        await this.opts.setCursor(this.cursor)
      }
    }
    await this.addTask(did, runAndAdvance)
  }

  /** Resolves once the main queue reports idle. */
  async processAll() {
    await this.mainQueue.onIdle()
  }

  /** Pauses and clears the main queue, clears every partition, then waits for idle. */
  async destroy() {
    this.mainQueue.pause()
    this.mainQueue.clear()
    for (const partition of this.partitions.values()) {
      partition.clear()
    }
    await this.mainQueue.onIdle()
  }
}
@@ -0,0 +1,8 @@
1
/**
 * Contract for components that execute event handlers and track the stream
 * cursor.
 */
export interface EventRunner {
  /**
   * Returns the current cursor, if any. `Awaited<number | undefined>`
   * resolves to plain `number | undefined`, so the call is synchronous.
   */
  getCursor(): Awaited<number | undefined>
  /**
   * Runs `handler` for the event identified by `did` and `seq`.
   *
   * @param did - DID the event belongs to.
   * @param seq - Sequence number of the event.
   * @param handler - Async callback that processes the event.
   */
  trackEvent(
    did: string,
    seq: number,
    handler: () => Promise<void>,
  ): Promise<void>
}
package/src/util.ts ADDED
@@ -0,0 +1,10 @@
1
+ import { isAccount, isCommit, isIdentity, RepoEvent } from './firehose/lexicons'
2
+
3
/**
 * Extracts the DID and sequence number from a repo event.
 *
 * Commit events take the DID from `repo`; account and identity events take it
 * from `did`. Every other event type yields `undefined`.
 */
export const didAndSeqForEvt = (
  evt: RepoEvent,
): { did: string; seq: number } | undefined => {
  if (isCommit(evt)) {
    const { seq, repo } = evt
    return { seq, did: repo }
  }
  if (isAccount(evt) || isIdentity(evt)) {
    const { seq, did } = evt
    return { seq, did }
  }
  return undefined
}