@alteran/astro 0.3.8 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +19 -30
  3. package/index.js +34 -28
  4. package/migrations/0007_bored_spitfire.sql +26 -0
  5. package/migrations/0008_furry_ozymandias.sql +2 -0
  6. package/migrations/meta/0007_snapshot.json +534 -0
  7. package/migrations/meta/0008_snapshot.json +548 -0
  8. package/migrations/meta/_journal.json +14 -0
  9. package/package.json +10 -9
  10. package/src/app.ts +8 -4
  11. package/src/db/account.ts +25 -6
  12. package/src/db/dal.ts +34 -23
  13. package/src/db/repo.ts +35 -35
  14. package/src/db/schema.ts +5 -1
  15. package/src/db/seed.ts +5 -13
  16. package/src/entrypoints/server.ts +2 -22
  17. package/src/handlers/root.ts +4 -4
  18. package/src/lib/account-state.ts +156 -0
  19. package/src/lib/actor.ts +28 -12
  20. package/src/lib/appview/auth-policy.ts +66 -0
  21. package/src/lib/appview/did-resolver.ts +233 -0
  22. package/src/lib/appview/proxy.ts +221 -0
  23. package/src/lib/appview/service-config.ts +61 -0
  24. package/src/lib/appview/service-jwt.ts +93 -0
  25. package/src/lib/appview/types.ts +25 -0
  26. package/src/lib/appview.ts +5 -532
  27. package/src/lib/auth-errors.ts +24 -0
  28. package/src/lib/auth.ts +63 -15
  29. package/src/lib/blockstore-gc.ts +2 -1
  30. package/src/lib/cache.ts +30 -4
  31. package/src/lib/chat.ts +14 -8
  32. package/src/lib/commit.ts +26 -36
  33. package/src/lib/config.ts +26 -15
  34. package/src/lib/did-document.ts +32 -0
  35. package/src/lib/errors.ts +54 -0
  36. package/src/lib/feed.ts +18 -19
  37. package/src/lib/firehose/frames.ts +87 -47
  38. package/src/lib/firehose/validation.ts +3 -3
  39. package/src/lib/jwt.ts +85 -177
  40. package/src/lib/labeler.ts +43 -30
  41. package/src/lib/logger.ts +4 -0
  42. package/src/lib/mst/block-map.ts +172 -0
  43. package/src/lib/mst/blockstore.ts +56 -93
  44. package/src/lib/mst/index.ts +1 -0
  45. package/src/lib/mst/leaf.ts +25 -0
  46. package/src/lib/mst/mst.ts +81 -237
  47. package/src/lib/mst/serialize.ts +97 -0
  48. package/src/lib/mst/types.ts +21 -0
  49. package/src/lib/oauth/clients.ts +67 -0
  50. package/src/lib/oauth/dpop-errors.ts +15 -0
  51. package/src/lib/oauth/dpop.ts +150 -0
  52. package/src/lib/oauth/resource.ts +199 -0
  53. package/src/lib/oauth/store.ts +77 -0
  54. package/src/lib/preferences.ts +9 -34
  55. package/src/lib/refresh-session.ts +161 -0
  56. package/src/lib/relay.ts +10 -8
  57. package/src/lib/secrets.ts +6 -7
  58. package/src/lib/sequencer.ts +12 -3
  59. package/src/lib/service-auth.ts +184 -0
  60. package/src/lib/session-tokens.ts +28 -76
  61. package/src/lib/streaming-car.ts +3 -0
  62. package/src/lib/tracing.ts +4 -3
  63. package/src/lib/util.ts +65 -15
  64. package/src/middleware.ts +1 -1
  65. package/src/pages/.well-known/did.json.ts +27 -30
  66. package/src/pages/.well-known/oauth-authorization-server.ts +31 -0
  67. package/src/pages/.well-known/oauth-protected-resource.ts +22 -0
  68. package/src/pages/debug/record.ts +1 -1
  69. package/src/pages/debug/sequencer.ts +28 -0
  70. package/src/pages/oauth/authorize.ts +78 -0
  71. package/src/pages/oauth/consent.ts +80 -0
  72. package/src/pages/oauth/par.ts +121 -0
  73. package/src/pages/oauth/token.ts +158 -0
  74. package/src/pages/xrpc/[...nsid].ts +61 -0
  75. package/src/pages/xrpc/app.bsky.actor.getPreferences.ts +12 -13
  76. package/src/pages/xrpc/app.bsky.actor.putPreferences.ts +23 -23
  77. package/src/pages/xrpc/app.bsky.unspecced.getAgeAssuranceState.ts +9 -2
  78. package/src/pages/xrpc/chat.bsky.convo.getLog.ts +9 -2
  79. package/src/pages/xrpc/chat.bsky.convo.listConvos.ts +9 -2
  80. package/src/pages/xrpc/com.atproto.identity.getRecommendedDidCredentials.ts +43 -41
  81. package/src/pages/xrpc/com.atproto.identity.requestPlcOperationSignature.ts +10 -3
  82. package/src/pages/xrpc/com.atproto.identity.resolveHandle.ts +40 -9
  83. package/src/pages/xrpc/com.atproto.identity.signPlcOperation.ts +41 -29
  84. package/src/pages/xrpc/com.atproto.identity.submitPlcOperation.ts +20 -6
  85. package/src/pages/xrpc/com.atproto.identity.updateHandle.ts +1 -1
  86. package/src/pages/xrpc/com.atproto.repo.applyWrites.ts +101 -11
  87. package/src/pages/xrpc/com.atproto.repo.createRecord.ts +44 -14
  88. package/src/pages/xrpc/com.atproto.repo.deleteRecord.ts +41 -13
  89. package/src/pages/xrpc/com.atproto.repo.describeRepo.ts +2 -2
  90. package/src/pages/xrpc/com.atproto.repo.getRecord.ts +14 -1
  91. package/src/pages/xrpc/com.atproto.repo.listMissingBlobs.ts +14 -6
  92. package/src/pages/xrpc/com.atproto.repo.listRecords.ts +1 -1
  93. package/src/pages/xrpc/com.atproto.repo.putRecord.ts +42 -14
  94. package/src/pages/xrpc/com.atproto.repo.uploadBlob.ts +76 -15
  95. package/src/pages/xrpc/com.atproto.server.checkAccountStatus.ts +20 -8
  96. package/src/pages/xrpc/com.atproto.server.createSession.ts +31 -11
  97. package/src/pages/xrpc/com.atproto.server.describeServer.ts +1 -1
  98. package/src/pages/xrpc/com.atproto.server.getServiceAuth.ts +12 -5
  99. package/src/pages/xrpc/com.atproto.server.getSession.ts +22 -8
  100. package/src/pages/xrpc/com.atproto.server.refreshSession.ts +30 -72
  101. package/src/pages/xrpc/com.atproto.sync.getBlob.ts +71 -22
  102. package/src/pages/xrpc/com.atproto.sync.getCheckout.json.ts +1 -1
  103. package/src/pages/xrpc/com.atproto.sync.getCheckout.ts +1 -1
  104. package/src/pages/xrpc/com.atproto.sync.getHead.ts +7 -2
  105. package/src/pages/xrpc/com.atproto.sync.getLatestCommit.ts +1 -1
  106. package/src/pages/xrpc/com.atproto.sync.getRecord.ts +5 -27
  107. package/src/pages/xrpc/com.atproto.sync.getRepo.json.ts +1 -1
  108. package/src/pages/xrpc/com.atproto.sync.getRepo.ts +50 -5
  109. package/src/pages/xrpc/com.atproto.sync.getRepoStatus.ts +58 -0
  110. package/src/pages/xrpc/com.atproto.sync.listBlobs.ts +1 -1
  111. package/src/pages/xrpc/com.atproto.sync.listRepos.ts +5 -3
  112. package/src/services/car.ts +207 -55
  113. package/src/services/r2-blob-store.ts +1 -1
  114. package/src/services/repo/blockstore-ops.ts +29 -0
  115. package/src/services/repo/operations.ts +133 -0
  116. package/src/services/repo-manager.ts +202 -253
  117. package/src/worker/runtime.ts +53 -8
  118. package/src/worker/sequencer/broadcast.ts +91 -0
  119. package/src/worker/sequencer/cid-helpers.ts +39 -0
  120. package/src/worker/sequencer/payload.ts +84 -0
  121. package/src/worker/sequencer/types.ts +36 -0
  122. package/src/worker/sequencer/upgrade.ts +141 -0
  123. package/src/worker/sequencer.ts +288 -412
  124. package/types/env.d.ts +15 -3
  125. package/src/pages/xrpc/app.bsky.actor.getProfile.ts +0 -49
  126. package/src/pages/xrpc/app.bsky.actor.getProfiles.ts +0 -51
  127. package/src/pages/xrpc/app.bsky.feed.getActorFeeds.ts +0 -25
  128. package/src/pages/xrpc/app.bsky.feed.getAuthorFeed.ts +0 -42
  129. package/src/pages/xrpc/app.bsky.feed.getFeedGenerators.ts +0 -25
  130. package/src/pages/xrpc/app.bsky.feed.getPostThread.ts +0 -37
  131. package/src/pages/xrpc/app.bsky.feed.getPosts.ts +0 -26
  132. package/src/pages/xrpc/app.bsky.feed.getTimeline.ts +0 -47
  133. package/src/pages/xrpc/app.bsky.graph.getFollowers.ts +0 -29
  134. package/src/pages/xrpc/app.bsky.graph.getFollows.ts +0 -29
  135. package/src/pages/xrpc/app.bsky.notification.getUnreadCount.ts +0 -20
  136. package/src/pages/xrpc/app.bsky.notification.listNotifications.ts +0 -27
@@ -1,17 +1,62 @@
1
1
  import type { APIContext } from 'astro';
2
+ import { errorMessage } from '../../lib/errors';
2
3
  import { buildRepoCar } from '../../services/car';
3
4
 
4
5
  export const prerender = false;
5
6
 
7
/**
 * com.atproto.sync.getRepo
 *
 * Returns a CAR snapshot of the repo for initial crawl/index.
 *
 * Query parameters:
 * - `did`: repo to export; falls back to `env.PDS_DID` when absent.
 * - `since`: accepted but ignored (Phase 1). The full snapshot is always
 *   returned; diff export is to be implemented later.
 *
 * Responses:
 * - 200 with the CAR bytes (`application/vnd.ipld.car`, `no-store`).
 * - 400 JSON `{ error, message }` when the failure message names one of the
 *   lexicon errors (`HeadNotFound` is reported as `RepoNotFound`).
 * - 500 JSON `InternalServerError` for any other failure (also logged).
 */
export async function GET({ locals, request }: APIContext) {
  const { env } = locals.runtime;
  const url = new URL(request.url);
  const did = url.searchParams.get('did') ?? (env.PDS_DID as string);

  // Single place that shapes XRPC-style JSON error responses.
  const jsonError = (status: number, error: string, message: string) =>
    new Response(JSON.stringify({ error, message }), {
      status,
      headers: { 'Content-Type': 'application/json' },
    });

  try {
    const { bytes } = await buildRepoCar(env, did);
    // The snapshot is already fully built; hand it to the response as a
    // one-chunk stream.
    const stream = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(bytes);
        controller.close();
      },
    });
    return new Response(stream as any, {
      status: 200,
      headers: {
        // Official content type for CAR v1
        'Content-Type': 'application/vnd.ipld.car',
        'Cache-Control': 'no-store',
      },
    });
  } catch (error) {
    const msg = String(errorMessage(error) || error);
    // Map to lexicon-specified errors
    const known = ['RepoNotFound', 'RepoTakendown', 'RepoSuspended', 'RepoDeactivated'];
    const name = known.find((n) => msg.includes(n)) || (msg.includes('HeadNotFound') ? 'RepoNotFound' : null);
    if (name) {
      return jsonError(400, name, msg);
    }
    console.error('getRepo (CAR) error:', error);
    return jsonError(500, 'InternalServerError', msg);
  }
}
52
+
53
/**
 * HEAD handler: answers with the CAR response headers and no body, so the
 * snapshot is never built for a HEAD request.
 */
export async function HEAD(): Promise<Response> {
  const headers = new Headers();
  headers.set('Content-Type', 'application/vnd.ipld.car');
  headers.set('Cache-Control', 'no-store');
  return new Response(null, { status: 200, headers });
}
@@ -0,0 +1,58 @@
1
+ import type { APIContext } from 'astro';
2
+ import { getRoot as getRepoRoot } from '../../db/repo';
3
+ import { getAccountState } from '../../db/dal';
4
+ import { toWireStatus } from '../../lib/account-state';
5
+
6
+ export const prerender = false;
7
+
8
/**
 * com.atproto.sync.getRepoStatus
 *
 * Reports hosting status for a repo: `did`, `active`, an optional `status`,
 * and the current `rev` when the account is active.
 *
 * The DID is taken from the `did` query parameter, falling back to
 * `env.PDS_DID` (or an empty string when that value is not a string).
 *
 * Responds 200 with the status JSON, 400 `RepoNotFound` when a failure message
 * contains that name, and 500 `InternalServerError` otherwise.
 */
export async function GET({ locals, request }: APIContext) {
  const { env } = locals.runtime;
  const { searchParams } = new URL(request.url);
  const fallbackDid = typeof env.PDS_DID === 'string' ? env.PDS_DID : '';
  const did = searchParams.get('did') ?? fallbackDid;

  const respond = (status: number, payload: Record<string, unknown>) =>
    new Response(JSON.stringify(payload), {
      status,
      headers: { 'Content-Type': 'application/json' },
    });

  try {
    // Account-state lookup is best effort: with no stored state, or if the
    // lookup throws, the repo is reported as active so reads are not blocked.
    let active = true;
    let status: string | undefined;
    try {
      const state = await getAccountState(env, did);
      if (state) {
        const wire = toWireStatus(state);
        active = wire.active;
        status = wire.status;
      }
    } catch (stateError) {
      console.warn('getAccountState failed:', stateError);
    }

    // `rev` is only looked up and reported for active accounts.
    let rev: string | undefined;
    if (active) {
      const head = await getRepoRoot(env);
      if (head?.rev) rev = String(head.rev);
    }

    // Optional fields are added only when set; key order is did, active,
    // status, rev.
    const payload: Record<string, unknown> = { did, active };
    if (status) payload.status = status;
    if (rev) payload.rev = rev;
    return respond(200, payload);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    const isMissingRepo = message.includes('RepoNotFound');
    return isMissingRepo
      ? respond(400, { error: 'RepoNotFound', message })
      : respond(500, { error: 'InternalServerError', message });
  }
}
58
+
@@ -12,7 +12,7 @@ export const prerender = false;
12
12
  export async function GET({ locals, url }: APIContext) {
13
13
  const { env } = locals.runtime;
14
14
 
15
- const did = url.searchParams.get('did') || env.PDS_DID || 'did:example:single-user';
15
+ const did = url.searchParams.get('did') || (env.PDS_DID as string);
16
16
  const since = url.searchParams.get('since') || '';
17
17
  const limit = parseInt(url.searchParams.get('limit') || '500', 10);
18
18
 
@@ -1,4 +1,5 @@
1
1
  import type { APIContext } from 'astro';
2
+ import { getRoot as getRepoRoot } from '../../db/repo';
2
3
 
3
4
  export const prerender = false;
4
5
 
@@ -11,14 +12,15 @@ export async function GET({ locals, url }: APIContext) {
11
12
 
12
13
  const did = env.PDS_DID || 'did:example:single-user';
13
14
  const handle = env.PDS_HANDLE || 'user.example.com';
15
+ const head = await getRepoRoot(env);
14
16
 
15
17
  return new Response(
16
18
  JSON.stringify({
17
19
  repos: [
18
20
  {
19
21
  did,
20
- head: '', // TODO: Get from repo_root
21
- rev: '', // TODO: Get from repo_root
22
+ head: head?.commitCid ?? null,
23
+ rev: head?.rev ?? null,
22
24
  active: true,
23
25
  },
24
26
  ],
@@ -28,4 +30,4 @@ export async function GET({ locals, url }: APIContext) {
28
30
  headers: { 'Content-Type': 'application/json' },
29
31
  }
30
32
  );
31
- }
33
+ }
@@ -7,6 +7,7 @@ import { CID } from 'multiformats/cid';
7
7
  import * as dagCbor from '@ipld/dag-cbor';
8
8
  import { sha256 } from 'multiformats/hashes/sha2';
9
9
  import { MST, Leaf, D1Blockstore } from '../lib/mst';
10
+ import { NotFound } from '../lib/errors';
10
11
 
11
12
  export type CarSnapshot = {
12
13
  bytes: Uint8Array;
@@ -67,17 +68,22 @@ export async function buildRepoCar(env: Env, did: string): Promise<CarSnapshot>
67
68
  };
68
69
 
69
70
  const mstRoot = CID.parse(String(parsed.data));
70
- // 1) Add all MST node blocks
71
- await addMstBlocks(blockstore, mstRoot, seen, blocks);
71
+ // 1) Add all MST node blocks (batched, non-recursive) and collect leaf CIDs
72
+ const { mstBlocks, leafCids } = await collectMstBfs(blockstore, mstRoot);
73
+ for (const [cid, bytes] of mstBlocks) {
74
+ const k = cid.toString();
75
+ if (seen.has(k)) continue;
76
+ seen.add(k);
77
+ blocks.push({ cid, bytes });
78
+ }
72
79
 
73
- // 2) Add all record leaf blocks by walking the MST
74
- try {
75
- const mst = MST.load(blockstore, mstRoot);
76
- for await (const leaf of mst.walkLeavesFrom('')) {
77
- await addBlock(leaf.value);
78
- }
79
- } catch (e) {
80
- console.warn('Snapshot: failed traversing MST leaves:', e);
80
+ // 2) Add record leaf blocks by batched fetch
81
+ const leafFetched = await blockstore.getMany(leafCids);
82
+ for (const [cidStr, bytes] of leafFetched.blocks.entries()) {
83
+ const cid = CID.parse(cidStr);
84
+ if (seen.has(cidStr)) continue;
85
+ seen.add(cidStr);
86
+ blocks.push({ cid, bytes });
81
87
  }
82
88
 
83
89
  const bytes = encodeCar([commitCid], blocks);
@@ -88,20 +94,8 @@ export async function buildRepoCar(env: Env, did: string): Promise<CarSnapshot>
88
94
  console.warn('Failed to reconstruct signed commit from tip; falling back to snapshot:', e);
89
95
  }
90
96
  }
91
-
92
- // Fallback: deterministic snapshot built from current records
93
- const rows = await listRecords(env);
94
- const blocks: { cid: CID; bytes: Uint8Array }[] = [];
95
- for (const r of rows) {
96
- if (!r.uri.startsWith(`at://${did}/`)) continue;
97
- const value = JSON.parse(r.json);
98
- const block = await encodeRecordBlock(value);
99
- blocks.push(block);
100
- }
101
- const commitObj = { type: 'commit', did, records: blocks.map((b) => b.cid.toString()).sort() };
102
- const commit = await encodeRecordBlock(commitObj);
103
- const bytes = encodeCar([commit.cid], [...blocks, commit]);
104
- return { bytes, root: commit.cid, blocks: [...blocks, commit] };
97
+ // No authoritative head to build from
98
+ throw new NotFound('RepoNotFound');
105
99
  }
106
100
 
107
101
  export async function buildRepoCarRange(env: Env, fromSeq: number, toSeq: number): Promise<CarSnapshot> {
@@ -179,6 +173,7 @@ export async function encodeBlocksForCommit(
179
173
  commitCid: CID,
180
174
  mstRoot: CID,
181
175
  ops: Array<{ path: string; cid: CID | null }>,
176
+ newMstBlocks?: Array<[CID, Uint8Array]>,
182
177
  ): Promise<Uint8Array> {
183
178
  const blockstore = new D1Blockstore(env);
184
179
  const blocks: { cid: CID; bytes: Uint8Array }[] = [];
@@ -189,18 +184,60 @@ export async function encodeBlocksForCommit(
189
184
  const cidStr = cid.toString();
190
185
  if (seen.has(cidStr)) return;
191
186
  seen.add(cidStr);
192
-
193
- const bytes = await blockstore.get(cid);
194
- if (bytes) {
195
- blocks.push({ cid, bytes });
187
+ let bytes = await blockstore.get(cid);
188
+ if (!bytes) {
189
+ // Attempt to reconstruct commit block from commit_log if this is the commit cid
190
+ if (cidStr === commitCid.toString()) {
191
+ try {
192
+ const row = await (env.DB as any)
193
+ .prepare('SELECT data, sig FROM commit_log WHERE cid = ? LIMIT 1')
194
+ .bind(cidStr)
195
+ .first();
196
+ if (row && row.data && row.sig) {
197
+ const parsed = JSON.parse(String(row.data));
198
+ const sigBin = atob(String(row.sig));
199
+ const sig = new Uint8Array(sigBin.length);
200
+ for (let i = 0; i < sigBin.length; i++) sig[i] = sigBin.charCodeAt(i);
201
+ const signedCommit = {
202
+ did: String(parsed.did),
203
+ version: Number(parsed.version),
204
+ data: CID.parse(String(parsed.data)),
205
+ rev: String(parsed.rev),
206
+ prev: parsed.prev ? CID.parse(String(parsed.prev)) : null,
207
+ sig,
208
+ } as const;
209
+ bytes = dagCbor.encode(signedCommit);
210
+ }
211
+ } catch (e) {
212
+ console.warn('Failed to reconstruct commit block from commit_log:', e);
213
+ }
214
+ }
196
215
  }
216
+ if (bytes) blocks.push({ cid, bytes });
197
217
  };
198
218
 
199
219
  // 1. Add commit block
200
220
  await addBlock(commitCid);
201
221
 
202
- // 2. Add MST nodes by traversing the tree
203
- await addMstBlocks(blockstore, mstRoot, seen, blocks);
222
+ // 2. Add MST nodes
223
+ if (newMstBlocks && newMstBlocks.length > 0) {
224
+ // Prefer the exact set of MST nodes touched by this commit
225
+ for (const [cid, bytes] of newMstBlocks) {
226
+ const cidStr = cid.toString();
227
+ if (seen.has(cidStr)) continue;
228
+ seen.add(cidStr);
229
+ blocks.push({ cid, bytes });
230
+ }
231
+ } else {
232
+ // Fallback: add MST nodes by batched BFS
233
+ const { mstBlocks } = await collectMstBfs(blockstore, mstRoot);
234
+ for (const [cid, bytes] of mstBlocks) {
235
+ const k = cid.toString();
236
+ if (seen.has(k)) continue;
237
+ seen.add(k);
238
+ blocks.push({ cid, bytes });
239
+ }
240
+ }
204
241
 
205
242
  // 3. Add record blocks for all operations
206
243
  for (const op of ops) {
@@ -213,37 +250,152 @@ export async function encodeBlocksForCommit(
213
250
  return encodeCar([commitCid], blocks);
214
251
  }
215
252
 
253
/**
 * Build a CAR proving existence or non-existence of a record at collection/rkey.
 *
 * The CAR root is the latest signed commit, re-encoded from the newest
 * `commit_log` row. The CAR contains that commit block, every MST node read
 * while searching for `collection/rkey`, and the record block when the key is
 * found and its block is present in the blockstore.
 *
 * The search stops quietly (yielding a shorter path) if an MST node is missing
 * from the blockstore or cannot be decoded.
 *
 * @param env - Environment providing the D1 database.
 * @param did - Repo DID. Not consulted here; the newest `commit_log` row is used.
 * @param collection - Collection part of the record key.
 * @param rkey - Record key within the collection.
 * @returns The encoded CAR bytes.
 * @throws NotFound (`HeadNotFound`) when `commit_log` has no rows.
 */
export async function buildRecordProofCar(
  env: Env,
  did: string,
  collection: string,
  rkey: string,
): Promise<{ bytes: Uint8Array }> {
  type Entry = { kind: 'tree'; cid: CID } | { kind: 'leaf'; key: string; value: CID };

  // Links may arrive as CID instances or as something stringifiable.
  const toCid = (link: any): CID => CID.asCID(link) ?? CID.parse(String(link));

  const tip = await drizzle(env.DB)
    .select()
    .from(commit_log)
    .orderBy(desc(commit_log.seq))
    .limit(1)
    .get();
  if (!tip) {
    throw new NotFound('HeadNotFound');
  }

  // Reconstruct the signed commit block and derive its CID.
  const parsed = JSON.parse(tip.data as any);
  const prevStr = parsed.prev ?? null;
  const commitDid = String(parsed.did);
  const commitVersion = Number(parsed.version);
  const mstRoot = CID.parse(String(parsed.data));
  const commitRev = String(parsed.rev);
  const prevCid = prevStr ? CID.parse(String(prevStr)) : null;
  // The signature is stored base64-encoded; turn it back into raw bytes.
  const sig = Uint8Array.from(atob(String(tip.sig)), (ch) => ch.charCodeAt(0));
  const commitBytes = dagCbor.encode({
    did: commitDid,
    version: commitVersion,
    data: mstRoot,
    rev: commitRev,
    prev: prevCid,
    sig,
  });
  const commitCid = CID.createV1(dagCbor.code, await sha256.digest(commitBytes));

  // Walk the MST from the root towards the target key, keeping every node read.
  const blockstore = new D1Blockstore(env);
  const targetKey = `${collection}/${rkey}`;
  const keyDecoder = new TextDecoder('ascii');
  const pathBlocks: Array<{ cid: CID; bytes: Uint8Array }> = [];
  let recordCid: CID | null = null;
  let cursor: CID | null = mstRoot;

  while (cursor) {
    const nodeBytes = await blockstore.get(cursor);
    if (!nodeBytes) break;
    pathBlocks.push({ cid: cursor, bytes: nodeBytes });

    let next: CID | null = null;
    try {
      const node: any = dagCbor.decode(nodeBytes);

      // Rebuild the node's ordered entries: [l? subtree], then (leaf, subtree?)*
      const entries: Entry[] = [];
      if (node?.l) entries.push({ kind: 'tree', cid: toCid(node.l) });
      let lastKey = '';
      for (const e of (node?.e ?? [])) {
        // Keys are prefix-compressed against the previous key in the node.
        const fullKey = lastKey.slice(0, e.p as number) + keyDecoder.decode(e.k as Uint8Array);
        entries.push({ kind: 'leaf', key: fullKey, value: toCid(e.v) });
        lastKey = fullKey;
        if (e.t) entries.push({ kind: 'tree', cid: toCid(e.t) });
      }

      // Position of the first leaf whose key is >= the target (or past the end).
      let index = entries.findIndex((en) => en.kind === 'leaf' && en.key >= targetKey);
      if (index < 0) index = entries.length;

      const candidate = entries[index];
      if (candidate && candidate.kind === 'leaf' && candidate.key === targetKey) {
        recordCid = candidate.value;
        break;
      }

      // Otherwise the key can only live in the subtree just before that position.
      const before = entries[index - 1];
      if (before && before.kind === 'tree') next = before.cid;
    } catch {
      break;
    }
    // A null `next` means: not found and no subtree to descend into.
    cursor = next;
  }

  // Assemble CAR: commit as root, then path nodes, then the record block if present.
  const blocks: { cid: CID; bytes: Uint8Array }[] = [{ cid: commitCid, bytes: commitBytes }];
  const seen = new Set<string>([commitCid.toString()]);
  for (const pathBlock of pathBlocks) {
    const cidStr = pathBlock.cid.toString();
    if (seen.has(cidStr)) continue;
    seen.add(cidStr);
    blocks.push(pathBlock);
  }
  if (recordCid) {
    const recordBytes = await blockstore.get(recordCid);
    if (recordBytes) blocks.push({ cid: recordCid, bytes: recordBytes });
  }
  return { bytes: encodeCar([commitCid], blocks) };
}
349
+
/**
 * Collect every MST node block reachable from `rootCid`, together with the
 * CIDs of all record leaves those nodes reference.
 *
 * The tree is walked iteratively (breadth-first). Node blocks are fetched in
 * batches of up to 200 through `blockstore.getMany` instead of one lookup per
 * node.
 *
 * Nodes that `getMany` reports as missing are skipped, and nodes that fail to
 * decode are logged and skipped, so the result can be partial.
 *
 * @param blockstore - Blockstore to read MST node blocks from.
 * @param rootCid - CID of the MST root node.
 * @returns `mstBlocks`: `[cid, bytes]` for each node found, each node once;
 *          `leafCids`: the `v` link of every entry in those nodes.
 */
async function collectMstBfs(
  blockstore: D1Blockstore,
  rootCid: CID,
): Promise<{ mstBlocks: Array<[CID, Uint8Array]>; leafCids: CID[] }> {
  const BATCH = 200;
  const mstBlocks: Array<[CID, Uint8Array]> = [];
  const leafCids: CID[] = [];
  const seen = new Set<string>();

  // Links may arrive as CID instances or as something stringifiable.
  const toCid = (link: any): CID => CID.asCID(link) ?? CID.parse(String(link));

  let toFetch: CID[] = [rootCid];

  while (toFetch.length > 0) {
    const chunk = toFetch.slice(0, BATCH);
    toFetch = toFetch.slice(BATCH);
    // Missing nodes are ignored here; caller might not need the full tree for snapshots.
    const { blocks } = await blockstore.getMany(chunk);

    // Record the nodes seen for the first time in this batch. Marking all of
    // them before decoding keeps links to any node of this batch from being
    // queued again.
    const fresh: Array<[string, Uint8Array]> = [];
    for (const [cidStr, bytes] of blocks.entries()) {
      if (seen.has(cidStr)) continue;
      seen.add(cidStr);
      mstBlocks.push([CID.parse(cidStr), bytes]);
      fresh.push([cidStr, bytes]);
    }

    // Decode only the fresh nodes, so a node delivered again in a later batch
    // does not contribute its leaves a second time.
    for (const [cidStr, bytes] of fresh) {
      try {
        const node: any = dagCbor.decode(bytes);
        if (node?.l) {
          const left = toCid(node.l);
          if (!seen.has(left.toString())) toFetch.push(left);
        }
        const entries: any[] = Array.isArray(node?.e) ? node.e : [];
        for (const e of entries) {
          leafCids.push(toCid(e?.v));
          if (e?.t) {
            const subtree = toCid(e.t);
            if (!seen.has(subtree.toString())) toFetch.push(subtree);
          }
        }
      } catch (error) {
        console.warn('collectMstBfs: failed to decode node', cidStr, error);
      }
    }
  }

  return { mstBlocks, leafCids };
}
@@ -69,7 +69,7 @@ export class R2BlobStore {
69
69
  if (size > limit) throw new Error(`BlobTooLarge:${size}>${limit}`);
70
70
 
71
71
  const contentType = opts.contentType ?? 'application/octet-stream';
72
- const sha = await crypto.subtle.digest('SHA-256', view);
72
+ const sha = await crypto.subtle.digest('SHA-256', R2BlobStore.toArrayBuffer(view));
73
73
  const shaB64 = R2BlobStore.b64url(sha);
74
74
  const key = R2BlobStore.cidKey(shaB64);
75
75
  const buffer = R2BlobStore.toArrayBuffer(view);
@@ -0,0 +1,29 @@
1
+ import { CID } from 'multiformats/cid';
2
+ import * as dagCbor from '@ipld/dag-cbor';
3
+ import { cidForCbor } from '../../lib/mst/util';
4
+ import type { D1Blockstore, MST, BlockMap } from '../../lib/mst';
5
+
6
/**
 * DAG-CBOR-encode `record`, store the bytes in `blockstore` under the CID
 * computed by `cidForCbor`, and return that CID.
 *
 * @param blockstore - Destination blockstore.
 * @param record - Value to encode and store.
 * @returns The CID the record block was stored under.
 */
export async function storeRecord(
  blockstore: D1Blockstore,
  record: unknown,
): Promise<CID> {
  const encoded = dagCbor.encode(record);
  const recordCid = await cidForCbor(record);
  await blockstore.put(recordCid, encoded);
  return recordCid;
}
15
+
16
/**
 * Write the blocks reported by `mst.getUnstoredBlocks()` to `blockstore`, one
 * at a time, logging each block and a final count.
 *
 * @param blockstore - Destination blockstore.
 * @param mst - Tree whose unstored blocks should be written.
 * @returns The block map that was written.
 */
export async function storeMstBlocks(
  blockstore: D1Blockstore,
  mst: MST,
): Promise<BlockMap> {
  const { blocks: pending } = await mst.getUnstoredBlocks();
  for (const [blockCid, blockBytes] of pending) {
    console.log(
      `[RepoManager] Storing new MST block: ${blockCid.toString()}, size: ${blockBytes.length}`,
    );
    await blockstore.put(blockCid, blockBytes);
  }
  console.log(`[RepoManager] Stored ${pending.size} new MST blocks`);
  return pending;
}
@@ -0,0 +1,133 @@
1
+ import { CID } from 'multiformats/cid';
2
+ import * as dagCbor from '@ipld/dag-cbor';
3
+ import type { D1Blockstore, NodeData } from '../../lib/mst';
4
+ import type { RepoOp } from '../../lib/firehose/frames';
5
+
6
/** One entry of a decoded MST node, using the node's short field names. */
interface NodeEntry {
  /** Number of leading characters of the previous entry's key that this key reuses. */
  p: number;
  /** Remainder of the key after the reused prefix; readers decode it as ASCII. */
  k: Uint8Array;
  /** Link to the record for this key; converted to a CID by the reader. */
  v: unknown;
  /** Optional link to a subtree; converted to a CID by the reader when present. */
  t?: unknown;
}

/** Shape of an MST node after DAG-CBOR decoding, as far as this module reads it. */
interface DecodedNode {
  /** Entries of the node; treated as empty when not an array. */
  e?: NodeEntry[];
  /** Optional link to the node's left subtree. */
  l?: unknown;
}
17
+
18
/**
 * Turn a decoded link value into a `CID`.
 *
 * Uses `CID.asCID` when that static method exists and recognises the value;
 * otherwise parses the value's string form.
 *
 * @throws Whatever `CID.parse` throws when the string form is not a valid CID.
 */
function coerceCid(value: unknown): CID {
  const cidStatics = CID as unknown as { asCID?: (v: unknown) => CID | null };
  const recognised = cidStatics.asCID?.(value);
  return recognised ? recognised : CID.parse(String(value));
}
23
+
24
/**
 * Walk the MST rooted at `root` and return a map from record path
 * (`collection/rkey`) to record CID.
 *
 * Nodes are fetched in batches through `blockstore.getMany`. Keys that do not
 * split into exactly two non-empty `/`-separated parts are left out. A leaf
 * whose CID cannot be converted, or a node that cannot be decoded, is logged
 * and skipped.
 *
 * @param blockstore - Blockstore holding the MST node blocks.
 * @param root - CID of the MST root node.
 * @returns Map of full record path to record CID.
 * @throws Error when `getMany` reports any requested node as missing.
 */
export async function collectLeavesBatched(
  blockstore: D1Blockstore,
  root: CID,
): Promise<Map<string, CID>> {
  // Limit per getMany() request; getMany chunks the IN() list further.
  const batchSize = 200;
  const keyDecoder = new TextDecoder('ascii');

  const leaves = new Map<string, CID>();
  const visited = new Set<string>();
  const queue: CID[] = [root];

  const enqueueIfNew = (link: unknown): void => {
    const target = coerceCid(link);
    if (!visited.has(target.toString())) queue.push(target);
  };

  while (queue.length > 0) {
    const batch = queue.splice(0, batchSize);
    const { blocks, missing } = await blockstore.getMany(batch);

    if (missing.length > 0) {
      // Fail fast: missing MST nodes mean an incomplete repo, and emitting
      // ops without them would produce a wrong diff.
      const missingStr = missing.map((c) => c.toString()).join(', ');
      throw new Error(
        `[RepoManager] collectLeavesBatched: missing MST nodes: ${missingStr}`,
      );
    }

    for (const [cidStr, bytes] of blocks.entries()) {
      if (visited.has(cidStr)) continue;
      visited.add(cidStr);

      try {
        const node = dagCbor.decode(bytes) as DecodedNode;
        const nodeEntries = Array.isArray(node.e) ? node.e : [];

        // Keys are prefix-compressed against the previous key within the node.
        let previousKey = '';
        for (const entry of nodeEntries) {
          const suffix = keyDecoder.decode(entry.k);
          const fullKey = previousKey.slice(0, Number(entry.p)) + suffix;

          try {
            // Only well-formed "collection/rkey" paths are recorded.
            const [collection, rkey, ...extra] = fullKey.split('/');
            if (extra.length === 0 && collection && rkey) {
              leaves.set(fullKey, coerceCid(entry.v));
            }
          } catch (decodeError) {
            console.warn('[RepoManager] failed to decode leaf CID:', decodeError);
          }
          previousKey = fullKey;

          if (entry.t) enqueueIfNew(entry.t);
        }

        if (node.l) enqueueIfNew(node.l);
      } catch (error) {
        console.warn('[RepoManager] collectLeavesBatched: failed to decode node', cidStr, error);
      }
    }
  }

  return leaves;
}
88
+
89
/**
 * Compute the record-level operations that turn the tree at `prevRoot` into
 * the tree at `newRoot`.
 *
 * Both trees are flattened to `path -> CID` maps with `collectLeavesBatched`
 * and compared:
 * - path only in the new tree: `create`
 * - path in both with a different CID: `update` (carries `prev`)
 * - path only in the previous tree: `delete` (`cid` is null, carries `prev`)
 *
 * @param blockstore - Blockstore holding the MST nodes of both trees.
 * @param prevRoot - Root of the previous tree, or null when there is none
 *   (every path is then a `create`).
 * @param newRoot - Root of the new tree.
 * @returns Operations sorted by path in plain code-unit order, independent of
 *   the runtime's locale.
 * @throws Whatever `collectLeavesBatched` throws (e.g. on missing MST nodes).
 */
export async function extractOps(
  blockstore: D1Blockstore,
  prevRoot: CID | null,
  newRoot: CID,
): Promise<RepoOp[]> {
  const ops: RepoOp[] = [];
  const newMap = await collectLeavesBatched(blockstore, newRoot);
  const prevMap = prevRoot
    ? await collectLeavesBatched(blockstore, prevRoot)
    : new Map<string, CID>();

  for (const [path, cid] of newMap) {
    const prevCid = prevMap.get(path);
    if (!prevCid) {
      ops.push({ action: 'create', path, cid });
    } else if (!prevCid.equals(cid)) {
      ops.push({ action: 'update', path, cid, prev: prevCid });
    }
  }

  for (const [path, prevCid] of prevMap) {
    if (!newMap.has(path)) {
      ops.push({ action: 'delete', path, cid: null, prev: prevCid });
    }
  }

  // Compare code units rather than using localeCompare: locale collation
  // reorders mixed-case and punctuated paths and can vary between runtimes.
  ops.sort((a, b) => (a.path < b.path ? -1 : a.path > b.path ? 1 : 0));
  return ops;
}
118
+
119
/**
 * Depth-first collection of `key -> value` pairs from a list of tree entries.
 *
 * Used by the legacy in-memory diff path (kept for compatibility). The batched
 * variant is preferred because it avoids per-node round-trips to D1.
 *
 * An entry is recorded when `isLeaf()` is true and it has both a key and a
 * value. Any other entry that exposes `getEntries` is descended into; entries
 * with neither are skipped.
 *
 * @param entries - Entries to scan, in order.
 * @param map - Map that receives the collected leaves (mutated in place).
 */
export async function collectLeavesRecursive(
  entries: ReadonlyArray<{ isLeaf(): boolean; key?: string; value?: CID; getEntries?: () => Promise<unknown[]> }>,
  map: Map<string, CID>,
): Promise<void> {
  for (const item of entries) {
    if (item.isLeaf() && item.key && item.value) {
      map.set(item.key, item.value);
      continue;
    }
    if (!item.getEntries) continue;

    const children = (await item.getEntries()) as typeof entries;
    await collectLeavesRecursive(children, map);
  }
}