@filoz/repair-cli 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/dist/package.json +1 -1
  2. package/dist/src/cli.js +2 -0
  3. package/dist/src/cli.js.map +1 -1
  4. package/dist/src/commands/datasets.d.ts +1 -0
  5. package/dist/src/commands/datasets.d.ts.map +1 -1
  6. package/dist/src/commands/datasets.js +14 -3
  7. package/dist/src/commands/datasets.js.map +1 -1
  8. package/dist/src/commands/providers.d.ts +1 -0
  9. package/dist/src/commands/providers.d.ts.map +1 -1
  10. package/dist/src/commands/providers.js.map +1 -1
  11. package/dist/src/commands/repair.d.ts +1 -0
  12. package/dist/src/commands/repair.d.ts.map +1 -1
  13. package/dist/src/commands/repair.js +7 -8
  14. package/dist/src/commands/repair.js.map +1 -1
  15. package/dist/src/commands/replicate.d.ts +24 -0
  16. package/dist/src/commands/replicate.d.ts.map +1 -0
  17. package/dist/src/commands/replicate.js +171 -0
  18. package/dist/src/commands/replicate.js.map +1 -0
  19. package/dist/src/commands/setup.d.ts +0 -1
  20. package/dist/src/commands/setup.d.ts.map +1 -1
  21. package/dist/src/commands/setup.js +17 -4
  22. package/dist/src/commands/setup.js.map +1 -1
  23. package/dist/src/commands/wallet.d.ts +1 -0
  24. package/dist/src/commands/wallet.d.ts.map +1 -1
  25. package/dist/src/db/dedupe-cids.d.ts +22 -0
  26. package/dist/src/db/dedupe-cids.d.ts.map +1 -0
  27. package/dist/src/db/dedupe-cids.js +28 -0
  28. package/dist/src/db/dedupe-cids.js.map +1 -0
  29. package/dist/src/db/find-providers-by-cid.d.ts +9 -0
  30. package/dist/src/db/find-providers-by-cid.d.ts.map +1 -0
  31. package/dist/src/db/{get-providers-by-cid.js → find-providers-by-cid.js} +4 -6
  32. package/dist/src/db/find-providers-by-cid.js.map +1 -0
  33. package/dist/src/db/find-repair-dataset.d.ts +10 -0
  34. package/dist/src/db/find-repair-dataset.d.ts.map +1 -0
  35. package/dist/src/db/{get-repair-dataset.js → find-repair-dataset.js} +3 -4
  36. package/dist/src/db/find-repair-dataset.js.map +1 -0
  37. package/dist/src/db/get-pieces.d.ts +16 -0
  38. package/dist/src/db/get-pieces.d.ts.map +1 -1
  39. package/dist/src/db/get-pieces.js +44 -3
  40. package/dist/src/db/get-pieces.js.map +1 -1
  41. package/dist/src/db/repair-create.d.ts.map +1 -1
  42. package/dist/src/db/repair-create.js +1 -0
  43. package/dist/src/db/repair-create.js.map +1 -1
  44. package/dist/src/db/repair-delete.d.ts.map +1 -1
  45. package/dist/src/db/repair-delete.js +0 -5
  46. package/dist/src/db/repair-delete.js.map +1 -1
  47. package/dist/src/db/replicate-create.d.ts +7 -0
  48. package/dist/src/db/replicate-create.d.ts.map +1 -0
  49. package/dist/src/db/replicate-create.js +78 -0
  50. package/dist/src/db/replicate-create.js.map +1 -0
  51. package/dist/src/db/upsert-operations.js +1 -1
  52. package/dist/src/db/upsert-operations.js.map +1 -1
  53. package/dist/src/local-schema.d.ts +19 -0
  54. package/dist/src/local-schema.d.ts.map +1 -1
  55. package/dist/src/local-schema.js +1 -0
  56. package/dist/src/local-schema.js.map +1 -1
  57. package/dist/src/middleware.d.ts +2 -0
  58. package/dist/src/middleware.d.ts.map +1 -1
  59. package/dist/src/middleware.js +4 -2
  60. package/dist/src/middleware.js.map +1 -1
  61. package/dist/src/pipeline/add-pieces.d.ts +12 -0
  62. package/dist/src/pipeline/add-pieces.d.ts.map +1 -0
  63. package/dist/src/pipeline/add-pieces.js +143 -0
  64. package/dist/src/pipeline/add-pieces.js.map +1 -0
  65. package/dist/src/pipeline/create-datasets.d.ts +3 -1
  66. package/dist/src/pipeline/create-datasets.d.ts.map +1 -1
  67. package/dist/src/pipeline/create-datasets.js +57 -5
  68. package/dist/src/pipeline/create-datasets.js.map +1 -1
  69. package/dist/src/utils.d.ts +32 -2
  70. package/dist/src/utils.d.ts.map +1 -1
  71. package/dist/src/utils.js +40 -7
  72. package/dist/src/utils.js.map +1 -1
  73. package/package.json +1 -1
  74. package/readme.md +110 -7
  75. package/src/cli.ts +2 -0
  76. package/src/commands/datasets.ts +15 -4
  77. package/src/commands/providers.ts +0 -1
  78. package/src/commands/repair.ts +12 -8
  79. package/src/commands/replicate.ts +183 -0
  80. package/src/commands/setup.ts +18 -4
  81. package/src/db/dedupe-cids.ts +49 -0
  82. package/src/db/{get-providers-by-cid.ts → find-providers-by-cid.ts} +5 -10
  83. package/src/db/{get-repair-dataset.ts → find-repair-dataset.ts} +6 -5
  84. package/src/db/get-pieces.ts +105 -3
  85. package/src/db/get-target-dataset.ts +1 -1
  86. package/src/db/repair-create.ts +1 -0
  87. package/src/db/repair-delete.ts +0 -5
  88. package/src/db/replicate-create.ts +106 -0
  89. package/src/db/upsert-operations.ts +1 -1
  90. package/src/local-schema.ts +1 -0
  91. package/src/middleware.ts +4 -2
  92. package/src/pipeline/add-pieces.ts +215 -0
  93. package/src/pipeline/create-datasets.ts +71 -5
  94. package/src/utils.ts +49 -10
  95. package/dist/src/db/get-providers-by-cid.d.ts +0 -10
  96. package/dist/src/db/get-providers-by-cid.d.ts.map +0 -1
  97. package/dist/src/db/get-providers-by-cid.js.map +0 -1
  98. package/dist/src/db/get-repair-dataset.d.ts +0 -9
  99. package/dist/src/db/get-repair-dataset.d.ts.map +0 -1
  100. package/dist/src/db/get-repair-dataset.js.map +0 -1
  101. package/dist/src/db/sync-pieces-onchain.d.ts +0 -10
  102. package/dist/src/db/sync-pieces-onchain.d.ts.map +0 -1
  103. package/dist/src/db/sync-pieces-onchain.js +0 -35
  104. package/dist/src/db/sync-pieces-onchain.js.map +0 -1
  105. package/dist/src/pipeline/pull.d.ts +0 -30
  106. package/dist/src/pipeline/pull.d.ts.map +0 -1
  107. package/dist/src/pipeline/pull.js +0 -169
  108. package/dist/src/pipeline/pull.js.map +0 -1
  109. package/src/db/sync-pieces-onchain.ts +0 -53
  110. package/src/pipeline/pull.ts +0 -255
@@ -3,7 +3,7 @@ import { and, asc, eq, isNull, lte, or } from 'drizzle-orm'
3
3
  import pMap from 'p-map'
4
4
  import type { OperationInsert } from '../local-schema.ts'
5
5
  import type { IndexerDatabase } from '../types.ts'
6
- import { getProvidersByCid } from './get-providers-by-cid.ts'
6
+ import { findProvidersByCid } from './find-providers-by-cid.ts'
7
7
 
8
8
  /** Default page size when paginating pieces from the indexer. */
9
9
  export const DEFAULT_PIECES_PAGE_SIZE = 3000
@@ -43,6 +43,34 @@ export type GetPiecesPageResult = {
43
43
  /** Options for {@link forEachPiecesPage}; pagination state is managed internally. */
44
44
  export type ForEachPiecesPageOptions = Omit<GetPiecesPageOptions, 'offset' | 'seenCids'>
45
45
 
46
+ /** Options for fetching one page of source-dataset pieces for replication. */
47
+ export type GetDataSetPiecesPageOptions = {
48
+ indexerDb: IndexerDatabase
49
+ /** Source dataset whose pieces are being replicated. */
50
+ dataSetId: bigint
51
+ /** Local job row to attach operations to. */
52
+ repairId: number
53
+ /** Provider URL that serves the source dataset pieces. */
54
+ sourceProviderUrl: string
55
+ /** Max indexer rows per page. Defaults to {@link DEFAULT_PIECES_PAGE_SIZE}. */
56
+ limit?: number
57
+ /** SQL offset for the indexer query. */
58
+ offset?: number
59
+ }
60
+
61
+ /** Result of a single {@link getDataSetPiecesPage} call. */
62
+ export type GetDataSetPiecesPageResult = {
63
+ /** `add_piece` operations ready to insert for this page. */
64
+ operations: OperationInsert[]
65
+ /** Whether another indexer page may exist after this one. */
66
+ hasMore: boolean
67
+ /** Offset to pass as `offset` on the next page. */
68
+ nextOffset: number
69
+ }
70
+
71
+ /** Options for {@link forEachDataSetPiecesPage}; pagination state is managed internally. */
72
+ export type ForEachDataSetPiecesPageOptions = Omit<GetDataSetPiecesPageOptions, 'offset'>
73
+
46
74
  type PieceForOperation = {
47
75
  cid: string
48
76
  metadata: Record<string, string> | null
@@ -107,10 +135,9 @@ export async function getPiecesPage({
107
135
  }
108
136
 
109
137
  // Resolve pull sources in one query per page; exclude the provider being repaired from alternates.
110
- const providersByCid = await getProvidersByCid({
138
+ const providersByCid = await findProvidersByCid({
111
139
  indexerDb,
112
140
  cids: pieces.map((piece) => piece.cid),
113
- excludedProviderIds: [],
114
141
  blockNumber,
115
142
  })
116
143
 
@@ -157,6 +184,53 @@ export async function getPiecesPage({
157
184
  }
158
185
  }
159
186
 
187
+ /**
188
+ * Fetch one page of pieces for a specific dataset and map them to replication operations.
189
+ *
190
+ * Unlike repairs, replication preserves source dataset ordering and does not dedupe repeated CIDs.
191
+ *
192
+ * @param options - Indexer connection, dataset context, and optional pagination state.
193
+ * @returns Operations for this page plus pagination cursors.
194
+ */
195
+ export async function getDataSetPiecesPage({
196
+ indexerDb,
197
+ dataSetId,
198
+ repairId,
199
+ sourceProviderUrl,
200
+ limit = DEFAULT_PIECES_PAGE_SIZE,
201
+ offset = 0,
202
+ }: GetDataSetPiecesPageOptions): Promise<GetDataSetPiecesPageResult> {
203
+ const schema = indexerDb._.fullSchema
204
+ const rows = await indexerDb
205
+ .select({
206
+ cid: schema.pieces.cid,
207
+ metadata: schema.pieces.metadata,
208
+ })
209
+ .from(schema.pieces)
210
+ .where(and(eq(schema.pieces.dataSetId, dataSetId), eq(schema.pieces.removed, false)))
211
+ .orderBy(asc(schema.pieces.pieceId))
212
+ .limit(limit)
213
+ .offset(offset)
214
+
215
+ const now = Date.now()
216
+ const operations: OperationInsert[] = rows.map(({ cid, metadata }) => ({
217
+ repairId,
218
+ type: 'add_piece',
219
+ status: 'pending',
220
+ cid,
221
+ metadata: metadata ?? {},
222
+ alternateProvider: sourceProviderUrl,
223
+ createdAt: now,
224
+ updatedAt: now,
225
+ }))
226
+
227
+ return {
228
+ operations,
229
+ hasMore: rows.length === limit,
230
+ nextOffset: offset + rows.length,
231
+ }
232
+ }
233
+
160
234
  /**
161
235
  * Walk every page of `add_piece` operations for a provider, invoking `onPage` per batch.
162
236
  *
@@ -187,3 +261,31 @@ export async function forEachPiecesPage(
187
261
  hasMore = page.hasMore
188
262
  }
189
263
  }
264
+
265
+ /**
266
+ * Walk every page of `add_piece` operations for a source dataset, invoking `onPage` per batch.
267
+ *
268
+ * Replication pagination intentionally has no CID dedupe state so duplicate pieces are preserved.
269
+ *
270
+ * @param options - Same inputs as {@link getDataSetPiecesPage} except pagination cursor.
271
+ * @param onPage - Async handler for each page result (e.g. batch insert into local DB).
272
+ */
273
+ export async function forEachDataSetPiecesPage(
274
+ options: ForEachDataSetPiecesPageOptions,
275
+ onPage: (page: GetDataSetPiecesPageResult) => Promise<void>
276
+ ): Promise<void> {
277
+ let offset = 0
278
+ let hasMore = true
279
+
280
+ while (hasMore) {
281
+ const page = await getDataSetPiecesPage({
282
+ ...options,
283
+ offset,
284
+ })
285
+
286
+ await onPage(page)
287
+
288
+ offset = page.nextOffset
289
+ hasMore = page.hasMore
290
+ }
291
+ }
@@ -6,7 +6,7 @@ import type { LocalDatabase, WalletClient } from '../types.ts'
6
6
  const targetDatasetCache = new Map<number, getDataSet.OutputType>()
7
7
 
8
8
  /**
9
- * Get the single IPFS-enabled target dataset for a repair.
9
+ * Get a target dataset for a repair.
10
10
  *
11
11
  * @param options - The options for getting the target dataset.
12
12
  */
@@ -47,6 +47,7 @@ export async function repairCreate(options: RepairCreateOptions): Promise<number
47
47
  .insert(localSchema.repairs)
48
48
  .values({
49
49
  repairProviderId,
50
+ repairDataSetId: null,
50
51
  targetProviderId: targetProvider.providerId,
51
52
  targetProviderUrl: targetProvider.serviceUrl,
52
53
  targetDataSetId: null,
@@ -19,11 +19,6 @@ export async function repairDelete({ localDb, repairId }: RepairDeleteOptions):
19
19
  const repair = await localDb.query.repairs.findFirst({
20
20
  where: eq(localSchema.repairs.id, repairId),
21
21
  columns: { id: true },
22
- with: {
23
- operations: {
24
- columns: { id: true },
25
- },
26
- },
27
22
  })
28
23
 
29
24
  if (!repair) {
@@ -0,0 +1,106 @@
1
+ import { taskLog } from '@clack/prompts'
2
+ import { eq } from 'drizzle-orm'
3
+ import { getBlockNumber } from 'viem/actions'
4
+ import { NoAlternateProviderError, RepairCreationError } from '../error.ts'
5
+ import type { Context } from '../types.ts'
6
+ import { forEachDataSetPiecesPage } from './get-pieces.ts'
7
+ import { getRepairProvider } from './get-repair-provider.ts'
8
+
9
+ export interface ReplicateCreateOptions extends Context {
10
+ dataSetId: bigint
11
+ targetProviderId: bigint
12
+ }
13
+
14
+ /**
15
+ * Prepare a replication job by creating a local row and inserting every source dataset piece.
16
+ *
17
+ * @param options - The options for creating a replication job.
18
+ * @returns The ID of the created local job.
19
+ */
20
+ export async function replicateCreate(options: ReplicateCreateOptions): Promise<number> {
21
+ const { indexerDb, localDb, dataSetId, targetProviderId, client } = options
22
+ const indexerSchema = indexerDb._.fullSchema
23
+ const localSchema = localDb._.fullSchema
24
+ const now = Date.now()
25
+ const blockNumber = await getBlockNumber(client)
26
+
27
+ const log = taskLog({
28
+ title: 'Creating replication',
29
+ limit: 10,
30
+ retainLog: true,
31
+ })
32
+
33
+ const [sourceDataSet] = await indexerDb
34
+ .select({
35
+ dataSetId: indexerSchema.dataSets.dataSetId,
36
+ providerId: indexerSchema.dataSets.providerId,
37
+ deleted: indexerSchema.dataSets.deleted,
38
+ })
39
+ .from(indexerSchema.dataSets)
40
+ .where(eq(indexerSchema.dataSets.dataSetId, dataSetId))
41
+ .limit(1)
42
+
43
+ if (!sourceDataSet || sourceDataSet.deleted) {
44
+ throw new RepairCreationError(`Source dataset ${dataSetId} not found or deleted`)
45
+ }
46
+
47
+ if (targetProviderId === sourceDataSet.providerId) {
48
+ throw new RepairCreationError('Target provider must differ from the source dataset provider')
49
+ }
50
+
51
+ const sourceProvider = await getRepairProvider({
52
+ indexerDb,
53
+ providerId: sourceDataSet.providerId,
54
+ })
55
+
56
+ if (!sourceProvider) {
57
+ throw new RepairCreationError(`Source provider ${sourceDataSet.providerId} not found or inactive`)
58
+ }
59
+
60
+ const targetProvider = await getRepairProvider({
61
+ indexerDb,
62
+ providerId: targetProviderId,
63
+ })
64
+
65
+ if (!targetProvider) {
66
+ throw new NoAlternateProviderError(targetProviderId)
67
+ }
68
+
69
+ const [repair] = await localDb
70
+ .insert(localSchema.repairs)
71
+ .values({
72
+ repairProviderId: sourceProvider.providerId,
73
+ repairDataSetId: sourceDataSet.dataSetId,
74
+ targetProviderId: targetProvider.providerId,
75
+ targetProviderUrl: targetProvider.serviceUrl,
76
+ targetDataSetId: null,
77
+ blockNumber,
78
+ createdAt: now,
79
+ updatedAt: now,
80
+ })
81
+ .returning({ id: localSchema.repairs.id })
82
+
83
+ if (!repair) throw new RepairCreationError('Failed to create replication row')
84
+
85
+ let totalOperations = 0
86
+ await forEachDataSetPiecesPage(
87
+ {
88
+ indexerDb,
89
+ dataSetId: sourceDataSet.dataSetId,
90
+ repairId: repair.id,
91
+ sourceProviderUrl: sourceProvider.serviceUrl,
92
+ },
93
+ async (page) => {
94
+ totalOperations += page.operations.length
95
+
96
+ if (page.operations.length > 0) {
97
+ await localDb.insert(localSchema.operations).values(page.operations)
98
+ }
99
+
100
+ log.message(`Inserted ${page.operations.length} operations`)
101
+ }
102
+ )
103
+
104
+ log.success(`Created replication ${repair.id} with ${totalOperations} operations`, { showLog: true })
105
+ return repair.id
106
+ }
@@ -18,6 +18,6 @@ export async function upsertOperations({ localDb, operations }: UpsertOperations
18
18
  .values(operations.map((operation) => ({ ...operation, updatedAt: now })))
19
19
  .onConflictDoUpdate({
20
20
  target: localDb._.fullSchema.operations.id,
21
- set: buildConflictUpdateColumns(localSchema.operations, ['status', 'error', 'updatedAt']),
21
+ set: buildConflictUpdateColumns(localSchema.operations, ['status', 'error', 'updatedAt', 'result']),
22
22
  })
23
23
  }
@@ -51,6 +51,7 @@ export const repairs = table('repairs', {
51
51
  id: t.int().primaryKey({ autoIncrement: true }),
52
52
  status: t.text().$type<RepairStatus>().notNull().default('pending'),
53
53
  repairProviderId: bigintType('repair_provider_id').notNull(),
54
+ repairDataSetId: bigintType('repair_data_set_id'),
54
55
  targetProviderId: bigintType('target_provider_id').notNull(),
55
56
  targetProviderUrl: t.text('target_provider_url').notNull(),
56
57
  targetDataSetId: bigintType('target_data_set_id'),
package/src/middleware.ts CHANGED
@@ -12,12 +12,13 @@ export const contextSchema = z.object({
12
12
  config: z.custom<typeof config>(),
13
13
  client: z.custom<Client<Transport, Chain, Account>>(),
14
14
  chain: z.custom<Chain>(),
15
+ source: z.string(),
15
16
  })
16
17
 
17
18
  export const contextMiddleware = middleware<typeof contextSchema>(async (c, next) => {
18
- const { dbPath, chainId, indexerMainnetUrl, indexerCalibrationUrl } = config.store
19
+ const { dbPath, chainId, indexerMainnetUrl, indexerCalibrationUrl, source } = config.store
19
20
 
20
- if (!dbPath || !chainId || !indexerMainnetUrl || !indexerCalibrationUrl) {
21
+ if (!dbPath || !chainId || !indexerMainnetUrl || !indexerCalibrationUrl || !source) {
21
22
  return c.error({
22
23
  code: 'CONFIG_NOT_SET',
23
24
  message: 'Config not set. Please run `repair setup` first.',
@@ -36,6 +37,7 @@ export const contextMiddleware = middleware<typeof contextSchema>(async (c, next
36
37
  c.set('config', config)
37
38
  c.set('client', client)
38
39
  c.set('chain', chain)
40
+ c.set('source', source)
39
41
  await next()
40
42
 
41
43
  localDb.$client.close()
@@ -0,0 +1,215 @@
1
+ import { taskLog } from '@clack/prompts'
2
+ import * as Piece from '@filoz/synapse-core/piece'
3
+ import * as SP from '@filoz/synapse-core/sp'
4
+ import { and, asc, eq, gt, inArray } from 'drizzle-orm'
5
+ import PQueue from 'p-queue'
6
+ import { dedupeCids } from '../db/dedupe-cids.ts'
7
+ import { getTargetDataset } from '../db/get-target-dataset.ts'
8
+ import { repairUpdate } from '../db/repair-update.ts'
9
+ import { upsertOperations } from '../db/upsert-operations.ts'
10
+ import type { OperationSelect, RepairSelect } from '../local-schema.ts'
11
+ import type { IndexerDatabase, LocalDatabase, WalletClient } from '../types.ts'
12
+ import { excludeOperationsByCid, hashLink, operationsToPullPieces } from '../utils.ts'
13
+
14
+ export type RunPullPiecesPhaseOptions = {
15
+ localDb: LocalDatabase
16
+ indexerDb: IndexerDatabase
17
+ repair: RepairSelect
18
+ concurrency: number
19
+ batchSize: number
20
+ client: WalletClient
21
+ }
22
+
23
+ type CreateAddPiecesWorkerOptions = {
24
+ localDb: LocalDatabase
25
+ indexerDb: IndexerDatabase
26
+ repair: RepairSelect
27
+ client: WalletClient
28
+ state: {
29
+ totalBatches: number
30
+ totalOperations: number
31
+ completedOperations: number
32
+ failedOperations: number
33
+ }
34
+ log: ReturnType<typeof taskLog>
35
+ }
36
+
37
+ type AddPiecesJobOptions = {
38
+ operations: OperationSelect[]
39
+ batchNumber: number
40
+ }
41
+
42
+ /**
43
+ * Create a worker function that adds pieces to the target repair dataset.
44
+ */
45
+ function createAddPiecesWorker({ localDb, indexerDb, repair, client, state, log }: CreateAddPiecesWorkerOptions) {
46
+ return async (options: AddPiecesJobOptions) => {
47
+ let completedOps = 0
48
+ let failedOps = 0
49
+ let operations: OperationSelect[] = options.operations
50
+ const isRepair = repair.repairDataSetId == null
51
+
52
+ const group = log.group(`Batch ${options.batchNumber}/${state.totalBatches}`)
53
+
54
+ try {
55
+ const dataset = await getTargetDataset({ localDb, repairId: repair.id, client })
56
+
57
+ // dedupe operations by CID on the target dataset for repairs jobs
58
+ if (isRepair) {
59
+ operations = await dedupeCids({ indexerDb, localDb, dataSetId: dataset.dataSetId, operations })
60
+ }
61
+ group.message(`Pulling ${operations.length} pieces...`)
62
+ // pull pieces
63
+ if (operations.length > 0) {
64
+ const pullResult = await SP.waitForPullPieces(client, {
65
+ serviceURL: repair.targetProviderUrl,
66
+ dataSetId: dataset.dataSetId,
67
+ clientDataSetId: dataset.clientDataSetId,
68
+ pieces: operationsToPullPieces(operations),
69
+ timeout: 1000 * 60 * 30,
70
+ onStatus: (status) => {
71
+ const completed = status.pieces.filter((piece) => piece.status === 'complete').length
72
+ const failed = status.pieces.filter((piece) => piece.status === 'failed').length
73
+ group.message(`Pull ${completed} completed, ${failed} failed`)
74
+ },
75
+ })
76
+
77
+ for (const { pieceCid, status } of pullResult.pieces) {
78
+ const cid = pieceCid.toString()
79
+ if (status !== 'complete') {
80
+ state.failedOperations++
81
+ failedOps++
82
+ const { operationToFailed, operationToCommit } = excludeOperationsByCid(operations, cid)
83
+ operations = operationToCommit
84
+ await upsertOperations({
85
+ localDb,
86
+ operations: operationToFailed.map((operation) => ({
87
+ ...operation,
88
+ status: 'failed',
89
+ error: `pull failed with status ${status}`,
90
+ })),
91
+ })
92
+ }
93
+ }
94
+ }
95
+
96
+ // add pieces
97
+ if (operations.length > 0) {
98
+ group.message(`Adding ${operations.length} pieces...`)
99
+ const addPiecesResult = await SP.addPieces(client, {
100
+ serviceURL: repair.targetProviderUrl,
101
+ dataSetId: dataset.dataSetId,
102
+ clientDataSetId: dataset.clientDataSetId,
103
+ pieces: operations.map((operation) => ({
104
+ pieceCid: Piece.from(operation.cid),
105
+ metadata: isRepair ? undefined : operation.metadata,
106
+ })),
107
+ })
108
+
109
+ group.message(`Waiting for add pieces ${hashLink(addPiecesResult.txHash, client.chain)}...`)
110
+ const addPiecesResult2 = await SP.waitForAddPieces(addPiecesResult)
111
+ state.completedOperations += operations.length
112
+ completedOps += operations.length
113
+ await upsertOperations({
114
+ localDb,
115
+ operations: operations.map((operation) => ({
116
+ ...operation,
117
+ status: 'completed',
118
+ error: null,
119
+ result: { dataSetId: addPiecesResult2.dataSetId, txHash: addPiecesResult2.txHash },
120
+ })),
121
+ })
122
+ }
123
+ group.message(`Done. ${completedOps} added, ${failedOps} failed`)
124
+ } catch (error) {
125
+ state.failedOperations += operations.length
126
+ const message = error instanceof Error ? error.message : 'Unknown error'
127
+ group.message(`${message.replace(/\n/g, ' ')}`)
128
+ await upsertOperations({
129
+ localDb,
130
+ operations: operations.map((operation) => ({
131
+ ...operation,
132
+ status: 'failed',
133
+ error: message,
134
+ })),
135
+ })
136
+ }
137
+ }
138
+ }
139
+
140
+ /**
141
+ * Add pieces to the target dataset.
142
+ */
143
+ export async function runAddPieces({
144
+ localDb,
145
+ indexerDb,
146
+ repair,
147
+ concurrency,
148
+ batchSize,
149
+ client,
150
+ }: RunPullPiecesPhaseOptions): Promise<void> {
151
+ const localSchema = localDb._.fullSchema
152
+ let cursor = 0
153
+
154
+ const totalOperations = await localDb.$count(
155
+ localSchema.operations,
156
+ and(
157
+ eq(localSchema.operations.repairId, repair.id),
158
+ eq(localSchema.operations.type, 'add_piece'),
159
+ inArray(localSchema.operations.status, ['pending', 'failed'])
160
+ )
161
+ )
162
+ let batchNumber = 0
163
+ const state = {
164
+ totalBatches: Math.ceil(totalOperations / batchSize),
165
+ totalOperations,
166
+ completedOperations: 0,
167
+ failedOperations: 0,
168
+ }
169
+
170
+ const log = taskLog({
171
+ title: 'Adding pieces',
172
+ limit: 1,
173
+ })
174
+
175
+ async function getNextBatch(): Promise<OperationSelect[] | null> {
176
+ const operations = await localDb.query.operations.findMany({
177
+ where: and(
178
+ eq(localSchema.operations.repairId, repair.id),
179
+ eq(localSchema.operations.type, 'add_piece'),
180
+ inArray(localSchema.operations.status, ['pending', 'failed']),
181
+ gt(localSchema.operations.id, cursor)
182
+ ),
183
+ orderBy: [asc(localSchema.operations.id)],
184
+ limit: batchSize,
185
+ })
186
+ if (operations.length === 0) {
187
+ return null
188
+ }
189
+
190
+ cursor = operations.at(-1)?.id ?? cursor
191
+ return operations
192
+ }
193
+
194
+ const addPiecesJob = createAddPiecesWorker({ localDb, indexerDb, repair, client, state, log })
195
+ const queue = new PQueue({ concurrency })
196
+
197
+ while (true) {
198
+ await queue.onSizeLessThan(concurrency)
199
+ const operations = await getNextBatch()
200
+ if (!operations) break
201
+ batchNumber++
202
+ const currentBatchNumber = batchNumber
203
+ queue.add(() => addPiecesJob({ operations, batchNumber: currentBatchNumber })).catch(console.error)
204
+ }
205
+
206
+ await queue.onIdle()
207
+
208
+ log.success(`Added ${state.completedOperations} pieces, ${state.failedOperations} failed`, { showLog: true })
209
+
210
+ await repairUpdate({
211
+ localDb,
212
+ repairId: repair.id,
213
+ status: state.failedOperations > 0 ? 'failed' : 'completed',
214
+ })
215
+ }
@@ -1,13 +1,15 @@
1
1
  import * as p from '@clack/prompts'
2
2
  import * as SP from '@filoz/synapse-core/sp'
3
3
  import { getPDPProvider } from '@filoz/synapse-core/sp-registry'
4
- import { getRepairDataset } from '../db/get-repair-dataset.ts'
4
+ import { eq } from 'drizzle-orm'
5
+ import { findRepairDataset } from '../db/find-repair-dataset.ts'
5
6
  import { repairUpdate } from '../db/repair-update.ts'
6
7
  import type { RepairSelect } from '../local-schema.ts'
7
8
  import type { IndexerDatabase, LocalDatabase, WalletClient } from '../types.ts'
8
- import { getRepairDatasetMetadata, hashLink } from '../utils.ts'
9
+ import { hashLink } from '../utils.ts'
9
10
 
10
11
  export type EnsureRepairDatasetOptions = {
12
+ source: string
11
13
  localDb: LocalDatabase
12
14
  indexerDb: IndexerDatabase
13
15
  client: WalletClient
@@ -19,7 +21,7 @@ export type EnsureRepairDatasetOptions = {
19
21
  *
20
22
  * @param options - The options for ensuring the repair dataset.
21
23
  */
22
- export async function ensureRepairDataset({ localDb, indexerDb, client, repair }: EnsureRepairDatasetOptions) {
24
+ export async function ensureRepairDataset({ source, localDb, indexerDb, client, repair }: EnsureRepairDatasetOptions) {
23
25
  const log = p.taskLog({
24
26
  title: 'Ensuring repair dataset',
25
27
  })
@@ -31,10 +33,11 @@ export async function ensureRepairDataset({ localDb, indexerDb, client, repair }
31
33
 
32
34
  let datasetId: bigint | null = null
33
35
  // check if dataset already exists
34
- const existingDatasetId = await getRepairDataset({
36
+ const existingDatasetId = await findRepairDataset({
35
37
  indexerDb,
36
38
  providerId: repair.targetProviderId,
37
39
  payer: client.account.address,
40
+ source,
38
41
  })
39
42
 
40
43
  if (existingDatasetId) {
@@ -46,7 +49,10 @@ export async function ensureRepairDataset({ localDb, indexerDb, client, repair }
46
49
  serviceURL: provider.pdp.serviceURL,
47
50
  payer: client.account.address,
48
51
  cdn: false,
49
- metadata: getRepairDatasetMetadata(),
52
+ metadata: {
53
+ source,
54
+ withIPFSIndexing: '',
55
+ },
50
56
  })
51
57
  log.message(`Waiting for data to be created at ${provider.pdp.serviceURL} ${hashLink(txHash, client.chain)}...`)
52
58
  const waitForResult = await SP.waitForCreateDataSet({
@@ -62,3 +68,63 @@ export async function ensureRepairDataset({ localDb, indexerDb, client, repair }
62
68
  })
63
69
  return datasetId
64
70
  }
71
+
72
+ /**
73
+ * Ensure a replication target dataset exists by creating a fresh dataset with source metadata.
74
+ *
75
+ * @param options - The options for ensuring the replication dataset.
76
+ */
77
+ export async function ensureReplicateDataset({ localDb, indexerDb, client, repair }: EnsureRepairDatasetOptions) {
78
+ const log = p.taskLog({
79
+ title: 'Ensuring replication dataset',
80
+ })
81
+
82
+ if (repair.repairDataSetId == null) {
83
+ throw new Error('Missing source dataset ID')
84
+ }
85
+
86
+ const provider = await getPDPProvider(client, {
87
+ providerId: repair.targetProviderId,
88
+ })
89
+
90
+ if (!provider) throw new Error(`Target provider ${repair.targetProviderId} not found or inactive`)
91
+
92
+ if (repair.targetDataSetId != null) {
93
+ log.success(`Data set #${repair.targetDataSetId} already exists at ${provider.pdp.serviceURL}`)
94
+ return repair.targetDataSetId
95
+ }
96
+
97
+ const schema = indexerDb._.fullSchema
98
+ const sourceDataSet = await indexerDb.query.dataSets.findFirst({
99
+ where: eq(schema.dataSets.dataSetId, repair.repairDataSetId),
100
+ columns: {
101
+ metadata: true,
102
+ withCdn: true,
103
+ },
104
+ })
105
+
106
+ if (!sourceDataSet) {
107
+ throw new Error(`Source dataset ${repair.repairDataSetId} not found`)
108
+ }
109
+
110
+ const { txHash, statusUrl } = await SP.createDataSet(client, {
111
+ payee: provider.payee,
112
+ serviceURL: provider.pdp.serviceURL,
113
+ payer: client.account.address,
114
+ cdn: sourceDataSet.withCdn,
115
+ metadata: sourceDataSet.metadata ?? undefined,
116
+ })
117
+ log.message(`Waiting for data to be created at ${provider.pdp.serviceURL} ${hashLink(txHash, client.chain)}...`)
118
+ const waitForResult = await SP.waitForCreateDataSet({
119
+ statusUrl,
120
+ })
121
+ const datasetId = waitForResult.dataSetId
122
+ log.success(`Data set #${datasetId} created at ${provider.pdp.serviceURL}`)
123
+
124
+ await repairUpdate({
125
+ localDb,
126
+ repairId: repair.id,
127
+ targetDataSetId: datasetId,
128
+ })
129
+ return datasetId
130
+ }