@filoz/repair-cli 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/dist/package.json +1 -1
  2. package/dist/src/cli.js +2 -0
  3. package/dist/src/cli.js.map +1 -1
  4. package/dist/src/commands/datasets.d.ts +1 -0
  5. package/dist/src/commands/datasets.d.ts.map +1 -1
  6. package/dist/src/commands/datasets.js +14 -3
  7. package/dist/src/commands/datasets.js.map +1 -1
  8. package/dist/src/commands/providers.d.ts +1 -0
  9. package/dist/src/commands/providers.d.ts.map +1 -1
  10. package/dist/src/commands/providers.js.map +1 -1
  11. package/dist/src/commands/repair.d.ts +1 -0
  12. package/dist/src/commands/repair.d.ts.map +1 -1
  13. package/dist/src/commands/repair.js +7 -8
  14. package/dist/src/commands/repair.js.map +1 -1
  15. package/dist/src/commands/replicate.d.ts +24 -0
  16. package/dist/src/commands/replicate.d.ts.map +1 -0
  17. package/dist/src/commands/replicate.js +171 -0
  18. package/dist/src/commands/replicate.js.map +1 -0
  19. package/dist/src/commands/setup.d.ts +0 -1
  20. package/dist/src/commands/setup.d.ts.map +1 -1
  21. package/dist/src/commands/setup.js +17 -4
  22. package/dist/src/commands/setup.js.map +1 -1
  23. package/dist/src/commands/wallet.d.ts +1 -0
  24. package/dist/src/commands/wallet.d.ts.map +1 -1
  25. package/dist/src/db/dedupe-cids.d.ts +22 -0
  26. package/dist/src/db/dedupe-cids.d.ts.map +1 -0
  27. package/dist/src/db/dedupe-cids.js +28 -0
  28. package/dist/src/db/dedupe-cids.js.map +1 -0
  29. package/dist/src/db/find-providers-by-cid.d.ts +9 -0
  30. package/dist/src/db/find-providers-by-cid.d.ts.map +1 -0
  31. package/dist/src/db/{get-providers-by-cid.js → find-providers-by-cid.js} +4 -6
  32. package/dist/src/db/find-providers-by-cid.js.map +1 -0
  33. package/dist/src/db/find-repair-dataset.d.ts +10 -0
  34. package/dist/src/db/find-repair-dataset.d.ts.map +1 -0
  35. package/dist/src/db/{get-repair-dataset.js → find-repair-dataset.js} +3 -4
  36. package/dist/src/db/find-repair-dataset.js.map +1 -0
  37. package/dist/src/db/get-pieces.d.ts +16 -0
  38. package/dist/src/db/get-pieces.d.ts.map +1 -1
  39. package/dist/src/db/get-pieces.js +44 -3
  40. package/dist/src/db/get-pieces.js.map +1 -1
  41. package/dist/src/db/repair-create.d.ts.map +1 -1
  42. package/dist/src/db/repair-create.js +1 -0
  43. package/dist/src/db/repair-create.js.map +1 -1
  44. package/dist/src/db/repair-delete.d.ts.map +1 -1
  45. package/dist/src/db/repair-delete.js +0 -5
  46. package/dist/src/db/repair-delete.js.map +1 -1
  47. package/dist/src/db/replicate-create.d.ts +7 -0
  48. package/dist/src/db/replicate-create.d.ts.map +1 -0
  49. package/dist/src/db/replicate-create.js +78 -0
  50. package/dist/src/db/replicate-create.js.map +1 -0
  51. package/dist/src/db/upsert-operations.js +1 -1
  52. package/dist/src/db/upsert-operations.js.map +1 -1
  53. package/dist/src/local-schema.d.ts +19 -0
  54. package/dist/src/local-schema.d.ts.map +1 -1
  55. package/dist/src/local-schema.js +1 -0
  56. package/dist/src/local-schema.js.map +1 -1
  57. package/dist/src/middleware.d.ts +2 -0
  58. package/dist/src/middleware.d.ts.map +1 -1
  59. package/dist/src/middleware.js +4 -2
  60. package/dist/src/middleware.js.map +1 -1
  61. package/dist/src/pipeline/add-pieces.d.ts +12 -0
  62. package/dist/src/pipeline/add-pieces.d.ts.map +1 -0
  63. package/dist/src/pipeline/add-pieces.js +143 -0
  64. package/dist/src/pipeline/add-pieces.js.map +1 -0
  65. package/dist/src/pipeline/create-datasets.d.ts +3 -1
  66. package/dist/src/pipeline/create-datasets.d.ts.map +1 -1
  67. package/dist/src/pipeline/create-datasets.js +57 -5
  68. package/dist/src/pipeline/create-datasets.js.map +1 -1
  69. package/dist/src/utils.d.ts +32 -2
  70. package/dist/src/utils.d.ts.map +1 -1
  71. package/dist/src/utils.js +40 -7
  72. package/dist/src/utils.js.map +1 -1
  73. package/package.json +1 -1
  74. package/readme.md +110 -7
  75. package/src/cli.ts +2 -0
  76. package/src/commands/datasets.ts +15 -4
  77. package/src/commands/providers.ts +0 -1
  78. package/src/commands/repair.ts +12 -8
  79. package/src/commands/replicate.ts +183 -0
  80. package/src/commands/setup.ts +18 -4
  81. package/src/db/dedupe-cids.ts +49 -0
  82. package/src/db/{get-providers-by-cid.ts → find-providers-by-cid.ts} +5 -10
  83. package/src/db/{get-repair-dataset.ts → find-repair-dataset.ts} +6 -5
  84. package/src/db/get-pieces.ts +105 -3
  85. package/src/db/get-target-dataset.ts +1 -1
  86. package/src/db/repair-create.ts +1 -0
  87. package/src/db/repair-delete.ts +0 -5
  88. package/src/db/replicate-create.ts +106 -0
  89. package/src/db/upsert-operations.ts +1 -1
  90. package/src/local-schema.ts +1 -0
  91. package/src/middleware.ts +4 -2
  92. package/src/pipeline/add-pieces.ts +215 -0
  93. package/src/pipeline/create-datasets.ts +71 -5
  94. package/src/utils.ts +49 -10
  95. package/dist/src/db/get-providers-by-cid.d.ts +0 -10
  96. package/dist/src/db/get-providers-by-cid.d.ts.map +0 -1
  97. package/dist/src/db/get-providers-by-cid.js.map +0 -1
  98. package/dist/src/db/get-repair-dataset.d.ts +0 -9
  99. package/dist/src/db/get-repair-dataset.d.ts.map +0 -1
  100. package/dist/src/db/get-repair-dataset.js.map +0 -1
  101. package/dist/src/db/sync-pieces-onchain.d.ts +0 -10
  102. package/dist/src/db/sync-pieces-onchain.d.ts.map +0 -1
  103. package/dist/src/db/sync-pieces-onchain.js +0 -35
  104. package/dist/src/db/sync-pieces-onchain.js.map +0 -1
  105. package/dist/src/pipeline/pull.d.ts +0 -30
  106. package/dist/src/pipeline/pull.d.ts.map +0 -1
  107. package/dist/src/pipeline/pull.js +0 -169
  108. package/dist/src/pipeline/pull.js.map +0 -1
  109. package/src/db/sync-pieces-onchain.ts +0 -53
  110. package/src/pipeline/pull.ts +0 -255
package/readme.md CHANGED
@@ -4,10 +4,10 @@
4
4
 
5
5
  > Early repair for faulty service providers and datasets
6
6
 
7
- The `repair` CLI helps prepare and run repair jobs that move pieces away from a faulty Filecoin service provider and into a target PDP provider. It uses:
7
+ The `repair` CLI helps prepare and run repair and replication jobs on Filecoin PDP datasets. It uses:
8
8
 
9
9
  - an indexer Postgres database as the read-only source of providers, datasets, and pieces
10
- - a local SQLite database to track repair jobs and per-piece operations
10
+ - a local SQLite database to track repair and replication jobs and per-piece operations
11
11
  - a configured Filecoin wallet to create datasets and submit on-chain add-piece transactions
12
12
 
13
13
  ## Installation
@@ -37,6 +37,7 @@ Setup prompts for:
37
37
  - calibration indexer Postgres URL
38
38
  - chain, either Filecoin Mainnet `314` or Filecoin Calibration `314159`
39
39
  - local SQLite database path
40
+ - dataset source label, used when creating repair target datasets (defaults to `early-repair`)
40
41
 
41
42
  The command stores these values in the CLI config and runs the local SQLite schema migration. It returns the configured wallet address.
42
43
 
@@ -58,7 +59,7 @@ Interactive configuration and local database setup.
58
59
  repair setup
59
60
  ```
60
61
 
61
- Use this whenever you need to initialize the CLI, change the active chain, update indexer URLs, or move the local SQLite database.
62
+ Use this whenever you need to initialize the CLI, change the active chain, update indexer URLs, change the dataset source label, or move the local SQLite database.
62
63
 
63
64
  ### `repair wallet fund`
64
65
 
@@ -140,6 +141,26 @@ Filter by provider ID:
140
141
  repair datasets list --provider-id 123
141
142
  ```
142
143
 
144
+ ### `repair datasets show <id>`
145
+
146
+ Shows a dataset from the configured indexer.
147
+
148
+ ```bash
149
+ repair datasets show 42
150
+ ```
151
+
152
+ The output includes the dataset record, its provider, and active pieces.
153
+
154
+ ### `repair datasets terminate <id>`
155
+
156
+ Terminates a dataset owned by the configured wallet.
157
+
158
+ ```bash
159
+ repair datasets terminate 42
160
+ ```
161
+
162
+ The command submits the on-chain terminate transaction, waits for it to be mined, and returns the result.
163
+
143
164
  ### `repair repair create`
144
165
 
145
166
  Creates a local repair plan for a source provider and a target provider.
@@ -165,12 +186,14 @@ repair repair list
165
186
  Each repair includes:
166
187
 
167
188
  - repair ID and status
168
- - source provider ID
189
+ - repair provider ID
169
190
  - target provider ID and target provider URL
170
191
  - target dataset ID, when one has been created or found
171
192
  - block number used when the repair was created
172
193
  - total operations and counts by `pending`, `failed`, `completed`, and `skipped`
173
194
 
195
+ This command lists provider-level repairs only. Dataset replications are listed with `repair replicate list`.
196
+
174
197
  ### `repair repair run <repairId>`
175
198
 
176
199
  Runs a pending repair.
@@ -179,18 +202,17 @@ Runs a pending repair.
179
202
  repair repair run 1
180
203
  ```
181
204
 
182
- The command first ensures the target repair dataset exists for the configured wallet and target provider. If no matching dataset exists, it creates one with IPFS indexing enabled and CDN disabled. Then it processes pending `add_piece` operations by pulling pieces from alternate providers into the target provider and committing them on-chain.
205
+ The command first ensures the target repair dataset exists for the configured wallet and target provider. If no matching dataset exists, it creates one with IPFS indexing enabled and CDN disabled, using the configured source label. Then it processes pending `add_piece` operations by pulling pieces from alternate providers into the target provider and committing them on-chain.
183
206
 
184
207
  Options:
185
208
 
186
209
  - `--concurrency <number>` controls how many pull batches run at once. Defaults to `4`.
187
210
  - `--batch-size <number>` controls the maximum number of `add_piece` operations per batch. Defaults to `40`.
188
- - `--reset` retries failed `add_piece` operations as well as pending operations.
189
211
 
190
212
  Example:
191
213
 
192
214
  ```bash
193
- repair repair run 1 --concurrency 8 --batch-size 40 --reset
215
+ repair repair run 1 --concurrency 8 --batch-size 40
194
216
  ```
195
217
 
196
218
  ### `repair repair delete <repairId>`
@@ -203,6 +225,68 @@ repair repair delete 1
203
225
 
204
226
  This only deletes local SQLite state. It does not delete on-chain datasets or remove pieces from a provider.
205
227
 
228
+ ### `repair replicate create`
229
+
230
+ Creates a local replication plan for a source dataset and a target provider.
231
+
232
+ ```bash
233
+ repair replicate create --data-set-id 42 --target-provider-id 202
234
+ ```
235
+
236
+ `--data-set-id` is the source dataset whose pieces should be replicated.
237
+
238
+ `--target-provider-id` is the provider that should receive the replicated pieces. It must be different from the source dataset's provider.
239
+
240
+ The command snapshots the current chain block number, creates a local replication row, and creates local `add_piece` operations for every active piece in the source dataset. Pieces are pulled from the source dataset's provider, not from alternate providers. The command returns a `replicateId`.
241
+
242
+ ### `repair replicate list`
243
+
244
+ Lists local dataset replications.
245
+
246
+ ```bash
247
+ repair replicate list
248
+ ```
249
+
250
+ Each replication includes:
251
+
252
+ - replication ID and status
253
+ - source provider ID and source dataset ID
254
+ - target provider ID and target provider URL
255
+ - target dataset ID, when one has been created
256
+ - block number used when the replication was created
257
+ - total operations and counts by `pending`, `failed`, `completed`, and `skipped`
258
+
259
+ ### `repair replicate run <replicateId>`
260
+
261
+ Runs a pending replication.
262
+
263
+ ```bash
264
+ repair replicate run 1
265
+ ```
266
+
267
+ The command creates a new target dataset on the target provider, copying CDN and metadata settings from the source dataset. Then it processes pending `add_piece` operations by pulling pieces from the source provider into the target provider and committing them on-chain.
268
+
269
+ Options:
270
+
271
+ - `--concurrency <number>` controls how many pull batches run at once. Defaults to `4`.
272
+ - `--batch-size <number>` controls the maximum number of `add_piece` operations per batch. Defaults to `40`.
273
+
274
+ Example:
275
+
276
+ ```bash
277
+ repair replicate run 1 --concurrency 8 --batch-size 40
278
+ ```
279
+
280
+ ### `repair replicate delete <replicateId>`
281
+
282
+ Deletes a local replication and its operations.
283
+
284
+ ```bash
285
+ repair replicate delete 1
286
+ ```
287
+
288
+ This only deletes local SQLite state. It does not delete on-chain datasets or remove pieces from a provider.
289
+
206
290
  ## Typical Workflow
207
291
 
208
292
  1. Configure the CLI.
@@ -238,6 +322,25 @@ repair repair list
238
322
  repair repair run 1
239
323
  ```
240
324
 
325
+ ### Replicating a dataset
326
+
327
+ Use replication when you want to copy an entire dataset to another provider, rather than repairing all pieces on a faulty provider.
328
+
329
+ 1. Find the source dataset.
330
+
331
+ ```bash
332
+ repair datasets list
333
+ repair datasets show 42
334
+ ```
335
+
336
+ 1. Create, inspect, and run the replication.
337
+
338
+ ```bash
339
+ repair replicate create --data-set-id 42 --target-provider-id 202
340
+ repair replicate list
341
+ repair replicate run 1
342
+ ```
343
+
241
344
  ## Contributing
242
345
 
243
346
  Read contributing [guidelines](../../.github/CONTRIBUTING.md).
package/src/cli.ts CHANGED
@@ -3,6 +3,7 @@ import { Cli } from 'incur'
3
3
  import { datasets } from './commands/datasets.ts'
4
4
  import { providers } from './commands/providers.ts'
5
5
  import { repair } from './commands/repair.ts'
6
+ import { replicate } from './commands/replicate.ts'
6
7
  import { setup } from './commands/setup.ts'
7
8
  import { wallet } from './commands/wallet.ts'
8
9
  import { version } from './utils.ts'
@@ -15,6 +16,7 @@ const cli = Cli.create('repair', {
15
16
  cli.command(setup)
16
17
  cli.command(wallet)
17
18
  cli.command(repair)
19
+ cli.command(replicate)
18
20
  cli.command(datasets)
19
21
  cli.command(providers)
20
22
  cli.serve()
package/src/commands/datasets.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import * as p from '@clack/prompts'
2
2
  import * as SP from '@filoz/synapse-core/sp'
3
3
  import { getPdpDataSet } from '@filoz/synapse-core/warm-storage'
4
- import { and, eq } from 'drizzle-orm'
4
+ import { and, asc, eq } from 'drizzle-orm'
5
5
  import { Cli, z } from 'incur'
6
6
  import { contextMiddleware, contextSchema } from '../middleware.ts'
7
7
  import { globalOptions, hashLink } from '../utils.ts'
@@ -32,8 +32,11 @@ datasets.command('list', {
32
32
  where: and(...conditions),
33
33
  with: {
34
34
  provider: true,
35
- pieces: true,
35
+ pieces: {
36
+ where: eq(c.var.indexerDb._.fullSchema.pieces.removed, false),
37
+ },
36
38
  },
39
+ orderBy: [asc(schema.dataSets.dataSetId)],
37
40
  })
38
41
 
39
42
  const datasetsFlattened = datasets.map((dataset) => {
@@ -133,7 +136,16 @@ datasets.command('show', {
133
136
  middleware: [contextMiddleware],
134
137
  run: async (c) => {
135
138
  try {
136
- const dataset = await getPdpDataSet(c.var.client, { dataSetId: c.args.id })
139
+ const dataset = await c.var.indexerDb.query.dataSets.findFirst({
140
+ where: eq(c.var.indexerDb._.fullSchema.dataSets.dataSetId, c.args.id),
141
+ with: {
142
+ provider: true,
143
+ pieces: {
144
+ where: eq(c.var.indexerDb._.fullSchema.pieces.removed, false),
145
+ },
146
+ },
147
+ })
148
+
137
149
  if (!dataset) {
138
150
  return c.error({
139
151
  code: 'DATASET_NOT_FOUND',
@@ -141,7 +153,6 @@ datasets.command('show', {
141
153
  retryable: false,
142
154
  })
143
155
  }
144
-
145
156
  return c.ok(dataset)
146
157
  } catch (error) {
147
158
  if (c.options.debug) {
package/src/commands/providers.ts CHANGED
@@ -30,7 +30,6 @@ providers.command('list', {
30
30
  const filters: (SQLWrapper | undefined)[] = [
31
31
  eq(schema.providers.providerActive, true),
32
32
  eq(schema.providers.pdpProductActive, true),
33
- // or(eq(schema.providers.approved, true), eq(schema.providers.endorsed, true)),
34
33
  ]
35
34
  if (!c.options.all) {
36
35
  filters.push(or(eq(schema.providers.approved, true), eq(schema.providers.endorsed, true)))
package/src/commands/repair.ts CHANGED
@@ -1,11 +1,12 @@
1
- import { and, desc, eq, inArray } from 'drizzle-orm'
1
+ import { and, desc, eq, inArray, isNull } from 'drizzle-orm'
2
2
  import { Cli, z } from 'incur'
3
3
  import { repairCreate } from '../db/repair-create.ts'
4
4
  import { repairDelete } from '../db/repair-delete.ts'
5
5
  import { contextMiddleware, contextSchema } from '../middleware.ts'
6
+ import { runAddPieces } from '../pipeline/add-pieces.ts'
6
7
  import { ensureRepairDataset } from '../pipeline/create-datasets.ts'
7
- import { runPullPiecesPhase } from '../pipeline/pull.ts'
8
8
  import { globalOptions } from '../utils.ts'
9
+
9
10
  export const repair = Cli.create('repair', {
10
11
  description: 'Repair commands',
11
12
  vars: contextSchema,
@@ -50,6 +51,7 @@ repair.command('list', {
50
51
  try {
51
52
  const localSchema = c.var.localDb._.fullSchema
52
53
  const repairs = await c.var.localDb.query.repairs.findMany({
54
+ where: isNull(localSchema.repairs.repairDataSetId),
53
55
  orderBy: [desc(localSchema.repairs.createdAt)],
54
56
  with: {
55
57
  operations: true,
@@ -131,16 +133,19 @@ repair.command('run', {
131
133
  repairId: z.coerce.number().describe('Repair ID to run'),
132
134
  }),
133
135
  options: globalOptions.extend({
134
- concurrency: z.coerce.number().default(4).describe('Concurrency level'),
135
- batchSize: z.coerce.number().default(40).describe('Max add_piece operations per pull batch'),
136
- reset: z.boolean().default(false).describe('Reset the repair'),
136
+ concurrency: z.coerce.number().min(1).max(10).default(4).describe('Concurrency level'),
137
+ batchSize: z.coerce.number().min(1).max(40).default(40).describe('Max pieces per batch'),
137
138
  }),
138
139
  middleware: [contextMiddleware],
139
140
  run: async (c) => {
140
141
  try {
141
142
  const schema = c.var.localDb._.fullSchema
142
143
  const repair = await c.var.localDb.query.repairs.findFirst({
143
- where: and(eq(schema.repairs.id, c.args.repairId), inArray(schema.repairs.status, ['pending'])),
144
+ where: and(
145
+ eq(schema.repairs.id, c.args.repairId),
146
+ inArray(schema.repairs.status, ['pending', 'failed']),
147
+ isNull(schema.repairs.repairDataSetId)
148
+ ),
144
149
  })
145
150
  if (!repair) {
146
151
  return c.error({
@@ -155,12 +160,11 @@ repair.command('run', {
155
160
  repair,
156
161
  })
157
162
 
158
- await runPullPiecesPhase({
163
+ await runAddPieces({
159
164
  ...c.var,
160
165
  repair,
161
166
  concurrency: c.options.concurrency,
162
167
  batchSize: c.options.batchSize,
163
- reset: c.options.reset,
164
168
  })
165
169
  return c.ok({
166
170
  repairId: repair.id,
package/src/commands/replicate.ts ADDED
@@ -0,0 +1,183 @@
1
+ import { and, desc, eq, inArray, isNotNull } from 'drizzle-orm'
2
+ import { Cli, z } from 'incur'
3
+ import { repairDelete } from '../db/repair-delete.ts'
4
+ import { replicateCreate } from '../db/replicate-create.ts'
5
+ import { contextMiddleware, contextSchema } from '../middleware.ts'
6
+ import { runAddPieces } from '../pipeline/add-pieces.ts'
7
+ import { ensureReplicateDataset } from '../pipeline/create-datasets.ts'
8
+ import { globalOptions } from '../utils.ts'
9
+
10
+ export const replicate = Cli.create('replicate', {
11
+ description: 'Replicate dataset commands',
12
+ vars: contextSchema,
13
+ })
14
+
15
+ replicate.command('create', {
16
+ description: 'Create a new dataset replication',
17
+ options: globalOptions.extend({
18
+ dataSetId: z.coerce.bigint().describe('Dataset ID to replicate'),
19
+ targetProviderId: z.coerce.bigint().describe('Target provider ID for replication'),
20
+ }),
21
+ middleware: [contextMiddleware],
22
+ run: async (c) => {
23
+ try {
24
+ const { dataSetId, targetProviderId } = c.options
25
+
26
+ const replicateId = await replicateCreate({
27
+ ...c.var,
28
+ dataSetId,
29
+ targetProviderId,
30
+ })
31
+
32
+ return c.ok({
33
+ replicateId,
34
+ })
35
+ } catch (error) {
36
+ console.error(error)
37
+ return c.error({
38
+ code: 'REPLICATE_FAILED',
39
+ message: error instanceof Error ? error.message : 'Failed to replicate the dataset',
40
+ retryable: true,
41
+ })
42
+ }
43
+ },
44
+ })
45
+
46
+ replicate.command('list', {
47
+ description: 'List all replications',
48
+ options: globalOptions,
49
+ middleware: [contextMiddleware],
50
+ run: async (c) => {
51
+ try {
52
+ const localSchema = c.var.localDb._.fullSchema
53
+ const replications = await c.var.localDb.query.repairs.findMany({
54
+ where: isNotNull(localSchema.repairs.repairDataSetId),
55
+ orderBy: [desc(localSchema.repairs.createdAt)],
56
+ with: {
57
+ operations: true,
58
+ },
59
+ })
60
+
61
+ const replicationsFlattened = replications.map((repair) => {
62
+ const { operations, ...replicationWithoutOperations } = repair
63
+ return {
64
+ id: replicationWithoutOperations.id,
65
+ status: replicationWithoutOperations.status,
66
+ sourceProviderId: replicationWithoutOperations.repairProviderId,
67
+ sourceDataSetId: replicationWithoutOperations.repairDataSetId,
68
+ targetProviderId: replicationWithoutOperations.targetProviderId,
69
+ targetProviderUrl: replicationWithoutOperations.targetProviderUrl,
70
+ targetDataSetId: replicationWithoutOperations.targetDataSetId,
71
+ blockNumber: replicationWithoutOperations.blockNumber,
72
+ operations: operations.length,
73
+ pending: operations.filter((operation) => operation.status === 'pending').length,
74
+ failed: operations.filter((operation) => operation.status === 'failed').length,
75
+ completed: operations.filter((operation) => operation.status === 'completed').length,
76
+ skipped: operations.filter((operation) => operation.status === 'skipped').length,
77
+ }
78
+ })
79
+
80
+ return c.ok({
81
+ replications: replicationsFlattened,
82
+ })
83
+ } catch (error) {
84
+ console.error(error)
85
+ return c.error({
86
+ code: 'REPLICATE_FAILED',
87
+ message: error instanceof Error ? error.message : 'Failed to list replications',
88
+ retryable: true,
89
+ })
90
+ }
91
+ },
92
+ })
93
+
94
+ replicate.command('delete', {
95
+ description: 'Delete a replication',
96
+ args: z.object({
97
+ replicateId: z.coerce.number().describe('Replication ID to delete'),
98
+ }),
99
+ options: globalOptions,
100
+ middleware: [contextMiddleware],
101
+ run: async (c) => {
102
+ try {
103
+ const { deleted, operationsDeleted } = await repairDelete({
104
+ localDb: c.var.localDb,
105
+ repairId: c.args.replicateId,
106
+ })
107
+
108
+ if (!deleted) {
109
+ return c.error({
110
+ code: 'REPLICATE_NOT_FOUND',
111
+ message: 'Replication not found',
112
+ retryable: false,
113
+ })
114
+ }
115
+
116
+ return c.ok({
117
+ replicateId: c.args.replicateId,
118
+ operationsDeleted,
119
+ })
120
+ } catch (error) {
121
+ console.error(error)
122
+ return c.error({
123
+ code: 'REPLICATE_FAILED',
124
+ message: error instanceof Error ? error.message : 'Failed to delete the replication',
125
+ retryable: true,
126
+ })
127
+ }
128
+ },
129
+ })
130
+
131
+ replicate.command('run', {
132
+ description: 'Run a replication',
133
+ args: z.object({
134
+ replicateId: z.coerce.number().describe('Replication ID to run'),
135
+ }),
136
+ options: globalOptions.extend({
137
+ concurrency: z.coerce.number().min(1).max(10).default(4).describe('Concurrency level'),
138
+ batchSize: z.coerce.number().min(1).max(40).default(40).describe('Max pieces per batch'),
139
+ }),
140
+ middleware: [contextMiddleware],
141
+ run: async (c) => {
142
+ try {
143
+ const schema = c.var.localDb._.fullSchema
144
+ const repair = await c.var.localDb.query.repairs.findFirst({
145
+ where: and(
146
+ eq(schema.repairs.id, c.args.replicateId),
147
+ inArray(schema.repairs.status, ['pending', 'failed']),
148
+ isNotNull(schema.repairs.repairDataSetId)
149
+ ),
150
+ })
151
+ if (!repair) {
152
+ return c.error({
153
+ code: 'REPLICATE_NOT_FOUND',
154
+ message: 'Replication not found, it may have already been run or completed',
155
+ retryable: false,
156
+ })
157
+ }
158
+
159
+ await ensureReplicateDataset({
160
+ ...c.var,
161
+ repair,
162
+ source: c.var.source,
163
+ })
164
+
165
+ await runAddPieces({
166
+ ...c.var,
167
+ repair,
168
+ concurrency: c.options.concurrency,
169
+ batchSize: c.options.batchSize,
170
+ })
171
+ return c.ok({
172
+ replicateId: repair.id,
173
+ })
174
+ } catch (error) {
175
+ console.error(error)
176
+ return c.error({
177
+ code: 'REPLICATE_FAILED',
178
+ message: error instanceof Error ? error.message : 'Failed to replicate the dataset',
179
+ retryable: true,
180
+ })
181
+ }
182
+ },
183
+ })
package/src/commands/setup.ts CHANGED
@@ -22,9 +22,7 @@ function validatePostgresUrl(value: string) {
22
22
 
23
23
  export const setup = Cli.create('setup', {
24
24
  description: 'Setup the CLI',
25
- options: globalOptions.extend({
26
- privateKey: z.string().optional().describe('Private key to use'),
27
- }),
25
+ options: globalOptions,
28
26
  run: async (c) => {
29
27
  try {
30
28
  // Private key
@@ -32,7 +30,7 @@ export const setup = Cli.create('setup', {
32
30
  message: 'Enter your private key',
33
31
  validate(value) {
34
32
  if (!value || !/^0x[a-fA-F0-9]{64}$/.test(value)) {
35
- return `Invalid private key!`
33
+ return `Invalid private key! (must be 64 hex characters) starting with 0x`
36
34
  }
37
35
  },
38
36
  initialValue: config.get('privateKey'),
@@ -114,12 +112,28 @@ export const setup = Cli.create('setup', {
114
112
  })
115
113
  }
116
114
 
115
+ const source = await p.text({
116
+ message: 'Enter your source',
117
+ placeholder: 'Dataset source',
118
+ defaultValue: 'early-repair',
119
+ initialValue: config.get('source'),
120
+ withGuide: false,
121
+ })
122
+ if (p.isCancel(source)) {
123
+ return c.error({
124
+ code: 'SETUP_CANCELLED',
125
+ message: 'Setup cancelled',
126
+ retryable: false,
127
+ })
128
+ }
129
+
117
130
  // Set config
118
131
  config.set('privateKey', pk)
119
132
  config.set('indexerMainnetUrl', indexerMainnetUrl)
120
133
  config.set('indexerCalibrationUrl', indexerCalibrationUrl)
121
134
  config.set('chainId', chainId)
122
135
  config.set('dbPath', dbPath)
136
+ config.set('source', source)
123
137
 
124
138
  // setup database
125
139
  const db = await createLocalDatabase(dbPath)
package/src/db/dedupe-cids.ts ADDED
@@ -0,0 +1,49 @@
1
+ import { and, eq, inArray } from 'drizzle-orm'
2
+ import type { OperationSelect } from '../local-schema.ts'
3
+ import type { IndexerDatabase, LocalDatabase } from '../types.ts'
4
+ import { upsertOperations } from './upsert-operations.ts'
5
+
6
+ export type SyncPiecesOnchainOptions = {
7
+ indexerDb: IndexerDatabase
8
+ localDb: LocalDatabase
9
+ dataSetId: bigint
10
+ operations: OperationSelect[]
11
+ }
12
+
13
+ /**
14
+ * Dedupe operations by CID on the target repair dataset.
15
+ * Returns operations that are not onchain.
16
+ */
17
+ export async function dedupeCids({ indexerDb, localDb, dataSetId, operations }: SyncPiecesOnchainOptions) {
18
+ const cids = operations.map((operation) => operation.cid)
19
+ const schema = indexerDb._.fullSchema
20
+
21
+ const rows = await indexerDb
22
+ .select({ cid: schema.pieces.cid })
23
+ .from(schema.pieces)
24
+ .where(
25
+ and(eq(schema.pieces.dataSetId, dataSetId), eq(schema.pieces.removed, false), inArray(schema.pieces.cid, cids))
26
+ )
27
+
28
+ const existingCids = new Set<string>()
29
+ const completedOperations: OperationSelect[] = []
30
+
31
+ for (const row of rows) {
32
+ completedOperations.push(...operations.filter((operation) => operation.cid === row.cid))
33
+ existingCids.add(row.cid)
34
+ }
35
+
36
+ if (completedOperations.length > 0) {
37
+ await upsertOperations({
38
+ localDb,
39
+ operations: completedOperations.map((operation) => ({
40
+ ...operation,
41
+ status: 'completed',
42
+ error: null,
43
+ })),
44
+ })
45
+ }
46
+
47
+ // return operations that are not onchain
48
+ return operations.filter((operation) => !existingCids.has(operation.cid))
49
+ }
package/src/db/{get-providers-by-cid.ts → find-providers-by-cid.ts} RENAMED
@@ -1,10 +1,9 @@
1
- import { and, asc, eq, inArray, isNull, lte, notInArray, or } from 'drizzle-orm'
1
+ import { and, asc, eq, inArray, isNull, lte, or } from 'drizzle-orm'
2
2
  import type { IndexerDatabase, RepairProvider } from '../types.ts'
3
3
 
4
- export type GetProvidersByCidOptions = {
4
+ export type FindProvidersByCidOptions = {
5
5
  indexerDb: IndexerDatabase
6
6
  cids: readonly string[]
7
- excludedProviderIds: readonly bigint[]
8
7
  blockNumber: bigint
9
8
  }
10
9
 
@@ -20,12 +19,11 @@ export type ProvidersByCid = Record<string, RepairProvider[]>
20
19
  * included. Every requested CID is present in the result; CIDs with no alternate providers
21
20
  * map to an empty array.
22
21
  */
23
- export async function getProvidersByCid({
22
+ export async function findProvidersByCid({
24
23
  indexerDb,
25
24
  cids,
26
- excludedProviderIds,
27
25
  blockNumber,
28
- }: GetProvidersByCidOptions): Promise<ProvidersByCid> {
26
+ }: FindProvidersByCidOptions): Promise<ProvidersByCid> {
29
27
  const schema = indexerDb._.fullSchema
30
28
  const providersByCid = Object.fromEntries(cids.map((cid) => [cid, []])) as ProvidersByCid
31
29
  if (cids.length === 0) return providersByCid
@@ -35,11 +33,8 @@ export async function getProvidersByCid({
35
33
  eq(schema.dataSets.deleted, false),
36
34
  or(isNull(schema.dataSets.pdpEndEpoch), lte(schema.dataSets.pdpEndEpoch, blockNumber)),
37
35
  eq(schema.pieces.removed, false),
38
- // or(eq(schema.providers.approved, true), eq(schema.providers.endorsed, true)),
36
+ or(eq(schema.providers.approved, true), eq(schema.providers.endorsed, true)),
39
37
  ]
40
- if (excludedProviderIds.length > 0) {
41
- filters.push(notInArray(schema.dataSets.providerId, [...excludedProviderIds]))
42
- }
43
38
 
44
39
  // Join through datasets because providers own datasets, while pieces only reference dataset IDs.
45
40
  const rows = await indexerDb
package/src/db/{get-repair-dataset.ts → find-repair-dataset.ts} RENAMED
@@ -1,12 +1,12 @@
1
1
  import { and, asc, eq, isNull } from 'drizzle-orm'
2
2
  import type { Address } from 'viem'
3
3
  import type { IndexerDatabase } from '../types.ts'
4
- import { EARLY_REPAIR_SOURCE } from '../utils.ts'
5
4
 
6
- export type GetRepairDatasetOptions = {
5
+ export type FindRepairDatasetOptions = {
7
6
  indexerDb: IndexerDatabase
8
7
  providerId: bigint
9
8
  payer: Address
9
+ source: string
10
10
  }
11
11
 
12
12
  /**
@@ -14,11 +14,12 @@ export type GetRepairDatasetOptions = {
14
14
  *
15
15
  * When multiple datasets match, the lowest `dataSetId` is returned.
16
16
  */
17
- export async function getRepairDataset({
17
+ export async function findRepairDataset({
18
18
  indexerDb,
19
19
  providerId,
20
20
  payer,
21
- }: GetRepairDatasetOptions): Promise<bigint | null> {
21
+ source,
22
+ }: FindRepairDatasetOptions): Promise<bigint | null> {
22
23
  const schema = indexerDb._.fullSchema
23
24
 
24
25
  const result = await indexerDb
@@ -32,7 +33,7 @@ export async function getRepairDataset({
32
33
  eq(schema.dataSets.deleted, false),
33
34
  isNull(schema.dataSets.pdpEndEpoch),
34
35
  eq(schema.dataSets.payer, payer.toLowerCase()),
35
- eq(schema.dataSets.source, EARLY_REPAIR_SOURCE),
36
+ eq(schema.dataSets.source, source),
36
37
  eq(schema.dataSets.withCdn, false),
37
38
  eq(schema.dataSets.withIpfsIndexing, true)
38
39
  )