@filoz/repair-cli 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/dist/package.json +3 -2
  2. package/dist/src/cli.js +2 -0
  3. package/dist/src/cli.js.map +1 -1
  4. package/dist/src/commands/datasets.d.ts +16 -1
  5. package/dist/src/commands/datasets.d.ts.map +1 -1
  6. package/dist/src/commands/datasets.js +107 -3
  7. package/dist/src/commands/datasets.js.map +1 -1
  8. package/dist/src/commands/providers.d.ts +16 -1
  9. package/dist/src/commands/providers.d.ts.map +1 -1
  10. package/dist/src/commands/providers.js.map +1 -1
  11. package/dist/src/commands/repair.d.ts +16 -1
  12. package/dist/src/commands/repair.d.ts.map +1 -1
  13. package/dist/src/commands/repair.js +7 -8
  14. package/dist/src/commands/repair.js.map +1 -1
  15. package/dist/src/commands/replicate.d.ts +24 -0
  16. package/dist/src/commands/replicate.d.ts.map +1 -0
  17. package/dist/src/commands/replicate.js +171 -0
  18. package/dist/src/commands/replicate.js.map +1 -0
  19. package/dist/src/commands/setup.d.ts.map +1 -1
  20. package/dist/src/commands/setup.js +15 -0
  21. package/dist/src/commands/setup.js.map +1 -1
  22. package/dist/src/commands/wallet.d.ts +16 -1
  23. package/dist/src/commands/wallet.d.ts.map +1 -1
  24. package/dist/src/db/dedupe-cids.d.ts +22 -0
  25. package/dist/src/db/dedupe-cids.d.ts.map +1 -0
  26. package/dist/src/db/dedupe-cids.js +28 -0
  27. package/dist/src/db/dedupe-cids.js.map +1 -0
  28. package/dist/src/db/find-providers-by-cid.d.ts +9 -0
  29. package/dist/src/db/find-providers-by-cid.d.ts.map +1 -0
  30. package/dist/src/db/{get-providers-by-cid.js → find-providers-by-cid.js} +4 -6
  31. package/dist/src/db/find-providers-by-cid.js.map +1 -0
  32. package/dist/src/db/find-repair-dataset.d.ts +10 -0
  33. package/dist/src/db/find-repair-dataset.d.ts.map +1 -0
  34. package/dist/src/db/{get-repair-dataset.js → find-repair-dataset.js} +8 -6
  35. package/dist/src/db/find-repair-dataset.js.map +1 -0
  36. package/dist/src/db/get-pieces.d.ts +16 -0
  37. package/dist/src/db/get-pieces.d.ts.map +1 -1
  38. package/dist/src/db/get-pieces.js +44 -3
  39. package/dist/src/db/get-pieces.js.map +1 -1
  40. package/dist/src/db/repair-create.d.ts.map +1 -1
  41. package/dist/src/db/repair-create.js +1 -0
  42. package/dist/src/db/repair-create.js.map +1 -1
  43. package/dist/src/db/repair-delete.d.ts.map +1 -1
  44. package/dist/src/db/repair-delete.js +0 -5
  45. package/dist/src/db/repair-delete.js.map +1 -1
  46. package/dist/src/db/replicate-create.d.ts +7 -0
  47. package/dist/src/db/replicate-create.d.ts.map +1 -0
  48. package/dist/src/db/replicate-create.js +78 -0
  49. package/dist/src/db/replicate-create.js.map +1 -0
  50. package/dist/src/db/upsert-operations.js +1 -1
  51. package/dist/src/db/upsert-operations.js.map +1 -1
  52. package/dist/src/local-schema.d.ts +19 -0
  53. package/dist/src/local-schema.d.ts.map +1 -1
  54. package/dist/src/local-schema.js +1 -0
  55. package/dist/src/local-schema.js.map +1 -1
  56. package/dist/src/middleware.d.ts +32 -2
  57. package/dist/src/middleware.d.ts.map +1 -1
  58. package/dist/src/middleware.js +10 -1
  59. package/dist/src/middleware.js.map +1 -1
  60. package/dist/src/pipeline/add-pieces.d.ts +12 -0
  61. package/dist/src/pipeline/add-pieces.d.ts.map +1 -0
  62. package/dist/src/pipeline/add-pieces.js +142 -0
  63. package/dist/src/pipeline/add-pieces.js.map +1 -0
  64. package/dist/src/pipeline/create-datasets.d.ts +3 -1
  65. package/dist/src/pipeline/create-datasets.d.ts.map +1 -1
  66. package/dist/src/pipeline/create-datasets.js +57 -5
  67. package/dist/src/pipeline/create-datasets.js.map +1 -1
  68. package/dist/src/utils.d.ts +166 -120
  69. package/dist/src/utils.d.ts.map +1 -1
  70. package/dist/src/utils.js +50 -25
  71. package/dist/src/utils.js.map +1 -1
  72. package/package.json +3 -2
  73. package/readme.md +110 -7
  74. package/src/cli.ts +2 -0
  75. package/src/commands/datasets.ts +109 -3
  76. package/src/commands/providers.ts +0 -1
  77. package/src/commands/repair.ts +12 -8
  78. package/src/commands/replicate.ts +183 -0
  79. package/src/commands/setup.ts +16 -0
  80. package/src/db/dedupe-cids.ts +49 -0
  81. package/src/db/{get-providers-by-cid.ts → find-providers-by-cid.ts} +5 -10
  82. package/src/db/{get-repair-dataset.ts → find-repair-dataset.ts} +12 -7
  83. package/src/db/get-pieces.ts +105 -3
  84. package/src/db/get-target-dataset.ts +1 -1
  85. package/src/db/repair-create.ts +1 -0
  86. package/src/db/repair-delete.ts +0 -5
  87. package/src/db/replicate-create.ts +106 -0
  88. package/src/db/upsert-operations.ts +1 -1
  89. package/src/local-schema.ts +1 -0
  90. package/src/middleware.ts +12 -1
  91. package/src/pipeline/add-pieces.ts +214 -0
  92. package/src/pipeline/create-datasets.ts +71 -11
  93. package/src/utils.ts +64 -32
  94. package/dist/src/db/get-providers-by-cid.d.ts +0 -10
  95. package/dist/src/db/get-providers-by-cid.d.ts.map +0 -1
  96. package/dist/src/db/get-providers-by-cid.js.map +0 -1
  97. package/dist/src/db/get-repair-dataset.d.ts +0 -9
  98. package/dist/src/db/get-repair-dataset.d.ts.map +0 -1
  99. package/dist/src/db/get-repair-dataset.js.map +0 -1
  100. package/dist/src/db/sync-pieces-onchain.d.ts +0 -10
  101. package/dist/src/db/sync-pieces-onchain.d.ts.map +0 -1
  102. package/dist/src/db/sync-pieces-onchain.js +0 -35
  103. package/dist/src/db/sync-pieces-onchain.js.map +0 -1
  104. package/dist/src/pipeline/pull.d.ts +0 -30
  105. package/dist/src/pipeline/pull.d.ts.map +0 -1
  106. package/dist/src/pipeline/pull.js +0 -169
  107. package/dist/src/pipeline/pull.js.map +0 -1
  108. package/src/db/sync-pieces-onchain.ts +0 -53
  109. package/src/pipeline/pull.ts +0 -255
@@ -0,0 +1,183 @@
1
+ import { and, desc, eq, inArray, isNotNull } from 'drizzle-orm'
2
+ import { Cli, z } from 'incur'
3
+ import { repairDelete } from '../db/repair-delete.ts'
4
+ import { replicateCreate } from '../db/replicate-create.ts'
5
+ import { contextMiddleware, contextSchema } from '../middleware.ts'
6
+ import { runAddPieces } from '../pipeline/add-pieces.ts'
7
+ import { ensureReplicateDataset } from '../pipeline/create-datasets.ts'
8
+ import { globalOptions } from '../utils.ts'
9
+
10
/**
 * Root of the `replicate` command group.
 *
 * Sub-commands registered on this instance read their shared context
 * through the variables described by `contextSchema`.
 */
export const replicate = Cli.create('replicate', {
  vars: contextSchema,
  description: 'Replicate dataset commands',
})
14
+
15
/**
 * `replicate create`: registers a local replication job for a source dataset
 * and a target provider, and reports the id of the created job.
 */
replicate.command('create', {
  description: 'Create a new dataset replication',
  options: globalOptions.extend({
    dataSetId: z.coerce.bigint().describe('Dataset ID to replicate'),
    targetProviderId: z.coerce.bigint().describe('Target provider ID for replication'),
  }),
  middleware: [contextMiddleware],
  run: async (c) => {
    try {
      const replicateId = await replicateCreate({
        ...c.var,
        dataSetId: c.options.dataSetId,
        targetProviderId: c.options.targetProviderId,
      })

      return c.ok({ replicateId })
    } catch (error) {
      console.error(error)

      // Prefer the thrown error's own message; fall back to a generic one for non-Error throws.
      const message = error instanceof Error ? error.message : 'Failed to replicate the dataset'
      return c.error({
        code: 'REPLICATE_FAILED',
        message,
        retryable: true,
      })
    }
  },
})
45
+
46
/**
 * `replicate list`: lists local jobs that have a source dataset id set
 * (i.e. replications), newest first, with per-status operation counts.
 */
replicate.command('list', {
  description: 'List all replications',
  options: globalOptions,
  middleware: [contextMiddleware],
  run: async (c) => {
    try {
      const localSchema = c.var.localDb._.fullSchema
      const rows = await c.var.localDb.query.repairs.findMany({
        where: isNotNull(localSchema.repairs.repairDataSetId),
        orderBy: [desc(localSchema.repairs.createdAt)],
        with: {
          operations: true,
        },
      })

      const replications = rows.map(({ operations, ...row }) => {
        // Tally the four reported statuses in a single pass over the operations.
        let pending = 0
        let failed = 0
        let completed = 0
        let skipped = 0
        for (const operation of operations) {
          switch (operation.status) {
            case 'pending':
              pending += 1
              break
            case 'failed':
              failed += 1
              break
            case 'completed':
              completed += 1
              break
            case 'skipped':
              skipped += 1
              break
          }
        }

        // The job row stores the replication source in the `repair*` columns.
        return {
          id: row.id,
          status: row.status,
          sourceProviderId: row.repairProviderId,
          sourceDataSetId: row.repairDataSetId,
          targetProviderId: row.targetProviderId,
          targetProviderUrl: row.targetProviderUrl,
          targetDataSetId: row.targetDataSetId,
          blockNumber: row.blockNumber,
          operations: operations.length,
          pending,
          failed,
          completed,
          skipped,
        }
      })

      return c.ok({ replications })
    } catch (error) {
      console.error(error)

      const message = error instanceof Error ? error.message : 'Failed to list replications'
      return c.error({
        code: 'REPLICATE_FAILED',
        message,
        retryable: true,
      })
    }
  },
})
93
+
94
/**
 * `replicate delete <replicateId>`: deletes a replication job and its operations.
 *
 * Only rows with a source dataset id (`repairDataSetId`) are treated as
 * replications, matching the filter used by `replicate list` and
 * `replicate run`. Any other id is reported as `REPLICATE_NOT_FOUND` instead
 * of being deleted, so this command cannot remove an ordinary repair job.
 */
replicate.command('delete', {
  description: 'Delete a replication',
  args: z.object({
    replicateId: z.coerce.number().describe('Replication ID to delete'),
  }),
  options: globalOptions,
  middleware: [contextMiddleware],
  run: async (c) => {
    const notFound = () =>
      c.error({
        code: 'REPLICATE_NOT_FOUND',
        message: 'Replication not found',
        retryable: false,
      })

    try {
      const schema = c.var.localDb._.fullSchema

      // Confirm the id refers to a replication before deleting anything.
      const replication = await c.var.localDb.query.repairs.findFirst({
        where: and(eq(schema.repairs.id, c.args.replicateId), isNotNull(schema.repairs.repairDataSetId)),
        columns: { id: true },
      })
      if (!replication) {
        return notFound()
      }

      const { deleted, operationsDeleted } = await repairDelete({
        localDb: c.var.localDb,
        repairId: c.args.replicateId,
      })

      if (!deleted) {
        return notFound()
      }

      return c.ok({
        replicateId: c.args.replicateId,
        operationsDeleted,
      })
    } catch (error) {
      console.error(error)
      return c.error({
        code: 'REPLICATE_FAILED',
        message: error instanceof Error ? error.message : 'Failed to delete the replication',
        retryable: true,
      })
    }
  },
})
130
+
131
/**
 * `replicate run <replicateId>`: executes a replication job.
 *
 * Only jobs that are replications (source dataset id set) and whose status is
 * `pending` or `failed` are picked up. The command first calls
 * `ensureReplicateDataset`, then `runAddPieces` with the requested
 * concurrency and batch size.
 */
replicate.command('run', {
  description: 'Run a replication',
  args: z.object({
    replicateId: z.coerce.number().describe('Replication ID to run'),
  }),
  options: globalOptions.extend({
    // Both values are counts, so fractional input such as `2.5` is rejected up front.
    concurrency: z.coerce.number().int().min(1).max(10).default(4).describe('Concurrency level'),
    batchSize: z.coerce.number().int().min(1).max(40).default(40).describe('Max pieces per batch'),
  }),
  middleware: [contextMiddleware],
  run: async (c) => {
    try {
      const schema = c.var.localDb._.fullSchema
      const repair = await c.var.localDb.query.repairs.findFirst({
        where: and(
          eq(schema.repairs.id, c.args.replicateId),
          inArray(schema.repairs.status, ['pending', 'failed']),
          isNotNull(schema.repairs.repairDataSetId)
        ),
      })
      if (!repair) {
        return c.error({
          code: 'REPLICATE_NOT_FOUND',
          message: 'Replication not found, it may have already been run or completed',
          retryable: false,
        })
      }

      // `c.var` already carries `source`, so it is not passed a second time.
      await ensureReplicateDataset({
        ...c.var,
        repair,
      })

      await runAddPieces({
        ...c.var,
        repair,
        concurrency: c.options.concurrency,
        batchSize: c.options.batchSize,
      })

      return c.ok({
        replicateId: repair.id,
      })
    } catch (error) {
      console.error(error)
      return c.error({
        code: 'REPLICATE_FAILED',
        message: error instanceof Error ? error.message : 'Failed to replicate the dataset',
        retryable: true,
      })
    }
  },
})
@@ -114,12 +114,28 @@ export const setup = Cli.create('setup', {
114
114
  })
115
115
  }
116
116
 
117
+ const source = await p.text({
118
+ message: 'Enter your source',
119
+ placeholder: 'Dataset source',
120
+ defaultValue: 'early-repair',
121
+ initialValue: config.get('source'),
122
+ withGuide: false,
123
+ })
124
+ if (p.isCancel(source)) {
125
+ return c.error({
126
+ code: 'SETUP_CANCELLED',
127
+ message: 'Setup cancelled',
128
+ retryable: false,
129
+ })
130
+ }
131
+
117
132
  // Set config
118
133
  config.set('privateKey', pk)
119
134
  config.set('indexerMainnetUrl', indexerMainnetUrl)
120
135
  config.set('indexerCalibrationUrl', indexerCalibrationUrl)
121
136
  config.set('chainId', chainId)
122
137
  config.set('dbPath', dbPath)
138
+ config.set('source', source)
123
139
 
124
140
  // setup database
125
141
  const db = await createLocalDatabase(dbPath)
@@ -0,0 +1,49 @@
1
+ import { and, eq, inArray } from 'drizzle-orm'
2
+ import type { OperationSelect } from '../local-schema.ts'
3
+ import type { IndexerDatabase, LocalDatabase } from '../types.ts'
4
+ import { upsertOperations } from './upsert-operations.ts'
5
+
6
/** Inputs for {@link dedupeCids}. */
export type SyncPiecesOnchainOptions = {
  /** Indexer database queried for pieces already present on the dataset. */
  indexerDb: IndexerDatabase
  /** Local database in which matching operations are marked as completed. */
  localDb: LocalDatabase
  /** Dataset whose non-removed pieces are checked. */
  dataSetId: bigint
  /** Candidate operations, matched against the dataset by CID. */
  operations: OperationSelect[]
}

/**
 * Dedupe operations by CID against the pieces already on a dataset.
 *
 * Operations whose CID is already present (and not removed) on `dataSetId`
 * are upserted locally with status `completed` and a cleared error.
 *
 * @param options - Databases, dataset id, and the operations to check.
 * @returns The operations that are not onchain, in their original order.
 */
export async function dedupeCids({
  indexerDb,
  localDb,
  dataSetId,
  operations,
}: SyncPiecesOnchainOptions): Promise<OperationSelect[]> {
  // Nothing to check; this also avoids sending an `IN` filter with an empty list.
  if (operations.length === 0) return []

  const schema = indexerDb._.fullSchema
  // Several operations may share a CID; query each CID once.
  const cids = [...new Set(operations.map((operation) => operation.cid))]

  const rows = await indexerDb
    .select({ cid: schema.pieces.cid })
    .from(schema.pieces)
    .where(
      and(eq(schema.pieces.dataSetId, dataSetId), eq(schema.pieces.removed, false), inArray(schema.pieces.cid, cids))
    )

  // A Set collapses repeated rows for the same CID, so each operation is classified exactly once.
  const existingCids = new Set<string>(rows.map((row) => row.cid))

  const completedOperations: OperationSelect[] = []
  const remainingOperations: OperationSelect[] = []
  for (const operation of operations) {
    if (existingCids.has(operation.cid)) {
      completedOperations.push(operation)
    } else {
      remainingOperations.push(operation)
    }
  }

  if (completedOperations.length > 0) {
    await upsertOperations({
      localDb,
      operations: completedOperations.map((operation) => ({
        ...operation,
        status: 'completed',
        error: null,
      })),
    })
  }

  // return operations that are not onchain
  return remainingOperations
}
@@ -1,10 +1,9 @@
1
- import { and, asc, eq, inArray, isNull, lte, notInArray, or } from 'drizzle-orm'
1
+ import { and, asc, eq, inArray, isNull, lte, or } from 'drizzle-orm'
2
2
  import type { IndexerDatabase, RepairProvider } from '../types.ts'
3
3
 
4
- export type GetProvidersByCidOptions = {
4
+ export type FindProvidersByCidOptions = {
5
5
  indexerDb: IndexerDatabase
6
6
  cids: readonly string[]
7
- excludedProviderIds: readonly bigint[]
8
7
  blockNumber: bigint
9
8
  }
10
9
 
@@ -20,12 +19,11 @@ export type ProvidersByCid = Record<string, RepairProvider[]>
20
19
  * included. Every requested CID is present in the result; CIDs with no alternate providers
21
20
  * map to an empty array.
22
21
  */
23
- export async function getProvidersByCid({
22
+ export async function findProvidersByCid({
24
23
  indexerDb,
25
24
  cids,
26
- excludedProviderIds,
27
25
  blockNumber,
28
- }: GetProvidersByCidOptions): Promise<ProvidersByCid> {
26
+ }: FindProvidersByCidOptions): Promise<ProvidersByCid> {
29
27
  const schema = indexerDb._.fullSchema
30
28
  const providersByCid = Object.fromEntries(cids.map((cid) => [cid, []])) as ProvidersByCid
31
29
  if (cids.length === 0) return providersByCid
@@ -35,11 +33,8 @@ export async function getProvidersByCid({
35
33
  eq(schema.dataSets.deleted, false),
36
34
  or(isNull(schema.dataSets.pdpEndEpoch), lte(schema.dataSets.pdpEndEpoch, blockNumber)),
37
35
  eq(schema.pieces.removed, false),
38
- // or(eq(schema.providers.approved, true), eq(schema.providers.endorsed, true)),
36
+ or(eq(schema.providers.approved, true), eq(schema.providers.endorsed, true)),
39
37
  ]
40
- if (excludedProviderIds.length > 0) {
41
- filters.push(notInArray(schema.dataSets.providerId, [...excludedProviderIds]))
42
- }
43
38
 
44
39
  // Join through datasets because providers own datasets, while pieces only reference dataset IDs.
45
40
  const rows = await indexerDb
@@ -1,12 +1,12 @@
1
1
  import { and, asc, eq, isNull } from 'drizzle-orm'
2
2
  import type { Address } from 'viem'
3
3
  import type { IndexerDatabase } from '../types.ts'
4
- import { EARLY_REPAIR_SOURCE } from '../utils.ts'
5
4
 
6
- export type GetRepairDatasetOptions = {
5
+ export type FindRepairDatasetOptions = {
7
6
  indexerDb: IndexerDatabase
8
7
  providerId: bigint
9
8
  payer: Address
9
+ source: string
10
10
  }
11
11
 
12
12
  /**
@@ -14,14 +14,15 @@ export type GetRepairDatasetOptions = {
14
14
  *
15
15
  * When multiple datasets match, the lowest `dataSetId` is returned.
16
16
  */
17
- export async function getRepairDataset({
17
+ export async function findRepairDataset({
18
18
  indexerDb,
19
19
  providerId,
20
20
  payer,
21
- }: GetRepairDatasetOptions): Promise<bigint | null> {
21
+ source,
22
+ }: FindRepairDatasetOptions): Promise<bigint | null> {
22
23
  const schema = indexerDb._.fullSchema
23
24
 
24
- const [row] = await indexerDb
25
+ const result = await indexerDb
25
26
  .select({
26
27
  dataSetId: schema.dataSets.dataSetId,
27
28
  })
@@ -32,7 +33,7 @@ export async function getRepairDataset({
32
33
  eq(schema.dataSets.deleted, false),
33
34
  isNull(schema.dataSets.pdpEndEpoch),
34
35
  eq(schema.dataSets.payer, payer.toLowerCase()),
35
- eq(schema.dataSets.source, EARLY_REPAIR_SOURCE),
36
+ eq(schema.dataSets.source, source),
36
37
  eq(schema.dataSets.withCdn, false),
37
38
  eq(schema.dataSets.withIpfsIndexing, true)
38
39
  )
@@ -40,5 +41,9 @@ export async function getRepairDataset({
40
41
  .orderBy(asc(schema.dataSets.dataSetId))
41
42
  .limit(1)
42
43
 
43
- return row.dataSetId ?? null
44
+ if (result.length === 0) {
45
+ return null
46
+ }
47
+
48
+ return result[0].dataSetId
44
49
  }
@@ -3,7 +3,7 @@ import { and, asc, eq, isNull, lte, or } from 'drizzle-orm'
3
3
  import pMap from 'p-map'
4
4
  import type { OperationInsert } from '../local-schema.ts'
5
5
  import type { IndexerDatabase } from '../types.ts'
6
- import { getProvidersByCid } from './get-providers-by-cid.ts'
6
+ import { findProvidersByCid } from './find-providers-by-cid.ts'
7
7
 
8
8
  /** Default page size when paginating pieces from the indexer. */
9
9
  export const DEFAULT_PIECES_PAGE_SIZE = 3000
@@ -43,6 +43,34 @@ export type GetPiecesPageResult = {
43
43
  /** Options for {@link forEachPiecesPage}; pagination state is managed internally. */
44
44
  export type ForEachPiecesPageOptions = Omit<GetPiecesPageOptions, 'offset' | 'seenCids'>
45
45
 
46
/** Inputs for {@link getDataSetPiecesPage}: one page of a source dataset's pieces. */
export type GetDataSetPiecesPageOptions = {
  indexerDb: IndexerDatabase
  /** Id of the source dataset being replicated. */
  dataSetId: bigint
  /** Id of the local job row the generated operations belong to. */
  repairId: number
  /** URL of the provider serving the source dataset's pieces. */
  sourceProviderUrl: string
  /** Maximum number of indexer rows in a page; {@link DEFAULT_PIECES_PAGE_SIZE} when omitted. */
  limit?: number
  /** Row offset applied to the indexer query. */
  offset?: number
}

/** Output of one {@link getDataSetPiecesPage} call. */
export type GetDataSetPiecesPageResult = {
  /** `add_piece` operations built from this page, ready to be inserted. */
  operations: OperationInsert[]
  /** True when a further indexer page may follow this one. */
  hasMore: boolean
  /** Value to use as `offset` when requesting the next page. */
  nextOffset: number
}

/** Inputs for {@link forEachDataSetPiecesPage}; the offset is tracked internally. */
export type ForEachDataSetPiecesPageOptions = Omit<GetDataSetPiecesPageOptions, 'offset'>
73
+
46
74
  type PieceForOperation = {
47
75
  cid: string
48
76
  metadata: Record<string, string> | null
@@ -107,10 +135,9 @@ export async function getPiecesPage({
107
135
  }
108
136
 
109
137
  // Resolve pull sources in one query per page; exclude the provider being repaired from alternates.
110
- const providersByCid = await getProvidersByCid({
138
+ const providersByCid = await findProvidersByCid({
111
139
  indexerDb,
112
140
  cids: pieces.map((piece) => piece.cid),
113
- excludedProviderIds: [],
114
141
  blockNumber,
115
142
  })
116
143
 
@@ -157,6 +184,53 @@ export async function getPiecesPage({
157
184
  }
158
185
  }
159
186
 
187
/**
 * Load a single page of non-removed pieces from one dataset and turn each row
 * into a pending `add_piece` operation for a replication job.
 *
 * Rows are read in ascending `pieceId` order and are not deduplicated by CID,
 * so a CID that appears several times in the dataset yields several operations.
 *
 * @param options - Indexer connection, dataset/job context, and optional paging values.
 * @returns The page's operations together with the paging cursor.
 */
export async function getDataSetPiecesPage({
  indexerDb,
  dataSetId,
  repairId,
  sourceProviderUrl,
  limit = DEFAULT_PIECES_PAGE_SIZE,
  offset = 0,
}: GetDataSetPiecesPageOptions): Promise<GetDataSetPiecesPageResult> {
  const { pieces } = indexerDb._.fullSchema

  const livePiecesOfDataSet = and(eq(pieces.dataSetId, dataSetId), eq(pieces.removed, false))
  const rows = await indexerDb
    .select({
      cid: pieces.cid,
      metadata: pieces.metadata,
    })
    .from(pieces)
    .where(livePiecesOfDataSet)
    .orderBy(asc(pieces.pieceId))
    .limit(limit)
    .offset(offset)

  // One timestamp is shared by every operation created from this page.
  const timestamp = Date.now()
  const operations: OperationInsert[] = []
  for (const row of rows) {
    operations.push({
      repairId,
      type: 'add_piece',
      status: 'pending',
      cid: row.cid,
      metadata: row.metadata ?? {},
      alternateProvider: sourceProviderUrl,
      createdAt: timestamp,
      updatedAt: timestamp,
    })
  }

  const fetched = rows.length
  return {
    operations,
    // A full page means there may be more rows after it.
    hasMore: fetched === limit,
    nextOffset: offset + fetched,
  }
}
233
+
160
234
  /**
161
235
  * Walk every page of `add_piece` operations for a provider, invoking `onPage` per batch.
162
236
  *
@@ -187,3 +261,31 @@ export async function forEachPiecesPage(
187
261
  hasMore = page.hasMore
188
262
  }
189
263
  }
264
+
265
/**
 * Walk every page of `add_piece` operations for a source dataset, invoking `onPage` per batch.
 *
 * Replication pagination intentionally has no CID dedupe state so duplicate pieces are preserved.
 * Iteration stops when a page reports no further pages or comes back empty.
 *
 * @param options - Same inputs as {@link getDataSetPiecesPage} except pagination cursor.
 * @param onPage - Async handler for each page result (e.g. batch insert into local DB).
 */
export async function forEachDataSetPiecesPage(
  options: ForEachDataSetPiecesPageOptions,
  onPage: (page: GetDataSetPiecesPageResult) => Promise<void>
): Promise<void> {
  let offset = 0
  let hasMore = true

  while (hasMore) {
    const page = await getDataSetPiecesPage({
      ...options,
      offset,
    })

    await onPage(page)

    offset = page.nextOffset
    // An empty page cannot advance the offset. Without this check a `limit` of 0
    // (0 rows === limit 0 reports `hasMore`) would loop forever.
    hasMore = page.hasMore && page.operations.length > 0
  }
}
@@ -6,7 +6,7 @@ import type { LocalDatabase, WalletClient } from '../types.ts'
6
6
  const targetDatasetCache = new Map<number, getDataSet.OutputType>()
7
7
 
8
8
  /**
9
- * Get the single IPFS-enabled target dataset for a repair.
9
+ * Get a target dataset for a repair.
10
10
  *
11
11
  * @param options - The options for getting the target dataset.
12
12
  */
@@ -47,6 +47,7 @@ export async function repairCreate(options: RepairCreateOptions): Promise<number
47
47
  .insert(localSchema.repairs)
48
48
  .values({
49
49
  repairProviderId,
50
+ repairDataSetId: null,
50
51
  targetProviderId: targetProvider.providerId,
51
52
  targetProviderUrl: targetProvider.serviceUrl,
52
53
  targetDataSetId: null,
@@ -19,11 +19,6 @@ export async function repairDelete({ localDb, repairId }: RepairDeleteOptions):
19
19
  const repair = await localDb.query.repairs.findFirst({
20
20
  where: eq(localSchema.repairs.id, repairId),
21
21
  columns: { id: true },
22
- with: {
23
- operations: {
24
- columns: { id: true },
25
- },
26
- },
27
22
  })
28
23
 
29
24
  if (!repair) {
@@ -0,0 +1,106 @@
1
+ import { taskLog } from '@clack/prompts'
2
+ import { eq } from 'drizzle-orm'
3
+ import { getBlockNumber } from 'viem/actions'
4
+ import { NoAlternateProviderError, RepairCreationError } from '../error.ts'
5
+ import type { Context } from '../types.ts'
6
+ import { forEachDataSetPiecesPage } from './get-pieces.ts'
7
+ import { getRepairProvider } from './get-repair-provider.ts'
8
+
9
/** Inputs for {@link replicateCreate}: the shared command context plus the replication endpoints. */
export interface ReplicateCreateOptions extends Context {
  /** Source dataset whose pieces are replicated. */
  dataSetId: bigint
  /** Provider that should receive the replicated pieces. */
  targetProviderId: bigint
}

/**
 * Prepare a replication job by creating a local row and inserting every source dataset piece.
 *
 * If anything fails after the job row has been inserted but before all
 * operations are stored, the partially populated job is removed again
 * (best effort) so that only fully populated jobs remain in the local database.
 * The task log is finalised on both success and failure.
 *
 * @param options - The options for creating a replication job.
 * @returns The ID of the created local job.
 * @throws RepairCreationError when the source dataset or source provider is
 *   missing, the target equals the source provider, or the row insert returns nothing.
 * @throws NoAlternateProviderError when the target provider cannot be resolved.
 */
export async function replicateCreate(options: ReplicateCreateOptions): Promise<number> {
  const { indexerDb, localDb, dataSetId, targetProviderId, client } = options
  const indexerSchema = indexerDb._.fullSchema
  const localSchema = localDb._.fullSchema
  const now = Date.now()
  const blockNumber = await getBlockNumber(client)

  const log = taskLog({
    title: 'Creating replication',
    limit: 10,
    retainLog: true,
  })

  // Tracks how far creation got, so the failure path knows whether there is anything to undo.
  let createdRepairId: number | undefined
  let populated = false

  try {
    const [sourceDataSet] = await indexerDb
      .select({
        dataSetId: indexerSchema.dataSets.dataSetId,
        providerId: indexerSchema.dataSets.providerId,
        deleted: indexerSchema.dataSets.deleted,
      })
      .from(indexerSchema.dataSets)
      .where(eq(indexerSchema.dataSets.dataSetId, dataSetId))
      .limit(1)

    if (!sourceDataSet || sourceDataSet.deleted) {
      throw new RepairCreationError(`Source dataset ${dataSetId} not found or deleted`)
    }

    if (targetProviderId === sourceDataSet.providerId) {
      throw new RepairCreationError('Target provider must differ from the source dataset provider')
    }

    const sourceProvider = await getRepairProvider({
      indexerDb,
      providerId: sourceDataSet.providerId,
    })

    if (!sourceProvider) {
      throw new RepairCreationError(`Source provider ${sourceDataSet.providerId} not found or inactive`)
    }

    const targetProvider = await getRepairProvider({
      indexerDb,
      providerId: targetProviderId,
    })

    if (!targetProvider) {
      throw new NoAlternateProviderError(targetProviderId)
    }

    // The job row stores the replication source in the `repair*` columns.
    const [repair] = await localDb
      .insert(localSchema.repairs)
      .values({
        repairProviderId: sourceProvider.providerId,
        repairDataSetId: sourceDataSet.dataSetId,
        targetProviderId: targetProvider.providerId,
        targetProviderUrl: targetProvider.serviceUrl,
        targetDataSetId: null,
        blockNumber,
        createdAt: now,
        updatedAt: now,
      })
      .returning({ id: localSchema.repairs.id })

    if (!repair) throw new RepairCreationError('Failed to create replication row')
    createdRepairId = repair.id

    let totalOperations = 0
    await forEachDataSetPiecesPage(
      {
        indexerDb,
        dataSetId: sourceDataSet.dataSetId,
        repairId: repair.id,
        sourceProviderUrl: sourceProvider.serviceUrl,
      },
      async (page) => {
        totalOperations += page.operations.length

        if (page.operations.length > 0) {
          await localDb.insert(localSchema.operations).values(page.operations)
        }

        log.message(`Inserted ${page.operations.length} operations`)
      }
    )
    populated = true

    log.success(`Created replication ${repair.id} with ${totalOperations} operations`, { showLog: true })
    return repair.id
  } catch (error) {
    // Undo a half-created job: remove its operations first, then the job row itself.
    if (createdRepairId !== undefined && !populated) {
      try {
        await localDb.delete(localSchema.operations).where(eq(localSchema.operations.repairId, createdRepairId))
        await localDb.delete(localSchema.repairs).where(eq(localSchema.repairs.id, createdRepairId))
      } catch (cleanupError) {
        // Cleanup is best effort; never let it hide the original failure.
        log.message(
          `Failed to clean up replication ${createdRepairId}: ${
            cleanupError instanceof Error ? cleanupError.message : String(cleanupError)
          }`
        )
      }
    }

    log.error(error instanceof Error ? error.message : 'Failed to create replication')
    throw error
  }
}
@@ -18,6 +18,6 @@ export async function upsertOperations({ localDb, operations }: UpsertOperations
18
18
  .values(operations.map((operation) => ({ ...operation, updatedAt: now })))
19
19
  .onConflictDoUpdate({
20
20
  target: localDb._.fullSchema.operations.id,
21
- set: buildConflictUpdateColumns(localSchema.operations, ['status', 'error', 'updatedAt']),
21
+ set: buildConflictUpdateColumns(localSchema.operations, ['status', 'error', 'updatedAt', 'result']),
22
22
  })
23
23
  }
@@ -51,6 +51,7 @@ export const repairs = table('repairs', {
51
51
  id: t.int().primaryKey({ autoIncrement: true }),
52
52
  status: t.text().$type<RepairStatus>().notNull().default('pending'),
53
53
  repairProviderId: bigintType('repair_provider_id').notNull(),
54
+ repairDataSetId: bigintType('repair_data_set_id'),
54
55
  targetProviderId: bigintType('target_provider_id').notNull(),
55
56
  targetProviderUrl: t.text('target_provider_url').notNull(),
56
57
  targetDataSetId: bigintType('target_data_set_id'),
package/src/middleware.ts CHANGED
@@ -12,10 +12,20 @@ export const contextSchema = z.object({
12
12
  config: z.custom<typeof config>(),
13
13
  client: z.custom<Client<Transport, Chain, Account>>(),
14
14
  chain: z.custom<Chain>(),
15
+ source: z.string(),
15
16
  })
16
17
 
17
18
  export const contextMiddleware = middleware<typeof contextSchema>(async (c, next) => {
18
- const { dbPath, chainId, indexerMainnetUrl, indexerCalibrationUrl } = config.store
19
+ const { dbPath, chainId, indexerMainnetUrl, indexerCalibrationUrl, source } = config.store
20
+
21
+ if (!dbPath || !chainId || !indexerMainnetUrl || !indexerCalibrationUrl || !source) {
22
+ return c.error({
23
+ code: 'CONFIG_NOT_SET',
24
+ message: 'Config not set. Please run `repair setup` first.',
25
+ retryable: false,
26
+ })
27
+ }
28
+
19
29
  const localDb = await createLocalDatabase(dbPath)
20
30
  const indexerDb = drizzlePostgres(chainId === 314 ? indexerMainnetUrl : indexerCalibrationUrl, {
21
31
  schema: indexerSchema,
@@ -27,6 +37,7 @@ export const contextMiddleware = middleware<typeof contextSchema>(async (c, next
27
37
  c.set('config', config)
28
38
  c.set('client', client)
29
39
  c.set('chain', chain)
40
+ c.set('source', source)
30
41
  await next()
31
42
 
32
43
  localDb.$client.close()