@filoz/repair-cli 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +1 -1
- package/dist/src/cli.js +2 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/commands/datasets.d.ts +1 -0
- package/dist/src/commands/datasets.d.ts.map +1 -1
- package/dist/src/commands/datasets.js +14 -3
- package/dist/src/commands/datasets.js.map +1 -1
- package/dist/src/commands/providers.d.ts +1 -0
- package/dist/src/commands/providers.d.ts.map +1 -1
- package/dist/src/commands/providers.js.map +1 -1
- package/dist/src/commands/repair.d.ts +1 -0
- package/dist/src/commands/repair.d.ts.map +1 -1
- package/dist/src/commands/repair.js +7 -8
- package/dist/src/commands/repair.js.map +1 -1
- package/dist/src/commands/replicate.d.ts +24 -0
- package/dist/src/commands/replicate.d.ts.map +1 -0
- package/dist/src/commands/replicate.js +171 -0
- package/dist/src/commands/replicate.js.map +1 -0
- package/dist/src/commands/setup.d.ts.map +1 -1
- package/dist/src/commands/setup.js +15 -0
- package/dist/src/commands/setup.js.map +1 -1
- package/dist/src/commands/wallet.d.ts +1 -0
- package/dist/src/commands/wallet.d.ts.map +1 -1
- package/dist/src/db/dedupe-cids.d.ts +22 -0
- package/dist/src/db/dedupe-cids.d.ts.map +1 -0
- package/dist/src/db/dedupe-cids.js +28 -0
- package/dist/src/db/dedupe-cids.js.map +1 -0
- package/dist/src/db/find-providers-by-cid.d.ts +9 -0
- package/dist/src/db/find-providers-by-cid.d.ts.map +1 -0
- package/dist/src/db/{get-providers-by-cid.js → find-providers-by-cid.js} +4 -6
- package/dist/src/db/find-providers-by-cid.js.map +1 -0
- package/dist/src/db/find-repair-dataset.d.ts +10 -0
- package/dist/src/db/find-repair-dataset.d.ts.map +1 -0
- package/dist/src/db/{get-repair-dataset.js → find-repair-dataset.js} +3 -4
- package/dist/src/db/find-repair-dataset.js.map +1 -0
- package/dist/src/db/get-pieces.d.ts +16 -0
- package/dist/src/db/get-pieces.d.ts.map +1 -1
- package/dist/src/db/get-pieces.js +44 -3
- package/dist/src/db/get-pieces.js.map +1 -1
- package/dist/src/db/repair-create.d.ts.map +1 -1
- package/dist/src/db/repair-create.js +1 -0
- package/dist/src/db/repair-create.js.map +1 -1
- package/dist/src/db/repair-delete.d.ts.map +1 -1
- package/dist/src/db/repair-delete.js +0 -5
- package/dist/src/db/repair-delete.js.map +1 -1
- package/dist/src/db/replicate-create.d.ts +7 -0
- package/dist/src/db/replicate-create.d.ts.map +1 -0
- package/dist/src/db/replicate-create.js +78 -0
- package/dist/src/db/replicate-create.js.map +1 -0
- package/dist/src/db/upsert-operations.js +1 -1
- package/dist/src/db/upsert-operations.js.map +1 -1
- package/dist/src/local-schema.d.ts +19 -0
- package/dist/src/local-schema.d.ts.map +1 -1
- package/dist/src/local-schema.js +1 -0
- package/dist/src/local-schema.js.map +1 -1
- package/dist/src/middleware.d.ts +2 -0
- package/dist/src/middleware.d.ts.map +1 -1
- package/dist/src/middleware.js +4 -2
- package/dist/src/middleware.js.map +1 -1
- package/dist/src/pipeline/add-pieces.d.ts +12 -0
- package/dist/src/pipeline/add-pieces.d.ts.map +1 -0
- package/dist/src/pipeline/add-pieces.js +142 -0
- package/dist/src/pipeline/add-pieces.js.map +1 -0
- package/dist/src/pipeline/create-datasets.d.ts +3 -1
- package/dist/src/pipeline/create-datasets.d.ts.map +1 -1
- package/dist/src/pipeline/create-datasets.js +57 -5
- package/dist/src/pipeline/create-datasets.js.map +1 -1
- package/dist/src/utils.d.ts +32 -2
- package/dist/src/utils.d.ts.map +1 -1
- package/dist/src/utils.js +40 -7
- package/dist/src/utils.js.map +1 -1
- package/package.json +1 -1
- package/readme.md +110 -7
- package/src/cli.ts +2 -0
- package/src/commands/datasets.ts +15 -4
- package/src/commands/providers.ts +0 -1
- package/src/commands/repair.ts +12 -8
- package/src/commands/replicate.ts +183 -0
- package/src/commands/setup.ts +16 -0
- package/src/db/dedupe-cids.ts +49 -0
- package/src/db/{get-providers-by-cid.ts → find-providers-by-cid.ts} +5 -10
- package/src/db/{get-repair-dataset.ts → find-repair-dataset.ts} +6 -5
- package/src/db/get-pieces.ts +105 -3
- package/src/db/get-target-dataset.ts +1 -1
- package/src/db/repair-create.ts +1 -0
- package/src/db/repair-delete.ts +0 -5
- package/src/db/replicate-create.ts +106 -0
- package/src/db/upsert-operations.ts +1 -1
- package/src/local-schema.ts +1 -0
- package/src/middleware.ts +4 -2
- package/src/pipeline/add-pieces.ts +214 -0
- package/src/pipeline/create-datasets.ts +71 -5
- package/src/utils.ts +49 -10
- package/dist/src/db/get-providers-by-cid.d.ts +0 -10
- package/dist/src/db/get-providers-by-cid.d.ts.map +0 -1
- package/dist/src/db/get-providers-by-cid.js.map +0 -1
- package/dist/src/db/get-repair-dataset.d.ts +0 -9
- package/dist/src/db/get-repair-dataset.d.ts.map +0 -1
- package/dist/src/db/get-repair-dataset.js.map +0 -1
- package/dist/src/db/sync-pieces-onchain.d.ts +0 -10
- package/dist/src/db/sync-pieces-onchain.d.ts.map +0 -1
- package/dist/src/db/sync-pieces-onchain.js +0 -35
- package/dist/src/db/sync-pieces-onchain.js.map +0 -1
- package/dist/src/pipeline/pull.d.ts +0 -30
- package/dist/src/pipeline/pull.d.ts.map +0 -1
- package/dist/src/pipeline/pull.js +0 -169
- package/dist/src/pipeline/pull.js.map +0 -1
- package/src/db/sync-pieces-onchain.ts +0 -53
- package/src/pipeline/pull.ts +0 -255
package/src/db/get-pieces.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { and, asc, eq, isNull, lte, or } from 'drizzle-orm'
|
|
|
3
3
|
import pMap from 'p-map'
|
|
4
4
|
import type { OperationInsert } from '../local-schema.ts'
|
|
5
5
|
import type { IndexerDatabase } from '../types.ts'
|
|
6
|
-
import {
|
|
6
|
+
import { findProvidersByCid } from './find-providers-by-cid.ts'
|
|
7
7
|
|
|
8
8
|
/** Default page size when paginating pieces from the indexer. */
|
|
9
9
|
export const DEFAULT_PIECES_PAGE_SIZE = 3000
|
|
@@ -43,6 +43,34 @@ export type GetPiecesPageResult = {
|
|
|
43
43
|
/** Options for {@link forEachPiecesPage}; pagination state is managed internally. */
|
|
44
44
|
export type ForEachPiecesPageOptions = Omit<GetPiecesPageOptions, 'offset' | 'seenCids'>
|
|
45
45
|
|
|
46
|
+
/** Options for fetching one page of source-dataset pieces for replication. */
export type GetDataSetPiecesPageOptions = {
  /** Indexer database the source dataset pieces are read from. */
  indexerDb: IndexerDatabase
  /** Source dataset whose pieces are being replicated. */
  dataSetId: bigint
  /** Local job row to attach operations to. */
  repairId: number
  /** Provider URL that serves the source dataset pieces. */
  sourceProviderUrl: string
  /** Max indexer rows per page. Defaults to {@link DEFAULT_PIECES_PAGE_SIZE}. */
  limit?: number
  /** SQL offset for the indexer query. Defaults to 0 (first page). */
  offset?: number
}
|
|
60
|
+
|
|
61
|
+
/** Result of a single {@link getDataSetPiecesPage} call. */
export type GetDataSetPiecesPageResult = {
  /** `add_piece` operations ready to insert for this page; may be empty. */
  operations: OperationInsert[]
  /** Whether another indexer page may exist after this one. */
  hasMore: boolean
  /** Offset to pass as `offset` on the next page (current offset plus rows read). */
  nextOffset: number
}
|
|
70
|
+
|
|
71
|
+
/**
 * Options for {@link forEachDataSetPiecesPage}; pagination state is managed internally,
 * so `offset` is omitted from the per-page options.
 */
export type ForEachDataSetPiecesPageOptions = Omit<GetDataSetPiecesPageOptions, 'offset'>
|
|
73
|
+
|
|
46
74
|
type PieceForOperation = {
|
|
47
75
|
cid: string
|
|
48
76
|
metadata: Record<string, string> | null
|
|
@@ -107,10 +135,9 @@ export async function getPiecesPage({
|
|
|
107
135
|
}
|
|
108
136
|
|
|
109
137
|
// Resolve pull sources in one query per page; exclude the provider being repaired from alternates.
|
|
110
|
-
const providersByCid = await
|
|
138
|
+
const providersByCid = await findProvidersByCid({
|
|
111
139
|
indexerDb,
|
|
112
140
|
cids: pieces.map((piece) => piece.cid),
|
|
113
|
-
excludedProviderIds: [],
|
|
114
141
|
blockNumber,
|
|
115
142
|
})
|
|
116
143
|
|
|
@@ -157,6 +184,53 @@ export async function getPiecesPage({
|
|
|
157
184
|
}
|
|
158
185
|
}
|
|
159
186
|
|
|
187
|
+
/**
 * Fetch one page of pieces for a specific dataset and map them to replication operations.
 *
 * Rows are read in ascending `pieceId` order and pieces flagged as removed are skipped.
 * Unlike repairs, replication preserves source dataset ordering and does not dedupe
 * repeated CIDs: every returned row becomes exactly one pending `add_piece` operation
 * whose alternate provider is the source provider URL.
 *
 * @param options - Indexer connection, dataset context, and optional pagination state.
 * @returns Operations for this page plus pagination cursors. `hasMore` is `true` only when
 * the page is non-empty and completely full.
 */
export async function getDataSetPiecesPage({
  indexerDb,
  dataSetId,
  repairId,
  sourceProviderUrl,
  limit = DEFAULT_PIECES_PAGE_SIZE,
  offset = 0,
}: GetDataSetPiecesPageOptions): Promise<GetDataSetPiecesPageResult> {
  const schema = indexerDb._.fullSchema
  const rows = await indexerDb
    .select({
      cid: schema.pieces.cid,
      metadata: schema.pieces.metadata,
    })
    .from(schema.pieces)
    .where(and(eq(schema.pieces.dataSetId, dataSetId), eq(schema.pieces.removed, false)))
    .orderBy(asc(schema.pieces.pieceId))
    .limit(limit)
    .offset(offset)

  // One timestamp for the whole page so every operation of the page shares it.
  const now = Date.now()
  const operations: OperationInsert[] = rows.map(({ cid, metadata }) => ({
    repairId,
    type: 'add_piece',
    status: 'pending',
    cid,
    // Pieces without stored metadata are replicated with an empty metadata object.
    metadata: metadata ?? {},
    alternateProvider: sourceProviderUrl,
    createdAt: now,
    updatedAt: now,
  }))

  return {
    operations,
    // An empty page can never be followed by more rows. Without the non-empty check a
    // `limit` of 0 reports `hasMore` forever while `nextOffset` never advances, which
    // makes `forEachDataSetPiecesPage` loop endlessly.
    hasMore: rows.length > 0 && rows.length === limit,
    nextOffset: offset + rows.length,
  }
}
|
|
233
|
+
|
|
160
234
|
/**
|
|
161
235
|
* Walk every page of `add_piece` operations for a provider, invoking `onPage` per batch.
|
|
162
236
|
*
|
|
@@ -187,3 +261,31 @@ export async function forEachPiecesPage(
|
|
|
187
261
|
hasMore = page.hasMore
|
|
188
262
|
}
|
|
189
263
|
}
|
|
264
|
+
|
|
265
|
+
/**
 * Iterate over a source dataset page by page, handing each page of `add_piece`
 * operations to `onPage` before the next page is fetched.
 *
 * No CID dedupe state is kept between pages, so duplicate pieces of the source dataset
 * are preserved. At least one page is always fetched, even when the dataset is empty.
 *
 * @param options - Same inputs as {@link getDataSetPiecesPage} except pagination cursor.
 * @param onPage - Async handler for each page result (e.g. batch insert into local DB).
 */
export async function forEachDataSetPiecesPage(
  options: ForEachDataSetPiecesPageOptions,
  onPage: (page: GetDataSetPiecesPageResult) => Promise<void>
): Promise<void> {
  let nextOffset = 0
  let morePages: boolean

  do {
    const current = await getDataSetPiecesPage({ ...options, offset: nextOffset })

    // The handler finishes before the cursor moves on, so pages are processed strictly in order.
    await onPage(current)

    nextOffset = current.nextOffset
    morePages = current.hasMore
  } while (morePages)
}
|
|
@@ -6,7 +6,7 @@ import type { LocalDatabase, WalletClient } from '../types.ts'
|
|
|
6
6
|
const targetDatasetCache = new Map<number, getDataSet.OutputType>()
|
|
7
7
|
|
|
8
8
|
/**
|
|
9
|
-
* Get
|
|
9
|
+
* Get the target dataset for a repair.
|
|
10
10
|
*
|
|
11
11
|
* @param options - The options for getting the target dataset.
|
|
12
12
|
*/
|
package/src/db/repair-create.ts
CHANGED
|
@@ -47,6 +47,7 @@ export async function repairCreate(options: RepairCreateOptions): Promise<number
|
|
|
47
47
|
.insert(localSchema.repairs)
|
|
48
48
|
.values({
|
|
49
49
|
repairProviderId,
|
|
50
|
+
repairDataSetId: null,
|
|
50
51
|
targetProviderId: targetProvider.providerId,
|
|
51
52
|
targetProviderUrl: targetProvider.serviceUrl,
|
|
52
53
|
targetDataSetId: null,
|
package/src/db/repair-delete.ts
CHANGED
|
@@ -19,11 +19,6 @@ export async function repairDelete({ localDb, repairId }: RepairDeleteOptions):
|
|
|
19
19
|
const repair = await localDb.query.repairs.findFirst({
|
|
20
20
|
where: eq(localSchema.repairs.id, repairId),
|
|
21
21
|
columns: { id: true },
|
|
22
|
-
with: {
|
|
23
|
-
operations: {
|
|
24
|
-
columns: { id: true },
|
|
25
|
-
},
|
|
26
|
-
},
|
|
27
22
|
})
|
|
28
23
|
|
|
29
24
|
if (!repair) {
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { taskLog } from '@clack/prompts'
|
|
2
|
+
import { eq } from 'drizzle-orm'
|
|
3
|
+
import { getBlockNumber } from 'viem/actions'
|
|
4
|
+
import { NoAlternateProviderError, RepairCreationError } from '../error.ts'
|
|
5
|
+
import type { Context } from '../types.ts'
|
|
6
|
+
import { forEachDataSetPiecesPage } from './get-pieces.ts'
|
|
7
|
+
import { getRepairProvider } from './get-repair-provider.ts'
|
|
8
|
+
|
|
9
|
+
/** Options for {@link replicateCreate}: the shared command context plus the replication endpoints. */
export interface ReplicateCreateOptions extends Context {
  /** Source dataset whose pieces should be replicated. */
  dataSetId: bigint
  /** Provider that should receive the replicated pieces; must differ from the source dataset's provider. */
  targetProviderId: bigint
}
|
|
13
|
+
|
|
14
|
+
/**
 * Create a local replication job for a source dataset and queue one `add_piece`
 * operation per piece of that dataset.
 *
 * Validation happens before anything is written locally: the source dataset must exist
 * and not be deleted, the target provider must differ from the dataset's provider, and
 * both providers must resolve through `getRepairProvider`.
 *
 * @param options - Command context plus the source dataset ID and target provider ID.
 * @returns The ID of the created local job.
 * @throws {RepairCreationError} When the source dataset or source provider is unusable,
 * when source and target provider are the same, or when the job row cannot be created.
 * @throws {NoAlternateProviderError} When the target provider cannot be resolved.
 */
export async function replicateCreate(options: ReplicateCreateOptions): Promise<number> {
  const { indexerDb, localDb, client, dataSetId, targetProviderId } = options
  const { dataSets } = indexerDb._.fullSchema
  const { repairs, operations } = localDb._.fullSchema
  const now = Date.now()
  const blockNumber = await getBlockNumber(client)

  const log = taskLog({ title: 'Creating replication', limit: 10, retainLog: true })

  // Look up the source dataset; at most one row is expected for a dataset ID.
  const matches = await indexerDb
    .select({
      dataSetId: dataSets.dataSetId,
      providerId: dataSets.providerId,
      deleted: dataSets.deleted,
    })
    .from(dataSets)
    .where(eq(dataSets.dataSetId, dataSetId))
    .limit(1)
  const source = matches.at(0)

  if (!source || source.deleted) {
    throw new RepairCreationError(`Source dataset ${dataSetId} not found or deleted`)
  }
  if (targetProviderId === source.providerId) {
    throw new RepairCreationError('Target provider must differ from the source dataset provider')
  }

  const sourceProvider = await getRepairProvider({ indexerDb, providerId: source.providerId })
  if (!sourceProvider) {
    throw new RepairCreationError(`Source provider ${source.providerId} not found or inactive`)
  }

  const targetProvider = await getRepairProvider({ indexerDb, providerId: targetProviderId })
  if (!targetProvider) {
    throw new NoAlternateProviderError(targetProviderId)
  }

  // A set `repairDataSetId` is what distinguishes a replication job from a plain repair.
  const inserted = await localDb
    .insert(repairs)
    .values({
      repairProviderId: sourceProvider.providerId,
      repairDataSetId: source.dataSetId,
      targetProviderId: targetProvider.providerId,
      targetProviderUrl: targetProvider.serviceUrl,
      targetDataSetId: null,
      blockNumber,
      createdAt: now,
      updatedAt: now,
    })
    .returning({ id: repairs.id })
  const job = inserted.at(0)

  if (!job) throw new RepairCreationError('Failed to create replication row')

  let totalOperations = 0
  await forEachDataSetPiecesPage(
    {
      indexerDb,
      dataSetId: source.dataSetId,
      repairId: job.id,
      sourceProviderUrl: sourceProvider.serviceUrl,
    },
    async ({ operations: pageOperations }) => {
      const pageSize = pageOperations.length
      totalOperations += pageSize

      // Empty pages are logged but never sent to the insert.
      if (pageSize > 0) {
        await localDb.insert(operations).values(pageOperations)
      }

      log.message(`Inserted ${pageSize} operations`)
    }
  )

  log.success(`Created replication ${job.id} with ${totalOperations} operations`, { showLog: true })
  return job.id
}
|
|
@@ -18,6 +18,6 @@ export async function upsertOperations({ localDb, operations }: UpsertOperations
|
|
|
18
18
|
.values(operations.map((operation) => ({ ...operation, updatedAt: now })))
|
|
19
19
|
.onConflictDoUpdate({
|
|
20
20
|
target: localDb._.fullSchema.operations.id,
|
|
21
|
-
set: buildConflictUpdateColumns(localSchema.operations, ['status', 'error', 'updatedAt']),
|
|
21
|
+
set: buildConflictUpdateColumns(localSchema.operations, ['status', 'error', 'updatedAt', 'result']),
|
|
22
22
|
})
|
|
23
23
|
}
|
package/src/local-schema.ts
CHANGED
|
@@ -51,6 +51,7 @@ export const repairs = table('repairs', {
|
|
|
51
51
|
id: t.int().primaryKey({ autoIncrement: true }),
|
|
52
52
|
status: t.text().$type<RepairStatus>().notNull().default('pending'),
|
|
53
53
|
repairProviderId: bigintType('repair_provider_id').notNull(),
|
|
54
|
+
repairDataSetId: bigintType('repair_data_set_id'),
|
|
54
55
|
targetProviderId: bigintType('target_provider_id').notNull(),
|
|
55
56
|
targetProviderUrl: t.text('target_provider_url').notNull(),
|
|
56
57
|
targetDataSetId: bigintType('target_data_set_id'),
|
package/src/middleware.ts
CHANGED
|
@@ -12,12 +12,13 @@ export const contextSchema = z.object({
|
|
|
12
12
|
config: z.custom<typeof config>(),
|
|
13
13
|
client: z.custom<Client<Transport, Chain, Account>>(),
|
|
14
14
|
chain: z.custom<Chain>(),
|
|
15
|
+
source: z.string(),
|
|
15
16
|
})
|
|
16
17
|
|
|
17
18
|
export const contextMiddleware = middleware<typeof contextSchema>(async (c, next) => {
|
|
18
|
-
const { dbPath, chainId, indexerMainnetUrl, indexerCalibrationUrl } = config.store
|
|
19
|
+
const { dbPath, chainId, indexerMainnetUrl, indexerCalibrationUrl, source } = config.store
|
|
19
20
|
|
|
20
|
-
if (!dbPath || !chainId || !indexerMainnetUrl || !indexerCalibrationUrl) {
|
|
21
|
+
if (!dbPath || !chainId || !indexerMainnetUrl || !indexerCalibrationUrl || !source) {
|
|
21
22
|
return c.error({
|
|
22
23
|
code: 'CONFIG_NOT_SET',
|
|
23
24
|
message: 'Config not set. Please run `repair setup` first.',
|
|
@@ -36,6 +37,7 @@ export const contextMiddleware = middleware<typeof contextSchema>(async (c, next
|
|
|
36
37
|
c.set('config', config)
|
|
37
38
|
c.set('client', client)
|
|
38
39
|
c.set('chain', chain)
|
|
40
|
+
c.set('source', source)
|
|
39
41
|
await next()
|
|
40
42
|
|
|
41
43
|
localDb.$client.close()
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { taskLog } from '@clack/prompts'
|
|
2
|
+
import * as Piece from '@filoz/synapse-core/piece'
|
|
3
|
+
import * as SP from '@filoz/synapse-core/sp'
|
|
4
|
+
import { and, asc, eq, gt, inArray } from 'drizzle-orm'
|
|
5
|
+
import PQueue from 'p-queue'
|
|
6
|
+
import { dedupeCids } from '../db/dedupe-cids.ts'
|
|
7
|
+
import { getTargetDataset } from '../db/get-target-dataset.ts'
|
|
8
|
+
import { repairUpdate } from '../db/repair-update.ts'
|
|
9
|
+
import { upsertOperations } from '../db/upsert-operations.ts'
|
|
10
|
+
import type { OperationSelect, RepairSelect } from '../local-schema.ts'
|
|
11
|
+
import type { IndexerDatabase, LocalDatabase, WalletClient } from '../types.ts'
|
|
12
|
+
import { excludeOperationsByCid, hashLink, operationsToPullPieces } from '../utils.ts'
|
|
13
|
+
|
|
14
|
+
/** Options for {@link runAddPieces}. */
export type RunPullPiecesPhaseOptions = {
  /** Local database holding the job row and its operations. */
  localDb: LocalDatabase
  /** Indexer database, forwarded to the CID dedupe step of repair jobs. */
  indexerDb: IndexerDatabase
  /** Local job (repair or replication) whose operations are processed. */
  repair: RepairSelect
  /** Concurrency passed to the batch queue. */
  concurrency: number
  /** Maximum number of operations loaded per batch. */
  batchSize: number
  /** Wallet client used for the provider calls. */
  client: WalletClient
}
|
|
22
|
+
|
|
23
|
+
/** Dependencies captured by the worker returned from `createAddPiecesWorker`. */
type CreateAddPiecesWorkerOptions = {
  localDb: LocalDatabase
  indexerDb: IndexerDatabase
  repair: RepairSelect
  client: WalletClient
  /** Progress counters for one run; the worker reads `totalBatches` and updates the completed/failed counts. */
  state: {
    totalBatches: number
    totalOperations: number
    completedOperations: number
    failedOperations: number
  }
  /** Task log under which each batch opens its own message group. */
  log: ReturnType<typeof taskLog>
}
|
|
36
|
+
|
|
37
|
+
/** Input of a single worker invocation: one batch of operations. */
type AddPiecesJobOptions = {
  /** Operations that make up this batch. */
  operations: OperationSelect[]
  /** Batch number, used only for the progress label. */
  batchNumber: number
}
|
|
41
|
+
|
|
42
|
+
/**
 * Create a worker function that processes one batch of `add_piece` operations: it pulls
 * the pieces to the target provider and then adds the pulled pieces to the target dataset.
 *
 * Per batch the worker:
 * 1. resolves the target dataset of the job,
 * 2. for repair jobs (no source dataset ID) dedupes the batch by CID against the target dataset,
 * 3. pulls the pieces and marks every operation whose pull did not complete as failed,
 * 4. adds the remaining pieces and marks their operations as completed.
 *
 * Any error thrown along the way is caught, logged to the batch group, and recorded on the
 * operations still pending in the batch. Counters in `state` are tracked per operation.
 *
 * @param options - Databases, job row, wallet client, shared progress state and task log.
 * @returns An async function that processes a single batch.
 */
function createAddPiecesWorker({ localDb, indexerDb, repair, client, state, log }: CreateAddPiecesWorkerOptions) {
  return async (options: AddPiecesJobOptions) => {
    let completedOps = 0
    let failedOps = 0
    // Shrinks as operations fail; whatever is left is what gets committed (or failed in `catch`).
    let operations: OperationSelect[] = options.operations

    const group = log.group(`Batch ${options.batchNumber}/${state.totalBatches}`)

    try {
      const dataset = await getTargetDataset({ localDb, repairId: repair.id, client })

      // Only repair jobs dedupe by CID on the target dataset; replication jobs carry a
      // source dataset ID and keep repeated CIDs.
      if (repair.repairDataSetId == null) {
        operations = await dedupeCids({ indexerDb, localDb, dataSetId: dataset.dataSetId, operations })
      }
      group.message(`Pulling ${operations.length} pieces...`)

      // pull pieces
      if (operations.length > 0) {
        const pullResult = await SP.waitForPullPieces(client, {
          serviceURL: repair.targetProviderUrl,
          dataSetId: dataset.dataSetId,
          clientDataSetId: dataset.clientDataSetId,
          pieces: operationsToPullPieces(operations),
          timeout: 1000 * 60 * 30, // 30 minutes
          onStatus: (status) => {
            const completed = status.pieces.filter((piece) => piece.status === 'complete').length
            const failed = status.pieces.filter((piece) => piece.status === 'failed').length
            group.message(`Pull ${completed} completed, ${failed} failed`)
          },
        })

        for (const { pieceCid, status } of pullResult.pieces) {
          if (status === 'complete') continue

          const { operationToFailed, operationToCommit } = excludeOperationsByCid(operations, pieceCid.toString())
          operations = operationToCommit

          // Nothing left to fail for this CID (e.g. the CID was reported more than once).
          // Skipping also avoids an upsert with an empty value list.
          if (operationToFailed.length === 0) continue

          // Count per operation, not per piece: one CID can back several operations.
          state.failedOperations += operationToFailed.length
          failedOps += operationToFailed.length
          await upsertOperations({
            localDb,
            operations: operationToFailed.map((operation) => ({
              ...operation,
              status: 'failed',
              error: `pull failed with status ${status}`,
            })),
          })
        }
      }

      // add pieces
      if (operations.length > 0) {
        group.message(`Adding ${operations.length} pieces...`)
        const addPiecesResult = await SP.addPieces(client, {
          serviceURL: repair.targetProviderUrl,
          dataSetId: dataset.dataSetId,
          clientDataSetId: dataset.clientDataSetId,
          pieces: operations.map((operation) => ({
            pieceCid: Piece.from(operation.cid),
            metadata: operation.metadata,
          })),
        })

        group.message(`Waiting for add pieces ${hashLink(addPiecesResult.txHash, client.chain)}...`)
        const confirmedResult = await SP.waitForAddPieces(addPiecesResult)
        await upsertOperations({
          localDb,
          operations: operations.map((operation) => ({
            ...operation,
            status: 'completed',
            error: null,
            result: { dataSetId: confirmedResult.dataSetId, txHash: confirmedResult.txHash },
          })),
        })
        // Count only after the completion has been persisted; otherwise a failing write
        // would be counted as completed here and as failed again in the `catch` below.
        state.completedOperations += operations.length
        completedOps += operations.length
      }
      group.message(`Done. ${completedOps} added, ${failedOps} failed`)
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error'
      group.message(`${message.replace(/\n/g, ' ')}`)

      // Every operation may already have been excluded by a failed pull; in that case
      // there is nothing left to mark and an empty upsert must be avoided.
      if (operations.length > 0) {
        state.failedOperations += operations.length
        await upsertOperations({
          localDb,
          operations: operations.map((operation) => ({
            ...operation,
            status: 'failed',
            error: message,
          })),
        })
      }
    }
  }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Process every pending or previously failed `add_piece` operation of a job in batches
 * and record the overall outcome on the job row.
 *
 * Batches are read by ascending operation ID behind a moving cursor, so an operation is
 * handed to a worker at most once per run, even if it fails during this run. The job ends
 * as `failed` when at least one operation failed, otherwise as `completed`.
 *
 * @param options - Databases, job row, wallet client, queue concurrency and batch size.
 */
export async function runAddPieces({
  localDb,
  indexerDb,
  repair,
  concurrency,
  batchSize,
  client,
}: RunPullPiecesPhaseOptions): Promise<void> {
  const { operations: operationsTable } = localDb._.fullSchema

  // Conditions selecting this job's retryable `add_piece` operations; built fresh per query.
  const retryableFilters = () => [
    eq(operationsTable.repairId, repair.id),
    eq(operationsTable.type, 'add_piece'),
    inArray(operationsTable.status, ['pending', 'failed']),
  ]

  let cursor = 0
  const totalOperations = await localDb.$count(operationsTable, and(...retryableFilters()))

  const state = {
    totalBatches: Math.ceil(totalOperations / batchSize),
    totalOperations,
    completedOperations: 0,
    failedOperations: 0,
  }

  const log = taskLog({ title: 'Adding pieces', limit: 1 })

  // Load the next batch after the cursor, or `null` once nothing is left.
  const getNextBatch = async (): Promise<OperationSelect[] | null> => {
    const batch = await localDb.query.operations.findMany({
      where: and(...retryableFilters(), gt(operationsTable.id, cursor)),
      orderBy: [asc(operationsTable.id)],
      limit: batchSize,
    })
    if (batch.length === 0) return null

    cursor = batch.at(-1)?.id ?? cursor
    return batch
  }

  const addPiecesJob = createAddPiecesWorker({ localDb, indexerDb, repair, client, state, log })
  const queue = new PQueue({ concurrency })

  for (let batchNumber = 1; ; batchNumber++) {
    // Back-pressure: do not load another batch while the queue is still full.
    await queue.onSizeLessThan(concurrency)
    const operations = await getNextBatch()
    if (operations === null) break

    queue.add(() => addPiecesJob({ operations, batchNumber })).catch(console.error)
  }

  await queue.onIdle()

  log.success(`Added ${state.completedOperations} pieces, ${state.failedOperations} failed`, { showLog: true })

  const finalStatus = state.failedOperations > 0 ? 'failed' : 'completed'
  await repairUpdate({ localDb, repairId: repair.id, status: finalStatus })
}
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import * as p from '@clack/prompts'
|
|
2
2
|
import * as SP from '@filoz/synapse-core/sp'
|
|
3
3
|
import { getPDPProvider } from '@filoz/synapse-core/sp-registry'
|
|
4
|
-
import {
|
|
4
|
+
import { eq } from 'drizzle-orm'
|
|
5
|
+
import { findRepairDataset } from '../db/find-repair-dataset.ts'
|
|
5
6
|
import { repairUpdate } from '../db/repair-update.ts'
|
|
6
7
|
import type { RepairSelect } from '../local-schema.ts'
|
|
7
8
|
import type { IndexerDatabase, LocalDatabase, WalletClient } from '../types.ts'
|
|
8
|
-
import {
|
|
9
|
+
import { hashLink } from '../utils.ts'
|
|
9
10
|
|
|
10
11
|
export type EnsureRepairDatasetOptions = {
|
|
12
|
+
source: string
|
|
11
13
|
localDb: LocalDatabase
|
|
12
14
|
indexerDb: IndexerDatabase
|
|
13
15
|
client: WalletClient
|
|
@@ -19,7 +21,7 @@ export type EnsureRepairDatasetOptions = {
|
|
|
19
21
|
*
|
|
20
22
|
* @param options - The options for ensuring the repair dataset.
|
|
21
23
|
*/
|
|
22
|
-
export async function ensureRepairDataset({ localDb, indexerDb, client, repair }: EnsureRepairDatasetOptions) {
|
|
24
|
+
export async function ensureRepairDataset({ source, localDb, indexerDb, client, repair }: EnsureRepairDatasetOptions) {
|
|
23
25
|
const log = p.taskLog({
|
|
24
26
|
title: 'Ensuring repair dataset',
|
|
25
27
|
})
|
|
@@ -31,10 +33,11 @@ export async function ensureRepairDataset({ localDb, indexerDb, client, repair }
|
|
|
31
33
|
|
|
32
34
|
let datasetId: bigint | null = null
|
|
33
35
|
// check if dataset already exists
|
|
34
|
-
const existingDatasetId = await
|
|
36
|
+
const existingDatasetId = await findRepairDataset({
|
|
35
37
|
indexerDb,
|
|
36
38
|
providerId: repair.targetProviderId,
|
|
37
39
|
payer: client.account.address,
|
|
40
|
+
source,
|
|
38
41
|
})
|
|
39
42
|
|
|
40
43
|
if (existingDatasetId) {
|
|
@@ -46,7 +49,10 @@ export async function ensureRepairDataset({ localDb, indexerDb, client, repair }
|
|
|
46
49
|
serviceURL: provider.pdp.serviceURL,
|
|
47
50
|
payer: client.account.address,
|
|
48
51
|
cdn: false,
|
|
49
|
-
metadata:
|
|
52
|
+
metadata: {
|
|
53
|
+
source,
|
|
54
|
+
withIPFSIndexing: '',
|
|
55
|
+
},
|
|
50
56
|
})
|
|
51
57
|
log.message(`Waiting for data to be created at ${provider.pdp.serviceURL} ${hashLink(txHash, client.chain)}...`)
|
|
52
58
|
const waitForResult = await SP.waitForCreateDataSet({
|
|
@@ -62,3 +68,63 @@ export async function ensureRepairDataset({ localDb, indexerDb, client, repair }
|
|
|
62
68
|
})
|
|
63
69
|
return datasetId
|
|
64
70
|
}
|
|
71
|
+
|
|
72
|
+
/**
 * Make sure a replication job has a target dataset, creating a fresh one when needed.
 *
 * When the job already stores a target dataset ID, that ID is returned as is. Otherwise a
 * new dataset is created at the target provider with the source dataset's CDN flag and
 * metadata, and its ID is saved on the job row.
 *
 * @param options - The options for ensuring the replication dataset.
 * @returns The ID of the existing or newly created target dataset.
 * @throws {Error} When the job has no source dataset ID, or when the target provider or
 * the source dataset cannot be found.
 */
export async function ensureReplicateDataset({ localDb, indexerDb, client, repair }: EnsureRepairDatasetOptions) {
  const log = p.taskLog({ title: 'Ensuring replication dataset' })

  const { id: repairId, repairDataSetId, targetProviderId, targetDataSetId } = repair

  if (repairDataSetId == null) {
    throw new Error('Missing source dataset ID')
  }

  const provider = await getPDPProvider(client, { providerId: targetProviderId })
  if (!provider) {
    throw new Error(`Target provider ${targetProviderId} not found or inactive`)
  }
  const serviceURL = provider.pdp.serviceURL

  // A previous run already created the dataset: reuse it instead of creating another one.
  if (targetDataSetId != null) {
    log.success(`Data set #${targetDataSetId} already exists at ${serviceURL}`)
    return targetDataSetId
  }

  const { dataSets } = indexerDb._.fullSchema
  const sourceDataSet = await indexerDb.query.dataSets.findFirst({
    where: eq(dataSets.dataSetId, repairDataSetId),
    columns: { metadata: true, withCdn: true },
  })
  if (!sourceDataSet) {
    throw new Error(`Source dataset ${repairDataSetId} not found`)
  }

  // The new dataset takes over the source dataset's CDN flag and metadata.
  const { txHash, statusUrl } = await SP.createDataSet(client, {
    payee: provider.payee,
    serviceURL,
    payer: client.account.address,
    cdn: sourceDataSet.withCdn,
    metadata: sourceDataSet.metadata ?? undefined,
  })
  log.message(`Waiting for data to be created at ${serviceURL} ${hashLink(txHash, client.chain)}...`)

  const { dataSetId: createdDataSetId } = await SP.waitForCreateDataSet({ statusUrl })
  log.success(`Data set #${createdDataSetId} created at ${serviceURL}`)

  // Persist the new ID so later runs take the reuse path above.
  await repairUpdate({ localDb, repairId, targetDataSetId: createdDataSetId })
  return createdDataSetId
}
|