@fazetitans/fscopy 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,26 +1,13 @@
1
- import type { Firestore, Query } from 'firebase-admin/firestore';
1
+ import type { Firestore, Query, QueryDocumentSnapshot } from 'firebase-admin/firestore';
2
2
  import type { Config } from '../types.js';
3
3
  import { matchesExcludePattern } from '../utils/patterns.js';
4
- import { getSubcollections } from './helpers.js';
4
+ import { getSubcollections, buildQueryWithFilters } from './helpers.js';
5
+ import { CLEAR_PAGE_SIZE } from '../constants.js';
5
6
 
6
7
  export interface CountProgress {
7
8
  onCollection?: (path: string, count: number) => void;
8
9
  onSubcollection?: (path: string) => void;
9
- }
10
-
11
- function buildQueryWithFilters(
12
- sourceDb: Firestore,
13
- collectionPath: string,
14
- config: Config,
15
- depth: number
16
- ): Query {
17
- let query: Query = sourceDb.collection(collectionPath);
18
- if (depth === 0 && config.where.length > 0) {
19
- for (const filter of config.where) {
20
- query = query.where(filter.field, filter.operator, filter.value);
21
- }
22
- }
23
- return query;
10
+ onSubcollectionExcluded?: (name: string) => void;
24
11
  }
25
12
 
26
13
  async function countWithSubcollections(
@@ -31,30 +18,53 @@ async function countWithSubcollections(
31
18
  depth: number,
32
19
  progress?: CountProgress
33
20
  ): Promise<number> {
34
- // Apply limit at root level only
35
- if (depth === 0 && config.limit > 0) {
36
- query = query.limit(config.limit);
37
- }
21
+ const userLimit = depth === 0 && config.limit > 0 ? config.limit : 0;
22
+ let rootCount = 0;
23
+ let subCount = 0;
24
+ let lastDoc: QueryDocumentSnapshot | undefined;
25
+
26
+ while (true) {
27
+ let pageSize = CLEAR_PAGE_SIZE;
28
+ if (userLimit > 0) {
29
+ const remaining = userLimit - rootCount;
30
+ if (remaining <= 0) break;
31
+ pageSize = Math.min(pageSize, remaining);
32
+ }
38
33
 
39
- const snapshot = await query.select().get();
40
- let count = snapshot.size;
34
+ let pageQuery = query.select().limit(pageSize);
35
+ if (lastDoc) {
36
+ pageQuery = pageQuery.startAfter(lastDoc);
37
+ }
41
38
 
42
- if (depth === 0 && progress?.onCollection) {
43
- progress.onCollection(collectionPath, snapshot.size);
39
+ const snapshot = await pageQuery.get();
40
+ if (snapshot.empty) break;
41
+
42
+ rootCount += snapshot.size;
43
+
44
+ if (depth === 0 && progress?.onCollection) {
45
+ progress.onCollection(collectionPath, rootCount);
46
+ }
47
+
48
+ for (const doc of snapshot.docs) {
49
+ subCount += await countSubcollectionsForDoc(
50
+ sourceDb,
51
+ doc,
52
+ collectionPath,
53
+ config,
54
+ depth,
55
+ progress
56
+ );
57
+ }
58
+
59
+ lastDoc = snapshot.docs[snapshot.docs.length - 1];
60
+ if (snapshot.size < pageSize) break;
44
61
  }
45
62
 
46
- for (const doc of snapshot.docs) {
47
- count += await countSubcollectionsForDoc(
48
- sourceDb,
49
- doc,
50
- collectionPath,
51
- config,
52
- depth,
53
- progress
54
- );
63
+ if (rootCount === 0 && depth === 0 && progress?.onCollection) {
64
+ progress.onCollection(collectionPath, 0);
55
65
  }
56
66
 
57
- return count;
67
+ return rootCount + subCount;
58
68
  }
59
69
 
60
70
  async function countSubcollectionsForDoc(
@@ -65,11 +75,19 @@ async function countSubcollectionsForDoc(
65
75
  depth: number,
66
76
  progress?: CountProgress
67
77
  ): Promise<number> {
78
+ // Respect maxDepth to match transfer behavior
79
+ if (config.maxDepth > 0 && depth >= config.maxDepth) return 0;
80
+
68
81
  let count = 0;
69
82
  const subcollections = await getSubcollections(doc.ref);
70
83
 
71
84
  for (const subId of subcollections) {
72
- if (matchesExcludePattern(subId, config.exclude)) continue;
85
+ if (matchesExcludePattern(subId, config.exclude)) {
86
+ if (progress?.onSubcollectionExcluded) {
87
+ progress.onSubcollectionExcluded(subId);
88
+ }
89
+ continue;
90
+ }
73
91
 
74
92
  const subPath = `${collectionPath}/${doc.id}/${subId}`;
75
93
  if (progress?.onSubcollection) {
@@ -1,4 +1,6 @@
1
- import type { DocumentReference } from 'firebase-admin/firestore';
1
+ import type { DocumentReference, Firestore, Query } from 'firebase-admin/firestore';
2
+ import type { Config } from '../types.js';
3
+ import { matchesExcludePattern } from '../utils/patterns.js';
2
4
 
3
5
  export async function getSubcollections(docRef: DocumentReference): Promise<string[]> {
4
6
  const collections = await docRef.listCollections();
@@ -35,3 +37,36 @@ export function getDestDocId(
35
37
  }
36
38
  return destId;
37
39
  }
40
+
41
+ /**
42
+ * Get non-excluded subcollection IDs for a document.
43
+ * Filters out subcollections matching exclude patterns.
44
+ */
45
+ export async function getFilteredSubcollections(
46
+ docRef: DocumentReference,
47
+ exclude: string[]
48
+ ): Promise<string[]> {
49
+ const subcollections = await getSubcollections(docRef);
50
+ return subcollections.filter((id) => !matchesExcludePattern(id, exclude));
51
+ }
52
+
53
+ /**
54
+ * Build a Firestore query with where filters applied.
55
+ * Filters are only applied at root level (depth === 0).
56
+ */
57
+ export function buildQueryWithFilters(
58
+ sourceDb: Firestore,
59
+ collectionPath: string,
60
+ config: Config,
61
+ depth: number
62
+ ): Query {
63
+ let query: Query = sourceDb.collection(collectionPath);
64
+
65
+ if (depth === 0 && config.where.length > 0) {
66
+ for (const filter of config.where) {
67
+ query = query.where(filter.field, filter.operator, filter.value);
68
+ }
69
+ }
70
+
71
+ return query;
72
+ }
@@ -1,4 +1,10 @@
1
- export { getSubcollections, getDestCollectionPath, getDestDocId } from './helpers.js';
1
+ export {
2
+ getSubcollections,
3
+ getDestCollectionPath,
4
+ getDestDocId,
5
+ getFilteredSubcollections,
6
+ buildQueryWithFilters,
7
+ } from './helpers.js';
2
8
  export { processInParallel, type ParallelResult } from './parallel.js';
3
9
  export { countDocuments, type CountProgress } from './count.js';
4
10
  export { clearCollection, deleteOrphanDocuments, type DeleteOrphansProgress } from './clear.js';
@@ -1,14 +1,23 @@
1
- import type { Firestore, WriteBatch, Query, QueryDocumentSnapshot } from 'firebase-admin/firestore';
1
+ import {
2
+ FieldPath,
3
+ type Firestore,
4
+ type WriteBatch,
5
+ type QueryDocumentSnapshot,
6
+ } from 'firebase-admin/firestore';
2
7
  import type { Config, Stats, TransformFunction, ConflictInfo } from '../types.js';
3
8
  import type { Output } from '../utils/output.js';
4
9
  import type { RateLimiter } from '../utils/rate-limiter.js';
5
10
  import type { ProgressBarWrapper } from '../utils/progress.js';
6
11
  import type { StateSaver } from '../state/index.js';
7
12
  import { withRetry } from '../utils/retry.js';
8
- import { matchesExcludePattern } from '../utils/patterns.js';
9
13
  import { estimateDocumentSize, formatBytes, FIRESTORE_MAX_DOC_SIZE } from '../utils/doc-size.js';
10
14
  import { hashDocumentData, compareHashes } from '../utils/integrity.js';
11
- import { getSubcollections, getDestCollectionPath, getDestDocId } from './helpers.js';
15
+ import {
16
+ getDestCollectionPath,
17
+ getDestDocId,
18
+ getFilteredSubcollections,
19
+ buildQueryWithFilters,
20
+ } from './helpers.js';
12
21
 
13
22
  export interface TransferContext {
14
23
  sourceDb: Firestore;
@@ -21,6 +30,7 @@ export interface TransferContext {
21
30
  stateSaver: StateSaver | null;
22
31
  rateLimiter: RateLimiter | null;
23
32
  conflictList: ConflictInfo[];
33
+ maxDepthWarningsShown: Set<string>;
24
34
  }
25
35
 
26
36
  interface DocProcessResult {
@@ -29,99 +39,6 @@ interface DocProcessResult {
29
39
  markCompleted: boolean;
30
40
  }
31
41
 
32
- // Map of destDocId -> updateTime (as ISO string for comparison)
33
- type UpdateTimeMap = Map<string, string | null>;
34
-
35
- /**
36
- * Capture updateTime of destination documents before processing.
37
- * Returns a map of docId -> updateTime (ISO string, or null if doc doesn't exist).
38
- */
39
- async function captureDestUpdateTimes(
40
- destDb: Firestore,
41
- destCollectionPath: string,
42
- destDocIds: string[]
43
- ): Promise<UpdateTimeMap> {
44
- const updateTimes: UpdateTimeMap = new Map();
45
-
46
- // Batch get dest docs to get their updateTime
47
- const docRefs = destDocIds.map((id) => destDb.collection(destCollectionPath).doc(id));
48
- const docs = await destDb.getAll(...docRefs);
49
-
50
- for (let i = 0; i < docs.length; i++) {
51
- const doc = docs[i];
52
- const docId = destDocIds[i];
53
- if (doc.exists) {
54
- const updateTime = doc.updateTime;
55
- updateTimes.set(docId, updateTime ? updateTime.toDate().toISOString() : null);
56
- } else {
57
- updateTimes.set(docId, null);
58
- }
59
- }
60
-
61
- return updateTimes;
62
- }
63
-
64
- /**
65
- * Check for conflicts by comparing current updateTimes with captured ones.
66
- * Returns array of docIds that have conflicts.
67
- */
68
- async function checkForConflicts(
69
- destDb: Firestore,
70
- destCollectionPath: string,
71
- destDocIds: string[],
72
- capturedTimes: UpdateTimeMap
73
- ): Promise<string[]> {
74
- const conflicts: string[] = [];
75
-
76
- const docRefs = destDocIds.map((id) => destDb.collection(destCollectionPath).doc(id));
77
- const docs = await destDb.getAll(...docRefs);
78
-
79
- for (let i = 0; i < docs.length; i++) {
80
- const doc = docs[i];
81
- const docId = destDocIds[i];
82
- const capturedTime = capturedTimes.get(docId);
83
-
84
- const currentTime =
85
- doc.exists && doc.updateTime ? doc.updateTime.toDate().toISOString() : null;
86
-
87
- // Conflict conditions:
88
- // 1. Doc didn't exist before but now exists (created by someone else)
89
- // 2. Doc was modified (updateTime changed)
90
- // 3. Doc was deleted during transfer (existed before, doesn't now)
91
- const isConflict =
92
- (doc.exists && capturedTime === null) ||
93
- (doc.exists && currentTime !== capturedTime) ||
94
- (!doc.exists && capturedTime !== null);
95
-
96
- if (isConflict) {
97
- conflicts.push(docId);
98
- }
99
- }
100
-
101
- return conflicts;
102
- }
103
-
104
- function buildTransferQuery(
105
- sourceDb: Firestore,
106
- collectionPath: string,
107
- config: Config,
108
- depth: number
109
- ): Query {
110
- let query: Query = sourceDb.collection(collectionPath);
111
-
112
- if (depth === 0 && config.where.length > 0) {
113
- for (const filter of config.where) {
114
- query = query.where(filter.field, filter.operator, filter.value);
115
- }
116
- }
117
-
118
- if (config.limit > 0 && depth === 0) {
119
- query = query.limit(config.limit);
120
- }
121
-
122
- return query;
123
- }
124
-
125
42
  function applyTransform(
126
43
  docData: Record<string, unknown>,
127
44
  doc: QueryDocumentSnapshot,
@@ -152,6 +69,7 @@ function applyTransform(
152
69
  collection: collectionPath,
153
70
  error: errMsg,
154
71
  });
72
+ output.warn(`⚠️ Transform error: ${collectionPath}/${doc.id} skipped (${errMsg})`);
155
73
  stats.errors++;
156
74
  return { success: false, data: null, markCompleted: false };
157
75
  }
@@ -186,9 +104,6 @@ function checkDocumentSize(
186
104
  );
187
105
  }
188
106
 
189
- // Track which collections have already shown the max-depth warning (to avoid spam)
190
- const maxDepthWarningsShown = new Set<string>();
191
-
192
107
  async function processSubcollections(
193
108
  ctx: TransferContext,
194
109
  doc: QueryDocumentSnapshot,
@@ -201,8 +116,8 @@ async function processSubcollections(
201
116
  if (config.maxDepth > 0 && depth >= config.maxDepth) {
202
117
  // Show console warning only once per root collection
203
118
  const rootCollection = collectionPath.split('/')[0];
204
- if (!maxDepthWarningsShown.has(rootCollection)) {
205
- maxDepthWarningsShown.add(rootCollection);
119
+ if (!ctx.maxDepthWarningsShown.has(rootCollection)) {
120
+ ctx.maxDepthWarningsShown.add(rootCollection);
206
121
  output.warn(
207
122
  `⚠️ Subcollections in ${rootCollection} beyond depth ${config.maxDepth} will be skipped`
208
123
  );
@@ -215,15 +130,21 @@ async function processSubcollections(
215
130
  return;
216
131
  }
217
132
 
218
- const subcollections = await getSubcollections(doc.ref);
133
+ const subcollections = await getFilteredSubcollections(doc.ref, config.exclude);
219
134
 
220
135
  for (const subcollectionId of subcollections) {
221
- if (matchesExcludePattern(subcollectionId, config.exclude)) {
222
- output.logInfo(`Skipping excluded subcollection: ${subcollectionId}`);
223
- continue;
136
+ const subcollectionPath = `${collectionPath}/${doc.id}/${subcollectionId}`;
137
+
138
+ // Count subcollection docs with .count() aggregation (1 read instead of N)
139
+ // and dynamically adjust the progress bar total
140
+ if (ctx.progressBar.isActive) {
141
+ const countSnap = await ctx.sourceDb.collection(subcollectionPath).count().get();
142
+ const subCount = countSnap.data().count;
143
+ if (subCount > 0) {
144
+ ctx.progressBar.addToTotal(subCount);
145
+ }
224
146
  }
225
147
 
226
- const subcollectionPath = `${collectionPath}/${doc.id}/${subcollectionId}`;
227
148
  const subCtx = { ...ctx, config: { ...config, limit: 0, where: [] } };
228
149
  await transferCollection(subCtx, subcollectionPath, depth + 1);
229
150
  }
@@ -244,7 +165,18 @@ function processDocument(
244
165
  }
245
166
 
246
167
  const destDocId = getDestDocId(doc.id, config.idPrefix, config.idSuffix);
247
- let docData = doc.data() as Record<string, unknown>;
168
+ let docData: Record<string, unknown>;
169
+ try {
170
+ docData = doc.data() as Record<string, unknown>;
171
+ } catch (error) {
172
+ const errMsg = error instanceof Error ? error.message : String(error);
173
+ output.logError(`Failed to read document data for ${doc.id}`, {
174
+ collection: collectionPath,
175
+ error: errMsg,
176
+ });
177
+ stats.errors++;
178
+ return { skip: true, markCompleted: false };
179
+ }
248
180
 
249
181
  // Apply transform if provided
250
182
  if (transformFn) {
@@ -295,12 +227,31 @@ async function commitBatchWithRetry(
295
227
  await rateLimiter.acquire(batchDocIds.length);
296
228
  }
297
229
 
298
- await withRetry(() => destBatch.commit(), {
299
- retries: config.retries,
300
- onRetry: (attempt, max, err, delay) => {
301
- output.logError(`Retry commit ${attempt}/${max}`, { error: err.message, delay });
302
- },
303
- });
230
+ try {
231
+ await withRetry(() => destBatch.commit(), {
232
+ retries: config.retries,
233
+ onRetry: (attempt, max, err, delay) => {
234
+ output.logError(`Retry commit ${attempt}/${max}`, { error: err.message, delay });
235
+ },
236
+ });
237
+ } catch (error) {
238
+ const err = error instanceof Error ? error : new Error(String(error));
239
+ stats.errors += batchDocIds.length;
240
+ output.logError(
241
+ `Batch commit failed for ${batchDocIds.length} documents after ${config.retries} retries`,
242
+ {
243
+ collection: collectionPath,
244
+ error: err.message,
245
+ docIds: batchDocIds.slice(0, 10),
246
+ }
247
+ );
248
+ output.warn(
249
+ `⚠️ Batch commit failed: ${batchDocIds.length} documents in ${collectionPath} were NOT written (${err.message})`
250
+ );
251
+ // Roll back documentsTransferred since these docs were never actually committed
252
+ stats.documentsTransferred -= batchDocIds.length;
253
+ return;
254
+ }
304
255
 
305
256
  if (stateSaver && batchDocIds.length > 0) {
306
257
  stateSaver.markBatchCompleted(collectionPath, batchDocIds, stats);
@@ -365,9 +316,40 @@ async function verifyBatchIntegrity(
365
316
  preparedDocs: PreparedDoc[],
366
317
  destDb: Firestore,
367
318
  destCollectionPath: string,
319
+ merge: boolean,
368
320
  stats: Stats,
369
321
  output: Output
370
322
  ): Promise<void> {
323
+ if (!merge) {
324
+ // Non-merge mode: data written is exactly what we sent, no re-fetch needed.
325
+ // The source hash was computed from the same data we wrote, so they must match.
326
+ // We only need to verify the docs exist (spot-check a single doc for commit success).
327
+ const sampleRef = destDb.collection(destCollectionPath).doc(preparedDocs[0].destDocId);
328
+ const sampleDoc = await sampleRef.get();
329
+ if (!sampleDoc.exists) {
330
+ // Commit may have silently failed — verify all
331
+ const docRefs = preparedDocs.map((p) =>
332
+ destDb.collection(destCollectionPath).doc(p.destDocId)
333
+ );
334
+ const destDocs = await destDb.getAll(...docRefs);
335
+ for (let i = 0; i < destDocs.length; i++) {
336
+ if (!destDocs[i].exists) {
337
+ stats.integrityErrors++;
338
+ output.warn(
339
+ `⚠️ Integrity error: ${destCollectionPath}/${preparedDocs[i].destDocId} not found after write`
340
+ );
341
+ output.logError('Integrity verification failed', {
342
+ collection: destCollectionPath,
343
+ docId: preparedDocs[i].destDocId,
344
+ reason: 'document_not_found',
345
+ });
346
+ }
347
+ }
348
+ }
349
+ return;
350
+ }
351
+
352
+ // Merge mode: re-fetch and compare hashes (merged result may differ from source)
371
353
  const docRefs = preparedDocs.map((p) => destDb.collection(destCollectionPath).doc(p.destDocId));
372
354
  const destDocs = await destDb.getAll(...docRefs);
373
355
 
@@ -450,7 +432,14 @@ async function commitPreparedDocs(
450
432
 
451
433
  // Verify integrity after commit if enabled
452
434
  if (config.verifyIntegrity) {
453
- await verifyBatchIntegrity(preparedDocs, destDb, destCollectionPath, stats, output);
435
+ await verifyBatchIntegrity(
436
+ preparedDocs,
437
+ destDb,
438
+ destCollectionPath,
439
+ config.merge,
440
+ stats,
441
+ output
442
+ );
454
443
  }
455
444
  }
456
445
 
@@ -479,50 +468,50 @@ async function processBatch(
479
468
  return [];
480
469
  }
481
470
 
482
- // Step 2: If conflict detection is enabled, capture dest updateTimes and check for conflicts
483
- let docsToWrite = preparedDocs;
471
+ // Step 2: If conflict detection is enabled, check for existing docs in destination
472
+ // Uses chunked 'in' queries with .select() to minimize reads:
473
+ // - Firestore 'in' operator supports max 30 values per query
474
+ // - .select() avoids transferring field data (saves bandwidth)
475
+ // - Only existing docs cost reads; non-existent docs are free (unlike getAll)
484
476
  if (config.detectConflicts && !config.dryRun) {
485
477
  const destDocIds = preparedDocs.map((p) => p.destDocId);
486
- const capturedTimes = await captureDestUpdateTimes(destDb, destCollectionPath, destDocIds);
487
-
488
- // Check for conflicts
489
- const conflictingIds = await checkForConflicts(
490
- destDb,
491
- destCollectionPath,
492
- destDocIds,
493
- capturedTimes
494
- );
495
-
496
- if (conflictingIds.length > 0) {
497
- const conflictSet = new Set(conflictingIds);
498
-
499
- // Filter out conflicting docs
500
- docsToWrite = preparedDocs.filter((p) => !conflictSet.has(p.destDocId));
478
+ const existingIds = new Set<string>();
479
+ const FIRESTORE_IN_LIMIT = 30;
480
+
481
+ for (let i = 0; i < destDocIds.length; i += FIRESTORE_IN_LIMIT) {
482
+ const chunk = destDocIds.slice(i, i + FIRESTORE_IN_LIMIT);
483
+ const snapshot = await destDb
484
+ .collection(destCollectionPath)
485
+ .where(FieldPath.documentId(), 'in', chunk)
486
+ .select()
487
+ .get();
488
+ for (const doc of snapshot.docs) {
489
+ existingIds.add(doc.id);
490
+ }
491
+ }
501
492
 
502
- // Record conflicts
503
- for (const prepared of preparedDocs) {
504
- if (conflictSet.has(prepared.destDocId)) {
505
- stats.conflicts++;
506
- conflictList.push({
507
- collection: destCollectionPath,
508
- docId: prepared.destDocId,
509
- reason: 'Document was modified during transfer',
510
- });
511
- output.warn(
512
- `⚠️ Conflict detected: ${destCollectionPath}/${prepared.destDocId} was modified during transfer`
513
- );
514
- output.logError('Conflict detected', {
515
- collection: destCollectionPath,
516
- docId: prepared.destDocId,
517
- reason: 'modified_during_transfer',
518
- });
519
- }
493
+ if (existingIds.size > 0) {
494
+ for (const docId of existingIds) {
495
+ stats.conflicts++;
496
+ conflictList.push({
497
+ collection: destCollectionPath,
498
+ docId,
499
+ reason: 'Document already exists in destination',
500
+ });
501
+ output.logError('Conflict detected', {
502
+ collection: destCollectionPath,
503
+ docId,
504
+ reason: 'document_exists_in_destination',
505
+ });
520
506
  }
507
+ output.warn(
508
+ `⚠️ ${existingIds.size} document(s) already exist in ${destCollectionPath} and will be overwritten`
509
+ );
521
510
  }
522
511
  }
523
512
 
524
- // Step 3: Commit non-conflicting docs
525
- return commitPreparedDocs(docsToWrite, ctx, collectionPath, destCollectionPath, depth);
513
+ // Step 3: Commit docs
514
+ return commitPreparedDocs(preparedDocs, ctx, collectionPath, destCollectionPath, depth);
526
515
  }
527
516
 
528
517
  export async function transferCollection(
@@ -533,25 +522,50 @@ export async function transferCollection(
533
522
  const { sourceDb, config, stats, output } = ctx;
534
523
  const destCollectionPath = getDestCollectionPath(collectionPath, config.renameCollection);
535
524
 
536
- const query = buildTransferQuery(sourceDb, collectionPath, config, depth);
525
+ const baseQuery = buildQueryWithFilters(sourceDb, collectionPath, config, depth);
526
+ const userLimit = config.limit > 0 && depth === 0 ? config.limit : 0;
537
527
 
538
- const snapshot = await withRetry(() => query.get(), {
539
- retries: config.retries,
540
- onRetry: (attempt, max, err, delay) => {
541
- output.logError(`Retry ${attempt}/${max} for ${collectionPath}`, {
542
- error: err.message,
543
- delay,
544
- });
545
- },
546
- });
528
+ let totalProcessed = 0;
529
+ let lastDoc: QueryDocumentSnapshot | undefined;
530
+
531
+ while (true) {
532
+ // Calculate page size respecting user limit
533
+ let pageSize = config.batchSize;
534
+ if (userLimit > 0) {
535
+ const remaining = userLimit - totalProcessed;
536
+ if (remaining <= 0) break;
537
+ pageSize = Math.min(pageSize, remaining);
538
+ }
539
+
540
+ // Build paginated query
541
+ let pageQuery = baseQuery.limit(pageSize);
542
+ if (lastDoc) {
543
+ pageQuery = pageQuery.startAfter(lastDoc);
544
+ }
545
+
546
+ const snapshot = await withRetry(() => pageQuery.get(), {
547
+ retries: config.retries,
548
+ onRetry: (attempt, max, err, delay) => {
549
+ output.logError(`Retry ${attempt}/${max} for ${collectionPath}`, {
550
+ error: err.message,
551
+ delay,
552
+ });
553
+ },
554
+ });
555
+
556
+ if (snapshot.empty) break;
557
+
558
+ if (totalProcessed === 0) {
559
+ stats.collectionsProcessed++;
560
+ output.logInfo(`Processing collection: ${collectionPath}`);
561
+ }
547
562
 
548
- if (snapshot.empty) return;
563
+ await processBatch(snapshot.docs, ctx, collectionPath, destCollectionPath, depth);
549
564
 
550
- stats.collectionsProcessed++;
551
- output.logInfo(`Processing collection: ${collectionPath}`, { documents: snapshot.size });
565
+ totalProcessed += snapshot.docs.length;
566
+ lastDoc = snapshot.docs[snapshot.docs.length - 1];
552
567
 
553
- for (let i = 0; i < snapshot.docs.length; i += config.batchSize) {
554
- const batch = snapshot.docs.slice(i, i + config.batchSize);
555
- await processBatch(batch, ctx, collectionPath, destCollectionPath, depth);
568
+ // Fewer docs than requested means we've reached the end
569
+ if (snapshot.docs.length < pageSize) break;
556
570
  }
557
571
  }
@@ -2,9 +2,19 @@ import fs from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import type { TransformFunction } from '../types.js';
4
4
 
5
+ const ALLOWED_EXTENSIONS = new Set(['.ts', '.js', '.mjs', '.mts']);
6
+
5
7
  export async function loadTransformFunction(transformPath: string): Promise<TransformFunction> {
6
8
  const absolutePath = path.resolve(transformPath);
7
9
 
10
+ // Validate file extension
11
+ const ext = path.extname(absolutePath).toLowerCase();
12
+ if (!ALLOWED_EXTENSIONS.has(ext)) {
13
+ throw new Error(
14
+ `Transform file must be a JavaScript or TypeScript file (${[...ALLOWED_EXTENSIONS].join(', ')}). Got: "${ext || '(no extension)'}"`
15
+ );
16
+ }
17
+
8
18
  if (!fs.existsSync(absolutePath)) {
9
19
  throw new Error(`Transform file not found: ${absolutePath}`);
10
20
  }
@@ -26,6 +36,8 @@ export async function loadTransformFunction(transformPath: string): Promise<Tran
26
36
  if ((error as Error).message.includes('Transform file')) {
27
37
  throw error;
28
38
  }
29
- throw new Error(`Failed to load transform file: ${(error as Error).message}`);
39
+ throw new Error(`Failed to load transform file: ${(error as Error).message}`, {
40
+ cause: error,
41
+ });
30
42
  }
31
43
  }