@fazetitans/fscopy 1.1.3 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,193 @@
1
1
  import fs from 'node:fs';
2
- import type { Config, TransferState } from '../types.js';
2
+ import type { Config, TransferState, Stats } from '../types.js';
3
3
 
4
4
  export const STATE_VERSION = 1;
5
5
 
6
+ // =============================================================================
7
+ // CompletedDocsCache - O(1) lookup using Set instead of Array.includes()
8
+ // =============================================================================
9
+
10
/**
 * Set-backed index of completed document IDs, keyed by collection path.
 *
 * TransferState.completedDocs keeps IDs in arrays so it can be serialized to
 * JSON. This class mirrors that data into one Set per collection so that
 * membership checks are a Set lookup instead of an array scan.
 */
export class CompletedDocsCache {
    private readonly cache = new Map<string, Set<string>>();

    /**
     * @param completedDocs Previously persisted IDs per collection path.
     *   The arrays are copied into Sets; the input is never mutated.
     */
    constructor(completedDocs: Record<string, readonly string[]> = {}) {
        for (const [collection, docIds] of Object.entries(completedDocs)) {
            this.cache.set(collection, new Set(docIds));
        }
    }

    /**
     * Report whether `docId` has been recorded for `collectionPath`.
     * Unknown collections yield `false`.
     */
    has(collectionPath: string, docId: string): boolean {
        return this.cache.get(collectionPath)?.has(docId) ?? false;
    }

    /**
     * Record a single document as completed.
     */
    add(collectionPath: string, docId: string): void {
        this.getOrCreate(collectionPath).add(docId);
    }

    /**
     * Record several documents of the same collection as completed.
     * An empty `docIds` still registers the collection (with no IDs).
     */
    addBatch(collectionPath: string, docIds: readonly string[]): void {
        const ids = this.getOrCreate(collectionPath);
        for (const docId of docIds) {
            ids.add(docId);
        }
    }

    /**
     * Convert back to the array-based shape used for JSON serialization.
     *
     * Built with Object.fromEntries so every collection path becomes an own
     * property of the result; plain `result[key] = value` assignment would
     * replace the prototype instead when the key is `__proto__`.
     */
    toRecord(): Record<string, string[]> {
        return Object.fromEntries(
            Array.from(
                this.cache,
                ([collection, ids]): [string, string[]] => [collection, Array.from(ids)]
            )
        );
    }

    /**
     * Total number of completed documents across all collections.
     */
    get totalCount(): number {
        let count = 0;
        for (const ids of this.cache.values()) {
            count += ids.size;
        }
        return count;
    }

    /** Return the Set for `collectionPath`, creating and registering it on first use. */
    private getOrCreate(collectionPath: string): Set<string> {
        let ids = this.cache.get(collectionPath);
        if (!ids) {
            ids = new Set();
            this.cache.set(collectionPath, ids);
        }
        return ids;
    }
}
78
+
79
+ // =============================================================================
80
+ // StateSaver
81
+ // =============================================================================
82
+
83
/**
 * Throttling knobs for StateSaver. Both are optional; omitted values fall
 * back to the module defaults declared below.
 */
export interface StateSaverOptions {
    /** Number of completed batches between saves (default: 10). */
    batchInterval?: number;
    /** Maximum time between saves, in milliseconds (default: 5000). */
    timeInterval?: number;
}

/** Fallback for StateSaverOptions.batchInterval. */
const DEFAULT_BATCH_INTERVAL = 10;

/** Fallback for StateSaverOptions.timeInterval, in milliseconds. */
const DEFAULT_TIME_INTERVAL = 5000;
92
+
93
/**
 * Tracks completed documents for a transfer and persists the transfer state
 * on a throttle.
 *
 * Completed IDs live in a CompletedDocsCache; they are copied back into
 * `state.completedDocs` only when a save happens. A save is triggered from
 * markBatchCompleted() once `batchInterval` batches have accumulated or
 * `timeInterval` milliseconds have passed since the previous save, whichever
 * is reached first, and can be forced with flush().
 */
export class StateSaver {
    private readonly cache: CompletedDocsCache;
    private readonly batchInterval: number;
    private readonly timeInterval: number;
    private batchesSinceLastSave = 0;
    private lastSaveTime = Date.now();
    private dirty = false;

    /**
     * @param stateFile Path handed to saveTransferState() on every save.
     * @param state State object that is updated in place and written out.
     * @param options Optional throttle overrides.
     */
    constructor(
        private readonly stateFile: string,
        private readonly state: TransferState,
        options: StateSaverOptions = {}
    ) {
        this.cache = new CompletedDocsCache(state.completedDocs);
        this.timeInterval = options.timeInterval ?? DEFAULT_TIME_INTERVAL;
        this.batchInterval = options.batchInterval ?? DEFAULT_BATCH_INTERVAL;
    }

    /**
     * Whether `docId` in `collectionPath` has already been marked completed.
     */
    isCompleted(collectionPath: string, docId: string): boolean {
        return this.cache.has(collectionPath, docId);
    }

    /**
     * Record a finished batch and store a shallow copy of `stats` on the state.
     * Writes to disk right away when a throttle threshold has been reached.
     */
    markBatchCompleted(collectionPath: string, docIds: string[], stats: Stats): void {
        this.cache.addBatch(collectionPath, docIds);
        this.state.stats = { ...stats };
        this.batchesSinceLastSave += 1;
        this.dirty = true;

        if (this.shouldSave()) {
            this.save();
        }
    }

    /**
     * True when enough batches have accumulated, or enough time has elapsed,
     * since the last save.
     */
    private shouldSave(): boolean {
        return (
            this.batchesSinceLastSave >= this.batchInterval ||
            Date.now() - this.lastSaveTime >= this.timeInterval
        );
    }

    /**
     * Copy the cache into `state.completedDocs`, write the state file and
     * reset the throttle counters.
     */
    private save(): void {
        this.state.completedDocs = this.cache.toRecord();
        saveTransferState(this.stateFile, this.state);
        this.dirty = false;
        this.batchesSinceLastSave = 0;
        this.lastSaveTime = Date.now();
    }

    /**
     * Save immediately if anything changed since the last save.
     * Intended to be called before shutdown or when the transfer completes.
     */
    flush(): void {
        if (!this.dirty) {
            return;
        }
        this.save();
    }

    /**
     * The underlying state object (the same reference passed to the constructor).
     * Its `completedDocs` is only refreshed on save, so it may lag behind the
     * cache until flush() is called.
     */
    getState(): TransferState {
        return this.state;
    }

    /**
     * Total number of documents currently marked as completed.
     */
    get completedCount(): number {
        return this.cache.totalCount;
    }
}
190
+
6
191
  export function loadTransferState(stateFile: string): TransferState | null {
7
192
  try {
8
193
  if (!fs.existsSync(stateFile)) {
@@ -73,6 +258,8 @@ export function createInitialState(config: Config): TransferState {
73
258
  documentsTransferred: 0,
74
259
  documentsDeleted: 0,
75
260
  errors: 0,
261
+ conflicts: 0,
262
+ integrityErrors: 0,
76
263
  },
77
264
  };
78
265
  }
@@ -1,156 +1,214 @@
1
- import type { Firestore } from 'firebase-admin/firestore';
1
+ import type { Firestore, QueryDocumentSnapshot } from 'firebase-admin/firestore';
2
2
  import type { Config } from '../types.js';
3
- import type { Logger } from '../utils/logger.js';
3
+ import type { Output } from '../utils/output.js';
4
4
  import { withRetry } from '../utils/retry.js';
5
5
  import { matchesExcludePattern } from '../utils/patterns.js';
6
6
  import { getSubcollections, getDestCollectionPath } from './helpers.js';
7
7
 
8
- export async function clearCollection(
8
/**
 * Clear every non-excluded subcollection found under a single document.
 *
 * @param db Firestore instance the document belongs to.
 * @param doc Document whose subcollections are cleared.
 * @param collectionPath Path of the collection containing `doc`.
 * @param config Supplies the `exclude` patterns; also forwarded to clearCollection.
 * @param output Logger forwarded to clearCollection.
 * @returns Sum of the counts reported by clearCollection for each subcollection.
 */
async function clearDocSubcollections(
    db: Firestore,
    doc: QueryDocumentSnapshot,
    collectionPath: string,
    config: Config,
    output: Output
): Promise<number> {
    const subcollectionIds = await getSubcollections(doc.ref);
    let removed = 0;

    // Subcollections are handled one after another, in the order returned.
    for (const subcollectionId of subcollectionIds) {
        if (!matchesExcludePattern(subcollectionId, config.exclude)) {
            const nestedPath = [collectionPath, doc.id, subcollectionId].join('/');
            removed += await clearCollection(db, nestedPath, config, output, true);
        }
    }

    return removed;
}
27
+
28
/**
 * Delete one slice of documents through a single Firestore write batch.
 *
 * The write batch is always assembled, but it is only committed when
 * `config.dryRun` is falsy. The commit goes through withRetry, configured
 * with `config.retries`; every retry is reported via `output.logError`.
 * The info line and the returned count are produced in dry-run mode as well.
 *
 * @returns `batch.length`.
 */
async function deleteBatch(
    db: Firestore,
    batch: QueryDocumentSnapshot[],
    collectionPath: string,
    config: Config,
    output: Output
): Promise<number> {
    const pendingDeletes = db.batch();
    batch.forEach((snapshot) => {
        pendingDeletes.delete(snapshot.ref);
    });

    const shouldCommit = !config.dryRun;
    if (shouldCommit) {
        await withRetry(() => pendingDeletes.commit(), {
            retries: config.retries,
            onRetry: (attempt, max, err, delay) => {
                const details = { error: err.message, delay };
                output.logError(`Retry delete ${attempt}/${max} for ${collectionPath}`, details);
            },
        });
    }

    const handled = batch.length;
    output.logInfo(`Deleted ${handled} documents from ${collectionPath}`);
    return handled;
}
56
+
57
/**
 * Delete every document in a collection, optionally clearing nested
 * subcollections first.
 *
 * Only document references are needed to delete, so the collection is read
 * with an empty field mask (`select()`) instead of downloading every
 * document body.
 *
 * @param db Firestore instance to delete from.
 * @param collectionPath Path of the collection to clear.
 * @param config Supplies `batchSize`; also forwarded to the helpers, which
 *   read `exclude`, `dryRun` and `retries`.
 * @param output Logger forwarded to the helpers.
 * @param includeSubcollections When true, subcollections of each document
 *   are cleared before the documents themselves.
 * @returns Number of documents handled, including those in subcollections.
 * @throws RangeError if the collection is non-empty and `config.batchSize`
 *   is not a positive number (the batching loop could never advance).
 */
export async function clearCollection(
    db: Firestore,
    collectionPath: string,
    config: Config,
    output: Output,
    includeSubcollections: boolean
): Promise<number> {
    const snapshot = await db.collection(collectionPath).select().get();
    if (snapshot.empty) return 0;

    // Fail before deleting anything: a zero, negative or NaN step would make
    // the batching loop below spin forever.
    const batchSize = config.batchSize;
    if (!(batchSize > 0)) {
        throw new RangeError(`Invalid batchSize: ${batchSize}`);
    }

    let deletedCount = 0;

    // Delete subcollections first if enabled
    if (includeSubcollections) {
        for (const doc of snapshot.docs) {
            deletedCount += await clearDocSubcollections(db, doc, collectionPath, config, output);
        }
    }

    // Delete documents in batches
    for (let i = 0; i < snapshot.docs.length; i += batchSize) {
        const batch = snapshot.docs.slice(i, i + batchSize);
        deletedCount += await deleteBatch(db, batch, collectionPath, config, output);
    }

    return deletedCount;
}
84
+
85
/**
 * Clear every non-excluded subcollection under an orphan destination document.
 *
 * NOTE(review): the logic matches clearDocSubcollections in this file; the
 * two could share one implementation.
 *
 * @param destDb Destination Firestore instance.
 * @param doc Orphan document whose subcollections are cleared.
 * @param destCollectionPath Path of the destination collection containing `doc`.
 * @param config Supplies the `exclude` patterns; also forwarded to clearCollection.
 * @param output Logger forwarded to clearCollection.
 * @returns Sum of the counts reported by clearCollection for each subcollection.
 */
async function clearOrphanSubcollections(
    destDb: Firestore,
    doc: QueryDocumentSnapshot,
    destCollectionPath: string,
    config: Config,
    output: Output
): Promise<number> {
    const subcollectionIds = await getSubcollections(doc.ref);
    let removed = 0;

    for (const subcollectionId of subcollectionIds) {
        const excluded = matchesExcludePattern(subcollectionId, config.exclude);
        if (!excluded) {
            const nestedPath = [destCollectionPath, doc.id, subcollectionId].join('/');
            removed += await clearCollection(destDb, nestedPath, config, output, true);
        }
    }

    return removed;
}
104
+
105
/**
 * Delete one slice of orphan documents from the destination.
 *
 * For each orphan, its subcollections are cleared first when
 * `config.includeSubcollections` is set; the document itself is then queued
 * on a write batch. The batch is committed (through withRetry, configured
 * with `config.retries`) only when `config.dryRun` is falsy.
 *
 * @returns Number of orphans in the slice plus whatever
 *   clearOrphanSubcollections reported for their subcollections.
 */
async function deleteOrphanBatch(
    destDb: Firestore,
    batch: QueryDocumentSnapshot[],
    destCollectionPath: string,
    config: Config,
    output: Output
): Promise<number> {
    const pendingDeletes = destDb.batch();
    let removed = 0;

    for (const orphan of batch) {
        if (config.includeSubcollections) {
            const nested = await clearOrphanSubcollections(
                destDb,
                orphan,
                destCollectionPath,
                config,
                output
            );
            removed += nested;
        }
        pendingDeletes.delete(orphan.ref);
        removed += 1;
    }

    const shouldCommit = !config.dryRun;
    if (shouldCommit) {
        await withRetry(() => pendingDeletes.commit(), {
            retries: config.retries,
            onRetry: (attempt, max, err, delay) => {
                const details = { error: err.message, delay };
                output.logError(
                    `Retry delete orphans ${attempt}/${max} for ${destCollectionPath}`,
                    details
                );
            },
        });
    }

    output.logInfo(`Deleted ${batch.length} orphan documents from ${destCollectionPath}`);
    return removed;
}
66
147
 
67
- export async function deleteOrphanDocuments(
148
/**
 * Recurse into the subcollections of every source document and remove
 * orphans there.
 *
 * For each document in `sourceSnapshot`, each non-excluded subcollection is
 * passed to deleteOrphanDocuments using its source-side path.
 *
 * @returns Sum of the counts returned by the recursive calls.
 */
async function processSubcollectionOrphans(
    sourceDb: Firestore,
    destDb: Firestore,
    sourceSnapshot: FirebaseFirestore.QuerySnapshot,
    sourceCollectionPath: string,
    config: Config,
    output: Output
): Promise<number> {
    let removed = 0;

    for (const parentDoc of sourceSnapshot.docs) {
        const subcollectionIds = await getSubcollections(parentDoc.ref);

        for (const subcollectionId of subcollectionIds) {
            if (!matchesExcludePattern(subcollectionId, config.exclude)) {
                const nestedPath = [sourceCollectionPath, parentDoc.id, subcollectionId].join('/');
                removed += await deleteOrphanDocuments(
                    sourceDb,
                    destDb,
                    nestedPath,
                    config,
                    output
                );
            }
        }
    }

    return removed;
}
170
+
171
+ export async function deleteOrphanDocuments(
172
+ sourceDb: Firestore,
173
+ destDb: Firestore,
174
+ sourceCollectionPath: string,
175
+ config: Config,
176
+ output: Output
177
+ ): Promise<number> {
77
178
  const destCollectionPath = getDestCollectionPath(sourceCollectionPath, config.renameCollection);
78
179
 
79
- // Get all document IDs from source (use select() to only fetch IDs, not data)
80
180
  const sourceSnapshot = await sourceDb.collection(sourceCollectionPath).select().get();
81
181
  const sourceIds = new Set(sourceSnapshot.docs.map((doc) => doc.id));
82
182
 
83
- // Get all document IDs from destination (use select() to only fetch IDs, not data)
84
183
  const destSnapshot = await destDb.collection(destCollectionPath).select().get();
85
-
86
- // Find orphan documents (in dest but not in source)
87
184
  const orphanDocs = destSnapshot.docs.filter((doc) => !sourceIds.has(doc.id));
88
185
 
89
- if (orphanDocs.length === 0) {
90
- return 0;
91
- }
92
-
93
- logger.info(`Found ${orphanDocs.length} orphan documents in ${destCollectionPath}`);
94
-
95
- // Delete orphan documents in batches
96
- for (let i = 0; i < orphanDocs.length; i += config.batchSize) {
97
- const batch = orphanDocs.slice(i, i + config.batchSize);
98
- const writeBatch = destDb.batch();
99
-
100
- for (const doc of batch) {
101
- // If subcollections are included, recursively delete orphans in subcollections first
102
- if (config.includeSubcollections) {
103
- const subcollections = await getSubcollections(doc.ref);
104
- for (const subId of subcollections) {
105
- if (matchesExcludePattern(subId, config.exclude)) {
106
- continue;
107
- }
108
- const subPath = `${destCollectionPath}/${doc.id}/${subId}`;
109
- // For orphan parent docs, clear all subcollection data
110
- deletedCount += await clearCollection(destDb, subPath, config, logger, true);
111
- }
112
- }
113
-
114
- writeBatch.delete(doc.ref);
115
- deletedCount++;
116
- }
186
+ let deletedCount = 0;
117
187
 
118
- if (!config.dryRun) {
119
- await withRetry(() => writeBatch.commit(), {
120
- retries: config.retries,
121
- onRetry: (attempt, max, err, delay) => {
122
- logger.error(
123
- `Retry delete orphans ${attempt}/${max} for ${destCollectionPath}`,
124
- {
125
- error: err.message,
126
- delay,
127
- }
128
- );
129
- },
130
- });
188
+ if (orphanDocs.length > 0) {
189
+ output.logInfo(`Found ${orphanDocs.length} orphan documents in ${destCollectionPath}`);
190
+
191
+ for (let i = 0; i < orphanDocs.length; i += config.batchSize) {
192
+ const batch = orphanDocs.slice(i, i + config.batchSize);
193
+ deletedCount += await deleteOrphanBatch(
194
+ destDb,
195
+ batch,
196
+ destCollectionPath,
197
+ config,
198
+ output
199
+ );
131
200
  }
132
-
133
- logger.info(`Deleted ${batch.length} orphan documents from ${destCollectionPath}`);
134
201
  }
135
202
 
136
- // Also check subcollections of existing documents for orphans
137
203
  if (config.includeSubcollections) {
138
- for (const sourceDoc of sourceSnapshot.docs) {
139
- const sourceSubcollections = await getSubcollections(sourceDoc.ref);
140
- for (const subId of sourceSubcollections) {
141
- if (matchesExcludePattern(subId, config.exclude)) {
142
- continue;
143
- }
144
- const subPath = `${sourceCollectionPath}/${sourceDoc.id}/${subId}`;
145
- deletedCount += await deleteOrphanDocuments(
146
- sourceDb,
147
- destDb,
148
- subPath,
149
- config,
150
- logger
151
- );
152
- }
153
- }
204
+ deletedCount += await processSubcollectionOrphans(
205
+ sourceDb,
206
+ destDb,
207
+ sourceSnapshot,
208
+ sourceCollectionPath,
209
+ config,
210
+ output
211
+ );
154
212
  }
155
213
 
156
214
  return deletedCount;
@@ -1,4 +1,4 @@
1
- import type { Firestore } from 'firebase-admin/firestore';
1
+ import type { Firestore, Query } from 'firebase-admin/firestore';
2
2
  import type { Config } from '../types.js';
3
3
  import { matchesExcludePattern } from '../utils/patterns.js';
4
4
  import { getSubcollections } from './helpers.js';
@@ -8,64 +8,114 @@ export interface CountProgress {
8
8
  onSubcollection?: (path: string) => void;
9
9
  }
10
10
 
11
- export async function countDocuments(
11
/**
 * Build the base query for a collection.
 *
 * The `config.where` filters are chained onto the query only at the root
 * level (`depth === 0`); nested collections get the bare collection reference.
 */
function buildQueryWithFilters(
    sourceDb: Firestore,
    collectionPath: string,
    config: Config,
    depth: number
): Query {
    const base: Query = sourceDb.collection(collectionPath);
    if (depth !== 0) {
        return base;
    }

    // With no filters the fold simply returns `base`.
    return config.where.reduce<Query>(
        (acc, clause) => acc.where(clause.field, clause.operator, clause.value),
        base
    );
}
27
25
 
28
- // Use count() aggregation to avoid downloading all documents (much cheaper)
29
- // But we need document refs for subcollections, so we'll need a different approach
30
- if (config.includeSubcollections) {
31
- // When including subcollections, we need to fetch docs to get their refs
32
- // Use select() to only fetch document IDs, not the data (reduces bandwidth)
33
- const snapshot = await query.select().get();
34
- count += snapshot.size;
35
-
36
- // Report progress for root collections
37
- if (depth === 0 && progress?.onCollection) {
38
- progress.onCollection(collectionPath, snapshot.size);
39
- }
26
/**
 * Count the documents matched by `query` plus everything in their
 * subcollections.
 *
 * Documents are fetched with an empty field mask (`select()`) because their
 * references are needed to enumerate subcollections. At the root level
 * (`depth === 0`) a positive `config.limit` is applied to the query and
 * `progress.onCollection` is invoked with the collection's own size.
 *
 * @returns Size of this collection's snapshot plus the counts of all nested
 *   subcollections.
 */
async function countWithSubcollections(
    sourceDb: Firestore,
    query: Query,
    collectionPath: string,
    config: Config,
    depth: number,
    progress?: CountProgress
): Promise<number> {
    const atRoot = depth === 0;
    const scopedQuery = atRoot && config.limit > 0 ? query.limit(config.limit) : query;

    const snapshot = await scopedQuery.select().get();

    if (atRoot) {
        progress?.onCollection?.(collectionPath, snapshot.size);
    }

    let total = snapshot.size;
    for (const doc of snapshot.docs) {
        total += await countSubcollectionsForDoc(
            sourceDb,
            doc,
            collectionPath,
            config,
            depth,
            progress
        );
    }

    return total;
}
59
+
60
/**
 * Count the documents in every non-excluded subcollection of one document.
 *
 * Each subcollection that passes the exclude check is announced through
 * `progress.onSubcollection` (when provided) and then counted recursively
 * with countDocuments at `depth + 1`.
 *
 * @returns Sum of the recursive counts; 0 when nothing qualifies.
 */
async function countSubcollectionsForDoc(
    sourceDb: Firestore,
    doc: FirebaseFirestore.QueryDocumentSnapshot,
    collectionPath: string,
    config: Config,
    depth: number,
    progress?: CountProgress
): Promise<number> {
    const subcollectionIds = await getSubcollections(doc.ref);
    const childDepth = depth + 1;
    let total = 0;

    for (const subcollectionId of subcollectionIds) {
        if (!matchesExcludePattern(subcollectionId, config.exclude)) {
            const nestedPath = [collectionPath, doc.id, subcollectionId].join('/');
            progress?.onSubcollection?.(nestedPath);
            total += await countDocuments(sourceDb, nestedPath, config, childDepth, progress);
        }
    }

    return total;
}
84
+
85
/**
 * Count the documents matched by `query` with a count() aggregation, without
 * fetching the documents.
 *
 * At the root level (`depth === 0`) a positive `config.limit` is applied to
 * the query itself before aggregating, the same way countWithSubcollections
 * does, so the server stops counting at the limit rather than counting the
 * whole collection and capping the number afterwards. The resulting value is
 * the same: at most `config.limit`.
 *
 * @param query Base query for the collection.
 * @param collectionPath Path passed to the progress callback.
 * @param config Supplies `limit`.
 * @param depth Nesting level; limit and progress only apply at 0.
 * @param progress Optional callbacks; `onCollection` is invoked at the root.
 * @returns The aggregated count.
 */
async function countWithoutSubcollections(
    query: Query,
    collectionPath: string,
    config: Config,
    depth: number,
    progress?: CountProgress
): Promise<number> {
    const atRoot = depth === 0;

    // Apply limit at root level only
    const scopedQuery = atRoot && config.limit > 0 ? query.limit(config.limit) : query;

    const countSnapshot = await scopedQuery.count().get();
    const count = countSnapshot.data().count;

    if (atRoot && progress?.onCollection) {
        progress.onCollection(collectionPath, count);
    }

    return count;
}
106
+
107
/**
 * Count the documents of a collection in the source database.
 *
 * Builds the (root-filtered) query, then dispatches on
 * `config.includeSubcollections`: either a recursive count that walks
 * subcollections, or a plain count() aggregation.
 *
 * @param sourceDb Source Firestore instance.
 * @param collectionPath Path of the collection to count.
 * @param config Transfer configuration.
 * @param depth Nesting level of `collectionPath`; 0 for a root collection.
 * @param progress Optional progress callbacks.
 * @returns Number of documents counted.
 */
export async function countDocuments(
    sourceDb: Firestore,
    collectionPath: string,
    config: Config,
    depth: number = 0,
    progress?: CountProgress
): Promise<number> {
    const query = buildQueryWithFilters(sourceDb, collectionPath, config, depth);

    return config.includeSubcollections
        ? countWithSubcollections(sourceDb, query, collectionPath, config, depth, progress)
        : countWithoutSubcollections(query, collectionPath, config, depth, progress);
}