@cloudant/couchbackup 2.9.17 → 2.10.0-206

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
  #!/usr/bin/env node
- // Copyright © 2017, 2021 IBM Corp. All rights reserved.
+ // Copyright © 2017, 2024 IBM Corp. All rights reserved.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
@@ -49,9 +49,7 @@ try {
  opts,
  error.terminationCallback
  ).on('restored', function(obj) {
- restoreBatchDebug('restored', obj.total);
- }).on('error', function(e) {
- restoreDebug('ERROR', e);
+ restoreBatchDebug('Restored batch ID:', obj.batch, 'Total document revisions restored:', obj.total, 'Time:', obj.time);
  }).on('finished', function(obj) {
  restoreDebug('finished', obj);
  });
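The reworked 'restored' event now carries the batch number and elapsed time alongside the running total of restored document revisions. A minimal sketch of listening for that event through the package's restore API follows; the backup file name, target URL and options are placeholders rather than values taken from this diff:

    const fs = require('fs');
    const couchbackup = require('@cloudant/couchbackup');

    couchbackup.restore(
      fs.createReadStream('backup.txt'),              // hypothetical backup file
      'https://examples.cloudant.com/restore-target', // hypothetical target database URL
      { parallelism: 2 },
      (err, data) => {
        if (err) {
          console.error('Restore failed', err);
        } else {
          console.log('Restore complete', data);
        }
      }
    ).on('restored', (obj) => {
      // obj.batch, obj.total and obj.time mirror the fields logged by the new debug line
      console.log(`Restored batch ${obj.batch}: ${obj.total} document revisions in ${obj.time}s`);
    });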
@@ -0,0 +1,53 @@
+ // Copyright © 2023 IBM Corp. All rights reserved.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ // http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.
+ 'use strict';
+
+ const debug = require('debug')('couchbackup:alldocsgenerator');
+ const { BackupError } = require('./error.js');
+
+ /**
+ * Async generator function for paginating _all_docs for shallow backups.
+ *
+ * @param {object} dbClient - object for connection to source database containing name, service and url
+ * @param {object} options - backup configuration
+ * @yields {object} a "done" type backup batch {command: d, batch: #, docs: [{_id: id, ...}, ...]}
+ */
+ module.exports = async function * (dbClient, options = {}) {
+ let batch = 0;
+ let lastPage = false;
+ let startKey = null;
+ const opts = { db: dbClient.dbName, limit: options.bufferSize, includeDocs: true };
+ do {
+ if (startKey) opts.startKey = startKey;
+ yield dbClient.service.postAllDocs(opts).then(response => {
+ if (!(response.result && response.result.rows)) {
+ throw new BackupError('AllDocsError', 'Invalid all docs response');
+ }
+ debug(`Got page from start key '${startKey}'`);
+ const docs = response.result.rows;
+ debug(`Received ${docs.length} docs`);
+ lastPage = docs.length < opts.limit;
+ if (docs.length > 0) {
+ const lastKey = docs[docs.length - 1].id;
+ debug(`Received up to key ${lastKey}`);
+ // To avoid double fetching a document solely for the purposes of getting
+ // the next ID to use as a startKey for the next page we instead use the
+ // last ID of the current page and append the lowest unicode sort
+ // character.
+ startKey = `${lastKey}\0`;
+ }
+ return { command: 'd', batch: batch++, docs: docs.map(doc => { return doc.doc; }) };
+ });
+ } while (!lastPage);
+ };
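The new generator pages through _all_docs, yielding one "done" batch per page and advancing startKey to the last ID of the previous page plus '\0' (the lowest Unicode sort character) so the boundary document is not fetched twice. A small sketch of driving a generator with this shape, assuming a dbClient stand-in that provides the dbName and service.postAllDocs used above:

    const allDocsGenerator = require('./allDocsGenerator.js');

    async function shallowBackupToConsole(dbClient) {
      // bufferSize caps the rows requested per _all_docs page
      const options = { bufferSize: 500 };
      let documents = 0;
      for await (const backupBatch of allDocsGenerator(dbClient, options)) {
        // each yielded batch is { command: 'd', batch: <number>, docs: [...] }
        documents += backupBatch.docs.length;
        console.log(`Batch ${backupBatch.batch}: ${backupBatch.docs.length} docs (running total ${documents})`);
      }
      return documents;
    }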
@@ -1,4 +1,4 @@
- // Copyright © 2017, 2021 IBM Corp. All rights reserved.
+ // Copyright © 2017, 2024 IBM Corp. All rights reserved.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
@@ -13,269 +13,125 @@
  // limitations under the License.
  'use strict';

- const async = require('async');
- const events = require('events');
- const fs = require('fs');
- const error = require('./error.js');
+ const { createWriteStream } = require('node:fs');
+ const { pipeline } = require('node:stream/promises');
+ const { Backup } = require('./backupMappings.js');
+ const { BackupError } = require('./error.js');
+ const logFileSummary = require('./logfilesummary.js');
+ const logFileGetBatches = require('./logfilegetbatches.js');
  const spoolchanges = require('./spoolchanges.js');
- const logfilesummary = require('./logfilesummary.js');
- const logfilegetbatches = require('./logfilegetbatches.js');
+ const { MappingStream, WritableWithPassThrough, DelegateWritable } = require('./transforms.js');
+ const allDocsGenerator = require('./allDocsGenerator.js');

  /**
- * Read documents from a database to be backed up.
+ * Validate /_bulk_get support for a specified database.
  *
- * @param {string} db - `@cloudant/cloudant` DB object for source database.
- * @param {number} blocksize - number of documents to download in single request
- * @param {number} parallelism - number of concurrent downloads
- * @param {string} log - path to log file to use
- * @param {boolean} resume - whether to resume from an existing log file
- * @returns EventEmitter with following events:
- * - `received` - called with a block of documents to write to backup
- * - `error` - on error
- * - `finished` - when backup process is finished (either complete or errored)
+ * @param {object} dbClient - object for connection to source database containing name, service and url
  */
- module.exports = function(db, options) {
- const ee = new events.EventEmitter();
- const start = new Date().getTime(); // backup start time
- const batchesPerDownloadSession = 50; // max batches to read from log file for download at a time (prevent OOM)
-
- function proceedWithBackup() {
- if (options.resume) {
- // pick up from existing log file from previous run
- downloadRemainingBatches(options.log, db, ee, start, batchesPerDownloadSession, options.parallelism);
+ async function validateBulkGetSupport(dbClient) {
+ try {
+ await dbClient.service.postBulkGet({ db: dbClient.dbName, docs: [] });
+ } catch (err) {
+ // if _bulk_get isn't available throw a special error
+ if (err.status === 404) {
+ throw new BackupError('BulkGetError', 'Database does not support /_bulk_get endpoint');
  } else {
- // create new log file and process
- spoolchanges(db, options.log, options.bufferSize, ee, function(err) {
- if (err) {
- ee.emit('error', err);
- } else {
- downloadRemainingBatches(options.log, db, ee, start, batchesPerDownloadSession, options.parallelism);
- }
- });
+ throw err;
  }
  }
-
- validateBulkGetSupport(db, function(err) {
- if (err) {
- return ee.emit('error', err);
- } else {
- proceedWithBackup();
- }
- });
-
- return ee;
- };
-
- /**
- * Validate /_bulk_get support for a specified database.
- *
- * @param {string} db - nodejs-cloudant db
- * @param {function} callback - called on completion with signature (err)
- */
- function validateBulkGetSupport(db, callback) {
- db.service.postBulkGet({ db: db.db, docs: [] }).then(() => { callback(); }).catch(err => {
- err = error.convertResponseError(err, function(err) {
- switch (err.status) {
- case undefined:
- // There was no status code on the error
- return err;
- case 404:
- return new error.BackupError('BulkGetError', 'Database does not support /_bulk_get endpoint');
- default:
- return new error.HTTPError(err);
- }
- });
- callback(err);
- });
  }

  /**
- * Download remaining batches in a log file, splitting batches into sets
- * to avoid enqueueing too many in one go.
+ * Read documents from a database to be backed up.
  *
- * @param {string} log - log file name to maintain download state
- * @param {string} db - nodejs-cloudant db
- * @param {events.EventEmitter} ee - event emitter to emit received events on
- * @param {time} startTime - start time for backup process
- * @param {number} batchesPerDownloadSession - max batches to enqueue for
- * download at a time. As batches contain many doc IDs, this helps avoid
- * exhausting memory.
- * @param {number} parallelism - number of concurrent downloads
- * @returns function to call do download remaining batches with signature
- * (err, {batches: batch, docs: doccount}) {@see spoolchanges}.
+ * @param {object} dbClient - object for connection to source database containing name, service and url
+ * @param {number} options - backup configuration
+ * @param {Writable} targetStream - destination for the backup contents
+ * @param {EventEmitter} ee - user facing event emitter
+ * @returns pipeline promise that resolves for a successful backup or rejects on failure
  */
- function downloadRemainingBatches(log, db, ee, startTime, batchesPerDownloadSession, parallelism) {
- let total = 0; // running total of documents downloaded so far
- let noRemainingBatches = false;
-
- // Generate a set of batches (up to batchesPerDownloadSession) to download from the
- // log file and download them. Set noRemainingBatches to `true` for last batch.
- function downloadSingleBatchSet(done) {
- // Fetch the doc IDs for the batches in the current set to
- // download them.
- function batchSetComplete(err, data) {
- if (!err) {
- total = data.total;
- }
- done(err);
- }
- function processRetrievedBatches(err, batches) {
- if (!err) {
- // process them in parallelised queue
- processBatchSet(db, parallelism, log, batches, ee, startTime, total, batchSetComplete);
- } else {
- batchSetComplete(err);
- }
- }
-
- readBatchSetIdsFromLogFile(log, batchesPerDownloadSession, function(err, batchSetIds) {
- if (err) {
- ee.emit('error', err);
- // Stop processing changes file for fatal errors
- noRemainingBatches = true;
- done();
+ module.exports = function(dbClient, options, targetStream, ee) {
+ const start = new Date().getTime(); // backup start time
+ let total = 0; // total documents backed up
+
+ // Full backups use _bulk_get, validate it is available, shallow skips that check
+ return (options.mode === 'full' ? validateBulkGetSupport(dbClient) : Promise.resolve())
+ // Check if the backup is new or resuming and configure the source
+ .then(async() => {
+ if (options.mode === 'shallow') {
+ // shallow backup, start from async _all_docs generator
+ return [
+ allDocsGenerator(dbClient, options)
+ ];
  } else {
- if (batchSetIds.length === 0) {
- noRemainingBatches = true;
- return done();
+ // Full backup, we'll return a stream over a completed changes log file
+ if (!options.resume) {
+ // Not resuming, start spooling changes to create a log file
+ await spoolchanges(dbClient, options.log, (backupBatch) => {
+ ee.emit('changes', backupBatch.batch);
+ }, options.bufferSize);
  }
- logfilegetbatches(log, batchSetIds, processRetrievedBatches);
- }
- });
- }
-
- // Return true if all batches in log file have been downloaded
- function isFinished(callback) { callback(null, noRemainingBatches); }
-
- function onComplete() {
- ee.emit('finished', { total: total });
- }
-
- async.doUntil(downloadSingleBatchSet, isFinished, onComplete);
- }
-
- /**
- * Return a set of uncompleted download batch IDs from the log file.
- *
- * @param {string} log - log file path
- * @param {number} batchesPerDownloadSession - maximum IDs to return
- * @param {function} callback - sign (err, batchSetIds array)
- */
- function readBatchSetIdsFromLogFile(log, batchesPerDownloadSession, callback) {
- logfilesummary(log, function processSummary(err, summary) {
- if (!err) {
- if (!summary.changesComplete) {
- callback(new error.BackupError('IncompleteChangesInLogFile',
- 'WARNING: Changes did not finish spooling'));
- return;
- }
- if (Object.keys(summary.batches).length === 0) {
- return callback(null, []);
+ // At this point we should be changes complete because spooling has finished
+ // or because we resumed a backup that had already completed spooling (and
+ // potentially already downloaded some batches)
+ // Get the log file summary to validate changes complete and obtain the
+ // [remaining] batch numbers to backup
+ const summary = await logFileSummary(options.log);
+ if (!summary.changesComplete) {
+ // We must only backup if changes finished spooling
+ throw new BackupError('IncompleteChangesInLogFile',
+ 'WARNING: Changes did not finish spooling, a backup can only be resumed if changes finished spooling. Start a new backup.');
+ }
+ return logFileGetBatches(options.log, summary.batches);
  }
-
- // batch IDs are the property names of summary.batches
- const batchSetIds = getPropertyNames(summary.batches, batchesPerDownloadSession);
- callback(null, batchSetIds);
- } else {
- callback(err);
- }
- });
- }
-
- /**
- * Download a set of batches retrieved from a log file. When a download is
- * complete, add a line to the logfile indicating such.
- *
- * @param {any} db - nodejs-cloudant database
- * @param {any} parallelism - number of concurrent requests to make
- * @param {any} log - log file to drive downloads from
- * @param {any} batches - batches to download
- * @param {any} ee - event emitter for progress. This funciton emits
- * received and error events.
- * @param {any} start - time backup started, to report deltas
- * @param {any} grandtotal - count of documents downloaded prior to this set
- * of batches
- * @param {any} callback - completion callback, (err, {total: number}).
- */
- function processBatchSet(db, parallelism, log, batches, ee, start, grandtotal, callback) {
- let hasErrored = false;
- let total = grandtotal;
-
- // queue to process the fetch requests in an orderly fashion using _bulk_get
- const q = async.queue(function(payload, done) {
- const output = [];
- const thisBatch = payload.batch;
- delete payload.batch;
- delete payload.command;
-
- function logCompletedBatch(batch) {
- if (log) {
- fs.appendFile(log, ':d batch' + thisBatch + '\n', done);
+ })
+ // Create a pipeline of the source streams and the backup mappings
+ .then((srcStreams) => {
+ const backup = new Backup(dbClient);
+ const postWrite = (backupBatch) => {
+ total += backupBatch.docs.length;
+ const totalRunningTimeSec = (new Date().getTime() - start) / 1000;
+ ee.emit('written', { total, time: totalRunningTimeSec, batch: backupBatch.batch });
+ };
+
+ const destinationStreams = [];
+ if (options.mode === 'shallow') {
+ // shallow mode writes only to backup file
+ destinationStreams.push(
+ new DelegateWritable(
+ 'backup', // Name for debug
+ targetStream, // backup file
+ null, // no last chunk to write
+ backup.backupBatchToBackupFileLine, // map the backup batch to a string for the backup file
+ postWrite // post write function emits the written event
+ ) // DelegateWritable writes the log file done lines
+ );
  } else {
- done();
+ // full mode needs to fetch spooled changes and writes a backup file then finally a log file
+ destinationStreams.push(...[
+ new MappingStream(backup.pendingToFetched, options.parallelism), // fetch the batches at the configured concurrency
+ new WritableWithPassThrough(
+ 'backup', // name for logging
+ targetStream, // backup file
+ null, // no need to write a last chunk
+ backup.backupBatchToBackupFileLine // map the backup batch to a string for the backup file
+ ), // WritableWithPassThrough writes the fetched docs to the backup file and passes on the result metadata
+ new DelegateWritable(
+ 'logFileDoneWriter', // Name for debug
+ createWriteStream(options.log, { flags: 'a' }), // log file for appending
+ null, // no last chunk to write
+ backup.backupBatchToLogFileLine, // Map the backed up batch result to a log file "done" line
+ postWrite // post write function emits the written event
+ ) // DelegateWritable writes the log file done lines
+ ]);
  }
- }

- // do the /db/_bulk_get request
- db.service.postBulkGet({
- db: db.db,
- revs: true,
- docs: payload.docs
- }).then(response => {
- // create an output array with the docs returned
- response.result.results.forEach(function(d) {
- if (d.docs) {
- d.docs.forEach(function(doc) {
- if (doc.ok) {
- output.push(doc.ok);
- }
- });
- }
- });
- total += output.length;
- const t = (new Date().getTime() - start) / 1000;
- ee.emit('received', {
- batch: thisBatch,
- data: output,
- length: output.length,
- time: t,
- total: total
- }, q, logCompletedBatch);
- }).catch(err => {
- if (!hasErrored) {
- hasErrored = true;
- err = error.convertResponseError(err);
- // Kill the queue for fatal errors
- q.kill();
- ee.emit('error', err);
- }
- done();
+ return pipeline(
+ ...srcStreams, // the source streams from the previous block (all docs async generator for shallow or for full either spool changes or resumed log)
+ ...destinationStreams // the appropriate destination streams for the mode
+ );
+ })
+ .then(() => {
+ return { total };
  });
- }, parallelism);
-
- for (const i in batches) {
- q.push(batches[i]);
- }
-
- q.drain(function() {
- callback(null, { total: total });
- });
- }
-
- /**
- * Returns first N properties on an object.
- *
- * @param {object} obj - object with properties
- * @param {number} count - number of properties to return
- */
- function getPropertyNames(obj, count) {
- // decide which batch numbers to deal with
- const batchestofetch = [];
- let j = 0;
- for (const i in obj) {
- batchestofetch.push(parseInt(i));
- j++;
- if (j >= count) break;
- }
- return batchestofetch;
- }
+ };
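The rewrite above drops the async.queue and EventEmitter plumbing in favour of a single stream.pipeline whose stages map pending batches to fetched batches and then to backup-file and log-file lines. The sketch below shows that composition pattern with plain Node.js core streams standing in for the package's MappingStream, WritableWithPassThrough and DelegateWritable classes (which are not part of this diff), so it illustrates the shape of the refactor rather than the actual implementation:

    const { pipeline } = require('node:stream/promises');
    const { Transform, Writable } = require('node:stream');

    // Source: an async iterable of backup batches, like the shallow-mode generator above.
    async function * source() {
      for (let batch = 0; batch < 3; batch++) {
        yield { command: 'd', batch, docs: [{ _id: `doc-${batch}` }] };
      }
    }

    // Mapping stage: stands in for turning a batch into a backup file line.
    const toBackupLine = new Transform({
      objectMode: true,
      transform(backupBatch, _encoding, callback) {
        callback(null, JSON.stringify(backupBatch.docs) + '\n');
      }
    });

    // Destination: stands in for the DelegateWritable that writes the backup target.
    const writeToStdout = new Writable({
      write(line, _encoding, callback) {
        process.stdout.write(line, callback);
      }
    });

    // pipeline resolves once every stage finishes, or rejects on the first error,
    // which is how the new module reports success or failure to its caller.
    pipeline(source(), toBackupLine, writeToStdout)
      .then(() => console.log('backup pipeline finished'))
      .catch((err) => console.error('backup pipeline failed', err));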