@cloudant/couchbackup 2.9.17 → 2.10.0-206
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/app.js +253 -268
- package/bin/couchbackup.bin.js +1 -3
- package/bin/couchrestore.bin.js +2 -4
- package/includes/allDocsGenerator.js +53 -0
- package/includes/backup.js +103 -247
- package/includes/backupMappings.js +260 -0
- package/includes/config.js +10 -9
- package/includes/error.js +42 -44
- package/includes/liner.js +134 -23
- package/includes/logfilegetbatches.js +25 -60
- package/includes/logfilesummary.js +41 -71
- package/includes/parser.js +3 -3
- package/includes/request.js +95 -106
- package/includes/restore.js +45 -14
- package/includes/restoreMappings.js +141 -0
- package/includes/spoolchanges.js +57 -79
- package/includes/transforms.js +378 -0
- package/package.json +5 -8
- package/includes/change.js +0 -41
- package/includes/shallowbackup.js +0 -80
- package/includes/writer.js +0 -164
package/bin/couchrestore.bin.js
CHANGED
```diff
@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-// Copyright © 2017,
+// Copyright © 2017, 2024 IBM Corp. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -49,9 +49,7 @@ try {
     opts,
     error.terminationCallback
   ).on('restored', function(obj) {
-    restoreBatchDebug('restored', obj.total);
-  }).on('error', function(e) {
-    restoreDebug('ERROR', e);
+    restoreBatchDebug('Restored batch ID:', obj.batch, 'Total document revisions restored:', obj.total, 'Time:', obj.time);
   }).on('finished', function(obj) {
     restoreDebug('finished', obj);
   });
```
package/includes/allDocsGenerator.js
ADDED
```diff
@@ -0,0 +1,53 @@
+// Copyright © 2023 IBM Corp. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+'use strict';
+
+const debug = require('debug')('couchbackup:alldocsgenerator');
+const { BackupError } = require('./error.js');
+
+/**
+ * Async generator function for paginating _all_docs for shallow backups.
+ *
+ * @param {object} dbClient - object for connection to source database containing name, service and url
+ * @param {object} options - backup configuration
+ * @yields {object} a "done" type backup batch {command: d, batch: #, docs: [{_id: id, ...}, ...]}
+ */
+module.exports = async function * (dbClient, options = {}) {
+  let batch = 0;
+  let lastPage = false;
+  let startKey = null;
+  const opts = { db: dbClient.dbName, limit: options.bufferSize, includeDocs: true };
+  do {
+    if (startKey) opts.startKey = startKey;
+    yield dbClient.service.postAllDocs(opts).then(response => {
+      if (!(response.result && response.result.rows)) {
+        throw new BackupError('AllDocsError', 'Invalid all docs response');
+      }
+      debug(`Got page from start key '${startKey}'`);
+      const docs = response.result.rows;
+      debug(`Received ${docs.length} docs`);
+      lastPage = docs.length < opts.limit;
+      if (docs.length > 0) {
+        const lastKey = docs[docs.length - 1].id;
+        debug(`Received up to key ${lastKey}`);
+        // To avoid double fetching a document solely for the purposes of getting
+        // the next ID to use as a startKey for the next page we instead use the
+        // last ID of the current page and append the lowest unicode sort
+        // character.
+        startKey = `${lastKey}\0`;
+      }
+      return { command: 'd', batch: batch++, docs: docs.map(doc => { return doc.doc; }) };
+    });
+  } while (!lastPage);
+};
```
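
The new shallow-backup source above is a plain async generator, so outside the package's own pipeline it could also be driven with `for await...of`. A rough consumer sketch, assuming a configured `@ibm-cloud/cloudant` `CloudantV1` client; the database name and `bufferSize` are made up, and deep-requiring the internal module is shown for illustration only, it is not a documented entry point:

```js
// Illustrative only: deep-requiring an internal module of @cloudant/couchbackup.
const { CloudantV1 } = require('@ibm-cloud/cloudant');
const allDocsGenerator = require('@cloudant/couchbackup/includes/allDocsGenerator.js');

async function shallowDump() {
  const dbClient = {
    dbName: 'mydb', // hypothetical database name
    service: CloudantV1.newInstance({}) // credentials taken from the environment
  };
  // bufferSize is the _all_docs page size requested per call
  for await (const backupBatch of allDocsGenerator(dbClient, { bufferSize: 500 })) {
    // each yielded batch has the shape { command: 'd', batch: n, docs: [...] }
    console.log(`batch ${backupBatch.batch}: ${backupBatch.docs.length} docs`);
  }
}

shallowDump().catch(console.error);
```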
package/includes/backup.js
CHANGED
```diff
@@ -1,4 +1,4 @@
-// Copyright © 2017,
+// Copyright © 2017, 2024 IBM Corp. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
```
```diff
@@ -13,269 +13,125 @@
 // limitations under the License.
 'use strict';
 
-const
-const
-const
-const
+const { createWriteStream } = require('node:fs');
+const { pipeline } = require('node:stream/promises');
+const { Backup } = require('./backupMappings.js');
+const { BackupError } = require('./error.js');
+const logFileSummary = require('./logfilesummary.js');
+const logFileGetBatches = require('./logfilegetbatches.js');
 const spoolchanges = require('./spoolchanges.js');
-const
-const
+const { MappingStream, WritableWithPassThrough, DelegateWritable } = require('./transforms.js');
+const allDocsGenerator = require('./allDocsGenerator.js');
 
 /**
-*
+* Validate /_bulk_get support for a specified database.
 *
-* @param {
-* @param {number} blocksize - number of documents to download in single request
-* @param {number} parallelism - number of concurrent downloads
-* @param {string} log - path to log file to use
-* @param {boolean} resume - whether to resume from an existing log file
-* @returns EventEmitter with following events:
-* - `received` - called with a block of documents to write to backup
-* - `error` - on error
-* - `finished` - when backup process is finished (either complete or errored)
+* @param {object} dbClient - object for connection to source database containing name, service and url
 */
-
-
-
-
-
-
-
-// pick up from existing log file from previous run
-downloadRemainingBatches(options.log, db, ee, start, batchesPerDownloadSession, options.parallelism);
+async function validateBulkGetSupport(dbClient) {
+  try {
+    await dbClient.service.postBulkGet({ db: dbClient.dbName, docs: [] });
+  } catch (err) {
+    // if _bulk_get isn't available throw a special error
+    if (err.status === 404) {
+      throw new BackupError('BulkGetError', 'Database does not support /_bulk_get endpoint');
     } else {
-
-spoolchanges(db, options.log, options.bufferSize, ee, function(err) {
-if (err) {
-ee.emit('error', err);
-} else {
-downloadRemainingBatches(options.log, db, ee, start, batchesPerDownloadSession, options.parallelism);
-}
-});
+      throw err;
     }
   }
-
-validateBulkGetSupport(db, function(err) {
-if (err) {
-return ee.emit('error', err);
-} else {
-proceedWithBackup();
-}
-});
-
-return ee;
-};
-
-/**
-* Validate /_bulk_get support for a specified database.
-*
-* @param {string} db - nodejs-cloudant db
-* @param {function} callback - called on completion with signature (err)
-*/
-function validateBulkGetSupport(db, callback) {
-db.service.postBulkGet({ db: db.db, docs: [] }).then(() => { callback(); }).catch(err => {
-err = error.convertResponseError(err, function(err) {
-switch (err.status) {
-case undefined:
-// There was no status code on the error
-return err;
-case 404:
-return new error.BackupError('BulkGetError', 'Database does not support /_bulk_get endpoint');
-default:
-return new error.HTTPError(err);
-}
-});
-callback(err);
-});
 }
 
 /**
-*
-* to avoid enqueueing too many in one go.
+* Read documents from a database to be backed up.
 *
-* @param {
-* @param {
-* @param {
-* @param {
-* @
-* download at a time. As batches contain many doc IDs, this helps avoid
-* exhausting memory.
-* @param {number} parallelism - number of concurrent downloads
-* @returns function to call do download remaining batches with signature
-* (err, {batches: batch, docs: doccount}) {@see spoolchanges}.
+* @param {object} dbClient - object for connection to source database containing name, service and url
+* @param {number} options - backup configuration
+* @param {Writable} targetStream - destination for the backup contents
+* @param {EventEmitter} ee - user facing event emitter
+* @returns pipeline promise that resolves for a successful backup or rejects on failure
 */
-function
-
-let
-
-//
-
-
-
-
-
-
-
-
-done(err);
-}
-function processRetrievedBatches(err, batches) {
-if (!err) {
-// process them in parallelised queue
-processBatchSet(db, parallelism, log, batches, ee, startTime, total, batchSetComplete);
-} else {
-batchSetComplete(err);
-}
-}
-
-readBatchSetIdsFromLogFile(log, batchesPerDownloadSession, function(err, batchSetIds) {
-if (err) {
-ee.emit('error', err);
-// Stop processing changes file for fatal errors
-noRemainingBatches = true;
-done();
+module.exports = function(dbClient, options, targetStream, ee) {
+  const start = new Date().getTime(); // backup start time
+  let total = 0; // total documents backed up
+
+  // Full backups use _bulk_get, validate it is available, shallow skips that check
+  return (options.mode === 'full' ? validateBulkGetSupport(dbClient) : Promise.resolve())
+    // Check if the backup is new or resuming and configure the source
+    .then(async() => {
+      if (options.mode === 'shallow') {
+        // shallow backup, start from async _all_docs generator
+        return [
+          allDocsGenerator(dbClient, options)
+        ];
       } else {
-
-
-
+        // Full backup, we'll return a stream over a completed changes log file
+        if (!options.resume) {
+          // Not resuming, start spooling changes to create a log file
+          await spoolchanges(dbClient, options.log, (backupBatch) => {
+            ee.emit('changes', backupBatch.batch);
+          }, options.bufferSize);
        }
-
-
-
-
-
-
-
-
-
-
-
-
-async.doUntil(downloadSingleBatchSet, isFinished, onComplete);
-}
-
-/**
-* Return a set of uncompleted download batch IDs from the log file.
-*
-* @param {string} log - log file path
-* @param {number} batchesPerDownloadSession - maximum IDs to return
-* @param {function} callback - sign (err, batchSetIds array)
-*/
-function readBatchSetIdsFromLogFile(log, batchesPerDownloadSession, callback) {
-logfilesummary(log, function processSummary(err, summary) {
-if (!err) {
-if (!summary.changesComplete) {
-callback(new error.BackupError('IncompleteChangesInLogFile',
-'WARNING: Changes did not finish spooling'));
-return;
-}
-if (Object.keys(summary.batches).length === 0) {
-return callback(null, []);
+        // At this point we should be changes complete because spooling has finished
+        // or because we resumed a backup that had already completed spooling (and
+        // potentially already downloaded some batches)
+        // Get the log file summary to validate changes complete and obtain the
+        // [remaining] batch numbers to backup
+        const summary = await logFileSummary(options.log);
+        if (!summary.changesComplete) {
+          // We must only backup if changes finished spooling
+          throw new BackupError('IncompleteChangesInLogFile',
+            'WARNING: Changes did not finish spooling, a backup can only be resumed if changes finished spooling. Start a new backup.');
+        }
+        return logFileGetBatches(options.log, summary.batches);
      }
-
-
-
-
-
-
-
-
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-* of batches
-* @param {any} callback - completion callback, (err, {total: number}).
-*/
-function processBatchSet(db, parallelism, log, batches, ee, start, grandtotal, callback) {
-let hasErrored = false;
-let total = grandtotal;
-
-// queue to process the fetch requests in an orderly fashion using _bulk_get
-const q = async.queue(function(payload, done) {
-const output = [];
-const thisBatch = payload.batch;
-delete payload.batch;
-delete payload.command;
-
-function logCompletedBatch(batch) {
-if (log) {
-fs.appendFile(log, ':d batch' + thisBatch + '\n', done);
+    })
+    // Create a pipeline of the source streams and the backup mappings
+    .then((srcStreams) => {
+      const backup = new Backup(dbClient);
+      const postWrite = (backupBatch) => {
+        total += backupBatch.docs.length;
+        const totalRunningTimeSec = (new Date().getTime() - start) / 1000;
+        ee.emit('written', { total, time: totalRunningTimeSec, batch: backupBatch.batch });
+      };
+
+      const destinationStreams = [];
+      if (options.mode === 'shallow') {
+        // shallow mode writes only to backup file
+        destinationStreams.push(
+          new DelegateWritable(
+            'backup', // Name for debug
+            targetStream, // backup file
+            null, // no last chunk to write
+            backup.backupBatchToBackupFileLine, // map the backup batch to a string for the backup file
+            postWrite // post write function emits the written event
+          ) // DelegateWritable writes the log file done lines
+        );
      } else {
-
+        // full mode needs to fetch spooled changes and writes a backup file then finally a log file
+        destinationStreams.push(...[
+          new MappingStream(backup.pendingToFetched, options.parallelism), // fetch the batches at the configured concurrency
+          new WritableWithPassThrough(
+            'backup', // name for logging
+            targetStream, // backup file
+            null, // no need to write a last chunk
+            backup.backupBatchToBackupFileLine // map the backup batch to a string for the backup file
+          ), // WritableWithPassThrough writes the fetched docs to the backup file and passes on the result metadata
+          new DelegateWritable(
+            'logFileDoneWriter', // Name for debug
+            createWriteStream(options.log, { flags: 'a' }), // log file for appending
+            null, // no last chunk to write
+            backup.backupBatchToLogFileLine, // Map the backed up batch result to a log file "done" line
+            postWrite // post write function emits the written event
+          ) // DelegateWritable writes the log file done lines
+        ]);
      }
-}
 
-
-
-
-
-
-
-
-response.result.results.forEach(function(d) {
-if (d.docs) {
-d.docs.forEach(function(doc) {
-if (doc.ok) {
-output.push(doc.ok);
-}
-});
-}
-});
-total += output.length;
-const t = (new Date().getTime() - start) / 1000;
-ee.emit('received', {
-batch: thisBatch,
-data: output,
-length: output.length,
-time: t,
-total: total
-}, q, logCompletedBatch);
-}).catch(err => {
-if (!hasErrored) {
-hasErrored = true;
-err = error.convertResponseError(err);
-// Kill the queue for fatal errors
-q.kill();
-ee.emit('error', err);
-}
-done();
+      return pipeline(
+        ...srcStreams, // the source streams from the previous block (all docs async generator for shallow or for full either spool changes or resumed log)
+        ...destinationStreams // the appropriate destination streams for the mode
+      );
+    })
+    .then(() => {
+      return { total };
    });
-
-
-for (const i in batches) {
-q.push(batches[i]);
-}
-
-q.drain(function() {
-callback(null, { total: total });
-});
-}
-
-/**
-* Returns first N properties on an object.
-*
-* @param {object} obj - object with properties
-* @param {number} count - number of properties to return
-*/
-function getPropertyNames(obj, count) {
-// decide which batch numbers to deal with
-const batchestofetch = [];
-let j = 0;
-for (const i in obj) {
-batchestofetch.push(parseInt(i));
-j++;
-if (j >= count) break;
-}
-return batchestofetch;
-}
+};
```
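
The rewritten backup.js leans on the promise form of `pipeline()` from `node:stream/promises` to chain an async-generator source, mapping transforms, and writable sinks, and to surface success or failure as a single promise. A stripped-down sketch of that pattern, not the package's actual `MappingStream`/`DelegateWritable` classes; the batch shape and line format here are invented for illustration:

```js
const { Transform, Writable } = require('node:stream');
const { pipeline } = require('node:stream/promises');

// Source: an async generator yielding object-mode "batches" (invented shape).
async function * source() {
  for (let batch = 0; batch < 3; batch++) {
    yield { batch, docs: [{ _id: `doc-${batch}` }] };
  }
}

// Transform: map each batch object to one line of output, as a backup file writer might.
const mapToLine = new Transform({
  objectMode: true,
  transform(backupBatch, _enc, callback) {
    callback(null, JSON.stringify(backupBatch.docs) + '\n');
  }
});

// Sink: stand-in for the backup target stream.
const sink = new Writable({
  write(chunk, _enc, callback) {
    process.stdout.write(chunk, callback);
  }
});

// pipeline() wires the pieces together and resolves or rejects like the backup promise.
pipeline(source, mapToLine, sink)
  .then(() => console.log('backup-style pipeline finished'))
  .catch((err) => console.error('pipeline failed', err));
```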