@cloudant/couchbackup 2.9.17 → 2.10.0-206

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,260 @@
1
+ // Copyright © 2017, 2023 IBM Corp. All rights reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ 'use strict';
15
+
16
+ const debug = require('debug');
17
+
18
+ const mappingDebug = debug('couchbackup:mappings');
19
+
20
+ class LogMapper {
21
+ logMetadataRegex = /^(:(?:[td]\s+batch\d+|changes_complete))\s*/;
22
+ logCommandRegex = /^:([td]|changes_complete)/;
23
+ logBatchRegex = /batch(\d+)/;
24
+
25
+ /**
26
+ * Function for splitting log file lines into summary and content sections.
27
+ *
28
+ * @param {string} logFileLine
29
+ * @returns {string[]} a max 2 element array, first element metadata, second element content
30
+ */
31
+ splitLogFileLine(logFileLine) {
32
+ if (logFileLine && logFileLine[0] === ':') {
33
+ // Allow up to 3 parts:
34
+ // 1. an empty string from the line start (will be discarded)
35
+ // 2. the capturing group from the split (the command/batch metadata)
36
+ // 3. any remaining content
37
+ const splitLine = logFileLine.split(this.logMetadataRegex, 3);
38
+ // First part of the split is an empty string because we split
39
+ // at the start of the line, so throw that out.
40
+ splitLine.shift();
41
+ return splitLine;
42
+ }
43
+ mappingDebug('Ignoring log file line does not start with :.');
44
+ return [];
45
+ }
46
+
47
+ /**
48
+ * Function to extract the command from the start of a log file line.
49
+ *
50
+ * @param {string} logLineMetadata the start of a log file line
51
+ * @returns command or null
52
+ */
53
+ getCommandFromMetadata(logLineMetadata) {
54
+ // extract command type
55
+ const commandMatches = logLineMetadata.match(this.logCommandRegex);
56
+ if (commandMatches) {
57
+ const command = commandMatches[1];
58
+ return command;
59
+ }
60
+ mappingDebug('Log line had no command.');
61
+ return null;
62
+ }
63
+
64
+ /**
65
+ * Function to extract the batch number from the start of a log file line.
66
+ *
67
+ * @param {string} logLineMetadata the start of a log file line
68
+ * @returns batch number or null
69
+ */
70
+ getBatchFromMetadata(logLineMetadata) {
71
+ // extract batch number
72
+ const batchMatches = logLineMetadata.match(this.logBatchRegex);
73
+ if (batchMatches) {
74
+ const batch = parseInt(batchMatches[1]);
75
+ return batch;
76
+ }
77
+ mappingDebug('Log line had no batch number.');
78
+ return null;
79
+ }
80
+
81
+ /**
82
+ * Function to parse the start of a log file line string into
83
+ * a backup batch object for the command and batch.
84
+ *
85
+ * @param {string} logLineMetadata
86
+ * @returns {object} metadata object with a command property (and a batch property for 't'/'d' commands); command is null if no valid command was found
87
+ */
88
+ parseLogMetadata(logLineMetadata) {
89
+ const metadata = {};
90
+ mappingDebug(`Parsing log metadata ${logLineMetadata}`);
91
+ metadata.command = this.getCommandFromMetadata(logLineMetadata);
92
+ if (metadata.command) {
93
+ switch (metadata.command) {
94
+ case 't':
95
+ case 'd':
96
+ metadata.batch = this.getBatchFromMetadata(logLineMetadata);
97
+ if (metadata.batch === null) {
98
+ // For t and d we should have a batch, if not the line is broken
99
+ // reset the command
100
+ metadata.command = null;
101
+ } else {
102
+ mappingDebug(`Log file line for batch ${metadata.batch} with command ${metadata.command}.`);
103
+ }
104
+ break;
105
+ case 'changes_complete':
106
+ mappingDebug(`Log file line for command ${metadata.command}.`);
107
+ break;
108
+ default:
109
+ mappingDebug(`Unknown command ${metadata.command} in log file`);
110
+ break;
111
+ }
112
+ }
113
+ return metadata;
114
+ }
115
+
116
+ /**
117
+ * Function to handle parsing a log file line from a liner object.
118
+ *
119
+ * @param {object} logFileLine Liner object {lineNumber: #, line: '...data...'}
120
+ * @param {boolean} metadataOnly whether to process only the metadata
121
+ * @returns a batch object with optional batch number and docs property as determined by metadataOnly
122
+ * or the specific command content {command: t|d|changes_complete, batch: #, docs: [{id: id, ...}]}
123
+ */
124
+ handleLogLine(logFileLine, metadataOnly = false) {
125
+ mappingDebug(`Parsing line ${logFileLine.lineNumber}`);
126
+ let metadata = {};
127
+ const backupBatch = { command: null, batch: null, docs: [] };
128
+ // Split the line into command/batch metadata and remaining contents
129
+ const splitLogLine = this.splitLogFileLine(logFileLine.line);
130
+ if (splitLogLine.length >= 1) {
131
+ metadata = this.parseLogMetadata(splitLogLine[0]);
132
+ // type 't' entries have doc IDs to parse
133
+ if (!metadataOnly && metadata.command === 't' && splitLogLine.length === 2) {
134
+ const logFileContentJson = splitLogLine[1];
135
+ try {
136
+ backupBatch.docs = JSON.parse(logFileContentJson);
137
+ mappingDebug(`Parsed ${backupBatch.docs.length} doc IDs from log file line ${logFileLine.lineNumber} for batch ${metadata.batch}.`);
138
+ } catch (err) {
139
+ mappingDebug(`Ignoring parsing error ${err}`);
140
+ // Line is broken, discard metadata
141
+ metadata = {};
142
+ }
143
+ }
144
+ } else {
145
+ mappingDebug(`Ignoring empty or unknown line ${logFileLine.lineNumber} in log file.`);
146
+ }
147
+ return { ...backupBatch, ...metadata };
148
+ }
149
+
150
+ /**
151
+ *
152
+ * This is used to create a batch completeness log without
153
+ * needing to parse all the document ID information.
154
+ *
155
+ * @param {object} logFileLine Liner object {lineNumber: #, line: '...data...'}
156
+ * @returns {object} a batch object {command: t|d|changes_complete, batch: #, docs: []} (docs are not parsed in metadata-only mode)
157
+ */
158
+ logLineToMetadata = (logFileLine) => {
159
+ return this.handleLogLine(logFileLine, true);
160
+ };
161
+
162
+ /**
163
+ * Mapper for converting log file lines to batch objects.
164
+ *
165
+ * @param {object} logFileLine Liner object {lineNumber: #, line: '...data...'}
166
+ * @returns {object} a batch object {command: t|d|changes_complete, batch: #, docs: [{id: id, ...}]}
167
+ */
168
+ logLineToBackupBatch = (logFileLine) => {
169
+ return this.handleLogLine(logFileLine);
170
+ };
171
+ }
172
+
173
+ class Backup {
174
+ constructor(dbClient) {
175
+ this.dbClient = dbClient;
176
+ }
177
+
178
+ /**
179
+ * Mapper for converting a backup batch to a backup file line
180
+ *
181
+ * @param {object} backupBatch a backup batch object {command: d, batch: #, docs: [{_id: id, ...}, ...]}
182
+ * @returns {string} JSON string for the backup file
183
+ */
184
+ backupBatchToBackupFileLine = (backupBatch) => {
185
+ mappingDebug(`Stringifying batch ${backupBatch.batch} with ${backupBatch.docs.length} docs.`);
186
+ return JSON.stringify(backupBatch.docs) + '\n';
187
+ };
188
+
189
+ /**
190
+ * Mapper for converting a backup batch to a log file line
191
+ *
192
+ * @param {object} backupBatch a backup batch object {command: d, batch: #, docs: [{_id: id, ...}, ...]}
193
+ * @returns {string} log file batch done line
194
+ */
195
+ backupBatchToLogFileLine = (backupBatch) => {
196
+ mappingDebug(`Preparing log batch completion line for batch ${backupBatch.batch}.`);
197
+ return `:d batch${backupBatch.batch}\n`;
198
+ };
199
+
200
+ /**
201
+ * Mapper for converting a type t "to do" backup batch object (docs IDs to fetch)
202
+ * to a type d "done" backup batch object with the retrieved docs.
203
+ *
204
+ * @param {object} backupBatch {command: t, batch: #, docs: [{id: id}, ...]}
205
+ * @returns {object} a backup batch object {command: d, batch: #, docs: [{_id: id, ...}, ...]}
206
+ */
207
+ pendingToFetched = async(backupBatch) => {
208
+ mappingDebug(`Fetching batch ${backupBatch.batch}.`);
209
+ try {
210
+ const response = await this.dbClient.service.postBulkGet({
211
+ db: this.dbClient.dbName,
212
+ revs: true,
213
+ docs: backupBatch.docs
214
+ });
215
+
216
+ mappingDebug(`Good server response for batch ${backupBatch.batch}.`);
217
+ // create an output array with the docs returned
218
+ // Bulk get response "results" array is of objects {id: "id", docs: [...]}
219
+ // Since "docs" is an array too we use a flatMap
220
+ const documentRevisions = response.result.results.flatMap(entry => {
221
+ // for each entry in "results" we map the "docs" array
222
+ if (entry.docs) {
223
+ // Map the "docs" array entries to the document revision inside the "ok" property
224
+ return entry.docs.map((doc) => {
225
+ if (doc.ok) {
226
+ // This is the fetched document revision
227
+ return doc.ok;
228
+ }
229
+ if (doc.error) {
230
+ // This type of error was ignored previously so just debug for now.
231
+ mappingDebug(`Error ${doc.error.error} for ${doc.error.id} in batch ${backupBatch.batch}.`);
232
+ }
233
+ return null;
234
+ }).filter((doc) => {
235
+ // Filter out any entries that didn't have a document revision
236
+ return doc || false;
237
+ });
238
+ }
239
+ // Fallback to an empty array that will add nothing to the fetched docs array
240
+ return [];
241
+ });
242
+
243
+ mappingDebug(`Server returned ${documentRevisions.length} document revisions for batch ${backupBatch.batch}.`);
244
+
245
+ return {
246
+ command: 'd',
247
+ batch: backupBatch.batch,
248
+ docs: documentRevisions
249
+ };
250
+ } catch (err) {
251
+ mappingDebug(`Error response from server for batch ${backupBatch.batch}.`);
252
+ throw err;
253
+ }
254
+ };
255
+ }
256
+
257
+ module.exports = {
258
+ Backup,
259
+ LogMapper
260
+ };
@@ -1,4 +1,4 @@
1
- // Copyright © 2017, 2021 IBM Corp. All rights reserved.
1
+ // Copyright © 2017, 2023 IBM Corp. All rights reserved.
2
2
  //
3
3
  // Licensed under the Apache License, Version 2.0 (the "License");
4
4
  // you may not use this file except in compliance with the License.
@@ -13,8 +13,9 @@
13
13
  // limitations under the License.
14
14
  'use strict';
15
15
 
16
- const path = require('path');
17
- const tmp = require('tmp');
16
+ const { mkdtempSync } = require('node:fs');
17
+ const { tmpdir } = require('node:os');
18
+ const { join, normalize } = require('node:path');
18
19
 
19
20
  /**
20
21
  Return API default settings.
@@ -24,7 +25,7 @@ function apiDefaults() {
24
25
  parallelism: 5,
25
26
  bufferSize: 500,
26
27
  requestTimeout: 120000,
27
- log: tmp.tmpNameSync(),
28
+ log: join(mkdtempSync(join(tmpdir(), 'couchbackup-')), `${Date.now()}`),
28
29
  resume: false,
29
30
  mode: 'full'
30
31
  };
@@ -77,7 +78,7 @@ function applyEnvironmentVariables(opts) {
77
78
 
78
79
  // if we have a specified log file
79
80
  if (typeof process.env.COUCH_LOG !== 'undefined') {
80
- opts.log = path.normalize(process.env.COUCH_LOG);
81
+ opts.log = normalize(process.env.COUCH_LOG);
81
82
  }
82
83
 
83
84
  // if we are instructed to resume
@@ -87,7 +88,7 @@ function applyEnvironmentVariables(opts) {
87
88
 
88
89
  // if we are given an output filename
89
90
  if (typeof process.env.COUCH_OUTPUT !== 'undefined') {
90
- opts.output = path.normalize(process.env.COUCH_OUTPUT);
91
+ opts.output = normalize(process.env.COUCH_OUTPUT);
91
92
  }
92
93
 
93
94
  // if we only want a shallow copy
@@ -112,7 +113,7 @@ function applyEnvironmentVariables(opts) {
112
113
  }
113
114
 
114
115
  module.exports = {
115
- apiDefaults: apiDefaults,
116
- cliDefaults: cliDefaults,
117
- applyEnvironmentVariables: applyEnvironmentVariables
116
+ apiDefaults,
117
+ cliDefaults,
118
+ applyEnvironmentVariables
118
119
  };
package/includes/error.js CHANGED
@@ -1,4 +1,4 @@
1
- // Copyright © 2017, 2021 IBM Corp. All rights reserved.
1
+ // Copyright © 2017, 2023 IBM Corp. All rights reserved.
2
2
  //
3
3
  // Licensed under the Apache License, Version 2.0 (the "License");
4
4
  // you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ const codes = {
24
24
  NoLogFileName: 20,
25
25
  LogDoesNotExist: 21,
26
26
  IncompleteChangesInLogFile: 22,
27
+ LogFileExists: 23,
27
28
  SpoolChangesError: 30,
28
29
  HTTPFatalError: 40,
29
30
  BulkGetError: 50
@@ -36,6 +37,12 @@ class BackupError extends Error {
36
37
  }
37
38
  }
38
39
 
40
+ class OptionError extends BackupError {
41
+ constructor(message) {
42
+ super('InvalidOption', message);
43
+ }
44
+ }
45
+
39
46
  class HTTPError extends BackupError {
40
47
  constructor(responseError, name) {
41
48
  // Special case some names for more useful error messages
@@ -53,56 +60,47 @@ class HTTPError extends BackupError {
53
60
  }
54
61
  }
55
62
 
56
- // Default function to return an error for HTTP status codes
57
- // < 400 -> OK
58
- // 4XX (except 429) -> Fatal
59
- // 429 & >=500 -> Transient
60
- function checkResponse(err) {
61
- if (err) {
62
- // Construct an HTTPError if there is request information on the error
63
- // Codes < 400 are considered OK
64
- if (err.status >= 400) {
65
- return new HTTPError(err);
66
- } else {
67
- // Send it back again if there was no status code, e.g. a cxn error
68
- return augmentMessage(err);
69
- }
70
- }
71
- }
72
-
73
- function convertResponseError(responseError, errorFactory) {
74
- if (!errorFactory) {
75
- errorFactory = checkResponse;
76
- }
77
- return errorFactory(responseError);
78
- }
79
-
80
- function augmentMessage(err) {
81
- // For errors that don't have a status code, we are likely looking at a cxn
82
- // error.
83
- // Try to augment the message with more detail (core puts the code in statusText)
84
- if (err && err.statusText) {
85
- err.message = `${err.message} ${err.statusText}`;
86
- }
87
- if (err && err.description) {
88
- err.message = `${err.message} ${err.description}`;
89
- }
90
- return err;
91
- }
92
-
93
- function wrapPossibleInvalidUrlError(err) {
94
- if (err.code === 'ERR_INVALID_URL') {
63
+ /**
64
+ * A function for converting between error types and improving error messages.
65
+ *
66
+ * Cases:
67
+ * - BackupError - return as is.
68
+ * - response "like" errors - convert to HTTPError.
69
+ * - ERR_INVALID_URL - convert to OptionError.
70
+ * - Error (general case) - augment with additional statusText
71
+ * or description if available.
72
+ *
73
+ * @param {Error} e
74
+ * @returns {Error} the modified error
75
+ */
76
+ function convertError(e) {
77
+ if (e instanceof BackupError) {
78
+ // If it's already a BackupError just pass it on
79
+ return e;
80
+ } else if (e && e.status && e.status >= 400) {
81
+ return new HTTPError(e);
82
+ } else if (e.code === 'ERR_INVALID_URL') {
95
83
  // Wrap ERR_INVALID_URL in our own InvalidOption
96
- return new BackupError('InvalidOption', err.message);
84
+ return new OptionError(e.message);
85
+ } else {
86
+ // For errors that don't have a status code, we are likely looking at a cxn
87
+ // error.
88
+ // Try to augment the message with more detail (core puts the code in statusText)
89
+ if (e && e.statusText) {
90
+ e.message = `${e.message} ${e.statusText}`;
91
+ }
92
+ if (e && e.description) {
93
+ e.message = `${e.message} ${e.description}`;
94
+ }
95
+ return e;
97
96
  }
98
- return err;
99
97
  }
100
98
 
101
99
  module.exports = {
102
100
  BackupError,
101
+ OptionError,
103
102
  HTTPError,
104
- wrapPossibleInvalidUrlError,
105
- convertResponseError,
103
+ convertError,
106
104
  terminationCallback: function terminationCallback(err, data) {
107
105
  if (err) {
108
106
  console.error(`ERROR: ${err.message}`);
package/includes/liner.js CHANGED
@@ -1,4 +1,4 @@
1
- // Copyright © 2017 IBM Corp. All rights reserved.
1
+ // Copyright © 2017, 2024 IBM Corp. All rights reserved.
2
2
  //
3
3
  // Licensed under the Apache License, Version 2.0 (the "License");
4
4
  // you may not use this file except in compliance with the License.
@@ -13,34 +13,145 @@
13
13
  // limitations under the License.
14
14
  'use strict';
15
15
 
16
- // stolen from http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
17
- const stream = require('stream');
16
+ const { createInterface } = require('node:readline');
17
+ const { PassThrough, Duplex } = require('node:stream');
18
+ const debug = require('debug');
18
19
 
19
- module.exports = function() {
20
- const liner = new stream.Transform({ objectMode: true });
20
+ /**
21
+ * A Duplex stream that converts the input stream to a stream
22
+ * of line objects using the built-in readline interface.
23
+ *
24
+ * The new stream line objects have the form
25
+ * {lineNumber: #, line: content}
26
+ *
27
+ * Note that it uses the `line` event and not `for await...of`
28
+ * for performance reasons. See Node Readline module docs for
29
+ * details.
30
+ */
31
+ class Liner extends Duplex {
32
+ // Configure logging
33
+ log = debug(('couchbackup:liner'));
34
+ // Flag for whether the readline interface is running
35
+ isRunning = true;
36
+ // Line number state
37
+ lineNumber = 0;
38
+ // Buffer of processed lines
39
+ lines = [];
40
+ // Stream of bytes that will be processed to lines.
41
+ inStream = new PassThrough({ objectMode: false })
42
+ // if there is an error destroy this Duplex with it
43
+ .on('error', e => this.destroy(e));
21
44
 
22
- liner._transform = function(chunk, encoding, done) {
23
- let data = chunk.toString();
24
- if (this._lastLineData) {
25
- data = this._lastLineData + data;
26
- }
45
+ constructor() {
46
+ // Configuration of this Duplex:
47
+ // objectMode: false on the writable input (file chunks), true on the readable output (line objects)
48
+ // The readableHighWaterMark controls the number of lines buffered after this implementation calls
49
+ // "push". Backup lines are potentially large (default 500 documents - i.e. potentially MBs). Since
50
+ // there is additional buffering downstream and file processing is faster than the network ops
51
+ // we don't bottleneck here even without a large buffer.
52
+ super({ readableObjectMode: true, readableHighWaterMark: 0, writableObjectMode: false });
53
+ // Built-in readline interface over the inStream
54
+ this.readlineInterface = createInterface({
55
+ input: this.inStream, // the writable side of Liner, passed through
56
+ terminal: false, // expect to read from files
57
+ crlfDelay: Infinity // couchbackup files should only use "\n" EOL, but allow for all "\r\n" to be a single EOL
58
+ }).on('line', (line) => {
59
+ // Wrap the line in the object format and store it an array waiting to be pushed
60
+ // when downstream is ready to receive.
61
+ const bufferedLines = this.lines.push(this.wrapLine(line));
62
+ this.log(`Liner processed line ${this.lineNumber}. Buffered lines available: ${bufferedLines}.`);
63
+ this.pushAvailable();
64
+ }).on('close', () => {
65
+ this.log('Liner readline interface closed.');
66
+ // Push null onto our lines buffer to signal EOF to downstream consumers.
67
+ this.lines.push(null);
68
+ this.pushAvailable();
69
+ });
70
+ }
27
71
 
28
- const lines = data.split('\n');
29
- this._lastLineData = lines.splice(lines.length - 1, 1)[0];
72
+ /**
73
+ * Helper function to wrap a line in the object format that Liner
74
+ * pushes to downstream consumers.
75
+ *
76
+ * @param {string} line
77
+ * @returns {object} {lineNumber: #, line: content}
78
+ */
79
+ wrapLine(line) {
80
+ // For each line wrapped, increment the line number
81
+ return { lineNumber: ++this.lineNumber, line };
82
+ }
30
83
 
31
- for (const i in lines) {
32
- this.push(lines[i]);
84
+ /**
85
+ * Function that pushes any available lines downstream.
86
+ */
87
+ pushAvailable() {
88
+ // Check readline is running flag and whether there is content to push.
89
+ while (this.isRunning && this.lines.length > 0) {
90
+ if (!this.push(this.lines.shift())) {
91
+ // Push returned false, this indicates downstream back-pressure.
92
+ // Pause the readline interface to stop pushing more lines downstream.
93
+ // Resumption is triggered by downstream calling _read which happens
94
+ // when it is ready for more data.
95
+ this.log(`Liner pausing after back-pressure from push. Buffered lines available: ${this.lines.length}.`);
96
+ this.isRunning = false;
97
+ this.readlineInterface.pause();
98
+ break;
99
+ } else {
100
+ this.log(`Liner pushed. Buffered lines available: ${this.lines.length}.`);
101
+ }
33
102
  }
34
- done();
35
- };
103
+ }
36
104
 
37
- liner._flush = function(done) {
38
- if (this._lastLineData) {
39
- this.push(this._lastLineData);
105
+ /**
106
+ * Implementation of the Readable side of the Duplex.
107
+ *
108
+ *
109
+ * @param {number} size - ignored as the Readable side is objectMode: true
110
+ */
111
+ _read(size) {
112
+ // As per the Readable contract if read has been called it won't be called
113
+ // again until after there has been a call to push.
114
+ // As part of flow control if we are not running we must resume when read
115
+ // is called to ensure that pushes are able to happen (and thereby trigger)
116
+ // subsequent reads.
117
+ if (!this.isRunning) {
118
+ this.log('Liner resuming after read.');
119
+ this.isRunning = true;
120
+ this.readlineInterface.resume();
40
121
  }
41
- this._lastLineData = null;
42
- done();
43
- };
122
+ this.pushAvailable();
123
+ }
124
+
125
+ /**
126
+ * Implementation for the Writable side of the Duplex.
127
+ * Delegates to the inStream PassThrough.
128
+ *
129
+ * @param {*} chunk
130
+ * @param {string} encoding
131
+ * @param {function} callback
132
+ */
133
+ _write(chunk, encoding, callback) {
134
+ // Note that the passed callback function controls flow from upstream.
135
+ // When the readable side is paused by downstream the inStream buffer
136
+ // will fill and then the callback will be delayed until that buffer
137
+ // is drained by the readline interface starting up again.
138
+ this.inStream.write(chunk, encoding, callback);
139
+ }
140
+
141
+ /**
142
+ * Cleanup after the last write to the Duplex.
143
+ *
144
+ * @param {function} callback
145
+ */
146
+ _final(callback) {
147
+ this.log('Finalizing liner.');
148
+ // Nothing more will be written, end our inStream which will
149
+ // cause the readLineInterface to emit 'close' and signal EOF
150
+ // to our readers after the line buffer is emptied.
151
+ this.inStream.end(callback);
152
+ }
153
+ }
44
154
 
45
- return liner;
155
+ module.exports = {
156
+ Liner
46
157
  };