npm - @cloudant/couchbackup - Versions diffs - 2.9.17 → 2.10.0-206 - Mend

@cloudant/couchbackup 2.9.17 → 2.10.0-206

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/README.md +10 -0
package/app.js +253 -268
package/bin/couchbackup.bin.js +1 -3
package/bin/couchrestore.bin.js +2 -4
package/includes/allDocsGenerator.js +53 -0
package/includes/backup.js +103 -247
package/includes/backupMappings.js +260 -0
package/includes/config.js +10 -9
package/includes/error.js +42 -44
package/includes/liner.js +134 -23
package/includes/logfilegetbatches.js +25 -60
package/includes/logfilesummary.js +41 -71
package/includes/parser.js +3 -3
package/includes/request.js +95 -106
package/includes/restore.js +45 -14
package/includes/restoreMappings.js +141 -0
package/includes/spoolchanges.js +57 -79
package/includes/transforms.js +378 -0
package/package.json +5 -8
package/includes/change.js +0 -41
package/includes/shallowbackup.js +0 -80
package/includes/writer.js +0 -164

package/includes/backupMappings.js ADDED Viewed

@@ -0,0 +1,260 @@
+// Copyright © 2017, 2023 IBM Corp. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+'use strict';
+const debug = require('debug');
+const mappingDebug = debug('couchbackup:mappings');
+class LogMapper {
+  logMetadataRegex = /^(:(?:[td]\s+batch\d+|changes_complete))\s*/;
+  logCommandRegex = /^:([td]|changes_complete)/;
+  logBatchRegex = /batch(\d+)/;
+  /**
+   * Function for splitting log file lines into summary and content sections.
+   *
+   * @param {string} logFileLine
+   * @returns {string[]} a max 2 element array, first element metadata, second element content
+   */
+  splitLogFileLine(logFileLine) {
+    if (logFileLine && logFileLine[0] === ':') {
+      // Allow up to 3 parts:
+      // 1. an empty string from the line start (will be discarded)
+      // 2. the capturing group from the split (the command/batch metadata)
+      // 3. any remaining content
+      const splitLine = logFileLine.split(this.logMetadataRegex, 3);
+      // First part of the split is an empty string because we split
+      // at the start of the line, so throw that out.
+      splitLine.shift();
+      return splitLine;
+    }
+    mappingDebug('Ignoring log file line does not start with :.');
+    return [];
+  }
+  /**
+   * Function to extract the command from the start of a log file line.
+   *
+   * @param {string} logLineMetadata the start of a log file line
+   * @returns command or null
+   */
+  getCommandFromMetadata(logLineMetadata) {
+    // extract command type
+    const commandMatches = logLineMetadata.match(this.logCommandRegex);
+    if (commandMatches) {
+      const command = commandMatches[1];
+      return command;
+    }
+    mappingDebug('Log line had no command.');
+    return null;
+  }
+  /**
+   * Function to extract the batch number from the start of a log file line.
+   *
+   * @param {string} logLineMetadata the start of a log file line
+   * @returns batch number or null
+   */
+  getBatchFromMetadata(logLineMetadata) {
+    // extract batch number
+    const batchMatches = logLineMetadata.match(this.logBatchRegex);
+    if (batchMatches) {
+      const batch = parseInt(batchMatches[1]);
+      return batch;
+    }
+    mappingDebug('Log line had no batch number.');
+    return null;
+  }
+  /**
+   * Function to parse the start of a log file line string into
+   * a backup batch object for the command and batch.
+   *
+   * @param {string} logLineMetadata
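+   * @returns {object} metadata object with a command property (null when the line has
+   * no recognised command, or when a t/d line is missing its batch number) and, for
+   * t and d lines, a batch property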
+   */
+  parseLogMetadata(logLineMetadata) {
+    const metadata = {};
+    mappingDebug(`Parsing log metadata ${logLineMetadata}`);
+    metadata.command = this.getCommandFromMetadata(logLineMetadata);
+    if (metadata.command) {
+      switch (metadata.command) {
+        case 't':
+        case 'd':
+          metadata.batch = this.getBatchFromMetadata(logLineMetadata);
+          if (metadata.batch === null) {
+            // For t and d we should have a batch, if not the line is broken
+            // reset the command
+            metadata.command = null;
+          } else {
+            mappingDebug(`Log file line for batch ${metadata.batch} with command ${metadata.command}.`);
+          }
+          break;
+        case 'changes_complete':
+          mappingDebug(`Log file line for command ${metadata.command}.`);
+          break;
+        default:
+          mappingDebug(`Unknown command ${metadata.command} in log file`);
+          break;
+      }
+    }
+    return metadata;
+  }
+  /**
+   * Function to handle parsing a log file line from a liner object.
+   *
+   * @param {object} logFileLine Liner object {lineNumber: #, line: '...data...'}
+   * @param {boolean} metadataOnly whether to process only the metadata
+   * @returns a batch object with optional batch number and docs property as determined by metadataOnly
+   * or the specific command content {command: t|d|changes_complete, batch: #, docs: [{id: id, ...}]}
+   */
+  handleLogLine(logFileLine, metadataOnly = false) {
+    mappingDebug(`Parsing line ${logFileLine.lineNumber}`);
+    let metadata = {};
+    const backupBatch = { command: null, batch: null, docs: [] };
+    // Split the line into command/batch metadata and remaining contents
+    const splitLogLine = this.splitLogFileLine(logFileLine.line);
+    if (splitLogLine.length >= 1) {
+      metadata = this.parseLogMetadata(splitLogLine[0]);
+      // type 't' entries have doc IDs to parse
+      if (!metadataOnly && metadata.command === 't' && splitLogLine.length === 2) {
+        const logFileContentJson = splitLogLine[1];
+        try {
+          backupBatch.docs = JSON.parse(logFileContentJson);
+          mappingDebug(`Parsed ${backupBatch.docs.length} doc IDs from log file line ${logFileLine.lineNumber} for batch ${metadata.batch}.`);
+        } catch (err) {
+          mappingDebug(`Ignoring parsing error ${err}`);
+          // Line is broken, discard metadata
+          metadata = {};
+        }
+      }
+    } else {
+      mappingDebug(`Ignoring empty or unknown line ${logFileLine.lineNumber} in log file.`);
+    }
+    return { ...backupBatch, ...metadata };
+  }
+  /**
+   * Mapper for converting log file lines to batch objects containing metadata only.
+   *
+   * This is used to create a batch completeness log without
+   * needing to parse all the document ID information.
+   *
+   * @param {object} logFileLine Liner object {lineNumber: #, line: '...data...'}
+   * @returns {object} a batch object {command: t|d|changes_complete, batch: #, docs: []}
+   * (docs is left empty because the document IDs are not parsed)
+   */
+  logLineToMetadata = (logFileLine) => {
+    return this.handleLogLine(logFileLine, true);
+  };
+  /**
+   * Mapper for converting log file lines to batch objects.
+   *
+   * @param {object} logFileLine Liner object {lineNumber: #, line: '...data...'}
+   * @returns {object} a batch object {command: t|d|changes_complete, batch: #, docs: [{id: id, ...}]}
+   */
+  logLineToBackupBatch = (logFileLine) => {
+    return this.handleLogLine(logFileLine);
+  };
+}
+class Backup {
+  constructor(dbClient) {
+    this.dbClient = dbClient;
+  }
+  /**
+   * Mapper for converting a backup batch to a backup file line
+   *
+   * @param {object} backupBatch a backup batch object {command: d, batch: #, docs: [{_id: id, ...}, ...]}
+   * @returns {string} JSON string for the backup file
+   */
+  backupBatchToBackupFileLine = (backupBatch) => {
+    mappingDebug(`Stringifying batch ${backupBatch.batch} with ${backupBatch.docs.length} docs.`);
+    return JSON.stringify(backupBatch.docs) + '\n';
+  };
+  /**
+   * Mapper for converting a backup batch to a log file line
+   *
+   * @param {object} backupBatch a backup batch object {command: d, batch: #, docs: [{_id: id, ...}, ...]}
+   * @returns {string} log file batch done line
+   */
+  backupBatchToLogFileLine = (backupBatch) => {
+    mappingDebug(`Preparing log batch completion line for batch ${backupBatch.batch}.`);
+    return `:d batch${backupBatch.batch}\n`;
+  };
+  /**
+   * Mapper for converting a type t "to do" backup batch object (docs IDs to fetch)
+   * to a type d "done" backup batch object with the retrieved docs.
+   *
+   * @param {object} backupBatch  {command: t, batch: #, docs: [{id: id}, ...]}
+   * @returns {object} a backup batch object {command: d, batch: #, docs: [{_id: id, ...}, ...]}
+   */
+  pendingToFetched = async(backupBatch) => {
+    mappingDebug(`Fetching batch ${backupBatch.batch}.`);
+    try {
+      const response = await this.dbClient.service.postBulkGet({
+        db: this.dbClient.dbName,
+        revs: true,
+        docs: backupBatch.docs
+      });
+      mappingDebug(`Good server response for batch ${backupBatch.batch}.`);
+      // create an output array with the docs returned
+      // Bulk get response "results" array is of objects {id: "id", docs: [...]}
+      // Since "docs" is an array too we use a flatMap
+      const documentRevisions = response.result.results.flatMap(entry => {
+        // for each entry in "results" we map the "docs" array
+        if (entry.docs) {
+          // Map the "docs" array entries to the document revision inside the "ok" property
+          return entry.docs.map((doc) => {
+            if (doc.ok) {
+              // This is the fetched document revision
+              return doc.ok;
+            }
+            if (doc.error) {
+              // This type of error was ignored previously so just debug for now.
+              mappingDebug(`Error ${doc.error.error} for ${doc.error.id} in batch ${backupBatch.batch}.`);
+            }
+            return null;
+          }).filter((doc) => {
+            // Filter out any entries that didn't have a document revision
+            return doc || false;
+          });
+        }
+        // Fallback to an empty array that will add nothing to the fetched docs array
+        return [];
+      });
+      mappingDebug(`Server returned ${documentRevisions.length} document revisions for batch ${backupBatch.batch}.`);
+      return {
+        command: 'd',
+        batch: backupBatch.batch,
+        docs: documentRevisions
+      };
+    } catch (err) {
+      mappingDebug(`Error response from server for batch ${backupBatch.batch}.`);
+      throw err;
+    }
+  };
+}
+module.exports = {
+  Backup,
+  LogMapper
+};

package/includes/config.js CHANGED Viewed

@@ -1,4 +1,4 @@
-// Copyright © 2017, 2021 IBM Corp. All rights reserved.
+// Copyright © 2017, 2023 IBM Corp. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,8 +13,9 @@
 // limitations under the License.
 'use strict';
-const path = require('path');
-const tmp = require('tmp');
+const { mkdtempSync } = require('node:fs');
+const { tmpdir } = require('node:os');
+const { join, normalize } = require('node:path');
 /**
   Return API default settings.
@@ -24,7 +25,7 @@ function apiDefaults() {
     parallelism: 5,
     bufferSize: 500,
     requestTimeout: 120000,
-    log: tmp.tmpNameSync(),
+    log: join(mkdtempSync(join(tmpdir(), 'couchbackup-')), `${Date.now()}`),
     resume: false,
     mode: 'full'
   };
@@ -77,7 +78,7 @@ function applyEnvironmentVariables(opts) {
   // if we have a specified log file
   if (typeof process.env.COUCH_LOG !== 'undefined') {
-    opts.log = path.normalize(process.env.COUCH_LOG);
+    opts.log = normalize(process.env.COUCH_LOG);
   }
   // if we are instructed to resume
@@ -87,7 +88,7 @@ function applyEnvironmentVariables(opts) {
   // if we are given an output filename
   if (typeof process.env.COUCH_OUTPUT !== 'undefined') {
-    opts.output = path.normalize(process.env.COUCH_OUTPUT);
+    opts.output = normalize(process.env.COUCH_OUTPUT);
   }
   // if we only want a shallow copy
@@ -112,7 +113,7 @@ function applyEnvironmentVariables(opts) {
 }
 module.exports = {
-  apiDefaults: apiDefaults,
-  cliDefaults: cliDefaults,
-  applyEnvironmentVariables: applyEnvironmentVariables
+  apiDefaults,
+  cliDefaults,
+  applyEnvironmentVariables
 };

package/includes/error.js CHANGED Viewed

@@ -1,4 +1,4 @@
-// Copyright © 2017, 2021 IBM Corp. All rights reserved.
+// Copyright © 2017, 2023 IBM Corp. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ const codes = {
   NoLogFileName: 20,
   LogDoesNotExist: 21,
   IncompleteChangesInLogFile: 22,
+  LogFileExists: 23,
   SpoolChangesError: 30,
   HTTPFatalError: 40,
   BulkGetError: 50
@@ -36,6 +37,12 @@ class BackupError extends Error {
   }
 }
+class OptionError extends BackupError {
+  constructor(message) {
+    super('InvalidOption', message);
+  }
+}
 class HTTPError extends BackupError {
   constructor(responseError, name) {
     // Special case some names for more useful error messages
@@ -53,56 +60,47 @@ class HTTPError extends BackupError {
   }
 }
-// Default function to return an error for HTTP status codes
-// < 400 -> OK
-// 4XX (except 429) -> Fatal
-// 429 & >=500 -> Transient
-function checkResponse(err) {
-  if (err) {
-    // Construct an HTTPError if there is request information on the error
-    // Codes < 400 are considered OK
-    if (err.status >= 400) {
-      return new HTTPError(err);
-    } else {
-      // Send it back again if there was no status code, e.g. a cxn error
-      return augmentMessage(err);
-    }
-  }
-}
-function convertResponseError(responseError, errorFactory) {
-  if (!errorFactory) {
-    errorFactory = checkResponse;
-  }
-  return errorFactory(responseError);
-}
-function augmentMessage(err) {
-  // For errors that don't have a status code, we are likely looking at a cxn
-  // error.
-  // Try to augment the message with more detail (core puts the code in statusText)
-  if (err && err.statusText) {
-    err.message = `${err.message} ${err.statusText}`;
-  }
-  if (err && err.description) {
-    err.message = `${err.message} ${err.description}`;
-  }
-  return err;
-}
-function wrapPossibleInvalidUrlError(err) {
-  if (err.code === 'ERR_INVALID_URL') {
+/**
+ * A function for converting between error types and improving error messages.
+ *
+ * Cases:
+ * - BackupError - return as is.
+ * - response "like" errors - convert to HTTPError.
+ * - ERR_INVALID_URL - convert to OptionError.
+ * - Error (general case) - augment with additional statusText
+ *   or description if available.
+ *
+ * @param {Error} e
+ * @returns {Error} the modified error
+ */
+function convertError(e) {
+  if (e instanceof BackupError) {
+    // If it's already a BackupError just pass it on
+    return e;
+  } else if (e && e.status && e.status >= 400) {
+    return new HTTPError(e);
+  } else if (e.code === 'ERR_INVALID_URL') {
     // Wrap ERR_INVALID_URL in our own InvalidOption
-    return new BackupError('InvalidOption', err.message);
+    return new OptionError(e.message);
+  } else {
+    // For errors that don't have a status code, we are likely looking at a cxn
+    // error.
+    // Try to augment the message with more detail (core puts the code in statusText)
+    if (e && e.statusText) {
+      e.message = `${e.message} ${e.statusText}`;
+    }
+    if (e && e.description) {
+      e.message = `${e.message} ${e.description}`;
+    }
+    return e;
   }
-  return err;
 }
 module.exports = {
   BackupError,
+  OptionError,
   HTTPError,
-  wrapPossibleInvalidUrlError,
-  convertResponseError,
+  convertError,
   terminationCallback: function terminationCallback(err, data) {
     if (err) {
       console.error(`ERROR: ${err.message}`);

package/includes/liner.js CHANGED Viewed

@@ -1,4 +1,4 @@
-// Copyright © 2017 IBM Corp. All rights reserved.
+// Copyright © 2017, 2024 IBM Corp. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,34 +13,145 @@
 // limitations under the License.
 'use strict';
-// stolen from http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
-const stream = require('stream');
+const { createInterface } = require('node:readline');
+const { PassThrough, Duplex } = require('node:stream');
+const debug = require('debug');
-module.exports = function() {
-  const liner = new stream.Transform({ objectMode: true });
+/**
+ * A Duplex stream that converts the input stream to a stream
+ * of line objects using the built-in readline interface.
+ *
+ * The new stream line objects have the form
+ * {lineNumber: #, line: content}
+ *
+ * Note that it uses the `line` event and not `for await...of`
+ * for performance reasons. See Node Readline module docs for
+ * details.
+ */
+class Liner extends Duplex {
+  // Configure logging
+  log = debug(('couchbackup:liner'));
+  // Flag for whether the readline interface is running
+  isRunning = true;
+  // Line number state
+  lineNumber = 0;
+  // Buffer of processed lines
+  lines = [];
+  // Stream of bytes that will be processed to lines.
+  inStream = new PassThrough({ objectMode: false })
+    // if there is an error destroy this Duplex with it
+    .on('error', e => this.destroy(e));
-  liner._transform = function(chunk, encoding, done) {
-    let data = chunk.toString();
-    if (this._lastLineData) {
-      data = this._lastLineData + data;
-    }
+  constructor() {
+    // Configuration of this Duplex:
+    // objectMode: false on the writable input (file chunks), true on the readable output (line objects)
+    // The readableHighWaterMark controls the number of lines buffered after this implementation calls
+    // "push". Backup lines are potentially large (default 500 documents - i.e. potentially MBs). Since
+    // there is additional buffering downstream and file processing is faster than the network ops
+    // we don't bottleneck here even without a large buffer.
+    super({ readableObjectMode: true, readableHighWaterMark: 0, writableObjectMode: false });
+    // Built-in readline interface over the inStream
+    this.readlineInterface = createInterface({
+      input: this.inStream, // the writable side of Liner, passed through
+      terminal: false, // expect to read from files
+      crlfDelay: Infinity // couchbackup files should only use "\n" EOL, but allow for all "\r\n" to be single EOL
+    }).on('line', (line) => {
+      // Wrap the line in the object format and store it in an array waiting to be pushed
+      // when downstream is ready to receive.
+      const bufferedLines = this.lines.push(this.wrapLine(line));
+      this.log(`Liner processed line ${this.lineNumber}. Buffered lines available: ${bufferedLines}.`);
+      this.pushAvailable();
+    }).on('close', () => {
+      this.log('Liner readline interface closed.');
+      // Push null onto our lines buffer to signal EOF to downstream consumers.
+      this.lines.push(null);
+      this.pushAvailable();
+    });
+  }
-    const lines = data.split('\n');
-    this._lastLineData = lines.splice(lines.length - 1, 1)[0];
+  /**
+   * Helper function to wrap a line in the object format that Liner
+   * pushes to downstream consumers.
+   *
+   * @param {string} line
+   * @returns {object} {lineNumber: #, line: content}
+   */
+  wrapLine(line) {
+    // For each line wrapped, increment the line number
+    return { lineNumber: ++this.lineNumber, line };
+  }
-    for (const i in lines) {
-      this.push(lines[i]);
+  /**
+   * Function that pushes any available lines downstream.
+   */
+  pushAvailable() {
+    // Check readline is running flag and whether there is content to push.
+    while (this.isRunning && this.lines.length > 0) {
+      if (!this.push(this.lines.shift())) {
+        // Push returned false, this indicates downstream back-pressure.
+        // Pause the readline interface to stop pushing more lines downstream.
+        // Resumption is triggered by downstream calling _read which happens
+        // when it is ready for more data.
+        this.log(`Liner pausing after back-pressure from push. Buffered lines available: ${this.lines.length}.`);
+        this.isRunning = false;
+        this.readlineInterface.pause();
+        break;
+      } else {
+        this.log(`Liner pushed. Buffered lines available: ${this.lines.length}.`);
+      }
     }
-    done();
-  };
+  }
-  liner._flush = function(done) {
-    if (this._lastLineData) {
-      this.push(this._lastLineData);
+  /**
+   * Implementation of the Readable side of the Duplex.
+   *
+   *
+   * @param {number} size - ignored as the Readable side is objectMode: true
+   */
+  _read(size) {
+    // As per the Readable contract if read has been called it won't be called
+    // again until after there has been a call to push.
+    // As part of flow control if we are not running we must resume when read
+    // is called to ensure that pushes are able to happen (and thereby trigger
+    // subsequent reads).
+    if (!this.isRunning) {
+      this.log('Liner resuming after read.');
+      this.isRunning = true;
+      this.readlineInterface.resume();
     }
-    this._lastLineData = null;
-    done();
-  };
+    this.pushAvailable();
+  }
+  /**
+   * Implementation for the Writable side of the Duplex.
+   * Delegates to the inStream PassThrough.
+   *
+   * @param {*} chunk
+   * @param {string} encoding
+   * @param {function} callback
+   */
+  _write(chunk, encoding, callback) {
+    // Note that the passed callback function controls flow from upstream.
+    // When the readable side is paused by downstream the inStream buffer
+    // will fill and then the callback will be delayed until that buffer
+    // is drained by the readline interface starting up again.
+    this.inStream.write(chunk, encoding, callback);
+  }
+  /**
+   * Cleanup after the last write to the Duplex.
+   *
+   * @param {function} callback
+   */
+  _final(callback) {
+    this.log('Finalizing liner.');
+    // Nothing more will be written, end our inStream which will
+    // cause the readLineInterface to emit 'close' and signal EOF
+    // to our readers after the line buffer is emptied.
+    this.inStream.end(callback);
+  }
+}
-  return liner;
+module.exports = {
+  Liner
 };