RubyGems - cassback - Versions diffs - 0.1.3 → 0.1.4 - Mend

cassback 0.1.3 → 0.1.4

Files changed (32) hide show

checksums.yaml +4 -4
data/.gitignore +14 -0
data/.rubocop.yml_disabled +37 -0
data/Gemfile +8 -0
data/LICENSE +194 -0
data/README.md +103 -0
data/Rakefile.rb +8 -0
data/bin/cassback +33 -5
data/bin/console +14 -0
data/bin/setup +8 -0
data/cassback.gemspec +30 -0
data/conf/local.yml +18 -0
data/conf/preprod.yml +15 -0
data/conf/prod.yml +15 -0
data/lib/backuptool.rb +53 -2
data/lib/cassandra.rb +15 -0
data/lib/cassback/version.rb +3 -0
data/lib/hadoop.rb +4 -1
data/scripts/deploy.sh +3 -0
data/scripts/manualbackups/ansible.cfg +12 -0
data/scripts/manualbackups/inventory.txt +18 -0
data/scripts/manualbackups/play_book.sh +13 -0
data/scripts/manualbackups/playbooks/backups.yml +6 -0
data/scripts/manualbackups/roles/planb/files/backup.sh +27 -0
data/scripts/manualbackups/roles/planb/files/httpfs.sh +27 -0
data/scripts/manualbackups/roles/planb/files/krb5.conf +26 -0
data/scripts/manualbackups/roles/planb/tasks/main.yml +34 -0
data/scripts/pre-push +17 -0
data/test/cassandra_stub.rb +33 -0
data/test/hadoop_stub.rb +51 -0
data/test/test_backuptool.rb +180 -0
metadata +33 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 616dce141979a8e5c187b6a7c18d9c560e292edf
-  data.tar.gz: 5a2646d54b746c0fd482d2b39f50b039b5c32069
+  metadata.gz: f2b9b6aed95f39752afe6c7df4d2e404d4041450
+  data.tar.gz: 07e9fe1a67dd830ce2f45fc56b37098dbdcb01e9
 SHA512:
-  metadata.gz: 7550cee21ef0fa042d1813e011c12148745b443f6fa726653fcd801a4d7f66443e863f92f3055088e11f933e33688d14d8b22de1ee851709e9e79275011cfcfa
-  data.tar.gz: a2de6cecc4ec52c9045a10d8762dbadf876f44c9426dbbdf4330c61d152db1cb694b7a3d6cf62cf5369218707bcf606f39bffcdeaa9573997ee816fb1939afe3
+  metadata.gz: 08080fa50589f745652230d2c5879406ac0fcf1f4cee3306c89ff963c18c1208fa430dca5cef1e7b22a4f06dcb80746a4d1619c6cc622dc7cd5763bcea082eed
+  data.tar.gz: b8196fe75585a33d1224fe6cd919a14b9fb90bef4dcc7ead9097f65ad9ffc6afa1b4a60b7a56408bf037f141ad5837f465c535fc0e0e173be162e3dc4a8230a9

data/.gitignore ADDED Viewed

@@ -0,0 +1,14 @@
+Gemfile.lock
+doc
+# IntelliJ specific
+.idea
+*.iml
+*.ipr
+*.iws
+#Log files
+*.log
+#Ruby gem files
+*.gem

data/.rubocop.yml_disabled ADDED Viewed

@@ -0,0 +1,37 @@
+# This configuration was made for rubocop >= 0.36.0
+### SRE Core configuration
+### (See also https://confluence.criteois.com/pages/viewpage.action?pageId=270467645)
+# Taken from Core's rules
+Metrics/LineLength:
+  Max: 120
+# Taken from Core's rules
+Style/AlignHash:
+  EnforcedColonStyle: table
+  EnforcedHashRocketStyle: table
+### SRE Storage configuration
+# We have french people's names lying around
+Style/AsciiComments:
+  Enabled: false
+# This wants snake_case file names and we have dashes everywhere
+Style/FileName:
+  Enabled: false
+# Use consistent style for hashes (do not indent far away when in parentheses, etc.)
+Style/IndentHash:
+  EnforcedStyle: consistent
+# Enforce trailing commas in literals for consistency, ease of edition, and code generation
+Style/TrailingCommaInLiteral:
+  EnforcedStyleForMultiline: comma
+## Temporary edits (that should be fixed before enabling them)
+# Messes things up for now
+Style/BracesAroundHashParameters:
+  Enabled: false
+# Badly implemented, and crashes in some cases
+Performance/Casecmp:
+  Enabled: false
+# We should have trailing commas only inside multiline statements
+# r.veznaver said this one will be fixed in rubocop
+Style/TrailingCommaInArguments:
+  Enabled: false

data/Gemfile ADDED Viewed

@@ -0,0 +1,8 @@
+source 'https://rubygems.org'
+ruby '2.2.2'
+gem 'webhdfs'
+gem 'gssapi'
+gem 'rubocop'
+gem 'table_print'
+gem 'rspec_junit_formatter'
+gem 'rubocop-junit-formatter'

data/LICENSE ADDED Viewed

@@ -0,0 +1,194 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+1. Definitions.
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+"Licensor" shall mean the copyright owner or entity authorized by the copyright
+owner that is granting the License.
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to software source code, documentation source, and configuration
+files.
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object code,
+generated documentation, and conversions to other media types.
+"Work" shall mean the work of authorship, whether in Source or Object form, made
+available under the License, as indicated by a copyright notice that is included
+in or attached to the work (an example is provided in the Appendix below).
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) to the interfaces of, the Work and Derivative Works thereof.
+"Contribution" shall mean any work of authorship, including the original version
+of the Work and any modifications or additions to that Work or Derivative Works
+thereof, that is intentionally submitted to Licensor for inclusion in the Work
+by the copyright owner or by an individual or Legal Entity authorized to submit
+on behalf of the copyright owner. For the purposes of this definition,
+"submitted" means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems, and
+issue tracking systems that are managed by, or on behalf of, the Licensor for
+the purpose of discussing and improving the Work, but excluding communication
+that is conspicuously marked or otherwise designated in writing by the copyright
+owner as "Not a Contribution."
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+2. Grant of Copyright License.
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the Work and such
+Derivative Works in Source or Object form.
+3. Grant of Patent License.
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable (except as stated in this section) patent license to make, have
+made, use, offer to sell, sell, import, and otherwise transfer the Work, where
+such license applies only to those patent claims licensable by such Contributor
+that are necessarily infringed by their Contribution(s) alone or by combination
+of their Contribution(s) with the Work to which such Contribution(s) was
+submitted. If You institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work or a
+Contribution incorporated within the Work constitutes direct or contributory
+patent infringement, then any patent licenses granted to You under this License
+for that Work shall terminate as of the date such litigation is filed.
+4. Redistribution.
+You may reproduce and distribute copies of the Work or Derivative Works thereof
+in any medium, with or without modifications, and in Source or Object form,
+provided that You meet the following conditions:
+You must give any other recipients of the Work or Derivative Works a copy of
+this License; and
+You must cause any modified files to carry prominent notices stating that You
+changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute,
+all copyright, patent, trademark, and attribution notices from the Source form
+of the Work, excluding those notices that do not pertain to any part of the
+Derivative Works; and
+If the Work includes a "NOTICE" text file as part of its distribution, then any
+Derivative Works that You distribute must include a readable copy of the
+attribution notices contained within such NOTICE file, excluding those notices
+that do not pertain to any part of the Derivative Works, in at least one of the
+following places: within a NOTICE text file distributed as part of the
+Derivative Works; within the Source form or documentation, if provided along
+with the Derivative Works; or, within a display generated by the Derivative
+Works, if and wherever such third-party notices normally appear. The contents of
+the NOTICE file are for informational purposes only and do not modify the
+License. You may add Your own attribution notices within Derivative Works that
+You distribute, alongside or as an addendum to the NOTICE text from the Work,
+provided that such additional attribution notices cannot be construed as
+modifying the License.
+You may add Your own copyright statement to Your modifications and may provide
+additional or different license terms and conditions for use, reproduction, or
+distribution of Your modifications, or for any such Derivative Works as a whole,
+provided Your use, reproduction, and distribution of the Work otherwise complies
+with the conditions stated in this License.
+5. Submission of Contributions.
+Unless You explicitly state otherwise, any Contribution intentionally submitted
+for inclusion in the Work by You to the Licensor shall be under the terms and
+conditions of this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify the terms of
+any separate license agreement you may have executed with Licensor regarding
+such Contributions.
+6. Trademarks.
+This License does not grant permission to use the trade names, trademarks,
+service marks, or product names of the Licensor, except as required for
+reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+7. Disclaimer of Warranty.
+Unless required by applicable law or agreed to in writing, Licensor provides the
+Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+including, without limitation, any warranties or conditions of TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
+solely responsible for determining the appropriateness of using or
+redistributing the Work and assume any risks associated with Your exercise of
+permissions under this License.
+8. Limitation of Liability.
+In no event and under no legal theory, whether in tort (including negligence),
+contract, or otherwise, unless required by applicable law (such as deliberate
+and grossly negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special, incidental,
+or consequential damages of any character arising as a result of this License or
+out of the use or inability to use the Work (including but not limited to
+damages for loss of goodwill, work stoppage, computer failure or malfunction, or
+any and all other commercial damages or losses), even if such Contributor has
+been advised of the possibility of such damages.
+9. Accepting Warranty or Additional Liability.
+While redistributing the Work or Derivative Works thereof, You may choose to
+offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+other liability obligations and/or rights consistent with this License. However,
+in accepting such obligations, You may act only on Your own behalf and on Your
+sole responsibility, not on behalf of any other Contributor, and only if You
+agree to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+END OF TERMS AND CONDITIONS
+APPENDIX: How to apply the Apache License to your work
+To apply the Apache License to your work, attach the following boilerplate
+notice, with the fields enclosed by brackets "{}" replaced with your own
+identifying information. (Don't include the brackets!) The text should be
+enclosed in the appropriate comment syntax for the file format. We also
+recommend that a file or class name and description of purpose be included on
+the same "printed page" as the copyright notice for easier identification within
+third-party archives.
+   Copyright {yyyy} {name of copyright owner}
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

data/README.md ADDED Viewed

@@ -0,0 +1,103 @@
+# Cassback
+Welcome to your Cassback!
+This is a project that aims to back up Cassandra SSTables and load them into HDFS for further use.
+## Installation
+Build the application into a gem using the command
+    $ gem build cassback.gemspec
+You should see the following output :
+      Successfully built RubyGem
+      Name: cassback
+      Version: 0.1.0
+      File: cassback-0.1.0.gem
+Install the application into your local gem store using the following command :
+    $ gem install cassback-0.1.0.gem
+You should then see the following output :
+    Successfully installed cassback-0.1.0
+    Parsing documentation for cassback-0.1.0
+    Done installing documentation for cassback after 0 seconds
+    1 gem installed
+## Usage
+When the cassback gem is installed, it makes the **cassback** executable available on your PATH.
+This means that you can run it using one of the following commands, both of which print usage examples :
+    cassback
+    cassback -h
+A simple command that you can use for starting a backup is :
+    cassback -S -C path_to_some_config_file.yml
+## Configuration
+The application has some default configuration defined.
+You can override the default configuration in two ways :
+1. Using a configuration file passed as parameter on the command line.
+2. Using individual configuration properties passed as parameters on the command line.
+The command line parameters have precedence over the configuration file.
+## Orchestration
+The tool is designed to do snapshots at **node level** (and not at **cluster level**) - basically it has to be installed
+on each node and a separate process will have to be executed from there to trigger a node level snapshot. Because this task is
+quite complex, it is recommended to use an orchestration tool (like Rundeck) that allows you to execute the same command
+on multiple machines and run the processes in parallel.
+After all node backups are finished the orchestration tool will have to take care of signaling other applications that
+the backup is completely finished. That is done now by adding a new empty file on the cluster metadata folder that has
+the format BACKUP_COMPLETED_yyyy_MM_dd. This has to be triggered only once by using the following command :
+    cassback -B [-d date] -C conf/path_to_some_config_file.yml
+Optionally you can also pass a date; if it is omitted, the current date is assumed.
+## Data Integrity
+Internally, the project uses the webhdfs tool (see https://github.com/kzk/webhdfs), which is a Ruby project
+built on top of the WebHDFS API (https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html).
+Because we're using the WebHDFS API, we get data integrity for free. The tool can also retry the file
+download/upload in case of errors. This is configurable via the following config file properties :
+1. **hadoop.retryTimes** - the number of retries the tool should do before giving up. Default set to 5.
+2. **hadoop.retryInterval** - the interval (in seconds) the tool should take between two attempts. Default set to 1 second.
+If you want to learn more about Hadoop's checksum algorithm that ensures data integrity, you can check the
+following link : https://www.safaribooksonline.com/library/view/hadoop-the-definitive/9781449328917/ch04.html
+## Cleanup policy
+Usually backups of databases take a lot of space. Even if we have optimized the code so the backups are done incrementally
+(meaning that a file is not stored twice even if it's present in multiple backups), still cleanup needs to be done.
+The tool has a cleanup policy of cleaning snapshots after some days have passed since the snapshot has been published.
+This is configurable via the **cleanup.retentionDays** property in the configuration file. One point is that cleanup is
+done at cluster level (for all nodes) since it doesn't make sense to keep data for only some of the nodes.
+The command for triggering a cleanup is :
+    cassback -A -C conf/path_to_some_config_file.yml
+## Unit tests
+Unit tests can be executed locally by running the following command :
+    rake test
+## Contributing
+For now this is an internal Criteo project, but we're aiming to make it open source and publish it on GitHub.
+Issue reports and merge requests are welcome on Criteo's GitLab at : https://gitlab.criteois.com/ruby-gems/cassback

data/Rakefile.rb ADDED Viewed

@@ -0,0 +1,8 @@
+require 'bundler/gem_tasks'
+require 'rake/testtask'
+Rake::TestTask.new do |t|
+  t.libs << 'test'
+  t.test_files = FileList['test/test*.rb']
+  t.verbose = true
+end

data/bin/cassback CHANGED Viewed

@@ -30,6 +30,7 @@ command_line_config = {
   'cassandra' => {},
   'hadoop'    => {},
   'restore'   => {},
+  'cleanup'   => {},
 }
 # Default options
@@ -38,13 +39,19 @@ options = {
     'config' => '/etc/cassandra/conf/cassandra.yaml',
   },
   'hadoop'    => {
-    'hostname'  => 'localhost',
-    'port'      => 14_000,
-    'directory' => 'cassandra',
+    'hostname'      => 'localhost',
+    'port'          => 14_000,
+    'directory'     => 'cassandra',
+    'retryTimes'    => 5,
+    'retryInterval' => 1,
   },
   'restore'   => {
     'destination' => 'cassandra',
   },
+  'cleanup'   => {
+    'retentionDays' => 30,
+  },
 }
 # If no argument given in command line, print the help
@@ -52,7 +59,7 @@ ARGV << '-h' if ARGV.empty?
 # Parse command line options
 parser = OptionParser.new do |opts|
-  opts.banner = 'Usage: cassback.rb [options]'
+  opts.banner = 'Usage: cassback [options]'
   opts.separator ''
   opts.separator 'Configuration:'
@@ -74,6 +81,13 @@ parser = OptionParser.new do |opts|
   opts.on('-F', '--flush', 'removes a backuped snapshot from Hadoop, needs a date') do |_v|
     action = 'delete'
   end
+  opts.on('-B', '--backupFlag', 'creates an empty file to signal that the backup has finished, can be used with a date, \
+    today date is assumed if no date is provided') do |_v|
+    action = 'backupFlag'
+  end
+  opts.on('-A', '--cleanup', 'cleans up old snapshots') do |_v|
+    action = 'cleanup'
+  end
   opts.separator ''
   opts.separator 'Action related:'
@@ -133,7 +147,9 @@ end
 begin
   # Create the Hadoop object
-  hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'], base_dir: options['hadoop']['directory'])
+  hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'],
+  base_dir: options['hadoop']['directory'], retry_times: options['hadoop']['retryTimes'],
+  retry_interval: options['hadoop']['retryInterval'])
   #  Create the Cassandra object
   cassandra = Cassandra.new(options['cassandra']['config'], logger)
@@ -161,6 +177,18 @@ begin
   elsif action == 'delete'
     raise('No date given') unless options.include? 'date'
     bck.delete_snapshots(node: options['node'], date: options['date'])
+  # Create backup flag.
+  elsif action == 'backupFlag'
+    # Use today's date if no date has been provided
+    date = options['date']
+    date ||= Time.new.strftime('%Y_%m_%d')
+    bck.create_backup_flag(date)
+  # Cleanup old snapshots based on cleanup.retentionDays
+  elsif action == 'cleanup'
+    days = options['cleanup']['retentionDays'].to_i
+    bck.cleanup(days)
   end
 #  In case of failure

data/bin/console ADDED Viewed

@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+require 'bundler/setup'
+require 'cassback'
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+# (If you use this, don't forget to add pry to your Gemfile!)
+# require "pry"
+# Pry.start
+require 'irb'
+IRB.start

data/bin/setup ADDED Viewed

@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+IFS=$'\n\t'
+set -vx
+bundle install
+# Do any other automated setup that you need to do here

data/cassback.gemspec ADDED Viewed

@@ -0,0 +1,30 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'cassback/version'
+Gem::Specification.new do |spec|
+  spec.name          = 'cassback'
+  spec.version       = Cassback::VERSION
+  spec.authors       = ['Vincent Van Hollebeke', 'Bogdan Niculescu']
+  spec.email         = ['v.vanhollebeke@criteo.com', 'b.niculescu@criteo.com']
+  spec.summary       = 'Cassandra backup to HDFS.'
+  spec.description   = 'This is a tool that allows creating backups of Cassandra and pushing them into HDFS.'
+  spec.homepage      = 'http://rubygems.org/gems/cassback'
+  spec.licenses = ['Apache-2.0']
+  spec.files         = `git ls-files`.split("\n")
+  spec.test_files    = `git ls-files -- test/*`.split("\n")
+  spec.bindir        = 'bin'
+  spec.executables << 'cassback'
+  spec.require_paths = ['lib']
+  spec.add_development_dependency 'bundler', '~> 1.11'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_runtime_dependency 'gssapi', '~> 1.2', '>= 1.2.0'
+  spec.add_runtime_dependency 'webhdfs', '~> 0.8', '>= 0.8.0'
+  spec.add_runtime_dependency 'table_print', '~> 1.5', '>= 1.5.6'
+end

data/conf/local.yml ADDED Viewed

@@ -0,0 +1,18 @@
+cassandra:
+#  config: "/etc/cassandra/conf/cassandra.yaml"
+  config: "/Users/b.niculescu/Tools/apache-cassandra-2.0.16/conf/cassandra.yaml"
+hadoop:
+#  hostname: "10.60.34.217"
+  hostname: "jobs-user.hpc.criteo.prod"
+  port: 14000
+#  directory: "/user/v.vanhollebeke/cassandra"
+  directory: "/tmp/b.niculescu/cassandra"
+  retryTimes : 3
+  retryInterval : 1
+restore:
+  destination: "cassback_restore"
+cleanup:
+  retentionDays: 30

data/conf/preprod.yml ADDED Viewed

@@ -0,0 +1,15 @@
+cassandra:
+  config: "/etc/cassandra/conf/cassandra.yaml"
+hadoop:
+  hostname: "jobs-user.hpc.criteo.preprod"
+  port: 14000
+  directory: "/tmp/cassandraback/preprod/"
+  retryTimes : 5
+  retryInterval : 1
+restore:
+  destination: "cassback_restore"
+cleanup:
+  retentionDays: 30

data/conf/prod.yml ADDED Viewed

@@ -0,0 +1,15 @@
+cassandra:
+  config: "/etc/cassandra/conf/cassandra.yaml"
+hadoop:
+  hostname: "jobs-user.hpc.criteo.prod"
+  port: 14000
+  directory: "/tmp/cassandrabackups/prod/"
+  retryTimes : 5
+  retryInterval : 1
+restore:
+  destination: "cassback_restore"
+cleanup:
+  retentionDays: 30

data/lib/backuptool.rb CHANGED Viewed

@@ -40,7 +40,8 @@ class BackupTool
       begin
         if date == 'ALL'
           ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
-          ls.each do |item|
+          ls_metadata = ls.select { |item| item['pathSuffix'].include? 'cass_snap_' }
+          ls_metadata.each do |item|
             date = item['pathSuffix'].gsub('cass_snap_', '')
             metadata = get_snapshot_metadata(node, date)
             snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
@@ -60,7 +61,8 @@ class BackupTool
     if node == 'ALL'
       begin
         ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
-        ls.each do |item|
+        ls_nodes = ls.select { |item| item['type'].casecmp('DIRECTORY') == 0 }
+        ls_nodes.each do |item|
           n = item['pathSuffix']
           result += get_snapshots_node(n, date)
         end
@@ -141,6 +143,55 @@ class BackupTool
     end
   end
+  # Cleans up backups that are older than a number of days.
+  # This function cleans data on all nodes.
+  def cleanup(days)
+    retention_date = Date.today - days
+    @logger.info("Cleaning backup data on all nodes before #{retention_date}.")
+    all_snapshots = search_snapshots
+    @logger.info("A total of #{all_snapshots.size} snapshots were found on Hadoop server.")
+    snapshots_to_be_deleted = all_snapshots.select { |snapshot| snapshot.get_date < retention_date }
+    @logger.info("A total of #{snapshots_to_be_deleted.size} snapshots will be deleted.")
+    snapshots_to_be_deleted.each do |snapshot|
+      delete_snapshots(node: snapshot.node, date: snapshot.date)
+    end
+    all_backup_flags = get_backup_flags
+    @logger.info("A total of #{all_backup_flags.size} back up flags were found on Hadoop server.")
+    backup_flags_to_be_delete = all_backup_flags.select { |flag| flag.date < retention_date }
+    @logger.info("A total of #{backup_flags_to_be_delete.size} backup flags will be deleted.")
+    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
+    backup_flags_to_be_delete.each do |flag|
+      file = backup_flags_location + '/' + flag.file
+      @logger.info("Deleting #{file}")
+      @hadoop.delete(file)
+    end
+  end
+  # Method that creates a backup flag to signal that the backup is finished on all nodes
+  # This is an individual command that has to be called manually after snapshots have finished
+  def create_backup_flag(date)
+    file_name = 'BACKUP_COMPLETED_' + date
+    remote_file = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + file_name
+    @logger.info('Setting backup completed flag : ' + remote_file)
+    @hadoop.create(remote_file, '', overwrite: true)
+  end
+  def get_backup_flags
+    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
+    ls = @hadoop.list(backup_flags_location)
+    backup_flags = ls.select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
+    backup_flags.collect do |file|
+      BackupFlag.new(@cassandra.cluster_name, file['pathSuffix'])
+    end
+  end
   # Download a file from HDFS, buffered way
   # * *Args*    :
   #   - +remote+ -> HDFS path

data/lib/cassandra.rb CHANGED Viewed

@@ -150,4 +150,19 @@ class CassandraSnapshot
     d = @date <=> other.date
     c * 3 + n * 2 + d
   end
+  def get_date
+    DateTime.strptime(@date, '%Y_%m_%d')
+  end
+end
+class BackupFlag
+  attr_reader :cluster, :date, :file
+  def initialize(cluster, file)
+    @cluster = cluster
+    @file = file.dup
+    date_as_string = file.sub! 'BACKUP_COMPLETED_', ''
+    @date = DateTime.strptime(date_as_string, '%Y_%m_%d')
+  end
 end

data/lib/cassback/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Cassback
+  VERSION = '0.1.4'.freeze
+end

data/lib/hadoop.rb CHANGED Viewed

@@ -6,9 +6,12 @@ WebHDFS::ClientV1::REDIRECTED_OPERATIONS.delete('OPEN')
 class Hadoop < WebHDFS::Client
   attr_reader :base_dir
-  def initialize(host: 'localhost', port: 14_000, base_dir: '/')
+  def initialize(host: 'localhost', port: 14_000, base_dir: '/', retry_times: 5, retry_interval: 1)
     super(host = host, port = port)
     @kerberos = true
     @base_dir = base_dir
+    @retry_known_errors = true
+    @retry_times = retry_times
+    @retry_interval = retry_interval
   end
 end

data/scripts/deploy.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+while [ 1 = 1 ]; do inotifywait .;scp -r . cstars01e01-par.storage.criteo.preprod:cassback2;scp -r . cstars01e02-par.storage.criteo.preprod:cassback2;done

data/scripts/manualbackups/ansible.cfg ADDED Viewed

@@ -0,0 +1,12 @@
+[defaults]
+host_key_checking=false
+record_host_keys=false
+remote_tmp=/tmp/.ansible/tmp
+forks=128
+roles_path=roles
+library=library
+[ssh_connection]
+control_path=%(directory)s/%%h-%%r
+pipelining=True
+scp_if_ssh=True

data/scripts/manualbackups/inventory.txt ADDED Viewed

@@ -0,0 +1,18 @@
+[cstars02-par]
+cstars02e01-par ansible_ssh_host="cstars02e01-par.storage.criteo.prod"
+cstars02e02-par ansible_ssh_host="cstars02e02-par.storage.criteo.prod"
+cstars02e03-par ansible_ssh_host="cstars02e03-par.storage.criteo.prod"
+cstars02e04-par ansible_ssh_host="cstars02e04-par.storage.criteo.prod"
+cstars02e05-par ansible_ssh_host="cstars02e05-par.storage.criteo.prod"
+cstars02e06-par ansible_ssh_host="cstars02e06-par.storage.criteo.prod"
+cstars02e07-par ansible_ssh_host="cstars02e07-par.storage.criteo.prod"
+cstars02e08-par ansible_ssh_host="cstars02e08-par.storage.criteo.prod"
+cstars02e09-par ansible_ssh_host="cstars02e09-par.storage.criteo.prod"
+cstars02e10-par ansible_ssh_host="cstars02e10-par.storage.criteo.prod"
+cstars02e11-par ansible_ssh_host="cstars02e11-par.storage.criteo.prod"
+cstars02e12-par ansible_ssh_host="cstars02e12-par.storage.criteo.prod"
+cstars02e13-par ansible_ssh_host="cstars02e13-par.storage.criteo.prod"
+cstars02e14-par ansible_ssh_host="cstars02e14-par.storage.criteo.prod"
+cstars02e15-par ansible_ssh_host="cstars02e15-par.storage.criteo.prod"
+cstars02e16-par ansible_ssh_host="cstars02e16-par.storage.criteo.prod"
+cstars02e17-par ansible_ssh_host="cstars02e17-par.storage.criteo.prod"

data/scripts/manualbackups/play_book.sh ADDED Viewed

@@ -0,0 +1,13 @@
+#!/bin/bash
+PLAYBOOK=$1
+if [ "$PLAYBOOK" = "" ]; then
+	echo "Usage: $0 <playbook> [ansible options]"
+	exit 65
+fi
+shift
+ansible-playbook --inventory-file=inventory.txt playbooks/$PLAYBOOK.yml --extra-vars $*
+exit $?

data/scripts/manualbackups/playbooks/backups.yml ADDED Viewed

@@ -0,0 +1,6 @@
+---
+- gather_facts: no
+  hosts: cstars02-par
+  roles:
+    - role: planb

data/scripts/manualbackups/roles/planb/files/backup.sh ADDED Viewed

@@ -0,0 +1,27 @@
+#!/bin/bash
+kinit v.vanhollebeke@CRITEOIS.LAN -k -t ~/keytab
+date=`date +%Y_%m_%d`
+nodetool clearsnapshot
+snapdir=$(nodetool snapshot| grep directory| awk '{print $NF}')
+echo "Snapshot is $snapdir"
+for dir in $(find /var/opt/cassandra/data -type d |grep snapshots/$snapdir); do
+    kok=$(klist -l|grep v.vanhollebeke@CRITEOIS.LAN|grep -v Expired|wc -l)
+    if [ $kok == 0 ]; then
+        echo "Must renew Kerberos ticket"
+        kinit v.vanhollebeke@CRITEOIS.LAN -k -t ~/keytab
+    else
+        echo "Kerberos ticket OK"
+    fi
+    keyspace=`echo $dir|awk -F\/ '{print $6}'`
+    table=`echo $dir|awk -F\/ '{print $7}'`
+    echo "Saving $keyspace $table"
+    ./httpfs.sh /var/opt/cassandra/data/$keyspace/$table/snapshots/$snapdir tmp/cassandrabackups/prod/cstars02/$date/$HOSTNAME/$table
+done
+echo "FINISHED !!!!"

data/scripts/manualbackups/roles/planb/files/httpfs.sh ADDED Viewed

@@ -0,0 +1,27 @@
+#!/bin/sh
+BASE='http://0.httpfs.hpc.criteo.prod:14000/webhdfs/v1'
+#BASE='http://httpfs.pa4.hpc.criteo.prod:14000'
+IN=$1
+OUT=$2
+echo "Creating destination directory: $OUT"
+curl --negotiate -u : "$BASE/$OUT?op=MKDIRS&permission=0777" -X PUT -s > /dev/null
+for p in $(find $IN -type f)
+do
+    f=$(basename $p)
+    echo "$IN/$f"
+    # Create file
+    dest=$(curl --negotiate -u : "$BASE/$OUT/$f?op=CREATE&overwrite=true&permission=0777" -i -X PUT -s | grep Location | tail -n1 | cut -d\  -f2 | tr -d '\r\n')
+    [ $? != 0 ] && echo "ERROR"
+    echo "DEST IS ${dest}"
+    # Upload file
+    curl --negotiate -u : "$dest" -i -X PUT -T "$IN/$f" -H 'Content-Type: application/octet-stream' > /dev/null
+    [ $? != 0 ] && echo "ERROR"
+done

data/scripts/manualbackups/roles/planb/files/krb5.conf ADDED Viewed

@@ -0,0 +1,26 @@
+# Kerberos client configuration copied to /etc/krb5.conf on the backup hosts.
+# Library-wide defaults; CRITEOIS.LAN is the default realm.
+[libdefaults]
+  dns_lookup_realm = true
+  dns_lookup_kdc = true
+  ticket_lifetime = 24h
+  renew_lifetime = 7d
+  forwardable = true
+  default_realm = CRITEOIS.LAN
+  udp_preference_limit = 1
+  realm_try_domains = 1
+  permitted_enctypes = aes128-cts-hmac-sha1-96 des3-cbc-sha1 arcfour-hmac
+  default_tkt_enctypes = aes128-cts-hmac-sha1-96 des3-cbc-sha1 arcfour-hmac
+# Maps DNS domain suffixes to the Kerberos realm that serves them.
+[domain_realm]
+  .hpc.criteo.preprod = HPC.CRITEO.PREPROD
+  .hpc.criteo.prod = AMS.HPC.CRITEO.PROD
+  .pa4.hpc.criteo.prod = PA4.HPC.CRITEO.PROD
+  .as.hpc.criteo.prod = AS.HPC.CRITEO.PROD
+  .na.hpc.criteo.prod = NA.HPC.CRITEO.PROD
+  .cn.hpc.criteo.prod = CN.HPC.CRITEO.PROD
+# Cross-realm authentication paths for clients in CRITEOIS.LAN: the AMS realm
+# is reached directly ("."), every other HPC realm goes through AMS.
+[capaths]
+  CRITEOIS.LAN = {
+    AMS.HPC.CRITEO.PROD = .
+    PA4.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+    AS.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+    NA.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+    CN.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+  }

data/scripts/manualbackups/roles/planb/tasks/main.yml ADDED Viewed

@@ -0,0 +1,34 @@
+---
+# Tasks of the "planb" role. Tags select the phase to run:
+#   keytab - install krb5.conf and the keytab, then check that kinit works
+#   backup - copy the helper scripts and run backup.sh from the home directory
+#   clear  - drop the Cassandra snapshots
+#   verify - fail if a directory matching "snap" is left under /var/opt/cassandra
+- name: Copy krb5.conf into /etc
+  copy: src=krb5.conf dest=/etc/krb5.conf
+  # "become" replaces the deprecated "sudo" task keyword.
+  become: true
+  tags: keytab
+- name: Copy my keytab
+  copy: src=keytab dest=~/keytab
+  tags: keytab
+- name: Check if keytab works
+  command: kinit $USER@CRITEOIS.LAN -k -t ~/keytab
+  tags: keytab
+- name: Copy httpfs.sh script
+  copy: src=httpfs.sh dest=~/httpfs.sh mode=0750
+  tags: backup
+- name: Copy backup.sh script
+  copy: src=backup.sh dest=~/backup.sh mode=0750
+  tags: backup
+- name: Start Backup
+  shell: ./backup.sh >logfile 2>&1 chdir=~
+  tags: backup
+- name: Clear snapshots
+  shell: sudo nodetool clearsnapshot
+  tags: clear
+- name: Verify if snapshots are REALLY deleted
+  # -eq is the POSIX integer comparison; "==" inside [ ] is a bash extension
+  # that a plain /bin/sh may reject.
+  shell: "[ $(find /var/opt/cassandra -type d |grep snap|wc -l) -eq 0 ]"
+  tags: verify

data/scripts/pre-push ADDED Viewed

@@ -0,0 +1,17 @@
+#!/bin/bash
+echo "Running rubocop with auto-correct" >&2
+bundle exec rubocop --config .rubocop.yml --auto-correct --out /dev/null
+modified=$(git status | grep modified | wc -l)
+if [ $modified -eq 0 ]; then
+    echo -e "\e[1;32mNothing to correct, pushing\e[0m" >&2
+    exit 0
+else
+    s=''
+    if [ $modified -gt 1 ]; then
+        s='s'
+    fi
+    echo -e "\e[1;31m$modified file$s were modified, please add commit before pushing\e[0m" >&2
+    exit 1
+fi

data/test/cassandra_stub.rb ADDED Viewed

@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+require_relative '../lib/cassandra'
+# Stub implementation that simulates cassandra backups.
+class CassandraStub
+  attr_reader :data_path, :cluster_name, :node_name
+  def initialize(cluster_name = 'cluster1', node_name = 'node1', date = '', file_indexes = [])
+    @cluster_name = cluster_name
+    @node_name = node_name
+    @date = date
+    @data_path = 'test/cassandra' + '/' + cluster_name + '/' + node_name + '/'
+    FileUtils.mkdir_p(@data_path)
+    # create some fake sstables
+    @metadata = Set.new
+    file_indexes.each do |index|
+      file_name = "SSTable-#{index}-Data.db"
+      file_path = @data_path + '/' + file_name
+      File.open(file_path, 'w') { |file| file.write('This is a test file that simulates an SSTable') }
+      @metadata.add(file_name)
+    end
+  end
+  def new_snapshot
+    # simple create a pointer to an existing location
+    CassandraSnapshot.new(@cluster_name, @node_name, @date, @metadata)
+  end
+  def delete_snapshot(_snapshot)
+    FileUtils.rm_rf(@data_path)
+  end
+end

data/test/hadoop_stub.rb ADDED Viewed

@@ -0,0 +1,51 @@
+#!/usr/bin/env ruby
+require 'fileutils'
+# A stub implementation of Hadoop that read/writes to local file instead of using webhdfs
+class HadoopStub
+  attr_reader :base_dir
+  def initialize(base_dir)
+    @base_dir = base_dir
+  end
+  def list(path, _options = {})
+    files_and_folders = Dir.glob("#{path}/*")
+    files_and_folders.collect do |file|
+      type = if File.file?(file)
+               'FILE'
+             else
+               'DIRECTORY'
+             end
+      # return a hash similar to the one that hadoop sends (containing fewer entries)
+      {
+        'pathSuffix' => File.basename(file),
+        'type'       => type,
+      }
+    end
+  end
+  def list_files(path, _options = {})
+    files_and_folders = Dir.glob("#{path}/**/*")
+    files_and_folders.select { |file| File.file?(file) }
+  end
+  def create(path, body, _options = {})
+    parent = File.expand_path('..', path)
+    FileUtils.mkdir_p parent
+    if body.is_a?(File)
+      File.open(path, 'w') { |file| file.write(body.read) }
+    elsif
+      File.open(path, 'w') { |file| file.write(body) }
+    end
+  end
+  def read(path, _options = {})
+    File.open(path, 'r').read
+  end
+  def delete(path, _options = {})
+    FileUtils.rm_rf(path)
+  end
+end

data/test/test_backuptool.rb ADDED Viewed

@@ -0,0 +1,180 @@
+#!/usr/bin/env ruby
+require 'test/unit'
+require 'logger'
+require_relative '../lib/backuptool'
+require_relative 'hadoop_stub'
+require_relative 'cassandra_stub'
+class TestSimpleNumber < Test::Unit::TestCase
+  def test_new_snapshot
+    hadoop = HadoopStub.new('test/hadoop')
+    create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+    remote_files = hadoop.list_files('test/hadoop')
+    # two files were backed up + one metadata file
+    assert_equal(3, remote_files.size)
+    # files were created in the correct location
+    assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_22', remote_files[0])
+    assert_equal('test/hadoop/cluster1/node1/SSTable-1-Data.db', remote_files[1])
+    assert_equal('test/hadoop/cluster1/node1/SSTable-2-Data.db', remote_files[2])
+    # metadata file contains the sstables.
+    metadata_content = File.open(remote_files[0], 'r').read
+    assert(metadata_content.include? 'SSTable-1-Data.db')
+    assert(metadata_content.include? 'SSTable-2-Data.db')
+    # cleanup
+    hadoop.delete('test/hadoop')
+    hadoop.delete('test/cassandra')
+  end
+  def test_two_snapshots
+    hadoop = HadoopStub.new('test/hadoop')
+    create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+    create_new_snapshot(hadoop, 'node1', '2016_04_23', [2, 3, 4])
+    remote_files = hadoop.list_files('test/hadoop')
+    # two files were backed up + one metadata file
+    assert_equal(6, remote_files.size)
+    # files were created in the correct location
+    # no duplicate files are stored
+    assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_22', remote_files[0])
+    assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_23', remote_files[1])
+    assert_equal('test/hadoop/cluster1/node1/SSTable-1-Data.db', remote_files[2])
+    assert_equal('test/hadoop/cluster1/node1/SSTable-2-Data.db', remote_files[3])
+    assert_equal('test/hadoop/cluster1/node1/SSTable-3-Data.db', remote_files[4])
+    assert_equal('test/hadoop/cluster1/node1/SSTable-4-Data.db', remote_files[5])
+    # metadata on first backup file contains the sstables.
+    metadata_content = File.open(remote_files[0], 'r').read
+    assert(metadata_content.include? 'SSTable-1-Data.db')
+    assert(metadata_content.include? 'SSTable-2-Data.db')
+    # metadata on second backup file contains the sstables.
+    metadata_content = File.open(remote_files[1], 'r').read
+    assert(metadata_content.include? 'SSTable-2-Data.db')
+    assert(metadata_content.include? 'SSTable-3-Data.db')
+    assert(metadata_content.include? 'SSTable-4-Data.db')
+    # cleanup
+    hadoop.delete('test/hadoop')
+    hadoop.delete('test/cassandra')
+  end
+  def test_restore
+    hadoop = HadoopStub.new('test/hadoop')
+    backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+    # restore a newly created snapshot
+    backup_tool.restore_snapshot('node1', '2016_04_22', 'test/restore')
+    restored_files = hadoop.list_files('test/restore')
+    # two files were restored
+    assert_equal(2, restored_files.size)
+    assert_equal('test/restore/SSTable-1-Data.db', restored_files[0])
+    assert_equal('test/restore/SSTable-2-Data.db', restored_files[1])
+    # cleanup
+    hadoop.delete('test/hadoop')
+    hadoop.delete('test/restore')
+    hadoop.delete('test/cassandra')
+  end
+  def test_delete
+    hadoop = HadoopStub.new('test/hadoop')
+    backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+    # delete a newly created snapshot
+    backup_tool.delete_snapshots(node: 'node1', date: '2016_04_22')
+    remote_files = hadoop.list_files('test/hadoop')
+    assert_equal(0, remote_files.size)
+    hadoop.delete('test/cassandra')
+  end
+  def test_backup_flag
+    hadoop = HadoopStub.new('test/hadoop')
+    backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+    backup_tool.create_backup_flag('2016_04_22')
+    remote_files = hadoop.list_files('test/hadoop')
+    assert_equal(4, remote_files.size)
+    # Flag is created at cluster level
+    assert_equal('test/hadoop/cass_snap_metadata/cluster1/BACKUP_COMPLETED_2016_04_22', remote_files[0])
+    # cleanup
+    hadoop.delete('test/hadoop')
+    hadoop.delete('test/cassandra')
+  end
+  def test_get_backup_flag
+    hadoop = HadoopStub.new('test/hadoop')
+    backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+    backup_tool.create_backup_flag('2016_04_22')
+    flags = backup_tool.get_backup_flags
+    # One flag found
+    assert_equal(1, flags.size)
+    # Flag points to the correct file
+    assert_equal('cluster1', flags[0].cluster)
+    assert_equal('BACKUP_COMPLETED_2016_04_22', flags[0].file)
+    # cleanup
+    hadoop.delete('test/hadoop')
+    hadoop.delete('test/cassandra')
+  end
+  def test_cleanup
+    hadoop = HadoopStub.new('test/hadoop')
+    retention_days = 30
+    date_31_days_back = (Date.today - 31).strftime('%Y_%m_%d')
+    date_30_days_back = (Date.today - 30).strftime('%Y_%m_%d')
+    # Two backups on two nodes
+    create_new_snapshot(hadoop, 'node1', date_31_days_back, [1, 2, 3, 4])
+    create_new_snapshot(hadoop, 'node2', date_31_days_back, [1, 2, 3, 4])
+    create_new_snapshot(hadoop, 'node1', date_30_days_back, [3, 4, 5, 6])
+    backup_tool = create_new_snapshot(hadoop, 'node2', date_30_days_back, [4, 5, 6, 7])
+    # Both backups are marked as completed
+    backup_tool.create_backup_flag(date_31_days_back)
+    backup_tool.create_backup_flag(date_30_days_back)
+    backup_tool.create_backup_flag(date_30_days_back)
+    backup_tool.cleanup(retention_days)
+    # Two snapshots were deleted, two were kept
+    snapshots = backup_tool.search_snapshots
+    assert_equal(2, snapshots.size)
+    assert_equal('node1', snapshots[0].node)
+    assert_equal(date_30_days_back, snapshots[0].date)
+    assert_equal('node2', snapshots[1].node)
+    assert_equal(date_30_days_back, snapshots[1].date)
+    # One backup flag was deleted, one was kept.
+    backup_flags = backup_tool.get_backup_flags
+    assert_equal(1, backup_flags.size)
+    assert_equal("BACKUP_COMPLETED_#{date_30_days_back}", backup_flags[0].file)
+    # cleanup
+    hadoop.delete('test/hadoop')
+    hadoop.delete('test/cassandra')
+  end
+  def create_new_snapshot(hadoop, node, date, file_indexes)
+    logger = Logger.new(STDOUT)
+    cassandra = CassandraStub.new('cluster1', node, date, file_indexes)
+    backup_tool = BackupTool.new(cassandra, hadoop, logger)
+    backup_tool.new_snapshot
+    backup_tool
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cassback
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-04-20 00:00:00.000000000 Z
+date: 2016-04-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -109,10 +109,36 @@ executables:
 extensions: []
 extra_rdoc_files: []
 files:
+- ".gitignore"
+- ".rubocop.yml_disabled"
+- Gemfile
+- LICENSE
+- README.md
+- Rakefile.rb
 - bin/cassback
+- bin/console
+- bin/setup
+- cassback.gemspec
+- conf/local.yml
+- conf/preprod.yml
+- conf/prod.yml
 - lib/backuptool.rb
 - lib/cassandra.rb
+- lib/cassback/version.rb
 - lib/hadoop.rb
+- scripts/deploy.sh
+- scripts/manualbackups/ansible.cfg
+- scripts/manualbackups/inventory.txt
+- scripts/manualbackups/play_book.sh
+- scripts/manualbackups/playbooks/backups.yml
+- scripts/manualbackups/roles/planb/files/backup.sh
+- scripts/manualbackups/roles/planb/files/httpfs.sh
+- scripts/manualbackups/roles/planb/files/krb5.conf
+- scripts/manualbackups/roles/planb/tasks/main.yml
+- scripts/pre-push
+- test/cassandra_stub.rb
+- test/hadoop_stub.rb
+- test/test_backuptool.rb
 homepage: http://rubygems.org/gems/cassback
 licenses:
 - Apache-2.0
@@ -133,9 +159,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.5.2
+rubygems_version: 2.4.8
 signing_key:
 specification_version: 4
 summary: Cassandra backup to HDFS.
-test_files: []
-has_rdoc:
+test_files:
+- test/cassandra_stub.rb
+- test/hadoop_stub.rb
+- test/test_backuptool.rb