cassback 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 616dce141979a8e5c187b6a7c18d9c560e292edf
4
- data.tar.gz: 5a2646d54b746c0fd482d2b39f50b039b5c32069
3
+ metadata.gz: f2b9b6aed95f39752afe6c7df4d2e404d4041450
4
+ data.tar.gz: 07e9fe1a67dd830ce2f45fc56b37098dbdcb01e9
5
5
  SHA512:
6
- metadata.gz: 7550cee21ef0fa042d1813e011c12148745b443f6fa726653fcd801a4d7f66443e863f92f3055088e11f933e33688d14d8b22de1ee851709e9e79275011cfcfa
7
- data.tar.gz: a2de6cecc4ec52c9045a10d8762dbadf876f44c9426dbbdf4330c61d152db1cb694b7a3d6cf62cf5369218707bcf606f39bffcdeaa9573997ee816fb1939afe3
6
+ metadata.gz: 08080fa50589f745652230d2c5879406ac0fcf1f4cee3306c89ff963c18c1208fa430dca5cef1e7b22a4f06dcb80746a4d1619c6cc622dc7cd5763bcea082eed
7
+ data.tar.gz: b8196fe75585a33d1224fe6cd919a14b9fb90bef4dcc7ead9097f65ad9ffc6afa1b4a60b7a56408bf037f141ad5837f465c535fc0e0e173be162e3dc4a8230a9
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ Gemfile.lock
2
+ doc
3
+
4
+ # IntelliJ specific
5
+ .idea
6
+ *.iml
7
+ *.ipr
8
+ *.iws
9
+
10
+ #Log files
11
+ *.log
12
+
13
+ #Ruby gem files
14
+ *.gem
data/.rubocop.yml_disabled ADDED
@@ -0,0 +1,37 @@
1
+ # This configuration was made for rubocop >= 0.36.0
2
+
3
+ ### SRE Core configuration
4
+ ### (See also https://confluence.criteois.com/pages/viewpage.action?pageId=270467645)
5
+ # Taken from Core's rules
6
+ Metrics/LineLength:
7
+ Max: 120
8
+ # Taken from Core's rules
9
+ Style/AlignHash:
10
+ EnforcedColonStyle: table
11
+ EnforcedHashRocketStyle: table
12
+
13
+ ### SRE Storage configuration
14
+ # We have french people's names lying around
15
+ Style/AsciiComments:
16
+ Enabled: false
17
+ # This wants snake_case file names and we have dashes everywhere
18
+ Style/FileName:
19
+ Enabled: false
20
+ # Use consistent style for hashes (do not indent far away when in parentheses, etc.)
21
+ Style/IndentHash:
22
+ EnforcedStyle: consistent
23
+ # Enforce trailing commas in literals for consistency, ease of editing, and code generation
24
+ Style/TrailingCommaInLiteral:
25
+ EnforcedStyleForMultiline: comma
26
+
27
+ ## Temporary edits (that should be fixed before enabling them)
28
+ # Messes things up for now
29
+ Style/BracesAroundHashParameters:
30
+ Enabled: false
31
+ # Badly implemented, and crashes in some cases
32
+ Performance/Casecmp:
33
+ Enabled: false
34
+ # We should have trailing commas only inside multiline statements
35
+ # r.veznaver said this one will be fixed in rubocop
36
+ Style/TrailingCommaInArguments:
37
+ Enabled: false
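For illustration, the hash-related rules above (table alignment, consistent indentation, and trailing commas in multiline literals) produce hash literals that look roughly like the following sketch, which is not part of the gem itself:

    options = {
      'hostname'  => 'localhost',
      'port'      => 14_000,
      'directory' => 'cassandra',   # values aligned as a table, trailing comma kept
    }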
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+ ruby '2.2.2'
3
+ gem 'webhdfs'
4
+ gem 'gssapi'
5
+ gem 'rubocop'
6
+ gem 'table_print'
7
+ gem 'rspec_junit_formatter'
8
+ gem 'rubocop-junit-formatter'
data/LICENSE ADDED
@@ -0,0 +1,194 @@
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction, and
11
+ distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by the copyright
14
+ owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all other entities
17
+ that control, are controlled by, or are under common control with that entity.
18
+ For the purposes of this definition, "control" means (i) the power, direct or
19
+ indirect, to cause the direction or management of such entity, whether by
20
+ contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity exercising
24
+ permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications, including
27
+ but not limited to software source code, documentation source, and configuration
28
+ files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical transformation or
31
+ translation of a Source form, including but not limited to compiled object code,
32
+ generated documentation, and conversions to other media types.
33
+
34
+ "Work" shall mean the work of authorship, whether in Source or Object form, made
35
+ available under the License, as indicated by a copyright notice that is included
36
+ in or attached to the work (an example is provided in the Appendix below).
37
+
38
+ "Derivative Works" shall mean any work, whether in Source or Object form, that
39
+ is based on (or derived from) the Work and for which the editorial revisions,
40
+ annotations, elaborations, or other modifications represent, as a whole, an
41
+ original work of authorship. For the purposes of this License, Derivative Works
42
+ shall not include works that remain separable from, or merely link (or bind by
43
+ name) to the interfaces of, the Work and Derivative Works thereof.
44
+
45
+ "Contribution" shall mean any work of authorship, including the original version
46
+ of the Work and any modifications or additions to that Work or Derivative Works
47
+ thereof, that is intentionally submitted to Licensor for inclusion in the Work
48
+ by the copyright owner or by an individual or Legal Entity authorized to submit
49
+ on behalf of the copyright owner. For the purposes of this definition,
50
+ "submitted" means any form of electronic, verbal, or written communication sent
51
+ to the Licensor or its representatives, including but not limited to
52
+ communication on electronic mailing lists, source code control systems, and
53
+ issue tracking systems that are managed by, or on behalf of, the Licensor for
54
+ the purpose of discussing and improving the Work, but excluding communication
55
+ that is conspicuously marked or otherwise designated in writing by the copyright
56
+ owner as "Not a Contribution."
57
+
58
+ "Contributor" shall mean Licensor and any individual or Legal Entity on behalf
59
+ of whom a Contribution has been received by Licensor and subsequently
60
+ incorporated within the Work.
61
+
62
+ 2. Grant of Copyright License.
63
+
64
+ Subject to the terms and conditions of this License, each Contributor hereby
65
+ grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
66
+ irrevocable copyright license to reproduce, prepare Derivative Works of,
67
+ publicly display, publicly perform, sublicense, and distribute the Work and such
68
+ Derivative Works in Source or Object form.
69
+
70
+ 3. Grant of Patent License.
71
+
72
+ Subject to the terms and conditions of this License, each Contributor hereby
73
+ grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
74
+ irrevocable (except as stated in this section) patent license to make, have
75
+ made, use, offer to sell, sell, import, and otherwise transfer the Work, where
76
+ such license applies only to those patent claims licensable by such Contributor
77
+ that are necessarily infringed by their Contribution(s) alone or by combination
78
+ of their Contribution(s) with the Work to which such Contribution(s) was
79
+ submitted. If You institute patent litigation against any entity (including a
80
+ cross-claim or counterclaim in a lawsuit) alleging that the Work or a
81
+ Contribution incorporated within the Work constitutes direct or contributory
82
+ patent infringement, then any patent licenses granted to You under this License
83
+ for that Work shall terminate as of the date such litigation is filed.
84
+
85
+ 4. Redistribution.
86
+
87
+ You may reproduce and distribute copies of the Work or Derivative Works thereof
88
+ in any medium, with or without modifications, and in Source or Object form,
89
+ provided that You meet the following conditions:
90
+
91
+ You must give any other recipients of the Work or Derivative Works a copy of
92
+ this License; and
93
+ You must cause any modified files to carry prominent notices stating that You
94
+ changed the files; and
95
+ You must retain, in the Source form of any Derivative Works that You distribute,
96
+ all copyright, patent, trademark, and attribution notices from the Source form
97
+ of the Work, excluding those notices that do not pertain to any part of the
98
+ Derivative Works; and
99
+ If the Work includes a "NOTICE" text file as part of its distribution, then any
100
+ Derivative Works that You distribute must include a readable copy of the
101
+ attribution notices contained within such NOTICE file, excluding those notices
102
+ that do not pertain to any part of the Derivative Works, in at least one of the
103
+ following places: within a NOTICE text file distributed as part of the
104
+ Derivative Works; within the Source form or documentation, if provided along
105
+ with the Derivative Works; or, within a display generated by the Derivative
106
+ Works, if and wherever such third-party notices normally appear. The contents of
107
+ the NOTICE file are for informational purposes only and do not modify the
108
+ License. You may add Your own attribution notices within Derivative Works that
109
+ You distribute, alongside or as an addendum to the NOTICE text from the Work,
110
+ provided that such additional attribution notices cannot be construed as
111
+ modifying the License.
112
+ You may add Your own copyright statement to Your modifications and may provide
113
+ additional or different license terms and conditions for use, reproduction, or
114
+ distribution of Your modifications, or for any such Derivative Works as a whole,
115
+ provided Your use, reproduction, and distribution of the Work otherwise complies
116
+ with the conditions stated in this License.
117
+
118
+ 5. Submission of Contributions.
119
+
120
+ Unless You explicitly state otherwise, any Contribution intentionally submitted
121
+ for inclusion in the Work by You to the Licensor shall be under the terms and
122
+ conditions of this License, without any additional terms or conditions.
123
+ Notwithstanding the above, nothing herein shall supersede or modify the terms of
124
+ any separate license agreement you may have executed with Licensor regarding
125
+ such Contributions.
126
+
127
+ 6. Trademarks.
128
+
129
+ This License does not grant permission to use the trade names, trademarks,
130
+ service marks, or product names of the Licensor, except as required for
131
+ reasonable and customary use in describing the origin of the Work and
132
+ reproducing the content of the NOTICE file.
133
+
134
+ 7. Disclaimer of Warranty.
135
+
136
+ Unless required by applicable law or agreed to in writing, Licensor provides the
137
+ Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
138
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
139
+ including, without limitation, any warranties or conditions of TITLE,
140
+ NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
141
+ solely responsible for determining the appropriateness of using or
142
+ redistributing the Work and assume any risks associated with Your exercise of
143
+ permissions under this License.
144
+
145
+ 8. Limitation of Liability.
146
+
147
+ In no event and under no legal theory, whether in tort (including negligence),
148
+ contract, or otherwise, unless required by applicable law (such as deliberate
149
+ and grossly negligent acts) or agreed to in writing, shall any Contributor be
150
+ liable to You for damages, including any direct, indirect, special, incidental,
151
+ or consequential damages of any character arising as a result of this License or
152
+ out of the use or inability to use the Work (including but not limited to
153
+ damages for loss of goodwill, work stoppage, computer failure or malfunction, or
154
+ any and all other commercial damages or losses), even if such Contributor has
155
+ been advised of the possibility of such damages.
156
+
157
+ 9. Accepting Warranty or Additional Liability.
158
+
159
+ While redistributing the Work or Derivative Works thereof, You may choose to
160
+ offer, and charge a fee for, acceptance of support, warranty, indemnity, or
161
+ other liability obligations and/or rights consistent with this License. However,
162
+ in accepting such obligations, You may act only on Your own behalf and on Your
163
+ sole responsibility, not on behalf of any other Contributor, and only if You
164
+ agree to indemnify, defend, and hold each Contributor harmless for any liability
165
+ incurred by, or claims asserted against, such Contributor by reason of your
166
+ accepting any such warranty or additional liability.
167
+
168
+ END OF TERMS AND CONDITIONS
169
+
170
+ APPENDIX: How to apply the Apache License to your work
171
+
172
+ To apply the Apache License to your work, attach the following boilerplate
173
+ notice, with the fields enclosed by brackets "{}" replaced with your own
174
+ identifying information. (Don't include the brackets!) The text should be
175
+ enclosed in the appropriate comment syntax for the file format. We also
176
+ recommend that a file or class name and description of purpose be included on
177
+ the same "printed page" as the copyright notice for easier identification within
178
+ third-party archives.
179
+
180
+ Copyright {yyyy} {name of copyright owner}
181
+
182
+ Licensed under the Apache License, Version 2.0 (the "License");
183
+ you may not use this file except in compliance with the License.
184
+ You may obtain a copy of the License at
185
+
186
+ http://www.apache.org/licenses/LICENSE-2.0
187
+
188
+ Unless required by applicable law or agreed to in writing, software
189
+ distributed under the License is distributed on an "AS IS" BASIS,
190
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
191
+ See the License for the specific language governing permissions and
192
+ limitations under the License.
193
+
194
+
data/README.md ADDED
@@ -0,0 +1,103 @@
1
+ # Cassback
2
+
3
+ Welcome to Cassback!
4
+ This is a project that aims to back up Cassandra SSTables and load them into HDFS for further use.
5
+
6
+ ## Installation
7
+
8
+ Build the application into a gem using the following command:
9
+
10
+ $ gem build cassback.gemspec
11
+
12
+ You should see the following output:
13
+
14
+ Successfully built RubyGem
15
+ Name: cassback
16
+ Version: 0.1.0
17
+ File: cassback-0.1.0.gem
18
+
19
+
20
+ Install the application into your local gem store using the following command:
21
+
22
+ $ gem install cassback-0.1.0.gem
23
+
24
+ You should then see the following output:
25
+
26
+ Successfully installed cassback-0.1.0
27
+ Parsing documentation for cassback-0.1.0
28
+ Done installing documentation for cassback after 0 seconds
29
+ 1 gem installed
30
+
31
+ ## Usage
32
+
33
+ Once the cassback gem is installed, it adds the **cassback** executable to your PATH.
35
+ This means that you can execute it using one of the following commands, and it will print usage examples:
35
+
36
+ cassback
37
+ cassback -h
38
+
39
+ A simple command that you can use for starting a backup is:
40
+
41
+ cassback -S -C path_to_some_config_file.yml
42
+
43
+ ## Configuration
44
+
45
+ The application has some default configuration defined.
46
+ You can override the default configuration in two ways:
47
+
48
+ 1. Using a configuration file passed as a parameter on the command line.
49
+
50
+ 2. Using individual configuration properties passed as parameters on the command line.
51
+ Command line parameters take precedence over the configuration file.
52
+
53
+ ## Orchestration
54
+
55
+ The tool is designed to take snapshots at **node level** (and not at **cluster level**) - basically it has to be installed
56
+ on each node, and a separate process has to be executed from there to trigger a node-level snapshot. Because this task is
57
+ quite complex, it is recommended to use an orchestration tool (like Rundeck) that allows you to execute the same command
58
+ on multiple machines and run the processes in parallel.
59
+
60
+ After all node backups are finished, the orchestration tool has to take care of signaling to other applications that
61
+ the backup is completely finished. This is done by adding a new empty file to the cluster metadata folder, named with
62
+ the format BACKUP_COMPLETED_yyyy_MM_dd. This has to be triggered only once, by using the following command:
63
+
64
+ cassback -B [-d date] -C conf/path_to_some_config_file.yml
65
+
66
+ Optionally you can also pass a date; if none is provided, the current day's date is assumed.
67
+
68
+ ## Data Integrity
69
+
70
+ The project internally uses the webhdfs library (see https://github.com/kzk/webhdfs), a Ruby project
71
+ built on top of the WebHDFS API (https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html).
72
+ Because we're using the WebHDFS API, we get data integrity for free. The tool is also configurable so that in case of errors it
73
+ can retry the download/upload of a file. This is configurable via the following config file properties:
74
+
75
+ 1. **hadoop.retryTimes** - the number of retries the tool should attempt before giving up. Defaults to 5.
76
+ 2. **hadoop.retryInterval** - the interval (in seconds) the tool should wait between two attempts. Defaults to 1 second.
77
+
78
+ If you want to learn more about Hadoop's checksum algorithm that ensures data integrity, see the
79
+ following link: https://www.safaribooksonline.com/library/view/hadoop-the-definitive/9781449328917/ch04.html
80
+
81
+ ## Cleanup policy
82
+
83
+ Database backups usually take a lot of space. Even though we have optimized the code so that backups are done incrementally
84
+ (meaning that a file is not stored twice even if it's present in multiple backups), cleanup still needs to be done.
85
+ The tool's cleanup policy removes snapshots once a given number of days have passed since the snapshot was published.
86
+ This is configurable via the **cleanup.retentionDays** property in the configuration file. Note that cleanup is
87
+ done at cluster level (for all nodes), since it doesn't make sense to keep data for only some of the nodes.
88
+
89
+ The command for triggering a cleanup is:
90
+
91
+ cassback -A -C conf/path_to_some_config_file.yml
92
+
93
+ ## Unit tests
94
+ Unit tests can be executed locally by running the following command:
95
+
96
+ rake test
97
+
98
+ ## Contributing
99
+
100
+ For now this is an internal Criteo project, but we're aiming to make it open source and publish it to GitHub.
101
+
102
+ Issue reports and merge requests are welcome on Criteo's GitLab at: https://gitlab.criteois.com/ruby-gems/cassback
103
+
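The configuration keys described in the README above (hadoop.retryTimes, hadoop.retryInterval, cleanup.retentionDays) map onto the nested defaults hash declared in bin/cassback later in this diff. A minimal Ruby sketch of an equivalent options structure, with illustrative values only:

    options = {
      'cassandra' => { 'config' => '/etc/cassandra/conf/cassandra.yaml' },
      'hadoop'    => {
        'hostname'      => 'localhost',
        'port'          => 14_000,
        'directory'     => 'cassandra',
        'retryTimes'    => 5,   # retries before giving up
        'retryInterval' => 1,   # seconds between two attempts
      },
      'restore' => { 'destination' => 'cassandra' },
      'cleanup' => { 'retentionDays' => 30 },
    }

The same keys appear in the YAML files added under conf/ (local.yml, preprod.yml, prod.yml).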
data/Rakefile.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.libs << 'test'
6
+ t.test_files = FileList['test/test*.rb']
7
+ t.verbose = true
8
+ end
data/bin/cassback CHANGED
@@ -30,6 +30,7 @@ command_line_config = {
30
30
  'cassandra' => {},
31
31
  'hadoop' => {},
32
32
  'restore' => {},
33
+ 'cleanup' => {},
33
34
  }
34
35
 
35
36
  # Default options
@@ -38,13 +39,19 @@ options = {
38
39
  'config' => '/etc/cassandra/conf/cassandra.yaml',
39
40
  },
40
41
  'hadoop' => {
41
- 'hostname' => 'localhost',
42
- 'port' => 14_000,
43
- 'directory' => 'cassandra',
42
+ 'hostname' => 'localhost',
43
+ 'port' => 14_000,
44
+ 'directory' => 'cassandra',
45
+ 'retryTimes' => 5,
46
+ 'retryInterval' => 1,
44
47
  },
45
48
  'restore' => {
46
49
  'destination' => 'cassandra',
47
50
  },
51
+
52
+ 'cleanup' => {
53
+ 'retentionDays' => 30,
54
+ },
48
55
  }
49
56
 
50
57
  # If no argument given in command line, print the help
@@ -52,7 +59,7 @@ ARGV << '-h' if ARGV.empty?
52
59
 
53
60
  # Parse command line options
54
61
  parser = OptionParser.new do |opts|
55
- opts.banner = 'Usage: cassback.rb [options]'
62
+ opts.banner = 'Usage: cassback [options]'
56
63
 
57
64
  opts.separator ''
58
65
  opts.separator 'Configuration:'
@@ -74,6 +81,13 @@ parser = OptionParser.new do |opts|
74
81
  opts.on('-F', '--flush', 'removes a backuped snapshot from Hadoop, needs a date') do |_v|
75
82
  action = 'delete'
76
83
  end
84
+ opts.on('-B', '--backupFlag', 'creates an empty file to signal that the backup has finished, can be used with a date, \
85
+ today date is assumed if no date is provided') do |_v|
86
+ action = 'backupFlag'
87
+ end
88
+ opts.on('-A', '--cleanup', 'cleans up old snapshots') do |_v|
89
+ action = 'cleanup'
90
+ end
77
91
 
78
92
  opts.separator ''
79
93
  opts.separator 'Action related:'
@@ -133,7 +147,9 @@ end
133
147
 
134
148
  begin
135
149
  # Create the Hadoop object
136
- hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'], base_dir: options['hadoop']['directory'])
150
+ hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'],
151
+ base_dir: options['hadoop']['directory'], retry_times: options['hadoop']['retryTimes'],
152
+ retry_interval: options['hadoop']['retryInterval'])
137
153
 
138
154
  #  Create the Cassandra object
139
155
  cassandra = Cassandra.new(options['cassandra']['config'], logger)
@@ -161,6 +177,18 @@ begin
161
177
  elsif action == 'delete'
162
178
  raise('No date given') unless options.include? 'date'
163
179
  bck.delete_snapshots(node: options['node'], date: options['date'])
180
+
181
+ # Create backup flag.
182
+ elsif action == 'backupFlag'
183
+ # Use today's date if no date has been provided
184
+ date = options['date']
185
+ date ||= Time.new.strftime('%Y_%m_%d')
186
+ bck.create_backup_flag(date)
187
+
188
+ # Cleanup old snapshots based on cleanup.retentionDays
189
+ elsif action == 'cleanup'
190
+ days = options['cleanup']['retentionDays'].to_i
191
+ bck.cleanup(days)
164
192
  end
165
193
 
166
194
  #  In case of failure
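A condensed sketch of the two new code paths above (-B/--backupFlag and -A/--cleanup), assuming it is run from the repository root and using illustrative connection values:

    require 'logger'
    require_relative 'lib/cassandra'
    require_relative 'lib/hadoop'
    require_relative 'lib/backuptool'

    logger    = Logger.new(STDOUT)
    hadoop    = Hadoop.new(host: 'localhost', port: 14_000, base_dir: 'cassandra',
                           retry_times: 5, retry_interval: 1)
    cassandra = Cassandra.new('/etc/cassandra/conf/cassandra.yaml', logger)
    bck       = BackupTool.new(cassandra, hadoop, logger)

    # -B / --backupFlag: fall back to today's date when no date is given
    date = Time.new.strftime('%Y_%m_%d')
    bck.create_backup_flag(date)

    # -A / --cleanup: drop snapshots and flags older than cleanup.retentionDays
    bck.cleanup(30)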
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'cassback'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/cassback.gemspec ADDED
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cassback/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'cassback'
8
+ spec.version = Cassback::VERSION
9
+ spec.authors = ['Vincent Van Hollebeke', 'Bogdan Niculescu']
10
+ spec.email = ['v.vanhollebeke@criteo.com', 'b.niculescu@criteo.com']
11
+
12
+ spec.summary = 'Cassandra backup to HDFS.'
13
+ spec.description = 'This is a tool that allows creating backups of Cassandra and pushing them into HDFS.'
14
+ spec.homepage = 'http://rubygems.org/gems/cassback'
15
+
16
+ spec.licenses = ['Apache-2.0']
17
+
18
+ spec.files = `git ls-files`.split("\n")
19
+ spec.test_files = `git ls-files -- test/*`.split("\n")
20
+ spec.bindir = 'bin'
21
+ spec.executables << 'cassback'
22
+ spec.require_paths = ['lib']
23
+
24
+ spec.add_development_dependency 'bundler', '~> 1.11'
25
+ spec.add_development_dependency 'rake', '~> 10.0'
26
+
27
+ spec.add_runtime_dependency 'gssapi', '~> 1.2', '>= 1.2.0'
28
+ spec.add_runtime_dependency 'webhdfs', '~> 0.8', '>= 0.8.0'
29
+ spec.add_runtime_dependency 'table_print', '~> 1.5', '>= 1.5.6'
30
+ end
data/conf/local.yml ADDED
@@ -0,0 +1,18 @@
1
+ cassandra:
2
+ # config: "/etc/cassandra/conf/cassandra.yaml"
3
+ config: "/Users/b.niculescu/Tools/apache-cassandra-2.0.16/conf/cassandra.yaml"
4
+
5
+ hadoop:
6
+ # hostname: "10.60.34.217"
7
+ hostname: "jobs-user.hpc.criteo.prod"
8
+ port: 14000
9
+ # directory: "/user/v.vanhollebeke/cassandra"
10
+ directory: "/tmp/b.niculescu/cassandra"
11
+ retryTimes : 3
12
+ retryInterval : 1
13
+
14
+ restore:
15
+ destination: "cassback_restore"
16
+
17
+ cleanup:
18
+ retentionDays: 30
data/conf/preprod.yml ADDED
@@ -0,0 +1,15 @@
1
+ cassandra:
2
+ config: "/etc/cassandra/conf/cassandra.yaml"
3
+
4
+ hadoop:
5
+ hostname: "jobs-user.hpc.criteo.preprod"
6
+ port: 14000
7
+ directory: "/tmp/cassandraback/preprod/"
8
+ retryTimes : 5
9
+ retryInterval : 1
10
+
11
+ restore:
12
+ destination: "cassback_restore"
13
+
14
+ cleanup:
15
+ retentionDays: 30
data/conf/prod.yml ADDED
@@ -0,0 +1,15 @@
1
+ cassandra:
2
+ config: "/etc/cassandra/conf/cassandra.yaml"
3
+
4
+ hadoop:
5
+ hostname: "jobs-user.hpc.criteo.prod"
6
+ port: 14000
7
+ directory: "/tmp/cassandrabackups/prod/"
8
+ retryTimes : 5
9
+ retryInterval : 1
10
+
11
+ restore:
12
+ destination: "cassback_restore"
13
+
14
+ cleanup:
15
+ retentionDays: 30
data/lib/backuptool.rb CHANGED
@@ -40,7 +40,8 @@ class BackupTool
40
40
  begin
41
41
  if date == 'ALL'
42
42
  ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
43
- ls.each do |item|
43
+ ls_metadata = ls.select { |item| item['pathSuffix'].include? 'cass_snap_' }
44
+ ls_metadata.each do |item|
44
45
  date = item['pathSuffix'].gsub('cass_snap_', '')
45
46
  metadata = get_snapshot_metadata(node, date)
46
47
  snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
@@ -60,7 +61,8 @@ class BackupTool
60
61
  if node == 'ALL'
61
62
  begin
62
63
  ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
63
- ls.each do |item|
64
+ ls_nodes = ls.select { |item| item['type'].casecmp('DIRECTORY') == 0 }
65
+ ls_nodes.each do |item|
64
66
  n = item['pathSuffix']
65
67
  result += get_snapshots_node(n, date)
66
68
  end
@@ -141,6 +143,55 @@ class BackupTool
141
143
  end
142
144
  end
143
145
 
146
+ # Cleans up backups that are older than a number of days.
147
+ # This function cleans data on all nodes.
148
+ def cleanup(days)
149
+ retention_date = Date.today - days
150
+ @logger.info("Cleaning backup data on all nodes before #{retention_date}.")
151
+
152
+ all_snapshots = search_snapshots
153
+ @logger.info("A total of #{all_snapshots.size} snapshots were found on Hadoop server.")
154
+
155
+ snapshots_to_be_deleted = all_snapshots.select { |snapshot| snapshot.get_date < retention_date }
156
+ @logger.info("A total of #{snapshots_to_be_deleted.size} snapshots will be deleted.")
157
+
158
+ snapshots_to_be_deleted.each do |snapshot|
159
+ delete_snapshots(node: snapshot.node, date: snapshot.date)
160
+ end
161
+
162
+ all_backup_flags = get_backup_flags
163
+ @logger.info("A total of #{all_backup_flags.size} back up flags were found on Hadoop server.")
164
+
165
+ backup_flags_to_be_delete = all_backup_flags.select { |flag| flag.date < retention_date }
166
+ @logger.info("A total of #{backup_flags_to_be_delete.size} backup flags will be deleted.")
167
+
168
+ backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
169
+ backup_flags_to_be_delete.each do |flag|
170
+ file = backup_flags_location + '/' + flag.file
171
+ @logger.info("Deleting #{file}")
172
+ @hadoop.delete(file)
173
+ end
174
+ end
175
+
176
+ # Method that creates a backup flag to signal that the backup is finished on all nodes
177
+ # This is an individual command that has to be called manually after snapshots have finished
178
+ def create_backup_flag(date)
179
+ file_name = 'BACKUP_COMPLETED_' + date
180
+ remote_file = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + file_name
181
+
182
+ @logger.info('Setting backup completed flag : ' + remote_file)
183
+ @hadoop.create(remote_file, '', overwrite: true)
184
+ end
185
+
186
+ def get_backup_flags
187
+ backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
188
+ ls = @hadoop.list(backup_flags_location)
189
+ backup_flags = ls.select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
190
+ backup_flags.collect do |file|
191
+ BackupFlag.new(@cassandra.cluster_name, file['pathSuffix'])
192
+ end
193
+ end
194
+
144
195
  # Download a file from HDFS, buffered way
145
196
  # * *Args* :
146
197
  # - +remote+ -> HDFS path
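A brief usage sketch of the new cleanup and backup-flag methods, following the test stubs added later in this diff (names, dates, and paths are illustrative):

    require 'logger'
    require_relative 'lib/backuptool'
    require_relative 'test/hadoop_stub'
    require_relative 'test/cassandra_stub'

    hadoop      = HadoopStub.new('test/hadoop')
    cassandra   = CassandraStub.new('cluster1', 'node1', '2016_04_22', [1, 2])
    backup_tool = BackupTool.new(cassandra, hadoop, Logger.new(STDOUT))

    backup_tool.new_snapshot
    backup_tool.create_backup_flag('2016_04_22')
    backup_tool.get_backup_flags.map(&:file)  # => ["BACKUP_COMPLETED_2016_04_22"]

    # Remove snapshots and completion flags older than 30 days
    backup_tool.cleanup(30)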
data/lib/cassandra.rb CHANGED
@@ -150,4 +150,19 @@ class CassandraSnapshot
150
150
  d = @date <=> other.date
151
151
  c * 3 + n * 2 + d
152
152
  end
153
+
154
+ def get_date
155
+ DateTime.strptime(@date, '%Y_%m_%d')
156
+ end
157
+ end
158
+
159
+ class BackupFlag
160
+ attr_reader :cluster, :date, :file
161
+
162
+ def initialize(cluster, file)
163
+ @cluster = cluster
164
+ @file = file.dup
165
+ date_as_string = file.sub! 'BACKUP_COMPLETED_', ''
166
+ @date = DateTime.strptime(date_as_string, '%Y_%m_%d')
167
+ end
153
168
  end
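A small sketch (not part of the gem) of how the new BackupFlag class behaves, given the constructor above:

    require 'date'
    require_relative 'lib/cassandra'

    flag = BackupFlag.new('cluster1', 'BACKUP_COMPLETED_2016_04_26')
    flag.cluster  # => "cluster1"
    flag.file     # => "BACKUP_COMPLETED_2016_04_26" (kept intact thanks to the dup)
    flag.date     # => DateTime for 2016-04-26, parsed with '%Y_%m_%d'
    # Note that `sub!` mutates the string passed in as `file`.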
data/lib/cassback/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Cassback
2
+ VERSION = '0.1.4'.freeze
3
+ end
data/lib/hadoop.rb CHANGED
@@ -6,9 +6,12 @@ WebHDFS::ClientV1::REDIRECTED_OPERATIONS.delete('OPEN')
6
6
  class Hadoop < WebHDFS::Client
7
7
  attr_reader :base_dir
8
8
 
9
- def initialize(host: 'localhost', port: 14_000, base_dir: '/')
9
+ def initialize(host: 'localhost', port: 14_000, base_dir: '/', retry_times: 5, retry_interval: 1)
10
10
  super(host = host, port = port)
11
11
  @kerberos = true
12
12
  @base_dir = base_dir
13
+ @retry_known_errors = true
14
+ @retry_times = retry_times
15
+ @retry_interval = retry_interval
13
16
  end
14
17
  end
data/scripts/deploy.sh ADDED
@@ -0,0 +1,3 @@
1
+ #!/bin/bash
2
+
3
+ while [ 1 = 1 ]; do inotifywait .;scp -r . cstars01e01-par.storage.criteo.preprod:cassback2;scp -r . cstars01e02-par.storage.criteo.preprod:cassback2;done
data/scripts/manualbackups/ansible.cfg ADDED
@@ -0,0 +1,12 @@
1
+ [defaults]
2
+ host_key_checking=false
3
+ record_host_keys=false
4
+ remote_tmp=/tmp/.ansible/tmp
5
+ forks=128
6
+ roles_path=roles
7
+ library=library
8
+
9
+ [ssh_connection]
10
+ control_path=%(directory)s/%%h-%%r
11
+ pipelining=True
12
+ scp_if_ssh=True
data/scripts/manualbackups/inventory.txt ADDED
@@ -0,0 +1,18 @@
1
+ [cstars02-par]
2
+ cstars02e01-par ansible_ssh_host="cstars02e01-par.storage.criteo.prod"
3
+ cstars02e02-par ansible_ssh_host="cstars02e02-par.storage.criteo.prod"
4
+ cstars02e03-par ansible_ssh_host="cstars02e03-par.storage.criteo.prod"
5
+ cstars02e04-par ansible_ssh_host="cstars02e04-par.storage.criteo.prod"
6
+ cstars02e05-par ansible_ssh_host="cstars02e05-par.storage.criteo.prod"
7
+ cstars02e06-par ansible_ssh_host="cstars02e06-par.storage.criteo.prod"
8
+ cstars02e07-par ansible_ssh_host="cstars02e07-par.storage.criteo.prod"
9
+ cstars02e08-par ansible_ssh_host="cstars02e08-par.storage.criteo.prod"
10
+ cstars02e09-par ansible_ssh_host="cstars02e09-par.storage.criteo.prod"
11
+ cstars02e10-par ansible_ssh_host="cstars02e10-par.storage.criteo.prod"
12
+ cstars02e11-par ansible_ssh_host="cstars02e11-par.storage.criteo.prod"
13
+ cstars02e12-par ansible_ssh_host="cstars02e12-par.storage.criteo.prod"
14
+ cstars02e13-par ansible_ssh_host="cstars02e13-par.storage.criteo.prod"
15
+ cstars02e14-par ansible_ssh_host="cstars02e14-par.storage.criteo.prod"
16
+ cstars02e15-par ansible_ssh_host="cstars02e15-par.storage.criteo.prod"
17
+ cstars02e16-par ansible_ssh_host="cstars02e16-par.storage.criteo.prod"
18
+ cstars02e17-par ansible_ssh_host="cstars02e17-par.storage.criteo.prod"
data/scripts/manualbackups/play_book.sh ADDED
@@ -0,0 +1,13 @@
1
+ #!/bin/bash
2
+
3
+ PLAYBOOK=$1
4
+
5
+ if [ "$PLAYBOOK" = "" ]; then
6
+ echo "Usage: $0 <playbook> [ansible options]"
7
+ exit 65
8
+ fi
9
+
10
+ shift
11
+ ansible-playbook --inventory-file=inventory.txt playbooks/$PLAYBOOK.yml --extra-vars $*
12
+
13
+ exit $?
data/scripts/manualbackups/playbooks/backups.yml ADDED
@@ -0,0 +1,6 @@
1
+ ---
2
+
3
+ - gather_facts: no
4
+ hosts: cstars02-par
5
+ roles:
6
+ - role: planb
data/scripts/manualbackups/roles/planb/files/backup.sh ADDED
@@ -0,0 +1,27 @@
1
+ #!/bin/bash
2
+
3
+ kinit v.vanhollebeke@CRITEOIS.LAN -k -t ~/keytab
4
+
5
+ date=`date +%Y_%m_%d`
6
+
7
+ nodetool clearsnapshot
8
+
9
+ snapdir=$(nodetool snapshot| grep directory| awk '{print $NF}')
10
+ echo "Snapshot is $snapdir"
11
+
12
+ for dir in $(find /var/opt/cassandra/data -type d |grep snapshots/$snapdir); do
13
+ kok=$(klist -l|grep v.vanhollebeke@CRITEOIS.LAN|grep -v Expired|wc -l)
14
+ if [ $kok == 0 ]; then
15
+ echo "Must renew Kerberos ticket"
16
+ kinit v.vanhollebeke@CRITEOIS.LAN -k -t ~/keytab
17
+ else
18
+ echo "Kerberos ticket OK"
19
+ fi
20
+ keyspace=`echo $dir|awk -F\/ '{print $6}'`
21
+ table=`echo $dir|awk -F\/ '{print $7}'`
22
+ echo "Saving $keyspace $table"
23
+ ./httpfs.sh /var/opt/cassandra/data/$keyspace/$table/snapshots/$snapdir tmp/cassandrabackups/prod/cstars02/$date/$HOSTNAME/$table
24
+
25
+ done
26
+
27
+ echo "FINISHED !!!!"
data/scripts/manualbackups/roles/planb/files/httpfs.sh ADDED
@@ -0,0 +1,27 @@
1
+ #!/bin/sh
2
+
3
+ BASE='http://0.httpfs.hpc.criteo.prod:14000/webhdfs/v1'
4
+ #BASE='http://httpfs.pa4.hpc.criteo.prod:14000'
5
+
6
+ IN=$1
7
+ OUT=$2
8
+
9
+ echo "Creating destination directory: $OUT"
10
+ curl --negotiate -u : "$BASE/$OUT?op=MKDIRS&permission=0777" -X PUT -s > /dev/null
11
+
12
+ for p in $(find $IN -type f)
13
+ do
14
+ f=$(basename $p)
15
+ echo "$IN/$f"
16
+
17
+ # Create file
18
+ dest=$(curl --negotiate -u : "$BASE/$OUT/$f?op=CREATE&overwrite=true&permission=0777" -i -X PUT -s | grep Location | tail -n1 | cut -d\ -f2 | tr -d '\r\n')
19
+ [ $? != 0 ] && echo "ERROR"
20
+
21
+ echo "DEST IS ${dest}"
22
+
23
+ # Upload file
24
+ curl --negotiate -u : "$dest" -i -X PUT -T "$IN/$f" -H 'Content-Type: application/octet-stream' > /dev/null
25
+ [ $? != 0 ] && echo "ERROR"
26
+
27
+ done
data/scripts/manualbackups/roles/planb/files/krb5.conf ADDED
@@ -0,0 +1,26 @@
1
+ [libdefaults]
2
+ dns_lookup_realm = true
3
+ dns_lookup_kdc = true
4
+ ticket_lifetime = 24h
5
+ renew_lifetime = 7d
6
+ forwardable = true
7
+ default_realm = CRITEOIS.LAN
8
+ udp_preference_limit = 1
9
+ realm_try_domains = 1
10
+ permitted_enctypes = aes128-cts-hmac-sha1-96 des3-cbc-sha1 arcfour-hmac
11
+ default_tkt_enctypes = aes128-cts-hmac-sha1-96 des3-cbc-sha1 arcfour-hmac
12
+ [domain_realm]
13
+ .hpc.criteo.preprod = HPC.CRITEO.PREPROD
14
+ .hpc.criteo.prod = AMS.HPC.CRITEO.PROD
15
+ .pa4.hpc.criteo.prod = PA4.HPC.CRITEO.PROD
16
+ .as.hpc.criteo.prod = AS.HPC.CRITEO.PROD
17
+ .na.hpc.criteo.prod = NA.HPC.CRITEO.PROD
18
+ .cn.hpc.criteo.prod = CN.HPC.CRITEO.PROD
19
+ [capaths]
20
+ CRITEOIS.LAN = {
21
+ AMS.HPC.CRITEO.PROD = .
22
+ PA4.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
23
+ AS.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
24
+ NA.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
25
+ CN.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
26
+ }
data/scripts/manualbackups/roles/planb/tasks/main.yml ADDED
@@ -0,0 +1,34 @@
1
+ ---
2
+
3
+ - name: Copy krb5.conf into /etc
4
+ copy: src=krb5.conf dest=/etc/krb5.conf
5
+ sudo: yes
6
+ tags: keytab
7
+
8
+ - name: Copy my keytab
9
+ copy: src=keytab dest=~/keytab
10
+ tags: keytab
11
+
12
+ - name: Check if keytab works
13
+ command: kinit $USER@CRITEOIS.LAN -k -t ~/keytab
14
+ tags: keytab
15
+
16
+ - name: Copy httpfs.sh script
17
+ copy: src=httpfs.sh dest=~/httpfs.sh mode=750
18
+ tags: backup
19
+
20
+ - name: Copy backup.sh script
21
+ copy: src=backup.sh dest=~/backup.sh mode=750
22
+ tags: backup
23
+
24
+ - name: Start Backup
25
+ shell: ./backup.sh >logfile 2>&1 chdir=~
26
+ tags: backup
27
+
28
+ - name: Clear snapshots
29
+ shell: sudo nodetool clearsnapshot
30
+ tags: clear
31
+
32
+ - name: Verify if snapshots are REALLY deleted
33
+ shell: "[ $(find /var/opt/cassandra -type d |grep snap|wc -l) == 0 ]"
34
+ tags: verify
data/scripts/pre-push ADDED
@@ -0,0 +1,17 @@
1
+ #!/bin/bash
2
+
3
+ echo "Running rubocop with auto-correct" >&2
4
+ bundle exec rubocop --config .rubocop.yml --auto-correct --out /dev/null
5
+ modified=$(git status | grep modified | wc -l)
6
+ if [ $modified -eq 0 ]; then
7
+ echo -e "\e[1;32mNothing to correct, pushing\e[0m" >&2
8
+ exit 0
9
+ else
10
+ s=''
11
+ if [ $modified -gt 1 ]; then
12
+ s='s'
13
+ fi
14
+
15
+ echo -e "\e[1;31m$modified file$s were modified, please add commit before pushing\e[0m" >&2
16
+ exit 1
17
+ fi
data/test/cassandra_stub.rb ADDED
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/cassandra'
3
+
4
+ # Stub implementation that simulates cassandra backups.
5
+ class CassandraStub
6
+ attr_reader :data_path, :cluster_name, :node_name
7
+
8
+ def initialize(cluster_name = 'cluster1', node_name = 'node1', date = '', file_indexes = [])
9
+ @cluster_name = cluster_name
10
+ @node_name = node_name
11
+ @date = date
12
+ @data_path = 'test/cassandra' + '/' + cluster_name + '/' + node_name + '/'
13
+ FileUtils.mkdir_p(@data_path)
14
+
15
+ # create some fake sstables
16
+ @metadata = Set.new
17
+ file_indexes.each do |index|
18
+ file_name = "SSTable-#{index}-Data.db"
19
+ file_path = @data_path + '/' + file_name
20
+ File.open(file_path, 'w') { |file| file.write('This is a test file that simulates an SSTable') }
21
+ @metadata.add(file_name)
22
+ end
23
+ end
24
+
25
+ def new_snapshot
26
+ # simply create a pointer to an existing location
27
+ CassandraSnapshot.new(@cluster_name, @node_name, @date, @metadata)
28
+ end
29
+
30
+ def delete_snapshot(_snapshot)
31
+ FileUtils.rm_rf(@data_path)
32
+ end
33
+ end
data/test/hadoop_stub.rb ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'fileutils'
4
+
5
+ # A stub implementation of Hadoop that reads/writes local files instead of using webhdfs
6
+ class HadoopStub
7
+ attr_reader :base_dir
8
+
9
+ def initialize(base_dir)
10
+ @base_dir = base_dir
11
+ end
12
+
13
+ def list(path, _options = {})
14
+ files_and_folders = Dir.glob("#{path}/*")
15
+ files_and_folders.collect do |file|
16
+ type = if File.file?(file)
17
+ 'FILE'
18
+ else
19
+ 'DIRECTORY'
20
+ end
21
+ # return a hash similar to the one that hadoop sends (containing fewer entries)
22
+ {
23
+ 'pathSuffix' => File.basename(file),
24
+ 'type' => type,
25
+ }
26
+ end
27
+ end
28
+
29
+ def list_files(path, _options = {})
30
+ files_and_folders = Dir.glob("#{path}/**/*")
31
+ files_and_folders.select { |file| File.file?(file) }
32
+ end
33
+
34
+ def create(path, body, _options = {})
35
+ parent = File.expand_path('..', path)
36
+ FileUtils.mkdir_p parent
37
+ if body.is_a?(File)
38
+ File.open(path, 'w') { |file| file.write(body.read) }
39
+ else
40
+ File.open(path, 'w') { |file| file.write(body) }
41
+ end
42
+ end
43
+
44
+ def read(path, _options = {})
45
+ File.open(path, 'r').read
46
+ end
47
+
48
+ def delete(path, _options = {})
49
+ FileUtils.rm_rf(path)
50
+ end
51
+ end
data/test/test_backuptool.rb ADDED
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/env ruby
2
+ require 'test/unit'
3
+ require 'logger'
4
+
5
+ require_relative '../lib/backuptool'
6
+ require_relative 'hadoop_stub'
7
+ require_relative 'cassandra_stub'
8
+
9
+ class TestSimpleNumber < Test::Unit::TestCase
10
+ def test_new_snapshot
11
+ hadoop = HadoopStub.new('test/hadoop')
12
+ create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
13
+
14
+ remote_files = hadoop.list_files('test/hadoop')
15
+ # two files were backed up + one metadata file
16
+ assert_equal(3, remote_files.size)
17
+
18
+ # files were created in the correct location
19
+ assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_22', remote_files[0])
20
+ assert_equal('test/hadoop/cluster1/node1/SSTable-1-Data.db', remote_files[1])
21
+ assert_equal('test/hadoop/cluster1/node1/SSTable-2-Data.db', remote_files[2])
22
+
23
+ # metadata file contains the sstables.
24
+ metadata_content = File.open(remote_files[0], 'r').read
25
+ assert(metadata_content.include? 'SSTable-1-Data.db')
26
+ assert(metadata_content.include? 'SSTable-2-Data.db')
27
+
28
+ # cleanup
29
+ hadoop.delete('test/hadoop')
30
+ hadoop.delete('test/cassandra')
31
+ end
32
+
33
+ def test_two_snapshots
34
+ hadoop = HadoopStub.new('test/hadoop')
35
+ create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
36
+ create_new_snapshot(hadoop, 'node1', '2016_04_23', [2, 3, 4])
37
+
38
+ remote_files = hadoop.list_files('test/hadoop')
39
+ # two files were backed up + one metadata file
40
+ assert_equal(6, remote_files.size)
41
+
42
+ # files were created in the correct location
43
+ # no duplicate files are stored
44
+ assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_22', remote_files[0])
45
+ assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_23', remote_files[1])
46
+ assert_equal('test/hadoop/cluster1/node1/SSTable-1-Data.db', remote_files[2])
47
+ assert_equal('test/hadoop/cluster1/node1/SSTable-2-Data.db', remote_files[3])
48
+ assert_equal('test/hadoop/cluster1/node1/SSTable-3-Data.db', remote_files[4])
49
+ assert_equal('test/hadoop/cluster1/node1/SSTable-4-Data.db', remote_files[5])
50
+
51
+ # metadata on first backup file contains the sstables.
52
+ metadata_content = File.open(remote_files[0], 'r').read
53
+ assert(metadata_content.include? 'SSTable-1-Data.db')
54
+ assert(metadata_content.include? 'SSTable-2-Data.db')
55
+
56
+ # metadata on second backup file contains the sstables.
57
+ metadata_content = File.open(remote_files[1], 'r').read
58
+ assert(metadata_content.include? 'SSTable-2-Data.db')
59
+ assert(metadata_content.include? 'SSTable-3-Data.db')
60
+ assert(metadata_content.include? 'SSTable-4-Data.db')
61
+
62
+ # cleanup
63
+ hadoop.delete('test/hadoop')
64
+ hadoop.delete('test/cassandra')
65
+ end
66
+
67
+ def test_restore
68
+ hadoop = HadoopStub.new('test/hadoop')
69
+ backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
70
+
71
+ # restore a newly created snapshot
72
+ backup_tool.restore_snapshot('node1', '2016_04_22', 'test/restore')
73
+
74
+ restored_files = hadoop.list_files('test/restore')
75
+ # two files were restored
76
+ assert_equal(2, restored_files.size)
77
+ assert_equal('test/restore/SSTable-1-Data.db', restored_files[0])
78
+ assert_equal('test/restore/SSTable-2-Data.db', restored_files[1])
79
+
80
+ # cleanup
81
+ hadoop.delete('test/hadoop')
82
+ hadoop.delete('test/restore')
83
+ hadoop.delete('test/cassandra')
84
+ end
85
+
86
+ def test_delete
87
+ hadoop = HadoopStub.new('test/hadoop')
88
+ backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
89
+
90
+ # delete a newly created snapshot
91
+ backup_tool.delete_snapshots(node: 'node1', date: '2016_04_22')
92
+
93
+ remote_files = hadoop.list_files('test/hadoop')
94
+ assert_equal(0, remote_files.size)
95
+
96
+ hadoop.delete('test/cassandra')
97
+ end
98
+
99
+ def test_backup_flag
100
+ hadoop = HadoopStub.new('test/hadoop')
101
+ backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
102
+
103
+ backup_tool.create_backup_flag('2016_04_22')
104
+
105
+ remote_files = hadoop.list_files('test/hadoop')
106
+ assert_equal(4, remote_files.size)
107
+ # Flag is created at cluster level
108
+ assert_equal('test/hadoop/cass_snap_metadata/cluster1/BACKUP_COMPLETED_2016_04_22', remote_files[0])
109
+
110
+ # cleanup
111
+ hadoop.delete('test/hadoop')
112
+ hadoop.delete('test/cassandra')
113
+ end
114
+
115
+ def test_get_backup_flag
116
+ hadoop = HadoopStub.new('test/hadoop')
117
+ backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
118
+
119
+ backup_tool.create_backup_flag('2016_04_22')
120
+ flags = backup_tool.get_backup_flags
121
+
122
+ # One flag found
123
+ assert_equal(1, flags.size)
124
+ # Flag points to the correct file
125
+ assert_equal('cluster1', flags[0].cluster)
126
+ assert_equal('BACKUP_COMPLETED_2016_04_22', flags[0].file)
127
+
128
+ # cleanup
129
+ hadoop.delete('test/hadoop')
130
+ hadoop.delete('test/cassandra')
131
+ end
132
+
133
+ def test_cleanup
134
+ hadoop = HadoopStub.new('test/hadoop')
135
+ retention_days = 30
136
+
137
+ date_31_days_back = (Date.today - 31).strftime('%Y_%m_%d')
138
+ date_30_days_back = (Date.today - 30).strftime('%Y_%m_%d')
139
+
140
+ # Two backups on two nodes
141
+ create_new_snapshot(hadoop, 'node1', date_31_days_back, [1, 2, 3, 4])
142
+ create_new_snapshot(hadoop, 'node2', date_31_days_back, [1, 2, 3, 4])
143
+ create_new_snapshot(hadoop, 'node1', date_30_days_back, [3, 4, 5, 6])
144
+ backup_tool = create_new_snapshot(hadoop, 'node2', date_30_days_back, [4, 5, 6, 7])
145
+
146
+ # Both backups are marked as completed
147
+ backup_tool.create_backup_flag(date_31_days_back)
148
+ backup_tool.create_backup_flag(date_30_days_back)
149
+ backup_tool.create_backup_flag(date_30_days_back)
150
+
151
+ backup_tool.cleanup(retention_days)
152
+
153
+ # Two snapshots were deleted, two were kept
154
+ snapshots = backup_tool.search_snapshots
155
+ assert_equal(2, snapshots.size)
156
+ assert_equal('node1', snapshots[0].node)
157
+ assert_equal(date_30_days_back, snapshots[0].date)
158
+ assert_equal('node2', snapshots[1].node)
159
+ assert_equal(date_30_days_back, snapshots[1].date)
160
+
161
+ # One backup flag was deleted, one was kept.
162
+ backup_flags = backup_tool.get_backup_flags
163
+ assert_equal(1, backup_flags.size)
164
+ assert_equal("BACKUP_COMPLETED_#{date_30_days_back}", backup_flags[0].file)
165
+
166
+ # cleanup
167
+ hadoop.delete('test/hadoop')
168
+ hadoop.delete('test/cassandra')
169
+ end
170
+
171
+ def create_new_snapshot(hadoop, node, date, file_indexes)
172
+ logger = Logger.new(STDOUT)
173
+ cassandra = CassandraStub.new('cluster1', node, date, file_indexes)
174
+ backup_tool = BackupTool.new(cassandra, hadoop, logger)
175
+
176
+ backup_tool.new_snapshot
177
+
178
+ backup_tool
179
+ end
180
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cassback
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-04-20 00:00:00.000000000 Z
12
+ date: 2016-04-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -109,10 +109,36 @@ executables:
109
109
  extensions: []
110
110
  extra_rdoc_files: []
111
111
  files:
112
+ - ".gitignore"
113
+ - ".rubocop.yml_disabled"
114
+ - Gemfile
115
+ - LICENSE
116
+ - README.md
117
+ - Rakefile.rb
112
118
  - bin/cassback
119
+ - bin/console
120
+ - bin/setup
121
+ - cassback.gemspec
122
+ - conf/local.yml
123
+ - conf/preprod.yml
124
+ - conf/prod.yml
113
125
  - lib/backuptool.rb
114
126
  - lib/cassandra.rb
127
+ - lib/cassback/version.rb
115
128
  - lib/hadoop.rb
129
+ - scripts/deploy.sh
130
+ - scripts/manualbackups/ansible.cfg
131
+ - scripts/manualbackups/inventory.txt
132
+ - scripts/manualbackups/play_book.sh
133
+ - scripts/manualbackups/playbooks/backups.yml
134
+ - scripts/manualbackups/roles/planb/files/backup.sh
135
+ - scripts/manualbackups/roles/planb/files/httpfs.sh
136
+ - scripts/manualbackups/roles/planb/files/krb5.conf
137
+ - scripts/manualbackups/roles/planb/tasks/main.yml
138
+ - scripts/pre-push
139
+ - test/cassandra_stub.rb
140
+ - test/hadoop_stub.rb
141
+ - test/test_backuptool.rb
116
142
  homepage: http://rubygems.org/gems/cassback
117
143
  licenses:
118
144
  - Apache-2.0
@@ -133,9 +159,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
159
  version: '0'
134
160
  requirements: []
135
161
  rubyforge_project:
136
- rubygems_version: 2.5.2
162
+ rubygems_version: 2.4.8
137
163
  signing_key:
138
164
  specification_version: 4
139
165
  summary: Cassandra backup to HDFS.
140
- test_files: []
141
- has_rdoc:
166
+ test_files:
167
+ - test/cassandra_stub.rb
168
+ - test/hadoop_stub.rb
169
+ - test/test_backuptool.rb