cassback 0.1.3 → 0.1.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 616dce141979a8e5c187b6a7c18d9c560e292edf
-   data.tar.gz: 5a2646d54b746c0fd482d2b39f50b039b5c32069
+   metadata.gz: f2b9b6aed95f39752afe6c7df4d2e404d4041450
+   data.tar.gz: 07e9fe1a67dd830ce2f45fc56b37098dbdcb01e9
  SHA512:
-   metadata.gz: 7550cee21ef0fa042d1813e011c12148745b443f6fa726653fcd801a4d7f66443e863f92f3055088e11f933e33688d14d8b22de1ee851709e9e79275011cfcfa
-   data.tar.gz: a2de6cecc4ec52c9045a10d8762dbadf876f44c9426dbbdf4330c61d152db1cb694b7a3d6cf62cf5369218707bcf606f39bffcdeaa9573997ee816fb1939afe3
+   metadata.gz: 08080fa50589f745652230d2c5879406ac0fcf1f4cee3306c89ff963c18c1208fa430dca5cef1e7b22a4f06dcb80746a4d1619c6cc622dc7cd5763bcea082eed
+   data.tar.gz: b8196fe75585a33d1224fe6cd919a14b9fb90bef4dcc7ead9097f65ad9ffc6afa1b4a60b7a56408bf037f141ad5837f465c535fc0e0e173be162e3dc4a8230a9
data/.gitignore ADDED
@@ -0,0 +1,14 @@
+ Gemfile.lock
+ doc
+
+ # IntelliJ specific
+ .idea
+ *.iml
+ *.ipr
+ *.iws
+
+ # Log files
+ *.log
+
+ # Ruby gem files
+ *.gem
data/.rubocop.yml_disabled ADDED
@@ -0,0 +1,37 @@
+ # This configuration was made for rubocop >= 0.36.0
+
+ ### SRE Core configuration
+ ### (See also https://confluence.criteois.com/pages/viewpage.action?pageId=270467645)
+ # Taken from Core's rules
+ Metrics/LineLength:
+   Max: 120
+ # Taken from Core's rules
+ Style/AlignHash:
+   EnforcedColonStyle: table
+   EnforcedHashRocketStyle: table
+
+ ### SRE Storage configuration
+ # We have french people's names lying around
+ Style/AsciiComments:
+   Enabled: false
+ # This wants snake_case file names and we have dashes everywhere
+ Style/FileName:
+   Enabled: false
+ # Use consistent style for hashes (do not indent far away when in parentheses, etc.)
+ Style/IndentHash:
+   EnforcedStyle: consistent
+ # Enforce trailing commas in literals for consistency, ease of editing, and code generation
+ Style/TrailingCommaInLiteral:
+   EnforcedStyleForMultiline: comma
+
+ ## Temporary edits (that should be fixed before enabling them)
+ # Messes things up for now
+ Style/BracesAroundHashParameters:
+   Enabled: false
+ # Badly implemented, and crashes in some cases
+ Performance/Casecmp:
+   Enabled: false
+ # We should have trailing commas only inside multiline statements
+ # r.veznaver said this one will be fixed in rubocop
+ Style/TrailingCommaInArguments:
+   Enabled: false
data/Gemfile ADDED
@@ -0,0 +1,8 @@
+ source 'https://rubygems.org'
+ ruby '2.2.2'
+ gem 'webhdfs'
+ gem 'gssapi'
+ gem 'rubocop'
+ gem 'table_print'
+ gem 'rspec_junit_formatter'
+ gem 'rubocop-junit-formatter'
data/LICENSE ADDED
@@ -0,0 +1,194 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction, and
+ distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by the copyright
+ owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all other entities
+ that control, are controlled by, or are under common control with that entity.
+ For the purposes of this definition, "control" means (i) the power, direct or
+ indirect, to cause the direction or management of such entity, whether by
+ contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity exercising
+ permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications, including
+ but not limited to software source code, documentation source, and configuration
+ files.
+
+ "Object" form shall mean any form resulting from mechanical transformation or
+ translation of a Source form, including but not limited to compiled object code,
+ generated documentation, and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or Object form, made
+ available under the License, as indicated by a copyright notice that is included
+ in or attached to the work (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object form, that
+ is based on (or derived from) the Work and for which the editorial revisions,
+ annotations, elaborations, or other modifications represent, as a whole, an
+ original work of authorship. For the purposes of this License, Derivative Works
+ shall not include works that remain separable from, or merely link (or bind by
+ name) to the interfaces of, the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including the original version
+ of the Work and any modifications or additions to that Work or Derivative Works
+ thereof, that is intentionally submitted to Licensor for inclusion in the Work
+ by the copyright owner or by an individual or Legal Entity authorized to submit
+ on behalf of the copyright owner. For the purposes of this definition,
+ "submitted" means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems, and
+ issue tracking systems that are managed by, or on behalf of, the Licensor for
+ the purpose of discussing and improving the Work, but excluding communication
+ that is conspicuously marked or otherwise designated in writing by the copyright
+ owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+ of whom a Contribution has been received by Licensor and subsequently
+ incorporated within the Work.
+
+ 2. Grant of Copyright License.
+
+ Subject to the terms and conditions of this License, each Contributor hereby
+ grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+ irrevocable copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the Work and such
+ Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License.
+
+ Subject to the terms and conditions of this License, each Contributor hereby
+ grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+ irrevocable (except as stated in this section) patent license to make, have
+ made, use, offer to sell, sell, import, and otherwise transfer the Work, where
+ such license applies only to those patent claims licensable by such Contributor
+ that are necessarily infringed by their Contribution(s) alone or by combination
+ of their Contribution(s) with the Work to which such Contribution(s) was
+ submitted. If You institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work or a
+ Contribution incorporated within the Work constitutes direct or contributory
+ patent infringement, then any patent licenses granted to You under this License
+ for that Work shall terminate as of the date such litigation is filed.
+
+ 4. Redistribution.
+
+ You may reproduce and distribute copies of the Work or Derivative Works thereof
+ in any medium, with or without modifications, and in Source or Object form,
+ provided that You meet the following conditions:
+
+ You must give any other recipients of the Work or Derivative Works a copy of
+ this License; and
+ You must cause any modified files to carry prominent notices stating that You
+ changed the files; and
+ You must retain, in the Source form of any Derivative Works that You distribute,
+ all copyright, patent, trademark, and attribution notices from the Source form
+ of the Work, excluding those notices that do not pertain to any part of the
+ Derivative Works; and
+ If the Work includes a "NOTICE" text file as part of its distribution, then any
+ Derivative Works that You distribute must include a readable copy of the
+ attribution notices contained within such NOTICE file, excluding those notices
+ that do not pertain to any part of the Derivative Works, in at least one of the
+ following places: within a NOTICE text file distributed as part of the
+ Derivative Works; within the Source form or documentation, if provided along
+ with the Derivative Works; or, within a display generated by the Derivative
+ Works, if and wherever such third-party notices normally appear. The contents of
+ the NOTICE file are for informational purposes only and do not modify the
+ License. You may add Your own attribution notices within Derivative Works that
+ You distribute, alongside or as an addendum to the NOTICE text from the Work,
+ provided that such additional attribution notices cannot be construed as
+ modifying the License.
+ You may add Your own copyright statement to Your modifications and may provide
+ additional or different license terms and conditions for use, reproduction, or
+ distribution of Your modifications, or for any such Derivative Works as a whole,
+ provided Your use, reproduction, and distribution of the Work otherwise complies
+ with the conditions stated in this License.
+
+ 5. Submission of Contributions.
+
+ Unless You explicitly state otherwise, any Contribution intentionally submitted
+ for inclusion in the Work by You to the Licensor shall be under the terms and
+ conditions of this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify the terms of
+ any separate license agreement you may have executed with Licensor regarding
+ such Contributions.
+
+ 6. Trademarks.
+
+ This License does not grant permission to use the trade names, trademarks,
+ service marks, or product names of the Licensor, except as required for
+ reasonable and customary use in describing the origin of the Work and
+ reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty.
+
+ Unless required by applicable law or agreed to in writing, Licensor provides the
+ Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+ including, without limitation, any warranties or conditions of TITLE,
+ NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
+ solely responsible for determining the appropriateness of using or
+ redistributing the Work and assume any risks associated with Your exercise of
+ permissions under this License.
+
+ 8. Limitation of Liability.
+
+ In no event and under no legal theory, whether in tort (including negligence),
+ contract, or otherwise, unless required by applicable law (such as deliberate
+ and grossly negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special, incidental,
+ or consequential damages of any character arising as a result of this License or
+ out of the use or inability to use the Work (including but not limited to
+ damages for loss of goodwill, work stoppage, computer failure or malfunction, or
+ any and all other commercial damages or losses), even if such Contributor has
+ been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability.
+
+ While redistributing the Work or Derivative Works thereof, You may choose to
+ offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+ other liability obligations and/or rights consistent with this License. However,
+ in accepting such obligations, You may act only on Your own behalf and on Your
+ sole responsibility, not on behalf of any other Contributor, and only if You
+ agree to indemnify, defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason of your
+ accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work
+
+ To apply the Apache License to your work, attach the following boilerplate
+ notice, with the fields enclosed by brackets "{}" replaced with your own
+ identifying information. (Don't include the brackets!) The text should be
+ enclosed in the appropriate comment syntax for the file format. We also
+ recommend that a file or class name and description of purpose be included on
+ the same "printed page" as the copyright notice for easier identification within
+ third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
data/README.md ADDED
@@ -0,0 +1,103 @@
+ # Cassback
+
+ Welcome to Cassback!
+ This is a project that aims to back up Cassandra SSTables and load them into HDFS for further usage.
+
+ ## Installation
+
+ Build the application into a gem using the command:
+
+     $ gem build cassback.gemspec
+
+ You should see the following output:
+
+     Successfully built RubyGem
+     Name: cassback
+     Version: 0.1.0
+     File: cassback-0.1.0.gem
+
+
+ Install the application into your local gem store using the following command:
+
+     $ gem install cassback-0.1.0.gem
+
+ You should then see the following output:
+
+     Successfully installed cassback-0.1.0
+     Parsing documentation for cassback-0.1.0
+     Done installing documentation for cassback after 0 seconds
+     1 gem installed
+
+ ## Usage
+
+ Once the cassback gem is installed, it adds the **cassback** executable to your PATH.
+ This means you can run it with either of the following commands, and it will print usage examples:
+
+     cassback
+     cassback -h
+
+ A simple command that you can use for starting a backup is:
+
+     cassback -S -C path_to_some_config_file.yml
+
+ ## Configuration
+
+ The application has some default configuration defined.
+ You can override the default configuration in two ways (see the sketch after this section):
+
+ 1. Using a configuration file passed as a parameter on the command line.
+
+ 2. Using individual configuration properties passed as parameters on the command line.
+    The command-line parameters take precedence over the configuration file.
+
+ ## Orchestration
+
+ The tool is designed to do snapshots at **node level** (and not at **cluster level**) - basically it has to be installed
+ on each node, and a separate process has to be executed from there to trigger a node-level snapshot. Because this task is
+ quite complex, it is recommended to use an orchestration tool (like Rundeck) that allows you to execute the same command
+ on multiple machines and run the processes in parallel.
+
+ After all node backups are finished, the orchestration tool has to take care of signaling to other applications that
+ the backup is completely finished. That is done by adding a new empty file to the cluster metadata folder, named in
+ the format BACKUP_COMPLETED_yyyy_MM_dd. This has to be triggered only once, by using the following command:
+
+     cassback -B [-d date] -C conf/path_to_some_config_file.yml
+
+ Optionally you can also pass a date; if none is given, the current date is assumed.
+
+ ## Data Integrity
+
+ The project internally uses the webhdfs gem (see https://github.com/kzk/webhdfs), a Ruby project
+ built on top of the WebHDFS API (https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html).
+ Because we're using the WebHDFS API, we get data integrity for free. The tool is also configurable so that, in case of
+ errors, it can retry the download/upload of data. This is configurable via the following config file properties:
+
+ 1. **hadoop.retryTimes** - the number of retries the tool should attempt before giving up. Defaults to 5.
+ 2. **hadoop.retryInterval** - the interval (in seconds) the tool should wait between two attempts. Defaults to 1 second.
+
+ If you want to read more about Hadoop's checksum algorithm that ensures data integrity, see the
+ following link: https://www.safaribooksonline.com/library/view/hadoop-the-definitive/9781449328917/ch04.html
+
+ ## Cleanup policy
+
+ Backups of databases usually take a lot of space. Even though backups are done incrementally
+ (meaning that a file is not stored twice even if it's present in multiple backups), cleanup still needs to be done.
+ The tool's cleanup policy removes snapshots once a configurable number of days has passed since the snapshot was published.
+ This is configurable via the **cleanup.retentionDays** property in the configuration file. Note that cleanup is
+ done at cluster level (for all nodes), since it doesn't make sense to keep data for only some of the nodes.
+
+ The command for triggering a cleanup is:
+
+     cassback -A -C conf/path_to_some_config_file.yml
+
+ ## Unit tests
+
+ Unit tests can be executed locally by running the following command:
+
+     rake test
+
+ ## Contributing
+
+ For now this is an internal Criteo project, but we're aiming to make it open source and publish it on GitHub.
+
+ Issue reports and merge requests are welcome on Criteo's GitLab at: https://gitlab.criteois.com/ruby-gems/cassback
+
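Editor's note: the configuration precedence the README describes (command-line parameters over config file over built-in defaults) amounts to a recursive hash merge. Below is a minimal, hypothetical sketch of that idea in Ruby; the deep_merge helper and the sample values are illustrative, not the gem's actual internals:

    require 'yaml'

    # Recursively merge two configuration hashes; values from `override` win.
    def deep_merge(base, override)
      base.merge(override) do |_key, old, new|
        old.is_a?(Hash) && new.is_a?(Hash) ? deep_merge(old, new) : new
      end
    end

    defaults  = { 'hadoop' => { 'hostname' => 'localhost', 'port' => 14_000 } }
    config    = 'path_to_some_config_file.yml'
    from_file = File.exist?(config) ? YAML.load_file(config) : {}
    from_cli  = { 'hadoop' => { 'port' => 14_001 } } # e.g. parsed from ARGV

    # Command-line values beat the file, which beats the built-in defaults.
    options = deep_merge(deep_merge(defaults, from_file), from_cli)
    puts options.inspect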
data/Rakefile.rb ADDED
@@ -0,0 +1,8 @@
+ require 'bundler/gem_tasks'
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+   t.libs << 'test'
+   t.test_files = FileList['test/test*.rb']
+   t.verbose = true
+ end
data/bin/cassback CHANGED
@@ -30,6 +30,7 @@ command_line_config = {
  'cassandra' => {},
  'hadoop' => {},
  'restore' => {},
+ 'cleanup' => {},
  }

  # Default options
@@ -38,13 +39,19 @@ options = {
    'config' => '/etc/cassandra/conf/cassandra.yaml',
  },
  'hadoop' => {
-   'hostname' => 'localhost',
-   'port' => 14_000,
-   'directory' => 'cassandra',
+   'hostname'      => 'localhost',
+   'port'          => 14_000,
+   'directory'     => 'cassandra',
+   'retryTimes'    => 5,
+   'retryInterval' => 1,
  },
  'restore' => {
    'destination' => 'cassandra',
  },
+
+ 'cleanup' => {
+   'retentionDays' => 30,
+ },
  }

  # If no argument given in command line, print the help
@@ -52,7 +59,7 @@ ARGV << '-h' if ARGV.empty?

  # Parse command line options
  parser = OptionParser.new do |opts|
-   opts.banner = 'Usage: cassback.rb [options]'
+   opts.banner = 'Usage: cassback [options]'

    opts.separator ''
    opts.separator 'Configuration:'
@@ -74,6 +81,13 @@ parser = OptionParser.new do |opts|
    opts.on('-F', '--flush', 'removes a backuped snapshot from Hadoop, needs a date') do |_v|
      action = 'delete'
    end
+   opts.on('-B', '--backupFlag', 'creates an empty file to signal that the backup has finished, can be used with a date, \
+           today date is assumed if no date is provided') do |_v|
+     action = 'backupFlag'
+   end
+   opts.on('-A', '--cleanup', 'cleans up old snapshots') do |_v|
+     action = 'cleanup'
+   end

    opts.separator ''
    opts.separator 'Action related:'
@@ -133,7 +147,9 @@ end

  begin
    # Create the Hadoop object
-   hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'], base_dir: options['hadoop']['directory'])
+   hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'],
+                       base_dir: options['hadoop']['directory'], retry_times: options['hadoop']['retryTimes'],
+                       retry_interval: options['hadoop']['retryInterval'])

    #  Create the Cassandra object
    cassandra = Cassandra.new(options['cassandra']['config'], logger)
@@ -161,6 +177,18 @@ begin
    elsif action == 'delete'
      raise('No date given') unless options.include? 'date'
      bck.delete_snapshots(node: options['node'], date: options['date'])
+
+   # Create backup flag.
+   elsif action == 'backupFlag'
+     # Use today's date if no date has been provided
+     date = options['date']
+     date ||= Time.new.strftime('%Y_%m_%d')
+     bck.create_backup_flag(date)
+
+   # Cleanup old snapshots based on cleanup.retentionDays
+   elsif action == 'cleanup'
+     days = options['cleanup']['retentionDays'].to_i
+     bck.cleanup(days)
    end

    #  In case of failure
data/bin/console ADDED
@@ -0,0 +1,14 @@
+ #!/usr/bin/env ruby
+
+ require 'bundler/setup'
+ require 'cassback'
+
+ # You can add fixtures and/or initialization code here to make experimenting
+ # with your gem easier. You can also use a different console, if you like.
+
+ # (If you use this, don't forget to add pry to your Gemfile!)
+ # require "pry"
+ # Pry.start
+
+ require 'irb'
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env bash
+ set -euo pipefail
+ IFS=$'\n\t'
+ set -vx
+
+ bundle install
+
+ # Do any other automated setup that you need to do here
data/cassback.gemspec ADDED
@@ -0,0 +1,30 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'cassback/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = 'cassback'
+   spec.version = Cassback::VERSION
+   spec.authors = ['Vincent Van Hollebeke', 'Bogdan Niculescu']
+   spec.email = ['v.vanhollebeke@criteo.com', 'b.niculescu@criteo.com']
+
+   spec.summary = 'Cassandra backup to HDFS.'
+   spec.description = 'This is a tool that allows creating backups of Cassandra and pushing them into HDFS.'
+   spec.homepage = 'http://rubygems.org/gems/cassback'
+
+   spec.licenses = ['Apache-2.0']
+
+   spec.files = `git ls-files`.split("\n")
+   spec.test_files = `git ls-files -- test/*`.split("\n")
+   spec.bindir = 'bin'
+   spec.executables << 'cassback'
+   spec.require_paths = ['lib']
+
+   spec.add_development_dependency 'bundler', '~> 1.11'
+   spec.add_development_dependency 'rake', '~> 10.0'
+
+   spec.add_runtime_dependency 'gssapi', '~> 1.2', '>= 1.2.0'
+   spec.add_runtime_dependency 'webhdfs', '~> 0.8', '>= 0.8.0'
+   spec.add_runtime_dependency 'table_print', '~> 1.5', '>= 1.5.6'
+ end
data/conf/local.yml ADDED
@@ -0,0 +1,18 @@
+ cassandra:
+   # config: "/etc/cassandra/conf/cassandra.yaml"
+   config: "/Users/b.niculescu/Tools/apache-cassandra-2.0.16/conf/cassandra.yaml"
+
+ hadoop:
+   # hostname: "10.60.34.217"
+   hostname: "jobs-user.hpc.criteo.prod"
+   port: 14000
+   # directory: "/user/v.vanhollebeke/cassandra"
+   directory: "/tmp/b.niculescu/cassandra"
+   retryTimes: 3
+   retryInterval: 1
+
+ restore:
+   destination: "cassback_restore"
+
+ cleanup:
+   retentionDays: 30
data/conf/preprod.yml ADDED
@@ -0,0 +1,15 @@
+ cassandra:
+   config: "/etc/cassandra/conf/cassandra.yaml"
+
+ hadoop:
+   hostname: "jobs-user.hpc.criteo.preprod"
+   port: 14000
+   directory: "/tmp/cassandraback/preprod/"
+   retryTimes: 5
+   retryInterval: 1
+
+ restore:
+   destination: "cassback_restore"
+
+ cleanup:
+   retentionDays: 30
data/conf/prod.yml ADDED
@@ -0,0 +1,15 @@
+ cassandra:
+   config: "/etc/cassandra/conf/cassandra.yaml"
+
+ hadoop:
+   hostname: "jobs-user.hpc.criteo.prod"
+   port: 14000
+   directory: "/tmp/cassandrabackups/prod/"
+   retryTimes: 5
+   retryInterval: 1
+
+ restore:
+   destination: "cassback_restore"
+
+ cleanup:
+   retentionDays: 30
data/lib/backuptool.rb CHANGED
@@ -40,7 +40,8 @@ class BackupTool
  begin
    if date == 'ALL'
      ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
-     ls.each do |item|
+     ls_metadata = ls.select { |item| item['pathSuffix'].include? 'cass_snap_' }
+     ls_metadata.each do |item|
        date = item['pathSuffix'].gsub('cass_snap_', '')
        metadata = get_snapshot_metadata(node, date)
        snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
@@ -60,7 +61,8 @@
  if node == 'ALL'
    begin
      ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
-     ls.each do |item|
+     ls_nodes = ls.select { |item| item['type'].casecmp('DIRECTORY') == 0 }
+     ls_nodes.each do |item|
        n = item['pathSuffix']
        result += get_snapshots_node(n, date)
      end
@@ -141,6 +143,55 @@ class BackupTool
    end
  end

+ # Cleans up backups that are older than a number of days.
+ # This function cleans data on all nodes.
+ def cleanup(days)
+   retention_date = Date.today - days
+   @logger.info("Cleaning backup data on all nodes before #{retention_date}.")
+
+   all_snapshots = search_snapshots
+   @logger.info("A total of #{all_snapshots.size} snapshots were found on the Hadoop server.")
+
+   snapshots_to_be_deleted = all_snapshots.select { |snapshot| snapshot.get_date < retention_date }
+   @logger.info("A total of #{snapshots_to_be_deleted.size} snapshots will be deleted.")
+
+   snapshots_to_be_deleted.each do |snapshot|
+     delete_snapshots(node: snapshot.node, date: snapshot.date)
+   end
+
+   all_backup_flags = get_backup_flags
+   @logger.info("A total of #{all_backup_flags.size} backup flags were found on the Hadoop server.")
+
+   backup_flags_to_be_deleted = all_backup_flags.select { |flag| flag.date < retention_date }
+   @logger.info("A total of #{backup_flags_to_be_deleted.size} backup flags will be deleted.")
+
+   backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
+   backup_flags_to_be_deleted.each do |flag|
+     file = backup_flags_location + '/' + flag.file
+     @logger.info("Deleting #{file}")
+     @hadoop.delete(file)
+   end
+ end
+
+ # Method that creates a backup flag to signal that the backup is finished on all nodes.
+ # This is an individual command that has to be called manually after snapshots have finished.
+ def create_backup_flag(date)
+   file_name = 'BACKUP_COMPLETED_' + date
+   remote_file = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + file_name
+
+   @logger.info('Setting backup completed flag: ' + remote_file)
+   @hadoop.create(remote_file, '', overwrite: true)
+ end
+
+ def get_backup_flags
+   backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
+   ls = @hadoop.list(backup_flags_location)
+   backup_flags = ls.select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
+   backup_flags.collect do |file|
+     BackupFlag.new(@cassandra.cluster_name, file['pathSuffix'])
+   end
+ end
+
  # Download a file from HDFS, buffered way
  # * *Args* :
  # - +remote+ -> HDFS path
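Editor's note: the retention check added above reduces to one comparison - a snapshot or flag is deleted when the date parsed from its name falls strictly before Date.today minus retentionDays. A self-contained sketch of that selection, using the same yyyy_MM_dd format as the snapshot and flag names (sample dates made up):

    require 'date'

    retention_days = 30
    retention_date = Date.today - retention_days

    # Dates as they appear in names like cass_snap_2016_04_22 or BACKUP_COMPLETED_2016_04_22
    names   = %w[2016_03_01 2016_04_22]
    expired = names.select { |n| Date.strptime(n, '%Y_%m_%d') < retention_date }
    puts expired.inspect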
data/lib/cassandra.rb CHANGED
@@ -150,4 +150,19 @@ class CassandraSnapshot
    d = @date <=> other.date
    c * 3 + n * 2 + d
  end
+
+ def get_date
+   DateTime.strptime(@date, '%Y_%m_%d')
+ end
+ end
+
+ class BackupFlag
+   attr_reader :cluster, :date, :file
+
+   def initialize(cluster, file)
+     @cluster = cluster
+     @file = file.dup
+     date_as_string = file.sub! 'BACKUP_COMPLETED_', ''
+     @date = DateTime.strptime(date_as_string, '%Y_%m_%d')
+   end
  end
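Editor's note: a short usage sketch of the BackupFlag class added above (the file name is illustrative), assuming lib/cassandra.rb is on the load path:

    require_relative 'lib/cassandra'

    flag = BackupFlag.new('cluster1', 'BACKUP_COMPLETED_2016_04_22')
    flag.file # => "BACKUP_COMPLETED_2016_04_22"
    flag.date # => DateTime for 2016-04-22

Note that initialize calls sub! on the string it receives, so the caller's copy is mutated; @file keeps the full name only because it was dup'ed first.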
data/lib/cassback/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Cassback
+   VERSION = '0.1.4'.freeze
+ end
data/lib/hadoop.rb CHANGED
@@ -6,9 +6,12 @@ WebHDFS::ClientV1::REDIRECTED_OPERATIONS.delete('OPEN')
  class Hadoop < WebHDFS::Client
    attr_reader :base_dir

-   def initialize(host: 'localhost', port: 14_000, base_dir: '/')
+   def initialize(host: 'localhost', port: 14_000, base_dir: '/', retry_times: 5, retry_interval: 1)
      super(host = host, port = port)
      @kerberos = true
      @base_dir = base_dir
+     @retry_known_errors = true
+     @retry_times = retry_times
+     @retry_interval = retry_interval
    end
  end
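Editor's note: the three instance variables set above correspond to the retry knobs of the underlying webhdfs client - known transient errors are retried retry_times times, retry_interval seconds apart. A construction sketch mirroring the defaults wired up in bin/cassback (the host name is a placeholder):

    require_relative 'lib/hadoop'

    # Host is illustrative; the other values mirror the defaults in bin/cassback.
    hadoop = Hadoop.new(host: 'httpfs.example.com', port: 14_000,
                        base_dir: 'cassandra', retry_times: 5, retry_interval: 1)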
data/scripts/deploy.sh ADDED
@@ -0,0 +1,3 @@
+ #!/bin/bash
+
+ while [ 1 = 1 ]; do inotifywait .;scp -r . cstars01e01-par.storage.criteo.preprod:cassback2;scp -r . cstars01e02-par.storage.criteo.preprod:cassback2;done
data/scripts/manualbackups/ansible.cfg ADDED
@@ -0,0 +1,12 @@
+ [defaults]
+ host_key_checking=false
+ record_host_keys=false
+ remote_tmp=/tmp/.ansible/tmp
+ forks=128
+ roles_path=roles
+ library=library
+
+ [ssh_connection]
+ control_path=%(directory)s/%%h-%%r
+ pipelining=True
+ scp_if_ssh=True
data/scripts/manualbackups/inventory.txt ADDED
@@ -0,0 +1,18 @@
+ [cstars02-par]
+ cstars02e01-par ansible_ssh_host="cstars02e01-par.storage.criteo.prod"
+ cstars02e02-par ansible_ssh_host="cstars02e02-par.storage.criteo.prod"
+ cstars02e03-par ansible_ssh_host="cstars02e03-par.storage.criteo.prod"
+ cstars02e04-par ansible_ssh_host="cstars02e04-par.storage.criteo.prod"
+ cstars02e05-par ansible_ssh_host="cstars02e05-par.storage.criteo.prod"
+ cstars02e06-par ansible_ssh_host="cstars02e06-par.storage.criteo.prod"
+ cstars02e07-par ansible_ssh_host="cstars02e07-par.storage.criteo.prod"
+ cstars02e08-par ansible_ssh_host="cstars02e08-par.storage.criteo.prod"
+ cstars02e09-par ansible_ssh_host="cstars02e09-par.storage.criteo.prod"
+ cstars02e10-par ansible_ssh_host="cstars02e10-par.storage.criteo.prod"
+ cstars02e11-par ansible_ssh_host="cstars02e11-par.storage.criteo.prod"
+ cstars02e12-par ansible_ssh_host="cstars02e12-par.storage.criteo.prod"
+ cstars02e13-par ansible_ssh_host="cstars02e13-par.storage.criteo.prod"
+ cstars02e14-par ansible_ssh_host="cstars02e14-par.storage.criteo.prod"
+ cstars02e15-par ansible_ssh_host="cstars02e15-par.storage.criteo.prod"
+ cstars02e16-par ansible_ssh_host="cstars02e16-par.storage.criteo.prod"
+ cstars02e17-par ansible_ssh_host="cstars02e17-par.storage.criteo.prod"
data/scripts/manualbackups/play_book.sh ADDED
@@ -0,0 +1,13 @@
+ #!/bin/bash
+
+ PLAYBOOK=$1
+
+ if [ "$PLAYBOOK" = "" ]; then
+   echo "Usage: $0 <playbook> [ansible options]"
+   exit 65
+ fi
+
+ shift
+ ansible-playbook --inventory-file=inventory.txt playbooks/$PLAYBOOK.yml --extra-vars $*
+
+ exit $?
data/scripts/manualbackups/playbooks/backups.yml ADDED
@@ -0,0 +1,6 @@
+ ---
+
+ - gather_facts: no
+   hosts: cstars02-par
+   roles:
+     - role: planb
data/scripts/manualbackups/roles/planb/files/backup.sh ADDED
@@ -0,0 +1,27 @@
+ #!/bin/bash
+
+ kinit v.vanhollebeke@CRITEOIS.LAN -k -t ~/keytab
+
+ date=`date +%Y_%m_%d`
+
+ nodetool clearsnapshot
+
+ snapdir=$(nodetool snapshot| grep directory| awk '{print $NF}')
+ echo "Snapshot is $snapdir"
+
+ for dir in $(find /var/opt/cassandra/data -type d |grep snapshots/$snapdir); do
+   kok=$(klist -l|grep v.vanhollebeke@CRITEOIS.LAN|grep -v Expired|wc -l)
+   if [ $kok == 0 ]; then
+     echo "Must renew Kerberos ticket"
+     kinit v.vanhollebeke@CRITEOIS.LAN -k -t ~/keytab
+   else
+     echo "Kerberos ticket OK"
+   fi
+   keyspace=`echo $dir|awk -F\/ '{print $6}'`
+   table=`echo $dir|awk -F\/ '{print $7}'`
+   echo "Saving $keyspace $table"
+   ./httpfs.sh /var/opt/cassandra/data/$keyspace/$table/snapshots/$snapdir tmp/cassandrabackups/prod/cstars02/$date/$HOSTNAME/$table
+
+ done
+
+ echo "FINISHED !!!!"
data/scripts/manualbackups/roles/planb/files/httpfs.sh ADDED
@@ -0,0 +1,27 @@
+ #!/bin/sh
+
+ BASE='http://0.httpfs.hpc.criteo.prod:14000/webhdfs/v1'
+ #BASE='http://httpfs.pa4.hpc.criteo.prod:14000'
+
+ IN=$1
+ OUT=$2
+
+ echo "Creating destination directory: $OUT"
+ curl --negotiate -u : "$BASE/$OUT?op=MKDIRS&permission=0777" -X PUT -s > /dev/null
+
+ for p in $(find $IN -type f)
+ do
+   f=$(basename $p)
+   echo "$IN/$f"
+
+   # Create file
+   dest=$(curl --negotiate -u : "$BASE/$OUT/$f?op=CREATE&overwrite=true&permission=0777" -i -X PUT -s | grep Location | tail -n1 | cut -d\  -f2 | tr -d '\r\n')
+   [ $? != 0 ] && echo "ERROR"
+
+   echo "DEST IS ${dest}"
+
+   # Upload file
+   curl --negotiate -u : "$dest" -i -X PUT -T "$IN/$f" -H 'Content-Type: application/octet-stream' > /dev/null
+   [ $? != 0 ] && echo "ERROR"
+
+ done
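Editor's note: httpfs.sh spells out the WebHDFS two-step upload - the first PUT to ?op=CREATE only returns a Location header, and the data is sent with a second PUT to that address. The webhdfs gem used by the rest of this release performs the same dance internally; a rough Ruby equivalent of the loop above, with the local directory and destination paths as placeholders:

    require 'webhdfs'

    client = WebHDFS::Client.new('0.httpfs.hpc.criteo.prod', 14_000)

    Dir.glob('snapshot_dir/**/*').select { |f| File.file?(f) }.each do |path|
      # create() issues the CREATE request and follows the Location redirect itself
      client.create("/tmp/cassandrabackups/#{File.basename(path)}",
                    File.read(path), 'overwrite' => 'true')
    end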
data/scripts/manualbackups/roles/planb/files/krb5.conf ADDED
@@ -0,0 +1,26 @@
+ [libdefaults]
+ dns_lookup_realm = true
+ dns_lookup_kdc = true
+ ticket_lifetime = 24h
+ renew_lifetime = 7d
+ forwardable = true
+ default_realm = CRITEOIS.LAN
+ udp_preference_limit = 1
+ realm_try_domains = 1
+ permitted_enctypes = aes128-cts-hmac-sha1-96 des3-cbc-sha1 arcfour-hmac
+ default_tkt_enctypes = aes128-cts-hmac-sha1-96 des3-cbc-sha1 arcfour-hmac
+ [domain_realm]
+ .hpc.criteo.preprod = HPC.CRITEO.PREPROD
+ .hpc.criteo.prod = AMS.HPC.CRITEO.PROD
+ .pa4.hpc.criteo.prod = PA4.HPC.CRITEO.PROD
+ .as.hpc.criteo.prod = AS.HPC.CRITEO.PROD
+ .na.hpc.criteo.prod = NA.HPC.CRITEO.PROD
+ .cn.hpc.criteo.prod = CN.HPC.CRITEO.PROD
+ [capaths]
+ CRITEOIS.LAN = {
+   AMS.HPC.CRITEO.PROD = .
+   PA4.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+   AS.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+   NA.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+   CN.HPC.CRITEO.PROD = AMS.HPC.CRITEO.PROD
+ }
data/scripts/manualbackups/roles/planb/tasks/main.yml ADDED
@@ -0,0 +1,34 @@
+ ---
+
+ - name: Copy krb5.conf into /etc
+   copy: src=krb5.conf dest=/etc/krb5.conf
+   sudo: yes
+   tags: keytab
+
+ - name: Copy my keytab
+   copy: src=keytab dest=~/keytab
+   tags: keytab
+
+ - name: Check if keytab works
+   command: kinit $USER@CRITEOIS.LAN -k -t ~/keytab
+   tags: keytab
+
+ - name: Copy httpfs.sh script
+   copy: src=httpfs.sh dest=~/httpfs.sh mode=750
+   tags: backup
+
+ - name: Copy backup.sh script
+   copy: src=backup.sh dest=~/backup.sh mode=750
+   tags: backup
+
+ - name: Start Backup
+   shell: ./backup.sh >logfile 2>&1 chdir=~
+   tags: backup
+
+ - name: Clear snapshots
+   shell: sudo nodetool clearsnapshot
+   tags: clear
+
+ - name: Verify if snapshots are REALLY deleted
+   shell: "[ $(find /var/opt/cassandra -type d |grep snap|wc -l) == 0 ]"
+   tags: verify
data/scripts/pre-push ADDED
@@ -0,0 +1,17 @@
+ #!/bin/bash
+
+ echo "Running rubocop with auto-correct" >&2
+ bundle exec rubocop --config .rubocop.yml --auto-correct --out /dev/null
+ modified=$(git status | grep modified | wc -l)
+ if [ $modified -eq 0 ]; then
+   echo -e "\e[1;32mNothing to correct, pushing\e[0m" >&2
+   exit 0
+ else
+   s=''
+   if [ $modified -gt 1 ]; then
+     s='s'
+   fi
+
+   echo -e "\e[1;31m$modified file$s were modified, please add commit before pushing\e[0m" >&2
+   exit 1
+ fi
data/test/cassandra_stub.rb ADDED
@@ -0,0 +1,33 @@
+ #!/usr/bin/env ruby
+ require_relative '../lib/cassandra'
+
+ # Stub implementation that simulates cassandra backups.
+ class CassandraStub
+   attr_reader :data_path, :cluster_name, :node_name
+
+   def initialize(cluster_name = 'cluster1', node_name = 'node1', date = '', file_indexes = [])
+     @cluster_name = cluster_name
+     @node_name = node_name
+     @date = date
+     @data_path = 'test/cassandra' + '/' + cluster_name + '/' + node_name + '/'
+     FileUtils.mkdir_p(@data_path)
+
+     # create some fake sstables
+     @metadata = Set.new
+     file_indexes.each do |index|
+       file_name = "SSTable-#{index}-Data.db"
+       file_path = @data_path + '/' + file_name
+       File.open(file_path, 'w') { |file| file.write('This is a test file that simulates an SSTable') }
+       @metadata.add(file_name)
+     end
+   end
+
+   def new_snapshot
+     # simply create a pointer to an existing location
+     CassandraSnapshot.new(@cluster_name, @node_name, @date, @metadata)
+   end
+
+   def delete_snapshot(_snapshot)
+     FileUtils.rm_rf(@data_path)
+   end
+ end
data/test/hadoop_stub.rb ADDED
@@ -0,0 +1,51 @@
+ #!/usr/bin/env ruby
+
+ require 'fileutils'
+
+ # A stub implementation of Hadoop that reads/writes local files instead of using webhdfs
+ class HadoopStub
+   attr_reader :base_dir
+
+   def initialize(base_dir)
+     @base_dir = base_dir
+   end
+
+   def list(path, _options = {})
+     files_and_folders = Dir.glob("#{path}/*")
+     files_and_folders.collect do |file|
+       type = if File.file?(file)
+                'FILE'
+              else
+                'DIRECTORY'
+              end
+       # return a hash similar to the one that hadoop sends (containing fewer entries)
+       {
+         'pathSuffix' => File.basename(file),
+         'type' => type,
+       }
+     end
+   end
+
+   def list_files(path, _options = {})
+     files_and_folders = Dir.glob("#{path}/**/*")
+     files_and_folders.select { |file| File.file?(file) }
+   end
+
+   def create(path, body, _options = {})
+     parent = File.expand_path('..', path)
+     FileUtils.mkdir_p parent
+     if body.is_a?(File)
+       File.open(path, 'w') { |file| file.write(body.read) }
+     else
+       File.open(path, 'w') { |file| file.write(body) }
+     end
+   end
+
+   def read(path, _options = {})
+     File.open(path, 'r').read
+   end
+
+   def delete(path, _options = {})
+     FileUtils.rm_rf(path)
+   end
+ end
data/test/test_backuptool.rb ADDED
@@ -0,0 +1,180 @@
+ #!/usr/bin/env ruby
+ require 'test/unit'
+ require 'logger'
+
+ require_relative '../lib/backuptool'
+ require_relative 'hadoop_stub'
+ require_relative 'cassandra_stub'
+
+ class TestSimpleNumber < Test::Unit::TestCase
+   def test_new_snapshot
+     hadoop = HadoopStub.new('test/hadoop')
+     create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+
+     remote_files = hadoop.list_files('test/hadoop')
+     # two files were backed up + one metadata file
+     assert_equal(3, remote_files.size)
+
+     # files were created in the correct location
+     assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_22', remote_files[0])
+     assert_equal('test/hadoop/cluster1/node1/SSTable-1-Data.db', remote_files[1])
+     assert_equal('test/hadoop/cluster1/node1/SSTable-2-Data.db', remote_files[2])
+
+     # metadata file contains the sstables.
+     metadata_content = File.open(remote_files[0], 'r').read
+     assert(metadata_content.include? 'SSTable-1-Data.db')
+     assert(metadata_content.include? 'SSTable-2-Data.db')
+
+     # cleanup
+     hadoop.delete('test/hadoop')
+     hadoop.delete('test/cassandra')
+   end
+
+   def test_two_snapshots
+     hadoop = HadoopStub.new('test/hadoop')
+     create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+     create_new_snapshot(hadoop, 'node1', '2016_04_23', [2, 3, 4])
+
+     remote_files = hadoop.list_files('test/hadoop')
+     # four distinct sstables were backed up + two metadata files
+     assert_equal(6, remote_files.size)
+
+     # files were created in the correct location
+     # no duplicate files are stored
+     assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_22', remote_files[0])
+     assert_equal('test/hadoop/cass_snap_metadata/cluster1/node1/cass_snap_2016_04_23', remote_files[1])
+     assert_equal('test/hadoop/cluster1/node1/SSTable-1-Data.db', remote_files[2])
+     assert_equal('test/hadoop/cluster1/node1/SSTable-2-Data.db', remote_files[3])
+     assert_equal('test/hadoop/cluster1/node1/SSTable-3-Data.db', remote_files[4])
+     assert_equal('test/hadoop/cluster1/node1/SSTable-4-Data.db', remote_files[5])
+
+     # metadata on first backup file contains the sstables.
+     metadata_content = File.open(remote_files[0], 'r').read
+     assert(metadata_content.include? 'SSTable-1-Data.db')
+     assert(metadata_content.include? 'SSTable-2-Data.db')
+
+     # metadata on second backup file contains the sstables.
+     metadata_content = File.open(remote_files[1], 'r').read
+     assert(metadata_content.include? 'SSTable-2-Data.db')
+     assert(metadata_content.include? 'SSTable-3-Data.db')
+     assert(metadata_content.include? 'SSTable-4-Data.db')
+
+     # cleanup
+     hadoop.delete('test/hadoop')
+     hadoop.delete('test/cassandra')
+   end
+
+   def test_restore
+     hadoop = HadoopStub.new('test/hadoop')
+     backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+
+     # restore a newly created snapshot
+     backup_tool.restore_snapshot('node1', '2016_04_22', 'test/restore')
+
+     restored_files = hadoop.list_files('test/restore')
+     # two files were restored
+     assert_equal(2, restored_files.size)
+     assert_equal('test/restore/SSTable-1-Data.db', restored_files[0])
+     assert_equal('test/restore/SSTable-2-Data.db', restored_files[1])
+
+     # cleanup
+     hadoop.delete('test/hadoop')
+     hadoop.delete('test/restore')
+     hadoop.delete('test/cassandra')
+   end
+
+   def test_delete
+     hadoop = HadoopStub.new('test/hadoop')
+     backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+
+     # delete a newly created snapshot
+     backup_tool.delete_snapshots(node: 'node1', date: '2016_04_22')
+
+     remote_files = hadoop.list_files('test/hadoop')
+     assert_equal(0, remote_files.size)
+
+     hadoop.delete('test/cassandra')
+   end
+
+   def test_backup_flag
+     hadoop = HadoopStub.new('test/hadoop')
+     backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+
+     backup_tool.create_backup_flag('2016_04_22')
+
+     remote_files = hadoop.list_files('test/hadoop')
+     assert_equal(4, remote_files.size)
+     # Flag is created at cluster level
+     assert_equal('test/hadoop/cass_snap_metadata/cluster1/BACKUP_COMPLETED_2016_04_22', remote_files[0])
+
+     # cleanup
+     hadoop.delete('test/hadoop')
+     hadoop.delete('test/cassandra')
+   end
+
+   def test_get_backup_flag
+     hadoop = HadoopStub.new('test/hadoop')
+     backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
+
+     backup_tool.create_backup_flag('2016_04_22')
+     flags = backup_tool.get_backup_flags
+
+     # One flag found
+     assert_equal(1, flags.size)
+     # Flag points to the correct file
+     assert_equal('cluster1', flags[0].cluster)
+     assert_equal('BACKUP_COMPLETED_2016_04_22', flags[0].file)
+
+     # cleanup
+     hadoop.delete('test/hadoop')
+     hadoop.delete('test/cassandra')
+   end
+
+   def test_cleanup
+     hadoop = HadoopStub.new('test/hadoop')
+     retention_days = 30
+
+     date_31_days_back = (Date.today - 31).strftime('%Y_%m_%d')
+     date_30_days_back = (Date.today - 30).strftime('%Y_%m_%d')
+
+     # Two backups on two nodes
+     create_new_snapshot(hadoop, 'node1', date_31_days_back, [1, 2, 3, 4])
+     create_new_snapshot(hadoop, 'node2', date_31_days_back, [1, 2, 3, 4])
+     create_new_snapshot(hadoop, 'node1', date_30_days_back, [3, 4, 5, 6])
+     backup_tool = create_new_snapshot(hadoop, 'node2', date_30_days_back, [4, 5, 6, 7])
+
+     # Both backups are marked as completed
+     backup_tool.create_backup_flag(date_31_days_back)
+     backup_tool.create_backup_flag(date_30_days_back)
+     backup_tool.create_backup_flag(date_30_days_back)
+
+     backup_tool.cleanup(retention_days)
+
+     # Two snapshots were deleted, two were kept
+     snapshots = backup_tool.search_snapshots
+     assert_equal(2, snapshots.size)
+     assert_equal('node1', snapshots[0].node)
+     assert_equal(date_30_days_back, snapshots[0].date)
+     assert_equal('node2', snapshots[1].node)
+     assert_equal(date_30_days_back, snapshots[1].date)
+
+     # One backup flag was deleted, one was kept.
+     backup_flags = backup_tool.get_backup_flags
+     assert_equal(1, backup_flags.size)
+     assert_equal("BACKUP_COMPLETED_#{date_30_days_back}", backup_flags[0].file)
+
+     # cleanup
+     hadoop.delete('test/hadoop')
+     hadoop.delete('test/cassandra')
+   end
+
+   def create_new_snapshot(hadoop, node, date, file_indexes)
+     logger = Logger.new(STDOUT)
+     cassandra = CassandraStub.new('cluster1', node, date, file_indexes)
+     backup_tool = BackupTool.new(cassandra, hadoop, logger)
+
+     backup_tool.new_snapshot
+
+     backup_tool
+   end
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: cassback
  version: !ruby/object:Gem::Version
-   version: 0.1.3
+   version: 0.1.4
  platform: ruby
  authors:
  - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-04-20 00:00:00.000000000 Z
+ date: 2016-04-26 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler
@@ -109,10 +109,36 @@ executables:
  extensions: []
  extra_rdoc_files: []
  files:
+ - ".gitignore"
+ - ".rubocop.yml_disabled"
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile.rb
  - bin/cassback
+ - bin/console
+ - bin/setup
+ - cassback.gemspec
+ - conf/local.yml
+ - conf/preprod.yml
+ - conf/prod.yml
  - lib/backuptool.rb
  - lib/cassandra.rb
+ - lib/cassback/version.rb
  - lib/hadoop.rb
+ - scripts/deploy.sh
+ - scripts/manualbackups/ansible.cfg
+ - scripts/manualbackups/inventory.txt
+ - scripts/manualbackups/play_book.sh
+ - scripts/manualbackups/playbooks/backups.yml
+ - scripts/manualbackups/roles/planb/files/backup.sh
+ - scripts/manualbackups/roles/planb/files/httpfs.sh
+ - scripts/manualbackups/roles/planb/files/krb5.conf
+ - scripts/manualbackups/roles/planb/tasks/main.yml
+ - scripts/pre-push
+ - test/cassandra_stub.rb
+ - test/hadoop_stub.rb
+ - test/test_backuptool.rb
  homepage: http://rubygems.org/gems/cassback
  licenses:
  - Apache-2.0
@@ -133,9 +159,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
      version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.5.2
+ rubygems_version: 2.4.8
  signing_key:
  specification_version: 4
  summary: Cassandra backup to HDFS.
- test_files: []
- has_rdoc:
+ test_files:
+ - test/cassandra_stub.rb
+ - test/hadoop_stub.rb
+ - test/test_backuptool.rb