elastic-util 0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e7e92f652cfa5c6bcf9c4c66c9d11908f8b793ba
4
+ data.tar.gz: 64a823c77a5a7f37f50256f47f800138a1b9d166
5
+ SHA512:
6
+ metadata.gz: 49efe68f26f5b18fa7ee9e5a16b921cf564f3b656abb34e9ff6417387ee60f8d0ac69f3f0f4bb9e2bd2b9a2fab9e2986e9302114b02d969999f022eeab2df788
7
+ data.tar.gz: 93f14448ac08283321f8a66453f2fc7ed8919388a4607b56cef1ea136acce26077a60093b2e3106611ba5e1cb5e1b17476009dcabd81c40edf7895bb693a5e5d
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in elastic-util.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # elastic-util
2
+ ElasticUtil gem to backup and restore elasticsearch indices
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/elastic-util ADDED
@@ -0,0 +1,104 @@
1
#!/usr/bin/env ruby
# Command line interface for the ElasticUtil gem.
# Supports two subcommands: backup and restore. Each takes a cluster url
# and a local directory, parses its own options, and delegates to
# ElasticUtil.backup / ElasticUtil.restore.
require 'elastic-util'
require 'optparse'

prog_name = "elastic-util" # $0.split('/').last
usage = "Usage: #{prog_name} [backup|restore] [options]"
args = ARGV.dup
command_name = args.shift

case command_name

when "backup"

  options = {}
  optparse = OptionParser.new do |opts|
    opts.banner = "Usage: #{prog_name} backup [url] [directory] [options]"
    opts.on('--indices x,y,z', Array, "The indices to backup. Default is all.") do |val|
      options[:indices] = val.collect {|it| it.strip }
    end
    opts.on('--exclude-indices x,y,z', Array, "The indices to exclude from backup. Default is none.") do |val|
      options[:exclude_indices] = val.collect {|it| it.strip }
    end
    # typo fix: "dicates" -> "dictates"
    opts.on( '-s', '--size NUMBER', "The size api parameter. This dictates the size of the files and api payloads. Default is 1000." ) do |val|
      options[:size] = val.to_i
    end
    opts.on( '-S', '--scroll STRING', "The scroll api parameter. Default is '5m'." ) do |val|
      options[:scroll] = val.to_s
    end
    opts.on( '-f', '--force', "Delete existing backup directory instead of erroring. Default is false." ) do |val|
      options[:force] = true
    end
    opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
      options[:quiet] = true
    end
    opts.on('-h', '--help', "Prints this help" ) do
      puts opts
      exit
    end
  end
  optparse.parse!(args)
  url = args[0]
  backup_directory = args[1]
  if url.nil? || url.empty?
    $stderr.puts "#{prog_name}: missing required argument [url]"
    $stderr.puts optparse
    exit 1
  end
  if backup_directory.nil? || backup_directory.empty?
    $stderr.puts "#{prog_name}: missing required argument [directory]"
    $stderr.puts optparse
    exit 1
  end

  begin
    result = ElasticUtil.backup(url, backup_directory, options)
    exit 0
  rescue ElasticUtil::Error => err
    # Known, user-facing error: print message without a backtrace.
    $stderr.puts "#{prog_name}: #{err.message}"
    exit 1
  end

when "restore"

  options = {}
  optparse = OptionParser.new do |opts|
    # bug fix: banner previously said "backup" for the restore subcommand
    opts.banner = "Usage: #{prog_name} restore [url] [directory] [options]"
    opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
      options[:quiet] = true
    end
    opts.on('-h', '--help', "Prints this help" ) do
      puts opts
      exit
    end
  end
  optparse.parse!(args)
  url = args[0]
  backup_directory = args[1]
  if url.nil? || url.empty?
    $stderr.puts "#{prog_name}: missing required argument [url]"
    $stderr.puts optparse
    exit 1
  end
  if backup_directory.nil? || backup_directory.empty?
    $stderr.puts "#{prog_name}: missing required argument [directory]"
    $stderr.puts optparse
    exit 1
  end

  begin
    result = ElasticUtil.restore(url, backup_directory, options)
    exit 0
  rescue ElasticUtil::Error => err
    # Known, user-facing error: print message without a backtrace.
    $stderr.puts "#{prog_name}: #{err.message}"
    exit 1
  end

else
  $stderr.puts usage
  exit 1
end
@@ -0,0 +1,25 @@
1
# coding: utf-8
# Gem specification for elastic-util. Puts lib/ on the load path so the
# version constant can be read straight from the source.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'elastic_util'

Gem::Specification.new do |spec|
  spec.name          = "elastic-util"
  spec.version       = ElasticUtil::VERSION
  spec.authors       = ["James Dickson"]
  spec.email         = ["dickson.james@gmail.com"]
  spec.summary       = "Provides backup and restore for ElasticSearch data"
  spec.description   = "ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore indices"
  #spec.homepage = "http://www.elastic-util.com"
  spec.license       = "MIT"

  # Package everything tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")

  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
  # spec.add_dependency 'multi_json'
end
@@ -0,0 +1 @@
1
+ require 'elastic_util'
@@ -0,0 +1,243 @@
1
# encoding: utf-8
require 'net/http'
require 'json'
require 'fileutils'

# This module provides a way to backup and restore elasticsearch data.
#
# @example Backup data from one elasticsearch cluster and restore it to another.
#
#   ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup', {size:5000})
#   ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
#
module ElasticUtil

  VERSION = "0.1"

  # The name of the data directory, relative to the user provided backup directory.
  DUMP_DIR = "es_data"

  # A class to be raised for any known error condition.
  class Error < StandardError; end

  # Backup elasticsearch data to a local directory.
  #
  # This uses ElasticSearch's scroll api to fetch all records for indices
  # and write the data to a local directory. The files it generates are given a
  # .json.data extension. They are not valid JSON files, but rather are in the
  # format expected by ElasticSearch's _bulk api.
  #
  # So #restore simply has to POST the contents of each file.
  #
  # Use the :size option to change the number of results to fetch at once,
  # and also the size of the data files generated.
  # The latter correlates to the size of the api requests made in #restore.
  #
  # @example Backup default elasticsearch running locally.
  #
  #   ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup')
  #
  # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'
  # @param [String] backup_dir The local directory to store data in. eg. '/tmp/es2.4'
  # @param [Hash] opts The options for this backup.
  # @option opts [Array] :indices The indices to backup. Default is all.
  # @option opts [Array] :exclude_indices Exclude certain indexes.
  # @option opts [String] :scroll The scroll api parameter, Default is '5m'.
  # @option opts [Integer] :size The size api parameter. Default is 1000.
  # @option opts [true] :force Delete existing backup directory instead of erroring. Default is false.
  # @option opts [true] :quiet Don't print anything. Default is false.
  #
  # @return [true] or raises an error
  #
  def self.backup(url, backup_dir, opts={})
    start_time = Time.now
    backup_dir = backup_dir.strip
    path = File.join(backup_dir, DUMP_DIR)
    indices = []

    # ping it first, with short timeouts so a bad url fails fast.
    # (a redundant extra Net::HTTP.get_response before the ping was removed)
    uri = URI(url)
    http = Net::HTTP.new(uri.host, uri.port)
    http.read_timeout = 5
    http.open_timeout = 5
    response = http.start() {|conn|
      conn.get("/")
    }
    if !response.is_a?(Net::HTTPSuccess)
      raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
    end

    # determine indices to backup, default is everything.
    if opts[:indices]
      indices = opts[:indices]
    else
      uri = URI(url + "/_cat/indices?format=json")
      response = Net::HTTP.get_response(uri)
      if !response.is_a?(Net::HTTPSuccess)
        raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
      end
      json_response = JSON.parse(response.body)
      json_response.each do |record|
        indices.push(record['index'])
      end
    end
    if opts[:exclude_indices]
      indices = indices.reject {|it| opts[:exclude_indices].include?(it) }
    end

    if indices.empty?
      raise Error, "no indices to back up!"
    end

    opts[:scroll] ||= '5m'
    opts[:size] ||= 1000

    # validate backup path
    # FIX: File.exists? was deprecated and removed in Ruby 3.2; use File.exist?
    if File.exist?(path)
      if opts[:force]
        FileUtils.rmtree(path)
      else
        raise Error, "backup path '#{path}' already exists! Delete it first or use --force"
      end
    end
    FileUtils.mkdir_p(path)

    # dump data
    indices.each_with_index do |index_name, i|
      puts "(#{i+1}/#{indices.size}) backing up index #{index_name}" unless opts[:quiet]
      # initial search request opens the scroll and returns the first page
      uri = URI(url + "/#{index_name}/_search")
      params = {
        :format => "json",
        :scroll => opts[:scroll],
        :size => opts[:size],
        :sort => ["_doc"]
      }
      uri.query = URI.encode_www_form(params)
      response = Net::HTTP.get_response(uri)
      if !response.is_a?(Net::HTTPSuccess)
        raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
      end
      json_response = JSON.parse(response.body)
      raise Error, "No scroll_id returned in response:\n#{response.inspect}" unless json_response['_scroll_id']
      scroll_id = json_response['_scroll_id']
      hits = json_response['hits']['hits']
      save_bulk_data(path, hits)

      # subsequent scroll requests page through the remainder of the index
      file_index = 1
      while !hits.empty?
        uri = URI(url + "/_search/scroll")
        params = {
          :scroll_id => scroll_id,
          :scroll => opts[:scroll]
        }
        uri.query = URI.encode_www_form(params)
        response = Net::HTTP.get_response(uri)
        if !response.is_a?(Net::HTTPSuccess)
          raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
        end
        json_response = JSON.parse(response.body)
        raise Error, "No scroll_id returned in response:\n#{response.inspect}\n#{response.body.to_s}" unless json_response['_scroll_id']
        scroll_id = json_response['_scroll_id']
        hits = json_response['hits']['hits']
        # file_index is always >= 1 inside this loop, so the old
        # `if file_index > 0` branch was dead code and has been removed.
        save_bulk_data(path, hits, file_index)
        file_index += 1
      end
    end

    puts "Finished backup of elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
    return true
  end

  # Restore elasticsearch data from a backup.
  # This will do a POST to the _bulk api for each file in the backup directory.
  #
  # @example Restore local cluster with our backup.
  #
  #   ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
  #
  # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'.
  # @param [String] backup_dir The backup directory.
  # @param [Hash] opts The options for this backup.
  # @option opts [true] :quiet Don't print anything. Default is false.
  #
  # @return [true] or raises an error
  #
  def self.restore(url, backup_dir, opts={})
    start_time = Time.now
    backup_dir = backup_dir.strip
    path = File.join(backup_dir, DUMP_DIR)

    # validate backup path
    # FIX: Dir.exists? was deprecated and removed in Ruby 3.2; use Dir.exist?
    if !Dir.exist?(path)
      raise Error, "backup path '#{backup_dir}' does not exist!"
    end

    # ping it first, with short timeouts so a bad url fails fast.
    uri = URI(url)
    http = Net::HTTP.new(uri.host, uri.port)
    http.read_timeout = 5
    http.open_timeout = 5
    response = http.start() {|conn|
      conn.get("/")
    }
    if !response.is_a?(Net::HTTPSuccess)
      raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
    end

    # find files to import
    found_files = Dir[File.join(path, '**', '*.json.data' )]
    if found_files.empty?
      # FIX: previously this raised "does not exist", which was misleading
      # because the directory exists but contains no data files.
      raise Error, "no .json.data files found in backup path '#{backup_dir}'!"
    else
      puts "Found #{found_files.size} files to import" unless opts[:quiet]
    end

    # bulk api request for each file
    # (connection construction hoisted out of the loop and reused per POST)
    http = Net::HTTP.new(uri.host, uri.port)
    found_files.each_with_index do |file, i|
      puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
      payload = File.read(file)
      # FIX: elasticsearch 6+ rejects _bulk requests without an explicit
      # content type; application/x-ndjson is the documented type.
      response = http.post("/_bulk", payload, {'Content-Type' => 'application/x-ndjson'})
      if !response.is_a?(Net::HTTPSuccess)
        raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
      end
    end

    puts "Finished restore of elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
    return true
  end

  # Append hits to per-index, per-type .json.data files in _bulk api format:
  # one action line followed by one source line per hit.
  #
  # @param [String] path The dump directory to write under.
  # @param [Array] hits The 'hits' array from a search/scroll response.
  # @param [Integer, nil] file_index Optional suffix so each scroll page goes
  #   to its own file; nil writes to the unsuffixed file.
  def self.save_bulk_data(path, hits, file_index=nil) # :nodoc:
    if hits && !hits.empty?
      hits.each do |hit|
        index_name = hit['_index']
        index_type = hit['_type']
        dir_name = File.join(path, index_name)
        FileUtils.mkdir_p(dir_name)
        file_name = File.join(dir_name, index_type) + (file_index ? "_#{file_index}" : "") + ".json.data"
        # prepare record for bulk api injection
        action_json = {'index' => {
          '_index' => hit['_index'], '_type' => hit['_type'], '_id' => hit['_id']
        } }
        source_json = hit['_source']
        File.open(file_name, 'a') do |file|
          file.write JSON.generate(action_json) + "\n" + JSON.generate(source_json) + "\n"
        end
      end
    end
  end

end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elastic-util
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - James Dickson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-02-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore
42
+ indices
43
+ email:
44
+ - dickson.james@gmail.com
45
+ executables:
46
+ - elastic-util
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - ".gitignore"
51
+ - Gemfile
52
+ - README.md
53
+ - Rakefile
54
+ - bin/elastic-util
55
+ - elastic-util.gemspec
56
+ - lib/elastic-util.rb
57
+ - lib/elastic_util.rb
58
+ homepage:
59
+ licenses:
60
+ - MIT
61
+ metadata: {}
62
+ post_install_message:
63
+ rdoc_options: []
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 2.4.8
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: Provides backup and restore for ElasticSearch data
82
+ test_files: []