elastic-util 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e7e92f652cfa5c6bcf9c4c66c9d11908f8b793ba
4
+ data.tar.gz: 64a823c77a5a7f37f50256f47f800138a1b9d166
5
+ SHA512:
6
+ metadata.gz: 49efe68f26f5b18fa7ee9e5a16b921cf564f3b656abb34e9ff6417387ee60f8d0ac69f3f0f4bb9e2bd2b9a2fab9e2986e9302114b02d969999f022eeab2df788
7
+ data.tar.gz: 93f14448ac08283321f8a66453f2fc7ed8919388a4607b56cef1ea136acce26077a60093b2e3106611ba5e1cb5e1b17476009dcabd81c40edf7895bb693a5e5d
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in elastic-util.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # elastic-util
2
+ ElasticUtil gem to backup and restore elasticsearch indices
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/elastic-util ADDED
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env ruby
2
+ require 'elastic-util'
3
+ require 'optparse'
4
+
5
+ prog_name = "elastic-util" # $0.split('/').last
6
+ usage = "Usage: #{prog_name} [backup|restore] [options]"
7
+ args = ARGV.dup
8
+ command_name = args.shift
9
+
10
+ case command_name
11
+
12
+ when "backup"
13
+
14
+ options = {}
15
+ optparse = OptionParser.new do |opts|
16
+ opts.banner = "Usage: #{prog_name} backup [url] [directory] [options]"
17
+ opts.on('--indices x,y,z', Array, "The indices to backup. Default is all.") do |val|
18
+ options[:indices] = val.collect {|it| it.strip }
19
+ end
20
+ opts.on('--exclude-indices x,y,z', Array, "The indices to exclude from backup. Default is none.") do |val|
21
+ options[:exclude_indices] = val.collect {|it| it.strip }
22
+ end
23
+ opts.on( '-s', '--size NUMBER', "The size api parameter. This dictates the size of the files and api payloads. Default is 1000." ) do |val|
24
+ options[:size] = val.to_i
25
+ end
26
+ opts.on( '-S', '--scroll STRING', "The scroll api parameter. Default is '5m'." ) do |val|
27
+ options[:scroll] = val.to_s
28
+ end
29
+ opts.on( '-f', '--force', "Delete existing backup directory instead of erroring. Default is false." ) do |val|
30
+ options[:force] = true
31
+ end
32
+ opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
33
+ options[:quiet] = true
34
+ end
35
+ opts.on('-h', '--help', "Prints this help" ) do
36
+ puts opts
37
+ exit
38
+ end
39
+ end
40
+ optparse.parse!(args)
41
+ url = args[0]
42
+ backup_directory = args[1]
43
+ if url.nil? || url.empty?
44
+ $stderr.puts "#{prog_name}: missing required argument [url]"
45
+ $stderr.puts optparse
46
+ exit 1
47
+ end
48
+ if backup_directory.nil? || backup_directory.empty?
49
+ $stderr.puts "#{prog_name}: missing required argument [directory]"
50
+ $stderr.puts optparse
51
+ exit 1
52
+ end
53
+
54
+ begin
55
+ result = ElasticUtil.backup(url, backup_directory, options)
56
+ exit 0
57
+ rescue ElasticUtil::Error => err
58
+ $stderr.puts "#{prog_name}: #{err.message}"
59
+ # $stderr.puts optparse
60
+ exit 1
61
+ end
62
+
63
+
64
+ when "restore"
65
+
66
+ options = {}
67
+ optparse = OptionParser.new do |opts|
68
+ opts.banner = "Usage: #{prog_name} restore [url] [directory] [options]"
69
+ opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
70
+ options[:quiet] = true
71
+ end
72
+ opts.on('-h', '--help', "Prints this help" ) do
73
+ puts opts
74
+ exit
75
+ end
76
+ end
77
+ optparse.parse!(args)
78
+ url = args[0]
79
+ backup_directory = args[1]
80
+ if url.nil? || url.empty?
81
+ $stderr.puts "#{prog_name}: missing required argument [url]"
82
+ $stderr.puts optparse
83
+ exit 1
84
+ end
85
+ if backup_directory.nil? || backup_directory.empty?
86
+ $stderr.puts "#{prog_name}: missing required argument [directory]"
87
+ $stderr.puts optparse
88
+ exit 1
89
+ end
90
+
91
+ begin
92
+ result = ElasticUtil.restore(url, backup_directory, options)
93
+ exit 0
94
+ rescue ElasticUtil::Error => err
95
+ $stderr.puts "#{prog_name}: #{err.message}"
96
+ # $stderr.puts optparse
97
+ exit 1
98
+ end
99
+
100
+ else
101
+ $stderr.puts usage
102
+ exit 1
103
+ end
104
+
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'elastic_util'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "elastic-util"
8
+ spec.version = ElasticUtil::VERSION
9
+ spec.authors = ["James Dickson"]
10
+ spec.email = ["dickson.james@gmail.com"]
11
+ spec.summary = "Provides backup and restore for ElasticSearch data"
12
+ spec.description = "ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore indices"
13
+ #spec.homepage = "http://www.elastic-util.com"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+
18
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_development_dependency "bundler", "~> 1.6"
23
+ spec.add_development_dependency "rake"
24
+ # spec.add_dependency 'multi_json'
25
+ end
@@ -0,0 +1 @@
1
+ require 'elastic_util'
@@ -0,0 +1,243 @@
1
+ # encoding: utf-8
2
+ require 'net/http'
3
+ require 'json'
4
+ require 'fileutils'
5
+
6
+ # This module provides a way to backup and restore elasticsearch data.
7
+ #
8
+ # @example Backup data from one elasticsearch cluster and restore it to another.
9
+ #
10
+ # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup', {size:5000})
11
+ # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
12
+ #
13
+ module ElasticUtil
14
+
15
+ VERSION = "0.1"
16
+
17
+ # The name of the data directory, relative to the user provided backup directory.
18
+ DUMP_DIR = "es_data"
19
+
20
+ # A class to be raised for any known error condition.
21
+ class Error < StandardError; end
22
+
23
+ # Backup elasticsearch data to a local directory.
24
+ #
25
+ # This uses ElasticSearch's scroll api to fetch all records for indices
26
+ # and write the data to a local directory. The files it generates are given a
27
+ # .json.data extension. They are not valid JSON files, but rather are in the
28
+ # format expected by ElasticSearch's _bulk api.
29
+ #
30
+ # So #restore simply has to POST the contents of each file.
31
+ #
32
+ # Use the :size option to change the number or results to fetch at once,
33
+ # and also the size of the data files generated.
34
+ # The latter correlates to the size of the api requests made in #restore.
35
+ #
36
+ # @example Backup default elasticsearch running locally.
37
+ #
38
+ # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup')
39
+ #
40
+ # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'
41
+ # @param [String] backup_dir The local directory to store data in. eg. '/tmp/es2.4'
42
+ # @param [Hash] opts The options for this backup.
43
+ # @option opts [Array] :indices The indices to backup. Default is all.
44
+ # @option opts [Array] :exclude_indices Exclude certain indexes.
45
+ # @option opts [String] :scroll The scroll api parameter, Default is '5m'.
46
+ # @option opts [Integer] :size The size api parameter. Default is 1000.
47
+ # @option opts [true] :force Delete existing backup directory instead of erroring. Default is false.
48
+ # @option opts [true] :quiet Don't print anything. Default is false.
49
+ #
50
+ # @return [true] or raises an error
51
+ #
52
+ def self.backup(url, backup_dir, opts={})
53
+ start_time = Time.now
54
+ backup_dir = backup_dir.strip
55
+ path = File.join(backup_dir.strip, DUMP_DIR)
56
+ indices = []
57
+
58
+ # ping it first
59
+ uri = URI(url)
60
+ response = Net::HTTP.get_response(uri)
61
+ http = Net::HTTP.new(uri.host, uri.port)
62
+ http.read_timeout = 5
63
+ http.open_timeout = 5
64
+ response = http.start() {|http|
65
+ http.get("/")
66
+ }
67
+ if !response.is_a?(Net::HTTPSuccess)
68
+ raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
69
+ end
70
+
71
+ # determine indices to backup, default is everything.
72
+ if opts[:indices]
73
+ indices = opts[:indices]
74
+ else
75
+ uri = URI(url + "/_cat/indices?format=json")
76
+ response = Net::HTTP.get_response(uri)
77
+ if !response.is_a?(Net::HTTPSuccess)
78
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
79
+ end
80
+ json_response = JSON.parse(response.body)
81
+ json_response.each do |record|
82
+ indices.push(record['index'])
83
+ end
84
+ end
85
+ if opts[:exclude_indices]
86
+ indices = indices.reject {|it| opts[:exclude_indices].include?(it) }
87
+ end
88
+
89
+ if indices.empty?
90
+ raise Error, "no indices to back up!"
91
+ end
92
+
93
+ opts[:scroll] ||= '5m'
94
+ opts[:size] ||= 1000
95
+
96
+ # validate backup path
97
+ if File.exists?(path)
98
+ if opts[:force]
99
+ FileUtils.rmtree(path)
100
+ else
101
+ raise Error, "backup path '#{path}' already exists! Delete it first or use --force"
102
+ end
103
+ end
104
+ FileUtils.mkdir_p(path)
105
+
106
+ # dump data
107
+ indices.each_with_index do |index_name, i|
108
+ puts "(#{i+1}/#{indices.size}) backing up index #{index_name}" unless opts[:quiet]
109
+ # initial request
110
+ file_index = 0
111
+ uri = URI(url + "/#{index_name}/_search")
112
+ params = {
113
+ :format => "json",
114
+ :scroll => opts[:scroll],
115
+ :size => opts[:size],
116
+ :sort => ["_doc"]
117
+ }
118
+ uri.query = URI.encode_www_form(params)
119
+ # puts "HTTP REQUEST #{uri.inspect}"
120
+ response = Net::HTTP.get_response(uri)
121
+ if !response.is_a?(Net::HTTPSuccess)
122
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
123
+ end
124
+ json_response = JSON.parse(response.body)
125
+ raise Error, "No scroll_id returned in response:\n#{response.inspect}" unless json_response['_scroll_id']
126
+ scroll_id = json_response['_scroll_id']
127
+ hits = json_response['hits']['hits']
128
+ save_bulk_data(path, hits)
129
+
130
+ file_index = 1
131
+ # scroll requests
132
+ while !hits.empty?
133
+ uri = URI(url + "/_search/scroll")
134
+ params = {
135
+ :scroll_id => scroll_id,
136
+ :scroll => opts[:scroll]
137
+ }
138
+ uri.query = URI.encode_www_form(params)
139
+ # puts "HTTP REQUEST #{uri.inspect}"
140
+ response = Net::HTTP.get_response(uri)
141
+ if !response.is_a?(Net::HTTPSuccess)
142
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
143
+ end
144
+ json_response = JSON.parse(response.body)
145
+ raise Error, "No scroll_id returned in response:\n#{response.inspect}\n#{response.body.to_s}" unless json_response['_scroll_id']
146
+ scroll_id = json_response['_scroll_id']
147
+ hits = json_response['hits']['hits']
148
+ if file_index > 0
149
+ save_bulk_data(path, hits, file_index)
150
+ else
151
+ save_bulk_data(path, hits)
152
+ end
153
+ file_index += 1
154
+ end
155
+ end
156
+
157
+ puts "Finished backup of elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
158
+ return true
159
+ end
160
+
161
+ # Restore elasticsearch data from a backup.
162
+ # This will do a POST to the _bulk api for each file in the backup directory.
163
+ #
164
+ # @example Restore local cluster with our backup.
165
+ #
166
+ # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
167
+ #
168
+ # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'.
169
+ # @param [String] backup_dir The backup directory.
170
+ # @param [Hash] opts The options for this backup.
171
+ # @option opts [true] :quiet Don't print anything. Default is false.
172
+ #
173
+ # @return [true] or raises an error
174
+ #
175
+ def self.restore(url, backup_dir, opts={})
176
+ start_time = Time.now
177
+ backup_dir = backup_dir.strip
178
+ path = File.join(backup_dir.strip, DUMP_DIR)
179
+
180
+ # validate backup path
181
+ if !Dir.exists?(path)
182
+ raise Error, "backup path '#{backup_dir}' does not exist!"
183
+ end
184
+
185
+ # ping it first
186
+ uri = URI(url)
187
+ response = Net::HTTP.get_response(uri)
188
+ http = Net::HTTP.new(uri.host, uri.port)
189
+ http.read_timeout = 5
190
+ http.open_timeout = 5
191
+ response = http.start() {|http|
192
+ http.get("/")
193
+ }
194
+
195
+ if !response.is_a?(Net::HTTPSuccess)
196
+ raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
197
+ end
198
+
199
+ # find files to import
200
+ found_files = Dir[File.join(path, '**', '*.json.data' )]
201
+ if found_files.empty?
202
+ raise Error, "no .json.data files found in backup path '#{backup_dir}'!"
203
+ else
204
+ puts "Found #{found_files.size} files to import" unless opts[:quiet]
205
+ end
206
+
207
+ # bulk api request for each file
208
+ found_files.each_with_index do |file, i|
209
+ puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
210
+ payload = File.read(file)
211
+ # uri = URI(url)
212
+ http = Net::HTTP.new(uri.host, uri.port)
213
+ response = http.post("/_bulk", payload)
214
+ if !response.is_a?(Net::HTTPSuccess)
215
+ raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
216
+ end
217
+ end
218
+
219
+ puts "Finished restore of elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
220
+ return true
221
+ end
222
+
223
+ def self.save_bulk_data(path, hits, file_index=nil) # :nodoc:
224
+ if hits && !hits.empty?
225
+ hits.each do |hit|
226
+ index_name = hit['_index']
227
+ index_type = hit['_type']
228
+ dir_name = File.join(path, index_name)
229
+ FileUtils.mkdir_p(dir_name)
230
+ file_name = File.join(dir_name, index_type) + (file_index ? "_#{file_index}" : "") + ".json.data"
231
+ # prepare record for bulk api injection
232
+ action_json = {'index' => {
233
+ '_index' => hit['_index'], '_type' => hit['_type'], '_id' => hit['_id']
234
+ } }
235
+ source_json = hit['_source']
236
+ File.open(file_name, 'a') do |file|
237
+ file.write JSON.generate(action_json) + "\n" + JSON.generate(source_json) + "\n"
238
+ end
239
+ end
240
+ end
241
+ end
242
+
243
+ end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: elastic-util
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - James Dickson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-02-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore
42
+ indices
43
+ email:
44
+ - dickson.james@gmail.com
45
+ executables:
46
+ - elastic-util
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - ".gitignore"
51
+ - Gemfile
52
+ - README.md
53
+ - Rakefile
54
+ - bin/elastic-util
55
+ - elastic-util.gemspec
56
+ - lib/elastic-util.rb
57
+ - lib/elastic_util.rb
58
+ homepage:
59
+ licenses:
60
+ - MIT
61
+ metadata: {}
62
+ post_install_message:
63
+ rdoc_options: []
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 2.4.8
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: Provides backup and restore for ElasticSearch data
82
+ test_files: []