elastic-util 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: af7028b9a21af54dc8f66849ce7a22e9739dbf0a2b7d8d2fd4091144337b3fda
4
- data.tar.gz: 8caf084b468de28607516b1c8d8643de9373732a5d1628db4618307f6b7dac64
3
+ metadata.gz: b30dc1de09489fede6a09227ff9a8a292f416fdcf8e9eb041bcb46c08cbd6767
4
+ data.tar.gz: 5b604aafcae78e703530acec175bf1235178cb784711c2bcb8a665b9849c7792
5
5
  SHA512:
6
- metadata.gz: 07ce1c430cd062388456fc8d19717546457aba8bd20952a54697d17192a36e63367d66a37f78f8fc08d48d0feb89fd5c26c53fccc32a0f5df26e65e31278bc73
7
- data.tar.gz: bb50a5ada50644ee7edaf831af2d1966d4610515d268310084a01c0d0f278c2679c3a6757fc62ee6c6863c5f53c03842a2190335ed807037c97d7cd2eb004a38
6
+ metadata.gz: 4d4f94be2ca1d450ea260ab4d6f8450e8dfb8025f52f72281a932dc33480fdc0a965175166d9aa50000d386a59c8b65347b02aac6472e68432f663798f2d2e0b
7
+ data.tar.gz: '0029396b7a2bc8960f10532a8bdec303e4e3e813c8acbbab795c4dffbaf0e36cb707cf311acd363a5c34237b3ce7ea0454a37c1d261d491cc6b6919a64c6da8e'
@@ -0,0 +1,14 @@
1
+ # History
2
+
3
+ This is a changelog for the changes made to the `elastic-util` gem.
4
+
5
+ ### v0.1.8
6
+ * improved `--help` output and error messages
7
+
8
+ ### v0.1.7
9
+ * basic auth support inside the url argument using the format username:password@url
10
+ * https support
11
+
12
+ ### v0.1.6
13
+ * New backup option --replace-types
14
+
@@ -3,17 +3,25 @@ require 'elastic-util'
3
3
  require 'optparse'
4
4
 
5
5
  prog_name = "elastic-util" # $0.split('/').last
6
- usage = "Usage: #{prog_name} [backup|restore] [options]"
6
+ prog_usage = "usage: #{prog_name} [backup|restore] [url] [directory] [options]"
7
7
  args = ARGV.dup
8
8
  command_name = args.shift
9
9
 
10
10
  case command_name
11
11
 
12
12
  when "backup"
13
-
13
+ banner = <<-EOT
14
+ usage: #{prog_name} #{command_name} [url] [directory] [options]
15
+ EOT
16
+ footer = <<-EOT
17
+ The backup command provides way to dump Elasticsearch index data.
18
+ The /_search API is used to paginate requests for each index being exported.
19
+ The data is written to specified directory, with a file for each api request made.
20
+ Example: #{prog_name} #{command_name} http://localhost:9300 /tmp/local-elastic-data
21
+ EOT
14
22
  options = {}
15
23
  optparse = OptionParser.new do |opts|
16
- opts.banner = "Usage: #{prog_name} #{command_name} [url] [directory] [options]"
24
+ opts.banner = banner
17
25
  opts.on('--indices x,y,z', Array, "The indices to backup. Default is all.") do |val|
18
26
  options[:indices] = val.collect {|it| it.strip }
19
27
  end
@@ -44,27 +52,24 @@ when "backup"
44
52
  opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
45
53
  options[:quiet] = true
46
54
  end
55
+ opts.on( '-d', '--dry-run', "Don't actually backup data, just print what would have happened. Default is false." ) do |val|
56
+ options[:dry] = true
57
+ end
47
58
  opts.on('-h', '--help', "Prints this help" ) do
48
- puts opts
49
- exit
59
+ puts opts, footer
60
+ exit 0
50
61
  end
51
62
  end
52
63
  optparse.parse!(args)
53
- url = args[0]
54
- backup_directory = args[1]
55
- if url.nil? || url.empty?
56
- $stderr.puts "#{prog_name}: missing required argument [url]"
57
- $stderr.puts optparse
58
- exit 1
59
- end
60
- if backup_directory.nil? || backup_directory.empty?
61
- $stderr.puts "#{prog_name}: missing required argument [directory]"
62
- $stderr.puts optparse
64
+ if args.count != 2
65
+ $stderr.puts "#{prog_name}: wrong number of arguments. Expected 2 and got #{args.count}: #{args.join(', ')}"
66
+ # $stderr.puts optparse
67
+ $stderr.puts banner
68
+ $stderr.puts "See `#{prog_name} #{command_name} --help` for more usage information."
63
69
  exit 1
64
70
  end
65
-
66
71
  begin
67
- result = ElasticUtil.backup(url, backup_directory, options)
72
+ result = ElasticUtil.backup(args[0], args[1], options)
68
73
  exit 0
69
74
  rescue ElasticUtil::Error => err
70
75
  $stderr.puts "#{prog_name}: #{err.message}"
@@ -74,34 +79,39 @@ when "backup"
74
79
 
75
80
 
76
81
  when "restore"
77
-
82
+ banner = <<-EOT
83
+ usage: #{prog_name} #{command_name} [url] [directory] [options]
84
+ EOT
85
+ footer = <<-EOT
86
+ The restore command provides way to restore Elasticsearch index data.
87
+ The /_bulk API is used to upload index data from the specified directory.
88
+ The directory should contain data generated by the `#{prog_name} backup` command.
89
+ e.g. #{prog_name} #{command_name} http://localhost:9400 /tmp/local-elastic-data
90
+ EOT
78
91
  options = {}
79
92
  optparse = OptionParser.new do |opts|
80
- opts.banner = "Usage: #{prog_name} #{command_name} [url] [directory] [options]"
93
+ opts.banner = banner
81
94
  opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
82
95
  options[:quiet] = true
83
96
  end
97
+ opts.on( '-d', '--dry-run', "Don't actually restore data, just print what would have happened. Default is false." ) do |val|
98
+ options[:dry] = true
99
+ end
84
100
  opts.on('-h', '--help', "Prints this help" ) do
85
- puts opts
86
- exit
101
+ puts opts, footer
102
+ exit 0
87
103
  end
88
104
  end
89
105
  optparse.parse!(args)
90
- url = args[0]
91
- backup_directory = args[1]
92
- if url.nil? || url.empty?
93
- $stderr.puts "#{prog_name}: missing required argument [url]"
94
- $stderr.puts optparse
95
- exit 1
96
- end
97
- if backup_directory.nil? || backup_directory.empty?
98
- $stderr.puts "#{prog_name}: missing required argument [directory]"
99
- $stderr.puts optparse
106
+ if args.count != 2
107
+ $stderr.puts "#{prog_name}: wrong number of arguments. Expected 2 and got #{args.count}: #{args.join(', ')}"
108
+ # $stderr.puts optparse
109
+ $stderr.puts banner
110
+ $stderr.puts "See `#{prog_name} #{command_name} --help` for more usage information."
100
111
  exit 1
101
112
  end
102
-
103
113
  begin
104
- result = ElasticUtil.restore(url, backup_directory, options)
114
+ result = ElasticUtil.restore(args[0], args[1], options)
105
115
  exit 0
106
116
  rescue ElasticUtil::Error => err
107
117
  $stderr.puts "#{prog_name}: #{err.message}"
@@ -109,10 +119,28 @@ when "restore"
109
119
  exit 1
110
120
  end
111
121
 
112
- when "-v"
122
+ when "-h", "--help"
123
+ puts prog_usage
124
+ puts <<-EOT
125
+ The elastic-util command provides way to dump and restore Elasticsearch index data.
126
+ Example: #{prog_name} backup http://localhost:9300 /tmp/local-elastic-data
127
+ #{prog_name} restore http://localhost:9400 /tmp/local-elastic-data
128
+ EOT
129
+ exit 0
130
+ when "-v","--version","version"
113
131
  puts ElasticUtil::VERSION
114
- else
115
- $stderr.puts usage
132
+ when "", nil
133
+ $stderr.puts "#{prog_name}: missing required argument [backup|restore]"
134
+ # $stderr.puts "Commands:"
135
+ # $stderr.puts "\tbackup"
136
+ # $stderr.puts "\trestore"
137
+ $stderr.puts prog_usage
138
+ $stderr.puts "See `#{prog_name} #{command_name} --help` for more usage information."
116
139
  exit 1
140
+ else
141
+ # $stderr.puts "#{prog_name}: '#{command_name}' is not a recognized elastic-util command. See `#{prog_name} --help`"
142
+ $stderr.puts "#{prog_name}: '#{command_name}' is not a recognized elastic-util command."
143
+ $stderr.puts prog_usage
144
+ exit 3
117
145
  end
118
146
 
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["James Dickson"]
10
10
  spec.email = ["dickson.james@gmail.com"]
11
11
  spec.summary = "Provides backup and restore for ElasticSearch data"
12
- spec.description = "ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore indices"
12
+ spec.description = "ElasticUtil uses ElasticSearch's /_search and /_bulk APIs to dump and restore indices"
13
13
  #spec.homepage = "http://www.elastic-util.com"
14
14
  spec.license = "MIT"
15
15
 
@@ -4,16 +4,16 @@ require 'openssl'
4
4
  require 'json'
5
5
  require 'fileutils'
6
6
 
7
- # This module provides a way to backup and restore elasticsearch data.
7
+ # This module provides a way to backup and restore Elasticsearch data.
8
8
  #
9
9
  # @example Backup data from one elasticsearch cluster and restore it to another.
10
10
  #
11
- # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup', {size:5000})
12
- # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
11
+ # ElasticUtil.backup('http://localhost:9300', '/tmp/local-elastic-data', {size:5000})
12
+ # ElasticUtil.restore('http://localhost:9301', '/tmp/local-elastic-data')
13
13
  #
14
14
  module ElasticUtil
15
15
 
16
- VERSION = "0.1.7"
16
+ VERSION = "0.1.8"
17
17
 
18
18
  # The name of the data directory, relative to the user provided backup directory.
19
19
  DUMP_DIR = "es_data"
@@ -21,24 +21,23 @@ module ElasticUtil
21
21
  # A class to be raised for any known error condition.
22
22
  class Error < StandardError; end
23
23
 
24
- # Backup elasticsearch data to a local directory.
24
+ # Backup Elasticsearch data to a local directory.
25
25
  #
26
26
  # This uses ElasticSearch's scroll api to fetch all records for indices
27
27
  # and write the data to a local directory. The files it generates are given a
28
28
  # .json.data extension. They are not valid JSON files, but rather are in the
29
29
  # format expected by ElasticSearch's _bulk api.
30
30
  #
31
- # So #restore simply has to POST the contents of each file.
31
+ # So #restore simply has to POST the contents of each file as Content-Type: application/x-ndjson
32
32
  #
33
- # Use the :size option to change the number or results to fetch at once,
34
- # and also the size of the data files generated.
35
- # The latter correlates to the of the the api requests made in #restore.
33
+ # Use the :size option to change the number of results to fetch at once and also the size of the data files generated.
34
+ # Increasing size means larger files and fewer api requests for both #backup and the subsequent #restore of that data.
36
35
  #
37
- # @example Backup default elasticsearch running locally.
36
+ # @example Backup default Elasticsearch running locally.
38
37
  #
39
- # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup')
38
+ # ElasticUtil.backup('http://localhost:9300', '/tmp/local-elastic-data')
40
39
  #
41
- # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'
40
+ # @param [String] url The url of the Elasticsearch cluster eg. 'http://localhost:9300'
42
41
  # @param [String] backup_dir The local directory to store data in. eg. '/tmp/es2.4'
43
42
  # @param [Hash] opts The options for this backup.
44
43
  # @option opts [Array] :indices The indices to backup. Default is all.
@@ -58,12 +57,19 @@ module ElasticUtil
58
57
  backup_dir = backup_dir.strip
59
58
  path = File.join(backup_dir.strip, DUMP_DIR)
60
59
  indices = []
60
+
61
+ if opts[:dry]
62
+ puts "(DRY RUN) Started backup" unless opts[:quiet]
63
+ else
64
+ puts "Started backup" unless opts[:quiet]
65
+ end
66
+
61
67
  # ping it first
62
68
  response = nil
63
69
  uri = URI(url)
64
70
  response = api_get(uri)
65
71
  if !response.is_a?(Net::HTTPSuccess)
66
- raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
72
+ raise Error, "Unable to reach Elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
67
73
  end
68
74
 
69
75
  # determine indices to backup, default is everything.
@@ -107,6 +113,14 @@ module ElasticUtil
107
113
  end
108
114
  FileUtils.mkdir_p(path)
109
115
 
116
+ if opts[:dry]
117
+ indices.each_with_index do |index_name, i|
118
+ puts "(#{i+1}/#{indices.size}) backing up index #{index_name}" unless opts[:quiet]
119
+ end
120
+ puts "(DRY RUN) Finished backup of Elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
121
+ return 0
122
+ end
123
+
110
124
  # dump data
111
125
  indices.each_with_index do |index_name, i|
112
126
  puts "(#{i+1}/#{indices.size}) backing up index #{index_name}" unless opts[:quiet]
@@ -154,18 +168,18 @@ module ElasticUtil
154
168
  end
155
169
  end
156
170
 
157
- puts "Finished backup of elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
171
+ puts "Finished backup of Elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
158
172
  return true
159
173
  end
160
174
 
161
- # Restore elasticsearch data from a backup.
175
+ # Restore Elasticsearch data from a backup.
162
176
  # This will do a POST to the _bulk api for each file in the backup directory.
163
177
  #
164
178
  # @example Restore local cluster with our backup.
165
179
  #
166
- # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
180
+ # ElasticUtil.restore('http://localhost:9301', '/tmp/local-elastic-data')
167
181
  #
168
- # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'.
182
+ # @param [String] url The url of the Elasticsearch cluster eg. 'http://localhost:9200'.
169
183
  # @param [String] backup_dir The backup directory.
170
184
  # @param [Hash] opts The options for this backup.
171
185
  # @option opts [true] :quiet Don't print anything. Default is false.
@@ -174,9 +188,16 @@ module ElasticUtil
174
188
  #
175
189
  def self.restore(url, backup_dir, opts={})
176
190
  start_time = Time.now
191
+ url = url.strip.chomp("/")
177
192
  backup_dir = backup_dir.strip
178
193
  path = File.join(backup_dir.strip, DUMP_DIR)
179
194
 
195
+ if opts[:dry]
196
+ puts "(DRY RUN) Started restore" unless opts[:quiet]
197
+ else
198
+ puts "Started restore" unless opts[:quiet]
199
+ end
200
+
180
201
  # validate backup path
181
202
  if !Dir.exists?(path)
182
203
  raise Error, "backup path '#{backup_dir}' does not exist!"
@@ -186,7 +207,7 @@ module ElasticUtil
186
207
  uri = URI(url)
187
208
  response = api_get(uri)
188
209
  if !response.is_a?(Net::HTTPSuccess)
189
- raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
210
+ raise Error, "Unable to reach Elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
190
211
  end
191
212
 
192
213
  # find files to import
@@ -197,6 +218,14 @@ module ElasticUtil
197
218
  puts "Found #{found_files.size} files to import" unless opts[:quiet]
198
219
  end
199
220
 
221
+ if opts[:dry]
222
+ found_files.each_with_index do |file, i|
223
+ puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
224
+ end
225
+ puts "(DRY RUN) Finished restore of Elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
226
+ return 0
227
+ end
228
+
200
229
  # bulk api request for each file
201
230
  found_files.each_with_index do |file, i|
202
231
  puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
@@ -208,7 +237,7 @@ module ElasticUtil
208
237
  end
209
238
  end
210
239
 
211
- puts "Finished restore of elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
240
+ puts "Finished restore of Elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
212
241
  return true
213
242
  end
214
243
 
@@ -242,7 +271,12 @@ module ElasticUtil
242
271
  end
243
272
  end
244
273
 
245
- def self.api_get(uri, opts={})
274
+ def self.exec_request(uri, http_method="GET", opts={})
275
+ # parse request URI and options
276
+ uri = uri.is_a?(URI) ? uri : URI(uri)
277
+ http_method = http_method.to_s.upcase
278
+ headers = opts[:headers] || {}
279
+ payload = opts[:payload] || opts[:body]
246
280
  http = Net::HTTP.new(uri.host, uri.port)
247
281
  if uri.scheme == 'https'
248
282
  http.use_ssl = true
@@ -251,42 +285,44 @@ module ElasticUtil
251
285
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
252
286
  end
253
287
  http.read_timeout = opts[:read_timeout] || (60*15)
254
- http.open_timeout = opts[:open_timeout] || 5
255
- request = Net::HTTP::Get.new uri.request_uri
256
- if opts[:headers]
257
- opts[:headers].each do |k,v|
258
- request[k] = v
259
- end
288
+ http.open_timeout = opts[:open_timeout] || 10
289
+ request = nil
290
+ if http_method == "GET"
291
+ request = Net::HTTP::Get.new uri.request_uri
292
+ elsif http_method == "POST"
293
+ request = Net::HTTP::Post.new uri.request_uri
294
+ request.body = payload if payload
295
+ elsif http_method == "PUT"
296
+ request = Net::HTTP::Put.new uri.request_uri
297
+ request.body = payload if payload
298
+ elsif http_method == "DELETE"
299
+ request = Net::HTTP::Delete.new uri.request_uri
300
+ else
301
+ raise "HTTP method is unknown: '#{http_method}'"
302
+ end
303
+ # set headers
304
+ headers.each { |k,v| request[k] = v }
305
+ # todo: set default Accept: application/json (probably, right?)
306
+ # set default Content-Type
307
+ if payload && headers['Content-Type'].nil?
308
+ headers['Content-Type'] = "application/json"
260
309
  end
310
+ # set basic auth
261
311
  if uri.user
262
312
  request.basic_auth uri.user, uri.password
263
313
  end
314
+ # execute request
264
315
  response = http.request(request)
316
+ # return the resulting Net::HTTPResponse
265
317
  return response
266
318
  end
267
319
 
320
+ def self.api_get(uri, opts={})
321
+ exec_request(uri, "GET", opts={})
322
+ end
323
+
268
324
  def self.api_post(uri, payload, opts={})
269
- http = Net::HTTP.new(uri.host, uri.port)
270
- if uri.scheme == 'https'
271
- http.use_ssl = true
272
- # todo: always ignore ssl errors for now, but this should be an option
273
- # http.verify_mode = OpenSSL::SSL::VERIFY_PEER
274
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
275
- end
276
- http.read_timeout = opts[:read_timeout] || (60*15)
277
- http.open_timeout = opts[:open_timeout] || 5
278
- request = Net::HTTP::Post.new uri.request_uri
279
- if opts[:headers]
280
- opts[:headers].each do |k,v|
281
- request[k] = v
282
- end
283
- end
284
- if uri.user
285
- request.basic_auth uri.user, uri.password
286
- end
287
- request.body = payload
288
- response = http.request(request)
289
- return response
325
+ exec_request(uri, "POST", opts.merge({payload:payload}))
290
326
  end
291
327
 
292
328
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastic-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Dickson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-19 00:00:00.000000000 Z
11
+ date: 2020-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,8 +38,8 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- description: ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore
42
- indices
41
+ description: ElasticUtil uses ElasticSearch's /_search and /_bulk APIs to dump and
42
+ restore indices
43
43
  email:
44
44
  - dickson.james@gmail.com
45
45
  executables:
@@ -49,6 +49,7 @@ extra_rdoc_files: []
49
49
  files:
50
50
  - ".gitignore"
51
51
  - Gemfile
52
+ - HISTORY.md
52
53
  - README.md
53
54
  - Rakefile
54
55
  - bin/elastic-util