elastic-util 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: af7028b9a21af54dc8f66849ce7a22e9739dbf0a2b7d8d2fd4091144337b3fda
4
- data.tar.gz: 8caf084b468de28607516b1c8d8643de9373732a5d1628db4618307f6b7dac64
3
+ metadata.gz: b30dc1de09489fede6a09227ff9a8a292f416fdcf8e9eb041bcb46c08cbd6767
4
+ data.tar.gz: 5b604aafcae78e703530acec175bf1235178cb784711c2bcb8a665b9849c7792
5
5
  SHA512:
6
- metadata.gz: 07ce1c430cd062388456fc8d19717546457aba8bd20952a54697d17192a36e63367d66a37f78f8fc08d48d0feb89fd5c26c53fccc32a0f5df26e65e31278bc73
7
- data.tar.gz: bb50a5ada50644ee7edaf831af2d1966d4610515d268310084a01c0d0f278c2679c3a6757fc62ee6c6863c5f53c03842a2190335ed807037c97d7cd2eb004a38
6
+ metadata.gz: 4d4f94be2ca1d450ea260ab4d6f8450e8dfb8025f52f72281a932dc33480fdc0a965175166d9aa50000d386a59c8b65347b02aac6472e68432f663798f2d2e0b
7
+ data.tar.gz: '0029396b7a2bc8960f10532a8bdec303e4e3e813c8acbbab795c4dffbaf0e36cb707cf311acd363a5c34237b3ce7ea0454a37c1d261d491cc6b6919a64c6da8e'
@@ -0,0 +1,14 @@
1
+ # History
2
+
3
+ This is a changelog for the changes made to the `elastic-util` gem.
4
+
5
+ ### v0.1.8
6
+ * improved `--help` output and error messages
7
+
8
+ ### v0.1.7
9
+ * basic auth support inside the url argument using the format username:password@url
10
+ * https support
11
+
12
+ ### v0.1.6
13
+ * New backup option --replace-types
14
+
@@ -3,17 +3,25 @@ require 'elastic-util'
3
3
  require 'optparse'
4
4
 
5
5
  prog_name = "elastic-util" # $0.split('/').last
6
- usage = "Usage: #{prog_name} [backup|restore] [options]"
6
+ prog_usage = "usage: #{prog_name} [backup|restore] [url] [directory] [options]"
7
7
  args = ARGV.dup
8
8
  command_name = args.shift
9
9
 
10
10
  case command_name
11
11
 
12
12
  when "backup"
13
-
13
+ banner = <<-EOT
14
+ usage: #{prog_name} #{command_name} [url] [directory] [options]
15
+ EOT
16
+ footer = <<-EOT
17
+ The backup command provides way to dump Elasticsearch index data.
18
+ The /_search API is used to paginate requests for each index being exported.
19
+ The data is written to specified directory, with a file for each api request made.
20
+ Example: #{prog_name} #{command_name} http://localhost:9300 /tmp/local-elastic-data
21
+ EOT
14
22
  options = {}
15
23
  optparse = OptionParser.new do |opts|
16
- opts.banner = "Usage: #{prog_name} #{command_name} [url] [directory] [options]"
24
+ opts.banner = banner
17
25
  opts.on('--indices x,y,z', Array, "The indices to backup. Default is all.") do |val|
18
26
  options[:indices] = val.collect {|it| it.strip }
19
27
  end
@@ -44,27 +52,24 @@ when "backup"
44
52
  opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
45
53
  options[:quiet] = true
46
54
  end
55
+ opts.on( '-d', '--dry-run', "Don't actually backup data, just print what would have happened. Default is false." ) do |val|
56
+ options[:dry] = true
57
+ end
47
58
  opts.on('-h', '--help', "Prints this help" ) do
48
- puts opts
49
- exit
59
+ puts opts, footer
60
+ exit 0
50
61
  end
51
62
  end
52
63
  optparse.parse!(args)
53
- url = args[0]
54
- backup_directory = args[1]
55
- if url.nil? || url.empty?
56
- $stderr.puts "#{prog_name}: missing required argument [url]"
57
- $stderr.puts optparse
58
- exit 1
59
- end
60
- if backup_directory.nil? || backup_directory.empty?
61
- $stderr.puts "#{prog_name}: missing required argument [directory]"
62
- $stderr.puts optparse
64
+ if args.count != 2
65
+ $stderr.puts "#{prog_name}: wrong number of arguments. Expected 2 and got #{args.count}: #{args.join(', ')}"
66
+ # $stderr.puts optparse
67
+ $stderr.puts banner
68
+ $stderr.puts "See `#{prog_name} #{command_name} --help` for more usage information."
63
69
  exit 1
64
70
  end
65
-
66
71
  begin
67
- result = ElasticUtil.backup(url, backup_directory, options)
72
+ result = ElasticUtil.backup(args[0], args[1], options)
68
73
  exit 0
69
74
  rescue ElasticUtil::Error => err
70
75
  $stderr.puts "#{prog_name}: #{err.message}"
@@ -74,34 +79,39 @@ when "backup"
74
79
 
75
80
 
76
81
  when "restore"
77
-
82
+ banner = <<-EOT
83
+ usage: #{prog_name} #{command_name} [url] [directory] [options]
84
+ EOT
85
+ footer = <<-EOT
86
+ The restore command provides way to restore Elasticsearch index data.
87
+ The /_bulk API is used to upload index data from the specified directory.
88
+ The directory should contain data generated by the `#{prog_name} backup` command.
89
+ e.g. #{prog_name} #{command_name} http://localhost:9400 /tmp/local-elastic-data
90
+ EOT
78
91
  options = {}
79
92
  optparse = OptionParser.new do |opts|
80
- opts.banner = "Usage: #{prog_name} #{command_name} [url] [directory] [options]"
93
+ opts.banner = banner
81
94
  opts.on( '-q', '--quiet', "Don't print to stdout. Default is false." ) do |val|
82
95
  options[:quiet] = true
83
96
  end
97
+ opts.on( '-d', '--dry-run', "Don't actually restore data, just print what would have happened. Default is false." ) do |val|
98
+ options[:dry] = true
99
+ end
84
100
  opts.on('-h', '--help', "Prints this help" ) do
85
- puts opts
86
- exit
101
+ puts opts, footer
102
+ exit 0
87
103
  end
88
104
  end
89
105
  optparse.parse!(args)
90
- url = args[0]
91
- backup_directory = args[1]
92
- if url.nil? || url.empty?
93
- $stderr.puts "#{prog_name}: missing required argument [url]"
94
- $stderr.puts optparse
95
- exit 1
96
- end
97
- if backup_directory.nil? || backup_directory.empty?
98
- $stderr.puts "#{prog_name}: missing required argument [directory]"
99
- $stderr.puts optparse
106
+ if args.count != 2
107
+ $stderr.puts "#{prog_name}: wrong number of arguments. Expected 2 and got #{args.count}: #{args.join(', ')}"
108
+ # $stderr.puts optparse
109
+ $stderr.puts banner
110
+ $stderr.puts "See `#{prog_name} #{command_name} --help` for more usage information."
100
111
  exit 1
101
112
  end
102
-
103
113
  begin
104
- result = ElasticUtil.restore(url, backup_directory, options)
114
+ result = ElasticUtil.restore(args[0], args[1], options)
105
115
  exit 0
106
116
  rescue ElasticUtil::Error => err
107
117
  $stderr.puts "#{prog_name}: #{err.message}"
@@ -109,10 +119,28 @@ when "restore"
109
119
  exit 1
110
120
  end
111
121
 
112
- when "-v"
122
+ when "-h", "--help"
123
+ puts prog_usage
124
+ puts <<-EOT
125
+ The elastic-util command provides way to dump and restore Elasticsearch index data.
126
+ Example: #{prog_name} backup http://localhost:9300 /tmp/local-elastic-data
127
+ #{prog_name} restore http://localhost:9400 /tmp/local-elastic-data
128
+ EOT
129
+ exit 0
130
+ when "-v","--version","version"
113
131
  puts ElasticUtil::VERSION
114
- else
115
- $stderr.puts usage
132
+ when "", nil
133
+ $stderr.puts "#{prog_name}: missing required argument [backup|restore]"
134
+ # $stderr.puts "Commands:"
135
+ # $stderr.puts "\tbackup"
136
+ # $stderr.puts "\trestore"
137
+ $stderr.puts prog_usage
138
+ $stderr.puts "See `#{prog_name} --help` for more usage information." # command_name is nil/empty in this branch, so point at the top-level help
116
139
  exit 1
140
+ else
141
+ # $stderr.puts "#{prog_name}: '#{command_name}' is not a recognized elastic-util command. See `#{prog_name} --help`"
142
+ $stderr.puts "#{prog_name}: '#{command_name}' is not a recognized elastic-util command."
143
+ $stderr.puts prog_usage
144
+ exit 3
117
145
  end
118
146
 
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["James Dickson"]
10
10
  spec.email = ["dickson.james@gmail.com"]
11
11
  spec.summary = "Provides backup and restore for ElasticSearch data"
12
- spec.description = "ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore indices"
12
+ spec.description = "ElasticUtil uses ElasticSearch's /_search and /_bulk APIs to dump and restore indices"
13
13
  #spec.homepage = "http://www.elastic-util.com"
14
14
  spec.license = "MIT"
15
15
 
@@ -4,16 +4,16 @@ require 'openssl'
4
4
  require 'json'
5
5
  require 'fileutils'
6
6
 
7
- # This module provides a way to backup and restore elasticsearch data.
7
+ # This module provides a way to backup and restore Elasticsearch data.
8
8
  #
9
9
  # @example Backup data from one elasticsearch cluster and restore it to another.
10
10
  #
11
- # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup', {size:5000})
12
- # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
11
+ # ElasticUtil.backup('http://localhost:9300', '/tmp/local-elastic-data', {size:5000})
12
+ # ElasticUtil.restore('http://localhost:9301', '/tmp/local-elastic-data')
13
13
  #
14
14
  module ElasticUtil
15
15
 
16
- VERSION = "0.1.7"
16
+ VERSION = "0.1.8"
17
17
 
18
18
  # The name of the data directory, relative to the user provided backup directory.
19
19
  DUMP_DIR = "es_data"
@@ -21,24 +21,23 @@ module ElasticUtil
21
21
  # A class to be raised for any known error condition.
22
22
  class Error < StandardError; end
23
23
 
24
- # Backup elasticsearch data to a local directory.
24
+ # Backup Elasticsearch data to a local directory.
25
25
  #
26
26
  # This uses ElasticSearch's scroll api to fetch all records for indices
27
27
  # and write the data to a local directory. The files it generates are given a
28
28
  # .json.data extension. They are not valid JSON files, but rather are in the
29
29
  # format expected by ElasticSearch's _bulk api.
30
30
  #
31
- # So #restore simply has to POST the contents of each file.
31
+ # So #restore simply has to POST the contents of each file as Content-Type: application/x-ndjson
32
32
  #
33
- # Use the :size option to change the number or results to fetch at once,
34
- # and also the size of the data files generated.
35
- # The latter correlates to the of the the api requests made in #restore.
33
+ # Use the :size option to change the number of results to fetch at once and also the size of the data files generated.
34
+ # Increasing size means larger files and fewer api requests for both #backup and the subsequent #restore of that data.
36
35
  #
37
- # @example Backup default elasticsearch running locally.
36
+ # @example Backup default Elasticsearch running locally.
38
37
  #
39
- # ElasticUtil.backup('http://localhost:9200', '/tmp/mybackup')
38
+ # ElasticUtil.backup('http://localhost:9300', '/tmp/local-elastic-data')
40
39
  #
41
- # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'
40
+ # @param [String] url The url of the Elasticsearch cluster eg. 'http://localhost:9300'
42
41
  # @param [String] backup_dir The local directory to store data in. eg. '/tmp/es2.4'
43
42
  # @param [Hash] opts The options for this backup.
44
43
  # @option opts [Array] :indices The indices to backup. Default is all.
@@ -58,12 +57,19 @@ module ElasticUtil
58
57
  backup_dir = backup_dir.strip
59
58
  path = File.join(backup_dir.strip, DUMP_DIR)
60
59
  indices = []
60
+
61
+ if opts[:dry]
62
+ puts "(DRY RUN) Started backup" unless opts[:quiet]
63
+ else
64
+ puts "Started backup" unless opts[:quiet]
65
+ end
66
+
61
67
  # ping it first
62
68
  response = nil
63
69
  uri = URI(url)
64
70
  response = api_get(uri)
65
71
  if !response.is_a?(Net::HTTPSuccess)
66
- raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
72
+ raise Error, "Unable to reach Elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
67
73
  end
68
74
 
69
75
  # determine indices to backup, default is everything.
@@ -107,6 +113,14 @@ module ElasticUtil
107
113
  end
108
114
  FileUtils.mkdir_p(path)
109
115
 
116
+ if opts[:dry]
117
+ indices.each_with_index do |index_name, i|
118
+ puts "(#{i+1}/#{indices.size}) backing up index #{index_name}" unless opts[:quiet]
119
+ end
120
+ puts "(DRY RUN) Finished backup of Elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
121
+ return 0
122
+ end
123
+
110
124
  # dump data
111
125
  indices.each_with_index do |index_name, i|
112
126
  puts "(#{i+1}/#{indices.size}) backing up index #{index_name}" unless opts[:quiet]
@@ -154,18 +168,18 @@ module ElasticUtil
154
168
  end
155
169
  end
156
170
 
157
- puts "Finished backup of elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
171
+ puts "Finished backup of Elasticsearch #{url} to directory #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
158
172
  return true
159
173
  end
160
174
 
161
- # Restore elasticsearch data from a backup.
175
+ # Restore Elasticsearch data from a backup.
162
176
  # This will do a POST to the _bulk api for each file in the backup directory.
163
177
  #
164
178
  # @example Restore local cluster with our backup.
165
179
  #
166
- # ElasticUtil.restore('http://localhost:9201', '/tmp/mybackup')
180
+ # ElasticUtil.restore('http://localhost:9301', '/tmp/local-elastic-data')
167
181
  #
168
- # @param [String] url The url of the elasticsearch cluster eg. 'http://localhost:9200'.
182
+ # @param [String] url The url of the Elasticsearch cluster eg. 'http://localhost:9200'.
169
183
  # @param [String] backup_dir The backup directory.
170
184
  # @param [Hash] opts The options for this backup.
171
185
  # @option opts [true] :quiet Don't print anything. Default is false.
@@ -174,9 +188,16 @@ module ElasticUtil
174
188
  #
175
189
  def self.restore(url, backup_dir, opts={})
176
190
  start_time = Time.now
191
+ url = url.strip.chomp("/")
177
192
  backup_dir = backup_dir.strip
178
193
  path = File.join(backup_dir.strip, DUMP_DIR)
179
194
 
195
+ if opts[:dry]
196
+ puts "(DRY RUN) Started restore" unless opts[:quiet]
197
+ else
198
+ puts "Started restore" unless opts[:quiet]
199
+ end
200
+
180
201
  # validate backup path
181
202
  if !Dir.exists?(path)
182
203
  raise Error, "backup path '#{backup_dir}' does not exist!"
@@ -186,7 +207,7 @@ module ElasticUtil
186
207
  uri = URI(url)
187
208
  response = api_get(uri)
188
209
  if !response.is_a?(Net::HTTPSuccess)
189
- raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
210
+ raise Error, "Unable to reach Elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
190
211
  end
191
212
 
192
213
  # find files to import
@@ -197,6 +218,14 @@ module ElasticUtil
197
218
  puts "Found #{found_files.size} files to import" unless opts[:quiet]
198
219
  end
199
220
 
221
+ if opts[:dry]
222
+ found_files.each_with_index do |file, i|
223
+ puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
224
+ end
225
+ puts "(DRY RUN) Finished restore of Elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
226
+ return 0
227
+ end
228
+
200
229
  # bulk api request for each file
201
230
  found_files.each_with_index do |file, i|
202
231
  puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
@@ -208,7 +237,7 @@ module ElasticUtil
208
237
  end
209
238
  end
210
239
 
211
- puts "Finished restore of elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
240
+ puts "Finished restore of Elasticsearch #{url} with backup #{backup_dir} (took #{(Time.now-start_time).round(3)}s)" unless opts[:quiet]
212
241
  return true
213
242
  end
214
243
 
@@ -242,7 +271,12 @@ module ElasticUtil
242
271
  end
243
272
  end
244
273
 
245
- def self.api_get(uri, opts={})
274
+ def self.exec_request(uri, http_method="GET", opts={})
275
+ # parse request URI and options
276
+ uri = uri.is_a?(URI) ? uri : URI(uri)
277
+ http_method = http_method.to_s.upcase
278
+ headers = opts[:headers] || {}
279
+ payload = opts[:payload] || opts[:body]
246
280
  http = Net::HTTP.new(uri.host, uri.port)
247
281
  if uri.scheme == 'https'
248
282
  http.use_ssl = true
@@ -251,42 +285,44 @@ module ElasticUtil
251
285
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
252
286
  end
253
287
  http.read_timeout = opts[:read_timeout] || (60*15)
254
- http.open_timeout = opts[:open_timeout] || 5
255
- request = Net::HTTP::Get.new uri.request_uri
256
- if opts[:headers]
257
- opts[:headers].each do |k,v|
258
- request[k] = v
259
- end
288
+ http.open_timeout = opts[:open_timeout] || 10
289
+ request = nil
290
+ if http_method == "GET"
291
+ request = Net::HTTP::Get.new uri.request_uri
292
+ elsif http_method == "POST"
293
+ request = Net::HTTP::Post.new uri.request_uri
294
+ request.body = payload if payload
295
+ elsif http_method == "PUT"
296
+ request = Net::HTTP::Put.new uri.request_uri
297
+ request.body = payload if payload
298
+ elsif http_method == "DELETE"
299
+ request = Net::HTTP::Delete.new uri.request_uri
300
+ else
301
+ raise "HTTP method is unknown: '#{http_method}'"
302
+ end
303
+ # set headers
304
+ headers.each { |k,v| request[k] = v }
305
+ # todo: set default Accept: application/json (probably, right?)
306
+ # set default Content-Type
307
+ if payload && request['Content-Type'].nil?
308
+ request['Content-Type'] = "application/json" # set on the request itself: headers were already copied onto it, and the caller's hash must not be mutated
260
309
  end
310
+ # set basic auth
261
311
  if uri.user
262
312
  request.basic_auth uri.user, uri.password
263
313
  end
314
+ # execute request
264
315
  response = http.request(request)
316
+ # return the resulting Net::HTTPResponse
265
317
  return response
266
318
  end
267
319
 
320
+ def self.api_get(uri, opts={})
321
+ exec_request(uri, "GET", opts) # forward the caller's opts (headers, timeouts); `opts={}` here reset them to an empty hash
322
+ end
323
+
268
324
  def self.api_post(uri, payload, opts={})
269
- http = Net::HTTP.new(uri.host, uri.port)
270
- if uri.scheme == 'https'
271
- http.use_ssl = true
272
- # todo: always ignore ssl errors for now, but this should be an option
273
- # http.verify_mode = OpenSSL::SSL::VERIFY_PEER
274
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
275
- end
276
- http.read_timeout = opts[:read_timeout] || (60*15)
277
- http.open_timeout = opts[:open_timeout] || 5
278
- request = Net::HTTP::Post.new uri.request_uri
279
- if opts[:headers]
280
- opts[:headers].each do |k,v|
281
- request[k] = v
282
- end
283
- end
284
- if uri.user
285
- request.basic_auth uri.user, uri.password
286
- end
287
- request.body = payload
288
- response = http.request(request)
289
- return response
325
+ exec_request(uri, "POST", opts.merge({payload:payload}))
290
326
  end
291
327
 
292
328
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastic-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Dickson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-19 00:00:00.000000000 Z
11
+ date: 2020-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,8 +38,8 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- description: ElasticUtil uses ElasticSearch's scroll and _bulk APIs to dump and restore
42
- indices
41
+ description: ElasticUtil uses ElasticSearch's /_search and /_bulk APIs to dump and
42
+ restore indices
43
43
  email:
44
44
  - dickson.james@gmail.com
45
45
  executables:
@@ -49,6 +49,7 @@ extra_rdoc_files: []
49
49
  files:
50
50
  - ".gitignore"
51
51
  - Gemfile
52
+ - HISTORY.md
52
53
  - README.md
53
54
  - Rakefile
54
55
  - bin/elastic-util