es_dump_restore 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in es_dump_restore.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 PatientsLikeMe
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # es_dump_restore
2
+
3
+ A utility for safely dumping the contents of an ElasticSearch index to a compressed file and restoring it
4
+ later on. This can be used for backups or for cloning an ElasticSearch index without needing to take down
5
+ the server.
6
+
7
+ The file format is a ZIP file containing the index metadata, the number of objects in the index, and a
8
+ series of commands to be sent to the ElasticSearch bulk API.
9
+
10
+ ## Installation
11
+
12
+ gem install es_dump_restore
13
+
14
+ ## Usage
15
+
16
+ To dump an ElasticSearch index to a file:
17
+
18
+ es_dump_restore dump ELASTIC_SEARCH_SERVER_URL INDEX_NAME DESTINATION_FILE
19
+
20
+ To restore an index to an ElasticSearch server:
21
+
22
+ es_dump_restore restore ELASTIC_SEARCH_SERVER_URL DESTINATION_INDEX FILENAME
23
+
24
+ ## Contributing
25
+
26
+ 1. Fork it
27
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
28
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
29
+ 4. Push to the branch (`git push origin my-new-feature`)
30
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'es_dump_restore'
5
+ require 'es_dump_restore/app'
6
+
7
+ EsDumpRestore::App.start
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'es_dump_restore/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "es_dump_restore"
8
+ gem.version = EsDumpRestore::VERSION
9
+ gem.authors = ["Nat Budin"]
10
+ gem.email = ["nbudin@patientslikeme.com"]
11
+ gem.description = %q{A utility for dumping the contents of an ElasticSearch index to a compressed file and restoring the dumpfile back to an ElasticSearch server}
12
+ gem.summary = %q{Dump ElasticSearch indexes to files and restore them back}
13
+ gem.homepage = "https://github.com/patientslikeme/es_dump_restore"
14
+
15
+ gem.files = `git ls-files`.split($/)
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = ["lib"]
19
+
20
+ gem.add_dependency 'multi_json'
21
+ gem.add_dependency 'httpclient'
22
+ gem.add_dependency 'thor'
23
+ gem.add_dependency 'rubyzip'
24
+ gem.add_dependency 'progress_bar'
25
+ end
@@ -0,0 +1,50 @@
1
+ require "es_dump_restore/es_client"
2
+ require "es_dump_restore/dumpfile"
3
+ require "thor"
4
+ require "progress_bar"
5
+
6
+ module EsDumpRestore
7
+ class App < Thor
8
+
9
+ desc "dump URL INDEX_NAME FILENAME", "Creates a dumpfile based on the given ElasticSearch index"
10
+ def dump(url, index_name, filename)
11
+ client = EsClient.new(url, index_name)
12
+
13
+ Dumpfile.write(filename) do |dumpfile|
14
+ dumpfile.index = {
15
+ settings: client.settings,
16
+ mappings: client.mappings
17
+ }
18
+
19
+ client.start_scan do |scroll_id, total|
20
+ dumpfile.num_objects = total
21
+ bar = ProgressBar.new(total)
22
+
23
+ dumpfile.get_objects_output_stream do |out|
24
+ client.each_scroll_hit(scroll_id) do |hit|
25
+ metadata = { index: { _type: hit["_type"], _id: hit["_id"] } }
26
+ out.write("#{metadata.to_json}\n#{hit["_source"].to_json}\n")
27
+ bar.increment!
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
33
+
34
+ desc "restore URL INDEX_NAME FILENAME", "Restores a dumpfile into the given ElasticSearch index"
35
+ def restore(url, index_name, filename)
36
+ client = EsClient.new(url, index_name)
37
+
38
+ Dumpfile.read(filename) do |dumpfile|
39
+ client.create_index(dumpfile.index)
40
+
41
+ bar = ProgressBar.new(dumpfile.num_objects)
42
+ dumpfile.scan_objects(1000) do |batch, size|
43
+ client.bulk_index batch
44
+ bar.increment!(size)
45
+ end
46
+ end
47
+ end
48
+
49
+ end
50
+ end
@@ -0,0 +1,80 @@
1
+ require 'zip/zip'
2
+ require 'multi_json'
3
+
4
+ module EsDumpRestore
5
+ class Dumpfile < Zip::ZipFile
6
+ def self.write(filename, &block)
7
+ df = Dumpfile.new(filename, Zip::ZipFile::CREATE)
8
+ begin
9
+ yield df
10
+ ensure
11
+ df.close
12
+ end
13
+ end
14
+
15
+ def self.read(filename, &block)
16
+ df = Dumpfile.new(filename)
17
+ begin
18
+ yield df
19
+ ensure
20
+ df.close
21
+ end
22
+ end
23
+
24
+ def get_objects_input_stream(&block)
25
+ get_input_stream("objects", &block)
26
+ end
27
+
28
+ def get_objects_output_stream(&block)
29
+ get_output_stream("objects", nil, &block)
30
+ end
31
+
32
+ def num_objects
33
+ read_json_file("num_objects.json")["num_objects"]
34
+ end
35
+
36
+ def num_objects=(n)
37
+ write_json_file("num_objects.json", {num_objects: n})
38
+ end
39
+
40
+ def scan_objects(batch_size, &block)
41
+ get_objects_input_stream do |input|
42
+ loop do
43
+ commands = ""
44
+ items = 0
45
+
46
+ batch_size.times do
47
+ metadata = input.gets("\n")
48
+ break if metadata.nil?
49
+ commands << metadata
50
+
51
+ source = input.gets("\n")
52
+ commands << source
53
+
54
+ items += 1
55
+ end
56
+ break if commands.empty?
57
+
58
+ yield commands, items
59
+ end
60
+ end
61
+ end
62
+
63
+ def index=(index)
64
+ write_json_file("index.json", index)
65
+ end
66
+
67
+ def index
68
+ read_json_file("index.json")
69
+ end
70
+
71
+ private
72
+ def read_json_file(filename)
73
+ get_input_stream(filename) { |i| MultiJson.load(i.read) }
74
+ end
75
+
76
+ def write_json_file(filename, object)
77
+ get_output_stream(filename) { |o| o.write MultiJson.dump(object) }
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,62 @@
1
+ require 'uri'
2
+ require 'httpclient'
3
+ require 'multi_json'
4
+
5
+ module EsDumpRestore
6
+ class EsClient
7
+ attr_accessor :base_uri
8
+ attr_accessor :index_name
9
+
10
+ def initialize(base_uri, index_name)
11
+ @httpclient = HTTPClient.new
12
+ @index_name = index_name
13
+ @base_uri = URI.parse(base_uri + "/" + index_name + "/")
14
+ end
15
+
16
+ def mappings
17
+ request(:get, '_mapping')[index_name]
18
+ end
19
+
20
+ def settings
21
+ request(:get, '_settings')[index_name]
22
+ end
23
+
24
+ def start_scan(&block)
25
+ scroll = request(:get, '_search',
26
+ query: { search_type: 'scan', scroll: '10m', size: 500 },
27
+ body: MultiJson.dump({ query: { match_all: {} } }) )
28
+ total = scroll["hits"]["total"]
29
+ scroll_id = scroll["_scroll_id"]
30
+
31
+ yield scroll_id, total
32
+ end
33
+
34
+ def each_scroll_hit(scroll_id, &block)
35
+ loop do
36
+ batch = request(:get, '/_search/scroll', query: { scroll: '10m', scroll_id: scroll_id })
37
+ hits = batch["hits"]["hits"]
38
+ break if hits.empty?
39
+
40
+ hits.each do |hit|
41
+ yield hit
42
+ end
43
+ end
44
+ end
45
+
46
+ def create_index(metadata)
47
+ request(:post, "", :body => MultiJson.dump(metadata))
48
+ end
49
+
50
+ def bulk_index(data)
51
+ request(:post, "_bulk", :body => data)
52
+ end
53
+
54
+ private
55
+
56
+ def request(method, path, options={})
57
+ request_uri = @base_uri + path
58
+ response = @httpclient.request(method, request_uri, options)
59
+ MultiJson.load(response.content)
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,3 @@
1
+ module EsDumpRestore
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,3 @@
1
+ require "es_dump_restore/version"
2
+ require "es_dump_restore/es_client"
3
+ require "es_dump_restore/app"
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: es_dump_restore
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Nat Budin
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: multi_json
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: httpclient
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: thor
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rubyzip
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: progress_bar
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: A utility for dumping the contents of an ElasticSearch index to a compressed
95
+ file and restoring the dumpfile back to an ElasticSearch server
96
+ email:
97
+ - nbudin@patientslikeme.com
98
+ executables:
99
+ - es_dump_restore
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - .gitignore
104
+ - Gemfile
105
+ - LICENSE.txt
106
+ - README.md
107
+ - Rakefile
108
+ - bin/es_dump_restore
109
+ - es_dump_restore.gemspec
110
+ - lib/es_dump_restore.rb
111
+ - lib/es_dump_restore/app.rb
112
+ - lib/es_dump_restore/dumpfile.rb
113
+ - lib/es_dump_restore/es_client.rb
114
+ - lib/es_dump_restore/version.rb
115
+ homepage: https://github.com/patientslikeme/es_dump_restore
116
+ licenses: []
117
+ post_install_message:
118
+ rdoc_options: []
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ! '>='
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project:
135
+ rubygems_version: 1.8.23
136
+ signing_key:
137
+ specification_version: 3
138
+ summary: Dump ElasticSearch indexes to files and restore them back
139
+ test_files: []
140
+ has_rdoc: