es_dump_restore 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in es_dump_restore.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 PatientsLikeMe
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # es_dump_restore
2
+
3
+ A utility for safely dumping the contents of an ElasticSearch index to a compressed file and restoring it
4
+ later on. This can be used for backups or for cloning an ElasticSearch index without needing to take down
5
+ the server.
6
+
7
+ The file format is a ZIP file containing the index metadata, the number of objects in the index, and a
8
+ series of commands to be sent to the ElasticSearch bulk API.
9
+
10
+ ## Installation
11
+
12
+ gem install es_dump_restore
13
+
14
+ ## Usage
15
+
16
+ To dump an ElasticSearch index to a file:
17
+
18
+ es_dump_restore dump ELASTIC_SEARCH_SERVER_URL INDEX_NAME DESTINATION_FILE
19
+
20
+ To restore an index to an ElasticSearch server:
21
+
22
+ es_dump_restore restore ELASTIC_SEARCH_SERVER_URL DESTINATION_INDEX FILENAME
23
+
24
+ ## Contributing
25
+
26
+ 1. Fork it
27
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
28
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
29
+ 4. Push to the branch (`git push origin my-new-feature`)
30
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'es_dump_restore'
5
+ require 'es_dump_restore/app'
6
+
7
+ EsDumpRestore::App.start
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'es_dump_restore/version'
5
+
6
# Gem specification for es_dump_restore, a command-line utility that dumps an
# ElasticSearch index to a compressed file and restores it later.
Gem::Specification.new do |gem|
  gem.name          = "es_dump_restore"
  gem.version       = EsDumpRestore::VERSION
  gem.authors       = ["Nat Budin"]
  gem.email         = ["nbudin@patientslikeme.com"]
  gem.description   = %q{A utility for dumping the contents of an ElasticSearch index to a compressed file and restoring the dumpfile back to an ElasticSearch server}
  gem.summary       = %q{Dump ElasticSearch indexes to files and restore them back}
  gem.homepage      = "https://github.com/patientslikeme/es_dump_restore"
  # The repository ships the MIT license text in LICENSE.txt; declare it so
  # the built gem's metadata carries the license as well.
  gem.license       = "MIT"

  # Package every file tracked by git; executables are whatever lives in bin/.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency 'multi_json'
  gem.add_dependency 'httpclient'
  gem.add_dependency 'thor'
  # The library requires 'zip/zip' and subclasses Zip::ZipFile, which is the
  # pre-1.0 rubyzip API; rubyzip 1.0 renamed both, so stay below it.
  gem.add_dependency 'rubyzip', '< 1.0.0'
  gem.add_dependency 'progress_bar'
end
@@ -0,0 +1,50 @@
1
+ require "es_dump_restore/es_client"
2
+ require "es_dump_restore/dumpfile"
3
+ require "thor"
4
+ require "progress_bar"
5
+
6
module EsDumpRestore
  # Thor-based command-line interface providing the `dump` and `restore`
  # commands.
  class App < Thor

    desc "dump URL INDEX_NAME FILENAME", "Creates a dumpfile based on the given ElasticSearch index"
    # Writes the index's settings and mappings, the total hit count reported
    # when the scan starts, and one pair of bulk-API lines (an "index" action
    # followed by the document source) per hit into the dumpfile.
    #
    # url        - base URL of the ElasticSearch server
    # index_name - name of the index to dump
    # filename   - path of the dumpfile to write
    def dump(url, index_name, filename)
      client = EsClient.new(url, index_name)

      Dumpfile.write(filename) do |dumpfile|
        dumpfile.index = {
          settings: client.settings,
          mappings: client.mappings
        }

        client.start_scan do |scroll_id, total|
          dumpfile.num_objects = total
          bar = ProgressBar.new(total)

          dumpfile.get_objects_output_stream do |out|
            client.each_scroll_hit(scroll_id) do |hit|
              metadata = { index: { _type: hit["_type"], _id: hit["_id"] } }
              # Serialize through MultiJson, like the rest of the library,
              # rather than #to_json, which is only defined once a JSON
              # library that patches core classes has been loaded.
              out.write("#{MultiJson.dump(metadata)}\n#{MultiJson.dump(hit["_source"])}\n")
              bar.increment!
            end
          end
        end
      end
    end

    desc "restore URL INDEX_NAME FILENAME", "Restores a dumpfile into the given ElasticSearch index"
    # Creates the index from the metadata stored in the dumpfile, then replays
    # the stored bulk commands in batches of 1000 objects.
    #
    # url        - base URL of the ElasticSearch server
    # index_name - name of the index to create and fill
    # filename   - path of the dumpfile to read
    def restore(url, index_name, filename)
      client = EsClient.new(url, index_name)

      Dumpfile.read(filename) do |dumpfile|
        client.create_index(dumpfile.index)

        bar = ProgressBar.new(dumpfile.num_objects)
        dumpfile.scan_objects(1000) do |batch, size|
          client.bulk_index batch
          bar.increment!(size)
        end
      end
    end

  end
end
@@ -0,0 +1,80 @@
1
+ require 'zip/zip'
2
+ require 'multi_json'
3
+
4
module EsDumpRestore
  # A ZIP archive holding a dumped index. It uses three entries:
  # "index.json" (the index metadata), "num_objects.json" (the object count)
  # and "objects" (newline-delimited bulk API commands).
  class Dumpfile < Zip::ZipFile
    # Opens +filename+ with the CREATE flag, yields the Dumpfile and always
    # closes it afterwards.
    def self.write(filename, &block)
      df = Dumpfile.new(filename, Zip::ZipFile::CREATE)
      begin
        yield df
      ensure
        df.close
      end
    end

    # Opens the existing dumpfile +filename+, yields it and always closes it
    # afterwards.
    def self.read(filename, &block)
      df = Dumpfile.new(filename)
      begin
        yield df
      ensure
        df.close
      end
    end

    # Yields an input stream over the "objects" entry.
    def get_objects_input_stream(&block)
      get_input_stream("objects", &block)
    end

    # Yields an output stream that writes the "objects" entry.
    def get_objects_output_stream(&block)
      get_output_stream("objects", nil, &block)
    end

    # Returns the object count stored in "num_objects.json".
    def num_objects
      read_json_file("num_objects.json")["num_objects"]
    end

    # Stores the object count +n+ in "num_objects.json".
    def num_objects=(n)
      write_json_file("num_objects.json", {num_objects: n})
    end

    # Reads the "objects" entry in batches of up to +batch_size+ objects.
    # Each object occupies two lines (a metadata line and a source line).
    # Yields the concatenated lines of a batch and the number of objects in
    # it, and stops once no more lines are available.
    #
    # Raises EOFError when a metadata line has no matching source line.
    def scan_objects(batch_size, &block)
      get_objects_input_stream do |input|
        loop do
          commands, items = read_batch(input, batch_size)
          break if commands.empty?

          yield commands, items
        end
      end
    end

    # Stores +index+ as JSON in "index.json".
    def index=(index)
      write_json_file("index.json", index)
    end

    # Returns the parsed contents of "index.json".
    def index
      read_json_file("index.json")
    end

    private
    # Reads up to +batch_size+ metadata/source line pairs from +input+ and
    # returns [commands, items].
    def read_batch(input, batch_size)
      commands = ""
      items = 0

      batch_size.times do
        metadata = input.gets("\n")
        break if metadata.nil?

        source = input.gets("\n")
        # A metadata line without its source line means the entry was cut
        # short; fail with a clear message instead of a TypeError from
        # appending nil to the command string.
        raise EOFError, "Truncated dumpfile: metadata line without a source line" if source.nil?

        commands << metadata << source
        items += 1
      end

      [commands, items]
    end

    # Parses the archive entry +filename+ as JSON.
    def read_json_file(filename)
      get_input_stream(filename) { |i| MultiJson.load(i.read) }
    end

    # Writes +object+ as JSON into the archive entry +filename+.
    def write_json_file(filename, object)
      get_output_stream(filename) { |o| o.write MultiJson.dump(object) }
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'uri'
2
+ require 'httpclient'
3
+ require 'multi_json'
4
+
5
module EsDumpRestore
  # Small HTTP/JSON client bound to a single index on an ElasticSearch server.
  class EsClient
    attr_accessor :base_uri
    attr_accessor :index_name

    # base_uri   - URL of the ElasticSearch server (String)
    # index_name - name of the index all relative requests are sent to
    def initialize(base_uri, index_name)
      @httpclient = HTTPClient.new
      @index_name = index_name
      # Strip trailing slashes so that "http://host:9200/" and
      # "http://host:9200" both yield ".../index_name/" rather than a URL
      # containing "//".
      @base_uri = URI.parse(base_uri.sub(%r{/+\z}, "") + "/" + index_name + "/")
    end

    # Returns the entry for this index from the `_mapping` response.
    def mappings
      request(:get, '_mapping')[index_name]
    end

    # Returns the entry for this index from the `_settings` response.
    def settings
      request(:get, '_settings')[index_name]
    end

    # Starts a scan-type search matching every document (10 minute scroll
    # window, size 500) and yields the scroll id and the total hit count
    # taken from the response.
    def start_scan(&block)
      scroll = request(:get, '_search',
        query: { search_type: 'scan', scroll: '10m', size: 500 },
        body: MultiJson.dump({ query: { match_all: {} } }) )
      total = scroll["hits"]["total"]
      scroll_id = scroll["_scroll_id"]

      yield scroll_id, total
    end

    # Fetches scroll batches starting from +scroll_id+ and yields every hit,
    # stopping at the first batch that contains no hits.
    def each_scroll_hit(scroll_id, &block)
      current_id = scroll_id

      loop do
        batch = request(:get, '/_search/scroll', query: { scroll: '10m', scroll_id: current_id })
        hits = batch["hits"]["hits"]
        break if hits.empty?

        hits.each do |hit|
          yield hit
        end

        # Each scroll response may carry a new scroll id; continue with the
        # most recent one and fall back to the previous id when it is absent.
        current_id = batch["_scroll_id"] || current_id
      end
    end

    # POSTs +metadata+ (serialized to JSON) to the index URL.
    def create_index(metadata)
      request(:post, "", :body => MultiJson.dump(metadata))
    end

    # POSTs the already-serialized bulk commands in +data+ to the index's
    # `_bulk` endpoint.
    def bulk_index(data)
      request(:post, "_bulk", :body => data)
    end

    private

    # Resolves +path+ against the index URI (a path starting with "/" is
    # resolved from the server root), performs the HTTP request and returns
    # the response body parsed as JSON.
    def request(method, path, options={})
      request_uri = @base_uri + path
      response = @httpclient.request(method, request_uri, options)
      MultiJson.load(response.content)
    end
  end
end
@@ -0,0 +1,3 @@
1
module EsDumpRestore
  # Release version of the gem; the gemspec reads this constant.
  VERSION = '0.0.1'
end
@@ -0,0 +1,3 @@
1
+ require "es_dump_restore/version"
2
+ require "es_dump_restore/es_client"
3
+ require "es_dump_restore/app"
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: es_dump_restore
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Nat Budin
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: multi_json
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: httpclient
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: thor
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rubyzip
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: progress_bar
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: A utility for dumping the contents of an ElasticSearch index to a compressed
95
+ file and restoring the dumpfile back to an ElasticSearch server
96
+ email:
97
+ - nbudin@patientslikeme.com
98
+ executables:
99
+ - es_dump_restore
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - .gitignore
104
+ - Gemfile
105
+ - LICENSE.txt
106
+ - README.md
107
+ - Rakefile
108
+ - bin/es_dump_restore
109
+ - es_dump_restore.gemspec
110
+ - lib/es_dump_restore.rb
111
+ - lib/es_dump_restore/app.rb
112
+ - lib/es_dump_restore/dumpfile.rb
113
+ - lib/es_dump_restore/es_client.rb
114
+ - lib/es_dump_restore/version.rb
115
+ homepage: https://github.com/patientslikeme/es_dump_restore
116
+ licenses: []
117
+ post_install_message:
118
+ rdoc_options: []
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ! '>='
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project:
135
+ rubygems_version: 1.8.23
136
+ signing_key:
137
+ specification_version: 3
138
+ summary: Dump ElasticSearch indexes to files and restore them back
139
+ test_files: []
140
+ has_rdoc: