es-dump 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODNmOTc0NGVhNmFkNmEzNDliODk5MjkxNTc0OTFlOTI2NjAyMGY2Yg==
5
+ data.tar.gz: !binary |-
6
+ YmQ2YjM3ZDk0NTY3YWRhMTRjYmQ2NWVkMTU3ODUzZTgyNWIxYjJhYQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ Y2MxMWZkOWRlMjRhYWZjMGFhNzE0NTM0YWRjNTc4NzE0YzNlNDE4ODBlYTk5
10
+ NTAzZTU0Y2EwNDU5Yjk3MTY4ZmM2NTg5MjUzMjU2ZTA5MjQyN2FmODlhNTQ0
11
+ NDIyMDk4NmYxY2EwZDc5ZDVkYjE3Nzc4NGU4YTgwNWJkNDRjYWM=
12
+ data.tar.gz: !binary |-
13
+ ZmE3MTgyMTcyMGIxNzYyOTczOWU2YjJkNDUyMDBmMDNiN2RiYmY4ZjFlYjRm
14
+ ZTQ3MGI4MTUxYzM5MGExMmVjNTA1NjllYjIwNjE4YzJmZGE3YmMwMGVlMjJh
15
+ ZGY2YmViZTFiN2Y0MGM1MGYwNmI0ZjJlNDRlYjk0NDExZTM1NzE=
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0
4
+ deploy:
5
+ provider: rubygems
6
+ api_key:
7
+ secure: Y60yEL3rsnFf/OkzLeIgI6MgEIunB1Rjb0p/Ig7L/mU3w8528R+PA1h0C1O6R0Fh3WfxkUoJ7gdEzS0D5wNlzUl4svhsrBfJlqYbxIjRr8TyEPDQC5M1K0rgSzio7rEKPYpg/G4aBkBtxKGmw9EIlPiGtVx06OxUKuma2+8+Fv8=
8
+ gem: es-dump
9
+ on:
10
+ repo: criteo/es-dump
11
+ all_branches: true
12
+ tags: true
13
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in es-dump.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Dan Forest Barbier
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,16 @@
1
+ # ES-Dump
2
+
3
+ A basic ElasticSearch dump/import tool
4
+
5
+ ## Usage
6
+
7
+ Exporting entries
8
+
9
+ es-dump --host hostname --port port --index index_name --out index_backup.json
10
+
11
+ Importing entries (existing documents with the same type and ID will be overwritten!)
12
+
13
+ es-dump --host hostname --port port --index index_name --in index_backup.json
14
+
15
+ Entries are fetched in pages of 128 by default. If your entries are small, you can increase the page size
16
+ using the `--pagesize COUNT` option; if they are very large, decrease it.
@@ -0,0 +1,4 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task :default
4
+
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env ruby
2
+ require 'elasticsearch'
3
+ require 'json'
4
+ require 'optparse'
5
+
6
# Settings for this run. Defaults are listed here; each command-line flag
# below overrides or adds one entry:
#   :host     - ES server hostname (no default)
#   :port     - ES server port
#   :index    - name of the index to work on (no default)
#   :pagesize - number of hits requested per search call during export
#   :input    - path of the dump file to import
#   :output   - path of the dump file to write
options = {
  :port     => 9200,
  :pagesize => 128,
}

# The parser is kept in a global so the `error` helper defined further down
# can print the usage text.
$op = OptionParser.new
$op.banner = 'Usage: es-dump [options]'
$op.on('--host HOST', 'ES server hostname') { |v| options[:host] = v }
# Integer coercion makes OptionParser reject non-numeric values instead of
# letting them through unchecked.
$op.on('--port PORT', Integer, 'ES server port') { |v| options[:port] = v }
$op.on('--index INDEX', 'Name of the index to export') { |v| options[:index] = v }
$op.on('--pagesize COUNT', Integer, 'Size of pages fetched from ES') do |v|
  # The export loop stops when a page comes back smaller than the page size,
  # so a size below 1 would never terminate.
  raise OptionParser::InvalidArgument, 'must be a positive integer' if v < 1
  options[:pagesize] = v
end
$op.on('--in INPUT', 'Input file for import mode') { |v| options[:input] = v }
$op.on('--out OUTPUT', 'Output file for export mode') { |v| options[:output] = v }
$op.on('-h', '--help', 'Prints this help message') { puts $op; exit }

begin
  $op.parse!
rescue OptionParser::ParseError => e
  # Unknown flags, missing arguments and invalid values end up here; report
  # them with the usage text rather than a backtrace.
  $stderr.puts "Error: #{e.message}"
  $stderr.puts $op
  exit 1
end
21
+
22
# Reports a fatal problem and terminates the program.
#
# Writes "Error: <message>" followed by the usage text held in the global
# option parser ($op) to standard error, then exits with status 1 so that
# shells and calling scripts can tell the run failed.
#
# message - description of what went wrong (interpolated, so any object
#           responding to #to_s is accepted).
#
# Never returns; raises SystemExit.
def error(message)
  $stderr.puts "Error: #{message}"
  $stderr.puts $op
  exit 1
end
27
+
28
# Settings that every run needs, mapped to the complaint shown when one is
# missing. Checked in this order; `error` terminates on the first miss.
{
  :host  => 'You must specify a server hostname to connect to',
  :index => 'You must specify an index name',
}.each do |required, complaint|
  error complaint unless options.key? required
end

# The mode is chosen by which file option was supplied; at least one of
# them has to be present.
if [:output, :input].none? { |mode| options.key? mode }
  error 'You must specify either an input or an output file'
end

address = "#{options[:host]}:#{options[:port]}"
puts "Connecting to ElasticSearch server at #{address}"
es = Elasticsearch::Client.new host: address

# Running count of entries handled; reported once the run finishes.
offset = 0
38
+
39
if options.key? :output
  # Export mode: page through the index with from/size and append every hit
  # to the output file as pretty-printed JSON, one object after another.
  # NOTE(review): from/size paging may be capped by the server on large
  # indices -- confirm against the targeted Elasticsearch version.
  puts "Writing output file at #{options[:output]}"

  # Block form guarantees the file is flushed and closed, including when
  # `error` terminates the program part-way through.
  File.open(options[:output], 'w') do |output|
    loop do
      begin
        result = es.search index: options[:index],
                           body: {
                             from: offset,
                             size: options[:pagesize],
                           }
      rescue StandardError => e
        # StandardError only: Interrupt and SystemExit must not be reported
        # as search failures.
        error e.message
      end

      entries = result['hits']['hits']
      entries.each do |entry|
        output.write JSON.pretty_generate(entry)
        output.write "\n"
      end

      offset += entries.size
      # A short page means the index is exhausted. The explicit empty check
      # also ends the loop when the page size is 0.
      break if entries.empty? || entries.size < options[:pagesize]
    end
  end
elsif options.key? :input
  # Import mode: the dump is a sequence of pretty-printed JSON objects, so
  # "}\n{" marks the boundary between two consecutive entries.
  puts "Reading input file at #{options[:input]}"

  # File.foreach closes the file once iteration ends.
  File.foreach(options[:input], "}\n{") do |chunk|
    begin
      # Every chunk except the last ends with the "{" that opens the NEXT
      # entry; strip it only when it is really there, so a final entry
      # without a trailing newline keeps its closing brace.
      json = chunk.end_with?("}\n{") ? chunk[0..-2] : chunk
      # Every chunk except the first lost its own opening "{" to the
      # separator of the previous chunk.
      json = "{#{json}" unless json.start_with?('{')
      document = JSON.parse json

      es.index index: options[:index],
               type:  document['_type'],
               id:    document['_id'],
               body:  document['_source']
    rescue StandardError => e
      error e.message
    end

    offset += 1
  end
end

puts "Done, handled #{offset} entries"
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
# Package description for the es-dump gem.
Gem::Specification.new do |spec|
  spec.name          = "es-dump"
  spec.version       = "0.1.0"
  spec.authors       = ["Dan Forest-Barbier"]
  spec.email         = ["d.forestbarbier@criteo.com"]
  spec.summary       = %q{A small tool for Elasticsearch backup and import}
  spec.description   = %q{The data is dumped as JSON objects into the output file}
  spec.homepage      = "https://github.com/criteo/es-dump"
  spec.license       = "MIT"

  # Package every file tracked by git; anything under bin/ becomes an
  # installed executable.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # bin/es-dump does `require 'elasticsearch'`; without a declared runtime
  # dependency a fresh `gem install es-dump` yields an executable that
  # fails with LoadError.
  # NOTE(review): pin a version range known to work with the `type:`
  # argument used by the import code.
  spec.add_runtime_dependency "elasticsearch"

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: es-dump
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Dan Forest-Barbier
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: The data is dumped as JSON objects into the output file
42
+ email:
43
+ - d.forestbarbier@criteo.com
44
+ executables:
45
+ - es-dump
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .travis.yml
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - bin/es-dump
55
+ - es-dump.gemspec
56
+ homepage: https://github.com/criteo/es-dump
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ! '>='
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 2.4.5
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: A small tool for Elasticsearch backup and import
80
+ test_files: []