es-dump 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.travis.yml +13 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +16 -0
- data/Rakefile +4 -0
- data/bin/es-dump +84 -0
- data/es-dump.gemspec +22 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ODNmOTc0NGVhNmFkNmEzNDliODk5MjkxNTc0OTFlOTI2NjAyMGY2Yg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YmQ2YjM3ZDk0NTY3YWRhMTRjYmQ2NWVkMTU3ODUzZTgyNWIxYjJhYQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
Y2MxMWZkOWRlMjRhYWZjMGFhNzE0NTM0YWRjNTc4NzE0YzNlNDE4ODBlYTk5
|
10
|
+
NTAzZTU0Y2EwNDU5Yjk3MTY4ZmM2NTg5MjUzMjU2ZTA5MjQyN2FmODlhNTQ0
|
11
|
+
NDIyMDk4NmYxY2EwZDc5ZDVkYjE3Nzc4NGU4YTgwNWJkNDRjYWM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZmE3MTgyMTcyMGIxNzYyOTczOWU2YjJkNDUyMDBmMDNiN2RiYmY4ZjFlYjRm
|
14
|
+
ZTQ3MGI4MTUxYzM5MGExMmVjNTA1NjllYjIwNjE4YzJmZGE3YmMwMGVlMjJh
|
15
|
+
ZGY2YmViZTFiN2Y0MGM1MGYwNmI0ZjJlNDRlYjk0NDExZTM1NzE=
|
data/.travis.yml
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 2.0.0
|
4
|
+
deploy:
|
5
|
+
provider: rubygems
|
6
|
+
api_key:
|
7
|
+
secure: Y60yEL3rsnFf/OkzLeIgI6MgEIunB1Rjb0p/Ig7L/mU3w8528R+PA1h0C1O6R0Fh3WfxkUoJ7gdEzS0D5wNlzUl4svhsrBfJlqYbxIjRr8TyEPDQC5M1K0rgSzio7rEKPYpg/G4aBkBtxKGmw9EIlPiGtVx06OxUKuma2+8+Fv8=
|
8
|
+
gem: es-dump
|
9
|
+
on:
|
10
|
+
repo: criteo/es-dump
|
11
|
+
all_branches: true
|
12
|
+
tags: true
|
13
|
+
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Dan Forest Barbier
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# ES-Dump
|
2
|
+
|
3
|
+
A basic Elasticsearch dump/import tool
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
Exporting entries
|
8
|
+
|
9
|
+
es-dump --host hostname --port port --index index_name --out index_backup.json
|
10
|
+
|
11
|
+
Importing entries (will potentially overwrite existing values!)
|
12
|
+
|
13
|
+
es-dump --host hostname --port port --index index_name --in index_backup.json
|
14
|
+
|
15
|
+
If your entries are small, you can increase the page size (or decrease it if they are very large)
|
16
|
+
using the `--pagesize #ENTRIES` option.
|
data/Rakefile
ADDED
data/bin/es-dump
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'elasticsearch'
|
3
|
+
require 'json'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Default settings; each command-line switch below overrides or adds a key.
options = {
  :port => 9200,
  :pagesize => 128,
}

# The parser lives in a global because the `error` helper defined further
# down in this script prints it as the usage text.
$op = OptionParser.new
$op.banner = 'Usage: es-dump [options]'
$op.on('--host HOST', 'ES server hostname') { |v| options[:host] = v }
$op.on('--port PORT', 'ES server port') { |v| options[:port] = v }
$op.on('--index INDEX', 'Name of the index to export') { |v| options[:index] = v }
# The page size must be a strictly positive integer: the export loop stops
# when a page comes back smaller than the page size, which can never happen
# when the page size is 0 (the value String#to_i yields for non-numeric input).
$op.on('--pagesize COUNT', OptionParser::DecimalInteger, 'Size of pages fetched from ES') do |v|
  raise OptionParser::InvalidArgument, 'must be a positive integer' unless v > 0
  options[:pagesize] = v
end
$op.on('--in INPUT', 'Input file for import mode') { |v| options[:input] = v }
$op.on('--out OUTPUT', 'Output file for export mode') { |v| options[:output] = v }
$op.on('-h', '--help', 'Prints this help message') { puts $op; exit }

# Report malformed command lines with the usage text rather than a backtrace.
begin
  $op.parse!
rescue OptionParser::ParseError => e
  $stderr.puts "Error: #{e.message}"
  $stderr.puts $op
  exit 1
end
|
21
|
+
|
22
|
+
# Reports a fatal problem and terminates the script.
#
# Writes the message followed by the usage text (the global $op option
# parser) to standard error, then exits with status 1 so that shells and
# calling scripts can tell the run failed.
#
# @param message [#to_s] description of what went wrong
# @return [void] never returns normally; raises SystemExit
def error(message)
  $stderr.puts "Error: #{message}"
  $stderr.puts $op
  exit 1
end
|
27
|
+
|
28
|
+
# --- Argument validation ----------------------------------------------------
error 'You must specify a server hostname to connect to' unless options.key?(:host)
error 'You must specify an index name' unless options.key?(:index)

unless options.key?(:output) || options.key?(:input)
  error 'You must specify either an input or an output file'
end

# --- Connection -------------------------------------------------------------
puts "Connecting to ElasticSearch server at #{options[:host]}:#{options[:port]}"
es = Elasticsearch::Client.new host: "#{options[:host]}:#{options[:port]}"
offset = 0 # number of entries handled so far; doubles as the export cursor

if options.key?(:output)
  # --- Export: page through the index and write one pretty-printed JSON
  # object per hit, each followed by a newline. Export takes precedence when
  # both --out and --in are given.
  puts "Writing output file at #{options[:output]}"
  # Block form guarantees the file is closed even when `error` exits mid-run.
  File.open(options[:output], 'w') do |output|
    loop do
      begin
        # NOTE(review): from/size paging has no explicit sort and may be
        # rejected for deep offsets by the server -- confirm against the
        # targeted Elasticsearch version (scroll may be required).
        result = es.search(index: options[:index],
                           body: { from: offset, size: options[:pagesize] })
      rescue StandardError => e
        error e.message
      end

      entries = result['hits']['hits']
      entries.each do |entry|
        output << JSON.pretty_generate(entry) << "\n"
      end

      offset += entries.size
      # A short page means the index has been exhausted.
      break if entries.size < options[:pagesize]
    end
  end
elsif options.key?(:input)
  # --- Import: the dump is a sequence of pretty-printed objects, so the
  # text "}\n{" only occurs between two top-level entries and is used as the
  # record separator.
  puts "Reading input file at #{options[:input]}"
  File.open(options[:input], 'r') do |input|
    input.each_line("}\n{") do |chunk|
      begin
        # The separator swallowed this entry's opening brace (all but the first).
        chunk = '{' + chunk unless chunk[0] == '{'
        # Drop the next entry's opening brace, but only when the separator is
        # actually present; the final entry has none and must stay intact.
        chunk = chunk[0..-2] if chunk.end_with?("}\n{")
        entry = JSON.parse(chunk)

        es.index(index: options[:index],
                 type: entry['_type'],
                 id: entry['_id'],
                 body: entry['_source'])
      rescue StandardError => e
        error e.message
      end

      offset += 1
    end
  end
end

puts "Done, handled #{offset} entries"
|
data/es-dump.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

# Gem specification for es-dump, a command-line tool shipped as bin/es-dump.
Gem::Specification.new do |spec|
  spec.name          = "es-dump"
  spec.version       = "0.1.0"
  spec.authors       = ["Dan Forest-Barbier"]
  spec.email         = ["d.forestbarbier@criteo.com"]
  spec.summary       = %q{A small tool for Elasticsearch backup and import}
  spec.description   = %q{The data is dumped as JSON objects into the output file}
  spec.homepage      = "https://github.com/criteo/es-dump"
  spec.license       = "MIT"

  # Package every file tracked by git; executables are whatever lives in bin/.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # bin/es-dump does `require 'elasticsearch'`, so the client must be installed
  # together with this gem or the executable fails with LoadError.
  # NOTE(review): version left unconstrained -- pin it once the supported
  # client range is confirmed.
  spec.add_runtime_dependency "elasticsearch"

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: es-dump
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Dan Forest-Barbier
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description: The data is dumped as JSON objects into the output file
|
42
|
+
email:
|
43
|
+
- d.forestbarbier@criteo.com
|
44
|
+
executables:
|
45
|
+
- es-dump
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- .travis.yml
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- bin/es-dump
|
55
|
+
- es-dump.gemspec
|
56
|
+
homepage: https://github.com/criteo/es-dump
|
57
|
+
licenses:
|
58
|
+
- MIT
|
59
|
+
metadata: {}
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ! '>='
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
requirements: []
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 2.4.5
|
77
|
+
signing_key:
|
78
|
+
specification_version: 4
|
79
|
+
summary: A small tool for Elasticsearch backup and import
|
80
|
+
test_files: []
|