elasticsearch-extensions 0.0.3 → 0.0.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +29 -0
- data/LICENSE.txt +199 -10
- data/README.md +233 -25
- data/Rakefile +33 -31
- data/elasticsearch-extensions.gemspec +46 -24
- data/lib/elasticsearch/extensions/ansi/actions.rb +45 -1
- data/lib/elasticsearch/extensions/ansi/helpers.rb +17 -0
- data/lib/elasticsearch/extensions/ansi/response.rb +23 -2
- data/lib/elasticsearch/extensions/ansi.rb +17 -0
- data/lib/elasticsearch/extensions/backup.rb +202 -0
- data/lib/elasticsearch/extensions/reindex.rb +187 -0
- data/lib/elasticsearch/extensions/test/cluster/tasks.rb +30 -0
- data/lib/elasticsearch/extensions/test/cluster.rb +701 -0
- data/lib/elasticsearch/extensions/test/profiling.rb +124 -0
- data/lib/elasticsearch/extensions/test/startup_shutdown.rb +71 -0
- data/lib/elasticsearch/extensions/version.rb +18 -1
- data/lib/elasticsearch/extensions.rb +19 -0
- data/lib/elasticsearch-extensions.rb +5 -0
- data/test/ansi/unit/ansi_test.rb +66 -0
- data/test/backup/unit/backup_test.rb +131 -0
- data/test/reindex/integration/reindex_test.rb +107 -0
- data/test/reindex/unit/reindex_test.rb +123 -0
- data/test/test/cluster/integration/cluster_test.rb +66 -0
- data/test/test/cluster/unit/cluster_test.rb +363 -0
- data/test/test_helper.rb +78 -0
- metadata +109 -89
|
@@ -1,41 +1,63 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
1
18
|
# coding: utf-8
|
|
2
19
|
lib = File.expand_path('../lib', __FILE__)
|
|
3
20
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
21
|
require 'elasticsearch/extensions/version'
|
|
5
22
|
|
|
6
23
|
Gem::Specification.new do |s|
|
|
7
|
-
s.name =
|
|
24
|
+
s.name = 'elasticsearch-extensions'
|
|
8
25
|
s.version = Elasticsearch::Extensions::VERSION
|
|
9
|
-
s.authors = [
|
|
10
|
-
s.email = [
|
|
26
|
+
s.authors = ['Karel Minarik']
|
|
27
|
+
s.email = ['karel.minarik@elasticsearch.org']
|
|
11
28
|
s.description = %q{Extensions for the Elasticsearch Rubygem}
|
|
12
29
|
s.summary = %q{Extensions for the Elasticsearch Rubygem}
|
|
13
|
-
s.homepage =
|
|
14
|
-
s.license =
|
|
15
|
-
|
|
30
|
+
s.homepage = 'https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/index.html'
|
|
31
|
+
s.license = 'Apache-2.0'
|
|
32
|
+
s.metadata = {
|
|
33
|
+
'homepage_uri' => 'https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/index.html',
|
|
34
|
+
'changelog_uri' => 'https://github.com/elastic/elasticsearch-ruby/blob/master/CHANGELOG.md',
|
|
35
|
+
'source_code_uri' => 'https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-extensions',
|
|
36
|
+
'bug_tracker_uri' => 'https://github.com/elastic/elasticsearch-ruby/issues'
|
|
37
|
+
}
|
|
16
38
|
s.files = `git ls-files`.split($/)
|
|
17
39
|
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
18
40
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
|
19
|
-
s.require_paths = [
|
|
20
|
-
|
|
21
|
-
s.add_dependency "elasticsearch", '~> 0.4.0'
|
|
22
|
-
s.add_dependency "ansi"
|
|
23
|
-
|
|
41
|
+
s.require_paths = ['lib']
|
|
24
42
|
|
|
25
|
-
s.
|
|
26
|
-
s.
|
|
43
|
+
s.add_dependency 'ansi'
|
|
44
|
+
s.add_dependency 'elasticsearch'
|
|
27
45
|
|
|
28
|
-
s.add_development_dependency
|
|
29
|
-
s.add_development_dependency
|
|
30
|
-
s.add_development_dependency
|
|
31
|
-
s.add_development_dependency
|
|
32
|
-
s.add_development_dependency
|
|
33
|
-
s.add_development_dependency
|
|
46
|
+
s.add_development_dependency 'ruby-prof' unless defined?(JRUBY_VERSION) || defined?(Rubinius)
|
|
47
|
+
s.add_development_dependency 'bundler'
|
|
48
|
+
s.add_development_dependency 'rake', '~> 12.3'
|
|
49
|
+
s.add_development_dependency 'awesome_print'
|
|
50
|
+
s.add_development_dependency 'shoulda-context'
|
|
51
|
+
s.add_development_dependency 'mocha'
|
|
52
|
+
s.add_development_dependency 'minitest', '~> 5'
|
|
53
|
+
s.add_development_dependency 'minitest-reporters', '~> 1'
|
|
54
|
+
s.add_development_dependency 'simplecov'
|
|
55
|
+
s.add_development_dependency 'yard'
|
|
56
|
+
s.add_development_dependency 'cane'
|
|
57
|
+
s.add_development_dependency 'pry'
|
|
34
58
|
|
|
35
|
-
|
|
36
|
-
s.add_development_dependency
|
|
37
|
-
s.add_development_dependency
|
|
38
|
-
s.add_development_dependency "require-prof"
|
|
39
|
-
s.add_development_dependency "coveralls"
|
|
59
|
+
unless defined?(JRUBY_VERSION)
|
|
60
|
+
s.add_development_dependency 'oj'
|
|
61
|
+
s.add_development_dependency 'patron'
|
|
40
62
|
end
|
|
41
63
|
end
|
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
1
18
|
# encoding: utf-8
|
|
2
19
|
|
|
3
20
|
module Elasticsearch
|
|
@@ -100,7 +117,7 @@ module Elasticsearch
|
|
|
100
117
|
output << ""
|
|
101
118
|
end
|
|
102
119
|
output << Helpers.___
|
|
103
|
-
output << "#{hits.size.to_s.ansi(:bold)} of #{json['hits']['total'].to_s.ansi(:bold)} results".ansi(:faint)
|
|
120
|
+
output << "#{hits.size.to_s.ansi(:bold)} of #{json['hits']['total']['value'].to_s.ansi(:bold)} results".ansi(:faint)
|
|
104
121
|
|
|
105
122
|
output.join("\n")
|
|
106
123
|
end
|
|
@@ -157,6 +174,33 @@ module Elasticsearch
|
|
|
157
174
|
output.join("\n")
|
|
158
175
|
end
|
|
159
176
|
|
|
177
|
+
# Display histogram facets
#
# Renders every facet in `json['facets']` whose `_type` is `histogram` as a
# horizontal bar chart: one line per entry, with the bar length scaled so the
# largest count spans `Helpers.width` minus a fixed padding.
#
# @param json    [Hash] The parsed response; only `json['facets']` is read
# @param options [Hash] Not read by this method
#
# @return [String,nil] The rendered lines joined by newlines, or nil when
#                      the response has no `facets` key
#
def display_histogram_facets(json, options={})
  return unless json['facets']

  output = [] << ''

  facets = json['facets'].select { |name, values| values['_type'] == 'histogram' }
  facets.each do |name, values|
    histogram = values['entries'] || []
    next if histogram.empty?

    max     = histogram.map { |t| t['count'] }.max
    padding = 27
    # Without a positive maximum there is nothing to scale. A zero ratio avoids
    # dividing by zero, which would yield Infinity/NaN and make `ceil` raise.
    ratio   = max.to_f > 0 ? (Helpers.width - padding) / max.to_f : 0.0

    histogram.each_with_index do |segment, i|
      # The first bucket is labelled "less than the next bucket's key";
      # a lone bucket has no successor, so it falls back to its own key.
      key = (i == 0 && histogram[1]) ? "<#{histogram[1]['key']}ms" : "#{segment['key']}ms"
      # Clamp at zero so a width smaller than the padding cannot produce a
      # negative repeat count (String#* raises on negative arguments).
      bar = '█' * [(segment['count'] * ratio).ceil, 0].max

      output << key.rjust(7) + ' ' + bar + " [#{segment['count']}]"
    end
  end

  output.join("\n")
end
|
|
203
|
+
|
|
160
204
|
# Display statistical facets
|
|
161
205
|
#
|
|
162
206
|
def display_statistical_facets(json, options={})
|
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
1
18
|
# encoding: utf-8
|
|
2
19
|
|
|
3
20
|
module Elasticsearch
|
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
1
18
|
module Elasticsearch
|
|
2
19
|
module Extensions
|
|
3
20
|
module ANSI
|
|
@@ -13,7 +30,7 @@ module Elasticsearch
|
|
|
13
30
|
# representation of the Elasticsearch response for:
|
|
14
31
|
#
|
|
15
32
|
# * Search results (hits and highlights)
|
|
16
|
-
# * Facets (terms, statistical, date_histogram)
|
|
33
|
+
# * Facets (terms, statistical, histogram, date_histogram)
|
|
17
34
|
# * Analyze API output
|
|
18
35
|
# * Shard allocation
|
|
19
36
|
#
|
|
@@ -31,7 +48,11 @@ module Elasticsearch
|
|
|
31
48
|
Actions.send(m, self, options)
|
|
32
49
|
end
|
|
33
50
|
|
|
34
|
-
output.compact.
|
|
51
|
+
unless output.compact.empty?
|
|
52
|
+
output.compact.join("\n")
|
|
53
|
+
else
|
|
54
|
+
self.respond_to?(:awesome_inspect) ? self.awesome_inspect : self.inspect
|
|
55
|
+
end
|
|
35
56
|
end
|
|
36
57
|
end
|
|
37
58
|
|
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
1
18
|
# encoding: utf-8
|
|
2
19
|
|
|
3
20
|
require 'elasticsearch/extensions'
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# encoding: utf-8
|
|
19
|
+
|
|
20
|
+
require 'pathname'
|
|
21
|
+
require 'fileutils'
|
|
22
|
+
|
|
23
|
+
require 'multi_json'
|
|
24
|
+
|
|
25
|
+
begin
|
|
26
|
+
require 'oj'
|
|
27
|
+
rescue LoadError
|
|
28
|
+
warn('The "oj" gem could not be loaded. JSON parsing and serialization performance may not be optimal.')
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
begin
|
|
32
|
+
require 'patron'
|
|
33
|
+
rescue LoadError
|
|
34
|
+
warn('The "patron" gem could not be loaded. HTTP requests may not be performed optimally.')
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
require 'elasticsearch'
|
|
39
|
+
|
|
40
|
+
module Backup
|
|
41
|
+
module Database
|
|
42
|
+
|
|
43
|
+
# Integration with the Backup gem [http://backup.github.io/backup/v4/]
|
|
44
|
+
#
|
|
45
|
+
# This extension allows you to back up Elasticsearch indices as flat JSON files on disk.
|
|
46
|
+
#
|
|
47
|
+
# @example Use the Backup gem's DSL to configure the backup
|
|
48
|
+
#
|
|
49
|
+
# require 'elasticsearch/extensions/backup'
|
|
50
|
+
#
|
|
51
|
+
# Model.new(:elasticsearch_backup, 'Elasticsearch') do
|
|
52
|
+
#
|
|
53
|
+
# database Elasticsearch do |db|
|
|
54
|
+
# db.url = 'http://localhost:9200'
|
|
55
|
+
# db.indices = 'articles,people'
|
|
56
|
+
# db.size = 500
|
|
57
|
+
# db.scroll = '10m'
|
|
58
|
+
# end
|
|
59
|
+
#
|
|
60
|
+
# store_with Local do |local|
|
|
61
|
+
# local.path = '/tmp/backups'
|
|
62
|
+
# local.keep = 3
|
|
63
|
+
# end
|
|
64
|
+
#
|
|
65
|
+
# compress_with Gzip
|
|
66
|
+
# end
|
|
67
|
+
#
|
|
68
|
+
# Perform the backup with the Backup gem's command line utility:
|
|
69
|
+
#
|
|
70
|
+
# $ backup perform -t elasticsearch_backup
|
|
71
|
+
#
|
|
72
|
+
# The Backup gem can store your backup files on S3, Dropbox and other
|
|
73
|
+
# cloud providers, send notifications about the operation, and so on;
|
|
74
|
+
# read more in the gem documentation.
|
|
75
|
+
#
|
|
76
|
+
# @example Use the integration as a standalone script (eg. in a Rake task)
|
|
77
|
+
#
|
|
78
|
+
# require 'backup'
|
|
79
|
+
# require 'elasticsearch/extensions/backup'
|
|
80
|
+
#
|
|
81
|
+
# Backup::Logger.configure do
|
|
82
|
+
# logfile.enabled = true
|
|
83
|
+
# logfile.log_path = '/tmp/backups/log'
|
|
84
|
+
# end; Backup::Logger.start!
|
|
85
|
+
#
|
|
86
|
+
# backup = Backup::Model.new(:elasticsearch, 'Backup Elasticsearch') do
|
|
87
|
+
# database Backup::Database::Elasticsearch do |db|
|
|
88
|
+
# db.indices = 'test'
|
|
89
|
+
# end
|
|
90
|
+
#
|
|
91
|
+
# store_with Backup::Storage::Local do |local|
|
|
92
|
+
# local.path = '/tmp/backups'
|
|
93
|
+
# end
|
|
94
|
+
# end
|
|
95
|
+
#
|
|
96
|
+
# backup.perform!
|
|
97
|
+
#
|
|
98
|
+
# @example A simple recover script for the backup created in the previous examples
|
|
99
|
+
#
|
|
100
|
+
# PATH = '/path/to/backup/'
|
|
101
|
+
#
|
|
102
|
+
# require 'elasticsearch'
|
|
103
|
+
# client = Elasticsearch::Client.new log: true
|
|
104
|
+
# payload = []
|
|
105
|
+
#
|
|
106
|
+
# Dir[ File.join( PATH, '**', '*.json' ) ].each do |file|
|
|
107
|
+
# document = MultiJson.load(File.read(file))
|
|
108
|
+
# source = document.delete('_source')
|
|
109
|
+
# document.delete('_score')
|
|
110
|
+
# item = document.merge(data: source)
|
|
111
|
+
#
|
|
112
|
+
# payload << { index: item }
|
|
113
|
+
#
|
|
114
|
+
# if payload.size == 100
|
|
115
|
+
# client.bulk body: payload
|
|
116
|
+
# payload = []
|
|
117
|
+
# end
|
|
118
|
+
#
|
|
119
|
+
# end
|
|
120
|
+
# client.bulk body: payload unless payload.empty?
|
|
121
|
+
#
|
|
122
|
+
# @see http://backup.github.io/backup/v4/
|
|
123
|
+
#
|
|
124
|
+
class Elasticsearch < Base
|
|
125
|
+
class Error < ::Backup::Error; end
|
|
126
|
+
|
|
127
|
+
attr_accessor :url,
|
|
128
|
+
:indices,
|
|
129
|
+
:size,
|
|
130
|
+
:scroll
|
|
131
|
+
|
|
132
|
+
attr_accessor :mode
|
|
133
|
+
|
|
134
|
+
def initialize(model, database_id = nil, &block)
|
|
135
|
+
super
|
|
136
|
+
|
|
137
|
+
@url ||= 'http://localhost:9200'
|
|
138
|
+
@indices ||= '_all'
|
|
139
|
+
@size ||= 100
|
|
140
|
+
@scroll ||= '10m'
|
|
141
|
+
@mode ||= 'single'
|
|
142
|
+
|
|
143
|
+
instance_eval(&block) if block_given?
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def perform!
|
|
147
|
+
super
|
|
148
|
+
|
|
149
|
+
case mode
|
|
150
|
+
when 'single'
|
|
151
|
+
__perform_single
|
|
152
|
+
else
|
|
153
|
+
raise Error, "Unsupported mode [#{mode}]"
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
log!(:finished)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def client
|
|
160
|
+
@client ||= ::Elasticsearch::Client.new url: url, logger: logger
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def path
|
|
164
|
+
Pathname.new File.join(dump_path , dump_filename.downcase)
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
def logger
|
|
168
|
+
logger = Backup::Logger.__send__(:logger)
|
|
169
|
+
logger.instance_eval do
|
|
170
|
+
def debug(*args);end
|
|
171
|
+
# alias :debug :info
|
|
172
|
+
alias :fatal :warn
|
|
173
|
+
end
|
|
174
|
+
logger
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# Dump every document from the configured indices as one JSON file per
# document, stored under `<path>/<_index>/<_type>/<sanitized _id>.json`.
#
# The documents are read with a plain scrolling search. The legacy
# `search_type: 'scan'` is intentionally not requested: it was removed in
# Elasticsearch 5.0. As a consequence the initial search response already
# carries the first page of hits, which is written before scrolling on.
#
# @raise [Error] When the initial search response contains no `_scroll_id`
#
def __perform_single
  r = client.search index: indices, scroll: scroll, size: size
  raise Error, "No scroll_id returned in response:\n#{r.inspect}" unless r['_scroll_id']

  # Stop as soon as a page comes back empty (or the client returns nothing).
  while r && !r['hits']['hits'].empty?
    r['hits']['hits'].each do |hit|
      FileUtils.mkdir_p "#{path.join hit['_index'], hit['_type']}"
      File.open("#{path.join hit['_index'], hit['_type'], __sanitize_filename(hit['_id'])}.json", 'w') do |file|
        file.write MultiJson.dump(hit)
      end
    end

    r = client.scroll(scroll_id: r['_scroll_id'], scroll: scroll)
  end
end
|
|
191
|
+
|
|
192
|
+
def __sanitize_filename name
|
|
193
|
+
name
|
|
194
|
+
.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: "�")
|
|
195
|
+
.strip
|
|
196
|
+
.tr("\u{202E}%$|:;/\t\r\n\\", "-")
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
::Backup::Config::DSL::Elasticsearch = ::Backup::Database::Elasticsearch
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# encoding: utf-8
|
|
19
|
+
|
|
20
|
+
module Elasticsearch
|
|
21
|
+
module Extensions
|
|
22
|
+
|
|
23
|
+
# This module allows copying documents from one index/cluster to another one
|
|
24
|
+
#
|
|
25
|
+
# When required together with the client, it will add the `reindex` method
|
|
26
|
+
#
|
|
27
|
+
# @see Reindex::Reindex.initialize
|
|
28
|
+
# @see Reindex::Reindex#perform
|
|
29
|
+
#
|
|
30
|
+
# @see http://www.rubydoc.info/gems/elasticsearch-api/Elasticsearch/API/Actions#reindex-instance_method
|
|
31
|
+
#
|
|
32
|
+
module Reindex
|
|
33
|
+
|
|
34
|
+
# Initialize a new instance of the Reindex class (shortcut)
|
|
35
|
+
#
|
|
36
|
+
# @see Reindex::Reindex.initialize
|
|
37
|
+
#
|
|
38
|
+
def new(arguments={})
|
|
39
|
+
Reindex.new(arguments)
|
|
40
|
+
end; extend self
|
|
41
|
+
|
|
42
|
+
module API
|
|
43
|
+
# Copy documents from one index into another and refresh the destination index
|
|
44
|
+
#
|
|
45
|
+
# @example
|
|
46
|
+
# client.reindex source: { index: 'test1' }, dest: { index: 'test2' }, refresh: true
|
|
47
|
+
#
|
|
48
|
+
# The method allows all the options as {Reindex::Reindex.new}.
|
|
49
|
+
#
|
|
50
|
+
# This method will be mixed into the Elasticsearch client's API, if available.
|
|
51
|
+
#
|
|
52
|
+
def reindex(arguments={})
|
|
53
|
+
arguments[:source] ||= {}
|
|
54
|
+
arguments[:source][:client] = self
|
|
55
|
+
Reindex.new(arguments).perform
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Include the `reindex` method in the API and client, if available
|
|
60
|
+
Elasticsearch::API::Actions.__send__ :include, API if defined?(Elasticsearch::API::Actions)
|
|
61
|
+
Elasticsearch::Transport::Client.__send__ :include, API if defined?(Elasticsearch::Transport::Client) && defined?(Elasticsearch::API)
|
|
62
|
+
|
|
63
|
+
# Copy documents from one index into another
|
|
64
|
+
#
|
|
65
|
+
# @example Copy documents to another index
|
|
66
|
+
#
|
|
67
|
+
# client = Elasticsearch::Client.new
|
|
68
|
+
# reindex = Elasticsearch::Extensions::Reindex.new \
|
|
69
|
+
# source: { index: 'test1', client: client },
|
|
70
|
+
# dest: { index: 'test2' }
|
|
71
|
+
#
|
|
72
|
+
# reindex.perform
|
|
73
|
+
#
|
|
74
|
+
# @example Copy documents to a different cluster
|
|
75
|
+
#
|
|
76
|
+
# source_client = Elasticsearch::Client.new url: 'http://localhost:9200'
|
|
77
|
+
# destination_client = Elasticsearch::Client.new url: 'http://localhost:9250'
|
|
78
|
+
#
|
|
79
|
+
# reindex = Elasticsearch::Extensions::Reindex.new \
|
|
80
|
+
# source: { index: 'test', client: source_client },
|
|
81
|
+
# dest: { index: 'test', client: destination_client }
|
|
82
|
+
# reindex.perform
|
|
83
|
+
#
|
|
84
|
+
# @example Transform the documents during re-indexing
|
|
85
|
+
#
|
|
86
|
+
# reindex = Elasticsearch::Extensions::Reindex.new \
|
|
87
|
+
# source: { index: 'test1', client: client },
|
|
88
|
+
# dest: { index: 'test2' },
|
|
89
|
+
# transform: lambda { |doc| doc['_source']['category'].upcase! }
|
|
90
|
+
#
|
|
91
|
+
#
|
|
92
|
+
# The reindexing process works by "scrolling" an index and sending
|
|
93
|
+
# batches via the "Bulk" API to the destination index/cluster
|
|
94
|
+
#
|
|
95
|
+
# @option arguments [Hash] :source The source index/cluster definition, with `:index` and `:client` keys (*Required*)
|
|
96
|
+
# @option arguments [Hash] :dest The destination index/cluster definition, with an `:index` key and an optional `:client` key (*Required*)
|
|
97
|
+
# @option arguments [Proc] :transform A block which will be executed for each document
|
|
98
|
+
# @option arguments [Integer] :batch_size The size of the batch for scroll operation (Default: 1000)
|
|
99
|
+
# @option arguments [String] :scroll The timeout for the scroll operation (Default: 5min)
|
|
100
|
+
# @option arguments [Boolean] :refresh Whether to refresh the destination index after
|
|
101
|
+
# the operation is completed (Default: false)
|
|
102
|
+
#
|
|
103
|
+
# Be aware, that if you want to change the destination index settings and/or mappings,
|
|
104
|
+
# you have to do so in advance by using the "Indices Create" API.
|
|
105
|
+
#
|
|
106
|
+
# Note that there is a native "Reindex" API in Elasticsearch 2.3.x and higher versions,
|
|
107
|
+
# which will be more performant than the Ruby version.
|
|
108
|
+
#
|
|
109
|
+
# @see http://www.rubydoc.info/gems/elasticsearch-api/Elasticsearch/API/Actions#reindex-instance_method
|
|
110
|
+
#
|
|
111
|
+
class Reindex
|
|
112
|
+
attr_reader :arguments
|
|
113
|
+
|
|
114
|
+
def initialize(arguments={})
|
|
115
|
+
[
|
|
116
|
+
[:source, :index],
|
|
117
|
+
[:source, :client],
|
|
118
|
+
[:dest, :index]
|
|
119
|
+
].each do |required_option|
|
|
120
|
+
value = required_option.reduce(arguments) { |sum, o| sum = sum[o] ? sum[o] : {} }
|
|
121
|
+
|
|
122
|
+
raise ArgumentError,
|
|
123
|
+
"Required argument '#{Hash[*required_option]}' missing" if \
|
|
124
|
+
value.respond_to?(:empty?) ? value.empty? : value.nil?
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
@arguments = {
|
|
128
|
+
batch_size: 1000,
|
|
129
|
+
scroll: '5m',
|
|
130
|
+
refresh: false
|
|
131
|
+
}.merge(arguments)
|
|
132
|
+
|
|
133
|
+
arguments[:dest][:client] ||= arguments[:source][:client]
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Performs the operation
#
# Runs the initial scrolling search against the source client, stores the
# first page of hits, then keeps scrolling and storing pages until a page
# comes back empty. When the `:refresh` argument is truthy, the destination
# index is refreshed afterwards.
#
# @return [Hash] A Hash with the information about the operation outcome;
#                its `:errors` key holds the number of bulk items that
#                reported an error
#
def perform
  output = { errors: 0 }
  source = arguments[:source][:client]

  response = source.search(
    index: arguments[:source][:index],
    scroll: arguments[:scroll],
    size: arguments[:batch_size]
  )

  documents = response['hits']['hits']
  output[:errors] += __count_bulk_errors(__store_batch(documents)) unless documents.empty?

  while (response = source.scroll(scroll_id: response['_scroll_id'], scroll: arguments[:scroll]))
    documents = response['hits']['hits']
    break if documents.empty?

    output[:errors] += __count_bulk_errors(__store_batch(documents))
  end

  arguments[:dest][:client].indices.refresh index: arguments[:dest][:index] if arguments[:refresh]

  output
end

# Counts the items of a bulk response whose single action entry carries an `error`.
# A response without an `items` key counts as zero errors.
#
# @api private
#
def __count_bulk_errors(bulk_response)
  Array(bulk_response['items']).count { |item| item.values.first['error'] }
end
|
|
168
|
+
|
|
169
|
+
def __store_batch(documents)
|
|
170
|
+
body = documents.map do |doc|
|
|
171
|
+
doc['_index'] = arguments[:dest][:index]
|
|
172
|
+
|
|
173
|
+
arguments[:transform].call(doc) if arguments[:transform]
|
|
174
|
+
|
|
175
|
+
doc['data'] = doc['_source']
|
|
176
|
+
doc.delete('_score')
|
|
177
|
+
doc.delete('_source')
|
|
178
|
+
|
|
179
|
+
{ index: doc }
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
arguments[:dest][:client].bulk body: body
|
|
183
|
+
end
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
require 'elasticsearch/extensions/test/cluster'
|
|
19
|
+
|
|
20
|
+
namespace :elasticsearch do
|
|
21
|
+
desc "Start Elasticsearch cluster for tests"
|
|
22
|
+
task :start do
|
|
23
|
+
Elasticsearch::Extensions::Test::Cluster.start
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
desc "Stop Elasticsearch cluster for tests"
|
|
27
|
+
task :stop do
|
|
28
|
+
Elasticsearch::Extensions::Test::Cluster.stop
|
|
29
|
+
end
|
|
30
|
+
end
|