elasticsearch-extensions 0.0.20 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +79 -0
- data/elasticsearch-extensions.gemspec +6 -1
- data/lib/elasticsearch/extensions.rb +2 -0
- data/lib/elasticsearch/extensions/backup.rb +34 -9
- data/lib/elasticsearch/extensions/reindex.rb +160 -0
- data/lib/elasticsearch/extensions/test/cluster.rb +6 -4
- data/lib/elasticsearch/extensions/version.rb +1 -1
- data/test/reindex/integration/reindex_test.rb +81 -0
- data/test/reindex/unit/reindex_test.rb +106 -0
- data/test/test_helper.rb +12 -0
- metadata +11 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d1007e770ea632d144aed7e8d7d74f8eb4e9527
|
4
|
+
data.tar.gz: 1a98abf5d303f796b5b96c40bd306085d4cde8e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbbf6b9033cee7d278c8ef22df5996b6503e485c6fd493620967c0a8c4b2d1fc8acca3b54331f165167435716e4f445c51ea435e4cd7e51370dbc990f4c86fcd
|
7
|
+
data.tar.gz: aa3855bede2446ac3784afa8d9080beb19370cdba44e22899ecd439e83ac022ad2fc8de0bbdc0a2e0db907495ae8f9bc813f0c815617e62ab6c45cdfead731a3
|
data/README.md
CHANGED
@@ -23,6 +23,85 @@ or install it from a source code checkout:
|
|
23
23
|
|
24
24
|
## Extensions
|
25
25
|
|
26
|
+
### Backup
|
27
|
+
|
28
|
+
Backup Elasticsearch indices as flat JSON files on the disk via integration
|
29
|
+
with the [_Backup_](http://backup.github.io/backup/v4/) gem.
|
30
|
+
|
31
|
+
Use the Backup gem's DSL to configure the backup:
|
32
|
+
|
33
|
+
require 'elasticsearch/extensions/backup'
|
34
|
+
|
35
|
+
Model.new(:elasticsearch_backup, 'Elasticsearch') do
|
36
|
+
|
37
|
+
database Elasticsearch do |db|
|
38
|
+
db.url = 'http://localhost:9200'
|
39
|
+
db.indices = 'test'
|
40
|
+
end
|
41
|
+
|
42
|
+
store_with Local do |local|
|
43
|
+
local.path = '/tmp/backups'
|
44
|
+
end
|
45
|
+
|
46
|
+
compress_with Gzip
|
47
|
+
end
|
48
|
+
|
49
|
+
Perform the backup with the Backup gem's command line utility:
|
50
|
+
|
51
|
+
$ backup perform -t elasticsearch_backup
|
52
|
+
|
53
|
+
See more information in the [`Backup::Database::Elasticsearch`](lib/elasticsearch/extensions/backup.rb)
|
54
|
+
class documentation.
|
55
|
+
|
56
|
+
### Reindex
|
57
|
+
|
58
|
+
Copy documents from one index and cluster into another one, for example for purposes of changing
|
59
|
+
the settings and mappings of the index.
|
60
|
+
|
61
|
+
**NOTE:** Elasticsearch natively supports re-indexing since version 2.3. This extension is useful
|
62
|
+
when you need the feature on older versions.
|
63
|
+
|
64
|
+
When the extension is loaded together with the
|
65
|
+
[Ruby client for Elasticsearch](../elasticsearch/README.md),
|
66
|
+
a `reindex` method is added to the client:
|
67
|
+
|
68
|
+
require 'elasticsearch'
|
69
|
+
require 'elasticsearch/extensions/reindex'
|
70
|
+
|
71
|
+
client = Elasticsearch::Client.new
|
72
|
+
target_client = Elasticsearch::Client.new url: 'http://localhost:9250', log: true
|
73
|
+
|
74
|
+
client.index index: 'test', type: 'd', body: { title: 'foo' }
|
75
|
+
|
76
|
+
client.reindex source: { index: 'test' },
|
77
|
+
target: { index: 'test', client: target_client },
|
78
|
+
transform: lambda { |doc| doc['_source']['title'].upcase! },
|
79
|
+
refresh: true
|
80
|
+
# => { errors: 0 }
|
81
|
+
|
82
|
+
target_client.search index: 'test'
|
83
|
+
# => ... hits ... "title"=>"FOO"
|
84
|
+
|
85
|
+
The method takes similar arguments as the core API
|
86
|
+
[`reindex`](http://www.rubydoc.info/gems/elasticsearch-api/Elasticsearch/API/Actions#reindex-instance_method)
|
87
|
+
method.
|
88
|
+
|
89
|
+
You can also use the `Reindex` class directly:
|
90
|
+
|
91
|
+
require 'elasticsearch'
|
92
|
+
require 'elasticsearch/extensions/reindex'
|
93
|
+
|
94
|
+
client = Elasticsearch::Client.new
|
95
|
+
|
96
|
+
reindex = Elasticsearch::Extensions::Reindex.new \
|
97
|
+
source: { index: 'test', client: client },
|
98
|
+
target: { index: 'test-copy' }
|
99
|
+
|
100
|
+
reindex.perform
|
101
|
+
|
102
|
+
See more information in the [`Elasticsearch::Extensions::Reindex::Reindex`](lib/elasticsearch/extensions/reindex.rb)
|
103
|
+
class documentation.
|
104
|
+
|
26
105
|
### ANSI
|
27
106
|
|
28
107
|
Colorize and format selected Elasticsearch response parts in terminal:
|
@@ -30,7 +30,12 @@ Gem::Specification.new do |s|
|
|
30
30
|
end
|
31
31
|
|
32
32
|
s.add_development_dependency "bundler", "> 1"
|
33
|
-
|
33
|
+
|
34
|
+
if defined?(RUBY_VERSION) && RUBY_VERSION > '1.9'
|
35
|
+
s.add_development_dependency "rake", "~> 11.1"
|
36
|
+
else
|
37
|
+
s.add_development_dependency "rake", "< 11.0"
|
38
|
+
end
|
34
39
|
|
35
40
|
s.add_development_dependency "awesome_print"
|
36
41
|
|
@@ -12,25 +12,25 @@ require 'patron'
|
|
12
12
|
module Backup
|
13
13
|
module Database
|
14
14
|
|
15
|
-
# Integration with the Backup gem [
|
15
|
+
# Integration with the Backup gem [http://backup.github.io/backup/v4/]
|
16
16
|
#
|
17
17
|
# This extension allows you to back up Elasticsearch indices as flat JSON files on the disk.
|
18
18
|
#
|
19
|
-
# Use the Backup gem's DSL to configure the backup
|
19
|
+
# @example Use the Backup gem's DSL to configure the backup
|
20
20
|
#
|
21
21
|
# require 'elasticsearch/extensions/backup'
|
22
22
|
#
|
23
23
|
# Model.new(:elasticsearch_backup, 'Elasticsearch') do
|
24
24
|
#
|
25
25
|
# database Elasticsearch do |db|
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
26
|
+
# db.url = 'http://localhost:9200'
|
27
|
+
# db.indices = 'articles,people'
|
28
|
+
# db.size = 500
|
29
|
+
# db.scroll = '10m'
|
30
30
|
# end
|
31
31
|
#
|
32
32
|
# store_with Local do |local|
|
33
|
-
# local.path = '/
|
33
|
+
# local.path = '/tmp/backups'
|
34
34
|
# local.keep = 3
|
35
35
|
# end
|
36
36
|
#
|
@@ -41,8 +41,33 @@ module Backup
|
|
41
41
|
#
|
42
42
|
# $ backup perform -t elasticsearch_backup
|
43
43
|
#
|
44
|
+
# The Backup gem can store your backup files on S3, Dropbox and other
|
45
|
+
# cloud providers, send notifications about the operation, and so on;
|
46
|
+
# read more in the gem documentation.
|
44
47
|
#
|
45
|
-
#
|
48
|
+
# @example Use the integration as a standalone script (eg. in a Rake task)
|
49
|
+
#
|
50
|
+
# require 'backup'
|
51
|
+
# require 'elasticsearch/extensions/backup'
|
52
|
+
#
|
53
|
+
# Backup::Logger.configure do
|
54
|
+
# logfile.enabled = true
|
55
|
+
# logfile.log_path = '/tmp/backups/log'
|
56
|
+
# end; Backup::Logger.start!
|
57
|
+
#
|
58
|
+
# backup = Backup::Model.new(:elasticsearch, 'Backup Elasticsearch') do
|
59
|
+
# database Backup::Database::Elasticsearch do |db|
|
60
|
+
# db.indices = 'test'
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# store_with Backup::Storage::Local do |local|
|
64
|
+
# local.path = '/tmp/backups'
|
65
|
+
# end
|
66
|
+
# end
|
67
|
+
#
|
68
|
+
# backup.perform!
|
69
|
+
#
|
70
|
+
# @example A simple recover script for the backup created in the previous examples
|
46
71
|
#
|
47
72
|
# PATH = '/path/to/backup/'
|
48
73
|
#
|
@@ -66,7 +91,7 @@ module Backup
|
|
66
91
|
# client.bulk body: payload
|
67
92
|
# end
|
68
93
|
#
|
69
|
-
# @see http://
|
94
|
+
# @see http://backup.github.io/backup/v4/
|
70
95
|
#
|
71
96
|
class Elasticsearch < Base
|
72
97
|
class Error < ::Backup::Error; end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Elasticsearch
|
4
|
+
module Extensions
|
5
|
+
|
6
|
+
# This module allows copying documents from one index/cluster to another one
|
7
|
+
#
|
8
|
+
# When required together with the client, it will add the `reindex` method
|
9
|
+
#
|
10
|
+
# @see Reindex::Reindex#initialize
|
11
|
+
# @see Reindex::Reindex#perform
|
12
|
+
#
|
13
|
+
# @see http://www.rubydoc.info/gems/elasticsearch-api/Elasticsearch/API/Actions#reindex-instance_method
|
14
|
+
#
|
15
|
+
module Reindex
|
16
|
+
|
17
|
+
# Initialize a new instance of the Reindex class (shortcut)
|
18
|
+
#
|
19
|
+
# @see Reindex::Reindex#initialize
|
20
|
+
#
|
21
|
+
def new(arguments={})
|
22
|
+
Reindex.new(arguments)
|
23
|
+
end; extend self
|
24
|
+
|
25
|
+
module API
|
26
|
+
# Copy documents from one index into another, optionally refreshing the target index (`refresh: true`)
|
27
|
+
#
|
28
|
+
# @example
|
29
|
+
# client.reindex source: { index: 'test1' }, target: { index: 'test2' }, refresh: true
|
30
|
+
#
|
31
|
+
# The method allows all the options as {Reindex::Reindex.new}.
|
32
|
+
#
|
33
|
+
# This method will be mixed into the Elasticsearch client's API, if available.
|
34
|
+
#
|
35
|
+
def reindex(arguments={})
|
36
|
+
arguments[:source] ||= {}
|
37
|
+
arguments[:source][:client] = self
|
38
|
+
Reindex.new(arguments).perform
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Include the `reindex` method in the API and client, if available
|
43
|
+
Elasticsearch::API::Actions.__send__ :include, API if defined?(Elasticsearch::API::Actions)
|
44
|
+
Elasticsearch::Transport::Client.__send__ :include, API if defined?(Elasticsearch::Transport::Client) && defined?(Elasticsearch::API)
|
45
|
+
|
46
|
+
# Copy documents from one index into another
|
47
|
+
#
|
48
|
+
# @example Copy documents to another index
|
49
|
+
#
|
50
|
+
# client = Elasticsearch::Client.new
|
51
|
+
# reindex = Elasticsearch::Extensions::Reindex.new \
|
52
|
+
# source: { index: 'test1', client: client },
|
53
|
+
# target: { index: 'test2' }
|
54
|
+
#
|
55
|
+
# reindex.perform
|
56
|
+
#
|
57
|
+
# @example Copy documents to a different cluster
|
58
|
+
#
|
59
|
+
# source_client = Elasticsearch::Client.new url: 'http://localhost:9200'
|
60
|
+
# target_client = Elasticsearch::Client.new url: 'http://localhost:9250'
|
61
|
+
#
|
62
|
+
# reindex = Elasticsearch::Extensions::Reindex.new \
|
63
|
+
# source: { index: 'test', client: source_client },
|
64
|
+
# target: { index: 'test', client: target_client }
|
65
|
+
# reindex.perform
|
66
|
+
#
|
67
|
+
# @example Transform the documents during re-indexing
|
68
|
+
#
|
69
|
+
# reindex = Elasticsearch::Extensions::Reindex.new \
|
70
|
+
# source: { index: 'test1', client: client },
|
71
|
+
# target: { index: 'test2' }, transform: lambda { |doc| doc['_source']['category'].upcase! }
|
72
|
+
#
|
73
|
+
#
|
74
|
+
# The reindexing process works by "scrolling" an index and sending
|
75
|
+
# batches via the "Bulk" API to the target index/cluster
|
76
|
+
#
|
77
|
+
# @option arguments [Hash] :source The source index/cluster definition, with `:index` and `:client` keys (*Required*)
|
78
|
+
# @option arguments [Hash] :target The target index/cluster definition, with `:index` and optional `:client` keys (*Required*)
|
79
|
+
# @option arguments [Proc] :transform A block which will be executed for each document
|
80
|
+
# @option arguments [Integer] :batch_size The size of the batch for scroll operation (Default: 1000)
|
81
|
+
# @option arguments [String] :scroll The timeout for the scroll operation (Default: 5min)
|
82
|
+
# @option arguments [Boolean] :refresh Whether to refresh the target index after
|
83
|
+
# the operation is completed (Default: false)
|
84
|
+
#
|
85
|
+
# Be aware that if you want to change the target index settings and/or mappings,
|
86
|
+
# you have to do so in advance by using the "Indices Create" API.
|
87
|
+
#
|
88
|
+
# Note that there is a native "Reindex" API in Elasticsearch 2.3.x and higher versions,
|
89
|
+
# which will be more performant than the Ruby version.
|
90
|
+
#
|
91
|
+
# @see http://www.rubydoc.info/gems/elasticsearch-api/Elasticsearch/API/Actions#reindex-instance_method
|
92
|
+
#
|
93
|
+
class Reindex
|
94
|
+
attr_reader :arguments
|
95
|
+
|
96
|
+
def initialize(arguments={})
|
97
|
+
[
|
98
|
+
[:source, :index],
|
99
|
+
[:source, :client],
|
100
|
+
[:target, :index]
|
101
|
+
].each do |required_option|
|
102
|
+
value = required_option.reduce(arguments) { |sum, o| sum = sum[o] ? sum[o] : {} }
|
103
|
+
|
104
|
+
raise ArgumentError,
|
105
|
+
"Required argument '#{Hash[*required_option]}' missing" if \
|
106
|
+
value.respond_to?(:empty?) ? value.empty? : value.nil?
|
107
|
+
end
|
108
|
+
|
109
|
+
@arguments = {
|
110
|
+
batch_size: 1000,
|
111
|
+
scroll: '5m',
|
112
|
+
refresh: false
|
113
|
+
}.merge(arguments)
|
114
|
+
|
115
|
+
arguments[:target][:client] ||= arguments[:source][:client]
|
116
|
+
end
|
117
|
+
|
118
|
+
# Performs the operation
|
119
|
+
#
|
120
|
+
# @return [Hash] A Hash with the information about the operation outcome
|
121
|
+
#
|
122
|
+
def perform
|
123
|
+
output = { errors: 0 }
|
124
|
+
|
125
|
+
response = arguments[:source][:client].search(
|
126
|
+
index: arguments[:source][:index],
|
127
|
+
scroll: arguments[:scroll],
|
128
|
+
size: arguments[:batch_size],
|
129
|
+
search_type: 'scan',
|
130
|
+
fields: ['_source', '_parent', '_routing', '_timestamp']
|
131
|
+
)
|
132
|
+
|
133
|
+
while response = arguments[:source][:client].scroll(scroll_id: response['_scroll_id'], scroll: arguments[:scroll]) do
|
134
|
+
documents = response['hits']['hits']
|
135
|
+
break if documents.empty?
|
136
|
+
|
137
|
+
bulk = documents.map do |doc|
|
138
|
+
doc['_index'] = arguments[:target][:index]
|
139
|
+
|
140
|
+
arguments[:transform].call(doc) if arguments[:transform]
|
141
|
+
|
142
|
+
doc['data'] = doc['_source']
|
143
|
+
doc.delete('_score')
|
144
|
+
doc.delete('_source')
|
145
|
+
|
146
|
+
{ index: doc }
|
147
|
+
end
|
148
|
+
|
149
|
+
bulk_response = arguments[:target][:client].bulk body: bulk
|
150
|
+
output[:errors] += bulk_response['items'].select { |k, v| k.values.first['error'] }.size
|
151
|
+
end
|
152
|
+
|
153
|
+
arguments[:target][:client].indices.refresh index: arguments[:target][:index] if arguments[:refresh]
|
154
|
+
|
155
|
+
output
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
@@ -84,7 +84,7 @@ module Elasticsearch
|
|
84
84
|
arguments[:node_name] ||= ENV.fetch('TEST_CLUSTER_NODE_NAME', 'node')
|
85
85
|
arguments[:path_data] ||= ENV.fetch('TEST_CLUSTER_DATA', '/tmp/elasticsearch_test')
|
86
86
|
arguments[:path_work] ||= ENV.fetch('TEST_CLUSTER_TMP', '/tmp')
|
87
|
-
arguments[:path_logs] ||= ENV.fetch('TEST_CLUSTER_LOGS', '/
|
87
|
+
arguments[:path_logs] ||= ENV.fetch('TEST_CLUSTER_LOGS', '/tmp/log/elasticsearch')
|
88
88
|
arguments[:es_params] ||= ENV.fetch('TEST_CLUSTER_PARAMS', '')
|
89
89
|
arguments[:multicast_enabled] ||= ENV.fetch('TEST_CLUSTER_MULTICAST', 'true')
|
90
90
|
arguments[:timeout] ||= (ENV.fetch('TEST_CLUSTER_TIMEOUT', 30).to_i)
|
@@ -218,16 +218,18 @@ module Elasticsearch
|
|
218
218
|
#
|
219
219
|
# @option arguments [Integer] :on The port on which the node is running.
|
220
220
|
# @option arguments [String] :as The cluster name.
|
221
|
+
# @option arguments [Integer] :num Number of nodes in the cluster.
|
221
222
|
#
|
222
223
|
# @return Boolean
|
223
224
|
#
|
224
225
|
def running?(arguments={})
|
225
|
-
port
|
226
|
-
cluster_name
|
226
|
+
port = arguments[:on] || (ENV['TEST_CLUSTER_PORT'] || 9250).to_i
|
227
|
+
cluster_name = arguments[:as] || (ENV.fetch('TEST_CLUSTER_NAME', @@default_cluster_name).chomp)
|
228
|
+
number_of_nodes = arguments[:num] || (ENV.fetch('TEST_CLUSTER_NODES', @@number_of_nodes)).to_i
|
227
229
|
|
228
230
|
if cluster_health = Timeout::timeout(0.25) { __get_cluster_health(port) } rescue nil
|
229
231
|
return cluster_health['cluster_name'] == cluster_name && \
|
230
|
-
cluster_health['number_of_nodes'] ==
|
232
|
+
cluster_health['number_of_nodes'] == number_of_nodes
|
231
233
|
end
|
232
234
|
return false
|
233
235
|
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'elasticsearch/extensions/reindex'
|
3
|
+
|
4
|
+
class Elasticsearch::Extensions::ReindexIntegrationTest < Elasticsearch::Test::IntegrationTestCase
|
5
|
+
context "The Reindex extension" do
|
6
|
+
setup do
|
7
|
+
@port = (ENV['TEST_CLUSTER_PORT'] || 9250).to_i
|
8
|
+
|
9
|
+
@logger = ::Logger.new(STDERR)
|
10
|
+
@logger.formatter = proc do |severity, datetime, progname, msg|
|
11
|
+
color = case severity
|
12
|
+
when /INFO/ then :green
|
13
|
+
when /ERROR|WARN|FATAL/ then :red
|
14
|
+
when /DEBUG/ then :cyan
|
15
|
+
else :white
|
16
|
+
end
|
17
|
+
ANSI.ansi(severity[0] + ' ', color, :faint) + ANSI.ansi(msg, :white, :faint) + "\n"
|
18
|
+
end
|
19
|
+
|
20
|
+
@client = Elasticsearch::Client.new host: "localhost:#{@port}", logger: @logger
|
21
|
+
@client.indices.delete index: '_all'
|
22
|
+
|
23
|
+
@client.index index: 'test1', type: 'd', id: 1, body: { title: 'TEST 1', category: 'one' }
|
24
|
+
@client.index index: 'test1', type: 'd', id: 2, body: { title: 'TEST 2', category: 'two' }
|
25
|
+
@client.index index: 'test1', type: 'd', id: 3, body: { title: 'TEST 3', category: 'three' }
|
26
|
+
@client.indices.refresh index: 'test1'
|
27
|
+
|
28
|
+
@client.cluster.health wait_for_status: 'yellow'
|
29
|
+
end
|
30
|
+
|
31
|
+
should "copy documents from one index to another" do
|
32
|
+
reindex = Elasticsearch::Extensions::Reindex.new \
|
33
|
+
source: { index: 'test1', client: @client },
|
34
|
+
target: { index: 'test2' },
|
35
|
+
refresh: true
|
36
|
+
|
37
|
+
result = reindex.perform
|
38
|
+
|
39
|
+
assert_equal 0, result[:errors]
|
40
|
+
assert_equal 3, @client.search(index: 'test2')['hits']['total']
|
41
|
+
end
|
42
|
+
|
43
|
+
should "transform documents with a lambda" do
|
44
|
+
reindex = Elasticsearch::Extensions::Reindex.new \
|
45
|
+
source: { index: 'test1', client: @client },
|
46
|
+
target: { index: 'test2' },
|
47
|
+
transform: lambda { |d| d['_source']['category'].upcase! },
|
48
|
+
refresh: true
|
49
|
+
|
50
|
+
result = reindex.perform
|
51
|
+
|
52
|
+
assert_equal 0, result[:errors]
|
53
|
+
assert_equal 3, @client.search(index: 'test2')['hits']['total']
|
54
|
+
assert_equal 'ONE', @client.get(index: 'test2', type: 'd', id: 1)['_source']['category']
|
55
|
+
end
|
56
|
+
|
57
|
+
should "return the number of errors" do
|
58
|
+
@client.indices.create index: 'test3', body: { mappings: { d: { properties: { category: { type: 'integer' } }}}}
|
59
|
+
@client.cluster.health wait_for_status: 'yellow'
|
60
|
+
|
61
|
+
reindex = Elasticsearch::Extensions::Reindex.new \
|
62
|
+
source: { index: 'test1', client: @client },
|
63
|
+
target: { index: 'test3', transform: lambda { |d| d['_source']['category'].upcase!; d } },
|
64
|
+
refresh: true
|
65
|
+
|
66
|
+
result = reindex.perform
|
67
|
+
|
68
|
+
assert_equal 3, result[:errors]
|
69
|
+
assert_equal 0, @client.search(index: 'test3')['hits']['total']
|
70
|
+
end
|
71
|
+
|
72
|
+
should "reindex via the API integration" do
|
73
|
+
@client.reindex source: { index: 'test1' }, target: { index: 'test4' }
|
74
|
+
|
75
|
+
@client.indices.refresh index: 'test4'
|
76
|
+
|
77
|
+
assert_equal 3, @client.search(index: 'test4')['hits']['total']
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'elasticsearch/extensions/reindex'
|
3
|
+
|
4
|
+
class Elasticsearch::Extensions::ReindexTest < Test::Unit::TestCase
|
5
|
+
context "The Reindex extension module" do
|
6
|
+
DEFAULT_OPTIONS = { source: { index: 'foo', client: Object.new }, target: { index: 'bar' } }
|
7
|
+
|
8
|
+
should "require options" do
|
9
|
+
assert_raise ArgumentError do
|
10
|
+
Elasticsearch::Extensions::Reindex.new
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
should "allow to initialize the class" do
|
15
|
+
assert_instance_of Elasticsearch::Extensions::Reindex::Reindex,
|
16
|
+
Elasticsearch::Extensions::Reindex.new(DEFAULT_OPTIONS)
|
17
|
+
end
|
18
|
+
|
19
|
+
should "add the reindex to the API and client" do
|
20
|
+
assert_includes Elasticsearch::API::Actions.public_instance_methods.sort, :reindex
|
21
|
+
assert_respond_to Elasticsearch::Client.new, :reindex
|
22
|
+
end
|
23
|
+
|
24
|
+
should "pass the client when used in API mode" do
|
25
|
+
client = Elasticsearch::Client.new
|
26
|
+
|
27
|
+
Elasticsearch::Extensions::Reindex::Reindex
|
28
|
+
.expects(:new)
|
29
|
+
.with({source: { client: client }})
|
30
|
+
.returns(stub perform: {})
|
31
|
+
|
32
|
+
client.reindex
|
33
|
+
end
|
34
|
+
|
35
|
+
context "when performing the operation" do
|
36
|
+
setup do
|
37
|
+
d = { '_id' => 'foo', '_type' => 'type', '_source' => { 'foo' => 'bar' } }
|
38
|
+
@default_response = { 'hits' => { 'hits' => [d] } }
|
39
|
+
@empty_response = { 'hits' => { 'hits' => [] } }
|
40
|
+
@bulk_request = [{ index: {
|
41
|
+
'_index' => 'bar',
|
42
|
+
'_type' => d['_type'],
|
43
|
+
'_id' => d['_id'],
|
44
|
+
'data' => d['_source']
|
45
|
+
} }]
|
46
|
+
@bulk_response = {'errors'=>false, 'items' => [{'index' => {}}]}
|
47
|
+
@bulk_response_error = {'errors'=>true, 'items' => [{'index' => {}},{'index' => {'error' => 'FOOBAR'}}]}
|
48
|
+
end
|
49
|
+
|
50
|
+
should "scroll through the index and save batches in bulk" do
|
51
|
+
client = mock()
|
52
|
+
subject = Elasticsearch::Extensions::Reindex.new source: { index: 'foo', client: client },
|
53
|
+
target: { index: 'bar' }
|
54
|
+
|
55
|
+
client.expects(:search).returns({ '_scroll_id' => 'scroll_id_1' })
|
56
|
+
client.expects(:scroll).returns(@default_response)
|
57
|
+
.then.returns(@empty_response)
|
58
|
+
.times(2)
|
59
|
+
client.expects(:bulk).with(body: @bulk_request).returns(@bulk_response)
|
60
|
+
|
61
|
+
result = subject.perform
|
62
|
+
|
63
|
+
assert_equal 0, result[:errors]
|
64
|
+
end
|
65
|
+
|
66
|
+
should "return the number of errors" do
|
67
|
+
client = mock()
|
68
|
+
subject = Elasticsearch::Extensions::Reindex.new source: { index: 'foo', client: client },
|
69
|
+
target: { index: 'bar' }
|
70
|
+
|
71
|
+
client.expects(:search).returns({ '_scroll_id' => 'scroll_id_1' })
|
72
|
+
client.expects(:scroll).returns(@default_response)
|
73
|
+
.then.returns(@empty_response)
|
74
|
+
.times(2)
|
75
|
+
client.expects(:bulk).with(body: @bulk_request).returns(@bulk_response_error)
|
76
|
+
|
77
|
+
result = subject.perform
|
78
|
+
|
79
|
+
assert_equal 1, result[:errors]
|
80
|
+
end
|
81
|
+
|
82
|
+
should "transform the documents with a lambda" do
|
83
|
+
client = mock()
|
84
|
+
subject = Elasticsearch::Extensions::Reindex.new \
|
85
|
+
source: { index: 'foo', client: client },
|
86
|
+
target: { index: 'bar' },
|
87
|
+
transform: lambda { |d| d['_source']['foo'].upcase!; d }
|
88
|
+
|
89
|
+
client.expects(:search).returns({ '_scroll_id' => 'scroll_id_1' })
|
90
|
+
client.expects(:scroll).returns(@default_response)
|
91
|
+
.then.returns(@empty_response)
|
92
|
+
.times(2)
|
93
|
+
client.expects(:bulk).with do |arguments|
|
94
|
+
assert_equal 'BAR', arguments[:body][0][:index]['data']['foo']
|
95
|
+
true
|
96
|
+
end
|
97
|
+
.returns(@bulk_response)
|
98
|
+
|
99
|
+
result = subject.perform
|
100
|
+
|
101
|
+
assert_equal 0, result[:errors]
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
end
|
106
|
+
end
|
data/test/test_helper.rb
CHANGED
@@ -23,12 +23,24 @@ require 'shoulda-context'
|
|
23
23
|
require 'mocha/setup'
|
24
24
|
require 'ansi/code'
|
25
25
|
require 'turn' unless ENV["TM_FILEPATH"] || ENV["NOTURN"] || RUBY_1_8
|
26
|
+
require 'logger'
|
26
27
|
|
27
28
|
require 'elasticsearch/extensions'
|
29
|
+
require 'elasticsearch/extensions/test/startup_shutdown'
|
30
|
+
require 'elasticsearch/extensions/test/cluster'
|
28
31
|
|
29
32
|
module Elasticsearch
|
30
33
|
module Test
|
31
34
|
class IntegrationTestCase < ::Test::Unit::TestCase
|
35
|
+
extend Elasticsearch::Extensions::Test::StartupShutdown
|
36
|
+
|
37
|
+
startup do
|
38
|
+
Elasticsearch::Extensions::Test::Cluster.start(nodes: 2) if ENV['SERVER'] and not Elasticsearch::Extensions::Test::Cluster.running?
|
39
|
+
end
|
40
|
+
|
41
|
+
shutdown do
|
42
|
+
Elasticsearch::Extensions::Test::Cluster.stop if ENV['SERVER'] and Elasticsearch::Extensions::Test::Cluster.running?
|
43
|
+
end
|
32
44
|
end
|
33
45
|
end
|
34
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch-extensions
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Karel Minarik
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ansi
|
@@ -70,16 +70,16 @@ dependencies:
|
|
70
70
|
name: rake
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '11.1'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '11.1'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: awesome_print
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -268,6 +268,7 @@ files:
|
|
268
268
|
- lib/elasticsearch/extensions/ansi/helpers.rb
|
269
269
|
- lib/elasticsearch/extensions/ansi/response.rb
|
270
270
|
- lib/elasticsearch/extensions/backup.rb
|
271
|
+
- lib/elasticsearch/extensions/reindex.rb
|
271
272
|
- lib/elasticsearch/extensions/test/cluster.rb
|
272
273
|
- lib/elasticsearch/extensions/test/cluster/tasks.rb
|
273
274
|
- lib/elasticsearch/extensions/test/profiling.rb
|
@@ -275,6 +276,8 @@ files:
|
|
275
276
|
- lib/elasticsearch/extensions/version.rb
|
276
277
|
- test/ansi/unit/ansi_test.rb
|
277
278
|
- test/backup/unit/backup_test.rb
|
279
|
+
- test/reindex/integration/reindex_test.rb
|
280
|
+
- test/reindex/unit/reindex_test.rb
|
278
281
|
- test/test_helper.rb
|
279
282
|
homepage: ''
|
280
283
|
licenses:
|
@@ -303,5 +306,7 @@ summary: Extensions for the Elasticsearch Rubygem
|
|
303
306
|
test_files:
|
304
307
|
- test/ansi/unit/ansi_test.rb
|
305
308
|
- test/backup/unit/backup_test.rb
|
309
|
+
- test/reindex/integration/reindex_test.rb
|
310
|
+
- test/reindex/unit/reindex_test.rb
|
306
311
|
- test/test_helper.rb
|
307
312
|
has_rdoc:
|