fluent-plugin-droonga 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/benchmark/watch/benchmark-notify.rb +2 -2
- data/benchmark/watch/benchmark-scan.rb +3 -0
- data/benchmark/watch/fluentd.conf +0 -1
- data/fluent-plugin-droonga.gemspec +2 -3
- data/lib/droonga/catalog.rb +10 -124
- data/lib/droonga/catalog/base.rb +140 -0
- data/lib/droonga/catalog/version1.rb +23 -0
- data/lib/droonga/catalog_loader.rb +33 -0
- data/lib/droonga/collector.rb +2 -71
- data/lib/droonga/collector_plugin.rb +2 -34
- data/lib/droonga/dispatcher.rb +141 -196
- data/lib/droonga/distribution_planner.rb +76 -0
- data/lib/droonga/distributor.rb +5 -7
- data/lib/droonga/distributor_plugin.rb +23 -15
- data/lib/droonga/engine.rb +2 -2
- data/lib/droonga/event_loop.rb +46 -0
- data/lib/droonga/farm.rb +9 -5
- data/lib/droonga/fluent_message_sender.rb +84 -0
- data/lib/droonga/forwarder.rb +43 -53
- data/lib/droonga/handler.rb +20 -68
- data/lib/droonga/handler_message.rb +61 -0
- data/lib/droonga/handler_messenger.rb +92 -0
- data/lib/droonga/handler_plugin.rb +10 -12
- data/lib/droonga/input_adapter.rb +52 -0
- data/lib/droonga/{adapter.rb → input_adapter_plugin.rb} +7 -13
- data/lib/droonga/input_message.rb +11 -11
- data/lib/droonga/logger.rb +4 -3
- data/lib/droonga/message_pack_packer.rb +62 -0
- data/lib/droonga/message_processing_error.rb +54 -0
- data/lib/droonga/message_pusher.rb +60 -0
- data/lib/droonga/message_receiver.rb +61 -0
- data/lib/droonga/output_adapter.rb +53 -0
- data/lib/droonga/{adapter_plugin.rb → output_adapter_plugin.rb} +3 -21
- data/lib/droonga/output_message.rb +37 -0
- data/lib/droonga/partition.rb +27 -5
- data/lib/droonga/pluggable.rb +9 -4
- data/lib/droonga/plugin.rb +12 -3
- data/lib/droonga/plugin/collector/basic.rb +91 -18
- data/lib/droonga/plugin/distributor/crud.rb +9 -9
- data/lib/droonga/plugin/distributor/distributed_search_planner.rb +401 -0
- data/lib/droonga/plugin/distributor/groonga.rb +5 -5
- data/lib/droonga/plugin/distributor/search.rb +4 -246
- data/lib/droonga/plugin/distributor/watch.rb +11 -6
- data/lib/droonga/plugin/handler/add.rb +69 -7
- data/lib/droonga/plugin/handler/groonga.rb +6 -6
- data/lib/droonga/plugin/handler/search.rb +5 -3
- data/lib/droonga/plugin/handler/watch.rb +19 -13
- data/lib/droonga/plugin/{adapter → input_adapter}/groonga.rb +5 -11
- data/lib/droonga/plugin/{adapter → input_adapter}/groonga/select.rb +2 -36
- data/lib/droonga/plugin/output_adapter/groonga.rb +30 -0
- data/lib/droonga/plugin/output_adapter/groonga/select.rb +54 -0
- data/lib/droonga/plugin_loader.rb +2 -2
- data/lib/droonga/processor.rb +21 -23
- data/lib/droonga/replier.rb +40 -0
- data/lib/droonga/searcher.rb +298 -174
- data/lib/droonga/server.rb +0 -67
- data/lib/droonga/session.rb +85 -0
- data/lib/droonga/test.rb +21 -0
- data/lib/droonga/test/stub_distributor.rb +31 -0
- data/lib/droonga/test/stub_handler.rb +37 -0
- data/lib/droonga/test/stub_handler_message.rb +35 -0
- data/lib/droonga/test/stub_handler_messenger.rb +34 -0
- data/lib/droonga/time_formatter.rb +37 -0
- data/lib/droonga/watcher.rb +1 -0
- data/lib/droonga/worker.rb +16 -19
- data/lib/fluent/plugin/out_droonga.rb +9 -9
- data/lib/groonga_command_converter.rb +5 -5
- data/sample/cluster/catalog.json +1 -1
- data/test/command/config/default/catalog.json +19 -1
- data/test/command/fixture/event.jsons +41 -0
- data/test/command/fixture/user-table.jsons +9 -0
- data/test/command/run-test.rb +2 -2
- data/test/command/suite/add/error/invalid-integer.expected +20 -0
- data/test/command/suite/add/error/invalid-integer.test +12 -0
- data/test/command/suite/add/error/invalid-time.expected +20 -0
- data/test/command/suite/add/error/invalid-time.test +12 -0
- data/test/command/suite/add/error/missing-key.expected +13 -0
- data/test/command/suite/add/error/missing-key.test +16 -0
- data/test/command/suite/add/error/missing-table.expected +13 -0
- data/test/command/suite/add/error/missing-table.test +16 -0
- data/test/command/suite/add/error/unknown-column.expected +20 -0
- data/test/command/suite/add/error/unknown-column.test +12 -0
- data/test/command/suite/add/error/unknown-table.expected +13 -0
- data/test/command/suite/add/error/unknown-table.test +17 -0
- data/test/command/suite/add/minimum.expected +1 -3
- data/test/command/suite/add/with-values.expected +1 -3
- data/test/command/suite/add/without-key.expected +1 -3
- data/test/command/suite/message/error/missing-dataset.expected +13 -0
- data/test/command/suite/message/error/missing-dataset.test +5 -0
- data/test/command/suite/message/error/unknown-command.expected +13 -0
- data/test/command/suite/message/error/unknown-command.test +6 -0
- data/test/command/suite/message/error/unknown-dataset.expected +13 -0
- data/test/command/suite/message/error/unknown-dataset.test +6 -0
- data/test/command/suite/search/{array-attribute-label.expected → attributes/array.expected} +0 -0
- data/test/command/suite/search/{array-attribute-label.test → attributes/array.test} +0 -0
- data/test/command/suite/search/{hash-attribute-label.expected → attributes/hash.expected} +0 -0
- data/test/command/suite/search/{hash-attribute-label.test → attributes/hash.test} +0 -0
- data/test/command/suite/search/{condition-nested.expected → condition/nested.expected} +0 -0
- data/test/command/suite/search/{condition-nested.test → condition/nested.test} +0 -0
- data/test/command/suite/search/{condition-query.expected → condition/query.expected} +0 -0
- data/test/command/suite/search/{condition-query.test → condition/query.test} +0 -0
- data/test/command/suite/search/{condition-script.expected → condition/script.expected} +0 -0
- data/test/command/suite/search/{condition-script.test → condition/script.test} +0 -0
- data/test/command/suite/search/error/cyclic-source.expected +18 -0
- data/test/command/suite/search/error/cyclic-source.test +12 -0
- data/test/command/suite/search/error/deeply-cyclic-source.expected +21 -0
- data/test/command/suite/search/error/deeply-cyclic-source.test +15 -0
- data/test/command/suite/search/error/missing-source-parameter.expected +17 -0
- data/test/command/suite/search/error/missing-source-parameter.test +11 -0
- data/test/command/suite/search/error/unknown-source.expected +18 -0
- data/test/command/suite/search/error/unknown-source.test +12 -0
- data/test/command/suite/search/{minimum.expected → group/count.expected} +2 -1
- data/test/command/suite/search/{minimum.test → group/count.test} +5 -3
- data/test/command/suite/search/group/limit.expected +19 -0
- data/test/command/suite/search/group/limit.test +20 -0
- data/test/command/suite/search/group/string.expected +36 -0
- data/test/command/suite/search/group/string.test +44 -0
- data/test/command/suite/search/{chained-queries.expected → multiple/chained.expected} +0 -0
- data/test/command/suite/search/{chained-queries.test → multiple/chained.test} +0 -0
- data/test/command/suite/search/{multiple-queries.expected → multiple/parallel.expected} +0 -0
- data/test/command/suite/search/{multiple-queries.test → multiple/parallel.test} +0 -0
- data/test/command/suite/search/{output-range.expected → range/only-output.expected} +0 -0
- data/test/command/suite/search/{output-range.test → range/only-output.test} +0 -0
- data/test/command/suite/search/{sort-range.expected → range/only-sort.expected} +0 -0
- data/test/command/suite/search/{sort-range.test → range/only-sort.test} +0 -0
- data/test/command/suite/search/{sort-and-output-range.expected → range/sort-and-output.expected} +0 -0
- data/test/command/suite/search/{sort-and-output-range.test → range/sort-and-output.test} +0 -0
- data/test/command/suite/search/range/too-large-output-offset.expected +16 -0
- data/test/command/suite/search/range/too-large-output-offset.test +25 -0
- data/test/command/suite/search/range/too-large-sort-offset.expected +16 -0
- data/test/command/suite/search/range/too-large-sort-offset.test +28 -0
- data/test/command/suite/search/response/records/value/time.expected +24 -0
- data/test/command/suite/search/response/records/value/time.test +24 -0
- data/test/command/suite/search/sort/default-offset-limit.expected +43 -0
- data/test/command/suite/search/sort/default-offset-limit.test +26 -0
- data/test/command/suite/search/{sort-with-invisible-column.expected → sort/invisible-column.expected} +0 -0
- data/test/command/suite/search/{sort-with-invisible-column.test → sort/invisible-column.test} +0 -0
- data/test/command/suite/watch/subscribe.expected +12 -0
- data/test/command/suite/watch/subscribe.test +9 -0
- data/test/command/suite/watch/unsubscribe.expected +12 -0
- data/test/command/suite/watch/unsubscribe.test +9 -0
- data/test/unit/{test_catalog.rb → catalog/test_version1.rb} +12 -4
- data/test/unit/fixtures/{catalog.json → catalog/version1.json} +0 -0
- data/test/unit/helper.rb +2 -0
- data/test/unit/plugin/collector/test_basic.rb +289 -33
- data/test/unit/plugin/distributor/test_search.rb +176 -861
- data/test/unit/plugin/distributor/test_search_planner.rb +1102 -0
- data/test/unit/plugin/handler/groonga/test_column_create.rb +17 -13
- data/test/unit/plugin/handler/groonga/test_table_create.rb +10 -10
- data/test/unit/plugin/handler/test_add.rb +74 -11
- data/test/unit/plugin/handler/test_groonga.rb +15 -1
- data/test/unit/plugin/handler/test_search.rb +33 -17
- data/test/unit/plugin/handler/test_watch.rb +43 -27
- data/test/unit/run-test.rb +2 -0
- data/test/unit/test_message_pack_packer.rb +51 -0
- data/test/unit/test_time_formatter.rb +29 -0
- metadata +208 -110
- data/lib/droonga/job_queue.rb +0 -87
- data/lib/droonga/job_queue_schema.rb +0 -65
- data/test/unit/test_adapter.rb +0 -51
- data/test/unit/test_job_queue_schema.rb +0 -45
data/lib/droonga/pluggable.rb
CHANGED
@@ -17,6 +17,14 @@
|
|
17
17
|
|
18
18
|
module Droonga
|
19
19
|
module Pluggable
|
20
|
+
# Raised when no plugin is found for a command.
class UnknownPlugin < StandardError
  # The command for which no plugin was found.
  attr_reader :command

  # @param command the command that could not be mapped to a plugin
  def initialize(command)
    @command = command
    # Pass a descriptive message to StandardError; without it the
    # exception message is only the class name and the command is lost
    # from logs and backtraces.
    super("unknown plugin: <#{command}>")
  end
end
|
27
|
+
|
20
28
|
def shutdown
|
21
29
|
$log.trace("#{log_tag}: shutdown: plugin: start")
|
22
30
|
@plugins.each do |plugin|
|
@@ -33,10 +41,7 @@ module Droonga
|
|
33
41
|
plugin = find_plugin(command)
|
34
42
|
$log.trace("#{log_tag}: process: start: <#{command}>",
|
35
43
|
:plugin => plugin.class)
|
36
|
-
if plugin.nil?
|
37
|
-
raise "unknown plugin: <#{command}>: " +
|
38
|
-
"TODO: improve error handling"
|
39
|
-
end
|
44
|
+
raise UnknownPlugin.new(command) if plugin.nil?
|
40
45
|
plugin.process(command, *arguments)
|
41
46
|
$log.trace("#{log_tag}: process: done: <#{command}>",
|
42
47
|
:plugin => plugin.class)
|
data/lib/droonga/plugin.rb
CHANGED
@@ -33,11 +33,20 @@ module Droonga
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def process(command, *arguments)
|
36
|
-
|
36
|
+
run_command(command, *arguments)
|
37
37
|
rescue => exception
|
38
|
+
process_error(command, exception, arguments)
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
def run_command(command, *arguments)
|
43
|
+
__send__(self.class.method_name(command), *arguments)
|
44
|
+
end
|
45
|
+
|
46
|
+
def process_error(command, error, arguments)
|
38
47
|
Logger.error("error while processing #{command}",
|
39
|
-
|
40
|
-
|
48
|
+
error,
|
49
|
+
:arguments => arguments)
|
41
50
|
end
|
42
51
|
end
|
43
52
|
end
|
@@ -27,21 +27,47 @@ module Droonga
|
|
27
27
|
def collector_gather(result)
|
28
28
|
output = body ? body[input_name] : input_name
|
29
29
|
if output.is_a?(Hash)
|
30
|
-
|
31
|
-
if
|
32
|
-
|
33
|
-
|
30
|
+
elements = output["elements"]
|
31
|
+
if elements && elements.is_a?(Hash)
|
32
|
+
# phase 1: pre-process
|
33
|
+
elements.each do |element, mapper|
|
34
|
+
case mapper["type"]
|
35
|
+
when "count"
|
36
|
+
result[element] = result[mapper["target"]].size
|
37
|
+
when "sort"
|
38
|
+
# do nothing on this phase!
|
39
|
+
end
|
40
|
+
end
|
41
|
+
# phase 2: post-process
|
42
|
+
elements.each do |element, mapper|
|
43
|
+
if mapper["no_output"]
|
44
|
+
result.delete(element)
|
45
|
+
next
|
46
|
+
end
|
47
|
+
|
48
|
+
case mapper["type"]
|
49
|
+
when "count"
|
50
|
+
# do nothing on this phase!
|
51
|
+
when "sort"
|
52
|
+
# because "count" type mapper requires all items of the array,
|
53
|
+
# I have to apply "sort" type mapper later.
|
54
|
+
if result[element]
|
55
|
+
result[element] = apply_output_range(result[element], mapper)
|
56
|
+
result[element] = apply_output_attributes_and_format(result[element], mapper)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
34
60
|
end
|
35
61
|
output = output["output"]
|
36
62
|
end
|
37
|
-
emit(
|
63
|
+
emit(output, result)
|
38
64
|
end
|
39
65
|
|
40
66
|
def apply_output_range(items, output)
|
41
67
|
if items && items.is_a?(Array)
|
42
68
|
offset = output["offset"] || 0
|
43
69
|
unless offset.zero?
|
44
|
-
items = items[offset..-1]
|
70
|
+
items = items[offset..-1] || []
|
45
71
|
end
|
46
72
|
|
47
73
|
limit = output["limit"] || 0
|
@@ -80,7 +106,7 @@ module Droonga
|
|
80
106
|
value = request
|
81
107
|
old_value = output_values[output]
|
82
108
|
value = reduce(elements, old_value, request) if old_value
|
83
|
-
emit(
|
109
|
+
emit(output, value)
|
84
110
|
end
|
85
111
|
end
|
86
112
|
|
@@ -93,7 +119,10 @@ module Droonga
|
|
93
119
|
when "sum"
|
94
120
|
reduced_values = values[0][key] + values[1][key]
|
95
121
|
when "sort"
|
96
|
-
reduced_values = merge(values[0][key],
|
122
|
+
reduced_values = merge(values[0][key],
|
123
|
+
values[1][key],
|
124
|
+
:operators => deal["operators"],
|
125
|
+
:key_column => deal["key_column"])
|
97
126
|
end
|
98
127
|
|
99
128
|
reduced_values = apply_output_range(reduced_values, "limit" => deal["limit"])
|
@@ -103,16 +132,10 @@ module Droonga
|
|
103
132
|
return result
|
104
133
|
end
|
105
134
|
|
106
|
-
def merge(x, y,
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
if operator.is_a?(String)
|
111
|
-
{ "operator" => operator }
|
112
|
-
else
|
113
|
-
operator
|
114
|
-
end
|
115
|
-
end
|
135
|
+
def merge(x, y, options={})
|
136
|
+
operators = options[:operators] = normalize_operators(options[:operators])
|
137
|
+
|
138
|
+
unify_by_key!(x, y, options)
|
116
139
|
|
117
140
|
index = 0
|
118
141
|
y.each do |_y|
|
@@ -128,6 +151,17 @@ module Droonga
|
|
128
151
|
return x
|
129
152
|
end
|
130
153
|
|
154
|
+
def normalize_operators(operators)
|
155
|
+
operators ||= []
|
156
|
+
operators.collect do |operator|
|
157
|
+
if operator.is_a?(String)
|
158
|
+
{ "operator" => operator }
|
159
|
+
else
|
160
|
+
operator
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
131
165
|
def compare(x, y, operators)
|
132
166
|
operators.each_with_index do |operator, index|
|
133
167
|
column = operator["column"] || index
|
@@ -138,5 +172,44 @@ module Droonga
|
|
138
172
|
end
|
139
173
|
return false
|
140
174
|
end
|
175
|
+
|
176
|
+
def unify_by_key!(base_items, unified_items, options={})
|
177
|
+
key_column_index = options[:key_column]
|
178
|
+
return unless key_column_index
|
179
|
+
|
180
|
+
# The unified records must be smaller than the base, because
|
181
|
+
# I sort unified records at last. I want to sort only smaller array.
|
182
|
+
if base_items.size < unified_items.size
|
183
|
+
base_items, unified_items = unified_items, base_items
|
184
|
+
end
|
185
|
+
|
186
|
+
rest_unified_items = unified_items.dup
|
187
|
+
|
188
|
+
base_items.reject! do |base_item|
|
189
|
+
key = base_item[key_column_index]
|
190
|
+
rest_unified_items.any? do |unified_item|
|
191
|
+
if unified_item[key_column_index] == key
|
192
|
+
base_item.each_with_index do |value, column|
|
193
|
+
next if column == key_column_index
|
194
|
+
unified_item[column] += value
|
195
|
+
end
|
196
|
+
rest_unified_items -= [unified_item]
|
197
|
+
true
|
198
|
+
else
|
199
|
+
false
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
unless rest_unified_items.size == unified_items.size
|
205
|
+
unified_items.sort! do |a, b|
|
206
|
+
if compare(a, b, options[:operators])
|
207
|
+
-1
|
208
|
+
else
|
209
|
+
1
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
141
214
|
end
|
142
215
|
end
|
@@ -22,22 +22,22 @@ module Droonga
|
|
22
22
|
repository.register("crud", self)
|
23
23
|
|
24
24
|
command :add
|
25
|
-
def add(
|
26
|
-
key =
|
27
|
-
scatter_all(
|
25
|
+
def add(message)
|
26
|
+
key = message["body"]["key"] || rand.to_s
|
27
|
+
scatter_all(message, key)
|
28
28
|
end
|
29
29
|
|
30
30
|
command :update
|
31
|
-
def update(
|
32
|
-
key =
|
33
|
-
scatter_all(
|
31
|
+
def update(message)
|
32
|
+
key = message["body"]["key"] || rand.to_s
|
33
|
+
scatter_all(message, key)
|
34
34
|
end
|
35
35
|
|
36
36
|
# TODO: What is this?
|
37
37
|
command :reset
|
38
|
-
def reset(
|
39
|
-
key =
|
40
|
-
scatter_all(
|
38
|
+
def reset(message)
|
39
|
+
key = message["body"]["key"] || rand.to_s
|
40
|
+
scatter_all(message, key)
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -0,0 +1,401 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2013 Droonga Project
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License version 2.1 as published by the Free Software Foundation.
|
8
|
+
#
|
9
|
+
# This library is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# Lesser General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU Lesser General Public
|
15
|
+
# License along with this library; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
17
|
+
|
18
|
+
module Droonga
|
19
|
+
class DistributedSearchPlanner
|
20
|
+
attr_reader :messages
|
21
|
+
|
22
|
+
def initialize(search_request_message)
|
23
|
+
@source_message = search_request_message
|
24
|
+
@request = @source_message["body"]
|
25
|
+
@queries = @request["queries"]
|
26
|
+
|
27
|
+
@input_names = []
|
28
|
+
@output_names = []
|
29
|
+
@output_mappers = {}
|
30
|
+
@messages = []
|
31
|
+
|
32
|
+
build_messages
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
UNLIMITED = -1
|
37
|
+
|
38
|
+
def build_messages
|
39
|
+
Searcher::QuerySorter.validate_dependencies(@queries)
|
40
|
+
|
41
|
+
ensure_unifiable!
|
42
|
+
|
43
|
+
@queries.each do |input_name, query|
|
44
|
+
transform_query(input_name, query)
|
45
|
+
end
|
46
|
+
|
47
|
+
gatherer = {
|
48
|
+
"type" => "gather",
|
49
|
+
"body" => @output_mappers,
|
50
|
+
"inputs" => @output_names, # XXX should be placed in the "body"?
|
51
|
+
"post" => true, # XXX should be placed in the "body"?
|
52
|
+
}
|
53
|
+
@messages << gatherer
|
54
|
+
searcher = {
|
55
|
+
"type" => "broadcast",
|
56
|
+
"command" => "search", # XXX should be placed in the "body"?
|
57
|
+
"dataset" => @source_message["dataset"] || @request["dataset"],
|
58
|
+
"body" => @request,
|
59
|
+
"outputs" => @input_names, # XXX should be placed in the "body"?
|
60
|
+
"replica" => "random", # XXX should be placed in the "body"?
|
61
|
+
}
|
62
|
+
@messages.push(searcher)
|
63
|
+
end
|
64
|
+
|
65
|
+
def ensure_unifiable!
|
66
|
+
@queries.each do |name, query|
|
67
|
+
if unifiable?(name) && query["output"]
|
68
|
+
query["output"]["unifiable"] = true
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def unifiable?(name)
|
74
|
+
query = @queries[name]
|
75
|
+
return true if query["groupBy"]
|
76
|
+
name = query["source"]
|
77
|
+
return false unless @queries.keys.include?(name)
|
78
|
+
unifiable?(name)
|
79
|
+
end
|
80
|
+
|
81
|
+
def transform_query(input_name, query)
|
82
|
+
output = query["output"]
|
83
|
+
|
84
|
+
# Skip reducing phase for a result with no output.
|
85
|
+
if output.nil? or
|
86
|
+
output["elements"].nil? or
|
87
|
+
(!output["elements"].include?("count") &&
|
88
|
+
!output["elements"].include?("records"))
|
89
|
+
return
|
90
|
+
end
|
91
|
+
|
92
|
+
@input_names << input_name
|
93
|
+
output_name = input_name + "_reduced"
|
94
|
+
@output_names << output_name
|
95
|
+
|
96
|
+
transformer = QueryTransformer.new(query)
|
97
|
+
|
98
|
+
reducer = {
|
99
|
+
"type" => "reduce",
|
100
|
+
"body" => {
|
101
|
+
input_name => {
|
102
|
+
output_name => transformer.reducers,
|
103
|
+
},
|
104
|
+
},
|
105
|
+
"inputs" => [input_name], # XXX should be placed in the "body"?
|
106
|
+
"outputs" => [output_name], # XXX should be placed in the "body"?
|
107
|
+
}
|
108
|
+
@messages << reducer
|
109
|
+
|
110
|
+
@output_mappers[output_name] = {
|
111
|
+
"output" => input_name,
|
112
|
+
"elements" => transformer.mappers,
|
113
|
+
}
|
114
|
+
end
|
115
|
+
|
116
|
+
class QueryTransformer
|
117
|
+
attr_reader :reducers, :mappers
|
118
|
+
|
119
|
+
def initialize(query)
|
120
|
+
@query = query
|
121
|
+
@output = @query["output"]
|
122
|
+
@reducers = {}
|
123
|
+
@mappers = {}
|
124
|
+
@output_records = true
|
125
|
+
transform!
|
126
|
+
end
|
127
|
+
|
128
|
+
def transform!
|
129
|
+
# The collector module supports only "simple" format search results.
|
130
|
+
# So we have to override the format and restore it on the gathering
|
131
|
+
# phase.
|
132
|
+
@records_format = @output["format"] || "simple"
|
133
|
+
@output["format"] = "simple"
|
134
|
+
|
135
|
+
@sort_keys = @query["sortBy"] || []
|
136
|
+
@sort_keys = @sort_keys["keys"] || [] if @sort_keys.is_a?(Hash)
|
137
|
+
|
138
|
+
calculate_offset_and_limit!
|
139
|
+
build_count_mapper_and_reducer!
|
140
|
+
build_records_mapper_and_reducer!
|
141
|
+
end
|
142
|
+
|
143
|
+
def calculate_offset_and_limit!
|
144
|
+
@original_sort_offset = sort_offset
|
145
|
+
@original_output_offset = output_offset
|
146
|
+
@original_sort_limit = sort_limit
|
147
|
+
@original_output_limit = output_limit
|
148
|
+
|
149
|
+
calculate_sort_offset!
|
150
|
+
calculate_output_offset!
|
151
|
+
|
152
|
+
# We have to calculate limit based on offset.
|
153
|
+
# <A, B = limited integer (0...MAXINT)>
|
154
|
+
# | sort limit | output limit | => | worker's sort limit | worker's output limit | final limit |
|
155
|
+
# ============================= ====================================================================
|
156
|
+
# | UNLIMITED | UNLIMITED | => | UNLIMITED | UNLIMITED | UNLIMITED |
|
157
|
+
# | UNLIMITED | B | => | final_offset + B | final_offset + B | B |
|
158
|
+
# | A | UNLIMITED | => | final_offset + A | final_offset + A | A |
|
159
|
+
# | A | B | => | final_offset + max(A, B) | final_offset + min(A, B)| min(A, B) |
|
160
|
+
|
161
|
+
# XXX final_limit and final_offset calculated in many times
|
162
|
+
|
163
|
+
@records_offset = final_offset
|
164
|
+
@records_limit = final_limit
|
165
|
+
|
166
|
+
if final_limit == UNLIMITED
|
167
|
+
@output["limit"] = UNLIMITED
|
168
|
+
else
|
169
|
+
if rich_sort?
|
170
|
+
@query["sortBy"]["limit"] = final_offset + [sort_limit, output_limit].max
|
171
|
+
end
|
172
|
+
@output["limit"] = final_offset + final_limit
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
def calculate_sort_offset!
|
177
|
+
# Offset for workers must be zero, because we have to apply "limit" and
|
178
|
+
# "offset" on the last gathering phase instead of each reducing phase.
|
179
|
+
if rich_sort?
|
180
|
+
@query["sortBy"]["offset"] = 0
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def sort_offset
|
185
|
+
if rich_sort?
|
186
|
+
@query["sortBy"]["offset"] || 0
|
187
|
+
else
|
188
|
+
0
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def output_offset
|
193
|
+
@output["offset"] || 0
|
194
|
+
end
|
195
|
+
|
196
|
+
def sort_limit
|
197
|
+
if rich_sort?
|
198
|
+
@query["sortBy"]["limit"] || UNLIMITED
|
199
|
+
else
|
200
|
+
UNLIMITED
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def output_limit
|
205
|
+
@output["limit"] || 0
|
206
|
+
end
|
207
|
+
|
208
|
+
def calculate_output_offset!
|
209
|
+
@output["offset"] = 0 if have_records?
|
210
|
+
end
|
211
|
+
|
212
|
+
def final_offset
|
213
|
+
@original_sort_offset + @original_output_offset
|
214
|
+
end
|
215
|
+
|
216
|
+
def final_limit
|
217
|
+
if @original_sort_limit == UNLIMITED && @original_output_limit == UNLIMITED
|
218
|
+
UNLIMITED
|
219
|
+
else
|
220
|
+
if @original_sort_limit == UNLIMITED
|
221
|
+
@original_output_limit
|
222
|
+
elsif @original_output_limit == UNLIMITED
|
223
|
+
@original_sort_limit
|
224
|
+
else
|
225
|
+
[@original_sort_limit, @original_output_limit].min
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
def have_records?
|
231
|
+
@output["elements"].include?("records")
|
232
|
+
end
|
233
|
+
|
234
|
+
def rich_sort?
|
235
|
+
@query["sortBy"].is_a?(Hash)
|
236
|
+
end
|
237
|
+
|
238
|
+
def unifiable?
|
239
|
+
@output["unifiable"]
|
240
|
+
end
|
241
|
+
|
242
|
+
def build_count_mapper_and_reducer!
|
243
|
+
return unless @output["elements"].include?("count")
|
244
|
+
|
245
|
+
@reducers["count"] = {
|
246
|
+
"type" => "sum",
|
247
|
+
}
|
248
|
+
if unifiable?
|
249
|
+
@query["sortBy"]["limit"] = -1 if @query["sortBy"].is_a?(Hash)
|
250
|
+
@output["limit"] = -1
|
251
|
+
mapper = {
|
252
|
+
"type" => "count",
|
253
|
+
"target" => "records",
|
254
|
+
}
|
255
|
+
unless @output["elements"].include?("records")
|
256
|
+
@records_limit = -1
|
257
|
+
@output["elements"] << "records"
|
258
|
+
@output["attributes"] ||= ["_key"]
|
259
|
+
@output_records = false
|
260
|
+
end
|
261
|
+
@mappers["count"] = mapper
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
def build_records_mapper_and_reducer!
|
266
|
+
# Skip reducing phase for a result with no record output.
|
267
|
+
return if !@output["elements"].include?("records") || @records_limit.zero?
|
268
|
+
|
269
|
+
# Append sort key attributes to the list of output attributes
|
270
|
+
# temporarily, for the reducing phase. After all extra columns
|
271
|
+
# are removed on the gathering phase.
|
272
|
+
final_attributes = output_attribute_names
|
273
|
+
update_output_attributes!
|
274
|
+
|
275
|
+
@reducers["records"] = build_records_reducer
|
276
|
+
|
277
|
+
mapper = {
|
278
|
+
"type" => "sort",
|
279
|
+
"offset" => @records_offset,
|
280
|
+
"limit" => @records_limit,
|
281
|
+
"format" => @records_format,
|
282
|
+
"attributes" => final_attributes,
|
283
|
+
}
|
284
|
+
mapper["no_output"] = true unless @output_records
|
285
|
+
@mappers["records"] = mapper
|
286
|
+
end
|
287
|
+
|
288
|
+
def output_attribute_names
|
289
|
+
attributes = @output["attributes"] || []
|
290
|
+
if attributes.is_a?(Hash)
|
291
|
+
attributes.keys
|
292
|
+
else
|
293
|
+
attributes.collect do |attribute|
|
294
|
+
if attribute.is_a?(Hash)
|
295
|
+
attribute["label"] || attribute["source"]
|
296
|
+
else
|
297
|
+
attribute
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
303
|
+
def update_output_attributes!
|
304
|
+
@output["attributes"] = array_style_attributes
|
305
|
+
@output["attributes"] += sort_attribute_names
|
306
|
+
if unifiable? && !@output["attributes"].include?("_key")
|
307
|
+
@output["attributes"] << "_key"
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
def array_style_attributes
|
312
|
+
attributes = @output["attributes"] || []
|
313
|
+
if attributes.is_a?(Hash)
|
314
|
+
attributes.keys.collect do |key|
|
315
|
+
attribute = attributes[key]
|
316
|
+
case attribute
|
317
|
+
when String
|
318
|
+
{
|
319
|
+
"label" => key,
|
320
|
+
"source" => attribute,
|
321
|
+
}
|
322
|
+
when Hash
|
323
|
+
attribute["label"] = key
|
324
|
+
attribute
|
325
|
+
end
|
326
|
+
end
|
327
|
+
else
|
328
|
+
attributes
|
329
|
+
end
|
330
|
+
end
|
331
|
+
|
332
|
+
# Returns the source column name of every output attribute.
#
# @output["attributes"] may be:
# * an array whose entries are column names or hashes with
#   "source" and/or "label", or
# * a hash that maps a label to either a source column name (String)
#   or a hash with "source".
#
# @return [Array] one source column name per attribute, in order.
def source_column_names
  attributes = @output["attributes"] || []
  if attributes.is_a?(Hash)
    attributes.collect do |key, attribute|
      case attribute
      when Hash
        # Fall back to the label (the hash key) when no source is given.
        attribute["source"] || key
      when String
        # In the hash style, a plain string value is the source column.
        attribute
      else
        key
      end
    end
  else
    attributes.collect do |attribute|
      if attribute.is_a?(Hash)
        attribute["source"] || attribute["label"]
      else
        attribute
      end
    end
  end
end
|
351
|
+
|
352
|
+
def sort_attribute_names
|
353
|
+
sort_attributes = @sort_keys.collect do |key|
|
354
|
+
key = key[1..-1] if key[0] == "-"
|
355
|
+
key
|
356
|
+
end
|
357
|
+
attributes = source_column_names
|
358
|
+
sort_attributes.reject! do |attribute|
|
359
|
+
attributes.include?(attribute)
|
360
|
+
end
|
361
|
+
sort_attributes
|
362
|
+
end
|
363
|
+
|
364
|
+
ASCENDING_OPERATOR = "<".freeze
|
365
|
+
DESCENDING_OPERATOR = ">".freeze
|
366
|
+
|
367
|
+
# Builds the "sort" reducer definition for the records of this query.
#
# Every sort key becomes an operator: ASCENDING_OPERATOR by default,
# DESCENDING_OPERATOR for keys prefixed with "-". "column" is the
# position, in @output["attributes"], of the attribute the key sorts by.
# When the output is unifiable and "_key" is among the attributes, its
# position is passed as "key_column".
#
# @return [Hash] the reducer definition.
def build_records_reducer
  attributes = @output["attributes"]
  key_column_index = attributes.index("_key")

  # Attributes may be hashes ({"label" => ..., "source" => ...}), so a
  # sort key has to be looked up by source column name; comparing it
  # with the raw entries would yield a nil column for such attributes.
  column_names = attributes.collect do |attribute|
    if attribute.is_a?(Hash)
      attribute["source"] || attribute["label"]
    else
      attribute
    end
  end

  operators = @sort_keys.collect do |sort_key|
    operator = ASCENDING_OPERATOR
    if sort_key.start_with?("-")
      operator = DESCENDING_OPERATOR
      sort_key = sort_key[1..-1]
    end
    {
      "operator" => operator,
      "column"   => column_names.index(sort_key),
    }
  end

  reducer = {
    "type"      => "sort",
    "operators" => operators,
  }
  if unifiable? && !key_column_index.nil?
    reducer["key_column"] = key_column_index
  end

  # On the reducing phase, we apply only "limit". We cannot apply
  # "offset" on this phase because the collector merges a pair of
  # results step by step even if there are three or more results.
  # Instead, we apply "offset" on the gathering phase.
  reducer["limit"] = @output["limit"]

  reducer
end
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|