fluent-plugin-droonga 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/benchmark/watch/benchmark-notify.rb +2 -2
- data/benchmark/watch/benchmark-scan.rb +3 -0
- data/benchmark/watch/fluentd.conf +0 -1
- data/fluent-plugin-droonga.gemspec +2 -3
- data/lib/droonga/catalog.rb +10 -124
- data/lib/droonga/catalog/base.rb +140 -0
- data/lib/droonga/catalog/version1.rb +23 -0
- data/lib/droonga/catalog_loader.rb +33 -0
- data/lib/droonga/collector.rb +2 -71
- data/lib/droonga/collector_plugin.rb +2 -34
- data/lib/droonga/dispatcher.rb +141 -196
- data/lib/droonga/distribution_planner.rb +76 -0
- data/lib/droonga/distributor.rb +5 -7
- data/lib/droonga/distributor_plugin.rb +23 -15
- data/lib/droonga/engine.rb +2 -2
- data/lib/droonga/event_loop.rb +46 -0
- data/lib/droonga/farm.rb +9 -5
- data/lib/droonga/fluent_message_sender.rb +84 -0
- data/lib/droonga/forwarder.rb +43 -53
- data/lib/droonga/handler.rb +20 -68
- data/lib/droonga/handler_message.rb +61 -0
- data/lib/droonga/handler_messenger.rb +92 -0
- data/lib/droonga/handler_plugin.rb +10 -12
- data/lib/droonga/input_adapter.rb +52 -0
- data/lib/droonga/{adapter.rb → input_adapter_plugin.rb} +7 -13
- data/lib/droonga/input_message.rb +11 -11
- data/lib/droonga/logger.rb +4 -3
- data/lib/droonga/message_pack_packer.rb +62 -0
- data/lib/droonga/message_processing_error.rb +54 -0
- data/lib/droonga/message_pusher.rb +60 -0
- data/lib/droonga/message_receiver.rb +61 -0
- data/lib/droonga/output_adapter.rb +53 -0
- data/lib/droonga/{adapter_plugin.rb → output_adapter_plugin.rb} +3 -21
- data/lib/droonga/output_message.rb +37 -0
- data/lib/droonga/partition.rb +27 -5
- data/lib/droonga/pluggable.rb +9 -4
- data/lib/droonga/plugin.rb +12 -3
- data/lib/droonga/plugin/collector/basic.rb +91 -18
- data/lib/droonga/plugin/distributor/crud.rb +9 -9
- data/lib/droonga/plugin/distributor/distributed_search_planner.rb +401 -0
- data/lib/droonga/plugin/distributor/groonga.rb +5 -5
- data/lib/droonga/plugin/distributor/search.rb +4 -246
- data/lib/droonga/plugin/distributor/watch.rb +11 -6
- data/lib/droonga/plugin/handler/add.rb +69 -7
- data/lib/droonga/plugin/handler/groonga.rb +6 -6
- data/lib/droonga/plugin/handler/search.rb +5 -3
- data/lib/droonga/plugin/handler/watch.rb +19 -13
- data/lib/droonga/plugin/{adapter → input_adapter}/groonga.rb +5 -11
- data/lib/droonga/plugin/{adapter → input_adapter}/groonga/select.rb +2 -36
- data/lib/droonga/plugin/output_adapter/groonga.rb +30 -0
- data/lib/droonga/plugin/output_adapter/groonga/select.rb +54 -0
- data/lib/droonga/plugin_loader.rb +2 -2
- data/lib/droonga/processor.rb +21 -23
- data/lib/droonga/replier.rb +40 -0
- data/lib/droonga/searcher.rb +298 -174
- data/lib/droonga/server.rb +0 -67
- data/lib/droonga/session.rb +85 -0
- data/lib/droonga/test.rb +21 -0
- data/lib/droonga/test/stub_distributor.rb +31 -0
- data/lib/droonga/test/stub_handler.rb +37 -0
- data/lib/droonga/test/stub_handler_message.rb +35 -0
- data/lib/droonga/test/stub_handler_messenger.rb +34 -0
- data/lib/droonga/time_formatter.rb +37 -0
- data/lib/droonga/watcher.rb +1 -0
- data/lib/droonga/worker.rb +16 -19
- data/lib/fluent/plugin/out_droonga.rb +9 -9
- data/lib/groonga_command_converter.rb +5 -5
- data/sample/cluster/catalog.json +1 -1
- data/test/command/config/default/catalog.json +19 -1
- data/test/command/fixture/event.jsons +41 -0
- data/test/command/fixture/user-table.jsons +9 -0
- data/test/command/run-test.rb +2 -2
- data/test/command/suite/add/error/invalid-integer.expected +20 -0
- data/test/command/suite/add/error/invalid-integer.test +12 -0
- data/test/command/suite/add/error/invalid-time.expected +20 -0
- data/test/command/suite/add/error/invalid-time.test +12 -0
- data/test/command/suite/add/error/missing-key.expected +13 -0
- data/test/command/suite/add/error/missing-key.test +16 -0
- data/test/command/suite/add/error/missing-table.expected +13 -0
- data/test/command/suite/add/error/missing-table.test +16 -0
- data/test/command/suite/add/error/unknown-column.expected +20 -0
- data/test/command/suite/add/error/unknown-column.test +12 -0
- data/test/command/suite/add/error/unknown-table.expected +13 -0
- data/test/command/suite/add/error/unknown-table.test +17 -0
- data/test/command/suite/add/minimum.expected +1 -3
- data/test/command/suite/add/with-values.expected +1 -3
- data/test/command/suite/add/without-key.expected +1 -3
- data/test/command/suite/message/error/missing-dataset.expected +13 -0
- data/test/command/suite/message/error/missing-dataset.test +5 -0
- data/test/command/suite/message/error/unknown-command.expected +13 -0
- data/test/command/suite/message/error/unknown-command.test +6 -0
- data/test/command/suite/message/error/unknown-dataset.expected +13 -0
- data/test/command/suite/message/error/unknown-dataset.test +6 -0
- data/test/command/suite/search/{array-attribute-label.expected → attributes/array.expected} +0 -0
- data/test/command/suite/search/{array-attribute-label.test → attributes/array.test} +0 -0
- data/test/command/suite/search/{hash-attribute-label.expected → attributes/hash.expected} +0 -0
- data/test/command/suite/search/{hash-attribute-label.test → attributes/hash.test} +0 -0
- data/test/command/suite/search/{condition-nested.expected → condition/nested.expected} +0 -0
- data/test/command/suite/search/{condition-nested.test → condition/nested.test} +0 -0
- data/test/command/suite/search/{condition-query.expected → condition/query.expected} +0 -0
- data/test/command/suite/search/{condition-query.test → condition/query.test} +0 -0
- data/test/command/suite/search/{condition-script.expected → condition/script.expected} +0 -0
- data/test/command/suite/search/{condition-script.test → condition/script.test} +0 -0
- data/test/command/suite/search/error/cyclic-source.expected +18 -0
- data/test/command/suite/search/error/cyclic-source.test +12 -0
- data/test/command/suite/search/error/deeply-cyclic-source.expected +21 -0
- data/test/command/suite/search/error/deeply-cyclic-source.test +15 -0
- data/test/command/suite/search/error/missing-source-parameter.expected +17 -0
- data/test/command/suite/search/error/missing-source-parameter.test +11 -0
- data/test/command/suite/search/error/unknown-source.expected +18 -0
- data/test/command/suite/search/error/unknown-source.test +12 -0
- data/test/command/suite/search/{minimum.expected → group/count.expected} +2 -1
- data/test/command/suite/search/{minimum.test → group/count.test} +5 -3
- data/test/command/suite/search/group/limit.expected +19 -0
- data/test/command/suite/search/group/limit.test +20 -0
- data/test/command/suite/search/group/string.expected +36 -0
- data/test/command/suite/search/group/string.test +44 -0
- data/test/command/suite/search/{chained-queries.expected → multiple/chained.expected} +0 -0
- data/test/command/suite/search/{chained-queries.test → multiple/chained.test} +0 -0
- data/test/command/suite/search/{multiple-queries.expected → multiple/parallel.expected} +0 -0
- data/test/command/suite/search/{multiple-queries.test → multiple/parallel.test} +0 -0
- data/test/command/suite/search/{output-range.expected → range/only-output.expected} +0 -0
- data/test/command/suite/search/{output-range.test → range/only-output.test} +0 -0
- data/test/command/suite/search/{sort-range.expected → range/only-sort.expected} +0 -0
- data/test/command/suite/search/{sort-range.test → range/only-sort.test} +0 -0
- data/test/command/suite/search/{sort-and-output-range.expected → range/sort-and-output.expected} +0 -0
- data/test/command/suite/search/{sort-and-output-range.test → range/sort-and-output.test} +0 -0
- data/test/command/suite/search/range/too-large-output-offset.expected +16 -0
- data/test/command/suite/search/range/too-large-output-offset.test +25 -0
- data/test/command/suite/search/range/too-large-sort-offset.expected +16 -0
- data/test/command/suite/search/range/too-large-sort-offset.test +28 -0
- data/test/command/suite/search/response/records/value/time.expected +24 -0
- data/test/command/suite/search/response/records/value/time.test +24 -0
- data/test/command/suite/search/sort/default-offset-limit.expected +43 -0
- data/test/command/suite/search/sort/default-offset-limit.test +26 -0
- data/test/command/suite/search/{sort-with-invisible-column.expected → sort/invisible-column.expected} +0 -0
- data/test/command/suite/search/{sort-with-invisible-column.test → sort/invisible-column.test} +0 -0
- data/test/command/suite/watch/subscribe.expected +12 -0
- data/test/command/suite/watch/subscribe.test +9 -0
- data/test/command/suite/watch/unsubscribe.expected +12 -0
- data/test/command/suite/watch/unsubscribe.test +9 -0
- data/test/unit/{test_catalog.rb → catalog/test_version1.rb} +12 -4
- data/test/unit/fixtures/{catalog.json → catalog/version1.json} +0 -0
- data/test/unit/helper.rb +2 -0
- data/test/unit/plugin/collector/test_basic.rb +289 -33
- data/test/unit/plugin/distributor/test_search.rb +176 -861
- data/test/unit/plugin/distributor/test_search_planner.rb +1102 -0
- data/test/unit/plugin/handler/groonga/test_column_create.rb +17 -13
- data/test/unit/plugin/handler/groonga/test_table_create.rb +10 -10
- data/test/unit/plugin/handler/test_add.rb +74 -11
- data/test/unit/plugin/handler/test_groonga.rb +15 -1
- data/test/unit/plugin/handler/test_search.rb +33 -17
- data/test/unit/plugin/handler/test_watch.rb +43 -27
- data/test/unit/run-test.rb +2 -0
- data/test/unit/test_message_pack_packer.rb +51 -0
- data/test/unit/test_time_formatter.rb +29 -0
- metadata +208 -110
- data/lib/droonga/job_queue.rb +0 -87
- data/lib/droonga/job_queue_schema.rb +0 -65
- data/test/unit/test_adapter.rb +0 -51
- data/test/unit/test_job_queue_schema.rb +0 -45
data/lib/droonga/pluggable.rb
CHANGED
@@ -17,6 +17,14 @@
|
|
17
17
|
|
18
18
|
module Droonga
|
19
19
|
module Pluggable
|
20
|
+
# Raised by #process when no plugin is found for the requested command.
class UnknownPlugin < StandardError
  # The command for which no plugin was found.
  attr_reader :command

  # @param command the command name that could not be resolved to a plugin.
  def initialize(command)
    @command = command
    # Give the exception a readable message so that logs show the
    # offending command instead of only the exception class name.
    super("unknown plugin: <#{command}>")
  end
end
|
27
|
+
|
20
28
|
def shutdown
|
21
29
|
$log.trace("#{log_tag}: shutdown: plugin: start")
|
22
30
|
@plugins.each do |plugin|
|
@@ -33,10 +41,7 @@ module Droonga
|
|
33
41
|
plugin = find_plugin(command)
|
34
42
|
$log.trace("#{log_tag}: process: start: <#{command}>",
|
35
43
|
:plugin => plugin.class)
|
36
|
-
if plugin.nil?
|
37
|
-
raise "unknown plugin: <#{command}>: " +
|
38
|
-
"TODO: improve error handling"
|
39
|
-
end
|
44
|
+
raise UnknownPlugin.new(command) if plugin.nil?
|
40
45
|
plugin.process(command, *arguments)
|
41
46
|
$log.trace("#{log_tag}: process: done: <#{command}>",
|
42
47
|
:plugin => plugin.class)
|
data/lib/droonga/plugin.rb
CHANGED
@@ -33,11 +33,20 @@ module Droonga
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def process(command, *arguments)
|
36
|
-
|
36
|
+
run_command(command, *arguments)
|
37
37
|
rescue => exception
|
38
|
+
process_error(command, exception, arguments)
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
def run_command(command, *arguments)
|
43
|
+
__send__(self.class.method_name(command), *arguments)
|
44
|
+
end
|
45
|
+
|
46
|
+
def process_error(command, error, arguments)
|
38
47
|
Logger.error("error while processing #{command}",
|
39
|
-
|
40
|
-
|
48
|
+
error,
|
49
|
+
:arguments => arguments)
|
41
50
|
end
|
42
51
|
end
|
43
52
|
end
|
@@ -27,21 +27,47 @@ module Droonga
|
|
27
27
|
def collector_gather(result)
|
28
28
|
output = body ? body[input_name] : input_name
|
29
29
|
if output.is_a?(Hash)
|
30
|
-
|
31
|
-
if
|
32
|
-
|
33
|
-
|
30
|
+
elements = output["elements"]
|
31
|
+
if elements && elements.is_a?(Hash)
|
32
|
+
# phase 1: pre-process
|
33
|
+
elements.each do |element, mapper|
|
34
|
+
case mapper["type"]
|
35
|
+
when "count"
|
36
|
+
result[element] = result[mapper["target"]].size
|
37
|
+
when "sort"
|
38
|
+
# do nothing on this phase!
|
39
|
+
end
|
40
|
+
end
|
41
|
+
# phase 2: post-process
|
42
|
+
elements.each do |element, mapper|
|
43
|
+
if mapper["no_output"]
|
44
|
+
result.delete(element)
|
45
|
+
next
|
46
|
+
end
|
47
|
+
|
48
|
+
case mapper["type"]
|
49
|
+
when "count"
|
50
|
+
# do nothing on this phase!
|
51
|
+
when "sort"
|
52
|
+
# because "count" type mapper requires all items of the array,
|
53
|
+
# I have to apply "sort" type mapper later.
|
54
|
+
if result[element]
|
55
|
+
result[element] = apply_output_range(result[element], mapper)
|
56
|
+
result[element] = apply_output_attributes_and_format(result[element], mapper)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
34
60
|
end
|
35
61
|
output = output["output"]
|
36
62
|
end
|
37
|
-
emit(
|
63
|
+
emit(output, result)
|
38
64
|
end
|
39
65
|
|
40
66
|
def apply_output_range(items, output)
|
41
67
|
if items && items.is_a?(Array)
|
42
68
|
offset = output["offset"] || 0
|
43
69
|
unless offset.zero?
|
44
|
-
items = items[offset..-1]
|
70
|
+
items = items[offset..-1] || []
|
45
71
|
end
|
46
72
|
|
47
73
|
limit = output["limit"] || 0
|
@@ -80,7 +106,7 @@ module Droonga
|
|
80
106
|
value = request
|
81
107
|
old_value = output_values[output]
|
82
108
|
value = reduce(elements, old_value, request) if old_value
|
83
|
-
emit(
|
109
|
+
emit(output, value)
|
84
110
|
end
|
85
111
|
end
|
86
112
|
|
@@ -93,7 +119,10 @@ module Droonga
|
|
93
119
|
when "sum"
|
94
120
|
reduced_values = values[0][key] + values[1][key]
|
95
121
|
when "sort"
|
96
|
-
reduced_values = merge(values[0][key],
|
122
|
+
reduced_values = merge(values[0][key],
|
123
|
+
values[1][key],
|
124
|
+
:operators => deal["operators"],
|
125
|
+
:key_column => deal["key_column"])
|
97
126
|
end
|
98
127
|
|
99
128
|
reduced_values = apply_output_range(reduced_values, "limit" => deal["limit"])
|
@@ -103,16 +132,10 @@ module Droonga
|
|
103
132
|
return result
|
104
133
|
end
|
105
134
|
|
106
|
-
def merge(x, y,
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
if operator.is_a?(String)
|
111
|
-
{ "operator" => operator }
|
112
|
-
else
|
113
|
-
operator
|
114
|
-
end
|
115
|
-
end
|
135
|
+
def merge(x, y, options={})
|
136
|
+
operators = options[:operators] = normalize_operators(options[:operators])
|
137
|
+
|
138
|
+
unify_by_key!(x, y, options)
|
116
139
|
|
117
140
|
index = 0
|
118
141
|
y.each do |_y|
|
@@ -128,6 +151,17 @@ module Droonga
|
|
128
151
|
return x
|
129
152
|
end
|
130
153
|
|
154
|
+
def normalize_operators(operators)
|
155
|
+
operators ||= []
|
156
|
+
operators.collect do |operator|
|
157
|
+
if operator.is_a?(String)
|
158
|
+
{ "operator" => operator }
|
159
|
+
else
|
160
|
+
operator
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
131
165
|
def compare(x, y, operators)
|
132
166
|
operators.each_with_index do |operator, index|
|
133
167
|
column = operator["column"] || index
|
@@ -138,5 +172,44 @@ module Droonga
|
|
138
172
|
end
|
139
173
|
return false
|
140
174
|
end
|
175
|
+
|
176
|
+
def unify_by_key!(base_items, unified_items, options={})
|
177
|
+
key_column_index = options[:key_column]
|
178
|
+
return unless key_column_index
|
179
|
+
|
180
|
+
# The unified records must be smaller than the base, because
|
181
|
+
# I sort unified records at last. I want to sort only smaller array.
|
182
|
+
if base_items.size < unified_items.size
|
183
|
+
base_items, unified_items = unified_items, base_items
|
184
|
+
end
|
185
|
+
|
186
|
+
rest_unified_items = unified_items.dup
|
187
|
+
|
188
|
+
base_items.reject! do |base_item|
|
189
|
+
key = base_item[key_column_index]
|
190
|
+
rest_unified_items.any? do |unified_item|
|
191
|
+
if unified_item[key_column_index] == key
|
192
|
+
base_item.each_with_index do |value, column|
|
193
|
+
next if column == key_column_index
|
194
|
+
unified_item[column] += value
|
195
|
+
end
|
196
|
+
rest_unified_items -= [unified_item]
|
197
|
+
true
|
198
|
+
else
|
199
|
+
false
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
unless rest_unified_items.size == unified_items.size
|
205
|
+
unified_items.sort! do |a, b|
|
206
|
+
if compare(a, b, options[:operators])
|
207
|
+
-1
|
208
|
+
else
|
209
|
+
1
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
141
214
|
end
|
142
215
|
end
|
@@ -22,22 +22,22 @@ module Droonga
|
|
22
22
|
repository.register("crud", self)
|
23
23
|
|
24
24
|
command :add
|
25
|
-
def add(
|
26
|
-
key =
|
27
|
-
scatter_all(
|
25
|
+
def add(message)
|
26
|
+
key = message["body"]["key"] || rand.to_s
|
27
|
+
scatter_all(message, key)
|
28
28
|
end
|
29
29
|
|
30
30
|
command :update
|
31
|
-
def update(
|
32
|
-
key =
|
33
|
-
scatter_all(
|
31
|
+
def update(message)
|
32
|
+
key = message["body"]["key"] || rand.to_s
|
33
|
+
scatter_all(message, key)
|
34
34
|
end
|
35
35
|
|
36
36
|
# TODO: What is this?
|
37
37
|
command :reset
|
38
|
-
def reset(
|
39
|
-
key =
|
40
|
-
scatter_all(
|
38
|
+
def reset(message)
|
39
|
+
key = message["body"]["key"] || rand.to_s
|
40
|
+
scatter_all(message, key)
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -0,0 +1,401 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2013 Droonga Project
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License version 2.1 as published by the Free Software Foundation.
|
8
|
+
#
|
9
|
+
# This library is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# Lesser General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU Lesser General Public
|
15
|
+
# License along with this library; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
17
|
+
|
18
|
+
module Droonga
|
19
|
+
class DistributedSearchPlanner
|
20
|
+
attr_reader :messages
|
21
|
+
|
22
|
+
def initialize(search_request_message)
|
23
|
+
@source_message = search_request_message
|
24
|
+
@request = @source_message["body"]
|
25
|
+
@queries = @request["queries"]
|
26
|
+
|
27
|
+
@input_names = []
|
28
|
+
@output_names = []
|
29
|
+
@output_mappers = {}
|
30
|
+
@messages = []
|
31
|
+
|
32
|
+
build_messages
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
UNLIMITED = -1
|
37
|
+
|
38
|
+
def build_messages
|
39
|
+
Searcher::QuerySorter.validate_dependencies(@queries)
|
40
|
+
|
41
|
+
ensure_unifiable!
|
42
|
+
|
43
|
+
@queries.each do |input_name, query|
|
44
|
+
transform_query(input_name, query)
|
45
|
+
end
|
46
|
+
|
47
|
+
gatherer = {
|
48
|
+
"type" => "gather",
|
49
|
+
"body" => @output_mappers,
|
50
|
+
"inputs" => @output_names, # XXX should be placed in the "body"?
|
51
|
+
"post" => true, # XXX should be placed in the "body"?
|
52
|
+
}
|
53
|
+
@messages << gatherer
|
54
|
+
searcher = {
|
55
|
+
"type" => "broadcast",
|
56
|
+
"command" => "search", # XXX should be placed in the "body"?
|
57
|
+
"dataset" => @source_message["dataset"] || @request["dataset"],
|
58
|
+
"body" => @request,
|
59
|
+
"outputs" => @input_names, # XXX should be placed in the "body"?
|
60
|
+
"replica" => "random", # XXX should be placed in the "body"?
|
61
|
+
}
|
62
|
+
@messages.push(searcher)
|
63
|
+
end
|
64
|
+
|
65
|
+
def ensure_unifiable!
|
66
|
+
@queries.each do |name, query|
|
67
|
+
if unifiable?(name) && query["output"]
|
68
|
+
query["output"]["unifiable"] = true
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Tells whether the query registered as +name+ in @queries, or any query
# reachable from it through the "source" chain, has a "groupBy" clause.
#
# The chain is followed only while the "source" value is itself the name
# of another query in @queries; any other source ends the search.
#
# NOTE(review): there is no cycle guard here. build_messages calls
# Searcher::QuerySorter.validate_dependencies before this method is used,
# which presumably rejects cyclic sources - confirm.
#
# @param name the name (key in @queries) of the query to inspect.
# @return [Boolean] true when a "groupBy" is found along the chain.
def unifiable?(name)
  query = @queries[name]
  return true if query["groupBy"]

  source_name = query["source"]
  # Hash#key? avoids building a temporary array of all query names on
  # every step of the recursion.
  return false unless @queries.key?(source_name)

  unifiable?(source_name)
end
|
80
|
+
|
81
|
+
def transform_query(input_name, query)
|
82
|
+
output = query["output"]
|
83
|
+
|
84
|
+
# Skip reducing phase for a result with no output.
|
85
|
+
if output.nil? or
|
86
|
+
output["elements"].nil? or
|
87
|
+
(!output["elements"].include?("count") &&
|
88
|
+
!output["elements"].include?("records"))
|
89
|
+
return
|
90
|
+
end
|
91
|
+
|
92
|
+
@input_names << input_name
|
93
|
+
output_name = input_name + "_reduced"
|
94
|
+
@output_names << output_name
|
95
|
+
|
96
|
+
transformer = QueryTransformer.new(query)
|
97
|
+
|
98
|
+
reducer = {
|
99
|
+
"type" => "reduce",
|
100
|
+
"body" => {
|
101
|
+
input_name => {
|
102
|
+
output_name => transformer.reducers,
|
103
|
+
},
|
104
|
+
},
|
105
|
+
"inputs" => [input_name], # XXX should be placed in the "body"?
|
106
|
+
"outputs" => [output_name], # XXX should be placed in the "body"?
|
107
|
+
}
|
108
|
+
@messages << reducer
|
109
|
+
|
110
|
+
@output_mappers[output_name] = {
|
111
|
+
"output" => input_name,
|
112
|
+
"elements" => transformer.mappers,
|
113
|
+
}
|
114
|
+
end
|
115
|
+
|
116
|
+
class QueryTransformer
|
117
|
+
attr_reader :reducers, :mappers
|
118
|
+
|
119
|
+
def initialize(query)
|
120
|
+
@query = query
|
121
|
+
@output = @query["output"]
|
122
|
+
@reducers = {}
|
123
|
+
@mappers = {}
|
124
|
+
@output_records = true
|
125
|
+
transform!
|
126
|
+
end
|
127
|
+
|
128
|
+
def transform!
|
129
|
+
# The collector module supports only "simple" format search results.
|
130
|
+
# So we have to override the format and restore it on the gathering
|
131
|
+
# phase.
|
132
|
+
@records_format = @output["format"] || "simple"
|
133
|
+
@output["format"] = "simple"
|
134
|
+
|
135
|
+
@sort_keys = @query["sortBy"] || []
|
136
|
+
@sort_keys = @sort_keys["keys"] || [] if @sort_keys.is_a?(Hash)
|
137
|
+
|
138
|
+
calculate_offset_and_limit!
|
139
|
+
build_count_mapper_and_reducer!
|
140
|
+
build_records_mapper_and_reducer!
|
141
|
+
end
|
142
|
+
|
143
|
+
def calculate_offset_and_limit!
|
144
|
+
@original_sort_offset = sort_offset
|
145
|
+
@original_output_offset = output_offset
|
146
|
+
@original_sort_limit = sort_limit
|
147
|
+
@original_output_limit = output_limit
|
148
|
+
|
149
|
+
calculate_sort_offset!
|
150
|
+
calculate_output_offset!
|
151
|
+
|
152
|
+
# We have to calculate limit based on offset.
|
153
|
+
# <A, B = limited integer (0...MAXINT)>
|
154
|
+
# | sort limit | output limit | => | worker's sort limit | worker's output limit | final limit |
|
155
|
+
# ============================= ====================================================================
|
156
|
+
# | UNLIMITED | UNLIMITED | => | UNLIMITED | UNLIMITED | UNLIMITED |
|
157
|
+
# | UNLIMITED | B | => | final_offset + B | final_offset + B | B |
|
158
|
+
# | A | UNLIMITED | => | final_offset + A | final_offset + A | A |
|
159
|
+
# | A | B | => | final_offset + max(A, B) | final_offset + min(A, B)| min(A, B) |
|
160
|
+
|
161
|
+
# XXX final_limit and final_offset calculated in many times
|
162
|
+
|
163
|
+
@records_offset = final_offset
|
164
|
+
@records_limit = final_limit
|
165
|
+
|
166
|
+
if final_limit == UNLIMITED
|
167
|
+
@output["limit"] = UNLIMITED
|
168
|
+
else
|
169
|
+
if rich_sort?
|
170
|
+
@query["sortBy"]["limit"] = final_offset + [sort_limit, output_limit].max
|
171
|
+
end
|
172
|
+
@output["limit"] = final_offset + final_limit
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
def calculate_sort_offset!
|
177
|
+
# Offset for workers must be zero, because we have to apply "limit" and
|
178
|
+
# "offset" on the last gathering phase instead of each reducing phase.
|
179
|
+
if rich_sort?
|
180
|
+
@query["sortBy"]["offset"] = 0
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def sort_offset
|
185
|
+
if rich_sort?
|
186
|
+
@query["sortBy"]["offset"] || 0
|
187
|
+
else
|
188
|
+
0
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def output_offset
|
193
|
+
@output["offset"] || 0
|
194
|
+
end
|
195
|
+
|
196
|
+
def sort_limit
|
197
|
+
if rich_sort?
|
198
|
+
@query["sortBy"]["limit"] || UNLIMITED
|
199
|
+
else
|
200
|
+
UNLIMITED
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def output_limit
|
205
|
+
@output["limit"] || 0
|
206
|
+
end
|
207
|
+
|
208
|
+
def calculate_output_offset!
|
209
|
+
@output["offset"] = 0 if have_records?
|
210
|
+
end
|
211
|
+
|
212
|
+
def final_offset
|
213
|
+
@original_sort_offset + @original_output_offset
|
214
|
+
end
|
215
|
+
|
216
|
+
def final_limit
|
217
|
+
if @original_sort_limit == UNLIMITED && @original_output_limit == UNLIMITED
|
218
|
+
UNLIMITED
|
219
|
+
else
|
220
|
+
if @original_sort_limit == UNLIMITED
|
221
|
+
@original_output_limit
|
222
|
+
elsif @original_output_limit == UNLIMITED
|
223
|
+
@original_sort_limit
|
224
|
+
else
|
225
|
+
[@original_sort_limit, @original_output_limit].min
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
def have_records?
|
231
|
+
@output["elements"].include?("records")
|
232
|
+
end
|
233
|
+
|
234
|
+
def rich_sort?
|
235
|
+
@query["sortBy"].is_a?(Hash)
|
236
|
+
end
|
237
|
+
|
238
|
+
def unifiable?
|
239
|
+
@output["unifiable"]
|
240
|
+
end
|
241
|
+
|
242
|
+
def build_count_mapper_and_reducer!
|
243
|
+
return unless @output["elements"].include?("count")
|
244
|
+
|
245
|
+
@reducers["count"] = {
|
246
|
+
"type" => "sum",
|
247
|
+
}
|
248
|
+
if unifiable?
|
249
|
+
@query["sortBy"]["limit"] = -1 if @query["sortBy"].is_a?(Hash)
|
250
|
+
@output["limit"] = -1
|
251
|
+
mapper = {
|
252
|
+
"type" => "count",
|
253
|
+
"target" => "records",
|
254
|
+
}
|
255
|
+
unless @output["elements"].include?("records")
|
256
|
+
@records_limit = -1
|
257
|
+
@output["elements"] << "records"
|
258
|
+
@output["attributes"] ||= ["_key"]
|
259
|
+
@output_records = false
|
260
|
+
end
|
261
|
+
@mappers["count"] = mapper
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
def build_records_mapper_and_reducer!
|
266
|
+
# Skip reducing phase for a result with no record output.
|
267
|
+
return if !@output["elements"].include?("records") || @records_limit.zero?
|
268
|
+
|
269
|
+
# Append sort key attributes to the list of output attributes
|
270
|
+
# temporarily, for the reducing phase. After all extra columns
|
271
|
+
# are removed on the gathering phase.
|
272
|
+
final_attributes = output_attribute_names
|
273
|
+
update_output_attributes!
|
274
|
+
|
275
|
+
@reducers["records"] = build_records_reducer
|
276
|
+
|
277
|
+
mapper = {
|
278
|
+
"type" => "sort",
|
279
|
+
"offset" => @records_offset,
|
280
|
+
"limit" => @records_limit,
|
281
|
+
"format" => @records_format,
|
282
|
+
"attributes" => final_attributes,
|
283
|
+
}
|
284
|
+
mapper["no_output"] = true unless @output_records
|
285
|
+
@mappers["records"] = mapper
|
286
|
+
end
|
287
|
+
|
288
|
+
def output_attribute_names
|
289
|
+
attributes = @output["attributes"] || []
|
290
|
+
if attributes.is_a?(Hash)
|
291
|
+
attributes.keys
|
292
|
+
else
|
293
|
+
attributes.collect do |attribute|
|
294
|
+
if attribute.is_a?(Hash)
|
295
|
+
attribute["label"] || attribute["source"]
|
296
|
+
else
|
297
|
+
attribute
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
303
|
+
def update_output_attributes!
|
304
|
+
@output["attributes"] = array_style_attributes
|
305
|
+
@output["attributes"] += sort_attribute_names
|
306
|
+
if unifiable? && !@output["attributes"].include?("_key")
|
307
|
+
@output["attributes"] << "_key"
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
def array_style_attributes
|
312
|
+
attributes = @output["attributes"] || []
|
313
|
+
if attributes.is_a?(Hash)
|
314
|
+
attributes.keys.collect do |key|
|
315
|
+
attribute = attributes[key]
|
316
|
+
case attribute
|
317
|
+
when String
|
318
|
+
{
|
319
|
+
"label" => key,
|
320
|
+
"source" => attribute,
|
321
|
+
}
|
322
|
+
when Hash
|
323
|
+
attribute["label"] = key
|
324
|
+
attribute
|
325
|
+
end
|
326
|
+
end
|
327
|
+
else
|
328
|
+
attributes
|
329
|
+
end
|
330
|
+
end
|
331
|
+
|
332
|
+
# Lists the source column name behind every output attribute.
#
# Reads @output["attributes"], which may be either
# * a Hash of label => definition, where the definition is a source
#   column name (String) or a Hash that may carry a "source" entry, or
# * an Array whose items are column names or Hashes with "source"
#   and/or "label" entries.
#
# @return [Array] one source column name per attribute, in the order
#   the attributes are given; empty when no attributes are set.
def source_column_names
  attributes = @output["attributes"] || []
  if attributes.is_a?(Hash)
    attributes.collect do |label, attribute|
      case attribute
      when Hash
        # Fall back to the label when the definition names no source.
        attribute["source"] || label
      when String
        # A String definition is itself the source column name.
        attribute
      else
        label
      end
    end
  else
    attributes.collect do |attribute|
      if attribute.is_a?(Hash)
        attribute["source"] || attribute["label"]
      else
        attribute
      end
    end
  end
end
|
351
|
+
|
352
|
+
def sort_attribute_names
|
353
|
+
sort_attributes = @sort_keys.collect do |key|
|
354
|
+
key = key[1..-1] if key[0] == "-"
|
355
|
+
key
|
356
|
+
end
|
357
|
+
attributes = source_column_names
|
358
|
+
sort_attributes.reject! do |attribute|
|
359
|
+
attributes.include?(attribute)
|
360
|
+
end
|
361
|
+
sort_attributes
|
362
|
+
end
|
363
|
+
|
364
|
+
ASCENDING_OPERATOR = "<".freeze
|
365
|
+
DESCENDING_OPERATOR = ">".freeze
|
366
|
+
|
367
|
+
# Builds the "sort" reducer definition for the "records" element.
#
# Each entry of @sort_keys becomes one operator: a leading "-" selects
# DESCENDING_OPERATOR, anything else ASCENDING_OPERATOR, and "column" is
# the position of the sorted column within @output["attributes"].
#
# @return [Hash] with "type" => "sort", "operators", "limit" (copied
#   from @output["limit"]) and, only when the output is unifiable and a
#   "_key" attribute is present, "key_column".
def build_records_reducer
  attributes = @output["attributes"]
  key_column_index = attributes.index("_key")

  # Attributes may be given as Hashes ({"label" => ..., "source" => ...}).
  # A sort key whose column is output that way is not appended again as
  # a plain name, so it has to be located by its source name as well.
  column_sources = attributes.collect do |attribute|
    if attribute.is_a?(Hash)
      attribute["source"] || attribute["label"]
    else
      attribute
    end
  end

  operators = @sort_keys.collect do |sort_key|
    operator = ASCENDING_OPERATOR
    if sort_key[0] == "-"
      operator = DESCENDING_OPERATOR
      sort_key = sort_key[1..-1]
    end
    {
      "operator" => operator,
      # Prefer the plain-name position (the previous behavior) and fall
      # back to the Hash-style attribute that reads the same source.
      "column" => attributes.index(sort_key) || column_sources.index(sort_key),
    }
  end

  reducer = {
    "type" => "sort",
    "operators" => operators,
  }
  if unifiable? && !key_column_index.nil?
    reducer["key_column"] = key_column_index
  end

  # On the reducing phase, we apply only "limit". We cannot apply
  # "offset" on this phase because the collector merges a pair of
  # results step by step even if there are three or more results.
  # Instead, we apply "offset" on the gathering phase.
  reducer["limit"] = @output["limit"]

  reducer
end
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|