fluent-plugin-droonga 0.7.0 → 0.8.0

Files changed (163)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/benchmark/watch/benchmark-notify.rb +2 -2
  4. data/benchmark/watch/benchmark-scan.rb +3 -0
  5. data/benchmark/watch/fluentd.conf +0 -1
  6. data/fluent-plugin-droonga.gemspec +2 -3
  7. data/lib/droonga/catalog.rb +10 -124
  8. data/lib/droonga/catalog/base.rb +140 -0
  9. data/lib/droonga/catalog/version1.rb +23 -0
  10. data/lib/droonga/catalog_loader.rb +33 -0
  11. data/lib/droonga/collector.rb +2 -71
  12. data/lib/droonga/collector_plugin.rb +2 -34
  13. data/lib/droonga/dispatcher.rb +141 -196
  14. data/lib/droonga/distribution_planner.rb +76 -0
  15. data/lib/droonga/distributor.rb +5 -7
  16. data/lib/droonga/distributor_plugin.rb +23 -15
  17. data/lib/droonga/engine.rb +2 -2
  18. data/lib/droonga/event_loop.rb +46 -0
  19. data/lib/droonga/farm.rb +9 -5
  20. data/lib/droonga/fluent_message_sender.rb +84 -0
  21. data/lib/droonga/forwarder.rb +43 -53
  22. data/lib/droonga/handler.rb +20 -68
  23. data/lib/droonga/handler_message.rb +61 -0
  24. data/lib/droonga/handler_messenger.rb +92 -0
  25. data/lib/droonga/handler_plugin.rb +10 -12
  26. data/lib/droonga/input_adapter.rb +52 -0
  27. data/lib/droonga/{adapter.rb → input_adapter_plugin.rb} +7 -13
  28. data/lib/droonga/input_message.rb +11 -11
  29. data/lib/droonga/logger.rb +4 -3
  30. data/lib/droonga/message_pack_packer.rb +62 -0
  31. data/lib/droonga/message_processing_error.rb +54 -0
  32. data/lib/droonga/message_pusher.rb +60 -0
  33. data/lib/droonga/message_receiver.rb +61 -0
  34. data/lib/droonga/output_adapter.rb +53 -0
  35. data/lib/droonga/{adapter_plugin.rb → output_adapter_plugin.rb} +3 -21
  36. data/lib/droonga/output_message.rb +37 -0
  37. data/lib/droonga/partition.rb +27 -5
  38. data/lib/droonga/pluggable.rb +9 -4
  39. data/lib/droonga/plugin.rb +12 -3
  40. data/lib/droonga/plugin/collector/basic.rb +91 -18
  41. data/lib/droonga/plugin/distributor/crud.rb +9 -9
  42. data/lib/droonga/plugin/distributor/distributed_search_planner.rb +401 -0
  43. data/lib/droonga/plugin/distributor/groonga.rb +5 -5
  44. data/lib/droonga/plugin/distributor/search.rb +4 -246
  45. data/lib/droonga/plugin/distributor/watch.rb +11 -6
  46. data/lib/droonga/plugin/handler/add.rb +69 -7
  47. data/lib/droonga/plugin/handler/groonga.rb +6 -6
  48. data/lib/droonga/plugin/handler/search.rb +5 -3
  49. data/lib/droonga/plugin/handler/watch.rb +19 -13
  50. data/lib/droonga/plugin/{adapter → input_adapter}/groonga.rb +5 -11
  51. data/lib/droonga/plugin/{adapter → input_adapter}/groonga/select.rb +2 -36
  52. data/lib/droonga/plugin/output_adapter/groonga.rb +30 -0
  53. data/lib/droonga/plugin/output_adapter/groonga/select.rb +54 -0
  54. data/lib/droonga/plugin_loader.rb +2 -2
  55. data/lib/droonga/processor.rb +21 -23
  56. data/lib/droonga/replier.rb +40 -0
  57. data/lib/droonga/searcher.rb +298 -174
  58. data/lib/droonga/server.rb +0 -67
  59. data/lib/droonga/session.rb +85 -0
  60. data/lib/droonga/test.rb +21 -0
  61. data/lib/droonga/test/stub_distributor.rb +31 -0
  62. data/lib/droonga/test/stub_handler.rb +37 -0
  63. data/lib/droonga/test/stub_handler_message.rb +35 -0
  64. data/lib/droonga/test/stub_handler_messenger.rb +34 -0
  65. data/lib/droonga/time_formatter.rb +37 -0
  66. data/lib/droonga/watcher.rb +1 -0
  67. data/lib/droonga/worker.rb +16 -19
  68. data/lib/fluent/plugin/out_droonga.rb +9 -9
  69. data/lib/groonga_command_converter.rb +5 -5
  70. data/sample/cluster/catalog.json +1 -1
  71. data/test/command/config/default/catalog.json +19 -1
  72. data/test/command/fixture/event.jsons +41 -0
  73. data/test/command/fixture/user-table.jsons +9 -0
  74. data/test/command/run-test.rb +2 -2
  75. data/test/command/suite/add/error/invalid-integer.expected +20 -0
  76. data/test/command/suite/add/error/invalid-integer.test +12 -0
  77. data/test/command/suite/add/error/invalid-time.expected +20 -0
  78. data/test/command/suite/add/error/invalid-time.test +12 -0
  79. data/test/command/suite/add/error/missing-key.expected +13 -0
  80. data/test/command/suite/add/error/missing-key.test +16 -0
  81. data/test/command/suite/add/error/missing-table.expected +13 -0
  82. data/test/command/suite/add/error/missing-table.test +16 -0
  83. data/test/command/suite/add/error/unknown-column.expected +20 -0
  84. data/test/command/suite/add/error/unknown-column.test +12 -0
  85. data/test/command/suite/add/error/unknown-table.expected +13 -0
  86. data/test/command/suite/add/error/unknown-table.test +17 -0
  87. data/test/command/suite/add/minimum.expected +1 -3
  88. data/test/command/suite/add/with-values.expected +1 -3
  89. data/test/command/suite/add/without-key.expected +1 -3
  90. data/test/command/suite/message/error/missing-dataset.expected +13 -0
  91. data/test/command/suite/message/error/missing-dataset.test +5 -0
  92. data/test/command/suite/message/error/unknown-command.expected +13 -0
  93. data/test/command/suite/message/error/unknown-command.test +6 -0
  94. data/test/command/suite/message/error/unknown-dataset.expected +13 -0
  95. data/test/command/suite/message/error/unknown-dataset.test +6 -0
  96. data/test/command/suite/search/{array-attribute-label.expected → attributes/array.expected} +0 -0
  97. data/test/command/suite/search/{array-attribute-label.test → attributes/array.test} +0 -0
  98. data/test/command/suite/search/{hash-attribute-label.expected → attributes/hash.expected} +0 -0
  99. data/test/command/suite/search/{hash-attribute-label.test → attributes/hash.test} +0 -0
  100. data/test/command/suite/search/{condition-nested.expected → condition/nested.expected} +0 -0
  101. data/test/command/suite/search/{condition-nested.test → condition/nested.test} +0 -0
  102. data/test/command/suite/search/{condition-query.expected → condition/query.expected} +0 -0
  103. data/test/command/suite/search/{condition-query.test → condition/query.test} +0 -0
  104. data/test/command/suite/search/{condition-script.expected → condition/script.expected} +0 -0
  105. data/test/command/suite/search/{condition-script.test → condition/script.test} +0 -0
  106. data/test/command/suite/search/error/cyclic-source.expected +18 -0
  107. data/test/command/suite/search/error/cyclic-source.test +12 -0
  108. data/test/command/suite/search/error/deeply-cyclic-source.expected +21 -0
  109. data/test/command/suite/search/error/deeply-cyclic-source.test +15 -0
  110. data/test/command/suite/search/error/missing-source-parameter.expected +17 -0
  111. data/test/command/suite/search/error/missing-source-parameter.test +11 -0
  112. data/test/command/suite/search/error/unknown-source.expected +18 -0
  113. data/test/command/suite/search/error/unknown-source.test +12 -0
  114. data/test/command/suite/search/{minimum.expected → group/count.expected} +2 -1
  115. data/test/command/suite/search/{minimum.test → group/count.test} +5 -3
  116. data/test/command/suite/search/group/limit.expected +19 -0
  117. data/test/command/suite/search/group/limit.test +20 -0
  118. data/test/command/suite/search/group/string.expected +36 -0
  119. data/test/command/suite/search/group/string.test +44 -0
  120. data/test/command/suite/search/{chained-queries.expected → multiple/chained.expected} +0 -0
  121. data/test/command/suite/search/{chained-queries.test → multiple/chained.test} +0 -0
  122. data/test/command/suite/search/{multiple-queries.expected → multiple/parallel.expected} +0 -0
  123. data/test/command/suite/search/{multiple-queries.test → multiple/parallel.test} +0 -0
  124. data/test/command/suite/search/{output-range.expected → range/only-output.expected} +0 -0
  125. data/test/command/suite/search/{output-range.test → range/only-output.test} +0 -0
  126. data/test/command/suite/search/{sort-range.expected → range/only-sort.expected} +0 -0
  127. data/test/command/suite/search/{sort-range.test → range/only-sort.test} +0 -0
  128. data/test/command/suite/search/{sort-and-output-range.expected → range/sort-and-output.expected} +0 -0
  129. data/test/command/suite/search/{sort-and-output-range.test → range/sort-and-output.test} +0 -0
  130. data/test/command/suite/search/range/too-large-output-offset.expected +16 -0
  131. data/test/command/suite/search/range/too-large-output-offset.test +25 -0
  132. data/test/command/suite/search/range/too-large-sort-offset.expected +16 -0
  133. data/test/command/suite/search/range/too-large-sort-offset.test +28 -0
  134. data/test/command/suite/search/response/records/value/time.expected +24 -0
  135. data/test/command/suite/search/response/records/value/time.test +24 -0
  136. data/test/command/suite/search/sort/default-offset-limit.expected +43 -0
  137. data/test/command/suite/search/sort/default-offset-limit.test +26 -0
  138. data/test/command/suite/search/{sort-with-invisible-column.expected → sort/invisible-column.expected} +0 -0
  139. data/test/command/suite/search/{sort-with-invisible-column.test → sort/invisible-column.test} +0 -0
  140. data/test/command/suite/watch/subscribe.expected +12 -0
  141. data/test/command/suite/watch/subscribe.test +9 -0
  142. data/test/command/suite/watch/unsubscribe.expected +12 -0
  143. data/test/command/suite/watch/unsubscribe.test +9 -0
  144. data/test/unit/{test_catalog.rb → catalog/test_version1.rb} +12 -4
  145. data/test/unit/fixtures/{catalog.json → catalog/version1.json} +0 -0
  146. data/test/unit/helper.rb +2 -0
  147. data/test/unit/plugin/collector/test_basic.rb +289 -33
  148. data/test/unit/plugin/distributor/test_search.rb +176 -861
  149. data/test/unit/plugin/distributor/test_search_planner.rb +1102 -0
  150. data/test/unit/plugin/handler/groonga/test_column_create.rb +17 -13
  151. data/test/unit/plugin/handler/groonga/test_table_create.rb +10 -10
  152. data/test/unit/plugin/handler/test_add.rb +74 -11
  153. data/test/unit/plugin/handler/test_groonga.rb +15 -1
  154. data/test/unit/plugin/handler/test_search.rb +33 -17
  155. data/test/unit/plugin/handler/test_watch.rb +43 -27
  156. data/test/unit/run-test.rb +2 -0
  157. data/test/unit/test_message_pack_packer.rb +51 -0
  158. data/test/unit/test_time_formatter.rb +29 -0
  159. metadata +208 -110
  160. data/lib/droonga/job_queue.rb +0 -87
  161. data/lib/droonga/job_queue_schema.rb +0 -65
  162. data/test/unit/test_adapter.rb +0 -51
  163. data/test/unit/test_job_queue_schema.rb +0 -45
data/lib/droonga/pluggable.rb
@@ -17,6 +17,14 @@
 
 module Droonga
   module Pluggable
+    class UnknownPlugin < StandardError
+      attr_reader :command
+
+      def initialize(command)
+        @command = command
+      end
+    end
+
     def shutdown
       $log.trace("#{log_tag}: shutdown: plugin: start")
       @plugins.each do |plugin|
@@ -33,10 +41,7 @@ module Droonga
       plugin = find_plugin(command)
       $log.trace("#{log_tag}: process: start: <#{command}>",
                  :plugin => plugin.class)
-      if plugin.nil?
-        raise "unknown plugin: <#{command}>: " +
-              "TODO: improve error handling"
-      end
+      raise UnknownPlugin.new(command) if plugin.nil?
       plugin.process(command, *arguments)
       $log.trace("#{log_tag}: process: done: <#{command}>",
                  :plugin => plugin.class)
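A minimal sketch (caller, message and logging invented, not part of this diff) of how code that drives a Pluggable object can now rescue the structured error instead of matching a bare RuntimeError message:

    begin
      handler.process("no_such_command", request)
    rescue Droonga::Pluggable::UnknownPlugin => error
      # The offending command name now travels with the exception.
      $log.warn("ignoring message for unknown command: <#{error.command}>")
    end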
data/lib/droonga/plugin.rb
@@ -33,11 +33,20 @@ module Droonga
     end
 
     def process(command, *arguments)
-      __send__(self.class.method_name(command), *arguments)
+      run_command(command, *arguments)
     rescue => exception
+      process_error(command, exception, arguments)
+    end
+
+    private
+    def run_command(command, *arguments)
+      __send__(self.class.method_name(command), *arguments)
+    end
+
+    def process_error(command, error, arguments)
       Logger.error("error while processing #{command}",
-                   arguments: arguments,
-                   exception: exception)
+                   error,
+                   :arguments => arguments)
     end
   end
 end
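Splitting process into run_command and process_error also gives plugin classes an overridable hook; a hypothetical sketch (class name and error policy invented, not part of this diff):

    class MyPlugin < Droonga::Plugin
      private
      def process_error(command, error, arguments)
        # Example policy: let well-defined client errors propagate,
        # keep the default logging for everything else.
        raise error if error.is_a?(Droonga::MessageProcessingError)
        super
      end
    end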
data/lib/droonga/plugin/collector/basic.rb
@@ -27,21 +27,47 @@ module Droonga
     def collector_gather(result)
       output = body ? body[input_name] : input_name
       if output.is_a?(Hash)
-        element = output["element"]
-        if element
-          result[element] = apply_output_range(result[element], output)
-          result[element] = apply_output_attributes_and_format(result[element], output)
+        elements = output["elements"]
+        if elements && elements.is_a?(Hash)
+          # phase 1: pre-process
+          elements.each do |element, mapper|
+            case mapper["type"]
+            when "count"
+              result[element] = result[mapper["target"]].size
+            when "sort"
+              # do nothing on this phase!
+            end
+          end
+          # phase 2: post-process
+          elements.each do |element, mapper|
+            if mapper["no_output"]
+              result.delete(element)
+              next
+            end
+
+            case mapper["type"]
+            when "count"
+              # do nothing on this phase!
+            when "sort"
+              # because "count" type mapper requires all items of the array,
+              # I have to apply "sort" type mapper later.
+              if result[element]
+                result[element] = apply_output_range(result[element], mapper)
+                result[element] = apply_output_attributes_and_format(result[element], mapper)
+              end
+            end
+          end
         end
         output = output["output"]
       end
-      emit(result, output)
+      emit(output, result)
     end
 
     def apply_output_range(items, output)
       if items && items.is_a?(Array)
         offset = output["offset"] || 0
         unless offset.zero?
-          items = items[offset..-1]
+          items = items[offset..-1] || []
         end
 
         limit = output["limit"] || 0
@@ -80,7 +106,7 @@ module Droonga
         value = request
         old_value = output_values[output]
         value = reduce(elements, old_value, request) if old_value
-        emit(value, output)
+        emit(output, value)
       end
     end
 
@@ -93,7 +119,10 @@ module Droonga
         when "sum"
           reduced_values = values[0][key] + values[1][key]
         when "sort"
-          reduced_values = merge(values[0][key], values[1][key], deal["operators"])
+          reduced_values = merge(values[0][key],
+                                 values[1][key],
+                                 :operators => deal["operators"],
+                                 :key_column => deal["key_column"])
         end
 
         reduced_values = apply_output_range(reduced_values, "limit" => deal["limit"])
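For reference, a sketch (query name and values invented) of the per-input hash the rewritten collector_gather above consumes; the DistributedSearchPlanner added later in this release builds hashes of exactly this shape:

    {
      "query1_reduced" => {
        "output"   => "query1",
        "elements" => {
          "count" => {
            "type"   => "count",     # phase 1: recompute the count from the gathered records
            "target" => "records",
          },
          "records" => {
            "type"       => "sort",  # phase 2: apply offset/limit/attributes/format afterwards
            "offset"     => 0,
            "limit"      => 10,
            "format"     => "complex",
            "attributes" => ["_key", "name"],
          },
        },
      },
    }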
@@ -103,16 +132,10 @@ module Droonga
       return result
     end
 
-    def merge(x, y, operators)
-      # Normalize operators at first for optimization.
-      operators ||= []
-      operators = operators.collect do |operator|
-        if operator.is_a?(String)
-          { "operator" => operator }
-        else
-          operator
-        end
-      end
+    def merge(x, y, options={})
+      operators = options[:operators] = normalize_operators(options[:operators])
+
+      unify_by_key!(x, y, options)
 
       index = 0
       y.each do |_y|
@@ -128,6 +151,17 @@ module Droonga
       return x
     end
 
+    def normalize_operators(operators)
+      operators ||= []
+      operators.collect do |operator|
+        if operator.is_a?(String)
+          { "operator" => operator }
+        else
+          operator
+        end
+      end
+    end
+
     def compare(x, y, operators)
       operators.each_with_index do |operator, index|
         column = operator["column"] || index
@@ -138,5 +172,44 @@ module Droonga
       end
       return false
     end
+
+    def unify_by_key!(base_items, unified_items, options={})
+      key_column_index = options[:key_column]
+      return unless key_column_index
+
+      # The unified records must be smaller than the base, because
+      # I sort unified records at last. I want to sort only smaller array.
+      if base_items.size < unified_items.size
+        base_items, unified_items = unified_items, base_items
+      end
+
+      rest_unified_items = unified_items.dup
+
+      base_items.reject! do |base_item|
+        key = base_item[key_column_index]
+        rest_unified_items.any? do |unified_item|
+          if unified_item[key_column_index] == key
+            base_item.each_with_index do |value, column|
+              next if column == key_column_index
+              unified_item[column] += value
+            end
+            rest_unified_items -= [unified_item]
+            true
+          else
+            false
+          end
+        end
+      end
+
+      unless rest_unified_items.size == unified_items.size
+        unified_items.sort! do |a, b|
+          if compare(a, b, options[:operators])
+            -1
+          else
+            1
+          end
+        end
+      end
+    end
   end
 end
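An illustrative sketch (data invented) of what the new :key_column option does: when grouped results from two partitions contain records with the same key, unify_by_key! folds them together before the operator-based merge, so the key does not appear twice in the gathered result.

    x = [["alice", 2], ["bob", 1]]    # [_key, count] rows from one partition
    y = [["alice", 3], ["carol", 1]]  # [_key, count] rows from another partition
    merge(x, y,
          :operators  => [{ "operator" => "<", "column" => 1 }],
          :key_column => 0)
    # The two "alice" rows are summed column-wise into ["alice", 5]
    # before the sorted merge runs.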
data/lib/droonga/plugin/distributor/crud.rb
@@ -22,22 +22,22 @@ module Droonga
     repository.register("crud", self)
 
     command :add
-    def add(envelope)
-      key = envelope["body"]["key"] || rand.to_s
-      scatter_all(envelope, key)
+    def add(message)
+      key = message["body"]["key"] || rand.to_s
+      scatter_all(message, key)
     end
 
     command :update
-    def update(envelope)
-      key = envelope["body"]["key"] || rand.to_s
-      scatter_all(envelope, key)
+    def update(message)
+      key = message["body"]["key"] || rand.to_s
+      scatter_all(message, key)
     end
 
     # TODO: What is this?
     command :reset
-    def reset(envelope)
-      key = envelope["body"]["key"] || rand.to_s
-      scatter_all(envelope, key)
+    def reset(message)
+      key = message["body"]["key"] || rand.to_s
+      scatter_all(message, key)
     end
   end
 end
data/lib/droonga/plugin/distributor/distributed_search_planner.rb
@@ -0,0 +1,401 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2013 Droonga Project
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+module Droonga
+  class DistributedSearchPlanner
+    attr_reader :messages
+
+    def initialize(search_request_message)
+      @source_message = search_request_message
+      @request = @source_message["body"]
+      @queries = @request["queries"]
+
+      @input_names = []
+      @output_names = []
+      @output_mappers = {}
+      @messages = []
+
+      build_messages
+    end
+
+    private
+    UNLIMITED = -1
+
+    def build_messages
+      Searcher::QuerySorter.validate_dependencies(@queries)
+
+      ensure_unifiable!
+
+      @queries.each do |input_name, query|
+        transform_query(input_name, query)
+      end
+
+      gatherer = {
+        "type" => "gather",
+        "body" => @output_mappers,
+        "inputs" => @output_names, # XXX should be placed in the "body"?
+        "post" => true, # XXX should be placed in the "body"?
+      }
+      @messages << gatherer
+      searcher = {
+        "type" => "broadcast",
+        "command" => "search", # XXX should be placed in the "body"?
+        "dataset" => @source_message["dataset"] || @request["dataset"],
+        "body" => @request,
+        "outputs" => @input_names, # XXX should be placed in the "body"?
+        "replica" => "random", # XXX should be placed in the "body"?
+      }
+      @messages.push(searcher)
+    end
+
+    def ensure_unifiable!
+      @queries.each do |name, query|
+        if unifiable?(name) && query["output"]
+          query["output"]["unifiable"] = true
+        end
+      end
+    end
+
+    def unifiable?(name)
+      query = @queries[name]
+      return true if query["groupBy"]
+      name = query["source"]
+      return false unless @queries.keys.include?(name)
+      unifiable?(name)
+    end
+
+    def transform_query(input_name, query)
+      output = query["output"]
+
+      # Skip reducing phase for a result with no output.
+      if output.nil? or
+           output["elements"].nil? or
+           (!output["elements"].include?("count") &&
+            !output["elements"].include?("records"))
+        return
+      end
+
+      @input_names << input_name
+      output_name = input_name + "_reduced"
+      @output_names << output_name
+
+      transformer = QueryTransformer.new(query)
+
+      reducer = {
+        "type" => "reduce",
+        "body" => {
+          input_name => {
+            output_name => transformer.reducers,
+          },
+        },
+        "inputs" => [input_name], # XXX should be placed in the "body"?
+        "outputs" => [output_name], # XXX should be placed in the "body"?
+      }
+      @messages << reducer
+
+      @output_mappers[output_name] = {
+        "output" => input_name,
+        "elements" => transformer.mappers,
+      }
+    end
+
+    class QueryTransformer
+      attr_reader :reducers, :mappers
+
+      def initialize(query)
+        @query = query
+        @output = @query["output"]
+        @reducers = {}
+        @mappers = {}
+        @output_records = true
+        transform!
+      end
+
+      def transform!
+        # The collector module supports only "simple" format search results.
+        # So we have to override the format and restore it on the gathering
+        # phase.
+        @records_format = @output["format"] || "simple"
+        @output["format"] = "simple"
+
+        @sort_keys = @query["sortBy"] || []
+        @sort_keys = @sort_keys["keys"] || [] if @sort_keys.is_a?(Hash)
+
+        calculate_offset_and_limit!
+        build_count_mapper_and_reducer!
+        build_records_mapper_and_reducer!
+      end
+
+      def calculate_offset_and_limit!
+        @original_sort_offset = sort_offset
+        @original_output_offset = output_offset
+        @original_sort_limit = sort_limit
+        @original_output_limit = output_limit
+
+        calculate_sort_offset!
+        calculate_output_offset!
+
+        # We have to calculate limit based on offset.
+        # <A, B = limited integer (0...MAXINT)>
+        # | sort limit | output limit | => | worker's sort limit      | worker's output limit    | final limit |
+        # =============================    ====================================================================
+        # | UNLIMITED  | UNLIMITED    | => | UNLIMITED                | UNLIMITED                | UNLIMITED   |
+        # | UNLIMITED  | B            | => | final_offset + B         | final_offset + B         | B           |
+        # | A          | UNLIMITED    | => | final_offset + A         | final_offset + A         | A           |
+        # | A          | B            | => | final_offset + max(A, B) | final_offset + min(A, B) | min(A, B)   |
+
+        # XXX final_limit and final_offset calculated in many times
+
+        @records_offset = final_offset
+        @records_limit = final_limit
+
+        if final_limit == UNLIMITED
+          @output["limit"] = UNLIMITED
+        else
+          if rich_sort?
+            @query["sortBy"]["limit"] = final_offset + [sort_limit, output_limit].max
+          end
+          @output["limit"] = final_offset + final_limit
+        end
+      end
+
+      def calculate_sort_offset!
+        # Offset for workers must be zero, because we have to apply "limit" and
+        # "offset" on the last gathering phase instead of each reducing phase.
+        if rich_sort?
+          @query["sortBy"]["offset"] = 0
+        end
+      end
+
+      def sort_offset
+        if rich_sort?
+          @query["sortBy"]["offset"] || 0
+        else
+          0
+        end
+      end
+
+      def output_offset
+        @output["offset"] || 0
+      end
+
+      def sort_limit
+        if rich_sort?
+          @query["sortBy"]["limit"] || UNLIMITED
+        else
+          UNLIMITED
+        end
+      end
+
+      def output_limit
+        @output["limit"] || 0
+      end
+
+      def calculate_output_offset!
+        @output["offset"] = 0 if have_records?
+      end
+
+      def final_offset
+        @original_sort_offset + @original_output_offset
+      end
+
+      def final_limit
+        if @original_sort_limit == UNLIMITED && @original_output_limit == UNLIMITED
+          UNLIMITED
+        else
+          if @original_sort_limit == UNLIMITED
+            @original_output_limit
+          elsif @original_output_limit == UNLIMITED
+            @original_sort_limit
+          else
+            [@original_sort_limit, @original_output_limit].min
+          end
+        end
+      end
+
+      def have_records?
+        @output["elements"].include?("records")
+      end
+
+      def rich_sort?
+        @query["sortBy"].is_a?(Hash)
+      end
+
+      def unifiable?
+        @output["unifiable"]
+      end
+
+      def build_count_mapper_and_reducer!
+        return unless @output["elements"].include?("count")
+
+        @reducers["count"] = {
+          "type" => "sum",
+        }
+        if unifiable?
+          @query["sortBy"]["limit"] = -1 if @query["sortBy"].is_a?(Hash)
+          @output["limit"] = -1
+          mapper = {
+            "type" => "count",
+            "target" => "records",
+          }
+          unless @output["elements"].include?("records")
+            @records_limit = -1
+            @output["elements"] << "records"
+            @output["attributes"] ||= ["_key"]
+            @output_records = false
+          end
+          @mappers["count"] = mapper
+        end
+      end
+
+      def build_records_mapper_and_reducer!
+        # Skip reducing phase for a result with no record output.
+        return if !@output["elements"].include?("records") || @records_limit.zero?
+
+        # Append sort key attributes to the list of output attributes
+        # temporarily, for the reducing phase. After all extra columns
+        # are removed on the gathering phase.
+        final_attributes = output_attribute_names
+        update_output_attributes!
+
+        @reducers["records"] = build_records_reducer
+
+        mapper = {
+          "type" => "sort",
+          "offset" => @records_offset,
+          "limit" => @records_limit,
+          "format" => @records_format,
+          "attributes" => final_attributes,
+        }
+        mapper["no_output"] = true unless @output_records
+        @mappers["records"] = mapper
+      end
+
+      def output_attribute_names
+        attributes = @output["attributes"] || []
+        if attributes.is_a?(Hash)
+          attributes.keys
+        else
+          attributes.collect do |attribute|
+            if attribute.is_a?(Hash)
+              attribute["label"] || attribute["source"]
+            else
+              attribute
+            end
+          end
+        end
+      end
+
+      def update_output_attributes!
+        @output["attributes"] = array_style_attributes
+        @output["attributes"] += sort_attribute_names
+        if unifiable? && !@output["attributes"].include?("_key")
+          @output["attributes"] << "_key"
+        end
+      end
+
+      def array_style_attributes
+        attributes = @output["attributes"] || []
+        if attributes.is_a?(Hash)
+          attributes.keys.collect do |key|
+            attribute = attributes[key]
+            case attribute
+            when String
+              {
+                "label" => key,
+                "source" => attribute,
+              }
+            when Hash
+              attribute["label"] = key
+              attribute
+            end
+          end
+        else
+          attributes
+        end
+      end
+
+      def source_column_names
+        attributes = @output["attributes"] || []
+        if attributes.is_a?(Hash)
+          attributes_hash = attributes
+          attributes = []
+          attributes_hash.each do |key, attribute|
+            attributes << attribute["source"] || key
+          end
+          attributes
+        else
+          attributes.collect do |attribute|
+            if attribute.is_a?(Hash)
+              attribute["source"] || attribute["label"]
+            else
+              attribute
+            end
+          end
+        end
+      end
+
+      def sort_attribute_names
+        sort_attributes = @sort_keys.collect do |key|
+          key = key[1..-1] if key[0] == "-"
+          key
+        end
+        attributes = source_column_names
+        sort_attributes.reject! do |attribute|
+          attributes.include?(attribute)
+        end
+        sort_attributes
+      end
+
+      ASCENDING_OPERATOR = "<".freeze
+      DESCENDING_OPERATOR = ">".freeze
+
+      def build_records_reducer
+        attributes = @output["attributes"]
+        key_column_index = attributes.index("_key")
+
+        operators = @sort_keys.collect do |sort_key|
+          operator = ASCENDING_OPERATOR
+          if sort_key[0] == "-"
+            operator = DESCENDING_OPERATOR
+            sort_key = sort_key[1..-1]
+          end
+          {
+            "operator" => operator,
+            "column" => attributes.index(sort_key),
+          }
+        end
+
+        reducer = {
+          "type" => "sort",
+          "operators" => operators,
+        }
+        if unifiable? && !key_column_index.nil?
+          reducer["key_column"] = key_column_index
+        end
+
+        # On the reducing phase, we apply only "limit". We cannot apply
+        # "offset" on this phase because the collector merges a pair of
+        # results step by step even if there are three or more results.
+        # Instead, we apply "offset" on the gathering phase.
+        reducer["limit"] = @output["limit"]
+
+        reducer
+      end
+    end
+  end
+end
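A minimal usage sketch (dataset and query values invented) of the new planner as the search distributor drives it: one incoming search request becomes a list of reduce, gather and broadcast messages.

    request = {
      "type"    => "search",
      "dataset" => "Default",
      "body"    => {
        "queries" => {
          "query1" => {
            "source" => "Users",
            "output" => {
              "elements"   => ["count", "records"],
              "attributes" => ["_key"],
              "limit"      => 10,
            },
          },
        },
      },
    }
    planner = Droonga::DistributedSearchPlanner.new(request)
    planner.messages
    # => [{"type" => "reduce", ...},
    #     {"type" => "gather", ...},
    #     {"type" => "broadcast", "command" => "search", ...}]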
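And a worked example (numbers invented) of the limit/offset table in calculate_offset_and_limit! above:

    # Request: sortBy offset 5, limit 10; output offset 2, limit 4.
    # final_offset = 5 + 2        # => 7
    # final_limit  = [10, 4].min  # => 4
    # Query rewritten for each worker:
    #   sortBy["offset"] = 0, sortBy["limit"] = 7 + [10, 4].max  # => 17
    #   output["offset"] = 0, output["limit"] = 7 + 4            # => 11
    # The real offset (7) and limit (4) are applied later by the "sort"
    # mapper on the gathering phase.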