fluent-plugin-droonga 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/benchmark/watch/benchmark-notify.rb +2 -2
  4. data/benchmark/watch/benchmark-scan.rb +3 -0
  5. data/benchmark/watch/fluentd.conf +0 -1
  6. data/fluent-plugin-droonga.gemspec +2 -3
  7. data/lib/droonga/catalog.rb +10 -124
  8. data/lib/droonga/catalog/base.rb +140 -0
  9. data/lib/droonga/catalog/version1.rb +23 -0
  10. data/lib/droonga/catalog_loader.rb +33 -0
  11. data/lib/droonga/collector.rb +2 -71
  12. data/lib/droonga/collector_plugin.rb +2 -34
  13. data/lib/droonga/dispatcher.rb +141 -196
  14. data/lib/droonga/distribution_planner.rb +76 -0
  15. data/lib/droonga/distributor.rb +5 -7
  16. data/lib/droonga/distributor_plugin.rb +23 -15
  17. data/lib/droonga/engine.rb +2 -2
  18. data/lib/droonga/event_loop.rb +46 -0
  19. data/lib/droonga/farm.rb +9 -5
  20. data/lib/droonga/fluent_message_sender.rb +84 -0
  21. data/lib/droonga/forwarder.rb +43 -53
  22. data/lib/droonga/handler.rb +20 -68
  23. data/lib/droonga/handler_message.rb +61 -0
  24. data/lib/droonga/handler_messenger.rb +92 -0
  25. data/lib/droonga/handler_plugin.rb +10 -12
  26. data/lib/droonga/input_adapter.rb +52 -0
  27. data/lib/droonga/{adapter.rb → input_adapter_plugin.rb} +7 -13
  28. data/lib/droonga/input_message.rb +11 -11
  29. data/lib/droonga/logger.rb +4 -3
  30. data/lib/droonga/message_pack_packer.rb +62 -0
  31. data/lib/droonga/message_processing_error.rb +54 -0
  32. data/lib/droonga/message_pusher.rb +60 -0
  33. data/lib/droonga/message_receiver.rb +61 -0
  34. data/lib/droonga/output_adapter.rb +53 -0
  35. data/lib/droonga/{adapter_plugin.rb → output_adapter_plugin.rb} +3 -21
  36. data/lib/droonga/output_message.rb +37 -0
  37. data/lib/droonga/partition.rb +27 -5
  38. data/lib/droonga/pluggable.rb +9 -4
  39. data/lib/droonga/plugin.rb +12 -3
  40. data/lib/droonga/plugin/collector/basic.rb +91 -18
  41. data/lib/droonga/plugin/distributor/crud.rb +9 -9
  42. data/lib/droonga/plugin/distributor/distributed_search_planner.rb +401 -0
  43. data/lib/droonga/plugin/distributor/groonga.rb +5 -5
  44. data/lib/droonga/plugin/distributor/search.rb +4 -246
  45. data/lib/droonga/plugin/distributor/watch.rb +11 -6
  46. data/lib/droonga/plugin/handler/add.rb +69 -7
  47. data/lib/droonga/plugin/handler/groonga.rb +6 -6
  48. data/lib/droonga/plugin/handler/search.rb +5 -3
  49. data/lib/droonga/plugin/handler/watch.rb +19 -13
  50. data/lib/droonga/plugin/{adapter → input_adapter}/groonga.rb +5 -11
  51. data/lib/droonga/plugin/{adapter → input_adapter}/groonga/select.rb +2 -36
  52. data/lib/droonga/plugin/output_adapter/groonga.rb +30 -0
  53. data/lib/droonga/plugin/output_adapter/groonga/select.rb +54 -0
  54. data/lib/droonga/plugin_loader.rb +2 -2
  55. data/lib/droonga/processor.rb +21 -23
  56. data/lib/droonga/replier.rb +40 -0
  57. data/lib/droonga/searcher.rb +298 -174
  58. data/lib/droonga/server.rb +0 -67
  59. data/lib/droonga/session.rb +85 -0
  60. data/lib/droonga/test.rb +21 -0
  61. data/lib/droonga/test/stub_distributor.rb +31 -0
  62. data/lib/droonga/test/stub_handler.rb +37 -0
  63. data/lib/droonga/test/stub_handler_message.rb +35 -0
  64. data/lib/droonga/test/stub_handler_messenger.rb +34 -0
  65. data/lib/droonga/time_formatter.rb +37 -0
  66. data/lib/droonga/watcher.rb +1 -0
  67. data/lib/droonga/worker.rb +16 -19
  68. data/lib/fluent/plugin/out_droonga.rb +9 -9
  69. data/lib/groonga_command_converter.rb +5 -5
  70. data/sample/cluster/catalog.json +1 -1
  71. data/test/command/config/default/catalog.json +19 -1
  72. data/test/command/fixture/event.jsons +41 -0
  73. data/test/command/fixture/user-table.jsons +9 -0
  74. data/test/command/run-test.rb +2 -2
  75. data/test/command/suite/add/error/invalid-integer.expected +20 -0
  76. data/test/command/suite/add/error/invalid-integer.test +12 -0
  77. data/test/command/suite/add/error/invalid-time.expected +20 -0
  78. data/test/command/suite/add/error/invalid-time.test +12 -0
  79. data/test/command/suite/add/error/missing-key.expected +13 -0
  80. data/test/command/suite/add/error/missing-key.test +16 -0
  81. data/test/command/suite/add/error/missing-table.expected +13 -0
  82. data/test/command/suite/add/error/missing-table.test +16 -0
  83. data/test/command/suite/add/error/unknown-column.expected +20 -0
  84. data/test/command/suite/add/error/unknown-column.test +12 -0
  85. data/test/command/suite/add/error/unknown-table.expected +13 -0
  86. data/test/command/suite/add/error/unknown-table.test +17 -0
  87. data/test/command/suite/add/minimum.expected +1 -3
  88. data/test/command/suite/add/with-values.expected +1 -3
  89. data/test/command/suite/add/without-key.expected +1 -3
  90. data/test/command/suite/message/error/missing-dataset.expected +13 -0
  91. data/test/command/suite/message/error/missing-dataset.test +5 -0
  92. data/test/command/suite/message/error/unknown-command.expected +13 -0
  93. data/test/command/suite/message/error/unknown-command.test +6 -0
  94. data/test/command/suite/message/error/unknown-dataset.expected +13 -0
  95. data/test/command/suite/message/error/unknown-dataset.test +6 -0
  96. data/test/command/suite/search/{array-attribute-label.expected → attributes/array.expected} +0 -0
  97. data/test/command/suite/search/{array-attribute-label.test → attributes/array.test} +0 -0
  98. data/test/command/suite/search/{hash-attribute-label.expected → attributes/hash.expected} +0 -0
  99. data/test/command/suite/search/{hash-attribute-label.test → attributes/hash.test} +0 -0
  100. data/test/command/suite/search/{condition-nested.expected → condition/nested.expected} +0 -0
  101. data/test/command/suite/search/{condition-nested.test → condition/nested.test} +0 -0
  102. data/test/command/suite/search/{condition-query.expected → condition/query.expected} +0 -0
  103. data/test/command/suite/search/{condition-query.test → condition/query.test} +0 -0
  104. data/test/command/suite/search/{condition-script.expected → condition/script.expected} +0 -0
  105. data/test/command/suite/search/{condition-script.test → condition/script.test} +0 -0
  106. data/test/command/suite/search/error/cyclic-source.expected +18 -0
  107. data/test/command/suite/search/error/cyclic-source.test +12 -0
  108. data/test/command/suite/search/error/deeply-cyclic-source.expected +21 -0
  109. data/test/command/suite/search/error/deeply-cyclic-source.test +15 -0
  110. data/test/command/suite/search/error/missing-source-parameter.expected +17 -0
  111. data/test/command/suite/search/error/missing-source-parameter.test +11 -0
  112. data/test/command/suite/search/error/unknown-source.expected +18 -0
  113. data/test/command/suite/search/error/unknown-source.test +12 -0
  114. data/test/command/suite/search/{minimum.expected → group/count.expected} +2 -1
  115. data/test/command/suite/search/{minimum.test → group/count.test} +5 -3
  116. data/test/command/suite/search/group/limit.expected +19 -0
  117. data/test/command/suite/search/group/limit.test +20 -0
  118. data/test/command/suite/search/group/string.expected +36 -0
  119. data/test/command/suite/search/group/string.test +44 -0
  120. data/test/command/suite/search/{chained-queries.expected → multiple/chained.expected} +0 -0
  121. data/test/command/suite/search/{chained-queries.test → multiple/chained.test} +0 -0
  122. data/test/command/suite/search/{multiple-queries.expected → multiple/parallel.expected} +0 -0
  123. data/test/command/suite/search/{multiple-queries.test → multiple/parallel.test} +0 -0
  124. data/test/command/suite/search/{output-range.expected → range/only-output.expected} +0 -0
  125. data/test/command/suite/search/{output-range.test → range/only-output.test} +0 -0
  126. data/test/command/suite/search/{sort-range.expected → range/only-sort.expected} +0 -0
  127. data/test/command/suite/search/{sort-range.test → range/only-sort.test} +0 -0
  128. data/test/command/suite/search/{sort-and-output-range.expected → range/sort-and-output.expected} +0 -0
  129. data/test/command/suite/search/{sort-and-output-range.test → range/sort-and-output.test} +0 -0
  130. data/test/command/suite/search/range/too-large-output-offset.expected +16 -0
  131. data/test/command/suite/search/range/too-large-output-offset.test +25 -0
  132. data/test/command/suite/search/range/too-large-sort-offset.expected +16 -0
  133. data/test/command/suite/search/range/too-large-sort-offset.test +28 -0
  134. data/test/command/suite/search/response/records/value/time.expected +24 -0
  135. data/test/command/suite/search/response/records/value/time.test +24 -0
  136. data/test/command/suite/search/sort/default-offset-limit.expected +43 -0
  137. data/test/command/suite/search/sort/default-offset-limit.test +26 -0
  138. data/test/command/suite/search/{sort-with-invisible-column.expected → sort/invisible-column.expected} +0 -0
  139. data/test/command/suite/search/{sort-with-invisible-column.test → sort/invisible-column.test} +0 -0
  140. data/test/command/suite/watch/subscribe.expected +12 -0
  141. data/test/command/suite/watch/subscribe.test +9 -0
  142. data/test/command/suite/watch/unsubscribe.expected +12 -0
  143. data/test/command/suite/watch/unsubscribe.test +9 -0
  144. data/test/unit/{test_catalog.rb → catalog/test_version1.rb} +12 -4
  145. data/test/unit/fixtures/{catalog.json → catalog/version1.json} +0 -0
  146. data/test/unit/helper.rb +2 -0
  147. data/test/unit/plugin/collector/test_basic.rb +289 -33
  148. data/test/unit/plugin/distributor/test_search.rb +176 -861
  149. data/test/unit/plugin/distributor/test_search_planner.rb +1102 -0
  150. data/test/unit/plugin/handler/groonga/test_column_create.rb +17 -13
  151. data/test/unit/plugin/handler/groonga/test_table_create.rb +10 -10
  152. data/test/unit/plugin/handler/test_add.rb +74 -11
  153. data/test/unit/plugin/handler/test_groonga.rb +15 -1
  154. data/test/unit/plugin/handler/test_search.rb +33 -17
  155. data/test/unit/plugin/handler/test_watch.rb +43 -27
  156. data/test/unit/run-test.rb +2 -0
  157. data/test/unit/test_message_pack_packer.rb +51 -0
  158. data/test/unit/test_time_formatter.rb +29 -0
  159. metadata +208 -110
  160. data/lib/droonga/job_queue.rb +0 -87
  161. data/lib/droonga/job_queue_schema.rb +0 -65
  162. data/test/unit/test_adapter.rb +0 -51
  163. data/test/unit/test_job_queue_schema.rb +0 -45
@@ -17,6 +17,14 @@
17
17
 
18
18
  module Droonga
19
19
  module Pluggable
20
+ class UnknownPlugin < StandardError
21
+ attr_reader :command
22
+
23
+ def initialize(command)
24
+ @command = command
25
+ end
26
+ end
27
+
20
28
  def shutdown
21
29
  $log.trace("#{log_tag}: shutdown: plugin: start")
22
30
  @plugins.each do |plugin|
@@ -33,10 +41,7 @@ module Droonga
33
41
  plugin = find_plugin(command)
34
42
  $log.trace("#{log_tag}: process: start: <#{command}>",
35
43
  :plugin => plugin.class)
36
- if plugin.nil?
37
- raise "unknown plugin: <#{command}>: " +
38
- "TODO: improve error handling"
39
- end
44
+ raise UnknownPlugin.new(command) if plugin.nil?
40
45
  plugin.process(command, *arguments)
41
46
  $log.trace("#{log_tag}: process: done: <#{command}>",
42
47
  :plugin => plugin.class)
@@ -33,11 +33,20 @@ module Droonga
33
33
  end
34
34
 
35
35
  def process(command, *arguments)
36
- __send__(self.class.method_name(command), *arguments)
36
+ run_command(command, *arguments)
37
37
  rescue => exception
38
+ process_error(command, exception, arguments)
39
+ end
40
+
41
+ private
42
+ def run_command(command, *arguments)
43
+ __send__(self.class.method_name(command), *arguments)
44
+ end
45
+
46
+ def process_error(command, error, arguments)
38
47
  Logger.error("error while processing #{command}",
39
- arguments: arguments,
40
- exception: exception)
48
+ error,
49
+ :arguments => arguments)
41
50
  end
42
51
  end
43
52
  end
@@ -27,21 +27,47 @@ module Droonga
27
27
  def collector_gather(result)
28
28
  output = body ? body[input_name] : input_name
29
29
  if output.is_a?(Hash)
30
- element = output["element"]
31
- if element
32
- result[element] = apply_output_range(result[element], output)
33
- result[element] = apply_output_attributes_and_format(result[element], output)
30
+ elements = output["elements"]
31
+ if elements && elements.is_a?(Hash)
32
+ # phase 1: pre-process
33
+ elements.each do |element, mapper|
34
+ case mapper["type"]
35
+ when "count"
36
+ result[element] = result[mapper["target"]].size
37
+ when "sort"
38
+ # do nothing on this phase!
39
+ end
40
+ end
41
+ # phase 2: post-process
42
+ elements.each do |element, mapper|
43
+ if mapper["no_output"]
44
+ result.delete(element)
45
+ next
46
+ end
47
+
48
+ case mapper["type"]
49
+ when "count"
50
+ # do nothing on this phase!
51
+ when "sort"
52
+ # because "count" type mapper requires all items of the array,
53
+ # I have to apply "sort" type mapper later.
54
+ if result[element]
55
+ result[element] = apply_output_range(result[element], mapper)
56
+ result[element] = apply_output_attributes_and_format(result[element], mapper)
57
+ end
58
+ end
59
+ end
34
60
  end
35
61
  output = output["output"]
36
62
  end
37
- emit(result, output)
63
+ emit(output, result)
38
64
  end
39
65
 
40
66
  def apply_output_range(items, output)
41
67
  if items && items.is_a?(Array)
42
68
  offset = output["offset"] || 0
43
69
  unless offset.zero?
44
- items = items[offset..-1]
70
+ items = items[offset..-1] || []
45
71
  end
46
72
 
47
73
  limit = output["limit"] || 0
@@ -80,7 +106,7 @@ module Droonga
80
106
  value = request
81
107
  old_value = output_values[output]
82
108
  value = reduce(elements, old_value, request) if old_value
83
- emit(value, output)
109
+ emit(output, value)
84
110
  end
85
111
  end
86
112
 
@@ -93,7 +119,10 @@ module Droonga
93
119
  when "sum"
94
120
  reduced_values = values[0][key] + values[1][key]
95
121
  when "sort"
96
- reduced_values = merge(values[0][key], values[1][key], deal["operators"])
122
+ reduced_values = merge(values[0][key],
123
+ values[1][key],
124
+ :operators => deal["operators"],
125
+ :key_column => deal["key_column"])
97
126
  end
98
127
 
99
128
  reduced_values = apply_output_range(reduced_values, "limit" => deal["limit"])
@@ -103,16 +132,10 @@ module Droonga
103
132
  return result
104
133
  end
105
134
 
106
- def merge(x, y, operators)
107
- # Normalize operators at first for optimization.
108
- operators ||= []
109
- operators = operators.collect do |operator|
110
- if operator.is_a?(String)
111
- { "operator" => operator }
112
- else
113
- operator
114
- end
115
- end
135
+ def merge(x, y, options={})
136
+ operators = options[:operators] = normalize_operators(options[:operators])
137
+
138
+ unify_by_key!(x, y, options)
116
139
 
117
140
  index = 0
118
141
  y.each do |_y|
@@ -128,6 +151,17 @@ module Droonga
128
151
  return x
129
152
  end
130
153
 
154
+ def normalize_operators(operators)
155
+ operators ||= []
156
+ operators.collect do |operator|
157
+ if operator.is_a?(String)
158
+ { "operator" => operator }
159
+ else
160
+ operator
161
+ end
162
+ end
163
+ end
164
+
131
165
  def compare(x, y, operators)
132
166
  operators.each_with_index do |operator, index|
133
167
  column = operator["column"] || index
@@ -138,5 +172,44 @@ module Droonga
138
172
  end
139
173
  return false
140
174
  end
175
+
176
+ def unify_by_key!(base_items, unified_items, options={})
177
+ key_column_index = options[:key_column]
178
+ return unless key_column_index
179
+
180
+ # The unified records must be smaller than the base, because
181
+ # I sort unified records at last. I want to sort only smaller array.
182
+ if base_items.size < unified_items.size
183
+ base_items, unified_items = unified_items, base_items
184
+ end
185
+
186
+ rest_unified_items = unified_items.dup
187
+
188
+ base_items.reject! do |base_item|
189
+ key = base_item[key_column_index]
190
+ rest_unified_items.any? do |unified_item|
191
+ if unified_item[key_column_index] == key
192
+ base_item.each_with_index do |value, column|
193
+ next if column == key_column_index
194
+ unified_item[column] += value
195
+ end
196
+ rest_unified_items -= [unified_item]
197
+ true
198
+ else
199
+ false
200
+ end
201
+ end
202
+ end
203
+
204
+ unless rest_unified_items.size == unified_items.size
205
+ unified_items.sort! do |a, b|
206
+ if compare(a, b, options[:operators])
207
+ -1
208
+ else
209
+ 1
210
+ end
211
+ end
212
+ end
213
+ end
141
214
  end
142
215
  end
@@ -22,22 +22,22 @@ module Droonga
22
22
  repository.register("crud", self)
23
23
 
24
24
  command :add
25
- def add(envelope)
26
- key = envelope["body"]["key"] || rand.to_s
27
- scatter_all(envelope, key)
25
+ def add(message)
26
+ key = message["body"]["key"] || rand.to_s
27
+ scatter_all(message, key)
28
28
  end
29
29
 
30
30
  command :update
31
- def update(envelope)
32
- key = envelope["body"]["key"] || rand.to_s
33
- scatter_all(envelope, key)
31
+ def update(message)
32
+ key = message["body"]["key"] || rand.to_s
33
+ scatter_all(message, key)
34
34
  end
35
35
 
36
36
  # TODO: What is this?
37
37
  command :reset
38
- def reset(envelope)
39
- key = envelope["body"]["key"] || rand.to_s
40
- scatter_all(envelope, key)
38
+ def reset(message)
39
+ key = message["body"]["key"] || rand.to_s
40
+ scatter_all(message, key)
41
41
  end
42
42
  end
43
43
  end
@@ -0,0 +1,401 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2013 Droonga Project
4
+ #
5
+ # This library is free software; you can redistribute it and/or
6
+ # modify it under the terms of the GNU Lesser General Public
7
+ # License version 2.1 as published by the Free Software Foundation.
8
+ #
9
+ # This library is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ # Lesser General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU Lesser General Public
15
+ # License along with this library; if not, write to the Free Software
16
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
+
18
+ module Droonga
19
+ class DistributedSearchPlanner
20
+ attr_reader :messages
21
+
22
+ def initialize(search_request_message)
23
+ @source_message = search_request_message
24
+ @request = @source_message["body"]
25
+ @queries = @request["queries"]
26
+
27
+ @input_names = []
28
+ @output_names = []
29
+ @output_mappers = {}
30
+ @messages = []
31
+
32
+ build_messages
33
+ end
34
+
35
+ private
36
+ UNLIMITED = -1
37
+
38
+ def build_messages
39
+ Searcher::QuerySorter.validate_dependencies(@queries)
40
+
41
+ ensure_unifiable!
42
+
43
+ @queries.each do |input_name, query|
44
+ transform_query(input_name, query)
45
+ end
46
+
47
+ gatherer = {
48
+ "type" => "gather",
49
+ "body" => @output_mappers,
50
+ "inputs" => @output_names, # XXX should be placed in the "body"?
51
+ "post" => true, # XXX should be placed in the "body"?
52
+ }
53
+ @messages << gatherer
54
+ searcher = {
55
+ "type" => "broadcast",
56
+ "command" => "search", # XXX should be placed in the "body"?
57
+ "dataset" => @source_message["dataset"] || @request["dataset"],
58
+ "body" => @request,
59
+ "outputs" => @input_names, # XXX should be placed in the "body"?
60
+ "replica" => "random", # XXX should be placed in the "body"?
61
+ }
62
+ @messages.push(searcher)
63
+ end
64
+
65
+ def ensure_unifiable!
66
+ @queries.each do |name, query|
67
+ if unifiable?(name) && query["output"]
68
+ query["output"]["unifiable"] = true
69
+ end
70
+ end
71
+ end
72
+
73
+ def unifiable?(name)
74
+ query = @queries[name]
75
+ return true if query["groupBy"]
76
+ name = query["source"]
77
+ return false unless @queries.keys.include?(name)
78
+ unifiable?(name)
79
+ end
80
+
81
+ def transform_query(input_name, query)
82
+ output = query["output"]
83
+
84
+ # Skip reducing phase for a result with no output.
85
+ if output.nil? or
86
+ output["elements"].nil? or
87
+ (!output["elements"].include?("count") &&
88
+ !output["elements"].include?("records"))
89
+ return
90
+ end
91
+
92
+ @input_names << input_name
93
+ output_name = input_name + "_reduced"
94
+ @output_names << output_name
95
+
96
+ transformer = QueryTransformer.new(query)
97
+
98
+ reducer = {
99
+ "type" => "reduce",
100
+ "body" => {
101
+ input_name => {
102
+ output_name => transformer.reducers,
103
+ },
104
+ },
105
+ "inputs" => [input_name], # XXX should be placed in the "body"?
106
+ "outputs" => [output_name], # XXX should be placed in the "body"?
107
+ }
108
+ @messages << reducer
109
+
110
+ @output_mappers[output_name] = {
111
+ "output" => input_name,
112
+ "elements" => transformer.mappers,
113
+ }
114
+ end
115
+
116
+ class QueryTransformer
117
+ attr_reader :reducers, :mappers
118
+
119
+ def initialize(query)
120
+ @query = query
121
+ @output = @query["output"]
122
+ @reducers = {}
123
+ @mappers = {}
124
+ @output_records = true
125
+ transform!
126
+ end
127
+
128
+ def transform!
129
+ # The collector module supports only "simple" format search results.
130
+ # So we have to override the format and restore it on the gathering
131
+ # phase.
132
+ @records_format = @output["format"] || "simple"
133
+ @output["format"] = "simple"
134
+
135
+ @sort_keys = @query["sortBy"] || []
136
+ @sort_keys = @sort_keys["keys"] || [] if @sort_keys.is_a?(Hash)
137
+
138
+ calculate_offset_and_limit!
139
+ build_count_mapper_and_reducer!
140
+ build_records_mapper_and_reducer!
141
+ end
142
+
143
+ def calculate_offset_and_limit!
144
+ @original_sort_offset = sort_offset
145
+ @original_output_offset = output_offset
146
+ @original_sort_limit = sort_limit
147
+ @original_output_limit = output_limit
148
+
149
+ calculate_sort_offset!
150
+ calculate_output_offset!
151
+
152
+ # We have to calculate limit based on offset.
153
+ # <A, B = limited integer (0...MAXINT)>
154
+ # | sort limit | output limit | => | worker's sort limit | worker's output limit | final limit |
155
+ # ============================= ====================================================================
156
+ # | UNLIMITED | UNLIMITED | => | UNLIMITED | UNLIMITED | UNLIMITED |
157
+ # | UNLIMITED | B | => | final_offset + B | final_offset + B | B |
158
+ # | A | UNLIMITED | => | final_offset + A | final_offset + A | A |
159
+ # | A | B | => | final_offset + max(A, B) | final_offset + min(A, B)| min(A, B) |
160
+
161
+ # XXX final_limit and final_offset calculated in many times
162
+
163
+ @records_offset = final_offset
164
+ @records_limit = final_limit
165
+
166
+ if final_limit == UNLIMITED
167
+ @output["limit"] = UNLIMITED
168
+ else
169
+ if rich_sort?
170
+ @query["sortBy"]["limit"] = final_offset + [sort_limit, output_limit].max
171
+ end
172
+ @output["limit"] = final_offset + final_limit
173
+ end
174
+ end
175
+
176
+ def calculate_sort_offset!
177
+ # Offset for workers must be zero, because we have to apply "limit" and
178
+ # "offset" on the last gathering phase instead of each reducing phase.
179
+ if rich_sort?
180
+ @query["sortBy"]["offset"] = 0
181
+ end
182
+ end
183
+
184
+ def sort_offset
185
+ if rich_sort?
186
+ @query["sortBy"]["offset"] || 0
187
+ else
188
+ 0
189
+ end
190
+ end
191
+
192
+ def output_offset
193
+ @output["offset"] || 0
194
+ end
195
+
196
+ def sort_limit
197
+ if rich_sort?
198
+ @query["sortBy"]["limit"] || UNLIMITED
199
+ else
200
+ UNLIMITED
201
+ end
202
+ end
203
+
204
+ def output_limit
205
+ @output["limit"] || 0
206
+ end
207
+
208
+ def calculate_output_offset!
209
+ @output["offset"] = 0 if have_records?
210
+ end
211
+
212
+ def final_offset
213
+ @original_sort_offset + @original_output_offset
214
+ end
215
+
216
+ def final_limit
217
+ if @original_sort_limit == UNLIMITED && @original_output_limit == UNLIMITED
218
+ UNLIMITED
219
+ else
220
+ if @original_sort_limit == UNLIMITED
221
+ @original_output_limit
222
+ elsif @original_output_limit == UNLIMITED
223
+ @original_sort_limit
224
+ else
225
+ [@original_sort_limit, @original_output_limit].min
226
+ end
227
+ end
228
+ end
229
+
230
+ def have_records?
231
+ @output["elements"].include?("records")
232
+ end
233
+
234
+ def rich_sort?
235
+ @query["sortBy"].is_a?(Hash)
236
+ end
237
+
238
+ def unifiable?
239
+ @output["unifiable"]
240
+ end
241
+
242
+ def build_count_mapper_and_reducer!
243
+ return unless @output["elements"].include?("count")
244
+
245
+ @reducers["count"] = {
246
+ "type" => "sum",
247
+ }
248
+ if unifiable?
249
+ @query["sortBy"]["limit"] = -1 if @query["sortBy"].is_a?(Hash)
250
+ @output["limit"] = -1
251
+ mapper = {
252
+ "type" => "count",
253
+ "target" => "records",
254
+ }
255
+ unless @output["elements"].include?("records")
256
+ @records_limit = -1
257
+ @output["elements"] << "records"
258
+ @output["attributes"] ||= ["_key"]
259
+ @output_records = false
260
+ end
261
+ @mappers["count"] = mapper
262
+ end
263
+ end
264
+
265
+ def build_records_mapper_and_reducer!
266
+ # Skip reducing phase for a result with no record output.
267
+ return if !@output["elements"].include?("records") || @records_limit.zero?
268
+
269
+ # Append sort key attributes to the list of output attributes
270
+ # temporarily, for the reducing phase. After all extra columns
271
+ # are removed on the gathering phase.
272
+ final_attributes = output_attribute_names
273
+ update_output_attributes!
274
+
275
+ @reducers["records"] = build_records_reducer
276
+
277
+ mapper = {
278
+ "type" => "sort",
279
+ "offset" => @records_offset,
280
+ "limit" => @records_limit,
281
+ "format" => @records_format,
282
+ "attributes" => final_attributes,
283
+ }
284
+ mapper["no_output"] = true unless @output_records
285
+ @mappers["records"] = mapper
286
+ end
287
+
288
+ def output_attribute_names
289
+ attributes = @output["attributes"] || []
290
+ if attributes.is_a?(Hash)
291
+ attributes.keys
292
+ else
293
+ attributes.collect do |attribute|
294
+ if attribute.is_a?(Hash)
295
+ attribute["label"] || attribute["source"]
296
+ else
297
+ attribute
298
+ end
299
+ end
300
+ end
301
+ end
302
+
303
+ def update_output_attributes!
304
+ @output["attributes"] = array_style_attributes
305
+ @output["attributes"] += sort_attribute_names
306
+ if unifiable? && !@output["attributes"].include?("_key")
307
+ @output["attributes"] << "_key"
308
+ end
309
+ end
310
+
311
+ def array_style_attributes
312
+ attributes = @output["attributes"] || []
313
+ if attributes.is_a?(Hash)
314
+ attributes.keys.collect do |key|
315
+ attribute = attributes[key]
316
+ case attribute
317
+ when String
318
+ {
319
+ "label" => key,
320
+ "source" => attribute,
321
+ }
322
+ when Hash
323
+ attribute["label"] = key
324
+ attribute
325
+ end
326
+ end
327
+ else
328
+ attributes
329
+ end
330
+ end
331
+
332
+ def source_column_names
333
+ attributes = @output["attributes"] || []
334
+ if attributes.is_a?(Hash)
335
+ attributes_hash = attributes
336
+ attributes = []
337
+ attributes_hash.each do |key, attribute|
338
+ attributes << attribute["source"] || key
339
+ end
340
+ attributes
341
+ else
342
+ attributes.collect do |attribute|
343
+ if attribute.is_a?(Hash)
344
+ attribute["source"] || attribute["label"]
345
+ else
346
+ attribute
347
+ end
348
+ end
349
+ end
350
+ end
351
+
352
+ def sort_attribute_names
353
+ sort_attributes = @sort_keys.collect do |key|
354
+ key = key[1..-1] if key[0] == "-"
355
+ key
356
+ end
357
+ attributes = source_column_names
358
+ sort_attributes.reject! do |attribute|
359
+ attributes.include?(attribute)
360
+ end
361
+ sort_attributes
362
+ end
363
+
364
+ ASCENDING_OPERATOR = "<".freeze
365
+ DESCENDING_OPERATOR = ">".freeze
366
+
367
+ def build_records_reducer
368
+ attributes = @output["attributes"]
369
+ key_column_index = attributes.index("_key")
370
+
371
+ operators = @sort_keys.collect do |sort_key|
372
+ operator = ASCENDING_OPERATOR
373
+ if sort_key[0] == "-"
374
+ operator = DESCENDING_OPERATOR
375
+ sort_key = sort_key[1..-1]
376
+ end
377
+ {
378
+ "operator" => operator,
379
+ "column" => attributes.index(sort_key),
380
+ }
381
+ end
382
+
383
+ reducer = {
384
+ "type" => "sort",
385
+ "operators" => operators,
386
+ }
387
+ if unifiable? && !key_column_index.nil?
388
+ reducer["key_column"] = key_column_index
389
+ end
390
+
391
+ # On the reducing phase, we apply only "limit". We cannot apply
392
+ # "offset" on this phase because the collector merges a pair of
393
+ # results step by step even if there are three or more results.
394
+ # Instead, we apply "offset" on the gathering phase.
395
+ reducer["limit"] = @output["limit"]
396
+
397
+ reducer
398
+ end
399
+ end
400
+ end
401
+ end