mm_es_search 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69):
  1. data/.gitignore +4 -0
  2. data/.project +18 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +1 -0
  5. data/lib/mm_es_search/api/facet/abstract_facet.rb +28 -0
  6. data/lib/mm_es_search/api/facet/date_histogram_facet.rb +11 -0
  7. data/lib/mm_es_search/api/facet/filter_facet.rb +9 -0
  8. data/lib/mm_es_search/api/facet/geo_distance_facet.rb +9 -0
  9. data/lib/mm_es_search/api/facet/histogram_facet.rb +9 -0
  10. data/lib/mm_es_search/api/facet/query_facet.rb +9 -0
  11. data/lib/mm_es_search/api/facet/range_facet.rb +36 -0
  12. data/lib/mm_es_search/api/facet/range_facet_row.rb +97 -0
  13. data/lib/mm_es_search/api/facet/range_item.rb +17 -0
  14. data/lib/mm_es_search/api/facet/statistical_facet.rb +33 -0
  15. data/lib/mm_es_search/api/facet/statistical_facet_result.rb +36 -0
  16. data/lib/mm_es_search/api/facet/terms_facet.rb +62 -0
  17. data/lib/mm_es_search/api/facet/terms_facet_row.rb +35 -0
  18. data/lib/mm_es_search/api/facet/terms_stats_facet.rb +9 -0
  19. data/lib/mm_es_search/api/highlight/result_highlight.rb +40 -0
  20. data/lib/mm_es_search/api/query/abstract_filter.rb +15 -0
  21. data/lib/mm_es_search/api/query/abstract_query.rb +48 -0
  22. data/lib/mm_es_search/api/query/and_filter.rb +9 -0
  23. data/lib/mm_es_search/api/query/bool_filter.rb +11 -0
  24. data/lib/mm_es_search/api/query/bool_query.rb +67 -0
  25. data/lib/mm_es_search/api/query/constant_score_query.rb +31 -0
  26. data/lib/mm_es_search/api/query/custom_filters_score_query.rb +52 -0
  27. data/lib/mm_es_search/api/query/custom_score_query.rb +31 -0
  28. data/lib/mm_es_search/api/query/dismax_query.rb +29 -0
  29. data/lib/mm_es_search/api/query/filtered_query.rb +30 -0
  30. data/lib/mm_es_search/api/query/has_child_filter.rb +11 -0
  31. data/lib/mm_es_search/api/query/has_child_query.rb +25 -0
  32. data/lib/mm_es_search/api/query/has_parent_filter.rb +11 -0
  33. data/lib/mm_es_search/api/query/has_parent_query.rb +25 -0
  34. data/lib/mm_es_search/api/query/match_all_filter.rb +11 -0
  35. data/lib/mm_es_search/api/query/match_all_query.rb +19 -0
  36. data/lib/mm_es_search/api/query/nested_filter.rb +22 -0
  37. data/lib/mm_es_search/api/query/nested_query.rb +62 -0
  38. data/lib/mm_es_search/api/query/not_filter.rb +9 -0
  39. data/lib/mm_es_search/api/query/or_filter.rb +9 -0
  40. data/lib/mm_es_search/api/query/prefix_filter.rb +11 -0
  41. data/lib/mm_es_search/api/query/prefix_query.rb +34 -0
  42. data/lib/mm_es_search/api/query/query_filter.rb +28 -0
  43. data/lib/mm_es_search/api/query/query_string_query.rb +37 -0
  44. data/lib/mm_es_search/api/query/range_filter.rb +11 -0
  45. data/lib/mm_es_search/api/query/range_query.rb +57 -0
  46. data/lib/mm_es_search/api/query/scored_filter.rb +29 -0
  47. data/lib/mm_es_search/api/query/single_bool_filter.rb +66 -0
  48. data/lib/mm_es_search/api/query/term_filter.rb +11 -0
  49. data/lib/mm_es_search/api/query/term_query.rb +34 -0
  50. data/lib/mm_es_search/api/query/terms_filter.rb +11 -0
  51. data/lib/mm_es_search/api/query/terms_query.rb +58 -0
  52. data/lib/mm_es_search/api/query/text_query.rb +42 -0
  53. data/lib/mm_es_search/api/query/top_children_query.rb +28 -0
  54. data/lib/mm_es_search/api/sort/root_sort.rb +36 -0
  55. data/lib/mm_es_search/models/abstract_facet_model.rb +23 -0
  56. data/lib/mm_es_search/models/abstract_query_model.rb +21 -0
  57. data/lib/mm_es_search/models/abstract_range_facet_model.rb +365 -0
  58. data/lib/mm_es_search/models/abstract_search_model.OLD +538 -0
  59. data/lib/mm_es_search/models/abstract_search_model.rb +521 -0
  60. data/lib/mm_es_search/models/abstract_sort_model.rb +13 -0
  61. data/lib/mm_es_search/models/abstract_terms_facet_model.rb +87 -0
  62. data/lib/mm_es_search/models/root_sort_model.rb +20 -0
  63. data/lib/mm_es_search/models/virtual_field_sort.rb +52 -0
  64. data/lib/mm_es_search/utils/facet_row_utils.rb +86 -0
  65. data/lib/mm_es_search/utils/search_logger.rb +10 -0
  66. data/lib/mm_es_search/version.rb +3 -0
  67. data/lib/mm_es_search.rb +124 -0
  68. data/mm_es_search.gemspec +24 -0
  69. metadata +132 -0
@@ -0,0 +1,11 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
# Filter flavour of TermsQuery: inherits every key and query builder from
# TermsQuery and only adds the AbstractFilter plugin on top.
class TermsFilter < TermsQuery
  plugin AbstractFilter
end
8
+
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,58 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
# Matches when a field holds at least one of the given terms. The same
# definition can be rendered as an Elasticsearch `terms` clause, a MongoDB
# `$in`/`$nin` selector, or an in-memory predicate over plain Ruby objects.
class TermsQuery < AbstractQuery

  key :field, String
  key :path, String

  key :terms, Array
  key :boost, Float

  key :execution, Symbol

  key :minimum_match, Integer

  # Walks +obj+ along the dotted "path.field" chain and returns the value
  # found at the end.
  #
  # Purely numeric segments are sent as `slice(index)`; every other segment
  # is sent as a method name. Empty segments (produced by a nil or blank
  # +path+) are skipped so that an optional path no longer results in an
  # empty method name being sent to +obj+.
  def get_object_value(obj, path, field)
    segments = [path, field].join('.').split('.').reject(&:empty?)
    segments.inject(obj) do |current, segment|
      if segment =~ /\A\d+\z/
        current.send(:slice, segment.to_i)
      else
        current.send(segment)
      end
    end
  end

  # Tokenises the value found at path.field on +obj+.
  #
  # this just splits on whitespace, but we could have an instance variable
  # store a reference to a more complex analyzer
  #
  # Always returns an Array: strings are split on whitespace, arrays are
  # flattened and split, nil yields no tokens and any other value is treated
  # as a single token. Returning an Array in every case keeps the `&` in
  # #to_object_query from blowing up on unexpected values.
  def run_analyzer(obj, path, field)
    val = get_object_value(obj, path, field)
    case val
    when String then val.split
    when Array  then val.flatten.join(' ').split
    when nil    then []
    else [val]
    end
  end

  # Returns a lambda that is true when the analysed value of an object
  # shares at least one token with +terms+.
  def to_object_query
    ->(obj) { (run_analyzer(obj, path, field) & terms).any? }
  end

  # Builds the MongoDB selector: `$nin` when options[:negated] is truthy,
  # `$in` otherwise.
  def to_mongo_query(options = {})
    operator = options[:negated] ? '$nin' : '$in'
    { mongo_abs_field => { operator => terms } }
  end

  # Builds the Elasticsearch `terms` clause, adding the optional settings
  # only when they have been set on this query.
  def to_es_query
    terms_params = { es_abs_field => terms }
    terms_params[:boost] = boost if boost?
    terms_params[:minimum_match] = minimum_match if minimum_match?
    terms_params[:execution] = execution if execution?
    { :terms => terms_params }
  end

end
55
+
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,42 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
# Elasticsearch `text` query against a single (optionally path-prefixed)
# field. There is no MongoDB equivalent.
class TextQuery < AbstractQuery

  key :field, String
  key :path, String

  key :query, String
  key :operator, String
  key :analyzer, String

  key :fuzziness, Float
  key :prefix_length, Integer
  key :max_expansions, Integer

  key :type, String#, :default => "phrase_prefix"
  key :slop, Integer
  key :boost, Float

  # Always raises: this query type cannot be executed against MongoDB.
  def to_mongo_query(options = {})
    raise "TextQuery doesn't support mongo execution"
  end

  # Renders {:text => {<field> => <params>}}. The params are this document's
  # attributes minus the bookkeeping keys; the field name is prefixed with
  # "path." when a path is set.
  def to_es_query
    query_options = attributes.except("_type", "field", "path")
    target_field = path ? "#{path}.#{field}" : field

    { :text => { target_field => query_options } }
  end

end
39
+
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,28 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
# Elasticsearch `top_children` query wrapping a single nested query.
class TopChildrenQuery < AbstractQuery

  one :query, :class_name => 'MmEsSearch::Api::Query::AbstractQuery'
  key :type, String
  key :score, String
  key :_scope, String
  # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
  # removed in Ruby 3.2, where referencing it raises NameError.
  key :factor, Integer
  key :incremental_factor, Integer

  # No MongoDB translation is implemented; returns nil.
  def to_mongo_query(options = {})

  end

  # Renders {:top_children => params}, where params are this document's
  # attributes (without the embedded query and `_type`) plus the nested
  # query rendered through its own #to_es_query.
  def to_es_query
    query_params = attributes.except('query', '_type')
    query_params[:query] = query.to_es_query
    { :top_children => query_params }
  end

end
25
+
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,36 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Sort
4
+
5
# Sort instruction on a single field, renderable for MongoDB and for
# Elasticsearch.
class RootSort

  include MongoMapper::EmbeddedDocument
  plugin MmUsesNoId

  #only support one sort field as have no need for multi as yet...
  key :field
  key :direction

  # Returns {field => :asc} or {field => :desc}; nil when the direction is
  # not recognised.
  def to_mongo_query
    order = normalized_direction
    order && { field => order }
  end

  # Returns {field => {:order => :asc}} or {field => {:order => :desc}};
  # nil when the direction is not recognised.
  def to_es_query
    order = normalized_direction
    order && { field => { :order => order } }
  end

  private

  # Maps +direction+ onto :asc / :desc. A nil direction defaults to :asc.
  # The comparison goes through to_s.downcase, so symbols and mixed-case
  # strings (:desc, "DESC") are accepted instead of silently yielding nil.
  def normalized_direction
    return :asc if direction.nil?

    case direction.to_s.downcase
    when "asc", "ascending"   then :asc
    when "desc", "descending" then :desc
    end
  end

end
33
+
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,23 @@
1
+ module MmEsSearch
2
+ module Models
3
+
4
# Base embedded document for facet models. Provides the `required` flag and
# helpers for building labels that are namespaced by object identity.
class AbstractFacetModel

  include MmEsSearch::Api::Facet
  include MmEsSearch::Api::Query
  include MongoMapper::EmbeddedDocument
  #plugin MmUsesNoId

  key :required, Boolean

  class << self
    # Returns "<object_id of object>_<label>".
    def prefix_label(object, label)
      "#{object.object_id}_#{label}"
    end
  end

  # Instance shortcut: prefixes +label+ with this facet's own object_id.
  def prefix_label(label)
    self.class.prefix_label(self, label)
  end

end
22
+ end
23
+ end
@@ -0,0 +1,21 @@
1
+ module MmEsSearch
2
+ module Models
3
+
4
# Base embedded document for query models. Both conversion hooks are empty
# here and return nil.
class AbstractQueryModel

  include MmEsSearch::Api::Query
  include MongoMapper::EmbeddedDocument
  plugin MmUsesNoId

  # Empty hook; returns nil.
  def to_query; end

  # Empty hook; returns nil.
  def to_filter; end

end
19
+
20
+ end
21
+ end
@@ -0,0 +1,365 @@
1
+ module MmEsSearch
2
+ module Models
3
+ module AbstractRangeFacetModel
4
+
5
+ extend ActiveSupport::Concern
6
+ include MmEsSearch::Api::Facet
7
+ include MmEsSearch::Api::Query
8
+
9
included do

  # NOTE: this block runs once for every class that includes the concern.
  # `||=` on all three constants keeps a second inclusion from re-assigning
  # them (plain `=` would trigger Ruby's "already initialized constant"
  # warning), matching what TARGET_NUM_ROLES already did.
  TARGET_NUM_ROLES ||= 5
  TIME_UNITS ||= [:year, :month, :day, :hour, :min, :sec].freeze
  DEFAULT_TIME_PARAMS ||= [1970, 1, 1, 0, 0, 0].freeze

  many :rows, :class_name => 'MmEsSearch::Api::Facet::RangeFacetRow'
  one :stats, :class_name => 'MmEsSearch::Api::Facet::StatisticalFacetResult'
  key :display_mode, String

  # Initial state depends on the facet itself: invalid facets are missing
  # required fields, valid facets with rows can be displayed straight away,
  # and valid facets without rows still need their field stats.
  initial_state_for = lambda do |facet|
    if !facet.valid?
      :missing_required_fields
    elsif facet.rows.present?
      :ready_for_display
    else
      :need_field_stats
    end
  end
  aasm_initial_state initial_state_for

  aasm_event :typed_facet_initialized do
    transitions :to => :need_field_stats, :from => [:ready_for_initialization]
  end

  aasm_event :field_stats_set do
    transitions :to => :need_row_data, :from => [:need_field_stats, :need_row_data]
  end

  aasm_event :prepare_for_new_data, :after => :prune_unchecked_rows do
    transitions :to => :need_field_stats, :from => [:ready_for_display, :need_field_stats, :need_row_data]
  end

end
44
+
45
module ClassMethods

  # Builds the instance through the regular constructor, fires the
  # typed_facet_initialized event on it, and returns the instance.
  def new(params = {})
    super(params).tap { |facet| facet.typed_facet_initialized }
  end

end
54
+
55
# Returns the fixed string 'ranges'.
# NOTE(review): presumably the key of the facet result payload that holds
# the rows - confirm against the caller.
def result_name
  'ranges'
end
58
+
59
# Whether the faceted field holds time values. This default says no;
# #initialize_rows, #serialize_value and #deserialize_value branch on it.
def is_time?
  false
end
62
+
63
# Whether time values are converted with respect to the time zone (true by
# default). Only consulted on the time branches of #serialize_value and
# #deserialize_value.
def timezone_matters?
  true
end
66
+
67
# Class used for this facet's rows.
def row_class
  RangeFacetRow
end
70
+
71
# Stores +result+ (minus its '_type' entry) as this facet's stats, calls
# zero_rows, and then fires the field_stats_set event.
def build_field_stats(result)
  stat_attributes = result.except('_type')
  self.stats = StatisticalFacetResult.new(stat_attributes)
  zero_rows
  # NOTE(review): the return value is discarded; presumably this call is
  # here to make sure the state machine has a current state before the
  # event fires - confirm.
  current_state
  field_stats_set
end
77
+
78
# Builds the StatisticalFacet used to fetch the field stats: the default
# params plus a facet-specific label and the current facet filter.
def to_stats_facet
  stats_params = default_params.merge(
    :label => prefix_label('field_stats_result'),
    :facet_filter => facet_filter
  )
  StatisticalFacet.new(stats_params)
end
86
+
87
+ # def facet_filter
88
+ # #override this to provide additional constraints
89
+ # nil
90
+ # end
91
+
92
# Builds the facet for the active display mode (falling back to
# #select_display_mode when none is set).
#
# "range" initialises the rows and returns a RangeFacet built from them;
# "histogram" raises NotImplementedError; anything else raises a
# RuntimeError naming the stored display mode.
def to_facet
  mode = display_mode || select_display_mode

  case mode
  when "histogram"
    raise NotImplementedError
  when "range"
    initialize_rows
    range_params = default_params.merge(
      :label => prefix_label('display_result'),
      :ranges => rows.map(&:to_range_item),
      :facet_filter => facet_filter
    )
    RangeFacet.new(range_params)
  else
    raise "display mode '#{display_mode}' is not recognised"
  end
end
109
+
110
+ #NOTE: we use this pair of methods to transform between es and client-side units
111
+ # and the @transform_lookup ensures we avoid value creep through rounding errors
112
+ # in other words, if we ask for 100-200 in client-side units, we get that, not e.g. 101-201
113
+ # def deserialize_value(value)
114
+ # value
115
+ # end
116
+ #
117
+ # def serialize_value(value)
118
+ # value
119
+ # end
120
+
121
# Resets the serialise/deserialise lookup table and rebuilds the rows with
# the time-based or numeric strategy, depending on #is_time?.
def initialize_rows
  @transform_lookup = {}
  is_time? ? initialize_time_rows : initialize_numeric_rows
end
129
+
130
# Picks the coarsest time unit in which stats.min and stats.max differ,
# checking year first and second last. Falls back to :year when no
# component differs.
def best_time_unit
  diff = Time.diff(stats.min, stats.max)

  # [key in the diff hash, unit symbol returned], coarsest first
  candidates = [
    [:year,   :year],
    [:month,  :month],
    [:day,    :day],
    [:hour,   :hour],
    [:minute, :min],
    [:second, :sec]
  ]

  match = candidates.find { |diff_key, _unit| diff[diff_key].abs > 0 }
  match ? match.last : :year
end
141
+
142
# Converts a value coming back from Elasticsearch into its client-side form.
#
# key      - name of the stat/row attribute the value belongs to.
# es_value - the raw value from Elasticsearch.
#
# * "total", "sum_of_squares" and "variance" are discarded (nil).
# * A value previously produced by #serialize_value is mapped back to the
#   exact client-side object it came from, via @transform_lookup.
# * For non-time facets the value is returned untouched.
# * For time facets, "from"/"to"/"min"/"max"/"mean" become times in
#   Time.zone (numeric input is divided by 1000 first; strings are parsed),
#   "std_deviation" is divided by 1000, "count" is returned as is, and any
#   other key yields nil.
#
# Unsupported time inputs (neither Numeric nor String) yield nil on both
# the timezone-aware and the timezone-agnostic path.
def deserialize_value(key, es_value)

  case key
  when "total", "sum_of_squares", "variance"
    return nil #i.e. discard these
  end

  stored_val = @transform_lookup && @transform_lookup[es_value]
  return stored_val if stored_val

  return es_value unless is_time?

  case key
  when "from", "to", "min", "max", "mean"

    if timezone_matters?
      case es_value
      when Numeric then Time.zone.at(es_value/1000)
      when String  then Time.zone.parse(es_value)
      end
    else
      # Parse with the system clock, then rebuild the same wall-clock
      # fields (year..sec) inside Time.zone.
      wall_clock = case es_value
                   when Numeric then Time.at(es_value/1000)
                   when String  then Time.parse(es_value)
                   end
      wall_clock && Time.zone.local(*wall_clock.to_a[0..5].reverse)
    end

  when "std_deviation"
    es_value/1000

  when "count"
    es_value

  end

end
189
+
190
# Converts a client-side value into the form sent to Elasticsearch and
# remembers the pairing in @transform_lookup so that #deserialize_value can
# return the exact original object later.
#
# value      - the client-side value (a time for time facets).
# for_filter - when true, time values are rendered with #iso8601; otherwise
#              as the UTC float timestamp multiplied by 1000.
#
# Non-time facets pass the value through unchanged.
#
# The UTC conversion uses #getutc instead of #utc: Time#utc converts the
# receiver in place, which would alter both the caller's object and the
# value stored in the lookup table.
# NOTE(review): assumes every time-like value passed in responds to
# #getutc - confirm for non-Time inputs.
def serialize_value(value, for_filter = false)
  es_value =
    if is_time?
      utc_time = if timezone_matters?
        value.getutc
      else
        # Re-read the wall-clock fields (year..sec) as if they were UTC.
        Time.utc(*value.to_a[0..5].reverse)
      end
      for_filter ? utc_time.iso8601 : utc_time.to_f*1000
    else
      value
    end

  @transform_lookup ||= {}
  @transform_lookup[es_value] = value
  es_value
end
205
+
206
# Entry point for a raw field-stats result: deserialises its values and
# hands them to #build_field_stats.
def handle_field_stats_result(result)
  client_side_stats = transform_field_stats(result)
  build_field_stats(client_side_stats)
end
209
+
210
# Returns a new Hash with the same keys as +result+, each value run through
# #deserialize_value.
def transform_field_stats(result)
  pairs = result.map { |key, value| [key, deserialize_value(key, value)] }
  Hash[pairs]
end
215
+
216
# Extracts the given time component (e.g. :year, :min) from stats.min,
# stats.max and stats.mean, and expresses the standard deviation in that
# unit.
#
# Returns [min, max, mean, sd].
def get_time_stats(time_unit)
  # :sec / :min are component readers; the matching duration helpers are
  # named :second / :minute.
  duration_names = { :sec => :second, :min => :minute }
  long_time_unit = duration_names.fetch(time_unit, time_unit)

  components = [:min, :max, :mean].map { |stat| stats.send(stat).send(time_unit) }
  sd = stats.std_deviation/(1.send long_time_unit) #e.g. 1.year or 1.month

  [*components, sd]
end
228
+
229
# Builds the rows for a time facet.
#
# Range boundaries are calculated in the best-fitting time unit, turned
# back into full times (coarser components copied from stats.mean, finer
# ones taken from DEFAULT_TIME_PARAMS), serialised, and passed to
# #build_rows.
def initialize_time_rows
  time_unit = best_time_unit
  min, max, mean, sd = get_time_stats(time_unit)
  range_vals = calculate_range_values(min, max, mean, sd)
  unit_index = TIME_UNITS.index(time_unit)

  row_times = range_vals.map do |unit_value|
    #e.g. if all same year, copy year across from stats.mean
    coarser = unit_index > 0 ? stats.mean.to_a[0..5].reverse[0..unit_index-1] : []
    finer = DEFAULT_TIME_PARAMS[(unit_index + 1)..-1]
    Time.zone.local(*coarser, unit_value, *finer)
  end

  es_times = row_times.map do |row_time|
    es_time = serialize_value(row_time)
    @transform_lookup[es_time] = row_time
    es_time
  end

  build_rows(es_times)
end
250
+
251
# Calculates "round" boundary values for the range rows.
#
# min, max, mean, sd - statistics of the faceted field.
#
# The window is [min, max] narrowed to one standard deviation around the
# mean. Windows narrower than 100 are scaled up before rounding and scaled
# back down at the end. Boundaries are rounded to a power of ten derived
# from the number of digits in the window size, and spaced by an increment
# rounded the same way. Values are cast to Float when min or max is a
# Float, to Integer otherwise.
#
# Returns an Array of boundary values. No debug output is written; the
# method only returns its result.
def calculate_range_values(min, max, mean, sd)

  #TODO come up with a system that works with decimal values
  # AND when the diff between lower and upper is very small or zero

  final_casting = (min.is_a?(Float) || max.is_a?(Float)) ? :to_f : :to_i

  orig_lower = [min.floor, (mean - sd).floor].max
  orig_upper = [max.ceil, (mean + sd).ceil].min
  orig_range = orig_upper - orig_lower

  if orig_range < 100 && !orig_range.zero?
    scale = (100/orig_range.floor)
    scale = round_to_power_of_ten(scale, :up, [(scale.to_s.length - 1), 1].max)
    lower, upper, range = [orig_lower, orig_upper, orig_range].map { |v| v*scale }
  else
    lower, upper, range = orig_lower, orig_upper, orig_range
    scale = nil
  end

  # number of digits (minus one) of the window size, or of the lower bound
  # when the window is empty
  power = (range.zero? ? lower : range).to_s.length - 1

  lower = round_to_power_of_ten(lower, :down, power)
  upper = round_to_power_of_ten(upper, :up, power)
  orig_inc = ((upper - lower) / (TARGET_NUM_ROLES-2)).floor
  inc = round_to_power_of_ten(orig_inc, :up, power)

  if inc == 0
    values = [lower - 5, lower + 5] #TODO remove this once pruning support is added
  else
    values = (TARGET_NUM_ROLES-1).times.map { |i| lower + inc*i }
    # keep at most one boundary at or beyond the upper bound
    if (gte_upper = values.select { |n| n >= upper }).length > 1
      values = values[0..-gte_upper.length]
    end
  end

  values.map!(&final_casting)
  values.map! { |v| v/scale } if scale

  values

end
320
+
321
# Builds the rows for a numeric facet straight from the field stats.
def initialize_numeric_rows
  boundaries = calculate_range_values(
    stats.min, stats.max, stats.mean, stats.std_deviation
  )
  build_rows(boundaries)
end
325
+
326
# Rebuilds the rows from a list of boundary values.
#
# values - boundary values in ascending order. The array is only read; it
#          is not modified.
#
# Exactly two values give a single closed row (from..to). Otherwise the
# rows are: an open-ended "up to first" row, one row per adjacent pair of
# values, and - when there is more than one value - an open-ended "from
# last" row.
def build_rows(values)
  self.rows = checked_rows #NOTE: we preserve selected rows

  if values.length == 2
    rows << RangeFacetRow.new(:from => values.first, :to => values.last)
  else
    rows << RangeFacetRow.new(:to => values.first) # -> 1 (if starting with 1,2,3,4)
    values.each_cons(2) do |from, to|
      rows << RangeFacetRow.new(:from => from, :to => to) # 1 -> 2, 2 -> 3, 3 -> 4
    end
    rows << RangeFacetRow.new(:from => values.last) if values.length > 1 # 4 ->
  end
end
338
+
339
# Rounds +n+ to a multiple of 10**power.
#
# direction - :up rounds towards the next multiple, :down towards the
#             previous one. Any other direction yields nil.
#
# +n+ is returned unchanged when it already is a multiple.
def round_to_power_of_ten(n, direction, power)
  unit = 10**power
  remainder = n % unit
  return n if remainder.zero?

  case direction
  when :down then n - remainder
  when :up   then n + (unit - remainder)
  end
end
349
+
350
# Chooses and stores the display mode. Currently always "range".
#
# Returns the mode that was set.
def select_display_mode
  #logic for setting mode based on stats
  #TODO build this logic
  self.display_mode = "range"
end
357
+
358
# Names of the row attributes this facet type works with.
def required_row_fields
  %w[from to]
end
361
+
362
+
363
+ end
364
+ end
365
+ end