mm_es_search 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. data/.gitignore +4 -0
  2. data/.project +18 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +1 -0
  5. data/lib/mm_es_search/api/facet/abstract_facet.rb +28 -0
  6. data/lib/mm_es_search/api/facet/date_histogram_facet.rb +11 -0
  7. data/lib/mm_es_search/api/facet/filter_facet.rb +9 -0
  8. data/lib/mm_es_search/api/facet/geo_distance_facet.rb +9 -0
  9. data/lib/mm_es_search/api/facet/histogram_facet.rb +9 -0
  10. data/lib/mm_es_search/api/facet/query_facet.rb +9 -0
  11. data/lib/mm_es_search/api/facet/range_facet.rb +36 -0
  12. data/lib/mm_es_search/api/facet/range_facet_row.rb +97 -0
  13. data/lib/mm_es_search/api/facet/range_item.rb +17 -0
  14. data/lib/mm_es_search/api/facet/statistical_facet.rb +33 -0
  15. data/lib/mm_es_search/api/facet/statistical_facet_result.rb +36 -0
  16. data/lib/mm_es_search/api/facet/terms_facet.rb +62 -0
  17. data/lib/mm_es_search/api/facet/terms_facet_row.rb +35 -0
  18. data/lib/mm_es_search/api/facet/terms_stats_facet.rb +9 -0
  19. data/lib/mm_es_search/api/highlight/result_highlight.rb +40 -0
  20. data/lib/mm_es_search/api/query/abstract_filter.rb +15 -0
  21. data/lib/mm_es_search/api/query/abstract_query.rb +48 -0
  22. data/lib/mm_es_search/api/query/and_filter.rb +9 -0
  23. data/lib/mm_es_search/api/query/bool_filter.rb +11 -0
  24. data/lib/mm_es_search/api/query/bool_query.rb +67 -0
  25. data/lib/mm_es_search/api/query/constant_score_query.rb +31 -0
  26. data/lib/mm_es_search/api/query/custom_filters_score_query.rb +52 -0
  27. data/lib/mm_es_search/api/query/custom_score_query.rb +31 -0
  28. data/lib/mm_es_search/api/query/dismax_query.rb +29 -0
  29. data/lib/mm_es_search/api/query/filtered_query.rb +30 -0
  30. data/lib/mm_es_search/api/query/has_child_filter.rb +11 -0
  31. data/lib/mm_es_search/api/query/has_child_query.rb +25 -0
  32. data/lib/mm_es_search/api/query/has_parent_filter.rb +11 -0
  33. data/lib/mm_es_search/api/query/has_parent_query.rb +25 -0
  34. data/lib/mm_es_search/api/query/match_all_filter.rb +11 -0
  35. data/lib/mm_es_search/api/query/match_all_query.rb +19 -0
  36. data/lib/mm_es_search/api/query/nested_filter.rb +22 -0
  37. data/lib/mm_es_search/api/query/nested_query.rb +62 -0
  38. data/lib/mm_es_search/api/query/not_filter.rb +9 -0
  39. data/lib/mm_es_search/api/query/or_filter.rb +9 -0
  40. data/lib/mm_es_search/api/query/prefix_filter.rb +11 -0
  41. data/lib/mm_es_search/api/query/prefix_query.rb +34 -0
  42. data/lib/mm_es_search/api/query/query_filter.rb +28 -0
  43. data/lib/mm_es_search/api/query/query_string_query.rb +37 -0
  44. data/lib/mm_es_search/api/query/range_filter.rb +11 -0
  45. data/lib/mm_es_search/api/query/range_query.rb +57 -0
  46. data/lib/mm_es_search/api/query/scored_filter.rb +29 -0
  47. data/lib/mm_es_search/api/query/single_bool_filter.rb +66 -0
  48. data/lib/mm_es_search/api/query/term_filter.rb +11 -0
  49. data/lib/mm_es_search/api/query/term_query.rb +34 -0
  50. data/lib/mm_es_search/api/query/terms_filter.rb +11 -0
  51. data/lib/mm_es_search/api/query/terms_query.rb +58 -0
  52. data/lib/mm_es_search/api/query/text_query.rb +42 -0
  53. data/lib/mm_es_search/api/query/top_children_query.rb +28 -0
  54. data/lib/mm_es_search/api/sort/root_sort.rb +36 -0
  55. data/lib/mm_es_search/models/abstract_facet_model.rb +23 -0
  56. data/lib/mm_es_search/models/abstract_query_model.rb +21 -0
  57. data/lib/mm_es_search/models/abstract_range_facet_model.rb +365 -0
  58. data/lib/mm_es_search/models/abstract_search_model.OLD +538 -0
  59. data/lib/mm_es_search/models/abstract_search_model.rb +521 -0
  60. data/lib/mm_es_search/models/abstract_sort_model.rb +13 -0
  61. data/lib/mm_es_search/models/abstract_terms_facet_model.rb +87 -0
  62. data/lib/mm_es_search/models/root_sort_model.rb +20 -0
  63. data/lib/mm_es_search/models/virtual_field_sort.rb +52 -0
  64. data/lib/mm_es_search/utils/facet_row_utils.rb +86 -0
  65. data/lib/mm_es_search/utils/search_logger.rb +10 -0
  66. data/lib/mm_es_search/version.rb +3 -0
  67. data/lib/mm_es_search.rb +124 -0
  68. data/mm_es_search.gemspec +24 -0
  69. metadata +132 -0
@@ -0,0 +1,11 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
# TermsQuery subclass with the AbstractFilter plugin applied; it declares no
# keys or methods of its own, so everything else comes from TermsQuery.
class TermsFilter < TermsQuery
  plugin(AbstractFilter)
end
8
+
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,58 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
+ class TermsQuery < AbstractQuery
6
+
7
+ key :field, String
8
+ key :path, String
9
+
10
+ key :terms, Array
11
+ key :boost, Float
12
+
13
+ key :execution, Symbol
14
+
15
+ key :minimum_match, Integer
16
+
17
# Resolves a dotted path on +obj+ through a chain of method calls.
#
# +path+ and +field+ are joined with '.' and split into segments. A segment
# made up only of digits is sent as +slice(index)+; any other segment is
# sent as a method name.
#
# @param obj   [Object] receiver of the first call
# @param path  [String, nil] optional dotted prefix; nil or empty is skipped
# @param field [String] dotted field name
# @return [Object] the result of the last call in the chain
def get_object_value(obj, path, field)
  # compact + reject(&:empty?) keeps a nil/blank path from producing an
  # empty segment, which would otherwise be sent as a method named "".
  segments = [path, field].compact.join('.').split('.').reject(&:empty?)
  segments.inject(obj) do |current, segment|
    # \A..\z anchors the whole segment; ^..$ only anchors a single line.
    if segment =~ /\A\d+\z/
      current.send(:slice, segment.to_i)
    else
      current.send(segment)
    end
  end
end
21
+
22
# Turns the value found at +path+/+field+ on +obj+ into an array of tokens.
#
# Strings are split on whitespace; arrays are flattened, joined with spaces
# and split the same way. A nil value yields an empty array and any other
# value is wrapped in a one-element array, so the result is always an Array
# that can be intersected with +terms+.
#
# @param obj   [Object] object to read from (passed to get_object_value)
# @param path  [String, nil] dotted prefix
# @param field [String] dotted field name
# @return [Array] tokens extracted from the value
def run_analyzer(obj, path, field)
  # This just splits on whitespace, but we could have an instance variable
  # store a reference to a more complex analyzer.
  value = get_object_value(obj, path, field)
  case value
  when String then value.split
  when Array  then value.flatten.join(' ').split
  when nil    then []
  else [value]
  end
end
32
+
33
# Builds an in-memory predicate for this query.
#
# @return [Proc] lambda taking one object; it intersects the tokens produced
#   by run_analyzer for +path+/+field+ with +terms+ and calls +any?+ on the
#   intersection
def to_object_query
  lambda do |candidate|
    shared_tokens = run_analyzer(candidate, path, field) & terms
    shared_tokens.any?
  end
end
36
+
37
# Builds the MongoDB criteria hash for this query.
#
# @param options [Hash] when +:negated+ is truthy the terms are matched with
#   '$nin', otherwise with '$in'
# @return [Hash] +{mongo_abs_field => {operator => terms}}+
def to_mongo_query(options = {})
  operator = options[:negated] ? '$nin' : '$in'
  { mongo_abs_field => { operator => terms } }
end
45
+
46
# Builds the Elasticsearch +terms+ clause.
#
# +boost+, +minimum_match+ and +execution+ are only included when their
# predicate (+boost?+ etc.) is truthy.
#
# @return [Hash] +{:terms => {es_abs_field => terms, ...}}+
def to_es_query
  clause = { es_abs_field => terms }
  clause[:boost] = boost if boost?
  clause[:minimum_match] = minimum_match if minimum_match?
  clause[:execution] = execution if execution?
  { :terms => clause }
end
53
+
54
+ end
55
+
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,42 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
# Query object that serialises to an Elasticsearch +text+ clause.
# There is no MongoDB translation: to_mongo_query always raises.
class TextQuery < AbstractQuery

  key :field, String
  key :path,  String

  key :query,    String
  key :operator, String
  key :analyzer, String

  key :fuzziness,      Float
  key :prefix_length,  Integer
  key :max_expansions, Integer

  key :type,  String #, :default => "phrase_prefix"
  key :slop,  Integer
  key :boost, Float

  # Always raises: this query type has no MongoDB form.
  #
  # @param options [Hash] unused
  # @raise [RuntimeError] unconditionally
  def to_mongo_query(options = {})
    raise RuntimeError, "TextQuery doesn't support mongo execution"
  end

  # Builds the Elasticsearch +text+ clause.
  #
  # Every attribute except "_type", "field" and "path" becomes a parameter;
  # the target field is prefixed with "<path>." when +path+ is set.
  #
  # @return [Hash] +{:text => {field_name => params}}+
  def to_es_query
    es_params   = attributes.except("_type", "field", "path")
    target_name = path ? "#{path}.#{field}" : field
    { :text => { target_name => es_params } }
  end

end
39
+
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,28 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Query
4
+
5
# Query object that serialises to an Elasticsearch +top_children+ clause
# wrapping another query.
class TopChildrenQuery < AbstractQuery

  one :query, :class_name => 'MmEsSearch::Api::Query::AbstractQuery'
  key :type,   String
  key :score,  String
  key :_scope, String
  # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
  # in Ruby 3.2, and the other query classes already declare Integer keys.
  key :factor,             Integer
  key :incremental_factor, Integer

  # No MongoDB translation is implemented; the empty body returns nil.
  #
  # @param options [Hash] unused
  # @return [nil]
  def to_mongo_query(options = {})
  end

  # Builds the Elasticsearch +top_children+ clause.
  #
  # All attributes except 'query' and '_type' are passed through as
  # parameters, and the embedded query is serialised with its own
  # to_es_query under :query.
  #
  # @return [Hash] +{:top_children => params}+
  def to_es_query
    query_params = attributes.except('query', '_type')
    query_params[:query] = query.to_es_query
    { :top_children => query_params }
  end

end
25
+
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,36 @@
1
+ module MmEsSearch
2
+ module Api
3
+ module Sort
4
+
5
# Embedded document describing a sort on a single field.
class RootSort

  include MongoMapper::EmbeddedDocument
  plugin MmUsesNoId

  #only support one sort field as have no need for multi as yet...
  key :field
  key :direction

  # Builds the MongoDB sort spec.
  #
  # @return [Hash, nil] +{field => :asc}+ or +{field => :desc}+; nil when
  #   +direction+ is not recognised
  def to_mongo_query
    order = normalized_sort_order
    { field => order } if order
  end

  # Builds the Elasticsearch sort spec.
  #
  # @return [Hash, nil] +{field => {:order => :asc}}+ or the :desc
  #   equivalent; nil when +direction+ is not recognised
  def to_es_query
    order = normalized_sort_order
    { field => { :order => order } } if order
  end

  private

  # Maps +direction+ onto :asc / :desc.
  #
  # The key is untyped, so the value is compared as a downcased, stripped
  # string: symbols and upper-case spellings are accepted, and a missing or
  # blank direction means ascending. Anything else yields nil.
  #
  # @return [Symbol, nil]
  def normalized_sort_order
    case direction.to_s.strip.downcase
    when '', 'asc', 'ascending' then :asc
    when 'desc', 'descending'   then :desc
    end
  end

end
33
+
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,23 @@
1
+ module MmEsSearch
2
+ module Models
3
+
4
# Base embedded document for facet models. Mixes in the Facet and Query API
# namespaces and provides helpers for building per-object labels.
class AbstractFacetModel

  include MmEsSearch::Api::Facet
  include MmEsSearch::Api::Query
  include MongoMapper::EmbeddedDocument
  #plugin MmUsesNoId

  key :required, Boolean

  class << self
    # Prefixes +label+ with the Ruby object_id of +object+.
    #
    # @param object [Object] object whose object_id forms the prefix
    # @param label  [#to_s] label to prefix
    # @return [String] "<object_id>_<label>"
    def prefix_label(object, label)
      "#{object.object_id}_#{label}"
    end
  end

  # Instance shortcut for the class-level prefix_label, using this object.
  #
  # @param label [#to_s] label to prefix
  # @return [String] "<object_id>_<label>"
  def prefix_label(label)
    self.class.prefix_label(self, label)
  end

end
22
+ end
23
+ end
@@ -0,0 +1,21 @@
1
+ module MmEsSearch
2
+ module Models
3
+
4
# Base embedded document for query models. Both hooks below have empty
# bodies and therefore return nil.
class AbstractQueryModel

  include MmEsSearch::Api::Query
  include MongoMapper::EmbeddedDocument
  plugin MmUsesNoId

  # Empty hook; returns nil.
  # NOTE(review): presumably meant to be overridden by subclasses - confirm.
  def to_query; end

  # Empty hook; returns nil.
  # NOTE(review): presumably meant to be overridden by subclasses - confirm.
  def to_filter; end

end
19
+
20
+ end
21
+ end
@@ -0,0 +1,365 @@
1
+ module MmEsSearch
2
+ module Models
3
+ module AbstractRangeFacetModel
4
+
5
+ extend ActiveSupport::Concern
6
+ include MmEsSearch::Api::Facet
7
+ include MmEsSearch::Api::Query
8
+
9
included do

  # This block is evaluated for every class that includes the concern, while
  # the constants land on the enclosing module. `||=` on all three keeps a
  # second inclusion from re-assigning them ("already initialized constant").
  TARGET_NUM_ROLES    ||= 5
  TIME_UNITS          ||= [:year, :month, :day, :hour, :min, :sec].freeze
  DEFAULT_TIME_PARAMS ||= [1970, 1, 1, 0, 0, 0].freeze

  many :rows, :class_name => 'MmEsSearch::Api::Facet::RangeFacetRow'
  one :stats, :class_name => 'MmEsSearch::Api::Facet::StatisticalFacetResult'
  key :display_mode, String

  # Initial state: invalid facets are :missing_required_fields; valid ones
  # are :ready_for_display when they already have rows, otherwise
  # :need_field_stats.
  aasm_initial_state -> facet do
    if facet.valid?
      if facet.rows.present?
        :ready_for_display
      else
        :need_field_stats
      end
    else
      :missing_required_fields
    end
  end

  aasm_event :typed_facet_initialized do
    transitions :to => :need_field_stats, :from => [:ready_for_initialization]
  end

  aasm_event :field_stats_set do
    transitions :to => :need_row_data, :from => [:need_field_stats, :need_row_data]
  end

  # Runs prune_unchecked_rows after the transition.
  aasm_event :prepare_for_new_data, :after => :prune_unchecked_rows do
    transitions :to => :need_field_stats, :from => [:ready_for_display, :need_field_stats, :need_row_data]
  end

end
44
+
45
module ClassMethods

  # Instantiates the facet via +super+, fires the typed_facet_initialized
  # event on the new instance, then returns that instance.
  #
  # @param params [Hash] attributes forwarded to the inherited +new+
  # @return [Object] the new instance
  def new(params = {})
    super(params).tap do |facet|
      facet.typed_facet_initialized
    end
  end

end
54
+
55
# Returns the fixed string 'ranges'.
# NOTE(review): presumably the key under which the facet result lists its
# rows - confirm against the caller.
#
# @return [String]
def result_name
  return 'ranges'
end
58
+
59
# Whether this facet's values are times. This default answers false;
# initialize_rows, deserialize_value and serialize_value branch on it.
#
# @return [Boolean]
def is_time?
  return false
end
62
+
63
# Whether time values are converted through Time.zone (true) or have their
# wall-clock fields copied across unchanged (false). See deserialize_value
# and serialize_value. This default answers true.
#
# @return [Boolean]
def timezone_matters?
  return true
end
66
+
67
# Class of the row objects used by this facet.
#
# @return [Class] RangeFacetRow
def row_class
  return RangeFacetRow
end
70
+
71
# Stores field statistics on the facet and advances its state.
#
# Builds a StatisticalFacetResult from +result+ minus its '_type' entry,
# assigns it to +stats+, calls zero_rows, then fires the field_stats_set
# event.
#
# @param result [Hash] statistics keyed by name
def build_field_stats(result)
  stats_attributes = result.except('_type')
  self.stats = StatisticalFacetResult.new(stats_attributes)
  zero_rows
  # NOTE(review): return value unused - presumably called so the state is
  # resolved before the event fires; confirm before removing.
  current_state
  field_stats_set
end
77
+
78
# Builds the StatisticalFacet used to fetch field statistics.
#
# The facet takes default_params plus a label derived from
# 'field_stats_result' (via prefix_label) and the current facet_filter.
#
# @return [StatisticalFacet]
def to_stats_facet
  stats_params = default_params.merge(
    :label        => prefix_label('field_stats_result'),
    :facet_filter => facet_filter
  )
  StatisticalFacet.new(stats_params)
end
86
+
87
+ # def facet_filter
88
+ # #override this to provide additional constraints
89
+ # nil
90
+ # end
91
+
92
# Builds the facet for the current display mode, falling back to
# select_display_mode when none is stored.
#
# In "range" mode the rows are (re)initialised and a RangeFacet is built
# from them.
#
# @return [RangeFacet]
# @raise [NotImplementedError] for "histogram" mode
# @raise [RuntimeError] for any other mode
def to_facet
  mode = display_mode || select_display_mode
  if "range" == mode
    initialize_rows
    range_params = default_params.merge(
      :label        => prefix_label('display_result'),
      :ranges       => rows.map(&:to_range_item),
      :facet_filter => facet_filter
    )
    RangeFacet.new(range_params)
  elsif "histogram" == mode
    raise NotImplementedError
  else
    raise "display mode '#{display_mode}' is not recognised"
  end
end
109
+
110
+ #NOTE: we use this pair of methods to transform between es and client-side units
111
+ # and the @transform_lookup ensures we avoid value creep through rounding errors
112
+ # in other words, if we ask for 100-200 in client-side units, we get that, not e.g. 101-201
113
+ # def deserialize_value(value)
114
+ # value
115
+ # end
116
+ #
117
+ # def serialize_value(value)
118
+ # value
119
+ # end
120
+
121
# Resets the serialised-to-original value lookup, then builds rows with the
# time or numeric strategy depending on is_time?.
def initialize_rows
  @transform_lookup = {}
  is_time? ? initialize_time_rows : initialize_numeric_rows
end
129
+
130
# Picks the coarsest time unit in which stats.min and stats.max differ.
#
# The components of Time.diff are checked from year down to second and the
# first non-zero one wins; if they are all zero the answer is :year.
#
# @return [Symbol] one of :year, :month, :day, :hour, :min, :sec
def best_time_unit
  diff = Time.diff(stats.min, stats.max)
  # [key in the diff hash, unit name returned], coarsest first
  candidates = [
    [:year,   :year],
    [:month,  :month],
    [:day,    :day],
    [:hour,   :hour],
    [:minute, :min],
    [:second, :sec]
  ]
  match = candidates.find { |diff_key, _unit| diff[diff_key].abs > 0 }
  match ? match.last : :year
end
141
+
142
# Converts a value from its Elasticsearch form back to the client-side form.
#
# - "total", "sum_of_squares" and "variance" are discarded (nil).
# - A value previously produced by serialize_value is returned from
#   @transform_lookup unchanged, so round trips do not drift.
# - For non-time facets the value is returned as is.
# - For time facets, "from"/"to"/"min"/"max"/"mean" become times: numeric
#   values are divided by 1000 before being handed to +at+ (serialize_value
#   multiplies epoch seconds by 1000), strings are parsed. When
#   timezone_matters? is false the wall-clock fields are copied into
#   Time.zone instead of converting the instant. "std_deviation" is divided
#   by 1000, "count" is passed through and any other key yields nil.
#
# @param key      [String] name of the statistic / row field
# @param es_value [Numeric, String, nil] value as returned by Elasticsearch
# @return [Object, nil] client-side value
def deserialize_value(key, es_value)
  return nil if %w[total sum_of_squares variance].include?(key) # i.e. discard these

  if @transform_lookup && (stored_val = @transform_lookup[es_value])
    return stored_val
  end

  return es_value unless is_time?

  case key
  when "from", "to", "min", "max", "mean"
    if timezone_matters?
      case es_value
      when Numeric then Time.zone.at(es_value / 1000)
      when String  then Time.zone.parse(es_value)
      end
    else
      wall_clock = case es_value
                   when Numeric then Time.at(es_value / 1000)
                   when String  then Time.parse(es_value)
                   end
      # Time#to_a starts sec, min, hour, day, month, year; reversed, the
      # first six fields are the arguments Time.zone.local expects.
      Time.zone.local(*wall_clock.to_a[0..5].reverse)
    end
  when "std_deviation"
    es_value / 1000
  when "count"
    es_value
  end
end
189
+
190
# Converts a client-side value to its Elasticsearch form and remembers the
# pairing in @transform_lookup so deserialize_value can return the original.
#
# Non-time facets pass the value through. For time facets the value is
# expressed in UTC (converted when timezone_matters?, otherwise rebuilt from
# its wall-clock fields) and rendered as an ISO 8601 string for filters or
# as epoch seconds multiplied by 1000 otherwise.
#
# @param value      [Object] client-side value (a time when is_time?)
# @param for_filter [Boolean] true to get an ISO 8601 string for time values
# @return [Object] the Elasticsearch-side value
def serialize_value(value, for_filter = false)
  es_value =
    if is_time?
      utc_time =
        if timezone_matters?
          # getutc returns a new object; Time#utc would convert the caller's
          # plain Time in place.
          value.getutc
        else
          Time.utc(*value.to_a[0..5].reverse)
        end
      for_filter ? utc_time.iso8601 : utc_time.to_f * 1000
    else
      value
    end
  @transform_lookup ||= {}
  @transform_lookup[es_value] = value
  es_value
end
205
+
206
# Deserialises a raw field-stats result and stores it via build_field_stats.
#
# @param result [Hash] raw statistics keyed by name
def handle_field_stats_result(result)
  client_side_stats = transform_field_stats(result)
  build_field_stats(client_side_stats)
end
209
+
210
# Runs every entry of +result+ through deserialize_value.
#
# @param result [Hash] raw statistics keyed by name
# @return [Hash] same keys, each value replaced by its deserialised form
def transform_field_stats(result)
  transformed = {}
  result.each do |key, value|
    transformed[key] = deserialize_value(key, value)
  end
  transformed
end
215
+
216
# Expresses the stored statistics in terms of a single time unit.
#
# min, max and mean are obtained by sending +time_unit+ to the stored
# values; the standard deviation is divided by one unit of that length
# (+1.year+, +1.minute+, ...).
#
# @param time_unit [Symbol] e.g. :year, :month, :min, :sec
# @return [Array] +[min, max, mean, sd]+
def get_time_stats(time_unit)
  min, max, mean = [:min, :max, :mean].map do |stat|
    stats.send(stat).send(time_unit)
  end
  # :sec and :min are the Time accessor names; the duration helpers are
  # called :second and :minute.
  duration_name = { :sec => :second, :min => :minute }.fetch(time_unit, time_unit)
  sd = stats.std_deviation / 1.send(duration_name) #e.g. 1.year or 1.month
  [min, max, mean, sd]
end
228
+
229
# Builds rows for a time facet.
#
# Range boundaries are computed in the best time unit, each boundary is
# turned into a Time.zone time (units coarser than the chosen one are
# copied from stats.mean, finer ones come from DEFAULT_TIME_PARAMS), the
# times are serialised and the rows are built from the serialised values.
def initialize_time_rows
  unit = best_time_unit
  min, max, mean, sd = get_time_stats(unit)
  boundaries = calculate_range_values(min, max, mean, sd)
  unit_position = TIME_UNITS.index(unit)

  row_times = boundaries.map do |boundary|
    params = DEFAULT_TIME_PARAMS.dup
    if unit_position > 0 #e.g. if all same year, copy year across from stats.mean
      mean_fields = stats.mean.to_a[0..5].reverse
      params[0..unit_position - 1] = mean_fields[0..unit_position - 1]
    end
    params[unit_position] = boundary
    Time.zone.local(*params)
  end

  es_times = row_times.map do |row_time|
    es_time = serialize_value(row_time)
    @transform_lookup[es_time] = row_time
    es_time
  end

  build_rows(es_times)
end
250
+
251
# Computes rounded boundary values for the facet's ranges.
#
# The window is one standard deviation either side of the mean, clamped to
# the observed min/max. Windows narrower than 100 are scaled up before
# rounding to powers of ten and scaled back down at the end. The results
# are cast with to_f when +min+ or +max+ is a Float, otherwise with to_i.
#
# @param min  [Numeric] smallest observed value
# @param max  [Numeric] largest observed value
# @param mean [Numeric] mean of the observed values
# @param sd   [Numeric] standard deviation of the observed values
# @return [Array<Numeric>] ascending boundary values
def calculate_range_values(min, max, mean, sd)

  #TODO come up with a system that works with decimal values
  # AND when the diff between lower and upper is very small or zero

  final_casting = (min.is_a?(Float) || max.is_a?(Float)) ? :to_f : :to_i

  orig_lower = [min.floor, (mean - sd).floor].max
  orig_upper = [max.ceil, (mean + sd).ceil].min
  orig_range = orig_upper - orig_lower

  if orig_range < 100 && !orig_range.zero?
    # Work on a scaled-up copy so the rounding below has room to operate.
    scale = 100 / orig_range.floor
    scale = round_to_power_of_ten(scale, :up, [scale.to_s.length - 1, 1].max)
    lower, upper, range = [orig_lower, orig_upper, orig_range].map { |v| v * scale }
  else
    lower, upper, range = orig_lower, orig_upper, orig_range
    scale = nil
  end

  # Digits in the range (or in lower when the range is zero), minus one.
  power = (range.zero? ? lower : range).to_s.length - 1

  lower = round_to_power_of_ten(lower, :down, power)
  upper = round_to_power_of_ten(upper, :up, power)
  orig_inc = ((upper - lower) / (TARGET_NUM_ROLES - 2)).floor
  inc = round_to_power_of_ten(orig_inc, :up, power)

  if inc == 0
    values = [lower - 5, lower + 5] #TODO remove this once pruning support is added
  else
    values = (TARGET_NUM_ROLES - 1).times.map { |i| lower + inc * i }
    # Keep at most one boundary at or beyond the upper bound.
    if (gte_upper = values.select { |n| n >= upper }).length > 1
      values = values[0..-gte_upper.length]
    end
  end

  values.map!(&final_casting)
  values.map! { |v| v / scale } if scale

  values

end
320
+
321
# Builds rows for a numeric facet: boundary values are computed straight
# from the stored statistics and handed to build_rows.
def initialize_numeric_rows
  boundaries = calculate_range_values(
    stats.min,
    stats.max,
    stats.mean,
    stats.std_deviation
  )
  build_rows(boundaries)
end
325
+
326
# Rebuilds +rows+ from a list of boundary values, keeping the rows returned
# by checked_rows.
#
# Exactly two values give a single closed row (from..to). Otherwise the
# rows are: an open-ended "up to first" row, one closed row per adjacent
# pair, and - when there is more than one value - an open-ended "from last"
# row. The +values+ array itself is left unmodified.
#
# @param values [Array] ascending boundary values
def build_rows(values)
  self.rows = checked_rows #NOTE: we preserve selected rows
  if values.length == 2
    rows << RangeFacetRow.new(:from => values.first, :to => values.last)
  else
    rows << RangeFacetRow.new(:to => values.first) # -> 1 (if starting with 1,2,3,4)
    values.each_cons(2) do |from, to|
      rows << RangeFacetRow.new(:from => from, :to => to) # 1 -> 2, 2 -> 3, 3 -> 4
    end
    rows << RangeFacetRow.new(:from => values.last) if values.length > 1 # 4 ->
  end
end
338
+
339
# Rounds +n+ up or down to a multiple of 10**power.
#
# @param n         [Numeric] value to round
# @param direction [Symbol] :up or :down
# @param power     [Integer] exponent of ten to round to
# @return [Numeric] +n+ itself when it is already a multiple, otherwise the
#   nearest multiple in the requested direction
# @raise [ArgumentError] when +direction+ is neither :up nor :down
def round_to_power_of_ten(n, direction, power)
  # Fail fast: an unknown direction used to fall through and return nil.
  unless [:up, :down].include?(direction)
    raise ArgumentError, "direction must be :up or :down, got #{direction.inspect}"
  end

  unit = 10**power
  remainder = n % unit
  return n if remainder.zero?

  direction == :up ? n + (unit - remainder) : n - remainder
end
349
+
350
# Chooses and stores the display mode. At present it always selects "range".
#
# @return [String] the mode that was assigned
def select_display_mode
  #logic for setting mode based on stats
  #TODO build this logic
  self.display_mode = "range"
end
357
+
358
# Names of the row fields this facet works with.
#
# @return [Array<String>] 'from' and 'to'
def required_row_fields
  %w[from to]
end
361
+
362
+
363
+ end
364
+ end
365
+ end