mm_es_search 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.project +18 -0
- data/Gemfile +4 -0
- data/Rakefile +1 -0
- data/lib/mm_es_search/api/facet/abstract_facet.rb +28 -0
- data/lib/mm_es_search/api/facet/date_histogram_facet.rb +11 -0
- data/lib/mm_es_search/api/facet/filter_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/geo_distance_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/histogram_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/query_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/range_facet.rb +36 -0
- data/lib/mm_es_search/api/facet/range_facet_row.rb +97 -0
- data/lib/mm_es_search/api/facet/range_item.rb +17 -0
- data/lib/mm_es_search/api/facet/statistical_facet.rb +33 -0
- data/lib/mm_es_search/api/facet/statistical_facet_result.rb +36 -0
- data/lib/mm_es_search/api/facet/terms_facet.rb +62 -0
- data/lib/mm_es_search/api/facet/terms_facet_row.rb +35 -0
- data/lib/mm_es_search/api/facet/terms_stats_facet.rb +9 -0
- data/lib/mm_es_search/api/highlight/result_highlight.rb +40 -0
- data/lib/mm_es_search/api/query/abstract_filter.rb +15 -0
- data/lib/mm_es_search/api/query/abstract_query.rb +48 -0
- data/lib/mm_es_search/api/query/and_filter.rb +9 -0
- data/lib/mm_es_search/api/query/bool_filter.rb +11 -0
- data/lib/mm_es_search/api/query/bool_query.rb +67 -0
- data/lib/mm_es_search/api/query/constant_score_query.rb +31 -0
- data/lib/mm_es_search/api/query/custom_filters_score_query.rb +52 -0
- data/lib/mm_es_search/api/query/custom_score_query.rb +31 -0
- data/lib/mm_es_search/api/query/dismax_query.rb +29 -0
- data/lib/mm_es_search/api/query/filtered_query.rb +30 -0
- data/lib/mm_es_search/api/query/has_child_filter.rb +11 -0
- data/lib/mm_es_search/api/query/has_child_query.rb +25 -0
- data/lib/mm_es_search/api/query/has_parent_filter.rb +11 -0
- data/lib/mm_es_search/api/query/has_parent_query.rb +25 -0
- data/lib/mm_es_search/api/query/match_all_filter.rb +11 -0
- data/lib/mm_es_search/api/query/match_all_query.rb +19 -0
- data/lib/mm_es_search/api/query/nested_filter.rb +22 -0
- data/lib/mm_es_search/api/query/nested_query.rb +62 -0
- data/lib/mm_es_search/api/query/not_filter.rb +9 -0
- data/lib/mm_es_search/api/query/or_filter.rb +9 -0
- data/lib/mm_es_search/api/query/prefix_filter.rb +11 -0
- data/lib/mm_es_search/api/query/prefix_query.rb +34 -0
- data/lib/mm_es_search/api/query/query_filter.rb +28 -0
- data/lib/mm_es_search/api/query/query_string_query.rb +37 -0
- data/lib/mm_es_search/api/query/range_filter.rb +11 -0
- data/lib/mm_es_search/api/query/range_query.rb +57 -0
- data/lib/mm_es_search/api/query/scored_filter.rb +29 -0
- data/lib/mm_es_search/api/query/single_bool_filter.rb +66 -0
- data/lib/mm_es_search/api/query/term_filter.rb +11 -0
- data/lib/mm_es_search/api/query/term_query.rb +34 -0
- data/lib/mm_es_search/api/query/terms_filter.rb +11 -0
- data/lib/mm_es_search/api/query/terms_query.rb +58 -0
- data/lib/mm_es_search/api/query/text_query.rb +42 -0
- data/lib/mm_es_search/api/query/top_children_query.rb +28 -0
- data/lib/mm_es_search/api/sort/root_sort.rb +36 -0
- data/lib/mm_es_search/models/abstract_facet_model.rb +23 -0
- data/lib/mm_es_search/models/abstract_query_model.rb +21 -0
- data/lib/mm_es_search/models/abstract_range_facet_model.rb +365 -0
- data/lib/mm_es_search/models/abstract_search_model.OLD +538 -0
- data/lib/mm_es_search/models/abstract_search_model.rb +521 -0
- data/lib/mm_es_search/models/abstract_sort_model.rb +13 -0
- data/lib/mm_es_search/models/abstract_terms_facet_model.rb +87 -0
- data/lib/mm_es_search/models/root_sort_model.rb +20 -0
- data/lib/mm_es_search/models/virtual_field_sort.rb +52 -0
- data/lib/mm_es_search/utils/facet_row_utils.rb +86 -0
- data/lib/mm_es_search/utils/search_logger.rb +10 -0
- data/lib/mm_es_search/version.rb +3 -0
- data/lib/mm_es_search.rb +124 -0
- data/mm_es_search.gemspec +24 -0
- metadata +132 -0
@@ -0,0 +1,538 @@
|
|
1
|
+
module MmEsSearch
|
2
|
+
module Models
|
3
|
+
|
4
|
+
module AbstractSearchModel
|
5
|
+
|
6
|
+
extend ActiveSupport::Concern
|
7
|
+
include MmEsSearch::Api::Query
|
8
|
+
include MmEsSearch::Api::Sort
|
9
|
+
include MmEsSearch::Api::Facet
|
10
|
+
include MmEsSearch::Api::Highlight
|
11
|
+
include MmEsSearch::Models
|
12
|
+
include MmEsSearch::Utils
|
13
|
+
|
14
|
+
included do
  # Plugin supplied by the host application / a companion gem.
  plugin MmUsesUuid

  # Raw user query text; parsed lazily into query_object when needed.
  key :query_string, String

  # Embedded search components (query, sort, highlighting, facets).
  one  :query_object,     class_name: 'MmEsSearch::Models::AbstractQueryModel'
  one  :sort_object,      class_name: 'MmEsSearch::Models::AbstractSortModel'
  one  :highlight_object, class_name: 'MmEsSearch::Api::Highlight::ResultHighlight'
  many :facets,           class_name: 'MmEsSearch::Models::AbstractFacetModel'

  # Persisted keys for the outcome of a search.
  key :result_ids,   Array
  key :result_total, Integer
  key :highlights,   Array
end
|
28
|
+
|
29
|
+
# Placeholder for class-level methods; intentionally empty for now.
module ClassMethods; end
|
32
|
+
|
33
|
+
# Runs this search against ElasticSearch (:es) or MongoDB (:mongo).
#
# target  - :es or :mongo; any other value falls through and returns nil.
# options - Hash with String or Symbol keys (defaults in parentheses):
#           :page (1), :per_page (10), :fields ([]),
#           :raw_es_response (false), :sorted (true), :highlight (true),
#           :facet_query (false). :facet_query may be an AbstractFacet, a
#           ready-made Hash, :auto, :force_auto, :manual, or a falsy value
#           meaning "no facets".
#
# Returns the raw search response when :raw_es_response is set; otherwise a
# paginated collection of records. Also stores the ids and total of the hits
# in @result_ids / @result_total (and @highlights for :es when a highlight
# object is present).
def run(target, options = {})
  # symbolize_keys returns a copy, so the merged result has to be captured;
  # merging in place into that copy would silently drop every default.
  options = options.symbolize_keys.reverse_merge(
    :page            => 1,
    :per_page        => 10,
    :fields          => [],
    :raw_es_response => false,
    :sorted          => true,
    :highlight       => true,
    :facet_query     => false
  )

  page            = options[:page]
  per_page        = options[:per_page]
  fields          = options[:fields]
  raw_es_response = options[:raw_es_response]
  sorted          = options[:sorted]
  highlight       = options[:highlight]

  case target
  when :es

    if options[:facet_query] && !raw_es_response
      facets_in_display_state = facets.select { |facet| facet.current_state == :ready_for_display }
      facets_in_display_state.each(&:prepare_for_new_data)
    end

    facet_es_query = case options[:facet_query]
    when AbstractFacet
      options[:facet_query].to_es_query
    when Hash
      options[:facet_query]
    when :auto
      unless type_facet_positively_set?
        # Only the local copy of the options is changed here.
        options[:facet_query] = :manual
        facets.delete_if(&:unused?)
        unless type_facet_initialized?
          facets << build_facet_model(
            :virtual_field => type_field,
            :data_type     => "string",
            :exclude       => type_field_excludes
          )
        end
        build_next_facet_es_query(:explore_manual_facets)
      else
        build_next_facet_es_query(:explore_manual_and_auto_facets)
      end
    when :force_auto
      build_next_facet_es_query(:explore_manual_and_auto_facets)
    when :manual
      facets_without_data_type = facets.select { |facet| facet.current_state == :need_data_type }
      add_known_data_types(facets_without_data_type)
      build_next_facet_es_query
    else
      nil
    end

    request = es_request(sorted, facet_es_query, highlight)
    @search_log.info(request.to_json) if debug_on?
    response = target_collection.search_hits(
      request,
      :page     => page,
      :per_page => per_page,
      :ids_only => true
    )

    return response if raw_es_response

    @result_ids   = response.hits
    @result_total = response.total_entries
    @highlights   = response.response['hits']['hits'].map { |hit| hit['highlight'] } if highlight_object?
    out = find_hits_in_mongo(@result_ids, fields, page, per_page)

    if options[:facet_query]

      write_facet_results_to_models(response.facets)
      update_used_facet_missing_counts_to_zero
      update_show_missing_facet_missing_counts_to_total
      prune_facets
      facets_without_data_type = facets.select { |facet| facet.current_state == :need_data_type }
      add_known_data_types(facets_without_data_type)

      # Keep issuing facet-only queries until every facet can be displayed;
      # the counter guards against a facet that never reaches that state.
      sanity_count = 0
      until facets.all? { |facet| facet.current_state == :ready_for_display }
        facet_query   = build_next_facet_es_query
        facet_results = run_for_facets_only(facet_query)
        write_facet_results_to_models(facet_results)
        prune_facets

        sanity_count += 1
        raise 'until loop has looped too many times!' if sanity_count > 5
      end
    end

    build_sort_options if respond_to? :build_sort_options
    return out # output result set

  when :mongo

    request = mongo_request
    @search_log.info(request.to_json) if debug_on?
    query = target_collection.where(request)
    if sort_object.is_a?(RootSortModel)
      query = query.sort(sort_object.to_mongo_query)
    end
    unless fields.empty?
      query = query.fields(*fields)
    end

    response = query.paginate(:page => page, :per_page => per_page)
    @result_ids   = response.map(&:_id)
    @result_total = response.total_entries

    return response

  end
end
|
148
|
+
|
149
|
+
# Collects the next pending query from every facet, optionally appends the
# exploratory coverage queries selected by +mode+, and merges the lot into a
# single facets hash.
#
# mode - nil, :explore_manual_facets, :explore_auto_facets or
#        :explore_manual_and_auto_facets.
#
# Returns whatever facet_array_to_es_query returns for the non-nil entries.
def build_next_facet_es_query(mode = nil)
  pending = facets.map { |facet| facet.next_facet_query }

  if mode == :explore_manual_facets
    pending.push(manual_facet_coverage_query)
  elsif mode == :explore_auto_facets
    pending.push(auto_facet_exploratory_query)
  elsif mode == :explore_manual_and_auto_facets
    pending.push(manual_facet_coverage_query, auto_facet_exploratory_query)
  end

  facet_array_to_es_query(pending.compact)
end
|
161
|
+
|
162
|
+
# Merges the #to_es_query hash of every entry into one hash; later entries
# win on duplicate keys.
#
# query_array - Enumerable of objects responding to #to_es_query.
#
# Returns the merged Hash, or nil when nothing was merged.
def facet_array_to_es_query(query_array)
  merged = query_array.inject({}) do |combined, facet_query|
    combined.merge!(facet_query.to_es_query)
  end
  merged.empty? ? nil : merged
end
|
170
|
+
|
171
|
+
# Copies raw facet results from a search response onto the facet models.
#
# facet_results - facet label => result payload; nil or empty is a no-op.
#
# Labels handled:
#   'auto_facet_coverage'          - adds a new proto facet per returned term
#   'manual_facet_coverage'        - updates the missing count of known facets
#   'data_type_counts_for_<label>' - passes the term counts to the facet with
#                                    that label so it can be given a type
#   anything else                  - treated as a facet label; rows or field
#                                    stats are built depending on '_type'
def write_facet_results_to_models(facet_results)
  return if facet_results.nil? || facet_results.empty?

  facet_results.each do |label, result|
    case label
    when 'auto_facet_coverage'
      result['terms'].each do |term_info|
        facets << proto_facet.new(
          :virtual_field => term_info['term'],
          :missing       => @result_total - term_info['count']
        )
      end

    when 'manual_facet_coverage'
      result['terms'].each do |term_info|
        known = facets.detect { |f| f.virtual_field == term_info['term'] }
        known.missing = @result_total - term_info['count'] if known
      end

    when /^data_type_counts_for_/
      # Strip the 21-character 'data_type_counts_for_' prefix.
      true_label = label[21..-1]
      data_type_counts = result['terms']
      owner = facets.detect { |f| f.label == true_label }
      replace_proto_facet_with_typed_facet(owner.virtual_field, data_type_counts) if owner

    else
      target_facet = facets.detect { |f| f.label == label }
      if target_facet
        case result['_type']
        when "terms", "range" then target_facet.build_facet_rows(result)
        when "statistical"    then target_facet.build_field_stats(result)
        end
      end
    end
  end
end
|
217
|
+
|
218
|
+
# Swaps the facet for +virtual_field+ with a typed facet model built from its
# attributes, after giving it a data type.
#
# virtual_field   - identifies the facet to replace; unknown fields are a no-op.
# data_type_param - a String (used directly as the data type) or an Array
#                   (handed to the facet's build_data_type_counts).
#
# Raises RuntimeError when the facet is not in :ready_for_initialization
# state afterwards.
def replace_proto_facet_with_typed_facet(virtual_field, data_type_param)
  position = facets.find_index { |f| f.virtual_field == virtual_field }
  return unless position

  proto = facets[position]

  if String === data_type_param
    proto.data_type = data_type_param
  elsif Array === data_type_param
    proto.build_data_type_counts(data_type_param)
  end

  unless proto.current_state == :ready_for_initialization
    raise 'proto_facet not ready for initialization'
  end

  # '_type' is dropped so the new model is not built as the old class.
  typed_params = proto.attributes.except('_type').symbolize_keys
  facets[position] = build_facet_model(typed_params)
end
|
238
|
+
|
239
|
+
# Builds the request hash sent to ElasticSearch.
#
# sorted      - use the sorted query (and add :sort for a RootSortModel).
# facet_query - facets hash to attach under :facets, or nil for none.
# highlight   - attach highlighting when a highlight object is present.
#
# Returns a Hash with :query and :query_dsl plus the optional sections.
def es_request(sorted = true, facet_query = nil, highlight = true)
  parse_query_string_if_needed
  base_query = if sorted then sorted_query else unsorted_query end

  request = { :query => base_query.to_es_query, :query_dsl => false }
  request[:sort]      = sort_object.to_es_query      if sort_object.is_a?(RootSortModel) && sorted
  request[:facets]    = facet_query                  if facet_query
  request[:highlight] = highlight_object.to_es_query if highlight_object? && highlight
  request
end
|
257
|
+
|
258
|
+
# Returns the sorted query rendered via #to_mongo_query, parsing the query
# string first when that is still pending.
def mongo_request
  parse_query_string_if_needed
  mongo_ready_query = sorted_query
  mongo_ready_query.to_mongo_query
end
|
262
|
+
|
263
|
+
# Resets the missing count of every used facet to zero: once a facet has been
# applied, every remaining result has a value for it by definition.
def update_used_facet_missing_counts_to_zero
  used_facets.each do |applied_facet|
    applied_facet.missing = 0
  end
end
|
267
|
+
|
268
|
+
# For used facets flagged show_missing, sets the missing count to the total
# number of results.
def update_show_missing_facet_missing_counts_to_total
  used_facets.each do |applied_facet|
    next unless applied_facet.show_missing
    applied_facet.missing = @result_total
  end
end
|
271
|
+
|
272
|
+
# True when some facet already targets the type field.
def type_facet_initialized?
  facets.any? do |candidate|
    candidate.virtual_field == type_field
  end
end
|
275
|
+
|
276
|
+
# True when a used facet on the type field has at least one row whose
# checked value is "and" or "or".
def type_facet_positively_set?
  used_facets.any? do |facet|
    facet.virtual_field == type_field &&
      facet.rows.any? { |row| %w[and or].include?(row.checked) }
  end
end
|
285
|
+
|
286
|
+
# Builds the query object from the query string, but only when a query string
# is present and no query object exists yet.
def parse_query_string_if_needed
  build_query_object if query_string? && query_object.nil?
end
|
291
|
+
|
292
|
+
# Loads the records for the given hits from the target collection in one call
# and puts them back into search-result order.
#
# hits     - ElasticSearch::Api::Hit objects or plain ids.
# fields   - fields to load, passed through to find_with_fields.
# page     - page number reported on the returned collection.
# per_page - page size reported on the returned collection.
#
# Returns the collection built by paginate_records (empty when hits is empty).
def find_hits_in_mongo(hits = @result_ids, fields = [], page = 1, per_page = @result_ids.length)
  return paginate_records([], page, per_page, @result_total) if hits.empty?

  # Anything that is not a Hit is presumed to already be an id.
  ranked_ids = ElasticSearch::Api::Hit === hits.first ? hits.map(&:_id) : hits

  # NOTE: #find_with_fields is used to avoid redefining the standard MM #find
  #       method; it could be built from plucky's #where and #fields but is
  #       implemented directly in MmUsesUuid.
  found = target_collection.find_with_fields ranked_ids, :fields => fields

  records = case found
            when Array then found.reorder_by(ranked_ids.map(&:to_s)) { |record| record.id.to_s }
            when nil   then []
            else            [found]
            end

  paginate_records(records, page, per_page, @result_total)
end
|
320
|
+
|
321
|
+
# Wraps +records+ in a WillPaginate::Collection carrying the given page,
# page size and total.
def paginate_records(records, page, per_page, total)
  WillPaginate::Collection.new(page, per_page, total).tap do |collection|
    collection.replace(records)
  end
end
|
326
|
+
|
327
|
+
# Counts the results of the unsorted query on the given backend.
#
# target  - :es or :mongo; any other value returns nil.
# options - accepted for interface compatibility; not read here.
def count(target, options = {})
  parse_query_string_if_needed
  if :es == target
    target_collection.search_hits(unsorted_query.to_es_query, :per_page => 0).total_entries
  elsif :mongo == target
    target_collection.where(unsorted_query.to_mongo_query).count
  end
end
|
336
|
+
|
337
|
+
# Combines scoring and non-scoring queries into a single query object.
#
# scored   - queries whose score should count.
# unscored - queries that only restrict the result set.
#
# Returns a MatchAllQuery when both lists are empty, a ConstantScoreQuery
# around the unscored queries when nothing is scored, the scored query (or a
# BoolQuery of them) when nothing is unscored, and otherwise a BoolQuery of
# the scored queries plus zero-boosted copies of the unscored ones.
def combine_queries(scored, unscored)
  return MatchAllQuery.new if scored.empty? && unscored.empty?

  if scored.empty?
    return ConstantScoreQuery.new(
      :boost => 1,
      :query => BoolQuery.new(:musts => unscored)
    )
  end

  if unscored.empty?
    return scored.length > 1 ? BoolQuery.new(:musts => scored) : scored.first
  end

  # Copies are boosted to 0 so the originals are left untouched.
  zero_boosted = unscored.map do |original|
    original.dup.tap { |copy| copy.boost = 0 }
  end
  BoolQuery.new(:musts => scored + zero_boosted)
end
|
363
|
+
|
364
|
+
# Builds the query used when scoring/sorting does not matter.
# NOTE: non-RootSortModel sorts are added as filters because they typically
#       restrict the results.
def unsorted_query
  parse_query_string_if_needed
  constant_queries, restricting_filters = sort_query_and_facets_as_filters
  build_filtered_query(combine_queries([], constant_queries), restricting_filters)
end
|
370
|
+
|
371
|
+
# Builds the query used when result order matters.
#
# Falls back to unsorted_query when there is neither a sort nor a query
# object, or when the sort is a RootSortModel. Otherwise the scoring part
# comes from the query object (no sort object) or from the sort object, and
# the rest is applied as filters.
def sorted_query
  parse_query_string_if_needed

  nothing_to_score = sort_object.nil? && query_object.nil?
  return unsorted_query if nothing_to_score || sort_object.is_a?(RootSortModel)

  if sort_object.nil?
    scoring_query = query_object.to_query
    restricting_filters = facets_as_filters
  else
    constant_queries, restricting_filters = query_and_facets_as_filters
    scoring_query = combine_queries([sort_object.to_query], constant_queries)
  end

  build_filtered_query(scoring_query, restricting_filters)
end
|
386
|
+
|
387
|
+
# Same as query_and_facets_as_filters, with the sort object's filter appended
# when a sort object exists and is not a RootSortModel.
#
# Returns [unscored_queries, filters].
def sort_query_and_facets_as_filters
  constant_queries, restricting_filters = query_and_facets_as_filters
  sort_restricts = !sort_object.nil? && !sort_object.is_a?(RootSortModel)
  restricting_filters << sort_object.to_filter if sort_restricts
  [constant_queries, restricting_filters]
end
|
392
|
+
|
393
|
+
# Splits the search into non-scoring queries and filters.
#
# The facet filters always go into the filter list. The query object is added
# as a filter when it can render itself as one (to_filter is truthy),
# otherwise as an unscored query.
#
# Returns [unscored_queries, filters].
def query_and_facets_as_filters
  restricting_filters = facets_as_filters
  constant_queries = []

  if query_object?
    filter_form = query_object.to_filter
    if filter_form
      restricting_filters << filter_form
    else
      constant_queries << query_object.to_query
    end
  end

  [constant_queries, restricting_filters]
end
|
404
|
+
|
405
|
+
# Returns the filters of all used facets, leaving out facets whose to_filter
# is nil.
def facets_as_filters
  used_facets.each_with_object([]) do |facet, collected|
    facet_filter = facet.to_filter
    collected << facet_filter unless facet_filter.nil?
  end
end
|
408
|
+
|
409
|
+
# Wraps +query+ in a FilteredQuery that ANDs all +filters+ together.
# With nil or empty filters the query is returned unchanged.
def build_filtered_query(query, filters)
  return query if filters.nil? || filters.empty?

  combined_filter = AndFilter.new(:filters => filters)
  FilteredQuery.new(:query => query, :filter => combined_filter)
end
|
421
|
+
|
422
|
+
# Builds the facet model matching params[:data_type].
#
# Data types starting with "string", plus 'boolean' and 'uri', get a term
# facet model; 'integer', 'float', 'time' and 'date' get a range facet model.
#
# Raises RuntimeError for any other data type.
def build_facet_model(params)
  data_type = params[:data_type]
  case data_type
  when /^string/, 'boolean', 'uri'        then build_term_facet_model(params)
  when 'integer', 'float', 'time', 'date' then build_range_facet_model(params)
  else raise "unable to build a facet model for data_type = #{data_type}"
  end
end
|
432
|
+
|
433
|
+
# Runs an unsorted, unhighlighted ElasticSearch request with zero hits per
# page and returns only the facets of the raw response.
#
# facet_es_query - facets hash to send along.
#
# Returns the response's facets, or {} when it has none.
def run_for_facets_only(facet_es_query)
  facet_only_options = {
    :facet_query     => facet_es_query,
    :raw_es_response => true,
    :sorted          => false,
    :highlight       => false,
    :per_page        => 0
  }
  facets_payload = run(:es, facet_only_options).facets
  return {} if facets_payload.nil?
  facets_payload
end
|
442
|
+
|
443
|
+
# Facets that report themselves as used.
def used_facets
  facets.select { |facet| facet.used? }
end
|
446
|
+
|
447
|
+
# Facets that report themselves as unused, i.e. those still only on offer.
def offered_facets
  facets.select { |facet| facet.unused? }
end
|
450
|
+
|
451
|
+
|
452
|
+
# Removes offered (unused) facets that are unlikely to be useful and records
# why each one was removed. Facets on non_prunable_fields are never touched.
#
# Proto facets are pruned when too few results carry the field: coverage
# below REQUIRED_COVERAGE_RATIO or fewer than REQUIRED_COVERAGE_COUNT hits.
# Terms facets are pruned when their first row's count is 1, or is under 5%
# or over 75% of the total. Terms facets without any rows are skipped, since
# there is nothing to judge them by yet.
# Range facets are currently never pruned.
def prune_facets
  min_top_term_share = 0.05
  max_top_term_share = 0.75

  prunable_facets = offered_facets.reject { |f| non_prunable_fields.include?(f[:virtual_field]) }
  fields_to_delete = {}

  prunable_facets.each do |facet|
    case facet
    when proto_facet

      total_present  = @result_total - facet.missing
      coverage_ratio = total_present / @result_total.to_f

      if coverage_ratio < self.class::REQUIRED_COVERAGE_RATIO
        fields_to_delete[facet[:virtual_field]] = 'coverage_ratio_too_low'
      elsif total_present < self.class::REQUIRED_COVERAGE_COUNT
        fields_to_delete[facet[:virtual_field]] = 'coverage_count_too_low'
      end

    when AbstractTermsFacetModel

      # A facet with no rows would otherwise fail on nil.count.
      top_row = facet.rows.first
      next if top_row.nil?

      largest_term_count = top_row.count
      prop_of_total      = largest_term_count / @result_total.to_f

      if largest_term_count == 1
        fields_to_delete[facet.virtual_field] = 'top_count_is_unity'
      elsif prop_of_total < min_top_term_share
        fields_to_delete[facet.virtual_field] = 'top_count_too_small'
      elsif prop_of_total > max_top_term_share
        fields_to_delete[facet.virtual_field] = 'top_count_too_big'
      end

    when AbstractRangeFacetModel
      # anything we can catch here?
    end
  end

  prune_and_record_reason(fields_to_delete)
end
|
496
|
+
|
497
|
+
# Deletes every facet whose virtual field is a key of +fields_to_delete+ and
# records the associated reason.
#
# fields_to_delete - Hash of virtual_field => reason.
def prune_and_record_reason(fields_to_delete)
  fields_to_delete.each do |doomed_field, why|
    facets.delete_if do |candidate|
      candidate[:virtual_field] == doomed_field
    end
    record_prune_reason(doomed_field, why)
  end
end
|
503
|
+
|
504
|
+
|
505
|
+
# Reports whether debug logging is enabled. On first use, when the flag has
# never been set, logging is explicitly switched off and false is returned.
def debug_on?
  return @debug_on if defined?(@debug_on)

  debug_off
  false
end
|
513
|
+
|
514
|
+
# Enables debug logging: opens log/search.log under the Rails root in append
# mode with sync enabled, wraps it in a SearchLogger and writes a start line.
#
# Returns self so calls can be chained.
def debug_on
  @debug_on = true
  log_io = File.open("#{Rails.root}/log/search.log", 'a')
  log_io.sync = true
  @search_log = SearchLogger.new(log_io)
  @search_log.info "#{self.class.name} now logging\n"
  self
end
|
522
|
+
|
523
|
+
# Disables debug logging and drops the reference to the search logger.
#
# Returns self so calls can be chained.
def debug_off
  @debug_on, @search_log = false, nil
  self
end
|
528
|
+
|
529
|
+
# Resolves the class being searched from this search class's own name.
#
# The name is expected to be of the form klass.name + "Search"; the word
# characters directly before "Search" are constantized.
#
# Raises RuntimeError when the class name has no non-empty part before
# "Search" (including names without "Search" at all).
def target_collection
  class_name  = self.class.name.to_s
  klass_match = class_name.match(/(?<klass>\w*)(?=Search)/)

  # match returns nil when "Search" is absent, and the capture can be empty
  # (e.g. a class named just "Search"); both must hit the explanatory error.
  klass_name = klass_match && klass_match[:klass]
  if klass_name.nil? || klass_name.empty?
    raise "expected the class name '#{class_name}' to be of form 'SomethingSearch' so that we can extract 'Something' as the target collection"
  end

  klass_name.constantize
end
|
535
|
+
|
536
|
+
end
|
537
|
+
end
|
538
|
+
end
|