mm_es_search 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.project +18 -0
- data/Gemfile +4 -0
- data/Rakefile +1 -0
- data/lib/mm_es_search/api/facet/abstract_facet.rb +28 -0
- data/lib/mm_es_search/api/facet/date_histogram_facet.rb +11 -0
- data/lib/mm_es_search/api/facet/filter_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/geo_distance_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/histogram_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/query_facet.rb +9 -0
- data/lib/mm_es_search/api/facet/range_facet.rb +36 -0
- data/lib/mm_es_search/api/facet/range_facet_row.rb +97 -0
- data/lib/mm_es_search/api/facet/range_item.rb +17 -0
- data/lib/mm_es_search/api/facet/statistical_facet.rb +33 -0
- data/lib/mm_es_search/api/facet/statistical_facet_result.rb +36 -0
- data/lib/mm_es_search/api/facet/terms_facet.rb +62 -0
- data/lib/mm_es_search/api/facet/terms_facet_row.rb +35 -0
- data/lib/mm_es_search/api/facet/terms_stats_facet.rb +9 -0
- data/lib/mm_es_search/api/highlight/result_highlight.rb +40 -0
- data/lib/mm_es_search/api/query/abstract_filter.rb +15 -0
- data/lib/mm_es_search/api/query/abstract_query.rb +48 -0
- data/lib/mm_es_search/api/query/and_filter.rb +9 -0
- data/lib/mm_es_search/api/query/bool_filter.rb +11 -0
- data/lib/mm_es_search/api/query/bool_query.rb +67 -0
- data/lib/mm_es_search/api/query/constant_score_query.rb +31 -0
- data/lib/mm_es_search/api/query/custom_filters_score_query.rb +52 -0
- data/lib/mm_es_search/api/query/custom_score_query.rb +31 -0
- data/lib/mm_es_search/api/query/dismax_query.rb +29 -0
- data/lib/mm_es_search/api/query/filtered_query.rb +30 -0
- data/lib/mm_es_search/api/query/has_child_filter.rb +11 -0
- data/lib/mm_es_search/api/query/has_child_query.rb +25 -0
- data/lib/mm_es_search/api/query/has_parent_filter.rb +11 -0
- data/lib/mm_es_search/api/query/has_parent_query.rb +25 -0
- data/lib/mm_es_search/api/query/match_all_filter.rb +11 -0
- data/lib/mm_es_search/api/query/match_all_query.rb +19 -0
- data/lib/mm_es_search/api/query/nested_filter.rb +22 -0
- data/lib/mm_es_search/api/query/nested_query.rb +62 -0
- data/lib/mm_es_search/api/query/not_filter.rb +9 -0
- data/lib/mm_es_search/api/query/or_filter.rb +9 -0
- data/lib/mm_es_search/api/query/prefix_filter.rb +11 -0
- data/lib/mm_es_search/api/query/prefix_query.rb +34 -0
- data/lib/mm_es_search/api/query/query_filter.rb +28 -0
- data/lib/mm_es_search/api/query/query_string_query.rb +37 -0
- data/lib/mm_es_search/api/query/range_filter.rb +11 -0
- data/lib/mm_es_search/api/query/range_query.rb +57 -0
- data/lib/mm_es_search/api/query/scored_filter.rb +29 -0
- data/lib/mm_es_search/api/query/single_bool_filter.rb +66 -0
- data/lib/mm_es_search/api/query/term_filter.rb +11 -0
- data/lib/mm_es_search/api/query/term_query.rb +34 -0
- data/lib/mm_es_search/api/query/terms_filter.rb +11 -0
- data/lib/mm_es_search/api/query/terms_query.rb +58 -0
- data/lib/mm_es_search/api/query/text_query.rb +42 -0
- data/lib/mm_es_search/api/query/top_children_query.rb +28 -0
- data/lib/mm_es_search/api/sort/root_sort.rb +36 -0
- data/lib/mm_es_search/models/abstract_facet_model.rb +23 -0
- data/lib/mm_es_search/models/abstract_query_model.rb +21 -0
- data/lib/mm_es_search/models/abstract_range_facet_model.rb +365 -0
- data/lib/mm_es_search/models/abstract_search_model.OLD +538 -0
- data/lib/mm_es_search/models/abstract_search_model.rb +521 -0
- data/lib/mm_es_search/models/abstract_sort_model.rb +13 -0
- data/lib/mm_es_search/models/abstract_terms_facet_model.rb +87 -0
- data/lib/mm_es_search/models/root_sort_model.rb +20 -0
- data/lib/mm_es_search/models/virtual_field_sort.rb +52 -0
- data/lib/mm_es_search/utils/facet_row_utils.rb +86 -0
- data/lib/mm_es_search/utils/search_logger.rb +10 -0
- data/lib/mm_es_search/version.rb +3 -0
- data/lib/mm_es_search.rb +124 -0
- data/mm_es_search.gemspec +24 -0
- metadata +132 -0
@@ -0,0 +1,538 @@
|
|
1
|
+
module MmEsSearch
|
2
|
+
module Models
|
3
|
+
|
4
|
+
module AbstractSearchModel
|
5
|
+
|
6
|
+
extend ActiveSupport::Concern
|
7
|
+
include MmEsSearch::Api::Query
|
8
|
+
include MmEsSearch::Api::Sort
|
9
|
+
include MmEsSearch::Api::Facet
|
10
|
+
include MmEsSearch::Api::Highlight
|
11
|
+
include MmEsSearch::Models
|
12
|
+
include MmEsSearch::Utils
|
13
|
+
|
14
|
+
# Hook run in the context of the class that includes this module (this module
# extends ActiveSupport::Concern). It installs the MmUsesUuid plugin and
# declares the state the methods of this module read and write: the query
# string, the query/sort/highlight objects, the facet list, and the ids, total
# and highlights of the most recent result page.
# NOTE(review): `plugin`, `key`, `one` and `many` are presumably the MongoMapper
# document DSL -- confirm against the including class.
included do
|
15
|
+
|
16
|
+
plugin MmUsesUuid
|
17
|
+
|
18
|
+
key :query_string, String
|
19
|
+
one :query_object, :class_name => 'MmEsSearch::Models::AbstractQueryModel'
|
20
|
+
one :sort_object, :class_name => 'MmEsSearch::Models::AbstractSortModel'
|
21
|
+
one :highlight_object, :class_name => 'MmEsSearch::Api::Highlight::ResultHighlight'
|
22
|
+
many :facets, :class_name => 'MmEsSearch::Models::AbstractFacetModel'
|
23
|
+
key :result_ids, Array
|
24
|
+
key :result_total, Integer
|
25
|
+
key :highlights, Array
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
# Intentionally empty: no class-level methods are defined by this module.
# NOTE(review): presumably kept as the conventional ActiveSupport::Concern
# extension point -- confirm before removing.
module ClassMethods
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
# Runs this search against the given backend and returns one page of results.
#
# target  - :es to query through target_collection.search_hits, or :mongo to
#           query through target_collection.where. Any other value returns nil.
# options - Hash with string or symbol keys:
#           :page            - page number (default 1)
#           :per_page        - page size (default 10)
#           :fields          - fields passed on to the mongo lookup (default [])
#           :raw_es_response - return the search_hits response as is (default false)
#           :sorted          - passed to es_request as its `sorted` flag (default true)
#           :highlight       - passed to es_request as its `highlight` flag (default true)
#           :facet_query     - false/nil, :auto, :force_auto, :manual, a Hash,
#                              or an AbstractFacet (default false)
#
# Side effects: assigns @result_ids and @result_total (plus @highlights on the
# :es path when highlight_object? is true); may add, replace or prune facets.
# The caller's options hash is not modified.
#
# Raises RuntimeError when facets are still not all :ready_for_display after
# more than five follow-up facet requests.
def run(target, options = {})
  # Keep the merged copy: symbolize_keys returns a new hash, so merging the
  # defaults into that copy in place and dropping it left every default unset.
  options = options.symbolize_keys.reverse_merge(
    :page => 1,
    :per_page => 10,
    :fields => [],
    :raw_es_response => false,
    :sorted => true,
    :highlight => true,
    :facet_query => false
  )

  page = options[:page]
  per_page = options[:per_page]
  fields = options[:fields]
  # These three were read as bare locals without ever being assigned.
  raw_es_response = options[:raw_es_response]
  sorted = options[:sorted]
  highlight = options[:highlight]

  case target
  when :es

    if options[:facet_query] && !raw_es_response
      facets_in_display_state = facets.select { |facet| facet.current_state == :ready_for_display }
      facets_in_display_state.each(&:prepare_for_new_data)
    end

    facet_es_query = case options[:facet_query]
    when AbstractFacet
      options[:facet_query].to_es_query
    when Hash
      options[:facet_query]
    when :auto
      if type_facet_positively_set?
        build_next_facet_es_query(:explore_manual_and_auto_facets)
      else
        # No type chosen yet: fall back to manual exploration, seeded with the type facet.
        options[:facet_query] = :manual
        facets.delete_if(&:unused?)
        unless type_facet_initialized?
          facets << build_facet_model(
            :virtual_field => type_field,
            :data_type => "string",
            :exclude => type_field_excludes
          )
        end
        build_next_facet_es_query(:explore_manual_facets)
      end
    when :force_auto
      build_next_facet_es_query(:explore_manual_and_auto_facets)
    when :manual
      facets_without_data_type = facets.select { |facet| facet.current_state == :need_data_type }
      add_known_data_types(facets_without_data_type)
      build_next_facet_es_query
    else
      nil
    end

    request = es_request(sorted, facet_es_query, highlight)
    @search_log.info(request.to_json) if debug_on?
    response = target_collection.search_hits(
      request,
      :page => page,
      :per_page => per_page,
      :ids_only => true
    )

    return response if raw_es_response

    @result_ids = response.hits
    @result_total = response.total_entries
    @highlights = response.response['hits']['hits'].map { |hit| hit['highlight'] } if highlight_object?
    out = find_hits_in_mongo(@result_ids, fields, page, per_page)

    if options[:facet_query]

      write_facet_results_to_models(response.facets)
      update_used_facet_missing_counts_to_zero
      update_show_missing_facet_missing_counts_to_total
      prune_facets
      facets_without_data_type = facets.select { |facet| facet.current_state == :need_data_type }
      add_known_data_types(facets_without_data_type)

      # Keep issuing facet-only requests until every facet can be displayed;
      # the counter bounds the loop in case a facet never reaches that state.
      sanity_count = 0
      until facets.all? { |facet| facet.current_state == :ready_for_display }
        facet_query = build_next_facet_es_query
        facet_results = run_for_facets_only(facet_query)
        write_facet_results_to_models(facet_results)
        prune_facets

        sanity_count += 1
        raise 'until loop has looped too many times!' if sanity_count > 5
      end
    end

    build_sort_options if respond_to? :build_sort_options
    return out #output result set

  when :mongo

    request = mongo_request
    @search_log.info(request.to_json) if debug_on?
    query = target_collection.where(request)
    if sort_object.is_a?(RootSortModel)
      query = query.sort(sort_object.to_mongo_query)
    end
    unless fields.empty?
      query = query.fields(*fields)
    end

    response = query.paginate(:page => page, :per_page => per_page)
    @result_ids = response.map(&:_id)
    @result_total = response.total_entries

    return response

  end
end
|
148
|
+
|
149
|
+
# Collects the next query of every facet, optionally appends the coverage /
# exploratory queries selected by `mode`, and merges them into one ES facet hash.
#
# mode - nil, :explore_manual_facets, :explore_auto_facets or
#        :explore_manual_and_auto_facets.
#
# Returns whatever facet_array_to_es_query returns for the non-nil queries.
def build_next_facet_es_query(mode = nil)
  pending = facets.map { |facet| facet.next_facet_query }

  wants_manual = mode == :explore_manual_facets || mode == :explore_manual_and_auto_facets
  wants_auto = mode == :explore_auto_facets || mode == :explore_manual_and_auto_facets

  # Manual coverage is always appended before the auto exploratory query.
  pending.push(manual_facet_coverage_query) if wants_manual
  pending.push(auto_facet_exploratory_query) if wants_auto

  facet_array_to_es_query(pending.compact)
end
|
161
|
+
|
162
|
+
# Merges the ES representation of each query into a single hash; later entries
# win on duplicate keys.
#
# query_array - objects responding to #to_es_query (each returning a Hash).
#
# Returns the merged Hash, or nil when nothing was merged.
def facet_array_to_es_query(query_array)
  merged = query_array.inject({}) { |combined, query| combined.merge!(query.to_es_query) }
  merged unless merged.empty?
end
|
170
|
+
|
171
|
+
def write_facet_results_to_models(facet_results)
|
172
|
+
unless facet_results.nil? or facet_results.empty?
|
173
|
+
facet_results.each do |label,result|
|
174
|
+
|
175
|
+
case label
|
176
|
+
when 'auto_facet_coverage'
|
177
|
+
|
178
|
+
result['terms'].each do |params|
|
179
|
+
facets << proto_facet.new(
|
180
|
+
:virtual_field => params['term'],
|
181
|
+
:missing => @result_total - params['count']
|
182
|
+
)
|
183
|
+
end
|
184
|
+
|
185
|
+
when 'manual_facet_coverage'
|
186
|
+
|
187
|
+
result['terms'].each do |params|
|
188
|
+
if current_facet = facets.detect {|f| f.virtual_field == params['term']}
|
189
|
+
current_facet.missing = @result_total - params['count'] if current_facet
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
when /^data_type_counts_for_/
|
194
|
+
|
195
|
+
true_label = label[21..-1]
|
196
|
+
data_type_counts = result['terms']
|
197
|
+
if current_facet = facets.detect {|f| f.label == true_label}
|
198
|
+
replace_proto_facet_with_typed_facet(current_facet.virtual_field, data_type_counts)
|
199
|
+
end
|
200
|
+
|
201
|
+
else
|
202
|
+
|
203
|
+
if current_facet = facets.detect {|f| f.label == label}
|
204
|
+
case result['_type']
|
205
|
+
when "terms", "range"
|
206
|
+
current_facet.build_facet_rows(result)
|
207
|
+
when "statistical"
|
208
|
+
current_facet.build_field_stats(result)
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
end
|
213
|
+
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
# Swaps the facet with the given virtual_field for a typed facet model built
# from its attributes.
#
# virtual_field   - value matched against each facet's virtual_field.
# data_type_param - a String (assigned as the facet's data_type) or an Array
#                   (handed to the facet's build_data_type_counts); any other
#                   value leaves the facet untouched before the state check.
#
# Returns the new facet, or nil when no facet matches.
# Raises RuntimeError if the facet is not :ready_for_initialization afterwards.
def replace_proto_facet_with_typed_facet(virtual_field, data_type_param)
  position = facets.find_index { |f| f.virtual_field == virtual_field }
  return unless position

  proto = facets[position]

  if data_type_param.is_a?(String)
    proto.data_type = data_type_param
  elsif data_type_param.is_a?(Array)
    proto.build_data_type_counts(data_type_param)
  end

  raise 'proto_facet not ready for initialization' if proto.current_state != :ready_for_initialization

  # '_type' is excluded from the attributes before the typed model is built.
  typed_params = proto.attributes.except('_type').symbolize_keys
  facets[position] = build_facet_model(typed_params)
end
|
238
|
+
|
239
|
+
# Builds the request hash sent to Elasticsearch.
#
# sorted      - use sorted_query (and add :sort when sort_object is a
#               RootSortModel) instead of unsorted_query.
# facet_query - value stored under :facets when truthy.
# highlight   - add :highlight when highlight_object? is also true.
#
# Returns a Hash that always has :query and :query_dsl => false.
def es_request(sorted = true, facet_query = nil, highlight = true)
  parse_query_string_if_needed

  base_query = if sorted
    sorted_query
  else
    unsorted_query
  end

  request = { :query => base_query.to_es_query, :query_dsl => false }
  request[:sort] = sort_object.to_es_query if sort_object.is_a?(RootSortModel) && sorted
  request[:facets] = facet_query if facet_query
  request[:highlight] = highlight_object.to_es_query if highlight_object? && highlight
  request
end
|
257
|
+
|
258
|
+
# Returns the mongo form of the sorted query, parsing the query string first
# if that has not happened yet.
def mongo_request
  parse_query_string_if_needed
  query = sorted_query
  query.to_mongo_query
end
|
262
|
+
|
263
|
+
# Resets `missing` to 0 on every used facet: a facet that has been applied is,
# by definition, present on all results.
#
# Returns the used facets.
def update_used_facet_missing_counts_to_zero
  used_facets.each do |applied|
    applied.missing = 0
  end
end
|
267
|
+
|
268
|
+
# Sets `missing` to the current result total on each used facet whose
# show_missing flag is truthy; other facets are left unchanged.
#
# Returns the used facets.
def update_show_missing_facet_missing_counts_to_total
  used_facets.each do |applied|
    next unless applied.show_missing
    applied.missing = @result_total
  end
end
|
271
|
+
|
272
|
+
# True when some facet's virtual_field equals type_field.
def type_facet_initialized?
  facets.each do |candidate|
    return true if candidate.virtual_field == type_field
  end
  false
end
|
275
|
+
|
276
|
+
# True when a used facet on type_field has at least one row whose `checked`
# value is "and" or "or".
def type_facet_positively_set?
  used_facets.any? do |applied|
    applied.virtual_field == type_field &&
      applied.rows.any? { |row| row.checked == "and" || row.checked == "or" }
  end
end
|
285
|
+
|
286
|
+
# Calls build_query_object when a query string is present and no query object
# exists yet; otherwise does nothing and returns nil.
def parse_query_string_if_needed
  build_query_object if query_string? && query_object.nil?
end
|
291
|
+
|
292
|
+
# Loads the records behind a page of search hits in one call and puts them in
# the same order as the hits.
#
# hits     - ElasticSearch::Api::Hit objects or plain ids (default @result_ids).
# fields   - passed to find_with_fields as :fields.
# page     - page number for the returned collection.
# per_page - page size for the returned collection (default @result_ids.length).
#
# Returns the result of paginate_records; it is empty when there are no hits or
# nothing was found.
def find_hits_in_mongo(hits = @result_ids, fields = [], page = 1, per_page = @result_ids.length)
  return paginate_records([], page, per_page, @result_total) if hits.empty?

  # Anything that is not a Hit is presumed to already be an id.
  ranked_ids = hits.first.is_a?(ElasticSearch::Api::Hit) ? hits.map { |hit| hit._id } : hits

  # find_with_fields is used instead of redefining the standard find; per the
  # original author's note it is implemented in MmUsesUuid.
  fetched = target_collection.find_with_fields(ranked_ids, :fields => fields)

  records =
    if fetched.is_a?(Array)
      id_order = ranked_ids.map { |id| id.to_s }
      fetched.reorder_by(id_order) { |record| record.id.to_s }
    elsif fetched.nil?
      []
    else
      [fetched]
    end

  paginate_records(records, page, per_page, @result_total)
end
|
320
|
+
|
321
|
+
# Wraps records in a WillPaginate::Collection carrying the given paging data.
#
# records  - array-like content for the collection.
# page     - current page number.
# per_page - page size.
# total    - total number of entries across all pages.
#
# Returns the populated collection.
def paginate_records(records, page, per_page, total)
  WillPaginate::Collection.new(page, per_page, total).tap do |collection|
    collection.replace(records)
  end
end
|
326
|
+
|
327
|
+
# Counts the matches of the unsorted query without loading any records.
#
# target  - :es (uses search_hits with :per_page => 0 and reads total_entries)
#           or :mongo (uses where(...).count). Any other value returns nil.
# options - accepted but not read by this method.
def count(target, options = {})
  parse_query_string_if_needed

  if target == :es
    target_collection.search_hits(unsorted_query.to_es_query, :per_page => 0).total_entries
  elsif target == :mongo
    target_collection.where(unsorted_query.to_mongo_query).count
  end
end
|
336
|
+
|
337
|
+
# Combines scoring and non-scoring queries into one query object.
#
# scored   - queries that contribute to the score.
# unscored - queries that only restrict the result set.
#
# Returns:
#   both empty     - a MatchAllQuery
#   only unscored  - a ConstantScoreQuery (boost 1) around a BoolQuery of musts
#   only scored    - the single scored query, or a BoolQuery when there are several
#   both present   - a BoolQuery of the scored queries plus copies of the
#                    unscored ones with boost set to 0
def combine_queries(scored, unscored)
  return MatchAllQuery.new if scored.empty? && unscored.empty?

  if scored.empty?
    return ConstantScoreQuery.new(
      :boost => 1,
      :query => BoolQuery.new(:musts => unscored)
    )
  end

  if unscored.empty?
    return scored.length > 1 ? BoolQuery.new(:musts => scored) : scored.first
  end

  # Work on duplicates so the callers' query objects keep their own boost.
  zero_boosted = unscored.map do |original|
    copy = original.dup
    copy.boost = 0
    copy
  end
  BoolQuery.new(:musts => scored + zero_boosted)
end
|
363
|
+
|
364
|
+
# Builds the query used when ordering does not matter: every query is combined
# as non-scoring and the filters are applied on top.
#
# NOTE: sorts that are not a RootSortModel are included as filters, because
# they typically restrict the results.
def unsorted_query
  parse_query_string_if_needed
  non_scoring, filters = sort_query_and_facets_as_filters
  build_filtered_query(combine_queries([], non_scoring), filters)
end
|
370
|
+
|
371
|
+
# Builds the query used when result ordering matters.
#
# Falls back to unsorted_query when there is neither a sort nor a query object,
# or when the sort is a RootSortModel. Otherwise the scoring part is the query
# object itself (no sort object) or the sort object's query combined with the
# non-scoring queries, wrapped with the applicable filters.
def sorted_query
  parse_query_string_if_needed

  nothing_to_score = sort_object.nil? && query_object.nil?
  return unsorted_query if nothing_to_score || sort_object.is_a?(RootSortModel)

  if sort_object.nil?
    query, filters = query_object.to_query, facets_as_filters
  else
    non_scoring, filters = query_and_facets_as_filters
    query = combine_queries([sort_object.to_query], non_scoring)
  end

  build_filtered_query(query, filters)
end
|
386
|
+
|
387
|
+
# Same as query_and_facets_as_filters, with the sort object's filter appended
# when a sort object exists and is not a RootSortModel.
#
# Returns [unscored_queries, filters].
def sort_query_and_facets_as_filters
  non_scoring, filters = query_and_facets_as_filters
  if !sort_object.nil? && !sort_object.is_a?(RootSortModel)
    filters << sort_object.to_filter
  end
  [non_scoring, filters]
end
|
392
|
+
|
393
|
+
# Splits the current restrictions into non-scoring queries and filters.
#
# The facet filters always go into the filter list. The query object is added
# as a filter when it can express itself as one (to_filter is truthy);
# otherwise its query goes into the non-scoring query list.
#
# Returns [unscored_queries, filters].
def query_and_facets_as_filters
  filters = facets_as_filters
  non_scoring = []

  if query_object?
    as_filter = query_object.to_filter
    if as_filter
      filters << as_filter
    else
      non_scoring << query_object.to_query
    end
  end

  [non_scoring, filters]
end
|
404
|
+
|
405
|
+
# Returns the filters of all used facets, leaving out facets whose to_filter
# is nil.
def facets_as_filters
  used_facets.each_with_object([]) do |applied, filters|
    filter = applied.to_filter
    filters << filter unless filter.nil?
  end
end
|
408
|
+
|
409
|
+
# Wraps a query with the given filters.
#
# query   - the query object to restrict.
# filters - array of filter objects, or nil.
#
# Returns the query itself when there are no filters, otherwise a
# FilteredQuery whose filter is an AndFilter over all the filters.
def build_filtered_query(query, filters)
  return query if filters.nil? || filters.empty?

  all_of = AndFilter.new(:filters => filters)
  FilteredQuery.new(:query => query, :filter => all_of)
end
|
421
|
+
|
422
|
+
# Builds the facet model that fits params[:data_type].
#
# params - Hash of facet attributes; :data_type selects the model:
#          anything matching /^string/, 'boolean' or 'uri'  -> term facet
#          'integer', 'float', 'time' or 'date'             -> range facet
#
# Returns the result of build_term_facet_model or build_range_facet_model.
# Raises RuntimeError for any other data type.
def build_facet_model(params)
  data_type = params[:data_type]

  if /^string/ === data_type || data_type == 'boolean' || data_type == 'uri'
    build_term_facet_model(params)
  elsif ['integer', 'float', 'time', 'date'].include?(data_type)
    build_range_facet_model(params)
  else
    raise "unable to build a facet model for data_type = #{params[:data_type]}"
  end
end
|
432
|
+
|
433
|
+
# Issues an ES request for facet data only: no hits (per_page 0), no sorting,
# no highlighting, and the raw response is requested from run.
#
# facet_es_query - the facet query passed to run as :facet_query.
#
# Returns the `facets` of the response, or {} when that is nil.
def run_for_facets_only(facet_es_query)
  facet_only_options = {
    :facet_query => facet_es_query,
    :raw_es_response => true,
    :sorted => false,
    :highlight => false,
    :per_page => 0
  }
  facet_result = run(:es, facet_only_options).facets

  if facet_result.nil?
    {}
  else
    facet_result
  end
end
|
442
|
+
|
443
|
+
# Returns the facets that report themselves as used.
def used_facets
  facets.select { |facet| facet.used? }
end
|
446
|
+
|
447
|
+
# Returns the facets that report themselves as unused.
def offered_facets
  facets.select { |facet| facet.unused? }
end
|
450
|
+
|
451
|
+
|
452
|
+
# Removes offered (unused) facets that are not worth showing, recording why.
#
# Facets whose virtual_field is listed in non_prunable_fields are never pruned.
# Proto facets are pruned when too few results carry the field, judged against
# the including class's REQUIRED_COVERAGE_RATIO and REQUIRED_COVERAGE_COUNT.
# Terms facets are pruned when their first row's count is 1, or is below 5% or
# above 75% of the result total. Range facets are never pruned here.
#
# Returns the result of prune_and_record_reason.
def prune_facets
  prunable_facets = offered_facets.reject { |f| non_prunable_fields.include?(f[:virtual_field]) }
  fields_to_delete = {}

  prunable_facets.each do |facet|
    case facet
    when proto_facet

      total_present = @result_total - facet.missing
      coverage_ratio = total_present / @result_total.to_f

      if coverage_ratio < self.class::REQUIRED_COVERAGE_RATIO
        fields_to_delete[facet[:virtual_field]] = 'coverage_ratio_too_low'
      elsif total_present < self.class::REQUIRED_COVERAGE_COUNT
        fields_to_delete[facet[:virtual_field]] = 'coverage_count_too_low'
      end

    when AbstractTermsFacetModel

      # A terms facet without rows has nothing to judge yet; calling count on
      # the missing first row used to raise NoMethodError. Leave it unpruned.
      top_row = facet.rows.first
      next if top_row.nil?

      largest_term_count = top_row.count
      prop_of_total = largest_term_count / @result_total.to_f

      if largest_term_count == 1
        fields_to_delete[facet.virtual_field] = 'top_count_is_unity'
      elsif prop_of_total < 0.05
        fields_to_delete[facet.virtual_field] = 'top_count_too_small'
      elsif prop_of_total > 0.75
        fields_to_delete[facet.virtual_field] = 'top_count_too_big'
      end

    when AbstractRangeFacetModel
      # No pruning rule for range facets.
    end
  end

  prune_and_record_reason(fields_to_delete)
end
|
496
|
+
|
497
|
+
# Deletes every facet whose virtual_field is listed and records the reason.
#
# fields_to_delete - Hash of virtual_field => reason string.
#
# Returns fields_to_delete.
def prune_and_record_reason(fields_to_delete)
  fields_to_delete.each do |doomed_field, why|
    facets.delete_if do |candidate|
      candidate[:virtual_field] == doomed_field
    end
    record_prune_reason(doomed_field, why)
  end
end
|
503
|
+
|
504
|
+
|
505
|
+
# Reports whether request logging is switched on.
#
# Returns @debug_on when it has been assigned. Otherwise it initialises the
# debug state through debug_off and returns false.
def debug_on?
  return @debug_on if defined?(@debug_on)

  debug_off
  false
end
|
513
|
+
|
514
|
+
# Switches request logging on.
#
# Opens log/search.log under Rails.root in append mode with sync enabled,
# wraps it in a SearchLogger stored in @search_log, and writes a first line
# naming the class.
#
# Returns self, so the call can be chained.
def debug_on
  @debug_on = true

  log_path = Rails.root.to_s + '/log/search.log'
  log_io = File.open(log_path, 'a')
  log_io.sync = true

  @search_log = SearchLogger.new(log_io)
  @search_log.info "#{self.class.name} now logging\n"
  self
end
|
522
|
+
|
523
|
+
# Switches request logging off and drops the reference to the logger.
#
# Returns self, so the call can be chained.
def debug_off
  @search_log = nil
  @debug_on = false
  self
end
|
528
|
+
|
529
|
+
# Resolves the class being searched from this search class's own name.
#
# The name is assumed to be of the form klass.name + "Search"; the word
# characters directly before "Search" are constantized.
#
# Returns the target class.
# Raises RuntimeError when the class name has no such prefix.
def target_collection
  klass_match = self.class.name.to_s.match(/(?<klass>\w*)(?=Search)/)

  # match returns nil when "Search" is absent, and an empty capture when the
  # name starts with it; both must produce the explanatory error below.
  if klass_match.nil? || klass_match[:klass].empty?
    raise "expected the class name '#{self.class.name}' to be of form 'SomethingSearch' so that we can extract 'Something' as the target collection"
  end

  klass_match[:klass].constantize
end
|
535
|
+
|
536
|
+
end
|
537
|
+
end
|
538
|
+
end
|