bento_search 1.5.0 → 2.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +27 -24
- data/Rakefile +30 -11
- data/app/assets/javascripts/bento_search/ajax_load.js +54 -22
- data/app/controllers/bento_search/search_controller.rb +31 -30
- data/app/helpers/bento_search_helper.rb +72 -74
- data/app/models/bento_search/concurrent_searcher.rb +136 -0
- data/app/models/bento_search/result_item.rb +15 -12
- data/app/models/bento_search/results/serialization.rb +22 -13
- data/app/models/bento_search/search_engine.rb +170 -140
- data/app/search_engines/bento_search/doaj_articles_engine.rb +20 -20
- data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
- data/app/search_engines/bento_search/eds_engine.rb +326 -206
- data/app/search_engines/bento_search/google_books_engine.rb +2 -2
- data/app/search_engines/bento_search/scopus_engine.rb +87 -87
- data/app/search_engines/bento_search/summon_engine.rb +1 -1
- data/app/views/bento_search/_ajax_loading.html.erb +17 -0
- data/app/views/bento_search/_item_title.html.erb +2 -4
- data/app/views/bento_search/_link.html.erb +3 -3
- data/lib/bento_search.rb +24 -9
- data/lib/bento_search/engine.rb +2 -0
- data/lib/bento_search/version.rb +1 -1
- data/lib/generators/bento_search/install/ajax_load_js_generator.rb +15 -0
- data/test/decorator/standard_decorator_test.rb +30 -30
- data/test/dummy/app/assets/config/manifest.js +4 -0
- data/test/dummy/config/application.rb +7 -0
- data/test/dummy/config/boot.rb +4 -9
- data/test/dummy/config/environments/development.rb +2 -0
- data/test/dummy/config/environments/production.rb +7 -1
- data/test/dummy/config/environments/test.rb +10 -3
- data/test/functional/bento_search/search_controller_test.rb +68 -58
- data/test/helper/bento_search_helper_test.rb +103 -103
- data/test/search_engines/doaj_articles_engine_test.rb +9 -9
- data/test/search_engines/eds_engine_test.rb +91 -59
- data/test/search_engines/google_site_search_test.rb +48 -48
- data/test/search_engines/scopus_engine_test.rb +51 -51
- data/test/search_engines/search_engine_base_test.rb +108 -86
- data/test/search_engines/search_engine_test.rb +68 -56
- data/test/support/atom.xsd.xml +3 -3
- data/test/support/xml.xsd +117 -0
- data/test/test_helper.rb +23 -12
- data/test/unit/concurrent_searcher_test.rb +75 -0
- data/test/unit/pagination_test.rb +12 -12
- data/test/vcr_cassettes/eds/FullText_CustomLink.yml +198 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1036 -1729
- data/test/vcr_cassettes/eds/catalog_ebook_query.yml +218 -0
- data/test/vcr_cassettes/eds/catalog_query.yml +255 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +11 -44
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +10 -7
- data/test/vcr_cassettes/eds/get_with_auth.yml +144 -153
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +167 -223
- data/test/view/atom_results_test.rb +94 -94
- metadata +36 -46
- data/app/assets/javascripts/bento_search.js +0 -3
- data/app/item_decorators/bento_search/ebscohost/conditional_openurl_main_link.rb +0 -36
- data/app/item_decorators/bento_search/only_premade_openurl.rb +0 -20
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +0 -39
- data/app/item_decorators/bento_search/openurl_main_link.rb +0 -34
- data/app/models/bento_search/multi_searcher.rb +0 -131
- data/test/dummy/config/initializers/secret_token.rb +0 -8
- data/test/unit/multi_searcher_test.rb +0 -49
@@ -0,0 +1,136 @@
|
|
1
|
+
begin
|
2
|
+
require 'concurrent'
|
3
|
+
|
4
|
+
# Concurrently runs multiple searches in separate threads. Since a search
|
5
|
+
# generally spends most of its time waiting on a foreign API, this is
|
6
|
+
# useful to significantly reduce total latency of running multiple searches,
|
7
|
+
# even in MRI.
|
8
|
+
#
|
9
|
+
# Uses [concurrent-ruby](https://github.com/ruby-concurrency/concurrent-ruby),
|
10
|
+
# already a dependency of Rails 5.x. To use with Rails previous to 5.x,
|
11
|
+
# just add concurrent-ruby to your `Gemfile`:
|
12
|
+
#
|
13
|
+
# gem 'concurrent-ruby', '~> 1.0'
|
14
|
+
#
|
15
|
+
# # Usage
|
16
|
+
#
|
17
|
+
# initialize with id's of registered engines:
|
18
|
+
#
|
19
|
+
# searcher = BentoSearch::ConcurrentSearcher.new(:gbs, :scopus)
|
20
|
+
#
|
21
|
+
# start the concurrent searches, params same as engine.search
|
22
|
+
#
|
23
|
+
# searcher.search( query_params )
|
24
|
+
#
|
25
|
+
# retrieve results, blocking until all are completed:
|
26
|
+
#
|
27
|
+
# results = searcher.results
|
28
|
+
#
|
29
|
+
# returns a Hash keyed by engine id, values BentoSearch::Results objects.
|
30
|
+
#
|
31
|
+
# results # => { "gbs" => <BentoSearch::Results ...>, "scopus" => <BentoSearch::Results ...>}
|
32
|
+
#
|
33
|
+
# Calling results more than once will just return the initial results again
|
34
|
+
# (cached), it won't run a search again.
|
35
|
+
#
|
36
|
+
# ## Dev-mode autoloading and concurrency
|
37
|
+
#
|
38
|
+
# In Rails previous to Rails5, you may have to set config.cache_classes=true
|
39
|
+
# even in development to avoid problems. In Rails 5.x, we take advantage of
|
40
|
+
# new api that should allow concurrency-safe autoloading. But if you run into
|
41
|
+
# any weird problems (such as a deadlock), `cache_classes = true` and
|
42
|
+
# `eager_load = true` should eliminate them, at the cost of dev-mode
|
43
|
+
# auto-reloading.
|
44
|
+
#
|
45
|
+
#
|
46
|
+
# TODO: have a method that returns Futures instead of only supplying the blocking
|
47
|
+
# results method? Several tricks, including making sure to properly terminate actors.
|
48
|
+
class BentoSearch::ConcurrentSearcher
  # Builds a searcher for the engines registered under the given ids.
  #
  # Each engine looked up by id has StandardError prepended to its own
  # `auto_rescued_exceptions`, so an unexpected error in one engine becomes a
  # failed Results object instead of an exception raised out of its future.
  # Engines added later with #add_engine are NOT modified this way.
  def initialize(*engine_ids)
    @extra_auto_rescue_exceptions = [StandardError]

    @engines = []
    engine_ids.each do |id|
      engine = BentoSearch.get_engine(id)
      # `+` builds a new array, so the engine's original list is not mutated.
      engine.auto_rescued_exceptions = @extra_auto_rescue_exceptions + engine.auto_rescued_exceptions
      add_engine(engine)
    end
  end

  # Adds an instantiated engine directly, rather than by id from global
  # registry.
  #
  # Raises ArgumentError if the engine has no `configuration.id`, since
  # #results keys its Hash by engine id.
  def add_engine(engine)
    unless engine.configuration.id.present?
      raise ArgumentError, "ConcurrentSearcher engines need `configuration.id`, this one didn't have one: #{engine}"
    end
    @engines << engine
  end

  # Starts all searches, one future per engine; arguments are passed through
  # to each engine's #search. Returns self so you can chain method calls if
  # you like.
  def search(*search_args)
    # The same argument array is shared by every future; freeze it so no
    # thread can modify it under another.
    search_args.freeze

    # A new search invalidates results memoized from a previous one; without
    # this, #results would keep returning the first search's results.
    @results = nil

    @futures = @engines.map do |engine|
      Concurrent::Future.execute { rails_future_wrap { engine.search(*search_args) } }
    end
    self
  end

  # Have you called #search yet? You can only call #results if you have.
  # Will stay true forever, it doesn't tell you if the search is done or not.
  def search_started?
    !!@futures
  end

  # Call after #search. Blocks until each included engine is finished
  # then returns a frozen Hash keyed by engine registered id, value is a
  # BentoSearch::Results object.
  #
  # If called multiple times for the same search, returns the same results
  # each time, does not re-run searches.
  #
  # It is an error to invoke without having previously called #search;
  # doing so raises ArgumentError.
  def results
    unless search_started?
      raise ArgumentError, "Can't call ConcurrentSearcher#results before you have executed a #search"
    end

    @results ||= begin
      pairs = rails_wait_wrap do
        @futures.map do |future|
          # value! blocks until the future completes; fetch it once and reuse.
          engine_results = future.value!
          [engine_results.engine_id, engine_results]
        end
      end
      pairs.to_h.freeze
    end
  end

  protected

  # In Rails5, future bodies need to be wrapped in an executor,
  # to handle auto-loading right in dev-mode, among other things.
  # Rails docs coming, see https://github.com/rails/rails/issues/26847
  #
  # Evaluated once, when this class body is loaded.
  @@rails_has_executor = Rails.application.respond_to?(:executor)
  def rails_future_wrap
    if @@rails_has_executor
      Rails.application.executor.wrap { yield }
    else
      yield
    end
  end

  # In Rails5, if we are collecting from within an action method
  # (ie the 'request loop'), as we usually will be, we need to
  # give up the autoload lock. Rails docs coming, see https://github.com/rails/rails/issues/26847
  #
  # Evaluated once, when this class body is loaded. Skipped when both
  # eager_load and cache_classes are on.
  @@rails_needs_interlock_permit = ActiveSupport::Dependencies.respond_to?(:interlock) &&
    !(Rails.application.config.eager_load && Rails.application.config.cache_classes)
  def rails_wait_wrap
    if @@rails_needs_interlock_permit
      ActiveSupport::Dependencies.interlock.permit_concurrent_loads { yield }
    else
      yield
    end
  end

end
|
132
|
+
rescue LoadError
|
133
|
+
# you can use bento_search without concurrent-ruby, just not
|
134
|
+
# this class.
|
135
|
+
$stderr.puts "Tried but could not load BentoSearch::ConcurrentSearcher, concurrent-ruby not available!"
|
136
|
+
end
|
@@ -36,12 +36,12 @@ module BentoSearch
|
|
36
36
|
# search service it came from. May be alphanumeric. May be nil
|
37
37
|
# for engines that don't support it.
|
38
38
|
serializable_attr_accessor :unique_id
|
39
|
-
|
39
|
+
|
40
40
|
|
41
41
|
# If set to true, item will refuse to generate an openurl,
|
42
42
|
# returning nil from #to_openurl or #openurl_kev
|
43
43
|
serializable_attr_accessor :openurl_disabled
|
44
|
-
|
44
|
+
|
45
45
|
|
46
46
|
# Array (possibly empty) of BentoSearch::Link objects
|
47
47
|
# representing additional links. Often SearchEngine's themselves
|
@@ -52,7 +52,7 @@ module BentoSearch
|
|
52
52
|
|
53
53
|
# * dc.title
|
54
54
|
# * schema.org CreativeWork: 'name'
|
55
|
-
serializable_attr_accessor :title
|
55
|
+
serializable_attr_accessor :title
|
56
56
|
# backwards compat, we used to have separate titles and subtitles
|
57
57
|
alias_method :complete_title, :title
|
58
58
|
|
@@ -112,7 +112,7 @@ module BentoSearch
|
|
112
112
|
#
|
113
113
|
# Note: We're re-thinking this, might allow uncontrolled
|
114
114
|
# in here instead.
|
115
|
-
serializable_attr_accessor :format
|
115
|
+
serializable_attr_accessor :format
|
116
116
|
|
117
117
|
# Translated from internal format vocab at #format. Outputs
|
118
118
|
# eg http://schema.org/Book
|
@@ -137,7 +137,7 @@ module BentoSearch
|
|
137
137
|
# uncontrolled presumably english-language format string.
|
138
138
|
# if supplied will be used in display in place of controlled
|
139
139
|
# format.
|
140
|
-
serializable_attr_accessor :format_str
|
140
|
+
serializable_attr_accessor :format_str
|
141
141
|
|
142
142
|
# Language of materials. Producer can set language_code to an ISO 639-1 (two
|
143
143
|
# letter) or 639-3 (three letter) language code. If you do this, you don't
|
@@ -153,11 +153,11 @@ module BentoSearch
|
|
153
153
|
# #language_iso_639_2 (either may be null), or #language_str for uncontrolled
|
154
154
|
# string. If engine just sets one of these, internals take care of filling
|
155
155
|
# out the others.
|
156
|
-
serializable_attr_accessor :language_code
|
156
|
+
serializable_attr_accessor :language_code
|
157
157
|
attr_writer :language_str
|
158
158
|
def language_str
|
159
|
-
@language_str || language_code.try do |code|
|
160
|
-
LanguageList::LanguageInfo.find(code).try do |lang_obj|
|
159
|
+
(@language_str ||= nil) || language_code.try do |code|
|
160
|
+
LanguageList::LanguageInfo.find(code.dup).try do |lang_obj|
|
161
161
|
lang_obj.name
|
162
162
|
end
|
163
163
|
end
|
@@ -167,7 +167,10 @@ module BentoSearch
|
|
167
167
|
# if available, otherwise from direct language_str if available and
|
168
168
|
# possible.
|
169
169
|
def language_obj
|
170
|
-
@language_obj ||=
|
170
|
+
@language_obj ||= begin
|
171
|
+
lookup = self.language_code || self.language_str
|
172
|
+
LanguageList::LanguageInfo.find( lookup.dup ) if lookup
|
173
|
+
end
|
171
174
|
end
|
172
175
|
|
173
176
|
# Two letter ISO language code, or nil
|
@@ -230,12 +233,12 @@ module BentoSearch
|
|
230
233
|
|
231
234
|
# An ARRAY of string query-in-context snippets. Will usually
|
232
235
|
# have highlighting <b> tags in it. Creator is responsible
|
233
|
-
# for making sure it's otherwise html-safe.
|
236
|
+
# for making sure it's otherwise html-safe.
|
234
237
|
#
|
235
238
|
# Not all engines may store a snippets array in addition to abstract,
|
236
239
|
# some may only store one or the other. Some may store both but
|
237
240
|
# with same content formatted differently (array of multiple vs
|
238
|
-
# one combined string), some engines they may be different.
|
241
|
+
# one combined string), some engines they may be different.
|
239
242
|
attr_accessor :snippets
|
240
243
|
serializable_attr :snippets
|
241
244
|
|
@@ -263,7 +266,7 @@ module BentoSearch
|
|
263
266
|
# for it? Nice thing about the configuration hash instead is it's
|
264
267
|
# easily serializable, it's just data.
|
265
268
|
#
|
266
|
-
# Although we intentionally do NOT include these in JSON serialization, ha.
|
269
|
+
# Although we intentionally do NOT include these in JSON serialization, ha.
|
267
270
|
attr_accessor :display_configuration
|
268
271
|
attr_accessor :engine_id
|
269
272
|
|
@@ -4,21 +4,21 @@ require 'json'
|
|
4
4
|
require 'date'
|
5
5
|
|
6
6
|
# Call #dump_to_json on a BentoSearch value object (such as BentoSearch::Result or ::Author)
|
7
|
-
# to get it in Json
|
7
|
+
# to get it in Json
|
8
8
|
#
|
9
9
|
# Values marked with serializable_attr in BentoSearch::Result are
|
10
|
-
# included in seralization.
|
10
|
+
# included in serialization.
|
11
11
|
#
|
12
12
|
# At present metadata and configuration are NOT serialized: #decorator, #display_configuration,
|
13
13
|
# and #engine_id are not included in the serialization, so when loaded from serialization,
|
14
|
-
# ResultItems will not have such things set.
|
15
|
-
#
|
14
|
+
# ResultItems will not have such things set.
|
15
|
+
#
|
16
16
|
# * Works by getting and setting instance variables directly, ignores getters/setters
|
17
17
|
#
|
18
18
|
# * This means decorated values are NOT included in serialization, the raw
|
19
19
|
# values are what is serialized. This is intended, we serialize internal
|
20
20
|
# state, not decoration which can be recreated. You should make sure the decorators you
|
21
|
-
# want are applied after de-serialization.
|
21
|
+
# want are applied after de-serialization.
|
22
22
|
#
|
23
23
|
# * preserves html_safety status in serialization, by adding extra `_attr_htmlsafe: true` key/value
|
24
24
|
#
|
@@ -31,22 +31,23 @@ module BentoSearch::Results::Serialization
|
|
31
31
|
self._serializable_attr_options = {}
|
32
32
|
end
|
33
33
|
|
34
|
+
|
34
35
|
class_methods do
|
35
36
|
# Just a macro to mark a property name serializable -- the name is
|
36
37
|
# of an instance method that will be included in our serializations
|
37
|
-
# and de-serializations.
|
38
|
+
# and de-serializations.
|
38
39
|
#
|
39
40
|
# Options:
|
40
41
|
# * collection_of: String fully qualified name of a class that is
|
41
42
|
# also BentoSearch::Results::Serialization, the attribute
|
42
|
-
# is an array of these.
|
43
|
+
# is an array of these.
|
43
44
|
# * serializer: String fully qualified class name of a serializer
|
44
45
|
# class that has a `dump` and a `load` for individual values,
|
45
46
|
# we just use it for Date now, see BentoSearch::Results::Serialization::Date
|
46
47
|
def serializable_attr(symbol, options = nil)
|
47
48
|
symbol = symbol.to_s
|
48
49
|
self._serializable_attrs << symbol
|
49
|
-
if options
|
50
|
+
if options
|
50
51
|
self._serializable_attr_options[symbol] = options
|
51
52
|
end
|
52
53
|
end
|
@@ -67,14 +68,14 @@ module BentoSearch::Results::Serialization
|
|
67
68
|
|
68
69
|
|
69
70
|
if _serializable_attr_options[key] && _serializable_attr_options[key][:collection_of]
|
70
|
-
klass =
|
71
|
+
klass = correct_const_get(_serializable_attr_options[key][:collection_of])
|
71
72
|
value = value.collect do |item|
|
72
73
|
klass.from_internal_state_hash(item)
|
73
74
|
end
|
74
75
|
end
|
75
76
|
|
76
77
|
if _serializable_attr_options[key] && _serializable_attr_options[key][:serializer]
|
77
|
-
klass =
|
78
|
+
klass = correct_const_get(_serializable_attr_options[key][:serializer])
|
78
79
|
value = klass.load(value)
|
79
80
|
end
|
80
81
|
|
@@ -92,18 +93,26 @@ module BentoSearch::Results::Serialization
|
|
92
93
|
self.from_internal_state_hash( JSON.parse! json_str )
|
93
94
|
end
|
94
95
|
|
96
|
+
# Resolves the constant named by +str+ relative to the receiver.
#
# On Rails versions above 4.2.99 this delegates to #const_get; on older
# versions it delegates to #qualified_const_get instead.
# NOTE(review): presumably because #qualified_const_get is unavailable or
# deprecated in newer Rails -- confirm against the ActiveSupport changelog.
def correct_const_get(str)
  legacy_rails = Gem::Version.new(Rails.version) <= Gem::Version.new('4.2.99')
  return qualified_const_get(str) if legacy_rails

  const_get(str)
end
|
103
|
+
|
95
104
|
end
|
96
105
|
|
97
106
|
def internal_state_hash
|
98
107
|
hash = {}
|
99
108
|
self._serializable_attrs.each do |accessor|
|
100
109
|
accessor = accessor.to_s
|
101
|
-
value = self.instance_variable_get("@#{accessor}")
|
110
|
+
value = self.instance_variable_defined?("@#{accessor}") && self.instance_variable_get("@#{accessor}")
|
102
111
|
|
103
112
|
next if value.blank?
|
104
113
|
|
105
114
|
if _serializable_attr_options[accessor] && _serializable_attr_options[accessor][:serializer]
|
106
|
-
klass = self.class.
|
115
|
+
klass = self.class.correct_const_get(_serializable_attr_options[accessor][:serializer])
|
107
116
|
value = klass.dump(value)
|
108
117
|
elsif value.respond_to?(:to_ary)
|
109
118
|
value = value.to_ary.collect do |item|
|
@@ -133,4 +142,4 @@ module BentoSearch::Results::Serialization
|
|
133
142
|
end
|
134
143
|
end
|
135
144
|
|
136
|
-
end
|
145
|
+
end
|
@@ -9,18 +9,17 @@ require 'nokogiri'
|
|
9
9
|
|
10
10
|
module BentoSearch
|
11
11
|
# Usually raised by #get on an engine, when result for specified identifier
|
12
|
-
# can't be found.
|
12
|
+
# can't be found.
|
13
13
|
class ::BentoSearch::NotFound < ::BentoSearch::Error ; end
|
14
|
-
# Usually raised by #get when identifier results in more than one record.
|
14
|
+
# Usually raised by #get when identifier results in more than one record.
|
15
15
|
class ::BentoSearch::TooManyFound < ::BentoSearch::Error ; end
|
16
16
|
# Raised for problem contacting or unexpected response from
|
17
|
-
# remote service. Not yet universally used.
|
17
|
+
# remote service. Not yet universally used.
|
18
18
|
class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
|
19
19
|
|
20
|
-
|
21
|
-
# Module mix-in for bento_search search engines.
|
20
|
+
# Module mix-in for bento_search search engines.
|
22
21
|
#
|
23
|
-
# ==Using a SearchEngine
|
22
|
+
# ==Using a SearchEngine
|
24
23
|
#
|
25
24
|
# See a whole bunch more examples in the project README.
|
26
25
|
#
|
@@ -43,18 +42,18 @@ module BentoSearch
|
|
43
42
|
# of BentoSearch::Results
|
44
43
|
#
|
45
44
|
# results = engine.search("query")
|
46
|
-
#
|
47
|
-
# See more docs under #search, as well as project README.
|
48
45
|
#
|
49
|
-
#
|
50
|
-
#
|
46
|
+
# See more docs under #search, as well as project README.
|
47
|
+
#
|
48
|
+
# == Standard configuration variables.
|
49
|
+
#
|
51
50
|
# Some engines require their own engine-specific configuration for api keys
|
52
51
|
# and such, and offer their own engine-specific configuration for engine-specific
|
53
|
-
# features.
|
52
|
+
# features.
|
54
53
|
#
|
55
54
|
# An additional semi-standard configuration variable, some engines take
|
56
55
|
# an `:auth => true` to tell the engine to assume that all access is by
|
57
|
-
# authenticated local users who should be given elevated access to results.
|
56
|
+
# authenticated local users who should be given elevated access to results.
|
58
57
|
#
|
59
58
|
# Additional standard configuration keys that are implemented by the bento_search
|
60
59
|
# framework:
|
@@ -63,7 +62,12 @@ module BentoSearch
|
|
63
62
|
# String name of decorator class that will be applied by #bento_decorate
|
64
63
|
# helper in standard view. See wiki for more info on decorators. Must be
|
65
64
|
# string name, actual class object not supported (to make it easier
|
66
|
-
# to serialize and transport configuration).
|
65
|
+
# to serialize and transport configuration).
|
66
|
+
#
|
67
|
+
# [log_failed_results]
|
68
|
+
# Default false, if true all failed results are logged to
|
69
|
+
# `Rails.logger.error`. Can set global default with
|
70
|
+
# `BentoSearch.defaults.log_failed_results = true`
|
67
71
|
#
|
68
72
|
# == Implementing a SearchEngine
|
69
73
|
#
|
@@ -71,7 +75,7 @@ module BentoSearch
|
|
71
75
|
# generally only responsible for the parts specific to your search engine:
|
72
76
|
# receiving a query, making a call to the external search engine, and
|
73
77
|
# translating its result to a standard BentoSearch::Results full of
|
74
|
-
# BentoSearch::ResultItems.
|
78
|
+
# BentoSearch::ResultItems.
|
75
79
|
#
|
76
80
|
# Start out by simply including the search engine module:
|
77
81
|
#
|
@@ -85,64 +89,102 @@ module BentoSearch
|
|
85
89
|
# BentoSearch::Results item.
|
86
90
|
#
|
87
91
|
# The Results object should have #total_items set with total hitcount, and
|
88
|
-
# contain BentoSearch::ResultItem objects for each hit in the current page.
|
89
|
-
# See individual class documentation for more info.
|
92
|
+
# contain BentoSearch::ResultItem objects for each hit in the current page.
|
93
|
+
# See individual class documentation for more info.
|
90
94
|
#
|
91
95
|
# That's about the extent of your responsibilities. If the search failed
|
92
96
|
# for some reason due to an error, you should return a Results object
|
93
97
|
# with its #error object set, so it will be `failed?`. The framework
|
94
98
|
# will take care of this for you for certain uncaught exceptions you allow
|
95
99
|
# to rise out of #search_implementation (timeouts, HTTPClient timeouts,
|
96
|
-
# nokogiri and MultiJson parse errors).
|
100
|
+
# nokogiri and MultiJson parse errors).
|
97
101
|
#
|
98
102
|
# A SearchEngine object can be re-used for multiple searches, possibly
|
99
103
|
# under concurrent multi-threading. Do not store search-specific state
|
100
104
|
# in the search object, but you can store configuration-specific state there
|
101
|
-
# of course.
|
102
|
-
#
|
105
|
+
# of course.
|
106
|
+
#
|
103
107
|
# Recommend use of HTTPClient, if possible, for http searches. Especially
|
104
108
|
# using a class-level HTTPClient instance, to re-use persistent http
|
105
109
|
# connections across searches (can be esp important if you need to contact
|
106
110
|
# external search api via https/ssl).
|
107
111
|
#
|
108
|
-
# If you have required configuration keys, you can register that with
|
109
|
-
# class-level required_configuration_keys method.
|
112
|
+
# If you have required configuration keys, you can register that with
|
113
|
+
# class-level required_configuration_keys method.
|
110
114
|
#
|
111
|
-
# You can also advertise max per-page value by overriding max_per_page.
|
115
|
+
# You can also advertise max per-page value by overriding max_per_page.
|
112
116
|
#
|
113
|
-
# If you support fielded searching, you should over-ride
|
117
|
+
# If you support fielded searching, you should over-ride
|
114
118
|
# #search_field_definitions; if you support sorting, you should
|
115
119
|
# override #sort_definitions. See BentoSearch::SearchEngine::Capabilities
|
116
|
-
# module for documentation.
|
117
|
-
#
|
120
|
+
# module for documentation.
|
121
|
+
#
|
118
122
|
#
|
119
123
|
module SearchEngine
|
120
124
|
DefaultPerPage = 10
|
121
|
-
|
122
125
|
|
123
|
-
|
124
|
-
|
125
126
|
extend ActiveSupport::Concern
|
126
|
-
|
127
|
+
|
127
128
|
include Capabilities
|
128
|
-
|
129
|
+
|
130
|
+
mattr_accessor :default_auto_rescued_exceptions
|
131
|
+
self.default_auto_rescued_exceptions = [
|
132
|
+
BentoSearch::RubyTimeoutClass,
|
133
|
+
HTTPClient::TimeoutError,
|
134
|
+
HTTPClient::ConfigurationError,
|
135
|
+
HTTPClient::BadResponseError,
|
136
|
+
MultiJson::DecodeError,
|
137
|
+
Nokogiri::SyntaxError,
|
138
|
+
SocketError
|
139
|
+
].freeze
|
140
|
+
|
129
141
|
included do
|
130
|
-
attr_accessor :configuration
|
142
|
+
attr_accessor :configuration
|
143
|
+
|
144
|
+
# What exceptions should our #search wrapper rescue and turn
|
145
|
+
# into failed results instead of fatal errors?
|
146
|
+
#
|
147
|
+
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
148
|
+
# of other errors we don't want to eat either, making
|
149
|
+
# development really confusing. Perhaps could set this
|
150
|
+
# to be something diff in production and dev?
|
151
|
+
#
|
152
|
+
# This default list is probably useful already, but individual
|
153
|
+
# engines can override if it's convenient for their own error
|
154
|
+
# handling.
|
155
|
+
#
|
156
|
+
# Override by just using `auto_rescued_exceptions=` on class _or_ method,
|
157
|
+
# although some legacy code may override `def auto_rescue_exceptions` (note
|
158
|
+
# old `rescue` vs new `rescued`) which should work too.
|
159
|
+
self.class_attribute :auto_rescued_exceptions
|
160
|
+
self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
|
161
|
+
|
162
|
+
# Over-ride returning a hash or Confstruct with
|
163
|
+
# any configuration values you want by default.
|
164
|
+
# actual user-specified config values will be deep-merged
|
165
|
+
# into the defaults.
|
166
|
+
def self.default_configuration
|
167
|
+
end
|
168
|
+
|
169
|
+
# Over-ride returning an array of symbols for required
|
170
|
+
# configuration keys.
|
171
|
+
def self.required_configuration
|
172
|
+
end
|
131
173
|
end
|
132
|
-
|
174
|
+
|
133
175
|
# If specific SearchEngine calls initialize, you want to call super
|
134
176
|
# handles configuration loading, mostly. Argument is a
|
135
|
-
# Confstruct::Configuration or Hash.
|
177
|
+
# Confstruct::Configuration or Hash.
|
136
178
|
def initialize(aConfiguration = Confstruct::Configuration.new)
|
137
179
|
# To work around weird confstruct bug, we need to change
|
138
|
-
# a hash to a Confstruct ourselves.
|
180
|
+
# a hash to a Confstruct ourselves.
|
139
181
|
# https://github.com/mbklein/confstruct/issues/14
|
140
182
|
unless aConfiguration.kind_of? Confstruct::Configuration
|
141
183
|
aConfiguration = Confstruct::Configuration.new aConfiguration
|
142
184
|
end
|
143
|
-
|
144
|
-
|
145
|
-
# init, from copy of default, or new
|
185
|
+
|
186
|
+
|
187
|
+
# init, from copy of default, or new
|
146
188
|
if self.class.default_configuration
|
147
189
|
self.configuration = Confstruct::Configuration.new(self.class.default_configuration)
|
148
190
|
else
|
@@ -150,187 +192,193 @@ module BentoSearch
|
|
150
192
|
end
|
151
193
|
# merge in current instance config
|
152
194
|
self.configuration.configure ( aConfiguration )
|
153
|
-
|
154
|
-
# global defaults?
|
195
|
+
|
196
|
+
# global defaults?
|
155
197
|
self.configuration[:for_display] ||= {}
|
156
|
-
|
198
|
+
unless self.configuration.has_key?(:log_failed_results)
|
199
|
+
self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
|
200
|
+
end
|
201
|
+
|
157
202
|
# check for required keys -- have to be present, and not nil
|
158
203
|
if self.class.required_configuration
|
159
|
-
self.class.required_configuration.each do |required_key|
|
204
|
+
self.class.required_configuration.each do |required_key|
|
160
205
|
if ["**NOT_FOUND**", nil].include? self.configuration.lookup!(required_key.to_s, "**NOT_FOUND**")
|
161
206
|
raise ArgumentError.new("#{self.class.name} requires configuration key #{required_key}")
|
162
207
|
end
|
163
208
|
end
|
164
209
|
end
|
165
|
-
|
210
|
+
|
166
211
|
end
|
167
|
-
|
168
|
-
|
169
|
-
# Method used to actually get results from a search engine.
|
212
|
+
|
213
|
+
|
214
|
+
# Method used to actually get results from a search engine.
|
170
215
|
#
|
171
216
|
# When implementing a search engine, you do not override this #search
|
172
217
|
# method, but instead override #search_implementation. #search will
|
173
218
|
# call your specific #search_implementation, first normalizing the query
|
174
|
-
# arguments, and then normalizing and adding standard metadata to your return value.
|
219
|
+
# arguments, and then normalizing and adding standard metadata to your return value.
|
175
220
|
#
|
176
221
|
# Most engines support pagination, sorting, and searching in a specific
|
177
|
-
# field.
|
222
|
+
# field.
|
178
223
|
#
|
179
224
|
# # 1-based page index
|
180
225
|
# engine.search("query", :per_page => 20, :page => 5)
|
181
226
|
# # or use 0-based per-record index, engines that don't
|
182
|
-
# # support this will round to nearest page.
|
227
|
+
# # support this will round to nearest page.
|
183
228
|
# engine.search("query", :start => 20)
|
184
229
|
#
|
185
230
|
# You can ask an engine what search fields it supports with engine.search_keys
|
186
231
|
# engine.search("query", :search_field => "engine_search_field_name")
|
187
232
|
#
|
188
233
|
# There are also normalized 'semantic' names you can use across engines
|
189
|
-
# (if they support them): :title, :author, :subject, maybe more.
|
234
|
+
# (if they support them): :title, :author, :subject, maybe more.
|
190
235
|
#
|
191
236
|
# engine.search("query", :semantic_search_field => :title)
|
192
237
|
#
|
193
238
|
# Ask an engine what semantic field names it supports with `engine.semantic_search_keys`
|
194
239
|
#
|
195
|
-
# Unrecognized search fields will be ignored, unless you pass in
|
196
|
-
# :unrecognized_search_field => :raise (or do same in config).
|
240
|
+
# Unrecognized search fields will be ignored, unless you pass in
|
241
|
+
# :unrecognized_search_field => :raise (or do same in config).
|
197
242
|
#
|
198
243
|
# Ask an engine what sort fields it supports with `engine.sort_keys`. See
|
199
244
|
# list of standard sort keys in I18n file at ./config/locales/en.yml, in
|
200
|
-
# `en.bento_search.sort_keys`.
|
245
|
+
# `en.bento_search.sort_keys`.
|
201
246
|
#
|
202
247
|
# engine.search("query", :sort => "some_sort_key")
|
203
248
|
#
|
204
249
|
# Some engines support additional arguments to 'search', see individual
|
205
250
|
# engine documentation. For instance, some engines support `:auth => true`
|
206
251
|
# to give the user elevated search privileges when you have an authenticated
|
207
|
-
# local user.
|
252
|
+
# local user.
|
208
253
|
#
|
209
254
|
# Query as first arg is just a convenience, you can also use a single hash
|
210
|
-
# argument.
|
255
|
+
# argument.
|
211
256
|
#
|
212
257
|
# engine.search(:query => "query", :per_page => 20, :page => 4)
|
213
258
|
#
|
214
259
|
def search(*arguments)
|
215
260
|
start_t = Time.now
|
216
|
-
|
261
|
+
|
217
262
|
arguments = normalized_search_arguments(*arguments)
|
218
263
|
|
219
264
|
results = search_implementation(arguments)
|
220
|
-
|
265
|
+
|
221
266
|
fill_in_search_metadata_for(results, arguments)
|
222
|
-
|
267
|
+
|
223
268
|
results.timing = (Time.now - start_t)
|
224
|
-
|
269
|
+
|
225
270
|
return results
|
226
271
|
rescue *auto_rescue_exceptions => e
|
227
272
|
# Uncaught exception, log and turn into failed Results object. We
|
228
273
|
# only catch certain types of exceptions, or it makes dev really
|
229
274
|
# confusing eating exceptions. This is intentionally a convenience
|
230
275
|
# to allow search engine implementations to just raise the exception
|
231
|
-
# and we'll turn it into a proper error.
|
276
|
+
# and we'll turn it into a proper error.
|
232
277
|
cleaned_backtrace = Rails.backtrace_cleaner.clean(e.backtrace)
|
233
278
|
log_msg = "BentoSearch::SearchEngine failed results: #{e.inspect}\n #{cleaned_backtrace.join("\n ")}"
|
234
279
|
Rails.logger.error log_msg
|
235
|
-
|
280
|
+
|
236
281
|
failed = BentoSearch::Results.new
|
237
282
|
failed.error ||= {}
|
238
283
|
failed.error[:exception] = e
|
239
|
-
|
284
|
+
|
240
285
|
failed.timing = (Time.now - start_t)
|
241
|
-
|
286
|
+
|
242
287
|
fill_in_search_metadata_for(failed, arguments)
|
243
288
|
|
244
|
-
|
245
289
|
return failed
|
290
|
+
ensure
|
291
|
+
if results && configuration.log_failed_results && results.failed?
|
292
|
+
Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
|
293
|
+
end
|
246
294
|
end
|
247
|
-
|
295
|
+
|
248
296
|
# SOME of the elements of Results to be returned that SearchEngine implementation
|
249
297
|
# fills in automatically post-search. Extracted into a method for DRY in
|
250
298
|
# error handling to try to fill these in even in errors. Also can be used
|
251
|
-
# as public method for de-serialized or mock results.
|
299
|
+
# as public method for de-serialized or mock results.
|
252
300
|
def fill_in_search_metadata_for(results, normalized_arguments = {})
|
253
301
|
results.search_args = normalized_arguments
|
254
302
|
results.start = normalized_arguments[:start] || 0
|
255
303
|
results.per_page = normalized_arguments[:per_page]
|
256
|
-
|
304
|
+
|
257
305
|
results.engine_id = configuration.id
|
258
306
|
results.display_configuration = configuration.for_display
|
259
307
|
|
260
308
|
# We copy some configuration info over to each Item, as a convenience
|
261
309
|
# to display logic that may have to decide what to do given only an item,
|
262
310
|
# and may want to parameterize based on configuration.
|
263
|
-
results.each do |item|
|
264
|
-
item.engine_id = configuration.id
|
311
|
+
results.each do |item|
|
312
|
+
item.engine_id = configuration.id
|
265
313
|
item.decorator = configuration.lookup!("for_display.decorator")
|
266
314
|
item.display_configuration = configuration.for_display
|
267
315
|
end
|
268
316
|
|
269
317
|
results
|
270
318
|
end
|
271
|
-
|
319
|
+
|
272
320
|
|
273
321
|
# Take the arguments passed into #search, which can be flexibly given
|
274
322
|
# in several ways, and normalize to an expected single hash that
|
275
323
|
# will be passed to an engine's #search_implementation. The output
|
276
324
|
# of this method is a single hash, and is what a #search_implementation
|
277
|
-
# can expect to receive as an argument, with keys:
|
325
|
+
# can expect to receive as an argument, with keys:
|
278
326
|
#
|
279
327
|
# [:query] the query
|
280
328
|
# [:per_page] will _always_ be present, using the default per_page if
|
281
329
|
# none given by caller
|
282
330
|
# [:start, :page] both :start and :page will _always_ be present, regardless
|
283
331
|
# of which the caller used. They will both be integers, even if strings passed in.
|
284
|
-
# [:search_field] A search field from the engine's #search_field_definitions, as string.
|
332
|
+
# [:search_field] A search field from the engine's #search_field_definitions, as string.
|
285
333
|
# Even if the caller used :semantic_search_field, it'll be normalized
|
286
|
-
# to the actual local search_field key on output.
|
287
|
-
# [:sort] Sort key.
|
334
|
+
# to the actual local search_field key on output.
|
335
|
+
# [:sort] Sort key.
|
288
336
|
#
|
289
337
|
def normalized_search_arguments(*orig_arguments)
|
290
338
|
arguments = {}
|
291
|
-
|
339
|
+
|
292
340
|
# Two-arg style to one hash, if present
|
293
341
|
if (orig_arguments.length > 1 ||
|
294
342
|
(orig_arguments.length == 1 && ! orig_arguments.first.kind_of?(Hash)))
|
295
|
-
arguments[:query] = orig_arguments.delete_at(0)
|
343
|
+
arguments[:query] = orig_arguments.delete_at(0)
|
296
344
|
end
|
297
345
|
|
298
346
|
arguments.merge!(orig_arguments.first) if orig_arguments.length > 0
|
299
|
-
|
300
|
-
|
347
|
+
|
348
|
+
|
301
349
|
# allow strings for pagination (like from url query), change to
|
302
|
-
# int please.
|
350
|
+
# int please.
|
303
351
|
[:page, :per_page, :start].each do |key|
|
304
352
|
arguments.delete(key) if arguments[key].blank?
|
305
353
|
arguments[key] = arguments[key].to_i if arguments[key]
|
306
|
-
end
|
307
|
-
arguments[:per_page] ||= DefaultPerPage
|
308
|
-
|
309
|
-
# illegal arguments
|
354
|
+
end
|
355
|
+
arguments[:per_page] ||= configuration.default_per_page || DefaultPerPage
|
356
|
+
|
357
|
+
# illegal arguments
|
310
358
|
if (arguments[:start] && arguments[:page])
|
311
359
|
raise ArgumentError.new("Can't supply both :page and :start")
|
312
360
|
end
|
313
|
-
if ( arguments[:per_page] &&
|
314
|
-
self.max_per_page &&
|
361
|
+
if ( arguments[:per_page] &&
|
362
|
+
self.max_per_page &&
|
315
363
|
arguments[:per_page] > self.max_per_page)
|
316
364
|
raise ArgumentError.new("#{arguments[:per_page]} is more than maximum :per_page of #{self.max_per_page} for #{self.class}")
|
317
365
|
end
|
318
|
-
|
319
|
-
|
366
|
+
|
367
|
+
|
320
368
|
# Normalize :page to :start, and vice versa
|
321
369
|
if arguments[:page]
|
322
370
|
arguments[:start] = (arguments[:page] - 1) * arguments[:per_page]
|
323
371
|
elsif arguments[:start]
|
324
372
|
arguments[:page] = (arguments[:start] / arguments[:per_page]) + 1
|
325
373
|
end
|
326
|
-
|
374
|
+
|
327
375
|
# normalize :sort from possibly symbol to string
|
328
376
|
# TODO: raise if unrecognized sort key?
|
329
377
|
if arguments[:sort]
|
330
378
|
arguments[:sort] = arguments[:sort].to_s
|
331
379
|
end
|
332
380
|
|
333
|
-
|
381
|
+
|
334
382
|
# Multi-field search
|
335
383
|
if arguments[:query].kind_of? Hash
|
336
384
|
# Only if allowed
|
@@ -348,7 +396,7 @@ module BentoSearch
|
|
348
396
|
# translate semantic fields, raising for unfound fields if configured
|
349
397
|
arguments[:query].transform_keys! do |key|
|
350
398
|
new_key = self.semantic_search_map[key.to_s] || key
|
351
|
-
|
399
|
+
|
352
400
|
if ( config_arg(arguments, :unrecognized_search_field) == "raise" &&
|
353
401
|
! self.search_keys.include?(new_key))
|
354
402
|
raise ArgumentError.new("#{self.class.name} does not know about search_field #{new_key}, in query Hash #{arguments[:query]}")
|
@@ -358,91 +406,73 @@ module BentoSearch
|
|
358
406
|
end
|
359
407
|
|
360
408
|
end
|
361
|
-
|
409
|
+
|
362
410
|
# translate semantic_search_field to search_field, or raise if
|
363
|
-
# can't.
|
411
|
+
# can't.
|
364
412
|
if (semantic = arguments.delete(:semantic_search_field)) && ! semantic.blank?
|
365
413
|
semantic = semantic.to_s
|
366
414
|
# Legacy publication_title is now called source_title
|
367
415
|
semantic = "source_title" if semantic == "publication_title"
|
368
416
|
|
369
417
|
mapped = self.semantic_search_map[semantic]
|
370
|
-
if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
|
418
|
+
if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
|
371
419
|
raise ArgumentError.new("#{self.class.name} does not know about :semantic_search_field #{semantic}")
|
372
420
|
end
|
373
421
|
arguments[:search_field] = mapped
|
374
|
-
end
|
422
|
+
end
|
375
423
|
if config_arg(arguments, :unrecognized_search_field) == "raise" && ! search_keys.include?(arguments[:search_field])
|
376
424
|
raise ArgumentError.new("#{self.class.name} does not know about :search_field #{arguments[:search_field]}")
|
377
425
|
end
|
378
|
-
|
379
|
-
|
426
|
+
|
427
|
+
|
380
428
|
return arguments
|
381
429
|
end
|
382
430
|
alias_method :parse_search_arguments, :normalized_search_arguments
|
383
|
-
|
384
|
-
|
385
|
-
# Used mainly/only by the AJAX results loading.
|
431
|
+
|
432
|
+
|
433
|
+
# Used mainly/only by the AJAX results loading.
|
386
434
|
# an array WHITELIST of attributes that can be sent as non-verified
|
387
435
|
# request params and used to execute a search. For instance, 'auth' is
|
388
|
-
# NOT on there, you can't trust a web request as to 'auth' status.
|
436
|
+
# NOT on there, you can't trust a web request as to 'auth' status.
|
389
437
|
# individual engines may over-ride, call super, and add additional
|
390
|
-
# engine-specific attributes.
|
438
|
+
# engine-specific attributes.
|
391
439
|
def public_settable_search_args
|
392
440
|
[:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
|
393
441
|
end
|
394
|
-
|
395
|
-
|
442
|
+
|
443
|
+
# Cover method for consistent api with Results
|
444
|
+
def display_configuration
|
445
|
+
configuration.for_display
|
446
|
+
end
|
447
|
+
|
448
|
+
# Cover method for consistent api with Results
|
449
|
+
def engine_id
|
450
|
+
configuration.id
|
451
|
+
end
|
452
|
+
|
453
|
+
|
396
454
|
protected
|
397
455
|
|
456
|
+
# For legacy reasons old name auto_rescue_exceptions is here, some
|
457
|
+
# sub-classes may override it. Now preferred to use auto_rescued_exceptions
|
458
|
+
# setter instead.
|
459
|
+
def auto_rescue_exceptions
|
460
|
+
self.auto_rescued_exceptions
|
461
|
+
end
|
462
|
+
|
398
463
|
# get value of an arg that can be supplied in search args OR config,
|
399
464
|
# with search_args overriding config. Also normalizes value to_s
|
400
|
-
# (for symbols/strings).
|
465
|
+
# (for symbols/strings).
|
401
466
|
def config_arg(arguments, key, default = nil)
|
402
467
|
value = if arguments[key].present?
|
403
468
|
arguments[key]
|
404
469
|
else
|
405
470
|
configuration[key]
|
406
471
|
end
|
407
|
-
|
472
|
+
|
408
473
|
value = value.to_s if value.kind_of? Symbol
|
409
|
-
|
474
|
+
|
410
475
|
return value
|
411
476
|
end
|
412
|
-
|
413
|
-
# What exceptions should our #search wrapper rescue and turn
|
414
|
-
# into failed results instead of fatal errors?
|
415
|
-
#
|
416
|
-
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
417
|
-
# of other errors we don't want to eat either, making
|
418
|
-
# development really confusing. Perhaps could set this
|
419
|
-
# to be something diff in production and dev?
|
420
|
-
#
|
421
|
-
# This default list is probably useful already, but individual
|
422
|
-
# engines can override if it's convenient for their own errorau
|
423
|
-
# handling.
|
424
|
-
def auto_rescue_exceptions
|
425
|
-
[TimeoutError, HTTPClient::TimeoutError,
|
426
|
-
HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
|
427
|
-
MultiJson::DecodeError, Nokogiri::SyntaxError]
|
428
|
-
end
|
429
|
-
|
430
|
-
|
431
|
-
module ClassMethods
|
432
|
-
|
433
|
-
# Over-ride returning a hash or Confstruct with
|
434
|
-
# any configuration values you want by default.
|
435
|
-
# actual user-specified config values will be deep-merged
|
436
|
-
# into the defaults.
|
437
|
-
def default_configuration
|
438
|
-
end
|
439
|
-
|
440
|
-
# Over-ride returning an array of symbols for required
|
441
|
-
# configuration keys.
|
442
|
-
def required_configuration
|
443
|
-
end
|
444
|
-
|
445
|
-
end
|
446
|
-
|
447
477
|
end
|
448
478
|
end
|