bento_search 1.5.0 → 2.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -24
- data/Rakefile +30 -11
- data/app/assets/javascripts/bento_search/ajax_load.js +54 -22
- data/app/controllers/bento_search/search_controller.rb +31 -30
- data/app/helpers/bento_search_helper.rb +72 -74
- data/app/models/bento_search/concurrent_searcher.rb +136 -0
- data/app/models/bento_search/result_item.rb +15 -12
- data/app/models/bento_search/results/serialization.rb +22 -13
- data/app/models/bento_search/search_engine.rb +170 -140
- data/app/search_engines/bento_search/doaj_articles_engine.rb +20 -20
- data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
- data/app/search_engines/bento_search/eds_engine.rb +326 -206
- data/app/search_engines/bento_search/google_books_engine.rb +2 -2
- data/app/search_engines/bento_search/scopus_engine.rb +87 -87
- data/app/search_engines/bento_search/summon_engine.rb +1 -1
- data/app/views/bento_search/_ajax_loading.html.erb +17 -0
- data/app/views/bento_search/_item_title.html.erb +2 -4
- data/app/views/bento_search/_link.html.erb +3 -3
- data/lib/bento_search.rb +24 -9
- data/lib/bento_search/engine.rb +2 -0
- data/lib/bento_search/version.rb +1 -1
- data/lib/generators/bento_search/install/ajax_load_js_generator.rb +15 -0
- data/test/decorator/standard_decorator_test.rb +30 -30
- data/test/dummy/app/assets/config/manifest.js +4 -0
- data/test/dummy/config/application.rb +7 -0
- data/test/dummy/config/boot.rb +4 -9
- data/test/dummy/config/environments/development.rb +2 -0
- data/test/dummy/config/environments/production.rb +7 -1
- data/test/dummy/config/environments/test.rb +10 -3
- data/test/functional/bento_search/search_controller_test.rb +68 -58
- data/test/helper/bento_search_helper_test.rb +103 -103
- data/test/search_engines/doaj_articles_engine_test.rb +9 -9
- data/test/search_engines/eds_engine_test.rb +91 -59
- data/test/search_engines/google_site_search_test.rb +48 -48
- data/test/search_engines/scopus_engine_test.rb +51 -51
- data/test/search_engines/search_engine_base_test.rb +108 -86
- data/test/search_engines/search_engine_test.rb +68 -56
- data/test/support/atom.xsd.xml +3 -3
- data/test/support/xml.xsd +117 -0
- data/test/test_helper.rb +23 -12
- data/test/unit/concurrent_searcher_test.rb +75 -0
- data/test/unit/pagination_test.rb +12 -12
- data/test/vcr_cassettes/eds/FullText_CustomLink.yml +198 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1036 -1729
- data/test/vcr_cassettes/eds/catalog_ebook_query.yml +218 -0
- data/test/vcr_cassettes/eds/catalog_query.yml +255 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +11 -44
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +10 -7
- data/test/vcr_cassettes/eds/get_with_auth.yml +144 -153
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +167 -223
- data/test/view/atom_results_test.rb +94 -94
- metadata +36 -46
- data/app/assets/javascripts/bento_search.js +0 -3
- data/app/item_decorators/bento_search/ebscohost/conditional_openurl_main_link.rb +0 -36
- data/app/item_decorators/bento_search/only_premade_openurl.rb +0 -20
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +0 -39
- data/app/item_decorators/bento_search/openurl_main_link.rb +0 -34
- data/app/models/bento_search/multi_searcher.rb +0 -131
- data/test/dummy/config/initializers/secret_token.rb +0 -8
- data/test/unit/multi_searcher_test.rb +0 -49
@@ -0,0 +1,136 @@
|
|
1
|
+
begin
|
2
|
+
require 'concurrent'
|
3
|
+
|
4
|
+
# Concurrently runs multiple searches in separate threads. Since a search
|
5
|
+
# generally spends most of its time waiting on a foreign API, this is
|
6
|
+
# useful to significantly reduce total latency of running multiple searches,
|
7
|
+
# even in MRI.
|
8
|
+
#
|
9
|
+
# Uses [concurrent-ruby](https://github.com/ruby-concurrency/concurrent-ruby),
|
10
|
+
# already a dependency of Rails 5.x. To use with Rails previous to 5.x,
|
11
|
+
# just add concurrent-ruby to your `Gemfile`:
|
12
|
+
#
|
13
|
+
# gem 'concurrent-ruby', '~> 1.0'
|
14
|
+
#
|
15
|
+
# # Usage
|
16
|
+
#
|
17
|
+
# initialize with id's of registered engines:
|
18
|
+
#
|
19
|
+
# searcher = BentoSearch::ConcurrentSearcher.new(:gbs, :scopus)
|
20
|
+
#
|
21
|
+
# start the concurrent searches, params same as engine.search
|
22
|
+
#
|
23
|
+
# searcher.search( query_params )
|
24
|
+
#
|
25
|
+
# retrieve results, blocking until all are completed:
|
26
|
+
#
|
27
|
+
# results = searcher.results
|
28
|
+
#
|
29
|
+
# returns a Hash keyed by engine id, values BentoSearch::Results objects.
|
30
|
+
#
|
31
|
+
# results # => { "gbs" => <BentoSearch::Results ...>, "scopus" => <BentoSearch::Results ...>}
|
32
|
+
#
|
33
|
+
# Calling results more than once will just return the initial results again
|
34
|
+
# (cached), it won't run a search again.
|
35
|
+
#
|
36
|
+
# ## Dev-mode autoloading and concurrency
|
37
|
+
#
|
38
|
+
# In Rails previous to Rails5, you may have to set config.cache_classes=true
|
39
|
+
# even in development to avoid problems. In Rails 5.x, we take advantage of
|
40
|
+
# new api that should allow concurrency-safe autoloading. But if you run into
|
41
|
+
# any weird problems (such as a deadlock), `cache_classes = true` and
|
42
|
+
# `eager_load = true` should eliminate them, at the cost of dev-mode
|
43
|
+
# auto-reloading.
|
44
|
+
#
|
45
|
+
#
|
46
|
+
# TODO: have a method that returns Futures instead of only supplying the blocking
|
47
|
+
# results method? Several tricks, including making sure to properly terminate actors.
|
48
|
+
class BentoSearch::ConcurrentSearcher
|
49
|
+
def initialize(*engine_ids)
|
50
|
+
auto_rescued_exceptions = [StandardError]
|
51
|
+
|
52
|
+
@engines = []
|
53
|
+
engine_ids.each do |id|
|
54
|
+
add_engine( BentoSearch.get_engine(id).tap { |e| e.auto_rescued_exceptions = auto_rescued_exceptions + e.auto_rescued_exceptions })
|
55
|
+
end
|
56
|
+
@extra_auto_rescue_exceptions = [StandardError]
|
57
|
+
end
|
58
|
+
|
59
|
+
# Adds an instantiated engine directly, rather than by id from global
|
60
|
+
# registry.
|
61
|
+
def add_engine(engine)
|
62
|
+
unless engine.configuration.id.present?
|
63
|
+
raise ArgumentError.new("ConcurrentSearcher engines need `configuration.id`, this one didn't have one: #{engine}")
|
64
|
+
end
|
65
|
+
@engines << engine
|
66
|
+
end
|
67
|
+
|
68
|
+
# Starts all searches, returns self so you can chain method calls if you like.
|
69
|
+
def search(*search_args)
|
70
|
+
search_args.freeze
|
71
|
+
@futures = @engines.collect do |engine|
|
72
|
+
Concurrent::Future.execute { rails_future_wrap { engine.search(*search_args) } }
|
73
|
+
end
|
74
|
+
return self
|
75
|
+
end
|
76
|
+
|
77
|
+
# Have you called #search yet? You can only call #results if you have.
|
78
|
+
# Will stay true forever, it doesn't tell you if the search is done or not.
|
79
|
+
def search_started?
|
80
|
+
!! @futures
|
81
|
+
end
|
82
|
+
|
83
|
+
# Call after #search. Blocks until each included engine is finished
|
84
|
+
# then returns a Hash keyed by engine registered id, value is a
|
85
|
+
# BentoSearch::Results object.
|
86
|
+
#
|
87
|
+
# If called multiple times, returns the same results each time, does
|
88
|
+
# not re-run searches.
|
89
|
+
#
|
90
|
+
# It is an error to invoke without having previously called #search
|
91
|
+
def results
|
92
|
+
unless search_started?
|
93
|
+
raise ArgumentError, "Can't call ConcurrentSearcher#results before you have executed a #search"
|
94
|
+
end
|
95
|
+
|
96
|
+
@results ||= begin
|
97
|
+
pairs = rails_wait_wrap do
|
98
|
+
@futures.collect { |future| [future.value!.engine_id, future.value!] }
|
99
|
+
end
|
100
|
+
Hash[ pairs ].freeze
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
protected
|
105
|
+
|
106
|
+
# In Rails5, future bodies need to be wrapped in an executor,
|
107
|
+
# to handle auto-loading right in dev-mode, among other things.
|
108
|
+
# Rails docs coming, see https://github.com/rails/rails/issues/26847
|
109
|
+
@@rails_has_executor = Rails.application.respond_to?(:executor)
|
110
|
+
def rails_future_wrap
|
111
|
+
if @@rails_has_executor
|
112
|
+
Rails.application.executor.wrap { yield }
|
113
|
+
else
|
114
|
+
yield
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
# In Rails5, if we are collecting from within an action method
|
119
|
+
# (ie the 'request loop'), as we usually will be, we need to
|
120
|
+
# give up the autoload lock. Rails docs coming, see https://github.com/rails/rails/issues/26847
|
121
|
+
@@rails_needs_interlock_permit = ActiveSupport::Dependencies.respond_to?(:interlock) &&
|
122
|
+
!(Rails.application.config.eager_load && Rails.application.config.cache_classes)
|
123
|
+
def rails_wait_wrap
|
124
|
+
if @@rails_needs_interlock_permit
|
125
|
+
ActiveSupport::Dependencies.interlock.permit_concurrent_loads { yield }
|
126
|
+
else
|
127
|
+
yield
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
rescue LoadError
|
133
|
+
# you can use bento_search without concurrent-ruby, just not
|
134
|
+
# this class.
|
135
|
+
$stderr.puts "Tried but could not load BentoSearch::ConcurrentSearcher, concurrent-ruby not available!"
|
136
|
+
end
|
@@ -36,12 +36,12 @@ module BentoSearch
|
|
36
36
|
# search service it came from. May be alphanumeric. May be nil
|
37
37
|
# for engines that don't support it.
|
38
38
|
serializable_attr_accessor :unique_id
|
39
|
-
|
39
|
+
|
40
40
|
|
41
41
|
# If set to true, item will refuse to generate an openurl,
|
42
42
|
# returning nil from #to_openurl or #openurl_kev
|
43
43
|
serializable_attr_accessor :openurl_disabled
|
44
|
-
|
44
|
+
|
45
45
|
|
46
46
|
# Array (possibly empty) of BentoSearch::Link objects
|
47
47
|
# representing additional links. Often SearchEngine's themselves
|
@@ -52,7 +52,7 @@ module BentoSearch
|
|
52
52
|
|
53
53
|
# * dc.title
|
54
54
|
# * schema.org CreativeWork: 'name'
|
55
|
-
serializable_attr_accessor :title
|
55
|
+
serializable_attr_accessor :title
|
56
56
|
# backwards compat, we used to have separate titles and subtitles
|
57
57
|
alias_method :complete_title, :title
|
58
58
|
|
@@ -112,7 +112,7 @@ module BentoSearch
|
|
112
112
|
#
|
113
113
|
# Note: We're re-thinking this, might allow uncontrolled
|
114
114
|
# in here instead.
|
115
|
-
serializable_attr_accessor :format
|
115
|
+
serializable_attr_accessor :format
|
116
116
|
|
117
117
|
# Translated from internal format vocab at #format. Outputs
|
118
118
|
# eg http://schema.org/Book
|
@@ -137,7 +137,7 @@ module BentoSearch
|
|
137
137
|
# uncontrolled presumably english-language format string.
|
138
138
|
# if supplied will be used in display in place of controlled
|
139
139
|
# format.
|
140
|
-
serializable_attr_accessor :format_str
|
140
|
+
serializable_attr_accessor :format_str
|
141
141
|
|
142
142
|
# Language of materials. Producer can set language_code to an ISO 639-1 (two
|
143
143
|
# letter) or 639-3 (three letter) language code. If you do this, you don't
|
@@ -153,11 +153,11 @@ module BentoSearch
|
|
153
153
|
# #language_iso_639_2 (either may be null), or #language_str for uncontrolled
|
154
154
|
# string. If engine just sets one of these, internals take care of filling
|
155
155
|
# out the others.
|
156
|
-
serializable_attr_accessor :language_code
|
156
|
+
serializable_attr_accessor :language_code
|
157
157
|
attr_writer :language_str
|
158
158
|
def language_str
|
159
|
-
@language_str || language_code.try do |code|
|
160
|
-
LanguageList::LanguageInfo.find(code).try do |lang_obj|
|
159
|
+
(@language_str ||= nil) || language_code.try do |code|
|
160
|
+
LanguageList::LanguageInfo.find(code.dup).try do |lang_obj|
|
161
161
|
lang_obj.name
|
162
162
|
end
|
163
163
|
end
|
@@ -167,7 +167,10 @@ module BentoSearch
|
|
167
167
|
# if available, otherwise from direct language_str if available and
|
168
168
|
# possible.
|
169
169
|
def language_obj
|
170
|
-
@language_obj ||=
|
170
|
+
@language_obj ||= begin
|
171
|
+
lookup = self.language_code || self.language_str
|
172
|
+
LanguageList::LanguageInfo.find( lookup.dup ) if lookup
|
173
|
+
end
|
171
174
|
end
|
172
175
|
|
173
176
|
# Two letter ISO language code, or nil
|
@@ -230,12 +233,12 @@ module BentoSearch
|
|
230
233
|
|
231
234
|
# An ARRAY of string query-in-context snippets. Will usually
|
232
235
|
# have highlighting <b> tags in it. Creator is responsible
|
233
|
-
# for making sure it's otherwise html-safe.
|
236
|
+
# for making sure it's otherwise html-safe.
|
234
237
|
#
|
235
238
|
# Not all engines may store a snippets array in addition to abstract,
|
236
239
|
# some may only store one or the other. Some may store both but
|
237
240
|
# with same content formatted differently (array of multiple vs
|
238
|
-
# one combined string), some engines they may be different.
|
241
|
+
# one combined string), some engines they may be different.
|
239
242
|
attr_accessor :snippets
|
240
243
|
serializable_attr :snippets
|
241
244
|
|
@@ -263,7 +266,7 @@ module BentoSearch
|
|
263
266
|
# for it? Nice thing about the configuration has instead is it's
|
264
267
|
# easily serializable, it's just data.
|
265
268
|
#
|
266
|
-
# Although we intentionally do NOT include these in JSON serialization, ha.
|
269
|
+
# Although we intentionally do NOT include these in JSON serialization, ha.
|
267
270
|
attr_accessor :display_configuration
|
268
271
|
attr_accessor :engine_id
|
269
272
|
|
@@ -4,21 +4,21 @@ require 'json'
|
|
4
4
|
require 'date'
|
5
5
|
|
6
6
|
# Call #dump_to_json on a BentoSearch value object (such as BentoSearch::Result or ::Author)
|
7
|
-
# to get it in Json
|
7
|
+
# to get it in Json
|
8
8
|
#
|
9
9
|
# Values marked with serializable_attr in BentoSearch::Result are
|
10
|
-
# included in seralization.
|
10
|
+
# included in serialization.
|
11
11
|
#
|
12
12
|
# At present metadata and configuration are NOT serialized: #decorator, #display_configuration,
|
13
13
|
# and #engine_id are not included in the serialization, so when loaded from serialization,
|
14
|
-
# ResultItems will not have such things set.
|
15
|
-
#
|
14
|
+
# ResultItems will not have such things set.
|
15
|
+
#
|
16
16
|
# * Works by getting and setting instance variables directly, ignores getters/setters
|
17
17
|
#
|
18
18
|
# * This means decorated values are NOT included in serialization, the raw
|
19
19
|
# values are what is serialized. This is intended, we serialize internal
|
20
20
|
# state, not decoration which can be recreated. You should make sure the decorators you
|
21
|
-
# want are applied after de-serialization.
|
21
|
+
# want are applied after de-serialization.
|
22
22
|
#
|
23
23
|
# * preserves html_safety status in serialization, by adding extra `_attr_htmlsafe: true` key/value
|
24
24
|
#
|
@@ -31,22 +31,23 @@ module BentoSearch::Results::Serialization
|
|
31
31
|
self._serializable_attr_options = {}
|
32
32
|
end
|
33
33
|
|
34
|
+
|
34
35
|
class_methods do
|
35
36
|
# Just a macro to mark a property name serializable -- the name is
|
36
37
|
# of an instance method that will be included in our serializations
|
37
|
-
# and de-serializations.
|
38
|
+
# and de-serializations.
|
38
39
|
#
|
39
40
|
# Options:
|
40
41
|
# * collection_of: String fully qualified name of a class that is
|
41
42
|
# also BentoSearch::Results::Serialization, the attribute
|
42
|
-
# is an array of these.
|
43
|
+
# is an array of these.
|
43
44
|
# * serializer: String fully qualified class name of a serializer
|
44
45
|
# class that has a `dump` and a `load` for individual values,
|
45
46
|
# we just use it for Date now, see BentoSearch::Results::Serialization::Date
|
46
47
|
def serializable_attr(symbol, options = nil)
|
47
48
|
symbol = symbol.to_s
|
48
49
|
self._serializable_attrs << symbol
|
49
|
-
if options
|
50
|
+
if options
|
50
51
|
self._serializable_attr_options[symbol] = options
|
51
52
|
end
|
52
53
|
end
|
@@ -67,14 +68,14 @@ module BentoSearch::Results::Serialization
|
|
67
68
|
|
68
69
|
|
69
70
|
if _serializable_attr_options[key] && _serializable_attr_options[key][:collection_of]
|
70
|
-
klass =
|
71
|
+
klass = correct_const_get(_serializable_attr_options[key][:collection_of])
|
71
72
|
value = value.collect do |item|
|
72
73
|
klass.from_internal_state_hash(item)
|
73
74
|
end
|
74
75
|
end
|
75
76
|
|
76
77
|
if _serializable_attr_options[key] && _serializable_attr_options[key][:serializer]
|
77
|
-
klass =
|
78
|
+
klass = correct_const_get(_serializable_attr_options[key][:serializer])
|
78
79
|
value = klass.load(value)
|
79
80
|
end
|
80
81
|
|
@@ -92,18 +93,26 @@ module BentoSearch::Results::Serialization
|
|
92
93
|
self.from_internal_state_hash( JSON.parse! json_str )
|
93
94
|
end
|
94
95
|
|
96
|
+
def correct_const_get(str)
|
97
|
+
if Gem::Version.new(Rails.version) > Gem::Version.new('4.2.99')
|
98
|
+
const_get(str)
|
99
|
+
else
|
100
|
+
qualified_const_get(str)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
95
104
|
end
|
96
105
|
|
97
106
|
def internal_state_hash
|
98
107
|
hash = {}
|
99
108
|
self._serializable_attrs.each do |accessor|
|
100
109
|
accessor = accessor.to_s
|
101
|
-
value = self.instance_variable_get("@#{accessor}")
|
110
|
+
value = self.instance_variable_defined?("@#{accessor}") && self.instance_variable_get("@#{accessor}")
|
102
111
|
|
103
112
|
next if value.blank?
|
104
113
|
|
105
114
|
if _serializable_attr_options[accessor] && _serializable_attr_options[accessor][:serializer]
|
106
|
-
klass = self.class.
|
115
|
+
klass = self.class.correct_const_get(_serializable_attr_options[accessor][:serializer])
|
107
116
|
value = klass.dump(value)
|
108
117
|
elsif value.respond_to?(:to_ary)
|
109
118
|
value = value.to_ary.collect do |item|
|
@@ -133,4 +142,4 @@ module BentoSearch::Results::Serialization
|
|
133
142
|
end
|
134
143
|
end
|
135
144
|
|
136
|
-
end
|
145
|
+
end
|
@@ -9,18 +9,17 @@ require 'nokogiri'
|
|
9
9
|
|
10
10
|
module BentoSearch
|
11
11
|
# Usually raised by #get on an engine, when result for specified identifier
|
12
|
-
# can't be found.
|
12
|
+
# can't be found.
|
13
13
|
class ::BentoSearch::NotFound < ::BentoSearch::Error ; end
|
14
|
-
# Usually raised by #get when identifier results in more than one record.
|
14
|
+
# Usually raised by #get when identifier results in more than one record.
|
15
15
|
class ::BentoSearch::TooManyFound < ::BentoSearch::Error ; end
|
16
16
|
# Raised for problem contacting or unexpected response from
|
17
|
-
# remote service. Not yet universally used.
|
17
|
+
# remote service. Not yet universally used.
|
18
18
|
class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
|
19
19
|
|
20
|
-
|
21
|
-
# Module mix-in for bento_search search engines.
|
20
|
+
# Module mix-in for bento_search search engines.
|
22
21
|
#
|
23
|
-
# ==Using a SearchEngine
|
22
|
+
# ==Using a SearchEngine
|
24
23
|
#
|
25
24
|
# See a whole bunch more examples in the project README.
|
26
25
|
#
|
@@ -43,18 +42,18 @@ module BentoSearch
|
|
43
42
|
# of BentoSearch::Results
|
44
43
|
#
|
45
44
|
# results = engine.search("query")
|
46
|
-
#
|
47
|
-
# See more docs under #search, as well as project README.
|
48
45
|
#
|
49
|
-
#
|
50
|
-
#
|
46
|
+
# See more docs under #search, as well as project README.
|
47
|
+
#
|
48
|
+
# == Standard configuration variables.
|
49
|
+
#
|
51
50
|
# Some engines require their own engine-specific configuration for api keys
|
52
51
|
# and such, and offer their own engine-specific configuration for engine-specific
|
53
|
-
# features.
|
52
|
+
# features.
|
54
53
|
#
|
55
54
|
# An additional semi-standard configuration variable, some engines take
|
56
55
|
# an `:auth => true` to tell the engine to assume that all access is by
|
57
|
-
# authenticated local users who should be given elevated access to results.
|
56
|
+
# authenticated local users who should be given elevated access to results.
|
58
57
|
#
|
59
58
|
# Additional standard configuration keys that are implemented by the bento_search
|
60
59
|
# framework:
|
@@ -63,7 +62,12 @@ module BentoSearch
|
|
63
62
|
# String name of decorator class that will be applied by #bento_decorate
|
64
63
|
# helper in standard view. See wiki for more info on decorators. Must be
|
65
64
|
# string name, actual class object not supported (to make it easier
|
66
|
-
# to serialize and transport configuration).
|
65
|
+
# to serialize and transport configuration).
|
66
|
+
#
|
67
|
+
# [log_failed_results]
|
68
|
+
# Default false, if true all failed results are logged to
|
69
|
+
# `Rails.logger.error`. Can set global default with
|
70
|
+
# `BentoSearch.defaults.log_failed_results = true`
|
67
71
|
#
|
68
72
|
# == Implementing a SearchEngine
|
69
73
|
#
|
@@ -71,7 +75,7 @@ module BentoSearch
|
|
71
75
|
# generally only responsible for the parts specific to your search engine:
|
72
76
|
# receiving a query, making a call to the external search engine, and
|
73
77
|
# translating its result to a standard BentoSearch::Results full of
|
74
|
-
# BentoSearch::ResultItems.
|
78
|
+
# BentoSearch::ResultItems.
|
75
79
|
#
|
76
80
|
# Start out by simply including the search engine module:
|
77
81
|
#
|
@@ -85,64 +89,102 @@ module BentoSearch
|
|
85
89
|
# BentoSearch::Results item.
|
86
90
|
#
|
87
91
|
# The Results object should have #total_items set with total hitcount, and
|
88
|
-
# contain BentoSearch::ResultItem objects for each hit in the current page.
|
89
|
-
# See individual class documentation for more info.
|
92
|
+
# contain BentoSearch::ResultItem objects for each hit in the current page.
|
93
|
+
# See individual class documentation for more info.
|
90
94
|
#
|
91
95
|
# That's about the extent of your responsibilities. If the search failed
|
92
96
|
# for some reason due to an error, you should return a Results object
|
93
97
|
# with its #error object set, so it will be `failed?`. The framework
|
94
98
|
# will take care of this for you for certain uncaught exceptions you allow
|
95
99
|
# to rise out of #search_implementation (timeouts, HTTPClient timeouts,
|
96
|
-
# nokogiri and MultiJson parse errors).
|
100
|
+
# nokogiri and MultiJson parse errors).
|
97
101
|
#
|
98
102
|
# A SearchEngine object can be re-used for multiple searches, possibly
|
99
103
|
# under concurrent multi-threading. Do not store search-specific state
|
100
104
|
# in the search object. but you can store configuration-specific state there
|
101
|
-
# of course.
|
102
|
-
#
|
105
|
+
# of course.
|
106
|
+
#
|
103
107
|
# Recommend use of HTTPClient, if possible, for http searches. Especially
|
104
108
|
# using a class-level HTTPClient instance, to re-use persistent http
|
105
109
|
# connections across searches (can be esp important if you need to contact
|
106
110
|
# external search api via https/ssl).
|
107
111
|
#
|
108
|
-
# If you have required configuration keys, you can register that with
|
109
|
-
# class-level required_configuration_keys method.
|
112
|
+
# If you have required configuration keys, you can register that with
|
113
|
+
# class-level required_configuration_keys method.
|
110
114
|
#
|
111
|
-
# You can also advertise max per-page value by overriding max_per_page.
|
115
|
+
# You can also advertise max per-page value by overriding max_per_page.
|
112
116
|
#
|
113
|
-
# If you support fielded searching, you should over-ride
|
117
|
+
# If you support fielded searching, you should over-ride
|
114
118
|
# #search_field_definitions; if you support sorting, you should
|
115
119
|
# override #sort_definitions. See BentoSearch::SearchEngine::Capabilities
|
116
|
-
# module for documentation.
|
117
|
-
#
|
120
|
+
# module for documentation.
|
121
|
+
#
|
118
122
|
#
|
119
123
|
module SearchEngine
|
120
124
|
DefaultPerPage = 10
|
121
|
-
|
122
125
|
|
123
|
-
|
124
|
-
|
125
126
|
extend ActiveSupport::Concern
|
126
|
-
|
127
|
+
|
127
128
|
include Capabilities
|
128
|
-
|
129
|
+
|
130
|
+
mattr_accessor :default_auto_rescued_exceptions
|
131
|
+
self.default_auto_rescued_exceptions = [
|
132
|
+
BentoSearch::RubyTimeoutClass,
|
133
|
+
HTTPClient::TimeoutError,
|
134
|
+
HTTPClient::ConfigurationError,
|
135
|
+
HTTPClient::BadResponseError,
|
136
|
+
MultiJson::DecodeError,
|
137
|
+
Nokogiri::SyntaxError,
|
138
|
+
SocketError
|
139
|
+
].freeze
|
140
|
+
|
129
141
|
included do
|
130
|
-
attr_accessor :configuration
|
142
|
+
attr_accessor :configuration
|
143
|
+
|
144
|
+
# What exceptions should our #search wrapper rescue and turn
|
145
|
+
# into failed results instead of fatal errors?
|
146
|
+
#
|
147
|
+
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
148
|
+
# of other errors we don't want to eat either, making
|
149
|
+
# development really confusing. Perhaps could set this
|
150
|
+
# to be something diff in production and dev?
|
151
|
+
#
|
152
|
+
# This default list is probably useful already, but individual
|
153
|
+
# engines can override if it's convenient for their own error
|
154
|
+
# handling.
|
155
|
+
#
|
156
|
+
# Override by just using `auto_rescued_exceptions=` on class _or_ method,
|
157
|
+
# although some legacy code may override `def auto_rescue_exceptions` (note
|
158
|
+
# old `rescue` vs new `rescued`) which should work too.
|
159
|
+
self.class_attribute :auto_rescued_exceptions
|
160
|
+
self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
|
161
|
+
|
162
|
+
# Over-ride returning a hash or Confstruct with
|
163
|
+
# any configuration values you want by default.
|
164
|
+
# actual user-specified config values will be deep-merged
|
165
|
+
# into the defaults.
|
166
|
+
def self.default_configuration
|
167
|
+
end
|
168
|
+
|
169
|
+
# Over-ride returning an array of symbols for required
|
170
|
+
# configuration keys.
|
171
|
+
def self.required_configuration
|
172
|
+
end
|
131
173
|
end
|
132
|
-
|
174
|
+
|
133
175
|
# If specific SearchEngine calls initialize, you want to call super
|
134
176
|
# handles configuration loading, mostly. Argument is a
|
135
|
-
# Confstruct::Configuration or Hash.
|
177
|
+
# Confstruct::Configuration or Hash.
|
136
178
|
def initialize(aConfiguration = Confstruct::Configuration.new)
|
137
179
|
# To work around weird confstruct bug, we need to change
|
138
|
-
# a hash to a Confstruct ourselves.
|
180
|
+
# a hash to a Confstruct ourselves.
|
139
181
|
# https://github.com/mbklein/confstruct/issues/14
|
140
182
|
unless aConfiguration.kind_of? Confstruct::Configuration
|
141
183
|
aConfiguration = Confstruct::Configuration.new aConfiguration
|
142
184
|
end
|
143
|
-
|
144
|
-
|
145
|
-
# init, from copy of default, or new
|
185
|
+
|
186
|
+
|
187
|
+
# init, from copy of default, or new
|
146
188
|
if self.class.default_configuration
|
147
189
|
self.configuration = Confstruct::Configuration.new(self.class.default_configuration)
|
148
190
|
else
|
@@ -150,187 +192,193 @@ module BentoSearch
|
|
150
192
|
end
|
151
193
|
# merge in current instance config
|
152
194
|
self.configuration.configure ( aConfiguration )
|
153
|
-
|
154
|
-
# global defaults?
|
195
|
+
|
196
|
+
# global defaults?
|
155
197
|
self.configuration[:for_display] ||= {}
|
156
|
-
|
198
|
+
unless self.configuration.has_key?(:log_failed_results)
|
199
|
+
self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
|
200
|
+
end
|
201
|
+
|
157
202
|
# check for required keys -- have to be present, and not nil
|
158
203
|
if self.class.required_configuration
|
159
|
-
self.class.required_configuration.each do |required_key|
|
204
|
+
self.class.required_configuration.each do |required_key|
|
160
205
|
if ["**NOT_FOUND**", nil].include? self.configuration.lookup!(required_key.to_s, "**NOT_FOUND**")
|
161
206
|
raise ArgumentError.new("#{self.class.name} requires configuration key #{required_key}")
|
162
207
|
end
|
163
208
|
end
|
164
209
|
end
|
165
|
-
|
210
|
+
|
166
211
|
end
|
167
|
-
|
168
|
-
|
169
|
-
# Method used to actually get results from a search engine.
|
212
|
+
|
213
|
+
|
214
|
+
# Method used to actually get results from a search engine.
|
170
215
|
#
|
171
216
|
# When implementing a search engine, you do not override this #search
|
172
217
|
# method, but instead override #search_implementation. #search will
|
173
218
|
# call your specific #search_implementation, first normalizing the query
|
174
|
-
# arguments, and then normalizing and adding standard metadata to your return value.
|
219
|
+
# arguments, and then normalizing and adding standard metadata to your return value.
|
175
220
|
#
|
176
221
|
# Most engines support pagination, sorting, and searching in a specific
|
177
|
-
# field.
|
222
|
+
# field.
|
178
223
|
#
|
179
224
|
# # 1-based page index
|
180
225
|
# engine.search("query", :per_page => 20, :page => 5)
|
181
226
|
# # or use 0-based per-record index, engines that don't
|
182
|
-
# # support this will round to nearest page.
|
227
|
+
# # support this will round to nearest page.
|
183
228
|
# engine.search("query", :start => 20)
|
184
229
|
#
|
185
230
|
# You can ask an engine what search fields it supports with engine.search_keys
|
186
231
|
# engine.search("query", :search_field => "engine_search_field_name")
|
187
232
|
#
|
188
233
|
# There are also normalized 'semantic' names you can use across engines
|
189
|
-
# (if they support them): :title, :author, :subject, maybe more.
|
234
|
+
# (if they support them): :title, :author, :subject, maybe more.
|
190
235
|
#
|
191
236
|
# engine.search("query", :semantic_search_field => :title)
|
192
237
|
#
|
193
238
|
# Ask an engine what semantic field names it supports with `engine.semantic_search_keys`
|
194
239
|
#
|
195
|
-
# Unrecognized search fields will be ignored, unless you pass in
|
196
|
-
# :unrecognized_search_field => :raise (or do same in config).
|
240
|
+
# Unrecognized search fields will be ignored, unless you pass in
|
241
|
+
# :unrecognized_search_field => :raise (or do same in config).
|
197
242
|
#
|
198
243
|
# Ask an engine what sort fields it supports with `engine.sort_keys`. See
|
199
244
|
# list of standard sort keys in I18n file at ./config/locales/en.yml, in
|
200
|
-
# `en.bento_search.sort_keys`.
|
245
|
+
# `en.bento_search.sort_keys`.
|
201
246
|
#
|
202
247
|
# engine.search("query", :sort => "some_sort_key")
|
203
248
|
#
|
204
249
|
# Some engines support additional arguments to 'search', see individual
|
205
250
|
# engine documentation. For instance, some engines support `:auth => true`
|
206
251
|
# to give the user elevated search privileges when you have an authenticated
|
207
|
-
# local user.
|
252
|
+
# local user.
|
208
253
|
#
|
209
254
|
# Query as first arg is just a convenience, you can also use a single hash
|
210
|
-
# argument.
|
255
|
+
# argument.
|
211
256
|
#
|
212
257
|
# engine.search(:query => "query", :per_page => 20, :page => 4)
|
213
258
|
#
|
214
259
|
def search(*arguments)
|
215
260
|
start_t = Time.now
|
216
|
-
|
261
|
+
|
217
262
|
arguments = normalized_search_arguments(*arguments)
|
218
263
|
|
219
264
|
results = search_implementation(arguments)
|
220
|
-
|
265
|
+
|
221
266
|
fill_in_search_metadata_for(results, arguments)
|
222
|
-
|
267
|
+
|
223
268
|
results.timing = (Time.now - start_t)
|
224
|
-
|
269
|
+
|
225
270
|
return results
|
226
271
|
rescue *auto_rescue_exceptions => e
|
227
272
|
# Uncaught exception, log and turn into failed Results object. We
|
228
273
|
# only catch certain types of exceptions, or it makes dev really
|
229
274
|
# confusing eating exceptions. This is intentionally a convenience
|
230
275
|
# to allow search engine implementations to just raise the exception
|
231
|
-
# and we'll turn it into a proper error.
|
276
|
+
# and we'll turn it into a proper error.
|
232
277
|
cleaned_backtrace = Rails.backtrace_cleaner.clean(e.backtrace)
|
233
278
|
log_msg = "BentoSearch::SearchEngine failed results: #{e.inspect}\n #{cleaned_backtrace.join("\n ")}"
|
234
279
|
Rails.logger.error log_msg
|
235
|
-
|
280
|
+
|
236
281
|
failed = BentoSearch::Results.new
|
237
282
|
failed.error ||= {}
|
238
283
|
failed.error[:exception] = e
|
239
|
-
|
284
|
+
|
240
285
|
failed.timing = (Time.now - start_t)
|
241
|
-
|
286
|
+
|
242
287
|
fill_in_search_metadata_for(failed, arguments)
|
243
288
|
|
244
|
-
|
245
289
|
return failed
|
290
|
+
ensure
|
291
|
+
if results && configuration.log_failed_results && results.failed?
|
292
|
+
Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
|
293
|
+
end
|
246
294
|
end
|
247
|
-
|
295
|
+
|
248
296
|
# SOME of the elements of Results to be returned that SearchEngine implementation
|
249
297
|
# fills in automatically post-search. Extracted into a method for DRY in
|
250
298
|
# error handling to try to fill these in even in errors. Also can be used
|
251
|
-
# as public method for de-serialized or mock results.
|
299
|
+
# as public method for de-serialized or mock results.
|
252
300
|
def fill_in_search_metadata_for(results, normalized_arguments = {})
|
253
301
|
results.search_args = normalized_arguments
|
254
302
|
results.start = normalized_arguments[:start] || 0
|
255
303
|
results.per_page = normalized_arguments[:per_page]
|
256
|
-
|
304
|
+
|
257
305
|
results.engine_id = configuration.id
|
258
306
|
results.display_configuration = configuration.for_display
|
259
307
|
|
260
308
|
# We copy some configuration info over to each Item, as a convenience
|
261
309
|
# to display logic that may have to decide what to do given only an item,
|
262
310
|
# and may want to parameterize based on configuration.
|
263
|
-
results.each do |item|
|
264
|
-
item.engine_id = configuration.id
|
311
|
+
results.each do |item|
|
312
|
+
item.engine_id = configuration.id
|
265
313
|
item.decorator = configuration.lookup!("for_display.decorator")
|
266
314
|
item.display_configuration = configuration.for_display
|
267
315
|
end
|
268
316
|
|
269
317
|
results
|
270
318
|
end
|
271
|
-
|
319
|
+
|
272
320
|
|
273
321
|
# Take the arguments passed into #search, which can be flexibly given
|
274
322
|
# in several ways, and normalize to an expected single hash that
|
275
323
|
# will be passed to an engine's #search_implementation. The output
|
276
324
|
# of this method is a single hash, and is what a #search_implementation
|
277
|
-
# can expect to receive as an argument, with keys:
|
325
|
+
# can expect to receive as an argument, with keys:
|
278
326
|
#
|
279
327
|
# [:query] the query
|
280
328
|
# [:per_page] will _always_ be present, using the default per_page if
|
281
329
|
# none given by caller
|
282
330
|
# [:start, :page] both :start and :page will _always_ be present, regardless
|
283
331
|
# of which the caller used. They will both be integers, even if strings passed in.
|
284
|
-
# [:search_field] A search field from the engine's #search_field_definitions, as string.
|
332
|
+
# [:search_field] A search field from the engine's #search_field_definitions, as string.
|
285
333
|
# Even if the caller used :semantic_search_field, it'll be normalized
|
286
|
-
# to the actual local search_field key on output.
|
287
|
-
# [:sort] Sort key.
|
334
|
+
# to the actual local search_field key on output.
|
335
|
+
# [:sort] Sort key.
|
288
336
|
#
|
289
337
|
def normalized_search_arguments(*orig_arguments)
|
290
338
|
arguments = {}
|
291
|
-
|
339
|
+
|
292
340
|
# Two-arg style to one hash, if present
|
293
341
|
if (orig_arguments.length > 1 ||
|
294
342
|
(orig_arguments.length == 1 && ! orig_arguments.first.kind_of?(Hash)))
|
295
|
-
arguments[:query] = orig_arguments.delete_at(0)
|
343
|
+
arguments[:query] = orig_arguments.delete_at(0)
|
296
344
|
end
|
297
345
|
|
298
346
|
arguments.merge!(orig_arguments.first) if orig_arguments.length > 0
|
299
|
-
|
300
|
-
|
347
|
+
|
348
|
+
|
301
349
|
# allow strings for pagination (like from url query), change to
|
302
|
-
# int please.
|
350
|
+
# int please.
|
303
351
|
[:page, :per_page, :start].each do |key|
|
304
352
|
arguments.delete(key) if arguments[key].blank?
|
305
353
|
arguments[key] = arguments[key].to_i if arguments[key]
|
306
|
-
end
|
307
|
-
arguments[:per_page] ||= DefaultPerPage
|
308
|
-
|
309
|
-
# illegal arguments
|
354
|
+
end
|
355
|
+
arguments[:per_page] ||= configuration.default_per_page || DefaultPerPage
|
356
|
+
|
357
|
+
# illegal arguments
|
310
358
|
if (arguments[:start] && arguments[:page])
|
311
359
|
raise ArgumentError.new("Can't supply both :page and :start")
|
312
360
|
end
|
313
|
-
if ( arguments[:per_page] &&
|
314
|
-
self.max_per_page &&
|
361
|
+
if ( arguments[:per_page] &&
|
362
|
+
self.max_per_page &&
|
315
363
|
arguments[:per_page] > self.max_per_page)
|
316
364
|
raise ArgumentError.new("#{arguments[:per_page]} is more than maximum :per_page of #{self.max_per_page} for #{self.class}")
|
317
365
|
end
|
318
|
-
|
319
|
-
|
366
|
+
|
367
|
+
|
320
368
|
# Normalize :page to :start, and vice versa
|
321
369
|
if arguments[:page]
|
322
370
|
arguments[:start] = (arguments[:page] - 1) * arguments[:per_page]
|
323
371
|
elsif arguments[:start]
|
324
372
|
arguments[:page] = (arguments[:start] / arguments[:per_page]) + 1
|
325
373
|
end
|
326
|
-
|
374
|
+
|
327
375
|
# normalize :sort from possibly symbol to string
|
328
376
|
# TODO: raise if unrecognized sort key?
|
329
377
|
if arguments[:sort]
|
330
378
|
arguments[:sort] = arguments[:sort].to_s
|
331
379
|
end
|
332
380
|
|
333
|
-
|
381
|
+
|
334
382
|
# Multi-field search
|
335
383
|
if arguments[:query].kind_of? Hash
|
336
384
|
# Only if allowed
|
@@ -348,7 +396,7 @@ module BentoSearch
|
|
348
396
|
# translate semantic fields, raising for unfound fields if configured
|
349
397
|
arguments[:query].transform_keys! do |key|
|
350
398
|
new_key = self.semantic_search_map[key.to_s] || key
|
351
|
-
|
399
|
+
|
352
400
|
if ( config_arg(arguments, :unrecognized_search_field) == "raise" &&
|
353
401
|
! self.search_keys.include?(new_key))
|
354
402
|
raise ArgumentError.new("#{self.class.name} does not know about search_field #{new_key}, in query Hash #{arguments[:query]}")
|
@@ -358,91 +406,73 @@ module BentoSearch
|
|
358
406
|
end
|
359
407
|
|
360
408
|
end
|
361
|
-
|
409
|
+
|
362
410
|
# translate semantic_search_field to search_field, or raise if
|
363
|
-
# can't.
|
411
|
+
# can't.
|
364
412
|
if (semantic = arguments.delete(:semantic_search_field)) && ! semantic.blank?
|
365
413
|
semantic = semantic.to_s
|
366
414
|
# Legacy publication_title is now called source_title
|
367
415
|
semantic = "source_title" if semantic == "publication_title"
|
368
416
|
|
369
417
|
mapped = self.semantic_search_map[semantic]
|
370
|
-
if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
|
418
|
+
if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
|
371
419
|
raise ArgumentError.new("#{self.class.name} does not know about :semantic_search_field #{semantic}")
|
372
420
|
end
|
373
421
|
arguments[:search_field] = mapped
|
374
|
-
end
|
422
|
+
end
|
375
423
|
if config_arg(arguments, :unrecognized_search_field) == "raise" && ! search_keys.include?(arguments[:search_field])
|
376
424
|
raise ArgumentError.new("#{self.class.name} does not know about :search_field #{arguments[:search_field]}")
|
377
425
|
end
|
378
|
-
|
379
|
-
|
426
|
+
|
427
|
+
|
380
428
|
return arguments
|
381
429
|
end
|
382
430
|
alias_method :parse_search_arguments, :normalized_search_arguments
|
383
|
-
|
384
|
-
|
385
|
-
# Used mainly/only by the AJAX results loading.
|
431
|
+
|
432
|
+
|
433
|
+
# Used mainly/only by the AJAX results loading.
|
386
434
|
# an array WHITELIST of attributes that can be sent as non-verified
|
387
435
|
# request params and used to execute a search. For instance, 'auth' is
|
388
|
-
# NOT on there, you can't trust a web request as to 'auth' status.
|
436
|
+
# NOT on there, you can't trust a web request as to 'auth' status.
|
389
437
|
# individual engines may over-ride, call super, and add additional
|
390
|
-
# engine-specific attributes.
|
438
|
+
# engine-specific attributes.
|
391
439
|
def public_settable_search_args
|
392
440
|
[:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
|
393
441
|
end
|
394
|
-
|
395
|
-
|
442
|
+
|
443
|
+
# Cover method for consistent api with Results
|
444
|
+
def display_configuration
|
445
|
+
configuration.for_display
|
446
|
+
end
|
447
|
+
|
448
|
+
# Cover method for consistent api with Results
|
449
|
+
def engine_id
|
450
|
+
configuration.id
|
451
|
+
end
|
452
|
+
|
453
|
+
|
396
454
|
protected
|
397
455
|
|
456
|
+
# For legacy reasons old name auto_rescue_exceptions is here, some
|
457
|
+
# sub-classes may override it. Now preferred to use auto_rescued_exceptions
|
458
|
+
# setter instead.
|
459
|
+
def auto_rescue_exceptions
|
460
|
+
self.auto_rescued_exceptions
|
461
|
+
end
|
462
|
+
|
398
463
|
# get value of an arg that can be supplied in search args OR config,
|
399
464
|
# with search_args overriding config. Also normalizes value to_s
|
400
|
-
# (for symbols/strings).
|
465
|
+
# (for symbols/strings).
|
401
466
|
def config_arg(arguments, key, default = nil)
|
402
467
|
value = if arguments[key].present?
|
403
468
|
arguments[key]
|
404
469
|
else
|
405
470
|
configuration[key]
|
406
471
|
end
|
407
|
-
|
472
|
+
|
408
473
|
value = value.to_s if value.kind_of? Symbol
|
409
|
-
|
474
|
+
|
410
475
|
return value
|
411
476
|
end
|
412
|
-
|
413
|
-
# What exceptions should our #search wrapper rescue and turn
|
414
|
-
# into failed results instead of fatal errors?
|
415
|
-
#
|
416
|
-
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
417
|
-
# of other errors we don't want to eat either, making
|
418
|
-
# development really confusing. Perhaps could set this
|
419
|
-
# to be something diff in production and dev?
|
420
|
-
#
|
421
|
-
# This default list is probably useful already, but individual
|
422
|
-
# engines can override if it's convenient for their own error
|
423
|
-
# handling.
|
424
|
-
def auto_rescue_exceptions
|
425
|
-
[TimeoutError, HTTPClient::TimeoutError,
|
426
|
-
HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
|
427
|
-
MultiJson::DecodeError, Nokogiri::SyntaxError]
|
428
|
-
end
|
429
|
-
|
430
|
-
|
431
|
-
module ClassMethods
|
432
|
-
|
433
|
-
# Over-ride returning a hash or Confstruct with
|
434
|
-
# any configuration values you want by default.
|
435
|
-
# actual user-specified config values will be deep-merged
|
436
|
-
# into the defaults.
|
437
|
-
def default_configuration
|
438
|
-
end
|
439
|
-
|
440
|
-
# Over-ride returning an array of symbols for required
|
441
|
-
# configuration keys.
|
442
|
-
def required_configuration
|
443
|
-
end
|
444
|
-
|
445
|
-
end
|
446
|
-
|
447
477
|
end
|
448
478
|
end
|