bento_search 1.5.0 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -24
  3. data/Rakefile +30 -11
  4. data/app/assets/javascripts/bento_search/ajax_load.js +54 -22
  5. data/app/controllers/bento_search/search_controller.rb +31 -30
  6. data/app/helpers/bento_search_helper.rb +72 -74
  7. data/app/models/bento_search/concurrent_searcher.rb +136 -0
  8. data/app/models/bento_search/result_item.rb +15 -12
  9. data/app/models/bento_search/results/serialization.rb +22 -13
  10. data/app/models/bento_search/search_engine.rb +170 -140
  11. data/app/search_engines/bento_search/doaj_articles_engine.rb +20 -20
  12. data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
  13. data/app/search_engines/bento_search/eds_engine.rb +326 -206
  14. data/app/search_engines/bento_search/google_books_engine.rb +2 -2
  15. data/app/search_engines/bento_search/scopus_engine.rb +87 -87
  16. data/app/search_engines/bento_search/summon_engine.rb +1 -1
  17. data/app/views/bento_search/_ajax_loading.html.erb +17 -0
  18. data/app/views/bento_search/_item_title.html.erb +2 -4
  19. data/app/views/bento_search/_link.html.erb +3 -3
  20. data/lib/bento_search.rb +24 -9
  21. data/lib/bento_search/engine.rb +2 -0
  22. data/lib/bento_search/version.rb +1 -1
  23. data/lib/generators/bento_search/install/ajax_load_js_generator.rb +15 -0
  24. data/test/decorator/standard_decorator_test.rb +30 -30
  25. data/test/dummy/app/assets/config/manifest.js +4 -0
  26. data/test/dummy/config/application.rb +7 -0
  27. data/test/dummy/config/boot.rb +4 -9
  28. data/test/dummy/config/environments/development.rb +2 -0
  29. data/test/dummy/config/environments/production.rb +7 -1
  30. data/test/dummy/config/environments/test.rb +10 -3
  31. data/test/functional/bento_search/search_controller_test.rb +68 -58
  32. data/test/helper/bento_search_helper_test.rb +103 -103
  33. data/test/search_engines/doaj_articles_engine_test.rb +9 -9
  34. data/test/search_engines/eds_engine_test.rb +91 -59
  35. data/test/search_engines/google_site_search_test.rb +48 -48
  36. data/test/search_engines/scopus_engine_test.rb +51 -51
  37. data/test/search_engines/search_engine_base_test.rb +108 -86
  38. data/test/search_engines/search_engine_test.rb +68 -56
  39. data/test/support/atom.xsd.xml +3 -3
  40. data/test/support/xml.xsd +117 -0
  41. data/test/test_helper.rb +23 -12
  42. data/test/unit/concurrent_searcher_test.rb +75 -0
  43. data/test/unit/pagination_test.rb +12 -12
  44. data/test/vcr_cassettes/eds/FullText_CustomLink.yml +198 -0
  45. data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1036 -1729
  46. data/test/vcr_cassettes/eds/catalog_ebook_query.yml +218 -0
  47. data/test/vcr_cassettes/eds/catalog_query.yml +255 -0
  48. data/test/vcr_cassettes/eds/get_auth_token.yml +11 -44
  49. data/test/vcr_cassettes/eds/get_auth_token_failure.yml +10 -7
  50. data/test/vcr_cassettes/eds/get_with_auth.yml +144 -153
  51. data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +167 -223
  52. data/test/view/atom_results_test.rb +94 -94
  53. metadata +36 -46
  54. data/app/assets/javascripts/bento_search.js +0 -3
  55. data/app/item_decorators/bento_search/ebscohost/conditional_openurl_main_link.rb +0 -36
  56. data/app/item_decorators/bento_search/only_premade_openurl.rb +0 -20
  57. data/app/item_decorators/bento_search/openurl_add_other_link.rb +0 -39
  58. data/app/item_decorators/bento_search/openurl_main_link.rb +0 -34
  59. data/app/models/bento_search/multi_searcher.rb +0 -131
  60. data/test/dummy/config/initializers/secret_token.rb +0 -8
  61. data/test/unit/multi_searcher_test.rb +0 -49
@@ -0,0 +1,136 @@
1
+ begin
2
+ require 'concurrent'
3
+
4
+ # Concurrently runs multiple searches in separate threads. Since a search
5
+ # generally spends most of its time waiting on a foreign API, this is
6
+ # useful to significantly reduce total latency of running multiple searches,
7
+ # even in MRI.
8
+ #
9
+ # Uses [concurrent-ruby](https://github.com/ruby-concurrency/concurrent-ruby),
10
+ # already a dependency of Rails 5.x. To use with Rails previous to 5.x,
11
+ # just add concurrent-ruby to your `Gemfile`:
12
+ #
13
+ # gem 'concurrent-ruby', '~> 1.0'
14
+ #
15
+ # # Usage
16
+ #
17
+ # initialize with id's of registered engines:
18
+ #
19
+ # searcher = BentoSearch::ConcurrentSearcher.new(:gbs, :scopus)
20
+ #
21
+ # start the concurrent searches, params same as engine.search
22
+ #
23
+ # searcher.search( query_params )
24
+ #
25
+ # retrieve results, blocking until all are completed:
26
+ #
27
+ # results = searcher.results
28
+ #
29
+ # returns a Hash keyed by engine id, values BentoSearch::Results objects.
30
+ #
31
+ # results # => { "gbs" => <BentoSearch::Results ...>, "scopus" => <BentoSearch::Results ...>}
32
+ #
33
+ # Calling results more than once will just return the initial results again
34
+ # (cached), it won't run a search again.
35
+ #
36
+ # ## Dev-mode autoloading and concurrency
37
+ #
38
+ # In Rails previous to Rails5, you may have to set config.cache_classes=true
39
+ # even in development to avoid problems. In Rails 5.x, we take advantage of
40
+ # new api that should allow concurrency-safe autoloading. But if you run into
41
+ # any weird problems (such as a deadlock), `cache_classes = true` and
42
+ # `eager_load = true` should eliminate them, at the cost of dev-mode
43
+ # auto-reloading.
44
+ #
45
+ #
46
+ # TODO: have a method that returns Futures instead of only supplying the blocking
47
+ # results method? Several tricks, including making sure to properly terminate actors.
48
+ class BentoSearch::ConcurrentSearcher
49
+ def initialize(*engine_ids)
50
+ auto_rescued_exceptions = [StandardError]
51
+
52
+ @engines = []
53
+ engine_ids.each do |id|
54
+ add_engine( BentoSearch.get_engine(id).tap { |e| e.auto_rescued_exceptions = auto_rescued_exceptions + e.auto_rescued_exceptions })
55
+ end
56
+ @extra_auto_rescue_exceptions = [StandardError]
57
+ end
58
+
59
+ # Adds an instantiated engine directly, rather than by id from global
60
+ # registry.
61
+ def add_engine(engine)
62
+ unless engine.configuration.id.present?
63
+ raise ArgumentError.new("ConcurrentSearcher engines need `configuration.id`, this one didn't have one: #{engine}")
64
+ end
65
+ @engines << engine
66
+ end
67
+
68
+ # Starts all searches, returns self so you can chain method calls if you like.
69
+ def search(*search_args)
70
+ search_args.freeze
71
+ @futures = @engines.collect do |engine|
72
+ Concurrent::Future.execute { rails_future_wrap { engine.search(*search_args) } }
73
+ end
74
+ return self
75
+ end
76
+
77
+ # Have you called #search yet? You can only call #results if you have.
78
+ # Will stay true forever, it doesn't tell you if the search is done or not.
79
+ def search_started?
80
+ !! @futures
81
+ end
82
+
83
+ # Call after #search. Blocks until each included engine is finished
84
+ # then returns a Hash keyed by engine registered id, value is a
85
+ # BentoSearch::Results object.
86
+ #
87
+ # If called multiple times, returns the same results each time, does
88
+ # not re-run searches.
89
+ #
90
+ # It is an error to invoke without having previously called #search
91
+ def results
92
+ unless search_started?
93
+ raise ArgumentError, "Can't call ConcurrentSearcher#results before you have executed a #search"
94
+ end
95
+
96
+ @results ||= begin
97
+ pairs = rails_wait_wrap do
98
+ @futures.collect { |future| [future.value!.engine_id, future.value!] }
99
+ end
100
+ Hash[ pairs ].freeze
101
+ end
102
+ end
103
+
104
+ protected
105
+
106
+ # In Rails5, future bodies need to be wrapped in an executor,
107
+ # to handle auto-loading right in dev-mode, among other things.
108
+ # Rails docs coming, see https://github.com/rails/rails/issues/26847
109
+ @@rails_has_executor = Rails.application.respond_to?(:executor)
110
+ def rails_future_wrap
111
+ if @@rails_has_executor
112
+ Rails.application.executor.wrap { yield }
113
+ else
114
+ yield
115
+ end
116
+ end
117
+
118
+ # In Rails5, if we are collecting from within an action method
119
+ # (ie the 'request loop'), as we usually will be, we need to
120
+ # give up the autoload lock. Rails docs coming, see https://github.com/rails/rails/issues/26847
121
+ @@rails_needs_interlock_permit = ActiveSupport::Dependencies.respond_to?(:interlock) &&
122
+ !(Rails.application.config.eager_load && Rails.application.config.cache_classes)
123
+ def rails_wait_wrap
124
+ if @@rails_needs_interlock_permit
125
+ ActiveSupport::Dependencies.interlock.permit_concurrent_loads { yield }
126
+ else
127
+ yield
128
+ end
129
+ end
130
+
131
+ end
132
+ rescue LoadError
133
+ # you can use bento_search without concurrent-ruby, just not
134
+ # this class.
135
+ $stderr.puts "Tried but could not load BentoSearch::ConcurrentSearcher, concurrent-ruby not available!"
136
+ end
@@ -36,12 +36,12 @@ module BentoSearch
36
36
  # search service it came from. May be alphanumeric. May be nil
37
37
  # for engines that don't support it.
38
38
  serializable_attr_accessor :unique_id
39
-
39
+
40
40
 
41
41
  # If set to true, item will refuse to generate an openurl,
42
42
  # returning nil from #to_openurl or #openurl_kev
43
43
  serializable_attr_accessor :openurl_disabled
44
-
44
+
45
45
 
46
46
  # Array (possibly empty) of BentoSearch::Link objects
47
47
  # representing additional links. Often SearchEngine's themselves
@@ -52,7 +52,7 @@ module BentoSearch
52
52
 
53
53
  # * dc.title
54
54
  # * schema.org CreativeWork: 'name'
55
- serializable_attr_accessor :title
55
+ serializable_attr_accessor :title
56
56
  # backwards compat, we used to have separate titles and subtitles
57
57
  alias_method :complete_title, :title
58
58
 
@@ -112,7 +112,7 @@ module BentoSearch
112
112
  #
113
113
  # Note: We're re-thinking this, might allow uncontrolled
114
114
  # in here instead.
115
- serializable_attr_accessor :format
115
+ serializable_attr_accessor :format
116
116
 
117
117
  # Translated from internal format vocab at #format. Outputs
118
118
  # eg http://schema.org/Book
@@ -137,7 +137,7 @@ module BentoSearch
137
137
  # uncontrolled presumably english-language format string.
138
138
  # if supplied will be used in display in place of controlled
139
139
  # format.
140
- serializable_attr_accessor :format_str
140
+ serializable_attr_accessor :format_str
141
141
 
142
142
  # Language of materials. Producer can set language_code to an ISO 639-1 (two
143
143
  # letter) or 639-3 (three letter) language code. If you do this, you don't
@@ -153,11 +153,11 @@ module BentoSearch
153
153
  # #language_iso_639_2 (either may be null), or #language_str for uncontrolled
154
154
  # string. If engine just sets one of these, internals take care of filling
155
155
  # out the others.
156
- serializable_attr_accessor :language_code
156
+ serializable_attr_accessor :language_code
157
157
  attr_writer :language_str
158
158
  def language_str
159
- @language_str || language_code.try do |code|
160
- LanguageList::LanguageInfo.find(code).try do |lang_obj|
159
+ (@language_str ||= nil) || language_code.try do |code|
160
+ LanguageList::LanguageInfo.find(code.dup).try do |lang_obj|
161
161
  lang_obj.name
162
162
  end
163
163
  end
@@ -167,7 +167,10 @@ module BentoSearch
167
167
  # if available, otherwise from direct language_str if available and
168
168
  # possible.
169
169
  def language_obj
170
- @language_obj ||= LanguageList::LanguageInfo.find( self.language_code || self.language_str )
170
+ @language_obj ||= begin
171
+ lookup = self.language_code || self.language_str
172
+ LanguageList::LanguageInfo.find( lookup.dup ) if lookup
173
+ end
171
174
  end
172
175
 
173
176
  # Two letter ISO language code, or nil
@@ -230,12 +233,12 @@ module BentoSearch
230
233
 
231
234
  # An ARRAY of string query-in-context snippets. Will usually
232
235
  # have highlighting <b> tags in it. Creator is responsible
233
- # for making sure it's otherwise html-safe.
236
+ # for making sure it's otherwise html-safe.
234
237
  #
235
238
  # Not all engines may store a Snippets array in addition to abstract,
236
239
  # some may only store one or the other. Some may store both but
237
240
  # with same content formatted differently (array of multiple vs
238
- # one combined string), some engines they may be different.
241
+ # one combined string), some engines they may be different.
239
242
  attr_accessor :snippets
240
243
  serializable_attr :snippets
241
244
 
@@ -263,7 +266,7 @@ module BentoSearch
263
266
  # for it? Nice thing about the configuration has instead is it's
264
267
  # easily serializable, it's just data.
265
268
  #
266
- # Although we intentionally do NOT include these in JSON serialization, ha.
269
+ # Although we intentionally do NOT include these in JSON serialization, ha.
267
270
  attr_accessor :display_configuration
268
271
  attr_accessor :engine_id
269
272
 
@@ -4,21 +4,21 @@ require 'json'
4
4
  require 'date'
5
5
 
6
6
  # Call #dump_to_json on a BentoSearch value object (such as BentoSearch::Result or ::Author)
7
- # to get it in Json
7
+ # to get it in Json
8
8
  #
9
9
  # Values marked with serializable_attr in BentoSearch::Result are
10
- # included in seralization.
10
+ # included in serialization.
11
11
  #
12
12
  # At present metadata and configuration are NOT serialized: #decorator, #display_configuration,
13
13
  # and #engine_id are not included in the serialization, so when loaded from serialization,
14
- # ResultItems will not have such things set.
15
- #
14
+ # ResultItems will not have such things set.
15
+ #
16
16
  # * Works by getting and setting instance variables directly, ignores getters/setters
17
17
  #
18
18
  # * This means decorated values are NOT included in serialization, the raw
19
19
  # values are what is serialized. This is intended, we serialize internal
20
20
  # state, not decoration which can be recreated. You should make sure the decorators you
21
- # want are applied after de-serialization.
21
+ # want are applied after de-serialization.
22
22
  #
23
23
  # * preserves html_safety status in serialization, by adding extra `_attr_htmlsafe: true` key/value
24
24
  #
@@ -31,22 +31,23 @@ module BentoSearch::Results::Serialization
31
31
  self._serializable_attr_options = {}
32
32
  end
33
33
 
34
+
34
35
  class_methods do
35
36
  # Just a macro to mark a property name serializable -- the name is
36
37
  # of an instance method that will be included in our serializations
37
- # and de-serializations.
38
+ # and de-serializations.
38
39
  #
39
40
  # Options:
40
41
  # * collection_of: String fully qualified name of a class that is
41
42
  # also BentoSearch::Results::Serialization, the attribute
42
- # is an array of these.
43
+ # is an array of these.
43
44
  # * serializer: String fully qualified class name of a serializer
44
45
  # class that has a `dump` and a `load` for individual values,
45
46
  # we just use it for Date now, see BentoSearch::Results::Serialization::Date
46
47
  def serializable_attr(symbol, options = nil)
47
48
  symbol = symbol.to_s
48
49
  self._serializable_attrs << symbol
49
- if options
50
+ if options
50
51
  self._serializable_attr_options[symbol] = options
51
52
  end
52
53
  end
@@ -67,14 +68,14 @@ module BentoSearch::Results::Serialization
67
68
 
68
69
 
69
70
  if _serializable_attr_options[key] && _serializable_attr_options[key][:collection_of]
70
- klass = qualified_const_get(_serializable_attr_options[key][:collection_of])
71
+ klass = correct_const_get(_serializable_attr_options[key][:collection_of])
71
72
  value = value.collect do |item|
72
73
  klass.from_internal_state_hash(item)
73
74
  end
74
75
  end
75
76
 
76
77
  if _serializable_attr_options[key] && _serializable_attr_options[key][:serializer]
77
- klass = qualified_const_get(_serializable_attr_options[key][:serializer])
78
+ klass = correct_const_get(_serializable_attr_options[key][:serializer])
78
79
  value = klass.load(value)
79
80
  end
80
81
 
@@ -92,18 +93,26 @@ module BentoSearch::Results::Serialization
92
93
  self.from_internal_state_hash( JSON.parse! json_str )
93
94
  end
94
95
 
96
+ def correct_const_get(str)
97
+ if Gem::Version.new(Rails.version) > Gem::Version.new('4.2.99')
98
+ const_get(str)
99
+ else
100
+ qualified_const_get(str)
101
+ end
102
+ end
103
+
95
104
  end
96
105
 
97
106
  def internal_state_hash
98
107
  hash = {}
99
108
  self._serializable_attrs.each do |accessor|
100
109
  accessor = accessor.to_s
101
- value = self.instance_variable_get("@#{accessor}")
110
+ value = self.instance_variable_defined?("@#{accessor}") && self.instance_variable_get("@#{accessor}")
102
111
 
103
112
  next if value.blank?
104
113
 
105
114
  if _serializable_attr_options[accessor] && _serializable_attr_options[accessor][:serializer]
106
- klass = self.class.qualified_const_get(_serializable_attr_options[accessor][:serializer])
115
+ klass = self.class.correct_const_get(_serializable_attr_options[accessor][:serializer])
107
116
  value = klass.dump(value)
108
117
  elsif value.respond_to?(:to_ary)
109
118
  value = value.to_ary.collect do |item|
@@ -133,4 +142,4 @@ module BentoSearch::Results::Serialization
133
142
  end
134
143
  end
135
144
 
136
- end
145
+ end
@@ -9,18 +9,17 @@ require 'nokogiri'
9
9
 
10
10
  module BentoSearch
11
11
  # Usually raised by #get on an engine, when result for specified identifier
12
- # can't be found.
12
+ # can't be found.
13
13
  class ::BentoSearch::NotFound < ::BentoSearch::Error ; end
14
- # Usually raised by #get when identifier results in more than one record.
14
+ # Usually raised by #get when identifier results in more than one record.
15
15
  class ::BentoSearch::TooManyFound < ::BentoSearch::Error ; end
16
16
  # Raised for problem contacting or unexpected response from
17
- # remote service. Not yet universally used.
17
+ # remote service. Not yet universally used.
18
18
  class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
19
19
 
20
-
21
- # Module mix-in for bento_search search engines.
20
+ # Module mix-in for bento_search search engines.
22
21
  #
23
- # ==Using a SearchEngine
22
+ # ==Using a SearchEngine
24
23
  #
25
24
  # See a whole bunch more examples in the project README.
26
25
  #
@@ -43,18 +42,18 @@ module BentoSearch
43
42
  # of BentoSearch::Results
44
43
  #
45
44
  # results = engine.search("query")
46
- #
47
- # See more docs under #search, as well as project README.
48
45
  #
49
- # == Standard configuration variables.
50
- #
46
+ # See more docs under #search, as well as project README.
47
+ #
48
+ # == Standard configuration variables.
49
+ #
51
50
  # Some engines require their own engine-specific configuration for api keys
52
51
  # and such, and offer their own engine-specific configuration for engine-specific
53
- # features.
52
+ # features.
54
53
  #
55
54
  # An additional semi-standard configuration variable, some engines take
56
55
  # an `:auth => true` to tell the engine to assume that all access is by
57
- # authenticated local users who should be given elevated access to results.
56
+ # authenticated local users who should be given elevated access to results.
58
57
  #
59
58
  # Additional standard configuration keys that are implemented by the bento_search
60
59
  # framework:
@@ -63,7 +62,12 @@ module BentoSearch
63
62
  # String name of decorator class that will be applied by #bento_decorate
64
63
  # helper in standard view. See wiki for more info on decorators. Must be
65
64
  # string name, actual class object not supported (to make it easier
66
- # to serialize and transport configuration).
65
+ # to serialize and transport configuration).
66
+ #
67
+ # [log_failed_results]
68
+ # Default false, if true all failed results are logged to
69
+ # `Rails.logger.error`. Can set global default with
70
+ # `BentoSearch.defaults.log_failed_results = true`
67
71
  #
68
72
  # == Implementing a SearchEngine
69
73
  #
@@ -71,7 +75,7 @@ module BentoSearch
71
75
  # generally only responsible for the parts specific to your search engine:
72
76
  # receiving a query, making a call to the external search engine, and
73
77
  # translating its result to a standard BentoSearch::Results full of
74
- # BentoSearch::ResultItems.
78
+ # BentoSearch::ResultItems.
75
79
  #
76
80
  # Start out by simply including the search engine module:
77
81
  #
@@ -85,64 +89,102 @@ module BentoSearch
85
89
  # BentoSearch::Results item.
86
90
  #
87
91
  # The Results object should have #total_items set with total hitcount, and
88
- # contain BentoSearch::ResultItem objects for each hit in the current page.
89
- # See individual class documentation for more info.
92
+ # contain BentoSearch::ResultItem objects for each hit in the current page.
93
+ # See individual class documentation for more info.
90
94
  #
91
95
  # That's about the extent of your responsibilities. If the search failed
92
96
  # for some reason due to an error, you should return a Results object
93
97
  # with it's #error object set, so it will be `failed?`. The framework
94
98
  # will take care of this for you for certain uncaught exceptions you allow
95
99
  # to rise out of #search_implementation (timeouts, HTTPClient timeouts,
96
- # nokogiri and MultiJson parse errors).
100
+ # nokogiri and MultiJson parse errors).
97
101
  #
98
102
  # A SearchEngine object can be re-used for multiple searches, possibly
99
103
  # under concurrent multi-threading. Do not store search-specific state
100
104
  # in the search object, but you can store configuration-specific state there
101
- # of course.
102
- #
105
+ # of course.
106
+ #
103
107
  # Recommend use of HTTPClient, if possible, for http searches. Especially
104
108
  # using a class-level HTTPClient instance, to re-use persistent http
105
109
  # connections across searches (can be esp important if you need to contact
106
110
  # external search api via https/ssl).
107
111
  #
108
- # If you have required configuration keys, you can register that with
109
- # class-level required_configuration_keys method.
112
+ # If you have required configuration keys, you can register that with
113
+ # class-level required_configuration_keys method.
110
114
  #
111
- # You can also advertise max per-page value by overriding max_per_page.
115
+ # You can also advertise max per-page value by overriding max_per_page.
112
116
  #
113
- # If you support fielded searching, you should over-ride
117
+ # If you support fielded searching, you should over-ride
114
118
  # #search_field_definitions; if you support sorting, you should
115
119
  # override #sort_definitions. See BentoSearch::SearchEngine::Capabilities
116
- # module for documentation.
117
- #
120
+ # module for documentation.
121
+ #
118
122
  #
119
123
  module SearchEngine
120
124
  DefaultPerPage = 10
121
-
122
125
 
123
-
124
-
125
126
  extend ActiveSupport::Concern
126
-
127
+
127
128
  include Capabilities
128
-
129
+
130
+ mattr_accessor :default_auto_rescued_exceptions
131
+ self.default_auto_rescued_exceptions = [
132
+ BentoSearch::RubyTimeoutClass,
133
+ HTTPClient::TimeoutError,
134
+ HTTPClient::ConfigurationError,
135
+ HTTPClient::BadResponseError,
136
+ MultiJson::DecodeError,
137
+ Nokogiri::SyntaxError,
138
+ SocketError
139
+ ].freeze
140
+
129
141
  included do
130
- attr_accessor :configuration
142
+ attr_accessor :configuration
143
+
144
+ # What exceptions should our #search wrapper rescue and turn
145
+ # into failed results instead of fatal errors?
146
+ #
147
+ # Can't rescue everything, or we eat VCR/webmock errors, and lots
148
+ # of other errors we don't want to eat either, making
149
+ # development really confusing. Perhaps could set this
150
+ # to be something diff in production and dev?
151
+ #
152
+ # This default list is probably useful already, but individual
153
+ # engines can override if it's convenient for their own error
154
+ # handling.
155
+ #
156
+ # Override by just using `auto_rescued_exceptions=` on class _or_ method,
157
+ # although some legacy code may override `def auto_rescue_exceptions` (note
158
+ # old `rescue` vs new `rescued`) which should work too.
159
+ self.class_attribute :auto_rescued_exceptions
160
+ self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
161
+
162
+ # Over-ride returning a hash or Confstruct with
163
+ # any configuration values you want by default.
164
+ # actual user-specified config values will be deep-merged
165
+ # into the defaults.
166
+ def self.default_configuration
167
+ end
168
+
169
+ # Over-ride returning an array of symbols for required
170
+ # configuration keys.
171
+ def self.required_configuration
172
+ end
131
173
  end
132
-
174
+
133
175
  # If specific SearchEngine calls initialize, you want to call super
134
176
  # handles configuration loading, mostly. Argument is a
135
- # Confstruct::Configuration or Hash.
177
+ # Confstruct::Configuration or Hash.
136
178
  def initialize(aConfiguration = Confstruct::Configuration.new)
137
179
  # To work around weird confstruct bug, we need to change
138
- # a hash to a Confstruct ourselves.
180
+ # a hash to a Confstruct ourselves.
139
181
  # https://github.com/mbklein/confstruct/issues/14
140
182
  unless aConfiguration.kind_of? Confstruct::Configuration
141
183
  aConfiguration = Confstruct::Configuration.new aConfiguration
142
184
  end
143
-
144
-
145
- # init, from copy of default, or new
185
+
186
+
187
+ # init, from copy of default, or new
146
188
  if self.class.default_configuration
147
189
  self.configuration = Confstruct::Configuration.new(self.class.default_configuration)
148
190
  else
@@ -150,187 +192,193 @@ module BentoSearch
150
192
  end
151
193
  # merge in current instance config
152
194
  self.configuration.configure ( aConfiguration )
153
-
154
- # global defaults?
195
+
196
+ # global defaults?
155
197
  self.configuration[:for_display] ||= {}
156
-
198
+ unless self.configuration.has_key?(:log_failed_results)
199
+ self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
200
+ end
201
+
157
202
  # check for required keys -- have to be present, and not nil
158
203
  if self.class.required_configuration
159
- self.class.required_configuration.each do |required_key|
204
+ self.class.required_configuration.each do |required_key|
160
205
  if ["**NOT_FOUND**", nil].include? self.configuration.lookup!(required_key.to_s, "**NOT_FOUND**")
161
206
  raise ArgumentError.new("#{self.class.name} requires configuration key #{required_key}")
162
207
  end
163
208
  end
164
209
  end
165
-
210
+
166
211
  end
167
-
168
-
169
- # Method used to actually get results from a search engine.
212
+
213
+
214
+ # Method used to actually get results from a search engine.
170
215
  #
171
216
  # When implementing a search engine, you do not override this #search
172
217
  # method, but instead override #search_implementation. #search will
173
218
  # call your specific #search_implementation, first normalizing the query
174
- # arguments, and then normalizing and adding standard metadata to your return value.
219
+ # arguments, and then normalizing and adding standard metadata to your return value.
175
220
  #
176
221
  # Most engines support pagination, sorting, and searching in a specific
177
- # field.
222
+ # field.
178
223
  #
179
224
  # # 1-based page index
180
225
  # engine.search("query", :per_page => 20, :page => 5)
181
226
  # # or use 0-based per-record index, engines that don't
182
- # # support this will round to nearest page.
227
+ # # support this will round to nearest page.
183
228
  # engine.search("query", :start => 20)
184
229
  #
185
230
  # You can ask an engine what search fields it supports with engine.search_keys
186
231
  # engine.search("query", :search_field => "engine_search_field_name")
187
232
  #
188
233
  # There are also normalized 'semantic' names you can use across engines
189
- # (if they support them): :title, :author, :subject, maybe more.
234
+ # (if they support them): :title, :author, :subject, maybe more.
190
235
  #
191
236
  # engine.search("query", :semantic_search_field => :title)
192
237
  #
193
238
  # Ask an engine what semantic field names it supports with `engine.semantic_search_keys`
194
239
  #
195
- # Unrecognized search fields will be ignored, unless you pass in
196
- # :unrecognized_search_field => :raise (or do same in config).
240
+ # Unrecognized search fields will be ignored, unless you pass in
241
+ # :unrecognized_search_field => :raise (or do same in config).
197
242
  #
198
243
  # Ask an engine what sort fields it supports with `engine.sort_keys`. See
199
244
  # list of standard sort keys in I18n file at ./config/locales/en.yml, in
200
- # `en.bento_search.sort_keys`.
245
+ # `en.bento_search.sort_keys`.
201
246
  #
202
247
  # engine.search("query", :sort => "some_sort_key")
203
248
  #
204
249
  # Some engines support additional arguments to 'search', see individual
205
250
  # engine documentation. For instance, some engines support `:auth => true`
206
251
  # to give the user elevated search privileges when you have an authenticated
207
- # local user.
252
+ # local user.
208
253
  #
209
254
  # Query as first arg is just a convenience, you can also use a single hash
210
- # argument.
255
+ # argument.
211
256
  #
212
257
  # engine.search(:query => "query", :per_page => 20, :page => 4)
213
258
  #
214
259
  def search(*arguments)
215
260
  start_t = Time.now
216
-
261
+
217
262
  arguments = normalized_search_arguments(*arguments)
218
263
 
219
264
  results = search_implementation(arguments)
220
-
265
+
221
266
  fill_in_search_metadata_for(results, arguments)
222
-
267
+
223
268
  results.timing = (Time.now - start_t)
224
-
269
+
225
270
  return results
226
271
  rescue *auto_rescue_exceptions => e
227
272
  # Uncaught exception, log and turn into failed Results object. We
228
273
  # only catch certain types of exceptions, or it makes dev really
229
274
  # confusing eating exceptions. This is intentionally a convenience
230
275
  # to allow search engine implementations to just raise the exception
231
- # and we'll turn it into a proper error.
276
+ # and we'll turn it into a proper error.
232
277
  cleaned_backtrace = Rails.backtrace_cleaner.clean(e.backtrace)
233
278
  log_msg = "BentoSearch::SearchEngine failed results: #{e.inspect}\n #{cleaned_backtrace.join("\n ")}"
234
279
  Rails.logger.error log_msg
235
-
280
+
236
281
  failed = BentoSearch::Results.new
237
282
  failed.error ||= {}
238
283
  failed.error[:exception] = e
239
-
284
+
240
285
  failed.timing = (Time.now - start_t)
241
-
286
+
242
287
  fill_in_search_metadata_for(failed, arguments)
243
288
 
244
-
245
289
  return failed
290
+ ensure
291
+ if results && configuration.log_failed_results && results.failed?
292
+ Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
293
+ end
246
294
  end
247
-
295
+
248
296
  # SOME of the elements of Results to be returned that SearchEngine implementation
249
297
  # fills in automatically post-search. Extracted into a method for DRY in
250
298
  # error handling to try to fill these in even in errors. Also can be used
251
- # as public method for de-serialized or mock results.
299
+ # as public method for de-serialized or mock results.
252
300
  def fill_in_search_metadata_for(results, normalized_arguments = {})
253
301
  results.search_args = normalized_arguments
254
302
  results.start = normalized_arguments[:start] || 0
255
303
  results.per_page = normalized_arguments[:per_page]
256
-
304
+
257
305
  results.engine_id = configuration.id
258
306
  results.display_configuration = configuration.for_display
259
307
 
260
308
  # We copy some configuration info over to each Item, as a convenience
261
309
  # to display logic that may have to decide what to do given only an item,
262
310
  # and may want to parameterize based on configuration.
263
- results.each do |item|
264
- item.engine_id = configuration.id
311
+ results.each do |item|
312
+ item.engine_id = configuration.id
265
313
  item.decorator = configuration.lookup!("for_display.decorator")
266
314
  item.display_configuration = configuration.for_display
267
315
  end
268
316
 
269
317
  results
270
318
  end
271
-
319
+
272
320
 
273
321
  # Take the arguments passed into #search, which can be flexibly given
274
322
  # in several ways, and normalize to an expected single hash that
275
323
  # will be passed to an engine's #search_implementation. The output
276
324
  # of this method is a single hash, and is what a #search_implementation
277
- # can expect to receive as an argument, with keys:
325
+ # can expect to receive as an argument, with keys:
278
326
  #
279
327
  # [:query] the query
280
328
  # [:per_page] will _always_ be present, using the default per_page if
281
329
  # none given by caller
282
330
  # [:start, :page] both :start and :page will _always_ be present, regardless
283
331
  # of which the caller used. They will both be integers, even if strings passed in.
284
- # [:search_field] A search field from the engine's #search_field_definitions, as string.
332
+ # [:search_field] A search field from the engine's #search_field_definitions, as string.
285
333
  # Even if the caller used :semantic_search_field, it'll be normalized
286
- # to the actual local search_field key on output.
287
- # [:sort] Sort key.
334
+ # to the actual local search_field key on output.
335
+ # [:sort] Sort key.
288
336
  #
289
337
  def normalized_search_arguments(*orig_arguments)
290
338
  arguments = {}
291
-
339
+
292
340
  # Two-arg style to one hash, if present
293
341
  if (orig_arguments.length > 1 ||
294
342
  (orig_arguments.length == 1 && ! orig_arguments.first.kind_of?(Hash)))
295
- arguments[:query] = orig_arguments.delete_at(0)
343
+ arguments[:query] = orig_arguments.delete_at(0)
296
344
  end
297
345
 
298
346
  arguments.merge!(orig_arguments.first) if orig_arguments.length > 0
299
-
300
-
347
+
348
+
301
349
  # allow strings for pagination (like from url query), change to
302
- # int please.
350
+ # int please.
303
351
  [:page, :per_page, :start].each do |key|
304
352
  arguments.delete(key) if arguments[key].blank?
305
353
  arguments[key] = arguments[key].to_i if arguments[key]
306
- end
307
- arguments[:per_page] ||= DefaultPerPage
308
-
309
- # illegal arguments
354
+ end
355
+ arguments[:per_page] ||= configuration.default_per_page || DefaultPerPage
356
+
357
+ # illegal arguments
310
358
  if (arguments[:start] && arguments[:page])
311
359
  raise ArgumentError.new("Can't supply both :page and :start")
312
360
  end
313
- if ( arguments[:per_page] &&
314
- self.max_per_page &&
361
+ if ( arguments[:per_page] &&
362
+ self.max_per_page &&
315
363
  arguments[:per_page] > self.max_per_page)
316
364
  raise ArgumentError.new("#{arguments[:per_page]} is more than maximum :per_page of #{self.max_per_page} for #{self.class}")
317
365
  end
318
-
319
-
366
+
367
+
320
368
  # Normalize :page to :start, and vice versa
321
369
  if arguments[:page]
322
370
  arguments[:start] = (arguments[:page] - 1) * arguments[:per_page]
323
371
  elsif arguments[:start]
324
372
  arguments[:page] = (arguments[:start] / arguments[:per_page]) + 1
325
373
  end
326
-
374
+
327
375
  # normalize :sort from possibly symbol to string
328
376
  # TODO: raise if unrecognized sort key?
329
377
  if arguments[:sort]
330
378
  arguments[:sort] = arguments[:sort].to_s
331
379
  end
332
380
 
333
-
381
+
334
382
  # Multi-field search
335
383
  if arguments[:query].kind_of? Hash
336
384
  # Only if allowed
@@ -348,7 +396,7 @@ module BentoSearch
348
396
  # translate semantic fields, raising for unfound fields if configured
349
397
  arguments[:query].transform_keys! do |key|
350
398
  new_key = self.semantic_search_map[key.to_s] || key
351
-
399
+
352
400
  if ( config_arg(arguments, :unrecognized_search_field) == "raise" &&
353
401
  ! self.search_keys.include?(new_key))
354
402
  raise ArgumentError.new("#{self.class.name} does not know about search_field #{new_key}, in query Hash #{arguments[:query]}")
@@ -358,91 +406,73 @@ module BentoSearch
358
406
  end
359
407
 
360
408
  end
361
-
409
+
362
410
  # translate semantic_search_field to search_field, or raise if
363
- # can't.
411
+ # can't.
364
412
  if (semantic = arguments.delete(:semantic_search_field)) && ! semantic.blank?
365
413
  semantic = semantic.to_s
366
414
  # Legacy publication_title is now called source_title
367
415
  semantic = "source_title" if semantic == "publication_title"
368
416
 
369
417
  mapped = self.semantic_search_map[semantic]
370
- if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
418
+ if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
371
419
  raise ArgumentError.new("#{self.class.name} does not know about :semantic_search_field #{semantic}")
372
420
  end
373
421
  arguments[:search_field] = mapped
374
- end
422
+ end
375
423
  if config_arg(arguments, :unrecognized_search_field) == "raise" && ! search_keys.include?(arguments[:search_field])
376
424
  raise ArgumentError.new("#{self.class.name} does not know about :search_field #{arguments[:search_field]}")
377
425
  end
378
-
379
-
426
+
427
+
380
428
  return arguments
381
429
  end
382
430
  alias_method :parse_search_arguments, :normalized_search_arguments
383
-
384
-
385
- # Used mainly/only by the AJAX results loading.
431
+
432
+
433
+ # Used mainly/only by the AJAX results loading.
386
434
  # an array WHITELIST of attributes that can be sent as non-verified
387
435
  # request params and used to execute a search. For instance, 'auth' is
388
- # NOT on there, you can't trust a web request as to 'auth' status.
436
+ # NOT on there, you can't trust a web request as to 'auth' status.
389
437
  # individual engines may over-ride, call super, and add additional
390
- # engine-specific attributes.
438
+ # engine-specific attributes.
391
439
  def public_settable_search_args
392
440
  [:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
393
441
  end
394
-
395
-
442
+
443
+ # Cover method for consistent api with Results
444
+ def display_configuration
445
+ configuration.for_display
446
+ end
447
+
448
+ # Cover method for consistent api with Results
449
+ def engine_id
450
+ configuration.id
451
+ end
452
+
453
+
396
454
  protected
397
455
 
456
+ # For legacy reasons old name auto_rescue_exceptions is here, some
457
+ # sub-classes may override it. Now preferred to use auto_rescued_exceptions
458
+ # setter instead.
459
+ def auto_rescue_exceptions
460
+ self.auto_rescued_exceptions
461
+ end
462
+
398
463
  # get value of an arg that can be supplied in search args OR config,
399
464
  # with search_args overriding config. Also normalizes value to_s
400
- # (for symbols/strings).
465
+ # (for symbols/strings).
401
466
  def config_arg(arguments, key, default = nil)
402
467
  value = if arguments[key].present?
403
468
  arguments[key]
404
469
  else
405
470
  configuration[key]
406
471
  end
407
-
472
+
408
473
  value = value.to_s if value.kind_of? Symbol
409
-
474
+
410
475
  return value
411
476
  end
412
-
413
- # What exceptions should our #search wrapper rescue and turn
414
- # into failed results instead of fatal errors?
415
- #
416
- # Can't rescue everything, or we eat VCR/webmock errors, and lots
417
- # of other errors we don't want to eat either, making
418
- # development really confusing. Perhaps could set this
419
- # to be something diff in production and dev?
420
- #
421
- # This default list is probably useful already, but individual
422
- # engines can override if it's convenient for their own error
423
- # handling.
424
- def auto_rescue_exceptions
425
- [TimeoutError, HTTPClient::TimeoutError,
426
- HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
427
- MultiJson::DecodeError, Nokogiri::SyntaxError]
428
- end
429
-
430
-
431
- module ClassMethods
432
-
433
- # Over-ride returning a hash or Confstruct with
434
- # any configuration values you want by default.
435
- # actual user-specified config values will be deep-merged
436
- # into the defaults.
437
- def default_configuration
438
- end
439
-
440
- # Over-ride returning an array of symbols for required
441
- # configuration keys.
442
- def required_configuration
443
- end
444
-
445
- end
446
-
447
477
  end
448
478
  end