bento_search 1.5.0 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -24
  3. data/Rakefile +30 -11
  4. data/app/assets/javascripts/bento_search/ajax_load.js +54 -22
  5. data/app/controllers/bento_search/search_controller.rb +31 -30
  6. data/app/helpers/bento_search_helper.rb +72 -74
  7. data/app/models/bento_search/concurrent_searcher.rb +136 -0
  8. data/app/models/bento_search/result_item.rb +15 -12
  9. data/app/models/bento_search/results/serialization.rb +22 -13
  10. data/app/models/bento_search/search_engine.rb +170 -140
  11. data/app/search_engines/bento_search/doaj_articles_engine.rb +20 -20
  12. data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
  13. data/app/search_engines/bento_search/eds_engine.rb +326 -206
  14. data/app/search_engines/bento_search/google_books_engine.rb +2 -2
  15. data/app/search_engines/bento_search/scopus_engine.rb +87 -87
  16. data/app/search_engines/bento_search/summon_engine.rb +1 -1
  17. data/app/views/bento_search/_ajax_loading.html.erb +17 -0
  18. data/app/views/bento_search/_item_title.html.erb +2 -4
  19. data/app/views/bento_search/_link.html.erb +3 -3
  20. data/lib/bento_search.rb +24 -9
  21. data/lib/bento_search/engine.rb +2 -0
  22. data/lib/bento_search/version.rb +1 -1
  23. data/lib/generators/bento_search/install/ajax_load_js_generator.rb +15 -0
  24. data/test/decorator/standard_decorator_test.rb +30 -30
  25. data/test/dummy/app/assets/config/manifest.js +4 -0
  26. data/test/dummy/config/application.rb +7 -0
  27. data/test/dummy/config/boot.rb +4 -9
  28. data/test/dummy/config/environments/development.rb +2 -0
  29. data/test/dummy/config/environments/production.rb +7 -1
  30. data/test/dummy/config/environments/test.rb +10 -3
  31. data/test/functional/bento_search/search_controller_test.rb +68 -58
  32. data/test/helper/bento_search_helper_test.rb +103 -103
  33. data/test/search_engines/doaj_articles_engine_test.rb +9 -9
  34. data/test/search_engines/eds_engine_test.rb +91 -59
  35. data/test/search_engines/google_site_search_test.rb +48 -48
  36. data/test/search_engines/scopus_engine_test.rb +51 -51
  37. data/test/search_engines/search_engine_base_test.rb +108 -86
  38. data/test/search_engines/search_engine_test.rb +68 -56
  39. data/test/support/atom.xsd.xml +3 -3
  40. data/test/support/xml.xsd +117 -0
  41. data/test/test_helper.rb +23 -12
  42. data/test/unit/concurrent_searcher_test.rb +75 -0
  43. data/test/unit/pagination_test.rb +12 -12
  44. data/test/vcr_cassettes/eds/FullText_CustomLink.yml +198 -0
  45. data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1036 -1729
  46. data/test/vcr_cassettes/eds/catalog_ebook_query.yml +218 -0
  47. data/test/vcr_cassettes/eds/catalog_query.yml +255 -0
  48. data/test/vcr_cassettes/eds/get_auth_token.yml +11 -44
  49. data/test/vcr_cassettes/eds/get_auth_token_failure.yml +10 -7
  50. data/test/vcr_cassettes/eds/get_with_auth.yml +144 -153
  51. data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +167 -223
  52. data/test/view/atom_results_test.rb +94 -94
  53. metadata +36 -46
  54. data/app/assets/javascripts/bento_search.js +0 -3
  55. data/app/item_decorators/bento_search/ebscohost/conditional_openurl_main_link.rb +0 -36
  56. data/app/item_decorators/bento_search/only_premade_openurl.rb +0 -20
  57. data/app/item_decorators/bento_search/openurl_add_other_link.rb +0 -39
  58. data/app/item_decorators/bento_search/openurl_main_link.rb +0 -34
  59. data/app/models/bento_search/multi_searcher.rb +0 -131
  60. data/test/dummy/config/initializers/secret_token.rb +0 -8
  61. data/test/unit/multi_searcher_test.rb +0 -49
@@ -0,0 +1,136 @@
1
+ begin
2
+ require 'concurrent'
3
+
4
+ # Concurrently runs multiple searches in separate threads. Since a search
5
+ # generally spends most of its time waiting on foreign API, this is
6
+ # useful to significantly reduce total latency of running multiple searches,
7
+ # even in MRI.
8
+ #
9
+ # Uses [concurrent-ruby](https://github.com/ruby-concurrency/concurrent-ruby),
10
+ # already a dependency of Rails 5.x. To use with Rails previous to 5.x,
11
+ # just add concurrent-ruby to your `Gemfile`:
12
+ #
13
+ # gem 'concurrent-ruby', '~> 1.0'
14
+ #
15
+ # # Usage
16
+ #
17
+ # initialize with id's of registered engines:
18
+ #
19
+ # searcher = BentoSearch::ConcurrentSearcher.new(:gbs, :scopus)
20
+ #
21
+ # start the concurrent searches, params same as engine.search
22
+ #
23
+ # searcher.search( query_params )
24
+ #
25
+ # retrieve results, blocking until all are completed:
26
+ #
27
+ # results = searcher.results
28
+ #
29
+ # returns a Hash keyed by engine id, values BentoSearch::Results objects.
30
+ #
31
+ # results # => { "gbs" => <BentoSearch::Results ...>, "scopus" => <BentoSearch::Results ...>}
32
+ #
33
+ # Calling results more than once will just return the initial results again
34
+ # (cached), it won't run a search again.
35
+ #
36
+ # ## Dev-mode autoloading and concurrency
37
+ #
38
+ # In Rails previous to Rails5, you may have to set config.cache_classes=true
39
+ # even in development to avoid problems. In Rails 5.x, we take advantage of
40
+ # new api that should allow concurrency-safe autoloading. But if you run into
41
+ # any weird problems (such as a deadlock), `cache_classes = true` and
42
+ # `eager_load = true` should eliminate them, at the cost of dev-mode
43
+ # auto-reloading.
44
+ #
45
+ #
46
+ # TODO: have a method that returns Futures instead of only supplying the blocking
47
+ # results method? Several tricks, including making sure to properly terminate actors.
48
+ class BentoSearch::ConcurrentSearcher
49
+ def initialize(*engine_ids)
50
+ auto_rescued_exceptions = [StandardError]
51
+
52
+ @engines = []
53
+ engine_ids.each do |id|
54
+ add_engine( BentoSearch.get_engine(id).tap { |e| e.auto_rescued_exceptions = auto_rescued_exceptions + e.auto_rescued_exceptions })
55
+ end
56
+ @extra_auto_rescue_exceptions = [StandardError]
57
+ end
58
+
59
+ # Adds an instantiated engine directly, rather than by id from global
60
+ # registry.
61
+ def add_engine(engine)
62
+ unless engine.configuration.id.present?
63
+ raise ArgumentError.new("ConcurrentSearcher engines need `configuration.id`, this one didn't have one: #{engine}")
64
+ end
65
+ @engines << engine
66
+ end
67
+
68
+ # Starts all searches, returns self so you can chain method calls if you like.
69
+ def search(*search_args)
70
+ search_args.freeze
71
+ @futures = @engines.collect do |engine|
72
+ Concurrent::Future.execute { rails_future_wrap { engine.search(*search_args) } }
73
+ end
74
+ return self
75
+ end
76
+
77
+ # Have you called #search yet? You can only call #results if you have.
78
+ # Will stay true forever, it doesn't tell you if the search is done or not.
79
+ def search_started?
80
+ !! @futures
81
+ end
82
+
83
+ # Call after #search. Blocks until each included engine is finished
84
+ # then returns a Hash keyed by engine registered id, value is a
85
+ # BentoSearch::Results object.
86
+ #
87
+ # If called multiple times, returns the same results each time, does
88
+ # not re-run searches.
89
+ #
90
+ # It is an error to invoke without having previously called #search
91
+ def results
92
+ unless search_started?
93
+ raise ArgumentError, "Can't call ConcurrentSearcher#results before you have executed a #search"
94
+ end
95
+
96
+ @results ||= begin
97
+ pairs = rails_wait_wrap do
98
+ @futures.collect { |future| [future.value!.engine_id, future.value!] }
99
+ end
100
+ Hash[ pairs ].freeze
101
+ end
102
+ end
103
+
104
+ protected
105
+
106
+ # In Rails5, future bodies need to be wrapped in an executor,
107
+ # to handle auto-loading right in dev-mode, among other things.
108
+ # Rails docs coming, see https://github.com/rails/rails/issues/26847
109
+ @@rails_has_executor = Rails.application.respond_to?(:executor)
110
+ def rails_future_wrap
111
+ if @@rails_has_executor
112
+ Rails.application.executor.wrap { yield }
113
+ else
114
+ yield
115
+ end
116
+ end
117
+
118
+ # In Rails5, if we are collecting from within an action method
119
+ # (ie the 'request loop'), as we usually will be, we need to
120
+ # give up the autoload lock. Rails docs coming, see https://github.com/rails/rails/issues/26847
121
+ @@rails_needs_interlock_permit = ActiveSupport::Dependencies.respond_to?(:interlock) &&
122
+ !(Rails.application.config.eager_load && Rails.application.config.cache_classes)
123
+ def rails_wait_wrap
124
+ if @@rails_needs_interlock_permit
125
+ ActiveSupport::Dependencies.interlock.permit_concurrent_loads { yield }
126
+ else
127
+ yield
128
+ end
129
+ end
130
+
131
+ end
132
+ rescue LoadError
133
+ # you can use bento_search without concurrent-ruby, just not
134
+ # this class.
135
+ $stderr.puts "Tried but could not load BentoSearch::ConcurrentSearcher, concurrent-ruby not available!"
136
+ end
@@ -36,12 +36,12 @@ module BentoSearch
36
36
  # search service it came from. May be alphanumeric. May be nil
37
37
  # for engines that don't support it.
38
38
  serializable_attr_accessor :unique_id
39
-
39
+
40
40
 
41
41
  # If set to true, item will refuse to generate an openurl,
42
42
  # returning nil from #to_openurl or #openurl_kev
43
43
  serializable_attr_accessor :openurl_disabled
44
-
44
+
45
45
 
46
46
  # Array (possibly empty) of BentoSearch::Link objects
47
47
  # representing additional links. Often SearchEngine's themselves
@@ -52,7 +52,7 @@ module BentoSearch
52
52
 
53
53
  # * dc.title
54
54
  # * schema.org CreativeWork: 'name'
55
- serializable_attr_accessor :title
55
+ serializable_attr_accessor :title
56
56
  # backwards compat, we used to have separate titles and subtitles
57
57
  alias_method :complete_title, :title
58
58
 
@@ -112,7 +112,7 @@ module BentoSearch
112
112
  #
113
113
  # Note: We're re-thinking this, might allow uncontrolled
114
114
  # in here instead.
115
- serializable_attr_accessor :format
115
+ serializable_attr_accessor :format
116
116
 
117
117
  # Translated from internal format vocab at #format. Outputs
118
118
  # eg http://schema.org/Book
@@ -137,7 +137,7 @@ module BentoSearch
137
137
  # uncontrolled presumably english-language format string.
138
138
  # if supplied will be used in display in place of controlled
139
139
  # format.
140
- serializable_attr_accessor :format_str
140
+ serializable_attr_accessor :format_str
141
141
 
142
142
  # Language of materials. Producer can set language_code to an ISO 639-1 (two
143
143
  # letter) or 639-3 (three letter) language code. If you do this, you don't
@@ -153,11 +153,11 @@ module BentoSearch
153
153
  # #language_iso_639_2 (either may be null), or #language_str for uncontrolled
154
154
  # string. If engine just sets one of these, internals take care of filling
155
155
  # out the others.
156
- serializable_attr_accessor :language_code
156
+ serializable_attr_accessor :language_code
157
157
  attr_writer :language_str
158
158
  def language_str
159
- @language_str || language_code.try do |code|
160
- LanguageList::LanguageInfo.find(code).try do |lang_obj|
159
+ (@language_str ||= nil) || language_code.try do |code|
160
+ LanguageList::LanguageInfo.find(code.dup).try do |lang_obj|
161
161
  lang_obj.name
162
162
  end
163
163
  end
@@ -167,7 +167,10 @@ module BentoSearch
167
167
  # if available, otherwise from direct language_str if available and
168
168
  # possible.
169
169
  def language_obj
170
- @language_obj ||= LanguageList::LanguageInfo.find( self.language_code || self.language_str )
170
+ @language_obj ||= begin
171
+ lookup = self.language_code || self.language_str
172
+ LanguageList::LanguageInfo.find( lookup.dup ) if lookup
173
+ end
171
174
  end
172
175
 
173
176
  # Two letter ISO language code, or nil
@@ -230,12 +233,12 @@ module BentoSearch
230
233
 
231
234
  # An ARRAY of string query-in-context snippets. Will usually
232
235
  # have highlighting <b> tags in it. Creator is responsible
233
- # for making sure it's otherwise html-safe.
236
+ # for making sure it's otherwise html-safe.
234
237
  #
235
238
  # Not all engines may store a snippets array in addition to abstract,
236
239
  # some may only store one or the other. Some may store both but
237
240
  # with same content formatted differently (array of multiple vs
238
- # one combined string), some engines they may be different.
241
+ # one combined string), some engines they may be different.
239
242
  attr_accessor :snippets
240
243
  serializable_attr :snippets
241
244
 
@@ -263,7 +266,7 @@ module BentoSearch
263
266
  # for it? Nice thing about the configuration has instead is it's
264
267
  # easily serializable, it's just data.
265
268
  #
266
- # Although we intentionally do NOT include these in JSON serialization, ha.
269
+ # Although we intentionally do NOT include these in JSON serialization, ha.
267
270
  attr_accessor :display_configuration
268
271
  attr_accessor :engine_id
269
272
 
@@ -4,21 +4,21 @@ require 'json'
4
4
  require 'date'
5
5
 
6
6
  # Call #dump_to_json on a BentoSearch value object (such as BentoSearch::Result or ::Author)
7
- # to get it in Json
7
+ # to get it in Json
8
8
  #
9
9
  # Values marked with serializable_attr in BentoSearch::Result are
10
- # included in seralization.
10
+ # included in serialization.
11
11
  #
12
12
  # At present metadata and configuration are NOT serialized: #decorator, #display_configuration,
13
13
  # and #engine_id are not included in the serialization, so when loaded from serialization,
14
- # ResultItems will not have such things set.
15
- #
14
+ # ResultItems will not have such things set.
15
+ #
16
16
  # * Works by getting and setting instance variables directly, ignores getters/setters
17
17
  #
18
18
  # * This means decorated values are NOT included in serialization, the raw
19
19
  # values are what is serialized. This is intended, we serialize internal
20
20
  # state, not decoration which can be recreated. You should make sure the decorators you
21
- # want are applied after de-serialization.
21
+ # want are applied after de-serialization.
22
22
  #
23
23
  # * preserves html_safety status in serialization, by adding extra `_attr_htmlsafe: true` key/value
24
24
  #
@@ -31,22 +31,23 @@ module BentoSearch::Results::Serialization
31
31
  self._serializable_attr_options = {}
32
32
  end
33
33
 
34
+
34
35
  class_methods do
35
36
  # Just a macro to mark a property name serializable -- the name is
36
37
  # of an instance method that will be included in our serializations
37
- # and de-serializations.
38
+ # and de-serializations.
38
39
  #
39
40
  # Options:
40
41
  # * collection_of: String fully qualified name of a class that is
41
42
  # is also BentoSearch::Results::Serialization, the attribute
42
- # is an array of these.
43
+ # is an array of these.
43
44
  # * serializer: String fully qualified class name of a serializer
44
45
  # class that has a `dump` and a `load` for individual values,
45
46
  # we just use it for Date now, see BentoSearch::Results::Serialization::Date
46
47
  def serializable_attr(symbol, options = nil)
47
48
  symbol = symbol.to_s
48
49
  self._serializable_attrs << symbol
49
- if options
50
+ if options
50
51
  self._serializable_attr_options[symbol] = options
51
52
  end
52
53
  end
@@ -67,14 +68,14 @@ module BentoSearch::Results::Serialization
67
68
 
68
69
 
69
70
  if _serializable_attr_options[key] && _serializable_attr_options[key][:collection_of]
70
- klass = qualified_const_get(_serializable_attr_options[key][:collection_of])
71
+ klass = correct_const_get(_serializable_attr_options[key][:collection_of])
71
72
  value = value.collect do |item|
72
73
  klass.from_internal_state_hash(item)
73
74
  end
74
75
  end
75
76
 
76
77
  if _serializable_attr_options[key] && _serializable_attr_options[key][:serializer]
77
- klass = qualified_const_get(_serializable_attr_options[key][:serializer])
78
+ klass = correct_const_get(_serializable_attr_options[key][:serializer])
78
79
  value = klass.load(value)
79
80
  end
80
81
 
@@ -92,18 +93,26 @@ module BentoSearch::Results::Serialization
92
93
  self.from_internal_state_hash( JSON.parse! json_str )
93
94
  end
94
95
 
96
+ def correct_const_get(str)
97
+ if Gem::Version.new(Rails.version) > Gem::Version.new('4.2.99')
98
+ const_get(str)
99
+ else
100
+ qualified_const_get(str)
101
+ end
102
+ end
103
+
95
104
  end
96
105
 
97
106
  def internal_state_hash
98
107
  hash = {}
99
108
  self._serializable_attrs.each do |accessor|
100
109
  accessor = accessor.to_s
101
- value = self.instance_variable_get("@#{accessor}")
110
+ value = self.instance_variable_defined?("@#{accessor}") && self.instance_variable_get("@#{accessor}")
102
111
 
103
112
  next if value.blank?
104
113
 
105
114
  if _serializable_attr_options[accessor] && _serializable_attr_options[accessor][:serializer]
106
- klass = self.class.qualified_const_get(_serializable_attr_options[accessor][:serializer])
115
+ klass = self.class.correct_const_get(_serializable_attr_options[accessor][:serializer])
107
116
  value = klass.dump(value)
108
117
  elsif value.respond_to?(:to_ary)
109
118
  value = value.to_ary.collect do |item|
@@ -133,4 +142,4 @@ module BentoSearch::Results::Serialization
133
142
  end
134
143
  end
135
144
 
136
- end
145
+ end
@@ -9,18 +9,17 @@ require 'nokogiri'
9
9
 
10
10
  module BentoSearch
11
11
  # Usually raised by #get on an engine, when result for specified identifier
12
- # can't be found.
12
+ # can't be found.
13
13
  class ::BentoSearch::NotFound < ::BentoSearch::Error ; end
14
- # Usually raised by #get when identifier results in more than one record.
14
+ # Usually raised by #get when identifier results in more than one record.
15
15
  class ::BentoSearch::TooManyFound < ::BentoSearch::Error ; end
16
16
  # Raised for problem contacting or unexpected response from
17
- # remote service. Not yet universally used.
17
+ # remote service. Not yet universally used.
18
18
  class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
19
19
 
20
-
21
- # Module mix-in for bento_search search engines.
20
+ # Module mix-in for bento_search search engines.
22
21
  #
23
- # ==Using a SearchEngine
22
+ # ==Using a SearchEngine
24
23
  #
25
24
  # See a whole bunch more examples in the project README.
26
25
  #
@@ -43,18 +42,18 @@ module BentoSearch
43
42
  # of BentoSearch::Results
44
43
  #
45
44
  # results = engine.search("query")
46
- #
47
- # See more docs under #search, as well as project README.
48
45
  #
49
- # == Standard configuration variables.
50
- #
46
+ # See more docs under #search, as well as project README.
47
+ #
48
+ # == Standard configuration variables.
49
+ #
51
50
  # Some engines require their own engine-specific configuration for api keys
52
51
  # and such, and offer their own engine-specific configuration for engine-specific
53
- # features.
52
+ # features.
54
53
  #
55
54
  # An additional semi-standard configuration variable, some engines take
56
55
  # an `:auth => true` to tell the engine to assume that all access is by
57
- # authenticated local users who should be given elevated access to results.
56
+ # authenticated local users who should be given elevated access to results.
58
57
  #
59
58
  # Additional standard configuration keys that are implemented by the bento_search
60
59
  # framework:
@@ -63,7 +62,12 @@ module BentoSearch
63
62
  # String name of decorator class that will be applied by #bento_decorate
64
63
  # helper in standard view. See wiki for more info on decorators. Must be
65
64
  # string name, actual class object not supported (to make it easier
66
- # to serialize and transport configuration).
65
+ # to serialize and transport configuration).
66
+ #
67
+ # [log_failed_results]
68
+ # Default false, if true all failed results are logged to
69
+ # `Rails.logger.error`. Can set global default with
70
+ # `BentoSearch.defaults.log_failed_results = true`
67
71
  #
68
72
  # == Implementing a SearchEngine
69
73
  #
@@ -71,7 +75,7 @@ module BentoSearch
71
75
  # generally only responsible for the parts specific to your search engine:
72
76
  # receiving a query, making a call to the external search engine, and
73
77
  # translating its result to a standard BentoSearch::Results full of
74
- # BentoSearch::ResultItems.
78
+ # BentoSearch::ResultItems.
75
79
  #
76
80
  # Start out by simply including the search engine module:
77
81
  #
@@ -85,64 +89,102 @@ module BentoSearch
85
89
  # BentoSearch::Results item.
86
90
  #
87
91
  # The Results object should have #total_items set with total hitcount, and
88
- # contain BentoSearch::ResultItem objects for each hit in the current page.
89
- # See individual class documentation for more info.
92
+ # contain BentoSearch::ResultItem objects for each hit in the current page.
93
+ # See individual class documentation for more info.
90
94
  #
91
95
  # That's about the extent of your responsibilities. If the search failed
92
96
  # for some reason due to an error, you should return a Results object
93
97
  # with its #error object set, so it will be `failed?`. The framework
94
98
  # will take care of this for you for certain uncaught exceptions you allow
95
99
  # to rise out of #search_implementation (timeouts, HTTPClient timeouts,
96
- # nokogiri and MultiJson parse errors).
100
+ # nokogiri and MultiJson parse errors).
97
101
  #
98
102
  # A SearchEngine object can be re-used for multiple searches, possibly
99
103
  # under concurrent multi-threading. Do not store search-specific state
100
104
  # in the search object. but you can store configuration-specific state there
101
- # of course.
102
- #
105
+ # of course.
106
+ #
103
107
  # Recommend use of HTTPClient, if possible, for http searches. Especially
104
108
  # using a class-level HTTPClient instance, to re-use persistent http
105
109
  # connections across searches (can be esp important if you need to contact
106
110
  # external search api via https/ssl).
107
111
  #
108
- # If you have required configuration keys, you can register that with
109
- # class-level required_configuration_keys method.
112
+ # If you have required configuration keys, you can register that with
113
+ # class-level required_configuration_keys method.
110
114
  #
111
- # You can also advertise max per-page value by overriding max_per_page.
115
+ # You can also advertise max per-page value by overriding max_per_page.
112
116
  #
113
- # If you support fielded searching, you should over-ride
117
+ # If you support fielded searching, you should over-ride
114
118
  # #search_field_definitions; if you support sorting, you should
115
119
  # override #sort_definitions. See BentoSearch::SearchEngine::Capabilities
116
- # module for documentation.
117
- #
120
+ # module for documentation.
121
+ #
118
122
  #
119
123
  module SearchEngine
120
124
  DefaultPerPage = 10
121
-
122
125
 
123
-
124
-
125
126
  extend ActiveSupport::Concern
126
-
127
+
127
128
  include Capabilities
128
-
129
+
130
+ mattr_accessor :default_auto_rescued_exceptions
131
+ self.default_auto_rescued_exceptions = [
132
+ BentoSearch::RubyTimeoutClass,
133
+ HTTPClient::TimeoutError,
134
+ HTTPClient::ConfigurationError,
135
+ HTTPClient::BadResponseError,
136
+ MultiJson::DecodeError,
137
+ Nokogiri::SyntaxError,
138
+ SocketError
139
+ ].freeze
140
+
129
141
  included do
130
- attr_accessor :configuration
142
+ attr_accessor :configuration
143
+
144
+ # What exceptions should our #search wrapper rescue and turn
145
+ # into failed results instead of fatal errors?
146
+ #
147
+ # Can't rescue everything, or we eat VCR/webmock errors, and lots
148
+ # of other errors we don't want to eat either, making
149
+ # development really confusing. Perhaps could set this
150
+ # to be something diff in production and dev?
151
+ #
152
+ # This default list is probably useful already, but individual
153
+ # engines can override if it's convenient for their own error
154
+ # handling.
155
+ #
156
+ # Override by just using `auto_rescued_exceptions=` on class _or_ method,
157
+ # although some legacy code may override `def auto_rescue_exceptions` (note
158
+ # old `rescue` vs new `rescued`) which should work too.
159
+ self.class_attribute :auto_rescued_exceptions
160
+ self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
161
+
162
+ # Over-ride returning a hash or Confstruct with
163
+ # any configuration values you want by default.
164
+ # actual user-specified config values will be deep-merged
165
+ # into the defaults.
166
+ def self.default_configuration
167
+ end
168
+
169
+ # Over-ride returning an array of symbols for required
170
+ # configuration keys.
171
+ def self.required_configuration
172
+ end
131
173
  end
132
-
174
+
133
175
  # If specific SearchEngine calls initialize, you want to call super
134
176
  # handles configuration loading, mostly. Argument is a
135
- # Confstruct::Configuration or Hash.
177
+ # Confstruct::Configuration or Hash.
136
178
  def initialize(aConfiguration = Confstruct::Configuration.new)
137
179
  # To work around weird confstruct bug, we need to change
138
- # a hash to a Confstruct ourselves.
180
+ # a hash to a Confstruct ourselves.
139
181
  # https://github.com/mbklein/confstruct/issues/14
140
182
  unless aConfiguration.kind_of? Confstruct::Configuration
141
183
  aConfiguration = Confstruct::Configuration.new aConfiguration
142
184
  end
143
-
144
-
145
- # init, from copy of default, or new
185
+
186
+
187
+ # init, from copy of default, or new
146
188
  if self.class.default_configuration
147
189
  self.configuration = Confstruct::Configuration.new(self.class.default_configuration)
148
190
  else
@@ -150,187 +192,193 @@ module BentoSearch
150
192
  end
151
193
  # merge in current instance config
152
194
  self.configuration.configure ( aConfiguration )
153
-
154
- # global defaults?
195
+
196
+ # global defaults?
155
197
  self.configuration[:for_display] ||= {}
156
-
198
+ unless self.configuration.has_key?(:log_failed_results)
199
+ self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
200
+ end
201
+
157
202
  # check for required keys -- have to be present, and not nil
158
203
  if self.class.required_configuration
159
- self.class.required_configuration.each do |required_key|
204
+ self.class.required_configuration.each do |required_key|
160
205
  if ["**NOT_FOUND**", nil].include? self.configuration.lookup!(required_key.to_s, "**NOT_FOUND**")
161
206
  raise ArgumentError.new("#{self.class.name} requires configuration key #{required_key}")
162
207
  end
163
208
  end
164
209
  end
165
-
210
+
166
211
  end
167
-
168
-
169
- # Method used to actually get results from a search engine.
212
+
213
+
214
+ # Method used to actually get results from a search engine.
170
215
  #
171
216
  # When implementing a search engine, you do not override this #search
172
217
  # method, but instead override #search_implementation. #search will
173
218
  # call your specific #search_implementation, first normalizing the query
174
- # arguments, and then normalizing and adding standard metadata to your return value.
219
+ # arguments, and then normalizing and adding standard metadata to your return value.
175
220
  #
176
221
  # Most engines support pagination, sorting, and searching in a specific
177
- # field.
222
+ # field.
178
223
  #
179
224
  # # 1-based page index
180
225
  # engine.search("query", :per_page => 20, :page => 5)
181
226
  # # or use 0-based per-record index, engines that don't
182
- # # support this will round to nearest page.
227
+ # # support this will round to nearest page.
183
228
  # engine.search("query", :start => 20)
184
229
  #
185
230
  # You can ask an engine what search fields it supports with engine.search_keys
186
231
  # engine.search("query", :search_field => "engine_search_field_name")
187
232
  #
188
233
  # There are also normalized 'semantic' names you can use across engines
189
- # (if they support them): :title, :author, :subject, maybe more.
234
+ # (if they support them): :title, :author, :subject, maybe more.
190
235
  #
191
236
  # engine.search("query", :semantic_search_field => :title)
192
237
  #
193
238
  # Ask an engine what semantic field names it supports with `engine.semantic_search_keys`
194
239
  #
195
- # Unrecognized search fields will be ignored, unless you pass in
196
- # :unrecognized_search_field => :raise (or do same in config).
240
+ # Unrecognized search fields will be ignored, unless you pass in
241
+ # :unrecognized_search_field => :raise (or do same in config).
197
242
  #
198
243
  # Ask an engine what sort fields it supports with `engine.sort_keys`. See
199
244
  # list of standard sort keys in I18n file at ./config/locales/en.yml, in
200
- # `en.bento_search.sort_keys`.
245
+ # `en.bento_search.sort_keys`.
201
246
  #
202
247
  # engine.search("query", :sort => "some_sort_key")
203
248
  #
204
249
  # Some engines support additional arguments to 'search', see individual
205
250
  # engine documentation. For instance, some engines support `:auth => true`
206
251
  # to give the user elevated search privileges when you have an authenticated
207
- # local user.
252
+ # local user.
208
253
  #
209
254
  # Query as first arg is just a convenience, you can also use a single hash
210
- # argument.
255
+ # argument.
211
256
  #
212
257
  # engine.search(:query => "query", :per_page => 20, :page => 4)
213
258
  #
214
259
  def search(*arguments)
215
260
  start_t = Time.now
216
-
261
+
217
262
  arguments = normalized_search_arguments(*arguments)
218
263
 
219
264
  results = search_implementation(arguments)
220
-
265
+
221
266
  fill_in_search_metadata_for(results, arguments)
222
-
267
+
223
268
  results.timing = (Time.now - start_t)
224
-
269
+
225
270
  return results
226
271
  rescue *auto_rescue_exceptions => e
227
272
  # Uncaught exception, log and turn into failed Results object. We
228
273
  # only catch certain types of exceptions, or it makes dev really
229
274
  # confusing eating exceptions. This is intentionally a convenience
230
275
  # to allow search engine implementations to just raise the exception
231
- # and we'll turn it into a proper error.
276
+ # and we'll turn it into a proper error.
232
277
  cleaned_backtrace = Rails.backtrace_cleaner.clean(e.backtrace)
233
278
  log_msg = "BentoSearch::SearchEngine failed results: #{e.inspect}\n #{cleaned_backtrace.join("\n ")}"
234
279
  Rails.logger.error log_msg
235
-
280
+
236
281
  failed = BentoSearch::Results.new
237
282
  failed.error ||= {}
238
283
  failed.error[:exception] = e
239
-
284
+
240
285
  failed.timing = (Time.now - start_t)
241
-
286
+
242
287
  fill_in_search_metadata_for(failed, arguments)
243
288
 
244
-
245
289
  return failed
290
+ ensure
291
+ if results && configuration.log_failed_results && results.failed?
292
+ Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
293
+ end
246
294
  end
247
-
295
+
248
296
  # SOME of the elements of Results to be returned that SearchEngine implementation
249
297
  # fills in automatically post-search. Extracted into a method for DRY in
250
298
  # error handling to try to fill these in even in errors. Also can be used
251
- # as public method for de-serialized or mock results.
299
+ # as public method for de-serialized or mock results.
252
300
  def fill_in_search_metadata_for(results, normalized_arguments = {})
253
301
  results.search_args = normalized_arguments
254
302
  results.start = normalized_arguments[:start] || 0
255
303
  results.per_page = normalized_arguments[:per_page]
256
-
304
+
257
305
  results.engine_id = configuration.id
258
306
  results.display_configuration = configuration.for_display
259
307
 
260
308
  # We copy some configuration info over to each Item, as a convenience
261
309
  # to display logic that may have to decide what to do given only an item,
262
310
  # and may want to parameterize based on configuration.
263
- results.each do |item|
264
- item.engine_id = configuration.id
311
+ results.each do |item|
312
+ item.engine_id = configuration.id
265
313
  item.decorator = configuration.lookup!("for_display.decorator")
266
314
  item.display_configuration = configuration.for_display
267
315
  end
268
316
 
269
317
  results
270
318
  end
271
-
319
+
272
320
 
273
321
  # Take the arguments passed into #search, which can be flexibly given
274
322
  # in several ways, and normalize to an expected single hash that
275
323
  # will be passed to an engine's #search_implementation. The output
276
324
  # of this method is a single hash, and is what a #search_implementation
277
- # can expect to receive as an argument, with keys:
325
+ # can expect to receive as an argument, with keys:
278
326
  #
279
327
  # [:query] the query
280
328
  # [:per_page] will _always_ be present, using the default per_page if
281
329
  # none given by caller
282
330
  # [:start, :page] both :start and :page will _always_ be present, regardless
283
331
  # of which the caller used. They will both be integers, even if strings passed in.
284
- # [:search_field] A search field from the engine's #search_field_definitions, as string.
332
+ # [:search_field] A search field from the engine's #search_field_definitions, as string.
285
333
  # Even if the caller used :semantic_search_field, it'll be normalized
286
- # to the actual local search_field key on output.
287
- # [:sort] Sort key.
334
+ # to the actual local search_field key on output.
335
+ # [:sort] Sort key.
288
336
  #
289
337
  def normalized_search_arguments(*orig_arguments)
290
338
  arguments = {}
291
-
339
+
292
340
  # Two-arg style to one hash, if present
293
341
  if (orig_arguments.length > 1 ||
294
342
  (orig_arguments.length == 1 && ! orig_arguments.first.kind_of?(Hash)))
295
- arguments[:query] = orig_arguments.delete_at(0)
343
+ arguments[:query] = orig_arguments.delete_at(0)
296
344
  end
297
345
 
298
346
  arguments.merge!(orig_arguments.first) if orig_arguments.length > 0
299
-
300
-
347
+
348
+
301
349
  # allow strings for pagination (like from url query), change to
302
- # int please.
350
+ # int please.
303
351
  [:page, :per_page, :start].each do |key|
304
352
  arguments.delete(key) if arguments[key].blank?
305
353
  arguments[key] = arguments[key].to_i if arguments[key]
306
- end
307
- arguments[:per_page] ||= DefaultPerPage
308
-
309
- # illegal arguments
354
+ end
355
+ arguments[:per_page] ||= configuration.default_per_page || DefaultPerPage
356
+
357
+ # illegal arguments
310
358
  if (arguments[:start] && arguments[:page])
311
359
  raise ArgumentError.new("Can't supply both :page and :start")
312
360
  end
313
- if ( arguments[:per_page] &&
314
- self.max_per_page &&
361
+ if ( arguments[:per_page] &&
362
+ self.max_per_page &&
315
363
  arguments[:per_page] > self.max_per_page)
316
364
  raise ArgumentError.new("#{arguments[:per_page]} is more than maximum :per_page of #{self.max_per_page} for #{self.class}")
317
365
  end
318
-
319
-
366
+
367
+
320
368
  # Normalize :page to :start, and vice versa
321
369
  if arguments[:page]
322
370
  arguments[:start] = (arguments[:page] - 1) * arguments[:per_page]
323
371
  elsif arguments[:start]
324
372
  arguments[:page] = (arguments[:start] / arguments[:per_page]) + 1
325
373
  end
326
-
374
+
327
375
  # normalize :sort from possibly symbol to string
328
376
  # TODO: raise if unrecognized sort key?
329
377
  if arguments[:sort]
330
378
  arguments[:sort] = arguments[:sort].to_s
331
379
  end
332
380
 
333
-
381
+
334
382
  # Multi-field search
335
383
  if arguments[:query].kind_of? Hash
336
384
  # Only if allowed
@@ -348,7 +396,7 @@ module BentoSearch
348
396
  # translate semantic fields, raising for unfound fields if configured
349
397
  arguments[:query].transform_keys! do |key|
350
398
  new_key = self.semantic_search_map[key.to_s] || key
351
-
399
+
352
400
  if ( config_arg(arguments, :unrecognized_search_field) == "raise" &&
353
401
  ! self.search_keys.include?(new_key))
354
402
  raise ArgumentError.new("#{self.class.name} does not know about search_field #{new_key}, in query Hash #{arguments[:query]}")
@@ -358,91 +406,73 @@ module BentoSearch
358
406
  end
359
407
 
360
408
  end
361
-
409
+
362
410
  # translate semantic_search_field to search_field, or raise if
363
- # can't.
411
+ # can't.
364
412
  if (semantic = arguments.delete(:semantic_search_field)) && ! semantic.blank?
365
413
  semantic = semantic.to_s
366
414
  # Legacy publication_title is now called source_title
367
415
  semantic = "source_title" if semantic == "publication_title"
368
416
 
369
417
  mapped = self.semantic_search_map[semantic]
370
- if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
418
+ if config_arg(arguments, :unrecognized_search_field) == "raise" && ! mapped
371
419
  raise ArgumentError.new("#{self.class.name} does not know about :semantic_search_field #{semantic}")
372
420
  end
373
421
  arguments[:search_field] = mapped
374
- end
422
+ end
375
423
  if config_arg(arguments, :unrecognized_search_field) == "raise" && ! search_keys.include?(arguments[:search_field])
376
424
  raise ArgumentError.new("#{self.class.name} does not know about :search_field #{arguments[:search_field]}")
377
425
  end
378
-
379
-
426
+
427
+
380
428
  return arguments
381
429
  end
382
430
  alias_method :parse_search_arguments, :normalized_search_arguments
383
-
384
-
385
- # Used mainly/only by the AJAX results loading.
431
+
432
+
433
+ # Used mainly/only by the AJAX results loading.
386
434
  # an array WHITELIST of attributes that can be sent as non-verified
387
435
  # request params and used to execute a search. For instance, 'auth' is
388
- # NOT on there, you can't trust a web request as to 'auth' status.
436
+ # NOT on there, you can't trust a web request as to 'auth' status.
389
437
  # individual engines may over-ride, call super, and add additional
390
- # engine-specific attributes.
438
+ # engine-specific attributes.
391
439
  def public_settable_search_args
392
440
  [:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
393
441
  end
394
-
395
-
442
+
443
+ # Cover method for consistent api with Results
444
+ def display_configuration
445
+ configuration.for_display
446
+ end
447
+
448
+ # Cover method for consistent api with Results
449
+ def engine_id
450
+ configuration.id
451
+ end
452
+
453
+
396
454
  protected
397
455
 
456
+ # For legacy reasons old name auto_rescue_exceptions is here, some
457
+ # sub-classes may override it. Now preferred to use auto_rescued_exceptions
458
+ # setter instead.
459
+ def auto_rescue_exceptions
460
+ self.auto_rescued_exceptions
461
+ end
462
+
398
463
  # get value of an arg that can be supplied in search args OR config,
399
464
  # with search_args over-riding config. Also normalizes value to_s
400
- # (for symbols/strings).
465
+ # (for symbols/strings).
401
466
  def config_arg(arguments, key, default = nil)
402
467
  value = if arguments[key].present?
403
468
  arguments[key]
404
469
  else
405
470
  configuration[key]
406
471
  end
407
-
472
+
408
473
  value = value.to_s if value.kind_of? Symbol
409
-
474
+
410
475
  return value
411
476
  end
412
-
413
- # What exceptions should our #search wrapper rescue and turn
414
- # into failed results instead of fatal errors?
415
- #
416
- # Can't rescue everything, or we eat VCR/webmock errors, and lots
417
- # of other errors we don't want to eat either, making
418
- # development really confusing. Perhaps could set this
419
- # to be something diff in production and dev?
420
- #
421
- # This default list is probably useful already, but individual
422
- # engines can override if it's convenient for their own errorau
423
- # handling.
424
- def auto_rescue_exceptions
425
- [TimeoutError, HTTPClient::TimeoutError,
426
- HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
427
- MultiJson::DecodeError, Nokogiri::SyntaxError]
428
- end
429
-
430
-
431
- module ClassMethods
432
-
433
- # Over-ride returning a hash or Confstruct with
434
- # any configuration values you want by default.
435
- # actual user-specified config values will be deep-merged
436
- # into the defaults.
437
- def default_configuration
438
- end
439
-
440
- # Over-ride returning an array of symbols for required
441
- # configuration keys.
442
- def required_configuration
443
- end
444
-
445
- end
446
-
447
477
  end
448
478
  end