bento_search 0.0.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/README.md +14 -8
  2. data/app/assets/stylesheets/bento_search/suggested_styles.css +22 -0
  3. data/app/models/bento_search/multi_searcher.rb +1 -1
  4. data/app/models/bento_search/result_item.rb +5 -0
  5. data/app/models/bento_search/results.rb +6 -1
  6. data/app/models/bento_search/search_engine.rb +185 -28
  7. data/app/models/bento_search/search_engine/capabilities.rb +7 -1
  8. data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -1
  9. data/app/search_engines/bento_search/eds_engine.rb +43 -10
  10. data/app/search_engines/bento_search/google_books_engine.rb +2 -2
  11. data/app/search_engines/bento_search/mock_engine.rb +51 -0
  12. data/app/search_engines/bento_search/primo_engine.rb +86 -15
  13. data/app/search_engines/bento_search/summon_engine.rb +22 -22
  14. data/lib/bento_search.rb +4 -1
  15. data/lib/bento_search/util.rb +67 -0
  16. data/lib/bento_search/version.rb +1 -1
  17. data/test/dummy/log/development.log +1 -0
  18. data/test/dummy/log/test.log +653 -1951
  19. data/test/helper/bento_search_helper_test.rb +1 -0
  20. data/test/unit/eds_engine_test.rb +41 -1
  21. data/test/unit/google_books_engine_test.rb +8 -0
  22. data/test/unit/handle_highlight_tags_test.rb +71 -0
  23. data/test/unit/item_decorators_test.rb +2 -0
  24. data/test/unit/primo_engine_test.rb +25 -0
  25. data/test/unit/search_engine_base_test.rb +21 -0
  26. data/test/unit/search_engine_test.rb +32 -25
  27. data/test/unit/summon_engine_test.rb +15 -2
  28. data/test/vcr_cassettes/gbs/empty_results.yml +40 -0
  29. data/test/vcr_cassettes/primo/proper_tags_for_snippets.yml +812 -0
  30. data/test/vcr_cassettes/primo/search_smoke_test.yml +978 -799
  31. metadata +15 -10
  32. data/app/assets/stylesheets/bento_search/bento.css +0 -4
  33. data/app/search_engines/bento_search/#Untitled-1# +0 -11
  34. data/test/support/mock_engine.rb +0 -23
  35. data/test/unit/#vcr_test.rb# +0 -68
data/README.md CHANGED
@@ -2,9 +2,9 @@
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/jrochkind/bento_search.png)](http://travis-ci.org/jrochkind/bento_search)
4
4
 
5
-
6
- (**in progress*, not yet ready for use, mainly because we need more
7
- out of the box search engines supported).
5
+ (Fairly robust and stable at this point, but still pre-1.0 release, may
6
+ be some breaking api changes before 1.0, but probably not too many, it's
7
+ looking pretty good).
8
8
 
9
9
  bento_search provides an abstraction/normalization layer for querying and
10
10
  displaying results for external search engines, in Ruby on Rails. Requires
@@ -37,8 +37,13 @@ code in your app that needs to be rewritten. As well as letting you get
37
37
  started quick without reinventing the wheel and figuring out poorly
38
38
  documented vendor API's yourself.
39
39
 
40
+ See code-level api documentation for more details, especially at
41
+ BentoSearch::SearchEngine. http://rubydoc.info/gems/bento_search/frames/
42
+
43
+ An example app using BentoSearch and showing its features is
44
+ available at http://github.com/jrochkind/sample_megasearch
40
45
 
41
- ## Usage
46
+ ## Usage Examples
42
47
 
43
48
  ### Instantiate an engine, and search
44
49
 
@@ -245,8 +250,8 @@ be used to add them.
245
250
  conf.item_decorators = [ SomeModule, OtherModule]
246
251
  end
247
252
 
248
- See BentoSearch::Item for more information on decorators, and BentoSearch::Link
249
- on links.
253
+ See BentoSearch::Link for more info on links. (TODO: Better docs/examples
254
+ on decorators).
250
255
 
251
256
  ## Planned Features
252
257
 
@@ -289,8 +294,9 @@ To re-generate cached responses, delete the relevant files in
289
294
  variable with your own API keys to re-run tests without cached response
290
295
  like this.
291
296
 
292
- Also note `./test/support/mock_engine.rb`, a simple mock/dummy SearchEngine
293
- implementation that can be used in other tests.
297
+ Also note `BentoSearch::MockEngine`, a simple mock/dummy SearchEngine
298
+ implementation that can be used in other tests, including in client
299
+ software where convenient.
294
300
 
295
301
  Pull requests welcome. Pull requests with additional search engine implementations
296
302
  welcome. See more info on writing a BentoSearch::SearchEngine in the inline
@@ -0,0 +1,22 @@
1
+ /* some suggested styles for bento search results. You can
2
+ include this in your asset pipeline with `require bento_search/suggested_styles`,
3
+ or just use this as documentation for suggestions to implement yourself. */
4
+
5
+
6
+ /* year in bold in citations */
7
+ .bento_item_row.published_in .year {
8
+ font-weight: bold;
9
+ }
10
+
11
+ /* highlighted element in title is already in a <b> tag, but
12
+ title is likely to be bold already. italicize it too. */
13
+ .bento_item_title b.bento_search_highlight {
14
+ font-style: italic;
15
+ }
16
+
17
+ /* center the ajax_wait spinner */
18
+ .bento_search_ajax_wait {
19
+ text-align: center;
20
+ margin: 2em;
21
+ }
22
+
@@ -92,7 +92,7 @@ class BentoSearch::MultiSearcher
92
92
  begin
93
93
  @results = self.engine.search(*search_args)
94
94
  rescue Exception => e
95
- warn e
95
+ Rails.logger.error("\nBentoSearch:MultiSearcher caught exception: #{e}\n#{e.backtrace.join(" \n")}")
96
96
  # Make a fake results with caught exception.
97
97
  @results = BentoSearch::Results.new
98
98
  @results.error ||= {}
@@ -3,6 +3,11 @@ module BentoSearch
3
3
  # with common data fields. Usually held in a BentoSearch::Results object.
4
4
  #
5
5
  # ANY field can be nil, clients should be aware.
6
+ #
7
+ # Each item has a field for one main link as string url, at #link (which may be nil),
8
+ # as well as array of possibly additional links (with labels and metadata)
9
+ # under #other_links. #other_links is an array of BentoSearch::Link
10
+ # objects.
6
11
  class ResultItem
7
12
  include ERB::Util # for html_escape for our presentational stuff
8
13
  include ActionView::Helpers::OutputSafetyHelper # for safe_join
@@ -30,8 +30,13 @@ module BentoSearch
30
30
  # Possibly a ruby exception object. may be nil.
31
31
  attr_accessor :error
32
32
 
33
- # time it took to do search, in seconds.
33
+ # time it took to do search, in seconds as float
34
34
  attr_accessor :timing
35
+ # timing from #timing, but in milliseconds as int
36
+ def timing_ms
37
+ return nil if timing.nil?
38
+ (timing * 1000).to_i
39
+ end
35
40
 
36
41
  # search arguments as normalized by SearchEngine, not neccesarily
37
42
  # directly as input. A hash.
@@ -2,46 +2,117 @@ require 'active_support/concern'
2
2
  require 'active_support/core_ext/module/delegation'
3
3
  require 'confstruct'
4
4
 
5
+ # just so we can catch their exceptions:
6
+ require 'httpclient'
7
+ require 'multi_json'
8
+ require 'nokogiri'
5
9
 
6
10
  module BentoSearch
7
11
  # Module mix-in for bento_search search engines.
8
12
  #
9
13
  # ==Using a SearchEngine
10
14
  #
11
- # * init/config
12
- # * search
13
- # * pagination, with max per_page
14
- # * search fields, with semantics. ask for supported search fields.
15
+ # See a whole bunch more examples in the project README.
15
16
  #
16
- # == Standard config
17
- # * item_decorators : Array of Modules that will be decorated. See Decorators section.
17
+ # You can initialize a search engine with configuration (some engines
18
+ # have required configuration):
19
+ #
20
+ # engine = SomeSearchEngine.new(:config_key => 'foo')
21
+ #
22
+ # Or, it can be convenient (and is required for some features) to store
23
+ # a search engine with configuration in a global registry:
24
+ #
25
+ # BentoSearch.register_engine("some_searcher") do |config|
26
+ # config.engine = "SomeSearchEngine"
27
+ # config.config_key = "foo"
28
+ # end
29
+ # # instantiates a new engine with registered config:
30
+ # engine = BentoSearch.get_engine("some_searcher")
31
+ #
32
+ # You can then use the #search method, which returns an instance of
33
+ # BentoSearch::Results
34
+ #
35
+ # results = engine.search("query")
36
+ #
37
+ # See more docs under #search, as well as project README.
38
+ #
39
+ # == Standard configuration variables.
40
+ #
41
+ # Some engines require their own engine-specific configuration for api keys
42
+ # and such, and offer their own engine-specific configuration for engine-specific
43
+ # features.
44
+ #
45
+ # An additional semi-standard configuration variable, some engines take
46
+ # an `:auth => true` to tell the engine to assume that all access is by
47
+ # authenticated local users who should be given elevated access to results.
48
+ #
49
+ # Additional standard configuration keys that are implemented by the bento_search
50
+ # framework:
51
+ #
52
+ # [item_decorators]
53
+ # Array of Modules that will be decorated on to each individual search
54
+ # BentoSearch::ResultItem. These can be used to, via configuration, change
55
+ # the links associated with items, change certain item behaviors, or massage
56
+ # item metadata. (Needs more documentation).
57
+ #
18
58
  #
19
59
  # == Implementing a SearchEngine
20
60
  #
21
- # `include BentoSearch::SearchEngine`
61
+ # Implementing a new SearchEngine is relatively straightforward -- you are
62
+ # generally only responsible for the parts specific to your search engine:
63
+ # receiving a query, making a call to the external search engine, and
64
+ # translating its result to a standard BentoSearch::Results full of
65
+ # BentoSearch::ResultItems.
66
+ #
67
+ # Start out by simply including the search engine module:
68
+ #
69
+ # class MyEngine
70
+ # include BentoSearch::SearchEngine
71
+ # end
72
+ #
73
+ # Next, at a minimum, you need to implement a #search_implementation
74
+ # method, which takes a _normalized_ hash of search instructions as input
75
+ # (see documentation at #normalized_search_arguments), and returns
76
+ # BentoSearch::Results item.
22
77
  #
23
- # a SearchEngine's state should not be search-specific, but
24
- # is configuration specific. Don't store anything specific
25
- # to a specific search in iVars.
78
+ # The Results object should have #total_items set with total hitcount, and
79
+ # contain BentoSearch::ResultItem objects for each hit in the current page.
80
+ # See individual class documentation for more info.
26
81
  #
27
- # Do implement `#search(*args)`
82
+ # That's about the extent of your responsibilities. If the search failed
83
+ # for some reason due to an error, you should return a Results object
84
+ # with its #error object set, so it will be `failed?`. The framework
85
+ # will take care of this for you for certain uncaught exceptions you allow
86
+ # to rise out of #search_implementation (timeouts, HTTPClient timeouts,
87
+ # nokogiri and MultiJson parse errors).
88
+ #
89
+ # A SearchEngine object can be re-used for multiple searches, possibly
90
+ # under concurrent multi-threading. Do not store search-specific state
91
+ # in the search object. but you can store configuration-specific state there
92
+ # of course.
28
93
  #
29
- # Do use HTTPClient, if possible, for http searches,
30
- # using a class-level HTTPClient to maintain persistent connections.
94
+ # Recommend use of HTTPClient, if possible, for http searches. Especially
95
+ # using a class-level HTTPClient instance, to re-use persistent http
96
+ # connections across searches (can be esp important if you need to contact
97
+ # external search api via https/ssl).
98
+ #
99
+ # If you have required configuration keys, you can register that with
100
+ # class-level required_configuration_keys method.
31
101
  #
32
- # Other options:
33
- # * implement a class-level `self.required_configuration' returning
34
- # an array of config keys or dot keypaths, and it'll raise on init
35
- # if those config's weren't supplied.
36
- # * max per page
37
- # * search fields
102
+ # You can also advertise max per-page value by overriding max_per_page.
38
103
  #
39
- # Some engines support `:auth => true` for elevated access to affiliated
40
- # users.
104
+ # If you support fielded searching, you should over-ride
105
+ # #search_field_definitions; if you support sorting, you should
106
+ # override #sort_definitions. See BentoSearch::SearchEngine::Capabilities
107
+ # module for documentation.
108
+ #
41
109
  #
42
110
  module SearchEngine
43
111
  DefaultPerPage = 10
44
112
 
113
+
114
+
115
+
45
116
  extend ActiveSupport::Concern
46
117
 
47
118
  include Capabilities
@@ -66,10 +137,10 @@ module BentoSearch
66
137
  # global defaults?
67
138
  self.configuration[:item_decorators] ||= []
68
139
 
69
- # check for required keys
140
+ # check for required keys -- have to be present, and not nil
70
141
  if self.class.required_configuration
71
142
  self.class.required_configuration.each do |required_key|
72
- if self.configuration.lookup!(required_key.to_s, "**NOT_FOUND**") == "**NOT_FOUND**"
143
+ if ["**NOT_FOUND**", nil].include? self.configuration.lookup!(required_key.to_s, "**NOT_FOUND**")
73
144
  raise ArgumentError.new("#{self.class.name} requires configuration key #{required_key}")
74
145
  end
75
146
  end
@@ -77,9 +148,49 @@ module BentoSearch
77
148
 
78
149
  end
79
150
 
80
- # Calls individual engine #search_implementation.
81
- # first normalizes arguments, also adds on standard metadata
82
- # to results.
151
+
152
+ # Method used to actually get results from a search engine.
153
+ #
154
+ # When implementing a search engine, you do not override this #search
155
+ # method, but instead override #search_implementation. #search will
156
+ # call your specific #search_implementation, first normalizing the query
157
+ # arguments, and then normalizing and adding standard metadata to your return value.
158
+ #
159
+ # Most engines support pagination, sorting, and searching in a specific
160
+ # field.
161
+ #
162
+ # # 1-based page index
163
+ # engine.search("query", :per_page => 20, :page => 5)
164
+ # # or use 0-based per-record index, engines that don't
165
+ # # support this will round to nearest page.
166
+ # engine.search("query", :start => 20)
167
+ #
168
+ # You can ask an engine what search fields it supports with engine.search_keys
169
+ # engine.search("query", :search_field => "engine_search_field_name")
170
+ #
171
+ # There are also normalized 'semantic' names you can use across engines
172
+ # (if they support them): :title, :author, :subject, maybe more.
173
+ #
174
+ # engine.search("query", :semantic_search_field => :title)
175
+ #
176
+ # Ask an engine what semantic field names it supports with `engine.semantic_search_keys`
177
+ #
178
+ # Ask an engine what sort fields it supports with `engine.sort_keys`. See
179
+ # list of standard sort keys in I18n file at ./config/locales/en.yml, in
180
+ # `en.bento_search.sort_keys`.
181
+ #
182
+ # engine.search("query", :sort => "some_sort_key")
183
+ #
184
+ # Some engines support additional arguments to 'search', see individual
185
+ # engine documentation. For instance, some engines support `:auth => true`
186
+ # to give the user elevated search privileges when you have an authenticated
187
+ # local user.
188
+ #
189
+ # Query as first arg is just a convenience, you can also use a single hash
190
+ # argument.
191
+ #
192
+ # engine.search(:query => "query", :per_page => 20, :page => 4)
193
+ #
83
194
  def search(*arguments)
84
195
  start_t = Time.now
85
196
 
@@ -99,10 +210,39 @@ module BentoSearch
99
210
  results.timing = (Time.now - start_t)
100
211
 
101
212
  return results
213
+ rescue *auto_rescue_exceptions => e
214
+ # Uncaught exception, log and turn into failed Results object. We
215
+ # only catch certain types of exceptions, or it makes dev really
216
+ # confusing eating exceptions. This is intentionally a convenience
217
+ # to allow search engine implementations to just raise the exception
218
+ # and we'll turn it into a proper error.
219
+ cleaned_backtrace = Rails.backtrace_cleaner.clean(e.backtrace)
220
+ log_msg = "BentoSearch::SearchEngine failed results: #{e.inspect}\n #{cleaned_backtrace.join("\n ")}"
221
+ Rails.logger.error log_msg
222
+
223
+ failed = BentoSearch::Results.new
224
+ failed.error ||= {}
225
+ failed.error[:exception] = e
226
+ return failed
102
227
  end
103
228
 
104
229
 
105
-
230
+ # Take the arguments passed into #search, which can be flexibly given
231
+ # in several ways, and normalize to an expected single hash that
232
+ # will be passed to an engine's #search_implementation. The output
233
+ # of this method is a single hash, and is what a #search_implementation
234
+ # can expect to receive as an argument, with keys:
235
+ #
236
+ # [:query] the query
237
+ # [:per_page] will _always_ be present, using the default per_page if
238
+ # none given by caller
239
+ # [:start, :page] both :start and :page will _always_ be present, regardless
240
+ # of which the caller used. They will both be integers, even if strings passed in.
241
+ # [:search_field] A search field from the engine's #search_field_definitions, as string.
242
+ # Even if the caller used :semantic_search_field, it'll be normalized
243
+ # to the actual local search_field key on output.
244
+ # [:sort] Sort key.
245
+ #
106
246
  def normalized_search_arguments(*orig_arguments)
107
247
  arguments = {}
108
248
 
@@ -163,7 +303,7 @@ module BentoSearch
163
303
  alias_method :parse_search_arguments, :normalized_search_arguments
164
304
 
165
305
 
166
-
306
+
167
307
 
168
308
 
169
309
  protected
@@ -177,6 +317,23 @@ module BentoSearch
177
317
  end
178
318
  end
179
319
 
320
+ # What exceptions should our #search wrapper rescue and turn
321
+ # into failed results instead of fatal errors?
322
+ #
323
+ # Can't rescue everything, or we eat VCR/webmock errors, and lots
324
+ # of other errors we don't want to eat either, making
325
+ # development really confusing. Perhaps could set this
326
+ # to be something diff in production and dev?
327
+ #
328
+ # This default list is probably useful already, but individual
329
+ # engines can override if it's convenient for their own error
330
+ # handling.
331
+ def auto_rescue_exceptions
332
+ [TimeoutError, HTTPClient::TimeoutError,
333
+ HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
334
+ MultiJson::DecodeError, Nokogiri::SyntaxError]
335
+ end
336
+
180
337
 
181
338
  module ClassMethods
182
339
 
@@ -1,4 +1,3 @@
1
-
2
1
  # Methods that describe a search engine's capabilities,
3
2
  # mixed into SearchEngine. Individual engine implementations
4
3
  # will often over-ride some or all of these methods.
@@ -24,6 +23,7 @@ module BentoSearch::SearchEngine::Capabilities
24
23
  # Keys should where possible be _standard_ keys chosen from
25
24
  # those listed in config/i18n/en:bento_search.sort_keys.*
26
25
  # But if you need something not there, it can be custom to engine.
26
+ # (or we can add it there?).
27
27
  # Value of hash is for internal use by engine, it may be a convenient
28
28
  # place to store implementation details.
29
29
  #
@@ -32,6 +32,12 @@ module BentoSearch::SearchEngine::Capabilities
32
32
  def sort_definitions
33
33
  {}
34
34
  end
35
+
36
+ # convenience to get just the sort keys, which is what client
37
+ # cares about.
38
+ def sort_keys
39
+ sort_definitions.keys
40
+ end
35
41
 
36
42
  # Override to return int max per-page.
37
43
  def max_per_page
@@ -58,6 +58,8 @@ require 'httpclient'
58
58
  # * The 'info' service can be used to see what databases you have access to.
59
59
  # * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
60
60
  #
61
+ # Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
62
+ # http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=~/Services/SearchService.asmx&method=Info
61
63
  #
62
64
  #
63
65
  #
@@ -172,7 +174,7 @@ class BentoSearch::EbscoHostEngine
172
174
  components.uniq! # no need to have the same thing twice
173
175
 
174
176
  # some hard-coded cases for better user-displayable string
175
- if components.first == "Academic Journal" && components.last == "Article"
177
+ if ["Academic Journal", "Journal"].include?(components.first) && ["Article", "Journal Article"].include?(components.last)
176
178
  return "Journal Article"
177
179
  elsif components.first == "Periodical" && components.length > 1
178
180
  return components.last
@@ -39,10 +39,14 @@ require 'http_client_patch/include_client'
39
39
  # ourselves. However, in our testing, the first/only CustomLink was an
40
40
  # an OpenURL. If configuration.assume_first_custom_link_openurl is
41
41
  # true (as is default), it will be used to create an OpenURL link. However, in
42
- # our testing, many records don't have this at all. **Note** Ask EBSCO support
42
+ # our testing, many records don't have this at all. **Note** You want
43
43
  # to configure your profile so OpenURLs are ALWAYS included for all records, not
44
44
  # just records with no EBSCO fulltext, to ensure bento_search can get the
45
- # openurl.
45
+ # openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
46
+ # have to ask EBSCO support for help, it's confusing!).
47
+ #
48
+ # TODO: May have to add configuration code to pull the OpenURL link out by
49
+ # its configured name or label, not assume first one is it.
46
50
  #
47
51
  # As always, you can customize links and other_links with Item Decorators.
48
52
  #
@@ -83,6 +87,12 @@ require 'http_client_patch/include_client'
83
87
  # By default the engine will search as 'guest' unauth user. But config
84
88
  # 'auth' key to true to force all searches to auth (if you are protecting your
85
89
  # app) or pass :auth => true as param into #search method.
90
+ #
91
+ # == Source Types
92
+ # # What the EBSCO 'source types' mean: http://support.ebsco.com/knowledge_base/detail.php?id=5382
93
+ #
94
+ # But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
95
+
86
96
  #
87
97
  # == EDS docs:
88
98
  #
@@ -90,6 +100,7 @@ require 'http_client_patch/include_client'
90
100
  # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
91
101
  # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
92
102
  #
103
+
93
104
  class BentoSearch::EdsEngine
94
105
  include BentoSearch::SearchEngine
95
106
 
@@ -152,7 +163,7 @@ class BentoSearch::EdsEngine
152
163
  end
153
164
  # Can't have any commas in query, it turns out, although
154
165
  # this is not documented.
155
- query += args[:query].gsub("/\,/", "")
166
+ query += args[:query].gsub(",", " ")
156
167
 
157
168
  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
158
169
 
@@ -174,6 +185,11 @@ class BentoSearch::EdsEngine
174
185
  end
175
186
  end
176
187
 
188
+ if configuration.only_source_types.present?
189
+ # facetfilter=1,SourceType:Research Starters,SourceType:Books
190
+ url += "&facetfilter=" + CGI.escape("1," + configuration.only_source_types.collect {|t| "SourceType:#{t}"}.join(","))
191
+ end
192
+
177
193
 
178
194
  return url
179
195
  end
@@ -189,7 +205,9 @@ class BentoSearch::EdsEngine
189
205
  with_session(end_user_auth) do |session_token|
190
206
 
191
207
  url = construct_search_url(args)
192
-
208
+
209
+
210
+
193
211
  response = get_with_auth(url, session_token)
194
212
 
195
213
  results = BentoSearch::Results.new
@@ -202,6 +220,7 @@ class BentoSearch::EdsEngine
202
220
  item = BentoSearch::ResultItem.new
203
221
 
204
222
  item.title = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )
223
+
205
224
  if item.title.nil? && ! end_user_auth
206
225
  item.title = I18n.translate("bento_search.eds.record_not_available")
207
226
  end
@@ -213,9 +232,16 @@ class BentoSearch::EdsEngine
213
232
  # actual authors out of. WTF. Thanks for handling fragments
214
233
  # nokogiri.
215
234
  author_mess = element_by_group(record_xml, "Au")
235
+ # only SOMETIMES does it have XML tags, other times it's straight text.
236
+ # ARGH.
216
237
  author_xml = Nokogiri::XML::fragment(author_mess)
217
- author_xml.xpath(".//searchLink").each do |author_node|
218
- item.authors << BentoSearch::Author.new(:display => author_node.text)
238
+ searchLinks = author_xml.xpath(".//searchLink")
239
+ if searchLinks.size > 0
240
+ author_xml.xpath(".//searchLink").each do |author_node|
241
+ item.authors << BentoSearch::Author.new(:display => author_node.text)
242
+ end
243
+ else
244
+ item.authors << BentoSearch::Author.new(:display => author_xml.text)
219
245
  end
220
246
 
221
247
 
@@ -259,10 +285,17 @@ class BentoSearch::EdsEngine
259
285
  # tags in this mess, they will be lost. Probably don't
260
286
  # need highlighting in source anyhow.
261
287
  citation_mess = element_by_group(record_xml, "Src")
262
- citation_txt = Nokogiri::XML::fragment(citation_mess).text
263
- # But strip off some "count of references" often on the end
264
- # which are confusing and useless.
265
- item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
288
+ # Argh, but sometimes it's in SrcInfo _without_ tags instead
289
+ if citation_mess
290
+ citation_txt = Nokogiri::XML::fragment(citation_mess).text
291
+ # But strip off some "count of references" often on the end
292
+ # which are confusing and useless.
293
+ item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
294
+ else
295
+ # try another location
296
+ item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
297
+ end
298
+
266
299
 
267
300
  item.extend CitationMessDecorator
268
301