bento_search 1.7.0.beta.1 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,19 +1,19 @@
1
- begin
1
+ begin
2
2
  require 'celluloid'
3
3
 
4
4
  # Based on Celluloid, concurrently runs multiple searches in
5
5
  # separate threads. You must include 'celluloid' gem dependency
6
6
  # into your local app to use this class. Requires celluloid 0.12.0
7
- # or above (for new preferred async syntax).
7
+ # or above (for new preferred async syntax).
8
8
  #
9
9
  # Warning, if you don't have celluloid in your app, this class simply
10
10
  # won't load. TODO: We should put this file in a different directory
11
11
  # so it's never auto-loaded, and requires a "require 'bento_search/multi_searcher'",
12
12
  # such that it will raise without celluloid only then, and we don't need this
13
- # rescue LoadError stuff.
13
+ # rescue LoadError stuff.
14
14
  #
15
15
  # I am not an expert at use of Celluloid, it's possible there's a better
16
- # way to do this all, but seems to work.
16
+ # way to do this all, but seems to work.
17
17
  #
18
18
  # ## Usage
19
19
  #
@@ -26,14 +26,14 @@ begin
26
26
  # retrieve results, blocking until each is completed:
27
27
  # searcher.results
28
28
  #
29
- # returns a Hash keyed by engine id, values BentoSearch::Results objects.
29
+ # returns a Hash keyed by engine id, values BentoSearch::Results objects.
30
30
  #
31
31
  # Can only call #results once per #start, after that it'll return empty hash.
32
- # (should we make it actually raise instead?). .
33
- #
32
+ # (should we make it actually raise instead?). .
33
+ #
34
34
  # important to call results at some point after calling start, in order
35
35
  # to make sure Celluloid::Actors are properly terminated to avoid
36
- # resource leakage. May want to do it in an ensure block.
36
+ # resource leakage. May want to do it in an ensure block.
37
37
  #
38
38
  # Note that celluloid uses multi-threading in such a way that you
39
39
  # may have to set config.cache_classes=true even in development
@@ -41,87 +41,88 @@ begin
41
41
  #
42
42
  #
43
43
  # TODO: have a method that returns Futures instead of only supplying the blocking
44
- # results method? Several tricks, including making sure to properly terminate actors.
44
+ # results method? Several tricks, including making sure to properly terminate actors.
45
45
  class BentoSearch::MultiSearcher
46
-
46
+
47
47
  def initialize(*engine_ids)
48
+ ActiveSupport::Deprecation.warn('BentoSearch::MultiSearcher is deprecated and will be removed in bento_search 2.0. Please use BentoSearch::ConcurrentSearcher instead.', caller.slice(1..-1))
49
+
48
50
  @engines = []
49
51
  @actors = []
50
52
  engine_ids.each do |id|
51
53
  add_engine( BentoSearch.get_engine id )
52
54
  end
53
55
  end
54
-
56
+
55
57
  # Adds an instantiated engine directly, rather than by id from global
56
- # registry.
57
- def add_engine(engine)
58
+ # registry.
59
+ def add_engine(engine)
58
60
  @engines << engine
59
61
  end
60
-
61
- # Starts all searches, returns self so you can chain method calls if you like.
62
+
63
+ # Starts all searches, returns self so you can chain method calls if you like.
62
64
  def search(*search_args)
63
65
  @engines.each do |engine|
64
66
  a = Actor.new(engine)
65
67
  @actors << a
66
68
  a.async.start *search_args
67
- end
69
+ end
68
70
  return self
69
71
  end
70
72
  alias_method :start, :search # backwards compat
71
-
73
+
72
74
  # Call after #start. Blocks until each included engine is finished
73
75
  # then returns a Hash keyed by engine registered id, value is a
74
- # BentoSearch::Results object.
76
+ # BentoSearch::Results object.
75
77
  #
76
78
  # Can only call _once_ per invocation of #start, after that it'll return
77
- # an empty hash.
79
+ # an empty hash.
78
80
  def results
79
81
  results = {}
80
-
82
+
81
83
  # we use #delete_if to get an iterator that deletes
82
- # each item after iteration.
84
+ # each item after iteration.
83
85
  @actors.delete_if do |actor|
84
86
  result_key = (actor.engine.configuration.id || actor.engine.class.name)
85
87
  results[result_key] = actor.results
86
88
  actor.terminate
87
-
89
+
88
90
  true
89
91
  end
90
-
92
+
91
93
  return results
92
94
  end
93
-
94
-
95
+
96
+
95
97
  class Actor
96
98
  include Celluloid
97
-
99
+
98
100
  attr_accessor :engine
99
-
101
+
100
102
  def initialize(a_engine)
101
103
  self.engine = a_engine
102
104
  end
103
-
104
- # call as .async.start, to invoke async.
105
+
106
+ # call as .async.start, to invoke async.
105
107
  def start(*search_args)
106
108
  begin
107
109
  @results = self.engine.search(*search_args)
108
110
  rescue StandardError => e
109
111
  Rails.logger.error("\nBentoSearch:MultiSearcher caught exception: #{e}\n#{e.backtrace.join(" \n")}")
110
- # Make a fake results with caught exception.
112
+ # Make a fake results with caught exception.
111
113
  @results = BentoSearch::Results.new
112
114
  self.engine.fill_in_search_metadata_for(@results, self.engine.normalized_search_arguments(search_args))
113
-
115
+
114
116
  @results.error ||= {}
115
- @results.error["exception"] = e
117
+ @results.error["exception"] = e
116
118
  end
117
119
  end
118
-
120
+
119
121
  def results
120
122
  @results
121
123
  end
122
-
124
+
123
125
  end
124
-
125
126
  end
126
127
 
127
128
  rescue LoadError
@@ -17,7 +17,6 @@ module BentoSearch
17
17
  # remote service. Not yet universally used.
18
18
  class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
19
19
 
20
-
21
20
  # Module mix-in for bento_search search engines.
22
21
  #
23
22
  # ==Using a SearchEngine
@@ -65,6 +64,11 @@ module BentoSearch
65
64
  # string name, actual class object not supported (to make it easier
66
65
  # to serialize and transport configuration).
67
66
  #
67
+ # [log_failed_results]
68
+ # Default false, if true all failed results are logged to
69
+ # `Rails.logger.error`. Can set global default with
70
+ # `BentoSearch.defaults.log_failed_results = true`
71
+ #
68
72
  # == Implementing a SearchEngine
69
73
  #
70
74
  # Implementing a new SearchEngine is relatively straightforward -- you are
@@ -119,15 +123,53 @@ module BentoSearch
119
123
  module SearchEngine
120
124
  DefaultPerPage = 10
121
125
 
122
-
123
-
124
-
125
126
  extend ActiveSupport::Concern
126
127
 
127
128
  include Capabilities
128
129
 
130
+ mattr_accessor :default_auto_rescued_exceptions
131
+ self.default_auto_rescued_exceptions = [
132
+ BentoSearch::RubyTimeoutClass,
133
+ HTTPClient::TimeoutError,
134
+ HTTPClient::ConfigurationError,
135
+ HTTPClient::BadResponseError,
136
+ MultiJson::DecodeError,
137
+ Nokogiri::SyntaxError,
138
+ SocketError
139
+ ].freeze
140
+
129
141
  included do
130
142
  attr_accessor :configuration
143
+
144
+ # What exceptions should our #search wrapper rescue and turn
145
+ # into failed results instead of fatal errors?
146
+ #
147
+ # Can't rescue everything, or we eat VCR/webmock errors, and lots
148
+ # of other errors we don't want to eat either, making
149
+ # development really confusing. Perhaps could set this
150
+ # to be something diff in production and dev?
151
+ #
152
+ # This default list is probably useful already, but individual
153
+ # engines can override if it's convenient for their own error
154
+ # handling.
155
+ #
156
+ # Override by just using `auto_rescued_exceptions=` on class _or_ method,
157
+ # although some legacy code may override `def auto_rescue_exceptions` (note
158
+ # old `rescue` vs new `rescued`) which should work too.
159
+ self.class_attribute :auto_rescued_exceptions
160
+ self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
161
+
162
+ # Over-ride returning a hash or Confstruct with
163
+ # any configuration values you want by default.
164
+ # actual user-specified config values will be deep-merged
165
+ # into the defaults.
166
+ def self.default_configuration
167
+ end
168
+
169
+ # Over-ride returning an array of symbols for required
170
+ # configuration keys.
171
+ def self.required_configuration
172
+ end
131
173
  end
132
174
 
133
175
  # If specific SearchEngine calls initialize, you want to call super
@@ -153,6 +195,9 @@ module BentoSearch
153
195
 
154
196
  # global defaults?
155
197
  self.configuration[:for_display] ||= {}
198
+ unless self.configuration.has_key?(:log_failed_results)
199
+ self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
200
+ end
156
201
 
157
202
  # check for required keys -- have to be present, and not nil
158
203
  if self.class.required_configuration
@@ -241,8 +286,11 @@ module BentoSearch
241
286
 
242
287
  fill_in_search_metadata_for(failed, arguments)
243
288
 
244
-
245
289
  return failed
290
+ ensure
291
+ if results && configuration.log_failed_results && results.failed?
292
+ Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
293
+ end
246
294
  end
247
295
 
248
296
  # SOME of the elements of Results to be returned that SearchEngine implementation
@@ -392,9 +440,26 @@ module BentoSearch
392
440
  [:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
393
441
  end
394
442
 
443
+ # Cover method for consistent api with Results
444
+ def display_configuration
445
+ configuration.for_display
446
+ end
447
+
448
+ # Cover method for consistent api with Results
449
+ def engine_id
450
+ configuration.id
451
+ end
452
+
395
453
 
396
454
  protected
397
455
 
456
+ # For legacy reasons old name auto_rescue_exceptions is here, some
457
+ # sub-classes may override it. Now preferred to use auto_rescued_exceptions
458
+ # setter instead.
459
+ def auto_rescue_exceptions
460
+ self.auto_rescued_exceptions
461
+ end
462
+
398
463
  # get value of an arg that can be supplied in search args OR config,
399
464
  # with search_args overriding config. Also normalizes value to_s
400
465
  # (for symbols/strings).
@@ -409,40 +474,5 @@ module BentoSearch
409
474
 
410
475
  return value
411
476
  end
412
-
413
- # What exceptions should our #search wrapper rescue and turn
414
- # into failed results instead of fatal errors?
415
- #
416
- # Can't rescue everything, or we eat VCR/webmock errors, and lots
417
- # of other errors we don't want to eat either, making
418
- # development really confusing. Perhaps could set this
419
- # to be something diff in production and dev?
420
- #
421
- # This default list is probably useful already, but individual
422
- # engines can override if it's convenient for their own error
423
- # handling.
424
- def auto_rescue_exceptions
425
- [BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
426
- HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
427
- MultiJson::DecodeError, Nokogiri::SyntaxError, SocketError]
428
- end
429
-
430
-
431
- module ClassMethods
432
-
433
- # Over-ride returning a hash or Confstruct with
434
- # any configuration values you want by default.
435
- # actual user-specified config values will be deep-merged
436
- # into the defaults.
437
- def default_configuration
438
- end
439
-
440
- # Over-ride returning an array of symbols for required
441
- # configuration keys.
442
- def required_configuration
443
- end
444
-
445
- end
446
-
447
477
  end
448
478
  end
@@ -16,7 +16,7 @@ module BentoSearch
16
16
  include ActionView::Helpers::SanitizeHelper
17
17
 
18
18
 
19
- class_attribute :http_timeout
19
+ class_attribute :http_timeout, instance_writer: false
20
20
  self.http_timeout = 10
21
21
 
22
22
  extend HTTPClientPatch::IncludeClient
@@ -25,45 +25,40 @@ require 'http_client_patch/include_client'
25
25
  # == Linking
26
26
  #
27
27
  # The link to record in EBSCO interface delivered as "PLink" will be listed
28
- # as record main link.
28
+ # as record main link. If the record includes a node at `./FullText/Links/Link/Type[text() = 'pdflink']`,
29
+ # the `plink` will be marked as fulltext. (There may be other cases of fulltext, but
30
+ # this seems to be all EDS API tells us.)
29
31
  #
30
32
  # Any links listed under <CustomLinks> will be listed as other_links, using
31
- # configured name provided by EBSCO for CustomLink.
33
+ # configured name provided by EBSCO for CustomLink. Same with links listed
34
+ # as `<Item><Group>URL</Group>`.
32
35
  #
33
- # EDS Response does not have sufficient metadata for us to generate an OpenURL
34
- # ourselves. However, in our testing, the first/only CustomLink was an
35
- # an OpenURL. If configuration.assume_first_custom_link_openurl is
36
- # true (as is default), it will be used to create an OpenURL link. However, in
37
- # our testing, many records don't have this at all. **Note** You want
38
- # to configure your profile so OpenURLs are ALWAYS included for all records, not
39
- # just records with no EBSCO fulltext, to ensure bento_search can get the
40
- # openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
41
- # have to ask EBSCO support for help, it's confusing!).
36
+ # As always, you can customize links and other_links with Item Decorators.
42
37
  #
43
- # TODO: May have to add configuration code to pull the OpenURL link out by
44
- # it's configured name or label, not assume first one is it.
38
+ # == Custom Data
45
39
  #
46
- # As always, you can customize links and other_links with Item Decorators.
40
+ # If present, there is a custom_data[:holdings] value, an array of
41
+ # BentoSearch::EdsEngine::Holding objects, each of which has a #location
42
+ # and #call_number. There will usually (always?) be at most 1 item in the
43
+ # array, as far as we can tell from how EDS works.
47
44
  #
48
45
  # == Technical Notes and Difficulties
49
46
  #
50
- # This API is enormously difficult to work with. Also the response is very odd
51
- # to deal with and missing some key elements. We quite possibly got something
52
- # wrong or non-optimal in this implementation, but we did our best.
47
+ # This API is pretty difficult to work with, and the response has many
48
+ # idiosyncratic undocumented parts. We think we are currently
49
+ # getting fairly complete citation detail out, at least for articles, but may be missing
50
+ # some on weird edge cases, books/book chapters, etc.
53
51
  #
54
52
  # Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
55
53
  # request making a session for every new end-user -- as we have no way to keep
56
54
  # track of end-users, we do it on every request in this implementation.
57
55
  #
58
- # Responses don't include much metadata -- we don't actually have journal title,
59
- # volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
60
- # there depending on your profile configuration, but we're not right now.
61
- # Instead we're using the chunk of user-displayable citation/reference it does
62
- # give us (which is very difficult to parse into something usable already),
63
- # and a custom Decorator to display that instead of normalized citation
64
- # made from individual elements.
65
- #
66
- # EBSCO says they plan to improve some of these issues in a September 2012 release.
56
+ # An older version of the EDS API returned much less info, and we tried
57
+ # to scrape out what we could anyway. Much of this logic is still there
58
+ # as backup. In the older version, not enough info was there for an
59
+ # OpenURL link, `configuration.assume_first_custom_link_openurl` was true
60
+ # by default, and used to create an OpenURL link. It now defaults to false,
61
+ # and should no longer be necessary.
67
62
  #
68
63
  # Title and abstract data seems to be HTML with tags and character entities and
69
64
  # escaped special chars. We're trusting it and passing it on as html_safe.
@@ -91,7 +86,7 @@ require 'http_client_patch/include_client'
91
86
  #
92
87
  # == EDS docs:
93
88
  #
94
- # * Console App to demo requests: https://eds-api.ebscohost.com/Console
89
+ # * Console App to demo requests: <https://eds-api.ebscohost.com/Console>
95
90
  # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
96
91
  # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
97
92
  #
@@ -101,11 +96,20 @@ class BentoSearch::EdsEngine
101
96
 
102
97
  # Can't change http timeout in config, because we keep an http
103
98
  # client at class-wide level, and config is not class-wide.
104
- # Change this 'constant' if you want to change it, I guess.
99
+ # We used to keep in constant, but that's not good for custom setting,
100
+ # we now use class_attribute, but in a weird backwards-compat way for
101
+ # anyone who might be using the constant.
105
102
  HttpTimeout = 4
103
+
104
+ class_attribute :http_timeout, instance_writer: false
105
+ def self.http_timeout
106
+ defined?(@http_timeout) ? @http_timeout : HttpTimeout
107
+ end
108
+
109
+
106
110
  extend HTTPClientPatch::IncludeClient
107
111
  include_http_client do |client|
108
- client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
112
+ client.connect_timeout = client.send_timeout = client.receive_timeout = http_timeout
109
113
  end
110
114
 
111
115
  AuthHeader = "x-authenticationToken"
@@ -131,12 +135,7 @@ class BentoSearch::EdsEngine
131
135
  # an object that includes some Rails helper modules for
132
136
  # text handling.
133
137
  def helper
134
- unless @helper ||= nil
135
- @helper = Object.new
136
- @helper.extend ActionView::Helpers::TextHelper # for truncate
137
- @helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
138
- end
139
- return @helper
138
+ @helper ||= Helper.new
140
139
  end
141
140
 
142
141
 
@@ -207,8 +206,6 @@ class BentoSearch::EdsEngine
207
206
 
208
207
  url = construct_search_url(args)
209
208
 
210
-
211
-
212
209
  response = get_with_auth(url, session_token)
213
210
 
214
211
  results = BentoSearch::Results.new
@@ -237,39 +234,96 @@ class BentoSearch::EdsEngine
237
234
 
238
235
  item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
239
236
 
240
- # Believe it or not, the authors are encoded as an escaped
241
- # XML-ish payload, that we need to parse again and get the
242
- # actual authors out of. WTF. Thanks for handling fragments
243
- # nokogiri.
244
- author_mess = element_by_group(record_xml, "Au")
245
- # only SOMETIMES does it have XML tags, other times it's straight text.
246
- # ARGH.
247
- author_xml = Nokogiri::XML::fragment(author_mess)
248
- searchLinks = author_xml.xpath(".//searchLink")
249
- if searchLinks.size > 0
250
- author_xml.xpath(".//searchLink").each do |author_node|
251
- item.authors << BentoSearch::Author.new(:display => author_node.text)
237
+ # Much better way to get authors out of EDS response now...
238
+ author_full_names = record_xml.xpath("./RecordInfo/BibRecord/BibRelationships/HasContributorRelationships/HasContributor/PersonEntity/Name/NameFull")
239
+ author_full_names.each do |name_full_xml|
240
+ if name_full_xml && (text = name_full_xml.text).present?
241
+ item.authors << BentoSearch::Author.new(:display => text)
252
242
  end
253
- else
254
- item.authors << BentoSearch::Author.new(:display => author_xml.text)
255
243
  end
256
244
 
245
+ if item.authors.blank?
246
+ # Believe it or not, the authors are encoded as an escaped
247
+ # XML-ish payload, that we need to parse again and get the
248
+ # actual authors out of. WTF. Thanks for handling fragments
249
+ # nokogiri.
250
+ author_mess = element_by_group(record_xml, "Au")
251
+ # only SOMETIMES does it have XML tags, other times it's straight text.
252
+ # ARGH.
253
+ author_xml = Nokogiri::XML::fragment(author_mess)
254
+ searchLinks = author_xml.xpath(".//searchLink")
255
+ if searchLinks.size > 0
256
+ author_xml.xpath(".//searchLink").each do |author_node|
257
+ item.authors << BentoSearch::Author.new(:display => author_node.text)
258
+ end
259
+ else
260
+ item.authors << BentoSearch::Author.new(:display => author_xml.text)
261
+ end
262
+ end
257
263
 
258
264
  # PLink is main inward facing EBSCO link, put it as
259
265
  # main link.
260
266
  if direct_link = record_xml.at_xpath("./PLink")
261
- item.link = direct_link.text
267
+ item.link = direct_link.text
268
+
269
+ if record_xml.at_xpath("./FullText/Links/Link/Type[text() = 'pdflink']")
270
+ item.link_is_fulltext = true
271
+ end
262
272
  end
263
273
 
274
+
264
275
  # Other links may be found in CustomLinks, it seems like usually
265
276
  # there will be at least one, hopefully the first one is the OpenURL?
266
- record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
277
+ #byebug if configuration.id == "articles"
278
+ record_xml.xpath("./CustomLinks/CustomLink|./FullText/CustomLinks/CustomLink").each do |custom_link|
279
+ # If it's in FullText section, give it a rel=alternate
280
+ # to indicate it's fulltext
281
+ rel = (custom_link.parent.parent.name.downcase == "fulltext") ? "alternate" : nil
282
+
267
283
  item.other_links << BentoSearch::Link.new(
268
284
  :url => custom_link.at_xpath("./Url").text,
269
- :label => custom_link.at_xpath("./Name").text
285
+ :rel => rel,
286
+ :label => custom_link.at_xpath("./Text").try(:text).presence || custom_link.at_xpath("./Name").try(:text).presence || "Link"
270
287
  )
271
288
  end
272
289
 
290
+ # More other links in 'URL' Item, in unpredictable format sometimes being
291
+ # embedded XML. Really EBSCO?
292
+ record_xml.xpath("./Items/Item[child::Group[text()='URL']]").each do |url_item|
293
+ data_element = url_item.at_xpath("./Data")
294
+ next unless data_element
295
+
296
+ # SOMETIMES the url and label are in an embedded escaped XML element...
297
+ if data_element.text.strip.start_with?("<link")
298
+ # Ugh, once unescaped it has bare '&' in URL queries sometimes, which
299
+ # is not actually legal XML anymore, but Nokogiri::HTML parser will
300
+ # let us get away with it, but then doesn't put the actual text
301
+ # inside the 'link' item, but inside the <link> tag since it knows
302
+ # an HTML link tag has no content. Really EDS.
303
+ node = Nokogiri::HTML::fragment(data_element.text)
304
+ next unless link = node.at_xpath("./link")
305
+ next unless link["linkterm"].presence || link["linkTerm"].presence
306
+
307
+ item.other_links << BentoSearch::Link.new(
308
+ :url => link["linkterm"] || link["linkTerm"],
309
+ :label => helper.strip_tags(data_element.text).presence || "Link"
310
+ )
311
+ else
312
+ # it's just a straight URL in data element, with only label we've
313
+ # got in <label> element.
314
+ next unless data_element.text.strip.present?
315
+
316
+ label_element = url_item.at_xpath("./Label")
317
+ label = label_element.try(:text).try { |s| helper.strip_tags(s) }.presence || "Link"
318
+
319
+ item.other_links << BentoSearch::Link.new(
320
+ :url => data_element.text,
321
+ :label => label
322
+ )
323
+ end
324
+ end
325
+
326
+
273
327
  if (configuration.assume_first_custom_link_openurl &&
274
328
  (first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
275
329
  (node = first.at_xpath "./Url" )
@@ -286,7 +340,58 @@ class BentoSearch::EdsEngine
286
340
  # Can't find a list of possible PubTypes to see what's there to try
287
341
  # and map to our internal controlled vocab. oh wells.
288
342
 
343
+ item.doi = at_xpath_text record_xml, "./RecordInfo/BibRecord/BibEntity/Identifiers/Identifier[child::Type[text()='doi']]/Value"
289
344
 
345
+ item.start_page = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/StartPage")
346
+ total_pages = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/PageCount")
347
+ if total_pages.to_i != 0 && item.start_page.to_i != 0
348
+ item.end_page = (item.start_page.to_i + total_pages.to_i - 1).to_s
349
+ end
350
+
351
+
352
+ # location/call number, probably only for catalog results. We only see one
353
+ # in actual data, but XML structure allows multiple, so we'll store it as multiple.
354
+ copy_informations = record_xml.xpath("./Holdings/Holding/HoldingSimple/CopyInformationList/CopyInformation")
355
+ if copy_informations.present?
356
+ item.custom_data[:holdings] =
357
+ copy_informations.collect do |copy_information|
358
+ Holding.new(:location => at_xpath_text(copy_information, "Sublocation"),
359
+ :call_number => at_xpath_text(copy_information, "ShelfLocator"))
360
+ end
361
+ end
362
+
363
+
364
+
365
+ # For some EDS results, we have actual citation information,
366
+ # for some we don't.
367
+ container_xml = record_xml.at_xpath("./RecordInfo/BibRecord/BibRelationships/IsPartOfRelationships/IsPartOf/BibEntity")
368
+ if container_xml
369
+ item.source_title = at_xpath_text(container_xml, "./Titles/Title[child::Type[text()='main']]/TitleFull")
370
+ item.volume = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='volume']]/Value")
371
+ item.issue = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='issue']]/Value")
372
+
373
+ item.issn = at_xpath_text(container_xml, "./Identifiers/Identifier[child::Type[text()='issn-print']]/Value")
374
+
375
+ if date_xml = container_xml.at_xpath("./Dates/Date")
376
+ item.year = at_xpath_text(date_xml, "./Y")
377
+
378
+ date = at_xpath_text(date_xml, "./D").to_i
379
+ month = at_xpath_text(date_xml, "./M").to_i
380
+ if item.year.to_i != 0 && date != 0 && month != 0
381
+ item.publication_date = Date.new(item.year.to_i, month, date)
382
+ end
383
+ end
384
+ end
385
+
386
+ # EDS annoyingly repeats a monographic title in the same place
387
+ # we look for source/container title, take it away.
388
+ if item.start_page.blank? && helper.strip_tags(item.title) == item.source_title
389
+ item.source_title = nil
390
+ end
391
+
392
+ # Legacy EDS citation extracting. We don't really need this any more
393
+ # because EDS api has improved, but leave it in in case anyone using
394
+ # older versions needed it.
290
395
 
291
396
  # We have a single blob of human-readable citation, that's also
292
397
  # littered with XML-ish tags we need to deal with. We'll save
@@ -306,7 +411,6 @@ class BentoSearch::EdsEngine
306
411
  item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
307
412
  end
308
413
 
309
-
310
414
  item.extend CitationMessDecorator
311
415
 
312
416
  results << item
@@ -509,7 +613,7 @@ class BentoSearch::EdsEngine
509
613
  :base_url => "http://eds-api.ebscohost.com/edsapi/rest/",
510
614
  :highlighting => true,
511
615
  :truncate_highlighted => 280,
512
- :assume_first_custom_link_openurl => true,
616
+ :assume_first_custom_link_openurl => false,
513
617
  :search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
514
618
  }
515
619
  end
@@ -559,4 +663,20 @@ class BentoSearch::EdsEngine
559
663
  end
560
664
  end
561
665
 
666
+ # a class that includes some Rails helper modules for
667
+ # text handling.
668
+ class Helper
669
+ include ActionView::Helpers::SanitizeHelper # for strip_tags
670
+ include ActionView::Helpers::TextHelper # for truncate
671
+ include ActionView::Helpers::OutputSafetyHelper # for safe_join
672
+ end
673
+
674
+ class Holding
675
+ attr_reader :location, :call_number
676
+ def initialize(args)
677
+ @location = args[:location]
678
+ @call_number = args[:call_number]
679
+ end
680
+ end
681
+
562
682
  end