bento_search 1.7.0.beta.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,19 @@
1
- begin
1
+ begin
2
2
  require 'celluloid'
3
3
 
4
4
  # Based on Celluloid, concurrently runs multiple searches in
5
5
  # separate threads. You must include 'celluloid' gem dependency
6
6
  # into your local app to use this class. Requires celluloid 0.12.0
7
- # or above (for new preferred async syntax).
7
+ # or above (for new preferred async syntax).
8
8
  #
9
9
  # Warning, if you don't have celluloid in your app, this class simply
10
10
  # won't load. TODO: We should put this file in a different directory
11
11
  # so it's never auto-loaded, and requires a "require 'bento_search/multi_searcher'",
12
12
  # such that it will raise without celluloid only then, and we don't need this
13
- # rescue LoadError stuff.
13
+ # rescue LoadError stuff.
14
14
  #
15
15
  # I am not an expert at use of Celluloid, it's possible there's a better
16
- # way to do this all, but seems to work.
16
+ # way to do this all, but seems to work.
17
17
  #
18
18
  # ## Usage
19
19
  #
@@ -26,14 +26,14 @@ begin
26
26
  # retrieve results, blocking until each is completed:
27
27
  # searcher.results
28
28
  #
29
- # returns a Hash keyed by engine id, values BentoSearch::Results objects.
29
+ # returns a Hash keyed by engine id, values BentoSearch::Results objects.
30
30
  #
31
31
  # Can only call #results once per #start, after that it'll return empty hash.
32
- # (should we make it actually raise instead?). .
33
- #
32
+ # (should we make it actually raise instead?). .
33
+ #
34
34
  # important to call results at some point after calling start, in order
35
35
  # to make sure Celluloid::Actors are properly terminated to avoid
36
- # resource leakage. May want to do it in an ensure block.
36
+ # resource leakage. May want to do it in an ensure block.
37
37
  #
38
38
  # Note that celluloid uses multi-threading in such a way that you
39
39
  # may have to set config.cache_classes=true even in development
@@ -41,87 +41,88 @@ begin
41
41
  #
42
42
  #
43
43
  # TODO: have a method that returns Futures instead of only supplying the blocking
44
- # results method? Several tricks, including making sure to properly terminate actors.
44
+ # results method? Several tricks, including making sure to properly terminate actors.
45
45
  class BentoSearch::MultiSearcher
46
-
46
+
47
47
  def initialize(*engine_ids)
48
+ ActiveSupport::Deprecation.warn('BentoSearch::MultiSearcher is deprecated and will be removed in bento_search 2.0. Please use BentoSearch::ConcurrentSearcher instead.', caller.slice(1..-1))
49
+
48
50
  @engines = []
49
51
  @actors = []
50
52
  engine_ids.each do |id|
51
53
  add_engine( BentoSearch.get_engine id )
52
54
  end
53
55
  end
54
-
56
+
55
57
  # Adds an instantiated engine directly, rather than by id from global
56
- # registry.
57
- def add_engine(engine)
58
+ # registry.
59
+ def add_engine(engine)
58
60
  @engines << engine
59
61
  end
60
-
61
- # Starts all searches, returns self so you can chain method calls if you like.
62
+
63
+ # Starts all searches, returns self so you can chain method calls if you like.
62
64
  def search(*search_args)
63
65
  @engines.each do |engine|
64
66
  a = Actor.new(engine)
65
67
  @actors << a
66
68
  a.async.start *search_args
67
- end
69
+ end
68
70
  return self
69
71
  end
70
72
  alias_method :start, :search # backwards compat
71
-
73
+
72
74
  # Call after #start. Blocks until each included engine is finished
73
75
  # then returns a Hash keyed by engine registered id, value is a
74
- # BentoSearch::Results object.
76
+ # BentoSearch::Results object.
75
77
  #
76
78
  # Can only call _once_ per invocation of #start, after that it'll return
77
- # an empty hash.
79
+ # an empty hash.
78
80
  def results
79
81
  results = {}
80
-
82
+
81
83
  # we use #delete_if to get an iterator that deletes
82
- # each item after iteration.
84
+ # each item after iteration.
83
85
  @actors.delete_if do |actor|
84
86
  result_key = (actor.engine.configuration.id || actor.engine.class.name)
85
87
  results[result_key] = actor.results
86
88
  actor.terminate
87
-
89
+
88
90
  true
89
91
  end
90
-
92
+
91
93
  return results
92
94
  end
93
-
94
-
95
+
96
+
95
97
  class Actor
96
98
  include Celluloid
97
-
99
+
98
100
  attr_accessor :engine
99
-
101
+
100
102
  def initialize(a_engine)
101
103
  self.engine = a_engine
102
104
  end
103
-
104
- # call as .async.start, to invoke async.
105
+
106
+ # call as .async.start, to invoke async.
105
107
  def start(*search_args)
106
108
  begin
107
109
  @results = self.engine.search(*search_args)
108
110
  rescue StandardError => e
109
111
  Rails.logger.error("\nBentoSearch:MultiSearcher caught exception: #{e}\n#{e.backtrace.join(" \n")}")
110
- # Make a fake results with caught exception.
112
+ # Make a fake results with caught exception.
111
113
  @results = BentoSearch::Results.new
112
114
  self.engine.fill_in_search_metadata_for(@results, self.engine.normalized_search_arguments(search_args))
113
-
115
+
114
116
  @results.error ||= {}
115
- @results.error["exception"] = e
117
+ @results.error["exception"] = e
116
118
  end
117
119
  end
118
-
120
+
119
121
  def results
120
122
  @results
121
123
  end
122
-
124
+
123
125
  end
124
-
125
126
  end
126
127
 
127
128
  rescue LoadError
@@ -17,7 +17,6 @@ module BentoSearch
17
17
  # remote service. Not yet universally used.
18
18
  class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
19
19
 
20
-
21
20
  # Module mix-in for bento_search search engines.
22
21
  #
23
22
  # ==Using a SearchEngine
@@ -65,6 +64,11 @@ module BentoSearch
65
64
  # string name, actual class object not supported (to make it easier
66
65
  # to serialize and transport configuration).
67
66
  #
67
+ # [log_failed_results]
68
+ # Default false, if true all failed results are logged to
69
+ # `Rails.logger.error`. Can set global default with
70
+ # `BentoSearch.defaults.log_failed_results = true`
71
+ #
68
72
  # == Implementing a SearchEngine
69
73
  #
70
74
  # Implementing a new SearchEngine is relatively straightforward -- you are
@@ -119,15 +123,53 @@ module BentoSearch
119
123
  module SearchEngine
120
124
  DefaultPerPage = 10
121
125
 
122
-
123
-
124
-
125
126
  extend ActiveSupport::Concern
126
127
 
127
128
  include Capabilities
128
129
 
130
+ mattr_accessor :default_auto_rescued_exceptions
131
+ self.default_auto_rescued_exceptions = [
132
+ BentoSearch::RubyTimeoutClass,
133
+ HTTPClient::TimeoutError,
134
+ HTTPClient::ConfigurationError,
135
+ HTTPClient::BadResponseError,
136
+ MultiJson::DecodeError,
137
+ Nokogiri::SyntaxError,
138
+ SocketError
139
+ ].freeze
140
+
129
141
  included do
130
142
  attr_accessor :configuration
143
+
144
+ # What exceptions should our #search wrapper rescue and turn
145
+ # into failed results instead of fatal errors?
146
+ #
147
+ # Can't rescue everything, or we eat VCR/webmock errors, and lots
148
+ # of other errors we don't want to eat either, making
149
+ # development really confusing. Perhaps could set this
150
+ # to be something diff in production and dev?
151
+ #
152
+ # This default list is probably useful already, but individual
153
+ # engines can override if it's convenient for their own error
154
+ # handling.
155
+ #
156
+ # Override by just using `auto_rescued_exceptions=` on class _or_ instance,
157
+ # although some legacy code may override `def auto_rescue_exceptions` (note
158
+ # old `rescue` vs new `rescued`) which should work too.
159
+ self.class_attribute :auto_rescued_exceptions
160
+ self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
161
+
162
+ # Over-ride returning a hash or Confstruct with
163
+ # any configuration values you want by default.
164
+ # actual user-specified config values will be deep-merged
165
+ # into the defaults.
166
+ def self.default_configuration
167
+ end
168
+
169
+ # Over-ride returning an array of symbols for required
170
+ # configuration keys.
171
+ def self.required_configuration
172
+ end
131
173
  end
132
174
 
133
175
  # If specific SearchEngine calls initialize, you want to call super
@@ -153,6 +195,9 @@ module BentoSearch
153
195
 
154
196
  # global defaults?
155
197
  self.configuration[:for_display] ||= {}
198
+ unless self.configuration.has_key?(:log_failed_results)
199
+ self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
200
+ end
156
201
 
157
202
  # check for required keys -- have to be present, and not nil
158
203
  if self.class.required_configuration
@@ -241,8 +286,11 @@ module BentoSearch
241
286
 
242
287
  fill_in_search_metadata_for(failed, arguments)
243
288
 
244
-
245
289
  return failed
290
+ ensure
291
+ if results && configuration.log_failed_results && results.failed?
292
+ Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
293
+ end
246
294
  end
247
295
 
248
296
  # SOME of the elements of Results to be returned that SearchEngine implementation
@@ -392,9 +440,26 @@ module BentoSearch
392
440
  [:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
393
441
  end
394
442
 
443
+ # Cover method for consistent api with Results
444
+ def display_configuration
445
+ configuration.for_display
446
+ end
447
+
448
+ # Cover method for consistent api with Results
449
+ def engine_id
450
+ configuration.id
451
+ end
452
+
395
453
 
396
454
  protected
397
455
 
456
+ # For legacy reasons old name auto_rescue_exceptions is here, some
457
+ # sub-classes may override it. Now preferred to use auto_rescued_exceptions
458
+ # setter instead.
459
+ def auto_rescue_exceptions
460
+ self.auto_rescued_exceptions
461
+ end
462
+
398
463
  # get value of an arg that can be supplied in search args OR config,
399
464
  # with search_args over-ridding config. Also normalizes value to_s
400
465
  # (for symbols/strings).
@@ -409,40 +474,5 @@ module BentoSearch
409
474
 
410
475
  return value
411
476
  end
412
-
413
- # What exceptions should our #search wrapper rescue and turn
414
- # into failed results instead of fatal errors?
415
- #
416
- # Can't rescue everything, or we eat VCR/webmock errors, and lots
417
- # of other errors we don't want to eat either, making
418
- # development really confusing. Perhaps could set this
419
- # to be something diff in production and dev?
420
- #
421
- # This default list is probably useful already, but individual
422
- # engines can override if it's convenient for their own error
423
- # handling.
424
- def auto_rescue_exceptions
425
- [BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
426
- HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
427
- MultiJson::DecodeError, Nokogiri::SyntaxError, SocketError]
428
- end
429
-
430
-
431
- module ClassMethods
432
-
433
- # Over-ride returning a hash or Confstruct with
434
- # any configuration values you want by default.
435
- # actual user-specified config values will be deep-merged
436
- # into the defaults.
437
- def default_configuration
438
- end
439
-
440
- # Over-ride returning an array of symbols for required
441
- # configuration keys.
442
- def required_configuration
443
- end
444
-
445
- end
446
-
447
477
  end
448
478
  end
@@ -16,7 +16,7 @@ module BentoSearch
16
16
  include ActionView::Helpers::SanitizeHelper
17
17
 
18
18
 
19
- class_attribute :http_timeout
19
+ class_attribute :http_timeout, instance_writer: false
20
20
  self.http_timeout = 10
21
21
 
22
22
  extend HTTPClientPatch::IncludeClient
@@ -25,45 +25,40 @@ require 'http_client_patch/include_client'
25
25
  # == Linking
26
26
  #
27
27
  # The link to record in EBSCO interface delivered as "PLink" will be listed
28
- # as record main link.
28
+ # as record main link. If the record includes a node at `./FullText/Links/Link/Type[text() = 'pdflink']`,
29
+ # the `plink` will be marked as fulltext. (There may be other cases of fulltext, but
30
+ # this seems to be all EDS API tells us.)
29
31
  #
30
32
  # Any links listed under <CustomLinks> will be listed as other_links, using
31
- # configured name provided by EBSCO for CustomLink.
33
+ # configured name provided by EBSCO for CustomLink. Same with links listed
34
+ # as `<Item><Group>URL</Group>`.
32
35
  #
33
- # EDS Response does not have sufficient metadata for us to generate an OpenURL
34
- # ourselves. However, in our testing, the first/only CustomLink was an
35
- # an OpenURL. If configuration.assume_first_custom_link_openurl is
36
- # true (as is default), it will be used to create an OpenURL link. However, in
37
- # our testing, many records don't have this at all. **Note** You want
38
- # to configure your profile so OpenURLs are ALWAYS included for all records, not
39
- # just records with no EBSCO fulltext, to ensure bento_search can get the
40
- # openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
41
- # have to ask EBSCO support for help, it's confusing!).
36
+ # As always, you can customize links and other_links with Item Decorators.
42
37
  #
43
- # TODO: May have to add configuration code to pull the OpenURL link out by
44
- # it's configured name or label, not assume first one is it.
38
+ # == Custom Data
45
39
  #
46
- # As always, you can customize links and other_links with Item Decorators.
40
+ # If present, there is a custom_data[:holdings] value, an array of
41
+ # BentoSearch::EdsEngine::Holding objects, each of which has a #location
42
+ # and #call_number. There will usually (always?) be at most 1 item in the
43
+ # array, as far as we can tell from how EDS works.
47
44
  #
48
45
  # == Technical Notes and Difficulties
49
46
  #
50
- # This API is enormously difficult to work with. Also the response is very odd
51
- # to deal with and missing some key elements. We quite possibly got something
52
- # wrong or non-optimal in this implementation, but we did our best.
47
+ # This API is pretty difficult to work with, and the response has many
48
+ # idiosyncratic undocumented parts. We think we are currently
49
+ # getting fairly complete citation detail out, at least for articles, but may be missing
50
+ # some on weird edge cases, books/book chapters, etc.
53
51
  #
54
52
  # Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
55
53
  # request making a session for every new end-user -- as we have no way to keep
56
54
  # track of end-users, we do it on every request in this implementation.
57
55
  #
58
- # Responses don't include much metadata -- we don't actually have journal title,
59
- # volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
60
- # there depending on your profile configuration, but we're not right now.
61
- # Instead we're using the chunk of user-displayable citation/reference it does
62
- # give us (which is very difficult to parse into something usable already),
63
- # and a custom Decorator to display that instead of normalized citation
64
- # made from individual elements.
65
- #
66
- # EBSCO says they plan to improve some of these issues in a September 2012 release.
56
+ # An older version of the EDS API returned much less info, and we tried
57
+ # to scrape out what we could anyway. Much of this logic is still there
58
+ # as backup. In the older version, not enough info was there for an
59
+ # OpenURL link, `configuration.assume_first_custom_link_openurl` was true
60
+ # by default, and used to create an OpenURL link. It now defaults to false,
61
+ # and should no longer be necessary.
67
62
  #
68
63
  # Title and abstract data seems to be HTML with tags and character entities and
69
64
  # escaped special chars. We're trusting it and passing it on as html_safe.
@@ -91,7 +86,7 @@ require 'http_client_patch/include_client'
91
86
  #
92
87
  # == EDS docs:
93
88
  #
94
- # * Console App to demo requests: https://eds-api.ebscohost.com/Console
89
+ # * Console App to demo requests: <https://eds-api.ebscohost.com/Console>
95
90
  # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
96
91
  # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
97
92
  #
@@ -101,11 +96,20 @@ class BentoSearch::EdsEngine
101
96
 
102
97
  # Can't change http timeout in config, because we keep an http
103
98
  # client at class-wide level, and config is not class-wide.
104
- # Change this 'constant' if you want to change it, I guess.
99
+ # We used to keep in constant, but that's not good for custom setting,
100
+ # we now use class_attribute, but in a weird backwards-compat way for
101
+ # anyone who might be using the constant.
105
102
  HttpTimeout = 4
103
+
104
+ class_attribute :http_timeout, instance_writer: false
105
+ def self.http_timeout
106
+ defined?(@http_timeout) ? @http_timeout : HttpTimeout
107
+ end
108
+
109
+
106
110
  extend HTTPClientPatch::IncludeClient
107
111
  include_http_client do |client|
108
- client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
112
+ client.connect_timeout = client.send_timeout = client.receive_timeout = http_timeout
109
113
  end
110
114
 
111
115
  AuthHeader = "x-authenticationToken"
@@ -131,12 +135,7 @@ class BentoSearch::EdsEngine
131
135
  # an object that includes some Rails helper modules for
132
136
  # text handling.
133
137
  def helper
134
- unless @helper ||= nil
135
- @helper = Object.new
136
- @helper.extend ActionView::Helpers::TextHelper # for truncate
137
- @helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
138
- end
139
- return @helper
138
+ @helper ||= Helper.new
140
139
  end
141
140
 
142
141
 
@@ -207,8 +206,6 @@ class BentoSearch::EdsEngine
207
206
 
208
207
  url = construct_search_url(args)
209
208
 
210
-
211
-
212
209
  response = get_with_auth(url, session_token)
213
210
 
214
211
  results = BentoSearch::Results.new
@@ -237,39 +234,96 @@ class BentoSearch::EdsEngine
237
234
 
238
235
  item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
239
236
 
240
- # Believe it or not, the authors are encoded as an escaped
241
- # XML-ish payload, that we need to parse again and get the
242
- # actual authors out of. WTF. Thanks for handling fragments
243
- # nokogiri.
244
- author_mess = element_by_group(record_xml, "Au")
245
- # only SOMETIMES does it have XML tags, other times it's straight text.
246
- # ARGH.
247
- author_xml = Nokogiri::XML::fragment(author_mess)
248
- searchLinks = author_xml.xpath(".//searchLink")
249
- if searchLinks.size > 0
250
- author_xml.xpath(".//searchLink").each do |author_node|
251
- item.authors << BentoSearch::Author.new(:display => author_node.text)
237
+ # Much better way to get authors out of EDS response now...
238
+ author_full_names = record_xml.xpath("./RecordInfo/BibRecord/BibRelationships/HasContributorRelationships/HasContributor/PersonEntity/Name/NameFull")
239
+ author_full_names.each do |name_full_xml|
240
+ if name_full_xml && (text = name_full_xml.text).present?
241
+ item.authors << BentoSearch::Author.new(:display => text)
252
242
  end
253
- else
254
- item.authors << BentoSearch::Author.new(:display => author_xml.text)
255
243
  end
256
244
 
245
+ if item.authors.blank?
246
+ # Believe it or not, the authors are encoded as an escaped
247
+ # XML-ish payload, that we need to parse again and get the
248
+ # actual authors out of. WTF. Thanks for handling fragments
249
+ # nokogiri.
250
+ author_mess = element_by_group(record_xml, "Au")
251
+ # only SOMETIMES does it have XML tags, other times it's straight text.
252
+ # ARGH.
253
+ author_xml = Nokogiri::XML::fragment(author_mess)
254
+ searchLinks = author_xml.xpath(".//searchLink")
255
+ if searchLinks.size > 0
256
+ author_xml.xpath(".//searchLink").each do |author_node|
257
+ item.authors << BentoSearch::Author.new(:display => author_node.text)
258
+ end
259
+ else
260
+ item.authors << BentoSearch::Author.new(:display => author_xml.text)
261
+ end
262
+ end
257
263
 
258
264
  # PLink is main inward facing EBSCO link, put it as
259
265
  # main link.
260
266
  if direct_link = record_xml.at_xpath("./PLink")
261
- item.link = direct_link.text
267
+ item.link = direct_link.text
268
+
269
+ if record_xml.at_xpath("./FullText/Links/Link/Type[text() = 'pdflink']")
270
+ item.link_is_fulltext = true
271
+ end
262
272
  end
263
273
 
274
+
264
275
  # Other links may be found in CustomLinks, it seems like usually
265
276
  # there will be at least one, hopefully the first one is the OpenURL?
266
- record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
277
+ #byebug if configuration.id == "articles"
278
+ record_xml.xpath("./CustomLinks/CustomLink|./FullText/CustomLinks/CustomLink").each do |custom_link|
279
+ # If it's in FullText section, give it a rel=alternate
280
+ # to indicate it's fulltext
281
+ rel = (custom_link.parent.parent.name.downcase == "fulltext") ? "alternate" : nil
282
+
267
283
  item.other_links << BentoSearch::Link.new(
268
284
  :url => custom_link.at_xpath("./Url").text,
269
- :label => custom_link.at_xpath("./Name").text
285
+ :rel => rel,
286
+ :label => custom_link.at_xpath("./Text").try(:text).presence || custom_link.at_xpath("./Name").try(:text).presence || "Link"
270
287
  )
271
288
  end
272
289
 
290
+ # More other links in 'URL' Item, in unpredictable format sometimes being
291
+ # embedded XML. Really EBSCO?
292
+ record_xml.xpath("./Items/Item[child::Group[text()='URL']]").each do |url_item|
293
+ data_element = url_item.at_xpath("./Data")
294
+ next unless data_element
295
+
296
+ # SOMETIMES the url and label are in an embedded escaped XML element...
297
+ if data_element.text.strip.start_with?("<link")
298
+ # Ugh, once unescaped it has bare '&' in URL queries sometimes, which
299
+ # is not actually legal XML anymore, but Nokogiri::HTML parser will
300
+ # let us get away with it, but then doesn't put the actual text
301
+ # inside the 'link' item, but inside the <link> tag since it knows
302
+ # an HTML link tag has no content. Really EDS.
303
+ node = Nokogiri::HTML::fragment(data_element.text)
304
+ next unless link = node.at_xpath("./link")
305
+ next unless link["linkterm"].presence || link["linkTerm"].presence
306
+
307
+ item.other_links << BentoSearch::Link.new(
308
+ :url => link["linkterm"] || link["linkTerm"],
309
+ :label => helper.strip_tags(data_element.text).presence || "Link"
310
+ )
311
+ else
312
+ # it's just a straight URL in data element, with only label we've
313
+ # got in <label> element.
314
+ next unless data_element.text.strip.present?
315
+
316
+ label_element = url_item.at_xpath("./Label")
317
+ label = label_element.try(:text).try { |s| helper.strip_tags(s) }.presence || "Link"
318
+
319
+ item.other_links << BentoSearch::Link.new(
320
+ :url => data_element.text,
321
+ :label => label
322
+ )
323
+ end
324
+ end
325
+
326
+
273
327
  if (configuration.assume_first_custom_link_openurl &&
274
328
  (first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
275
329
  (node = first.at_xpath "./Url" )
@@ -286,7 +340,58 @@ class BentoSearch::EdsEngine
286
340
  # Can't find a list of possible PubTypes to see what's there to try
287
341
  # and map to our internal controlled vocab. oh wells.
288
342
 
343
+ item.doi = at_xpath_text record_xml, "./RecordInfo/BibRecord/BibEntity/Identifiers/Identifier[child::Type[text()='doi']]/Value"
289
344
 
345
+ item.start_page = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/StartPage")
346
+ total_pages = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/PageCount")
347
+ if total_pages.to_i != 0 && item.start_page.to_i != 0
348
+ item.end_page = (item.start_page.to_i + total_pages.to_i - 1).to_s
349
+ end
350
+
351
+
352
+ # location/call number, probably only for catalog results. We only see one
353
+ # in actual data, but XML structure allows multiple, so we'll store it as multiple.
354
+ copy_informations = record_xml.xpath("./Holdings/Holding/HoldingSimple/CopyInformationList/CopyInformation")
355
+ if copy_informations.present?
356
+ item.custom_data[:holdings] =
357
+ copy_informations.collect do |copy_information|
358
+ Holding.new(:location => at_xpath_text(copy_information, "Sublocation"),
359
+ :call_number => at_xpath_text(copy_information, "ShelfLocator"))
360
+ end
361
+ end
362
+
363
+
364
+
365
+ # For some EDS results, we have actual citation information,
366
+ # for some we don't.
367
+ container_xml = record_xml.at_xpath("./RecordInfo/BibRecord/BibRelationships/IsPartOfRelationships/IsPartOf/BibEntity")
368
+ if container_xml
369
+ item.source_title = at_xpath_text(container_xml, "./Titles/Title[child::Type[text()='main']]/TitleFull")
370
+ item.volume = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='volume']]/Value")
371
+ item.issue = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='issue']]/Value")
372
+
373
+ item.issn = at_xpath_text(container_xml, "./Identifiers/Identifier[child::Type[text()='issn-print']]/Value")
374
+
375
+ if date_xml = container_xml.at_xpath("./Dates/Date")
376
+ item.year = at_xpath_text(date_xml, "./Y")
377
+
378
+ date = at_xpath_text(date_xml, "./D").to_i
379
+ month = at_xpath_text(date_xml, "./M").to_i
380
+ if item.year.to_i != 0 && date != 0 && month != 0
381
+ item.publication_date = Date.new(item.year.to_i, month, date)
382
+ end
383
+ end
384
+ end
385
+
386
+ # EDS annoyingly repeats a monographic title in the same place
387
+ # we look for source/container title, take it away.
388
+ if item.start_page.blank? && helper.strip_tags(item.title) == item.source_title
389
+ item.source_title = nil
390
+ end
391
+
392
+ # Legacy EDS citation extracting. We don't really need this any more
393
+ # because EDS api has improved, but leave it in in case anyone using
394
+ # older versions needed it.
290
395
 
291
396
  # We have a single blob of human-readable citation, that's also
292
397
  # littered with XML-ish tags we need to deal with. We'll save
@@ -306,7 +411,6 @@ class BentoSearch::EdsEngine
306
411
  item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
307
412
  end
308
413
 
309
-
310
414
  item.extend CitationMessDecorator
311
415
 
312
416
  results << item
@@ -509,7 +613,7 @@ class BentoSearch::EdsEngine
509
613
  :base_url => "http://eds-api.ebscohost.com/edsapi/rest/",
510
614
  :highlighting => true,
511
615
  :truncate_highlighted => 280,
512
- :assume_first_custom_link_openurl => true,
616
+ :assume_first_custom_link_openurl => false,
513
617
  :search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
514
618
  }
515
619
  end
@@ -559,4 +663,20 @@ class BentoSearch::EdsEngine
559
663
  end
560
664
  end
561
665
 
666
+ # a class that includes some Rails helper modules for
667
+ # text handling.
668
+ class Helper
669
+ include ActionView::Helpers::SanitizeHelper # for strip_tags
670
+ include ActionView::Helpers::TextHelper # for truncate
671
+ include ActionView::Helpers::OutputSafetyHelper # for safe_join
672
+ end
673
+
674
+ class Holding
675
+ attr_reader :location, :call_number
676
+ def initialize(args)
677
+ @location = args[:location]
678
+ @call_number = args[:call_number]
679
+ end
680
+ end
681
+
562
682
  end