bento_search 1.7.0.beta.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -7
- data/app/assets/javascripts/bento_search/ajax_load.js +37 -21
- data/app/helpers/bento_search_helper.rb +72 -74
- data/app/models/bento_search/concurrent_searcher.rb +136 -0
- data/app/models/bento_search/multi_searcher.rb +36 -35
- data/app/models/bento_search/search_engine.rb +70 -40
- data/app/search_engines/bento_search/doaj_articles_engine.rb +1 -1
- data/app/search_engines/bento_search/eds_engine.rb +176 -56
- data/app/views/bento_search/_ajax_loading.html.erb +17 -0
- data/app/views/bento_search/_link.html.erb +3 -3
- data/lib/bento_search.rb +12 -0
- data/lib/bento_search/engine.rb +2 -0
- data/lib/bento_search/version.rb +1 -1
- data/test/search_engines/eds_engine_test.rb +91 -59
- data/test/search_engines/search_engine_base_test.rb +11 -0
- data/test/search_engines/search_engine_test.rb +12 -0
- data/test/unit/concurrent_searcher_test.rb +75 -0
- data/test/unit/multi_searcher_test.rb +16 -19
- data/test/vcr_cassettes/eds/FullText_CustomLink.yml +198 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1036 -1729
- data/test/vcr_cassettes/eds/catalog_ebook_query.yml +218 -0
- data/test/vcr_cassettes/eds/catalog_query.yml +255 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +11 -44
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +10 -7
- data/test/vcr_cassettes/eds/get_with_auth.yml +144 -153
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +167 -223
- metadata +15 -5
@@ -1,19 +1,19 @@
|
|
1
|
-
begin
|
1
|
+
begin
|
2
2
|
require 'celluloid'
|
3
3
|
|
4
4
|
# Based on Celluloid, concurrently runs multiple searches in
|
5
5
|
# separate threads. You must include 'celluloid' gem dependency
|
6
6
|
# into your local app to use this class. Requires celluloid 0.12.0
|
7
|
-
# or above (for new preferred async syntax).
|
7
|
+
# or above (for new preferred async syntax).
|
8
8
|
#
|
9
9
|
# Warning, if you don't have celluloid in your app, this class simply
|
10
10
|
# won't load. TODO: We should put this file in a different directory
|
11
11
|
# so it's never auto-loaded, and requires a "require 'bento_search/multi_searcher'",
|
12
12
|
# such that it will raise without celluloid only then, and we don't need this
|
13
|
-
# rescue LoadError stuff.
|
13
|
+
# rescue LoadError stuff.
|
14
14
|
#
|
15
15
|
# I am not an expert at use of Celluloid, it's possible there's a better
|
16
|
-
# way to do this all, but seems to work.
|
16
|
+
# way to do this all, but seems to work.
|
17
17
|
#
|
18
18
|
# ## Usage
|
19
19
|
#
|
@@ -26,14 +26,14 @@ begin
|
|
26
26
|
# retrieve results, blocking until each is completed:
|
27
27
|
# searcher.results
|
28
28
|
#
|
29
|
-
# returns a Hash keyed by engine id, values BentoSearch::Results objects.
|
29
|
+
# returns a Hash keyed by engine id, values BentoSearch::Results objects.
|
30
30
|
#
|
31
31
|
# Can only call #results once per #start, after that it'll return empty hash.
|
32
|
-
# (should we make it actually raise instead?). .
|
33
|
-
#
|
32
|
+
# (should we make it actually raise instead?). .
|
33
|
+
#
|
34
34
|
# important to call results at some point after calling start, in order
|
35
35
|
# to make sure Celluloid::Actors are properly terminated to avoid
|
36
|
-
# resource leakage. May want to do it in an ensure block.
|
36
|
+
# resource leakage. May want to do it in an ensure block.
|
37
37
|
#
|
38
38
|
# Note that celluloid uses multi-threading in such a way that you
|
39
39
|
# may have to set config.cache_classes=true even in development
|
@@ -41,87 +41,88 @@ begin
|
|
41
41
|
#
|
42
42
|
#
|
43
43
|
# TODO: have a method that returns Futures instead of only supplying the blocking
|
44
|
-
# results method? Several tricks, including making sure to properly terminate actors.
|
44
|
+
# results method? Several tricks, including making sure to properly terminate actors.
|
45
45
|
class BentoSearch::MultiSearcher
|
46
|
-
|
46
|
+
|
47
47
|
def initialize(*engine_ids)
|
48
|
+
ActiveSupport::Deprecation.warn('BentoSearch::MultiSearcher is deprecated and will be removed in bento_search 2.0. Please use BentoSearch::ConcurrentSearcher instead.', caller.slice(1..-1))
|
49
|
+
|
48
50
|
@engines = []
|
49
51
|
@actors = []
|
50
52
|
engine_ids.each do |id|
|
51
53
|
add_engine( BentoSearch.get_engine id )
|
52
54
|
end
|
53
55
|
end
|
54
|
-
|
56
|
+
|
55
57
|
# Adds an instantiated engine directly, rather than by id from global
|
56
|
-
# registry.
|
57
|
-
def add_engine(engine)
|
58
|
+
# registry.
|
59
|
+
def add_engine(engine)
|
58
60
|
@engines << engine
|
59
61
|
end
|
60
|
-
|
61
|
-
# Starts all searches, returns self so you can chain method calls if you like.
|
62
|
+
|
63
|
+
# Starts all searches, returns self so you can chain method calls if you like.
|
62
64
|
def search(*search_args)
|
63
65
|
@engines.each do |engine|
|
64
66
|
a = Actor.new(engine)
|
65
67
|
@actors << a
|
66
68
|
a.async.start *search_args
|
67
|
-
end
|
69
|
+
end
|
68
70
|
return self
|
69
71
|
end
|
70
72
|
alias_method :start, :search # backwards compat
|
71
|
-
|
73
|
+
|
72
74
|
# Call after #start. Blocks until each included engine is finished
|
73
75
|
# then returns a Hash keyed by engine registered id, value is a
|
74
|
-
# BentoSearch::Results object.
|
76
|
+
# BentoSearch::Results object.
|
75
77
|
#
|
76
78
|
# Can only call _once_ per invocation of #start, after that it'll return
|
77
|
-
# an empty hash.
|
79
|
+
# an empty hash.
|
78
80
|
def results
|
79
81
|
results = {}
|
80
|
-
|
82
|
+
|
81
83
|
# we use #delete_if to get an iterator that deletes
|
82
|
-
# each item after iteration.
|
84
|
+
# each item after iteration.
|
83
85
|
@actors.delete_if do |actor|
|
84
86
|
result_key = (actor.engine.configuration.id || actor.engine.class.name)
|
85
87
|
results[result_key] = actor.results
|
86
88
|
actor.terminate
|
87
|
-
|
89
|
+
|
88
90
|
true
|
89
91
|
end
|
90
|
-
|
92
|
+
|
91
93
|
return results
|
92
94
|
end
|
93
|
-
|
94
|
-
|
95
|
+
|
96
|
+
|
95
97
|
class Actor
|
96
98
|
include Celluloid
|
97
|
-
|
99
|
+
|
98
100
|
attr_accessor :engine
|
99
|
-
|
101
|
+
|
100
102
|
def initialize(a_engine)
|
101
103
|
self.engine = a_engine
|
102
104
|
end
|
103
|
-
|
104
|
-
# call as .async.start, to invoke async.
|
105
|
+
|
106
|
+
# call as .async.start, to invoke async.
|
105
107
|
def start(*search_args)
|
106
108
|
begin
|
107
109
|
@results = self.engine.search(*search_args)
|
108
110
|
rescue StandardError => e
|
109
111
|
Rails.logger.error("\nBentoSearch:MultiSearcher caught exception: #{e}\n#{e.backtrace.join(" \n")}")
|
110
|
-
# Make a fake results with caught exception.
|
112
|
+
# Make a fake results with caught exception.
|
111
113
|
@results = BentoSearch::Results.new
|
112
114
|
self.engine.fill_in_search_metadata_for(@results, self.engine.normalized_search_arguments(search_args))
|
113
|
-
|
115
|
+
|
114
116
|
@results.error ||= {}
|
115
|
-
@results.error["exception"] = e
|
117
|
+
@results.error["exception"] = e
|
116
118
|
end
|
117
119
|
end
|
118
|
-
|
120
|
+
|
119
121
|
def results
|
120
122
|
@results
|
121
123
|
end
|
122
|
-
|
124
|
+
|
123
125
|
end
|
124
|
-
|
125
126
|
end
|
126
127
|
|
127
128
|
rescue LoadError
|
@@ -17,7 +17,6 @@ module BentoSearch
|
|
17
17
|
# remote service. Not yet universally used.
|
18
18
|
class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
|
19
19
|
|
20
|
-
|
21
20
|
# Module mix-in for bento_search search engines.
|
22
21
|
#
|
23
22
|
# ==Using a SearchEngine
|
@@ -65,6 +64,11 @@ module BentoSearch
|
|
65
64
|
# string name, actual class object not supported (to make it easier
|
66
65
|
# to serialize and transport configuration).
|
67
66
|
#
|
67
|
+
# [log_failed_results]
|
68
|
+
# Default false, if true all failed results are logged to
|
69
|
+
# `Rails.logger.error`. Can set global default with
|
70
|
+
# `BentoSearch.defaults.log_failed_results = true`
|
71
|
+
#
|
68
72
|
# == Implementing a SearchEngine
|
69
73
|
#
|
70
74
|
# Implementing a new SearchEngine is relatively straightforward -- you are
|
@@ -119,15 +123,53 @@ module BentoSearch
|
|
119
123
|
module SearchEngine
|
120
124
|
DefaultPerPage = 10
|
121
125
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
126
|
extend ActiveSupport::Concern
|
126
127
|
|
127
128
|
include Capabilities
|
128
129
|
|
130
|
+
mattr_accessor :default_auto_rescued_exceptions
|
131
|
+
self.default_auto_rescued_exceptions = [
|
132
|
+
BentoSearch::RubyTimeoutClass,
|
133
|
+
HTTPClient::TimeoutError,
|
134
|
+
HTTPClient::ConfigurationError,
|
135
|
+
HTTPClient::BadResponseError,
|
136
|
+
MultiJson::DecodeError,
|
137
|
+
Nokogiri::SyntaxError,
|
138
|
+
SocketError
|
139
|
+
].freeze
|
140
|
+
|
129
141
|
included do
|
130
142
|
attr_accessor :configuration
|
143
|
+
|
144
|
+
# What exceptions should our #search wrapper rescue and turn
|
145
|
+
# into failed results instead of fatal errors?
|
146
|
+
#
|
147
|
+
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
148
|
+
# of other errors we don't want to eat either, making
|
149
|
+
# development really confusing. Perhaps could set this
|
150
|
+
# to be something diff in production and dev?
|
151
|
+
#
|
152
|
+
# This default list is probably useful already, but individual
|
153
|
+
# engines can override if it's convenient for their own error
|
154
|
+
# handling.
|
155
|
+
#
|
156
|
+
# Override by just using `auto_rescued_exceptions=` on class _or_ instance,
|
157
|
+
# although some legacy code may override `def auto_rescue_exceptions` (note
|
158
|
+
# old `rescue` vs new `rescued`) which should work too.
|
159
|
+
self.class_attribute :auto_rescued_exceptions
|
160
|
+
self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
|
161
|
+
|
162
|
+
# Over-ride returning a hash or Confstruct with
|
163
|
+
# any configuration values you want by default.
|
164
|
+
# actual user-specified config values will be deep-merged
|
165
|
+
# into the defaults.
|
166
|
+
def self.default_configuration
|
167
|
+
end
|
168
|
+
|
169
|
+
# Over-ride returning an array of symbols for required
|
170
|
+
# configuration keys.
|
171
|
+
def self.required_configuration
|
172
|
+
end
|
131
173
|
end
|
132
174
|
|
133
175
|
# If specific SearchEngine calls initialize, you want to call super
|
@@ -153,6 +195,9 @@ module BentoSearch
|
|
153
195
|
|
154
196
|
# global defaults?
|
155
197
|
self.configuration[:for_display] ||= {}
|
198
|
+
unless self.configuration.has_key?(:log_failed_results)
|
199
|
+
self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
|
200
|
+
end
|
156
201
|
|
157
202
|
# check for required keys -- have to be present, and not nil
|
158
203
|
if self.class.required_configuration
|
@@ -241,8 +286,11 @@ module BentoSearch
|
|
241
286
|
|
242
287
|
fill_in_search_metadata_for(failed, arguments)
|
243
288
|
|
244
|
-
|
245
289
|
return failed
|
290
|
+
ensure
|
291
|
+
if results && configuration.log_failed_results && results.failed?
|
292
|
+
Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
|
293
|
+
end
|
246
294
|
end
|
247
295
|
|
248
296
|
# SOME of the elements of Results to be returned that SearchEngine implementation
|
@@ -392,9 +440,26 @@ module BentoSearch
|
|
392
440
|
[:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
|
393
441
|
end
|
394
442
|
|
443
|
+
# Cover method for consistent api with Results
|
444
|
+
def display_configuration
|
445
|
+
configuration.for_display
|
446
|
+
end
|
447
|
+
|
448
|
+
# Cover method for consistent api with Results
|
449
|
+
def engine_id
|
450
|
+
configuration.id
|
451
|
+
end
|
452
|
+
|
395
453
|
|
396
454
|
protected
|
397
455
|
|
456
|
+
# For legacy reasons old name auto_rescue_exceptions is here, some
|
457
|
+
# sub-classes may override it. Now preferred to use auto_rescued_exceptions
|
458
|
+
# setter instead.
|
459
|
+
def auto_rescue_exceptions
|
460
|
+
self.auto_rescued_exceptions
|
461
|
+
end
|
462
|
+
|
398
463
|
# get value of an arg that can be supplied in search args OR config,
|
399
464
|
# with search_args over-ridding config. Also normalizes value to_s
|
400
465
|
# (for symbols/strings).
|
@@ -409,40 +474,5 @@ module BentoSearch
|
|
409
474
|
|
410
475
|
return value
|
411
476
|
end
|
412
|
-
|
413
|
-
# What exceptions should our #search wrapper rescue and turn
|
414
|
-
# into failed results instead of fatal errors?
|
415
|
-
#
|
416
|
-
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
417
|
-
# of other errors we don't want to eat either, making
|
418
|
-
# development really confusing. Perhaps could set this
|
419
|
-
# to be something diff in production and dev?
|
420
|
-
#
|
421
|
-
# This default list is probably useful already, but individual
|
422
|
-
# engines can override if it's convenient for their own error
|
423
|
-
# handling.
|
424
|
-
def auto_rescue_exceptions
|
425
|
-
[BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
|
426
|
-
HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
|
427
|
-
MultiJson::DecodeError, Nokogiri::SyntaxError, SocketError]
|
428
|
-
end
|
429
|
-
|
430
|
-
|
431
|
-
module ClassMethods
|
432
|
-
|
433
|
-
# Over-ride returning a hash or Confstruct with
|
434
|
-
# any configuration values you want by default.
|
435
|
-
# actual user-specified config values will be deep-merged
|
436
|
-
# into the defaults.
|
437
|
-
def default_configuration
|
438
|
-
end
|
439
|
-
|
440
|
-
# Over-ride returning an array of symbols for required
|
441
|
-
# configuration keys.
|
442
|
-
def required_configuration
|
443
|
-
end
|
444
|
-
|
445
|
-
end
|
446
|
-
|
447
477
|
end
|
448
478
|
end
|
@@ -25,45 +25,40 @@ require 'http_client_patch/include_client'
|
|
25
25
|
# == Linking
|
26
26
|
#
|
27
27
|
# The link to record in EBSCO interface delivered as "PLink" will be listed
|
28
|
-
# as record main link.
|
28
|
+
# as record main link. If the record includes a node at `./FullText/Links/Link/Type[text() = 'pdflink']`,
|
29
|
+
# the `plink` will be marked as fulltext. (There may be other cases of fulltext, but
|
30
|
+
# this seems to be all EDS API tells us.)
|
29
31
|
#
|
30
32
|
# Any links listed under <CustomLinks> will be listed as other_links, using
|
31
|
-
# configured name provided by EBSCO for CustomLink.
|
33
|
+
# configured name provided by EBSCO for CustomLink. Same with links listed
|
34
|
+
# as `<Item><Group>URL</Group>`.
|
32
35
|
#
|
33
|
-
#
|
34
|
-
# ourselves. However, in our testing, the first/only CustomLink was an
|
35
|
-
# an OpenURL. If configuration.assume_first_custom_link_openurl is
|
36
|
-
# true (as is default), it will be used to create an OpenURL link. However, in
|
37
|
-
# our testing, many records don't have this at all. **Note** You want
|
38
|
-
# to configure your profile so OpenURLs are ALWAYS included for all records, not
|
39
|
-
# just records with no EBSCO fulltext, to ensure bento_search can get the
|
40
|
-
# openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
|
41
|
-
# have to ask EBSCO support for help, it's confusing!).
|
36
|
+
# As always, you can customize links and other_links with Item Decorators.
|
42
37
|
#
|
43
|
-
#
|
44
|
-
# it's configured name or label, not assume first one is it.
|
38
|
+
# == Custom Data
|
45
39
|
#
|
46
|
-
#
|
40
|
+
# If present, there is a custom_data[:holdings] value, an array of
|
41
|
+
# BentoSearch::EdsEngine::Holding objects, each of which has a #location
|
42
|
+
# and #call_number. There will usually (always?) be at most 1 item in the
|
43
|
+
# array, as far as we can tell from how EDS works.
|
47
44
|
#
|
48
45
|
# == Technical Notes and Difficulties
|
49
46
|
#
|
50
|
-
# This API is
|
51
|
-
#
|
52
|
-
#
|
47
|
+
# This API is pretty difficult to work with, and the response has many
|
48
|
+
# idiosyncratic undocumented parts. We think we are currently
|
49
|
+
# getting fairly complete citation detail out, at least for articles, but may be missing
|
50
|
+
# some on weird edge cases, books/book chapters, etc.
|
53
51
|
#
|
54
52
|
# Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
|
55
53
|
# request making a session for every new end-user -- as we have no way to keep
|
56
54
|
# track of end-users, we do it on every request in this implementation.
|
57
55
|
#
|
58
|
-
#
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
# and
|
64
|
-
# made from individual elements.
|
65
|
-
#
|
66
|
-
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
56
|
+
# An older version of the EDS API returned much less info, and we tried
|
57
|
+
# to scrape out what we could anyway. Much of this logic is still there
|
58
|
+
# as backup. In the older version, not enough info was there for an
|
59
|
+
# OpenURL link, `configuration.assume_first_custom_link_openurl` was true
|
60
|
+
# by default, and used to create an OpenURL link. It now defaults to false,
|
61
|
+
# and should no longer be necessary.
|
67
62
|
#
|
68
63
|
# Title and abstract data seems to be HTML with tags and character entities and
|
69
64
|
# escaped special chars. We're trusting it and passing it on as html_safe.
|
@@ -91,7 +86,7 @@ require 'http_client_patch/include_client'
|
|
91
86
|
#
|
92
87
|
# == EDS docs:
|
93
88
|
#
|
94
|
-
# * Console App to demo requests:
|
89
|
+
# * Console App to demo requests: <
|
95
90
|
# * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
|
96
91
|
# * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
|
97
92
|
#
|
@@ -101,11 +96,20 @@ class BentoSearch::EdsEngine
|
|
101
96
|
|
102
97
|
# Can't change http timeout in config, because we keep an http
|
103
98
|
# client at class-wide level, and config is not class-wide.
|
104
|
-
#
|
99
|
+
# We used to keep in constant, but that's not good for custom setting,
|
100
|
+
# we now use class_attribute, but in a weird backwards-compat way for
|
101
|
+
# anyone who might be using the constant.
|
105
102
|
HttpTimeout = 4
|
103
|
+
|
104
|
+
class_attribute :http_timeout, instance_writer: false
|
105
|
+
def self.http_timeout
|
106
|
+
defined?(@http_timeout) ? @http_timeout : HttpTimeout
|
107
|
+
end
|
108
|
+
|
109
|
+
|
106
110
|
extend HTTPClientPatch::IncludeClient
|
107
111
|
include_http_client do |client|
|
108
|
-
client.connect_timeout = client.send_timeout = client.receive_timeout =
|
112
|
+
client.connect_timeout = client.send_timeout = client.receive_timeout = http_timeout
|
109
113
|
end
|
110
114
|
|
111
115
|
AuthHeader = "x-authenticationToken"
|
@@ -131,12 +135,7 @@ class BentoSearch::EdsEngine
|
|
131
135
|
# an object that includes some Rails helper modules for
|
132
136
|
# text handling.
|
133
137
|
def helper
|
134
|
-
|
135
|
-
@helper = Object.new
|
136
|
-
@helper.extend ActionView::Helpers::TextHelper # for truncate
|
137
|
-
@helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
|
138
|
-
end
|
139
|
-
return @helper
|
138
|
+
@helper ||= Helper.new
|
140
139
|
end
|
141
140
|
|
142
141
|
|
@@ -207,8 +206,6 @@ class BentoSearch::EdsEngine
|
|
207
206
|
|
208
207
|
url = construct_search_url(args)
|
209
208
|
|
210
|
-
|
211
|
-
|
212
209
|
response = get_with_auth(url, session_token)
|
213
210
|
|
214
211
|
results = BentoSearch::Results.new
|
@@ -237,39 +234,96 @@ class BentoSearch::EdsEngine
|
|
237
234
|
|
238
235
|
item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
|
239
236
|
|
240
|
-
#
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
# only SOMETIMES does it have XML tags, other times it's straight text.
|
246
|
-
# ARGH.
|
247
|
-
author_xml = Nokogiri::XML::fragment(author_mess)
|
248
|
-
searchLinks = author_xml.xpath(".//searchLink")
|
249
|
-
if searchLinks.size > 0
|
250
|
-
author_xml.xpath(".//searchLink").each do |author_node|
|
251
|
-
item.authors << BentoSearch::Author.new(:display => author_node.text)
|
237
|
+
# Much better way to get authors out of EDS response now...
|
238
|
+
author_full_names = record_xml.xpath("./RecordInfo/BibRecord/BibRelationships/HasContributorRelationships/HasContributor/PersonEntity/Name/NameFull")
|
239
|
+
author_full_names.each do |name_full_xml|
|
240
|
+
if name_full_xml && (text = name_full_xml.text).present?
|
241
|
+
item.authors << BentoSearch::Author.new(:display => text)
|
252
242
|
end
|
253
|
-
else
|
254
|
-
item.authors << BentoSearch::Author.new(:display => author_xml.text)
|
255
243
|
end
|
256
244
|
|
245
|
+
if item.authors.blank?
|
246
|
+
# Believe it or not, the authors are encoded as an escaped
|
247
|
+
# XML-ish payload, that we need to parse again and get the
|
248
|
+
# actual authors out of. WTF. Thanks for handling fragments
|
249
|
+
# nokogiri.
|
250
|
+
author_mess = element_by_group(record_xml, "Au")
|
251
|
+
# only SOMETIMES does it have XML tags, other times it's straight text.
|
252
|
+
# ARGH.
|
253
|
+
author_xml = Nokogiri::XML::fragment(author_mess)
|
254
|
+
searchLinks = author_xml.xpath(".//searchLink")
|
255
|
+
if searchLinks.size > 0
|
256
|
+
author_xml.xpath(".//searchLink").each do |author_node|
|
257
|
+
item.authors << BentoSearch::Author.new(:display => author_node.text)
|
258
|
+
end
|
259
|
+
else
|
260
|
+
item.authors << BentoSearch::Author.new(:display => author_xml.text)
|
261
|
+
end
|
262
|
+
end
|
257
263
|
|
258
264
|
# PLink is main inward facing EBSCO link, put it as
|
259
265
|
# main link.
|
260
266
|
if direct_link = record_xml.at_xpath("./PLink")
|
261
|
-
|
267
|
+
item.link = direct_link.text
|
268
|
+
|
269
|
+
if record_xml.at_xpath("./FullText/Links/Link/Type[text() = 'pdflink']")
|
270
|
+
item.link_is_fulltext = true
|
271
|
+
end
|
262
272
|
end
|
263
273
|
|
274
|
+
|
264
275
|
# Other links may be found in CustomLinks, it seems like usually
|
265
276
|
# there will be at least one, hopefully the first one is the OpenURL?
|
266
|
-
|
277
|
+
#byebug if configuration.id == "articles"
|
278
|
+
record_xml.xpath("./CustomLinks/CustomLink|./FullText/CustomLinks/CustomLink").each do |custom_link|
|
279
|
+
# If it's in FullText section, give it a rel=alternate
|
280
|
+
# to indicate it's fulltext
|
281
|
+
rel = (custom_link.parent.parent.name.downcase == "fulltext") ? "alternate" : nil
|
282
|
+
|
267
283
|
item.other_links << BentoSearch::Link.new(
|
268
284
|
:url => custom_link.at_xpath("./Url").text,
|
269
|
-
:
|
285
|
+
:rel => rel,
|
286
|
+
:label => custom_link.at_xpath("./Text").try(:text).presence || custom_link.at_xpath("./Name").try(:text).presence || "Link"
|
270
287
|
)
|
271
288
|
end
|
272
289
|
|
290
|
+
# More other links in 'URL' Item, in unpredictable format sometimes being
|
291
|
+
# embedded XML. Really EBSCO?
|
292
|
+
record_xml.xpath("./Items/Item[child::Group[text()='URL']]").each do |url_item|
|
293
|
+
data_element = url_item.at_xpath("./Data")
|
294
|
+
next unless data_element
|
295
|
+
|
296
|
+
# SOMETIMES the url and label are in an embedded escaped XML element...
|
297
|
+
if data_element.text.strip.start_with?("<link")
|
298
|
+
# Ugh, once unescaped it has bare '&' in URL queries sometimes, which
|
299
|
+
# is not actually legal XML anymore, but Nokogiri::HTML parser will
|
300
|
+
# let us get away with it, but then doesn't put the actual text
|
301
|
+
# inside the 'link' item, but inside the <link> tag since it knows
|
302
|
+
# an HTML link tag has no content. Really EDS.
|
303
|
+
node = Nokogiri::HTML::fragment(data_element.text)
|
304
|
+
next unless link = node.at_xpath("./link")
|
305
|
+
next unless link["linkterm"].presence || link["linkTerm"].presence
|
306
|
+
|
307
|
+
item.other_links << BentoSearch::Link.new(
|
308
|
+
:url => link["linkterm"] || link["linkTerm"],
|
309
|
+
:label => helper.strip_tags(data_element.text).presence || "Link"
|
310
|
+
)
|
311
|
+
else
|
312
|
+
# it's just a straight URL in data element, with only label we've
|
313
|
+
# got in <label> element.
|
314
|
+
next unless data_element.text.strip.present?
|
315
|
+
|
316
|
+
label_element = url_item.at_xpath("./Label")
|
317
|
+
label = label_element.try(:text).try { |s| helper.strip_tags(s) }.presence || "Link"
|
318
|
+
|
319
|
+
item.other_links << BentoSearch::Link.new(
|
320
|
+
:url => data_element.text,
|
321
|
+
:label => label
|
322
|
+
)
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
|
273
327
|
if (configuration.assume_first_custom_link_openurl &&
|
274
328
|
(first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
|
275
329
|
(node = first.at_xpath "./Url" )
|
@@ -286,7 +340,58 @@ class BentoSearch::EdsEngine
|
|
286
340
|
# Can't find a list of possible PubTypes to see what's there to try
|
287
341
|
# and map to our internal controlled vocab. oh wells.
|
288
342
|
|
343
|
+
item.doi = at_xpath_text record_xml, "./RecordInfo/BibRecord/BibEntity/Identifiers/Identifier[child::Type[text()='doi']]/Value"
|
289
344
|
|
345
|
+
item.start_page = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/StartPage")
|
346
|
+
total_pages = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/PageCount")
|
347
|
+
if total_pages.to_i != 0 && item.start_page.to_i != 0
|
348
|
+
item.end_page = (item.start_page.to_i + total_pages.to_i - 1).to_s
|
349
|
+
end
|
350
|
+
|
351
|
+
|
352
|
+
# location/call number, probably only for catalog results. We only see one
|
353
|
+
# in actual data, but XML structure allows multiple, so we'll store it as multiple.
|
354
|
+
copy_informations = record_xml.xpath("./Holdings/Holding/HoldingSimple/CopyInformationList/CopyInformation")
|
355
|
+
if copy_informations.present?
|
356
|
+
item.custom_data[:holdings] =
|
357
|
+
copy_informations.collect do |copy_information|
|
358
|
+
Holding.new(:location => at_xpath_text(copy_information, "Sublocation"),
|
359
|
+
:call_number => at_xpath_text(copy_information, "ShelfLocator"))
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
363
|
+
|
364
|
+
|
365
|
+
# For some EDS results, we have actual citation information,
|
366
|
+
# for some we don't.
|
367
|
+
container_xml = record_xml.at_xpath("./RecordInfo/BibRecord/BibRelationships/IsPartOfRelationships/IsPartOf/BibEntity")
|
368
|
+
if container_xml
|
369
|
+
item.source_title = at_xpath_text(container_xml, "./Titles/Title[child::Type[text()='main']]/TitleFull")
|
370
|
+
item.volume = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='volume']]/Value")
|
371
|
+
item.issue = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='issue']]/Value")
|
372
|
+
|
373
|
+
item.issn = at_xpath_text(container_xml, "./Identifiers/Identifier[child::Type[text()='issn-print']]/Value")
|
374
|
+
|
375
|
+
if date_xml = container_xml.at_xpath("./Dates/Date")
|
376
|
+
item.year = at_xpath_text(date_xml, "./Y")
|
377
|
+
|
378
|
+
date = at_xpath_text(date_xml, "./D").to_i
|
379
|
+
month = at_xpath_text(date_xml, "./M").to_i
|
380
|
+
if item.year.to_i != 0 && date != 0 && month != 0
|
381
|
+
item.publication_date = Date.new(item.year.to_i, month, date)
|
382
|
+
end
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
# EDS annoyingly repeats a monographic title in the same place
|
387
|
+
# we look for source/container title, take it away.
|
388
|
+
if item.start_page.blank? && helper.strip_tags(item.title) == item.source_title
|
389
|
+
item.source_title = nil
|
390
|
+
end
|
391
|
+
|
392
|
+
# Legacy EDS citation extracting. We don't really need this any more
|
393
|
+
# because EDS api has improved, but leave it in in case anyone using
|
394
|
+
# older versions needed it.
|
290
395
|
|
291
396
|
# We have a single blob of human-readable citation, that's also
|
292
397
|
# littered with XML-ish tags we need to deal with. We'll save
|
@@ -306,7 +411,6 @@ class BentoSearch::EdsEngine
|
|
306
411
|
item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
|
307
412
|
end
|
308
413
|
|
309
|
-
|
310
414
|
item.extend CitationMessDecorator
|
311
415
|
|
312
416
|
results << item
|
@@ -509,7 +613,7 @@ class BentoSearch::EdsEngine
|
|
509
613
|
:base_url => "http://eds-api.ebscohost.com/edsapi/rest/",
|
510
614
|
:highlighting => true,
|
511
615
|
:truncate_highlighted => 280,
|
512
|
-
:assume_first_custom_link_openurl =>
|
616
|
+
:assume_first_custom_link_openurl => false,
|
513
617
|
:search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
|
514
618
|
}
|
515
619
|
end
|
@@ -559,4 +663,20 @@ class BentoSearch::EdsEngine
|
|
559
663
|
end
|
560
664
|
end
|
561
665
|
|
666
|
+
# a class that includes some Rails helper modules for
|
667
|
+
# text handling.
|
668
|
+
class Helper
|
669
|
+
include ActionView::Helpers::SanitizeHelper # for strip_tags
|
670
|
+
include ActionView::Helpers::TextHelper # for truncate
|
671
|
+
include ActionView::Helpers::OutputSafetyHelper # for safe_join
|
672
|
+
end
|
673
|
+
|
674
|
+
class Holding
|
675
|
+
attr_reader :location, :call_number
|
676
|
+
def initialize(args)
|
677
|
+
@location = args[:location]
|
678
|
+
@call_number = args[:call_number]
|
679
|
+
end
|
680
|
+
end
|
681
|
+
|
562
682
|
end
|