bento_search 1.7.0.beta.1 → 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -7
- data/app/assets/javascripts/bento_search/ajax_load.js +37 -21
- data/app/helpers/bento_search_helper.rb +72 -74
- data/app/models/bento_search/concurrent_searcher.rb +136 -0
- data/app/models/bento_search/multi_searcher.rb +36 -35
- data/app/models/bento_search/search_engine.rb +70 -40
- data/app/search_engines/bento_search/doaj_articles_engine.rb +1 -1
- data/app/search_engines/bento_search/eds_engine.rb +176 -56
- data/app/views/bento_search/_ajax_loading.html.erb +17 -0
- data/app/views/bento_search/_link.html.erb +3 -3
- data/lib/bento_search.rb +12 -0
- data/lib/bento_search/engine.rb +2 -0
- data/lib/bento_search/version.rb +1 -1
- data/test/search_engines/eds_engine_test.rb +91 -59
- data/test/search_engines/search_engine_base_test.rb +11 -0
- data/test/search_engines/search_engine_test.rb +12 -0
- data/test/unit/concurrent_searcher_test.rb +75 -0
- data/test/unit/multi_searcher_test.rb +16 -19
- data/test/vcr_cassettes/eds/FullText_CustomLink.yml +198 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1036 -1729
- data/test/vcr_cassettes/eds/catalog_ebook_query.yml +218 -0
- data/test/vcr_cassettes/eds/catalog_query.yml +255 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +11 -44
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +10 -7
- data/test/vcr_cassettes/eds/get_with_auth.yml +144 -153
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +167 -223
- metadata +15 -5
@@ -1,19 +1,19 @@
|
|
1
|
-
begin
|
1
|
+
begin
|
2
2
|
require 'celluloid'
|
3
3
|
|
4
4
|
# Based on Celluloid, concurrently runs multiple searches in
|
5
5
|
# separate threads. You must include 'celluloid' gem dependency
|
6
6
|
# into your local app to use this class. Requires celluloid 0.12.0
|
7
|
-
# or above (for new preferred async syntax).
|
7
|
+
# or above (for new preferred async syntax).
|
8
8
|
#
|
9
9
|
# Warning, if you don't have celluloid in your app, this class simply
|
10
10
|
# won't load. TODO: We should put this file in a different directory
|
11
11
|
# so it's never auto-loaded, and requires a "require 'bento_search/multi_searcher'",
|
12
12
|
# such that it will raise without celluloid only then, and we don't need this
|
13
|
-
# rescue LoadError stuff.
|
13
|
+
# rescue LoadError stuff.
|
14
14
|
#
|
15
15
|
# I am not an expert at use of Celluloid, it's possible there's a better
|
16
|
-
# way to do this all, but seems to work.
|
16
|
+
# way to do this all, but seems to work.
|
17
17
|
#
|
18
18
|
# ## Usage
|
19
19
|
#
|
@@ -26,14 +26,14 @@ begin
|
|
26
26
|
# retrieve results, blocking until each is completed:
|
27
27
|
# searcher.results
|
28
28
|
#
|
29
|
-
# returns a Hash keyed by engine id, values BentoSearch::Results objects.
|
29
|
+
# returns a Hash keyed by engine id, values BentoSearch::Results objects.
|
30
30
|
#
|
31
31
|
# Can only call #results once per #start, after that it'll return empty hash.
|
32
|
-
# (should we make it actually raise instead?). .
|
33
|
-
#
|
32
|
+
# (should we make it actually raise instead?). .
|
33
|
+
#
|
34
34
|
# important to call results at some point after calling start, in order
|
35
35
|
# to make sure Celluloid::Actors are properly terminated to avoid
|
36
|
-
# resource leakage. May want to do it in an ensure block.
|
36
|
+
# resource leakage. May want to do it in an ensure block.
|
37
37
|
#
|
38
38
|
# Note that celluloid uses multi-threading in such a way that you
|
39
39
|
# may have to set config.cache_classes=true even in development
|
@@ -41,87 +41,88 @@ begin
|
|
41
41
|
#
|
42
42
|
#
|
43
43
|
# TODO: have a method that returns Futures instead of only supplying the blocking
|
44
|
-
# results method? Several tricks, including making sure to properly terminate actors.
|
44
|
+
# results method? Several tricks, including making sure to properly terminate actors.
|
45
45
|
class BentoSearch::MultiSearcher
|
46
|
-
|
46
|
+
|
47
47
|
def initialize(*engine_ids)
|
48
|
+
ActiveSupport::Deprecation.warn('BentoSearch::MultiSearcher is deprecated and will be removed in bento_search 2.0. Please use BentoSearch::ConcurrentSearcher instead.', caller.slice(1..-1))
|
49
|
+
|
48
50
|
@engines = []
|
49
51
|
@actors = []
|
50
52
|
engine_ids.each do |id|
|
51
53
|
add_engine( BentoSearch.get_engine id )
|
52
54
|
end
|
53
55
|
end
|
54
|
-
|
56
|
+
|
55
57
|
# Adds an instantiated engine directly, rather than by id from global
|
56
|
-
# registry.
|
57
|
-
def add_engine(engine)
|
58
|
+
# registry.
|
59
|
+
def add_engine(engine)
|
58
60
|
@engines << engine
|
59
61
|
end
|
60
|
-
|
61
|
-
# Starts all searches, returns self so you can chain method calls if you like.
|
62
|
+
|
63
|
+
# Starts all searches, returns self so you can chain method calls if you like.
|
62
64
|
def search(*search_args)
|
63
65
|
@engines.each do |engine|
|
64
66
|
a = Actor.new(engine)
|
65
67
|
@actors << a
|
66
68
|
a.async.start *search_args
|
67
|
-
end
|
69
|
+
end
|
68
70
|
return self
|
69
71
|
end
|
70
72
|
alias_method :start, :search # backwards compat
|
71
|
-
|
73
|
+
|
72
74
|
# Call after #start. Blocks until each included engine is finished
|
73
75
|
# then returns a Hash keyed by engine registered id, value is a
|
74
|
-
# BentoSearch::Results object.
|
76
|
+
# BentoSearch::Results object.
|
75
77
|
#
|
76
78
|
# Can only call _once_ per invocation of #start, after that it'll return
|
77
|
-
# an empty hash.
|
79
|
+
# an empty hash.
|
78
80
|
def results
|
79
81
|
results = {}
|
80
|
-
|
82
|
+
|
81
83
|
# we use #delete_if to get an iterator that deletes
|
82
|
-
# each item after iteration.
|
84
|
+
# each item after iteration.
|
83
85
|
@actors.delete_if do |actor|
|
84
86
|
result_key = (actor.engine.configuration.id || actor.engine.class.name)
|
85
87
|
results[result_key] = actor.results
|
86
88
|
actor.terminate
|
87
|
-
|
89
|
+
|
88
90
|
true
|
89
91
|
end
|
90
|
-
|
92
|
+
|
91
93
|
return results
|
92
94
|
end
|
93
|
-
|
94
|
-
|
95
|
+
|
96
|
+
|
95
97
|
class Actor
|
96
98
|
include Celluloid
|
97
|
-
|
99
|
+
|
98
100
|
attr_accessor :engine
|
99
|
-
|
101
|
+
|
100
102
|
def initialize(a_engine)
|
101
103
|
self.engine = a_engine
|
102
104
|
end
|
103
|
-
|
104
|
-
# call as .async.start, to invoke async.
|
105
|
+
|
106
|
+
# call as .async.start, to invoke async.
|
105
107
|
def start(*search_args)
|
106
108
|
begin
|
107
109
|
@results = self.engine.search(*search_args)
|
108
110
|
rescue StandardError => e
|
109
111
|
Rails.logger.error("\nBentoSearch:MultiSearcher caught exception: #{e}\n#{e.backtrace.join(" \n")}")
|
110
|
-
# Make a fake results with caught exception.
|
112
|
+
# Make a fake results with caught exception.
|
111
113
|
@results = BentoSearch::Results.new
|
112
114
|
self.engine.fill_in_search_metadata_for(@results, self.engine.normalized_search_arguments(search_args))
|
113
|
-
|
115
|
+
|
114
116
|
@results.error ||= {}
|
115
|
-
@results.error["exception"] = e
|
117
|
+
@results.error["exception"] = e
|
116
118
|
end
|
117
119
|
end
|
118
|
-
|
120
|
+
|
119
121
|
def results
|
120
122
|
@results
|
121
123
|
end
|
122
|
-
|
124
|
+
|
123
125
|
end
|
124
|
-
|
125
126
|
end
|
126
127
|
|
127
128
|
rescue LoadError
|
@@ -17,7 +17,6 @@ module BentoSearch
|
|
17
17
|
# remote service. Not yet universally used.
|
18
18
|
class ::BentoSearch::FetchError < ::BentoSearch::Error ; end
|
19
19
|
|
20
|
-
|
21
20
|
# Module mix-in for bento_search search engines.
|
22
21
|
#
|
23
22
|
# ==Using a SearchEngine
|
@@ -65,6 +64,11 @@ module BentoSearch
|
|
65
64
|
# string name, actual class object not supported (to make it easier
|
66
65
|
# to serialize and transport configuration).
|
67
66
|
#
|
67
|
+
# [log_failed_results]
|
68
|
+
# Default false, if true all failed results are logged to
|
69
|
+
# `Rails.logger.error`. Can set global default with
|
70
|
+
# `BentoSearch.defaults.log_failed_results = true`
|
71
|
+
#
|
68
72
|
# == Implementing a SearchEngine
|
69
73
|
#
|
70
74
|
# Implementing a new SearchEngine is relatively straightforward -- you are
|
@@ -119,15 +123,53 @@ module BentoSearch
|
|
119
123
|
module SearchEngine
|
120
124
|
DefaultPerPage = 10
|
121
125
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
126
|
extend ActiveSupport::Concern
|
126
127
|
|
127
128
|
include Capabilities
|
128
129
|
|
130
|
+
mattr_accessor :default_auto_rescued_exceptions
|
131
|
+
self.default_auto_rescued_exceptions = [
|
132
|
+
BentoSearch::RubyTimeoutClass,
|
133
|
+
HTTPClient::TimeoutError,
|
134
|
+
HTTPClient::ConfigurationError,
|
135
|
+
HTTPClient::BadResponseError,
|
136
|
+
MultiJson::DecodeError,
|
137
|
+
Nokogiri::SyntaxError,
|
138
|
+
SocketError
|
139
|
+
].freeze
|
140
|
+
|
129
141
|
included do
|
130
142
|
attr_accessor :configuration
|
143
|
+
|
144
|
+
# What exceptions should our #search wrapper rescue and turn
|
145
|
+
# into failed results instead of fatal errors?
|
146
|
+
#
|
147
|
+
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
148
|
+
# of other errors we don't want to eat either, making
|
149
|
+
# development really confusing. Perhaps could set this
|
150
|
+
# to be something diff in production and dev?
|
151
|
+
#
|
152
|
+
# This default list is probably useful already, but individual
|
153
|
+
# engines can override if it's convenient for their own error
|
154
|
+
# handling.
|
155
|
+
#
|
156
|
+
# Override by just using `auto_rescued_exceptions=` on class _or_ method,
|
157
|
+
# although some legacy code may override `def auto_rescue_exceptions` (note
|
158
|
+
# old `rescue` vs new `rescued`) which should work too.
|
159
|
+
self.class_attribute :auto_rescued_exceptions
|
160
|
+
self.auto_rescued_exceptions = ::BentoSearch::SearchEngine.default_auto_rescued_exceptions
|
161
|
+
|
162
|
+
# Over-ride returning a hash or Confstruct with
|
163
|
+
# any configuration values you want by default.
|
164
|
+
# actual user-specified config values will be deep-merged
|
165
|
+
# into the defaults.
|
166
|
+
def self.default_configuration
|
167
|
+
end
|
168
|
+
|
169
|
+
# Over-ride returning an array of symbols for required
|
170
|
+
# configuration keys.
|
171
|
+
def self.required_configuration
|
172
|
+
end
|
131
173
|
end
|
132
174
|
|
133
175
|
# If specific SearchEngine calls initialize, you want to call super
|
@@ -153,6 +195,9 @@ module BentoSearch
|
|
153
195
|
|
154
196
|
# global defaults?
|
155
197
|
self.configuration[:for_display] ||= {}
|
198
|
+
unless self.configuration.has_key?(:log_failed_results)
|
199
|
+
self.configuration[:log_failed_results] = BentoSearch.defaults.log_failed_results
|
200
|
+
end
|
156
201
|
|
157
202
|
# check for required keys -- have to be present, and not nil
|
158
203
|
if self.class.required_configuration
|
@@ -241,8 +286,11 @@ module BentoSearch
|
|
241
286
|
|
242
287
|
fill_in_search_metadata_for(failed, arguments)
|
243
288
|
|
244
|
-
|
245
289
|
return failed
|
290
|
+
ensure
|
291
|
+
if results && configuration.log_failed_results && results.failed?
|
292
|
+
Rails.logger.error("Error fetching results for `#{configuration.id || self}`: #{arguments}: #{results.error}")
|
293
|
+
end
|
246
294
|
end
|
247
295
|
|
248
296
|
# SOME of the elements of Results to be returned that SearchEngine implementation
|
@@ -392,9 +440,26 @@ module BentoSearch
|
|
392
440
|
[:query, :search_field, :semantic_search_field, :sort, :page, :start, :per_page]
|
393
441
|
end
|
394
442
|
|
443
|
+
# Cover method for consistent api with Results
|
444
|
+
def display_configuration
|
445
|
+
configuration.for_display
|
446
|
+
end
|
447
|
+
|
448
|
+
# Cover method for consistent api with Results
|
449
|
+
def engine_id
|
450
|
+
configuration.id
|
451
|
+
end
|
452
|
+
|
395
453
|
|
396
454
|
protected
|
397
455
|
|
456
|
+
# For legacy reasons old name auto_rescue_exceptions is here, some
|
457
|
+
# sub-classes may override it. Now preferred to use auto_rescued_exceptions
|
458
|
+
# setter instead.
|
459
|
+
def auto_rescue_exceptions
|
460
|
+
self.auto_rescued_exceptions
|
461
|
+
end
|
462
|
+
|
398
463
|
# get value of an arg that can be supplied in search args OR config,
|
399
464
|
# with search_args over-ridding config. Also normalizes value to_s
|
400
465
|
# (for symbols/strings).
|
@@ -409,40 +474,5 @@ module BentoSearch
|
|
409
474
|
|
410
475
|
return value
|
411
476
|
end
|
412
|
-
|
413
|
-
# What exceptions should our #search wrapper rescue and turn
|
414
|
-
# into failed results instead of fatal errors?
|
415
|
-
#
|
416
|
-
# Can't rescue everything, or we eat VCR/webmock errors, and lots
|
417
|
-
# of other errors we don't want to eat either, making
|
418
|
-
# development really confusing. Perhaps could set this
|
419
|
-
# to be something diff in production and dev?
|
420
|
-
#
|
421
|
-
# This default list is probably useful already, but individual
|
422
|
-
# engines can override if it's convenient for their own error
|
423
|
-
# handling.
|
424
|
-
def auto_rescue_exceptions
|
425
|
-
[BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
|
426
|
-
HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
|
427
|
-
MultiJson::DecodeError, Nokogiri::SyntaxError, SocketError]
|
428
|
-
end
|
429
|
-
|
430
|
-
|
431
|
-
module ClassMethods
|
432
|
-
|
433
|
-
# Over-ride returning a hash or Confstruct with
|
434
|
-
# any configuration values you want by default.
|
435
|
-
# actual user-specified config values will be deep-merged
|
436
|
-
# into the defaults.
|
437
|
-
def default_configuration
|
438
|
-
end
|
439
|
-
|
440
|
-
# Over-ride returning an array of symbols for required
|
441
|
-
# configuration keys.
|
442
|
-
def required_configuration
|
443
|
-
end
|
444
|
-
|
445
|
-
end
|
446
|
-
|
447
477
|
end
|
448
478
|
end
|
@@ -25,45 +25,40 @@ require 'http_client_patch/include_client'
|
|
25
25
|
# == Linking
|
26
26
|
#
|
27
27
|
# The link to record in EBSCO interface delivered as "PLink" will be listed
|
28
|
-
# as record main link.
|
28
|
+
# as record main link. If the record includes a node at `./FullText/Links/Link/Type[text() = 'pdflink']`,
|
29
|
+
# the `plink` will be marked as fulltext. (There may be other cases of fulltext, but
|
30
|
+
# this seems to be all EDS API tells us.)
|
29
31
|
#
|
30
32
|
# Any links listed under <CustomLinks> will be listed as other_links, using
|
31
|
-
# configured name provided by EBSCO for CustomLink.
|
33
|
+
# configured name provided by EBSCO for CustomLink. Same with links listed
|
34
|
+
# as `<Item><Group>URL</Group>`.
|
32
35
|
#
|
33
|
-
#
|
34
|
-
# ourselves. However, in our testing, the first/only CustomLink was an
|
35
|
-
# an OpenURL. If configuration.assume_first_custom_link_openurl is
|
36
|
-
# true (as is default), it will be used to create an OpenURL link. However, in
|
37
|
-
# our testing, many records don't have this at all. **Note** You want
|
38
|
-
# to configure your profile so OpenURLs are ALWAYS included for all records, not
|
39
|
-
# just records with no EBSCO fulltext, to ensure bento_search can get the
|
40
|
-
# openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
|
41
|
-
# have to ask EBSCO support for help, it's confusing!).
|
36
|
+
# As always, you can customize links and other_links with Item Decorators.
|
42
37
|
#
|
43
|
-
#
|
44
|
-
# it's configured name or label, not assume first one is it.
|
38
|
+
# == Custom Data
|
45
39
|
#
|
46
|
-
#
|
40
|
+
# If present, there is a custom_data[:holdings] value, an array of
|
41
|
+
# BentoSearch::EdsEngine::Holding objects, each of which has a #location
|
42
|
+
# and #call_number. There will usually (always?) be at most 1 item in the
|
43
|
+
# array, as far as we can tell from how EDS works.
|
47
44
|
#
|
48
45
|
# == Technical Notes and Difficulties
|
49
46
|
#
|
50
|
-
# This API is
|
51
|
-
#
|
52
|
-
#
|
47
|
+
# This API is pretty difficult to work with, and the response has many
|
48
|
+
# idiosyncratic undocumented parts. We think we are currently
|
49
|
+
# getting fairly complete citation detail out, at least for articles, but may be missing
|
50
|
+
# some on weird edge cases, books/book chapters, etc.
|
53
51
|
#
|
54
52
|
# Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
|
55
53
|
# request making a session for every new end-user -- as we have no way to keep
|
56
54
|
# track of end-users, we do it on every request in this implementation.
|
57
55
|
#
|
58
|
-
#
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
# and
|
64
|
-
# made from individual elements.
|
65
|
-
#
|
66
|
-
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
56
|
+
# An older version of the EDS API returned much less info, and we tried
|
57
|
+
# to scrape out what we could anyway. Much of this logic is still there
|
58
|
+
# as backup. In the older version, not enough info was there for an
|
59
|
+
# OpenURL link, `configuration.assume_first_custom_link_openurl` was true
|
60
|
+
# by default, and used to create an OpenURL link. It now defaults to false,
|
61
|
+
# and should no longer be necessary.
|
67
62
|
#
|
68
63
|
# Title and abstract data seems to be HTML with tags and character entities and
|
69
64
|
# escaped special chars. We're trusting it and passing it on as html_safe.
|
@@ -91,7 +86,7 @@ require 'http_client_patch/include_client'
|
|
91
86
|
#
|
92
87
|
# == EDS docs:
|
93
88
|
#
|
94
|
-
# * Console App to demo requests:
|
89
|
+
# * Console App to demo requests: <
|
95
90
|
# * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
|
96
91
|
# * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
|
97
92
|
#
|
@@ -101,11 +96,20 @@ class BentoSearch::EdsEngine
|
|
101
96
|
|
102
97
|
# Can't change http timeout in config, because we keep an http
|
103
98
|
# client at class-wide level, and config is not class-wide.
|
104
|
-
#
|
99
|
+
# We used to keep in constant, but that's not good for custom setting,
|
100
|
+
# we now use class_attribute, but in a weird backwards-compat way for
|
101
|
+
# anyone who might be using the constant.
|
105
102
|
HttpTimeout = 4
|
103
|
+
|
104
|
+
class_attribute :http_timeout, instance_writer: false
|
105
|
+
def self.http_timeout
|
106
|
+
defined?(@http_timeout) ? @http_timeout : HttpTimeout
|
107
|
+
end
|
108
|
+
|
109
|
+
|
106
110
|
extend HTTPClientPatch::IncludeClient
|
107
111
|
include_http_client do |client|
|
108
|
-
client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
|
112
|
+
client.connect_timeout = client.send_timeout = client.receive_timeout = http_timeout
|
109
113
|
end
|
110
114
|
|
111
115
|
AuthHeader = "x-authenticationToken"
|
@@ -131,12 +135,7 @@ class BentoSearch::EdsEngine
|
|
131
135
|
# an object that includes some Rails helper modules for
|
132
136
|
# text handling.
|
133
137
|
def helper
|
134
|
-
|
135
|
-
@helper = Object.new
|
136
|
-
@helper.extend ActionView::Helpers::TextHelper # for truncate
|
137
|
-
@helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
|
138
|
-
end
|
139
|
-
return @helper
|
138
|
+
@helper ||= Helper.new
|
140
139
|
end
|
141
140
|
|
142
141
|
|
@@ -207,8 +206,6 @@ class BentoSearch::EdsEngine
|
|
207
206
|
|
208
207
|
url = construct_search_url(args)
|
209
208
|
|
210
|
-
|
211
|
-
|
212
209
|
response = get_with_auth(url, session_token)
|
213
210
|
|
214
211
|
results = BentoSearch::Results.new
|
@@ -237,39 +234,96 @@ class BentoSearch::EdsEngine
|
|
237
234
|
|
238
235
|
item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
|
239
236
|
|
240
|
-
#
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
# only SOMETIMES does it have XML tags, other times it's straight text.
|
246
|
-
# ARGH.
|
247
|
-
author_xml = Nokogiri::XML::fragment(author_mess)
|
248
|
-
searchLinks = author_xml.xpath(".//searchLink")
|
249
|
-
if searchLinks.size > 0
|
250
|
-
author_xml.xpath(".//searchLink").each do |author_node|
|
251
|
-
item.authors << BentoSearch::Author.new(:display => author_node.text)
|
237
|
+
# Much better way to get authors out of EDS response now...
|
238
|
+
author_full_names = record_xml.xpath("./RecordInfo/BibRecord/BibRelationships/HasContributorRelationships/HasContributor/PersonEntity/Name/NameFull")
|
239
|
+
author_full_names.each do |name_full_xml|
|
240
|
+
if name_full_xml && (text = name_full_xml.text).present?
|
241
|
+
item.authors << BentoSearch::Author.new(:display => text)
|
252
242
|
end
|
253
|
-
else
|
254
|
-
item.authors << BentoSearch::Author.new(:display => author_xml.text)
|
255
243
|
end
|
256
244
|
|
245
|
+
if item.authors.blank?
|
246
|
+
# Believe it or not, the authors are encoded as an escaped
|
247
|
+
# XML-ish payload, that we need to parse again and get the
|
248
|
+
# actual authors out of. WTF. Thanks for handling fragments
|
249
|
+
# nokogiri.
|
250
|
+
author_mess = element_by_group(record_xml, "Au")
|
251
|
+
# only SOMETIMES does it have XML tags, other times it's straight text.
|
252
|
+
# ARGH.
|
253
|
+
author_xml = Nokogiri::XML::fragment(author_mess)
|
254
|
+
searchLinks = author_xml.xpath(".//searchLink")
|
255
|
+
if searchLinks.size > 0
|
256
|
+
author_xml.xpath(".//searchLink").each do |author_node|
|
257
|
+
item.authors << BentoSearch::Author.new(:display => author_node.text)
|
258
|
+
end
|
259
|
+
else
|
260
|
+
item.authors << BentoSearch::Author.new(:display => author_xml.text)
|
261
|
+
end
|
262
|
+
end
|
257
263
|
|
258
264
|
# PLink is main inward facing EBSCO link, put it as
|
259
265
|
# main link.
|
260
266
|
if direct_link = record_xml.at_xpath("./PLink")
|
261
|
-
|
267
|
+
item.link = direct_link.text
|
268
|
+
|
269
|
+
if record_xml.at_xpath("./FullText/Links/Link/Type[text() = 'pdflink']")
|
270
|
+
item.link_is_fulltext = true
|
271
|
+
end
|
262
272
|
end
|
263
273
|
|
274
|
+
|
264
275
|
# Other links may be found in CustomLinks, it seems like usually
|
265
276
|
# there will be at least one, hopefully the first one is the OpenURL?
|
266
|
-
|
277
|
+
#byebug if configuration.id == "articles"
|
278
|
+
record_xml.xpath("./CustomLinks/CustomLink|./FullText/CustomLinks/CustomLink").each do |custom_link|
|
279
|
+
# If it's in FullText section, give it a rel=alternate
|
280
|
+
# to indicate it's fulltext
|
281
|
+
rel = (custom_link.parent.parent.name.downcase == "fulltext") ? "alternate" : nil
|
282
|
+
|
267
283
|
item.other_links << BentoSearch::Link.new(
|
268
284
|
:url => custom_link.at_xpath("./Url").text,
|
269
|
-
:
|
285
|
+
:rel => rel,
|
286
|
+
:label => custom_link.at_xpath("./Text").try(:text).presence || custom_link.at_xpath("./Name").try(:text).presence || "Link"
|
270
287
|
)
|
271
288
|
end
|
272
289
|
|
290
|
+
# More other links in 'URL' Item, in unpredictable format sometimes being
|
291
|
+
# embedded XML. Really EBSCO?
|
292
|
+
record_xml.xpath("./Items/Item[child::Group[text()='URL']]").each do |url_item|
|
293
|
+
data_element = url_item.at_xpath("./Data")
|
294
|
+
next unless data_element
|
295
|
+
|
296
|
+
# SOMETIMES the url and label are in an embedded escaped XML element...
|
297
|
+
if data_element.text.strip.start_with?("<link")
|
298
|
+
# Ugh, once unescaped it has bare '&' in URL queries sometimes, which
|
299
|
+
# is not actually legal XML anymore, but Nokogiri::HTML parser will
|
300
|
+
# let us get away with it, but then doesn't put the actual text
|
301
|
+
# inside the 'link' item, but inside the <link> tag since it knows
|
302
|
+
# an HTML link tag has no content. Really EDS.
|
303
|
+
node = Nokogiri::HTML::fragment(data_element.text)
|
304
|
+
next unless link = node.at_xpath("./link")
|
305
|
+
next unless link["linkterm"].presence || link["linkTerm"].presence
|
306
|
+
|
307
|
+
item.other_links << BentoSearch::Link.new(
|
308
|
+
:url => link["linkterm"] || link["linkTerm"],
|
309
|
+
:label => helper.strip_tags(data_element.text).presence || "Link"
|
310
|
+
)
|
311
|
+
else
|
312
|
+
# it's just a straight URL in data element, with only label we've
|
313
|
+
# got in <label> element.
|
314
|
+
next unless data_element.text.strip.present?
|
315
|
+
|
316
|
+
label_element = url_item.at_xpath("./Label")
|
317
|
+
label = label_element.try(:text).try { |s| helper.strip_tags(s) }.presence || "Link"
|
318
|
+
|
319
|
+
item.other_links << BentoSearch::Link.new(
|
320
|
+
:url => data_element.text,
|
321
|
+
:label => label
|
322
|
+
)
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
|
273
327
|
if (configuration.assume_first_custom_link_openurl &&
|
274
328
|
(first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
|
275
329
|
(node = first.at_xpath "./Url" )
|
@@ -286,7 +340,58 @@ class BentoSearch::EdsEngine
|
|
286
340
|
# Can't find a list of possible PubTypes to see what's there to try
|
287
341
|
# and map to our internal controlled vocab. oh wells.
|
288
342
|
|
343
|
+
item.doi = at_xpath_text record_xml, "./RecordInfo/BibRecord/BibEntity/Identifiers/Identifier[child::Type[text()='doi']]/Value"
|
289
344
|
|
345
|
+
item.start_page = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/StartPage")
|
346
|
+
total_pages = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/PageCount")
|
347
|
+
if total_pages.to_i != 0 && item.start_page.to_i != 0
|
348
|
+
item.end_page = (item.start_page.to_i + total_pages.to_i - 1).to_s
|
349
|
+
end
|
350
|
+
|
351
|
+
|
352
|
+
# location/call number, probably only for catalog results. We only see one
|
353
|
+
# in actual data, but XML structure allows multiple, so we'll store it as multiple.
|
354
|
+
copy_informations = record_xml.xpath("./Holdings/Holding/HoldingSimple/CopyInformationList/CopyInformation")
|
355
|
+
if copy_informations.present?
|
356
|
+
item.custom_data[:holdings] =
|
357
|
+
copy_informations.collect do |copy_information|
|
358
|
+
Holding.new(:location => at_xpath_text(copy_information, "Sublocation"),
|
359
|
+
:call_number => at_xpath_text(copy_information, "ShelfLocator"))
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
363
|
+
|
364
|
+
|
365
|
+
# For some EDS results, we have actual citation information,
|
366
|
+
# for some we don't.
|
367
|
+
container_xml = record_xml.at_xpath("./RecordInfo/BibRecord/BibRelationships/IsPartOfRelationships/IsPartOf/BibEntity")
|
368
|
+
if container_xml
|
369
|
+
item.source_title = at_xpath_text(container_xml, "./Titles/Title[child::Type[text()='main']]/TitleFull")
|
370
|
+
item.volume = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='volume']]/Value")
|
371
|
+
item.issue = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='issue']]/Value")
|
372
|
+
|
373
|
+
item.issn = at_xpath_text(container_xml, "./Identifiers/Identifier[child::Type[text()='issn-print']]/Value")
|
374
|
+
|
375
|
+
if date_xml = container_xml.at_xpath("./Dates/Date")
|
376
|
+
item.year = at_xpath_text(date_xml, "./Y")
|
377
|
+
|
378
|
+
date = at_xpath_text(date_xml, "./D").to_i
|
379
|
+
month = at_xpath_text(date_xml, "./M").to_i
|
380
|
+
if item.year.to_i != 0 && date != 0 && month != 0
|
381
|
+
item.publication_date = Date.new(item.year.to_i, month, date)
|
382
|
+
end
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
# EDS annoyingly repeats a monographic title in the same place
|
387
|
+
# we look for source/container title, take it away.
|
388
|
+
if item.start_page.blank? && helper.strip_tags(item.title) == item.source_title
|
389
|
+
item.source_title = nil
|
390
|
+
end
|
391
|
+
|
392
|
+
# Legacy EDS citation extracting. We don't really need this any more
|
393
|
+
# because EDS api has improved, but leave it in in case anyone using
|
394
|
+
# older versions needed it.
|
290
395
|
|
291
396
|
# We have a single blob of human-readable citation, that's also
|
292
397
|
# littered with XML-ish tags we need to deal with. We'll save
|
@@ -306,7 +411,6 @@ class BentoSearch::EdsEngine
|
|
306
411
|
item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
|
307
412
|
end
|
308
413
|
|
309
|
-
|
310
414
|
item.extend CitationMessDecorator
|
311
415
|
|
312
416
|
results << item
|
@@ -509,7 +613,7 @@ class BentoSearch::EdsEngine
|
|
509
613
|
:base_url => "http://eds-api.ebscohost.com/edsapi/rest/",
|
510
614
|
:highlighting => true,
|
511
615
|
:truncate_highlighted => 280,
|
512
|
-
:assume_first_custom_link_openurl => true,
|
616
|
+
:assume_first_custom_link_openurl => false,
|
513
617
|
:search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
|
514
618
|
}
|
515
619
|
end
|
@@ -559,4 +663,20 @@ class BentoSearch::EdsEngine
|
|
559
663
|
end
|
560
664
|
end
|
561
665
|
|
666
|
+
# a class that includes some Rails helper modules for
|
667
|
+
# text handling.
|
668
|
+
class Helper
|
669
|
+
include ActionView::Helpers::SanitizeHelper # for strip_tags
|
670
|
+
include ActionView::Helpers::TextHelper # for truncate
|
671
|
+
include ActionView::Helpers::OutputSafetyHelper # for safe_join
|
672
|
+
end
|
673
|
+
|
674
|
+
class Holding
|
675
|
+
attr_reader :location, :call_number
|
676
|
+
def initialize(args)
|
677
|
+
@location = args[:location]
|
678
|
+
@call_number = args[:call_number]
|
679
|
+
end
|
680
|
+
end
|
681
|
+
|
562
682
|
end
|