bento_search 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -16
- data/Rakefile +30 -11
- data/app/controllers/bento_search/search_controller.rb +29 -28
- data/app/models/bento_search/result_item.rb +10 -10
- data/app/models/bento_search/results/serialization.rb +22 -13
- data/app/models/bento_search/search_engine.rb +117 -117
- data/app/search_engines/bento_search/doaj_articles_engine.rb +19 -19
- data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
- data/app/search_engines/bento_search/eds_engine.rb +166 -166
- data/app/search_engines/bento_search/google_books_engine.rb +2 -2
- data/app/search_engines/bento_search/scopus_engine.rb +87 -87
- data/app/search_engines/bento_search/summon_engine.rb +1 -1
- data/lib/bento_search.rb +12 -9
- data/lib/bento_search/version.rb +1 -1
- data/test/dummy/config/boot.rb +4 -9
- data/test/dummy/db/schema.rb +15 -0
- data/test/functional/bento_search/search_controller_test.rb +63 -57
- data/test/helper/bento_search_helper_test.rb +103 -103
- data/test/search_engines/doaj_articles_engine_test.rb +9 -9
- data/test/search_engines/search_engine_base_test.rb +86 -86
- data/test/search_engines/search_engine_test.rb +56 -56
- data/test/test_helper.rb +23 -12
- data/test/unit/multi_searcher_test.rb +18 -18
- data/test/unit/pagination_test.rb +12 -12
- metadata +6 -4
@@ -4,10 +4,10 @@ require 'http_client_patch/include_client'
|
|
4
4
|
require 'json'
|
5
5
|
|
6
6
|
module BentoSearch
|
7
|
-
# DOAJ Articles search.
|
7
|
+
# DOAJ Articles search.
|
8
8
|
# https://doaj.org/api/v1/docs
|
9
9
|
#
|
10
|
-
# Phrase searches with double quotes are respected.
|
10
|
+
# Phrase searches with double quotes are respected.
|
11
11
|
#
|
12
12
|
# Supports #get by unique_id feature
|
13
13
|
#
|
@@ -36,7 +36,7 @@ module BentoSearch
|
|
36
36
|
Rails.logger.debug("DoajEngine: requesting #{query_url}")
|
37
37
|
response = http_client.get( query_url )
|
38
38
|
json = JSON.parse(response.body)
|
39
|
-
rescue
|
39
|
+
rescue BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
|
40
40
|
HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
|
41
41
|
JSON::ParserError => e
|
42
42
|
results.error ||= {}
|
@@ -77,7 +77,7 @@ module BentoSearch
|
|
77
77
|
def args_to_search_url(arguments)
|
78
78
|
query = if arguments[:query].kind_of?(Hash)
|
79
79
|
# multi-field query
|
80
|
-
arguments[:query].collect {|field,
|
80
|
+
arguments[:query].collect {|field, query_value| fielded_query(query_value, field)}.join(" ")
|
81
81
|
else
|
82
82
|
fielded_query(arguments[:query], arguments[:search_field])
|
83
83
|
end
|
@@ -85,7 +85,7 @@ module BentoSearch
|
|
85
85
|
# We need to escape this for going in a PATH component,
|
86
86
|
# not a query. So space can't be "+", it needs to be "%20",
|
87
87
|
# and indeed DOAJ API does not like "+".
|
88
|
-
#
|
88
|
+
#
|
89
89
|
# But neither CGI.escape nor URI.escape does quite
|
90
90
|
# the right kind of escaping, seems to work out
|
91
91
|
# if we do CGI.escape but then replace '+'
|
@@ -98,7 +98,7 @@ module BentoSearch
|
|
98
98
|
if arguments[:per_page]
|
99
99
|
query_args["pageSize"] = arguments[:per_page]
|
100
100
|
end
|
101
|
-
|
101
|
+
|
102
102
|
if arguments[:page]
|
103
103
|
query_args["page"] = arguments[:page]
|
104
104
|
end
|
@@ -115,14 +115,14 @@ module BentoSearch
|
|
115
115
|
return url
|
116
116
|
end
|
117
117
|
|
118
|
-
# Prepares a DOAJ API (elastic search) query component for
|
118
|
+
# Prepares a DOAJ API (elastic search) query component for
|
119
119
|
# given textual query in a given field (or default non-fielded search)
|
120
120
|
#
|
121
121
|
# Separates query string into tokens (bare words and phrases),
|
122
122
|
# so they can each be made mandatory for ElasticSearch. Default
|
123
123
|
# DOAJ API makes them all optional, with a very low mm, which
|
124
124
|
# leads to low-precision odd looking results for standard use
|
125
|
-
# cases.
|
125
|
+
# cases.
|
126
126
|
#
|
127
127
|
# Escapes all remaining special characters as literals (not including
|
128
128
|
# double quotes which can be used for phrases, which are respected. )
|
@@ -133,7 +133,7 @@ module BentoSearch
|
|
133
133
|
#
|
134
134
|
# The "+" prefixed before field-name is to make sure all separate
|
135
135
|
# fields are also mandatory when doing multi-field searches. It should
|
136
|
-
# make no difference for a single-field search.
|
136
|
+
# make no difference for a single-field search.
|
137
137
|
def fielded_query(query, field = nil)
|
138
138
|
if field.present?
|
139
139
|
"+#{field}:(#{prepare_mandatory_terms(query)})"
|
@@ -143,12 +143,12 @@ module BentoSearch
|
|
143
143
|
end
|
144
144
|
|
145
145
|
# Takes a query string, prepares an ElasticSearch query
|
146
|
-
# doing what we want:
|
146
|
+
# doing what we want:
|
147
147
|
# * tokenizes into bare words and double-quoted phrases
|
148
148
|
# * Escapes other punctuation to be literal not ElasticSearch operator.
|
149
149
|
# (Does NOT do URI escaping)
|
150
|
-
# * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
|
151
|
-
def prepare_mandatory_terms(query)
|
150
|
+
# * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
|
151
|
+
def prepare_mandatory_terms(query)
|
152
152
|
# use string split with regex to too-cleverly split into space
|
153
153
|
# separated terms and phrases, keeping phrases as a unit.
|
154
154
|
terms = query.split %r{[[:space:]]+|("[^"]+")}
|
@@ -174,13 +174,13 @@ module BentoSearch
|
|
174
174
|
|
175
175
|
item.start_page = bibjson["start_page"]
|
176
176
|
item.end_page = bibjson["end_page"]
|
177
|
-
|
177
|
+
|
178
178
|
item.year = bibjson["year"]
|
179
179
|
if (year = bibjson["year"].to_i) && (month = bibjson["month"].to_i)
|
180
180
|
if year != 0 && month != 0
|
181
181
|
item.publication_date = Date.new(bibjson["year"].to_i, bibjson["month"].to_i)
|
182
182
|
end
|
183
|
-
end
|
183
|
+
end
|
184
184
|
|
185
185
|
item.abstract = sanitize(bibjson["abstract"]) if bibjson.has_key?("abstract")
|
186
186
|
|
@@ -222,9 +222,9 @@ module BentoSearch
|
|
222
222
|
# punctuation that needs to be escaped and how to escape (backslash)
|
223
223
|
# for ES documented here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
|
224
224
|
#
|
225
|
-
# We do not escape double quotes, want to allow them for phrases.
|
225
|
+
# We do not escape double quotes, want to allow them for phrases.
|
226
226
|
#
|
227
|
-
# This method does NOT return URI-escaped, it returns literal, escaped for ES.
|
227
|
+
# This method does NOT return URI-escaped, it returns literal, escaped for ES.
|
228
228
|
def escape_query(q)
|
229
229
|
q.gsub(/([\+\-\=\&\|\>\<\!\(\)\{\}\[\]\^\~\*\?\:\\\/])/) {|m| "\\#{$1}"}
|
230
230
|
end
|
@@ -242,7 +242,7 @@ module BentoSearch
|
|
242
242
|
{ nil => {:semantic => :general},
|
243
243
|
"bibjson.title" => {:semantic => :title},
|
244
244
|
# Using 'exact' seems to produce much better results for
|
245
|
-
# author, don't entirely understand what's up.
|
245
|
+
# author, don't entirely understand what's up.
|
246
246
|
"bibjson.author.name" => {:semantic => :author},
|
247
247
|
"publisher" => {:semantic => :publisher},
|
248
248
|
"bibjson.subject.term" => {:semantic => :subject},
|
@@ -263,7 +263,7 @@ module BentoSearch
|
|
263
263
|
|
264
264
|
def sort_definitions
|
265
265
|
# Don't believe DOAJ supports sorting by author
|
266
|
-
{
|
266
|
+
{
|
267
267
|
"relevance" => {:implementation => nil}, # default
|
268
268
|
"title" => {:implementation => "title:asc"},
|
269
269
|
# We don't quite have publication date sorting, but we'll use
|
@@ -276,4 +276,4 @@ module BentoSearch
|
|
276
276
|
end
|
277
277
|
|
278
278
|
end
|
279
|
-
end
|
279
|
+
end
|
@@ -131,14 +131,14 @@ class BentoSearch::EbscoHostEngine
|
|
131
131
|
url = query_url(args)
|
132
132
|
|
133
133
|
Rails.logger.debug("EbscoHostEngine Search for: #{url}")
|
134
|
-
|
134
|
+
|
135
135
|
results = BentoSearch::Results.new
|
136
136
|
xml, response, exception = nil, nil, nil
|
137
137
|
|
138
138
|
begin
|
139
139
|
response = http_client.get(url)
|
140
140
|
xml = Nokogiri::XML(response.body)
|
141
|
-
rescue
|
141
|
+
rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
|
142
142
|
exception = e
|
143
143
|
end
|
144
144
|
# error handle
|
@@ -361,7 +361,7 @@ class BentoSearch::EbscoHostEngine
|
|
361
361
|
query = if args[:query].kind_of?(Hash)
|
362
362
|
# multi-field query
|
363
363
|
args[:query].collect {|field, query| fielded_query(query, field)}.join(" AND ")
|
364
|
-
else
|
364
|
+
else
|
365
365
|
fielded_query(args[:query], args[:search_field])
|
366
366
|
end
|
367
367
|
|
@@ -7,25 +7,25 @@ require 'http_client_patch/include_client'
|
|
7
7
|
|
8
8
|
|
9
9
|
#
|
10
|
-
# For EBSCO Discovery Service. You will need a license to use.
|
10
|
+
# For EBSCO Discovery Service. You will need a license to use.
|
11
11
|
#
|
12
12
|
# == Required Configuration
|
13
13
|
#
|
14
|
-
# user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
|
14
|
+
# user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
|
15
15
|
# profile: As given by EBSCO, might be "edsapi"?
|
16
16
|
#
|
17
17
|
# == Highlighting
|
18
18
|
#
|
19
|
-
# EDS has a query-in-context highlighting feature. It is used by default, set
|
20
|
-
# config 'highlighting' to false to disable.
|
19
|
+
# EDS has a query-in-context highlighting feature. It is used by default, set
|
20
|
+
# config 'highlighting' to false to disable.
|
21
21
|
# If turned on, you may get <b class="bento_search_highlight"> tags
|
22
|
-
# in title and abstract output if it's on, marked html_safe.
|
22
|
+
# in title and abstract output if it's on, marked html_safe.
|
23
23
|
#
|
24
24
|
#
|
25
25
|
# == Linking
|
26
26
|
#
|
27
27
|
# The link to record in EBSCO interface delivered as "PLink" will be listed
|
28
|
-
# as record main link.
|
28
|
+
# as record main link.
|
29
29
|
#
|
30
30
|
# Any links listed under <CustomLinks> will be listed as other_links, using
|
31
31
|
# configured name provided by EBSCO for CustomLink.
|
@@ -34,26 +34,26 @@ require 'http_client_patch/include_client'
|
|
34
34
|
# ourselves. However, in our testing, the first/only CustomLink was an
|
35
35
|
# an OpenURL. If configuration.assume_first_custom_link_openurl is
|
36
36
|
# true (as is default), it will be used to create an OpenURL link. However, in
|
37
|
-
# our testing, many records don't have this at all. **Note** You want
|
37
|
+
# our testing, many records don't have this at all. **Note** You want
|
38
38
|
# to configure your profile so OpenURLs are ALWAYS included for all records, not
|
39
39
|
# just records with no EBSCO fulltext, to ensure bento_search can get the
|
40
40
|
# openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
|
41
|
-
# have to ask EBSCO support for help, it's confusing!).
|
41
|
+
# have to ask EBSCO support for help, it's confusing!).
|
42
42
|
#
|
43
43
|
# TODO: May have to add configuration code to pull the OpenURL link out by
|
44
|
-
# its configured name or label, not assume first one is it.
|
44
|
+
# its configured name or label, not assume first one is it.
|
45
45
|
#
|
46
|
-
# As always, you can customize links and other_links with Item Decorators.
|
46
|
+
# As always, you can customize links and other_links with Item Decorators.
|
47
47
|
#
|
48
48
|
# == Technical Notes and Difficulties
|
49
49
|
#
|
50
50
|
# This API is enormously difficult to work with. Also the response is very odd
|
51
51
|
# to deal with and missing some key elements. We quite possibly got something
|
52
|
-
# wrong or non-optimal in this implementation, but we did our best.
|
52
|
+
# wrong or non-optimal in this implementation, but we did our best.
|
53
53
|
#
|
54
54
|
# Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
|
55
55
|
# request making a session for every new end-user -- as we have no way to keep
|
56
|
-
# track of end-users, we do it on every request in this implementation.
|
56
|
+
# track of end-users, we do it on every request in this implementation.
|
57
57
|
#
|
58
58
|
# Responses don't include much metadata -- we don't actually have journal title,
|
59
59
|
# volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
|
@@ -61,91 +61,91 @@ require 'http_client_patch/include_client'
|
|
61
61
|
# Instead we're using the chunk of user-displayable citation/reference it does
|
62
62
|
# give us (which is very difficult to parse into something usable already),
|
63
63
|
# and a custom Decorator to display that instead of normalized citation
|
64
|
-
# made from individual elements.
|
64
|
+
# made from individual elements.
|
65
65
|
#
|
66
|
-
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
66
|
+
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
67
67
|
#
|
68
68
|
# Title and abstract data seems to be HTML with tags and character entities and
|
69
|
-
# escaped special chars. We're trusting it and passing it on as html_safe.
|
69
|
+
# escaped special chars. We're trusting it and passing it on as html_safe.
|
70
70
|
#
|
71
71
|
# Paging can only happen on even pages, with 'page' rather than 'start'. But
|
72
|
-
# you can pass in 'start' to bento_search, it'll be converted to closest page.
|
72
|
+
# you can pass in 'start' to bento_search, it'll be converted to closest page.
|
73
73
|
#
|
74
74
|
# == Authenticated Users
|
75
75
|
#
|
76
|
-
# EDS allows searches by unauthenticated users, but the results come back with
|
76
|
+
# EDS allows searches by unauthenticated users, but the results come back with
|
77
77
|
# weird blank hits. In such a case, the BentoSearch adapter will return
|
78
78
|
# records with virtually no metadata, but a title e
|
79
79
|
# (I18n at bento_search.eds.record_not_available ). Also no abstracts
|
80
|
-
# are available from unauth search.
|
80
|
+
# are available from unauth search.
|
81
81
|
#
|
82
82
|
# By default the engine will search as 'guest' unauth user. But config
|
83
83
|
# 'auth' key to true to force all searches to auth (if you are protecting your
|
84
|
-
# app) or pass :auth => true as param into #search method.
|
84
|
+
# app) or pass :auth => true as param into #search method.
|
85
85
|
#
|
86
86
|
# == Source Types
|
87
87
|
# # What the EBSCO 'source types' mean: http://support.ebsco.com/knowledge_base/detail.php?id=5382
|
88
88
|
#
|
89
|
-
# But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
|
89
|
+
# But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
|
90
90
|
|
91
91
|
#
|
92
92
|
# == EDS docs:
|
93
|
-
#
|
94
|
-
# * Console App to demo requests: https://eds-api.ebscohost.com/Console
|
93
|
+
#
|
94
|
+
# * Console App to demo requests: https://eds-api.ebscohost.com/Console
|
95
95
|
# * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
|
96
96
|
# * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
|
97
|
-
#
|
97
|
+
#
|
98
98
|
|
99
99
|
class BentoSearch::EdsEngine
|
100
100
|
include BentoSearch::SearchEngine
|
101
|
-
|
101
|
+
|
102
102
|
# Can't change http timeout in config, because we keep an http
|
103
|
-
# client at class-wide level, and config is not class-wide.
|
104
|
-
# Change this 'constant' if you want to change it, I guess.
|
103
|
+
# client at class-wide level, and config is not class-wide.
|
104
|
+
# Change this 'constant' if you want to change it, I guess.
|
105
105
|
HttpTimeout = 4
|
106
|
-
extend HTTPClientPatch::IncludeClient
|
106
|
+
extend HTTPClientPatch::IncludeClient
|
107
107
|
include_http_client do |client|
|
108
108
|
client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
|
109
109
|
end
|
110
|
-
|
110
|
+
|
111
111
|
AuthHeader = "x-authenticationToken"
|
112
112
|
SessionTokenHeader = "x-sessionToken"
|
113
113
|
|
114
114
|
@@remembered_auth = nil
|
115
115
|
@@remembered_auth_lock = Mutex.new
|
116
116
|
# Class variable to save current known good auth
|
117
|
-
# uses a mutex to be threadsafe. sigh.
|
117
|
+
# uses a mutex to be threadsafe. sigh.
|
118
118
|
def self.remembered_auth
|
119
|
-
@@remembered_auth_lock.synchronize do
|
119
|
+
@@remembered_auth_lock.synchronize do
|
120
120
|
@@remembered_auth
|
121
121
|
end
|
122
122
|
end
|
123
|
-
# Set class variable with current known good auth.
|
124
|
-
# uses a mutex to be threadsafe.
|
123
|
+
# Set class variable with current known good auth.
|
124
|
+
# uses a mutex to be threadsafe.
|
125
125
|
def self.remembered_auth=(token)
|
126
126
|
@@remembered_auth_lock.synchronize do
|
127
127
|
@@remembered_auth = token
|
128
128
|
end
|
129
129
|
end
|
130
|
-
|
130
|
+
|
131
131
|
# an object that includes some Rails helper modules for
|
132
|
-
# text handling.
|
132
|
+
# text handling.
|
133
133
|
def helper
|
134
|
-
unless @helper
|
134
|
+
unless @helper ||= nil
|
135
135
|
@helper = Object.new
|
136
136
|
@helper.extend ActionView::Helpers::TextHelper # for truncate
|
137
137
|
@helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
|
138
138
|
end
|
139
139
|
return @helper
|
140
140
|
end
|
141
|
-
|
142
|
-
|
141
|
+
|
142
|
+
|
143
143
|
def self.required_configuration
|
144
144
|
%w{user_id password profile}
|
145
145
|
end
|
146
|
-
|
146
|
+
|
147
147
|
# From config or args, args over-ride config
|
148
|
-
def authenticated_end_user?(args)
|
148
|
+
def authenticated_end_user?(args)
|
149
149
|
config = configuration.auth ? true : false
|
150
150
|
arg = args[:auth]
|
151
151
|
if ! arg.nil?
|
@@ -156,94 +156,94 @@ class BentoSearch::EdsEngine
|
|
156
156
|
false
|
157
157
|
end
|
158
158
|
end
|
159
|
-
|
159
|
+
|
160
160
|
def construct_search_url(args)
|
161
161
|
query = "AND,"
|
162
162
|
if args[:search_field]
|
163
163
|
query += "#{args[:search_field]}:"
|
164
164
|
end
|
165
165
|
# Can't have any commas in query, it turns out, although
|
166
|
-
# this is not documented.
|
166
|
+
# this is not documented.
|
167
167
|
query += args[:query].gsub(",", " ")
|
168
|
-
|
168
|
+
|
169
169
|
url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
|
170
|
-
|
170
|
+
|
171
171
|
url += "&searchmode=#{CGI.escape configuration.search_mode}"
|
172
|
-
|
172
|
+
|
173
173
|
url += "&highlight=#{configuration.highlighting ? 'y' : 'n' }"
|
174
|
-
|
174
|
+
|
175
175
|
if args[:per_page]
|
176
176
|
url += "&resultsperpage=#{args[:per_page]}"
|
177
177
|
end
|
178
178
|
if args[:page]
|
179
179
|
url += "&pagenumber=#{args[:page]}"
|
180
180
|
end
|
181
|
-
|
181
|
+
|
182
182
|
if args[:sort]
|
183
183
|
if (defn = self.sort_definitions[args[:sort]]) &&
|
184
184
|
(value = defn[:implementation] )
|
185
185
|
url += "&sort=#{CGI.escape value}"
|
186
186
|
end
|
187
187
|
end
|
188
|
-
|
188
|
+
|
189
189
|
if configuration.only_source_types.present?
|
190
190
|
# facetfilter=1,SourceType:Research Starters,SourceType:Books
|
191
191
|
url += "&facetfilter=" + CGI.escape("1," + configuration.only_source_types.collect {|t| "SourceType:#{t}"}.join(","))
|
192
192
|
end
|
193
|
-
|
194
|
-
|
193
|
+
|
194
|
+
|
195
195
|
return url
|
196
196
|
end
|
197
|
-
|
198
|
-
|
199
|
-
|
197
|
+
|
198
|
+
|
199
|
+
|
200
200
|
def search_implementation(args)
|
201
201
|
results = BentoSearch::Results.new
|
202
|
-
|
202
|
+
|
203
203
|
end_user_auth = authenticated_end_user? args
|
204
|
-
|
204
|
+
|
205
205
|
begin
|
206
206
|
with_session(end_user_auth) do |session_token|
|
207
|
-
|
207
|
+
|
208
208
|
url = construct_search_url(args)
|
209
|
-
|
210
|
-
|
211
|
-
|
209
|
+
|
210
|
+
|
211
|
+
|
212
212
|
response = get_with_auth(url, session_token)
|
213
|
-
|
213
|
+
|
214
214
|
results = BentoSearch::Results.new
|
215
|
-
|
216
|
-
if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
|
215
|
+
|
216
|
+
if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
|
217
217
|
results.total_items = hits_node.to_i
|
218
218
|
end
|
219
|
-
|
219
|
+
|
220
220
|
response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
|
221
221
|
item = BentoSearch::ResultItem.new
|
222
|
-
|
222
|
+
|
223
223
|
item.title = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )
|
224
|
-
|
224
|
+
|
225
225
|
# To get a unique id, we need to pull out db code and accession number
|
226
|
-
# and combine em with colon, accession number is not unique by itself.
|
226
|
+
# and combine em with colon, accession number is not unique by itself.
|
227
227
|
db = record_xml.at_xpath("./Header/DbId").try(:text)
|
228
228
|
accession = record_xml.at_xpath("./Header/An").try(:text)
|
229
229
|
if db && accession
|
230
230
|
item.unique_id = "#{db}:#{accession}"
|
231
231
|
end
|
232
|
-
|
233
|
-
|
232
|
+
|
233
|
+
|
234
234
|
if item.title.nil? && ! end_user_auth
|
235
235
|
item.title = I18n.translate("bento_search.eds.record_not_available")
|
236
236
|
end
|
237
|
-
|
237
|
+
|
238
238
|
item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
|
239
239
|
|
240
240
|
# Believe it or not, the authors are encoded as an escaped
|
241
241
|
# XML-ish payload, that we need to parse again and get the
|
242
242
|
# actual authors out of. WTF. Thanks for handling fragments
|
243
|
-
# nokogiri.
|
243
|
+
# nokogiri.
|
244
244
|
author_mess = element_by_group(record_xml, "Au")
|
245
245
|
# only SOMETIMES does it have XML tags, other times it's straight text.
|
246
|
-
# ARGH.
|
246
|
+
# ARGH.
|
247
247
|
author_xml = Nokogiri::XML::fragment(author_mess)
|
248
248
|
searchLinks = author_xml.xpath(".//searchLink")
|
249
249
|
if searchLinks.size > 0
|
@@ -253,14 +253,14 @@ class BentoSearch::EdsEngine
|
|
253
253
|
else
|
254
254
|
item.authors << BentoSearch::Author.new(:display => author_xml.text)
|
255
255
|
end
|
256
|
-
|
257
|
-
|
256
|
+
|
257
|
+
|
258
258
|
# PLink is main inward facing EBSCO link, put it as
|
259
|
-
# main link.
|
259
|
+
# main link.
|
260
260
|
if direct_link = record_xml.at_xpath("./PLink")
|
261
261
|
item.link = direct_link.text
|
262
262
|
end
|
263
|
-
|
263
|
+
|
264
264
|
# Other links may be found in CustomLinks, it seems like usually
|
265
265
|
# there will be at least one, hopefully the first one is the OpenURL?
|
266
266
|
record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
|
@@ -269,51 +269,51 @@ class BentoSearch::EdsEngine
|
|
269
269
|
:label => custom_link.at_xpath("./Name").text
|
270
270
|
)
|
271
271
|
end
|
272
|
-
|
272
|
+
|
273
273
|
if (configuration.assume_first_custom_link_openurl &&
|
274
274
|
(first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
|
275
275
|
(node = first.at_xpath "./Url" )
|
276
276
|
)
|
277
|
-
|
277
|
+
|
278
278
|
openurl = node.text
|
279
|
-
|
279
|
+
|
280
280
|
index = openurl.index('?')
|
281
|
-
item.openurl_kev_co = openurl.slice index..(openurl.length) if index
|
281
|
+
item.openurl_kev_co = openurl.slice index..(openurl.length) if index
|
282
282
|
end
|
283
283
|
|
284
|
-
# Format.
|
284
|
+
# Format.
|
285
285
|
item.format_str = at_xpath_text record_xml, "./Header/PubType"
|
286
286
|
# Can't find a list of possible PubTypes to see what's there to try
|
287
|
-
# and map to our internal controlled vocab. oh wells.
|
288
|
-
|
289
|
-
|
290
|
-
|
287
|
+
# and map to our internal controlled vocab. oh wells.
|
288
|
+
|
289
|
+
|
290
|
+
|
291
291
|
# We have a single blob of human-readable citation, that's also
|
292
292
|
# littered with XML-ish tags we need to deal with. We'll save
|
293
293
|
# it in a custom location, and use a custom Decorator to display
|
294
294
|
# it. Sorry it's way too hard for us to preserve <highlight>
|
295
295
|
# tags in this mess, they will be lost. Probably don't
|
296
|
-
# need highlighting in source anyhow.
|
296
|
+
# need highlighting in source anyhow.
|
297
297
|
citation_mess = element_by_group(record_xml, "Src")
|
298
298
|
# Argh, but sometimes it's in SrcInfo _without_ tags instead
|
299
|
-
if citation_mess
|
299
|
+
if citation_mess
|
300
300
|
citation_txt = Nokogiri::XML::fragment(citation_mess).text
|
301
301
|
# But strip off some "count of references" often on the end
|
302
|
-
# which are confusing and useless.
|
302
|
+
# which are confusing and useless.
|
303
303
|
item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
|
304
304
|
else
|
305
305
|
# try another location
|
306
306
|
item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
|
307
307
|
end
|
308
|
-
|
309
|
-
|
308
|
+
|
309
|
+
|
310
310
|
item.extend CitationMessDecorator
|
311
|
-
|
311
|
+
|
312
312
|
results << item
|
313
|
-
end
|
313
|
+
end
|
314
314
|
end
|
315
|
-
|
316
|
-
return results
|
315
|
+
|
316
|
+
return results
|
317
317
|
rescue EdsCommException => e
|
318
318
|
results.error ||= {}
|
319
319
|
results.error[:exception] = e
|
@@ -321,137 +321,137 @@ class BentoSearch::EdsEngine
|
|
321
321
|
results.error[:http_body] = e.http_body
|
322
322
|
return results
|
323
323
|
end
|
324
|
-
|
324
|
+
|
325
325
|
end
|
326
|
-
|
326
|
+
|
327
327
|
# Difficult to get individual elements out of an EDS XML <Record>
|
328
|
-
# response, requires weird xpath, so we do it for you.
|
328
|
+
# response, requires weird xpath, so we do it for you.
|
329
329
|
# element_by_group(nokogiri_element, "Ti")
|
330
330
|
#
|
331
331
|
# Returns string or nil
|
332
332
|
def element_by_group(noko, group)
|
333
333
|
at_xpath_text(noko, "./Items/Item[child::Group[text()='#{group}']]/Data")
|
334
334
|
end
|
335
|
-
|
335
|
+
|
336
336
|
# Wraps calls to the EDS api with CreateSession and EndSession requests
|
337
337
|
# to EDS. Will pass sessionID in yield from block.
|
338
338
|
#
|
339
339
|
# Second optional arg is whether this is an authenticated user, else
|
340
|
-
# guest access will be used.
|
340
|
+
# guest access will be used.
|
341
341
|
#
|
342
342
|
# with_session(true) do |session_token|
|
343
343
|
# # can make more requests using session_token,
|
344
|
-
# # EndSession will be called for you at end of block.
|
344
|
+
# # EndSession will be called for you at end of block.
|
345
345
|
# end
|
346
346
|
def with_session(auth = false, &block)
|
347
|
-
auth_token = self.class.remembered_auth
|
347
|
+
auth_token = self.class.remembered_auth
|
348
348
|
if auth_token.nil?
|
349
349
|
auth_token = self.class.remembered_auth = get_auth_token
|
350
350
|
end
|
351
|
-
|
352
|
-
|
353
|
-
create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
|
354
|
-
response_xml = get_with_auth(create_url)
|
355
|
-
|
351
|
+
|
352
|
+
|
353
|
+
create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
|
354
|
+
response_xml = get_with_auth(create_url)
|
355
|
+
|
356
356
|
session_token = nil
|
357
|
-
unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
|
358
|
-
e = EdsCommException.new("Could not get SessionToken")
|
357
|
+
unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
|
358
|
+
e = EdsCommException.new("Could not get SessionToken")
|
359
359
|
end
|
360
|
-
|
361
|
-
begin
|
360
|
+
|
361
|
+
begin
|
362
362
|
block.yield(session_token)
|
363
|
-
ensure
|
364
|
-
if auth_token && session_token
|
363
|
+
ensure
|
364
|
+
if auth_token && session_token
|
365
365
|
end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
|
366
|
-
response_xml = get_with_auth(end_url)
|
366
|
+
response_xml = get_with_auth(end_url)
|
367
367
|
end
|
368
368
|
end
|
369
|
-
|
369
|
+
|
370
370
|
end
|
371
|
-
|
372
|
-
# if the xpath responds, return #text of it, else nil.
|
371
|
+
|
372
|
+
# if the xpath responds, return #text of it, else nil.
|
373
373
|
def at_xpath_text(noko, xpath)
|
374
374
|
node = noko.at_xpath(xpath)
|
375
|
-
|
375
|
+
|
376
376
|
if node.nil?
|
377
377
|
return node
|
378
378
|
else
|
379
379
|
return node.text
|
380
380
|
end
|
381
381
|
end
|
382
|
-
|
382
|
+
|
383
383
|
# If EDS has put highlighting tags
|
384
384
|
# in a field, we need to HTML escape the literal values,
|
385
385
|
# while still using the highlighting tokens to put
|
386
386
|
# HTML tags around highlighted terms.
|
387
387
|
#
|
388
388
|
# Second param, if to assume EDS literals are safe HTML, as they
|
389
|
-
# seem to be.
|
389
|
+
# seem to be.
|
390
390
|
def prepare_eds_payload(str, html_safe = false)
|
391
391
|
return str if str.blank?
|
392
|
-
|
392
|
+
|
393
393
|
unless configuration.highlighting
|
394
|
-
str = str.html_safe if html_safe
|
394
|
+
str = str.html_safe if html_safe
|
395
395
|
return str
|
396
396
|
end
|
397
|
-
|
398
|
-
parts =
|
397
|
+
|
398
|
+
parts =
|
399
399
|
str.split(%r{(</?highlight>)}).collect do |substr|
|
400
400
|
case substr
|
401
401
|
when "<highlight>" then "<b class='bento_search_highlight'>".html_safe
|
402
402
|
when "</highlight>" then "</b>".html_safe
|
403
|
-
# Yes, EDS gives us HTML in the literals, we're choosing to trust it.
|
403
|
+
# Yes, EDS gives us HTML in the literals, we're choosing to trust it.
|
404
404
|
else substr.html_safe
|
405
405
|
end
|
406
406
|
end
|
407
|
-
|
408
|
-
return helper.safe_join(parts, '')
|
407
|
+
|
408
|
+
return helper.safe_join(parts, '')
|
409
409
|
end
|
410
|
-
|
410
|
+
|
411
411
|
# Give it a url pointing at EDS API.
|
412
|
-
# Second arg must be a session_token if EDS request requires one.
|
413
|
-
# It will
|
412
|
+
# Second arg must be a session_token if EDS request requires one.
|
413
|
+
# It will
|
414
414
|
# * Make a GET request
|
415
415
|
# * with memo-ized auth token added to headers
|
416
416
|
# * for XML, with all namespaces removed!
|
417
417
|
# * Parse JSON into a hash and return hash
|
418
418
|
# * Try ONCE more to get if EBSCO says bad auth token
|
419
419
|
# * Raise an EdsCommException if can't auth after second try,
|
420
|
-
# or other error message, or JSON can't be parsed.
|
420
|
+
# or other error message, or JSON can't be parsed.
|
421
421
|
def get_with_auth(url, session_token = nil)
|
422
422
|
auth_token = self.class.remembered_auth
|
423
423
|
unless auth_token
|
424
424
|
auth_token = self.class.remembered_auth = get_auth_token
|
425
425
|
end
|
426
|
-
|
426
|
+
|
427
427
|
response = nil
|
428
428
|
response_xml = nil
|
429
429
|
caught_exception = nil
|
430
|
-
|
430
|
+
|
431
431
|
begin
|
432
432
|
headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
|
433
433
|
headers[SessionTokenHeader] = session_token if session_token
|
434
|
-
|
434
|
+
|
435
435
|
s_time = Time.now
|
436
436
|
response = http_client.get(url, nil, headers)
|
437
437
|
Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")
|
438
|
-
|
438
|
+
|
439
439
|
response_xml = Nokogiri::XML(response.body)
|
440
440
|
response_xml.remove_namespaces!
|
441
|
-
|
441
|
+
|
442
442
|
if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
|
443
443
|
# bad auth, try again just ONCE
|
444
444
|
Rails.logger.debug("EDS auth failed, getting auth again")
|
445
|
-
|
445
|
+
|
446
446
|
headers[AuthHeader] = self.class.remembered_auth = get_auth_token
|
447
447
|
response = http_client.get(url, nil, headers)
|
448
448
|
response_xml = Nokogiri::XML(response.body)
|
449
|
-
response_xml.remove_namespaces!
|
450
|
-
end
|
451
|
-
rescue
|
449
|
+
response_xml.remove_namespaces!
|
450
|
+
end
|
451
|
+
rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
|
452
452
|
caught_exception = e
|
453
453
|
end
|
454
|
-
|
454
|
+
|
455
455
|
if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
|
456
456
|
exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
|
457
457
|
if response
|
@@ -460,49 +460,49 @@ class BentoSearch::EdsEngine
|
|
460
460
|
end
|
461
461
|
raise exception
|
462
462
|
end
|
463
|
-
|
463
|
+
|
464
464
|
return response_xml
|
465
465
|
end
|
466
|
-
|
467
|
-
|
468
|
-
# Has to make an HTTP request to get EBSCO's auth token.
|
466
|
+
|
467
|
+
|
468
|
+
# Has to make an HTTP request to get EBSCO's auth token.
|
469
469
|
# returns the auth token. We aren't bothering to keep
|
470
470
|
# track of the expiration ourselves, can't necessarily trust
|
471
|
-
# it anyway.
|
471
|
+
# it anyway.
|
472
472
|
#
|
473
|
-
# Raises an EdsCommException on error.
|
474
|
-
def get_auth_token
|
473
|
+
# Raises an EdsCommException on error.
|
474
|
+
def get_auth_token
|
475
475
|
# Can't send params as form-encoded, actually need to send a JSON or XML
|
476
|
-
# body, argh.
|
477
|
-
|
476
|
+
# body, argh.
|
477
|
+
|
478
478
|
body = <<-EOS
|
479
479
|
{
|
480
480
|
"UserId":"#{configuration.user_id}",
|
481
481
|
"Password":"#{configuration.password}"
|
482
482
|
}
|
483
483
|
EOS
|
484
|
-
|
484
|
+
|
485
485
|
s_time = Time.now
|
486
486
|
response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
|
487
|
-
Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
|
488
|
-
|
487
|
+
Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
|
488
|
+
|
489
489
|
unless HTTP::Status.successful? response.status
|
490
490
|
raise EdsCommException.new("Could not get auth", response.status, response.body)
|
491
491
|
end
|
492
|
-
|
492
|
+
|
493
493
|
response_hash = nil
|
494
494
|
begin
|
495
495
|
response_hash = MultiJson.load response.body
|
496
496
|
rescue MultiJson::DecodeError
|
497
497
|
end
|
498
|
-
|
498
|
+
|
499
499
|
unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
|
500
500
|
raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
|
501
501
|
end
|
502
|
-
|
503
|
-
return response_hash["AuthToken"]
|
502
|
+
|
503
|
+
return response_hash["AuthToken"]
|
504
504
|
end
|
505
|
-
|
505
|
+
|
506
506
|
def self.default_configuration
|
507
507
|
{
|
508
508
|
:auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
|
@@ -513,15 +513,15 @@ class BentoSearch::EdsEngine
|
|
513
513
|
:search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
|
514
514
|
}
|
515
515
|
end
|
516
|
-
|
517
|
-
def sort_definitions
|
518
|
-
{
|
516
|
+
|
517
|
+
def sort_definitions
|
518
|
+
{
|
519
519
|
"date_desc" => {:implementation => "date"},
|
520
520
|
"relevance" => {:implementation => "relevance" }
|
521
521
|
# "date_asc" => {:implementation => "date2"}
|
522
522
|
}
|
523
523
|
end
|
524
|
-
|
524
|
+
|
525
525
|
def search_field_definitions
|
526
526
|
{
|
527
527
|
"TX" => {:semantic => :general},
|
@@ -534,11 +534,11 @@ class BentoSearch::EdsEngine
|
|
534
534
|
"IB" => {:semantic => :isbn},
|
535
535
|
}
|
536
536
|
end
|
537
|
-
|
538
|
-
# an exception talking to EDS api.
|
537
|
+
|
538
|
+
# an exception talking to EDS api.
|
539
539
|
# there's a short reason in #message, but also
|
540
540
|
# possibly an http_status and http_body copied
|
541
|
-
# from error EDS response.
|
541
|
+
# from error EDS response.
|
542
542
|
class EdsCommException < ::BentoSearch::FetchError
|
543
543
|
attr_accessor :http_status, :http_body
|
544
544
|
def initialize(message, status = nil, body = nil)
|
@@ -547,16 +547,16 @@ class BentoSearch::EdsEngine
|
|
547
547
|
self.http_body = body
|
548
548
|
end
|
549
549
|
end
|
550
|
-
|
551
|
-
|
550
|
+
|
551
|
+
|
552
552
|
# A built-in decorator always applied, that overrides
|
553
553
|
# the ResultItem#published_in display method to use our mess blob
|
554
554
|
# of human readable citation, since we don't have individual elements
|
555
|
-
# to create it from in a normalized way.
|
555
|
+
# to create it from in a normalized way.
|
556
556
|
module CitationMessDecorator
|
557
557
|
def published_in
|
558
558
|
custom_data["citation_blob"]
|
559
559
|
end
|
560
560
|
end
|
561
|
-
|
561
|
+
|
562
562
|
end
|