bento_search 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -16
- data/Rakefile +30 -11
- data/app/controllers/bento_search/search_controller.rb +29 -28
- data/app/models/bento_search/result_item.rb +10 -10
- data/app/models/bento_search/results/serialization.rb +22 -13
- data/app/models/bento_search/search_engine.rb +117 -117
- data/app/search_engines/bento_search/doaj_articles_engine.rb +19 -19
- data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
- data/app/search_engines/bento_search/eds_engine.rb +166 -166
- data/app/search_engines/bento_search/google_books_engine.rb +2 -2
- data/app/search_engines/bento_search/scopus_engine.rb +87 -87
- data/app/search_engines/bento_search/summon_engine.rb +1 -1
- data/lib/bento_search.rb +12 -9
- data/lib/bento_search/version.rb +1 -1
- data/test/dummy/config/boot.rb +4 -9
- data/test/dummy/db/schema.rb +15 -0
- data/test/functional/bento_search/search_controller_test.rb +63 -57
- data/test/helper/bento_search_helper_test.rb +103 -103
- data/test/search_engines/doaj_articles_engine_test.rb +9 -9
- data/test/search_engines/search_engine_base_test.rb +86 -86
- data/test/search_engines/search_engine_test.rb +56 -56
- data/test/test_helper.rb +23 -12
- data/test/unit/multi_searcher_test.rb +18 -18
- data/test/unit/pagination_test.rb +12 -12
- metadata +6 -4
@@ -4,10 +4,10 @@ require 'http_client_patch/include_client'
|
|
4
4
|
require 'json'
|
5
5
|
|
6
6
|
module BentoSearch
|
7
|
-
# DOAJ Articles search.
|
7
|
+
# DOAJ Articles search.
|
8
8
|
# https://doaj.org/api/v1/docs
|
9
9
|
#
|
10
|
-
# Phrase searches with double quotes are respected.
|
10
|
+
# Phrase searches with double quotes are respected.
|
11
11
|
#
|
12
12
|
# Supports #get by unique_id feature
|
13
13
|
#
|
@@ -36,7 +36,7 @@ module BentoSearch
|
|
36
36
|
Rails.logger.debug("DoajEngine: requesting #{query_url}")
|
37
37
|
response = http_client.get( query_url )
|
38
38
|
json = JSON.parse(response.body)
|
39
|
-
rescue
|
39
|
+
rescue BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
|
40
40
|
HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
|
41
41
|
JSON::ParserError => e
|
42
42
|
results.error ||= {}
|
@@ -77,7 +77,7 @@ module BentoSearch
|
|
77
77
|
def args_to_search_url(arguments)
|
78
78
|
query = if arguments[:query].kind_of?(Hash)
|
79
79
|
# multi-field query
|
80
|
-
arguments[:query].collect {|field,
|
80
|
+
arguments[:query].collect {|field, query_value| fielded_query(query_value, field)}.join(" ")
|
81
81
|
else
|
82
82
|
fielded_query(arguments[:query], arguments[:search_field])
|
83
83
|
end
|
@@ -85,7 +85,7 @@ module BentoSearch
|
|
85
85
|
# We need to escape this for going in a PATH component,
|
86
86
|
# not a query. So space can't be "+", it needs to be "%20",
|
87
87
|
# and indeed DOAJ API does not like "+".
|
88
|
-
#
|
88
|
+
#
|
89
89
|
# But neither CGI.escape nor URI.escape does quite
|
90
90
|
# the right kind of escaping, seems to work out
|
91
91
|
# if we do CGI.escape but then replace '+'
|
@@ -98,7 +98,7 @@ module BentoSearch
|
|
98
98
|
if arguments[:per_page]
|
99
99
|
query_args["pageSize"] = arguments[:per_page]
|
100
100
|
end
|
101
|
-
|
101
|
+
|
102
102
|
if arguments[:page]
|
103
103
|
query_args["page"] = arguments[:page]
|
104
104
|
end
|
@@ -115,14 +115,14 @@ module BentoSearch
|
|
115
115
|
return url
|
116
116
|
end
|
117
117
|
|
118
|
-
# Prepares a DOAJ API (elastic search) query component for
|
118
|
+
# Prepares a DOAJ API (elastic search) query component for
|
119
119
|
# given textual query in a given field (or default non-fielded search)
|
120
120
|
#
|
121
121
|
# Separates query string into tokens (bare words and phrases),
|
122
122
|
# so they can each be made mandatory for ElasticSearch. Default
|
123
123
|
# DOAJ API makes them all optional, with a very low mm, which
|
124
124
|
# leads to low-precision odd looking results for standard use
|
125
|
-
# cases.
|
125
|
+
# cases.
|
126
126
|
#
|
127
127
|
# Escapes all remaining special characters as literals (not including
|
128
128
|
# double quotes which can be used for phrases, which are respected. )
|
@@ -133,7 +133,7 @@ module BentoSearch
|
|
133
133
|
#
|
134
134
|
# The "+" prefixed before field-name is to make sure all separate
|
135
135
|
# fields are also mandatory when doing multi-field searches. It should
|
136
|
-
# make no difference for a single-field search.
|
136
|
+
# make no difference for a single-field search.
|
137
137
|
def fielded_query(query, field = nil)
|
138
138
|
if field.present?
|
139
139
|
"+#{field}:(#{prepare_mandatory_terms(query)})"
|
@@ -143,12 +143,12 @@ module BentoSearch
|
|
143
143
|
end
|
144
144
|
|
145
145
|
# Takes a query string, prepares an ElasticSearch query
|
146
|
-
# doing what we want:
|
146
|
+
# doing what we want:
|
147
147
|
# * tokenizes into bare words and double-quoted phrases
|
148
148
|
# * Escapes other punctuation to be literal not ElasticSearch operator.
|
149
149
|
# (Does NOT do URI escaping)
|
150
|
-
# * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
|
151
|
-
def prepare_mandatory_terms(query)
|
150
|
+
# * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
|
151
|
+
def prepare_mandatory_terms(query)
|
152
152
|
# use string split with regex to too-cleverly split into space
|
153
153
|
# separated terms and phrases, keeping phrases as unit.
|
154
154
|
terms = query.split %r{[[:space:]]+|("[^"]+")}
|
@@ -174,13 +174,13 @@ module BentoSearch
|
|
174
174
|
|
175
175
|
item.start_page = bibjson["start_page"]
|
176
176
|
item.end_page = bibjson["end_page"]
|
177
|
-
|
177
|
+
|
178
178
|
item.year = bibjson["year"]
|
179
179
|
if (year = bibjson["year"].to_i) && (month = bibjson["month"].to_i)
|
180
180
|
if year != 0 && month != 0
|
181
181
|
item.publication_date = Date.new(bibjson["year"].to_i, bibjson["month"].to_i)
|
182
182
|
end
|
183
|
-
end
|
183
|
+
end
|
184
184
|
|
185
185
|
item.abstract = sanitize(bibjson["abstract"]) if bibjson.has_key?("abstract")
|
186
186
|
|
@@ -222,9 +222,9 @@ module BentoSearch
|
|
222
222
|
# punctuation that needs to be escaped and how to escape (backslash)
|
223
223
|
# for ES documented here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
|
224
224
|
#
|
225
|
-
# We do not escape double quotes, want to allow them for phrases.
|
225
|
+
# We do not escape double quotes, want to allow them for phrases.
|
226
226
|
#
|
227
|
-
# This method does NOT return URI-escaped, it returns literal, escaped for ES.
|
227
|
+
# This method does NOT return URI-escaped, it returns literal, escaped for ES.
|
228
228
|
def escape_query(q)
|
229
229
|
q.gsub(/([\+\-\=\&\|\>\<\!\(\)\{\}\[\]\^\~\*\?\:\\\/])/) {|m| "\\#{$1}"}
|
230
230
|
end
|
@@ -242,7 +242,7 @@ module BentoSearch
|
|
242
242
|
{ nil => {:semantic => :general},
|
243
243
|
"bibjson.title" => {:semantic => :title},
|
244
244
|
# Using 'exact' seems to produce much better results for
|
245
|
-
# author, don't entirely understand what's up.
|
245
|
+
# author, don't entirely understand what's up.
|
246
246
|
"bibjson.author.name" => {:semantic => :author},
|
247
247
|
"publisher" => {:semantic => :publisher},
|
248
248
|
"bibjson.subject.term" => {:semantic => :subject},
|
@@ -263,7 +263,7 @@ module BentoSearch
|
|
263
263
|
|
264
264
|
def sort_definitions
|
265
265
|
# Don't believe DOAJ supports sorting by author
|
266
|
-
{
|
266
|
+
{
|
267
267
|
"relevance" => {:implementation => nil}, # default
|
268
268
|
"title" => {:implementation => "title:asc"},
|
269
269
|
# We don't quite have publication date sorting, but we'll use
|
@@ -276,4 +276,4 @@ module BentoSearch
|
|
276
276
|
end
|
277
277
|
|
278
278
|
end
|
279
|
-
end
|
279
|
+
end
|
@@ -131,14 +131,14 @@ class BentoSearch::EbscoHostEngine
|
|
131
131
|
url = query_url(args)
|
132
132
|
|
133
133
|
Rails.logger.debug("EbscoHostEngine Search for: #{url}")
|
134
|
-
|
134
|
+
|
135
135
|
results = BentoSearch::Results.new
|
136
136
|
xml, response, exception = nil, nil, nil
|
137
137
|
|
138
138
|
begin
|
139
139
|
response = http_client.get(url)
|
140
140
|
xml = Nokogiri::XML(response.body)
|
141
|
-
rescue
|
141
|
+
rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
|
142
142
|
exception = e
|
143
143
|
end
|
144
144
|
# error handle
|
@@ -361,7 +361,7 @@ class BentoSearch::EbscoHostEngine
|
|
361
361
|
query = if args[:query].kind_of?(Hash)
|
362
362
|
# multi-field query
|
363
363
|
args[:query].collect {|field, query| fielded_query(query, field)}.join(" AND ")
|
364
|
-
else
|
364
|
+
else
|
365
365
|
fielded_query(args[:query], args[:search_field])
|
366
366
|
end
|
367
367
|
|
@@ -7,25 +7,25 @@ require 'http_client_patch/include_client'
|
|
7
7
|
|
8
8
|
|
9
9
|
#
|
10
|
-
# For EBSCO Discovery Service. You will need a license to use.
|
10
|
+
# For EBSCO Discovery Service. You will need a license to use.
|
11
11
|
#
|
12
12
|
# == Required Configuration
|
13
13
|
#
|
14
|
-
# user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
|
14
|
+
# user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
|
15
15
|
# profile: As given by EBSCO, might be "edsapi"?
|
16
16
|
#
|
17
17
|
# == Highlighting
|
18
18
|
#
|
19
|
-
# EDS has a query-in-context highlighting feature. It is used by default, set
|
20
|
-
# config 'highlighting' to false to disable.
|
19
|
+
# EDS has a query-in-context highlighting feature. It is used by default, set
|
20
|
+
# config 'highlighting' to false to disable.
|
21
21
|
# If turned on, you may get <b class="bento_search_highlight"> tags
|
22
|
-
# in title and abstract output if it's on, marked html_safe.
|
22
|
+
# in title and abstract output if it's on, marked html_safe.
|
23
23
|
#
|
24
24
|
#
|
25
25
|
# == Linking
|
26
26
|
#
|
27
27
|
# The link to record in EBSCO interface delivered as "PLink" will be listed
|
28
|
-
# as record main link.
|
28
|
+
# as record main link.
|
29
29
|
#
|
30
30
|
# Any links listed under <CustomLinks> will be listed as other_links, using
|
31
31
|
# configured name provided by EBSCO for CustomLink.
|
@@ -34,26 +34,26 @@ require 'http_client_patch/include_client'
|
|
34
34
|
# ourselves. However, in our testing, the first/only CustomLink was an
|
35
35
|
# OpenURL. If configuration.assume_first_custom_link_openurl is
|
36
36
|
# true (as is default), it will be used to create an OpenURL link. However, in
|
37
|
-
# our testing, many records don't have this at all. **Note** You want
|
37
|
+
# our testing, many records don't have this at all. **Note** You want
|
38
38
|
# to configure your profile so OpenURLs are ALWAYS included for all records, not
|
39
39
|
# just records with no EBSCO fulltext, to ensure bento_search can get the
|
40
40
|
# openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
|
41
|
-
# have to ask EBSCO support for help, it's confusing!).
|
41
|
+
# have to ask EBSCO support for help, it's confusing!).
|
42
42
|
#
|
43
43
|
# TODO: May have to add configuration code to pull the OpenURL link out by
|
44
|
-
# its configured name or label, not assume first one is it.
|
44
|
+
# its configured name or label, not assume first one is it.
|
45
45
|
#
|
46
|
-
# As always, you can customize links and other_links with Item Decorators.
|
46
|
+
# As always, you can customize links and other_links with Item Decorators.
|
47
47
|
#
|
48
48
|
# == Technical Notes and Difficulties
|
49
49
|
#
|
50
50
|
# This API is enormously difficult to work with. Also the response is very odd
|
51
51
|
# to deal with and missing some key elements. We quite possibly got something
|
52
|
-
# wrong or non-optimal in this implementation, but we did our best.
|
52
|
+
# wrong or non-optimal in this implementation, but we did our best.
|
53
53
|
#
|
54
54
|
# Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
|
55
55
|
# request making a session for every new end-user -- as we have no way to keep
|
56
|
-
# track of end-users, we do it on every request in this implementation.
|
56
|
+
# track of end-users, we do it on every request in this implementation.
|
57
57
|
#
|
58
58
|
# Responses don't include much metadata -- we don't actually have journal title,
|
59
59
|
# volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
|
@@ -61,91 +61,91 @@ require 'http_client_patch/include_client'
|
|
61
61
|
# Instead we're using the chunk of user-displayable citation/reference it does
|
62
62
|
# give us (which is very difficult to parse into something usable already),
|
63
63
|
# and a custom Decorator to display that instead of normalized citation
|
64
|
-
# made from individual elements.
|
64
|
+
# made from individual elements.
|
65
65
|
#
|
66
|
-
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
66
|
+
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
67
67
|
#
|
68
68
|
# Title and abstract data seems to be HTML with tags and character entities and
|
69
|
-
# escaped special chars. We're trusting it and passing it on as html_safe.
|
69
|
+
# escaped special chars. We're trusting it and passing it on as html_safe.
|
70
70
|
#
|
71
71
|
# Paging can only happen on even pages, with 'page' rather than 'start'. But
|
72
|
-
# you can pass in 'start' to bento_search, it'll be converted to closest page.
|
72
|
+
# you can pass in 'start' to bento_search, it'll be converted to closest page.
|
73
73
|
#
|
74
74
|
# == Authenticated Users
|
75
75
|
#
|
76
|
-
# EDS allows searches by unauthenticated users, but the results come back with
|
76
|
+
# EDS allows searches by unauthenticated users, but the results come back with
|
77
77
|
# weird blank hits. In such a case, the BentoSearch adapter will return
|
78
78
|
# records with virtually no metadata, but a placeholder title
|
79
79
|
# (I18n at bento_search.eds.record_not_available ). Also no abstracts
|
80
|
-
# are available from unauth search.
|
80
|
+
# are available from unauth search.
|
81
81
|
#
|
82
82
|
# By default the engine will search as 'guest' unauth user. But config
|
83
83
|
# 'auth' key to true to force all searches to auth (if you are protecting your
|
84
|
-
# app) or pass :auth => true as param into #search method.
|
84
|
+
# app) or pass :auth => true as param into #search method.
|
85
85
|
#
|
86
86
|
# == Source Types
|
87
87
|
# # What the EBSCO 'source types' mean: http://support.ebsco.com/knowledge_base/detail.php?id=5382
|
88
88
|
#
|
89
|
-
# But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
|
89
|
+
# But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
|
90
90
|
|
91
91
|
#
|
92
92
|
# == EDS docs:
|
93
|
-
#
|
94
|
-
# * Console App to demo requests: https://eds-api.ebscohost.com/Console
|
93
|
+
#
|
94
|
+
# * Console App to demo requests: https://eds-api.ebscohost.com/Console
|
95
95
|
# * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
|
96
96
|
# * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
|
97
|
-
#
|
97
|
+
#
|
98
98
|
|
99
99
|
class BentoSearch::EdsEngine
|
100
100
|
include BentoSearch::SearchEngine
|
101
|
-
|
101
|
+
|
102
102
|
# Can't change http timeout in config, because we keep an http
|
103
|
-
# client at class-wide level, and config is not class-wide.
|
104
|
-
# Change this 'constant' if you want to change it, I guess.
|
103
|
+
# client at class-wide level, and config is not class-wide.
|
104
|
+
# Change this 'constant' if you want to change it, I guess.
|
105
105
|
HttpTimeout = 4
|
106
|
-
extend HTTPClientPatch::IncludeClient
|
106
|
+
extend HTTPClientPatch::IncludeClient
|
107
107
|
include_http_client do |client|
|
108
108
|
client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
|
109
109
|
end
|
110
|
-
|
110
|
+
|
111
111
|
AuthHeader = "x-authenticationToken"
|
112
112
|
SessionTokenHeader = "x-sessionToken"
|
113
113
|
|
114
114
|
@@remembered_auth = nil
|
115
115
|
@@remembered_auth_lock = Mutex.new
|
116
116
|
# Class variable to save current known good auth
|
117
|
-
# uses a mutex to be threadsafe. sigh.
|
117
|
+
# uses a mutex to be threadsafe. sigh.
|
118
118
|
def self.remembered_auth
|
119
|
-
@@remembered_auth_lock.synchronize do
|
119
|
+
@@remembered_auth_lock.synchronize do
|
120
120
|
@@remembered_auth
|
121
121
|
end
|
122
122
|
end
|
123
|
-
# Set class variable with current known good auth.
|
124
|
-
# uses a mutex to be threadsafe.
|
123
|
+
# Set class variable with current known good auth.
|
124
|
+
# uses a mutex to be threadsafe.
|
125
125
|
def self.remembered_auth=(token)
|
126
126
|
@@remembered_auth_lock.synchronize do
|
127
127
|
@@remembered_auth = token
|
128
128
|
end
|
129
129
|
end
|
130
|
-
|
130
|
+
|
131
131
|
# an object that includes some Rails helper modules for
|
132
|
-
# text handling.
|
132
|
+
# text handling.
|
133
133
|
def helper
|
134
|
-
unless @helper
|
134
|
+
unless @helper ||= nil
|
135
135
|
@helper = Object.new
|
136
136
|
@helper.extend ActionView::Helpers::TextHelper # for truncate
|
137
137
|
@helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
|
138
138
|
end
|
139
139
|
return @helper
|
140
140
|
end
|
141
|
-
|
142
|
-
|
141
|
+
|
142
|
+
|
143
143
|
def self.required_configuration
|
144
144
|
%w{user_id password profile}
|
145
145
|
end
|
146
|
-
|
146
|
+
|
147
147
|
# From config or args, args over-ride config
|
148
|
-
def authenticated_end_user?(args)
|
148
|
+
def authenticated_end_user?(args)
|
149
149
|
config = configuration.auth ? true : false
|
150
150
|
arg = args[:auth]
|
151
151
|
if ! arg.nil?
|
@@ -156,94 +156,94 @@ class BentoSearch::EdsEngine
|
|
156
156
|
false
|
157
157
|
end
|
158
158
|
end
|
159
|
-
|
159
|
+
|
160
160
|
def construct_search_url(args)
|
161
161
|
query = "AND,"
|
162
162
|
if args[:search_field]
|
163
163
|
query += "#{args[:search_field]}:"
|
164
164
|
end
|
165
165
|
# Can't have any commas in query, it turns out, although
|
166
|
-
# this is not documented.
|
166
|
+
# this is not documented.
|
167
167
|
query += args[:query].gsub(",", " ")
|
168
|
-
|
168
|
+
|
169
169
|
url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
|
170
|
-
|
170
|
+
|
171
171
|
url += "&searchmode=#{CGI.escape configuration.search_mode}"
|
172
|
-
|
172
|
+
|
173
173
|
url += "&highlight=#{configuration.highlighting ? 'y' : 'n' }"
|
174
|
-
|
174
|
+
|
175
175
|
if args[:per_page]
|
176
176
|
url += "&resultsperpage=#{args[:per_page]}"
|
177
177
|
end
|
178
178
|
if args[:page]
|
179
179
|
url += "&pagenumber=#{args[:page]}"
|
180
180
|
end
|
181
|
-
|
181
|
+
|
182
182
|
if args[:sort]
|
183
183
|
if (defn = self.sort_definitions[args[:sort]]) &&
|
184
184
|
(value = defn[:implementation] )
|
185
185
|
url += "&sort=#{CGI.escape value}"
|
186
186
|
end
|
187
187
|
end
|
188
|
-
|
188
|
+
|
189
189
|
if configuration.only_source_types.present?
|
190
190
|
# facetfilter=1,SourceType:Research Starters,SourceType:Books
|
191
191
|
url += "&facetfilter=" + CGI.escape("1," + configuration.only_source_types.collect {|t| "SourceType:#{t}"}.join(","))
|
192
192
|
end
|
193
|
-
|
194
|
-
|
193
|
+
|
194
|
+
|
195
195
|
return url
|
196
196
|
end
|
197
|
-
|
198
|
-
|
199
|
-
|
197
|
+
|
198
|
+
|
199
|
+
|
200
200
|
def search_implementation(args)
|
201
201
|
results = BentoSearch::Results.new
|
202
|
-
|
202
|
+
|
203
203
|
end_user_auth = authenticated_end_user? args
|
204
|
-
|
204
|
+
|
205
205
|
begin
|
206
206
|
with_session(end_user_auth) do |session_token|
|
207
|
-
|
207
|
+
|
208
208
|
url = construct_search_url(args)
|
209
|
-
|
210
|
-
|
211
|
-
|
209
|
+
|
210
|
+
|
211
|
+
|
212
212
|
response = get_with_auth(url, session_token)
|
213
|
-
|
213
|
+
|
214
214
|
results = BentoSearch::Results.new
|
215
|
-
|
216
|
-
if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
|
215
|
+
|
216
|
+
if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
|
217
217
|
results.total_items = hits_node.to_i
|
218
218
|
end
|
219
|
-
|
219
|
+
|
220
220
|
response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
|
221
221
|
item = BentoSearch::ResultItem.new
|
222
|
-
|
222
|
+
|
223
223
|
item.title = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )
|
224
|
-
|
224
|
+
|
225
225
|
# To get a unique id, we need to pull out db code and accession number
|
226
|
-
# and combine em with colon, accession number is not unique by itself.
|
226
|
+
# and combine em with colon, accession number is not unique by itself.
|
227
227
|
db = record_xml.at_xpath("./Header/DbId").try(:text)
|
228
228
|
accession = record_xml.at_xpath("./Header/An").try(:text)
|
229
229
|
if db && accession
|
230
230
|
item.unique_id = "#{db}:#{accession}"
|
231
231
|
end
|
232
|
-
|
233
|
-
|
232
|
+
|
233
|
+
|
234
234
|
if item.title.nil? && ! end_user_auth
|
235
235
|
item.title = I18n.translate("bento_search.eds.record_not_available")
|
236
236
|
end
|
237
|
-
|
237
|
+
|
238
238
|
item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
|
239
239
|
|
240
240
|
# Believe it or not, the authors are encoded as an escaped
|
241
241
|
# XML-ish payload, that we need to parse again and get the
|
242
242
|
# actual authors out of. WTF. Thanks for handling fragments
|
243
|
-
# nokogiri.
|
243
|
+
# nokogiri.
|
244
244
|
author_mess = element_by_group(record_xml, "Au")
|
245
245
|
# only SOMETIMES does it have XML tags, other times it's straight text.
|
246
|
-
# ARGH.
|
246
|
+
# ARGH.
|
247
247
|
author_xml = Nokogiri::XML::fragment(author_mess)
|
248
248
|
searchLinks = author_xml.xpath(".//searchLink")
|
249
249
|
if searchLinks.size > 0
|
@@ -253,14 +253,14 @@ class BentoSearch::EdsEngine
|
|
253
253
|
else
|
254
254
|
item.authors << BentoSearch::Author.new(:display => author_xml.text)
|
255
255
|
end
|
256
|
-
|
257
|
-
|
256
|
+
|
257
|
+
|
258
258
|
# PLink is main inward facing EBSCO link, put it as
|
259
|
-
# main link.
|
259
|
+
# main link.
|
260
260
|
if direct_link = record_xml.at_xpath("./PLink")
|
261
261
|
item.link = direct_link.text
|
262
262
|
end
|
263
|
-
|
263
|
+
|
264
264
|
# Other links may be found in CustomLinks, it seems like usually
|
265
265
|
# there will be at least one, hopefully the first one is the OpenURL?
|
266
266
|
record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
|
@@ -269,51 +269,51 @@ class BentoSearch::EdsEngine
|
|
269
269
|
:label => custom_link.at_xpath("./Name").text
|
270
270
|
)
|
271
271
|
end
|
272
|
-
|
272
|
+
|
273
273
|
if (configuration.assume_first_custom_link_openurl &&
|
274
274
|
(first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
|
275
275
|
(node = first.at_xpath "./Url" )
|
276
276
|
)
|
277
|
-
|
277
|
+
|
278
278
|
openurl = node.text
|
279
|
-
|
279
|
+
|
280
280
|
index = openurl.index('?')
|
281
|
-
item.openurl_kev_co = openurl.slice index..(openurl.length) if index
|
281
|
+
item.openurl_kev_co = openurl.slice index..(openurl.length) if index
|
282
282
|
end
|
283
283
|
|
284
|
-
# Format.
|
284
|
+
# Format.
|
285
285
|
item.format_str = at_xpath_text record_xml, "./Header/PubType"
|
286
286
|
# Can't find a list of possible PubTypes to see what's there to try
|
287
|
-
# and map to our internal controlled vocab. oh wells.
|
288
|
-
|
289
|
-
|
290
|
-
|
287
|
+
# and map to our internal controlled vocab. oh wells.
|
288
|
+
|
289
|
+
|
290
|
+
|
291
291
|
# We have a single blob of human-readable citation, that's also
|
292
292
|
# littered with XML-ish tags we need to deal with. We'll save
|
293
293
|
# it in a custom location, and use a custom Decorator to display
|
294
294
|
# it. Sorry it's way too hard for us to preserve <highlight>
|
295
295
|
# tags in this mess, they will be lost. Probably don't
|
296
|
-
# need highlighting in source anyhow.
|
296
|
+
# need highlighting in source anyhow.
|
297
297
|
citation_mess = element_by_group(record_xml, "Src")
|
298
298
|
# Argh, but sometimes it's in SrcInfo _without_ tags instead
|
299
|
-
if citation_mess
|
299
|
+
if citation_mess
|
300
300
|
citation_txt = Nokogiri::XML::fragment(citation_mess).text
|
301
301
|
# But strip off some "count of references" often on the end
|
302
|
-
# which are confusing and useless.
|
302
|
+
# which are confusing and useless.
|
303
303
|
item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
|
304
304
|
else
|
305
305
|
# try another location
|
306
306
|
item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
|
307
307
|
end
|
308
|
-
|
309
|
-
|
308
|
+
|
309
|
+
|
310
310
|
item.extend CitationMessDecorator
|
311
|
-
|
311
|
+
|
312
312
|
results << item
|
313
|
-
end
|
313
|
+
end
|
314
314
|
end
|
315
|
-
|
316
|
-
return results
|
315
|
+
|
316
|
+
return results
|
317
317
|
rescue EdsCommException => e
|
318
318
|
results.error ||= {}
|
319
319
|
results.error[:exception] = e
|
@@ -321,137 +321,137 @@ class BentoSearch::EdsEngine
|
|
321
321
|
results.error[:http_body] = e.http_body
|
322
322
|
return results
|
323
323
|
end
|
324
|
-
|
324
|
+
|
325
325
|
end
|
326
|
-
|
326
|
+
|
327
327
|
# Difficult to get individual elements out of an EDS XML <Record>
|
328
|
-
# response, requires weird xpath, so we do it for you.
|
328
|
+
# response, requires weird xpath, so we do it for you.
|
329
329
|
# element_by_group(nokogiri_element, "Ti")
|
330
330
|
#
|
331
331
|
# Returns string or nil
|
332
332
|
def element_by_group(noko, group)
|
333
333
|
at_xpath_text(noko, "./Items/Item[child::Group[text()='#{group}']]/Data")
|
334
334
|
end
|
335
|
-
|
335
|
+
|
336
336
|
# Wraps calls to the EDS api with CreateSession and EndSession requests
|
337
337
|
# to EDS. Will pass sessionID in yield from block.
|
338
338
|
#
|
339
339
|
# Second optional arg is whether this is an authenticated user, else
|
340
|
-
# guest access will be used.
|
340
|
+
# guest access will be used.
|
341
341
|
#
|
342
342
|
# with_session(true) do |session_token|
|
343
343
|
# # can make more requests using session_token,
|
344
|
-
# # EndSession will be called for you at end of block.
|
344
|
+
# # EndSession will be called for you at end of block.
|
345
345
|
# end
|
346
346
|
def with_session(auth = false, &block)
|
347
|
-
auth_token = self.class.remembered_auth
|
347
|
+
auth_token = self.class.remembered_auth
|
348
348
|
if auth_token.nil?
|
349
349
|
auth_token = self.class.remembered_auth = get_auth_token
|
350
350
|
end
|
351
|
-
|
352
|
-
|
353
|
-
create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
|
354
|
-
response_xml = get_with_auth(create_url)
|
355
|
-
|
351
|
+
|
352
|
+
|
353
|
+
create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
|
354
|
+
response_xml = get_with_auth(create_url)
|
355
|
+
|
356
356
|
session_token = nil
|
357
|
-
unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
|
358
|
-
e = EdsCommException.new("Could not get SessionToken")
|
357
|
+
unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
|
358
|
+
e = EdsCommException.new("Could not get SessionToken")
|
359
359
|
end
|
360
|
-
|
361
|
-
begin
|
360
|
+
|
361
|
+
begin
|
362
362
|
block.yield(session_token)
|
363
|
-
ensure
|
364
|
-
if auth_token && session_token
|
363
|
+
ensure
|
364
|
+
if auth_token && session_token
|
365
365
|
end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
|
366
|
-
response_xml = get_with_auth(end_url)
|
366
|
+
response_xml = get_with_auth(end_url)
|
367
367
|
end
|
368
368
|
end
|
369
|
-
|
369
|
+
|
370
370
|
end
|
371
|
-
|
372
|
-
# if the xpath responds, return #text of it, else nil.
|
371
|
+
|
372
|
+
# if the xpath responds, return #text of it, else nil.
|
373
373
|
def at_xpath_text(noko, xpath)
|
374
374
|
node = noko.at_xpath(xpath)
|
375
|
-
|
375
|
+
|
376
376
|
if node.nil?
|
377
377
|
return node
|
378
378
|
else
|
379
379
|
return node.text
|
380
380
|
end
|
381
381
|
end
|
382
|
-
|
382
|
+
|
383
383
|
# If EDS has put highlighting tags
|
384
384
|
# in a field, we need to HTML escape the literal values,
|
385
385
|
# while still using the highlighting tokens to put
|
386
386
|
# HTML tags around highlighted terms.
|
387
387
|
#
|
388
388
|
# Second param, if to assume EDS literals are safe HTML, as they
|
389
|
-
# seem to be.
|
389
|
+
# seem to be.
|
390
390
|
def prepare_eds_payload(str, html_safe = false)
|
391
391
|
return str if str.blank?
|
392
|
-
|
392
|
+
|
393
393
|
unless configuration.highlighting
|
394
|
-
str = str.html_safe if html_safe
|
394
|
+
str = str.html_safe if html_safe
|
395
395
|
return str
|
396
396
|
end
|
397
|
-
|
398
|
-
parts =
|
397
|
+
|
398
|
+
parts =
|
399
399
|
str.split(%r{(</?highlight>)}).collect do |substr|
|
400
400
|
case substr
|
401
401
|
when "<highlight>" then "<b class='bento_search_highlight'>".html_safe
|
402
402
|
when "</highlight>" then "</b>".html_safe
|
403
|
-
# Yes, EDS gives us HTML in the literals, we're choosing to trust it.
|
403
|
+
# Yes, EDS gives us HTML in the literals, we're choosing to trust it.
|
404
404
|
else substr.html_safe
|
405
405
|
end
|
406
406
|
end
|
407
|
-
|
408
|
-
return helper.safe_join(parts, '')
|
407
|
+
|
408
|
+
return helper.safe_join(parts, '')
|
409
409
|
end
|
410
|
-
|
410
|
+
|
411
411
|
# Give it a url pointing at EDS API.
|
412
|
-
# Second arg must be a session_token if EDS request requires one.
|
413
|
-
# It will
|
412
|
+
# Second arg must be a session_token if EDS request requires one.
|
413
|
+
# It will
|
414
414
|
# * Make a GET request
|
415
415
|
# * with memo-ized auth token added to headers
|
416
416
|
# * for XML, with all namespaces removed!
|
417
417
|
# * Parse the response body as XML (Nokogiri) and return the parsed document
|
418
418
|
# * Try ONCE more to get if EBSCO says bad auth token
|
419
419
|
# * Raise an EdsCommException if can't auth after second try,
|
420
|
-
# or other error message, or JSON can't be parsed.
|
420
|
+
# or other error message, or JSON can't be parsed.
|
421
421
|
def get_with_auth(url, session_token = nil)
|
422
422
|
auth_token = self.class.remembered_auth
|
423
423
|
unless auth_token
|
424
424
|
auth_token = self.class.remembered_auth = get_auth_token
|
425
425
|
end
|
426
|
-
|
426
|
+
|
427
427
|
response = nil
|
428
428
|
response_xml = nil
|
429
429
|
caught_exception = nil
|
430
|
-
|
430
|
+
|
431
431
|
begin
|
432
432
|
headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
|
433
433
|
headers[SessionTokenHeader] = session_token if session_token
|
434
|
-
|
434
|
+
|
435
435
|
s_time = Time.now
|
436
436
|
response = http_client.get(url, nil, headers)
|
437
437
|
Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")
|
438
|
-
|
438
|
+
|
439
439
|
response_xml = Nokogiri::XML(response.body)
|
440
440
|
response_xml.remove_namespaces!
|
441
|
-
|
441
|
+
|
442
442
|
if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
|
443
443
|
# bad auth, try again just ONCE
|
444
444
|
Rails.logger.debug("EDS auth failed, getting auth again")
|
445
|
-
|
445
|
+
|
446
446
|
headers[AuthHeader] = self.class.remembered_auth = get_auth_token
|
447
447
|
response = http_client.get(url, nil, headers)
|
448
448
|
response_xml = Nokogiri::XML(response.body)
|
449
|
-
response_xml.remove_namespaces!
|
450
|
-
end
|
451
|
-
rescue
|
449
|
+
response_xml.remove_namespaces!
|
450
|
+
end
|
451
|
+
rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
|
452
452
|
caught_exception = e
|
453
453
|
end
|
454
|
-
|
454
|
+
|
455
455
|
if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
|
456
456
|
exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
|
457
457
|
if response
|
@@ -460,49 +460,49 @@ class BentoSearch::EdsEngine
|
|
460
460
|
end
|
461
461
|
raise exception
|
462
462
|
end
|
463
|
-
|
463
|
+
|
464
464
|
return response_xml
|
465
465
|
end
|
466
|
-
|
467
|
-
|
468
|
-
# Has to make an HTTP request to get EBSCO's auth token.
|
466
|
+
|
467
|
+
|
468
|
+
# Has to make an HTTP request to get EBSCO's auth token.
|
469
469
|
# returns the auth token. We aren't bothering to keep
|
470
470
|
# track of the expiration ourselves, can't necessarily trust
|
471
|
-
# it anyway.
|
471
|
+
# it anyway.
|
472
472
|
#
|
473
|
-
# Raises an EdsCommException on error.
|
474
|
-
def get_auth_token
|
473
|
+
# Raises an EdsCommException on error.
|
474
|
+
def get_auth_token
  # Makes an HTTP POST to configuration.auth_url to obtain an EBSCO auth
  # token and returns the value of the "AuthToken" key of the JSON response.
  #
  # Raises an EdsCommException if the HTTP status is not successful, or if
  # the response body is not a JSON object containing "AuthToken".

  # Can't send params as form-encoded, actually need to send a JSON or XML
  # body. The body is produced by a JSON encoder rather than by string
  # interpolation, so that quotes, backslashes or control characters in the
  # credentials are escaped instead of corrupting (or injecting into) the
  # JSON document. #to_s mirrors what string interpolation did for
  # nil/non-String configuration values.
  body = MultiJson.dump(
    "UserId"   => configuration.user_id.to_s,
    "Password" => configuration.password.to_s
  )

  s_time = Time.now
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
  Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")

  unless HTTP::Status.successful? response.status
    raise EdsCommException.new("Could not get auth", response.status, response.body)
  end

  # An unparseable body is deliberately mapped to nil here; it is reported
  # by the "AuthToken not found" check below, which carries the raw body.
  response_hash = begin
    MultiJson.load response.body
  rescue MultiJson::DecodeError
    nil
  end

  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
    raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
  end

  return response_hash["AuthToken"]
end
|
505
|
-
|
505
|
+
|
506
506
|
def self.default_configuration
|
507
507
|
{
|
508
508
|
:auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
|
@@ -513,15 +513,15 @@ class BentoSearch::EdsEngine
|
|
513
513
|
:search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
|
514
514
|
}
|
515
515
|
end
|
516
|
-
|
517
|
-
def sort_definitions
|
518
|
-
{
|
516
|
+
|
517
|
+
def sort_definitions
  # Maps each supported sort key to a hash holding the :implementation
  # value associated with it.
  definitions = {}
  definitions["date_desc"] = { :implementation => "date" }
  definitions["relevance"] = { :implementation => "relevance" }
  # A "date_asc" entry (:implementation => "date2") is left disabled here,
  # as it was commented out in the original definition.
  definitions
end
|
524
|
-
|
524
|
+
|
525
525
|
def search_field_definitions
|
526
526
|
{
|
527
527
|
"TX" => {:semantic => :general},
|
@@ -534,11 +534,11 @@ class BentoSearch::EdsEngine
|
|
534
534
|
"IB" => {:semantic => :isbn},
|
535
535
|
}
|
536
536
|
end
|
537
|
-
|
538
|
-
# an exception talking to EDS api.
|
537
|
+
|
538
|
+
# an exception talking to EDS api.
|
539
539
|
# there's a short reason in #message, but also
|
540
540
|
# possibly an http_status and http_body copied
|
541
|
-
# from error EDS response.
|
541
|
+
# from error EDS response.
|
542
542
|
class EdsCommException < ::BentoSearch::FetchError
|
543
543
|
attr_accessor :http_status, :http_body
|
544
544
|
def initialize(message, status = nil, body = nil)
|
@@ -547,16 +547,16 @@ class BentoSearch::EdsEngine
|
|
547
547
|
self.http_body = body
|
548
548
|
end
|
549
549
|
end
|
550
|
-
|
551
|
-
|
550
|
+
|
551
|
+
|
552
552
|
# A built-in decorator always applied, that overrides
|
553
553
|
# the ResultItem#published_in display method to use our mess blob
|
554
554
|
# of human readable citation, since we don't have individual elements
|
555
|
-
# to create it from in a normalized way.
|
555
|
+
# to create it from in a normalized way.
|
556
556
|
module CitationMessDecorator
  # Returns whatever is stored under the "citation_blob" key of the
  # decorated object's custom_data (nil when that key is absent from a
  # plain Hash).
  def published_in
    citation_blob = custom_data["citation_blob"]
    citation_blob
  end
end
|
561
|
-
|
561
|
+
|
562
562
|
end
|