bento_search 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/README.md +92 -90
- data/app/item_decorators/bento_search/decorator_base.rb +9 -6
- data/app/item_decorators/bento_search/standard_decorator.rb +24 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +180 -179
- data/app/search_engines/bento_search/journal_tocs_for_journal.rb +179 -0
- data/app/views/bento_search/_std_item.html.erb +4 -4
- data/lib/bento_search/version.rb +1 -1
- data/test/decorator/decorator_base_test.rb +11 -1
- data/test/decorator/standard_decorator_test.rb +21 -0
- data/test/dummy/log/development.log +2 -0
- data/test/dummy/log/test.log +22324 -0
- data/test/{unit → search_engines}/ebsco_host_engine_test.rb +148 -130
- data/test/{unit → search_engines}/eds_engine_test.rb +0 -0
- data/test/{unit → search_engines}/google_books_engine_test.rb +0 -0
- data/test/{unit → search_engines}/google_site_search_test.rb +0 -0
- data/test/search_engines/journal_tocs_for_journal_test.rb +93 -0
- data/test/{unit → search_engines}/primo_engine_test.rb +0 -0
- data/test/{unit → search_engines}/scopus_engine_test.rb +0 -0
- data/test/{unit → search_engines}/search_engine_base_test.rb +0 -0
- data/test/{unit → search_engines}/search_engine_test.rb +0 -0
- data/test/{unit → search_engines}/summon_engine_test.rb +0 -0
- data/test/{unit → search_engines}/worldcat_sru_dc_engine_test.rb +0 -0
- data/test/{unit → search_engines}/xerxes_engine_test.rb +0 -0
- data/test/vcr_cassettes/ebscohost/RILM_record_with_ISSN_in__jid__element.yml +210 -0
- data/test/vcr_cassettes/journal_tocs/empty_results_on_bad_ISSN.yml +49 -0
- data/test/vcr_cassettes/journal_tocs/error_on_bad_registered_email.yml +41 -0
- data/test/vcr_cassettes/journal_tocs/error_on_error_response.yml +51 -0
- data/test/vcr_cassettes/journal_tocs/fetch_xml_with_hits.yml +328 -0
- data/test/vcr_cassettes/journal_tocs/fills_out_metadata.yml +396 -0
- data/test/vcr_cassettes/journal_tocs/smoke_test.yml +328 -0
- metadata +62 -61
@@ -50,18 +50,21 @@ module BentoSearch
|
|
50
50
|
end
|
51
51
|
|
52
52
|
# Applies decorator to item and returns decorated item.
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
53
|
+
# Will decide what decorator to apply based on String class name
|
54
|
+
# in item.decorator, or else apply StandardDecorator. The point of
|
55
|
+
# this method is just that logic, nothing else special.
|
56
56
|
#
|
57
57
|
# Need to pass a Rails ActionView::Context in, to use to
|
58
58
|
# initialize decorator. In Rails, in most places you can
|
59
59
|
# get one of those from #view_context. In helpers/views
|
60
60
|
# you can also use `self`.
|
61
61
|
def self.decorate(item, view_context)
|
62
|
-
# What decorator class?
|
63
|
-
#
|
64
|
-
|
62
|
+
# What decorator class? Specified in #decorator as a String,
|
63
|
+
# we intentionally do not allow an actual class constant, to
|
64
|
+
# maintain problem-free serialization of ItemResults and configuration.
|
65
|
+
decorator_class = item.decorator.try do |arg|
|
66
|
+
BentoSearch::Util.constantize(arg.to_s)
|
67
|
+
end || BentoSearch::StandardDecorator
|
65
68
|
|
66
69
|
return decorator_class.new(item, view_context)
|
67
70
|
end
|
@@ -149,6 +149,30 @@ module BentoSearch
|
|
149
149
|
return value.blank? ? nil : value
|
150
150
|
end
|
151
151
|
|
152
|
+
# outputs a date for display, from #publication_date or #year.
|
153
|
+
# Uses its own logic to decide whether to output entire date or just
|
154
|
+
# year, if it has a complete date. (If volume and issue are present,
|
155
|
+
# just year).
|
156
|
+
#
|
157
|
+
# Over-ride in a decorator if you want to always or never or different
|
158
|
+
# logic for complete date. Or if you want to change the format of the date,
|
159
|
+
# etc.
|
160
|
+
def display_date
|
161
|
+
if self.publication_date
|
162
|
+
if self.volume && self.issue
|
163
|
+
# just the year, ma'am
|
164
|
+
I18n.localize(self.publication_date, :format => "%Y")
|
165
|
+
else
|
166
|
+
# whole date, since we got it
|
167
|
+
I18n.localize(self.publication_date, :format => "%d %b %Y")
|
168
|
+
end
|
169
|
+
elsif self.year
|
170
|
+
self.year.to_s
|
171
|
+
else
|
172
|
+
nil
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
152
176
|
# A unique opaque identifier for a record may sometimes be
|
153
177
|
# required, for instance in Atom.
|
154
178
|
#
|
@@ -5,7 +5,7 @@ require 'nokogiri'
|
|
5
5
|
require 'http_client_patch/include_client'
|
6
6
|
require 'httpclient'
|
7
7
|
|
8
|
-
# Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
|
8
|
+
# Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
|
9
9
|
# may be expanded or refactored for EDS too.
|
10
10
|
#
|
11
11
|
# == Required Configuration
|
@@ -22,28 +22,28 @@ require 'httpclient'
|
|
22
22
|
#
|
23
23
|
# [:peer_reviewed_only] Set to boolean true or string 'true', to restrict
|
24
24
|
# results to peer-reviewed only. (Or ask EBSCOHost
|
25
|
-
# api to do so, what we get is what we get).
|
25
|
+
# api to do so, what we get is what we get).
|
26
26
|
# [:pubyear_start]
|
27
27
|
# [:pubyear_end] Date range limiting, pass in custom search args,
|
28
28
|
# one or both of pubyear_start and pubyear_end
|
29
|
-
# #to_i will be called on it, so can be string.
|
29
|
+
# #to_i will be called on it, so can be string.
|
30
30
|
# .search(:query => "foo", :pubyear_start => 2000)
|
31
31
|
# [:databases] List of licensed EBSCO dbs to search, can override
|
32
|
-
# list set in config databases, just for this search.
|
33
|
-
#
|
32
|
+
# list set in config databases, just for this search.
|
33
|
+
#
|
34
34
|
# == Custom response data
|
35
|
-
#
|
36
|
-
# Iff EBSCO API reports that fulltext is available for the hit, then
|
35
|
+
#
|
36
|
+
# Iff EBSCO API reports that fulltext is available for the hit, then
|
37
37
|
# result.custom_data["fulltext_formats"] will be non-nil, and will be an array of
|
38
38
|
# one or more of EBSCO's internal codes (P=PDF, T=HTML, C=HTML+Images). If
|
39
39
|
# no fulltext is avail according to EBSCO API, result.custom_data["fulltext_formats"]
|
40
|
-
# will be nil.
|
40
|
+
# will be nil.
|
41
41
|
#
|
42
42
|
# #link_is_fulltext also set to true/false
|
43
43
|
#
|
44
44
|
# You can use this to, for instance, hyperlink the displayed title directly
|
45
45
|
# to record on EBSCO if and only if there's fulltext. By writing a custom
|
46
|
-
# decorator. See wiki on decorators.
|
46
|
+
# decorator. See wiki on decorators.
|
47
47
|
#
|
48
48
|
# == Limitations
|
49
49
|
# We do set language of ResultItems based on what ebsco tells us, but ebsco
|
@@ -56,12 +56,12 @@ require 'httpclient'
|
|
56
56
|
# EBSCO that you want included in the search. You can't just say "all of them"
|
57
57
|
# the api doesn't support that, and also more than 30 or 40 starts getting
|
58
58
|
# horribly slow. If you include a db you do not have access to, EBSCO api
|
59
|
-
# fatal errors.
|
59
|
+
# fatal errors.
|
60
60
|
#
|
61
61
|
# You may want to make sure all your licensed databases are included
|
62
62
|
# in your EIT profile. Log onto ebscoadmin, Customize Services, choose
|
63
|
-
# EIT profile, choose 'databases' tag.
|
64
|
-
#
|
63
|
+
# EIT profile, choose 'databases' tag.
|
64
|
+
#
|
65
65
|
# === Download databases from EBSCO api
|
66
66
|
#
|
67
67
|
# We include a utility to download ALL activated databases for EIT profile
|
@@ -69,12 +69,12 @@ require 'httpclient'
|
|
69
69
|
# file as a starting point, and edit by hand:
|
70
70
|
#
|
71
71
|
# First configure your EBSCO search engine with bento_search, say under
|
72
|
-
# key 'ebscohost'.
|
72
|
+
# key 'ebscohost'.
|
73
73
|
#
|
74
74
|
# Then run:
|
75
75
|
# rails generate bento_search:pull_ebsco_dbs ebscohost
|
76
76
|
#
|
77
|
-
# assuming 'ebscohost' is the key you registered the EBSCO search engine.
|
77
|
+
# assuming 'ebscohost' is the key you registered the EBSCO search engine.
|
78
78
|
#
|
79
79
|
# This will create a file at ./config/ebsco_dbs.rb. You may want to hand
|
80
80
|
# edit it. Then, in your bento search config, you can:
|
@@ -85,7 +85,7 @@ require 'httpclient'
|
|
85
85
|
# conf.databases = $ebsco_dbs
|
86
86
|
# end
|
87
87
|
#
|
88
|
-
# == Vendor documentation
|
88
|
+
# == Vendor documentation
|
89
89
|
#
|
90
90
|
# Vendor documentation is a bit scattered, main page:
|
91
91
|
# * http://support.ebsco.com/eit/ws.php
|
@@ -93,10 +93,10 @@ require 'httpclient'
|
|
93
93
|
# * http://support.ebsco.com/eit/ws_faq.php
|
94
94
|
# * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
|
95
95
|
# * Try constructing a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
|
96
|
-
# * The 'info' service can be used to see what databases you have access to.
|
96
|
+
# * The 'info' service can be used to see what databases you have access to.
|
97
97
|
# * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
|
98
98
|
#
|
99
|
-
# Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
|
99
|
+
# Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
|
100
100
|
# http://support.epnet.com/knowledge_base/detail.php?id=5397
|
101
101
|
#
|
102
102
|
# EBSCO searchable support portal has a section for the EIT api we use here:
|
@@ -104,20 +104,20 @@ require 'httpclient'
|
|
104
104
|
|
105
105
|
class BentoSearch::EbscoHostEngine
|
106
106
|
include BentoSearch::SearchEngine
|
107
|
-
|
107
|
+
|
108
108
|
# Can't change http timeout in config, because we keep an http
|
109
|
-
# client at class-wide level, and config is not class-wide.
|
109
|
+
# client at class-wide level, and config is not class-wide.
|
110
110
|
# Change this 'constant' if you want to change it, I guess.
|
111
111
|
#
|
112
112
|
# In some tests we did, 5.2s was 95th percentile slowest, but in
|
113
113
|
# actual percentage 5.2s is still timing out way too many requests,
|
114
|
-
# let's try 6.3, why not.
|
115
|
-
HttpTimeout = 6.3
|
116
|
-
extend HTTPClientPatch::IncludeClient
|
114
|
+
# let's try 6.3, why not.
|
115
|
+
HttpTimeout = 6.3
|
116
|
+
extend HTTPClientPatch::IncludeClient
|
117
117
|
include_http_client do |client|
|
118
118
|
client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
|
119
119
|
end
|
120
|
-
|
120
|
+
|
121
121
|
# Include some rails helpers, text_helper.truncate
|
122
122
|
def text_helper
|
123
123
|
@@truncate ||= begin
|
@@ -126,10 +126,10 @@ class BentoSearch::EbscoHostEngine
|
|
126
126
|
o
|
127
127
|
end
|
128
128
|
end
|
129
|
-
|
129
|
+
|
130
130
|
def search_implementation(args)
|
131
131
|
url = query_url(args)
|
132
|
-
|
132
|
+
|
133
133
|
results = BentoSearch::Results.new
|
134
134
|
xml, response, exception = nil, nil, nil
|
135
135
|
|
@@ -137,85 +137,85 @@ class BentoSearch::EbscoHostEngine
|
|
137
137
|
response = http_client.get(url)
|
138
138
|
xml = Nokogiri::XML(response.body)
|
139
139
|
rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
|
140
|
-
exception = e
|
140
|
+
exception = e
|
141
141
|
end
|
142
142
|
# error handle
|
143
|
-
if ( response.nil? ||
|
144
|
-
xml.nil? ||
|
145
|
-
exception ||
|
143
|
+
if ( response.nil? ||
|
144
|
+
xml.nil? ||
|
145
|
+
exception ||
|
146
146
|
(! HTTP::Status.successful? response.status) ||
|
147
147
|
(fault = xml.at_xpath("./Fault")))
|
148
|
-
|
148
|
+
|
149
149
|
results.error ||= {}
|
150
150
|
results.error[:api_url] = url
|
151
151
|
results.error[:exception] = exception if exception
|
152
152
|
results.error[:status] = response.status if response
|
153
|
-
|
153
|
+
|
154
154
|
if fault
|
155
155
|
results.error[:error_info] = text_if_present fault.at_xpath("./Message")
|
156
156
|
end
|
157
|
-
|
157
|
+
|
158
158
|
return results
|
159
159
|
end
|
160
|
-
|
161
|
-
|
162
|
-
|
160
|
+
|
161
|
+
|
162
|
+
|
163
163
|
# the namespaces they provide are weird and don't help and sometimes
|
164
164
|
# not clearly even legal. Remove em!
|
165
165
|
xml.remove_namespaces!
|
166
|
-
|
166
|
+
|
167
167
|
results.total_items = xml.at_xpath("./searchResponse/Hits").text.to_i
|
168
|
-
|
168
|
+
|
169
169
|
xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
|
170
170
|
results << item_from_xml( xml_rec )
|
171
171
|
end
|
172
|
-
|
172
|
+
|
173
173
|
return results
|
174
|
-
|
174
|
+
|
175
175
|
end
|
176
|
-
|
176
|
+
|
177
177
|
# Method to get a single record by "identifier" string, which is really
|
178
178
|
# a combined "db:id" string, same string that would be returned by
|
179
179
|
# an individual item.identifier
|
180
180
|
#
|
181
181
|
# Returns an individual BentoSearch::Result, or raises an exception.
|
182
|
-
# Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
|
182
|
+
# Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
|
183
183
|
# any other weird random exception caused by problems fetching (network
|
184
184
|
# error etc. Is it bad that we don't wrap these in an expected single
|
185
185
|
# exception type? Should we?)
|
186
186
|
def get(id)
|
187
|
-
# split on first colon only.
|
187
|
+
# split on first colon only.
|
188
188
|
id =~ /^([^:]+)\:(.*)$/
|
189
189
|
db = $1 ; an = $2
|
190
|
-
|
190
|
+
|
191
191
|
raise ArgumentError.new("EbscoHostEngine#get requires an id with a colon, like `a9h:12345`. Instead, we got #{id}") unless db && an
|
192
|
-
|
192
|
+
|
193
193
|
# "AN" search_field is not listed in our search_field_definitions,
|
194
194
|
# but it is an internal EBSCOHost search index on 'accession number'
|
195
|
-
|
195
|
+
|
196
196
|
results = search(an, :search_field => "AN", :databases => [db])
|
197
|
-
|
197
|
+
|
198
198
|
raise (results.error[:exception] || Exception.new) if results.failed?
|
199
199
|
raise BentoSearch::NotFound.new("For id: #{id}") if results.length == 0
|
200
200
|
raise BentoSearch::TooManyFound.new("For id: #{id}") if results.length > 1
|
201
|
-
|
201
|
+
|
202
202
|
return results.first
|
203
203
|
end
|
204
|
-
|
205
|
-
# pass in nokogiri record xml for the records/rec node.
|
206
|
-
# Returns nil if NO fulltext is avail on ebsco platform,
|
204
|
+
|
205
|
+
# pass in nokogiri record xml for the records/rec node.
|
206
|
+
# Returns nil if NO fulltext is avail on ebsco platform,
|
207
207
|
# non-nil if fulltext is available. Non-nil value will
|
208
|
-
# actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
|
208
|
+
# actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
|
209
209
|
# http://support.epnet.com/knowledge_base/detail.php?topic=996&id=3778&page=1
|
210
210
|
def fulltext_formats(record_xml)
|
211
211
|
fulltext_formats = record_xml.xpath("./header/controlInfo/artinfo/formats/fmt/@type").collect {|n| n.text }
|
212
|
-
|
212
|
+
|
213
213
|
return nil if fulltext_formats.empty?
|
214
|
-
|
215
|
-
return fulltext_formats
|
214
|
+
|
215
|
+
return fulltext_formats
|
216
216
|
end
|
217
|
-
|
218
|
-
|
217
|
+
|
218
|
+
|
219
219
|
# Pass in a nokogiri node, return node.text, or nil if
|
220
220
|
# arg was nil or node.text was blank?
|
221
221
|
def text_if_present(node)
|
@@ -223,16 +223,16 @@ class BentoSearch::EbscoHostEngine
|
|
223
223
|
nil
|
224
224
|
else
|
225
225
|
node.text
|
226
|
-
end
|
226
|
+
end
|
227
227
|
end
|
228
|
-
|
229
|
-
# Figure out proper controlled format for an ebsco item.
|
228
|
+
|
229
|
+
# Figure out proper controlled format for an ebsco item.
|
230
230
|
# EBSCOHost (not sure about EDS) publication/document type
|
231
231
|
# are totally unusable non-normalized vocabulary for controlled
|
232
|
-
# types, we'll try to guess from other metadata features.
|
232
|
+
# types, we'll try to guess from other metadata features.
|
233
233
|
def sniff_format(xml_node)
|
234
234
|
return nil if xml_node.nil?
|
235
|
-
|
235
|
+
|
236
236
|
if xml_node.at_xpath("./dissinfo/*")
|
237
237
|
:dissertation
|
238
238
|
elsif xml_node.at_xpath("./jinfo/*") && xml_node.at_xpath("./artinfo/*")
|
@@ -246,35 +246,35 @@ class BentoSearch::EbscoHostEngine
|
|
246
246
|
# pathological case of book_item, if it has a bkinfo and an artinfo
|
247
247
|
# but the titles in both sections MATCH, it's just a book. If they're
|
248
248
|
# different, it's a book section, bah!
|
249
|
-
:book_item
|
249
|
+
:book_item
|
250
250
|
elsif xml_node.at_xpath("./bkinfo/*")
|
251
251
|
"Book"
|
252
252
|
elsif xml_node.at_xpath("./jinfo/*")
|
253
253
|
:serial
|
254
254
|
else
|
255
255
|
nil
|
256
|
-
end
|
256
|
+
end
|
257
257
|
end
|
258
|
-
|
258
|
+
|
259
259
|
# Figure out uncontrolled literal string format to show to users.
|
260
260
|
# We're going to try combining Ebsco Publication Type and Document Type,
|
261
|
-
# when both are present. Then a few hard-coded special transformations.
|
262
|
-
def sniff_format_str(xml_node)
|
261
|
+
# when both are present. Then a few hard-coded special transformations.
|
262
|
+
def sniff_format_str(xml_node)
|
263
263
|
pubtype = text_if_present( xml_node.at_xpath("./artinfo/pubtype") )
|
264
264
|
doctype = text_if_present( xml_node.at_xpath("./artinfo/doctype") )
|
265
|
-
|
265
|
+
|
266
266
|
components = []
|
267
267
|
components.push pubtype
|
268
268
|
components.push doctype unless doctype == pubtype
|
269
|
-
|
269
|
+
|
270
270
|
components.compact!
|
271
|
-
|
271
|
+
|
272
272
|
components = components.collect {|a| a.titlecase if a}
|
273
273
|
components.uniq! # no need to have the same thing twice
|
274
274
|
|
275
|
-
|
275
|
+
|
276
276
|
# some hard-coded cases for better user-displayable string, and other
|
277
|
-
# normalization.
|
277
|
+
# normalization.
|
278
278
|
if ["Academic Journal", "Journal"].include?(components.first) && ["Article", "Journal Article"].include?(components.last)
|
279
279
|
return "Journal Article"
|
280
280
|
elsif components.last == "Book: Monograph"
|
@@ -290,251 +290,252 @@ class BentoSearch::EbscoHostEngine
|
|
290
290
|
# first is strict substring, don't need it
|
291
291
|
return components.last
|
292
292
|
end
|
293
|
-
|
294
|
-
|
295
|
-
|
293
|
+
|
294
|
+
|
295
|
+
|
296
296
|
return components.join(": ")
|
297
297
|
end
|
298
|
-
|
298
|
+
|
299
299
|
# pass in <rec> nokogiri, will determine best link
|
300
300
|
def get_link(xml)
|
301
301
|
text_if_present(xml.at_xpath("./pdfLink")) || text_if_present(xml.at_xpath("./plink") )
|
302
302
|
end
|
303
|
-
|
304
|
-
|
305
|
-
# escape or replace special chars to ebsco
|
303
|
+
|
304
|
+
|
305
|
+
# escape or replace special chars to ebsco
|
306
306
|
def ebsco_query_escape(txt)
|
307
307
|
# it's unclear if ebsco API actually allows escaping of special chars,
|
308
308
|
# or what the special chars are. But we know parens are special, can't
|
309
309
|
# escape em, we'll just remove em (should not affect search).
|
310
|
-
|
310
|
+
|
311
311
|
# undocumented but question mark seems to cause a problem for ebsco,
|
312
|
-
# even inside quoted phrases, not sure why.
|
313
|
-
txt = txt.gsub(/[)(
|
314
|
-
|
312
|
+
# even inside quoted phrases, not sure why. Square brackets too.
|
313
|
+
txt = txt.gsub(/[)(\?\[\]]/, ' ')
|
314
|
+
|
315
315
|
# 'and' and 'or' need to be in phrase quotes to avoid being
|
316
316
|
# interpreted as boolean. For instance, when people just
|
317
317
|
# paste in a title: << A strategy for decreasing anxiety of ICU transfer patients and their families >>
|
318
318
|
# You'd think 'and' as boolean would still work there, but it resulted
|
319
319
|
# in zero hits unless quoted, I dunno. lowercase and uppercase and/or/not
|
320
|
-
# both cause observed weirdness.
|
320
|
+
# both cause observed weirdness.
|
321
321
|
if ['and', 'or', 'not'].include?( txt.downcase )
|
322
322
|
txt = %Q{"#{txt}"}
|
323
|
-
end
|
324
|
-
|
323
|
+
end
|
324
|
+
|
325
325
|
return txt
|
326
326
|
end
|
327
|
-
|
327
|
+
|
328
328
|
# Actually turn the user's query into an EBSCO "AND" boolean query,
|
329
329
|
# seems only way to get decent results where terms can match cross-fields
|
330
|
-
# at the moment, for EIT. We'll see for EDS.
|
330
|
+
# at the moment, for EIT. We'll see for EDS.
|
331
331
|
def ebsco_query_prepare(txt)
|
332
332
|
# use string split with regex cleverly to split into space
|
333
|
-
# seperated terms and phrases, keeping phrases as unit.
|
333
|
+
# separated terms and phrases, keeping phrases as unit.
|
334
334
|
terms = txt.split %r{[[:space:]]+|("[^"]+")}
|
335
335
|
|
336
336
|
# Remove parens in non-phrase-quoted terms
|
337
|
-
terms = terms.collect do |t|
|
338
|
-
ebsco_query_escape(t)
|
337
|
+
terms = terms.collect do |t|
|
338
|
+
ebsco_query_escape(t)
|
339
339
|
end
|
340
|
-
|
340
|
+
|
341
341
|
|
342
342
|
# Remove empty strings. Remove terms that are solely punctuation
|
343
|
-
# without any letters.
|
343
|
+
# without any letters.
|
344
344
|
terms.delete_if do |term|
|
345
|
-
(
|
346
|
-
term.blank? ||
|
345
|
+
(
|
346
|
+
term.blank? ||
|
347
347
|
term =~ /\A[^[[:alnum:]]]+\Z/
|
348
348
|
)
|
349
349
|
end
|
350
|
-
|
351
|
-
terms.join(" AND ")
|
350
|
+
|
351
|
+
terms.join(" AND ")
|
352
352
|
end
|
353
|
-
|
353
|
+
|
354
354
|
def query_url(args)
|
355
|
-
|
356
|
-
url =
|
355
|
+
|
356
|
+
url =
|
357
357
|
"#{configuration.base_url}/Search?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
|
358
|
-
|
359
|
-
query = ebsco_query_prepare args[:query]
|
360
|
-
|
361
|
-
|
358
|
+
|
359
|
+
query = ebsco_query_prepare args[:query]
|
360
|
+
|
361
|
+
|
362
362
|
# wrap in (FI $query) if fielded search
|
363
363
|
if args[:search_field]
|
364
364
|
query = "(#{args[:search_field]} #{query})"
|
365
365
|
end
|
366
|
-
|
366
|
+
|
367
367
|
# peer-reviewed only?
|
368
368
|
if [true, "true"].include? args[:peer_reviewed_only]
|
369
369
|
query += " AND (RV Y)"
|
370
370
|
end
|
371
|
-
|
371
|
+
|
372
372
|
if args[:pubyear_start] || args[:pubyear_end]
|
373
|
-
from = args[:pubyear_start].to_i
|
373
|
+
from = args[:pubyear_start].to_i
|
374
374
|
from = nil if from == 0
|
375
|
-
|
376
|
-
to = args[:pubyear_end].to_i
|
375
|
+
|
376
|
+
to = args[:pubyear_end].to_i
|
377
377
|
to = nil if to == 0
|
378
|
-
|
378
|
+
|
379
379
|
query += " AND (DT #{from}-#{to})"
|
380
380
|
end
|
381
|
-
|
382
|
-
|
381
|
+
|
382
|
+
|
383
383
|
url += "&query=#{CGI.escape query}"
|
384
|
-
|
385
|
-
# startrec is 1-based for ebsco, not 0-based like for us.
|
384
|
+
|
385
|
+
# startrec is 1-based for ebsco, not 0-based like for us.
|
386
386
|
url += "&startrec=#{args[:start] + 1}" if args[:start]
|
387
387
|
url += "&numrec=#{args[:per_page]}" if args[:per_page]
|
388
|
-
|
389
|
-
# Make relevance our default sort, rather than EBSCO's date.
|
388
|
+
|
389
|
+
# Make relevance our default sort, rather than EBSCO's date.
|
390
390
|
args[:sort] ||= "relevance"
|
391
391
|
url += "&sort=#{ sort_definitions[args[:sort]][:implementation]}"
|
392
|
-
|
392
|
+
|
393
393
|
# Contrary to docs, don't pass these comma-separated, pass em in separate
|
394
|
-
# query params. args databases overrides config databases.
|
394
|
+
# query params. args databases overrides config databases.
|
395
395
|
(args[:databases] || configuration.databases).each do |db|
|
396
396
|
url += "&db=#{db}"
|
397
|
-
end
|
397
|
+
end
|
398
398
|
|
399
399
|
return url
|
400
400
|
end
|
401
|
-
|
401
|
+
|
402
402
|
# pass in a nokogiri representing an EBSCO <rec> result,
|
403
|
-
# we'll turn it into a BentoSearch::ResultItem.
|
404
|
-
def item_from_xml(xml_rec)
|
403
|
+
# we'll turn it into a BentoSearch::ResultItem.
|
404
|
+
def item_from_xml(xml_rec)
|
405
405
|
info = xml_rec.at_xpath("./header/controlInfo")
|
406
|
-
|
406
|
+
|
407
407
|
item = BentoSearch::ResultItem.new
|
408
|
-
|
408
|
+
|
409
409
|
# Get unique id. Think we need both the database code and accession
|
410
410
|
# number combined, accession numbers not necessarily unique across
|
411
|
-
# dbs. We'll combine with a colon.
|
411
|
+
# dbs. We'll combine with a colon.
|
412
412
|
db = text_if_present xml_rec.at_xpath("./header/@shortDbName")
|
413
|
-
accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
|
413
|
+
accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
|
414
414
|
item.unique_id = "#{db}:#{accession}" if db && accession
|
415
|
-
|
416
|
-
|
415
|
+
|
416
|
+
|
417
417
|
item.link = get_link(xml_rec)
|
418
418
|
|
419
|
-
|
420
|
-
|
419
|
+
# EBSCO is somewhat inconsistent with where it puts the ISSN
|
420
|
+
item.issn = text_if_present(info.at_xpath("./jinfo/issn")) || text_if_present(info.at_xpath("./jinfo/jid[@type='issn']"))
|
421
|
+
|
421
422
|
# Dealing with titles is a bit crazy, while articles usually have atitles and
|
422
423
|
# jtitles, sometimes they have a btitle instead. A book will usually have
|
423
|
-
# both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
|
424
|
-
|
424
|
+
# both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
|
425
|
+
|
425
426
|
jtitle = text_if_present(info.at_xpath("./jinfo/jtl"))
|
426
427
|
btitle = text_if_present info.at_xpath("./bkinfo/btl")
|
427
428
|
atitle = text_if_present info.at_xpath("./artinfo/tig/atl")
|
428
|
-
|
429
|
+
|
429
430
|
if jtitle && atitle
|
430
431
|
item.title = atitle
|
431
432
|
item.source_title = jtitle
|
432
433
|
elsif btitle && atitle && atitle != btitle
|
433
434
|
# for a book, sometimes there's an atitle block and a btitle block
|
434
|
-
# when they're identical, this ain't a book section, it's a book.
|
435
|
+
# when they're identical, this ain't a book section, it's a book.
|
435
436
|
item.title = atitle
|
436
437
|
item.source_title = btitle
|
437
438
|
else
|
438
439
|
item.title = atitle || btitle
|
439
|
-
end
|
440
|
+
end
|
440
441
|
# EBSCO sometimes has crazy long titles, truncate em.
|
441
442
|
if item.title.present?
|
442
443
|
item.title = text_helper.truncate(item.title, :length => 200, :separator => ' ', :omission => '…')
|
443
444
|
end
|
444
|
-
|
445
|
-
|
446
|
-
|
445
|
+
|
446
|
+
|
447
|
+
|
447
448
|
item.publisher = text_if_present info.at_xpath("./pubinfo/pub")
|
448
449
|
# if no publisher, but a dissertation institution, use that
|
449
|
-
# as publisher.
|
450
|
+
# as publisher.
|
450
451
|
unless item.publisher
|
451
452
|
item.publisher = text_if_present info.at_xpath("./dissinfo/dissinst")
|
452
453
|
end
|
453
|
-
|
454
|
-
|
454
|
+
|
455
|
+
|
455
456
|
# Might have multiple ISBN's in record, just take first for now
|
456
457
|
item.isbn = text_if_present info.at_xpath("./bkinfo/isbn")
|
457
|
-
|
458
|
+
|
458
459
|
item.year = text_if_present info.at_xpath("./pubinfo/dt/@year")
|
459
|
-
# fill in complete publication_date too only if we've got it.
|
460
|
+
# fill in complete publication_date too only if we've got it.
|
460
461
|
if (item.year &&
|
461
462
|
(month = text_if_present info.at_xpath("./pubinfo/dt/@month")) &&
|
462
|
-
(day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
|
463
|
+
(day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
|
463
464
|
)
|
464
465
|
if (item.year.to_i != 0 && month.to_i != 0 && day.to_i != 0)
|
465
|
-
item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
|
466
|
+
item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
|
466
467
|
end
|
467
468
|
end
|
468
|
-
|
469
|
+
|
469
470
|
item.volume = text_if_present info.at_xpath("./pubinfo/vid")
|
470
471
|
item.issue = text_if_present info.at_xpath("./pubinfo/iid")
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
472
|
+
|
473
|
+
|
474
|
+
|
475
|
+
|
475
476
|
item.start_page = text_if_present info.at_xpath("./artinfo/ppf")
|
476
|
-
|
477
|
+
|
477
478
|
item.doi = text_if_present info.at_xpath("./artinfo/ui[@type='doi']")
|
478
|
-
|
479
|
+
|
479
480
|
item.abstract = text_if_present info.at_xpath("./artinfo/ab")
|
480
481
|
# EBSCO abstracts have an annoying habit of beginning with "Abstract:"
|
481
482
|
if item.abstract
|
482
483
|
item.abstract.gsub!(/^Abstract\: /, "")
|
483
484
|
end
|
484
|
-
|
485
|
-
# authors, only get full display name from EBSCO.
|
485
|
+
|
486
|
+
# authors, only get full display name from EBSCO.
|
486
487
|
info.xpath("./artinfo/aug/au").each do |author|
|
487
488
|
a = BentoSearch::Author.new(:display => author.text)
|
488
489
|
item.authors << a
|
489
490
|
end
|
490
|
-
|
491
|
+
|
491
492
|
item.format = sniff_format info
|
492
493
|
item.format_str = sniff_format_str info
|
493
|
-
|
494
|
+
|
494
495
|
# Totally unreliable, seems to report english for everything? Maybe
|
495
|
-
# because abstracts are in english? Nevertheless we include for now.
|
496
|
+
# because abstracts are in english? Nevertheless we include for now.
|
496
497
|
item.language_code = text_if_present info.at_xpath("./language/@code")
|
497
498
|
# why does EBSCO return 'undetermined' sometimes? That might as well be
|
498
|
-
# not there, bah.
|
499
|
+
# not there, bah.
|
499
500
|
item.language_code = nil if item.language_code == "und"
|
500
|
-
|
501
|
-
# array of custom ebsco codes (or nil) for fulltext formats avail.
|
501
|
+
|
502
|
+
# array of custom ebsco codes (or nil) for fulltext formats avail.
|
502
503
|
item.custom_data["fulltext_formats"] = fulltext_formats xml_rec
|
503
504
|
# if any fulltext format, mark present
|
504
|
-
item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
|
505
|
-
|
505
|
+
item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
|
506
|
+
|
506
507
|
return item
|
507
508
|
end
|
508
|
-
|
509
|
+
|
509
510
|
# This method is not used for normal searching, but can be used by
|
510
|
-
# other code to retrieve the results of the EBSCO API Info command,
|
511
|
+
# other code to retrieve the results of the EBSCO API Info command,
|
511
512
|
# using connection details configured in this engine. The Info command
|
512
513
|
# can tell you what databases your account is authorized to see.
|
513
514
|
# Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED
|
514
515
|
def get_info
|
515
|
-
url =
|
516
|
-
"#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
|
517
|
-
|
516
|
+
url =
|
517
|
+
"#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
|
518
|
+
|
518
519
|
noko = Nokogiri::XML( http_client.get( url ).body )
|
519
|
-
|
520
|
+
|
520
521
|
noko.remove_namespaces!
|
521
|
-
|
522
|
+
|
522
523
|
return noko
|
523
524
|
end
|
524
|
-
|
525
|
+
|
525
526
|
def public_settable_search_args
|
526
527
|
super + [:peer_reviewed_only, :pubyear_start, :pubyear_end]
|
527
528
|
end
|
528
|
-
|
529
|
+
|
529
530
|
# David Walker says pretty much only relevance and date are reliable
|
530
|
-
# in EBSCOhost cross-search.
|
531
|
+
# in EBSCOhost cross-search.
|
531
532
|
def sort_definitions
|
532
|
-
{
|
533
|
+
{
|
533
534
|
"relevance" => {:implementation => "relevance"},
|
534
535
|
"date_desc" => {:implementation => "date"}
|
535
|
-
}
|
536
|
+
}
|
536
537
|
end
|
537
|
-
|
538
|
+
|
538
539
|
def search_field_definitions
|
539
540
|
{
|
540
541
|
nil => {:semantic => :general},
|
@@ -545,17 +546,17 @@ class BentoSearch::EbscoHostEngine
|
|
545
546
|
"IB" => {:semantic => :isbn}
|
546
547
|
}
|
547
548
|
end
|
548
|
-
|
549
|
+
|
549
550
|
def max_per_page
|
550
551
|
# Actually only '50' if you ask for 'full' records, but I don't think
|
551
|
-
# we need to do that ever, that's actually getting fulltext back!
|
552
|
+
# we need to do that ever, that's actually getting fulltext back!
|
552
553
|
200
|
553
554
|
end
|
554
|
-
|
555
|
+
|
555
556
|
def self.required_configuration
|
556
557
|
["profile_id", "profile_password"]
|
557
558
|
end
|
558
|
-
|
559
|
+
|
559
560
|
def self.default_configuration
|
560
561
|
{
|
561
562
|
# /Search
|
@@ -563,5 +564,5 @@ class BentoSearch::EbscoHostEngine
|
|
563
564
|
:databases => []
|
564
565
|
}
|
565
566
|
end
|
566
|
-
|
567
|
+
|
567
568
|
end
|