bento_search 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/README.md +92 -90
- data/app/item_decorators/bento_search/decorator_base.rb +9 -6
- data/app/item_decorators/bento_search/standard_decorator.rb +24 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +180 -179
- data/app/search_engines/bento_search/journal_tocs_for_journal.rb +179 -0
- data/app/views/bento_search/_std_item.html.erb +4 -4
- data/lib/bento_search/version.rb +1 -1
- data/test/decorator/decorator_base_test.rb +11 -1
- data/test/decorator/standard_decorator_test.rb +21 -0
- data/test/dummy/log/development.log +2 -0
- data/test/dummy/log/test.log +22324 -0
- data/test/{unit → search_engines}/ebsco_host_engine_test.rb +148 -130
- data/test/{unit → search_engines}/eds_engine_test.rb +0 -0
- data/test/{unit → search_engines}/google_books_engine_test.rb +0 -0
- data/test/{unit → search_engines}/google_site_search_test.rb +0 -0
- data/test/search_engines/journal_tocs_for_journal_test.rb +93 -0
- data/test/{unit → search_engines}/primo_engine_test.rb +0 -0
- data/test/{unit → search_engines}/scopus_engine_test.rb +0 -0
- data/test/{unit → search_engines}/search_engine_base_test.rb +0 -0
- data/test/{unit → search_engines}/search_engine_test.rb +0 -0
- data/test/{unit → search_engines}/summon_engine_test.rb +0 -0
- data/test/{unit → search_engines}/worldcat_sru_dc_engine_test.rb +0 -0
- data/test/{unit → search_engines}/xerxes_engine_test.rb +0 -0
- data/test/vcr_cassettes/ebscohost/RILM_record_with_ISSN_in__jid__element.yml +210 -0
- data/test/vcr_cassettes/journal_tocs/empty_results_on_bad_ISSN.yml +49 -0
- data/test/vcr_cassettes/journal_tocs/error_on_bad_registered_email.yml +41 -0
- data/test/vcr_cassettes/journal_tocs/error_on_error_response.yml +51 -0
- data/test/vcr_cassettes/journal_tocs/fetch_xml_with_hits.yml +328 -0
- data/test/vcr_cassettes/journal_tocs/fills_out_metadata.yml +396 -0
- data/test/vcr_cassettes/journal_tocs/smoke_test.yml +328 -0
- metadata +62 -61
@@ -50,18 +50,21 @@ module BentoSearch
|
|
50
50
|
end
|
51
51
|
|
52
52
|
# Applies decorator to item and returns decorated item.
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
53
|
+
# Will decide what decorator to apply based on String class name
|
54
|
+
# in item.decorator, or else apply StandardDecorator. The point of
|
55
|
+
# this method is just that logic, nothing else special.
|
56
56
|
#
|
57
57
|
# Need to pass a Rails ActionView::Context in, to use to
|
58
58
|
# initialize decorator. In Rails, in most places you can
|
59
59
|
# get one of those from #view_context. In helpers/views
|
60
60
|
# you can also use `self`.
|
61
61
|
def self.decorate(item, view_context)
|
62
|
-
# What decorator class?
|
63
|
-
#
|
64
|
-
|
62
|
+
# What decorator class? Specified in #decorator as a String,
|
63
|
+
# we intentionally do not allow an actual class constant, to
|
64
|
+
# maintain problem-free serialization of ItemResults and configuration.
|
65
|
+
decorator_class = item.decorator.try do |arg|
|
66
|
+
BentoSearch::Util.constantize(arg.to_s)
|
67
|
+
end || BentoSearch::StandardDecorator
|
65
68
|
|
66
69
|
return decorator_class.new(item, view_context)
|
67
70
|
end
|
@@ -149,6 +149,30 @@ module BentoSearch
|
|
149
149
|
return value.blank? ? nil : value
|
150
150
|
end
|
151
151
|
|
152
|
+
# outputs a date for display, from #publication_date or #year.
|
153
|
+
# Uses its own logic to decide whether to output entire date or just
|
154
|
+
# year, if it has a complete date. (If volume and issue are present,
|
155
|
+
# just year).
|
156
|
+
#
|
157
|
+
# Over-ride in a decorator if you want to always or never or different
|
158
|
+
# logic for complete date. Or if you want to change the format of the date,
|
159
|
+
# etc.
|
160
|
+
def display_date
|
161
|
+
if self.publication_date
|
162
|
+
if self.volume && self.issue
|
163
|
+
# just the year, ma'am
|
164
|
+
I18n.localize(self.publication_date, :format => "%Y")
|
165
|
+
else
|
166
|
+
# whole date, since we got it
|
167
|
+
I18n.localize(self.publication_date, :format => "%d %b %Y")
|
168
|
+
end
|
169
|
+
elsif self.year
|
170
|
+
self.year.to_s
|
171
|
+
else
|
172
|
+
nil
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
152
176
|
# A unique opaque identifier for a record may sometimes be
|
153
177
|
# required, for instance in Atom.
|
154
178
|
#
|
@@ -5,7 +5,7 @@ require 'nokogiri'
|
|
5
5
|
require 'http_client_patch/include_client'
|
6
6
|
require 'httpclient'
|
7
7
|
|
8
|
-
# Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
|
8
|
+
# Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
|
9
9
|
# may be expanded or refactored for EDS too.
|
10
10
|
#
|
11
11
|
# == Required Configuration
|
@@ -22,28 +22,28 @@ require 'httpclient'
|
|
22
22
|
#
|
23
23
|
# [:peer_reviewed_only] Set to boolean true or string 'true', to restrict
|
24
24
|
# results to peer-reviewed only. (Or ask EBSCOHost
|
25
|
-
# api to do so, what we get is what we get).
|
25
|
+
# api to do so, what we get is what we get).
|
26
26
|
# [:pubyear_start]
|
27
27
|
# [:pubyear_end] Date range limiting, pass in custom search args,
|
28
28
|
# one or both of pubyear_start and pubyear_end
|
29
|
-
# #to_i will be called on it, so can be string.
|
29
|
+
# #to_i will be called on it, so can be string.
|
30
30
|
# .search(:query => "foo", :pubyear_start => 2000)
|
31
31
|
# [:databases] List of licensed EBSCO dbs to search, can override
|
32
|
-
# list set in config databases, just for this search.
|
33
|
-
#
|
32
|
+
# list set in config databases, just for this search.
|
33
|
+
#
|
34
34
|
# == Custom response data
|
35
|
-
#
|
36
|
-
# Iff EBSCO API reports that fulltext is available for the hit, then
|
35
|
+
#
|
36
|
+
# Iff EBSCO API reports that fulltext is available for the hit, then
|
37
37
|
# result.custom_data["fulltext_formats"] will be non-nil, and will be an array of
|
38
38
|
# one or more of EBSCO's internal codes (P=PDF, T=HTML, C=HTML+Images). If
|
39
39
|
# no fulltext is avail according to EBSCO API, result.custom_data["fulltext_formats"]
|
40
|
-
# will be nil.
|
40
|
+
# will be nil.
|
41
41
|
#
|
42
42
|
# #link_is_fulltext also set to true/false
|
43
43
|
#
|
44
44
|
# You can use this to, for instance, hyperlink the displayed title directly
|
45
45
|
# to record on EBSCO if and only if there's fulltext. By writing a custom
|
46
|
-
# decorator. See wiki on decorators.
|
46
|
+
# decorator. See wiki on decorators.
|
47
47
|
#
|
48
48
|
# == Limitations
|
49
49
|
# We do set language of ResultItems based on what ebsco tells us, but ebsco
|
@@ -56,12 +56,12 @@ require 'httpclient'
|
|
56
56
|
# EBSCO that you want included in the search. You can't just say "all of them"
|
57
57
|
# the api doesn't support that, and also more than 30 or 40 starts getting
|
58
58
|
# horribly slow. If you include a db you do not have access to, EBSCO api
|
59
|
-
# fatal errors.
|
59
|
+
# fatal errors.
|
60
60
|
#
|
61
61
|
# You may want to make sure all your licensed databases are included
|
62
62
|
# in your EIT profile. Log onto ebscoadmin, Customize Services, choose
|
63
|
-
# EIT profile, choose 'databases' tag.
|
64
|
-
#
|
63
|
+
# EIT profile, choose 'databases' tag.
|
64
|
+
#
|
65
65
|
# === Download databases from EBSCO api
|
66
66
|
#
|
67
67
|
# We include a utility to download ALL activated databases for EIT profile
|
@@ -69,12 +69,12 @@ require 'httpclient'
|
|
69
69
|
# file as a starting point, and edit by hand:
|
70
70
|
#
|
71
71
|
# First configure your EBSCO search engine with bento_search, say under
|
72
|
-
# key 'ebscohost'.
|
72
|
+
# key 'ebscohost'.
|
73
73
|
#
|
74
74
|
# Then run:
|
75
75
|
# rails generate bento_search:pull_ebsco_dbs ebscohost
|
76
76
|
#
|
77
|
-
# assuming 'ebscohost' is the key you registered the EBSCO search engine.
|
77
|
+
# assuming 'ebscohost' is the key you registered the EBSCO search engine.
|
78
78
|
#
|
79
79
|
# This will create a file at ./config/ebsco_dbs.rb. You may want to hand
|
80
80
|
# edit it. Then, in your bento search config, you can:
|
@@ -85,7 +85,7 @@ require 'httpclient'
|
|
85
85
|
# conf.databases = $ebsco_dbs
|
86
86
|
# end
|
87
87
|
#
|
88
|
-
# == Vendor documentation
|
88
|
+
# == Vendor documentation
|
89
89
|
#
|
90
90
|
# Vendor documentation is a bit scattered, main page:
|
91
91
|
# * http://support.ebsco.com/eit/ws.php
|
@@ -93,10 +93,10 @@ require 'httpclient'
|
|
93
93
|
# * http://support.ebsco.com/eit/ws_faq.php
|
94
94
|
# * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
|
95
95
|
# * Try constructing a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
|
96
|
-
# * The 'info' service can be used to see what databases you have access to.
|
96
|
+
# * The 'info' service can be used to see what databases you have access to.
|
97
97
|
# * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
|
98
98
|
#
|
99
|
-
# Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
|
99
|
+
# Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
|
100
100
|
# http://support.epnet.com/knowledge_base/detail.php?id=5397
|
101
101
|
#
|
102
102
|
# EBSCO searchable support portal has a section for the EIT api we use here:
|
@@ -104,20 +104,20 @@ require 'httpclient'
|
|
104
104
|
|
105
105
|
class BentoSearch::EbscoHostEngine
|
106
106
|
include BentoSearch::SearchEngine
|
107
|
-
|
107
|
+
|
108
108
|
# Can't change http timeout in config, because we keep an http
|
109
|
-
# client at class-wide level, and config is not class-wide.
|
109
|
+
# client at class-wide level, and config is not class-wide.
|
110
110
|
# Change this 'constant' if you want to change it, I guess.
|
111
111
|
#
|
112
112
|
# In some tests we did, 5.2s was 95th percentile slowest, but in
|
113
113
|
# actual percentage 5.2s is still timing out way too many requests,
|
114
|
-
# let's try 6.3, why not.
|
115
|
-
HttpTimeout = 6.3
|
116
|
-
extend HTTPClientPatch::IncludeClient
|
114
|
+
# let's try 6.3, why not.
|
115
|
+
HttpTimeout = 6.3
|
116
|
+
extend HTTPClientPatch::IncludeClient
|
117
117
|
include_http_client do |client|
|
118
118
|
client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
|
119
119
|
end
|
120
|
-
|
120
|
+
|
121
121
|
# Include some rails helpers, text_helper.truncate
|
122
122
|
def text_helper
|
123
123
|
@@truncate ||= begin
|
@@ -126,10 +126,10 @@ class BentoSearch::EbscoHostEngine
|
|
126
126
|
o
|
127
127
|
end
|
128
128
|
end
|
129
|
-
|
129
|
+
|
130
130
|
def search_implementation(args)
|
131
131
|
url = query_url(args)
|
132
|
-
|
132
|
+
|
133
133
|
results = BentoSearch::Results.new
|
134
134
|
xml, response, exception = nil, nil, nil
|
135
135
|
|
@@ -137,85 +137,85 @@ class BentoSearch::EbscoHostEngine
|
|
137
137
|
response = http_client.get(url)
|
138
138
|
xml = Nokogiri::XML(response.body)
|
139
139
|
rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
|
140
|
-
exception = e
|
140
|
+
exception = e
|
141
141
|
end
|
142
142
|
# error handle
|
143
|
-
if ( response.nil? ||
|
144
|
-
xml.nil? ||
|
145
|
-
exception ||
|
143
|
+
if ( response.nil? ||
|
144
|
+
xml.nil? ||
|
145
|
+
exception ||
|
146
146
|
(! HTTP::Status.successful? response.status) ||
|
147
147
|
(fault = xml.at_xpath("./Fault")))
|
148
|
-
|
148
|
+
|
149
149
|
results.error ||= {}
|
150
150
|
results.error[:api_url] = url
|
151
151
|
results.error[:exception] = exception if exception
|
152
152
|
results.error[:status] = response.status if response
|
153
|
-
|
153
|
+
|
154
154
|
if fault
|
155
155
|
results.error[:error_info] = text_if_present fault.at_xpath("./Message")
|
156
156
|
end
|
157
|
-
|
157
|
+
|
158
158
|
return results
|
159
159
|
end
|
160
|
-
|
161
|
-
|
162
|
-
|
160
|
+
|
161
|
+
|
162
|
+
|
163
163
|
# the namespaces they provide are weird and don't help and sometimes
|
164
164
|
# not clearly even legal. Remove em!
|
165
165
|
xml.remove_namespaces!
|
166
|
-
|
166
|
+
|
167
167
|
results.total_items = xml.at_xpath("./searchResponse/Hits").text.to_i
|
168
|
-
|
168
|
+
|
169
169
|
xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
|
170
170
|
results << item_from_xml( xml_rec )
|
171
171
|
end
|
172
|
-
|
172
|
+
|
173
173
|
return results
|
174
|
-
|
174
|
+
|
175
175
|
end
|
176
|
-
|
176
|
+
|
177
177
|
# Method to get a single record by "identifier" string, which is really
|
178
178
|
# a combined "db:id" string, same string that would be returned by
|
179
179
|
# an individual item.identifier
|
180
180
|
#
|
181
181
|
# Returns an individual BentoSearch::Result, or raises an exception.
|
182
|
-
# Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
|
182
|
+
# Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
|
183
183
|
# any other weird random exception caused by problems fetching (network
|
184
184
|
# error etc. Is it bad that we don't wrap these in an expected single
|
185
185
|
# exception type? Should we?)
|
186
186
|
def get(id)
|
187
|
-
# split on first colon only.
|
187
|
+
# split on first colon only.
|
188
188
|
id =~ /^([^:]+)\:(.*)$/
|
189
189
|
db = $1 ; an = $2
|
190
|
-
|
190
|
+
|
191
191
|
raise ArgumentError.new("EbscoHostEngine#get requires an id with a colon, like `a9h:12345`. Instead, we got #{id}") unless db && an
|
192
|
-
|
192
|
+
|
193
193
|
# "AN" search_field is not listed in our search_field_definitions,
|
194
194
|
# but it is an internal EBSCOHost search index on 'accession number'
|
195
|
-
|
195
|
+
|
196
196
|
results = search(an, :search_field => "AN", :databases => [db])
|
197
|
-
|
197
|
+
|
198
198
|
raise (results.error[:exception] || Exception.new) if results.failed?
|
199
199
|
raise BentoSearch::NotFound.new("For id: #{id}") if results.length == 0
|
200
200
|
raise BentoSearch::TooManyFound.new("For id: #{id}") if results.length > 1
|
201
|
-
|
201
|
+
|
202
202
|
return results.first
|
203
203
|
end
|
204
|
-
|
205
|
-
# pass in nokogiri record xml for the records/rec node.
|
206
|
-
# Returns nil if NO fulltext is avail on ebsco platform,
|
204
|
+
|
205
|
+
# pass in nokogiri record xml for the records/rec node.
|
206
|
+
# Returns nil if NO fulltext is avail on ebsco platform,
|
207
207
|
# non-nil if fulltext is available. Non-nil value will
|
208
|
-
# actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
|
208
|
+
# actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
|
209
209
|
# http://support.epnet.com/knowledge_base/detail.php?topic=996&id=3778&page=1
|
210
210
|
def fulltext_formats(record_xml)
|
211
211
|
fulltext_formats = record_xml.xpath("./header/controlInfo/artinfo/formats/fmt/@type").collect {|n| n.text }
|
212
|
-
|
212
|
+
|
213
213
|
return nil if fulltext_formats.empty?
|
214
|
-
|
215
|
-
return fulltext_formats
|
214
|
+
|
215
|
+
return fulltext_formats
|
216
216
|
end
|
217
|
-
|
218
|
-
|
217
|
+
|
218
|
+
|
219
219
|
# Pass in a nokogiri node, return node.text, or nil if
|
220
220
|
# arg was nil or node.text was blank?
|
221
221
|
def text_if_present(node)
|
@@ -223,16 +223,16 @@ class BentoSearch::EbscoHostEngine
|
|
223
223
|
nil
|
224
224
|
else
|
225
225
|
node.text
|
226
|
-
end
|
226
|
+
end
|
227
227
|
end
|
228
|
-
|
229
|
-
# Figure out proper controlled format for an ebsco item.
|
228
|
+
|
229
|
+
# Figure out proper controlled format for an ebsco item.
|
230
230
|
# EBSCOHost (not sure about EDS) publication/document type
|
231
231
|
# are totally unusable non-normalized vocabulary for controlled
|
232
|
-
# types, we'll try to guess from other metadata features.
|
232
|
+
# types, we'll try to guess from other metadata features.
|
233
233
|
def sniff_format(xml_node)
|
234
234
|
return nil if xml_node.nil?
|
235
|
-
|
235
|
+
|
236
236
|
if xml_node.at_xpath("./dissinfo/*")
|
237
237
|
:dissertation
|
238
238
|
elsif xml_node.at_xpath("./jinfo/*") && xml_node.at_xpath("./artinfo/*")
|
@@ -246,35 +246,35 @@ class BentoSearch::EbscoHostEngine
|
|
246
246
|
# pathological case of book_item, if it has a bkinfo and an artinfo
|
247
247
|
# but the titles in both sections MATCH, it's just a book. If they're
|
248
248
|
# different, it's a book section, bah@
|
249
|
-
:book_item
|
249
|
+
:book_item
|
250
250
|
elsif xml_node.at_xpath("./bkinfo/*")
|
251
251
|
"Book"
|
252
252
|
elsif xml_node.at_xpath("./jinfo/*")
|
253
253
|
:serial
|
254
254
|
else
|
255
255
|
nil
|
256
|
-
end
|
256
|
+
end
|
257
257
|
end
|
258
|
-
|
258
|
+
|
259
259
|
# Figure out uncontrolled literal string format to show to users.
|
260
260
|
# We're going to try combining Ebsco Publication Type and Document Type,
|
261
|
-
# when both are present. Then a few hard-coded special transformations.
|
262
|
-
def sniff_format_str(xml_node)
|
261
|
+
# when both are present. Then a few hard-coded special transformations.
|
262
|
+
def sniff_format_str(xml_node)
|
263
263
|
pubtype = text_if_present( xml_node.at_xpath("./artinfo/pubtype") )
|
264
264
|
doctype = text_if_present( xml_node.at_xpath("./artinfo/doctype") )
|
265
|
-
|
265
|
+
|
266
266
|
components = []
|
267
267
|
components.push pubtype
|
268
268
|
components.push doctype unless doctype == pubtype
|
269
|
-
|
269
|
+
|
270
270
|
components.compact!
|
271
|
-
|
271
|
+
|
272
272
|
components = components.collect {|a| a.titlecase if a}
|
273
273
|
components.uniq! # no need to have the same thing twice
|
274
274
|
|
275
|
-
|
275
|
+
|
276
276
|
# some hard-coded cases for better user-displayable string, and other
|
277
|
-
# normalization.
|
277
|
+
# normalization.
|
278
278
|
if ["Academic Journal", "Journal"].include?(components.first) && ["Article", "Journal Article"].include?(components.last)
|
279
279
|
return "Journal Article"
|
280
280
|
elsif components.last == "Book: Monograph"
|
@@ -290,251 +290,252 @@ class BentoSearch::EbscoHostEngine
|
|
290
290
|
# first is strict substring, don't need it
|
291
291
|
return components.last
|
292
292
|
end
|
293
|
-
|
294
|
-
|
295
|
-
|
293
|
+
|
294
|
+
|
295
|
+
|
296
296
|
return components.join(": ")
|
297
297
|
end
|
298
|
-
|
298
|
+
|
299
299
|
# pass in <rec> nokogiri, will determine best link
|
300
300
|
def get_link(xml)
|
301
301
|
text_if_present(xml.at_xpath("./pdfLink")) || text_if_present(xml.at_xpath("./plink") )
|
302
302
|
end
|
303
|
-
|
304
|
-
|
305
|
-
# escape or replace special chars to ebsco
|
303
|
+
|
304
|
+
|
305
|
+
# escape or replace special chars to ebsco
|
306
306
|
def ebsco_query_escape(txt)
|
307
307
|
# it's unclear if ebsco API actually allows escaping of special chars,
|
308
308
|
# or what the special chars are. But we know parens are special, can't
|
309
309
|
# escape em, we'll just remove em (should not affect search).
|
310
|
-
|
310
|
+
|
311
311
|
# undocumented but question mark seems to cause a problem for ebsco,
|
312
|
-
# even inside quoted phrases, not sure why.
|
313
|
-
txt = txt.gsub(/[)(\?]/, ' ')
|
314
|
-
|
312
|
+
# even inside quoted phrases, not sure why. Square brackets too.
|
313
|
+
txt = txt.gsub(/[)(\?\[\]]/, ' ')
|
314
|
+
|
315
315
|
# 'and' and 'or' need to be in phrase quotes to avoid being
|
316
316
|
# interpreted as boolean. For instance, when people just
|
317
317
|
# paste in a title: << A strategy for decreasing anxiety of ICU transfer patients and their families >>
|
318
318
|
# You'd think 'and' as boolean would still work there, but it resulted
|
319
319
|
# in zero hits unless quoted, I dunno. lowercase and uppercase and/or/not
|
320
|
-
# both cause observed weirdness.
|
320
|
+
# both cause observed weirdness.
|
321
321
|
if ['and', 'or', 'not'].include?( txt.downcase )
|
322
322
|
txt = %Q{"#{txt}"}
|
323
|
-
end
|
324
|
-
|
323
|
+
end
|
324
|
+
|
325
325
|
return txt
|
326
326
|
end
|
327
|
-
|
327
|
+
|
328
328
|
# Actually turn the user's query into an EBSCO "AND" boolean query,
|
329
329
|
# seems only way to get decent results where terms can match cross-fields
|
330
|
-
# at the moment, for EIT. We'll see for EDS.
|
330
|
+
# at the moment, for EIT. We'll see for EDS.
|
331
331
|
def ebsco_query_prepare(txt)
|
332
332
|
# use string split with regex cleverly to split into space
|
333
|
-
# separated terms and phrases, keeping phrases as unit.
|
333
|
+
# separated terms and phrases, keeping phrases as unit.
|
334
334
|
terms = txt.split %r{[[:space:]]+|("[^"]+")}
|
335
335
|
|
336
336
|
# Remove parens in non-phrase-quoted terms
|
337
|
-
terms = terms.collect do |t|
|
338
|
-
ebsco_query_escape(t)
|
337
|
+
terms = terms.collect do |t|
|
338
|
+
ebsco_query_escape(t)
|
339
339
|
end
|
340
|
-
|
340
|
+
|
341
341
|
|
342
342
|
# Remove empty strings. Remove terms that are solely punctuation
|
343
|
-
# without any letters.
|
343
|
+
# without any letters.
|
344
344
|
terms.delete_if do |term|
|
345
|
-
(
|
346
|
-
term.blank? ||
|
345
|
+
(
|
346
|
+
term.blank? ||
|
347
347
|
term =~ /\A[^[[:alnum:]]]+\Z/
|
348
348
|
)
|
349
349
|
end
|
350
|
-
|
351
|
-
terms.join(" AND ")
|
350
|
+
|
351
|
+
terms.join(" AND ")
|
352
352
|
end
|
353
|
-
|
353
|
+
|
354
354
|
def query_url(args)
|
355
|
-
|
356
|
-
url =
|
355
|
+
|
356
|
+
url =
|
357
357
|
"#{configuration.base_url}/Search?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
|
358
|
-
|
359
|
-
query = ebsco_query_prepare args[:query]
|
360
|
-
|
361
|
-
|
358
|
+
|
359
|
+
query = ebsco_query_prepare args[:query]
|
360
|
+
|
361
|
+
|
362
362
|
# wrap in (FI $query) if fielded search
|
363
363
|
if args[:search_field]
|
364
364
|
query = "(#{args[:search_field]} #{query})"
|
365
365
|
end
|
366
|
-
|
366
|
+
|
367
367
|
# peer-reviewed only?
|
368
368
|
if [true, "true"].include? args[:peer_reviewed_only]
|
369
369
|
query += " AND (RV Y)"
|
370
370
|
end
|
371
|
-
|
371
|
+
|
372
372
|
if args[:pubyear_start] || args[:pubyear_end]
|
373
|
-
from = args[:pubyear_start].to_i
|
373
|
+
from = args[:pubyear_start].to_i
|
374
374
|
from = nil if from == 0
|
375
|
-
|
376
|
-
to = args[:pubyear_end].to_i
|
375
|
+
|
376
|
+
to = args[:pubyear_end].to_i
|
377
377
|
to = nil if to == 0
|
378
|
-
|
378
|
+
|
379
379
|
query += " AND (DT #{from}-#{to})"
|
380
380
|
end
|
381
|
-
|
382
|
-
|
381
|
+
|
382
|
+
|
383
383
|
url += "&query=#{CGI.escape query}"
|
384
|
-
|
385
|
-
# startrec is 1-based for ebsco, not 0-based like for us.
|
384
|
+
|
385
|
+
# startrec is 1-based for ebsco, not 0-based like for us.
|
386
386
|
url += "&startrec=#{args[:start] + 1}" if args[:start]
|
387
387
|
url += "&numrec=#{args[:per_page]}" if args[:per_page]
|
388
|
-
|
389
|
-
# Make relevance our default sort, rather than EBSCO's date.
|
388
|
+
|
389
|
+
# Make relevance our default sort, rather than EBSCO's date.
|
390
390
|
args[:sort] ||= "relevance"
|
391
391
|
url += "&sort=#{ sort_definitions[args[:sort]][:implementation]}"
|
392
|
-
|
392
|
+
|
393
393
|
# Contrary to docs, don't pass these comma-separated, pass em in separate
|
394
|
-
# query params. args databases overrides config databases.
|
394
|
+
# query params. args databases overrides config databases.
|
395
395
|
(args[:databases] || configuration.databases).each do |db|
|
396
396
|
url += "&db=#{db}"
|
397
|
-
end
|
397
|
+
end
|
398
398
|
|
399
399
|
return url
|
400
400
|
end
|
401
|
-
|
401
|
+
|
402
402
|
# pass in a nokogiri representing an EBSCO <rec> result,
|
403
|
-
# we'll turn it into a BentoSearch::ResultItem.
|
404
|
-
def item_from_xml(xml_rec)
|
403
|
+
# we'll turn it into a BentoSearch::ResultItem.
|
404
|
+
def item_from_xml(xml_rec)
|
405
405
|
info = xml_rec.at_xpath("./header/controlInfo")
|
406
|
-
|
406
|
+
|
407
407
|
item = BentoSearch::ResultItem.new
|
408
|
-
|
408
|
+
|
409
409
|
# Get unique id. Think we need both the database code and accession
|
410
410
|
# number combined, accession numbers not necessarily unique across
|
411
|
-
# dbs. We'll combine with a colon.
|
411
|
+
# dbs. We'll combine with a colon.
|
412
412
|
db = text_if_present xml_rec.at_xpath("./header/@shortDbName")
|
413
|
-
accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
|
413
|
+
accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
|
414
414
|
item.unique_id = "#{db}:#{accession}" if db && accession
|
415
|
-
|
416
|
-
|
415
|
+
|
416
|
+
|
417
417
|
item.link = get_link(xml_rec)
|
418
418
|
|
419
|
-
|
420
|
-
|
419
|
+
# EBSCO is somewhat inconsistent with where it puts the ISSN
|
420
|
+
item.issn = text_if_present(info.at_xpath("./jinfo/issn")) || text_if_present(info.at_xpath("./jinfo/jid[@type='issn']"))
|
421
|
+
|
421
422
|
# Dealing with titles is a bit crazy, while articles usually have atitles and
|
422
423
|
# jtitles, sometimes they have a btitle instead. A book will usually have
|
423
|
-
# both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
|
424
|
-
|
424
|
+
# both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
|
425
|
+
|
425
426
|
jtitle = text_if_present(info.at_xpath("./jinfo/jtl"))
|
426
427
|
btitle = text_if_present info.at_xpath("./bkinfo/btl")
|
427
428
|
atitle = text_if_present info.at_xpath("./artinfo/tig/atl")
|
428
|
-
|
429
|
+
|
429
430
|
if jtitle && atitle
|
430
431
|
item.title = atitle
|
431
432
|
item.source_title = jtitle
|
432
433
|
elsif btitle && atitle && atitle != btitle
|
433
434
|
# for a book, sometimes there's an atitle block and a btitle block
|
434
|
-
# when they're identical, this ain't a book section, it's a book.
|
435
|
+
# when they're identical, this ain't a book section, it's a book.
|
435
436
|
item.title = atitle
|
436
437
|
item.source_title = btitle
|
437
438
|
else
|
438
439
|
item.title = atitle || btitle
|
439
|
-
end
|
440
|
+
end
|
440
441
|
# EBSCO sometimes has crazy long titles, truncate em.
|
441
442
|
if item.title.present?
|
442
443
|
item.title = text_helper.truncate(item.title, :length => 200, :separator => ' ', :omission => '…')
|
443
444
|
end
|
444
|
-
|
445
|
-
|
446
|
-
|
445
|
+
|
446
|
+
|
447
|
+
|
447
448
|
item.publisher = text_if_present info.at_xpath("./pubinfo/pub")
|
448
449
|
# if no publisher, but a dissertation institution, use that
|
449
|
-
# as publisher.
|
450
|
+
# as publisher.
|
450
451
|
unless item.publisher
|
451
452
|
item.publisher = text_if_present info.at_xpath("./dissinfo/dissinst")
|
452
453
|
end
|
453
|
-
|
454
|
-
|
454
|
+
|
455
|
+
|
455
456
|
# Might have multiple ISBN's in record, just take first for now
|
456
457
|
item.isbn = text_if_present info.at_xpath("./bkinfo/isbn")
|
457
|
-
|
458
|
+
|
458
459
|
item.year = text_if_present info.at_xpath("./pubinfo/dt/@year")
|
459
|
-
# fill in complete publication_date too only if we've got it.
|
460
|
+
# fill in complete publication_date too only if we've got it.
|
460
461
|
if (item.year &&
|
461
462
|
(month = text_if_present info.at_xpath("./pubinfo/dt/@month")) &&
|
462
|
-
(day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
|
463
|
+
(day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
|
463
464
|
)
|
464
465
|
if (item.year.to_i != 0 && month.to_i != 0 && day.to_i != 0)
|
465
|
-
item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
|
466
|
+
item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
|
466
467
|
end
|
467
468
|
end
|
468
|
-
|
469
|
+
|
469
470
|
item.volume = text_if_present info.at_xpath("./pubinfo/vid")
|
470
471
|
item.issue = text_if_present info.at_xpath("./pubinfo/iid")
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
472
|
+
|
473
|
+
|
474
|
+
|
475
|
+
|
475
476
|
item.start_page = text_if_present info.at_xpath("./artinfo/ppf")
|
476
|
-
|
477
|
+
|
477
478
|
item.doi = text_if_present info.at_xpath("./artinfo/ui[@type='doi']")
|
478
|
-
|
479
|
+
|
479
480
|
item.abstract = text_if_present info.at_xpath("./artinfo/ab")
|
480
481
|
# EBSCO abstracts have an annoying habit of beginning with "Abstract:"
|
481
482
|
if item.abstract
|
482
483
|
item.abstract.gsub!(/^Abstract\: /, "")
|
483
484
|
end
|
484
|
-
|
485
|
-
# authors, only get full display name from EBSCO.
|
485
|
+
|
486
|
+
# authors, only get full display name from EBSCO.
|
486
487
|
info.xpath("./artinfo/aug/au").each do |author|
|
487
488
|
a = BentoSearch::Author.new(:display => author.text)
|
488
489
|
item.authors << a
|
489
490
|
end
|
490
|
-
|
491
|
+
|
491
492
|
item.format = sniff_format info
|
492
493
|
item.format_str = sniff_format_str info
|
493
|
-
|
494
|
+
|
494
495
|
# Totally unreliable, seems to report english for everything? Maybe
|
495
|
-
# because abstracts are in english? Nevertheless we include for now.
|
496
|
+
# because abstracts are in english? Nevertheless we include for now.
|
496
497
|
item.language_code = text_if_present info.at_xpath("./language/@code")
|
497
498
|
# why does EBSCO return 'undetermined' sometimes? That might as well be
|
498
|
-
# not there, bah.
|
499
|
+
# not there, bah.
|
499
500
|
item.language_code = nil if item.language_code == "und"
|
500
|
-
|
501
|
-
# array of custom ebsco codes (or nil) for fulltext formats avail.
|
501
|
+
|
502
|
+
# array of custom ebsco codes (or nil) for fulltext formats avail.
|
502
503
|
item.custom_data["fulltext_formats"] = fulltext_formats xml_rec
|
503
504
|
# if any fulltext format, mark present
|
504
|
-
item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
|
505
|
-
|
505
|
+
item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
|
506
|
+
|
506
507
|
return item
|
507
508
|
end
|
508
|
-
|
509
|
+
|
509
510
|
# This method is not used for normal searching, but can be used by
|
510
|
-
# other code to retrieve the results of the EBSCO API Info command,
|
511
|
+
# other code to retrieve the results of the EBSCO API Info command,
|
511
512
|
# using connection details configured in this engine. The Info command
|
512
513
|
# can tell you what databases your account is authorized to see.
|
513
514
|
# Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED
|
514
515
|
def get_info
|
515
|
-
url =
|
516
|
-
"#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
|
517
|
-
|
516
|
+
url =
|
517
|
+
"#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
|
518
|
+
|
518
519
|
noko = Nokogiri::XML( http_client.get( url ).body )
|
519
|
-
|
520
|
+
|
520
521
|
noko.remove_namespaces!
|
521
|
-
|
522
|
+
|
522
523
|
return noko
|
523
524
|
end
|
524
|
-
|
525
|
+
|
525
526
|
def public_settable_search_args
|
526
527
|
super + [:peer_reviewed_only, :pubyear_start, :pubyear_end]
|
527
528
|
end
|
528
|
-
|
529
|
+
|
529
530
|
# David Walker says pretty much only relevance and date are reliable
|
530
|
-
# in EBSCOhost cross-search.
|
531
|
+
# in EBSCOhost cross-search.
|
531
532
|
def sort_definitions
|
532
|
-
{
|
533
|
+
{
|
533
534
|
"relevance" => {:implementation => "relevance"},
|
534
535
|
"date_desc" => {:implementation => "date"}
|
535
|
-
}
|
536
|
+
}
|
536
537
|
end
|
537
|
-
|
538
|
+
|
538
539
|
def search_field_definitions
|
539
540
|
{
|
540
541
|
nil => {:semantic => :general},
|
@@ -545,17 +546,17 @@ class BentoSearch::EbscoHostEngine
|
|
545
546
|
"IB" => {:semantic => :isbn}
|
546
547
|
}
|
547
548
|
end
|
548
|
-
|
549
|
+
|
549
550
|
def max_per_page
|
550
551
|
# Actually only '50' if you ask for 'full' records, but I don't think
|
551
|
-
# we need to do that ever, that's actually getting fulltext back!
|
552
|
+
# we need to do that ever, that's actually getting fulltext back!
|
552
553
|
200
|
553
554
|
end
|
554
|
-
|
555
|
+
|
555
556
|
def self.required_configuration
|
556
557
|
["profile_id", "profile_password"]
|
557
558
|
end
|
558
|
-
|
559
|
+
|
559
560
|
def self.default_configuration
|
560
561
|
{
|
561
562
|
# /Search
|
@@ -563,5 +564,5 @@ class BentoSearch::EbscoHostEngine
|
|
563
564
|
:databases => []
|
564
565
|
}
|
565
566
|
end
|
566
|
-
|
567
|
+
|
567
568
|
end
|