bento_search 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.md +299 -0
- data/Rakefile +40 -0
- data/app/assets/images/bento_search/large_loader.gif +0 -0
- data/app/assets/javascripts/bento_search.js +3 -0
- data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
- data/app/assets/stylesheets/bento_search/bento.css +4 -0
- data/app/controllers/bento_search/bento_search_controller.rb +7 -0
- data/app/controllers/bento_search/search_controller.rb +72 -0
- data/app/helpers/bento_search_helper.rb +138 -0
- data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
- data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
- data/app/models/bento_search/author.rb +25 -0
- data/app/models/bento_search/link.rb +30 -0
- data/app/models/bento_search/multi_searcher.rb +109 -0
- data/app/models/bento_search/openurl_creator.rb +128 -0
- data/app/models/bento_search/registrar.rb +70 -0
- data/app/models/bento_search/result_item.rb +203 -0
- data/app/models/bento_search/results.rb +54 -0
- data/app/models/bento_search/results/pagination.rb +67 -0
- data/app/models/bento_search/search_engine.rb +219 -0
- data/app/models/bento_search/search_engine/capabilities.rb +65 -0
- data/app/search_engines/bento_search/#Untitled-1# +11 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
- data/app/search_engines/bento_search/eds_engine.rb +557 -0
- data/app/search_engines/bento_search/google_books_engine.rb +184 -0
- data/app/search_engines/bento_search/primo_engine.rb +231 -0
- data/app/search_engines/bento_search/scopus_engine.rb +295 -0
- data/app/search_engines/bento_search/summon_engine.rb +398 -0
- data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
- data/app/views/bento_search/_link.html.erb +4 -0
- data/app/views/bento_search/_search_error.html.erb +22 -0
- data/app/views/bento_search/_std_item.html.erb +39 -0
- data/app/views/bento_search/search/search.html.erb +1 -0
- data/config/locales/en.yml +25 -0
- data/lib/bento_search.rb +29 -0
- data/lib/bento_search/engine.rb +5 -0
- data/lib/bento_search/routes.rb +45 -0
- data/lib/bento_search/version.rb +3 -0
- data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
- data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
- data/lib/http_client_patch/include_client.rb +86 -0
- data/lib/tasks/bento_search_tasks.rake +4 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +56 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +6 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +3100 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/functional/bento_search/search_controller_test.rb +81 -0
- data/test/helper/bento_search_helper_test.rb +125 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/support/mock_engine.rb +23 -0
- data/test/support/test_with_cassette.rb +38 -0
- data/test/test_helper.rb +52 -0
- data/test/unit/#vcr_test.rb# +68 -0
- data/test/unit/ebsco_host_engine_test.rb +134 -0
- data/test/unit/eds_engine_test.rb +105 -0
- data/test/unit/google_books_engine_test.rb +93 -0
- data/test/unit/item_decorators_test.rb +66 -0
- data/test/unit/multi_searcher_test.rb +49 -0
- data/test/unit/openurl_creator_test.rb +111 -0
- data/test/unit/pagination_test.rb +59 -0
- data/test/unit/primo_engine_test.rb +37 -0
- data/test/unit/register_engine_test.rb +50 -0
- data/test/unit/result_item_display_test.rb +39 -0
- data/test/unit/result_item_test.rb +36 -0
- data/test/unit/scopus_engine_test.rb +130 -0
- data/test/unit/search_engine_base_test.rb +178 -0
- data/test/unit/search_engine_test.rb +95 -0
- data/test/unit/summon_engine_test.rb +161 -0
- data/test/unit/xerxes_engine_test.rb +70 -0
- data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
- data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
- data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
- data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
- data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
- data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
- data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
- data/test/vcr_cassettes/gbs/pagination.yml +702 -0
- data/test/vcr_cassettes/gbs/search.yml +340 -0
- data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
- data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
- data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
- data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
- data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
- data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
- data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
- data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
- data/test/vcr_cassettes/summon/search.yml +242 -0
- data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
- data/test/view/std_item_test.rb +98 -0
- metadata +421 -0
@@ -0,0 +1,398 @@
|
|
1
|
+
require 'http_client_patch/include_client'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'time'
|
5
|
+
require 'uri'
|
6
|
+
|
7
|
+
require 'summon'
|
8
|
+
require 'summon/transport/headers'
|
9
|
+
|
10
|
+
# Search engine for Serial Solutions Summon
|
11
|
+
#
|
12
|
+
# Docs:
|
13
|
+
# http://api.summon.serialssolutions.com/help/api/search
|
14
|
+
# http://api.summon.serialssolutions.com/help/api/search/fields
|
15
|
+
#
|
16
|
+
# An example user-facing Summon UI, useful for figuring out available
|
17
|
+
# facets and facet values, or trying out searches:
|
18
|
+
# http://ncsu.summon.serialssolutions.com/
|
19
|
+
|
20
|
+
#
|
21
|
+
# == Functionality notes
|
22
|
+
#
|
23
|
+
# * for pagination, underlying summon API only supports 'page', not 'start'
|
24
|
+
# style, if you pass in 'start' style it will be 'rounded' to containing 'page'.
|
25
|
+
#
|
26
|
+
# == Required config params
|
27
|
+
# [access_id] supplied by SerSol for your account
|
28
|
+
# [secret_key] supplied by SerSol for your account
|
29
|
+
#
|
30
|
+
# == Optional custom config params
|
31
|
+
#
|
32
|
+
# [fixed_params]
|
33
|
+
# Fixed SerSol query param literals to send with every search.
|
34
|
+
# Value is a HASH, of keys and either single values or arrays
|
35
|
+
# of values. For instance, to exclude Newspaper Articles and Books
|
36
|
+
# from all search results, in config:
|
37
|
+
# :fixed_params =>
|
38
|
+
# {"s.cmd" => ["addFacetValueFilters(ContentType,Web Resource:true,Reference:true,eBook:true)"]}
|
39
|
+
# Note that values are NOT URI escaped in config, code will take care
|
40
|
+
# of that for you. You could also fix "s.role" to 'authenticated' using
|
41
|
+
# this mechanism, if you restrict all access to your app to authenticated
|
42
|
+
# affiliated users.
|
43
|
+
# Note: We wanted to use this for content type facet exclusions, as
|
44
|
+
# per above. We could NOT get Summon "s.fvf" param to work right, had
|
45
|
+
# to use the s.cmd=addFacetValueFilter version.
|
46
|
+
# [highlighting]
|
47
|
+
# Default true, ask SerSol for query-in-context highlighting in
|
48
|
+
# title and snippets field. If true you WILL get HTML with <b> tags
|
49
|
+
# in your titles.
|
50
|
+
# [snippets_as_abstract]
|
51
|
+
# Defaults true, if true and :highlighting is true, we'll put the
|
52
|
+
# query-in-context snippets in the 'abstract' field. Set :max_snippets
|
53
|
+
# for how many to possibly include (default 1). We may change this functionality
|
54
|
+
# later, this is a bit of a hacky way to do it.
|
55
|
+
# [use_summon_openurl] default false. If true, will use OpenURL kev context
|
56
|
+
# object passed back by summon to generate openurls, instead of creating
|
57
|
+
# one ourselves from individual data elements. Summon's openurl is decent,
|
58
|
+
# but currently includes highlighting tags in title elements. Also note
|
59
|
+
# it includes DC-type openurls, which we don't currently generate ourselves.
|
60
|
+
#
|
61
|
+
#
|
62
|
+
# == Custom search params
|
63
|
+
#
|
64
|
+
# Pass in `:auth => true` (or "true") to send headers to summon
|
65
|
+
# indicating an authorized user, for full search results.
|
66
|
+
#
|
67
|
+
#
|
68
|
+
# == Tech notes
|
69
|
+
# We did not choose to use the summon ruby gem in general, we wanted more control
|
70
|
+
# than it offered (ability to use HTTPClient persistent connections, MultiJson
|
71
|
+
# for json parsing, etc).
|
72
|
+
#
|
73
|
+
# However, we DO use that gem specifically for constructing authentication
|
74
|
+
# headers how summon wants it, see class at
|
75
|
+
# https://github.com/summon/summon.rb/blob/master/lib/summon/transport/headers.rb
|
76
|
+
#
|
77
|
+
class BentoSearch::SummonEngine
|
78
|
+
include BentoSearch::SearchEngine
|
79
|
+
|
80
|
+
extend HTTPClientPatch::IncludeClient
|
81
|
+
include_http_client
|
82
|
+
|
83
|
+
include ActionView::Helpers::OutputSafetyHelper # for safe_join
|
84
|
+
|
85
|
+
@@hl_start_token = "$$BENTO_HL_START$$"
|
86
|
+
@@hl_end_token = "$$BENTO_HL_END$$"
|
87
|
+
|
88
|
+
# Runs one search against the Summon API and converts the JSON response
# into a BentoSearch::Results holding one BentoSearch::ResultItem per
# returned document.
#
# args - search argument hash; passed unchanged to construct_request.
#
# Returns a BentoSearch::Results. When the HTTP call or JSON parse raises
# one of the rescued exceptions, or the response status is not successful,
# the returned Results has no items and its #error hash carries
# :exception and (when a response was received) :status.
def search_implementation(args)
  uri, headers = construct_request(args)

  results = BentoSearch::Results.new

  hash, response, exception = nil
  begin
    response = http_client.get(uri, nil, headers)
    hash = MultiJson.load(response.body)
  rescue Timeout::Error, HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
         MultiJson::DecodeError, Nokogiri::SyntaxError => e
    # Timeout::Error replaces the deprecated top-level TimeoutError alias,
    # which no longer exists in current Ruby versions.
    exception = e
  end

  # handle some errors
  if response.nil? || hash.nil? || exception || !HTTP::Status.successful?(response.status)
    results.error ||= {}
    results.error[:exception] = exception
    results.error[:status] = response.status if response

    return results
  end

  results.total_items = hash["recordCount"]

  # A response without a "documents" key is treated as zero hits
  # instead of raising NoMethodError on nil.
  (hash["documents"] || []).each do |doc_hash|
    item = BentoSearch::ResultItem.new

    # The displayed title/subtitle may carry highlighting markup; a
    # stripped copy is stashed in custom_data under raw_title/raw_subtitle.
    title = first_if_present(doc_hash["Title"])
    item.title = handle_highlighting(title)
    item.custom_data["raw_title"] = handle_highlighting(title, :strip => true)

    subtitle = first_if_present(doc_hash["Subtitle"])
    item.subtitle = handle_highlighting(subtitle) # TODO is this right?
    item.custom_data["raw_subtitle"] = handle_highlighting(subtitle, :strip => true)

    item.link = doc_hash["link"]

    if configuration.use_summon_openurl
      # Summon conveniently gives us pre-made OpenURL
      item.openurl_kev_co = doc_hash["openUrl"]
    end

    item.journal_title = first_if_present(doc_hash["PublicationTitle"])
    item.issn = first_if_present(doc_hash["ISSN"])
    item.isbn = first_if_present(doc_hash["ISBN"])
    item.doi = first_if_present(doc_hash["DOI"])

    item.start_page = first_if_present(doc_hash["StartPage"])
    item.end_page = first_if_present(doc_hash["EndPage"])

    if (pubdate = first_if_present(doc_hash["PublicationDate_xml"]))
      item.year = pubdate["year"]
    end
    item.volume = first_if_present(doc_hash["Volume"])
    item.issue = first_if_present(doc_hash["Issue"])

    if (pub = first_if_present(doc_hash["Publisher_xml"]))
      item.publisher = pub["name"]
    end

    (doc_hash["Author_xml"] || []).each do |auth_hash|
      a = BentoSearch::Author.new

      a.first = name_normalize(auth_hash["givenname"])
      a.last = name_normalize(auth_hash["surname"])
      a.middle = name_normalize(auth_hash["middlename"])

      a.display = name_normalize(auth_hash["fullname"])

      item.authors << a unless a.empty?
    end

    item.format = normalize_content_type(first_if_present(doc_hash["ContentType"]))
    if doc_hash["ContentType"]
      item.format_str = doc_hash["ContentType"].join(", ")
    end

    snippets = doc_hash["Snippet"]
    if configuration.highlighting && configuration.snippets_as_abstract &&
       snippets && snippets.length > 0
      # Use up to max_snippets query-in-context snippets as the abstract.
      item.abstract = handle_highlighting(snippets.slice(0, configuration.max_snippets).join(" "))
    else
      item.abstract = first_if_present(doc_hash["Abstract"])
    end

    item.extend(SummonOpenurlOverride)

    results << item
  end

  return results
end
|
178
|
+
|
179
|
+
# Returns the first element of +array+, or nil when +array+ itself is
# nil (or false).
def first_if_present(array)
  array.first if array
end
|
182
|
+
|
183
|
+
|
184
|
+
# Normalize Summon Content-Type to our standardized
|
185
|
+
# list.
|
186
|
+
#
|
187
|
+
# This ends up losing useful distinctions Summon makes, however.
|
188
|
+
# summon_type - a single Summon content type label (String), or nil.
#
# Returns the mapped format: a String for some types, a Symbol for
# others, and nil for any label not listed below.
def normalize_content_type(summon_type)
  formats = {
    "Journal Article"           => "Article",
    "Book Review"               => "Article",
    "Trade Publication Article" => "Article",
    "Audio Recording"           => "AudioObject",
    "Music Recording"           => "AudioObject",
    "Book"                      => "Book",
    "eBook"                     => "Book",
    "Conference Proceedings"    => :conference_paper,
    "Dissertation"              => :dissertation,
    "Journal"                   => :serial,
    "Newsletter"                => :serial,
    "Photograph"                => "Photograph",
    "Video Recording"           => "VideoObject"
  }

  formats[summon_type]
end
|
201
|
+
|
202
|
+
# Cleans up one author-name component taken from a Summon record.
#
# str - the raw value (String) or nil.
#
# Returns the value with surrounding whitespace stripped, or nil when the
# value is nil, empty, whitespace-only, or made up solely of the
# punctuation characters ",", ":" and ".".
def name_normalize(str)
  return nil unless str

  stripped = str.to_s.strip

  # \A and \z anchor the whole string. Line anchors (^ and $) would match
  # any single empty or punctuation-only line inside a multi-line value
  # and wrongly discard the whole name.
  return nil if stripped =~ /\A[[:space:]]*\z/ || stripped =~ /\A[,:.]*\z/

  stripped
end
|
212
|
+
|
213
|
+
|
214
|
+
# returns two element array: [uri, headers]
|
215
|
+
#
|
216
|
+
# uri, headers = construct_request(args)
|
217
|
+
# Builds the request URI and the authentication headers for one search.
#
# args - search argument hash. Keys read here: :per_page, :page,
#        :search_field, :query, :sort and :auth.
#
# Returns a two element array: [uri, headers], where uri is
# configuration.base_url plus the encoded query string and headers is the
# Summon::Transport::Headers object built from the same parameters.
def construct_request(args)
  # Query params in a hash with array values, because it is easiest
  # to generate auth headers that way. Values are NOT URI-encoded yet.
  query_params = Hash.new { |h, k| h[k] = [] }

  # Add in fixed params from config, if any. Each value may be a single
  # literal or an array of literals.
  if configuration.fixed_params
    configuration.fixed_params.each_pair do |key, value|
      [value].flatten.each { |v| query_params[key] << v }
    end
  end

  query_params["s.ps"] = args[:per_page] if args[:per_page]
  query_params["s.pn"] = args[:page] if args[:page]

  escaped_query = summon_escape(args[:query])
  query_params['s.q'] =
    if args[:search_field]
      "#{args[:search_field]}:(#{escaped_query})"
    else
      escaped_query
    end

  if args[:sort] &&
     (defn = self.sort_definitions[args[:sort]]) &&
     (literal = defn[:implementation])
    query_params['s.sort'] = literal
  end

  # The class documentation allows `:auth => true` or "true"; compare the
  # string form so both are honored.
  if args[:auth].to_s == "true"
    query_params['s.role'] = "authenticated"
  end

  if configuration.highlighting
    # Ask Summon to wrap hits in our own tokens.
    query_params['s.hs'] = @@hl_start_token
    query_params['s.he'] = @@hl_end_token
  else
    query_params['s.hl'] = "false"
  end

  headers = Summon::Transport::Headers.new(
    :access_id  => configuration.access_id,
    :secret_key => configuration.secret_key,
    :accept     => "json",
    :params     => query_params,
    :url        => configuration.base_url
  )

  # URI-escaping happens only here, after the headers were computed from
  # the unescaped values.
  query_string = query_params.keys.collect do |key|
    [query_params[key]].flatten.collect do |value|
      "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}"
    end
  end.flatten.join("&")

  uri = "#{configuration.base_url}?#{query_string}"

  return [uri, headers]
end
|
283
|
+
|
284
|
+
|
285
|
+
# Escapes special chars for Summon. Not entirely clear what
|
286
|
+
# we have to escape where (or double escape sometimes?), but
|
287
|
+
# we're just going to do a straight backslash escape of special
|
288
|
+
# chars.
|
289
|
+
#
|
290
|
+
# Does NOT do URI-escaping, that's a different step.
|
291
|
+
# string - the raw user query.
#
# Returns a copy of +string+ in which every character matched by the
# pattern below is preceded by a single backslash. Double quotes are not
# in the pattern, so phrase searches keep working. No URI-escaping is
# done here.
def summon_escape(string)
  # The block receives the matched text; the capture group spans the
  # entire match, so this is exactly the character to be escaped.
  string.gsub(/([+\-&|!\(\){}\[\]^~*?\\:])/) { |special| '\\' + special }
end
|
303
|
+
|
304
|
+
# If summon has put snippet highlighting tokens
|
305
|
+
# in a field, we need to HTML escape the literal values,
|
306
|
+
# while still using the highlighting tokens to put
|
307
|
+
# HTML tags around highlighted terms.
|
308
|
+
# str     - field value that may contain the highlight start/end tokens.
# options - pass :strip => true to delete the tokens rather than turning
#           them into markup.
#
# Returns +str+ unchanged when it is blank or highlighting is disabled in
# configuration. With :strip, returns +str+ with both tokens removed.
# Otherwise returns the safe_join of the pieces of +str+, with each start
# token replaced by an html_safe opening <b> tag and each end token by an
# html_safe closing tag.
def handle_highlighting( str, options = {} )
  return str if str.blank? || ! configuration.highlighting

  if options[:strip]
    # Just strip em, don't need to replace em with HTML
    return str.
      gsub(Regexp.new(Regexp.escape @@hl_start_token), '').
      gsub(Regexp.new(Regexp.escape @@hl_end_token), '')
  end

  # The capture group makes split keep the tokens as their own pieces.
  pieces = str.split( %r{(#{Regexp.escape @@hl_start_token}|#{Regexp.escape @@hl_end_token})} )

  rendered = pieces.collect do |piece|
    if piece == @@hl_start_token
      '<b class="bento_search_highlight">'.html_safe
    elsif piece == @@hl_end_token
      '</b>'.html_safe
    else
      piece
    end
  end

  safe_join(rendered, '')
end
|
331
|
+
|
332
|
+
# Returns the configuration keys this engine declares as required:
# :access_id and :secret_key.
def self.required_configuration
  %w[access_id secret_key].map { |key| key.to_sym }
end
|
335
|
+
|
336
|
+
# Returns a Hash of default values for the optional configuration keys of
# this engine.
def self.default_configuration
  {
    base_url: "http://api.summon.serialssolutions.com/2.0.0/search",
    highlighting: true,
    snippets_as_abstract: true,
    max_snippets: 1,
    use_summon_openurl: false
  }
end
|
345
|
+
|
346
|
+
# Returns 200, the per-page maximum this engine reports
# (presumably the largest page size Summon accepts -- confirm against the
# Summon API docs).
def max_per_page
  200
end
|
349
|
+
|
350
|
+
# Summon actually only supports relevancy sort, and pub year asc or desc.
|
351
|
+
# we just expose relevance and pub year desc here.
|
352
|
+
# Returns a Hash keyed by sort name. Each value is a Hash whose
# :implementation entry is the literal Summon sort value (not yet
# URI-escaped), or nil for the default relevance ordering.
def sort_definitions
  {
    "relevance" => { implementation: nil }, # default
    "date_desc" => { implementation: "PublicationDate:desc" }
  }
end
|
361
|
+
|
362
|
+
# Summon offers many more search fields than this. This is a subset
|
363
|
+
# listed here. See http://api.summon.serialssolutions.com/help/api/search/fields
|
364
|
+
# although those docs may not be up to date.
|
365
|
+
#
|
366
|
+
# The AuthorCombined, TitleCombined, and SubjectCombined indexes
|
367
|
+
# aren't even listed in the docs, but they are real. I think.
|
368
|
+
# Returns a Hash keyed by Summon search field name. Each value is a Hash
# that may carry a :semantic entry naming the generic meaning of the field.
def search_field_definitions
  # SubjectTerms does not include TemporalSubjectTerms or Keywords, and
  # ISBN / ISSN do not include the separate EISBN / EISSN fields, sorry.
  with_semantics = [
    ["AuthorCombined", :author],
    ["TitleCombined",  :title],
    ["SubjectTerms",   :subject],
    ["ISBN",           :isbn],
    ["ISSN",           :issn],
    ["OCLC",           :oclcnum]
  ]

  definitions = {}
  with_semantics.each do |field, semantic|
    definitions[field] = { :semantic => semantic }
  end

  # Exposed without a semantic mapping.
  definitions["PublicationSeriesTitle"] = {}

  definitions
end
|
383
|
+
|
384
|
+
# Module that we extend our ResultItems with, to over-ride
|
385
|
+
# to_openurl to use a dup of ourselves with title/subtitle
|
386
|
+
# set to raw ones without highlighting markup.
|
387
|
+
module SummonOpenurlOverride
  # Delegates to the to_openurl of a dup of this item whose title and
  # subtitle were replaced by custom_data["raw_title"] and
  # custom_data["raw_subtitle"]. The receiver itself is left unchanged.
  def to_openurl
    plain_copy = self.dup
    plain_copy.title = custom_data["raw_title"]
    plain_copy.subtitle = custom_data["raw_subtitle"]

    plain_copy.to_openurl
  end
end
|
396
|
+
|
397
|
+
|
398
|
+
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'openurl'
|
4
|
+
|
5
|
+
require 'httpclient'
|
6
|
+
require 'http_client_patch/include_client'
|
7
|
+
|
8
|
+
module BentoSearch
|
9
|
+
# a **very limited and hacky** bento search engine for the Xerxes Metalib
|
10
|
+
# front-end. Probably not suitable for real production use, just a demo,
|
11
|
+
# and used for testing. Does not support pagination, or fielded searching.
|
12
|
+
# will do a new Metalib search every time you call it, which will be slow.
|
13
|
+
#
|
14
|
+
# Machine running this code needs to have IP-address authorization
|
15
|
+
# to search xerxes.
|
16
|
+
#
|
17
|
+
# jrochkind is using it for his article search provider comparison testing
|
18
|
+
# instrument.
|
19
|
+
|
20
|
+
class XerxesEngine
|
21
|
+
include BentoSearch::SearchEngine
|
22
|
+
|
23
|
+
extend HTTPClientPatch::IncludeClient
|
24
|
+
include_http_client
|
25
|
+
|
26
|
+
# also optional configuration
|
27
|
+
# [xerxes_context]
|
28
|
+
# will send as 'context' query param to xerxes, for analytics
|
29
|
+
# Returns the names of the configuration keys this engine declares as
# required: "base_url" and "databases".
def self.required_configuration
  %w[base_url databases]
end
|
32
|
+
|
33
|
+
# Runs a Xerxes metasearch "screen scrape" style: start the search, poll
# the status page until Xerxes redirects to the results, then fetch those
# results with format=xerxes to get XML and convert each record into a
# ResultItem.
#
# arguments - search argument hash; passed unchanged to xerxes_search_url.
#
# Returns a Results. If the initial request is not answered with a
# redirect, or no results redirect arrives within 5 status requests, the
# returned Results has no items and its #error hash carries "message"
# (and "status" for the first case).
def search_implementation(arguments)
  request_url = xerxes_search_url(arguments)

  response = http_client.head request_url

  # It's supposed to be a redirect
  unless HTTP::Status.redirect?(response.status) && response.headers["Location"]
    r = Results.new
    r.error ||= {}
    r.error["status"] = response.status
    r.error["message"] = "Xerxes did not return expected 302 redirect"

    return r
  end

  # Okay, now fetch the redirect, have to change it to an absolute
  # URI cause Xerxes semi-illegally returns a relative one.
  refreshes = 0
  results_url = nil
  status_url = (URI.parse(request_url) + response.headers["Location"]).to_s
  while refreshes < 5
    # cause of VCR, can't request the exact same URL twice
    # with different results. Add `try` on the end.
    response = http_client.get(status_url + "&try=#{refreshes}")

    if HTTP::Status.redirect? response.status
      # Okay, redirect means we're done with status and
      # we've got actual results url
      results_url = URI.parse(request_url) + response.headers["Location"]
      break
    end

    # Not done yet: follow the meta-refresh. The page is only parsed when
    # we actually need it.
    html = Nokogiri::HTML(response.body)
    refresh = html.at_css("meta[http-equiv='refresh']")
    requested_wait = (refresh["content"].to_i if refresh && refresh["content"])

    # A configured "refresh_wait" wins over what Xerxes asked for.
    wait = configuration.lookup!("refresh_wait", requested_wait)
    refreshes += 1

    # With neither a configured wait nor a meta-refresh there is no
    # interval to sleep for; `sleep nil` would raise TypeError.
    sleep wait if wait
  end

  results = Results.new

  # any errors? Leaving the loop without a results_url means every
  # allowed refresh was used up.
  if results_url.nil?
    results.error ||= {}
    results.error["message"] = "#{refreshes} refreshes exceeded maximum"
    return results
  end

  # Okay, fetch it as format xerxes
  xml = Nokogiri::XML(http_client.get(results_url.to_s + "&format=xerxes").body)

  xml.xpath("//results/records/record").each do |record|
    item = ResultItem.new
    results << item

    item.title = node_text record.at_xpath("xerxes_record/title")

    # to_s guards records without a <format> element.
    xerxes_fmt_str = node_text(record.at_xpath("xerxes_record/format")).to_s.downcase

    # The string is downcased, so both needles must be lower case too.
    item.format =
      if xerxes_fmt_str.include?("article")
        "Article"
      elsif xerxes_fmt_str.include?("book")
        "Book"
      else
        nil
      end

    item.link = node_text record.at_xpath("xerxes_record/links/link[@type='original_record']/url")

    item.year = node_text record.at_xpath("xerxes_record/year")
    item.volume = node_text record.at_xpath("xerxes_record/volume")
    item.issue = node_text record.at_xpath("xerxes_record/issue")
    item.start_page = node_text record.at_xpath("xerxes_record/start_page")
    item.end_page = node_text record.at_xpath("xerxes_record/end_page")

    item.abstract = node_text(record.at_xpath("xerxes_record/abstract") || record.at_xpath("xerxes_record/summary"))

    item.openurl_kev_co = node_text record.at_xpath("openurl_kev_co")

    # have to get journal title out of openurl, sorry
    if item.openurl_kev_co
      openurl = OpenURL::ContextObject.new_from_kev(item.openurl_kev_co)
      if openurl && openurl.referent && openurl.referent.format == "journal"
        item.journal_title = openurl.referent.jtitle
      end
    end
    item.issn = node_text record.at_xpath("xerxes_record/standard_numbers/issn")

    # authors
    record.xpath("xerxes_record/authors/author").each do |author|
      next unless author.at_xpath("aulast") # don't even have a lastname, we can do nothing

      item.authors << Author.new(
        :first  => node_text(author.at_xpath("aufirst")),
        :middle => node_text(author.at_xpath("auinit")),
        :last   => node_text(author.at_xpath("aulast"))
      )
    end
  end

  return results
end
|
148
|
+
|
149
|
+
protected
|
150
|
+
|
151
|
+
# Builds the URL that starts a Xerxes metasearch.
#
# args - search argument hash; only :query is read.
#
# Returns a String: configuration.base_url (without a trailing slash)
# followed by the metasearch query string, with one `database` parameter
# per entry in configuration.databases. The `context` parameter comes from
# the optional 'xerxes_context' configuration, defaulting to 'bento_search'.
# All interpolated values are URL-escaped.
def xerxes_search_url(args)
  context = configuration.lookup!('xerxes_context', 'bento_search')

  # Every fragment already starts with "&", so they are concatenated
  # without a separator; joining with "&" would emit "&&" between them.
  database_params = configuration.databases.collect do |db|
    "&database=#{CGI.escape(db.to_s)}"
  end.join

  configuration.base_url.chomp("/") + "/?base=metasearch&action=search" +
    "&context=#{CGI.escape(context.to_s)}" +
    "&field=WRD" +
    "&query=#{CGI.escape(args[:query].to_s)}" +
    database_params
end
|
158
|
+
|
159
|
+
# returns nil if passed in nil, otherwise
|
160
|
+
# returns nokogiri text()
|
161
|
+
# node - a node object responding to #text, or nil.
#
# Returns nil when +node+ is nil, otherwise the result of node.text.
def node_text(node)
  node.nil? ? nil : node.text
end
|
166
|
+
|
167
|
+
end
|
168
|
+
end
|