bento_search 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.md +299 -0
- data/Rakefile +40 -0
- data/app/assets/images/bento_search/large_loader.gif +0 -0
- data/app/assets/javascripts/bento_search.js +3 -0
- data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
- data/app/assets/stylesheets/bento_search/bento.css +4 -0
- data/app/controllers/bento_search/bento_search_controller.rb +7 -0
- data/app/controllers/bento_search/search_controller.rb +72 -0
- data/app/helpers/bento_search_helper.rb +138 -0
- data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
- data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
- data/app/models/bento_search/author.rb +25 -0
- data/app/models/bento_search/link.rb +30 -0
- data/app/models/bento_search/multi_searcher.rb +109 -0
- data/app/models/bento_search/openurl_creator.rb +128 -0
- data/app/models/bento_search/registrar.rb +70 -0
- data/app/models/bento_search/result_item.rb +203 -0
- data/app/models/bento_search/results.rb +54 -0
- data/app/models/bento_search/results/pagination.rb +67 -0
- data/app/models/bento_search/search_engine.rb +219 -0
- data/app/models/bento_search/search_engine/capabilities.rb +65 -0
- data/app/search_engines/bento_search/#Untitled-1# +11 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
- data/app/search_engines/bento_search/eds_engine.rb +557 -0
- data/app/search_engines/bento_search/google_books_engine.rb +184 -0
- data/app/search_engines/bento_search/primo_engine.rb +231 -0
- data/app/search_engines/bento_search/scopus_engine.rb +295 -0
- data/app/search_engines/bento_search/summon_engine.rb +398 -0
- data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
- data/app/views/bento_search/_link.html.erb +4 -0
- data/app/views/bento_search/_search_error.html.erb +22 -0
- data/app/views/bento_search/_std_item.html.erb +39 -0
- data/app/views/bento_search/search/search.html.erb +1 -0
- data/config/locales/en.yml +25 -0
- data/lib/bento_search.rb +29 -0
- data/lib/bento_search/engine.rb +5 -0
- data/lib/bento_search/routes.rb +45 -0
- data/lib/bento_search/version.rb +3 -0
- data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
- data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
- data/lib/http_client_patch/include_client.rb +86 -0
- data/lib/tasks/bento_search_tasks.rake +4 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +56 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +6 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +3100 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/functional/bento_search/search_controller_test.rb +81 -0
- data/test/helper/bento_search_helper_test.rb +125 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/support/mock_engine.rb +23 -0
- data/test/support/test_with_cassette.rb +38 -0
- data/test/test_helper.rb +52 -0
- data/test/unit/#vcr_test.rb# +68 -0
- data/test/unit/ebsco_host_engine_test.rb +134 -0
- data/test/unit/eds_engine_test.rb +105 -0
- data/test/unit/google_books_engine_test.rb +93 -0
- data/test/unit/item_decorators_test.rb +66 -0
- data/test/unit/multi_searcher_test.rb +49 -0
- data/test/unit/openurl_creator_test.rb +111 -0
- data/test/unit/pagination_test.rb +59 -0
- data/test/unit/primo_engine_test.rb +37 -0
- data/test/unit/register_engine_test.rb +50 -0
- data/test/unit/result_item_display_test.rb +39 -0
- data/test/unit/result_item_test.rb +36 -0
- data/test/unit/scopus_engine_test.rb +130 -0
- data/test/unit/search_engine_base_test.rb +178 -0
- data/test/unit/search_engine_test.rb +95 -0
- data/test/unit/summon_engine_test.rb +161 -0
- data/test/unit/xerxes_engine_test.rb +70 -0
- data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
- data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
- data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
- data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
- data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
- data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
- data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
- data/test/vcr_cassettes/gbs/pagination.yml +702 -0
- data/test/vcr_cassettes/gbs/search.yml +340 -0
- data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
- data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
- data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
- data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
- data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
- data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
- data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
- data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
- data/test/vcr_cassettes/summon/search.yml +242 -0
- data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
- data/test/view/std_item_test.rb +98 -0
- metadata +421 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
require 'http_client_patch/include_client'
|
|
2
|
+
require 'httpclient'
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require 'time'
|
|
5
|
+
require 'uri'
|
|
6
|
+
|
|
7
|
+
require 'summon'
|
|
8
|
+
require 'summon/transport/headers'
|
|
9
|
+
|
|
10
|
+
# Search engine for Serial Solutions Summon
|
|
11
|
+
#
|
|
12
|
+
# Docs:
|
|
13
|
+
# http://api.summon.serialssolutions.com/help/api/search
|
|
14
|
+
# http://api.summon.serialssolutions.com/help/api/search/fields
|
|
15
|
+
#
|
|
16
|
+
# An example user-facing Summon UI, useful for figuring out available
|
|
17
|
+
# facets and facet values, or trying out searches:
|
|
18
|
+
# http://ncsu.summon.serialssolutions.com/
|
|
19
|
+
|
|
20
|
+
#
|
|
21
|
+
# == Functionality notes
|
|
22
|
+
#
|
|
23
|
+
# * for pagination, underlying summon API only supports 'page', not 'start'
|
|
24
|
+
# style, if you pass in 'start' style it will be 'rounded' to containing 'page'.
|
|
25
|
+
#
|
|
26
|
+
# == Required config params
|
|
27
|
+
# [access_id] supplied by SerSol for your account
|
|
28
|
+
# [secret_key] supplied by SerSol for your account
|
|
29
|
+
#
|
|
30
|
+
# == Optional custom config params
|
|
31
|
+
#
|
|
32
|
+
# [fixed_params]
|
|
33
|
+
# Fixed SerSol query param literals to send with every search.
|
|
34
|
+
# Value is a HASH, of keys and either single values or arrays
|
|
35
|
+
# of values. For instance, to exclude Newspaper Articles and Books
|
|
36
|
+
# from all search results, in config:
|
|
37
|
+
# :fixed_params =>
|
|
38
|
+
# {"s.cmd" => ["addFacetValueFilters(ContentType,Web Resource:true,Reference:true,eBook:true)"]
|
|
39
|
+
# Note that values are NOT URI escaped in config, code will take care
|
|
40
|
+
# of that for you. You could also fix "s.role" to 'authenticated' using
|
|
41
|
+
# this mechanism, if you restrict all access to your app to authenticated
|
|
42
|
+
# affiliated users.
|
|
43
|
+
# Note: We wanted to use this for content type facet exclusions, as
|
|
44
|
+
# per above. We could NOT get Summon "s.fvf" param to work right, had
|
|
45
|
+
# to use the s.cmd=addFacetValueFilter version.
|
|
46
|
+
# [highlighting]
|
|
47
|
+
# Default true, ask SerSol for query-in-context highlighting in
|
|
48
|
+
# title and snippets field. If true you WILL get HTML with <b> tags
|
|
49
|
+
# in your titles.
|
|
50
|
+
# [snippets_as_abstract]
|
|
51
|
+
# Defaults true, if true and :highlighting is true, we'll put the
|
|
52
|
+
# query-in-context snippets in the 'abstract' field. Set :max_snippets
|
|
53
|
+
# for how many to possibly include (default 1). We may change this functionality
|
|
54
|
+
# later; this is a bit of a hacky way to do it.
|
|
55
|
+
# [use_summon_openurl] default false. If true, will use OpenURL kev context
|
|
56
|
+
# object passed back by summon to generate openurls, instead of creating
|
|
57
|
+
# one ourselves from individual data elements. Summon openurl is decent,
|
|
58
|
+
# but currently includes highlighting tags in title elements. Also note
|
|
59
|
+
# it includes DC-type openurls, which we don't currently generate ourselves.
|
|
60
|
+
#
|
|
61
|
+
#
|
|
62
|
+
# == Custom search params
|
|
63
|
+
#
|
|
64
|
+
# Pass in `:auth => true` (or "true") to send headers to summon
|
|
65
|
+
# indicating an authorized user, for full search results.
|
|
66
|
+
#
|
|
67
|
+
#
|
|
68
|
+
# == Tech notes
|
|
69
|
+
# We did not choose to use the summon ruby gem in general, we wanted more control
|
|
70
|
+
# than it offered (ability to use HTTPClient persistent connections, MultiJson
|
|
71
|
+
# for json parsing, etc).
|
|
72
|
+
#
|
|
73
|
+
# However, we DO use that gem specifically for constructing authentication
|
|
74
|
+
# headers how summon wants it, see class at
|
|
75
|
+
# https://github.com/summon/summon.rb/blob/master/lib/summon/transport/headers.rb
|
|
76
|
+
#
|
|
77
|
+
class BentoSearch::SummonEngine
|
|
78
|
+
include BentoSearch::SearchEngine
|
|
79
|
+
|
|
80
|
+
extend HTTPClientPatch::IncludeClient
|
|
81
|
+
include_http_client
|
|
82
|
+
|
|
83
|
+
include ActionView::Helpers::OutputSafetyHelper # for safe_join
|
|
84
|
+
|
|
85
|
+
@@hl_start_token = "$$BENTO_HL_START$$"
|
|
86
|
+
@@hl_end_token = "$$BENTO_HL_END$$"
|
|
87
|
+
|
|
88
|
+
def search_implementation(args)
|
|
89
|
+
uri, headers = construct_request(args)
|
|
90
|
+
|
|
91
|
+
results = BentoSearch::Results.new
|
|
92
|
+
|
|
93
|
+
hash, response, exception = nil
|
|
94
|
+
begin
|
|
95
|
+
response = http_client.get(uri, nil, headers)
|
|
96
|
+
hash = MultiJson.load( response.body )
|
|
97
|
+
rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, MultiJson::DecodeError, Nokogiri::SyntaxError => e
|
|
98
|
+
exception = e
|
|
99
|
+
end
|
|
100
|
+
# handle some errors
|
|
101
|
+
if (response.nil? || hash.nil? || exception ||
|
|
102
|
+
(! HTTP::Status.successful? response.status))
|
|
103
|
+
results.error ||= {}
|
|
104
|
+
results.error[:exception] = e
|
|
105
|
+
results.error[:status] = response.status if response
|
|
106
|
+
|
|
107
|
+
return results
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
results.total_items = hash["recordCount"]
|
|
111
|
+
|
|
112
|
+
hash["documents"].each do |doc_hash|
|
|
113
|
+
item = BentoSearch::ResultItem.new
|
|
114
|
+
|
|
115
|
+
item.title = handle_highlighting( first_if_present doc_hash["Title"] )
|
|
116
|
+
item.custom_data["raw_title"] = handle_highlighting( first_if_present(doc_hash["Title"]) , :strip => true)
|
|
117
|
+
|
|
118
|
+
item.subtitle = handle_highlighting( first_if_present doc_hash["Subtitle"] )# TODO is this right?
|
|
119
|
+
item.custom_data["raw_subtitle"] = handle_highlighting( first_if_present(doc_hash["Subtitle"]), :strip => true )
|
|
120
|
+
|
|
121
|
+
item.link = doc_hash["link"]
|
|
122
|
+
|
|
123
|
+
if configuration.use_summon_openurl
|
|
124
|
+
item.openurl_kev_co = doc_hash["openUrl"] # Summon conveniently gives us pre-made OpenURL
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
item.journal_title = first_if_present doc_hash["PublicationTitle"]
|
|
128
|
+
item.issn = first_if_present doc_hash["ISSN"]
|
|
129
|
+
item.isbn = first_if_present doc_hash["ISBN"]
|
|
130
|
+
item.doi = first_if_present doc_hash["DOI"]
|
|
131
|
+
|
|
132
|
+
item.start_page = first_if_present doc_hash["StartPage"]
|
|
133
|
+
item.end_page = first_if_present doc_hash["EndPage"]
|
|
134
|
+
|
|
135
|
+
if (pubdate = first_if_present doc_hash["PublicationDate_xml"])
|
|
136
|
+
item.year = pubdate["year"]
|
|
137
|
+
end
|
|
138
|
+
item.volume = first_if_present doc_hash["Volume"]
|
|
139
|
+
item.issue = first_if_present doc_hash["Issue"]
|
|
140
|
+
|
|
141
|
+
if (pub = first_if_present doc_hash["Publisher_xml"])
|
|
142
|
+
item.publisher = pub["name"]
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
(doc_hash["Author_xml"] || []).each do |auth_hash|
|
|
146
|
+
a = BentoSearch::Author.new
|
|
147
|
+
|
|
148
|
+
a.first = name_normalize auth_hash["givenname"]
|
|
149
|
+
a.last = name_normalize auth_hash["surname"]
|
|
150
|
+
a.middle = name_normalize auth_hash["middlename"]
|
|
151
|
+
|
|
152
|
+
a.display = name_normalize auth_hash["fullname"]
|
|
153
|
+
|
|
154
|
+
item.authors << a unless a.empty?
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
item.format = normalize_content_type( first_if_present doc_hash["ContentType"] )
|
|
158
|
+
if doc_hash["ContentType"]
|
|
159
|
+
item.format_str = doc_hash["ContentType"].join(", ")
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
if ( configuration.highlighting && configuration.snippets_as_abstract &&
|
|
163
|
+
doc_hash["Snippet"] && doc_hash["Snippet"].length > 0 )
|
|
164
|
+
|
|
165
|
+
item.abstract = handle_highlighting doc_hash["Snippet"].slice(0, configuration.max_snippets).join(" ")
|
|
166
|
+
else
|
|
167
|
+
item.abstract = first_if_present doc_hash["Abstract"]
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
item.extend( SummonOpenurlOverride )
|
|
171
|
+
|
|
172
|
+
results << item
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
return results
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def first_if_present(array)
|
|
180
|
+
array ? array.first : nil
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# Normalize Summon Content-Type to our standardized
|
|
185
|
+
# list.
|
|
186
|
+
#
|
|
187
|
+
# This ends up losing useful distinctions Summon makes, however.
|
|
188
|
+
def normalize_content_type(summon_type)
|
|
189
|
+
case summon_type
|
|
190
|
+
when "Journal Article", "Book Review", "Trade Publication Article" then "Article"
|
|
191
|
+
when "Audio Recording", "Music Recording" then "AudioObject"
|
|
192
|
+
when "Book", "eBook" then "Book"
|
|
193
|
+
when "Conference Proceedings" then :conference_paper
|
|
194
|
+
when "Dissertation" then :dissertation
|
|
195
|
+
when "Journal", "Newsletter" then :serial
|
|
196
|
+
when "Photograph" then "Photograph"
|
|
197
|
+
when "Video Recording" then "VideoObject"
|
|
198
|
+
else nil
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
def name_normalize(str)
|
|
203
|
+
|
|
204
|
+
return nil if str.blank?
|
|
205
|
+
|
|
206
|
+
str = str.strip
|
|
207
|
+
|
|
208
|
+
return nil if str.blank? || str =~ /^[,:.]*$/
|
|
209
|
+
|
|
210
|
+
return str
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# returns two element array: [uri, headers]
|
|
215
|
+
#
|
|
216
|
+
# uri, headers = construct_request(args)
|
|
217
|
+
def construct_request(args)
|
|
218
|
+
# Query params in a hash with array values, because it is easiest
|
|
219
|
+
# to generate auth headers that way. Value is array of values that
|
|
220
|
+
# are NOT URI-encoded yet.
|
|
221
|
+
query_params = Hash.new {|h, k| h[k] = [] }
|
|
222
|
+
|
|
223
|
+
# Add in fixed params from config, if any.
|
|
224
|
+
|
|
225
|
+
if configuration.fixed_params
|
|
226
|
+
configuration.fixed_params.each_pair do |key, value|
|
|
227
|
+
[value].flatten.each do |v|
|
|
228
|
+
query_params[key] << v
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
if args[:per_page]
|
|
234
|
+
query_params["s.ps"] = args[:per_page]
|
|
235
|
+
end
|
|
236
|
+
if args[:page]
|
|
237
|
+
query_params["s.pn"] = args[:page]
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
if args[:search_field]
|
|
241
|
+
query_params['s.q'] = "#{args[:search_field]}:(#{summon_escape(args[:query])})"
|
|
242
|
+
else
|
|
243
|
+
query_params['s.q'] = summon_escape( args[:query] )
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
if (args[:sort] &&
|
|
247
|
+
(defn = self.sort_definitions[args[:sort]]) &&
|
|
248
|
+
(literal = defn[:implementation]))
|
|
249
|
+
query_params['s.sort'] = literal
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
if args[:auth] == true
|
|
253
|
+
query_params['s.role'] = "authenticated"
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
if configuration.highlighting
|
|
257
|
+
query_params['s.hs'] = @@hl_start_token
|
|
258
|
+
query_params['s.he'] = @@hl_end_token
|
|
259
|
+
else
|
|
260
|
+
query_params['s.hl'] = "false"
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
headers = Summon::Transport::Headers.new(
|
|
265
|
+
:access_id => configuration.access_id,
|
|
266
|
+
:secret_key => configuration.secret_key,
|
|
267
|
+
:accept => "json",
|
|
268
|
+
:params => query_params,
|
|
269
|
+
:url => configuration.base_url
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
query_string = query_params.keys.collect do |key|
|
|
274
|
+
[query_params[key]].flatten.collect do |value|
|
|
275
|
+
"#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}"
|
|
276
|
+
end
|
|
277
|
+
end.flatten.join("&")
|
|
278
|
+
|
|
279
|
+
uri = "#{configuration.base_url}?#{query_string}"
|
|
280
|
+
|
|
281
|
+
return [uri, headers]
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Escapes special chars for Summon. Not entirely clear what
|
|
286
|
+
# we have to escape where (or double escape sometimes?), but
|
|
287
|
+
# we're just going to do a straight backslash escape of special
|
|
288
|
+
# chars.
|
|
289
|
+
#
|
|
290
|
+
# Does NOT do URI-escaping, that's a different step.
|
|
291
|
+
def summon_escape(string)
|
|
292
|
+
# replace with backslash followed by original matched thing,
|
|
293
|
+
# need to double backslash for ruby string literal makes
|
|
294
|
+
# this ridiculously confusing, sorry. Block form of gsub
|
|
295
|
+
# is the only thing that keeps it from being impossible.
|
|
296
|
+
#
|
|
297
|
+
# Do NOT escape double quotes, let people use them for
|
|
298
|
+
# phrases!
|
|
299
|
+
string.gsub(/([+\-&|!\(\){}\[\]^~*?\\:])/) do |match|
|
|
300
|
+
"\\#{$1}"
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
# If summon has put snippet highlighting tokens
|
|
305
|
+
# in a field, we need to HTML escape the literal values,
|
|
306
|
+
# while still using the highlighting tokens to put
|
|
307
|
+
# HTML tags around highlighted terms.
|
|
308
|
+
def handle_highlighting( str, options = {} )
|
|
309
|
+
return str if str.blank? || ! configuration.highlighting
|
|
310
|
+
|
|
311
|
+
if options[:strip]
|
|
312
|
+
# Just strip em, don't need to replace em with HTML
|
|
313
|
+
str = str.gsub(Regexp.new(Regexp.escape @@hl_start_token), '')
|
|
314
|
+
str = str.gsub(Regexp.new(Regexp.escape @@hl_end_token), '')
|
|
315
|
+
return str
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
parts =
|
|
319
|
+
str.
|
|
320
|
+
split( %r{(#{Regexp.escape @@hl_start_token}|#{Regexp.escape @@hl_end_token})} ).
|
|
321
|
+
collect do |substr|
|
|
322
|
+
case substr
|
|
323
|
+
when @@hl_start_token then '<b class="bento_search_highlight">'.html_safe
|
|
324
|
+
when @@hl_end_token then '</b>'.html_safe
|
|
325
|
+
else substr
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
|
|
329
|
+
return safe_join(parts, '')
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
def self.required_configuration
|
|
333
|
+
[:access_id, :secret_key]
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
def self.default_configuration
|
|
337
|
+
{
|
|
338
|
+
:base_url => "http://api.summon.serialssolutions.com/2.0.0/search",
|
|
339
|
+
:highlighting => true,
|
|
340
|
+
:snippets_as_abstract => true,
|
|
341
|
+
:max_snippets => 1,
|
|
342
|
+
:use_summon_openurl => false
|
|
343
|
+
}
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
def max_per_page
|
|
347
|
+
200
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
# Summon actually only supports relevancy sort, and pub year asc or desc.
|
|
351
|
+
# we just expose relevance and pub year desc here.
|
|
352
|
+
def sort_definitions
|
|
353
|
+
# implementation includes literal sersol value, but not yet
|
|
354
|
+
# uri escaped, that'll happen at a later code point.
|
|
355
|
+
{
|
|
356
|
+
"relevance" => {:implementation => nil}, # default
|
|
357
|
+
"date_desc" => {:implementation => "PublicationDate:desc"}
|
|
358
|
+
|
|
359
|
+
}
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
# Summon offers many more search fields than this. This is a subset
|
|
363
|
+
# listed here. See http://api.summon.serialssolutions.com/help/api/search/fields
|
|
364
|
+
# although those docs may not be up to date.
|
|
365
|
+
#
|
|
366
|
+
# The AuthorCombined, TitleCombined, and SubjectCombined indexes
|
|
367
|
+
# aren't even listed in the docs, but they are real. I think.
|
|
368
|
+
def search_field_definitions
|
|
369
|
+
{
|
|
370
|
+
"AuthorCombined" => {:semantic => :author},
|
|
371
|
+
"TitleCombined" => {:semantic => :title},
|
|
372
|
+
# SubjectTerms does not include TemporalSubjectTerms
|
|
373
|
+
# or Keywords, sorry.
|
|
374
|
+
"SubjectTerms" => {:semantic => :subject},
|
|
375
|
+
# ISBN and ISSN do not include separate EISSN and EISBN
|
|
376
|
+
# fields, sorry.
|
|
377
|
+
"ISBN" => {:semantic => :isbn},
|
|
378
|
+
"ISSN" => {:semantic => :issn},
|
|
379
|
+
"OCLC" => {:semantic => :oclcnum},
|
|
380
|
+
"PublicationSeriesTitle" => {}
|
|
381
|
+
}
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
# Module that we extend our ResultItems with, to over-ride
|
|
385
|
+
# to_openurl to use a dup of ourselves with title/subtitle
|
|
386
|
+
# set to raw ones without highlighting markup.
|
|
387
|
+
module SummonOpenurlOverride
|
|
388
|
+
def to_openurl
|
|
389
|
+
dup = self.dup
|
|
390
|
+
dup.title = self.custom_data["raw_title"]
|
|
391
|
+
dup.subtitle = self.custom_data["raw_subtitle"]
|
|
392
|
+
|
|
393
|
+
dup.to_openurl
|
|
394
|
+
end
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
require 'uri'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
require 'openurl'
|
|
4
|
+
|
|
5
|
+
require 'httpclient'
|
|
6
|
+
require 'http_client_patch/include_client'
|
|
7
|
+
|
|
8
|
+
module BentoSearch
|
|
9
|
+
# a **very limited and hacky** bento search engine for the Xerxes Metalib
|
|
10
|
+
# front-end. Probably not suitable for real production use, just a demo,
|
|
11
|
+
# and used for testing. Does not support pagination, or fielded searching.
|
|
12
|
+
# will do a new Metalib search every time you call it, which will be slow.
|
|
13
|
+
#
|
|
14
|
+
# Machine running this code needs to have IP-address authorization
|
|
15
|
+
# to search xerxes.
|
|
16
|
+
#
|
|
17
|
+
# jrochkind is using it for his article search provider comparison testing
|
|
18
|
+
# instrument.
|
|
19
|
+
|
|
20
|
+
class XerxesEngine
|
|
21
|
+
include BentoSearch::SearchEngine
|
|
22
|
+
|
|
23
|
+
extend HTTPClientPatch::IncludeClient
|
|
24
|
+
include_http_client
|
|
25
|
+
|
|
26
|
+
# also optional configuration
|
|
27
|
+
# [xerxes_context]
|
|
28
|
+
# will send as 'context' query param to xerxes, for analytics
|
|
29
|
+
def self.required_configuration
|
|
30
|
+
["base_url", "databases"]
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def search_implementation(arguments)
|
|
34
|
+
|
|
35
|
+
# We're gonna have to do a search 'screen scrape' style, then refresh it
|
|
36
|
+
# until it's ready, and then request format=xerxes when it's ready
|
|
37
|
+
# to get XML. A bit hacky.
|
|
38
|
+
|
|
39
|
+
request_url = xerxes_search_url(arguments)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
response = http_client.head request_url
|
|
43
|
+
|
|
44
|
+
# It's supposed to be a redirect
|
|
45
|
+
unless HTTP::Status.redirect?(response.status) && response.headers["Location"]
|
|
46
|
+
r = Results.new
|
|
47
|
+
r.error ||= {}
|
|
48
|
+
r.error["status"] = response.status
|
|
49
|
+
r.error["message"] = "Xerxes did not return expected 302 redirect"
|
|
50
|
+
|
|
51
|
+
return r
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Okay, now fetch the redirect, have to change it to an absolute
|
|
55
|
+
# URI cause Xerxes semi-illegally returns a relative one.
|
|
56
|
+
refreshes = 0
|
|
57
|
+
results_url = nil
|
|
58
|
+
status_url = (URI.parse(request_url) + response.headers["Location"]).to_s
|
|
59
|
+
while ( refreshes < 5 )
|
|
60
|
+
# cause of VCR, can't request the exact same URL twice
|
|
61
|
+
# with different results. Add `try` on the end.
|
|
62
|
+
response = http_client.get( status_url + "&try=#{refreshes}")
|
|
63
|
+
|
|
64
|
+
# Okay, have to follow the meta-refresh
|
|
65
|
+
html = Nokogiri::HTML( response.body )
|
|
66
|
+
|
|
67
|
+
if HTTP::Status.redirect? response.status
|
|
68
|
+
# Okay, redirect means we're done with status and
|
|
69
|
+
# we've got actual results url
|
|
70
|
+
results_url = URI.parse(request_url) + response.headers["Location"]
|
|
71
|
+
break
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
if ( refresh = html.css("meta[http-equiv='refresh']") )
|
|
75
|
+
wait = configuration.lookup!("refresh_wait", (refresh.attribute("content").value.to_i if refresh.attribute("content")))
|
|
76
|
+
# wait how long Xerxes asked before refreshing.
|
|
77
|
+
refreshes += 1
|
|
78
|
+
sleep wait
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
results = Results.new
|
|
83
|
+
|
|
84
|
+
# any errors?
|
|
85
|
+
if results_url.nil? && refreshes >= 5
|
|
86
|
+
results.error ||= {}
|
|
87
|
+
results.error["message"] = "#{refreshes} refreshes exceeded maximum"
|
|
88
|
+
return results
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Okay, fetch it as format xerxes
|
|
92
|
+
|
|
93
|
+
xml = Nokogiri::XML( http_client.get(results_url.to_s + "&format=xerxes").body )
|
|
94
|
+
|
|
95
|
+
results = Results.new
|
|
96
|
+
|
|
97
|
+
xml.xpath("//results/records/record").each do |record|
|
|
98
|
+
item = ResultItem.new
|
|
99
|
+
results << item
|
|
100
|
+
|
|
101
|
+
item.title = node_text record.at_xpath("xerxes_record/title")
|
|
102
|
+
|
|
103
|
+
xerxes_fmt_str = node_text(record.at_xpath("xerxes_record/format")).downcase
|
|
104
|
+
|
|
105
|
+
item.format = if xerxes_fmt_str.include?("article")
|
|
106
|
+
"Article"
|
|
107
|
+
elsif xerxes_fmt_str.include?("book") # lowercase needle: xerxes_fmt_str was downcased above
|
|
108
|
+
"Book"
|
|
109
|
+
else
|
|
110
|
+
nil
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
item.link = node_text record.at_xpath("xerxes_record/links/link[@type='original_record']/url")
|
|
114
|
+
|
|
115
|
+
item.year = node_text record.at_xpath("xerxes_record/year")
|
|
116
|
+
item.volume = node_text record.at_xpath("xerxes_record/volume")
|
|
117
|
+
item.issue = node_text record.at_xpath("xerxes_record/issue")
|
|
118
|
+
item.start_page = node_text record.at_xpath("xerxes_record/start_page")
|
|
119
|
+
item.end_page = node_text record.at_xpath("xerxes_record/end_page")
|
|
120
|
+
|
|
121
|
+
item.abstract = node_text(record.at_xpath("xerxes_record/abstract") || record.at_xpath("xerxes_record/summary"))
|
|
122
|
+
|
|
123
|
+
item.openurl_kev_co = node_text record.at_xpath("openurl_kev_co")
|
|
124
|
+
|
|
125
|
+
# have to get journal title out of openurl, sorry
|
|
126
|
+
if item.openurl_kev_co
|
|
127
|
+
openurl = OpenURL::ContextObject.new_from_kev( item.openurl_kev_co )
|
|
128
|
+
if openurl && openurl.referent && openurl.referent.format == "journal"
|
|
129
|
+
item.journal_title = openurl.referent.jtitle
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
item.issn = node_text record.at_xpath("xerxes_record/standard_numbers/issn")
|
|
133
|
+
|
|
134
|
+
# authors
|
|
135
|
+
record.xpath("xerxes_record/authors/author").each do |author|
|
|
136
|
+
next unless author.at_xpath("aulast") # don't even have a lastname, we can do nothing
|
|
137
|
+
|
|
138
|
+
item.authors << Author.new(:first => node_text(author.at_xpath("aufirst")),
|
|
139
|
+
:middle => node_text(author.at_xpath("auinit")),
|
|
140
|
+
:last => node_text(author.at_xpath("aulast"))
|
|
141
|
+
)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
end
|
|
146
|
+
return results
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
protected
|
|
150
|
+
|
|
151
|
+
def xerxes_search_url(args)
|
|
152
|
+
configuration.base_url.chomp("/") + "/?base=metasearch&action=search" +
|
|
153
|
+
"&context=#{configuration.lookup!('xerxes_context', 'bento_search')}" +
|
|
154
|
+
"&field=WRD" +
|
|
155
|
+
"&query=#{CGI.escape(args[:query])}" +
|
|
156
|
+
configuration.databases.collect {|d| "&database=#{d}"}.join("&")
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# returns nil if passed in nil, otherwise
|
|
160
|
+
# returns nokogiri text()
|
|
161
|
+
def node_text(node)
|
|
162
|
+
return nil if node.nil?
|
|
163
|
+
|
|
164
|
+
return node.text()
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
end
|
|
168
|
+
end
|