bento_search 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.md +299 -0
- data/Rakefile +40 -0
- data/app/assets/images/bento_search/large_loader.gif +0 -0
- data/app/assets/javascripts/bento_search.js +3 -0
- data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
- data/app/assets/stylesheets/bento_search/bento.css +4 -0
- data/app/controllers/bento_search/bento_search_controller.rb +7 -0
- data/app/controllers/bento_search/search_controller.rb +72 -0
- data/app/helpers/bento_search_helper.rb +138 -0
- data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
- data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
- data/app/models/bento_search/author.rb +25 -0
- data/app/models/bento_search/link.rb +30 -0
- data/app/models/bento_search/multi_searcher.rb +109 -0
- data/app/models/bento_search/openurl_creator.rb +128 -0
- data/app/models/bento_search/registrar.rb +70 -0
- data/app/models/bento_search/result_item.rb +203 -0
- data/app/models/bento_search/results.rb +54 -0
- data/app/models/bento_search/results/pagination.rb +67 -0
- data/app/models/bento_search/search_engine.rb +219 -0
- data/app/models/bento_search/search_engine/capabilities.rb +65 -0
- data/app/search_engines/bento_search/#Untitled-1# +11 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
- data/app/search_engines/bento_search/eds_engine.rb +557 -0
- data/app/search_engines/bento_search/google_books_engine.rb +184 -0
- data/app/search_engines/bento_search/primo_engine.rb +231 -0
- data/app/search_engines/bento_search/scopus_engine.rb +295 -0
- data/app/search_engines/bento_search/summon_engine.rb +398 -0
- data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
- data/app/views/bento_search/_link.html.erb +4 -0
- data/app/views/bento_search/_search_error.html.erb +22 -0
- data/app/views/bento_search/_std_item.html.erb +39 -0
- data/app/views/bento_search/search/search.html.erb +1 -0
- data/config/locales/en.yml +25 -0
- data/lib/bento_search.rb +29 -0
- data/lib/bento_search/engine.rb +5 -0
- data/lib/bento_search/routes.rb +45 -0
- data/lib/bento_search/version.rb +3 -0
- data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
- data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
- data/lib/http_client_patch/include_client.rb +86 -0
- data/lib/tasks/bento_search_tasks.rake +4 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +56 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +6 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +3100 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/functional/bento_search/search_controller_test.rb +81 -0
- data/test/helper/bento_search_helper_test.rb +125 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/support/mock_engine.rb +23 -0
- data/test/support/test_with_cassette.rb +38 -0
- data/test/test_helper.rb +52 -0
- data/test/unit/#vcr_test.rb# +68 -0
- data/test/unit/ebsco_host_engine_test.rb +134 -0
- data/test/unit/eds_engine_test.rb +105 -0
- data/test/unit/google_books_engine_test.rb +93 -0
- data/test/unit/item_decorators_test.rb +66 -0
- data/test/unit/multi_searcher_test.rb +49 -0
- data/test/unit/openurl_creator_test.rb +111 -0
- data/test/unit/pagination_test.rb +59 -0
- data/test/unit/primo_engine_test.rb +37 -0
- data/test/unit/register_engine_test.rb +50 -0
- data/test/unit/result_item_display_test.rb +39 -0
- data/test/unit/result_item_test.rb +36 -0
- data/test/unit/scopus_engine_test.rb +130 -0
- data/test/unit/search_engine_base_test.rb +178 -0
- data/test/unit/search_engine_test.rb +95 -0
- data/test/unit/summon_engine_test.rb +161 -0
- data/test/unit/xerxes_engine_test.rb +70 -0
- data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
- data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
- data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
- data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
- data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
- data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
- data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
- data/test/vcr_cassettes/gbs/pagination.yml +702 -0
- data/test/vcr_cassettes/gbs/search.yml +340 -0
- data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
- data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
- data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
- data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
- data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
- data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
- data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
- data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
- data/test/vcr_cassettes/summon/search.yml +242 -0
- data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
- data/test/view/std_item_test.rb +98 -0
- metadata +421 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
|
2
|
+
# Capability-describing methods that get mixed into SearchEngine.
# Each method here supplies an "empty" default; individual engine
# implementations will often over-ride some or all of them.
module BentoSearch::SearchEngine::Capabilities
  # Over-ride when the engine supports fielded search. Returns a hash
  # whose keys are engine-specific internal search fields, and whose
  # values are nil or a hash of metadata about that field, including
  # its semantic mapping:
  #
  #     def search_field_definitions
  #       { "intitle" => {:semantic => :title}}
  #     end
  #
  # Default: no fielded search (empty hash).
  def search_field_definitions
    {}
  end

  # Over-ride with a HASH of available sorts. Each key is the string
  # passed in engine.search(...., :sort => key). A key bundles the choice
  # of sort field, ascending/descending, secondary sorts etc into one
  # value, because typical examined interfaces did the same from a
  # select menu.
  #
  # Keys should where possible be _standard_ keys chosen from those
  # listed in config/i18n/en:bento_search.sort_keys.* -- but a key may be
  # custom to the engine if nothing there fits. The hash value is for
  # internal use by the engine; it may be a convenient place to store
  # implementation details.
  #
  # What happens when a caller asks for a sort not listed here (raise?
  # ignore?) is not yet decided.
  def sort_definitions
    {}
  end

  # Over-ride to return the integer maximum per-page count.
  # The default is nil.
  def max_per_page
    nil
  end

  # List of internal search field keys that can be supplied to
  # search(:search_field => x); these are the keys of
  # #search_field_definitions.
  def search_keys
    search_field_definitions.keys
  end

  # List of semantic search field names that can be supplied to
  # search(:semantic_search_field => x). These are the keys of
  # #semantic_search_map, so they are Strings.
  def semantic_search_keys
    semantic_search_map.keys
  end

  # Hash mapping semantic search field name => internal search field
  # key. The semantic name is stored as a String (via #to_s), whatever
  # type the definition used. Fields whose definition is nil, or has no
  # :semantic entry, are left out.
  def semantic_search_map
    search_field_definitions.each_with_object({}) do |(field, defn), map|
      map[defn[:semantic].to_s] = field if defn && defn[:semantic]
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
http://blacklight.mse.jhu.edu:3001/resolve?url_ver=Z39.88-2004
|
2
|
+
&url_ctx_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Actx
|
3
|
+
&ctx_ver=Z39.88-2004
|
4
|
+
&ctx_tim=2012-07-25T16%3A21%3A11-04%3A00
|
5
|
+
&ctx_id=
|
6
|
+
&ctx_enc=info%3Aofi%2Fenc%3AUTF-8
|
7
|
+
&rft.title=Monkey+Brains
|
8
|
+
&rft.creator=Will.i.am
|
9
|
+
&rft.pub=Absolute+Pitch%2C12+Dec+2007
|
10
|
+
&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adc
|
11
|
+
&rfr_id=info%3Asid%2Fsummon.serialssolutions.com
|
@@ -0,0 +1,356 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
require 'http_client_patch/include_client'
|
4
|
+
require 'httpclient'
|
5
|
+
|
6
|
+
# Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
|
7
|
+
# may be expanded or refactored for EDS too.
|
8
|
+
#
|
9
|
+
# == Required Configuration
|
10
|
+
#
|
11
|
+
# * profile_id
|
12
|
+
# * profile_password
|
13
|
+
# * databases: ARRAY of ebsco shortcodes of what databases to include in search. If you specify one you don't have access to, you get an error message from ebsco, alas.
|
14
|
+
#
|
15
|
+
# == Note on including databases
|
16
|
+
#
|
17
|
+
# Need to specifically configure all databases your institution licenses from
|
18
|
+
# EBSCO that you want included in the search. You can't just say "all of them"
|
19
|
+
# the api doesn't support that, and also more than 30 or 40 starts getting
|
20
|
+
# horribly slow. If you include a db you do not have access to, EBSCO api
|
21
|
+
# fatal errors.
|
22
|
+
#
|
23
|
+
# You may want to make sure all your licensed databases are included
|
24
|
+
# in your EIT profile. Log onto ebscoadmin, Customize Services, choose
|
25
|
+
# EIT profile, choose 'databases' tag.
|
26
|
+
#
|
27
|
+
# === Download databases from EBSCO api
|
28
|
+
#
|
29
|
+
# We include a utility to download ALL activated databases for EIT profile
|
30
|
+
# and generate a file putting them in a ruby array. You may want to use this
|
31
|
+
# file as a starting point, and edit by hand:
|
32
|
+
#
|
33
|
+
# First configure your EBSCO search engine with bento_search, say under
|
34
|
+
# key 'ebscohost'.
|
35
|
+
#
|
36
|
+
# Then run:
|
37
|
+
# rails generate bento_search:pull_ebsco_dbs ebscohost
|
38
|
+
#
|
39
|
+
# assuming 'ebscohost' is the key you registered the EBSCO search engine.
|
40
|
+
#
|
41
|
+
# This will create a file at ./config/ebsco_dbs.rb. You may want to hand
|
42
|
+
# edit it. Then, in your bento search config, you can:
|
43
|
+
#
|
44
|
+
# require "#{Rails.root}/config/ebsco_dbs.rb"
|
45
|
+
# BentoSearch.register_engine("ebscohost") do |conf|
|
46
|
+
# # ....
|
47
|
+
# conf.databases = $ebsco_dbs
|
48
|
+
# end
|
49
|
+
#
|
50
|
+
# == Vendor documentation
|
51
|
+
#
|
52
|
+
# Vendor documentation is a bit scattered, main page:
|
53
|
+
# * http://support.ebsco.com/eit/ws.php
|
54
|
+
# Some other useful pages we discovered:
|
55
|
+
# * http://support.ebsco.com/eit/ws_faq.php
|
56
|
+
# * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
|
57
|
+
# * Try constructing a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
|
58
|
+
# * The 'info' service can be used to see what databases you have access to.
|
59
|
+
# * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
|
60
|
+
#
|
61
|
+
#
|
62
|
+
#
|
63
|
+
#
|
64
|
+
# TODO: David Walker tells us we need to configure in EBSCO to make default operator be 'and' instead of phrase search!
|
65
|
+
# We Do need to do that to get reasonable results.
|
66
|
+
class BentoSearch::EbscoHostEngine
|
67
|
+
include BentoSearch::SearchEngine
|
68
|
+
|
69
|
+
extend HTTPClientPatch::IncludeClient
|
70
|
+
include_http_client
|
71
|
+
|
72
|
+
# Object carrying Rails' TextHelper methods, so we can call
# text_helper.truncate. Built on first use and memoized in a class
# variable.
def text_helper
  @@truncate ||= Object.new.extend(ActionView::Helpers::TextHelper)
end
|
80
|
+
|
81
|
+
# Run the search described by +args+ against the EBSCOHost API and
# return a BentoSearch::Results.
#
# On failure (no response, unparseable body, rescued exception,
# non-successful HTTP status, or a <Fault> element in the response) the
# returned results have #error set to a hash that may contain
# :exception, :status and :error_info, and no items are added.
def search_implementation(args)
  url     = query_url(args)
  results = BentoSearch::Results.new
  xml, response, exception = nil, nil, nil

  begin
    response = http_client.get(url)
    xml = Nokogiri::XML(response.body)
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
         Nokogiri::SyntaxError => e
    # Timeout::Error replaces the bare TimeoutError constant (gone from
    # current Rubies); HTTPClient::TimeoutError covers the client's own
    # connect/send/receive timeouts.
    exception = e
  end

  # error handle
  fault = nil
  if response.nil? ||
     xml.nil? ||
     exception ||
     !HTTP::Status.successful?(response.status) ||
     (fault = xml.at_xpath("./Fault"))

    results.error ||= {}
    results.error[:exception] = exception if exception
    results.error[:status] = response.status if response
    results.error[:error_info] = text_if_present(fault.at_xpath("./Message")) if fault

    return results
  end

  # the namespaces they provide are weird and don't help and sometimes
  # not clearly even legal. Remove em!
  xml.remove_namespaces!

  # nil.to_i is 0, so a response without a <Hits> element reports zero
  # total items instead of raising.
  results.total_items = text_if_present(xml.at_xpath("./searchResponse/Hits")).to_i

  xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
    results << item_from_xml(xml_rec)
  end

  results
end
|
126
|
+
|
127
|
+
|
128
|
+
# Given a nokogiri node, return its text. Returns nil when the node
# itself is nil or when its text is blank.
def text_if_present(node)
  return nil if node.nil?

  content = node.text
  content.blank? ? nil : content
end
|
137
|
+
|
138
|
+
# Guess the controlled format for an ebsco item. EBSCOHost (not sure
# about EDS) publication/document types are a non-normalized vocabulary
# that is unusable for controlled types, so the guess is based on which
# metadata sections are populated instead:
#
# * bkinfo present              => "Book"
# * dissinfo present            => :dissertation
# * jinfo and artinfo present   => "Article"
# * jinfo only                  => :serial
# * anything else, or nil node  => nil
def sniff_format(xml_node)
  return nil if xml_node.nil?
  return "Book" if xml_node.at_xpath("./bkinfo/*")
  return :dissertation if xml_node.at_xpath("./dissinfo/*")
  return nil unless xml_node.at_xpath("./jinfo/*")

  xml_node.at_xpath("./artinfo/*") ? "Article" : :serial
end
|
157
|
+
|
158
|
+
# Build the uncontrolled, literal format string shown to users.
# Combines the Ebsco Publication Type and Document Type when both are
# present (titlecased, duplicates dropped), then applies a few
# hard-coded special cases for a nicer display string. Otherwise the
# parts are joined with ": " (an empty string when neither is present).
def sniff_format_str(xml_node)
  pubtype = text_if_present(xml_node.at_xpath("./artinfo/pubtype"))
  doctype = text_if_present(xml_node.at_xpath("./artinfo/doctype"))

  # uniq after titlecasing: no need to have the same thing twice
  labels = [pubtype, doctype].compact.map { |label| label.titlecase }.uniq

  # some hard-coded cases for better user-displayable string
  if labels.first == "Academic Journal" && labels.last == "Article"
    "Journal Article"
  elsif labels.first == "Periodical" && labels.length > 1
    labels.last
  else
    labels.join(": ")
  end
end
|
185
|
+
|
186
|
+
# Given a <rec> nokogiri node, pick the best link: the pdfLink text
# when present, otherwise the plink text (nil if neither is present).
def get_link(xml)
  pdf_link = text_if_present(xml.at_xpath("./pdfLink"))
  return pdf_link if pdf_link

  text_if_present(xml.at_xpath("./plink"))
end
|
190
|
+
|
191
|
+
|
192
|
+
# It's unclear whether the ebsco API actually allows escaping of special
# chars, or what the special chars are. We do know parens are special
# and can't be escaped, so each paren is replaced with a space (which
# should not affect the search).
def ebsco_query_escape(txt)
  txt.tr("()", "  ")
end
|
198
|
+
|
199
|
+
# Turn the user's query into an EBSCO "AND" boolean query; at the moment
# that seems to be the only way to get decent results where terms can
# match across fields, for EIT. We'll see for EDS.
def ebsco_query_prepare(txt)
  # One split with a capturing regex yields both space-separated terms
  # and "quoted phrases", each phrase kept as a single unit. Parens are
  # removed from anything that is not a quoted phrase.
  tokens = txt.split(%r{[[:space:]]+|("[^"]+")}).map do |token|
    (token =~ /^\".*\"$/) ? token : ebsco_query_escape(token)
  end

  # Drop empty strings, bare boolean operators (they'd make things
  # weird; inside phrase quotes they are okay), and terms that are
  # solely punctuation without any letters.
  kept = tokens.reject do |token|
    token.blank? ||
      ["AND", "OR", "NOT"].include?(token) ||
      token =~ /\A[^[[:alnum:]]]+\Z/
  end

  kept.join(" AND ")
end
|
226
|
+
|
227
|
+
# Build the EBSCO Search URL for the given search +args+.
#
# Reads :query, :search_field, :start, :per_page and :sort from +args+.
# As before, args[:sort] is set to "relevance" when the caller gave no
# sort. A sort key with no entry in #sort_definitions also falls back to
# relevance instead of raising on nil. Profile id, password and database
# codes are CGI-escaped so special characters cannot corrupt the URL.
def query_url(args)
  params = []
  params << "prof=#{CGI.escape configuration.profile_id.to_s}"
  params << "pwd=#{CGI.escape configuration.profile_password.to_s}"

  query = ebsco_query_prepare args[:query]

  # wrap in (FI $query) if fielded search
  query = "(#{args[:search_field]} #{query})" if args[:search_field]

  params << "query=#{CGI.escape query}"

  # startrec is 1-based for ebsco, not 0-based like for us.
  params << "startrec=#{args[:start] + 1}" if args[:start]
  params << "numrec=#{args[:per_page]}" if args[:per_page]

  # Make relevance our default sort, rather than EBSCO's date.
  args[:sort] ||= "relevance"
  sort_defn = sort_definitions[args[:sort]] || sort_definitions["relevance"]
  sort_impl = (sort_defn && sort_defn[:implementation]) || "relevance"
  params << "sort=#{CGI.escape sort_impl.to_s}"

  # Contrary to docs, don't pass these comma-separated, pass em in
  # separate query params.
  configuration.databases.each do |db|
    params << "db=#{CGI.escape db.to_s}"
  end

  "#{configuration.base_url}/Search?#{params.join('&')}"
end
|
257
|
+
|
258
|
+
# Pass in a nokogiri node representing an EBSCO <rec> result, get back
# a BentoSearch::ResultItem built from it.
#
# All metadata is read from ./header/controlInfo. If a record has no
# such element, the item is returned with only its link set rather than
# raising on nil.
def item_from_xml(xml_rec)
  item = BentoSearch::ResultItem.new
  item.link = get_link(xml_rec)

  info = xml_rec.at_xpath("./header/controlInfo")
  return item if info.nil?

  item.issn           = text_if_present info.at_xpath("./jinfo/issn")
  item.journal_title  = text_if_present info.at_xpath("./jinfo/jtl")
  item.publisher      = text_if_present info.at_xpath("./pubinfo/pub")
  # Might have multiple ISBN's in record, just take first for now
  item.isbn           = text_if_present info.at_xpath("./bkinfo/isbn")

  item.year           = text_if_present info.at_xpath("./pubinfo/dt/@year")
  item.volume         = text_if_present info.at_xpath("./pubinfo/vid")
  item.issue          = text_if_present info.at_xpath("./pubinfo/iid")

  # EBSCO sometimes has crazy long titles, truncate em.
  item.title          = text_helper.truncate( text_if_present( info.at_xpath("./artinfo/tig/atl") ), :length => 200)
  item.start_page     = text_if_present info.at_xpath("./artinfo/ppf")

  item.doi            = text_if_present info.at_xpath("./artinfo/ui[@type='doi']")

  # EBSCO abstracts have an annoying habit of beginning with "Abstract:".
  # Anchor with \A (not ^) and use sub, so only a prefix at the very
  # start of the abstract is removed, not one at the start of any line.
  abstract = text_if_present info.at_xpath("./artinfo/ab")
  item.abstract = abstract && abstract.sub(/\AAbstract\: /, "")

  # authors, only get full display name from EBSCO.
  info.xpath("./artinfo/aug/au").each do |author|
    item.authors << BentoSearch::Author.new(:display => author.text)
  end

  item.format     = sniff_format info
  item.format_str = sniff_format_str info

  item
end
|
302
|
+
|
303
|
+
# This method is not used for normal searching, but can be used by
# other code to retrieve the results of the EBSCO API Info command,
# using connection details configured in this engine. The Info command
# can tell you what databases your account is authorized to see.
#
# Profile id and password are CGI-escaped before being placed in the
# URL, so special characters in them cannot corrupt the request.
#
# Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED.
# No error handling is done here: exceptions from the HTTP client
# propagate to the caller.
def get_info
  url = "#{configuration.base_url}/Info" +
    "?prof=#{CGI.escape configuration.profile_id.to_s}" +
    "&pwd=#{CGI.escape configuration.profile_password.to_s}"

  noko = Nokogiri::XML(http_client.get(url).body)
  noko.remove_namespaces!
  noko
end
|
318
|
+
|
319
|
+
# Sorts offered by this engine. David Walker says pretty much only
# relevance and date are reliable in EBSCOhost cross-search. Each
# :implementation value is what gets sent as EBSCO's sort parameter.
def sort_definitions
  sorts = {}
  sorts["relevance"] = { :implementation => "relevance" }
  sorts["date_desc"] = { :implementation => "date" }
  sorts
end
|
327
|
+
|
328
|
+
# Fielded searches supported by this engine: EBSCO field code =>
# metadata hash giving the semantic field it corresponds to.
def search_field_definitions
  [
    ["AU", :author],
    ["TI", :title],
    ["SU", :subject],
    ["IS", :issn],
    ["IB", :isbn]
  ].each_with_object({}) do |(code, semantic), definitions|
    definitions[code] = { :semantic => semantic }
  end
end
|
337
|
+
|
338
|
+
# Maximum results per page. The limit is actually only '50' if you ask
# for 'full' records, but we shouldn't ever need to do that -- that's
# actually getting fulltext back!
def max_per_page
  200
end
|
343
|
+
|
344
|
+
# Names of the configuration keys this engine lists as required.
def self.required_configuration
  %w[profile_id profile_password]
end
|
347
|
+
|
348
|
+
# Default configuration values for this engine.
def self.default_configuration
  defaults = {}
  # the service root; /Search (or /Info) gets appended to it
  defaults[:base_url]  = "http://eit.ebscohost.com/Services/SearchService.asmx"
  defaults[:databases] = []
  defaults
end
|
355
|
+
|
356
|
+
end
|
@@ -0,0 +1,557 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'httpclient'
|
5
|
+
require 'multi_json'
|
6
|
+
require 'http_client_patch/include_client'
|
7
|
+
|
8
|
+
|
9
|
+
#
|
10
|
+
# For EBSCO Discovery Service. You will need a license to use.
|
11
|
+
#
|
12
|
+
# == Required Configuration
|
13
|
+
#
|
14
|
+
# user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
|
15
|
+
# profile: As given by EBSCO, might be "edsapi"?
|
16
|
+
#
|
17
|
+
# == Highlighting
|
18
|
+
#
|
19
|
+
# EDS has a query-in-context highlighting feature. It is used by default, set
|
20
|
+
# config 'highlighting' to false to disable.
|
21
|
+
# If turned on, you may get <b class="bento_search_highlight"> tags
|
22
|
+
# in title and abstract output if it's on, marked html_safe.
|
23
|
+
#
|
24
|
+
# If highlighting is on, since the abstract will be marked html safe, the
|
25
|
+
# view layer won't be able to safely truncate it. In fact, it's very hard
|
26
|
+
# to do here too, but we do it anyway, by default to approx configuration
|
27
|
+
# truncate_highlighted num of chars (default 280). Set to nil if you don't
|
28
|
+
# want this.
|
29
|
+
#
|
30
|
+
# == Linking
|
31
|
+
#
|
32
|
+
# The link to record in EBSCO interface delivered as "PLink" will be listed
|
33
|
+
# as record main link.
|
34
|
+
#
|
35
|
+
# Any links listed under <CustomLinks> will be listed as other_links, using
|
36
|
+
# configured name provided by EBSCO for CustomLink.
|
37
|
+
#
|
38
|
+
# EDS Response does not have sufficient metadata for us to generate an OpenURL
|
39
|
+
# ourselves. However, in our testing, the first/only CustomLink was an
|
40
|
+
# OpenURL. If configuration.assume_first_custom_link_openurl is
|
41
|
+
# true (as is default), it will be used to create an OpenURL link. However, in
|
42
|
+
# our testing, many records don't have this at all. **Note** Ask EBSCO support
|
43
|
+
# to configure your profile so OpenURLs are ALWAYS included for all records, not
|
44
|
+
# just records with no EBSCO fulltext, to ensure bento_search can get the
|
45
|
+
# openurl.
|
46
|
+
#
|
47
|
+
# As always, you can customize links and other_links with Item Decorators.
|
48
|
+
#
|
49
|
+
# == Technical Notes and Difficulties
|
50
|
+
#
|
51
|
+
# This API is enormously difficult to work with. Also the response is very odd
|
52
|
+
# to deal with and missing some key elements. We quite possibly got something
|
53
|
+
# wrong or non-optimal in this implementation, but we did our best.
|
54
|
+
#
|
55
|
+
# Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
|
56
|
+
# request making a session for every new end-user -- as we have no way to keep
|
57
|
+
# track of end-users, we do it on every request in this implementation.
|
58
|
+
#
|
59
|
+
# Responses don't include much metadata -- we don't actually have journal title,
|
60
|
+
# volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
|
61
|
+
# there depending on your profile configuration, but we're not right now.
|
62
|
+
# Instead we're using the chunk of user-displayable citation/reference it does
|
63
|
+
# give us (which is very difficult to parse into something usable already),
|
64
|
+
# and a custom Decorator to display that instead of normalized citation
|
65
|
+
# made from individual elements.
|
66
|
+
#
|
67
|
+
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
68
|
+
#
|
69
|
+
# Title and abstract data seems to be HTML with tags and character entities and
|
70
|
+
# escaped special chars. We're trusting it and passing it on as html_safe.
|
71
|
+
#
|
72
|
+
# Paging can only happen on even pages, with 'page' rather than 'start'. But
|
73
|
+
# you can pass in 'start' to bento_search, it'll be converted to closest page.
|
74
|
+
#
|
75
|
+
# == Authenticated Users
|
76
|
+
#
|
77
|
+
# EDS allows searches by unauthenticated users, but the results come back with
|
78
|
+
# weird blank hits. In such a case, the BentoSearch adapter will return
|
79
|
+
# records with virtually no metadata, but a placeholder title
|
80
|
+
# (I18n at bento_search.eds.record_not_available ). Also no abstracts
|
81
|
+
# are available from unauth search.
|
82
|
+
#
|
83
|
+
# By default the engine will search as 'guest' unauth user. But config
|
84
|
+
# 'auth' key to true to force all searches to auth (if you are protecting your
|
85
|
+
# app) or pass :auth => true as param into #search method.
|
86
|
+
#
|
87
|
+
# == EDS docs:
|
88
|
+
#
|
89
|
+
# * Console App to demo requests: https://eds-api.ebscohost.com/Console
|
90
|
+
# * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
|
91
|
+
# * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
|
92
|
+
#
|
93
|
+
class BentoSearch::EdsEngine
|
94
|
+
include BentoSearch::SearchEngine
|
95
|
+
|
96
|
+
extend HTTPClientPatch::IncludeClient
|
97
|
+
include_http_client
|
98
|
+
|
99
|
+
# Names of the HTTP headers for the authentication token and the
# session token.
AuthHeader = "x-authenticationToken"
SessionTokenHeader = "x-sessionToken"

# Current known good auth, kept in a class variable. Every read and
# write goes through the mutex below to be threadsafe. sigh.
@@remembered_auth = nil
@@remembered_auth_lock = Mutex.new

# Read the current known good auth (nil until one has been stored).
def self.remembered_auth
  @@remembered_auth_lock.synchronize { @@remembered_auth }
end

# Store +token+ as the current known good auth.
def self.remembered_auth=(token)
  @@remembered_auth_lock.synchronize { @@remembered_auth = token }
end
|
118
|
+
|
119
|
+
# Returns an object extended with some Rails helper modules for text
# handling. It is created on first call and cached in @helper.
def helper
  return @helper if @helper

  @helper = Object.new
  @helper.extend(ActionView::Helpers::TextHelper)         # for truncate
  @helper.extend(ActionView::Helpers::OutputSafetyHelper) # for safe_join
  @helper
end
|
129
|
+
|
130
|
+
|
131
|
+
# Names of the configuration keys this engine lists as required.
def self.required_configuration
  ["user_id", "password", "profile"]
end
|
134
|
+
|
135
|
+
# Should this search run as an authenticated end user?
#
# Decided from config or args, args over-ride config: a non-nil
# args[:auth] wins, otherwise configuration.auth is used. Always
# returns true or false.
def authenticated_end_user?(args)
  override = args[:auth]
  return (override ? true : false) unless override.nil?

  configuration.auth ? true : false
end
|
147
|
+
|
148
|
+
# Build the EDS search URL for the given search +args+.
#
# Reads :query, :search_field, :per_page, :page and :sort from +args+,
# plus base_url, search_mode and highlighting from configuration.
# A :sort key is only added to the URL when it has an entry with an
# :implementation value in #sort_definitions; otherwise it is ignored.
def construct_search_url(args)
  query = "AND,"
  query += "#{args[:search_field]}:" if args[:search_field]

  # Can't have any commas in query, it turns out, although this is not
  # documented. Each comma becomes a space, so "Smith,John" stays two
  # words rather than fusing into one. (The previous
  # gsub("/\,/", "") looked for a literal "/,/" string and so never
  # removed anything.)
  query += args[:query].tr(",", " ")

  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
  url += "&searchmode=#{CGI.escape configuration.search_mode}"
  url += "&highlight=#{configuration.highlighting ? 'y' : 'n' }"

  url += "&resultsperpage=#{args[:per_page]}" if args[:per_page]
  url += "&pagenumber=#{args[:page]}" if args[:page]

  if args[:sort]
    defn  = sort_definitions[args[:sort]]
    value = defn && defn[:implementation]
    url += "&sort=#{CGI.escape value}" if value
  end

  url
end
|
180
|
+
|
181
|
+
|
182
|
+
|
183
|
+
# Runs the search against the EDS API and maps the XML response into a
# BentoSearch::Results holding one BentoSearch::ResultItem per <Record>.
#
# args is the search-argument hash; it is passed on to
# authenticated_end_user? and construct_search_url.
#
# An EdsCommException raised along the way is not propagated: it is
# recorded in results.error (:exception, :http_status, :http_body) and
# the results object is returned as usual.
def search_implementation(args)
  results = BentoSearch::Results.new

  end_user_auth = authenticated_end_user? args

  begin
    with_session(end_user_auth) do |session_token|
      url = construct_search_url(args)
      response = get_with_auth(url, session_token)

      if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
        results.total_items = hits_node.to_i
      end

      response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
        item = BentoSearch::ResultItem.new

        item.title = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )
        # No title on a guest (non-authenticated) search: show a
        # placeholder instead. NOTE(review): presumably EDS withholds
        # such records from guests -- confirm.
        if item.title.nil? && ! end_user_auth
          item.title = I18n.translate("bento_search.eds.record_not_available")
        end

        item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )

        # Believe it or not, the authors are encoded as an escaped
        # XML-ish payload, that we need to parse again and get the
        # actual authors out of. Nokogiri handles fragments for us.
        author_mess = element_by_group(record_xml, "Au")
        author_xml = Nokogiri::XML::fragment(author_mess)
        author_xml.xpath(".//searchLink").each do |author_node|
          item.authors << BentoSearch::Author.new(:display => author_node.text)
        end

        # PLink is main inward facing EBSCO link, put it as main link.
        if direct_link = record_xml.at_xpath("./PLink")
          item.link = direct_link.text
        end

        # Other links may be found in CustomLinks, it seems like usually
        # there will be at least one, hopefully the first one is the OpenURL?
        record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
          # A CustomLink without a <Url> is skipped. Calling #text on the
          # missing node used to raise NoMethodError and abort the whole
          # search. A missing <Name> simply yields a nil label.
          link_url = at_xpath_text(custom_link, "./Url")
          next if link_url.nil?

          item.other_links << BentoSearch::Link.new(
            :url   => link_url,
            :label => at_xpath_text(custom_link, "./Name")
          )
        end

        if (configuration.assume_first_custom_link_openurl &&
            (first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
            (node = first.at_xpath "./Url" )
           )
          openurl = node.text

          # Keep everything from the '?' onward as the OpenURL KEV
          # context object.
          index = openurl.index('?')
          item.openurl_kev_co = openurl.slice index..(openurl.length) if index
        end

        # Format.
        item.format_str = at_xpath_text record_xml, "./Header/PubType"
        # Can't find a list of possible PubTypes to see what's there to try
        # and map to our internal controlled vocab.

        # We have a single blob of human-readable citation, that's also
        # littered with XML-ish tags we need to deal with. We'll save
        # it in a custom location, and use a custom Decorator to display
        # it. It's too hard to preserve <highlight> tags in this blob,
        # so they are lost; source probably doesn't need highlighting.
        citation_mess = element_by_group(record_xml, "Src")
        citation_txt = Nokogiri::XML::fragment(citation_mess).text
        # But strip off some "count of references" often on the end
        # which are confusing and useless.
        item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')

        item.extend CitationMessDecorator

        results << item
      end
    end

    return results
  rescue EdsCommException => e
    results.error ||= {}
    results.error[:exception] = e
    results.error[:http_status] = e.http_status
    results.error[:http_body] = e.http_body
    return results
  end
end
|
283
|
+
|
284
|
+
# Difficult to get individual elements out of an EDS XML <Record>
|
285
|
+
# response, requires weird xpath, so we do it for you.
|
286
|
+
# element_by_group(nokogiri_element, "Ti")
|
287
|
+
#
|
288
|
+
# Returns string or nil
|
289
|
+
def element_by_group(noko, group)
  # Pick the <Item> whose <Group> child has exactly this text and hand
  # its <Data> child to at_xpath_text for text extraction.
  data_xpath = "./Items/Item[child::Group[text()='#{group}']]/Data"
  at_xpath_text(noko, data_xpath)
end
|
292
|
+
|
293
|
+
# Wraps calls to the EDS api with CreateSession and EndSession requests
|
294
|
+
# to EDS. Will pass sessionID in yield from block.
|
295
|
+
#
|
296
|
+
# Second optional arg is whether this is an authenticated user, else
|
297
|
+
# guest access will be used.
|
298
|
+
#
|
299
|
+
# with_session(true) do |session_token|
|
300
|
+
# # can make more requests using session_token,
|
301
|
+
# # EndSession will be called for you at end of block.
|
302
|
+
# end
|
303
|
+
def with_session(auth = false, &block)
  # Make sure an auth token is cached before opening a session.
  auth_token = self.class.remembered_auth
  if auth_token.nil?
    auth_token = self.class.remembered_auth = get_auth_token
  end

  # guest=n for an authenticated end user, guest=y otherwise.
  create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
  response_xml = get_with_auth(create_url)

  session_token = response_xml && at_xpath_text(response_xml, "//SessionToken")
  unless session_token
    # This exception used to be constructed but never raised, so the
    # block went on to run with a nil session token.
    raise EdsCommException.new("Could not get SessionToken")
  end

  begin
    block.yield(session_token)
  ensure
    # Always ask EDS to end the session, even if the block raised.
    if auth_token && session_token
      end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
      get_with_auth(end_url)
    end
  end
end
|
328
|
+
|
329
|
+
# if the xpath responds, return #text of it, else nil.
|
330
|
+
def at_xpath_text(noko, xpath)
  # When at_xpath finds nothing it gives back nil; pass that through
  # rather than calling #text on it.
  node = noko.at_xpath(xpath)
  node.nil? ? nil : node.text
end
|
339
|
+
|
340
|
+
# If EDS has put highlighting tags
|
341
|
+
# in a field, we need to HTML escape the literal values,
|
342
|
+
# while still using the highlighting tokens to put
|
343
|
+
# HTML tags around highlighted terms.
|
344
|
+
#
|
345
|
+
# Second param, if to assume EDS literals are safe HTML, as they
|
346
|
+
# seem to be.
|
347
|
+
def prepare_eds_payload(str, html_safe = false)
  return str if str.blank?

  # Highlighting off: nothing to convert, just optionally mark as safe.
  unless configuration.highlighting
    str = str.html_safe if html_safe
    return str
  end

  # Split on the highlight tokens. The capture group keeps the tokens in
  # the resulting array so they can be swapped for real HTML tags.
  parts =
    str.split(%r{(</?highlight>)}).collect do |substr|
      case substr
      when "<highlight>" then "<b class='bento_search_highlight'>".html_safe
      when "</highlight>" then "</b>".html_safe
      else
        # EDS gives us HTML in the literals; trust it only when the caller
        # asked for that. The html_safe argument used to be ignored on
        # this path and literals were always marked safe. Unmarked
        # literals are escaped by safe_join below.
        html_safe ? substr.html_safe : substr
      end
    end

  # Truncate without cutting in the middle of our own <b> tags. A part
  # that sits inside a highlight tag is always kept whole. Admittedly hacky.
  if configuration.truncate_highlighted
    remaining_length = configuration.truncate_highlighted
    in_tag = false

    truncated_parts = []
    parts.each do |substr|
      # Budget used up and not inside a highlight: add an ellipsis, stop.
      if remaining_length <= 0 && ! in_tag
        truncated_parts << "..."
        break
      end

      if substr =~ /^<b.*\>$/
        truncated_parts << substr
        in_tag = true
      elsif substr == "</b>"
        truncated_parts << substr
        in_tag = false
      elsif ((remaining_length - substr.length) > 0) || in_tag
        truncated_parts << substr
      else
        # This part would exceed the budget: cut it at a space and stop.
        truncated_parts << helper.truncate(substr, :length => remaining_length, :separator => ' ')
        break
      end

      remaining_length = remaining_length - substr.length
    end

    parts = truncated_parts
  end

  helper.safe_join(parts, '')
end
|
405
|
+
|
406
|
+
# Give it a url pointing at EDS API.
|
407
|
+
# Second arg must be a session_token if EDS request requires one.
|
408
|
+
# It will
|
409
|
+
# * Make a GET request
|
410
|
+
# * with memo-ized auth token added to headers
|
411
|
+
# * for XML, with all namespaces removed!
|
412
|
+
# * Parse the response body as XML and return the Nokogiri document
|
413
|
+
# * Try ONCE more to get if EBSCO says bad auth token
|
414
|
+
# * Raise an EdsCommException if can't auth after second try,
|
415
|
+
# or on a timeout, HTTP client error, or non-successful HTTP status.
|
416
|
+
def get_with_auth(url, session_token = nil)
  # Use the cached auth token, fetching (and caching) one if necessary.
  auth_token = self.class.remembered_auth
  unless auth_token
    auth_token = self.class.remembered_auth = get_auth_token
  end

  response = nil
  response_xml = nil
  caught_exception = nil

  begin
    headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
    headers[SessionTokenHeader] = session_token if session_token

    s_time = Time.now
    response = http_client.get(url, nil, headers)
    Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")

    response_xml = Nokogiri::XML(response.body)
    response_xml.remove_namespaces!

    if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
      # bad auth, try again just ONCE with a freshly fetched token
      Rails.logger.debug("EDS auth failed, getting auth again")

      headers[AuthHeader] = self.class.remembered_auth = get_auth_token
      response = http_client.get(url, nil, headers)
      response_xml = Nokogiri::XML(response.body)
      response_xml.remove_namespaces!
    end
  # Timeout::Error is the class the old top-level TimeoutError constant
  # aliased. That alias was removed in Ruby 3.0, where naming it here
  # would raise NameError as soon as any exception reached this clause.
  rescue Timeout::Error, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    caught_exception = e
  end

  # Anything short of a parsed document from a successful HTTP response
  # becomes an EdsCommException carrying whatever response we did get.
  if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
    exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
    if response
      exception.http_body = response.body
      exception.http_status = response.status
    end
    raise exception
  end

  response_xml
end
|
461
|
+
|
462
|
+
|
463
|
+
# Has to make an HTTP request to get EBSCO's auth token.
|
464
|
+
# returns the auth token. We aren't bothering to keep
|
465
|
+
# track of the expiration ourselves, can't necessarily trust
|
466
|
+
# it anyway.
|
467
|
+
#
|
468
|
+
# Raises an EdsCommException on error.
|
469
|
+
def get_auth_token
  # Can't send params as form-encoded, actually need to send a JSON or XML
  # body. Serialize it with MultiJson instead of interpolating into a
  # hand-written JSON template, so a quote or backslash in the credentials
  # is escaped rather than producing a malformed body. to_s keeps a nil
  # setting as "" just like interpolation did.
  body = MultiJson.dump(
    "UserId"   => configuration.user_id.to_s,
    "Password" => configuration.password.to_s
  )

  s_time = Time.now
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
  Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")

  unless HTTP::Status.successful? response.status
    raise EdsCommException.new("Could not get auth", response.status, response.body)
  end

  response_hash = nil
  begin
    response_hash = MultiJson.load response.body
  rescue MultiJson::DecodeError
    # Deliberately ignored: an unparseable body leaves response_hash nil,
    # which the AuthToken check just below reports as an error.
  end

  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
    raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
  end

  response_hash["AuthToken"]
end
|
500
|
+
|
501
|
+
# Default values for this engine's configuration keys.
def self.default_configuration
  {
    :auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
    :base_url => "http://eds-api.ebscohost.com/edsapi/rest/",
    :highlighting => true,
    :truncate_highlighted => 280,
    :assume_first_custom_link_openurl => true,
    # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
    :search_mode => 'all'
  }
end
|
511
|
+
|
512
|
+
# Maps each supported sort key to the value sent to EDS in the `sort`
# URL parameter (looked up via :implementation).
def sort_definitions
  {
    "date_desc" => {:implementation => "date"},
    "relevance" => {:implementation => "relevance"}
    # "date_asc" => {:implementation => "date2"}
  }
end
|
519
|
+
|
520
|
+
# Search field codes accepted by this engine, each optionally tagged with
# a :semantic label.
def search_field_definitions
  {
    "TX" => {:semantic => :all},
    "AU" => {:semantic => :author},
    "TI" => {:semantic => :title},
    "SU" => {:semantic => :subject},
    "SO" => {}, # source, journal name
    "AB" => {}, # abstract
    "IS" => {:semantic => :issn},
    "IB" => {:semantic => :isbn}
  }
end
|
532
|
+
|
533
|
+
# an exception talking to EDS api.
|
534
|
+
# there's a short reason in #message, but also
|
535
|
+
# possibly an http_status and http_body copied
|
536
|
+
# from error EDS response.
|
537
|
+
class EdsCommException < StandardError
  # Inherits from StandardError rather than Exception, so ordinary
  # `rescue => e` handlers can see it. Rescuing EdsCommException by name
  # keeps working exactly as before.
  attr_accessor :http_status, :http_body

  # message is the short reason; status and body are the optional HTTP
  # status and body copied from the failing response.
  def initialize(message, status = nil, body = nil)
    super(message)
    self.http_status = status
    self.http_body = body
  end
end
|
545
|
+
|
546
|
+
|
547
|
+
# A built-in decorator always applied, that over-rides
|
548
|
+
# the ResultItem#published_in display method to use our mess blob
|
549
|
+
# of human readable citation, since we don't have individual elements
|
550
|
+
# to create it from in a normalized way.
|
551
|
+
module CitationMessDecorator
  # Returns the citation text stored under custom_data["citation_blob"].
  def published_in
    custom_data["citation_blob"]
  end
end
|
556
|
+
|
557
|
+
end
|