bento_search 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.md +299 -0
- data/Rakefile +40 -0
- data/app/assets/images/bento_search/large_loader.gif +0 -0
- data/app/assets/javascripts/bento_search.js +3 -0
- data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
- data/app/assets/stylesheets/bento_search/bento.css +4 -0
- data/app/controllers/bento_search/bento_search_controller.rb +7 -0
- data/app/controllers/bento_search/search_controller.rb +72 -0
- data/app/helpers/bento_search_helper.rb +138 -0
- data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
- data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
- data/app/models/bento_search/author.rb +25 -0
- data/app/models/bento_search/link.rb +30 -0
- data/app/models/bento_search/multi_searcher.rb +109 -0
- data/app/models/bento_search/openurl_creator.rb +128 -0
- data/app/models/bento_search/registrar.rb +70 -0
- data/app/models/bento_search/result_item.rb +203 -0
- data/app/models/bento_search/results.rb +54 -0
- data/app/models/bento_search/results/pagination.rb +67 -0
- data/app/models/bento_search/search_engine.rb +219 -0
- data/app/models/bento_search/search_engine/capabilities.rb +65 -0
- data/app/search_engines/bento_search/#Untitled-1# +11 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
- data/app/search_engines/bento_search/eds_engine.rb +557 -0
- data/app/search_engines/bento_search/google_books_engine.rb +184 -0
- data/app/search_engines/bento_search/primo_engine.rb +231 -0
- data/app/search_engines/bento_search/scopus_engine.rb +295 -0
- data/app/search_engines/bento_search/summon_engine.rb +398 -0
- data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
- data/app/views/bento_search/_link.html.erb +4 -0
- data/app/views/bento_search/_search_error.html.erb +22 -0
- data/app/views/bento_search/_std_item.html.erb +39 -0
- data/app/views/bento_search/search/search.html.erb +1 -0
- data/config/locales/en.yml +25 -0
- data/lib/bento_search.rb +29 -0
- data/lib/bento_search/engine.rb +5 -0
- data/lib/bento_search/routes.rb +45 -0
- data/lib/bento_search/version.rb +3 -0
- data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
- data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
- data/lib/http_client_patch/include_client.rb +86 -0
- data/lib/tasks/bento_search_tasks.rake +4 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +56 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +6 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +3100 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/functional/bento_search/search_controller_test.rb +81 -0
- data/test/helper/bento_search_helper_test.rb +125 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/support/mock_engine.rb +23 -0
- data/test/support/test_with_cassette.rb +38 -0
- data/test/test_helper.rb +52 -0
- data/test/unit/#vcr_test.rb# +68 -0
- data/test/unit/ebsco_host_engine_test.rb +134 -0
- data/test/unit/eds_engine_test.rb +105 -0
- data/test/unit/google_books_engine_test.rb +93 -0
- data/test/unit/item_decorators_test.rb +66 -0
- data/test/unit/multi_searcher_test.rb +49 -0
- data/test/unit/openurl_creator_test.rb +111 -0
- data/test/unit/pagination_test.rb +59 -0
- data/test/unit/primo_engine_test.rb +37 -0
- data/test/unit/register_engine_test.rb +50 -0
- data/test/unit/result_item_display_test.rb +39 -0
- data/test/unit/result_item_test.rb +36 -0
- data/test/unit/scopus_engine_test.rb +130 -0
- data/test/unit/search_engine_base_test.rb +178 -0
- data/test/unit/search_engine_test.rb +95 -0
- data/test/unit/summon_engine_test.rb +161 -0
- data/test/unit/xerxes_engine_test.rb +70 -0
- data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
- data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
- data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
- data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
- data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
- data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
- data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
- data/test/vcr_cassettes/gbs/pagination.yml +702 -0
- data/test/vcr_cassettes/gbs/search.yml +340 -0
- data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
- data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
- data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
- data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
- data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
- data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
- data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
- data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
- data/test/vcr_cassettes/summon/search.yml +242 -0
- data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
- data/test/view/std_item_test.rb +98 -0
- metadata +421 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
|
|
2
|
+
# Methods that describe a search engine's capabilities,
|
|
3
|
+
# mixed into SearchEngine. Individual engine implementations
|
|
4
|
+
# will often over-ride some or all of these methods.
|
|
5
|
+
module BentoSearch::SearchEngine::Capabilities
  # Fielded-search support. An engine that supports fielded search
  # over-rides this to return a hash: each key is an engine-specific
  # internal search field, each value is nil or a hash of metadata about
  # that field, including its semantic mapping. For example:
  #
  #     def search_field_definitions
  #       { "intitle" => {:semantic => :title} }
  #     end
  #
  # The default implementation declares no fields.
  def search_field_definitions
    {}
  end

  # Over-ride with a HASH of available sorts. Each key is the string
  # passed in engine.search(...., :sort => key). A single key stands for
  # the whole choice of sort field, ascending/descending, secondary sorts
  # etc., because the interfaces examined typically offered the same
  # thing from one select menu.
  #
  # Keys should where possible be _standard_ keys chosen from those
  # listed in config/i18n/en:bento_search.sort_keys.* -- but a key can be
  # custom to the engine if nothing there fits. The hash value is for
  # internal use by the engine and is a convenient place to keep
  # implementation details.
  #
  # What happens for a sort key not listed here (raise? ignore?) is not
  # decided at this level.
  def sort_definitions
    {}
  end

  # Override to return the integer maximum per-page value. The default
  # is nil.
  def max_per_page
    nil
  end

  # List of the internal search field strings that can be supplied to
  # search(:search_field => x).
  def search_keys
    search_field_definitions.keys
  end

  # List of the semantic search field names that can be supplied to
  # search(:semantic_search_field => x). These are the keys of
  # #semantic_search_map, i.e. Strings.
  def semantic_search_keys
    semantic_search_map.keys
  end

  # Hash from semantic search field name to the engine's internal search
  # field key. Note the semantic name is converted with #to_s, so the
  # keys are Strings even though #search_field_definitions declares them
  # as symbols. Fields with no metadata, or with no :semantic entry, are
  # left out.
  def semantic_search_map
    search_field_definitions.each_with_object({}) do |(field, defn), by_semantic|
      next unless defn && defn[:semantic]

      by_semantic[defn[:semantic].to_s] = field
    end
  end
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
http://blacklight.mse.jhu.edu:3001/resolve?url_ver=Z39.88-2004
|
|
2
|
+
&url_ctx_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Actx
|
|
3
|
+
&ctx_ver=Z39.88-2004
|
|
4
|
+
&ctx_tim=2012-07-25T16%3A21%3A11-04%3A00
|
|
5
|
+
&ctx_id=
|
|
6
|
+
&ctx_enc=info%3Aofi%2Fenc%3AUTF-8
|
|
7
|
+
&rft.title=Monkey+Brains
|
|
8
|
+
&rft.creator=Will.i.am
|
|
9
|
+
&rft.pub=Absolute+Pitch%2C12+Dec+2007
|
|
10
|
+
&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adc
|
|
11
|
+
&rfr_id=info%3Asid%2Fsummon.serialssolutions.com
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
|
|
3
|
+
require 'http_client_patch/include_client'
|
|
4
|
+
require 'httpclient'
|
|
5
|
+
|
|
6
|
+
# Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
|
|
7
|
+
# may be expanded or refactored for EDS too.
|
|
8
|
+
#
|
|
9
|
+
# == Required Configuration
|
|
10
|
+
#
|
|
11
|
+
# * profile_id
|
|
12
|
+
# * profile_password
|
|
13
|
+
# * databases: ARRAY of ebsco shortcodes of what databases to include in search. If you specify one you don't have access to, you get an error message from ebsco, alas.
|
|
14
|
+
#
|
|
15
|
+
# == Note on including databases
|
|
16
|
+
#
|
|
17
|
+
# Need to specifically configure all databases your institution licenses from
|
|
18
|
+
# EBSCO that you want included in the search. You can't just say "all of them"
|
|
19
|
+
# the api doesn't support that, and also more than 30 or 40 starts getting
|
|
20
|
+
# horribly slow. If you include a db you do not have access to, EBSCO api
|
|
21
|
+
# fatal errors.
|
|
22
|
+
#
|
|
23
|
+
# You may want to make sure all your licensed databases are included
|
|
24
|
+
# in your EIT profile. Log onto ebscoadmin, Customize Services, choose
|
|
25
|
+
# EIT profile, choose 'databases' tag.
|
|
26
|
+
#
|
|
27
|
+
# === Download databases from EBSCO api
|
|
28
|
+
#
|
|
29
|
+
# We include a utility to download ALL activated databases for EIT profile
|
|
30
|
+
# and generate a file putting them in a ruby array. You may want to use this
|
|
31
|
+
# file as a starting point, and edit by hand:
|
|
32
|
+
#
|
|
33
|
+
# First configure your EBSCO search engine with bento_search, say under
|
|
34
|
+
# key 'ebscohost'.
|
|
35
|
+
#
|
|
36
|
+
# Then run:
|
|
37
|
+
# rails generate bento_search:pull_ebsco_dbs ebscohost
|
|
38
|
+
#
|
|
39
|
+
# assuming 'ebscohost' is the key you registered the EBSCO search engine.
|
|
40
|
+
#
|
|
41
|
+
# This will create a file at ./config/ebsco_dbs.rb. You may want to hand
|
|
42
|
+
# edit it. Then, in your bento search config, you can:
|
|
43
|
+
#
|
|
44
|
+
# require "#{Rails.root}/config/ebsco_dbs.rb"
|
|
45
|
+
# BentoSearch.register_engine("ebscohost") do |conf|
|
|
46
|
+
# # ....
|
|
47
|
+
# conf.databases = $ebsco_dbs
|
|
48
|
+
# end
|
|
49
|
+
#
|
|
50
|
+
# == Vendor documentation
|
|
51
|
+
#
|
|
52
|
+
# Vendor documentation is a bit scattered, main page:
|
|
53
|
+
# * http://support.ebsco.com/eit/ws.php
|
|
54
|
+
# Some other useful pages we discovered:
|
|
55
|
+
# * http://support.ebsco.com/eit/ws_faq.php
|
|
56
|
+
# * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
|
|
57
|
+
# * Try construct a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
|
|
58
|
+
# * The 'info' service can be used to see what databases you have access to.
|
|
59
|
+
# * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
|
|
60
|
+
#
|
|
61
|
+
#
|
|
62
|
+
#
|
|
63
|
+
#
|
|
64
|
+
# TODO: David Walker tells us we need to configure in EBSCO to make default operator be 'and' instead of phrase search!
|
|
65
|
+
# We Do need to do that to get reasonable results.
|
|
66
|
+
class BentoSearch::EbscoHostEngine
|
|
67
|
+
include BentoSearch::SearchEngine
|
|
68
|
+
|
|
69
|
+
extend HTTPClientPatch::IncludeClient
|
|
70
|
+
include_http_client
|
|
71
|
+
|
|
72
|
+
# Include some rails helpers, e.g. text_helper.truncate
|
|
73
|
+
def text_helper
  # One shared helper object, built on first use and kept in the
  # @@truncate class variable, extended with ActionView's TextHelper so
  # callers can use e.g. text_helper.truncate.
  @@truncate ||= Object.new.extend(ActionView::Helpers::TextHelper)
end
|
|
80
|
+
|
|
81
|
+
# Runs the search described by +args+ against the EBSCO EIT API and
# returns a BentoSearch::Results.
#
# Failures are reported through results.error instead of being raised:
# * :exception  -- a rescued timeout / HTTPClient / Nokogiri exception
# * :status     -- the HTTP status, whenever a response was received
# * :error_info -- the <Message> text of an EBSCO <Fault>, if any
def search_implementation(args)
  url = query_url(args)

  results = BentoSearch::Results.new
  xml, response, exception = nil, nil, nil

  begin
    response = http_client.get(url)
    xml = Nokogiri::XML(response.body)
  rescue Timeout::Error, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    # Timeout::Error is the class the top-level TimeoutError constant used
    # to alias. That alias is deprecated and may not be defined at all, in
    # which case naming it here would raise NameError while handling the
    # very error we are trying to capture.
    exception = e
  end

  # error handle
  if response.nil? ||
     xml.nil? ||
     exception ||
     !HTTP::Status.successful?(response.status) ||
     (fault = xml.at_xpath("./Fault"))

    results.error ||= {}
    results.error[:exception] = exception if exception
    results.error[:status] = response.status if response
    results.error[:error_info] = text_if_present(fault.at_xpath("./Message")) if fault

    return results
  end

  # the namespaces they provide are weird and don't help and sometimes
  # not clearly even legal. Remove em!
  xml.remove_namespaces!

  # Only record a total when the response actually carries a <Hits>
  # element, rather than failing with NoMethodError on nil.
  hits = xml.at_xpath("./searchResponse/Hits")
  results.total_items = hits.text.to_i if hits

  xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
    results << item_from_xml(xml_rec)
  end

  results
end
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# Pass in a nokogiri node, return node.text, or nil if
|
|
129
|
+
# arg was nil or node.text was blank?
|
|
130
|
+
def text_if_present(node)
  # A missing node and a node with blank text are treated alike: nil.
  return nil if node.nil? || node.text.blank?

  node.text
end
|
|
137
|
+
|
|
138
|
+
# Figure out proper controlled format for an ebsco item.
|
|
139
|
+
# EBSCOHost (not sure about EDS) publication/document type
|
|
140
|
+
# are totally unusable non-normalized vocabulary for controlled
|
|
141
|
+
# types, we'll try to guess from other metadata features.
|
|
142
|
+
def sniff_format(xml_node)
  # The first matching rule wins: book info, then dissertation info, then
  # journal info (an article when article info is present too, otherwise
  # a serial). Anything else has no controlled format.
  return nil if xml_node.nil?
  return "Book" if xml_node.at_xpath("./bkinfo/*")
  return :dissertation if xml_node.at_xpath("./dissinfo/*")
  return nil unless xml_node.at_xpath("./jinfo/*")

  xml_node.at_xpath("./artinfo/*") ? "Article" : :serial
end
|
|
157
|
+
|
|
158
|
+
# Figure out uncontrolled literal string format to show to users.
|
|
159
|
+
# We're going to try combining Ebsco Publication Type and Document Type,
|
|
160
|
+
# when both are present. Then a few hard-coded special transformations.
|
|
161
|
+
def sniff_format_str(xml_node)
  pubtype, doctype = ["./artinfo/pubtype", "./artinfo/doctype"].map do |path|
    text_if_present(xml_node.at_xpath(path))
  end

  # Publication type first, then document type when it says something
  # different; drop missing values, titlecase, and de-duplicate.
  labels = [pubtype]
  labels << doctype unless doctype == pubtype
  labels = labels.compact.map(&:titlecase).uniq

  # some hard-coded cases for better user-displayable string
  if labels.first == "Academic Journal" && labels.last == "Article"
    "Journal Article"
  elsif labels.first == "Periodical" && labels.length > 1
    labels.last
  else
    labels.join(": ")
  end
end
|
|
185
|
+
|
|
186
|
+
# pass in <rec> nokogiri, will determine best link
|
|
187
|
+
def get_link(xml)
  # The PDF link is preferred; the plink is the fallback.
  pdf_link = text_if_present(xml.at_xpath("./pdfLink"))
  return pdf_link if pdf_link

  text_if_present(xml.at_xpath("./plink"))
end
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# it's unclear if ebsco API actually allows escaping of special chars,
|
|
193
|
+
# or what the special chars are. But we know parens are special, can't
|
|
194
|
+
# escape em, we'll just remove em (should not affect search).
|
|
195
|
+
def ebsco_query_escape(txt)
  # Each opening or closing paren becomes a single space.
  txt.gsub(/\(|\)/, " ")
end
|
|
198
|
+
|
|
199
|
+
# Actually turn the user's query into an EBSCO "AND" boolean query,
|
|
200
|
+
# seems only way to get decent results where terms can match cross-fields
|
|
201
|
+
# at the moment, for EIT. We'll see for EDS.
|
|
202
|
+
def ebsco_query_prepare(txt)
  # Split on runs of whitespace. The capture group makes String#split
  # hand back every double-quoted phrase as a token of its own, so a
  # phrase stays together as one unit.
  tokens = txt.split(%r{[[:space:]]+|("[^"]+")})

  boolean_words = ["AND", "OR", "NOT"]

  kept = tokens.map do |token|
    # Quoted phrases pass through untouched; bare terms lose their parens.
    (token =~ /^\".*\"$/) ? token : ebsco_query_escape(token)
  end.reject do |token|
    # Drop empty strings, bare boolean operators (inside a quoted phrase
    # they are fine), and terms that are punctuation only.
    token.blank? ||
      boolean_words.include?(token) ||
      token =~ /\A[^[[:alnum:]]]+\Z/
  end

  kept.join(" AND ")
end
|
|
226
|
+
|
|
227
|
+
# Builds the EIT Search URL for +args+: :query, plus optional
# :search_field, :start, :per_page and :sort.
#
# Credentials and database codes are URL-escaped, so reserved characters
# in them (&, #, space ...) cannot corrupt the query string. A :sort key
# that is not in sort_definitions falls back to relevance instead of
# raising NoMethodError on nil.
#
# As before, args[:sort] is defaulted in place to "relevance" when the
# caller did not supply one.
def query_url(args)
  params = [
    "prof=#{CGI.escape configuration.profile_id.to_s}",
    "pwd=#{CGI.escape configuration.profile_password.to_s}"
  ]

  query = ebsco_query_prepare args[:query]

  # wrap in (FI $query) if fielded search
  query = "(#{args[:search_field]} #{query})" if args[:search_field]

  params << "query=#{CGI.escape query}"

  # startrec is 1-based for ebsco, not 0-based like for us.
  params << "startrec=#{args[:start] + 1}" if args[:start]
  params << "numrec=#{args[:per_page]}" if args[:per_page]

  # Make relevance our default sort, rather than EBSCO's date.
  args[:sort] ||= "relevance"
  sort_defn = sort_definitions[args[:sort]] || sort_definitions["relevance"]
  sort_param = sort_defn && sort_defn[:implementation]
  params << "sort=#{CGI.escape sort_param.to_s}" if sort_param

  # Contrary to docs, don't pass these comma-separated, pass em in
  # separate query params.
  Array(configuration.databases).each do |db|
    params << "db=#{CGI.escape db.to_s}"
  end

  "#{configuration.base_url}/Search?#{params.join('&')}"
end
|
|
257
|
+
|
|
258
|
+
# pass in a nokogiri representing an EBSCO <rec> result,
|
|
259
|
+
# we'll turn it into a BentoSearch::ResultItem.
|
|
260
|
+
def item_from_xml(xml_rec)
  info = xml_rec.at_xpath("./header/controlInfo")

  # Text of the node at +path+ below <controlInfo>, or nil when that
  # node is missing or blank.
  field = lambda { |path| text_if_present(info.at_xpath(path)) }

  item = BentoSearch::ResultItem.new

  item.link = get_link(xml_rec)

  item.issn = field.call("./jinfo/issn")
  item.journal_title = field.call("./jinfo/jtl")
  item.publisher = field.call("./pubinfo/pub")
  # Might have multiple ISBN's in record, just take first for now
  item.isbn = field.call("./bkinfo/isbn")

  item.year = field.call("./pubinfo/dt/@year")
  item.volume = field.call("./pubinfo/vid")
  item.issue = field.call("./pubinfo/iid")

  # EBSCO sometimes has crazy long titles, truncate em.
  item.title = text_helper.truncate(field.call("./artinfo/tig/atl"), :length => 200)
  item.start_page = field.call("./artinfo/ppf")

  item.doi = field.call("./artinfo/ui[@type='doi']")

  item.abstract = field.call("./artinfo/ab")
  # EBSCO abstracts have an annoying habit of beginning with "Abstract:".
  # Anchor with \A so only a prefix at the very start of the abstract is
  # removed; ^ would also strip it from the start of any later line.
  item.abstract.sub!(/\AAbstract: /, "") if item.abstract

  # authors, only get full display name from EBSCO.
  info.xpath("./artinfo/aug/au").each do |author|
    item.authors << BentoSearch::Author.new(:display => author.text)
  end

  item.format = sniff_format info
  item.format_str = sniff_format_str info

  item
end
|
|
302
|
+
|
|
303
|
+
# This method is not used for normal searching, but can be used by
|
|
304
|
+
# other code to retrieve the results of the EBSCO API Info command,
|
|
305
|
+
# using connection details configured in this engine. The Info command
|
|
306
|
+
# can tell you what databases your account is authorized to see.
|
|
307
|
+
# Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED
|
|
308
|
+
def get_info
  # The profile id and password are URL-escaped so reserved characters
  # in them (&, #, space ...) cannot corrupt the query string.
  url = "#{configuration.base_url}/Info" \
        "?prof=#{CGI.escape configuration.profile_id.to_s}" \
        "&pwd=#{CGI.escape configuration.profile_password.to_s}"

  noko = Nokogiri::XML(http_client.get(url).body)

  # Namespaces are stripped from the parsed document before it is returned.
  noko.remove_namespaces!

  noko
end
|
|
318
|
+
|
|
319
|
+
# David Walker says pretty much only relevance and date are reliable
|
|
320
|
+
# in EBSCOhost cross-search.
|
|
321
|
+
def sort_definitions
  # bento_search sort key => metadata; :implementation is the value sent
  # as the "sort" parameter of the search URL.
  {
    "relevance" => { implementation: "relevance" },
    "date_desc" => { implementation: "date" }
  }
end
|
|
327
|
+
|
|
328
|
+
# Search field codes this engine accepts as :search_field, each mapped to
# its semantic field name.
def search_field_definitions
  {
    "AU" => { semantic: :author },
    "TI" => { semantic: :title },
    "SU" => { semantic: :subject },
    "IS" => { semantic: :issn },
    "IB" => { semantic: :isbn }
  }
end
|
|
337
|
+
|
|
338
|
+
# Largest per-page value this engine reports.
def max_per_page
  # The limit is actually only 50 when 'full' records are requested, but
  # that means getting fulltext back, which we don't think we ever need.
  200
end
|
|
343
|
+
|
|
344
|
+
# Names of the configuration keys this engine declares as required.
def self.required_configuration
  %w[profile_id profile_password]
end
|
|
347
|
+
|
|
348
|
+
# Default configuration values for this engine.
def self.default_configuration
  {
    # Service root; "/Search" or "/Info" is appended when building URLs.
    base_url: "http://eit.ebscohost.com/Services/SearchService.asmx",
    databases: []
  }
end
|
|
355
|
+
|
|
356
|
+
end
|
|
@@ -0,0 +1,557 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require 'httpclient'
|
|
5
|
+
require 'multi_json'
|
|
6
|
+
require 'http_client_patch/include_client'
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#
|
|
10
|
+
# For EBSCO Discovery Service. You will need a license to use.
|
|
11
|
+
#
|
|
12
|
+
# == Required Configuration
|
|
13
|
+
#
|
|
14
|
+
# user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
|
|
15
|
+
# profile: As given by EBSCO, might be "edsapi"?
|
|
16
|
+
#
|
|
17
|
+
# == Highlighting
|
|
18
|
+
#
|
|
19
|
+
# EDS has a query-in-context highlighting feature. It is used by default; set
|
|
20
|
+
# config 'highlighting' to false to disable.
|
|
21
|
+
# If turned on, you may get <b class="bento_search_highlight"> tags
|
|
22
|
+
# in title and abstract output if it's on, marked html_safe.
|
|
23
|
+
#
|
|
24
|
+
# If highlighting is on, since the abstract will be marked html safe, the
|
|
25
|
+
# view layer won't be able to safely truncate it. In fact, it's very hard
|
|
26
|
+
# to do here too, but we do it anyway, by default to approx configuration
|
|
27
|
+
# truncate_highlighted num of chars (default 280). Set to nil if you don't
|
|
28
|
+
# want this.
|
|
29
|
+
#
|
|
30
|
+
# == Linking
|
|
31
|
+
#
|
|
32
|
+
# The link to record in EBSCO interface delivered as "PLink" will be listed
|
|
33
|
+
# as record main link.
|
|
34
|
+
#
|
|
35
|
+
# Any links listed under <CustomLinks> will be listed as other_links, using
|
|
36
|
+
# configured name provided by EBSCO for CustomLink.
|
|
37
|
+
#
|
|
38
|
+
# EDS Response does not have sufficient metadata for us to generate an OpenURL
|
|
39
|
+
# ourselves. However, in our testing, the first/only CustomLink was an
|
|
40
|
+
# OpenURL. If configuration.assume_first_custom_link_openurl is
|
|
41
|
+
# true (as is default), it will be used to create an OpenURL link. However, in
|
|
42
|
+
# our testing, many records don't have this at all. **Note** Ask EBSCO support
|
|
43
|
+
# to configure your profile so OpenURLs are ALWAYS included for all records, not
|
|
44
|
+
# just records with no EBSCO fulltext, to ensure bento_search can get the
|
|
45
|
+
# openurl.
|
|
46
|
+
#
|
|
47
|
+
# As always, you can customize links and other_links with Item Decorators.
|
|
48
|
+
#
|
|
49
|
+
# == Technical Notes and Difficulties
|
|
50
|
+
#
|
|
51
|
+
# This API is enormously difficult to work with. Also the response is very odd
|
|
52
|
+
# to deal with and missing some key elements. We quite possibly got something
|
|
53
|
+
# wrong or non-optimal in this implementation, but we did our best.
|
|
54
|
+
#
|
|
55
|
+
# Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
|
|
56
|
+
# request making a session for every new end-user -- as we have no way to keep
|
|
57
|
+
# track of end-users, we do it on every request in this implementation.
|
|
58
|
+
#
|
|
59
|
+
# Responses don't include much metadata -- we don't actually have journal title,
|
|
60
|
+
# volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
|
|
61
|
+
# there depending on your profile configuration, but we're not right now.
|
|
62
|
+
# Instead we're using the chunk of user-displayable citation/reference it does
|
|
63
|
+
# give us (which is very difficult to parse into something usable already),
|
|
64
|
+
# and a custom Decorator to display that instead of normalized citation
|
|
65
|
+
# made from individual elements.
|
|
66
|
+
#
|
|
67
|
+
# EBSCO says they plan to improve some of these issues in a September 2012 release.
|
|
68
|
+
#
|
|
69
|
+
# Title and abstract data seems to be HTML with tags and character entities and
|
|
70
|
+
# escaped special chars. We're trusting it and passing it on as html_safe.
|
|
71
|
+
#
|
|
72
|
+
# Paging can only happen on even pages, with 'page' rather than 'start'. But
|
|
73
|
+
# you can pass in 'start' to bento_search, it'll be converted to closest page.
|
|
74
|
+
#
|
|
75
|
+
# == Authenticated Users
|
|
76
|
+
#
|
|
77
|
+
# EDS allows searches by unauthenticated users, but the results come back with
|
|
78
|
+
# weird blank hits. In such a case, the BentoSearch adapter will return
|
|
79
|
+
# records with virtually no metadata, but a title
|
|
80
|
+
# (I18n at bento_search.eds.record_not_available ). Also no abstracts
|
|
81
|
+
# are available from unauth search.
|
|
82
|
+
#
|
|
83
|
+
# By default the engine will search as 'guest' unauth user. But config
|
|
84
|
+
# 'auth' key to true to force all searches to auth (if you are protecting your
|
|
85
|
+
# app) or pass :auth => true as param into #search method.
|
|
86
|
+
#
|
|
87
|
+
# == EDS docs:
|
|
88
|
+
#
|
|
89
|
+
# * Console App to demo requests: https://eds-api.ebscohost.com/Console
|
|
90
|
+
# * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
|
|
91
|
+
# * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
|
|
92
|
+
#
|
|
93
|
+
class BentoSearch::EdsEngine
|
|
94
|
+
include BentoSearch::SearchEngine
|
|
95
|
+
|
|
96
|
+
extend HTTPClientPatch::IncludeClient
|
|
97
|
+
include_http_client
|
|
98
|
+
|
|
99
|
+
AuthHeader = "x-authenticationToken"
|
|
100
|
+
SessionTokenHeader = "x-sessionToken"
|
|
101
|
+
|
|
102
|
+
@@remembered_auth = nil
|
|
103
|
+
@@remembered_auth_lock = Mutex.new
|
|
104
|
+
# Class variable to save current known good auth
|
|
105
|
+
# uses a mutex to be threadsafe. sigh.
|
|
106
|
+
def self.remembered_auth
  # Read the shared token while holding the lock.
  @@remembered_auth_lock.synchronize { @@remembered_auth }
end
|
|
111
|
+
# Set class variable with current known good auth.
|
|
112
|
+
# uses a mutex to be threadsafe.
|
|
113
|
+
def self.remembered_auth=(token)
  # Store the shared token while holding the lock.
  @@remembered_auth_lock.synchronize { @@remembered_auth = token }
end
|
|
118
|
+
|
|
119
|
+
# an object that includes some Rails helper modules for
|
|
120
|
+
# text handling.
|
|
121
|
+
def helper
  # Built once per engine instance and memoized in @helper.
  @helper ||= Object.new.tap do |obj|
    obj.extend ActionView::Helpers::TextHelper # for truncate
    obj.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
  end
end
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Names of the configuration keys this engine declares as required.
def self.required_configuration
  ["user_id", "password", "profile"]
end
|
|
134
|
+
|
|
135
|
+
# From config or args, args over-ride config
|
|
136
|
+
def authenticated_end_user?(args)
  # Always returns strictly true or false. An explicit args[:auth]
  # (anything other than nil, including false) wins; otherwise the
  # truthiness of configuration.auth decides.
  #
  # The previous version coerced the config value to a boolean first and
  # then tested it for nil, so its final `else` branch could never run;
  # the results here are the same without the dead code.
  requested = args[:auth]
  return (requested ? true : false) unless requested.nil?

  configuration.auth ? true : false
end
|
|
147
|
+
|
|
148
|
+
# Builds the EDS search request URL.
#
# args - Hash; reads :query (required String) and the optional
#        :search_field, :per_page, :page and :sort keys. A :sort value is
#        only honoured when sort_definitions has an :implementation for it.
#
# Returns the URL as a String.
def construct_search_url(args)
  query = "AND,"
  query += "#{args[:search_field]}:" if args[:search_field]

  # Can't have any commas in query, it turns out, although this is not
  # documented. The pattern has to be a plain "," -- the String "/\,/" only
  # matches the literal text "/,/" and so never removed anything. Commas
  # become spaces so that the words on either side are not run together.
  query += args[:query].gsub(",", " ")

  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
  url += "&searchmode=#{CGI.escape configuration.search_mode}"
  url += "&highlight=#{configuration.highlighting ? 'y' : 'n'}"

  url += "&resultsperpage=#{args[:per_page]}" if args[:per_page]
  url += "&pagenumber=#{args[:page]}" if args[:page]

  if args[:sort]
    definition = sort_definitions[args[:sort]]
    sort_value = definition && definition[:implementation]
    url += "&sort=#{CGI.escape sort_value}" if sort_value
  end

  url
end
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# Runs a search against the EDS API and maps the XML response onto
# BentoSearch objects.
#
# args - search arguments Hash; passed to authenticated_end_user? and to
#        construct_search_url.
#
# Returns a BentoSearch::Results. An EdsCommException raised while talking
# to EDS is not propagated: it is recorded in results.error (:exception,
# :http_status, :http_body) and the results are returned anyway.
def search_implementation(args)
  results = BentoSearch::Results.new

  end_user_auth = authenticated_end_user? args

  begin
    with_session(end_user_auth) do |session_token|
      url = construct_search_url(args)

      response = get_with_auth(url, session_token)

      if (total_hits = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
        results.total_items = total_hits.to_i
      end

      response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
        item = BentoSearch::ResultItem.new

        item.title = prepare_eds_payload(element_by_group(record_xml, "Ti"), true)
        # With no title on a guest (non-authenticated) search, show a
        # placeholder message instead.
        if item.title.nil? && !end_user_auth
          item.title = I18n.translate("bento_search.eds.record_not_available")
        end

        item.abstract = prepare_eds_payload(element_by_group(record_xml, "Ab"), true)

        # Believe it or not, the authors are encoded as an escaped
        # XML-ish payload, that we need to parse again and get the
        # actual authors out of. Nokogiri handles the fragment for us.
        author_mess = element_by_group(record_xml, "Au")
        author_xml  = Nokogiri::XML::fragment(author_mess)
        author_xml.xpath(".//searchLink").each do |author_node|
          item.authors << BentoSearch::Author.new(:display => author_node.text)
        end

        # PLink is main inward facing EBSCO link, put it as main link.
        if (direct_link = record_xml.at_xpath("./PLink"))
          item.link = direct_link.text
        end

        # Other links may be found in CustomLinks. A CustomLink without a
        # <Url> is skipped and a missing <Name> yields a nil label, rather
        # than raising NoMethodError on a nil node.
        record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
          link_url = at_xpath_text(custom_link, "./Url")
          next if link_url.nil?

          item.other_links << BentoSearch::Link.new(
            :url   => link_url,
            :label => at_xpath_text(custom_link, "./Name")
          )
        end

        # Optionally treat the first custom link URL found as an OpenURL and
        # keep everything from its '?' onward as the KEV context object.
        if configuration.assume_first_custom_link_openurl &&
           (openurl = at_xpath_text(record_xml, "./CustomLinks/CustomLink/Url"))
          query_start = openurl.index('?')
          item.openurl_kev_co = openurl.slice(query_start..(openurl.length)) if query_start
        end

        # Format. Can't find a list of possible PubTypes to see what's
        # there to try and map to our internal controlled vocab.
        item.format_str = at_xpath_text record_xml, "./Header/PubType"

        # We have a single blob of human-readable citation, that's also
        # littered with XML-ish tags. Only its text is kept (so any
        # <highlight> tags are lost) and it is stored in custom_data for
        # CitationMessDecorator to display.
        citation_mess = element_by_group(record_xml, "Src")
        citation_txt  = Nokogiri::XML::fragment(citation_mess).text
        # Strip off the "count of references" often on the end, which is
        # confusing and useless.
        item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')

        item.extend CitationMessDecorator

        results << item
      end
    end

    return results
  rescue EdsCommException => e
    results.error ||= {}
    results.error[:exception]   = e
    results.error[:http_status] = e.http_status
    results.error[:http_body]   = e.http_body
    return results
  end
end
|
|
283
|
+
|
|
284
|
+
# Pulls one element out of an EDS XML <Record>: the text of the <Data>
# belonging to the <Item> whose <Group> child has the given text.
#
#     element_by_group(nokogiri_element, "Ti")
#
# Returns string or nil
def element_by_group(noko, group)
  data_xpath = "./Items/Item[child::Group[text()='#{group}']]/Data"
  at_xpath_text(noko, data_xpath)
end
|
|
292
|
+
|
|
293
|
+
# Wraps calls to the EDS api with CreateSession and EndSession requests
# to EDS. Yields the session token to the block.
#
# Optional arg is whether this is an authenticated user, else guest access
# will be used.
#
#     with_session(true) do |session_token|
#       # can make more requests using session_token,
#       # EndSession will be called for you at end of block.
#     end
#
# Raises an EdsCommException when the CreateSession response carries no
# SessionToken (in which case the block is not run).
def with_session(auth = false, &block)
  auth_token = self.class.remembered_auth
  if auth_token.nil?
    auth_token = self.class.remembered_auth = get_auth_token
  end

  create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
  response_xml = get_with_auth(create_url)

  session_token = response_xml && at_xpath_text(response_xml, "//SessionToken")
  if session_token.nil? || session_token.empty?
    # The exception used to be built but never raised, which let the block
    # run with a nil session token.
    raise EdsCommException.new("Could not get SessionToken")
  end

  begin
    block.yield(session_token)
  ensure
    # Always end the session we created, even if the block raised.
    if auth_token
      end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
      get_with_auth(end_url)
    end
  end
end
|
|
328
|
+
|
|
329
|
+
# Looks up the first node matching xpath under noko and returns its #text,
# or nil when nothing matches.
def at_xpath_text(noko, xpath)
  match = noko.at_xpath(xpath)
  match.nil? ? nil : match.text
end
|
|
339
|
+
|
|
340
|
+
# Turns an EDS field value into display-ready text.
#
# If EDS has put <highlight> tags in a field they are replaced with
# <b class='bento_search_highlight'> markup, and the result is optionally
# truncated to configuration.truncate_highlighted characters of text
# without cutting through that markup.
#
# str       - the raw field value; blank values are returned untouched.
# html_safe - only consulted when configuration.highlighting is off: whether
#             to mark the EDS literal as safe HTML. With highlighting on,
#             the literals are always trusted as safe HTML.
def prepare_eds_payload(str, html_safe = false)
  return str if str.blank?

  unless configuration.highlighting
    str = str.html_safe if html_safe
    return str
  end

  open_tag  = "<b class='bento_search_highlight'>".html_safe
  close_tag = "</b>".html_safe

  parts =
    str.split(%r{(</?highlight>)}).collect do |substr|
      case substr
      when "<highlight>" then open_tag
      when "</highlight>" then close_tag
      # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
      else substr.html_safe
      end
    end

  # Truncate without ending up in the middle of our own html tags: text
  # inside a highlight is never cut, and only text (not the markup we just
  # generated) counts against the length budget.
  if configuration.truncate_highlighted
    remaining_length = configuration.truncate_highlighted
    in_tag = false

    truncated_parts = []
    parts.each do |substr|
      if remaining_length <= 0 && !in_tag
        truncated_parts << "..."
        break
      end

      if substr == open_tag
        truncated_parts << substr
        in_tag = true
      elsif substr == close_tag
        truncated_parts << substr
        in_tag = false
      elsif in_tag || substr.length <= remaining_length
        # A part that exactly fills the budget is kept whole; if anything
        # follows it, the next iteration appends the ellipsis.
        truncated_parts << substr
        remaining_length -= substr.length
      else
        truncated_parts << helper.truncate(substr, :length => remaining_length, :separator => ' ')
        break
      end
    end

    parts = truncated_parts
  end

  helper.safe_join(parts, '')
end
|
|
405
|
+
|
|
406
|
+
# Give it a url pointing at EDS API.
# Second arg must be a session_token if EDS request requires one.
# It will
# * Make a GET request
# * with memo-ized auth token added to headers
# * asking for XML
# * Parse the response body as XML, with all namespaces removed, and
#   return the parsed Nokogiri document
# * Try ONCE more to get if EBSCO says bad auth token
# * Raise an EdsCommException (carrying the HTTP status and body when a
#   response was received) if a rescued communication error occurred or
#   the final HTTP status is not successful.
def get_with_auth(url, session_token = nil)
  auth_token = self.class.remembered_auth
  unless auth_token
    auth_token = self.class.remembered_auth = get_auth_token
  end

  response = nil
  response_xml = nil
  caught_exception = nil

  begin
    headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
    headers[SessionTokenHeader] = session_token if session_token

    s_time = Time.now
    response = http_client.get(url, nil, headers)
    Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")

    response_xml = Nokogiri::XML(response.body)
    response_xml.remove_namespaces!

    if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
      # bad auth, try again just ONCE with a freshly fetched token
      Rails.logger.debug("EDS auth failed, getting auth again")

      headers[AuthHeader] = self.class.remembered_auth = get_auth_token
      response = http_client.get(url, nil, headers)
      response_xml = Nokogiri::XML(response.body)
      response_xml.remove_namespaces!
    end
  # Timeout::Error replaces the deprecated top-level TimeoutError constant;
  # HTTPClient::TimeoutError is listed too because httpclient's timeout
  # errors are not Timeout::Error subclasses.
  rescue Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    caught_exception = e
  end

  if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
    exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
    if response
      exception.http_body = response.body
      exception.http_status = response.status
    end
    raise exception
  end

  response_xml
end
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# Has to make an HTTP request to get EBSCO's auth token.
# Returns the auth token. We aren't bothering to keep
# track of the expiration ourselves, can't necessarily trust
# it anyway.
#
# Raises an EdsCommException on a non-successful HTTP status, or when the
# response body is not a JSON object containing "AuthToken".
def get_auth_token
  # Can't send params as form-encoded, actually need to send a JSON or XML
  # body. The JSON is produced by the serializer rather than by string
  # interpolation so that quotes or backslashes in the credentials are
  # escaped properly. (#to_s mirrors what interpolation did for nil.)
  body = MultiJson.dump(
    "UserId"   => configuration.user_id.to_s,
    "Password" => configuration.password.to_s
  )

  s_time = Time.now
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
  Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")

  unless HTTP::Status.successful? response.status
    raise EdsCommException.new("Could not get auth", response.status, response.body)
  end

  response_hash = nil
  begin
    response_hash = MultiJson.load response.body
  rescue MultiJson::DecodeError
    # Deliberately ignored: response_hash stays nil and the check below
    # raises the EdsCommException with the raw body attached.
  end

  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
    raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
  end

  response_hash["AuthToken"]
end
|
|
500
|
+
|
|
501
|
+
# Default values for this engine's configuration keys.
# NOTE(review): base_url defaults to plain http while auth_url is https --
# confirm whether that is intentional.
def self.default_configuration
  defaults = {}
  defaults[:auth_url] = 'https://eds-api.ebscohost.com/authservice/rest/uidauth'
  defaults[:base_url] = "http://eds-api.ebscohost.com/edsapi/rest/"
  defaults[:highlighting] = true
  defaults[:truncate_highlighted] = 280
  defaults[:assume_first_custom_link_openurl] = true
  # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
  defaults[:search_mode] = 'all'
  defaults
end
|
|
511
|
+
|
|
512
|
+
# Maps each supported sort key to the value sent to EDS for it
# (under :implementation).
def sort_definitions
  date_descending = { :implementation => "date" }
  by_relevance    = { :implementation => "relevance" }
  # not enabled: "date_asc" => {:implementation => "date2"}

  { "date_desc" => date_descending, "relevance" => by_relevance }
end
|
|
519
|
+
|
|
520
|
+
# Maps each supported EDS search field code to its definition hash; the
# :semantic entry, where present, names the field's generic meaning.
def search_field_definitions
  Hash[[
    ["TX", {:semantic => :all}],
    ["AU", {:semantic => :author}],
    ["TI", {:semantic => :title}],
    ["SU", {:semantic => :subject}],
    ["SO", {}], # source, journal name
    ["AB", {}], # abstract
    ["IS", {:semantic => :issn}],
    ["IB", {:semantic => :isbn}]
  ]]
end
|
|
532
|
+
|
|
533
|
+
# An exception talking to the EDS api.
# There's a short reason in #message, but also
# possibly an http_status and http_body copied
# from the error EDS response.
#
# Inherits from StandardError (not Exception) so that it behaves like an
# ordinary application error: a plain `rescue` catches it, and it is not
# lumped together with interpreter-level exceptions. Code that rescues
# EdsCommException or Exception explicitly is unaffected.
class EdsCommException < StandardError
  attr_accessor :http_status, :http_body

  # message - short description of what went wrong.
  # status  - optional HTTP status of the failed EDS response.
  # body    - optional HTTP body of the failed EDS response.
  def initialize(message, status = nil, body = nil)
    super(message)
    self.http_status = status
    self.http_body = body
  end
end
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
# A built-in decorator always applied, that over-rides
# the ResultItem#published_in display method to use our blob
# of human readable citation, since we don't have individual elements
# to create it from in a normalized way.
module CitationMessDecorator
  # Returns whatever was stored under "citation_blob" in custom_data.
  def published_in
    blob_key = "citation_blob"
    custom_data[blob_key]
  end
end
|
|
556
|
+
|
|
557
|
+
end
|