bento_search 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.md +299 -0
- data/Rakefile +40 -0
- data/app/assets/images/bento_search/large_loader.gif +0 -0
- data/app/assets/javascripts/bento_search.js +3 -0
- data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
- data/app/assets/stylesheets/bento_search/bento.css +4 -0
- data/app/controllers/bento_search/bento_search_controller.rb +7 -0
- data/app/controllers/bento_search/search_controller.rb +72 -0
- data/app/helpers/bento_search_helper.rb +138 -0
- data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
- data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
- data/app/models/bento_search/author.rb +25 -0
- data/app/models/bento_search/link.rb +30 -0
- data/app/models/bento_search/multi_searcher.rb +109 -0
- data/app/models/bento_search/openurl_creator.rb +128 -0
- data/app/models/bento_search/registrar.rb +70 -0
- data/app/models/bento_search/result_item.rb +203 -0
- data/app/models/bento_search/results.rb +54 -0
- data/app/models/bento_search/results/pagination.rb +67 -0
- data/app/models/bento_search/search_engine.rb +219 -0
- data/app/models/bento_search/search_engine/capabilities.rb +65 -0
- data/app/search_engines/bento_search/#Untitled-1# +11 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
- data/app/search_engines/bento_search/eds_engine.rb +557 -0
- data/app/search_engines/bento_search/google_books_engine.rb +184 -0
- data/app/search_engines/bento_search/primo_engine.rb +231 -0
- data/app/search_engines/bento_search/scopus_engine.rb +295 -0
- data/app/search_engines/bento_search/summon_engine.rb +398 -0
- data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
- data/app/views/bento_search/_link.html.erb +4 -0
- data/app/views/bento_search/_search_error.html.erb +22 -0
- data/app/views/bento_search/_std_item.html.erb +39 -0
- data/app/views/bento_search/search/search.html.erb +1 -0
- data/config/locales/en.yml +25 -0
- data/lib/bento_search.rb +29 -0
- data/lib/bento_search/engine.rb +5 -0
- data/lib/bento_search/routes.rb +45 -0
- data/lib/bento_search/version.rb +3 -0
- data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
- data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
- data/lib/http_client_patch/include_client.rb +86 -0
- data/lib/tasks/bento_search_tasks.rake +4 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +56 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +6 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +3100 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/functional/bento_search/search_controller_test.rb +81 -0
- data/test/helper/bento_search_helper_test.rb +125 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/support/mock_engine.rb +23 -0
- data/test/support/test_with_cassette.rb +38 -0
- data/test/test_helper.rb +52 -0
- data/test/unit/#vcr_test.rb# +68 -0
- data/test/unit/ebsco_host_engine_test.rb +134 -0
- data/test/unit/eds_engine_test.rb +105 -0
- data/test/unit/google_books_engine_test.rb +93 -0
- data/test/unit/item_decorators_test.rb +66 -0
- data/test/unit/multi_searcher_test.rb +49 -0
- data/test/unit/openurl_creator_test.rb +111 -0
- data/test/unit/pagination_test.rb +59 -0
- data/test/unit/primo_engine_test.rb +37 -0
- data/test/unit/register_engine_test.rb +50 -0
- data/test/unit/result_item_display_test.rb +39 -0
- data/test/unit/result_item_test.rb +36 -0
- data/test/unit/scopus_engine_test.rb +130 -0
- data/test/unit/search_engine_base_test.rb +178 -0
- data/test/unit/search_engine_test.rb +95 -0
- data/test/unit/summon_engine_test.rb +161 -0
- data/test/unit/xerxes_engine_test.rb +70 -0
- data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
- data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
- data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
- data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
- data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
- data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
- data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
- data/test/vcr_cassettes/gbs/pagination.yml +702 -0
- data/test/vcr_cassettes/gbs/search.yml +340 -0
- data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
- data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
- data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
- data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
- data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
- data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
- data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
- data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
- data/test/vcr_cassettes/summon/search.yml +242 -0
- data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
- data/test/view/std_item_test.rb +98 -0
- metadata +421 -0
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'httpclient'
|
2
|
+
require 'cgi'
|
3
|
+
require 'multi_json'
|
4
|
+
|
5
|
+
# not sure why we need to require the entire 'helpers'
|
6
|
+
# when all we want is sanitize_helper, but I think we do:
|
7
|
+
require 'action_view/helpers'
|
8
|
+
#require 'action_view/helpers/sanitize_helper'
|
9
|
+
|
10
|
+
require 'http_client_patch/include_client'
|
11
|
+
|
12
|
+
module BentoSearch
|
13
|
+
#
|
14
|
+
# https://developers.google.com/books/docs/v1/using
|
15
|
+
# https://developers.google.com/books/docs/v1/reference/volumes#resource
|
16
|
+
#
|
17
|
+
# Configuration :api_key STRONGLY recommended, or google will severely
|
18
|
+
# rate-limit you.
|
19
|
+
class GoogleBooksEngine
|
20
|
+
include BentoSearch::SearchEngine
|
21
|
+
include ActionView::Helpers::SanitizeHelper
|
22
|
+
|
23
|
+
extend HTTPClientPatch::IncludeClient
|
24
|
+
include_http_client # gives us a #http_client with persistent class-level
|
25
|
+
|
26
|
+
class_attribute :base_url
|
27
|
+
self.base_url = "https://www.googleapis.com/books/v1/"
|
28
|
+
|
29
|
+
|
30
|
+
# Runs a search against the Google Books volumes API and maps the JSON
# response onto a Results object.
#
# arguments - the normalized #search arguments hash; handed to
#             args_to_search_url to build the request URL.
#
# Returns a Results. When the request raises one of the rescued errors, the
# body is not parseable JSON, the HTTP status is not successful, or the JSON
# carries an "error" member, the returned Results has #error populated
# (:exception, :status, :message, :error_info as available) and no items.
def search_implementation(arguments)
  query_url = args_to_search_url(arguments)

  results = Results.new
  response = nil
  json = nil

  begin
    response = http_client.get(query_url)
    json = MultiJson.load(response.body)
    # Can't rescue everything, or we catch VCR errors, making
    # things confusing.
    #
    # Timeout::Error is used instead of the bare top-level TimeoutError
    # constant, which is not defined on current Rubies.
    # MultiJson::DecodeError traps an unparseable body.
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
         MultiJson::DecodeError => e
    results.error ||= {}
    results.error[:exception] = e
  end

  # Trap json parse error, but also check for bad http
  # status, or error reported in the json. In any of those cases
  # return results obj with error status.
  if response.nil? || json.nil? ||
     !HTTP::Status.successful?(response.status) ||
     json["error"]

    results.error ||= {}
    results.error[:status] = response.status if response

    if json && json["error"] && json["error"]["errors"].kind_of?(Array)
      # An empty "errors" array must not blow up the error handling itself.
      first_error = json["error"]["errors"].first
      if first_error.respond_to?(:values)
        results.error[:message] = first_error.values.join(", ")
      end
    end
    results.error[:error_info] = json["error"] if json && json.respond_to?("[]")

    # escape early!
    return results
  end

  results.total_items = json["totalItems"]

  # A zero-hit response has no "items" member at all; treat it as empty.
  (json["items"] || []).each do |j_item|
    j_item = j_item["volumeInfo"] if j_item["volumeInfo"]

    item = ResultItem.new
    results << item

    item.title     = j_item["title"]
    item.subtitle  = j_item["subtitle"]
    item.publisher = j_item["publisher"]
    item.link      = j_item["canonicalVolumeLink"]
    item.abstract  = sanitize j_item["description"]
    item.year      = get_year j_item["publishedDate"]
    item.format    = if j_item["printType"] == "MAGAZINE"
      :serial
    else
      "Book"
    end

    (j_item["authors"] || []).each do |author_name|
      item.authors << Author.new(:display => author_name)
    end
  end

  return results
end
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
###########
|
100
|
+
# BentoSearch::SearchEngine API
|
101
|
+
###########
|
102
|
+
|
103
|
+
# Largest per-page value this engine reports supporting.
def max_per_page
  return 100
end
|
106
|
+
|
107
|
+
# Maps each supported search field key to a hash naming its :semantic.
def search_field_definitions
  definitions = {}
  definitions["intitle"]     = {:semantic => :title}
  definitions["inauthor"]    = {:semantic => :author}
  definitions["inpublisher"] = {:semantic => :publisher}
  definitions["subject"]     = {:semantic => :subject}
  definitions["isbn"]        = {:semantic => :isbn}
  definitions
end
|
115
|
+
|
116
|
+
# Maps each supported sort key to a hash whose :implementation is the value
# sent in the request URL (nil means no sort parameter is added).
def sort_definitions
  definitions = {}
  definitions["relevance"] = {:implementation => nil} # default
  definitions["date_desc"] = {:implementation => "newest"}
  definitions
end
|
122
|
+
|
123
|
+
protected
|
124
|
+
|
125
|
+
|
126
|
+
#############
|
127
|
+
# Our own implementation code
|
128
|
+
##############
|
129
|
+
|
130
|
+
|
131
|
+
# takes a normalized #search arguments hash from SearchEngine
|
132
|
+
# turns it into a URL for Google API. Factored out to make testing
|
133
|
+
# possible.
|
134
|
+
# Takes a normalized #search arguments hash from SearchEngine and
# turns it into a URL for the Google Books volumes API. Factored out to
# make testing possible.
#
# arguments - hash; reads :query, :search_field, :per_page, :start and :sort.
#
# Returns the request URL as a String.
def args_to_search_url(arguments)
  query = if arguments[:search_field]
    fielded_query(arguments[:query], arguments[:search_field])
  else
    arguments[:query]
  end

  query_url = base_url + "volumes?q=#{CGI.escape query}"

  if configuration.api_key
    # Escape the key like every other value placed in the query string.
    query_url += "&key=#{CGI.escape configuration.api_key.to_s}"
  end

  if arguments[:per_page]
    query_url += "&maxResults=#{arguments[:per_page]}"
  end
  if arguments[:start]
    query_url += "&startIndex=#{arguments[:start]}"
  end

  if arguments[:sort] &&
      (defn = sort_definitions[arguments[:sort]]) &&
      (value = defn[:implementation])
    # The volumes API names its sort parameter "orderBy", not "sort".
    query_url += "&orderBy=#{CGI.escape(value)}"
  end

  return query_url
end
|
162
|
+
|
163
|
+
|
164
|
+
# If they ask for a <one two> :intitle, we're
|
165
|
+
# actually gonna do like google's own form does,
|
166
|
+
# and change it to <intitle:one intitle:two>. Internal
|
167
|
+
# phrases will be respected.
|
168
|
+
# Prefixes every token of query with "field:". Tokens are split on
# whitespace, except that a double-quoted phrase is kept as one token.
def fielded_query(query, field)
  pieces = query.split(%r{\s|("[^"]+")}).reject { |piece| piece.blank? }
  prefixed = pieces.map { |piece| "#{field}:#{piece}" }
  prefixed.join(" ")
end
|
172
|
+
|
173
|
+
|
174
|
+
# Returns the leading four digits of iso8601 as an Integer, or nil when the
# value is blank or does not start with four digits.
def get_year(iso8601)
  return nil if iso8601.blank?

  (iso8601 =~ /^(\d{4})/) ? $1.to_i : nil
end
|
182
|
+
|
183
|
+
end
|
184
|
+
end
|
@@ -0,0 +1,231 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
require 'http_client_patch/include_client'
|
5
|
+
require 'httpclient'
|
6
|
+
|
7
|
+
# ExLibris Primo Central.
|
8
|
+
#
|
9
|
+
# written/tested with PrimoCentral aggregated index only, but probably
|
10
|
+
# should work with any Primo, may need some assumption tweaks.
|
11
|
+
#
|
12
|
+
# == Required Configuration
|
13
|
+
#
|
14
|
+
# [:host_port] your unique Primo's host/port combo, like "something.exlibrisgroup.com:1701".
|
15
|
+
# it's assumed we can talk to your primo at
|
16
|
+
# http://$host_port/PrimoWebServices/xservice/search/brief?
|
17
|
+
# [:institution] Primo requires an institution parameter.
|
18
|
+
# right now we have a hard-coded assumed 'institution' in
|
19
|
+
# config. Eg. "GWCC"
|
20
|
+
#
|
21
|
+
#
|
22
|
+
# == Other Primo-Specific Configuration
|
23
|
+
#
|
24
|
+
# [:loc] The primo 'loc' parameter, default "adaptor,primo_central_multiple_fe"
|
25
|
+
# for Primo Central Index searches.
|
26
|
+
# [:auth] Set to 'true' to assume local auth'd users if you're going to protect
|
27
|
+
# access. Default false. Alternately, you can pass in an
|
28
|
+
# :auth => true/false to 'search', which will override config.
|
29
|
+
# PC has limited access for non-auth users.
|
30
|
+
# [:lang] Primo lang query param. "Hints input languages to search engine for language recognition. "
|
31
|
+
# For now hardcoded into config, not settable per request. Default 'eng'.
|
32
|
+
# [:fixed_params] Extra url query params to add on to every search request.
|
33
|
+
# Can be used to hard-code certain limits, such as:
|
34
|
+
# {"query_exc" => ["facet_rtype,exact,books", "something_else"]}
|
35
|
+
# Note neither key nor values are uri encoded, we'll take
|
36
|
+
# care of that for you. value can be array or single string.
|
37
|
+
#
|
38
|
+
# == Vendor docs
|
39
|
+
#
|
40
|
+
# http://www.exlibrisgroup.org/display/PrimoOI/Brief+Search
|
41
|
+
|
42
|
+
class BentoSearch::PrimoEngine
|
43
|
+
include BentoSearch::SearchEngine
|
44
|
+
|
45
|
+
extend HTTPClientPatch::IncludeClient
|
46
|
+
include_http_client
|
47
|
+
|
48
|
+
# Runs a Primo brief search and maps the XML response onto a
# BentoSearch::Results.
#
# args - normalized search arguments hash; handed to construct_query to
#        build the request URL.
#
# Returns a BentoSearch::Results. When the HTTP status is not successful, or
# the response has no SEGMENTS/JAGROOT/RESULT/DOCSET element, the returned
# Results has #error populated (:status, and :message for a missing DOCSET)
# and contains no items, instead of raising NoMethodError on nil.
def search_implementation(args)
  url = construct_query(args)

  results = BentoSearch::Results.new

  response = http_client.get(url)
  unless HTTP::Status.successful? response.status
    results.error ||= {}
    results.error[:status] = response.status
    return results
  end

  response_xml = Nokogiri::XML response.body
  # namespaces really do nobody any good
  response_xml.remove_namespaces!

  docset = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET")
  if docset.nil?
    results.error ||= {}
    results.error[:status] = response.status
    results.error[:message] = "Primo response did not contain a DOCSET element"
    return results
  end

  results.total_items = docset["TOTALHITS"].to_i

  response_xml.xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET/DOC").each do |doc_xml|
    item = BentoSearch::ResultItem.new
    # Data in primo response is confusing in many different places in
    # variant formats. We try to pick out the best to take things from,
    # but we're guessing, it's under-documented.

    item.title    = text_at_xpath(doc_xml, "./PrimoNMBib/record/display/title")
    item.abstract = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/abstract")

    doc_xml.xpath("./PrimoNMBib/record/facets/creatorcontrib").each do |author_node|
      item.authors << BentoSearch::Author.new(:display => author_node.text)
    end

    item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/jtitle")
    # check btitle for book chapters, the book they are in.
    if item.journal_title.blank? && doc_xml.at_xpath("./PrimoNMBib/record/display/ispartof")
      item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/btitle")
    end

    item.publisher  = text_at_xpath doc_xml, "./PrimoNMBib/record/display/publisher"
    item.volume     = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/volume"
    item.issue      = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issue"
    item.start_page = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/spage"
    item.end_page   = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/epage"
    item.doi        = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/doi"
    item.issn       = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issn"
    item.isbn       = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/isbn"

    if (date = text_at_xpath doc_xml, "./PrimoNMBib/record/search/creationdate")
      item.year = date[0,4] # first four chars
    end

    if fmt_str = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/rsrctype")
      # 'article', 'book_chapter'. abuse rails to turn into nice titlelized english.
      item.format_str = fmt_str.titleize

      item.format = map_format fmt_str
    end

    #TODO formats, highlighting

    results << item
  end

  return results
end
|
111
|
+
|
112
|
+
# Try to map from primocentral's 'rsrctype' to our own internal
|
113
|
+
# taxonomy of formats
|
114
|
+
#
|
115
|
+
# Need docs on what the complete Primo vocabulary here is, we're
|
116
|
+
# just guessing from what we see.
|
117
|
+
# Translates a Primo 'rsrctype' string into this library's format value.
# Returns nil for any value not listed.
def map_format(str)
  if ["article", "newspaper_article", "review"].include?(str)
    "Article"
  elsif str == "book"
    "Book"
  elsif str == "dissertation"
    :dissertation
  end
end
|
125
|
+
|
126
|
+
# Returns the text() at the xpath, if the xpath is non-nil
|
127
|
+
# and the text is non-blank
|
128
|
+
# Returns the text of the first node matching xpath under xml.
#
# xml   - object responding to #at_xpath.
# xpath - XPath expression String.
#
# Returns the node's text, or nil when no node matches or the text is blank.
def text_at_xpath(xml, xpath)
  node = xml.at_xpath(xpath)
  return nil if node.nil?

  text = node.text
  # The blank check must be on the extracted text; checking the node itself
  # let whitespace-only text through as a non-nil value.
  return nil if text.blank?

  return text
end
|
135
|
+
|
136
|
+
|
137
|
+
|
138
|
+
# From config or args, args over-ride config
|
139
|
+
# Decides whether the end user is to be treated as authenticated.
#
# args - search arguments hash; a non-nil args[:auth] overrides configuration.
#
# Returns true or false: the truthiness of args[:auth] when it is not nil,
# otherwise the truthiness of configuration.auth.
def authenticated_end_user?(args)
  # The configured value is coerced to a strict boolean up front, so the
  # former nil checks on it could never fire and have been dropped.
  configured = configuration.auth ? true : false
  requested  = args[:auth]

  return configured if requested.nil?

  requested ? true : false
end
|
150
|
+
|
151
|
+
# Docs say we need to replace any commas with spaces
|
152
|
+
# Returns a copy of str with every comma replaced by a space.
def prepared_query(str)
  str.tr(',', ' ')
end
|
155
|
+
|
156
|
+
|
157
|
+
# Builds the Primo brief-search request URL.
#
# args - search arguments hash; reads :per_page, :start, :sort,
#        :search_field, :query (and :auth via authenticated_end_user?).
#
# Reads host_port, institution, loc, lang and fixed_params from
# configuration. Returns the URL as a String.
def construct_query(args)
  url = "http://#{configuration.host_port}/PrimoWebServices/xservice/search/brief"
  # institution is escaped like every other value placed in the query string.
  url += "?institution=#{CGI.escape configuration.institution.to_s}"
  url += "&loc=#{CGI.escape configuration.loc}"

  url += "&lang=#{CGI.escape configuration.lang}"

  url += "&bulkSize=#{args[:per_page]}" if args[:per_page]
  # primo indx is 1-based record index, our :start is 0-based.
  url += "&indx=#{args[:start] + 1}" if args[:start]

  if (defn = self.sort_definitions[ args[:sort] ]) &&
      (value = defn[:implementation])

    url += "&sortField=#{CGI.escape value}"
  end

  url += "&onCampus=#{ authenticated_end_user?(args) ? 'true' : 'false'}"

  field = args[:search_field].present? ? args[:search_field] : "any"
  query = "#{field},contains,#{prepared_query args[:query]}"

  url += "&query=#{CGI.escape query}"

  # Each fixed param value may be a single value or an array of values.
  configuration.fixed_params.each_pair do |key, value|
    [value].flatten.each do |v|
      url += "&#{CGI.escape key.to_s}=#{CGI.escape v.to_s}"
    end
  end

  return url
end
|
194
|
+
|
195
|
+
|
196
|
+
# Maps each supported Primo search field key to a hash naming its :semantic.
def search_field_definitions
  # others are avail too, this is not exhaustive.
  definitions = {}
  definitions["creator"] = {:semantic => :author}
  definitions["title"]   = {:semantic => :title}
  definitions["sub"]     = {:semantic => :subject}
  definitions["isbn"]    = {:semantic => :isbn}
  definitions["issn"]    = {:semantic => :issn}
  definitions
end
|
206
|
+
|
207
|
+
# Maps each supported sort key to a hash whose :implementation is the value
# sent as Primo's sortField.
def sort_definitions
  definitions = {}
  definitions["title_asc"]  = {:implementation => "stitle"}
  definitions["date_desc"]  = {:implementation => "scdate"}
  definitions["author_asc"] = {:implementation => "screator"}
  # As far as I can tell, what they call 'popularity'
  # is really relevance, with popularity boosting.
  definitions["relevance"]  = {:implementation => "popularity"}
  definitions
end
|
217
|
+
|
218
|
+
# Configuration keys this engine lists as required.
def self.required_configuration
  return [:host_port, :institution]
end
|
221
|
+
|
222
|
+
# Default values for the optional configuration keys :loc, :lang and
# :fixed_params.
def self.default_configuration
  defaults = {}
  defaults[:loc] = 'adaptor,primo_central_multiple_fe'
  # "eng" or "fre" or "ger" (Code for the representation of name of language conform to ISO-639)
  defaults[:lang] = "eng"
  defaults[:fixed_params] = {}
  defaults
end
|
230
|
+
|
231
|
+
end
|
@@ -0,0 +1,295 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
require 'http_client_patch/include_client'
|
5
|
+
require 'httpclient'
|
6
|
+
module BentoSearch
|
7
|
+
# Supports fielded searching, sorting, pagination.
|
8
|
+
#
|
9
|
+
# Required configuration:
|
10
|
+
# * api_key
|
11
|
+
#
|
12
|
+
# Defaults to 'relevance' sort, rather than scopus's default of date desc.
|
13
|
+
#
|
14
|
+
# Uses the Scopus SciVerse REST API. You need to be a Scopus customer
|
15
|
+
# to access. http://api.elsevier.com
|
16
|
+
# http://www.developers.elsevier.com/action/devprojects
|
17
|
+
#
|
18
|
+
# ToS: http://www.developers.elsevier.com/devcms/content-policies
|
19
|
+
# "Federated Search" use case.
|
20
|
+
# Also: http://www.developers.elsevier.com/cms/apiserviceagreement
|
21
|
+
#
|
22
|
+
# Note that ToS applying to you probably means you must restrict access
|
23
|
+
# to search functionality to authenticated affiliated users only.
|
24
|
+
#
|
25
|
+
# Register for an API key at "Register New Site" at http://developers.elsevier.com/action/devnewsite
|
26
|
+
# You will then need to get server IP addresses registered with Scopus too,
|
27
|
+
# apparently by emailing directly to dave.santucci at elsevier dot com.
|
28
|
+
#
|
29
|
+
# Scopus API Docs:
|
30
|
+
# * http://www.developers.elsevier.com/devcms/content-api-search-request
|
31
|
+
# * http://www.developers.elsevier.com/devcms/content/search-fields-overview
|
32
|
+
#
|
33
|
+
# Some more docs on response elements and query elements:
|
34
|
+
# * http://api.elsevier.com/content/search/#d0n14606
|
35
|
+
#
|
36
|
+
# Other API's in the suite not being used by this code at present:
|
37
|
+
# * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
|
38
|
+
# * http://www.developers.elsevier.com/devcms/content-api-metadata-request
|
39
|
+
#
|
40
|
+
# Support: Integration@scopus.com
|
41
|
+
#
|
42
|
+
# TODO: Mention to Scopus: Only one author?
|
43
|
+
# Paging of 50 gets an error, but docs say I should be able to request 200.
|
44
|
+
#
|
45
|
+
class ScopusEngine
|
46
|
+
include BentoSearch::SearchEngine
|
47
|
+
|
48
|
+
extend HTTPClientPatch::IncludeClient
|
49
|
+
include_http_client
|
50
|
+
|
51
|
+
# Runs a search against the Scopus REST API and maps the Atom XML response
# onto a Results object.
#
# args - normalized search arguments hash; handed to scopus_url to build the
#        request URL.
#
# Returns a Results. When the request raises one of the rescued errors, the
# HTTP status is not successful, or the body contains a service-error
# element, the returned Results has #error populated (:exception, :status,
# :error_info) -- except for the specific 400 response Scopus sends for an
# empty result set, which is returned as a Results with total_items 0.
def search_implementation(args)
  results = Results.new

  xml, response, exception = nil, nil, nil

  url = scopus_url(args)

  begin
    response = http_client.get(url, nil,
      # HTTP headers.
      {"X-ELS-APIKey" => configuration.api_key,
       "X-ELS-ResourceVersion" => "XOCS",
       "Accept" => "application/atom+xml"}
    )
    xml = Nokogiri::XML(response.body)
    # Timeout::Error replaces the bare top-level TimeoutError constant, which
    # is not defined on current Rubies; HTTPClient::TimeoutError is rescued
    # as the Google Books engine in this library does.
  rescue Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError,
         HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    exception = e
  end

  # handle errors
  if response.nil? || xml.nil? || exception ||
     !HTTP::Status.successful?(response.status) ||
     xml.at_xpath("service-error")

    # UGH. Scopus reports 0 hits as an error, not entirely distinguishable
    # from an actual error. Oh well, we have to go with it.
    #
    # response may be nil here (rescued transport error), so it is checked
    # before its status is read.
    if response &&
       (response.status == 400) &&
       xml &&
       (error_xml = xml.at_xpath("./service-error/status")) &&
       (node_text(error_xml.at_xpath("./statusCode")) == "INVALID_INPUT") &&
       (node_text(error_xml.at_xpath("./statusText")) == "Result set was empty or Start value beyond result set")
      # PROBABLY 0 hit count, although could be something else I'm afraid.
      results.total_items = 0
      return results
    else
      # real error
      results.error ||= {}
      results.error[:exception] = exception
      results.error[:status] = response.status if response
      # keep from storing the entire possibly huge response as error
      # but sometimes it's an error message. The element name is
      # "service-error" (hyphen), matching the checks above.
      results.error[:error_info] = xml.at_xpath("service-error") if xml
      return results
    end
  end

  results.total_items = (node_text xml.at_xpath("//opensearch:totalResults", xml_ns)).to_i

  xml.xpath("//atom:entry", xml_ns).each do |entry|
    results << (item = ResultItem.new)
    if scopus_link = entry.at_xpath("atom:link[@ref='scopus']", xml_ns)
      item.link = scopus_link["href"]
    end
    item.title         = node_text entry.at_xpath("dc:title", xml_ns)
    item.journal_title = node_text entry.at_xpath("prism:publicationName", xml_ns)
    item.issn          = node_text entry.at_xpath("prism:issn", xml_ns)
    item.volume        = node_text entry.at_xpath("prism:volume", xml_ns)
    item.issue         = node_text entry.at_xpath("prism:issueIdentifier", xml_ns)
    item.doi           = node_text entry.at_xpath("prism:doi", xml_ns)

    # pages might be in startingPage/endingPage OR in pageRange
    if (start = entry.at_xpath("prism:startingPage", xml_ns))
      item.start_page = start.text.to_i
      if (epage = entry.at_xpath("prism:endingPage", xml_ns))
        item.end_page = epage.text.to_i
      end
    elsif (range = entry.at_xpath("prism:pageRange", xml_ns))
      (spage, epage) = *range.text().split("-")
      item.start_page = spage
      item.end_page = epage
    end

    # get the year out of the date
    if date = entry.at_xpath("prism:coverDate", xml_ns)
      date.text =~ /^(\d\d\d\d)/
      item.year = $1.to_i if $1
    end

    # Authors might be in atom:authors separated by |, or just
    # a single one in dc:creator
    if (authors = entry.at_xpath("atom:authors", xml_ns))
      authors.text.split("|").each do |author|
        item.authors << Author.new(:display => author.strip)
      end
    elsif (author = entry.at_xpath("dc:creator", xml_ns))
      item.authors << Author.new(:display => author.text.strip)
    end

    # Format we're still trying to figure out how Scopus API
    # delivers it. Here is at least one way.
    if (doctype = entry.at_xpath("atom:subtype", xml_ns))
      item.format     = doctype_to_format(doctype.text)
      item.format_str = doctype_to_string(doctype.text)
    end
  end

  return results
end
|
155
|
+
|
156
|
+
# The escaping rules are not entirely clear for the API. We know colons
|
157
|
+
# and parens are special chars. It's unclear how or if we can escape them,
|
158
|
+
# we'll just remove them.
|
159
|
+
# Returns a copy of query with each backslash, parenthesis and colon
# replaced by a single space.
def escape_query(query)
  # backslash-escaping these characters doesn't seem to work with the API,
  # so they are blanked out instead.
  special_chars = /([\\\(\)\:])/
  query.gsub(special_chars) { ' ' }
end
|
166
|
+
|
167
|
+
|
168
|
+
# Configuration keys this engine lists as required.
def self.required_configuration
  return ["api_key"]
end
|
171
|
+
|
172
|
+
# Default values for the :base_url and :cluster configuration keys.
def self.default_configuration
  defaults = {}
  defaults[:base_url] = "http://api.elsevier.com/"
  defaults[:cluster]  = "SCOPUS"
  defaults
end
|
178
|
+
|
179
|
+
# Max per-page is 200, as per http://www.developers.elsevier.com/devcms/content-apis, bottom of page.
|
180
|
+
# Largest per-page value this engine reports supporting.
def max_per_page
  return 200
end
|
183
|
+
|
184
|
+
# Maps each supported Scopus search field key to a hash naming its :semantic.
def search_field_definitions
  definitions = {}
  definitions["AUTH"]  = {:semantic => :author}
  definitions["TITLE"] = {:semantic => :title}
  # controlled and author-assigned keywords
  definitions["KEY"]   = {:semantic => :subject}
  definitions["ISBN"]  = {:semantic => :isbn}
  definitions["ISSN"]  = {:semantic => :issn}
  definitions
end
|
194
|
+
|
195
|
+
# Maps each supported sort key to a hash whose :implementation is the
# scopus &sort= value, not yet URI-escaped; later code does the escaping.
def sort_definitions
  definitions = {}
  definitions["title_asc"]     = {:implementation => "+itemtitle"}
  definitions["date_desc"]     = {:implementation => "-datesort,+auth"}
  # 'refeid' key is currently undocumented on Scopus site, but
  # was given to me in email by scopus.
  definitions["relevance"]     = {:implementation => "refeid" }
  definitions["author_asc"]    = {:implementation => "+auth"}
  definitions["num_cite_desc"] = {:implementation => "-numcitedby"}
  definitions
end
|
208
|
+
|
209
|
+
|
210
|
+
protected
|
211
|
+
|
212
|
+
# returns nil if passed in nil, otherwise
|
213
|
+
# returns nokogiri text()
|
214
|
+
# Returns node.text, or nil when node itself is nil.
def node_text(node)
  node.nil? ? nil : node.text
end
|
219
|
+
|
220
|
+
# Prefix => namespace URI hash passed to the XPath queries in this engine.
def xml_ns
  namespaces = {}
  namespaces["opensearch"] = "http://a9.com/-/spec/opensearch/1.1/"
  namespaces["prism"]      = "http://prismstandard.org/namespaces/basic/2.0/"
  namespaces["dc"]         = "http://purl.org/dc/elements/1.1/"
  namespaces["atom"]       = "http://www.w3.org/2005/Atom"
  namespaces
end
|
226
|
+
|
227
|
+
# Maps from Scopus "doctype" as listed at http://www.developers.elsevier.com/devcms/content/search-fields-overview
|
228
|
+
# and delivered in the XML response as atom:subtype.
|
229
|
+
# Maps to our own internal formats as documented in ResultItem#format
|
230
|
+
# Returns nil if can't map.
|
231
|
+
# Maps a Scopus doctype code (delivered in the XML response as atom:subtype)
# to this library's internal format value.
#
# doctype - the code; converted with #to_s before lookup.
#
# Returns the mapped format, or nil if the code can't be mapped.
def doctype_to_format(doctype)
  # Each code appears exactly once; the previous literal repeated "re" and
  # "ip", which Ruby reports as duplicated hash keys.
  { "ar" => "Article",
    "ip" => "Article", # 'article in press'.
    "bk" => "Book",
    "bz" => "Article",
    "cp" => :conference_paper,
    "re" => "Article", # really 'report', but Scopus is unreliable here, most of these are actually articles.
    "sh" => "Article", # 'short survey' to scopus, but seems to be used for articles.
    "ed" => "Article", # Editorial
    "le" => "Article", # Letter
    "no" => "Article"  # Note
  }[doctype.to_s]
end
|
246
|
+
|
247
|
+
# Maps Scopus doctype to human readable strings as documented by Scopus,
|
248
|
+
# does not map 1-1 to our controlled format.
|
249
|
+
# Maps a Scopus doctype code to a human readable label. The labels do not
# map 1-1 to the controlled format values. Returns nil for an unlisted code.
def doctype_to_string(doctype)
  labels = {}
  labels["ar"] = "Article"
  labels["ab"] = "Abstract Report"
  labels["ip"] = "Article in Press"
  labels["bk"] = "Book"
  labels["bz"] = "Business Article"
  labels["cp"] = "Conference Paper"
  labels["cr"] = "Conference Review"
  labels["ed"] = "Editorial"
  labels["er"] = "Erratum"
  labels["le"] = "Letter"
  labels["no"] = "Note"
  labels["pr"] = "Press Release"
  # Really 'report', but Scopus is unreliable here, most of these are actually articles.
  labels["re"] = "Article"
  # Really 'short survey' to Scopus, but seems to be used for, well, articles.
  labels["sh"] = "Article"

  code = doctype.to_s
  labels[code]
end
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
|
270
|
+
# Builds the Scopus search request URL.
#
# args - search arguments hash; reads :query, :search_field, :per_page,
#        :start and :sort. The hash is not modified.
#
# Reads base_url and cluster from configuration. Returns the URL as a String.
def scopus_url(args)
  query = escape_query args[:query]

  if args[:search_field]
    query = "#{args[:search_field]}(#{query})"
  end

  query = "#{configuration.base_url.chomp("/")}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(query)}"

  query += "&count=#{args[:per_page]}" if args[:per_page]

  query += "&start=#{args[:start]}" if args[:start]

  # default to 'relevance' sort if not given, rather than scopus's
  # default of date desc. The default is held in a local so the caller's
  # hash is left untouched.
  sort = args[:sort] || "relevance"
  if (defn = self.sort_definitions[sort]) &&
      (value = defn[:implementation])
    query += "&sort=#{CGI.escape(value)}"
  end

  return query
end
|
293
|
+
|
294
|
+
end
|
295
|
+
end
|