bento_search 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.md +299 -0
- data/Rakefile +40 -0
- data/app/assets/images/bento_search/large_loader.gif +0 -0
- data/app/assets/javascripts/bento_search.js +3 -0
- data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
- data/app/assets/stylesheets/bento_search/bento.css +4 -0
- data/app/controllers/bento_search/bento_search_controller.rb +7 -0
- data/app/controllers/bento_search/search_controller.rb +72 -0
- data/app/helpers/bento_search_helper.rb +138 -0
- data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
- data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
- data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
- data/app/models/bento_search/author.rb +25 -0
- data/app/models/bento_search/link.rb +30 -0
- data/app/models/bento_search/multi_searcher.rb +109 -0
- data/app/models/bento_search/openurl_creator.rb +128 -0
- data/app/models/bento_search/registrar.rb +70 -0
- data/app/models/bento_search/result_item.rb +203 -0
- data/app/models/bento_search/results.rb +54 -0
- data/app/models/bento_search/results/pagination.rb +67 -0
- data/app/models/bento_search/search_engine.rb +219 -0
- data/app/models/bento_search/search_engine/capabilities.rb +65 -0
- data/app/search_engines/bento_search/#Untitled-1# +11 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
- data/app/search_engines/bento_search/eds_engine.rb +557 -0
- data/app/search_engines/bento_search/google_books_engine.rb +184 -0
- data/app/search_engines/bento_search/primo_engine.rb +231 -0
- data/app/search_engines/bento_search/scopus_engine.rb +295 -0
- data/app/search_engines/bento_search/summon_engine.rb +398 -0
- data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
- data/app/views/bento_search/_link.html.erb +4 -0
- data/app/views/bento_search/_search_error.html.erb +22 -0
- data/app/views/bento_search/_std_item.html.erb +39 -0
- data/app/views/bento_search/search/search.html.erb +1 -0
- data/config/locales/en.yml +25 -0
- data/lib/bento_search.rb +29 -0
- data/lib/bento_search/engine.rb +5 -0
- data/lib/bento_search/routes.rb +45 -0
- data/lib/bento_search/version.rb +3 -0
- data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
- data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
- data/lib/http_client_patch/include_client.rb +86 -0
- data/lib/tasks/bento_search_tasks.rake +4 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +56 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +6 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +3100 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/functional/bento_search/search_controller_test.rb +81 -0
- data/test/helper/bento_search_helper_test.rb +125 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/support/mock_engine.rb +23 -0
- data/test/support/test_with_cassette.rb +38 -0
- data/test/test_helper.rb +52 -0
- data/test/unit/#vcr_test.rb# +68 -0
- data/test/unit/ebsco_host_engine_test.rb +134 -0
- data/test/unit/eds_engine_test.rb +105 -0
- data/test/unit/google_books_engine_test.rb +93 -0
- data/test/unit/item_decorators_test.rb +66 -0
- data/test/unit/multi_searcher_test.rb +49 -0
- data/test/unit/openurl_creator_test.rb +111 -0
- data/test/unit/pagination_test.rb +59 -0
- data/test/unit/primo_engine_test.rb +37 -0
- data/test/unit/register_engine_test.rb +50 -0
- data/test/unit/result_item_display_test.rb +39 -0
- data/test/unit/result_item_test.rb +36 -0
- data/test/unit/scopus_engine_test.rb +130 -0
- data/test/unit/search_engine_base_test.rb +178 -0
- data/test/unit/search_engine_test.rb +95 -0
- data/test/unit/summon_engine_test.rb +161 -0
- data/test/unit/xerxes_engine_test.rb +70 -0
- data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
- data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
- data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
- data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
- data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
- data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
- data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
- data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
- data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
- data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
- data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
- data/test/vcr_cassettes/gbs/pagination.yml +702 -0
- data/test/vcr_cassettes/gbs/search.yml +340 -0
- data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
- data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
- data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
- data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
- data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
- data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
- data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
- data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
- data/test/vcr_cassettes/summon/search.yml +242 -0
- data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
- data/test/view/std_item_test.rb +98 -0
- metadata +421 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
require 'httpclient'
|
|
2
|
+
require 'cgi'
|
|
3
|
+
require 'multi_json'
|
|
4
|
+
|
|
5
|
+
# not sure why we need to require the entire 'helpers'
|
|
6
|
+
# when all we want is sanitize_helper, but I think we do:
|
|
7
|
+
require 'action_view/helpers'
|
|
8
|
+
#require 'action_view/helpers/sanitize_helper'
|
|
9
|
+
|
|
10
|
+
require 'http_client_patch/include_client'
|
|
11
|
+
|
|
12
|
+
module BentoSearch
|
|
13
|
+
#
|
|
14
|
+
# https://developers.google.com/books/docs/v1/using
|
|
15
|
+
# https://developers.google.com/books/docs/v1/reference/volumes#resource
|
|
16
|
+
#
|
|
17
|
+
# Configuration :api_key STRONGLY recommended, or google will severely
|
|
18
|
+
# rate-limit you.
|
|
19
|
+
class GoogleBooksEngine
|
|
20
|
+
include BentoSearch::SearchEngine
|
|
21
|
+
include ActionView::Helpers::SanitizeHelper
|
|
22
|
+
|
|
23
|
+
extend HTTPClientPatch::IncludeClient
|
|
24
|
+
include_http_client # gives us a #http_client with persistent class-level
|
|
25
|
+
|
|
26
|
+
class_attribute :base_url
|
|
27
|
+
self.base_url = "https://www.googleapis.com/books/v1/"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Executes a search against the Google Books volumes API.
#
# arguments - normalized #search arguments hash; it is handed to
#             #args_to_search_url to build the request URL.
#
# Returns a Results object. When the request raises one of the rescued
# exceptions, the body cannot be parsed as JSON, the HTTP status is not
# successful, or the JSON carries an "error" key, Results#error is filled in
# (:exception, :status, :message, :error_info as available) and no items are
# added.
def search_implementation(arguments)
  query_url = args_to_search_url(arguments)
  results   = Results.new
  response  = nil
  json      = nil

  begin
    response = http_client.get(query_url)
    json     = MultiJson.load(response.body)
  # Can't rescue everything, or we catch VCR errors, making things confusing.
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
         MultiJson::DecodeError => e
    results.error ||= {}
    results.error[:exception] = e
  end

  # Failure means: nothing fetched, nothing parsed, a bad HTTP status, or an
  # error reported inside the JSON itself.
  failed = response.nil? || json.nil? ||
           !HTTP::Status.successful?(response.status) ||
           json["error"]

  if failed
    results.error ||= {}
    results.error[:status] = response.status if response

    api_errors = json && json["error"] && json["error"]["errors"]
    # Only build a message when there really is a first error entry to read.
    if api_errors.kind_of?(Array) && api_errors.first.respond_to?(:values)
      results.error[:message] = api_errors.first.values.join(", ")
    end
    results.error[:error_info] = json["error"] if json && json.respond_to?("[]")

    # escape early!
    return results
  end

  results.total_items = json["totalItems"]

  # "items" may be missing from the response (seen with zero-hit searches),
  # so fall back to an empty list instead of calling #each on nil.
  (json["items"] || []).each do |j_item|
    volume = j_item["volumeInfo"] || j_item

    item = ResultItem.new
    results << item

    item.title     = volume["title"]
    item.subtitle  = volume["subtitle"]
    item.publisher = volume["publisher"]
    item.link      = volume["canonicalVolumeLink"]
    item.abstract  = sanitize volume["description"]
    item.year      = get_year volume["publishedDate"]
    item.format    = (volume["printType"] == "MAGAZINE") ? :serial : "Book"

    (volume["authors"] || []).each do |author_name|
      item.authors << Author.new(:display => author_name)
    end
  end

  results
end
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
###########
|
|
100
|
+
# BentoSearch::SearchEngine API
|
|
101
|
+
###########
|
|
102
|
+
|
|
103
|
+
# Largest page size this engine reports supporting.
def max_per_page
  return 100
end
|
|
106
|
+
|
|
107
|
+
# Search fields this engine supports: engine-specific field name mapped to a
# hash holding its :semantic meaning.
def search_field_definitions
  semantics = {
    "intitle"     => :title,
    "inauthor"    => :author,
    "inpublisher" => :publisher,
    "subject"     => :subject,
    "isbn"        => :isbn
  }

  semantics.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end
|
|
115
|
+
|
|
116
|
+
# Sort keys this engine supports, each mapped to the value sent to the API in
# :implementation. "relevance" has no value because it is the default.
def sort_definitions
  definitions = {}
  definitions["relevance"] = { :implementation => nil } # default
  definitions["date_desc"] = { :implementation => "newest" }
  definitions
end
|
|
122
|
+
|
|
123
|
+
protected
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
#############
|
|
127
|
+
# Our own implementation code
|
|
128
|
+
##############
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Takes a normalized #search arguments hash from SearchEngine and turns it
# into a URL for the Google Books API. Factored out to make testing possible.
#
# arguments - hash; reads :query, :search_field, :per_page, :start and :sort.
#
# Returns the request URL as a String. The API key, when configured, is
# URL-escaped like every other value. The sort is sent as "orderBy", which is
# the parameter name the Google Books volumes API documents for ordering.
def args_to_search_url(arguments)
  query =
    if arguments[:search_field]
      fielded_query(arguments[:query], arguments[:search_field])
    else
      arguments[:query]
    end

  query_url = base_url + "volumes?q=#{CGI.escape query}"

  if configuration.api_key
    query_url += "&key=#{CGI.escape configuration.api_key.to_s}"
  end

  query_url += "&maxResults=#{arguments[:per_page]}" if arguments[:per_page]
  query_url += "&startIndex=#{arguments[:start]}" if arguments[:start]

  # Only send a sort when the requested key is known and maps to a value;
  # "relevance" maps to nil because it is already the default.
  sort_defn  = arguments[:sort] && sort_definitions[arguments[:sort]]
  sort_value = sort_defn && sort_defn[:implementation]
  query_url += "&orderBy=#{CGI.escape(sort_value)}" if sort_value

  query_url
end
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# Prefixes every token of the query with the field name: asking for
# <one two> in :intitle becomes <intitle:one intitle:two>, like Google's own
# form does. Double-quoted phrases inside the query are kept as one token.
def fielded_query(query, field)
  pieces = query.split(%r{\s|("[^"]+")})
  terms  = pieces.reject { |piece| piece.blank? }
  terms.map { |term| "#{field}:#{term}" }.join(" ")
end
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Pulls a four-digit year off the front of a date string.
#
# Returns the year as an Integer, or nil when the input is blank or does not
# start with four digits.
def get_year(iso8601)
  return nil if iso8601.blank?

  (iso8601 =~ /^(\d{4})/) ? Regexp.last_match(1).to_i : nil
end
|
|
182
|
+
|
|
183
|
+
end
|
|
184
|
+
end
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
require 'cgi'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
|
|
4
|
+
require 'http_client_patch/include_client'
|
|
5
|
+
require 'httpclient'
|
|
6
|
+
|
|
7
|
+
# ExLibris Primo Central.
|
|
8
|
+
#
|
|
9
|
+
# written/tested with PrimoCentral aggregated index only, but probably
|
|
10
|
+
# should work with any Primo, may need some assumption tweaks.
|
|
11
|
+
#
|
|
12
|
+
# == Required Configuration
|
|
13
|
+
#
|
|
14
|
+
# [:host_port] your unique Primo's host/port combo, like "something.exlibrisgroup.com:1701".
|
|
15
|
+
# it's assumed we can talk to your primo at
|
|
16
|
+
# http://$host_port/PrimoWebServices/xservice/search/brief?
|
|
17
|
+
# [:institution] Primo requires an institution parameter.
|
|
18
|
+
# right now we have a hard-coded assumed 'institution' in
|
|
19
|
+
# config. Eg. "GWCC"
|
|
20
|
+
#
|
|
21
|
+
#
|
|
22
|
+
# == Other Primo-Specific Configuration
|
|
23
|
+
#
|
|
24
|
+
# [:loc] The primo 'loc' parameter, default "adaptor,primo_central_multiple_fe"
|
|
25
|
+
# for Primo Central Index searches.
|
|
26
|
+
# [:auth] Set to 'true' to assume local auth'd users if you're going to protect
|
|
27
|
+
# access. Default false. Alternately, you can pass in an
|
|
28
|
+
# :auth => true/false to 'search', which will override config.
|
|
29
|
+
# PC has limited access for non-auth users.
|
|
30
|
+
# [:lang] Primo lang query param. "Hints input languages to search engine for language recognition. "
|
|
31
|
+
# For now hardcoded into config, not settable per request.default 'eng'
|
|
32
|
+
# [:fixed_params] Extra url query params to add on to every search request.
|
|
33
|
+
# Can be used to hard-code certain limits, such as:
|
|
34
|
+
# {"query_exc" => ["facet_rtype,exact,books", "something_else"]}
|
|
35
|
+
# Note neither key nor values are uri encoded, we'll take
|
|
36
|
+
# care of that for you. value can be array or single string.
|
|
37
|
+
#
|
|
38
|
+
# == Vendor docs
|
|
39
|
+
#
|
|
40
|
+
# http://www.exlibrisgroup.org/display/PrimoOI/Brief+Search
|
|
41
|
+
|
|
42
|
+
class BentoSearch::PrimoEngine
|
|
43
|
+
include BentoSearch::SearchEngine
|
|
44
|
+
|
|
45
|
+
extend HTTPClientPatch::IncludeClient
|
|
46
|
+
include_http_client
|
|
47
|
+
|
|
48
|
+
# Runs a Primo brief search and converts the XML response into Results.
#
# args - normalized search arguments hash, passed to #construct_query.
#
# Returns a BentoSearch::Results. When the HTTP request raises one of the
# rescued exceptions, the HTTP status is not successful, or the response has
# no DOCSET element, the Results is returned with #error set instead of
# letting a NoMethodError on nil escape.
def search_implementation(args)
  results = BentoSearch::Results.new
  url     = construct_query(args)

  begin
    response = http_client.get(url)
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError => e
    results.error ||= {}
    results.error[:exception] = e
    return results
  end

  unless HTTP::Status.successful?(response.status)
    results.error ||= {}
    results.error[:status] = response.status
    return results
  end

  response_xml = Nokogiri::XML response.body
  # namespaces really do nobody any good
  response_xml.remove_namespaces!

  docset = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET")
  if docset.nil?
    results.error ||= {}
    results.error[:status]  = response.status
    results.error[:message] = "Primo response did not contain a DOCSET element"
    return results
  end

  results.total_items = docset["TOTALHITS"].to_i

  response_xml.xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET/DOC").each do |doc_xml|
    item = BentoSearch::ResultItem.new
    # Data in primo response is confusing in many different places in
    # variant formats. We try to pick out the best to take things from,
    # but we're guessing, it's under-documented.

    item.title    = text_at_xpath(doc_xml, "./PrimoNMBib/record/display/title")
    item.abstract = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/abstract")

    doc_xml.xpath("./PrimoNMBib/record/facets/creatorcontrib").each do |author_node|
      item.authors << BentoSearch::Author.new(:display => author_node.text)
    end

    item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/jtitle")
    # check btitle for book chapters, the book they are in.
    if item.journal_title.blank? && doc_xml.at_xpath("./PrimoNMBib/record/display/ispartof")
      item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/btitle")
    end

    item.publisher  = text_at_xpath(doc_xml, "./PrimoNMBib/record/display/publisher")
    item.volume     = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/volume")
    item.issue      = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/issue")
    item.start_page = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/spage")
    item.end_page   = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/epage")
    item.doi        = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/doi")
    item.issn       = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/issn")
    item.isbn       = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/isbn")

    if (date = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/creationdate"))
      item.year = date[0, 4] # first four chars
    end

    if (fmt_str = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/rsrctype"))
      # 'article', 'book_chapter'. abuse rails to turn into nice titleized english.
      item.format_str = fmt_str.titleize
      item.format     = map_format fmt_str
    end

    # TODO: formats, highlighting

    results << item
  end

  results
end
|
|
111
|
+
|
|
112
|
+
# Translates a primocentral 'rsrctype' value into our own internal taxonomy
# of formats. Returns nil for values that have no mapping.
#
# Need docs on what the complete Primo vocabulary here is, we're
# just guessing from what we see.
def map_format(str)
  known_formats = {
    "article"           => "Article",
    "newspaper_article" => "Article",
    "review"            => "Article",
    "book"              => "Book",
    "dissertation"      => :dissertation
  }

  known_formats[str]
end
|
|
125
|
+
|
|
126
|
+
# Returns the text() of the first node at the xpath, or nil when no node
# matches or the node's text is blank.
#
# xml   - object responding to #at_xpath (a document or node).
# xpath - XPath expression String.
def text_at_xpath(xml, xpath)
  node = xml.at_xpath(xpath)
  return nil if node.nil?

  text = node.text
  # Check the extracted text, not the node: calling #blank? on the node asks
  # a different question and let whitespace-only text through.
  return nil if text.blank?

  text
end
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Whether the end user should be treated as authenticated.
#
# args - search arguments hash; a non-nil args[:auth] overrides the
#        configured :auth value.
#
# Always returns true or false; with neither value given the answer is false.
def authenticated_end_user?(args)
  override = args[:auth]
  return (override ? true : false) unless override.nil?

  configuration.auth ? true : false
end
|
|
150
|
+
|
|
151
|
+
# Docs say any commas in the query need to be replaced with spaces.
def prepared_query(str)
  str.tr(',', ' ')
end
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Builds the Primo brief-search request URL from configuration and the
# search arguments.
#
# args - search arguments hash; reads :per_page, :start, :sort,
#        :search_field, :query (and :auth via #authenticated_end_user?).
#
# Returns the URL as a String. Every query-string value taken from
# configuration, including the institution, is URL-escaped.
def construct_query(args)
  url = "http://#{configuration.host_port}/PrimoWebServices/xservice/search/brief"
  url << "?institution=#{CGI.escape configuration.institution.to_s}"
  url << "&loc=#{CGI.escape configuration.loc}"
  url << "&lang=#{CGI.escape configuration.lang}"

  url << "&bulkSize=#{args[:per_page]}" if args[:per_page]
  # primo indx is 1-based record index, our :start is 0-based.
  url << "&indx=#{args[:start] + 1}" if args[:start]

  sort_defn  = sort_definitions[args[:sort]]
  sort_value = sort_defn && sort_defn[:implementation]
  url << "&sortField=#{CGI.escape sort_value}" if sort_value

  url << "&onCampus=#{authenticated_end_user?(args) ? 'true' : 'false'}"

  field = args[:search_field].present? ? args[:search_field] : "any"
  query = "#{field},contains,#{prepared_query args[:query]}"
  url << "&query=#{CGI.escape query}"

  # A fixed param value may be a single string or an array of them.
  configuration.fixed_params.each_pair do |key, value|
    [value].flatten.each do |v|
      url << "&#{CGI.escape key.to_s}=#{CGI.escape v.to_s}"
    end
  end

  url
end
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# Search fields offered by this engine: Primo field name mapped to a hash
# holding its :semantic meaning. Others are available too, this is not
# exhaustive.
def search_field_definitions
  semantics = {
    "creator" => :author,
    "title"   => :title,
    "sub"     => :subject,
    "isbn"    => :isbn,
    "issn"    => :issn
  }

  semantics.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end
|
|
206
|
+
|
|
207
|
+
# Sort keys offered by this engine, each mapped to the Primo sort field name
# in :implementation.
def sort_definitions
  sort_fields = {
    "title_asc"  => "stitle",
    "date_desc"  => "scdate",
    "author_asc" => "screator",
    # As far as I can tell, what they call 'popularity'
    # is really relevance, with popularity boosting.
    "relevance"  => "popularity"
  }

  sort_fields.each_with_object({}) do |(key, primo_field), definitions|
    definitions[key] = { :implementation => primo_field }
  end
end
|
|
217
|
+
|
|
218
|
+
# Configuration keys that must be supplied for this engine.
def self.required_configuration
  required = [:host_port, :institution]
  required
end
|
|
221
|
+
|
|
222
|
+
# Default values for the optional configuration keys.
def self.default_configuration
  defaults = {}
  defaults[:loc] = 'adaptor,primo_central_multiple_fe'
  # "eng" or "fre" or "ger" (Code for the representation of name of language conform to ISO-639)
  defaults[:lang] = "eng"
  defaults[:fixed_params] = {}
  defaults
end
|
|
230
|
+
|
|
231
|
+
end
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
require 'cgi'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
|
|
4
|
+
require 'http_client_patch/include_client'
|
|
5
|
+
require 'httpclient'
|
|
6
|
+
module BentoSearch
|
|
7
|
+
# Supports fielded searching, sorting, pagination.
|
|
8
|
+
#
|
|
9
|
+
# Required configuration:
|
|
10
|
+
# * api_key
|
|
11
|
+
#
|
|
12
|
+
# Defaults to 'relevance' sort, rather than scopus's default of date desc.
|
|
13
|
+
#
|
|
14
|
+
# Uses the Scopus SciVerse REST API. You need to be a Scopus customer
|
|
15
|
+
# to access. http://api.elsevier.com
|
|
16
|
+
# http://www.developers.elsevier.com/action/devprojects
|
|
17
|
+
#
|
|
18
|
+
# ToS: http://www.developers.elsevier.com/devcms/content-policies
|
|
19
|
+
# "Federated Search" use case.
|
|
20
|
+
# Also: http://www.developers.elsevier.com/cms/apiserviceagreement
|
|
21
|
+
#
|
|
22
|
+
# Note that ToS applying to you probably means you must restrict access
|
|
23
|
+
# to search functionality to authenticated affiliated users only.
|
|
24
|
+
#
|
|
25
|
+
# Register for an API key at "Register New Site" at http://developers.elsevier.com/action/devnewsite
|
|
26
|
+
# You will then need to get server IP addresses registered with Scopus too,
|
|
27
|
+
# apparently by emailing directly to dave.santucci at elsevier dot com.
|
|
28
|
+
#
|
|
29
|
+
# Scopus API Docs:
|
|
30
|
+
# * http://www.developers.elsevier.com/devcms/content-api-search-request
|
|
31
|
+
# * http://www.developers.elsevier.com/devcms/content/search-fields-overview
|
|
32
|
+
#
|
|
33
|
+
# Some more docs on response elements and query elements:
|
|
34
|
+
# * http://api.elsevier.com/content/search/#d0n14606
|
|
35
|
+
#
|
|
36
|
+
# Other API's in the suite not being used by this code at present:
|
|
37
|
+
# * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
|
|
38
|
+
# * http://www.developers.elsevier.com/devcms/content-api-metadata-request
|
|
39
|
+
#
|
|
40
|
+
# Support: Integration@scopus.com
|
|
41
|
+
#
|
|
42
|
+
# TODO: Mention to Scopus: Only one author?
|
|
43
|
+
# Paging of 50 gets an error, but docs say I should be able to request 200.
|
|
44
|
+
#
|
|
45
|
+
class ScopusEngine
|
|
46
|
+
include BentoSearch::SearchEngine
|
|
47
|
+
|
|
48
|
+
extend HTTPClientPatch::IncludeClient
|
|
49
|
+
include_http_client
|
|
50
|
+
|
|
51
|
+
# Runs a search against the Scopus search API and converts the Atom XML
# response into Results.
#
# args - normalized search arguments hash, passed to #scopus_url.
#
# Returns a Results object. A rescued exception, a missing response, a
# non-successful HTTP status or a service-error document yields a Results
# with #error set (:exception, :status, :error_info). The exception is the
# specific 400 response Scopus sends for an empty result set, which is
# reported as zero hits instead.
def search_implementation(args)
  results = Results.new

  xml, response, exception = nil, nil, nil

  url = scopus_url(args)

  headers = {
    "X-ELS-APIKey"          => configuration.api_key,
    "X-ELS-ResourceVersion" => "XOCS",
    "Accept"                => "application/atom+xml"
  }

  begin
    response = http_client.get(url, nil, headers)
    xml = Nokogiri::XML(response.body)
  rescue Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError,
         HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    exception = e
  end

  # handle errors
  if response.nil? || xml.nil? || exception ||
     !HTTP::Status.successful?(response.status) ||
     xml.at_xpath("service-error")

    # UGH. Scopus reports 0 hits as an error, not entirely distinguishable
    # from an actual error. Oh well, we have to go with it.
    # response may be nil here (the request itself failed), so guard before
    # asking for its status.
    status_xml = response && response.status == 400 && xml &&
                 xml.at_xpath("./service-error/status")

    if status_xml &&
       node_text(status_xml.at_xpath("./statusCode")) == "INVALID_INPUT" &&
       node_text(status_xml.at_xpath("./statusText")) == "Result set was empty or Start value beyond result set"
      # PROBABLY 0 hit count, although could be something else I'm afraid.
      results.total_items = 0
      return results
    end

    # real error
    results.error ||= {}
    results.error[:exception] = exception
    results.error[:status] = response.status if response
    # keep from storing the entire possibly huge response as error
    # but sometimes it's an error message.
    results.error[:error_info] = xml.at_xpath("service-error") if xml
    return results
  end

  results.total_items = (node_text xml.at_xpath("//opensearch:totalResults", xml_ns)).to_i

  xml.xpath("//atom:entry", xml_ns).each do |entry|
    results << (item = ResultItem.new)

    if (scopus_link = entry.at_xpath("atom:link[@ref='scopus']", xml_ns))
      item.link = scopus_link["href"]
    end

    item.title         = node_text entry.at_xpath("dc:title", xml_ns)
    item.journal_title = node_text entry.at_xpath("prism:publicationName", xml_ns)
    item.issn          = node_text entry.at_xpath("prism:issn", xml_ns)
    item.volume        = node_text entry.at_xpath("prism:volume", xml_ns)
    item.issue         = node_text entry.at_xpath("prism:issueIdentifier", xml_ns)
    item.doi           = node_text entry.at_xpath("prism:doi", xml_ns)

    # pages might be in startingPage/endingPage OR in pageRange
    if (first_page = entry.at_xpath("prism:startingPage", xml_ns))
      item.start_page = first_page.text.to_i
      if (last_page = entry.at_xpath("prism:endingPage", xml_ns))
        item.end_page = last_page.text.to_i
      end
    elsif (range = entry.at_xpath("prism:pageRange", xml_ns))
      spage, epage = *range.text.split("-")
      item.start_page = spage
      item.end_page   = epage
    end

    # get the year out of the date
    if (date = entry.at_xpath("prism:coverDate", xml_ns))
      date.text =~ /^(\d\d\d\d)/
      item.year = $1.to_i if $1
    end

    # Authors might be in atom:authors separated by |, or just
    # a single one in dc:creator
    if (authors = entry.at_xpath("atom:authors", xml_ns))
      authors.text.split("|").each do |author|
        item.authors << Author.new(:display => author.strip)
      end
    elsif (author = entry.at_xpath("dc:creator", xml_ns))
      item.authors << Author.new(:display => author.text.strip)
    end

    # Format we're still trying to figure out how Scopus API
    # delivers it. Here is at least one way.
    if (doctype = entry.at_xpath("atom:subtype", xml_ns))
      item.format     = doctype_to_format(doctype.text)
      item.format_str = doctype_to_string(doctype.text)
    end
  end

  results
end
|
|
155
|
+
|
|
156
|
+
# The escaping rules are not entirely clear for the API. We know colons and
# parens are special chars. Backslash-escaping them doesn't seem to work, so
# backslashes, parens and colons are each replaced with a space.
def escape_query(query)
  query.tr('\\\\():', ' ')
end
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Configuration keys that must be supplied for this engine.
def self.required_configuration
  required = ["api_key"]
  required
end
|
|
171
|
+
|
|
172
|
+
# Default values for the optional configuration keys.
def self.default_configuration
  defaults = {}
  defaults[:base_url] = "http://api.elsevier.com/"
  defaults[:cluster]  = "SCOPUS"
  defaults
end
|
|
178
|
+
|
|
179
|
+
# Max per-page is 200, as per http://www.developers.elsevier.com/devcms/content-apis, bottom of page.
def max_per_page
  return 200
end
|
|
183
|
+
|
|
184
|
+
# Search fields offered by this engine: Scopus field code mapped to a hash
# holding its :semantic meaning.
def search_field_definitions
  semantics = {
    "AUTH"  => :author,
    "TITLE" => :title,
    # controlled and author-assigned keywords
    "KEY"   => :subject,
    "ISBN"  => :isbn,
    "ISSN"  => :issn
  }

  semantics.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end
|
|
194
|
+
|
|
195
|
+
# Sort keys offered by this engine, each mapped to its scopus &sort= value in
# :implementation. Values are not yet URI-escaped, later code will do that.
#
# The 'refeid' value is currently undocumented on the Scopus site, but was
# given to me in email by scopus.
def sort_definitions
  sort_values = {
    "title_asc"     => "+itemtitle",
    "date_desc"     => "-datesort,+auth",
    "relevance"     => "refeid",
    "author_asc"    => "+auth",
    "num_cite_desc" => "-numcitedby"
  }

  sort_values.each_with_object({}) do |(key, scopus_value), definitions|
    definitions[key] = { :implementation => scopus_value }
  end
end
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
protected
|
|
211
|
+
|
|
212
|
+
# Nil-safe text accessor: gives back nil for a nil node, otherwise the
# node's text().
def node_text(node)
  node.nil? ? nil : node.text
end
|
|
219
|
+
|
|
220
|
+
# Namespace prefix => URI map passed along with the XPath queries on the
# Scopus response.
def xml_ns
  namespaces = {}
  namespaces["opensearch"] = "http://a9.com/-/spec/opensearch/1.1/"
  namespaces["prism"]      = "http://prismstandard.org/namespaces/basic/2.0/"
  namespaces["dc"]         = "http://purl.org/dc/elements/1.1/"
  namespaces["atom"]       = "http://www.w3.org/2005/Atom"
  namespaces
end
|
|
226
|
+
|
|
227
|
+
# Maps from Scopus "doctype" as listed at http://www.developers.elsevier.com/devcms/content/search-fields-overview
# and delivered in the XML response as atom:subtype.
# Maps to our own internal formats as documented in ResultItem#format
# Returns nil if can't map.
#
# Each doctype is listed once; a repeated key in a hash literal is silently
# overwritten and makes Ruby emit a warning.
def doctype_to_format(doctype)
  formats = {
    "ar" => "Article",
    "ip" => "Article",         # 'article in press'.
    "bk" => "Book",
    "bz" => "Article",
    "re" => "Article",         # really 'report', but Scopus is unreliable here, most of these are actually articles.
    "cp" => :conference_paper,
    "sh" => "Article",         # 'short survey' to scopus, but seems to be used for articles.
    "ed" => "Article",         # Editorial
    "le" => "Article",         # Letter
    "no" => "Article"          # Note
  }

  formats[doctype.to_s]
end
|
|
246
|
+
|
|
247
|
+
# Turns a Scopus doctype code into the human readable label documented by
# Scopus; these labels do not map 1-1 to our controlled format. Returns nil
# for a code that is not listed.
def doctype_to_string(doctype)
  labels = {
    "ar" => "Article",
    "ab" => "Abstract Report",
    "ip" => "Article in Press",
    "bk" => "Book",
    "bz" => "Business Article",
    "cp" => "Conference Paper",
    "cr" => "Conference Review",
    "ed" => "Editorial",
    "er" => "Erratum",
    "le" => "Letter",
    "no" => "Note",
    "pr" => "Press Release",
    # Really 'report', but Scopus is unreliable here, most of these are actually articles.
    "re" => "Article",
    # Really 'short survey' to Scopus, but seems to be used for, well, articles.
    "sh" => "Article"
  }

  labels.fetch(doctype.to_s, nil)
end
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# Builds the Scopus search request URL from configuration and the search
# arguments.
#
# args - search arguments hash; reads :query, :search_field, :per_page,
#        :start and :sort. When :sort is missing, "relevance" is written
#        into args, rather than leaving scopus's default of date desc.
#
# Returns the URL as a String.
def scopus_url(args)
  terms = escape_query args[:query]
  terms = "#{args[:search_field]}(#{terms})" if args[:search_field]

  endpoint = configuration.base_url.chomp("/")
  url = "#{endpoint}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(terms)}"

  url += "&count=#{args[:per_page]}" if args[:per_page]
  url += "&start=#{args[:start]}" if args[:start]

  # The default is stored back into the caller's hash.
  args[:sort] ||= "relevance"
  sort_defn  = sort_definitions[args[:sort]]
  sort_value = sort_defn && sort_defn[:implementation]
  url += "&sort=#{CGI.escape(sort_value)}" if sort_value

  return url
end
|
|
293
|
+
|
|
294
|
+
end
|
|
295
|
+
end
|