bento_search 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README.md +299 -0
  3. data/Rakefile +40 -0
  4. data/app/assets/images/bento_search/large_loader.gif +0 -0
  5. data/app/assets/javascripts/bento_search.js +3 -0
  6. data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
  7. data/app/assets/stylesheets/bento_search/bento.css +4 -0
  8. data/app/controllers/bento_search/bento_search_controller.rb +7 -0
  9. data/app/controllers/bento_search/search_controller.rb +72 -0
  10. data/app/helpers/bento_search_helper.rb +138 -0
  11. data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
  12. data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
  13. data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
  14. data/app/models/bento_search/author.rb +25 -0
  15. data/app/models/bento_search/link.rb +30 -0
  16. data/app/models/bento_search/multi_searcher.rb +109 -0
  17. data/app/models/bento_search/openurl_creator.rb +128 -0
  18. data/app/models/bento_search/registrar.rb +70 -0
  19. data/app/models/bento_search/result_item.rb +203 -0
  20. data/app/models/bento_search/results.rb +54 -0
  21. data/app/models/bento_search/results/pagination.rb +67 -0
  22. data/app/models/bento_search/search_engine.rb +219 -0
  23. data/app/models/bento_search/search_engine/capabilities.rb +65 -0
  24. data/app/search_engines/bento_search/#Untitled-1# +11 -0
  25. data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
  26. data/app/search_engines/bento_search/eds_engine.rb +557 -0
  27. data/app/search_engines/bento_search/google_books_engine.rb +184 -0
  28. data/app/search_engines/bento_search/primo_engine.rb +231 -0
  29. data/app/search_engines/bento_search/scopus_engine.rb +295 -0
  30. data/app/search_engines/bento_search/summon_engine.rb +398 -0
  31. data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
  32. data/app/views/bento_search/_link.html.erb +4 -0
  33. data/app/views/bento_search/_search_error.html.erb +22 -0
  34. data/app/views/bento_search/_std_item.html.erb +39 -0
  35. data/app/views/bento_search/search/search.html.erb +1 -0
  36. data/config/locales/en.yml +25 -0
  37. data/lib/bento_search.rb +29 -0
  38. data/lib/bento_search/engine.rb +5 -0
  39. data/lib/bento_search/routes.rb +45 -0
  40. data/lib/bento_search/version.rb +3 -0
  41. data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
  42. data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
  43. data/lib/http_client_patch/include_client.rb +86 -0
  44. data/lib/tasks/bento_search_tasks.rake +4 -0
  45. data/test/dummy/README.rdoc +261 -0
  46. data/test/dummy/Rakefile +7 -0
  47. data/test/dummy/app/assets/javascripts/application.js +15 -0
  48. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  49. data/test/dummy/app/controllers/application_controller.rb +3 -0
  50. data/test/dummy/app/helpers/application_helper.rb +2 -0
  51. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  52. data/test/dummy/config.ru +4 -0
  53. data/test/dummy/config/application.rb +56 -0
  54. data/test/dummy/config/boot.rb +10 -0
  55. data/test/dummy/config/database.yml +25 -0
  56. data/test/dummy/config/environment.rb +5 -0
  57. data/test/dummy/config/environments/development.rb +37 -0
  58. data/test/dummy/config/environments/production.rb +67 -0
  59. data/test/dummy/config/environments/test.rb +37 -0
  60. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  61. data/test/dummy/config/initializers/inflections.rb +15 -0
  62. data/test/dummy/config/initializers/mime_types.rb +5 -0
  63. data/test/dummy/config/initializers/secret_token.rb +7 -0
  64. data/test/dummy/config/initializers/session_store.rb +8 -0
  65. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  66. data/test/dummy/config/locales/en.yml +5 -0
  67. data/test/dummy/config/routes.rb +6 -0
  68. data/test/dummy/db/test.sqlite3 +0 -0
  69. data/test/dummy/log/test.log +3100 -0
  70. data/test/dummy/public/404.html +26 -0
  71. data/test/dummy/public/422.html +26 -0
  72. data/test/dummy/public/500.html +25 -0
  73. data/test/dummy/public/favicon.ico +0 -0
  74. data/test/dummy/script/rails +6 -0
  75. data/test/functional/bento_search/search_controller_test.rb +81 -0
  76. data/test/helper/bento_search_helper_test.rb +125 -0
  77. data/test/integration/navigation_test.rb +10 -0
  78. data/test/support/mock_engine.rb +23 -0
  79. data/test/support/test_with_cassette.rb +38 -0
  80. data/test/test_helper.rb +52 -0
  81. data/test/unit/#vcr_test.rb# +68 -0
  82. data/test/unit/ebsco_host_engine_test.rb +134 -0
  83. data/test/unit/eds_engine_test.rb +105 -0
  84. data/test/unit/google_books_engine_test.rb +93 -0
  85. data/test/unit/item_decorators_test.rb +66 -0
  86. data/test/unit/multi_searcher_test.rb +49 -0
  87. data/test/unit/openurl_creator_test.rb +111 -0
  88. data/test/unit/pagination_test.rb +59 -0
  89. data/test/unit/primo_engine_test.rb +37 -0
  90. data/test/unit/register_engine_test.rb +50 -0
  91. data/test/unit/result_item_display_test.rb +39 -0
  92. data/test/unit/result_item_test.rb +36 -0
  93. data/test/unit/scopus_engine_test.rb +130 -0
  94. data/test/unit/search_engine_base_test.rb +178 -0
  95. data/test/unit/search_engine_test.rb +95 -0
  96. data/test/unit/summon_engine_test.rb +161 -0
  97. data/test/unit/xerxes_engine_test.rb +70 -0
  98. data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
  99. data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
  100. data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
  101. data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
  102. data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
  103. data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
  104. data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
  105. data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
  106. data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
  107. data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
  108. data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
  109. data/test/vcr_cassettes/gbs/pagination.yml +702 -0
  110. data/test/vcr_cassettes/gbs/search.yml +340 -0
  111. data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
  112. data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
  113. data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
  114. data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
  115. data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
  116. data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
  117. data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
  118. data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
  119. data/test/vcr_cassettes/summon/search.yml +242 -0
  120. data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
  121. data/test/view/std_item_test.rb +98 -0
  122. metadata +421 -0
@@ -0,0 +1,184 @@
1
+ require 'httpclient'
2
+ require 'cgi'
3
+ require 'multi_json'
4
+
5
+ # not sure why we need to require the entire 'helpers'
6
+ # when all we want is sanitize_helper, but I think we do:
7
+ require 'action_view/helpers'
8
+ #require 'action_view/helpers/sanitize_helper'
9
+
10
+ require 'http_client_patch/include_client'
11
+
12
+ module BentoSearch
13
+ #
14
+ # https://developers.google.com/books/docs/v1/using
15
+ # https://developers.google.com/books/docs/v1/reference/volumes#resource
16
+ #
17
+ # Configuration :api_key STRONGLY recommended, or google will severely
18
+ # rate-limit you.
19
+ class GoogleBooksEngine
20
+ include BentoSearch::SearchEngine
21
+ include ActionView::Helpers::SanitizeHelper
22
+
23
+ extend HTTPClientPatch::IncludeClient
24
+ include_http_client # gives us a #http_client with persistent class-level
25
+
26
+ class_attribute :base_url
27
+ self.base_url = "https://www.googleapis.com/books/v1/"
28
+
29
+
30
# Runs a search against the Google Books volumes API and converts the
# JSON response into a Results object.
#
# arguments - normalized #search arguments hash from SearchEngine, passed
#             straight through to #args_to_search_url.
#
# Returns a Results. If the HTTP request fails, the body is not parseable
# JSON, the HTTP status is not successful, or the JSON carries an "error"
# key, the Results is returned early with #error filled in and no items.
def search_implementation(arguments)
  query_url = args_to_search_url(arguments)

  results  = Results.new
  response = nil
  json     = nil

  begin
    response = http_client.get(query_url)
    json     = MultiJson.load(response.body)
  # Can't rescue everything, or we catch VCR errors, making
  # things confusing.
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
         MultiJson::DecodeError => e
    results.error ||= {}
    results.error[:exception] = e
  end

  # Trap json parse error, but also check for bad http
  # status, or error reported in the json. In any of those cases
  # return results obj with error status.
  if response.nil? || json.nil? ||
     !HTTP::Status.successful?(response.status) ||
     json["error"]

    results.error ||= {}
    results.error[:status] = response.status if response
    if json && json["error"] && json["error"]["errors"] && json["error"]["errors"].kind_of?(Array)
      results.error[:message] = json["error"]["errors"].first.values.join(", ")
    end
    results.error[:error_info] = json["error"] if json && json.respond_to?("[]")

    # escape early!
    return results
  end

  results.total_items = json["totalItems"]

  # The "items" key can be absent (e.g. a search with no hits), so fall
  # back to an empty list instead of calling #each on nil.
  (json["items"] || []).each do |j_item|
    # Bibliographic fields live under "volumeInfo" when it is present.
    j_item = j_item["volumeInfo"] if j_item["volumeInfo"]

    item = ResultItem.new
    results << item

    item.title     = j_item["title"]
    item.subtitle  = j_item["subtitle"]
    item.publisher = j_item["publisher"]
    item.link      = j_item["canonicalVolumeLink"]
    item.abstract  = sanitize j_item["description"]
    item.year      = get_year j_item["publishedDate"]
    item.format    = if j_item["printType"] == "MAGAZINE"
      :serial
    else
      "Book"
    end

    (j_item["authors"] || []).each do |author_name|
      item.authors << Author.new(:display => author_name)
    end
  end

  return results
end
95
+
96
+
97
+
98
+
99
+ ###########
100
+ # BentoBox::SearchEngine API
101
+ ###########
102
+
103
# Maximum page size this engine reports.
def max_per_page
  return 100
end
106
+
107
# Search fields this engine exposes: each key is the field prefix used in
# the query, each value records the semantic meaning it maps to.
def search_field_definitions
  field_semantics = [
    ["intitle",     :title],
    ["inauthor",    :author],
    ["inpublisher", :publisher],
    ["subject",     :subject],
    ["isbn",        :isbn]
  ]

  field_semantics.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end
115
+
116
# Sort keys this engine accepts, each mapped to the value sent to the
# API. A nil :implementation means no sort parameter is added to the URL.
def sort_definitions
  definitions = {}
  definitions["relevance"] = { :implementation => nil } # default
  definitions["date_desc"] = { :implementation => "newest" }
  definitions
end
122
+
123
+ protected
124
+
125
+
126
+ #############
127
+ # Our own implementation code
128
+ ##############
129
+
130
+
131
# Takes a normalized #search arguments hash from SearchEngine and
# turns it into a URL for the Google Books API. Factored out to make
# testing possible.
#
# arguments - hash that may carry :query, :search_field, :per_page,
#             :start and :sort.
#
# Returns the request URL as a String.
def args_to_search_url(arguments)
  query = if arguments[:search_field]
    fielded_query(arguments[:query], arguments[:search_field])
  else
    arguments[:query]
  end

  query_url = base_url + "volumes?q=#{CGI.escape query}"

  # Escape the key like every other value placed in the query string.
  if configuration.api_key
    query_url += "&key=#{CGI.escape(configuration.api_key.to_s)}"
  end

  if arguments[:per_page]
    query_url += "&maxResults=#{arguments[:per_page]}"
  end
  if arguments[:start]
    query_url += "&startIndex=#{arguments[:start]}"
  end

  # The Google Books volumes API names its ordering parameter "orderBy";
  # a "sort" parameter is not part of that API.
  if arguments[:sort] &&
     (defn = sort_definitions[arguments[:sort]]) &&
     (value = defn[:implementation])
    query_url += "&orderBy=#{CGI.escape(value)}"
  end

  return query_url
end
162
+
163
+
164
# Rewrites a query so every token carries the field prefix, the way
# google's own form does: <one two> for :intitle becomes
# <intitle:one intitle:two>. Double-quoted phrases are kept together as
# a single token.
def fielded_query(query, field)
  pieces = query.split(%r{\s|("[^"]+")})
  terms  = pieces.reject { |piece| piece.blank? }
  terms.map { |term| "#{field}:#{term}" }.join(" ")
end
172
+
173
+
174
# Pulls the leading four-digit year out of a date string.
#
# Returns the year as an Integer, or nil when the input is blank or
# does not start with four digits.
def get_year(iso8601)
  return nil if iso8601.blank?
  return nil unless iso8601 =~ /^(\d{4})/

  Regexp.last_match(1).to_i
end
182
+
183
+ end
184
+ end
@@ -0,0 +1,231 @@
1
+ require 'cgi'
2
+ require 'nokogiri'
3
+
4
+ require 'http_client_patch/include_client'
5
+ require 'httpclient'
6
+
7
+ # ExLibris Primo Central.
8
+ #
9
+ # written/tested with PrimoCentral aggregated index only, but probably
10
+ # should work with any Primo, may need some assumption tweaks.
11
+ #
12
+ # == Required Configuration
13
+ #
14
+ # [:host_port] your unique Primo's host/port combo, like "something.exlibrisgroup.com:1701".
15
+ # it's assumed we can talk to your primo at
16
+ # http://$host_port/PrimoWebServices/xservice/search/brief?
17
+ # [:institution] Primo requires an institution parameter.
18
+ # right now we have a hard-coded assumed 'institution' in
19
+ # config. Eg. "GWCC"
20
+ #
21
+ #
22
+ # == Other Primo-Specific Configuration
23
+ #
24
+ # [:loc] The primo 'loc' parameter, default "adaptor,primo_central_multiple_fe"
25
+ # for Primo Central Index searches.
26
+ # [:auth] Set to 'true' to assume local auth'd users if you're going to protect
27
+ # access. Default false. Alternately, you can pass in an
28
+ # :auth => true/false to 'search', which will override config.
29
+ # PC has limited access for non-auth users.
30
+ # [:lang] Primo lang query param. "Hints input languages to search engine for language recognition. "
31
+ # For now hardcoded into config, not settable per request. Default 'eng'.
32
+ # [:fixed_params] Extra url query params to add on to every search request.
33
+ # Can be used to hard-code certain limits, such as:
34
+ # {"query_exc" => ["facet_rtype,exact,books", "something_else"]}
35
+ # Note neither key nor values are uri encoded, we'll take
36
+ # care of that for you. value can be array or single string.
37
+ #
38
+ # == Vendor docs
39
+ #
40
+ # http://www.exlibrisgroup.org/display/PrimoOI/Brief+Search
41
+
42
+ class BentoSearch::PrimoEngine
43
+ include BentoSearch::SearchEngine
44
+
45
+ extend HTTPClientPatch::IncludeClient
46
+ include_http_client
47
+
48
# Runs a brief search against the Primo X-Service and converts the XML
# response into a BentoSearch::Results.
#
# args - normalized search arguments hash, passed to #construct_query.
#
# Returns a BentoSearch::Results. When the HTTP request fails, the HTTP
# status is not successful, or the response has no DOCSET element, the
# Results is returned early with #error filled in rather than raising.
def search_implementation(args)
  url     = construct_query(args)
  results = BentoSearch::Results.new

  begin
    response = http_client.get(url)
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError => e
    results.error ||= {}
    results.error[:exception] = e
    return results
  end

  unless HTTP::Status.successful?(response.status)
    results.error ||= {}
    results.error[:status] = response.status
    return results
  end

  response_xml = Nokogiri::XML response.body
  # namespaces really do nobody any good
  response_xml.remove_namespaces!

  # Without a DOCSET there is no hit count to read; report an error
  # instead of calling [] on nil.
  docset = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET")
  if docset.nil?
    results.error ||= {}
    results.error[:status]  = response.status
    results.error[:message] = "Primo response did not include a DOCSET element"
    return results
  end

  results.total_items = docset["TOTALHITS"].to_i

  response_xml.xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET/DOC").each do |doc_xml|
    item = BentoSearch::ResultItem.new
    # Data in primo response is confusing in many different places in
    # variant formats. We try to pick out the best to take things from,
    # but we're guessing, it's under-documented.

    item.title    = text_at_xpath(doc_xml, "./PrimoNMBib/record/display/title")
    item.abstract = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/abstract")

    doc_xml.xpath("./PrimoNMBib/record/facets/creatorcontrib").each do |author_node|
      item.authors << BentoSearch::Author.new(:display => author_node.text)
    end

    item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/jtitle")
    # check btitle for book chapters, the book they are in.
    if item.journal_title.blank? && doc_xml.at_xpath("./PrimoNMBib/record/display/ispartof")
      item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/btitle")
    end

    item.publisher  = text_at_xpath doc_xml, "./PrimoNMBib/record/display/publisher"
    item.volume     = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/volume"
    item.issue      = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issue"
    item.start_page = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/spage"
    item.end_page   = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/epage"
    item.doi        = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/doi"
    item.issn       = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issn"
    item.isbn       = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/isbn"

    if (date = text_at_xpath doc_xml, "./PrimoNMBib/record/search/creationdate")
      item.year = date[0,4] # first four chars
    end

    if fmt_str = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/rsrctype")
      # 'article', 'book_chapter'. abuse rails to turn into nice titleized english.
      item.format_str = fmt_str.titleize

      item.format = map_format fmt_str
    end

    #TODO formats, highlighting

    results << item
  end

  return results
end
111
+
112
# Translates a primocentral 'rsrctype' value into our own internal
# taxonomy of formats.
#
# The complete Primo vocabulary is not documented here; only the values
# seen so far are mapped. Returns nil for anything else.
def map_format(str)
  if %w[article newspaper_article review].include?(str)
    "Article"
  elsif str == "book"
    "Book"
  elsif str == "dissertation"
    :dissertation
  end
end
125
+
126
# Returns the text() at the xpath, or nil when nothing matches the
# xpath or the matched node's text is empty or whitespace-only.
#
# xml   - object responding to #at_xpath (a Nokogiri node or document)
# xpath - xpath expression String
def text_at_xpath(xml, xpath)
  node = xml.at_xpath(xpath)
  return nil if node.nil?

  text = node.text
  # Check the extracted text rather than the node, so that empty or
  # whitespace-only content yields nil.
  return nil if text.nil? || text.strip.empty?

  text
end
135
+
136
+
137
+
138
# Whether the search should be made on behalf of an authenticated user.
# An explicit (non-nil) args[:auth] wins; otherwise the configured :auth
# value decides. Always returns true or false.
def authenticated_end_user?(args)
  configured = configuration.auth ? true : false
  requested  = args[:auth]

  return configured if requested.nil?

  requested ? true : false
end
150
+
151
# Docs say commas must be replaced with spaces before the query is
# sent; returns a new string with every comma turned into a space.
def prepared_query(str)
  str.tr(',', ' ')
end
155
+
156
+
157
# Builds the Primo brief-search request URL from configuration and the
# normalized search arguments.
#
# args - hash that may carry :query, :search_field, :per_page, :start,
#        :sort and :auth.
#
# Returns the URL as a String. Every query-string value, including the
# configured institution, is URI-escaped.
def construct_query(args)
  url = "http://#{configuration.host_port}/PrimoWebServices/xservice/search/brief"
  url += "?institution=#{CGI.escape configuration.institution.to_s}"
  url += "&loc=#{CGI.escape configuration.loc}"

  url += "&lang=#{CGI.escape configuration.lang}"

  url += "&bulkSize=#{args[:per_page]}" if args[:per_page]
  # primo indx is 1-based record index, our :start is 0-based.
  url += "&indx=#{args[:start] + 1}" if args[:start]

  if (defn = self.sort_definitions[ args[:sort] ]) &&
     (value = defn[:implementation])
    url += "&sortField=#{CGI.escape value}"
  end

  url += "&onCampus=#{ authenticated_end_user?(args) ? 'true' : 'false'}"

  # Fall back to the "any" field when no specific search field is given.
  field = args[:search_field].present? ? args[:search_field] : "any"
  query = "#{field},contains,#{prepared_query args[:query]}"

  url += "&query=#{CGI.escape query}"

  # A fixed param value may be a single string or an array; emit one
  # key=value pair per value.
  configuration.fixed_params.each_pair do |key, value|
    [value].flatten.each do |v|
      url += "&#{CGI.escape key.to_s}=#{CGI.escape v.to_s}"
    end
  end

  return url
end
194
+
195
+
196
# Search fields this engine exposes, keyed by the field name sent to
# Primo. Others are available too; this list is not exhaustive.
def search_field_definitions
  field_semantics = [
    ["creator", :author],
    ["title",   :title],
    ["sub",     :subject],
    ["isbn",    :isbn],
    ["issn",    :issn]
  ]

  field_semantics.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end
206
+
207
# Sort keys this engine accepts, each mapped to the value sent as the
# Primo sortField parameter.
def sort_definitions
  sort_fields = [
    ["title_asc",  "stitle"],
    ["date_desc",  "scdate"],
    ["author_asc", "screator"],
    # As far as I can tell, what they call 'popularity'
    # is really relevance, with popularity boosting.
    ["relevance",  "popularity"]
  ]

  sort_fields.each_with_object({}) do |(sort_key, primo_value), definitions|
    definitions[sort_key] = { :implementation => primo_value }
  end
end
217
+
218
# Configuration keys that must be supplied for this engine.
def self.required_configuration
  required_keys = [:host_port, :institution]
  required_keys
end
221
+
222
# Default values for the optional configuration keys.
def self.default_configuration
  defaults = {}
  defaults[:loc] = 'adaptor,primo_central_multiple_fe'
  # "eng" or "fre" or "ger" (Code for the representation of name of language conform to ISO-639)
  defaults[:lang] = "eng"
  defaults[:fixed_params] = {}
  defaults
end
230
+
231
+ end
@@ -0,0 +1,295 @@
1
+ require 'cgi'
2
+ require 'nokogiri'
3
+
4
+ require 'http_client_patch/include_client'
5
+ require 'httpclient'
6
+ module BentoSearch
7
+ # Supports fielded searching, sorting, pagination.
8
+ #
9
+ # Required configuration:
10
+ # * api_key
11
+ #
12
+ # Defaults to 'relevance' sort, rather than scopus's default of date desc.
13
+ #
14
+ # Uses the Scopus SciVerse REST API. You need to be a Scopus customer
15
+ # to access. http://api.elsevier.com
16
+ # http://www.developers.elsevier.com/action/devprojects
17
+ #
18
+ # ToS: http://www.developers.elsevier.com/devcms/content-policies
19
+ # "Federated Search" use case.
20
+ # Also: http://www.developers.elsevier.com/cms/apiserviceagreement
21
+ #
22
+ # Note that ToS applying to you probably means you must restrict access
23
+ # to search functionality to authenticated affiliated users only.
24
+ #
25
+ # Register for an API key at "Register New Site" at http://developers.elsevier.com/action/devnewsite
26
+ # You will then need to get server IP addresses registered with Scopus too,
27
+ # apparently by emailing directly to dave.santucci at elsevier dot com.
28
+ #
29
+ # Scopus API Docs:
30
+ # * http://www.developers.elsevier.com/devcms/content-api-search-request
31
+ # * http://www.developers.elsevier.com/devcms/content/search-fields-overview
32
+ #
33
+ # Some more docs on response elements and query elements:
34
+ # * http://api.elsevier.com/content/search/#d0n14606
35
+ #
36
+ # Other API's in the suite not being used by this code at present:
37
+ # * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
38
+ # * http://www.developers.elsevier.com/devcms/content-api-metadata-request
39
+ #
40
+ # Support: Integration@scopus.com
41
+ #
42
+ # TODO: Mention to Scopus: Only one author?
43
+ # Paging of 50 gets an error, but docs say I should be able to request 200.
44
+ #
45
+ class ScopusEngine
46
+ include BentoSearch::SearchEngine
47
+
48
+ extend HTTPClientPatch::IncludeClient
49
+ include_http_client
50
+
51
# Runs a search against the Scopus REST API and converts the Atom XML
# response into a Results object.
#
# args - normalized search arguments hash, passed to #scopus_url.
#
# Returns a Results. A 400 response whose service-error says the result
# set was empty is treated as zero hits. Any other failed request,
# non-successful status or service-error yields a Results with #error
# filled in.
def search_implementation(args)
  results = Results.new

  xml, response, exception = nil, nil, nil

  url = scopus_url(args)

  begin
    response = http_client.get(url, nil,
      # HTTP headers.
      {"X-ELS-APIKey"          => configuration.api_key,
       "X-ELS-ResourceVersion" => "XOCS",
       "Accept"                => "application/atom+xml"}
    )
    xml = Nokogiri::XML(response.body)
  rescue Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError,
         HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    exception = e
  end

  # handle errors
  if response.nil? || xml.nil? || exception ||
     !HTTP::Status.successful?(response.status) ||
     xml.at_xpath("service-error")

    # UGH. Scopus reports 0 hits as an error, not entirely distinguishable
    # from an actual error. Oh well, we have to go with it.
    # response can be nil here (the request raised), so guard it first.
    if response &&
       (response.status == 400) &&
       xml &&
       (error_xml = xml.at_xpath("./service-error/status")) &&
       (node_text(error_xml.at_xpath("./statusCode")) == "INVALID_INPUT") &&
       (node_text(error_xml.at_xpath("./statusText")) == "Result set was empty or Start value beyond result set")
      # PROBABLY 0 hit count, although could be something else I'm afraid.
      results.total_items = 0
      return results
    else
      # real error
      results.error ||= {}
      results.error[:exception] = exception
      results.error[:status] = response.status if response
      # keep from storing the entire possibly huge response as error
      # but sometimes it's an error message.
      results.error[:error_info] = xml.at_xpath("service-error") if xml
      return results
    end
  end

  results.total_items = (node_text xml.at_xpath("//opensearch:totalResults", xml_ns)).to_i

  xml.xpath("//atom:entry", xml_ns).each do |entry|
    results << (item = ResultItem.new)
    if scopus_link = entry.at_xpath("atom:link[@ref='scopus']", xml_ns)
      item.link = scopus_link["href"]
    end
    item.title         = node_text entry.at_xpath("dc:title", xml_ns)
    item.journal_title = node_text entry.at_xpath("prism:publicationName", xml_ns)
    item.issn          = node_text entry.at_xpath("prism:issn", xml_ns)
    item.volume        = node_text entry.at_xpath("prism:volume", xml_ns)
    item.issue         = node_text entry.at_xpath("prism:issueIdentifier", xml_ns)
    item.doi           = node_text entry.at_xpath("prism:doi", xml_ns)

    # pages might be in startingPage/endingPage OR in pageRange
    if (start = entry.at_xpath("prism:startingPage", xml_ns))
      item.start_page = start.text.to_i
      if (epage = entry.at_xpath("prism:endingPage", xml_ns))
        item.end_page = epage.text.to_i
      end
    elsif (range = entry.at_xpath("prism:pageRange", xml_ns))
      (spage, epage) = *range.text().split("-")
      item.start_page = spage
      item.end_page   = epage
    end

    # get the year out of the date
    if date = entry.at_xpath("prism:coverDate", xml_ns)
      date.text =~ /^(\d\d\d\d)/
      item.year = $1.to_i if $1
    end

    # Authors might be in atom:authors separated by |, or just
    # a single one in dc:creator
    if (authors = entry.at_xpath("atom:authors", xml_ns))
      authors.text.split("|").each do |author|
        item.authors << Author.new(:display => author.strip)
      end
    elsif (author = entry.at_xpath("dc:creator", xml_ns))
      item.authors << Author.new(:display => author.text.strip)
    end

    # Format we're still trying to figure out how Scopus API
    # delivers it. Here is at least one way.
    if (doctype = entry.at_xpath("atom:subtype", xml_ns))
      item.format     = doctype_to_format(doctype.text)
      item.format_str = doctype_to_string(doctype.text)
    end
  end

  return results
end
155
+
156
# The escaping rules are not entirely clear for the API. We know colons
# and parens are special chars, and backslash-escaping them doesn't
# seem to work, so each backslash, paren and colon is simply replaced
# with a space.
def escape_query(query)
  special_chars = /([\\\(\)\:])/
  query.gsub(special_chars) { ' ' }
end
166
+
167
+
168
# Configuration keys that must be supplied for this engine.
def self.required_configuration
  %w[api_key]
end
171
+
172
# Default values for the optional configuration keys: the API base URL
# and the cluster (index) name used when building the search URL.
def self.default_configuration
  defaults = {}
  defaults[:base_url] = "http://api.elsevier.com/"
  defaults[:cluster]  = "SCOPUS"
  defaults
end
178
+
179
# Max per-page is 200, as per http://www.developers.elsevier.com/devcms/content-apis, bottom of page.
def max_per_page
  return 200
end
183
+
184
# Search fields this engine exposes, keyed by the Scopus field code
# that is wrapped around the query.
def search_field_definitions
  field_semantics = [
    ["AUTH",  :author],
    ["TITLE", :title],
    # controlled and author-assigned keywords
    ["KEY",   :subject],
    ["ISBN",  :isbn],
    ["ISSN",  :issn]
  ]

  field_semantics.each_with_object({}) do |(field, semantic), definitions|
    definitions[field] = { :semantic => semantic }
  end
end
194
+
195
# Sort keys this engine accepts, mapped to scopus &sort= values. The
# values are not yet URI-escaped; the code that builds the URL does that.
#
# The 'refeid' key is currently undocumented on the Scopus site, but
# was given to me in email by scopus.
def sort_definitions
  sort_values = [
    ["title_asc",     "+itemtitle"],
    ["date_desc",     "-datesort,+auth"],
    ["relevance",     "refeid"],
    ["author_asc",    "+auth"],
    ["num_cite_desc", "-numcitedby"]
  ]

  sort_values.each_with_object({}) do |(sort_key, scopus_value), definitions|
    definitions[sort_key] = { :implementation => scopus_value }
  end
end
208
+
209
+
210
+ protected
211
+
212
# Nil-safe text extraction: gives back nil for a nil node, otherwise
# the node's text().
def node_text(node)
  node.nil? ? nil : node.text()
end
219
+
220
# Prefix => namespace URI map passed to the xpath calls that query the
# Scopus response.
def xml_ns
  namespaces = {}
  namespaces["opensearch"] = "http://a9.com/-/spec/opensearch/1.1/"
  namespaces["prism"]      = "http://prismstandard.org/namespaces/basic/2.0/"
  namespaces["dc"]         = "http://purl.org/dc/elements/1.1/"
  namespaces["atom"]       = "http://www.w3.org/2005/Atom"
  namespaces
end
226
+
227
# Maps from Scopus "doctype" as listed at http://www.developers.elsevier.com/devcms/content/search-fields-overview
# and delivered in the XML response as atom:subtype.
# Maps to our own internal formats as documented in ResultItem#format
# Returns nil if can't map.
#
# doctype - Scopus doctype code; converted with #to_s before lookup.
def doctype_to_format(doctype)
  # Each code is listed exactly once; repeating a key in a hash literal
  # makes Ruby warn and silently keeps only the last value.
  { "ar" => "Article",
    "ip" => "Article", # 'article in press'.
    "bk" => "Book",
    "bz" => "Article",
    "re" => "Article", # really 'report', but Scopus is unreliable here, most of these are actually articles.
    "cp" => :conference_paper,
    "sh" => "Article", # 'short survey' to scopus, but seems to be used for articles.
    "ed" => "Article", # Editorial
    "le" => "Article", # Letter
    "no" => "Article"  # Note
  }[doctype.to_s]
end
246
+
247
# Maps a Scopus doctype code to a human readable label as documented by
# Scopus; this does not map 1-1 to our controlled format.
# The code is converted with #to_s first; unknown codes give nil.
def doctype_to_string(doctype)
  labels = {
    "ar" => "Article",
    "ab" => "Abstract Report",
    "ip" => "Article in Press",
    "bk" => "Book",
    "bz" => "Business Article",
    "cp" => "Conference Paper",
    "cr" => "Conference Review",
    "ed" => "Editorial",
    "er" => "Erratum",
    "le" => "Letter",
    "no" => "Note",
    "pr" => "Press Release",
    # Really 'report', but Scopus is unreliable here, most of these are actually articles.
    "re" => "Article",
    # Really 'short survey' to Scopus, but seems to be used for, well, articles.
    "sh" => "Article"
  }

  code = doctype.to_s
  labels[code]
end
266
+
267
+
268
+
269
+
270
# Builds the Scopus search request URL from configuration and the
# normalized search arguments.
#
# args - hash that may carry :query, :search_field, :per_page, :start
#        and :sort. The hash is read only; it is not modified.
#
# Returns the URL as a String.
def scopus_url(args)
  query = escape_query args[:query]

  # A fielded search wraps the query in the field code: FIELD(query)
  if args[:search_field]
    query = "#{args[:search_field]}(#{query})"
  end

  url = "#{configuration.base_url.chomp("/")}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(query)}"

  url += "&count=#{args[:per_page]}" if args[:per_page]

  url += "&start=#{args[:start]}" if args[:start]

  # default to 'relevance' sort if not given, rather than scopus's
  # default of date desc. Use a local so the caller's hash is untouched.
  sort_key = args[:sort] || "relevance"
  if (defn = self.sort_definitions[sort_key]) &&
     (value = defn[:implementation])
    url += "&sort=#{CGI.escape(value)}"
  end

  return url
end
293
+
294
+ end
295
+ end