bento_search 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,7 +48,7 @@ module BentoSearch
48
48
  json = MultiJson.load( response.body )
49
49
  # Can't rescue everything, or we catch VCR errors, making
50
50
  # things confusing.
51
- rescue TimeoutError, HTTPClient::TimeoutError,
51
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
52
52
  HTTPClient::ConfigurationError, HTTPClient::BadResponseError => e
53
53
  results.error ||= {}
54
54
  results.error[:exception] = e
@@ -223,7 +223,7 @@ module BentoSearch
223
223
  def args_to_search_url(arguments)
224
224
  query = if arguments[:query].kind_of? Hash
225
225
  #multi-field
226
- arguments[:query].collect {|field, query| fielded_query(query, field)}.join(" ")
226
+ arguments[:query].collect {|field, query_value| fielded_query(query_value, field)}.join(" ")
227
227
  elsif arguments[:search_field]
228
228
  fielded_query(arguments[:query], arguments[:search_field])
229
229
  else
@@ -4,29 +4,29 @@ require 'nokogiri'
4
4
  require 'http_client_patch/include_client'
5
5
  require 'httpclient'
6
6
  module BentoSearch
7
- # Supports fielded searching, sorting, pagination.
8
- #
9
- # Required configuration:
7
+ # Supports fielded searching, sorting, pagination.
8
+ #
9
+ # Required configuration:
10
10
  # * api_key
11
- #
12
- # Defaults to 'relevance' sort, rather than scopus's default of date desc.
11
+ #
12
+ # Defaults to 'relevance' sort, rather than scopus's default of date desc.
13
13
  #
14
14
  # Uses the Scopus SciVerse REST API. You need to be a Scopus customer
15
15
  # to access. http://api.elsevier.com
16
16
  # http://www.developers.elsevier.com/action/devprojects
17
- #
17
+ #
18
18
  # ToS: http://www.developers.elsevier.com/devcms/content-policies
19
- # "Federated Search" use case.
19
+ # "Federated Search" use case.
20
20
  # Also: http://www.developers.elsevier.com/cms/apiserviceagreement
21
21
  #
22
22
  # Note that ToS applying to you probably means you must restrict access
23
- # to search functionality to authenticated affiliated users only.
23
+ # to search functionality to authenticated affiliated users only.
24
24
  #
25
25
  # Register for an API key at "Register New Site" at http://developers.elsevier.com/action/devnewsite
26
- # You will then need to get server IP addresses registered with Scopus too,
27
- # apparently by emailing directly to dave.santucci at elsevier dot com.
28
- #
29
- # Scopus API Docs:
26
+ # You will then need to get server IP addresses registered with Scopus too,
27
+ # apparently by emailing directly to dave.santucci at elsevier dot com.
28
+ #
29
+ # Scopus API Docs:
30
30
  # * http://api.elsevier.com/documentation/SCOPUSSearchAPI.wadl
31
31
  # * http://api.elsevier.com/documentation/search/SCOPUSSearchViews.htm
32
32
  #
@@ -34,9 +34,9 @@ module BentoSearch
34
34
  # * http://api.elsevier.com/documentation/search/SCOPUSSearchTips.htm
35
35
  #
36
36
  # Some more docs on response elements and query elements:
37
- # * http://api.elsevier.com/content/search/#d0n14606
38
- #
39
- # Other API's in the suite not being used by this code at present:
37
+ # * http://api.elsevier.com/content/search/#d0n14606
38
+ #
39
+ # Other API's in the suite not being used by this code at present:
40
40
  # * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
41
41
  # * http://www.developers.elsevier.com/devcms/content-api-metadata-request
42
42
  #
@@ -44,52 +44,52 @@ module BentoSearch
44
44
  #
45
45
  # TODO: Mention to Scopus: Only one author?
46
46
  # Paging of 50 gets an error, but docs say I should be able to request 200.
47
- #
47
+ #
48
48
  # Scopus response does not seem to include language of hit, even though
49
49
  # api allows you to restrict by language. ask scopus if we're missing something?
50
50
  class ScopusEngine
51
51
  include BentoSearch::SearchEngine
52
-
52
+
53
53
  extend HTTPClientPatch::IncludeClient
54
54
  include_http_client
55
-
56
- def search_implementation(args)
55
+
56
+ def search_implementation(args)
57
57
  results = Results.new
58
-
58
+
59
59
  xml, response, exception = nil, nil, nil
60
-
61
- url = scopus_url(args)
60
+
61
+ url = scopus_url(args)
62
62
 
63
63
  begin
64
64
  response = http_client.get( url , nil,
65
- # HTTP headers.
66
- {"X-ELS-APIKey" => configuration.api_key,
65
+ # HTTP headers.
66
+ {"X-ELS-APIKey" => configuration.api_key,
67
67
  "X-ELS-ResourceVersion" => "XOCS",
68
68
  "Accept" => "application/atom+xml"}
69
69
  )
70
70
 
71
71
  xml = Nokogiri::XML(response.body)
72
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
73
- exception = e
72
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
73
+ exception = e
74
74
  end
75
75
 
76
76
  # handle errors
77
- if (response.nil? || xml.nil? || exception ||
77
+ if (response.nil? || xml.nil? || exception ||
78
78
  (! HTTP::Status.successful? response.status) ||
79
79
  xml.at_xpath("service-error") ||
80
80
  xml.at_xpath("./atom:feed/atom:entry/atom:error", xml_ns)
81
81
  )
82
-
82
+
83
83
  # UGH. Scopus reports 0 hits as an error, not entirely distinguishable
84
- # from an actual error. Oh well, we have to go with it.
85
- if (
84
+ # from an actual error. Oh well, we have to go with it.
85
+ if (
86
86
  (response.status == 400) &&
87
87
  xml &&
88
88
  (error_xml = xml.at_xpath("./service-error/status")) &&
89
89
  (node_text(error_xml.at_xpath("./statusCode")) == "INVALID_INPUT") &&
90
90
  (node_text(error_xml.at_xpath("./statusText")).starts_with? "Result set was empty")
91
91
  )
92
- # PROBABLY 0 hit count, although could be something else I'm afraid.
92
+ # PROBABLY 0 hit count, although could be something else I'm afraid.
93
93
  results.total_items = 0
94
94
  return results
95
95
  elsif (
@@ -102,7 +102,7 @@ module BentoSearch
102
102
  results.total_items = 0
103
103
  return results
104
104
  else
105
- # real error
105
+ # real error
106
106
  results.error ||= {}
107
107
  results.error[:exception] = e
108
108
  results.error[:status] = response.status if response
@@ -110,27 +110,27 @@ module BentoSearch
110
110
  results.error[:error_info] ||= xml.at_xpath("./atom:feed/atom:entry/atom:error", xml_ns).text if xml
111
111
  return results
112
112
  end
113
- end
114
-
115
-
113
+ end
114
+
115
+
116
116
  results.total_items = (node_text xml.at_xpath("//opensearch:totalResults", xml_ns)).to_i
117
-
117
+
118
118
  xml.xpath("//atom:entry", xml_ns).each do | entry |
119
119
 
120
- results << (item = ResultItem.new)
120
+ results << (item = ResultItem.new)
121
121
  if scopus_link = entry.at_xpath("atom:link[@ref='scopus']", xml_ns)
122
122
  item.link = scopus_link["href"]
123
123
  end
124
-
124
+
125
125
  item.unique_id = node_text entry.at_xpath("dc:identifier", xml_ns)
126
-
126
+
127
127
  item.title = node_text entry.at_xpath("dc:title", xml_ns)
128
128
  item.journal_title = node_text entry.at_xpath("prism:publicationName", xml_ns)
129
129
  item.issn = node_text entry.at_xpath("prism:issn", xml_ns)
130
130
  item.volume = node_text entry.at_xpath("prism:volume", xml_ns)
131
131
  item.issue = node_text entry.at_xpath("prism:issueIdentifier", xml_ns)
132
132
  item.doi = node_text entry.at_xpath("prism:doi", xml_ns)
133
-
133
+
134
134
  # pages might be in startingPage/endingPage OR in pageRange
135
135
  if (start = entry.at_xpath("prism:startingPage", xml_ns))
136
136
  item.start_page = start.text.to_i
@@ -142,13 +142,13 @@ module BentoSearch
142
142
  item.start_page = spage
143
143
  item.end_page = epage
144
144
  end
145
-
145
+
146
146
  # get the year out of the date
147
147
  if date = entry.at_xpath("prism:coverDate", xml_ns)
148
148
  date.text =~ /^(\d\d\d\d)/
149
149
  item.year = $1.to_i if $1
150
150
  end
151
-
151
+
152
152
  # Authors might be in atom:authors separated by |, or just
153
153
  # a single one in dc:creator
154
154
  if (authors = entry.at_xpath("atom:authors", xml_ns))
@@ -158,47 +158,47 @@ module BentoSearch
158
158
  elsif (author = entry.at_xpath("dc:creator", xml_ns))
159
159
  item.authors << Author.new(:display => author.text.strip)
160
160
  end
161
-
161
+
162
162
  # Format we're still trying to figure out how Scopus API
163
163
  # delivers it. Here is at least one way.
164
164
  if (doctype = entry.at_xpath("atom:subtype", xml_ns))
165
165
  item.format = doctype_to_format(doctype.text)
166
- item.format_str = doctype_to_string(doctype.text)
166
+ item.format_str = doctype_to_string(doctype.text)
167
167
  end
168
-
168
+
169
169
  end
170
-
170
+
171
171
  return results
172
172
  end
173
-
173
+
174
174
  # The escaping rules are not entirely clear for the API. We know colons
175
175
  # and parens are special chars. It's unclear how or if we can escape them,
176
- # we'll just remove them.
176
+ # we'll just remove them.
177
177
  def escape_query(query)
178
178
  # backslash escape doesn't seem to work
179
179
  #query.gsub(/([\\\(\)\:])/) do |match|
180
180
  # "\\#{$1}"
181
181
  #end
182
- query.gsub(/([\\\(\)\:])/, ' ')
182
+ query.gsub(/([\\\(\)\:])/, ' ')
183
183
  end
184
-
185
-
184
+
185
+
186
186
  def self.required_configuration
187
187
  ["api_key"]
188
188
  end
189
-
189
+
190
190
  def self.default_configuration
191
- {
191
+ {
192
192
  :base_url => "http://api.elsevier.com/",
193
193
  :cluster => "SCOPUS"
194
194
  }
195
195
  end
196
-
197
- # Max per-page is 200, as per http://www.developers.elsevier.com/devcms/content-apis, bottom of page.
196
+
197
+ # Max per-page is 200, as per http://www.developers.elsevier.com/devcms/content-apis, bottom of page.
198
198
  def max_per_page
199
199
  200
200
200
  end
201
-
201
+
202
202
  def search_field_definitions
203
203
  {
204
204
  nil => {:semantic => :general},
@@ -217,17 +217,17 @@ module BentoSearch
217
217
  "PUBYEAR" => {:semantic => :year}
218
218
  }
219
219
  end
220
-
220
+
221
221
  def sort_definitions
222
- # scopus &sort= values, not yet URI-escaped, later code will do that.
222
+ # scopus &sort= values, not yet URI-escaped, later code will do that.
223
223
  #
224
224
  # 'refeid' key is currently undocumented on Scopus site, but
225
- # was given to me in email by scopus.
225
+ # was given to me in email by scopus.
226
226
  {
227
227
  "title_asc" => {:implementation => "+itemtitle"},
228
228
  "date_desc" => {:implementation => "-datesort,+auth"},
229
- "relevance" => {:implementation => "refeid" },
230
- "author_asc" => {:implementation => "+auth"},
229
+ "relevance" => {:implementation => "refeid" },
230
+ "author_asc" => {:implementation => "+auth"},
231
231
  "num_cite_desc" => {:implementation => "-numcitedby"}
232
232
  }
233
233
  end
@@ -235,44 +235,44 @@ module BentoSearch
235
235
  def multi_field_search?
236
236
  true
237
237
  end
238
-
238
+
239
239
  protected
240
-
240
+
241
241
  # returns nil if passed in nil, otherwise
242
242
  # returns nokogiri text()
243
243
  def node_text(node)
244
244
  return nil if node.nil?
245
-
245
+
246
246
  return node.text()
247
247
  end
248
-
248
+
249
249
  def xml_ns
250
250
  {"opensearch" => "http://a9.com/-/spec/opensearch/1.1/",
251
251
  "prism" => "http://prismstandard.org/namespaces/basic/2.0/",
252
252
  "dc" => "http://purl.org/dc/elements/1.1/",
253
253
  "atom" => "http://www.w3.org/2005/Atom"}
254
- end
255
-
254
+ end
255
+
256
256
  # Maps from Scopus "doctype" as listed at http://www.developers.elsevier.com/devcms/content/search-fields-overview
257
- # and delivered in the XML response as atom:subtype.
257
+ # and delivered in the XML response as atom:subtype.
258
258
  # Maps to our own internal formats as documented in ResultItem#format
259
- # Returns nil if can't map.
259
+ # Returns nil if can't map.
260
260
  def doctype_to_format(doctype)
261
261
  { "ar" => "Article",
262
262
  "bk" => "Book",
263
263
  "bz" => "Article",
264
264
  "re" => "Article", # most of what scopus labels 'Report' seem to be ordinary articles.
265
265
  "cp" => :conference_paper,
266
- "sh" => "Article", # 'short survey' to scopus, but seems to be used for articles.
267
- "ip" => "Article", # 'article in press'.
266
+ "sh" => "Article", # 'short survey' to scopus, but seems to be used for articles.
267
+ "ip" => "Article", # 'article in press'.
268
268
  'ed' => "Article", # Editorial
269
269
  'le' => "Article", # Letter
270
270
  'no' => "Article", # Note
271
271
  }[doctype.to_s]
272
272
  end
273
-
273
+
274
274
  # Maps Scopus doctype to human readable strings as documented by Scopus,
275
- # does not map 1-1 to our controlled format.
275
+ # does not map 1-1 to our controlled format.
276
276
  def doctype_to_string(doctype)
277
277
  { "ar" => "Article",
278
278
  "ab" => "Abstract Report",
@@ -286,14 +286,14 @@ module BentoSearch
286
286
  "le" => "Letter",
287
287
  "no" => "Note",
288
288
  "pr" => "Press Release",
289
- "re" => "Article", # Really 'report', but Scopus is unreliable here, most of these are actually articles.
290
- "sh" => "Article" # Really 'short survey' to Scopus, but seems to be used for, well, articles.
289
+ "re" => "Article", # Really 'report', but Scopus is unreliable here, most of these are actually articles.
290
+ "sh" => "Article" # Really 'short survey' to Scopus, but seems to be used for, well, articles.
291
291
  }[doctype.to_s]
292
292
  end
293
-
294
-
295
-
296
-
293
+
294
+
295
+
296
+
297
297
  def scopus_url(args)
298
298
  query = if args[:query].kind_of? Hash
299
299
  args[:query].collect {|field, query| fielded_query(query,field)}.join(" AND ")
@@ -302,27 +302,27 @@ module BentoSearch
302
302
  else
303
303
  escape_query args[:query]
304
304
  end
305
-
305
+
306
306
  query = "#{configuration.base_url.chomp("/")}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(query)}"
307
-
307
+
308
308
  query += "&count=#{args[:per_page]}" if args[:per_page]
309
-
309
+
310
310
  query += "&start=#{args[:start]}" if args[:start]
311
-
311
+
312
312
  # default to 'relevance' sort if not given, rather than scopus's
313
- # default of date desc.
313
+ # default of date desc.
314
314
  args[:sort] ||= "relevance"
315
315
  if (defn = self.sort_definitions[args[:sort]]) &&
316
316
  ( value = defn[:implementation])
317
317
  query += "&sort=#{CGI.escape(value)}"
318
- end
319
-
318
+ end
319
+
320
320
  return query
321
321
  end
322
322
 
323
323
  def fielded_query(query, field)
324
324
  "#{field}(#{escape_query query})"
325
325
  end
326
-
326
+
327
327
  end
328
328
  end
@@ -132,7 +132,7 @@ class BentoSearch::SummonEngine
132
132
  begin
133
133
  response = http_client.get(uri, nil, headers)
134
134
  hash = MultiJson.load( response.body )
135
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, MultiJson::DecodeError, Nokogiri::SyntaxError => e
135
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, MultiJson::DecodeError, Nokogiri::SyntaxError => e
136
136
  exception = e
137
137
  end
138
138
  # handle some errors
data/lib/bento_search.rb CHANGED
@@ -2,7 +2,7 @@ require 'confstruct'
2
2
 
3
3
  module BentoSearch
4
4
  class Error < ::StandardError ; end
5
- end
5
+ end
6
6
 
7
7
  require "bento_search/engine"
8
8
  require 'bento_search/routes'
@@ -14,36 +14,39 @@ require File.dirname(__FILE__) + '/../app/models/bento_search/registrar'
14
14
 
15
15
  # Crazy workaround to the fact that some versions of Hashie::Mash,
16
16
  # when used with SafeAssignment as Confstruct does, don't let
17
- # you use :id as a key.
17
+ # you use :id as a key.
18
18
  # https://github.com/intridea/hashie/issues/290
19
19
  # We fix by removing the unused method with very hacky metaprogramming
20
- # sorry.
20
+ # sorry.
21
21
  require 'hashie/mash'
22
22
  if Hashie::Mash.instance_methods(false).include?(:id)
23
23
  Hashie::Mash.send(:remove_method, :id)
24
24
  end
25
25
 
26
26
 
27
- module BentoSearch
27
+ module BentoSearch
28
28
  def self.global_registrar
29
29
  @@global_registrar ||= BentoSearch::Registrar.new
30
30
  end
31
-
31
+
32
32
  # See BentoSearch::Registrar#register_engine, this is a
33
- # default global registrar.
33
+ # default global registrar.
34
34
  def self.register_engine(id, data = nil, &block)
35
- global_registrar.register_engine(id, data, &block)
35
+ global_registrar.register_engine(id, data, &block)
36
36
  end
37
-
37
+
38
38
  def self.get_engine(id)
39
39
  global_registrar.get_engine(id)
40
40
  end
41
-
41
+
42
42
  # Mostly just used for testing
43
43
  def self.reset_engine_registrations!
44
44
  global_registrar.reset_engine_registrations!
45
45
  end
46
46
 
47
+ # Avoid deprecation warnings in ruby 2.3.0
48
+ RubyTimeoutClass = (defined?(Timeout::Error) ? Timeout::Error : TimeoutError)
49
+
47
50
  end
48
51
 
49
52