bento_search 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README.md +299 -0
  3. data/Rakefile +40 -0
  4. data/app/assets/images/bento_search/large_loader.gif +0 -0
  5. data/app/assets/javascripts/bento_search.js +3 -0
  6. data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
  7. data/app/assets/stylesheets/bento_search/bento.css +4 -0
  8. data/app/controllers/bento_search/bento_search_controller.rb +7 -0
  9. data/app/controllers/bento_search/search_controller.rb +72 -0
  10. data/app/helpers/bento_search_helper.rb +138 -0
  11. data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
  12. data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
  13. data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
  14. data/app/models/bento_search/author.rb +25 -0
  15. data/app/models/bento_search/link.rb +30 -0
  16. data/app/models/bento_search/multi_searcher.rb +109 -0
  17. data/app/models/bento_search/openurl_creator.rb +128 -0
  18. data/app/models/bento_search/registrar.rb +70 -0
  19. data/app/models/bento_search/result_item.rb +203 -0
  20. data/app/models/bento_search/results.rb +54 -0
  21. data/app/models/bento_search/results/pagination.rb +67 -0
  22. data/app/models/bento_search/search_engine.rb +219 -0
  23. data/app/models/bento_search/search_engine/capabilities.rb +65 -0
  24. data/app/search_engines/bento_search/#Untitled-1# +11 -0
  25. data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
  26. data/app/search_engines/bento_search/eds_engine.rb +557 -0
  27. data/app/search_engines/bento_search/google_books_engine.rb +184 -0
  28. data/app/search_engines/bento_search/primo_engine.rb +231 -0
  29. data/app/search_engines/bento_search/scopus_engine.rb +295 -0
  30. data/app/search_engines/bento_search/summon_engine.rb +398 -0
  31. data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
  32. data/app/views/bento_search/_link.html.erb +4 -0
  33. data/app/views/bento_search/_search_error.html.erb +22 -0
  34. data/app/views/bento_search/_std_item.html.erb +39 -0
  35. data/app/views/bento_search/search/search.html.erb +1 -0
  36. data/config/locales/en.yml +25 -0
  37. data/lib/bento_search.rb +29 -0
  38. data/lib/bento_search/engine.rb +5 -0
  39. data/lib/bento_search/routes.rb +45 -0
  40. data/lib/bento_search/version.rb +3 -0
  41. data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
  42. data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
  43. data/lib/http_client_patch/include_client.rb +86 -0
  44. data/lib/tasks/bento_search_tasks.rake +4 -0
  45. data/test/dummy/README.rdoc +261 -0
  46. data/test/dummy/Rakefile +7 -0
  47. data/test/dummy/app/assets/javascripts/application.js +15 -0
  48. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  49. data/test/dummy/app/controllers/application_controller.rb +3 -0
  50. data/test/dummy/app/helpers/application_helper.rb +2 -0
  51. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  52. data/test/dummy/config.ru +4 -0
  53. data/test/dummy/config/application.rb +56 -0
  54. data/test/dummy/config/boot.rb +10 -0
  55. data/test/dummy/config/database.yml +25 -0
  56. data/test/dummy/config/environment.rb +5 -0
  57. data/test/dummy/config/environments/development.rb +37 -0
  58. data/test/dummy/config/environments/production.rb +67 -0
  59. data/test/dummy/config/environments/test.rb +37 -0
  60. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  61. data/test/dummy/config/initializers/inflections.rb +15 -0
  62. data/test/dummy/config/initializers/mime_types.rb +5 -0
  63. data/test/dummy/config/initializers/secret_token.rb +7 -0
  64. data/test/dummy/config/initializers/session_store.rb +8 -0
  65. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  66. data/test/dummy/config/locales/en.yml +5 -0
  67. data/test/dummy/config/routes.rb +6 -0
  68. data/test/dummy/db/test.sqlite3 +0 -0
  69. data/test/dummy/log/test.log +3100 -0
  70. data/test/dummy/public/404.html +26 -0
  71. data/test/dummy/public/422.html +26 -0
  72. data/test/dummy/public/500.html +25 -0
  73. data/test/dummy/public/favicon.ico +0 -0
  74. data/test/dummy/script/rails +6 -0
  75. data/test/functional/bento_search/search_controller_test.rb +81 -0
  76. data/test/helper/bento_search_helper_test.rb +125 -0
  77. data/test/integration/navigation_test.rb +10 -0
  78. data/test/support/mock_engine.rb +23 -0
  79. data/test/support/test_with_cassette.rb +38 -0
  80. data/test/test_helper.rb +52 -0
  81. data/test/unit/#vcr_test.rb# +68 -0
  82. data/test/unit/ebsco_host_engine_test.rb +134 -0
  83. data/test/unit/eds_engine_test.rb +105 -0
  84. data/test/unit/google_books_engine_test.rb +93 -0
  85. data/test/unit/item_decorators_test.rb +66 -0
  86. data/test/unit/multi_searcher_test.rb +49 -0
  87. data/test/unit/openurl_creator_test.rb +111 -0
  88. data/test/unit/pagination_test.rb +59 -0
  89. data/test/unit/primo_engine_test.rb +37 -0
  90. data/test/unit/register_engine_test.rb +50 -0
  91. data/test/unit/result_item_display_test.rb +39 -0
  92. data/test/unit/result_item_test.rb +36 -0
  93. data/test/unit/scopus_engine_test.rb +130 -0
  94. data/test/unit/search_engine_base_test.rb +178 -0
  95. data/test/unit/search_engine_test.rb +95 -0
  96. data/test/unit/summon_engine_test.rb +161 -0
  97. data/test/unit/xerxes_engine_test.rb +70 -0
  98. data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
  99. data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
  100. data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
  101. data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
  102. data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
  103. data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
  104. data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
  105. data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
  106. data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
  107. data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
  108. data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
  109. data/test/vcr_cassettes/gbs/pagination.yml +702 -0
  110. data/test/vcr_cassettes/gbs/search.yml +340 -0
  111. data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
  112. data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
  113. data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
  114. data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
  115. data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
  116. data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
  117. data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
  118. data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
  119. data/test/vcr_cassettes/summon/search.yml +242 -0
  120. data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
  121. data/test/view/std_item_test.rb +98 -0
  122. metadata +421 -0
@@ -0,0 +1,184 @@
1
+ require 'httpclient'
2
+ require 'cgi'
3
+ require 'multi_json'
4
+
5
+ # not sure why we need to require the entire 'helpers'
6
+ # when all we want is sanitize_helper, but I think we do:
7
+ require 'action_view/helpers'
8
+ #require 'action_view/helpers/sanitize_helper'
9
+
10
+ require 'http_client_patch/include_client'
11
+
12
+ module BentoSearch
13
+ #
14
+ # https://developers.google.com/books/docs/v1/using
15
+ # https://developers.google.com/books/docs/v1/reference/volumes#resource
16
+ #
17
+ # Configuration :api_key STRONGLY recommended, or google will severely
18
+ # rate-limit you.
19
+ class GoogleBooksEngine
20
+ include BentoSearch::SearchEngine
21
+ include ActionView::Helpers::SanitizeHelper
22
+
23
+ extend HTTPClientPatch::IncludeClient
24
+ include_http_client # gives us a #http_client with persistent class-level
25
+
26
+ class_attribute :base_url
27
+ self.base_url = "https://www.googleapis.com/books/v1/"
28
+
29
+
30
# Runs a search against the Google Books volumes API and converts the JSON
# response into a Results object.
#
# arguments - normalized search arguments hash, handed to #args_to_search_url
#             to build the request URL.
#
# Returns a Results. When the request fails, the body is not parseable JSON,
# the HTTP status is not successful, or the JSON carries an "error" member,
# the returned Results has an #error hash (:exception, :status, :message and
# :error_info, as available) and no items.
def search_implementation(arguments)
  query_url = args_to_search_url(arguments)

  results = Results.new
  response = nil
  json = nil

  begin
    response = http_client.get(query_url)
    json = MultiJson.load(response.body)
  # Can't rescue everything, or we catch VCR errors, making things confusing.
  # Timeout::Error is named explicitly because the top-level TimeoutError
  # alias is not defined on current rubies; a body that is not JSON is
  # rescued too, so it is reported as an error instead of raising.
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
         MultiJson::DecodeError => e
    results.error ||= {}
    results.error[:exception] = e
  end

  # Only a JSON object can be a usable response; anything else is an error.
  api_error = json.kind_of?(Hash) ? json["error"] : nil

  if response.nil? || !json.kind_of?(Hash) ||
     !HTTP::Status.successful?(response.status) || api_error

    results.error ||= {}
    results.error[:status] = response.status if response

    first_error = api_error.kind_of?(Hash) && api_error["errors"].kind_of?(Array) &&
                  api_error["errors"].first
    if first_error.respond_to?(:values)
      results.error[:message] = first_error.values.join(", ")
    end
    results.error[:error_info] = api_error if api_error

    # escape early!
    return results
  end

  results.total_items = json["totalItems"]

  # "items" is left out of the response when nothing matched, so fall back
  # to an empty list rather than calling #each on nil.
  (json["items"] || []).each do |j_item|
    j_item = j_item["volumeInfo"] if j_item["volumeInfo"]

    item = ResultItem.new
    results << item

    item.title     = j_item["title"]
    item.subtitle  = j_item["subtitle"]
    item.publisher = j_item["publisher"]
    item.link      = j_item["canonicalVolumeLink"]
    item.abstract  = sanitize j_item["description"]
    item.year      = get_year j_item["publishedDate"]
    item.format    = (j_item["printType"] == "MAGAZINE") ? :serial : "Book"

    (j_item["authors"] || []).each do |author_name|
      item.authors << Author.new(:display => author_name)
    end
  end

  results
end
95
+
96
+
97
+
98
+
99
+ ###########
100
+ # BentoSearch::SearchEngine API
101
+ ###########
102
+
103
# Largest page size this engine advertises. The Google Books volumes API
# documents 40 as the maximum allowable maxResults value, so advertising
# more would only produce requests the API rejects.
def max_per_page
  40
end
106
+
107
# Maps each Google Books query-field keyword to a hash naming the semantic
# field it stands for.
def search_field_definitions
  keyword_semantics = [
    ["intitle",     :title],
    ["inauthor",    :author],
    ["inpublisher", :publisher],
    ["subject",     :subject],
    ["isbn",        :isbn]
  ]

  Hash[keyword_semantics.map { |keyword, semantic| [keyword, {:semantic => semantic}] }]
end
115
+
116
# Maps sort keys to the value sent to the API for that sort. A nil
# :implementation means no sort parameter is sent (the default ordering).
def sort_definitions
  sorts = {}
  sorts["relevance"] = {:implementation => nil} # default
  sorts["date_desc"] = {:implementation => "newest"}
  sorts
end
122
+
123
+ protected
124
+
125
+
126
+ #############
127
+ # Our own implementation code
128
+ ##############
129
+
130
+
131
# Takes a normalized #search arguments hash from SearchEngine and turns it
# into a URL for the Google Books volumes API. Factored out to make testing
# possible.
#
# arguments - hash; reads :query, :search_field, :per_page, :start and :sort.
#
# Returns the request URL as a String. Every interpolated value that can
# contain reserved characters (query, api key, sort value) is URL-escaped.
def args_to_search_url(arguments)
  query = if arguments[:search_field]
    fielded_query(arguments[:query], arguments[:search_field])
  else
    arguments[:query]
  end

  params = ["q=#{CGI.escape query}"]
  params << "key=#{CGI.escape configuration.api_key.to_s}" if configuration.api_key
  params << "maxResults=#{arguments[:per_page]}" if arguments[:per_page]
  params << "startIndex=#{arguments[:start]}" if arguments[:start]

  # The volumes API names its sort parameter "orderBy"; a definition with a
  # nil :implementation (relevance) sends nothing and gets the API default.
  defn  = arguments[:sort] && sort_definitions[arguments[:sort]]
  value = defn && defn[:implementation]
  params << "orderBy=#{CGI.escape value}" if value

  "#{base_url}volumes?#{params.join('&')}"
end
162
+
163
+
164
# Applies a field prefix to every term of a query, the way Google's own
# form does: <one two> with field "intitle" becomes
# <intitle:one intitle:two>. A double-quoted phrase is kept together as a
# single term.
def fielded_query(query, field)
  terms = query.split(%r{\s|("[^"]+")}).reject { |piece| piece.blank? }
  prefixed = terms.map { |term| "#{field}:#{term}" }
  prefixed.join(" ")
end
172
+
173
+
174
# Pulls the year out of a date string that starts with four digits.
#
# Returns the year as an Integer, or nil when the input is blank or does
# not begin with four digits.
def get_year(iso8601)
  return nil if iso8601.blank?

  (iso8601 =~ /^(\d{4})/) ? $1.to_i : nil
end
182
+
183
+ end
184
+ end
@@ -0,0 +1,231 @@
1
+ require 'cgi'
2
+ require 'nokogiri'
3
+
4
+ require 'http_client_patch/include_client'
5
+ require 'httpclient'
6
+
7
+ # ExLibris Primo Central.
8
+ #
9
+ # written/tested with PrimoCentral aggregated index only, but probably
10
+ # should work with any Primo, may need some assumption tweaks.
11
+ #
12
+ # == Required Configuration
13
+ #
14
+ # [:host_port] your unique Primo's host/port combo, like "something.exlibrisgroup.com:1701".
15
+ # it's assumed we can talk to your primo at
16
+ # http://$host_port/PrimoWebServices/xservice/search/brief?
17
+ # [:institution] Primo requires an institution parameter.
18
+ # right now we have a hard-coded assumed 'institution' in
19
+ # config. Eg. "GWCC"
20
+ #
21
+ #
22
+ # == Other Primo-Specific Configuration
23
+ #
24
+ # [:loc] The primo 'loc' parameter, default "adaptor,primo_central_multiple_fe"
25
+ # for Primo Central Index searches.
26
+ # [:auth] Set to 'true' to assume local auth'd users if you're going to protect
27
+ # access. Default false. Alternately, you can pass in an
28
+ # :auth => true/false to 'search', which will override config.
29
+ # PC has limited access for non-auth users.
30
+ # [:lang] Primo lang query param. "Hints input languages to search engine for language recognition. "
31
+ # For now hardcoded into config, not settable per request. Default 'eng'.
32
+ # [:fixed_params] Extra url query params to add on to every search request.
33
+ # Can be used to hard-code certain limits, such as:
34
+ # {"query_exc" => ["facet_rtype,exact,books", "something_else"]}
35
+ # Note neither key nor values are uri encoded, we'll take
36
+ # care of that for you. value can be array or single string.
37
+ #
38
+ # == Vendor docs
39
+ #
40
+ # http://www.exlibrisgroup.org/display/PrimoOI/Brief+Search
41
+
42
+ class BentoSearch::PrimoEngine
43
+ include BentoSearch::SearchEngine
44
+
45
+ extend HTTPClientPatch::IncludeClient
46
+ include_http_client
47
+
48
# Runs a Primo brief search and converts the XML response into a
# BentoSearch::Results.
#
# args - normalized search arguments hash, handed to #construct_query.
#
# Returns a BentoSearch::Results. When the HTTP request raises one of the
# rescued client errors, the status is not successful, or the response has
# no DOCSET element, the Results comes back with an #error hash
# (:exception or :status) and no items instead of raising.
def search_implementation(args)
  url = construct_query(args)
  results = BentoSearch::Results.new

  begin
    response = http_client.get(url)
  # Deliberately not a blanket rescue, so unrelated failures still surface.
  rescue Timeout::Error, HTTPClient::TimeoutError,
         HTTPClient::ConfigurationError, HTTPClient::BadResponseError => e
    results.error ||= {}
    results.error[:exception] = e
    return results
  end

  response_xml = Nokogiri::XML response.body
  # namespaces really do nobody any good
  response_xml.remove_namespaces!

  docset = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET")

  # Without a successful status and a DOCSET there is no hit count to read.
  if !HTTP::Status.successful?(response.status) || docset.nil?
    results.error ||= {}
    results.error[:status] = response.status
    return results
  end

  results.total_items = docset["TOTALHITS"].to_i

  response_xml.xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET/DOC").each do |doc_xml|
    item = BentoSearch::ResultItem.new
    # Data in primo response is confusing in many different places in
    # variant formats. We try to pick out the best to take things from,
    # but we're guessing, it's under-documented.

    item.title    = text_at_xpath(doc_xml, "./PrimoNMBib/record/display/title")
    item.abstract = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/abstract")

    doc_xml.xpath("./PrimoNMBib/record/facets/creatorcontrib").each do |author_node|
      item.authors << BentoSearch::Author.new(:display => author_node.text)
    end

    item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/jtitle")
    # check btitle for book chapters, the book they are in.
    if item.journal_title.blank? && doc_xml.at_xpath("./PrimoNMBib/record/display/ispartof")
      item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/btitle")
    end

    item.publisher  = text_at_xpath doc_xml, "./PrimoNMBib/record/display/publisher"
    item.volume     = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/volume"
    item.issue      = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issue"
    item.start_page = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/spage"
    item.end_page   = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/epage"
    item.doi        = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/doi"
    item.issn       = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issn"
    item.isbn       = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/isbn"

    if (date = text_at_xpath doc_xml, "./PrimoNMBib/record/search/creationdate")
      item.year = date[0,4] # first four chars
    end

    if (fmt_str = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/rsrctype"))
      # 'article', 'book_chapter'. abuse rails to turn into nice titleized english.
      item.format_str = fmt_str.titleize
      item.format     = map_format fmt_str
    end

    #TODO formats, highlighting

    results << item
  end

  results
end
111
+
112
# Translates a Primo 'rsrctype' value into our internal format taxonomy.
#
# The Primo vocabulary is not fully documented, so only the values seen in
# practice are mapped. Returns nil for anything unrecognized.
def map_format(str)
  known_formats = {
    "article"           => "Article",
    "newspaper_article" => "Article",
    "review"            => "Article",
    "book"              => "Book",
    "dissertation"      => :dissertation
  }

  known_formats[str]
end
125
+
126
# Returns the text of the first node matching xpath under xml.
#
# Returns nil when no node matches, or when the matched node's text is
# blank, so callers can treat "missing" and "empty" alike.
def text_at_xpath(xml, xpath)
  node = xml.at_xpath(xpath)
  return nil if node.nil?

  text = node.text
  # The blank check has to be on the extracted text, not the node itself.
  return nil if text.blank?

  text
end
135
+
136
+
137
+
138
# Whether the search should be made on behalf of an authenticated user.
#
# An :auth key in args (even an explicit false) overrides the configured
# value; with no :auth argument the configuration's auth setting decides.
#
# Returns true or false.
def authenticated_end_user?(args)
  requested = args[:auth]
  chosen = requested.nil? ? configuration.auth : requested

  chosen ? true : false
end
150
+
151
# Swaps every comma in the query for a space, as the Primo docs say commas
# must not appear in the query text.
def prepared_query(str)
  str.tr(",", " ")
end
155
+
156
+
157
# Builds the Primo brief-search request URL.
#
# args - normalized search arguments; reads :query, :search_field,
#        :per_page, :start, :sort (and :auth via #authenticated_end_user?).
#
# Returns the URL as a String. All configured and user-supplied values that
# may hold reserved characters are URL-escaped, including the institution.
def construct_query(args)
  params = []
  params << "institution=#{CGI.escape configuration.institution.to_s}"
  params << "loc=#{CGI.escape configuration.loc}"
  params << "lang=#{CGI.escape configuration.lang}"

  params << "bulkSize=#{args[:per_page]}" if args[:per_page]
  # primo indx is 1-based record index, our :start is 0-based.
  params << "indx=#{args[:start] + 1}" if args[:start]

  defn = sort_definitions[args[:sort]]
  sort_field = defn && defn[:implementation]
  params << "sortField=#{CGI.escape sort_field}" if sort_field

  params << "onCampus=#{authenticated_end_user?(args) ? 'true' : 'false'}"

  # With no search field given, search across "any" field.
  field = args[:search_field].present? ? args[:search_field] : "any"
  query = "#{field},contains,#{prepared_query args[:query]}"
  params << "query=#{CGI.escape query}"

  # A fixed param value may be a single value or an array of values; each
  # value becomes its own key=value pair.
  configuration.fixed_params.each_pair do |key, value|
    [value].flatten.each do |v|
      params << "#{CGI.escape key.to_s}=#{CGI.escape v.to_s}"
    end
  end

  "http://#{configuration.host_port}/PrimoWebServices/xservice/search/brief?#{params.join('&')}"
end
194
+
195
+
196
# Maps Primo search-field names to the semantic field each represents.
# Primo offers more fields than these; the list is not exhaustive.
def search_field_definitions
  field_semantics = [
    ["creator", :author],
    ["title",   :title],
    ["sub",     :subject],
    ["isbn",    :isbn],
    ["issn",    :issn]
  ]

  Hash[field_semantics.map { |field, semantic| [field, {:semantic => semantic}] }]
end
206
+
207
# Maps sort keys to the Primo sortField value used for each.
def sort_definitions
  sort_fields = [
    ["title_asc",  "stitle"],
    ["date_desc",  "scdate"],
    ["author_asc", "screator"],
    # As far as I can tell, what they call 'popularity'
    # is really relevance, with popularity boosting.
    ["relevance",  "popularity"]
  ]

  Hash[sort_fields.map { |key, field| [key, {:implementation => field}] }]
end
217
+
218
# Configuration keys that must be supplied for this engine.
def self.required_configuration
  %w[host_port institution].map { |name| name.to_sym }
end
221
+
222
# Configuration values used when none are supplied.
def self.default_configuration
  defaults = {}
  defaults[:loc] = 'adaptor,primo_central_multiple_fe'
  # "eng" or "fre" or "ger" (Code for the representation of name of language conform to ISO-639)
  defaults[:lang] = "eng"
  defaults[:fixed_params] = {}
  defaults
end
230
+
231
+ end
@@ -0,0 +1,295 @@
1
+ require 'cgi'
2
+ require 'nokogiri'
3
+
4
+ require 'http_client_patch/include_client'
5
+ require 'httpclient'
6
+ module BentoSearch
7
+ # Supports fielded searching, sorting, pagination.
8
+ #
9
+ # Required configuration:
10
+ # * api_key
11
+ #
12
+ # Defaults to 'relevance' sort, rather than scopus's default of date desc.
13
+ #
14
+ # Uses the Scopus SciVerse REST API. You need to be a Scopus customer
15
+ # to access. http://api.elsevier.com
16
+ # http://www.developers.elsevier.com/action/devprojects
17
+ #
18
+ # ToS: http://www.developers.elsevier.com/devcms/content-policies
19
+ # "Federated Search" use case.
20
+ # Also: http://www.developers.elsevier.com/cms/apiserviceagreement
21
+ #
22
+ # Note that ToS applying to you probably means you must restrict access
23
+ # to search functionality to authenticated affiliated users only.
24
+ #
25
+ # Register for an API key at "Register New Site" at http://developers.elsevier.com/action/devnewsite
26
+ # You will then need to get server IP addresses registered with Scopus too,
27
+ # apparently by emailing directly to dave.santucci at elsevier dot com.
28
+ #
29
+ # Scopus API Docs:
30
+ # * http://www.developers.elsevier.com/devcms/content-api-search-request
31
+ # * http://www.developers.elsevier.com/devcms/content/search-fields-overview
32
+ #
33
+ # Some more docs on response elements and query elements:
34
+ # * http://api.elsevier.com/content/search/#d0n14606
35
+ #
36
+ # Other API's in the suite not being used by this code at present:
37
+ # * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
38
+ # * http://www.developers.elsevier.com/devcms/content-api-metadata-request
39
+ #
40
+ # Support: Integration@scopus.com
41
+ #
42
+ # TODO: Mention to Scopus: Only one author?
43
+ # Paging of 50 gets an error, but docs say I should be able to request 200.
44
+ #
45
+ class ScopusEngine
46
+ include BentoSearch::SearchEngine
47
+
48
+ extend HTTPClientPatch::IncludeClient
49
+ include_http_client
50
+
51
# Runs a search against the Scopus search API and converts the Atom XML
# response into a Results object.
#
# args - normalized search arguments hash, handed to #scopus_url.
#
# Returns a Results. On a rescued client error, a non-successful status, or
# a service-error document, the Results carries an #error hash
# (:exception, :status, :error_info) -- except for the specific
# "empty result set" service error, which is returned as zero hits.
def search_implementation(args)
  results = Results.new

  xml, response, exception = nil, nil, nil

  url = scopus_url(args)

  begin
    response = http_client.get(url, nil,
      # HTTP headers.
      {"X-ELS-APIKey"          => configuration.api_key,
       "X-ELS-ResourceVersion" => "XOCS",
       "Accept"                => "application/atom+xml"}
    )
    xml = Nokogiri::XML(response.body)
  # Timeout::Error is named explicitly because the top-level TimeoutError
  # alias is not defined on current rubies.
  rescue Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError,
         HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    exception = e
  end

  # handle errors
  if response.nil? || xml.nil? || exception ||
     !HTTP::Status.successful?(response.status) ||
     xml.at_xpath("service-error")

    # UGH. Scopus reports 0 hits as an error, not entirely distinguishable
    # from an actual error. Oh well, we have to go with it.
    # response/xml may be nil here (rescued exception), so guard both.
    status_xml = xml && xml.at_xpath("./service-error/status")
    if response && response.status == 400 && status_xml &&
       node_text(status_xml.at_xpath("./statusCode")) == "INVALID_INPUT" &&
       node_text(status_xml.at_xpath("./statusText")) == "Result set was empty or Start value beyond result set"
      # PROBABLY 0 hit count, although could be something else I'm afraid.
      results.total_items = 0
      return results
    end

    # real error
    results.error ||= {}
    results.error[:exception] = exception
    results.error[:status] = response.status if response
    # keep from storing the entire possibly huge response as error
    # but sometimes it's an error message.
    results.error[:error_info] = xml.at_xpath("service-error") if xml
    return results
  end

  results.total_items = (node_text xml.at_xpath("//opensearch:totalResults", xml_ns)).to_i

  xml.xpath("//atom:entry", xml_ns).each do |entry|
    results << (item = ResultItem.new)

    if (scopus_link = entry.at_xpath("atom:link[@ref='scopus']", xml_ns))
      item.link = scopus_link["href"]
    end

    item.title         = node_text entry.at_xpath("dc:title", xml_ns)
    item.journal_title = node_text entry.at_xpath("prism:publicationName", xml_ns)
    item.issn          = node_text entry.at_xpath("prism:issn", xml_ns)
    item.volume        = node_text entry.at_xpath("prism:volume", xml_ns)
    item.issue         = node_text entry.at_xpath("prism:issueIdentifier", xml_ns)
    item.doi           = node_text entry.at_xpath("prism:doi", xml_ns)

    # pages might be in startingPage/endingPage OR in pageRange
    if (start = entry.at_xpath("prism:startingPage", xml_ns))
      item.start_page = start.text.to_i
      if (epage = entry.at_xpath("prism:endingPage", xml_ns))
        item.end_page = epage.text.to_i
      end
    elsif (range = entry.at_xpath("prism:pageRange", xml_ns))
      (spage, epage) = *range.text().split("-")
      item.start_page = spage
      item.end_page = epage
    end

    # get the year out of the date
    if (date = entry.at_xpath("prism:coverDate", xml_ns))
      date.text =~ /^(\d\d\d\d)/
      item.year = $1.to_i if $1
    end

    # Authors might be in atom:authors separated by |, or just
    # a single one in dc:creator
    if (authors = entry.at_xpath("atom:authors", xml_ns))
      authors.text.split("|").each do |author|
        item.authors << Author.new(:display => author.strip)
      end
    elsif (author = entry.at_xpath("dc:creator", xml_ns))
      item.authors << Author.new(:display => author.text.strip)
    end

    # Format we're still trying to figure out how Scopus API
    # delivers it. Here is at least one way.
    if (doctype = entry.at_xpath("atom:subtype", xml_ns))
      item.format     = doctype_to_format(doctype.text)
      item.format_str = doctype_to_string(doctype.text)
    end
  end

  results
end
155
+
156
# Neutralizes characters that are special to the Scopus query syntax.
#
# Backslashes, parentheses and colons are each replaced with a space;
# backslash-escaping them did not seem to work against the API, so they
# are dropped rather than escaped.
def escape_query(query)
  special_chars = /([\\\(\)\:])/
  query.gsub(special_chars, ' ')
end
166
+
167
+
168
# Configuration keys that must be supplied for this engine.
def self.required_configuration
  %w[api_key]
end
171
+
172
# Configuration values used when none are supplied.
def self.default_configuration
  defaults = {}
  defaults[:base_url] = "http://api.elsevier.com/"
  defaults[:cluster]  = "SCOPUS"
  defaults
end
178
+
179
# Largest page size this engine advertises: 200, the per-page maximum given
# at the bottom of http://www.developers.elsevier.com/devcms/content-apis
def max_per_page
  200
end
183
+
184
# Maps Scopus search-field codes to the semantic field each represents.
def search_field_definitions
  field_semantics = [
    ["AUTH",  :author],
    ["TITLE", :title],
    # controlled and author-assigned keywords
    ["KEY",   :subject],
    ["ISBN",  :isbn],
    ["ISSN",  :issn]
  ]

  Hash[field_semantics.map { |code, semantic| [code, {:semantic => semantic}] }]
end
194
+
195
# Maps sort keys to Scopus &sort= values. The values here are not yet
# URI-escaped; the code that builds the URL does that.
#
# The 'refeid' value is currently undocumented on the Scopus site, but was
# given to me in email by scopus.
def sort_definitions
  sort_values = [
    ["title_asc",     "+itemtitle"],
    ["date_desc",     "-datesort,+auth"],
    ["relevance",     "refeid"],
    ["author_asc",    "+auth"],
    ["num_cite_desc", "-numcitedby"]
  ]

  Hash[sort_values.map { |key, value| [key, {:implementation => value}] }]
end
208
+
209
+
210
+ protected
211
+
212
# Nil-tolerant accessor for a node's text: gives back nil for a nil node,
# and the node's #text otherwise.
def node_text(node)
  node.nil? ? nil : node.text
end
219
+
220
# Prefix => namespace URI map passed to the xpath calls on the Scopus
# response.
def xml_ns
  namespaces = {}
  namespaces["opensearch"] = "http://a9.com/-/spec/opensearch/1.1/"
  namespaces["prism"]      = "http://prismstandard.org/namespaces/basic/2.0/"
  namespaces["dc"]         = "http://purl.org/dc/elements/1.1/"
  namespaces["atom"]       = "http://www.w3.org/2005/Atom"
  namespaces
end
226
+
227
# Maps from Scopus "doctype" as listed at
# http://www.developers.elsevier.com/devcms/content/search-fields-overview
# and delivered in the XML response as atom:subtype, to our own internal
# formats as documented in ResultItem#format.
#
# doctype - the Scopus code; converted with #to_s before lookup.
#
# Returns the mapped format, or nil if the code can't be mapped.
def doctype_to_format(doctype)
  # Each code is listed exactly once; repeated keys in a hash literal only
  # shadow one another and trigger a duplicated-key warning.
  formats = {
    "ar" => "Article",
    "ip" => "Article", # 'article in press'.
    "bk" => "Book",
    "bz" => "Article",
    "re" => "Article", # really 'report', but Scopus is unreliable here, most of these are actually articles.
    "cp" => :conference_paper,
    "sh" => "Article", # 'short survey' to scopus, but seems to be used for articles.
    "ed" => "Article", # Editorial
    "le" => "Article", # Letter
    "no" => "Article"  # Note
  }

  formats[doctype.to_s]
end
246
+
247
# Gives the human-readable label Scopus documents for a doctype code.
# These labels do not correspond one-to-one with our controlled format
# values. Returns nil for a code that is not listed.
def doctype_to_string(doctype)
  labels = {}
  labels["ar"] = "Article"
  labels["ab"] = "Abstract Report"
  labels["ip"] = "Article in Press"
  labels["bk"] = "Book"
  labels["bz"] = "Business Article"
  labels["cp"] = "Conference Paper"
  labels["cr"] = "Conference Review"
  labels["ed"] = "Editorial"
  labels["er"] = "Erratum"
  labels["le"] = "Letter"
  labels["no"] = "Note"
  labels["pr"] = "Press Release"
  # Really 'report', but Scopus is unreliable here, most of these are actually articles.
  labels["re"] = "Article"
  # Really 'short survey' to Scopus, but seems to be used for, well, articles.
  labels["sh"] = "Article"

  labels[doctype.to_s]
end
266
+
267
+
268
+
269
+
270
# Builds the Scopus search request URL.
#
# args - normalized search arguments; reads :query, :search_field,
#        :per_page, :start and :sort. The hash is not modified.
#
# Returns the URL as a String.
def scopus_url(args)
  query = escape_query args[:query]

  # A fielded search wraps the query in the field code: TITLE(query).
  query = "#{args[:search_field]}(#{query})" if args[:search_field]

  url = "#{configuration.base_url.chomp("/")}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(query)}"

  url += "&count=#{args[:per_page]}" if args[:per_page]
  url += "&start=#{args[:start]}" if args[:start]

  # default to 'relevance' sort if not given, rather than scopus's
  # default of date desc. Kept in a local so the caller's hash is untouched.
  sort_key = args[:sort] || "relevance"
  defn = sort_definitions[sort_key]
  value = defn && defn[:implementation]
  url += "&sort=#{CGI.escape(value)}" if value

  url
end
293
+
294
+ end
295
+ end