bento_search 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README.md +299 -0
  3. data/Rakefile +40 -0
  4. data/app/assets/images/bento_search/large_loader.gif +0 -0
  5. data/app/assets/javascripts/bento_search.js +3 -0
  6. data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
  7. data/app/assets/stylesheets/bento_search/bento.css +4 -0
  8. data/app/controllers/bento_search/bento_search_controller.rb +7 -0
  9. data/app/controllers/bento_search/search_controller.rb +72 -0
  10. data/app/helpers/bento_search_helper.rb +138 -0
  11. data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
  12. data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
  13. data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
  14. data/app/models/bento_search/author.rb +25 -0
  15. data/app/models/bento_search/link.rb +30 -0
  16. data/app/models/bento_search/multi_searcher.rb +109 -0
  17. data/app/models/bento_search/openurl_creator.rb +128 -0
  18. data/app/models/bento_search/registrar.rb +70 -0
  19. data/app/models/bento_search/result_item.rb +203 -0
  20. data/app/models/bento_search/results.rb +54 -0
  21. data/app/models/bento_search/results/pagination.rb +67 -0
  22. data/app/models/bento_search/search_engine.rb +219 -0
  23. data/app/models/bento_search/search_engine/capabilities.rb +65 -0
  24. data/app/search_engines/bento_search/#Untitled-1# +11 -0
  25. data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
  26. data/app/search_engines/bento_search/eds_engine.rb +557 -0
  27. data/app/search_engines/bento_search/google_books_engine.rb +184 -0
  28. data/app/search_engines/bento_search/primo_engine.rb +231 -0
  29. data/app/search_engines/bento_search/scopus_engine.rb +295 -0
  30. data/app/search_engines/bento_search/summon_engine.rb +398 -0
  31. data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
  32. data/app/views/bento_search/_link.html.erb +4 -0
  33. data/app/views/bento_search/_search_error.html.erb +22 -0
  34. data/app/views/bento_search/_std_item.html.erb +39 -0
  35. data/app/views/bento_search/search/search.html.erb +1 -0
  36. data/config/locales/en.yml +25 -0
  37. data/lib/bento_search.rb +29 -0
  38. data/lib/bento_search/engine.rb +5 -0
  39. data/lib/bento_search/routes.rb +45 -0
  40. data/lib/bento_search/version.rb +3 -0
  41. data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
  42. data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
  43. data/lib/http_client_patch/include_client.rb +86 -0
  44. data/lib/tasks/bento_search_tasks.rake +4 -0
  45. data/test/dummy/README.rdoc +261 -0
  46. data/test/dummy/Rakefile +7 -0
  47. data/test/dummy/app/assets/javascripts/application.js +15 -0
  48. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  49. data/test/dummy/app/controllers/application_controller.rb +3 -0
  50. data/test/dummy/app/helpers/application_helper.rb +2 -0
  51. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  52. data/test/dummy/config.ru +4 -0
  53. data/test/dummy/config/application.rb +56 -0
  54. data/test/dummy/config/boot.rb +10 -0
  55. data/test/dummy/config/database.yml +25 -0
  56. data/test/dummy/config/environment.rb +5 -0
  57. data/test/dummy/config/environments/development.rb +37 -0
  58. data/test/dummy/config/environments/production.rb +67 -0
  59. data/test/dummy/config/environments/test.rb +37 -0
  60. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  61. data/test/dummy/config/initializers/inflections.rb +15 -0
  62. data/test/dummy/config/initializers/mime_types.rb +5 -0
  63. data/test/dummy/config/initializers/secret_token.rb +7 -0
  64. data/test/dummy/config/initializers/session_store.rb +8 -0
  65. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  66. data/test/dummy/config/locales/en.yml +5 -0
  67. data/test/dummy/config/routes.rb +6 -0
  68. data/test/dummy/db/test.sqlite3 +0 -0
  69. data/test/dummy/log/test.log +3100 -0
  70. data/test/dummy/public/404.html +26 -0
  71. data/test/dummy/public/422.html +26 -0
  72. data/test/dummy/public/500.html +25 -0
  73. data/test/dummy/public/favicon.ico +0 -0
  74. data/test/dummy/script/rails +6 -0
  75. data/test/functional/bento_search/search_controller_test.rb +81 -0
  76. data/test/helper/bento_search_helper_test.rb +125 -0
  77. data/test/integration/navigation_test.rb +10 -0
  78. data/test/support/mock_engine.rb +23 -0
  79. data/test/support/test_with_cassette.rb +38 -0
  80. data/test/test_helper.rb +52 -0
  81. data/test/unit/#vcr_test.rb# +68 -0
  82. data/test/unit/ebsco_host_engine_test.rb +134 -0
  83. data/test/unit/eds_engine_test.rb +105 -0
  84. data/test/unit/google_books_engine_test.rb +93 -0
  85. data/test/unit/item_decorators_test.rb +66 -0
  86. data/test/unit/multi_searcher_test.rb +49 -0
  87. data/test/unit/openurl_creator_test.rb +111 -0
  88. data/test/unit/pagination_test.rb +59 -0
  89. data/test/unit/primo_engine_test.rb +37 -0
  90. data/test/unit/register_engine_test.rb +50 -0
  91. data/test/unit/result_item_display_test.rb +39 -0
  92. data/test/unit/result_item_test.rb +36 -0
  93. data/test/unit/scopus_engine_test.rb +130 -0
  94. data/test/unit/search_engine_base_test.rb +178 -0
  95. data/test/unit/search_engine_test.rb +95 -0
  96. data/test/unit/summon_engine_test.rb +161 -0
  97. data/test/unit/xerxes_engine_test.rb +70 -0
  98. data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
  99. data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
  100. data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
  101. data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
  102. data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
  103. data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
  104. data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
  105. data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
  106. data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
  107. data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
  108. data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
  109. data/test/vcr_cassettes/gbs/pagination.yml +702 -0
  110. data/test/vcr_cassettes/gbs/search.yml +340 -0
  111. data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
  112. data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
  113. data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
  114. data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
  115. data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
  116. data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
  117. data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
  118. data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
  119. data/test/vcr_cassettes/summon/search.yml +242 -0
  120. data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
  121. data/test/view/std_item_test.rb +98 -0
  122. metadata +421 -0
@@ -0,0 +1,65 @@
1
+
2
+ # Methods that describe a search engine's capabilities,
3
+ # mixed into SearchEngine. Individual engine implementations
4
+ # will often over-ride some or all of these methods.
5
module BentoSearch::SearchEngine::Capabilities
  # Fielded-search declaration. Over-ride in an engine that supports
  # fielded search. The returned hash maps each engine-specific internal
  # search field to either nil or a hash of metadata about that field,
  # which may carry a :semantic mapping, e.g.:
  #
  #    def search_field_definitions
  #      { "intitle" => {:semantic => :title} }
  #    end
  #
  # This default declares no search fields.
  def search_field_definitions
    Hash.new
  end

  # Sort declaration. Over-ride with a hash of available sorts. Each key
  # is the string passed as engine.search(...., :sort => key); one key
  # stands for the whole combination of sort field, direction and any
  # secondary sorts, mirroring the single select menu typical interfaces
  # offer.
  #
  # Where possible keys should be the _standard_ ones listed in
  # config/i18n/en:bento_search.sort_keys.*, but an engine may add custom
  # keys. Hash values are for the engine's internal use and are a
  # convenient place to keep implementation details.
  #
  # What happens for a sort key that is not listed here is not decided by
  # this module.
  #
  # This default declares no sorts.
  def sort_definitions
    Hash.new
  end

  # Over-ride to return the integer maximum per-page size. This default
  # returns nil.
  def max_per_page
    nil
  end

  # List of internal search field keys (the keys of
  # #search_field_definitions) that can be supplied to
  # search(:search_field => x).
  def search_keys
    search_field_definitions.keys
  end

  # List of semantic search field names (the keys of #semantic_search_map)
  # that can be supplied to search(:semantic_search_field => x).
  def semantic_search_keys
    semantic_search_map.keys
  end

  # Hash from semantic search field name to internal search field key,
  # built from the :semantic entries of #search_field_definitions.
  # The semantic name is converted with #to_s, so the keys of the returned
  # hash are strings. Fields whose definition is nil or has no :semantic
  # entry are left out.
  def semantic_search_map
    search_field_definitions.each_with_object({}) do |(field, defn), mapping|
      semantic = defn && defn[:semantic]
      mapping[semantic.to_s] = field if semantic
    end
  end
end
@@ -0,0 +1,11 @@
1
+ http://blacklight.mse.jhu.edu:3001/resolve?url_ver=Z39.88-2004
2
+ &url_ctx_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Actx
3
+ &ctx_ver=Z39.88-2004
4
+ &ctx_tim=2012-07-25T16%3A21%3A11-04%3A00
5
+ &ctx_id=
6
+ &ctx_enc=info%3Aofi%2Fenc%3AUTF-8
7
+ &rft.title=Monkey+Brains
8
+ &rft.creator=Will.i.am
9
+ &rft.pub=Absolute+Pitch%2C12+Dec+2007
10
+ &rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adc
11
+ &rfr_id=info%3Asid%2Fsummon.serialssolutions.com
@@ -0,0 +1,356 @@
1
+ require 'nokogiri'
2
+
3
+ require 'http_client_patch/include_client'
4
+ require 'httpclient'
5
+
6
+ # Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
7
+ # may be expanded or refactored for EDS too.
8
+ #
9
+ # == Required Configuration
10
+ #
11
+ # * profile_id
12
+ # * profile_password
13
+ # * databases: ARRAY of ebsco shortcodes of what databases to include in search. If you specify one you don't have access to, you get an error message from ebsco, alas.
14
+ #
15
+ # == Note on including databases
16
+ #
17
+ # Need to specifically configure all databases your institution licenses from
18
+ # EBSCO that you want included in the search. You can't just say "all of them"
19
+ # the api doesn't support that, and also more than 30 or 40 starts getting
20
+ # horribly slow. If you include a db you do not have access to, EBSCO api
21
+ # fatal errors.
22
+ #
23
+ # You may want to make sure all your licensed databases are included
24
+ # in your EIT profile. Log onto ebscoadmin, Customize Services, choose
25
+ # EIT profile, choose 'databases' tag.
26
+ #
27
+ # === Download databases from EBSCO api
28
+ #
29
+ # We include a utility to download ALL activated databases for EIT profile
30
+ # and generate a file putting them in a ruby array. You may want to use this
31
+ # file as a starting point, and edit by hand:
32
+ #
33
+ # First configure your EBSCO search engine with bento_search, say under
34
+ # key 'ebscohost'.
35
+ #
36
+ # Then run:
37
+ # rails generate bento_search:pull_ebsco_dbs ebscohost
38
+ #
39
+ # assuming 'ebscohost' is the key you registered the EBSCO search engine.
40
+ #
41
+ # This will create a file at ./config/ebsco_dbs.rb. You may want to hand
42
+ # edit it. Then, in your bento search config, you can:
43
+ #
44
+ # require "#{Rails.root}/config/ebsco_dbs.rb"
45
+ # BentoSearch.register_engine("ebscohost") do |conf|
46
+ # # ....
47
+ # conf.databases = $ebsco_dbs
48
+ # end
49
+ #
50
+ # == Vendor documentation
51
+ #
52
+ # Vendor documentation is a bit scattered, main page:
53
+ # * http://support.ebsco.com/eit/ws.php
54
+ # Some other useful pages we discovered:
55
+ # * http://support.ebsco.com/eit/ws_faq.php
56
+ # * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
57
+ # * Try construct a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
58
+ # * The 'info' service can be used to see what databases you have access to.
59
+ # * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
60
+ #
61
+ #
62
+ #
63
+ #
64
+ # TODO: David Walker tells us we need to configure in EBSCO to make default operator be 'and' instead of phrase search!
65
+ # We Do need to do that to get reasonable results.
66
+ class BentoSearch::EbscoHostEngine
67
+ include BentoSearch::SearchEngine
68
+
69
+ extend HTTPClientPatch::IncludeClient
70
+ include_http_client
71
+
72
+ # Include some rails helpers, text_helper.truncate
73
def text_helper
  # A plain Object extended with ActionView's TextHelper, created on first
  # use and kept in a class variable so later calls reuse the same object.
  # NOTE(review): the class variable is called @@truncate although it
  # holds the whole helper object; the name is left unchanged here.
  @@truncate ||= Object.new.extend(ActionView::Helpers::TextHelper)
end
80
+
81
# Runs one search: builds the request URL with #query_url, fetches it with
# http_client, parses the body as XML and converts every <rec> element
# into a result item via #item_from_xml.
#
# Returns a BentoSearch::Results. On failure (no response, a rescued
# exception, a non-successful HTTP status, or a <Fault> element in the
# response) the returned object has its #error hash filled with whichever
# of :exception, :status and :error_info are available, and holds no items.
def search_implementation(args)
  url = query_url(args)

  results = BentoSearch::Results.new
  xml, response, exception = nil, nil, nil

  begin
    response = http_client.get(url)
    xml = Nokogiri::XML(response.body)
  # Timeout::Error is the class the legacy top-level TimeoutError constant
  # was an alias for; that alias is no longer defined on current Rubies,
  # where referencing it here would raise NameError instead of rescuing.
  rescue Timeout::Error, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    exception = e
  end

  # error handle
  fault = nil
  failed = response.nil? ||
           xml.nil? ||
           exception ||
           !HTTP::Status.successful?(response.status) ||
           (fault = xml.at_xpath("./Fault"))

  if failed
    results.error ||= {}
    results.error[:exception] = exception if exception
    results.error[:status] = response.status if response
    results.error[:error_info] = text_if_present(fault.at_xpath("./Message")) if fault

    return results
  end

  # the namespaces they provide are weird and don't help and sometimes
  # not clearly even legal. Remove em!
  xml.remove_namespaces!

  # A response without a Hits element is counted as zero hits rather than
  # failing with NoMethodError on nil.
  hits = xml.at_xpath("./searchResponse/Hits")
  results.total_items = hits ? hits.text.to_i : 0

  xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
    results << item_from_xml(xml_rec)
  end

  results
end
126
+
127
+
128
+ # Pass in a nokogiri node, return node.text, or nil if
129
+ # arg was nil or node.text was blank?
130
def text_if_present(node)
  # nil in, nil out; a node whose text is blank also counts as "absent".
  return nil if node.nil?

  content = node.text
  content.blank? ? nil : content
end
137
+
138
+ # Figure out proper controlled format for an ebsco item.
139
+ # EBSCOHost (not sure about EDS) publication/document type
140
+ # are totally unusable non-normalized vocabulary for controlled
141
+ # types, we'll try to guess from other metadata features.
142
def sniff_format(xml_node)
  # Guess is based purely on which info sections have child elements:
  # bkinfo, then dissinfo, then jinfo (with or without artinfo).
  # Note the return values mix strings ("Book", "Article") and symbols
  # (:dissertation, :serial); nil means no guess could be made.
  return nil if xml_node.nil?

  has_children = lambda { |section| xml_node.at_xpath("./#{section}/*") }

  return "Book" if has_children.call("bkinfo")
  return :dissertation if has_children.call("dissinfo")

  if has_children.call("jinfo")
    return has_children.call("artinfo") ? "Article" : :serial
  end

  nil
end
157
+
158
+ # Figure out uncontrolled literal string format to show to users.
159
+ # We're going to try combining Ebsco Publication Type and Document Type,
160
+ # when both are present. Then a few hard-coded special transformations.
161
def sniff_format_str(xml_node)
  # Publication type first, document type second (skipped when identical).
  pubtype, doctype = %w[pubtype doctype].map do |element|
    text_if_present(xml_node.at_xpath("./artinfo/#{element}"))
  end

  labels = [pubtype]
  labels << doctype unless doctype == pubtype

  # Drop missing values, title-case, and collapse values that only
  # differed by case.
  labels = labels.compact.map { |label| label.titlecase }.uniq

  leading = labels.first
  trailing = labels.last

  # some hard-coded cases for better user-displayable string
  return "Journal Article" if leading == "Academic Journal" && trailing == "Article"
  return trailing if leading == "Periodical" && labels.length > 1

  labels.join(": ")
end
185
+
186
+ # pass in <rec> nokogiri, will determine best link
187
def get_link(xml)
  # Prefer the pdfLink element's text; fall back to plink.
  pdf_link = text_if_present(xml.at_xpath("./pdfLink"))
  return pdf_link if pdf_link

  text_if_present(xml.at_xpath("./plink"))
end
190
+
191
+
192
+ # it's unclear if ebsco API actually allows escaping of special chars,
193
+ # or what the special chars are. But we know parens are special, can't
194
+ # escape em, we'll just remove em (should not affect search).
195
def ebsco_query_escape(txt)
  # Each opening or closing paren becomes a single space.
  txt.tr('()', ' ')
end
198
+
199
+ # Actually turn the user's query into an EBSCO "AND" boolean query,
200
+ # seems only way to get decent results where terms can match cross-fields
201
+ # at the moment, for EIT. We'll see for EDS.
202
def ebsco_query_prepare(txt)
  # Split on whitespace, but keep each double-quoted phrase together as
  # one token (the capture group makes split return the phrase itself).
  # Parens are stripped from everything that is not a quoted phrase.
  tokens = txt.split(%r{[[:space:]]+|("[^"]+")}).map do |token|
    token =~ /^\".*\"$/ ? token : ebsco_query_escape(token)
  end

  # Throw away empty tokens, bare boolean operators (they are fine inside
  # a quoted phrase, which is a different token), and tokens made up of
  # nothing but non-alphanumeric characters.
  kept = tokens.reject do |token|
    token.blank? ||
      ["AND", "OR", "NOT"].include?(token) ||
      token =~ /\A[^[[:alnum:]]]+\Z/
  end

  kept.join(" AND ")
end
226
+
227
# Builds the EBSCO Search request URL for the given search arguments.
#
# Reads from +args+: :query (required), :search_field, :start (0-based),
# :per_page and :sort. When :sort is absent it is set to "relevance" IN
# +args+ (callers see that change). A :sort key that is not present in
# #sort_definitions falls back to the "relevance" definition instead of
# failing with NoMethodError on nil.
#
# Configured credentials and database codes are URL-escaped, so values
# containing characters such as "&" or spaces no longer corrupt the URL.
#
# Returns the URL as a String.
def query_url(args)
  url = "#{configuration.base_url}/Search" \
        "?prof=#{CGI.escape configuration.profile_id.to_s}" \
        "&pwd=#{CGI.escape configuration.profile_password.to_s}"

  query = ebsco_query_prepare(args[:query])

  # wrap in (FI $query) if fielded search
  query = "(#{args[:search_field]} #{query})" if args[:search_field]

  url += "&query=#{CGI.escape query}"

  # startrec is 1-based for ebsco, not 0-based like for us.
  url += "&startrec=#{args[:start] + 1}" if args[:start]
  url += "&numrec=#{args[:per_page]}" if args[:per_page]

  # Make relevance our default sort, rather than EBSCO's date.
  args[:sort] ||= "relevance"
  definitions = sort_definitions
  sort_defn = definitions[args[:sort]] || definitions["relevance"]
  if sort_defn && sort_defn[:implementation]
    url += "&sort=#{CGI.escape sort_defn[:implementation].to_s}"
  end

  # Contrary to docs, don't pass these comma-separated, pass em in separate
  # query params.
  configuration.databases.each do |db|
    url += "&db=#{CGI.escape db.to_s}"
  end

  url
end
257
+
258
+ # pass in a nokogiri representing an EBSCO <rec> result,
259
+ # we'll turn it into a BentoSearch::ResultItem.
260
def item_from_xml(xml_rec)
  # Nearly all metadata lives under header/controlInfo; the link is taken
  # from the <rec> element itself via #get_link.
  info = xml_rec.at_xpath("./header/controlInfo")
  field = lambda { |xpath| text_if_present(info.at_xpath(xpath)) }

  item = BentoSearch::ResultItem.new

  item.link = get_link(xml_rec)

  item.issn = field.call("./jinfo/issn")
  item.journal_title = field.call("./jinfo/jtl")
  item.publisher = field.call("./pubinfo/pub")
  # A record may carry several ISBNs; only the first one is used for now.
  item.isbn = field.call("./bkinfo/isbn")

  item.year = field.call("./pubinfo/dt/@year")
  item.volume = field.call("./pubinfo/vid")
  item.issue = field.call("./pubinfo/iid")

  # Titles from EBSCO can be extremely long, so cap them at 200 chars.
  item.title = text_helper.truncate(field.call("./artinfo/tig/atl"), :length => 200)
  item.start_page = field.call("./artinfo/ppf")

  item.doi = field.call("./artinfo/ui[@type='doi']")

  # Abstracts often start with a literal "Abstract: " label; strip it.
  item.abstract = field.call("./artinfo/ab")
  item.abstract.gsub!(/^Abstract\: /, "") if item.abstract

  # Only a full display name is available for each author.
  info.xpath("./artinfo/aug/au").each do |author_node|
    item.authors << BentoSearch::Author.new(:display => author_node.text)
  end

  item.format = sniff_format(info)
  item.format_str = sniff_format_str(info)

  item
end
302
+
303
+ # This method is not used for normal searching, but can be used by
304
+ # other code to retrieve the results of the EBSCO API Info command,
305
+ # using connection details configured in this engine. The Info command
306
+ # can tell you what databases your account is authorized to see.
307
+ # Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED
308
def get_info
  # Same connection details as a search, but against the Info endpoint.
  # Credentials are URL-escaped so characters such as "&" or spaces in a
  # configured id/password cannot corrupt the query string.
  url = "#{configuration.base_url}/Info" \
        "?prof=#{CGI.escape configuration.profile_id.to_s}" \
        "&pwd=#{CGI.escape configuration.profile_password.to_s}"

  noko = Nokogiri::XML(http_client.get(url).body)

  # Callers get the parsed document with namespaces already removed.
  noko.remove_namespaces!

  noko
end
318
+
319
+ # David Walker says pretty much only relevance and date are reliable
320
+ # in EBSCOhost cross-search.
321
def sort_definitions
  # Each value's :implementation is the string #query_url sends to EBSCO
  # as the "sort" parameter.
  by_relevance = { :implementation => "relevance" }
  by_date = { :implementation => "date" }

  { "relevance" => by_relevance, "date_desc" => by_date }
end
327
+
328
# Search fields this engine declares: EBSCO field code => metadata hash
# carrying the semantic field the code corresponds to.
def search_field_definitions
  semantic_by_code = [
    ["AU", :author],
    ["TI", :title],
    ["SU", :subject],
    ["IS", :issn],
    ["IB", :isbn]
  ]

  semantic_by_code.each_with_object({}) do |(code, semantic), definitions|
    definitions[code] = { :semantic => semantic }
  end
end
337
+
338
# Largest page size this engine reports.
def max_per_page
  # The limit is really 50 when asking for 'full' records, but those
  # return fulltext, which we don't expect to ever need here.
  200
end
343
+
344
# Names of the configuration keys this engine declares as required.
def self.required_configuration
  %w[profile_id profile_password]
end
347
+
348
# Default configuration values: the EIT service base URL (request paths
# such as /Search are appended to it) and an empty database list.
def self.default_configuration
  defaults = {}
  defaults[:base_url] = "http://eit.ebscohost.com/Services/SearchService.asmx"
  defaults[:databases] = []
  defaults
end
355
+
356
+ end
@@ -0,0 +1,557 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'nokogiri'
4
+ require 'httpclient'
5
+ require 'multi_json'
6
+ require 'http_client_patch/include_client'
7
+
8
+
9
+ #
10
+ # For EBSCO Discovery Service. You will need a license to use.
11
+ #
12
+ # == Required Configuration
13
+ #
14
+ # user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
15
+ # profile: As given by EBSCO, might be "edsapi"?
16
+ #
17
+ # == Highlighting
18
+ #
19
+ # EDS has a query-in-context highlighting feature. It is used by default, set
20
+ # config 'highlighting' to false to disable.
21
+ # If turned on, you may get <b class="bento_search_highlight"> tags
22
+ # in title and abstract output if it's on, marked html_safe.
23
+ #
24
+ # If highlighting is on, since the abstract will be marked html safe, the
25
+ # view layer won't be able to safely truncate it. In fact, it's very hard
26
+ # to do here too, but we do it anyway, by default to approx configuration
27
+ # truncate_highlighted num of chars (default 280). Set to nil if you don't
28
+ # want this.
29
+ #
30
+ # == Linking
31
+ #
32
+ # The link to record in EBSCO interface delivered as "PLink" will be listed
33
+ # as record main link.
34
+ #
35
+ # Any links listed under <CustomLinks> will be listed as other_links, using
36
+ # configured name provided by EBSCO for CustomLink.
37
+ #
38
+ # EDS Response does not have sufficient metadata for us to generate an OpenURL
39
+ # ourselves. However, in our testing, the first/only CustomLink was
40
+ # an OpenURL. If configuration.assume_first_custom_link_openurl is
41
+ # true (as is default), it will be used to create an OpenURL link. However, in
42
+ # our testing, many records don't have this at all. **Note** Ask EBSCO support
43
+ # to configure your profile so OpenURLs are ALWAYS included for all records, not
44
+ # just records with no EBSCO fulltext, to ensure bento_search can get the
45
+ # openurl.
46
+ #
47
+ # As always, you can customize links and other_links with Item Decorators.
48
+ #
49
+ # == Technical Notes and Difficulties
50
+ #
51
+ # This API is enormously difficult to work with. Also the response is very odd
52
+ # to deal with and missing some key elements. We quite possibly got something
53
+ # wrong or non-optimal in this implementation, but we did our best.
54
+ #
55
+ # Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
56
+ # request making a session for every new end-user -- as we have no way to keep
57
+ # track of end-users, we do it on every request in this implementation.
58
+ #
59
+ # Responses don't include much metadata -- we don't actually have journal title,
60
+ # volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
61
+ # there depending on your profile configuration, but we're not right now.
62
+ # Instead we're using the chunk of user-displayable citation/reference it does
63
+ # give us (which is very difficult to parse into something usable already),
64
+ # and a custom Decorator to display that instead of normalized citation
65
+ # made from individual elements.
66
+ #
67
+ # EBSCO says they plan to improve some of these issues in a September 2012 release.
68
+ #
69
+ # Title and abstract data seems to be HTML with tags and character entities and
70
+ # escaped special chars. We're trusting it and passing it on as html_safe.
71
+ #
72
+ # Paging can only happen on even pages, with 'page' rather than 'start'. But
73
+ # you can pass in 'start' to bento_search, it'll be converted to closest page.
74
+ #
75
+ # == Authenticated Users
76
+ #
77
+ # EDS allows searches by unauthenticated users, but the results come back with
78
+ # weird blank hits. In such a case, the BentoSearch adapter will return
79
+ # records with virtually no metadata, but a placeholder title
80
+ # (I18n at bento_search.eds.record_not_available ). Also no abstracts
81
+ # are available from unauth search.
82
+ #
83
+ # By default the engine will search as 'guest' unauth user. But config
84
+ # 'auth' key to true to force all searches to auth (if you are protecting your
85
+ # app) or pass :auth => true as param into #search method.
86
+ #
87
+ # == EDS docs:
88
+ #
89
+ # * Console App to demo requests: https://eds-api.ebscohost.com/Console
90
+ # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
91
+ # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
92
+ #
93
+ class BentoSearch::EdsEngine
94
+ include BentoSearch::SearchEngine
95
+
96
+ extend HTTPClientPatch::IncludeClient
97
+ include_http_client
98
+
99
+ AuthHeader = "x-authenticationToken"
100
+ SessionTokenHeader = "x-sessionToken"
101
+
102
+ @@remembered_auth = nil
103
+ @@remembered_auth_lock = Mutex.new
104
+ # Class variable to save current known good auth
105
+ # uses a mutex to be threadsafe. sigh.
106
def self.remembered_auth
  # Read the shared token while holding the lock.
  @@remembered_auth_lock.synchronize { @@remembered_auth }
end
111
+ # Set class variable with current known good auth.
112
+ # uses a mutex to be threadsafe.
113
def self.remembered_auth=(token)
  # Store the shared token while holding the lock.
  @@remembered_auth_lock.synchronize { @@remembered_auth = token }
end
118
+
119
+ # an object that includes some Rails helper modules for
120
+ # text handling.
121
def helper
  # Built on first use and memoized per engine instance. The modules are
  # extended one at a time, TextHelper first, to keep their order.
  @helper ||= begin
    text_tools = Object.new
    text_tools.extend ActionView::Helpers::TextHelper # for truncate
    text_tools.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
    text_tools
  end
end
129
+
130
+
131
# Names of the configuration keys this engine declares as required.
def self.required_configuration
  ["user_id", "password", "profile"]
end
134
+
135
+ # From config or args, args over-ride config
136
def authenticated_end_user?(args)
  # An explicit :auth in args (including an explicit false) wins; only a
  # missing/nil :auth falls back to the configured 'auth' value. The
  # result is always strictly true or false.
  #
  # The previous version coerced the configured value to a boolean and
  # then tested it for nil, so its final "else false" branch could never
  # run; that dead branch is removed here with no change in results.
  requested = args[:auth]
  return (requested ? true : false) unless requested.nil?

  configuration.auth ? true : false
end
147
+
148
# Builds the EDS search URL for the given search arguments.
#
# Reads from +args+: :query (required), :search_field, :per_page, :page
# and :sort. A :sort key with no entry (or no :implementation) in
# #sort_definitions is silently left out of the URL.
#
# Returns the URL as a String.
def construct_search_url(args)
  query = "AND,"
  query += "#{args[:search_field]}:" if args[:search_field]

  # Can't have any commas in query, it turns out, although this is not
  # documented. The old code called gsub with the String "/,/" instead of
  # a regexp, which matched nothing, so commas were sent through. They are
  # replaced with a space (not deleted) so "Smith,John" stays two terms.
  query += args[:query].tr(",", " ")

  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"

  url += "&searchmode=#{CGI.escape configuration.search_mode}"

  url += "&highlight=#{configuration.highlighting ? 'y' : 'n'}"

  url += "&resultsperpage=#{args[:per_page]}" if args[:per_page]
  url += "&pagenumber=#{args[:page]}" if args[:page]

  if args[:sort]
    defn = sort_definitions[args[:sort]]
    value = defn && defn[:implementation]
    url += "&sort=#{CGI.escape value}" if value
  end

  url
end
180
+
181
+
182
+
183
# Runs one EDS search inside #with_session and converts each <Record>
# of the response into a BentoSearch::ResultItem.
#
# Returns a BentoSearch::Results. If an EdsCommException is raised, the
# results object current at that point is returned with its #error hash
# holding :exception, :http_status and :http_body.
def search_implementation(args)
  results = BentoSearch::Results.new

  is_authenticated = authenticated_end_user?(args)

  begin
    with_session(is_authenticated) do |session_token|
      url = construct_search_url(args)
      response = get_with_auth(url, session_token)

      # Start over with a fresh results object for this response.
      results = BentoSearch::Results.new

      total_hits = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits")
      results.total_items = total_hits.to_i if total_hits

      response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record|
        item = BentoSearch::ResultItem.new

        item.title = prepare_eds_payload(element_by_group(record, "Ti"), true)
        # Guest searches can come back without a title; show a translated
        # placeholder instead.
        if item.title.nil? && !is_authenticated
          item.title = I18n.translate("bento_search.eds.record_not_available")
        end

        item.abstract = prepare_eds_payload(element_by_group(record, "Ab"), true)

        # The author field is itself an escaped XML-ish payload, so it is
        # parsed a second time as a fragment to reach the <searchLink>
        # elements holding the names.
        authors_fragment = Nokogiri::XML::fragment(element_by_group(record, "Au"))
        authors_fragment.xpath(".//searchLink").each do |author_node|
          item.authors << BentoSearch::Author.new(:display => author_node.text)
        end

        # PLink is the main inward-facing EBSCO link; use it as main link.
        plink = record.at_xpath("./PLink")
        item.link = plink.text if plink

        # Every CustomLink becomes an entry in other_links.
        record.xpath("./CustomLinks/CustomLink").each do |custom_link|
          link_url = custom_link.at_xpath("./Url").text
          link_label = custom_link.at_xpath("./Name").text
          item.other_links << BentoSearch::Link.new(:url => link_url, :label => link_label)
        end

        # Optionally treat the first CustomLink Url as an OpenURL and keep
        # everything from its '?' onwards as the item's KEV context object.
        if configuration.assume_first_custom_link_openurl
          custom_links = record.xpath("./CustomLinks/CustomLink")
          url_node = custom_links && custom_links.at_xpath("./Url")
          if url_node
            openurl = url_node.text
            query_start = openurl.index('?')
            item.openurl_kev_co = openurl.slice(query_start..(openurl.length)) if query_start
          end
        end

        # Format. No list of possible PubTypes was found to map onto the
        # internal controlled vocabulary, so only the display string is set.
        item.format_str = at_xpath_text(record, "./Header/PubType")

        # The citation is one human-readable blob littered with XML-ish
        # tags. Parsing it as a fragment and taking the text drops the tags
        # (any <highlight> markup included). It is stored in custom_data
        # and displayed by CitationMessDecorator.
        citation_text = Nokogiri::XML::fragment(element_by_group(record, "Src")).text
        # Strip the confusing "count of references" often on the end.
        item.custom_data["citation_blob"] = citation_text.gsub(/ref +\d+ +ref\.$/, '')

        item.extend CitationMessDecorator

        results << item
      end
    end

    return results
  rescue EdsCommException => e
    results.error ||= {}
    results.error[:exception] = e
    results.error[:http_status] = e.http_status
    results.error[:http_body] = e.http_body
    return results
  end
end
283
+
284
# Pulls one logical field out of an EDS XML <Record> element. EDS labels
# each <Item> with a <Group> code (for example "Ti", "Ab" or "Au") instead
# of giving it a dedicated element name, so a predicate xpath is needed.
#
#   element_by_group(nokogiri_element, "Ti")
#
# noko  - Nokogiri node for the <Record>.
# group - the Group code to look for; interpolated into the xpath verbatim.
#
# Returns the text of the matching <Data> element, or nil if there is none.
def element_by_group(noko, group)
  data_xpath = "./Items/Item[child::Group[text()='#{group}']]/Data"
  at_xpath_text(noko, data_xpath)
end
292
+
293
# Wraps calls to the EDS api with CreateSession and EndSession requests
# to EDS, and yields the session token to the block.
#
# auth - true when the end user is authenticated; otherwise an EDS guest
#        session is requested (guest=y).
#
# Returns whatever the block returns.
# Raises EdsCommException when EDS does not hand back a SessionToken; the
# block is not run in that case.
#
#   with_session(true) do |session_token|
#     # can make more requests using session_token,
#     # EndSession will be called for you at end of block.
#   end
def with_session(auth = false, &block)
  # Make sure an auth token has been fetched and memoized on the class
  # before talking to the session endpoints.
  auth_token = (self.class.remembered_auth ||= get_auth_token)

  guest_flag = auth ? 'n' : 'y'
  create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{guest_flag}"
  response_xml = get_with_auth(create_url)

  session_token = response_xml && at_xpath_text(response_xml, "//SessionToken")
  # This exception used to be constructed but never raised, which let the
  # block run with a nil session token.
  raise EdsCommException.new("Could not get SessionToken") unless session_token

  begin
    block.yield(session_token)
  ensure
    # Always close the EDS session, even when the block raised.
    if auth_token
      end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
      get_with_auth(end_url)
    end
  end
end
328
+
329
# Looks up the first node matching +xpath+ under +noko+.
#
# Returns that node's #text, or nil when nothing matches.
def at_xpath_text(noko, xpath)
  found = noko.at_xpath(xpath)
  return found.text unless found.nil?

  found
end
339
+
340
# Turns a raw EDS field value into display-ready markup.
#
# EDS marks search hits with literal <highlight>...</highlight> tokens.
# With configuration.highlighting on, those tokens are swapped for
# <b class='bento_search_highlight'> tags and every other fragment is
# marked html_safe (EDS puts HTML in its literals and we choose to trust
# it). If configuration.truncate_highlighted is set, the fragments are then
# cut down to about that many characters, never stopping inside a
# highlighted span. Note the inserted tags' own length is counted against
# that budget as well.
#
# str       - raw field text; blank values are handed back untouched.
# html_safe - only consulted when highlighting is off: when true the
#             string is marked html_safe as-is.
#
# Returns the blank input unchanged, the (possibly html_safe) input when
# highlighting is off, or otherwise the result of helper.safe_join over
# the processed fragments.
def prepare_eds_payload(str, html_safe = false)
  return str if str.blank?

  unless configuration.highlighting
    return(html_safe ? str.html_safe : str)
  end

  # Splitting on a capturing group keeps the highlight tokens as their own
  # array elements, interleaved with the literal text.
  parts = str.split(%r{(</?highlight>)}).map do |piece|
    if piece == "<highlight>"
      "<b class='bento_search_highlight'>".html_safe
    elsif piece == "</highlight>"
      "</b>".html_safe
    else
      # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
      piece.html_safe
    end
  end

  # Truncate fragment by fragment so we never cut through one of our tags
  # or stop in the middle of a highlighted span.
  budget = configuration.truncate_highlighted
  if budget
    inside_highlight = false
    kept = []

    parts.each do |piece|
      if budget <= 0 && !inside_highlight
        kept << "..."
        break
      end

      if piece =~ /^<b.*\>$/
        inside_highlight = true
      elsif piece == "</b>"
        inside_highlight = false
      elsif !inside_highlight && (budget - piece.length) <= 0
        # Plain text that does not fit: shorten it and stop.
        kept << helper.truncate(piece, :length => budget, :separator => ' ')
        break
      end

      kept << piece
      budget -= piece.length
    end

    parts = kept
  end

  helper.safe_join(parts, '')
end
405
+
406
# Issues an authenticated GET against the EDS API and returns the parsed
# XML response.
#
# url           - full EDS API url to fetch.
# session_token - optional; sent in the SessionTokenHeader when given.
#
# It will
#  * Make a GET request asking for XML,
#  * with the memo-ized auth token added to headers (one is fetched first
#    if none is remembered yet),
#  * parse the body with Nokogiri and remove all namespaces,
#  * try ONCE more with a fresh auth token if EBSCO says the token is bad.
#
# Returns the namespace-stripped Nokogiri XML document.
# Raises EdsCommException on a timeout or other HTTPClient failure, or
# when the final response has a non-successful HTTP status. http_status
# and http_body are filled in whenever a response was received.
def get_with_auth(url, session_token = nil)
  auth_token = (self.class.remembered_auth ||= get_auth_token)

  headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
  headers[SessionTokenHeader] = session_token if session_token

  response = nil
  response_xml = nil
  caught_exception = nil

  begin
    s_time = Time.now
    response = http_client.get(url, nil, headers)
    Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")

    response_xml = Nokogiri::XML(response.body)
    response_xml.remove_namespaces!

    if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
      # bad auth, try again just ONCE
      Rails.logger.debug("EDS auth failed, getting auth again")

      headers[AuthHeader] = self.class.remembered_auth = get_auth_token
      response = http_client.get(url, nil, headers)
      response_xml = Nokogiri::XML(response.body)
      response_xml.remove_namespaces!
    end
  # Timeout::Error replaces the bare ::TimeoutError constant, which no
  # longer exists on Ruby 3.0+. HTTPClient::TimeoutError is what httpclient
  # itself raises for connect/send/receive timeouts.
  rescue Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    caught_exception = e
  end

  if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
    exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
    if response
      exception.http_body = response.body
      exception.http_status = response.status
    end
    raise exception
  end

  response_xml
end
461
+
462
+
463
# Makes an HTTP request to get EBSCO's auth token, using
# configuration.user_id / configuration.password against
# configuration.auth_url. We aren't bothering to keep track of the
# expiration ourselves, can't necessarily trust it anyway.
#
# Returns the value of "AuthToken" from the JSON response.
# Raises an EdsCommException when the request times out or fails at the
# HTTP client level, when the HTTP status is not successful, or when the
# response is not a JSON object containing "AuthToken".
def get_auth_token
  # Can't send params as form-encoded, EDS needs a JSON or XML body.
  # Serialize with MultiJson instead of interpolating into a template so
  # quotes or backslashes in the credentials cannot produce malformed JSON.
  # #to_s keeps the old behaviour of sending "" for unset values.
  body = MultiJson.dump(
    "UserId"   => configuration.user_id.to_s,
    "Password" => configuration.password.to_s
  )

  response = nil
  begin
    s_time = Time.now
    response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
    Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
  rescue Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError => e
    # Honour the documented contract: callers only rescue EdsCommException.
    raise EdsCommException.new("Could not get auth: #{e.message}")
  end

  unless HTTP::Status.successful? response.status
    raise EdsCommException.new("Could not get auth", response.status, response.body)
  end

  response_hash =
    begin
      MultiJson.load response.body
    rescue MultiJson::DecodeError
      nil # unparseable body; reported as a missing AuthToken just below
    end

  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
    raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
  end

  response_hash["AuthToken"]
end
500
+
501
# Default configuration values for this engine.
#
# Returns a fresh Hash on every call.
def self.default_configuration
  defaults = {}

  # Endpoints: token-issuing auth service and the EDS REST API root.
  defaults[:auth_url] = 'https://eds-api.ebscohost.com/authservice/rest/uidauth'
  defaults[:base_url] = "http://eds-api.ebscohost.com/edsapi/rest/"

  # Convert EDS <highlight> tokens to HTML, and cap highlighted fields at
  # this many characters (see prepare_eds_payload).
  defaults[:highlighting] = true
  defaults[:truncate_highlighted] = 280

  # Treat the first CustomLink's Url as the record's OpenURL.
  defaults[:assume_first_custom_link_openurl] = true

  # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
  defaults[:search_mode] = 'all'

  defaults
end
511
+
512
# Sort options this engine offers, keyed by sort name; each value's
# :implementation is the engine-side sort value to use for it.
def sort_definitions
  supported = {}
  supported["date_desc"] = { :implementation => "date" }
  supported["relevance"] = { :implementation => "relevance" }
  # Not enabled: "date_asc" => {:implementation => "date2"}
  supported
end
519
+
520
# Search fields this engine offers, keyed by EDS field code. Where a value
# has a :semantic entry it names the generic field the code corresponds
# to; codes with an empty hash have no such mapping.
def search_field_definitions
  fields = {}
  fields["TX"] = { :semantic => :all }
  fields["AU"] = { :semantic => :author }
  fields["TI"] = { :semantic => :title }
  fields["SU"] = { :semantic => :subject }
  fields["SO"] = {} # source, journal name
  fields["AB"] = {} # abstract
  fields["IS"] = { :semantic => :issn }
  fields["IB"] = { :semantic => :isbn }
  fields
end
532
+
533
# An exception talking to the EDS api.
# There's a short reason in #message, but also possibly an http_status
# and http_body copied from the EDS error response.
#
# Inherits from StandardError (not Exception) so that an ordinary
# `rescue => e` catches it; `rescue EdsCommException` keeps working as
# before.
class EdsCommException < StandardError
  attr_accessor :http_status, :http_body

  # message - short human-readable reason.
  # status  - HTTP status of the failed EDS response, if there was one.
  # body    - HTTP body of the failed EDS response, if there was one.
  def initialize(message, status = nil, body = nil)
    super(message)
    self.http_status = status
    self.http_body = body
  end
end
545
+
546
+
547
# A built-in decorator that is always applied to EDS result items. It
# overrides the ResultItem#published_in display method to return the
# human-readable citation blob saved under custom_data["citation_blob"],
# since we don't have individual elements to build a normalized citation
# from.
module CitationMessDecorator
  # Returns the stored citation blob, or whatever custom_data yields when
  # the key is absent.
  def published_in
    blob_key = "citation_blob"
    custom_data[blob_key]
  end
end
556
+
557
+ end