bento_search 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README.md +299 -0
  3. data/Rakefile +40 -0
  4. data/app/assets/images/bento_search/large_loader.gif +0 -0
  5. data/app/assets/javascripts/bento_search.js +3 -0
  6. data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
  7. data/app/assets/stylesheets/bento_search/bento.css +4 -0
  8. data/app/controllers/bento_search/bento_search_controller.rb +7 -0
  9. data/app/controllers/bento_search/search_controller.rb +72 -0
  10. data/app/helpers/bento_search_helper.rb +138 -0
  11. data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
  12. data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
  13. data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
  14. data/app/models/bento_search/author.rb +25 -0
  15. data/app/models/bento_search/link.rb +30 -0
  16. data/app/models/bento_search/multi_searcher.rb +109 -0
  17. data/app/models/bento_search/openurl_creator.rb +128 -0
  18. data/app/models/bento_search/registrar.rb +70 -0
  19. data/app/models/bento_search/result_item.rb +203 -0
  20. data/app/models/bento_search/results.rb +54 -0
  21. data/app/models/bento_search/results/pagination.rb +67 -0
  22. data/app/models/bento_search/search_engine.rb +219 -0
  23. data/app/models/bento_search/search_engine/capabilities.rb +65 -0
  24. data/app/search_engines/bento_search/#Untitled-1# +11 -0
  25. data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
  26. data/app/search_engines/bento_search/eds_engine.rb +557 -0
  27. data/app/search_engines/bento_search/google_books_engine.rb +184 -0
  28. data/app/search_engines/bento_search/primo_engine.rb +231 -0
  29. data/app/search_engines/bento_search/scopus_engine.rb +295 -0
  30. data/app/search_engines/bento_search/summon_engine.rb +398 -0
  31. data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
  32. data/app/views/bento_search/_link.html.erb +4 -0
  33. data/app/views/bento_search/_search_error.html.erb +22 -0
  34. data/app/views/bento_search/_std_item.html.erb +39 -0
  35. data/app/views/bento_search/search/search.html.erb +1 -0
  36. data/config/locales/en.yml +25 -0
  37. data/lib/bento_search.rb +29 -0
  38. data/lib/bento_search/engine.rb +5 -0
  39. data/lib/bento_search/routes.rb +45 -0
  40. data/lib/bento_search/version.rb +3 -0
  41. data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
  42. data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
  43. data/lib/http_client_patch/include_client.rb +86 -0
  44. data/lib/tasks/bento_search_tasks.rake +4 -0
  45. data/test/dummy/README.rdoc +261 -0
  46. data/test/dummy/Rakefile +7 -0
  47. data/test/dummy/app/assets/javascripts/application.js +15 -0
  48. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  49. data/test/dummy/app/controllers/application_controller.rb +3 -0
  50. data/test/dummy/app/helpers/application_helper.rb +2 -0
  51. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  52. data/test/dummy/config.ru +4 -0
  53. data/test/dummy/config/application.rb +56 -0
  54. data/test/dummy/config/boot.rb +10 -0
  55. data/test/dummy/config/database.yml +25 -0
  56. data/test/dummy/config/environment.rb +5 -0
  57. data/test/dummy/config/environments/development.rb +37 -0
  58. data/test/dummy/config/environments/production.rb +67 -0
  59. data/test/dummy/config/environments/test.rb +37 -0
  60. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  61. data/test/dummy/config/initializers/inflections.rb +15 -0
  62. data/test/dummy/config/initializers/mime_types.rb +5 -0
  63. data/test/dummy/config/initializers/secret_token.rb +7 -0
  64. data/test/dummy/config/initializers/session_store.rb +8 -0
  65. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  66. data/test/dummy/config/locales/en.yml +5 -0
  67. data/test/dummy/config/routes.rb +6 -0
  68. data/test/dummy/db/test.sqlite3 +0 -0
  69. data/test/dummy/log/test.log +3100 -0
  70. data/test/dummy/public/404.html +26 -0
  71. data/test/dummy/public/422.html +26 -0
  72. data/test/dummy/public/500.html +25 -0
  73. data/test/dummy/public/favicon.ico +0 -0
  74. data/test/dummy/script/rails +6 -0
  75. data/test/functional/bento_search/search_controller_test.rb +81 -0
  76. data/test/helper/bento_search_helper_test.rb +125 -0
  77. data/test/integration/navigation_test.rb +10 -0
  78. data/test/support/mock_engine.rb +23 -0
  79. data/test/support/test_with_cassette.rb +38 -0
  80. data/test/test_helper.rb +52 -0
  81. data/test/unit/#vcr_test.rb# +68 -0
  82. data/test/unit/ebsco_host_engine_test.rb +134 -0
  83. data/test/unit/eds_engine_test.rb +105 -0
  84. data/test/unit/google_books_engine_test.rb +93 -0
  85. data/test/unit/item_decorators_test.rb +66 -0
  86. data/test/unit/multi_searcher_test.rb +49 -0
  87. data/test/unit/openurl_creator_test.rb +111 -0
  88. data/test/unit/pagination_test.rb +59 -0
  89. data/test/unit/primo_engine_test.rb +37 -0
  90. data/test/unit/register_engine_test.rb +50 -0
  91. data/test/unit/result_item_display_test.rb +39 -0
  92. data/test/unit/result_item_test.rb +36 -0
  93. data/test/unit/scopus_engine_test.rb +130 -0
  94. data/test/unit/search_engine_base_test.rb +178 -0
  95. data/test/unit/search_engine_test.rb +95 -0
  96. data/test/unit/summon_engine_test.rb +161 -0
  97. data/test/unit/xerxes_engine_test.rb +70 -0
  98. data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
  99. data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
  100. data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
  101. data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
  102. data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
  103. data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
  104. data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
  105. data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
  106. data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
  107. data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
  108. data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
  109. data/test/vcr_cassettes/gbs/pagination.yml +702 -0
  110. data/test/vcr_cassettes/gbs/search.yml +340 -0
  111. data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
  112. data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
  113. data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
  114. data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
  115. data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
  116. data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
  117. data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
  118. data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
  119. data/test/vcr_cassettes/summon/search.yml +242 -0
  120. data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
  121. data/test/view/std_item_test.rb +98 -0
  122. metadata +421 -0
@@ -0,0 +1,65 @@
1
+
2
+ # Methods that describe a search engine's capabilities,
3
+ # mixed into SearchEngine. Individual engine implementations
4
+ # will often over-ride some or all of these methods.
5
module BentoSearch::SearchEngine::Capabilities
  # Fielded-search support. Engines that support fielded search over-ride
  # this to return a hash whose keys are engine-specific internal search
  # field names and whose values are nil or a hash of metadata about the
  # field, including its semantic mapping, e.g.:
  #
  #    def search_field_definitions
  #      { "intitle" => {:semantic => :title}}
  #    end
  #
  # The default declares no fields.
  def search_field_definitions
    {}
  end

  # Over-ride with a hash of available sorts. Each key is the string that
  # will be passed in engine.search(...., :sort => key). A single key
  # stands for the whole sort choice (field, ascending/descending,
  # secondary sorts, ...), because typical examined interfaces offered
  # sorts the same way from a select menu.
  #
  # Keys should where possible be _standard_ keys chosen from those listed
  # in config/i18n/en:bento_search.sort_keys.* -- but a key can be custom
  # to the engine if nothing there fits. The value is for internal use by
  # the engine and is a convenient place to store implementation details.
  #
  # What happens for a sort key not listed here (raise, or ignore) is
  # not yet decided.
  def sort_definitions
    {}
  end

  # Over-ride to return the integer maximum per-page size.
  # The default is nil.
  def max_per_page
    nil
  end

  # List of internal search field keys (the keys of
  # #search_field_definitions) that can be supplied to
  # search(:search_field => x).
  def search_keys
    search_field_definitions.keys
  end

  # List of semantic search field names (the keys of #semantic_search_map)
  # that can be supplied to search(:semantic_search_field => x).
  def semantic_search_keys
    semantic_search_map.keys
  end

  # Hash mapping each semantic search field name to the internal search
  # field key that implements it. The semantic names are stored as
  # strings (the :semantic value is passed through #to_s). Field
  # definitions that are nil or have no :semantic entry are left out.
  def semantic_search_map
    search_field_definitions.each_with_object({}) do |(field, defn), mapping|
      semantic = defn && defn[:semantic]
      mapping[semantic.to_s] = field if semantic
    end
  end
end
@@ -0,0 +1,11 @@
1
+ http://blacklight.mse.jhu.edu:3001/resolve?url_ver=Z39.88-2004
2
+ &url_ctx_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Actx
3
+ &ctx_ver=Z39.88-2004
4
+ &ctx_tim=2012-07-25T16%3A21%3A11-04%3A00
5
+ &ctx_id=
6
+ &ctx_enc=info%3Aofi%2Fenc%3AUTF-8
7
+ &rft.title=Monkey+Brains
8
+ &rft.creator=Will.i.am
9
+ &rft.pub=Absolute+Pitch%2C12+Dec+2007
10
+ &rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adc
11
+ &rfr_id=info%3Asid%2Fsummon.serialssolutions.com
@@ -0,0 +1,356 @@
1
+ require 'nokogiri'
2
+
3
+ require 'http_client_patch/include_client'
4
+ require 'httpclient'
5
+
6
+ # Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
7
+ # may be expanded or refactored for EDS too.
8
+ #
9
+ # == Required Configuration
10
+ #
11
+ # * profile_id
12
+ # * profile_password
13
+ # * databases: ARRAY of ebsco shortcodes of what databases to include in search. If you specify one you don't have access to, you get an error message from ebsco, alas.
14
+ #
15
+ # == Note on including databases
16
+ #
17
+ # Need to specifically configure all databases your institution licenses from
18
+ # EBSCO that you want included in the search. You can't just say "all of them"
19
+ # the api doesn't support that, and also more than 30 or 40 starts getting
20
+ # horribly slow. If you include a db you do not have access to, EBSCO api
21
+ # fatal errors.
22
+ #
23
+ # You may want to make sure all your licensed databases are included
24
+ # in your EIT profile. Log onto ebscoadmin, Customize Services, choose
25
+ # EIT profile, choose 'databases' tag.
26
+ #
27
+ # === Download databases from EBSCO api
28
+ #
29
+ # We include a utility to download ALL activated databases for EIT profile
30
+ # and generate a file putting them in a ruby array. You may want to use this
31
+ # file as a starting point, and edit by hand:
32
+ #
33
+ # First configure your EBSCO search engine with bento_search, say under
34
+ # key 'ebscohost'.
35
+ #
36
+ # Then run:
37
+ # rails generate bento_search:pull_ebsco_dbs ebscohost
38
+ #
39
+ # assuming 'ebscohost' is the key you registered the EBSCO search engine.
40
+ #
41
+ # This will create a file at ./config/ebsco_dbs.rb. You may want to hand
42
+ # edit it. Then, in your bento search config, you can:
43
+ #
44
+ # require "#{Rails.root}/config/ebsco_dbs.rb"
45
+ # BentoSearch.register_engine("ebscohost") do |conf|
46
+ # # ....
47
+ # conf.databases = $ebsco_dbs
48
+ # end
49
+ #
50
+ # == Vendor documentation
51
+ #
52
+ # Vendor documentation is a bit scattered, main page:
53
+ # * http://support.ebsco.com/eit/ws.php
54
+ # Some other useful pages we discovered:
55
+ # * http://support.ebsco.com/eit/ws_faq.php
56
+ # * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
57
+ # * Try construct a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
58
+ # * The 'info' service can be used to see what databases you have access to.
59
+ # * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
60
+ #
61
+ #
62
+ #
63
+ #
64
+ # TODO: David Walker tells us we need to configure in EBSCO to make default operator be 'and' instead of phrase search!
65
+ # We Do need to do that to get reasonable results.
66
+ class BentoSearch::EbscoHostEngine
67
+ include BentoSearch::SearchEngine
68
+
69
+ extend HTTPClientPatch::IncludeClient
70
+ include_http_client
71
+
72
# Gives access to some Rails text helpers, e.g. text_helper.truncate.
# The helper object is built once and memoized in a class variable.
def text_helper
  @@truncate ||= Object.new.extend(ActionView::Helpers::TextHelper)
end
80
+
81
# Runs the search against the EBSCOhost API and converts the XML
# response into a BentoSearch::Results.
#
# On failure (no response, unparseable XML, a rescued exception, a
# non-successful HTTP status, or a <Fault> element in the response) the
# returned results carry an #error hash with whichever of :exception,
# :status and :error_info are known, and contain no items.
def search_implementation(args)
  url = query_url(args)

  results = BentoSearch::Results.new
  xml = response = exception = nil

  begin
    response = http_client.get(url)
    xml = Nokogiri::XML(response.body)
  # Timeout::Error rather than the deprecated top-level TimeoutError alias.
  rescue Timeout::Error, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    exception = e
  end

  # The checks are ordered so that response and xml are only used once
  # they are known to be non-nil.
  fault = nil
  failed = response.nil? ||
           xml.nil? ||
           exception ||
           !HTTP::Status.successful?(response.status) ||
           (fault = xml.at_xpath("./Fault"))

  if failed
    results.error ||= {}
    results.error[:exception] = exception if exception
    results.error[:status] = response.status if response
    results.error[:error_info] = text_if_present(fault.at_xpath("./Message")) if fault

    return results
  end

  # the namespaces they provide are weird and don't help and sometimes
  # not clearly even legal. Remove em!
  xml.remove_namespaces!

  # A response without a <Hits> element counts as zero hits instead of
  # raising NoMethodError on nil.
  hits = xml.at_xpath("./searchResponse/Hits")
  results.total_items = hits ? hits.text.to_i : 0

  xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
    results << item_from_xml(xml_rec)
  end

  results
end
126
+
127
+
128
# Returns node.text for the given nokogiri node, or nil when the node
# itself is nil or its text is blank.
def text_if_present(node)
  return nil if node.nil?

  content = node.text
  content.blank? ? nil : content
end
137
+
138
# Guess the controlled format of an ebsco item from which metadata
# sections are present. EBSCOHost (not sure about EDS) publication and
# document types are a non-normalized vocabulary that is unusable for
# controlled types, so other metadata features are used instead.
#
# Returns "Book", :dissertation, "Article", :serial, or nil when the
# node is nil or nothing matches.
def sniff_format(xml_node)
  return nil if xml_node.nil?
  return "Book" if xml_node.at_xpath("./bkinfo/*")
  return :dissertation if xml_node.at_xpath("./dissinfo/*")
  return nil unless xml_node.at_xpath("./jinfo/*")

  # Journal info together with article info means an article; journal
  # info on its own means a serial.
  xml_node.at_xpath("./artinfo/*") ? "Article" : :serial
end
157
+
158
# Build the uncontrolled, literal format string shown to users.
# Combines the Ebsco Publication Type and Document Type when both are
# present, then applies a few hard-coded special transformations.
def sniff_format_str(xml_node)
  pubtype = text_if_present(xml_node.at_xpath("./artinfo/pubtype"))
  doctype = text_if_present(xml_node.at_xpath("./artinfo/doctype"))

  # Drop missing values, normalize case, and never show the same
  # thing twice.
  parts = [pubtype, doctype].compact.map { |part| part.titlecase }.uniq

  # some hard-coded cases for better user-displayable string
  return "Journal Article" if parts.first == "Academic Journal" && parts.last == "Article"
  return parts.last if parts.first == "Periodical" && parts.length > 1

  parts.join(": ")
end
185
+
186
# Pass in a <rec> nokogiri node; returns the best link for it: the
# pdfLink text when present, otherwise the plink text, otherwise nil.
def get_link(xml)
  ["./pdfLink", "./plink"].each do |path|
    link = text_if_present(xml.at_xpath(path))
    return link if link
  end

  nil
end
190
+
191
+
192
# It's unclear whether the ebsco API allows escaping of special chars,
# or even what the special chars are. Parens are known to be special
# and can't be escaped, so each one is replaced with a space (this
# should not affect the search).
def ebsco_query_escape(txt)
  txt.tr("()", "  ")
end
198
+
199
# Turns the user's query into an EBSCO "AND" boolean query, which seems
# to be the only way to get decent results where terms can match across
# fields at the moment, for EIT. We'll see for EDS.
def ebsco_query_prepare(txt)
  boolean_operators = ["AND", "OR", "NOT"]

  # Splitting on a regex with a capture group yields both the
  # space-separated terms and the double-quoted phrases, each phrase
  # kept as a single unit.
  tokens = txt.split(%r{[[:space:]]+|("[^"]+")})

  # Parens are removed everywhere except inside quoted phrases.
  cleaned = tokens.map do |token|
    token =~ /^\".*\"$/ ? token : ebsco_query_escape(token)
  end

  # Drop empty strings, bare boolean operators (they are fine inside a
  # quoted phrase, but make things weird on their own), and terms that
  # are solely punctuation without any letters.
  kept = cleaned.reject do |token|
    token.blank? ||
      boolean_operators.include?(token) ||
      token =~ /\A[^[[:alnum:]]]+\Z/
  end

  kept.join(" AND ")
end
226
+
227
# Builds the EBSCOhost Search URL for the given search arguments.
#
# Reads :query, and optionally :search_field, :start, :per_page and
# :sort from args; args itself is not modified. Credentials and
# database codes from the configuration are URL-escaped. A missing or
# unrecognized :sort falls back to "relevance" rather than EBSCO's own
# date default.
def query_url(args)
  conf = configuration

  params = [
    "prof=#{CGI.escape conf.profile_id.to_s}",
    "pwd=#{CGI.escape conf.profile_password.to_s}"
  ]

  query = ebsco_query_prepare(args[:query])
  # wrap in (FI $query) if fielded search
  query = "(#{args[:search_field]} #{query})" if args[:search_field]
  params << "query=#{CGI.escape query}"

  # startrec is 1-based for ebsco, not 0-based like for us.
  params << "startrec=#{args[:start] + 1}" if args[:start]
  params << "numrec=#{args[:per_page]}" if args[:per_page]

  # Look the sort up without writing a default back into the caller's
  # hash, and without raising on a key that has no definition.
  sorts = sort_definitions
  sort = sorts[args[:sort]] || sorts["relevance"]
  params << "sort=#{sort[:implementation]}"

  # Contrary to docs, don't pass these comma-separated, pass em in
  # separate query params.
  conf.databases.each do |db|
    params << "db=#{CGI.escape db.to_s}"
  end

  "#{conf.base_url}/Search?#{params.join('&')}"
end
257
+
258
# Converts a nokogiri node for an EBSCO <rec> result into a
# BentoSearch::ResultItem.
def item_from_xml(xml_rec)
  info = xml_rec.at_xpath("./header/controlInfo")
  item = BentoSearch::ResultItem.new

  item.link = get_link(xml_rec)

  # Plain one-to-one copies from the record into the item. A record
  # might hold multiple ISBNs; only the first is taken for now.
  [
    [:issn,          "./jinfo/issn"],
    [:journal_title, "./jinfo/jtl"],
    [:publisher,     "./pubinfo/pub"],
    [:isbn,          "./bkinfo/isbn"],
    [:year,          "./pubinfo/dt/@year"],
    [:volume,        "./pubinfo/vid"],
    [:issue,         "./pubinfo/iid"]
  ].each do |attribute, xpath|
    item.public_send("#{attribute}=", text_if_present(info.at_xpath(xpath)))
  end

  # EBSCO sometimes has crazy long titles, truncate em.
  full_title = text_if_present(info.at_xpath("./artinfo/tig/atl"))
  item.title = text_helper.truncate(full_title, :length => 200)

  item.start_page = text_if_present(info.at_xpath("./artinfo/ppf"))
  item.doi = text_if_present(info.at_xpath("./artinfo/ui[@type='doi']"))

  # EBSCO abstracts have an annoying habit of beginning with "Abstract:"
  item.abstract = text_if_present(info.at_xpath("./artinfo/ab"))
  item.abstract.gsub!(/^Abstract\: /, "") if item.abstract

  # authors, only get full display name from EBSCO.
  info.xpath("./artinfo/aug/au").each do |author_node|
    item.authors << BentoSearch::Author.new(:display => author_node.text)
  end

  item.format = sniff_format(info)
  item.format_str = sniff_format_str(info)

  item
end
302
+
303
# Not used for normal searching. Other code can call this to retrieve
# the results of the EBSCO API Info command, using the connection
# details configured in this engine. The Info command can tell you what
# databases your account is authorized to see.
#
# The profile id and password are URL-escaped before being placed in
# the query string.
#
# Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED.
def get_info
  profile  = CGI.escape(configuration.profile_id.to_s)
  password = CGI.escape(configuration.profile_password.to_s)
  url = "#{configuration.base_url}/Info?prof=#{profile}&pwd=#{password}"

  info_doc = Nokogiri::XML(http_client.get(url).body)
  info_doc.remove_namespaces!

  info_doc
end
318
+
319
# David Walker says pretty much only relevance and date are reliable
# in EBSCOhost cross-search. Each :implementation value is the string
# sent to EBSCO as the sort parameter.
def sort_definitions
  { "relevance" => "relevance", "date_desc" => "date" }.each_with_object({}) do |(key, ebsco_sort), definitions|
    definitions[key] = { :implementation => ebsco_sort }
  end
end
327
+
328
# Fielded searches supported by this engine: EBSCO field code mapped to
# a metadata hash holding the semantic field name.
def search_field_definitions
  semantic_by_code = {
    "AU" => :author,
    "TI" => :title,
    "SU" => :subject,
    "IS" => :issn,
    "IB" => :isbn
  }

  semantic_by_code.each_with_object({}) do |(code, semantic), definitions|
    definitions[code] = { :semantic => semantic }
  end
end
337
+
338
# Largest page size this engine accepts.
def max_per_page
  # The limit is actually only '50' when asking for 'full' records, but
  # we should never need those -- that's getting fulltext back!
  200
end
343
+
344
# Names of the configuration keys this engine requires.
def self.required_configuration
  %w[profile_id profile_password]
end
347
+
348
# Default configuration values for this engine.
def self.default_configuration
  defaults = {}
  # "/Search" (or "/Info") is appended to this base when a request URL
  # is built.
  defaults[:base_url] = "http://eit.ebscohost.com/Services/SearchService.asmx"
  defaults[:databases] = []
  defaults
end
355
+
356
+ end
@@ -0,0 +1,557 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'nokogiri'
4
+ require 'httpclient'
5
+ require 'multi_json'
6
+ require 'http_client_patch/include_client'
7
+
8
+
9
+ #
10
+ # For EBSCO Discovery Service. You will need a license to use.
11
+ #
12
+ # == Required Configuration
13
+ #
14
+ # user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
15
+ # profile: As given by EBSCO, might be "edsapi"?
16
+ #
17
+ # == Highlighting
18
+ #
19
+ # EDS has a query-in-context highlighting feature. It is used by default, set
20
+ # config 'highlighting' to false to disable.
21
+ # If turned on, you may get <b class="bento_search_highlight"> tags
22
+ # in title and abstract output, marked html_safe.
23
+ #
24
+ # If highlighting is on, since the abstract will be marked html safe, the
25
+ # view layer won't be able to safely truncate it. In fact, it's very hard
26
+ # to do here too, but we do it anyway, by default to approx configuration
27
+ # truncate_highlighted num of chars (default 280). Set to nil if you don't
28
+ # want this.
29
+ #
30
+ # == Linking
31
+ #
32
+ # The link to record in EBSCO interface delivered as "PLink" will be listed
33
+ # as record main link.
34
+ #
35
+ # Any links listed under <CustomLinks> will be listed as other_links, using
36
+ # configured name provided by EBSCO for CustomLink.
37
+ #
38
+ # EDS Response does not have sufficient metadata for us to generate an OpenURL
39
+ # ourselves. However, in our testing, the first/only CustomLink was an
40
+ # OpenURL. If configuration.assume_first_custom_link_openurl is
41
+ # true (as is default), it will be used to create an OpenURL link. However, in
42
+ # our testing, many records don't have this at all. **Note** Ask EBSCO support
43
+ # to configure your profile so OpenURLs are ALWAYS included for all records, not
44
+ # just records with no EBSCO fulltext, to ensure bento_search can get the
45
+ # openurl.
46
+ #
47
+ # As always, you can customize links and other_links with Item Decorators.
48
+ #
49
+ # == Technical Notes and Difficulties
50
+ #
51
+ # This API is enormously difficult to work with. Also the response is very odd
52
+ # to deal with and missing some key elements. We quite possibly got something
53
+ # wrong or non-optimal in this implementation, but we did our best.
54
+ #
55
+ # Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
56
+ # request making a session for every new end-user -- as we have no way to keep
57
+ # track of end-users, we do it on every request in this implementation.
58
+ #
59
+ # Responses don't include much metadata -- we don't actually have journal title,
60
+ # volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
61
+ # there depending on your profile configuration, but we're not right now.
62
+ # Instead we're using the chunk of user-displayable citation/reference it does
63
+ # give us (which is very difficult to parse into something usable already),
64
+ # and a custom Decorator to display that instead of normalized citation
65
+ # made from individual elements.
66
+ #
67
+ # EBSCO says they plan to improve some of these issues in a September 2012 release.
68
+ #
69
+ # Title and abstract data seems to be HTML with tags and character entities and
70
+ # escaped special chars. We're trusting it and passing it on as html_safe.
71
+ #
72
+ # Paging can only happen on even pages, with 'page' rather than 'start'. But
73
+ # you can pass in 'start' to bento_search, it'll be converted to closest page.
74
+ #
75
+ # == Authenticated Users
76
+ #
77
+ # EDS allows searches by unauthenticated users, but the results come back with
78
+ # weird blank hits. In such a case, the BentoSearch adapter will return
79
+ # records with virtually no metadata, but a placeholder title
80
+ # (I18n at bento_search.eds.record_not_available ). Also no abstracts
81
+ # are available from unauth search.
82
+ #
83
+ # By default the engine will search as 'guest' unauth user. But config
84
+ # 'auth' key to true to force all searches to auth (if you are protecting your
85
+ # app) or pass :auth => true as param into #search method.
86
+ #
87
+ # == EDS docs:
88
+ #
89
+ # * Console App to demo requests: https://eds-api.ebscohost.com/Console
90
+ # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
91
+ # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
92
+ #
93
+ class BentoSearch::EdsEngine
94
+ include BentoSearch::SearchEngine
95
+
96
+ extend HTTPClientPatch::IncludeClient
97
+ include_http_client
98
+
99
+ AuthHeader = "x-authenticationToken"
100
+ SessionTokenHeader = "x-sessionToken"
101
+
102
+ @@remembered_auth = nil
103
+ @@remembered_auth_lock = Mutex.new
104
# Reads the class variable holding the current known good auth.
# The read happens while holding the mutex, to be threadsafe.
def self.remembered_auth
  @@remembered_auth_lock.synchronize { @@remembered_auth }
end
111
# Stores the current known good auth in the class variable.
# The write happens while holding the mutex, to be threadsafe.
def self.remembered_auth=(token)
  @@remembered_auth_lock.synchronize { @@remembered_auth = token }
end
118
+
119
# An object extended with some Rails helper modules for text handling.
# Built on first use and memoized per engine instance.
def helper
  @helper ||= begin
    proxy = Object.new
    proxy.extend ActionView::Helpers::TextHelper          # for truncate
    proxy.extend ActionView::Helpers::OutputSafetyHelper  # for safe_join
    proxy
  end
end
129
+
130
+
131
# Names of the configuration keys this engine requires.
def self.required_configuration
  ["user_id", "password", "profile"]
end
134
+
135
# Whether this search should be made as an authenticated end user.
#
# args[:auth], when not nil, over-rides the configuration; otherwise the
# truthiness of configuration.auth decides. Always returns true or false.
def authenticated_end_user?(args)
  requested = args[:auth]
  return (requested ? true : false) unless requested.nil?

  # The configured value is coerced to a strict boolean, so no separate
  # fallback for a missing setting is needed: nil simply means false.
  configuration.auth ? true : false
end
147
+
148
# Builds the EDS search URL for the given search arguments.
#
# Reads :query, and optionally :search_field, :per_page, :page and :sort
# from args. A :sort with no matching entry (or no :implementation) in
# sort_definitions is left off the URL.
def construct_search_url(args)
  query = "AND,"
  query += "#{args[:search_field]}:" if args[:search_field]

  # Can't have any commas in query, it turns out, although this is not
  # documented. Each comma becomes a space so the words on either side
  # stay separate. (The earlier gsub was given the String "/\,/" rather
  # than a Regexp, so it never matched and commas were passed through.)
  query += args[:query].tr(",", " ")

  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
  url += "&searchmode=#{CGI.escape configuration.search_mode}"
  url += "&highlight=#{configuration.highlighting ? 'y' : 'n'}"
  url += "&resultsperpage=#{args[:per_page]}" if args[:per_page]
  url += "&pagenumber=#{args[:page]}" if args[:page]

  if args[:sort]
    definition = sort_definitions[args[:sort]]
    sort_value = definition && definition[:implementation]
    url += "&sort=#{CGI.escape sort_value}" if sort_value
  end

  url
end
180
+
181
+
182
+
183
# Runs the search against EDS inside a session and converts each
# <Record> of the response into a BentoSearch::ResultItem.
#
# If an EdsCommException is raised, the returned results carry an
# #error hash with :exception, :http_status and :http_body.
def search_implementation(args)
  results = BentoSearch::Results.new

  end_user_auth = authenticated_end_user?(args)

  begin
    with_session(end_user_auth) do |session_token|
      url = construct_search_url(args)
      response = get_with_auth(url, session_token)

      # Re-created inside the block, exactly as before.
      # NOTE(review): presumably so a re-run of the block would start
      # from a clean result set -- confirm against with_session.
      results = BentoSearch::Results.new

      total_hits = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits")
      results.total_items = total_hits.to_i if total_hits

      response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record|
        item = BentoSearch::ResultItem.new

        item.title = prepare_eds_payload(element_by_group(record, "Ti"), true)
        # Searches not made as an authenticated end user can come back
        # without a title; show a placeholder for those.
        if item.title.nil? && !end_user_auth
          item.title = I18n.translate("bento_search.eds.record_not_available")
        end

        item.abstract = prepare_eds_payload(element_by_group(record, "Ab"), true)

        # Believe it or not, the authors are encoded as an escaped
        # XML-ish payload that has to be parsed again to get the actual
        # authors out. Thanks for handling fragments, nokogiri.
        authors_fragment = Nokogiri::XML::fragment(element_by_group(record, "Au"))
        authors_fragment.xpath(".//searchLink").each do |author_node|
          item.authors << BentoSearch::Author.new(:display => author_node.text)
        end

        # PLink is the main inward facing EBSCO link, use it as the
        # main link.
        plink = record.at_xpath("./PLink")
        item.link = plink.text if plink

        # Other links may be found in CustomLinks; it seems like there
        # will usually be at least one, and hopefully the first one is
        # the OpenURL?
        record.xpath("./CustomLinks/CustomLink").each do |custom_link|
          link_url   = custom_link.at_xpath("./Url").text
          link_label = custom_link.at_xpath("./Name").text
          item.other_links << BentoSearch::Link.new(:url => link_url, :label => link_label)
        end

        if configuration.assume_first_custom_link_openurl
          url_node = record.xpath("./CustomLinks/CustomLink").at_xpath("./Url")
          if url_node
            openurl = url_node.text
            # Keep everything from the '?' (inclusive) to the end.
            query_start = openurl.index('?')
            item.openurl_kev_co = openurl[query_start..(openurl.length)] if query_start
          end
        end

        # Format. Can't find a list of possible PubTypes to see what's
        # there to try and map to our internal controlled vocab.
        item.format_str = at_xpath_text(record, "./Header/PubType")

        # There is a single blob of human-readable citation, also
        # littered with XML-ish tags. Only its text is kept, stored in a
        # custom location and displayed by a custom Decorator, so any
        # <highlight> tags in it are lost. Probably don't need
        # highlighting in the source anyhow.
        citation_text = Nokogiri::XML::fragment(element_by_group(record, "Src")).text
        # Strip off the "count of references" often found on the end,
        # which is confusing and useless.
        item.custom_data["citation_blob"] = citation_text.gsub(/ref +\d+ +ref\.$/, '')

        item.extend CitationMessDecorator

        results << item
      end
    end

    return results
  rescue EdsCommException => e
    results.error ||= {}
    results.error[:exception] = e
    results.error[:http_status] = e.http_status
    results.error[:http_body] = e.http_body
    return results
  end
end
283
+
284
# Fetches the text of one field out of an EDS XML <Record>. Fields live
# in <Item> elements that are identified by a child <Group> code rather
# than by their own element name, hence the awkward xpath.
#
#     element_by_group(nokogiri_element, "Ti")
#
# noko  - Nokogiri element for the <Record>
# group - EDS group code to look for, e.g. "Ti", "Ab", "Au", "Src"
#
# Returns the text of the matching <Data> element, or nil when there
# is no such item.
def element_by_group(noko, group)
  item_data_xpath = "./Items/Item[child::Group[text()='#{group}']]/Data"
  at_xpath_text(noko, item_data_xpath)
end
292
+
293
# Wraps calls to the EDS api with CreateSession and EndSession requests
# to EDS, yielding the session token to the block.
#
# auth - optional; truthy if this is an authenticated end user
#        (sends guest=n), otherwise guest access is requested (guest=y).
#
# Returns whatever the block returns.
#
# Raises EdsCommException if the CreateSession response does not carry
# a SessionToken (and whatever get_auth_token / get_with_auth raise).
#
#     with_session(true) do |session_token|
#       # can make more requests using session_token,
#       # EndSession will be called for you at end of block.
#     end
def with_session(auth = false, &block)
  # Make sure an auth token is memoized on the class before we start.
  auth_token = self.class.remembered_auth
  if auth_token.nil?
    auth_token = self.class.remembered_auth = get_auth_token
  end

  create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
  response_xml = get_with_auth(create_url)

  session_token = response_xml && at_xpath_text(response_xml, "//SessionToken")
  unless session_token
    # Previously this exception was built but never raised, so the block
    # went on to run with a nil session token.
    raise EdsCommException.new("Could not get SessionToken")
  end

  begin
    block.yield(session_token)
  ensure
    # Always close the session we opened, even if the block raised.
    if auth_token && session_token
      end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
      get_with_auth(end_url)
    end
  end
end
328
+
329
# Looks up the first node matching xpath under noko.
#
# Returns the node's #text when something matched, otherwise nil.
def at_xpath_text(noko, xpath)
  match = noko.at_xpath(xpath)
  match.nil? ? match : match.text
end
339
+
340
# Turns an EDS field value into display-ready output.
#
# EDS marks search hits with literal <highlight>...</highlight> tokens.
# When configuration.highlighting is on, those tokens are swapped for
# <b class='bento_search_highlight'> tags, everything else is marked
# html_safe (yes, we are choosing to trust HTML in EDS literals), and
# the pieces are joined with helper.safe_join.
#
# When configuration.truncate_highlighted is set, the output is cut to
# roughly that many characters of text, never cutting inside a
# highlighted run, with "..." appended if we ran out of room on a
# piece boundary.
#
# str       - raw field value; blank values are returned untouched.
# html_safe - only consulted when highlighting is off: if truthy the
#             string is returned html_safe, otherwise as-is.
def prepare_eds_payload(str, html_safe = false)
  return str if str.blank?

  unless configuration.highlighting
    return html_safe ? str.html_safe : str
  end

  # The capture group makes split keep the highlight tokens as their
  # own array elements.
  segments = str.split(%r{(</?highlight>)}).map do |piece|
    if piece == "<highlight>"
      "<b class='bento_search_highlight'>".html_safe
    elsif piece == "</highlight>"
      "</b>".html_safe
    else
      # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
      piece.html_safe
    end
  end

  # Hacky truncation that walks the pieces so we never stop in the
  # middle of one of our own <b> tags.
  budget = configuration.truncate_highlighted
  if budget
    inside_highlight = false
    kept = []

    segments.each do |piece|
      if budget <= 0 && !inside_highlight
        kept << "..."
        break
      end

      if piece =~ /^<b.*\>$/
        inside_highlight = true
        kept << piece
      elsif piece == "</b>"
        inside_highlight = false
        kept << piece
      elsif inside_highlight || (budget - piece.length) > 0
        kept << piece
      else
        # Not enough room for the whole piece: cut it on a word boundary
        # and stop.
        kept << helper.truncate(piece, :length => budget, :separator => ' ')
        break
      end

      budget -= piece.length
    end

    segments = kept
  end

  helper.safe_join(segments, '')
end
405
+
406
# Give it a url pointing at EDS API.
# Second arg must be a session_token if EDS request requires one.
# It will
# * Make a GET request
# * with memo-ized auth token added to headers
# * asking for XML, which is parsed with Nokogiri with all namespaces removed!
# * Try ONCE more to get if EBSCO says bad auth token
#
# Returns the parsed Nokogiri XML document.
#
# Raises an EdsCommException if the request times out or otherwise
# fails at the HTTP client level, or the final response status is not
# successful. The exception carries the http_status and http_body of
# the response when there was one.
def get_with_auth(url, session_token = nil)
  auth_token = self.class.remembered_auth
  unless auth_token
    auth_token = self.class.remembered_auth = get_auth_token
  end

  response = nil
  response_xml = nil
  caught_exception = nil

  begin
    headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
    headers[SessionTokenHeader] = session_token if session_token

    s_time = Time.now
    response = http_client.get(url, nil, headers)
    Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")

    response_xml = Nokogiri::XML(response.body)
    response_xml.remove_namespaces!

    if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
      # bad auth, try again just ONCE
      Rails.logger.debug("EDS auth failed, getting auth again")

      headers[AuthHeader] = self.class.remembered_auth = get_auth_token
      response = http_client.get(url, nil, headers)
      response_xml = Nokogiri::XML(response.body)
      response_xml.remove_namespaces!
    end
  # The top-level TimeoutError alias was removed in Ruby 3.0, so name
  # Timeout::Error directly. HTTPClient's own timeout errors are listed
  # as well so that HTTP-level timeouts are wrapped too.
  rescue ::Timeout::Error, HTTPClient::TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
    caught_exception = e
  end

  if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
    exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
    if response
      exception.http_body = response.body
      exception.http_status = response.status
    end
    raise exception
  end

  return response_xml
end
461
+
462
+
463
# Has to make an HTTP request to get EBSCO's auth token.
# Returns the auth token. We aren't bothering to keep
# track of the expiration ourselves, can't necessarily trust
# it anyway.
#
# Raises an EdsCommException if the auth request is not successful, or
# the response body is not a JSON object containing "AuthToken".
def get_auth_token
  # Can't send params as form-encoded, actually need to send a JSON or XML
  # body, argh.
  #
  # Serialize with MultiJson rather than interpolating into a string, so
  # a quote or backslash in the credentials still yields valid JSON.
  body = MultiJson.dump(
    "UserId"   => configuration.user_id.to_s,
    "Password" => configuration.password.to_s
  )

  s_time = Time.now
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
  Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")

  unless HTTP::Status.successful? response.status
    raise EdsCommException.new("Could not get auth", response.status, response.body)
  end

  response_hash = nil
  begin
    response_hash = MultiJson.load response.body
  rescue MultiJson::DecodeError
    # Unparseable body: leave response_hash nil, the check below raises.
  end

  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
    raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
  end

  return response_hash["AuthToken"]
end
500
+
501
# Default configuration values for this engine.
#
# :auth_url  - endpoint get_auth_token posts credentials to
# :base_url  - prefix for EDS REST calls (createsession, endsession, ...).
#              Uses https, like :auth_url: every call carries the auth
#              token (and usually a session token) in its headers, which
#              should not travel in cleartext.
# :highlighting         - whether prepare_eds_payload converts EDS
#                         <highlight> tokens into HTML tags
# :truncate_highlighted - approximate max length of highlighted fields
# :assume_first_custom_link_openurl - treat the first CustomLink of a
#                         record as its OpenURL
# :search_mode          - EDS search mode
def self.default_configuration
  {
    :auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
    :base_url => "https://eds-api.ebscohost.com/edsapi/rest/",
    :highlighting => true,
    :truncate_highlighted => 280,
    :assume_first_custom_link_openurl => true,
    :search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
  }
end
511
+
512
# Sort keys this engine supports, each mapped to the value handed to
# EDS under :implementation.
def sort_definitions
  definitions = {}
  definitions["date_desc"] = { :implementation => "date" }
  definitions["relevance"] = { :implementation => "relevance" }
  # Not enabled: "date_asc" => {:implementation => "date2"}
  definitions
end
519
+
520
# EDS search field codes this engine supports, each mapped to the
# semantic field it stands for (when there is one).
def search_field_definitions
  fields = {}
  fields["TX"] = { :semantic => :all }
  fields["AU"] = { :semantic => :author }
  fields["TI"] = { :semantic => :title }
  fields["SU"] = { :semantic => :subject }
  fields["SO"] = {} # source, journal name
  fields["AB"] = {} # abstract
  fields["IS"] = { :semantic => :issn }
  fields["IB"] = { :semantic => :isbn }
  fields
end
532
+
533
# An exception talking to the EDS api.
# There's a short reason in #message, but also
# possibly an http_status and http_body copied
# from the error EDS response.
#
# Inherits from StandardError (not Exception) so that a plain `rescue`
# catches it like any other application error.
class EdsCommException < StandardError
  attr_accessor :http_status, :http_body

  # message - short description of what went wrong
  # status  - optional HTTP status of the failed EDS response
  # body    - optional HTTP body of the failed EDS response
  def initialize(message, status = nil, body = nil)
    super(message)
    self.http_status = status
    self.http_body = body
  end
end
545
+
546
+
547
# A built-in decorator always applied, that overrides the
# ResultItem#published_in display method to use our blob of
# human-readable citation, since we don't have individual elements
# to create it from in a normalized way.
module CitationMessDecorator
  # Returns whatever is stored under "citation_blob" in custom_data.
  def published_in
    blob_key = "citation_blob"
    custom_data[blob_key]
  end
end
556
+
557
+ end