bento_search 1.5.0 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -24
  3. data/Rakefile +30 -11
  4. data/app/assets/javascripts/bento_search/ajax_load.js +54 -22
  5. data/app/controllers/bento_search/search_controller.rb +31 -30
  6. data/app/helpers/bento_search_helper.rb +72 -74
  7. data/app/models/bento_search/concurrent_searcher.rb +136 -0
  8. data/app/models/bento_search/result_item.rb +15 -12
  9. data/app/models/bento_search/results/serialization.rb +22 -13
  10. data/app/models/bento_search/search_engine.rb +170 -140
  11. data/app/search_engines/bento_search/doaj_articles_engine.rb +20 -20
  12. data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
  13. data/app/search_engines/bento_search/eds_engine.rb +326 -206
  14. data/app/search_engines/bento_search/google_books_engine.rb +2 -2
  15. data/app/search_engines/bento_search/scopus_engine.rb +87 -87
  16. data/app/search_engines/bento_search/summon_engine.rb +1 -1
  17. data/app/views/bento_search/_ajax_loading.html.erb +17 -0
  18. data/app/views/bento_search/_item_title.html.erb +2 -4
  19. data/app/views/bento_search/_link.html.erb +3 -3
  20. data/lib/bento_search.rb +24 -9
  21. data/lib/bento_search/engine.rb +2 -0
  22. data/lib/bento_search/version.rb +1 -1
  23. data/lib/generators/bento_search/install/ajax_load_js_generator.rb +15 -0
  24. data/test/decorator/standard_decorator_test.rb +30 -30
  25. data/test/dummy/app/assets/config/manifest.js +4 -0
  26. data/test/dummy/config/application.rb +7 -0
  27. data/test/dummy/config/boot.rb +4 -9
  28. data/test/dummy/config/environments/development.rb +2 -0
  29. data/test/dummy/config/environments/production.rb +7 -1
  30. data/test/dummy/config/environments/test.rb +10 -3
  31. data/test/functional/bento_search/search_controller_test.rb +68 -58
  32. data/test/helper/bento_search_helper_test.rb +103 -103
  33. data/test/search_engines/doaj_articles_engine_test.rb +9 -9
  34. data/test/search_engines/eds_engine_test.rb +91 -59
  35. data/test/search_engines/google_site_search_test.rb +48 -48
  36. data/test/search_engines/scopus_engine_test.rb +51 -51
  37. data/test/search_engines/search_engine_base_test.rb +108 -86
  38. data/test/search_engines/search_engine_test.rb +68 -56
  39. data/test/support/atom.xsd.xml +3 -3
  40. data/test/support/xml.xsd +117 -0
  41. data/test/test_helper.rb +23 -12
  42. data/test/unit/concurrent_searcher_test.rb +75 -0
  43. data/test/unit/pagination_test.rb +12 -12
  44. data/test/vcr_cassettes/eds/FullText_CustomLink.yml +198 -0
  45. data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1036 -1729
  46. data/test/vcr_cassettes/eds/catalog_ebook_query.yml +218 -0
  47. data/test/vcr_cassettes/eds/catalog_query.yml +255 -0
  48. data/test/vcr_cassettes/eds/get_auth_token.yml +11 -44
  49. data/test/vcr_cassettes/eds/get_auth_token_failure.yml +10 -7
  50. data/test/vcr_cassettes/eds/get_with_auth.yml +144 -153
  51. data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +167 -223
  52. data/test/view/atom_results_test.rb +94 -94
  53. metadata +36 -46
  54. data/app/assets/javascripts/bento_search.js +0 -3
  55. data/app/item_decorators/bento_search/ebscohost/conditional_openurl_main_link.rb +0 -36
  56. data/app/item_decorators/bento_search/only_premade_openurl.rb +0 -20
  57. data/app/item_decorators/bento_search/openurl_add_other_link.rb +0 -39
  58. data/app/item_decorators/bento_search/openurl_main_link.rb +0 -34
  59. data/app/models/bento_search/multi_searcher.rb +0 -131
  60. data/test/dummy/config/initializers/secret_token.rb +0 -8
  61. data/test/unit/multi_searcher_test.rb +0 -49
@@ -4,10 +4,10 @@ require 'http_client_patch/include_client'
4
4
  require 'json'
5
5
 
6
6
  module BentoSearch
7
- # DOAJ Articles search.
7
+ # DOAJ Articles search.
8
8
  # https://doaj.org/api/v1/docs
9
9
  #
10
- # Phrase searches with double quotes are respected.
10
+ # Phrase searches with double quotes are respected.
11
11
  #
12
12
  # Supports #get by unique_id feature
13
13
  #
@@ -16,7 +16,7 @@ module BentoSearch
16
16
  include ActionView::Helpers::SanitizeHelper
17
17
 
18
18
 
19
- class_attribute :http_timeout
19
+ class_attribute :http_timeout, instance_writer: false
20
20
  self.http_timeout = 10
21
21
 
22
22
  extend HTTPClientPatch::IncludeClient
@@ -36,7 +36,7 @@ module BentoSearch
36
36
  Rails.logger.debug("DoajEngine: requesting #{query_url}")
37
37
  response = http_client.get( query_url )
38
38
  json = JSON.parse(response.body)
39
- rescue TimeoutError, HTTPClient::TimeoutError,
39
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
40
40
  HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
41
41
  JSON::ParserError => e
42
42
  results.error ||= {}
@@ -77,7 +77,7 @@ module BentoSearch
77
77
  def args_to_search_url(arguments)
78
78
  query = if arguments[:query].kind_of?(Hash)
79
79
  # multi-field query
80
- arguments[:query].collect {|field, query| fielded_query(query, field)}.join(" ")
80
+ arguments[:query].collect {|field, query_value| fielded_query(query_value, field)}.join(" ")
81
81
  else
82
82
  fielded_query(arguments[:query], arguments[:search_field])
83
83
  end
@@ -85,7 +85,7 @@ module BentoSearch
85
85
  # We need to escape this for going in a PATH component,
86
86
  # not a query. So space can't be "+", it needs to be "%20",
87
87
  # and indeed DOAJ API does not like "+".
88
- #
88
+ #
89
89
  # But neither CGI.escape nor URI.escape does quite
90
90
  # the right kind of escaping, seems to work out
91
91
  # if we do CGI.escape but then replace '+'
@@ -98,7 +98,7 @@ module BentoSearch
98
98
  if arguments[:per_page]
99
99
  query_args["pageSize"] = arguments[:per_page]
100
100
  end
101
-
101
+
102
102
  if arguments[:page]
103
103
  query_args["page"] = arguments[:page]
104
104
  end
@@ -115,14 +115,14 @@ module BentoSearch
115
115
  return url
116
116
  end
117
117
 
118
- # Prepares a DOAJ API (elastic search) query component for
118
+ # Prepares a DOAJ API (elastic search) query component for
119
119
  # given textual query in a given field (or default non-fielded search)
120
120
  #
121
121
  # Separates query string into tokens (bare words and phrases),
122
122
  # so they can each be made mandatory for ElasticSearch. Default
123
123
  # DOAJ API makes them all optional, with a very low mm, which
124
124
  # leads to low-precision odd looking results for standard use
125
- # cases.
125
+ # cases.
126
126
  #
127
127
  # Escapes all remaining special characters as literals (not including
128
128
  # double quotes which can be used for phrases, which are respected. )
@@ -133,7 +133,7 @@ module BentoSearch
133
133
  #
134
134
  # The "+" prefixed before field-name is to make sure all separate
135
135
  # fields are also mandatory when doing multi-field searches. It should
136
- # make no difference for a single-field search.
136
+ # make no difference for a single-field search.
137
137
  def fielded_query(query, field = nil)
138
138
  if field.present?
139
139
  "+#{field}:(#{prepare_mandatory_terms(query)})"
@@ -143,12 +143,12 @@ module BentoSearch
143
143
  end
144
144
 
145
145
  # Takes a query string, prepares an ElasticSearch query
146
- # doing what we want:
146
+ # doing what we want:
147
147
  # * tokenizes into bare words and double-quoted phrases
148
148
  # * Escapes other punctuation to be literal not ElasticSearch operator.
149
149
  # (Does NOT do URI escaping)
150
- # * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
151
- def prepare_mandatory_terms(query)
150
+ # * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
151
+ def prepare_mandatory_terms(query)
152
152
  # use string split with regex to too-cleverly split into space
153
153
  # seperated terms and phrases, keeping phrases as unit.
154
154
  terms = query.split %r{[[:space:]]+|("[^"]+")}
@@ -174,13 +174,13 @@ module BentoSearch
174
174
 
175
175
  item.start_page = bibjson["start_page"]
176
176
  item.end_page = bibjson["end_page"]
177
-
177
+
178
178
  item.year = bibjson["year"]
179
179
  if (year = bibjson["year"].to_i) && (month = bibjson["month"].to_i)
180
180
  if year != 0 && month != 0
181
181
  item.publication_date = Date.new(bibjson["year"].to_i, bibjson["month"].to_i)
182
182
  end
183
- end
183
+ end
184
184
 
185
185
  item.abstract = sanitize(bibjson["abstract"]) if bibjson.has_key?("abstract")
186
186
 
@@ -222,9 +222,9 @@ module BentoSearch
222
222
  # punctuation that needs to be escaped and how to escape (backslash)
223
223
  # for ES documented here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
224
224
  #
225
- # We do not escape double quotes, want to allow them for phrases.
225
+ # We do not escape double quotes, want to allow them for phrases.
226
226
  #
227
- # This method does NOT return URI-escaped, it returns literal, escaped for ES.
227
+ # This method does NOT return URI-escaped, it returns literal, escaped for ES.
228
228
  def escape_query(q)
229
229
  q.gsub(/([\+\-\=\&\|\>\<\!\(\)\{\}\[\]\^\~\*\?\:\\\/])/) {|m| "\\#{$1}"}
230
230
  end
@@ -242,7 +242,7 @@ module BentoSearch
242
242
  { nil => {:semantic => :general},
243
243
  "bibjson.title" => {:semantic => :title},
244
244
  # Using 'exact' seems to produce much better results for
245
- # author, don't entirely understand what's up.
245
+ # author, don't entirely understand what's up.
246
246
  "bibjson.author.name" => {:semantic => :author},
247
247
  "publisher" => {:semantic => :publisher},
248
248
  "bibjson.subject.term" => {:semantic => :subject},
@@ -263,7 +263,7 @@ module BentoSearch
263
263
 
264
264
  def sort_definitions
265
265
  # Don't believe DOAJ supports sorting by author
266
- {
266
+ {
267
267
  "relevance" => {:implementation => nil}, # default
268
268
  "title" => {:implementation => "title:asc"},
269
269
  # We don't quite have publication date sorting, but we'll use
@@ -276,4 +276,4 @@ module BentoSearch
276
276
  end
277
277
 
278
278
  end
279
- end
279
+ end
@@ -131,14 +131,14 @@ class BentoSearch::EbscoHostEngine
131
131
  url = query_url(args)
132
132
 
133
133
  Rails.logger.debug("EbscoHostEngine Search for: #{url}")
134
-
134
+
135
135
  results = BentoSearch::Results.new
136
136
  xml, response, exception = nil, nil, nil
137
137
 
138
138
  begin
139
139
  response = http_client.get(url)
140
140
  xml = Nokogiri::XML(response.body)
141
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
141
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
142
142
  exception = e
143
143
  end
144
144
  # error handle
@@ -361,7 +361,7 @@ class BentoSearch::EbscoHostEngine
361
361
  query = if args[:query].kind_of?(Hash)
362
362
  # multi-field query
363
363
  args[:query].collect {|field, query| fielded_query(query, field)}.join(" AND ")
364
- else
364
+ else
365
365
  fielded_query(args[:query], args[:search_field])
366
366
  end
367
367
 
@@ -7,145 +7,144 @@ require 'http_client_patch/include_client'
7
7
 
8
8
 
9
9
  #
10
- # For EBSCO Discovery Service. You will need a license to use.
10
+ # For EBSCO Discovery Service. You will need a license to use.
11
11
  #
12
12
  # == Required Configuration
13
13
  #
14
- # user_id, password: As given be EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
14
+ # user_id, password: As given be EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
15
15
  # profile: As given by EBSCO, might be "edsapi"?
16
16
  #
17
17
  # == Highlighting
18
18
  #
19
- # EDS has a query-in-context highlighting feature. It is used by defualt, set
20
- # config 'highlighting' to false to disable.
19
+ # EDS has a query-in-context highlighting feature. It is used by defualt, set
20
+ # config 'highlighting' to false to disable.
21
21
  # If turned on, you may get <b class="bento_search_highlight"> tags
22
- # in title and abstract output if it's on, marked html_safe.
22
+ # in title and abstract output if it's on, marked html_safe.
23
23
  #
24
24
  #
25
25
  # == Linking
26
26
  #
27
27
  # The link to record in EBSCO interface delivered as "PLink" will be listed
28
- # as record main link.
28
+ # as record main link. If the record includes a node at `./FullText/Links/Link/Type[text() = 'pdflink']`,
29
+ # the `plink` will be marked as fulltext. (There may be other cases of fulltext, but
30
+ # this seems to be all EDS API tells us.)
29
31
  #
30
32
  # Any links listed under <CustomLinks> will be listed as other_links, using
31
- # configured name provided by EBSCO for CustomLink.
33
+ # configured name provided by EBSCO for CustomLink. Same with links listed
34
+ # as `<Item><Group>URL</Group>`.
32
35
  #
33
- # EDS Response does not have sufficient metadata for us to generate an OpenURL
34
- # ourselves. However, in our testing, the first/only CustomLink was an
35
- # an OpenURL. If configuration.assume_first_custom_link_openurl is
36
- # true (as is default), it will be used to create an OpenURL link. However, in
37
- # our testing, many records don't have this at all. **Note** You want
38
- # to configure your profile so OpenURLs are ALWAYS included for all records, not
39
- # just records with no EBSCO fulltext, to ensure bento_search can get the
40
- # openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
41
- # have to ask EBSCO support for help, it's confusing!).
36
+ # As always, you can customize links and other_links with Item Decorators.
42
37
  #
43
- # TODO: May have to add configuration code to pull the OpenURL link out by
44
- # it's configured name or label, not assume first one is it.
38
+ # == Custom Data
45
39
  #
46
- # As always, you can customize links and other_links with Item Decorators.
40
+ # If present, there is a custom_data[:holdings] value, an array of
41
+ # BentoSearch::EdsEngine::Holding objects, each of which has a #location
42
+ # and #call_number. There will usually (always?) be at most 1 item in the
43
+ # array, as far as we can tell from how EDS works.
47
44
  #
48
45
  # == Technical Notes and Difficulties
49
46
  #
50
- # This API is enormously difficult to work with. Also the response is very odd
51
- # to deal with and missing some key elements. We quite possibly got something
52
- # wrong or non-optimal in this implementation, but we did our best.
47
+ # This API is pretty difficult to work with, and the response has many
48
+ # idiosyncratic undocumented parts. We think we are currently
49
+ # getting fairly complete citation detail out, at least for articles, but may be missing
50
+ # some on weird edge cases, books/book chapters, etc)
53
51
  #
54
52
  # Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
55
53
  # request making a session for every new end-user -- as we have no way to keep
56
- # track of end-users, we do it on every request in this implementation.
54
+ # track of end-users, we do it on every request in this implementation.
57
55
  #
58
- # Responses don't include much metadata -- we don't actually have journal title,
59
- # volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
60
- # there depending on your profile configuration, but we're not right now.
61
- # Instead we're using the chunk of user-displayable citation/reference it does
62
- # give us (which is very difficult to parse into something usable already),
63
- # and a custom Decorator to display that instead of normalized citation
64
- # made from individual elements.
65
- #
66
- # EBSCO says they plan to improve some of these issues in a September 2012 release.
56
+ # An older version of the EDS API returned much less info, and we tried
57
+ # to scrape out what we could anyway. Much of this logic is still there
58
+ # as backup. In the older version, not enough info was there for an
59
+ # OpenURL link, `configuration.assume_first_custom_link_openurl` was true
60
+ # by default, and used to create an OpenURL link. It now defaults to false,
61
+ # and should no longer be necessary.
67
62
  #
68
63
  # Title and abstract data seems to be HTML with tags and character entities and
69
- # escaped special chars. We're trusting it and passing it on as html_safe.
64
+ # escaped special chars. We're trusting it and passing it on as html_safe.
70
65
  #
71
66
  # Paging can only happen on even pages, with 'page' rather than 'start'. But
72
- # you can pass in 'start' to bento_search, it'll be converted to closest page.
67
+ # you can pass in 'start' to bento_search, it'll be converted to closest page.
73
68
  #
74
69
  # == Authenticated Users
75
70
  #
76
- # EDS allows searches by unauthenticated users, but the results come back with
71
+ # EDS allows searches by unauthenticated users, but the results come back with
77
72
  # weird blank hits. In such a case, the BentoSearch adapter will return
78
73
  # records with virtually no metadata, but a title e
79
74
  # (I18n at bento_search.eds.record_not_available ). Also no abstracts
80
- # are available from unauth search.
75
+ # are available from unauth search.
81
76
  #
82
77
  # By default the engine will search as 'guest' unauth user. But config
83
78
  # 'auth' key to true to force all searches to auth (if you are protecting your
84
- # app) or pass :auth => true as param into #search method.
79
+ # app) or pass :auth => true as param into #search method.
85
80
  #
86
81
  # == Source Types
87
82
  # # What the EBSCO 'source types' mean: http://support.ebsco.com/knowledge_base/detail.php?id=5382
88
83
  #
89
- # But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
84
+ # But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
90
85
 
91
86
  #
92
87
  # == EDS docs:
93
- #
94
- # * Console App to demo requests: https://eds-api.ebscohost.com/Console
88
+ #
89
+ # * Console App to demo requests: <https://eds-api.ebscohost.com/Console>
95
90
  # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
96
91
  # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
97
- #
92
+ #
98
93
 
99
94
  class BentoSearch::EdsEngine
100
95
  include BentoSearch::SearchEngine
101
-
96
+
102
97
  # Can't change http timeout in config, because we keep an http
103
- # client at class-wide level, and config is not class-wide.
104
- # Change this 'constant' if you want to change it, I guess.
98
+ # client at class-wide level, and config is not class-wide.
99
+ # We used to keep in constant, but that's not good for custom setting,
100
+ # we now use class_attribute, but in a weird backwards-compat way for
101
+ # anyone who might be using the constant.
105
102
  HttpTimeout = 4
106
- extend HTTPClientPatch::IncludeClient
103
+
104
+ class_attribute :http_timeout, instance_writer: false
105
+ def self.http_timeout
106
+ defined?(@http_timeout) ? @http_timeout : HttpTimeout
107
+ end
108
+
109
+
110
+ extend HTTPClientPatch::IncludeClient
107
111
  include_http_client do |client|
108
- client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
112
+ client.connect_timeout = client.send_timeout = client.receive_timeout = http_timeout
109
113
  end
110
-
114
+
111
115
  AuthHeader = "x-authenticationToken"
112
116
  SessionTokenHeader = "x-sessionToken"
113
117
 
114
118
  @@remembered_auth = nil
115
119
  @@remembered_auth_lock = Mutex.new
116
120
  # Class variable to save current known good auth
117
- # uses a mutex to be threadsafe. sigh.
121
+ # uses a mutex to be threadsafe. sigh.
118
122
  def self.remembered_auth
119
- @@remembered_auth_lock.synchronize do
123
+ @@remembered_auth_lock.synchronize do
120
124
  @@remembered_auth
121
125
  end
122
126
  end
123
- # Set class variable with current known good auth.
124
- # uses a mutex to be threadsafe.
127
+ # Set class variable with current known good auth.
128
+ # uses a mutex to be threadsafe.
125
129
  def self.remembered_auth=(token)
126
130
  @@remembered_auth_lock.synchronize do
127
131
  @@remembered_auth = token
128
132
  end
129
133
  end
130
-
134
+
131
135
  # an object that includes some Rails helper modules for
132
- # text handling.
136
+ # text handling.
133
137
  def helper
134
- unless @helper
135
- @helper = Object.new
136
- @helper.extend ActionView::Helpers::TextHelper # for truncate
137
- @helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
138
- end
139
- return @helper
138
+ @helper ||= Helper.new
140
139
  end
141
-
142
-
140
+
141
+
143
142
  def self.required_configuration
144
143
  %w{user_id password profile}
145
144
  end
146
-
145
+
147
146
  # From config or args, args over-ride config
148
- def authenticated_end_user?(args)
147
+ def authenticated_end_user?(args)
149
148
  config = configuration.auth ? true : false
150
149
  arg = args[:auth]
151
150
  if ! arg.nil?
@@ -156,164 +155,269 @@ class BentoSearch::EdsEngine
156
155
  false
157
156
  end
158
157
  end
159
-
158
+
160
159
  def construct_search_url(args)
161
160
  query = "AND,"
162
161
  if args[:search_field]
163
162
  query += "#{args[:search_field]}:"
164
163
  end
165
164
  # Can't have any commas in query, it turns out, although
166
- # this is not documented.
165
+ # this is not documented.
167
166
  query += args[:query].gsub(",", " ")
168
-
167
+
169
168
  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
170
-
169
+
171
170
  url += "&searchmode=#{CGI.escape configuration.search_mode}"
172
-
171
+
173
172
  url += "&highlight=#{configuration.highlighting ? 'y' : 'n' }"
174
-
173
+
175
174
  if args[:per_page]
176
175
  url += "&resultsperpage=#{args[:per_page]}"
177
176
  end
178
177
  if args[:page]
179
178
  url += "&pagenumber=#{args[:page]}"
180
179
  end
181
-
180
+
182
181
  if args[:sort]
183
182
  if (defn = self.sort_definitions[args[:sort]]) &&
184
183
  (value = defn[:implementation] )
185
184
  url += "&sort=#{CGI.escape value}"
186
185
  end
187
186
  end
188
-
187
+
189
188
  if configuration.only_source_types.present?
190
189
  # facetfilter=1,SourceType:Research Starters,SourceType:Books
191
190
  url += "&facetfilter=" + CGI.escape("1," + configuration.only_source_types.collect {|t| "SourceType:#{t}"}.join(","))
192
191
  end
193
-
194
-
192
+
193
+
195
194
  return url
196
195
  end
197
-
198
-
199
-
196
+
197
+
198
+
200
199
  def search_implementation(args)
201
200
  results = BentoSearch::Results.new
202
-
201
+
203
202
  end_user_auth = authenticated_end_user? args
204
-
203
+
205
204
  begin
206
205
  with_session(end_user_auth) do |session_token|
207
-
206
+
208
207
  url = construct_search_url(args)
209
-
210
-
211
-
208
+
212
209
  response = get_with_auth(url, session_token)
213
-
210
+
214
211
  results = BentoSearch::Results.new
215
-
216
- if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
212
+
213
+ if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
217
214
  results.total_items = hits_node.to_i
218
215
  end
219
-
216
+
220
217
  response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
221
218
  item = BentoSearch::ResultItem.new
222
-
219
+
223
220
  item.title = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )
224
-
221
+
225
222
  # To get a unique id, we need to pull out db code and accession number
226
- # and combine em with colon, accession number is not unique by itself.
223
+ # and combine em with colon, accession number is not unique by itself.
227
224
  db = record_xml.at_xpath("./Header/DbId").try(:text)
228
225
  accession = record_xml.at_xpath("./Header/An").try(:text)
229
226
  if db && accession
230
227
  item.unique_id = "#{db}:#{accession}"
231
228
  end
232
-
233
-
229
+
230
+
234
231
  if item.title.nil? && ! end_user_auth
235
232
  item.title = I18n.translate("bento_search.eds.record_not_available")
236
233
  end
237
-
234
+
238
235
  item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
239
236
 
240
- # Believe it or not, the authors are encoded as an escaped
241
- # XML-ish payload, that we need to parse again and get the
242
- # actual authors out of. WTF. Thanks for handling fragments
243
- # nokogiri.
244
- author_mess = element_by_group(record_xml, "Au")
245
- # only SOMETIMES does it have XML tags, other times it's straight text.
246
- # ARGH.
247
- author_xml = Nokogiri::XML::fragment(author_mess)
248
- searchLinks = author_xml.xpath(".//searchLink")
249
- if searchLinks.size > 0
250
- author_xml.xpath(".//searchLink").each do |author_node|
251
- item.authors << BentoSearch::Author.new(:display => author_node.text)
237
+ # Much better way to get authors out of EDS response now...
238
+ author_full_names = record_xml.xpath("./RecordInfo/BibRecord/BibRelationships/HasContributorRelationships/HasContributor/PersonEntity/Name/NameFull")
239
+ author_full_names.each do |name_full_xml|
240
+ if name_full_xml && (text = name_full_xml.text).present?
241
+ item.authors << BentoSearch::Author.new(:display => text)
252
242
  end
253
- else
254
- item.authors << BentoSearch::Author.new(:display => author_xml.text)
255
243
  end
256
-
257
-
244
+
245
+ if item.authors.blank?
246
+ # Believe it or not, the authors are encoded as an escaped
247
+ # XML-ish payload, that we need to parse again and get the
248
+ # actual authors out of. WTF. Thanks for handling fragments
249
+ # nokogiri.
250
+ author_mess = element_by_group(record_xml, "Au")
251
+ # only SOMETIMES does it have XML tags, other times it's straight text.
252
+ # ARGH.
253
+ author_xml = Nokogiri::XML::fragment(author_mess)
254
+ searchLinks = author_xml.xpath(".//searchLink")
255
+ if searchLinks.size > 0
256
+ author_xml.xpath(".//searchLink").each do |author_node|
257
+ item.authors << BentoSearch::Author.new(:display => author_node.text)
258
+ end
259
+ else
260
+ item.authors << BentoSearch::Author.new(:display => author_xml.text)
261
+ end
262
+ end
263
+
258
264
  # PLink is main inward facing EBSCO link, put it as
259
- # main link.
265
+ # main link.
260
266
  if direct_link = record_xml.at_xpath("./PLink")
261
- item.link = direct_link.text
267
+ item.link = direct_link.text
268
+
269
+ if record_xml.at_xpath("./FullText/Links/Link/Type[text() = 'pdflink']")
270
+ item.link_is_fulltext = true
271
+ end
262
272
  end
263
-
273
+
274
+
264
275
  # Other links may be found in CustomLinks, it seems like usually
265
276
  # there will be at least one, hopefully the first one is the OpenURL?
266
- record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
277
+ #byebug if configuration.id == "articles"
278
+ record_xml.xpath("./CustomLinks/CustomLink|./FullText/CustomLinks/CustomLink").each do |custom_link|
279
+ # If it's in FullText section, give it a rel=alternate
280
+ # to indicate it's fulltext
281
+ rel = (custom_link.parent.parent.name.downcase == "fulltext") ? "alternate" : nil
282
+
267
283
  item.other_links << BentoSearch::Link.new(
268
284
  :url => custom_link.at_xpath("./Url").text,
269
- :label => custom_link.at_xpath("./Name").text
285
+ :rel => rel,
286
+ :label => custom_link.at_xpath("./Text").try(:text).presence || custom_link.at_xpath("./Name").try(:text).presence || "Link"
270
287
  )
271
288
  end
272
-
289
+
290
+ # More other links in 'URL' Item, in unpredictable format sometimes being
291
+ # embedded XML. Really EBSCO?
292
+ record_xml.xpath("./Items/Item[child::Group[text()='URL']]").each do |url_item|
293
+ data_element = url_item.at_xpath("./Data")
294
+ next unless data_element
295
+
296
+ # SOMETIMES the url and label are in an embedded escaped XML element...
297
+ if data_element.text.strip.start_with?("<link")
298
+ # Ugh, once unescaped it has bare '&' in URL queries sometimes, which
299
+ # is not actually legal XML anymore, but Nokogiri::HTML parser will
300
+ # let us get away with it, but then doesn't put the actual text
301
+ # inside the 'link' item, but inside the <link> tag since it knows
302
+ # an HTML link tag has no content. Really EDS.
303
+ node = Nokogiri::HTML::fragment(data_element.text)
304
+ next unless link = node.at_xpath("./link")
305
+ next unless link["linkterm"].presence || link["linkTerm"].presence
306
+
307
+ item.other_links << BentoSearch::Link.new(
308
+ :url => link["linkterm"] || link["linkTerm"],
309
+ :label => helper.strip_tags(data_element.text).presence || "Link"
310
+ )
311
+ else
312
+ # it's just a straight URL in data element, with only label we've
313
+ # got in <label> element.
314
+ next unless data_element.text.strip.present?
315
+
316
+ label_element = url_item.at_xpath("./Label")
317
+ label = label_element.try(:text).try { |s| helper.strip_tags(s) }.presence || "Link"
318
+
319
+ item.other_links << BentoSearch::Link.new(
320
+ :url => data_element.text,
321
+ :label => label
322
+ )
323
+ end
324
+ end
325
+
326
+
273
327
  if (configuration.assume_first_custom_link_openurl &&
274
328
  (first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
275
329
  (node = first.at_xpath "./Url" )
276
330
  )
277
-
331
+
278
332
  openurl = node.text
279
-
333
+
280
334
  index = openurl.index('?')
281
- item.openurl_kev_co = openurl.slice index..(openurl.length) if index
335
+ item.openurl_kev_co = openurl.slice index..(openurl.length) if index
282
336
  end
283
337
 
284
- # Format.
338
+ # Format.
285
339
  item.format_str = at_xpath_text record_xml, "./Header/PubType"
286
340
  # Can't find a list of possible PubTypes to see what's there to try
287
- # and map to our internal controlled vocab. oh wells.
288
-
289
-
290
-
341
+ # and map to our internal controlled vocab. oh wells.
342
+
343
+ item.doi = at_xpath_text record_xml, "./RecordInfo/BibRecord/BibEntity/Identifiers/Identifier[child::Type[text()='doi']]/Value"
344
+
345
+ item.start_page = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/StartPage")
346
+ total_pages = at_xpath_text(record_xml, "./RecordInfo/BibRecord/BibEntity/PhysicalDescription/Pagination/PageCount")
347
+ if total_pages.to_i != 0 && item.start_page.to_i != 0
348
+ item.end_page = (item.start_page.to_i + total_pages.to_i - 1).to_s
349
+ end
350
+
351
+
352
+ # location/call number, probably only for catalog results. We only see one
353
+ # in actual data, but XML structure allows multiple, so we'll store it as multiple.
354
+ copy_informations = record_xml.xpath("./Holdings/Holding/HoldingSimple/CopyInformationList/CopyInformation")
355
+ if copy_informations.present?
356
+ item.custom_data[:holdings] =
357
+ copy_informations.collect do |copy_information|
358
+ Holding.new(:location => at_xpath_text(copy_information, "Sublocation"),
359
+ :call_number => at_xpath_text(copy_information, "ShelfLocator"))
360
+ end
361
+ end
362
+
363
+
364
+
365
+ # For some EDS results, we have actual citation information,
366
+ # for some we don't.
367
+ container_xml = record_xml.at_xpath("./RecordInfo/BibRecord/BibRelationships/IsPartOfRelationships/IsPartOf/BibEntity")
368
+ if container_xml
369
+ item.source_title = at_xpath_text(container_xml, "./Titles/Title[child::Type[text()='main']]/TitleFull")
370
+ item.volume = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='volume']]/Value")
371
+ item.issue = at_xpath_text(container_xml, "./Numbering/Number[child::Type[text()='issue']]/Value")
372
+
373
+ item.issn = at_xpath_text(container_xml, "./Identifiers/Identifier[child::Type[text()='issn-print']]/Value")
374
+
375
+ if date_xml = container_xml.at_xpath("./Dates/Date")
376
+ item.year = at_xpath_text(date_xml, "./Y")
377
+
378
+ date = at_xpath_text(date_xml, "./D").to_i
379
+ month = at_xpath_text(date_xml, "./M").to_i
380
+ if item.year.to_i != 0 && date != 0 && month != 0
381
+ item.publication_date = Date.new(item.year.to_i, month, date)
382
+ end
383
+ end
384
+ end
385
+
386
+ # EDS annoyingly repeats a monographic title in the same place
387
+ # we look for source/container title, take it away.
388
+ if item.start_page.blank? && helper.strip_tags(item.title) == item.source_title
389
+ item.source_title = nil
390
+ end
391
+
392
+ # Legacy EDS citation extracting. We don't really need this any more
393
+ # because EDS api has improved, but leave it in in case anyone using
394
+ # older versions needed it.
395
+
291
396
  # We have a single blob of human-readable citation, that's also
292
397
  # littered with XML-ish tags we need to deal with. We'll save
293
398
  # it in a custom location, and use a custom Decorator to display
294
399
  # it. Sorry it's way too hard for us to preserve <highlight>
295
400
  # tags in this mess, they will be lost. Probably don't
296
- # need highlighting in source anyhow.
401
+ # need highlighting in source anyhow.
297
402
  citation_mess = element_by_group(record_xml, "Src")
298
403
  # Argh, but sometimes it's in SrcInfo _without_ tags instead
299
- if citation_mess
404
+ if citation_mess
300
405
  citation_txt = Nokogiri::XML::fragment(citation_mess).text
301
406
  # But strip off some "count of references" often on the end
302
- # which are confusing and useless.
407
+ # which are confusing and useless.
303
408
  item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
304
409
  else
305
410
  # try another location
306
411
  item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
307
412
  end
308
-
309
-
413
+
310
414
  item.extend CitationMessDecorator
311
-
415
+
312
416
  results << item
313
- end
417
+ end
314
418
  end
315
-
316
- return results
419
+
420
+ return results
317
421
  rescue EdsCommException => e
318
422
  results.error ||= {}
319
423
  results.error[:exception] = e
@@ -321,137 +425,137 @@ class BentoSearch::EdsEngine
321
425
  results.error[:http_body] = e.http_body
322
426
  return results
323
427
  end
324
-
428
+
325
429
  end
326
-
430
+
327
431
  # Difficult to get individual elements out of an EDS XML <Record>
328
- # response, requires weird xpath, so we do it for you.
432
+ # response, requires weird xpath, so we do it for you.
329
433
  # element_by_group(nokogiri_element, "Ti")
330
434
  #
331
435
  # Returns string or nil
332
436
  def element_by_group(noko, group)
333
437
  at_xpath_text(noko, "./Items/Item[child::Group[text()='#{group}']]/Data")
334
438
  end
335
-
439
+
336
440
  # Wraps calls to the EDS api with CreateSession and EndSession requests
337
441
  # to EDS. Yields the session token to the block.
338
442
  #
339
443
  # The optional arg is whether this is an authenticated user, else
340
- # guest access will be used.
444
+ # guest access will be used.
341
445
  #
342
446
  # with_session(true) do |session_token|
343
447
  # # can make more requests using session_token,
344
- # # EndSession will be called for you at end of block.
448
+ # # EndSession will be called for you at end of block.
345
449
  # end
346
450
  def with_session(auth = false, &block)
347
- auth_token = self.class.remembered_auth
451
+ auth_token = self.class.remembered_auth
348
452
  if auth_token.nil?
349
453
  auth_token = self.class.remembered_auth = get_auth_token
350
454
  end
351
-
352
-
353
- create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
354
- response_xml = get_with_auth(create_url)
355
-
455
+
456
+
457
+ create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
458
+ response_xml = get_with_auth(create_url)
459
+
356
460
  session_token = nil
357
- unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
358
- e = EdsCommException.new("Could not get SessionToken")
461
+ unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
462
+ e = EdsCommException.new("Could not get SessionToken")
359
463
  end
360
-
361
- begin
464
+
465
+ begin
362
466
  block.yield(session_token)
363
- ensure
364
- if auth_token && session_token
467
+ ensure
468
+ if auth_token && session_token
365
469
  end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
366
- response_xml = get_with_auth(end_url)
470
+ response_xml = get_with_auth(end_url)
367
471
  end
368
472
  end
369
-
473
+
370
474
  end
371
-
372
- # if the xpath responds, return #text of it, else nil.
475
+
476
+ # if the xpath responds, return #text of it, else nil.
373
477
  def at_xpath_text(noko, xpath)
374
478
  node = noko.at_xpath(xpath)
375
-
479
+
376
480
  if node.nil?
377
481
  return node
378
482
  else
379
483
  return node.text
380
484
  end
381
485
  end
382
-
486
+
383
487
  # If EDS has put highlighting tags
384
488
  # in a field, we need to HTML escape the literal values,
385
489
  # while still using the highlighting tokens to put
386
490
  # HTML tags around highlighted terms.
387
491
  #
388
492
  # Second param: whether to assume EDS literals are safe HTML, as they
389
- # seem to be.
493
+ # seem to be.
390
494
  def prepare_eds_payload(str, html_safe = false)
391
495
  return str if str.blank?
392
-
496
+
393
497
  unless configuration.highlighting
394
- str = str.html_safe if html_safe
498
+ str = str.html_safe if html_safe
395
499
  return str
396
500
  end
397
-
398
- parts =
501
+
502
+ parts =
399
503
  str.split(%r{(</?highlight>)}).collect do |substr|
400
504
  case substr
401
505
  when "<highlight>" then "<b class='bento_search_highlight'>".html_safe
402
506
  when "</highlight>" then "</b>".html_safe
403
- # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
507
+ # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
404
508
  else substr.html_safe
405
509
  end
406
510
  end
407
-
408
- return helper.safe_join(parts, '')
511
+
512
+ return helper.safe_join(parts, '')
409
513
  end
410
-
514
+
411
515
  # Give it a url pointing at EDS API.
412
- # Second arg must be a session_token if EDS request requires one.
413
- # It will
516
+ # Second arg must be a session_token if EDS request requires one.
517
+ # It will
414
518
  # * Make a GET request
415
519
  # * with memo-ized auth token added to headers
416
520
  # * for XML, with all namespaces removed!
417
521
  # * Parse the XML response into a Nokogiri document and return it
418
522
  # * Try ONCE more to get if EBSCO says bad auth token
419
523
  # * Raise an EdsCommException if can't auth after second try,
420
- # or other error message, or XML can't be parsed.
524
+ # or other error message, or XML can't be parsed.
421
525
  def get_with_auth(url, session_token = nil)
422
526
  auth_token = self.class.remembered_auth
423
527
  unless auth_token
424
528
  auth_token = self.class.remembered_auth = get_auth_token
425
529
  end
426
-
530
+
427
531
  response = nil
428
532
  response_xml = nil
429
533
  caught_exception = nil
430
-
534
+
431
535
  begin
432
536
  headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
433
537
  headers[SessionTokenHeader] = session_token if session_token
434
-
538
+
435
539
  s_time = Time.now
436
540
  response = http_client.get(url, nil, headers)
437
541
  Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")
438
-
542
+
439
543
  response_xml = Nokogiri::XML(response.body)
440
544
  response_xml.remove_namespaces!
441
-
545
+
442
546
  if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
443
547
  # bad auth, try again just ONCE
444
548
  Rails.logger.debug("EDS auth failed, getting auth again")
445
-
549
+
446
550
  headers[AuthHeader] = self.class.remembered_auth = get_auth_token
447
551
  response = http_client.get(url, nil, headers)
448
552
  response_xml = Nokogiri::XML(response.body)
449
- response_xml.remove_namespaces!
450
- end
451
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
553
+ response_xml.remove_namespaces!
554
+ end
555
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
452
556
  caught_exception = e
453
557
  end
454
-
558
+
455
559
  if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
456
560
  exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
457
561
  if response
@@ -460,68 +564,68 @@ class BentoSearch::EdsEngine
460
564
  end
461
565
  raise exception
462
566
  end
463
-
567
+
464
568
  return response_xml
465
569
  end
466
-
467
-
468
- # Has to make an HTTP request to get EBSCO's auth token.
570
+
571
+
572
+ # Has to make an HTTP request to get EBSCO's auth token.
469
573
  # returns the auth token. We aren't bothering to keep
470
574
  # track of the expiration ourselves, can't necessarily trust
471
- # it anyway.
575
+ # it anyway.
472
576
  #
473
- # Raises an EdsCommException on error.
474
- def get_auth_token
577
+ # Raises an EdsCommException on error.
578
+ def get_auth_token
475
579
  # Can't send params as form-encoded, actually need to send a JSON or XML
476
- # body, argh.
477
-
580
+ # body, argh.
581
+
478
582
  body = <<-EOS
479
583
  {
480
584
  "UserId":"#{configuration.user_id}",
481
585
  "Password":"#{configuration.password}"
482
586
  }
483
587
  EOS
484
-
588
+
485
589
  s_time = Time.now
486
590
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
487
- Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
488
-
591
+ Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
592
+
489
593
  unless HTTP::Status.successful? response.status
490
594
  raise EdsCommException.new("Could not get auth", response.status, response.body)
491
595
  end
492
-
596
+
493
597
  response_hash = nil
494
598
  begin
495
599
  response_hash = MultiJson.load response.body
496
600
  rescue MultiJson::DecodeError
497
601
  end
498
-
602
+
499
603
  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
500
604
  raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
501
605
  end
502
-
503
- return response_hash["AuthToken"]
606
+
607
+ return response_hash["AuthToken"]
504
608
  end
505
-
609
+
506
610
  def self.default_configuration
507
611
  {
508
612
  :auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
509
613
  :base_url => "http://eds-api.ebscohost.com/edsapi/rest/",
510
614
  :highlighting => true,
511
615
  :truncate_highlighted => 280,
512
- :assume_first_custom_link_openurl => true,
616
+ :assume_first_custom_link_openurl => false,
513
617
  :search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
514
618
  }
515
619
  end
516
-
517
- def sort_definitions
518
- {
620
+
621
+ def sort_definitions
622
+ {
519
623
  "date_desc" => {:implementation => "date"},
520
624
  "relevance" => {:implementation => "relevance" }
521
625
  # "date_asc" => {:implementation => "date2"}
522
626
  }
523
627
  end
524
-
628
+
525
629
  def search_field_definitions
526
630
  {
527
631
  "TX" => {:semantic => :general},
@@ -534,11 +638,11 @@ class BentoSearch::EdsEngine
534
638
  "IB" => {:semantic => :isbn},
535
639
  }
536
640
  end
537
-
538
- # an exception talking to EDS api.
641
+
642
+ # an exception talking to EDS api.
539
643
  # there's a short reason in #message, but also
540
644
  # possibly an http_status and http_body copied
541
- # from error EDS response.
645
+ # from error EDS response.
542
646
  class EdsCommException < ::BentoSearch::FetchError
543
647
  attr_accessor :http_status, :http_body
544
648
  def initialize(message, status = nil, body = nil)
@@ -547,16 +651,32 @@ class BentoSearch::EdsEngine
547
651
  self.http_body = body
548
652
  end
549
653
  end
550
-
551
-
654
+
655
+
552
656
  # A built-in decorator always applied, that overrides
553
657
  # the ResultItem#published_in display method to use our mess blob
554
658
  # of human readable citation, since we don't have individual elements
555
- # to create it from in a normalized way.
659
+ # to create it from in a normalized way.
556
660
  module CitationMessDecorator
557
661
  def published_in
558
662
  custom_data["citation_blob"]
559
663
  end
560
664
  end
561
-
665
+
666
+ # a class that includes some Rails helper modules for
667
+ # text handling.
668
+ class Helper
669
+ include ActionView::Helpers::SanitizeHelper # for strip_tags
670
+ include ActionView::Helpers::TextHelper # for truncate
671
+ include ActionView::Helpers::OutputSafetyHelper # for safe_join
672
+ end
673
+
674
+ class Holding
675
+ attr_reader :location, :call_number
676
+ def initialize(args)
677
+ @location = args[:location]
678
+ @call_number = args[:call_number]
679
+ end
680
+ end
681
+
562
682
  end