bento_search 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,10 +4,10 @@ require 'http_client_patch/include_client'
4
4
  require 'json'
5
5
 
6
6
  module BentoSearch
7
- # DOAJ Articles search.
7
+ # DOAJ Articles search.
8
8
  # https://doaj.org/api/v1/docs
9
9
  #
10
- # Phrase searches with double quotes are respected.
10
+ # Phrase searches with double quotes are respected.
11
11
  #
12
12
  # Supports #get by unique_id feature
13
13
  #
@@ -36,7 +36,7 @@ module BentoSearch
36
36
  Rails.logger.debug("DoajEngine: requesting #{query_url}")
37
37
  response = http_client.get( query_url )
38
38
  json = JSON.parse(response.body)
39
- rescue TimeoutError, HTTPClient::TimeoutError,
39
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
40
40
  HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
41
41
  JSON::ParserError => e
42
42
  results.error ||= {}
@@ -77,7 +77,7 @@ module BentoSearch
77
77
  def args_to_search_url(arguments)
78
78
  query = if arguments[:query].kind_of?(Hash)
79
79
  # multi-field query
80
- arguments[:query].collect {|field, query| fielded_query(query, field)}.join(" ")
80
+ arguments[:query].collect {|field, query_value| fielded_query(query_value, field)}.join(" ")
81
81
  else
82
82
  fielded_query(arguments[:query], arguments[:search_field])
83
83
  end
@@ -85,7 +85,7 @@ module BentoSearch
85
85
  # We need to escape this for going in a PATH component,
86
86
  # not a query. So space can't be "+", it needs to be "%20",
87
87
  # and indeed DOAJ API does not like "+".
88
- #
88
+ #
89
89
  # But neither CGI.escape nor URI.escape does quite
90
90
  # the right kind of escaping, seems to work out
91
91
  # if we do CGI.escape but then replace '+'
@@ -98,7 +98,7 @@ module BentoSearch
98
98
  if arguments[:per_page]
99
99
  query_args["pageSize"] = arguments[:per_page]
100
100
  end
101
-
101
+
102
102
  if arguments[:page]
103
103
  query_args["page"] = arguments[:page]
104
104
  end
@@ -115,14 +115,14 @@ module BentoSearch
115
115
  return url
116
116
  end
117
117
 
118
- # Prepares a DOAJ API (elastic search) query component for
118
+ # Prepares a DOAJ API (elastic search) query component for
119
119
  # given textual query in a given field (or default non-fielded search)
120
120
  #
121
121
  # Separates query string into tokens (bare words and phrases),
122
122
  # so they can each be made mandatory for ElasticSearch. Default
123
123
  # DOAJ API makes them all optional, with a very low mm, which
124
124
  # leads to low-precision odd looking results for standard use
125
- # cases.
125
+ # cases.
126
126
  #
127
127
  # Escapes all remaining special characters as literals (not including
128
128
  # double quotes which can be used for phrases, which are respected. )
@@ -133,7 +133,7 @@ module BentoSearch
133
133
  #
134
134
  # The "+" prefixed before field-name is to make sure all separate
135
135
  # fields are also mandatory when doing multi-field searches. It should
136
- # make no difference for a single-field search.
136
+ # make no difference for a single-field search.
137
137
  def fielded_query(query, field = nil)
138
138
  if field.present?
139
139
  "+#{field}:(#{prepare_mandatory_terms(query)})"
@@ -143,12 +143,12 @@ module BentoSearch
143
143
  end
144
144
 
145
145
  # Takes a query string, prepares an ElasticSearch query
146
- # doing what we want:
146
+ # doing what we want:
147
147
  # * tokenizes into bare words and double-quoted phrases
148
148
  # * Escapes other punctuation to be literal not ElasticSearch operator.
149
149
  # (Does NOT do URI escaping)
150
- # * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
151
- def prepare_mandatory_terms(query)
150
+ # * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
151
+ def prepare_mandatory_terms(query)
152
152
  # use string split with regex to too-cleverly split into space
153
153
  # separated terms and phrases, keeping phrases as unit.
154
154
  terms = query.split %r{[[:space:]]+|("[^"]+")}
@@ -174,13 +174,13 @@ module BentoSearch
174
174
 
175
175
  item.start_page = bibjson["start_page"]
176
176
  item.end_page = bibjson["end_page"]
177
-
177
+
178
178
  item.year = bibjson["year"]
179
179
  if (year = bibjson["year"].to_i) && (month = bibjson["month"].to_i)
180
180
  if year != 0 && month != 0
181
181
  item.publication_date = Date.new(bibjson["year"].to_i, bibjson["month"].to_i)
182
182
  end
183
- end
183
+ end
184
184
 
185
185
  item.abstract = sanitize(bibjson["abstract"]) if bibjson.has_key?("abstract")
186
186
 
@@ -222,9 +222,9 @@ module BentoSearch
222
222
  # punctuation that needs to be escaped and how to escape (backslash)
223
223
  # for ES documented here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
224
224
  #
225
- # We do not escape double quotes, want to allow them for phrases.
225
+ # We do not escape double quotes, want to allow them for phrases.
226
226
  #
227
- # This method does NOT return URI-escaped, it returns literal, escaped for ES.
227
+ # This method does NOT return URI-escaped, it returns literal, escaped for ES.
228
228
  def escape_query(q)
229
229
  q.gsub(/([\+\-\=\&\|\>\<\!\(\)\{\}\[\]\^\~\*\?\:\\\/])/) {|m| "\\#{$1}"}
230
230
  end
@@ -242,7 +242,7 @@ module BentoSearch
242
242
  { nil => {:semantic => :general},
243
243
  "bibjson.title" => {:semantic => :title},
244
244
  # Using 'exact' seems to produce much better results for
245
- # author, don't entirely understand what's up.
245
+ # author, don't entirely understand what's up.
246
246
  "bibjson.author.name" => {:semantic => :author},
247
247
  "publisher" => {:semantic => :publisher},
248
248
  "bibjson.subject.term" => {:semantic => :subject},
@@ -263,7 +263,7 @@ module BentoSearch
263
263
 
264
264
  def sort_definitions
265
265
  # Don't believe DOAJ supports sorting by author
266
- {
266
+ {
267
267
  "relevance" => {:implementation => nil}, # default
268
268
  "title" => {:implementation => "title:asc"},
269
269
  # We don't quite have publication date sorting, but we'll use
@@ -276,4 +276,4 @@ module BentoSearch
276
276
  end
277
277
 
278
278
  end
279
- end
279
+ end
@@ -131,14 +131,14 @@ class BentoSearch::EbscoHostEngine
131
131
  url = query_url(args)
132
132
 
133
133
  Rails.logger.debug("EbscoHostEngine Search for: #{url}")
134
-
134
+
135
135
  results = BentoSearch::Results.new
136
136
  xml, response, exception = nil, nil, nil
137
137
 
138
138
  begin
139
139
  response = http_client.get(url)
140
140
  xml = Nokogiri::XML(response.body)
141
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
141
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
142
142
  exception = e
143
143
  end
144
144
  # error handle
@@ -361,7 +361,7 @@ class BentoSearch::EbscoHostEngine
361
361
  query = if args[:query].kind_of?(Hash)
362
362
  # multi-field query
363
363
  args[:query].collect {|field, query| fielded_query(query, field)}.join(" AND ")
364
- else
364
+ else
365
365
  fielded_query(args[:query], args[:search_field])
366
366
  end
367
367
 
@@ -7,25 +7,25 @@ require 'http_client_patch/include_client'
7
7
 
8
8
 
9
9
  #
10
- # For EBSCO Discovery Service. You will need a license to use.
10
+ # For EBSCO Discovery Service. You will need a license to use.
11
11
  #
12
12
  # == Required Configuration
13
13
  #
14
- # user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
14
+ # user_id, password: As given by EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
15
15
  # profile: As given by EBSCO, might be "edsapi"?
16
16
  #
17
17
  # == Highlighting
18
18
  #
19
- # EDS has a query-in-context highlighting feature. It is used by default, set
20
- # config 'highlighting' to false to disable.
19
+ # EDS has a query-in-context highlighting feature. It is used by default, set
20
+ # config 'highlighting' to false to disable.
21
21
  # If turned on, you may get <b class="bento_search_highlight"> tags
22
- # in title and abstract output if it's on, marked html_safe.
22
+ # in title and abstract output if it's on, marked html_safe.
23
23
  #
24
24
  #
25
25
  # == Linking
26
26
  #
27
27
  # The link to record in EBSCO interface delivered as "PLink" will be listed
28
- # as record main link.
28
+ # as record main link.
29
29
  #
30
30
  # Any links listed under <CustomLinks> will be listed as other_links, using
31
31
  # configured name provided by EBSCO for CustomLink.
@@ -34,26 +34,26 @@ require 'http_client_patch/include_client'
34
34
  # ourselves. However, in our testing, the first/only CustomLink was an
35
35
  # OpenURL. If configuration.assume_first_custom_link_openurl is
36
36
  # true (as is default), it will be used to create an OpenURL link. However, in
37
- # our testing, many records don't have this at all. **Note** You want
37
+ # our testing, many records don't have this at all. **Note** You want
38
38
  # to configure your profile so OpenURLs are ALWAYS included for all records, not
39
39
  # just records with no EBSCO fulltext, to ensure bento_search can get the
40
40
  # openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
41
- # have to ask EBSCO support for help, it's confusing!).
41
+ # have to ask EBSCO support for help, it's confusing!).
42
42
  #
43
43
  # TODO: May have to add configuration code to pull the OpenURL link out by
44
- # its configured name or label, not assume first one is it.
44
+ # its configured name or label, not assume first one is it.
45
45
  #
46
- # As always, you can customize links and other_links with Item Decorators.
46
+ # As always, you can customize links and other_links with Item Decorators.
47
47
  #
48
48
  # == Technical Notes and Difficulties
49
49
  #
50
50
  # This API is enormously difficult to work with. Also the response is very odd
51
51
  # to deal with and missing some key elements. We quite possibly got something
52
- # wrong or non-optimal in this implementation, but we did our best.
52
+ # wrong or non-optimal in this implementation, but we did our best.
53
53
  #
54
54
  # Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
55
55
  # request making a session for every new end-user -- as we have no way to keep
56
- # track of end-users, we do it on every request in this implementation.
56
+ # track of end-users, we do it on every request in this implementation.
57
57
  #
58
58
  # Responses don't include much metadata -- we don't actually have journal title,
59
59
  # volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
@@ -61,91 +61,91 @@ require 'http_client_patch/include_client'
61
61
  # Instead we're using the chunk of user-displayable citation/reference it does
62
62
  # give us (which is very difficult to parse into something usable already),
63
63
  # and a custom Decorator to display that instead of normalized citation
64
- # made from individual elements.
64
+ # made from individual elements.
65
65
  #
66
- # EBSCO says they plan to improve some of these issues in a September 2012 release.
66
+ # EBSCO says they plan to improve some of these issues in a September 2012 release.
67
67
  #
68
68
  # Title and abstract data seems to be HTML with tags and character entities and
69
- # escaped special chars. We're trusting it and passing it on as html_safe.
69
+ # escaped special chars. We're trusting it and passing it on as html_safe.
70
70
  #
71
71
  # Paging can only happen on even pages, with 'page' rather than 'start'. But
72
- # you can pass in 'start' to bento_search, it'll be converted to closest page.
72
+ # you can pass in 'start' to bento_search, it'll be converted to closest page.
73
73
  #
74
74
  # == Authenticated Users
75
75
  #
76
- # EDS allows searches by unauthenticated users, but the results come back with
76
+ # EDS allows searches by unauthenticated users, but the results come back with
77
77
  # weird blank hits. In such a case, the BentoSearch adapter will return
78
78
  # records with virtually no metadata, but a title e
79
79
  # (I18n at bento_search.eds.record_not_available ). Also no abstracts
80
- # are available from unauth search.
80
+ # are available from unauth search.
81
81
  #
82
82
  # By default the engine will search as 'guest' unauth user. But config
83
83
  # 'auth' key to true to force all searches to auth (if you are protecting your
84
- # app) or pass :auth => true as param into #search method.
84
+ # app) or pass :auth => true as param into #search method.
85
85
  #
86
86
  # == Source Types
87
87
  # # What the EBSCO 'source types' mean: http://support.ebsco.com/knowledge_base/detail.php?id=5382
88
88
  #
89
- # But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
89
+ # But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
90
90
 
91
91
  #
92
92
  # == EDS docs:
93
- #
94
- # * Console App to demo requests: https://eds-api.ebscohost.com/Console
93
+ #
94
+ # * Console App to demo requests: https://eds-api.ebscohost.com/Console
95
95
  # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
96
96
  # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
97
- #
97
+ #
98
98
 
99
99
  class BentoSearch::EdsEngine
100
100
  include BentoSearch::SearchEngine
101
-
101
+
102
102
  # Can't change http timeout in config, because we keep an http
103
- # client at class-wide level, and config is not class-wide.
104
- # Change this 'constant' if you want to change it, I guess.
103
+ # client at class-wide level, and config is not class-wide.
104
+ # Change this 'constant' if you want to change it, I guess.
105
105
  HttpTimeout = 4
106
- extend HTTPClientPatch::IncludeClient
106
+ extend HTTPClientPatch::IncludeClient
107
107
  include_http_client do |client|
108
108
  client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
109
109
  end
110
-
110
+
111
111
  AuthHeader = "x-authenticationToken"
112
112
  SessionTokenHeader = "x-sessionToken"
113
113
 
114
114
  @@remembered_auth = nil
115
115
  @@remembered_auth_lock = Mutex.new
116
116
  # Class variable to save current known good auth
117
- # uses a mutex to be threadsafe. sigh.
117
+ # uses a mutex to be threadsafe. sigh.
118
118
  def self.remembered_auth
119
- @@remembered_auth_lock.synchronize do
119
+ @@remembered_auth_lock.synchronize do
120
120
  @@remembered_auth
121
121
  end
122
122
  end
123
- # Set class variable with current known good auth.
124
- # uses a mutex to be threadsafe.
123
+ # Set class variable with current known good auth.
124
+ # uses a mutex to be threadsafe.
125
125
  def self.remembered_auth=(token)
126
126
  @@remembered_auth_lock.synchronize do
127
127
  @@remembered_auth = token
128
128
  end
129
129
  end
130
-
130
+
131
131
  # an object that includes some Rails helper modules for
132
- # text handling.
132
+ # text handling.
133
133
  def helper
134
- unless @helper
134
+ unless @helper ||= nil
135
135
  @helper = Object.new
136
136
  @helper.extend ActionView::Helpers::TextHelper # for truncate
137
137
  @helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
138
138
  end
139
139
  return @helper
140
140
  end
141
-
142
-
141
+
142
+
143
143
  def self.required_configuration
144
144
  %w{user_id password profile}
145
145
  end
146
-
146
+
147
147
  # From config or args, args over-ride config
148
- def authenticated_end_user?(args)
148
+ def authenticated_end_user?(args)
149
149
  config = configuration.auth ? true : false
150
150
  arg = args[:auth]
151
151
  if ! arg.nil?
@@ -156,94 +156,94 @@ class BentoSearch::EdsEngine
156
156
  false
157
157
  end
158
158
  end
159
-
159
+
160
160
  def construct_search_url(args)
161
161
  query = "AND,"
162
162
  if args[:search_field]
163
163
  query += "#{args[:search_field]}:"
164
164
  end
165
165
  # Can't have any commas in query, it turns out, although
166
- # this is not documented.
166
+ # this is not documented.
167
167
  query += args[:query].gsub(",", " ")
168
-
168
+
169
169
  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
170
-
170
+
171
171
  url += "&searchmode=#{CGI.escape configuration.search_mode}"
172
-
172
+
173
173
  url += "&highlight=#{configuration.highlighting ? 'y' : 'n' }"
174
-
174
+
175
175
  if args[:per_page]
176
176
  url += "&resultsperpage=#{args[:per_page]}"
177
177
  end
178
178
  if args[:page]
179
179
  url += "&pagenumber=#{args[:page]}"
180
180
  end
181
-
181
+
182
182
  if args[:sort]
183
183
  if (defn = self.sort_definitions[args[:sort]]) &&
184
184
  (value = defn[:implementation] )
185
185
  url += "&sort=#{CGI.escape value}"
186
186
  end
187
187
  end
188
-
188
+
189
189
  if configuration.only_source_types.present?
190
190
  # facetfilter=1,SourceType:Research Starters,SourceType:Books
191
191
  url += "&facetfilter=" + CGI.escape("1," + configuration.only_source_types.collect {|t| "SourceType:#{t}"}.join(","))
192
192
  end
193
-
194
-
193
+
194
+
195
195
  return url
196
196
  end
197
-
198
-
199
-
197
+
198
+
199
+
200
200
  def search_implementation(args)
201
201
  results = BentoSearch::Results.new
202
-
202
+
203
203
  end_user_auth = authenticated_end_user? args
204
-
204
+
205
205
  begin
206
206
  with_session(end_user_auth) do |session_token|
207
-
207
+
208
208
  url = construct_search_url(args)
209
-
210
-
211
-
209
+
210
+
211
+
212
212
  response = get_with_auth(url, session_token)
213
-
213
+
214
214
  results = BentoSearch::Results.new
215
-
216
- if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
215
+
216
+ if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
217
217
  results.total_items = hits_node.to_i
218
218
  end
219
-
219
+
220
220
  response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
221
221
  item = BentoSearch::ResultItem.new
222
-
222
+
223
223
  item.title = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )
224
-
224
+
225
225
  # To get a unique id, we need to pull out db code and accession number
226
- # and combine em with colon, accession number is not unique by itself.
226
+ # and combine em with colon, accession number is not unique by itself.
227
227
  db = record_xml.at_xpath("./Header/DbId").try(:text)
228
228
  accession = record_xml.at_xpath("./Header/An").try(:text)
229
229
  if db && accession
230
230
  item.unique_id = "#{db}:#{accession}"
231
231
  end
232
-
233
-
232
+
233
+
234
234
  if item.title.nil? && ! end_user_auth
235
235
  item.title = I18n.translate("bento_search.eds.record_not_available")
236
236
  end
237
-
237
+
238
238
  item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
239
239
 
240
240
  # Believe it or not, the authors are encoded as an escaped
241
241
  # XML-ish payload, that we need to parse again and get the
242
242
  # actual authors out of. WTF. Thanks for handling fragments
243
- # nokogiri.
243
+ # nokogiri.
244
244
  author_mess = element_by_group(record_xml, "Au")
245
245
  # only SOMETIMES does it have XML tags, other times it's straight text.
246
- # ARGH.
246
+ # ARGH.
247
247
  author_xml = Nokogiri::XML::fragment(author_mess)
248
248
  searchLinks = author_xml.xpath(".//searchLink")
249
249
  if searchLinks.size > 0
@@ -253,14 +253,14 @@ class BentoSearch::EdsEngine
253
253
  else
254
254
  item.authors << BentoSearch::Author.new(:display => author_xml.text)
255
255
  end
256
-
257
-
256
+
257
+
258
258
  # PLink is main inward facing EBSCO link, put it as
259
- # main link.
259
+ # main link.
260
260
  if direct_link = record_xml.at_xpath("./PLink")
261
261
  item.link = direct_link.text
262
262
  end
263
-
263
+
264
264
  # Other links may be found in CustomLinks, it seems like usually
265
265
  # there will be at least one, hopefully the first one is the OpenURL?
266
266
  record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
@@ -269,51 +269,51 @@ class BentoSearch::EdsEngine
269
269
  :label => custom_link.at_xpath("./Name").text
270
270
  )
271
271
  end
272
-
272
+
273
273
  if (configuration.assume_first_custom_link_openurl &&
274
274
  (first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
275
275
  (node = first.at_xpath "./Url" )
276
276
  )
277
-
277
+
278
278
  openurl = node.text
279
-
279
+
280
280
  index = openurl.index('?')
281
- item.openurl_kev_co = openurl.slice index..(openurl.length) if index
281
+ item.openurl_kev_co = openurl.slice index..(openurl.length) if index
282
282
  end
283
283
 
284
- # Format.
284
+ # Format.
285
285
  item.format_str = at_xpath_text record_xml, "./Header/PubType"
286
286
  # Can't find a list of possible PubTypes to see what's there to try
287
- # and map to our internal controlled vocab. oh wells.
288
-
289
-
290
-
287
+ # and map to our internal controlled vocab. oh wells.
288
+
289
+
290
+
291
291
  # We have a single blob of human-readable citation, that's also
292
292
  # littered with XML-ish tags we need to deal with. We'll save
293
293
  # it in a custom location, and use a custom Decorator to display
294
294
  # it. Sorry it's way too hard for us to preserve <highlight>
295
295
  # tags in this mess, they will be lost. Probably don't
296
- # need highlighting in source anyhow.
296
+ # need highlighting in source anyhow.
297
297
  citation_mess = element_by_group(record_xml, "Src")
298
298
  # Argh, but sometimes it's in SrcInfo _without_ tags instead
299
- if citation_mess
299
+ if citation_mess
300
300
  citation_txt = Nokogiri::XML::fragment(citation_mess).text
301
301
  # But strip off some "count of references" often on the end
302
- # which are confusing and useless.
302
+ # which are confusing and useless.
303
303
  item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
304
304
  else
305
305
  # try another location
306
306
  item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
307
307
  end
308
-
309
-
308
+
309
+
310
310
  item.extend CitationMessDecorator
311
-
311
+
312
312
  results << item
313
- end
313
+ end
314
314
  end
315
-
316
- return results
315
+
316
+ return results
317
317
  rescue EdsCommException => e
318
318
  results.error ||= {}
319
319
  results.error[:exception] = e
@@ -321,137 +321,137 @@ class BentoSearch::EdsEngine
321
321
  results.error[:http_body] = e.http_body
322
322
  return results
323
323
  end
324
-
324
+
325
325
  end
326
-
326
+
327
327
  # Difficult to get individual elements out of an EDS XML <Record>
328
- # response, requires weird xpath, so we do it for you.
328
+ # response, requires weird xpath, so we do it for you.
329
329
  # element_by_group(nokogiri_element, "Ti")
330
330
  #
331
331
  # Returns string or nil
332
332
  def element_by_group(noko, group)
333
333
  at_xpath_text(noko, "./Items/Item[child::Group[text()='#{group}']]/Data")
334
334
  end
335
-
335
+
336
336
  # Wraps calls to the EDS api with CreateSession and EndSession requests
337
337
  # to EDS. Will pass sessionID in yield from block.
338
338
  #
339
339
  # Second optional arg is whether this is an authenticated user, else
340
- # guest access will be used.
340
+ # guest access will be used.
341
341
  #
342
342
  # with_session(true) do |session_token|
343
343
  # # can make more requests using session_token,
344
- # # EndSession will be called for you at end of block.
344
+ # # EndSession will be called for you at end of block.
345
345
  # end
346
346
  def with_session(auth = false, &block)
347
- auth_token = self.class.remembered_auth
347
+ auth_token = self.class.remembered_auth
348
348
  if auth_token.nil?
349
349
  auth_token = self.class.remembered_auth = get_auth_token
350
350
  end
351
-
352
-
353
- create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
354
- response_xml = get_with_auth(create_url)
355
-
351
+
352
+
353
+ create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
354
+ response_xml = get_with_auth(create_url)
355
+
356
356
  session_token = nil
357
- unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
358
- e = EdsCommException.new("Could not get SessionToken")
357
+ unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
358
+ e = EdsCommException.new("Could not get SessionToken")
359
359
  end
360
-
361
- begin
360
+
361
+ begin
362
362
  block.yield(session_token)
363
- ensure
364
- if auth_token && session_token
363
+ ensure
364
+ if auth_token && session_token
365
365
  end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
366
- response_xml = get_with_auth(end_url)
366
+ response_xml = get_with_auth(end_url)
367
367
  end
368
368
  end
369
-
369
+
370
370
  end
371
-
372
- # if the xpath responds, return #text of it, else nil.
371
+
372
+ # if the xpath responds, return #text of it, else nil.
373
373
  def at_xpath_text(noko, xpath)
374
374
  node = noko.at_xpath(xpath)
375
-
375
+
376
376
  if node.nil?
377
377
  return node
378
378
  else
379
379
  return node.text
380
380
  end
381
381
  end
382
-
382
+
383
383
  # If EDS has put highlighting tags
384
384
  # in a field, we need to HTML escape the literal values,
385
385
  # while still using the highlighting tokens to put
386
386
  # HTML tags around highlighted terms.
387
387
  #
388
388
  # Second param, if to assume EDS literals are safe HTML, as they
389
- # seem to be.
389
+ # seem to be.
390
390
  def prepare_eds_payload(str, html_safe = false)
391
391
  return str if str.blank?
392
-
392
+
393
393
  unless configuration.highlighting
394
- str = str.html_safe if html_safe
394
+ str = str.html_safe if html_safe
395
395
  return str
396
396
  end
397
-
398
- parts =
397
+
398
+ parts =
399
399
  str.split(%r{(</?highlight>)}).collect do |substr|
400
400
  case substr
401
401
  when "<highlight>" then "<b class='bento_search_highlight'>".html_safe
402
402
  when "</highlight>" then "</b>".html_safe
403
- # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
403
+ # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
404
404
  else substr.html_safe
405
405
  end
406
406
  end
407
-
408
- return helper.safe_join(parts, '')
407
+
408
+ return helper.safe_join(parts, '')
409
409
  end
410
-
410
+
411
411
  # Give it a url pointing at EDS API.
412
- # Second arg must be a session_token if EDS request requires one.
413
- # It will
412
+ # Second arg must be a session_token if EDS request requires one.
413
+ # It will
414
414
  # * Make a GET request
415
415
  # * with memo-ized auth token added to headers
416
416
  # * for XML, with all namespaces removed!
417
417
  # * Parse the response body as XML and return the parsed document
418
418
  # * Try ONCE more to get if EBSCO says bad auth token
419
419
  # * Raise an EdsCommException if can't auth after second try,
420
- # or other error message, or XML can't be parsed.
420
+ # or other error message, or XML can't be parsed.
421
421
  def get_with_auth(url, session_token = nil)
422
422
  auth_token = self.class.remembered_auth
423
423
  unless auth_token
424
424
  auth_token = self.class.remembered_auth = get_auth_token
425
425
  end
426
-
426
+
427
427
  response = nil
428
428
  response_xml = nil
429
429
  caught_exception = nil
430
-
430
+
431
431
  begin
432
432
  headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
433
433
  headers[SessionTokenHeader] = session_token if session_token
434
-
434
+
435
435
  s_time = Time.now
436
436
  response = http_client.get(url, nil, headers)
437
437
  Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")
438
-
438
+
439
439
  response_xml = Nokogiri::XML(response.body)
440
440
  response_xml.remove_namespaces!
441
-
441
+
442
442
  if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
443
443
  # bad auth, try again just ONCE
444
444
  Rails.logger.debug("EDS auth failed, getting auth again")
445
-
445
+
446
446
  headers[AuthHeader] = self.class.remembered_auth = get_auth_token
447
447
  response = http_client.get(url, nil, headers)
448
448
  response_xml = Nokogiri::XML(response.body)
449
- response_xml.remove_namespaces!
450
- end
451
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
449
+ response_xml.remove_namespaces!
450
+ end
451
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
452
452
  caught_exception = e
453
453
  end
454
-
454
+
455
455
  if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
456
456
  exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
457
457
  if response
@@ -460,49 +460,49 @@ class BentoSearch::EdsEngine
460
460
  end
461
461
  raise exception
462
462
  end
463
-
463
+
464
464
  return response_xml
465
465
  end
466
-
467
-
468
- # Has to make an HTTP request to get EBSCO's auth token.
466
+
467
+
468
+ # Has to make an HTTP request to get EBSCO's auth token.
469
469
  # returns the auth token. We aren't bothering to keep
470
470
  # track of the expiration ourselves, can't necessarily trust
471
- # it anyway.
471
+ # it anyway.
472
472
  #
473
- # Raises an EdsCommException on error.
474
- def get_auth_token
473
+ # Raises an EdsCommException on error.
474
+ def get_auth_token
475
475
  # Can't send params as form-encoded, actually need to send a JSON or XML
476
- # body, argh.
477
-
476
+ # body, argh.
477
+
478
478
  body = <<-EOS
479
479
  {
480
480
  "UserId":"#{configuration.user_id}",
481
481
  "Password":"#{configuration.password}"
482
482
  }
483
483
  EOS
484
-
484
+
485
485
  s_time = Time.now
486
486
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
487
- Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
488
-
487
+ Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
488
+
489
489
  unless HTTP::Status.successful? response.status
490
490
  raise EdsCommException.new("Could not get auth", response.status, response.body)
491
491
  end
492
-
492
+
493
493
  response_hash = nil
494
494
  begin
495
495
  response_hash = MultiJson.load response.body
496
496
  rescue MultiJson::DecodeError
497
497
  end
498
-
498
+
499
499
  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
500
500
  raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
501
501
  end
502
-
503
- return response_hash["AuthToken"]
502
+
503
+ return response_hash["AuthToken"]
504
504
  end
505
-
505
+
506
506
  def self.default_configuration
507
507
  {
508
508
  :auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
@@ -513,15 +513,15 @@ class BentoSearch::EdsEngine
513
513
  :search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
514
514
  }
515
515
  end
516
-
517
- def sort_definitions
518
- {
516
+
517
+ def sort_definitions
518
+ {
519
519
  "date_desc" => {:implementation => "date"},
520
520
  "relevance" => {:implementation => "relevance" }
521
521
  # "date_asc" => {:implementation => "date2"}
522
522
  }
523
523
  end
524
-
524
+
525
525
  def search_field_definitions
526
526
  {
527
527
  "TX" => {:semantic => :general},
@@ -534,11 +534,11 @@ class BentoSearch::EdsEngine
534
534
  "IB" => {:semantic => :isbn},
535
535
  }
536
536
  end
537
-
538
- # an exception talking to EDS api.
537
+
538
+ # an exception talking to EDS api.
539
539
  # there's a short reason in #message, but also
540
540
  # possibly an http_status and http_body copied
541
- # from error EDS response.
541
+ # from error EDS response.
542
542
  class EdsCommException < ::BentoSearch::FetchError
543
543
  attr_accessor :http_status, :http_body
544
544
  def initialize(message, status = nil, body = nil)
@@ -547,16 +547,16 @@ class BentoSearch::EdsEngine
547
547
  self.http_body = body
548
548
  end
549
549
  end
550
-
551
-
550
+
551
+
552
552
  # A built-in decorator always applied, that over-rides
553
553
  # the ResultItem#published_in display method to use our mess blob
554
554
  # of human readable citation, since we don't have individual elements
555
- # to create it from in a normalized way.
555
+ # to create it from in a normalized way.
556
556
  module CitationMessDecorator
557
557
  def published_in
558
558
  custom_data["citation_blob"]
559
559
  end
560
560
  end
561
-
561
+
562
562
  end