bento_search 1.5.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,10 +4,10 @@ require 'http_client_patch/include_client'
4
4
  require 'json'
5
5
 
6
6
  module BentoSearch
7
- # DOAJ Articles search.
7
+ # DOAJ Articles search.
8
8
  # https://doaj.org/api/v1/docs
9
9
  #
10
- # Phrase searches with double quotes are respected.
10
+ # Phrase searches with double quotes are respected.
11
11
  #
12
12
  # Supports #get by unique_id feature
13
13
  #
@@ -36,7 +36,7 @@ module BentoSearch
36
36
  Rails.logger.debug("DoajEngine: requesting #{query_url}")
37
37
  response = http_client.get( query_url )
38
38
  json = JSON.parse(response.body)
39
- rescue TimeoutError, HTTPClient::TimeoutError,
39
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::TimeoutError,
40
40
  HTTPClient::ConfigurationError, HTTPClient::BadResponseError,
41
41
  JSON::ParserError => e
42
42
  results.error ||= {}
@@ -77,7 +77,7 @@ module BentoSearch
77
77
  def args_to_search_url(arguments)
78
78
  query = if arguments[:query].kind_of?(Hash)
79
79
  # multi-field query
80
- arguments[:query].collect {|field, query| fielded_query(query, field)}.join(" ")
80
+ arguments[:query].collect {|field, query_value| fielded_query(query_value, field)}.join(" ")
81
81
  else
82
82
  fielded_query(arguments[:query], arguments[:search_field])
83
83
  end
@@ -85,7 +85,7 @@ module BentoSearch
85
85
  # We need to escape this for going in a PATH component,
86
86
  # not a query. So space can't be "+", it needs to be "%20",
87
87
  # and indeed DOAJ API does not like "+".
88
- #
88
+ #
89
89
  # But neither CGI.escape nor URI.escape does quite
90
90
  # the right kind of escaping, seems to work out
91
91
  # if we do CGI.escape but then replace '+'
@@ -98,7 +98,7 @@ module BentoSearch
98
98
  if arguments[:per_page]
99
99
  query_args["pageSize"] = arguments[:per_page]
100
100
  end
101
-
101
+
102
102
  if arguments[:page]
103
103
  query_args["page"] = arguments[:page]
104
104
  end
@@ -115,14 +115,14 @@ module BentoSearch
115
115
  return url
116
116
  end
117
117
 
118
- # Prepares a DOAJ API (elastic search) query component for
118
+ # Prepares a DOAJ API (elastic search) query component for
119
119
  # given textual query in a given field (or default non-fielded search)
120
120
  #
121
121
  # Separates query string into tokens (bare words and phrases),
122
122
  # so they can each be made mandatory for ElasticSearch. Default
123
123
  # DOAJ API makes them all optional, with a very low mm, which
124
124
  # leads to low-precision odd looking results for standard use
125
- # cases.
125
+ # cases.
126
126
  #
127
127
  # Escapes all remaining special characters as literals (not including
128
128
  # double quotes which can be used for phrases, which are respected. )
@@ -133,7 +133,7 @@ module BentoSearch
133
133
  #
134
134
  # The "+" prefixed before field-name is to make sure all separate
135
135
  # fields are also mandatory when doing multi-field searches. It should
136
- # make no difference for a single-field search.
136
+ # make no difference for a single-field search.
137
137
  def fielded_query(query, field = nil)
138
138
  if field.present?
139
139
  "+#{field}:(#{prepare_mandatory_terms(query)})"
@@ -143,12 +143,12 @@ module BentoSearch
143
143
  end
144
144
 
145
145
  # Takes a query string, prepares an ElasticSearch query
146
- # doing what we want:
146
+ # doing what we want:
147
147
  # * tokenizes into bare words and double-quoted phrases
148
148
  # * Escapes other punctuation to be literal not ElasticSearch operator.
149
149
  # (Does NOT do URI escaping)
150
- # * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
151
- def prepare_mandatory_terms(query)
150
+ # * Makes each token mandatory with an ElasticSearch "+" operator prefixed.
151
+ def prepare_mandatory_terms(query)
152
152
  # use string split with regex to too-cleverly split into space
153
153
  # seperated terms and phrases, keeping phrases as unit.
154
154
  terms = query.split %r{[[:space:]]+|("[^"]+")}
@@ -174,13 +174,13 @@ module BentoSearch
174
174
 
175
175
  item.start_page = bibjson["start_page"]
176
176
  item.end_page = bibjson["end_page"]
177
-
177
+
178
178
  item.year = bibjson["year"]
179
179
  if (year = bibjson["year"].to_i) && (month = bibjson["month"].to_i)
180
180
  if year != 0 && month != 0
181
181
  item.publication_date = Date.new(bibjson["year"].to_i, bibjson["month"].to_i)
182
182
  end
183
- end
183
+ end
184
184
 
185
185
  item.abstract = sanitize(bibjson["abstract"]) if bibjson.has_key?("abstract")
186
186
 
@@ -222,9 +222,9 @@ module BentoSearch
222
222
  # punctuation that needs to be escaped and how to escape (backslash)
223
223
  # for ES documented here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
224
224
  #
225
- # We do not escape double quotes, want to allow them for phrases.
225
+ # We do not escape double quotes, want to allow them for phrases.
226
226
  #
227
- # This method does NOT return URI-escaped, it returns literal, escaped for ES.
227
+ # This method does NOT return URI-escaped, it returns literal, escaped for ES.
228
228
  def escape_query(q)
229
229
  q.gsub(/([\+\-\=\&\|\>\<\!\(\)\{\}\[\]\^\~\*\?\:\\\/])/) {|m| "\\#{$1}"}
230
230
  end
@@ -242,7 +242,7 @@ module BentoSearch
242
242
  { nil => {:semantic => :general},
243
243
  "bibjson.title" => {:semantic => :title},
244
244
  # Using 'exact' seems to produce much better results for
245
- # author, don't entirely understand what's up.
245
+ # author, don't entirely understand what's up.
246
246
  "bibjson.author.name" => {:semantic => :author},
247
247
  "publisher" => {:semantic => :publisher},
248
248
  "bibjson.subject.term" => {:semantic => :subject},
@@ -263,7 +263,7 @@ module BentoSearch
263
263
 
264
264
  def sort_definitions
265
265
  # Don't believe DOAJ supports sorting by author
266
- {
266
+ {
267
267
  "relevance" => {:implementation => nil}, # default
268
268
  "title" => {:implementation => "title:asc"},
269
269
  # We don't quite have publication date sorting, but we'll use
@@ -276,4 +276,4 @@ module BentoSearch
276
276
  end
277
277
 
278
278
  end
279
- end
279
+ end
@@ -131,14 +131,14 @@ class BentoSearch::EbscoHostEngine
131
131
  url = query_url(args)
132
132
 
133
133
  Rails.logger.debug("EbscoHostEngine Search for: #{url}")
134
-
134
+
135
135
  results = BentoSearch::Results.new
136
136
  xml, response, exception = nil, nil, nil
137
137
 
138
138
  begin
139
139
  response = http_client.get(url)
140
140
  xml = Nokogiri::XML(response.body)
141
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
141
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
142
142
  exception = e
143
143
  end
144
144
  # error handle
@@ -361,7 +361,7 @@ class BentoSearch::EbscoHostEngine
361
361
  query = if args[:query].kind_of?(Hash)
362
362
  # multi-field query
363
363
  args[:query].collect {|field, query| fielded_query(query, field)}.join(" AND ")
364
- else
364
+ else
365
365
  fielded_query(args[:query], args[:search_field])
366
366
  end
367
367
 
@@ -7,25 +7,25 @@ require 'http_client_patch/include_client'
7
7
 
8
8
 
9
9
  #
10
- # For EBSCO Discovery Service. You will need a license to use.
10
+ # For EBSCO Discovery Service. You will need a license to use.
11
11
  #
12
12
  # == Required Configuration
13
13
  #
14
- # user_id, password: As given be EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
14
+ # user_id, password: As given be EBSCO for access to EDS API (may be an admin account in ebscoadmin? Not sure).
15
15
  # profile: As given by EBSCO, might be "edsapi"?
16
16
  #
17
17
  # == Highlighting
18
18
  #
19
- # EDS has a query-in-context highlighting feature. It is used by defualt, set
20
- # config 'highlighting' to false to disable.
19
+ # EDS has a query-in-context highlighting feature. It is used by defualt, set
20
+ # config 'highlighting' to false to disable.
21
21
  # If turned on, you may get <b class="bento_search_highlight"> tags
22
- # in title and abstract output if it's on, marked html_safe.
22
+ # in title and abstract output if it's on, marked html_safe.
23
23
  #
24
24
  #
25
25
  # == Linking
26
26
  #
27
27
  # The link to record in EBSCO interface delivered as "PLink" will be listed
28
- # as record main link.
28
+ # as record main link.
29
29
  #
30
30
  # Any links listed under <CustomLinks> will be listed as other_links, using
31
31
  # configured name provided by EBSCO for CustomLink.
@@ -34,26 +34,26 @@ require 'http_client_patch/include_client'
34
34
  # ourselves. However, in our testing, the first/only CustomLink was an
35
35
  # an OpenURL. If configuration.assume_first_custom_link_openurl is
36
36
  # true (as is default), it will be used to create an OpenURL link. However, in
37
- # our testing, many records don't have this at all. **Note** You want
37
+ # our testing, many records don't have this at all. **Note** You want
38
38
  # to configure your profile so OpenURLs are ALWAYS included for all records, not
39
39
  # just records with no EBSCO fulltext, to ensure bento_search can get the
40
40
  # openurl. http://support.ebsco.com/knowledge_base/detail.php?id=1111 (May
41
- # have to ask EBSCO support for help, it's confusing!).
41
+ # have to ask EBSCO support for help, it's confusing!).
42
42
  #
43
43
  # TODO: May have to add configuration code to pull the OpenURL link out by
44
- # it's configured name or label, not assume first one is it.
44
+ # it's configured name or label, not assume first one is it.
45
45
  #
46
- # As always, you can customize links and other_links with Item Decorators.
46
+ # As always, you can customize links and other_links with Item Decorators.
47
47
  #
48
48
  # == Technical Notes and Difficulties
49
49
  #
50
50
  # This API is enormously difficult to work with. Also the response is very odd
51
51
  # to deal with and missing some key elements. We quite possibly got something
52
- # wrong or non-optimal in this implementation, but we did our best.
52
+ # wrong or non-optimal in this implementation, but we did our best.
53
53
  #
54
54
  # Auth issues may make this slow -- you need to spend a (not too speedy) HTTP
55
55
  # request making a session for every new end-user -- as we have no way to keep
56
- # track of end-users, we do it on every request in this implementation.
56
+ # track of end-users, we do it on every request in this implementation.
57
57
  #
58
58
  # Responses don't include much metadata -- we don't actually have journal title,
59
59
  # volume, issue, etc. We probably _could_ parse it out of the OpenURL that's
@@ -61,91 +61,91 @@ require 'http_client_patch/include_client'
61
61
  # Instead we're using the chunk of user-displayable citation/reference it does
62
62
  # give us (which is very difficult to parse into something usable already),
63
63
  # and a custom Decorator to display that instead of normalized citation
64
- # made from individual elements.
64
+ # made from individual elements.
65
65
  #
66
- # EBSCO says they plan to improve some of these issues in a September 2012 release.
66
+ # EBSCO says they plan to improve some of these issues in a September 2012 release.
67
67
  #
68
68
  # Title and abstract data seems to be HTML with tags and character entities and
69
- # escaped special chars. We're trusting it and passing it on as html_safe.
69
+ # escaped special chars. We're trusting it and passing it on as html_safe.
70
70
  #
71
71
  # Paging can only happen on even pages, with 'page' rather than 'start'. But
72
- # you can pass in 'start' to bento_search, it'll be converted to closest page.
72
+ # you can pass in 'start' to bento_search, it'll be converted to closest page.
73
73
  #
74
74
  # == Authenticated Users
75
75
  #
76
- # EDS allows searches by unauthenticated users, but the results come back with
76
+ # EDS allows searches by unauthenticated users, but the results come back with
77
77
  # weird blank hits. In such a case, the BentoSearch adapter will return
78
78
  # records with virtually no metadata, but a title e
79
79
  # (I18n at bento_search.eds.record_not_available ). Also no abstracts
80
- # are available from unauth search.
80
+ # are available from unauth search.
81
81
  #
82
82
  # By default the engine will search as 'guest' unauth user. But config
83
83
  # 'auth' key to true to force all searches to auth (if you are protecting your
84
- # app) or pass :auth => true as param into #search method.
84
+ # app) or pass :auth => true as param into #search method.
85
85
  #
86
86
  # == Source Types
87
87
  # # What the EBSCO 'source types' mean: http://suprpot.ebsco.com/knowledge_base/detail.php?id=5382
88
88
  #
89
- # But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
89
+ # But "Dissertations" not "Dissertations/Theses". "Music Scores" not "Music Score".
90
90
 
91
91
  #
92
92
  # == EDS docs:
93
- #
94
- # * Console App to demo requests: https://eds-api.ebscohost.com/Console
93
+ #
94
+ # * Console App to demo requests: https://eds-api.ebscohost.com/Console
95
95
  # * EDS Wiki: http://edswiki.ebscohost.com/EDS_API_Documentation
96
96
  # * You'll need to request an account to the EDS wiki, see: http://support.ebsco.com/knowledge_base/detail.php?id=5990
97
- #
97
+ #
98
98
 
99
99
  class BentoSearch::EdsEngine
100
100
  include BentoSearch::SearchEngine
101
-
101
+
102
102
  # Can't change http timeout in config, because we keep an http
103
- # client at class-wide level, and config is not class-wide.
104
- # Change this 'constant' if you want to change it, I guess.
103
+ # client at class-wide level, and config is not class-wide.
104
+ # Change this 'constant' if you want to change it, I guess.
105
105
  HttpTimeout = 4
106
- extend HTTPClientPatch::IncludeClient
106
+ extend HTTPClientPatch::IncludeClient
107
107
  include_http_client do |client|
108
108
  client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
109
109
  end
110
-
110
+
111
111
  AuthHeader = "x-authenticationToken"
112
112
  SessionTokenHeader = "x-sessionToken"
113
113
 
114
114
  @@remembered_auth = nil
115
115
  @@remembered_auth_lock = Mutex.new
116
116
  # Class variable to save current known good auth
117
- # uses a mutex to be threadsafe. sigh.
117
+ # uses a mutex to be threadsafe. sigh.
118
118
  def self.remembered_auth
119
- @@remembered_auth_lock.synchronize do
119
+ @@remembered_auth_lock.synchronize do
120
120
  @@remembered_auth
121
121
  end
122
122
  end
123
- # Set class variable with current known good auth.
124
- # uses a mutex to be threadsafe.
123
+ # Set class variable with current known good auth.
124
+ # uses a mutex to be threadsafe.
125
125
  def self.remembered_auth=(token)
126
126
  @@remembered_auth_lock.synchronize do
127
127
  @@remembered_auth = token
128
128
  end
129
129
  end
130
-
130
+
131
131
  # an object that includes some Rails helper modules for
132
- # text handling.
132
+ # text handling.
133
133
  def helper
134
- unless @helper
134
+ unless @helper ||= nil
135
135
  @helper = Object.new
136
136
  @helper.extend ActionView::Helpers::TextHelper # for truncate
137
137
  @helper.extend ActionView::Helpers::OutputSafetyHelper # for safe_join
138
138
  end
139
139
  return @helper
140
140
  end
141
-
142
-
141
+
142
+
143
143
  def self.required_configuration
144
144
  %w{user_id password profile}
145
145
  end
146
-
146
+
147
147
  # From config or args, args over-ride config
148
- def authenticated_end_user?(args)
148
+ def authenticated_end_user?(args)
149
149
  config = configuration.auth ? true : false
150
150
  arg = args[:auth]
151
151
  if ! arg.nil?
@@ -156,94 +156,94 @@ class BentoSearch::EdsEngine
156
156
  false
157
157
  end
158
158
  end
159
-
159
+
160
160
  def construct_search_url(args)
161
161
  query = "AND,"
162
162
  if args[:search_field]
163
163
  query += "#{args[:search_field]}:"
164
164
  end
165
165
  # Can't have any commas in query, it turns out, although
166
- # this is not documented.
166
+ # this is not documented.
167
167
  query += args[:query].gsub(",", " ")
168
-
168
+
169
169
  url = "#{configuration.base_url}search?view=detailed&query=#{CGI.escape query}"
170
-
170
+
171
171
  url += "&searchmode=#{CGI.escape configuration.search_mode}"
172
-
172
+
173
173
  url += "&highlight=#{configuration.highlighting ? 'y' : 'n' }"
174
-
174
+
175
175
  if args[:per_page]
176
176
  url += "&resultsperpage=#{args[:per_page]}"
177
177
  end
178
178
  if args[:page]
179
179
  url += "&pagenumber=#{args[:page]}"
180
180
  end
181
-
181
+
182
182
  if args[:sort]
183
183
  if (defn = self.sort_definitions[args[:sort]]) &&
184
184
  (value = defn[:implementation] )
185
185
  url += "&sort=#{CGI.escape value}"
186
186
  end
187
187
  end
188
-
188
+
189
189
  if configuration.only_source_types.present?
190
190
  # facetfilter=1,SourceType:Research Starters,SourceType:Books
191
191
  url += "&facetfilter=" + CGI.escape("1," + configuration.only_source_types.collect {|t| "SourceType:#{t}"}.join(","))
192
192
  end
193
-
194
-
193
+
194
+
195
195
  return url
196
196
  end
197
-
198
-
199
-
197
+
198
+
199
+
200
200
  def search_implementation(args)
201
201
  results = BentoSearch::Results.new
202
-
202
+
203
203
  end_user_auth = authenticated_end_user? args
204
-
204
+
205
205
  begin
206
206
  with_session(end_user_auth) do |session_token|
207
-
207
+
208
208
  url = construct_search_url(args)
209
-
210
-
211
-
209
+
210
+
211
+
212
212
  response = get_with_auth(url, session_token)
213
-
213
+
214
214
  results = BentoSearch::Results.new
215
-
216
- if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
215
+
216
+ if (hits_node = at_xpath_text(response, "./SearchResponseMessageGet/SearchResult/Statistics/TotalHits"))
217
217
  results.total_items = hits_node.to_i
218
218
  end
219
-
219
+
220
220
  response.xpath("./SearchResponseMessageGet/SearchResult/Data/Records/Record").each do |record_xml|
221
221
  item = BentoSearch::ResultItem.new
222
-
222
+
223
223
  item.title = prepare_eds_payload( element_by_group(record_xml, "Ti"), true )
224
-
224
+
225
225
  # To get a unique id, we need to pull out db code and accession number
226
- # and combine em with colon, accession number is not unique by itself.
226
+ # and combine em with colon, accession number is not unique by itself.
227
227
  db = record_xml.at_xpath("./Header/DbId").try(:text)
228
228
  accession = record_xml.at_xpath("./Header/An").try(:text)
229
229
  if db && accession
230
230
  item.unique_id = "#{db}:#{accession}"
231
231
  end
232
-
233
-
232
+
233
+
234
234
  if item.title.nil? && ! end_user_auth
235
235
  item.title = I18n.translate("bento_search.eds.record_not_available")
236
236
  end
237
-
237
+
238
238
  item.abstract = prepare_eds_payload( element_by_group(record_xml, "Ab"), true )
239
239
 
240
240
  # Believe it or not, the authors are encoded as an escaped
241
241
  # XML-ish payload, that we need to parse again and get the
242
242
  # actual authors out of. WTF. Thanks for handling fragments
243
- # nokogiri.
243
+ # nokogiri.
244
244
  author_mess = element_by_group(record_xml, "Au")
245
245
  # only SOMETIMES does it have XML tags, other times it's straight text.
246
- # ARGH.
246
+ # ARGH.
247
247
  author_xml = Nokogiri::XML::fragment(author_mess)
248
248
  searchLinks = author_xml.xpath(".//searchLink")
249
249
  if searchLinks.size > 0
@@ -253,14 +253,14 @@ class BentoSearch::EdsEngine
253
253
  else
254
254
  item.authors << BentoSearch::Author.new(:display => author_xml.text)
255
255
  end
256
-
257
-
256
+
257
+
258
258
  # PLink is main inward facing EBSCO link, put it as
259
- # main link.
259
+ # main link.
260
260
  if direct_link = record_xml.at_xpath("./PLink")
261
261
  item.link = direct_link.text
262
262
  end
263
-
263
+
264
264
  # Other links may be found in CustomLinks, it seems like usually
265
265
  # there will be at least one, hopefully the first one is the OpenURL?
266
266
  record_xml.xpath("./CustomLinks/CustomLink").each do |custom_link|
@@ -269,51 +269,51 @@ class BentoSearch::EdsEngine
269
269
  :label => custom_link.at_xpath("./Name").text
270
270
  )
271
271
  end
272
-
272
+
273
273
  if (configuration.assume_first_custom_link_openurl &&
274
274
  (first = record_xml.xpath "./CustomLinks/CustomLink" ) &&
275
275
  (node = first.at_xpath "./Url" )
276
276
  )
277
-
277
+
278
278
  openurl = node.text
279
-
279
+
280
280
  index = openurl.index('?')
281
- item.openurl_kev_co = openurl.slice index..(openurl.length) if index
281
+ item.openurl_kev_co = openurl.slice index..(openurl.length) if index
282
282
  end
283
283
 
284
- # Format.
284
+ # Format.
285
285
  item.format_str = at_xpath_text record_xml, "./Header/PubType"
286
286
  # Can't find a list of possible PubTypes to see what's there to try
287
- # and map to our internal controlled vocab. oh wells.
288
-
289
-
290
-
287
+ # and map to our internal controlled vocab. oh wells.
288
+
289
+
290
+
291
291
  # We have a single blob of human-readable citation, that's also
292
292
  # littered with XML-ish tags we need to deal with. We'll save
293
293
  # it in a custom location, and use a custom Decorator to display
294
294
  # it. Sorry it's way too hard for us to preserve <highlight>
295
295
  # tags in this mess, they will be lost. Probably don't
296
- # need highlighting in source anyhow.
296
+ # need highlighting in source anyhow.
297
297
  citation_mess = element_by_group(record_xml, "Src")
298
298
  # Argh, but sometimes it's in SrcInfo _without_ tags instead
299
- if citation_mess
299
+ if citation_mess
300
300
  citation_txt = Nokogiri::XML::fragment(citation_mess).text
301
301
  # But strip off some "count of references" often on the end
302
- # which are confusing and useless.
302
+ # which are confusing and useless.
303
303
  item.custom_data["citation_blob"] = citation_txt.gsub(/ref +\d+ +ref\.$/, '')
304
304
  else
305
305
  # try another location
306
306
  item.custom_data["citation_blob"] = element_by_group(record_xml, "SrcInfo")
307
307
  end
308
-
309
-
308
+
309
+
310
310
  item.extend CitationMessDecorator
311
-
311
+
312
312
  results << item
313
- end
313
+ end
314
314
  end
315
-
316
- return results
315
+
316
+ return results
317
317
  rescue EdsCommException => e
318
318
  results.error ||= {}
319
319
  results.error[:exception] = e
@@ -321,137 +321,137 @@ class BentoSearch::EdsEngine
321
321
  results.error[:http_body] = e.http_body
322
322
  return results
323
323
  end
324
-
324
+
325
325
  end
326
-
326
+
327
327
  # Difficult to get individual elements out of an EDS XML <Record>
328
- # response, requires weird xpath, so we do it for you.
328
+ # response, requires weird xpath, so we do it for you.
329
329
  # element_by_group(nokogiri_element, "Ti")
330
330
  #
331
331
  # Returns string or nil
332
332
  def element_by_group(noko, group)
333
333
  at_xpath_text(noko, "./Items/Item[child::Group[text()='#{group}']]/Data")
334
334
  end
335
-
335
+
336
336
  # Wraps calls to the EDS api with CreateSession and EndSession requests
337
337
  # to EDS. Will pass sessionID in yield from block.
338
338
  #
339
339
  # Second optional arg is whether this is an authenticated user, else
340
- # guest access will be used.
340
+ # guest access will be used.
341
341
  #
342
342
  # with_session(true) do |session_token|
343
343
  # # can make more requests using session_token,
344
- # # EndSession will be called for you at end of block.
344
+ # # EndSession will be called for you at end of block.
345
345
  # end
346
346
  def with_session(auth = false, &block)
347
- auth_token = self.class.remembered_auth
347
+ auth_token = self.class.remembered_auth
348
348
  if auth_token.nil?
349
349
  auth_token = self.class.remembered_auth = get_auth_token
350
350
  end
351
-
352
-
353
- create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
354
- response_xml = get_with_auth(create_url)
355
-
351
+
352
+
353
+ create_url = "#{configuration.base_url}createsession?profile=#{configuration.profile}&guest=#{auth ? 'n' : 'y'}"
354
+ response_xml = get_with_auth(create_url)
355
+
356
356
  session_token = nil
357
- unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
358
- e = EdsCommException.new("Could not get SessionToken")
357
+ unless response_xml && (session_token = at_xpath_text(response_xml, "//SessionToken"))
358
+ e = EdsCommException.new("Could not get SessionToken")
359
359
  end
360
-
361
- begin
360
+
361
+ begin
362
362
  block.yield(session_token)
363
- ensure
364
- if auth_token && session_token
363
+ ensure
364
+ if auth_token && session_token
365
365
  end_url = "#{configuration.base_url}endsession?sessiontoken=#{CGI.escape session_token}"
366
- response_xml = get_with_auth(end_url)
366
+ response_xml = get_with_auth(end_url)
367
367
  end
368
368
  end
369
-
369
+
370
370
  end
371
-
372
- # if the xpath responds, return #text of it, else nil.
371
+
372
+ # if the xpath responds, return #text of it, else nil.
373
373
  def at_xpath_text(noko, xpath)
374
374
  node = noko.at_xpath(xpath)
375
-
375
+
376
376
  if node.nil?
377
377
  return node
378
378
  else
379
379
  return node.text
380
380
  end
381
381
  end
382
-
382
+
383
383
  # If EDS has put highlighting tags
384
384
  # in a field, we need to HTML escape the literal values,
385
385
  # while still using the highlighting tokens to put
386
386
  # HTML tags around highlighted terms.
387
387
  #
388
388
  # Second param, if to assume EDS literals are safe HTML, as they
389
- # seem to be.
389
+ # seem to be.
390
390
  def prepare_eds_payload(str, html_safe = false)
391
391
  return str if str.blank?
392
-
392
+
393
393
  unless configuration.highlighting
394
- str = str.html_safe if html_safe
394
+ str = str.html_safe if html_safe
395
395
  return str
396
396
  end
397
-
398
- parts =
397
+
398
+ parts =
399
399
  str.split(%r{(</?highlight>)}).collect do |substr|
400
400
  case substr
401
401
  when "<highlight>" then "<b class='bento_search_highlight'>".html_safe
402
402
  when "</highlight>" then "</b>".html_safe
403
- # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
403
+ # Yes, EDS gives us HTML in the literals, we're choosing to trust it.
404
404
  else substr.html_safe
405
405
  end
406
406
  end
407
-
408
- return helper.safe_join(parts, '')
407
+
408
+ return helper.safe_join(parts, '')
409
409
  end
410
-
410
+
411
411
  # Give it a url pointing at EDS API.
412
- # Second arg must be a session_token if EDS request requires one.
413
- # It will
412
+ # Second arg must be a session_token if EDS request requires one.
413
+ # It will
414
414
  # * Make a GET request
415
415
  # * with memo-ized auth token added to headers
416
416
  # * for XML, with all namespaces removed!
417
417
  # * Parse JSON into a hash and return hash
418
418
  # * Try ONCE more to get if EBSCO says bad auth token
419
419
  # * Raise an EdsCommException if can't auth after second try,
420
- # or other error message, or JSON can't be parsed.
420
+ # or other error message, or JSON can't be parsed.
421
421
  def get_with_auth(url, session_token = nil)
422
422
  auth_token = self.class.remembered_auth
423
423
  unless auth_token
424
424
  auth_token = self.class.remembered_auth = get_auth_token
425
425
  end
426
-
426
+
427
427
  response = nil
428
428
  response_xml = nil
429
429
  caught_exception = nil
430
-
430
+
431
431
  begin
432
432
  headers = {AuthHeader => auth_token, 'Accept' => 'application/xml'}
433
433
  headers[SessionTokenHeader] = session_token if session_token
434
-
434
+
435
435
  s_time = Time.now
436
436
  response = http_client.get(url, nil, headers)
437
437
  Rails.logger.debug("EDS timing GET: #{Time.now - s_time}:#{url}")
438
-
438
+
439
439
  response_xml = Nokogiri::XML(response.body)
440
440
  response_xml.remove_namespaces!
441
-
441
+
442
442
  if (at_xpath_text(response_xml, "//ErrorNumber") == "104") || (at_xpath_text(response_xml, "//ErrorDescription") == "Auth Token Invalid")
443
443
  # bad auth, try again just ONCE
444
444
  Rails.logger.debug("EDS auth failed, getting auth again")
445
-
445
+
446
446
  headers[AuthHeader] = self.class.remembered_auth = get_auth_token
447
447
  response = http_client.get(url, nil, headers)
448
448
  response_xml = Nokogiri::XML(response.body)
449
- response_xml.remove_namespaces!
450
- end
451
- rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
449
+ response_xml.remove_namespaces!
450
+ end
451
+ rescue BentoSearch::RubyTimeoutClass, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
452
452
  caught_exception = e
453
453
  end
454
-
454
+
455
455
  if response.nil? || response_xml.nil? || caught_exception || (! HTTP::Status.successful? response.status)
456
456
  exception = EdsCommException.new("Error fetching URL: #{caught_exception.message if caught_exception} : #{url}")
457
457
  if response
@@ -460,49 +460,49 @@ class BentoSearch::EdsEngine
460
460
  end
461
461
  raise exception
462
462
  end
463
-
463
+
464
464
  return response_xml
465
465
  end
466
-
467
-
468
- # Has to make an HTTP request to get EBSCO's auth token.
466
+
467
+
468
+ # Has to make an HTTP request to get EBSCO's auth token.
469
469
  # returns the auth token. We aren't bothering to keep
470
470
  # track of the expiration ourselves, can't neccesarily trust
471
- # it anyway.
471
+ # it anyway.
472
472
  #
473
- # Raises an EdsCommException on error.
474
- def get_auth_token
473
+ # Raises an EdsCommException on error.
474
+ def get_auth_token
475
475
  # Can't send params as form-encoded, actually need to send a JSON or XML
476
- # body, argh.
477
-
476
+ # body, argh.
477
+
478
478
  body = <<-EOS
479
479
  {
480
480
  "UserId":"#{configuration.user_id}",
481
481
  "Password":"#{configuration.password}"
482
482
  }
483
483
  EOS
484
-
484
+
485
485
  s_time = Time.now
486
486
  response = http_client.post(configuration.auth_url, body, {'Accept' => "application/json", "Content-type" => "application/json"})
487
- Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
488
-
487
+ Rails.logger.debug("EDS timing AUTH: #{Time.now - s_time}s")
488
+
489
489
  unless HTTP::Status.successful? response.status
490
490
  raise EdsCommException.new("Could not get auth", response.status, response.body)
491
491
  end
492
-
492
+
493
493
  response_hash = nil
494
494
  begin
495
495
  response_hash = MultiJson.load response.body
496
496
  rescue MultiJson::DecodeError
497
497
  end
498
-
498
+
499
499
  unless response_hash.kind_of?(Hash) && response_hash.has_key?("AuthToken")
500
500
  raise EdsCommException.new("AuthToken not found in auth response", response.status, response.body)
501
501
  end
502
-
503
- return response_hash["AuthToken"]
502
+
503
+ return response_hash["AuthToken"]
504
504
  end
505
-
505
+
506
506
  def self.default_configuration
507
507
  {
508
508
  :auth_url => 'https://eds-api.ebscohost.com/authservice/rest/uidauth',
@@ -513,15 +513,15 @@ class BentoSearch::EdsEngine
513
513
  :search_mode => 'all' # any | bool | all | smart ; http://support.epnet.com/knowledge_base/detail.php?topic=996&id=1288&page=1
514
514
  }
515
515
  end
516
-
517
- def sort_definitions
518
- {
516
+
517
+ def sort_definitions
518
+ {
519
519
  "date_desc" => {:implementation => "date"},
520
520
  "relevance" => {:implementation => "relevance" }
521
521
  # "date_asc" => {:implementaiton => "date2"}
522
522
  }
523
523
  end
524
-
524
+
525
525
  def search_field_definitions
526
526
  {
527
527
  "TX" => {:semantic => :general},
@@ -534,11 +534,11 @@ class BentoSearch::EdsEngine
534
534
  "IB" => {:semantic => :isbn},
535
535
  }
536
536
  end
537
-
538
- # an exception talking to EDS api.
537
+
538
+ # an exception talking to EDS api.
539
539
  # there's a short reason in #message, but also
540
540
  # possibly an http_status and http_body copied
541
- # from error EDS response.
541
+ # from error EDS response.
542
542
  class EdsCommException < ::BentoSearch::FetchError
543
543
  attr_accessor :http_status, :http_body
544
544
  def initialize(message, status = nil, body = nil)
@@ -547,16 +547,16 @@ class BentoSearch::EdsEngine
547
547
  self.http_body = body
548
548
  end
549
549
  end
550
-
551
-
550
+
551
+
552
552
  # A built-in decorator alwasy applied, that over-rides
553
553
  # the ResultItem#published_in display method to use our mess blob
554
554
  # of human readable citation, since we don't have individual elements
555
- # to create it from in a normalized way.
555
+ # to create it from in a normalized way.
556
556
  module CitationMessDecorator
557
557
  def published_in
558
558
  custom_data["citation_blob"]
559
559
  end
560
560
  end
561
-
561
+
562
562
  end