bento_search 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +15 -0
  2. data/README.md +92 -90
  3. data/app/item_decorators/bento_search/decorator_base.rb +9 -6
  4. data/app/item_decorators/bento_search/standard_decorator.rb +24 -0
  5. data/app/search_engines/bento_search/ebsco_host_engine.rb +180 -179
  6. data/app/search_engines/bento_search/journal_tocs_for_journal.rb +179 -0
  7. data/app/views/bento_search/_std_item.html.erb +4 -4
  8. data/lib/bento_search/version.rb +1 -1
  9. data/test/decorator/decorator_base_test.rb +11 -1
  10. data/test/decorator/standard_decorator_test.rb +21 -0
  11. data/test/dummy/log/development.log +2 -0
  12. data/test/dummy/log/test.log +22324 -0
  13. data/test/{unit → search_engines}/ebsco_host_engine_test.rb +148 -130
  14. data/test/{unit → search_engines}/eds_engine_test.rb +0 -0
  15. data/test/{unit → search_engines}/google_books_engine_test.rb +0 -0
  16. data/test/{unit → search_engines}/google_site_search_test.rb +0 -0
  17. data/test/search_engines/journal_tocs_for_journal_test.rb +93 -0
  18. data/test/{unit → search_engines}/primo_engine_test.rb +0 -0
  19. data/test/{unit → search_engines}/scopus_engine_test.rb +0 -0
  20. data/test/{unit → search_engines}/search_engine_base_test.rb +0 -0
  21. data/test/{unit → search_engines}/search_engine_test.rb +0 -0
  22. data/test/{unit → search_engines}/summon_engine_test.rb +0 -0
  23. data/test/{unit → search_engines}/worldcat_sru_dc_engine_test.rb +0 -0
  24. data/test/{unit → search_engines}/xerxes_engine_test.rb +0 -0
  25. data/test/vcr_cassettes/ebscohost/RILM_record_with_ISSN_in__jid__element.yml +210 -0
  26. data/test/vcr_cassettes/journal_tocs/empty_results_on_bad_ISSN.yml +49 -0
  27. data/test/vcr_cassettes/journal_tocs/error_on_bad_registered_email.yml +41 -0
  28. data/test/vcr_cassettes/journal_tocs/error_on_error_response.yml +51 -0
  29. data/test/vcr_cassettes/journal_tocs/fetch_xml_with_hits.yml +328 -0
  30. data/test/vcr_cassettes/journal_tocs/fills_out_metadata.yml +396 -0
  31. data/test/vcr_cassettes/journal_tocs/smoke_test.yml +328 -0
  32. metadata +62 -61
@@ -50,18 +50,21 @@ module BentoSearch
50
50
  end
51
51
 
52
52
  # Applies decorator to item and returns decorated item.
53
- # uses standard logic to look up which decorator to apply or
54
- # applies default one. The point of this method is just that
55
- # standard logic.
53
+ # Will decide what decorator to apply based on String class name
54
+ # in item.decorator, or else apply StandardDecorator. The point of
55
+ # this method is just that logic, nothing else special.
56
56
  #
57
57
  # Need to pass a Rails ActionView::Context in, to use to
58
58
  # initialize decorator. In Rails, in most places you can
59
59
  # get one of those from #view_context. In helpers/views
60
60
  # you can also use `self`.
61
61
  def self.decorate(item, view_context)
62
- # What decorator class? If specified as string in #decorator,
63
- # look it up as a class object, else default.
64
- decorator_class = item.decorator.try {|name| BentoSearch::Util.constantize(name) } || BentoSearch::StandardDecorator
62
+ # What decorator class? Specified in #decorator as a String,
63
+ # we intentionally do not allow an actual class constant, to
64
+ # maintain problem-free serialization of ItemResults and configuration.
65
+ decorator_class = item.decorator.try do |arg|
66
+ BentoSearch::Util.constantize(arg.to_s)
67
+ end || BentoSearch::StandardDecorator
65
68
 
66
69
  return decorator_class.new(item, view_context)
67
70
  end
@@ -149,6 +149,30 @@ module BentoSearch
149
149
  return value.blank? ? nil : value
150
150
  end
151
151
 
152
+ # outputs a date for display, from #publication_date or #year.
153
+ # Uses its own logic to decide whether to output entire date or just
154
+ # year, if it has a complete date. (If volume and issue are present,
155
+ # just year).
156
+ #
157
+ # Over-ride in a decorator if you want to always or never or different
158
+ # logic for complete date. Or if you want to change the format of the date,
159
+ # etc.
160
+ def display_date
161
+ if self.publication_date
162
+ if self.volume && self.issue
163
+ # just the year, ma'am
164
+ I18n.localize(self.publication_date, :format => "%Y")
165
+ else
166
+ # whole date, since we got it
167
+ I18n.localize(self.publication_date, :format => "%d %b %Y")
168
+ end
169
+ elsif self.year
170
+ self.year.to_s
171
+ else
172
+ nil
173
+ end
174
+ end
175
+
152
176
  # A unique opaque identifier for a record may sometimes be
153
177
  # required, for instance in Atom.
154
178
  #
@@ -5,7 +5,7 @@ require 'nokogiri'
5
5
  require 'http_client_patch/include_client'
6
6
  require 'httpclient'
7
7
 
8
- # Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
8
+ # Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
9
9
  # may be expanded or refactored for EDS too.
10
10
  #
11
11
  # == Required Configuration
@@ -22,28 +22,28 @@ require 'httpclient'
22
22
  #
23
23
  # [:peer_reviewed_only] Set to boolean true or string 'true', to restrict
24
24
  # results to peer-reviewed only. (Or ask EBSCOHost
25
- # api to do so, what we get is what we get).
25
+ # api to do so, what we get is what we get).
26
26
  # [:pubyear_start]
27
27
  # [:pubyear_end] Date range limiting, pass in custom search args,
28
28
  # one or both of pubyear_start and pubyear_end
29
- # #to_i will be called on it, so can be string.
29
+ # #to_i will be called on it, so can be string.
30
30
  # .search(:query => "foo", :pubyear_start => 2000)
31
31
  # [:databases] List of licensed EBSCO dbs to search, can override
32
- # list set in config databases, just for this search.
33
- #
32
+ # list set in config databases, just for this search.
33
+ #
34
34
  # == Custom response data
35
- #
36
- # Iff EBSCO API reports that fulltext is available for the hit, then
35
+ #
36
+ # Iff EBSCO API reports that fulltext is available for the hit, then
37
37
  # result.custom_data["fulltext_formats"] will be non-nil, and will be an array of
38
38
  # one or more of EBSCO's internal codes (P=PDF, T=HTML, C=HTML+Images). If
39
39
  # no fulltext is avail according to EBSCO API, result.custom_data["fulltext_formats"]
40
- # will be nil.
40
+ # will be nil.
41
41
  #
42
42
  # #link_is_fulltext also set to true/false
43
43
  #
44
44
  # You can use this to, for instance, hyperlink the displayed title directly
45
45
  # to record on EBSCO if and only if there's fulltext. By writing a custom
46
- # decorator. See wiki on decorators.
46
+ # decorator. See wiki on decorators.
47
47
  #
48
48
  # == Limitations
49
49
  # We do set language of ResultItems based on what ebsco tells us, but ebsco
@@ -56,12 +56,12 @@ require 'httpclient'
56
56
  # EBSCO that you want included in the search. You can't just say "all of them"
57
57
  # the api doesn't support that, and also more than 30 or 40 starts getting
58
58
  # horribly slow. If you include a db you do not have access to, EBSCO api
59
- # fatal errors.
59
+ # fatal errors.
60
60
  #
61
61
  # You may want to make sure all your licensed databases are included
62
62
  # in your EIT profile. Log onto ebscoadmin, Customize Services, choose
63
- # EIT profile, choose 'databases' tag.
64
- #
63
+ # EIT profile, choose 'databases' tag.
64
+ #
65
65
  # === Download databases from EBSCO api
66
66
  #
67
67
  # We include a utility to download ALL activated databases for EIT profile
@@ -69,12 +69,12 @@ require 'httpclient'
69
69
  # file as a starting point, and edit by hand:
70
70
  #
71
71
  # First configure your EBSCO search engine with bento_search, say under
72
- # key 'ebscohost'.
72
+ # key 'ebscohost'.
73
73
  #
74
74
  # Then run:
75
75
  # rails generate bento_search:pull_ebsco_dbs ebscohost
76
76
  #
77
- # assuming 'ebscohost' is the key you registered the EBSCO search engine.
77
+ # assuming 'ebscohost' is the key you registered the EBSCO search engine.
78
78
  #
79
79
  # This will create a file at ./config/ebsco_dbs.rb. You may want to hand
80
80
  # edit it. Then, in your bento search config, you can:
@@ -85,7 +85,7 @@ require 'httpclient'
85
85
  # conf.databases = $ebsco_dbs
86
86
  # end
87
87
  #
88
- # == Vendor documentation
88
+ # == Vendor documentation
89
89
  #
90
90
  # Vendor documentation is a bit scattered, main page:
91
91
  # * http://support.ebsco.com/eit/ws.php
@@ -93,10 +93,10 @@ require 'httpclient'
93
93
  # * http://support.ebsco.com/eit/ws_faq.php
94
94
  # * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
95
95
  # * Try constructing a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
96
- # * The 'info' service can be used to see what databases you have access to.
96
+ # * The 'info' service can be used to see what databases you have access to.
97
97
  # * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
98
98
  #
99
- # Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
99
+ # Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
100
100
  # http://support.epnet.com/knowledge_base/detail.php?id=5397
101
101
  #
102
102
  # EBSCO searchable support portal has a section for the EIT api we use here:
@@ -104,20 +104,20 @@ require 'httpclient'
104
104
 
105
105
  class BentoSearch::EbscoHostEngine
106
106
  include BentoSearch::SearchEngine
107
-
107
+
108
108
  # Can't change http timeout in config, because we keep an http
109
- # client at class-wide level, and config is not class-wide.
109
+ # client at class-wide level, and config is not class-wide.
110
110
  # Change this 'constant' if you want to change it, I guess.
111
111
  #
112
112
  # In some tests we did, 5.2s was 95th percentile slowest, but in
113
113
  # actual percentage 5.2s is still timing out way too many requests,
114
- # let's try 6.3, why not.
115
- HttpTimeout = 6.3
116
- extend HTTPClientPatch::IncludeClient
114
+ # let's try 6.3, why not.
115
+ HttpTimeout = 6.3
116
+ extend HTTPClientPatch::IncludeClient
117
117
  include_http_client do |client|
118
118
  client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
119
119
  end
120
-
120
+
121
121
  # Include some rails helpers, text_helper.truncate
122
122
  def text_helper
123
123
  @@truncate ||= begin
@@ -126,10 +126,10 @@ class BentoSearch::EbscoHostEngine
126
126
  o
127
127
  end
128
128
  end
129
-
129
+
130
130
  def search_implementation(args)
131
131
  url = query_url(args)
132
-
132
+
133
133
  results = BentoSearch::Results.new
134
134
  xml, response, exception = nil, nil, nil
135
135
 
@@ -137,85 +137,85 @@ class BentoSearch::EbscoHostEngine
137
137
  response = http_client.get(url)
138
138
  xml = Nokogiri::XML(response.body)
139
139
  rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
140
- exception = e
140
+ exception = e
141
141
  end
142
142
  # error handle
143
- if ( response.nil? ||
144
- xml.nil? ||
145
- exception ||
143
+ if ( response.nil? ||
144
+ xml.nil? ||
145
+ exception ||
146
146
  (! HTTP::Status.successful? response.status) ||
147
147
  (fault = xml.at_xpath("./Fault")))
148
-
148
+
149
149
  results.error ||= {}
150
150
  results.error[:api_url] = url
151
151
  results.error[:exception] = exception if exception
152
152
  results.error[:status] = response.status if response
153
-
153
+
154
154
  if fault
155
155
  results.error[:error_info] = text_if_present fault.at_xpath("./Message")
156
156
  end
157
-
157
+
158
158
  return results
159
159
  end
160
-
161
-
162
-
160
+
161
+
162
+
163
163
  # the namespaces they provide are weird and don't help and sometimes
164
164
  # not clearly even legal. Remove em!
165
165
  xml.remove_namespaces!
166
-
166
+
167
167
  results.total_items = xml.at_xpath("./searchResponse/Hits").text.to_i
168
-
168
+
169
169
  xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
170
170
  results << item_from_xml( xml_rec )
171
171
  end
172
-
172
+
173
173
  return results
174
-
174
+
175
175
  end
176
-
176
+
177
177
  # Method to get a single record by "identifier" string, which is really
178
178
  # a combined "db:id" string, same string that would be returned by
179
179
  # an individual item.identifier
180
180
  #
181
181
  # Returns an individual BentoSearch::Result, or raises an exception.
182
- # Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
182
+ # Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
183
183
  # any other weird random exception caused by problems fetching (network
184
184
  # error etc. Is it bad that we don't wrap these in an expected single
185
185
  # exception type? Should we?)
186
186
  def get(id)
187
- # split on first colon only.
187
+ # split on first colon only.
188
188
  id =~ /^([^:]+)\:(.*)$/
189
189
  db = $1 ; an = $2
190
-
190
+
191
191
  raise ArgumentError.new("EbscoHostEngine#get requires an id with a colon, like `a9h:12345`. Instead, we got #{id}") unless db && an
192
-
192
+
193
193
  # "AN" search_field is not listed in our search_field_definitions,
194
194
  # but it is an internal EBSCOHost search index on 'accession number'
195
-
195
+
196
196
  results = search(an, :search_field => "AN", :databases => [db])
197
-
197
+
198
198
  raise (results.error[:exception] || Exception.new) if results.failed?
199
199
  raise BentoSearch::NotFound.new("For id: #{id}") if results.length == 0
200
200
  raise BentoSearch::TooManyFound.new("For id: #{id}") if results.length > 1
201
-
201
+
202
202
  return results.first
203
203
  end
204
-
205
- # pass in nokogiri record xml for the records/rec node.
206
- # Returns nil if NO fulltext is avail on ebsco platform,
204
+
205
+ # pass in nokogiri record xml for the records/rec node.
206
+ # Returns nil if NO fulltext is avail on ebsco platform,
207
207
  # non-nil if fulltext is available. Non-nil value will
208
- # actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
208
+ # actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
209
209
  # http://support.epnet.com/knowledge_base/detail.php?topic=996&id=3778&page=1
210
210
  def fulltext_formats(record_xml)
211
211
  fulltext_formats = record_xml.xpath("./header/controlInfo/artinfo/formats/fmt/@type").collect {|n| n.text }
212
-
212
+
213
213
  return nil if fulltext_formats.empty?
214
-
215
- return fulltext_formats
214
+
215
+ return fulltext_formats
216
216
  end
217
-
218
-
217
+
218
+
219
219
  # Pass in a nokogiri node, return node.text, or nil if
220
220
  # arg was nil or node.text was blank?
221
221
  def text_if_present(node)
@@ -223,16 +223,16 @@ class BentoSearch::EbscoHostEngine
223
223
  nil
224
224
  else
225
225
  node.text
226
- end
226
+ end
227
227
  end
228
-
229
- # Figure out proper controlled format for an ebsco item.
228
+
229
+ # Figure out proper controlled format for an ebsco item.
230
230
  # EBSCOHost (not sure about EDS) publication/document type
231
231
  # are totally unusable non-normalized vocabulary for controlled
232
- # types, we'll try to guess from other metadata features.
232
+ # types, we'll try to guess from other metadata features.
233
233
  def sniff_format(xml_node)
234
234
  return nil if xml_node.nil?
235
-
235
+
236
236
  if xml_node.at_xpath("./dissinfo/*")
237
237
  :dissertation
238
238
  elsif xml_node.at_xpath("./jinfo/*") && xml_node.at_xpath("./artinfo/*")
@@ -246,35 +246,35 @@ class BentoSearch::EbscoHostEngine
246
246
  # pathological case of book_item, if it has a bkinfo and an artinfo
247
247
  # but the titles in both sections MATCH, it's just a book. If they're
248
248
  # different, it's a book section, bah!
249
- :book_item
249
+ :book_item
250
250
  elsif xml_node.at_xpath("./bkinfo/*")
251
251
  "Book"
252
252
  elsif xml_node.at_xpath("./jinfo/*")
253
253
  :serial
254
254
  else
255
255
  nil
256
- end
256
+ end
257
257
  end
258
-
258
+
259
259
  # Figure out uncontrolled literal string format to show to users.
260
260
  # We're going to try combining Ebsco Publication Type and Document Type,
261
- # when both are present. Then a few hard-coded special transformations.
262
- def sniff_format_str(xml_node)
261
+ # when both are present. Then a few hard-coded special transformations.
262
+ def sniff_format_str(xml_node)
263
263
  pubtype = text_if_present( xml_node.at_xpath("./artinfo/pubtype") )
264
264
  doctype = text_if_present( xml_node.at_xpath("./artinfo/doctype") )
265
-
265
+
266
266
  components = []
267
267
  components.push pubtype
268
268
  components.push doctype unless doctype == pubtype
269
-
269
+
270
270
  components.compact!
271
-
271
+
272
272
  components = components.collect {|a| a.titlecase if a}
273
273
  components.uniq! # no need to have the same thing twice
274
274
 
275
-
275
+
276
276
  # some hard-coded cases for better user-displayable string, and other
277
- # normalization.
277
+ # normalization.
278
278
  if ["Academic Journal", "Journal"].include?(components.first) && ["Article", "Journal Article"].include?(components.last)
279
279
  return "Journal Article"
280
280
  elsif components.last == "Book: Monograph"
@@ -290,251 +290,252 @@ class BentoSearch::EbscoHostEngine
290
290
  # first is strict substring, don't need it
291
291
  return components.last
292
292
  end
293
-
294
-
295
-
293
+
294
+
295
+
296
296
  return components.join(": ")
297
297
  end
298
-
298
+
299
299
  # pass in <rec> nokogiri, will determine best link
300
300
  def get_link(xml)
301
301
  text_if_present(xml.at_xpath("./pdfLink")) || text_if_present(xml.at_xpath("./plink") )
302
302
  end
303
-
304
-
305
- # escape or replace special chars to ebsco
303
+
304
+
305
+ # escape or replace special chars to ebsco
306
306
  def ebsco_query_escape(txt)
307
307
  # it's unclear if ebsco API actually allows escaping of special chars,
308
308
  # or what the special chars are. But we know parens are special, can't
309
309
  # escape em, we'll just remove em (should not affect search).
310
-
310
+
311
311
  # undocumented but question mark seems to cause a problem for ebsco,
312
- # even inside quoted phrases, not sure why.
313
- txt = txt.gsub(/[)(\?]/, ' ')
314
-
312
+ # even inside quoted phrases, not sure why. Square brackets too.
313
+ txt = txt.gsub(/[)(\?\[\]]/, ' ')
314
+
315
315
  # 'and' and 'or' need to be in phrase quotes to avoid being
316
316
  # interpreted as boolean. For instance, when people just
317
317
  # paste in a title: << A strategy for decreasing anxiety of ICU transfer patients and their families >>
318
318
  # You'd think 'and' as boolean would still work there, but it resulted
319
319
  # in zero hits unless quoted, I dunno. lowercase and uppercase and/or/not
320
- # both cause observed weirdness.
320
+ # both cause observed weirdness.
321
321
  if ['and', 'or', 'not'].include?( txt.downcase )
322
322
  txt = %Q{"#{txt}"}
323
- end
324
-
323
+ end
324
+
325
325
  return txt
326
326
  end
327
-
327
+
328
328
  # Actually turn the user's query into an EBSCO "AND" boolean query,
329
329
  # seems only way to get decent results where terms can match cross-fields
330
- # at the moment, for EIT. We'll see for EDS.
330
+ # at the moment, for EIT. We'll see for EDS.
331
331
  def ebsco_query_prepare(txt)
332
332
  # use string split with regex cleverly to split into space
333
- # seperated terms and phrases, keeping phrases as unit.
333
+ # seperated terms and phrases, keeping phrases as unit.
334
334
  terms = txt.split %r{[[:space:]]+|("[^"]+")}
335
335
 
336
336
  # Remove parens in non-phrase-quoted terms
337
- terms = terms.collect do |t|
338
- ebsco_query_escape(t)
337
+ terms = terms.collect do |t|
338
+ ebsco_query_escape(t)
339
339
  end
340
-
340
+
341
341
 
342
342
  # Remove empty strings. Remove terms that are solely punctuation
343
- # without any letters.
343
+ # without any letters.
344
344
  terms.delete_if do |term|
345
- (
346
- term.blank? ||
345
+ (
346
+ term.blank? ||
347
347
  term =~ /\A[^[[:alnum:]]]+\Z/
348
348
  )
349
349
  end
350
-
351
- terms.join(" AND ")
350
+
351
+ terms.join(" AND ")
352
352
  end
353
-
353
+
354
354
  def query_url(args)
355
-
356
- url =
355
+
356
+ url =
357
357
  "#{configuration.base_url}/Search?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
358
-
359
- query = ebsco_query_prepare args[:query]
360
-
361
-
358
+
359
+ query = ebsco_query_prepare args[:query]
360
+
361
+
362
362
  # wrap in (FI $query) if fielded search
363
363
  if args[:search_field]
364
364
  query = "(#{args[:search_field]} #{query})"
365
365
  end
366
-
366
+
367
367
  # peer-reviewed only?
368
368
  if [true, "true"].include? args[:peer_reviewed_only]
369
369
  query += " AND (RV Y)"
370
370
  end
371
-
371
+
372
372
  if args[:pubyear_start] || args[:pubyear_end]
373
- from = args[:pubyear_start].to_i
373
+ from = args[:pubyear_start].to_i
374
374
  from = nil if from == 0
375
-
376
- to = args[:pubyear_end].to_i
375
+
376
+ to = args[:pubyear_end].to_i
377
377
  to = nil if to == 0
378
-
378
+
379
379
  query += " AND (DT #{from}-#{to})"
380
380
  end
381
-
382
-
381
+
382
+
383
383
  url += "&query=#{CGI.escape query}"
384
-
385
- # startrec is 1-based for ebsco, not 0-based like for us.
384
+
385
+ # startrec is 1-based for ebsco, not 0-based like for us.
386
386
  url += "&startrec=#{args[:start] + 1}" if args[:start]
387
387
  url += "&numrec=#{args[:per_page]}" if args[:per_page]
388
-
389
- # Make relevance our default sort, rather than EBSCO's date.
388
+
389
+ # Make relevance our default sort, rather than EBSCO's date.
390
390
  args[:sort] ||= "relevance"
391
391
  url += "&sort=#{ sort_definitions[args[:sort]][:implementation]}"
392
-
392
+
393
393
  # Contrary to docs, don't pass these comma-separated, pass em in separate
394
- # query params. args databases overrides config databases.
394
+ # query params. args databases overrides config databases.
395
395
  (args[:databases] || configuration.databases).each do |db|
396
396
  url += "&db=#{db}"
397
- end
397
+ end
398
398
 
399
399
  return url
400
400
  end
401
-
401
+
402
402
  # pass in a nokogiri representing an EBSCO <rec> result,
403
- # we'll turn it into a BentoSearch::ResultItem.
404
- def item_from_xml(xml_rec)
403
+ # we'll turn it into a BentoSearch::ResultItem.
404
+ def item_from_xml(xml_rec)
405
405
  info = xml_rec.at_xpath("./header/controlInfo")
406
-
406
+
407
407
  item = BentoSearch::ResultItem.new
408
-
408
+
409
409
  # Get unique id. Think we need both the database code and accession
410
410
  # number combined, accession numbers not necessarily unique across
411
- # dbs. We'll combine with a colon.
411
+ # dbs. We'll combine with a colon.
412
412
  db = text_if_present xml_rec.at_xpath("./header/@shortDbName")
413
- accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
413
+ accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
414
414
  item.unique_id = "#{db}:#{accession}" if db && accession
415
-
416
-
415
+
416
+
417
417
  item.link = get_link(xml_rec)
418
418
 
419
- item.issn = text_if_present info.at_xpath("./jinfo/issn")
420
-
419
+ # EBSCO is somewhat inconsistent with where it puts the ISSN
420
+ item.issn = text_if_present(info.at_xpath("./jinfo/issn")) || text_if_present(info.at_xpath("./jinfo/jid[@type='issn']"))
421
+
421
422
  # Dealing with titles is a bit crazy, while articles usually have atitles and
422
423
  # jtitles, sometimes they have a btitle instead. A book will usually have
423
- # both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
424
-
424
+ # both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
425
+
425
426
  jtitle = text_if_present(info.at_xpath("./jinfo/jtl"))
426
427
  btitle = text_if_present info.at_xpath("./bkinfo/btl")
427
428
  atitle = text_if_present info.at_xpath("./artinfo/tig/atl")
428
-
429
+
429
430
  if jtitle && atitle
430
431
  item.title = atitle
431
432
  item.source_title = jtitle
432
433
  elsif btitle && atitle && atitle != btitle
433
434
  # for a book, sometimes there's an atitle block and a btitle block
434
- # when they're identical, this ain't a book section, it's a book.
435
+ # when they're identical, this ain't a book section, it's a book.
435
436
  item.title = atitle
436
437
  item.source_title = btitle
437
438
  else
438
439
  item.title = atitle || btitle
439
- end
440
+ end
440
441
  # EBSCO sometimes has crazy long titles, truncate em.
441
442
  if item.title.present?
442
443
  item.title = text_helper.truncate(item.title, :length => 200, :separator => ' ', :omission => '…')
443
444
  end
444
-
445
-
446
-
445
+
446
+
447
+
447
448
  item.publisher = text_if_present info.at_xpath("./pubinfo/pub")
448
449
  # if no publisher, but a dissertation institution, use that
449
- # as publisher.
450
+ # as publisher.
450
451
  unless item.publisher
451
452
  item.publisher = text_if_present info.at_xpath("./dissinfo/dissinst")
452
453
  end
453
-
454
-
454
+
455
+
455
456
  # Might have multiple ISBN's in record, just take first for now
456
457
  item.isbn = text_if_present info.at_xpath("./bkinfo/isbn")
457
-
458
+
458
459
  item.year = text_if_present info.at_xpath("./pubinfo/dt/@year")
459
- # fill in complete publication_date too only if we've got it.
460
+ # fill in complete publication_date too only if we've got it.
460
461
  if (item.year &&
461
462
  (month = text_if_present info.at_xpath("./pubinfo/dt/@month")) &&
462
- (day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
463
+ (day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
463
464
  )
464
465
  if (item.year.to_i != 0 && month.to_i != 0 && day.to_i != 0)
465
- item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
466
+ item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
466
467
  end
467
468
  end
468
-
469
+
469
470
  item.volume = text_if_present info.at_xpath("./pubinfo/vid")
470
471
  item.issue = text_if_present info.at_xpath("./pubinfo/iid")
471
-
472
-
473
-
474
-
472
+
473
+
474
+
475
+
475
476
  item.start_page = text_if_present info.at_xpath("./artinfo/ppf")
476
-
477
+
477
478
  item.doi = text_if_present info.at_xpath("./artinfo/ui[@type='doi']")
478
-
479
+
479
480
  item.abstract = text_if_present info.at_xpath("./artinfo/ab")
480
481
  # EBSCO abstracts have an annoying habit of beginning with "Abstract:"
481
482
  if item.abstract
482
483
  item.abstract.gsub!(/^Abstract\: /, "")
483
484
  end
484
-
485
- # authors, only get full display name from EBSCO.
485
+
486
+ # authors, only get full display name from EBSCO.
486
487
  info.xpath("./artinfo/aug/au").each do |author|
487
488
  a = BentoSearch::Author.new(:display => author.text)
488
489
  item.authors << a
489
490
  end
490
-
491
+
491
492
  item.format = sniff_format info
492
493
  item.format_str = sniff_format_str info
493
-
494
+
494
495
  # Totally unreliable, seems to report english for everything? Maybe
495
- # because abstracts are in english? Nevertheless we include for now.
496
+ # because abstracts are in english? Nevertheless we include for now.
496
497
  item.language_code = text_if_present info.at_xpath("./language/@code")
497
498
  # why does EBSCO return 'undetermined' sometimes? That might as well be
498
- # not there, bah.
499
+ # not there, bah.
499
500
  item.language_code = nil if item.language_code == "und"
500
-
501
- # array of custom ebsco codes (or nil) for fulltext formats avail.
501
+
502
+ # array of custom ebsco codes (or nil) for fulltext formats avail.
502
503
  item.custom_data["fulltext_formats"] = fulltext_formats xml_rec
503
504
  # if any fulltext format, mark present
504
- item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
505
-
505
+ item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
506
+
506
507
  return item
507
508
  end
508
-
509
+
509
510
  # This method is not used for normal searching, but can be used by
510
- # other code to retrieve the results of the EBSCO API Info command,
511
+ # other code to retrieve the results of the EBSCO API Info command,
511
512
  # using connection details configured in this engine. The Info command
512
513
  # can tell you what databases your account is authorized to see.
513
514
  # Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED
514
515
  def get_info
515
- url =
516
- "#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
517
-
516
+ url =
517
+ "#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
518
+
518
519
  noko = Nokogiri::XML( http_client.get( url ).body )
519
-
520
+
520
521
  noko.remove_namespaces!
521
-
522
+
522
523
  return noko
523
524
  end
524
-
525
+
525
526
  def public_settable_search_args
526
527
  super + [:peer_reviewed_only, :pubyear_start, :pubyear_end]
527
528
  end
528
-
529
+
529
530
  # David Walker says pretty much only relevance and date are reliable
530
- # in EBSCOhost cross-search.
531
+ # in EBSCOhost cross-search.
531
532
  def sort_definitions
532
- {
533
+ {
533
534
  "relevance" => {:implementation => "relevance"},
534
535
  "date_desc" => {:implementation => "date"}
535
- }
536
+ }
536
537
  end
537
-
538
+
538
539
  def search_field_definitions
539
540
  {
540
541
  nil => {:semantic => :general},
@@ -545,17 +546,17 @@ class BentoSearch::EbscoHostEngine
545
546
  "IB" => {:semantic => :isbn}
546
547
  }
547
548
  end
548
-
549
+
549
550
  def max_per_page
550
551
  # Actually only '50' if you ask for 'full' records, but I don't think
551
- # we need to do that ever, that's actually getting fulltext back!
552
+ # we need to do that ever, that's actually getting fulltext back!
552
553
  200
553
554
  end
554
-
555
+
555
556
  def self.required_configuration
556
557
  ["profile_id", "profile_password"]
557
558
  end
558
-
559
+
559
560
  def self.default_configuration
560
561
  {
561
562
  # /Search
@@ -563,5 +564,5 @@ class BentoSearch::EbscoHostEngine
563
564
  :databases => []
564
565
  }
565
566
  end
566
-
567
+
567
568
  end