bento_search 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. checksums.yaml +15 -0
  2. data/README.md +92 -90
  3. data/app/item_decorators/bento_search/decorator_base.rb +9 -6
  4. data/app/item_decorators/bento_search/standard_decorator.rb +24 -0
  5. data/app/search_engines/bento_search/ebsco_host_engine.rb +180 -179
  6. data/app/search_engines/bento_search/journal_tocs_for_journal.rb +179 -0
  7. data/app/views/bento_search/_std_item.html.erb +4 -4
  8. data/lib/bento_search/version.rb +1 -1
  9. data/test/decorator/decorator_base_test.rb +11 -1
  10. data/test/decorator/standard_decorator_test.rb +21 -0
  11. data/test/dummy/log/development.log +2 -0
  12. data/test/dummy/log/test.log +22324 -0
  13. data/test/{unit → search_engines}/ebsco_host_engine_test.rb +148 -130
  14. data/test/{unit → search_engines}/eds_engine_test.rb +0 -0
  15. data/test/{unit → search_engines}/google_books_engine_test.rb +0 -0
  16. data/test/{unit → search_engines}/google_site_search_test.rb +0 -0
  17. data/test/search_engines/journal_tocs_for_journal_test.rb +93 -0
  18. data/test/{unit → search_engines}/primo_engine_test.rb +0 -0
  19. data/test/{unit → search_engines}/scopus_engine_test.rb +0 -0
  20. data/test/{unit → search_engines}/search_engine_base_test.rb +0 -0
  21. data/test/{unit → search_engines}/search_engine_test.rb +0 -0
  22. data/test/{unit → search_engines}/summon_engine_test.rb +0 -0
  23. data/test/{unit → search_engines}/worldcat_sru_dc_engine_test.rb +0 -0
  24. data/test/{unit → search_engines}/xerxes_engine_test.rb +0 -0
  25. data/test/vcr_cassettes/ebscohost/RILM_record_with_ISSN_in__jid__element.yml +210 -0
  26. data/test/vcr_cassettes/journal_tocs/empty_results_on_bad_ISSN.yml +49 -0
  27. data/test/vcr_cassettes/journal_tocs/error_on_bad_registered_email.yml +41 -0
  28. data/test/vcr_cassettes/journal_tocs/error_on_error_response.yml +51 -0
  29. data/test/vcr_cassettes/journal_tocs/fetch_xml_with_hits.yml +328 -0
  30. data/test/vcr_cassettes/journal_tocs/fills_out_metadata.yml +396 -0
  31. data/test/vcr_cassettes/journal_tocs/smoke_test.yml +328 -0
  32. metadata +62 -61
@@ -50,18 +50,21 @@ module BentoSearch
50
50
  end
51
51
 
52
52
  # Applies decorator to item and returns decorated item.
53
- # uses standard logic to look up which decorator to apply or
54
- # applies default one. The point of this method is just that
55
- # standard logic.
53
+ # Will decide what decorator to apply based on String class name
54
+ # in item.decorator, or else apply StandardDecorator. The point of
55
+ # this method is just that logic, nothing else special.
56
56
  #
57
57
  # Need to pass a Rails ActionView::Context in, to use to
58
58
  # initialize decorator. In Rails, in most places you can
59
59
  # get one of those from #view_context. In helpers/views
60
60
  # you can also use `self`.
61
61
  def self.decorate(item, view_context)
62
- # What decorator class? If specified as string in #decorator,
63
- # look it up as a class object, else default.
64
- decorator_class = item.decorator.try {|name| BentoSearch::Util.constantize(name) } || BentoSearch::StandardDecorator
62
+ # What decorator class? Specified in #decorator as a String,
63
+ # we intentionally do not allow an actual class constant, to
64
+ # maintain problem-free serialization of ItemResults and configuration.
65
+ decorator_class = item.decorator.try do |arg|
66
+ BentoSearch::Util.constantize(arg.to_s)
67
+ end || BentoSearch::StandardDecorator
65
68
 
66
69
  return decorator_class.new(item, view_context)
67
70
  end
@@ -149,6 +149,30 @@ module BentoSearch
149
149
  return value.blank? ? nil : value
150
150
  end
151
151
 
152
+ # outputs a date for display, from #publication_date or #year.
153
+ # Uses its own logic to decide whether to output entire date or just
154
+ # year, if it has a complete date. (If volume and issue are present,
155
+ # just year).
156
+ #
157
+ # Over-ride in a decorator if you want to always or never or different
158
+ # logic for complete date. Or if you want to change the format of the date,
159
+ # etc.
160
+ def display_date
161
+ if self.publication_date
162
+ if self.volume && self.issue
163
+ # just the year, ma'am
164
+ I18n.localize(self.publication_date, :format => "%Y")
165
+ else
166
+ # whole date, since we got it
167
+ I18n.localize(self.publication_date, :format => "%d %b %Y")
168
+ end
169
+ elsif self.year
170
+ self.year.to_s
171
+ else
172
+ nil
173
+ end
174
+ end
175
+
152
176
  # A unique opaque identifier for a record may sometimes be
153
177
  # required, for instance in Atom.
154
178
  #
@@ -5,7 +5,7 @@ require 'nokogiri'
5
5
  require 'http_client_patch/include_client'
6
6
  require 'httpclient'
7
7
 
8
- # Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
8
+ # Right now for EbscoHost API (Ebsco Integration Toolkit/EIT),
9
9
  # may be expanded or refactored for EDS too.
10
10
  #
11
11
  # == Required Configuration
@@ -22,28 +22,28 @@ require 'httpclient'
22
22
  #
23
23
  # [:peer_reviewed_only] Set to boolean true or string 'true', to restrict
24
24
  # results to peer-reviewed only. (Or ask EBSCOHost
25
- # api to do so, what we get is what we get).
25
+ # api to do so, what we get is what we get).
26
26
  # [:pubyear_start]
27
27
  # [:pubyear_end] Date range limiting, pass in custom search args,
28
28
  # one or both of pubyear_start and pubyear_end
29
- # #to_i will be called on it, so can be string.
29
+ # #to_i will be called on it, so can be string.
30
30
  # .search(:query => "foo", :pubyear_start => 2000)
31
31
  # [:databases] List of licensed EBSCO dbs to search, can override
32
- # list set in config databases, just for this search.
33
- #
32
+ # list set in config databases, just for this search.
33
+ #
34
34
  # == Custom response data
35
- #
36
- # Iff EBSCO API reports that fulltext is available for the hit, then
35
+ #
36
+ # Iff EBSCO API reports that fulltext is available for the hit, then
37
37
  # result.custom_data["fulltext_formats"] will be non-nil, and will be an array of
38
38
  # one or more of EBSCO's internal codes (P=PDF, T=HTML, C=HTML+Images). If
39
39
  # no fulltext is avail according to EBSCO API, result.custom_data["fulltext_formats"]
40
- # will be nil.
40
+ # will be nil.
41
41
  #
42
42
  # #link_is_fulltext also set to true/false
43
43
  #
44
44
  # You can use this to, for instance, hyperlink the displayed title directly
45
45
  # to record on EBSCO if and only if there's fulltext. By writing a custom
46
- # decorator. See wiki on decorators.
46
+ # decorator. See wiki on decorators.
47
47
  #
48
48
  # == Limitations
49
49
  # We do set language of ResultItems based on what ebsco tells us, but ebsco
@@ -56,12 +56,12 @@ require 'httpclient'
56
56
  # EBSCO that you want included in the search. You can't just say "all of them"
57
57
  # the api doesn't support that, and also more than 30 or 40 starts getting
58
58
  # horribly slow. If you include a db you do not have access to, EBSCO api
59
- # fatal errors.
59
+ # fatal errors.
60
60
  #
61
61
  # You may want to make sure all your licensed databases are included
62
62
  # in your EIT profile. Log onto ebscoadmin, Customize Services, choose
63
- # EIT profile, choose 'databases' tag.
64
- #
63
+ # EIT profile, choose 'databases' tag.
64
+ #
65
65
  # === Download databases from EBSCO api
66
66
  #
67
67
  # We include a utility to download ALL activated databases for EIT profile
@@ -69,12 +69,12 @@ require 'httpclient'
69
69
  # file as a starting point, and edit by hand:
70
70
  #
71
71
  # First configure your EBSCO search engine with bento_search, say under
72
- # key 'ebscohost'.
72
+ # key 'ebscohost'.
73
73
  #
74
74
  # Then run:
75
75
  # rails generate bento_search:pull_ebsco_dbs ebscohost
76
76
  #
77
- # assuming 'ebscohost' is the key you registered the EBSCO search engine.
77
+ # assuming 'ebscohost' is the key you registered the EBSCO search engine.
78
78
  #
79
79
  # This will create a file at ./config/ebsco_dbs.rb. You may want to hand
80
80
  # edit it. Then, in your bento search config, you can:
@@ -85,7 +85,7 @@ require 'httpclient'
85
85
  # conf.databases = $ebsco_dbs
86
86
  # end
87
87
  #
88
- # == Vendor documentation
88
+ # == Vendor documentation
89
89
  #
90
90
  # Vendor documentation is a bit scattered, main page:
91
91
  # * http://support.ebsco.com/eit/ws.php
@@ -93,10 +93,10 @@ require 'httpclient'
93
93
  # * http://support.ebsco.com/eit/ws_faq.php
94
94
  # * search syntax examples: http://support.ebsco.com/eit/ws_howto_queries.php
95
95
  # * Try constructing a query: http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=/Services/SearchService.asmx&method=Search
96
- # * The 'info' service can be used to see what databases you have access to.
96
+ # * The 'info' service can be used to see what databases you have access to.
97
97
  # * DTD of XML Response, hard to interpret but all we've got: http://support.ebsco.com/eit/docs/DTD_EIT_WS_searchResponse.zip
98
98
  #
99
- # Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
99
+ # Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query:
100
100
  # http://support.epnet.com/knowledge_base/detail.php?id=5397
101
101
  #
102
102
  # EBSCO searchable support portal has a section for the EIT api we use here:
@@ -104,20 +104,20 @@ require 'httpclient'
104
104
 
105
105
  class BentoSearch::EbscoHostEngine
106
106
  include BentoSearch::SearchEngine
107
-
107
+
108
108
  # Can't change http timeout in config, because we keep an http
109
- # client at class-wide level, and config is not class-wide.
109
+ # client at class-wide level, and config is not class-wide.
110
110
  # Change this 'constant' if you want to change it, I guess.
111
111
  #
112
112
  # In some tests we did, 5.2s was 95th percentile slowest, but in
113
113
  # actual percentage 5.2s is still timing out way too many requests,
114
- # let's try 6.3, why not.
115
- HttpTimeout = 6.3
116
- extend HTTPClientPatch::IncludeClient
114
+ # let's try 6.3, why not.
115
+ HttpTimeout = 6.3
116
+ extend HTTPClientPatch::IncludeClient
117
117
  include_http_client do |client|
118
118
  client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
119
119
  end
120
-
120
+
121
121
  # Include some rails helpers, text_helper.truncate
122
122
  def text_helper
123
123
  @@truncate ||= begin
@@ -126,10 +126,10 @@ class BentoSearch::EbscoHostEngine
126
126
  o
127
127
  end
128
128
  end
129
-
129
+
130
130
  def search_implementation(args)
131
131
  url = query_url(args)
132
-
132
+
133
133
  results = BentoSearch::Results.new
134
134
  xml, response, exception = nil, nil, nil
135
135
 
@@ -137,85 +137,85 @@ class BentoSearch::EbscoHostEngine
137
137
  response = http_client.get(url)
138
138
  xml = Nokogiri::XML(response.body)
139
139
  rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError => e
140
- exception = e
140
+ exception = e
141
141
  end
142
142
  # error handle
143
- if ( response.nil? ||
144
- xml.nil? ||
145
- exception ||
143
+ if ( response.nil? ||
144
+ xml.nil? ||
145
+ exception ||
146
146
  (! HTTP::Status.successful? response.status) ||
147
147
  (fault = xml.at_xpath("./Fault")))
148
-
148
+
149
149
  results.error ||= {}
150
150
  results.error[:api_url] = url
151
151
  results.error[:exception] = exception if exception
152
152
  results.error[:status] = response.status if response
153
-
153
+
154
154
  if fault
155
155
  results.error[:error_info] = text_if_present fault.at_xpath("./Message")
156
156
  end
157
-
157
+
158
158
  return results
159
159
  end
160
-
161
-
162
-
160
+
161
+
162
+
163
163
  # the namespaces they provide are weird and don't help and sometimes
164
164
  # not clearly even legal. Remove em!
165
165
  xml.remove_namespaces!
166
-
166
+
167
167
  results.total_items = xml.at_xpath("./searchResponse/Hits").text.to_i
168
-
168
+
169
169
  xml.xpath("./searchResponse/SearchResults/records/rec").each do |xml_rec|
170
170
  results << item_from_xml( xml_rec )
171
171
  end
172
-
172
+
173
173
  return results
174
-
174
+
175
175
  end
176
-
176
+
177
177
  # Method to get a single record by "identifier" string, which is really
178
178
  # a combined "db:id" string, same string that would be returned by
179
179
  # an individual item.identifier
180
180
  #
181
181
  # Returns an individual BentoSearch::Result, or raises an exception.
182
- # Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
182
+ # Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
183
183
  # any other weird random exception caused by problems fetching (network
184
184
  # error etc. Is it bad that we don't wrap these in an expected single
185
185
  # exception type? Should we?)
186
186
  def get(id)
187
- # split on first colon only.
187
+ # split on first colon only.
188
188
  id =~ /^([^:]+)\:(.*)$/
189
189
  db = $1 ; an = $2
190
-
190
+
191
191
  raise ArgumentError.new("EbscoHostEngine#get requires an id with a colon, like `a9h:12345`. Instead, we got #{id}") unless db && an
192
-
192
+
193
193
  # "AN" search_field is not listed in our search_field_definitions,
194
194
  # but it is an internal EBSCOHost search index on 'accession number'
195
-
195
+
196
196
  results = search(an, :search_field => "AN", :databases => [db])
197
-
197
+
198
198
  raise (results.error[:exception] || Exception.new) if results.failed?
199
199
  raise BentoSearch::NotFound.new("For id: #{id}") if results.length == 0
200
200
  raise BentoSearch::TooManyFound.new("For id: #{id}") if results.length > 1
201
-
201
+
202
202
  return results.first
203
203
  end
204
-
205
- # pass in nokogiri record xml for the records/rec node.
206
- # Returns nil if NO fulltext is avail on ebsco platform,
204
+
205
+ # pass in nokogiri record xml for the records/rec node.
206
+ # Returns nil if NO fulltext is avail on ebsco platform,
207
207
  # non-nil if fulltext is available. Non-nil value will
208
- # actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
208
+ # actually be a non-empty ARRAY of internal EBSCO codes, P=PDF, T=HTML, C=HTML with images.
209
209
  # http://support.epnet.com/knowledge_base/detail.php?topic=996&id=3778&page=1
210
210
  def fulltext_formats(record_xml)
211
211
  fulltext_formats = record_xml.xpath("./header/controlInfo/artinfo/formats/fmt/@type").collect {|n| n.text }
212
-
212
+
213
213
  return nil if fulltext_formats.empty?
214
-
215
- return fulltext_formats
214
+
215
+ return fulltext_formats
216
216
  end
217
-
218
-
217
+
218
+
219
219
  # Pass in a nokogiri node, return node.text, or nil if
220
220
  # arg was nil or node.text was blank?
221
221
  def text_if_present(node)
@@ -223,16 +223,16 @@ class BentoSearch::EbscoHostEngine
223
223
  nil
224
224
  else
225
225
  node.text
226
- end
226
+ end
227
227
  end
228
-
229
- # Figure out proper controlled format for an ebsco item.
228
+
229
+ # Figure out proper controlled format for an ebsco item.
230
230
  # EBSCOHost (not sure about EDS) publication/document type
231
231
  # are totally unusable non-normalized vocabulary for controlled
232
- # types, we'll try to guess from other metadata features.
232
+ # types, we'll try to guess from other metadata features.
233
233
  def sniff_format(xml_node)
234
234
  return nil if xml_node.nil?
235
-
235
+
236
236
  if xml_node.at_xpath("./dissinfo/*")
237
237
  :dissertation
238
238
  elsif xml_node.at_xpath("./jinfo/*") && xml_node.at_xpath("./artinfo/*")
@@ -246,35 +246,35 @@ class BentoSearch::EbscoHostEngine
246
246
  # pathological case of book_item, if it has a bkinfo and an artinfo
247
247
  # but the titles in both sections MATCH, it's just a book. If they're
248
248
  # different, it's a book section, bah!
249
- :book_item
249
+ :book_item
250
250
  elsif xml_node.at_xpath("./bkinfo/*")
251
251
  "Book"
252
252
  elsif xml_node.at_xpath("./jinfo/*")
253
253
  :serial
254
254
  else
255
255
  nil
256
- end
256
+ end
257
257
  end
258
-
258
+
259
259
  # Figure out uncontrolled literal string format to show to users.
260
260
  # We're going to try combining Ebsco Publication Type and Document Type,
261
- # when both are present. Then a few hard-coded special transformations.
262
- def sniff_format_str(xml_node)
261
+ # when both are present. Then a few hard-coded special transformations.
262
+ def sniff_format_str(xml_node)
263
263
  pubtype = text_if_present( xml_node.at_xpath("./artinfo/pubtype") )
264
264
  doctype = text_if_present( xml_node.at_xpath("./artinfo/doctype") )
265
-
265
+
266
266
  components = []
267
267
  components.push pubtype
268
268
  components.push doctype unless doctype == pubtype
269
-
269
+
270
270
  components.compact!
271
-
271
+
272
272
  components = components.collect {|a| a.titlecase if a}
273
273
  components.uniq! # no need to have the same thing twice
274
274
 
275
-
275
+
276
276
  # some hard-coded cases for better user-displayable string, and other
277
- # normalization.
277
+ # normalization.
278
278
  if ["Academic Journal", "Journal"].include?(components.first) && ["Article", "Journal Article"].include?(components.last)
279
279
  return "Journal Article"
280
280
  elsif components.last == "Book: Monograph"
@@ -290,251 +290,252 @@ class BentoSearch::EbscoHostEngine
290
290
  # first is strict substring, don't need it
291
291
  return components.last
292
292
  end
293
-
294
-
295
-
293
+
294
+
295
+
296
296
  return components.join(": ")
297
297
  end
298
-
298
+
299
299
  # pass in <rec> nokogiri, will determine best link
300
300
  def get_link(xml)
301
301
  text_if_present(xml.at_xpath("./pdfLink")) || text_if_present(xml.at_xpath("./plink") )
302
302
  end
303
-
304
-
305
- # escape or replace special chars to ebsco
303
+
304
+
305
+ # escape or replace special chars to ebsco
306
306
  def ebsco_query_escape(txt)
307
307
  # it's unclear if ebsco API actually allows escaping of special chars,
308
308
  # or what the special chars are. But we know parens are special, can't
309
309
  # escape em, we'll just remove em (should not affect search).
310
-
310
+
311
311
  # undocumented but question mark seems to cause a problem for ebsco,
312
- # even inside quoted phrases, not sure why.
313
- txt = txt.gsub(/[)(\?]/, ' ')
314
-
312
+ # even inside quoted phrases, not sure why. Square brackets too.
313
+ txt = txt.gsub(/[)(\?\[\]]/, ' ')
314
+
315
315
  # 'and' and 'or' need to be in phrase quotes to avoid being
316
316
  # interpreted as boolean. For instance, when people just
317
317
  # paste in a title: << A strategy for decreasing anxiety of ICU transfer patients and their families >>
318
318
  # You'd think 'and' as boolean would still work there, but it resulted
319
319
  # in zero hits unless quoted, I dunno. lowercase and uppercase and/or/not
320
- # both cause observed weirdness.
320
+ # both cause observed weirdness.
321
321
  if ['and', 'or', 'not'].include?( txt.downcase )
322
322
  txt = %Q{"#{txt}"}
323
- end
324
-
323
+ end
324
+
325
325
  return txt
326
326
  end
327
-
327
+
328
328
  # Actually turn the user's query into an EBSCO "AND" boolean query,
329
329
  # seems only way to get decent results where terms can match cross-fields
330
- # at the moment, for EIT. We'll see for EDS.
330
+ # at the moment, for EIT. We'll see for EDS.
331
331
  def ebsco_query_prepare(txt)
332
332
  # use string split with regex cleverly to split into space
333
- # seperated terms and phrases, keeping phrases as unit.
333
+ # seperated terms and phrases, keeping phrases as unit.
334
334
  terms = txt.split %r{[[:space:]]+|("[^"]+")}
335
335
 
336
336
  # Remove parens in non-phrase-quoted terms
337
- terms = terms.collect do |t|
338
- ebsco_query_escape(t)
337
+ terms = terms.collect do |t|
338
+ ebsco_query_escape(t)
339
339
  end
340
-
340
+
341
341
 
342
342
  # Remove empty strings. Remove terms that are solely punctuation
343
- # without any letters.
343
+ # without any letters.
344
344
  terms.delete_if do |term|
345
- (
346
- term.blank? ||
345
+ (
346
+ term.blank? ||
347
347
  term =~ /\A[^[[:alnum:]]]+\Z/
348
348
  )
349
349
  end
350
-
351
- terms.join(" AND ")
350
+
351
+ terms.join(" AND ")
352
352
  end
353
-
353
+
354
354
  def query_url(args)
355
-
356
- url =
355
+
356
+ url =
357
357
  "#{configuration.base_url}/Search?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
358
-
359
- query = ebsco_query_prepare args[:query]
360
-
361
-
358
+
359
+ query = ebsco_query_prepare args[:query]
360
+
361
+
362
362
  # wrap in (FI $query) if fielded search
363
363
  if args[:search_field]
364
364
  query = "(#{args[:search_field]} #{query})"
365
365
  end
366
-
366
+
367
367
  # peer-reviewed only?
368
368
  if [true, "true"].include? args[:peer_reviewed_only]
369
369
  query += " AND (RV Y)"
370
370
  end
371
-
371
+
372
372
  if args[:pubyear_start] || args[:pubyear_end]
373
- from = args[:pubyear_start].to_i
373
+ from = args[:pubyear_start].to_i
374
374
  from = nil if from == 0
375
-
376
- to = args[:pubyear_end].to_i
375
+
376
+ to = args[:pubyear_end].to_i
377
377
  to = nil if to == 0
378
-
378
+
379
379
  query += " AND (DT #{from}-#{to})"
380
380
  end
381
-
382
-
381
+
382
+
383
383
  url += "&query=#{CGI.escape query}"
384
-
385
- # startrec is 1-based for ebsco, not 0-based like for us.
384
+
385
+ # startrec is 1-based for ebsco, not 0-based like for us.
386
386
  url += "&startrec=#{args[:start] + 1}" if args[:start]
387
387
  url += "&numrec=#{args[:per_page]}" if args[:per_page]
388
-
389
- # Make relevance our default sort, rather than EBSCO's date.
388
+
389
+ # Make relevance our default sort, rather than EBSCO's date.
390
390
  args[:sort] ||= "relevance"
391
391
  url += "&sort=#{ sort_definitions[args[:sort]][:implementation]}"
392
-
392
+
393
393
  # Contrary to docs, don't pass these comma-separated, pass em in separate
394
- # query params. args databases overrides config databases.
394
+ # query params. args databases overrides config databases.
395
395
  (args[:databases] || configuration.databases).each do |db|
396
396
  url += "&db=#{db}"
397
- end
397
+ end
398
398
 
399
399
  return url
400
400
  end
401
-
401
+
402
402
  # pass in a nokogiri representing an EBSCO <rec> result,
403
- # we'll turn it into a BentoSearch::ResultItem.
404
- def item_from_xml(xml_rec)
403
+ # we'll turn it into a BentoSearch::ResultItem.
404
+ def item_from_xml(xml_rec)
405
405
  info = xml_rec.at_xpath("./header/controlInfo")
406
-
406
+
407
407
  item = BentoSearch::ResultItem.new
408
-
408
+
409
409
  # Get unique id. Think we need both the database code and accession
410
410
  # number combined, accession numbers not necessarily unique across
411
- # dbs. We'll combine with a colon.
411
+ # dbs. We'll combine with a colon.
412
412
  db = text_if_present xml_rec.at_xpath("./header/@shortDbName")
413
- accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
413
+ accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
414
414
  item.unique_id = "#{db}:#{accession}" if db && accession
415
-
416
-
415
+
416
+
417
417
  item.link = get_link(xml_rec)
418
418
 
419
- item.issn = text_if_present info.at_xpath("./jinfo/issn")
420
-
419
+ # EBSCO is somewhat inconsistent with where it puts the ISSN
420
+ item.issn = text_if_present(info.at_xpath("./jinfo/issn")) || text_if_present(info.at_xpath("./jinfo/jid[@type='issn']"))
421
+
421
422
  # Dealing with titles is a bit crazy, while articles usually have atitles and
422
423
  # jtitles, sometimes they have a btitle instead. A book will usually have
423
- # both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
424
-
424
+ # both btitle and atitle, but sometimes just atitle. Book chapter, oh boy.
425
+
425
426
  jtitle = text_if_present(info.at_xpath("./jinfo/jtl"))
426
427
  btitle = text_if_present info.at_xpath("./bkinfo/btl")
427
428
  atitle = text_if_present info.at_xpath("./artinfo/tig/atl")
428
-
429
+
429
430
  if jtitle && atitle
430
431
  item.title = atitle
431
432
  item.source_title = jtitle
432
433
  elsif btitle && atitle && atitle != btitle
433
434
  # for a book, sometimes there's an atitle block and a btitle block
434
- # when they're identical, this ain't a book section, it's a book.
435
+ # when they're identical, this ain't a book section, it's a book.
435
436
  item.title = atitle
436
437
  item.source_title = btitle
437
438
  else
438
439
  item.title = atitle || btitle
439
- end
440
+ end
440
441
  # EBSCO sometimes has crazy long titles, truncate em.
441
442
  if item.title.present?
442
443
  item.title = text_helper.truncate(item.title, :length => 200, :separator => ' ', :omission => '…')
443
444
  end
444
-
445
-
446
-
445
+
446
+
447
+
447
448
  item.publisher = text_if_present info.at_xpath("./pubinfo/pub")
448
449
  # if no publisher, but a dissertation institution, use that
449
- # as publisher.
450
+ # as publisher.
450
451
  unless item.publisher
451
452
  item.publisher = text_if_present info.at_xpath("./dissinfo/dissinst")
452
453
  end
453
-
454
-
454
+
455
+
455
456
  # Might have multiple ISBN's in record, just take first for now
456
457
  item.isbn = text_if_present info.at_xpath("./bkinfo/isbn")
457
-
458
+
458
459
  item.year = text_if_present info.at_xpath("./pubinfo/dt/@year")
459
- # fill in complete publication_date too only if we've got it.
460
+ # fill in complete publication_date too only if we've got it.
460
461
  if (item.year &&
461
462
  (month = text_if_present info.at_xpath("./pubinfo/dt/@month")) &&
462
- (day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
463
+ (day = text_if_present info.at_xpath("./pubinfo/dt/@day"))
463
464
  )
464
465
  if (item.year.to_i != 0 && month.to_i != 0 && day.to_i != 0)
465
- item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
466
+ item.publication_date = Date.new(item.year.to_i, month.to_i, day.to_i)
466
467
  end
467
468
  end
468
-
469
+
469
470
  item.volume = text_if_present info.at_xpath("./pubinfo/vid")
470
471
  item.issue = text_if_present info.at_xpath("./pubinfo/iid")
471
-
472
-
473
-
474
-
472
+
473
+
474
+
475
+
475
476
  item.start_page = text_if_present info.at_xpath("./artinfo/ppf")
476
-
477
+
477
478
  item.doi = text_if_present info.at_xpath("./artinfo/ui[@type='doi']")
478
-
479
+
479
480
  item.abstract = text_if_present info.at_xpath("./artinfo/ab")
480
481
  # EBSCO abstracts have an annoying habit of beginning with "Abstract:"
481
482
  if item.abstract
482
483
  item.abstract.gsub!(/^Abstract\: /, "")
483
484
  end
484
-
485
- # authors, only get full display name from EBSCO.
485
+
486
+ # authors, only get full display name from EBSCO.
486
487
  info.xpath("./artinfo/aug/au").each do |author|
487
488
  a = BentoSearch::Author.new(:display => author.text)
488
489
  item.authors << a
489
490
  end
490
-
491
+
491
492
  item.format = sniff_format info
492
493
  item.format_str = sniff_format_str info
493
-
494
+
494
495
  # Totally unreliable, seems to report english for everything? Maybe
495
- # because abstracts are in english? Nevertheless we include for now.
496
+ # because abstracts are in english? Nevertheless we include for now.
496
497
  item.language_code = text_if_present info.at_xpath("./language/@code")
497
498
  # why does EBSCO return 'undetermined' sometimes? That might as well be
498
- # not there, bah.
499
+ # not there, bah.
499
500
  item.language_code = nil if item.language_code == "und"
500
-
501
- # array of custom ebsco codes (or nil) for fulltext formats avail.
501
+
502
+ # array of custom ebsco codes (or nil) for fulltext formats avail.
502
503
  item.custom_data["fulltext_formats"] = fulltext_formats xml_rec
503
504
  # if any fulltext format, mark present
504
- item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
505
-
505
+ item.link_is_fulltext = item.custom_data["fulltext_formats"].present?
506
+
506
507
  return item
507
508
  end
508
-
509
+
509
510
  # This method is not used for normal searching, but can be used by
510
- # other code to retrieve the results of the EBSCO API Info command,
511
+ # other code to retrieve the results of the EBSCO API Info command,
511
512
  # using connection details configured in this engine. The Info command
512
513
  # can tell you what databases your account is authorized to see.
513
514
  # Returns the complete Nokogiri response, but WITH NAMESPACES REMOVED
514
515
  def get_info
515
- url =
516
- "#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
517
-
516
+ url =
517
+ "#{configuration.base_url}/Info?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
518
+
518
519
  noko = Nokogiri::XML( http_client.get( url ).body )
519
-
520
+
520
521
  noko.remove_namespaces!
521
-
522
+
522
523
  return noko
523
524
  end
524
-
525
+
525
526
  def public_settable_search_args
526
527
  super + [:peer_reviewed_only, :pubyear_start, :pubyear_end]
527
528
  end
528
-
529
+
529
530
  # David Walker says pretty much only relevance and date are reliable
530
- # in EBSCOhost cross-search.
531
+ # in EBSCOhost cross-search.
531
532
  def sort_definitions
532
- {
533
+ {
533
534
  "relevance" => {:implementation => "relevance"},
534
535
  "date_desc" => {:implementation => "date"}
535
- }
536
+ }
536
537
  end
537
-
538
+
538
539
  def search_field_definitions
539
540
  {
540
541
  nil => {:semantic => :general},
@@ -545,17 +546,17 @@ class BentoSearch::EbscoHostEngine
545
546
  "IB" => {:semantic => :isbn}
546
547
  }
547
548
  end
548
-
549
+
549
550
  def max_per_page
550
551
  # Actually only '50' if you ask for 'full' records, but I don't think
551
- # we need to do that ever, that's actually getting fulltext back!
552
+ # we need to do that ever, that's actually getting fulltext back!
552
553
  200
553
554
  end
554
-
555
+
555
556
  def self.required_configuration
556
557
  ["profile_id", "profile_password"]
557
558
  end
558
-
559
+
559
560
  def self.default_configuration
560
561
  {
561
562
  # /Search
@@ -563,5 +564,5 @@ class BentoSearch::EbscoHostEngine
563
564
  :databases => []
564
565
  }
565
566
  end
566
-
567
+
567
568
  end