bento_search 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/README.md +16 -2
  2. data/app/helpers/bento_search_helper.rb +4 -10
  3. data/app/item_decorators/bento_search/decorator_base.rb +17 -0
  4. data/app/item_decorators/bento_search/standard_decorator.rb +3 -3
  5. data/app/models/bento_search/link.rb +3 -0
  6. data/app/models/bento_search/result_item.rb +17 -1
  7. data/app/models/bento_search/results.rb +5 -0
  8. data/app/models/bento_search/ris_creator.rb +166 -0
  9. data/app/models/bento_search/search_engine.rb +22 -2
  10. data/app/search_engines/bento_search/ebsco_host_engine.rb +54 -6
  11. data/app/search_engines/bento_search/eds_engine.rb +18 -3
  12. data/app/search_engines/bento_search/google_books_engine.rb +81 -54
  13. data/app/search_engines/bento_search/mock_engine.rb +8 -0
  14. data/app/search_engines/bento_search/primo_engine.rb +6 -2
  15. data/app/search_engines/bento_search/scopus_engine.rb +4 -0
  16. data/app/search_engines/bento_search/summon_engine.rb +26 -4
  17. data/app/search_engines/bento_search/worldcat_sru_dc_engine.rb +15 -0
  18. data/app/views/bento_search/_link.html.erb +3 -2
  19. data/lib/bento_search/version.rb +1 -1
  20. data/test/dummy/app/views/_test_custom_item_partial.html.erb +9 -0
  21. data/test/dummy/log/development.log +1 -0
  22. data/test/dummy/log/test.log +84981 -0
  23. data/test/helper/bento_search_helper_test.rb +11 -1
  24. data/test/unit/ebsco_host_engine_test.rb +59 -0
  25. data/test/unit/eds_engine_test.rb +10 -0
  26. data/test/unit/google_books_engine_test.rb +20 -0
  27. data/test/unit/primo_engine_test.rb +1 -1
  28. data/test/unit/ris_creator_test.rb +159 -0
  29. data/test/unit/scopus_engine_test.rb +2 -0
  30. data/test/unit/search_engine_test.rb +37 -0
  31. data/test/unit/summon_engine_test.rb +21 -0
  32. data/test/unit/worldcat_sru_dc_engine_test.rb +151 -0
  33. data/test/vcr_cassettes/ebscohost/live__get_identifier__round_trip.yml +1324 -0
  34. data/test/vcr_cassettes/ebscohost/live_get_id__on_bad_db_raises.yml +45 -0
  35. data/test/vcr_cassettes/ebscohost/live_get_id__with_no_results_raises.yml +45 -0
  36. data/test/vcr_cassettes/gbs/live_get_id_.yml +415 -0
  37. data/test/vcr_cassettes/gbs/live_get_id__with_not_found_id.yml +41 -0
  38. data/test/vcr_cassettes/summon/live__get_id_.yml +313 -0
  39. data/test/vcr_cassettes/summon/live_get_id__on_non-existing_id.yml +54 -0
  40. data/test/vcr_cassettes/worldcat_sru_dc/live_get_id_.yml +769 -0
  41. data/test/vcr_cassettes/worldcat_sru_dc/live_get_id__for_bad_id.yml +68 -0
  42. data/test/view/link_test.rb +21 -0
  43. metadata +31 -6
  44. data/test/vcr_cassettes/max_out_pagination.yml +0 -155
data/README.md CHANGED
@@ -292,7 +292,7 @@ do not provide additional links by default, custom local Decorators would
292
292
  be used to add them. See wiki for more info on decorators, and BentoSearch::Link
293
293
  for fields.
294
294
 
295
- ## OpenURL and metadata
295
+ ### OpenURL and metadata
296
296
 
297
297
  Academic library uses often need openurl links from scholarly citations. One of
298
298
  the design goals of bento_search is to produce standardized normalized BentoSearch::ResultItem
@@ -309,7 +309,21 @@ can be solved.
309
309
 
310
310
  See `./app/item_decorators/bento_search/openurl_add_other_link.rb` for an example
311
311
  of using item decorators to add a link to your openurl resolver to an item when
312
- displayed.
312
+ displayed.
313
+
314
+ ### Exporting (eg as RIS) and get by unique_id
315
+
316
+ A class is included to convert an individual BentoSearch::ResultItem to
317
+ the RIS format, suitable for import into EndNote, Refworks, etc.
318
+
319
+ ~~~ruby
320
+ ris_data = RISCreator.new( bento_item ).export
321
+ ~~~
322
+
323
+ Accommodating actual exports into the transactional flow of a web app can be
324
+ tricky, and often requires use of the `result_item#unique_id` and
325
+ `engine.get( unique_id )` features. See the project wiki for details.
326
+
313
327
 
314
328
  ## Planned Features
315
329
 
@@ -60,12 +60,10 @@ module BentoSearchHelper
60
60
  end +
61
61
  content_tag(:div,
62
62
  :class => "bento_search_ajax_loading",
63
- :style => "display:none") do
64
-
63
+ :style => "display:none") do
65
64
  image_tag("bento_search/large_loader.gif",
66
- :alt => I18n.translate("bento_search.ajax_loading"),
65
+ :alt => I18n.translate("bento_search.ajax_loading")
67
66
  )
68
-
69
67
  end
70
68
  end
71
69
  else
@@ -88,13 +86,9 @@ module BentoSearchHelper
88
86
 
89
87
  # Wrap a ResultItem in a decorator! For now hard-coded to
90
88
  # BentoSearch::StandardDecorator
91
- def bento_decorate(result_item)
92
- # What decorator class? If specified as string in #decorator,
93
- # look it up as a class object, else default.
94
- decorator_class = result_item.decorator.try {|name| BentoSearch::Util.constantize(name) } || BentoSearch::StandardDecorator
95
-
89
+ def bento_decorate(result_item)
96
90
  # in a helper method, 'self' is a view_context already I think?
97
- decorated = decorator_class.new(result_item, self)
91
+ decorated = BentoSearch::DecoratorBase.decorate(result_item, self)
98
92
  yield(decorated) if block_given?
99
93
  return decorated
100
94
  end
@@ -49,5 +49,22 @@ module BentoSearch
49
49
  ERB::Util.html_escape(*args, &block)
50
50
  end
51
51
 
52
+ # Applies decorator to item and returns decorated item.
53
+ # uses standard logic to look up which decorator to apply or
54
+ # applies default one. The point of this method is just that
55
+ # standard logic.
56
+ #
57
+ # Need to pass a Rails ActionView::Context in, to use to
58
+ # initialize decorator. In Rails, in most places you can
59
+ # get one of those from #view_context. In helpers/views
60
+ # you can also use `self`.
61
+ def self.decorate(item, view_context)
62
+ # What decorator class? If specified as string in #decorator,
63
+ # look it up as a class object, else default.
64
+ decorator_class = item.decorator.try {|name| BentoSearch::Util.constantize(name) } || BentoSearch::StandardDecorator
65
+
66
+ return decorator_class.new(item, view_context)
67
+ end
68
+
52
69
  end
53
70
  end
@@ -16,11 +16,11 @@ module BentoSearch
16
16
 
17
17
  # How to display a BentoSearch::Author object as a name
18
18
  def author_display(author)
19
- if (author.first && author.last)
19
+ if (author.first.present? && author.last.present?)
20
20
  "#{author.last}, #{author.first.slice(0,1)}"
21
- elsif author.display
21
+ elsif author.display.present?
22
22
  author.display
23
- elsif author.last
23
+ elsif author.last.present?
24
24
  author.last
25
25
  else
26
26
  nil
@@ -18,6 +18,9 @@ module BentoSearch
18
18
  # contexts too.
19
19
  attr_accessor :style_classes
20
20
 
21
+ # Suggested `target` attribute to render link with as html <a>
22
+ attr_accessor :target
23
+
21
24
  def initialize(hash = {})
22
25
  self.style_classes = []
23
26
 
@@ -26,6 +26,11 @@ module BentoSearch
26
26
  self.custom_data ||= {}
27
27
  end
28
28
 
29
+ # internal unique id for the document, from the particular
30
+ # search service it came from. May be alphanumeric. May be nil
31
+ # for engines that don't support it.
32
+ attr_accessor :unique_id
33
+
29
34
  # If set to true, item will refuse to generate an openurl,
30
35
  # returning nil from #to_openurl or #openurl_kev
31
36
  attr_accessor :openurl_disabled
@@ -75,7 +80,6 @@ module BentoSearch
75
80
  # * WebPage
76
81
  # * VideoObject
77
82
  # * AudioObject
78
- # * SoftwareApplication
79
83
  #
80
84
  #
81
85
  #
@@ -185,6 +189,18 @@ module BentoSearch
185
189
  # qualified name of a decorator class. Can be nil for default.
186
190
  attr_accessor :decorator
187
191
 
192
+ # Copied over from engine configuration :for_display key
193
+ # by BentoSearch#search wrapper, here as a convenience to
194
+ # parameterize logic in decorators or other presentational logic, based
195
+ # on configuration, in places where logic has access to an item but
196
+ # not the overall Results obj anymore.
197
+ #
198
+ # TODO: Consider, should we just copy over the whole Results
199
+ # into a backpointing reference instead? And use cover-methods
200
+ # for it? Nice thing about the configuration hash instead is that it's
201
+ # easily serializable, it's just data.
202
+ attr_accessor :display_configuration
203
+ attr_accessor :engine_id
188
204
 
189
205
  end
190
206
  end
@@ -59,5 +59,10 @@ module BentoSearch
59
59
  def failed?
60
60
  ! error.nil?
61
61
  end
62
+
63
+ def inspect
64
+ "<BentoSearch::Results #{super} #{'FAILED' if self.failed?}>"
65
+ end
66
+
62
67
  end
63
68
  end
@@ -0,0 +1,166 @@
1
+ module BentoSearch
2
+ #
3
+ # Export a BentoSearch::ResultItem in RIS format, as a file
4
+ # to import into EndNote etc, or in a callback for Refworks export, etc.
5
+ #
6
+ # RISCreator.new( result_item ).export
7
+ #
8
+ # Note: We assume input and output in UTF8. The RIS spec kind of says
9
+ # it has to be ascii only, but most actual software seems to be able to do
10
+ # UTF8.
11
+ #
12
+ # Note: If you want your decorator to be taken into account in links
13
+ # or other data, you have to make sure it's applied. If you got result_item
14
+ # from SearchEngine#get, you should apply decorators yourself:
15
+ #
16
+ # RISCreator.new( BentoSearch::StandardDecorator.decorate(result_item) ).export
17
+ #
18
+ #
19
+ # Best spec/docs for RIS format seems to be at
20
+ # http://www.refman.com/support/risformat_intro.asp
21
+ # Download zip file there, pay attention to excel spreadsheet
22
+ # as well as PDF overview.
23
+ #
24
+ # But note this 'spec' is often ignored/violated, even by the vendors
25
+ # who wrote it. Wikipedia at http://en.wikipedia.org/wiki/RIS_(file_format)#Tags
26
+ # contains some additional tags not mentioned in 'spec'.
27
+ class RISCreator
28
+ def initialize(i)
29
+ @item = i
30
+ @ris_format = translate_ris_format
31
+ end
32
+
33
+ def export
34
+ out = "".force_encoding("UTF-8")
35
+
36
+ out << tag_format("TY", @ris_format)
37
+
38
+ out << tag_format("TI", @item.title)
39
+
40
+ @item.authors.each do |author|
41
+ out << tag_format("AU", format_author_name(author))
42
+ end
43
+
44
+ out << tag_format("PY", @item.year)
45
+ out << tag_format("DA", format_date(@item.publication_date))
46
+
47
+ out << tag_format("LA", @item.language_str)
48
+
49
+ out << tag_format("VL", @item.volume)
50
+ out << tag_format("IS", @item.issue)
51
+ out << tag_format("SP", @item.start_page)
52
+ out << tag_format("EP", @item.end_page)
53
+
54
+ out << tag_format("T2", @item.source_title)
55
+
56
+ # ISSN and ISBN both share SN, sigh.
57
+ out << tag_format("SN", @item.issn)
58
+ out << tag_format("SN", @item.isbn)
59
+ out << tag_format("DO", @item.doi)
60
+
61
+ out << tag_format("PB", @item.publisher)
62
+
63
+ out << tag_format("AB", @item.abstract)
64
+
65
+ # include main link and any other links?
66
+ out << tag_format("UR", @item.link)
67
+ @item.other_links.each do |link|
68
+ out << tag_format("UR", link.url)
69
+ end
70
+
71
+ # end with blank lines, so multiple ones can be concatenated for
72
+ # a file.
73
+ out << "\r\nER - \r\n\r\n"
74
+ end
75
+
76
+ @@format_map = {
77
+ # bento_search doesn't distinguish between journal, magazine, and newspaper,
78
+ # RIS does, sorry, we map all to journal article.
79
+ "Article" => "JOUR",
80
+ "Book" => "BOOK",
81
+ "Movie" => "MPCT",
82
+ "MusicRecording" => "MUSIC",
83
+ #"Photograph" => "GEN",
84
+ "SoftwareApplication" => "COMP",
85
+ "WebPage" => "ELEC",
86
+ "VideoObject" => "VIDEO",
87
+ "AudioObject" => "SOUND",
88
+ :serial => "SER",
89
+ :dissertation => "THES",
90
+ :conference_paper => "CPAPER",
91
+ :conference_proceedings => "CONF",
92
+ :report => "RPRT",
93
+ :book_item => "CHAP"
94
+ }
95
+
96
+ # based on current @item.format, output
97
+ # appropriate RIS format string
98
+ def translate_ris_format
99
+ # default "GEN"=generic if unknown
100
+ @@format_map[@item.format] || "GEN"
101
+ end
102
+
103
+ # Formats refworks tag/value line and returns it.
104
+ #
105
+ # Returns empty string if you pass in an empty value though.
106
+ #
107
+ # "Each six-character tag must be in the following format:
108
+ # "<upper-case letter><upper-case letter or number><space><space><dash><space>"
109
+ #
110
+ # "Each tag and its contents must be on a separate line,
111
+ # preceded by a "carriage return/line feed" (ANSI 13 10)."
112
+ #
113
+ # "Note, however, that the asterisk (character 42)
114
+ # is not allowed in the author, keywords or periodical name fields."
115
+ #
116
+ # The spec also seems to say ascii-only, but I don't think that's true
117
+ # for actually existing software, we do utf-8.
118
+ #
119
+ # Refworks MAY require unicode composed normalization if it accepts utf8
120
+ # at all. but not doing that yet. http://bibwild.wordpress.com/2010/04/28/refworks-problems-importing-diacritics/
121
+ def tag_format(tag, value)
122
+ return "" if value.blank?
123
+
124
+ raise ArgumentError.new("Illegal RIS tag") unless tag =~ /[A-Z][A-Z0-9]/
125
+
126
+ # "T2" seems to be the only "journal name field", which is
127
+ # mentioned along with these others as not being allowed to contain
128
+ # asterisk.
129
+ if ["AU", "A2", "A3", "A4", "KW", "T2"].include? tag
130
+ value = value.gsub("*", " ")
131
+ end
132
+
133
+ return "\r\n#{tag} - #{value}"
134
+ end
135
+
136
+ # Take a ruby Date and translate to RIS date format
137
+ # "YYYY/MM/DD/other info"
138
+ #
139
+ # returns nil if input is nil.
140
+ def format_date(d)
141
+ return nil if d.nil?
142
+
143
+ return d.strftime("%Y/%m/%d")
144
+ end
145
+
146
+ # RIS wants `Last, First M.`, we'll do what we can.
147
+ def format_author_name(author)
148
+ if author.last.present? && author.first.present?
149
+ str = "#{author.last}, #{author.first}"
150
+ if author.middle.present?
151
+ middle = author.middle
152
+ middle += "." if middle.length == 1
153
+ str += " #{middle}"
154
+ end
155
+ return str
156
+ elsif author.display.present?
157
+ return author.display
158
+ elsif author.last.present?
159
+ return author.last?
160
+ else
161
+ return nil
162
+ end
163
+ end
164
+
165
+ end
166
+ end
@@ -8,6 +8,12 @@ require 'multi_json'
8
8
  require 'nokogiri'
9
9
 
10
10
  module BentoSearch
11
+ # Usually raised by #get on an engine, when result for specified identifier
12
+ # can't be found.
13
+ class NotFound < Exception ; end
14
+ # Usually raised by #get when identifier results in more than one record.
15
+ class TooManyFound < Exception ; end
16
+
11
17
  # Module mix-in for bento_search search engines.
12
18
  #
13
19
  # ==Using a SearchEngine
@@ -219,7 +225,14 @@ module BentoSearch
219
225
  results.timing = (Time.now - start_t)
220
226
 
221
227
  results.display_configuration = configuration.for_display
222
- results.each {|item| item.decorator = configuration.lookup!("for_display.decorator") }
228
+ results.each do |item|
229
+ # We copy some configuration info over to each Item, as a convenience
230
+ # to display logic that may have to decide what to do given only an item,
231
+ # and may want to parameterize based on configuration.
232
+ item.engine_id = results.engine_id
233
+ item.decorator = configuration.lookup!("for_display.decorator")
234
+ item.display_configuration = configuration.for_display
235
+ end
223
236
 
224
237
  return results
225
238
  rescue *auto_rescue_exceptions => e
@@ -235,6 +248,13 @@ module BentoSearch
235
248
  failed = BentoSearch::Results.new
236
249
  failed.error ||= {}
237
250
  failed.error[:exception] = e
251
+
252
+ failed.search_args = arguments
253
+ failed.engine_id = configuration.id
254
+ failed.display_configuration = configuration.for_display
255
+ failed.timing = (Time.now - start_t)
256
+
257
+
238
258
  return failed
239
259
  end
240
260
 
@@ -355,7 +375,7 @@ module BentoSearch
355
375
  # to be something diff in production and dev?
356
376
  #
357
377
  # This default list is probably useful already, but individual
358
- # engines can override if it's convenient for their own error
378
+ # engines can override if it's convenient for their own error
359
379
  # handling.
360
380
  def auto_rescue_exceptions
361
381
  [TimeoutError, HTTPClient::TimeoutError,
@@ -28,6 +28,8 @@ require 'httpclient'
28
28
  # one or both of pubyear_start and pubyear_end
29
29
  # #to_i will be called on it, so can be string.
30
30
  # .search(:query => "foo", :pubyear_start => 2000)
31
+ # [:databases] List of licensed EBSCO dbs to search, can override
32
+ # list set in config databases, just for this search.
31
33
  #
32
34
  # == Custom response data
33
35
  #
@@ -103,8 +105,18 @@ require 'httpclient'
103
105
  class BentoSearch::EbscoHostEngine
104
106
  include BentoSearch::SearchEngine
105
107
 
106
- extend HTTPClientPatch::IncludeClient
107
- include_http_client
108
+ # Can't change http timeout in config, because we keep an http
109
+ # client at class-wide level, and config is not class-wide.
110
+ # Change this 'constant' if you want to change it, I guess.
111
+ #
112
+ # In some tests we did, 5.2s was 95th percentile slowest, but in
113
+ # actual percentage 5.2s is still timing out way too many requests,
114
+ # let's try 6.3, why not.
115
+ HttpTimeout = 6.3
116
+ extend HTTPClientPatch::IncludeClient
117
+ include_http_client do |client|
118
+ client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
119
+ end
108
120
 
109
121
  # Include some rails helpers, text_helper.truncate
110
122
  def text_helper
@@ -117,7 +129,7 @@ class BentoSearch::EbscoHostEngine
117
129
 
118
130
  def search_implementation(args)
119
131
  url = query_url(args)
120
-
132
+
121
133
  results = BentoSearch::Results.new
122
134
  xml, response, exception = nil, nil, nil
123
135
 
@@ -162,6 +174,34 @@ class BentoSearch::EbscoHostEngine
162
174
 
163
175
  end
164
176
 
177
+ # Method to get a single record by "identifier" string, which is really
178
+ # a combined "db:id" string, same string that would be returned by
179
+ # an individual item.unique_id
180
+ #
181
+ # Returns an individual BentoSearch::ResultItem, or raises an exception.
182
+ # Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
183
+ # any other weird random exception caused by problems fetching (network
184
+ # error etc. Is it bad that we don't wrap these in an expected single
185
+ # exception type? Should we?)
186
+ def get(id)
187
+ # split on first colon only.
188
+ id =~ /^([^:]+)\:(.*)$/
189
+ db = $1 ; an = $2
190
+
191
+ raise ArgumentError.new("EbscoHostEngine#get requires an id with a colon, like `a9h:12345`. Instead, we got #{id}") unless db && an
192
+
193
+ # "AN" search_field is not listed in our search_field_definitions,
194
+ # but it is an internal EBSCOHost search index on 'accession number'
195
+
196
+ results = search(an, :search_field => "AN", :databases => [db])
197
+
198
+ raise (results.error[:exception] || Exception.new) if results.failed?
199
+ raise BentoSearch::NotFound.new("For id: #{id}") if results.length == 0
200
+ raise BentoSearch::TooManyFound.new("For id: #{id}") if results.length > 1
201
+
202
+ return results.first
203
+ end
204
+
165
205
  # pass in nokogiri record xml for the records/rec node.
166
206
  # Returns nil if NO fulltext is avail on ebsco platform,
167
207
  # non-nil if fulltext is available. Non-nil value will
@@ -351,8 +391,8 @@ class BentoSearch::EbscoHostEngine
351
391
  url += "&sort=#{ sort_definitions[args[:sort]][:implementation]}"
352
392
 
353
393
  # Contrary to docs, don't pass these comma-separated, pass 'em in separate
354
- # query params.
355
- configuration.databases.each do |db|
394
+ # query params. args databases overrides config databases.
395
+ (args[:databases] || configuration.databases).each do |db|
356
396
  url += "&db=#{db}"
357
397
  end
358
398
 
@@ -361,11 +401,19 @@ class BentoSearch::EbscoHostEngine
361
401
 
362
402
  # pass in a nokogiri representing an EBSCO <rec> result,
363
403
  # we'll turn it into a BentoSearch::ResultItem.
364
- def item_from_xml(xml_rec)
404
+ def item_from_xml(xml_rec)
365
405
  info = xml_rec.at_xpath("./header/controlInfo")
366
406
 
367
407
  item = BentoSearch::ResultItem.new
368
408
 
409
+ # Get unique id. Think we need both the database code and accession
410
+ # number combined, accession numbers not necessarily unique across
411
+ # dbs. We'll combine with a colon.
412
+ db = text_if_present xml_rec.at_xpath("./header/@shortDbName")
413
+ accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
414
+ item.unique_id = "#{db}:#{accession}" if db && accession
415
+
416
+
369
417
  item.link = get_link(xml_rec)
370
418
 
371
419
  item.issn = text_if_present info.at_xpath("./jinfo/issn")