bento_search 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/README.md +16 -2
  2. data/app/helpers/bento_search_helper.rb +4 -10
  3. data/app/item_decorators/bento_search/decorator_base.rb +17 -0
  4. data/app/item_decorators/bento_search/standard_decorator.rb +3 -3
  5. data/app/models/bento_search/link.rb +3 -0
  6. data/app/models/bento_search/result_item.rb +17 -1
  7. data/app/models/bento_search/results.rb +5 -0
  8. data/app/models/bento_search/ris_creator.rb +166 -0
  9. data/app/models/bento_search/search_engine.rb +22 -2
  10. data/app/search_engines/bento_search/ebsco_host_engine.rb +54 -6
  11. data/app/search_engines/bento_search/eds_engine.rb +18 -3
  12. data/app/search_engines/bento_search/google_books_engine.rb +81 -54
  13. data/app/search_engines/bento_search/mock_engine.rb +8 -0
  14. data/app/search_engines/bento_search/primo_engine.rb +6 -2
  15. data/app/search_engines/bento_search/scopus_engine.rb +4 -0
  16. data/app/search_engines/bento_search/summon_engine.rb +26 -4
  17. data/app/search_engines/bento_search/worldcat_sru_dc_engine.rb +15 -0
  18. data/app/views/bento_search/_link.html.erb +3 -2
  19. data/lib/bento_search/version.rb +1 -1
  20. data/test/dummy/app/views/_test_custom_item_partial.html.erb +9 -0
  21. data/test/dummy/log/development.log +1 -0
  22. data/test/dummy/log/test.log +84981 -0
  23. data/test/helper/bento_search_helper_test.rb +11 -1
  24. data/test/unit/ebsco_host_engine_test.rb +59 -0
  25. data/test/unit/eds_engine_test.rb +10 -0
  26. data/test/unit/google_books_engine_test.rb +20 -0
  27. data/test/unit/primo_engine_test.rb +1 -1
  28. data/test/unit/ris_creator_test.rb +159 -0
  29. data/test/unit/scopus_engine_test.rb +2 -0
  30. data/test/unit/search_engine_test.rb +37 -0
  31. data/test/unit/summon_engine_test.rb +21 -0
  32. data/test/unit/worldcat_sru_dc_engine_test.rb +151 -0
  33. data/test/vcr_cassettes/ebscohost/live__get_identifier__round_trip.yml +1324 -0
  34. data/test/vcr_cassettes/ebscohost/live_get_id__on_bad_db_raises.yml +45 -0
  35. data/test/vcr_cassettes/ebscohost/live_get_id__with_no_results_raises.yml +45 -0
  36. data/test/vcr_cassettes/gbs/live_get_id_.yml +415 -0
  37. data/test/vcr_cassettes/gbs/live_get_id__with_not_found_id.yml +41 -0
  38. data/test/vcr_cassettes/summon/live__get_id_.yml +313 -0
  39. data/test/vcr_cassettes/summon/live_get_id__on_non-existing_id.yml +54 -0
  40. data/test/vcr_cassettes/worldcat_sru_dc/live_get_id_.yml +769 -0
  41. data/test/vcr_cassettes/worldcat_sru_dc/live_get_id__for_bad_id.yml +68 -0
  42. data/test/view/link_test.rb +21 -0
  43. metadata +31 -6
  44. data/test/vcr_cassettes/max_out_pagination.yml +0 -155
data/README.md CHANGED
@@ -292,7 +292,7 @@ do not provide additional links by default, custom local Decorators would
292
292
  be used to add them. See wiki for more info on decorators, and BentoSearch::Link
293
293
  for fields.
294
294
 
295
- ## OpenURL and metadata
295
+ ### OpenURL and metadata
296
296
 
297
297
  Academic library uses often need openurl links from scholarly citations. One of
298
298
  the design goals of bento_search is to produce standardized normalized BentoSearch::ResultItem
@@ -309,7 +309,21 @@ can be solved.
309
309
 
310
310
  See `./app/item_decorators/bento_search/openurl_add_other_link.rb` for an example
311
311
  of using item decorators to add a link to your openurl resolver to an item when
312
- displayed.
312
+ displayed.
313
+
314
+ ### Exporting (eg as RIS) and get by unique_id
315
+
316
+ A class is included to convert an individual BentoSearch::ResultItem to
317
+ the RIS format, suitable for import into EndNote, Refworks, etc.
318
+
319
+ ~~~ruby
320
+ ris_data = RISCreator.new( bento_item ).export
321
+ ~~~
322
+
323
+ Accommodating actual exports into the transactional flow of a web app can be
324
+ tricky, and often requires use of the `result_item#unique_id` and
325
+ `engine.get( unique_id )` features. See the wiki at
326
+
313
327
 
314
328
  ## Planned Features
315
329
 
@@ -60,12 +60,10 @@ module BentoSearchHelper
60
60
  end +
61
61
  content_tag(:div,
62
62
  :class => "bento_search_ajax_loading",
63
- :style => "display:none") do
64
-
63
+ :style => "display:none") do
65
64
  image_tag("bento_search/large_loader.gif",
66
- :alt => I18n.translate("bento_search.ajax_loading"),
65
+ :alt => I18n.translate("bento_search.ajax_loading")
67
66
  )
68
-
69
67
  end
70
68
  end
71
69
  else
@@ -88,13 +86,9 @@ module BentoSearchHelper
88
86
 
89
87
  # Wrap a ResultItem in a decorator! For now hard-coded to
90
88
  # BentoSearch::StandardDecorator
91
- def bento_decorate(result_item)
92
- # What decorator class? If specified as string in #decorator,
93
- # look it up as a class object, else default.
94
- decorator_class = result_item.decorator.try {|name| BentoSearch::Util.constantize(name) } || BentoSearch::StandardDecorator
95
-
89
+ def bento_decorate(result_item)
96
90
  # in a helper method, 'self' is a view_context already I think?
97
- decorated = decorator_class.new(result_item, self)
91
+ decorated = BentoSearch::DecoratorBase.decorate(result_item, self)
98
92
  yield(decorated) if block_given?
99
93
  return decorated
100
94
  end
@@ -49,5 +49,22 @@ module BentoSearch
49
49
  ERB::Util.html_escape(*args, &block)
50
50
  end
51
51
 
52
+ # Applies decorator to item and returns decorated item.
53
+ # uses standard logic to look up which decorator to apply or
54
+ # applies default one. The point of this method is just that
55
+ # standard logic.
56
+ #
57
+ # Need to pass a Rails ActionView::Context in, to use to
58
+ # initialize decorator. In Rails, in most places you can
59
+ # get one of those from #view_context. In helpers/views
60
+ # you can also use `self`.
61
+ def self.decorate(item, view_context)
62
+ # What decorator class? If specified as string in #decorator,
63
+ # look it up as a class object, else default.
64
+ decorator_class = item.decorator.try {|name| BentoSearch::Util.constantize(name) } || BentoSearch::StandardDecorator
65
+
66
+ return decorator_class.new(item, view_context)
67
+ end
68
+
52
69
  end
53
70
  end
@@ -16,11 +16,11 @@ module BentoSearch
16
16
 
17
17
  # How to display a BentoSearch::Author object as a name
18
18
  def author_display(author)
19
- if (author.first && author.last)
19
+ if (author.first.present? && author.last.present?)
20
20
  "#{author.last}, #{author.first.slice(0,1)}"
21
- elsif author.display
21
+ elsif author.display.present?
22
22
  author.display
23
- elsif author.last
23
+ elsif author.last.present?
24
24
  author.last
25
25
  else
26
26
  nil
@@ -18,6 +18,9 @@ module BentoSearch
18
18
  # contexts too.
19
19
  attr_accessor :style_classes
20
20
 
21
+ # Suggested `target` attribute to render link with as html <a>
22
+ attr_accessor :target
23
+
21
24
  def initialize(hash = {})
22
25
  self.style_classes = []
23
26
 
@@ -26,6 +26,11 @@ module BentoSearch
26
26
  self.custom_data ||= {}
27
27
  end
28
28
 
29
+ # internal unique id for the document, from the particular
30
+ # search service it came from. May be alphanumeric. May be nil
31
+ # for engines that don't support it.
32
+ attr_accessor :unique_id
33
+
29
34
  # If set to true, item will refuse to generate an openurl,
30
35
  # returning nil from #to_openurl or #openurl_kev
31
36
  attr_accessor :openurl_disabled
@@ -75,7 +80,6 @@ module BentoSearch
75
80
  # * WebPage
76
81
  # * VideoObject
77
82
  # * AudioObject
78
- # * SoftwareApplication
79
83
  #
80
84
  #
81
85
  #
@@ -185,6 +189,18 @@ module BentoSearch
185
189
  # qualified name of a decorator class. Can be nil for default.
186
190
  attr_accessor :decorator
187
191
 
192
+ # Copied over from engine configuration :for_display key
193
+ # by BentoSearch#search wrapper, here as a convenience to
194
+ # parameterize logic in decorators or other presentational logic, based
195
+ # on configuration, in places where logic has access to an item but
196
+ # not the overall Results obj anymore.
197
+ #
198
+ # TODO: Consider, should we just copy over the whole Results
199
+ # into a backpointing reference instead? And use cover-methods
200
+ # for it? Nice thing about the configuration hash instead is it's
201
+ # easily serializable, it's just data.
202
+ attr_accessor :display_configuration
203
+ attr_accessor :engine_id
188
204
 
189
205
  end
190
206
  end
@@ -59,5 +59,10 @@ module BentoSearch
59
59
  def failed?
60
60
  ! error.nil?
61
61
  end
62
+
63
+ def inspect
64
+ "<BentoSearch::Results #{super} #{'FAILED' if self.failed?}>"
65
+ end
66
+
62
67
  end
63
68
  end
@@ -0,0 +1,166 @@
1
+ module BentoSearch
2
+ #
3
+ # Export a BentoSearch::ResultItem in RIS format, as a file
4
+ # to import into EndNote etc, or in a callback for Refworks export, etc.
5
+ #
6
+ # RISCreator.new( result_item ).export
7
+ #
8
+ # Note: We assume input and output in UTF8. The RIS spec kind of says
9
+ # it has to be ascii only, but most actual software seems to be able to do
10
+ # UTF8.
11
+ #
12
+ # Note: If you want your decorator to be taken into account in links
13
+ # or other data, you have to make sure it's applied. If you got result_item
14
+ # from SearchEngine#get, you should apply decorators yourself:
15
+ #
16
+ # RISCreator.new( BentoSearch::StandardDecorator.decorate(result_item) ).export
17
+ #
18
+ #
19
+ # Best spec/docs for RIS format seems to be at
20
+ # http://www.refman.com/support/risformat_intro.asp
21
+ # Download zip file there, pay attention to excel spreadsheet
22
+ # as well as PDF overview.
23
+ #
24
+ # But note this 'spec' is often ignored/violated, even by the vendors
25
+ # who wrote it. Wikipedia at http://en.wikipedia.org/wiki/RIS_(file_format)#Tags
26
+ # contains some additional tags not mentioned in 'spec'.
27
+ class RISCreator
28
+ def initialize(i)
29
+ @item = i
30
+ @ris_format = translate_ris_format
31
+ end
32
+
33
+ def export
34
+ out = "".force_encoding("UTF-8")
35
+
36
+ out << tag_format("TY", @ris_format)
37
+
38
+ out << tag_format("TI", @item.title)
39
+
40
+ @item.authors.each do |author|
41
+ out << tag_format("AU", format_author_name(author))
42
+ end
43
+
44
+ out << tag_format("PY", @item.year)
45
+ out << tag_format("DA", format_date(@item.publication_date))
46
+
47
+ out << tag_format("LA", @item.language_str)
48
+
49
+ out << tag_format("VL", @item.volume)
50
+ out << tag_format("IS", @item.issue)
51
+ out << tag_format("SP", @item.start_page)
52
+ out << tag_format("EP", @item.end_page)
53
+
54
+ out << tag_format("T2", @item.source_title)
55
+
56
+ # ISSN and ISBN both share SN, sigh.
57
+ out << tag_format("SN", @item.issn)
58
+ out << tag_format("SN", @item.isbn)
59
+ out << tag_format("DO", @item.doi)
60
+
61
+ out << tag_format("PB", @item.publisher)
62
+
63
+ out << tag_format("AB", @item.abstract)
64
+
65
+ # include main link and any other links?
66
+ out << tag_format("UR", @item.link)
67
+ @item.other_links.each do |link|
68
+ out << tag_format("UR", link.url)
69
+ end
70
+
71
+ # end with blank lines, so multiple ones can be concatenated for
72
+ # a file.
73
+ out << "\r\nER - \r\n\r\n"
74
+ end
75
+
76
+ @@format_map = {
77
+ # bento_search doesn't distinguish between journal, magazine, and newspaper,
78
+ # RIS does, sorry, we map all to journal article.
79
+ "Article" => "JOUR",
80
+ "Book" => "BOOK",
81
+ "Movie" => "MPCT",
82
+ "MusicRecording" => "MUSIC",
83
+ #"Photograph" => "GEN",
84
+ "SoftwareApplication" => "COMP",
85
+ "WebPage" => "ELEC",
86
+ "VideoObject" => "VIDEO",
87
+ "AudioObject" => "SOUND",
88
+ :serial => "SER",
89
+ :dissertation => "THES",
90
+ :conference_paper => "CPAPER",
91
+ :conference_proceedings => "CONF",
92
+ :report => "RPRT",
93
+ :book_item => "CHAP"
94
+ }
95
+
96
+ # based on current @item.format, output
97
+ # appropriate RIS format string
98
+ def translate_ris_format
99
+ # default "GEN"=generic if unknown
100
+ @@format_map[@item.format] || "GEN"
101
+ end
102
+
103
+ # Formats refworks tag/value line and returns it.
104
+ #
105
+ # Returns empty string if you pass in an empty value though.
106
+ #
107
+ # "Each six-character tag must be in the following format:
108
+ # "<upper-case letter><upper-case letter or number><space><space><dash><space>"
109
+ #
110
+ # "Each tag and its contents must be on a separate line,
111
+ # preceded by a "carriage return/line feed" (ANSI 13 10)."
112
+ #
113
+ # "Note, however, that the asterisk (character 42)
114
+ # is not allowed in the author, keywords or periodical name fields."
115
+ #
116
+ # The spec also seems to say ascii-only, but I don't think that's true
117
+ # for actually existing software, we do utf-8.
118
+ #
119
+ # Refworks MAY require unicode composed normalization if it accepts utf8
120
+ # at all. but not doing that yet. http://bibwild.wordpress.com/2010/04/28/refworks-problems-importing-diacritics/
121
+ def tag_format(tag, value)
122
+ return "" if value.blank?
123
+
124
+ raise ArgumentError.new("Illegal RIS tag") unless tag =~ /[A-Z][A-Z0-9]/
125
+
126
+ # "T2" seems to be the only "journal name field", which is
127
+ # mentioned along with these others as not being allowed to contain
128
+ # asterisk.
129
+ if ["AU", "A2", "A3", "A4", "KW", "T2"].include? tag
130
+ value = value.gsub("*", " ")
131
+ end
132
+
133
+ return "\r\n#{tag} - #{value}"
134
+ end
135
+
136
+ # Take a ruby Date and translate to RIS date format
137
+ # "YYYY/MM/DD/other info"
138
+ #
139
+ # returns nil if input is nil.
140
+ def format_date(d)
141
+ return nil if d.nil?
142
+
143
+ return d.strftime("%Y/%m/%d")
144
+ end
145
+
146
+ # RIS wants `Last, First M.`, we'll do what we can.
147
+ def format_author_name(author)
148
+ if author.last.present? && author.first.present?
149
+ str = "#{author.last}, #{author.first}"
150
+ if author.middle.present?
151
+ middle = author.middle
152
+ middle += "." if middle.length == 1
153
+ str += " #{middle}"
154
+ end
155
+ return str
156
+ elsif author.display.present?
157
+ return author.display
158
+ elsif author.last.present?
159
+ return author.last?
160
+ else
161
+ return nil
162
+ end
163
+ end
164
+
165
+ end
166
+ end
@@ -8,6 +8,12 @@ require 'multi_json'
8
8
  require 'nokogiri'
9
9
 
10
10
  module BentoSearch
11
+ # Usually raised by #get on an engine, when result for specified identifier
12
+ # can't be found.
13
+ class NotFound < Exception ; end
14
+ # Usually raised by #get when identifier results in more than one record.
15
+ class TooManyFound < Exception ; end
16
+
11
17
  # Module mix-in for bento_search search engines.
12
18
  #
13
19
  # ==Using a SearchEngine
@@ -219,7 +225,14 @@ module BentoSearch
219
225
  results.timing = (Time.now - start_t)
220
226
 
221
227
  results.display_configuration = configuration.for_display
222
- results.each {|item| item.decorator = configuration.lookup!("for_display.decorator") }
228
+ results.each do |item|
229
+ # We copy some configuration info over to each Item, as a convenience
230
+ # to display logic that may have to decide what to do given only an item,
231
+ # and may want to parameterize based on configuration.
232
+ item.engine_id = results.engine_id
233
+ item.decorator = configuration.lookup!("for_display.decorator")
234
+ item.display_configuration = configuration.for_display
235
+ end
223
236
 
224
237
  return results
225
238
  rescue *auto_rescue_exceptions => e
@@ -235,6 +248,13 @@ module BentoSearch
235
248
  failed = BentoSearch::Results.new
236
249
  failed.error ||= {}
237
250
  failed.error[:exception] = e
251
+
252
+ failed.search_args = arguments
253
+ failed.engine_id = configuration.id
254
+ failed.display_configuration = configuration.for_display
255
+ failed.timing = (Time.now - start_t)
256
+
257
+
238
258
  return failed
239
259
  end
240
260
 
@@ -355,7 +375,7 @@ module BentoSearch
355
375
  # to be something diff in production and dev?
356
376
  #
357
377
  # This default list is probably useful already, but individual
358
- # engines can override if it's convenient for their own error
378
+ # engines can override if it's convenient for their own error
359
379
  # handling.
360
380
  def auto_rescue_exceptions
361
381
  [TimeoutError, HTTPClient::TimeoutError,
@@ -28,6 +28,8 @@ require 'httpclient'
28
28
  # one or both of pubyear_start and pubyear_end
29
29
  # #to_i will be called on it, so can be string.
30
30
  # .search(:query => "foo", :pubyear_start => 2000)
31
+ # [:databases] List of licensed EBSCO dbs to search, can override
32
+ # list set in config databases, just for this search.
31
33
  #
32
34
  # == Custom response data
33
35
  #
@@ -103,8 +105,18 @@ require 'httpclient'
103
105
  class BentoSearch::EbscoHostEngine
104
106
  include BentoSearch::SearchEngine
105
107
 
106
- extend HTTPClientPatch::IncludeClient
107
- include_http_client
108
+ # Can't change http timeout in config, because we keep an http
109
+ # client at class-wide level, and config is not class-wide.
110
+ # Change this 'constant' if you want to change it, I guess.
111
+ #
112
+ # In some tests we did, 5.2s was 95th percentile slowest, but in
113
+ # actual percentage 5.2s is still timing out way too many requests,
114
+ # let's try 6.3, why not.
115
+ HttpTimeout = 6.3
116
+ extend HTTPClientPatch::IncludeClient
117
+ include_http_client do |client|
118
+ client.connect_timeout = client.send_timeout = client.receive_timeout = HttpTimeout
119
+ end
108
120
 
109
121
  # Include some rails helpers, text_helper.truncate
110
122
  def text_helper
@@ -117,7 +129,7 @@ class BentoSearch::EbscoHostEngine
117
129
 
118
130
  def search_implementation(args)
119
131
  url = query_url(args)
120
-
132
+
121
133
  results = BentoSearch::Results.new
122
134
  xml, response, exception = nil, nil, nil
123
135
 
@@ -162,6 +174,34 @@ class BentoSearch::EbscoHostEngine
162
174
 
163
175
  end
164
176
 
177
+ # Method to get a single record by "identifier" string, which is really
178
+ # a combined "db:id" string, same string that would be returned by
179
+ # an individual item.unique_id
180
+ #
181
+ # Returns an individual BentoSearch::ResultItem, or raises an exception.
182
+ # Can raise BentoSearch::NotFound, BentoSearch::TooManyFound, or
183
+ # any other weird random exception caused by problems fetching (network
184
+ # error etc. Is it bad that we don't wrap these in an expected single
185
+ # exception type? Should we?)
186
+ def get(id)
187
+ # split on first colon only.
188
+ id =~ /^([^:]+)\:(.*)$/
189
+ db = $1 ; an = $2
190
+
191
+ raise ArgumentError.new("EbscoHostEngine#get requires an id with a colon, like `a9h:12345`. Instead, we got #{id}") unless db && an
192
+
193
+ # "AN" search_field is not listed in our search_field_definitions,
194
+ # but it is an internal EBSCOHost search index on 'accession number'
195
+
196
+ results = search(an, :search_field => "AN", :databases => [db])
197
+
198
+ raise (results.error[:exception] || Exception.new) if results.failed?
199
+ raise BentoSearch::NotFound.new("For id: #{id}") if results.length == 0
200
+ raise BentoSearch::TooManyFound.new("For id: #{id}") if results.length > 1
201
+
202
+ return results.first
203
+ end
204
+
165
205
  # pass in nokogiri record xml for the records/rec node.
166
206
  # Returns nil if NO fulltext is avail on ebsco platform,
167
207
  # non-nil if fulltext is available. Non-nil value will
@@ -351,8 +391,8 @@ class BentoSearch::EbscoHostEngine
351
391
  url += "&sort=#{ sort_definitions[args[:sort]][:implementation]}"
352
392
 
353
393
  # Contrary to docs, don't pass these comma-separated, pass em in separate
354
- # query params.
355
- configuration.databases.each do |db|
394
+ # query params. args databases overrides config databases.
395
+ (args[:databases] || configuration.databases).each do |db|
356
396
  url += "&db=#{db}"
357
397
  end
358
398
 
@@ -361,11 +401,19 @@ class BentoSearch::EbscoHostEngine
361
401
 
362
402
  # pass in a nokogiri representing an EBSCO <rec> result,
363
403
  # we'll turn it into a BentoSearch::ResultItem.
364
- def item_from_xml(xml_rec)
404
+ def item_from_xml(xml_rec)
365
405
  info = xml_rec.at_xpath("./header/controlInfo")
366
406
 
367
407
  item = BentoSearch::ResultItem.new
368
408
 
409
+ # Get unique id. Think we need both the database code and accession
410
+ # number combined, accession numbers not necessarily unique across
411
+ # dbs. We'll combine with a colon.
412
+ db = text_if_present xml_rec.at_xpath("./header/@shortDbName")
413
+ accession = text_if_present xml_rec.at_xpath("./header/@uiTerm")
414
+ item.unique_id = "#{db}:#{accession}" if db && accession
415
+
416
+
369
417
  item.link = get_link(xml_rec)
370
418
 
371
419
  item.issn = text_if_present info.at_xpath("./jinfo/issn")