bento_search 1.4.4 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +41 -19
  3. data/app/models/bento_search/result_item.rb +1 -1
  4. data/app/models/bento_search/search_engine.rb +36 -3
  5. data/app/models/bento_search/search_engine/capabilities.rb +14 -0
  6. data/app/search_engines/bento_search/doaj_articles_engine.rb +279 -0
  7. data/app/search_engines/bento_search/ebsco_host_engine.rb +27 -7
  8. data/app/search_engines/bento_search/google_books_engine.rb +8 -1
  9. data/app/search_engines/bento_search/mock_engine.rb +8 -2
  10. data/app/search_engines/bento_search/scopus_engine.rb +27 -8
  11. data/app/search_engines/bento_search/summon_engine.rb +1 -1
  12. data/app/search_engines/bento_search/worldcat_sru_dc_engine.rb +22 -3
  13. data/config/locales/en.yml +5 -2
  14. data/lib/bento_search/version.rb +1 -1
  15. data/test/dummy/config/environments/development.rb +0 -4
  16. data/test/dummy/config/environments/production.rb +0 -4
  17. data/test/search_engines/doaj_articles_engine_test.rb +200 -0
  18. data/test/search_engines/ebsco_host_engine_test.rb +38 -0
  19. data/test/search_engines/google_books_engine_test.rb +18 -2
  20. data/test/search_engines/scopus_engine_test.rb +45 -1
  21. data/test/search_engines/search_engine_base_test.rb +59 -0
  22. data/test/search_engines/worldcat_sru_dc_engine_test.rb +17 -0
  23. data/test/vcr_cassettes/doaj_articles/basic_search.yml +97 -0
  24. data/test/vcr_cassettes/doaj_articles/catches_errors.yml +42 -0
  25. data/test/vcr_cassettes/doaj_articles/complex_multi-field.yml +67 -0
  26. data/test/vcr_cassettes/doaj_articles/live__get_identifier__round_trip.yml +387 -0
  27. data/test/vcr_cassettes/doaj_articles/live_get_identifier__raises_on_no_results.yml +41 -0
  28. data/test/vcr_cassettes/doaj_articles/multifield_author-title.yml +79 -0
  29. data/test/vcr_cassettes/doaj_articles/pagination.yml +691 -0
  30. data/test/vcr_cassettes/ebscohost/affiliation_search.yml +929 -0
  31. data/test/vcr_cassettes/ebscohost/multi-field_author_title.yml +122 -0
  32. data/test/vcr_cassettes/ebscohost/multi-field_citation_numbers.yml +122 -0
  33. data/test/vcr_cassettes/scopus/multi-field_search.yml +55 -0
  34. data/test/vcr_cassettes/scopus/multi-fielded_citation_details_search.yml +86 -0
  35. data/test/vcr_cassettes/worldcat_sru_dc/multi_field_search.yml +1839 -0
  36. metadata +31 -2
@@ -358,12 +358,11 @@ class BentoSearch::EbscoHostEngine
358
358
  url =
359
359
  "#{configuration.base_url}/Search?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
360
360
 
361
- query = ebsco_query_prepare args[:query]
362
-
363
-
364
- # wrap in (FI $query) if fielded search
365
- if args[:search_field]
366
- query = "(#{args[:search_field]} #{query})"
361
+ query = if args[:query].kind_of?(Hash)
362
+ # multi-field query
363
+ args[:query].collect {|field, query| fielded_query(query, field)}.join(" AND ")
364
+ else
365
+ fielded_query(args[:query], args[:search_field])
367
366
  end
368
367
 
369
368
  # peer-reviewed only?
@@ -401,6 +400,17 @@ class BentoSearch::EbscoHostEngine
401
400
  return url
402
401
  end
403
402
 
403
# Builds the EBSCO query clause for one query string.
#
# The raw query is always passed through #ebsco_query_prepare first. When a
# search field code (e.g. "TI") is supplied, the *prepared* query is wrapped
# as "(FIELD query)"; otherwise the prepared query is returned on its own.
#
# query - raw query String as supplied by the caller
# field - optional search field code; nil or blank means unfielded search
#
# Returns the query clause String.
def fielded_query(query, field = nil)
  prepared = ebsco_query_prepare(query)

  # Unfielded search: nothing to wrap.
  return prepared unless field.present?

  # wrap in (FI $query) if fielded search. Interpolate the prepared text,
  # not the raw argument, so the preparation step is not silently dropped
  # for fielded and multi-field searches.
  "(#{field} #{prepared})"
end
413
+
404
414
  # pass in a nokogiri representing an EBSCO <rec> result,
405
415
  # we'll turn it into a BentoSearch::ResultItem.
406
416
  def item_from_xml(xml_rec)
@@ -547,10 +557,20 @@ class BentoSearch::EbscoHostEngine
547
557
  "TI" => {:semantic => :title},
548
558
  "SU" => {:semantic => :subject},
549
559
  "IS" => {:semantic => :issn},
550
- "IB" => {:semantic => :isbn}
560
+ "IB" => {:semantic => :isbn},
561
+ "SO" => {:semantic => :source_title},
562
+ # These may not be defined in all databases....
563
+ "VI" => {:semantic => :volume},
564
+ "IP" => {:semantic => :issue},
565
+ "SP" => {:semantic => :start_page},
566
+ "AF" => {:semantic => :author_affiliation}
551
567
  }
552
568
  end
553
569
 
570
# Reports that this engine supports multi-field search, i.e. a Hash of
# field => query passed as the :query argument. Always true.
+ def multi_field_search?
571
+ true
572
+ end
573
+
554
574
  def max_per_page
555
575
  # Actually only '50' if you ask for 'full' records, but I don't think
556
576
  # we need to do that ever, that's actually getting fulltext back!
@@ -205,6 +205,10 @@ module BentoSearch
205
205
  }
206
206
  end
207
207
 
208
# Reports that this engine supports multi-field search, i.e. a Hash of
# field => query passed as the :query argument. Always true.
+ def multi_field_search?
209
+ true
210
+ end
211
+
208
212
  protected
209
213
 
210
214
 
@@ -217,7 +221,10 @@ module BentoSearch
217
221
  # turns it into a URL for Google API. Factored out to make testing
218
222
  # possible.
219
223
  def args_to_search_url(arguments)
220
- query = if arguments[:search_field]
224
+ query = if arguments[:query].kind_of? Hash
225
+ #multi-field
226
+ arguments[:query].collect {|field, query| fielded_query(query, field)}.join(" ")
227
+ elsif arguments[:search_field]
221
228
  fielded_query(arguments[:query], arguments[:search_field])
222
229
  else
223
230
  arguments[:query]
@@ -9,6 +9,7 @@
9
9
  # specified per_page, or 10)
10
10
  # [:total_items] total_items to report
11
11
  # [:sort_definitions] hash for #sort_definitions
12
+ # [:search_field_definitions] hash for #search_field_definitions
12
13
  # [:link] link to give to each item in results
13
14
  # [:error] set to an error value hash and results returned
14
15
  # will all be failed? with that error hash.
@@ -16,6 +17,7 @@
16
17
  # to be caught by BentoSearch::SearchEngine wrapper possibly.
17
18
  # [:timing] in seconds, fill out the results as if they took
18
19
  # this long.
20
+ # [:multi_field_search] true or false, reported by #multi_field_search? (false when unset)
19
21
  class BentoSearch::MockEngine
20
22
  include BentoSearch::SearchEngine
21
23
 
@@ -58,11 +60,15 @@ class BentoSearch::MockEngine
58
60
  end
59
61
 
60
62
  def sort_definitions
61
- configuration.sort_definitions || {}
63
+ configuration.sort_definitions.try(:to_hash).try(:stringify_keys) || {}
62
64
  end
63
65
 
64
66
  def search_field_definitions
65
- configuration.search_field_definitions || {}
67
+ configuration.search_field_definitions.try(:to_hash).try(:stringify_keys) || {}
68
+ end
69
+
70
# Reports whether this mock engine claims multi-field search support.
# Returns the configured multi_field_search value, or false when that
# configuration value is nil/false.
+ def multi_field_search?
71
+ configuration.multi_field_search || false
66
72
  end
67
73
 
68
74
  end
@@ -27,11 +27,14 @@ module BentoSearch
27
27
  # apparently by emailing directly to dave.santucci at elsevier dot com.
28
28
  #
29
29
  # Scopus API Docs:
30
- # * http://www.developers.elsevier.com/devcms/content-api-search-request
31
- # * http://www.developers.elsevier.com/devcms/content/search-fields-overview
30
+ # * http://api.elsevier.com/documentation/SCOPUSSearchAPI.wadl
31
+ # * http://api.elsevier.com/documentation/search/SCOPUSSearchViews.htm
32
+ #
33
+ # Query syntax and search fields:
34
+ # * http://api.elsevier.com/documentation/search/SCOPUSSearchTips.htm
32
35
  #
33
36
  # Some more docs on response elements and query elements:
34
- # * http://api.elsevier.com/content/search/#d0n14606
37
+ # * http://api.elsevier.com/content/search/#d0n14606
35
38
  #
36
39
  # Other API's in the suite not being used by this code at present:
37
40
  # * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
@@ -204,7 +207,14 @@ module BentoSearch
204
207
  # controlled and author-assigned keywords
205
208
  "KEY" => {:semantic => :subject},
206
209
  "ISBN" => {:semantic => :isbn},
207
- "ISSN" => {:semantic => :issn},
210
+ "ISSN" => {:semantic => :issn},
211
+ "VOLUME" => {:semantic => :volume},
212
+ "ISSUE" => {:semantic => :issue},
213
+ "PAGEFIRST" => {:semantic => :start_page},
214
+ # Should we use SRCTITLE instead? I think exact match might be better?
215
+ "EXACTSRCTITLE" => {:semantic => :source_title},
216
+ "DOI" => {:semantic => :doi},
217
+ "PUBYEAR" => {:semantic => :year}
208
218
  }
209
219
  end
210
220
 
@@ -222,6 +232,9 @@ module BentoSearch
222
232
  }
223
233
  end
224
234
 
235
# Reports that this engine supports multi-field search, i.e. a Hash of
# field => query passed as the :query argument. Always true.
+ def multi_field_search?
236
+ true
237
+ end
225
238
 
226
239
  protected
227
240
 
@@ -282,10 +295,12 @@ module BentoSearch
282
295
 
283
296
 
284
297
  def scopus_url(args)
285
- query = escape_query args[:query]
286
-
287
- if args[:search_field]
288
- query = "#{args[:search_field]}(#{query})"
298
+ query = if args[:query].kind_of? Hash
299
+ args[:query].collect {|field, query| fielded_query(query,field)}.join(" AND ")
300
+ elsif args[:search_field]
301
+ fielded_query(args[:query], args[:search_field])
302
+ else
303
+ escape_query args[:query]
289
304
  end
290
305
 
291
306
  query = "#{configuration.base_url.chomp("/")}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(query)}"
@@ -304,6 +319,10 @@ module BentoSearch
304
319
 
305
320
  return query
306
321
  end
322
+
323
# Builds one Scopus fielded clause of the form FIELD(query), where the
# query text has been run through #escape_query.
def fielded_query(query, field)
  escaped = escape_query(query)
  "#{field}(#{escaped})"
end
307
326
 
308
327
  end
309
328
  end
@@ -471,7 +471,7 @@ class BentoSearch::SummonEngine
471
471
  "ISBN" => {:semantic => :isbn},
472
472
  "ISSN" => {:semantic => :issn},
473
473
  "OCLC" => {:semantic => :oclcnum},
474
- "PublicationSeriesTitle" => {:semantic => :publication_title }
474
+ "PublicationSeriesTitle" => {:semantic => :source_title }
475
475
  }
476
476
  end
477
477
 
@@ -263,12 +263,25 @@ class BentoSearch::WorldcatSruDcEngine
263
263
  #
264
264
  # returns CQL that is NOT uri escaped yet.
265
265
  def construct_cql_query(args)
266
+ if args[:query].kind_of?(Hash)
267
+ # multi-field
268
+ args[:query].collect {|field, query| fielded_cql_query(query, field)}.join(" AND ")
269
+ else
270
+ fielded_cql_query(args[:query], args[:search_field] || "srw.kw")
271
+ end
272
+ end
273
+
274
+ # construct the CQL clause for a single query string searched in one
275
+ # field (srw.kw when no field is given). Tricky because we need to split terms/phrases ourselves
276
+ #
277
+ # returns CQL that is NOT uri escaped yet.
278
+ def fielded_cql_query(query, field = nil)
266
279
  # default is srw.kw, Keyword anywhere.
267
- field = args[:search_field] || "srw.kw"
280
+ field ||= "srw.kw"
268
281
 
269
282
  # We need to split terms and phrases, so we can formulate
270
283
  # CQL with separate clauses for each, bah.
271
- tokens = args[:query].split(%r{\s|("[^"]+")}).delete_if {|a| a.blank?}
284
+ tokens = query.split(%r{\s|("[^"]+")}).delete_if {|a| a.blank?}
272
285
 
273
286
 
274
287
 
@@ -311,7 +324,9 @@ class BentoSearch::WorldcatSruDcEngine
311
324
  "srw.au" => {:semantic => :author},
312
325
  "srw.su" => {:semantic => :subject},
313
326
  "srw.bn" => {:semantic => :isbn},
314
- # Oddly no ISSN index, all we get is 'number'
327
+ "srw.in" => {:semantic => :issn},
328
+ "srw.dn" => {:semantic => :lccn},
329
+ # generic 'number', probably not useful
315
330
  "srw.sn" => {:semantic => :number},
316
331
  "srw.no" => {:semantic => :oclcnum}
317
332
  }
@@ -332,5 +347,9 @@ class BentoSearch::WorldcatSruDcEngine
332
347
  :auth => false
333
348
  }
334
349
  end
350
+
351
# Reports that this engine supports multi-field search, i.e. a Hash of
# field => query passed as the :query argument. Always true.
+ def multi_field_search?
352
+ true
353
+ end
335
354
 
336
355
  end
@@ -46,6 +46,9 @@ en:
46
46
  isbn: "ISBN"
47
47
  oclcnum: "OCLCnum"
48
48
  lccn: "LCCN"
49
+ doi: "DOI"
49
50
  publisher: "Publisher"
50
- publication_title: "Publication"
51
-
51
+ source_title: "Publication"
52
+ volume: "Volume"
53
+ issue: "Issue"
54
+ start_page: "Start Page"
@@ -1,3 +1,3 @@
1
1
  module BentoSearch
2
- VERSION = "1.4.4"
2
+ VERSION = "1.5.0"
3
3
  end
@@ -22,10 +22,6 @@ Dummy::Application.configure do
22
22
  # Only use best-standards-support built into browsers
23
23
  config.action_dispatch.best_standards_support = :builtin
24
24
 
25
- # Log the query plan for queries taking more than this (works
26
- # with SQLite, MySQL, and PostgreSQL)
27
- config.active_record.auto_explain_threshold_in_seconds = 0.5
28
-
29
25
  # Do not compress assets
30
26
  config.assets.compress = false
31
27
 
@@ -60,8 +60,4 @@ Dummy::Application.configure do
60
60
 
61
61
  # Send deprecation notices to registered listeners
62
62
  config.active_support.deprecation = :notify
63
-
64
- # Log the query plan for queries taking more than this (works
65
- # with SQLite, MySQL, and PostgreSQL)
66
- # config.active_record.auto_explain_threshold_in_seconds = 0.5
67
63
  end
@@ -0,0 +1,200 @@
1
+ require 'test_helper'
2
+ require 'uri'
3
+ require 'cgi'
4
+
5
# Tests for BentoSearch::DoajArticlesEngine.
#
# The test_with_cassette cases run real searches through the engine (recorded
# under the :doaj_articles cassette group); the plain `test` cases only check
# the URL that #args_to_search_url builds, where the query is carried in the
# last path segment.
class DoajArticlesEngineTest < ActiveSupport::TestCase
  extend TestWithCassette

  def setup
    @engine = BentoSearch::DoajArticlesEngine.new
  end

  test_with_cassette("basic search", :doaj_articles) do
    results = @engine.search("Breast cancer patients with lobular cancer more commonly have a father than a mother diagnosed with cancer")

    assert_kind_of BentoSearch::Results, results
    assert !results.failed?

    assert_not_nil results.total_items
    assert_equal 0, results.start
    assert_equal 10, results.per_page

    assert_not_empty results

    first = results.first

    assert_present first.unique_id
    assert_equal "Article", first.format

    assert_present first.title

    assert_not_empty first.authors
    assert_not_empty first.authors.first.display

    assert_present first.source_title
    assert_present first.issn
    assert_present first.volume
    assert_present first.issue

    assert_present first.start_page

    assert_present first.year
    assert_present first.publication_date

    assert_present first.abstract
    assert first.abstract.html_safe?

    assert_present first.link
    assert first.link_is_fulltext?
  end

  test_with_cassette("pagination", :doaj_articles) do
    results = @engine.search("cancer", :per_page => 20, :page => 3)

    assert !results.failed?

    assert_equal 20, results.length

    assert_equal 20, results.size
    assert_equal 40, results.start
    assert_equal 20, results.per_page
  end

  test_with_cassette("catches errors", :doaj_articles) do
    @engine.base_url = "https://doaj.org/api/v1/search/articles_bad_url/"

    results = @engine.search("something")

    assert results.failed?
    assert_kind_of Hash, results.error
    assert_present results.error[:message]
    assert_present results.error[:status]
  end

  test_with_cassette("live #get(identifier) round trip", :doaj_articles) do
    results = @engine.search("cancer")

    assert !results.failed?

    item = @engine.get(results.first.unique_id)

    assert_not_nil item
    assert_kind_of BentoSearch::ResultItem, item
  end

  test_with_cassette("live get(identifier) raises on no results", :doaj_articles) do
    # Only the raise matters here; the return value is deliberately unused.
    assert_raises(BentoSearch::NotFound) { @engine.get("no_such_id") }
  end

  test_with_cassette("multifield author-title", :doaj_articles) do
    results = @engine.search(:query => {
      :author => "Huxtable",
      :title  => '"Global Unions as the Missing Link in Labour Movement Studies"'
    })

    assert !results.failed?

    assert_present results
  end

  test_with_cassette("complex multi-field", :doaj_articles) do
    results = @engine.search(:query => {
      nil           => "Anti-war",
      :author       => "Caffentzis",
      :title        => '"Respect Your Enemies" first rule of peace',
      :source_title => '"Revista Theomai"'
    })

    assert !results.failed?

    assert_equal 1, results.total_items
    assert_equal 1, results.count

    result = results.first

    assert_equal "Revista Theomai", result.source_title
    assert_equal "Respect Your Enemies - The First Rule of Peace: An Essay Addressed to the U. S. Anti-war Movement", result.title
  end

  test "escapes spaces how DOAJ likes it" do
    # Checked while still percent-encoded:
    # %20 not + for space.
    # %2B for "+"
    assert_equal "%2BOne%20%2BTwo", raw_query_segment(:query => "One Two")
  end

  test "escapes special chars" do
    last_path = query_segment(:query => "Me: And/Or You")

    assert_equal "+Me\\: +And\\\/Or +You", last_path
  end

  test "generates fielded searches" do
    last_path = query_segment(:query => "Smith", :search_field => "bibjson.author.name")

    assert_equal "+bibjson.author.name:(+Smith)", last_path
  end

  test "generates multi-field search" do
    last_path = query_segment(:query => {
      nil     => "Anti-war",
      :author => "Caffentzis",
      :title  => '"Respect Your Enemies" first rule of peace'
    })

    assert_equal '+Anti\-war +author:(+Caffentzis) +title:(+"Respect Your Enemies" +first +rule +of +peace)', last_path
  end

  test "does not escape double quotes" do
    # we want to allow them for phrase searching
    last_path = query_segment(:query => '"This is a phrase"')

    assert_equal '+"This is a phrase"', last_path
  end

  test "multi-token fielded search" do
    last_path = query_segment(:query => 'apple orange "strawberry banana"', :search_field => "bibjson.title")

    assert_equal '+bibjson.title:(+apple +orange +"strawberry banana")', last_path
  end

  test "adds sort to query url" do
    url = @engine.args_to_search_url(:query => "cancer", :sort => 'date_desc')

    parsed = URI.parse(url)
    query = CGI.parse(parsed.query)

    assert_equal ["article.created_date:desc"], query["sort"]
  end

  private

  # Last path segment of the search URL the engine builds for +search_args+,
  # left percent-encoded exactly as it appears in the URL.
  def raw_query_segment(search_args)
    url = @engine.args_to_search_url(search_args)
    URI.parse(url).path.split('/').last
  end

  # Same segment as #raw_query_segment, run through CGI.unescape so
  # assertions can be written against the readable query text.
  def query_segment(search_args)
    CGI.unescape(raw_query_segment(search_args))
  end
end