bento_search 1.4.4 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +41 -19
- data/app/models/bento_search/result_item.rb +1 -1
- data/app/models/bento_search/search_engine.rb +36 -3
- data/app/models/bento_search/search_engine/capabilities.rb +14 -0
- data/app/search_engines/bento_search/doaj_articles_engine.rb +279 -0
- data/app/search_engines/bento_search/ebsco_host_engine.rb +27 -7
- data/app/search_engines/bento_search/google_books_engine.rb +8 -1
- data/app/search_engines/bento_search/mock_engine.rb +8 -2
- data/app/search_engines/bento_search/scopus_engine.rb +27 -8
- data/app/search_engines/bento_search/summon_engine.rb +1 -1
- data/app/search_engines/bento_search/worldcat_sru_dc_engine.rb +22 -3
- data/config/locales/en.yml +5 -2
- data/lib/bento_search/version.rb +1 -1
- data/test/dummy/config/environments/development.rb +0 -4
- data/test/dummy/config/environments/production.rb +0 -4
- data/test/search_engines/doaj_articles_engine_test.rb +200 -0
- data/test/search_engines/ebsco_host_engine_test.rb +38 -0
- data/test/search_engines/google_books_engine_test.rb +18 -2
- data/test/search_engines/scopus_engine_test.rb +45 -1
- data/test/search_engines/search_engine_base_test.rb +59 -0
- data/test/search_engines/worldcat_sru_dc_engine_test.rb +17 -0
- data/test/vcr_cassettes/doaj_articles/basic_search.yml +97 -0
- data/test/vcr_cassettes/doaj_articles/catches_errors.yml +42 -0
- data/test/vcr_cassettes/doaj_articles/complex_multi-field.yml +67 -0
- data/test/vcr_cassettes/doaj_articles/live__get_identifier__round_trip.yml +387 -0
- data/test/vcr_cassettes/doaj_articles/live_get_identifier__raises_on_no_results.yml +41 -0
- data/test/vcr_cassettes/doaj_articles/multifield_author-title.yml +79 -0
- data/test/vcr_cassettes/doaj_articles/pagination.yml +691 -0
- data/test/vcr_cassettes/ebscohost/affiliation_search.yml +929 -0
- data/test/vcr_cassettes/ebscohost/multi-field_author_title.yml +122 -0
- data/test/vcr_cassettes/ebscohost/multi-field_citation_numbers.yml +122 -0
- data/test/vcr_cassettes/scopus/multi-field_search.yml +55 -0
- data/test/vcr_cassettes/scopus/multi-fielded_citation_details_search.yml +86 -0
- data/test/vcr_cassettes/worldcat_sru_dc/multi_field_search.yml +1839 -0
- metadata +31 -2
@@ -358,12 +358,11 @@ class BentoSearch::EbscoHostEngine
|
|
358
358
|
url =
|
359
359
|
"#{configuration.base_url}/Search?prof=#{configuration.profile_id}&pwd=#{configuration.profile_password}"
|
360
360
|
|
361
|
-
query =
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
query = "(#{args[:search_field]} #{query})"
|
361
|
+
query = if args[:query].kind_of?(Hash)
|
362
|
+
# multi-field query
|
363
|
+
args[:query].collect {|field, query| fielded_query(query, field)}.join(" AND ")
|
364
|
+
else
|
365
|
+
fielded_query(args[:query], args[:search_field])
|
367
366
|
end
|
368
367
|
|
369
368
|
# peer-reviewed only?
|
@@ -401,6 +400,17 @@ class BentoSearch::EbscoHostEngine
|
|
401
400
|
return url
|
402
401
|
end
|
403
402
|
|
403
|
+
# Builds one EBSCO query clause from a user-entered query.
#
# The query is first passed through #ebsco_query_prepare. If a search
# field code is given (e.g. "TI"), the prepared query is wrapped in
# EBSCO's fielded form: "(FI prepared query)".
#
# query - raw query string as entered by the user
# field - optional EBSCO field code; nil or blank means unfielded search
#
# Returns the prepared (and possibly field-wrapped) query string.
def fielded_query(query, field = nil)
  prepared = ebsco_query_prepare(query)

  # Unfielded search: the prepared query is used as-is.
  return prepared unless field.present?

  # Wrap the *prepared* query. Interpolating the raw `query` here would
  # silently discard the preparation done above for fielded searches.
  "(#{field} #{prepared})"
end
|
413
|
+
|
404
414
|
# pass in a nokogiri representing an EBSCO <rec> result,
|
405
415
|
# we'll turn it into a BentoSearch::ResultItem.
|
406
416
|
def item_from_xml(xml_rec)
|
@@ -547,10 +557,20 @@ class BentoSearch::EbscoHostEngine
|
|
547
557
|
"TI" => {:semantic => :title},
|
548
558
|
"SU" => {:semantic => :subject},
|
549
559
|
"IS" => {:semantic => :issn},
|
550
|
-
"IB" => {:semantic => :isbn}
|
560
|
+
"IB" => {:semantic => :isbn},
|
561
|
+
"SO" => {:semantic => :source_title},
|
562
|
+
# These may not be defined in all databases....
|
563
|
+
"VI" => {:semantic => :volume},
|
564
|
+
"IP" => {:semantic => :issue},
|
565
|
+
"SP" => {:semantic => :start_page},
|
566
|
+
"AF" => {:semantic => :author_affiliation}
|
551
567
|
}
|
552
568
|
end
|
553
569
|
|
570
|
+
# Capability flag: this engine accepts a Hash of field => query as the
# :query argument and ANDs the fielded clauses together.
def multi_field_search?
  true
end
|
573
|
+
|
554
574
|
def max_per_page
|
555
575
|
# Actually only '50' if you ask for 'full' records, but I don't think
|
556
576
|
# we need to do that ever, that's actually getting fulltext back!
|
@@ -205,6 +205,10 @@ module BentoSearch
|
|
205
205
|
}
|
206
206
|
end
|
207
207
|
|
208
|
+
# Capability flag: #args_to_search_url handles a Hash of field => query
# given as :query, so multi-field search is supported.
def multi_field_search?
  true
end
|
211
|
+
|
208
212
|
protected
|
209
213
|
|
210
214
|
|
@@ -217,7 +221,10 @@ module BentoSearch
|
|
217
221
|
# turns it into a URL for Google API. Factored out to make testing
|
218
222
|
# possible.
|
219
223
|
def args_to_search_url(arguments)
|
220
|
-
query = if arguments[:
|
224
|
+
query = if arguments[:query].kind_of? Hash
|
225
|
+
#multi-field
|
226
|
+
arguments[:query].collect {|field, query| fielded_query(query, field)}.join(" ")
|
227
|
+
elsif arguments[:search_field]
|
221
228
|
fielded_query(arguments[:query], arguments[:search_field])
|
222
229
|
else
|
223
230
|
arguments[:query]
|
@@ -9,6 +9,7 @@
|
|
9
9
|
# specified per_page, or 10)
|
10
10
|
# [:total_items] total_items to report
|
11
11
|
# [:sort_definitions] hash for #sort_definitions
|
12
|
+
# [:search_field_definitions] hash for #search_field_definitions
|
12
13
|
# [:link] link to give to each item in results
|
13
14
|
# [:error] set to an error value hash and results returned
|
14
15
|
# will all be failed? with that error hash.
|
@@ -16,6 +17,7 @@
|
|
16
17
|
# to be caught by BentoSearch::SearchEngine wrapper possibly.
|
17
18
|
# [:timing] in seconds, fill out the results as if they took
|
18
19
|
# this long.
|
20
|
+
# [:multi_field_search] true or false, returned by #multi_field_search?
|
19
21
|
class BentoSearch::MockEngine
|
20
22
|
include BentoSearch::SearchEngine
|
21
23
|
|
@@ -58,11 +60,15 @@ class BentoSearch::MockEngine
|
|
58
60
|
end
|
59
61
|
|
60
62
|
def sort_definitions
|
61
|
-
configuration.sort_definitions || {}
|
63
|
+
configuration.sort_definitions.try(:to_hash).try(:stringify_keys) || {}
|
62
64
|
end
|
63
65
|
|
64
66
|
def search_field_definitions
|
65
|
-
configuration.search_field_definitions || {}
|
67
|
+
configuration.search_field_definitions.try(:to_hash).try(:stringify_keys) || {}
|
68
|
+
end
|
69
|
+
|
70
|
+
# Whether this mock engine claims multi-field search support.
# Driven by the :multi_field_search configuration value; an unset
# (nil) or false value yields false.
def multi_field_search?
  configured = configuration.multi_field_search
  configured ? configured : false
end
|
67
73
|
|
68
74
|
end
|
@@ -27,11 +27,14 @@ module BentoSearch
|
|
27
27
|
# apparently by emailing directly to dave.santucci at elsevier dot com.
|
28
28
|
#
|
29
29
|
# Scopus API Docs:
|
30
|
-
# * http://
|
31
|
-
# * http://
|
30
|
+
# * http://api.elsevier.com/documentation/SCOPUSSearchAPI.wadl
|
31
|
+
# * http://api.elsevier.com/documentation/search/SCOPUSSearchViews.htm
|
32
|
+
#
|
33
|
+
# Query syntax and search fields:
|
34
|
+
# * http://api.elsevier.com/documentation/search/SCOPUSSearchTips.htm
|
32
35
|
#
|
33
36
|
# Some more docs on response elements and query elements:
|
34
|
-
# * http://api.elsevier.com/content/search/#d0n14606
|
37
|
+
# * http://api.elsevier.com/content/search/#d0n14606
|
35
38
|
#
|
36
39
|
# Other API's in the suite not being used by this code at present:
|
37
40
|
# * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
|
@@ -204,7 +207,14 @@ module BentoSearch
|
|
204
207
|
# controlled and author-assigned keywords
|
205
208
|
"KEY" => {:semantic => :subject},
|
206
209
|
"ISBN" => {:semantic => :isbn},
|
207
|
-
"ISSN" => {:semantic => :issn},
|
210
|
+
"ISSN" => {:semantic => :issn},
|
211
|
+
"VOLUME" => {:semantic => :volume},
|
212
|
+
"ISSUE" => {:semantic => :issue},
|
213
|
+
"PAGEFIRST" => {:semantic => :start_page},
|
214
|
+
# Should we use SRCTITLE instead? I think exact match might be better?
|
215
|
+
"EXACTSRCTITLE" => {:semantic => :source_title},
|
216
|
+
"DOI" => {:semantic => :doi},
|
217
|
+
"PUBYEAR" => {:semantic => :year}
|
208
218
|
}
|
209
219
|
end
|
210
220
|
|
@@ -222,6 +232,9 @@ module BentoSearch
|
|
222
232
|
}
|
223
233
|
end
|
224
234
|
|
235
|
+
# Capability flag: #scopus_url accepts a Hash of field => query as
# :query, so this engine reports multi-field search support.
def multi_field_search?
  true
end
|
225
238
|
|
226
239
|
protected
|
227
240
|
|
@@ -282,10 +295,12 @@ module BentoSearch
|
|
282
295
|
|
283
296
|
|
284
297
|
def scopus_url(args)
|
285
|
-
query =
|
286
|
-
|
287
|
-
|
288
|
-
query
|
298
|
+
query = if args[:query].kind_of? Hash
|
299
|
+
args[:query].collect {|field, query| fielded_query(query,field)}.join(" AND ")
|
300
|
+
elsif args[:search_field]
|
301
|
+
fielded_query(args[:query], args[:search_field])
|
302
|
+
else
|
303
|
+
escape_query args[:query]
|
289
304
|
end
|
290
305
|
|
291
306
|
query = "#{configuration.base_url.chomp("/")}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(query)}"
|
@@ -304,6 +319,10 @@ module BentoSearch
|
|
304
319
|
|
305
320
|
return query
|
306
321
|
end
|
322
|
+
|
323
|
+
# Builds a single fielded clause in the form FIELD(escaped query).
#
# query - raw query text; passed through #escape_query
# field - field code placed in front of the parenthesized query
#
# Returns the clause as a String.
def fielded_query(query, field)
  escaped = escape_query(query)
  "#{field}(#{escaped})"
end
|
307
326
|
|
308
327
|
end
|
309
328
|
end
|
@@ -471,7 +471,7 @@ class BentoSearch::SummonEngine
|
|
471
471
|
"ISBN" => {:semantic => :isbn},
|
472
472
|
"ISSN" => {:semantic => :issn},
|
473
473
|
"OCLC" => {:semantic => :oclcnum},
|
474
|
-
"PublicationSeriesTitle" => {:semantic => :
|
474
|
+
"PublicationSeriesTitle" => {:semantic => :source_title }
|
475
475
|
}
|
476
476
|
end
|
477
477
|
|
@@ -263,12 +263,25 @@ class BentoSearch::WorldcatSruDcEngine
|
|
263
263
|
#
|
264
264
|
# returns CQL that is NOT uri escaped yet.
|
265
265
|
# Builds the (not yet URI-escaped) CQL for the given search arguments.
#
# args[:query] may be either:
# * a String, searched in args[:search_field] (default "srw.kw"), or
# * a Hash of field => query, in which case one fielded clause is
#   built per pair and the clauses are joined with " AND ".
def construct_cql_query(args)
  user_query = args[:query]

  # single-field (or unfielded) search
  unless user_query.kind_of?(Hash)
    return fielded_cql_query(user_query, args[:search_field] || "srw.kw")
  end

  # multi-field
  clauses = user_query.map { |field, value| fielded_cql_query(value, field) }
  clauses.join(" AND ")
end
|
273
|
+
|
274
|
+
# construct valid CQL for the API's "query" param, from search
|
275
|
+
# args. Tricky because we need to split terms/phrases ourselves
|
276
|
+
#
|
277
|
+
# returns CQL that is NOT uri escaped yet.
|
278
|
+
def fielded_cql_query(query, field = nil)
|
266
279
|
# default is srw.kw, Keyword anywhere.
|
267
|
-
field
|
280
|
+
field ||= "srw.kw"
|
268
281
|
|
269
282
|
# We need to split terms and phrases, so we can formulate
|
270
283
|
# CQL with separate clauses for each, bah.
|
271
|
-
tokens =
|
284
|
+
tokens = query.split(%r{\s|("[^"]+")}).delete_if {|a| a.blank?}
|
272
285
|
|
273
286
|
|
274
287
|
|
@@ -311,7 +324,9 @@ class BentoSearch::WorldcatSruDcEngine
|
|
311
324
|
"srw.au" => {:semantic => :author},
|
312
325
|
"srw.su" => {:semantic => :subject},
|
313
326
|
"srw.bn" => {:semantic => :isbn},
|
314
|
-
|
327
|
+
"srw.in" => {:semantic => :issn},
|
328
|
+
"srw.dn" => {:semantic => :lccn},
|
329
|
+
# generic 'number', probably not useful
|
315
330
|
"srw.sn" => {:semantic => :number},
|
316
331
|
"srw.no" => {:semantic => :oclcnum}
|
317
332
|
}
|
@@ -332,5 +347,9 @@ class BentoSearch::WorldcatSruDcEngine
|
|
332
347
|
:auth => false
|
333
348
|
}
|
334
349
|
end
|
350
|
+
|
351
|
+
# Capability flag: #construct_cql_query accepts a Hash of field => query
# as :query, so this engine reports multi-field search support.
def multi_field_search?
  true
end
|
335
354
|
|
336
355
|
end
|
data/config/locales/en.yml
CHANGED
data/lib/bento_search/version.rb
CHANGED
@@ -22,10 +22,6 @@ Dummy::Application.configure do
|
|
22
22
|
# Only use best-standards-support built into browsers
|
23
23
|
config.action_dispatch.best_standards_support = :builtin
|
24
24
|
|
25
|
-
# Log the query plan for queries taking more than this (works
|
26
|
-
# with SQLite, MySQL, and PostgreSQL)
|
27
|
-
config.active_record.auto_explain_threshold_in_seconds = 0.5
|
28
|
-
|
29
25
|
# Do not compress assets
|
30
26
|
config.assets.compress = false
|
31
27
|
|
@@ -60,8 +60,4 @@ Dummy::Application.configure do
|
|
60
60
|
|
61
61
|
# Send deprecation notices to registered listeners
|
62
62
|
config.active_support.deprecation = :notify
|
63
|
-
|
64
|
-
# Log the query plan for queries taking more than this (works
|
65
|
-
# with SQLite, MySQL, and PostgreSQL)
|
66
|
-
# config.active_record.auto_explain_threshold_in_seconds = 0.5
|
67
63
|
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'uri'
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
# Tests for BentoSearch::DoajArticlesEngine.
#
# The test_with_cassette cases run searches under the :doaj_articles
# cassette group; the plain `test` cases only inspect the URL built by
# #args_to_search_url and make no search call.
class DoajArticlesEngineTest < ActiveSupport::TestCase
  extend TestWithCassette

  def setup
    @engine = BentoSearch::DoajArticlesEngine.new
    # NOTE(review): the original comment here read "tell it not to send our
    # bad API key", but no such configuration follows -- confirm whether
    # the comment is stale.
  end

  # A plain keyword search returns a populated first page, and the first
  # item carries the expected citation fields.
  test_with_cassette("basic search", :doaj_articles) do
    results = @engine.search("Breast cancer patients with lobular cancer more commonly have a father than a mother diagnosed with cancer")

    assert_kind_of BentoSearch::Results, results
    assert ! results.failed?

    assert_not_nil results.total_items
    assert_equal 0, results.start
    assert_equal 10, results.per_page

    assert_not_empty results

    first = results.first

    assert_present first.unique_id
    assert_equal "Article", first.format

    assert_present first.title

    assert_not_empty first.authors
    assert_not_empty first.authors.first.display


    assert_present first.source_title
    assert_present first.issn
    assert_present first.volume
    assert_present first.issue

    assert_present first.start_page

    assert_present first.year
    assert_present first.publication_date

    assert_present first.abstract
    assert first.abstract.html_safe?

    assert_present first.link
    assert first.link_is_fulltext?
  end

  # Page 3 at 20 per page should report a zero-based start of 40.
  test_with_cassette("pagination", :doaj_articles) do
    results = @engine.search("cancer", :per_page => 20, :page => 3)

    assert ! results.failed?

    assert_equal 20, results.length

    assert_equal 20, results.size
    assert_equal 40, results.start
    assert_equal 20, results.per_page
  end

  # With a bad base_url the search must come back as failed results
  # carrying an error hash with :message and :status.
  test_with_cassette("catches errors", :doaj_articles) do
    @engine.base_url = "https://doaj.org/api/v1/search/articles_bad_url/"

    results = @engine.search("something")

    assert results.failed?
    assert_kind_of Hash, results.error
    assert_present results.error[:message]
    assert_present results.error[:status]
  end

  # The unique_id from a search result can be fed back into #get.
  test_with_cassette("live #get(identifier) round trip", :doaj_articles) do
    results = @engine.search("cancer")

    assert (! results.failed?)

    item = @engine.get( results.first.unique_id )

    assert_not_nil item
    assert_kind_of BentoSearch::ResultItem, item
  end

  # An unknown identifier raises BentoSearch::NotFound.
  test_with_cassette("live get(identifier) raises on no results", :doaj_articles) do
    assert_raises(BentoSearch::NotFound) { item = @engine.get( "no_such_id" ) }
  end

  # :query given as a Hash of semantic field => query (multi-field search).
  test_with_cassette("multifield author-title", :doaj_articles) do
    results = @engine.search(:query => {
      :author => "Huxtable",
      :title => '"Global Unions as the Missing Link in Labour Movement Studies"'
    })

    assert ! results.failed?

    assert_present results
  end

  # Multi-field search mixing an unfielded clause (nil key), phrases and
  # loose terms; expected to narrow down to exactly one known article.
  test_with_cassette("complex multi-field", :doaj_articles) do
    results = @engine.search(:query => {
      nil => "Anti-war",
      :author => "Caffentzis",
      :title => '"Respect Your Enemies" first rule of peace',
      :source_title => '"Revista Theomai"'
    })

    assert ! results.failed?

    assert_equal 1, results.total_items
    assert_equal 1, results.count

    result = results.first

    assert_equal "Revista Theomai", result.source_title
    assert_equal "Respect Your Enemies - The First Rule of Peace: An Essay Addressed to the U. S. Anti-war Movement", result.title
  end

  # The query is the last path segment of the URL; this one is checked
  # still percent-encoded, the following tests decode it first.
  test "escapes spaces how DOAJ likes it" do
    url = @engine.args_to_search_url(:query => "One Two")
    parsed = URI.parse(url)
    last_path = parsed.path.split('/').last

    # %20 not + for space.
    # %2B for "+"
    assert_equal "%2BOne%20%2BTwo", last_path
  end

  # ":" and "/" in user input are backslash-escaped in the query.
  test "escapes special chars" do
    url = @engine.args_to_search_url(:query => "Me: And/Or You")

    parsed = URI.parse(url)

    last_path = parsed.path.split('/').last
    last_path = CGI.unescape(last_path)

    assert_equal "+Me\\: +And\\\/Or +You", last_path
  end

  # A :search_field wraps the terms as +field:(...).
  test "generates fielded searches" do
    url = @engine.args_to_search_url(:query => "Smith", :search_field => "bibjson.author.name")

    parsed = URI.parse(url)

    last_path = parsed.path.split('/').last
    last_path = CGI.unescape(last_path)

    assert_equal "+bibjson.author.name:(+Smith)", last_path
  end

  # A Hash :query yields one clause per entry; the nil key is unfielded.
  test "generates multi-field search" do
    url = @engine.args_to_search_url(:query => {
      nil => "Anti-war",
      :author => "Caffentzis",
      :title => '"Respect Your Enemies" first rule of peace'
    })

    parsed = URI.parse(url)

    last_path = parsed.path.split('/').last
    last_path = CGI.unescape(last_path)

    assert_equal '+Anti\-war +author:(+Caffentzis) +title:(+"Respect Your Enemies" +first +rule +of +peace)', last_path
  end

  test "does not escape double quotes" do
    # we want to allow them for phrase searching
    url = @engine.args_to_search_url(:query => '"This is a phrase"')

    parsed = URI.parse(url)

    last_path = parsed.path.split('/').last
    last_path = CGI.unescape(last_path)

    assert_equal '+"This is a phrase"', last_path
  end

  # Loose terms and a quoted phrase each get their own "+" inside the
  # field's parentheses.
  test "multi-token fielded search" do
    url = @engine.args_to_search_url(:query => 'apple orange "strawberry banana"', :search_field => "bibjson.title")

    parsed = URI.parse(url)

    last_path = parsed.path.split('/').last
    last_path = CGI.unescape(last_path)

    assert_equal '+bibjson.title:(+apple +orange +"strawberry banana")', last_path
  end

  # The 'date_desc' sort key is sent as the "sort" URL query parameter.
  test "adds sort to query url" do
    url = @engine.args_to_search_url(:query => "cancer", :sort => 'date_desc')

    parsed = URI.parse(url)
    query = CGI.parse(parsed.query)

    assert_equal ["article.created_date:desc"], query["sort"]
  end

end
|