bento_search 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -16
- data/Rakefile +30 -11
- data/app/controllers/bento_search/search_controller.rb +29 -28
- data/app/models/bento_search/result_item.rb +10 -10
- data/app/models/bento_search/results/serialization.rb +22 -13
- data/app/models/bento_search/search_engine.rb +117 -117
- data/app/search_engines/bento_search/doaj_articles_engine.rb +19 -19
- data/app/search_engines/bento_search/ebsco_host_engine.rb +3 -3
- data/app/search_engines/bento_search/eds_engine.rb +166 -166
- data/app/search_engines/bento_search/google_books_engine.rb +2 -2
- data/app/search_engines/bento_search/scopus_engine.rb +87 -87
- data/app/search_engines/bento_search/summon_engine.rb +1 -1
- data/lib/bento_search.rb +12 -9
- data/lib/bento_search/version.rb +1 -1
- data/test/dummy/config/boot.rb +4 -9
- data/test/dummy/db/schema.rb +15 -0
- data/test/functional/bento_search/search_controller_test.rb +63 -57
- data/test/helper/bento_search_helper_test.rb +103 -103
- data/test/search_engines/doaj_articles_engine_test.rb +9 -9
- data/test/search_engines/search_engine_base_test.rb +86 -86
- data/test/search_engines/search_engine_test.rb +56 -56
- data/test/test_helper.rb +23 -12
- data/test/unit/multi_searcher_test.rb +18 -18
- data/test/unit/pagination_test.rb +12 -12
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f070dacd096eb963619edf893eeae9d44303d28
|
4
|
+
data.tar.gz: 41b90c919f85df3a67f3ed15ca8764d57484d952
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 19a68228f6e2224f23308953e96354a7df221570acb214e8134c8eab511239412cc7bf5947aeba050c4a9fa9817383d957d00a72df3ca920fdc9d7244336f603
|
7
|
+
data.tar.gz: fc1f57fc1718bee5ca509facdd3299194c7e277ded15930c4098e3f9c4800f80c3ee363a413c27db5c8e488a3902e9f6ab04c34960bf654acab436cbbfeac7ca
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
bento_search provides an abstraction/normalization layer for querying and
|
7
7
|
displaying results from external search engines, in Ruby on Rails. Works with
|
8
|
-
Rails 3.x
|
8
|
+
Rails 3.x, 4.x, or 5.0. ruby 1.9.3+
|
9
9
|
|
10
10
|
### Goals: To help you
|
11
11
|
|
@@ -35,6 +35,7 @@ Adapters currently included in bento_search
|
|
35
35
|
* [WorldCat Search](https://www.worldcat.org/) (requires OCLC membership to get api key)
|
36
36
|
* [Google Site Search](https://www.google.com/work/search/products/gss.html) (requires sign-up for more than 100 searches/day)
|
37
37
|
* [JournalTOCs](http://www.journaltocs.hw.ac.uk/) (limited support for fetching current articles by ISSN, free but requires registration)
|
38
|
+
* [Directory of Open Access Journals (DOAJ)](https://doaj.org/) article search. (free, no registration required)
|
38
39
|
|
39
40
|
|
40
41
|
|
@@ -46,12 +47,11 @@ search' functionality, but it does not and will never support merging results
|
|
46
47
|
from multiple engines into one result set. It is meant to support displaying the
|
47
48
|
first few results from multiple engines on one page, "bento box" style (as
|
48
49
|
named by Tito Sierra@NCSU), as well as more expanded single-search-on-a-page
|
49
|
-
uses.
|
50
|
+
uses -- or back-end functionality supporting features that are not straight discovery.
|
50
51
|
|
51
52
|
* bento_search provides abstract functionality for pagination, sorting,
|
52
|
-
and single-field-specified queries. Faceting
|
53
|
-
|
54
|
-
out in the future.
|
53
|
+
and single-field-specified queries. Faceting and generalized limiting are
|
54
|
+
not yet supported, but possibly will be built out in the future.
|
55
55
|
|
56
56
|
Not all search engine adapters support all features. Some engines offer
|
57
57
|
engine-specific features, such as limiting. Search engine adapters can
|
@@ -86,7 +86,7 @@ may be required for certain engines.
|
|
86
86
|
`results` are a [BentoSearch::Results](./app/models/bento_search/results.rb) object, which acts like an array of
|
87
87
|
[BentoSearch::ResultItem](./app/models/bento_search/result_item.rb) objects, along with some meta-information about the
|
88
88
|
search itself (pagination keys, etc). BentoSearch::Results and Item fields
|
89
|
-
are standardized
|
89
|
+
are standardized across engines. BentoSearch::Items provide semantic
|
90
90
|
values (title, author, etc.), as available from the particular engine.
|
91
91
|
|
92
92
|
To see which engines come bundled with BentoSearch, and any special
|
@@ -238,7 +238,7 @@ declared for the engine, that will be preferred.
|
|
238
238
|
This can be used to expose a multi-field search to users, and the `bento_field_hash_for`
|
239
239
|
helper method might be helpful in creating your UI. But this is also useful for looking
|
240
240
|
up known-item citations -- either by author/title, or issn/volume/issue/page, or doi, or
|
241
|
-
anything else -- as back-end support for various possible functions.
|
241
|
+
anything else -- as back-end support for various possible functions.
|
242
242
|
|
243
243
|
### Concurrent searching
|
244
244
|
|
@@ -375,7 +375,7 @@ There are additional details that might matter to you, for more info see the
|
|
375
375
|
### Round-Trip Serialization to JSON
|
376
376
|
|
377
377
|
You can serialize BentoSearch::Results to a simple straightforward JSON structure, and de-serialize
|
378
|
-
them back into BentoSearch::Results.
|
378
|
+
them back into BentoSearch::Results.
|
379
379
|
|
380
380
|
~~~ruby
|
381
381
|
json_str = results.dump_to_json
|
@@ -383,19 +383,19 @@ copy_of_results = BentoSearch::Results.load_json(json_str)
|
|
383
383
|
~~~
|
384
384
|
|
385
385
|
Search context (query, start, per_page) are not serialized, and will be lost
|
386
|
-
on de-serialization.
|
386
|
+
on de-serialization.
|
387
387
|
|
388
388
|
Unlike the Atom serialization, **the JSON serialization is of internal data
|
389
|
-
state, without decoration.** Configuration context is not serialized.
|
389
|
+
state, without decoration.** Configuration context is not serialized.
|
390
390
|
|
391
|
-
However, the engine_id is included in serialization if present,
|
391
|
+
However, the engine_id is included in serialization if present,
|
392
392
|
and configuration from the specified engine
|
393
393
|
will be re-assigned on de-serialization. This means if the configuration
|
394
394
|
changed between serialization and de-serialization, you get the new stuff
|
395
|
-
assigned on de-serialization.
|
395
|
+
assigned on de-serialization.
|
396
396
|
|
397
397
|
The use case guiding JSON serialization is storage somewhere, and
|
398
|
-
round-trip de-serialization in the current app context.
|
398
|
+
round-trip de-serialization in the current app context.
|
399
399
|
|
400
400
|
If you want to take de-serialized results that did not have an engine_id,
|
401
401
|
or set configuration on them to a different engine (registered or not) you can:
|
@@ -410,7 +410,7 @@ or set configuration on them to a different engine (registered or not) you can:
|
|
410
410
|
|
411
411
|
If you want a serialization to be consumed by something other than an
|
412
412
|
app using the bento_search gem, as an API, we recommend the [Atom serialization](https://github.com/jrochkind/bento_search/wiki/Machine-Readable-Serialization-With-Atom)
|
413
|
-
instead.
|
413
|
+
instead.
|
414
414
|
|
415
415
|
## Planned Features
|
416
416
|
|
@@ -423,8 +423,6 @@ Probably:
|
|
423
423
|
* Support for display facets for engines that support such, as well as
|
424
424
|
search with limits from controlled vocabulary (ie, selected facet, but
|
425
425
|
also may be supported by some engines that do not support faceting).
|
426
|
-
* Support for multi-field, multi-entry-box 'advanced search' UI's, in
|
427
|
-
a normalized cross-engine way.
|
428
426
|
|
429
427
|
Other needs or suggestions?
|
430
428
|
|
data/Rakefile
CHANGED
@@ -23,17 +23,36 @@ end
|
|
23
23
|
APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
|
24
24
|
load 'rails/tasks/engine.rake'
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
26
|
+
load 'rails/tasks/statistics.rake'
|
27
|
+
|
28
|
+
require 'bundler/gem_tasks'
|
29
|
+
|
30
|
+
|
31
|
+
if Gem::Version.new(Rails.version) > Gem::Version.new('4.2.99999')
|
32
|
+
desc "Run tests"
|
33
|
+
task :test do
|
34
|
+
Rake::Task["app:test"].invoke
|
35
|
+
end
|
36
|
+
# use built-in Rails test command
|
37
|
+
# task :test do
|
38
|
+
# require "rails/test_unit/minitest_plugin"
|
39
|
+
# #$: << File.expand_path('test', ENGINE_ROOT)
|
40
|
+
# Minitest.rake_run([])
|
41
|
+
|
42
|
+
# # require 'rails/engine/commands_tasks'
|
43
|
+
# # Rails::Engine::CommandsTasks.new("").run_command!('test')
|
44
|
+
# end
|
45
|
+
else
|
46
|
+
# old rails4 style
|
47
|
+
require 'rake/testtask'
|
48
|
+
|
49
|
+
Rake::TestTask.new(:test) do |t|
|
50
|
+
t.libs << 'lib'
|
51
|
+
t.libs << 'test'
|
52
|
+
t.pattern = 'test/**/*_test.rb'
|
53
|
+
t.verbose = false
|
54
|
+
t.warning = false
|
55
|
+
end
|
37
56
|
end
|
38
57
|
|
39
58
|
|
@@ -2,23 +2,23 @@ module BentoSearch
|
|
2
2
|
# This is a controller that provides stand-alone search results
|
3
3
|
# for registered engines. Right now, this is only for automatic
|
4
4
|
# AJAX delayed loading. In the future it may be used for atom results,
|
5
|
-
# or other such.
|
5
|
+
# or other such.
|
6
6
|
#
|
7
7
|
# You need to make sure to include routing for this controller in your
|
8
8
|
# app to use it, for instance with `BentoSearch::Routes.new(self).draw`
|
9
9
|
# in your ./config/routes.rb
|
10
10
|
#
|
11
11
|
# # Authorization Issues
|
12
|
-
#
|
12
|
+
#
|
13
13
|
# You may have some engines which should not be publicly searchable,
|
14
14
|
# they should only be searchable by certain auth'd users. This controller
|
15
15
|
# could accidentally provide a non-protected endpoint to get results if
|
16
|
-
# nothing were done to prevent it.
|
16
|
+
# nothing were done to prevent it.
|
17
17
|
#
|
18
18
|
# Only engines which have a :allow_routable_results => true key
|
19
|
-
# in their config will be served by this controller.
|
19
|
+
# in their config will be served by this controller.
|
20
20
|
#
|
21
|
-
# If you need routable results on an engine which ALSO needs to
|
21
|
+
# If you need routable results on an engine which ALSO needs to
|
22
22
|
# be protected by auth, you can add your own Rails before_filter
|
23
23
|
# to provide auth. Say, in an initializer in your app:
|
24
24
|
#
|
@@ -27,26 +27,26 @@ module BentoSearch
|
|
27
27
|
# raise BentoSearch::SearchController::AccessDenied
|
28
28
|
# end
|
29
29
|
# end
|
30
|
-
#
|
31
|
-
#
|
30
|
+
#
|
31
|
+
#
|
32
32
|
# We may provide fancier/nicer API for this in the future, if there's
|
33
|
-
# demand.
|
33
|
+
# demand.
|
34
34
|
class SearchController < BentoSearchController
|
35
35
|
class AccessDenied < BentoSearch::Error ; end
|
36
36
|
|
37
|
-
|
37
|
+
|
38
38
|
rescue_from AccessDenied, :with => :deny_access
|
39
39
|
rescue_from NoSuchEngine, :with => :render_404
|
40
|
-
|
40
|
+
|
41
41
|
# returns partial HTML results, suitable for
|
42
|
-
# AJAX to insert into DOM.
|
42
|
+
# AJAX to insert into DOM.
|
43
43
|
# arguments for engine.search are taken from URI request params, whitelisted
|
44
|
-
def search
|
44
|
+
def search
|
45
45
|
engine = BentoSearch.get_engine(params[:engine_id])
|
46
|
-
# put it in an iVar mainly for testing purposes.
|
46
|
+
# put it in an iVar mainly for testing purposes.
|
47
47
|
@engine = engine
|
48
48
|
|
49
|
-
|
49
|
+
|
50
50
|
unless engine.configuration.allow_routable_results == true
|
51
51
|
raise AccessDenied.new("engine needs to be registered with :allow_routable_results => true")
|
52
52
|
end
|
@@ -54,28 +54,29 @@ module BentoSearch
|
|
54
54
|
@results = engine.search safe_search_args(engine, params)
|
55
55
|
# template name of a partial with 'yield' to use to wrap the results
|
56
56
|
@partial_wrapper = @results.display_configuration.lookup!("ajax.wrapper_template")
|
57
|
-
|
57
|
+
|
58
58
|
# partial HTML results
|
59
|
-
render "bento_search/search/search", :layout => false
|
59
|
+
render "bento_search/search/search", :layout => false
|
60
60
|
|
61
61
|
end
|
62
|
-
|
63
62
|
|
64
|
-
|
65
|
-
|
66
|
-
|
63
|
+
|
64
|
+
|
65
|
+
protected
|
66
|
+
|
67
67
|
def safe_search_args(engine, params)
|
68
|
-
params.
|
68
|
+
all_hash = params.respond_to?(:to_unsafe_hash) ? params.to_unsafe_hash : params.to_hash
|
69
|
+
all_hash.symbolize_keys.slice( *engine.public_settable_search_args )
|
69
70
|
end
|
70
|
-
|
71
|
+
|
71
72
|
def deny_access(exception)
|
72
|
-
render :
|
73
|
+
render :plain => exception.message, :status => 403
|
73
74
|
end
|
74
|
-
|
75
|
-
def render_404(exception)
|
76
|
-
render :
|
75
|
+
|
76
|
+
def render_404(exception)
|
77
|
+
render :plain => exception.message, :status => 404
|
77
78
|
end
|
78
|
-
|
79
|
-
|
79
|
+
|
80
|
+
|
80
81
|
end
|
81
82
|
end
|
@@ -36,12 +36,12 @@ module BentoSearch
|
|
36
36
|
# search service it came from. May be alphanumeric. May be nil
|
37
37
|
# for engines that don't support it.
|
38
38
|
serializable_attr_accessor :unique_id
|
39
|
-
|
39
|
+
|
40
40
|
|
41
41
|
# If set to true, item will refuse to generate an openurl,
|
42
42
|
# returning nil from #to_openurl or #openurl_kev
|
43
43
|
serializable_attr_accessor :openurl_disabled
|
44
|
-
|
44
|
+
|
45
45
|
|
46
46
|
# Array (possibly empty) of BentoSearch::Link objects
|
47
47
|
# representing additional links. Often SearchEngine's themselves
|
@@ -52,7 +52,7 @@ module BentoSearch
|
|
52
52
|
|
53
53
|
# * dc.title
|
54
54
|
# * schema.org CreativeWork: 'name'
|
55
|
-
serializable_attr_accessor :title
|
55
|
+
serializable_attr_accessor :title
|
56
56
|
# backwards compat, we used to have separate titles and subtitles
|
57
57
|
alias_method :complete_title, :title
|
58
58
|
|
@@ -112,7 +112,7 @@ module BentoSearch
|
|
112
112
|
#
|
113
113
|
# Note: We're re-thinking this, might allow uncontrolled
|
114
114
|
# in here instead.
|
115
|
-
serializable_attr_accessor :format
|
115
|
+
serializable_attr_accessor :format
|
116
116
|
|
117
117
|
# Translated from internal format vocab at #format. Outputs
|
118
118
|
# eg http://schema.org/Book
|
@@ -137,7 +137,7 @@ module BentoSearch
|
|
137
137
|
# uncontrolled presumably english-language format string.
|
138
138
|
# if supplied will be used in display in place of controlled
|
139
139
|
# format.
|
140
|
-
serializable_attr_accessor :format_str
|
140
|
+
serializable_attr_accessor :format_str
|
141
141
|
|
142
142
|
# Language of materials. Producer can set language_code to an ISO 639-1 (two
|
143
143
|
# letter) or 639-3 (three letter) language code. If you do this, you don't
|
@@ -153,10 +153,10 @@ module BentoSearch
|
|
153
153
|
# #language_iso_639_2 (either may be null), or #language_str for uncontrolled
|
154
154
|
# string. If engine just sets one of these, internals take care of filling
|
155
155
|
# out the others.
|
156
|
-
serializable_attr_accessor :language_code
|
156
|
+
serializable_attr_accessor :language_code
|
157
157
|
attr_writer :language_str
|
158
158
|
def language_str
|
159
|
-
@language_str || language_code.try do |code|
|
159
|
+
(@language_str ||= nil) || language_code.try do |code|
|
160
160
|
LanguageList::LanguageInfo.find(code).try do |lang_obj|
|
161
161
|
lang_obj.name
|
162
162
|
end
|
@@ -230,12 +230,12 @@ module BentoSearch
|
|
230
230
|
|
231
231
|
# An ARRAY of string query-in-context snippets. Will usually
|
232
232
|
# have highlighting <b> tags in it. Creator is responsible
|
233
|
-
# for making sure it's otherwise html-safe.
|
233
|
+
# for making sure it's otherwise html-safe.
|
234
234
|
#
|
235
235
|
# Not all engines may store a snippets array in addition to abstract,
|
236
236
|
# some may only store one or the other. Some may store both but
|
237
237
|
# with same content formatted differently (array of multiple vs
|
238
|
-
# one combined string), some engines they may be different.
|
238
|
+
# one combined string), some engines they may be different.
|
239
239
|
attr_accessor :snippets
|
240
240
|
serializable_attr :snippets
|
241
241
|
|
@@ -263,7 +263,7 @@ module BentoSearch
|
|
263
263
|
# for it? Nice thing about the configuration hash instead is it's
|
264
264
|
# easily serializable, it's just data.
|
265
265
|
#
|
266
|
-
# Although we intentionally do NOT include these in JSON serialization, ha.
|
266
|
+
# Although we intentionally do NOT include these in JSON serialization, ha.
|
267
267
|
attr_accessor :display_configuration
|
268
268
|
attr_accessor :engine_id
|
269
269
|
|
@@ -4,21 +4,21 @@ require 'json'
|
|
4
4
|
require 'date'
|
5
5
|
|
6
6
|
# Call #dump_to_json on a BentoSearch value object (such as BentoSearch::Result or ::Author)
|
7
|
-
# to get it in Json
|
7
|
+
# to get it in Json
|
8
8
|
#
|
9
9
|
# Values marked with serializable_attr in BentoSearch::Result are
|
10
|
-
# included in seralization.
|
10
|
+
# included in seralization.
|
11
11
|
#
|
12
12
|
# At present metadata and configuration are NOT serialized: #decorator, #display_configuration,
|
13
13
|
# and #engine_id are not included in the serialization, so when loaded from serialization,
|
14
|
-
# ResultItems will not have such things set.
|
15
|
-
#
|
14
|
+
# ResultItems will not have such things set.
|
15
|
+
#
|
16
16
|
# * Works by getting and setting instance variables directly, ignores getters/setters
|
17
17
|
#
|
18
18
|
# * This means decorated values are NOT included in serialization, the raw
|
19
19
|
# values are what is serialized. This is intended, we serialize internal
|
20
20
|
# state, not decoration which can be recreated. You should make sure the decorators you
|
21
|
-
# want are applied after de-serialization.
|
21
|
+
# want are applied after de-serialization.
|
22
22
|
#
|
23
23
|
# * preserves html_safety status in serialization, by adding extra `_attr_htmlsafe: true` key/value
|
24
24
|
#
|
@@ -31,22 +31,23 @@ module BentoSearch::Results::Serialization
|
|
31
31
|
self._serializable_attr_options = {}
|
32
32
|
end
|
33
33
|
|
34
|
+
|
34
35
|
class_methods do
|
35
36
|
# Just a macro to mark a property name serializable -- the name is
|
36
37
|
# of an instance method that will be included in our serializations
|
37
|
-
# and de-serializations.
|
38
|
+
# and de-serializations.
|
38
39
|
#
|
39
40
|
# Options:
|
40
41
|
# * collection_of: String fully qualified name of a class that is
|
41
42
|
# also BentoSearch::Results::Serialization, the attribute
|
42
|
-
# is an array of these.
|
43
|
+
# is an array of these.
|
43
44
|
# * serializer: String fully qualified class name of a serializer
|
44
45
|
# class that has a `dump` and a `load` for individual values,
|
45
46
|
# we just use it for Date now, see BentoSearch::Results::Serialization::Date
|
46
47
|
def serializable_attr(symbol, options = nil)
|
47
48
|
symbol = symbol.to_s
|
48
49
|
self._serializable_attrs << symbol
|
49
|
-
if options
|
50
|
+
if options
|
50
51
|
self._serializable_attr_options[symbol] = options
|
51
52
|
end
|
52
53
|
end
|
@@ -67,14 +68,14 @@ module BentoSearch::Results::Serialization
|
|
67
68
|
|
68
69
|
|
69
70
|
if _serializable_attr_options[key] && _serializable_attr_options[key][:collection_of]
|
70
|
-
klass =
|
71
|
+
klass = correct_const_get(_serializable_attr_options[key][:collection_of])
|
71
72
|
value = value.collect do |item|
|
72
73
|
klass.from_internal_state_hash(item)
|
73
74
|
end
|
74
75
|
end
|
75
76
|
|
76
77
|
if _serializable_attr_options[key] && _serializable_attr_options[key][:serializer]
|
77
|
-
klass =
|
78
|
+
klass = correct_const_get(_serializable_attr_options[key][:serializer])
|
78
79
|
value = klass.load(value)
|
79
80
|
end
|
80
81
|
|
@@ -92,18 +93,26 @@ module BentoSearch::Results::Serialization
|
|
92
93
|
self.from_internal_state_hash( JSON.parse! json_str )
|
93
94
|
end
|
94
95
|
|
96
|
+
def correct_const_get(str)
|
97
|
+
if Gem::Version.new(Rails.version) > Gem::Version.new('4.2.99')
|
98
|
+
const_get(str)
|
99
|
+
else
|
100
|
+
qualified_const_get(str)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
95
104
|
end
|
96
105
|
|
97
106
|
def internal_state_hash
|
98
107
|
hash = {}
|
99
108
|
self._serializable_attrs.each do |accessor|
|
100
109
|
accessor = accessor.to_s
|
101
|
-
value = self.instance_variable_get("@#{accessor}")
|
110
|
+
value = self.instance_variable_defined?("@#{accessor}") && self.instance_variable_get("@#{accessor}")
|
102
111
|
|
103
112
|
next if value.blank?
|
104
113
|
|
105
114
|
if _serializable_attr_options[accessor] && _serializable_attr_options[accessor][:serializer]
|
106
|
-
klass = self.class.
|
115
|
+
klass = self.class.correct_const_get(_serializable_attr_options[accessor][:serializer])
|
107
116
|
value = klass.dump(value)
|
108
117
|
elsif value.respond_to?(:to_ary)
|
109
118
|
value = value.to_ary.collect do |item|
|
@@ -133,4 +142,4 @@ module BentoSearch::Results::Serialization
|
|
133
142
|
end
|
134
143
|
end
|
135
144
|
|
136
|
-
end
|
145
|
+
end
|