ebsco-eds 0.3.14.pre → 0.3.15.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ebsco/eds/configuration.rb +3 -1
- data/lib/ebsco/eds/options.rb +2 -0
- data/lib/ebsco/eds/record.rb +100 -19
- data/lib/ebsco/eds/results.rb +4 -4
- data/lib/ebsco/eds/session.rb +43 -10
- data/lib/ebsco/eds/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0ed32f227645931d31d5ee595ca1b8793f4be7f
|
4
|
+
data.tar.gz: b5085f8c0c463f7bf5b084104119baf15befff4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72e23e4ee620db0170f53541684aa44d83f33822a44ec1d896f258c32908eeab0accf8041a860438e631e5dc0fe7b5366a48c205d68e8b293236c37abf1c8fcb
|
7
|
+
data.tar.gz: 929364e6faa8bebaa6f30288806cca3dff1d06bb3319c6603fab2091cf2ca7f76b36b9c9d2481fdf4dc8ff5ad2a489f4f62b2edcff5a0e598b1c465499237a3f
|
@@ -38,7 +38,9 @@ module EBSCO
|
|
38
38
|
:open_timeout => 12,
|
39
39
|
:max_page_jumps => 6,
|
40
40
|
:max_page_jump_attempts => 10,
|
41
|
-
:recover_from_bad_source_type => false
|
41
|
+
:recover_from_bad_source_type => false,
|
42
|
+
:all_subjects_search_links => false,
|
43
|
+
:decode_sanitize_html => false
|
42
44
|
}
|
43
45
|
@valid_config_keys = @config.keys
|
44
46
|
end
|
data/lib/ebsco/eds/options.rb
CHANGED
data/lib/ebsco/eds/record.rb
CHANGED
@@ -121,7 +121,6 @@ module EBSCO
|
|
121
121
|
'Related ISBNs'
|
122
122
|
]
|
123
123
|
|
124
|
-
|
125
124
|
# Raw record as returned by the \EDS API via search or retrieve
|
126
125
|
attr_accessor(*ATTRIBUTES)
|
127
126
|
|
@@ -130,7 +129,25 @@ module EBSCO
|
|
130
129
|
end
|
131
130
|
|
132
131
|
# Creates a search or retrieval result record
|
133
|
-
def initialize(results_record)
|
132
|
+
def initialize(results_record, eds_config = nil)
|
133
|
+
|
134
|
+
# translate all subject search link field codes to DE?
|
135
|
+
@all_subjects_search_links = false
|
136
|
+
if eds_config
|
137
|
+
@all_subjects_search_links = eds_config[:all_subjects_search_links]
|
138
|
+
end
|
139
|
+
if ENV.has_key? 'EDS_ALL_SUBJECTS_SEARCH_LINKS'
|
140
|
+
@all_subjects_search_links = ENV['EDS_ALL_SUBJECTS_SEARCH_LINKS']
|
141
|
+
end
|
142
|
+
|
143
|
+
# decode and sanitize html in item data?
|
144
|
+
@decode_sanitize_html = false
|
145
|
+
if eds_config
|
146
|
+
@decode_sanitize_html = eds_config[:decode_sanitize_html]
|
147
|
+
end
|
148
|
+
if ENV.has_key? 'EDS_DECODE_SANITIZE_HTML'
|
149
|
+
@decode_sanitize_html = ENV['EDS_DECODE_SANITIZE_HTML']
|
150
|
+
end
|
134
151
|
|
135
152
|
if results_record.key? 'Record'
|
136
153
|
@record = results_record['Record'] # single record returned by retrieve api
|
@@ -169,6 +186,7 @@ module EBSCO
|
|
169
186
|
@eds_subjects =
|
170
187
|
get_item_data({name: 'Subject', label: 'Subject Terms', group: 'Su'}) ||
|
171
188
|
get_item_data({name: 'Subject', label: 'Subject Indexing', group: 'Su'}) ||
|
189
|
+
get_item_data({name: 'Subject', label: 'Subject Category', group: 'Su'}) ||
|
172
190
|
bib_subjects
|
173
191
|
@eds_subjects_geographic =
|
174
192
|
get_item_data({name: 'SubjectGeographic', label: 'Geographic Terms', group: 'Su'}) ||
|
@@ -295,7 +313,40 @@ module EBSCO
|
|
295
313
|
# Fulltext - RETRIEVE ONLY
|
296
314
|
def html_fulltext
|
297
315
|
if @record.fetch('FullText',{}).fetch('Text',{}).fetch('Availability',0) == '1'
|
298
|
-
|
316
|
+
|
317
|
+
# sanitize?
|
318
|
+
if @decode_sanitize_html
|
319
|
+
|
320
|
+
# transformer
|
321
|
+
clean_fulltext = lambda do |env|
|
322
|
+
node = env[:node]
|
323
|
+
if node.name == 'title'
|
324
|
+
node.name = 'h1'
|
325
|
+
end
|
326
|
+
if node.name == 'sbt'
|
327
|
+
node.name = 'h2'
|
328
|
+
end
|
329
|
+
if node.name == 'jsection'
|
330
|
+
node.name = 'h3'
|
331
|
+
end
|
332
|
+
if node.name == 'et'
|
333
|
+
node.name = 'h3'
|
334
|
+
end
|
335
|
+
node
|
336
|
+
end
|
337
|
+
|
338
|
+
fulltext_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
|
339
|
+
:elements => Sanitize::Config::RELAXED[:elements] +
|
340
|
+
%w[relatesto searchlink],
|
341
|
+
:attributes => Sanitize::Config::RELAXED[:attributes].merge(
|
342
|
+
'searchlink' => %w[fieldcode term]),
|
343
|
+
:remove_contents => true,
|
344
|
+
:transformers => [clean_fulltext])
|
345
|
+
|
346
|
+
html_decode_and_sanitize(@record.fetch('FullText',{}).fetch('Text',{})['Value'], fulltext_config)
|
347
|
+
else
|
348
|
+
@record.fetch('FullText',{}).fetch('Text',{})['Value']
|
349
|
+
end
|
299
350
|
else
|
300
351
|
nil
|
301
352
|
end
|
@@ -757,8 +808,7 @@ module EBSCO
|
|
757
808
|
|
758
809
|
@items.each do |item|
|
759
810
|
if item['Name'] == options[:name] && item['Label'] == options[:label] && item['Group'] == options[:group]
|
760
|
-
|
761
|
-
return sanitize_data(item['Data'])
|
811
|
+
return sanitize_data(item)
|
762
812
|
end
|
763
813
|
end
|
764
814
|
return nil
|
@@ -767,7 +817,7 @@ module EBSCO
|
|
767
817
|
|
768
818
|
@items.each do |item|
|
769
819
|
if item['Name'] == options[:name] && item['Label'] == options[:label]
|
770
|
-
return sanitize_data(item
|
820
|
+
return sanitize_data(item)
|
771
821
|
end
|
772
822
|
end
|
773
823
|
return nil
|
@@ -776,7 +826,7 @@ module EBSCO
|
|
776
826
|
|
777
827
|
@items.each do |item|
|
778
828
|
if item['Name'] == options[:name] && item['Group'] == options[:group]
|
779
|
-
return sanitize_data(item
|
829
|
+
return sanitize_data(item)
|
780
830
|
end
|
781
831
|
end
|
782
832
|
return nil
|
@@ -785,7 +835,7 @@ module EBSCO
|
|
785
835
|
|
786
836
|
@items.each do |item|
|
787
837
|
if item['Label'] == options[:label] && item['Group'] == options[:group]
|
788
|
-
return sanitize_data(item
|
838
|
+
return sanitize_data(item)
|
789
839
|
end
|
790
840
|
end
|
791
841
|
return nil
|
@@ -794,7 +844,7 @@ module EBSCO
|
|
794
844
|
|
795
845
|
@items.each do |item|
|
796
846
|
if item['Label'] == options[:label]
|
797
|
-
return sanitize_data(item
|
847
|
+
return sanitize_data(item)
|
798
848
|
end
|
799
849
|
end
|
800
850
|
return nil
|
@@ -803,7 +853,7 @@ module EBSCO
|
|
803
853
|
|
804
854
|
@items.each do |item|
|
805
855
|
if item['Name'] == options[:name]
|
806
|
-
return sanitize_data(item
|
856
|
+
return sanitize_data(item)
|
807
857
|
end
|
808
858
|
end
|
809
859
|
return nil
|
@@ -815,16 +865,47 @@ module EBSCO
|
|
815
865
|
end
|
816
866
|
end
|
817
867
|
|
818
|
-
# sanitize html
|
819
|
-
def sanitize_data(
|
820
|
-
|
821
|
-
|
822
|
-
|
868
|
+
# decode & sanitize html tags found in item data; apply any special transformations
|
869
|
+
def sanitize_data(item)
|
870
|
+
|
871
|
+
if item['Data']
|
872
|
+
data = item['Data']
|
873
|
+
|
874
|
+
# group-specific transformations
|
875
|
+
if item['Group']
|
876
|
+
group = item['Group']
|
877
|
+
if group == 'Su'
|
878
|
+
# translate searchLink field codes to DE?
|
879
|
+
if @all_subjects_search_links
|
880
|
+
data = data.gsub(/(searchLink fieldCode=")([A-Z]+)/, '\1DE')
|
881
|
+
end
|
882
|
+
end
|
883
|
+
end
|
884
|
+
|
885
|
+
# decode-sanitize?
|
886
|
+
if @decode_sanitize_html
|
887
|
+
data = html_decode_and_sanitize(data)
|
888
|
+
end
|
889
|
+
|
890
|
+
data
|
891
|
+
|
892
|
+
else
|
893
|
+
nil # no item data present
|
894
|
+
end
|
895
|
+
|
896
|
+
end
|
897
|
+
|
898
|
+
# Decode any html elements and then run it through sanitize to preserve entities (eg: ampersand) and strip out
|
899
|
+
# elements/attributes that aren't explicitly whitelisted.
|
900
|
+
# The RELAXED config: https://github.com/rgrove/sanitize/blob/master/lib/sanitize/config/relaxed.rb
|
901
|
+
def html_decode_and_sanitize(data, config = nil)
|
902
|
+
default_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
|
903
|
+
:elements => Sanitize::Config::RELAXED[:elements] +
|
904
|
+
%w[relatesto searchlink],
|
823
905
|
:attributes => Sanitize::Config::RELAXED[:attributes].merge(
|
824
|
-
'searchlink' => [
|
825
|
-
|
826
|
-
)
|
827
|
-
Sanitize.fragment(html, sanitize_config)
|
906
|
+
'searchlink' => %w[fieldcode term]))
|
907
|
+
sanitize_config = config.nil? ? default_config : config
|
908
|
+
Sanitize.fragment(CGI.unescapeHTML(data.to_s), sanitize_config)
|
828
909
|
end
|
829
910
|
|
830
911
|
# dynamically add item metadata as 'eds_extra_ItemNameOrLabel'
|
data/lib/ebsco/eds/results.rb
CHANGED
@@ -28,7 +28,7 @@ module EBSCO
|
|
28
28
|
|
29
29
|
# Creates search results from the \EDS API search response. It includes information about the results and a list
|
30
30
|
# of Record items.
|
31
|
-
def initialize(search_results, additional_limiters = {}, options = {})
|
31
|
+
def initialize(search_results, eds_config = nil, additional_limiters = {}, options = {})
|
32
32
|
|
33
33
|
@results = search_results
|
34
34
|
@limiters = additional_limiters
|
@@ -39,7 +39,7 @@ module EBSCO
|
|
39
39
|
if @results['SearchResult']['Data']['Records']
|
40
40
|
@results['SearchResult']['Data']['Records'].each { |record|
|
41
41
|
|
42
|
-
@records.push(EBSCO::EDS::Record.new(record))
|
42
|
+
@records.push(EBSCO::EDS::Record.new(record, eds_config))
|
43
43
|
|
44
44
|
# # records hidden in guest mode
|
45
45
|
# if record['Header']['AccessLevel']
|
@@ -64,7 +64,7 @@ module EBSCO
|
|
64
64
|
rs_entries = related_item.fetch('Records',{})
|
65
65
|
if rs_entries.count > 0
|
66
66
|
rs_entries.each do |rs_record|
|
67
|
-
@research_starters.push(EBSCO::EDS::Record.new(rs_record))
|
67
|
+
@research_starters.push(EBSCO::EDS::Record.new(rs_record, eds_config))
|
68
68
|
end
|
69
69
|
end
|
70
70
|
end
|
@@ -80,7 +80,7 @@ module EBSCO
|
|
80
80
|
_publication_matches = related_item.fetch('PublicationRecords',{})
|
81
81
|
if _publication_matches.count > 0
|
82
82
|
_publication_matches.each do |publication_record|
|
83
|
-
@publication_match.push(EBSCO::EDS::Record.new(publication_record))
|
83
|
+
@publication_match.push(EBSCO::EDS::Record.new(publication_record, eds_config))
|
84
84
|
end
|
85
85
|
end
|
86
86
|
end
|
data/lib/ebsco/eds/session.rb
CHANGED
@@ -207,10 +207,11 @@ module EBSCO
|
|
207
207
|
# use existing/updated SearchOptions
|
208
208
|
if options.empty?
|
209
209
|
if @search_options.nil?
|
210
|
-
@search_results = EBSCO::EDS::Results.new(empty_results)
|
210
|
+
@search_results = EBSCO::EDS::Results.new(empty_results,@config)
|
211
211
|
else
|
212
212
|
_response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
|
213
|
-
@search_results = EBSCO::EDS::Results.new(_response, @
|
213
|
+
@search_results = EBSCO::EDS::Results.new(_response, @config,
|
214
|
+
@info.available_limiters, options)
|
214
215
|
if increment_page
|
215
216
|
@current_page = @search_results.page_number
|
216
217
|
end
|
@@ -226,7 +227,8 @@ module EBSCO
|
|
226
227
|
end
|
227
228
|
|
228
229
|
_response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
|
229
|
-
@search_results = EBSCO::EDS::Results.new(_response, @
|
230
|
+
@search_results = EBSCO::EDS::Results.new(_response, @config,
|
231
|
+
@info.available_limiters, options)
|
230
232
|
|
231
233
|
# create temp format facet results if needed
|
232
234
|
if options['f']
|
@@ -236,7 +238,10 @@ module EBSCO
|
|
236
238
|
format_search_options = EBSCO::EDS::Options.new(format_options, @info)
|
237
239
|
format_search_options.Comment = 'temp source type facets'
|
238
240
|
_format_response = do_request(:post, path: '/edsapi/rest/Search', payload: format_search_options)
|
239
|
-
@search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response,
|
241
|
+
@search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response,
|
242
|
+
@config,
|
243
|
+
@info.available_limiters,
|
244
|
+
format_options)
|
240
245
|
end
|
241
246
|
end
|
242
247
|
|
@@ -248,7 +253,10 @@ module EBSCO
|
|
248
253
|
content_search_options = EBSCO::EDS::Options.new(content_options, @info)
|
249
254
|
content_search_options.Comment = 'temp content provider facet'
|
250
255
|
_content_response = do_request(:post, path: '/edsapi/rest/Search', payload: content_search_options)
|
251
|
-
@search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response,
|
256
|
+
@search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response,
|
257
|
+
@config,
|
258
|
+
@info.available_limiters,
|
259
|
+
content_options)
|
252
260
|
end
|
253
261
|
end
|
254
262
|
|
@@ -297,8 +305,8 @@ module EBSCO
|
|
297
305
|
retrieve_response = do_request(:post, path: @config[:retrieve_url], payload: payload)
|
298
306
|
#retrieve_params = "?an=#{an}&dbid=#{dbid}&ebookpreferredformat=#{ebook}"
|
299
307
|
#retrieve_response = do_request(:get, path: @config[:retrieve_url] + retrieve_params)
|
300
|
-
record = EBSCO::EDS::Record.new(retrieve_response)
|
301
|
-
# puts 'RECORD: ' + record.
|
308
|
+
record = EBSCO::EDS::Record.new(retrieve_response, @config)
|
309
|
+
# puts 'RECORD: ' + record.inspect
|
302
310
|
record
|
303
311
|
end
|
304
312
|
|
@@ -357,7 +365,7 @@ module EBSCO
|
|
357
365
|
|
358
366
|
# return json result set with just the previous and next records in it
|
359
367
|
r = empty_results(cached_results.stat_total_hits)
|
360
|
-
results = EBSCO::EDS::Results.new(r)
|
368
|
+
results = EBSCO::EDS::Results.new(r, @config)
|
361
369
|
next_previous_records = []
|
362
370
|
unless result_prev.nil?
|
363
371
|
next_previous_records << result_prev
|
@@ -380,7 +388,7 @@ module EBSCO
|
|
380
388
|
}
|
381
389
|
end
|
382
390
|
r = empty_results(records.length)
|
383
|
-
results = EBSCO::EDS::Results.new(r)
|
391
|
+
results = EBSCO::EDS::Results.new(r, @config)
|
384
392
|
results.records = records
|
385
393
|
results.to_solr
|
386
394
|
end
|
@@ -818,7 +826,7 @@ module EBSCO
|
|
818
826
|
else
|
819
827
|
if !action.include?('SourceType:'+bad_source_type+')')
|
820
828
|
# not a bad source type, keep it
|
821
|
-
new_actions
|
829
|
+
new_actions << action
|
822
830
|
end
|
823
831
|
end
|
824
832
|
else
|
@@ -826,6 +834,31 @@ module EBSCO
|
|
826
834
|
new_actions << action
|
827
835
|
end
|
828
836
|
}
|
837
|
+
|
838
|
+
new_filters = []
|
839
|
+
filter_id = 1
|
840
|
+
payload.SearchCriteria.FacetFilters.each { |filter|
|
841
|
+
filter['FacetValues'].each { |facet_val|
|
842
|
+
if facet_val['Id'] == 'SourceType'
|
843
|
+
if bad_source_type.nil?
|
844
|
+
# skip the source type since we don't know if it's bad or not
|
845
|
+
else
|
846
|
+
# not a bad sourcetype, add it
|
847
|
+
if !facet_val['Value'].include?(bad_source_type)
|
848
|
+
filter['FilterId'] = filter_id
|
849
|
+
filter_id += 1
|
850
|
+
new_filters << filter
|
851
|
+
end
|
852
|
+
end
|
853
|
+
else
|
854
|
+
# not a SourceType filter, add it
|
855
|
+
filter['FilterId'] = filter_id
|
856
|
+
filter_id += 1
|
857
|
+
new_filters << filter
|
858
|
+
end
|
859
|
+
}
|
860
|
+
}
|
861
|
+
payload.SearchCriteria.FacetFilters = new_filters
|
829
862
|
payload.Actions = new_actions
|
830
863
|
do_request(method, path: path, payload: payload, attempt: attempt+1)
|
831
864
|
else
|
data/lib/ebsco/eds/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebsco-eds
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.15.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill McKinney
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|