ebsco-eds 0.3.14.pre → 0.3.15.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ebsco/eds/configuration.rb +3 -1
- data/lib/ebsco/eds/options.rb +2 -0
- data/lib/ebsco/eds/record.rb +100 -19
- data/lib/ebsco/eds/results.rb +4 -4
- data/lib/ebsco/eds/session.rb +43 -10
- data/lib/ebsco/eds/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0ed32f227645931d31d5ee595ca1b8793f4be7f
|
4
|
+
data.tar.gz: b5085f8c0c463f7bf5b084104119baf15befff4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72e23e4ee620db0170f53541684aa44d83f33822a44ec1d896f258c32908eeab0accf8041a860438e631e5dc0fe7b5366a48c205d68e8b293236c37abf1c8fcb
|
7
|
+
data.tar.gz: 929364e6faa8bebaa6f30288806cca3dff1d06bb3319c6603fab2091cf2ca7f76b36b9c9d2481fdf4dc8ff5ad2a489f4f62b2edcff5a0e598b1c465499237a3f
|
@@ -38,7 +38,9 @@ module EBSCO
|
|
38
38
|
:open_timeout => 12,
|
39
39
|
:max_page_jumps => 6,
|
40
40
|
:max_page_jump_attempts => 10,
|
41
|
-
:recover_from_bad_source_type => false
|
41
|
+
:recover_from_bad_source_type => false,
|
42
|
+
:all_subjects_search_links => false,
|
43
|
+
:decode_sanitize_html => false
|
42
44
|
}
|
43
45
|
@valid_config_keys = @config.keys
|
44
46
|
end
|
data/lib/ebsco/eds/options.rb
CHANGED
data/lib/ebsco/eds/record.rb
CHANGED
@@ -121,7 +121,6 @@ module EBSCO
|
|
121
121
|
'Related ISBNs'
|
122
122
|
]
|
123
123
|
|
124
|
-
|
125
124
|
# Raw record as returned by the \EDS API via search or retrieve
|
126
125
|
attr_accessor(*ATTRIBUTES)
|
127
126
|
|
@@ -130,7 +129,25 @@ module EBSCO
|
|
130
129
|
end
|
131
130
|
|
132
131
|
# Creates a search or retrieval result record
|
133
|
-
def initialize(results_record)
|
132
|
+
def initialize(results_record, eds_config = nil)
|
133
|
+
|
134
|
+
# translate all subject search link field codes to DE?
|
135
|
+
@all_subjects_search_links = false
|
136
|
+
if eds_config
|
137
|
+
@all_subjects_search_links = eds_config[:all_subjects_search_links]
|
138
|
+
end
|
139
|
+
if ENV.has_key? 'EDS_ALL_SUBJECTS_SEARCH_LINKS'
|
140
|
+
@all_subjects_search_links = ENV['EDS_ALL_SUBJECTS_SEARCH_LINKS']
|
141
|
+
end
|
142
|
+
|
143
|
+
# decode and sanitize html in item data?
|
144
|
+
@decode_sanitize_html = false
|
145
|
+
if eds_config
|
146
|
+
@decode_sanitize_html = eds_config[:decode_sanitize_html]
|
147
|
+
end
|
148
|
+
if ENV.has_key? 'EDS_DECODE_SANITIZE_HTML'
|
149
|
+
@decode_sanitize_html = ENV['EDS_DECODE_SANITIZE_HTML']
|
150
|
+
end
|
134
151
|
|
135
152
|
if results_record.key? 'Record'
|
136
153
|
@record = results_record['Record'] # single record returned by retrieve api
|
@@ -169,6 +186,7 @@ module EBSCO
|
|
169
186
|
@eds_subjects =
|
170
187
|
get_item_data({name: 'Subject', label: 'Subject Terms', group: 'Su'}) ||
|
171
188
|
get_item_data({name: 'Subject', label: 'Subject Indexing', group: 'Su'}) ||
|
189
|
+
get_item_data({name: 'Subject', label: 'Subject Category', group: 'Su'}) ||
|
172
190
|
bib_subjects
|
173
191
|
@eds_subjects_geographic =
|
174
192
|
get_item_data({name: 'SubjectGeographic', label: 'Geographic Terms', group: 'Su'}) ||
|
@@ -295,7 +313,40 @@ module EBSCO
|
|
295
313
|
# Fulltext - RETRIEVE ONLY
|
296
314
|
def html_fulltext
|
297
315
|
if @record.fetch('FullText',{}).fetch('Text',{}).fetch('Availability',0) == '1'
|
298
|
-
|
316
|
+
|
317
|
+
# sanitize?
|
318
|
+
if @decode_sanitize_html
|
319
|
+
|
320
|
+
# transformer
|
321
|
+
clean_fulltext = lambda do |env|
|
322
|
+
node = env[:node]
|
323
|
+
if node.name == 'title'
|
324
|
+
node.name = 'h1'
|
325
|
+
end
|
326
|
+
if node.name == 'sbt'
|
327
|
+
node.name = 'h2'
|
328
|
+
end
|
329
|
+
if node.name == 'jsection'
|
330
|
+
node.name = 'h3'
|
331
|
+
end
|
332
|
+
if node.name == 'et'
|
333
|
+
node.name = 'h3'
|
334
|
+
end
|
335
|
+
node
|
336
|
+
end
|
337
|
+
|
338
|
+
fulltext_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
|
339
|
+
:elements => Sanitize::Config::RELAXED[:elements] +
|
340
|
+
%w[relatesto searchlink],
|
341
|
+
:attributes => Sanitize::Config::RELAXED[:attributes].merge(
|
342
|
+
'searchlink' => %w[fieldcode term]),
|
343
|
+
:remove_contents => true,
|
344
|
+
:transformers => [clean_fulltext])
|
345
|
+
|
346
|
+
html_decode_and_sanitize(@record.fetch('FullText',{}).fetch('Text',{})['Value'], fulltext_config)
|
347
|
+
else
|
348
|
+
@record.fetch('FullText',{}).fetch('Text',{})['Value']
|
349
|
+
end
|
299
350
|
else
|
300
351
|
nil
|
301
352
|
end
|
@@ -757,8 +808,7 @@ module EBSCO
|
|
757
808
|
|
758
809
|
@items.each do |item|
|
759
810
|
if item['Name'] == options[:name] && item['Label'] == options[:label] && item['Group'] == options[:group]
|
760
|
-
|
761
|
-
return sanitize_data(item['Data'])
|
811
|
+
return sanitize_data(item)
|
762
812
|
end
|
763
813
|
end
|
764
814
|
return nil
|
@@ -767,7 +817,7 @@ module EBSCO
|
|
767
817
|
|
768
818
|
@items.each do |item|
|
769
819
|
if item['Name'] == options[:name] && item['Label'] == options[:label]
|
770
|
-
return sanitize_data(item
|
820
|
+
return sanitize_data(item)
|
771
821
|
end
|
772
822
|
end
|
773
823
|
return nil
|
@@ -776,7 +826,7 @@ module EBSCO
|
|
776
826
|
|
777
827
|
@items.each do |item|
|
778
828
|
if item['Name'] == options[:name] && item['Group'] == options[:group]
|
779
|
-
return sanitize_data(item
|
829
|
+
return sanitize_data(item)
|
780
830
|
end
|
781
831
|
end
|
782
832
|
return nil
|
@@ -785,7 +835,7 @@ module EBSCO
|
|
785
835
|
|
786
836
|
@items.each do |item|
|
787
837
|
if item['Label'] == options[:label] && item['Group'] == options[:group]
|
788
|
-
return sanitize_data(item
|
838
|
+
return sanitize_data(item)
|
789
839
|
end
|
790
840
|
end
|
791
841
|
return nil
|
@@ -794,7 +844,7 @@ module EBSCO
|
|
794
844
|
|
795
845
|
@items.each do |item|
|
796
846
|
if item['Label'] == options[:label]
|
797
|
-
return sanitize_data(item
|
847
|
+
return sanitize_data(item)
|
798
848
|
end
|
799
849
|
end
|
800
850
|
return nil
|
@@ -803,7 +853,7 @@ module EBSCO
|
|
803
853
|
|
804
854
|
@items.each do |item|
|
805
855
|
if item['Name'] == options[:name]
|
806
|
-
return sanitize_data(item
|
856
|
+
return sanitize_data(item)
|
807
857
|
end
|
808
858
|
end
|
809
859
|
return nil
|
@@ -815,16 +865,47 @@ module EBSCO
|
|
815
865
|
end
|
816
866
|
end
|
817
867
|
|
818
|
-
# sanitize html
|
819
|
-
def sanitize_data(
|
820
|
-
|
821
|
-
|
822
|
-
|
868
|
+
# decode & sanitize html tags found in item data; apply any special transformations
|
869
|
+
def sanitize_data(item)
|
870
|
+
|
871
|
+
if item['Data']
|
872
|
+
data = item['Data']
|
873
|
+
|
874
|
+
# group-specific transformations
|
875
|
+
if item['Group']
|
876
|
+
group = item['Group']
|
877
|
+
if group == 'Su'
|
878
|
+
# translate searchLink field codes to DE?
|
879
|
+
if @all_subjects_search_links
|
880
|
+
data = data.gsub(/(searchLink fieldCode=")([A-Z]+)/, '\1DE')
|
881
|
+
end
|
882
|
+
end
|
883
|
+
end
|
884
|
+
|
885
|
+
# decode-sanitize?
|
886
|
+
if @decode_sanitize_html
|
887
|
+
data = html_decode_and_sanitize(data)
|
888
|
+
end
|
889
|
+
|
890
|
+
data
|
891
|
+
|
892
|
+
else
|
893
|
+
nil # no item data present
|
894
|
+
end
|
895
|
+
|
896
|
+
end
|
897
|
+
|
898
|
+
# Decode any html elements and then run it through sanitize to preserve entities (eg: ampersand) and strip out
|
899
|
+
# elements/attributes that aren't explicitly whitelisted.
|
900
|
+
# The RELAXED config: https://github.com/rgrove/sanitize/blob/master/lib/sanitize/config/relaxed.rb
|
901
|
+
def html_decode_and_sanitize(data, config = nil)
|
902
|
+
default_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
|
903
|
+
:elements => Sanitize::Config::RELAXED[:elements] +
|
904
|
+
%w[relatesto searchlink],
|
823
905
|
:attributes => Sanitize::Config::RELAXED[:attributes].merge(
|
824
|
-
'searchlink' => [
|
825
|
-
|
826
|
-
)
|
827
|
-
Sanitize.fragment(html, sanitize_config)
|
906
|
+
'searchlink' => %w[fieldcode term]))
|
907
|
+
sanitize_config = config.nil? ? default_config : config
|
908
|
+
Sanitize.fragment(CGI.unescapeHTML(data.to_s), sanitize_config)
|
828
909
|
end
|
829
910
|
|
830
911
|
# dynamically add item metadata as 'eds_extra_ItemNameOrLabel'
|
data/lib/ebsco/eds/results.rb
CHANGED
@@ -28,7 +28,7 @@ module EBSCO
|
|
28
28
|
|
29
29
|
# Creates search results from the \EDS API search response. It includes information about the results and a list
|
30
30
|
# of Record items.
|
31
|
-
def initialize(search_results, additional_limiters = {}, options = {})
|
31
|
+
def initialize(search_results, eds_config = nil, additional_limiters = {}, options = {})
|
32
32
|
|
33
33
|
@results = search_results
|
34
34
|
@limiters = additional_limiters
|
@@ -39,7 +39,7 @@ module EBSCO
|
|
39
39
|
if @results['SearchResult']['Data']['Records']
|
40
40
|
@results['SearchResult']['Data']['Records'].each { |record|
|
41
41
|
|
42
|
-
@records.push(EBSCO::EDS::Record.new(record))
|
42
|
+
@records.push(EBSCO::EDS::Record.new(record, eds_config))
|
43
43
|
|
44
44
|
# # records hidden in guest mode
|
45
45
|
# if record['Header']['AccessLevel']
|
@@ -64,7 +64,7 @@ module EBSCO
|
|
64
64
|
rs_entries = related_item.fetch('Records',{})
|
65
65
|
if rs_entries.count > 0
|
66
66
|
rs_entries.each do |rs_record|
|
67
|
-
@research_starters.push(EBSCO::EDS::Record.new(rs_record))
|
67
|
+
@research_starters.push(EBSCO::EDS::Record.new(rs_record, eds_config))
|
68
68
|
end
|
69
69
|
end
|
70
70
|
end
|
@@ -80,7 +80,7 @@ module EBSCO
|
|
80
80
|
_publication_matches = related_item.fetch('PublicationRecords',{})
|
81
81
|
if _publication_matches.count > 0
|
82
82
|
_publication_matches.each do |publication_record|
|
83
|
-
@publication_match.push(EBSCO::EDS::Record.new(publication_record))
|
83
|
+
@publication_match.push(EBSCO::EDS::Record.new(publication_record, eds_config))
|
84
84
|
end
|
85
85
|
end
|
86
86
|
end
|
data/lib/ebsco/eds/session.rb
CHANGED
@@ -207,10 +207,11 @@ module EBSCO
|
|
207
207
|
# use existing/updated SearchOptions
|
208
208
|
if options.empty?
|
209
209
|
if @search_options.nil?
|
210
|
-
@search_results = EBSCO::EDS::Results.new(empty_results)
|
210
|
+
@search_results = EBSCO::EDS::Results.new(empty_results,@config)
|
211
211
|
else
|
212
212
|
_response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
|
213
|
-
@search_results = EBSCO::EDS::Results.new(_response, @
|
213
|
+
@search_results = EBSCO::EDS::Results.new(_response, @config,
|
214
|
+
@info.available_limiters, options)
|
214
215
|
if increment_page
|
215
216
|
@current_page = @search_results.page_number
|
216
217
|
end
|
@@ -226,7 +227,8 @@ module EBSCO
|
|
226
227
|
end
|
227
228
|
|
228
229
|
_response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
|
229
|
-
@search_results = EBSCO::EDS::Results.new(_response, @
|
230
|
+
@search_results = EBSCO::EDS::Results.new(_response, @config,
|
231
|
+
@info.available_limiters, options)
|
230
232
|
|
231
233
|
# create temp format facet results if needed
|
232
234
|
if options['f']
|
@@ -236,7 +238,10 @@ module EBSCO
|
|
236
238
|
format_search_options = EBSCO::EDS::Options.new(format_options, @info)
|
237
239
|
format_search_options.Comment = 'temp source type facets'
|
238
240
|
_format_response = do_request(:post, path: '/edsapi/rest/Search', payload: format_search_options)
|
239
|
-
@search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response,
|
241
|
+
@search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response,
|
242
|
+
@config,
|
243
|
+
@info.available_limiters,
|
244
|
+
format_options)
|
240
245
|
end
|
241
246
|
end
|
242
247
|
|
@@ -248,7 +253,10 @@ module EBSCO
|
|
248
253
|
content_search_options = EBSCO::EDS::Options.new(content_options, @info)
|
249
254
|
content_search_options.Comment = 'temp content provider facet'
|
250
255
|
_content_response = do_request(:post, path: '/edsapi/rest/Search', payload: content_search_options)
|
251
|
-
@search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response,
|
256
|
+
@search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response,
|
257
|
+
@config,
|
258
|
+
@info.available_limiters,
|
259
|
+
content_options)
|
252
260
|
end
|
253
261
|
end
|
254
262
|
|
@@ -297,8 +305,8 @@ module EBSCO
|
|
297
305
|
retrieve_response = do_request(:post, path: @config[:retrieve_url], payload: payload)
|
298
306
|
#retrieve_params = "?an=#{an}&dbid=#{dbid}&ebookpreferredformat=#{ebook}"
|
299
307
|
#retrieve_response = do_request(:get, path: @config[:retrieve_url] + retrieve_params)
|
300
|
-
record = EBSCO::EDS::Record.new(retrieve_response)
|
301
|
-
# puts 'RECORD: ' + record.
|
308
|
+
record = EBSCO::EDS::Record.new(retrieve_response, @config)
|
309
|
+
# puts 'RECORD: ' + record.inspect
|
302
310
|
record
|
303
311
|
end
|
304
312
|
|
@@ -357,7 +365,7 @@ module EBSCO
|
|
357
365
|
|
358
366
|
# return json result set with just the previous and next records in it
|
359
367
|
r = empty_results(cached_results.stat_total_hits)
|
360
|
-
results = EBSCO::EDS::Results.new(r)
|
368
|
+
results = EBSCO::EDS::Results.new(r, @config)
|
361
369
|
next_previous_records = []
|
362
370
|
unless result_prev.nil?
|
363
371
|
next_previous_records << result_prev
|
@@ -380,7 +388,7 @@ module EBSCO
|
|
380
388
|
}
|
381
389
|
end
|
382
390
|
r = empty_results(records.length)
|
383
|
-
results = EBSCO::EDS::Results.new(r)
|
391
|
+
results = EBSCO::EDS::Results.new(r, @config)
|
384
392
|
results.records = records
|
385
393
|
results.to_solr
|
386
394
|
end
|
@@ -818,7 +826,7 @@ module EBSCO
|
|
818
826
|
else
|
819
827
|
if !action.include?('SourceType:'+bad_source_type+')')
|
820
828
|
# not a bad source type, keep it
|
821
|
-
new_actions
|
829
|
+
new_actions << action
|
822
830
|
end
|
823
831
|
end
|
824
832
|
else
|
@@ -826,6 +834,31 @@ module EBSCO
|
|
826
834
|
new_actions << action
|
827
835
|
end
|
828
836
|
}
|
837
|
+
|
838
|
+
new_filters = []
|
839
|
+
filter_id = 1
|
840
|
+
payload.SearchCriteria.FacetFilters.each { |filter|
|
841
|
+
filter['FacetValues'].each { |facet_val|
|
842
|
+
if facet_val['Id'] == 'SourceType'
|
843
|
+
if bad_source_type.nil?
|
844
|
+
# skip the source type since we don't know if it's bad or not
|
845
|
+
else
|
846
|
+
# not a bad sourcetype, add it
|
847
|
+
if !facet_val['Value'].include?(bad_source_type)
|
848
|
+
filter['FilterId'] = filter_id
|
849
|
+
filter_id += 1
|
850
|
+
new_filters << filter
|
851
|
+
end
|
852
|
+
end
|
853
|
+
else
|
854
|
+
# not a SourceType filter, add it
|
855
|
+
filter['FilterId'] = filter_id
|
856
|
+
filter_id += 1
|
857
|
+
new_filters << filter
|
858
|
+
end
|
859
|
+
}
|
860
|
+
}
|
861
|
+
payload.SearchCriteria.FacetFilters = new_filters
|
829
862
|
payload.Actions = new_actions
|
830
863
|
do_request(method, path: path, payload: payload, attempt: attempt+1)
|
831
864
|
else
|
data/lib/ebsco/eds/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebsco-eds
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.14.pre
|
4
|
+
version: 0.3.15.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill McKinney
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|