ebsco-eds 0.3.14.pre → 0.3.15.pre

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e2cd745f89b12f16e7db05e16bb5ad97d3545e68
4
- data.tar.gz: 77c9f067970412617da0219fc60acf1e395d302e
3
+ metadata.gz: c0ed32f227645931d31d5ee595ca1b8793f4be7f
4
+ data.tar.gz: b5085f8c0c463f7bf5b084104119baf15befff4a
5
5
  SHA512:
6
- metadata.gz: e62963cfd798ac3b22c24d331db5fa4512f928eb124ee0a7ca6dbcd1fb2029e550e913e6c86f625ee2380d06402855296733b348fa02c0d74bfd2666a2243d37
7
- data.tar.gz: eb3e9b97eeb05a9d7a286b96b31d4e6ebf9caed16e23d0037d8425988824f5ee3007499e474026759fb94637a06c555013099a5838fdf60f04da23ab0fa0e410
6
+ metadata.gz: 72e23e4ee620db0170f53541684aa44d83f33822a44ec1d896f258c32908eeab0accf8041a860438e631e5dc0fe7b5366a48c205d68e8b293236c37abf1c8fcb
7
+ data.tar.gz: 929364e6faa8bebaa6f30288806cca3dff1d06bb3319c6603fab2091cf2ca7f76b36b9c9d2481fdf4dc8ff5ad2a489f4f62b2edcff5a0e598b1c465499237a3f
@@ -38,7 +38,9 @@ module EBSCO
38
38
  :open_timeout => 12,
39
39
  :max_page_jumps => 6,
40
40
  :max_page_jump_attempts => 10,
41
- :recover_from_bad_source_type => false
41
+ :recover_from_bad_source_type => false,
42
+ :all_subjects_search_links => false,
43
+ :decode_sanitize_html => false
42
44
  }
43
45
  @valid_config_keys = @config.keys
44
46
  end
@@ -244,6 +244,8 @@ module EBSCO
244
244
  _field_code = 'SH'
245
245
  when 'keywords'
246
246
  _field_code = 'KW'
247
+ when /[A-Z]{2}/
248
+ _field_code = _field
247
249
  end
248
250
  end
249
251
 
@@ -121,7 +121,6 @@ module EBSCO
121
121
  'Related ISBNs'
122
122
  ]
123
123
 
124
-
125
124
  # Raw record as returned by the \EDS API via search or retrieve
126
125
  attr_accessor(*ATTRIBUTES)
127
126
 
@@ -130,7 +129,25 @@ module EBSCO
130
129
  end
131
130
 
132
131
  # Creates a search or retrieval result record
133
- def initialize(results_record)
132
+ def initialize(results_record, eds_config = nil)
133
+
134
+ # translate all subject search link field codes to DE?
135
+ @all_subjects_search_links = false
136
+ if eds_config
137
+ @all_subjects_search_links = eds_config[:all_subjects_search_links]
138
+ end
139
+ if ENV.has_key? 'EDS_ALL_SUBJECTS_SEARCH_LINKS'
140
+ @all_subjects_search_links = ENV['EDS_ALL_SUBJECTS_SEARCH_LINKS']
141
+ end
142
+
143
+ # decode and sanitize html in item data?
144
+ @decode_sanitize_html = false
145
+ if eds_config
146
+ @decode_sanitize_html = eds_config[:decode_sanitize_html]
147
+ end
148
+ if ENV.has_key? 'EDS_DECODE_SANITIZE_HTML'
149
+ @decode_sanitize_html = ENV['EDS_DECODE_SANITIZE_HTML']
150
+ end
134
151
 
135
152
  if results_record.key? 'Record'
136
153
  @record = results_record['Record'] # single record returned by retrieve api
@@ -169,6 +186,7 @@ module EBSCO
169
186
  @eds_subjects =
170
187
  get_item_data({name: 'Subject', label: 'Subject Terms', group: 'Su'}) ||
171
188
  get_item_data({name: 'Subject', label: 'Subject Indexing', group: 'Su'}) ||
189
+ get_item_data({name: 'Subject', label: 'Subject Category', group: 'Su'}) ||
172
190
  bib_subjects
173
191
  @eds_subjects_geographic =
174
192
  get_item_data({name: 'SubjectGeographic', label: 'Geographic Terms', group: 'Su'}) ||
@@ -295,7 +313,40 @@ module EBSCO
295
313
  # Fulltext - RETRIEVE ONLY
296
314
  def html_fulltext
297
315
  if @record.fetch('FullText',{}).fetch('Text',{}).fetch('Availability',0) == '1'
298
- @record.fetch('FullText',{}).fetch('Text',{})['Value']
316
+
317
+ # sanitize?
318
+ if @decode_sanitize_html
319
+
320
+ # transformer
321
+ clean_fulltext = lambda do |env|
322
+ node = env[:node]
323
+ if node.name == 'title'
324
+ node.name = 'h1'
325
+ end
326
+ if node.name == 'sbt'
327
+ node.name = 'h2'
328
+ end
329
+ if node.name == 'jsection'
330
+ node.name = 'h3'
331
+ end
332
+ if node.name == 'et'
333
+ node.name = 'h3'
334
+ end
335
+ node
336
+ end
337
+
338
+ fulltext_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
339
+ :elements => Sanitize::Config::RELAXED[:elements] +
340
+ %w[relatesto searchlink],
341
+ :attributes => Sanitize::Config::RELAXED[:attributes].merge(
342
+ 'searchlink' => %w[fieldcode term]),
343
+ :remove_contents => true,
344
+ :transformers => [clean_fulltext])
345
+
346
+ html_decode_and_sanitize(@record.fetch('FullText',{}).fetch('Text',{})['Value'], fulltext_config)
347
+ else
348
+ @record.fetch('FullText',{}).fetch('Text',{})['Value']
349
+ end
299
350
  else
300
351
  nil
301
352
  end
@@ -757,8 +808,7 @@ module EBSCO
757
808
 
758
809
  @items.each do |item|
759
810
  if item['Name'] == options[:name] && item['Label'] == options[:label] && item['Group'] == options[:group]
760
- # puts 'FOUND ALL 3: ' + item.inspect
761
- return sanitize_data(item['Data'])
811
+ return sanitize_data(item)
762
812
  end
763
813
  end
764
814
  return nil
@@ -767,7 +817,7 @@ module EBSCO
767
817
 
768
818
  @items.each do |item|
769
819
  if item['Name'] == options[:name] && item['Label'] == options[:label]
770
- return sanitize_data(item['Data'])
820
+ return sanitize_data(item)
771
821
  end
772
822
  end
773
823
  return nil
@@ -776,7 +826,7 @@ module EBSCO
776
826
 
777
827
  @items.each do |item|
778
828
  if item['Name'] == options[:name] && item['Group'] == options[:group]
779
- return sanitize_data(item['Data'])
829
+ return sanitize_data(item)
780
830
  end
781
831
  end
782
832
  return nil
@@ -785,7 +835,7 @@ module EBSCO
785
835
 
786
836
  @items.each do |item|
787
837
  if item['Label'] == options[:label] && item['Group'] == options[:group]
788
- return sanitize_data(item['Data'])
838
+ return sanitize_data(item)
789
839
  end
790
840
  end
791
841
  return nil
@@ -794,7 +844,7 @@ module EBSCO
794
844
 
795
845
  @items.each do |item|
796
846
  if item['Label'] == options[:label]
797
- return sanitize_data(item['Data'])
847
+ return sanitize_data(item)
798
848
  end
799
849
  end
800
850
  return nil
@@ -803,7 +853,7 @@ module EBSCO
803
853
 
804
854
  @items.each do |item|
805
855
  if item['Name'] == options[:name]
806
- return sanitize_data(item['Data'])
856
+ return sanitize_data(item)
807
857
  end
808
858
  end
809
859
  return nil
@@ -815,16 +865,47 @@ module EBSCO
815
865
  end
816
866
  end
817
867
 
818
- # sanitize html, allow custom links
819
- def sanitize_data(data)
820
- html = CGI.unescapeHTML(data.to_s)
821
- sanitize_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
822
- :elements => Sanitize::Config::RELAXED[:elements] + ['relatesto', 'searchlink'],
868
+ # decode & sanitize html tags found in item data; apply any special transformations
869
+ def sanitize_data(item)
870
+
871
+ if item['Data']
872
+ data = item['Data']
873
+
874
+ # group-specific transformations
875
+ if item['Group']
876
+ group = item['Group']
877
+ if group == 'Su'
878
+ # translate searchLink field codes to DE?
879
+ if @all_subjects_search_links
880
+ data = data.gsub(/(searchLink fieldCode=")([A-Z]+)/, '\1DE')
881
+ end
882
+ end
883
+ end
884
+
885
+ # decode-sanitize?
886
+ if @decode_sanitize_html
887
+ data = html_decode_and_sanitize(data)
888
+ end
889
+
890
+ data
891
+
892
+ else
893
+ nil # no item data present
894
+ end
895
+
896
+ end
897
+
898
+ # Decode any html elements and then run it through sanitize to preserve entities (eg: ampersand) and strip out
899
+ # elements/attributes that aren't explicitly whitelisted.
900
+ # The RELAXED config: https://github.com/rgrove/sanitize/blob/master/lib/sanitize/config/relaxed.rb
901
+ def html_decode_and_sanitize(data, config = nil)
902
+ default_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
903
+ :elements => Sanitize::Config::RELAXED[:elements] +
904
+ %w[relatesto searchlink],
823
905
  :attributes => Sanitize::Config::RELAXED[:attributes].merge(
824
- 'searchlink' => ['fieldcode', 'term']
825
- )
826
- )
827
- Sanitize.fragment(html, sanitize_config)
906
+ 'searchlink' => %w[fieldcode term]))
907
+ sanitize_config = config.nil? ? default_config : config
908
+ Sanitize.fragment(CGI.unescapeHTML(data.to_s), sanitize_config)
828
909
  end
829
910
 
830
911
  # dynamically add item metadata as 'eds_extra_ItemNameOrLabel'
@@ -28,7 +28,7 @@ module EBSCO
28
28
 
29
29
  # Creates search results from the \EDS API search response. It includes information about the results and a list
30
30
  # of Record items.
31
- def initialize(search_results, additional_limiters = {}, options = {})
31
+ def initialize(search_results, eds_config = nil, additional_limiters = {}, options = {})
32
32
 
33
33
  @results = search_results
34
34
  @limiters = additional_limiters
@@ -39,7 +39,7 @@ module EBSCO
39
39
  if @results['SearchResult']['Data']['Records']
40
40
  @results['SearchResult']['Data']['Records'].each { |record|
41
41
 
42
- @records.push(EBSCO::EDS::Record.new(record))
42
+ @records.push(EBSCO::EDS::Record.new(record, eds_config))
43
43
 
44
44
  # # records hidden in guest mode
45
45
  # if record['Header']['AccessLevel']
@@ -64,7 +64,7 @@ module EBSCO
64
64
  rs_entries = related_item.fetch('Records',{})
65
65
  if rs_entries.count > 0
66
66
  rs_entries.each do |rs_record|
67
- @research_starters.push(EBSCO::EDS::Record.new(rs_record))
67
+ @research_starters.push(EBSCO::EDS::Record.new(rs_record, eds_config))
68
68
  end
69
69
  end
70
70
  end
@@ -80,7 +80,7 @@ module EBSCO
80
80
  _publication_matches = related_item.fetch('PublicationRecords',{})
81
81
  if _publication_matches.count > 0
82
82
  _publication_matches.each do |publication_record|
83
- @publication_match.push(EBSCO::EDS::Record.new(publication_record))
83
+ @publication_match.push(EBSCO::EDS::Record.new(publication_record, eds_config))
84
84
  end
85
85
  end
86
86
  end
@@ -207,10 +207,11 @@ module EBSCO
207
207
  # use existing/updated SearchOptions
208
208
  if options.empty?
209
209
  if @search_options.nil?
210
- @search_results = EBSCO::EDS::Results.new(empty_results)
210
+ @search_results = EBSCO::EDS::Results.new(empty_results,@config)
211
211
  else
212
212
  _response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
213
- @search_results = EBSCO::EDS::Results.new(_response, @info.available_limiters, options)
213
+ @search_results = EBSCO::EDS::Results.new(_response, @config,
214
+ @info.available_limiters, options)
214
215
  if increment_page
215
216
  @current_page = @search_results.page_number
216
217
  end
@@ -226,7 +227,8 @@ module EBSCO
226
227
  end
227
228
 
228
229
  _response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
229
- @search_results = EBSCO::EDS::Results.new(_response, @info.available_limiters, options)
230
+ @search_results = EBSCO::EDS::Results.new(_response, @config,
231
+ @info.available_limiters, options)
230
232
 
231
233
  # create temp format facet results if needed
232
234
  if options['f']
@@ -236,7 +238,10 @@ module EBSCO
236
238
  format_search_options = EBSCO::EDS::Options.new(format_options, @info)
237
239
  format_search_options.Comment = 'temp source type facets'
238
240
  _format_response = do_request(:post, path: '/edsapi/rest/Search', payload: format_search_options)
239
- @search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response, @info.available_limiters, format_options)
241
+ @search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response,
242
+ @config,
243
+ @info.available_limiters,
244
+ format_options)
240
245
  end
241
246
  end
242
247
 
@@ -248,7 +253,10 @@ module EBSCO
248
253
  content_search_options = EBSCO::EDS::Options.new(content_options, @info)
249
254
  content_search_options.Comment = 'temp content provider facet'
250
255
  _content_response = do_request(:post, path: '/edsapi/rest/Search', payload: content_search_options)
251
- @search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response, @info.available_limiters, content_options)
256
+ @search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response,
257
+ @config,
258
+ @info.available_limiters,
259
+ content_options)
252
260
  end
253
261
  end
254
262
 
@@ -297,8 +305,8 @@ module EBSCO
297
305
  retrieve_response = do_request(:post, path: @config[:retrieve_url], payload: payload)
298
306
  #retrieve_params = "?an=#{an}&dbid=#{dbid}&ebookpreferredformat=#{ebook}"
299
307
  #retrieve_response = do_request(:get, path: @config[:retrieve_url] + retrieve_params)
300
- record = EBSCO::EDS::Record.new(retrieve_response)
301
- # puts 'RECORD: ' + record.pretty_inspect
308
+ record = EBSCO::EDS::Record.new(retrieve_response, @config)
309
+ # puts 'RECORD: ' + record.inspect
302
310
  record
303
311
  end
304
312
 
@@ -357,7 +365,7 @@ module EBSCO
357
365
 
358
366
  # return json result set with just the previous and next records in it
359
367
  r = empty_results(cached_results.stat_total_hits)
360
- results = EBSCO::EDS::Results.new(r)
368
+ results = EBSCO::EDS::Results.new(r, @config)
361
369
  next_previous_records = []
362
370
  unless result_prev.nil?
363
371
  next_previous_records << result_prev
@@ -380,7 +388,7 @@ module EBSCO
380
388
  }
381
389
  end
382
390
  r = empty_results(records.length)
383
- results = EBSCO::EDS::Results.new(r)
391
+ results = EBSCO::EDS::Results.new(r, @config)
384
392
  results.records = records
385
393
  results.to_solr
386
394
  end
@@ -818,7 +826,7 @@ module EBSCO
818
826
  else
819
827
  if !action.include?('SourceType:'+bad_source_type+')')
820
828
  # not a bad source type, keep it
821
- new_actions >> action
829
+ new_actions << action
822
830
  end
823
831
  end
824
832
  else
@@ -826,6 +834,31 @@ module EBSCO
826
834
  new_actions << action
827
835
  end
828
836
  }
837
+
838
+ new_filters = []
839
+ filter_id = 1
840
+ payload.SearchCriteria.FacetFilters.each { |filter|
841
+ filter['FacetValues'].each { |facet_val|
842
+ if facet_val['Id'] == 'SourceType'
843
+ if bad_source_type.nil?
844
+ # skip the source type since we don't know if it's bad or not
845
+ else
846
+ # not a bad sourcetype, add it
847
+ if !facet_val['Value'].include?(bad_source_type)
848
+ filter['FilterId'] = filter_id
849
+ filter_id += 1
850
+ new_filters << filter
851
+ end
852
+ end
853
+ else
854
+ # not a SourceType filter, add it
855
+ filter['FilterId'] = filter_id
856
+ filter_id += 1
857
+ new_filters << filter
858
+ end
859
+ }
860
+ }
861
+ payload.SearchCriteria.FacetFilters = new_filters
829
862
  payload.Actions = new_actions
830
863
  do_request(method, path: path, payload: payload, attempt: attempt+1)
831
864
  else
@@ -1,5 +1,5 @@
1
1
  module EBSCO
2
2
  module EDS
3
- VERSION = '0.3.14.pre'
3
+ VERSION = '0.3.15.pre'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebsco-eds
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14.pre
4
+ version: 0.3.15.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bill McKinney
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-09-14 00:00:00.000000000 Z
12
+ date: 2017-09-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: faraday