ebsco-eds 0.3.14.pre → 0.3.15.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e2cd745f89b12f16e7db05e16bb5ad97d3545e68
4
- data.tar.gz: 77c9f067970412617da0219fc60acf1e395d302e
3
+ metadata.gz: c0ed32f227645931d31d5ee595ca1b8793f4be7f
4
+ data.tar.gz: b5085f8c0c463f7bf5b084104119baf15befff4a
5
5
  SHA512:
6
- metadata.gz: e62963cfd798ac3b22c24d331db5fa4512f928eb124ee0a7ca6dbcd1fb2029e550e913e6c86f625ee2380d06402855296733b348fa02c0d74bfd2666a2243d37
7
- data.tar.gz: eb3e9b97eeb05a9d7a286b96b31d4e6ebf9caed16e23d0037d8425988824f5ee3007499e474026759fb94637a06c555013099a5838fdf60f04da23ab0fa0e410
6
+ metadata.gz: 72e23e4ee620db0170f53541684aa44d83f33822a44ec1d896f258c32908eeab0accf8041a860438e631e5dc0fe7b5366a48c205d68e8b293236c37abf1c8fcb
7
+ data.tar.gz: 929364e6faa8bebaa6f30288806cca3dff1d06bb3319c6603fab2091cf2ca7f76b36b9c9d2481fdf4dc8ff5ad2a489f4f62b2edcff5a0e598b1c465499237a3f
@@ -38,7 +38,9 @@ module EBSCO
38
38
  :open_timeout => 12,
39
39
  :max_page_jumps => 6,
40
40
  :max_page_jump_attempts => 10,
41
- :recover_from_bad_source_type => false
41
+ :recover_from_bad_source_type => false,
42
+ :all_subjects_search_links => false,
43
+ :decode_sanitize_html => false
42
44
  }
43
45
  @valid_config_keys = @config.keys
44
46
  end
@@ -244,6 +244,8 @@ module EBSCO
244
244
  _field_code = 'SH'
245
245
  when 'keywords'
246
246
  _field_code = 'KW'
247
+ when /[A-Z]{2}/
248
+ _field_code = _field
247
249
  end
248
250
  end
249
251
 
@@ -121,7 +121,6 @@ module EBSCO
121
121
  'Related ISBNs'
122
122
  ]
123
123
 
124
-
125
124
  # Raw record as returned by the \EDS API via search or retrieve
126
125
  attr_accessor(*ATTRIBUTES)
127
126
 
@@ -130,7 +129,25 @@ module EBSCO
130
129
  end
131
130
 
132
131
  # Creates a search or retrieval result record
133
- def initialize(results_record)
132
+ def initialize(results_record, eds_config = nil)
133
+
134
+ # translate all subject search link field codes to DE?
135
+ @all_subjects_search_links = false
136
+ if eds_config
137
+ @all_subjects_search_links = eds_config[:all_subjects_search_links]
138
+ end
139
+ if ENV.has_key? 'EDS_ALL_SUBJECTS_SEARCH_LINKS'
140
+ @all_subjects_search_links = ENV['EDS_ALL_SUBJECTS_SEARCH_LINKS']
141
+ end
142
+
143
+ # decode and sanitize html in item data?
144
+ @decode_sanitize_html = false
145
+ if eds_config
146
+ @decode_sanitize_html = eds_config[:decode_sanitize_html]
147
+ end
148
+ if ENV.has_key? 'EDS_DECODE_SANITIZE_HTML'
149
+ @decode_sanitize_html = ENV['EDS_DECODE_SANITIZE_HTML']
150
+ end
134
151
 
135
152
  if results_record.key? 'Record'
136
153
  @record = results_record['Record'] # single record returned by retrieve api
@@ -169,6 +186,7 @@ module EBSCO
169
186
  @eds_subjects =
170
187
  get_item_data({name: 'Subject', label: 'Subject Terms', group: 'Su'}) ||
171
188
  get_item_data({name: 'Subject', label: 'Subject Indexing', group: 'Su'}) ||
189
+ get_item_data({name: 'Subject', label: 'Subject Category', group: 'Su'}) ||
172
190
  bib_subjects
173
191
  @eds_subjects_geographic =
174
192
  get_item_data({name: 'SubjectGeographic', label: 'Geographic Terms', group: 'Su'}) ||
@@ -295,7 +313,40 @@ module EBSCO
295
313
  # Fulltext - RETRIEVE ONLY
296
314
  def html_fulltext
297
315
  if @record.fetch('FullText',{}).fetch('Text',{}).fetch('Availability',0) == '1'
298
- @record.fetch('FullText',{}).fetch('Text',{})['Value']
316
+
317
+ # sanitize?
318
+ if @decode_sanitize_html
319
+
320
+ # transformer
321
+ clean_fulltext = lambda do |env|
322
+ node = env[:node]
323
+ if node.name == 'title'
324
+ node.name = 'h1'
325
+ end
326
+ if node.name == 'sbt'
327
+ node.name = 'h2'
328
+ end
329
+ if node.name == 'jsection'
330
+ node.name = 'h3'
331
+ end
332
+ if node.name == 'et'
333
+ node.name = 'h3'
334
+ end
335
+ node
336
+ end
337
+
338
+ fulltext_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
339
+ :elements => Sanitize::Config::RELAXED[:elements] +
340
+ %w[relatesto searchlink],
341
+ :attributes => Sanitize::Config::RELAXED[:attributes].merge(
342
+ 'searchlink' => %w[fieldcode term]),
343
+ :remove_contents => true,
344
+ :transformers => [clean_fulltext])
345
+
346
+ html_decode_and_sanitize(@record.fetch('FullText',{}).fetch('Text',{})['Value'], fulltext_config)
347
+ else
348
+ @record.fetch('FullText',{}).fetch('Text',{})['Value']
349
+ end
299
350
  else
300
351
  nil
301
352
  end
@@ -757,8 +808,7 @@ module EBSCO
757
808
 
758
809
  @items.each do |item|
759
810
  if item['Name'] == options[:name] && item['Label'] == options[:label] && item['Group'] == options[:group]
760
- # puts 'FOUND ALL 3: ' + item.inspect
761
- return sanitize_data(item['Data'])
811
+ return sanitize_data(item)
762
812
  end
763
813
  end
764
814
  return nil
@@ -767,7 +817,7 @@ module EBSCO
767
817
 
768
818
  @items.each do |item|
769
819
  if item['Name'] == options[:name] && item['Label'] == options[:label]
770
- return sanitize_data(item['Data'])
820
+ return sanitize_data(item)
771
821
  end
772
822
  end
773
823
  return nil
@@ -776,7 +826,7 @@ module EBSCO
776
826
 
777
827
  @items.each do |item|
778
828
  if item['Name'] == options[:name] && item['Group'] == options[:group]
779
- return sanitize_data(item['Data'])
829
+ return sanitize_data(item)
780
830
  end
781
831
  end
782
832
  return nil
@@ -785,7 +835,7 @@ module EBSCO
785
835
 
786
836
  @items.each do |item|
787
837
  if item['Label'] == options[:label] && item['Group'] == options[:group]
788
- return sanitize_data(item['Data'])
838
+ return sanitize_data(item)
789
839
  end
790
840
  end
791
841
  return nil
@@ -794,7 +844,7 @@ module EBSCO
794
844
 
795
845
  @items.each do |item|
796
846
  if item['Label'] == options[:label]
797
- return sanitize_data(item['Data'])
847
+ return sanitize_data(item)
798
848
  end
799
849
  end
800
850
  return nil
@@ -803,7 +853,7 @@ module EBSCO
803
853
 
804
854
  @items.each do |item|
805
855
  if item['Name'] == options[:name]
806
- return sanitize_data(item['Data'])
856
+ return sanitize_data(item)
807
857
  end
808
858
  end
809
859
  return nil
@@ -815,16 +865,47 @@ module EBSCO
815
865
  end
816
866
  end
817
867
 
818
- # sanitize html, allow custom links
819
- def sanitize_data(data)
820
- html = CGI.unescapeHTML(data.to_s)
821
- sanitize_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
822
- :elements => Sanitize::Config::RELAXED[:elements] + ['relatesto', 'searchlink'],
868
+ # decode & sanitize html tags found in item data; apply any special transformations
869
+ def sanitize_data(item)
870
+
871
+ if item['Data']
872
+ data = item['Data']
873
+
874
+ # group-specific transformations
875
+ if item['Group']
876
+ group = item['Group']
877
+ if group == 'Su'
878
+ # translate searchLink field codes to DE?
879
+ if @all_subjects_search_links
880
+ data = data.gsub(/(searchLink fieldCode=")([A-Z]+)/, '\1DE')
881
+ end
882
+ end
883
+ end
884
+
885
+ # decode-sanitize?
886
+ if @decode_sanitize_html
887
+ data = html_decode_and_sanitize(data)
888
+ end
889
+
890
+ data
891
+
892
+ else
893
+ nil # no item data present
894
+ end
895
+
896
+ end
897
+
898
+ # Decode any html elements and then run it through sanitize to preserve entities (eg: ampersand) and strip out
899
+ # elements/attributes that aren't explicitly whitelisted.
900
+ # The RELAXED config: https://github.com/rgrove/sanitize/blob/master/lib/sanitize/config/relaxed.rb
901
+ def html_decode_and_sanitize(data, config = nil)
902
+ default_config = Sanitize::Config.merge(Sanitize::Config::RELAXED,
903
+ :elements => Sanitize::Config::RELAXED[:elements] +
904
+ %w[relatesto searchlink],
823
905
  :attributes => Sanitize::Config::RELAXED[:attributes].merge(
824
- 'searchlink' => ['fieldcode', 'term']
825
- )
826
- )
827
- Sanitize.fragment(html, sanitize_config)
906
+ 'searchlink' => %w[fieldcode term]))
907
+ sanitize_config = config.nil? ? default_config : config
908
+ Sanitize.fragment(CGI.unescapeHTML(data.to_s), sanitize_config)
828
909
  end
829
910
 
830
911
  # dynamically add item metadata as 'eds_extra_ItemNameOrLabel'
@@ -28,7 +28,7 @@ module EBSCO
28
28
 
29
29
  # Creates search results from the \EDS API search response. It includes information about the results and a list
30
30
  # of Record items.
31
- def initialize(search_results, additional_limiters = {}, options = {})
31
+ def initialize(search_results, eds_config = nil, additional_limiters = {}, options = {})
32
32
 
33
33
  @results = search_results
34
34
  @limiters = additional_limiters
@@ -39,7 +39,7 @@ module EBSCO
39
39
  if @results['SearchResult']['Data']['Records']
40
40
  @results['SearchResult']['Data']['Records'].each { |record|
41
41
 
42
- @records.push(EBSCO::EDS::Record.new(record))
42
+ @records.push(EBSCO::EDS::Record.new(record, eds_config))
43
43
 
44
44
  # # records hidden in guest mode
45
45
  # if record['Header']['AccessLevel']
@@ -64,7 +64,7 @@ module EBSCO
64
64
  rs_entries = related_item.fetch('Records',{})
65
65
  if rs_entries.count > 0
66
66
  rs_entries.each do |rs_record|
67
- @research_starters.push(EBSCO::EDS::Record.new(rs_record))
67
+ @research_starters.push(EBSCO::EDS::Record.new(rs_record, eds_config))
68
68
  end
69
69
  end
70
70
  end
@@ -80,7 +80,7 @@ module EBSCO
80
80
  _publication_matches = related_item.fetch('PublicationRecords',{})
81
81
  if _publication_matches.count > 0
82
82
  _publication_matches.each do |publication_record|
83
- @publication_match.push(EBSCO::EDS::Record.new(publication_record))
83
+ @publication_match.push(EBSCO::EDS::Record.new(publication_record, eds_config))
84
84
  end
85
85
  end
86
86
  end
@@ -207,10 +207,11 @@ module EBSCO
207
207
  # use existing/updated SearchOptions
208
208
  if options.empty?
209
209
  if @search_options.nil?
210
- @search_results = EBSCO::EDS::Results.new(empty_results)
210
+ @search_results = EBSCO::EDS::Results.new(empty_results,@config)
211
211
  else
212
212
  _response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
213
- @search_results = EBSCO::EDS::Results.new(_response, @info.available_limiters, options)
213
+ @search_results = EBSCO::EDS::Results.new(_response, @config,
214
+ @info.available_limiters, options)
214
215
  if increment_page
215
216
  @current_page = @search_results.page_number
216
217
  end
@@ -226,7 +227,8 @@ module EBSCO
226
227
  end
227
228
 
228
229
  _response = do_request(:post, path: '/edsapi/rest/Search', payload: @search_options)
229
- @search_results = EBSCO::EDS::Results.new(_response, @info.available_limiters, options)
230
+ @search_results = EBSCO::EDS::Results.new(_response, @config,
231
+ @info.available_limiters, options)
230
232
 
231
233
  # create temp format facet results if needed
232
234
  if options['f']
@@ -236,7 +238,10 @@ module EBSCO
236
238
  format_search_options = EBSCO::EDS::Options.new(format_options, @info)
237
239
  format_search_options.Comment = 'temp source type facets'
238
240
  _format_response = do_request(:post, path: '/edsapi/rest/Search', payload: format_search_options)
239
- @search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response, @info.available_limiters, format_options)
241
+ @search_results.temp_format_facet_results = EBSCO::EDS::Results.new(_format_response,
242
+ @config,
243
+ @info.available_limiters,
244
+ format_options)
240
245
  end
241
246
  end
242
247
 
@@ -248,7 +253,10 @@ module EBSCO
248
253
  content_search_options = EBSCO::EDS::Options.new(content_options, @info)
249
254
  content_search_options.Comment = 'temp content provider facet'
250
255
  _content_response = do_request(:post, path: '/edsapi/rest/Search', payload: content_search_options)
251
- @search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response, @info.available_limiters, content_options)
256
+ @search_results.temp_content_provider_facet_results = EBSCO::EDS::Results.new(_content_response,
257
+ @config,
258
+ @info.available_limiters,
259
+ content_options)
252
260
  end
253
261
  end
254
262
 
@@ -297,8 +305,8 @@ module EBSCO
297
305
  retrieve_response = do_request(:post, path: @config[:retrieve_url], payload: payload)
298
306
  #retrieve_params = "?an=#{an}&dbid=#{dbid}&ebookpreferredformat=#{ebook}"
299
307
  #retrieve_response = do_request(:get, path: @config[:retrieve_url] + retrieve_params)
300
- record = EBSCO::EDS::Record.new(retrieve_response)
301
- # puts 'RECORD: ' + record.pretty_inspect
308
+ record = EBSCO::EDS::Record.new(retrieve_response, @config)
309
+ # puts 'RECORD: ' + record.inspect
302
310
  record
303
311
  end
304
312
 
@@ -357,7 +365,7 @@ module EBSCO
357
365
 
358
366
  # return json result set with just the previous and next records in it
359
367
  r = empty_results(cached_results.stat_total_hits)
360
- results = EBSCO::EDS::Results.new(r)
368
+ results = EBSCO::EDS::Results.new(r, @config)
361
369
  next_previous_records = []
362
370
  unless result_prev.nil?
363
371
  next_previous_records << result_prev
@@ -380,7 +388,7 @@ module EBSCO
380
388
  }
381
389
  end
382
390
  r = empty_results(records.length)
383
- results = EBSCO::EDS::Results.new(r)
391
+ results = EBSCO::EDS::Results.new(r, @config)
384
392
  results.records = records
385
393
  results.to_solr
386
394
  end
@@ -818,7 +826,7 @@ module EBSCO
818
826
  else
819
827
  if !action.include?('SourceType:'+bad_source_type+')')
820
828
  # not a bad source type, keep it
821
- new_actions >> action
829
+ new_actions << action
822
830
  end
823
831
  end
824
832
  else
@@ -826,6 +834,31 @@ module EBSCO
826
834
  new_actions << action
827
835
  end
828
836
  }
837
+
838
+ new_filters = []
839
+ filter_id = 1
840
+ payload.SearchCriteria.FacetFilters.each { |filter|
841
+ filter['FacetValues'].each { |facet_val|
842
+ if facet_val['Id'] == 'SourceType'
843
+ if bad_source_type.nil?
844
+ # skip the source type since we don't know if it's bad or not
845
+ else
846
+ # not a bad sourcetype, add it
847
+ if !facet_val['Value'].include?(bad_source_type)
848
+ filter['FilterId'] = filter_id
849
+ filter_id += 1
850
+ new_filters << filter
851
+ end
852
+ end
853
+ else
854
+ # not a SourceType filter, add it
855
+ filter['FilterId'] = filter_id
856
+ filter_id += 1
857
+ new_filters << filter
858
+ end
859
+ }
860
+ }
861
+ payload.SearchCriteria.FacetFilters = new_filters
829
862
  payload.Actions = new_actions
830
863
  do_request(method, path: path, payload: payload, attempt: attempt+1)
831
864
  else
@@ -1,5 +1,5 @@
1
1
  module EBSCO
2
2
  module EDS
3
- VERSION = '0.3.14.pre'
3
+ VERSION = '0.3.15.pre'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebsco-eds
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14.pre
4
+ version: 0.3.15.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bill McKinney
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-09-14 00:00:00.000000000 Z
12
+ date: 2017-09-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: faraday