blacklight_internet_archive 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/helpers/blacklight_url_helper.rb +24 -0
- data/lib/blacklight_internet_archive.rb +0 -3
- data/lib/blacklight_internet_archive/client.rb +35 -36
- data/lib/blacklight_internet_archive/entity_processor.rb +11 -122
- data/lib/blacklight_internet_archive/hash_with_response.rb +7 -5
- data/lib/blacklight_internet_archive/repository.rb +1 -0
- data/lib/blacklight_internet_archive/request.rb +2 -2
- data/lib/blacklight_internet_archive/response.rb +55 -58
- data/lib/blacklight_internet_archive/response_adapter.rb +6 -14
- data/lib/blacklight_internet_archive/sites_entity_processor.rb +131 -0
- data/lib/blacklight_internet_archive/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5625555cccea17b68cd7aad8d0f89eee7f1771e
|
4
|
+
data.tar.gz: 36aa4cb5f8bd71e9680d177357a648f37af7581a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: baccecc09685c732a4b3cda62a0a70c261ccb6ddded8b0c82dedf513afff7dbd2ffd744abad3d108db0c9ed0a5471a40580a67ea89ba0e74d5a5b5ccbd2d2e77
|
7
|
+
data.tar.gz: adaa0c0cb7471631a0065346ae9ec8ae1e14fea82f5526ec966304d3e5283d205d632ce04f41c8c39a15be3d3e80659e4b6639ff32288d199b7035c08712f7bb
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module BlacklightUrlHelper
|
2
|
+
include Blacklight::UrlHelperBehavior
|
3
|
+
|
4
|
+
#link_to label, 'https://wayback.archive-it.org/1068/*/http://www.mahteso.org/', { data: { :'context-href' => 'https://wayback.archive-it.org/1068/*/http://www.mahteso.org/' } }
|
5
|
+
# link_to_document(doc, 'VIEW', :counter => 3)
|
6
|
+
# Use the catalog_path RESTful route to create a link to the show page for a specific item.
|
7
|
+
# catalog_path accepts a hash. The solr query params are stored in the session,
|
8
|
+
# so we only need the +counter+ param here. We also need to know if we are viewing the document as part of search results.
|
9
|
+
# TODO: move this to the IndexPresenter
|
10
|
+
def link_to_document(doc, field_or_opts = nil, opts={:counter => nil})
|
11
|
+
if field_or_opts.is_a? Hash
|
12
|
+
opts = field_or_opts
|
13
|
+
else
|
14
|
+
field = field_or_opts
|
15
|
+
end
|
16
|
+
|
17
|
+
field ||= document_show_link_field(doc)
|
18
|
+
label = index_presenter(doc).label field, opts
|
19
|
+
path = doc['allURL']
|
20
|
+
data_path = "#{path}?counter=#{opts[:counter]}"
|
21
|
+
link_to label, path, { data: { :'context-href' => path } }
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
require "blacklight_internet_archive/version"
|
2
2
|
|
3
3
|
module BlacklightInternetArchive
|
4
|
-
|
5
4
|
autoload :InternetArchive, 'blacklight_internet_archive/internet_archive'
|
6
5
|
autoload :Client, 'blacklight_internet_archive/client'
|
7
6
|
autoload :Repository, 'blacklight_internet_archive/repository'
|
@@ -18,6 +17,4 @@ module BlacklightInternetArchive
|
|
18
17
|
|
19
18
|
Client.new connection, opts
|
20
19
|
end
|
21
|
-
|
22
|
-
|
23
20
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'net/http'
|
4
|
+
require 'active_support/core_ext/hash'
|
4
5
|
|
5
6
|
module BlacklightInternetArchive
|
6
7
|
class Client
|
@@ -22,14 +23,14 @@ module BlacklightInternetArchive
|
|
22
23
|
def execute(request_context)
|
23
24
|
uri_string = request_context[:params][:uri]
|
24
25
|
uri = URI.parse(uri_string)
|
26
|
+
search_type = request_context[:params][:controller]
|
25
27
|
|
26
28
|
res = Net::HTTP.get_response(uri)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
29
|
+
return unless res.is_a?(Net::HTTPSuccess)
|
30
|
+
res_data = res.read_body
|
31
|
+
return if res_data.nil? || res_data.empty?
|
32
|
+
res_data_mod = BlacklightInternetArchive::ResponseAdapter.adapt_response(res_data, @connection_url, search_type)
|
33
|
+
BlacklightInternetArchive::HashWithResponse.new(request_context, res, res_data_mod)
|
33
34
|
end
|
34
35
|
|
35
36
|
# +build_request+ accepts a path and options hash
|
@@ -38,33 +39,43 @@ module BlacklightInternetArchive
|
|
38
39
|
# :uri
|
39
40
|
# :path
|
40
41
|
# :query
|
42
|
+
# :rows and :start
|
43
|
+
|
41
44
|
|
42
45
|
def build_request(path, opts)
|
43
46
|
raise "path must be a string or symbol, not #{path.inspect}" unless [String, Symbol].include?(path.class)
|
44
|
-
path = "#{path}.json"
|
47
|
+
opts[:path] = "#{path}.json"
|
48
|
+
query_opts = construct_query_options(opts)
|
49
|
+
opts[:start] = calculate_start(query_opts)
|
50
|
+
facet_string = construct_facet_string(opts)
|
51
|
+
query = query_opts.to_query
|
52
|
+
query = "#{query}&#{facet_string}" if facet_string
|
53
|
+
opts[:query] = query
|
54
|
+
opts[:uri] = opts[:path].to_s + (query ? "?#{query}" : '')
|
55
|
+
opts[:rows] = 10 if opts[:rows].nil?
|
56
|
+
{ params: opts }
|
57
|
+
end
|
45
58
|
|
46
|
-
|
59
|
+
def construct_query_options(opts)
|
47
60
|
query_opts = {}
|
48
61
|
query_opts['pageSize'] = '10'
|
49
|
-
if opts['rows']
|
50
|
-
|
51
|
-
|
52
|
-
query_opts['pageSize'] = '10'
|
53
|
-
end
|
54
|
-
if opts['page']
|
55
|
-
query_opts['page'] = opts['page']
|
56
|
-
else
|
57
|
-
query_opts['page'] = '1'
|
58
|
-
end
|
59
|
-
if query_opts['page'].to_i < 2
|
60
|
-
opts[:start] = 0
|
61
|
-
else
|
62
|
-
opts[:start] = ((query_opts['page'].to_i - 1) * query_opts['pageSize'].to_i)
|
63
|
-
end
|
64
|
-
|
62
|
+
query_opts['pageSize'] = opts['rows'] if opts['rows']
|
63
|
+
query_opts['page'] = '1'
|
64
|
+
query_opts['page'] = opts['page'] if opts['page']
|
65
65
|
query_opts['q'] = ''
|
66
66
|
query_opts['q'] = CGI.escape(opts['q']) if opts['q']
|
67
|
+
query_opts
|
68
|
+
end
|
67
69
|
|
70
|
+
def calculate_start(query_opts)
|
71
|
+
start = 0
|
72
|
+
if query_opts['page'].to_i >= 2
|
73
|
+
start = ((query_opts['page'].to_i - 1) * query_opts['pageSize'].to_i)
|
74
|
+
end
|
75
|
+
start
|
76
|
+
end
|
77
|
+
|
78
|
+
def construct_facet_string(opts)
|
68
79
|
facet_string = ''
|
69
80
|
if opts['f']
|
70
81
|
opts['f'].each do |k, v|
|
@@ -74,19 +85,7 @@ module BlacklightInternetArchive
|
|
74
85
|
end
|
75
86
|
end
|
76
87
|
end
|
77
|
-
|
78
88
|
facet_string = facet_string.tr(' ', '+').chomp('&')
|
79
|
-
query = query_opts.to_query
|
80
|
-
query = "#{query}&#{facet_string}" if facet_string
|
81
|
-
|
82
|
-
opts[:query] = query
|
83
|
-
opts[:uri] = path.to_s + (query ? "?#{query}" : '')
|
84
|
-
|
85
|
-
opts[:rows] = 10 if opts[:rows].nil?
|
86
|
-
|
87
|
-
opts[:start] = 0 if opts[:start].nil?
|
88
|
-
|
89
|
-
{ :params => opts }
|
90
89
|
end
|
91
90
|
end
|
92
91
|
end
|
@@ -1,135 +1,24 @@
|
|
1
1
|
require 'active_support/core_ext/string/output_safety'
|
2
2
|
require 'cgi'
|
3
|
+
require_relative 'sites_entity_processor'
|
3
4
|
|
4
5
|
module BlacklightInternetArchive
|
5
6
|
# extract and convert individual results from response
|
6
7
|
class EntityProcessor
|
7
|
-
@metadata_fields = %w[meta_Creator meta_Coverage meta_Subject meta_Language meta_Collector meta_Title]
|
8
|
-
@date_fields = %w[firstCapture lastCapture]
|
9
|
-
@linkable_fields = { 'meta_Title' => 'allURL', 'url' => 'allURL', 'numCaptures' => 'allURL',
|
10
|
-
'numVideos' => 'seedVideosUrl' }
|
11
8
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
entities_clone[entities.index(entity)] = reformat_entity(entity, response_json['results']['searchedFacets'], base_url)
|
9
|
+
def self.get_processor(search_type = 'catalog')
|
10
|
+
if search_type == 'search_pages'
|
11
|
+
# return ArchivedPagesEntityProcessor.new
|
12
|
+
elsif search_type == 'search_videos'
|
13
|
+
# return SeedVideosEntityProcessor.new
|
14
|
+
else
|
15
|
+
return SitesEntityProcessor.new
|
20
16
|
end
|
21
|
-
|
22
|
-
entities_clone
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.reformat_entity(entity, searched_facets, base_url)
|
26
|
-
entity_clone = entity.clone
|
27
|
-
entity.each do |entity_key, entity_val|
|
28
|
-
if entity_key == 'metadata'
|
29
|
-
entity_clone = facet_link_metadata(entity_val, entity_clone, searched_facets)
|
30
|
-
end
|
31
|
-
entity_clone = set_date_fields(entity_clone, entity_key, entity_val)
|
32
|
-
entity_clone = set_linked_fields(entity_clone, base_url)
|
33
|
-
end
|
34
|
-
# this field is not under the metadata node and not handled by process_entities
|
35
|
-
entity_clone['linked_websiteGroup'] = link_faceted_results_data('websiteGroup', [entity['websiteGroup']], searched_facets)
|
36
|
-
entity_clone
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.facet_link_metadata(entval, ent_clone, facet_info)
|
40
|
-
@metadata_fields.each do |k|
|
41
|
-
if entval[k]
|
42
|
-
ent_clone[k] = entval[k].map(&:html_safe)
|
43
|
-
ent_clone["linked_#{k}"] = link_faceted_results_data(k, entval[k], facet_info)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
ent_clone
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.set_date_fields(e_clone, ent, entval)
|
50
|
-
@date_fields.each do |d|
|
51
|
-
next unless ent == d
|
52
|
-
new_key = "#{d}_date"
|
53
|
-
next unless entval['formattedDate']
|
54
|
-
formatted_date = entval['formattedDate']
|
55
|
-
date_url = entval['waybackUrl']
|
56
|
-
date_link = make_link(formatted_date, date_url)
|
57
|
-
e_clone[new_key] = formatted_date
|
58
|
-
e_clone["linked_#{new_key}"] = date_link.html_safe
|
59
|
-
end
|
60
|
-
e_clone
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.set_linked_fields(e_clone, base_url)
|
64
|
-
@linkable_fields.each do |l, l_url|
|
65
|
-
val = e_clone[l]
|
66
|
-
val_url = e_clone[l_url]
|
67
|
-
if val_url.start_with?('?')
|
68
|
-
val_url = "#{base_url}#{val_url}"
|
69
|
-
end
|
70
|
-
linked_val = make_link(val, val_url)
|
71
|
-
e_clone["linked_#{l}"] = linked_val
|
72
|
-
end
|
73
|
-
e_clone
|
74
|
-
end
|
17
|
+
end
|
75
18
|
|
76
|
-
def self.link_faceted_results_data(meta_field, meta_val, searched_facets)
|
77
|
-
link_facets = []
|
78
|
-
meta_val.each do |mv|
|
79
|
-
searched_facets.each do |sf|
|
80
|
-
next unless sf['id'] == meta_field
|
81
|
-
sf['results'].each do |ra|
|
82
|
-
if ra['name'] == mv
|
83
|
-
link_facets << make_link(ra['name'], convert_ia_facet_url(ra['addFacetURL']))
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
link_facets.map(&:html_safe)
|
89
|
-
end
|
90
|
-
|
91
|
-
# translate ia facet url into blacklight facet syntax
|
92
|
-
def self.convert_ia_facet_url(ia_facet_url)
|
93
|
-
ifu_hash = CGI.parse(ia_facet_url.tr('?', ''))
|
94
|
-
url_arrays = prepare_url_params(ifu_hash, [], [])
|
95
|
-
compose_url(url_arrays)
|
96
|
-
end
|
97
|
-
|
98
|
-
def self.prepare_url_params(facet_hash, facet_url_arr, new_url_arr)
|
99
|
-
facet_hash.each do |k, v|
|
100
|
-
if k == 'fc'
|
101
|
-
v.each do |v_fc|
|
102
|
-
facet_url_arr << convert_ia_facet_url_param(v_fc)
|
103
|
-
end
|
104
|
-
else
|
105
|
-
new_url_arr << "#{k}=#{v[0]}"
|
106
|
-
end
|
107
|
-
end
|
108
|
-
[new_url_arr, facet_url_arr]
|
109
|
-
end
|
110
|
-
|
111
|
-
def self.compose_url(url_arrays)
|
112
|
-
new_url = ''
|
113
|
-
url_arrays[0].each do |param_string|
|
114
|
-
new_url = if new_url == ''
|
115
|
-
"#{param_string}&"
|
116
|
-
else
|
117
|
-
"#{new_url}&#{param_string}&"
|
118
|
-
end
|
119
|
-
end
|
120
|
-
url_arrays[1].each do |fps|
|
121
|
-
new_url = "#{new_url}#{fps}&"
|
122
|
-
end
|
123
|
-
"?#{new_url.chomp('&')}"
|
124
|
-
end
|
125
|
-
|
126
|
-
def self.convert_ia_facet_url_param(value)
|
127
|
-
ifu_arr = value.split(':')
|
128
|
-
"f[#{ifu_arr[0]}][]=#{ifu_arr[1]}"
|
129
|
-
end
|
130
19
|
|
131
|
-
def
|
132
|
-
|
20
|
+
def run(response_json, base_url)
|
21
|
+
raise NotImplementedError
|
133
22
|
end
|
134
23
|
end
|
135
24
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
# make response blacklight-ready
|
2
|
-
|
3
|
-
|
2
|
+
module BlacklightInternetArchive
|
3
|
+
class HashWithResponse < Hash
|
4
|
+
include BlacklightInternetArchive::Response
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
6
|
+
def initialize(request, response, result)
|
7
|
+
super()
|
8
|
+
initialize_response(request, response, result || {})
|
9
|
+
end
|
8
10
|
end
|
9
11
|
end
|
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
module BlacklightInternetArchive
|
4
4
|
class Request < ActiveSupport::HashWithIndifferentAccess
|
5
|
-
SINGULAR_KEYS = %w
|
6
|
-
ARRAY_KEYS = %w
|
5
|
+
SINGULAR_KEYS = %w[facet fl q qt rows start spellcheck spellcheck.q sort per_page wt hl group defType].freeze
|
6
|
+
ARRAY_KEYS = %w[facet.field facet.query facet.pivot fq hl.fl].freeze
|
7
7
|
|
8
8
|
def initialize(constructor = {})
|
9
9
|
if constructor.is_a?(Hash)
|
@@ -1,30 +1,29 @@
|
|
1
|
-
module BlacklightInternetArchive
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
module BlacklightInternetArchive
|
2
|
+
module Response
|
3
|
+
def self.included(base)
|
4
|
+
unless base < Hash
|
5
|
+
raise ArgumentError, "InternetArchive::Response expects to included only in (sub)classes of Hash; got included in '#{base}' instead."
|
6
|
+
end
|
7
|
+
base.send(:attr_reader, :request, :response)
|
6
8
|
end
|
7
|
-
base.send(:attr_reader, :request, :response)
|
8
|
-
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
10
|
+
def initialize_response(request, response, result)
|
11
|
+
@request = request
|
12
|
+
@response = response
|
13
|
+
merge!(result)
|
14
|
+
if self['response'] && self['response']['docs'].is_a?(::Array)
|
15
|
+
docs = PaginatedDocSet.new(self['response']['docs'])
|
16
|
+
docs.per_page = request[:params]['rows']
|
17
|
+
docs.page_start = request[:params]['start']
|
18
|
+
docs.total = self['response']['numFound'].to_s.to_i
|
19
|
+
self['response']['docs'] = docs
|
20
|
+
end
|
20
21
|
end
|
21
|
-
end
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
if defined?(ActiveSupport::HashWithIndifferentAccess)
|
23
|
+
def with_indifferent_access
|
24
|
+
if defined?(::BlacklightInternetArchive::HashWithIndifferentAccessWithResponse)
|
25
|
+
::BlacklightInternetArchive::HashWithIndifferentAccessWithResponse.new(request, response, self)
|
26
|
+
elsif defined?(ActiveSupport::HashWithIndifferentAccess)
|
28
27
|
BlacklightInternetArchive.const_set('HashWithIndifferentAccessWithResponse', Class.new(ActiveSupport::HashWithIndifferentAccess))
|
29
28
|
BlacklightInternetArchive::HashWithIndifferentAccessWithResponse.class_eval <<-eos
|
30
29
|
include BlacklightInternetArchive::Response
|
@@ -35,50 +34,48 @@ module BlacklightInternetArchive::Response
|
|
35
34
|
end
|
36
35
|
eos
|
37
36
|
::BlacklightInternetArchive::HashWithIndifferentAccessWithResponse.new(request, response, self)
|
38
|
-
else
|
39
|
-
raise RuntimeError, 'HashWithIndifferentAccess is not currently defined'
|
40
37
|
end
|
41
38
|
end
|
42
|
-
end
|
43
39
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
40
|
+
# An Array subclass that wraps the ['response']['docs'] array and adds pagination helpers.
|
41
|
+
class PaginatedDocSet < ::Array
|
42
|
+
attr_accessor :page_start, :per_page, :page_total
|
43
|
+
unless (Object.const_defined?('RUBY_ENGINE') && Object::RUBY_ENGINE == 'rbx')
|
44
|
+
alias start page_start
|
45
|
+
alias start= page_start=
|
46
|
+
alias total page_total
|
47
|
+
alias total= page_total=
|
48
|
+
end
|
53
49
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
50
|
+
# Returns the current page calculated from 'rows' and 'start'
|
51
|
+
def current_page
|
52
|
+
return 1 if start < 1
|
53
|
+
per_page_normalized = per_page < 1 ? 1 : per_page
|
54
|
+
@current_page ||= (start / per_page_normalized).ceil + 1
|
55
|
+
end
|
60
56
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
57
|
+
# Calculates the total pages from 'numFound' and 'rows'
|
58
|
+
def total_pages
|
59
|
+
@total_pages ||= per_page > 0 ? (total / per_page.to_f).ceil : 1
|
60
|
+
end
|
65
61
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
62
|
+
# returns the previous page number or 1
|
63
|
+
def previous_page
|
64
|
+
@previous_page ||= current_page > 1 ? current_page - 1 : 1
|
65
|
+
end
|
70
66
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
67
|
+
# returns the next page number or the last
|
68
|
+
def next_page
|
69
|
+
@next_page ||= current_page == total_pages ? total_pages : current_page + 1
|
70
|
+
end
|
75
71
|
|
76
|
-
|
77
|
-
|
78
|
-
|
72
|
+
def has_next?
|
73
|
+
current_page < total_pages
|
74
|
+
end
|
79
75
|
|
80
|
-
|
81
|
-
|
76
|
+
def has_previous?
|
77
|
+
current_page > 1
|
78
|
+
end
|
82
79
|
end
|
83
80
|
end
|
84
81
|
end
|
@@ -1,20 +1,15 @@
|
|
1
1
|
require 'json'
|
2
|
+
require_relative 'sites_entity_processor'
|
2
3
|
|
3
4
|
module BlacklightInternetArchive
|
5
|
+
|
4
6
|
class ResponseAdapter
|
5
|
-
|
7
|
+
|
8
|
+
def self.adapt_response(response_body, base_url, search_type)
|
6
9
|
response_body_string = convert_highlighting(response_body.to_s)
|
7
10
|
res_data_json = JSON.parse(response_body_string)
|
8
|
-
|
9
|
-
|
10
|
-
# processed_entities = process_entities(entities, res_data_json['results']['searchedFacets'], base_url)
|
11
|
-
# response_docs = { 'response' => { 'docs' => processed_entities } }
|
12
|
-
|
13
|
-
# response_docs = EntityProcessor.run(res_data_json, base_url)
|
14
|
-
response_docs = { 'response' => { 'docs' => EntityProcessor.run(res_data_json, base_url) } }
|
15
|
-
|
16
|
-
|
17
|
-
|
11
|
+
processor = EntityProcessor.get_processor(search_type)
|
12
|
+
response_docs = { 'response' => { 'docs' => processor.run(res_data_json, base_url) } }
|
18
13
|
response_docs.merge!('facet_counts' => { 'facet_queries' => {},
|
19
14
|
'facet_fields' => reformat_facets(res_data_json), 'facet_dates' => {} })
|
20
15
|
set_paging_stats(response_docs, res_data_json)
|
@@ -27,7 +22,6 @@ module BlacklightInternetArchive
|
|
27
22
|
response_string
|
28
23
|
end
|
29
24
|
|
30
|
-
|
31
25
|
def self.set_paging_stats(response_docs, res_data_json)
|
32
26
|
response_docs['response']['numFound'] = res_data_json['results']['totalResultCount']
|
33
27
|
response_docs['response']['page'] = res_data_json['pageParams']['page']
|
@@ -35,7 +29,6 @@ module BlacklightInternetArchive
|
|
35
29
|
response_docs
|
36
30
|
end
|
37
31
|
|
38
|
-
|
39
32
|
def self.reformat_facets(response_json)
|
40
33
|
facets_hash = {}
|
41
34
|
facets = response_json['results']['searchedFacets']
|
@@ -54,6 +47,5 @@ module BlacklightInternetArchive
|
|
54
47
|
end
|
55
48
|
new_item_arr
|
56
49
|
end
|
57
|
-
|
58
50
|
end
|
59
51
|
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'active_support/core_ext/string/output_safety'
|
2
|
+
require 'cgi'
|
3
|
+
require_relative 'entity_processor'
|
4
|
+
|
5
|
+
module BlacklightInternetArchive
|
6
|
+
# extract and convert individual results from response
|
7
|
+
|
8
|
+
class SitesEntityProcessor < EntityProcessor
|
9
|
+
|
10
|
+
@metadata_fields
|
11
|
+
@linkable_fields
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
|
15
|
+
@metadata_fields = %w[meta_Creator meta_Coverage meta_Subject meta_Language meta_Collector meta_Title]
|
16
|
+
@linkable_fields = { 'meta_Title' => 'allURL', 'url' => 'allURL', 'numCaptures' => 'allURL',
|
17
|
+
'numVideos' => 'seedVideosUrl' }
|
18
|
+
end
|
19
|
+
|
20
|
+
def run(response_json, base_url)
|
21
|
+
raise ArgumentError 'No entities in response.' unless response_json['results']['entities']
|
22
|
+
raise ArgumentError 'Base url required.' unless base_url
|
23
|
+
|
24
|
+
entities = response_json['results']['entities']
|
25
|
+
entities_clone = entities.clone
|
26
|
+
entities.each do |entity|
|
27
|
+
next unless entity['isSeed']
|
28
|
+
entities_clone[entities.index(entity)] = reformat_entity(entity, response_json['results']['searchedFacets'], base_url)
|
29
|
+
end
|
30
|
+
entities_clone
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
def reformat_entity(entity, searched_facets, base_url)
|
35
|
+
entity_clone = entity.clone
|
36
|
+
entity.each do |entity_key, entity_val|
|
37
|
+
if entity_key == 'metadata'
|
38
|
+
entity_clone = facet_link_metadata(entity_val, entity_clone, searched_facets)
|
39
|
+
end
|
40
|
+
entity_clone = set_linked_fields(entity_clone, base_url)
|
41
|
+
end
|
42
|
+
# these fields are not under the metadata node and not handled by process_entities
|
43
|
+
entity_clone['linked_websiteGroup'] = link_faceted_results_data('websiteGroup', [entity['websiteGroup']], searched_facets)
|
44
|
+
val_url = entity_clone['allURL']
|
45
|
+
val_url = "#{base_url}#{val_url}" if val_url.start_with?('?')
|
46
|
+
entity_clone['linked_Captures'] = linked_val = make_link('View Site Captures on Wayback.org', val_url)
|
47
|
+
entity_clone
|
48
|
+
end
|
49
|
+
|
50
|
+
def facet_link_metadata(entval, ent_clone, facet_info)
|
51
|
+
@metadata_fields.each do |k|
|
52
|
+
if entval[k]
|
53
|
+
ent_clone[k] = entval[k].map(&:html_safe)
|
54
|
+
ent_clone["linked_#{k}"] = link_faceted_results_data(k, entval[k], facet_info)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
ent_clone
|
58
|
+
end
|
59
|
+
|
60
|
+
def set_linked_fields(e_clone, base_url)
|
61
|
+
@linkable_fields.each do |l, l_url|
|
62
|
+
val = e_clone[l]
|
63
|
+
val_url = e_clone[l_url]
|
64
|
+
val_url = "#{base_url}#{val_url}" if val_url.start_with?('?')
|
65
|
+
linked_val = make_link(val, val_url)
|
66
|
+
e_clone["linked_#{l}"] = linked_val
|
67
|
+
end
|
68
|
+
e_clone
|
69
|
+
end
|
70
|
+
|
71
|
+
def link_faceted_results_data(meta_field, meta_val, searched_facets)
|
72
|
+
link_facets = []
|
73
|
+
meta_val.each do |mv|
|
74
|
+
searched_facets.each do |sf|
|
75
|
+
next unless sf['id'] == meta_field
|
76
|
+
sf['results'].each do |ra|
|
77
|
+
if ra['name'] == mv
|
78
|
+
link_facets << make_link(ra['name'], convert_ia_facet_url(ra['addFacetURL']))
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
link_facets.map(&:html_safe)
|
84
|
+
end
|
85
|
+
|
86
|
+
# translate ia facet url into blacklight facet syntax
|
87
|
+
def convert_ia_facet_url(ia_facet_url)
|
88
|
+
ifu_hash = CGI.parse(ia_facet_url.tr('?', ''))
|
89
|
+
url_arrays = prepare_url_params(ifu_hash, [], [])
|
90
|
+
compose_url(url_arrays)
|
91
|
+
end
|
92
|
+
|
93
|
+
def prepare_url_params(facet_hash, facet_url_arr, new_url_arr)
|
94
|
+
facet_hash.each do |k, v|
|
95
|
+
if k == 'fc'
|
96
|
+
v.each do |v_fc|
|
97
|
+
facet_url_arr << convert_ia_facet_url_param(v_fc)
|
98
|
+
end
|
99
|
+
else
|
100
|
+
new_url_arr << "#{k}=#{v[0]}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
[new_url_arr, facet_url_arr]
|
104
|
+
end
|
105
|
+
|
106
|
+
def compose_url(url_arrays)
|
107
|
+
new_url = ''
|
108
|
+
url_arrays[0].each do |param_string|
|
109
|
+
new_url = if new_url == ''
|
110
|
+
"#{param_string}&"
|
111
|
+
else
|
112
|
+
"#{new_url}&#{param_string}&"
|
113
|
+
end
|
114
|
+
end
|
115
|
+
url_arrays[1].each do |fps|
|
116
|
+
new_url = "#{new_url}#{fps}&"
|
117
|
+
end
|
118
|
+
"?#{new_url.chomp('&')}"
|
119
|
+
end
|
120
|
+
|
121
|
+
def convert_ia_facet_url_param(value)
|
122
|
+
ifu_arr = value.split(':')
|
123
|
+
"f[#{ifu_arr[0]}][]=#{ifu_arr[1]}"
|
124
|
+
end
|
125
|
+
|
126
|
+
def make_link(value, url)
|
127
|
+
"<a href=\"#{url}\">#{value}</a>".html_safe
|
128
|
+
end
|
129
|
+
|
130
|
+
end
|
131
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blacklight_internet_archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jd2148
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- LICENSE.txt
|
116
116
|
- README.md
|
117
117
|
- Rakefile
|
118
|
+
- app/helpers/blacklight_url_helper.rb
|
118
119
|
- blacklight_internet_archive.gemspec
|
119
120
|
- lib/blacklight_internet_archive.rb
|
120
121
|
- lib/blacklight_internet_archive/blacklight_response.rb
|
@@ -126,6 +127,7 @@ files:
|
|
126
127
|
- lib/blacklight_internet_archive/request.rb
|
127
128
|
- lib/blacklight_internet_archive/response.rb
|
128
129
|
- lib/blacklight_internet_archive/response_adapter.rb
|
130
|
+
- lib/blacklight_internet_archive/sites_entity_processor.rb
|
129
131
|
- lib/blacklight_internet_archive/version.rb
|
130
132
|
homepage:
|
131
133
|
licenses:
|