blacklight_internet_archive 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/helpers/blacklight_url_helper.rb +24 -0
- data/lib/blacklight_internet_archive.rb +0 -3
- data/lib/blacklight_internet_archive/client.rb +35 -36
- data/lib/blacklight_internet_archive/entity_processor.rb +11 -122
- data/lib/blacklight_internet_archive/hash_with_response.rb +7 -5
- data/lib/blacklight_internet_archive/repository.rb +1 -0
- data/lib/blacklight_internet_archive/request.rb +2 -2
- data/lib/blacklight_internet_archive/response.rb +55 -58
- data/lib/blacklight_internet_archive/response_adapter.rb +6 -14
- data/lib/blacklight_internet_archive/sites_entity_processor.rb +131 -0
- data/lib/blacklight_internet_archive/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5625555cccea17b68cd7aad8d0f89eee7f1771e
|
4
|
+
data.tar.gz: 36aa4cb5f8bd71e9680d177357a648f37af7581a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: baccecc09685c732a4b3cda62a0a70c261ccb6ddded8b0c82dedf513afff7dbd2ffd744abad3d108db0c9ed0a5471a40580a67ea89ba0e74d5a5b5ccbd2d2e77
|
7
|
+
data.tar.gz: adaa0c0cb7471631a0065346ae9ec8ae1e14fea82f5526ec966304d3e5283d205d632ce04f41c8c39a15be3d3e80659e4b6639ff32288d199b7035c08712f7bb
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module BlacklightUrlHelper
|
2
|
+
include Blacklight::UrlHelperBehavior
|
3
|
+
|
4
|
+
#link_to label, 'https://wayback.archive-it.org/1068/*/http://www.mahteso.org/', { data: { :'context-href' => 'https://wayback.archive-it.org/1068/*/http://www.mahteso.org/' } }
|
5
|
+
# link_to_document(doc, 'VIEW', :counter => 3)
|
6
|
+
# Use the catalog_path RESTful route to create a link to the show page for a specific item.
|
7
|
+
# catalog_path accepts a hash. The solr query params are stored in the session,
|
8
|
+
# so we only need the +counter+ param here. We also need to know if we are viewing the document as part of search results.
|
9
|
+
# TODO: move this to the IndexPresenter
|
10
|
+
def link_to_document(doc, field_or_opts = nil, opts={:counter => nil})
|
11
|
+
if field_or_opts.is_a? Hash
|
12
|
+
opts = field_or_opts
|
13
|
+
else
|
14
|
+
field = field_or_opts
|
15
|
+
end
|
16
|
+
|
17
|
+
field ||= document_show_link_field(doc)
|
18
|
+
label = index_presenter(doc).label field, opts
|
19
|
+
path = doc['allURL']
|
20
|
+
data_path = "#{path}?counter=#{opts[:counter]}"
|
21
|
+
link_to label, path, { data: { :'context-href' => path } }
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
require "blacklight_internet_archive/version"
|
2
2
|
|
3
3
|
module BlacklightInternetArchive
|
4
|
-
|
5
4
|
autoload :InternetArchive, 'blacklight_internet_archive/internet_archive'
|
6
5
|
autoload :Client, 'blacklight_internet_archive/client'
|
7
6
|
autoload :Repository, 'blacklight_internet_archive/repository'
|
@@ -18,6 +17,4 @@ module BlacklightInternetArchive
|
|
18
17
|
|
19
18
|
Client.new connection, opts
|
20
19
|
end
|
21
|
-
|
22
|
-
|
23
20
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'net/http'
|
4
|
+
require 'active_support/core_ext/hash'
|
4
5
|
|
5
6
|
module BlacklightInternetArchive
|
6
7
|
class Client
|
@@ -22,14 +23,14 @@ module BlacklightInternetArchive
|
|
22
23
|
def execute(request_context)
|
23
24
|
uri_string = request_context[:params][:uri]
|
24
25
|
uri = URI.parse(uri_string)
|
26
|
+
search_type = request_context[:params][:controller]
|
25
27
|
|
26
28
|
res = Net::HTTP.get_response(uri)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
29
|
+
return unless res.is_a?(Net::HTTPSuccess)
|
30
|
+
res_data = res.read_body
|
31
|
+
return if res_data.nil? || res_data.empty?
|
32
|
+
res_data_mod = BlacklightInternetArchive::ResponseAdapter.adapt_response(res_data, @connection_url, search_type)
|
33
|
+
BlacklightInternetArchive::HashWithResponse.new(request_context, res, res_data_mod)
|
33
34
|
end
|
34
35
|
|
35
36
|
# +build_request+ accepts a path and options hash
|
@@ -38,33 +39,43 @@ module BlacklightInternetArchive
|
|
38
39
|
# :uri
|
39
40
|
# :path
|
40
41
|
# :query
|
42
|
+
# :rows and :start
|
43
|
+
|
41
44
|
|
42
45
|
def build_request(path, opts)
|
43
46
|
raise "path must be a string or symbol, not #{path.inspect}" unless [String, Symbol].include?(path.class)
|
44
|
-
path = "#{path}.json"
|
47
|
+
opts[:path] = "#{path}.json"
|
48
|
+
query_opts = construct_query_options(opts)
|
49
|
+
opts[:start] = calculate_start(query_opts)
|
50
|
+
facet_string = construct_facet_string(opts)
|
51
|
+
query = query_opts.to_query
|
52
|
+
query = "#{query}&#{facet_string}" if facet_string
|
53
|
+
opts[:query] = query
|
54
|
+
opts[:uri] = opts[:path].to_s + (query ? "?#{query}" : '')
|
55
|
+
opts[:rows] = 10 if opts[:rows].nil?
|
56
|
+
{ params: opts }
|
57
|
+
end
|
45
58
|
|
46
|
-
|
59
|
+
def construct_query_options(opts)
|
47
60
|
query_opts = {}
|
48
61
|
query_opts['pageSize'] = '10'
|
49
|
-
if opts['rows']
|
50
|
-
|
51
|
-
|
52
|
-
query_opts['pageSize'] = '10'
|
53
|
-
end
|
54
|
-
if opts['page']
|
55
|
-
query_opts['page'] = opts['page']
|
56
|
-
else
|
57
|
-
query_opts['page'] = '1'
|
58
|
-
end
|
59
|
-
if query_opts['page'].to_i < 2
|
60
|
-
opts[:start] = 0
|
61
|
-
else
|
62
|
-
opts[:start] = ((query_opts['page'].to_i - 1) * query_opts['pageSize'].to_i)
|
63
|
-
end
|
64
|
-
|
62
|
+
query_opts['pageSize'] = opts['rows'] if opts['rows']
|
63
|
+
query_opts['page'] = '1'
|
64
|
+
query_opts['page'] = opts['page'] if opts['page']
|
65
65
|
query_opts['q'] = ''
|
66
66
|
query_opts['q'] = CGI.escape(opts['q']) if opts['q']
|
67
|
+
query_opts
|
68
|
+
end
|
67
69
|
|
70
|
+
def calculate_start(query_opts)
|
71
|
+
start = 0
|
72
|
+
if query_opts['page'].to_i >= 2
|
73
|
+
start = ((query_opts['page'].to_i - 1) * query_opts['pageSize'].to_i)
|
74
|
+
end
|
75
|
+
start
|
76
|
+
end
|
77
|
+
|
78
|
+
def construct_facet_string(opts)
|
68
79
|
facet_string = ''
|
69
80
|
if opts['f']
|
70
81
|
opts['f'].each do |k, v|
|
@@ -74,19 +85,7 @@ module BlacklightInternetArchive
|
|
74
85
|
end
|
75
86
|
end
|
76
87
|
end
|
77
|
-
|
78
88
|
facet_string = facet_string.tr(' ', '+').chomp('&')
|
79
|
-
query = query_opts.to_query
|
80
|
-
query = "#{query}&#{facet_string}" if facet_string
|
81
|
-
|
82
|
-
opts[:query] = query
|
83
|
-
opts[:uri] = path.to_s + (query ? "?#{query}" : '')
|
84
|
-
|
85
|
-
opts[:rows] = 10 if opts[:rows].nil?
|
86
|
-
|
87
|
-
opts[:start] = 0 if opts[:start].nil?
|
88
|
-
|
89
|
-
{ :params => opts }
|
90
89
|
end
|
91
90
|
end
|
92
91
|
end
|
@@ -1,135 +1,24 @@
|
|
1
1
|
require 'active_support/core_ext/string/output_safety'
|
2
2
|
require 'cgi'
|
3
|
+
require_relative 'sites_entity_processor'
|
3
4
|
|
4
5
|
module BlacklightInternetArchive
|
5
6
|
# extract and convert individual results from response
|
6
7
|
class EntityProcessor
|
7
|
-
@metadata_fields = %w[meta_Creator meta_Coverage meta_Subject meta_Language meta_Collector meta_Title]
|
8
|
-
@date_fields = %w[firstCapture lastCapture]
|
9
|
-
@linkable_fields = { 'meta_Title' => 'allURL', 'url' => 'allURL', 'numCaptures' => 'allURL',
|
10
|
-
'numVideos' => 'seedVideosUrl' }
|
11
8
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
entities_clone[entities.index(entity)] = reformat_entity(entity, response_json['results']['searchedFacets'], base_url)
|
9
|
+
def self.get_processor(search_type = 'catalog')
|
10
|
+
if search_type == 'search_pages'
|
11
|
+
# return ArchivedPagesEntityProcessor.new
|
12
|
+
elsif search_type == 'search_videos'
|
13
|
+
# return SeedVideosEntityProcessor.new
|
14
|
+
else
|
15
|
+
return SitesEntityProcessor.new
|
20
16
|
end
|
21
|
-
|
22
|
-
entities_clone
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.reformat_entity(entity, searched_facets, base_url)
|
26
|
-
entity_clone = entity.clone
|
27
|
-
entity.each do |entity_key, entity_val|
|
28
|
-
if entity_key == 'metadata'
|
29
|
-
entity_clone = facet_link_metadata(entity_val, entity_clone, searched_facets)
|
30
|
-
end
|
31
|
-
entity_clone = set_date_fields(entity_clone, entity_key, entity_val)
|
32
|
-
entity_clone = set_linked_fields(entity_clone, base_url)
|
33
|
-
end
|
34
|
-
# this field is not under the metadata node and not handled by process_entities
|
35
|
-
entity_clone['linked_websiteGroup'] = link_faceted_results_data('websiteGroup', [entity['websiteGroup']], searched_facets)
|
36
|
-
entity_clone
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.facet_link_metadata(entval, ent_clone, facet_info)
|
40
|
-
@metadata_fields.each do |k|
|
41
|
-
if entval[k]
|
42
|
-
ent_clone[k] = entval[k].map(&:html_safe)
|
43
|
-
ent_clone["linked_#{k}"] = link_faceted_results_data(k, entval[k], facet_info)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
ent_clone
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.set_date_fields(e_clone, ent, entval)
|
50
|
-
@date_fields.each do |d|
|
51
|
-
next unless ent == d
|
52
|
-
new_key = "#{d}_date"
|
53
|
-
next unless entval['formattedDate']
|
54
|
-
formatted_date = entval['formattedDate']
|
55
|
-
date_url = entval['waybackUrl']
|
56
|
-
date_link = make_link(formatted_date, date_url)
|
57
|
-
e_clone[new_key] = formatted_date
|
58
|
-
e_clone["linked_#{new_key}"] = date_link.html_safe
|
59
|
-
end
|
60
|
-
e_clone
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.set_linked_fields(e_clone, base_url)
|
64
|
-
@linkable_fields.each do |l, l_url|
|
65
|
-
val = e_clone[l]
|
66
|
-
val_url = e_clone[l_url]
|
67
|
-
if val_url.start_with?('?')
|
68
|
-
val_url = "#{base_url}#{val_url}"
|
69
|
-
end
|
70
|
-
linked_val = make_link(val, val_url)
|
71
|
-
e_clone["linked_#{l}"] = linked_val
|
72
|
-
end
|
73
|
-
e_clone
|
74
|
-
end
|
17
|
+
end
|
75
18
|
|
76
|
-
def self.link_faceted_results_data(meta_field, meta_val, searched_facets)
|
77
|
-
link_facets = []
|
78
|
-
meta_val.each do |mv|
|
79
|
-
searched_facets.each do |sf|
|
80
|
-
next unless sf['id'] == meta_field
|
81
|
-
sf['results'].each do |ra|
|
82
|
-
if ra['name'] == mv
|
83
|
-
link_facets << make_link(ra['name'], convert_ia_facet_url(ra['addFacetURL']))
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
link_facets.map(&:html_safe)
|
89
|
-
end
|
90
|
-
|
91
|
-
# translate ia facet url into blacklight facet syntax
|
92
|
-
def self.convert_ia_facet_url(ia_facet_url)
|
93
|
-
ifu_hash = CGI.parse(ia_facet_url.tr('?', ''))
|
94
|
-
url_arrays = prepare_url_params(ifu_hash, [], [])
|
95
|
-
compose_url(url_arrays)
|
96
|
-
end
|
97
|
-
|
98
|
-
def self.prepare_url_params(facet_hash, facet_url_arr, new_url_arr)
|
99
|
-
facet_hash.each do |k, v|
|
100
|
-
if k == 'fc'
|
101
|
-
v.each do |v_fc|
|
102
|
-
facet_url_arr << convert_ia_facet_url_param(v_fc)
|
103
|
-
end
|
104
|
-
else
|
105
|
-
new_url_arr << "#{k}=#{v[0]}"
|
106
|
-
end
|
107
|
-
end
|
108
|
-
[new_url_arr, facet_url_arr]
|
109
|
-
end
|
110
|
-
|
111
|
-
def self.compose_url(url_arrays)
|
112
|
-
new_url = ''
|
113
|
-
url_arrays[0].each do |param_string|
|
114
|
-
new_url = if new_url == ''
|
115
|
-
"#{param_string}&"
|
116
|
-
else
|
117
|
-
"#{new_url}&#{param_string}&"
|
118
|
-
end
|
119
|
-
end
|
120
|
-
url_arrays[1].each do |fps|
|
121
|
-
new_url = "#{new_url}#{fps}&"
|
122
|
-
end
|
123
|
-
"?#{new_url.chomp('&')}"
|
124
|
-
end
|
125
|
-
|
126
|
-
def self.convert_ia_facet_url_param(value)
|
127
|
-
ifu_arr = value.split(':')
|
128
|
-
"f[#{ifu_arr[0]}][]=#{ifu_arr[1]}"
|
129
|
-
end
|
130
19
|
|
131
|
-
def
|
132
|
-
|
20
|
+
def run(response_json, base_url)
|
21
|
+
raise NotImplementedError
|
133
22
|
end
|
134
23
|
end
|
135
24
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
# make response blacklight-ready
|
2
|
-
|
3
|
-
|
2
|
+
module BlacklightInternetArchive
|
3
|
+
class HashWithResponse < Hash
|
4
|
+
include BlacklightInternetArchive::Response
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
6
|
+
def initialize(request, response, result)
|
7
|
+
super()
|
8
|
+
initialize_response(request, response, result || {})
|
9
|
+
end
|
8
10
|
end
|
9
11
|
end
|
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
module BlacklightInternetArchive
|
4
4
|
class Request < ActiveSupport::HashWithIndifferentAccess
|
5
|
-
SINGULAR_KEYS = %w
|
6
|
-
ARRAY_KEYS = %w
|
5
|
+
SINGULAR_KEYS = %w[facet fl q qt rows start spellcheck spellcheck.q sort per_page wt hl group defType].freeze
|
6
|
+
ARRAY_KEYS = %w[facet.field facet.query facet.pivot fq hl.fl].freeze
|
7
7
|
|
8
8
|
def initialize(constructor = {})
|
9
9
|
if constructor.is_a?(Hash)
|
@@ -1,30 +1,29 @@
|
|
1
|
-
module BlacklightInternetArchive
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
module BlacklightInternetArchive
|
2
|
+
module Response
|
3
|
+
def self.included(base)
|
4
|
+
unless base < Hash
|
5
|
+
raise ArgumentError, "InternetArchive::Response expects to included only in (sub)classes of Hash; got included in '#{base}' instead."
|
6
|
+
end
|
7
|
+
base.send(:attr_reader, :request, :response)
|
6
8
|
end
|
7
|
-
base.send(:attr_reader, :request, :response)
|
8
|
-
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
10
|
+
def initialize_response(request, response, result)
|
11
|
+
@request = request
|
12
|
+
@response = response
|
13
|
+
merge!(result)
|
14
|
+
if self['response'] && self['response']['docs'].is_a?(::Array)
|
15
|
+
docs = PaginatedDocSet.new(self['response']['docs'])
|
16
|
+
docs.per_page = request[:params]['rows']
|
17
|
+
docs.page_start = request[:params]['start']
|
18
|
+
docs.total = self['response']['numFound'].to_s.to_i
|
19
|
+
self['response']['docs'] = docs
|
20
|
+
end
|
20
21
|
end
|
21
|
-
end
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
if defined?(ActiveSupport::HashWithIndifferentAccess)
|
23
|
+
def with_indifferent_access
|
24
|
+
if defined?(::BlacklightInternetArchive::HashWithIndifferentAccessWithResponse)
|
25
|
+
::BlacklightInternetArchive::HashWithIndifferentAccessWithResponse.new(request, response, self)
|
26
|
+
elsif defined?(ActiveSupport::HashWithIndifferentAccess)
|
28
27
|
BlacklightInternetArchive.const_set('HashWithIndifferentAccessWithResponse', Class.new(ActiveSupport::HashWithIndifferentAccess))
|
29
28
|
BlacklightInternetArchive::HashWithIndifferentAccessWithResponse.class_eval <<-eos
|
30
29
|
include BlacklightInternetArchive::Response
|
@@ -35,50 +34,48 @@ module BlacklightInternetArchive::Response
|
|
35
34
|
end
|
36
35
|
eos
|
37
36
|
::BlacklightInternetArchive::HashWithIndifferentAccessWithResponse.new(request, response, self)
|
38
|
-
else
|
39
|
-
raise RuntimeError, 'HashWithIndifferentAccess is not currently defined'
|
40
37
|
end
|
41
38
|
end
|
42
|
-
end
|
43
39
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
40
|
+
# An Array subclass that wraps the ['response']['docs'] array and adds pagination helpers.
|
41
|
+
class PaginatedDocSet < ::Array
|
42
|
+
attr_accessor :page_start, :per_page, :page_total
|
43
|
+
unless (Object.const_defined?('RUBY_ENGINE') && Object::RUBY_ENGINE == 'rbx')
|
44
|
+
alias start page_start
|
45
|
+
alias start= page_start=
|
46
|
+
alias total page_total
|
47
|
+
alias total= page_total=
|
48
|
+
end
|
53
49
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
50
|
+
# Returns the current page calculated from 'rows' and 'start'
|
51
|
+
def current_page
|
52
|
+
return 1 if start < 1
|
53
|
+
per_page_normalized = per_page < 1 ? 1 : per_page
|
54
|
+
@current_page ||= (start / per_page_normalized).ceil + 1
|
55
|
+
end
|
60
56
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
57
|
+
# Calculates the total pages from 'numFound' and 'rows'
|
58
|
+
def total_pages
|
59
|
+
@total_pages ||= per_page > 0 ? (total / per_page.to_f).ceil : 1
|
60
|
+
end
|
65
61
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
62
|
+
# returns the previous page number or 1
|
63
|
+
def previous_page
|
64
|
+
@previous_page ||= current_page > 1 ? current_page - 1 : 1
|
65
|
+
end
|
70
66
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
67
|
+
# returns the next page number or the last
|
68
|
+
def next_page
|
69
|
+
@next_page ||= current_page == total_pages ? total_pages : current_page + 1
|
70
|
+
end
|
75
71
|
|
76
|
-
|
77
|
-
|
78
|
-
|
72
|
+
def has_next?
|
73
|
+
current_page < total_pages
|
74
|
+
end
|
79
75
|
|
80
|
-
|
81
|
-
|
76
|
+
def has_previous?
|
77
|
+
current_page > 1
|
78
|
+
end
|
82
79
|
end
|
83
80
|
end
|
84
81
|
end
|
@@ -1,20 +1,15 @@
|
|
1
1
|
require 'json'
|
2
|
+
require_relative 'sites_entity_processor'
|
2
3
|
|
3
4
|
module BlacklightInternetArchive
|
5
|
+
|
4
6
|
class ResponseAdapter
|
5
|
-
|
7
|
+
|
8
|
+
def self.adapt_response(response_body, base_url, search_type)
|
6
9
|
response_body_string = convert_highlighting(response_body.to_s)
|
7
10
|
res_data_json = JSON.parse(response_body_string)
|
8
|
-
|
9
|
-
|
10
|
-
# processed_entities = process_entities(entities, res_data_json['results']['searchedFacets'], base_url)
|
11
|
-
# response_docs = { 'response' => { 'docs' => processed_entities } }
|
12
|
-
|
13
|
-
# response_docs = EntityProcessor.run(res_data_json, base_url)
|
14
|
-
response_docs = { 'response' => { 'docs' => EntityProcessor.run(res_data_json, base_url) } }
|
15
|
-
|
16
|
-
|
17
|
-
|
11
|
+
processor = EntityProcessor.get_processor(search_type)
|
12
|
+
response_docs = { 'response' => { 'docs' => processor.run(res_data_json, base_url) } }
|
18
13
|
response_docs.merge!('facet_counts' => { 'facet_queries' => {},
|
19
14
|
'facet_fields' => reformat_facets(res_data_json), 'facet_dates' => {} })
|
20
15
|
set_paging_stats(response_docs, res_data_json)
|
@@ -27,7 +22,6 @@ module BlacklightInternetArchive
|
|
27
22
|
response_string
|
28
23
|
end
|
29
24
|
|
30
|
-
|
31
25
|
def self.set_paging_stats(response_docs, res_data_json)
|
32
26
|
response_docs['response']['numFound'] = res_data_json['results']['totalResultCount']
|
33
27
|
response_docs['response']['page'] = res_data_json['pageParams']['page']
|
@@ -35,7 +29,6 @@ module BlacklightInternetArchive
|
|
35
29
|
response_docs
|
36
30
|
end
|
37
31
|
|
38
|
-
|
39
32
|
def self.reformat_facets(response_json)
|
40
33
|
facets_hash = {}
|
41
34
|
facets = response_json['results']['searchedFacets']
|
@@ -54,6 +47,5 @@ module BlacklightInternetArchive
|
|
54
47
|
end
|
55
48
|
new_item_arr
|
56
49
|
end
|
57
|
-
|
58
50
|
end
|
59
51
|
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'active_support/core_ext/string/output_safety'
|
2
|
+
require 'cgi'
|
3
|
+
require_relative 'entity_processor'
|
4
|
+
|
5
|
+
module BlacklightInternetArchive
|
6
|
+
# extract and convert individual results from response
|
7
|
+
|
8
|
+
class SitesEntityProcessor < EntityProcessor
|
9
|
+
|
10
|
+
@metadata_fields
|
11
|
+
@linkable_fields
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
|
15
|
+
@metadata_fields = %w[meta_Creator meta_Coverage meta_Subject meta_Language meta_Collector meta_Title]
|
16
|
+
@linkable_fields = { 'meta_Title' => 'allURL', 'url' => 'allURL', 'numCaptures' => 'allURL',
|
17
|
+
'numVideos' => 'seedVideosUrl' }
|
18
|
+
end
|
19
|
+
|
20
|
+
def run(response_json, base_url)
|
21
|
+
raise ArgumentError 'No entities in response.' unless response_json['results']['entities']
|
22
|
+
raise ArgumentError 'Base url required.' unless base_url
|
23
|
+
|
24
|
+
entities = response_json['results']['entities']
|
25
|
+
entities_clone = entities.clone
|
26
|
+
entities.each do |entity|
|
27
|
+
next unless entity['isSeed']
|
28
|
+
entities_clone[entities.index(entity)] = reformat_entity(entity, response_json['results']['searchedFacets'], base_url)
|
29
|
+
end
|
30
|
+
entities_clone
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
def reformat_entity(entity, searched_facets, base_url)
|
35
|
+
entity_clone = entity.clone
|
36
|
+
entity.each do |entity_key, entity_val|
|
37
|
+
if entity_key == 'metadata'
|
38
|
+
entity_clone = facet_link_metadata(entity_val, entity_clone, searched_facets)
|
39
|
+
end
|
40
|
+
entity_clone = set_linked_fields(entity_clone, base_url)
|
41
|
+
end
|
42
|
+
# these fields are not under the metadata node and not handled by process_entities
|
43
|
+
entity_clone['linked_websiteGroup'] = link_faceted_results_data('websiteGroup', [entity['websiteGroup']], searched_facets)
|
44
|
+
val_url = entity_clone['allURL']
|
45
|
+
val_url = "#{base_url}#{val_url}" if val_url.start_with?('?')
|
46
|
+
entity_clone['linked_Captures'] = linked_val = make_link('View Site Captures on Wayback.org', val_url)
|
47
|
+
entity_clone
|
48
|
+
end
|
49
|
+
|
50
|
+
def facet_link_metadata(entval, ent_clone, facet_info)
|
51
|
+
@metadata_fields.each do |k|
|
52
|
+
if entval[k]
|
53
|
+
ent_clone[k] = entval[k].map(&:html_safe)
|
54
|
+
ent_clone["linked_#{k}"] = link_faceted_results_data(k, entval[k], facet_info)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
ent_clone
|
58
|
+
end
|
59
|
+
|
60
|
+
def set_linked_fields(e_clone, base_url)
|
61
|
+
@linkable_fields.each do |l, l_url|
|
62
|
+
val = e_clone[l]
|
63
|
+
val_url = e_clone[l_url]
|
64
|
+
val_url = "#{base_url}#{val_url}" if val_url.start_with?('?')
|
65
|
+
linked_val = make_link(val, val_url)
|
66
|
+
e_clone["linked_#{l}"] = linked_val
|
67
|
+
end
|
68
|
+
e_clone
|
69
|
+
end
|
70
|
+
|
71
|
+
def link_faceted_results_data(meta_field, meta_val, searched_facets)
|
72
|
+
link_facets = []
|
73
|
+
meta_val.each do |mv|
|
74
|
+
searched_facets.each do |sf|
|
75
|
+
next unless sf['id'] == meta_field
|
76
|
+
sf['results'].each do |ra|
|
77
|
+
if ra['name'] == mv
|
78
|
+
link_facets << make_link(ra['name'], convert_ia_facet_url(ra['addFacetURL']))
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
link_facets.map(&:html_safe)
|
84
|
+
end
|
85
|
+
|
86
|
+
# translate ia facet url into blacklight facet syntax
|
87
|
+
def convert_ia_facet_url(ia_facet_url)
|
88
|
+
ifu_hash = CGI.parse(ia_facet_url.tr('?', ''))
|
89
|
+
url_arrays = prepare_url_params(ifu_hash, [], [])
|
90
|
+
compose_url(url_arrays)
|
91
|
+
end
|
92
|
+
|
93
|
+
def prepare_url_params(facet_hash, facet_url_arr, new_url_arr)
|
94
|
+
facet_hash.each do |k, v|
|
95
|
+
if k == 'fc'
|
96
|
+
v.each do |v_fc|
|
97
|
+
facet_url_arr << convert_ia_facet_url_param(v_fc)
|
98
|
+
end
|
99
|
+
else
|
100
|
+
new_url_arr << "#{k}=#{v[0]}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
[new_url_arr, facet_url_arr]
|
104
|
+
end
|
105
|
+
|
106
|
+
def compose_url(url_arrays)
|
107
|
+
new_url = ''
|
108
|
+
url_arrays[0].each do |param_string|
|
109
|
+
new_url = if new_url == ''
|
110
|
+
"#{param_string}&"
|
111
|
+
else
|
112
|
+
"#{new_url}&#{param_string}&"
|
113
|
+
end
|
114
|
+
end
|
115
|
+
url_arrays[1].each do |fps|
|
116
|
+
new_url = "#{new_url}#{fps}&"
|
117
|
+
end
|
118
|
+
"?#{new_url.chomp('&')}"
|
119
|
+
end
|
120
|
+
|
121
|
+
def convert_ia_facet_url_param(value)
|
122
|
+
ifu_arr = value.split(':')
|
123
|
+
"f[#{ifu_arr[0]}][]=#{ifu_arr[1]}"
|
124
|
+
end
|
125
|
+
|
126
|
+
def make_link(value, url)
|
127
|
+
"<a href=\"#{url}\">#{value}</a>".html_safe
|
128
|
+
end
|
129
|
+
|
130
|
+
end
|
131
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blacklight_internet_archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jd2148
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- LICENSE.txt
|
116
116
|
- README.md
|
117
117
|
- Rakefile
|
118
|
+
- app/helpers/blacklight_url_helper.rb
|
118
119
|
- blacklight_internet_archive.gemspec
|
119
120
|
- lib/blacklight_internet_archive.rb
|
120
121
|
- lib/blacklight_internet_archive/blacklight_response.rb
|
@@ -126,6 +127,7 @@ files:
|
|
126
127
|
- lib/blacklight_internet_archive/request.rb
|
127
128
|
- lib/blacklight_internet_archive/response.rb
|
128
129
|
- lib/blacklight_internet_archive/response_adapter.rb
|
130
|
+
- lib/blacklight_internet_archive/sites_entity_processor.rb
|
129
131
|
- lib/blacklight_internet_archive/version.rb
|
130
132
|
homepage:
|
131
133
|
licenses:
|