fsp_harvester 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/Rakefile +3 -3
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/fsp_harvester.rb +26 -93
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7da32f3321193e93f64154c35db0c840f5e7f086451660f99b62d3f4c834e295
|
4
|
+
data.tar.gz: 5a0a1ff4ef6b2100accd8bab4f20d6842d25ebf88d5e600e646804da9ed24bd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dad90f81b73489a151220c132d74508832573a352b209a537bbbaf6543a90b9e132cca80ecca33d15d02eca91841642d73e795113076f4564730194b5bf1fa53
|
7
|
+
data.tar.gz: 5d11c2f002f4e73a4971aec0d047cd14d35f6763155d0effa9c79f419ff6fd26f853158b290d2264edd148633570f5ecd50d4e1f6c7e9d136c7d2880517bdefc
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fsp_harvester (0.1.6)
|
4
|
+
fsp_harvester (0.1.7)
|
5
5
|
json (~> 2.0)
|
6
6
|
linkeddata (~> 3.2)
|
7
|
-
linkheaders-processor (~> 0.1.
|
7
|
+
linkheaders-processor (~> 0.1.13)
|
8
8
|
metainspector (~> 5.11.2)
|
9
9
|
parseconfig (~> 1.1)
|
10
10
|
rake (~> 13.0)
|
@@ -126,7 +126,7 @@ GEM
|
|
126
126
|
shex (~> 0.7)
|
127
127
|
sparql (~> 3.2)
|
128
128
|
sparql-client (~> 3.2)
|
129
|
-
linkheaders-processor (0.1.
|
129
|
+
linkheaders-processor (0.1.13)
|
130
130
|
json (~> 2.0)
|
131
131
|
json-ld (~> 3.2)
|
132
132
|
json-ld-preloaded (~> 3.2)
|
data/Rakefile
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/gem_tasks'
|
4
|
+
require 'rspec/core/rake_task'
|
5
5
|
|
6
6
|
RSpec::Core::RakeTask.new(:spec)
|
7
7
|
|
8
|
-
require
|
8
|
+
require 'rubocop/rake_task'
|
9
9
|
|
10
10
|
RuboCop::RakeTask.new
|
11
11
|
|
data/lib/fsp_harvester.rb
CHANGED
@@ -19,6 +19,7 @@ require 'rdf/xsd'
|
|
19
19
|
require_relative './metadata_object'
|
20
20
|
require_relative './constants'
|
21
21
|
require_relative './web_utils'
|
22
|
+
require_relative './signposting_tests'
|
22
23
|
|
23
24
|
module FspHarvester
|
24
25
|
class Error < StandardError
|
@@ -27,14 +28,15 @@ module FspHarvester
|
|
27
28
|
class Utils
|
28
29
|
# @@distillerknown = {} # global, hash of sha256 keys of message bodies - have they been seen before t/f
|
29
30
|
# @warnings = JSON.parse(File.read("warnings.json"))
|
30
|
-
|
31
|
+
|
31
32
|
|
32
33
|
def self.resolve_guid(guid:)
|
34
|
+
@meta = FspHarvester::MetadataObject.new
|
33
35
|
@meta.finalURI = [guid]
|
34
36
|
type, url = convertToURL(guid: guid)
|
35
|
-
links =
|
37
|
+
links = Array.new
|
36
38
|
if type
|
37
|
-
links
|
39
|
+
links = resolve_url(url: url)
|
38
40
|
else
|
39
41
|
@meta.warnings << ['006', guid, '']
|
40
42
|
@meta.comments << "FATAL: GUID type not recognized.\n"
|
@@ -75,14 +77,14 @@ module FspHarvester
|
|
75
77
|
unless response
|
76
78
|
@meta.warnings << ['001', url, header]
|
77
79
|
@meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
|
78
|
-
return [
|
80
|
+
return []
|
79
81
|
end
|
80
82
|
|
81
83
|
@meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.finalURI.last}. Using the output from this URL for the next few tests..."
|
82
84
|
@meta.full_response << response.body
|
83
85
|
|
84
86
|
links = process_link_headers(response: response) unless nolinkheaders
|
85
|
-
|
87
|
+
links
|
86
88
|
end
|
87
89
|
|
88
90
|
def self.process_link_headers(response:)
|
@@ -92,111 +94,42 @@ module FspHarvester
|
|
92
94
|
parser.extract_and_parse(response: response)
|
93
95
|
factory = parser.factory # LinkHeaders::LinkFactory
|
94
96
|
|
95
|
-
warn "\n\n length #{factory.all_links.length}\n\n"
|
97
|
+
warn "\n\n length bfore #{factory.all_links.length}\n\n"
|
96
98
|
signpostingcheck(factory: factory)
|
99
|
+
warn "\n\n length aftr #{factory.all_links.length}\n\n"
|
100
|
+
warn "\n\n links #{factory.all_links}\n\n"
|
101
|
+
factory.all_links
|
97
102
|
end
|
98
103
|
|
99
104
|
def self.signpostingcheck(factory:)
|
100
|
-
citeas =
|
101
|
-
describedby =
|
105
|
+
citeas = Array.new
|
106
|
+
describedby = Array.new
|
107
|
+
item = Array.new
|
102
108
|
factory.all_links.each do |l|
|
103
109
|
case l.relation
|
104
110
|
when 'cite-as'
|
105
|
-
citeas
|
111
|
+
citeas << l
|
106
112
|
when 'item'
|
107
|
-
|
108
|
-
@meta.warnings << ['011', l.href, '']
|
109
|
-
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which encourages any item links to also have a 'type' attribute.\n"
|
110
|
-
end
|
111
|
-
type = l.type if l.respond_to? 'type'
|
112
|
-
type = '*/*' unless type # this becomes a frozen string
|
113
|
-
header = { accept: type }
|
114
|
-
response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
115
|
-
|
116
|
-
if response
|
117
|
-
if response.headers[:content_type] and !(type == '*/*')
|
118
|
-
rtype = type.gsub(%r{/}, "\/") # because type is a frozen string
|
119
|
-
rtype = rtype.gsub(/\+/, '.')
|
120
|
-
typeregex = Regexp.new(type)
|
121
|
-
if response.headers[:content_type].match(typeregex)
|
122
|
-
warn response.headers[:content_type]
|
123
|
-
warn typeregex.inspect
|
124
|
-
@meta.comments << "INFO: item link responds according to Signposting specifications\n"
|
125
|
-
else
|
126
|
-
@meta.warnings << ['012', l.href, header]
|
127
|
-
@meta.comments << "WARN: Content type of returned item link does not match the 'type' attribute\n"
|
128
|
-
end
|
129
|
-
else
|
130
|
-
@meta.warnings << ['013', l.href, header]
|
131
|
-
@meta.comments << "WARN: Content type of returned item link is not specified in response headers or cannot be matched against accept headers\n"
|
132
|
-
end
|
133
|
-
else
|
134
|
-
@meta.warnings << ['014', l.href, header]
|
135
|
-
@meta.comments << "WARN: item link doesn't resolve\n"
|
136
|
-
end
|
137
|
-
|
113
|
+
item << l
|
138
114
|
when 'describedby'
|
139
|
-
describedby
|
140
|
-
if !(l.respond_to? 'type')
|
141
|
-
@meta.warnings << ['005', l.href, '']
|
142
|
-
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute.\n"
|
143
|
-
end
|
144
|
-
type = l.type if l.respond_to? 'type'
|
145
|
-
type = '*/*' unless type
|
146
|
-
header = { accept: type }
|
147
|
-
response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
148
|
-
if response
|
149
|
-
if response.headers[:content_type] and !(type == '*/*')
|
150
|
-
rtype = type.gsub(%r{/}, "\/")
|
151
|
-
rtype = rtype.gsub(/\+/, '.')
|
152
|
-
typeregex = Regexp.new(rtype)
|
153
|
-
if response.headers[:content_type].match(typeregex)
|
154
|
-
warn response.headers[:content_type]
|
155
|
-
warn typeregex.inspect
|
156
|
-
@meta.comments << "INFO: describedby link responds according to Signposting specifications\n"
|
157
|
-
else
|
158
|
-
@meta.warnings << ['009', l.href, header]
|
159
|
-
@meta.comments << "WARN: Content type of returned describedby link does not match the 'type' attribute\n"
|
160
|
-
end
|
161
|
-
else
|
162
|
-
@meta.warnings << ['010', l.href, header]
|
163
|
-
@meta.comments << "WARN: Content type of returned describedby link is not specified in response headers or cannot be matched against accept headers\n"
|
164
|
-
end
|
165
|
-
else
|
166
|
-
@meta.warnings << ['008', l.href, header]
|
167
|
-
@meta.comments << "WARN: describedby link doesn't resolve\n"
|
168
|
-
end
|
115
|
+
describedby << l
|
169
116
|
end
|
170
117
|
end
|
171
|
-
|
118
|
+
|
119
|
+
check_describedby_rules(describedby: describedby)
|
120
|
+
check_item_rules(item: item)
|
121
|
+
|
122
|
+
uniqueciteas = Array.new
|
123
|
+
if citeas.length > 1
|
124
|
+
warn "INFO: multiple cite-as links found. Checking for conflicts\n"
|
172
125
|
@meta.comments << "INFO: multiple cite-as links found. Checking for conflicts\n"
|
173
|
-
|
126
|
+
uniqueciteas = check_for_citeas_conflicts(citeas: citeas) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
|
174
127
|
end
|
175
128
|
|
176
|
-
unless
|
129
|
+
unless uniqueciteas == 1 && describedby.length > 0
|
177
130
|
@meta.warnings << ['004', '', '']
|
178
131
|
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header\n"
|
179
132
|
end
|
180
|
-
factory.all_links
|
181
|
-
end
|
182
|
-
|
183
|
-
def self.check_for_citeas_conflicts(factory:)
|
184
|
-
@meta.comments << 'INFO: checking for conflicting cite-as links'
|
185
|
-
citeas = []
|
186
|
-
factory.all_links.each do |link|
|
187
|
-
next unless link.relation == 'cite-as'
|
188
|
-
|
189
|
-
@meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
|
190
|
-
citeas << link.href
|
191
|
-
end
|
192
|
-
|
193
|
-
if citeas.uniq.length == 1
|
194
|
-
@meta.comments << 'INFO: No conflicting cite-as links found.'
|
195
|
-
else # only one allowed!
|
196
|
-
@meta.warnings << ['007', '', '']
|
197
|
-
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard: Found conflicting cite-as link headers\n"
|
198
|
-
end
|
199
|
-
citeas.uniq
|
200
133
|
end
|
201
134
|
end
|
202
135
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.
|
47
|
+
version: 0.1.13
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.
|
54
|
+
version: 0.1.13
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: metainspector
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|