fsp_harvester 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/Rakefile +3 -3
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/fsp_harvester.rb +26 -93
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7da32f3321193e93f64154c35db0c840f5e7f086451660f99b62d3f4c834e295
|
4
|
+
data.tar.gz: 5a0a1ff4ef6b2100accd8bab4f20d6842d25ebf88d5e600e646804da9ed24bd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dad90f81b73489a151220c132d74508832573a352b209a537bbbaf6543a90b9e132cca80ecca33d15d02eca91841642d73e795113076f4564730194b5bf1fa53
|
7
|
+
data.tar.gz: 5d11c2f002f4e73a4971aec0d047cd14d35f6763155d0effa9c79f419ff6fd26f853158b290d2264edd148633570f5ecd50d4e1f6c7e9d136c7d2880517bdefc
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fsp_harvester (0.1.6)
|
4
|
+
fsp_harvester (0.1.7)
|
5
5
|
json (~> 2.0)
|
6
6
|
linkeddata (~> 3.2)
|
7
|
-
linkheaders-processor (~> 0.1.
|
7
|
+
linkheaders-processor (~> 0.1.13)
|
8
8
|
metainspector (~> 5.11.2)
|
9
9
|
parseconfig (~> 1.1)
|
10
10
|
rake (~> 13.0)
|
@@ -126,7 +126,7 @@ GEM
|
|
126
126
|
shex (~> 0.7)
|
127
127
|
sparql (~> 3.2)
|
128
128
|
sparql-client (~> 3.2)
|
129
|
-
linkheaders-processor (0.1.
|
129
|
+
linkheaders-processor (0.1.13)
|
130
130
|
json (~> 2.0)
|
131
131
|
json-ld (~> 3.2)
|
132
132
|
json-ld-preloaded (~> 3.2)
|
data/Rakefile
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/gem_tasks'
|
4
|
+
require 'rspec/core/rake_task'
|
5
5
|
|
6
6
|
RSpec::Core::RakeTask.new(:spec)
|
7
7
|
|
8
|
-
require
|
8
|
+
require 'rubocop/rake_task'
|
9
9
|
|
10
10
|
RuboCop::RakeTask.new
|
11
11
|
|
data/lib/fsp_harvester.rb
CHANGED
@@ -19,6 +19,7 @@ require 'rdf/xsd'
|
|
19
19
|
require_relative './metadata_object'
|
20
20
|
require_relative './constants'
|
21
21
|
require_relative './web_utils'
|
22
|
+
require_relative './signposting_tests'
|
22
23
|
|
23
24
|
module FspHarvester
|
24
25
|
class Error < StandardError
|
@@ -27,14 +28,15 @@ module FspHarvester
|
|
27
28
|
class Utils
|
28
29
|
# @@distillerknown = {} # global, hash of sha256 keys of message bodies - have they been seen before t/f
|
29
30
|
# @warnings = JSON.parse(File.read("warnings.json"))
|
30
|
-
|
31
|
+
|
31
32
|
|
32
33
|
def self.resolve_guid(guid:)
|
34
|
+
@meta = FspHarvester::MetadataObject.new
|
33
35
|
@meta.finalURI = [guid]
|
34
36
|
type, url = convertToURL(guid: guid)
|
35
|
-
links =
|
37
|
+
links = Array.new
|
36
38
|
if type
|
37
|
-
links
|
39
|
+
links = resolve_url(url: url)
|
38
40
|
else
|
39
41
|
@meta.warnings << ['006', guid, '']
|
40
42
|
@meta.comments << "FATAL: GUID type not recognized.\n"
|
@@ -75,14 +77,14 @@ module FspHarvester
|
|
75
77
|
unless response
|
76
78
|
@meta.warnings << ['001', url, header]
|
77
79
|
@meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
|
78
|
-
return [
|
80
|
+
return []
|
79
81
|
end
|
80
82
|
|
81
83
|
@meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.finalURI.last}. Using the output from this URL for the next few tests..."
|
82
84
|
@meta.full_response << response.body
|
83
85
|
|
84
86
|
links = process_link_headers(response: response) unless nolinkheaders
|
85
|
-
|
87
|
+
links
|
86
88
|
end
|
87
89
|
|
88
90
|
def self.process_link_headers(response:)
|
@@ -92,111 +94,42 @@ module FspHarvester
|
|
92
94
|
parser.extract_and_parse(response: response)
|
93
95
|
factory = parser.factory # LinkHeaders::LinkFactory
|
94
96
|
|
95
|
-
warn "\n\n length #{factory.all_links.length}\n\n"
|
97
|
+
warn "\n\n length bfore #{factory.all_links.length}\n\n"
|
96
98
|
signpostingcheck(factory: factory)
|
99
|
+
warn "\n\n length aftr #{factory.all_links.length}\n\n"
|
100
|
+
warn "\n\n links #{factory.all_links}\n\n"
|
101
|
+
factory.all_links
|
97
102
|
end
|
98
103
|
|
99
104
|
def self.signpostingcheck(factory:)
|
100
|
-
citeas =
|
101
|
-
describedby =
|
105
|
+
citeas = Array.new
|
106
|
+
describedby = Array.new
|
107
|
+
item = Array.new
|
102
108
|
factory.all_links.each do |l|
|
103
109
|
case l.relation
|
104
110
|
when 'cite-as'
|
105
|
-
citeas
|
111
|
+
citeas << l
|
106
112
|
when 'item'
|
107
|
-
|
108
|
-
@meta.warnings << ['011', l.href, '']
|
109
|
-
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which encourages any item links to also have a 'type' attribute.\n"
|
110
|
-
end
|
111
|
-
type = l.type if l.respond_to? 'type'
|
112
|
-
type = '*/*' unless type # this becomes a frozen string
|
113
|
-
header = { accept: type }
|
114
|
-
response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
115
|
-
|
116
|
-
if response
|
117
|
-
if response.headers[:content_type] and !(type == '*/*')
|
118
|
-
rtype = type.gsub(%r{/}, "\/") # because type is a frozen string
|
119
|
-
rtype = rtype.gsub(/\+/, '.')
|
120
|
-
typeregex = Regexp.new(type)
|
121
|
-
if response.headers[:content_type].match(typeregex)
|
122
|
-
warn response.headers[:content_type]
|
123
|
-
warn typeregex.inspect
|
124
|
-
@meta.comments << "INFO: item link responds according to Signposting specifications\n"
|
125
|
-
else
|
126
|
-
@meta.warnings << ['012', l.href, header]
|
127
|
-
@meta.comments << "WARN: Content type of returned item link does not match the 'type' attribute\n"
|
128
|
-
end
|
129
|
-
else
|
130
|
-
@meta.warnings << ['013', l.href, header]
|
131
|
-
@meta.comments << "WARN: Content type of returned item link is not specified in response headers or cannot be matched against accept headers\n"
|
132
|
-
end
|
133
|
-
else
|
134
|
-
@meta.warnings << ['014', l.href, header]
|
135
|
-
@meta.comments << "WARN: item link doesn't resolve\n"
|
136
|
-
end
|
137
|
-
|
113
|
+
item << l
|
138
114
|
when 'describedby'
|
139
|
-
describedby
|
140
|
-
if !(l.respond_to? 'type')
|
141
|
-
@meta.warnings << ['005', l.href, '']
|
142
|
-
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute.\n"
|
143
|
-
end
|
144
|
-
type = l.type if l.respond_to? 'type'
|
145
|
-
type = '*/*' unless type
|
146
|
-
header = { accept: type }
|
147
|
-
response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
148
|
-
if response
|
149
|
-
if response.headers[:content_type] and !(type == '*/*')
|
150
|
-
rtype = type.gsub(%r{/}, "\/")
|
151
|
-
rtype = rtype.gsub(/\+/, '.')
|
152
|
-
typeregex = Regexp.new(rtype)
|
153
|
-
if response.headers[:content_type].match(typeregex)
|
154
|
-
warn response.headers[:content_type]
|
155
|
-
warn typeregex.inspect
|
156
|
-
@meta.comments << "INFO: describedby link responds according to Signposting specifications\n"
|
157
|
-
else
|
158
|
-
@meta.warnings << ['009', l.href, header]
|
159
|
-
@meta.comments << "WARN: Content type of returned describedby link does not match the 'type' attribute\n"
|
160
|
-
end
|
161
|
-
else
|
162
|
-
@meta.warnings << ['010', l.href, header]
|
163
|
-
@meta.comments << "WARN: Content type of returned describedby link is not specified in response headers or cannot be matched against accept headers\n"
|
164
|
-
end
|
165
|
-
else
|
166
|
-
@meta.warnings << ['008', l.href, header]
|
167
|
-
@meta.comments << "WARN: describedby link doesn't resolve\n"
|
168
|
-
end
|
115
|
+
describedby << l
|
169
116
|
end
|
170
117
|
end
|
171
|
-
|
118
|
+
|
119
|
+
check_describedby_rules(describedby: describedby)
|
120
|
+
check_item_rules(item: item)
|
121
|
+
|
122
|
+
uniqueciteas = Array.new
|
123
|
+
if citeas.length > 1
|
124
|
+
warn "INFO: multiple cite-as links found. Checking for conflicts\n"
|
172
125
|
@meta.comments << "INFO: multiple cite-as links found. Checking for conflicts\n"
|
173
|
-
|
126
|
+
uniqueciteas = check_for_citeas_conflicts(citeas: citeas) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
|
174
127
|
end
|
175
128
|
|
176
|
-
unless
|
129
|
+
unless uniqueciteas == 1 && describedby.length > 0
|
177
130
|
@meta.warnings << ['004', '', '']
|
178
131
|
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header\n"
|
179
132
|
end
|
180
|
-
factory.all_links
|
181
|
-
end
|
182
|
-
|
183
|
-
def self.check_for_citeas_conflicts(factory:)
|
184
|
-
@meta.comments << 'INFO: checking for conflicting cite-as links'
|
185
|
-
citeas = []
|
186
|
-
factory.all_links.each do |link|
|
187
|
-
next unless link.relation == 'cite-as'
|
188
|
-
|
189
|
-
@meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
|
190
|
-
citeas << link.href
|
191
|
-
end
|
192
|
-
|
193
|
-
if citeas.uniq.length == 1
|
194
|
-
@meta.comments << 'INFO: No conflicting cite-as links found.'
|
195
|
-
else # only one allowed!
|
196
|
-
@meta.warnings << ['007', '', '']
|
197
|
-
@meta.comments << "WARN: The resource does not follow the FAIR Signposting standard: Found conflicting cite-as link headers\n"
|
198
|
-
end
|
199
|
-
citeas.uniq
|
200
133
|
end
|
201
134
|
end
|
202
135
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.
|
47
|
+
version: 0.1.13
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.
|
54
|
+
version: 0.1.13
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: metainspector
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|