fsp_harvester 0.1.12 → 0.1.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +48 -47
- data/Gemfile.lock +34 -26
- data/lib/constants.rb +8 -5
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/fsp_harvester.rb +1 -1
- data/lib/harvester.rb +1 -0
- data/lib/harvester_brute.rb +48 -0
- data/lib/harvester_utils.rb +10 -7
- data/lib/metadata_harvester.rb +48 -30
- data/lib/warnings.json +33 -21
- data/lib/web_utils.rb +5 -5
- metadata +5 -5
- data/lib/swagger.rb +0 -184
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e0ffd5048e360ce8e8cced890a586664af797065d2c8d6312927d694835e84b
|
4
|
+
data.tar.gz: 840269a8b28da70bed8c5e46674ff3730cbee66f624064cab84f98d5b3a2ff00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c01cc88a8f57e024c7aeed89a8251d97b130bca987dc14d914e87fa87ea744d3de7ab11ca340b0456f295edafdd872d4f63d0f0ef23dbe9c3cc8ebc97a64ae5
|
7
|
+
data.tar.gz: 2c274758ec874bb1c25ebd5286ecbc2b7e91205430a94cf3ada9c7350511fe362532f2c6d213a6fd6657ccdf7184df9c0eaf9c2461c0d25dc87da00b2aded390
|
data/.rspec_status
CHANGED
@@ -1,55 +1,56 @@
|
|
1
1
|
example_id | status | run_time |
|
2
2
|
---------------------------------- | ------ | --------------- |
|
3
|
-
./spec/cite-as_spec.rb[1:1:1] | passed | 1.
|
4
|
-
./spec/cite-as_spec.rb[1:1:2] | passed | 1.
|
5
|
-
./spec/cite-as_spec.rb[1:1:3] | passed | 1.
|
6
|
-
./spec/cite-as_spec.rb[1:1:4] | passed | 1.
|
7
|
-
./spec/cite-as_spec.rb[1:1:5] | passed | 2.
|
8
|
-
./spec/cite-as_spec.rb[1:1:6] | passed | 2.
|
9
|
-
./spec/cite-as_spec.rb[1:1:7] | passed | 3.
|
10
|
-
./spec/cite-as_spec.rb[1:1:8] | passed | 2.
|
11
|
-
./spec/cite-as_spec.rb[1:1:9] | passed | 2.
|
12
|
-
./spec/cite-as_spec.rb[1:1:10] | passed | 2.
|
13
|
-
./spec/cite-as_spec.rb[1:1:11] | passed | 3.
|
14
|
-
./spec/cite-as_spec.rb[1:1:12] | passed | 2.
|
15
|
-
./spec/cite-as_spec.rb[1:1:13] | passed | 2.
|
16
|
-
./spec/cite-as_spec.rb[1:1:14] | passed | 2.
|
17
|
-
./spec/cite-as_spec.rb[1:1:15] | passed | 1.
|
18
|
-
./spec/cite-as_spec.rb[1:1:16] | passed | 1.
|
19
|
-
./spec/cite-as_spec.rb[1:1:17] | passed | 1.
|
20
|
-
./spec/cite-as_spec.rb[1:1:18] | passed | 1.
|
21
|
-
./spec/cite-as_spec.rb[1:1:19] | passed | 1.
|
3
|
+
./spec/cite-as_spec.rb[1:1:1] | passed | 1.66 seconds |
|
4
|
+
./spec/cite-as_spec.rb[1:1:2] | passed | 1.13 seconds |
|
5
|
+
./spec/cite-as_spec.rb[1:1:3] | passed | 1.08 seconds |
|
6
|
+
./spec/cite-as_spec.rb[1:1:4] | passed | 1.68 seconds |
|
7
|
+
./spec/cite-as_spec.rb[1:1:5] | passed | 2.86 seconds |
|
8
|
+
./spec/cite-as_spec.rb[1:1:6] | passed | 2.11 seconds |
|
9
|
+
./spec/cite-as_spec.rb[1:1:7] | passed | 3.07 seconds |
|
10
|
+
./spec/cite-as_spec.rb[1:1:8] | passed | 2.13 seconds |
|
11
|
+
./spec/cite-as_spec.rb[1:1:9] | passed | 2.73 seconds |
|
12
|
+
./spec/cite-as_spec.rb[1:1:10] | passed | 2.64 seconds |
|
13
|
+
./spec/cite-as_spec.rb[1:1:11] | passed | 3.36 seconds |
|
14
|
+
./spec/cite-as_spec.rb[1:1:12] | passed | 2.26 seconds |
|
15
|
+
./spec/cite-as_spec.rb[1:1:13] | passed | 2.9 seconds |
|
16
|
+
./spec/cite-as_spec.rb[1:1:14] | passed | 2.31 seconds |
|
17
|
+
./spec/cite-as_spec.rb[1:1:15] | passed | 1.47 seconds |
|
18
|
+
./spec/cite-as_spec.rb[1:1:16] | passed | 1.22 seconds |
|
19
|
+
./spec/cite-as_spec.rb[1:1:17] | passed | 1.23 seconds |
|
20
|
+
./spec/cite-as_spec.rb[1:1:18] | passed | 1.2 seconds |
|
21
|
+
./spec/cite-as_spec.rb[1:1:19] | passed | 1.71 seconds |
|
22
22
|
./spec/cite-as_spec.rb[1:1:20] | passed | 1.66 seconds |
|
23
|
-
./spec/cite-as_spec.rb[1:1:21] | passed | 2.
|
24
|
-
./spec/cite-as_spec.rb[1:1:22] | passed | 1.
|
25
|
-
./spec/cite-as_spec.rb[1:1:23] | passed | 1.
|
26
|
-
./spec/cite-as_spec.rb[1:1:24] | failed | 1.
|
27
|
-
./spec/cite-as_spec.rb[1:1:25] | passed | 0.
|
28
|
-
./spec/describedby_spec.rb[1:1:1] | passed |
|
29
|
-
./spec/describedby_spec.rb[1:1:2] | passed | 1.
|
23
|
+
./spec/cite-as_spec.rb[1:1:21] | passed | 2.5 seconds |
|
24
|
+
./spec/cite-as_spec.rb[1:1:22] | passed | 1.54 seconds |
|
25
|
+
./spec/cite-as_spec.rb[1:1:23] | passed | 1.25 seconds |
|
26
|
+
./spec/cite-as_spec.rb[1:1:24] | failed | 1.35 seconds |
|
27
|
+
./spec/cite-as_spec.rb[1:1:25] | passed | 0.50811 seconds |
|
28
|
+
./spec/describedby_spec.rb[1:1:1] | passed | 3.45 seconds |
|
29
|
+
./spec/describedby_spec.rb[1:1:2] | passed | 1.3 seconds |
|
30
30
|
./spec/describedby_spec.rb[1:1:3] | passed | 1.22 seconds |
|
31
31
|
./spec/describedby_spec.rb[1:1:4] | passed | 1.22 seconds |
|
32
|
-
./spec/describedby_spec.rb[1:1:5] | passed | 1.
|
33
|
-
./spec/describedby_spec.rb[1:1:6] | passed | 1.
|
34
|
-
./spec/describedby_spec.rb[1:1:7] | passed |
|
32
|
+
./spec/describedby_spec.rb[1:1:5] | passed | 1.15 seconds |
|
33
|
+
./spec/describedby_spec.rb[1:1:6] | passed | 1.04 seconds |
|
34
|
+
./spec/describedby_spec.rb[1:1:7] | passed | 1.12 seconds |
|
35
35
|
./spec/describedby_spec.rb[1:1:8] | passed | 2.44 seconds |
|
36
|
-
./spec/describedby_spec.rb[1:1:9] | passed |
|
37
|
-
./spec/describedby_spec.rb[1:1:10] | passed | 2.
|
38
|
-
./spec/describedby_spec.rb[1:1:11] | passed |
|
39
|
-
./spec/describedby_spec.rb[1:1:12] | passed |
|
40
|
-
./spec/describedby_spec.rb[1:1:13] | passed | 1.
|
41
|
-
./spec/describedby_spec.rb[1:1:14] | passed | 2.
|
42
|
-
./spec/describedby_spec.rb[1:1:15] | passed | 2.
|
36
|
+
./spec/describedby_spec.rb[1:1:9] | passed | 2.15 seconds |
|
37
|
+
./spec/describedby_spec.rb[1:1:10] | passed | 2.19 seconds |
|
38
|
+
./spec/describedby_spec.rb[1:1:11] | passed | 2.98 seconds |
|
39
|
+
./spec/describedby_spec.rb[1:1:12] | passed | 2.87 seconds |
|
40
|
+
./spec/describedby_spec.rb[1:1:13] | passed | 1.74 seconds |
|
41
|
+
./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
|
42
|
+
./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
|
43
43
|
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
|
44
44
|
./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
|
45
|
-
./spec/
|
46
|
-
./spec/item_spec.rb[1:1:
|
47
|
-
./spec/item_spec.rb[1:1:
|
48
|
-
./spec/item_spec.rb[1:1:
|
49
|
-
./spec/item_spec.rb[1:1:
|
50
|
-
./spec/item_spec.rb[1:1:
|
51
|
-
./spec/item_spec.rb[1:1:
|
52
|
-
./spec/item_spec.rb[1:1:
|
53
|
-
./spec/
|
54
|
-
./spec/type_spec.rb[1:1:
|
55
|
-
./spec/type_spec.rb[1:1:
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
|
46
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
|
47
|
+
./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
|
48
|
+
./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
|
49
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.76 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.43 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.23 seconds |
|
52
|
+
./spec/item_spec.rb[1:1:7] | passed | 2.94 seconds |
|
53
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.52517 seconds |
|
54
|
+
./spec/type_spec.rb[1:1:1] | passed | 1.35 seconds |
|
55
|
+
./spec/type_spec.rb[1:1:2] | passed | 1.32 seconds |
|
56
|
+
./spec/type_spec.rb[1:1:3] | passed | 1.54 seconds |
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fsp_harvester (0.1.
|
4
|
+
fsp_harvester (0.1.15)
|
5
5
|
json (~> 2.0)
|
6
6
|
linkeddata (~> 3.2)
|
7
|
-
linkheaders-processor (~> 0.1.
|
7
|
+
linkheaders-processor (~> 0.1.17)
|
8
8
|
metainspector (~> 5.11.2)
|
9
9
|
parseconfig (~> 1.1)
|
10
10
|
rake (~> 13.0)
|
@@ -56,7 +56,7 @@ GEM
|
|
56
56
|
faraday-encoding (0.0.5)
|
57
57
|
faraday
|
58
58
|
faraday-excon (1.1.0)
|
59
|
-
faraday-http-cache (2.4.
|
59
|
+
faraday-http-cache (2.4.1)
|
60
60
|
faraday (>= 0.8)
|
61
61
|
faraday-httpclient (1.0.1)
|
62
62
|
faraday-multipart (1.0.4)
|
@@ -99,34 +99,35 @@ GEM
|
|
99
99
|
sparql (~> 3.2)
|
100
100
|
sxp (~> 1.2)
|
101
101
|
link_header (0.0.8)
|
102
|
-
linkeddata (3.2.
|
103
|
-
json-ld (~> 3.2)
|
102
|
+
linkeddata (3.2.1)
|
103
|
+
json-ld (~> 3.2, >= 3.2.3)
|
104
104
|
json-ld-preloaded (~> 3.2)
|
105
105
|
ld-patch (~> 3.2)
|
106
|
-
nokogiri (~> 1.
|
107
|
-
rdf (~> 3.2)
|
108
|
-
rdf-aggregate-repo (~> 3.2)
|
106
|
+
nokogiri (~> 1.13, >= 1.13.8)
|
107
|
+
rdf (~> 3.2, >= 3.2.9)
|
108
|
+
rdf-aggregate-repo (~> 3.2, >= 3.2.1)
|
109
109
|
rdf-hamster-repo (~> 3.2)
|
110
|
-
rdf-isomorphic (~> 3.2)
|
110
|
+
rdf-isomorphic (~> 3.2, >= 3.2.1)
|
111
111
|
rdf-json (~> 3.2)
|
112
|
-
rdf-microdata (~> 3.2)
|
113
|
-
rdf-n3 (~> 3.2)
|
112
|
+
rdf-microdata (~> 3.2, >= 3.2.1)
|
113
|
+
rdf-n3 (~> 3.2, >= 3.2.1)
|
114
114
|
rdf-normalize (~> 0.5)
|
115
|
-
rdf-ordered-repo (~> 3.2)
|
115
|
+
rdf-ordered-repo (~> 3.2, >= 3.2.1)
|
116
116
|
rdf-rdfa (~> 3.2)
|
117
117
|
rdf-rdfxml (~> 3.2)
|
118
118
|
rdf-reasoner (~> 0.8)
|
119
|
-
rdf-tabular (~> 3.2)
|
119
|
+
rdf-tabular (~> 3.2, >= 3.2.1)
|
120
120
|
rdf-trig (~> 3.2)
|
121
121
|
rdf-trix (~> 3.2)
|
122
|
-
rdf-turtle (~> 3.2)
|
123
|
-
rdf-vocab (~> 3.2)
|
124
|
-
rdf-xsd (~> 3.2)
|
125
|
-
shacl (~> 0.2)
|
126
|
-
shex (~> 0.7)
|
127
|
-
sparql (~> 3.2)
|
128
|
-
sparql-client (~> 3.2)
|
129
|
-
|
122
|
+
rdf-turtle (~> 3.2, >= 3.2.1)
|
123
|
+
rdf-vocab (~> 3.2, >= 3.2.1)
|
124
|
+
rdf-xsd (~> 3.2, >= 3.2.1)
|
125
|
+
shacl (~> 0.2, >= 0.2.1)
|
126
|
+
shex (~> 0.7, >= 0.7.1)
|
127
|
+
sparql (~> 3.2, >= 3.2.4)
|
128
|
+
sparql-client (~> 3.2, >= 3.2.1)
|
129
|
+
yaml-ld (~> 0.0)
|
130
|
+
linkheaders-processor (0.1.17)
|
130
131
|
json (~> 2.0)
|
131
132
|
json-ld (~> 3.2)
|
132
133
|
json-ld-preloaded (~> 3.2)
|
@@ -159,8 +160,10 @@ GEM
|
|
159
160
|
racc (~> 1.4)
|
160
161
|
parallel (1.22.1)
|
161
162
|
parseconfig (1.1.2)
|
162
|
-
parser (3.1.2.
|
163
|
+
parser (3.1.2.1)
|
163
164
|
ast (~> 2.4.1)
|
165
|
+
psych (4.0.4)
|
166
|
+
stringio
|
164
167
|
public_suffix (4.0.7)
|
165
168
|
racc (1.6.0)
|
166
169
|
rack (2.2.4)
|
@@ -249,17 +252,17 @@ GEM
|
|
249
252
|
diff-lcs (>= 1.2.0, < 2.0)
|
250
253
|
rspec-support (~> 3.11.0)
|
251
254
|
rspec-support (3.11.0)
|
252
|
-
rubocop (1.
|
255
|
+
rubocop (1.34.1)
|
253
256
|
json (~> 2.3)
|
254
257
|
parallel (~> 1.10)
|
255
|
-
parser (>= 3.1.
|
258
|
+
parser (>= 3.1.2.1)
|
256
259
|
rainbow (>= 2.2.2, < 4.0)
|
257
260
|
regexp_parser (>= 1.8, < 3.0)
|
258
261
|
rexml (>= 3.2.5, < 4.0)
|
259
|
-
rubocop-ast (>= 1.
|
262
|
+
rubocop-ast (>= 1.20.0, < 2.0)
|
260
263
|
ruby-progressbar (~> 1.7)
|
261
264
|
unicode-display_width (>= 1.4.0, < 3.0)
|
262
|
-
rubocop-ast (1.
|
265
|
+
rubocop-ast (1.21.0)
|
263
266
|
parser (>= 3.1.1.0)
|
264
267
|
ruby-progressbar (1.11.0)
|
265
268
|
ruby2_keywords (0.0.5)
|
@@ -291,6 +294,7 @@ GEM
|
|
291
294
|
sparql-client (3.2.1)
|
292
295
|
net-http-persistent (~> 4.0, >= 4.0.1)
|
293
296
|
rdf (~> 3.2, >= 3.2.6)
|
297
|
+
stringio (3.0.2)
|
294
298
|
sxp (1.2.2)
|
295
299
|
matrix
|
296
300
|
rdf (~> 3.2)
|
@@ -303,6 +307,10 @@ GEM
|
|
303
307
|
unicode-types (1.7.0)
|
304
308
|
xml-simple (1.1.9)
|
305
309
|
rexml
|
310
|
+
yaml-ld (0.0.1)
|
311
|
+
json-ld (~> 3.2, >= 3.2.2)
|
312
|
+
psych (~> 4.0)
|
313
|
+
rdf (~> 3.2)
|
306
314
|
|
307
315
|
PLATFORMS
|
308
316
|
x86_64-linux
|
data/lib/constants.rb
CHANGED
@@ -69,11 +69,14 @@ SELF_IDENTIFIER_PREDICATES = [
|
|
69
69
|
'https://schema.org/identifier'
|
70
70
|
]
|
71
71
|
|
72
|
-
GUID_TYPES = {
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
72
|
+
# Maps each supported GUID scheme name to the pattern used to recognize it.
#
# Insertion order matters to any caller that walks this table in order and
# stops at the first pattern that matches (NOTE(review): the GUID-to-URL
# conversion appears to do this -- confirm against its loop).
# 'ark' is deliberately listed before 'uri': every string accepted by the ark
# pattern ("ark:/...") is also accepted by the generic uri pattern (scheme
# "ark" followed by ":/"), so with 'uri' first an ARK could never be
# classified as an ARK.
GUID_TYPES = {
  'inchi' => /^\w{14}-\w{10}-\w$/,
  'doi' => %r{^10.\d{4,9}/[-._;()/:A-Z0-9]+$}i,
  'handle1' => %r{^[^/]+/[^/]+$}i,
  'handle2' => %r{^\d{4,5}/[-._;()/:A-Z0-9]+$}i, # legacy style 12345/AGB47A
  'ark' => %r{^ark:/[^\s]+$},
  'uri' => %r{^\w+:/?/?[^\s]+$}
}
|
77
80
|
|
78
81
|
CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
|
79
82
|
extruct = CONFIG.dig(:extruct, :command)
|
data/lib/fsp_harvester.rb
CHANGED
@@ -12,7 +12,7 @@ module FspHarvester
|
|
12
12
|
links.each do |l|
|
13
13
|
db << l if l.relation == 'describedby'
|
14
14
|
end
|
15
|
-
HarvesterTools::MetadataHarvester.
|
15
|
+
HarvesterTools::MetadataHarvester.extract_metadata_from_links(links: db, metadata: @meta) # everything is gathered into the @meta metadata object
|
16
16
|
@meta
|
17
17
|
end
|
18
18
|
|
data/lib/harvester.rb
CHANGED
@@ -23,5 +23,6 @@ require_relative './signposting_tests'
|
|
23
23
|
require_relative './metadata_harvester'
|
24
24
|
require_relative './fsp_harvester'
|
25
25
|
require_relative './harvester_utils'
|
26
|
+
require_relative './harvester_brute'
|
26
27
|
require_relative './external_tools'
|
27
28
|
require_relative './metadata_parser'
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module HarvesterTools
  # Error class for the HarvesterTools namespace.
  class Error < StandardError
  end

  # "Brute force" harvesting: resolve a GUID to a URL and try to pull metadata
  # straight out of the response bodies obtained with different Accept headers.
  class BruteForce
    # Entry point for brute-force harvesting of a GUID.
    #
    # @param guid [String] identifier to harvest
    # @param metadata [HarvesterTools::MetadataObject] accumulator for
    #   comments, warnings and harvested data (a fresh one by default)
    # @return [false, HarvesterTools::MetadataObject] false when the GUID type
    #   is not recognized by Utils.convertToURL, otherwise the metadata object
    def self.begin_brute_force(guid:, metadata: HarvesterTools::MetadataObject.new)
      type, url = HarvesterTools::Utils.convertToURL(guid: guid)
      return false unless type

      do_content_negotiation(url: url, metadata: metadata)
      metadata
    end

    # Fetches +url+ up to three times and feeds every successful response body
    # to MetadataHarvester.extract_metadata_from_body:
    #   1. +url+ with ACCEPT_ALL_HEADER;
    #   2. +url+ with ACCEPT_STAR_HEADER;
    #   3. the final URL reached in step 2, again with ACCEPT_ALL_HEADER.
    # Step 3 only happens when step 2 produced a response.
    #
    # @param url [String] URL to negotiate against
    # @param metadata [HarvesterTools::MetadataObject] accumulator
    def self.do_content_negotiation(url:, metadata:)
      response = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      if response
        HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)
      end

      response = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_STAR_HEADER)
      return unless response

      # extract from whatever the */* request landed on
      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)

      # now do content negotiation on the final (post-redirect) URL itself
      response = resolve_url_brute(url: response.request.url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      return unless response

      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)
    end

    # Fetches +url+ through WebUtils.fspfetch and records the outcome on the
    # metadata object.
    #
    # @param url [String] URL to fetch
    # @param method [Symbol] HTTP method handed to fspfetch
    # @param nolinkheaders [Boolean] accepted for signature compatibility; not
    #   read by this method
    # @param headers [Hash] request headers handed to fspfetch
    # @param metadata [HarvesterTools::MetadataObject] accumulator; its
    #   guidtype is set to 'uri' when it is nil
    # @return [false, Object] false when fspfetch yields nothing (warning
    #   '001', a WARN comment and a "No response" entry are recorded),
    #   otherwise the response object returned by fspfetch
    def self.resolve_url_brute(url:, method: :get, nolinkheaders: true, headers:, metadata:)
      @meta = metadata
      @meta.guidtype = 'uri' if @meta.guidtype.nil?
      warn "\n\n BRUTE FETCHING #{url} #{headers}\n\n"
      response = HarvesterTools::WebUtils.fspfetch(url: url, headers: headers, method: method, meta: @meta)
      warn "\n\n head #{response.headers.inspect}\n\n" if response

      unless response
        @meta.add_warning(['001', url, headers])
        @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{headers}.\n"
        @meta.full_response << [url, "No response"]
        # Must leave here: falling through would call #body on a falsy response.
        return false
      end

      @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
      @meta.full_response << [url, response.body]
      response
    end
  end
end
|
data/lib/harvester_utils.rb
CHANGED
@@ -10,7 +10,7 @@ module HarvesterTools
|
|
10
10
|
type, url = convertToURL(guid: guid)
|
11
11
|
links = Array.new
|
12
12
|
if type
|
13
|
-
links = resolve_url(url: url)
|
13
|
+
links = resolve_url(url: url, metadata: @meta)
|
14
14
|
@meta.links = @meta.links | links
|
15
15
|
else
|
16
16
|
@meta.add_warning(['006', guid, ''])
|
@@ -31,6 +31,8 @@ module HarvesterTools
|
|
31
31
|
return 'uri', guid
|
32
32
|
elsif k == 'doi' and regex.match(guid)
|
33
33
|
return 'doi', "https://doi.org/#{guid}"
|
34
|
+
elsif k == 'ark' and regex.match(guid)
|
35
|
+
return 'ark', "https://n2t.net/#{guid}"
|
34
36
|
end
|
35
37
|
end
|
36
38
|
[nil, nil]
|
@@ -43,7 +45,8 @@ module HarvesterTools
|
|
43
45
|
false
|
44
46
|
end
|
45
47
|
|
46
|
-
def self.resolve_url(url:, method: :get, nolinkheaders: false, header: ACCEPT_STAR_HEADER)
|
48
|
+
def self.resolve_url(url:, method: :get, nolinkheaders: false, metadata:, header: ACCEPT_STAR_HEADER)
|
49
|
+
@meta = metadata
|
47
50
|
@meta.guidtype = 'uri' if @meta.guidtype.nil?
|
48
51
|
warn "\n\n FETCHING #{url} #{header}\n\n"
|
49
52
|
response = HarvesterTools::WebUtils.fspfetch(url: url, headers: header, method: method, meta: @meta)
|
@@ -58,17 +61,17 @@ module HarvesterTools
|
|
58
61
|
@meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
|
59
62
|
@meta.full_response << response.body
|
60
63
|
|
61
|
-
links = process_link_headers(response: response) unless nolinkheaders
|
64
|
+
links = process_link_headers(response: response, metadata: @meta) unless nolinkheaders
|
62
65
|
links
|
63
66
|
end
|
64
67
|
|
65
|
-
def self.process_link_headers(response:)
|
68
|
+
# Parses the link headers of +response+ and returns every link found.
#
# A LinkHeaders::Processor is created with the last entry of
# +metadata.all_uris+ as its default anchor, run over the response, and its
# link factory is handed to FspHarvester::Utils.signpostingcheck together with
# +metadata+ before the factory's links are returned.
#
# @param response [Object] HTTP response; must respond to #headers
# @param metadata [Object] metadata accumulator; must respond to #all_uris
# @return [Object] the value of the factory's #all_links
def self.process_link_headers(response:, metadata:)
  warn "\n\n parsing #{response.headers}\n\n"

  anchor = metadata.all_uris.last
  processor = LinkHeaders::Processor.new(default_anchor: anchor)
  processor.extract_and_parse(response: response)

  link_factory = processor.factory # LinkHeaders::LinkFactory
  FspHarvester::Utils.signpostingcheck(factory: link_factory, metadata: metadata)
  link_factory.all_links
end
|
74
77
|
end
|
data/lib/metadata_harvester.rb
CHANGED
@@ -5,7 +5,7 @@ module HarvesterTools
|
|
5
5
|
end
|
6
6
|
|
7
7
|
class MetadataHarvester
|
8
|
-
def self.
|
8
|
+
def self.extract_metadata_from_links(links: [], metadata: HarvesterTools::MetadataObject.new)
|
9
9
|
@meta = metadata
|
10
10
|
@meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
|
11
11
|
|
@@ -26,23 +26,42 @@ module HarvesterTools
|
|
26
26
|
next
|
27
27
|
end
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
29
|
+
process_according_to_type(body: response.body, uri: link, metadata: @meta, abbreviation: abbreviation,
|
30
|
+
content_type: content_type, harvester: hvst)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# Detects the format of a single HTTP response body and dispatches it to the
# matching parser.
#
# The metadata object is stored in @meta, an INFO comment is appended, and
# attempt_to_detect_type is asked for the format. When no format abbreviation
# comes back, warning '017' and a WARN comment naming the final request URL
# are recorded and nothing is parsed.
#
# @param response [Object] HTTP response; #body, #headers and #request.url are read
# @param metadata [HarvesterTools::MetadataObject] accumulator (a fresh one by default)
# @return [Object, nil] the result of process_according_to_type, or nil when
#   the format is not recognized
def self.extract_metadata_from_body(response:, metadata: HarvesterTools::MetadataObject.new)
  @meta = metadata
  @meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'

  abbreviation, content_type = attempt_to_detect_type(body: response.body, headers: response.headers)
  if abbreviation
    process_according_to_type(body: response.body, uri: response.request.url, metadata: @meta,
                              abbreviation: abbreviation, content_type: content_type)
  else
    @meta.add_warning(['017', response.request.url, ''])
    @meta.comments << "WARN: metadata format returned from #{response.request.url} is not recognized. Moving on.\n"
    nil
  end
end
|
47
|
+
|
48
|
+
# Routes a response body to the parser that matches its detected format.
#
# All bookkeeping goes through the +metadata+ argument rather than the
# class-level @meta, so the method behaves the same whether or not one of the
# extract_* entry points has run first.
#
# @param body [String] raw response body
# @param uri [String] URI the body came from (only passed to the HTML parser)
# @param abbreviation [String] format key: 'html', 'xml', 'json', 'jsonld',
#   'rdfxml', 'turtle', 'ntriples', 'nquads' or 'specialist'
# @param content_type [String] content type passed to the linked-data parser
# @param metadata [Object] accumulator; an INFO comment is appended to its
#   #comments for every recognized format except 'specialist'
# @param harvester [Object] parser object; defaults to a new
#   HarvesterTools::MetadataParser built around +metadata+
# @return [Object, nil] whatever the selected parser method returns; nil for
#   'specialist' and for unrecognized abbreviations
def self.process_according_to_type(body:, uri:, abbreviation:, content_type:, metadata:,
                                   harvester: HarvesterTools::MetadataParser.new(metadata_object: metadata))
  case abbreviation
  when 'html'
    metadata.comments << 'INFO: Processing html'
    harvester.process_html(body: body, uri: uri, metadata: metadata)
  when 'xml'
    metadata.comments << 'INFO: Processing xml'
    harvester.process_xml(body: body, metadata: metadata)
  when 'json'
    metadata.comments << 'INFO: Processing json'
    harvester.process_json(body: body, metadata: metadata)
  when 'jsonld', 'rdfxml', 'turtle', 'ntriples', 'nquads'
    metadata.comments << 'INFO: Processing linked data'
    harvester.process_ld(body: body, content_type: content_type, metadata: metadata)
  when 'specialist'
    warn 'no specialized parsers so far'
  end
end
|
48
67
|
|
@@ -111,24 +130,23 @@ module HarvesterTools
|
|
111
130
|
[abbreviation, contenttype]
|
112
131
|
end
|
113
132
|
|
114
|
-
def self.ntriples_hack(body:)
|
133
|
+
# Work-around for format detection of N-Triples documents whose statements do
# not end with a period: the first line that looks like a triple
# (<subject> <predicate> object) is re-submitted to RDF::Format.for with
# " ." appended.
#
# The body is scanned line by line. Splitting on arbitrary whitespace would
# break each triple into separate tokens, and the triple pattern, which
# requires whitespace inside the candidate, could then never match.
#
# Reads @meta (NOTE(review): assumed to have been set by one of the extract_*
# entry points before this is called) and appends INFO comments to it.
#
# @param body [String] raw response body
# @return [Class, nil] RDF::NTriples::Format when the patched sample is
#   detected as N-Triples, otherwise nil (only the hacky case is reported)
def self.ntriples_hack(body:)
  detected_type = nil
  body.each_line do |raw_line|
    line = raw_line.strip
    next if line.empty?
    next unless line.match?(/\s*<[^>]+>\s*<[^>]+>\s\S+/)

    @meta.comments << "INFO: running ntriples hack on #{line + ' .'}\n"
    detected_type = RDF::Format.for({ sample: "#{line} ." }) # the added period lets the detector recognize ntriples
    break
  end
  @meta.comments << "INFO: ntriples hack found: #{detected_type}\n"
  return nil if detected_type != RDF::NTriples::Format # only return the hacky case

  detected_type
end
|
132
150
|
|
133
151
|
def self.check_json(body:)
|
134
152
|
abbreviation = nil
|
data/lib/warnings.json
CHANGED
@@ -1,107 +1,119 @@
|
|
1
1
|
{
|
2
2
|
"001": {
|
3
3
|
"message": "Unable to resolve guid using default (*/*) Accept headers",
|
4
|
-
"linkout": "",
|
4
|
+
"linkout": [{"FAIR Principle": "https://www.go-fair.org/fair-principles/metadata-retrievable-identifier-standardised-communication-protocol/"},
|
5
|
+
{"FAIRsharing": "https://doi.org/10.25504/FAIRsharing.cd2f9e"}
|
6
|
+
],
|
5
7
|
"severity": "WARN"
|
6
8
|
},
|
7
9
|
"002": {
|
8
10
|
"message": "HTTP Response (203) is non-authoritative",
|
9
|
-
"linkout": "",
|
11
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/203"}],
|
10
12
|
"severity": "WARN"
|
11
13
|
},
|
12
14
|
"003": {
|
13
15
|
"message": "HTTP Response indicates failure (500-range)",
|
14
|
-
"linkout": "",
|
16
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500"}],
|
15
17
|
"severity": "WARN"
|
16
18
|
},
|
17
19
|
"004": {
|
18
20
|
"message": "The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header",
|
19
|
-
"linkout": "",
|
21
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
20
22
|
"severity": "WARN"
|
21
23
|
},
|
22
24
|
"005": {
|
23
25
|
"message": "The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute indicating the Accept headers that should be sent with the request",
|
24
|
-
"linkout": "",
|
26
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
25
27
|
"severity": "WARN"
|
26
28
|
},
|
27
29
|
"006": {
|
28
30
|
"message": "GUID type not recognized",
|
29
|
-
"linkout": "",
|
31
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/search?fairsharingRegistry=Standard&recordType=identifier_schema&page=1"}],
|
30
32
|
"severity": "WARN"
|
31
33
|
},
|
32
34
|
"007": {
|
33
35
|
"message": "Conflicting cite-as links",
|
34
|
-
"linkout": "",
|
36
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
35
37
|
"severity": "WARN"
|
36
38
|
},
|
37
39
|
"008": {
|
38
40
|
"message": "describedby link does not resolve",
|
39
|
-
"linkout": "",
|
41
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
40
42
|
"severity": "WARN"
|
41
43
|
},
|
42
44
|
"009": {
|
43
45
|
"message": "Content-type of described-by link does not match the type attribute in the link header itself",
|
44
|
-
"linkout": "",
|
46
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"},
|
47
|
+
{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
45
48
|
"severity": "WARN"
|
46
49
|
},
|
47
50
|
"010": {
|
48
51
|
"message": "Content-type of response from described-by link is undefined or cannot be compared to the link type",
|
49
|
-
"linkout": "",
|
52
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
50
53
|
"severity": "WARN"
|
51
54
|
},
|
52
55
|
"011": {
|
53
56
|
"message": "The resource does not follow the FAIR Signposting standard, which encourages any item links to have a type attribute",
|
54
|
-
"linkout": "",
|
57
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
55
58
|
"severity": "WARN"
|
56
59
|
},
|
57
60
|
"012": {
|
58
61
|
"message": "Content-type of response from resolving an item doesn't match the item type attribute in the link header",
|
59
|
-
"linkout": "",
|
62
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"},
|
63
|
+
{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
60
64
|
"severity": "WARN"
|
61
65
|
},
|
62
66
|
"013": {
|
63
67
|
"message": "Content-type of response from resolving an item is undefined or cannot be compared to the link type",
|
64
|
-
"linkout": "",
|
68
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
65
69
|
"severity": "WARN"
|
66
70
|
},
|
67
71
|
"014": {
|
68
72
|
"message": "Item link does not resolve",
|
69
|
-
"linkout": "",
|
73
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
70
74
|
"severity": "WARN"
|
71
75
|
},
|
72
76
|
"015": {
|
73
77
|
"message": "Link headers do not include a link of type 'type', as required by the FAIR Signposting specification",
|
74
|
-
"linkout": "",
|
78
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
75
79
|
"severity": "WARN"
|
76
80
|
},
|
77
81
|
"016": {
|
78
82
|
"message": "Unable to resolve describedby link using Accept headers with the MIME type indicated in the link",
|
79
|
-
"linkout": "",
|
83
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Content_negotiation"}],
|
80
84
|
"severity": "WARN"
|
81
85
|
},
|
82
86
|
"017": {
|
83
87
|
"message": "Metadata format not recognized.",
|
84
|
-
"linkout": "",
|
88
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/search?subjects=Computer%2520Science,subject%2520agnostic&page=1&recordType=model_and_format"}],
|
85
89
|
"severity": "WARN"
|
86
90
|
},
|
87
91
|
"018": {
|
88
92
|
"message": "RDF parsing error - likely malformed RDF document.",
|
89
|
-
"linkout": "",
|
93
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.p77ph9"},
|
94
|
+
{"Documentation": "http://www.w3.org/TR/2014/REC-rdf11-concepts-20140225/"},
|
95
|
+
{"Validator": "http://rdf.greggkellogg.net/distiller"}],
|
90
96
|
"severity": "WARN"
|
91
97
|
},
|
92
98
|
"019": {
|
93
99
|
"message": "HTML parsing error - unable to extract linked data from HTML.",
|
94
|
-
"linkout": "",
|
100
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.YugnuL"},
|
101
|
+
{"Documentation": "https://www.w3.org/TR/html53/"},
|
102
|
+
{"validator": "https://validator.w3.org/"}],
|
95
103
|
"severity": "WARN"
|
96
104
|
},
|
97
105
|
"020": {
|
98
106
|
"message": "XML parsing error - unable to process XML document.",
|
99
|
-
"linkout": "",
|
107
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.b5cc91"},
|
108
|
+
{"Documentation": "https://www.w3.org/TR/xml/"},
|
109
|
+
{"Validator": "https://www.xmlvalidation.com/"}],
|
100
110
|
"severity": "WARN"
|
101
111
|
},
|
102
112
|
"021": {
|
103
113
|
"message": "JSON parsing error - unable to process JSON document.",
|
104
|
-
"linkout": "",
|
114
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.5bbab9"},
|
115
|
+
{"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
|
116
|
+
{"Validator": "https://jsononline.net/json-validator"}],
|
105
117
|
"severity": "WARN"
|
106
118
|
}
|
107
119
|
}
|
data/lib/web_utils.rb
CHANGED
@@ -18,13 +18,13 @@ module HarvesterTools
|
|
18
18
|
warn "final URL #{response.request.url}"
|
19
19
|
warn "Response code #{response.code}"
|
20
20
|
if response.code == 203
|
21
|
-
meta.
|
21
|
+
meta.add_warning(["002", url, headers])
|
22
22
|
meta.comments << "WARN: Response is non-authoritative (HTTP response code: #{response.code}). Headers may have been manipulated encountered when trying to resolve #{url}\n"
|
23
23
|
end
|
24
24
|
response
|
25
25
|
rescue RestClient::ExceptionWithResponse => e
|
26
|
-
warn "EXCEPTION WITH RESPONSE! #{e.response}\
|
27
|
-
meta.
|
26
|
+
warn "EXCEPTION WITH RESPONSE! #{e.response.code} with response #{e.response}\nfailed response headers: #{e.response.headers}"
|
27
|
+
meta.add_warning(["003", url, headers])
|
28
28
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
29
29
|
if (e.response.code == 500 or e.response.code == 404)
|
30
30
|
return false
|
@@ -34,13 +34,13 @@ module HarvesterTools
|
|
34
34
|
# now we are returning the headers and body that were returned
|
35
35
|
rescue RestClient::Exception => e
|
36
36
|
warn "EXCEPTION WITH NO RESPONSE! #{e}"
|
37
|
-
meta.
|
37
|
+
meta.add_warning(["003", url, headers])
|
38
38
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
39
39
|
false
|
40
40
|
# now we are returning 'False', and we will check that with an \"if\" statement in our main code
|
41
41
|
rescue Exception => e
|
42
42
|
warn "EXCEPTION UNKNOWN! #{e}"
|
43
|
-
meta.
|
43
|
+
meta.add_warning(["003", url, headers])
|
44
44
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
45
45
|
false
|
46
46
|
# now we are returning 'False', and we will check that with an \"if\" statement in our main code
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.12
|
4
|
+
version: 0.1.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.
|
47
|
+
version: 0.1.17
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.
|
54
|
+
version: 0.1.17
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: metainspector
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -190,12 +190,12 @@ files:
|
|
190
190
|
- lib/fsp_harvester.rb
|
191
191
|
- lib/fsp_harvester/version.rb
|
192
192
|
- lib/harvester.rb
|
193
|
+
- lib/harvester_brute.rb
|
193
194
|
- lib/harvester_utils.rb
|
194
195
|
- lib/metadata_harvester.rb
|
195
196
|
- lib/metadata_object.rb
|
196
197
|
- lib/metadata_parser.rb
|
197
198
|
- lib/signposting_tests.rb
|
198
|
-
- lib/swagger.rb
|
199
199
|
- lib/warnings.json
|
200
200
|
- lib/web_utils.rb
|
201
201
|
homepage: https://github.com/markwilkinson/FAIR-Signposting-Harvester
|
data/lib/swagger.rb
DELETED
@@ -1,184 +0,0 @@
|
|
1
|
-
class Swagger
|
2
|
-
attr_accessor :debug, :title, :tests_metric, :description, :applies_to_principle, :organization, :org_url,
|
3
|
-
:responsible_developer, :email, :developer_ORCiD, :protocol, :host, :basePath, :path,
|
4
|
-
:response_description, :schemas, :comments, :fairsharing_key_location, :score, :testedGUID
|
5
|
-
|
6
|
-
def initialize(params = {})
|
7
|
-
@debug = params.fetch(:debug, false)
|
8
|
-
|
9
|
-
@title = params.fetch(:title, 'unnamed')
|
10
|
-
@tests_metric = params.fetch(:tests_metric)
|
11
|
-
@description = params.fetch(:description, 'default_description')
|
12
|
-
@applies_to_principle = params.fetch(:applies_to_principle, 'some principle')
|
13
|
-
@version = params.fetch(:version, '0.1')
|
14
|
-
@organization = params.fetch(:organization, 'Some Organization')
|
15
|
-
@org_url = params.fetch(:org_url)
|
16
|
-
@responsible_develper = params.fetch(:responsible_developer, 'Some Person')
|
17
|
-
@email = params.fetch(:email)
|
18
|
-
@developer_ORCiD = params.fetch(:developer_ORCiD)
|
19
|
-
@host = params.fetch(:host)
|
20
|
-
@protocol = params.fetch(:protocol, 'https')
|
21
|
-
@basePath = params.fetch(:basePath)
|
22
|
-
@path = params.fetch(:path)
|
23
|
-
@response_description = params.fetch(:response_description)
|
24
|
-
@schemas = params.fetch(:schemas, [])
|
25
|
-
@comments = params.fetch(:comments, [])
|
26
|
-
@fairsharing_key_location = params.fetch(:fairsharing_key_location)
|
27
|
-
@score = params.fetch(:score, 0)
|
28
|
-
@testedGUID = params.fetch(:testedGUID, '')
|
29
|
-
end
|
30
|
-
|
31
|
-
def fairsharing_key
|
32
|
-
@fairsharing_key_location
|
33
|
-
end
|
34
|
-
|
35
|
-
def getSwagger
|
36
|
-
message = <<"EOF_EOF"
|
37
|
-
swagger: '2.0'
|
38
|
-
info:
|
39
|
-
version: '#{@version}'
|
40
|
-
title: "#{@title}"
|
41
|
-
x-tests_metric: '#{@tests_metric}'
|
42
|
-
description: >-
|
43
|
-
#{@description}
|
44
|
-
x-applies_to_principle: "#{@applies_to_principle}"
|
45
|
-
contact:
|
46
|
-
x-organization: "#{@organization}"
|
47
|
-
url: "#{@org_url}"
|
48
|
-
name: '#{@responsible_develper}'
|
49
|
-
x-role: "responsible developer"
|
50
|
-
email: #{@email}
|
51
|
-
x-id: '#{developer_ORCiD}'
|
52
|
-
host: #{@host}
|
53
|
-
basePath: #{@basePath}
|
54
|
-
schemes:
|
55
|
-
- #{@protocol}
|
56
|
-
paths:
|
57
|
-
#{@path}:
|
58
|
-
post:
|
59
|
-
parameters:
|
60
|
-
- name: content
|
61
|
-
in: body
|
62
|
-
required: true
|
63
|
-
schema:
|
64
|
-
$ref: '#/definitions/schemas'
|
65
|
-
consumes:
|
66
|
-
- application/json
|
67
|
-
produces:#{' '}
|
68
|
-
- application/json
|
69
|
-
responses:
|
70
|
-
"200":
|
71
|
-
description: >-
|
72
|
-
#{@response_description}
|
73
|
-
definitions:
|
74
|
-
schemas:
|
75
|
-
required:
|
76
|
-
EOF_EOF
|
77
|
-
|
78
|
-
schemas.keys.each do |key|
|
79
|
-
message += " - #{key}\n"
|
80
|
-
end
|
81
|
-
message += " properties:\n"
|
82
|
-
schemas.keys.each do |key|
|
83
|
-
message += " #{key}:\n"
|
84
|
-
message += " type: #{schemas[key][0]}\n"
|
85
|
-
message += " description: >-\n"
|
86
|
-
message += " #{schemas[key][1]}\n"
|
87
|
-
end
|
88
|
-
|
89
|
-
message
|
90
|
-
end
|
91
|
-
|
92
|
-
# A utility function that SHOULD NOT BE CALLED EXTERNALLY
|
93
|
-
#
|
94
|
-
# @param s - subject node
|
95
|
-
# @param p - predicate node
|
96
|
-
# @param o - object node
|
97
|
-
# @param repo - an RDF::Graph object
|
98
|
-
def triplify(s, p, o, repo)
|
99
|
-
s = s.strip if s.instance_of?(String)
|
100
|
-
p = p.strip if p.instance_of?(String)
|
101
|
-
o = o.strip if o.instance_of?(String)
|
102
|
-
|
103
|
-
unless s.respond_to?('uri')
|
104
|
-
|
105
|
-
if s.to_s =~ %r{^\w+:/?/?[^\s]+}
|
106
|
-
s = RDF::URI.new(s.to_s)
|
107
|
-
else
|
108
|
-
debug and warn "Subject #{s} must be a URI-compatible thingy"
|
109
|
-
abort "Subject #{s} must be a URI-compatible thingy"
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
unless p.respond_to?('uri')
|
114
|
-
|
115
|
-
if p.to_s =~ %r{^\w+:/?/?[^\s]+}
|
116
|
-
p = RDF::URI.new(p.to_s)
|
117
|
-
else
|
118
|
-
debug and warn "Predicate #{p} must be a URI-compatible thingy"
|
119
|
-
abort "Predicate #{p} must be a URI-compatible thingy"
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
unless o.respond_to?('uri')
|
124
|
-
o = if o.to_s =~ %r{\A\w+:/?/?\w[^\s]+}
|
125
|
-
RDF::URI.new(o.to_s)
|
126
|
-
elsif o.to_s =~ /^\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d/
|
127
|
-
RDF::Literal.new(o.to_s, datatype: RDF::XSD.date)
|
128
|
-
elsif o.to_s =~ /^[+-]?\d+\.\d+/
|
129
|
-
RDF::Literal.new(o.to_s, datatype: RDF::XSD.float)
|
130
|
-
elsif o.to_s =~ /^[+-]?[0-9]+$/
|
131
|
-
RDF::Literal.new(o.to_s, datatype: RDF::XSD.int)
|
132
|
-
else
|
133
|
-
RDF::Literal.new(o.to_s, language: :en)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
debug and warn("\n\ninserting #{s} #{p} #{o}\n\n")
|
138
|
-
triple = RDF::Statement(s, p, o)
|
139
|
-
repo.insert(triple)
|
140
|
-
|
141
|
-
true
|
142
|
-
end
|
143
|
-
|
144
|
-
# A utility function that SHOULD NOT BE CALLED EXTERNALLY
|
145
|
-
#
|
146
|
-
# @param s - subject node
|
147
|
-
# @param p - predicate node
|
148
|
-
# @param o - object node
|
149
|
-
# @param repo - an RDF::Graph object
|
150
|
-
def self.triplify(s, p, o, repo)
|
151
|
-
triplify(s, p, o, repo)
|
152
|
-
end
|
153
|
-
|
154
|
-
def addComment(newcomment)
|
155
|
-
comments << newcomment.to_s
|
156
|
-
# return self.comments
|
157
|
-
end
|
158
|
-
|
159
|
-
def createEvaluationResponse
|
160
|
-
g = RDF::Graph.new
|
161
|
-
|
162
|
-
dt = Time.now.iso8601
|
163
|
-
uri = testedGUID
|
164
|
-
|
165
|
-
me = protocol + '://' + host + '/' + basePath + path
|
166
|
-
|
167
|
-
meURI = "#{me}##{uri}/result-#{dt}"
|
168
|
-
meURI = Addressable::URI.escape(meURI)
|
169
|
-
|
170
|
-
triplify(meURI, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
|
171
|
-
'http://fairmetrics.org/resources/metric_evaluation_result', g)
|
172
|
-
triplify(meURI, 'http://semanticscience.org/resource/SIO_000300', score, g)
|
173
|
-
triplify(meURI, 'http://purl.obolibrary.org/obo/date', dt, g)
|
174
|
-
triplify(meURI, 'http://schema.org/softwareVersion', VERSION, g)
|
175
|
-
triplify(meURI, 'http://semanticscience.org/resource/SIO_000332', uri, g)
|
176
|
-
|
177
|
-
comments = 'no comments received. '
|
178
|
-
|
179
|
-
comments = self.comments.join("\n") if self.comments.size > 0
|
180
|
-
triplify(meURI, 'http://schema.org/comment', comments, g)
|
181
|
-
|
182
|
-
g.dump(:jsonld)
|
183
|
-
end
|
184
|
-
end
|