fsp_harvester 0.1.11 → 0.1.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +53 -53
- data/Gemfile.lock +1 -1
- data/lib/config.conf +8 -0
- data/lib/{fsp_metadata_external_tools.rb → external_tools.rb} +17 -15
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/fsp_harvester.rb +8 -106
- data/lib/harvester.rb +27 -0
- data/lib/harvester_utils.rb +75 -0
- data/lib/{fsp_metadata_harvester.rb → metadata_harvester.rb} +8 -8
- data/lib/metadata_object.rb +1 -1
- data/lib/{fsp_metadata_parser.rb → metadata_parser.rb} +28 -13
- data/lib/signposting_tests.rb +9 -6
- data/lib/swagger.rb +137 -177
- data/lib/web_utils.rb +2 -2
- metadata +8 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b38eea15fa26a3fe07290024342f8b6121dbd78c3cd2dd3496ca118fca22f6d4
|
4
|
+
data.tar.gz: a25ea37ecd78b2ef8dc41dca391c161ba4b262d910dbadcf34f07f0cd8e54af5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83b766f2896a0776ed75ab3fd1e235d2c80173d3ffc0c22aea80b234856392497daf312db36461f03d0ca168228e6e385edf6d789b42dc4b43fcd6a073cda234
|
7
|
+
data.tar.gz: 16d74c199a138db0225e88c0e092a5272a518785c397d27da74c511a69260444e6c1c440021e08413c131d672cd2d528e8d19dbd8a20ca1bff97dad38600c995
|
data/.rspec_status
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
example_id | status | run_time |
|
2
2
|
---------------------------------- | ------ | --------------- |
|
3
|
-
./spec/cite-as_spec.rb[1:1:1] | passed | 1.
|
4
|
-
./spec/cite-as_spec.rb[1:1:2] | passed | 1.
|
5
|
-
./spec/cite-as_spec.rb[1:1:3] | passed | 1.
|
6
|
-
./spec/cite-as_spec.rb[1:1:4] | passed | 1.
|
7
|
-
./spec/cite-as_spec.rb[1:1:5] | passed | 2.
|
8
|
-
./spec/cite-as_spec.rb[1:1:6] | passed | 2.
|
9
|
-
./spec/cite-as_spec.rb[1:1:7] | passed |
|
10
|
-
./spec/cite-as_spec.rb[1:1:8] | passed | 2.
|
11
|
-
./spec/cite-as_spec.rb[1:1:9] | passed | 2.
|
12
|
-
./spec/cite-as_spec.rb[1:1:10] | passed | 2.
|
13
|
-
./spec/cite-as_spec.rb[1:1:11] | passed | 3.
|
14
|
-
./spec/cite-as_spec.rb[1:1:12] | passed | 2.
|
15
|
-
./spec/cite-as_spec.rb[1:1:13] | passed | 2.
|
16
|
-
./spec/cite-as_spec.rb[1:1:14] | passed | 2.
|
17
|
-
./spec/cite-as_spec.rb[1:1:15] | passed | 1.
|
18
|
-
./spec/cite-as_spec.rb[1:1:16] | passed | 1.
|
19
|
-
./spec/cite-as_spec.rb[1:1:17] | passed | 1.
|
20
|
-
./spec/cite-as_spec.rb[1:1:18] | passed | 1.
|
21
|
-
./spec/cite-as_spec.rb[1:1:19] | passed | 1.
|
22
|
-
./spec/cite-as_spec.rb[1:1:20] | passed | 1.
|
23
|
-
./spec/cite-as_spec.rb[1:1:21] | passed | 2.
|
24
|
-
./spec/cite-as_spec.rb[1:1:22] | passed | 1.
|
25
|
-
./spec/cite-as_spec.rb[1:1:23] | passed | 1.
|
26
|
-
./spec/cite-as_spec.rb[1:1:24] | failed | 1.
|
27
|
-
./spec/cite-as_spec.rb[1:1:25] | passed | 0.
|
28
|
-
./spec/describedby_spec.rb[1:1:1] | passed | 2.
|
29
|
-
./spec/describedby_spec.rb[1:1:2] | passed |
|
30
|
-
./spec/describedby_spec.rb[1:1:3] | passed |
|
31
|
-
./spec/describedby_spec.rb[1:1:4] | passed |
|
32
|
-
./spec/describedby_spec.rb[1:1:5] | passed | 1
|
33
|
-
./spec/describedby_spec.rb[1:1:6] | passed |
|
34
|
-
./spec/describedby_spec.rb[1:1:7] | passed | 0.
|
35
|
-
./spec/describedby_spec.rb[1:1:8] | passed |
|
36
|
-
./spec/describedby_spec.rb[1:1:9] | passed | 1.
|
37
|
-
./spec/describedby_spec.rb[1:1:10] | passed |
|
38
|
-
./spec/describedby_spec.rb[1:1:11] | passed |
|
39
|
-
./spec/describedby_spec.rb[1:1:12] | passed |
|
40
|
-
./spec/describedby_spec.rb[1:1:13] | passed | 1.
|
41
|
-
./spec/describedby_spec.rb[1:1:14] | passed |
|
42
|
-
./spec/describedby_spec.rb[1:1:15] | passed |
|
43
|
-
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.
|
44
|
-
./spec/fsp_harvester_spec.rb[1:2] | failed |
|
45
|
-
./spec/item_spec.rb[1:1:1] | passed |
|
46
|
-
./spec/item_spec.rb[1:1:2] | passed | 2 seconds
|
47
|
-
./spec/item_spec.rb[1:1:3] | passed |
|
48
|
-
./spec/item_spec.rb[1:1:4] | passed | 1.
|
49
|
-
./spec/item_spec.rb[1:1:5] | passed |
|
50
|
-
./spec/item_spec.rb[1:1:6] | passed |
|
51
|
-
./spec/item_spec.rb[1:1:7] | passed |
|
52
|
-
./spec/item_spec.rb[1:1:8] | passed | 0.
|
53
|
-
./spec/type_spec.rb[1:1:1] | passed |
|
54
|
-
./spec/type_spec.rb[1:1:2] | passed |
|
55
|
-
./spec/type_spec.rb[1:1:3] | passed |
|
3
|
+
./spec/cite-as_spec.rb[1:1:1] | passed | 1.3 seconds |
|
4
|
+
./spec/cite-as_spec.rb[1:1:2] | passed | 1.21 seconds |
|
5
|
+
./spec/cite-as_spec.rb[1:1:3] | passed | 1.09 seconds |
|
6
|
+
./spec/cite-as_spec.rb[1:1:4] | passed | 1.69 seconds |
|
7
|
+
./spec/cite-as_spec.rb[1:1:5] | passed | 2.72 seconds |
|
8
|
+
./spec/cite-as_spec.rb[1:1:6] | passed | 2.3 seconds |
|
9
|
+
./spec/cite-as_spec.rb[1:1:7] | passed | 3.36 seconds |
|
10
|
+
./spec/cite-as_spec.rb[1:1:8] | passed | 2.26 seconds |
|
11
|
+
./spec/cite-as_spec.rb[1:1:9] | passed | 2.82 seconds |
|
12
|
+
./spec/cite-as_spec.rb[1:1:10] | passed | 2.3 seconds |
|
13
|
+
./spec/cite-as_spec.rb[1:1:11] | passed | 3.37 seconds |
|
14
|
+
./spec/cite-as_spec.rb[1:1:12] | passed | 2.2 seconds |
|
15
|
+
./spec/cite-as_spec.rb[1:1:13] | passed | 2.94 seconds |
|
16
|
+
./spec/cite-as_spec.rb[1:1:14] | passed | 2.44 seconds |
|
17
|
+
./spec/cite-as_spec.rb[1:1:15] | passed | 1.54 seconds |
|
18
|
+
./spec/cite-as_spec.rb[1:1:16] | passed | 1.29 seconds |
|
19
|
+
./spec/cite-as_spec.rb[1:1:17] | passed | 1.25 seconds |
|
20
|
+
./spec/cite-as_spec.rb[1:1:18] | passed | 1.15 seconds |
|
21
|
+
./spec/cite-as_spec.rb[1:1:19] | passed | 1.7 seconds |
|
22
|
+
./spec/cite-as_spec.rb[1:1:20] | passed | 1.66 seconds |
|
23
|
+
./spec/cite-as_spec.rb[1:1:21] | passed | 2.41 seconds |
|
24
|
+
./spec/cite-as_spec.rb[1:1:22] | passed | 1.64 seconds |
|
25
|
+
./spec/cite-as_spec.rb[1:1:23] | passed | 1.35 seconds |
|
26
|
+
./spec/cite-as_spec.rb[1:1:24] | failed | 1.25 seconds |
|
27
|
+
./spec/cite-as_spec.rb[1:1:25] | passed | 0.51152 seconds |
|
28
|
+
./spec/describedby_spec.rb[1:1:1] | passed | 2.71 seconds |
|
29
|
+
./spec/describedby_spec.rb[1:1:2] | passed | 1.25 seconds |
|
30
|
+
./spec/describedby_spec.rb[1:1:3] | passed | 1.22 seconds |
|
31
|
+
./spec/describedby_spec.rb[1:1:4] | passed | 1.22 seconds |
|
32
|
+
./spec/describedby_spec.rb[1:1:5] | passed | 1.21 seconds |
|
33
|
+
./spec/describedby_spec.rb[1:1:6] | passed | 1.02 seconds |
|
34
|
+
./spec/describedby_spec.rb[1:1:7] | passed | 0.99175 seconds |
|
35
|
+
./spec/describedby_spec.rb[1:1:8] | passed | 2.44 seconds |
|
36
|
+
./spec/describedby_spec.rb[1:1:9] | passed | 1.72 seconds |
|
37
|
+
./spec/describedby_spec.rb[1:1:10] | passed | 2.15 seconds |
|
38
|
+
./spec/describedby_spec.rb[1:1:11] | passed | 3.17 seconds |
|
39
|
+
./spec/describedby_spec.rb[1:1:12] | passed | 3.1 seconds |
|
40
|
+
./spec/describedby_spec.rb[1:1:13] | passed | 1.7 seconds |
|
41
|
+
./spec/describedby_spec.rb[1:1:14] | passed | 2.21 seconds |
|
42
|
+
./spec/describedby_spec.rb[1:1:15] | passed | 2.18 seconds |
|
43
|
+
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
|
44
|
+
./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
|
45
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.09 seconds |
|
46
|
+
./spec/item_spec.rb[1:1:2] | passed | 2.92 seconds |
|
47
|
+
./spec/item_spec.rb[1:1:3] | passed | 1.12 seconds |
|
48
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.7 seconds |
|
49
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.24 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.87 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:7] | passed | 3.03 seconds |
|
52
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.52338 seconds |
|
53
|
+
./spec/type_spec.rb[1:1:1] | passed | 1.42 seconds |
|
54
|
+
./spec/type_spec.rb[1:1:2] | passed | 1.28 seconds |
|
55
|
+
./spec/type_spec.rb[1:1:3] | passed | 1.52 seconds |
|
data/Gemfile.lock
CHANGED
data/lib/config.conf
ADDED
@@ -1,12 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module HarvesterTools
|
4
4
|
class Error < StandardError
|
5
5
|
end
|
6
6
|
|
7
7
|
class ExternalTools
|
8
8
|
|
9
|
-
def initialize(metadata:
|
9
|
+
def initialize(metadata: HarvesterTools::MetadataObject.new)
|
10
10
|
@meta = metadata
|
11
11
|
end
|
12
12
|
|
@@ -25,10 +25,7 @@ module FspHarvester
|
|
25
25
|
file.rewind
|
26
26
|
|
27
27
|
@meta.comments << "INFO: The message body is being examined by Distiller\n"
|
28
|
-
|
29
|
-
command = "LANG=en_US.UTF-8 #{Utils::RDFCommand} serialize --input-format rdfa --output-format jsonld #{file.path}"
|
30
|
-
# command = "LANG=en_US.UTF-8 /usr/local/bin/ruby #{@rdf_command} serialize --input-format rdfa --output-format jsonld #{file.path}"
|
31
|
-
# command = "LANG=en_US.UTF-8 /home/osboxes/.rvm/rubies/ruby-2.6.3/bin/ruby /home/osboxes/.rvm/gems/ruby-2.6.3/bin/rdf serialize --output-format jsonld #{file.path}"
|
28
|
+
command = "LANG=en_US.UTF-8 #{RDFCommand} serialize --input-format rdfa --output-format jsonld #{file.path}"
|
32
29
|
warn "distiller command: #{command}"
|
33
30
|
result, _stderr, _status = Open3.capture3(command)
|
34
31
|
warn ''
|
@@ -41,12 +38,13 @@ module FspHarvester
|
|
41
38
|
if result !~ /@context/i # failure returns nil
|
42
39
|
@meta.comments << "WARN: The Distiller tool failed to find parseable data in the body, perhaps due to incorrectly formatted HTML..\n"
|
43
40
|
@meta.add_warning(['018', '', ''])
|
41
|
+
result = "{}"
|
44
42
|
else
|
45
43
|
@meta.comments << "INFO: The Distiller found parseable data. Parsing as JSON-LD\n"
|
46
|
-
parse_rdf(result: result, content_type: "application/ld+json")
|
47
44
|
end
|
48
45
|
@@distillerknown[bhash] = true
|
49
46
|
end
|
47
|
+
result
|
50
48
|
end
|
51
49
|
|
52
50
|
def processs_with_extruct(uri:)
|
@@ -55,6 +53,11 @@ module FspHarvester
|
|
55
53
|
stdout, stderr, status = Open3.capture3(EXTRUCT_COMMAND + ' ' + uri)
|
56
54
|
warn "open3 status: #{status} #{stdout}"
|
57
55
|
result = stderr # absurd that the output comes over stderr! LOL!
|
56
|
+
jsonld = {}
|
57
|
+
microdata = Hash.new
|
58
|
+
microformat = Hash.new
|
59
|
+
opengraph = Hash.new
|
60
|
+
rdfa = Hash.new
|
58
61
|
|
59
62
|
if result.to_s.match(/(Failed\sto\sextract.*?)\n/)
|
60
63
|
@meta.comments << "WARN: extruct threw an error #{Regexp.last_match(1)} when attempting to parse return value (message body) of #{uri}.\n"
|
@@ -66,17 +69,16 @@ module FspHarvester
|
|
66
69
|
elsif result.to_s.match(/^\s+?\{/) or result.to_s.match(/^\s+\[/) # this is JSON
|
67
70
|
json = JSON.parse result
|
68
71
|
@meta.comments << "INFO: the extruct tool found parseable data at #{uri}\n"
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
@meta.merge_hash(json.first) if json.first.is_a? Hash
|
72
|
+
jsonld = json['json-ld'].to_json if json['json-ld'].any?
|
73
|
+
microdata = json['microdata'].first if json['microdata'].any
|
74
|
+
microformat = json['microformat'].first if json['microformat'].any?
|
75
|
+
opengraph = json['opengraph'].first if json['opengraph'].any?
|
76
|
+
rdfa = json['rdfa'].to_json if json['rdfa'].any?
|
77
|
+
# @meta.merge_hash(json.first) if json.first.is_a? Hash
|
77
78
|
else
|
78
79
|
@meta.comments << "WARN: the extruct tool failed to find parseable data at #{uri}\n"
|
79
80
|
end
|
81
|
+
[jsonld, microdata, microformat, opengraph, rdfa]
|
80
82
|
end
|
81
83
|
end
|
82
84
|
end
|
data/lib/fsp_harvester.rb
CHANGED
@@ -1,121 +1,23 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'fsp_harvester/version'
|
4
|
-
require 'json/ld'
|
5
|
-
require 'json/ld/preloaded'
|
6
|
-
require 'json'
|
7
|
-
require 'linkheaders/processor'
|
8
|
-
require 'addressable'
|
9
|
-
require 'tempfile'
|
10
|
-
require 'xmlsimple'
|
11
|
-
require 'nokogiri'
|
12
|
-
require 'parseconfig'
|
13
|
-
require 'rest-client'
|
14
|
-
require 'cgi'
|
15
|
-
require 'digest'
|
16
|
-
require 'open3'
|
17
|
-
require 'metainspector'
|
18
|
-
require 'rdf/xsd'
|
19
|
-
require_relative './metadata_object'
|
20
|
-
require_relative './constants'
|
21
|
-
require_relative './web_utils'
|
22
|
-
require_relative './signposting_tests'
|
23
|
-
require_relative './fsp_metadata_harvester'
|
24
|
-
require_relative './fsp_metadata_parser'
|
25
|
-
|
26
1
|
|
2
|
+
require_relative 'harvester'
|
27
3
|
module FspHarvester
|
28
4
|
class Error < StandardError
|
29
5
|
end
|
30
6
|
|
31
7
|
class Utils
|
32
|
-
# @@distillerknown = {} # global, hash of sha256 keys of message bodies - have they been seen before t/f
|
33
|
-
# @warnings = JSON.parse(File.read("warnings.json"))
|
34
|
-
|
35
|
-
|
36
|
-
def self.resolve_guid(guid:)
|
37
|
-
@meta = FspHarvester::MetadataObject.new
|
38
|
-
@meta.all_uris = [guid]
|
39
|
-
type, url = convertToURL(guid: guid)
|
40
|
-
links = Array.new
|
41
|
-
if type
|
42
|
-
links = resolve_url(url: url)
|
43
|
-
@meta.links << links
|
44
|
-
else
|
45
|
-
@meta.add_warning(['006', guid, ''])
|
46
|
-
@meta.comments << "FATAL: GUID type not recognized.\n"
|
47
|
-
end
|
48
|
-
[links, @meta]
|
49
|
-
end
|
50
8
|
|
51
|
-
def self.gather_metadata_from_describedby_links(links: [], metadata:
|
9
|
+
def self.gather_metadata_from_describedby_links(links: [], metadata: HarvesterTools::MetadataObject.new) # meta should have already been created by resolve+guid, but maybe not
|
52
10
|
@meta = metadata
|
53
11
|
db = []
|
54
12
|
links.each do |l|
|
55
13
|
db << l if l.relation == 'describedby'
|
56
14
|
end
|
57
|
-
|
15
|
+
HarvesterTools::MetadataHarvester.extract_metadata(links: db, metadata: @meta) # everything is gathered into the @meta metadata object
|
58
16
|
@meta
|
59
17
|
end
|
60
18
|
|
61
|
-
def self.
|
62
|
-
|
63
|
-
if k == 'inchi' and regex.match(guid)
|
64
|
-
return 'inchi', "https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/#{guid}"
|
65
|
-
elsif k == 'handle1' and regex.match(guid)
|
66
|
-
return 'handle', "http://hdl.handle.net/#{guid}"
|
67
|
-
elsif k == 'handle2' and regex.match(guid)
|
68
|
-
return 'handle', "http://hdl.handle.net/#{guid}"
|
69
|
-
elsif k == 'uri' and regex.match(guid)
|
70
|
-
return 'uri', guid
|
71
|
-
elsif k == 'doi' and regex.match(guid)
|
72
|
-
return 'doi', "https://doi.org/#{guid}"
|
73
|
-
end
|
74
|
-
end
|
75
|
-
[nil, nil]
|
76
|
-
end
|
77
|
-
|
78
|
-
def self.typeit(guid:)
|
79
|
-
Utils::GUID_TYPES.each do |type, regex|
|
80
|
-
return type if regex.match(guid)
|
81
|
-
end
|
82
|
-
false
|
83
|
-
end
|
84
|
-
|
85
|
-
def self.resolve_url(url:, method: :get, nolinkheaders: false, header: ACCEPT_STAR_HEADER)
|
86
|
-
@meta.guidtype = 'uri' if @meta.guidtype.nil?
|
87
|
-
warn "\n\n FETCHING #{url} #{header}\n\n"
|
88
|
-
response = FspHarvester::WebUtils.fspfetch(url: url, headers: header, method: method, meta: @meta)
|
89
|
-
warn "\n\n head #{response.headers.inspect}\n\n" if response
|
90
|
-
|
91
|
-
unless response
|
92
|
-
@meta.add_warning(['001', url, header])
|
93
|
-
@meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
|
94
|
-
return []
|
95
|
-
end
|
96
|
-
|
97
|
-
@meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
|
98
|
-
@meta.full_response << response.body
|
99
|
-
|
100
|
-
links = process_link_headers(response: response) unless nolinkheaders
|
101
|
-
links
|
102
|
-
end
|
103
|
-
|
104
|
-
def self.process_link_headers(response:)
|
105
|
-
warn "\n\n parsing #{response.headers}\n\n"
|
106
|
-
|
107
|
-
parser = LinkHeaders::Processor.new(default_anchor: @meta.all_uris.last)
|
108
|
-
parser.extract_and_parse(response: response)
|
109
|
-
factory = parser.factory # LinkHeaders::LinkFactory
|
110
|
-
|
111
|
-
warn "\n\n length bfore #{factory.all_links.length}\n\n"
|
112
|
-
signpostingcheck(factory: factory)
|
113
|
-
warn "\n\n length aftr #{factory.all_links.length}\n\n"
|
114
|
-
warn "\n\n links #{factory.all_links}\n\n"
|
115
|
-
factory.all_links
|
116
|
-
end
|
117
|
-
|
118
|
-
def self.signpostingcheck(factory:)
|
19
|
+
def self.signpostingcheck(factory:, metadata: HarvesterTools::MetadataObject.new)
|
20
|
+
@meta = metadata
|
119
21
|
citeas = Array.new
|
120
22
|
describedby = Array.new
|
121
23
|
item = Array.new
|
@@ -134,13 +36,13 @@ module FspHarvester
|
|
134
36
|
end
|
135
37
|
end
|
136
38
|
|
137
|
-
check_describedby_rules(describedby: describedby)
|
138
|
-
check_item_rules(item: item)
|
39
|
+
check_describedby_rules(describedby: describedby, metadata: @meta)
|
40
|
+
check_item_rules(item: item, metadata: @meta)
|
139
41
|
|
140
42
|
if citeas.length > 1
|
141
43
|
warn "INFO: multiple cite-as links found. Checking for conflicts\n"
|
142
44
|
@meta.comments << "INFO: multiple cite-as links found. Checking for conflicts\n"
|
143
|
-
citeas = check_for_citeas_conflicts(citeas: citeas) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
|
45
|
+
citeas = check_for_citeas_conflicts(citeas: citeas, metadata: @meta) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
|
144
46
|
end
|
145
47
|
|
146
48
|
unless citeas.length == 1 && describedby.length > 0
|
data/lib/harvester.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#require_relative 'fsp_harvester/version'
|
4
|
+
require 'json/ld'
|
5
|
+
require 'json/ld/preloaded'
|
6
|
+
require 'json'
|
7
|
+
require 'linkheaders/processor'
|
8
|
+
require 'addressable'
|
9
|
+
require 'tempfile'
|
10
|
+
require 'xmlsimple'
|
11
|
+
require 'nokogiri'
|
12
|
+
require 'parseconfig'
|
13
|
+
require 'rest-client'
|
14
|
+
require 'cgi'
|
15
|
+
require 'digest'
|
16
|
+
require 'open3'
|
17
|
+
require 'metainspector'
|
18
|
+
require 'rdf/xsd'
|
19
|
+
require_relative './metadata_object'
|
20
|
+
require_relative './constants'
|
21
|
+
require_relative './web_utils'
|
22
|
+
require_relative './signposting_tests'
|
23
|
+
require_relative './metadata_harvester'
|
24
|
+
require_relative './fsp_harvester'
|
25
|
+
require_relative './harvester_utils'
|
26
|
+
require_relative './external_tools'
|
27
|
+
require_relative './metadata_parser'
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module HarvesterTools
|
2
|
+
class Error < StandardError
|
3
|
+
end
|
4
|
+
|
5
|
+
class Utils
|
6
|
+
|
7
|
+
def self.resolve_guid(guid:)
|
8
|
+
@meta = HarvesterTools::MetadataObject.new
|
9
|
+
@meta.all_uris = [guid]
|
10
|
+
type, url = convertToURL(guid: guid)
|
11
|
+
links = Array.new
|
12
|
+
if type
|
13
|
+
links = resolve_url(url: url)
|
14
|
+
@meta.links = @meta.links | links
|
15
|
+
else
|
16
|
+
@meta.add_warning(['006', guid, ''])
|
17
|
+
@meta.comments << "FATAL: GUID type not recognized.\n"
|
18
|
+
end
|
19
|
+
[links, @meta]
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.convertToURL(guid:)
|
23
|
+
GUID_TYPES.each do |k, regex|
|
24
|
+
if k == 'inchi' and regex.match(guid)
|
25
|
+
return 'inchi', "https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/#{guid}"
|
26
|
+
elsif k == 'handle1' and regex.match(guid)
|
27
|
+
return 'handle', "http://hdl.handle.net/#{guid}"
|
28
|
+
elsif k == 'handle2' and regex.match(guid)
|
29
|
+
return 'handle', "http://hdl.handle.net/#{guid}"
|
30
|
+
elsif k == 'uri' and regex.match(guid)
|
31
|
+
return 'uri', guid
|
32
|
+
elsif k == 'doi' and regex.match(guid)
|
33
|
+
return 'doi', "https://doi.org/#{guid}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
[nil, nil]
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.typeit(guid:)
|
40
|
+
GUID_TYPES.each do |type, regex|
|
41
|
+
return type if regex.match(guid)
|
42
|
+
end
|
43
|
+
false
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.resolve_url(url:, method: :get, nolinkheaders: false, header: ACCEPT_STAR_HEADER)
|
47
|
+
@meta.guidtype = 'uri' if @meta.guidtype.nil?
|
48
|
+
warn "\n\n FETCHING #{url} #{header}\n\n"
|
49
|
+
response = HarvesterTools::WebUtils.fspfetch(url: url, headers: header, method: method, meta: @meta)
|
50
|
+
warn "\n\n head #{response.headers.inspect}\n\n" if response
|
51
|
+
|
52
|
+
unless response
|
53
|
+
@meta.add_warning(['001', url, header])
|
54
|
+
@meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
|
55
|
+
return []
|
56
|
+
end
|
57
|
+
|
58
|
+
@meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
|
59
|
+
@meta.full_response << response.body
|
60
|
+
|
61
|
+
links = process_link_headers(response: response) unless nolinkheaders
|
62
|
+
links
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.process_link_headers(response:)
|
66
|
+
warn "\n\n parsing #{response.headers}\n\n"
|
67
|
+
|
68
|
+
parser = LinkHeaders::Processor.new(default_anchor: @meta.all_uris.last)
|
69
|
+
parser.extract_and_parse(response: response)
|
70
|
+
factory = parser.factory # LinkHeaders::LinkFactory
|
71
|
+
FspHarvester::Utils.signpostingcheck(factory: factory, metadata: @meta)
|
72
|
+
factory.all_links
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module HarvesterTools
|
4
4
|
class Error < StandardError
|
5
5
|
end
|
6
6
|
|
7
7
|
class MetadataHarvester
|
8
|
-
def self.extract_metadata(links: [], metadata:
|
8
|
+
def self.extract_metadata(links: [], metadata: HarvesterTools::MetadataObject.new)
|
9
9
|
@meta = metadata
|
10
10
|
@meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
|
11
11
|
|
12
12
|
describedby = links.select { |l| l if l.relation == 'describedby' }
|
13
13
|
|
14
|
-
hvst =
|
14
|
+
hvst = HarvesterTools::MetadataParser.new(metadata_object: @meta) # put here because the class variable for detecting duplicates should apply to all URIs
|
15
15
|
describedby.each do |link|
|
16
16
|
accepttype = ACCEPT_STAR_HEADER
|
17
17
|
accept = link.respond_to?('type') ? link.type : nil
|
@@ -30,16 +30,16 @@ module FspHarvester
|
|
30
30
|
case abbreviation
|
31
31
|
when 'html'
|
32
32
|
@meta.comments << 'INFO: Processing html'
|
33
|
-
hvst.process_html(body: response.body, uri: link)
|
33
|
+
hvst.process_html(body: response.body, uri: link, metadata: @meta)
|
34
34
|
when 'xml'
|
35
35
|
@meta.comments << 'INFO: Processing xml'
|
36
|
-
hvst.process_xml(body: response.body)
|
36
|
+
hvst.process_xml(body: response.body, metadata: @meta)
|
37
37
|
when 'json'
|
38
38
|
@meta.comments << 'INFO: Processing json'
|
39
|
-
hvst.process_json(body: response.body)
|
39
|
+
hvst.process_json(body: response.body, metadata: @meta)
|
40
40
|
when 'jsonld', 'rdfxml', 'turtle', 'ntriples', 'nquads'
|
41
41
|
@meta.comments << 'INFO: Processing linked data'
|
42
|
-
hvst.process_ld(body: response.body, content_type: content_type)
|
42
|
+
hvst.process_ld(body: response.body, content_type: content_type, metadata: @meta)
|
43
43
|
when 'specialist'
|
44
44
|
warn 'no specialized parsers so far'
|
45
45
|
end
|
@@ -54,7 +54,7 @@ module FspHarvester
|
|
54
54
|
@meta.comments << "INFO: link #{link.href} has no MIME type, defaulting to */*"
|
55
55
|
end
|
56
56
|
url = link.href
|
57
|
-
response =
|
57
|
+
response = HarvesterTools::WebUtils.fspfetch(url: url, method: :get, headers: header)
|
58
58
|
unless response
|
59
59
|
@meta.add_warning(['016', url, header])
|
60
60
|
@meta.comments << "WARN: Unable to resolve describedby link #{url} using HTTP Accept header #{header}.\n"
|
data/lib/metadata_object.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module
|
1
|
+
module HarvesterTools
|
2
2
|
class MetadataObject
|
3
3
|
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
4
4
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module HarvesterTools
|
4
4
|
class Error < StandardError
|
5
5
|
end
|
6
6
|
|
@@ -9,17 +9,25 @@ module FspHarvester
|
|
9
9
|
|
10
10
|
@@distillerknown = {}
|
11
11
|
|
12
|
-
def initialize(metadata_object:
|
12
|
+
def initialize(metadata_object: HarvesterTools::MetadataObject.new)
|
13
13
|
@meta = metadata_object
|
14
14
|
end
|
15
15
|
|
16
|
-
def process_html(body:, uri:)
|
17
|
-
|
18
|
-
tools.
|
19
|
-
tools.
|
16
|
+
def process_html(body:, uri:, metadata:)
|
17
|
+
@meta = metadata
|
18
|
+
tools = HarvesterTools::ExternalTools.new(metadata: @meta)
|
19
|
+
result = tools.process_with_distiller(body: body)
|
20
|
+
|
21
|
+
jsonld, microdata, microformat, opengraph, rdfa = tools.process_with_extruct(uri: uri)
|
22
|
+
parse_rdf(body: jsonld, content_type: 'application/ld+json')
|
23
|
+
@meta.merge_hash(microdata)
|
24
|
+
@meta.merge_hash(microformat)
|
25
|
+
@meta.merge_hash(opengraph)
|
26
|
+
parse_rdf(body: rdfa, content_type: 'application/ld+json')
|
20
27
|
end
|
21
28
|
|
22
|
-
def process_xml(body:)
|
29
|
+
def process_xml(body:, metadata:)
|
30
|
+
@meta = metadata
|
23
31
|
begin
|
24
32
|
hash = XmlSimple.xml_in(body)
|
25
33
|
rescue
|
@@ -30,7 +38,8 @@ module FspHarvester
|
|
30
38
|
@meta.hash.merge hash
|
31
39
|
end
|
32
40
|
|
33
|
-
def process_json(body:)
|
41
|
+
def process_json(body:, metadata:)
|
42
|
+
@meta = metadata
|
34
43
|
begin
|
35
44
|
hash = JSON.parse(body)
|
36
45
|
rescue
|
@@ -41,11 +50,17 @@ module FspHarvester
|
|
41
50
|
@meta.hash.merge hash
|
42
51
|
end
|
43
52
|
|
44
|
-
def process_ld(body:, content_type:)
|
45
|
-
|
53
|
+
def process_ld(body:, content_type:, metadata:)
|
54
|
+
@meta = metadata
|
55
|
+
parse_rdf(body: body, content_type: content_type, metadata: @meta)
|
56
|
+
end
|
57
|
+
|
58
|
+
def parse_rdf(body:, content_type:, metadata:)
|
59
|
+
self.class.parse_rdf(body: body, content_type: content_type, metadata: metadata)
|
46
60
|
end
|
47
61
|
|
48
|
-
def parse_rdf(body:, content_type:)
|
62
|
+
def self.parse_rdf(body:, content_type:, metadata:)
|
63
|
+
@meta = metadata
|
49
64
|
unless body
|
50
65
|
@meta.comments << "CRITICAL: The response message body component appears to have no content.\n"
|
51
66
|
@meta.add_warning(['018', '', ''])
|
@@ -65,7 +80,7 @@ module FspHarvester
|
|
65
80
|
return
|
66
81
|
end
|
67
82
|
|
68
|
-
graph =
|
83
|
+
graph = HarvesterTools::Cache.checkRDFCache(body: body)
|
69
84
|
if graph.size > 0
|
70
85
|
warn "\n\n\n unmarshalling graph from cache\n\ngraph size #{graph.size}\n\n"
|
71
86
|
@meta.merge_rdf(graph.to_a)
|
@@ -88,7 +103,7 @@ module FspHarvester
|
|
88
103
|
end
|
89
104
|
reader = rdfformat.reader.new(body) # have to re-read it here, but now its safe because we have already caught errors
|
90
105
|
warn 'WRITING TO CACHE'
|
91
|
-
|
106
|
+
HarvesterTools::Cache.writeRDFCache(reader: reader, body: body) # write to the special RDF graph cache
|
92
107
|
warn 'WRITING DONE'
|
93
108
|
reader = rdfformat.reader.new(body) # frustrating that we cannot rewind!
|
94
109
|
warn 'RE-READING DONE'
|
data/lib/signposting_tests.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
def check_for_citeas_conflicts(citeas: )
|
1
|
+
def check_for_citeas_conflicts(citeas:, metadata: )
|
2
|
+
@meta = metadata
|
2
3
|
@meta.comments << 'INFO: checking for conflicting cite-as links'
|
3
4
|
citeas_hrefs = Hash.new
|
4
5
|
citeas.each do |link|
|
@@ -6,7 +7,7 @@ def check_for_citeas_conflicts(citeas: )
|
|
6
7
|
@meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
|
7
8
|
citeas_hrefs[link.href] = link
|
8
9
|
end
|
9
|
-
|
10
|
+
#warn "finalhash #{citeas_hrefs}"
|
10
11
|
if citeas_hrefs.length > 1
|
11
12
|
@meta.comments << 'INFO: Found multiple non-identical cite-as links.'
|
12
13
|
@meta.add_warning(['007', '', ''])
|
@@ -16,7 +17,8 @@ def check_for_citeas_conflicts(citeas: )
|
|
16
17
|
end
|
17
18
|
|
18
19
|
|
19
|
-
def check_describedby_rules(describedby:)
|
20
|
+
def check_describedby_rules(describedby:, metadata:)
|
21
|
+
@meta = metadata
|
20
22
|
describedby.each do |l|
|
21
23
|
unless l.respond_to? 'type'
|
22
24
|
@meta.add_warning(['005', l.href, ''])
|
@@ -25,7 +27,7 @@ def check_describedby_rules(describedby:)
|
|
25
27
|
type = l.type if l.respond_to? 'type'
|
26
28
|
type ||= '*/*'
|
27
29
|
header = { accept: type }
|
28
|
-
response =
|
30
|
+
response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
29
31
|
if response
|
30
32
|
responsetype = response.headers[:content_type]
|
31
33
|
@meta.comments << "INFO: describedby link responds with content type #{responsetype}\n"
|
@@ -51,7 +53,8 @@ def check_describedby_rules(describedby:)
|
|
51
53
|
end
|
52
54
|
end
|
53
55
|
|
54
|
-
def check_item_rules(item:)
|
56
|
+
def check_item_rules(item:, metadata:)
|
57
|
+
@meta = metadata
|
55
58
|
item.each do |l| # l = LinkHeaders::Link
|
56
59
|
unless l.respond_to? 'type'
|
57
60
|
@meta.add_warning(['011', l.href, ''])
|
@@ -60,7 +63,7 @@ def check_item_rules(item:)
|
|
60
63
|
type = l.type if l.respond_to? 'type'
|
61
64
|
type ||= '*/*' # this becomes a frozen string
|
62
65
|
header = { accept: type }
|
63
|
-
response =
|
66
|
+
response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
64
67
|
|
65
68
|
if response
|
66
69
|
if response.headers[:content_type] and type != '*/*'
|
data/lib/swagger.rb
CHANGED
@@ -1,64 +1,39 @@
|
|
1
|
-
class Swagger
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
@protocol = params.fetch(:protocol, "https")
|
38
|
-
@basePath = params.fetch(:basePath)
|
39
|
-
@path = params.fetch(:path)
|
40
|
-
@response_description = params.fetch(:response_description)
|
41
|
-
@schemas = params.fetch(:schemas, [])
|
42
|
-
@comments = params.fetch(:comments, [])
|
43
|
-
@fairsharing_key_location = params.fetch(:fairsharing_key_location)
|
44
|
-
@score = params.fetch(:score, 0)
|
45
|
-
@testedGUID = params.fetch(:testedGUID, "")
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
def fairsharing_key
|
54
|
-
return @fairsharing_key_location
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
def getSwagger
|
60
|
-
|
61
|
-
message = <<"EOF_EOF"
|
1
|
+
class Swagger
|
2
|
+
attr_accessor :debug, :title, :tests_metric, :description, :applies_to_principle, :organization, :org_url,
|
3
|
+
:responsible_developer, :email, :developer_ORCiD, :protocol, :host, :basePath, :path,
|
4
|
+
:response_description, :schemas, :comments, :fairsharing_key_location, :score, :testedGUID
|
5
|
+
|
6
|
+
def initialize(params = {})
|
7
|
+
@debug = params.fetch(:debug, false)
|
8
|
+
|
9
|
+
@title = params.fetch(:title, 'unnamed')
|
10
|
+
@tests_metric = params.fetch(:tests_metric)
|
11
|
+
@description = params.fetch(:description, 'default_description')
|
12
|
+
@applies_to_principle = params.fetch(:applies_to_principle, 'some principle')
|
13
|
+
@version = params.fetch(:version, '0.1')
|
14
|
+
@organization = params.fetch(:organization, 'Some Organization')
|
15
|
+
@org_url = params.fetch(:org_url)
|
16
|
+
@responsible_develper = params.fetch(:responsible_developer, 'Some Person')
|
17
|
+
@email = params.fetch(:email)
|
18
|
+
@developer_ORCiD = params.fetch(:developer_ORCiD)
|
19
|
+
@host = params.fetch(:host)
|
20
|
+
@protocol = params.fetch(:protocol, 'https')
|
21
|
+
@basePath = params.fetch(:basePath)
|
22
|
+
@path = params.fetch(:path)
|
23
|
+
@response_description = params.fetch(:response_description)
|
24
|
+
@schemas = params.fetch(:schemas, [])
|
25
|
+
@comments = params.fetch(:comments, [])
|
26
|
+
@fairsharing_key_location = params.fetch(:fairsharing_key_location)
|
27
|
+
@score = params.fetch(:score, 0)
|
28
|
+
@testedGUID = params.fetch(:testedGUID, '')
|
29
|
+
end
|
30
|
+
|
31
|
+
def fairsharing_key
|
32
|
+
@fairsharing_key_location
|
33
|
+
end
|
34
|
+
|
35
|
+
def getSwagger
|
36
|
+
message = <<"EOF_EOF"
|
62
37
|
swagger: '2.0'
|
63
38
|
info:
|
64
39
|
version: '#{@version}'
|
@@ -89,7 +64,7 @@ class Swagger
|
|
89
64
|
$ref: '#/definitions/schemas'
|
90
65
|
consumes:
|
91
66
|
- application/json
|
92
|
-
produces
|
67
|
+
produces:#{' '}
|
93
68
|
- application/json
|
94
69
|
responses:
|
95
70
|
"200":
|
@@ -98,127 +73,112 @@ class Swagger
|
|
98
73
|
definitions:
|
99
74
|
schemas:
|
100
75
|
required:
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
76
|
+
EOF_EOF
|
77
|
+
|
78
|
+
schemas.keys.each do |key|
|
79
|
+
message += " - #{key}\n"
|
80
|
+
end
|
81
|
+
message += " properties:\n"
|
82
|
+
schemas.keys.each do |key|
|
83
|
+
message += " #{key}:\n"
|
84
|
+
message += " type: #{schemas[key][0]}\n"
|
85
|
+
message += " description: >-\n"
|
86
|
+
message += " #{schemas[key][1]}\n"
|
87
|
+
end
|
88
|
+
|
89
|
+
message
|
90
|
+
end
|
91
|
+
|
92
|
+
# A utility function that SHOULD NOT BE CALLED EXTERNALLY
|
93
|
+
#
|
94
|
+
# @param s - subject node
|
95
|
+
# @param p - predicate node
|
96
|
+
# @param o - object node
|
97
|
+
# @param repo - an RDF::Graph object
|
98
|
+
def triplify(s, p, o, repo)
|
99
|
+
s = s.strip if s.instance_of?(String)
|
100
|
+
p = p.strip if p.instance_of?(String)
|
101
|
+
o = o.strip if o.instance_of?(String)
|
102
|
+
|
103
|
+
unless s.respond_to?('uri')
|
104
|
+
|
105
|
+
if s.to_s =~ %r{^\w+:/?/?[^\s]+}
|
106
|
+
s = RDF::URI.new(s.to_s)
|
107
|
+
else
|
108
|
+
debug and warn "Subject #{s} must be a URI-compatible thingy"
|
109
|
+
abort "Subject #{s} must be a URI-compatible thingy"
|
107
110
|
end
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
111
|
+
end
|
112
|
+
|
113
|
+
unless p.respond_to?('uri')
|
114
|
+
|
115
|
+
if p.to_s =~ %r{^\w+:/?/?[^\s]+}
|
116
|
+
p = RDF::URI.new(p.to_s)
|
117
|
+
else
|
118
|
+
debug and warn "Predicate #{p} must be a URI-compatible thingy"
|
119
|
+
abort "Predicate #{p} must be a URI-compatible thingy"
|
114
120
|
end
|
115
|
-
|
116
|
-
return message
|
117
121
|
end
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
#
|
123
|
-
# @param s - subject node
|
124
|
-
# @param p - predicate node
|
125
|
-
# @param o - object node
|
126
|
-
# @param repo - an RDF::Graph object
|
127
|
-
def triplify(s, p, o, repo)
|
128
|
-
|
129
|
-
if s.class == String
|
130
|
-
s = s.strip
|
131
|
-
end
|
132
|
-
if p.class == String
|
133
|
-
p = p.strip
|
134
|
-
end
|
135
|
-
if o.class == String
|
136
|
-
o = o.strip
|
137
|
-
end
|
138
|
-
|
139
|
-
unless s.respond_to?('uri')
|
140
|
-
|
141
|
-
if s.to_s =~ /^\w+:\/?\/?[^\s]+/
|
142
|
-
s = RDF::URI.new(s.to_s)
|
143
|
-
else
|
144
|
-
self.debug and $stderr.puts "Subject #{s.to_s} must be a URI-compatible thingy"
|
145
|
-
abort "Subject #{s.to_s} must be a URI-compatible thingy"
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
unless p.respond_to?('uri')
|
150
|
-
|
151
|
-
if p.to_s =~ /^\w+:\/?\/?[^\s]+/
|
152
|
-
p = RDF::URI.new(p.to_s)
|
153
|
-
else
|
154
|
-
self.debug and $stderr.puts "Predicate #{p.to_s} must be a URI-compatible thingy"
|
155
|
-
abort "Predicate #{p.to_s} must be a URI-compatible thingy"
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
unless o.respond_to?('uri')
|
160
|
-
if o.to_s =~ /\A\w+:\/?\/?\w[^\s]+/
|
161
|
-
o = RDF::URI.new(o.to_s)
|
122
|
+
|
123
|
+
unless o.respond_to?('uri')
|
124
|
+
o = if o.to_s =~ %r{\A\w+:/?/?\w[^\s]+}
|
125
|
+
RDF::URI.new(o.to_s)
|
162
126
|
elsif o.to_s =~ /^\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d/
|
163
|
-
|
127
|
+
RDF::Literal.new(o.to_s, datatype: RDF::XSD.date)
|
164
128
|
elsif o.to_s =~ /^[+-]?\d+\.\d+/
|
165
|
-
|
129
|
+
RDF::Literal.new(o.to_s, datatype: RDF::XSD.float)
|
166
130
|
elsif o.to_s =~ /^[+-]?[0-9]+$/
|
167
|
-
|
131
|
+
RDF::Literal.new(o.to_s, datatype: RDF::XSD.int)
|
168
132
|
else
|
169
|
-
|
133
|
+
RDF::Literal.new(o.to_s, language: :en)
|
170
134
|
end
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
return g.dump(:jsonld)
|
220
|
-
end
|
221
|
-
|
135
|
+
end
|
136
|
+
|
137
|
+
debug and warn("\n\ninserting #{s} #{p} #{o}\n\n")
|
138
|
+
triple = RDF::Statement(s, p, o)
|
139
|
+
repo.insert(triple)
|
140
|
+
|
141
|
+
true
|
142
|
+
end
|
143
|
+
|
144
|
+
# A utility function that SHOULD NOT BE CALLED EXTERNALLY
|
145
|
+
#
|
146
|
+
# @param s - subject node
|
147
|
+
# @param p - predicate node
|
148
|
+
# @param o - object node
|
149
|
+
# @param repo - an RDF::Graph object
|
150
|
+
def self.triplify(s, p, o, repo)
|
151
|
+
triplify(s, p, o, repo)
|
152
|
+
end
|
153
|
+
|
154
|
+
def addComment(newcomment)
|
155
|
+
comments << newcomment.to_s
|
156
|
+
# return self.comments
|
157
|
+
end
|
158
|
+
|
159
|
+
def createEvaluationResponse
|
160
|
+
g = RDF::Graph.new
|
161
|
+
|
162
|
+
dt = Time.now.iso8601
|
163
|
+
uri = testedGUID
|
164
|
+
|
165
|
+
me = protocol + '://' + host + '/' + basePath + path
|
166
|
+
|
167
|
+
meURI = "#{me}##{uri}/result-#{dt}"
|
168
|
+
meURI = Addressable::URI.escape(meURI)
|
169
|
+
|
170
|
+
triplify(meURI, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
|
171
|
+
'http://fairmetrics.org/resources/metric_evaluation_result', g)
|
172
|
+
triplify(meURI, 'http://semanticscience.org/resource/SIO_000300', score, g)
|
173
|
+
triplify(meURI, 'http://purl.obolibrary.org/obo/date', dt, g)
|
174
|
+
triplify(meURI, 'http://schema.org/softwareVersion', VERSION, g)
|
175
|
+
triplify(meURI, 'http://semanticscience.org/resource/SIO_000332', uri, g)
|
176
|
+
|
177
|
+
comments = 'no comments received. '
|
178
|
+
|
179
|
+
comments = self.comments.join("\n") if self.comments.size > 0
|
180
|
+
triplify(meURI, 'http://schema.org/comment', comments, g)
|
181
|
+
|
182
|
+
g.dump(:jsonld)
|
222
183
|
end
|
223
|
-
|
224
|
-
|
184
|
+
end
|
data/lib/web_utils.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
module
|
1
|
+
module HarvesterTools
|
2
2
|
|
3
3
|
class WebUtils
|
4
|
-
def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta:
|
4
|
+
def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta: HarvesterTools::MetadataObject.new)
|
5
5
|
warn 'In fetch routine now. '
|
6
6
|
|
7
7
|
begin
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -182,15 +182,18 @@ files:
|
|
182
182
|
- bin/setup
|
183
183
|
- example_test.rb
|
184
184
|
- launch.json
|
185
|
+
- lib/config.conf
|
185
186
|
- lib/config.conf_docker
|
186
187
|
- lib/config.conf_local
|
187
188
|
- lib/constants.rb
|
189
|
+
- lib/external_tools.rb
|
188
190
|
- lib/fsp_harvester.rb
|
189
191
|
- lib/fsp_harvester/version.rb
|
190
|
-
- lib/
|
191
|
-
- lib/
|
192
|
-
- lib/
|
192
|
+
- lib/harvester.rb
|
193
|
+
- lib/harvester_utils.rb
|
194
|
+
- lib/metadata_harvester.rb
|
193
195
|
- lib/metadata_object.rb
|
196
|
+
- lib/metadata_parser.rb
|
194
197
|
- lib/signposting_tests.rb
|
195
198
|
- lib/swagger.rb
|
196
199
|
- lib/warnings.json
|