fsp_harvester 0.1.10 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +53 -53
- data/Gemfile.lock +38 -30
- data/lib/config.conf +8 -0
- data/lib/constants.rb +8 -5
- data/lib/{fsp_metadata_external_tools.rb → external_tools.rb} +17 -15
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/fsp_harvester.rb +8 -106
- data/lib/harvester.rb +28 -0
- data/lib/harvester_utils.rb +78 -0
- data/lib/{fsp_metadata_harvester.rb → metadata_harvester.rb} +51 -33
- data/lib/metadata_object.rb +4 -3
- data/lib/{fsp_metadata_parser.rb → metadata_parser.rb} +28 -13
- data/lib/signposting_tests.rb +9 -6
- data/lib/warnings.json +33 -24
- data/lib/web_utils.rb +3 -3
- metadata +10 -8
- data/lib/swagger.rb +0 -224
@@ -1,17 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module HarvesterTools
|
4
4
|
class Error < StandardError
|
5
5
|
end
|
6
6
|
|
7
7
|
class MetadataHarvester
|
8
|
-
def self.
|
8
|
+
def self.extract_metadata_from_links(links: [], metadata: HarvesterTools::MetadataObject.new)
|
9
9
|
@meta = metadata
|
10
10
|
@meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
|
11
11
|
|
12
12
|
describedby = links.select { |l| l if l.relation == 'describedby' }
|
13
13
|
|
14
|
-
hvst =
|
14
|
+
hvst = HarvesterTools::MetadataParser.new(metadata_object: @meta) # put here because the class variable for detecting duplicates should apply to all URIs
|
15
15
|
describedby.each do |link|
|
16
16
|
accepttype = ACCEPT_STAR_HEADER
|
17
17
|
accept = link.respond_to?('type') ? link.type : nil
|
@@ -26,23 +26,42 @@ module FspHarvester
|
|
26
26
|
next
|
27
27
|
end
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
29
|
+
process_according_to_type(body: response.body, uri: link, metadata: @meta, abbreviation: abbreviation,
|
30
|
+
content_type: content_type, harvester: hvst)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.extract_metadata_from_body(response:, metadata: HarvesterTools::MetadataObject.new)
|
35
|
+
@meta = metadata
|
36
|
+
@meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
|
37
|
+
|
38
|
+
abbreviation, content_type = attempt_to_detect_type(body: response.body, headers: response.headers)
|
39
|
+
unless abbreviation
|
40
|
+
@meta.add_warning(['017', response.request.url, ''])
|
41
|
+
@meta.comments << "WARN: metadata format returned from #{response.request.url} is not recognized. Moving on.\n"
|
42
|
+
return
|
43
|
+
end
|
44
|
+
process_according_to_type(body: response.body, uri: response.request.url, metadata: @meta,
|
45
|
+
abbreviation: abbreviation, content_type: content_type)
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.process_according_to_type(body:, uri:, abbreviation:, content_type:, metadata:,
|
49
|
+
harvester: HarvesterTools::MetadataParser.new(metadata_object: @meta))
|
50
|
+
case abbreviation
|
51
|
+
when 'html'
|
52
|
+
@meta.comments << 'INFO: Processing html'
|
53
|
+
harvester.process_html(body: body, uri: uri, metadata: @meta)
|
54
|
+
when 'xml'
|
55
|
+
@meta.comments << 'INFO: Processing xml'
|
56
|
+
harvester.process_xml(body: body, metadata: @meta)
|
57
|
+
when 'json'
|
58
|
+
@meta.comments << 'INFO: Processing json'
|
59
|
+
harvester.process_json(body: body, metadata: @meta)
|
60
|
+
when 'jsonld', 'rdfxml', 'turtle', 'ntriples', 'nquads'
|
61
|
+
@meta.comments << 'INFO: Processing linked data'
|
62
|
+
harvester.process_ld(body: body, content_type: content_type, metadata: @meta)
|
63
|
+
when 'specialist'
|
64
|
+
warn 'no specialized parsers so far'
|
46
65
|
end
|
47
66
|
end
|
48
67
|
|
@@ -54,7 +73,7 @@ module FspHarvester
|
|
54
73
|
@meta.comments << "INFO: link #{link.href} has no MIME type, defaulting to */*"
|
55
74
|
end
|
56
75
|
url = link.href
|
57
|
-
response =
|
76
|
+
response = HarvesterTools::WebUtils.fspfetch(url: url, method: :get, headers: header)
|
58
77
|
unless response
|
59
78
|
@meta.add_warning(['016', url, header])
|
60
79
|
@meta.comments << "WARN: Unable to resolve describedby link #{url} using HTTP Accept header #{header}.\n"
|
@@ -111,24 +130,23 @@ module FspHarvester
|
|
111
130
|
[abbreviation, contenttype]
|
112
131
|
end
|
113
132
|
|
114
|
-
def self.ntriples_hack(body:)
|
133
|
+
def self.ntriples_hack(body:) # distriller cannot recognize single-line ntriples unless they end with a period, which is not required by the spec... so hack it!
|
115
134
|
detected_type = nil
|
116
135
|
body.split.each do |line|
|
117
136
|
line.strip!
|
118
137
|
next if line.empty?
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
@meta.comments << "INFO: ntriples hack found: #{detected_type.to_s}\n"
|
126
|
-
if detected_type != RDF::NTriples::Format # only return the hacky case
|
127
|
-
return nil
|
138
|
+
|
139
|
+
next unless line =~ /\s*<[^>]+>\s*<[^>]+>\s\S+/
|
140
|
+
|
141
|
+
@meta.comments << "INFO: running ntriples hack on #{line + ' .'}\n"
|
142
|
+
detected_type = RDF::Format.for({ sample: "#{line} ." }) # adding a period allows detection of ntriples by distiller
|
143
|
+
break
|
128
144
|
end
|
129
|
-
|
130
|
-
|
145
|
+
@meta.comments << "INFO: ntriples hack found: #{detected_type}\n"
|
146
|
+
return nil if detected_type != RDF::NTriples::Format # only return the hacky case
|
131
147
|
|
148
|
+
detected_type
|
149
|
+
end
|
132
150
|
|
133
151
|
def self.check_json(body:)
|
134
152
|
abbreviation = nil
|
data/lib/metadata_object.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module
|
1
|
+
module HarvesterTools
|
2
2
|
class MetadataObject
|
3
3
|
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
4
4
|
|
@@ -10,8 +10,9 @@ module FspHarvester
|
|
10
10
|
@full_response = []
|
11
11
|
@links = []
|
12
12
|
@all_uris = []
|
13
|
-
|
14
|
-
|
13
|
+
w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
|
14
|
+
#@warn = File.read("./lib/warnings.json")
|
15
|
+
@warn = JSON.parse(w)
|
15
16
|
end
|
16
17
|
|
17
18
|
def merge_hash(hash)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module HarvesterTools
|
4
4
|
class Error < StandardError
|
5
5
|
end
|
6
6
|
|
@@ -9,17 +9,25 @@ module FspHarvester
|
|
9
9
|
|
10
10
|
@@distillerknown = {}
|
11
11
|
|
12
|
-
def initialize(metadata_object:
|
12
|
+
def initialize(metadata_object: HarvesterTools::MetadataObject.new)
|
13
13
|
@meta = metadata_object
|
14
14
|
end
|
15
15
|
|
16
|
-
def process_html(body:, uri:)
|
17
|
-
|
18
|
-
tools.
|
19
|
-
tools.
|
16
|
+
def process_html(body:, uri:, metadata:)
|
17
|
+
@meta = metadata
|
18
|
+
tools = HarvesterTools::ExternalTools.new(metadata: @meta)
|
19
|
+
result = tools.process_with_distiller(body: body)
|
20
|
+
|
21
|
+
jsonld, microdata, microformat, opengraph, rdfa = tools.process_with_extruct(uri: uri)
|
22
|
+
parse_rdf(body: jsonld, content_type: 'application/ld+json')
|
23
|
+
@meta.merge_hash(microdata)
|
24
|
+
@meta.merge_hash(microformat)
|
25
|
+
@meta.merge_hash(opengraph)
|
26
|
+
parse_rdf(body: rdfa, content_type: 'application/ld+json')
|
20
27
|
end
|
21
28
|
|
22
|
-
def process_xml(body:)
|
29
|
+
def process_xml(body:, metadata:)
|
30
|
+
@meta = metadata
|
23
31
|
begin
|
24
32
|
hash = XmlSimple.xml_in(body)
|
25
33
|
rescue
|
@@ -30,7 +38,8 @@ module FspHarvester
|
|
30
38
|
@meta.hash.merge hash
|
31
39
|
end
|
32
40
|
|
33
|
-
def process_json(body:)
|
41
|
+
def process_json(body:, metadata:)
|
42
|
+
@meta = metadata
|
34
43
|
begin
|
35
44
|
hash = JSON.parse(body)
|
36
45
|
rescue
|
@@ -41,11 +50,17 @@ module FspHarvester
|
|
41
50
|
@meta.hash.merge hash
|
42
51
|
end
|
43
52
|
|
44
|
-
def process_ld(body:, content_type:)
|
45
|
-
|
53
|
+
def process_ld(body:, content_type:, metadata:)
|
54
|
+
@meta = metadata
|
55
|
+
parse_rdf(body: body, content_type: content_type, metadata: @meta)
|
56
|
+
end
|
57
|
+
|
58
|
+
def parse_rdf(body:, content_type:, metadata:)
|
59
|
+
self.class.parse_rdf(body: body, content_type: content_type, metadata: metadata)
|
46
60
|
end
|
47
61
|
|
48
|
-
def parse_rdf(body:, content_type:)
|
62
|
+
def self.parse_rdf(body:, content_type:, metadata:)
|
63
|
+
@meta = metadata
|
49
64
|
unless body
|
50
65
|
@meta.comments << "CRITICAL: The response message body component appears to have no content.\n"
|
51
66
|
@meta.add_warning(['018', '', ''])
|
@@ -65,7 +80,7 @@ module FspHarvester
|
|
65
80
|
return
|
66
81
|
end
|
67
82
|
|
68
|
-
graph =
|
83
|
+
graph = HarvesterTools::Cache.checkRDFCache(body: body)
|
69
84
|
if graph.size > 0
|
70
85
|
warn "\n\n\n unmarshalling graph from cache\n\ngraph size #{graph.size}\n\n"
|
71
86
|
@meta.merge_rdf(graph.to_a)
|
@@ -88,7 +103,7 @@ module FspHarvester
|
|
88
103
|
end
|
89
104
|
reader = rdfformat.reader.new(body) # have to re-read it here, but now its safe because we have already caught errors
|
90
105
|
warn 'WRITING TO CACHE'
|
91
|
-
|
106
|
+
HarvesterTools::Cache.writeRDFCache(reader: reader, body: body) # write to the special RDF graph cache
|
92
107
|
warn 'WRITING DONE'
|
93
108
|
reader = rdfformat.reader.new(body) # frustrating that we cannot rewind!
|
94
109
|
warn 'RE-READING DONE'
|
data/lib/signposting_tests.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
def check_for_citeas_conflicts(citeas: )
|
1
|
+
def check_for_citeas_conflicts(citeas:, metadata: )
|
2
|
+
@meta = metadata
|
2
3
|
@meta.comments << 'INFO: checking for conflicting cite-as links'
|
3
4
|
citeas_hrefs = Hash.new
|
4
5
|
citeas.each do |link|
|
@@ -6,7 +7,7 @@ def check_for_citeas_conflicts(citeas: )
|
|
6
7
|
@meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
|
7
8
|
citeas_hrefs[link.href] = link
|
8
9
|
end
|
9
|
-
|
10
|
+
#warn "finalhash #{citeas_hrefs}"
|
10
11
|
if citeas_hrefs.length > 1
|
11
12
|
@meta.comments << 'INFO: Found multiple non-identical cite-as links.'
|
12
13
|
@meta.add_warning(['007', '', ''])
|
@@ -16,7 +17,8 @@ def check_for_citeas_conflicts(citeas: )
|
|
16
17
|
end
|
17
18
|
|
18
19
|
|
19
|
-
def check_describedby_rules(describedby:)
|
20
|
+
def check_describedby_rules(describedby:, metadata:)
|
21
|
+
@meta = metadata
|
20
22
|
describedby.each do |l|
|
21
23
|
unless l.respond_to? 'type'
|
22
24
|
@meta.add_warning(['005', l.href, ''])
|
@@ -25,7 +27,7 @@ def check_describedby_rules(describedby:)
|
|
25
27
|
type = l.type if l.respond_to? 'type'
|
26
28
|
type ||= '*/*'
|
27
29
|
header = { accept: type }
|
28
|
-
response =
|
30
|
+
response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
29
31
|
if response
|
30
32
|
responsetype = response.headers[:content_type]
|
31
33
|
@meta.comments << "INFO: describedby link responds with content type #{responsetype}\n"
|
@@ -51,7 +53,8 @@ def check_describedby_rules(describedby:)
|
|
51
53
|
end
|
52
54
|
end
|
53
55
|
|
54
|
-
def check_item_rules(item:)
|
56
|
+
def check_item_rules(item:, metadata:)
|
57
|
+
@meta = metadata
|
55
58
|
item.each do |l| # l = LinkHeaders::Link
|
56
59
|
unless l.respond_to? 'type'
|
57
60
|
@meta.add_warning(['011', l.href, ''])
|
@@ -60,7 +63,7 @@ def check_item_rules(item:)
|
|
60
63
|
type = l.type if l.respond_to? 'type'
|
61
64
|
type ||= '*/*' # this becomes a frozen string
|
62
65
|
header = { accept: type }
|
63
|
-
response =
|
66
|
+
response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
64
67
|
|
65
68
|
if response
|
66
69
|
if response.headers[:content_type] and type != '*/*'
|
data/lib/warnings.json
CHANGED
@@ -1,110 +1,119 @@
|
|
1
1
|
{
|
2
2
|
"001": {
|
3
3
|
"message": "Unable to resolve guid using default (*/*) Accept headers",
|
4
|
-
"linkout": "",
|
4
|
+
"linkout": [{"FAIR Principle": "https://www.go-fair.org/fair-principles/metadata-retrievable-identifier-standardised-communication-protocol/"},
|
5
|
+
{"FAIRsharing": "https://doi.org/10.25504/FAIRsharing.cd2f9e"}
|
6
|
+
],
|
5
7
|
"severity": "WARN"
|
6
8
|
},
|
7
9
|
"002": {
|
8
10
|
"message": "HTTP Response (203) is non-authoritative",
|
9
|
-
"linkout": "",
|
11
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/203"}],
|
10
12
|
"severity": "WARN"
|
11
13
|
},
|
12
14
|
"003": {
|
13
15
|
"message": "HTTP Response indicates failure (500-range)",
|
14
|
-
"linkout": "",
|
16
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500"}],
|
15
17
|
"severity": "WARN"
|
16
18
|
},
|
17
19
|
"004": {
|
18
20
|
"message": "The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header",
|
19
|
-
"linkout": "",
|
21
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
20
22
|
"severity": "WARN"
|
21
23
|
},
|
22
24
|
"005": {
|
23
25
|
"message": "The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute indicating the Accept headers that should be sent with the request",
|
24
|
-
"linkout": "",
|
26
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
25
27
|
"severity": "WARN"
|
26
28
|
},
|
27
29
|
"006": {
|
28
30
|
"message": "GUID type not recognized",
|
29
|
-
"linkout": "",
|
31
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/search?fairsharingRegistry=Standard&recordType=identifier_schema&page=1"}],
|
30
32
|
"severity": "WARN"
|
31
33
|
},
|
32
34
|
"007": {
|
33
35
|
"message": "Conflicting cite-as links",
|
34
|
-
"linkout": "",
|
36
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
35
37
|
"severity": "WARN"
|
36
38
|
},
|
37
39
|
"008": {
|
38
40
|
"message": "describedby link does not resolve",
|
39
|
-
"linkout": "",
|
41
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
40
42
|
"severity": "WARN"
|
41
43
|
},
|
42
44
|
"009": {
|
43
45
|
"message": "Content-type of described-by link does not match the type attribute in the link header itself",
|
44
|
-
"linkout": "",
|
46
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"},
|
47
|
+
{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
45
48
|
"severity": "WARN"
|
46
49
|
},
|
47
50
|
"010": {
|
48
51
|
"message": "Content-type of response from described-by link is undefined or cannot be compared to the link type",
|
49
|
-
"linkout": "",
|
52
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
50
53
|
"severity": "WARN"
|
51
54
|
},
|
52
55
|
"011": {
|
53
56
|
"message": "The resource does not follow the FAIR Signposting standard, which encourages any item links to have a type attribute",
|
54
|
-
"linkout": "",
|
57
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
55
58
|
"severity": "WARN"
|
56
59
|
},
|
57
60
|
"012": {
|
58
61
|
"message": "Content-type of response from resolving an item doesn't match the item type attribute in the link header",
|
59
|
-
"linkout": "",
|
62
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"},
|
63
|
+
{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
60
64
|
"severity": "WARN"
|
61
65
|
},
|
62
66
|
"013": {
|
63
67
|
"message": "Content-type of response from resolving an item is undefined or cannot be compared to the link type",
|
64
|
-
"linkout": "",
|
68
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
65
69
|
"severity": "WARN"
|
66
70
|
},
|
67
71
|
"014": {
|
68
72
|
"message": "Item link does not resolve",
|
69
|
-
"linkout": "",
|
73
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
70
74
|
"severity": "WARN"
|
71
75
|
},
|
72
76
|
"015": {
|
73
77
|
"message": "Link headers do not include a link of type 'type', as required by the FAIR Signposting specification",
|
74
|
-
"linkout": "",
|
78
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
75
79
|
"severity": "WARN"
|
76
80
|
},
|
77
81
|
"016": {
|
78
82
|
"message": "Unable to resolve describedby link using Accept headers with the MIME type indicated in the link",
|
79
|
-
"linkout": "",
|
83
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Content_negotiation"}],
|
80
84
|
"severity": "WARN"
|
81
85
|
},
|
82
86
|
"017": {
|
83
87
|
"message": "Metadata format not recognized.",
|
84
|
-
"linkout": "",
|
88
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/search?subjects=Computer%2520Science,subject%2520agnostic&page=1&recordType=model_and_format"}],
|
85
89
|
"severity": "WARN"
|
86
90
|
},
|
87
91
|
"018": {
|
88
92
|
"message": "RDF parsing error - likely malformed RDF document.",
|
89
|
-
"linkout": "",
|
93
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.p77ph9"},
|
94
|
+
{"Documentation": "http://www.w3.org/TR/2014/REC-rdf11-concepts-20140225/"},
|
95
|
+
{"Validator": "http://rdf.greggkellogg.net/distiller"}],
|
90
96
|
"severity": "WARN"
|
91
97
|
},
|
92
98
|
"019": {
|
93
99
|
"message": "HTML parsing error - unable to extract linked data from HTML.",
|
94
|
-
"linkout": "",
|
100
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.YugnuL"},
|
101
|
+
{"Documentation": "https://www.w3.org/TR/html53/"},
|
102
|
+
{"validator": "https://validator.w3.org/"}],
|
95
103
|
"severity": "WARN"
|
96
104
|
},
|
97
105
|
"020": {
|
98
106
|
"message": "XML parsing error - unable to process XML document.",
|
99
|
-
"linkout": "",
|
107
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.b5cc91"},
|
108
|
+
{"Documentation": "https://www.w3.org/TR/xml/"},
|
109
|
+
{"Validator": "https://www.xmlvalidation.com/"}],
|
100
110
|
"severity": "WARN"
|
101
111
|
},
|
102
112
|
"021": {
|
103
113
|
"message": "JSON parsing error - unable to process JSON document.",
|
104
|
-
"linkout": "",
|
114
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.5bbab9"},
|
115
|
+
{"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
|
116
|
+
{"Validator": "https://jsononline.net/json-validator"}],
|
105
117
|
"severity": "WARN"
|
106
118
|
}
|
107
|
-
|
108
|
-
|
109
|
-
|
110
119
|
}
|
data/lib/web_utils.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
module
|
1
|
+
module HarvesterTools
|
2
2
|
|
3
3
|
class WebUtils
|
4
|
-
def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta:
|
4
|
+
def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta: HarvesterTools::MetadataObject.new)
|
5
5
|
warn 'In fetch routine now. '
|
6
6
|
|
7
7
|
begin
|
@@ -23,7 +23,7 @@ module FspHarvester
|
|
23
23
|
end
|
24
24
|
response
|
25
25
|
rescue RestClient::ExceptionWithResponse => e
|
26
|
-
warn "EXCEPTION WITH RESPONSE! #{e.response}\
|
26
|
+
warn "EXCEPTION WITH RESPONSE! #{e.response.code} with response #{e.response}\nfailed response headers: #{e.response.headers}"
|
27
27
|
meta.warnings << ["003", url, headers]
|
28
28
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
29
29
|
if (e.response.code == 500 or e.response.code == 404)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.
|
47
|
+
version: 0.1.17
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.
|
54
|
+
version: 0.1.17
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: metainspector
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -182,17 +182,19 @@ files:
|
|
182
182
|
- bin/setup
|
183
183
|
- example_test.rb
|
184
184
|
- launch.json
|
185
|
+
- lib/config.conf
|
185
186
|
- lib/config.conf_docker
|
186
187
|
- lib/config.conf_local
|
187
188
|
- lib/constants.rb
|
189
|
+
- lib/external_tools.rb
|
188
190
|
- lib/fsp_harvester.rb
|
189
191
|
- lib/fsp_harvester/version.rb
|
190
|
-
- lib/
|
191
|
-
- lib/
|
192
|
-
- lib/
|
192
|
+
- lib/harvester.rb
|
193
|
+
- lib/harvester_utils.rb
|
194
|
+
- lib/metadata_harvester.rb
|
193
195
|
- lib/metadata_object.rb
|
196
|
+
- lib/metadata_parser.rb
|
194
197
|
- lib/signposting_tests.rb
|
195
|
-
- lib/swagger.rb
|
196
198
|
- lib/warnings.json
|
197
199
|
- lib/web_utils.rb
|
198
200
|
homepage: https://github.com/markwilkinson/FAIR-Signposting-Harvester
|