fsp_harvester 0.1.10 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +53 -53
- data/Gemfile.lock +38 -30
- data/lib/config.conf +8 -0
- data/lib/constants.rb +8 -5
- data/lib/{fsp_metadata_external_tools.rb → external_tools.rb} +17 -15
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/fsp_harvester.rb +8 -106
- data/lib/harvester.rb +28 -0
- data/lib/harvester_utils.rb +78 -0
- data/lib/{fsp_metadata_harvester.rb → metadata_harvester.rb} +51 -33
- data/lib/metadata_object.rb +4 -3
- data/lib/{fsp_metadata_parser.rb → metadata_parser.rb} +28 -13
- data/lib/signposting_tests.rb +9 -6
- data/lib/warnings.json +33 -24
- data/lib/web_utils.rb +3 -3
- metadata +10 -8
- data/lib/swagger.rb +0 -224
@@ -1,17 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module FspHarvester
|
3
|
+
module HarvesterTools
|
4
4
|
class Error < StandardError
|
5
5
|
end
|
6
6
|
|
7
7
|
class MetadataHarvester
|
8
|
-
def self.
|
8
|
+
def self.extract_metadata_from_links(links: [], metadata: HarvesterTools::MetadataObject.new)
|
9
9
|
@meta = metadata
|
10
10
|
@meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
|
11
11
|
|
12
12
|
describedby = links.select { |l| l if l.relation == 'describedby' }
|
13
13
|
|
14
|
-
hvst =
|
14
|
+
hvst = HarvesterTools::MetadataParser.new(metadata_object: @meta) # put here because the class variable for detecting duplicates should apply to all URIs
|
15
15
|
describedby.each do |link|
|
16
16
|
accepttype = ACCEPT_STAR_HEADER
|
17
17
|
accept = link.respond_to?('type') ? link.type : nil
|
@@ -26,23 +26,42 @@ module FspHarvester
|
|
26
26
|
next
|
27
27
|
end
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
29
|
+
process_according_to_type(body: response.body, uri: link, metadata: @meta, abbreviation: abbreviation,
|
30
|
+
content_type: content_type, harvester: hvst)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.extract_metadata_from_body(response:, metadata: HarvesterTools::MetadataObject.new)
|
35
|
+
@meta = metadata
|
36
|
+
@meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
|
37
|
+
|
38
|
+
abbreviation, content_type = attempt_to_detect_type(body: response.body, headers: response.headers)
|
39
|
+
unless abbreviation
|
40
|
+
@meta.add_warning(['017', response.request.url, ''])
|
41
|
+
@meta.comments << "WARN: metadata format returned from #{response.request.url} is not recognized. Moving on.\n"
|
42
|
+
return
|
43
|
+
end
|
44
|
+
process_according_to_type(body: response.body, uri: response.request.url, metadata: @meta,
|
45
|
+
abbreviation: abbreviation, content_type: content_type)
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.process_according_to_type(body:, uri:, abbreviation:, content_type:, metadata:,
|
49
|
+
harvester: HarvesterTools::MetadataParser.new(metadata_object: @meta))
|
50
|
+
case abbreviation
|
51
|
+
when 'html'
|
52
|
+
@meta.comments << 'INFO: Processing html'
|
53
|
+
harvester.process_html(body: body, uri: uri, metadata: @meta)
|
54
|
+
when 'xml'
|
55
|
+
@meta.comments << 'INFO: Processing xml'
|
56
|
+
harvester.process_xml(body: body, metadata: @meta)
|
57
|
+
when 'json'
|
58
|
+
@meta.comments << 'INFO: Processing json'
|
59
|
+
harvester.process_json(body: body, metadata: @meta)
|
60
|
+
when 'jsonld', 'rdfxml', 'turtle', 'ntriples', 'nquads'
|
61
|
+
@meta.comments << 'INFO: Processing linked data'
|
62
|
+
harvester.process_ld(body: body, content_type: content_type, metadata: @meta)
|
63
|
+
when 'specialist'
|
64
|
+
warn 'no specialized parsers so far'
|
46
65
|
end
|
47
66
|
end
|
48
67
|
|
@@ -54,7 +73,7 @@ module FspHarvester
|
|
54
73
|
@meta.comments << "INFO: link #{link.href} has no MIME type, defaulting to */*"
|
55
74
|
end
|
56
75
|
url = link.href
|
57
|
-
response =
|
76
|
+
response = HarvesterTools::WebUtils.fspfetch(url: url, method: :get, headers: header)
|
58
77
|
unless response
|
59
78
|
@meta.add_warning(['016', url, header])
|
60
79
|
@meta.comments << "WARN: Unable to resolve describedby link #{url} using HTTP Accept header #{header}.\n"
|
@@ -111,24 +130,23 @@ module FspHarvester
|
|
111
130
|
[abbreviation, contenttype]
|
112
131
|
end
|
113
132
|
|
114
|
-
def self.ntriples_hack(body:)
|
133
|
+
def self.ntriples_hack(body:) # distriller cannot recognize single-line ntriples unless they end with a period, which is not required by the spec... so hack it!
|
115
134
|
detected_type = nil
|
116
135
|
body.split.each do |line|
|
117
136
|
line.strip!
|
118
137
|
next if line.empty?
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
@meta.comments << "INFO: ntriples hack found: #{detected_type.to_s}\n"
|
126
|
-
if detected_type != RDF::NTriples::Format # only return the hacky case
|
127
|
-
return nil
|
138
|
+
|
139
|
+
next unless line =~ /\s*<[^>]+>\s*<[^>]+>\s\S+/
|
140
|
+
|
141
|
+
@meta.comments << "INFO: running ntriples hack on #{line + ' .'}\n"
|
142
|
+
detected_type = RDF::Format.for({ sample: "#{line} ." }) # adding a period allows detection of ntriples by distiller
|
143
|
+
break
|
128
144
|
end
|
129
|
-
|
130
|
-
|
145
|
+
@meta.comments << "INFO: ntriples hack found: #{detected_type}\n"
|
146
|
+
return nil if detected_type != RDF::NTriples::Format # only return the hacky case
|
131
147
|
|
148
|
+
detected_type
|
149
|
+
end
|
132
150
|
|
133
151
|
def self.check_json(body:)
|
134
152
|
abbreviation = nil
|
data/lib/metadata_object.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module FspHarvester
|
1
|
+
module HarvesterTools
|
2
2
|
class MetadataObject
|
3
3
|
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
4
4
|
|
@@ -10,8 +10,9 @@ module FspHarvester
|
|
10
10
|
@full_response = []
|
11
11
|
@links = []
|
12
12
|
@all_uris = []
|
13
|
-
|
14
|
-
|
13
|
+
w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
|
14
|
+
#@warn = File.read("./lib/warnings.json")
|
15
|
+
@warn = JSON.parse(w)
|
15
16
|
end
|
16
17
|
|
17
18
|
def merge_hash(hash)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module FspHarvester
|
3
|
+
module HarvesterTools
|
4
4
|
class Error < StandardError
|
5
5
|
end
|
6
6
|
|
@@ -9,17 +9,25 @@ module FspHarvester
|
|
9
9
|
|
10
10
|
@@distillerknown = {}
|
11
11
|
|
12
|
-
def initialize(metadata_object:
|
12
|
+
def initialize(metadata_object: HarvesterTools::MetadataObject.new)
|
13
13
|
@meta = metadata_object
|
14
14
|
end
|
15
15
|
|
16
|
-
def process_html(body:, uri:)
|
17
|
-
|
18
|
-
tools.
|
19
|
-
tools.
|
16
|
+
def process_html(body:, uri:, metadata:)
|
17
|
+
@meta = metadata
|
18
|
+
tools = HarvesterTools::ExternalTools.new(metadata: @meta)
|
19
|
+
result = tools.process_with_distiller(body: body)
|
20
|
+
|
21
|
+
jsonld, microdata, microformat, opengraph, rdfa = tools.process_with_extruct(uri: uri)
|
22
|
+
parse_rdf(body: jsonld, content_type: 'application/ld+json')
|
23
|
+
@meta.merge_hash(microdata)
|
24
|
+
@meta.merge_hash(microformat)
|
25
|
+
@meta.merge_hash(opengraph)
|
26
|
+
parse_rdf(body: rdfa, content_type: 'application/ld+json')
|
20
27
|
end
|
21
28
|
|
22
|
-
def process_xml(body:)
|
29
|
+
def process_xml(body:, metadata:)
|
30
|
+
@meta = metadata
|
23
31
|
begin
|
24
32
|
hash = XmlSimple.xml_in(body)
|
25
33
|
rescue
|
@@ -30,7 +38,8 @@ module FspHarvester
|
|
30
38
|
@meta.hash.merge hash
|
31
39
|
end
|
32
40
|
|
33
|
-
def process_json(body:)
|
41
|
+
def process_json(body:, metadata:)
|
42
|
+
@meta = metadata
|
34
43
|
begin
|
35
44
|
hash = JSON.parse(body)
|
36
45
|
rescue
|
@@ -41,11 +50,17 @@ module FspHarvester
|
|
41
50
|
@meta.hash.merge hash
|
42
51
|
end
|
43
52
|
|
44
|
-
def process_ld(body:, content_type:)
|
45
|
-
|
53
|
+
def process_ld(body:, content_type:, metadata:)
|
54
|
+
@meta = metadata
|
55
|
+
parse_rdf(body: body, content_type: content_type, metadata: @meta)
|
56
|
+
end
|
57
|
+
|
58
|
+
def parse_rdf(body:, content_type:, metadata:)
|
59
|
+
self.class.parse_rdf(body: body, content_type: content_type, metadata: metadata)
|
46
60
|
end
|
47
61
|
|
48
|
-
def parse_rdf(body:, content_type:)
|
62
|
+
def self.parse_rdf(body:, content_type:, metadata:)
|
63
|
+
@meta = metadata
|
49
64
|
unless body
|
50
65
|
@meta.comments << "CRITICAL: The response message body component appears to have no content.\n"
|
51
66
|
@meta.add_warning(['018', '', ''])
|
@@ -65,7 +80,7 @@ module FspHarvester
|
|
65
80
|
return
|
66
81
|
end
|
67
82
|
|
68
|
-
graph =
|
83
|
+
graph = HarvesterTools::Cache.checkRDFCache(body: body)
|
69
84
|
if graph.size > 0
|
70
85
|
warn "\n\n\n unmarshalling graph from cache\n\ngraph size #{graph.size}\n\n"
|
71
86
|
@meta.merge_rdf(graph.to_a)
|
@@ -88,7 +103,7 @@ module FspHarvester
|
|
88
103
|
end
|
89
104
|
reader = rdfformat.reader.new(body) # have to re-read it here, but now its safe because we have already caught errors
|
90
105
|
warn 'WRITING TO CACHE'
|
91
|
-
|
106
|
+
HarvesterTools::Cache.writeRDFCache(reader: reader, body: body) # write to the special RDF graph cache
|
92
107
|
warn 'WRITING DONE'
|
93
108
|
reader = rdfformat.reader.new(body) # frustrating that we cannot rewind!
|
94
109
|
warn 'RE-READING DONE'
|
data/lib/signposting_tests.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
def check_for_citeas_conflicts(citeas: )
|
1
|
+
def check_for_citeas_conflicts(citeas:, metadata: )
|
2
|
+
@meta = metadata
|
2
3
|
@meta.comments << 'INFO: checking for conflicting cite-as links'
|
3
4
|
citeas_hrefs = Hash.new
|
4
5
|
citeas.each do |link|
|
@@ -6,7 +7,7 @@ def check_for_citeas_conflicts(citeas: )
|
|
6
7
|
@meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
|
7
8
|
citeas_hrefs[link.href] = link
|
8
9
|
end
|
9
|
-
|
10
|
+
#warn "finalhash #{citeas_hrefs}"
|
10
11
|
if citeas_hrefs.length > 1
|
11
12
|
@meta.comments << 'INFO: Found multiple non-identical cite-as links.'
|
12
13
|
@meta.add_warning(['007', '', ''])
|
@@ -16,7 +17,8 @@ def check_for_citeas_conflicts(citeas: )
|
|
16
17
|
end
|
17
18
|
|
18
19
|
|
19
|
-
def check_describedby_rules(describedby:)
|
20
|
+
def check_describedby_rules(describedby:, metadata:)
|
21
|
+
@meta = metadata
|
20
22
|
describedby.each do |l|
|
21
23
|
unless l.respond_to? 'type'
|
22
24
|
@meta.add_warning(['005', l.href, ''])
|
@@ -25,7 +27,7 @@ def check_describedby_rules(describedby:)
|
|
25
27
|
type = l.type if l.respond_to? 'type'
|
26
28
|
type ||= '*/*'
|
27
29
|
header = { accept: type }
|
28
|
-
response =
|
30
|
+
response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
29
31
|
if response
|
30
32
|
responsetype = response.headers[:content_type]
|
31
33
|
@meta.comments << "INFO: describedby link responds with content type #{responsetype}\n"
|
@@ -51,7 +53,8 @@ def check_describedby_rules(describedby:)
|
|
51
53
|
end
|
52
54
|
end
|
53
55
|
|
54
|
-
def check_item_rules(item:)
|
56
|
+
def check_item_rules(item:, metadata:)
|
57
|
+
@meta = metadata
|
55
58
|
item.each do |l| # l = LinkHeaders::Link
|
56
59
|
unless l.respond_to? 'type'
|
57
60
|
@meta.add_warning(['011', l.href, ''])
|
@@ -60,7 +63,7 @@ def check_item_rules(item:)
|
|
60
63
|
type = l.type if l.respond_to? 'type'
|
61
64
|
type ||= '*/*' # this becomes a frozen string
|
62
65
|
header = { accept: type }
|
63
|
-
response =
|
66
|
+
response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
|
64
67
|
|
65
68
|
if response
|
66
69
|
if response.headers[:content_type] and type != '*/*'
|
data/lib/warnings.json
CHANGED
@@ -1,110 +1,119 @@
|
|
1
1
|
{
|
2
2
|
"001": {
|
3
3
|
"message": "Unable to resolve guid using default (*/*) Accept headers",
|
4
|
-
"linkout": "",
|
4
|
+
"linkout": [{"FAIR Principle": "https://www.go-fair.org/fair-principles/metadata-retrievable-identifier-standardised-communication-protocol/"},
|
5
|
+
{"FAIRsharing": "https://doi.org/10.25504/FAIRsharing.cd2f9e"}
|
6
|
+
],
|
5
7
|
"severity": "WARN"
|
6
8
|
},
|
7
9
|
"002": {
|
8
10
|
"message": "HTTP Response (203) is non-authoritative",
|
9
|
-
"linkout": "",
|
11
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/203"}],
|
10
12
|
"severity": "WARN"
|
11
13
|
},
|
12
14
|
"003": {
|
13
15
|
"message": "HTTP Response indicates failure (500-range)",
|
14
|
-
"linkout": "",
|
16
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500"}],
|
15
17
|
"severity": "WARN"
|
16
18
|
},
|
17
19
|
"004": {
|
18
20
|
"message": "The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header",
|
19
|
-
"linkout": "",
|
21
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
20
22
|
"severity": "WARN"
|
21
23
|
},
|
22
24
|
"005": {
|
23
25
|
"message": "The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute indicating the Accept headers that should be sent with the request",
|
24
|
-
"linkout": "",
|
26
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
25
27
|
"severity": "WARN"
|
26
28
|
},
|
27
29
|
"006": {
|
28
30
|
"message": "GUID type not recognized",
|
29
|
-
"linkout": "",
|
31
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/search?fairsharingRegistry=Standard&recordType=identifier_schema&page=1"}],
|
30
32
|
"severity": "WARN"
|
31
33
|
},
|
32
34
|
"007": {
|
33
35
|
"message": "Conflicting cite-as links",
|
34
|
-
"linkout": "",
|
36
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
35
37
|
"severity": "WARN"
|
36
38
|
},
|
37
39
|
"008": {
|
38
40
|
"message": "describedby link does not resolve",
|
39
|
-
"linkout": "",
|
41
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
40
42
|
"severity": "WARN"
|
41
43
|
},
|
42
44
|
"009": {
|
43
45
|
"message": "Content-type of described-by link does not match the type attribute in the link header itself",
|
44
|
-
"linkout": "",
|
46
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"},
|
47
|
+
{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
45
48
|
"severity": "WARN"
|
46
49
|
},
|
47
50
|
"010": {
|
48
51
|
"message": "Content-type of response from described-by link is undefined or cannot be compared to the link type",
|
49
|
-
"linkout": "",
|
52
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
50
53
|
"severity": "WARN"
|
51
54
|
},
|
52
55
|
"011": {
|
53
56
|
"message": "The resource does not follow the FAIR Signposting standard, which encourages any item links to have a type attribute",
|
54
|
-
"linkout": "",
|
57
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
55
58
|
"severity": "WARN"
|
56
59
|
},
|
57
60
|
"012": {
|
58
61
|
"message": "Content-type of response from resolving an item doesn't match the item type attribute in the link header",
|
59
|
-
"linkout": "",
|
62
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"},
|
63
|
+
{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
60
64
|
"severity": "WARN"
|
61
65
|
},
|
62
66
|
"013": {
|
63
67
|
"message": "Content-type of response from resolving an item is undefined or cannot be compared to the link type",
|
64
|
-
"linkout": "",
|
68
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type"}],
|
65
69
|
"severity": "WARN"
|
66
70
|
},
|
67
71
|
"014": {
|
68
72
|
"message": "Item link does not resolve",
|
69
|
-
"linkout": "",
|
73
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
70
74
|
"severity": "WARN"
|
71
75
|
},
|
72
76
|
"015": {
|
73
77
|
"message": "Link headers do not include a link of type 'type', as required by the FAIR Signposting specification",
|
74
|
-
"linkout": "",
|
78
|
+
"linkout": [{"Documentation": "http://www.signposting.org/FAIR/#level1"}],
|
75
79
|
"severity": "WARN"
|
76
80
|
},
|
77
81
|
"016": {
|
78
82
|
"message": "Unable to resolve describedby link using Accept headers with the MIME type indicated in the link",
|
79
|
-
"linkout": "",
|
83
|
+
"linkout": [{"Documentation": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Content_negotiation"}],
|
80
84
|
"severity": "WARN"
|
81
85
|
},
|
82
86
|
"017": {
|
83
87
|
"message": "Metadata format not recognized.",
|
84
|
-
"linkout": "",
|
88
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/search?subjects=Computer%2520Science,subject%2520agnostic&page=1&recordType=model_and_format"}],
|
85
89
|
"severity": "WARN"
|
86
90
|
},
|
87
91
|
"018": {
|
88
92
|
"message": "RDF parsing error - likely malformed RDF document.",
|
89
|
-
"linkout": "",
|
93
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.p77ph9"},
|
94
|
+
{"Documentation": "http://www.w3.org/TR/2014/REC-rdf11-concepts-20140225/"},
|
95
|
+
{"Validator": "http://rdf.greggkellogg.net/distiller"}],
|
90
96
|
"severity": "WARN"
|
91
97
|
},
|
92
98
|
"019": {
|
93
99
|
"message": "HTML parsing error - unable to extract linked data from HTML.",
|
94
|
-
"linkout": "",
|
100
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.YugnuL"},
|
101
|
+
{"Documentation": "https://www.w3.org/TR/html53/"},
|
102
|
+
{"validator": "https://validator.w3.org/"}],
|
95
103
|
"severity": "WARN"
|
96
104
|
},
|
97
105
|
"020": {
|
98
106
|
"message": "XML parsing error - unable to process XML document.",
|
99
|
-
"linkout": "",
|
107
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.b5cc91"},
|
108
|
+
{"Documentation": "https://www.w3.org/TR/xml/"},
|
109
|
+
{"Validator": "https://www.xmlvalidation.com/"}],
|
100
110
|
"severity": "WARN"
|
101
111
|
},
|
102
112
|
"021": {
|
103
113
|
"message": "JSON parsing error - unable to process JSON document.",
|
104
|
-
"linkout": "",
|
114
|
+
"linkout": [{"FAIRsharing": "https://fairsharing.org/FAIRsharing.5bbab9"},
|
115
|
+
{"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
|
116
|
+
{"Validator": "https://jsononline.net/json-validator"}],
|
105
117
|
"severity": "WARN"
|
106
118
|
}
|
107
|
-
|
108
|
-
|
109
|
-
|
110
119
|
}
|
data/lib/web_utils.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
module FspHarvester
|
1
|
+
module HarvesterTools
|
2
2
|
|
3
3
|
class WebUtils
|
4
|
-
def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta:
|
4
|
+
def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta: HarvesterTools::MetadataObject.new)
|
5
5
|
warn 'In fetch routine now. '
|
6
6
|
|
7
7
|
begin
|
@@ -23,7 +23,7 @@ module FspHarvester
|
|
23
23
|
end
|
24
24
|
response
|
25
25
|
rescue RestClient::ExceptionWithResponse => e
|
26
|
-
warn "EXCEPTION WITH RESPONSE! #{e.response}\
|
26
|
+
warn "EXCEPTION WITH RESPONSE! #{e.response.code} with response #{e.response}\nfailed response headers: #{e.response.headers}"
|
27
27
|
meta.warnings << ["003", url, headers]
|
28
28
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
29
29
|
if (e.response.code == 500 or e.response.code == 404)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.10
|
4
|
+
version: 0.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.
|
47
|
+
version: 0.1.17
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.
|
54
|
+
version: 0.1.17
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: metainspector
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -182,17 +182,19 @@ files:
|
|
182
182
|
- bin/setup
|
183
183
|
- example_test.rb
|
184
184
|
- launch.json
|
185
|
+
- lib/config.conf
|
185
186
|
- lib/config.conf_docker
|
186
187
|
- lib/config.conf_local
|
187
188
|
- lib/constants.rb
|
189
|
+
- lib/external_tools.rb
|
188
190
|
- lib/fsp_harvester.rb
|
189
191
|
- lib/fsp_harvester/version.rb
|
190
|
-
- lib/
|
191
|
-
- lib/
|
192
|
-
- lib/
|
192
|
+
- lib/harvester.rb
|
193
|
+
- lib/harvester_utils.rb
|
194
|
+
- lib/metadata_harvester.rb
|
193
195
|
- lib/metadata_object.rb
|
196
|
+
- lib/metadata_parser.rb
|
194
197
|
- lib/signposting_tests.rb
|
195
|
-
- lib/swagger.rb
|
196
198
|
- lib/warnings.json
|
197
199
|
- lib/web_utils.rb
|
198
200
|
homepage: https://github.com/markwilkinson/FAIR-Signposting-Harvester
|