fsp_harvester 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f24e34b5239426a8555e5d893cb3692a1f03442f6b4af3c03c5751c975a7871b
4
- data.tar.gz: d372b73eb7693e5a4c9a2f78e20d02b62f3c195ed4185db7532018a45a694570
3
+ metadata.gz: 7da32f3321193e93f64154c35db0c840f5e7f086451660f99b62d3f4c834e295
4
+ data.tar.gz: 5a0a1ff4ef6b2100accd8bab4f20d6842d25ebf88d5e600e646804da9ed24bd9
5
5
  SHA512:
6
- metadata.gz: e85c8ba90bee37156e8a4d8e98ec0d8c2148ffc86e24ac3faf0adde1146efaafa2333e7e25acf2b5b4d05aa1f2a9a411deb18890175e93bd1b8d0980773e42c2
7
- data.tar.gz: f20559315f1b9aff81978600f50743fdee2d3b200eae0dc375ccf267fda90909cb16a6e43686a5efea0f6c583c90c4511c7390702ad7f9e9704f80f829da128b
6
+ metadata.gz: dad90f81b73489a151220c132d74508832573a352b209a537bbbaf6543a90b9e132cca80ecca33d15d02eca91841642d73e795113076f4564730194b5bf1fa53
7
+ data.tar.gz: 5d11c2f002f4e73a4971aec0d047cd14d35f6763155d0effa9c79f419ff6fd26f853158b290d2264edd148633570f5ecd50d4e1f6c7e9d136c7d2880517bdefc
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.6)
4
+ fsp_harvester (0.1.7)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
- linkheaders-processor (~> 0.1.12)
7
+ linkheaders-processor (~> 0.1.13)
8
8
  metainspector (~> 5.11.2)
9
9
  parseconfig (~> 1.1)
10
10
  rake (~> 13.0)
@@ -126,7 +126,7 @@ GEM
126
126
  shex (~> 0.7)
127
127
  sparql (~> 3.2)
128
128
  sparql-client (~> 3.2)
129
- linkheaders-processor (0.1.12)
129
+ linkheaders-processor (0.1.13)
130
130
  json (~> 2.0)
131
131
  json-ld (~> 3.2)
132
132
  json-ld-preloaded (~> 3.2)
data/Rakefile CHANGED
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "bundler/gem_tasks"
4
- require "rspec/core/rake_task"
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
- require "rubocop/rake_task"
8
+ require 'rubocop/rake_task'
9
9
 
10
10
  RuboCop::RakeTask.new
11
11
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.6"
4
+ VERSION = "0.1.7" # up to date
5
5
  end
data/lib/fsp_harvester.rb CHANGED
@@ -19,6 +19,7 @@ require 'rdf/xsd'
19
19
  require_relative './metadata_object'
20
20
  require_relative './constants'
21
21
  require_relative './web_utils'
22
+ require_relative './signposting_tests'
22
23
 
23
24
  module FspHarvester
24
25
  class Error < StandardError
@@ -27,14 +28,15 @@ module FspHarvester
27
28
  class Utils
28
29
  # @@distillerknown = {} # global, hash of sha256 keys of message bodies - have they been seen before t/f
29
30
  # @warnings = JSON.parse(File.read("warnings.json"))
30
- @meta = FspHarvester::MetadataObject.new
31
+
31
32
 
32
33
  def self.resolve_guid(guid:)
34
+ @meta = FspHarvester::MetadataObject.new
33
35
  @meta.finalURI = [guid]
34
36
  type, url = convertToURL(guid: guid)
35
- links = []
37
+ links = Array.new
36
38
  if type
37
- links, @meta = resolve_url(url: url)
39
+ links = resolve_url(url: url)
38
40
  else
39
41
  @meta.warnings << ['006', guid, '']
40
42
  @meta.comments << "FATAL: GUID type not recognized.\n"
@@ -75,14 +77,14 @@ module FspHarvester
75
77
  unless response
76
78
  @meta.warnings << ['001', url, header]
77
79
  @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
78
- return [[], @meta]
80
+ return []
79
81
  end
80
82
 
81
83
  @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.finalURI.last}. Using the output from this URL for the next few tests..."
82
84
  @meta.full_response << response.body
83
85
 
84
86
  links = process_link_headers(response: response) unless nolinkheaders
85
- [links, @meta]
87
+ links
86
88
  end
87
89
 
88
90
  def self.process_link_headers(response:)
@@ -92,111 +94,42 @@ module FspHarvester
92
94
  parser.extract_and_parse(response: response)
93
95
  factory = parser.factory # LinkHeaders::LinkFactory
94
96
 
95
- warn "\n\n length #{factory.all_links.length}\n\n"
97
+ warn "\n\n length bfore #{factory.all_links.length}\n\n"
96
98
  signpostingcheck(factory: factory)
99
+ warn "\n\n length aftr #{factory.all_links.length}\n\n"
100
+ warn "\n\n links #{factory.all_links}\n\n"
101
+ factory.all_links
97
102
  end
98
103
 
99
104
  def self.signpostingcheck(factory:)
100
- citeas = 0
101
- describedby = 0
105
+ citeas = Array.new
106
+ describedby = Array.new
107
+ item = Array.new
102
108
  factory.all_links.each do |l|
103
109
  case l.relation
104
110
  when 'cite-as'
105
- citeas += 1
111
+ citeas << l
106
112
  when 'item'
107
- if !(l.respond_to? 'type')
108
- @meta.warnings << ['011', l.href, '']
109
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which encourages any item links to also have a 'type' attribute.\n"
110
- end
111
- type = l.type if l.respond_to? 'type'
112
- type = '*/*' unless type # this becomes a frozen string
113
- header = { accept: type }
114
- response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
115
-
116
- if response
117
- if response.headers[:content_type] and !(type == '*/*')
118
- rtype = type.gsub(%r{/}, "\/") # because type is a frozen string
119
- rtype = rtype.gsub(/\+/, '.')
120
- typeregex = Regexp.new(type)
121
- if response.headers[:content_type].match(typeregex)
122
- warn response.headers[:content_type]
123
- warn typeregex.inspect
124
- @meta.comments << "INFO: item link responds according to Signposting specifications\n"
125
- else
126
- @meta.warnings << ['012', l.href, header]
127
- @meta.comments << "WARN: Content type of returned item link does not match the 'type' attribute\n"
128
- end
129
- else
130
- @meta.warnings << ['013', l.href, header]
131
- @meta.comments << "WARN: Content type of returned item link is not specified in response headers or cannot be matched against accept headers\n"
132
- end
133
- else
134
- @meta.warnings << ['014', l.href, header]
135
- @meta.comments << "WARN: item link doesn't resolve\n"
136
- end
137
-
113
+ item << l
138
114
  when 'describedby'
139
- describedby += 1
140
- if !(l.respond_to? 'type')
141
- @meta.warnings << ['005', l.href, '']
142
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute.\n"
143
- end
144
- type = l.type if l.respond_to? 'type'
145
- type = '*/*' unless type
146
- header = { accept: type }
147
- response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
148
- if response
149
- if response.headers[:content_type] and !(type == '*/*')
150
- rtype = type.gsub(%r{/}, "\/")
151
- rtype = rtype.gsub(/\+/, '.')
152
- typeregex = Regexp.new(rtype)
153
- if response.headers[:content_type].match(typeregex)
154
- warn response.headers[:content_type]
155
- warn typeregex.inspect
156
- @meta.comments << "INFO: describedby link responds according to Signposting specifications\n"
157
- else
158
- @meta.warnings << ['009', l.href, header]
159
- @meta.comments << "WARN: Content type of returned describedby link does not match the 'type' attribute\n"
160
- end
161
- else
162
- @meta.warnings << ['010', l.href, header]
163
- @meta.comments << "WARN: Content type of returned describedby link is not specified in response headers or cannot be matched against accept headers\n"
164
- end
165
- else
166
- @meta.warnings << ['008', l.href, header]
167
- @meta.comments << "WARN: describedby link doesn't resolve\n"
168
- end
115
+ describedby << l
169
116
  end
170
117
  end
171
- if citeas > 1
118
+
119
+ check_describedby_rules(describedby: describedby)
120
+ check_item_rules(item: item)
121
+
122
+ uniqueciteas = Array.new
123
+ if citeas.length > 1
124
+ warn "INFO: multiple cite-as links found. Checking for conflicts\n"
172
125
  @meta.comments << "INFO: multiple cite-as links found. Checking for conflicts\n"
173
- citeas = check_for_citeas_conflicts(factory: factory) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
126
+ uniqueciteas = check_for_citeas_conflicts(citeas: citeas) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
174
127
  end
175
128
 
176
- unless citeas == 1 && describedby > 0
129
+ unless uniqueciteas == 1 && describedby.length > 0
177
130
  @meta.warnings << ['004', '', '']
178
131
  @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header\n"
179
132
  end
180
- factory.all_links
181
- end
182
-
183
- def self.check_for_citeas_conflicts(factory:)
184
- @meta.comments << 'INFO: checking for conflicting cite-as links'
185
- citeas = []
186
- factory.all_links.each do |link|
187
- next unless link.relation == 'cite-as'
188
-
189
- @meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
190
- citeas << link.href
191
- end
192
-
193
- if citeas.uniq.length == 1
194
- @meta.comments << 'INFO: No conflicting cite-as links found.'
195
- else # only one allowed!
196
- @meta.warnings << ['007', '', '']
197
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard: Found conflicting cite-as link headers\n"
198
- end
199
- citeas.uniq
200
133
  end
201
134
  end
202
135
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-28 00:00:00.000000000 Z
11
+ date: 2022-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.1.12
47
+ version: 0.1.13
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.1.12
54
+ version: 0.1.13
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metainspector
57
57
  requirement: !ruby/object:Gem::Requirement