fsp_harvester 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f24e34b5239426a8555e5d893cb3692a1f03442f6b4af3c03c5751c975a7871b
4
- data.tar.gz: d372b73eb7693e5a4c9a2f78e20d02b62f3c195ed4185db7532018a45a694570
3
+ metadata.gz: 7da32f3321193e93f64154c35db0c840f5e7f086451660f99b62d3f4c834e295
4
+ data.tar.gz: 5a0a1ff4ef6b2100accd8bab4f20d6842d25ebf88d5e600e646804da9ed24bd9
5
5
  SHA512:
6
- metadata.gz: e85c8ba90bee37156e8a4d8e98ec0d8c2148ffc86e24ac3faf0adde1146efaafa2333e7e25acf2b5b4d05aa1f2a9a411deb18890175e93bd1b8d0980773e42c2
7
- data.tar.gz: f20559315f1b9aff81978600f50743fdee2d3b200eae0dc375ccf267fda90909cb16a6e43686a5efea0f6c583c90c4511c7390702ad7f9e9704f80f829da128b
6
+ metadata.gz: dad90f81b73489a151220c132d74508832573a352b209a537bbbaf6543a90b9e132cca80ecca33d15d02eca91841642d73e795113076f4564730194b5bf1fa53
7
+ data.tar.gz: 5d11c2f002f4e73a4971aec0d047cd14d35f6763155d0effa9c79f419ff6fd26f853158b290d2264edd148633570f5ecd50d4e1f6c7e9d136c7d2880517bdefc
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.6)
4
+ fsp_harvester (0.1.7)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
- linkheaders-processor (~> 0.1.12)
7
+ linkheaders-processor (~> 0.1.13)
8
8
  metainspector (~> 5.11.2)
9
9
  parseconfig (~> 1.1)
10
10
  rake (~> 13.0)
@@ -126,7 +126,7 @@ GEM
126
126
  shex (~> 0.7)
127
127
  sparql (~> 3.2)
128
128
  sparql-client (~> 3.2)
129
- linkheaders-processor (0.1.12)
129
+ linkheaders-processor (0.1.13)
130
130
  json (~> 2.0)
131
131
  json-ld (~> 3.2)
132
132
  json-ld-preloaded (~> 3.2)
data/Rakefile CHANGED
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "bundler/gem_tasks"
4
- require "rspec/core/rake_task"
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
- require "rubocop/rake_task"
8
+ require 'rubocop/rake_task'
9
9
 
10
10
  RuboCop::RakeTask.new
11
11
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.6"
4
+ VERSION = "0.1.7" # up to date
5
5
  end
data/lib/fsp_harvester.rb CHANGED
@@ -19,6 +19,7 @@ require 'rdf/xsd'
19
19
  require_relative './metadata_object'
20
20
  require_relative './constants'
21
21
  require_relative './web_utils'
22
+ require_relative './signposting_tests'
22
23
 
23
24
  module FspHarvester
24
25
  class Error < StandardError
@@ -27,14 +28,15 @@ module FspHarvester
27
28
  class Utils
28
29
  # @@distillerknown = {} # global, hash of sha256 keys of message bodies - have they been seen before t/f
29
30
  # @warnings = JSON.parse(File.read("warnings.json"))
30
- @meta = FspHarvester::MetadataObject.new
31
+
31
32
 
32
33
  def self.resolve_guid(guid:)
34
+ @meta = FspHarvester::MetadataObject.new
33
35
  @meta.finalURI = [guid]
34
36
  type, url = convertToURL(guid: guid)
35
- links = []
37
+ links = Array.new
36
38
  if type
37
- links, @meta = resolve_url(url: url)
39
+ links = resolve_url(url: url)
38
40
  else
39
41
  @meta.warnings << ['006', guid, '']
40
42
  @meta.comments << "FATAL: GUID type not recognized.\n"
@@ -75,14 +77,14 @@ module FspHarvester
75
77
  unless response
76
78
  @meta.warnings << ['001', url, header]
77
79
  @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
78
- return [[], @meta]
80
+ return []
79
81
  end
80
82
 
81
83
  @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.finalURI.last}. Using the output from this URL for the next few tests..."
82
84
  @meta.full_response << response.body
83
85
 
84
86
  links = process_link_headers(response: response) unless nolinkheaders
85
- [links, @meta]
87
+ links
86
88
  end
87
89
 
88
90
  def self.process_link_headers(response:)
@@ -92,111 +94,42 @@ module FspHarvester
92
94
  parser.extract_and_parse(response: response)
93
95
  factory = parser.factory # LinkHeaders::LinkFactory
94
96
 
95
- warn "\n\n length #{factory.all_links.length}\n\n"
97
+ warn "\n\n length bfore #{factory.all_links.length}\n\n"
96
98
  signpostingcheck(factory: factory)
99
+ warn "\n\n length aftr #{factory.all_links.length}\n\n"
100
+ warn "\n\n links #{factory.all_links}\n\n"
101
+ factory.all_links
97
102
  end
98
103
 
99
104
  def self.signpostingcheck(factory:)
100
- citeas = 0
101
- describedby = 0
105
+ citeas = Array.new
106
+ describedby = Array.new
107
+ item = Array.new
102
108
  factory.all_links.each do |l|
103
109
  case l.relation
104
110
  when 'cite-as'
105
- citeas += 1
111
+ citeas << l
106
112
  when 'item'
107
- if !(l.respond_to? 'type')
108
- @meta.warnings << ['011', l.href, '']
109
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which encourages any item links to also have a 'type' attribute.\n"
110
- end
111
- type = l.type if l.respond_to? 'type'
112
- type = '*/*' unless type # this becomes a frozen string
113
- header = { accept: type }
114
- response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
115
-
116
- if response
117
- if response.headers[:content_type] and !(type == '*/*')
118
- rtype = type.gsub(%r{/}, "\/") # because type is a frozen string
119
- rtype = rtype.gsub(/\+/, '.')
120
- typeregex = Regexp.new(type)
121
- if response.headers[:content_type].match(typeregex)
122
- warn response.headers[:content_type]
123
- warn typeregex.inspect
124
- @meta.comments << "INFO: item link responds according to Signposting specifications\n"
125
- else
126
- @meta.warnings << ['012', l.href, header]
127
- @meta.comments << "WARN: Content type of returned item link does not match the 'type' attribute\n"
128
- end
129
- else
130
- @meta.warnings << ['013', l.href, header]
131
- @meta.comments << "WARN: Content type of returned item link is not specified in response headers or cannot be matched against accept headers\n"
132
- end
133
- else
134
- @meta.warnings << ['014', l.href, header]
135
- @meta.comments << "WARN: item link doesn't resolve\n"
136
- end
137
-
113
+ item << l
138
114
  when 'describedby'
139
- describedby += 1
140
- if !(l.respond_to? 'type')
141
- @meta.warnings << ['005', l.href, '']
142
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute.\n"
143
- end
144
- type = l.type if l.respond_to? 'type'
145
- type = '*/*' unless type
146
- header = { accept: type }
147
- response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
148
- if response
149
- if response.headers[:content_type] and !(type == '*/*')
150
- rtype = type.gsub(%r{/}, "\/")
151
- rtype = rtype.gsub(/\+/, '.')
152
- typeregex = Regexp.new(rtype)
153
- if response.headers[:content_type].match(typeregex)
154
- warn response.headers[:content_type]
155
- warn typeregex.inspect
156
- @meta.comments << "INFO: describedby link responds according to Signposting specifications\n"
157
- else
158
- @meta.warnings << ['009', l.href, header]
159
- @meta.comments << "WARN: Content type of returned describedby link does not match the 'type' attribute\n"
160
- end
161
- else
162
- @meta.warnings << ['010', l.href, header]
163
- @meta.comments << "WARN: Content type of returned describedby link is not specified in response headers or cannot be matched against accept headers\n"
164
- end
165
- else
166
- @meta.warnings << ['008', l.href, header]
167
- @meta.comments << "WARN: describedby link doesn't resolve\n"
168
- end
115
+ describedby << l
169
116
  end
170
117
  end
171
- if citeas > 1
118
+
119
+ check_describedby_rules(describedby: describedby)
120
+ check_item_rules(item: item)
121
+
122
+ uniqueciteas = Array.new
123
+ if citeas.length > 1
124
+ warn "INFO: multiple cite-as links found. Checking for conflicts\n"
172
125
  @meta.comments << "INFO: multiple cite-as links found. Checking for conflicts\n"
173
- citeas = check_for_citeas_conflicts(factory: factory) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
126
+ uniqueciteas = check_for_citeas_conflicts(citeas: citeas) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
174
127
  end
175
128
 
176
- unless citeas == 1 && describedby > 0
129
+ unless uniqueciteas == 1 && describedby.length > 0
177
130
  @meta.warnings << ['004', '', '']
178
131
  @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header\n"
179
132
  end
180
- factory.all_links
181
- end
182
-
183
- def self.check_for_citeas_conflicts(factory:)
184
- @meta.comments << 'INFO: checking for conflicting cite-as links'
185
- citeas = []
186
- factory.all_links.each do |link|
187
- next unless link.relation == 'cite-as'
188
-
189
- @meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
190
- citeas << link.href
191
- end
192
-
193
- if citeas.uniq.length == 1
194
- @meta.comments << 'INFO: No conflicting cite-as links found.'
195
- else # only one allowed!
196
- @meta.warnings << ['007', '', '']
197
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard: Found conflicting cite-as link headers\n"
198
- end
199
- citeas.uniq
200
133
  end
201
134
  end
202
135
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-28 00:00:00.000000000 Z
11
+ date: 2022-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.1.12
47
+ version: 0.1.13
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.1.12
54
+ version: 0.1.13
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metainspector
57
57
  requirement: !ruby/object:Gem::Requirement