fsp_harvester 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ceb83f45b1094afb1bab24e948cf863c764cb9f045f42706017dd36c7507aa1f
4
- data.tar.gz: 41fdf2bfb9166cfaff70058523f442630967721541cb9edc1fc147932f0229b3
3
+ metadata.gz: 7da32f3321193e93f64154c35db0c840f5e7f086451660f99b62d3f4c834e295
4
+ data.tar.gz: 5a0a1ff4ef6b2100accd8bab4f20d6842d25ebf88d5e600e646804da9ed24bd9
5
5
  SHA512:
6
- metadata.gz: 7db52cf3b6fcfc8b4ab9a9f5b470ab97d24e192130eb54c3850a5b0c46a4dd4595748266f50322775d3078f7e372c0ba00c4a414342b1b7b82268565b230a173
7
- data.tar.gz: c2f2bc421c0c0dca304d4ed7e9393b023b9d1af4fc6e0457b334296010d0e08132f44fed64202afb52df913ba6a8b69e1c91b0309b6020f8d79a3f7cb7de2a03
6
+ metadata.gz: dad90f81b73489a151220c132d74508832573a352b209a537bbbaf6543a90b9e132cca80ecca33d15d02eca91841642d73e795113076f4564730194b5bf1fa53
7
+ data.tar.gz: 5d11c2f002f4e73a4971aec0d047cd14d35f6763155d0effa9c79f419ff6fd26f853158b290d2264edd148633570f5ecd50d4e1f6c7e9d136c7d2880517bdefc
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.4)
4
+ fsp_harvester (0.1.7)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
- linkheaders-processor (~> 0.1.11)
7
+ linkheaders-processor (~> 0.1.13)
8
8
  metainspector (~> 5.11.2)
9
9
  parseconfig (~> 1.1)
10
10
  rake (~> 13.0)
@@ -126,7 +126,7 @@ GEM
126
126
  shex (~> 0.7)
127
127
  sparql (~> 3.2)
128
128
  sparql-client (~> 3.2)
129
- linkheaders-processor (0.1.11)
129
+ linkheaders-processor (0.1.13)
130
130
  json (~> 2.0)
131
131
  json-ld (~> 3.2)
132
132
  json-ld-preloaded (~> 3.2)
data/Rakefile CHANGED
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "bundler/gem_tasks"
4
- require "rspec/core/rake_task"
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
- require "rubocop/rake_task"
8
+ require 'rubocop/rake_task'
9
9
 
10
10
  RuboCop::RakeTask.new
11
11
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.7" # up to date
5
5
  end
data/lib/fsp_harvester.rb CHANGED
@@ -1,24 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "fsp_harvester/version"
4
- require "json/ld"
5
- require "json/ld/preloaded"
6
- require "json"
7
- require "linkheaders/processor"
8
- require "addressable"
9
- require "tempfile"
10
- require "xmlsimple"
11
- require "nokogiri"
12
- require "parseconfig"
13
- require "rest-client"
14
- require "cgi"
15
- require "digest"
16
- require "open3"
17
- require "metainspector"
18
- require "rdf/xsd"
19
- require_relative "./metadata_object"
20
- require_relative "./constants"
21
- require_relative "./web_utils"
3
+ require_relative 'fsp_harvester/version'
4
+ require 'json/ld'
5
+ require 'json/ld/preloaded'
6
+ require 'json'
7
+ require 'linkheaders/processor'
8
+ require 'addressable'
9
+ require 'tempfile'
10
+ require 'xmlsimple'
11
+ require 'nokogiri'
12
+ require 'parseconfig'
13
+ require 'rest-client'
14
+ require 'cgi'
15
+ require 'digest'
16
+ require 'open3'
17
+ require 'metainspector'
18
+ require 'rdf/xsd'
19
+ require_relative './metadata_object'
20
+ require_relative './constants'
21
+ require_relative './web_utils'
22
+ require_relative './signposting_tests'
22
23
 
23
24
  module FspHarvester
24
25
  class Error < StandardError
@@ -27,33 +28,34 @@ module FspHarvester
27
28
  class Utils
28
29
  # @@distillerknown = {} # global, hash of sha256 keys of message bodies - have they been seen before t/f
29
30
  # @warnings = JSON.parse(File.read("warnings.json"))
30
- @meta = FspHarvester::MetadataObject.new
31
+
31
32
 
32
33
  def self.resolve_guid(guid:)
34
+ @meta = FspHarvester::MetadataObject.new
33
35
  @meta.finalURI = [guid]
34
36
  type, url = convertToURL(guid: guid)
35
37
  links = Array.new
36
- unless type
37
- @meta.warnings << ["006", guid, ""]
38
- @meta.comments << "FATAL: GUID type not recognized.\n"
38
+ if type
39
+ links = resolve_url(url: url)
39
40
  else
40
- links, @meta = resolve_url(url: url)
41
+ @meta.warnings << ['006', guid, '']
42
+ @meta.comments << "FATAL: GUID type not recognized.\n"
41
43
  end
42
44
  [links, @meta]
43
45
  end
44
46
 
45
47
  def self.convertToURL(guid:)
46
48
  GUID_TYPES.each do |k, regex|
47
- if k == "inchi" and regex.match(guid)
48
- return "inchi", "https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/#{guid}"
49
- elsif k == "handle1" and regex.match(guid)
50
- return "handle", "http://hdl.handle.net/#{guid}"
51
- elsif k == "handle2" and regex.match(guid)
52
- return "handle", "http://hdl.handle.net/#{guid}"
53
- elsif k == "uri" and regex.match(guid)
54
- return "uri", guid
55
- elsif k == "doi" and regex.match(guid)
56
- return "doi", "https://doi.org/#{guid}"
49
+ if k == 'inchi' and regex.match(guid)
50
+ return 'inchi', "https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/#{guid}"
51
+ elsif k == 'handle1' and regex.match(guid)
52
+ return 'handle', "http://hdl.handle.net/#{guid}"
53
+ elsif k == 'handle2' and regex.match(guid)
54
+ return 'handle', "http://hdl.handle.net/#{guid}"
55
+ elsif k == 'uri' and regex.match(guid)
56
+ return 'uri', guid
57
+ elsif k == 'doi' and regex.match(guid)
58
+ return 'doi', "https://doi.org/#{guid}"
57
59
  end
58
60
  end
59
61
  [nil, nil]
@@ -66,23 +68,23 @@ module FspHarvester
66
68
  false
67
69
  end
68
70
 
69
- def self.resolve_url(url:, nolinkheaders: false, header: ACCEPT_ALL_HEADER)
70
- @meta.guidtype = "uri" if @meta.guidtype.nil?
71
+ def self.resolve_url(url:, method: :get, nolinkheaders: false, header: ACCEPT_ALL_HEADER)
72
+ @meta.guidtype = 'uri' if @meta.guidtype.nil?
71
73
  warn "\n\n FETCHING #{url} #{header}\n\n"
72
- response = FspHarvester::WebUtils.fspfetch(url: url, headers: header)
73
- warn "\n\n head #{response.headers.inspect}\n\n"
74
+ response = FspHarvester::WebUtils.fspfetch(url: url, headers: header, method: method)
75
+ warn "\n\n head #{response.headers.inspect}\n\n" if response
74
76
 
75
77
  unless response
76
- @meta.warnings << ["001", url, header]
78
+ @meta.warnings << ['001', url, header]
77
79
  @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
78
- return [[], @meta]
80
+ return []
79
81
  end
80
82
 
81
83
  @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.finalURI.last}. Using the output from this URL for the next few tests..."
82
84
  @meta.full_response << response.body
83
85
 
84
86
  links = process_link_headers(response: response) unless nolinkheaders
85
- [links, @meta]
87
+ links
86
88
  end
87
89
 
88
90
  def self.process_link_headers(response:)
@@ -90,47 +92,43 @@ module FspHarvester
90
92
 
91
93
  parser = LinkHeaders::Processor.new(default_anchor: @meta.finalURI.last)
92
94
  parser.extract_and_parse(response: response)
93
- factory = parser.factory # LinkHeaders::LinkFactory
95
+ factory = parser.factory # LinkHeaders::LinkFactory
94
96
 
95
- citeas = 0
96
- describedby = 0
97
- warn "\n\n length #{factory.all_links.length}\n\n"
97
+ warn "\n\n length bfore #{factory.all_links.length}\n\n"
98
+ signpostingcheck(factory: factory)
99
+ warn "\n\n length aftr #{factory.all_links.length}\n\n"
100
+ warn "\n\n links #{factory.all_links}\n\n"
101
+ factory.all_links
102
+ end
98
103
 
104
+ def self.signpostingcheck(factory:)
105
+ citeas = Array.new
106
+ describedby = Array.new
107
+ item = Array.new
99
108
  factory.all_links.each do |l|
100
109
  case l.relation
101
- when "cite-as"
102
- citeas += 1
103
- when "describedby"
104
- describedby += 1
105
- unless l.respond_to? "type"
106
- @meta.warnings << ["005", l.url, ""]
107
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires any describedby links to also have a 'type' attribute\n"
108
- end
110
+ when 'cite-as'
111
+ citeas << l
112
+ when 'item'
113
+ item << l
114
+ when 'describedby'
115
+ describedby << l
109
116
  end
110
117
  end
111
- if citeas > 1
112
- self.check_for_conflicts(factory: factory) # this merelty adsds to the metadata objects if there are conflicts
113
- end
114
118
 
115
- unless citeas == 1 && describedby > 0
116
- @meta.warnings << ["004", "", ""]
117
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header\n"
118
- end
119
- factory.all_links
120
- end
119
+ check_describedby_rules(describedby: describedby)
120
+ check_item_rules(item: item)
121
121
 
122
- def self.check_for_conflicts(factory:) # incoming: {"link1" => {"sectiontype1" => value, "sectiontype2" => value2}}
123
- @meta.comments << "INFO: checking for conflicting cite-as links"
124
- citeas = Array.new
125
- factory.all_links.each do |link|
126
- next unless link.relation == 'cite-as'
127
- citeas << link.href
122
+ uniqueciteas = Array.new
123
+ if citeas.length > 1
124
+ warn "INFO: multiple cite-as links found. Checking for conflicts\n"
125
+ @meta.comments << "INFO: multiple cite-as links found. Checking for conflicts\n"
126
+ uniqueciteas = check_for_citeas_conflicts(citeas: citeas) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
128
127
  end
129
- unless citeas == citeas.uniq
130
- @meta.warnings << ["007", url, header]
131
- @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard: Found conflicting cite-as link headers\n"
132
- else
133
- @meta.comments << "INFO: No conflicting cite-as links found."
128
+
129
+ unless uniqueciteas == 1 && describedby.length > 0
130
+ @meta.warnings << ['004', '', '']
131
+ @meta.comments << "WARN: The resource does not follow the FAIR Signposting standard, which requires exactly one cite-as header, and at least one describedby header\n"
134
132
  end
135
133
  end
136
134
  end
data/lib/warnings.json CHANGED
@@ -28,6 +28,47 @@
28
28
  "message": "GUID type not recognized",
29
29
  "linkout": "",
30
30
  "severity": "WARN"
31
+ },
32
+ "007": {
33
+ "message": "Conflicting cite-as links",
34
+ "linkout": "",
35
+ "severity": "WARN"
36
+ },
37
+ "008": {
38
+ "message": "describedby link does not resolve",
39
+ "linkout": "",
40
+ "severity": "WARN"
41
+ },
42
+ "009": {
43
+ "message": "Content-type of described-by link does not match the type attribute in the link header itself",
44
+ "linkout": "",
45
+ "severity": "WARN"
46
+ },
47
+ "010": {
48
+ "message": "Content-type of response from described-by link is undefined or cannot be compared to the link type",
49
+ "linkout": "",
50
+ "severity": "WARN"
51
+ },
52
+ "011": {
53
+ "message": "The resource does not follow the FAIR Signposting standard, which encourages any item links to have a type attribute",
54
+ "linkout": "",
55
+ "severity": "WARN"
56
+ },
57
+ "012": {
58
+ "message": "Content-type of response from resolving an item doesn't match the item type attribute in the link header",
59
+ "linkout": "",
60
+ "severity": "WARN"
61
+ },
62
+ "013": {
63
+ "message": "Content-type of response from resolving an item is undefined or cannot be compared to the link type",
64
+ "linkout": "",
65
+ "severity": "WARN"
66
+ },
67
+ "014": {
68
+ "message": "Item link does not resolve",
69
+ "linkout": "",
70
+ "severity": "WARN"
31
71
  }
72
+
32
73
 
33
74
  }
data/lib/web_utils.rb CHANGED
@@ -1,13 +1,13 @@
1
1
  module FspHarvester
2
2
 
3
3
  class WebUtils
4
- def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER) # we will try to retrieve turtle whenever possible
4
+ def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get)
5
5
  warn 'In fetch routine now. '
6
6
 
7
7
  begin
8
8
  warn "executing call over the Web to #{url}"
9
9
  response = RestClient::Request.execute({
10
- method: :get,
10
+ method: method,
11
11
  url: url.to_s,
12
12
  # user: user,
13
13
  # password: pass,
@@ -26,7 +26,7 @@ module FspHarvester
26
26
  warn "EXCEPTION WITH RESPONSE! #{e.response}\n#{e.response.headers}"
27
27
  @meta.warnings << ["003", url, headers] if @meta
28
28
  @meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n" if @meta
29
- if e.response.code == 500
29
+ if (e.response.code == 500 or e.response.code == 404)
30
30
  return false
31
31
  else
32
32
  e.response
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-27 00:00:00.000000000 Z
11
+ date: 2022-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.1.11
47
+ version: 0.1.13
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.1.11
54
+ version: 0.1.13
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metainspector
57
57
  requirement: !ruby/object:Gem::Requirement