fsp_harvester 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 895567e9edd571dbca7dee89a0270d1c14342fed06c3eb81c81e06f3c07ddbed
4
- data.tar.gz: 7eee65295c206d6cee7b4ef28830f64087ba172a294cde7401490bffa20dbe1a
3
+ metadata.gz: b38eea15fa26a3fe07290024342f8b6121dbd78c3cd2dd3496ca118fca22f6d4
4
+ data.tar.gz: a25ea37ecd78b2ef8dc41dca391c161ba4b262d910dbadcf34f07f0cd8e54af5
5
5
  SHA512:
6
- metadata.gz: f0c7727598525cb55b6c2bfaf36d5ce3dda5da6efddf85888328b7c93b874c508989122627e5deaa5101fc0a20279432aa023ecefef112926219f267e3622234
7
- data.tar.gz: 29f834c57ec73e27f988948893dc92fe56550b829585df390a9a1398770845115202289f6f9557c01eb2fc3eec218f863371db60649f6a3fef01da9457c2862e
6
+ metadata.gz: 83b766f2896a0776ed75ab3fd1e235d2c80173d3ffc0c22aea80b234856392497daf312db36461f03d0ca168228e6e385edf6d789b42dc4b43fcd6a073cda234
7
+ data.tar.gz: 16d74c199a138db0225e88c0e092a5272a518785c397d27da74c511a69260444e6c1c440021e08413c131d672cd2d528e8d19dbd8a20ca1bff97dad38600c995
data/.rspec_status CHANGED
@@ -1,55 +1,55 @@
1
1
  example_id | status | run_time |
2
2
  ---------------------------------- | ------ | --------------- |
3
- ./spec/cite-as_spec.rb[1:1:1] | passed | 1.61 seconds |
4
- ./spec/cite-as_spec.rb[1:1:2] | passed | 1.18 seconds |
5
- ./spec/cite-as_spec.rb[1:1:3] | passed | 1.02 seconds |
6
- ./spec/cite-as_spec.rb[1:1:4] | passed | 1.6 seconds |
7
- ./spec/cite-as_spec.rb[1:1:5] | passed | 2.78 seconds |
8
- ./spec/cite-as_spec.rb[1:1:6] | passed | 2.09 seconds |
9
- ./spec/cite-as_spec.rb[1:1:7] | passed | 2.98 seconds |
10
- ./spec/cite-as_spec.rb[1:1:8] | passed | 2.2 seconds |
11
- ./spec/cite-as_spec.rb[1:1:9] | passed | 2.87 seconds |
12
- ./spec/cite-as_spec.rb[1:1:10] | passed | 2.18 seconds |
13
- ./spec/cite-as_spec.rb[1:1:11] | passed | 3.16 seconds |
14
- ./spec/cite-as_spec.rb[1:1:12] | passed | 2.36 seconds |
15
- ./spec/cite-as_spec.rb[1:1:13] | passed | 2.89 seconds |
16
- ./spec/cite-as_spec.rb[1:1:14] | passed | 2.13 seconds |
17
- ./spec/cite-as_spec.rb[1:1:15] | passed | 1.18 seconds |
18
- ./spec/cite-as_spec.rb[1:1:16] | passed | 1.3 seconds |
19
- ./spec/cite-as_spec.rb[1:1:17] | passed | 1.17 seconds |
20
- ./spec/cite-as_spec.rb[1:1:18] | passed | 1.2 seconds |
21
- ./spec/cite-as_spec.rb[1:1:19] | passed | 1.71 seconds |
22
- ./spec/cite-as_spec.rb[1:1:20] | passed | 1.69 seconds |
23
- ./spec/cite-as_spec.rb[1:1:21] | passed | 2.22 seconds |
24
- ./spec/cite-as_spec.rb[1:1:22] | passed | 1.09 seconds |
25
- ./spec/cite-as_spec.rb[1:1:23] | passed | 1.17 seconds |
26
- ./spec/cite-as_spec.rb[1:1:24] | failed | 1.2 seconds |
27
- ./spec/cite-as_spec.rb[1:1:25] | passed | 0.48048 seconds |
28
- ./spec/describedby_spec.rb[1:1:1] | passed | 2.12 seconds |
29
- ./spec/describedby_spec.rb[1:1:2] | passed | 0.96254 seconds |
30
- ./spec/describedby_spec.rb[1:1:3] | passed | 0.92669 seconds |
31
- ./spec/describedby_spec.rb[1:1:4] | passed | 0.92801 seconds |
32
- ./spec/describedby_spec.rb[1:1:5] | passed | 1 second |
33
- ./spec/describedby_spec.rb[1:1:6] | passed | 0.66763 seconds |
34
- ./spec/describedby_spec.rb[1:1:7] | passed | 0.66021 seconds |
35
- ./spec/describedby_spec.rb[1:1:8] | passed | 1.89 seconds |
36
- ./spec/describedby_spec.rb[1:1:9] | passed | 1.3 seconds |
37
- ./spec/describedby_spec.rb[1:1:10] | passed | 1.7 seconds |
38
- ./spec/describedby_spec.rb[1:1:11] | passed | 2.28 seconds |
39
- ./spec/describedby_spec.rb[1:1:12] | passed | 2.27 seconds |
40
- ./spec/describedby_spec.rb[1:1:13] | passed | 1.39 seconds |
41
- ./spec/describedby_spec.rb[1:1:14] | passed | 1.65 seconds |
42
- ./spec/describedby_spec.rb[1:1:15] | passed | 1.7 seconds |
43
- ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00215 seconds |
44
- ./spec/fsp_harvester_spec.rb[1:2] | failed | 0.00021 seconds |
45
- ./spec/item_spec.rb[1:1:1] | passed | 2.04 seconds |
46
- ./spec/item_spec.rb[1:1:2] | passed | 2 seconds |
47
- ./spec/item_spec.rb[1:1:3] | passed | 0.92924 seconds |
48
- ./spec/item_spec.rb[1:1:4] | passed | 1.36 seconds |
49
- ./spec/item_spec.rb[1:1:5] | passed | 1.71 seconds |
50
- ./spec/item_spec.rb[1:1:6] | passed | 1.68 seconds |
51
- ./spec/item_spec.rb[1:1:7] | passed | 2.37 seconds |
52
- ./spec/item_spec.rb[1:1:8] | passed | 0.34241 seconds |
53
- ./spec/type_spec.rb[1:1:1] | passed | 0.9855 seconds |
54
- ./spec/type_spec.rb[1:1:2] | passed | 0.96202 seconds |
55
- ./spec/type_spec.rb[1:1:3] | passed | 0.96005 seconds |
3
+ ./spec/cite-as_spec.rb[1:1:1] | passed | 1.3 seconds |
4
+ ./spec/cite-as_spec.rb[1:1:2] | passed | 1.21 seconds |
5
+ ./spec/cite-as_spec.rb[1:1:3] | passed | 1.09 seconds |
6
+ ./spec/cite-as_spec.rb[1:1:4] | passed | 1.69 seconds |
7
+ ./spec/cite-as_spec.rb[1:1:5] | passed | 2.72 seconds |
8
+ ./spec/cite-as_spec.rb[1:1:6] | passed | 2.3 seconds |
9
+ ./spec/cite-as_spec.rb[1:1:7] | passed | 3.36 seconds |
10
+ ./spec/cite-as_spec.rb[1:1:8] | passed | 2.26 seconds |
11
+ ./spec/cite-as_spec.rb[1:1:9] | passed | 2.82 seconds |
12
+ ./spec/cite-as_spec.rb[1:1:10] | passed | 2.3 seconds |
13
+ ./spec/cite-as_spec.rb[1:1:11] | passed | 3.37 seconds |
14
+ ./spec/cite-as_spec.rb[1:1:12] | passed | 2.2 seconds |
15
+ ./spec/cite-as_spec.rb[1:1:13] | passed | 2.94 seconds |
16
+ ./spec/cite-as_spec.rb[1:1:14] | passed | 2.44 seconds |
17
+ ./spec/cite-as_spec.rb[1:1:15] | passed | 1.54 seconds |
18
+ ./spec/cite-as_spec.rb[1:1:16] | passed | 1.29 seconds |
19
+ ./spec/cite-as_spec.rb[1:1:17] | passed | 1.25 seconds |
20
+ ./spec/cite-as_spec.rb[1:1:18] | passed | 1.15 seconds |
21
+ ./spec/cite-as_spec.rb[1:1:19] | passed | 1.7 seconds |
22
+ ./spec/cite-as_spec.rb[1:1:20] | passed | 1.66 seconds |
23
+ ./spec/cite-as_spec.rb[1:1:21] | passed | 2.41 seconds |
24
+ ./spec/cite-as_spec.rb[1:1:22] | passed | 1.64 seconds |
25
+ ./spec/cite-as_spec.rb[1:1:23] | passed | 1.35 seconds |
26
+ ./spec/cite-as_spec.rb[1:1:24] | failed | 1.25 seconds |
27
+ ./spec/cite-as_spec.rb[1:1:25] | passed | 0.51152 seconds |
28
+ ./spec/describedby_spec.rb[1:1:1] | passed | 2.71 seconds |
29
+ ./spec/describedby_spec.rb[1:1:2] | passed | 1.25 seconds |
30
+ ./spec/describedby_spec.rb[1:1:3] | passed | 1.22 seconds |
31
+ ./spec/describedby_spec.rb[1:1:4] | passed | 1.22 seconds |
32
+ ./spec/describedby_spec.rb[1:1:5] | passed | 1.21 seconds |
33
+ ./spec/describedby_spec.rb[1:1:6] | passed | 1.02 seconds |
34
+ ./spec/describedby_spec.rb[1:1:7] | passed | 0.99175 seconds |
35
+ ./spec/describedby_spec.rb[1:1:8] | passed | 2.44 seconds |
36
+ ./spec/describedby_spec.rb[1:1:9] | passed | 1.72 seconds |
37
+ ./spec/describedby_spec.rb[1:1:10] | passed | 2.15 seconds |
38
+ ./spec/describedby_spec.rb[1:1:11] | passed | 3.17 seconds |
39
+ ./spec/describedby_spec.rb[1:1:12] | passed | 3.1 seconds |
40
+ ./spec/describedby_spec.rb[1:1:13] | passed | 1.7 seconds |
41
+ ./spec/describedby_spec.rb[1:1:14] | passed | 2.21 seconds |
42
+ ./spec/describedby_spec.rb[1:1:15] | passed | 2.18 seconds |
43
+ ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
44
+ ./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
45
+ ./spec/item_spec.rb[1:1:1] | passed | 3.09 seconds |
46
+ ./spec/item_spec.rb[1:1:2] | passed | 2.92 seconds |
47
+ ./spec/item_spec.rb[1:1:3] | passed | 1.12 seconds |
48
+ ./spec/item_spec.rb[1:1:4] | passed | 1.7 seconds |
49
+ ./spec/item_spec.rb[1:1:5] | passed | 2.24 seconds |
50
+ ./spec/item_spec.rb[1:1:6] | passed | 2.87 seconds |
51
+ ./spec/item_spec.rb[1:1:7] | passed | 3.03 seconds |
52
+ ./spec/item_spec.rb[1:1:8] | passed | 0.52338 seconds |
53
+ ./spec/type_spec.rb[1:1:1] | passed | 1.42 seconds |
54
+ ./spec/type_spec.rb[1:1:2] | passed | 1.28 seconds |
55
+ ./spec/type_spec.rb[1:1:3] | passed | 1.52 seconds |
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.11)
4
+ fsp_harvester (0.1.12)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
7
  linkheaders-processor (~> 0.1.16)
data/lib/config.conf ADDED
@@ -0,0 +1,8 @@
1
+ [extruct]
2
+ command="extruct"
3
+
4
+ [rdf]
5
+ command="/home/osboxes/.rvm/gems/ruby-3.0.0/bin/rdf"
6
+
7
+ [tika]
8
+ command="http://tika:9998/meta"
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module FspHarvester
3
+ module HarvesterTools
4
4
  class Error < StandardError
5
5
  end
6
6
 
7
7
  class ExternalTools
8
8
 
9
- def initialize(metadata: FspHarvester::MetadataObject.new)
9
+ def initialize(metadata: HarvesterTools::MetadataObject.new)
10
10
  @meta = metadata
11
11
  end
12
12
 
@@ -25,10 +25,7 @@ module FspHarvester
25
25
  file.rewind
26
26
 
27
27
  @meta.comments << "INFO: The message body is being examined by Distiller\n"
28
- # command = "LANG=en_US.UTF-8 #{Utils::RDFCommand} serialize --input-format rdfa --output-format turtle #{file.path} 2>/dev/null"
29
- command = "LANG=en_US.UTF-8 #{Utils::RDFCommand} serialize --input-format rdfa --output-format jsonld #{file.path}"
30
- # command = "LANG=en_US.UTF-8 /usr/local/bin/ruby #{@rdf_command} serialize --input-format rdfa --output-format jsonld #{file.path}"
31
- # command = "LANG=en_US.UTF-8 /home/osboxes/.rvm/rubies/ruby-2.6.3/bin/ruby /home/osboxes/.rvm/gems/ruby-2.6.3/bin/rdf serialize --output-format jsonld #{file.path}"
28
+ command = "LANG=en_US.UTF-8 #{RDFCommand} serialize --input-format rdfa --output-format jsonld #{file.path}"
32
29
  warn "distiller command: #{command}"
33
30
  result, _stderr, _status = Open3.capture3(command)
34
31
  warn ''
@@ -41,12 +38,13 @@ module FspHarvester
41
38
  if result !~ /@context/i # failure returns nil
42
39
  @meta.comments << "WARN: The Distiller tool failed to find parseable data in the body, perhaps due to incorrectly formatted HTML..\n"
43
40
  @meta.add_warning(['018', '', ''])
41
+ result = "{}"
44
42
  else
45
43
  @meta.comments << "INFO: The Distiller found parseable data. Parsing as JSON-LD\n"
46
- parse_rdf(result: result, content_type: "application/ld+json")
47
44
  end
48
45
  @@distillerknown[bhash] = true
49
46
  end
47
+ result
50
48
  end
51
49
 
52
50
  def processs_with_extruct(uri:)
@@ -55,6 +53,11 @@ module FspHarvester
55
53
  stdout, stderr, status = Open3.capture3(EXTRUCT_COMMAND + ' ' + uri)
56
54
  warn "open3 status: #{status} #{stdout}"
57
55
  result = stderr # absurd that the output comes over stderr! LOL!
56
+ jsonld = {}
57
+ microdata = Hash.new
58
+ microformat = Hash.new
59
+ opengraph = Hash.new
60
+ rdfa = Hash.new
58
61
 
59
62
  if result.to_s.match(/(Failed\sto\sextract.*?)\n/)
60
63
  @meta.comments << "WARN: extruct threw an error #{Regexp.last_match(1)} when attempting to parse return value (message body) of #{uri}.\n"
@@ -66,17 +69,16 @@ module FspHarvester
66
69
  elsif result.to_s.match(/^\s+?\{/) or result.to_s.match(/^\s+\[/) # this is JSON
67
70
  json = JSON.parse result
68
71
  @meta.comments << "INFO: the extruct tool found parseable data at #{uri}\n"
69
-
70
- parse_rdf(body: json['json-ld'].to_json, content_type: 'application/ld+json') if json['json-ld'].any? # RDF
71
- @meta.merge_hash(json['microdata'].first) if json['microdata'].any?
72
- @meta.merge_hash(json['microformat'].first) if json['microformat'].any?
73
- @meta.merge_hash(json['opengraph'].first) if json['opengraph'].any?
74
- parse_rdf(body: json['rdfa'].to_json, content_type: 'application/ld+json') if json['rdfa'].any? # RDF
75
-
76
- @meta.merge_hash(json.first) if json.first.is_a? Hash
72
+ jsonld = json['json-ld'].to_json if json['json-ld'].any?
73
+ microdata = json['microdata'].first if json['microdata'].any
74
+ microformat = json['microformat'].first if json['microformat'].any?
75
+ opengraph = json['opengraph'].first if json['opengraph'].any?
76
+ rdfa = json['rdfa'].to_json if json['rdfa'].any?
77
+ # @meta.merge_hash(json.first) if json.first.is_a? Hash
77
78
  else
78
79
  @meta.comments << "WARN: the extruct tool failed to find parseable data at #{uri}\n"
79
80
  end
81
+ [jsonld, microdata, microformat, opengraph, rdfa]
80
82
  end
81
83
  end
82
84
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.11"
4
+ VERSION = "0.1.12"
5
5
  end
data/lib/fsp_harvester.rb CHANGED
@@ -1,121 +1,23 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'fsp_harvester/version'
4
- require 'json/ld'
5
- require 'json/ld/preloaded'
6
- require 'json'
7
- require 'linkheaders/processor'
8
- require 'addressable'
9
- require 'tempfile'
10
- require 'xmlsimple'
11
- require 'nokogiri'
12
- require 'parseconfig'
13
- require 'rest-client'
14
- require 'cgi'
15
- require 'digest'
16
- require 'open3'
17
- require 'metainspector'
18
- require 'rdf/xsd'
19
- require_relative './metadata_object'
20
- require_relative './constants'
21
- require_relative './web_utils'
22
- require_relative './signposting_tests'
23
- require_relative './fsp_metadata_harvester'
24
- require_relative './fsp_metadata_parser'
25
-
26
1
 
2
+ require_relative 'harvester'
27
3
  module FspHarvester
28
4
  class Error < StandardError
29
5
  end
30
6
 
31
7
  class Utils
32
- # @@distillerknown = {} # global, hash of sha256 keys of message bodies - have they been seen before t/f
33
- # @warnings = JSON.parse(File.read("warnings.json"))
34
-
35
-
36
- def self.resolve_guid(guid:)
37
- @meta = FspHarvester::MetadataObject.new
38
- @meta.all_uris = [guid]
39
- type, url = convertToURL(guid: guid)
40
- links = Array.new
41
- if type
42
- links = resolve_url(url: url)
43
- @meta.links << links
44
- else
45
- @meta.add_warning(['006', guid, ''])
46
- @meta.comments << "FATAL: GUID type not recognized.\n"
47
- end
48
- [links, @meta]
49
- end
50
8
 
51
- def self.gather_metadata_from_describedby_links(links: [], metadata: FspHarvester::MetadataObject.new) # meta should have already been created by resolve+guid, but maybe not
9
+ def self.gather_metadata_from_describedby_links(links: [], metadata: HarvesterTools::MetadataObject.new) # meta should have already been created by resolve+guid, but maybe not
52
10
  @meta = metadata
53
11
  db = []
54
12
  links.each do |l|
55
13
  db << l if l.relation == 'describedby'
56
14
  end
57
- FspHarvester::MetadataHarvester.extract_metadata(links: db, metadata: @meta) # everything is gathered into the @meta metadata object
15
+ HarvesterTools::MetadataHarvester.extract_metadata(links: db, metadata: @meta) # everything is gathered into the @meta metadata object
58
16
  @meta
59
17
  end
60
18
 
61
- def self.convertToURL(guid:)
62
- GUID_TYPES.each do |k, regex|
63
- if k == 'inchi' and regex.match(guid)
64
- return 'inchi', "https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/#{guid}"
65
- elsif k == 'handle1' and regex.match(guid)
66
- return 'handle', "http://hdl.handle.net/#{guid}"
67
- elsif k == 'handle2' and regex.match(guid)
68
- return 'handle', "http://hdl.handle.net/#{guid}"
69
- elsif k == 'uri' and regex.match(guid)
70
- return 'uri', guid
71
- elsif k == 'doi' and regex.match(guid)
72
- return 'doi', "https://doi.org/#{guid}"
73
- end
74
- end
75
- [nil, nil]
76
- end
77
-
78
- def self.typeit(guid:)
79
- Utils::GUID_TYPES.each do |type, regex|
80
- return type if regex.match(guid)
81
- end
82
- false
83
- end
84
-
85
- def self.resolve_url(url:, method: :get, nolinkheaders: false, header: ACCEPT_STAR_HEADER)
86
- @meta.guidtype = 'uri' if @meta.guidtype.nil?
87
- warn "\n\n FETCHING #{url} #{header}\n\n"
88
- response = FspHarvester::WebUtils.fspfetch(url: url, headers: header, method: method, meta: @meta)
89
- warn "\n\n head #{response.headers.inspect}\n\n" if response
90
-
91
- unless response
92
- @meta.add_warning(['001', url, header])
93
- @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
94
- return []
95
- end
96
-
97
- @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
98
- @meta.full_response << response.body
99
-
100
- links = process_link_headers(response: response) unless nolinkheaders
101
- links
102
- end
103
-
104
- def self.process_link_headers(response:)
105
- warn "\n\n parsing #{response.headers}\n\n"
106
-
107
- parser = LinkHeaders::Processor.new(default_anchor: @meta.all_uris.last)
108
- parser.extract_and_parse(response: response)
109
- factory = parser.factory # LinkHeaders::LinkFactory
110
-
111
- warn "\n\n length bfore #{factory.all_links.length}\n\n"
112
- signpostingcheck(factory: factory)
113
- warn "\n\n length aftr #{factory.all_links.length}\n\n"
114
- warn "\n\n links #{factory.all_links}\n\n"
115
- factory.all_links
116
- end
117
-
118
- def self.signpostingcheck(factory:)
19
+ def self.signpostingcheck(factory:, metadata: HarvesterTools::MetadataObject.new)
20
+ @meta = metadata
119
21
  citeas = Array.new
120
22
  describedby = Array.new
121
23
  item = Array.new
@@ -134,13 +36,13 @@ module FspHarvester
134
36
  end
135
37
  end
136
38
 
137
- check_describedby_rules(describedby: describedby)
138
- check_item_rules(item: item)
39
+ check_describedby_rules(describedby: describedby, metadata: @meta)
40
+ check_item_rules(item: item, metadata: @meta)
139
41
 
140
42
  if citeas.length > 1
141
43
  warn "INFO: multiple cite-as links found. Checking for conflicts\n"
142
44
  @meta.comments << "INFO: multiple cite-as links found. Checking for conflicts\n"
143
- citeas = check_for_citeas_conflicts(citeas: citeas) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
45
+ citeas = check_for_citeas_conflicts(citeas: citeas, metadata: @meta) # this adds to the metadata objects if there are conflicts, returns the list of unique citeas (SHOULD ONLY BE ONE!)
144
46
  end
145
47
 
146
48
  unless citeas.length == 1 && describedby.length > 0
data/lib/harvester.rb ADDED
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ #require_relative 'fsp_harvester/version'
4
+ require 'json/ld'
5
+ require 'json/ld/preloaded'
6
+ require 'json'
7
+ require 'linkheaders/processor'
8
+ require 'addressable'
9
+ require 'tempfile'
10
+ require 'xmlsimple'
11
+ require 'nokogiri'
12
+ require 'parseconfig'
13
+ require 'rest-client'
14
+ require 'cgi'
15
+ require 'digest'
16
+ require 'open3'
17
+ require 'metainspector'
18
+ require 'rdf/xsd'
19
+ require_relative './metadata_object'
20
+ require_relative './constants'
21
+ require_relative './web_utils'
22
+ require_relative './signposting_tests'
23
+ require_relative './metadata_harvester'
24
+ require_relative './fsp_harvester'
25
+ require_relative './harvester_utils'
26
+ require_relative './external_tools'
27
+ require_relative './metadata_parser'
@@ -0,0 +1,75 @@
1
+ module HarvesterTools
2
+ class Error < StandardError
3
+ end
4
+
5
+ class Utils
6
+
7
+ def self.resolve_guid(guid:)
8
+ @meta = HarvesterTools::MetadataObject.new
9
+ @meta.all_uris = [guid]
10
+ type, url = convertToURL(guid: guid)
11
+ links = Array.new
12
+ if type
13
+ links = resolve_url(url: url)
14
+ @meta.links = @meta.links | links
15
+ else
16
+ @meta.add_warning(['006', guid, ''])
17
+ @meta.comments << "FATAL: GUID type not recognized.\n"
18
+ end
19
+ [links, @meta]
20
+ end
21
+
22
+ def self.convertToURL(guid:)
23
+ GUID_TYPES.each do |k, regex|
24
+ if k == 'inchi' and regex.match(guid)
25
+ return 'inchi', "https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/#{guid}"
26
+ elsif k == 'handle1' and regex.match(guid)
27
+ return 'handle', "http://hdl.handle.net/#{guid}"
28
+ elsif k == 'handle2' and regex.match(guid)
29
+ return 'handle', "http://hdl.handle.net/#{guid}"
30
+ elsif k == 'uri' and regex.match(guid)
31
+ return 'uri', guid
32
+ elsif k == 'doi' and regex.match(guid)
33
+ return 'doi', "https://doi.org/#{guid}"
34
+ end
35
+ end
36
+ [nil, nil]
37
+ end
38
+
39
+ def self.typeit(guid:)
40
+ GUID_TYPES.each do |type, regex|
41
+ return type if regex.match(guid)
42
+ end
43
+ false
44
+ end
45
+
46
+ def self.resolve_url(url:, method: :get, nolinkheaders: false, header: ACCEPT_STAR_HEADER)
47
+ @meta.guidtype = 'uri' if @meta.guidtype.nil?
48
+ warn "\n\n FETCHING #{url} #{header}\n\n"
49
+ response = HarvesterTools::WebUtils.fspfetch(url: url, headers: header, method: method, meta: @meta)
50
+ warn "\n\n head #{response.headers.inspect}\n\n" if response
51
+
52
+ unless response
53
+ @meta.add_warning(['001', url, header])
54
+ @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{header}.\n"
55
+ return []
56
+ end
57
+
58
+ @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
59
+ @meta.full_response << response.body
60
+
61
+ links = process_link_headers(response: response) unless nolinkheaders
62
+ links
63
+ end
64
+
65
+ def self.process_link_headers(response:)
66
+ warn "\n\n parsing #{response.headers}\n\n"
67
+
68
+ parser = LinkHeaders::Processor.new(default_anchor: @meta.all_uris.last)
69
+ parser.extract_and_parse(response: response)
70
+ factory = parser.factory # LinkHeaders::LinkFactory
71
+ FspHarvester::Utils.signpostingcheck(factory: factory, metadata: @meta)
72
+ factory.all_links
73
+ end
74
+ end
75
+ end
@@ -1,17 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module FspHarvester
3
+ module HarvesterTools
4
4
  class Error < StandardError
5
5
  end
6
6
 
7
7
  class MetadataHarvester
8
- def self.extract_metadata(links: [], metadata: FspHarvester::MetadataObject.new)
8
+ def self.extract_metadata(links: [], metadata: HarvesterTools::MetadataObject.new)
9
9
  @meta = metadata
10
10
  @meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
11
11
 
12
12
  describedby = links.select { |l| l if l.relation == 'describedby' }
13
13
 
14
- hvst = FspHarvester::MetadataParser.new(metadata_object: @meta) # put here because the class variable for detecting duplicates should apply to all URIs
14
+ hvst = HarvesterTools::MetadataParser.new(metadata_object: @meta) # put here because the class variable for detecting duplicates should apply to all URIs
15
15
  describedby.each do |link|
16
16
  accepttype = ACCEPT_STAR_HEADER
17
17
  accept = link.respond_to?('type') ? link.type : nil
@@ -30,16 +30,16 @@ module FspHarvester
30
30
  case abbreviation
31
31
  when 'html'
32
32
  @meta.comments << 'INFO: Processing html'
33
- hvst.process_html(body: response.body, uri: link)
33
+ hvst.process_html(body: response.body, uri: link, metadata: @meta)
34
34
  when 'xml'
35
35
  @meta.comments << 'INFO: Processing xml'
36
- hvst.process_xml(body: response.body)
36
+ hvst.process_xml(body: response.body, metadata: @meta)
37
37
  when 'json'
38
38
  @meta.comments << 'INFO: Processing json'
39
- hvst.process_json(body: response.body)
39
+ hvst.process_json(body: response.body, metadata: @meta)
40
40
  when 'jsonld', 'rdfxml', 'turtle', 'ntriples', 'nquads'
41
41
  @meta.comments << 'INFO: Processing linked data'
42
- hvst.process_ld(body: response.body, content_type: content_type)
42
+ hvst.process_ld(body: response.body, content_type: content_type, metadata: @meta)
43
43
  when 'specialist'
44
44
  warn 'no specialized parsers so far'
45
45
  end
@@ -54,7 +54,7 @@ module FspHarvester
54
54
  @meta.comments << "INFO: link #{link.href} has no MIME type, defaulting to */*"
55
55
  end
56
56
  url = link.href
57
- response = FspHarvester::WebUtils.fspfetch(url: url, method: :get, headers: header)
57
+ response = HarvesterTools::WebUtils.fspfetch(url: url, method: :get, headers: header)
58
58
  unless response
59
59
  @meta.add_warning(['016', url, header])
60
60
  @meta.comments << "WARN: Unable to resolve describedby link #{url} using HTTP Accept header #{header}.\n"
@@ -1,4 +1,4 @@
1
- module FspHarvester
1
+ module HarvesterTools
2
2
  class MetadataObject
3
3
  attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
4
4
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module FspHarvester
3
+ module HarvesterTools
4
4
  class Error < StandardError
5
5
  end
6
6
 
@@ -9,17 +9,25 @@ module FspHarvester
9
9
 
10
10
  @@distillerknown = {}
11
11
 
12
- def initialize(metadata_object: FspHarvester::MetadataObject.new)
12
+ def initialize(metadata_object: HarvesterTools::MetadataObject.new)
13
13
  @meta = metadata_object
14
14
  end
15
15
 
16
- def process_html(body:, uri:)
17
- tools = FspHarvester::ExternalTools.new(metadata: @meta)
18
- tools.process_with_distiller(body: body)
19
- tools.process_with_extruct(uri: uri)
16
+ def process_html(body:, uri:, metadata:)
17
+ @meta = metadata
18
+ tools = HarvesterTools::ExternalTools.new(metadata: @meta)
19
+ result = tools.process_with_distiller(body: body)
20
+
21
+ jsonld, microdata, microformat, opengraph, rdfa = tools.process_with_extruct(uri: uri)
22
+ parse_rdf(body: jsonld, content_type: 'application/ld+json')
23
+ @meta.merge_hash(microdata)
24
+ @meta.merge_hash(microformat)
25
+ @meta.merge_hash(opengraph)
26
+ parse_rdf(body: rdfa, content_type: 'application/ld+json')
20
27
  end
21
28
 
22
- def process_xml(body:)
29
+ def process_xml(body:, metadata:)
30
+ @meta = metadata
23
31
  begin
24
32
  hash = XmlSimple.xml_in(body)
25
33
  rescue
@@ -30,7 +38,8 @@ module FspHarvester
30
38
  @meta.hash.merge hash
31
39
  end
32
40
 
33
- def process_json(body:)
41
+ def process_json(body:, metadata:)
42
+ @meta = metadata
34
43
  begin
35
44
  hash = JSON.parse(body)
36
45
  rescue
@@ -41,11 +50,17 @@ module FspHarvester
41
50
  @meta.hash.merge hash
42
51
  end
43
52
 
44
- def process_ld(body:, content_type:)
45
- parse_rdf(body: body, content_type: content_type)
53
+ def process_ld(body:, content_type:, metadata:)
54
+ @meta = metadata
55
+ parse_rdf(body: body, content_type: content_type, metadata: @meta)
56
+ end
57
+
58
+ def parse_rdf(body:, content_type:, metadata:)
59
+ self.class.parse_rdf(body: body, content_type: content_type, metadata: metadata)
46
60
  end
47
61
 
48
- def parse_rdf(body:, content_type:)
62
+ def self.parse_rdf(body:, content_type:, metadata:)
63
+ @meta = metadata
49
64
  unless body
50
65
  @meta.comments << "CRITICAL: The response message body component appears to have no content.\n"
51
66
  @meta.add_warning(['018', '', ''])
@@ -65,7 +80,7 @@ module FspHarvester
65
80
  return
66
81
  end
67
82
 
68
- graph = FspHarvester::Cache.checkRDFCache(body: body)
83
+ graph = HarvesterTools::Cache.checkRDFCache(body: body)
69
84
  if graph.size > 0
70
85
  warn "\n\n\n unmarshalling graph from cache\n\ngraph size #{graph.size}\n\n"
71
86
  @meta.merge_rdf(graph.to_a)
@@ -88,7 +103,7 @@ module FspHarvester
88
103
  end
89
104
  reader = rdfformat.reader.new(body) # have to re-read it here, but now its safe because we have already caught errors
90
105
  warn 'WRITING TO CACHE'
91
- FspHarvester::Cache.writeRDFCache(reader: reader, body: body) # write to the special RDF graph cache
106
+ HarvesterTools::Cache.writeRDFCache(reader: reader, body: body) # write to the special RDF graph cache
92
107
  warn 'WRITING DONE'
93
108
  reader = rdfformat.reader.new(body) # frustrating that we cannot rewind!
94
109
  warn 'RE-READING DONE'
@@ -1,4 +1,5 @@
1
- def check_for_citeas_conflicts(citeas: )
1
+ def check_for_citeas_conflicts(citeas:, metadata: )
2
+ @meta = metadata
2
3
  @meta.comments << 'INFO: checking for conflicting cite-as links'
3
4
  citeas_hrefs = Hash.new
4
5
  citeas.each do |link|
@@ -6,7 +7,7 @@ def check_for_citeas_conflicts(citeas: )
6
7
  @meta.comments << "INFO: Adding citeas #{link.href} to the testing queue."
7
8
  citeas_hrefs[link.href] = link
8
9
  end
9
-
10
+ #warn "finalhash #{citeas_hrefs}"
10
11
  if citeas_hrefs.length > 1
11
12
  @meta.comments << 'INFO: Found multiple non-identical cite-as links.'
12
13
  @meta.add_warning(['007', '', ''])
@@ -16,7 +17,8 @@ def check_for_citeas_conflicts(citeas: )
16
17
  end
17
18
 
18
19
 
19
- def check_describedby_rules(describedby:)
20
+ def check_describedby_rules(describedby:, metadata:)
21
+ @meta = metadata
20
22
  describedby.each do |l|
21
23
  unless l.respond_to? 'type'
22
24
  @meta.add_warning(['005', l.href, ''])
@@ -25,7 +27,7 @@ def check_describedby_rules(describedby:)
25
27
  type = l.type if l.respond_to? 'type'
26
28
  type ||= '*/*'
27
29
  header = { accept: type }
28
- response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
30
+ response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
29
31
  if response
30
32
  responsetype = response.headers[:content_type]
31
33
  @meta.comments << "INFO: describedby link responds with content type #{responsetype}\n"
@@ -51,7 +53,8 @@ def check_describedby_rules(describedby:)
51
53
  end
52
54
  end
53
55
 
54
- def check_item_rules(item:)
56
+ def check_item_rules(item:, metadata:)
57
+ @meta = metadata
55
58
  item.each do |l| # l = LinkHeaders::Link
56
59
  unless l.respond_to? 'type'
57
60
  @meta.add_warning(['011', l.href, ''])
@@ -60,7 +63,7 @@ def check_item_rules(item:)
60
63
  type = l.type if l.respond_to? 'type'
61
64
  type ||= '*/*' # this becomes a frozen string
62
65
  header = { accept: type }
63
- response = FspHarvester::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
66
+ response = HarvesterTools::WebUtils.fspfetch(url: l.href, headers: header, method: :head)
64
67
 
65
68
  if response
66
69
  if response.headers[:content_type] and type != '*/*'
data/lib/swagger.rb CHANGED
@@ -1,64 +1,39 @@
1
- class Swagger
2
- attr_accessor :debug
3
- attr_accessor :title
4
- attr_accessor :tests_metric
5
- attr_accessor :description
6
- attr_accessor :applies_to_principle
7
- attr_accessor :organization
8
- attr_accessor :org_url
9
- attr_accessor :responsible_developer
10
- attr_accessor :email
11
- attr_accessor :developer_ORCiD
12
- attr_accessor :protocol
13
- attr_accessor :host
14
- attr_accessor :basePath
15
- attr_accessor :path
16
- attr_accessor :response_description
17
- attr_accessor :schemas
18
- attr_accessor :comments
19
- attr_accessor :fairsharing_key_location
20
- attr_accessor :score
21
- attr_accessor :testedGUID
22
-
23
- def initialize(params = {})
24
- @debug = params.fetch(:debug, false)
25
-
26
- @title = params.fetch(:title, 'unnamed')
27
- @tests_metric = params.fetch(:tests_metric)
28
- @description = params.fetch(:description, 'default_description')
29
- @applies_to_principle = params.fetch(:applies_to_principle, 'some principle')
30
- @version = params.fetch(:version, "0.1")
31
- @organization = params.fetch(:organization, 'Some Organization')
32
- @org_url = params.fetch(:org_url)
33
- @responsible_develper = params.fetch(:responsible_developer, 'Some Person')
34
- @email = params.fetch(:email)
35
- @developer_ORCiD = params.fetch(:developer_ORCiD)
36
- @host = params.fetch(:host)
37
- @protocol = params.fetch(:protocol, "https")
38
- @basePath = params.fetch(:basePath)
39
- @path = params.fetch(:path)
40
- @response_description = params.fetch(:response_description)
41
- @schemas = params.fetch(:schemas, [])
42
- @comments = params.fetch(:comments, [])
43
- @fairsharing_key_location = params.fetch(:fairsharing_key_location)
44
- @score = params.fetch(:score, 0)
45
- @testedGUID = params.fetch(:testedGUID, "")
46
-
47
-
48
-
49
- end
50
-
51
-
52
-
53
- def fairsharing_key
54
- return @fairsharing_key_location
55
- end
56
-
57
-
58
-
59
- def getSwagger
60
-
61
- message = <<"EOF_EOF"
1
+ class Swagger
2
+ attr_accessor :debug, :title, :tests_metric, :description, :applies_to_principle, :organization, :org_url,
3
+ :responsible_developer, :email, :developer_ORCiD, :protocol, :host, :basePath, :path,
4
+ :response_description, :schemas, :comments, :fairsharing_key_location, :score, :testedGUID
5
+
6
+ def initialize(params = {})
7
+ @debug = params.fetch(:debug, false)
8
+
9
+ @title = params.fetch(:title, 'unnamed')
10
+ @tests_metric = params.fetch(:tests_metric)
11
+ @description = params.fetch(:description, 'default_description')
12
+ @applies_to_principle = params.fetch(:applies_to_principle, 'some principle')
13
+ @version = params.fetch(:version, '0.1')
14
+ @organization = params.fetch(:organization, 'Some Organization')
15
+ @org_url = params.fetch(:org_url)
16
+ @responsible_develper = params.fetch(:responsible_developer, 'Some Person')
17
+ @email = params.fetch(:email)
18
+ @developer_ORCiD = params.fetch(:developer_ORCiD)
19
+ @host = params.fetch(:host)
20
+ @protocol = params.fetch(:protocol, 'https')
21
+ @basePath = params.fetch(:basePath)
22
+ @path = params.fetch(:path)
23
+ @response_description = params.fetch(:response_description)
24
+ @schemas = params.fetch(:schemas, [])
25
+ @comments = params.fetch(:comments, [])
26
+ @fairsharing_key_location = params.fetch(:fairsharing_key_location)
27
+ @score = params.fetch(:score, 0)
28
+ @testedGUID = params.fetch(:testedGUID, '')
29
+ end
30
+
31
+ def fairsharing_key
32
+ @fairsharing_key_location
33
+ end
34
+
35
+ def getSwagger
36
+ message = <<"EOF_EOF"
62
37
  swagger: '2.0'
63
38
  info:
64
39
  version: '#{@version}'
@@ -89,7 +64,7 @@ class Swagger
89
64
  $ref: '#/definitions/schemas'
90
65
  consumes:
91
66
  - application/json
92
- produces:
67
+ produces:#{' '}
93
68
  - application/json
94
69
  responses:
95
70
  "200":
@@ -98,127 +73,112 @@ class Swagger
98
73
  definitions:
99
74
  schemas:
100
75
  required:
101
- EOF_EOF
102
-
103
-
104
-
105
- self.schemas.keys.each do |key|
106
- message += " - #{key}\n"
76
+ EOF_EOF
77
+
78
+ schemas.keys.each do |key|
79
+ message += " - #{key}\n"
80
+ end
81
+ message += " properties:\n"
82
+ schemas.keys.each do |key|
83
+ message += " #{key}:\n"
84
+ message += " type: #{schemas[key][0]}\n"
85
+ message += " description: >-\n"
86
+ message += " #{schemas[key][1]}\n"
87
+ end
88
+
89
+ message
90
+ end
91
+
92
+ # A utility function that SHOULD NOT BE CALLED EXTERNALLY
93
+ #
94
+ # @param s - subject node
95
+ # @param p - predicate node
96
+ # @param o - object node
97
+ # @param repo - an RDF::Graph object
98
+ def triplify(s, p, o, repo)
99
+ s = s.strip if s.instance_of?(String)
100
+ p = p.strip if p.instance_of?(String)
101
+ o = o.strip if o.instance_of?(String)
102
+
103
+ unless s.respond_to?('uri')
104
+
105
+ if s.to_s =~ %r{^\w+:/?/?[^\s]+}
106
+ s = RDF::URI.new(s.to_s)
107
+ else
108
+ debug and warn "Subject #{s} must be a URI-compatible thingy"
109
+ abort "Subject #{s} must be a URI-compatible thingy"
107
110
  end
108
- message += " properties:\n"
109
- self.schemas.keys.each do |key|
110
- message += " #{key}:\n"
111
- message += " type: #{self.schemas[key][0]}\n"
112
- message += " description: >-\n"
113
- message += " #{self.schemas[key][1]}\n"
111
+ end
112
+
113
+ unless p.respond_to?('uri')
114
+
115
+ if p.to_s =~ %r{^\w+:/?/?[^\s]+}
116
+ p = RDF::URI.new(p.to_s)
117
+ else
118
+ debug and warn "Predicate #{p} must be a URI-compatible thingy"
119
+ abort "Predicate #{p} must be a URI-compatible thingy"
114
120
  end
115
-
116
- return message
117
121
  end
118
-
119
-
120
-
121
- # A utility function that SHOULD NOT BE CALLED EXTERNALLY
122
- #
123
- # @param s - subject node
124
- # @param p - predicate node
125
- # @param o - object node
126
- # @param repo - an RDF::Graph object
127
- def triplify(s, p, o, repo)
128
-
129
- if s.class == String
130
- s = s.strip
131
- end
132
- if p.class == String
133
- p = p.strip
134
- end
135
- if o.class == String
136
- o = o.strip
137
- end
138
-
139
- unless s.respond_to?('uri')
140
-
141
- if s.to_s =~ /^\w+:\/?\/?[^\s]+/
142
- s = RDF::URI.new(s.to_s)
143
- else
144
- self.debug and $stderr.puts "Subject #{s.to_s} must be a URI-compatible thingy"
145
- abort "Subject #{s.to_s} must be a URI-compatible thingy"
146
- end
147
- end
148
-
149
- unless p.respond_to?('uri')
150
-
151
- if p.to_s =~ /^\w+:\/?\/?[^\s]+/
152
- p = RDF::URI.new(p.to_s)
153
- else
154
- self.debug and $stderr.puts "Predicate #{p.to_s} must be a URI-compatible thingy"
155
- abort "Predicate #{p.to_s} must be a URI-compatible thingy"
156
- end
157
- end
158
-
159
- unless o.respond_to?('uri')
160
- if o.to_s =~ /\A\w+:\/?\/?\w[^\s]+/
161
- o = RDF::URI.new(o.to_s)
122
+
123
+ unless o.respond_to?('uri')
124
+ o = if o.to_s =~ %r{\A\w+:/?/?\w[^\s]+}
125
+ RDF::URI.new(o.to_s)
162
126
  elsif o.to_s =~ /^\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d/
163
- o = RDF::Literal.new(o.to_s, :datatype => RDF::XSD.date)
127
+ RDF::Literal.new(o.to_s, datatype: RDF::XSD.date)
164
128
  elsif o.to_s =~ /^[+-]?\d+\.\d+/
165
- o = RDF::Literal.new(o.to_s, :datatype => RDF::XSD.float)
129
+ RDF::Literal.new(o.to_s, datatype: RDF::XSD.float)
166
130
  elsif o.to_s =~ /^[+-]?[0-9]+$/
167
- o = RDF::Literal.new(o.to_s, :datatype => RDF::XSD.int)
131
+ RDF::Literal.new(o.to_s, datatype: RDF::XSD.int)
168
132
  else
169
- o = RDF::Literal.new(o.to_s, :language => :en)
133
+ RDF::Literal.new(o.to_s, language: :en)
170
134
  end
171
- end
172
-
173
- self.debug and $stderr.puts("\n\ninserting #{s.to_s} #{p.to_s} #{o.to_s}\n\n")
174
- triple = RDF::Statement(s, p, o)
175
- repo.insert(triple)
176
-
177
- return true
178
- end
179
-
180
-
181
- # A utility function that SHOULD NOT BE CALLED EXTERNALLY
182
- #
183
- # @param s - subject node
184
- # @param p - predicate node
185
- # @param o - object node
186
- # @param repo - an RDF::Graph object
187
- def Swagger.triplify(s, p, o, repo)
188
- return triplify(s,p,o,repo)
189
- end
190
-
191
- def addComment(newcomment)
192
- self.comments << newcomment.to_s
193
- #return self.comments
194
- end
195
-
196
- def createEvaluationResponse
197
-
198
- g = RDF::Graph.new
199
-
200
- dt = Time.now.iso8601
201
- uri = self.testedGUID
202
-
203
- me = self.protocol + "://" + self.host + "/" + self.basePath + self.path
204
-
205
- meURI ="#{me}##{uri}/result-#{dt}"
206
- meURI =Addressable::URI.escape(meURI)
207
-
208
- triplify(meURI, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://fairmetrics.org/resources/metric_evaluation_result", g );
209
- triplify(meURI, "http://semanticscience.org/resource/SIO_000300", self.score, g )
210
- triplify(meURI, "http://purl.obolibrary.org/obo/date", dt, g )
211
- triplify(meURI, "http://schema.org/softwareVersion", VERSION, g )
212
- triplify(meURI,"http://semanticscience.org/resource/SIO_000332", uri, g)
213
-
214
- comments = "no comments received. "
215
-
216
- comments = self.comments.join("\n") if self.comments.size > 0
217
- triplify(meURI, "http://schema.org/comment", comments, g)
218
-
219
- return g.dump(:jsonld)
220
- end
221
-
135
+ end
136
+
137
+ debug and warn("\n\ninserting #{s} #{p} #{o}\n\n")
138
+ triple = RDF::Statement(s, p, o)
139
+ repo.insert(triple)
140
+
141
+ true
142
+ end
143
+
144
+ # A utility function that SHOULD NOT BE CALLED EXTERNALLY
145
+ #
146
+ # @param s - subject node
147
+ # @param p - predicate node
148
+ # @param o - object node
149
+ # @param repo - an RDF::Graph object
150
+ def self.triplify(s, p, o, repo)
151
+ triplify(s, p, o, repo)
152
+ end
153
+
154
+ def addComment(newcomment)
155
+ comments << newcomment.to_s
156
+ # return self.comments
157
+ end
158
+
159
+ def createEvaluationResponse
160
+ g = RDF::Graph.new
161
+
162
+ dt = Time.now.iso8601
163
+ uri = testedGUID
164
+
165
+ me = protocol + '://' + host + '/' + basePath + path
166
+
167
+ meURI = "#{me}##{uri}/result-#{dt}"
168
+ meURI = Addressable::URI.escape(meURI)
169
+
170
+ triplify(meURI, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
171
+ 'http://fairmetrics.org/resources/metric_evaluation_result', g)
172
+ triplify(meURI, 'http://semanticscience.org/resource/SIO_000300', score, g)
173
+ triplify(meURI, 'http://purl.obolibrary.org/obo/date', dt, g)
174
+ triplify(meURI, 'http://schema.org/softwareVersion', VERSION, g)
175
+ triplify(meURI, 'http://semanticscience.org/resource/SIO_000332', uri, g)
176
+
177
+ comments = 'no comments received. '
178
+
179
+ comments = self.comments.join("\n") if self.comments.size > 0
180
+ triplify(meURI, 'http://schema.org/comment', comments, g)
181
+
182
+ g.dump(:jsonld)
222
183
  end
223
-
224
-
184
+ end
data/lib/web_utils.rb CHANGED
@@ -1,7 +1,7 @@
1
- module FspHarvester
1
+ module HarvesterTools
2
2
 
3
3
  class WebUtils
4
- def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta: FspHarvester::MetadataObject.new)
4
+ def self.fspfetch(url:, headers: ACCEPT_ALL_HEADER, method: :get, meta: HarvesterTools::MetadataObject.new)
5
5
  warn 'In fetch routine now. '
6
6
 
7
7
  begin
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-08 00:00:00.000000000 Z
11
+ date: 2022-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -182,15 +182,18 @@ files:
182
182
  - bin/setup
183
183
  - example_test.rb
184
184
  - launch.json
185
+ - lib/config.conf
185
186
  - lib/config.conf_docker
186
187
  - lib/config.conf_local
187
188
  - lib/constants.rb
189
+ - lib/external_tools.rb
188
190
  - lib/fsp_harvester.rb
189
191
  - lib/fsp_harvester/version.rb
190
- - lib/fsp_metadata_external_tools.rb
191
- - lib/fsp_metadata_harvester.rb
192
- - lib/fsp_metadata_parser.rb
192
+ - lib/harvester.rb
193
+ - lib/harvester_utils.rb
194
+ - lib/metadata_harvester.rb
193
195
  - lib/metadata_object.rb
196
+ - lib/metadata_parser.rb
194
197
  - lib/signposting_tests.rb
195
198
  - lib/swagger.rb
196
199
  - lib/warnings.json