fsp_harvester 0.1.26 → 0.1.28

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28c9dc04a4c6b47876df2299f2cf3c5538abd17ca3dfb0e5855cd730542d576b
4
- data.tar.gz: 1cf14b8a2a63b6a48e2f903de2142ecb90ace3b8ba0358016f40eb63672347e6
3
+ metadata.gz: cabced56c6a38753eab00ae6ab2138eac066936cbd58b4b01386185eaee8e3dc
4
+ data.tar.gz: 2c1386b89eb95081d3d53de8e187dcab99e7514b6021fad4de931db3c52d2af0
5
5
  SHA512:
6
- metadata.gz: 53736a81539ffb3a9eac6876722c1eaa15451ec611c7f18e650c6c0d1d4ef33e1edbc1ee2d5c95d82190af61de7defd80c27686c433233ccdefb885981d40999
7
- data.tar.gz: 7ba4389b25038fad44c298315d3b14ac2bf705d59c53bc2174624ef194b226da22f3303358a611c283facac07a8e05b076c4fff0312c3721089abcc83b6952d3
6
+ metadata.gz: 3df3d534079f2401ee0049446eb623afcd94a2d524c6d6a6f4fa12f6e472fbf71b8059ccb657f1277519eb797744150b79ba4e499fdb6300ad323f9199aead19
7
+ data.tar.gz: fd9c5ce047e93ed86fe1a6572548e15511690eed7e6af65bfbc23bbdb61b74fa9ead00cf9a5dbbce0cbecd8e2e8ef9d56b4e3d0f70512971be41198b49c76a7a
data/.rspec_status CHANGED
@@ -1,60 +1,61 @@
1
- example_id | status | run_time |
2
- ---------------------------------- | ------ | --------------------- |
3
- ./spec/cite-as_spec.rb[1:1:1] | passed | 1.87 seconds |
4
- ./spec/cite-as_spec.rb[1:1:2] | passed | 1.3 seconds |
5
- ./spec/cite-as_spec.rb[1:1:3] | passed | 1.53 seconds |
6
- ./spec/cite-as_spec.rb[1:1:4] | passed | 2.09 seconds |
7
- ./spec/cite-as_spec.rb[1:1:5] | passed | 8.09 seconds |
8
- ./spec/cite-as_spec.rb[1:1:6] | passed | 2.63 seconds |
9
- ./spec/cite-as_spec.rb[1:1:7] | passed | 2.9 seconds |
10
- ./spec/cite-as_spec.rb[1:1:8] | passed | 2.21 seconds |
11
- ./spec/cite-as_spec.rb[1:1:9] | passed | 2.85 seconds |
12
- ./spec/cite-as_spec.rb[1:1:10] | passed | 2.89 seconds |
13
- ./spec/cite-as_spec.rb[1:1:11] | passed | 3.16 seconds |
14
- ./spec/cite-as_spec.rb[1:1:12] | passed | 2.23 seconds |
15
- ./spec/cite-as_spec.rb[1:1:13] | passed | 2.92 seconds |
16
- ./spec/cite-as_spec.rb[1:1:14] | passed | 2.8 seconds |
17
- ./spec/cite-as_spec.rb[1:1:15] | passed | 1.21 seconds |
18
- ./spec/cite-as_spec.rb[1:1:16] | passed | 1.28 seconds |
19
- ./spec/cite-as_spec.rb[1:1:17] | passed | 1.19 seconds |
20
- ./spec/cite-as_spec.rb[1:1:18] | passed | 1.24 seconds |
21
- ./spec/cite-as_spec.rb[1:1:19] | passed | 1.7 seconds |
22
- ./spec/cite-as_spec.rb[1:1:20] | passed | 1.74 seconds |
23
- ./spec/cite-as_spec.rb[1:1:21] | passed | 2.75 seconds |
24
- ./spec/cite-as_spec.rb[1:1:22] | passed | 1.35 seconds |
25
- ./spec/cite-as_spec.rb[1:1:23] | passed | 1.19 seconds |
26
- ./spec/cite-as_spec.rb[1:1:24] | failed | 1.2 seconds |
27
- ./spec/cite-as_spec.rb[1:1:25] | passed | 0.60282 seconds |
28
- ./spec/describedby_spec.rb[1:1:1] | passed | 3.23 seconds |
29
- ./spec/describedby_spec.rb[1:1:2] | passed | 1.43 seconds |
30
- ./spec/describedby_spec.rb[1:1:3] | passed | 1.31 seconds |
31
- ./spec/describedby_spec.rb[1:1:4] | passed | 1.37 seconds |
32
- ./spec/describedby_spec.rb[1:1:5] | passed | 1.24 seconds |
33
- ./spec/describedby_spec.rb[1:1:6] | passed | 1.09 seconds |
34
- ./spec/describedby_spec.rb[1:1:7] | passed | 1.03 seconds |
35
- ./spec/describedby_spec.rb[1:1:8] | passed | 2.28 seconds |
36
- ./spec/describedby_spec.rb[1:1:9] | passed | 1.84 seconds |
37
- ./spec/describedby_spec.rb[1:1:10] | passed | 2.23 seconds |
38
- ./spec/describedby_spec.rb[1:1:11] | passed | 2.97 seconds |
39
- ./spec/describedby_spec.rb[1:1:12] | passed | 2.97 seconds |
40
- ./spec/describedby_spec.rb[1:1:13] | passed | 1.65 seconds |
41
- ./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
42
- ./spec/describedby_spec.rb[1:1:15] | passed | 2.53 seconds |
43
- ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00178 seconds |
44
- ./spec/fsp_harvester_spec.rb[1:2] | passed | 5.49 seconds |
45
- ./spec/fsp_harvester_spec.rb[1:3] | passed | 39.87 seconds |
46
- ./spec/fsp_harvester_spec.rb[1:4] | passed | 2.62 seconds |
47
- ./spec/fsp_harvester_spec.rb[1:5] | passed | 2.61 seconds |
48
- ./spec/fsp_harvester_spec.rb[1:6] | failed | 54.05 seconds |
49
- ./spec/fsp_harvester_spec.rb[1:7] | passed | 1 minute 26.9 seconds |
50
- ./spec/item_spec.rb[1:1:1] | passed | 3.8 seconds |
51
- ./spec/item_spec.rb[1:1:2] | passed | 3.3 seconds |
52
- ./spec/item_spec.rb[1:1:3] | passed | 1.33 seconds |
53
- ./spec/item_spec.rb[1:1:4] | passed | 1.68 seconds |
54
- ./spec/item_spec.rb[1:1:5] | passed | 2.44 seconds |
55
- ./spec/item_spec.rb[1:1:6] | passed | 2.64 seconds |
56
- ./spec/item_spec.rb[1:1:7] | passed | 3.02 seconds |
57
- ./spec/item_spec.rb[1:1:8] | passed | 0.49403 seconds |
58
- ./spec/type_spec.rb[1:1:1] | passed | 1.23 seconds |
59
- ./spec/type_spec.rb[1:1:2] | passed | 1.25 seconds |
60
- ./spec/type_spec.rb[1:1:3] | passed | 1.57 seconds |
1
+ example_id | status | run_time |
2
+ ---------------------------------- | ------- | --------------- |
3
+ ./spec/cite-as_spec.rb[1:1:1] | passed | 1.77 seconds |
4
+ ./spec/cite-as_spec.rb[1:1:2] | passed | 1.17 seconds |
5
+ ./spec/cite-as_spec.rb[1:1:3] | passed | 0.70388 seconds |
6
+ ./spec/cite-as_spec.rb[1:1:4] | passed | 1.38 seconds |
7
+ ./spec/cite-as_spec.rb[1:1:5] | passed | 2.07 seconds |
8
+ ./spec/cite-as_spec.rb[1:1:6] | passed | 1.18 seconds |
9
+ ./spec/cite-as_spec.rb[1:1:7] | passed | 2.19 seconds |
10
+ ./spec/cite-as_spec.rb[1:1:8] | passed | 1.25 seconds |
11
+ ./spec/cite-as_spec.rb[1:1:9] | passed | 4.16 seconds |
12
+ ./spec/cite-as_spec.rb[1:1:10] | passed | 1.3 seconds |
13
+ ./spec/cite-as_spec.rb[1:1:11] | passed | 2.12 seconds |
14
+ ./spec/cite-as_spec.rb[1:1:12] | passed | 1.51 seconds |
15
+ ./spec/cite-as_spec.rb[1:1:13] | passed | 1.97 seconds |
16
+ ./spec/cite-as_spec.rb[1:1:14] | passed | 2.72 seconds |
17
+ ./spec/cite-as_spec.rb[1:1:15] | passed | 1.2 seconds |
18
+ ./spec/cite-as_spec.rb[1:1:16] | passed | 0.94397 seconds |
19
+ ./spec/cite-as_spec.rb[1:1:17] | passed | 0.9681 seconds |
20
+ ./spec/cite-as_spec.rb[1:1:18] | passed | 0.91536 seconds |
21
+ ./spec/cite-as_spec.rb[1:1:19] | passed | 1.44 seconds |
22
+ ./spec/cite-as_spec.rb[1:1:20] | passed | 1.3 seconds |
23
+ ./spec/cite-as_spec.rb[1:1:21] | passed | 1.7 seconds |
24
+ ./spec/cite-as_spec.rb[1:1:22] | passed | 1.28 seconds |
25
+ ./spec/cite-as_spec.rb[1:1:23] | passed | 1.06 seconds |
26
+ ./spec/cite-as_spec.rb[1:1:24] | passed | 1.31 seconds |
27
+ ./spec/cite-as_spec.rb[1:1:25] | passed | 0.30764 seconds |
28
+ ./spec/describedby_spec.rb[1:1:1] | failed | 8.23 seconds |
29
+ ./spec/describedby_spec.rb[1:1:2] | passed | 1.02 seconds |
30
+ ./spec/describedby_spec.rb[1:1:3] | passed | 0.98522 seconds |
31
+ ./spec/describedby_spec.rb[1:1:4] | passed | 0.98303 seconds |
32
+ ./spec/describedby_spec.rb[1:1:5] | passed | 0.92518 seconds |
33
+ ./spec/describedby_spec.rb[1:1:6] | passed | 0.5518 seconds |
34
+ ./spec/describedby_spec.rb[1:1:7] | passed | 0.64188 seconds |
35
+ ./spec/describedby_spec.rb[1:1:8] | passed | 1.63 seconds |
36
+ ./spec/describedby_spec.rb[1:1:9] | passed | 2.52 seconds |
37
+ ./spec/describedby_spec.rb[1:1:10] | passed | 1.4 seconds |
38
+ ./spec/describedby_spec.rb[1:1:11] | passed | 2.02 seconds |
39
+ ./spec/describedby_spec.rb[1:1:12] | passed | 2.02 seconds |
40
+ ./spec/describedby_spec.rb[1:1:13] | passed | 1.11 seconds |
41
+ ./spec/describedby_spec.rb[1:1:14] | passed | 1.48 seconds |
42
+ ./spec/describedby_spec.rb[1:1:15] | passed | 1.5 seconds |
43
+ ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.0002 seconds |
44
+ ./spec/fsp_harvester_spec.rb[1:2] | passed | 1.87 seconds |
45
+ ./spec/fsp_harvester_spec.rb[1:3] | passed | 6.76 seconds |
46
+ ./spec/fsp_harvester_spec.rb[1:4] | passed | 1.65 seconds |
47
+ ./spec/fsp_harvester_spec.rb[1:5] | passed | 1.59 seconds |
48
+ ./spec/fsp_harvester_spec.rb[1:6] | passed | 36.97 seconds |
49
+ ./spec/fsp_harvester_spec.rb[1:7] | passed | 33.62 seconds |
50
+ ./spec/fsp_harvester_spec.rb[1:8] | unknown | |
51
+ ./spec/item_spec.rb[1:1:1] | passed | 2.05 seconds |
52
+ ./spec/item_spec.rb[1:1:2] | passed | 1.78 seconds |
53
+ ./spec/item_spec.rb[1:1:3] | passed | 0.82912 seconds |
54
+ ./spec/item_spec.rb[1:1:4] | passed | 1.36 seconds |
55
+ ./spec/item_spec.rb[1:1:5] | passed | 1.19 seconds |
56
+ ./spec/item_spec.rb[1:1:6] | passed | 1.27 seconds |
57
+ ./spec/item_spec.rb[1:1:7] | passed | 1.92 seconds |
58
+ ./spec/item_spec.rb[1:1:8] | passed | 0.30252 seconds |
59
+ ./spec/type_spec.rb[1:1:1] | passed | 0.99993 seconds |
60
+ ./spec/type_spec.rb[1:1:2] | passed | 0.84393 seconds |
61
+ ./spec/type_spec.rb[1:1:3] | passed | 0.84347 seconds |
data/.vscode/launch.json ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ "type": "rdbg",
9
+ "name": "Debug current file with rdbg",
10
+ "request": "launch",
11
+ "script": "${file}",
12
+ "args": [],
13
+ "askParameters": true
14
+ },
15
+ {
16
+ "type": "rdbg",
17
+ "name": "Attach with rdbg",
18
+ "request": "attach"
19
+ }
20
+ ]
21
+ }
data/Gemfile.lock CHANGED
@@ -1,13 +1,15 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.26)
4
+ fsp_harvester (0.1.28)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
7
  linkheaders-processor (~> 0.1.19)
8
8
  metainspector (~> 5.11.2)
9
9
  parseconfig (~> 1.1)
10
10
  rake (~> 13.0)
11
+ rdf-raptor (~> 3.2.0)
12
+ require_all (~> 3.0.0)
11
13
  rest-client (~> 2.1)
12
14
  rspec (~> 3.11)
13
15
  rubocop (~> 1.7)
@@ -66,6 +68,7 @@ GEM
66
68
  faraday_middleware (1.2.0)
67
69
  faraday (~> 1.0)
68
70
  fastimage (2.2.7)
71
+ ffi (1.16.2)
69
72
  haml (6.1.2)
70
73
  temple (>= 0.8.2)
71
74
  thor
@@ -192,6 +195,9 @@ GEM
192
195
  rdf (~> 3.3)
193
196
  rdf-ordered-repo (3.3.0)
194
197
  rdf (~> 3.3)
198
+ rdf-raptor (3.2.0)
199
+ ffi (~> 1.15)
200
+ rdf (~> 3.2)
195
201
  rdf-rdfa (3.3.0)
196
202
  haml (~> 6.1)
197
203
  htmlentities (~> 4.3)
@@ -230,6 +236,7 @@ GEM
230
236
  rdf (~> 3.3)
231
237
  rexml (~> 3.2)
232
238
  regexp_parser (2.5.0)
239
+ require_all (3.0.0)
233
240
  rest-client (2.1.0)
234
241
  http-accept (>= 1.7.0, < 2.0)
235
242
  http-cookie (>= 1.0.2, < 2.0)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.26"
4
+ VERSION = "0.1.28"
5
5
  end
data/lib/fsp_harvester.rb CHANGED
@@ -12,6 +12,7 @@ module FspHarvester
12
12
  links.each do |l|
13
13
  db << l if l.relation == 'describedby'
14
14
  end
15
+ warn db.length
15
16
  HarvesterTools::MetadataHarvester.extract_metadata_from_links(links: db, metadata: @meta) # everything is gathered into the @meta metadata object
16
17
  @meta
17
18
  end
data/lib/harvester.rb CHANGED
@@ -16,6 +16,8 @@ require 'digest'
16
16
  require 'open3'
17
17
  require 'metainspector'
18
18
  require 'rdf/xsd'
19
+ require 'linkeddata'
20
+ require 'rdf/raptor'
19
21
  require_relative './metadata_object'
20
22
  require_relative './constants'
21
23
  require_relative './web_utils'
@@ -10,16 +10,20 @@ module HarvesterTools
10
10
  @meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
11
11
 
12
12
  describedby = links.select { |l| l if l.relation == 'describedby' }
13
+ warn "metadata harvester links length #{describedby.length}"
13
14
 
14
15
  hvst = HarvesterTools::MetadataParser.new(metadata_object: @meta) # put here because the class variable for detecting duplicates should apply to all URIs
15
16
  describedby.each do |link|
16
17
  accepttype = FspHarvester::ACCEPT_STAR_HEADER
17
18
  accept = link.respond_to?('type') ? link.type : nil
19
+ accept.gsub!('json+ld', 'ld+json') # patch for bug in Dataverse 5.14 linksets
18
20
  accepttype = { 'Accept' => accept } if accept
19
21
 
20
22
  response = attempt_to_resolve(link: link, headers: accepttype)
23
+ warn "\n\nRESPONSE #{response}\n\n"
21
24
 
22
25
  abbreviation, content_type = attempt_to_detect_type(body: response.body, headers: response.headers)
26
+ warn "ABBR #{abbreviation} CONT #{content_type}\n\n"
23
27
  unless abbreviation
24
28
  @meta.add_warning(['017', url, header])
25
29
  @meta.comments << "WARN: metadata format returned from #{url} using Accept header #{header} is not recognized. Processing will end now.\n"
@@ -52,6 +56,7 @@ module HarvesterTools
52
56
 
53
57
  def self.process_according_to_type(body:, uri:, abbreviation:, content_type:, metadata:,
54
58
  harvester: HarvesterTools::MetadataParser.new(metadata_object: @meta))
59
+ warn "PROCESSING #{abbreviation}"
55
60
  case abbreviation
56
61
  when 'html'
57
62
  @meta.comments << 'INFO: Processing html'
@@ -63,6 +68,7 @@ module HarvesterTools
63
68
  @meta.comments << 'INFO: Processing json'
64
69
  harvester.process_json(body: body, metadata: @meta)
65
70
  when 'jsonld', 'rdfxml', 'turtle', 'ntriples', 'nquads'
71
+ warn "PROCESSING USING TURTLE"
66
72
  @meta.comments << 'INFO: Processing linked data'
67
73
  harvester.process_ld(body: body, content_type: content_type, metadata: @meta)
68
74
  when 'specialist'
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
-
3
2
  module HarvesterTools
4
3
  class Error < StandardError
5
4
  end
@@ -58,19 +57,24 @@ module HarvesterTools
58
57
 
59
58
  def self.parse_rdf(body:, content_type:, metadata:)
60
59
  @meta = metadata
60
+ warn "1 PARSING RDF #{body}"
61
61
  unless body
62
62
  metadata.comments << "CRITICAL: The response message body component appears to have no content.\n"
63
63
  metadata.add_warning(['018', '', ''])
64
64
  return
65
65
  end
66
+ warn "2 PARSING RDF #{body}"
66
67
 
67
68
  unless body.match(/\w/)
68
69
  metadata.comments << "CRITICAL: The response message body component appears to have no content.\n"
69
70
  metadata.add_warning(['018', '', ''])
70
71
  return
71
72
  end
73
+ warn "3 PARSING RDF #{body} content type #{content_type.class}"
72
74
 
73
75
  rdfformat = RDF::Format.for(content_type: content_type)
76
+ warn "FORMAT #{rdfformat}"
77
+ warn "FORMAT #{RDF::Format.for(content_type: 'text/turtle')}"
74
78
  unless rdfformat
75
79
  metadata.comments << "CRITICAL: Found what appears to be RDF (sample: #{body[0..300].delete!("\n")}), but it could not find a parser. Please report this error, along with the GUID of the resource, to the maintainer of the system.\n"
76
80
  metadata.add_warning(['018', '', ''])
data/tryme.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'require_all'
2
+ warn `pwd`
3
+ require_all './lib/'
4
+
5
+ guid = 'https://w3id.org/a2a-fair-metrics/22-http-html-citeas-describedby-mixed/'
6
+ guid = 'https://doi.org/10.7910/DVN/Z2JD58'
7
+ links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
8
+ meta = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
9
+ puts meta.graph.triples
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.26
4
+ version: 0.1.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-25 00:00:00.000000000 Z
11
+ date: 2023-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '3.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rdf-raptor
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 3.2.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 3.2.0
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: linkheaders-processor
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
180
  version: '1.1'
181
+ - !ruby/object:Gem::Dependency
182
+ name: require_all
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 3.0.0
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 3.0.0
167
195
  description: Metadata harvester that follows the FAIR Signposting specification.
168
196
  email:
169
197
  - markw@illuminae.com
@@ -172,6 +200,7 @@ extensions: []
172
200
  extra_rdoc_files: []
173
201
  files:
174
202
  - ".rspec_status"
203
+ - ".vscode/launch.json"
175
204
  - CHANGELOG.md
176
205
  - Gemfile
177
206
  - Gemfile.lock
@@ -180,7 +209,6 @@ files:
180
209
  - Rakefile
181
210
  - bin/console
182
211
  - bin/setup
183
- - launch.json
184
212
  - lib/constants.rb
185
213
  - lib/external_tools.rb
186
214
  - lib/fsp_harvester.rb
@@ -197,6 +225,7 @@ files:
197
225
  - lib/signposting_tests.rb
198
226
  - lib/warnings.json
199
227
  - lib/web_utils.rb
228
+ - tryme.rb
200
229
  homepage: https://github.com/markwilkinson/FAIR-Signposting-Harvester
201
230
  licenses:
202
231
  - MIT
data/launch.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "name": "RSpec - all",
3
- "type": "Ruby",
4
- "request": "launch",
5
- "cwd": "${workspaceRoot}",
6
- "program": "/home/osboxes/.rvm/gems/ruby-3.0.0/bin/rspec",
7
- "args": [
8
- "-I",
9
- "${workspaceRoot}"
10
- ]
11
- }