fsp_harvester 0.1.26 → 0.1.27

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28c9dc04a4c6b47876df2299f2cf3c5538abd17ca3dfb0e5855cd730542d576b
4
- data.tar.gz: 1cf14b8a2a63b6a48e2f903de2142ecb90ace3b8ba0358016f40eb63672347e6
3
+ metadata.gz: 6a12db5f7c5c6ade0a6009a073db5c499453ca68d19fc7ea436f8bde29fafded
4
+ data.tar.gz: ca57a628a60a64bab692a2fcbd020f18fdc18604f272530435d66d4ecd5be9ba
5
5
  SHA512:
6
- metadata.gz: 53736a81539ffb3a9eac6876722c1eaa15451ec611c7f18e650c6c0d1d4ef33e1edbc1ee2d5c95d82190af61de7defd80c27686c433233ccdefb885981d40999
7
- data.tar.gz: 7ba4389b25038fad44c298315d3b14ac2bf705d59c53bc2174624ef194b226da22f3303358a611c283facac07a8e05b076c4fff0312c3721089abcc83b6952d3
6
+ metadata.gz: 2f07c82fd470041116a98723ac5b8bf151552951f3caa07f00c720b58ff768d1daae57641664ec0b93cdf2205d66e99cfeb5d05371342c626ed5d7d7796944f4
7
+ data.tar.gz: 0253eac41a6b38f427accfb68239c3e6274ac9dd661a220e466164f49a40ac6de2b3afd37d263d2478faba20406cc6f296a30efca72c69c26c0f4a5a507d7444
data/.rspec_status CHANGED
@@ -1,60 +1,60 @@
1
- example_id | status | run_time |
2
- ---------------------------------- | ------ | --------------------- |
3
- ./spec/cite-as_spec.rb[1:1:1] | passed | 1.87 seconds |
4
- ./spec/cite-as_spec.rb[1:1:2] | passed | 1.3 seconds |
5
- ./spec/cite-as_spec.rb[1:1:3] | passed | 1.53 seconds |
6
- ./spec/cite-as_spec.rb[1:1:4] | passed | 2.09 seconds |
7
- ./spec/cite-as_spec.rb[1:1:5] | passed | 8.09 seconds |
8
- ./spec/cite-as_spec.rb[1:1:6] | passed | 2.63 seconds |
9
- ./spec/cite-as_spec.rb[1:1:7] | passed | 2.9 seconds |
10
- ./spec/cite-as_spec.rb[1:1:8] | passed | 2.21 seconds |
11
- ./spec/cite-as_spec.rb[1:1:9] | passed | 2.85 seconds |
12
- ./spec/cite-as_spec.rb[1:1:10] | passed | 2.89 seconds |
13
- ./spec/cite-as_spec.rb[1:1:11] | passed | 3.16 seconds |
14
- ./spec/cite-as_spec.rb[1:1:12] | passed | 2.23 seconds |
15
- ./spec/cite-as_spec.rb[1:1:13] | passed | 2.92 seconds |
16
- ./spec/cite-as_spec.rb[1:1:14] | passed | 2.8 seconds |
17
- ./spec/cite-as_spec.rb[1:1:15] | passed | 1.21 seconds |
18
- ./spec/cite-as_spec.rb[1:1:16] | passed | 1.28 seconds |
19
- ./spec/cite-as_spec.rb[1:1:17] | passed | 1.19 seconds |
20
- ./spec/cite-as_spec.rb[1:1:18] | passed | 1.24 seconds |
21
- ./spec/cite-as_spec.rb[1:1:19] | passed | 1.7 seconds |
22
- ./spec/cite-as_spec.rb[1:1:20] | passed | 1.74 seconds |
23
- ./spec/cite-as_spec.rb[1:1:21] | passed | 2.75 seconds |
24
- ./spec/cite-as_spec.rb[1:1:22] | passed | 1.35 seconds |
25
- ./spec/cite-as_spec.rb[1:1:23] | passed | 1.19 seconds |
26
- ./spec/cite-as_spec.rb[1:1:24] | failed | 1.2 seconds |
27
- ./spec/cite-as_spec.rb[1:1:25] | passed | 0.60282 seconds |
28
- ./spec/describedby_spec.rb[1:1:1] | passed | 3.23 seconds |
29
- ./spec/describedby_spec.rb[1:1:2] | passed | 1.43 seconds |
30
- ./spec/describedby_spec.rb[1:1:3] | passed | 1.31 seconds |
31
- ./spec/describedby_spec.rb[1:1:4] | passed | 1.37 seconds |
32
- ./spec/describedby_spec.rb[1:1:5] | passed | 1.24 seconds |
33
- ./spec/describedby_spec.rb[1:1:6] | passed | 1.09 seconds |
34
- ./spec/describedby_spec.rb[1:1:7] | passed | 1.03 seconds |
35
- ./spec/describedby_spec.rb[1:1:8] | passed | 2.28 seconds |
36
- ./spec/describedby_spec.rb[1:1:9] | passed | 1.84 seconds |
37
- ./spec/describedby_spec.rb[1:1:10] | passed | 2.23 seconds |
38
- ./spec/describedby_spec.rb[1:1:11] | passed | 2.97 seconds |
39
- ./spec/describedby_spec.rb[1:1:12] | passed | 2.97 seconds |
40
- ./spec/describedby_spec.rb[1:1:13] | passed | 1.65 seconds |
41
- ./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
42
- ./spec/describedby_spec.rb[1:1:15] | passed | 2.53 seconds |
43
- ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00178 seconds |
44
- ./spec/fsp_harvester_spec.rb[1:2] | passed | 5.49 seconds |
45
- ./spec/fsp_harvester_spec.rb[1:3] | passed | 39.87 seconds |
46
- ./spec/fsp_harvester_spec.rb[1:4] | passed | 2.62 seconds |
47
- ./spec/fsp_harvester_spec.rb[1:5] | passed | 2.61 seconds |
48
- ./spec/fsp_harvester_spec.rb[1:6] | failed | 54.05 seconds |
49
- ./spec/fsp_harvester_spec.rb[1:7] | passed | 1 minute 26.9 seconds |
50
- ./spec/item_spec.rb[1:1:1] | passed | 3.8 seconds |
51
- ./spec/item_spec.rb[1:1:2] | passed | 3.3 seconds |
52
- ./spec/item_spec.rb[1:1:3] | passed | 1.33 seconds |
53
- ./spec/item_spec.rb[1:1:4] | passed | 1.68 seconds |
54
- ./spec/item_spec.rb[1:1:5] | passed | 2.44 seconds |
55
- ./spec/item_spec.rb[1:1:6] | passed | 2.64 seconds |
56
- ./spec/item_spec.rb[1:1:7] | passed | 3.02 seconds |
57
- ./spec/item_spec.rb[1:1:8] | passed | 0.49403 seconds |
58
- ./spec/type_spec.rb[1:1:1] | passed | 1.23 seconds |
59
- ./spec/type_spec.rb[1:1:2] | passed | 1.25 seconds |
60
- ./spec/type_spec.rb[1:1:3] | passed | 1.57 seconds |
1
+ example_id | status | run_time |
2
+ ---------------------------------- | ------ | --------------- |
3
+ ./spec/cite-as_spec.rb[1:1:1] | passed | 0.92505 seconds |
4
+ ./spec/cite-as_spec.rb[1:1:2] | passed | 0.91715 seconds |
5
+ ./spec/cite-as_spec.rb[1:1:3] | passed | 0.59784 seconds |
6
+ ./spec/cite-as_spec.rb[1:1:4] | passed | 1.09 seconds |
7
+ ./spec/cite-as_spec.rb[1:1:5] | passed | 2.43 seconds |
8
+ ./spec/cite-as_spec.rb[1:1:6] | passed | 1.49 seconds |
9
+ ./spec/cite-as_spec.rb[1:1:7] | passed | 2.5 seconds |
10
+ ./spec/cite-as_spec.rb[1:1:8] | passed | 1.43 seconds |
11
+ ./spec/cite-as_spec.rb[1:1:9] | passed | 3.36 seconds |
12
+ ./spec/cite-as_spec.rb[1:1:10] | passed | 1.19 seconds |
13
+ ./spec/cite-as_spec.rb[1:1:11] | passed | 3.03 seconds |
14
+ ./spec/cite-as_spec.rb[1:1:12] | passed | 1.21 seconds |
15
+ ./spec/cite-as_spec.rb[1:1:13] | passed | 2.4 seconds |
16
+ ./spec/cite-as_spec.rb[1:1:14] | passed | 1.27 seconds |
17
+ ./spec/cite-as_spec.rb[1:1:15] | passed | 0.91669 seconds |
18
+ ./spec/cite-as_spec.rb[1:1:16] | passed | 0.8317 seconds |
19
+ ./spec/cite-as_spec.rb[1:1:17] | passed | 0.95717 seconds |
20
+ ./spec/cite-as_spec.rb[1:1:18] | passed | 0.86864 seconds |
21
+ ./spec/cite-as_spec.rb[1:1:19] | passed | 2.17 seconds |
22
+ ./spec/cite-as_spec.rb[1:1:20] | passed | 1.24 seconds |
23
+ ./spec/cite-as_spec.rb[1:1:21] | passed | 1.79 seconds |
24
+ ./spec/cite-as_spec.rb[1:1:22] | passed | 0.89065 seconds |
25
+ ./spec/cite-as_spec.rb[1:1:23] | passed | 0.81873 seconds |
26
+ ./spec/cite-as_spec.rb[1:1:24] | passed | 0.97719 seconds |
27
+ ./spec/cite-as_spec.rb[1:1:25] | passed | 0.25903 seconds |
28
+ ./spec/describedby_spec.rb[1:1:1] | passed | 1.66 seconds |
29
+ ./spec/describedby_spec.rb[1:1:2] | passed | 0.92657 seconds |
30
+ ./spec/describedby_spec.rb[1:1:3] | passed | 0.92638 seconds |
31
+ ./spec/describedby_spec.rb[1:1:4] | passed | 0.83822 seconds |
32
+ ./spec/describedby_spec.rb[1:1:5] | passed | 0.83941 seconds |
33
+ ./spec/describedby_spec.rb[1:1:6] | passed | 0.55956 seconds |
34
+ ./spec/describedby_spec.rb[1:1:7] | passed | 0.60106 seconds |
35
+ ./spec/describedby_spec.rb[1:1:8] | passed | 1.76 seconds |
36
+ ./spec/describedby_spec.rb[1:1:9] | passed | 1.14 seconds |
37
+ ./spec/describedby_spec.rb[1:1:10] | passed | 1.35 seconds |
38
+ ./spec/describedby_spec.rb[1:1:11] | passed | 2.01 seconds |
39
+ ./spec/describedby_spec.rb[1:1:12] | passed | 1.9 seconds |
40
+ ./spec/describedby_spec.rb[1:1:13] | passed | 1.16 seconds |
41
+ ./spec/describedby_spec.rb[1:1:14] | passed | 1.62 seconds |
42
+ ./spec/describedby_spec.rb[1:1:15] | passed | 1.39 seconds |
43
+ ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.0001 seconds |
44
+ ./spec/fsp_harvester_spec.rb[1:2] | passed | 1.46 seconds |
45
+ ./spec/fsp_harvester_spec.rb[1:3] | passed | 20.9 seconds |
46
+ ./spec/fsp_harvester_spec.rb[1:4] | passed | 2.11 seconds |
47
+ ./spec/fsp_harvester_spec.rb[1:5] | passed | 2.16 seconds |
48
+ ./spec/fsp_harvester_spec.rb[1:6] | passed | 23.24 seconds |
49
+ ./spec/fsp_harvester_spec.rb[1:7] | passed | 20.85 seconds |
50
+ ./spec/item_spec.rb[1:1:1] | passed | 1.72 seconds |
51
+ ./spec/item_spec.rb[1:1:2] | passed | 1.61 seconds |
52
+ ./spec/item_spec.rb[1:1:3] | passed | 0.89622 seconds |
53
+ ./spec/item_spec.rb[1:1:4] | passed | 1.18 seconds |
54
+ ./spec/item_spec.rb[1:1:5] | passed | 1.32 seconds |
55
+ ./spec/item_spec.rb[1:1:6] | passed | 1.24 seconds |
56
+ ./spec/item_spec.rb[1:1:7] | passed | 1.82 seconds |
57
+ ./spec/item_spec.rb[1:1:8] | passed | 0.27709 seconds |
58
+ ./spec/type_spec.rb[1:1:1] | passed | 0.80665 seconds |
59
+ ./spec/type_spec.rb[1:1:2] | passed | 0.84617 seconds |
60
+ ./spec/type_spec.rb[1:1:3] | passed | 0.82173 seconds |
data/.vscode/launch.json ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ "type": "rdbg",
9
+ "name": "Debug current file with rdbg",
10
+ "request": "launch",
11
+ "script": "${file}",
12
+ "args": [],
13
+ "askParameters": true
14
+ },
15
+ {
16
+ "type": "rdbg",
17
+ "name": "Attach with rdbg",
18
+ "request": "attach"
19
+ }
20
+ ]
21
+ }
data/Gemfile.lock CHANGED
@@ -1,13 +1,15 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.26)
4
+ fsp_harvester (0.1.27)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
7
  linkheaders-processor (~> 0.1.19)
8
8
  metainspector (~> 5.11.2)
9
9
  parseconfig (~> 1.1)
10
10
  rake (~> 13.0)
11
+ rdf-raptor (~> 3.2.0)
12
+ require_all (~> 3.0.0)
11
13
  rest-client (~> 2.1)
12
14
  rspec (~> 3.11)
13
15
  rubocop (~> 1.7)
@@ -66,6 +68,7 @@ GEM
66
68
  faraday_middleware (1.2.0)
67
69
  faraday (~> 1.0)
68
70
  fastimage (2.2.7)
71
+ ffi (1.15.5)
69
72
  haml (6.1.2)
70
73
  temple (>= 0.8.2)
71
74
  thor
@@ -192,6 +195,9 @@ GEM
192
195
  rdf (~> 3.3)
193
196
  rdf-ordered-repo (3.3.0)
194
197
  rdf (~> 3.3)
198
+ rdf-raptor (3.2.0)
199
+ ffi (~> 1.15)
200
+ rdf (~> 3.2)
195
201
  rdf-rdfa (3.3.0)
196
202
  haml (~> 6.1)
197
203
  htmlentities (~> 4.3)
@@ -230,6 +236,7 @@ GEM
230
236
  rdf (~> 3.3)
231
237
  rexml (~> 3.2)
232
238
  regexp_parser (2.5.0)
239
+ require_all (3.0.0)
233
240
  rest-client (2.1.0)
234
241
  http-accept (>= 1.7.0, < 2.0)
235
242
  http-cookie (>= 1.0.2, < 2.0)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.26"
4
+ VERSION = "0.1.27"
5
5
  end
data/lib/fsp_harvester.rb CHANGED
@@ -12,6 +12,7 @@ module FspHarvester
12
12
  links.each do |l|
13
13
  db << l if l.relation == 'describedby'
14
14
  end
15
+ warn db.length
15
16
  HarvesterTools::MetadataHarvester.extract_metadata_from_links(links: db, metadata: @meta) # everything is gathered into the @meta metadata object
16
17
  @meta
17
18
  end
data/lib/harvester.rb CHANGED
@@ -16,6 +16,8 @@ require 'digest'
16
16
  require 'open3'
17
17
  require 'metainspector'
18
18
  require 'rdf/xsd'
19
+ require 'linkeddata'
20
+ require 'rdf/raptor'
19
21
  require_relative './metadata_object'
20
22
  require_relative './constants'
21
23
  require_relative './web_utils'
@@ -10,6 +10,7 @@ module HarvesterTools
10
10
  @meta.comments << 'INFO: now collecting both linked data and hash-style data using the harvested links'
11
11
 
12
12
  describedby = links.select { |l| l if l.relation == 'describedby' }
13
+ warn "metadata harvester links length #{describedby.length}"
13
14
 
14
15
  hvst = HarvesterTools::MetadataParser.new(metadata_object: @meta) # put here because the class variable for detecting duplicates should apply to all URIs
15
16
  describedby.each do |link|
@@ -18,8 +19,10 @@ module HarvesterTools
18
19
  accepttype = { 'Accept' => accept } if accept
19
20
 
20
21
  response = attempt_to_resolve(link: link, headers: accepttype)
22
+ warn "\n\nRESPONSE #{response}\n\n"
21
23
 
22
24
  abbreviation, content_type = attempt_to_detect_type(body: response.body, headers: response.headers)
25
+ warn "ABBR #{abbreviation} CONT #{content_type}\n\n"
23
26
  unless abbreviation
24
27
  @meta.add_warning(['017', url, header])
25
28
  @meta.comments << "WARN: metadata format returned from #{url} using Accept header #{header} is not recognized. Processing will end now.\n"
@@ -52,6 +55,7 @@ module HarvesterTools
52
55
 
53
56
  def self.process_according_to_type(body:, uri:, abbreviation:, content_type:, metadata:,
54
57
  harvester: HarvesterTools::MetadataParser.new(metadata_object: @meta))
58
+ warn "PROCESSING #{abbreviation}"
55
59
  case abbreviation
56
60
  when 'html'
57
61
  @meta.comments << 'INFO: Processing html'
@@ -63,6 +67,7 @@ module HarvesterTools
63
67
  @meta.comments << 'INFO: Processing json'
64
68
  harvester.process_json(body: body, metadata: @meta)
65
69
  when 'jsonld', 'rdfxml', 'turtle', 'ntriples', 'nquads'
70
+ warn "PROCESSING USING TURTLE"
66
71
  @meta.comments << 'INFO: Processing linked data'
67
72
  harvester.process_ld(body: body, content_type: content_type, metadata: @meta)
68
73
  when 'specialist'
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
-
3
2
  module HarvesterTools
4
3
  class Error < StandardError
5
4
  end
@@ -58,19 +57,24 @@ module HarvesterTools
58
57
 
59
58
  def self.parse_rdf(body:, content_type:, metadata:)
60
59
  @meta = metadata
60
+ warn "1 PARSING RDF #{body}"
61
61
  unless body
62
62
  metadata.comments << "CRITICAL: The response message body component appears to have no content.\n"
63
63
  metadata.add_warning(['018', '', ''])
64
64
  return
65
65
  end
66
+ warn "2 PARSING RDF #{body}"
66
67
 
67
68
  unless body.match(/\w/)
68
69
  metadata.comments << "CRITICAL: The response message body component appears to have no content.\n"
69
70
  metadata.add_warning(['018', '', ''])
70
71
  return
71
72
  end
73
+ warn "3 PARSING RDF #{body} content type #{content_type.class}"
72
74
 
73
75
  rdfformat = RDF::Format.for(content_type: content_type)
76
+ warn "FORMAT #{rdfformat}"
77
+ warn "FORMAT #{RDF::Format.for(content_type: 'text/turtle')}"
74
78
  unless rdfformat
75
79
  metadata.comments << "CRITICAL: Found what appears to be RDF (sample: #{body[0..300].delete!("\n")}), but it could not find a parser. Please report this error, along with the GUID of the resource, to the maintainer of the system.\n"
76
80
  metadata.add_warning(['018', '', ''])
data/tryme.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'require_all'
2
+ warn `pwd`
3
+ require_all './lib/'
4
+
5
+ guid = 'https://w3id.org/a2a-fair-metrics/22-http-html-citeas-describedby-mixed/'
6
+ guid = 'https://doi.org/10.7910/DVN/Z2JD58'
7
+ links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
8
+ meta = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
9
+ puts meta.graph.triples
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.26
4
+ version: 0.1.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-25 00:00:00.000000000 Z
11
+ date: 2023-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '3.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rdf-raptor
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 3.2.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 3.2.0
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: linkheaders-processor
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
180
  version: '1.1'
181
+ - !ruby/object:Gem::Dependency
182
+ name: require_all
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 3.0.0
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 3.0.0
167
195
  description: Metadata harvester that follows the FAIR Signposting specification.
168
196
  email:
169
197
  - markw@illuminae.com
@@ -172,6 +200,7 @@ extensions: []
172
200
  extra_rdoc_files: []
173
201
  files:
174
202
  - ".rspec_status"
203
+ - ".vscode/launch.json"
175
204
  - CHANGELOG.md
176
205
  - Gemfile
177
206
  - Gemfile.lock
@@ -180,7 +209,6 @@ files:
180
209
  - Rakefile
181
210
  - bin/console
182
211
  - bin/setup
183
- - launch.json
184
212
  - lib/constants.rb
185
213
  - lib/external_tools.rb
186
214
  - lib/fsp_harvester.rb
@@ -197,6 +225,7 @@ files:
197
225
  - lib/signposting_tests.rb
198
226
  - lib/warnings.json
199
227
  - lib/web_utils.rb
228
+ - tryme.rb
200
229
  homepage: https://github.com/markwilkinson/FAIR-Signposting-Harvester
201
230
  licenses:
202
231
  - MIT
data/launch.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "name": "RSpec - all",
3
- "type": "Ruby",
4
- "request": "launch",
5
- "cwd": "${workspaceRoot}",
6
- "program": "/home/osboxes/.rvm/gems/ruby-3.0.0/bin/rspec",
7
- "args": [
8
- "-I",
9
- "${workspaceRoot}"
10
- ]
11
- }