fsp_harvester 0.1.16 → 0.1.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aec11fd57963ffb176ddb88338b9e262027c9a7d39364089ae130fb4b628bf5b
4
- data.tar.gz: f8733a00de5c6c24a622235c18ba0dae208f5bac52d50607480e51fd563678c8
3
+ metadata.gz: 548e58dcb21d312f32780345f954f523e253c8fffaa4fbe9598032c3081adb1a
4
+ data.tar.gz: 7bea92aa5809b6651acb76aa2ea470c069e480783eb9d1acfd407a8a5fbd261c
5
5
  SHA512:
6
- metadata.gz: c484e41aa0305f34d0bf7f82cad60b9b02106ffe80b9371c99e77b199eef9ce52818222368b8b3a3ff73d94dba89b8d7fb815d29c95ca335772946e1e9762849
7
- data.tar.gz: '09dfdcc12b9176bc88c31a196893ae9ede6c35c2fd59271ca8fa5b1c29f0807ee82c8416ca5f8b7ff75c6caab71648b6d0e15d0976784cb7d34ff8686332be37'
6
+ metadata.gz: 5da8243a22e50244b30f5672b34e5d2f484a6dbf4125fb80a5c93cf1580dd561d6fb4c9354dcce44588f8223d5ec9eb7a8a70bd4904a67b0b59c34196194db0b
7
+ data.tar.gz: dfdc907ba268b0e509256000a0bd36adbcafe70a1490066acc91deef0255bc13cfe74dad3ddc31352ab6d6eff80fde6a8c3669924c9cb58febd8684a902c06a7
data/.rspec_status CHANGED
@@ -1,56 +1,57 @@
1
1
  example_id | status | run_time |
2
2
  ---------------------------------- | ------ | --------------- |
3
- ./spec/cite-as_spec.rb[1:1:1] | passed | 1.66 seconds |
4
- ./spec/cite-as_spec.rb[1:1:2] | passed | 1.13 seconds |
5
- ./spec/cite-as_spec.rb[1:1:3] | passed | 1.08 seconds |
6
- ./spec/cite-as_spec.rb[1:1:4] | passed | 1.68 seconds |
7
- ./spec/cite-as_spec.rb[1:1:5] | passed | 2.86 seconds |
8
- ./spec/cite-as_spec.rb[1:1:6] | passed | 2.11 seconds |
9
- ./spec/cite-as_spec.rb[1:1:7] | passed | 3.07 seconds |
10
- ./spec/cite-as_spec.rb[1:1:8] | passed | 2.13 seconds |
11
- ./spec/cite-as_spec.rb[1:1:9] | passed | 2.73 seconds |
12
- ./spec/cite-as_spec.rb[1:1:10] | passed | 2.64 seconds |
13
- ./spec/cite-as_spec.rb[1:1:11] | passed | 3.36 seconds |
14
- ./spec/cite-as_spec.rb[1:1:12] | passed | 2.26 seconds |
15
- ./spec/cite-as_spec.rb[1:1:13] | passed | 2.9 seconds |
16
- ./spec/cite-as_spec.rb[1:1:14] | passed | 2.31 seconds |
17
- ./spec/cite-as_spec.rb[1:1:15] | passed | 1.47 seconds |
18
- ./spec/cite-as_spec.rb[1:1:16] | passed | 1.22 seconds |
19
- ./spec/cite-as_spec.rb[1:1:17] | passed | 1.23 seconds |
20
- ./spec/cite-as_spec.rb[1:1:18] | passed | 1.2 seconds |
21
- ./spec/cite-as_spec.rb[1:1:19] | passed | 1.71 seconds |
22
- ./spec/cite-as_spec.rb[1:1:20] | passed | 1.66 seconds |
23
- ./spec/cite-as_spec.rb[1:1:21] | passed | 2.5 seconds |
24
- ./spec/cite-as_spec.rb[1:1:22] | passed | 1.54 seconds |
25
- ./spec/cite-as_spec.rb[1:1:23] | passed | 1.25 seconds |
26
- ./spec/cite-as_spec.rb[1:1:24] | failed | 1.35 seconds |
27
- ./spec/cite-as_spec.rb[1:1:25] | passed | 0.50811 seconds |
28
- ./spec/describedby_spec.rb[1:1:1] | passed | 3.45 seconds |
29
- ./spec/describedby_spec.rb[1:1:2] | passed | 1.3 seconds |
3
+ ./spec/cite-as_spec.rb[1:1:1] | passed | 1.36 seconds |
4
+ ./spec/cite-as_spec.rb[1:1:2] | passed | 1.31 seconds |
5
+ ./spec/cite-as_spec.rb[1:1:3] | passed | 1.53 seconds |
6
+ ./spec/cite-as_spec.rb[1:1:4] | passed | 1.84 seconds |
7
+ ./spec/cite-as_spec.rb[1:1:5] | passed | 2.77 seconds |
8
+ ./spec/cite-as_spec.rb[1:1:6] | passed | 2.06 seconds |
9
+ ./spec/cite-as_spec.rb[1:1:7] | passed | 2.96 seconds |
10
+ ./spec/cite-as_spec.rb[1:1:8] | passed | 2.28 seconds |
11
+ ./spec/cite-as_spec.rb[1:1:9] | passed | 2.83 seconds |
12
+ ./spec/cite-as_spec.rb[1:1:10] | passed | 2.14 seconds |
13
+ ./spec/cite-as_spec.rb[1:1:11] | passed | 3.19 seconds |
14
+ ./spec/cite-as_spec.rb[1:1:12] | passed | 3.06 seconds |
15
+ ./spec/cite-as_spec.rb[1:1:13] | passed | 2.77 seconds |
16
+ ./spec/cite-as_spec.rb[1:1:14] | passed | 2.2 seconds |
17
+ ./spec/cite-as_spec.rb[1:1:15] | passed | 1.19 seconds |
18
+ ./spec/cite-as_spec.rb[1:1:16] | passed | 1.1 seconds |
19
+ ./spec/cite-as_spec.rb[1:1:17] | passed | 1.31 seconds |
20
+ ./spec/cite-as_spec.rb[1:1:18] | passed | 1.14 seconds |
21
+ ./spec/cite-as_spec.rb[1:1:19] | passed | 1.68 seconds |
22
+ ./spec/cite-as_spec.rb[1:1:20] | passed | 1.69 seconds |
23
+ ./spec/cite-as_spec.rb[1:1:21] | passed | 2.35 seconds |
24
+ ./spec/cite-as_spec.rb[1:1:22] | passed | 1.12 seconds |
25
+ ./spec/cite-as_spec.rb[1:1:23] | passed | 1.16 seconds |
26
+ ./spec/cite-as_spec.rb[1:1:24] | failed | 1.45 seconds |
27
+ ./spec/cite-as_spec.rb[1:1:25] | passed | 0.72571 seconds |
28
+ ./spec/describedby_spec.rb[1:1:1] | passed | 3.09 seconds |
29
+ ./spec/describedby_spec.rb[1:1:2] | passed | 1.13 seconds |
30
30
  ./spec/describedby_spec.rb[1:1:3] | passed | 1.22 seconds |
31
- ./spec/describedby_spec.rb[1:1:4] | passed | 1.22 seconds |
32
- ./spec/describedby_spec.rb[1:1:5] | passed | 1.15 seconds |
33
- ./spec/describedby_spec.rb[1:1:6] | passed | 1.04 seconds |
34
- ./spec/describedby_spec.rb[1:1:7] | passed | 1.12 seconds |
35
- ./spec/describedby_spec.rb[1:1:8] | passed | 2.44 seconds |
36
- ./spec/describedby_spec.rb[1:1:9] | passed | 2.15 seconds |
37
- ./spec/describedby_spec.rb[1:1:10] | passed | 2.19 seconds |
38
- ./spec/describedby_spec.rb[1:1:11] | passed | 2.98 seconds |
39
- ./spec/describedby_spec.rb[1:1:12] | passed | 2.87 seconds |
40
- ./spec/describedby_spec.rb[1:1:13] | passed | 1.74 seconds |
41
- ./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
42
- ./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
43
- ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
44
- ./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
45
- ./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
46
- ./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
47
- ./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
48
- ./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
49
- ./spec/item_spec.rb[1:1:4] | passed | 1.76 seconds |
50
- ./spec/item_spec.rb[1:1:5] | passed | 2.43 seconds |
51
- ./spec/item_spec.rb[1:1:6] | passed | 2.23 seconds |
52
- ./spec/item_spec.rb[1:1:7] | passed | 2.94 seconds |
53
- ./spec/item_spec.rb[1:1:8] | passed | 0.52517 seconds |
54
- ./spec/type_spec.rb[1:1:1] | passed | 1.35 seconds |
55
- ./spec/type_spec.rb[1:1:2] | passed | 1.32 seconds |
56
- ./spec/type_spec.rb[1:1:3] | passed | 1.54 seconds |
31
+ ./spec/describedby_spec.rb[1:1:4] | passed | 1.11 seconds |
32
+ ./spec/describedby_spec.rb[1:1:5] | passed | 1.21 seconds |
33
+ ./spec/describedby_spec.rb[1:1:6] | passed | 1.24 seconds |
34
+ ./spec/describedby_spec.rb[1:1:7] | passed | 1.53 seconds |
35
+ ./spec/describedby_spec.rb[1:1:8] | passed | 2.53 seconds |
36
+ ./spec/describedby_spec.rb[1:1:9] | passed | 1.74 seconds |
37
+ ./spec/describedby_spec.rb[1:1:10] | passed | 2.59 seconds |
38
+ ./spec/describedby_spec.rb[1:1:11] | passed | 3.49 seconds |
39
+ ./spec/describedby_spec.rb[1:1:12] | passed | 3.82 seconds |
40
+ ./spec/describedby_spec.rb[1:1:13] | passed | 1.65 seconds |
41
+ ./spec/describedby_spec.rb[1:1:14] | passed | 2.19 seconds |
42
+ ./spec/describedby_spec.rb[1:1:15] | passed | 2.16 seconds |
43
+ ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00015 seconds |
44
+ ./spec/fsp_harvester_spec.rb[1:2] | passed | 2.49 seconds |
45
+ ./spec/fsp_harvester_spec.rb[1:3] | passed | 7.06 seconds |
46
+ ./spec/fsp_harvester_spec.rb[1:4] | passed | 2.74 seconds |
47
+ ./spec/item_spec.rb[1:1:1] | passed | 3.41 seconds |
48
+ ./spec/item_spec.rb[1:1:2] | passed | 2.84 seconds |
49
+ ./spec/item_spec.rb[1:1:3] | passed | 1.15 seconds |
50
+ ./spec/item_spec.rb[1:1:4] | passed | 1.74 seconds |
51
+ ./spec/item_spec.rb[1:1:5] | passed | 2.6 seconds |
52
+ ./spec/item_spec.rb[1:1:6] | passed | 2.32 seconds |
53
+ ./spec/item_spec.rb[1:1:7] | passed | 2.81 seconds |
54
+ ./spec/item_spec.rb[1:1:8] | passed | 0.49717 seconds |
55
+ ./spec/type_spec.rb[1:1:1] | passed | 1.25 seconds |
56
+ ./spec/type_spec.rb[1:1:2] | passed | 1.18 seconds |
57
+ ./spec/type_spec.rb[1:1:3] | passed | 1.58 seconds |
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.16)
4
+ fsp_harvester (0.1.19)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
- linkheaders-processor (~> 0.1.17)
7
+ linkheaders-processor (~> 0.1.18)
8
8
  metainspector (~> 5.11.2)
9
9
  parseconfig (~> 1.1)
10
10
  rake (~> 13.0)
@@ -127,7 +127,7 @@ GEM
127
127
  sparql (~> 3.2, >= 3.2.4)
128
128
  sparql-client (~> 3.2, >= 3.2.1)
129
129
  yaml-ld (~> 0.0)
130
- linkheaders-processor (0.1.17)
130
+ linkheaders-processor (0.1.18)
131
131
  json (~> 2.0)
132
132
  json-ld (~> 3.2)
133
133
  json-ld-preloaded (~> 3.2)
@@ -252,14 +252,14 @@ GEM
252
252
  diff-lcs (>= 1.2.0, < 2.0)
253
253
  rspec-support (~> 3.11.0)
254
254
  rspec-support (3.11.0)
255
- rubocop (1.34.1)
255
+ rubocop (1.35.0)
256
256
  json (~> 2.3)
257
257
  parallel (~> 1.10)
258
258
  parser (>= 3.1.2.1)
259
259
  rainbow (>= 2.2.2, < 4.0)
260
260
  regexp_parser (>= 1.8, < 3.0)
261
261
  rexml (>= 3.2.5, < 4.0)
262
- rubocop-ast (>= 1.20.0, < 2.0)
262
+ rubocop-ast (>= 1.20.1, < 2.0)
263
263
  ruby-progressbar (~> 1.7)
264
264
  unicode-display_width (>= 1.4.0, < 3.0)
265
265
  rubocop-ast (1.21.0)
data/README.md CHANGED
@@ -20,6 +20,39 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
+ ```
24
+ require 'fsp_harvester'
25
+
26
+ ENV['EXTRUCT_COMMAND'] = "extruct"
27
+ ENV['RDF_COMMAND'] = "/home/user/.rvm/gems/ruby-3.0.0/bin/rdf" # kelloggs distiller
28
+ ENV['TIKA_COMMAND'] = "http://localhost:9998/meta" # assumes using the docker version of tika
29
+
30
+ # to only follow the FAIR signposting specification:
31
+ links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
32
+
33
+ links.each do |link|
34
+ puts link.href
35
+ puts link.relation
36
+ end
37
+
38
+ # note, you don't need to catch the return value here. The metadata object that is passed in will be modified
39
+ metadata = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
40
+
41
+ linkeddata = metadata.graph
42
+ hashdata = metadata.hash
43
+ comments = metadata.comments
44
+ warnings = metadata.warnings
45
+
46
+ # if you want to try other things like content negotiation and "scraping" from HTML, do this:
47
+ # note, you don't need to catch the return value here. The metadata object that is passed in will be modified
48
+ metadata = HarvesterTools::BruteForce.begin_brute_force(guid: guid, metadata: metadata)
49
+
50
+ linkeddata = metadata.graph
51
+ hashdata = metadata.hash
52
+ comments = metadata.comments
53
+ warnings = metadata.warnings
54
+
55
+ ```
23
56
 
24
57
 
25
58
  ## Development
data/lib/constants.rb CHANGED
@@ -78,31 +78,31 @@ GUID_TYPES = {
78
78
  'ark' => Regexp.new(%r{^ark:/[^\s]+$})
79
79
  }
80
80
 
81
- CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
82
- extruct = CONFIG.dig(:extruct, :command)
83
- extruct ||= 'extruct'
81
+ # CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
82
+ # extruct = CONFIG.dig(:extruct, :command)
83
+ extruct = ENV['EXTRUCT_COMMAND'] || 'extruct'
84
84
  extruct.strip!
85
85
  case extruct
86
86
  when /[&|;`$\s]/
87
- abort 'The Extruct command in the config file appears to be subject to command injection. I will not continue'
87
+ abort 'The Extruct command appears to be subject to command injection. I will not continue'
88
88
  when /echo/i
89
- abort 'The Extruct command in the config file appears to be subject to command injection. I will not continue'
89
+ abort 'The Extruct command appears to be subject to command injection. I will not continue'
90
90
  end
91
91
  EXTRUCT_COMMAND = extruct
92
92
 
93
- rdf_command = CONFIG.dig(:rdf, :command)
94
- rdf_command ||= 'rdf'
93
+ # rdf_command = CONFIG.dig(:rdf, :command)
94
+ rdf_command = ENV['RDF_COMMAND'] || 'rdf'
95
95
  rdf_command.strip
96
96
  case rdf_command
97
97
  when /[&|;`$\s]/
98
- abort 'The RDF command in the config file appears to be subject to command injection. I will not continue'
98
+ abort 'The RDF command appears to be subject to command injection. I will not continue'
99
99
  when /echo/i
100
- abort 'The RDF command in the config file appears to be subject to command injection. I will not continue'
100
+ abort 'The RDF command appears to be subject to command injection. I will not continue'
101
101
  when !(/rdf$/ =~ $_)
102
102
  abort "this software requires that Kelloggs Distiller tool is used. The distiller command must end in 'rdf'"
103
103
  end
104
104
  RDF_COMMAND = rdf_command
105
105
 
106
- tika_command = CONFIG.dig(:tika, :command)
107
- tika_command ||= 'http://localhost:9998/meta'
106
+ # tika_command = CONFIG.dig(:tika, :command)
107
+ tika_command = ENV['TIKA_COMMAND'] || 'http://localhost:9998/meta'
108
108
  TIKA_COMMAND = tika_command
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.16"
4
+ VERSION = "0.1.19"
5
5
  end
@@ -4,14 +4,14 @@ module HarvesterTools
4
4
 
5
5
  class Utils
6
6
 
7
- def self.resolve_guid(guid:)
8
- @meta = HarvesterTools::MetadataObject.new
7
+ def self.resolve_guid(guid:, metadata: HarvesterTools::MetadataObject.new)
8
+ @meta = metadata
9
9
  @meta.all_uris = [guid]
10
10
  type, url = convertToURL(guid: guid)
11
11
  links = Array.new
12
12
  if type
13
13
  links = resolve_url(url: url, metadata: @meta)
14
- @meta.links = @meta.links | links
14
+ @meta.links = @meta.links.append(*links)
15
15
  else
16
16
  @meta.add_warning(['006', guid, ''])
17
17
  @meta.comments << "FATAL: GUID type not recognized.\n"
@@ -1,15 +1,21 @@
1
1
  module HarvesterTools
2
2
  class MetadataObject
3
- attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
3
+ attr_accessor :id, :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris, :tested_guid, :score, :version, :date # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
4
4
 
5
- def initialize(_params = {}) # get a name from the "new" call, or set a default
5
+ def initialize(id: "unidentified_metadata") # get a name from the "new" call, or set a default
6
+ @id = id
6
7
  @hash = {}
7
8
  @graph = RDF::Graph.new
8
9
  @comments = []
9
10
  @warnings = []
10
11
  @full_response = []
11
12
  @links = []
13
+ @guidtype = ""
12
14
  @all_uris = []
15
+ @tested_guid = ""
16
+ @score = 0
17
+ @version = '0.0'
18
+ @date = Time.now.strftime('%Y-%m-%dT%H:%M:%S.%L%z')
13
19
  w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
14
20
  #@warn = File.read("./lib/warnings.json")
15
21
  @warn = JSON.parse(w)
File without changes
data/lib/warnings.json CHANGED
@@ -115,5 +115,12 @@
115
115
  {"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
116
116
  {"Validator": "https://jsononline.net/json-validator"}],
117
117
  "severity": "WARN"
118
+ },
119
+ "600": {
120
+ "message": "Data identifier cannot be unambiguously determined, therefore cannot be tested against known persistent identifier schemas",
121
+ "linkout": [],
122
+ "severity": "FAILURE"
118
123
  }
124
+
125
+
119
126
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.16
4
+ version: 0.1.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-12 00:00:00.000000000 Z
11
+ date: 2022-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.1.17
47
+ version: 0.1.18
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.1.17
54
+ version: 0.1.18
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metainspector
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -182,9 +182,6 @@ files:
182
182
  - bin/setup
183
183
  - example_test.rb
184
184
  - launch.json
185
- - lib/config.conf
186
- - lib/config.conf_docker
187
- - lib/config.conf_local
188
185
  - lib/constants.rb
189
186
  - lib/external_tools.rb
190
187
  - lib/fsp_harvester.rb
@@ -195,6 +192,9 @@ files:
195
192
  - lib/metadata_harvester.rb
196
193
  - lib/metadata_object.rb
197
194
  - lib/metadata_parser.rb
195
+ - lib/obselete_config.conf
196
+ - lib/obselete_config.conf_docker
197
+ - lib/obselete_config.conf_local
198
198
  - lib/signposting_tests.rb
199
199
  - lib/warnings.json
200
200
  - lib/web_utils.rb