fsp_harvester 0.1.15 → 0.1.18

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e0ffd5048e360ce8e8cced890a586664af797065d2c8d6312927d694835e84b
4
- data.tar.gz: 840269a8b28da70bed8c5e46674ff3730cbee66f624064cab84f98d5b3a2ff00
3
+ metadata.gz: 3f6bed703ae1a03ff30a1abb88e54b033a6ffbd24df5cacc70b2e0662af7e1be
4
+ data.tar.gz: 49370b82123eb0b7b6c92fd603996cec6909becf35fc789885de218aae0fb446
5
5
  SHA512:
6
- metadata.gz: 4c01cc88a8f57e024c7aeed89a8251d97b130bca987dc14d914e87fa87ea744d3de7ab11ca340b0456f295edafdd872d4f63d0f0ef23dbe9c3cc8ebc97a64ae5
7
- data.tar.gz: 2c274758ec874bb1c25ebd5286ecbc2b7e91205430a94cf3ada9c7350511fe362532f2c6d213a6fd6657ccdf7184df9c0eaf9c2461c0d25dc87da00b2aded390
6
+ metadata.gz: 2abbbfba153e08b83e832640942c978f9612437d1ee7fc3891122be96bfd8da442c2460d8a7b6f303ecc469b6de568907ed3e592a359513d8ab23966fad39786
7
+ data.tar.gz: 146004aae9a8495523b2c2578a84a8268b1398f005fdd98788b816f256c98128c34a537e68c078fa681e262bec3a2d0890e9226168b85735c95742f88848a0d0
data/.rspec_status CHANGED
@@ -25,24 +25,25 @@ example_id | status | run_time |
25
25
  ./spec/cite-as_spec.rb[1:1:23] | passed | 1.25 seconds |
26
26
  ./spec/cite-as_spec.rb[1:1:24] | failed | 1.35 seconds |
27
27
  ./spec/cite-as_spec.rb[1:1:25] | passed | 0.50811 seconds |
28
- ./spec/describedby_spec.rb[1:1:1] | passed | 3.45 seconds |
29
- ./spec/describedby_spec.rb[1:1:2] | passed | 1.3 seconds |
30
- ./spec/describedby_spec.rb[1:1:3] | passed | 1.22 seconds |
31
- ./spec/describedby_spec.rb[1:1:4] | passed | 1.22 seconds |
32
- ./spec/describedby_spec.rb[1:1:5] | passed | 1.15 seconds |
33
- ./spec/describedby_spec.rb[1:1:6] | passed | 1.04 seconds |
34
- ./spec/describedby_spec.rb[1:1:7] | passed | 1.12 seconds |
35
- ./spec/describedby_spec.rb[1:1:8] | passed | 2.44 seconds |
36
- ./spec/describedby_spec.rb[1:1:9] | passed | 2.15 seconds |
37
- ./spec/describedby_spec.rb[1:1:10] | passed | 2.19 seconds |
38
- ./spec/describedby_spec.rb[1:1:11] | passed | 2.98 seconds |
39
- ./spec/describedby_spec.rb[1:1:12] | passed | 2.87 seconds |
40
- ./spec/describedby_spec.rb[1:1:13] | passed | 1.74 seconds |
41
- ./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
42
- ./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
43
- ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
44
- ./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
45
- ./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
28
+ ./spec/describedby_spec.rb[1:1:1] | passed | 4.39 seconds |
29
+ ./spec/describedby_spec.rb[1:1:2] | passed | 1.63 seconds |
30
+ ./spec/describedby_spec.rb[1:1:3] | passed | 1.49 seconds |
31
+ ./spec/describedby_spec.rb[1:1:4] | passed | 1.21 seconds |
32
+ ./spec/describedby_spec.rb[1:1:5] | passed | 1.17 seconds |
33
+ ./spec/describedby_spec.rb[1:1:6] | passed | 1.11 seconds |
34
+ ./spec/describedby_spec.rb[1:1:7] | passed | 1.11 seconds |
35
+ ./spec/describedby_spec.rb[1:1:8] | passed | 2.34 seconds |
36
+ ./spec/describedby_spec.rb[1:1:9] | passed | 2.09 seconds |
37
+ ./spec/describedby_spec.rb[1:1:10] | passed | 2.69 seconds |
38
+ ./spec/describedby_spec.rb[1:1:11] | passed | 3.02 seconds |
39
+ ./spec/describedby_spec.rb[1:1:12] | passed | 2.9 seconds |
40
+ ./spec/describedby_spec.rb[1:1:13] | passed | 2.27 seconds |
41
+ ./spec/describedby_spec.rb[1:1:14] | passed | 2.53 seconds |
42
+ ./spec/describedby_spec.rb[1:1:15] | passed | 2.33 seconds |
43
+ ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00101 seconds |
44
+ ./spec/fsp_harvester_spec.rb[1:2] | passed | 3.15 seconds |
45
+ ./spec/fsp_harvester_spec.rb[1:3] | passed | 7.1 seconds |
46
+ ./spec/fsp_harvester_spec.rb[1:4] | passed | 2.47 seconds |
46
47
  ./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
47
48
  ./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
48
49
  ./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.15)
4
+ fsp_harvester (0.1.18)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
7
  linkheaders-processor (~> 0.1.17)
data/README.md CHANGED
@@ -20,6 +20,39 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
+ ```
24
+ require 'fsp_harvester'
25
+
26
+ ENV['EXTRUCT_COMMAND'] = "extruct"
27
+ ENV['RDF_COMMAND'] = "/home/user/.rvm/gems/ruby-3.0.0/bin/rdf" # Kellogg's Distiller
28
+ ENV['TIKA_COMMAND'] = "http://localhost:9998/meta" # assumes using the docker version of tika
29
+
30
+ # to only follow the FAIR signposting specification:
31
+ links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
32
+
33
+ links.each do |link|
34
+ puts link.href
35
+ puts link.relation
36
+ end
37
+
38
+ # Note: you don't need to capture the return value here; the metadata object that is passed in is modified.
39
+ metadata = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
40
+
41
+ linkeddata = metadata.graph
42
+ hashdata = metadata.hash
43
+ comments = metadata.comments
44
+ warnings = metadata.warnings
45
+
46
+ # if you want to try other things like content negotiation and "scraping" from HTML, do this:
47
+ # Note: you don't need to capture the return value here; the metadata object that is passed in is modified.
48
+ metadata = HarvesterTools::BruteForce.begin_brute_force(guid: guid, metadata: metadata)
49
+
50
+ linkeddata = metadata.graph
51
+ hashdata = metadata.hash
52
+ comments = metadata.comments
53
+ warnings = metadata.warnings
54
+
55
+ ```
23
56
 
24
57
 
25
58
  ## Development
data/lib/constants.rb CHANGED
@@ -78,31 +78,31 @@ GUID_TYPES = {
78
78
  'ark' => Regexp.new(%r{^ark:/[^\s]+$})
79
79
  }
80
80
 
81
- CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
82
- extruct = CONFIG.dig(:extruct, :command)
83
- extruct ||= 'extruct'
81
+ # CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
82
+ # extruct = CONFIG.dig(:extruct, :command)
83
+ extruct = ENV['EXTRUCT_COMMAND'] || 'extruct'
84
84
  extruct.strip!
85
85
  case extruct
86
86
  when /[&|;`$\s]/
87
- abort 'The Extruct command in the config file appears to be subject to command injection. I will not continue'
87
+ abort 'The Extruct command appears to be subject to command injection. I will not continue'
88
88
  when /echo/i
89
- abort 'The Extruct command in the config file appears to be subject to command injection. I will not continue'
89
+ abort 'The Extruct command appears to be subject to command injection. I will not continue'
90
90
  end
91
91
  EXTRUCT_COMMAND = extruct
92
92
 
93
- rdf_command = CONFIG.dig(:rdf, :command)
94
- rdf_command ||= 'rdf'
93
+ # rdf_command = CONFIG.dig(:rdf, :command)
94
+ rdf_command = ENV['RDF_COMMAND'] || 'rdf'
95
95
  rdf_command.strip
96
96
  case rdf_command
97
97
  when /[&|;`$\s]/
98
- abort 'The RDF command in the config file appears to be subject to command injection. I will not continue'
98
+ abort 'The RDF command appears to be subject to command injection. I will not continue'
99
99
  when /echo/i
100
- abort 'The RDF command in the config file appears to be subject to command injection. I will not continue'
100
+ abort 'The RDF command appears to be subject to command injection. I will not continue'
101
101
  when !(/rdf$/ =~ $_)
102
102
  abort "this software requires that Kelloggs Distiller tool is used. The distiller command must end in 'rdf'"
103
103
  end
104
104
  RDF_COMMAND = rdf_command
105
105
 
106
- tika_command = CONFIG.dig(:tika, :command)
107
- tika_command ||= 'http://localhost:9998/meta'
106
+ # tika_command = CONFIG.dig(:tika, :command)
107
+ tika_command = ENV['TIKA_COMMAND'] || 'http://localhost:9998/meta'
108
108
  TIKA_COMMAND = tika_command
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.15"
4
+ VERSION = "0.1.18"
5
5
  end
@@ -4,8 +4,8 @@ module HarvesterTools
4
4
 
5
5
  class Utils
6
6
 
7
- def self.resolve_guid(guid:)
8
- @meta = HarvesterTools::MetadataObject.new
7
+ def self.resolve_guid(guid:, metadata: HarvesterTools::MetadataObject.new)
8
+ @meta = metadata
9
9
  @meta.all_uris = [guid]
10
10
  type, url = convertToURL(guid: guid)
11
11
  links = Array.new
@@ -1,8 +1,8 @@
1
1
  module HarvesterTools
2
2
  class MetadataObject
3
- attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
3
+ attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris, :guid, :score, :version, :date # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
4
4
 
5
- def initialize(_params = {}) # get a name from the "new" call, or set a default
5
+ def initialize() # get a name from the "new" call, or set a default
6
6
  @hash = {}
7
7
  @graph = RDF::Graph.new
8
8
  @comments = []
@@ -10,6 +10,10 @@ module HarvesterTools
10
10
  @full_response = []
11
11
  @links = []
12
12
  @all_uris = []
13
+ @guid = ""
14
+ @score = 0
15
+ @version = '0.0'
16
+ @date = Time.now.strftime('%Y-%m-%dT%H:%M:%S.%L%z')
13
17
  w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
14
18
  #@warn = File.read("./lib/warnings.json")
15
19
  @warn = JSON.parse(w)
File without changes
@@ -2,7 +2,7 @@
2
2
  command="extruct"
3
3
 
4
4
  [rdf]
5
- command="/home/osboxes/.rvm/gems/ruby-3.0.0/bin/rdf"
5
+ command="/usr/local/bundle/bin/rdf"
6
6
 
7
7
  [tika]
8
8
  command="http://tika:9998/meta"
File without changes
data/lib/warnings.json CHANGED
@@ -115,5 +115,12 @@
115
115
  {"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
116
116
  {"Validator": "https://jsononline.net/json-validator"}],
117
117
  "severity": "WARN"
118
+ },
119
+ "600": {
120
+ "message": "Data identifier cannot be unambiguously determined, therefore cannot be tested against known persistent identifier schemas",
121
+ "linkout": [],
122
+ "severity": "FAILURE"
118
123
  }
124
+
125
+
119
126
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.15
4
+ version: 0.1.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-12 00:00:00.000000000 Z
11
+ date: 2022-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -182,9 +182,6 @@ files:
182
182
  - bin/setup
183
183
  - example_test.rb
184
184
  - launch.json
185
- - lib/config.conf
186
- - lib/config.conf_docker
187
- - lib/config.conf_local
188
185
  - lib/constants.rb
189
186
  - lib/external_tools.rb
190
187
  - lib/fsp_harvester.rb
@@ -195,6 +192,9 @@ files:
195
192
  - lib/metadata_harvester.rb
196
193
  - lib/metadata_object.rb
197
194
  - lib/metadata_parser.rb
195
+ - lib/obselete_config.conf
196
+ - lib/obselete_config.conf_docker
197
+ - lib/obselete_config.conf_local
198
198
  - lib/signposting_tests.rb
199
199
  - lib/warnings.json
200
200
  - lib/web_utils.rb