fsp_harvester 0.1.16 → 0.1.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aec11fd57963ffb176ddb88338b9e262027c9a7d39364089ae130fb4b628bf5b
4
- data.tar.gz: f8733a00de5c6c24a622235c18ba0dae208f5bac52d50607480e51fd563678c8
3
+ metadata.gz: c3f2b3409b575db21edc69a7e0e4bfbf5be09734fbcc4f4b0d5accb5fedad6c2
4
+ data.tar.gz: 27813b4e090515a869d5fbc519a717eb06e7bc0559e42d33d1284093758f229f
5
5
  SHA512:
6
- metadata.gz: c484e41aa0305f34d0bf7f82cad60b9b02106ffe80b9371c99e77b199eef9ce52818222368b8b3a3ff73d94dba89b8d7fb815d29c95ca335772946e1e9762849
7
- data.tar.gz: '09dfdcc12b9176bc88c31a196893ae9ede6c35c2fd59271ca8fa5b1c29f0807ee82c8416ca5f8b7ff75c6caab71648b6d0e15d0976784cb7d34ff8686332be37'
6
+ metadata.gz: 33aeec82ef754f219db35eba08e68e35f0aecc570b2769f943f073cf1c4b5a9cdfed912c7fc40261797e4d3b00d7f668228aa5cd442eec213c272455aab1a275
7
+ data.tar.gz: 80f03c769794a7bf8054d95c667274a3a3ddde26242b0969e15511f78b74cdd5d20508ed7c36e47a446012ff021f0f3ad5a9aa4a44dc91d07c8c86d0da35c59e
data/.rspec_status CHANGED
@@ -40,9 +40,9 @@ example_id | status | run_time |
40
40
  ./spec/describedby_spec.rb[1:1:13] | passed | 1.74 seconds |
41
41
  ./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
42
42
  ./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
43
- ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
44
- ./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
45
- ./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
43
+ ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00693 seconds |
44
+ ./spec/fsp_harvester_spec.rb[1:2] | passed | 3.65 seconds |
45
+ ./spec/fsp_harvester_spec.rb[1:3] | passed | 9.96 seconds |
46
46
  ./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
47
47
  ./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
48
48
  ./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.16)
4
+ fsp_harvester (0.1.17)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
7
  linkheaders-processor (~> 0.1.17)
data/README.md CHANGED
@@ -20,6 +20,39 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
+ ```
24
+ require 'fsp_harvester'
25
+
26
+ ENV['EXTRUCT_COMMAND'] = "extruct"
27
+ ENV['RDF_COMMAND'] = "/home/user/.rvm/gems/ruby-3.0.0/bin/rdf" # path to Kellogg's Distiller; the command must end in 'rdf'
28
+ ENV['TIKA_COMMAND'] = "http://localhost:9998/meta" # assumes using the docker version of tika
29
+
30
+ # to only follow the FAIR signposting specification:
31
+ links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
32
+
33
+ links.each do |link|
34
+ puts link.href
35
+ puts link.relation
36
+ end
37
+
38
+ # Note: you don't need to capture the return value here; the metadata object that is passed in is modified in place.
39
+ metadata = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
40
+
41
+ linkeddata = metadata.graph
42
+ hashdata = metadata.hash
43
+ comments = metadata.comments
44
+ warnings = metadata.warnings
45
+
46
+ # if you want to try other things like content negotiation and "scraping" from HTML, do this:
47
+ # Note: you don't need to capture the return value here; the metadata object that is passed in is modified in place.
48
+ metadata = HarvesterTools::BruteForce.begin_brute_force(guid: guid, metadata: metadata)
49
+
50
+ linkeddata = metadata.graph
51
+ hashdata = metadata.hash
52
+ comments = metadata.comments
53
+ warnings = metadata.warnings
54
+
55
+ ```
23
56
 
24
57
 
25
58
  ## Development
data/lib/constants.rb CHANGED
@@ -78,31 +78,31 @@ GUID_TYPES = {
78
78
  'ark' => Regexp.new(%r{^ark:/[^\s]+$})
79
79
  }
80
80
 
81
- CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
82
- extruct = CONFIG.dig(:extruct, :command)
83
- extruct ||= 'extruct'
81
+ # CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
82
+ # extruct = CONFIG.dig(:extruct, :command)
83
+ extruct = ENV['EXTRUCT_COMMAND'] || 'extruct'
84
84
  extruct.strip!
85
85
  case extruct
86
86
  when /[&|;`$\s]/
87
- abort 'The Extruct command in the config file appears to be subject to command injection. I will not continue'
87
+ abort 'The Extruct command appears to be subject to command injection. I will not continue'
88
88
  when /echo/i
89
- abort 'The Extruct command in the config file appears to be subject to command injection. I will not continue'
89
+ abort 'The Extruct command appears to be subject to command injection. I will not continue'
90
90
  end
91
91
  EXTRUCT_COMMAND = extruct
92
92
 
93
- rdf_command = CONFIG.dig(:rdf, :command)
94
- rdf_command ||= 'rdf'
93
+ # rdf_command = CONFIG.dig(:rdf, :command)
94
+ rdf_command = ENV['RDF_COMMAND'] || 'rdf'
95
95
  rdf_command.strip
96
96
  case rdf_command
97
97
  when /[&|;`$\s]/
98
- abort 'The RDF command in the config file appears to be subject to command injection. I will not continue'
98
+ abort 'The RDF command appears to be subject to command injection. I will not continue'
99
99
  when /echo/i
100
- abort 'The RDF command in the config file appears to be subject to command injection. I will not continue'
100
+ abort 'The RDF command appears to be subject to command injection. I will not continue'
101
101
  when !(/rdf$/ =~ $_)
102
102
  abort "this software requires that Kelloggs Distiller tool is used. The distiller command must end in 'rdf'"
103
103
  end
104
104
  RDF_COMMAND = rdf_command
105
105
 
106
- tika_command = CONFIG.dig(:tika, :command)
107
- tika_command ||= 'http://localhost:9998/meta'
106
+ # tika_command = CONFIG.dig(:tika, :command)
107
+ tika_command = ENV['TIKA_COMMAND'] || 'http://localhost:9998/meta'
108
108
  TIKA_COMMAND = tika_command
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.16"
4
+ VERSION = "0.1.17"
5
5
  end
@@ -4,8 +4,8 @@ module HarvesterTools
4
4
 
5
5
  class Utils
6
6
 
7
- def self.resolve_guid(guid:)
8
- @meta = HarvesterTools::MetadataObject.new
7
+ def self.resolve_guid(guid:, metadata: HarvesterTools::MetadataObject.new)
8
+ @meta = metadata
9
9
  @meta.all_uris = [guid]
10
10
  type, url = convertToURL(guid: guid)
11
11
  links = Array.new
@@ -1,6 +1,6 @@
1
1
  module HarvesterTools
2
2
  class MetadataObject
3
- attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
3
+ attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris, :guid, :score # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
4
4
 
5
5
  def initialize(_params = {}) # get a name from the "new" call, or set a default
6
6
  @hash = {}
@@ -10,6 +10,8 @@ module HarvesterTools
10
10
  @full_response = []
11
11
  @links = []
12
12
  @all_uris = []
13
+ @guid = ""
14
+ @score = 0
13
15
  w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
14
16
  #@warn = File.read("./lib/warnings.json")
15
17
  @warn = JSON.parse(w)
File without changes
data/lib/warnings.json CHANGED
@@ -115,5 +115,12 @@
115
115
  {"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
116
116
  {"Validator": "https://jsononline.net/json-validator"}],
117
117
  "severity": "WARN"
118
+ },
119
+ "600": {
120
+ "message": "Data identifier cannot be unambiguously determined, therefore cannot be tested against known persistent identifier schemas",
121
+ "linkout": [],
122
+ "severity": "FAILURE"
118
123
  }
124
+
125
+
119
126
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.16
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-12 00:00:00.000000000 Z
11
+ date: 2022-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -182,9 +182,6 @@ files:
182
182
  - bin/setup
183
183
  - example_test.rb
184
184
  - launch.json
185
- - lib/config.conf
186
- - lib/config.conf_docker
187
- - lib/config.conf_local
188
185
  - lib/constants.rb
189
186
  - lib/external_tools.rb
190
187
  - lib/fsp_harvester.rb
@@ -195,6 +192,9 @@ files:
195
192
  - lib/metadata_harvester.rb
196
193
  - lib/metadata_object.rb
197
194
  - lib/metadata_parser.rb
195
+ - lib/obselete_config.conf
196
+ - lib/obselete_config.conf_docker
197
+ - lib/obselete_config.conf_local
198
198
  - lib/signposting_tests.rb
199
199
  - lib/warnings.json
200
200
  - lib/web_utils.rb