fsp_harvester 0.1.16 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +3 -3
- data/Gemfile.lock +1 -1
- data/README.md +33 -0
- data/lib/constants.rb +11 -11
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/harvester_utils.rb +2 -2
- data/lib/metadata_object.rb +3 -1
- data/lib/{config.conf → obselete_config.conf} +0 -0
- data/lib/{config.conf_docker → obselete_config.conf_docker} +0 -0
- data/lib/{config.conf_local → obselete_config.conf_local} +0 -0
- data/lib/warnings.json +7 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3f2b3409b575db21edc69a7e0e4bfbf5be09734fbcc4f4b0d5accb5fedad6c2
|
4
|
+
data.tar.gz: 27813b4e090515a869d5fbc519a717eb06e7bc0559e42d33d1284093758f229f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 33aeec82ef754f219db35eba08e68e35f0aecc570b2769f943f073cf1c4b5a9cdfed912c7fc40261797e4d3b00d7f668228aa5cd442eec213c272455aab1a275
|
7
|
+
data.tar.gz: 80f03c769794a7bf8054d95c667274a3a3ddde26242b0969e15511f78b74cdd5d20508ed7c36e47a446012ff021f0f3ad5a9aa4a44dc91d07c8c86d0da35c59e
|
data/.rspec_status
CHANGED
@@ -40,9 +40,9 @@ example_id | status | run_time |
|
|
40
40
|
./spec/describedby_spec.rb[1:1:13] | passed | 1.74 seconds |
|
41
41
|
./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
|
42
42
|
./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
|
43
|
-
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.
|
44
|
-
./spec/fsp_harvester_spec.rb[1:2] |
|
45
|
-
./spec/fsp_harvester_spec.rb[1:3] | passed |
|
43
|
+
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00693 seconds |
|
44
|
+
./spec/fsp_harvester_spec.rb[1:2] | passed | 3.65 seconds |
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 9.96 seconds |
|
46
46
|
./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
|
47
47
|
./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
|
48
48
|
./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -20,6 +20,39 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
+
```
|
24
|
+
require 'fsp_harvester'
|
25
|
+
|
26
|
+
ENV['EXTRUCT_COMMAND'] = "extruct"
|
27
|
+
ENV['RDF_COMMAND'] = "/home/user/.rvm/gems/ruby-3.0.0/bin/rdf" # Kellogg's Distiller
|
28
|
+
ENV['TIKA_COMMAND'] = "http://localhost:9998/meta" # assumes using the docker version of tika
|
29
|
+
|
30
|
+
# to only follow the FAIR signposting specification:
|
31
|
+
links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
|
32
|
+
|
33
|
+
links.each do |link|
|
34
|
+
puts link.href
|
35
|
+
puts link.relation
|
36
|
+
end
|
37
|
+
|
38
|
+
# Note: you don't need to capture the return value here; the metadata object that is passed in is modified in place.
|
39
|
+
metadata = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
|
40
|
+
|
41
|
+
linkeddata = metadata.graph
|
42
|
+
hashdata = metadata.hash
|
43
|
+
comments = metadata.comments
|
44
|
+
warnings = metadata.warnings
|
45
|
+
|
46
|
+
# if you want to try other things like content negotiation and "scraping" from HTML, do this:
|
47
|
+
# Note: you don't need to capture the return value here; the metadata object that is passed in is modified in place.
|
48
|
+
metadata = HarvesterTools::BruteForce.begin_brute_force(guid: guid, metadata: metadata)
|
49
|
+
|
50
|
+
linkeddata = metadata.graph
|
51
|
+
hashdata = metadata.hash
|
52
|
+
comments = metadata.comments
|
53
|
+
warnings = metadata.warnings
|
54
|
+
|
55
|
+
```
|
23
56
|
|
24
57
|
|
25
58
|
## Development
|
data/lib/constants.rb
CHANGED
@@ -78,31 +78,31 @@ GUID_TYPES = {
|
|
78
78
|
'ark' => Regexp.new(%r{^ark:/[^\s]+$})
|
79
79
|
}
|
80
80
|
|
81
|
-
CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
|
82
|
-
extruct = CONFIG.dig(:extruct, :command)
|
83
|
-
extruct
|
81
|
+
# CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
|
82
|
+
# extruct = CONFIG.dig(:extruct, :command)
|
83
|
+
extruct = ENV['EXTRUCT_COMMAND'] || 'extruct'
|
84
84
|
extruct.strip!
|
85
85
|
case extruct
|
86
86
|
when /[&|;`$\s]/
|
87
|
-
abort 'The Extruct command
|
87
|
+
abort 'The Extruct command appears to be subject to command injection. I will not continue'
|
88
88
|
when /echo/i
|
89
|
-
abort 'The Extruct command
|
89
|
+
abort 'The Extruct command appears to be subject to command injection. I will not continue'
|
90
90
|
end
|
91
91
|
EXTRUCT_COMMAND = extruct
|
92
92
|
|
93
|
-
rdf_command = CONFIG.dig(:rdf, :command)
|
94
|
-
rdf_command
|
93
|
+
# rdf_command = CONFIG.dig(:rdf, :command)
|
94
|
+
rdf_command = ENV['RDF_COMMAND'] || 'rdf'
|
95
95
|
rdf_command.strip
|
96
96
|
case rdf_command
|
97
97
|
when /[&|;`$\s]/
|
98
|
-
abort 'The RDF command
|
98
|
+
abort 'The RDF command appears to be subject to command injection. I will not continue'
|
99
99
|
when /echo/i
|
100
|
-
abort 'The RDF command
|
100
|
+
abort 'The RDF command appears to be subject to command injection. I will not continue'
|
101
101
|
when !(/rdf$/ =~ $_)
|
102
102
|
abort "this software requires that Kelloggs Distiller tool is used. The distiller command must end in 'rdf'"
|
103
103
|
end
|
104
104
|
RDF_COMMAND = rdf_command
|
105
105
|
|
106
|
-
tika_command = CONFIG.dig(:tika, :command)
|
107
|
-
tika_command
|
106
|
+
# tika_command = CONFIG.dig(:tika, :command)
|
107
|
+
tika_command = ENV['TIKA_COMMAND'] || 'http://localhost:9998/meta'
|
108
108
|
TIKA_COMMAND = tika_command
|
data/lib/harvester_utils.rb
CHANGED
@@ -4,8 +4,8 @@ module HarvesterTools
|
|
4
4
|
|
5
5
|
class Utils
|
6
6
|
|
7
|
-
def self.resolve_guid(guid:)
|
8
|
-
@meta =
|
7
|
+
def self.resolve_guid(guid:, metadata: HarvesterTools::MetadataObject.new)
|
8
|
+
@meta = metadata
|
9
9
|
@meta.all_uris = [guid]
|
10
10
|
type, url = convertToURL(guid: guid)
|
11
11
|
links = Array.new
|
data/lib/metadata_object.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module HarvesterTools
|
2
2
|
class MetadataObject
|
3
|
-
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
3
|
+
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris, :guid, :score # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
4
4
|
|
5
5
|
def initialize(_params = {}) # get a name from the "new" call, or set a default
|
6
6
|
@hash = {}
|
@@ -10,6 +10,8 @@ module HarvesterTools
|
|
10
10
|
@full_response = []
|
11
11
|
@links = []
|
12
12
|
@all_uris = []
|
13
|
+
@guid = ""
|
14
|
+
@score = 0
|
13
15
|
w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
|
14
16
|
#@warn = File.read("./lib/warnings.json")
|
15
17
|
@warn = JSON.parse(w)
|
File without changes
|
File without changes
|
File without changes
|
data/lib/warnings.json
CHANGED
@@ -115,5 +115,12 @@
|
|
115
115
|
{"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
|
116
116
|
{"Validator": "https://jsononline.net/json-validator"}],
|
117
117
|
"severity": "WARN"
|
118
|
+
},
|
119
|
+
"600": {
|
120
|
+
"message": "Data identifier cannot be unambiguously determined, therefore cannot be tested against known persistent identifier schemas",
|
121
|
+
"linkout": [],
|
122
|
+
"severity": "FAILURE"
|
118
123
|
}
|
124
|
+
|
125
|
+
|
119
126
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -182,9 +182,6 @@ files:
|
|
182
182
|
- bin/setup
|
183
183
|
- example_test.rb
|
184
184
|
- launch.json
|
185
|
-
- lib/config.conf
|
186
|
-
- lib/config.conf_docker
|
187
|
-
- lib/config.conf_local
|
188
185
|
- lib/constants.rb
|
189
186
|
- lib/external_tools.rb
|
190
187
|
- lib/fsp_harvester.rb
|
@@ -195,6 +192,9 @@ files:
|
|
195
192
|
- lib/metadata_harvester.rb
|
196
193
|
- lib/metadata_object.rb
|
197
194
|
- lib/metadata_parser.rb
|
195
|
+
- lib/obselete_config.conf
|
196
|
+
- lib/obselete_config.conf_docker
|
197
|
+
- lib/obselete_config.conf_local
|
198
198
|
- lib/signposting_tests.rb
|
199
199
|
- lib/warnings.json
|
200
200
|
- lib/web_utils.rb
|