fsp_harvester 0.1.15 → 0.1.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +19 -18
- data/Gemfile.lock +1 -1
- data/README.md +33 -0
- data/lib/constants.rb +11 -11
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/harvester_utils.rb +2 -2
- data/lib/metadata_object.rb +6 -2
- data/lib/{config.conf_docker → obselete_config.conf} +0 -0
- data/lib/{config.conf_local → obselete_config.conf_docker} +1 -1
- data/lib/{config.conf → obselete_config.conf_local} +0 -0
- data/lib/warnings.json +7 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f6bed703ae1a03ff30a1abb88e54b033a6ffbd24df5cacc70b2e0662af7e1be
|
4
|
+
data.tar.gz: 49370b82123eb0b7b6c92fd603996cec6909becf35fc789885de218aae0fb446
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2abbbfba153e08b83e832640942c978f9612437d1ee7fc3891122be96bfd8da442c2460d8a7b6f303ecc469b6de568907ed3e592a359513d8ab23966fad39786
|
7
|
+
data.tar.gz: 146004aae9a8495523b2c2578a84a8268b1398f005fdd98788b816f256c98128c34a537e68c078fa681e262bec3a2d0890e9226168b85735c95742f88848a0d0
|
data/.rspec_status
CHANGED
@@ -25,24 +25,25 @@ example_id | status | run_time |
|
|
25
25
|
./spec/cite-as_spec.rb[1:1:23] | passed | 1.25 seconds |
|
26
26
|
./spec/cite-as_spec.rb[1:1:24] | failed | 1.35 seconds |
|
27
27
|
./spec/cite-as_spec.rb[1:1:25] | passed | 0.50811 seconds |
|
28
|
-
./spec/describedby_spec.rb[1:1:1] | passed |
|
29
|
-
./spec/describedby_spec.rb[1:1:2] | passed | 1.
|
30
|
-
./spec/describedby_spec.rb[1:1:3] | passed | 1.
|
31
|
-
./spec/describedby_spec.rb[1:1:4] | passed | 1.
|
32
|
-
./spec/describedby_spec.rb[1:1:5] | passed | 1.
|
33
|
-
./spec/describedby_spec.rb[1:1:6] | passed | 1.
|
34
|
-
./spec/describedby_spec.rb[1:1:7] | passed | 1.
|
35
|
-
./spec/describedby_spec.rb[1:1:8] | passed | 2.
|
36
|
-
./spec/describedby_spec.rb[1:1:9] | passed | 2.
|
37
|
-
./spec/describedby_spec.rb[1:1:10] | passed | 2.
|
38
|
-
./spec/describedby_spec.rb[1:1:11] | passed |
|
39
|
-
./spec/describedby_spec.rb[1:1:12] | passed | 2.
|
40
|
-
./spec/describedby_spec.rb[1:1:13] | passed |
|
41
|
-
./spec/describedby_spec.rb[1:1:14] | passed | 2.
|
42
|
-
./spec/describedby_spec.rb[1:1:15] | passed | 2.
|
43
|
-
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.
|
44
|
-
./spec/fsp_harvester_spec.rb[1:2] |
|
45
|
-
./spec/fsp_harvester_spec.rb[1:3] | passed |
|
28
|
+
./spec/describedby_spec.rb[1:1:1] | passed | 4.39 seconds |
|
29
|
+
./spec/describedby_spec.rb[1:1:2] | passed | 1.63 seconds |
|
30
|
+
./spec/describedby_spec.rb[1:1:3] | passed | 1.49 seconds |
|
31
|
+
./spec/describedby_spec.rb[1:1:4] | passed | 1.21 seconds |
|
32
|
+
./spec/describedby_spec.rb[1:1:5] | passed | 1.17 seconds |
|
33
|
+
./spec/describedby_spec.rb[1:1:6] | passed | 1.11 seconds |
|
34
|
+
./spec/describedby_spec.rb[1:1:7] | passed | 1.11 seconds |
|
35
|
+
./spec/describedby_spec.rb[1:1:8] | passed | 2.34 seconds |
|
36
|
+
./spec/describedby_spec.rb[1:1:9] | passed | 2.09 seconds |
|
37
|
+
./spec/describedby_spec.rb[1:1:10] | passed | 2.69 seconds |
|
38
|
+
./spec/describedby_spec.rb[1:1:11] | passed | 3.02 seconds |
|
39
|
+
./spec/describedby_spec.rb[1:1:12] | passed | 2.9 seconds |
|
40
|
+
./spec/describedby_spec.rb[1:1:13] | passed | 2.27 seconds |
|
41
|
+
./spec/describedby_spec.rb[1:1:14] | passed | 2.53 seconds |
|
42
|
+
./spec/describedby_spec.rb[1:1:15] | passed | 2.33 seconds |
|
43
|
+
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00101 seconds |
|
44
|
+
./spec/fsp_harvester_spec.rb[1:2] | passed | 3.15 seconds |
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 7.1 seconds |
|
46
|
+
./spec/fsp_harvester_spec.rb[1:4] | passed | 2.47 seconds |
|
46
47
|
./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
|
47
48
|
./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
|
48
49
|
./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -20,6 +20,39 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
+
```
|
24
|
+
require 'fsp_harvester'
|
25
|
+
|
26
|
+
ENV['EXTRUCT_COMMAND'] = "extruct"
|
27
|
+
ENV['RDF_COMMAND'] = "/home/user/.rvm/gems/ruby-3.0.0/bin/rdf" # kelloggs distiller
|
28
|
+
ENV['TIKA_COMMAND'] = "http://localhost:9998/meta" # assumes using the docker version of tika
|
29
|
+
|
30
|
+
# to only follow the FAIR signposting specification:
|
31
|
+
links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
|
32
|
+
|
33
|
+
links.each do |link|
|
34
|
+
puts link.href
|
35
|
+
puts link.relation
|
36
|
+
end
|
37
|
+
|
38
|
+
# note, you don't need to catch the return value here. The metadata object that is passed in will be modified
|
39
|
+
metadata = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
|
40
|
+
|
41
|
+
linkeddata = metadata.graph
|
42
|
+
hashdata = metadata.hash
|
43
|
+
comments = metadata.comments
|
44
|
+
warnings = metadata.warnings
|
45
|
+
|
46
|
+
# if you want to try other things like content negotiation and "scraping" from HTML, do this:
|
47
|
+
# note, you don't need to catch the return value here. The metadata object that is passed in will be modified
|
48
|
+
metadata = HarvesterTools::BruteForce.begin_brute_force(guid: guid, metadata: metadata)
|
49
|
+
|
50
|
+
linkeddata = metadata.graph
|
51
|
+
hashdata = metadata.hash
|
52
|
+
comments = metadata.comments
|
53
|
+
warnings = metadata.warnings
|
54
|
+
|
55
|
+
```
|
23
56
|
|
24
57
|
|
25
58
|
## Development
|
data/lib/constants.rb
CHANGED
@@ -78,31 +78,31 @@ GUID_TYPES = {
|
|
78
78
|
'ark' => Regexp.new(%r{^ark:/[^\s]+$})
|
79
79
|
}
|
80
80
|
|
81
|
-
CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
|
82
|
-
extruct = CONFIG.dig(:extruct, :command)
|
83
|
-
extruct
|
81
|
+
# CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
|
82
|
+
# extruct = CONFIG.dig(:extruct, :command)
|
83
|
+
extruct = ENV['EXTRUCT_COMMAND'] || 'extruct'
|
84
84
|
extruct.strip!
|
85
85
|
case extruct
|
86
86
|
when /[&|;`$\s]/
|
87
|
-
abort 'The Extruct command
|
87
|
+
abort 'The Extruct command appears to be subject to command injection. I will not continue'
|
88
88
|
when /echo/i
|
89
|
-
abort 'The Extruct command
|
89
|
+
abort 'The Extruct command appears to be subject to command injection. I will not continue'
|
90
90
|
end
|
91
91
|
EXTRUCT_COMMAND = extruct
|
92
92
|
|
93
|
-
rdf_command = CONFIG.dig(:rdf, :command)
|
94
|
-
rdf_command
|
93
|
+
# rdf_command = CONFIG.dig(:rdf, :command)
|
94
|
+
rdf_command = ENV['RDF_COMMAND'] || 'rdf'
|
95
95
|
rdf_command.strip
|
96
96
|
case rdf_command
|
97
97
|
when /[&|;`$\s]/
|
98
|
-
abort 'The RDF command
|
98
|
+
abort 'The RDF command appears to be subject to command injection. I will not continue'
|
99
99
|
when /echo/i
|
100
|
-
abort 'The RDF command
|
100
|
+
abort 'The RDF command appears to be subject to command injection. I will not continue'
|
101
101
|
when !(/rdf$/ =~ $_)
|
102
102
|
abort "this software requires that Kelloggs Distiller tool is used. The distiller command must end in 'rdf'"
|
103
103
|
end
|
104
104
|
RDF_COMMAND = rdf_command
|
105
105
|
|
106
|
-
tika_command = CONFIG.dig(:tika, :command)
|
107
|
-
tika_command
|
106
|
+
# tika_command = CONFIG.dig(:tika, :command)
|
107
|
+
tika_command = ENV['TIKA_COMMAND'] || 'http://localhost:9998/meta'
|
108
108
|
TIKA_COMMAND = tika_command
|
data/lib/harvester_utils.rb
CHANGED
@@ -4,8 +4,8 @@ module HarvesterTools
|
|
4
4
|
|
5
5
|
class Utils
|
6
6
|
|
7
|
-
def self.resolve_guid(guid:)
|
8
|
-
@meta =
|
7
|
+
def self.resolve_guid(guid:, metadata: HarvesterTools::MetadataObject.new)
|
8
|
+
@meta = metadata
|
9
9
|
@meta.all_uris = [guid]
|
10
10
|
type, url = convertToURL(guid: guid)
|
11
11
|
links = Array.new
|
data/lib/metadata_object.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module HarvesterTools
|
2
2
|
class MetadataObject
|
3
|
-
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
3
|
+
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris, :guid, :score, :version, :date # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
4
4
|
|
5
|
-
def initialize(
|
5
|
+
def initialize() # get a name from the "new" call, or set a default
|
6
6
|
@hash = {}
|
7
7
|
@graph = RDF::Graph.new
|
8
8
|
@comments = []
|
@@ -10,6 +10,10 @@ module HarvesterTools
|
|
10
10
|
@full_response = []
|
11
11
|
@links = []
|
12
12
|
@all_uris = []
|
13
|
+
@guid = ""
|
14
|
+
@score = 0
|
15
|
+
@version = '0.0'
|
16
|
+
@date = Time.now.strftime('%Y-%m-%dT%H:%M:%S.%L%z')
|
13
17
|
w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
|
14
18
|
#@warn = File.read("./lib/warnings.json")
|
15
19
|
@warn = JSON.parse(w)
|
File without changes
|
File without changes
|
data/lib/warnings.json
CHANGED
@@ -115,5 +115,12 @@
|
|
115
115
|
{"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
|
116
116
|
{"Validator": "https://jsononline.net/json-validator"}],
|
117
117
|
"severity": "WARN"
|
118
|
+
},
|
119
|
+
"600": {
|
120
|
+
"message": "Data identifier cannot be unambiguously determined, therefore cannot be tested against known persistent identifier schemas",
|
121
|
+
"linkout": [],
|
122
|
+
"severity": "FAILURE"
|
118
123
|
}
|
124
|
+
|
125
|
+
|
119
126
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -182,9 +182,6 @@ files:
|
|
182
182
|
- bin/setup
|
183
183
|
- example_test.rb
|
184
184
|
- launch.json
|
185
|
-
- lib/config.conf
|
186
|
-
- lib/config.conf_docker
|
187
|
-
- lib/config.conf_local
|
188
185
|
- lib/constants.rb
|
189
186
|
- lib/external_tools.rb
|
190
187
|
- lib/fsp_harvester.rb
|
@@ -195,6 +192,9 @@ files:
|
|
195
192
|
- lib/metadata_harvester.rb
|
196
193
|
- lib/metadata_object.rb
|
197
194
|
- lib/metadata_parser.rb
|
195
|
+
- lib/obselete_config.conf
|
196
|
+
- lib/obselete_config.conf_docker
|
197
|
+
- lib/obselete_config.conf_local
|
198
198
|
- lib/signposting_tests.rb
|
199
199
|
- lib/warnings.json
|
200
200
|
- lib/web_utils.rb
|