fsp_harvester 0.1.16 → 0.1.19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +54 -53
- data/Gemfile.lock +5 -5
- data/README.md +33 -0
- data/lib/constants.rb +11 -11
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/harvester_utils.rb +3 -3
- data/lib/metadata_object.rb +8 -2
- data/lib/{config.conf → obselete_config.conf} +0 -0
- data/lib/{config.conf_docker → obselete_config.conf_docker} +0 -0
- data/lib/{config.conf_local → obselete_config.conf_local} +0 -0
- data/lib/warnings.json +7 -0
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 548e58dcb21d312f32780345f954f523e253c8fffaa4fbe9598032c3081adb1a
|
4
|
+
data.tar.gz: 7bea92aa5809b6651acb76aa2ea470c069e480783eb9d1acfd407a8a5fbd261c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5da8243a22e50244b30f5672b34e5d2f484a6dbf4125fb80a5c93cf1580dd561d6fb4c9354dcce44588f8223d5ec9eb7a8a70bd4904a67b0b59c34196194db0b
|
7
|
+
data.tar.gz: dfdc907ba268b0e509256000a0bd36adbcafe70a1490066acc91deef0255bc13cfe74dad3ddc31352ab6d6eff80fde6a8c3669924c9cb58febd8684a902c06a7
|
data/.rspec_status
CHANGED
@@ -1,56 +1,57 @@
|
|
1
1
|
example_id | status | run_time |
|
2
2
|
---------------------------------- | ------ | --------------- |
|
3
|
-
./spec/cite-as_spec.rb[1:1:1] | passed | 1.
|
4
|
-
./spec/cite-as_spec.rb[1:1:2] | passed | 1.
|
5
|
-
./spec/cite-as_spec.rb[1:1:3] | passed | 1.
|
6
|
-
./spec/cite-as_spec.rb[1:1:4] | passed | 1.
|
7
|
-
./spec/cite-as_spec.rb[1:1:5] | passed | 2.
|
8
|
-
./spec/cite-as_spec.rb[1:1:6] | passed | 2.
|
9
|
-
./spec/cite-as_spec.rb[1:1:7] | passed |
|
10
|
-
./spec/cite-as_spec.rb[1:1:8] | passed | 2.
|
11
|
-
./spec/cite-as_spec.rb[1:1:9] | passed | 2.
|
12
|
-
./spec/cite-as_spec.rb[1:1:10] | passed | 2.
|
13
|
-
./spec/cite-as_spec.rb[1:1:11] | passed | 3.
|
14
|
-
./spec/cite-as_spec.rb[1:1:12] | passed |
|
15
|
-
./spec/cite-as_spec.rb[1:1:13] | passed | 2.
|
16
|
-
./spec/cite-as_spec.rb[1:1:14] | passed | 2.
|
17
|
-
./spec/cite-as_spec.rb[1:1:15] | passed | 1.
|
18
|
-
./spec/cite-as_spec.rb[1:1:16] | passed | 1.
|
19
|
-
./spec/cite-as_spec.rb[1:1:17] | passed | 1.
|
20
|
-
./spec/cite-as_spec.rb[1:1:18] | passed | 1.
|
21
|
-
./spec/cite-as_spec.rb[1:1:19] | passed | 1.
|
22
|
-
./spec/cite-as_spec.rb[1:1:20] | passed | 1.
|
23
|
-
./spec/cite-as_spec.rb[1:1:21] | passed | 2.
|
24
|
-
./spec/cite-as_spec.rb[1:1:22] | passed | 1.
|
25
|
-
./spec/cite-as_spec.rb[1:1:23] | passed | 1.
|
26
|
-
./spec/cite-as_spec.rb[1:1:24] | failed | 1.
|
27
|
-
./spec/cite-as_spec.rb[1:1:25] | passed | 0.
|
28
|
-
./spec/describedby_spec.rb[1:1:1] | passed | 3.
|
29
|
-
./spec/describedby_spec.rb[1:1:2] | passed | 1.
|
3
|
+
./spec/cite-as_spec.rb[1:1:1] | passed | 1.36 seconds |
|
4
|
+
./spec/cite-as_spec.rb[1:1:2] | passed | 1.31 seconds |
|
5
|
+
./spec/cite-as_spec.rb[1:1:3] | passed | 1.53 seconds |
|
6
|
+
./spec/cite-as_spec.rb[1:1:4] | passed | 1.84 seconds |
|
7
|
+
./spec/cite-as_spec.rb[1:1:5] | passed | 2.77 seconds |
|
8
|
+
./spec/cite-as_spec.rb[1:1:6] | passed | 2.06 seconds |
|
9
|
+
./spec/cite-as_spec.rb[1:1:7] | passed | 2.96 seconds |
|
10
|
+
./spec/cite-as_spec.rb[1:1:8] | passed | 2.28 seconds |
|
11
|
+
./spec/cite-as_spec.rb[1:1:9] | passed | 2.83 seconds |
|
12
|
+
./spec/cite-as_spec.rb[1:1:10] | passed | 2.14 seconds |
|
13
|
+
./spec/cite-as_spec.rb[1:1:11] | passed | 3.19 seconds |
|
14
|
+
./spec/cite-as_spec.rb[1:1:12] | passed | 3.06 seconds |
|
15
|
+
./spec/cite-as_spec.rb[1:1:13] | passed | 2.77 seconds |
|
16
|
+
./spec/cite-as_spec.rb[1:1:14] | passed | 2.2 seconds |
|
17
|
+
./spec/cite-as_spec.rb[1:1:15] | passed | 1.19 seconds |
|
18
|
+
./spec/cite-as_spec.rb[1:1:16] | passed | 1.1 seconds |
|
19
|
+
./spec/cite-as_spec.rb[1:1:17] | passed | 1.31 seconds |
|
20
|
+
./spec/cite-as_spec.rb[1:1:18] | passed | 1.14 seconds |
|
21
|
+
./spec/cite-as_spec.rb[1:1:19] | passed | 1.68 seconds |
|
22
|
+
./spec/cite-as_spec.rb[1:1:20] | passed | 1.69 seconds |
|
23
|
+
./spec/cite-as_spec.rb[1:1:21] | passed | 2.35 seconds |
|
24
|
+
./spec/cite-as_spec.rb[1:1:22] | passed | 1.12 seconds |
|
25
|
+
./spec/cite-as_spec.rb[1:1:23] | passed | 1.16 seconds |
|
26
|
+
./spec/cite-as_spec.rb[1:1:24] | failed | 1.45 seconds |
|
27
|
+
./spec/cite-as_spec.rb[1:1:25] | passed | 0.72571 seconds |
|
28
|
+
./spec/describedby_spec.rb[1:1:1] | passed | 3.09 seconds |
|
29
|
+
./spec/describedby_spec.rb[1:1:2] | passed | 1.13 seconds |
|
30
30
|
./spec/describedby_spec.rb[1:1:3] | passed | 1.22 seconds |
|
31
|
-
./spec/describedby_spec.rb[1:1:4] | passed | 1.
|
32
|
-
./spec/describedby_spec.rb[1:1:5] | passed | 1.
|
33
|
-
./spec/describedby_spec.rb[1:1:6] | passed | 1.
|
34
|
-
./spec/describedby_spec.rb[1:1:7] | passed | 1.
|
35
|
-
./spec/describedby_spec.rb[1:1:8] | passed | 2.
|
36
|
-
./spec/describedby_spec.rb[1:1:9] | passed |
|
37
|
-
./spec/describedby_spec.rb[1:1:10] | passed | 2.
|
38
|
-
./spec/describedby_spec.rb[1:1:11] | passed |
|
39
|
-
./spec/describedby_spec.rb[1:1:12] | passed |
|
40
|
-
./spec/describedby_spec.rb[1:1:13] | passed | 1.
|
41
|
-
./spec/describedby_spec.rb[1:1:14] | passed | 2.
|
42
|
-
./spec/describedby_spec.rb[1:1:15] | passed | 2.
|
43
|
-
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.
|
44
|
-
./spec/fsp_harvester_spec.rb[1:2] |
|
45
|
-
./spec/fsp_harvester_spec.rb[1:3] | passed |
|
46
|
-
./spec/
|
47
|
-
./spec/item_spec.rb[1:1:
|
48
|
-
./spec/item_spec.rb[1:1:
|
49
|
-
./spec/item_spec.rb[1:1:
|
50
|
-
./spec/item_spec.rb[1:1:
|
51
|
-
./spec/item_spec.rb[1:1:
|
52
|
-
./spec/item_spec.rb[1:1:
|
53
|
-
./spec/item_spec.rb[1:1:
|
54
|
-
./spec/
|
55
|
-
./spec/type_spec.rb[1:1:
|
56
|
-
./spec/type_spec.rb[1:1:
|
31
|
+
./spec/describedby_spec.rb[1:1:4] | passed | 1.11 seconds |
|
32
|
+
./spec/describedby_spec.rb[1:1:5] | passed | 1.21 seconds |
|
33
|
+
./spec/describedby_spec.rb[1:1:6] | passed | 1.24 seconds |
|
34
|
+
./spec/describedby_spec.rb[1:1:7] | passed | 1.53 seconds |
|
35
|
+
./spec/describedby_spec.rb[1:1:8] | passed | 2.53 seconds |
|
36
|
+
./spec/describedby_spec.rb[1:1:9] | passed | 1.74 seconds |
|
37
|
+
./spec/describedby_spec.rb[1:1:10] | passed | 2.59 seconds |
|
38
|
+
./spec/describedby_spec.rb[1:1:11] | passed | 3.49 seconds |
|
39
|
+
./spec/describedby_spec.rb[1:1:12] | passed | 3.82 seconds |
|
40
|
+
./spec/describedby_spec.rb[1:1:13] | passed | 1.65 seconds |
|
41
|
+
./spec/describedby_spec.rb[1:1:14] | passed | 2.19 seconds |
|
42
|
+
./spec/describedby_spec.rb[1:1:15] | passed | 2.16 seconds |
|
43
|
+
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00015 seconds |
|
44
|
+
./spec/fsp_harvester_spec.rb[1:2] | passed | 2.49 seconds |
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 7.06 seconds |
|
46
|
+
./spec/fsp_harvester_spec.rb[1:4] | passed | 2.74 seconds |
|
47
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.41 seconds |
|
48
|
+
./spec/item_spec.rb[1:1:2] | passed | 2.84 seconds |
|
49
|
+
./spec/item_spec.rb[1:1:3] | passed | 1.15 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.74 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.6 seconds |
|
52
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.32 seconds |
|
53
|
+
./spec/item_spec.rb[1:1:7] | passed | 2.81 seconds |
|
54
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.49717 seconds |
|
55
|
+
./spec/type_spec.rb[1:1:1] | passed | 1.25 seconds |
|
56
|
+
./spec/type_spec.rb[1:1:2] | passed | 1.18 seconds |
|
57
|
+
./spec/type_spec.rb[1:1:3] | passed | 1.58 seconds |
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fsp_harvester (0.1.
|
4
|
+
fsp_harvester (0.1.19)
|
5
5
|
json (~> 2.0)
|
6
6
|
linkeddata (~> 3.2)
|
7
|
-
linkheaders-processor (~> 0.1.
|
7
|
+
linkheaders-processor (~> 0.1.18)
|
8
8
|
metainspector (~> 5.11.2)
|
9
9
|
parseconfig (~> 1.1)
|
10
10
|
rake (~> 13.0)
|
@@ -127,7 +127,7 @@ GEM
|
|
127
127
|
sparql (~> 3.2, >= 3.2.4)
|
128
128
|
sparql-client (~> 3.2, >= 3.2.1)
|
129
129
|
yaml-ld (~> 0.0)
|
130
|
-
linkheaders-processor (0.1.
|
130
|
+
linkheaders-processor (0.1.18)
|
131
131
|
json (~> 2.0)
|
132
132
|
json-ld (~> 3.2)
|
133
133
|
json-ld-preloaded (~> 3.2)
|
@@ -252,14 +252,14 @@ GEM
|
|
252
252
|
diff-lcs (>= 1.2.0, < 2.0)
|
253
253
|
rspec-support (~> 3.11.0)
|
254
254
|
rspec-support (3.11.0)
|
255
|
-
rubocop (1.
|
255
|
+
rubocop (1.35.0)
|
256
256
|
json (~> 2.3)
|
257
257
|
parallel (~> 1.10)
|
258
258
|
parser (>= 3.1.2.1)
|
259
259
|
rainbow (>= 2.2.2, < 4.0)
|
260
260
|
regexp_parser (>= 1.8, < 3.0)
|
261
261
|
rexml (>= 3.2.5, < 4.0)
|
262
|
-
rubocop-ast (>= 1.20.
|
262
|
+
rubocop-ast (>= 1.20.1, < 2.0)
|
263
263
|
ruby-progressbar (~> 1.7)
|
264
264
|
unicode-display_width (>= 1.4.0, < 3.0)
|
265
265
|
rubocop-ast (1.21.0)
|
data/README.md
CHANGED
@@ -20,6 +20,39 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
+
```
|
24
|
+
require 'fsp_harvester'
|
25
|
+
|
26
|
+
ENV['EXTRUCT_COMMAND'] = "extruct"
|
27
|
+
ENV['RDF_COMMAND'] = "/home/user/.rvm/gems/ruby-3.0.0/bin/rdf" # kelloggs distiller
|
28
|
+
ENV['TIKA_COMMAND'] = "http://localhost:9998/meta" # assumes using the docker version of tika
|
29
|
+
|
30
|
+
# to only follow the FAIR signposting specification:
|
31
|
+
links, metadata = HarvesterTools::Utils.resolve_guid(guid: guid)
|
32
|
+
|
33
|
+
links.each do |link|
|
34
|
+
puts link.href
|
35
|
+
puts link.relation
|
36
|
+
end
|
37
|
+
|
38
|
+
# note, you don't need to catch the return value here. The metadata object that is passed in will be modified
|
39
|
+
metadata = FspHarvester::Utils.gather_metadata_from_describedby_links(links: links, metadata: metadata)
|
40
|
+
|
41
|
+
linkeddata = metadata.graph
|
42
|
+
hashdata = metadata.hash
|
43
|
+
comments = metadata.comments
|
44
|
+
warnings = metadata.warnings
|
45
|
+
|
46
|
+
# if you want to try other things like content negotiation and "scraping" from HTML, do this:
|
47
|
+
# note, you don't need to catch the return value here. The metadata object that is passed in will be modified
|
48
|
+
metadata = HarvesterTools::BruteForce.begin_brute_force(guid: guid, metadata: metadata)
|
49
|
+
|
50
|
+
linkeddata = metadata.graph
|
51
|
+
hashdata = metadata.hash
|
52
|
+
comments = metadata.comments
|
53
|
+
warnings = metadata.warnings
|
54
|
+
|
55
|
+
```
|
23
56
|
|
24
57
|
|
25
58
|
## Development
|
data/lib/constants.rb
CHANGED
@@ -78,31 +78,31 @@ GUID_TYPES = {
|
|
78
78
|
'ark' => Regexp.new(%r{^ark:/[^\s]+$})
|
79
79
|
}
|
80
80
|
|
81
|
-
CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
|
82
|
-
extruct = CONFIG.dig(:extruct, :command)
|
83
|
-
extruct
|
81
|
+
# CONFIG = File.exist?('config.conf') ? ParseConfig.new('config.conf') : {}
|
82
|
+
# extruct = CONFIG.dig(:extruct, :command)
|
83
|
+
extruct = ENV['EXTRUCT_COMMAND'] || 'extruct'
|
84
84
|
extruct.strip!
|
85
85
|
case extruct
|
86
86
|
when /[&|;`$\s]/
|
87
|
-
abort 'The Extruct command
|
87
|
+
abort 'The Extruct command appears to be subject to command injection. I will not continue'
|
88
88
|
when /echo/i
|
89
|
-
abort 'The Extruct command
|
89
|
+
abort 'The Extruct command appears to be subject to command injection. I will not continue'
|
90
90
|
end
|
91
91
|
EXTRUCT_COMMAND = extruct
|
92
92
|
|
93
|
-
rdf_command = CONFIG.dig(:rdf, :command)
|
94
|
-
rdf_command
|
93
|
+
# rdf_command = CONFIG.dig(:rdf, :command)
|
94
|
+
rdf_command = ENV['RDF_COMMAND'] || 'rdf'
|
95
95
|
rdf_command.strip
|
96
96
|
case rdf_command
|
97
97
|
when /[&|;`$\s]/
|
98
|
-
abort 'The RDF command
|
98
|
+
abort 'The RDF command appears to be subject to command injection. I will not continue'
|
99
99
|
when /echo/i
|
100
|
-
abort 'The RDF command
|
100
|
+
abort 'The RDF command appears to be subject to command injection. I will not continue'
|
101
101
|
when !(/rdf$/ =~ $_)
|
102
102
|
abort "this software requires that Kelloggs Distiller tool is used. The distiller command must end in 'rdf'"
|
103
103
|
end
|
104
104
|
RDF_COMMAND = rdf_command
|
105
105
|
|
106
|
-
tika_command = CONFIG.dig(:tika, :command)
|
107
|
-
tika_command
|
106
|
+
# tika_command = CONFIG.dig(:tika, :command)
|
107
|
+
tika_command = ENV['TIKA_COMMAND'] || 'http://localhost:9998/meta'
|
108
108
|
TIKA_COMMAND = tika_command
|
data/lib/harvester_utils.rb
CHANGED
@@ -4,14 +4,14 @@ module HarvesterTools
|
|
4
4
|
|
5
5
|
class Utils
|
6
6
|
|
7
|
-
def self.resolve_guid(guid:)
|
8
|
-
@meta =
|
7
|
+
def self.resolve_guid(guid:, metadata: HarvesterTools::MetadataObject.new)
|
8
|
+
@meta = metadata
|
9
9
|
@meta.all_uris = [guid]
|
10
10
|
type, url = convertToURL(guid: guid)
|
11
11
|
links = Array.new
|
12
12
|
if type
|
13
13
|
links = resolve_url(url: url, metadata: @meta)
|
14
|
-
@meta.links = @meta.links
|
14
|
+
@meta.links = @meta.links.append(*links)
|
15
15
|
else
|
16
16
|
@meta.add_warning(['006', guid, ''])
|
17
17
|
@meta.comments << "FATAL: GUID type not recognized.\n"
|
data/lib/metadata_object.rb
CHANGED
@@ -1,15 +1,21 @@
|
|
1
1
|
module HarvesterTools
|
2
2
|
class MetadataObject
|
3
|
-
attr_accessor :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
3
|
+
attr_accessor :id, :hash, :graph, :comments, :links, :warnings, :guidtype, :full_response, :all_uris, :tested_guid, :score, :version, :date # a hash of metadata # a RDF.rb graph of metadata # an array of comments # the type of GUID that was detected # will be an array of Net::HTTP::Response
|
4
4
|
|
5
|
-
def initialize(
|
5
|
+
def initialize(id: "unidentified_metadata") # get a name from the "new" call, or set a default
|
6
|
+
@id = id
|
6
7
|
@hash = {}
|
7
8
|
@graph = RDF::Graph.new
|
8
9
|
@comments = []
|
9
10
|
@warnings = []
|
10
11
|
@full_response = []
|
11
12
|
@links = []
|
13
|
+
@guidtype = ""
|
12
14
|
@all_uris = []
|
15
|
+
@tested_guid = ""
|
16
|
+
@score = 0
|
17
|
+
@version = '0.0'
|
18
|
+
@date = Time.now.strftime('%Y-%m-%dT%H:%M:%S.%L%z')
|
13
19
|
w = RestClient.get("https://raw.githubusercontent.com/markwilkinson/FAIR-Signposting-Harvester/master/lib/warnings.json")
|
14
20
|
#@warn = File.read("./lib/warnings.json")
|
15
21
|
@warn = JSON.parse(w)
|
File without changes
|
File without changes
|
File without changes
|
data/lib/warnings.json
CHANGED
@@ -115,5 +115,12 @@
|
|
115
115
|
{"Documentation": "http://dx.doi.org/10.17487/RFC8259"},
|
116
116
|
{"Validator": "https://jsononline.net/json-validator"}],
|
117
117
|
"severity": "WARN"
|
118
|
+
},
|
119
|
+
"600": {
|
120
|
+
"message": "Data identifier cannot be unambiguously determined, therefore cannot be tested against known persistent identifier schemas",
|
121
|
+
"linkout": [],
|
122
|
+
"severity": "FAILURE"
|
118
123
|
}
|
124
|
+
|
125
|
+
|
119
126
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.
|
47
|
+
version: 0.1.18
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.
|
54
|
+
version: 0.1.18
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: metainspector
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -182,9 +182,6 @@ files:
|
|
182
182
|
- bin/setup
|
183
183
|
- example_test.rb
|
184
184
|
- launch.json
|
185
|
-
- lib/config.conf
|
186
|
-
- lib/config.conf_docker
|
187
|
-
- lib/config.conf_local
|
188
185
|
- lib/constants.rb
|
189
186
|
- lib/external_tools.rb
|
190
187
|
- lib/fsp_harvester.rb
|
@@ -195,6 +192,9 @@ files:
|
|
195
192
|
- lib/metadata_harvester.rb
|
196
193
|
- lib/metadata_object.rb
|
197
194
|
- lib/metadata_parser.rb
|
195
|
+
- lib/obselete_config.conf
|
196
|
+
- lib/obselete_config.conf_docker
|
197
|
+
- lib/obselete_config.conf_local
|
198
198
|
- lib/signposting_tests.rb
|
199
199
|
- lib/warnings.json
|
200
200
|
- lib/web_utils.rb
|