fsp_harvester 0.1.21 → 0.1.22
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +54 -54
- data/Gemfile.lock +1 -1
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/metadata_harvester.rb +2 -2
- data/lib/metadata_parser.rb +4 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dfe28e2fc429fd0c550539b356e325a1735d323e72fa9c4ee502fbedb1c818df
|
4
|
+
data.tar.gz: b1ec033372645ca2129f44c4faad380b690527ab2f0823583985b12328bdce54
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11b5ce8b8368d70171e3e376ee75275e1c2892ec58be07976fa225a2df1841ae11b06c335105ab1915a001fb1f0e6247fadf00e3d06f33b21918593c3ada5fc0
|
7
|
+
data.tar.gz: b0c82ea9e81183789227a22eb3c160bfad5d4aa346f0aebb349b383f3d920877c28addb6f936a8a4f5b59d352bcecd313984f7ef7bb90aa4b5e93653e22fd176
|
data/.rspec_status
CHANGED
@@ -1,60 +1,60 @@
|
|
1
1
|
example_id | status | run_time |
|
2
2
|
---------------------------------- | ------ | ---------------------- |
|
3
|
-
./spec/cite-as_spec.rb[1:1:1] | passed | 1.
|
4
|
-
./spec/cite-as_spec.rb[1:1:2] | passed | 1.
|
5
|
-
./spec/cite-as_spec.rb[1:1:3] | passed | 1.
|
6
|
-
./spec/cite-as_spec.rb[1:1:4] | passed |
|
7
|
-
./spec/cite-as_spec.rb[1:1:5] | passed |
|
8
|
-
./spec/cite-as_spec.rb[1:1:6] | passed | 2.
|
9
|
-
./spec/cite-as_spec.rb[1:1:7] | passed | 2.
|
10
|
-
./spec/cite-as_spec.rb[1:1:8] | passed | 2.
|
11
|
-
./spec/cite-as_spec.rb[1:1:9] | passed |
|
12
|
-
./spec/cite-as_spec.rb[1:1:10] | passed | 2.
|
13
|
-
./spec/cite-as_spec.rb[1:1:11] | passed |
|
3
|
+
./spec/cite-as_spec.rb[1:1:1] | passed | 1.87 seconds |
|
4
|
+
./spec/cite-as_spec.rb[1:1:2] | passed | 1.3 seconds |
|
5
|
+
./spec/cite-as_spec.rb[1:1:3] | passed | 1.53 seconds |
|
6
|
+
./spec/cite-as_spec.rb[1:1:4] | passed | 2.09 seconds |
|
7
|
+
./spec/cite-as_spec.rb[1:1:5] | passed | 8.09 seconds |
|
8
|
+
./spec/cite-as_spec.rb[1:1:6] | passed | 2.63 seconds |
|
9
|
+
./spec/cite-as_spec.rb[1:1:7] | passed | 2.9 seconds |
|
10
|
+
./spec/cite-as_spec.rb[1:1:8] | passed | 2.21 seconds |
|
11
|
+
./spec/cite-as_spec.rb[1:1:9] | passed | 2.85 seconds |
|
12
|
+
./spec/cite-as_spec.rb[1:1:10] | passed | 2.89 seconds |
|
13
|
+
./spec/cite-as_spec.rb[1:1:11] | passed | 3.16 seconds |
|
14
14
|
./spec/cite-as_spec.rb[1:1:12] | passed | 2.23 seconds |
|
15
|
-
./spec/cite-as_spec.rb[1:1:13] | passed |
|
16
|
-
./spec/cite-as_spec.rb[1:1:14] | passed | 2.
|
17
|
-
./spec/cite-as_spec.rb[1:1:15] | passed | 1.
|
18
|
-
./spec/cite-as_spec.rb[1:1:16] | passed | 1.
|
15
|
+
./spec/cite-as_spec.rb[1:1:13] | passed | 2.92 seconds |
|
16
|
+
./spec/cite-as_spec.rb[1:1:14] | passed | 2.8 seconds |
|
17
|
+
./spec/cite-as_spec.rb[1:1:15] | passed | 1.21 seconds |
|
18
|
+
./spec/cite-as_spec.rb[1:1:16] | passed | 1.28 seconds |
|
19
19
|
./spec/cite-as_spec.rb[1:1:17] | passed | 1.19 seconds |
|
20
|
-
./spec/cite-as_spec.rb[1:1:18] | passed | 1.
|
21
|
-
./spec/cite-as_spec.rb[1:1:19] | passed | 1.
|
22
|
-
./spec/cite-as_spec.rb[1:1:20] | passed |
|
23
|
-
./spec/cite-as_spec.rb[1:1:21] | passed | 2.
|
24
|
-
./spec/cite-as_spec.rb[1:1:22] | passed | 1.
|
25
|
-
./spec/cite-as_spec.rb[1:1:23] | passed | 1.
|
26
|
-
./spec/cite-as_spec.rb[1:1:24] | failed | 1.
|
27
|
-
./spec/cite-as_spec.rb[1:1:25] | passed | 0.
|
28
|
-
./spec/describedby_spec.rb[1:1:1] | passed | 3.
|
29
|
-
./spec/describedby_spec.rb[1:1:2] | passed | 1.
|
30
|
-
./spec/describedby_spec.rb[1:1:3] | passed | 1.
|
31
|
-
./spec/describedby_spec.rb[1:1:4] | passed | 1.
|
20
|
+
./spec/cite-as_spec.rb[1:1:18] | passed | 1.24 seconds |
|
21
|
+
./spec/cite-as_spec.rb[1:1:19] | passed | 1.7 seconds |
|
22
|
+
./spec/cite-as_spec.rb[1:1:20] | passed | 1.74 seconds |
|
23
|
+
./spec/cite-as_spec.rb[1:1:21] | passed | 2.75 seconds |
|
24
|
+
./spec/cite-as_spec.rb[1:1:22] | passed | 1.35 seconds |
|
25
|
+
./spec/cite-as_spec.rb[1:1:23] | passed | 1.19 seconds |
|
26
|
+
./spec/cite-as_spec.rb[1:1:24] | failed | 1.2 seconds |
|
27
|
+
./spec/cite-as_spec.rb[1:1:25] | passed | 0.60282 seconds |
|
28
|
+
./spec/describedby_spec.rb[1:1:1] | passed | 3.23 seconds |
|
29
|
+
./spec/describedby_spec.rb[1:1:2] | passed | 1.43 seconds |
|
30
|
+
./spec/describedby_spec.rb[1:1:3] | passed | 1.31 seconds |
|
31
|
+
./spec/describedby_spec.rb[1:1:4] | passed | 1.37 seconds |
|
32
32
|
./spec/describedby_spec.rb[1:1:5] | passed | 1.24 seconds |
|
33
|
-
./spec/describedby_spec.rb[1:1:6] | passed | 1.
|
34
|
-
./spec/describedby_spec.rb[1:1:7] | passed |
|
35
|
-
./spec/describedby_spec.rb[1:1:8] | passed | 2.
|
36
|
-
./spec/describedby_spec.rb[1:1:9] | passed |
|
37
|
-
./spec/describedby_spec.rb[1:1:10] | passed | 2.
|
38
|
-
./spec/describedby_spec.rb[1:1:11] | passed | 2.
|
39
|
-
./spec/describedby_spec.rb[1:1:12] | passed | 2.
|
40
|
-
./spec/describedby_spec.rb[1:1:13] | passed | 1.
|
41
|
-
./spec/describedby_spec.rb[1:1:14] | passed | 2.
|
42
|
-
./spec/describedby_spec.rb[1:1:15] | passed | 2.
|
43
|
-
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.
|
44
|
-
./spec/fsp_harvester_spec.rb[1:2] | passed | 2.
|
45
|
-
./spec/fsp_harvester_spec.rb[1:3] | passed |
|
46
|
-
./spec/fsp_harvester_spec.rb[1:4] | passed | 2.
|
47
|
-
./spec/fsp_harvester_spec.rb[1:5] | passed | 2.
|
48
|
-
./spec/fsp_harvester_spec.rb[1:6] |
|
49
|
-
./spec/fsp_harvester_spec.rb[1:7] | passed |
|
50
|
-
./spec/item_spec.rb[1:1:1] | passed |
|
51
|
-
./spec/item_spec.rb[1:1:2] | passed |
|
33
|
+
./spec/describedby_spec.rb[1:1:6] | passed | 1.09 seconds |
|
34
|
+
./spec/describedby_spec.rb[1:1:7] | passed | 1.03 seconds |
|
35
|
+
./spec/describedby_spec.rb[1:1:8] | passed | 2.28 seconds |
|
36
|
+
./spec/describedby_spec.rb[1:1:9] | passed | 1.84 seconds |
|
37
|
+
./spec/describedby_spec.rb[1:1:10] | passed | 2.23 seconds |
|
38
|
+
./spec/describedby_spec.rb[1:1:11] | passed | 2.97 seconds |
|
39
|
+
./spec/describedby_spec.rb[1:1:12] | passed | 2.97 seconds |
|
40
|
+
./spec/describedby_spec.rb[1:1:13] | passed | 1.65 seconds |
|
41
|
+
./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
|
42
|
+
./spec/describedby_spec.rb[1:1:15] | passed | 2.53 seconds |
|
43
|
+
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00025 seconds |
|
44
|
+
./spec/fsp_harvester_spec.rb[1:2] | passed | 2.69 seconds |
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 45.63 seconds |
|
46
|
+
./spec/fsp_harvester_spec.rb[1:4] | passed | 2.86 seconds |
|
47
|
+
./spec/fsp_harvester_spec.rb[1:5] | passed | 2.67 seconds |
|
48
|
+
./spec/fsp_harvester_spec.rb[1:6] | passed | 2 minutes 1.6 seconds |
|
49
|
+
./spec/fsp_harvester_spec.rb[1:7] | passed | 1 minute 28.63 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.8 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:2] | passed | 3.3 seconds |
|
52
52
|
./spec/item_spec.rb[1:1:3] | passed | 1.33 seconds |
|
53
|
-
./spec/item_spec.rb[1:1:4] | passed | 1.
|
54
|
-
./spec/item_spec.rb[1:1:5] | passed | 2.
|
55
|
-
./spec/item_spec.rb[1:1:6] | passed | 2.
|
56
|
-
./spec/item_spec.rb[1:1:7] | passed |
|
57
|
-
./spec/item_spec.rb[1:1:8] | passed | 0.
|
58
|
-
./spec/type_spec.rb[1:1:1] | passed | 1.
|
59
|
-
./spec/type_spec.rb[1:1:2] | passed | 1.
|
60
|
-
./spec/type_spec.rb[1:1:3] | passed | 1.
|
53
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.68 seconds |
|
54
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.44 seconds |
|
55
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.64 seconds |
|
56
|
+
./spec/item_spec.rb[1:1:7] | passed | 3.02 seconds |
|
57
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.49403 seconds |
|
58
|
+
./spec/type_spec.rb[1:1:1] | passed | 1.23 seconds |
|
59
|
+
./spec/type_spec.rb[1:1:2] | passed | 1.25 seconds |
|
60
|
+
./spec/type_spec.rb[1:1:3] | passed | 1.57 seconds |
|
data/Gemfile.lock
CHANGED
data/lib/metadata_harvester.rb
CHANGED
@@ -153,7 +153,7 @@ module HarvesterTools
|
|
153
153
|
def self.check_ld(body:, claimed_type:)
|
154
154
|
detected_type = ntriples_hack(body: body) # ntriples hack for one-line metadata records
|
155
155
|
unless detected_type # see if distiller can detect a type
|
156
|
-
detected_type = RDF::Format.for({ sample: body[0..5000] })
|
156
|
+
detected_type = RDF::Format.for({ sample: body[0..5000].force_encoding('UTF-8')})
|
157
157
|
@meta.comments << "INFO: Auto-detected type #{detected_type}\n"
|
158
158
|
end
|
159
159
|
# at this point, detected_type is something like RDF::Turtle::Format (or nil). This will return a content-type
|
@@ -199,7 +199,7 @@ module HarvesterTools
|
|
199
199
|
abbreviation = nil
|
200
200
|
parsed = nil
|
201
201
|
begin
|
202
|
-
parsed = JSON.parse(body)
|
202
|
+
parsed = JSON.parse(body.force_encoding('UTF-8'))
|
203
203
|
rescue StandardError
|
204
204
|
abbreviation = nil
|
205
205
|
end
|
data/lib/metadata_parser.rb
CHANGED
@@ -88,7 +88,7 @@ module HarvesterTools
|
|
88
88
|
@meta.comments << "INFO: The response message body component appears to contain #{rdfformat}.\n"
|
89
89
|
reader = ''
|
90
90
|
begin
|
91
|
-
reader = rdfformat.reader.new(body)
|
91
|
+
reader = rdfformat.reader.new(body.force_encoding('UTF-8'))
|
92
92
|
rescue Exception => e
|
93
93
|
@meta.comments << "WARN: Though linked data was found, it failed to parse (Exception #{e}). This likely indicates some syntax error in the data. As a result, no metadata will be extracted from this message.\n"
|
94
94
|
@meta.add_warning(['018', '', ''])
|
@@ -102,9 +102,9 @@ module HarvesterTools
|
|
102
102
|
end
|
103
103
|
reader = rdfformat.reader.new(body) # have to re-read it here, but now its safe because we have already caught errors
|
104
104
|
warn 'WRITING TO CACHE'
|
105
|
-
HarvesterTools::Cache.writeRDFCache(reader: reader, body: body) # write to the special RDF graph cache
|
105
|
+
HarvesterTools::Cache.writeRDFCache(reader: reader, body: body.force_encoding('UTF-8')) # write to the special RDF graph cache
|
106
106
|
warn 'WRITING DONE'
|
107
|
-
reader = rdfformat.reader.new(body) # frustrating that we cannot rewind!
|
107
|
+
reader = rdfformat.reader.new(body.force_encoding('UTF-8')) # frustrating that we cannot rewind!
|
108
108
|
warn 'RE-READING DONE'
|
109
109
|
@meta.merge_rdf(reader.to_a)
|
110
110
|
warn 'MERGE DONE'
|
@@ -114,7 +114,7 @@ module HarvesterTools
|
|
114
114
|
@meta.add_warning(['018', '', ''])
|
115
115
|
rescue Exception => e
|
116
116
|
meta.comments << "CRITICAL: An unknown error occurred while parsing the (apparent) Linked Data (sample of what was parsed: #{body[0..300].delete("\n")}). Moving on...\n"
|
117
|
-
warn "\n\nCRITICAL: #{e.inspect} An unknown error occurred while parsing the (apparent) Linked Data (full body: #{body}). Moving on...\n"
|
117
|
+
warn "\n\nCRITICAL: #{e.inspect} An unknown error occurred while parsing the (apparent) Linked Data (full body: #{body.force_encoding('UTF-8')}). Moving on...\n"
|
118
118
|
@meta.add_warning(['018', '', ''])
|
119
119
|
end
|
120
120
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.21
|
4
|
+
version: 0.1.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|