fsp_harvester 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +54 -54
- data/Gemfile.lock +1 -1
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/metadata_harvester.rb +2 -2
- data/lib/metadata_parser.rb +4 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dfe28e2fc429fd0c550539b356e325a1735d323e72fa9c4ee502fbedb1c818df
|
4
|
+
data.tar.gz: b1ec033372645ca2129f44c4faad380b690527ab2f0823583985b12328bdce54
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11b5ce8b8368d70171e3e376ee75275e1c2892ec58be07976fa225a2df1841ae11b06c335105ab1915a001fb1f0e6247fadf00e3d06f33b21918593c3ada5fc0
|
7
|
+
data.tar.gz: b0c82ea9e81183789227a22eb3c160bfad5d4aa346f0aebb349b383f3d920877c28addb6f936a8a4f5b59d352bcecd313984f7ef7bb90aa4b5e93653e22fd176
|
data/.rspec_status
CHANGED
@@ -1,60 +1,60 @@
|
|
1
1
|
example_id | status | run_time |
|
2
2
|
---------------------------------- | ------ | ---------------------- |
|
3
|
-
./spec/cite-as_spec.rb[1:1:1] | passed | 1.
|
4
|
-
./spec/cite-as_spec.rb[1:1:2] | passed | 1.
|
5
|
-
./spec/cite-as_spec.rb[1:1:3] | passed | 1.
|
6
|
-
./spec/cite-as_spec.rb[1:1:4] | passed |
|
7
|
-
./spec/cite-as_spec.rb[1:1:5] | passed |
|
8
|
-
./spec/cite-as_spec.rb[1:1:6] | passed | 2.
|
9
|
-
./spec/cite-as_spec.rb[1:1:7] | passed | 2.
|
10
|
-
./spec/cite-as_spec.rb[1:1:8] | passed | 2.
|
11
|
-
./spec/cite-as_spec.rb[1:1:9] | passed |
|
12
|
-
./spec/cite-as_spec.rb[1:1:10] | passed | 2.
|
13
|
-
./spec/cite-as_spec.rb[1:1:11] | passed |
|
3
|
+
./spec/cite-as_spec.rb[1:1:1] | passed | 1.87 seconds |
|
4
|
+
./spec/cite-as_spec.rb[1:1:2] | passed | 1.3 seconds |
|
5
|
+
./spec/cite-as_spec.rb[1:1:3] | passed | 1.53 seconds |
|
6
|
+
./spec/cite-as_spec.rb[1:1:4] | passed | 2.09 seconds |
|
7
|
+
./spec/cite-as_spec.rb[1:1:5] | passed | 8.09 seconds |
|
8
|
+
./spec/cite-as_spec.rb[1:1:6] | passed | 2.63 seconds |
|
9
|
+
./spec/cite-as_spec.rb[1:1:7] | passed | 2.9 seconds |
|
10
|
+
./spec/cite-as_spec.rb[1:1:8] | passed | 2.21 seconds |
|
11
|
+
./spec/cite-as_spec.rb[1:1:9] | passed | 2.85 seconds |
|
12
|
+
./spec/cite-as_spec.rb[1:1:10] | passed | 2.89 seconds |
|
13
|
+
./spec/cite-as_spec.rb[1:1:11] | passed | 3.16 seconds |
|
14
14
|
./spec/cite-as_spec.rb[1:1:12] | passed | 2.23 seconds |
|
15
|
-
./spec/cite-as_spec.rb[1:1:13] | passed |
|
16
|
-
./spec/cite-as_spec.rb[1:1:14] | passed | 2.
|
17
|
-
./spec/cite-as_spec.rb[1:1:15] | passed | 1.
|
18
|
-
./spec/cite-as_spec.rb[1:1:16] | passed | 1.
|
15
|
+
./spec/cite-as_spec.rb[1:1:13] | passed | 2.92 seconds |
|
16
|
+
./spec/cite-as_spec.rb[1:1:14] | passed | 2.8 seconds |
|
17
|
+
./spec/cite-as_spec.rb[1:1:15] | passed | 1.21 seconds |
|
18
|
+
./spec/cite-as_spec.rb[1:1:16] | passed | 1.28 seconds |
|
19
19
|
./spec/cite-as_spec.rb[1:1:17] | passed | 1.19 seconds |
|
20
|
-
./spec/cite-as_spec.rb[1:1:18] | passed | 1.
|
21
|
-
./spec/cite-as_spec.rb[1:1:19] | passed | 1.
|
22
|
-
./spec/cite-as_spec.rb[1:1:20] | passed |
|
23
|
-
./spec/cite-as_spec.rb[1:1:21] | passed | 2.
|
24
|
-
./spec/cite-as_spec.rb[1:1:22] | passed | 1.
|
25
|
-
./spec/cite-as_spec.rb[1:1:23] | passed | 1.
|
26
|
-
./spec/cite-as_spec.rb[1:1:24] | failed | 1.
|
27
|
-
./spec/cite-as_spec.rb[1:1:25] | passed | 0.
|
28
|
-
./spec/describedby_spec.rb[1:1:1] | passed | 3.
|
29
|
-
./spec/describedby_spec.rb[1:1:2] | passed | 1.
|
30
|
-
./spec/describedby_spec.rb[1:1:3] | passed | 1.
|
31
|
-
./spec/describedby_spec.rb[1:1:4] | passed | 1.
|
20
|
+
./spec/cite-as_spec.rb[1:1:18] | passed | 1.24 seconds |
|
21
|
+
./spec/cite-as_spec.rb[1:1:19] | passed | 1.7 seconds |
|
22
|
+
./spec/cite-as_spec.rb[1:1:20] | passed | 1.74 seconds |
|
23
|
+
./spec/cite-as_spec.rb[1:1:21] | passed | 2.75 seconds |
|
24
|
+
./spec/cite-as_spec.rb[1:1:22] | passed | 1.35 seconds |
|
25
|
+
./spec/cite-as_spec.rb[1:1:23] | passed | 1.19 seconds |
|
26
|
+
./spec/cite-as_spec.rb[1:1:24] | failed | 1.2 seconds |
|
27
|
+
./spec/cite-as_spec.rb[1:1:25] | passed | 0.60282 seconds |
|
28
|
+
./spec/describedby_spec.rb[1:1:1] | passed | 3.23 seconds |
|
29
|
+
./spec/describedby_spec.rb[1:1:2] | passed | 1.43 seconds |
|
30
|
+
./spec/describedby_spec.rb[1:1:3] | passed | 1.31 seconds |
|
31
|
+
./spec/describedby_spec.rb[1:1:4] | passed | 1.37 seconds |
|
32
32
|
./spec/describedby_spec.rb[1:1:5] | passed | 1.24 seconds |
|
33
|
-
./spec/describedby_spec.rb[1:1:6] | passed | 1.
|
34
|
-
./spec/describedby_spec.rb[1:1:7] | passed |
|
35
|
-
./spec/describedby_spec.rb[1:1:8] | passed | 2.
|
36
|
-
./spec/describedby_spec.rb[1:1:9] | passed |
|
37
|
-
./spec/describedby_spec.rb[1:1:10] | passed | 2.
|
38
|
-
./spec/describedby_spec.rb[1:1:11] | passed | 2.
|
39
|
-
./spec/describedby_spec.rb[1:1:12] | passed | 2.
|
40
|
-
./spec/describedby_spec.rb[1:1:13] | passed | 1.
|
41
|
-
./spec/describedby_spec.rb[1:1:14] | passed | 2.
|
42
|
-
./spec/describedby_spec.rb[1:1:15] | passed | 2.
|
43
|
-
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.
|
44
|
-
./spec/fsp_harvester_spec.rb[1:2] | passed | 2.
|
45
|
-
./spec/fsp_harvester_spec.rb[1:3] | passed |
|
46
|
-
./spec/fsp_harvester_spec.rb[1:4] | passed | 2.
|
47
|
-
./spec/fsp_harvester_spec.rb[1:5] | passed | 2.
|
48
|
-
./spec/fsp_harvester_spec.rb[1:6] |
|
49
|
-
./spec/fsp_harvester_spec.rb[1:7] | passed |
|
50
|
-
./spec/item_spec.rb[1:1:1] | passed |
|
51
|
-
./spec/item_spec.rb[1:1:2] | passed |
|
33
|
+
./spec/describedby_spec.rb[1:1:6] | passed | 1.09 seconds |
|
34
|
+
./spec/describedby_spec.rb[1:1:7] | passed | 1.03 seconds |
|
35
|
+
./spec/describedby_spec.rb[1:1:8] | passed | 2.28 seconds |
|
36
|
+
./spec/describedby_spec.rb[1:1:9] | passed | 1.84 seconds |
|
37
|
+
./spec/describedby_spec.rb[1:1:10] | passed | 2.23 seconds |
|
38
|
+
./spec/describedby_spec.rb[1:1:11] | passed | 2.97 seconds |
|
39
|
+
./spec/describedby_spec.rb[1:1:12] | passed | 2.97 seconds |
|
40
|
+
./spec/describedby_spec.rb[1:1:13] | passed | 1.65 seconds |
|
41
|
+
./spec/describedby_spec.rb[1:1:14] | passed | 2.27 seconds |
|
42
|
+
./spec/describedby_spec.rb[1:1:15] | passed | 2.53 seconds |
|
43
|
+
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00025 seconds |
|
44
|
+
./spec/fsp_harvester_spec.rb[1:2] | passed | 2.69 seconds |
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 45.63 seconds |
|
46
|
+
./spec/fsp_harvester_spec.rb[1:4] | passed | 2.86 seconds |
|
47
|
+
./spec/fsp_harvester_spec.rb[1:5] | passed | 2.67 seconds |
|
48
|
+
./spec/fsp_harvester_spec.rb[1:6] | passed | 2 minutes 1.6 seconds |
|
49
|
+
./spec/fsp_harvester_spec.rb[1:7] | passed | 1 minute 28.63 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.8 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:2] | passed | 3.3 seconds |
|
52
52
|
./spec/item_spec.rb[1:1:3] | passed | 1.33 seconds |
|
53
|
-
./spec/item_spec.rb[1:1:4] | passed | 1.
|
54
|
-
./spec/item_spec.rb[1:1:5] | passed | 2.
|
55
|
-
./spec/item_spec.rb[1:1:6] | passed | 2.
|
56
|
-
./spec/item_spec.rb[1:1:7] | passed |
|
57
|
-
./spec/item_spec.rb[1:1:8] | passed | 0.
|
58
|
-
./spec/type_spec.rb[1:1:1] | passed | 1.
|
59
|
-
./spec/type_spec.rb[1:1:2] | passed | 1.
|
60
|
-
./spec/type_spec.rb[1:1:3] | passed | 1.
|
53
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.68 seconds |
|
54
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.44 seconds |
|
55
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.64 seconds |
|
56
|
+
./spec/item_spec.rb[1:1:7] | passed | 3.02 seconds |
|
57
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.49403 seconds |
|
58
|
+
./spec/type_spec.rb[1:1:1] | passed | 1.23 seconds |
|
59
|
+
./spec/type_spec.rb[1:1:2] | passed | 1.25 seconds |
|
60
|
+
./spec/type_spec.rb[1:1:3] | passed | 1.57 seconds |
|
data/Gemfile.lock
CHANGED
data/lib/metadata_harvester.rb
CHANGED
@@ -153,7 +153,7 @@ module HarvesterTools
|
|
153
153
|
def self.check_ld(body:, claimed_type:)
|
154
154
|
detected_type = ntriples_hack(body: body) # ntriples hack for one-line metadata records
|
155
155
|
unless detected_type # see if distiller can detect a type
|
156
|
-
detected_type = RDF::Format.for({ sample: body[0..5000]})
|
156
|
+
detected_type = RDF::Format.for({ sample: body[0..5000].force_encoding('UTF-8')})
|
157
157
|
@meta.comments << "INFO: Auto-detected type #{detected_type}\n"
|
158
158
|
end
|
159
159
|
# at this point, detected_type is something like RDF::Turtle::Format (or nil). This will return a content-type
|
@@ -199,7 +199,7 @@ module HarvesterTools
|
|
199
199
|
abbreviation = nil
|
200
200
|
parsed = nil
|
201
201
|
begin
|
202
|
-
parsed = JSON.parse(body)
|
202
|
+
parsed = JSON.parse(body.force_encoding('UTF-8'))
|
203
203
|
rescue StandardError
|
204
204
|
abbreviation = nil
|
205
205
|
end
|
data/lib/metadata_parser.rb
CHANGED
@@ -88,7 +88,7 @@ module HarvesterTools
|
|
88
88
|
@meta.comments << "INFO: The response message body component appears to contain #{rdfformat}.\n"
|
89
89
|
reader = ''
|
90
90
|
begin
|
91
|
-
reader = rdfformat.reader.new(body)
|
91
|
+
reader = rdfformat.reader.new(body.force_encoding('UTF-8'))
|
92
92
|
rescue Exception => e
|
93
93
|
@meta.comments << "WARN: Though linked data was found, it failed to parse (Exception #{e}). This likely indicates some syntax error in the data. As a result, no metadata will be extracted from this message.\n"
|
94
94
|
@meta.add_warning(['018', '', ''])
|
@@ -102,9 +102,9 @@ module HarvesterTools
|
|
102
102
|
end
|
103
103
|
reader = rdfformat.reader.new(body) # have to re-read it here, but now its safe because we have already caught errors
|
104
104
|
warn 'WRITING TO CACHE'
|
105
|
-
HarvesterTools::Cache.writeRDFCache(reader: reader, body: body) # write to the special RDF graph cache
|
105
|
+
HarvesterTools::Cache.writeRDFCache(reader: reader, body: body.force_encoding('UTF-8')) # write to the special RDF graph cache
|
106
106
|
warn 'WRITING DONE'
|
107
|
-
reader = rdfformat.reader.new(body) # frustrating that we cannot rewind!
|
107
|
+
reader = rdfformat.reader.new(body.force_encoding('UTF-8')) # frustrating that we cannot rewind!
|
108
108
|
warn 'RE-READING DONE'
|
109
109
|
@meta.merge_rdf(reader.to_a)
|
110
110
|
warn 'MERGE DONE'
|
@@ -114,7 +114,7 @@ module HarvesterTools
|
|
114
114
|
@meta.add_warning(['018', '', ''])
|
115
115
|
rescue Exception => e
|
116
116
|
meta.comments << "CRITICAL: An unknown error occurred while parsing the (apparent) Linked Data (sample of what was parsed: #{body[0..300].delete("\n")}). Moving on...\n"
|
117
|
-
warn "\n\nCRITICAL: #{e.inspect} An unknown error occurred while parsing the (apparent) Linked Data (full body: #{body}). Moving on...\n"
|
117
|
+
warn "\n\nCRITICAL: #{e.inspect} An unknown error occurred while parsing the (apparent) Linked Data (full body: #{body.force_encoding('UTF-8')}). Moving on...\n"
|
118
118
|
@meta.add_warning(['018', '', ''])
|
119
119
|
end
|
120
120
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.21
|
4
|
+
version: 0.1.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|