bolognese 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +52 -48
- data/lib/bolognese/readers/datacite_reader.rb +2 -2
- data/lib/bolognese/utils.rb +13 -0
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese.rb +1 -0
- data/spec/fixtures/datacite-example-complicated-tba.xml +56 -0
- data/spec/fixtures/datacite-example-escaped-text.xml +56 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_turtle/BlogPosting_Citeproc_JSON.yml +91 -2460
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_turtle/Crossref_DOI.yml +144 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_turtle/Dataset.yml +152 -9
- data/spec/readers/datacite_reader_spec.rb +17 -0
- data/spec/writers/citation_writer_spec.rb +3 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 228903b64465ba1351f4856c7137f9a801c619d05412cd125795f69404a2debe
|
4
|
+
data.tar.gz: 88b6b30d92f41133f5d1a18ac656891fce74b00867f82f7a055ef90e01c6b6db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40039c3403e1f6fae2aea53913b98ad8e5c8a68a75116c11fd62a86728d1f5151c6a5ae6223dabeea95464baf778ca4ae56138ae583fa43d90a9320a47748e06
|
7
|
+
data.tar.gz: f2da6c0392eefe64d4c4a2e9af3cb49ff376a4df5566014e57ba69245fff37d6ac92419172f49c38e4af6b22ea27c72caf99287f95a0b76138f2a6bad40d860f
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
bolognese (1.4.1)
|
4
|
+
bolognese (1.4.2)
|
5
5
|
activesupport (>= 4.2.5, < 6)
|
6
6
|
benchmark_methods (~> 0.7)
|
7
7
|
bibtex-ruby (~> 4.1)
|
@@ -30,7 +30,7 @@ PATH
|
|
30
30
|
GEM
|
31
31
|
remote: https://rubygems.org/
|
32
32
|
specs:
|
33
|
-
activesupport (5.2.4)
|
33
|
+
activesupport (5.2.4.1)
|
34
34
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
35
35
|
i18n (>= 0.7, < 2)
|
36
36
|
minitest (~> 5.1)
|
@@ -41,9 +41,9 @@ GEM
|
|
41
41
|
bibtex-ruby (4.4.7)
|
42
42
|
latex-decode (~> 0.0)
|
43
43
|
builder (3.2.4)
|
44
|
-
citeproc (1.0.
|
44
|
+
citeproc (1.0.10)
|
45
45
|
namae (~> 1.0)
|
46
|
-
citeproc-ruby (1.1.
|
46
|
+
citeproc-ruby (1.1.12)
|
47
47
|
citeproc (~> 1.0, >= 1.0.9)
|
48
48
|
csl (~> 1.5)
|
49
49
|
codeclimate-test-reporter (1.0.9)
|
@@ -52,19 +52,19 @@ GEM
|
|
52
52
|
concurrent-ruby (1.0.5)
|
53
53
|
crack (0.4.3)
|
54
54
|
safe_yaml (~> 1.0.0)
|
55
|
-
crass (1.0.
|
56
|
-
csl (1.5.
|
55
|
+
crass (1.0.6)
|
56
|
+
csl (1.5.1)
|
57
57
|
namae (~> 1.0)
|
58
|
-
csl-styles (1.0.1.
|
58
|
+
csl-styles (1.0.1.10)
|
59
59
|
csl (~> 1.0)
|
60
60
|
diff-lcs (1.3)
|
61
61
|
docile (1.1.5)
|
62
|
-
ebnf (1.
|
63
|
-
rdf (~> 3.
|
64
|
-
sxp (~> 1.
|
62
|
+
ebnf (1.2.0)
|
63
|
+
rdf (~> 3.1)
|
64
|
+
sxp (~> 1.1)
|
65
65
|
edtf (3.0.5)
|
66
66
|
activesupport (>= 3.0, < 7.0)
|
67
|
-
excon (0.
|
67
|
+
excon (0.71.1)
|
68
68
|
faraday (0.17.0)
|
69
69
|
multipart-post (>= 1.2, < 3)
|
70
70
|
faraday-encoding (0.0.5)
|
@@ -80,17 +80,21 @@ GEM
|
|
80
80
|
concurrent-ruby (~> 1.0)
|
81
81
|
hashdiff (1.0.0)
|
82
82
|
htmlentities (4.3.4)
|
83
|
-
i18n (1.
|
83
|
+
i18n (1.8.2)
|
84
84
|
concurrent-ruby (~> 1.0)
|
85
85
|
iso8601 (0.9.1)
|
86
86
|
json (2.3.0)
|
87
|
-
json-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
multi_json (~> 1.
|
93
|
-
|
87
|
+
json-canonicalization (0.2.0)
|
88
|
+
json-ld (3.1.0)
|
89
|
+
htmlentities (~> 4.3)
|
90
|
+
json-canonicalization (~> 0.1)
|
91
|
+
link_header (~> 0.0, >= 0.0.8)
|
92
|
+
multi_json (~> 1.14)
|
93
|
+
rack (~> 2.0)
|
94
|
+
rdf (~> 3.1)
|
95
|
+
json-ld-preloaded (3.1.0)
|
96
|
+
json-ld (~> 3.1)
|
97
|
+
rdf (~> 3.1)
|
94
98
|
jsonlint (0.3.0)
|
95
99
|
oj (~> 3)
|
96
100
|
optimist (~> 3)
|
@@ -99,25 +103,25 @@ GEM
|
|
99
103
|
loofah (2.4.0)
|
100
104
|
crass (~> 1.0.2)
|
101
105
|
nokogiri (>= 1.5.9)
|
102
|
-
maremma (4.
|
106
|
+
maremma (4.5)
|
103
107
|
activesupport (>= 4.2.5, < 6)
|
104
108
|
addressable (>= 2.3.6)
|
105
109
|
builder (~> 3.2, >= 3.2.2)
|
106
|
-
excon (~> 0.
|
107
|
-
faraday (
|
110
|
+
excon (~> 0.71.0)
|
111
|
+
faraday (= 0.17.0)
|
108
112
|
faraday-encoding (~> 0.0.4)
|
109
113
|
faraday_middleware (~> 0.13.1)
|
110
|
-
multi_json (~> 1.12)
|
111
114
|
nokogiri (~> 1.10.4)
|
112
115
|
oj (>= 2.8.3)
|
116
|
+
oj_mimic_json (~> 1.0, >= 1.0.1)
|
113
117
|
mini_portile2 (2.4.0)
|
114
|
-
minitest (5.
|
118
|
+
minitest (5.14.0)
|
115
119
|
multi_json (1.14.1)
|
116
120
|
multipart-post (2.1.1)
|
117
121
|
namae (1.0.1)
|
118
122
|
nokogiri (1.10.7)
|
119
123
|
mini_portile2 (~> 2.4.0)
|
120
|
-
oj (3.10.
|
124
|
+
oj (3.10.2)
|
121
125
|
oj_mimic_json (1.0.1)
|
122
126
|
optimist (3.0.0)
|
123
127
|
postrank-uri (1.0.24)
|
@@ -125,44 +129,44 @@ GEM
|
|
125
129
|
nokogiri (>= 1.8.0)
|
126
130
|
public_suffix (>= 2.0.0, < 2.1)
|
127
131
|
public_suffix (2.0.5)
|
128
|
-
rack (2.
|
132
|
+
rack (2.1.2)
|
129
133
|
rack-test (0.8.3)
|
130
134
|
rack (>= 1.0, < 3)
|
131
135
|
rake (12.3.3)
|
132
|
-
rdf (3.
|
136
|
+
rdf (3.1.1)
|
133
137
|
hamster (~> 3.0)
|
134
138
|
link_header (~> 0.0, >= 0.0.8)
|
135
|
-
rdf-aggregate-repo (
|
136
|
-
rdf (
|
137
|
-
rdf-rdfa (3.0
|
138
|
-
haml (~> 5.
|
139
|
+
rdf-aggregate-repo (3.1.0)
|
140
|
+
rdf (~> 3.1)
|
141
|
+
rdf-rdfa (3.1.0)
|
142
|
+
haml (~> 5.1)
|
139
143
|
htmlentities (~> 4.3)
|
140
|
-
rdf (~> 3.
|
141
|
-
rdf-aggregate-repo (
|
142
|
-
rdf-xsd (~> 3.
|
144
|
+
rdf (~> 3.1)
|
145
|
+
rdf-aggregate-repo (~> 3.1)
|
146
|
+
rdf-xsd (~> 3.1)
|
143
147
|
rdf-rdfxml (2.2.1)
|
144
148
|
htmlentities (~> 4.3)
|
145
149
|
rdf (>= 2.2, < 4.0)
|
146
150
|
rdf-rdfa (>= 2.2, < 4.0)
|
147
151
|
rdf-xsd (>= 2.2, < 4.0)
|
148
|
-
rdf-turtle (3.0
|
149
|
-
ebnf (~> 1.
|
150
|
-
rdf (~> 3.
|
151
|
-
rdf-xsd (3.0
|
152
|
-
rdf (~> 3.
|
152
|
+
rdf-turtle (3.1.0)
|
153
|
+
ebnf (~> 1.2)
|
154
|
+
rdf (~> 3.1)
|
155
|
+
rdf-xsd (3.1.0)
|
156
|
+
rdf (~> 3.1)
|
153
157
|
rspec (3.9.0)
|
154
158
|
rspec-core (~> 3.9.0)
|
155
159
|
rspec-expectations (~> 3.9.0)
|
156
160
|
rspec-mocks (~> 3.9.0)
|
157
|
-
rspec-core (3.9.
|
158
|
-
rspec-support (~> 3.9.
|
161
|
+
rspec-core (3.9.1)
|
162
|
+
rspec-support (~> 3.9.1)
|
159
163
|
rspec-expectations (3.9.0)
|
160
164
|
diff-lcs (>= 1.2.0, < 2.0)
|
161
165
|
rspec-support (~> 3.9.0)
|
162
|
-
rspec-mocks (3.9.
|
166
|
+
rspec-mocks (3.9.1)
|
163
167
|
diff-lcs (>= 1.2.0, < 2.0)
|
164
168
|
rspec-support (~> 3.9.0)
|
165
|
-
rspec-support (3.9.
|
169
|
+
rspec-support (3.9.2)
|
166
170
|
rspec-xsd (0.1.0)
|
167
171
|
nokogiri (~> 1.6)
|
168
172
|
rspec (~> 3)
|
@@ -172,17 +176,17 @@ GEM
|
|
172
176
|
json (>= 1.8, < 3)
|
173
177
|
simplecov-html (~> 0.10.0)
|
174
178
|
simplecov-html (0.10.2)
|
175
|
-
sxp (1.0
|
176
|
-
rdf (~> 3.
|
179
|
+
sxp (1.1.0)
|
180
|
+
rdf (~> 3.1)
|
177
181
|
temple (0.8.2)
|
178
182
|
thor (0.20.3)
|
179
183
|
thread_safe (0.3.6)
|
180
184
|
tilt (2.0.10)
|
181
|
-
tzinfo (1.2.
|
185
|
+
tzinfo (1.2.6)
|
182
186
|
thread_safe (~> 0.1)
|
183
187
|
unicode_utils (1.4.0)
|
184
188
|
vcr (3.0.3)
|
185
|
-
webmock (3.
|
189
|
+
webmock (3.8.1)
|
186
190
|
addressable (>= 2.3.6)
|
187
191
|
crack (>= 0.3.2)
|
188
192
|
hashdiff (>= 0.4.0, < 2.0.0)
|
@@ -204,4 +208,4 @@ DEPENDENCIES
|
|
204
208
|
webmock (~> 3.0, >= 3.0.1)
|
205
209
|
|
206
210
|
BUNDLED WITH
|
207
|
-
2.
|
211
|
+
2.1.4
|
data/lib/bolognese/readers/datacite_reader.rb
CHANGED
@@ -133,8 +133,8 @@ module Bolognese
|
|
133
133
|
end.compact
|
134
134
|
dates = Array.wrap(meta.dig("dates", "date")).map do |r|
|
135
135
|
if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
|
136
|
-
if Date.edtf(date).present?
|
137
|
-
{ "date" => date,
|
136
|
+
if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
|
137
|
+
{ "date" => date,
|
138
138
|
"dateType" => parse_attributes(r, content: "dateType"),
|
139
139
|
"dateInformation" => parse_attributes(r, content: "dateInformation")
|
140
140
|
}.compact
|
data/lib/bolognese/utils.rb
CHANGED
@@ -322,6 +322,19 @@ module Bolognese
|
|
322
322
|
"WebSite" => "misc"
|
323
323
|
}
|
324
324
|
|
325
|
+
UNKNOWN_INFORMATION = {
|
326
|
+
":unac" => "temporarily inaccessible",
|
327
|
+
":unal" => "unallowed, suppressed intentionally",
|
328
|
+
":unap" => "not applicable, makes no sense",
|
329
|
+
":unas" => "value unassigned (e.g., Untitled)",
|
330
|
+
":unav" => "value unavailable, possibly unknown",
|
331
|
+
":unkn" => "known to be unknown (e.g., Anonymous, Inconnue)",
|
332
|
+
":none" => "never had a value, never will",
|
333
|
+
":null" => "explicitly and meaningfully empty",
|
334
|
+
":tba" => "to be assigned or announced later",
|
335
|
+
":etal" => "too numerous to list (et alia)"
|
336
|
+
}
|
337
|
+
|
325
338
|
def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
|
326
339
|
if id.present?
|
327
340
|
find_from_format_by_id(id)
|
data/lib/bolognese/version.rb
CHANGED
data/lib/bolognese.rb
CHANGED
data/spec/fixtures/datacite-example-complicated-tba.xml
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
|
3
|
+
<identifier identifierType="DOI">10.5072/testpub</identifier>
|
4
|
+
<creators>
|
5
|
+
<creator>
|
6
|
+
<creatorName nameType="Personal">Smith, John</creatorName>
|
7
|
+
</creator>
|
8
|
+
<creator>
|
9
|
+
<creatorName>つまらないものですが</creatorName>
|
10
|
+
<nameIdentifier nameIdentifierScheme="ISNI" schemeURI="http://isni.org/isni/">0000000134596520</nameIdentifier>
|
11
|
+
</creator>
|
12
|
+
</creators>
|
13
|
+
<titles>
|
14
|
+
<title>:unav</title>
|
15
|
+
</titles>
|
16
|
+
<publisher>Springer</publisher>
|
17
|
+
<publicationYear>2010</publicationYear>
|
18
|
+
<subjects>
|
19
|
+
<subject subjectScheme="DDC">830 German &amp; related literatures</subject>
|
20
|
+
<subject>Polish Literature</subject>
|
21
|
+
</subjects>
|
22
|
+
<dates>
|
23
|
+
<date dateType="Other" dateInformation="Correction">:tba</date>
|
24
|
+
</dates>
|
25
|
+
<contributors>
|
26
|
+
<contributor contributorType="DataCollector">
|
27
|
+
<contributorName>Doe, John</contributorName>
|
28
|
+
<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0001-5393-1421</nameIdentifier>
|
29
|
+
</contributor>
|
30
|
+
</contributors>
|
31
|
+
<language>GER</language>
|
32
|
+
<resourceType resourceTypeGeneral="Text">Monograph</resourceType>
|
33
|
+
<alternateIdentifiers>
|
34
|
+
<alternateIdentifier alternateIdentifierType="ISBN">937-0-4523-12357-6</alternateIdentifier>
|
35
|
+
</alternateIdentifiers>
|
36
|
+
<relatedIdentifiers>
|
37
|
+
<relatedIdentifier resourceTypeGeneral="Text" relatedIdentifierType="DOI" relationType="IsPartOf">10.5272/oldertestpub</relatedIdentifier>
|
38
|
+
</relatedIdentifiers>
|
39
|
+
<sizes>
|
40
|
+
<size>256 pages</size>
|
41
|
+
</sizes>
|
42
|
+
<formats>
|
43
|
+
<format>:null</format>
|
44
|
+
</formats>
|
45
|
+
<version>2</version>
|
46
|
+
<rightsList>
|
47
|
+
<rights xml:lang="eng" rightsURI="http://creativecommons.org/licenses/by-nd/2.0/">Creative Commons Attribution-NoDerivs 2.0 Generic</rights>
|
48
|
+
</rightsList>
|
49
|
+
<descriptions>
|
50
|
+
<description descriptionType="Abstract">
|
51
|
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea
|
52
|
+
takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores
|
53
|
+
et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
|
54
|
+
</description>
|
55
|
+
</descriptions>
|
56
|
+
</resource>
|
data/spec/fixtures/datacite-example-escaped-text.xml
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
|
3
|
+
<identifier identifierType="DOI">10.5072/testpub</identifier>
|
4
|
+
<creators>
|
5
|
+
<creator>
|
6
|
+
<creatorName nameType="Personal">Smith, John</creatorName>
|
7
|
+
</creator>
|
8
|
+
<creator>
|
9
|
+
<creatorName>つまらないものですが</creatorName>
|
10
|
+
<nameIdentifier nameIdentifierScheme="ISNI" schemeURI="http://isni.org/isni/">0000000134596520</nameIdentifier>
|
11
|
+
</creator>
|
12
|
+
</creators>
|
13
|
+
<titles>
|
14
|
+
<title>Some initial text&lt;the rest of the text won't display.</title>
|
15
|
+
</titles>
|
16
|
+
<publisher>Springer</publisher>
|
17
|
+
<publicationYear>2010</publicationYear>
|
18
|
+
<subjects>
|
19
|
+
<subject subjectScheme="DDC">830 German &amp; related literatures</subject>
|
20
|
+
<subject>Polish Literature</subject>
|
21
|
+
</subjects>
|
22
|
+
<dates>
|
23
|
+
<date dateType="Other" dateInformation="Correction">2012-12-13</date>
|
24
|
+
</dates>
|
25
|
+
<contributors>
|
26
|
+
<contributor contributorType="DataCollector">
|
27
|
+
<contributorName>Doe, John</contributorName>
|
28
|
+
<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0001-5393-1421</nameIdentifier>
|
29
|
+
</contributor>
|
30
|
+
</contributors>
|
31
|
+
<language>GER</language>
|
32
|
+
<resourceType resourceTypeGeneral="Text">Monograph</resourceType>
|
33
|
+
<alternateIdentifiers>
|
34
|
+
<alternateIdentifier alternateIdentifierType="ISBN">937-0-4523-12357-6</alternateIdentifier>
|
35
|
+
</alternateIdentifiers>
|
36
|
+
<relatedIdentifiers>
|
37
|
+
<relatedIdentifier resourceTypeGeneral="Text" relatedIdentifierType="DOI" relationType="IsPartOf">10.5272/oldertestpub</relatedIdentifier>
|
38
|
+
</relatedIdentifiers>
|
39
|
+
<sizes>
|
40
|
+
<size>256 pages</size>
|
41
|
+
</sizes>
|
42
|
+
<formats>
|
43
|
+
<format>pdf</format>
|
44
|
+
</formats>
|
45
|
+
<version>2</version>
|
46
|
+
<rightsList>
|
47
|
+
<rights xml:lang="eng" rightsURI="http://creativecommons.org/licenses/by-nd/2.0/">Creative Commons Attribution-NoDerivs 2.0 Generic</rights>
|
48
|
+
</rightsList>
|
49
|
+
<descriptions>
|
50
|
+
<description descriptionType="Abstract">
|
51
|
+
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea
|
52
|
+
takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores
|
53
|
+
et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
|
54
|
+
</description>
|
55
|
+
</descriptions>
|
56
|
+
</resource>
|