bolognese 0.9.95 → 0.9.96

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,378 @@
1
+ require_relative 'doi_utils'
2
+ require_relative 'author_utils'
3
+ require_relative 'datacite_utils'
4
+ require_relative 'utils'
5
+
6
+ require_relative 'readers/bibtex_reader'
7
+ require_relative 'readers/citeproc_reader'
8
+ require_relative 'readers/codemeta_reader'
9
+ require_relative 'readers/crosscite_reader'
10
+ require_relative 'readers/crossref_reader'
11
+ require_relative 'readers/datacite_json_reader'
12
+ require_relative 'readers/datacite_reader'
13
+ require_relative 'readers/ris_reader'
14
+ require_relative 'readers/schema_org_reader'
15
+
16
+ require_relative 'writers/bibtex_writer'
17
+ require_relative 'writers/citation_writer'
18
+ require_relative 'writers/citeproc_writer'
19
+ require_relative 'writers/codemeta_writer'
20
+ require_relative 'writers/crosscite_writer'
21
+ require_relative 'writers/crossref_writer'
22
+ require_relative 'writers/datacite_writer'
23
+ require_relative 'writers/datacite_json_writer'
24
+ require_relative 'writers/jats_writer'
25
+ require_relative 'writers/rdf_xml_writer'
26
+ require_relative 'writers/ris_writer'
27
+ require_relative 'writers/schema_org_writer'
28
+ require_relative 'writers/turtle_writer'
29
+
30
module Bolognese
  # Shared accessors for a metadata record.
  #
  # Most readers below follow one pattern: return the value assigned through
  # the matching writer if there is one, otherwise look the key up in the hash
  # returned by #meta. #meta in turn dispatches to the reader method selected
  # by `from` (e.g. from == "datacite" calls read_datacite).
  #
  # NOTE(review): #meta is not memoized and does not consult the @meta set by
  # `attr_writer :meta`, so every reader whose cached value is nil re-runs the
  # underlying read_* method. Confirm whether callers rely on this before
  # adding caching.
  module MetadataUtils
    # include BenchmarkMethods
    include Bolognese::DoiUtils
    include Bolognese::AuthorUtils
    include Bolognese::DataciteUtils
    include Bolognese::Utils

    include Bolognese::Readers::BibtexReader
    include Bolognese::Readers::CiteprocReader
    include Bolognese::Readers::CodemetaReader
    include Bolognese::Readers::CrossciteReader
    include Bolognese::Readers::CrossrefReader
    include Bolognese::Readers::DataciteReader
    include Bolognese::Readers::DataciteJsonReader
    include Bolognese::Readers::RisReader
    include Bolognese::Readers::SchemaOrgReader

    include Bolognese::Writers::BibtexWriter
    include Bolognese::Writers::CitationWriter
    include Bolognese::Writers::CiteprocWriter
    include Bolognese::Writers::CodemetaWriter
    include Bolognese::Writers::CrossciteWriter
    include Bolognese::Writers::CrossrefWriter
    include Bolognese::Writers::DataciteWriter
    include Bolognese::Writers::DataciteJsonWriter
    include Bolognese::Writers::JatsWriter
    include Bolognese::Writers::RdfXmlWriter
    include Bolognese::Writers::RisWriter
    include Bolognese::Writers::SchemaOrgWriter
    include Bolognese::Writers::TurtleWriter

    # Several of the readers generated here are redefined further down; the
    # declarations are kept as-is so that the generated writers stay available.
    # NOTE(review): the relation readers below (has_part, references, ...) read
    # straight from #meta and ignore values assigned through these writers.
    attr_accessor :string, :identifier, :from, :author,
      :creator, :title, :publisher, :contributor, :license,
      :date_accepted, :date_available, :date_copyrighted, :date_collected,
      :date_submitted, :date_valid, :date_created, :date_modified, :date_updated, :provider_id, :client_id, :journal,
      :volume, :issue, :first_page, :last_page, :b_doi, :b_url, :b_version, :keywords, :editor,
      :description, :alternate_name, :language, :content_size, :spatial_coverage,
      :schema_version, :has_part, :same_as,
      :is_previous_version_of, :is_new_version_of, :is_cited_by, :cites,
      :is_supplement_to, :is_supplemented_by, :is_continued_by, :continues,
      :has_metadata, :is_metadata_for, :is_referenced_by, :references,
      :is_documented_by, :documents, :is_compiled_by, :compiles,
      :is_variant_form_of, :is_original_form_of, :is_reviewed_by, :reviews,
      :is_derived_from, :is_source_of, :format, :funding, :style, :locale, :state, :regenerate, :sandbox

    attr_reader :doc, :service_provider, :page_start, :page_end, :related_identifier, :reverse, :name_detector

    attr_writer :id, :type, :additional_type, :citeproc_type, :bibtex_type, :doi,
      :ris_type, :meta

    # True unless the parsed metadata reports (or defaults to) the
    # "not_found" state.
    def exists?
      meta.fetch("state", "not_found") != "not_found"
    end

    # True when the record exists and #errors returns nil.
    def valid?
      exists? && errors.nil?
    end

    # validate against DataCite schema, unless there are already errors in the reader
    def errors
      meta.fetch("errors", nil) || datacite_errors(xml: datacite, schema_version: schema_version)
    end

    # replace DOI in XML if provided in options
    #
    # Returns the stripped input string (nil when blank). For DataCite input
    # the string is parsed as XML and the content of its "identifier" element
    # is replaced with the upcased DOI before serializing again.
    def raw
      r = string.present? ? string.strip : nil
      return r unless from == "datacite" && r.present?

      # named `xml` rather than `doc` to avoid shadowing the `doc` reader
      xml = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
      node = xml.at_css("identifier")
      # at_css returns nil when there is no identifier element; in that case
      # leave the document untouched instead of raising NoMethodError
      node.content = doi.to_s.upcase if node
      xml.to_xml.strip
    end

    # True for DataCite input when `regenerate` is blank.
    def should_passthru
      (from == "datacite") && regenerate.blank?
    end

    # generate name for method to call dynamically
    # the id might change
    #
    # Calls read_<from> with the input string and sandbox flag, or returns an
    # empty hash when `from` is blank. As a side effect @id is reassigned on
    # every call from b_doi, then the "id" key, then the "identifier" key.
    def meta
      # interpolation (rather than String#+) also accepts a Symbol for `from`
      m = from.present? ? send("read_#{from}", string: string, sandbox: sandbox) : {}
      @id = b_doi || m.fetch("id", nil) || m.fetch("identifier", nil)

      m
    end

    def id
      @id ||= meta.fetch("id", nil)
    end

    def type
      @type ||= meta.fetch("type", nil)
    end

    def additional_type
      @additional_type ||= meta.fetch("additional_type", nil)
    end

    def citeproc_type
      @citeproc_type ||= meta.fetch("citeproc_type", nil)
    end

    def bibtex_type
      @bibtex_type ||= meta.fetch("bibtex_type", nil)
    end

    def ris_type
      @ris_type ||= meta.fetch("ris_type", nil)
    end

    def resource_type_general
      @resource_type_general ||= meta.fetch("resource_type_general", nil)
    end

    # Derived from @id via doi_from_url when @id is present, otherwise taken
    # from the "doi" key of the parsed metadata.
    def doi
      @doi ||= @id.present? ? doi_from_url(@id) : meta.fetch("doi", nil)
    end

    def b_url
      @b_url ||= meta.fetch("b_url", nil)
    end

    # Note: reads the "id" key of the parsed metadata, not "identifier".
    def identifier
      @identifier ||= meta.fetch("id", nil)
    end

    def state
      @state ||= meta.fetch("state", nil)
    end

    def title
      @title ||= meta.fetch("title", nil)
    end

    def alternate_name
      @alternate_name ||= meta.fetch("alternate_name", nil)
    end

    def author
      @author ||= meta.fetch("author", nil)
    end

    def editor
      @editor ||= meta.fetch("editor", nil)
    end

    def publisher
      @publisher ||= meta.fetch("publisher", nil)
    end

    def service_provider
      @service_provider ||= meta.fetch("service_provider", nil)
    end

    def date_created
      @date_created ||= meta.fetch("date_created", nil)
    end

    def date_accepted
      @date_accepted ||= meta.fetch("date_accepted", nil)
    end

    def date_available
      @date_available ||= meta.fetch("date_available", nil)
    end

    def date_copyrighted
      @date_copyrighted ||= meta.fetch("date_copyrighted", nil)
    end

    def date_collected
      @date_collected ||= meta.fetch("date_collected", nil)
    end

    def date_submitted
      @date_submitted ||= meta.fetch("date_submitted", nil)
    end

    def date_valid
      @date_valid ||= meta.fetch("date_valid", nil)
    end

    def date_published
      @date_published ||= meta.fetch("date_published", nil)
    end

    def date_modified
      @date_modified ||= meta.fetch("date_modified", nil)
    end

    def date_registered
      @date_registered ||= meta.fetch("date_registered", nil)
    end

    def date_updated
      @date_updated ||= meta.fetch("date_updated", nil)
    end

    def volume
      @volume ||= meta.fetch("volume", nil)
    end

    def first_page
      @first_page ||= meta.fetch("first_page", nil)
    end

    def last_page
      @last_page ||= meta.fetch("last_page", nil)
    end

    def description
      @description ||= meta.fetch("description", nil)
    end

    def license
      @license ||= meta.fetch("license", nil)
    end

    def b_version
      @b_version ||= meta.fetch("b_version", nil)
    end

    def keywords
      @keywords ||= meta.fetch("keywords", nil)
    end

    def language
      @language ||= meta.fetch("language", nil)
    end

    def content_size
      @content_size ||= meta.fetch("content_size", nil)
    end

    def schema_version
      @schema_version ||= meta.fetch("schema_version", nil)
    end

    def funding
      @funding ||= meta.fetch("funding", nil)
    end

    def provider_id
      @provider_id ||= meta.fetch("provider_id", nil)
    end

    def client_id
      @client_id ||= meta.fetch("client_id", nil)
    end

    # The relation readers below are not memoized: each call reads the key of
    # the same name from a fresh #meta result.

    def is_identical_to
      meta.fetch("is_identical_to", nil)
    end

    def is_part_of
      meta.fetch("is_part_of", nil)
    end

    def has_part
      meta.fetch("has_part", nil)
    end

    # Reads the key matching the method name, as the sibling relation readers
    # do (this previously looked up "is_previous_of").
    def is_previous_version_of
      meta.fetch("is_previous_version_of", nil)
    end

    def is_new_version_of
      meta.fetch("is_new_version_of", nil)
    end

    def is_variant_form_of
      meta.fetch("is_variant_form_of", nil)
    end

    def is_original_form_of
      meta.fetch("is_original_form_of", nil)
    end

    def references
      meta.fetch("references", nil)
    end

    def is_referenced_by
      meta.fetch("is_referenced_by", nil)
    end

    def is_supplement_to
      meta.fetch("is_supplement_to", nil)
    end

    def is_supplemented_by
      meta.fetch("is_supplemented_by", nil)
    end

    def reviews
      meta.fetch("reviews", nil)
    end

    def is_reviewed_by
      meta.fetch("is_reviewed_by", nil)
    end

    # Returns the entries of the given relation (a method name such as
    # "has_part") that carry an "id" or "issn", each merged with a
    # "relationType" key holding the camelized relation name.
    def related_identifier_hsh(relation_type)
      Array.wrap(send(relation_type)).select { |r| r["id"] || r["issn"] }
        .map { |r| r.merge("relationType" => relation_type.camelize) }
    end

    # Concatenates related_identifier_hsh for a fixed list of relation types,
    # in the order listed.
    def related_identifier
      relation_types = %w(is_part_of has_part references is_referenced_by is_supplement_to is_supplemented_by)
      relation_types.flat_map { |r| related_identifier_hsh(r) }
    end

    # recognize given name. Can be loaded once as ::NameDetector, e.g. in a Rails initializer
    def name_detector
      @name_detector ||= defined?(::NameDetector) ? ::NameDetector : nil
    end

    # Integer built from the first four characters of date_published, or nil
    # when date_published is blank.
    def publication_year
      date_published.present? ? date_published[0..3].to_i.presence : nil
    end

    # "title" of the first is_part_of entry, or nil when there is none.
    def container_title
      Array.wrap(is_part_of).first.to_h.fetch("title", nil)
    end

    # description coerced to an array.
    def descriptions
      Array.wrap(description)
    end

    # Hash with "citation" (from is_referenced_by) and "isBasedOn" (from
    # is_supplement_to), each entry reduced to { "@id" => id }; keys whose
    # value is nil are dropped by compact.
    def reverse
      { "citation" => Array.wrap(is_referenced_by).map { |r| { "@id" => r["id"] }}.unwrap,
        "isBasedOn" => Array.wrap(is_supplement_to).map { |r| { "@id" => r["id"] }}.unwrap }.compact
    end

    # RDF graph built from the JSON-LD conversion of schema_hsh.
    def graph
      RDF::Graph.new << JSON::LD::API.toRdf(schema_hsh)
    end

    # Citation style, "apa" unless one was assigned.
    def style
      @style || "apa"
    end

    # Citation locale, "en-US" unless one was assigned.
    def locale
      @locale || "en-US"
    end
  end
end
@@ -17,7 +17,7 @@ module Bolognese
17
17
  end
18
18
 
19
19
  meta = string.present? ? Maremma.from_json(string) : {}
20
- identifier = meta.fetch("identifier", nil) || options[:id]
20
+ identifier = meta.fetch("identifier", nil)
21
21
  id = normalize_id(meta.fetch("@id", nil) || identifier)
22
22
  type = meta.fetch("@type", nil)
23
23
  author = get_authors(from_schema_org(Array.wrap(meta.fetch("agents", nil))))
@@ -129,7 +129,7 @@ module Bolognese
129
129
  additional_type = (additional_type || model).to_s.underscore.camelize.presence
130
130
  type = CR_TO_SO_TRANSLATIONS[additional_type] || "ScholarlyArticle"
131
131
 
132
- doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase.presence || doi_from_url(options[:id])
132
+ doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase.presence #|| doi_from_url(options[:id])
133
133
 
134
134
  # Crossref servers run on Eastern Time
135
135
  Time.zone = 'Eastern Time (US & Canada)'
@@ -16,7 +16,7 @@ module Bolognese
16
16
  id = normalize_id(id)
17
17
  response = Maremma.get(id)
18
18
  doc = Nokogiri::XML(response.body.fetch("data", nil), nil, 'UTF-8')
19
- #string = doc.at_xpath('//script[@type="application/ld+json"]')
19
+
20
20
  # workaround for xhtml documents
21
21
  nodeset = doc.css("script")
22
22
  string = nodeset.find { |element| element["type"] == "application/ld+json" }
@@ -33,7 +33,7 @@ module Bolognese
33
33
 
34
34
  meta = string.present? ? Maremma.from_json(string) : {}
35
35
 
36
- id = normalize_id(meta.fetch("@id", nil) || options[:id])
36
+ id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil))
37
37
  type = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
38
38
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[type]
39
39
  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
@@ -65,11 +65,13 @@ module Bolognese
65
65
  "ris_type" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN",
66
66
  "resource_type_general" => resource_type_general,
67
67
  "doi" => validate_doi(id),
68
+ "identifier" => id,
68
69
  "b_url" => normalize_id(meta.fetch("url", nil)),
69
70
  "title" => meta.fetch("name", nil),
70
71
  "alternate_name" => meta.fetch("alternateName", nil),
71
72
  "author" => author,
72
- "publisher" => meta.dig("publisher", "name"),
73
+ "editor" => editor,
74
+ "publisher" => publisher,
73
75
  "service_provider" => meta.fetch("provider", nil),
74
76
  "is_identical_to" => schema_org_is_identical_to(meta),
75
77
  "is_part_of" => is_part_of,
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.9.95"
2
+ VERSION = "0.9.96"
3
3
  end
@@ -29,17 +29,17 @@ http_interactions:
29
29
  Location:
30
30
  - "/eating-your-own-dog-food/"
31
31
  Date:
32
- - Sun, 01 Apr 2018 09:07:34 GMT
32
+ - Sat, 05 May 2018 11:18:18 GMT
33
33
  Server:
34
34
  - AmazonS3
35
35
  Age:
36
- - '7'
36
+ - '1'
37
37
  X-Cache:
38
38
  - Hit from cloudfront
39
39
  Via:
40
- - 1.1 b4a55cbff1b10f55c71caa19690c960e.cloudfront.net (CloudFront)
40
+ - 1.1 79503619d600dbc1c9e04a650d3d7f3f.cloudfront.net (CloudFront)
41
41
  X-Amz-Cf-Id:
42
- - PiYKF90NnJ3q74OCFm-SsaoZ4dFao2Mf2nsqorGgC5peSZ1G49af-w==
42
+ - STgSOeC22bUHkt2ibmg8PYtQx3wOaP4LuhYPvdMOpKbqG6whLJv1CA==
43
43
  body:
44
44
  encoding: ASCII-8BIT
45
45
  string: |
@@ -50,14 +50,14 @@ http_interactions:
50
50
  <ul>
51
51
  <li>Code: Found</li>
52
52
  <li>Message: Resource Found</li>
53
- <li>RequestId: 9969744BDEDE2DCF</li>
54
- <li>HostId: CHCBOci8Ah7seqAe+QhEI4MkzkKUaFeWxNnz62xRgBd+OUDoxKz3/WrhvPncEjkePwujz5xR060=</li>
53
+ <li>RequestId: D9CCB55C4CD3060A</li>
54
+ <li>HostId: 96Rt2L2T/eEFeJZens9Xh0CiPmwnzPOib+s4Z2/eYdziOrw0Ja/kRiarTIWcIYC/CM6+fDkkYCk=</li>
55
55
  </ul>
56
56
  <hr/>
57
57
  </body>
58
58
  </html>
59
59
  http_version:
60
- recorded_at: Sun, 01 Apr 2018 09:07:41 GMT
60
+ recorded_at: Sat, 05 May 2018 11:18:19 GMT
61
61
  - request:
62
62
  method: get
63
63
  uri: https://blog.datacite.org/eating-your-own-dog-food/
@@ -81,27 +81,27 @@ http_interactions:
81
81
  Connection:
82
82
  - keep-alive
83
83
  Date:
84
- - Sun, 01 Apr 2018 09:07:35 GMT
84
+ - Fri, 04 May 2018 14:28:27 GMT
85
85
  Cache-Control:
86
86
  - max-age=31536000
87
87
  Last-Modified:
88
- - Sat, 17 Mar 2018 06:52:49 GMT
88
+ - Thu, 19 Apr 2018 19:39:54 GMT
89
89
  Etag:
90
90
  - '"1503fefbb078bce096ab37de682aaef9"'
91
91
  Server:
92
92
  - AmazonS3
93
93
  Age:
94
- - '7'
94
+ - '74993'
95
95
  X-Cache:
96
96
  - Hit from cloudfront
97
97
  Via:
98
- - 1.1 f5d27f80802e2b6e66ec3970da5568b8.cloudfront.net (CloudFront)
98
+ - 1.1 268be5c908db8ae22ed9c5c6cfffc109.cloudfront.net (CloudFront)
99
99
  X-Amz-Cf-Id:
100
- - "-aA8hrGsojsp7E9okzWAiBPen1XoAZfca4cKjVGRw_EbL3f41hhFcA=="
100
+ - gPl2ytth78oV9A5h_Y65W3oEivzS7-GV4ZH0Yq-vw9guHRfEex266w==
101
101
  body:
102
102
  encoding: ASCII-8BIT
103
103
  string: !binary |-
104
104
  
105
105
  http_version:
106
- recorded_at: Sun, 01 Apr 2018 09:07:41 GMT
106
+ recorded_at: Sat, 05 May 2018 11:18:19 GMT
107
107
  recorded_with: VCR 3.0.3