bolognese 0.9.95 → 0.9.96

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,378 @@
1
+ require_relative 'doi_utils'
2
+ require_relative 'author_utils'
3
+ require_relative 'datacite_utils'
4
+ require_relative 'utils'
5
+
6
+ require_relative 'readers/bibtex_reader'
7
+ require_relative 'readers/citeproc_reader'
8
+ require_relative 'readers/codemeta_reader'
9
+ require_relative 'readers/crosscite_reader'
10
+ require_relative 'readers/crossref_reader'
11
+ require_relative 'readers/datacite_json_reader'
12
+ require_relative 'readers/datacite_reader'
13
+ require_relative 'readers/ris_reader'
14
+ require_relative 'readers/schema_org_reader'
15
+
16
+ require_relative 'writers/bibtex_writer'
17
+ require_relative 'writers/citation_writer'
18
+ require_relative 'writers/citeproc_writer'
19
+ require_relative 'writers/codemeta_writer'
20
+ require_relative 'writers/crosscite_writer'
21
+ require_relative 'writers/crossref_writer'
22
+ require_relative 'writers/datacite_writer'
23
+ require_relative 'writers/datacite_json_writer'
24
+ require_relative 'writers/jats_writer'
25
+ require_relative 'writers/rdf_xml_writer'
26
+ require_relative 'writers/ris_writer'
27
+ require_relative 'writers/schema_org_writer'
28
+ require_relative 'writers/turtle_writer'
29
+
30
module Bolognese
  # Mixin exposing metadata fields through per-field accessors.
  #
  # Field values come from #meta, which runs the reader named by `from`
  # (`read_<from>`) on `string`. Most getters follow one pattern: return the
  # value already stored in the matching instance variable (e.g. assigned
  # through the attr_accessor/attr_writer setter), otherwise look the field up
  # in #meta and remember it.
  module MetadataUtils
    # include BenchmarkMethods
    include Bolognese::DoiUtils
    include Bolognese::AuthorUtils
    include Bolognese::DataciteUtils
    include Bolognese::Utils

    include Bolognese::Readers::BibtexReader
    include Bolognese::Readers::CiteprocReader
    include Bolognese::Readers::CodemetaReader
    include Bolognese::Readers::CrossciteReader
    include Bolognese::Readers::CrossrefReader
    include Bolognese::Readers::DataciteReader
    include Bolognese::Readers::DataciteJsonReader
    include Bolognese::Readers::RisReader
    include Bolognese::Readers::SchemaOrgReader

    include Bolognese::Writers::BibtexWriter
    include Bolognese::Writers::CitationWriter
    include Bolognese::Writers::CiteprocWriter
    include Bolognese::Writers::CodemetaWriter
    include Bolognese::Writers::CrossciteWriter
    include Bolognese::Writers::CrossrefWriter
    include Bolognese::Writers::DataciteWriter
    include Bolognese::Writers::DataciteJsonWriter
    include Bolognese::Writers::JatsWriter
    include Bolognese::Writers::RdfXmlWriter
    include Bolognese::Writers::RisWriter
    include Bolognese::Writers::SchemaOrgWriter
    include Bolognese::Writers::TurtleWriter

    # Many of the readers generated below are re-defined further down with a
    # fallback to #meta; the generated writers stay in effect.
    attr_accessor :string, :identifier, :from, :author,
      :creator, :title, :publisher, :contributor, :license,
      :date_accepted, :date_available, :date_copyrighted, :date_collected,
      :date_submitted, :date_valid, :date_created, :date_modified, :date_updated, :provider_id, :client_id, :journal,
      :volume, :issue, :first_page, :last_page, :b_doi, :b_url, :b_version, :keywords, :editor,
      :description, :alternate_name, :language, :content_size, :spatial_coverage,
      :schema_version, :has_part, :same_as,
      :is_previous_version_of, :is_new_version_of, :is_cited_by, :cites,
      :is_supplement_to, :is_supplemented_by, :is_continued_by, :continues,
      :has_metadata, :is_metadata_for, :is_referenced_by, :references,
      :is_documented_by, :documents, :is_compiled_by, :compiles,
      :is_variant_form_of, :is_original_form_of, :is_reviewed_by, :reviews,
      :is_derived_from, :is_source_of, :format, :funding, :style, :locale, :state, :regenerate, :sandbox

    attr_reader :doc, :service_provider, :page_start, :page_end, :related_identifier, :reverse, :name_detector

    # NOTE(review): `meta=` assigns @meta, but #meta below never reads @meta --
    # confirm whether that writer is still needed.
    attr_writer :id, :type, :additional_type, :citeproc_type, :bibtex_type, :doi,
      :ris_type, :meta

    # True unless the reader reported the state "not_found" (also the default
    # when the reader result has no "state" key).
    def exists?
      meta.fetch("state", "not_found") != "not_found"
    end

    # True when the record exists and #errors is nil.
    def valid?
      exists? && errors.nil?
    end

    # validate against DataCite schema, unless there are already errors in the reader
    def errors
      meta.fetch("errors", nil) || datacite_errors(xml: datacite, schema_version: schema_version)
    end

    # Returns the stripped input string. For DataCite XML input the content of
    # the <identifier> element is replaced with the upcased DOI first.
    #
    # The replacement is skipped when the XML has no <identifier> element
    # (at_css returns nil) or when no DOI is known, so the method neither raises
    # on nil nor blanks out an existing identifier.
    def raw
      r = string.present? ? string.strip : nil
      return r unless (from == "datacite" && r.present?)

      doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
      node = doc.at_css("identifier")
      node.content = doi.to_s.upcase if node && doi.present?
      doc.to_xml.strip
    end

    # True for DataCite input when `regenerate` is blank.
    def should_passthru
      (from == "datacite") && regenerate.blank?
    end

    # Runs the reader selected by `from` and returns its result, or an empty
    # hash when `from` is blank.
    #
    # generate name for method to call dynamically
    # the id might change
    #
    # Not memoized: every call re-runs the reader and re-assigns @id from
    # b_doi or the "id"/"identifier" entry of the reader result.
    # NOTE(review): `from` becomes part of a method name passed to `send` --
    # confirm callers restrict it to the known reader names.
    def meta
      m = from.present? ? send("read_" + from, string: string, sandbox: sandbox) : {}
      @id = b_doi || m.fetch("id", nil) || m.fetch("identifier", nil)

      m
    end

    # --- memoized field getters -------------------------------------------
    # Each returns the stored instance variable if set, else the entry of the
    # same name in #meta (nil when absent).

    def id
      @id ||= meta.fetch("id", nil)
    end

    def type
      @type ||= meta.fetch("type", nil)
    end

    def additional_type
      @additional_type ||= meta.fetch("additional_type", nil)
    end

    def citeproc_type
      @citeproc_type ||= meta.fetch("citeproc_type", nil)
    end

    def bibtex_type
      @bibtex_type ||= meta.fetch("bibtex_type", nil)
    end

    def ris_type
      @ris_type ||= meta.fetch("ris_type", nil)
    end

    def resource_type_general
      @resource_type_general ||= meta.fetch("resource_type_general", nil)
    end

    # Derived from @id via doi_from_url when @id is present, otherwise taken
    # from the "doi" entry of #meta.
    def doi
      @doi ||= @id.present? ? doi_from_url(@id) : meta.fetch("doi", nil)
    end

    def b_url
      @b_url ||= meta.fetch("b_url", nil)
    end

    # Falls back to the "id" entry of #meta (not "identifier").
    def identifier
      @identifier ||= meta.fetch("id", nil)
    end

    def state
      @state ||= meta.fetch("state", nil)
    end

    def title
      @title ||= meta.fetch("title", nil)
    end

    def alternate_name
      @alternate_name ||= meta.fetch("alternate_name", nil)
    end

    def author
      @author ||= meta.fetch("author", nil)
    end

    def editor
      @editor ||= meta.fetch("editor", nil)
    end

    def publisher
      @publisher ||= meta.fetch("publisher", nil)
    end

    def service_provider
      @service_provider ||= meta.fetch("service_provider", nil)
    end

    def date_created
      @date_created ||= meta.fetch("date_created", nil)
    end

    def date_accepted
      @date_accepted ||= meta.fetch("date_accepted", nil)
    end

    def date_available
      @date_available ||= meta.fetch("date_available", nil)
    end

    def date_copyrighted
      @date_copyrighted ||= meta.fetch("date_copyrighted", nil)
    end

    def date_collected
      @date_collected ||= meta.fetch("date_collected", nil)
    end

    def date_submitted
      @date_submitted ||= meta.fetch("date_submitted", nil)
    end

    def date_valid
      @date_valid ||= meta.fetch("date_valid", nil)
    end

    def date_published
      @date_published ||= meta.fetch("date_published", nil)
    end

    def date_modified
      @date_modified ||= meta.fetch("date_modified", nil)
    end

    def date_registered
      @date_registered ||= meta.fetch("date_registered", nil)
    end

    def date_updated
      @date_updated ||= meta.fetch("date_updated", nil)
    end

    def volume
      @volume ||= meta.fetch("volume", nil)
    end

    def first_page
      @first_page ||= meta.fetch("first_page", nil)
    end

    def last_page
      @last_page ||= meta.fetch("last_page", nil)
    end

    def description
      @description ||= meta.fetch("description", nil)
    end

    def license
      @license ||= meta.fetch("license", nil)
    end

    def b_version
      @b_version ||= meta.fetch("b_version", nil)
    end

    def keywords
      @keywords ||= meta.fetch("keywords", nil)
    end

    def language
      @language ||= meta.fetch("language", nil)
    end

    def content_size
      @content_size ||= meta.fetch("content_size", nil)
    end

    def schema_version
      @schema_version ||= meta.fetch("schema_version", nil)
    end

    def funding
      @funding ||= meta.fetch("funding", nil)
    end

    def provider_id
      @provider_id ||= meta.fetch("provider_id", nil)
    end

    def client_id
      @client_id ||= meta.fetch("client_id", nil)
    end

    # --- relation getters ---------------------------------------------------
    # These read straight from #meta on every call; values assigned through
    # the generated writers are not consulted.

    def is_identical_to
      meta.fetch("is_identical_to", nil)
    end

    def is_part_of
      meta.fetch("is_part_of", nil)
    end

    def has_part
      meta.fetch("has_part", nil)
    end

    # Looks up "is_previous_version_of", the key matching this method and the
    # naming of every other relation here. The shorter "is_previous_of" key is
    # still honoured as a fallback in case a reader emits it.
    def is_previous_version_of
      m = meta
      m.fetch("is_previous_version_of", nil) || m.fetch("is_previous_of", nil)
    end

    def is_new_version_of
      meta.fetch("is_new_version_of", nil)
    end

    def is_variant_form_of
      meta.fetch("is_variant_form_of", nil)
    end

    def is_original_form_of
      meta.fetch("is_original_form_of", nil)
    end

    def references
      meta.fetch("references", nil)
    end

    def is_referenced_by
      meta.fetch("is_referenced_by", nil)
    end

    def is_supplement_to
      meta.fetch("is_supplement_to", nil)
    end

    def is_supplemented_by
      meta.fetch("is_supplemented_by", nil)
    end

    def reviews
      meta.fetch("reviews", nil)
    end

    def is_reviewed_by
      meta.fetch("is_reviewed_by", nil)
    end

    # Entries of the given relation (a getter name such as "is_part_of") that
    # carry an "id" or "issn", each merged with a "relationType" key holding
    # the camelized relation name.
    def related_identifier_hsh(relation_type)
      Array.wrap(send(relation_type)).select { |r| r["id"] || r["issn"] }
        .map { |r| r.merge("relationType" => relation_type.camelize) }
    end

    # Concatenation of #related_identifier_hsh over the supported relations,
    # in the order listed.
    def related_identifier
      relation_types = %w(is_part_of has_part references is_referenced_by is_supplement_to is_supplemented_by)
      relation_types.flat_map { |r| related_identifier_hsh(r) }
    end

    # recognize given name. Can be loaded once as ::NameDetector, e.g. in a Rails initializer
    def name_detector
      @name_detector ||= defined?(::NameDetector) ? ::NameDetector : nil
    end

    # First four characters of date_published converted with to_i, or nil when
    # date_published is blank.
    def publication_year
      date_published.present? ? date_published[0..3].to_i.presence : nil
    end

    # "title" of the first is_part_of entry, or nil when there is none.
    def container_title
      Array.wrap(is_part_of).first.to_h.fetch("title", nil)
    end

    # description, always wrapped in an Array.
    def descriptions
      Array.wrap(description)
    end

    # Hash with "citation" (from is_referenced_by) and "isBasedOn" (from
    # is_supplement_to), each built from {"@id" => id} entries; keys whose
    # value is nil are dropped by compact.
    def reverse
      { "citation" => Array.wrap(is_referenced_by).map { |r| { "@id" => r["id"] }}.unwrap,
        "isBasedOn" => Array.wrap(is_supplement_to).map { |r| { "@id" => r["id"] }}.unwrap }.compact
    end

    # RDF graph built from schema_hsh via JSON-LD.
    def graph
      RDF::Graph.new << JSON::LD::API.toRdf(schema_hsh)
    end

    # Citation style; defaults to "apa" when none was assigned.
    def style
      @style || "apa"
    end

    # Citation locale; defaults to "en-US" when none was assigned.
    def locale
      @locale || "en-US"
    end
  end
end
@@ -17,7 +17,7 @@ module Bolognese
17
17
  end
18
18
 
19
19
  meta = string.present? ? Maremma.from_json(string) : {}
20
- identifier = meta.fetch("identifier", nil) || options[:id]
20
+ identifier = meta.fetch("identifier", nil)
21
21
  id = normalize_id(meta.fetch("@id", nil) || identifier)
22
22
  type = meta.fetch("@type", nil)
23
23
  author = get_authors(from_schema_org(Array.wrap(meta.fetch("agents", nil))))
@@ -129,7 +129,7 @@ module Bolognese
129
129
  additional_type = (additional_type || model).to_s.underscore.camelize.presence
130
130
  type = CR_TO_SO_TRANSLATIONS[additional_type] || "ScholarlyArticle"
131
131
 
132
- doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase.presence || doi_from_url(options[:id])
132
+ doi = bibliographic_metadata.dig("doi_data", "doi").to_s.downcase.presence #|| doi_from_url(options[:id])
133
133
 
134
134
  # Crossref servers run on Eastern Time
135
135
  Time.zone = 'Eastern Time (US & Canada)'
@@ -16,7 +16,7 @@ module Bolognese
16
16
  id = normalize_id(id)
17
17
  response = Maremma.get(id)
18
18
  doc = Nokogiri::XML(response.body.fetch("data", nil), nil, 'UTF-8')
19
- #string = doc.at_xpath('//script[@type="application/ld+json"]')
19
+
20
20
  # workaround for xhtml documents
21
21
  nodeset = doc.css("script")
22
22
  string = nodeset.find { |element| element["type"] == "application/ld+json" }
@@ -33,7 +33,7 @@ module Bolognese
33
33
 
34
34
  meta = string.present? ? Maremma.from_json(string) : {}
35
35
 
36
- id = normalize_id(meta.fetch("@id", nil) || options[:id])
36
+ id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil))
37
37
  type = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
38
38
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[type]
39
39
  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
@@ -65,11 +65,13 @@ module Bolognese
65
65
  "ris_type" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN",
66
66
  "resource_type_general" => resource_type_general,
67
67
  "doi" => validate_doi(id),
68
+ "identifier" => id,
68
69
  "b_url" => normalize_id(meta.fetch("url", nil)),
69
70
  "title" => meta.fetch("name", nil),
70
71
  "alternate_name" => meta.fetch("alternateName", nil),
71
72
  "author" => author,
72
- "publisher" => meta.dig("publisher", "name"),
73
+ "editor" => editor,
74
+ "publisher" => publisher,
73
75
  "service_provider" => meta.fetch("provider", nil),
74
76
  "is_identical_to" => schema_org_is_identical_to(meta),
75
77
  "is_part_of" => is_part_of,
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.9.95"
2
+ VERSION = "0.9.96"
3
3
  end
@@ -29,17 +29,17 @@ http_interactions:
29
29
  Location:
30
30
  - "/eating-your-own-dog-food/"
31
31
  Date:
32
- - Sun, 01 Apr 2018 09:07:34 GMT
32
+ - Sat, 05 May 2018 11:18:18 GMT
33
33
  Server:
34
34
  - AmazonS3
35
35
  Age:
36
- - '7'
36
+ - '1'
37
37
  X-Cache:
38
38
  - Hit from cloudfront
39
39
  Via:
40
- - 1.1 b4a55cbff1b10f55c71caa19690c960e.cloudfront.net (CloudFront)
40
+ - 1.1 79503619d600dbc1c9e04a650d3d7f3f.cloudfront.net (CloudFront)
41
41
  X-Amz-Cf-Id:
42
- - PiYKF90NnJ3q74OCFm-SsaoZ4dFao2Mf2nsqorGgC5peSZ1G49af-w==
42
+ - STgSOeC22bUHkt2ibmg8PYtQx3wOaP4LuhYPvdMOpKbqG6whLJv1CA==
43
43
  body:
44
44
  encoding: ASCII-8BIT
45
45
  string: |
@@ -50,14 +50,14 @@ http_interactions:
50
50
  <ul>
51
51
  <li>Code: Found</li>
52
52
  <li>Message: Resource Found</li>
53
- <li>RequestId: 9969744BDEDE2DCF</li>
54
- <li>HostId: CHCBOci8Ah7seqAe+QhEI4MkzkKUaFeWxNnz62xRgBd+OUDoxKz3/WrhvPncEjkePwujz5xR060=</li>
53
+ <li>RequestId: D9CCB55C4CD3060A</li>
54
+ <li>HostId: 96Rt2L2T/eEFeJZens9Xh0CiPmwnzPOib+s4Z2/eYdziOrw0Ja/kRiarTIWcIYC/CM6+fDkkYCk=</li>
55
55
  </ul>
56
56
  <hr/>
57
57
  </body>
58
58
  </html>
59
59
  http_version:
60
- recorded_at: Sun, 01 Apr 2018 09:07:41 GMT
60
+ recorded_at: Sat, 05 May 2018 11:18:19 GMT
61
61
  - request:
62
62
  method: get
63
63
  uri: https://blog.datacite.org/eating-your-own-dog-food/
@@ -81,27 +81,27 @@ http_interactions:
81
81
  Connection:
82
82
  - keep-alive
83
83
  Date:
84
- - Sun, 01 Apr 2018 09:07:35 GMT
84
+ - Fri, 04 May 2018 14:28:27 GMT
85
85
  Cache-Control:
86
86
  - max-age=31536000
87
87
  Last-Modified:
88
- - Sat, 17 Mar 2018 06:52:49 GMT
88
+ - Thu, 19 Apr 2018 19:39:54 GMT
89
89
  Etag:
90
90
  - '"1503fefbb078bce096ab37de682aaef9"'
91
91
  Server:
92
92
  - AmazonS3
93
93
  Age:
94
- - '7'
94
+ - '74993'
95
95
  X-Cache:
96
96
  - Hit from cloudfront
97
97
  Via:
98
- - 1.1 f5d27f80802e2b6e66ec3970da5568b8.cloudfront.net (CloudFront)
98
+ - 1.1 268be5c908db8ae22ed9c5c6cfffc109.cloudfront.net (CloudFront)
99
99
  X-Amz-Cf-Id:
100
- - "-aA8hrGsojsp7E9okzWAiBPen1XoAZfca4cKjVGRw_EbL3f41hhFcA=="
100
+ - gPl2ytth78oV9A5h_Y65W3oEivzS7-GV4ZH0Yq-vw9guHRfEex266w==
101
101
  body:
102
102
  encoding: ASCII-8BIT
103
103
  string: !binary |-
104
104
  <!DOCTYPE html>
  <html>
    <head>
    <meta charset="utf-8">
    <!-- (1) Optimize for mobile versions: http://goo.gl/EOpFl -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- (1) force latest IE rendering engine: bit.ly/1c8EiC9 -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">


    <title>Eating your own Dog Food</title>
    <meta name="description" content="Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..." />

    <meta name="HandheldFriendly" content="True" />
    <meta name="MobileOptimized" content="320" />
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">

    <!-- DublinCore Metadata -->
    <meta property="dc:title" content="Eating your own Dog Food" />
    <meta property="dc:format" content="text/html" />
    <meta property="dc:language" content="en" />
    <meta property="dc:rights" content="CC-BY" />
    <meta property="dc:source" content="DataCite Blog" />
    <meta property="dc:subject" content="Scholarly Communication" />
    <meta property="dc:type" content="website" />


    <meta property="og:site_name" content="Eating your own Dog Food" />
    <meta property="og:description" content="Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for..." />
    <meta property="og:image" content="https://blog.datacite.org/images/2016/12/230785.jpg" />
    <meta property="og:type" content="blog" />

    <link href="//fonts.googleapis.com/css?family=Libre+Baskerville:400,400i,700" rel="stylesheet">
    <link href='//fonts.googleapis.com/css?family=Raleway:400,600,400italic,600italic' rel='stylesheet' type='text/css'>
    <link href="//maxcdn.bootstrapcdn.com/font-awesome/4.6.1/css/font-awesome.min.css" rel="stylesheet" type='text/css'>
    <link href="https://assets.datacite.org/stylesheets/datacite.css" rel='stylesheet' type='text/css'>
    <link href="https://assets.datacite.org/images/favicon.ico" rel="icon" type="image/ico" />

    <script src="//cdnjs.cloudflare.com/ajax/libs/fitvids/1.1.0/jquery.fitvids.min.js"></script>

      <script
        src="//d2wy8f7a9ursnm.cloudfront.net/bugsnag-2.min.js"
        data-apikey="c37a5861967091a9b42a1a77e235114a">
      </script>

    <script type="application/ld+json">
      {"@context":"http://schema.org","@type":"BlogPosting","@id":"https://doi.org/10.5438/4k3m-nyvg","name":"Eating your own Dog Food","alternateName":"MS-49-3632-5083","url":"https://blog.datacite.org/eating-your-own-dog-food/","author":[{"@type":"Person","@id":"https://orcid.org/0000-0003-1419-2405","givenName":"Martin","familyName":"Fenner","name":"Martin Fenner"}],"publisher":{"@type":"Organization","name":"DataCite"},"dateCreated":"2016-12-20","datePublished":"2016-12-20","dateModified":"2016-12-20","keywords":"datacite, doi, metadata, featured","version":"1.0","description":"Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...","license":"https://creativecommons.org/licenses/by/4.0/","image":"https://blog.datacite.org/images/2016/12/230785.jpg","isPartOf":{"@type":"Blog","@id":"https://doi.org/10.5438/0000-00SS","name":"DataCite Blog"},"citation":[{"@type":"CreativeWork","@id":"https://doi.org/10.5438/0012"},{"@type":"CreativeWork","@id":"https://doi.org/10.5438/55E5-T5C0"}]}
    </script>
  </head>
  <body>
    <header class="header" id="navtop">
      <div class="navbar navbar-white" role="navigation">
        <div class="container-fluid">
          <div class="navbar-header"
            <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
              <span class="sr-only">Toggle navigation</span>
              <span class="icon-bar"></span>
              <span class="icon-bar"></span>
              <span class="icon-bar"></span>
            </button>
          </div>
          <a class="navbar-brand" href="/">DataCite Blog</a>
          <div class="navbar-collapse collapse">
            <ul class="nav navbar-nav navbar-right">
              <li><a href="https://support.datacite.org">Support</a></li>
              <li class="dropdown">
                <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="sites"><i class='fa fa-th'></i> <span class="caret"></span></a>
                <ul class="dropdown-menu" role="menu">
                  <li><a href="https://www.datacite.org">
                    <i class='fa fa-globe fa-fw'></i>
                    Homepage</a>
                  </li>
                  <li><a href="https://blog.datacite.org">
                    <i class='fa fa-rss fa-fw'></i>
                    Blog</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="https://mds.datacite.org">
                    <i class='fa fa-database fa-fw'></i>
                    MDS</a>
                  </li>
                  <li><a href="https://schema.datacite.org">
                    <i class='fa fa-file-code-o fa-fw'></i>
                    Schema</a>
                  </li>
                  <li><a href="http://citation.crosscite.org">
                    <i class='fa fa-file-text-o fa-fw'></i>
                    Citation Formatter</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="https://search.datacite.org">
                    <i class='fa fa-search fa-fw'></i>
                    Search</a>
                  </li>
                  <li><a href="https://oai.datacite.org">
                    <i class='fa fa-table fa-fw'></i>
                    OAI-PMH</a>
                  </li>
                  <li><a href="https://stats.datacite.org">
                    <i class='fa fa-bar-chart fa-fw'></i>
                    Statistics</a>
                  </li>
                  <li><a href="https://api.datacite.org">
                    <i class='fa fa-cogs fa-fw'></i>
                    REST API</a>
                  </li>
                  <li><a href="http://www.re3data.org">
                    <i class='fa fa-cubes fa-fw'></i>
                    re3data</a>
                  </li>
                  <li class="divider"></li>
                  <li><a href="http://status.datacite.org">
                    <i class='fa fa-calendar-check-o fa-fw'></i>
                    Status</a>
                  </li>
                </ul>
              </li>
            </ul>
          </div>
        </div>
      </div>
    </header>
      <div class="wrapper">
    <div class="section section-white">
      <div class="container-fluid">
        <div class="row row-section">
          <div class="col-md-8 col-md-offset-2 post-content">
            <a name="topofpage"></a>
            <div class="post-meta">
              <h1>Eating your own Dog Food</h1>
              December 20, 2016 by Martin Fenner
              • <span class="post-reading-time"></span> read
                <p class="doi"><a href="https://doi.org/10.5438/4k3m-nyvg">https://doi.org/10.5438/4k3m-nyvg</a></p>
            </div>

            <p><a href="https://newrepublic.com/article/115349/dogfooding-tech-slang-working-out-glitches">Eating your own dog food</a> is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for the scholarly outputs we produce. For the most part this is not research data, but rather technical documents such as the DataCite Schema and its documentation <span class="citation">(<a href="#ref-https://doi.org/10.5438/0012">2016</a>)</span>.</p>
<p>These outputs also include the posts on this blog, where we discuss topics relevant for the DataCite community, but also of broader interest to anyone who cares about research data, persistent identifiers, and scholarly infrastructure. And starting today all blog posts on this blog will have a DOI, metadata and use a persistent storage mechanism.</p>
<div class="figure">
<img src="/images/2016/12/230785.jpg" alt="Photo by Bill Emrich. CC Zero." />
<p class="caption">Photo by <a href="https://www.pexels.com/photo/black-and-tan-yorkshire-terrier-puppy-230785/">Bill Emrich</a>. <a href="https://creativecommons.org/publicdomain/zero/1.0/">CC Zero</a>.</p>
</div>
<h3 id="technical-implementation">Technical Implementation</h3>
<p>This blog is powered by the static site generator <a href="https://middlemanapp.com/">Middleman</a>, with blog posts written in <a href="http://commonmark.org/">Markdown</a> and converted to HTML using <a href="http://pandoc.org/">Pandoc</a> and the <a href="https://travis-ci.org">Travis CI</a> continuous integration service. Static site generator means that there is no database or application server powering the site, making website adminstration simpler, cheaper and safer. In addition to the blog, the <a href="https://www.datacite.org">DataCite homepage</a> and <a href="https://schema.datacite.org">Metadata Schema subsite</a> are also generated using Middleman.</p>
<p>The simplicity is particularly important here, as registering the DOIs and metadata can be accomplished using a command line utility written by DataCite staff that doesn’t need to know much about the internals of Middleman, and thus can be easily adapted to other static site generators such as <a href="http://jekyllrb.com/">Jekyll</a>, <a href="http://gohugo.io/">Hugo</a> or <a href="https://hexo.io/">Hexo</a>. The command line utility is <a href="https://github.com/datacite/cirneco">Cirneco</a>, generating the metadata XML according to the DataCite Metadata Schema, and registering DOI and metadata with the DataCite MDS. Like all tools mentioned in this post Cirneco is open source software, please reach out to us if you are interested in implementing similar functionality for your blog.</p>
<h3 id="generating-dois">Generating DOIs</h3>
<p>The DOIs for this blog are generated automatically, using a modified base32 encoding algorithm that is provided by Cirneco, as discussed last week <span class="citation">(Fenner, <a href="#ref-https://doi.org/10.5438/55E5-T5C0">2016</a>)</span>. The DOI is generated and minted when a new post is pushed to <a href="https://blog.datacite.org" class="uri">https://blog.datacite.org</a>. This avoids two problems: a) DOI-like strings in the wild before publication and b) the randomly generated DOI exists already (we can simply generate a new one). All DOIs are short, without semantic infomation that might change over time, and with a checksum to minimize transcription errors, for example <strong>https://doi.org/10.5438/XCBJ-G7ZY</strong>. Going forward we encourage users to link to the DataCite Blog using the DOI, as these links will continue to work even if we ever move the blog to a different location.</p>
<h3 id="generating-metadata">Generating Metadata</h3>
<p>For the generation of metadata, we need to strike a balance between simple author provided metadata, but rich enough to aid discovery. We are doing this via three mechanisms:</p>
<ul>
<li>metadata provided by the author</li>
<li>default metadata for the blog</li>
<li>metadata automatically extracted from content</li>
</ul>
<p>The metadata provided by the author are the typical metadata for blog posts, provided via <a href="https://gohugo.io/content/front-matter/">YAML front matter</a> at the beginning of each post:</p>
<div class="sourceCode"><pre class="sourceCode yaml"><code class="sourceCode yaml"><span class="ot">---</span>
<span class="fu">layout:</span> post
<span class="fu">title:</span> Eating your own Dog Food
<span class="fu">author:</span> mfenner
<span class="fu">date:</span> 2016-12-19
<span class="fu">tags:</span>
<span class="kw">-</span> datacite
<span class="kw">-</span> doi
<span class="kw">-</span> metadata
<span class="ot">---</span></code></pre></div>
<p>We can reuse all these metadata when generating DataCite metadata, using the tags as <code>subjects</code>.</p>
<p>The default metadata are metadata that always stay the same for the blog, such as <code>publisher</code>, <code>HostingInstitution</code> and <code>rights</code>. We can store them in a site-wide configuration file. We can also assume reasonable defaults that can be overridden in the YAML front matter, e.g. <code>resourceType</code> (we use <a href="https://schema.org/BlogPosting">BlogPosting</a> with <code>resourceTypeGeneral</code> Text) and <code>version</code>. We store more information about authors outside the blog post, including <code>givenName</code>, <code>familyName</code> and <code>nameIdentifier</code> (we now show the ORCID ID of every blog author at the bottom of the post).</p>
<p>Finally, there are metadata that we can automatically extract from the blog post, and we are currently doing this for the <code>description</code> and <code>relatedIdentifier</code>. This blog uses Pandoc and BibTex to generate the references section at the end, and we can fetch this information and convert it into the format needed for <code>relatedIdentifier</code>.</p>
<p>Taken together we can provide all metadata that are <em>required</em> or <em>recommended</em> in the Metadata Schema documentation <span class="citation">(<a href="#ref-https://doi.org/10.5438/0012">2016</a>)</span>, and we can do this without any extra effort for the author. The full XML is avalailable <a href="https://data.datacite.org/application/x-datacite+xml/10.5438/4K3M-NYVG">here</a>.</p>
<p>Not all blog posts need to be cited formally with metadata in a <em>references</em> list formatted according to a specific citation style. But these metadata greatly help with discovery, a search in DataCite Search for <a href="http://search.datacite.org/works?query=eating+dog+food">eating dog food</a> will for example bring up this blog post as the first hit.</p>
<h3 id="persistent-storage">Persistent storage</h3>
<p>Using DOIs means that readers not only expect rich metadata that help with citation and discovery, but also that DataCite takes extra care to preserve the blog posts, thinking beyond the particular technical implementation or even the contiuing existence of this blog. This is an area where we do need to do more work, starting with a decision about the best archival format for a blog post (HTML, PDF, <a href="https://jats.nlm.nih.gov/">JATS</a>?). For now blog posts are hosted in multiple Git repositories (<a href="https://github.com/datacite/blog">one of them on Github</a>), and in two independent Amazon S3 buckets that each use <a href="http://docs.aws.amazon.com/AmazonS3/latest/dev/Versioning.html">versioning</a>. Multiple locations with versioning are a good start, but more work is clearly needed.</p>
<h3 id="references" class="unnumbered">References</h3>
<div id="refs" class="references">
<div id="ref-https://doi.org/10.5438/0012">
<p>DataCite Metadata Working Group. (2016). DataCite Metadata Schema for the Publication and Citation of Research Data v4.0. <em>DataCite</em>. <a href="https://doi.org/10.5438/0012" class="uri">https://doi.org/10.5438/0012</a></p>
</div>
<div id="ref-https://doi.org/10.5438/55E5-T5C0">
<p>Fenner, M. (2016). Cool DOI’s. <em>DataCite</em>. <a href="https://doi.org/10.5438/55E5-T5C0" class="uri">https://doi.org/10.5438/55E5-T5C0</a></p>
</div>
</div>

            <hr width="80%">
          </div>
        </div>
        <div class="row">
          <div class="col-md-5 col-md-offset-2 post-content">
            <div class="bottom-teaser cf">
  <div class="isLeft">
    <section class="author">
          <div class="author-image" style="background-image: url(https://www.gravatar.com/avatar/434592a097e91261792ebd6b492042bc?s=250&d=mm&r=x)">Blog Logo</div>
        <h4>Martin Fenner</h4>
        <p class="bio">DataCite Technical Director</p>
        <p class="orcid"><a href="https://orcid.org/0000-0003-1419-2405">https://orcid.org/0000-0003-1419-2405</a></p>
        <div class="clearfix"></div>
      <h4>Eating your own Dog Food</h4>
        <p class="published"><a href="https://doi.org/10.5438/4k3m-nyvg">https://doi.org/10.5438/4k3m-nyvg</a></p>
      <p class="published"><i class="fa fa-calendar"></i> <time datetime="2016-12-20 00:00">December 20, 2016</time></p>
      <p class="published"><i class="fa fa-history"></i> <a href="https://github.com/datacite/blog/commits/master/source/posts/eating-your-own-dog-food.html.md">History</a></p>
      <p class="published">© 2016 Martin Fenner. Distributed under the terms of the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution license</a>.</p>
      <p class="published">
        <i class="fa fa-tags"></i>
        <a href="/index.html?tag=datacite">datacite</a>, <a href="/index.html?tag=doi">doi</a>, <a href="/index.html?tag=metadata">metadata</a>, <a href="/index.html?tag=featured">featured</a>
      </p>
    </section>
  </div>
</div>

          </div>
          <div class="col-md-2 col-md-offset-1">
             <div class="bottom-teaser cf">
  <div class="isLeft">
    <h5 class="index-headline featured"><span>Share on</span></h5>
      <a class="icon-twitter" href="http://twitter.com/share?text=On the @datacite blog: Eating your own Dog Food&amp;url=https://blog.datacite.org/eating-your-own-dog-food/"
        onclick="window.open(this.href, 'twitter-share', 'width=550,height=255');return false;">
        <i class="fa fa-twitter fa-2x"></i><span class="hidden">twitter</span>
      </a>
      <a class="icon-facebook" href="https://www.facebook.com/sharer.php?t=On the @datacite blog: Eating your own Dog Food&amp;u=https://blog.datacite.org/eating-your-own-dog-food/"
        onclick="window.open(this.href, 'facebook-share', 'width=550,height=255');return false;">
        <i class="fa fa-facebook fa-2x"></i><span class="hidden">facebook</span>
      </a>
  </div>
</div>

          </div>
        </div>
          <div class="row">
            <div class="col-md-8 col-md-offset-2 post-content">
              <div id="disqus_thread"></div>
<script>
    var disqus_config = function () {
        this.page.url = 'https://blog.datacite.org/eating-your-own-dog-food/';
        this.page.identifier = 'https://blog.datacite.org/eating-your-own-dog-food/';
    };
    (function() {
        var d = document, s = d.createElement('script');

        s.src = '//datacite.disqus.com/embed.js';  //

        s.setAttribute('data-timestamp', +new Date());
        (d.head || d.body).appendChild(s);
    })();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript" rel="nofollow">comments powered by Disqus.</a></noscript>

            </div>
          </div>
      </div>
    </div>
  </div>
    <footer class='row footer'>
      <div class="container-fluid">
        <div class='col-md-3 col-sm-4'>
          <h4>About DataCite</h4>
          <ul>
            <li><a href="https://www.datacite.org/mission.html">What we do</a></li>
            <li><a href="https://www.datacite.org/board.html">Board</a></li>
            <li><a href="https://www.datacite.org/steering.html">Steering groups</a></li>
            <li><a href="https://www.datacite.org/staff.html">Staff</a></li>
            <li><a href="https://www.datacite.org/jobopportunities.html">Job opportunities</a></li>
          </ul>
        </div>
        <div class='col-md-3 col-sm-4'>
          <h4>Services</h4>
          <ul>
            <li><a href="https://www.datacite.org/dois.html">Assign DOIs</a></li>
            <li><a href="https://www.datacite.org/search.html">Metadata search</a></li>
            <li><a href="https://www.datacite.org/eventdata.html">Event data</a></li>
            <li><a href="https://www.datacite.org/profiles.html">Profiles</a></li>
            <li><a href="https://www.datacite.org/re3data.html">re3data</a></li>
            <li><a href="https://www.datacite.org/citation.html">Citation formatter</a></li>
            <li><a href="https://www.datacite.org/stats.html">Statistics</a></li>
            <li><a href="https://www.datacite.org/content.html">Content negotiation</a></li>
            <li><a href="https://www.datacite.org/oaipmh.html">OAI-PMH</a></li>
          </ul>
        </div>
        <div class='col-md-3 col-sm-4'>
          <h4>Resources</h4>
          <ul>
            <li><a href="https://schema.datacite.org">Metadata schema</a></li>
            <li><a href="https://support.datacite.org">Support</a></li>
          </ul>
          <h4>Community</h4>
          <ul>
            <li><a href="https://www.datacite.org/members.html">Members</a></li>
            <li><a href="https://www.datacite.org/partners.html">Partners</a></li>
            <li><a href="https://www.datacite.org/steering.html">Steering groups</a></li>
            <li><a href="https://www.datacite.org/events.html">Events</a></li>
            <li><a href="https://www.datacite.org/roadmap.html">Roadmap</a></li>
            <li><a href="https://www.datacite.org/user-stories.html">User Stories</a></li>
          </ul>
        </div>
        <div class='col-md-3'>
          <h4 class="share">Contact us</h4>
          <a href='mailto:support@datacite.org' class="share">
            <i class='fa fa-at'></i>
          </a>
          <a href='https://blog.datacite.org/feed.xml' class="share">
            <i class='fa fa-rss'></i>
          </a>
          <a href='https://twitter.com/datacite' class="share">
            <i class='fa fa-twitter'></i>
          </a>
          <a href='https://github.com/datacite/datacite' class="share">
            <i class='fa fa-github'></i>
          </a>
          <a href='https://www.linkedin.com/company/datacite' class="share">
            <i class='fa fa-linkedin'></i>
          </a>
          <ul>
            <li><a href="https://www.datacite.org/terms.html">Terms and conditions</a></li>
            <li><a href="https://www.datacite.org/privacy.html">Privacy policy</a></li>
            <li><a href="https://www.datacite.org/acknowledgments.html">Acknowledgements</a></li>
          </ul>
          <a href="http://status.datacite.org" target="_blank">
            <span class="color-dot"></span>
            <span class="color-description"></span>
          </a>
        </div>
      </div>
    </footer>
    <script src="//code.jquery.com/jquery-2.1.4.min.js"></script>
    <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
    <script src="//cdn.statuspage.io/se-v2.js"></script>
    <script src="https://assets.datacite.org/javascripts/default.js"></script>
    <script src="/javascripts/readingTime.min.js"></script>
    <script src="/javascripts/index.js"></script>
    <script src="/javascripts/search.js"></script>
    <script id="dsq-count-scr" src="//datacite.disqus.com/count.js" async></script>
    <script>
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');

      ga('create', 'UA-22806196-6', 'auto');
      ga('send', 'pageview');
    </script>
  </body>
</html>

105
105
  http_version:
106
- recorded_at: Sun, 01 Apr 2018 09:07:41 GMT
106
+ recorded_at: Sat, 05 May 2018 11:18:19 GMT
107
107
  recorded_with: VCR 3.0.3